#!/usr/local/bin/ruby
#
# csv-select: select specified column from input where input is in the
#             comma-separated values format
# Specifically: Input is one or more lines, where each line has one
#               or more fields.
#      Fields are separated by commas; if a field starts with
#      ", then it must end with ".  Inside a field surrounded by "
#      marks any character may appear; a \ escapes the character
#      that follows it (so \" is a literal quote mark).
#      No field may wrap around the end of the line.

# set DEBUG to 0 to turn off all debugging, higher numbers mean more output
DEBUG = 0

# parseLine: parses input string into an array of values, using commas
#            in the input to separate out the values.
# type: string -> array(string)
# Returns nil (after writing a message to $stderr) when the line cannot
# be parsed: either a character other than a comma follows a closing
# quote, or the line ends inside a quoted field.
#
# The implementation is based on the finite state machine at
#      http://www.uwplatt.edu/csse/courses/cs352/samples/csv-fsm.gif
# One could use operations like .split to achieve the same goal,
# but the point here is to illustrate FSMs.
#
# States:
#   :S     - at the start of a field
#   :F     - inside an unquoted field
#   :Q     - inside a quoted field
#   :E     - just saw a backslash inside a quoted field
#   :C     - just saw the closing quote of a quoted field
#   :Error - input rejected
#
# Note: an empty field after the final comma is not emitted, so ","
# yields [""] and ",," yields ["", ""] (see the checks in `test`).
def parseLine(line)
  state = :S
  index = 0
  result = []
  field = ''.dup # dup so the buffer can be appended to in place

  line.each_char do |ch|
    case state
    when :S
      case ch
      when '"' then state = :Q
      when ',' then result << ''
      else
        field << ch
        state = :F
      end
    when :F
      if ch == ','
        result << field
        field = ''.dup
        state = :S
      else
        field << ch
      end
    when :Q
      case ch
      when '\\' then state = :E
      when '"'
        # closing quote: the field is complete, but a comma must follow
        result << field
        field = ''.dup
        state = :C
      else
        field << ch
      end
    when :C
      state = ch == ',' ? :S : :Error
    when :E
      # whatever follows the backslash is taken literally
      field << ch
      state = :Q
    end

    # index is advanced before the error check so that the reported
    # position is one past the offending character
    index += 1
    break if state == :Error
  end

  # processed all data or hit an error state
  case state
  when :S, :C
    result
  when :F
    result << field
  when :Error
    $stderr.puts "Could not process line at index #{index} in '#{line}'"
    nil
  else
    # :Q or :E - the line ended inside a quoted field
    $stderr.puts "Line '#{line}' is not properly terminated"
    nil
  end
end

# test: runs parseLine against a set of sample lines, reporting each
#       mismatch on $stderr, then prints a completion message.
def test
  xs = parseLine "abc,def"
  $stderr.puts "ERROR: abc,def" if xs != ["abc", "def"]
  xs = parseLine "\"this\",that,\"other\""
  $stderr.puts "ERROR: this,that,other" if xs != ['this', 'that', 'other']
  xs = parseLine ""
  $stderr.puts "ERROR: empty" if xs != []
  xs = parseLine ","
  $stderr.puts "ERROR: ," if xs != [""]
  xs = parseLine ",,"
  $stderr.puts "ERROR: ,," if xs != ["", ""]
  xs = parseLine "a"
  $stderr.puts "ERROR: a" if xs != ["a"]
  xs = parseLine '\\'
  $stderr.puts 'ERROR: \\' if xs != ['\\']
  xs = parseLine 'x\\y'
  $stderr.puts 'ERROR: x\\y' if xs != ['x\\y']
  xs = parseLine '"\"hello\""'
  $stderr.puts "ERROR: hello" if xs != ['"hello"']
  xs = parseLine '"Hi!","I said, \\"hi\\"",a\\b'
  $stderr.puts "ERROR: hi,said,hi,slash" if xs != ["Hi!", 'I said, "hi"', 'a\b']
  puts "All tests completed."
end

# getNum: parse string for an integer, failing if it isn't a non-negative int
# type: string -> natural number
# Prints a message to $stderr and exits with status 1 on invalid input.
def getNum(str)
  i = str.to_i
  # to_i never fails (it returns 0 or a numeric prefix), so round-trip
  # through to_s to reject anything that is not a plain decimal integer
  if i.to_s != str
    $stderr.puts "Error: #{str} is not a valid number"
    exit 1
  elsif i < 0
    $stderr.puts "Error: parameter #{str} must be non-negative"
    exit 1
  end
  i
end

# main: reads lines from standard input and prints the field at the
#       (zero-based) index given as the first command-line argument.
#       Lines that fail to parse or have too few fields print nothing.
def main
  if ARGV.length < 1
    $stderr.puts "Usage: #{__FILE__} field_number"
    exit 1
  end
  target_index = getNum ARGV[0]

  # $stdin.gets is required here: a bare gets reads through ARGF, which
  # would treat the field-number argument in ARGV as a file name to open
  while (inline = $stdin.gets)
    inline.chomp!
    puts "Read line #{inline}" if DEBUG > 0
    fields = parseLine(inline)
    # fields is nil when the line could not be parsed
    puts "Fields: #{fields.join('|')}" if DEBUG > 1 && fields
    selected = fields && fields[target_index]
    puts selected if selected
  end
end

# Run only when executed as a script; call `test` here instead of `main`
# to run the built-in checks.
main if __FILE__ == $PROGRAM_NAME
#test