Last active
August 29, 2015 14:03
-
-
Save labocho/d623e15cb1878fa6f91b to your computer and use it in GitHub Desktop.
CSV Converter
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Converts CSV/TSV files between character encodings and separators and
# writes the converted text to standard output.
#
# Example:
#   # UTF-8 CSV to Excel Compatible TSV
#   $ csvconv --output-encoding="bom|utf-16le" --output-col-sep="\t" data.csv > data.txt
#   # Excel Compatible TSV to UTF-8 CSV
#   $ csvconv --input-encoding="bom|utf-16" --input-col-sep="\t" --output-encoding="utf-8" --output-col-sep="," data.txt > data.csv
require "optparse"
require "csv"

# Byte order mark prepended to the output when the output encoding is given
# as "bom|<encoding>".
BOM = "\u{feff}".freeze

# Two-character escapes accepted in option values, mapped to the control
# character each one stands for ("\\n" to LF, "\\r" to CR, "\\t" to TAB).
CONTROL_CHARS = {
  "\\n" => "\n",
  "\\r" => "\r",
  "\\t" => "\t",
}.freeze

# Replaces the escapes listed in CONTROL_CHARS with real control characters.
#
# @param value [String] raw option value as typed on the command line
# @return [String] a new string; +value+ itself is not modified
def unescape_control_chars(value)
  value.gsub(Regexp.union(CONTROL_CHARS.keys), CONTROL_CHARS)
end

# Re-serializes CSV text with the separators requested in +options+.
#
# @param src [String] CSV text, already transcoded to UTF-8
# @param options [Hash] uses :new_line, :input_row_sep, :input_col_sep,
#   :output_row_sep and :output_col_sep
# @return [String] the re-serialized CSV text
def convert_csv(src, options)
  # Unify new lines before parsing so mixed CRLF/CR/LF input parses uniformly.
  unified = src.gsub(/(\r\n|\r|\n)/, options[:new_line])
  reader = CSV.new(
    unified,
    row_sep: options[:input_row_sep] || :auto,
    col_sep: options[:input_col_sep] || ","
  )
  # "".dup keeps the buffer mutable even when string literals are frozen.
  dest = "".dup
  # Output separators default to whatever the reader ended up using.
  writer = CSV.new(
    dest,
    row_sep: options[:output_row_sep] || reader.row_sep,
    col_sep: options[:output_col_sep] || reader.col_sep
  )
  reader.each { |row| writer << row }
  dest
end

# Encodes +text+ for output, adding a BOM if the spec asks for one.
#
# @param text [String] converted CSV text
# @param encoding_spec [String] an encoding name, optionally prefixed with
#   "bom|" (case-insensitive) to request a leading byte order mark
# @return [String] +text+ transcoded to the requested encoding
def encode_output(text, encoding_spec)
  bom_encoding = encoding_spec[/\Abom\|(.+)\z/i, 1]
  return text.encode(encoding_spec) unless bom_encoding

  (BOM + text).encode(bom_encoding)
end

# Reads one file and returns its converted, re-encoded content.
#
# @param path [String] file to read
# @param options [Hash] parsed command-line options
# @return [String] converted text in the output encoding
def convert_file(path, options)
  # File.open, not Kernel#open: an argument beginning with "|" must be
  # treated as a file name and never executed as a command.
  src = File.open(path, "rb:#{options[:input_encoding]}") { |f| f.read }
  converted = convert_csv(src.encode("utf-8"), options)
  # The output encoding falls back to the input encoding when not given.
  encode_output(converted, options[:output_encoding] || options[:input_encoding])
end

options = {
  input_encoding: "utf-8",
  input_row_sep: nil,
  input_col_sep: nil,
  output_encoding: nil,
  output_row_sep: nil,
  output_col_sep: nil,
  new_line: "\n",
}

OptionParser.new do |o|
  o.on("--input-encoding INPUT_ENCODING") { |v| options[:input_encoding] = v }
  o.on("--input-row-sep INPUT_ROW_SEPARATOR") { |v| options[:input_row_sep] = v }
  o.on("--input-col-sep INPUT_COLUMN_SEPARATOR") { |v| options[:input_col_sep] = v }
  o.on("--output-encoding OUTPUT_ENCODING") { |v| options[:output_encoding] = v }
  o.on("--output-row-sep OUTPUT_ROW_SEPARATOR") { |v| options[:output_row_sep] = v }
  o.on("--output-col-sep OUTPUT_COLUMN_SEPARATOR") { |v| options[:output_col_sep] = v }
  o.on("--new-line NEW_LINE_CHARACTER") { |v| options[:new_line] = v }
  o.parse!(ARGV)
end

# Shells pass "\t" and friends through as two characters; turn them into the
# real control characters in every option that was set.
options.each do |key, value|
  options[key] = unescape_control_chars(value) if value
end

ARGV.each do |file|
  # write, not puts: puts appends "\n" when the text does not already end
  # with one (e.g. row separator "\r" or empty input), which adds a stray
  # byte and breaks multi-byte encodings such as UTF-16.
  $stdout.write(convert_file(file, options))
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment