Created
August 21, 2013 13:25
-
-
Save fiedl/6294424 to your computer and use it in GitHub Desktop.
Ruby script to **split a CSV file** into separate files, one per distinct value found in a chosen category column.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
#  categorize_csv.rb
#  Fiedlschuster, 2013-08-21
#
#  Usage:  ruby categorize_csv.rb --column 2 data.csv
#  This splits the data.csv file into separate files,
#  one for each category found in column 2 (numbered 1,2,3,...),
#  and outputs to data.category_name.csv
#
require 'csv'
require 'optparse'

# Field separator used both for reading the input and writing the outputs.
COL_SEP = ";".freeze

# Banner shown at the top of the help text.
USAGE = [
  "",
  "  Usage:  ruby categorize_csv.rb --column 2 data.csv",
  "  This splits the data.csv file into separate files,",
  "  one for each category found in column 2 (numbered 1,2,3,...),",
  "  and outputs to data.category_name.csv",
  ""
].join("\n").freeze

# Reads +import_file+ and groups its rows by the value in the given column.
#
# @param import_file [String] path of the CSV file to read
# @param column_number [Integer] 1-based number of the category column
# @return [Hash{String,nil => Array<Array>}] rows keyed by category value, in
#   order of first appearance; rows too short to have the column land under nil
def read_rows_by_category(import_file, column_number)
  column_index = column_number - 1
  # Without a block, CSV.foreach returns an enumerator over the rows.
  CSV.foreach(import_file, headers: false, col_sep: COL_SEP)
     .group_by { |row| row[column_index] }
end

# Builds the output path for one category by inserting ".<category>" in front
# of the input file's extension (data.csv -> data.a.csv).
#
# Only the extension of the final path component is touched, so a ".csv"
# elsewhere in the path is left alone, and the result always differs from
# +import_file+, so the input can never be overwritten by an export.
#
# @param import_file [String] path of the input file
# @param category [String, nil] category value taken from the data
# @return [String] path of the export file for that category
def export_file_name(import_file, category)
  extension = File.extname(import_file)
  stem = import_file[0, import_file.length - extension.length]
  "#{stem}.#{category}#{extension}"
end

# Writes +rows+ to +export_file+ using the same separator as the input.
#
# @param export_file [String] path of the file to create or truncate
# @param rows [Array<Array>] rows to write, in order
# @return [void]
def write_rows(export_file, rows)
  CSV.open(export_file, "wb", headers: false, col_sep: COL_SEP) do |csv|
    rows.each { |row| csv << row }
  end
end

# Command-line entry point: parses options, then splits the input file.
#
# @param argv [Array<String>] command-line arguments; recognised options are
#   removed from it in place
# @return [Integer] process exit status (0 on success or when only the help
#   text was shown, 1 on a usage error)
def run(argv)
  column_to_categorize_by = 1

  option_parser = OptionParser.new do |opts|
    opts.banner = USAGE
    # The number is mandatory: a bare "-c" is reported as a usage error
    # instead of leaving the column unset.
    opts.on("-c NUMBER", "--column NUMBER", Integer, "The column number specifying the column to categorize by. (Starting counting from 1.)") do |column_number|
      column_to_categorize_by = column_number
    end
  end

  begin
    option_parser.parse!(argv)
  rescue OptionParser::ParseError => e
    warn e.message
    warn option_parser.help
    return 1
  end

  # Columns are counted from 1; 0 or a negative number would silently index
  # from the end of each row.
  if column_to_categorize_by < 1
    warn "invalid column number: #{column_to_categorize_by} (columns are counted from 1)"
    return 1
  end

  # Read the file name only after the options have been stripped from argv,
  # so an option value is never mistaken for the input file.
  import_file = argv.last
  unless import_file
    print option_parser.help
    return 0
  end

  print "import #{import_file}\n"
  read_rows_by_category(import_file, column_to_categorize_by).each do |category, rows|
    export_file = export_file_name(import_file, category)
    print "export #{export_file}\n"
    write_rows(export_file, rows)
  end
  0
end

if __FILE__ == $PROGRAM_NAME
  status = run(ARGV)
  exit status unless status.zero?
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment