Last active
December 17, 2015 14:38
-
-
Save rguerrettaz/5625393 to your computer and use it in GitHub Desktop.
csv parser. takes 2 csv files and combines them together to make one without duplicates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv'
# Handles CSVs with varying numbers of columns
# Does not handle headers yet
# Entry point: combines two CSV files into one output file without
# duplicate rows, by calling the other methods in this file in order.
#
# orig_file - path of the original CSV file.
# new_file  - path of the updated CSV file; the output file name is derived
#             from it by create_output_file_name.
#
# Updated rows are keyed on as many leading columns as the first original row
# has, so an updated row that only appends columns takes the place of its
# original row instead of being written twice.
#
# Returns whatever write_to_file returns.
def update_csv(orig_file, new_file)
  orig_file_mapped = parse_orig(orig_file)

  new_file_mapped =
    if orig_file_mapped.empty?
      # An empty original has no first row to measure, so there is no key
      # width to match on; key the updated rows by their full contents.
      parse_orig(new_file)
    else
      orig_num_columns = orig_file_mapped.keys.first.length
      parse_new(new_file, orig_num_columns)
    end

  joined_map = join_maps(orig_file_mapped, new_file_mapped)
  csv_formatted = convert_map(joined_map)
  # Hand over a copy so deriving the output name can never alter (or fail on
  # a frozen) caller-owned path string.
  output_file_name = create_output_file_name(new_file.dup)
  write_to_file(csv_formatted, output_file_name)
end
# Parses the CSV file at +file+ and returns a Hash in which each parsed row
# (an Array of fields) is both the key and the value, so identical rows
# collapse into a single entry.
#
# file - path of the CSV file to read.
def parse_orig(file)
  # "r:ISO-8859-1" tells Ruby the file is in ISO-8859-1 encoding.
  # The block form closes the file handle even if parsing raises.
  File.open(file, "r:ISO-8859-1") do |csv_file|
    CSV.parse(csv_file).each_with_object({}) { |row, rows_hash| rows_hash[row] = row }
  end
end
# Parses the CSV file at +file+ and returns a Hash whose keys are the leading
# fields of each row and whose values are the full rows. When two rows share
# the same leading fields, the later row replaces the earlier one.
#
# file    - path of the CSV file to read.
# columns - number of leading fields that form the key (the column count of
#           the original file, as passed by update_csv).
def parse_new(file, columns)
  key_range = 0..(columns - 1)
  # "r:ISO-8859-1" tells Ruby the file is in ISO-8859-1 encoding.
  # The block form closes the file handle even if parsing raises.
  File.open(file, "r:ISO-8859-1") do |csv_file|
    CSV.parse(csv_file).each_with_object({}) do |row, rows_hash|
      rows_hash[row[key_range]] = row
    end
  end
end
# Adds every entry of orig_map to new_map unless new_map already has that key.
#
# orig_map - Hash of original rows.
# new_map  - Hash of updated rows; modified in place.
#
# Returns new_map itself: its existing entries keep their values and
# positions, and entries found only in orig_map are appended after them.
def join_maps(orig_map, new_map)
  # On a key present in both maps the block keeps the value already in new_map.
  new_map.merge!(orig_map) { |_key, updated_row, _orig_row| updated_row }
end
# Returns a new Array holding the values of +mapping+ in the Hash's
# iteration order; the keys are discarded.
def convert_map(mapping)
  mapping.values
end
# Builds the output file name from +file_name+ by replacing a trailing
# ".csv" with "_updated6.csv". A name that does not end in ".csv" gets
# "_updated6.csv" appended instead, so the result is never nil and never
# equal to the input name.
#
# Returns a new String; +file_name+ itself is left unmodified, so frozen
# strings are accepted.
def create_output_file_name(file_name)
  suffix = '_updated6.csv'
  # Anchor on the end of the name so a ".csv" appearing earlier in the path
  # is not rewritten as well.
  return file_name.sub(/\.csv\z/, suffix) if file_name.end_with?('.csv')

  "#{file_name}#{suffix}"
end
# Writes +data+ to +file_name+ as CSV, one output line per element.
#
# data      - collection of rows (each an Array of fields); defaults to []
#             when the method is called with the file name only.
# file_name - path of the file to create or overwrite; opened with UTF-8 as
#             its encoding.
#
# Returns the value of the CSV.open block, i.e. the result of data.each.
def write_to_file(data = [], file_name)
  CSV.open(file_name, "w:UTF-8") do |csv|
    data.each { |row| csv << row }
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment