Last active
February 4, 2016 16:03
-
-
Save akcrono/2ea6b1a0ed3f5ea58138 to your computer and use it in GitHub Desktop.
Useful script for quickly pulling data out of csv's into a ruby console.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'csv' | |
require 'pry' | |
require 'active_support/all' | |
class CSVParser | |
attr_accessor :filename, :items, :rows, :options, :headers | |
# Optional params: | |
# -nh No header in csv | |
def initialize(args) | |
@options = build_options(args) | |
@filename = build_filename(args) | |
@headers = [] | |
@items = options[:no_header] ? [] : HashWithIndifferentAccess.new | |
@rows = [] | |
load_csv | |
end | |
def method_missing(method_name) | |
if items[method_name].present? | |
print_array_for_console items[method_name] | |
else | |
puts "No items found for #{method_name}" | |
end | |
end | |
def perform | |
if headers.present? | |
puts "Headers:" | |
headers.each{|header| puts header} | |
else | |
puts "No headers" | |
end | |
console | |
end | |
def console | |
binding.pry | |
end | |
def load_csv(can_retry=true) | |
begin | |
if options[:no_header] | |
@headers = nil | |
CSV.foreach(filename, csv_header_options) do |row| | |
row.count.times{|i| items[i] = []} | |
break | |
end | |
else | |
# Grab header info and initialize array | |
CSV.foreach(filename, csv_header_options) do |row| | |
@headers = row.headers | |
@headers.each{ |header| items[header] = []} | |
break | |
end | |
end | |
# Grab each row and collect items | |
CSV.foreach(filename, csv_header_options) do |row| | |
if headers.nil? | |
row.each_with_index{|data, i| items[i] << data} | |
else | |
@headers.each{ |header| items[header] << row[header] } | |
end | |
rows << row.map{|k, v| v} | |
end | |
rescue => e | |
puts e.message + " ... retrying with new encoding..." | |
@csv_options[:encoding] = 'ISO-8859-1' | |
load_csv(false) if can_retry | |
end | |
end | |
#Change a fixnum to a string with proper commas | |
def commas(number) | |
number = number.to_s | |
chunks = [] | |
number.reverse! | |
while (number.length > 0) | |
chunks << number.slice!(0,3) | |
end | |
number = chunks.join(',') | |
number.reverse! | |
end | |
# Builds a hash of the parsed data, indexed at index | |
# @param [Symbol] index Index of hash | |
def build_hash(index) | |
raise unless headers.include?(index) | |
hash = {} | |
rows.each do |row| | |
hash[row[index]] = {} | |
row.each do |k, v| | |
next if k == index | |
hash[row[index]][k] = v | |
end | |
end | |
hash | |
end | |
# Print array to console in a ruby copypasta way e.g. [item1, item2, item3] | |
# items should be an array | |
def print_array_for_console(items, format = nil) | |
print "[" | |
i = 0 | |
count = items.count | |
while i < (count - 1) | |
print_item(items[i], format) | |
print ", " | |
i += 1 | |
end | |
print_item(items.last, format) | |
print "]" | |
end | |
private | |
def build_filename(args) | |
filepaths.each do |filepath| | |
file_name = filepath | |
file_name += args.count > 1 ? args.join(" ") : args[0] | |
file_name += ".csv" unless file_name.include?(".csv") | |
if Dir[file_name].empty? | |
file_list = Dir[filepath + "*"].select{|x| x.include? args[0]} | |
if file_list.count == 1 | |
file_name = file_list.first | |
elsif file_list.count == 0 | |
next | |
else | |
puts "Filename ambiguous, choose from list:" | |
file_list.each_with_index do |file, i| | |
puts "#{i + 1}) #{file}" | |
end | |
answer = STDIN.gets.chomp | |
file_name = file_list[answer.to_i - 1] | |
end | |
end | |
return file_name | |
end | |
raise StandardError, "File not found" | |
end | |
def build_options(args) | |
options = {} | |
args.each do |arg| | |
if available_options.keys.include?(arg) | |
options[available_options[arg]] = true | |
args.delete(arg) | |
end | |
end | |
options | |
end | |
def filepaths | |
["/Users/andrewkotka/Downloads/", | |
"/Users/andrewkotka/Desktop/"] | |
end | |
def csv_header_options | |
@csv_options ||= options[:no_header] ? {} : {:headers => true, :header_converters => :symbol} | |
end | |
def available_options | |
{"-nh" => :no_header} | |
end | |
# Prints item to console based on format | |
def print_item(item, format) | |
case format | |
when :quotes | |
print "\"" + item + "\"" | |
when :no_quotes | |
print item | |
else | |
print smart_quotes(item) | |
end | |
end | |
# Guess about whether or not to print quotes | |
def smart_quotes(item) | |
# No quotes if number | |
return item if item.class == Fixnum | |
# No quotes if probably a number in string form | |
return item if item.to_i.to_s.length == item.length | |
return "\"" + item + "\"" | |
end | |
end | |
parser = CSVParser.new(ARGV) | |
parser.perform |
Updated to take in params for no header (-nh) and added method to build hash based off an index (e.g. user_email).
TODO: add permanent config, add -p arg for path.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
I have 2 aliases set up as follows:
alias cpro="cd ~/Desktop/custom\ programs/"
alias parse="cpro && ruby ~/Desktop/custom\ programs/csv_parser.rb"
Then I can just type "parse csvname.csv" in the console. Script will handle names with or without spaces and names with or without .csv.
The script will open up a console with the csv parsed in as both whole rows and aggregated columns. Methods available for friendly copypasta from console and printing comma delimited numbers.