Created
January 30, 2014 18:56
-
-
Save validkeys/8716209 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/local/bin/ruby -w | |
| require 'csv' | |
| require 'iconv' unless String.method_defined?(:encode) | |
## THE MAIN FILE TO IMPORT FROM (LOCATED NEXT TO THIS SCRIPT)
CSV_FILE_PATH = File.join(File.dirname(__FILE__), "eloqua_20140130.csv")

## THE BASE NAME AND LOCATION OF EXPORT FILES
## (THE TIMESTAMP IS TAKEN ONCE, WHEN THE SCRIPT IS LOADED)
OUTPUT_FILE_NAME = File.join(File.dirname(__FILE__), "export_#{Time.now}")

## COMPANY NAMES TO EXCLUDE (FROZEN: NEVER MODIFIED AT RUNTIME)
EXCLUDES = %w[eloqua oracle gmail hotmail].freeze

## THE COLUMN WE WILL USE TO EXCLUDE
EXCLUDE_HEADER = "company_unique".freeze

## IN THE EVENT OF MISSING COMPANY NAMES
## THIS WILL STORE A DICTIONARY OF ALL COMPANY_UNIQUES => COMPANY NAMES
## IN CASE SOMEONE ELSE FROM THE SAME COMPANY IN THE LIST ENTERED
## THEIR COMPANY NAME.
## DELIBERATELY NOT FROZEN: IT IS FILLED IN WHILE THE SOURCE FILE IS READ.
COMPANY_NAME_LOOKUP = {}

## THE HEADERS TO EXPORT, IN OUTPUT COLUMN ORDER
HEADERS = %i[first_name last_name company email industry region company_unique].freeze
##
## WRITE THE HEADER ROW TO THE CSV
##
# Appends one row holding every entry of HEADERS converted to a string.
# +csv+ is any object that accepts a row through <<.
def output_headers(csv)
  csv << HEADERS.map(&:to_s)
end
##
## WRITE ONE DATA ROW TO THE PASSED CSV
##
# Picks the value stored under each HEADERS key of +row+ (in HEADERS
# order) and appends the resulting array to +csv+ through <<.
def output_row(row, csv)
  csv << HEADERS.map { |column| row[column] }
end
##
## OUTPUTS THE QUEUE AND EXCLUDED ARRAYS TO CSV FILES
##
# queue     - array of groups, each group being an array of rows. The
#             group at position N (counting from 1) is written to
#             "#{OUTPUT_FILE_NAME}_N.csv".
# excludeds - rows that were filtered out. They are written to a
#             timestamped "excludeds_....csv" (a relative path, so it
#             lands in the current working directory). When nil or
#             empty, no file is created.
def output(queue, excludeds)
  queue.each_with_index do |group, index|
    CSV.open("#{OUTPUT_FILE_NAME}_#{index + 1}.csv", "w") do |csv|
      output_headers csv
      group.each { |row| output_row row, csv }
    end
  end

  # An empty array is truthy in Ruby, so emptiness has to be checked
  # explicitly; otherwise a header-only file is produced even when no
  # row was excluded.
  return if excludeds.nil? || excludeds.empty?

  CSV.open("excludeds_#{Time.now}.csv", "w") do |csv|
    output_headers csv
    excludeds.each { |row| output_row row, csv }
  end
end
##
## BUILDS THE COMPANY LOOKUP DICTIONARY (COMPANY_UNIQUE => COMPANY NAME)
##
# Reads CSV_FILE_PATH and stores in COMPANY_NAME_LOOKUP, keyed by the
# symbolized :company_unique value, the first non-nil :company seen for
# that key. Rows lacking either value are skipped.
def build_company_name_lookup
  # CSV.foreach closes the file once iteration ends (CSV.open without a
  # block leaves the handle open). The options are passed as keywords
  # because Ruby 3 no longer converts a positional hash into keyword
  # options.
  CSV.foreach(CSV_FILE_PATH, headers: true, header_converters: :symbol) do |row|
    unique  = row[:company_unique]
    company = row[:company]
    # Without both a key and a name there is nothing to record; this
    # also avoids calling to_sym on nil.
    next if unique.nil? || company.nil?

    # Keep the first name recorded for a key.
    COMPANY_NAME_LOOKUP[unique.to_sym] ||= company
  end
end
##
## APPENDS A ROW TO THE GROUP AT THE GIVEN POSITION OF THE QUEUE
##
# Creates an empty group at queue[index] when none exists yet, then
# pushes +row+ onto it.
def add_to_queue(queue, index, row)
  group = (queue[index] ||= [])
  group << row
end
##
## OPENS THE SOURCE FILE AND ENQUEUES THE ROWS
##
# Reads CSV_FILE_PATH and sorts every row into one of two places:
#   * excludeds - rows whose EXCLUDE_HEADER column is listed in EXCLUDES
#   * queue     - groups of rows in which each :company_unique value
#                 appears at most once; the Nth row of a company goes
#                 into the Nth group
# A row without a :company gets the name from COMPANY_NAME_LOOKUP when
# one is known. Both collections are then handed to +output+.
def enqeue
  # HOLDS THE QUEUES TO BE OUTPUT
  queue = []
  # HOLDS THE EXCLUDED ROWS TO BE OUTPUT
  excludeds = []
  # COMPANY KEY => INDEX OF THE LAST QUEUE THAT RECEIVED A ROW FOR IT
  last_queue_for = {}
  exclude_column = EXCLUDE_HEADER.to_sym

  # CSV.foreach closes the file once iteration ends, and the options
  # are passed as keywords because Ruby 3 no longer converts a
  # positional hash into keyword options.
  CSV.foreach(CSV_FILE_PATH, headers: true, header_converters: :symbol) do |row|
    if EXCLUDES.include?(row[exclude_column])
      excludeds << row
      next
    end

    # to_s keeps a missing company_unique from raising on nil.to_sym;
    # such rows share one blank key and are therefore spread across
    # queues like any other company.
    key = row[:company_unique].to_s.to_sym

    # ADD THE COMPANY NAME IF THE CURRENT RECORD IS MISSING ONE AND
    # THE DICTIONARY KNOWS IT
    known_name = COMPANY_NAME_LOOKUP[key]
    row[:company] = known_name if row[:company].nil? && known_name

    # First sighting goes into queue 0, every later one into the queue
    # after the previous sighting.
    index = last_queue_for.key?(key) ? last_queue_for[key] + 1 : 0
    last_queue_for[key] = index
    add_to_queue queue, index, row
  end

  output queue, excludeds
end
##
## INITIALIZE
## THE COMPANY NAME DICTIONARY IS BUILT FIRST; ENQUEUING THEN READS IT
## TO FILL IN MISSING COMPANY NAMES AND HANDS THE RESULT TO THE OUTPUT STEP
##
build_company_name_lookup()
enqeue()
You'd probably have fewer hoops to jump through if you dealt exclusively with Strings, rather than converting some things to symbols.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
On line 23 (https://gist.github.com/validkeys/8716209#file-gistfile1-rb-L23) you've uppercased the company name lookup table, which in Ruby land implies that it's a constant.