Created
March 3, 2017 00:10
-
-
Save elof/a7e98af0147a42ea1fcc8b25f37093a2 to your computer and use it in GitHub Desktop.
A script to parse a Salesforce-generated CSV into JSON and pass it to Keen IO
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Salesforce CSV import
#
# Author: Jevon Wild <[email protected]>
# Creation Date: 15 DEC 2016
#
# This file contains one method for processing multiple CSVs in the same
# directory and returning a block of bulk events for immediate ingestion. It
# assumes the events are in a particular format (for the Addons), so it would
# probably need to be modified for other uses.
require 'csv' | |
require 'keen' | |
require 'time' | |
# Columns that are stored on the event as integers instead of raw strings.
SALESFORCE_INTEGER_COLUMNS = %w[
  run_time cpu_time db_total_time db_blocks db_cpu_time
  response_size rows_processed request_size
].freeze

# Dotted-quad IPv4 literal. Anchored with \A/\z so the *whole* value has to
# match (^/$ only anchor to line boundaries), and each octet needs a digit.
SALESFORCE_IPV4_PATTERN = /\A\d{1,3}(?:\.\d{1,3}){3}\z/

# Converts every CSV file matching +pattern+ into a hash of bulk events,
# keyed by collection name.
#
# The collection name is the part of the file's base name after the last
# underscore and before the first following dot ("2016_logins.csv" =>
# "logins"). Files that map to the same collection are merged into one list.
#
# pattern - glob of the CSV files to read; defaults to "*.csv" in the
#           current working directory.
#
# Returns a Hash of { collection_name (String) => Array of event Hashes }.
# Raises whatever Time.parse raises when a "timestamp_derived" cell is blank
# or unparseable, and CSV::MalformedCSVError for malformed input.
def process(pattern = '*.csv')
  events = {}

  # Sorted so the event order does not depend on the file system's ordering.
  Dir.glob(pattern).sort.each do |filename|
    collection_name = File.basename(filename).split('_').last.split('.').first

    # ||= (not =) so a second file for the same collection appends to the
    # existing list instead of discarding the events already collected.
    collection = (events[collection_name] ||= [])

    # Stream row by row rather than loading the whole file into memory.
    CSV.foreach(filename, headers: true) do |row|
      collection.push(build_keen_event(row))
    end
  end

  events
end

# Builds one event Hash from a parsed CSV row (header/value pairs).
#
# Column names are downcased and used as String keys; the :keen key carries
# the event timestamp and the list of addons attached to the event.
def build_keen_event(row)
  event = { keen: {} }

  row.each do |header, value|
    # A cell beyond the header row has no name to store it under.
    next if header.nil?

    key = header.downcase
    case key
    when 'timestamp_derived'
      event[:keen][:timestamp] = Time.parse(value).iso8601
    when 'timestamp'
      # Intentionally dropped: only "timestamp_derived" is used for the time.
      next
    when *SALESFORCE_INTEGER_COLUMNS
      # Keep empty cells as nil rather than coercing them to 0.
      event[key] = value && value.to_i
    else
      event[key] = value
    end
  end

  addons = [
    {
      name: 'keen:date_time_parser',
      input: { date_time: 'keen.timestamp' },
      output: 'timestamp_info'
    }
  ]

  # Only attach the geo addon when client_ip looks like an IPv4 address.
  # The regexp is on the left so a missing (nil) client_ip simply fails to match.
  if SALESFORCE_IPV4_PATTERN =~ event['client_ip']
    addons.push(
      name: 'keen:ip_to_geo',
      input: { ip: 'client_ip' },
      output: 'ip_geo_info'
    )
  end

  event[:keen][:addons] = addons
  event
end
# Keen.project_id = ""
# Keen.write_key = ""
# events = process
# puts JSON.pretty_generate(events)
# Keen.publish_batch(events)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment