Skip to content

Instantly share code, notes, and snippets.

@yannski
Created February 5, 2012 16:28
Show Gist options
  • Save yannski/1746376 to your computer and use it in GitHub Desktop.
Save yannski/1746376 to your computer and use it in GitHub Desktop.
Simple ETL system written with rake
# procedure :
# in development : rake db:restore etl:extract:all db:drop db:create db:migrate etl:load:all
# in production : rake db:backup etl:extract:all db:drop db:create db:migrate etl:load:all
namespace :etl do
namespace :extract do
task :all => :environment do
FileUtils.rm_rf 'db/extracted_data/'
FileUtils.mkdir_p 'db/extracted_data/'
Rake::Task['etl:extract:cities'].invoke
Rake::Task['etl:extract:addresses'].invoke
Rake::Task['etl:extract:users'].invoke
Rake::Task['etl:extract:products'].invoke
Rake::Task['etl:extract:enterprises'].invoke
# etc etc etc
end
task :cities => :environment do
sql = "SELECT * FROM cities"
ary = ActiveRecord::Base.connection.execute(sql)
FasterCSV.open("db/extracted_data/cities.csv", "w") do |csv|
ary.each{|row|
location_id = row[0]
# do your own thing so the variable row is filled with the corrected data
csv << row.cleanup!
}
end
end
end
namespace :load do
task :all => :environment do
Dir.glob("db/extracted_data/*csv").each{|filename|
table_name = FileUtils.basename filename, ".csv"
filename = Dir.pwd + "/" + filename
sql = "LOAD DATA LOCAL INFILE '#{filename}' INTO TABLE #{table_name} CHARACTER SET 'utf8' FIELDS TERMINATED BY ',' ENCLOSED BY '\"'"
begin
ary = ActiveRecord::Base.connection.execute(sql)
rescue
ActiveRecord::Base.connection.reconnect!
retry
end
}
end
end
end
class Time
def to_mysql
strftime("%Y-%m-%d %H:%M:%S %z")
end
end
def untouched new_table_name, old_table_name = nil
old_table_name ||= new_table_name
sql = "SELECT * FROM #{old_table_name}"
ary = ActiveRecord::Base.connection.execute(sql)
FasterCSV.open("db/extracted_data/#{new_table_name}.csv", "w") do |csv|
ary.each{|row|
csv << row.cleanup!
}
end
end
class Array
def to_h(&block)
Hash[*self.collect { |v|
[v, block.call(v)]
}.flatten]
end
def cleanup
self.map{|x|
if x.nil?
"\\N"
elsif x.is_a?(Time)
x.to_mysql
else
x
end
}
end
def cleanup!
self.map!{|x|
if x.nil?
"\\N"
elsif x.is_a?(Time)
x.to_mysql
elsif x.is_a?(TrueClass)
1
elsif x.is_a?(FalseClass)
0
else
x
end
}
end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment