Last active
December 22, 2015 09:18
-
-
Save timuruski/6450426 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Eliot, two parts of ETL.
#
# Yes, in the movie it's Elliott (with two Ls and two Ts), but no one wants
# to type that every time.
#
#
# Handles extracting data from multiple formats: CSV, JSON, YAML, XML?
# Handles transforming records into models using an attributes hash or
# assignment.
#
# This is just an interface experiment so far.
#
# NOTES:
# - How to handle coalescing rows? One entity/member is represented by
#   multiple rows, uniquely or jointly.
# - How to handle non-standard record/model building?
# NEW INTERFACE
#
# One chained expression: describe the CSV columns with a spec string,
# register a conversion for :id, load two files, then hand each yielded
# `attrs` to User.create.
# NOTE(review): the 'ID:id' spec presumably maps the "ID" header onto the :id
# key -- confirm once the parser exists.
Eliot.csv('ID:id email name', headers: true)
  .on(:id, &:to_i)
  .load_files('users_1.csv', 'users_2.csv')
  .each do |attrs|
    # Pass attrs as a regular argument. `&attrs` would try to convert it into
    # a block (via #to_proc) rather than hand the attributes to the model.
    User.create(attrs)
  end
# Block style
#
# The block receives the builder as `et`; every declaration is a call on it.
Eliot.csv('users.csv') do |et|
  # Declare what fields you are interested in and what they map to.
  # In this example, we are using columns by position. Could also use
  # key-named columns using automatic header conversion.
  #
  # :* ignore remaining columns, this could be the default
  # :_ ignore a single column
  #
  # May have some mechanism for ignoring a few columns, :{3} maybe
  et.extract :id, :name, :_, :status, :*

  # How to handle duplicates, folding, aggregating, etc.
  et.fold_on :id

  # Declare how to format data that has been extracted
  et.transform :id, :to_i

  # Format data using a block.
  # The argument needs parentheses here: `et.transform :name { ... }` would
  # attach the braces to the symbol literal and fail to parse.
  et.transform(:name) do |value, data|
    first, last = value.split(' ', 2)
    # Two candidate spellings for writing derived fields; the sketch shows
    # both, a final interface would presumably keep only one.
    data[:first_name] = first
    data[:last_name] = last
    data.set(:first_name, first)
    data.set(:last_name, last)
  end

  # Another way to format data.
  # Called on `et` like every other declaration in this block.
  et.transform(:name, :first_name, :last_name) { |value| value.split(' ') }

  # Convert the data into some sort of business model.
  # Yields an Eliot::DataSet
  # which can convert to an attributes hash
  et.to do |data|
    # Two candidate ways to build the model are shown back to back; only the
    # value of the last expression is returned from this block.
    User.new(data.to_hash)
    User.new do |u|
      data.assign_to(u)
    end
  end
end
# Fluent style
#
# The same pipeline as a single method chain: pick columns by position, fold
# on :id, convert :id with :to_i, derive the name parts (shown two ways), map
# each data set to a User, then call execute. The result of the chain is
# assigned to `users`.
users = Eliot.csv('users.csv')
             .extract(:id, :name, :_, :status, :*)
             .fold_on(:id)
             .transform(:id, :to_i)
             .transform(:name, :first_name, :last_name) { |full_name| full_name.split(' ') }
             .transform(:name) { |full_name, record|
               given, family = full_name.split(' ', 2)
               record[:first_name] = given
               record[:last_name] = family
             }
             .to { |record| User.new(record.to_hash) }
             .execute
# Instance style
#
# The same declarations issued one statement at a time against the object
# returned by Eliot.csv, which is held in `users`.
users = Eliot.csv('users.csv')
users.extract :id, :name, :_, :status, :*
users.fold_on :id
users.transform :id, :to_i
users.transform(:name, :first_name, :last_name) { |full_name| full_name.split(' ') }
users.transform(:name) do |full_name, record|
  given, family = full_name.split(' ', 2)
  record[:first_name] = given
  record[:last_name] = family
end
users.to { |record| User.new(record.to_hash) }
users.execute
# Version 2.0 extraction parser, probably won't build this for a while.
#
# Each pair of calls below shows the same declaration in two candidate
# spellings: a spec string, and a list of symbols.
Eliot.csv do |et|
  # Positional, with skips
  et.extract "id _ name {3} status *"
  # A bare `:3` is not a valid symbol literal, so the skip count is written as
  # a quoted symbol.
  et.extract :id, :_, :name, :'3', :status, :*

  # Name keys, with conversion
  et.extract_keys "id full_name:name status"
  et.extract_keys :id, :name, :status
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment