timuruski · December 22, 2015 09:18
diff --git a/eliot.rb b/eliot.rb
 # Eliot, two parts of ETL.
 #
 #   Yes, in the movie it's Elliott (with two Ls and two Ts), but no one
 #   wants to type that every time.
 #
 #
 # Handles extracting data from multiple formats, CSV, JSON, YAML, XML?
 # Handles transforming records into models using attributes hash or
 # assignment.
 #
 # This is just an interface experiment so far.
 #
 # NOTES:
 # - How to handle coalescing rows? One entity/member is represented by
 # multiple rows, uniquely or jointly.
 # - How to handle non-standard record/model building?

 # NEW INTERFACE
 #
 # Column spec string plus options, a per-field converter, the input files,
 # then iteration over the resulting records.
 # NOTE(review): 'ID:id' presumably maps the "ID" header to the :id key --
 # confirm once the parser exists.
 Eliot.csv('ID:id email name', headers: true)
      .on(:id, &:to_i)
      .load_files('users_1.csv', 'users_2.csv')
      .each do |attrs|
        # Pass the attributes as an argument. `&attrs` would convert them to
        # a block (via #to_proc) instead of handing them to the model.
        User.create(attrs)
      end


 # Block style
 #
 # The builder is yielded as `et`; every step below is declared on it.
 Eliot.csv('users.csv') do |et|
   # Declare what fields you are interested in and what they map to.
   # In this example, we are using columns by position. Could also use
   # key-named columns using automatic header conversion.
   #
   # :* ignore remaining columns, this could be the default
   # :_ ignore a single column
   #
   # May have some mechanism for ignoring a few columns, :{3} maybe
   et.extract :id, :name, :_, :status, :*

   # How to handle duplicates, folding, aggregating, etc.
   et.fold_on :id


   # Declare how to format data that has been extracted
   et.transform :id, :to_i

   # Format data using a block.
   # The argument must be parenthesized (or the block written as do...end):
   # `et.transform :name { ... }` attaches the braces to `:name` and does
   # not parse.
   et.transform(:name) do |value, data|
     first, last = value.split(' ', 2)
     # Two candidate spellings for writing derived fields -- presumably only
     # one of them survives in the final interface.
     data[:first_name] = first
     data[:last_name] = last
     data.set(:first_name, first)
     data.set(:last_name, last)
   end

   # Another way to format data.
   et.transform(:name, :first_name, :last_name) { |value| value.split(' ') }


   # Convert the data into some sort of business model.
   # Yields an Eliot::DataSet
   # which can convert to an attributes hash
   et.to do |data|
     # Option 1: build from an attributes hash.
     User.new(data.to_hash)

     # Option 2: assign onto a yielded instance (this is the block's value).
     User.new do |u|
       data.assign_to(u)
     end
   end
 end


 # Fluent style
 #
 # Same pipeline as the block style, written as one method chain whose final
 # `execute` result is kept in `users`.
 users = Eliot.csv('users.csv')
              .extract(:id, :name, :_, :status, :*)
              .fold_on(:id)
              .transform(:id, :to_i)
              .transform(:name, :first_name, :last_name) { |full_name| full_name.split(' ') }
              .transform(:name) do |full_name, record|
                given, family = full_name.split(' ', 2)
                record[:first_name] = given
                record[:last_name] = family
              end
              .to { |record| User.new(record.to_hash) }
              .execute


 # Instance style
 #
 # Same pipeline again, configured step by step on a single object and then
 # run with `execute`.
 users = Eliot.csv('users.csv')
 users.extract(:id, :name, :_, :status, :*)
 users.fold_on(:id)
 users.transform(:id, :to_i)
 users.transform(:name, :first_name, :last_name) { |full_name| full_name.split(' ') }
 users.transform(:name) do |full_name, record|
   given, family = full_name.split(' ', 2)
   record[:first_name] = given
   record[:last_name] = family
 end
 users.to { |record| User.new(record.to_hash) }
 users.execute


 # Version 2.0 extraction parser, probably won't build this for a while.
 Eliot.csv do |et|
   # Positional, with skips
   et.extract "id _ name {3} status *"
   # `:3` is not a valid symbol literal, so the skip token is quoted to mirror
   # the "{3}" token of the string form above.
   et.extract :id, :_, :name, :'{3}', :status, :*

   # Name keys, with conversion
   et.extract_keys "id full_name:name status"
   et.extract_keys :id, :name, :status
 end
	# Eliot, two parts of ETL.
	#
	# Yes, in the movie it's Elliott (with two Ls and two Ts), but no one
	# wants to type that every time.
	#
	#
	# Handles extracting data from multiple formats, CSV, JSON, YAML, XML?
	# Handles transforming records into models using attributes hash or
	# assignment.
	#
	# This is just an interface experiment so far.
	#
	# NOTES:
	# - How to handle coalescing rows? One entity/member is represented by
	# multiple rows, uniquely or jointly.
	# - How to handle non-standard record/model building?

	# NEW INTERFACE
	#
	# Column spec string plus options, a per-field converter, the input files,
	# then iteration over the resulting records.
	# NOTE(review): 'ID:id' presumably maps the "ID" header to the :id key --
	# confirm once the parser exists.
	Eliot.csv('ID:id email name', headers: true)
	     .on(:id, &:to_i)
	     .load_files('users_1.csv', 'users_2.csv')
	     .each do |attrs|
	       # Pass the attributes as an argument. `&attrs` would convert them to
	       # a block (via #to_proc) instead of handing them to the model.
	       User.create(attrs)
	     end


	# Block style
	#
	# The builder is yielded as `et`; every step below is declared on it.
	Eliot.csv('users.csv') do |et|
	  # Declare what fields you are interested in and what they map to.
	  # In this example, we are using columns by position. Could also use
	  # key-named columns using automatic header conversion.
	  #
	  # :* ignore remaining columns, this could be the default
	  # :_ ignore a single column
	  #
	  # May have some mechanism for ignoring a few columns, :{3} maybe
	  et.extract :id, :name, :_, :status, :*

	  # How to handle duplicates, folding, aggregating, etc.
	  et.fold_on :id


	  # Declare how to format data that has been extracted
	  et.transform :id, :to_i

	  # Format data using a block.
	  # The argument must be parenthesized (or the block written as do...end):
	  # `et.transform :name { ... }` attaches the braces to `:name` and does
	  # not parse.
	  et.transform(:name) do |value, data|
	    first, last = value.split(' ', 2)
	    # Two candidate spellings for writing derived fields -- presumably only
	    # one of them survives in the final interface.
	    data[:first_name] = first
	    data[:last_name] = last
	    data.set(:first_name, first)
	    data.set(:last_name, last)
	  end

	  # Another way to format data.
	  et.transform(:name, :first_name, :last_name) { |value| value.split(' ') }


	  # Convert the data into some sort of business model.
	  # Yields an Eliot::DataSet
	  # which can convert to an attributes hash
	  et.to do |data|
	    # Option 1: build from an attributes hash.
	    User.new(data.to_hash)

	    # Option 2: assign onto a yielded instance (this is the block's value).
	    User.new do |u|
	      data.assign_to(u)
	    end
	  end
	end


	# Fluent style
	#
	# Same pipeline as the block style, written as one method chain whose final
	# `execute` result is kept in `users`.
	users = Eliot.csv('users.csv')
	             .extract(:id, :name, :_, :status, :*)
	             .fold_on(:id)
	             .transform(:id, :to_i)
	             .transform(:name, :first_name, :last_name) { |value| value.split(' ') }
	             .transform(:name) { |value, data|
	               first, last = value.split(' ', 2)
	               data[:first_name] = first
	               data[:last_name] = last
	             }
	             .to { |data| User.new(data.to_hash) }
	             .execute


	# Instance style
	#
	# Same pipeline again, configured step by step on a single object and then
	# run with `execute`.
	users = Eliot.csv('users.csv')
	users.extract(:id, :name, :_, :status, :*)
	users.fold_on(:id)
	users.transform(:id, :to_i)
	users.transform(:name, :first_name, :last_name) { |value| value.split(' ') }
	users.transform(:name) { |value, data|
	  first, last = value.split(' ', 2)
	  data[:first_name] = first
	  data[:last_name] = last
	}
	users.to { |data| User.new(data.to_hash) }
	users.execute


	# Version 2.0 extraction parser, probably won't build this for a while.
	Eliot.csv do |et|
	  # Positional, with skips
	  et.extract "id _ name {3} status *"
	  # `:3` is not a valid symbol literal, so the skip token is quoted to mirror
	  # the "{3}" token of the string form above.
	  et.extract :id, :_, :name, :'{3}', :status, :*

	  # Name keys, with conversion
	  et.extract_keys "id full_name:name status"
	  et.extract_keys :id, :name, :status
	end