Skip to content

Instantly share code, notes, and snippets.

@Altech
Last active December 12, 2015 05:28
Show Gist options
  • Save Altech/4721348 to your computer and use it in GitHub Desktop.
Save Altech/4721348 to your computer and use it in GitHub Desktop.
Experimental API of a gem for creating many queries for Treasure Data.
## Add Queries
# Build the query set. Per the original note the block's receiver is MMQueries,
# so `add`, `import`, `after`, `td`, `mongo` and `specific_time` are resolved on it.
# All four `add` examples reuse the same label on purpose: they are alternative
# ways of writing the same query, not four queries meant to coexist.
queries = MMQueries.new do
  # add query
  # case0-0: one
  # The result is sent to the destination given by `to:`; no callback is needed.
  add :count_users_inflow_source do |from, to|
    %w[customers creators].each do |users|
      td.query "SELECT * FROM #{users} WHERE #{specific_time(from, to)}", to: 'td://@/db/result'
    end
  end

  # case1-1: one-to-one
  add :count_users_inflow_source do |from, to|
    %w[customers creators].each do |users|
      # queries are processed concurrently.
      # The call is parenthesised because a block cannot directly follow a
      # bare string argument of a command call.
      td.query("SELECT * FROM #{users} WHERE #{specific_time(from, to)}") do |result|
        mongo.collection('collectionA').insert(cnt: result[0], type: users)
      end
    end
  end

  # case1-2: many-to-one
  add :count_users_inflow_source do |range|
    range ||= (Date.today.prev_day..Date.today)
    # The block receives a single range; split it into the bounds used below.
    from = range.begin
    to = range.end
    # Each query callback appends its partial result to the shared array.
    results = %w[customers creators].each_with_object([]) do |users, collected|
      td.query("SELECT * FROM #{users} WHERE #{specific_time(from, to)}") do |partial_result|
        collected << partial_result
      end
    end
    after do # this will be called when all jobs have finished.
      mongo.collection('collectionA').insert(time: from, cnts: results.map(&:first))
    end
  end.every(1.day, :at => '4:30 am') # assign schedule for cron-tab

  # case2: big query
  add :count_users_inflow_source do |from, to|
    %w[customers creators].each do |users|
      # JOIN must precede WHERE, and the ON clause needs the alias `a` declared.
      sql = <<-QUERY
        SELECT * FROM #{users} a
        JOIN
        another_table b
        ON (a.t1=b.t2)
        WHERE #{specific_time(from, to)}
      QUERY
      td.query(sql) do |result|
        mongo.collection('collectionA').insert(cnt: result[0], type: users)
      end
    end
  end

  # grouping
  import 'kpi' # eval (DEFAULT_IMPORT_PATH + './queries/kpi.rb').read
  import 'kpi', :path => 'sub_queries.rb' # eval 'sub_queries.rb'
  import 'search/photo'
end
# grouping
MMQueries::Group.new 'kpi' do # the receiver is MMQueries.
add :kpi_of_new_costomers do |..|
..
end
add :kpi_of_new_creators do |..|
..
end
end.add_hook do |results| # group level hook
results.each do |label,job_result|
mongo.collection('all').insert({k => v})
end
end
# The same DSL methods are public, so they can also be called on the instance
# after construction, outside the MMQueries.new block.
# `|..|` and `..` are placeholders for the elided parameters and body.
queries.add :some_instrument do |..|
..
end
# Imports further query definitions by name.
queries.import 'some_instruments'
# customize helper
# Reopens MMQueries to mix in project-specific helpers for use inside query
# blocks. It is reopened with `class` because this file instantiates it with
# MMQueries.new; reopening an existing class with `module` raises TypeError.
class MMQueries
  module Helper
    # Returns a MongoDB connection; the constructor arguments are elided (`..`)
    # in this sketch.
    def mongo
      Mongo::Connection.new ..
    end
  end
  include Helper
end
## Inspection
# Look up a single query by its label.
queries.look_up(:count_users_inflow_source) # => #<MMQueries::Query:0x007fa4d2a5de70>
queries.look_up(:count_users_inflow_source).sample_query # => 'SELECT ... '
queries.look_up(:count_users_inflow_source).schedule # => every 1.day, :at => '4:30 am'
# Passing several labels returns a collection object instead of a single query.
queries.look_up(:count_users_inflow_source, :calc_conversion_rate) # => #<MMQueries::Array:0x007fa4d2a5de70>
# Groups are looked up by name.
queries.look_up_group('kpi') # => #<MMQueries::Group:0x007fa4d2a5de70>
queries.look_up_group('kpi').schedule # ...(pretty inspect)..
queries.schedule # ...(pretty inspect)..
## Execution (CUI-interactive)
# Run one query, without arguments or with an explicit (from, to) pair.
queries.look_up(:count_users_inflow_source).exec!
queries.look_up(:count_users_inflow_source).exec!(Date.today.prev_day, Date.today)
# Multiple queries are processed in parallel.
queries.look_up(:count_users_inflow_source, :calc_conversion_rate).exec!
queries.look_up(:count_users_inflow_source, :calc_conversion_rate).exec!(Date.today.prev_day, Date.today)
# presumably each array is the argument list for the query in the same
# position (a from/to pair for the first, a single range for the second) - confirm.
queries.look_up(:count_users_inflow_source, :calc_conversion_rate).exec!([Date.today.prev_day, Date.today], [Date.today.prev_day..Date.today])
# get result and invoke pry
queries.look_up(:count_users_inflow_source, :calc_conversion_rate).exec!(:pry => true)
# Run every registered query.
queries.exec!
## Scheduling (TODO: improve)
# - Uses crontab through the whenever gem.
# - https://github.com/javan/whenever
# Register the schedule of every query in `queries` on the job list.
MMQueries.schedule(queries, job_list) # all
# Register the schedule of a single query only.
MMQueries.schedule(queries.look_up(:count_users_inflow_source), job_list) # job list is a Whenever::JobList
# - This will execute the following.
# Three values are passed in (interval, schedule options, query name), so the
# block must take three parameters; with only two, the options hash would be
# bound to the query name. `every` takes the options and a block wrapping the
# job, matching whenever's `every <interval>, <options> do ... end` form.
job_list.instance_exec(1.day, { :at => '4:30 am' }, 'count_users_inflow_source') do |interval, options, query_name|
  every interval, options do
    query query_name
  end
end
# - This is equivalent to the following whenever configuration file.
# Declares a custom `query` job type that runs the gem's command-line tool
# with the query name substituted for :query_name.
# NOTE(review): the leading '-/bin/' looks like it may be a typo for a real
# path such as '~/bin/' - confirm before use.
job_type :query, '-/bin/many_many_query :query_name'
every 1.day, :at => '4:30 am' do
query 'count_users_inflow_source'
end
## TODO
# - logging
# - monitoring
## Options
# Gem-wide defaults, set on MMQueries itself.
MMQueries.default_database = 'database_name'
MMQueries.default_import_path = Pathname.new('./')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment