Skip to content

Instantly share code, notes, and snippets.

Last active December 28, 2015 04:59
Show Gist options
  • Save tejasbubane/7446106 to your computer and use it in GitHub Desktop.
Save tejasbubane/7446106 to your computer and use it in GitHub Desktop.
Parser that reads all log files in my directory and puts the data into MongoDB
require 'mongo'
require 'time'
include Mongo
# Imports every not-yet-processed *.log file into the "logs" MongoDB database.
#
# For each log line the script:
#   * stores the parsed line in `requests`,
#   * updates the per-day/partner/channel counters in `date_logs`,
#   * registers first-time devices in `user_details`,
#   * registers first-time items in `items`.
# Processed file names are remembered in `filenames` so a file is imported once.
#
# Uses the legacy mongo 1.x driver API (MongoClient / find_one / insert / update).

# Splits one raw log line into a Hash of its `key=>value` fields.
#
# @param line [String] raw line as read from the log file (mutated in place
#   by the encoding clean-up)
# @return [Hash{String=>String, nil}] field name => raw value
def parse_log_line(line)
  # Round-trip through UTF-16 to drop byte sequences that are invalid UTF-8.
  line.encode!('UTF-16', 'UTF-8', :invalid => :replace, :replace => '')
  line.encode!('UTF-8', 'UTF-16')

  fields = {}
  # chop.chop strips the last two characters of the line before splitting.
  line.chop.chop.split(',').each do |pair|
    parts = pair.split('=>')
    key = parts.first || "" # found some nil keys, replacing them with ""
    # mongodb doesn't allow a dot (.) in the key, hence replace it with _
    fields[key.gsub('.', '_')] = parts.last
  end
  fields
end

# Increments counts[key], starting from 1 when the key has no count yet.
#
# @param counts [Hash] counter hash to update in place
# @param key [Object] counter name
# @return [Integer] the new count
def bump_counter(counts, key)
  counts[key] = (counts[key] || 0) + 1
end

mongo_client = MongoClient.new("localhost", 27017)
db = mongo_client.db("logs")
requests = db.collection("requests") # master database
date_logs = db.collection("date_logs") # One record per day
user_details = db.collection("user_details")
items = db.collection("items") # Item specific information
filenames = db.collection("filenames") # stores all processed filenames

# Only unprocessed files will be processed
cnt = 0
file_cnt = 0
Dir.glob('/media/MediA/test_logs/*.log') do |f|
  file_cnt += 1
  next if filenames.find_one("_id" => f) # do nothing if the file is already processed

  filenames.insert({ "_id" => f, "timestamp" => Time.now }) # file processed
  puts "Processing file ##{file_cnt}"
  puts f

  # File.foreach closes the file handle once the block finishes.
  File.foreach(f) do |line|
    # Initialised up front so the rescue clause can always print them.
    h = {}
    record = nil
    begin
      h = parse_log_line(line)
      h["trans_date"] = Time.parse(h["trans_date"]).utc if h["trans_date"]

      # do nothing if device_id, log_format or trans_date is absent
      next unless h["device_id"] && h["log_format"] && h["trans_date"]

      # Find the date_logs record for this day/partner/channel, or start a new one.
      day_key = {
        "trans_date" => h["trans_date"].to_date.to_time,
        "partner_id" => h["partner_id"],
        "channel_id" => h["channel_id"]
      }
      record = date_logs.find_one("_id" => day_key) ||
               { "_id" => day_key, "download" => {}, "streaming" => {}, "activation" => {}, "access" => {} }

      case h["log_format"]
      when "access" # case 1
        user_key = {
          "device_id" => h["device_id"],
          "channel_id" => h["channel_id"],
          "partner_id" => h["partner_id"]
        }
        if user_details.find_one("_id" => user_key) # user exists (Access)
          bump_counter(record["access"], h["device_id"])
        else # user does not exist (Activation / first access)
          user = {
            "_id" => user_key,
            "trans_date" => h["trans_date"],
            "ip_address" => h["ip_address"],
            "mmn" => h["mmn"]
          }
          # Persist the user so later accesses from this device are counted
          # as accesses instead of being treated as activations again.
          user_details.insert(user)
          # no need to check for an existing activation, it is unique per device
          record["activation"][h["device_id"]] = h["ip_address"]
        end
      when "download" # case 2
        bump_counter(record["download"], h["open_id"])
      when "streaming" # case 3
        bump_counter(record["streaming"], h["open_id"])
      end

      # all done now store the records
      requests.insert(h) # master record contains everything
      date_logs.update({ :_id => record["_id"] }, record, { :upsert => true })

      item_key = {
        :open_id => h["open_id"],
        :partner_id => h["partner_id"],
        :channel_id => h["channel_id"]
      }
      if items.find_one(:_id => item_key).nil? # create and save the new item
        items.insert({
          :_id => item_key,
          :count_type => h["count_type"],
          :item_title => h["item_title"],
          :track_Artist => h["track_artist"],
          :product_uri => h["product_uri"],
          :image_uri => h["image_uri"]
        })
      end

      cnt += 1
      #puts cnt
    rescue StandardError => e
      # Report the failing line and carry on with the next one. StandardError
      # (not Exception) so Ctrl-C and exit still stop the import.
      puts e
      puts h
      puts record
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment