Skip to content

Instantly share code, notes, and snippets.

@hirokai
Created January 3, 2015 08:04
Show Gist options
  • Save hirokai/f5f178f72407c5f5e164 to your computer and use it in GitHub Desktop.
Save hirokai/f5f178f72407c5f5e164 to your computer and use it in GitHub Desktop.
Email
require 'kconv'
require 'mail' # gem install mail
require 'csv'
# Parses one raw e-mail message with the mail gem and reduces it to the
# header fields needed for reply analysis.
#
# message - raw message text, passed straight to Mail.new.
#
# Returns a Hash with the keys :id, :from, :to, :subject, :reply, :date and
# :ref; each value is whatever the corresponding Mail accessor returns.
# The Hash is also echoed to stdout as a progress/debug trace.
def parse_message(message)
  mail = Mail.new(message)
  summary = {
    :id      => mail.message_id,
    :from    => mail.from,
    :to      => mail.to,
    :subject => mail.subject,
    :reply   => mail.in_reply_to,
    :date    => mail.date,
    :ref     => mail.references
  }
  # Kernel#p prints the summary and returns its single argument, so this
  # line is both the debug trace and the method's return value.
  p(summary)
end
# Reads an mbox file and indexes its messages by their :id.
#
# Messages are delimited by lines beginning with "From "; the separator line
# itself is not part of the message text. Lines that were escaped as
# ">From..." have the leading ">" removed before the text is handed to
# parse_message. Lines appearing before the first separator are ignored.
#
# path      - path of the mbox file to read.
# max_count - scanning stops as soon as this many "From " separator lines
#             have been seen, so at most max_count - 1 messages are returned.
#
# Returns a Hash mapping each parsed message's :id to the Hash produced by
# parse_message. When several messages share an :id, the last one wins.
def read_mbox(path, max_count=100000)
  parsed = []
  message = nil # text of the message being collected; nil while outside one
  count = 0     # number of "From " separator lines seen so far

  # The block form closes the file even if parsing raises, and File.open
  # (unlike Kernel#open) never treats a path starting with "|" as a command.
  File.open(path) do |f|
    f.each_line do |line|
      # start_with? is used instead of a regexp so that lines containing
      # invalid byte sequences are still classified without raising.
      if line.start_with?('From ')
        count += 1
        parsed << parse_message(message) if message
        # Cleared so the message just parsed is not flushed a second time
        # after the loop when we stop early below.
        message = nil
        print '.' if count % 100 == 0
        puts " #{count} processed." if count % 1000 == 0
        break if count >= max_count
        message = ''.dup
      elsif message
        message << (line.start_with?('>From') ? line[1..-1] : line)
      end
    end
  end

  # The last message has no following separator, so flush it at end of file.
  parsed << parse_message(message) if message

  parsed.each_with_object({}) { |m, by_id| by_id[m[:id]] = m }
end
# Script body: for every sent message that is a reply to a message found in
# the "All Mail" mailbox, write one CSV row to result.csv containing
#   [seconds between the original and the reply, original's date,
#    reply recipients, reply subject]
# and finally print how many sent messages were replies, non-replies or errors.
folder = "/path/to/mbox"
user = '****'
sent = read_mbox("#{folder}/#{user}/Sent Mail.mbox/mbox", 100000)
all = read_mbox("#{folder}/#{user}/All Mail.mbox/mbox", 100000)

tally = { :reply => 0, :not_reply => 0, :error => 0 }

CSV.open("result.csv", "wb") do |csv|
  sent.each_value do |reply|
    parent_id = reply[:reply]

    # No In-Reply-To value: this sent message did not answer anything.
    unless parent_id
      tally[:not_reply] += 1
      next
    end

    # The message being answered is not present in the "All Mail" index.
    parent = all[parent_id]
    unless parent
      tally[:error] += 1
      next
    end

    asked_at = parent[:date]
    answered_at = reply[:date]

    if asked_at && answered_at
      # strftime('%s') yields seconds since the epoch, so the difference is
      # the reply delay in seconds.
      delay = answered_at.strftime('%s').to_i - asked_at.strftime('%s').to_i
      csv << [delay, asked_at.to_s, reply[:to], reply[:subject]]
      tally[:reply] += 1
    else
      p [asked_at, answered_at]
      puts 'Date is missing.'
      tally[:error] += 1
    end
  end
end

p tally
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment