Created
January 3, 2015 08:04
-
-
Save hirokai/f5f178f72407c5f5e164 to your computer and use it in GitHub Desktop.
Email
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'kconv' | |
require 'mail' # gem install mail | |
require 'csv' | |
# Builds a small summary hash from the raw text of one message.
#
# The text is handed to Mail.new and the resulting object's header accessors
# are copied into a plain Hash. The summary is also dumped to stdout with `p`.
#
# @param message [String] raw text of a single message
# @return [Hash] keys :id, :from, :to, :subject, :reply, :date and :ref,
#   holding whatever the corresponding Mail accessors returned
def parse_message(message)
  mail = Mail.new(message)
  summary = {
    :id      => mail.message_id,
    :from    => mail.from,
    :to      => mail.to,
    :subject => mail.subject,
    :reply   => mail.in_reply_to,
    :date    => mail.date,
    :ref     => mail.references
  }
  # Kernel#p prints the summary and hands the same object back, so it doubles
  # as the method's return value.
  p(summary)
end
# Reads an mbox file and returns its messages indexed by message id.
#
# A line beginning with "From " starts a new message; every other line is
# appended to the message currently being collected, with a leading ">From"
# turned back into "From". Each completed message is passed to parse_message.
#
# Differences from the earlier implementation:
# * the last message in the file is parsed (it used to be dropped because a
#   message was only flushed when the next "From " line was seen);
# * max_count is the number of messages returned, not the number of "From "
#   lines seen (the old code returned at most max_count - 1);
# * lines that appear before the first "From " line are skipped instead of
#   raising NoMethodError on nil;
# * the file is closed even when parse_message raises.
#
# Progress is written to stdout: a dot every 100 messages and a count every
# 1000 messages.
#
# @param path [String] location of the mbox file
# @param max_count [Integer] upper bound on the number of messages to parse
# @return [Hash] parse_message results keyed by their :id entry; when several
#   messages share an id, the last one wins
def read_mbox(path, max_count = 100000)
  parsed = []
  current = nil # text of the message being collected; nil while outside a message
  seen = 0

  # File.open (rather than Kernel#open) never treats the path as a command,
  # and the block form closes the handle on every exit path.
  File.open(path) do |f|
    f.each_line do |line|
      # start_with? / byteslice work on raw bytes, so lines containing invalid
      # byte sequences do not need a rescue the way regex matching did.
      if line.start_with?('From ')
        if current
          parsed << parse_message(current)
          current = nil
        end
        break if parsed.size >= max_count

        seen += 1
        print '.' if seen % 100 == 0
        puts " #{seen} processed." if seen % 1000 == 0
        current = +''
      elsif current
        unescaped = line.start_with?('>From') ? line.byteslice(1, line.bytesize - 1) : line
        current << unescaped
      end
    end
  end

  # Flush the message that was still being collected when the file ended.
  parsed << parse_message(current) if current

  parsed.each_with_object({}) { |msg, by_id| by_id[msg[:id]] = msg }
end
# Script body: for every sent message that is a reply, look up the message it
# answers in the "All Mail" mailbox and write one CSV row per match to
# result.csv: [seconds between the two dates, date of the original message,
# recipients of the reply, subject of the reply]. A tally of outcomes is
# printed at the end.
folder = "/path/to/mbox"
user = '****'
sent = read_mbox("#{folder}/#{user}/Sent Mail.mbox/mbox", 100000)
all = read_mbox("#{folder}/#{user}/All Mail.mbox/mbox", 100000)

# :reply     - replies whose original was found and both dates were present
# :not_reply - sent messages without a :reply value
# :error     - replies whose original is missing or whose dates are missing
count = { :reply => 0, :not_reply => 0, :error => 0 }

CSV.open("result.csv", "wb") do |csv|
  sent.each do |_id, m|
    unless m[:reply] # Not a reply
      count[:not_reply] += 1
      next
    end

    # NOTE(review): the mail gem appears to return an Array of ids when the
    # In-Reply-To header lists several; confirm against the installed version.
    # Array() leaves a single id unchanged in effect and lets us use the first
    # referenced message that is actually present in `all`.
    original = Array(m[:reply]).map { |id| all[id] }.compact.first
    unless original
      # The replied-to message is not in the "All Mail" mailbox.
      count[:error] += 1
      next
    end

    d1 = original[:date]
    d2 = m[:date]
    if d1 && d2
      # '%s' formats a date as seconds since the epoch, so the difference is
      # the reply delay in seconds.
      csv << [d2.strftime('%s').to_i - d1.strftime('%s').to_i, d1.to_s, m[:to], m[:subject]]
      count[:reply] += 1
    else
      p [d1, d2]
      puts 'Date is missing.'
      count[:error] += 1
    end
  end
end
p count
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment