Created
February 1, 2015 11:47
-
-
Save rubys/6f6bdb8fae57017e2fc8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'stringio'
require 'tempfile'
require 'yaml'
require 'zlib'

require 'mail'
require 'zip'
# List the entries of an archive that is held in memory.
#
# +contents+ is written to a temporary file, "+command+ <tempfile>" is run
# with backticks, and its standard output is scanned for lines of the form
# "<size> <five date/time numbers> <name>".
#
# contents - String holding the raw archive bytes.
# command  - String with the listing command and its options; the temporary
#            file's path is appended as the final argument.
#
# Returns an Array of Hashes ({length: Integer, name: String}) in listing
# order, leaving out:
#   * zero-length entries whose name ends in '/'
#   * anything inside a __MACOSX/ directory
#   * .DS_Store files
def file_list(contents, command)
  archive = Tempfile.new('parsemail')
  begin
    archive.binmode # raw bytes: never let Ruby transcode them
    archive.write(contents)
    archive.close # flush to disk before the external command reads the file
    listing = `#{command} #{archive.path}`
  ensure
    # Remove the temporary file even when writing or the command raised.
    archive.close unless archive.closed?
    archive.unlink
  end

  entries = listing.scan(/\s+(\d+)\s*(?:[-:\s]\d+){5}\s+(.*)/)
  entries.map! { |length, name| { length: length.to_i, name: name } }
  entries.select! { |entry| entry[:length] > 0 || !entry[:name].end_with?('/') }
  entries.reject! { |entry| entry[:name] =~ %r{(^|/)__MACOSX/} }
  # The dot is escaped so only a literal ".DS_Store" is dropped.
  entries.reject! { |entry| entry[:name] =~ %r{(^|/)\.DS_Store$} }
  entries
end
# Enumerate the members of an in-memory zip archive.
#
# Hands +contents+ to file_list together with the `unzip -lqq` listing command
# and returns whatever file_list returns.
def zip_list(contents)
  listing_command = 'unzip -lqq'
  file_list(contents, listing_command)
end
# Enumerate the members of an in-memory gzip-compressed tar archive.
#
# Hands +contents+ to file_list together with the `tar tzvf` listing command
# and returns whatever file_list returns.
def tgz_list(contents)
  listing_command = 'tar tzvf'
  file_list(contents, listing_command)
end
# Decide whether every entry name in an archive listing is acceptable.
#
# paths - Array of Hashes that each carry a :name key.
#
# Returns true when no name is rejected (so an empty list is safe) and false
# otherwise. A name is rejected when it
#   * is nil or empty (no path found),
#   * is not valid in its own encoding,
#   * contains a non-printable character,
#   * starts with a slash or backslash, or
#   * has ".." at the start or directly after a slash or backslash.
def safe_paths?(paths)
  paths.none? do |path|
    name = path[:name]
    name.nil? || name.empty? ||
      # Checked before any regexp so a malformed name is reported as unsafe
      # instead of being handed to the pattern matches below.
      !name.valid_encoding? ||
      name =~ /[^[:print:]]/ ||
      name =~ /\A[\\\/]/ ||
      name =~ /(\A|[\\\/])\.\./
  end
end
# Count the pages of a PDF document that is held in memory.
#
# +contents+ is written to a temporary file and `pdftk <file> dump_data` is
# run on it with stderr merged into stdout.
#
# contents - String holding the raw PDF bytes.
#
# Returns the Integer found after "NumberOfPages: " in the command output, or
# 0 when the output contains no such line.
def page_count(contents)
  pdf = Tempfile.new('parsemail')
  begin
    pdf.binmode # raw bytes: never let Ruby transcode them
    pdf.write(contents)
    pdf.close # flush to disk before pdftk reads the file
    report = `pdftk #{pdf.path} dump_data 2>&1`
  ensure
    # Remove the temporary file even when writing or the command raised.
    pdf.close unless pdf.closed?
    pdf.unlink
  end

  # String#[] yields nil when nothing matches, and nil.to_i is 0.
  report[/NumberOfPages: (\d+)/, 1].to_i
end
# Main script: maintain one YAML index per mail archive file.
#
# For every file matching "<database>/2*" (other than the .yml indexes
# themselves) the archive is split into messages, and for each message not yet
# indexed a summary Hash (from, name, date, subject, plus cc and attachment
# details when there are attachments) is stored under its Message-ID. An
# archive whose modification time matches the indexed :mtime is skipped.
#
# Pass --update to rsync the archive directory from the remote host first.

path = '/home/apmail/private-arch/officers-secretary'
database = File.basename(path)

# All relative paths below are resolved against this script's directory.
Dir.chdir File.dirname(File.expand_path(__FILE__))

if ARGV.include? '--update'
  system "rsync -av --delete --exclude='*.yml' minotaur.apache.org:#{path} ."
end

width = 0
Dir[File.join(database, '2*')].sort.each do |name|
  next if name.end_with? '.yml'

  # Single-line progress display: pad to the previous width so a shorter name
  # fully overwrites a longer one.
  print "#{name.ljust(width)}\r"
  width = name.length

  yaml = File.join(database, File.basename(name)[/\d+/] + '.yml')

  # The index is written by this script and holds Symbol keys and time
  # values, which the safe loader used by YAML.load_file on newer Psych
  # versions rejects; prefer the explicit unsafe loader where it exists.
  # NOTE(review): only acceptable because the .yml files are produced locally.
  mbox =
    begin
      loader = YAML.respond_to?(:unsafe_load_file) ? :unsafe_load_file : :load_file
      YAML.public_send(loader, yaml)
    rescue StandardError
      nil # missing or unreadable index: rebuild from scratch
    end
  mbox = {} unless mbox.is_a?(Hash) # e.g. an empty file does not load as a Hash

  mtime = File.mtime(name)
  next if mbox[:mtime] == mtime
  mbox[:mtime] = mtime

  mails =
    if name.end_with? '.gz'
      Zlib::GzipReader.open(name) { |gz| gz.read }
    else
      File.binread(name)
    end
  mails.force_encoding Encoding::ASCII_8BIT

  # Split on the "From " separator lines; the first piece is whatever
  # precedes the first separator and is discarded.
  mails = mails.split(/^From .*/)
  mails.shift

  mails.each do |raw|
    # Cheap check on the raw text first so already-indexed messages are not
    # parsed again.
    id = raw[/^Message-ID: <(.*?)>\s*$/i, 1]
    next if id && mbox[id]

    mail = Mail.read_from_string(raw)
    id ||= mail.message_id
    next if mbox[id]

    # Prefer the display name of the sender; fall back to the raw header
    # value when it cannot be parsed (or nil when there is no From header).
    from_field = mail[:from]
    from_value = from_field && from_field.value
    from =
      begin
        Mail::Address.new(from_value).display_name
      rescue StandardError
        from_value
      end

    subject =
      begin
        mail.subject
      rescue StandardError
        mail.header['subject'].value.inspect
      end

    mbox[id] = {
      from: mail.from_addrs.first,
      name: from,
      date: mail.date,
      subject: subject
    }

    next if mail.attachments.empty?

    senders = mail.from_addrs
    cc = [mail[:to], mail[:cc]].compact.flat_map do |field|
      field.value.split(/,\s*/)
    end

    # Drop the listed mailbox address, the sender's own addresses and anything
    # that does not parse. The block's value is the verdict; `return` must not
    # be used here because this block is not inside a method.
    # NOTE(review): the address literal below looks like it was redacted by a
    # web page's e-mail protection; presumably it should be the real mailbox
    # address -- confirm and restore it.
    cc.reject! do |email|
      begin
        address = Mail::Address.new(email).address
        address == '[email protected]' || senders.include?(address)
      rescue StandardError
        true
      end
    end

    parts = mail.attachments.map do |attach|
      body = attach.body.to_s
      result = { name: attach.filename, length: body.length,
                 mime: attach.mime_type }

      case attach.filename
      when /\.zip$/
        result[:parts] = zip_list(body)
      when /\.t(ar\.)?gz$/
        result[:parts] = tgz_list(body)
      when /\.pdf$/
        result[:pages] = page_count(body)
      end

      result
    end

    mbox[id].merge! cc: cc, parts: parts.to_a
  end

  File.open(yaml, 'w') { |file| YAML.dump(mbox, file) }
end

# Finish the progress line.
puts
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment