-
-
Save Mic92/7171933 to your computer and use it in GitHub Desktop.
Extract display name and email addresses from Maildir
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# fork of https://gist.github.com/ringe/2593827
# added the following features:
# - filter common social networks
# - only include an email if it was found at least 2 times
# - extract display name
# - write to tab-separated file
require 'mail' # uses the mail gem => gem install mail | |
# Strips single and double quote characters from a display name.
#
# name - String display name as taken from a mail header.
#
# Returns a new String. The argument itself is never modified, so frozen
# strings and strings still owned by the caller are safe to pass in.
def sanitize_display_name(name)
  name.delete("\"'")
end
# One e-mail address harvested from the maildir, together with the display
# name chosen for it and the number of times it has been encountered.
class Email
  attr_accessor :count, :display_name, :address

  # email - address object responding to #address and #display_name
  #         (the script passes elements of Mail::AddressList#addresses).
  def initialize(email)
    @address = email.address.downcase
    @count = 1
    return unless email.display_name

    name = sanitize_display_name(email.display_name)
    # A "name" that merely repeats the address carries no information.
    # Compare case-insensitively, since @address is already lower-cased.
    @display_name = name unless name.downcase == @address
  end

  # Tab-separated row: display name (empty when unknown), then address.
  def to_s
    "#{display_name}\t#{address}"
  end
end
# Scans every Maildir `cur` and `new` folder below +base_folder+, tallies the
# addresses found on From/To/Cc header lines, and writes those seen more than
# once to $HOME/emails.tsv as "display name<TAB>address" rows, ordered by
# ascending occurrence count.
#
# base_folder - String path of the directory tree to search.
#
# Returns nil.
def find_all_email_addresses_in(base_folder)
  emails = {}
  header_matcher = /(?:From|^To|Cc): (.+)/

  maildir_message_folders(base_folder).each do |folder|
    # Dir.foreach takes the folder name literally; a glob would misread
    # names such as "[Gmail].Sent" as patterns.
    Dir.foreach(folder) do |name|
      next if name.start_with?(".")

      file = File.join(folder, name)
      # Sub-directories and other non-files cannot be read as messages.
      next unless File.file?(file)

      # Treat the message as raw bytes and drop everything that is not
      # representable, so that scanning never fails on odd encodings.
      content = File.read(file).encode('UTF-8', 'binary', invalid: :replace, undef: :replace, replace: '')
      content.scan(header_matcher).each do |(header_value)|
        begin
          Mail::AddressList.new(header_value).addresses.each do |email|
            record_address(emails, email)
          end
        rescue Mail::Field::ParseError
          # ignore invalid emails
        end
      end
    end
  end

  frequent = emails.values.select { |e| e.count > 1 }.sort_by { |e| e.count }
  # Block form closes the file even if a write raises.
  File.open(File.join(ENV.fetch('HOME'), "emails.tsv"), 'w') do |out|
    frequent.each { |email| out.write("#{email}\n") }
  end
  nil
end

# Returns all directories named `cur`, followed by all named `new`, found at
# or below +base_folder+. Walks the tree in Ruby instead of shelling out to
# `find`, so paths with spaces or shell metacharacters are handled safely.
def maildir_message_folders(base_folder)
  folders = { "cur" => [], "new" => [] }
  pending = [base_folder]
  until pending.empty?
    dir = pending.shift
    bucket = folders[File.basename(dir)]
    bucket << dir if bucket

    begin
      names = Dir.entries(dir)
    rescue SystemCallError
      next # missing or unreadable directory: nothing to descend into
    end

    names.each do |name|
      next if name == "." || name == ".."

      path = File.join(dir, name)
      # Do not follow symlinked directories.
      pending << path if File.directory?(path) && !File.symlink?(path)
    end
  end
  folders["cur"] + folders["new"]
end

# Counts one occurrence of +email+ in the +emails+ hash (keyed by lower-cased
# address), skipping filtered domains and keeping the longest display name
# seen so far that is not just the address itself.
def record_address(emails, email)
  return if email.domain.nil? || email.domain =~ /(facebook|github|unknown\.email|amazon|paypal|twitter|google\.com)/

  address = email.address.downcase
  known = emails[address]
  unless known
    emails[address] = Email.new(email)
    return
  end

  known.count += 1
  return unless email.display_name

  display_name = sanitize_display_name(email.display_name)
  return if display_name.downcase == address

  if known.display_name.nil? || known.display_name.size < display_name.size
    known.display_name = display_name
  end
end
# Run the scan on a background thread so the main thread can animate a
# one-character spinner until the scan has finished.
worker = Thread.new { find_all_email_addresses_in(".") }
%w[| / - \\].cycle do |glyph|
  break unless worker.alive?

  print glyph
  sleep 0.1
  print "\b" # step back so the next glyph overwrites this one
end
# join re-raises any exception that ended the worker thread.
worker.join
puts "Save all emails found to #{ENV['HOME']}/emails.tsv"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment