Created
September 5, 2012 04:07
-
-
Save nthomson/3630219 to your computer and use it in GitHub Desktop.
Simple email parser in Ruby
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/ruby
#Returns an email dictionary that includes all of an email's headers as well as each part of the email
#You can reference which part of the email you'd like by its content type
#Ex: email['parts']['text/html'] will get you the text/html version of the email body
#You can reference headers by the header name
#Ex: email['headers']['To'] will return the value of the "To" field
#
#file_path - path of the email file to read
#
#Every "Name: value" line found before the first line starting with "--" is
#recorded as a header. Indented continuation lines are appended to the header
#they follow. When no such "--" line exists (the message has no body parts)
#'parts' is an empty hash.
def headers_and_parts(file_path)
  headers = {}
  body_parts = {}

  # The block form closes the file even when parsing raises.
  File.open(file_path, 'r') do |file|
    found_part = false
    last_name = nil # most recent header, target for folded continuation lines

    #loop until the first body part
    while (line = file.gets)
      if line.start_with?('--') #We found the first body part
        found_part = true
        break
      elsif line.strip.empty?
        # A blank line ends the header section, so stop unfolding.
        last_name = nil
      elsif last_name && line =~ /\A[ \t]/
        # Folded header: a line starting with whitespace continues the previous one.
        headers[last_name] = "#{headers[last_name]} #{line.strip}"
      else
        # Limit the split to 2 so values that contain ': ' are kept whole.
        name, value = line.split(': ', 2)
        headers[name] = value.strip if value
        last_name = value ? name : nil
      end
    end

    if found_part
      #Get the boundary string from the content-type header
      _name, content_type = headers.find { |name, _value| name.downcase == 'content-type' }
      # Accept both boundary="quoted value" and boundary=bare-token.
      match = /boundary\s*=\s*(?:"([^"]*)"|([^;\s]+))/i.match(content_type.to_s)
      boundary = match ? (match[1] || match[2]) : ''

      #recursively loop over each body part
      parse_body_part(boundary, file, body_parts)
    end
  end

  { 'headers' => headers, 'parts' => body_parts }
end
#Takes in part of a file object.
#Starting with the first line after "--boundary"
#
#boundary - the multipart boundary string, without the leading "--"
#file     - an IO-like object (responds to gets and eof?) positioned on the
#           first line of a body part
#parts    - hash that receives content type => body text for this part and,
#           through recursion, for every part that follows it
#
#The part's header lines (up to the first blank line) are consumed and only
#its Content-Type is kept; everything after them, up to the next boundary
#line, is the body. A part without a Content-Type header is stored under
#'text/plain'. Nothing is stored when the file is already at its end.
#Returns the body text of this part, or nil when there was nothing to read.
def parse_body_part(boundary, file, parts)
  # Nothing left to read, e.g. the message had no body parts at all.
  return if file.eof?

  next_part = "--#{boundary}"
  last_part = "--#{boundary}--"
  content_type = 'text/plain' # MIME default for a part that declares no type
  body = ''
  in_headers = true

  while (line = file.gets)
    marker = line.strip
    if marker == next_part
      #start of the next body
      parse_body_part(boundary, file, parts)
      # The recursive call consumed the remaining parts; stop here so that
      # text after the closing boundary is not appended to this body.
      break
    elsif marker == last_part
      break
    elsif in_headers
      name, value = line.split(':', 2)
      if marker.empty?
        # Blank line separates the part headers from the body.
        in_headers = false
      elsif line =~ /\A[ \t]/
        # Folded continuation of the previous part header; nothing to keep.
      elsif value && name !~ /\s/
        if name.downcase == 'content-type'
          # Keep only the media type, dropping parameters such as charset.
          type = value.split(';').first.to_s.strip
          content_type = type unless type.empty?
        end
      else
        # Not a header line: the part has no blank separator, so this is body.
        in_headers = false
        body << line
      end
    else
      #Add the line to the "body"
      body << line
    end
  end

  parts[content_type] = body
end
# Demo: runs only when this file is executed directly, not when it is loaded
# by another script. The email path may be given as the first command-line
# argument and defaults to 'email.txt'.
if __FILE__ == $PROGRAM_NAME
  email = headers_and_parts(ARGV[0] || 'email.txt')
  #print all the headers
  email['headers'].each { |name, value| puts "#{name}: #{value}" }
  #print the plaintext version of the email
  puts email['parts']['text/plain']
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment