#!/usr/bin/env python

# Take an IBM Sametime HTML log file, determine the date, and create
# an RFC-compliant email message from it, for import into a mail user
# agent (MUA).
#
# Usage:  sametimetoeml.py inputfile.html [outputfile.eml]
#  Where inputfile.html is a Sametime log located in a dated folder.
#  If outputfile.eml is omitted, the message is written to the current
#  directory and named after the buddy and the chat's creation time.
#  (See readme for more useful suggestions.)
#
# Written for Python 2.6
        
          |  |  | 
        
          |  |  | 
        
          |  | import sys | 
        
          |  | import os | 
        
          |  | import dateutil.parser | 
        
          |  | import time | 
        
          |  |  | 
        
          |  | from email.MIMEMultipart import MIMEMultipart | 
        
          |  | from email.MIMEText import MIMEText | 
        
          |  |  | 
        
          |  | import xml.etree.ElementTree | 
        
          |  |  | 
        
          |  | # Program-wide variables | 
        
          |  | global debug | 
        
          |  | debug = False  # Debug toggle | 
        
          |  |  | 
        
          |  |  | 
        
          |  | # Class definition | 
        
          |  | class Chatlog: | 
        
          |  | """Chatlog class, for holding chat log and metadata during conversion.""" | 
        
          |  | def __init__(self, filepath, logtype): | 
        
          |  | # Instance | 
        
          |  | self.filepath = filepath | 
        
          |  | self.logtype = logtype | 
        
          |  |  | 
        
          |  | def setHTML(self): | 
        
          |  | if (self.logtype == "html"): | 
        
          |  | try: | 
        
          |  | infile = open(self.filepath,'r') | 
        
          |  | except: | 
        
          |  | raise  # raise exception if we can't read file | 
        
          |  | self.html = infile.read() | 
        
          |  | # Then parse it using ElementTree | 
        
          |  | self.tree = xml.etree.ElementTree.fromstring(self.html) | 
        
          |  |  | 
        
          |  | def setBuddyName(self): | 
        
          |  | # remote buddy's name should always be the grandparent folder | 
        
          |  | self.buddyname = self.filepath.split(os.sep)[-3] | 
        
          |  |  | 
        
          |  | def setMetatagdata(self): | 
        
          |  | # <meta name="sametime:lastActivityTime" content="20070112-131123 (-0500)"/> | 
        
          |  | # This is for Python 2.6 | 
        
          |  | for element in self.tree.getiterator(tag="meta"): | 
        
          |  | try: | 
        
          |  | if element.attrib['http-equiv'] == 'Content-Type': | 
        
          |  | self.contenttype = element.attrib['content'] | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("HTTP Content-Type is: " + self.contenttype + "\n") | 
        
          |  | except KeyError: | 
        
          |  | pass # ignore KeyError | 
        
          |  |  | 
        
          |  | try: | 
        
          |  | if element.attrib['name'] == "sametime:creationTime": | 
        
          |  | self.datetimestr = element.attrib['content'] | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Creation time is " + self.datetimestr + "\n") | 
        
          |  |  | 
        
          |  | # This works as long as the time has seconds... | 
        
          |  | #self.isotime = self.datetimestr[:4] + '-' + self.datetimestr[4:6] + '-' + self.datetimestr[6:8] + 'T' + self.datetimestr[9:11] + ':' + self.datetimestr[11:13] + ':' + self.datetimestr[13:15] + self.datetimestr[17:20] + ':' + self.datetimestr[20:22] | 
        
          |  | #if debug: | 
        
          |  | #    sys.stdout.write("ISO format datetime is: " + self.isotime + "\n") | 
        
          |  |  | 
        
          |  | self.datetime = dateutil.parser.parse(self.datetimestr, fuzzy=True) | 
        
          |  |  | 
        
          |  | self.isotime = self.datetime.isoformat() | 
        
          |  |  | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Pretty date is: " + self.datetime.strftime("%a, %d %b %Y %H:%M:%S") + "\n") | 
        
          |  | except KeyError: | 
        
          |  | pass # ignore KeyError | 
        
          |  |  | 
        
          |  | try: | 
        
          |  | if element.attrib['name'] == 'sametime:username': | 
        
          |  | self.username = element.attrib['content'] | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Sametime username is " + self.username + "\n") | 
        
          |  | except KeyError: | 
        
          |  | pass # ignore KeyError | 
        
          |  |  | 
        
          |  |  | 
        
          |  |  | 
        
          |  | # Processing work | 
        
          |  | def main(): | 
        
          |  | # First argument is the input file | 
        
          |  | infilename = sys.argv[1] | 
        
          |  |  | 
        
          |  | # Make sure infilename at least ends in .html before processing it | 
        
          |  | if infilename[-4:] != "html": | 
        
          |  | if debug: | 
        
          |  | sys.stderr.write("Input filename does not end with html\n") | 
        
          |  | return(1) #exit with error | 
        
          |  |  | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Input filename:  " + infilename + "\n") | 
        
          |  |  | 
        
          |  | # Then get the path | 
        
          |  | filepath = os.path.abspath(infilename)  #filepath is a string | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Input path is: " + str(filepath) + "\n") | 
        
          |  | sys.stdout.write("Directory path separator char is: " + str(os.sep) + "\n") | 
        
          |  |  | 
        
          |  | # instantiate Chatlog object | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Instantiating chatlog object...\n") | 
        
          |  | chatlog = Chatlog(filepath, "html") | 
        
          |  |  | 
        
          |  | # read file contents into memory | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Reading file contents...\n") | 
        
          |  | chatlog.setHTML() | 
        
          |  |  | 
        
          |  | # get the remote buddy's name from the path | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Determining buddy name from path...\n") | 
        
          |  | chatlog.setBuddyName() | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Buddy name is: " + str(chatlog.buddyname) + "\n") | 
        
          |  |  | 
        
          |  | # Parse the HTML and set other metadata values | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Set meta tag values from HTML...\n") | 
        
          |  | chatlog.setMetatagdata() | 
        
          |  |  | 
        
          |  | # create message object for the output | 
        
          |  | msg_base = MIMEMultipart('mixed') | 
        
          |  |  | 
        
          |  | # set message headers | 
        
          |  | msg_base['Subject'] = "Sametime with " + chatlog.buddyname | 
        
          |  | msg_base['Date'] = chatlog.datetime.strftime("%a, %d %b %Y %H:%M:%S") | 
        
          |  | msg_base['From'] = chatlog.buddyname  # TODO: set this to the chat originator | 
        
          |  | msg_base['To'] = chatlog.username  # TODO: set this to username unless username == originator, in which case buddyname | 
        
          |  | #msg_base['X-Original-Filename'] = infilename | 
        
          |  | msg_base['X-Converted-On'] = time.strftime("%a, %d %b %Y %H:%M:%S") #timezones are hard... | 
        
          |  |  | 
        
          |  | if debug: | 
        
          |  | print "-- Headers after parsing first line are..." | 
        
          |  | for key, value in msg_base.items(): | 
        
          |  | print key + ": " + value | 
        
          |  |  | 
        
          |  | # create message content | 
        
          |  | encoding = chatlog.contenttype.split(';')[1].split('=')[1] # get encoding (probably UTF-8) from HTML content-type header | 
        
          |  | content = MIMEText(chatlog.html, 'html', encoding) | 
        
          |  |  | 
        
          |  | msg_base.attach(content) | 
        
          |  |  | 
        
          |  | # Second arg, if present, is the output file | 
        
          |  | try: | 
        
          |  | outfilename = sys.argv[2] | 
        
          |  | except IndexError: | 
        
          |  | # default output is to cwd with same basename but .eml instead of .html | 
        
          |  | outfilename = os.getcwd() + os.sep + chatlog.buddyname + ' (' + chatlog.datetime.strftime("%Y-%m-%dT%H%M") + ').eml' | 
        
          |  |  | 
        
          |  | if debug: | 
        
          |  | sys.stdout.write("Output file is: " + outfilename + "\n") | 
        
          |  |  | 
        
          |  | fo = open(outfilename, 'w') | 
        
          |  | fo.write( msg_base.as_string() ) | 
        
          |  |  | 
        
          |  | return 0 | 
        
          |  |  | 
        
          |  |  | 
        
          |  |  | 
        
          |  | # If run as standalone, execute main loop | 
        
          |  | if __name__ == "__main__": | 
        
          |  | sys.exit( main() )  # program return value is main()'s return value |