"""Convert MSN chat-log XML files into an SQLite database.

The script looks for ``*.xml`` files in a directory called ``chats`` in the
current working directory, parses every ``Message`` element found in them and
writes the result to ``output/msn-chats.db``. The database gets a single
``chats`` table storing the date, sender, recipient, text and style of each
message.
"""

import csv
import os
import sqlite3
import sys
import xml.etree.ElementTree as ET

# Force UTF-8 on stdout so printing text with non-ASCII characters does not
# fail on consoles with a narrower default encoding. Guarded because a replaced
# stdout (for example an io.StringIO) has no reconfigure() method.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")


def parse_chat_xml_to_list(xml_file):
    """Parse one chat-log XML file into a list of message rows.

    Args:
        xml_file: Name of the XML file, resolved against the ``chats``
            directory located next to this script. Because the path is built
            with ``os.path.join``, an absolute path is used as-is.

    Returns:
        A list with one ``[date_time, from_user, to_user, text, style]`` list
        per ``Message`` element that is a direct child of the document root.
        ``text`` is ``None`` when the ``Text`` element is empty and ``style``
        is ``''`` when the ``Text`` element has no ``Style`` attribute.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        AttributeError: If a message lacks a ``From``, ``To``, ``User`` or
            ``Text`` element.
        KeyError: If a ``DateTime`` or ``FriendlyName`` attribute is missing.
    """
    xml_path = os.path.join(os.path.dirname(__file__), 'chats', xml_file)
    tree = ET.parse(xml_path)

    messages = []
    for message in tree.getroot().findall('Message'):
        date_time = message.attrib['DateTime']
        from_user = message.find('From').find('User').attrib['FriendlyName']
        to_user = message.find('To').find('User').attrib['FriendlyName']

        text_element = message.find('Text')
        msg_text = text_element.text
        # ``attrib`` is a dict, so the style has to be looked up as a key;
        # hasattr() would test for an object attribute and never match.
        msg_style = text_element.attrib.get('Style', '')

        messages.append([date_time, from_user, to_user, msg_text, msg_style])
    return messages


def generate_sqlite_db_from_chats(chats):
    """Create ``output/msn-chats.db`` and store the given messages in it.

    Args:
        chats: Iterable of message rows, each indexable with at least five
            items: date, sender, recipient, text and style (the shape
            returned by ``parse_chat_xml_to_list``).

    Raises:
        sqlite3.Error: If the database cannot be opened or written. This
            includes the case where the ``chats`` table already exists from a
            previous run, because the table is created unconditionally.
    """
    # sqlite3 creates the database file but not its parent directory.
    os.makedirs("output", exist_ok=True)

    con = sqlite3.connect("output/msn-chats.db")
    try:
        cur = con.cursor()
        cur.execute(
            "CREATE TABLE chats (id INTEGER PRIMARY KEY AUTOINCREMENT, msg_date, msg_from, msg_to, msg_text, msg_style);")
        to_db = [(row[0], row[1], row[2], row[3], row[4]) for row in chats]
        cur.executemany(
            "INSERT INTO chats (msg_date, msg_from, msg_to, msg_text, msg_style) VALUES (?, ?, ?, ?, ?);",
            to_db)
        con.commit()
    finally:
        # Release the connection on success and on failure alike; any
        # exception still propagates to the caller.
        con.close()


def main():
    """Parse every XML file in ``./chats`` and write them to the database.

    Prints the error and exits with status 1 as soon as one file fails to
    parse; in that case no database is written.
    """
    chats_dir = os.path.abspath("chats")
    chats_data = []
    for file_name in os.listdir(chats_dir):
        if not file_name.endswith(".xml"):
            continue
        try:
            # Hand over an absolute path so the file that was listed is the
            # file that gets parsed, even when the script is started from a
            # directory other than the one it lives in.
            chats_data.extend(
                parse_chat_xml_to_list(os.path.join(chats_dir, file_name)))
        except Exception as e:
            print(e)
            # Non-zero status so callers can detect the failure.
            sys.exit(1)
    generate_sqlite_db_from_chats(chats_data)


if __name__ == "__main__":
    main()