Last active
September 19, 2022 05:08
-
-
Save guychouk/8663b49b9b9f7d71e70b5021d5ac1376 to your computer and use it in GitHub Desktop.
Insert MSN Messenger chat XML logs into an SQLite DB.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
This script looks in the current working directory for a directory called
"chats" and reads the XML files it contains.
Once the parsing is done, it creates an SQLite DB file, and a table
for storing the date, from, to, text and style attributes of the messages.
'''
import os | |
import sys | |
import csv | |
import sqlite3 | |
import xml.etree.ElementTree as ET | |
# Switch stdout to UTF-8 (presumably so that printing non-ASCII chat text or
# error messages does not fail on consoles with a narrower default encoding).
# The call is guarded because sys.stdout may be None or may have been replaced
# by a stream without reconfigure() (e.g. io.StringIO).
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')
def parse_chat_xml_to_list(xml_file):
    """Parse one chat log XML file into a list of message rows.

    The file is resolved as ``<directory of this script>/chats/<xml_file>``.
    The path is built with ``os.path.join``, so an absolute ``xml_file`` is
    used as-is.

    Args:
        xml_file: File name (or absolute path) of the XML chat log.

    Returns:
        A list holding one ``[date_time, from_user, to_user, msg_text,
        msg_style]`` list per ``Message`` child of the root element.
        ``msg_text`` is ``None`` when the ``Text`` element has no text, and
        ``msg_style`` is ``''`` when it has no ``Style`` attribute.

    Raises:
        OSError: If the file cannot be opened.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
        KeyError: If a ``DateTime`` or ``FriendlyName`` attribute is missing.
        AttributeError: If a ``From``, ``To``, ``User`` or ``Text`` element
            is missing.
    """
    chat_path = os.path.join(os.path.dirname(__file__), 'chats', xml_file)
    messages = []
    for message in ET.parse(chat_path).getroot().findall('Message'):
        date_time = message.attrib['DateTime']
        from_user = message.find('From').find('User').attrib['FriendlyName']
        to_user = message.find('To').find('User').attrib['FriendlyName']
        text_node = message.find('Text')
        # Element.attrib is a plain dict, so "Style" has to be looked up as a
        # key; testing it with hasattr() never matched and dropped the style.
        msg_style = text_node.attrib.get('Style', '')
        messages.append(
            [date_time, from_user, to_user, text_node.text, msg_style])
    return messages
def generate_sqlite_db_from_chats(chats):
    """Store parsed chat messages in an SQLite database file.

    Opens ``output/msn-chats.db`` (relative to the current working
    directory), creates the ``chats`` table and inserts one row per message.

    Args:
        chats: Iterable of message sequences; the first five items of each
            are stored as date, from, to, text and style.

    Raises:
        sqlite3.Error: If the table cannot be created (for example because
            it already exists from an earlier run) or a row cannot be
            inserted. Nothing is committed in that case.
    """
    # sqlite3.connect() creates the database file but not its parent
    # directory, so make sure "output" exists first.
    os.makedirs("output", exist_ok=True)
    con = sqlite3.connect("output/msn-chats.db")
    try:
        cur = con.cursor()
        cur.execute(
            "CREATE TABLE chats (id INTEGER PRIMARY KEY AUTOINCREMENT, msg_date, msg_from, msg_to, msg_text, msg_style);")
        rows = [tuple(message[:5]) for message in chats]
        cur.executemany(
            "INSERT INTO chats (msg_date, msg_from, msg_to, msg_text, msg_style) VALUES (?, ?, ?, ?, ?);", rows)
        con.commit()
    finally:
        # Close on success and on failure alike; closing without a commit
        # discards any rows inserted before the error.
        con.close()
def main():
    """Parse every ``.xml`` file in ``./chats`` and write them to the DB.

    Files are processed in sorted name order so the inserted rows come out in
    the same order on every run. If the ``chats`` directory is missing, or
    any file fails to parse, the error is printed to stderr and the process
    exits with status 1 without creating the database.
    """
    if not os.path.isdir("chats"):
        print('Directory "chats" not found in the current working directory.',
              file=sys.stderr)
        sys.exit(1)

    chats_data = []
    for file_name in sorted(os.listdir("chats")):
        if not file_name.endswith(".xml"):
            continue
        # Pass an absolute path: parse_chat_xml_to_list() joins its argument
        # onto the script's own "chats" directory, and os.path.join() keeps
        # an absolute argument unchanged, so the file listed here is the
        # file that gets parsed.
        chat_path = os.path.abspath(os.path.join("chats", file_name))
        try:
            chats_data.extend(parse_chat_xml_to_list(chat_path))
        except Exception as e:
            print(f"{file_name}: {e}", file=sys.stderr)
            # A bare sys.exit() would report success (status 0) here.
            sys.exit(1)
    generate_sqlite_db_from_chats(chats_data)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment