Last active
July 3, 2024 10:56
-
-
Save brettp/f440bbe4e5c383cefda3 to your computer and use it in GitHub Desktop.
Python implementation for Outlook's thread-index headers for message threading
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import struct | |
import time | |
import sys | |
import base64 | |
import hashlib | |
import datetime | |
def parse_thread_index(index: 'str') -> ("tuple (str, array [datetime.datetime, ...])"):
    """
    Decode an Outlook ``Thread-Index`` header value.

    The base64-decoded payload consists of a 22-byte header (the 6 most
    significant bytes of a FILETIME stamp followed by 16 GUID bytes) and
    then zero or more 5-byte child blocks, one per reply/forward.

    Each child block starts with a big-endian 32-bit word: the top bit is
    the "delta code" and the remaining 31 bits are a time delta in FILETIME
    units (100 ns) whose low bits were dropped when it was written -- 18
    bits when the delta code is 0, 23 bits when it is 1.  The fifth byte of
    the block is not interpreted here.

    :param index: base64 text (str or bytes) of the Thread-Index header.
    :return: ``(guid, timestamps)``.  ``guid`` is the hex form of the 16
        GUID bytes exactly as produced by ``binascii.hexlify`` (a bytes
        object).  ``timestamps`` is a list of naive ``datetime`` objects
        counted from the 1601-01-01 FILETIME epoch: the header time first,
        then one entry per child block, each being the previous entry plus
        that block's delta.
    :raises binascii.Error: if ``index`` is not valid base64.
    :raises struct.error: if the payload has fewer than 6 bytes, or a
        trailing child block has fewer than 4 bytes.
    """
    import binascii

    raw = base64.b64decode(index)

    # A real Outlook GUID is laid out as '>IHHQ'; indexes produced by
    # make_ms_thread_index carry an md5 digest instead, so the 16 bytes are
    # simply reported as hex.
    guid = binascii.hexlify(raw[6:22])

    # Only the top 6 bytes of the 8-byte FILETIME are stored; pad the two
    # missing low-order bytes with zeros before unpacking.
    filetime = struct.unpack('>Q', raw[:6] + b'\0\0')[0]
    ts = [datetime.datetime(1601, 1, 1) + datetime.timedelta(microseconds=filetime // 10)]

    # NOTE(review): deltas are accumulated onto the previous timestamp, as
    # the original implementation did -- confirm against the producer
    # whether they should instead be relative to the header time.
    for n in range(22, len(raw), 5):
        word = struct.unpack('>I', raw[n:n + 4])[0]
        # The top bit selects how many low-order bits were discarded; it is
        # not part of the delta itself and must be masked off.
        shift = 23 if word & 0x80000000 else 18
        delta = (word & 0x7FFFFFFF) << shift
        ts.append(ts[-1] + datetime.timedelta(microseconds=delta // 10))
    return guid, ts
def make_ms_thread_index(msg_id: 'str', ts: 'datetime.datetime') -> ('str'):
    """
    Create the 22-byte base of the thread-index string in the format:

        6 bytes  = first 6 significant bytes of the FILETIME stamp
        16 bytes = GUID (we're using an md5 hash of the message id)

    See http://www.meridiandiscovery.com/how-to/e-mail-conversation-index-metadata-computer-forensics/

    :param msg_id: message id to derive the 16 GUID bytes from; it is
        UTF-8 encoded and md5-hashed.
    :param ts: time stamp for the header.  A naive datetime is interpreted
        as system local time (the same interpretation ``ts.timestamp()``
        uses); an aware datetime is converted to UTC.
    :return: the base64 encoding of the 22 bytes, exactly as returned by
        ``base64.b64encode`` (a bytes object).
    :raises struct.error: if ``ts`` lies before the 1601-01-01 FILETIME
        epoch, because the tick count is packed as an unsigned integer.
    """
    # FILETIME counts 100-nanosecond ticks since 1601-01-01 UTC.  Build the
    # tick count with integer timedelta fields so the sub-second part of
    # ``ts`` is kept instead of being truncated to whole seconds.
    utc = datetime.timezone.utc
    since_epoch = ts.astimezone(utc) - datetime.datetime(1601, 1, 1, tzinfo=utc)
    whole_seconds = since_epoch.days * 86400 + since_epoch.seconds
    filetime = whole_seconds * 10000000 + since_epoch.microseconds * 10

    # Keep only the 6 most significant bytes of the 8-byte big-endian value.
    thread_bin = struct.pack(">Q", filetime)[:6]

    # Make a 16 byte guid. This is usually generated by Outlook in the
    # layout >IHHQ, but any 16 bytes work, so a hash of the id is used.
    thread_bin += hashlib.md5(msg_id.encode('utf-8')).digest()

    return base64.b64encode(thread_bin)
# Demo: build a thread index for a sample message id, stamped with the
# current local time.
now = datetime.datetime.now()
ti = make_ms_thread_index(msg_id='<message/[email protected]>', ts=now)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment