-
-
Save fweez/4477385 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright Jehiah Czebotar 2013
# http://jehiah.cz/
# Modifications by Ryan Forsythe 2013:
# * Switch to stdlib's optparse rather than relying on tornado's option parsing
# * Add new-style backup database parsing.
# NOTE: This is broken for SMS phone numbers. Currently researching a fix.
import optparse | |
import glob | |
import os | |
import sqlite3 | |
import logging | |
import datetime | |
import csv | |
# Reference: http://theiphonewiki.com/wiki/IMessage

# Added to stored message dates before they are handed to
# datetime.utcfromtimestamp(). 978307200 is the number of seconds between
# 1970-01-01 and 2001-01-01 (UTC).
MADRID_OFFSET = 978307200

# `madrid_flags` values that mark an old-style iMessage row as sent.
MADRID_FLAGS_SENT = [36869, 45061]

# Glob pattern (expanded with os.path.expanduser) used when no input pattern
# is given on the command line.
# NOTE(review): the hex file name is presumably the hashed name of the SMS
# database inside an iTunes backup -- confirm.
DEFAULT_BACKUP_LOCATION = (
    "~/Library/Application Support/MobileSync/Backup/*/"
    "3d0d7e5fb2ce288813306e4d4636395e047a3d28"
)

# Command line args will get parsed into this:
options = None
def _utf8(s): | |
if isinstance(s, unicode): | |
s = s.encode('utf-8') | |
assert isinstance(s, str) | |
return s | |
def dict_factory(cursor, row):
    """Build a dict for one result row, keyed by column name.

    Each entry of ``cursor.description`` contributes its first item as the
    key; the value is the element of *row* at the same position.
    """
    return dict(
        (column[0], row[position])
        for position, column in enumerate(cursor.description)
    )
class DB(object):
    """Thin wrapper around a ``sqlite3`` connection that returns dict rows."""

    def __init__(self, *args, **kwargs):
        """Open the database; all arguments are passed to ``sqlite3.connect``."""
        self._db = sqlite3.connect(*args, **kwargs)
        # Rows come back as {column name: value} dicts instead of tuples.
        self._db.row_factory = dict_factory

    def query(self, sql, params=None):
        """Execute *sql* and return all of its result rows as a list.

        Args:
            sql: the SQL statement to run.
            params: optional values bound to the statement's placeholders.

        Returns:
            The list produced by ``cursor.fetchall()``.

        The transaction is committed on success. If anything goes wrong it is
        rolled back and the original exception is re-raised.
        """
        cursor = self._db.cursor()
        try:
            cursor.execute(sql, params or [])
            rows = cursor.fetchall()
            self._db.commit()
        except BaseException:
            # Same reach as the previous bare `except:`; always re-raised.
            self._db.rollback()
            raise
        finally:
            # Release the cursor on the error path too, not only on success.
            cursor.close()
        return rows
def extract_messages(db_file):
    """Yield one dict per exportable message in the SQLite file *db_file*.

    Every row of the ``message`` table is inspected. Two table layouts are
    handled: rows that carry an ``is_madrid`` column (old style) and rows
    that do not (new style, which use ``service`` and ``is_sent``).

    A row is skipped when its text is empty, when ``options.sent_only`` is
    set and the row was not sent, or when its year differs from
    ``options.year``.

    Yields:
        dict with the keys ``sent`` ('1'/'0'), ``service`` ('iMessage'/'SMS'),
        ``subject``, ``text`` (newlines replaced by a literal backslash-n),
        ``ts`` (the adjusted timestamp) and ``address``.

    A "found/skipped" summary is printed once the generator is exhausted.
    """
    db = DB(db_file)
    skipped = 0
    found = 0
    for row in db.query('select * from message'):
        ts = row['date']
        if 'is_madrid' in row:
            # Old-style backups: is_madrid marks iMessage rows, and only
            # those rows get the offset added to their date.
            is_imessage = row['is_madrid']
            if is_imessage:
                sent = row['madrid_flags'] in MADRID_FLAGS_SENT
                ts += MADRID_OFFSET
            else:
                sent = row['flags'] in [3, 35]
        else:
            # New-style (?) backups: every row gets the offset.
            is_imessage = row['service'] == u'iMessage'
            sent = row['is_sent']
            ts += MADRID_OFFSET

        if not row['text']:
            skipped += 1
            continue

        dt = datetime.datetime.utcfromtimestamp(ts)
        # Trace of every row that has text, printed before filtering.
        print('[%s] %r %r' % (dt, row.get('text'), row))

        if options.sent_only and not sent:
            skipped += 1
            continue
        if dt.year != options.year:
            skipped += 1
            continue
        found += 1

        # Prefer the row's own address; fall back to the handle/account
        # column that the table layout provides.
        if 'madrid_handle' in row:
            address = row.get('address') or row['madrid_handle']
        else:
            address = row.get('address') or row['account']

        yield dict(
            sent='1' if sent else '0',
            service='iMessage' if is_imessage else 'SMS',
            subject=_utf8(row['subject'] or ''),
            text=_utf8(row['text'] or '').replace('\n', r'\n'),
            ts=ts,
            address=address,
        )
    # Apply the format explicitly; passing the values as extra print
    # arguments never substituted them into the message.
    print('found %d skipped %d' % (found, skipped))
def run():
    """Export the messages of every matching backup database to a CSV file.

    Reads the module-level ``options``: ``output_file`` is the CSV to create,
    ``input_pattern`` is a glob pattern (``~`` is expanded) selecting the
    database files, and ``include_message_text`` controls whether the
    ``text`` column is filled in or left empty.

    Raises:
        IOError: if ``options.output_file`` already exists. An existing file
            is never overwritten.
    """
    # Explicit check rather than `assert`, which is removed under `python -O`
    # and would then let an existing export be overwritten.
    if os.path.exists(options.output_file):
        raise IOError(
            'output file %s already exists; refusing to overwrite it'
            % (options.output_file,)
        )
    print('writing out to %s' % (options.output_file,))

    columns = ["ts", "service", "sent", "address", "subject", "text"]
    pattern = os.path.expanduser(options.input_pattern)

    # The with-block closes the file even if reading a database fails.
    with open(options.output_file, 'w') as f:
        writer = csv.DictWriter(f, columns)
        # Header row: each column name mapped to itself.
        writer.writerow(dict((name, name) for name in columns))
        for db_file in glob.glob(pattern):
            print("reading %r. use --input_pattern to select only this file"
                  % (db_file,))
            for row in extract_messages(db_file):
                if not options.include_message_text:
                    row['text'] = ''
                writer.writerow(row)
if __name__ == "__main__":
    # Script entry point: register the command line options, parse them into
    # the module-level `options`, then run the export.
    parser = optparse.OptionParser()
    for short_flag, long_flag, settings in (
        ("-i", "--input_pattern", dict(type=str, default=DEFAULT_BACKUP_LOCATION)),
        ("-o", "--output_file", dict(type=str, default="txt_messages.csv")),
        ("-y", "--year", dict(type=int, default=2012)),
        ("-s", "--sent_only", dict(action="store_true", default=False)),
        ("-t", "--include_message_text", dict(action="store_true", default=False)),
    ):
        parser.add_option(short_flag, long_flag, **settings)
    options, args = parser.parse_args()
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey fweez,
I took your version of Jehiah's script and did some more work on it. I decided to take this on as a project, so I'll be making more changes.
If you're interested in helping out, please take a look at the project page. <3
https://github.com/dechols/iphone_messages_dump
Thanks!