-
-
Save macdiva/4481943 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright Jehiah Czebotar 2013 | |
# http://jehiah.cz/ | |
import tornado.options | |
import glob | |
import os | |
import sqlite3 | |
import logging | |
import datetime | |
import csv | |
# http://theiphonewiki.com/wiki/IMessage | |
MADRID_OFFSET = 978307200 | |
MADRID_FLAGS_SENT = [36869, 45061] | |
def _utf8(s): | |
if isinstance(s, unicode): | |
s = s.encode('utf-8') | |
assert isinstance(s, str) | |
return s | |
def dict_factory(cursor, row): | |
d = {} | |
for idx,col in enumerate(cursor.description): | |
d[col[0]] = row[idx] | |
return d | |
class DB(object): | |
def __init__(self, *args, **kwargs): | |
self._db = sqlite3.connect(*args, **kwargs) | |
self._db.row_factory = dict_factory | |
def query(self, sql, params=None): | |
try: | |
c = self._db.cursor() | |
c.execute(sql, params or []) | |
res = c.fetchall() | |
self._db.commit() | |
except: | |
if self._db: | |
self._db.rollback() | |
raise | |
c.close() | |
return res | |
def extract_messages(db_file): | |
db = DB(db_file) | |
skipped = 0 | |
found = 0 | |
for row in db.query('select * from message'): | |
ts = row['date'] | |
if row['is_madrid']: | |
ts += MADRID_OFFSET | |
if not row['text']: | |
skipped += 1 | |
continue | |
dt = datetime.datetime.utcfromtimestamp(ts) | |
logging.debug('[%s] %r %r', dt, row.get('text'), row) | |
if row['is_madrid']: | |
sent = row['madrid_flags'] in MADRID_FLAGS_SENT | |
else: | |
sent = row['flags'] in [3, 35] | |
if tornado.options.options.sent_only and not sent: | |
skipped += 1 | |
continue | |
if dt.year != tornado.options.options.year: | |
skipped += 1 | |
continue | |
found +=1 | |
yield dict( | |
sent='1' if sent else '0', | |
service='iMessage' if row['is_madrid'] else 'SMS', | |
subject=_utf8(row['subject'] or ''), | |
text=_utf8(row['text'] or '').replace('\n',r'\n'), | |
ts=ts, | |
address=row['address'] or row['madrid_handle'], | |
) | |
logging.info('found %d skipped %d', found, skipped) | |
def run(): | |
assert not os.path.exists(tornado.options.options.output_file) | |
logging.info('writing out to %s', tornado.options.options.output_file) | |
f = open(tornado.options.options.output_file, 'w') | |
columns = ["ts", "service", "sent", "address", "subject", "text"] | |
writer = csv.DictWriter(f, columns) | |
writer.writerow(dict([[x,x] for x in columns])) | |
pattern = os.path.expanduser(tornado.options.options.input_pattern) | |
for db_file in glob.glob(pattern): | |
logging.info("reading %r. use --input-patern to select only this file", db_file) | |
for row in extract_messages(db_file): | |
if not tornado.options.options.include_message_text: | |
row['text'] = '' | |
writer.writerow(row) | |
f.close() | |
if __name__ == "__main__": | |
tornado.options.define("input_pattern", type=str, default="~/Library/Application Support/MobileSync/Backup/*/3d0d7e5fb2ce288813306e4d4636395e047a3d28") | |
tornado.options.define("output_file", type=str, default="txt_messages.csv") | |
tornado.options.define("year", type=int, default=2012) | |
tornado.options.define("sent_only", type=bool, default=False) | |
tornado.options.define("include_message_text", type=bool, default=True) | |
tornado.options.parse_command_line() | |
run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey macdiva,
I took Jehiah's script and did some more work on it. I decided to take this on as a project, so I'll be making more changes.
If you're interested in helping out, please take a look at the project page. <3
https://github.com/dechols/iphone_messages_dump
Thanks!