Skip to content

Instantly share code, notes, and snippets.

@fish2000
Created October 28, 2012 12:00
Show Gist options
  • Save fish2000/3968435 to your computer and use it in GitHub Desktop.
Save fish2000/3968435 to your computer and use it in GitHub Desktop.
Python scripts for pillaging data from iOS backup files
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# -*- Python -*-
#
# http://www.employees.org/~mstenber/iphonebackupdb.py
#
# $Id: iphonebackupdb.py,v 1.2 2010/05/28 08:30:38 mstenber Exp $
#
# Author: Markus Stenberg <[email protected]>
#
# Copyright (c) 2009 Markus Stenberg
# All rights reserved
#
# Created: Tue Mar 31 13:44:03 2009 mstenber
# Last modified: Fri Oct 14 00:22:09 2011 mstenber
# Edit time: 209 min
#
"""
This is a minimalist module which abstracts the iPhone backup
directory's contents (in the Library/Application
Support/MobileSync/backup) as a filesystem. The only supported operation
right now is copying a file for read-only use, but in theory some
other things might also be doable later on (listdir etc).
XXX - turn this to a FUSE module?
On the other hand, why bother.. Currently this is like 4th version of
iTunes backup DB that I'm supporting;
- pre-8.1 (.mdbackup files, plists with binary content)
- 8.2+ (.mdinfo files, readable plists with nested plists)
- 9.2+ (.mbdb, .mbdx index files + files as-is)
- 10.5+ (.mbdb - .mbdx files disappeared)
Disclaimer: This module is published for information purposes, and
its usefulness for anyone else except me may be highly
questionable. However, it might serve some useful purpose to other
people too, so I keep it on my web site.. ;-)
(I know quite a bit more about the un-decoded fields in the .mbdb, but
as my application only needs this stuff, I can't be arsed to decode
them anytime soon.. basic UNIX backup stuff like permissions, uid/gid,
and so forth.)
"""
import os, os.path
import ms.debug, ms.util
import ms.hexdump
import ms.cstruct
import hashlib
#ms.debug.setModuleLevel('.*', 3)
# Module-level logging callables. `log` and `debug` are called further down
# with a printf-style format string followed by its arguments.
# NOTE(review): ms.debug is a project-local package; the exact severity /
# filtering semantics of the three callables are not visible here.
(error, log, debug) = ms.debug.getCalls('iphonebackupdb')
# Directory whose entries are treated as individual device backups.
# expanduser('~') gives the same result as $HOME when that variable is set,
# but falls back to the password database instead of raising KeyError at
# import time when it is not (cron jobs, stripped-down environments).
BACKUPPATH = os.path.join(os.path.expanduser('~'), 'Library',
                          'Application Support',
                          'MobileSync', 'backup')

# Test data - not really used for anything if system works correctly,
# but they were useful when debugging the format.
# Maps a lower-cased file name to the hex SHA-1 name expected for it.
KNOWN = {
    'documents/rahat.pdb': 'b07ac15b5c745a287d3ecdc60bb6f6b955c0f229',
    'documents/untitled.pdb': '27fe99e8746b43a9db00c332966d028998bc3a03',
    'Documents/Py%F6r%E4ily.PDB'.lower(): '95ef4154eedac2fcc458cf21ec93c8c3895d9fcb',
}
def getMTime():
    """Return the newest modification time among the entries of BACKUPPATH.

    Every directory entry is passed to ms.util.file_mtime() and the largest
    value seen is returned.  Returns None when BACKUPPATH has no entries.
    """
    mtime = None
    for iphone in os.listdir(BACKUPPATH):
        imtime = ms.util.file_mtime(os.path.join(BACKUPPATH, iphone))
        if mtime is None or mtime < imtime:
            mtime = imtime
    # Return the running maximum.  The previous code returned the loop
    # variable `imtime`, i.e. the mtime of whichever entry os.listdir()
    # happened to yield last, and blew up with UnboundLocalError when the
    # directory was empty.
    return mtime
def getS(data, ofs, defaultFF=False):
    """Read one length-prefixed string from `data` starting at `ofs`.

    The string is preceded by a two-byte big-endian length.  When
    `defaultFF` is true, a leading 0xFF 0xFF pair is accepted as an
    "empty" marker in place of a length.

    `data` may be a Python 2 `str` or a `bytes`/`bytearray` object; the
    header is decoded through `bytearray` so that both behave the same
    (indexing `bytes` on Python 3 yields ints, which broke the former
    `ord(data[ofs])` / `== chr(0xFF)` logic).

    Returns (offset just past the string, string).  The string is a slice
    of `data`, so it has the same type as `data`.

    Raises IndexError if fewer than two header bytes are available and
    AssertionError if the header fails the sanity checks.
    """
    header = bytearray(data[ofs:ofs + 2])
    if len(header) < 2:
        raise IndexError('truncated string header at offset %d' % ofs)
    hi, lo = header

    if defaultFF and hi == 0xFF:
        assert lo == 0xFF
        # Empty slice keeps the return type identical to the input type.
        return ofs + 2, data[0:0]

    # Assume first digit is zero or some small value.. Seems to be a short.
    #
    # For the time being, we assume strings < 512 bytes to keep sanity
    # checking valid (initial guess was < 256, which wasn't)
    assert hi in (0, 1), 'not 0/1: %s' % hi

    start = ofs + 2
    end = start + 256 * hi + lo
    return end, data[start:end]
def getN(data, ofs, count):
    """Slice `count` items out of `data` beginning at `ofs`.

    Returns a tuple of (offset just past the slice, the slice itself).
    """
    end = ofs + count
    return end, data[ofs:end]
def decodeMBDB(data):
    """Decode the raw contents of a Manifest.mbdb file.

    Walks the records that follow the first 6 bytes of `data` and returns
    a list with one `[record_offset, vendor, filename, bonus1]` entry per
    record, in file order.  The remaining fields of every record are read
    only to find where the next record starts.
    """
    records = []
    pos = 6
    previous = -1
    while pos + 20 < len(data):
        # Sanity check: every iteration must start at a new offset.
        assert pos != previous
        previous = pos

        pos, vendor = getS(data, pos)
        pos, filename = getS(data, pos)
        # Three strings that may be stored as the FF FF "empty" marker.
        pos, bonus1 = getS(data, pos, True)
        pos, _bonus2 = getS(data, pos, True)
        pos, _bonus3 = getS(data, pos, True)
        # 39 undecoded bytes, then a one-byte count of trailing pairs.
        pos, _undecoded = getN(data, pos, 39)
        pos, cnt = getN(data, pos, 1)
        records.append([previous, vendor, filename, bonus1])

        bonuscount = ord(cnt)
        assert bonuscount <= 6, bonuscount
        bonus = []
        for _ in range(bonuscount):
            pos, first = getS(data, pos)
            pos, second = getS(data, pos)
            bonus.append((first, second))
        debug('idx#%d ofs#%d->%d %r %r (%d bonus %s)', len(records), previous, pos, vendor, filename, bonuscount, bonus)
    return records
def getBackups():
    """List the entries of BACKUPPATH, most recently modified first.

    Returns a list of `(st_mtime, entry name, full path)` tuples sorted in
    descending order.
    """
    entries = []
    for name in os.listdir(BACKUPPATH):
        path = os.path.join(BACKUPPATH, name)
        entries.append((os.stat(path).st_mtime, name, path))
    return sorted(entries, reverse=True)
def iterBackups(iterator):
    """Call `iterator` for each backup, newest first, until one answers.

    For every directory returned by getBackups() the file Manifest.mbdb is
    decoded and two lookup tables are built from it:

      fileMap   lower-cased file name            -> hex SHA-1 string
      fFileMap  (vendor, lower-cased file name)  -> the same hex SHA-1

    where the SHA-1 is taken over `vendor + '-' + filename` (original
    case).  `iterator(ipath, fileMap, fFileMap)` is then invoked; the first
    return value that is not None ends the walk and is returned to the
    caller.  Returns None when no call produced such a value.
    """
    for _, iphone, ipath in getBackups():
        debug('ipath:%r', ipath)
        manifest = os.path.join(ipath, 'Manifest.mbdb')
        debug('opening %r', manifest)
        # The manifest is binary: read it in binary mode (text mode mangles
        # it on platforms that translate newlines) and close the handle
        # deterministically instead of leaking it.
        with open(manifest, 'rb') as f:
            data = f.read()
        filenames = decodeMBDB(data)
        log('decoded %d filenames', len(filenames))

        fileMap = {}
        fFileMap = {}
        for _lofs, vendor, filename, _bonus1 in filenames:
            lfilename = filename.lower()
            # hexdigest() yields the same text as digest().encode('hex')
            # without depending on the Python 2 only 'hex' codec.
            sha = hashlib.sha1(vendor + '-' + filename).hexdigest()
            fileMap[lfilename] = sha
            fFileMap[vendor, lfilename] = sha

        rv = iterator(ipath, fileMap, fFileMap)
        if rv is not None:
            return rv
def _copy(fromname, toname):
open(toname, 'w').write(open(fromname).read())
def getFileToFilename(backuppath, destfilename):
    """Copy one backed-up file, looked up by name, to `destfilename`.

    `backuppath` is lower-cased and looked up in each backup's fileMap
    (see iterBackups), newest backup first.  For names listed in KNOWN the
    expected hash overrides a differing computed one.  Returns True as soon
    as a backup contained the file and it was copied; otherwise the result
    of iterBackups() once every backup has been tried (None).
    """
    wanted = backuppath.lower()

    def _iterator(ipath, fileMap, fFileMap):
        sha = fileMap.get(wanted, '')
        if not sha:
            log('No sha found for %r', wanted)
            return None

        # Hardcoded check against the known-good debugging table.
        expected = KNOWN.get(wanted)
        if expected is not None and sha != expected:
            log('!!! WRONG sha: %s <> %s', sha, expected)
            sha = expected

        path = os.path.join(ipath, sha)
        log('found potential sha candidate %r', path)
        if not ms.util.file_exists(path):
            log('Path %r not found', path)
            return None

        log('and it even existed! yay')
        _copy(path, destfilename)
        return True

    return iterBackups(_iterator)
# We care only about most recent backup by default, from most recent
# device..
#
# Copy the backed-up files of one domain into `directory`.
#   domain               -- compared for equality with the vendor part of
#                           every fFileMap key
#   directory            -- destination root; the file's directory part is
#                           re-created underneath it
#   onlyMostRecentDevice -- when true the per-backup callback returns True,
#                           which makes iterBackups() stop walking backups
# Returns whatever iterBackups() returns.
# Note that the file names in fFileMap are the lower-cased ones built by
# iterBackups(), so the copies are written under lower-cased paths.
# NOTE(review): this listing has lost its indentation, so it cannot be seen
# here whether the `if skipped:` / `if onlyMostRecentDevice:` checks are
# nested under `if dumped:` -- confirm against the original file.
# NOTE(review): this uses ms.util.exists while getFileToFilename uses
# ms.util.file_exists -- confirm both helpers exist in ms.util.
def getDomainToDirectory(domain, directory, onlyMostRecentDevice=True):
def _iterator(ipath, fileMap, fFileMap):
# Counters: files copied / files whose source was not found on disk.
dumped, skipped = 0, 0
for (vendor, filename), sha in fFileMap.items():
# Only entries belonging to the requested domain are of interest.
if vendor != domain:
continue
# The source file is stored in the backup directory under its sha name.
fromname = os.path.join(ipath, sha)
if ms.util.exists(fromname):
dumped += 1
dirname = os.path.dirname(filename)
basename = os.path.basename(filename)
newdirname = os.path.join(directory, dirname)
try:
os.makedirs(newdirname)
except OSError:
# Best effort: covers "directory already exists", but any other OSError
# is ignored here as well.
pass
toname = os.path.join(newdirname, basename)
_copy(fromname, toname)
else:
skipped += 1
if dumped:
print 'Copied %d files' % dumped
if skipped:
print 'Skipped %d files' % skipped
if onlyMostRecentDevice:
# Any non-None return value ends the walk in iterBackups().
return True
return iterBackups(_iterator)
def dumpDirectory():
    """Print one `ipath vendor filename sha` line per file of every backup.

    The callback never returns a value, so iterBackups() visits every
    backup; the (None) result of iterBackups() is passed through.
    """
    def _iterator(ipath, fileMap, fFileMap):
        for key, sha in fFileMap.items():
            vendor, filename = key
            print('%s %s %s %s' % (ipath, vendor, filename, sha))
        # Deliberately no `return True` here: rather dump all devices.

    return iterBackups(_iterator)
if __name__ == '__main__':
    import sys
    import ms.util

    # NOTE(review): ms.util.Getopt is project-local; the format string
    # presumably follows getopt conventions (-d VALUE, -o VALUE, flag -l).
    (opts, args) = ms.util.Getopt(format="d:o:l")
    domain_arg = opts['d']
    if domain_arg and opts['o']:
        # -d together with -o: copy that domain into the given directory.
        getDomainToDirectory(domain_arg, opts['o'])
    elif opts['l']:
        # -l: list the contents of every backup.
        dumpDirectory()

    # Disabled manual self-test, kept for reference.
    if 0:
        tfilename = '/tmp/test-iphonebackupdb.dat'
        assert getFileToFilename('documents/rahat.pdb', tfilename)
        assert not getFileToFilename('documents/rahat.pdbxxx', tfilename)
        os.unlink(tfilename)
#!/usr/bin/env python
# from http://stackoverflow.com/questions/3085153/how-to-parse-the-manifest-mbdb-file-in-an-ios-4-0-itunes-backup
import sys
def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    `data` may be a Python 2 `str` or a `bytes`/`bytearray` object; the
    bytes are decoded through `bytearray` because indexing `bytes` on
    Python 3 yields ints, which the former `ord(data[offset])` rejected.

    Returns (value, offset + intsize); for intsize <= 0 that is (0, offset).
    Raises IndexError if fewer than `intsize` bytes remain at `offset`.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    chunk = bytearray(data[offset:end])
    if len(chunk) != intsize:
        # Slicing silently truncates; keep failing loudly on short data.
        raise IndexError('need %d bytes at offset %d, found %d'
                         % (intsize, offset, len(chunk)))
    value = 0
    for byte in chunk:
        value = (value << 8) | byte
    return value, end
def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the pair 0xFF 0xFF stands for a blank string.

    `data` may be a Python 2 `str` or a `bytes`/`bytearray` object.  The
    two prefix bytes are decoded through `bytearray` so that the blank
    marker is also recognised for Python 3 `bytes` (where
    `data[offset] == chr(0xFF)` is never true).

    Returns (string, new offset); the string is a slice of `data` and so
    has the same type.  Raises IndexError if the 2-byte prefix is missing.
    """
    prefix = bytearray(data[offset:offset + 2])
    if len(prefix) < 2:
        raise IndexError('truncated string prefix at offset %d' % offset)
    start = offset + 2
    if prefix[0] == 0xFF and prefix[1] == 0xFF:
        return data[0:0], start  # Blank string
    end = start + ((prefix[0] << 8) | prefix[1])  # 2-byte length
    return data[start:end], end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file into a dict of per-file records.

    Returns a dict mapping the offset at which each record starts in the
    file to a dict with the keys 'start_offset', 'domain', 'filename',
    'linktarget', 'datahash', 'unknown1', 'mode', 'unknown2', 'unknown3',
    'userid', 'groupid', 'mtime', 'atime', 'ctime', 'filelen', 'flag',
    'numprops' and 'properties' (a dict of name -> value).

    Raises ValueError (an Exception subclass, so existing handlers still
    match) if the file does not start with the "mbdb" magic.
    """
    string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                     'unknown1')
    # Big-endian integer fields following the strings: (key, size in bytes).
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4), ('mtime', 4),
                  ('atime', 4), ('ctime', 4), ('filelen', 8),
                  ('flag', 1), ('numprops', 1))

    # Binary file: read in binary mode and close the handle promptly.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdb":
        raise ValueError("This does not look like an MBDB file")
    offset = 4 + 2  # magic, then value x05 x00, not sure what this is

    mbdb = {}  # Map offset of info in this file => file info
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb
def process_mbdx_file(filename):
    """Parse a Manifest.mbdx index file.

    Returns a dict mapping the offset of a record in the MBDB file (the
    stored value plus the 6-byte MBDB prolog) to the record's fileID
    rendered as a lower-case hex string.

    Raises ValueError (an Exception subclass, so existing handlers still
    match) if the file does not start with the "mbdx" magic.
    """
    # Binary file: read in binary mode and close the handle promptly.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdx":
        raise ValueError("This does not look like an MBDX file")
    offset = 4 + 2  # magic, then value 0x02 0x00, not sure what this is
    # 4-byte count of records; not needed because the loop runs to the end
    # of the data, but it still has to be skipped.
    _filecount, offset = getint(data, offset, 4)

    mbdx = {}  # Map offset of info in the MBDB file => fileID string
    while offset < len(data):
        # 26 byte record: 20 bytes of fileID, 4-byte offset, 2-byte mode
        fileID = data[offset:offset + 20]
        # bytearray() yields ints for both Python 2 str and Python 3 bytes.
        fileID_string = ''.join(['%02x' % b for b in bytearray(fileID)])
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4)
        _mode, offset = getint(data, offset, 2)
        mbdx[mbdb_offset + 6] = fileID_string  # Add 6 to get past prolog
    return mbdx
def modestr(val):
    """Render the low nine bits of `val` as an `rwxrwxrwx` style string.

    Bit 8 is the first 'r' and bit 0 the last 'x'; a cleared bit shows up
    as '-'.  Higher bits are ignored.
    """
    letters = 'rwxrwxrwx'
    return ''.join(
        letter if val & (0x100 >> position) else '-'
        for position, letter in enumerate(letters))
def fileinfo_str(f, verbose=False):
    """Format one MBDB record dict `f` as a line of text.

    Without `verbose` only "(fileID)domain::filename" is returned.  With
    it, the line starts with a type letter and permission string followed
    by owner ids, length and timestamps; symlinks get " -> target"
    appended and every extra property is appended as " name=repr(value)".
    An unrecognised file type is reported on stderr and shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])

    type_letters = {0xA000: 'l',   # symlink
                    0x8000: '-',   # file
                    0x4000: 'd'}   # dir
    kind = type_letters.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)) + "\n")
        kind = '?'  # unknown

    pieces = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
              (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
               f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])]
    if kind == 'l':
        pieces.append(' -> ' + f['linktarget'])  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        pieces.append(' ' + name + '=' + repr(value))
    return ''.join(pieces)
# Passed to fileinfo_str() by the driver below to select the long format.
verbose = True

if __name__ == '__main__':
    # Join the records of Manifest.mbdb with the fileIDs of Manifest.mbdx
    # (both read from the current directory) and print one line per record.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    for offset, fileinfo in mbdb.items():
        try:
            fileinfo['fileID'] = mbdx[offset]
        except KeyError:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s" % fileinfo_str(fileinfo) + "\n")
        print(fileinfo_str(fileinfo, verbose))
#!/usr/bin/env python
import sys
def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    `data` may be a Python 2 `str` or a `bytes`/`bytearray` object; the
    bytes are decoded through `bytearray` because indexing `bytes` on
    Python 3 yields ints, which the former `ord(data[offset])` rejected.

    Returns (value, offset + intsize); for intsize <= 0 that is (0, offset).
    Raises IndexError if fewer than `intsize` bytes remain at `offset`.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    chunk = bytearray(data[offset:end])
    if len(chunk) != intsize:
        # Slicing silently truncates; keep failing loudly on short data.
        raise IndexError('need %d bytes at offset %d, found %d'
                         % (intsize, offset, len(chunk)))
    value = 0
    for byte in chunk:
        value = (value << 8) | byte
    return value, end
def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the pair 0xFF 0xFF stands for a blank string.

    `data` may be a Python 2 `str` or a `bytes`/`bytearray` object.  The
    two prefix bytes are decoded through `bytearray` so that the blank
    marker is also recognised for Python 3 `bytes` (where
    `data[offset] == chr(0xFF)` is never true).

    Returns (string, new offset); the string is a slice of `data` and so
    has the same type.  Raises IndexError if the 2-byte prefix is missing.
    """
    prefix = bytearray(data[offset:offset + 2])
    if len(prefix) < 2:
        raise IndexError('truncated string prefix at offset %d' % offset)
    start = offset + 2
    if prefix[0] == 0xFF and prefix[1] == 0xFF:
        return data[0:0], start  # Blank string
    end = start + ((prefix[0] << 8) | prefix[1])  # 2-byte length
    return data[start:end], end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file into a dict of per-file records.

    Returns a dict mapping the offset at which each record starts in the
    file to a dict with the keys 'start_offset', 'domain', 'filename',
    'linktarget', 'datahash', 'unknown1', 'mode', 'unknown2', 'unknown3',
    'userid', 'groupid', 'mtime', 'atime', 'ctime', 'filelen', 'flag',
    'numprops' and 'properties' (a dict of name -> value).

    Raises ValueError (an Exception subclass, so existing handlers still
    match) if the file does not start with the "mbdb" magic.
    """
    string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                     'unknown1')
    # Big-endian integer fields following the strings: (key, size in bytes).
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4), ('mtime', 4),
                  ('atime', 4), ('ctime', 4), ('filelen', 8),
                  ('flag', 1), ('numprops', 1))

    # Binary file: read in binary mode and close the handle promptly.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdb":
        raise ValueError("This does not look like an MBDB file")
    offset = 4 + 2  # magic, then value x05 x00, not sure what this is

    mbdb = {}  # Map offset of info in this file => file info
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
    return mbdb
def process_mbdx_file(filename):
    """Parse a Manifest.mbdx index file.

    Returns a dict mapping the offset of a record in the MBDB file (the
    stored value plus the 6-byte MBDB prolog) to the record's fileID
    rendered as a lower-case hex string.

    Raises ValueError (an Exception subclass, so existing handlers still
    match) if the file does not start with the "mbdx" magic.
    """
    # Binary file: read in binary mode and close the handle promptly.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdx":
        raise ValueError("This does not look like an MBDX file")
    offset = 4 + 2  # magic, then value 0x02 0x00, not sure what this is
    # 4-byte count of records; not needed because the loop runs to the end
    # of the data, but it still has to be skipped.
    _filecount, offset = getint(data, offset, 4)

    mbdx = {}  # Map offset of info in the MBDB file => fileID string
    while offset < len(data):
        # 26 byte record: 20 bytes of fileID, 4-byte offset, 2-byte mode
        fileID = data[offset:offset + 20]
        # bytearray() yields ints for both Python 2 str and Python 3 bytes.
        fileID_string = ''.join(['%02x' % b for b in bytearray(fileID)])
        offset = offset + 20
        mbdb_offset, offset = getint(data, offset, 4)
        _mode, offset = getint(data, offset, 2)
        mbdx[mbdb_offset + 6] = fileID_string  # Add 6 to get past prolog
    return mbdx
def modestr(val):
    """Render the low nine bits of `val` as an `rwxrwxrwx` style string.

    Bit 8 is the first 'r' and bit 0 the last 'x'; a cleared bit shows up
    as '-'.  Higher bits are ignored.
    """
    letters = 'rwxrwxrwx'
    return ''.join(
        letter if val & (0x100 >> position) else '-'
        for position, letter in enumerate(letters))
def fileinfo_str(f, verbose=False):
    """Format one MBDB record dict `f` as a line of text.

    Without `verbose` only "(fileID)domain::filename" is returned.  With
    it, the line starts with a type letter and permission string followed
    by owner ids, length and timestamps; symlinks get " -> target"
    appended and every extra property is appended as " name=repr(value)".
    An unrecognised file type is reported on stderr and shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])

    type_letters = {0xA000: 'l',   # symlink
                    0x8000: '-',   # file
                    0x4000: 'd'}   # dir
    kind = type_letters.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)) + "\n")
        kind = '?'  # unknown

    pieces = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
              (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
               f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])]
    if kind == 'l':
        pieces.append(' -> ' + f['linktarget'])  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        pieces.append(' ' + name + '=' + repr(value))
    return ''.join(pieces)
# Passed to fileinfo_str() by the driver below to select the long format.
verbose = True

if __name__ == '__main__':
    # Print one line per record of Manifest.mbdb (joined with the fileIDs
    # of Manifest.mbdx), then the total size of regular files per domain,
    # smallest domain first.
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}
    for offset, fileinfo in mbdb.items():
        try:
            fileinfo['fileID'] = mbdx[offset]
        except KeyError:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s" % fileinfo_str(fileinfo) + "\n")
        print(fileinfo_str(fileinfo, verbose))
        # Only regular files contribute to the per-domain totals.
        if (fileinfo['mode'] & 0xE000) == 0x8000:
            owner = fileinfo['domain']
            sizes[owner] = sizes.get(owner, 0) + fileinfo['filelen']
    for domain in sorted(sizes, key=sizes.get):
        total = sizes[domain]
        print("%-60s %11d (%dMB)" % (domain, total, int(total/1024/1024)))
#!/usr/bin/env python
import sys
import hashlib
# Module-level map: start offset of a record in the MBDB file => fileID,
# i.e. the hex SHA-1 of "domain-filename". Filled in as a side effect of
# process_mbdb_file() and read by the __main__ driver.
mbdx = {}
def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    `data` may be a Python 2 `str` or a `bytes`/`bytearray` object; the
    bytes are decoded through `bytearray` because indexing `bytes` on
    Python 3 yields ints, which the former `ord(data[offset])` rejected.

    Returns (value, offset + intsize); for intsize <= 0 that is (0, offset).
    Raises IndexError if fewer than `intsize` bytes remain at `offset`.
    """
    if intsize <= 0:
        return 0, offset
    end = offset + intsize
    chunk = bytearray(data[offset:end])
    if len(chunk) != intsize:
        # Slicing silently truncates; keep failing loudly on short data.
        raise IndexError('need %d bytes at offset %d, found %d'
                         % (intsize, offset, len(chunk)))
    value = 0
    for byte in chunk:
        value = (value << 8) | byte
    return value, end
def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the pair 0xFF 0xFF stands for a blank string.

    `data` may be a Python 2 `str` or a `bytes`/`bytearray` object.  The
    two prefix bytes are decoded through `bytearray` so that the blank
    marker is also recognised for Python 3 `bytes` (where
    `data[offset] == chr(0xFF)` is never true).

    Returns (string, new offset); the string is a slice of `data` and so
    has the same type.  Raises IndexError if the 2-byte prefix is missing.
    """
    prefix = bytearray(data[offset:offset + 2])
    if len(prefix) < 2:
        raise IndexError('truncated string prefix at offset %d' % offset)
    start = offset + 2
    if prefix[0] == 0xFF and prefix[1] == 0xFF:
        return data[0:0], start  # Blank string
    end = start + ((prefix[0] << 8) | prefix[1])  # 2-byte length
    return data[start:end], end
def process_mbdb_file(filename):
    """Parse a Manifest.mbdb file into a dict of per-file records.

    Returns a dict mapping the offset at which each record starts in the
    file to a dict with the keys 'start_offset', 'domain', 'filename',
    'linktarget', 'datahash', 'unknown1', 'mode', 'unknown2', 'unknown3',
    'userid', 'groupid', 'mtime', 'atime', 'ctime', 'filelen', 'flag',
    'numprops' and 'properties' (a dict of name -> value).

    Side effect: for every record, the module-level `mbdx` dict receives
    start offset => hex SHA-1 of domain + '-' + filename.

    Raises ValueError (an Exception subclass, so existing handlers still
    match) if the file does not start with the "mbdb" magic.
    """
    string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                     'unknown1')
    # Big-endian integer fields following the strings: (key, size in bytes).
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4), ('mtime', 4),
                  ('atime', 4), ('ctime', 4), ('filelen', 8),
                  ('flag', 1), ('numprops', 1))

    # Binary file: read in binary mode and close the handle promptly.
    with open(filename, 'rb') as f:
        data = f.read()
    if data[0:4] != b"mbdb":
        raise ValueError("This does not look like an MBDB file")
    offset = 4 + 2  # magic, then value x05 x00, not sure what this is

    mbdb = {}  # Map offset of info in this file => file info
    while offset < len(data):
        start = offset
        fileinfo = {'start_offset': start}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[start] = fileinfo
        # b'-' is a plain str on Python 2 and keeps the concatenation valid
        # when the fields are Python 3 bytes.
        fullpath = fileinfo['domain'] + b'-' + fileinfo['filename']
        mbdx[start] = hashlib.sha1(fullpath).hexdigest()
    return mbdb
def modestr(val):
    """Render the low nine bits of `val` as an `rwxrwxrwx` style string.

    Bit 8 is the first 'r' and bit 0 the last 'x'; a cleared bit shows up
    as '-'.  Higher bits are ignored.
    """
    letters = 'rwxrwxrwx'
    return ''.join(
        letter if val & (0x100 >> position) else '-'
        for position, letter in enumerate(letters))
def fileinfo_str(f, verbose=False):
    """Format one MBDB record dict `f` as a line of text.

    Without `verbose` only "(fileID)domain::filename" is returned.  With
    it, the line starts with a type letter and permission string followed
    by owner ids, length and timestamps; symlinks get " -> target"
    appended and every extra property is appended as " name=repr(value)".
    An unrecognised file type is reported on stderr and shown as '?'.
    """
    if not verbose:
        return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])

    type_letters = {0xA000: 'l',   # symlink
                    0x8000: '-',   # file
                    0x4000: 'd'}   # dir
    kind = type_letters.get(f['mode'] & 0xE000)
    if kind is None:
        sys.stderr.write("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)) + "\n")
        kind = '?'  # unknown

    pieces = ["%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
              (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
               f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])]
    if kind == 'l':
        pieces.append(' -> ' + f['linktarget'])  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        pieces.append(' ' + name + '=' + repr(value))
    return ''.join(pieces)
# Passed to fileinfo_str() by the driver below to select the long format.
verbose = True

if __name__ == '__main__':
    # Print one line per record of Manifest.mbdb (read from the current
    # directory); fileIDs come from the module-level `mbdx` map.
    mbdb = process_mbdb_file("Manifest.mbdb")
    for offset, fileinfo in mbdb.items():
        try:
            fileinfo['fileID'] = mbdx[offset]
        except KeyError:
            fileinfo['fileID'] = "<nofileID>"
            sys.stderr.write("No fileID found for %s" % fileinfo_str(fileinfo) + "\n")
        print(fileinfo_str(fileinfo, verbose))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment