fish2000 · October 28, 2012 12:00
diff --git a/iphonebackupdb-by-markus-stenberg.py b/iphonebackupdb-by-markus-stenberg.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # -*- Python -*-
 #
 # http://www.employees.org/~mstenber/iphonebackupdb.py
 #
 # $Id: iphonebackupdb.py,v 1.2 2010/05/28 08:30:38 mstenber Exp $
 #
 # Author: Markus Stenberg <[email protected]>
 #
 #  Copyright (c) 2009 Markus Stenberg
 #       All rights reserved
 #
 # Created:       Tue Mar 31 13:44:03 2009 mstenber
 # Last modified: Fri Oct 14 00:22:09 2011 mstenber
 # Edit time:     209 min
 #
 """

 This is a minimalist module which abstracts the iPhone backup
 directory's contents (in the Library/Application
 Support/MobileSync/backup) as a filesystem. The only operation
 supported right now is copying a file for read-only use, but in theory
 some other things might also be doable later on (listdir etc).

 XXX - turn this to a FUSE module?

 On the other hand, why bother.. Currently this is like 4th version of
 iTunes backup DB that I'm supporting;

 - pre-8.1 (.mdbackup files, plists with binary content)
 - 8.2+ (.mdinfo files, readable plists with nested plists)
 - 9.2+ (.mbdb, .mbdx index files + files as-is)
 - 10.5+ (.mbdb - .mbdx files disappeared)

 Disclaimer: This module is published for information purposes, and
 its usefulness for anyone else except me may be highly
 questionable. However, it might serve some useful purpose to other
 people too, so I keep it on my web site.. ;-)

 (I know quite a bit more about the un-decoded fields in the .mbdb, but
 as my application only needs this stuff, I can't be arsed to decode
 them anytime soon.. basic UNIX backup stuff like permissions, uid/gid,
 and so forth.)

 """
 import os, os.path
 import ms.debug, ms.util
 import ms.hexdump
 import ms.cstruct
 import hashlib

 #ms.debug.setModuleLevel('.*', 3)
 # Logging callables for this module, obtained from ms.debug
 # (presumably error/log/debug severities -- confirm against ms.debug).
 (error, log, debug) = ms.debug.getCalls('iphonebackupdb')

 # Directory that is listed to find backups; every entry directly below
 # it is treated as one backup.  Needs HOME in the environment, otherwise
 # importing this module raises KeyError.
 BACKUPPATH=os.path.join(os.environ['HOME'], 'Library',
                         'Application Support',
                         'MobileSync', 'backup')

 # Test data - not really used for anything if system works correctly,
 # but they were useful when debugging the format
 # Maps lower-cased file name => expected 40-hex-digit backup file name;
 # getFileToFilename() prefers these values over the computed one.
 KNOWN = {'documents/rahat.pdb' : 'b07ac15b5c745a287d3ecdc60bb6f6b955c0f229',
         'documents/untitled.pdb': '27fe99e8746b43a9db00c332966d028998bc3a03',
         'Documents/Py%F6r%E4ily.PDB'.lower(): '95ef4154eedac2fcc458cf21ec93c8c3895d9fcb'}

 def getMTime():
     """Return the newest modification time among the entries of BACKUPPATH.

     Every entry in BACKUPPATH is passed to ms.util.file_mtime() and the
     largest result is returned.  Returns None when BACKUPPATH is empty.
     """
     mtime = None
     for iphone in os.listdir(BACKUPPATH):
         imtime = ms.util.file_mtime(os.path.join(BACKUPPATH, iphone))
         if mtime is None or mtime < imtime:
             mtime = imtime
     # Return the running maximum, not the value of the last entry
     # visited (which is also unbound when the directory is empty).
     return mtime

 def getS(data, ofs, defaultFF=False):
     """Read one length-prefixed string from data starting at ofs.

     The string is preceded by a two-byte big-endian length whose high
     byte must be 0 or 1 (so lengths stay below 512).  With defaultFF
     set, the byte pair 0xFF 0xFF is accepted as well and yields ''.

     Returns (offset just past the string, string).
     """
     if defaultFF and data[ofs] == chr(0xFF):
         assert data[ofs+1] == chr(0xFF)
         return ofs+2, ''
     # Sanity check on the high length byte, see docstring.
     assert data[ofs] in [chr(0), chr(1)], 'not 0/1: %s' % ord(data[ofs])
     length = 256 * ord(data[ofs]) + ord(data[ofs+1])
     start = ofs + 2
     end = start + length
     return end, data[start:end]

 def getN(data, ofs, count):
     """Return (ofs+count, the count items of data starting at ofs)."""
     end = ofs + count
     return end, data[ofs:end]

 def decodeMBDB(data):
     """Decode the records of an .mbdb manifest held in data.

     Parsing starts at offset 6 and continues while more than 20 bytes
     remain.  Each record is read as two strings (vendor, filename),
     three strings that may be the 0xFF 0xFF marker, 39 bytes that are
     skipped, a one-byte count (at most 6) and that many string pairs.

     Returns a list of [record offset, vendor, filename, bonus1] lists.
     """
     records = []
     pos = 6
     start = -1
     while pos + 20 < len(data):
         # Guard against a record that consumed nothing.
         assert pos != start
         start = pos
         pos, vendor = getS(data, pos)
         pos, filename = getS(data, pos)
         pos, bonus1 = getS(data, pos, True)
         pos, _bonus2 = getS(data, pos, True)
         pos, _bonus3 = getS(data, pos, True)
         pos, _skipped = getN(data, pos, 39)
         pos, cnt = getN(data, pos, 1)
         records.append([start, vendor, filename, bonus1])
         bonuscount = ord(cnt)
         assert bonuscount <= 6, bonuscount
         bonus = []
         for _ in range(bonuscount):
             pos, first = getS(data, pos)
             pos, second = getS(data, pos)
             bonus.append((first, second))
         debug('idx#%d ofs#%d->%d %r %r (%d bonus %s)', len(records), start, pos, vendor, filename, bonuscount, bonus)
     return records

 def getBackups():
     """List the entries of BACKUPPATH, most recently modified first.

     Returns a list of (st_mtime, entry name, full path) tuples sorted in
     descending order.
     """
     found = []
     for name in os.listdir(BACKUPPATH):
         path = os.path.join(BACKUPPATH, name)
         found.append((os.stat(path).st_mtime, name, path))
     return sorted(found, reverse=True)

 def iterBackups(iterator):
     """Call iterator for each backup, newest first, until it returns non-None.

     For every backup returned by getBackups() the Manifest.mbdb inside
     it is decoded and two dictionaries are built:

     - fileMap:  lower-cased file name => hex SHA-1 of 'vendor-filename'
     - fFileMap: (vendor, lower-cased file name) => the same hex SHA-1

     iterator(ipath, fileMap, fFileMap) is then invoked.  The first
     return value that is not None stops the loop and is returned;
     otherwise None is returned.
     """
     for _, iphone, ipath in getBackups():
         debug('ipath:%r', ipath)
         filename = os.path.join(ipath, 'Manifest.mbdb')
         debug('opening %r', filename)
         # The manifest is binary: read it in binary mode and close the
         # handle deterministically.
         with open(filename, 'rb') as manifest:
             data = manifest.read()
         filenames = decodeMBDB(data)
         log('decoded %d filenames', len(filenames))
         fileMap = {}
         fFileMap = {}
         for _lofs, vendor, filename, _bonus1 in filenames:
             lfilename = filename.lower()
             # The hash uses the original (not lower-cased) file name.
             sha = hashlib.sha1(vendor+'-'+filename).hexdigest()
             fileMap[lfilename] = sha
             fFileMap[vendor, lfilename] = sha
         rv = iterator(ipath, fileMap, fFileMap)
         if rv is not None:
             return rv

 def _copy(fromname, toname):
     """Copy the contents of file fromname to file toname, byte for byte.

     Both files are opened in binary mode so that arbitrary data is
     copied unchanged, and both handles are closed before returning.
     The source is read before the destination is opened, so a missing
     source no longer leaves an empty destination behind.
     """
     with open(fromname, 'rb') as src:
         data = src.read()
     with open(toname, 'wb') as dst:
         dst.write(data)


 def getFileToFilename(backuppath, destfilename):
     """Copy the backed-up file named backuppath to destfilename.

     iphone database format 4 reader/decoder - this is 'simplified'
     version which will hopefully eventually work correctly.

     backuppath is lower-cased and looked up in the fileMap of each
     backup visited by iterBackups().  Returns True as soon as one backup
     holds the file and it has been copied; returns None otherwise.
     """
     bpl = backuppath.lower()
     def _iterator(ipath, fileMap, fFileMap):
         sha = fileMap.get(bpl, '')
         if not sha:
             log('No sha found for %r', bpl)
             return None
         # Hardcoded check: for the known test files the recorded name
         # overrides whatever was computed.
         if bpl in KNOWN and sha != KNOWN[bpl]:
             log('!!! WRONG sha: %s <> %s', sha, KNOWN[bpl])
             sha = KNOWN[bpl]
         path = os.path.join(ipath, sha)
         log('found potential sha candidate %r', path)
         if ms.util.file_exists(path):
             log('and it even existed! yay')
             _copy(path, destfilename)
             return True
         log('Path %r not found', path)
         return None
     return iterBackups(_iterator)

 # We care only about most recent backup by default, from most recent
 # device..
 def getDomainToDirectory(domain, directory, onlyMostRecentDevice=True):
    def _iterator(ipath, fileMap, fFileMap):
        dumped, skipped = 0, 0
        for (vendor, filename), sha in fFileMap.items():
            if vendor != domain:
                continue
            fromname = os.path.join(ipath, sha)
            if ms.util.exists(fromname):
                dumped += 1
                dirname = os.path.dirname(filename)
                basename = os.path.basename(filename)
                newdirname = os.path.join(directory, dirname)
                try:
                    os.makedirs(newdirname)
                except OSError:
                    pass
                toname = os.path.join(newdirname, basename)
                _copy(fromname, toname)
            else:
                skipped += 1
        if dumped:
            print 'Copied %d files' % dumped
        if skipped:
            print 'Skipped %d files' % skipped
        if onlyMostRecentDevice:
            return True
    return iterBackups(_iterator)

 def dumpDirectory():
    def _iterator(ipath, fileMap, fFileMap):
        for (vendor, filename), sha in fFileMap.items():
            print ipath, vendor, filename, sha
        #return True # rather dump all devices?
    return iterBackups(_iterator)

 # Command line entry point:
 #   -d DOMAIN -o DIR   copy all files of DOMAIN into DIR
 #   -l                 list every file of every backup
 if __name__ == '__main__':
     import sys
     import ms.util
     # presumably getopt-style parsing keyed by option letter -- confirm
     # against ms.util.Getopt
     (opts, args) = ms.util.Getopt(format="d:o:l")
     if opts['d'] and opts['o']:
         apprefix, todir = opts['d'], opts['o']
         getDomainToDirectory(apprefix, todir)
     elif opts['l']:
         dumpDirectory()
     # Disabled manual self-test; relies on the author's own backup data.
     if 0:
         tfilename = '/tmp/test-iphonebackupdb.dat'
         assert getFileToFilename('documents/rahat.pdb', tfilename)
         assert not getFileToFilename('documents/rahat.pdbxxx', tfilename)
         os.unlink(tfilename)
diff --git a/version-one-mbdb-mbdx-files.py b/version-one-mbdb-mbdx-files.py
 #!/usr/bin/env python
 # from http://stackoverflow.com/questions/3085153/how-to-parse-the-manifest-mbdb-file-in-an-ios-4-0-itunes-backup
 import sys

 def getint(data, offset, intsize):
     """Decode intsize bytes at offset as a big-endian unsigned integer.

     Returns (value, offset of the first byte after the integer).
     """
     value = 0
     pos = offset
     for _ in range(max(intsize, 0)):
         value = value * 256 + ord(data[pos])
         pos += 1
     return value, pos

 def getstring(data, offset):
     """Read a string with a 2-byte big-endian length prefix at offset.

     The byte pair 0xFF 0xFF in place of the length denotes a blank
     string.  Returns (string, offset just past it).
     """
     blank = chr(0xFF)
     if data[offset] == blank and data[offset+1] == blank:
         return '', offset + 2
     length, start = getint(data, offset, 2)
     end = start + length
     return data[start:end], end

 def process_mbdb_file(filename):
     """Parse an MBDB manifest into {record start offset: fileinfo dict}.

     After the 4-byte "mbdb" magic and two further bytes, each record is
     read as five length-prefixed strings, eleven big-endian integers and
     then 'numprops' name/value string pairs stored under 'properties'.

     Raises Exception if the file does not start with "mbdb".
     """
     string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                      'unknown1')
     int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                   ('userid', 4), ('groupid', 4), ('mtime', 4),
                   ('atime', 4), ('ctime', 4), ('filelen', 8),
                   ('flag', 1), ('numprops', 1))
     # The manifest is binary data: read it in binary mode and make sure
     # the handle is closed again.
     with open(filename, 'rb') as f:
         data = f.read()
     if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
     offset = 4 + 2 # magic, then value x05 x00, not sure what this is
     mbdb = {} # Map offset of info in this file => file info
     while offset < len(data):
         fileinfo = {'start_offset': offset}
         for name in string_fields:
             fileinfo[name], offset = getstring(data, offset)
         for name, size in int_fields:
             fileinfo[name], offset = getint(data, offset, size)
         fileinfo['properties'] = {}
         for ii in range(fileinfo['numprops']):
             propname, offset = getstring(data, offset)
             propval, offset = getstring(data, offset)
             fileinfo['properties'][propname] = propval
         mbdb[fileinfo['start_offset']] = fileinfo
     return mbdb

 def process_mbdx_file(filename):
     """Parse an MBDX index into {MBDB record offset: fileID hex string}.

     After the 4-byte "mbdx" magic, two further bytes and a 4-byte record
     count, the file is read as 26-byte records: 20 bytes of fileID, a
     4-byte offset and a 2-byte mode.  Stored offsets are increased by 6
     so that they match the keys produced for the MBDB file.

     Raises Exception if the file does not start with "mbdx".
     """
     # Binary data: read in binary mode and close the handle afterwards.
     with open(filename, 'rb') as f:
         data = f.read()
     if data[0:4] != "mbdx": raise Exception("This does not look like an MBDX file")
     offset = 4 + 2 # magic, then value 0x02 0x00, not sure what this is
     # The record count is read only to advance; the loop runs to EOF.
     _filecount, offset = getint(data, offset, 4)
     mbdx = {} # Map offset of info in the MBDB file => fileID string
     while offset < len(data):
         fileID = data[offset:offset+20] # 20 bytes of fileID
         fileID_string = ''.join(['%02x' % ord(b) for b in fileID])
         offset = offset + 20
         mbdb_offset, offset = getint(data, offset, 4) # 4-byte offset field
         _mode, offset = getint(data, offset, 2) # 2-byte mode field
         mbdx[mbdb_offset + 6] = fileID_string # Add 6 to get past prolog
     return mbdx

 def modestr(val):
     """Render the low nine bits of val as an 'rwxrwxrwx'-style string."""
     letters = ((0x4, 'r'), (0x2, 'w'), (0x1, 'x'))
     out = []
     # Three groups of three bits, most significant group first.
     for shift in (6, 3, 0):
         bits = val >> shift
         for mask, letter in letters:
             if bits & mask:
                 out.append(letter)
             else:
                 out.append('-')
     return ''.join(out)

 def fileinfo_str(f, verbose=False):
     """Format the fileinfo dict f as one line of text.

     The short form is '(fileID)domain::filename'.  The verbose form is
     prefixed with a type character, permission string, uid, gid, length
     and the three timestamps, and is followed by the link target for
     symlinks and by every extra property.
     """
     if not verbose:
         return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
     filetype = f['mode'] & 0xE000
     if filetype == 0xA000:
         kind = 'l' # symlink
     elif filetype == 0x8000:
         kind = '-' # file
     elif filetype == 0x4000:
         kind = 'd' # dir
     else:
         print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
         kind = '?' # unknown
     values = (kind, modestr(f['mode']&0x0FFF), f['userid'], f['groupid'],
               f['filelen'], f['mtime'], f['atime'], f['ctime'],
               f['fileID'], f['domain'], f['filename'])
     info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % values
     if kind == 'l':
         info += ' -> ' + f['linktarget'] # symlink destination
     for name, value in f['properties'].items(): # extra properties
         info += ' ' + name + '=' + repr(value)
     return info

 # Passed to fileinfo_str() below; True selects the long listing format.
 verbose = True
 # Script entry point: reads Manifest.mbdb and Manifest.mbdx from the
 # current directory and prints one line per MBDB record.
 if __name__ == '__main__':
     mbdb = process_mbdb_file("Manifest.mbdb")
     mbdx = process_mbdx_file("Manifest.mbdx")
     for offset, fileinfo in mbdb.items():
         # Both dictionaries are keyed by the record's offset in the MBDB.
         if offset in mbdx:
             fileinfo['fileID'] = mbdx[offset]
         else:
             fileinfo['fileID'] = "<nofileID>"
             print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
         print fileinfo_str(fileinfo, verbose)
diff --git a/version-three-same-as-version-one-with-cleaner-output.py b/version-three-same-as-version-one-with-cleaner-output.py
 #!/usr/bin/env python
 import sys

 def getint(data, offset, intsize):
     """Read a big-endian unsigned integer of intsize bytes from data.

     Returns (value, offset advanced past the bytes consumed).
     """
     total = 0
     cursor = offset
     for _ in range(max(intsize, 0)):
         total = total * 256 + ord(data[cursor])
         cursor += 1
     return total, cursor

 def getstring(data, offset):
     """Return (string, new offset) for the length-prefixed string at offset.

     A prefix of 0xFF 0xFF stands for a blank string; otherwise the
     prefix is a 2-byte big-endian length.
     """
     ff = chr(0xFF)
     if data[offset] == ff and data[offset+1] == ff:
         return '', offset + 2
     length, begin = getint(data, offset, 2)
     finish = begin + length
     return data[begin:finish], finish

 def process_mbdb_file(filename):
     """Parse an MBDB manifest into {record start offset: fileinfo dict}.

     After the 4-byte "mbdb" magic and two further bytes, each record is
     read as five length-prefixed strings, eleven big-endian integers and
     then 'numprops' name/value string pairs stored under 'properties'.

     Raises Exception if the file does not start with "mbdb".
     """
     string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                      'unknown1')
     int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                   ('userid', 4), ('groupid', 4), ('mtime', 4),
                   ('atime', 4), ('ctime', 4), ('filelen', 8),
                   ('flag', 1), ('numprops', 1))
     # The manifest is binary data: read it in binary mode and make sure
     # the handle is closed again.
     with open(filename, 'rb') as f:
         data = f.read()
     if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
     offset = 4 + 2 # magic, then value x05 x00, not sure what this is
     mbdb = {} # Map offset of info in this file => file info
     while offset < len(data):
         fileinfo = {'start_offset': offset}
         for name in string_fields:
             fileinfo[name], offset = getstring(data, offset)
         for name, size in int_fields:
             fileinfo[name], offset = getint(data, offset, size)
         fileinfo['properties'] = {}
         for ii in range(fileinfo['numprops']):
             propname, offset = getstring(data, offset)
             propval, offset = getstring(data, offset)
             fileinfo['properties'][propname] = propval
         mbdb[fileinfo['start_offset']] = fileinfo
     return mbdb

 def process_mbdx_file(filename):
     """Parse an MBDX index into {MBDB record offset: fileID hex string}.

     After the 4-byte "mbdx" magic, two further bytes and a 4-byte record
     count, the file is read as 26-byte records: 20 bytes of fileID, a
     4-byte offset and a 2-byte mode.  Stored offsets are increased by 6
     so that they match the keys produced for the MBDB file.

     Raises Exception if the file does not start with "mbdx".
     """
     # Binary data: read in binary mode and close the handle afterwards.
     with open(filename, 'rb') as f:
         data = f.read()
     if data[0:4] != "mbdx": raise Exception("This does not look like an MBDX file")
     offset = 4 + 2 # magic, then value 0x02 0x00, not sure what this is
     # The record count is read only to advance; the loop runs to EOF.
     _filecount, offset = getint(data, offset, 4)
     mbdx = {} # Map offset of info in the MBDB file => fileID string
     while offset < len(data):
         fileID = data[offset:offset+20] # 20 bytes of fileID
         fileID_string = ''.join(['%02x' % ord(b) for b in fileID])
         offset = offset + 20
         mbdb_offset, offset = getint(data, offset, 4) # 4-byte offset field
         _mode, offset = getint(data, offset, 2) # 2-byte mode field
         mbdx[mbdb_offset + 6] = fileID_string # Add 6 to get past prolog
     return mbdx

 def modestr(val):
     """Return the nine permission bits of val in 'rwxr-xr-x' notation."""
     def triplet(bits):
         # One r/w/x group from the three lowest bits.
         chars = []
         for mask, letter in ((0x4, 'r'), (0x2, 'w'), (0x1, 'x')):
             if bits & mask:
                 chars.append(letter)
             else:
                 chars.append('-')
         return ''.join(chars)
     owner = triplet(val >> 6)
     group = triplet(val >> 3)
     other = triplet(val)
     return owner + group + other

 def fileinfo_str(f, verbose=False):
     """Format the fileinfo dict f as one line of text.

     The short form is '(fileID)domain::filename'.  The verbose form is
     prefixed with a type character, permission string, uid, gid, length
     and the three timestamps, and is followed by the link target for
     symlinks and by every extra property.
     """
     if not verbose:
         return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
     filetype = f['mode'] & 0xE000
     if filetype == 0xA000:
         kind = 'l' # symlink
     elif filetype == 0x8000:
         kind = '-' # file
     elif filetype == 0x4000:
         kind = 'd' # dir
     else:
         print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
         kind = '?' # unknown
     values = (kind, modestr(f['mode']&0x0FFF), f['userid'], f['groupid'],
               f['filelen'], f['mtime'], f['atime'], f['ctime'],
               f['fileID'], f['domain'], f['filename'])
     info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % values
     if kind == 'l':
         info += ' -> ' + f['linktarget'] # symlink destination
     for name, value in f['properties'].items(): # extra properties
         info += ' ' + name + '=' + repr(value)
     return info

 verbose = True
 if __name__ == '__main__':
    mbdb = process_mbdb_file("Manifest.mbdb")
    mbdx = process_mbdx_file("Manifest.mbdx")
    sizes = {}
    for offset, fileinfo in mbdb.items():
        if offset in mbdx:
            fileinfo['fileID'] = mbdx[offset]
        else:
            fileinfo['fileID'] = "<nofileID>"
            print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
        print fileinfo_str(fileinfo, verbose)
        if (fileinfo['mode'] & 0xE000) == 0x8000:
        sizes[fileinfo['domain']]= sizes.get(fileinfo['domain'],0) + fileinfo['filelen']
    for domain in sorted(sizes, key=sizes.get):
        print "%-60s %11d (%dMB)" % (domain, sizes[domain], int(sizes[domain]/1024/1024))
diff --git a/version-two-mbdb-only.py b/version-two-mbdb-only.py
 #!/usr/bin/env python
 import sys
 import hashlib

 # Map offset of a record in the MBDB file => fileID hex string; filled
 # in as a side effect of process_mbdb_file().
 mbdx = {}

 def getint(data, offset, intsize):
     """Return (big-endian integer of intsize bytes at offset, next offset)."""
     number = 0
     index = offset
     remaining = intsize
     while remaining > 0:
         number = number * 256 + ord(data[index])
         index += 1
         remaining -= 1
     return number, index

 def getstring(data, offset):
     """Decode the length-prefixed string found at offset in data.

     Two 0xFF bytes mean "blank"; anything else is a 2-byte big-endian
     length followed by that many bytes.  Returns (string, next offset).
     """
     is_blank = data[offset] == chr(0xFF) and data[offset+1] == chr(0xFF)
     if is_blank:
         return '', offset + 2
     size, body = getint(data, offset, 2)
     return data[body:body+size], body + size

 def process_mbdb_file(filename):
     """Parse an MBDB manifest into {record start offset: fileinfo dict}.

     After the 4-byte "mbdb" magic and two further bytes, each record is
     read as five length-prefixed strings, eleven big-endian integers and
     then 'numprops' name/value string pairs stored under 'properties'.

     As a side effect the module-level dict mbdx receives, for every
     record, the hex SHA-1 of 'domain-filename' under the same offset.

     Raises Exception if the file does not start with "mbdb".
     """
     string_fields = ('domain', 'filename', 'linktarget', 'datahash',
                      'unknown1')
     int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                   ('userid', 4), ('groupid', 4), ('mtime', 4),
                   ('atime', 4), ('ctime', 4), ('filelen', 8),
                   ('flag', 1), ('numprops', 1))
     # The manifest is binary data: read it in binary mode and make sure
     # the handle is closed again.
     with open(filename, 'rb') as f:
         data = f.read()
     if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
     offset = 4 + 2 # magic, then value x05 x00, not sure what this is
     mbdb = {} # Map offset of info in this file => file info
     while offset < len(data):
         fileinfo = {'start_offset': offset}
         for name in string_fields:
             fileinfo[name], offset = getstring(data, offset)
         for name, size in int_fields:
             fileinfo[name], offset = getint(data, offset, size)
         fileinfo['properties'] = {}
         for ii in range(fileinfo['numprops']):
             propname, offset = getstring(data, offset)
             propval, offset = getstring(data, offset)
             fileinfo['properties'][propname] = propval
         mbdb[fileinfo['start_offset']] = fileinfo
         # No MBDX file in this format: derive the fileID from the path.
         fullpath = fileinfo['domain'] + '-' + fileinfo['filename']
         mbdx[fileinfo['start_offset']] = hashlib.sha1(fullpath).hexdigest()
     return mbdb

 def modestr(val):
     """Convert permission bits to text, e.g. 0755 -> 'rwxr-xr-x'."""
     symbols = 'rwx'
     result = ''
     for shift in (6, 3, 0):
         group = val >> shift
         # Bits 0x4, 0x2 and 0x1 of each group map to r, w and x.
         for position in range(3):
             if group & (0x4 >> position):
                 result += symbols[position]
             else:
                 result += '-'
     return result

 def fileinfo_str(f, verbose=False):
     """Format the fileinfo dict f as one line of text.

     The short form is '(fileID)domain::filename'.  The verbose form is
     prefixed with a type character, permission string, uid, gid, length
     and the three timestamps, and is followed by the link target for
     symlinks and by every extra property.
     """
     if not verbose:
         return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
     filetype = f['mode'] & 0xE000
     if filetype == 0xA000:
         kind = 'l' # symlink
     elif filetype == 0x8000:
         kind = '-' # file
     elif filetype == 0x4000:
         kind = 'd' # dir
     else:
         print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
         kind = '?' # unknown
     values = (kind, modestr(f['mode']&0x0FFF), f['userid'], f['groupid'],
               f['filelen'], f['mtime'], f['atime'], f['ctime'],
               f['fileID'], f['domain'], f['filename'])
     info = "%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % values
     if kind == 'l':
         info += ' -> ' + f['linktarget'] # symlink destination
     for name, value in f['properties'].items(): # extra properties
         info += ' ' + name + '=' + repr(value)
     return info

 # Passed to fileinfo_str() below; True selects the long listing format.
 verbose = True
 # Script entry point: reads Manifest.mbdb from the current directory and
 # prints one line per record.
 if __name__ == '__main__':
     # Fills the module-level mbdx dict as a side effect.
     mbdb = process_mbdb_file("Manifest.mbdb")
     for offset, fileinfo in mbdb.items():
         if offset in mbdx:
             fileinfo['fileID'] = mbdx[offset]
         else:
             fileinfo['fileID'] = "<nofileID>"
             print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
         print fileinfo_str(fileinfo, verbose)
	#!/usr/bin/env python
	# -- coding: utf-8 --
	# -- Python --
	#
	# http://www.employees.org/~mstenber/iphonebackupdb.py
	#
	# $Id: iphonebackupdb.py,v 1.2 2010/05/28 08:30:38 mstenber Exp $
	#
	# Author: Markus Stenberg <[email protected]>
	#
	# Copyright (c) 2009 Markus Stenberg
	# All rights reserved
	#
	# Created: Tue Mar 31 13:44:03 2009 mstenber
	# Last modified: Fri Oct 14 00:22:09 2011 mstenber
	# Edit time: 209 min
	#
	"""

	This is a minimalist module which abstracts the iPhone backup
	directory's contents (in the Library/Application
	Support/MobileSync/backup) as a filesystem. The only operation
	supported right now is copying a file for read-only use, but in theory
	some other things might also be doable later on (listdir etc).

	XXX - turn this to a FUSE module?

	On the other hand, why bother.. Currently this is like 4th version of
	iTunes backup DB that I'm supporting;

	- pre-8.1 (.mdbackup files, plists with binary content)
	- 8.2+ (.mdinfo files, readable plists with nested plists)
	- 9.2+ (.mbdb, .mbdx index files + files as-is)
	- 10.5+ (.mbdb - .mbdx files disappeared)

	Disclaimer: This module is published for information purposes, and
	its usefulness for anyone else except me may be highly
	questionable. However, it might serve some useful purpose to other
	people too, so I keep it on my web site.. ;-)

	(I know quite a bit more about the un-decoded fields in the .mbdb, but
	as my application only needs this stuff, I can't be arsed to decode
	them anytime soon.. basic UNIX backup stuff like permissions, uid/gid,
	and so forth.)

	"""
	import os, os.path
	import ms.debug, ms.util
	import ms.hexdump
	import ms.cstruct
	import hashlib

	#ms.debug.setModuleLevel('.*', 3)
	# Logging callables for this module, obtained from ms.debug
	# (presumably error/log/debug severities -- confirm against ms.debug).
	(error, log, debug) = ms.debug.getCalls('iphonebackupdb')

	# Directory that is listed to find backups; every entry directly below
	# it is treated as one backup.  Needs HOME in the environment, otherwise
	# importing this module raises KeyError.
	BACKUPPATH=os.path.join(os.environ['HOME'], 'Library',
	'Application Support',
	'MobileSync', 'backup')

	# Test data - not really used for anything if system works correctly,
	# but they were useful when debugging the format
	# Maps lower-cased file name => expected 40-hex-digit backup file name.
	KNOWN = {'documents/rahat.pdb' : 'b07ac15b5c745a287d3ecdc60bb6f6b955c0f229',
	'documents/untitled.pdb': '27fe99e8746b43a9db00c332966d028998bc3a03',
	'Documents/Py%F6r%E4ily.PDB'.lower(): '95ef4154eedac2fcc458cf21ec93c8c3895d9fcb'}

	def getMTime():
	    """Return the newest modification time among the entries of BACKUPPATH.

	    Every entry in BACKUPPATH is passed to ms.util.file_mtime() and the
	    largest result is returned.  Returns None when BACKUPPATH is empty.
	    """
	    mtime = None
	    for iphone in os.listdir(BACKUPPATH):
	        imtime = ms.util.file_mtime(os.path.join(BACKUPPATH, iphone))
	        if mtime is None or mtime < imtime:
	            mtime = imtime
	    # Return the running maximum, not the value of the last entry
	    # visited (which is also unbound when the directory is empty).
	    return mtime

	def getS(data, ofs, defaultFF=False):
	    """Read one length-prefixed string from data starting at ofs.

	    The string is preceded by a two-byte big-endian length whose high
	    byte must be 0 or 1 (so lengths stay below 512).  With defaultFF
	    set, the byte pair 0xFF 0xFF is accepted as well and yields ''.

	    Returns (offset just past the string, string).
	    """
	    if defaultFF:
	        if data[ofs] == chr(0xFF):
	            assert data[ofs+1] == chr(0xFF)
	            return ofs+2, ''
	    # Assume first digit is zero or some small value.. smirk. Seems to
	    # be a short.
	    #
	    # For the time being, we assume strings < 512 bytes to keep sanity
	    # checking valid (initial guess was < 256, which wasn't)
	    assert data[ofs] in [chr(0), chr(1)], 'not 0/1: %s' % ord(data[ofs])
	    l0 = ord(data[ofs])
	    ofs += 1
	    l = ord(data[ofs]) + 256 * l0
	    ofs += 1
	    return ofs+l, data[ofs:ofs+l]

	def getN(data, ofs, count):
	    """Return (ofs+count, the count items of data starting at ofs)."""
	    return ofs+count, data[ofs:ofs+count]

	def decodeMBDB(data):
	    """Decode the records of an .mbdb manifest held in data.

	    Parsing starts at offset 6 and continues while more than 20 bytes
	    remain.  Each record is read as two strings (vendor, filename),
	    three strings that may be the 0xFF 0xFF marker, 39 bytes that are
	    skipped, a one-byte count (at most 6) and that many string pairs.

	    Returns a list of [record offset, vendor, filename, bonus1] lists.
	    """
	    ofs = 6
	    lofs = -1
	    filenames = []
	    while (ofs+20) < len(data):
	        # Guard against a record that consumed nothing.
	        assert ofs != lofs
	        lofs = ofs
	        ofs, vendor = getS(data, ofs)
	        ofs, filename = getS(data, ofs)
	        ofs, bonus1 = getS(data, ofs, True)
	        ofs, bonus2 = getS(data, ofs, True)
	        ofs, bonus3 = getS(data, ofs, True)
	        ofs, garbage = getN(data, ofs, 39)
	        ofs, cnt = getN(data, ofs, 1)
	        filenames.append([lofs, vendor, filename, bonus1])
	        bonuscount = ord(cnt)
	        assert bonuscount <= 6, bonuscount
	        bonus = []
	        for i in range(bonuscount):
	            ofs, xxx = getS(data, ofs)
	            ofs, yyy = getS(data, ofs)
	            bonus.append((xxx, yyy))
	        debug('idx#%d ofs#%d->%d %r %r (%d bonus %s)', len(filenames), lofs, ofs, vendor, filename, bonuscount, bonus)
	    return filenames

	def getBackups():
	    """List the entries of BACKUPPATH, most recently modified first.

	    Returns a list of (st_mtime, entry name, full path) tuples sorted in
	    descending order.
	    """
	    l = []
	    for iphone in os.listdir(BACKUPPATH):
	        ipath = os.path.join(BACKUPPATH, iphone)
	        l.append((os.stat(ipath).st_mtime, iphone, ipath))
	    l.sort()
	    l.reverse()
	    return l

	def iterBackups(iterator):
	    """Call iterator for each backup, newest first, until it returns non-None.

	    For every backup returned by getBackups() the Manifest.mbdb inside
	    it is decoded and two dictionaries are built:

	    - fileMap:  lower-cased file name => hex SHA-1 of 'vendor-filename'
	    - fFileMap: (vendor, lower-cased file name) => the same hex SHA-1

	    iterator(ipath, fileMap, fFileMap) is then invoked.  The first
	    return value that is not None stops the loop and is returned;
	    otherwise None is returned.
	    """
	    for _, iphone, ipath in getBackups():
	        debug('ipath:%r', ipath)
	        filename = os.path.join(ipath, 'Manifest.mbdb')
	        debug('opening %r', filename)
	        # The manifest is binary: read it in binary mode and close the
	        # handle deterministically.
	        with open(filename, 'rb') as manifest:
	            data = manifest.read()
	        filenames = decodeMBDB(data)
	        log('decoded %d filenames', len(filenames))
	        fileMap = {}
	        fFileMap = {}
	        for _lofs, vendor, filename, _bonus1 in filenames:
	            lfilename = filename.lower()
	            # The hash uses the original (not lower-cased) file name.
	            sha = hashlib.sha1(vendor+'-'+filename).hexdigest()
	            fileMap[lfilename] = sha
	            fFileMap[vendor, lfilename] = sha
	        rv = iterator(ipath, fileMap, fFileMap)
	        if rv is not None:
	            return rv

	def _copy(fromname, toname):
	    """Copy the contents of file fromname to file toname, byte for byte.

	    Both files are opened in binary mode so that arbitrary data is
	    copied unchanged, and both handles are closed before returning.
	    The source is read before the destination is opened, so a missing
	    source no longer leaves an empty destination behind.
	    """
	    with open(fromname, 'rb') as src:
	        data = src.read()
	    with open(toname, 'wb') as dst:
	        dst.write(data)


	def getFileToFilename(backuppath, destfilename):
	    """Copy the backed-up file named backuppath to destfilename.

	    iphone database format 4 reader/decoder - this is 'simplified'
	    version which will hopefully eventually work correctly.

	    backuppath is lower-cased and looked up in the fileMap of each
	    backup visited by iterBackups().  Returns True as soon as one backup
	    holds the file and it has been copied; returns None otherwise.
	    """
	    bpl = backuppath.lower()
	    def _iterator(ipath, fileMap, fFileMap):
	        sha = fileMap.get(bpl, '')
	        if not sha:
	            log('No sha found for %r', bpl)
	            return None
	        # Hardcoded check: for the known test files the recorded name
	        # overrides whatever was computed.
	        if bpl in KNOWN and sha != KNOWN[bpl]:
	            log('!!! WRONG sha: %s <> %s', sha, KNOWN[bpl])
	            sha = KNOWN[bpl]
	        path = os.path.join(ipath, sha)
	        log('found potential sha candidate %r', path)
	        if ms.util.file_exists(path):
	            log('and it even existed! yay')
	            _copy(path, destfilename)
	            return True
	        log('Path %r not found', path)
	        return None
	    return iterBackups(_iterator)

	# We care only about most recent backup by default, from most recent
	# device..
	def getDomainToDirectory(domain, directory, onlyMostRecentDevice=True):
	def _iterator(ipath, fileMap, fFileMap):
	dumped, skipped = 0, 0
	for (vendor, filename), sha in fFileMap.items():
	if vendor != domain:
	continue
	fromname = os.path.join(ipath, sha)
	if ms.util.exists(fromname):
	dumped += 1
	dirname = os.path.dirname(filename)
	basename = os.path.basename(filename)
	newdirname = os.path.join(directory, dirname)
	try:
	os.makedirs(newdirname)
	except OSError:
	pass
	toname = os.path.join(newdirname, basename)
	_copy(fromname, toname)
	else:
	skipped += 1
	if dumped:
	print 'Copied %d files' % dumped
	if skipped:
	print 'Skipped %d files' % skipped
	if onlyMostRecentDevice:
	return True
	return iterBackups(_iterator)

	def dumpDirectory():
	def _iterator(ipath, fileMap, fFileMap):
	for (vendor, filename), sha in fFileMap.items():
	print ipath, vendor, filename, sha
	#return True # rather dump all devices?
	return iterBackups(_iterator)

	# Command line entry point:
	#   -d DOMAIN -o DIR   copy all files of DOMAIN into DIR
	#   -l                 list every file of every backup
	if __name__ == '__main__':
	    import sys
	    import ms.util
	    # presumably getopt-style parsing keyed by option letter -- confirm
	    # against ms.util.Getopt
	    (opts, args) = ms.util.Getopt(format="d:o:l")
	    if opts['d'] and opts['o']:
	        apprefix, todir = opts['d'], opts['o']
	        getDomainToDirectory(apprefix, todir)
	    elif opts['l']:
	        dumpDirectory()
	    # Disabled manual self-test; relies on the author's own backup data.
	    if 0:
	        tfilename = '/tmp/test-iphonebackupdb.dat'
	        assert getFileToFilename('documents/rahat.pdb', tfilename)
	        assert not getFileToFilename('documents/rahat.pdbxxx', tfilename)
	        os.unlink(tfilename)
	#!/usr/bin/env python
	# from http://stackoverflow.com/questions/3085153/how-to-parse-the-manifest-mbdb-file-in-an-ios-4-0-itunes-backup
	import sys

	def getint(data, offset, intsize):
	    """Retrieve an integer (big-endian) and new offset from the current offset"""
	    value = 0
	    while intsize > 0:
	        value = (value<<8) + ord(data[offset])
	        offset = offset + 1
	        intsize = intsize - 1
	    return value, offset

	def getstring(data, offset):
	    """Retrieve a string and new offset from the current offset into the data"""
	    if data[offset] == chr(0xFF) and data[offset+1] == chr(0xFF):
	        return '', offset+2 # Blank string
	    length, offset = getint(data, offset, 2) # 2-byte length
	    value = data[offset:offset+length]
	    return value, (offset + length)

	def process_mbdb_file(filename):
	    """Parse an MBDB manifest into {record start offset: fileinfo dict}.

	    After the 4-byte "mbdb" magic and two further bytes, each record is
	    read as five length-prefixed strings, eleven big-endian integers and
	    then 'numprops' name/value string pairs stored under 'properties'.

	    Raises Exception if the file does not start with "mbdb".
	    """
	    string_fields = ('domain', 'filename', 'linktarget', 'datahash',
	                     'unknown1')
	    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
	                  ('userid', 4), ('groupid', 4), ('mtime', 4),
	                  ('atime', 4), ('ctime', 4), ('filelen', 8),
	                  ('flag', 1), ('numprops', 1))
	    # The manifest is binary data: read it in binary mode and make sure
	    # the handle is closed again.
	    with open(filename, 'rb') as f:
	        data = f.read()
	    if data[0:4] != "mbdb": raise Exception("This does not look like an MBDB file")
	    offset = 4 + 2 # magic, then value x05 x00, not sure what this is
	    mbdb = {} # Map offset of info in this file => file info
	    while offset < len(data):
	        fileinfo = {'start_offset': offset}
	        for name in string_fields:
	            fileinfo[name], offset = getstring(data, offset)
	        for name, size in int_fields:
	            fileinfo[name], offset = getint(data, offset, size)
	        fileinfo['properties'] = {}
	        for ii in range(fileinfo['numprops']):
	            propname, offset = getstring(data, offset)
	            propval, offset = getstring(data, offset)
	            fileinfo['properties'][propname] = propval
	        mbdb[fileinfo['start_offset']] = fileinfo
	    return mbdb

	def process_mbdx_file(filename):
	    """Parse an MBDX index into {MBDB record offset: fileID hex string}.

	    After the 4-byte "mbdx" magic, two further bytes and a 4-byte record
	    count, the file is read as 26-byte records: 20 bytes of fileID, a
	    4-byte offset and a 2-byte mode.  Stored offsets are increased by 6
	    so that they match the keys produced for the MBDB file.

	    Raises Exception if the file does not start with "mbdx".
	    """
	    # Binary data: read in binary mode and close the handle afterwards.
	    with open(filename, 'rb') as f:
	        data = f.read()
	    if data[0:4] != "mbdx": raise Exception("This does not look like an MBDX file")
	    offset = 4 + 2 # magic, then value 0x02 0x00, not sure what this is
	    # The record count is read only to advance; the loop runs to EOF.
	    _filecount, offset = getint(data, offset, 4)
	    mbdx = {} # Map offset of info in the MBDB file => fileID string
	    while offset < len(data):
	        fileID = data[offset:offset+20] # 20 bytes of fileID
	        fileID_string = ''.join(['%02x' % ord(b) for b in fileID])
	        offset = offset + 20
	        mbdb_offset, offset = getint(data, offset, 4) # 4-byte offset field
	        _mode, offset = getint(data, offset, 2) # 2-byte mode field
	        mbdx[mbdb_offset + 6] = fileID_string # Add 6 to get past prolog
	    return mbdx

	def modestr(val):
	    """Render the low nine bits of val as an 'rwxrwxrwx'-style string."""
	    def mode(val):
	        # One r/w/x group from the three lowest bits.
	        if (val & 0x4): r = 'r'
	        else: r = '-'
	        if (val & 0x2): w = 'w'
	        else: w = '-'
	        if (val & 0x1): x = 'x'
	        else: x = '-'
	        return r+w+x
	    return mode(val>>6) + mode((val>>3)) + mode(val)

	def fileinfo_str(f, verbose=False):
	    """Format the fileinfo dict f as one line of text.

	    The short form is '(fileID)domain::filename'.  The verbose form is
	    prefixed with a type character, permission string, uid, gid, length
	    and the three timestamps, and is followed by the link target for
	    symlinks and by every extra property.
	    """
	    if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename'])
	    # 'kind' rather than 'type' so the builtin is not shadowed.
	    if (f['mode'] & 0xE000) == 0xA000: kind = 'l' # symlink
	    elif (f['mode'] & 0xE000) == 0x8000: kind = '-' # file
	    elif (f['mode'] & 0xE000) == 0x4000: kind = 'd' # dir
	    else:
	        print >> sys.stderr, "Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False))
	        kind = '?' # unknown
	    info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" %
	            (kind, modestr(f['mode']&0x0FFF) , f['userid'], f['groupid'], f['filelen'],
	             f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename']))
	    if kind == 'l': info = info + ' -> ' + f['linktarget'] # symlink destination
	    for name, value in f['properties'].items(): # extra properties
	        info = info + ' ' + name + '=' + repr(value)
	    return info

	verbose = True
	if __name__ == '__main__':
	mbdb = process_mbdb_file("Manifest.mbdb")
	mbdx = process_mbdx_file("Manifest.mbdx")
	for offset, fileinfo in mbdb.items():
	if offset in mbdx:
	fileinfo['fileID'] = mbdx[offset]
	else:
	fileinfo['fileID'] = "<nofileID>"
	print >> sys.stderr, "No fileID found for %s" % fileinfo_str(fileinfo)
	print fileinfo_str(fileinfo, verbose)