Created
November 24, 2014 22:57
-
-
Save tetrillard/759bf2d165b440e4915c to your computer and use it in GitHub Desktop.
SMS Backup & Restore : Extract images and videos from a backup file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf8 -*-
# SMSBackupRestore extractor
#
# smsbackuprestore-extractor.py
# 24/11/2014
#
# This script will extract all images and videos retrieved
# from an XML backup of the Android application "SMS Backup & Restore".
# For each contact, it will create a folder inside the output folder
# with all received images and videos.
#
# Make sure the destination folder is empty, otherwise it will create duplicates.
#
# Links :
# https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore
#
# example: python smsbackuprestore-extractor.py sms-20141122183844.xml medias/
import base64
import os
import sys

from lxml import etree
def pick_folder(contact, address):
    """Return the sub-folder name used for one MMS.

    The contact name is used when it is known; otherwise the sender address;
    otherwise the fixed name "_Unknown".  A missing ``contact_name`` attribute
    (``None``) is treated like the "(Unknown)" placeholder instead of crashing
    later when the path is built.
    """
    if contact is None or contact == "(Unknown)":
        return address if address is not None else "_Unknown"
    return contact


def media_filename(name, content_type):
    """Return the file name to use for one media part.

    ``name`` is the part's ``cl`` attribute and ``content_type`` its ``ct``
    attribute.  Only the last path component of ``name`` is kept so that a
    value read from the backup can never point outside the contact folder.
    When no usable name is present (attribute missing, empty, or the literal
    string "null"), "media.<ext>" is returned with the extension derived from
    the content type.
    """
    if name and name != "null":
        leaf = os.path.basename(name.replace("\\", "/"))
        if leaf not in ("", ".", ".."):
            return leaf
    main_type, _, subtype = (content_type or "").partition("/")
    if subtype == "jpeg":
        subtype = "jpg"
    elif subtype in ("", "*"):
        # Wildcard or missing subtype: fall back to a common container.
        subtype = "3gpp" if main_type == "video" else "jpg"
    return "media." + subtype


def unique_path(folder, filename):
    """Return a path inside ``folder`` for ``filename`` that does not exist yet.

    On a collision a counter is inserted in front of the extension
    ("a.jpg" -> "a.1.jpg", "a.2.jpg", ...).  Names without an extension get
    the counter appended ("null" -> "null.1") rather than prepended.
    """
    candidate = folder + "/" + filename
    root, ext = os.path.splitext(filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = "%s/%s.%d%s" % (folder, root, counter, ext)
        counter += 1
    return candidate


def printable(text):
    """Return ``text`` in a form that is safe to interpolate into a print.

    Native ``str`` values are returned unchanged; anything else (a Python 2
    ``unicode`` path) is encoded as UTF-8.
    """
    return text if isinstance(text, str) else text.encode("utf-8")


def main(argv):
    """Extract every image/video part of the backup ``argv[1]`` into ``argv[2]``.

    Returns the process exit status: -1 on a usage error or a missing input
    file, 0 otherwise.
    """
    # Both the input file and the output folder are required.
    if len(argv) < 3:
        print("usage: %s [sms-backup.xml] [output-folder]" % argv[0])
        return -1

    input_file = argv[1]
    output_folder = argv[2]

    if not os.path.isfile(input_file):
        print("File %s not found" % input_file)
        return -1

    print("[*] Parsing : %s" % input_file)
    tree = etree.parse(input_file)

    total = 0
    for mms in tree.xpath(".//mms"):
        folder = pick_folder(mms.get("contact_name"), mms.get("address"))
        output = output_folder + "/" + folder
        media_list = mms.xpath(
            ".//part[starts-with(@ct, 'image') or starts-with(@ct, 'video')]")

        for media in media_list:
            data = media.get("data")
            if data is None:
                # Nothing to decode for this part.
                continue

            # Create the contact folder lazily, only when there is a file for it.
            if not os.path.exists(output):
                os.makedirs(output)
                print("[+] New folder created : %s" % printable(output))

            filename = media_filename(media.get("cl"), media.get("ct"))
            outfile = unique_path(output, filename)

            # The payload is binary: write it through a binary handle.
            with open(outfile, "wb") as handle:
                handle.write(base64.b64decode(data))
            total += 1

    print("[*] Job done (%d files created)" % total)
    print("[*] Output folder : %s" % output_folder)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
I'm also struggling with this on python3 / windows 10.
Any luck @nicksears?
Thanks for the code! I've modified it for my use to update "null" file names to the timestamp instead. I also set the file modification time to the MMS date timestamp. The other change I made was to parse messages individually instead of parsing the whole file at once. This allowed me to get through a very large backup.
#!/usr/bin/env python
# -*- coding: utf8 -*-
# SMSBackupRestore extractor
#
# smsbackuprestore-extractor.py
# 24/11/2014
#
# This script will extract all images and videos retrieved
# from an XML backup of the Android application "SMS Backup & Restore".
# For each contact, it will create a folder inside the output folder
# with all received images and videos.
#
# Make sure the destination folder is empty otherwise it will create duplicates.
#
# Links :
# https://play.google.com/store/apps/details?id=com.riteshsahu.SMSBackupRestore
#
# example: python smsbackuprestore-extractor.py sms-20141122183844.xml medias/
#
# 2019-02-09 @stefan-schiffer
# Ported to Python 3
# You might have first to fix malformed XML with entityfixer.py
# https://gist.github.com/Calvin-L/5232f876b8acf48a216941b8904632bb
from lxml import etree
from win32_setctime import setctime
import os
import sys
import base64
import datetime
def pick_folder(contact, address):
    """Return the sub-folder name used for one MMS.

    The contact name is used when it is known; otherwise the sender address;
    otherwise the fixed name "_Unknown".  A missing ``contact_name`` attribute
    (``None``) is treated like the "(Unknown)" placeholder.
    """
    if contact is None or contact == "(Unknown)":
        return address if address is not None else "_Unknown"
    return contact


def parse_date(raw):
    """Convert an MMS ``date`` attribute (milliseconds since the epoch).

    Returns ``(epoch_seconds, local_datetime)``, or ``(None, None)`` when the
    attribute is missing or cannot be converted.
    """
    try:
        epoch = float(raw) / 1000.0
        return epoch, datetime.datetime.fromtimestamp(epoch)
    except (TypeError, ValueError, OverflowError, OSError):
        return None, None


def guess_extension(content_type):
    """Return a file extension (without dot) for a part's content type.

    "jpeg" is shortened to "jpg"; a wildcard or missing subtype is guessed as
    "3gpp" for video and "jpg" otherwise; any other subtype is used verbatim.
    """
    main_type, _, subtype = (content_type or "").partition("/")
    if subtype == "jpeg":
        return "jpg"
    if subtype in ("", "*"):
        return "3gpp" if main_type == "video" else "jpg"
    return subtype


def media_filename(name, content_type, stamp):
    """Return the file name to use for one media part.

    ``name`` is the part's ``cl`` attribute.  Only its last path component is
    kept so a value read from the backup cannot point outside the contact
    folder.  When there is no usable name (missing, empty, or the literal
    "null"), the name is built from ``stamp`` (a ``datetime`` or ``None``)
    formatted as YYYYmmdd_HHMMSS, or "media" when no date is known, plus the
    extension guessed from ``content_type``.
    """
    if name and name != "null":
        leaf = os.path.basename(name.replace("\\", "/"))
        if leaf not in ("", ".", ".."):
            return leaf
    prefix = stamp.strftime("%Y%m%d_%H%M%S") if stamp is not None else "media"
    return prefix + "." + guess_extension(content_type)


def unique_path(folder, filename):
    """Return a path inside ``folder`` for ``filename`` that does not exist yet.

    On a collision a counter is inserted in front of the extension
    ("a.jpg" -> "a.1.jpg", "a.2.jpg", ...).  Names without an extension get
    the counter appended rather than prepended.
    """
    candidate = folder + "/" + filename
    root, ext = os.path.splitext(filename)
    counter = 1
    while os.path.exists(candidate):
        candidate = "%s/%s.%d%s" % (folder, root, counter, ext)
        counter += 1
    return candidate


def main(argv):
    """Extract every image/video part of the backup ``argv[1]`` into ``argv[2]``.

    Messages are parsed one at a time with ``etree.iterparse`` so the whole
    backup never has to be held in memory.  Each written file gets its
    modification/access time set to the MMS date when that date is known.

    Returns the process exit status: -1 on a usage error or a missing input
    file, 0 otherwise.
    """
    # Both the input file and the output folder are required.
    if len(argv) < 3:
        print("usage: %s [sms-backup.xml] [output-folder]" % argv[0])
        return -1

    input_file = argv[1]
    output_folder = argv[2]

    if not os.path.isfile(input_file):
        print("File %s not found" % input_file)
        return -1

    print("[*] Parsing : %s" % input_file)

    total = 0
    for _, mms in etree.iterparse(input_file, tag='mms', huge_tree=True):
        folder = pick_folder(mms.get("contact_name"), mms.get("address"))
        output = output_folder + "/" + folder
        epoch, stamp = parse_date(mms.get("date"))
        media_list = mms.xpath(
            ".//part[starts-with(@ct, 'image') or starts-with(@ct, 'video')]")

        for media in media_list:
            data = media.get("data")
            if data is None:
                # Nothing to decode for this part.
                continue

            # Create the contact folder lazily, only when there is a file for it.
            if not os.path.exists(output):
                os.makedirs(output)
                print("[+] New folder created: %s" % folder)

            raw_name = media.get("cl")
            content_type = media.get("ct")
            filename = media_filename(raw_name, content_type, stamp)

            # Tell the user when the extension had to be guessed from a wildcard type.
            if raw_name in (None, "", "null") and content_type in ("image/*", "video/*"):
                print("Unknown %s type * for MMS content; guessing .%s %s/%s" % (
                    content_type.split("/")[0],
                    guess_extension(content_type),
                    output,
                    os.path.splitext(filename)[0]))

            outfile = unique_path(output, filename)

            # The payload is binary: write it through a binary handle.
            with open(outfile, "wb") as handle:
                handle.write(base64.b64decode(data))
            total += 1

            if epoch is not None:
                # Use the epoch value directly: subtracting a naive 1970-01-01
                # from the *local* datetime would shift the result by the
                # machine's UTC offset.
                if os.name == "nt":
                    # NOTE(review): setctime comes from win32_setctime and is
                    # understood to be Windows-only; skipped elsewhere -- confirm.
                    setctime(outfile, epoch)
                os.utime(outfile, (epoch, epoch))

        # Release this message, then drop the already-processed siblings that
        # the root element would otherwise keep referencing.
        mms.clear()
        while mms.getprevious() is not None:
            del mms.getparent()[0]

    print("[*] Job done (%d files created)" % total)
    print("[*] Output folder : %s" % output_folder)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
This was a lifesaver! Many thanks for porting to Python 3!
This is great! I just ran the newest version (posted by @bumpaneer) on a 600MB xml file and it worked wonderfully. I was about to start writing something to do the same thing but I wasn't looking forward to it :) Thank you.
Do you mind if I link to this gist from a Stack Exchange question?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Any luck running on a large file? My backup file is 2.1GB. I tried to add:
and then change
to
After cleaning the file as described above, I get the following output: