Skip to content

Instantly share code, notes, and snippets.

@shazow
Created October 31, 2010 17:45
Show Gist options
  • Save shazow/656895 to your computer and use it in GitHub Desktop.
Save shazow/656895 to your computer and use it in GitHub Desktop.
Didn't know about PhotoRec, so I wrote my own jpeg data recovery script in April 2008.
#!/bin/env python
# filenose - Find specific types of files from raw data.
from StringIO import StringIO
import sys
import time
from datetime import datetime
# Largest number of bytes read in one go when extracting a candidate file.
MAXSIZE = 4 * 1024 * 1024  # 4mb
# Number of bytes read per pass while scanning for start markers.
CHUNK_SIZE = 10 * 1024 * 1024  # 10mb

# Maps a file type name to its (start, end) byte markers.
filetypes = {
    'PHOTO.JPEG': ('\xff\xd8\xff', '\xff\xd9'),
}
# JPEG marker notes:
#   ffe1 is the EXIF marker
#   ffe0 is the application marker
def find_jpeg_photo(fp, required_tag='Canon PowerShot SD850 IS'):
    """Extract one JPEG photo beginning at the current position of *fp*.

    Reads up to MAXSIZE bytes from *fp* and returns the leading slice that
    runs from the JPEG start marker through the SECOND end marker. The
    second marker is used on purpose: the first one is presumably the end
    of an embedded thumbnail rather than of the photo itself
    (NOTE(review): confirm against the camera's files).

    The file position is NOT restored: after the call *fp* sits wherever
    the read of up to MAXSIZE bytes ended, so callers must reposition it.

    Parameters:
        fp: file-like object positioned at a suspected JPEG start.
        required_tag: string that must appear before the first end marker
            for the candidate to be accepted. Defaults to the camera model
            this script was written for; pass a false value (None or '')
            to accept any JPEG.

    Returns:
        The bytes of the photo, end marker included.

    Raises:
        TypeError: when the data is not an acceptable photo. TypeError is
            kept (rather than ValueError) because callers catch exactly
            this type to reject a candidate.
    """
    START, END = filetypes['PHOTO.JPEG']

    data = fp.read(MAXSIZE)
    if not data:
        raise TypeError("end of JPEG not found before EOF")
    if not data.startswith(START):
        raise TypeError("start of JPEG not found")

    # Locate the first two end markers directly instead of counting every
    # occurrence in the buffer first and then searching again.
    first = data.find(END)
    second = data.find(END, first + 1) if first >= 0 else -1
    if second < 0:
        raise TypeError("end of JPEG not found within MAXSIZE")

    # Only the region before the first end marker is inspected for the tag.
    if required_tag and required_tag not in data[:first]:
        raise TypeError("important string not found in data.")

    # Success
    return data[:second + len(END)]
def search_device(fp):
    """Scan *fp* for JPEG photos and save every one that is found.

    Reads CHUNK_SIZE bytes at a time looking for the JPEG start marker.
    Each hit is handed to find_jpeg_photo(); accepted photos are written to
    "NNNN.PHOTO.JPEG" (NNNN is a zero-padded counter starting at 0) using a
    relative path. A progress line goes to stderr every 1000 loop
    iterations, and "Done!" is printed once the end of *fp* is reached.

    Parameters:
        fp: seekable file-like object opened for binary reading, positioned
            where the scan should start.
    """
    # TODO: Make a generic version of this to look for all types
    LOOK_FOR = filetypes['PHOTO.JPEG'][0]
    # A marker may straddle two consecutive reads; re-reading this many
    # trailing bytes of a non-matching chunk guarantees it is still seen.
    overlap = len(LOOK_FOR) - 1

    count = 0
    iterations = 0
    last_progress = fp.tell()
    last_progress_time = time.time()

    while True:
        iterations += 1
        last_pos = fp.tell()
        if iterations % 1000 == 0:
            now = time.time()
            elapsed = now - last_progress_time
            mb_since = (last_pos - last_progress) / (1024.0 * 1024.0)
            # Guard against a zero interval on coarse clocks.
            rate = mb_since / elapsed if elapsed > 0 else 0.0
            sys.stderr.write(
                "Progress %s: %d (%.2f mb, diff: %.2f mb/sec)\n"
                % (datetime.now(), last_pos, last_pos / 1024.0 / 1024.0, rate)
            )
            sys.stderr.flush()
            last_progress = last_pos
            last_progress_time = now

        chunk = fp.read(CHUNK_SIZE)
        if not chunk:
            print("Done!")
            break

        loc = chunk.find(LOOK_FOR)
        if loc < 0:
            # Step back so a marker split across the chunk boundary is not
            # missed. Only do so when the chunk is longer than the overlap,
            # otherwise the scan would never advance at end of file.
            if len(chunk) > overlap:
                fp.seek(fp.tell() - overlap)
            continue

        fp.seek(fp.tell() - len(chunk) + loc)
        found_at = fp.tell()
        try:
            data = find_jpeg_photo(fp)
        except TypeError:
            # Candidate eliminated: resume just past this start marker.
            fp.seek(found_at + len(LOOK_FOR))
            continue

        save_path = "%0.4d.PHOTO.JPEG" % count
        print("Candidate successful at %d! Saving to: %s" % (found_at, save_path))
        sys.stderr.write("Match found at %d\n" % found_at)
        with open(save_path, 'wb') as fp_candidate:
            fp_candidate.write(data)
        count += 1

        # find_jpeg_photo() leaves fp at the end of its MAXSIZE read; resume
        # right after the recovered photo so that any photo starting inside
        # the remainder of that window is not skipped.
        fp.seek(found_at + len(data))
if __name__ == "__main__":
    # Usage: script PATH [START_OFFSET]
    #   PATH          raw device or image file to scan
    #   START_OFFSET  optional byte offset at which to begin scanning
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s PATH [START_OFFSET]\n" % sys.argv[0])
        sys.exit(2)

    path = sys.argv[1]
    print("Searching device: %s" % path)
    # The with-block closes the input even if the scan is interrupted.
    with open(path, 'rb') as fp:
        if len(sys.argv) > 2:
            seek_to = int(sys.argv[2])
            print("Seeking to: %d" % seek_to)
            fp.seek(seek_to)
        search_device(fp)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment