Created
October 31, 2010 17:45
-
-
Save shazow/656895 to your computer and use it in GitHub Desktop.
Didn't know about PhotoRec, so I wrote my own jpeg data recovery script in April 2008.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# filenose - Find specific types of files from raw data. | |
from StringIO import StringIO | |
import sys | |
import time | |
from datetime import datetime | |
# Upper bound, in bytes, on how much data find_jpeg_photo() reads for a
# single candidate (4mb).
MAXSIZE = 4 * 1024 * 1024

# Number of bytes search_device() reads per scan step (10mb).
CHUNK_SIZE = 10 * 1024 * 1024

# Maps a file-type name to its (start, end) marker byte sequences.
# For reference: marker ffe1 is exif, marker ffe0 is the application marker.
filetypes = {
    'PHOTO.JPEG': ('\xff\xd8\xff', '\xff\xd9'),
}
def find_jpeg_photo(fp, required_text=b'Canon PowerShot SD850 IS'):
    """Read one JPEG candidate from the current position of *fp* and validate it.

    At most MAXSIZE bytes are read from *fp*.  The candidate is accepted when
    the data starts with the JPEG start marker, contains at least two end
    markers, and *required_text* occurs somewhere before the first end marker.
    The returned data is cut immediately after the second end marker.

    Note that *fp* is left positioned after everything that was read (up to
    MAXSIZE bytes past the starting offset), not after the returned data;
    callers that keep scanning must reposition it themselves.

    Args:
        fp: Binary file-like object positioned at the candidate's first byte.
        required_text: Byte string that must appear before the first end
            marker.  Defaults to the camera model the script was written
            for; pass None (or an empty string) to skip this check.

    Returns:
        The candidate's bytes, from the start marker through the second end
        marker inclusive.

    Raises:
        TypeError: If the candidate fails any of the checks above.  (The
            exception type is kept because search_device() catches it.)
    """
    START, END = filetypes['PHOTO.JPEG']

    data = fp.read(MAXSIZE)
    if not data:
        raise TypeError("end of JPEG not found before EOF")

    if not data.startswith(START):
        raise TypeError("start of JPEG not found")

    # Locate the first two end markers directly instead of counting every
    # occurrence in the buffer and then searching again.
    first = data.find(END)
    second = -1
    if first >= 0:
        second = data.find(END, first + len(END))
    if second < 0:
        raise TypeError("end of JPEG not found within MAXSIZE")

    # The photo is taken to end at the second end marker -- presumably the
    # first one closes an embedded thumbnail; TODO confirm.
    data = data[:second + len(END)]

    if required_text and required_text not in data[:first]:
        raise TypeError("important string not found in data.")

    # Success
    return data
def search_device(fp):
    """Scan *fp* for JPEG start markers and save every candidate that validates.

    The file is read CHUNK_SIZE bytes at a time.  Whenever the JPEG start
    marker is found, the file is positioned on it and find_jpeg_photo() is
    asked to extract the photo.  Each accepted photo is written to
    "NNNN.PHOTO.JPEG" (zero-padded running counter) relative to the current
    working directory.  A progress line is written to stderr every 1000
    loop iterations, and "Done!" is printed when the end of *fp* is reached.

    Args:
        fp: Seekable binary file-like object; scanning starts at its current
            position.

    Returns:
        None.
    """
    # TODO: Make a generic version of this to look for all types
    LOOK_FOR = filetypes['PHOTO.JPEG'][0]
    # Number of trailing bytes of a chunk that could be the beginning of a
    # marker completed by the next chunk.
    overlap = len(LOOK_FOR) - 1

    count = 0
    iterations = 0
    last_progress = fp.tell()
    last_progress_time = time.time()

    while True:
        iterations += 1
        last_pos = fp.tell()

        if (iterations % 1000) == 0:
            now = time.time()
            elapsed = now - last_progress_time
            megabytes = (last_pos - last_progress) / (1024.0 * 1024.0)
            # Guard against a zero interval on clocks with coarse resolution.
            if elapsed > 0:
                rate = megabytes / elapsed
            else:
                rate = 0.0
            sys.stderr.write("Progress %s: %d (%.2f mb, diff: %.2f mb/sec)\n" % (
                datetime.now(), last_pos, (last_pos / 1024.0 / 1024.0), rate))
            sys.stderr.flush()
            last_progress = last_pos
            last_progress_time = now

        chunk = fp.read(CHUNK_SIZE)
        if not chunk:
            print("Done!")
            break

        loc = chunk.find(LOOK_FOR)
        if loc < 0:
            # A marker may straddle the boundary between this chunk and the
            # next one, so re-read the last few bytes.  Only done after a
            # full-sized chunk: a short read means EOF was reached, and
            # stepping back there would loop forever.
            if 0 < overlap < CHUNK_SIZE and len(chunk) == CHUNK_SIZE:
                fp.seek(last_pos + len(chunk) - overlap)
            continue

        found_at = last_pos + loc
        fp.seek(found_at)

        try:
            data = find_jpeg_photo(fp)
        except TypeError:
            # Candidate rejected.  Resume one byte further on rather than
            # after the whole marker, because another marker can begin on
            # the last byte of this one.
            fp.seek(found_at + 1)
            continue

        save_path = "%0.4d.PHOTO.JPEG" % count
        print("Candidate successful at %d! Saving to: %s" % (found_at, save_path))
        sys.stderr.write("Match found at %d\n" % found_at)

        fp_candidate = open(save_path, 'wb')
        try:
            fp_candidate.write(data)
        finally:
            # Close the output file even if the write fails.
            fp_candidate.close()

        count += 1

        # find_jpeg_photo() leaves fp up to MAXSIZE bytes past found_at.
        # Continue right behind the extracted photo so that photos stored
        # shortly after it are not skipped.
        fp.seek(found_at + len(data))
if __name__ == "__main__":
    # Usage: script DEVICE_OR_IMAGE [START_OFFSET]
    #   DEVICE_OR_IMAGE  path of the raw device or image file to scan
    #   START_OFFSET     optional byte offset at which to start scanning
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s DEVICE_OR_IMAGE [START_OFFSET]\n" % sys.argv[0])
        sys.exit(1)

    path = sys.argv[1]
    print("Searching device: %s" % path)

    fp = open(path, 'rb')
    try:
        if len(sys.argv) > 2:
            seek_to = int(sys.argv[2])
            print("Seeking to: %d" % seek_to)
            fp.seek(seek_to)

        search_device(fp)
    finally:
        # Release the device handle even if the scan is interrupted.
        fp.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment