Created
April 21, 2017 13:34
-
-
Save paretech/8c0c3ba50868608bdf1d3ed7a3c85da7 to your computer and use it in GitHub Desktop.
Sifting files generated by photorec using md5deep and tiny pieces of python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import csv | |
import os.path | |
import os | |
import shutil | |
'''Haystack.py - sifting through photorec-recovered data.

Concept: use photorec, md5deep, the NSRL "known" file dataset and personal
"known" file datasets (generated with md5deep) to isolate, or at least shrink,
the heap of files produced by poor data management practices and recovered by
photorec.

This code is terrible; do not use it. It was written one-handed, and probably
with other lazy methods as well.
'''
def read_nsrl_results(known_files, found_files, ignore_files):
    """Append recovered files whose MD5 is in a known-hash list to a file.

    Args:
        known_files: Path to a text file holding one MD5 digest per line.
            Undecodable bytes are ignored and blank lines are skipped.
        found_files: Mapping of upper-cased MD5 digest to the value recorded
            for that digest.
        ignore_files: Path of the output file. One ``"<digest> <value>"``
            line is appended per match; the file is not opened at all when
            nothing matches.
    """
    matches = []
    with open(known_files, errors='ignore') as f:
        # Plain line iteration replaces csv.reader(delimiter='\n'): a newline
        # always ends the csv record, so it never acted as a field separator.
        for raw_line in f:
            # Drop surrounding double quotes, as the csv reader did for
            # quoted fields.
            digest = raw_line.strip().strip('"')
            if not digest:
                continue
            # Look up with the same normalised key used for the membership
            # test; the raw digest may be lower-case.
            key = digest.upper()
            if key in found_files:
                # Newline-terminate each record so the output can be read
                # back line by line.
                matches.append("{} {}\n".format(digest, found_files[key]))
    if matches:
        # Open the output once instead of once per matching line.
        with open(ignore_files, 'a') as wf:
            wf.writelines(matches)
def md5read(infile):
    """Load a space-delimited hash listing into a dictionary.

    Args:
        infile: Path to a text file whose lines look like ``<hash> <value>``
            with a single space between the fields. Undecodable bytes are
            ignored.

    Returns:
        dict mapping each upper-cased hash to the second field of its line,
        or to ``None`` when the line has no second field. Blank lines are
        skipped; for duplicate hashes the last line wins.
    """
    # NOTE: only the second space-delimited field is kept, so a value that
    # itself contains spaces is truncated at its first space.
    hashes = {}
    with open(infile, errors='ignore', newline='') as f:
        for row in csv.reader(f, delimiter=' '):
            if not row:
                continue
            # Decide per row: one short line must not discard the entries
            # already read or turn every other value into None.
            hashes[row[0].upper()] = row[1] if len(row) > 1 else None
    return hashes
def moves_files(move_files, directory_name,
                source_dir='/Volumes/UNTITLED/photorec',
                source_hashes='thing_o_hash1.txt'):
    """Move recovered files whose hash appears in a known-file listing.

    Hashes present in both ``source_hashes`` and ``move_files`` select the
    files to move; each one is moved from ``source_dir`` into
    ``source_dir/directory_name``.

    Args:
        move_files: Path to the hash listing of known files (read with
            ``md5read``).
        directory_name: Name of the sub-directory of ``source_dir`` that
            receives the matching files. It is created only when at least
            one hash matches.
        source_dir: Directory holding the recovered files.
        source_hashes: Path to the hash listing of the recovered files; its
            values are taken as paths relative to ``source_dir``.
    """
    source_files = md5read(source_hashes)
    known_files = md5read(move_files)
    destination_dir = os.path.join(source_dir, directory_name)
    common_file_set = source_files.keys() & known_files.keys()
    if not common_file_set:
        return

    try:
        os.mkdir(destination_dir)
    except FileExistsError as e:
        # An earlier run may already have created the directory.
        print(e)

    # Sorted so the order of moves (and of printed errors) is reproducible.
    for digest in sorted(common_file_set):
        relative_path = source_files[digest]
        if not relative_path:
            # md5read yields None for lines without a path. Joining None
            # raises TypeError, and an empty path would resolve to
            # source_dir itself.
            print("no path recorded for {}; skipping".format(digest))
            continue
        source_file = os.path.join(source_dir, relative_path)
        try:
            shutil.move(source_file, destination_dir)
        except (shutil.Error, FileNotFoundError) as e:
            # Best effort: report the failure and carry on with the rest.
            print(e)
if __name__ == "__main__":
    # Record which recovered files are already listed in the NSRL hash set.
    recovered_hashes = md5read('thing_o_hash.txt')
    read_nsrl_results(
        '../known/NSRLFileMD5Only.txt', recovered_hashes, 'nsrl.txt')

    # find common files
    # Each known-file listing is paired with the folder its matches go to.
    sift_plan = (
        ('dropbox_files.txt', 'dropbox'),
        ('applications.txt', 'Applications'),
        ('Library.txt', 'Library'),
        ('System.txt', 'System'),
        ('nsrl.txt', 'NSRL'),
        ('external.txt', 'external'),
    )
    for listing, folder in sift_plan:
        moves_files(listing, folder)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment