Skip to content

Instantly share code, notes, and snippets.

@paretech
Created April 21, 2017 13:34
Show Gist options
  • Save paretech/8c0c3ba50868608bdf1d3ed7a3c85da7 to your computer and use it in GitHub Desktop.
Save paretech/8c0c3ba50868608bdf1d3ed7a3c85da7 to your computer and use it in GitHub Desktop.
Sifting files generated by photorec using md5deep and tiny pieces of python.
#!/usr/bin/env python3
import csv
import os.path
import os
import shutil
'''Haystack.py - sifting through photorec recovered data
Concept of using photorec, md5deep, NSRL "known" file dataset and personal
"known" file datasets (generated with md5deep) to isolate or lessen the heap
generated by poor data management practices and recovered by photorec.
This code is terrible, do not use. It was written with one hand perhaps using
other lazy methods.
'''
def read_nsrl_results(known_files, found_files, ignore_files):
    """Append recovered files whose hash is in a known-hash list to a file.

    Args:
        known_files: Path of a text file holding one hash per line. Hashes
            may be wrapped in double quotes and may be in any letter case.
            Undecodable bytes are dropped while reading.
        found_files: Mapping of UPPER-CASE hash -> file name (the shape
            produced by ``md5read``).
        ignore_files: Path of the output file. It is opened in append mode,
            so earlier content is kept, and it is created even when nothing
            matches.

    Each match is written as ``"<HASH> <name>"`` on its own line so the
    output can be read back one record per line.
    """
    with open(known_files, errors='ignore') as known:
        # Open the output once instead of once per matching hash.
        with open(ignore_files, 'a') as out:
            # Plain line iteration: a newline cannot serve as a csv
            # delimiter, because the csv reader ends the record at the
            # line break before the delimiter is ever considered.
            for raw_line in known:
                # Normalise the same way the lookup keys were built so the
                # membership test and the lookup use one and the same key.
                digest = raw_line.strip().strip('"').upper()
                if digest and digest in found_files:
                    out.write("{} {}\n".format(digest, found_files[digest]))
def md5read(infile):
    """Load a hash listing into a dict keyed by upper-cased hash.

    Every line of *infile* is split on single spaces using csv rules. The
    first field is taken as the hash and the second field, when present,
    as the file name.

    Args:
        infile: Path of the listing to read. Undecodable bytes are dropped
            while reading.

    Returns:
        dict mapping ``HASH`` -> file name, or ``HASH`` -> ``None`` for a
        row that carries only a hash. Blank rows are skipped. When a hash
        occurs more than once, the last occurrence wins.
    """
    hashes = {}
    with open(infile, errors='ignore') as f:
        for row in csv.reader(f, delimiter=' '):
            # Blank lines come back as empty rows; a row with an empty
            # first field has no hash to key on either.
            if not row or not row[0]:
                continue
            # Decide per row whether a name is present. Retrying the whole
            # file after an IndexError would resume from a half-consumed
            # reader and silently lose every row read so far.
            hashes[row[0].upper()] = row[1] if len(row) > 1 else None
    return hashes
def moves_files(move_files, directory_name,
                source_dir='/Volumes/UNTITLED/photorec',
                source_hashes='thing_o_hash1.txt'):
    """Move recovered files whose hash appears in a listing into a sub-folder.

    Args:
        move_files: Path of a hash listing (read with ``md5read``) naming
            the hashes to pull out of the recovered set.
        directory_name: Name of the folder, created under *source_dir*,
            that receives the matching files.
        source_dir: Directory the recovered file names are resolved
            against. Defaults to the location the script was written for.
        source_hashes: Path of the hash listing for the recovered files.
            Its second column supplies the file names to move.

    Nothing is created or moved when the two listings share no hash.
    Per-file move failures are printed and skipped so that one bad entry
    does not abort the rest of the run.
    """
    recovered = md5read(source_hashes)
    wanted = md5read(move_files)
    destination_dir = os.path.join(source_dir, directory_name)

    shared_hashes = recovered.keys() & wanted.keys()
    if not shared_hashes:
        return

    try:
        os.mkdir(destination_dir)
    except FileExistsError:
        # Expected on re-runs: the folder is already there, keep using it.
        pass

    for digest in sorted(shared_hashes):
        name = recovered[digest]
        if not name:
            # A hash-only row has no file name. Joining None would raise
            # TypeError, and an empty name would resolve to source_dir
            # itself, so there is nothing safe to move.
            print("no file name recorded for hash {}".format(digest))
            continue
        try:
            shutil.move(os.path.join(source_dir, name), destination_dir)
        except (shutil.Error, FileNotFoundError) as e:
            print(e)
if __name__ == "__main__":
    # Record every recovered file whose hash is in the NSRL known-file list.
    recovered_hashes = md5read('thing_o_hash.txt')
    read_nsrl_results('../known/NSRLFileMD5Only.txt', recovered_hashes, 'nsrl.txt')

    # Pull files matching each known-hash listing into a folder of their own.
    # The order is significant only in that it matches the original run order.
    listing_to_folder = (
        ('dropbox_files.txt', 'dropbox'),
        ('applications.txt', 'Applications'),
        ('Library.txt', 'Library'),
        ('System.txt', 'System'),
        ('nsrl.txt', 'NSRL'),
        ('external.txt', 'external'),
    )
    for listing, folder in listing_to_folder:
        moves_files(listing, folder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment