Skip to content

Instantly share code, notes, and snippets.

@kieranjol
Last active August 17, 2018 09:51
Show Gist options
  • Select an option

  • Save kieranjol/9d88672259e72072b17be66c83e86910 to your computer and use it in GitHub Desktop.

Select an option

Save kieranjol/9d88672259e72072b17be66c83e86910 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import subprocess
import sys
import os
import csv
import time
import argparse
import copyit
import ififuncs
def parse_args(args_):
    '''
    Build the command line parser and parse args_.

    All three options are mandatory. Returns the argparse namespace, which
    carries the attributes damaged_server, backup_server and csv.
    '''
    # (flag, help text) pairs; every option is registered as required.
    required_options = (
        (
            '-damaged_server',
            'Path to the damaged server that will receive the restored files from the backup server',
        ),
        (
            '-backup_server',
            'Path to the restored backup server that contains the good versions of the files.',
        ),
        (
            '-csv',
            'Path to the audit CSV as generated by exif_audit.py',
        ),
    )
    cli = argparse.ArgumentParser(
        description="Restore broken files on a server. Written by Kieran O'Leary."
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, help=help_text, required=True)
    return cli.parse_args(args_)
def create_csv(csv_file, *args):
    '''
    Creates a CSV file, truncating any file that already exists at
    csv_file, and writes one row to it.

    Pass a single list/tuple of headings as the only extra positional
    argument; it is handed to csv.writer.writerow unchanged.
    '''
    # The csv module needs a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(csv_file, 'wb')
    else:
        handle = open(csv_file, 'w', newline='')
    with handle:
        csv.writer(handle).writerow(*args)
def append_csv(csv_file, *args):
    '''
    Appends one row to a pre-existing CSV file (the file is created if it
    does not exist yet).

    Pass a single list/tuple of values as the only extra positional
    argument; it is handed to csv.writer.writerow unchanged.
    '''
    # The csv module needs a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(csv_file, 'ab')
    else:
        handle = open(csv_file, 'a', newline='')
    with handle:
        csv.writer(handle).writerow(*args)
def parse_audit(audit_entries, restored_backup):
    '''
    Map each corrupted file path to the path of its clean backup copy.

    audit_entries is the list of lines read from the audit CSV. The first
    line is treated as the heading and skipped. For every other line the
    "/Volumes/Scans" prefix is replaced with the S: drive (corrupted copy)
    and with the X: drive (clean copy), double quotes are removed, the
    path is normalised with os.path.normpath and trailing whitespace is
    stripped. Blank lines are ignored.

    restored_backup is kept for interface compatibility but is not used;
    the drive letters are hard coded.

    Returns a dict that takes the form of {corruptfile: cleanfile}.

    TODO - Anything that does not get processed should still end up in the csv_file as a failure
    TODO - check if isfile, and add error to csv if not.
    '''
    def on_drive(entry, drive):
        # Swap the /Volumes/Scans prefix for the given drive, then drop the
        # CSV quoting and the trailing newline.
        return os.path.normpath(
            entry.replace("/Volumes/Scans", drive).replace("\"", "")
        ).rstrip()

    audit_dict = {}
    for entry in audit_entries[1:]:
        # A blank (or quotes-only) line would otherwise yield a bogus
        # empty-path entry that gets reported as a file.
        if not entry.replace("\"", "").strip():
            continue
        audit_dict[on_drive(entry, "S:\\")] = on_drive(entry, "X:\\")
    return audit_dict
def rename_corrupted_file(corrupted_file, suffix='_corrupted20180616'):
    '''
    Renames the corrupted file by appending suffix to its full path, so
    that it is easy to search for all corrupted files and delete them
    later. The corrupted data stays on disk under the new name until the
    caller removes it.

    corrupted_file: path of the file to rename.
    suffix: text appended to the path. The default is the marker this
    script has always used, so existing callers are unaffected.

    Returns the new path. Errors raised by os.rename are not caught.
    '''
    renamed_path = corrupted_file + suffix
    print(' - Attempting to rename %s with %s' % (corrupted_file, renamed_path))
    os.rename(corrupted_file, renamed_path)
    print(' - Renamed %s with %s' % (corrupted_file, renamed_path))
    return renamed_path
def check_corruption(corrupted_file):
    '''
    This performs a similar check to exif_audit.py, in that it will
    use exiftool in order to determine if the string 'Entire file is binary zeros'
    is present in the output.

    The output is only inspected when exiftool exits with a non-zero
    status. Returns 'corrupt' when the marker is found and 'unknown' in
    every other case.

    If exiftool cannot be launched at all, the resulting OSError is not
    caught here.
    '''
    exif_cmd = ['exiftool', corrupted_file]
    # Bytes marker: the captured output is a byte string on both Python 2
    # and Python 3, so this comparison works on either without decoding.
    marker = b'Entire file is binary zeros'
    try:
        subprocess.check_output(exif_cmd)
    except subprocess.CalledProcessError as e:
        if any(marker in line for line in (e.output or b'').splitlines()):
            print(' - Corrupt file : %s ' % corrupted_file)
            return 'corrupt'
    return 'unknown'
def main(args_):
    '''
    Restore every corrupt file listed in the audit CSV from the backup.

    args_ is the list of command line arguments (without the program
    name). Both server paths must be existing directories, otherwise a
    message is printed and the script exits with status 1.

    For each path produced by parse_audit, check_corruption is run. A
    corrupt file whose clean copy exists is renamed, replaced via
    copyit, checksummed and the renamed corrupt copy is then deleted.
    Every path gets one row in a timestamped report CSV written to
    ~/Desktop; the last column is one of 'removed', 'not_removed',
    'could_not_delete_corrupt_file',
    'file_not_present_in_restored_server' or 'file_not_processed'.
    '''
    args = parse_args(args_)
    source = args.damaged_server
    restored_backup = args.backup_server
    audit_filename = args.csv
    if not os.path.isdir(source):
        print('Your first argument should be the source directory that you wish to process.')
        sys.exit(1)
    if not os.path.isdir(restored_backup):
        print('Your second argument should be the backup directory that files will be read from')
        sys.exit(1)
    csv_report_filename = 'server_restoration_report' + time.strftime("_%Y_%m_%dT%H_%M_%S")
    # CSV will be saved to your Desktop. The name is formatted before
    # expanduser so that a '%' in the home directory cannot break it.
    csv_report = os.path.expanduser("~/Desktop/%s.csv" % csv_report_filename)
    create_csv(csv_report, ['source_filepath', 'restored_filepath', 'renamed_corrupted_filepath', 'corrupted_checksum', 'restored_checksum', 'successful_restore'])
    print(' - Analysing %s for corrupt files' % audit_filename)
    with open(audit_filename, 'r') as audit:
        audit_entries = audit.readlines()
    audit_dict = parse_audit(audit_entries, restored_backup)
    print(' - Checking for corruption')
    for corrupt_file in sorted(audit_dict):
        clean_file = audit_dict[corrupt_file]
        if check_corruption(corrupt_file) != 'corrupt':
            append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_processed'])
            continue
        print('\n - Beginning the restoration process for this corrupt file: %s' % corrupt_file)
        # the restored file may not exist in that particular backup
        if not os.path.isfile(clean_file):
            print(' - Exiting as the file does not exist in the restored server')
            append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_present_in_restored_server'])
            continue
        # Move the corrupt file out of the way first so the clean copy can
        # take its place in the same directory.
        renamed_path = rename_corrupted_file(corrupt_file)
        copyit_cmd = [clean_file, os.path.dirname(corrupt_file)]
        print(copyit_cmd)
        copyit.main(copyit_cmd)
        print(' - Generating MD5 checksum of corrupted file')
        corrupted_checksum = ififuncs.hashlib_md5(renamed_path)
        print(' - Generating MD5 checksum of clean file')
        # NOTE(review): this hashes the backup copy, not the file that
        # copyit wrote to the damaged server - confirm that is intended.
        restored_checksum = ififuncs.hashlib_md5(clean_file)
        print(' - Removing %s' % renamed_path)
        removed = 'not_removed'
        try:
            os.remove(renamed_path)
        except OSError:
            # OSError rather than WindowsError: the latter is undefined on
            # non-Windows platforms and is a subclass/alias of OSError.
            removed = 'could_not_delete_corrupt_file'
            print(' - Could not delete corrupt file')
        if not os.path.isfile(renamed_path):
            removed = 'removed'
        append_csv(csv_report, [corrupt_file, clean_file, renamed_path, corrupted_checksum, restored_checksum, removed])
# Script entry point: forward the command line arguments (minus the
# program name) to main() only when this file is executed directly.
if '__main__' == __name__:
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment