Last active
August 17, 2018 09:51
-
-
Save kieranjol/9d88672259e72072b17be66c83e86910 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import subprocess | |
| import sys | |
| import os | |
| import csv | |
| import time | |
| import argparse | |
| import copyit | |
| import ififuncs | |
def parse_args(args_):
    '''
    Build the command line interface and parse args_ with it.

    args_ is the list of command line tokens (without the program name).
    Returns the argparse namespace holding damaged_server, backup_server
    and csv. All three options are required, so argparse exits with a
    usage message when any of them is missing.
    '''
    # (flag, help text) pairs, in the order they appear in the usage text.
    required_options = (
        (
            '-damaged_server',
            'Path to the damaged server that will receive the restored files from the backup server',
        ),
        (
            '-backup_server',
            'Path to the restored backup server that contains the good versions of the files.',
        ),
        (
            '-csv',
            'Path to the audit CSV as generated by exif_audit.py',
        ),
    )
    cli = argparse.ArgumentParser(
        description="Restore broken files on a server. Written by Kieran O'Leary."
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, required=True, help=help_text)
    return cli.parse_args(args_)
def create_csv(csv_file, *args):
    '''
    Create (or overwrite) a CSV file and write one row to it.

    csv_file is the path of the CSV to create. Pass exactly ONE further
    positional argument: a list/tuple of headings. It is handed unchanged
    to csv.writer.writerow(), so any other number of extra arguments
    raises TypeError.
    '''
    # The csv module needs a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3. Opening in 'wb' on Python 3 makes
    # writerow() fail with TypeError, so pick the mode per interpreter.
    if sys.version_info[0] < 3:
        handle = open(csv_file, 'wb')
    else:
        handle = open(csv_file, 'w', newline='')
    # The context manager closes the handle even if writerow() raises.
    with handle:
        csv.writer(handle).writerow(*args)
def append_csv(csv_file, *args):
    '''
    Append one row to a pre-existing CSV file.

    csv_file is the path of the CSV to extend (it is created if missing,
    because the file is opened in append mode). Pass exactly ONE further
    positional argument: a list/tuple of values. It is handed unchanged
    to csv.writer.writerow(), so any other number of extra arguments
    raises TypeError.
    '''
    # The csv module needs a binary handle on Python 2 but a text handle
    # opened with newline='' on Python 3. Opening in 'ab' on Python 3 makes
    # writerow() fail with TypeError, so pick the mode per interpreter.
    if sys.version_info[0] < 3:
        handle = open(csv_file, 'ab')
    else:
        handle = open(csv_file, 'a', newline='')
    # The context manager closes the handle even if writerow() raises.
    with handle:
        csv.writer(handle).writerow(*args)
def parse_audit(audit_entries, restored_backup, source_root="/Volumes/Scans",
                corrupt_root="S:\\", clean_root="X:\\"):
    '''
    Map every path listed in the audit to its damaged and backup locations.

    audit_entries is the list of lines read from the audit CSV; the first
    line is the heading and is skipped. Each remaining line is treated as a
    single path: double quotes and trailing whitespace are removed,
    source_root is swapped for corrupt_root / clean_root, and the result is
    normalised with os.path.normpath so python can find both the corrupted
    file and the repaired file.

    restored_backup is accepted for interface compatibility but is not
    used here; the roots are taken from the keyword arguments instead.

    Returns a dict of the form {corruptfile: cleanfile}. Blank lines are
    skipped so they do not turn into an empty-path entry that would later
    be sent through the corruption check and the report.

    TODO - Anything that does not get processed should still end up in the
    csv_file as a failure.
    TODO - check if isfile, and add error to csv if not.
    '''
    audit_dict = {}
    # Slice out the first item, which is the CSV heading for filename.
    for entry in audit_entries[1:]:
        # Remove the quoting added by the CSV and the trailing newline
        # before normalising, so the newline never takes part in normpath.
        entry = entry.replace('"', '').rstrip()
        if not entry:
            continue
        corrupt = os.path.normpath(entry.replace(source_root, corrupt_root))
        clean = os.path.normpath(entry.replace(source_root, clean_root))
        audit_dict[corrupt] = clean
    return audit_dict
def rename_corrupted_file(corrupted_file):
    '''
    Set the corrupted file aside under an easily searchable name.

    The fixed suffix '_corrupted20180616' is appended to the path, so all
    corrupted files can be found (and deleted) later, while the corrupt
    data is retained for the time being just in case.

    Returns the new path. Any error from os.rename propagates.
    '''
    quarantine_path = ''.join((corrupted_file, '_corrupted20180616'))
    # Old and new name, reused for both progress messages and the rename.
    names = (corrupted_file, quarantine_path)
    print(' - Attempting to rename %s with %s' % names)
    os.rename(*names)
    print(' - Renamed %s with %s' % names)
    return quarantine_path
def check_corruption(corrupted_file):
    '''
    Ask exiftool whether a file consists entirely of binary zeros.

    This performs a similar check to exif_audit.py: exiftool is run on
    corrupted_file and, when it exits with a non-zero status, its captured
    output is searched for the string 'Entire file is binary zeros'.

    Returns 'corrupt' when that string is found, otherwise 'unknown'
    (including when exiftool succeeds, as its output is not inspected in
    that case). An OSError propagates if exiftool cannot be launched.
    '''
    # check_output() captures bytes on both Python 2 and Python 3, so the
    # marker is a bytes literal; a text literal raises TypeError on Python 3.
    marker = b'Entire file is binary zeros'
    try:
        subprocess.check_output(['exiftool', corrupted_file])
    except subprocess.CalledProcessError as error:
        for line in (error.output or b'').splitlines():
            if marker in line:
                print(' - Corrupt file : %s ' % corrupted_file)
                return 'corrupt'
    return 'unknown'
def main(args_):
    '''
    Restore the corrupt files listed in an audit CSV from a backup server.

    args_ is the list of command line tokens (without the program name);
    see parse_args for the options. The steps are:

    1. Check that the damaged server and backup server paths are
       directories; otherwise print a message and exit with status 1.
    2. Create a timestamped report CSV on the user's Desktop.
    3. Read the audit CSV and build the {corruptfile: cleanfile} mapping.
    4. For each path, in sorted order, re-check it with check_corruption.
       Confirmed-corrupt files whose clean counterpart exists are renamed,
       replaced via copyit.main, checksummed with ififuncs.hashlib_md5,
       and the renamed corrupt copy is deleted.
    5. Append one row per path to the report describing the outcome.
    '''
    args = parse_args(args_)
    source = args.damaged_server
    restored_backup = args.backup_server
    audit_filename = args.csv
    # Exit with a non-zero status so calling scripts can tell that the
    # arguments were rejected.
    if not os.path.isdir(source):
        print('Your first argument should be the source directory that you wish to process.')
        sys.exit(1)
    if not os.path.isdir(restored_backup):
        print('Your second argument should be the backup directory that files will be read from')
        sys.exit(1)
    csv_report_filename = 'server_restoration_report' + time.strftime("_%Y_%m_%dT%H_%M_%S")
    # CSV will be saved to your Desktop. The name is formatted before the
    # home directory is expanded so a '%' in the home path cannot break it.
    csv_report = os.path.expanduser('~/Desktop/%s.csv' % csv_report_filename)
    create_csv(
        csv_report,
        [
            'source_filepath',
            'restored_filepath',
            'renamed_corrupted_filepath',
            'corrupted_checksum',
            'restored_checksum',
            'successful_restore',
        ]
    )
    print(' - Analysing %s for corrupt files' % audit_filename)
    with open(audit_filename, 'r') as audit:
        audit_entries = audit.readlines()
    audit_dict = parse_audit(audit_entries, restored_backup)
    print(' - Checking for corruption')
    for corrupt_file in sorted(audit_dict):
        clean_file = audit_dict[corrupt_file]
        if check_corruption(corrupt_file) != 'corrupt':
            append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_processed'])
            continue
        print('\n - Beginning the restoration process for this corrupt file: %s' % corrupt_file)
        # The restored file may not exist in that particular backup.
        if not os.path.isfile(clean_file):
            print(' - Exiting as the file does not exist in the restored server')
            append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_present_in_restored_server'])
            continue
        renamed_path = rename_corrupted_file(corrupt_file)
        # copyit receives the clean file and the directory that held the
        # corrupt file.
        # NOTE(review): the renamed corrupt copy is deleted below without
        # checking that copyit actually produced a file - confirm copyit
        # reports or raises on failure.
        copyit_cmd = [clean_file, os.path.dirname(corrupt_file)]
        print(copyit_cmd)
        copyit.main(copyit_cmd)
        print(' - Generating MD5 checksum of corrupted file')
        corrupted_checksum = ififuncs.hashlib_md5(renamed_path)
        print(' - Generating MD5 checksum of clean file')
        # NOTE(review): this hashes the copy on the backup server, not the
        # file that was written to the damaged server.
        restored_checksum = ififuncs.hashlib_md5(clean_file)
        print(' - Removing %s' % renamed_path)
        removed = 'not_removed'
        try:
            os.remove(renamed_path)
        except OSError:
            # OSError also covers WindowsError on Windows, and unlike
            # WindowsError the name exists on every platform.
            removed = 'could_not_delete_corrupt_file'
            print(' - Could not delete corrupt file')
        if not os.path.isfile(renamed_path):
            removed = 'removed'
        append_csv(csv_report, [corrupt_file, clean_file, renamed_path, corrupted_checksum, restored_checksum, removed])
if __name__ == '__main__':
    # Hand main() only the user-supplied options, without the program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment