kieranjol · August 17, 2018 09:51
diff --git a/server_restore.py b/server_restore.py
 #!/usr/bin/env python

 import subprocess
 import sys
 import os
 import csv
 import time
 import argparse
 import copyit
 import ififuncs

 def parse_args(args_):
     '''
     Build the command line interface and parse args_ with it.

     The three options -damaged_server, -backup_server and -csv are all
     mandatory. Returns the argparse namespace holding their values.
     '''
     # (flag, help text) for every option; each one is required.
     option_table = (
         (
             '-damaged_server',
             'Path to the damaged server that will receive the restored files from the backup server',
         ),
         (
             '-backup_server',
             'Path to the restored backup server that contains the good versions of the files.',
         ),
         (
             '-csv',
             'Path to the audit CSV as generated by exif_audit.py',
         ),
     )
     cli = argparse.ArgumentParser(
         description='Restore broken files on a server.'
         ' Written by Kieran O\'Leary.'
     )
     for flag, help_text in option_table:
         cli.add_argument(flag, help=help_text, required=True)
     return cli.parse_args(args_)

 def create_csv(csv_file, *args):
     '''
     Creates a CSV file, truncating any file that already exists at csv_file.

     Insert a single list/tuple of headings as the *args; it is written
     as the first row of the new file.
     '''
     # The csv module needs a binary file on Python 2 but a text file opened
     # with newline='' on Python 3. Writing to a 'wb' handle on Python 3
     # raises TypeError, so pick the mode that suits the running interpreter.
     if sys.version_info[0] >= 3:
         f = open(csv_file, 'w', newline='')
     else:
         f = open(csv_file, 'wb')
     # The with block closes the handle even if writerow raises.
     with f:
         writer = csv.writer(f)
         writer.writerow(*args)


 def append_csv(csv_file, *args):
     '''
     Appends one row to a pre-existing CSV file (the file is created if it
     does not exist yet).

     Insert a single list/tuple of values as the *args; it is written as
     one row at the end of csv_file.
     '''
     # The csv module needs a binary file on Python 2 but a text file opened
     # with newline='' on Python 3. Writing to an 'ab' handle on Python 3
     # raises TypeError, so pick the mode that suits the running interpreter.
     if sys.version_info[0] >= 3:
         f = open(csv_file, 'a', newline='')
     else:
         f = open(csv_file, 'ab')
     # The with block closes the handle even if writerow raises.
     with f:
         writer = csv.writer(f)
         writer.writerow(*args)

 def parse_audit(audit_entries, restored_backup):
     '''
     This looks through the entries from the csv (audit_entries),
     and then it replaces the paths so that python can find both the
     corrupted files and the repaired files.

     audit_entries is the list of raw lines from the audit CSV; the first
     line is treated as the heading and ignored. In every other line
     "/Volumes/Scans" is swapped for "S:\\" (corrupt copy) and for "X:\\"
     (clean copy), double quotes are dropped and the path is normalised.
     Lines that are blank once quotes and whitespace are removed are skipped.

     restored_backup is accepted for the caller's convenience but is not
     used here; the drive letters are hard-coded.

     TODO - Anything that does not get processed should still end up in the csv_file as a failure
     TODO - check if isfile, and add error to csv if not.

     Returns a dict that takes the form of {corruptfile: cleanfile}.
     '''
     audit_dict = {}
     # Slice out the first item, which is the CSV heading for filename.
     for entry in audit_entries[1:]:
         # A blank line (e.g. a trailing newline at the end of the audit)
         # would otherwise become an empty-path entry that gets sent to
         # exiftool and logged in the report.
         if not entry.replace("\"", "").strip():
             continue
         clean = os.path.normpath(entry.replace("/Volumes/Scans", "X:\\").replace("\"", "")).rstrip()
         corrupt = os.path.normpath(entry.replace("/Volumes/Scans", "S:\\").replace("\"", "")).rstrip()
         audit_dict[corrupt] = clean
     return audit_dict
   
 def rename_corrupted_file(corrupted_file):
     '''
     Move the corrupted file aside by appending a fixed marker to its name.

     The marker makes every sidelined file easy to find (and delete) later,
     and keeps the corrupt data around for the time being just in case.
     Returns the new path of the renamed file.
     '''
     sidelined = corrupted_file + '_corrupted20180616'
     # (old name, new name) is used for both log lines and for the rename.
     old_and_new = (corrupted_file, sidelined)
     print(' - Attempting to rename %s with %s' % old_and_new)
     os.rename(*old_and_new)
     print(' - Renamed %s with %s' % old_and_new)
     return sidelined

 def check_corruption(corrupted_file):
     '''
     This performs a similar check to exif_audit.py, in that it will
     use exiftool in order to determine if the string 'Entire file is binary zeros'
     is present in the output.

     The output is only inspected when exiftool exits with a non-zero
     status (subprocess.CalledProcessError); a successful run is never
     reported as corrupt.

     Returns 'corrupt' if the string was found, otherwise 'unknown'.
     An OSError from subprocess (e.g. exiftool not installed) is not caught.
     '''
     try:
         subprocess.check_output(['exiftool', corrupted_file])
     except subprocess.CalledProcessError as e:
         # The captured output is bytes on Python 3 (a plain byte string on
         # Python 2), so search with a bytes literal; testing a str against
         # bytes raises TypeError on Python 3.
         if b'Entire file is binary zeros' in (e.output or b''):
             print(' - Corrupt file : %s ' % corrupted_file)
             return 'corrupt'
     return 'unknown'

 def main(args_):
     '''
     Checks every file listed in the audit CSV for corruption and replaces
     the corrupt ones with the copy found on the backup server.

     args_ is the list of command line arguments (without the program
     name), see parse_args. One row per audited file is appended to a
     timestamped CSV report in ~/Desktop.

     Exits with status 1 if either server path is not a directory.
     '''
     args = parse_args(args_)
     source = args.damaged_server
     restored_backup = args.backup_server
     audit_filename = args.csv
     # print() with a single argument behaves the same on Python 2 and 3.
     if not os.path.isdir(source):
         print('Your first argument should be the source directory that you wish to process.')
         sys.exit(1)
     if not os.path.isdir(restored_backup):
         print('Your second argument should be the backup directory that files will be read from')
         sys.exit(1)
     csv_report_filename = 'server_restoration_report' + time.strftime("_%Y_%m_%dT%H_%M_%S")
     # CSV will be saved to your Desktop. The name is formatted before the
     # path is expanded so that a '%' in the home directory cannot break it.
     csv_report = os.path.expanduser("~/Desktop/%s.csv" % csv_report_filename)
     create_csv(csv_report, ['source_filepath', 'restored_filepath', 'renamed_corrupted_filepath', 'corrupted_checksum', 'restored_checksum', 'successful_restore'])
     print(' - Analysing %s for corrupt files' % audit_filename)
     with open(audit_filename, 'r') as audit:
         audit_entries = audit.readlines()
     audit_dict = parse_audit(audit_entries, restored_backup)
     print(' - Checking for corruption')
     for corrupt_file in sorted(audit_dict):
         clean_file = audit_dict[corrupt_file]
         if check_corruption(corrupt_file) != 'corrupt':
             append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_processed'])
             continue
         print('\n - Beginning the restoration process for this corrupt file: %s' % corrupt_file)
         # the restored file may not exist in that particular backup
         if not os.path.isfile(clean_file):
             print(' - Exiting as the file does not exist in the restored server')
             append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_present_in_restored_server'])
             continue
         renamed_path = rename_corrupted_file(corrupt_file)
         # copyit is handed [file to copy, destination directory].
         copyit_cmd = [clean_file, os.path.dirname(corrupt_file)]
         print(copyit_cmd)
         copyit.main(copyit_cmd)
         print(' - Generating MD5 checksum of corrupted file')
         corrupted_checksum = ififuncs.hashlib_md5(renamed_path)
         print(' - Generating MD5 checksum of clean file')
         restored_checksum = ififuncs.hashlib_md5(clean_file)
         print(' - Removing %s' % renamed_path)
         removed = 'not_removed'
         try:
             os.remove(renamed_path)
         except OSError:
             # WindowsError is a subclass/alias of OSError and does not exist
             # on other platforms, so OSError is caught instead.
             removed = 'could_not_delete_corrupt_file'
             print(' - Could not delete corrupt file')
         if not os.path.isfile(renamed_path):
             removed = 'removed'
         append_csv(csv_report, [corrupt_file, clean_file, renamed_path, corrupted_checksum, restored_checksum, removed])
 if __name__ == '__main__':
     # Run the restoration only when executed as a script; the program
     # name in sys.argv[0] is not passed on to main.
     cli_arguments = sys.argv[1:]
     main(cli_arguments)
	#!/usr/bin/env python

	import subprocess
	import sys
	import os
	import csv
	import time
	import argparse
	import copyit
	import ififuncs

	def parse_args(args_):
	    '''
	    Build the command line interface and parse args_ with it.

	    The three options -damaged_server, -backup_server and -csv are all
	    mandatory. Returns the argparse namespace holding their values.
	    '''
	    # (flag, help text) for every option; each one is required.
	    option_table = (
	        (
	            '-damaged_server',
	            'Path to the damaged server that will receive the restored files from the backup server',
	        ),
	        (
	            '-backup_server',
	            'Path to the restored backup server that contains the good versions of the files.',
	        ),
	        (
	            '-csv',
	            'Path to the audit CSV as generated by exif_audit.py',
	        ),
	    )
	    cli = argparse.ArgumentParser(
	        description='Restore broken files on a server.'
	        ' Written by Kieran O\'Leary.'
	    )
	    for flag, help_text in option_table:
	        cli.add_argument(flag, help=help_text, required=True)
	    return cli.parse_args(args_)

	def create_csv(csv_file, *args):
	    '''
	    Creates a CSV file, truncating any file that already exists at csv_file.

	    Insert a single list/tuple of headings as the *args; it is written
	    as the first row of the new file.
	    '''
	    # The csv module needs a binary file on Python 2 but a text file opened
	    # with newline='' on Python 3. Writing to a 'wb' handle on Python 3
	    # raises TypeError, so pick the mode that suits the running interpreter.
	    if sys.version_info[0] >= 3:
	        f = open(csv_file, 'w', newline='')
	    else:
	        f = open(csv_file, 'wb')
	    # The with block closes the handle even if writerow raises.
	    with f:
	        writer = csv.writer(f)
	        writer.writerow(*args)


	def append_csv(csv_file, *args):
	    '''
	    Appends one row to a pre-existing CSV file (the file is created if it
	    does not exist yet).

	    Insert a single list/tuple of values as the *args; it is written as
	    one row at the end of csv_file.
	    '''
	    # The csv module needs a binary file on Python 2 but a text file opened
	    # with newline='' on Python 3. Writing to an 'ab' handle on Python 3
	    # raises TypeError, so pick the mode that suits the running interpreter.
	    if sys.version_info[0] >= 3:
	        f = open(csv_file, 'a', newline='')
	    else:
	        f = open(csv_file, 'ab')
	    # The with block closes the handle even if writerow raises.
	    with f:
	        writer = csv.writer(f)
	        writer.writerow(*args)

	def parse_audit(audit_entries, restored_backup):
	    '''
	    This looks through the entries from the csv (audit_entries),
	    and then it replaces the paths so that python can find both the
	    corrupted files and the repaired files.

	    audit_entries is the list of raw lines from the audit CSV; the first
	    line is treated as the heading and ignored. In every other line
	    "/Volumes/Scans" is swapped for "S:\\" (corrupt copy) and for "X:\\"
	    (clean copy), double quotes are dropped and the path is normalised.
	    Lines that are blank once quotes and whitespace are removed are skipped.

	    restored_backup is accepted for the caller's convenience but is not
	    used here; the drive letters are hard-coded.

	    TODO - Anything that does not get processed should still end up in the csv_file as a failure
	    TODO - check if isfile, and add error to csv if not.

	    Returns a dict that takes the form of {corruptfile: cleanfile}.
	    '''
	    audit_dict = {}
	    # Slice out the first item, which is the CSV heading for filename.
	    for entry in audit_entries[1:]:
	        # A blank line (e.g. a trailing newline at the end of the audit)
	        # would otherwise become an empty-path entry that gets sent to
	        # exiftool and logged in the report.
	        if not entry.replace("\"", "").strip():
	            continue
	        clean = os.path.normpath(entry.replace("/Volumes/Scans", "X:\\").replace("\"", "")).rstrip()
	        corrupt = os.path.normpath(entry.replace("/Volumes/Scans", "S:\\").replace("\"", "")).rstrip()
	        audit_dict[corrupt] = clean
	    return audit_dict

	def rename_corrupted_file(corrupted_file):
	    '''
	    Move the corrupted file aside by appending a fixed marker to its name.

	    The marker makes every sidelined file easy to find (and delete) later,
	    and keeps the corrupt data around for the time being just in case.
	    Returns the new path of the renamed file.
	    '''
	    sidelined = corrupted_file + '_corrupted20180616'
	    # (old name, new name) is used for both log lines and for the rename.
	    old_and_new = (corrupted_file, sidelined)
	    print(' - Attempting to rename %s with %s' % old_and_new)
	    os.rename(*old_and_new)
	    print(' - Renamed %s with %s' % old_and_new)
	    return sidelined

	def check_corruption(corrupted_file):
	    '''
	    This performs a similar check to exif_audit.py, in that it will
	    use exiftool in order to determine if the string 'Entire file is binary zeros'
	    is present in the output.

	    The output is only inspected when exiftool exits with a non-zero
	    status (subprocess.CalledProcessError); a successful run is never
	    reported as corrupt.

	    Returns 'corrupt' if the string was found, otherwise 'unknown'.
	    An OSError from subprocess (e.g. exiftool not installed) is not caught.
	    '''
	    try:
	        subprocess.check_output(['exiftool', corrupted_file])
	    except subprocess.CalledProcessError as e:
	        # The captured output is bytes on Python 3 (a plain byte string on
	        # Python 2), so search with a bytes literal; testing a str against
	        # bytes raises TypeError on Python 3.
	        if b'Entire file is binary zeros' in (e.output or b''):
	            print(' - Corrupt file : %s ' % corrupted_file)
	            return 'corrupt'
	    return 'unknown'

	def main(args_):
	    '''
	    Checks every file listed in the audit CSV for corruption and replaces
	    the corrupt ones with the copy found on the backup server.

	    args_ is the list of command line arguments (without the program
	    name), see parse_args. One row per audited file is appended to a
	    timestamped CSV report in ~/Desktop.

	    Exits with status 1 if either server path is not a directory.
	    '''
	    args = parse_args(args_)
	    source = args.damaged_server
	    restored_backup = args.backup_server
	    audit_filename = args.csv
	    # print() with a single argument behaves the same on Python 2 and 3.
	    if not os.path.isdir(source):
	        print('Your first argument should be the source directory that you wish to process.')
	        sys.exit(1)
	    if not os.path.isdir(restored_backup):
	        print('Your second argument should be the backup directory that files will be read from')
	        sys.exit(1)
	    csv_report_filename = 'server_restoration_report' + time.strftime("_%Y_%m_%dT%H_%M_%S")
	    # CSV will be saved to your Desktop. The name is formatted before the
	    # path is expanded so that a '%' in the home directory cannot break it.
	    csv_report = os.path.expanduser("~/Desktop/%s.csv" % csv_report_filename)
	    create_csv(csv_report, ['source_filepath', 'restored_filepath', 'renamed_corrupted_filepath', 'corrupted_checksum', 'restored_checksum', 'successful_restore'])
	    print(' - Analysing %s for corrupt files' % audit_filename)
	    with open(audit_filename, 'r') as audit:
	        audit_entries = audit.readlines()
	    audit_dict = parse_audit(audit_entries, restored_backup)
	    print(' - Checking for corruption')
	    for corrupt_file in sorted(audit_dict):
	        clean_file = audit_dict[corrupt_file]
	        if check_corruption(corrupt_file) != 'corrupt':
	            append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_processed'])
	            continue
	        print('\n - Beginning the restoration process for this corrupt file: %s' % corrupt_file)
	        # the restored file may not exist in that particular backup
	        if not os.path.isfile(clean_file):
	            print(' - Exiting as the file does not exist in the restored server')
	            append_csv(csv_report, [corrupt_file, clean_file, 'n/a', 'n/a', 'n/a', 'file_not_present_in_restored_server'])
	            continue
	        renamed_path = rename_corrupted_file(corrupt_file)
	        # copyit is handed [file to copy, destination directory].
	        copyit_cmd = [clean_file, os.path.dirname(corrupt_file)]
	        print(copyit_cmd)
	        copyit.main(copyit_cmd)
	        print(' - Generating MD5 checksum of corrupted file')
	        corrupted_checksum = ififuncs.hashlib_md5(renamed_path)
	        print(' - Generating MD5 checksum of clean file')
	        restored_checksum = ififuncs.hashlib_md5(clean_file)
	        print(' - Removing %s' % renamed_path)
	        removed = 'not_removed'
	        try:
	            os.remove(renamed_path)
	        except OSError:
	            # WindowsError is a subclass/alias of OSError and does not exist
	            # on other platforms, so OSError is caught instead.
	            removed = 'could_not_delete_corrupt_file'
	            print(' - Could not delete corrupt file')
	        if not os.path.isfile(renamed_path):
	            removed = 'removed'
	        append_csv(csv_report, [corrupt_file, clean_file, renamed_path, corrupted_checksum, restored_checksum, removed])
	if __name__ == '__main__':
	    # Run the restoration only when executed as a script; the program
	    # name in sys.argv[0] is not passed on to main.
	    main(sys.argv[1:])
No results found