After you run GNU ddrescue on a drive with an NTFS filesystem, this script uses your ddrescue logfile and the ntfs-3g tools to list the files affected by unrecoverable bad chunks.
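A minimal sketch of the workflow (the device names and script filename here are illustrative placeholders, not part of the gist): first rescue the failing drive to a good one while recording a logfile, e.g. ddrescue -d /dev/sdd /dev/sde work.log, then fill in the device, log, and log_adj variables at the top of the script and run it under Python 2, e.g. python2 list_bad_files.py.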
#!/usr/bin/env python
import sys
import os
import subprocess
import re
# usage:
#
# first run GNU ddrescue with a logfile,
# then update the following variables

# block device of your NTFS filesystem (you should probably use the target, on your good drive)
device = "/dev/sde2"

# logfile produced by ddrescue
log = "work.log"

# difference between your ddrescue logfile offsets and your partition start, in bytes
#
# 1. if you ran ddrescue on your NTFS filesystem partition, put 0 here
#
# 2. if you ran ddrescue on the whole drive, get your filesystem offset from fdisk:
#
#    fdisk /dev/sde
#    u (change display units; repeat until you're in sectors)
#    p (take the "Start" sector for your filesystem's partition and multiply it
#       by the sector size shown at the top of the output; eg. 718848 * 512)
log_adj = 368050176
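# worked example: a partition starting at sector 718848 on a drive with
# 512-byte sectors gives log_adj = 718848 * 512 = 368050176 (the value above)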
# that's it!
def ph(message):
    # print a section header inside a banner of asterisks
    stars = '*' * (len(message) + 2)
    print stars
    print " {} ".format(message)
    print stars
ph("Read log") | |
with open(log, 'rb') as lf: | |
lines = list(lf) | |
lines = [l for l in lines if not l.startswith('#')] | |
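# the remaining lines are the current-position line followed by data lines of
# the form "pos size status"; a status of '-' marks an unrecoverable block, eg.
#   0x15F00000  0x00010000  -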
bad_lines = [l for l in lines[1:] if l.strip().endswith('-')]
matches = [re.match(r'0x([0-9a-fA-F]+)[\t ]+0x([0-9a-fA-F]+)', l.strip()) for l in bad_lines]
# translate each bad chunk to (offset within the filesystem, size), in bytes
bad_chunks = [(int(m.group(1), 16) - log_adj, int(m.group(2), 16)) for m in matches]

print "Bad chunks:"
for bc in bad_chunks:
    print " offs {} size {}".format(bc[0], bc[1])
ph("Read filesystem metadata") | |
ntfsinfo_output = subprocess.check_output(['ntfsinfo', '-m', device]) | |
matches = re.search(r'^[\t ]*Cluster Size: ([0-9]+)$', ntfsinfo_output, re.MULTILINE) | |
cluster_size = int(matches.group(1)) | |
print "Cluster size is {}".format(cluster_size) | |
ph("Read directory tree") | |
def read_path(path): | |
ls_output = subprocess.check_output(['ntfsls', '-F', '-p', path.encode('utf-8'), device]).decode('utf-8').strip().split(u'\n') | |
files = {} | |
for filename in ls_output: | |
if filename in [u'./', u'../']: | |
continue | |
if any(filename.endswith(c) for c in u'*@=|'): | |
print("Ignoring unrecognized file type {}".format(filename)) | |
continue | |
sys.stdout.write('\033[2K' + os.path.join(path, filename).encode('utf-8') + '\r') | |
sys.stdout.flush() | |
if filename.endswith(u'/'): | |
files[filename] = read_path(os.path.join(path, filename[:-1])) | |
else: | |
files[filename] = os.path.join(path, filename) | |
return files | |
fs_tree = read_path(u'/') | |
sys.stdout.write('\033[2K') | |
sys.stdout.flush() | |
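# fs_tree is a nested dict: names ending in '/' map to a sub-dict for that
# directory; all other names map to the file's full path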
all_file_paths = []

def extract_file_paths(tree):
    # flatten the nested directory dict into a list of file paths
    for filename, path_or_contents in tree.iteritems():
        if filename.endswith(u'/'):
            extract_file_paths(path_or_contents)
        else:
            all_file_paths.append(path_or_contents)

extract_file_paths(fs_tree)
print '.'
ph("Get run lists") | |
affected_paths = {} | |
all_file_paths_len = len(all_file_paths) | |
for i, path in enumerate(all_file_paths): | |
sys.stdout.write('\r\033[2K{}% {}'.format(int(100 * i / all_file_paths_len), path.encode('utf-8'))) | |
sys.stdout.flush() | |
lines = subprocess.check_output(['ntfsinfo', '-v', '-F', path.encode('utf-8'), device]).decode('utf-8').strip().split('\n') | |
runlist_lines = [i for i, l in enumerate(lines) if u'Runlist:' in l] | |
overlaps = [] | |
    for lineno in runlist_lines:
        next_line = lineno + 1
        while True:
            if next_line >= len(lines):
                break
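            # a runlist entry prints as three hex columns; the regex below
            # captures the second (starting cluster on disk) and third (run
            # length in clusters), and we stop at the first non-matching line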
            runlist_def = re.match(r'^[\t ]+0x[0-9a-f]+[\t ]+0x([0-9a-f]+)[\t ]+0x([0-9a-f]+)[\t ]*$', lines[next_line])
            if runlist_def is None:
                break
            start_cluster = int(runlist_def.group(1), 16)
            len_clusters = int(runlist_def.group(2), 16)
            # convert clusters to byte offsets within the filesystem
            start_offs = start_cluster * cluster_size
            length = len_clusters * cluster_size
            end_offs = start_offs + length
            for bc in bad_chunks:
                bc_start = bc[0]
                bc_end = bc[0] + bc[1]
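                # half-open intervals [a, b) and [c, d) overlap iff
                # max(a, c) < min(b, d)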
                if max(start_offs, bc_start) < min(end_offs, bc_end):
                    affected_paths.setdefault(path, [])
                    affected_paths[path].append((start_offs, length, bc[0], bc[1]))
                    print "Bad chunk [file: offs {} size {}; bc: offs {} size {}]".format(start_offs, length, bc[0], bc[1])
            next_line += 1

sys.stdout.write('\r\033[2K')
sys.stdout.flush()
ph('Done')

if affected_paths:
    print "Files with problems:"
    for path in sorted(affected_paths.keys()):
        print u" {}".format(path)
else:
    print "No files affected"