After you use GNU ddrescue on a drive with an NTFS filesystem, this script uses your ddrescue logfile and ntfs-3g to list the files affected by unrecoverable bad chunks.
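For reference, a typical workflow might look like this (device names here are illustrative, not taken from the script): clone the failing drive with something like "ddrescue -f /dev/sdX /dev/sdY work.log", then edit the device, log and log_adj variables at the top of the script and run it under Python 2 on a machine with ntfs-3g's ntfsinfo and ntfsls tools installed.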
#!/usr/bin/env python
import sys
import os
import subprocess
import re
# usage:
#
# first use gnu ddrescue on your drive, with a log file
# then update the following variables
# block device of your ntfs filesystem (probably should use the target, on your good drive)
device = "/dev/sde2"
# logfile produced by ddrescue
log = "work.log"
# difference between your ddrescue logfile offsets and your partition start, in bytes
#
# 1. if you ran ddrescue on your ntfs filesystem partition, put 0 here
#
# 2. if you ran ddrescue on the whole drive, get your filesystem offset from fdisk:
#
# fdisk /dev/sde
# u (change display units, repeat until you're in sectors)
# p (get the "start" sector for your filesystem partition, and multiply it by the sector size shown near the top of the output; e.g. 718848 * 512)
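# (for that example, 718848 * 512 = 368050176 -- which is the value used below)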
log_adj = 368050176
# that's it!
# print a message between rows of asterisks, as a section heading
def ph(message):
    stars = '*' * (len(message) + 2)
    print
    print stars
    print " {} ".format(message)
    print stars
    print
ph("Read log")
with open(log, 'rb') as lf:
    lines = list(lf)
lines = [l for l in lines if not l.startswith('#')]
bad_lines = [l for l in lines[1:] if l.strip().endswith('-')]
matches = [re.match(r'0x([0-9A-F]+)[\t ]+0x([0-9A-F]+)', l.strip()) for l in bad_lines]
# each bad chunk is (byte offset into the ntfs partition, size in bytes)
bad_chunks = [(int(m.group(1), 16) - log_adj, int(m.group(2), 16)) for m in matches]
print("Bad chunks:")
for bc in bad_chunks:
print " offs {} size {}".format(bc[0], bc[1])
ph("Read filesystem metadata")
ntfsinfo_output = subprocess.check_output(['ntfsinfo', '-m', device])
matches = re.search(r'^[\t ]*Cluster Size: ([0-9]+)$', ntfsinfo_output, re.MULTILINE)
cluster_size = int(matches.group(1))
print "Cluster size is {}".format(cluster_size)
ph("Read directory tree")
def read_path(path):
    ls_output = subprocess.check_output(['ntfsls', '-F', '-p', path.encode('utf-8'), device]).decode('utf-8').strip().split(u'\n')
    files = {}
    for filename in ls_output:
        if filename in [u'./', u'../']:
            continue
        if any(filename.endswith(c) for c in u'*@=|'):
            print("Ignoring unrecognized file type {}".format(filename))
            continue
        sys.stdout.write('\033[2K' + os.path.join(path, filename).encode('utf-8') + '\r')
        sys.stdout.flush()
        if filename.endswith(u'/'):
            files[filename] = read_path(os.path.join(path, filename[:-1]))
        else:
            files[filename] = os.path.join(path, filename)
    return files
fs_tree = read_path(u'/')
sys.stdout.write('\033[2K')
sys.stdout.flush()
all_file_paths = []
def extract_file_paths(tree):
    for filename, path_or_contents in tree.iteritems():
        if filename.endswith(u'/'):
            extract_file_paths(path_or_contents)
        else:
            all_file_paths.append(path_or_contents)
extract_file_paths(fs_tree)
print('.')
ph("Get run lists")
affected_paths = {}
all_file_paths_len = len(all_file_paths)
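# ntfsinfo -v -F <path> is parsed below for its "Runlist:" sections: every
# line that follows such a heading and consists of three hex columns is read
# as a runlist entry, with the second column taken as the starting cluster
# (LCN) and the third as the run length in clusters (the first column,
# presumably the VCN, is ignored).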
for i, path in enumerate(all_file_paths):
    sys.stdout.write('\r\033[2K{}% {}'.format(int(100 * i / all_file_paths_len), path.encode('utf-8')))
    sys.stdout.flush()
    lines = subprocess.check_output(['ntfsinfo', '-v', '-F', path.encode('utf-8'), device]).decode('utf-8').strip().split('\n')
    runlist_lines = [n for n, l in enumerate(lines) if u'Runlist:' in l]
    overlaps = []
    for lineno in runlist_lines:
        next_line = lineno + 1
        while True:
            if next_line >= len(lines):
                break
            runlist_def = re.match(r'^[\t ]+0x[0-9a-f]+[\t ]+0x([0-9a-f]+)[\t ]+0x([0-9a-f]+)[\t ]*$', lines[next_line])
            if runlist_def is None:
                break
            start_cluster = int(runlist_def.group(1), 16)
            len_clusters = int(runlist_def.group(2), 16)
            start_offs = start_cluster * cluster_size
            length = len_clusters * cluster_size
            end_offs = start_offs + length
            for bc in bad_chunks:
                bc_start = bc[0]
                bc_end = bc[0] + bc[1]
                # the run and the bad chunk overlap if their byte intervals intersect
                if max(start_offs, bc_start) < min(end_offs, bc_end):
                    affected_paths.setdefault(path, [])
                    affected_paths[path].append((start_offs, length, bc[0], bc[1]))
                    print
                    print "Bad chunk [file: offs {} size {}; bc: offs {} size {}]".format(start_offs, length, bc[0], bc[1])
                    print
            next_line += 1
sys.stdout.write('\r\033[2K')
sys.stdout.flush()
ph('Done')
if affected_paths:
    print "Files with problems:"
    for path in sorted(affected_paths.keys()):
        print u" {}".format(path)
else:
    print "No files affected"