Created
September 14, 2012 02:15
-
-
Save glasserc/3719406 to your computer and use it in GitHub Desktop.
Quick hack to fix my git-annex repo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/python
# Quick hack to find and rearrange files from a broken git-annex.
#
# One of my drives with a git annex suffered major filesystem
# corruption, and pretty much all the files got dumped into
# lost+found. Fortunately git-annex symlinks contain all the
# information you need to find your files and see if they're still OK
# -- to wit, the filesize and a SHA.
import argparse
import hashlib
import os
import shutil
import stat
import subprocess
import sys
# Permission bit masks covering user, group and other:
# ALL_READ is the read bits, ALL_EXC the execute/search bits.
ALL_READ = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
ALL_EXC = stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
class Main(object):
    """Match broken git-annex symlinks to orphaned files and move them back.

    For every broken symlink under ``linksdir`` the size and hash encoded
    in the link target's basename are used to look for a regular file of
    that size and SHA-256 digest under ``filesdir``.  A matching file is
    moved to the location the symlink points at.
    """

    def __init__(self, argv=None):
        """Parse the command line.

        argv: list of argument strings; ``None`` lets argparse fall back
            to ``sys.argv[1:]``.
        """
        self.parser = argparse.ArgumentParser(
            description='find the files pointed to by broken git-annex links')
        self.parser.add_argument(
            'linksdir', type=str,
            help='directory where all the broken links are')
        self.parser.add_argument(
            'filesdir', type=str,
            help='directory where all the lost+found files are')
        self.options = self.parser.parse_args(argv)

    @staticmethod
    def _split_nul(output):
        """Split NUL-separated ``find -print0`` output into a list of paths.

        Returns an empty list for empty output (a plain ``split`` would
        give ``['']``).  On Python 3 the bytes are decoded the same way the
        ``os`` module decodes file names, so the paths can be passed back
        to ``os`` functions unchanged.
        """
        if not isinstance(output, str):
            # Python 3: subprocess returned bytes.
            output = output.decode(sys.getfilesystemencoding(),
                                   'surrogateescape')
        return [path for path in output.split('\0') if path]

    @staticmethod
    def _sha256_hex(path):
        """Return the hex SHA-256 digest of the file at *path*."""
        digest = hashlib.sha256()
        with open(path, 'rb') as handle:
            # Read in chunks so large annexed files don't have to fit in RAM.
            for chunk in iter(lambda: handle.read(1 << 20), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def find_broken_links(self):
        """Return the paths of the broken symlinks below ``linksdir``.

        ``find -L`` follows symlinks, so ``-type l`` is only left matching
        links whose target cannot be resolved.
        """
        find_output = subprocess.check_output(
            ['find', '-L', self.options.linksdir, '-type', 'l', '-print0'])
        return self._split_nul(find_output)

    def parse_annex_symlink(self, link):
        """Extract backend, size and hash from the key a symlink points at.

        The basename of the link target is expected to look like
        ``BACKEND-sSIZE[-more-fields]--NAME``.

        Returns a dict with the string values ``'backend'``, ``'size'``
        (decimal byte count, without the leading ``s``) and ``'hash'``.

        Raises ValueError if the basename does not have that shape.
        """
        key = os.path.basename(os.readlink(link))
        # Split on the first '--' only: the name part may itself contain '--'.
        fields_part, separator, name = key.partition('--')
        if not separator:
            raise ValueError(
                "{0} does not point at a git-annex key: {1!r}".format(
                    link, key))
        fields = fields_part.split('-')
        backend = fields[0]
        size = None
        for field in fields[1:]:
            # Other fields (e.g. an mtime) may sit next to the size; skip them.
            if field.startswith('s') and field[1:].isdigit():
                size = field[1:]
        if size is None:
            raise ValueError(
                "no size field in git-annex key {0!r} (from {1})".format(
                    key, link))
        if backend.endswith('E'):
            # Backends such as SHA256E append the file's extension to the
            # hash; drop it so the value compares equal to a bare digest.
            name = name.split('.', 1)[0]
        return {'hash': name, 'backend': backend, 'size': size}

    def matchup_source(self, link):
        """Find the file for *link* under ``filesdir`` and move it into place.

        Candidates are regular files whose size equals the size recorded in
        the link; the first one whose SHA-256 digest equals the recorded
        hash is moved to the link's target path.  Only SHA-256 is computed,
        whatever backend the key names.  Warnings and the "no match" case
        are reported on stdout.
        """
        info = self.parse_annex_symlink(link)
        find_output = subprocess.check_output(
            ['find', self.options.filesdir, '-type', 'f',
             '-size', info['size'] + 'c', '-print0'])
        candidates = self._split_nul(find_output)
        if len(candidates) != 1:
            print("WARNING: {0} matches for {1} with size {2}: {3}".format(
                len(candidates), link, info['size'], candidates))

        found = None
        for candidate in candidates:
            digest = self._sha256_hex(candidate)
            if digest == info['hash']:
                found = candidate
                break
            print("WARNING: hash doesn't match up for {0} ({1}) and {2} ({3})"
                  .format(link, info['hash'], candidate, digest))

        if found is None:
            print("No match found for {0}".format(link))
            return

        dest = os.path.abspath(
            os.path.join(os.path.dirname(link), os.readlink(link)))
        dest_dir = os.path.dirname(dest)
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        # Unlock both source and dest, so that moving it doesn't
        # cause problems.
        src_dir = os.path.dirname(found)
        src_mode = stat.S_IMODE(os.stat(src_dir).st_mode)
        os.chmod(dest_dir, 0o777)
        os.chmod(src_dir, 0o777)
        try:
            shutil.move(found, dest)
        finally:
            # Don't leave the source directory world-writable behind us.
            os.chmod(src_dir, src_mode)
        # Lock the restored object and its directory again (read-only).
        os.chmod(dest, ALL_READ)
        os.chmod(dest_dir, ALL_READ | ALL_EXC)

    def run(self):
        """Process every broken link found under ``linksdir``."""
        for link in self.find_broken_links():
            self.matchup_source(link)
if __name__ == '__main__':
    # Script entry point: parse sys.argv, then process every broken link.
    m = Main()
    m.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment