Skip to content

Instantly share code, notes, and snippets.

@tbnorth
Last active August 19, 2019 15:43
Show Gist options
  • Save tbnorth/3c0d2d886653b365ccdac10b47f2882b to your computer and use it in GitHub Desktop.
Save tbnorth/3c0d2d886653b365ccdac10b47f2882b to your computer and use it in GitHub Desktop.
Move files to subdirs, similar to git object paths
"""
to_subdirs.py - move directory content to subdirs based on common prefix
Terry N. Brown [email protected] Mon Jun 24 16:11:59 EDT 2019
"""
import os
import re
import shlex
import sys
from collections import namedtuple, defaultdict
# Pairs a directory entry name (`path`) with the result of matching the
# command-line pattern against that name (`match`).
PathMatch = namedtuple("PathMatch", "path match")
def report(counts):
    """Write a one-line progress summary to stderr.

    Args:
        counts: mapping of destination directory -> number of files
            assigned to it so far.

    The summary shows the total number of files and the largest number
    assigned to any single destination. An empty mapping is reported as
    zeros rather than raising ValueError from max() on an empty sequence,
    which is what happens when no file matched the pattern.
    """
    per_destination = list(counts.values())
    total = sum(per_destination)
    largest = max(per_destination) if per_destination else 0
    sys.stderr.write(
        "Moved %s, max per destination %s\n" % (total, largest)
    )
def plan_moves(todo):
    """Work out the destination subdirectory for each matched entry.

    Args:
        todo: iterable of (path, match) pairs, where match is the result
            of matching the pattern against path (None when it did not
            match).

    Returns:
        List of (path, destination) tuples in input order. The
        destination is the pattern's captured groups joined with '/'.
        Entries that did not match, or whose match captured nothing,
        are left out.
    """
    moves = []
    for path, match in todo:
        if match is None:
            continue
        # Optional groups that did not participate are None, and empty
        # captures would produce destinations such as '' or '/'; drop both
        # so every destination is a usable relative path.
        parts = [group for group in match.groups() if group]
        if parts:
            moves.append((path, '/'.join(parts)))
    return moves


def main(argv=None):
    """Print shell commands that sort the current directory into subdirs.

    Nothing is moved by this function: `mkdir -p` and `mv` command lines
    are printed to stdout (the mkdir lines are echoed to stderr as well,
    together with progress summaries).

    Args:
        argv: argument list, defaults to sys.argv. argv[1] is a regular
            expression matched against the start of each entry name; its
            capture groups become the path components of the destination.

    Returns:
        Process exit status: 0 on success, 2 when the pattern is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.stderr.write("usage: %s REGEX\n" % argv[0])
        return 2
    pattern = re.compile(argv[1])

    print("# Getting list of files")
    names = sorted(os.listdir('.'))
    print("# Got list of files")

    moves = plan_moves(
        PathMatch(name, pattern.match(name)) for name in names
    )

    # Sorted so the generated script is the same from run to run.
    for dest in sorted({dest for _, dest in moves}):
        # Quote, and end option parsing with `--`, so names containing
        # spaces, shell metacharacters or a leading '-' survive the shell.
        mkdir = "mkdir -p -- %s" % shlex.quote(dest)
        print(mkdir)
        sys.stderr.write("%s\n" % mkdir)

    counts = defaultdict(int)
    for done, (src, dest) in enumerate(moves, 1):
        print("mv -- %s %s" % (shlex.quote(src), shlex.quote(dest)))
        counts[dest] += 1
        if done % 500 == 0:
            report(counts)
    report(counts)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment