# Source: GitHub Gist davidwtbuxton/3427194 by @davidwtbuxton,
# created August 22, 2012 16:24.
import os
from collections import defaultdict
# http://www.reddit.com/r/learnpython/comments/yn52v/noob_advice_please_best_way_to_approach_this/
def normalized_filenames(dirname, func):
    """Map each normalized name to the real paths found under a directory.

    Walks ``dirname`` and all of its sub-directories with ``os.walk``. Every
    entry encountered -- sub-directory names as well as file names -- is
    joined with the directory it was found in, and the resulting path is
    filed under the key ``func(path)``.

    Args:
        dirname: Directory at which the walk starts.
        func: Callable taking a path and returning the normalized key for it.

    Returns:
        A ``defaultdict(list)`` mapping each key to the list of paths that
        normalize to it. Because it is a ``defaultdict``, indexing a missing
        key with ``[]`` inserts an empty list; use ``.get(key, [])`` for
        lookups that must not modify the map.
    """
    # Values are lists because more than one path can share the same key.
    matches = defaultdict(list)
    for root, dirs, files in os.walk(dirname):
        for entry in dirs + files:
            full_path = os.path.join(root, entry)
            matches[func(full_path)].append(full_path)
    return matches
def keyfunc(filename):
    """Return the normalized lookup key for a filename or path.

    The key is the last component of the path, with its final extension
    removed, converted to lower case. Only the last extension is dropped
    (``"a.tar.gz"`` becomes ``"a.tar"``), and a leading-dot name such as
    ``".hidden"`` is kept whole because ``os.path.splitext`` does not treat
    it as an extension.

    Args:
        filename: A bare filename or a full path.

    Returns:
        The lower-cased base name without its extension.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    return stem.lower()
# Example driver: index every entry under ``startdir`` by its normalized
# name, then report which real paths match each name from the spreadsheet.
startdir = '/path/to/hd'
lowercase_names_map = normalized_filenames(startdir, keyfunc)

# NOTE(review): ``spreadsheet`` is not defined anywhere in this snippet; it is
# presumably an iterable of picture names read from the spreadsheet and must
# be supplied before this loop runs -- confirm against the caller.
for picname in spreadsheet:
    normname = keyfunc(picname)
    # .get() rather than [] so that a miss does not add an empty entry to the
    # defaultdict.
    foundnames = lowercase_names_map.get(normname, [])
    # Parenthesised single-argument form works on both Python 2 and Python 3.
    print("%s (%s) matches %r" % (picname, normname, foundnames))
# (GitHub page footer: sign up or sign in on GitHub to comment on this gist.)