Skip to content

Instantly share code, notes, and snippets.

@hughdbrown
Created September 27, 2016 19:27
Show Gist options
  • Save hughdbrown/b01c35fb1e63e7c2332e7a45f279e878 to your computer and use it in GitHub Desktop.
Save hughdbrown/b01c35fb1e63e7c2332e7a45f279e878 to your computer and use it in GitHub Desktop.
Move updated files from src directory to dst directory
#!/usr/bin/env python
from __future__ import print_function
import os
import sys
from collections import defaultdict
from fnmatch import fnmatch
from pprint import pprint
from hashlib import sha1
def sha1sum(filename):
    """Return the hexadecimal SHA-1 digest of the file at *filename*.

    The file is read in binary mode so the digest reflects the exact bytes
    on disk (no newline translation or text decoding), and in fixed-size
    chunks so large files are not loaded into memory at once.

    Args:
        filename: path of the file to hash.

    Returns:
        The 40-character lowercase hex digest string.

    Raises:
        IOError/OSError: if the file cannot be opened or read.
    """
    digest = sha1()
    with open(filename, 'rb') as stream:
        # iter() with a sentinel stops at the empty read that signals EOF.
        for chunk in iter(lambda: stream.read(65536), b''):
            digest.update(chunk)
    return digest.hexdigest()
def build_destinations(dst, ext):
    """Map each matching filename under *dst* to the directories holding it.

    Walks the tree rooted at *dst* and, for every file whose name matches
    the fnmatch pattern *ext*, records the directory it was found in.

    Args:
        dst: root directory to walk.
        ext: fnmatch-style pattern applied to bare filenames.

    Returns:
        A defaultdict(set) keyed by filename; each value is the set of
        directory paths (as produced by os.walk) containing that name.
    """
    print("# Building destination lookup for '{0}'".format(dst), file=sys.stderr)
    lookup = defaultdict(set)
    for dirpath, _dirnames, filenames in os.walk(dst):
        matching = (name for name in filenames if fnmatch(name, ext))
        for name in matching:
            lookup[name].add(dirpath)
    return lookup
def duplicates(d):
    """Report filenames that live in more than one directory.

    Each duplicate is written to stderr as
    ``<number of directories> <filename>: <sorted directories>``.

    Args:
        d: mapping of filename -> collection of directories containing it.

    Returns:
        The number of filenames that appear in more than one directory.
    """
    # filename is a duplicate if it appears in value more than once
    print("# Finding duplicates", file=sys.stderr)
    duplicate_keys = sorted(k for k, v in d.items() if len(v) > 1)
    for k in duplicate_keys:
        # Sorted so the report is stable from run to run (sets are unordered).
        locations = sorted(d[k])
        print("{0} {1}: {2}".format(len(locations), k, locations), file=sys.stderr)
    return len(duplicate_keys)
def _shell_quote(path):
    """Wrap *path* in single quotes so a POSIX shell reads it as one word."""
    # Inside single quotes only "'" is special: close the quote, emit an
    # escaped quote, then reopen. Paths without "'" come out as '<path>'.
    return "'" + path.replace("'", "'\\''") + "'"


def process_files(src, d, ext):
    """Print the shell commands that reconcile *src* with the destinations.

    For every regular file directly inside *src* whose name matches *ext*
    and is a key of *d*:

    * if its SHA-1 equals that of the same-named destination file, an
      ``rm`` command for the source copy is printed to stdout;
    * otherwise an ``mv`` command into the destination directory is
      printed to stderr.

    Nothing is executed; only command text is emitted. Paths are
    single-quote escaped so names containing quotes or spaces stay one word.

    Args:
        src: directory whose immediate entries are examined.
        d: mapping of filename -> set containing exactly one directory.
        ext: fnmatch-style pattern applied to bare filenames.

    Raises:
        AssertionError: if a matching filename maps to other than one
            directory (when assertions are enabled).
    """
    print("# Calculating moves and deletions", file=sys.stderr)
    for filename in sorted(os.listdir(src)):
        # Membership test first: indexing a defaultdict would insert the key.
        if not fnmatch(filename, ext) or filename not in d:
            continue
        src_fullpath = os.path.join(src, filename)
        if not os.path.isfile(src_fullpath):
            # A directory (or dangling link) that matches the pattern cannot
            # be hashed; leave it alone.
            continue
        dest = d[filename]
        assert len(dest) == 1, "ambiguous destination for {0}".format(filename)
        dst_dir = next(iter(dest))
        dst_fullpath = os.path.join(dst_dir, filename)
        if sha1sum(src_fullpath) == sha1sum(dst_fullpath):
            print("rm {0}".format(_shell_quote(src_fullpath)))
        else:
            # NOTE(review): the mv command goes to stderr while rm goes to
            # stdout, as in the original; confirm that split is intended.
            print(
                "mv {0} {1}".format(
                    _shell_quote(src_fullpath), _shell_quote(dst_dir + "/.")
                ),
                file=sys.stderr,
            )
def sync_files(src, dst, ext='*.pdf'):
    """Emit the commands needed to fold matching files from *src* into *dst*.

    Args:
        src: source directory; a leading ``~`` is expanded.
        dst: root of the destination tree; a leading ``~`` is expanded.
        ext: fnmatch-style filename pattern (default ``'*.pdf'``).

    Raises:
        AssertionError: if any matching filename exists in more than one
            directory under *dst*, since the target would be ambiguous.
    """
    src = os.path.expanduser(src)
    dst = os.path.expanduser(dst)
    # Build dictionary of destinations
    d = build_destinations(dst, ext)
    # Find out if there are any collisions on name. The check is called
    # outside of an assert statement so it still runs (and reports) under
    # `python -O`, which strips assert statements entirely.
    dup_count = duplicates(d)
    if dup_count:
        raise AssertionError(
            "{0} filename(s) appear in more than one destination directory".format(dup_count)
        )
    # Figure out where files should be moved to
    process_files(src, d, ext)
if __name__ == '__main__':
    # Usage: script [src [dst [pattern]]]
    # Any argument left out falls back to the original hard-coded value, so
    # running with no arguments behaves exactly as before.
    cli_args = sys.argv[1:]
    defaults = ['~/Downloads', '~/Dropbox/Documents/programming', '*.pdf']
    if len(cli_args) > len(defaults):
        print("usage: {0} [src [dst [pattern]]]".format(sys.argv[0]), file=sys.stderr)
        sys.exit(2)
    src_dir, dst_dir, pattern = cli_args + defaults[len(cli_args):]
    sync_files(src_dir, dst_dir, pattern)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment