Skip to content

Instantly share code, notes, and snippets.

@cwells
Created October 26, 2017 22:01
Show Gist options
  • Save cwells/8b68c259ec6402d9671cad1ca90e199c to your computer and use it in GitHub Desktop.
Save cwells/8b68c259ec6402d9671cad1ca90e199c to your computer and use it in GitHub Desktop.
Create new CSV file by combining results of directory traversal + original CSV file
#!/usr/bin/python
import csv
import os
from datetime import datetime
def _load_targets(mapping_path):
    """Read the mapping CSV and return ``{file basename: s3_path}``.

    The CSV must have a header row containing ``file_path`` and ``s3_path``
    columns. Only the basename of ``file_path`` is used as the key, so when
    several rows share a basename the last row wins.
    """
    targets = {}
    with open(mapping_path) as csvfile:
        for row in csv.DictReader(csvfile):
            targets[os.path.basename(row['file_path'])] = row['s3_path']
    return targets


def _collect_sources(root, targets, skipped):
    """Walk ``root`` and pick the most recently modified file per target name.

    ``targets`` maps file basenames to destinations. ``skipped`` is a
    writable text file object; the full path of every file whose basename is
    not in ``targets`` is written to it, one path per line.

    Returns a dict keyed by basename whose values are dicts with ``src``
    (full path on disk), ``dst`` (destination taken from ``targets``) and
    ``mtime`` (modification time as a ``datetime``).
    """
    sources = {}
    for dirpath, dirs, files in os.walk(root):
        # Removing the entry from ``dirs`` in place stops os.walk from
        # descending into that directory.
        if 'SafeToDelete' in dirs:
            dirs.remove('SafeToDelete')
        for name in files:
            path = os.path.join(dirpath, name)
            if name not in targets:
                skipped.write(path + '\n')
                continue
            # Only stat files that are actually candidates for the mapping.
            mtime = datetime.fromtimestamp(os.path.getmtime(path))
            # Keep the newest copy when the same basename appears in
            # several directories.
            if name not in sources or sources[name]['mtime'] < mtime:
                sources[name] = {
                    'src': path,
                    'dst': targets[name],
                    'mtime': mtime,
                }
    return sources


def _write_mapping(output_path, sources):
    """Write one ``src,dst`` row per entry of ``sources`` to ``output_path``.

    No header row is written. The ``mtime`` key of each entry is dropped
    because the writer is created with ``extrasaction='ignore'``.
    """
    with open(output_path, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=['src', 'dst'],
                                extrasaction='ignore')
        for name in sources:
            writer.writerow(sources[name])


def main(mapping_path='file_mapping.csv', root='/Librarian',
         skipped_path='no-target.csv', output_path='new-mapping.csv'):
    """Build a new src -> dst CSV from a directory walk plus a mapping CSV.

    mapping_path -- existing CSV with ``file_path`` and ``s3_path`` columns.
    root         -- directory tree to scan for files.
    skipped_path -- file that receives the path of every scanned file that
                    has no entry in the mapping.
    output_path  -- CSV that receives the resulting ``src,dst`` rows.

    The defaults are the paths the script uses when run with no arguments.
    """
    # The context manager guarantees the skipped-file log is flushed and
    # closed even if reading the mapping or walking the tree raises.
    with open(skipped_path, 'w') as skipped:
        targets = _load_targets(mapping_path)
        sources = _collect_sources(root, targets, skipped)
    _write_mapping(output_path, sources)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment