Skip to content

Instantly share code, notes, and snippets.

@brehaut
Created June 28, 2011 23:18
Show Gist options
  • Save brehaut/1052486 to your computer and use it in GitHub Desktop.
Save brehaut/1052486 to your computer and use it in GitHub Desktop.
Script to generate a mock directory of media based on a listing
"""populate.py
This script takes a listing of folders and filenames (as generated by `ls -R > listing`) on standard input
and the pathname for a set of media as the argument to argv.
e.g:
python populate.py _source < test/listing.txt
The files listed in listing are then mocked from the media files using the filenames in the input, and
written to a directory called 'out-datetime'
"""
from collections import defaultdict
import datetime
import itertools
import os
import os.path
import sys
def _sanitise_directory(directory):
if directory.endswith(":"):
directory = directory[:-1]
if directory.startswith("."):
directory = directory[2:]
return directory
def consume_filenames(file, ignore_toplevel=True):
    """Parse a listing in the form produced by `ls -R`.

    `file` is iterated line by line; every line is stripped of surrounding
    whitespace. The function is a generator yielding `(directory, filename)`
    pairs.

    If ignore_toplevel is False the listing is expected to begin with a block
    of names that has no directory header, and parsing runs in three phases:

    1. Collect the names up to the first blank line.
    2. Read the remaining blocks, each a directory header followed by names
       and separated from the next block by a blank line, yielding a pair for
       every name. A directory whose header appears here is removed from the
       names collected in phase 1.
    3. Yield `('.', name)` for every phase-1 name that was not removed, in
       listing order and without duplicates.

    If ignore_toplevel is True nothing is collected in phase 1: the very first
    line is already treated as a directory header and phase 3 yields nothing.
    """
    lines = (raw.strip() for raw in file)

    # phase 1: the headerless top-level block. Breaking on the blank line
    # consumes it, so phase 2 starts on the first directory header.
    toplevel = []
    if not ignore_toplevel:
        for line in lines:
            if not line:
                break
            toplevel.append(line)
    remaining = set(toplevel)

    # phase 2: header/name blocks. The first line, and the first non-blank
    # line after any blank line, is a directory header.
    directory = None
    expect_header = True
    for line in lines:
        if expect_header:
            if not line:
                # repeated blank lines: keep waiting for the header
                continue
            directory = _sanitise_directory(line)
            remaining.discard(directory)
            expect_header = False
        elif not line:
            expect_header = True
        else:
            yield directory, line

    # phase 3: whatever is left of the top-level block are plain files.
    # Walking the list (not the set) keeps the listing order stable.
    for name in toplevel:
        if name in remaining:
            remaining.discard(name)
            yield '.', name
def get_extension(filename):
    """Return the lower-cased text after the last "." in `filename`.

    A name that contains no "." at all yields the empty string.
    """
    _stem, dot, suffix = filename.rpartition(".")
    if not dot:
        return ''
    return suffix.lower()
def prep_media(media_path):
    """Index the entries of `media_path` by file extension.

    Entries whose name starts with "." are ignored. Returns a dict mapping
    each extension (as computed by `get_extension`) to an endless
    `itertools.cycle` over the resolved paths of the entries carrying that
    extension.

    Raises Exception if `media_path` is not an existing directory.
    """
    # isdir() is already False for a path that does not exist
    if not os.path.isdir(media_path):
        raise Exception("media path is invalid")

    media = defaultdict(list)
    # os.listdir() returns names in arbitrary order; sorting makes the order
    # in which each cycle hands out media reproducible between runs.
    for filename in sorted(os.listdir(media_path)):
        if filename.startswith("."):
            continue
        media[get_extension(filename)].append(
            os.path.realpath(os.path.join(media_path, filename)))

    # items() exists on both Python 2 and 3, unlike iteritems()
    return dict((ext, itertools.cycle(paths)) for ext, paths in media.items())
def populate(filenames_iter, media_path, output_path):
    """Create `output_path` and fill it with symlinks to source media.

    `filenames_iter` yields `(directory, filename)` pairs. For each pair a
    relative symlink named `filename` is created inside `directory` (itself
    relative to `output_path`, created on demand) pointing at the next media
    entry from `media_path` that has the same extension.

    Names whose extension has no counterpart in `media_path` are skipped; a
    message is written to stderr the first time each such extension is seen.

    Raises Exception if `media_path` is invalid (see `prep_media`) or if
    `output_path` already exists.
    """
    media = prep_media(media_path)

    if os.path.exists(output_path):
        raise Exception("output path already exists")
    os.makedirs(output_path)

    errored_exts = set()

    for d, f in filenames_iter:
        ext = get_extension(f)

        if ext not in media:
            if ext not in errored_exts:
                sys.stderr.write(
                    "the extension '%s' is not available in the source media dir\n" % ext)
                errored_exts.add(ext)
            continue

        # Resolve the directory once and use that single path both to create
        # the link and to compute its relative target, so the link stays valid
        # even when output_path is reached through a symlink.
        target_dir = os.path.realpath(os.path.join(output_path, d))
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        source = next(media[ext])
        os.symlink(os.path.relpath(source, target_dir),
                   os.path.join(target_dir, f))
if __name__ == "__main__":
    # Usage: python populate.py MEDIA_DIR < listing.txt
    if len(sys.argv) <= 1:
        sys.stderr.write("needs media path as argument\n")
        sys.exit(1)

    media_path = os.path.realpath(sys.argv[1])

    # Fixed-width timestamp: concatenating the raw timetuple() fields gives an
    # unpadded, ambiguous name that also carries weekday, yearday and the
    # DST flag.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    # False: the listing on stdin starts with a headerless top-level block
    populate(consume_filenames(sys.stdin, False),
             media_path,
             "out-%s" % timestamp)
@brehaut
Copy link
Author

brehaut commented Jun 28, 2011

Usage:

 python populate.py _source < listing.txt 

Will generate a directory named out-[timestamp] containing symlinks from the filenames in listing.txt to media in the directory ('_source' in the example); links are made on a per-extension basis. If an extension is encountered with no match in the media source directory, an error is written to stderr on the first occurrence.

note: all symlinks are relative

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment