Skip to content

Instantly share code, notes, and snippets.

Created June 28, 2011 23:18
Show Gist options
  • Save brehaut/1052486 to your computer and use it in GitHub Desktop.
Save brehaut/1052486 to your computer and use it in GitHub Desktop.
Script to generate a mock directory of media based on a listing
This script takes a listing of folders and filenames (as generated by `ls -R > listing`) on standard input
and the pathname of a directory of sample media as its first command-line argument, e.g.:
python _source < test/listing.txt
The files named in the listing are then mocked from the media files, using the filenames from the input, and
written to a directory called 'out-datetime'
from collections import defaultdict
import datetime
import itertools
import os
import os.path
import sys
def _sanitise_directory(directory):
if directory.endswith(":"):
directory = directory[:-1]
if directory.startswith("."):
directory = directory[2:]
return directory
def consume_filenames(file, ignore_toplevel=True):
    """Parse an `ls -R` style listing into (directory, filename) pairs.

    `file` is any iterable of text lines. Every line is stripped of
    surrounding whitespace before use. The result is a generator yielding
    `(directory, filename)` tuples, where `directory` has been passed
    through `_sanitise_directory`.

    If ignore_toplevel is False the listing is processed in three phases:

    1. Collect the entries up to the first blank line into a set (the
       top-level entries, which have no directory header of their own).
    2. Treat the line following each blank line as a directory header and
       emit a pair for every non-blank line beneath it.
       * Each directory header that also appears in the collected
         top-level set is removed from that set.
    3. Emit whatever remains in the top-level set with '.' as directory.

    If ignore_toplevel is True, phase 1 is skipped: the very first line is
    taken to be a directory header and phase 3 has nothing to emit.

    Entries are emitted exactly as listed; names of nested subdirectories
    that appear inside a directory's section are not filtered out.
    """
    lines = (raw.strip() for raw in file)

    # phase 1
    if ignore_toplevel:
        prelude = set()
    else:
        # takewhile also consumes the blank line that ends the prelude.
        prelude = set(itertools.takewhile(bool, lines))

    # phase 2
    try:
        directory = _sanitise_directory(next(lines))
        prelude.discard(directory)
        for line in lines:
            if not line:
                # A blank line separates sections; the next line is a header.
                directory = _sanitise_directory(next(lines))
                prelude.discard(directory)
                continue
            yield directory, line
    except StopIteration:
        # The listing ended where a directory header was expected.
        pass

    # phase 3
    for line in prelude:
        yield '.', line
def get_extension(filename):
    """Return the lower-cased text after the last "." in `filename`.

    A name containing no "." at all yields the empty string.
    """
    _stem, dot, suffix = filename.rpartition(".")
    if not dot:
        return ''
    return suffix.lower()
def prep_media(media_path):
    """Index the sample media in `media_path` by file extension.

    Returns a dict mapping each lower-cased extension (as computed by
    `get_extension`) to an endless `itertools.cycle` over the resolved
    real paths of the directory entries carrying that extension. Entries
    whose name starts with "." are ignored.

    Entries are visited in sorted name order so that the order in which
    each cycle hands out paths is reproducible between runs
    (`os.listdir` itself returns names in arbitrary order).

    Raises Exception if `media_path` is not an existing directory.
    """
    # isdir() is already False for paths that do not exist.
    if not os.path.isdir(media_path):
        raise Exception("media path is invalid")

    media = defaultdict(list)
    for filename in sorted(os.listdir(media_path)):
        if filename.startswith("."):
            continue
        full_path = os.path.realpath(os.path.join(media_path, filename))
        media[get_extension(filename)].append(full_path)

    # items() rather than iteritems() so this runs on Python 2 and 3 alike.
    return dict((ext, itertools.cycle(paths)) for (ext, paths) in media.items())
def populate(filenames_iter, media_path, output_path):
    """Create a mock media tree of relative symlinks under `output_path`.

    `filenames_iter` yields `(directory, filename)` pairs. For each pair a
    symlink named `filename` is created in `output_path/directory`,
    pointing at the next sample file from `media_path` that has the same
    extension. Directories are created on demand, and every link target
    is expressed relative to the directory containing the link.

    When no sample media exists for an extension the entry is skipped; a
    message is written to stderr the first time each such extension is
    encountered.

    Raises Exception if `media_path` is invalid (via `prep_media`) or if
    `output_path` already exists.
    """
    media = prep_media(media_path)
    if os.path.exists(output_path):
        raise Exception("output path already exists")

    errored_exts = set()
    for d, f in filenames_iter:
        ext = get_extension(f)
        if ext not in media:
            if ext not in errored_exts:
                sys.stderr.write(
                    "the extension '%s' is not available in the source media dir\n" % ext)
                # Remember the extension so it is only reported once.
                errored_exts.add(ext)
            continue

        d = os.path.join(output_path, d)
        if not os.path.exists(d):
            os.makedirs(d)

        # Resolve the directory first so the relative target is computed
        # from the same location the link is actually created in.
        link_dir = os.path.realpath(d)
        fn = os.path.join(link_dir, f)
        os.symlink(os.path.relpath(next(media[ext]), link_dir), fn)
if __name__ == "__main__":
    # Script entry point: listing on stdin, media directory as argv[1].
    if len(sys.argv) <= 1:
        sys.stderr.write("needs media path as argument\n")
        # Stop here; without an argument sys.argv[1] below would raise.
        sys.exit(1)

    media_path = os.path.realpath(sys.argv[1])

    # NOTE(review): the output directory is "out-" followed by a timestamp
    # of the current local time. The exact original timestamp expression
    # was not fully visible; zero-padded YYYYMMDDHHMMSS is used here so
    # names are unambiguous and sort chronologically - confirm if the
    # precise format matters to anything downstream.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    populate(consume_filenames(sys.stdin, False),
             media_path,
             "out-%s" % timestamp)
Copy link

brehaut commented Jun 28, 2011


 python _source < listing.txt 

Will generate a directory named out-[timestamp] containing symlinks from the filenames in listing.txt to media in the source directory ('_source' in the example); links are made on a per-extension basis. If an extension is encountered with no match in the media source directory, an error is written to stderr on the first occurrence.

note: all symlinks are relative

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment