Created
June 28, 2011 23:18
-
-
Save brehaut/1052486 to your computer and use it in GitHub Desktop.
Script to generate a mock directory of media based on a listing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""populate.py | |
This script takes a listing of folders and filenames (as generated by `ls -R > listing`) on standard input
and the path of a directory of sample media as its first command-line argument.
e.g:
python populate.py _source < test/listing.txt
The files named in the listing are then mocked from the media files, using the filenames from the input, and
written to a directory called 'out-datetime'
""" | |
from collections import defaultdict | |
import datetime | |
import itertools | |
import os | |
import os.path | |
import sys | |
def _sanitise_directory(directory):
    """Turn an `ls -R` directory header into a plain relative path.

    A trailing ":" (the header marker) and a leading "./" are removed, so
    "./a/b:" becomes "a/b". A bare "." (the listing root) becomes "".
    Names that merely begin with a dot, such as ".hidden" or "../x", are
    returned intact rather than losing their first two characters.
    """
    if directory.endswith(":"):
        directory = directory[:-1]
    if directory == ".":
        # The listing root maps to the empty relative path.
        return ""
    # Only a literal "./" prefix is noise; a lone leading dot is part of
    # the name (hidden directory) or of "..".
    if directory.startswith("./"):
        directory = directory[2:]
    return directory
def consume_filenames(file, ignore_toplevel=True):
    """Parse an `ls -R` style listing into (directory, filename) pairs.

    `file` is an iterable of text lines, e.g. an open file or sys.stdin.
    Returns a generator that yields `(directory, filename)` tuples.

    If ignore_toplevel is False the listing is processed in three phases:

    1. Collect the entries up to the first blank line (a top-level
       listing that has no directory header).
    2. Read the "header, entries..., blank line" sections, yielding each
       entry together with its sanitised directory name. Every directory
       name seen is removed from the phase 1 set.
    3. Yield whatever is left in the phase 1 set under the directory '.',
       in sorted order.

    If ignore_toplevel is True, phases 1 and 3 are skipped: the very first
    line of the input is treated as a directory header.
    """
    # A generator expression and the builtin next() work on Python 2.6+
    # and Python 3 (itertools.imap and the .next() method do not).
    lines = (raw.strip() for raw in file)

    # phase 1 -- takewhile also swallows the terminating blank line
    if ignore_toplevel:
        prelude = set()
    else:
        prelude = set(itertools.takewhile(bool, lines))

    # phase 2
    try:
        directory = _sanitise_directory(next(lines))
        prelude.discard(directory)
        for line in lines:
            if line:
                yield directory, line
            else:
                # A blank line means the next line is a directory header.
                directory = _sanitise_directory(next(lines))
                prelude.discard(directory)
    except StopIteration:
        # Input ended where a header was expected (e.g. trailing blank
        # line or empty input); nothing more to parse.
        pass

    # phase 3 -- sorted so the output order is reproducible
    for line in sorted(prelude):
        yield '.', line
def get_extension(filename):
    """Return the lower-cased text after the last "." in filename.

    The result is '' when filename contains no "." at all.
    """
    _, dot, suffix = filename.rpartition(".")
    if not dot:
        return ''
    return suffix.lower()
def prep_media(media_path):
    """Index the sample media found in media_path by file extension.

    Entries whose names start with "." are ignored. Returns a dict that
    maps each extension (as computed by get_extension) to an endless
    itertools.cycle over the real paths of the entries with that extension.

    Raises Exception if media_path is not an existing directory.
    """
    # isdir() is already False for a missing path, so no exists() check.
    if not os.path.isdir(media_path):
        raise Exception("media path is invalid")

    media = defaultdict(list)
    # os.listdir returns entries in arbitrary order; sort them so each
    # cycle hands out media in a reproducible sequence.
    for filename in sorted(os.listdir(media_path)):
        if filename.startswith("."):
            continue
        full_path = os.path.realpath(os.path.join(media_path, filename))
        media[get_extension(filename)].append(full_path)

    # items() exists on Python 2 and 3; iteritems() is Python 2 only.
    return dict((ext, itertools.cycle(paths)) for ext, paths in media.items())
def populate(filenames_iter, media_path, output_path):
    """Create output_path and fill it with symlinks to sample media.

    filenames_iter yields (directory, filename) pairs. For each pair a
    relative symlink called filename is created under
    output_path/directory, pointing at the next media file (taken from
    prep_media(media_path)) that has the same extension. Pairs whose
    extension has no media are skipped; a message is written to stderr the
    first time each such extension is encountered.

    Raises Exception if output_path already exists (prep_media raises
    Exception for an invalid media_path).
    """
    media = prep_media(media_path)

    if os.path.exists(output_path):
        raise Exception("output path already exists")
    os.makedirs(output_path)

    errored_exts = set()

    for d, f in filenames_iter:
        ext = get_extension(f)

        if ext not in media:
            if ext not in errored_exts:
                # sys.stderr.write works on Python 2 and 3, unlike the
                # `print >>stream` statement.
                sys.stderr.write(
                    "the extension '%s' is not available in the source media dir\n" % ext)
                errored_exts.add(ext)
            continue

        # Resolve the directory once so that the place the link is created
        # and the base of its relative target are the same path.
        target_dir = os.path.realpath(os.path.join(output_path, d))
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        link_name = os.path.join(target_dir, f)
        # next() is the portable spelling of the Python 2 .next() method.
        os.symlink(os.path.relpath(next(media[ext]), target_dir), link_name)
if __name__ == "__main__":
    # Usage: python populate.py MEDIA_DIR < listing.txt
    if len(sys.argv) <= 1:
        sys.stderr.write("needs media path as argument\n")
        sys.exit(1)

    media_path = os.path.realpath(sys.argv[1])

    # Fixed-width YYYYMMDDHHMMSS stamp. Concatenating the raw timetuple()
    # fields gave unpadded, ambiguous names that also included the
    # weekday, day-of-year and DST flag.
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    populate(consume_filenames(sys.stdin, False),
             media_path,
             "out-%s" % timestamp)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage:
Will generate a directory named out-[timestamp] containing symlinks from the filenames in listing.txt to media in the source directory ('_source' in the example); links are made on a per-extension basis. If an extension is encountered with no match in the media source directory, an error is written to stderr on its first occurrence.
note: all symlinks are relative