Skip to content

Instantly share code, notes, and snippets.

@jjjake
Created January 11, 2013 02:22
Show Gist options
  • Select an option

  • Save jjjake/4507455 to your computer and use it in GitHub Desktop.

Select an option

Save jjjake/4507455 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
from datetime import datetime
import logging
import sys
import ujson
# parallel_md_get.py available here: https://gist.github.com/3784845
from parallel_md_get import metadata_record_iterator
## Logging!
# The log file is named after this script plus the current UTC date.
date = datetime.utcnow().strftime("%Y-%m-%d")
# str.strip('.py') removes any leading/trailing '.', 'p' and 'y' characters
# (e.g. "./copy.py" -> "/co"), so drop the ".py" suffix explicitly instead.
script_name = __file__[:-3] if __file__.endswith('.py') else __file__
log_filename = "%s-%s.log" % (script_name, date)
logging_format = "%(asctime)s\t%(levelname)s\t%(message)s"
# basicConfig expects a level constant; logging.warning is the logging
# function, not the WARNING level. Records below WARNING are discarded.
logging.basicConfig(filename=log_filename, level=logging.WARNING,
                    format=logging_format)
def item_is_derived(formats):
    """Return False only when *formats* has an ARC entry but no CDX entry.

    A format "has ARC" (or CDX) when that text occurs anywhere inside one
    of the format strings, so the match is a case-sensitive substring test.

    :param formats: iterable of format-name strings for one item.
    :returns: ``False`` if some format contains ``'ARC'`` and none contains
              ``'CDX'``; ``True`` in every other case, including when
              *formats* is empty.
    """
    # Without any ARC format there is nothing that needs a CDX.
    if not any('ARC' in fmt for fmt in formats):
        return True
    return any('CDX' in fmt for fmt in formats)
#______________________________________________________________________________
# Read item identifiers from the file named by the first command-line
# argument, fetch each item's metadata, and append the identifier of every
# item that item_is_derived() rejects to a dated output file.
output_file = 'underived-thumper-items_%s.txt' % date
# The context manager closes the identifier list once all results are read.
with open(sys.argv[1]) as ids:
    results = metadata_record_iterator(ids, workers=20)
    for i, identifier, md_json in results:
        # Catch everything per item so one bad record is logged and the run
        # carries on with the remaining identifiers.
        try:
            metadata = ujson.loads(md_json)
            logging.info("retrieved and parsed metadata:\t%s", identifier)
            files = metadata.get('files')
            if not files:
                logging.warning("item has no files!\t%s", identifier)
                continue
            # A file entry without a 'format' key raises KeyError, which is
            # logged by the handler below.
            formats = set(entry['format'] for entry in files)
            if not item_is_derived(formats):
                with open(output_file, 'a') as out:
                    # One identifier per line; strip first so an identifier
                    # that already ends in a newline is not doubled.
                    out.write("%s\n" % identifier.strip())
        except Exception as e:
            logging.error("%s:\t%s", identifier, e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment