# Gist jjjake/3948167 by @jjjake, created October 24, 2012 19:07.
# (GitHub page chrome captured with the gist -- "Skip to content",
# "Show Gist options", "Save jjjake/3948167 to your computer and use it in
# GitHub Desktop" -- is not part of the script.)
#!/home/jake/.virtualenvs/ia-wrapper/bin/python
import logging
import os
import Queue
import re
import threading
from datetime import datetime

import archive
## Logging!
# One log file per script per UTC day, written under ./logs/.
date = datetime.utcnow().strftime("%Y-%m-%d")
# Use the script's base name without its extension.  str.strip('.py') is not
# a suffix remover: it strips any leading/trailing '.', 'p' or 'y' characters
# (mangling names such as "py_upload.py") and keeps directory components.
script_name = os.path.splitext(os.path.basename(__file__))[0]
log_filename = "logs/%s-%s.log" % (script_name, date)
# basicConfig opens the log file right away, so the directory must exist.
log_dir = os.path.dirname(log_filename)
if not os.path.isdir(log_dir):
    os.makedirs(log_dir)
logging_format = "%(asctime)s\t%(levelname)s\t%(message)s"
logging.basicConfig(filename=log_filename, level=logging.INFO,
                    format=logging_format)
#______________________________________________________________________________
def mk_meta_dict(line):
    """Build an item-metadata dict from one tab-separated spreadsheet row.

    The row must contain exactly 14 tab-separated columns, in this order:
    identifier, title, description, mediatype, sponsor, contributor, date,
    sound, color, noindex, collection1, collection2, collection3, subjects.
    The three collection columns are merged into a ``collection`` list and
    the comma-separated subjects column becomes a ``subject`` list.

    Empty values are omitted from the result: blank scalar columns, blank
    entries inside the ``collection``/``subject`` lists, and a list key
    whose entries are all blank.

    Returns:
        dict mapping metadata field names to a string or a list of strings.

    Raises:
        ValueError: if the row does not have exactly 14 columns.
    """
    scalar_names = ('identifier', 'title', 'description', 'mediatype',
                    'sponsor', 'contributor', 'date', 'sound', 'color',
                    'noindex')
    # Scalars, then three collection columns, then the subjects column.
    expected = len(scalar_names) + 4
    fields = line.split('\t')
    if len(fields) != expected:
        raise ValueError('expected %d tab-separated fields, got %d: %r'
                         % (expected, len(fields), line))
    first_collection = len(scalar_names)
    meta_dict = dict(zip(scalar_names, fields))
    # Drop blank entries so e.g. ['prelinger', '', ''] becomes ['prelinger'].
    meta_dict['collection'] = [
        name for name in fields[first_collection:first_collection + 3]
        if name != ''
    ]
    meta_dict['subject'] = [
        subject for subject in fields[-1].split(',') if subject != ''
    ]
    # Every value is a str or a list, so truthiness drops '' and [].
    return dict((k, v) for (k, v) in meta_dict.items() if v)
#______________________________________________________________________________
def create_item(spreadsheet_line):
    """Upload the movie file for the item described by one spreadsheet row.

    The row is parsed with ``mk_meta_dict``; the movie filename is formed
    from the digits of the item identifier plus ``.mov``.  The upload only
    happens when that filename is listed (one name per line) in the local
    file ``files2upload``.

    Args:
        spreadsheet_line: one raw tab-separated row, line ending included.

    Returns:
        None.
    """
    # Strip only spaces and line endings.  A bare strip() would also remove
    # the trailing tabs that delimit empty final columns, so rows with e.g.
    # no subjects would no longer have the column count the parser expects.
    metadata = mk_meta_dict(spreadsheet_line.strip(' \r\n'))
    item = archive.Item(metadata.get('identifier'))
    logging.info('creating item:\t%s', item.identifier)
    if item.exists:
        # NOTE(review): an existing item is only logged; the upload below
        # still runs for it.  Confirm that this is intended.
        logging.info('item already exists:\t%s', item.identifier)
    filename = ''.join(re.findall(r'\d', item.identifier)) + '.mov'
    # Close the listing promptly; this runs once per row in every worker.
    with open('files2upload') as listing:
        uploadable = set(name.strip() for name in listing)
    if filename in uploadable:
        # Parenthesised single argument prints identically on Python 2.
        print("creating item: " + item.identifier)
        item.upload(filename, metadata, derive=False, multipart=True)
# THREADING >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#______________________________________________________________________________
# Number of worker threads main() starts; also the results-queue capacity.
THREAD_LIMIT = 20
# Spreadsheet lines waiting to be processed (maxsize 0 means unbounded).
jobs = Queue.Queue(maxsize=0)
# Results handed back by the workers, bounded to THREAD_LIMIT entries.
files_to_process = Queue.Queue(maxsize=THREAD_LIMIT)
def thread():
    """Worker loop: process queued spreadsheet lines until none remain.

    Repeatedly takes a line from ``jobs`` without blocking, passes it to
    ``create_item`` and puts the result on ``files_to_process``.  Returns
    as soon as ``jobs`` is found empty.

    A failure while processing one line is logged with its traceback and
    the worker moves on to the next line, so a bad row does not silently
    remove a worker from the pool.
    """
    while True:
        try:
            item = jobs.get(False)
        except Queue.Empty:
            return
        try:
            item_p = create_item(item)
        except Exception:
            # Thread boundary: record the failure and keep the worker alive.
            logging.exception('failed to process line:\t%r', item)
            continue
        # Blocks while the bounded results queue is full; main() drains it.
        files_to_process.put(item_p, True)
# MAIN >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
#______________________________________________________________________________
def main():
    """Queue every spreadsheet row, run the workers, and drain their results.

    Reads ``PrelingerMetadata20121024.txt`` line by line into ``jobs``,
    starts ``THREAD_LIMIT`` threads running ``thread``, then keeps emptying
    ``files_to_process`` until every worker has finished and the queue is
    empty.  The drained values are discarded.
    """
    with open('PrelingerMetadata20121024.txt') as spreadsheet:
        for spreadsheet_line in spreadsheet:
            jobs.put(spreadsheet_line)
    # Track the workers started here rather than counting every live thread
    # in the process.
    workers = [threading.Thread(target=thread) for _ in xrange(THREAD_LIMIT)]
    for worker in workers:
        worker.start()
    while (any(worker.is_alive() for worker in workers)
           or not files_to_process.empty()):
        try:
            # Block for up to a second.  A non-blocking get ignores its
            # timeout, which would turn this loop into a busy-wait.
            files_to_process.get(True, 1)
        except Queue.Empty:
            continue


if __name__ == '__main__':
    main()
# (GitHub page footer) Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment