Skip to content

Instantly share code, notes, and snippets.

@jjjake
Last active December 10, 2015 13:28
Show Gist options
  • Select an option

  • Save jjjake/4440707 to your computer and use it in GitHub Desktop.

Select an option

Save jjjake/4440707 to your computer and use it in GitHub Desktop.
Count how many times an archive.org item has been derived.
#!/usr/bin/env python
import sys
import os
import json
import requests
def get_tasks(identifier, params=None):
    """Fetch the task history for an item from the archive.org catalog.

    Sends a GET request to ``catalog.php`` asking for a JSONP response
    (callback ``foo``) that includes task history, unwraps the callback,
    and converts every returned row into a dictionary.

    Args:
        identifier: Item identifier to query; sent as the ``identifier``
            query parameter.
        params: Optional mapping of extra query parameters. It is copied,
            so the caller's mapping is never modified. The keys ``json``,
            ``output``, ``callback``, ``history`` and ``identifier`` are
            always overwritten by this function.

    Returns:
        A ``(response, tasks)`` tuple: the ``requests`` response object and
        a list of dicts with the keys ``identifier``, ``server``,
        ``command``, ``time``, ``submitter``, ``args``, ``task_id`` and
        ``type`` (taken from row positions 0-7, in that order).

    Raises:
        KeyError: If ``LOGGED_IN_SIG`` or ``LOGGED_IN_USER`` is not set in
            the environment.
        ValueError: If the response body is not valid JSON/JSONP.
        IndexError: If a returned row has fewer than eight columns.
    """
    callback = 'foo'
    field_names = ('identifier', 'server', 'command', 'time',
                   'submitter', 'args', 'task_id', 'type')

    # Credentials come from the environment so they never live in the code.
    log_in_cookies = {
        'logged-in-sig': os.environ['LOGGED_IN_SIG'],
        'logged-in-user': os.environ['LOGGED_IN_USER'],
    }

    # Work on a private copy: a shared mutable default (or the caller's own
    # dict) must not accumulate the keys set below across calls.
    query = dict(params) if params else {}
    query['json'] = 2
    query['output'] = 'json'
    query['callback'] = callback
    query['history'] = '1'
    query['identifier'] = identifier

    url = 'http://www-tracey.us.archive.org/catalog.php'
    r = requests.get(url, params=query, cookies=log_in_cookies)

    # Unwrap the JSONP envelope ``foo(...)``. Remove the callback name as a
    # prefix rather than with str.strip('foo'), which treats its argument as
    # a set of characters to strip from both ends.
    payload = r.text.strip()
    if payload.startswith(callback):
        payload = payload[len(callback):].strip()
        if payload.endswith(';'):
            payload = payload[:-1].rstrip()
        if payload.startswith('(') and payload.endswith(')'):
            payload = payload[1:-1]
    tasks_json = json.loads(payload)

    # Each row is positional; map the first eight columns to named fields.
    tasks = [
        dict((name, row[index]) for index, name in enumerate(field_names))
        for row in tasks_json
    ]
    return r, tasks
if __name__ == '__main__':
    # Command-line entry point: print "<identifier>\t<count>", where count is
    # the number of tasks whose command is 'derive'.
    if len(sys.argv) < 2:
        # Exit with a readable message instead of a bare IndexError.
        sys.exit('usage: %s <identifier>' % sys.argv[0])
    identifier = sys.argv[1]

    # The response object is not needed here, only the parsed tasks.
    _, tasks = get_tasks(identifier)
    derive_count = sum(1 for task in tasks if task['command'] == 'derive')

    # A single parenthesised argument prints the same text on Python 2 and 3.
    print("%s\t%s" % (identifier, derive_count))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment