
#!/home/jake/.virtualenvs/ia-wrapper/bin/python
# Python 2 script: the Queue module was renamed queue in Python 3.
import logging
from datetime import datetime
import re
import threading
import Queue
import archive

## Logging!
@jjjake
jjjake / count_derives.py
Last active December 10, 2015 13:28
Count how many times an archive.org item has been derived.
#!/usr/bin/env python
import sys
import os
import json
import requests
def get_tasks(identifier, params={}):
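The preview cuts off before get_tasks does anything. A minimal sketch of the same idea against the currently documented Tasks API (GET archive.org/services/tasks.php, which needs IA-S3 keys; the IAS3_ACCESS_KEY/IAS3_SECRET_KEY names below are placeholders, and the 2015-era original may well have queried an older catalog endpoint instead):

#!/usr/bin/env python
import os
import sys

import requests


def count_derives(identifier):
    """Count derive.php rows in an item's task history via the Tasks API."""
    # Placeholder env var names; any IA-S3 key pair works here.
    auth = 'LOW %s:%s' % (os.environ['IAS3_ACCESS_KEY'],
                          os.environ['IAS3_SECRET_KEY'])
    r = requests.get('https://archive.org/services/tasks.php',
                     params={'identifier': identifier, 'history': 1},
                     headers={'authorization': auth})
    r.raise_for_status()
    history = r.json().get('value', {}).get('history', [])
    return sum(1 for task in history if task.get('cmd') == 'derive.php')


if __name__ == '__main__':
    print(count_derives(sys.argv[1]))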
@jjjake
jjjake / ait_derive.php
Created January 8, 2013 20:11
Return a list of AIT items without CDXs, older than one week (for handoff to the auto_submit.php perpetual loop).
<?
function get_data($url) {
    $ch = curl_init();
    $timeout = 5;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $data = curl_exec($ch);
    curl_close($ch);
    return $data;
}
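What get_data fetches here is, per the description, a search result. In Python the same item list can come from the public advancedsearch.php endpoint; the query string below is a placeholder, since the real AIT collection and "no CDX" predicate live in the part of the PHP the preview cuts off:

#!/usr/bin/env python
import requests


def search_identifiers(query, rows=1000):
    """Return item identifiers matching an archive.org search query."""
    r = requests.get('http://archive.org/advancedsearch.php',
                     params={'q': query, 'fl[]': 'identifier',
                             'rows': rows, 'output': 'json'})
    r.raise_for_status()
    return [doc['identifier'] for doc in r.json()['response']['docs']]


# Placeholder query; swap in the real collection and date constraints.
for identifier in search_identifiers('collection:ArchiveIt-Collection-X'):
    print(identifier)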
@jjjake
jjjake / wcd_daily_derive.php
Last active December 10, 2015 20:18
Return a list of wcd items without .ogg derivatives, no older than two days (for handoff to the auto_submit.php perpetual loop).
<?
function get_data($url) {
    $ch = curl_init();
    $timeout = 5;
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    $data = curl_exec($ch);
    curl_close($ch);
    return $data;
}
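The per-item half of this gist's predicate (does the item already have an .ogg?) can be checked against the read-only metadata API, which lists every file in the item; a minimal sketch:

#!/usr/bin/env python
import requests


def has_ogg(identifier):
    """True if the item's metadata already lists an .ogg file."""
    r = requests.get('http://archive.org/metadata/%s' % identifier)
    r.raise_for_status()
    return any(f.get('name', '').endswith('.ogg')
               for f in r.json().get('files', []))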
#!/usr/bin/env python
from datetime import datetime
import logging
import sys
import ujson
# parallel_md_get.py available here: https://gist.github.com/3784845
from parallel_md_get import metadata_record_iterator
#!/usr/bin/env python
import datetime
import logging
import sys
import ujson
# parallel_md_get.py available here: https://gist.github.com/3784845
from parallel_md_get import metadata_record_iterator
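None of these previews show metadata_record_iterator being called, and its gist isn't reproduced here, so the interface below is an assumption (identifiers in, one raw JSON record out per item) rather than the library's documented API:

#!/usr/bin/env python
import sys

import ujson
from parallel_md_get import metadata_record_iterator

# ASSUMED signature: an iterable of identifiers in, one JSON string per
# record out. Check https://gist.github.com/3784845 for the real interface.
identifiers = [line.strip() for line in sys.stdin if line.strip()]
for record in metadata_record_iterator(identifiers):
    md = ujson.loads(record)
    print(md.get('metadata', {}).get('identifier'))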
import os
import json

import requests
import jsonpatch


def write_metadata(identifier, metadata={}, target='metadata'):
    log_in_cookies = {'logged-in-sig': os.environ['LOGGED_IN_SIG'],
                      'logged-in-user': os.environ['LOGGED_IN_USER']}
    url = 'http://archive.org/metadata/%s' % identifier
    src = requests.get(url).json().get(target, {})
    # Python 2 idiom: merge the live metadata with the caller's changes.
    dest = dict((src.items() + metadata.items()))
    json_patch = jsonpatch.make_patch(src, dest).patch
    # Flatten RFC 6902 ops into the {op: path, 'value': ...} shape the
    # archive.org metadata write API expects.
    patch = [{p['op']: p['path'], 'value': p['value']} for p in json_patch]
    if patch == []:
        return 'no changes made to metadata.'
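The preview ends before the patch is submitted. A sketch of the write half, assuming the documented metadata write API (POST the patch back to /metadata/{identifier} with -patch, -target, and your IA-S3 keys; the env var names are placeholders):

def post_patch(identifier, patch, target='metadata'):
    """Submit a patch via the archive.org metadata write API."""
    url = 'http://archive.org/metadata/%s' % identifier
    data = {'-patch': json.dumps(patch),
            '-target': target,
            'access': os.environ['IAS3_ACCESS_KEY'],  # placeholder names
            'secret': os.environ['IAS3_SECRET_KEY']}
    return requests.post(url, data=data).json()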
#!/usr/bin/env python
"""Update a single items record in metamgr.
Usage:
./update_metamgr.py $identifier
Using parallel:
parallel ./update_metamgr.py < itemlist.txt
"""
#!/usr/bin/env python
import sys
import ujson
# Script/library created by Mike McCabe to do multiple
# metadata fetches in parallel.
# Available here: https://gist.github.com/3784845
from parallel_md_get import metadata_record_iterator