Skip to content

Instantly share code, notes, and snippets.

@DominicBM
Last active August 29, 2015 14:13
Show Gist options
  • Save DominicBM/1c730e533bd32d702339 to your computer and use it in GitHub Desktop.
Save DominicBM/1c730e533bd32d702339 to your computer and use it in GitHub Desktop.
import requests, json, os, csv, argparse
# Look up the catalog naId and file-unit title for every variant control
# number listed in a CSV file, and record the results in a tab-separated log.
#
# Usage: python <this script> --file INPUT.csv
#
# Input columns are read by position: variant control number, file size,
# label flag, access filename.  The log is written next to the input file as
# "NAIDs - <input file name>" and repeats those four columns followed by the
# naId and title returned by the catalog API (blank when nothing was found).
parser = argparse.ArgumentParser()
# --file is required: the output name is derived from it, so a missing value
# would otherwise only surface later as an unhelpful TypeError.
parser.add_argument('--file', dest='file', metavar='FILE',
                    action='store', required=True)
args = parser.parse_args()
input_path = args.file
# Prefix only the file name, so an input given with a directory component
# still produces a valid output path (in that same directory).  For a bare
# file name this is exactly 'NAIDs - ' + name, as before.
newfile = os.path.join(os.path.dirname(input_path),
                       'NAIDs - ' + os.path.basename(input_path))

# Fixed part of the query; the per-row search value is passed separately
# through ``params`` so that requests URL-encodes it.
API_URL = ('https://catalog.archives.gov/api/v1'
           '?resultFields=description.fileUnit.title,naId'
           '&resultTypes=fileUnit')
VCN_PARAM = ('description.fileUnit.variantControlNumberArray'
             '.variantControlNumber.number')

offset = 0  # number of rows written to the log so far

# The log is opened once and flushed after every row, so rows already
# processed are on disk even if a later request fails.
with open(newfile, 'w') as log:
    writelog = csv.writer(log, delimiter='\t', quoting=csv.QUOTE_ALL)
    writelog.writerow(('variantControlNumber', 'fileSize', 'labelFlag',
                       'accessFilename', 'naId', 'title'))
    log.flush()

    # 'rb' is what the Python 2 csv module expects for input files.
    with open(input_path, 'rb') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                # csv.reader yields [] for blank lines; nothing to look up.
                continue

            variant_control_number = row[0]
            file_size = row[1]
            label_flag = row[2]
            access_file_name = row[3]

            response = requests.get(
                API_URL, params={VCN_PARAM: variant_control_number})
            parsed = json.loads(response.text)

            try:
                first = parsed['opaResponse']['results']['result'][0]
                # Tuple assignment: both values are looked up before either
                # name is bound, so a partial match never leaks through.
                NAID, title = (first['naId'],
                               first['description']['fileUnit']['title'])
            except (KeyError, IndexError, TypeError):
                # Missing key, empty result list, or an unexpected shape.
                # Reset both fields so this row is not logged with the
                # previous row's values.
                NAID, title = '', ''
                print('No result found for ' + variant_control_number + '.')

            offset = offset + 1
            writelog.writerow((variant_control_number, file_size, label_flag,
                               access_file_name, NAID, title))
            log.flush()
            print('\nRow ' + str(offset) + ' logged: '
                  + variant_control_number + ', ' + NAID
                  + ', "' + title + '"')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment