Skip to content

Instantly share code, notes, and snippets.

@kschlottmann
Last active May 28, 2020 16:51
Show Gist options
  • Save kschlottmann/ee9e95b38dc222bc2dafd05f518f11ab to your computer and use it in GitHub Desktop.
Save kschlottmann/ee9e95b38dc222bc2dafd05f518f11ab to your computer and use it in GitHub Desktop.
Get the authorized name from the Library of Congress (LC) JSON record for each authority file number.
import requests, csv, json, urllib, time
# For every row of input_numbers.csv (column 0: authority number, column 1:
# the name as originally recorded) fetch <baseURLexact><number>.json and
# print one pipe-separated line:
#     number | authoritative label or status message | original name
startTime = time.time()  # only read by the disabled runtime report at the bottom
baseURLexact = 'http://id.loc.gov/authorities/names/'
# Example record: http://id.loc.gov/authorities/names/nr2002027244.json
authoritativeLabelKey = 'http://www.loc.gov/mads/rdf/v1#authoritativeLabel'
# Seconds to wait on the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
requestTimeout = 30


def _authoritative_label(records, number):
    """Return the authoritative label of the record for *number*.

    *records* is the decoded JSON body, expected to be a list of dicts that
    each carry an '@id' key. The first record whose '@id' equals
    baseURLexact + number and that has an authoritative-label entry wins.

    Returns None when no such record exists.

    Raises KeyError, IndexError or TypeError when the payload does not have
    the expected shape; the caller reports those for the row.
    """
    recordId = baseURLexact + number
    for record in records:
        if record['@id'] == recordId and authoritativeLabelKey in record:
            return record[authoritativeLabelKey][0]['@value']
    return None


# newline='' is what the csv module asks for, so that quoted fields with
# embedded newlines are parsed correctly.
with open('input_numbers.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; nothing to look up.
            continue
        number = row[0]
        # Tolerate rows that only carry the authority number.
        originalName = row[1] if len(row) > 1 else ''

        # test for exact match
        url = baseURLexact + number + '.json'
        try:
            response = requests.get(url, timeout=requestTimeout)
        except requests.RequestException:
            # A network failure on one number must not abort the whole batch.
            print(number, '|', 'Check URL', '|', originalName)
            continue

        if response.status_code != 200:
            print(number, '|', 'Check URL', '|', originalName)
            continue

        try:
            LCName = _authoritative_label(json.loads(response.text), number)
            # No output is produced when the payload has no matching record.
            if LCName is not None:
                # The print stays inside the try: a UnicodeEncodeError from
                # the console is a ValueError and is reported below
                # (presumably the origin of the 'Check encoding' message).
                print(number, '|', LCName, '|', originalName)
        except (ValueError, KeyError, IndexError, TypeError):
            # ValueError covers invalid JSON and encode errors; the others
            # cover a payload that is not shaped as expected.
            print(number, '|', 'Check encoding', '|', originalName)

# The triple-quoted block below is disabled code (redirect reporting, a
# per-request pause and the runtime report). It is kept verbatim; its
# original indentation is not recoverable from this copy of the file.
'''
if response.history:
for resp in response.history:
if str(resp.status_code) == '303':
print (number, '|', resp.url.split("/names/",1)[1], '|' , row[1])
else:
print ("No match")
#add a one second pause for each
time.sleep(1)
# show script runtime
elapsedTime = time.time() - startTime
m, s = divmod(elapsedTime, 60)
h, m = divmod(m, 60)
print ('Total script run time: ', '%d:%02d:%02d' % (h, m, s))
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment