Last active
May 28, 2020 16:51
-
-
Save kschlottmann/ee9e95b38dc222bc2dafd05f518f11ab to your computer and use it in GitHub Desktop.
Get the authorized name from the LC JSON record for a given authority file number.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import json
import time
import urllib

import requests
startTime = time.time()
baseURLexact = 'http://id.loc.gov/authorities/names/'
# Example record URL: http://id.loc.gov/authorities/names/nr2002027244.json

# Key under which a record node carries its authoritative label.
AUTHORITATIVE_LABEL = 'http://www.loc.gov/mads/rdf/v1#authoritativeLabel'
# Seconds to wait on the server before giving up on a single request, so one
# stalled connection cannot hang the whole batch.
REQUEST_TIMEOUT = 30


def find_authoritative_label(data, number):
    """Return the authoritative label for authority *number*, or None.

    Args:
        data: Decoded JSON body of a record; expected to be a list of
            node dicts, each normally carrying an '@id' key.
        number: Authority file number (e.g. 'nr2002027244').

    Returns:
        The '@value' of the first authoritative label on the node whose
        '@id' equals ``baseURLexact + number``, or None when no such node
        or label exists.

    Raises:
        ValueError: If *data* is not a list.
        KeyError, IndexError, TypeError: If the label entry does not have
            the expected ``[{'@value': ...}]`` shape.
    """
    if not isinstance(data, list):
        raise ValueError('expected a JSON array of nodes')
    target_id = baseURLexact + number
    for node in data:
        # Nodes that are not dicts or lack '@id' simply cannot match.
        if not isinstance(node, dict) or node.get('@id') != target_id:
            continue
        labels = node.get(AUTHORITATIVE_LABEL)
        if labels:
            return labels[0]['@value']
    return None


def resolve_label(status_code, body, number):
    """Return the text for the middle output column of one lookup.

    Args:
        status_code: HTTP status code of the response.
        body: Response body as text.
        number: Authority file number that was requested.

    Returns:
        The authoritative label on success; 'Check URL' for a non-200
        status; 'Check encoding' when the body cannot be decoded or does
        not have the expected structure; 'No label found' when the body
        is well-formed but holds no label for *number*.
    """
    if status_code != 200:
        return 'Check URL'
    try:
        label = find_authoritative_label(json.loads(body), number)
    except (ValueError, KeyError, IndexError, TypeError):
        # json.JSONDecodeError is a ValueError subclass. Catching only
        # these keeps Ctrl-C and SystemExit working during a long batch.
        return 'Check encoding'
    if label is None:
        # Report the miss instead of silently dropping the input row.
        return 'No label found'
    return label


def main():
    """Look up every authority number in input_numbers.csv and print results.

    Each non-empty CSV row supplies the authority number in column 0 and,
    optionally, the original name in column 1. One line is printed per row
    in the form ``number | result | original name``.
    """
    # newline='' is what the csv module expects for files it reads.
    with open('input_numbers.csv', 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to look up.
                continue
            number = str(row[0])
            original_name = str(row[1]) if len(row) > 1 else ''
            # test for exact match
            url = baseURLexact + number + '.json'
            try:
                response = requests.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException:
                # A network failure on one row must not abort the batch.
                result = 'Check URL'
            else:
                result = resolve_label(
                    response.status_code, response.text, number)
            print(number, '|', result, '|', original_name)

    # NOTE(review): the code below was disabled in the original script by
    # wrapping it in a string literal. It is kept here, still disabled, as
    # comments. Its original indentation was lost, so the nesting shown is
    # a best guess; the first part appears to belong inside the row loop.
    #
    # if response.history:
    #     for resp in response.history:
    #         if str(resp.status_code) == '303':
    #             print (number, '|', resp.url.split("/names/",1)[1], '|' , row[1])
    # else:
    #     print ("No match")
    # #add a one second pause for each
    # time.sleep(1)
    # # show script runtime
    # elapsedTime = time.time() - startTime
    # m, s = divmod(elapsedTime, 60)
    # h, m = divmod(m, 60)
    # print ('Total script run time: ', '%d:%02d:%02d' % (h, m, s))


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment