Last active
December 10, 2019 07:36
-
-
Save peterstadler/ae87e2308bbf15783097c059fd4fe66f to your computer and use it in GitHub Desktop.
Check Beacon File for outdated IDs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Script for checking GND IDs.
# 1. Grab all GNDs from the beacon file
# 2. Check every GND by making a HEAD request and see whether the returned status code is 303
import httplib | |
from urlparse import urlparse | |
# Prefix that a GND ID is appended to in order to build the URL to check.
GND_BASE = "https://d-nb.info/gnd/"
# Location of the beacon file the GND IDs are read from.
BEACON_URL = "http://www.hait.tu-dresden.de/link/fhk.txt"
def make_request(url, verb):
    """Send one HTTP(S) request and return the raw response object.

    Parameters:
        url:  absolute URL; only the 'http' and 'https' schemes are handled.
        verb: HTTP method handed to the connection, e.g. 'GET' or 'HEAD'.

    Returns:
        The object returned by ``conn.getresponse()``, or None when the
        scheme is unsupported, the URL cannot be parsed, or the request
        fails on the network/protocol level.
    """
    try:
        url_tokens = urlparse(url)
        if url_tokens.scheme == 'http':
            conn = httplib.HTTPConnection(url_tokens.netloc)
        elif url_tokens.scheme == 'https':
            conn = httplib.HTTPSConnection(url_tokens.netloc)
        else:
            # Unsupported or missing scheme: bail out explicitly instead of
            # relying on an unbound ``conn`` blowing up further down.
            return None
        # An empty path would yield a malformed request line, and the query
        # string belongs to the request target as well.
        target = url_tokens.path or '/'
        if url_tokens.query:
            target += '?' + url_tokens.query
        conn.request(verb, target)
        # The connection is intentionally left open: the caller may still
        # need to read the body from the returned response.
        return conn.getresponse()
    except (httplib.HTTPException, IOError, ValueError):
        # HTTPException: protocol errors (bad status line, invalid URL/port);
        # IOError: socket and SSL failures; ValueError: unparsable URL.
        return None
def check_gnd(id):
    """Check a single GND ID with a HEAD request against GND_BASE.

    A http status code 303 (See Other) is considered good,
    everything else bad.

    Parameters:
        id: the GND ID as a string; it is appended to GND_BASE.
            (The name shadows the builtin but is kept for compatibility.)

    Returns:
        1 when the server answers 303;
        the last path segment of the Location header when it answers 301
        (presumably the replacement ID -- verify against the GND service);
        False in every other case, including a failed request or a 301
        without a Location header.
    """
    req = make_request(GND_BASE + id, 'HEAD')
    if req is None:
        # make_request() signals any connection problem with None.
        return False
    if req.status == 303:
        return 1
    if req.status == 301:
        location = req.getheader('location')
        if location:
            return location.split('/')[-1]
    return False
def read_gnds():
    """Download the beacon file and return its lines without the header.

    Returns:
        A list with the lines of the file at BEACON_URL, starting at the
        first line that is neither empty nor begins with '#'; the list is
        empty when no such line exists. None when the download failed.
    """
    req = make_request(BEACON_URL, 'GET')
    if not req:
        return req
    lines = req.read().splitlines()
    # Skip the leading header. Slicing with [:1] is safe on empty lines and
    # the bounds check stops at the end of a file that is all header, both of
    # which made the former ``dic[0][0]`` test raise IndexError.
    start = 0
    while start < len(lines) and lines[start][:1] in ('', '#'):
        start += 1
    return lines[start:]
# Main loop
# Iterate over all GND IDs and print outdated ones
gnds = read_gnds()
if gnds is None:
    # read_gnds() returns None when the beacon file could not be fetched;
    # stop with a clear message instead of a TypeError from the for loop.
    raise SystemExit('could not read beacon file: ' + BEACON_URL)
for gnd in gnds:
    check = check_gnd(gnd)
    if check != 1:
        # Single-argument call form prints the same text as the statement.
        print('your ID: ' + gnd + ' --> new ID: ' + str(check))
replace lines 42/43 with

    while(dic[0][0] == '#'):
        dic = dic[1:]
    return dic
Thanks! Just updated my gist (finally)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
replace lines 42/43 with