Skip to content

Instantly share code, notes, and snippets.

@peterstadler
Last active December 10, 2019 07:36
Show Gist options
  • Save peterstadler/ae87e2308bbf15783097c059fd4fe66f to your computer and use it in GitHub Desktop.
Save peterstadler/ae87e2308bbf15783097c059fd4fe66f to your computer and use it in GitHub Desktop.
Check Beacon File for outdated IDs
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Script for checking GND IDs.
# 1. Grab all GNDs from the beacon file
# 2. Check every GND by making a HEAD request and see whether the returned status code is 303
import httplib
from urlparse import urlparse
GND_BASE = "https://d-nb.info/gnd/"
BEACON_URL = "http://www.hait.tu-dresden.de/link/fhk.txt"
def make_request(url, verb):
    """Send a single HTTP(S) request and return the response object.

    Args:
        url: Absolute ``http://`` or ``https://`` URL to request.
        verb: HTTP method to use, e.g. ``'HEAD'`` or ``'GET'``.

    Returns:
        The response object from ``conn.getresponse()``, or ``None`` when
        the URL scheme is not http/https or the request fails.
    """
    try:
        url_tokens = urlparse(url)
        if url_tokens.scheme == 'http':
            connection_class = httplib.HTTPConnection
        elif url_tokens.scheme == 'https':
            connection_class = httplib.HTTPSConnection
        else:
            # Unsupported scheme: say so explicitly instead of relying on an
            # unbound-variable error being swallowed further down.
            return None

        # An empty path is not a valid request target, and the query string
        # is part of the resource being addressed, so keep both intact.
        target = url_tokens.path or '/'
        if url_tokens.query:
            target += '?' + url_tokens.query

        conn = connection_class(url_tokens.netloc)
        conn.request(verb, target)
        return conn.getresponse()
    except (httplib.HTTPException, EnvironmentError, ValueError):
        # Best effort: malformed URLs, protocol errors and socket/SSL
        # failures are all reported to the caller as "no response".
        return None
def check_gnd(id):
    """Check a single GND ID by issuing a HEAD request for ``GND_BASE + id``.

    A http status code 303 (See Other) is considered good,
    everything else bad.

    Args:
        id: The GND ID to check (the name shadows the builtin, but it is
            kept because it is part of the function's signature).

    Returns:
        ``1`` when the server answers 303;
        the last path segment of the ``Location`` header when the server
        answers 301;
        ``False`` in every other case, including a failed request or a 301
        without a usable ``Location`` header.
    """
    response = make_request(GND_BASE + id, 'HEAD')
    if response is None:
        # make_request() signals any failure with None.
        return False

    if response.status == 303:
        return 1

    if response.status == 301:
        location = response.getheader('location')
        if not location:
            return False
        # Ignore a trailing slash so the last segment is never empty.
        return location.rstrip('/').split('/')[-1]

    return False
def read_gnds():
    """Download the beacon file and return its data lines.

    Issues a GET request for ``BEACON_URL`` and splits the body into lines.
    Lines starting with ``#`` and blank lines are dropped, since neither can
    be an ID to check.

    Returns:
        A list with the remaining lines (possibly empty), or ``None`` when
        the beacon file could not be requested.
    """
    response = make_request(BEACON_URL, 'GET')
    if not response:
        return None

    # Filter in one pass instead of repeatedly re-slicing the list, and do
    # not index into a line (or the list) that may be empty.
    return [
        line for line in response.read().splitlines()
        if line.strip() and not line.startswith('#')
    ]
# Main loop
# Iterate over all GND IDs and print outdated ones
gnds = read_gnds()
if gnds is None:
    # read_gnds() returns None when the beacon file could not be fetched;
    # exit with a readable message instead of failing on iteration.
    raise SystemExit('Could not retrieve beacon file from ' + BEACON_URL)

for gnd in gnds:
    check = check_gnd(gnd)
    if check != 1:
        # Single parenthesised argument: prints the same text whether
        # print is a statement or a function.
        print('your ID: ' + gnd + ' --> new ID: ' + str(check))
@peterstadler
Copy link
Author

replace lines 42/43 with

        while(dic[0][0] == '#'):
          dic = dic[1:]
        return dic

Thanks! Just updated my gist (finally)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment