Created
April 10, 2024 16:20
-
-
Save duetosymmetry/b4389220bf0c4435bd29db5ba3351753 to your computer and use it in GitHub Desktop.
Convert INSPIRE texkey to doi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import urllib.request | |
def maybe_get_insp_data(url, max_retries=3, sleep_ms=500):
    """Try to query an INSPIRE URL, retrying after a sleep on HTTP 429.

    Parameters
    ----------
    url: string
        URL handed to ``urllib.request.urlopen``.
    max_retries: int, optional [default: 3]
        Number of HTTP 429 responses tolerated before giving up.
    sleep_ms: numeric, optional [default: 500]
        Length of sleep (in milliseconds) between HTTP 429 codes

    Returns
    -------
    response: bytes or None
        The positive response from INSPIRE; None if an error occurs, or
        if we failed the max number of times.
    """
    # Imported here so the function does not depend on names the module
    # header never brings into scope (``time``) or only gets as a side
    # effect of ``import urllib.request`` (``urllib.error``).
    import time
    import urllib.error
    import urllib.request

    print(f"requesting {url}")
    n_retries = 0
    while n_retries < max_retries:
        try:
            response = urllib.request.urlopen(url)
        except urllib.error.HTTPError as e:
            print(e)
            if e.code != 429:
                return None
            # When urlopen raises there is no response object to inspect;
            # the headers of the error reply live on the exception itself.
            headers = e.headers
            retry_time = headers.get("retry-in") if headers is not None else None
            print(f'got 429 with "retry-in"={retry_time}')
            print(f"going to sleep for {sleep_ms}ms")
            time.sleep(sleep_ms / 1000.0)
            n_retries += 1
        except urllib.error.URLError as e:
            print(e)
            return None
        else:  # Success
            # Close the connection as soon as the payload has been read.
            with response:
                return response.read()

    # maxed out on retries
    print(f"too many ({n_retries}) retries")
    return None
def maybe_texkey_to_doi(texkey):
    """Look up the first DOI that INSPIRE lists for a texkey.

    Parameters
    ----------
    texkey: string
        INSPIRE texkey to search for.

    Returns
    -------
    doi: string, bytes or None
        The ``value`` of the first DOI of the first hit. If the response
        cannot be parsed or lacks that entry, the raw response from
        ``maybe_get_insp_data`` is returned instead (None when the
        request itself failed).
    """
    import urllib.parse

    # Percent-encode the key so characters such as spaces or '&' cannot
    # break or alter the query string; ':' is kept literal.
    quoted_key = urllib.parse.quote(str(texkey), safe=":")
    res = maybe_get_insp_data(
        f"https://inspirehep.net/api/literature/?format=json&q=texkey:{quoted_key}"
    )
    try:
        resj = json.loads(res)
        return resj['hits']['hits'][0]['metadata']['dois'][0]['value']
    except (TypeError, ValueError, KeyError, IndexError):
        # TypeError: res is None or the JSON has an unexpected shape;
        # ValueError: invalid JSON; KeyError/IndexError: no hit or no DOI.
        print("Something went wrong")
        return res
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment