Created
June 13, 2017 22:05
-
-
Save nettrom/c692d7278f621cacc71acfc1121056a3 to your computer and use it in GitHub Desktop.
Handling lag in the MediaWiki API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Excerpt from the class, showing the method that calls the Wikidata API.
## The approach is the same for any other MediaWiki API that supports the maxlag parameter.
def make_api_request(self, items, http_session): | |
''' | |
Make an HTTP request to the Wikidata API for info on the given list | |
of items using the given HTTP session. | |
:param items: the QIDs of the items we'll be getting data about | |
:type items: list | |
:param http_session: the HTTP session we'll use | |
:type http_session: requests.Session | |
''' | |
entity_data = {} | |
done = False | |
num_retries = 0 | |
while not done and num_retries < self.max_retries: | |
## We use a default of maxlag=5 | |
## ref https://www.mediawiki.org/wiki/Manual:Maxlag_parameter | |
item_url = "{base}{maxlag}&ids={idlist}".format( | |
base=self.wd_url, maxlag="&maxlag=5", | |
idlist="|".join(items)) | |
response = http_session.get(item_url) | |
if response.status_code != 200: | |
logging.warning('Wikidata returned status {}'.format(response.status_code)) | |
done = True | |
continue | |
try: | |
content = response.json() | |
except ValueError: | |
logging.warning('Unable to decode Wikidata response as JSON') | |
sleep(1) | |
num_retries += 1 | |
continue | |
except KeyError: | |
logging.warning("Wikidata response keys not as expected") | |
sleep(1) | |
num_retries += 1 | |
continue | |
if "error" in content and content['error']['code'] == 'maxlag': | |
## Pause before trying again | |
ptime = max(5, int(response.headers['Retry-After'])) | |
logging.warning('WD API is lagged, waiting {} seconds to try again'.format(ptime)) | |
sleep(ptime) | |
continue | |
entity_data = content['entities'] | |
done = True | |
continue | |
return(entity_data) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment