Created
July 18, 2020 10:35
-
-
Save cjw296/b8ed333dff58ac1c3f9a15177f54e9eb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import concurrent.futures | |
import json | |
import requests | |
from datetime import datetime | |
from pathlib import Path | |
import pandas as pd | |
from tqdm.notebook import tqdm | |
from urllib.parse import urlparse, parse_qs | |
# Names of the API fields requested via ``structure`` / used as filter keys.
release_timestamp = 'releaseTimestamp'
date = 'date'
area_type = 'areaType'
area_name = 'areaName'
area_code = 'areaCode'
new_cases_by_specimen_date = 'newCasesBySpecimenDate'
new_deaths_by_death_date = 'newDeathsByDeathDate'

# Values for the ``areaType`` filter.
ltla = 'ltla'
nation = 'nation'

# The user's Downloads directory as a plain string ('~' already expanded).
storage = str(Path('~/Downloads').expanduser())
def get(filters, structure, **params):
    """Request one page from the data API and return the decoded JSON body.

    Args:
        filters: mapping of filter name to value; sent as a single
            ``name=value;name=value`` string in the ``filters`` parameter.
        structure: iterable of field names; sent as a JSON object that maps
            each name to itself in the ``structure`` parameter.
        **params: additional query-string parameters (for example ``page``);
            these take precedence over ``filters``/``structure`` on a clash.

    Returns:
        The parsed JSON response.

    Raises:
        ValueError: carrying the HTTP status code when it is not 200.
    """
    filter_expr = ';'.join(f'{k}={v}' for (k, v) in filters.items())
    field_map = json.dumps({name: name for name in structure})
    query_string = {'filters': filter_expr, 'structure': field_map, **params}
    response = requests.get(
        'https://api.coronavirus-staging.data.gov.uk/v1/data',
        timeout=20,
        params=query_string,
    )
    if response.status_code == 200:
        return response.json()
    raise ValueError(response.status_code)
def query(filters, structure, max_workers=None, max_attempts=None, **params):
    """Fetch every page of an API query and return all rows as a DataFrame.

    Page 1 is fetched synchronously; the total page count is read from the
    ``page`` query parameter of ``response['pagination']['last']``. Any
    remaining pages are requested concurrently through a thread pool, and
    pages whose request raised are retried in a further round.

    Args:
        filters: passed through to ``get``.
        structure: passed through to ``get``.
        max_workers: size of the thread pool; when falsy, one thread per
            remaining page is used.
        max_attempts: optional cap on the number of retry rounds. ``None``
            (the default) keeps retrying failed pages until they all succeed.
        **params: extra query-string parameters passed through to ``get``.

    Returns:
        A ``pandas.DataFrame`` built from the ``data`` entries of every page,
        in ascending page order.

    Raises:
        RuntimeError: only when ``max_attempts`` is set and some pages still
            fail after that many rounds; chained from the last failure.
    """
    first = get(filters, structure, page=1, **params)
    max_page = int(parse_qs(urlparse(first['pagination']['last']).query)['page'][0])

    # Keyed by page number so the rows can be assembled in page order no
    # matter which request happens to finish first.
    rows_by_page = {1: first['data']}

    if max_page > 1:
        todo = list(range(2, max_page + 1))
        attempt = 0
        last_error = None
        # The context manager closes the progress bar even if a round raises.
        with tqdm(total=max_page) as t:
            t.update(1)  # page 1 has already been fetched
            with concurrent.futures.ThreadPoolExecutor(
                max_workers=max_workers or max_page - 1
            ) as executor:
                while todo:
                    attempt += 1
                    bad = []
                    t.set_postfix({'errors': len(bad), 'attempt': attempt})
                    futures = {
                        executor.submit(get, filters, structure, page=page, **params): page
                        for page in todo
                    }
                    for future in concurrent.futures.as_completed(futures):
                        page = futures[future]
                        try:
                            response = future.result()
                        except Exception as exc:
                            # Best effort: remember the page for the next round.
                            last_error = exc
                            bad.append(page)
                            t.set_postfix({'errors': len(bad), 'attempt': attempt})
                        else:
                            rows_by_page[page] = response['data']
                            t.update(1)
                    todo = bad
                    if todo and max_attempts is not None and attempt >= max_attempts:
                        raise RuntimeError(
                            f'{len(todo)} page(s) still failing after {attempt} attempt(s)'
                        ) from last_error

    result = [row for page in sorted(rows_by_page) for row in rows_by_page[page]]
    return pd.DataFrame(result)
# Query the API for the listed fields, filtered to areaType=ltla.
area_data = query(
    filters={area_type: ltla},
    structure=[
        release_timestamp,
        date,
        area_name,
        area_code,
        new_cases_by_specimen_date,
        new_deaths_by_death_date,
    ],
    # max_workers=1
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# run time is a few seconds | |
from datetime import date as Date, timedelta | |
from pathlib import Path | |
import requests | |
# Template for the "latest cases" download; ``data_type`` is both the URL
# path segment and the file extension.
cases_url = 'https://coronavirus.data.gov.uk/downloads/{data_type}/coronavirus-cases_latest.{data_type}'

# Directory the downloaded files are written to ('~' already expanded).
base = str(Path('~/Downloads').expanduser())

# Date embedded in the saved file names; presumably ``days`` is meant to be
# raised by hand to target an earlier day.
for_date = Date.today() - timedelta(days=0)

# When True, download even if the dated file already exists.
force_download = False
def download(url, path, modify=lambda text: text):
    """Fetch ``url`` and write the (optionally transformed) body to ``path``.

    Args:
        url: address to fetch with an HTTP GET.
        path: object providing ``open('wb')`` (a ``pathlib.Path`` in this
            file); the file is created or overwritten.
        modify: callable applied to the raw response bytes before they are
            written; the default returns them unchanged.

    Raises:
        AssertionError: if the response status is not 200.
    """
    # Bounded so that a stalled server cannot hang the run indefinitely.
    response = requests.get(url, timeout=20)
    # Raised explicitly rather than via ``assert`` so the check still runs
    # under ``python -O``; the exception type is kept for existing callers.
    if response.status_code != 200:
        raise AssertionError(f'GET {url} returned HTTP {response.status_code}')
    # Transform before opening the target: if ``modify`` fails, no empty file
    # is left behind to be mistaken for a completed download.
    payload = modify(response.content)
    with path.open('wb') as target:
        target.write(payload)
# Download the cases data once per format, skipping files already on disk
# unless a re-download is forced.
for suffix in ('csv', 'json'):
    path = Path(base).expanduser() / f'coronavirus-cases_{for_date}.{suffix}'
    if not force_download and path.exists():
        continue
    download(cases_url.format(data_type=suffix), path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment