Skip to content

Instantly share code, notes, and snippets.

@mh0w
Last active April 25, 2024 14:11
Show Gist options
  • Select an option

  • Save mh0w/70015395a0c5f2648c024ac1f830d3f7 to your computer and use it in GitHub Desktop.

Select an option

Save mh0w/70015395a0c5f2648c024ac1f830d3f7 to your computer and use it in GitHub Desktop.
Working with the Office for National Statistics API (developer.ons.gov.uk)
"""
Program for accessing the ONS API (see https://developer.ons.gov.uk).
The ONS API can be used to programmatically access certain data that ONS has published.
This program provides functions for (1) checking what datasets are available through the
ONS API and (2) accessing datasets through the ONS API. To use this program, run the
entire script; You will then be able to use the get_list_of_available_datasets() and
get_latest_data_from_ons_api() functions.
get_list_of_available_datasets()
Run this to get a list of available datasets. The list will be returned to you
in the form of a dataframe. You can optionally enable saving to an xlsx file
as well (disabled by default).
Examples:
ons_api_datasets_available = get_list_of_available_datasets()
ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True)
get_latest_data_from_ons_api(dataset_id)
Run this to get the chosen dataset via ONS API. The dataset will be returned to
you in the form of a dataframe. You can optionally enable saving to an xlsx file
as well (disabled by default). You can find out the dataset_id by running the
ons_api_datasets_available() function to get a list of all available datasets.
Examples:
my_df1 = get_latest_data_from_ons_api(dataset_id="TS033")
my_df2 = get_latest_data_from_ons_api(dataset_id="TS034", save_to_xlsx=True)
my_df3 = get_latest_data_from_ons_api(dataset_id="mid-year-pop-est")
my_df4 = get_latest_data_from_ons_api("weekly-deaths-local-authority")
"""
import requests
import tempfile
import pandas as pd
import json
import os
import warnings
import pyinputplus as pyip
def get_list_of_available_datasets(save_to_xlsx=False):
"""
Get a list of the datasets available from the ONS API. Returns a pandas dataframe.
Parameters
----------
save_to_xlsx : bool, optional
Whether to save the dataframe (that contains the list of available datasets
from the ONS API) to an xlsx. The default is False.
Returns
-------
list_of_available_datasets_df : pandas.DataFrame()
A list of the datasets available from the ONS API.
"""
api_url = "https://api.beta.ons.gov.uk/v1/datasets"
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="Unverified HTTPS request")
list_of_available_datasets = json.loads(
requests.get(f"{api_url}?limit=1000", verify=False).text
)
print("\nDatasets available :", list_of_available_datasets["total_count"])
print("Datasets listed :", list_of_available_datasets["count"])
columns_of_interest = ["id", "title", "description"]
list_of_available_datasets_df = pd.DataFrame(
{
col: [d[col] for d in list_of_available_datasets["items"]]
for col in columns_of_interest
}
)
if save_to_xlsx is True:
with tempfile.TemporaryFile(suffix=".xlsx", delete=False) as file:
list_of_available_datasets_df.to_excel(file.name, index=False)
os.startfile(file.name)
print(
"\nTemporary datasets list excel file created and being opened. "
"Use 'save as' if you want to keep it"
)
return list_of_available_datasets_df
def get_latest_data_from_ons_api(dataset_id, save_to_xlsx=False):
"""
Get the chosen dataset (dataset_id) from the ONS API. Returns a pandas dataframe.
Parameters
----------
dataset_id : str
The unique identifier for the dataset.
save_to_xlsx : TYPE, optional
Whether to save the dataframe to an xlsx file. The default is False.
Returns
-------
latest_data : pandas.DataFrame()
The requested dataset from the ONS API.
"""
with warnings.catch_warnings():
warnings.filterwarnings("ignore", message="Unverified HTTPS request")
dataset_url = json.loads(
requests.get(
f"https://api.beta.ons.gov.uk/v1/datasets/{dataset_id}", verify=False
).text
)["links"]["latest_version"]["href"]
latest_data_url = json.loads(requests.get(dataset_url, verify=False).text)[
"downloads"
]["csv"]["href"]
latest_data = pd.read_csv(
latest_data_url, storage_options={"User-Agent": "Mozilla/5.0"}
)
if save_to_xlsx is True:
temp_excel_file = tempfile.TemporaryFile(suffix=".xlsx", delete=False)
with temp_excel_file as file:
latest_data.to_excel(file.name, index=False)
os.startfile(file.name)
print(
f"\nTemporary {dataset_id} excel file created and being opened. "
"Use 'save as' if you want to keep it"
)
return latest_data
#
# Ask the user whether they want to show a list of available datasets
#
show_list_choice = pyip.inputInt(
min=1,
max=3,
prompt="\nShow a list of available datasets (dataset_id values) from the ONS API?"
"\n 1: Yes, save as a dataframe"
"\n 2: Yes, save as a dataframe and as an xlsx file"
"\n 3: No, continue without listing available datasets"
"\nYour choice (integer): ",
)
if show_list_choice == 1:
ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=False)
elif show_list_choice == 2:
ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True)
#
# Ask the user whether they want to download a dataset
#
download_data_choice = pyip.inputInt(
min=1,
max=3,
prompt="\nDo you want to download a dataset (using a dataset_id) from the ONS API?"
"\n 1: Yes, save as a dataframe"
"\n 2: Yes, save as a dataframe and as an xlsx file"
"\n 3: No, stop the program"
"\nYour choice (integer): ",
)
print("")
if download_data_choice in [1, 2]:
dataset_id_choice = pyip.inputStr(
prompt="\nWhat is the dataset_id of the dataset you want to download?"
"\nThe latest available version will be downloaded."
"\nEnter dataset_id here (e.g., TS008 or mid-year-pop-est): "
)
print(
f"\nOkay, trying to download latest {dataset_id_choice} dataset, please wait..."
)
if download_data_choice == 1:
my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=False)
elif download_data_choice == 2:
my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=True)
print("\nThe program has stopped running.")
pandas
pyinputplus
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment