Last active
April 25, 2024 14:11
-
-
Save mh0w/70015395a0c5f2648c024ac1f830d3f7 to your computer and use it in GitHub Desktop.
Working with the Office for National Statistics API (developer.ons.gov.uk)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| Program for accessing the ONS API (see https://developer.ons.gov.uk). | |
| The ONS API can be used to programmatically access certain data that ONS has published. | |
| This program provides functions for (1) checking what datasets are available through the | |
| ONS API and (2) accessing datasets through the ONS API. To use this program, run the | |
| entire script; You will then be able to use the get_list_of_available_datasets() and | |
| get_latest_data_from_ons_api() functions. | |
| get_list_of_available_datasets() | |
| Run this to get a list of available datasets. The list will be returned to you | |
| in the form of a dataframe. You can optionally enable saving to an xlsx file | |
| as well (disabled by default). | |
| Examples: | |
| ons_api_datasets_available = get_list_of_available_datasets() | |
| ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True) | |
| get_latest_data_from_ons_api(dataset_id) | |
| Run this to get the chosen dataset via ONS API. The dataset will be returned to | |
| you in the form of a dataframe. You can optionally enable saving to an xlsx file | |
| as well (disabled by default). You can find out the dataset_id by running the | |
| ons_api_datasets_available() function to get a list of all available datasets. | |
| Examples: | |
| my_df1 = get_latest_data_from_ons_api(dataset_id="TS033") | |
| my_df2 = get_latest_data_from_ons_api(dataset_id="TS034", save_to_xlsx=True) | |
| my_df3 = get_latest_data_from_ons_api(dataset_id="mid-year-pop-est") | |
| my_df4 = get_latest_data_from_ons_api("weekly-deaths-local-authority") | |
| """ | |
| import requests | |
| import tempfile | |
| import pandas as pd | |
| import json | |
| import os | |
| import warnings | |
| import pyinputplus as pyip | |
| def get_list_of_available_datasets(save_to_xlsx=False): | |
| """ | |
| Get a list of the datasets available from the ONS API. Returns a pandas dataframe. | |
| Parameters | |
| ---------- | |
| save_to_xlsx : bool, optional | |
| Whether to save the dataframe (that contains the list of available datasets | |
| from the ONS API) to an xlsx. The default is False. | |
| Returns | |
| ------- | |
| list_of_available_datasets_df : pandas.DataFrame() | |
| A list of the datasets available from the ONS API. | |
| """ | |
| api_url = "https://api.beta.ons.gov.uk/v1/datasets" | |
| with warnings.catch_warnings(): | |
| warnings.filterwarnings("ignore", message="Unverified HTTPS request") | |
| list_of_available_datasets = json.loads( | |
| requests.get(f"{api_url}?limit=1000", verify=False).text | |
| ) | |
| print("\nDatasets available :", list_of_available_datasets["total_count"]) | |
| print("Datasets listed :", list_of_available_datasets["count"]) | |
| columns_of_interest = ["id", "title", "description"] | |
| list_of_available_datasets_df = pd.DataFrame( | |
| { | |
| col: [d[col] for d in list_of_available_datasets["items"]] | |
| for col in columns_of_interest | |
| } | |
| ) | |
| if save_to_xlsx is True: | |
| with tempfile.TemporaryFile(suffix=".xlsx", delete=False) as file: | |
| list_of_available_datasets_df.to_excel(file.name, index=False) | |
| os.startfile(file.name) | |
| print( | |
| "\nTemporary datasets list excel file created and being opened. " | |
| "Use 'save as' if you want to keep it" | |
| ) | |
| return list_of_available_datasets_df | |
| def get_latest_data_from_ons_api(dataset_id, save_to_xlsx=False): | |
| """ | |
| Get the chosen dataset (dataset_id) from the ONS API. Returns a pandas dataframe. | |
| Parameters | |
| ---------- | |
| dataset_id : str | |
| The unique identifier for the dataset. | |
| save_to_xlsx : TYPE, optional | |
| Whether to save the dataframe to an xlsx file. The default is False. | |
| Returns | |
| ------- | |
| latest_data : pandas.DataFrame() | |
| The requested dataset from the ONS API. | |
| """ | |
| with warnings.catch_warnings(): | |
| warnings.filterwarnings("ignore", message="Unverified HTTPS request") | |
| dataset_url = json.loads( | |
| requests.get( | |
| f"https://api.beta.ons.gov.uk/v1/datasets/{dataset_id}", verify=False | |
| ).text | |
| )["links"]["latest_version"]["href"] | |
| latest_data_url = json.loads(requests.get(dataset_url, verify=False).text)[ | |
| "downloads" | |
| ]["csv"]["href"] | |
| latest_data = pd.read_csv( | |
| latest_data_url, storage_options={"User-Agent": "Mozilla/5.0"} | |
| ) | |
| if save_to_xlsx is True: | |
| temp_excel_file = tempfile.TemporaryFile(suffix=".xlsx", delete=False) | |
| with temp_excel_file as file: | |
| latest_data.to_excel(file.name, index=False) | |
| os.startfile(file.name) | |
| print( | |
| f"\nTemporary {dataset_id} excel file created and being opened. " | |
| "Use 'save as' if you want to keep it" | |
| ) | |
| return latest_data | |
| # | |
| # Ask the user whether they want to show a list of available datasets | |
| # | |
| show_list_choice = pyip.inputInt( | |
| min=1, | |
| max=3, | |
| prompt="\nShow a list of available datasets (dataset_id values) from the ONS API?" | |
| "\n 1: Yes, save as a dataframe" | |
| "\n 2: Yes, save as a dataframe and as an xlsx file" | |
| "\n 3: No, continue without listing available datasets" | |
| "\nYour choice (integer): ", | |
| ) | |
| if show_list_choice == 1: | |
| ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=False) | |
| elif show_list_choice == 2: | |
| ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True) | |
| # | |
| # Ask the user whether they want to download a dataset | |
| # | |
| download_data_choice = pyip.inputInt( | |
| min=1, | |
| max=3, | |
| prompt="\nDo you want to download a dataset (using a dataset_id) from the ONS API?" | |
| "\n 1: Yes, save as a dataframe" | |
| "\n 2: Yes, save as a dataframe and as an xlsx file" | |
| "\n 3: No, stop the program" | |
| "\nYour choice (integer): ", | |
| ) | |
| print("") | |
| if download_data_choice in [1, 2]: | |
| dataset_id_choice = pyip.inputStr( | |
| prompt="\nWhat is the dataset_id of the dataset you want to download?" | |
| "\nThe latest available version will be downloaded." | |
| "\nEnter dataset_id here (e.g., TS008 or mid-year-pop-est): " | |
| ) | |
| print( | |
| f"\nOkay, trying to download latest {dataset_id_choice} dataset, please wait..." | |
| ) | |
| if download_data_choice == 1: | |
| my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=False) | |
| elif download_data_choice == 2: | |
| my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=True) | |
| print("\nThe program has stopped running.") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| pandas | |
| pyinputplus |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment