mh0w · April 25, 2024 14:11
diff --git a/Helper script for using the ONS API.py b/Helper script for using the ONS API.py
 """
 Program for accessing the ONS API (see https://developer.ons.gov.uk).

 The ONS API can be used to programmatically access certain data that ONS has published.
 This program provides functions for (1) checking what datasets are available through the
 ONS API and (2) accessing datasets through the ONS API. To use this program, run the
 entire script; You will then be able to use the get_list_of_available_datasets() and
 get_latest_data_from_ons_api() functions.

 get_list_of_available_datasets()
    Run this to get a list of available datasets. The list will be returned to you
    in the form of a dataframe. You can optionally enable saving to an xlsx file
    as well (disabled by default).
    Examples:
        ons_api_datasets_available = get_list_of_available_datasets()
        ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True)

 get_latest_data_from_ons_api(dataset_id)
    Run this to get the chosen dataset via ONS API. The dataset will be returned to
    you in the form of a dataframe. You can optionally enable saving to an xlsx file
    as well (disabled by default). You can find out the dataset_id by running the
    ons_api_datasets_available() function to get a list of all available datasets.
    Examples:
        my_df1 = get_latest_data_from_ons_api(dataset_id="TS033")
        my_df2 = get_latest_data_from_ons_api(dataset_id="TS034", save_to_xlsx=True)
        my_df3 = get_latest_data_from_ons_api(dataset_id="mid-year-pop-est")
        my_df4 = get_latest_data_from_ons_api("weekly-deaths-local-authority")
 """

 import requests
 import tempfile
 import pandas as pd
 import json
 import os
 import warnings
 import pyinputplus as pyip


 def get_list_of_available_datasets(save_to_xlsx=False):
    """
    Get a list of the datasets available from the ONS API. Returns a pandas dataframe.

    Parameters
    ----------
    save_to_xlsx : bool, optional
        Whether to save the dataframe (that contains the list of available datasets
        from the ONS API) to an xlsx. The default is False.

    Returns
    -------
    list_of_available_datasets_df : pandas.DataFrame()
        A list of the datasets available from the ONS API.

    """
    api_url = "https://api.beta.ons.gov.uk/v1/datasets"

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="Unverified HTTPS request")
        list_of_available_datasets = json.loads(
            requests.get(f"{api_url}?limit=1000", verify=False).text
        )

    print("\nDatasets available :", list_of_available_datasets["total_count"])
    print("Datasets listed    :", list_of_available_datasets["count"])

    columns_of_interest = ["id", "title", "description"]

    list_of_available_datasets_df = pd.DataFrame(
        {
            col: [d[col] for d in list_of_available_datasets["items"]]
            for col in columns_of_interest
        }
    )

    if save_to_xlsx is True:
        with tempfile.TemporaryFile(suffix=".xlsx", delete=False) as file:
            list_of_available_datasets_df.to_excel(file.name, index=False)
            os.startfile(file.name)

        print(
            "\nTemporary datasets list excel file created and being opened. "
            "Use 'save as' if you want to keep it"
        )

    return list_of_available_datasets_df


 def get_latest_data_from_ons_api(dataset_id, save_to_xlsx=False):
    """
    Get the chosen dataset (dataset_id) from the ONS API. Returns a pandas dataframe.

    Parameters
    ----------
    dataset_id : str
        The unique identifier for the dataset.
    save_to_xlsx : TYPE, optional
        Whether to save the dataframe to an xlsx file. The default is False.

    Returns
    -------
    latest_data : pandas.DataFrame()
        The requested dataset from the ONS API.

    """
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", message="Unverified HTTPS request")

        dataset_url = json.loads(
            requests.get(
                f"https://api.beta.ons.gov.uk/v1/datasets/{dataset_id}", verify=False
            ).text
        )["links"]["latest_version"]["href"]

        latest_data_url = json.loads(requests.get(dataset_url, verify=False).text)[
            "downloads"
        ]["csv"]["href"]

    latest_data = pd.read_csv(
        latest_data_url, storage_options={"User-Agent": "Mozilla/5.0"}
    )

    if save_to_xlsx is True:
        temp_excel_file = tempfile.TemporaryFile(suffix=".xlsx", delete=False)
        with temp_excel_file as file:
            latest_data.to_excel(file.name, index=False)
            os.startfile(file.name)

        print(
            f"\nTemporary {dataset_id} excel file created and being opened. "
            "Use 'save as' if you want to keep it"
        )

    return latest_data


 #
 # Ask the user whether they want to show a list of available datasets
 #
 show_list_choice = pyip.inputInt(
    min=1,
    max=3,
    prompt="\nShow a list of available datasets (dataset_id values) from the ONS API?"
    "\n  1: Yes, save as a dataframe"
    "\n  2: Yes, save as a dataframe and as an xlsx file"
    "\n  3: No, continue without listing available datasets"
    "\nYour choice (integer): ",
 )

 if show_list_choice == 1:
    ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=False)
 elif show_list_choice == 2:
    ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True)

 #
 # Ask the user whether they want to download a dataset
 #
 download_data_choice = pyip.inputInt(
    min=1,
    max=3,
    prompt="\nDo you want to download a dataset (using a dataset_id) from the ONS API?"
    "\n  1: Yes, save as a dataframe"
    "\n  2: Yes, save as a dataframe and as an xlsx file"
    "\n  3: No, stop the program"
    "\nYour choice (integer): ",
 )

 print("")

 if download_data_choice in [1, 2]:
    dataset_id_choice = pyip.inputStr(
        prompt="\nWhat is the dataset_id of the dataset you want to download?"
        "\nThe latest available version will be downloaded."
        "\nEnter dataset_id here (e.g., TS008 or mid-year-pop-est): "
    )

    print(
        f"\nOkay, trying to download latest {dataset_id_choice} dataset, please wait..."
    )

    if download_data_choice == 1:
        my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=False)
    elif download_data_choice == 2:
        my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=True)

 print("\nThe program has stopped running.")
diff --git a/requirements.txt b/requirements.txt
 pandas
 pyinputplus
	"""
	Program for accessing the ONS API (see https://developer.ons.gov.uk).

	The ONS API can be used to programmatically access certain data that ONS has published.
	This program provides functions for (1) checking what datasets are available through the
	ONS API and (2) accessing datasets through the ONS API. To use this program, run the
	entire script; You will then be able to use the get_list_of_available_datasets() and
	get_latest_data_from_ons_api() functions.

	get_list_of_available_datasets()
	Run this to get a list of available datasets. The list will be returned to you
	in the form of a dataframe. You can optionally enable saving to an xlsx file
	as well (disabled by default).
	Examples:
	ons_api_datasets_available = get_list_of_available_datasets()
	ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True)

	get_latest_data_from_ons_api(dataset_id)
	Run this to get the chosen dataset via ONS API. The dataset will be returned to
	you in the form of a dataframe. You can optionally enable saving to an xlsx file
	as well (disabled by default). You can find out the dataset_id by running the
	ons_api_datasets_available() function to get a list of all available datasets.
	Examples:
	my_df1 = get_latest_data_from_ons_api(dataset_id="TS033")
	my_df2 = get_latest_data_from_ons_api(dataset_id="TS034", save_to_xlsx=True)
	my_df3 = get_latest_data_from_ons_api(dataset_id="mid-year-pop-est")
	my_df4 = get_latest_data_from_ons_api("weekly-deaths-local-authority")
	"""

	import requests
	import tempfile
	import pandas as pd
	import json
	import os
	import warnings
	import pyinputplus as pyip


	def get_list_of_available_datasets(save_to_xlsx=False):
	"""
	Get a list of the datasets available from the ONS API. Returns a pandas dataframe.

	Parameters
	----------
	save_to_xlsx : bool, optional
	Whether to save the dataframe (that contains the list of available datasets
	from the ONS API) to an xlsx. The default is False.

	Returns
	-------
	list_of_available_datasets_df : pandas.DataFrame()
	A list of the datasets available from the ONS API.

	"""
	api_url = "https://api.beta.ons.gov.uk/v1/datasets"

	with warnings.catch_warnings():
	warnings.filterwarnings("ignore", message="Unverified HTTPS request")
	list_of_available_datasets = json.loads(
	requests.get(f"{api_url}?limit=1000", verify=False).text
	)

	print("\nDatasets available :", list_of_available_datasets["total_count"])
	print("Datasets listed :", list_of_available_datasets["count"])

	columns_of_interest = ["id", "title", "description"]

	list_of_available_datasets_df = pd.DataFrame(
	{
	col: [d[col] for d in list_of_available_datasets["items"]]
	for col in columns_of_interest
	}
	)

	if save_to_xlsx is True:
	with tempfile.TemporaryFile(suffix=".xlsx", delete=False) as file:
	list_of_available_datasets_df.to_excel(file.name, index=False)
	os.startfile(file.name)

	print(
	"\nTemporary datasets list excel file created and being opened. "
	"Use 'save as' if you want to keep it"
	)

	return list_of_available_datasets_df


	def get_latest_data_from_ons_api(dataset_id, save_to_xlsx=False):
	"""
	Get the chosen dataset (dataset_id) from the ONS API. Returns a pandas dataframe.

	Parameters
	----------
	dataset_id : str
	The unique identifier for the dataset.
	save_to_xlsx : TYPE, optional
	Whether to save the dataframe to an xlsx file. The default is False.

	Returns
	-------
	latest_data : pandas.DataFrame()
	The requested dataset from the ONS API.

	"""
	with warnings.catch_warnings():
	warnings.filterwarnings("ignore", message="Unverified HTTPS request")

	dataset_url = json.loads(
	requests.get(
	f"https://api.beta.ons.gov.uk/v1/datasets/{dataset_id}", verify=False
	).text
	)["links"]["latest_version"]["href"]

	latest_data_url = json.loads(requests.get(dataset_url, verify=False).text)[
	"downloads"
	]["csv"]["href"]

	latest_data = pd.read_csv(
	latest_data_url, storage_options={"User-Agent": "Mozilla/5.0"}
	)

	if save_to_xlsx is True:
	temp_excel_file = tempfile.TemporaryFile(suffix=".xlsx", delete=False)
	with temp_excel_file as file:
	latest_data.to_excel(file.name, index=False)
	os.startfile(file.name)

	print(
	f"\nTemporary {dataset_id} excel file created and being opened. "
	"Use 'save as' if you want to keep it"
	)

	return latest_data


	#
	# Ask the user whether they want to show a list of available datasets
	#
	show_list_choice = pyip.inputInt(
	min=1,
	max=3,
	prompt="\nShow a list of available datasets (dataset_id values) from the ONS API?"
	"\n 1: Yes, save as a dataframe"
	"\n 2: Yes, save as a dataframe and as an xlsx file"
	"\n 3: No, continue without listing available datasets"
	"\nYour choice (integer): ",
	)

	if show_list_choice == 1:
	ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=False)
	elif show_list_choice == 2:
	ons_api_datasets_available = get_list_of_available_datasets(save_to_xlsx=True)

	#
	# Ask the user whether they want to download a dataset
	#
	download_data_choice = pyip.inputInt(
	min=1,
	max=3,
	prompt="\nDo you want to download a dataset (using a dataset_id) from the ONS API?"
	"\n 1: Yes, save as a dataframe"
	"\n 2: Yes, save as a dataframe and as an xlsx file"
	"\n 3: No, stop the program"
	"\nYour choice (integer): ",
	)

	print("")

	if download_data_choice in [1, 2]:
	dataset_id_choice = pyip.inputStr(
	prompt="\nWhat is the dataset_id of the dataset you want to download?"
	"\nThe latest available version will be downloaded."
	"\nEnter dataset_id here (e.g., TS008 or mid-year-pop-est): "
	)

	print(
	f"\nOkay, trying to download latest {dataset_id_choice} dataset, please wait..."
	)

	if download_data_choice == 1:
	my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=False)
	elif download_data_choice == 2:
	my_df = get_latest_data_from_ons_api(dataset_id_choice, save_to_xlsx=True)

	print("\nThe program has stopped running.")
No results found