Skip to content

Instantly share code, notes, and snippets.

@buswedg
Created April 14, 2020 19:47
Show Gist options
  • Save buswedg/de4fce9572089af3bf3218bc359d67a0 to your computer and use it in GitHub Desktop.
Save buswedg/de4fce9572089af3bf3218bc359d67a0 to your computer and use it in GitHub Desktop.
pulling_economic_data_using_pandasdmx\abs_economic_series_pull
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import sys\n",
"\n",
"import shutil\n",
"import urllib.request as request\n",
"from contextlib import closing\n",
"\n",
"from pandasdmx import Request\n",
"import xmltodict\n",
"\n",
"from datetime import date\n",
"\n",
"import pandas as pd\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def check_exists(path):\n",
"\n",
" return os.path.exists(path)\n",
"\n",
"\n",
"def check_or_create_dir(directory):\n",
"\n",
" if not check_exists(directory):\n",
" os.makedirs(directory)\n",
" \n",
" \n",
"def find_from(s, first):\n",
"\n",
" try:\n",
" start = s.index(first) + len(first)\n",
"\n",
" return s[start:]\n",
"\n",
" except Exception as e:\n",
" print(e)\n",
" \n",
" \n",
"def copy_url_to_file(url, tmp_file):\n",
"\n",
" try:\n",
"\n",
" with closing(request.urlopen(url)) as r:\n",
"\n",
" with open(tmp_file, 'wb') as f:\n",
" shutil.copyfileobj(r, f)\n",
"\n",
" except Exception as e:\n",
" print(e)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def apply_dsd(df, dsd_url):\n",
" copy_url_to_file(dsd_url, '_temp')\n",
"\n",
" with open('_temp') as f:\n",
" dsd = xmltodict.parse(f.read().replace('&', '&'))\n",
"\n",
" os.remove('_temp')\n",
"\n",
" for i, col in enumerate(df.columns):\n",
"\n",
" if col not in ['Time Period', 0]:\n",
" dic_val = {}\n",
"\n",
" ls_codes = dsd['message:Structure']['message:CodeLists']['CodeList'][i]['Code']\n",
" ls_codes = ls_codes if type(ls_codes) is list else [ls_codes]\n",
"\n",
" for code in ls_codes:\n",
" dic_val[code['@value']] = code['Description'][0]['#text']\n",
"\n",
" df = df.replace({col: dic_val})\n",
"\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# http://stat.data.abs.gov.au/Index.aspx\n",
"\n",
"ls_query = []\n",
"\n",
"series_cat = \"Economy\"\n",
"dtype_cat = \"Business Indicators\"\n",
"dtype_subcat = \"Business Indicators\"\n",
"dtype_desc = \"Inventories, Current Prices and Chain Volume Measures\"\n",
"dtype_abb = \"QBIS\"\n",
"res_url = \"http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetData/QBIS/50+70.B+C+D+F+G+H+TOTAL.0.99.10+20+30.Q/\"\n",
"dsd_url = \"http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetDataStructure/QBIS\"\n",
"ls_query.append([series_cat, dtype_cat, dtype_subcat, dtype_desc, dtype_abb, res_url, dsd_url])\n",
"\n",
"series_cat = \"Health\"\n",
"dtype_cat = \"Health Conditions and Risk Factors\"\n",
"dtype_subcat = \"Apparent Consumption of Alcohol, Australia\"\n",
"dtype_desc = \"Pure Alcohol Available for Consumption\"\n",
"dtype_abb = \"ALC\"\n",
"res_url = \"http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetData/ALC/1.1+2.1+2+3+5+4.6+10+11+12+15+14.A/\"\n",
"dsd_url = \"http://stat.data.abs.gov.au/restsdmx/sdmx.ashx/GetDataStructure/ALC\"\n",
"ls_query.append([series_cat, dtype_cat, dtype_subcat, dtype_desc, dtype_abb, res_url, dsd_url])\n",
"\n",
"\n",
"df_query = pd.DataFrame(ls_query, columns=['series_cat', 'dtype_cat', 'dtype_subcat', 'dtype_desc', 'dtype_abb', 'res_url', 'dsd_url'])"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"for index, row in df_query.iterrows():\n",
"\n",
" agency_code = 'ABS'\n",
"\n",
" ABS = Request(agency_code)\n",
" ABS.client.config['timeout'] = 100000\n",
"\n",
" res_url = row['dtype_abb'] + find_from(row['res_url'], row['dtype_abb'])\n",
"\n",
" data_response = ABS.data(resource_id=res_url)\n",
"\n",
" df = data_response.write().unstack().reset_index()\n",
" df = apply_dsd(df, row['dsd_url'])\n",
" \n",
" data_file = row['dtype_abb'] + \".csv.gz\"\n",
" \n",
" df.to_csv(data_file, sep=';', quotechar='\"', date_format='%Y-%m-%d %H:%M:%S.%f', compression='gzip', index=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment