Created
May 18, 2022 09:49
-
-
Save alekrutkowski/014e862b98dd8c150d4a399dc33cbb6a to your computer and use it in GitHub Desktop.
Additional functions for the `eurostat` Python package (https://pypi.org/project/eurostat)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import eurostat | |
| import pandas as pd | |
def importData(EurostatDatasetCode: str, flags: bool = False) -> pd.DataFrame:
    r"""
    Import a dataset from Eurostat as a flat/melted table (pandas dataframe).

    The wide table returned by ``eurostat.get_data_df`` is expected to contain
    a "pivot" column whose name joins two dimension names with a backslash
    (e.g. ``geo\time``). Every column up to and including the pivot column is
    kept as an identifier; every column after it is unpivoted into rows.

    Parameters
    ----------
    EurostatDatasetCode : str
        For the available datasets and their codes see:
        https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?dir=data&start=all
    flags : bool
        Should the flags be included? If true, the data columns are expected
        to come in ``<period>_value`` / ``<period>_flag`` pairs and the result
        has a ``flag`` column next to ``value``.

    Returns
    -------
    pandas.core.frame.DataFrame
        The Eurostat dataset as a flat/melted table: one column per dimension
        (the pivot column is renamed to its first dimension, the unpivoted
        labels to its second dimension), plus ``value`` and, when ``flags``
        is true, ``flag``.

    Raises
    ------
    IndexError
        If no column name of the downloaded table contains a backslash.

    Notes
    -----
    With ``flags=True`` the reshaping is done by ``pandas.wide_to_long``,
    which casts the period labels to numbers when all of them are numeric.

    Example
    --------
    >>> importData('nama_10_gdp')  # doctest: +SKIP
                   unit na_item geo  time     value
    0        CLV05_MEUR     B1G  AT  2021  269789.6
    1        CLV05_MEUR     B1G  BA  2021   11154.0
    2        CLV05_MEUR     B1G  BE  2021  343926.1
    3        CLV05_MEUR     B1G  BG  2021   29421.7
    4        CLV05_MEUR     B1G  CH  2021  440224.6
    ...             ...     ...  ..   ...       ...
    1363982    PYP_MNAC     YA1  PT  1975       NaN
    1363983    PYP_MNAC     YA1  RO  1975       NaN
    1363984    PYP_MNAC     YA1  SI  1975       NaN
    1363985    PYP_MNAC     YA1  UK  1975       NaN
    1363986    PYP_MNAC     YA1  XK  1975       NaN
    """
    df = eurostat.get_data_df(EurostatDatasetCode, flags)

    # Locate the pivot column; str() guards against non-string column labels.
    pivot_positions = [
        pos for pos, name in enumerate(df.columns) if '\\' in str(name)
    ]
    if not pivot_positions:
        # Same exception type as the former bare `[...][0]`, but with a
        # message that explains what is wrong.
        raise IndexError(
            "no column with a backslash-separated name (e.g. 'geo\\time') "
            f"found in dataset {EurostatDatasetCode!r}"
        )
    idx = pivot_positions[0]
    id_cols = list(df.columns[:idx + 1])

    if flags:
        def _stub_name(col):
            # '<period>_value' -> 'value_<period>', '<period>_flag' ->
            # 'flag_<period>': wide_to_long needs the stub as a prefix.
            col = str(col)
            for marker, stub in (('_value', 'value_'), ('_flag', 'flag_')):
                if marker in col:
                    return stub + col.replace(marker, '')
            return col

        # Rename only the data columns, so identifier columns that happen to
        # contain '_value' or '_flag' keep their names.
        df.columns = id_cols + [_stub_name(c) for c in df.columns[idx + 1:]]
        df = pd.wide_to_long(
            df,
            stubnames=['value_', 'flag_'],
            i=id_cols,
            j='variable',
            # The default suffix r'\d+' matches only all-digit period labels;
            # '.+' also accepts labels such as '2021Q1' or '2021-01'.
            suffix=r'.+',
        ).reset_index()
        df = df.rename(columns={'value_': 'value', 'flag_': 'flag'})
    else:
        df = df.melt(id_vars=id_cols)

    # Split e.g. 'geo\time' into the name of the pivot column itself and the
    # name for the column holding the former column labels.
    pivot = df.columns[idx]
    dim1, dim2 = pivot.split('\\', 1)
    return df.rename(columns={pivot: dim1, 'variable': dim2})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment