Skip to content

Instantly share code, notes, and snippets.

@alekrutkowski
Created May 18, 2022 09:49
Show Gist options
  • Save alekrutkowski/014e862b98dd8c150d4a399dc33cbb6a to your computer and use it in GitHub Desktop.
Save alekrutkowski/014e862b98dd8c150d4a399dc33cbb6a to your computer and use it in GitHub Desktop.
Additional functions for the `eurostat` Python package (https://pypi.org/project/eurostat)
import eurostat
import pandas as pd
def importData(EurostatDatasetCode, flags=False):
    """
    Import a dataset from Eurostat as a flat/melted table (pandas dataframe).

    The wide table returned by ``eurostat.get_data_df`` has one column per
    time period. This function reshapes it into long format: one row per
    combination of the identifier columns and the time period.

    Parameters
    ----------
    EurostatDatasetCode : str
        For the available datasets and their codes see:
        https://ec.europa.eu/eurostat/estat-navtree-portlet-prod/BulkDownloadListing?dir=data&start=all
    flags : bool
        Should the flags be included? If True, the result has a ``flag``
        column in addition to the ``value`` column.

    Returns
    -------
    pandas.core.frame.DataFrame
        The Eurostat dataset as a flat/melted table.

    Raises
    ------
    IndexError
        If no column name of the downloaded table contains a backslash,
        i.e. the column separating the identifier columns from the period
        columns cannot be located.

    Example
    --------
    >>> importData('nama_10_gdp')
                   unit na_item geo  time     value
    0        CLV05_MEUR     B1G  AT  2021  269789.6
    1        CLV05_MEUR     B1G  BA  2021   11154.0
    2        CLV05_MEUR     B1G  BE  2021  343926.1
    3        CLV05_MEUR     B1G  BG  2021   29421.7
    4        CLV05_MEUR     B1G  CH  2021  440224.6
    ...             ...     ...  ..   ...       ...
    1363982    PYP_MNAC     YA1  PT  1975       NaN
    1363983    PYP_MNAC     YA1  RO  1975       NaN
    1363984    PYP_MNAC     YA1  SI  1975       NaN
    1363985    PYP_MNAC     YA1  UK  1975       NaN
    1363986    PYP_MNAC     YA1  XK  1975       NaN
    """
    df = eurostat.get_data_df(EurostatDatasetCode, flags)
    columns = list(df.columns)

    # The column whose name contains a backslash (two dimension names joined
    # by it) is the last identifier column; every column after it is treated
    # as a time period.
    pivot_pos = next(
        (pos for pos, name in enumerate(columns) if '\\' in str(name)),
        None,
    )
    if pivot_pos is None:
        raise IndexError(
            "no column containing '\\' found in dataset "
            f"{EurostatDatasetCode!r}; cannot locate the identifier columns"
        )
    id_cols = columns[:pivot_pos + 1]
    pivot = id_cols[-1]

    if flags:
        # pd.wide_to_long expects "<stub><suffix>" names, so move the
        # "_value"/"_flag" marker from the end of each period column to a
        # "value_"/"flag_" prefix. Only the period columns are renamed; the
        # identifier columns are left untouched.
        period_cols = [
            ('value_' + x if '_value' in x else 'flag_' + x if '_flag' in x else x)
            .replace('_flag', '')
            .replace('_value', '')
            for x in columns[pivot_pos + 1:]
        ]
        df.columns = id_cols + period_cols
        # suffix=r'.+' is required: the default r'\d+' only matches purely
        # numeric period labels, so non-annual labels such as '2020Q1' or
        # '2021M05' would not be reshaped. Purely numeric labels are still
        # converted to numbers by pandas.
        df = pd.wide_to_long(
            df,
            stubnames=['value_', 'flag_'],
            i=id_cols,
            j='variable',
            suffix=r'.+',
        )
        df = df.reset_index()
        df = df.rename(columns={'value_': 'value', 'flag_': 'flag'})
    else:
        df = df.melt(id_cols)

    # Split the combined header into its two dimension names: the first
    # names the pivot column itself, the second names the melted period
    # column.
    dim1, _, dim2 = str(pivot).partition('\\')
    return df.rename(columns={pivot: dim1, 'variable': dim2})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment