Last active
June 18, 2025 14:19
-
-
Save alekrutkowski/c2dc5669f0795bccfd92dcccdef925a8 to your computer and use it in GitHub Desktop.
Stata command to download and import a Eurostat dataset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Usage example: | |
// Either the parameter is Eurostat's "online data code": | |
// insheet_eurostat nama_10_gdp | |
// or the parameter is the custom URL generated with Eurostat GUI top right corner: | |
// Go to e.g. https://ec.europa.eu/eurostat/databrowser/view/nama_10_gdp/default/table?lang=en and | |
// do your selections by clicking | |
// ⤓ Download > Advanced settings > Select: File format = "Text (.tsv)", Select: Data scope = "All selected dimensions", and untick "Compress file (.gzip)" > Click: [Copy] button next to the "API link" | |
// insheet_eurostat https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/nama_10_gdp/A.CP_MEUR.B1G+P3.EU27_2020+BE+BG+CZ+DK/?format=TSV&startPeriod=2021&endPeriod=2023 | |
// To use it like other commands, put it in your "personal" folder/directory | |
// (see: https://www.stata.com/support/faqs/programming/personal-ado-directory/ and https://www.stata.com/help.cgi?personal). | |
// In line 49, I suggest replacing "reshape" with faster "greshape" -- it requires running first "ssc install gtools" | |
// In line 61: potentially -- will do it for years, will skip for e.g. quarters | |
prog insheet_eurostat | |
qui des | |
if `r(k)'!=0 { | |
di as err "Memory is not empty!" | |
err 18 | |
} | |
tempfile temp_file | |
di as txt "Downloading `1'..." | |
if substr("`1'",1,5)=="https" { | |
copy "`1'" "`temp_file'" | |
} | |
else { | |
copy "https://ec.europa.eu/eurostat/api/dissemination/sdmx/2.1/data/`=strupper("`1'")'?format=TSV" "`temp_file'" | |
} | |
di "Importing into memory..." | |
insheet using "`temp_file'", names tab | |
qui tostring *, force replace | |
qui ds * | |
tokenize `r(varlist)' | |
loc first_col `1' | |
qui des | |
forval i = 2/`r(k)' { | |
qui ren ``i'' _`i' | |
} | |
loc first_col_names : var lab `first_col' | |
loc first_col_names = subinstr("`first_col_names'", ",", " ", .) | |
loc first_col_names = subinstr("`first_col_names'", "\TIME_PERIOD", "", 1) | |
loc time_pair_list "" | |
foreach vname of varlist _* { | |
loc time : var lab `vname' | |
loc name = subinstr("`vname'","_","",1) | |
loc time_pair_list `"`time_pair_list' `""`time'" if time=="`name'""'"' | |
} | |
di "Reshaping into long format (be patient, it is usually very slow)..." | |
qui reshape long _, i(`first_col') j(time) | |
di "Reformatting time column..." | |
cap tostring time, replace | |
foreach pair of local time_pair_list { | |
qui replace time = `pair' | |
} | |
di "Splitting values and flags..." | |
qui split _ | |
drop _ | |
rename _1 value | |
rename _2 flags | |
qui destring value, replace force | |
cap destring time, replace | |
qui split `first_col', parse(,) gen(_) | |
drop `first_col' | |
tokenize `first_col_names' | |
foreach n of varlist _* { | |
rename `n' `1' | |
macro shift | |
} | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment