Last active
October 5, 2022 15:18
-
-
Save alekrutkowski/1ace3e222f92eaac021d07322ceee3f5 to your computer and use it in GitHub Desktop.
R script to import European Commission's (DG ECFIN's) Annual Macro-ECOnomic database (AMECO)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(magrittr) | |
| library(data.table) | |
| download.file('https://ec.europa.eu/economy_finance/db_indicators/ameco/documents/ameco0.zip', | |
| 'ameco0.zip') | |
| time_stamp <- | |
| 'https://ec.europa.eu/info/business-economy-euro/indicators-statistics/economic-databases/macro-economic-database-ameco/download-annual-data-set-macro-economic-database-ameco_en' %>% | |
| rvest::read_html() %>% | |
| rvest::html_elements(xpath='//*[@id="block-ewcms-theme-main-page-content"]/article/div/div/div/div[2]/div/div/p[1]/text()[2]') %>% | |
| rvest::html_text() %>% | |
| gsub('\\(|\\)| ', "", .) %>% | |
| gsub('/','-',.) %T>% | |
| cat(file=paste('AMECO last updated on',.)) | |
| txt_file_names <- unzip('ameco0.zip') | |
| EU_MSs <- | |
| c("BEL", | |
| "BGR", | |
| "CZE", | |
| "DNK", | |
| "DEU", | |
| "EST", | |
| "IRL", | |
| "GRC", | |
| "ESP", | |
| "FRA", | |
| "HRV", | |
| "ITA", | |
| "CYP", | |
| "LVA", | |
| "LTU", | |
| "LUX", | |
| "HUN", | |
| "MLT", | |
| "NLD", | |
| "AUT", | |
| "POL", | |
| "PRT", | |
| "ROM", | |
| "SVN", | |
| "SVK", | |
| "FIN", | |
| "SWE") | |
| AMECO_raw <- | |
| txt_file_names %>% | |
| lapply(. %>% | |
| # `data.table::fread` doesn't work (ignores the first row of columns) for some unknown reason, | |
| # hence `read.csv` which is fast enough for these rather small files | |
| read.csv(sep=';', header=TRUE, check.names=FALSE, strip.white=TRUE) %>% | |
| as.data.table()) %>% | |
| rbindlist() %>% | |
| .[, V1 := NULL] %>% | |
| setnames('UNIT','original_unit') %>% | |
| .[, c('country','TRN','AGG','UNIT','REF','variable') := | |
| tstrsplit(CODE,'.',fixed=TRUE)] %>% | |
| .[, CODE := NULL] %>% | |
| .[, is_interest_rate := `SUB-CHAPTER`=='02 Interest rates'] %>% | |
| .[, is_EU_MS := country %in% EU_MSs] | |
| country_codelist <- | |
| AMECO_raw %>% | |
| .[, .(country,COUNTRY)] %>% | |
| unique() | |
| variable_codelist <- | |
| AMECO_raw %>% | |
| .[, .(variable,`SUB-CHAPTER`,TITLE)] %>% | |
| unique() %>% | |
| setorder(`SUB-CHAPTER`,variable) | |
| ### ---- Taken from https://ec.europa.eu/info/sites/default/files/economy-finance/ameco_online_data_files_numerical_coding.pdf ---- | |
| parseAMECOcodelist <- function(.colnames, char) | |
| char %>% | |
| strsplit('\n') %>% | |
| .[[1]] %>% | |
| {data.table(sub('^(\\w+)\\s?.+$','\\1',.), | |
| sub('^\\w+\\s?(.+)$','\\1',.))} %>% | |
| setnames(colnames(.), .colnames) | |
| transformation_codelist <- | |
| parseAMECOcodelist(c('TRN','Transformations over time'), | |
| "1 Levels (and moving arithmetic mean for time periods) | |
| 2 Levels (and moving geometric mean for time periods) | |
| 3 Index numbers (and moving arithmetic mean for time periods) | |
| 4 Index numbers (and moving geometric mean for time periods) | |
| 5 Annual percentage changes (and moving arithmetic mean for time periods) | |
| 6 Annual percentage changes (and moving geometric mean for time periods) | |
| 7 Absolute value of annual percentage changes (and moving arithmetic mean for time periods) | |
| 8 Moving percentage changes | |
| 9 Annual changes (and moving arithmetic mean for time periods) | |
| 10 Absolute value of annual changes (and moving arithmetic mean for time periods) | |
| 11 Moving changes") | |
| aggregation_codelist_general <- | |
| parseAMECOcodelist(c('AGG','Aggregation mode'), | |
| "0 Standard aggregations (data converted to a common currency and summed) | |
| 1 Weighted mean of t/t-1 national ratios, weights current prices in ECU/EUR | |
| 2 Weighted mean of t/t-1 national ratios, weights current prices in PPS") %>% | |
| .[, is_interest_rate := FALSE] | |
| aggregation_codelist_interest <- | |
| parseAMECOcodelist(c('AGG','Aggregation mode'), | |
| "1 Weighted geometric mean (weights: GDP in current ECU/EUR) | |
| 2 Weighted geometric mean (weights: GDP in current PPS) | |
| 3 Weighted geometric mean (weights: Private consumption in current ECU/EUR) | |
| 4 Weighted geometric mean (weights: Private consumption in current PPS)") %>% | |
| .[, is_interest_rate := TRUE] | |
| unit_codelist <- | |
| parseAMECOcodelist(c('UNIT','Unit codes'), | |
| "0 Original units (e.g. national currency, persons, etc.) | |
| 99 ECU/EUR | |
| 212 PPS (purchasing power standards) | |
| 300 Final demand | |
| 310 Gross domestic product at market prices | |
| 311 Net domestic product at market prices | |
| 312 Gross domestic product at current factor cost | |
| 313 Net domestic product at current factor cost | |
| 315 Trend gross domestic product at market prices | |
| 316 Potential gross domestic product at market prices | |
| 318 Total gross value added | |
| 319 Gross domestic product at market prices (excessive deficit procedure) | |
| 320 Gross national product at market prices | |
| 321 National income at market prices | |
| 322 Gross national disposable income at market prices | |
| 323 National disposable income at market prices | |
| 338 Gross value added at market prices; manufacturing industry | |
| 380 Current revenue general government | |
| 390 Total expenditure general government | |
| 391 Current expenditure general government | |
| 410 Total population, demographic statistics | |
| 411 Population, 15 to 64 years | |
| 412 Total labour Force | |
| 413 Civilian labour force | |
| 414 Population 15 to 74 years | |
| 420 Total population (national accounts)") | |
| reference_codelist <- | |
| parseAMECOcodelist(c('REF','Codes for relative performance'), | |
| "215 former EU-15 | |
| 315 former EU-15 | |
| 328 former EU-28 | |
| 415 former EU-15 | |
| 424 24 industrial countries: former EU-15 & CH NR TR US CA MX JP AU NZ | |
| 437 37 industrial countries: EU-27 & CH NR TR UK US CA MX JP AU NZ") | |
| ### ------------------------------------------------------------------------------------------------------------------------------- | |
| # AMECO's country code -- Eurostat's geo code correspondence table: | |
| ameco_geo_corresp <- | |
| fread("country geo | |
| EU27 EU27_2020 | |
| EU15 EU15 | |
| EA19 EA19 | |
| EA12 EA12 | |
| BEL BE | |
| BGR BG | |
| CZE CZ | |
| DNK DK | |
| DEU DE | |
| EST EE | |
| IRL IE | |
| GRC EL | |
| ESP ES | |
| FRA FR | |
| HRV HR | |
| ITA IT | |
| CYP CY | |
| LVA LV | |
| LTU LT | |
| LUX LU | |
| HUN HU | |
| MLT MT | |
| NLD NL | |
| AUT AT | |
| POL PL | |
| PRT PT | |
| ROM RO | |
| SVN SI | |
| SVK SK | |
| FIN FI | |
| SWE SE | |
| GBR UK | |
| ALB AL | |
| MNE ME | |
| MKD MK | |
| SRB RS | |
| TUR TR | |
| ISL IS | |
| NOR NO | |
| CHE CH | |
| AUS AU | |
| CAN CA | |
| JPN JP | |
| KOR KR | |
| MEX MX | |
| NZL NZ | |
| USA US", sep=' ') | |
| list(country_codelist=country_codelist, | |
| variable_codelist=variable_codelist, | |
| transformation_codelist=transformation_codelist, | |
| aggregation_codelist_general=aggregation_codelist_general, | |
| aggregation_codelist_interest=aggregation_codelist_interest, | |
| unit_codelist=unit_codelist, | |
| reference_codelist=reference_codelist) %>% | |
| openxlsx::write.xlsx('Codelists.xlsx', | |
| withFilter=TRUE, | |
| headerStyle=openxlsx::createStyle(textDecoration='bold'), | |
| overwrite=TRUE) | |
| mrg <- function(dt1, dt2, ...) | |
| merge(dt1, dt2, all.x=TRUE, | |
| by=intersect(colnames(dt1),colnames(dt2))) | |
| AMECO <- | |
| AMECO_raw %>% | |
| melt(id.vars=suppressWarnings(colnames(.) %>% .[is.na(as.integer(.))]), | |
| # measure.vars=patterns('^[1-2][9|0][0-9]{2}$'), # alternative specification | |
| variable.name='year') %>% | |
| .[!is.na(value)] %>% | |
| .[, year := as.integer(as.character(year))] %>% | |
| mrg(transformation_codelist) %>% | |
| mrg(aggregation_codelist_general) %>% | |
| mrg(aggregation_codelist_interest) %>% | |
| .[, is_interest_rate := NULL] %>% | |
| mrg(unit_codelist) %>% | |
| mrg(reference_codelist) %>% | |
| mrg(ameco_geo_corresp) %>% | |
| .[, .(`SUB-CHAPTER`, | |
| variable, | |
| TITLE, | |
| geo, | |
| country, | |
| COUNTRY, | |
| is_EU_MS, | |
| year, | |
| UNIT, | |
| `Unit codes`, | |
| original_unit, | |
| AGG, | |
| `Aggregation mode`, | |
| TRN, | |
| `Transformations over time`, | |
| REF, | |
| `Codes for relative performance`, | |
| value)] %>% | |
| setorderv(colnames(.) %>% setdiff('value')) %T>% | |
| fst::write_fst('AMECO.Rfst') %T>% | |
| fwrite('AMECO.csv') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment