Skip to content

Instantly share code, notes, and snippets.

@alekrutkowski
Last active October 5, 2022 15:18
Show Gist options
  • Save alekrutkowski/1ace3e222f92eaac021d07322ceee3f5 to your computer and use it in GitHub Desktop.
Save alekrutkowski/1ace3e222f92eaac021d07322ceee3f5 to your computer and use it in GitHub Desktop.
R script to import European Commission's (DG ECFIN's) Annual Macro-ECOnomic database (AMECO)
library(magrittr)
library(data.table)
download.file('https://ec.europa.eu/economy_finance/db_indicators/ameco/documents/ameco0.zip',
'ameco0.zip')
time_stamp <-
'https://ec.europa.eu/info/business-economy-euro/indicators-statistics/economic-databases/macro-economic-database-ameco/download-annual-data-set-macro-economic-database-ameco_en' %>%
rvest::read_html() %>%
rvest::html_elements(xpath='//*[@id="block-ewcms-theme-main-page-content"]/article/div/div/div/div[2]/div/div/p[1]/text()[2]') %>%
rvest::html_text() %>%
gsub('\\(|\\)| ', "", .) %>%
gsub('/','-',.) %T>%
cat(file=paste('AMECO last updated on',.))
txt_file_names <- unzip('ameco0.zip')
EU_MSs <-
c("BEL",
"BGR",
"CZE",
"DNK",
"DEU",
"EST",
"IRL",
"GRC",
"ESP",
"FRA",
"HRV",
"ITA",
"CYP",
"LVA",
"LTU",
"LUX",
"HUN",
"MLT",
"NLD",
"AUT",
"POL",
"PRT",
"ROM",
"SVN",
"SVK",
"FIN",
"SWE")
AMECO_raw <-
txt_file_names %>%
lapply(. %>%
# `data.table::fread` doesn't work (ignores the first row of columns) for some unknown reason,
# hence `read.csv` which is fast enough for these rather small files
read.csv(sep=';', header=TRUE, check.names=FALSE, strip.white=TRUE) %>%
as.data.table()) %>%
rbindlist() %>%
.[, V1 := NULL] %>%
setnames('UNIT','original_unit') %>%
.[, c('country','TRN','AGG','UNIT','REF','variable') :=
tstrsplit(CODE,'.',fixed=TRUE)] %>%
.[, CODE := NULL] %>%
.[, is_interest_rate := `SUB-CHAPTER`=='02 Interest rates'] %>%
.[, is_EU_MS := country %in% EU_MSs]
country_codelist <-
AMECO_raw %>%
.[, .(country,COUNTRY)] %>%
unique()
variable_codelist <-
AMECO_raw %>%
.[, .(variable,`SUB-CHAPTER`,TITLE)] %>%
unique() %>%
setorder(`SUB-CHAPTER`,variable)
### ---- Taken from https://ec.europa.eu/info/sites/default/files/economy-finance/ameco_online_data_files_numerical_coding.pdf ----
parseAMECOcodelist <- function(.colnames, char)
char %>%
strsplit('\n') %>%
.[[1]] %>%
{data.table(sub('^(\\w+)\\s?.+$','\\1',.),
sub('^\\w+\\s?(.+)$','\\1',.))} %>%
setnames(colnames(.), .colnames)
transformation_codelist <-
parseAMECOcodelist(c('TRN','Transformations over time'),
"1 Levels (and moving arithmetic mean for time periods)
2 Levels (and moving geometric mean for time periods)
3 Index numbers (and moving arithmetic mean for time periods)
4 Index numbers (and moving geometric mean for time periods)
5 Annual percentage changes (and moving arithmetic mean for time periods)
6 Annual percentage changes (and moving geometric mean for time periods)
7 Absolute value of annual percentage changes (and moving arithmetic mean for time periods)
8 Moving percentage changes
9 Annual changes (and moving arithmetic mean for time periods)
10 Absolute value of annual changes (and moving arithmetic mean for time periods)
11 Moving changes")
aggregation_codelist_general <-
parseAMECOcodelist(c('AGG','Aggregation mode'),
"0 Standard aggregations (data converted to a common currency and summed)
1 Weighted mean of t/t-1 national ratios, weights current prices in ECU/EUR
2 Weighted mean of t/t-1 national ratios, weights current prices in PPS") %>%
.[, is_interest_rate := FALSE]
aggregation_codelist_interest <-
parseAMECOcodelist(c('AGG','Aggregation mode'),
"1 Weighted geometric mean (weights: GDP in current ECU/EUR)
2 Weighted geometric mean (weights: GDP in current PPS)
3 Weighted geometric mean (weights: Private consumption in current ECU/EUR)
4 Weighted geometric mean (weights: Private consumption in current PPS)") %>%
.[, is_interest_rate := TRUE]
unit_codelist <-
parseAMECOcodelist(c('UNIT','Unit codes'),
"0 Original units (e.g. national currency, persons, etc.)
99 ECU/EUR
212 PPS (purchasing power standards)
300 Final demand
310 Gross domestic product at market prices
311 Net domestic product at market prices
312 Gross domestic product at current factor cost
313 Net domestic product at current factor cost
315 Trend gross domestic product at market prices
316 Potential gross domestic product at market prices
318 Total gross value added
319 Gross domestic product at market prices (excessive deficit procedure)
320 Gross national product at market prices
321 National income at market prices
322 Gross national disposable income at market prices
323 National disposable income at market prices
338 Gross value added at market prices; manufacturing industry
380 Current revenue general government
390 Total expenditure general government
391 Current expenditure general government
410 Total population, demographic statistics
411 Population, 15 to 64 years
412 Total labour Force
413 Civilian labour force
414 Population 15 to 74 years
420 Total population (national accounts)")
reference_codelist <-
parseAMECOcodelist(c('REF','Codes for relative performance'),
"215 former EU-15
315 former EU-15
328 former EU-28
415 former EU-15
424 24 industrial countries: former EU-15 & CH NR TR US CA MX JP AU NZ
437 37 industrial countries: EU-27 & CH NR TR UK US CA MX JP AU NZ")
### -------------------------------------------------------------------------------------------------------------------------------
# AMECO's country code -- Eurostat's geo code correspondence table:
ameco_geo_corresp <-
fread("country geo
EU27 EU27_2020
EU15 EU15
EA19 EA19
EA12 EA12
BEL BE
BGR BG
CZE CZ
DNK DK
DEU DE
EST EE
IRL IE
GRC EL
ESP ES
FRA FR
HRV HR
ITA IT
CYP CY
LVA LV
LTU LT
LUX LU
HUN HU
MLT MT
NLD NL
AUT AT
POL PL
PRT PT
ROM RO
SVN SI
SVK SK
FIN FI
SWE SE
GBR UK
ALB AL
MNE ME
MKD MK
SRB RS
TUR TR
ISL IS
NOR NO
CHE CH
AUS AU
CAN CA
JPN JP
KOR KR
MEX MX
NZL NZ
USA US", sep=' ')
list(country_codelist=country_codelist,
variable_codelist=variable_codelist,
transformation_codelist=transformation_codelist,
aggregation_codelist_general=aggregation_codelist_general,
aggregation_codelist_interest=aggregation_codelist_interest,
unit_codelist=unit_codelist,
reference_codelist=reference_codelist) %>%
openxlsx::write.xlsx('Codelists.xlsx',
withFilter=TRUE,
headerStyle=openxlsx::createStyle(textDecoration='bold'),
overwrite=TRUE)
mrg <- function(dt1, dt2, ...)
merge(dt1, dt2, all.x=TRUE,
by=intersect(colnames(dt1),colnames(dt2)))
AMECO <-
AMECO_raw %>%
melt(id.vars=suppressWarnings(colnames(.) %>% .[is.na(as.integer(.))]),
# measure.vars=patterns('^[1-2][9|0][0-9]{2}$'), # alternative specification
variable.name='year') %>%
.[!is.na(value)] %>%
.[, year := as.integer(as.character(year))] %>%
mrg(transformation_codelist) %>%
mrg(aggregation_codelist_general) %>%
mrg(aggregation_codelist_interest) %>%
.[, is_interest_rate := NULL] %>%
mrg(unit_codelist) %>%
mrg(reference_codelist) %>%
mrg(ameco_geo_corresp) %>%
.[, .(`SUB-CHAPTER`,
variable,
TITLE,
geo,
country,
COUNTRY,
is_EU_MS,
year,
UNIT,
`Unit codes`,
original_unit,
AGG,
`Aggregation mode`,
TRN,
`Transformations over time`,
REF,
`Codes for relative performance`,
value)] %>%
setorderv(colnames(.) %>% setdiff('value')) %T>%
fst::write_fst('AMECO.Rfst') %T>%
fwrite('AMECO.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment