Last active
March 13, 2020 13:20
-
-
Save cimentadaj/f73e1c51492165d5576e3fd1034acb8b to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(eurostat) | |
library(dplyr) | |
## Fetch the Eurostat dataset with id "demo_r_pjangroup"; every later step
## of this script starts from `res`.
res <- get_eurostat(id = "demo_r_pjangroup")
## Regular expressions selecting, per country, the regional codes to keep.
## Each pattern is matched against the region code with grepl(); a trailing
## `$` stops a pattern from also matching longer (more detailed) codes.

## Example for NUTS1: UKB
uk_match <- "UK[A-Z]{1}$"

## Example for NUTS2: ITAC12
## NOTE(review): unlike the other patterns this one has no `$` anchor, so any
## code containing IT + letter + digit matches, including longer codes such as
## ITC11 -- confirm that this is intended.
ita_match <- "IT[A-Z][0-9]{1,2}"

## Example for NUTS1: either DE3 or DEA
## (it is unclear why the German NUTS1 codes mix numbers and letters)
de_match <- "DE[A-Z]{1}$|DE[0-9]{1}$"

## Example for NUTS1: FR1 and FRB, FRC, etc.
fr_match <- "FR1$|FR[A-Z]{1}$"

## Example for NUTS2: ES11
es_match <- "ES[0-9]{2,2}$"

## Example for NUTS2: IE04 to IE06
## The `$` has to apply to all three codes. Written as "IE04|IE05|IE06$" it
## only binds to the last alternative, so IE04 and IE05 would also match
## longer codes such as IE041 or IE052.
ie_match <- "IE0[4-6]$"

## Example for NUTS2: NL12
nl_match <- "NL[0-9]{2,2}$"

## Example for NUTS2: BE12
be_match <- "BE[0-9]{2,2}$"

## Example for NUTS2: AT11
at_match <- "AT[0-9]{2,2}$"

## Example for NUTS2: LU00
lu_match <- "LU[0-9]{2,2}$"

## Wikipedia NUTS library for each country
## https://en.wikipedia.org/wiki/NUTS_statistical_regions_of_France
## Just change the country name at the end for the desired one

## Union of all country patterns: a code is kept when it matches any of them.
final_match <- paste0(
  c(
    uk_match,
    ita_match,
    de_match,
    fr_match,
    es_match,
    ie_match,
    nl_match,
    be_match,
    at_match,
    lu_match
  ),
  collapse = "|"
)
## Build `tst`: for the regions matched by `final_match`, keep the most recent
## observation per sex/age/region, attach region names and a country label,
## and tidy up the region names.
tst <-
  res %>%
  ## To quickly check the codes for each country -- delete after done
  ## filter(grepl("FR", geo)) %>%
  ## distinct(geo) %>%
  ## print(n = Inf)
  ## Select the most recent time point within each sex/age/region group
  group_by(sex, age, geo) %>%
  filter(time == max(time)) %>%
  ungroup() %>%
  ## Drop unused columns
  select(-unit, -time) %>%
  ## Drop the "T" sex category and the "TOTAL"/"UNK" age groups; keep only
  ## the regions selected by the country patterns
  filter(sex != "T", !age %in% c("TOTAL", "UNK"), grepl(final_match, geo)) %>%
  ## Replace codes by labels; the original region code is expected to stay
  ## available as `geo_code`, which the steps below rely on
  label_eurostat(code = "geo", fix_duplicated = TRUE) %>%
  ## Country label derived from the region code (no match gives NA)
  mutate(country = case_when(
    grepl(uk_match, geo_code) ~ "UK",
    grepl(ita_match, geo_code) ~ "ITA",
    grepl(de_match, geo_code) ~ "DE",
    grepl(fr_match, geo_code) ~ "FR",
    grepl(es_match, geo_code) ~ "ES",
    grepl(ie_match, geo_code) ~ "IE",
    grepl(nl_match, geo_code) ~ "NL",
    grepl(be_match, geo_code) ~ "BE",
    grepl(at_match, geo_code) ~ "AT",
    grepl(lu_match, geo_code) ~ "LU",
    TRUE ~ NA_character_
  )) %>%
  select(country, sex, age, geo_code, geo, values) %>%
  ## Some region names carry their code (e.g. DE*) or the "Prov. " abbreviation
  ## in front; strip those prefixes. The dot is escaped so that only a literal
  ## "Prov. " is removed (an unescaped `.` matches any character).
  mutate(geo = gsub(
    paste0(
      "DE[0-9]{1} |DE[A-Z]{1} |FR[A-Z]{1,2} |FR1 |ES[0-9]{2,2} |",
      "Prov\\. |BE[0-9]{2,3} "
    ),
    "",
    geo
  )) %>%
  ## Remove entries whose name marks them as not being a proper region
  filter(!grepl("Départements|Not regionalised|RUP FR - Régions", geo))
## To quickly extract the code/names for each NUTS -- delete after done.
## Prints every distinct code/name pair of one country, then writes the names
## alone, one per line. local() keeps the helper variable out of the
## workspace.
local({
  regions <- distinct(filter(tst, country == "LU"), geo_code, geo)
  print(regions, n = Inf)
  cat(as.character(pull(regions, geo)), sep = "\n")
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment