Skip to content

Instantly share code, notes, and snippets.

@mayrop
Last active April 14, 2020 07:19
Show Gist options
  • Save mayrop/85c1a6132b100f0e436f9233704ad05f to your computer and use it in GitHub Desktop.
Save mayrop/85c1a6132b100f0e436f9233704ad05f to your computer and use it in GitHub Desktop.
Script to merge COVID MX data with descriptions
library(dplyr)
# this function will soon be added to my R Package: https://github.com/mayrop/r-covid19in-mx
# also get the yaml file from: https://gist.github.com/mayrop/83c955e5038390095aef9e5405c632b6
source("functions.R")
# download CSV from: https://www.gob.mx/salud/documentos/datos-abiertos-152127
input <- read.csv("covid19_mexico_13_04_2020.csv", stringsAsFactors=FALSE, fileEncoding="latin1")
merged <- join_covid_mx(input)
merged %>%
dplyr::filter(FECHA_DEF!="9999-99-99") %>%
dplyr::group_by(RESULTADO_FACTOR) %>%
dplyr::count()
# A tibble: 3 x 2
# Groups: RESULTADO_FACTOR [3]
# RESULTADO_FACTOR n
#<fct> <int>
# 1 No positivo SARS-CoV-2 288
# 2 Positivo SARS-CoV-2 332
# 3 Resultado pendiente 84
join_covid_mx <- function(input) {
descriptions <- lapply(yaml::read_yaml("covid_mx_descriptions.yaml"), as.data.frame)
input %>%
dplyr::left_join(descriptions$origin, by=c("ORIGEN"="CLAVE")) %>%
dplyr::rename(
ORIGEN_FACTOR=DESCRIPCION
) %>%
# adding sector
dplyr::left_join(descriptions$sector, by=c("SECTOR"="CLAVE")) %>%
dplyr::rename(
SECTOR_FACTOR=DESCRIPCION
) %>%
# adding state that gave attention
dplyr::left_join(descriptions$states, by=c("ENTIDAD_UM"="CLAVE_ENTIDAD")) %>%
dplyr::rename(
ENTIDAD_UM_FACTOR=ENTIDAD_FEDERATIVA
) %>%
dplyr::select(-ABREVIATURA) %>%
# adding sex
dplyr::left_join(descriptions$sex, by=c("SEXO"="CLAVE")) %>%
dplyr::rename(
SEXO_FACTOR=DESCRIPCION
) %>%
# adding state where patient was born
dplyr::left_join(descriptions$states, by=c("ENTIDAD_NAC"="CLAVE_ENTIDAD")) %>%
dplyr::rename(
ENTIDAD_NAC_FACTOR=ENTIDAD_FEDERATIVA
) %>%
dplyr::select(-ABREVIATURA) %>%
# adding patient's state of residency
dplyr::left_join(descriptions$states, by=c("ENTIDAD_RES"="CLAVE_ENTIDAD")) %>%
dplyr::rename(
ENTIDAD_RES_FACTOR=ENTIDAD_FEDERATIVA
) %>%
dplyr::select(-ABREVIATURA) %>%
# adding patient's city of residency
dplyr::left_join(descriptions$cities, by=c("ENTIDAD_RES"="CLAVE_ENTIDAD", "MUNICIPIO_RES"="CLAVE_MUNICIPIO")) %>%
dplyr::rename(
MUNICIPIO_RES_FACTOR=MUNICIPIO
) %>%
# adding patient's type
dplyr::left_join(descriptions$patient_type, by=c("TIPO_PACIENTE"="CLAVE")) %>%
dplyr::rename(
TIPO_PACIENTE_FACTOR=DESCRIPCION
) %>%
# ? intubated
dplyr::left_join(descriptions$yes_no, by=c("INTUBADO"="CLAVE")) %>%
dplyr::rename(
INTUBADO_FACTOR=DESCRIPCION
) %>%
# ? pneumonia
dplyr::left_join(descriptions$yes_no, by=c("NEUMONIA"="CLAVE")) %>%
dplyr::rename(
NEUMONIA_FACTOR=DESCRIPCION
) %>%
# ? pregnant
dplyr::left_join(descriptions$yes_no, by=c("EMBARAZO"="CLAVE")) %>%
dplyr::rename(
EMBARAZO_FACTOR=DESCRIPCION
) %>%
# ? speaks native language
dplyr::left_join(descriptions$yes_no, by=c("HABLA_LENGUA_INDI"="CLAVE")) %>%
dplyr::rename(
HABLA_LENGUA_INDI_FACTOR=DESCRIPCION
) %>%
# ? diabetes
dplyr::left_join(descriptions$yes_no, by=c("DIABETES"="CLAVE")) %>%
dplyr::rename(
DIABETES_FACTOR=DESCRIPCION
) %>%
# ? epoc
dplyr::left_join(descriptions$yes_no, by=c("EPOC"="CLAVE")) %>%
dplyr::rename(
EPOC_FACTOR=DESCRIPCION
) %>%
# ? asthma
dplyr::left_join(descriptions$yes_no, by=c("ASMA"="CLAVE")) %>%
dplyr::rename(
ASMA_FACTOR=DESCRIPCION
) %>%
# ? immunosuppression
dplyr::left_join(descriptions$yes_no, by=c("INMUSUPR"="CLAVE")) %>%
dplyr::rename(
INMUSUPR_FACTOR=DESCRIPCION
) %>%
# ? hypertension
dplyr::left_join(descriptions$yes_no, by=c("HIPERTENSION"="CLAVE")) %>%
dplyr::rename(
HIPERTENSION_FACTOR=DESCRIPCION
) %>%
# ? other diseases
dplyr::left_join(descriptions$yes_no, by=c("OTRA_CON"="CLAVE")) %>%
dplyr::rename(
OTRA_CON_FACTOR=DESCRIPCION
) %>%
# ? cardiovascular
dplyr::left_join(descriptions$yes_no, by=c("CARDIOVASCULAR"="CLAVE")) %>%
dplyr::rename(
CARDIOVASCULAR_FACTOR=DESCRIPCION
) %>%
# ? obesity
dplyr::left_join(descriptions$yes_no, by=c("OBESIDAD"="CLAVE")) %>%
dplyr::rename(
OBESIDAD_FACTOR=DESCRIPCION
) %>%
# ? renal chronic
dplyr::left_join(descriptions$yes_no, by=c("RENAL_CRONICA"="CLAVE")) %>%
dplyr::rename(
RENAL_CRONICA_FACTOR=DESCRIPCION
) %>%
# ? smoker
dplyr::left_join(descriptions$yes_no, by=c("TABAQUISMO"="CLAVE")) %>%
dplyr::rename(
TABAQUISMO_FACTOR=DESCRIPCION
) %>%
# ? contact with other case?
dplyr::left_join(descriptions$yes_no, by=c("OTRO_CASO"="CLAVE")) %>%
dplyr::rename(
OTRO_CASO_FACTOR=DESCRIPCION
) %>%
# ? migrant
dplyr::left_join(descriptions$yes_no, by=c("MIGRANTE"="CLAVE")) %>%
dplyr::rename(
MIGRANTE_FACTOR=DESCRIPCION
) %>%
# ? uci
dplyr::left_join(descriptions$yes_no, by=c("UCI"="CLAVE")) %>%
dplyr::rename(
UCI_FACTOR=DESCRIPCION
) %>%
# adding result factor
dplyr::left_join(descriptions$result, by=c("RESULTADO"="CLAVE")) %>%
dplyr::rename(
RESULTADO_FACTOR=DESCRIPCION
) %>%
# adding nationality
dplyr::left_join(descriptions$nationality, by=c("NACIONALIDAD"="CLAVE")) %>%
dplyr::rename(
NACIONALIDAD_FACTOR=DESCRIPCION
)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment