Skip to content

Instantly share code, notes, and snippets.

@tomhopper
Created October 29, 2017 12:07
Show Gist options
  • Save tomhopper/6e337640054954f211d8362de4523491 to your computer and use it in GitHub Desktop.
Save tomhopper/6e337640054954f211d8362de4523491 to your computer and use it in GitHub Desktop.
Growth chart summary statistics for Hong Kong children, ages 6 to 18, for 1963, 1993, 2005/6
## Download growth chart summary statistics for Hong Kong children, ages 6 to 18, for 1963, 1993, 2005/6
## Data from
## So, Hung-Kwan et al. “Secular Changes in Height, Weight and Body Mass Index in Hong Kong Children.” BMC Public Health 8 (2008): 320. PMC. Web. 29 Oct. 2017.
## Article at \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/}
## PMC Copyright and reuse terms: \url{https://www.ncbi.nlm.nih.gov/pmc/about/copyright/}
## Heights in cm
## Weights in kg
## Libraries ####
library(rvest)
library(magrittr)
library(dplyr)
library(tidyr)
## Height Data ####
# Download Table 1 (height, cm) of the article and split it into one raw,
# still-untidy table per sex: `boys_h_df` and `girls_h_df`.
url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T1/"
the_data <- read_html(url) %>%
  html_node(xpath = '//*[@id="T1"]/div[2]/table') %>%
  html_table()
# Column names are built by pasting row 1 and row 3 of the scraped table
# together (presumably the survey-year row and the statistic-label row --
# verify against the live table if the page layout changes).
years <- the_data[1, ]
headers <- the_data[3, ]
df_names <- trimws(paste(years, headers))
rm(url, years, headers)
# Locate the rows whose first cell marks the start of each sex's section.
# `[[1]]` always yields a plain vector; `the_data[, 1]` yields a one-column
# tibble when html_table() returns a tibble (rvest >= 1.0.0), which makes the
# pattern match see a single deparsed string instead of one value per row.
row_boys <- grep("Boys", the_data[[1]])
row_girls <- grep("Girls", the_data[[1]])
if (length(row_boys) == 0L || length(row_girls) == 0L) {
  stop(
    "Could not locate the 'Boys'/'Girls' marker rows in the height table.",
    call. = FALSE
  )
}
# Boys: from the row after the "Boys" marker up to two rows before the
# "Girls" marker. Girls: from the row after the "Girls" marker to the end.
boys_h_df <- the_data[(row_boys + 1):(row_girls - 2), ]
girls_h_df <- the_data[(row_girls + 1):nrow(the_data), ]
colnames(boys_h_df) <- df_names
colnames(girls_h_df) <- df_names
rm(df_names, the_data)
# Tidy the boys' height table into one row per Age x Year with one numeric
# column per statistic.
boys_h_df <- boys_h_df %>%
  # Drop the difference columns; only the per-year statistics are kept.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Long format: one row per Age x original column name.
  gather(year, value, -Age) %>%
  # Column names are "<year> <statistic>"; split them at the space.
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  # One column per statistic.
  spread(stat, value) %>%
  # Keep only the first four characters of the year label
  # (e.g. "2005/6" becomes "2005").
  mutate(Year = substr(Year, 1, 4)) %>%
  # Everything was scraped as text; convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces the deprecated as_data_frame().
  as_tibble() %>%
  arrange(Age, Year)
# Tidy the girls' height table into one row per Age x Year with one numeric
# column per statistic.
girls_h_df <- girls_h_df %>%
  # Drop the difference columns; only the per-year statistics are kept.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Long format: one row per Age x original column name.
  gather(year, value, -Age) %>%
  # Column names are "<year> <statistic>"; split them at the space.
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  # One column per statistic.
  spread(stat, value) %>%
  # Keep only the first four characters of the year label
  # (e.g. "2005/6" becomes "2005").
  mutate(Year = substr(Year, 1, 4)) %>%
  # Everything was scraped as text; convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces the deprecated as_data_frame().
  as_tibble() %>%
  arrange(Age, Year)
## Weight Data ####
# Download Table 2 (weight, kg) of the article and split it into one raw,
# still-untidy table per sex: `boys_w_df` and `girls_w_df`.
url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T2/"
the_data <- read_html(url) %>%
  html_node(xpath = '//*[@id="T2"]/div[2]/table') %>%
  html_table()
# Column names are built by pasting row 1 and row 3 of the scraped table
# together (presumably the survey-year row and the statistic-label row --
# verify against the live table if the page layout changes).
years <- the_data[1, ]
headers <- the_data[3, ]
# Fix a typo in the original table: any label containing a capital "N" is
# replaced by "n".
headers[grep("N", headers)] <- "n"
df_names <- trimws(paste(years, headers))
rm(url, years, headers)
# Locate the rows whose first cell marks the start of each sex's section.
# `[[1]]` always yields a plain vector; `the_data[, 1]` yields a one-column
# tibble when html_table() returns a tibble (rvest >= 1.0.0), which makes the
# pattern match see a single deparsed string instead of one value per row.
row_boys <- grep("Boys", the_data[[1]])
row_girls <- grep("Girls", the_data[[1]])
if (length(row_boys) == 0L || length(row_girls) == 0L) {
  stop(
    "Could not locate the 'Boys'/'Girls' marker rows in the weight table.",
    call. = FALSE
  )
}
# Boys: from the row after the "Boys" marker up to two rows before the
# "Girls" marker. Girls: from the row after the "Girls" marker to the end.
boys_w_df <- the_data[(row_boys + 1):(row_girls - 2), ]
girls_w_df <- the_data[(row_girls + 1):nrow(the_data), ]
colnames(boys_w_df) <- df_names
colnames(girls_w_df) <- df_names
rm(df_names, the_data)
# Tidy the boys' weight table into one row per Age x Year with one numeric
# column per statistic.
boys_w_df <- boys_w_df %>%
  # Drop the difference columns; only the per-year statistics are kept.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Long format: one row per Age x original column name.
  gather(year, value, -Age) %>%
  # Column names are "<year> <statistic>"; split them at the space.
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  # One column per statistic.
  spread(stat, value) %>%
  # Keep only the first four characters of the year label
  # (e.g. "2005/6" becomes "2005").
  mutate(Year = substr(Year, 1, 4)) %>%
  # Everything was scraped as text; convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces the deprecated as_data_frame().
  as_tibble() %>%
  arrange(Age, Year)
# Tidy the girls' weight table into one row per Age x Year with one numeric
# column per statistic.
girls_w_df <- girls_w_df %>%
  # Drop the difference columns; only the per-year statistics are kept.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Long format: one row per Age x original column name.
  gather(year, value, -Age) %>%
  # Column names are "<year> <statistic>"; split them at the space.
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  # One column per statistic.
  spread(stat, value) %>%
  # Keep only the first four characters of the year label
  # (e.g. "2005/6" becomes "2005").
  mutate(Year = substr(Year, 1, 4)) %>%
  # Everything was scraped as text; convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces the deprecated as_data_frame().
  as_tibble() %>%
  arrange(Age, Year)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment