Created
October 29, 2017 12:07
-
-
Save tomhopper/6e337640054954f211d8362de4523491 to your computer and use it in GitHub Desktop.
Growth chart summary statistics for Hong Kong children, ages 6 to 18, for 1963, 1993, 2005/6
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Download growth chart summary statistics for Hong Kong children, ages 6 to 18, for 1963, 1993, 2005/6
## Data from
## So, Hung-Kwan et al. “Secular Changes in Height, Weight and Body Mass Index in Hong Kong Children.” BMC Public Health 8 (2008): 320. PMC. Web. 29 Oct. 2017.
## Article at \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/}
## PMC Copyright and reuse terms: \url{https://www.ncbi.nlm.nih.gov/pmc/about/copyright/}
## Heights in cm
## Weights in kg
## Libraries ####
library(rvest) | |
library(magrittr) | |
library(dplyr) | |
library(tidyr) | |
## Height Data ####
# Scrape Table 1 of the article and split it into a boys' and a girls' data
# frame that share the same column names.
url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T1/"
the_data <- read_html(url) %>%
  html_node(xpath = '//*[@id="T1"]/div[2]/table') %>%
  html_table()

# Row 1 supplies the year part and row 3 the statistic part of each column
# name. as.character() on a one-row data frame yields one string per column,
# which is the same coercion paste() would apply implicitly.
years <- as.character(the_data[1, ])
headers <- as.character(the_data[3, ])
df_names <- trimws(paste(years, headers))
rm(url, years, headers)

# Locate the "Boys" and "Girls" marker rows in the first column.
# `[[1]]` always extracts a plain vector; `the_data[, 1]` stays a one-column
# tibble when html_table() returns a tibble (rvest >= 1.0.0), which makes
# grepl() test a single deparsed string and report row 1 for both markers.
row_boys <- grep("Boys", the_data[[1]])
row_girls <- grep("Girls", the_data[[1]])

# Boys' rows run from just below the "Boys" marker to two rows above the
# "Girls" marker; girls' rows run from just below "Girls" to the end.
boys_h_df <- the_data[(row_boys + 1):(row_girls - 2), ]
girls_h_df <- the_data[(row_girls + 1):nrow(the_data), ]
colnames(boys_h_df) <- df_names
colnames(girls_h_df) <- df_names
rm(df_names, the_data)
# Tidy the boys' height table: one row per Age and Year, one numeric column
# per summary statistic, sorted by Age then Year.
boys_h_df <- boys_h_df %>%
  # Drop the d1/d2 columns; assigning NULL to a column that does not exist
  # is a no-op, so this is safe whichever of them are present.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Column names are "<year> <stat>": go long, split the name on the space,
  # then spread the statistics back out into columns.
  # gather()/spread() are kept deliberately: pivot_wider() would order the
  # statistic columns differently.
  gather(year, value, -Age) %>%
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  spread(stat, value) %>%
  # Keep only the first four characters of the year label.
  mutate(Year = substr(Year, 1, 4)) %>%
  # Convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces as_data_frame(), which is deprecated and warns.
  as_tibble() %>%
  arrange(Age, Year)
# Tidy the girls' height table: one row per Age and Year, one numeric column
# per summary statistic, sorted by Age then Year.
girls_h_df <- girls_h_df %>%
  # Drop the d1/d2 columns; assigning NULL to a column that does not exist
  # is a no-op, so this is safe whichever of them are present.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Column names are "<year> <stat>": go long, split the name on the space,
  # then spread the statistics back out into columns.
  # gather()/spread() are kept deliberately: pivot_wider() would order the
  # statistic columns differently.
  gather(year, value, -Age) %>%
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  spread(stat, value) %>%
  # Keep only the first four characters of the year label.
  mutate(Year = substr(Year, 1, 4)) %>%
  # Convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces as_data_frame(), which is deprecated and warns.
  as_tibble() %>%
  arrange(Age, Year)
## Weight Data ####
# Scrape Table 2 of the article and split it into a boys' and a girls' data
# frame that share the same column names.
url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T2/"
the_data <- read_html(url) %>%
  html_node(xpath = '//*[@id="T2"]/div[2]/table') %>%
  html_table()

# Row 1 supplies the year part and row 3 the statistic part of each column
# name. as.character() on a one-row data frame yields one string per column,
# which is the same coercion paste() and grepl() would apply implicitly.
years <- as.character(the_data[1, ])
headers <- as.character(the_data[3, ])
headers[grepl("N", headers)] <- "n" # Fix a typo in original table
df_names <- trimws(paste(years, headers))
rm(url, years, headers)

# Locate the "Boys" and "Girls" marker rows in the first column.
# `[[1]]` always extracts a plain vector; `the_data[, 1]` stays a one-column
# tibble when html_table() returns a tibble (rvest >= 1.0.0), which makes
# grepl() test a single deparsed string and report row 1 for both markers.
row_boys <- grep("Boys", the_data[[1]])
row_girls <- grep("Girls", the_data[[1]])

# Boys' rows run from just below the "Boys" marker to two rows above the
# "Girls" marker; girls' rows run from just below "Girls" to the end.
boys_w_df <- the_data[(row_boys + 1):(row_girls - 2), ]
girls_w_df <- the_data[(row_girls + 1):nrow(the_data), ]
colnames(boys_w_df) <- df_names
colnames(girls_w_df) <- df_names
rm(df_names, the_data)
# Tidy the boys' weight table: one row per Age and Year, one numeric column
# per summary statistic, sorted by Age then Year.
boys_w_df <- boys_w_df %>%
  # Drop the d1/d2 columns; assigning NULL to a column that does not exist
  # is a no-op, so this is safe whichever of them are present.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Column names are "<year> <stat>": go long, split the name on the space,
  # then spread the statistics back out into columns.
  # gather()/spread() are kept deliberately: pivot_wider() would order the
  # statistic columns differently.
  gather(year, value, -Age) %>%
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  spread(stat, value) %>%
  # Keep only the first four characters of the year label.
  mutate(Year = substr(Year, 1, 4)) %>%
  # Convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces as_data_frame(), which is deprecated and warns.
  as_tibble() %>%
  arrange(Age, Year)
# Tidy the girls' weight table: one row per Age and Year, one numeric column
# per summary statistic, sorted by Age then Year.
girls_w_df <- girls_w_df %>%
  # Drop the d1/d2 columns; assigning NULL to a column that does not exist
  # is a no-op, so this is safe whichever of them are present.
  mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
  # Column names are "<year> <stat>": go long, split the name on the space,
  # then spread the statistics back out into columns.
  # gather()/spread() are kept deliberately: pivot_wider() would order the
  # statistic columns differently.
  gather(year, value, -Age) %>%
  separate(year, into = c("Year", "stat"), sep = " ") %>%
  spread(stat, value) %>%
  # Keep only the first four characters of the year label.
  mutate(Year = substr(Year, 1, 4)) %>%
  # Convert every column to numeric.
  lapply(as.numeric) %>%
  # as_tibble() replaces as_data_frame(), which is deprecated and warns.
  as_tibble() %>%
  arrange(Age, Year)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment