tomhopper · October 29, 2017 12:07
diff --git a/Child_Height_Weight_Stats.R b/Child_Height_Weight_Stats.R
 ## Download growth chart summary statistics for Hong Kong children, ages 6 to 18, for 1963, 1993, 2005/6
 ## Data from
 ## So, Hung-Kwan et al. “Secular Changes in Height, Weight and Body Mass Index in Hong Kong Children.” BMC Public Health 8 (2008): 320. PMC. Web. 29 Oct. 2017.
 ## Article at \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/}
 ## PMC Copyright and reuse terms: \url{https://www.ncbi.nlm.nih.gov/pmc/about/copyright/}
 ## Heights in cm
 ## Weights in kg

 ## Libraries ####
 library(rvest)
 library(magrittr)
 library(dplyr)
 library(tidyr)

 ## Height Data ####
 ## Scrape table T1 of the article and build one tibble per sex
 ## (boys_h_df, girls_h_df): numeric Age and Year plus one numeric column per
 ## statistic label found in the table header. Heights are in cm (see the
 ## file header).
 url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T1/"

 the_data <- read_html(url) %>%
   html_node(xpath = '//*[@id="T1"]/div[2]/table') %>%
   html_table()

 ## Column names are built as "<row 1 cell> <row 3 cell>" (year label, then
 ## statistic label). unlist() flattens the one-row frames to plain vectors.
 years <- unlist(the_data[1, ], use.names = FALSE)
 headers <- unlist(the_data[3, ], use.names = FALSE)
 df_names <- trimws(paste(years, headers))
 rm(url, years, headers)

 ## [[1]] always yields the first column as a plain vector. With
 ## the_data[, 1] a tibble (returned by html_table() in rvest >= 1.0.0) stays
 ## a one-column tibble, and grepl() would match against its deparsed text
 ## rather than the individual cells.
 row_boys <- grep("Boys", the_data[[1]])
 row_girls <- grep("Girls", the_data[[1]])
 if (length(row_boys) == 0L || length(row_girls) == 0L) {
   stop("Could not find the 'Boys'/'Girls' marker rows in table T1.",
        call. = FALSE)
 }

 ## Boys' rows run from just below the "Boys" marker to two rows above the
 ## "Girls" marker (presumably skipping a spacer row -- confirm against the
 ## table); girls' rows run from below the "Girls" marker to the end.
 boys_h_df <- the_data[(row_boys + 1):(row_girls - 2), ]
 girls_h_df <- the_data[(row_girls + 1):nrow(the_data), ]

 colnames(boys_h_df) <- df_names
 colnames(girls_h_df) <- df_names

 rm(df_names, the_data)

 ## Reshape one raw per-sex table: drop the d1, d2, d1/yr and d2/yr columns,
 ## go long on everything except Age, split "<year> <stat>" into Year and
 ## stat, spread the statistics back out as columns, keep the first four
 ## characters of Year (the file header lists a "2005/6" survey), convert
 ## every column to numeric and sort by Age, then Year.
 tidy_growth_table <- function(raw_df) {
   raw_df %>%
     mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
     gather(year, value, -Age) %>%
     separate(year, into = c('Year', 'stat'), sep = " ") %>%
     spread(stat, value) %>%
     mutate(Year = substr(Year, 1, 4)) %>%
     lapply(as.numeric) %>%
     as_tibble() %>% # as_data_frame() is deprecated in favour of as_tibble()
     arrange(Age, Year)
 }

 boys_h_df <- tidy_growth_table(boys_h_df)
 girls_h_df <- tidy_growth_table(girls_h_df)

 ## The helper is a temporary, like the other intermediates removed above.
 rm(tidy_growth_table)

 ## Weight Data ####
 ## Scrape table T2 of the article and build one tibble per sex
 ## (boys_w_df, girls_w_df): numeric Age and Year plus one numeric column per
 ## statistic label found in the table header. Weights are in kg (see the
 ## file header).
 url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T2/"

 the_data <- read_html(url) %>%
   html_node(xpath = '//*[@id="T2"]/div[2]/table') %>%
   html_table()

 ## Column names are built as "<row 1 cell> <row 3 cell>" (year label, then
 ## statistic label). unlist() flattens the one-row frames to plain vectors.
 years <- unlist(the_data[1, ], use.names = FALSE)
 headers <- unlist(the_data[3, ], use.names = FALSE)
 headers[grepl("N", headers)] <- "n" # Fix a typo in original table
 df_names <- trimws(paste(years, headers))
 rm(url, years, headers)

 ## [[1]] always yields the first column as a plain vector. With
 ## the_data[, 1] a tibble (returned by html_table() in rvest >= 1.0.0) stays
 ## a one-column tibble, and grepl() would match against its deparsed text
 ## rather than the individual cells.
 row_boys <- grep("Boys", the_data[[1]])
 row_girls <- grep("Girls", the_data[[1]])
 if (length(row_boys) == 0L || length(row_girls) == 0L) {
   stop("Could not find the 'Boys'/'Girls' marker rows in table T2.",
        call. = FALSE)
 }

 ## Boys' rows run from just below the "Boys" marker to two rows above the
 ## "Girls" marker (presumably skipping a spacer row -- confirm against the
 ## table); girls' rows run from below the "Girls" marker to the end.
 boys_w_df <- the_data[(row_boys + 1):(row_girls - 2), ]
 girls_w_df <- the_data[(row_girls + 1):nrow(the_data), ]

 colnames(boys_w_df) <- df_names
 colnames(girls_w_df) <- df_names

 rm(df_names, the_data)

 ## Reshape one raw per-sex table: drop the d1, d2, d1/yr and d2/yr columns,
 ## go long on everything except Age, split "<year> <stat>" into Year and
 ## stat, spread the statistics back out as columns, keep the first four
 ## characters of Year (the file header lists a "2005/6" survey), convert
 ## every column to numeric and sort by Age, then Year.
 tidy_growth_table <- function(raw_df) {
   raw_df %>%
     mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
     gather(year, value, -Age) %>%
     separate(year, into = c('Year', 'stat'), sep = " ") %>%
     spread(stat, value) %>%
     mutate(Year = substr(Year, 1, 4)) %>%
     lapply(as.numeric) %>%
     as_tibble() %>% # as_data_frame() is deprecated in favour of as_tibble()
     arrange(Age, Year)
 }

 boys_w_df <- tidy_growth_table(boys_w_df)
 girls_w_df <- tidy_growth_table(girls_w_df)

 ## The helper is a temporary, like the other intermediates removed above.
 rm(tidy_growth_table)
	## Download growth chart summary statistics for Hong Kong children, ages 6 to 18, for 1963, 1993, 2005/6
	## Data from
	## So, Hung-Kwan et al. “Secular Changes in Height, Weight and Body Mass Index in Hong Kong Children.” BMC Public Health 8 (2008): 320. PMC. Web. 29 Oct. 2017.
	## Article at \url{https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/}
	## PMC Copyright and reuse terms: \url{https://www.ncbi.nlm.nih.gov/pmc/about/copyright/}
	## Heights in cm
	## Weights in kg

	## Libraries ####
	library(rvest)
	library(magrittr)
	library(dplyr)
	library(tidyr)

	## Height Data ####
	## Scrape table T1 of the article and build one tibble per sex
	## (boys_h_df, girls_h_df): numeric Age and Year plus one numeric column per
	## statistic label found in the table header. Heights are in cm (see the
	## file header).
	url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T1/"

	the_data <- read_html(url) %>%
	  html_node(xpath = '//*[@id="T1"]/div[2]/table') %>%
	  html_table()

	## Column names are built as "<row 1 cell> <row 3 cell>" (year label, then
	## statistic label). unlist() flattens the one-row frames to plain vectors.
	years <- unlist(the_data[1, ], use.names = FALSE)
	headers <- unlist(the_data[3, ], use.names = FALSE)
	df_names <- trimws(paste(years, headers))
	rm(url, years, headers)

	## [[1]] always yields the first column as a plain vector. With
	## the_data[, 1] a tibble (returned by html_table() in rvest >= 1.0.0) stays
	## a one-column tibble, and grepl() would match against its deparsed text
	## rather than the individual cells.
	row_boys <- grep("Boys", the_data[[1]])
	row_girls <- grep("Girls", the_data[[1]])
	if (length(row_boys) == 0L || length(row_girls) == 0L) {
	  stop("Could not find the 'Boys'/'Girls' marker rows in table T1.",
	       call. = FALSE)
	}

	## Boys' rows run from just below the "Boys" marker to two rows above the
	## "Girls" marker (presumably skipping a spacer row -- confirm against the
	## table); girls' rows run from below the "Girls" marker to the end.
	boys_h_df <- the_data[(row_boys + 1):(row_girls - 2), ]
	girls_h_df <- the_data[(row_girls + 1):nrow(the_data), ]

	colnames(boys_h_df) <- df_names
	colnames(girls_h_df) <- df_names

	rm(df_names, the_data)

	## Reshape one raw per-sex table: drop the d1, d2, d1/yr and d2/yr columns,
	## go long on everything except Age, split "<year> <stat>" into Year and
	## stat, spread the statistics back out as columns, keep the first four
	## characters of Year (the file header lists a "2005/6" survey), convert
	## every column to numeric and sort by Age, then Year.
	tidy_growth_table <- function(raw_df) {
	  raw_df %>%
	    mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
	    gather(year, value, -Age) %>%
	    separate(year, into = c('Year', 'stat'), sep = " ") %>%
	    spread(stat, value) %>%
	    mutate(Year = substr(Year, 1, 4)) %>%
	    lapply(as.numeric) %>%
	    as_tibble() %>% # as_data_frame() is deprecated in favour of as_tibble()
	    arrange(Age, Year)
	}

	boys_h_df <- tidy_growth_table(boys_h_df)
	girls_h_df <- tidy_growth_table(girls_h_df)

	## The helper is a temporary, like the other intermediates removed above.
	rm(tidy_growth_table)

	## Weight Data ####
	## Scrape table T2 of the article and build one tibble per sex
	## (boys_w_df, girls_w_df): numeric Age and Year plus one numeric column per
	## statistic label found in the table header. Weights are in kg (see the
	## file header).
	url <- "https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2572616/table/T2/"

	the_data <- read_html(url) %>%
	  html_node(xpath = '//*[@id="T2"]/div[2]/table') %>%
	  html_table()

	## Column names are built as "<row 1 cell> <row 3 cell>" (year label, then
	## statistic label). unlist() flattens the one-row frames to plain vectors.
	years <- unlist(the_data[1, ], use.names = FALSE)
	headers <- unlist(the_data[3, ], use.names = FALSE)
	headers[grepl("N", headers)] <- "n" # Fix a typo in original table
	df_names <- trimws(paste(years, headers))
	rm(url, years, headers)

	## [[1]] always yields the first column as a plain vector. With
	## the_data[, 1] a tibble (returned by html_table() in rvest >= 1.0.0) stays
	## a one-column tibble, and grepl() would match against its deparsed text
	## rather than the individual cells.
	row_boys <- grep("Boys", the_data[[1]])
	row_girls <- grep("Girls", the_data[[1]])
	if (length(row_boys) == 0L || length(row_girls) == 0L) {
	  stop("Could not find the 'Boys'/'Girls' marker rows in table T2.",
	       call. = FALSE)
	}

	## Boys' rows run from just below the "Boys" marker to two rows above the
	## "Girls" marker (presumably skipping a spacer row -- confirm against the
	## table); girls' rows run from below the "Girls" marker to the end.
	boys_w_df <- the_data[(row_boys + 1):(row_girls - 2), ]
	girls_w_df <- the_data[(row_girls + 1):nrow(the_data), ]

	colnames(boys_w_df) <- df_names
	colnames(girls_w_df) <- df_names

	rm(df_names, the_data)

	## Reshape one raw per-sex table: drop the d1, d2, d1/yr and d2/yr columns,
	## go long on everything except Age, split "<year> <stat>" into Year and
	## stat, spread the statistics back out as columns, keep the first four
	## characters of Year (the file header lists a "2005/6" survey), convert
	## every column to numeric and sort by Age, then Year.
	tidy_growth_table <- function(raw_df) {
	  raw_df %>%
	    mutate(d1 = NULL, d2 = NULL, `d1/yr` = NULL, `d2/yr` = NULL) %>%
	    gather(year, value, -Age) %>%
	    separate(year, into = c('Year', 'stat'), sep = " ") %>%
	    spread(stat, value) %>%
	    mutate(Year = substr(Year, 1, 4)) %>%
	    lapply(as.numeric) %>%
	    as_tibble() %>% # as_data_frame() is deprecated in favour of as_tibble()
	    arrange(Age, Year)
	}

	boys_w_df <- tidy_growth_table(boys_w_df)
	girls_w_df <- tidy_growth_table(girls_w_df)

	## The helper is a temporary, like the other intermediates removed above.
	rm(tidy_growth_table)