Skip to content

Instantly share code, notes, and snippets.

@walterst
Last active November 15, 2023 14:58
Show Gist options
  • Save walterst/ede8b883d4f9acaf45ec9e2b0ec811fe to your computer and use it in GitHub Desktop.
Save walterst/ede8b883d4f9acaf45ec9e2b0ec811fe to your computer and use it in GitHub Desktop.
This is an R script for fitting and plotting infants' growth (weight and height) from ages 0-3 with a modified Michaelis-Menten equation.
# This code will read in the STARR heights and weight data that accompanied the article:
# "A modified Michaelis-Menten equation estimates growth from birth to 3 years in healthy babies in the US"
# The filepaths will need to be modified for the correct local filepath. dplyr and ggplot2, gplots, & gridExtra graphics
# libraries are needed. Interpolation of weight/heights from a given age in days
# would be done through the predict() function, passing the fitted model and a dataframe of days.
# Subjects that fail to fit due to errors with nls() will be plotted as raw data, if errors occur.
# Increase the default number_of_subjects_to_fit to 100 to see an example.
library(dplyr)
library(ggplot2)
library(gplots)
library(gridExtra)
number_of_subjects_to_fit = 5
#### CHANGE filepaths below to the correct local filepath
weights <- read.csv("starr_source_weight_data.txt", sep="\t", header = T)
heights <- read.csv("starr_source_height_data.txt", sep="\t", header = T)
# The dataframe should have anon_id (for the subjects), AgeBabyDays for the age, and weightkg for weights/Heightcm for heights. If other data are used, change the names accordingly (or modify the code below to use the proper names)
head(weights)
head(heights)
# In the case of failures, one can set trace=TRUE to examine the parameter values through iterations.
# For failures, it's often the case that a1 and b1 increase without bound.
# These starting parameter values were based upon weight in kg, age in days
fit_MM_curves_weights <- function(dates,weights) {
fitted_curve <-nls(weights~(c1+(a1*dates)/(b1+dates)), start = list(a1 = 5, b1 = 20, c1=2.5), trace = FALSE)
return(fitted_curve)
}
# These starting parameter values were based upon height in cm, age in days
fit_MM_curves_heights <- function(dates,heights) {
fitted_curve <-nls(heights~(c1+(a1*dates)/(b1+dates)), start = list(a1 = 60, b1 = 530, c1=50), trace = FALSE)
return(fitted_curve)
}
five_weights <- dplyr::filter(weights, anon_id %in% unique(weights$anon_id)[1:number_of_subjects_to_fit])
five_heights <- dplyr::filter(heights, anon_id %in% unique(heights$anon_id)[1:number_of_subjects_to_fit])
### Weight modeling
subjects_weight <- unique(five_weights$anon_id)
error_subjects_weights = character()
weight_fitting_data = data.frame(anon_id=character(), date_series=numeric(),weight_series=numeric(), fitted_curve=numeric())
error_data_weights = data.frame(anon_id=character(), date_series=numeric(),weight_series=numeric())
for (subject in subjects_weight) {
skip_to_next <- FALSE
target_subject = dplyr::filter(five_weights, anon_id == subject)
date_series <- target_subject$AgeBabyDays
weight_series <- target_subject$weightkg
possible_error <- tryCatch( {
fitted_curve <- fit_MM_curves_weights(date_series, weight_series)
}, error = function(e){
skip_to_next <<- TRUE
})
if(skip_to_next) {
error_subjects_weights <- c(error_subjects_weights, subject)
anon_ids = rep(as.character(subject), length(date_series))
curr_df = data.frame(anon_id = anon_ids, date_series=date_series, weight_series=weight_series)
error_data_weights <- dplyr::bind_rows(error_data_weights, curr_df)
next
}
anon_ids = rep(subject, length(date_series))
curr_df = data.frame(anon_id = anon_ids, date_series=date_series, weight_series=weight_series, fitted_curve=predict(fitted_curve))
weight_fitting_data <- dplyr::bind_rows(weight_fitting_data, curr_df)
}
print("Total Number of Subjects for weight modeling:")
print(length(subjects_weight))
print("Number of subjects with errors fitting for weight modeling:")
print(length(error_subjects_weights))
filtered_remaining_subjects <- setdiff(subjects_weight, error_subjects_weights)
weight_fitted_data = dplyr::filter(weight_fitting_data, anon_id %in% filtered_remaining_subjects)
p1 <- ggplot() + geom_point(data=weight_fitted_data, aes(x=date_series, y = weight_series)) + geom_line(data=weight_fitted_data, aes(x=date_series, y=fitted_curve, color="red")) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p1
# Only attempt if there are errors
if (length(error_subjects_weights) > 0) {
p2 <- ggplot() + geom_line(data=error_data_weights, aes(x=date_series, y = weight_series)) + facet_wrap(~anon_id) + geom_point(data=error_data_weights, aes(x=date_series, y = weight_series)) + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p2
}
#### Height modeling
subjects_height <- unique(five_heights$anon_id)
error_subjects_height = character()
height_fitting_data = data.frame(anon_id=character(), date_series=numeric(),height_series=numeric(), fitted_curve=numeric())
error_data_height = data.frame(anon_id=character(), date_series=numeric(),height_series=numeric())
for (subject in subjects_height) {
skip_to_next <- FALSE
target_subject = dplyr::filter(five_heights, anon_id == subject)
date_series <- target_subject$AgeBabyDays
height_series <- target_subject$Heightcm
possible_error <- tryCatch( {
fitted_curve <- fit_MM_curves_heights(date_series, height_series)
}, error = function(e){
skip_to_next <<- TRUE
})
if(skip_to_next) {
error_subjects_height <- c(error_subjects_height, subject)
anon_ids = rep(as.character(subject), length(date_series))
curr_df = data.frame(anon_id = anon_ids, date_series=date_series, height_series=height_series)
error_data_height <- dplyr::bind_rows(error_data_height, curr_df)
next
}
anon_ids = rep(subject, length(date_series))
curr_df = data.frame(anon_id = anon_ids, date_series=date_series, height_series=height_series, fitted_curve=predict(fitted_curve))
height_fitting_data <- dplyr::bind_rows(height_fitting_data, curr_df)
}
print("Total Number of Subjects for height modeling:")
print(length(subjects_height))
print("Number of subjects with errors fitting for weight modeling:")
print(length(error_subjects_height))
filtered_remaining_subjects <- setdiff(subjects_height, error_subjects_height)
height_fitted_data = dplyr::filter(height_fitting_data, anon_id %in% filtered_remaining_subjects)
p3 <- ggplot() + geom_point(data=height_fitted_data, aes(x=date_series, y = height_series)) + geom_line(data=height_fitted_data, aes(x=date_series, y=fitted_curve, color="red")) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p3
# Only attempt if there are errors
if (length(error_subjects_height) > 0) {
p4 <- ggplot() + geom_line(data=error_data_height, aes(x=date_series, y = height_series)) + facet_wrap(~anon_id) + geom_point(data=error_data_height, aes(x=date_series, y = height_series)) + theme(legend.position = "none") + facet_wrap(~anon_id)
#ggsave(file = "Add_filepath_and_uncomment_if_used.pdf", width = 20, height = 16)
p4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment