Skip to content

Instantly share code, notes, and snippets.

@samuelsaari
Created November 1, 2022 10:12
Show Gist options
  • Save samuelsaari/59cba7a997c36024b07c502253c3ac90 to your computer and use it in GitHub Desktop.
Save samuelsaari/59cba7a997c36024b07c502253c3ac90 to your computer and use it in GitHub Desktop.
Latent Class Growth Modelling with multinomial response in R
#---------------------------------------------------------
# libraries
rm(list=ls())
library(flexmix) # GMM & LCGM
library(TraMineR) # example data
library(OpenRepGrid)# random words for headings
library(khroma) # color palletttes
library(tidyverse)
library(car)
library(jtools) # for a nice theme
#---------------------------------------------------------
# parameters & options
TEST_RUN=T
set.seed(102)
theme_set(theme_nice(base_family = 'Consolas' ))
#---------------------------------------------------------
# loading and wrangling data
data(biofam)
biofam
biofam <- biofam %>% rename(gender=sex)
d <- biofam %>% select(.,gender,starts_with('a'),p02r01)
d$id <- 1:nrow(d)
d <- d %>% pivot_longer(.,names_to = "age",cols = starts_with('a'),values_to="rel_stat")
d <- d %>% mutate(across(c(gender,rel_stat), as.factor))
d <- d %>% transform(age=str_replace(age,"a",""))
d <- d %>% mutate(age=as.integer(age))
d <- tibble(d)
# 1 single
# 2 married
# 3 child
# 4 divorced
d$rel_stat <- car::recode(d$rel_stat,"0=1;1=1;2=2;3=2;4=3;5=3;6=3;7=4")
#---------------------------------------------------------
# making alternative ways to run the model
if (TEST_RUN) {
d <- d %>% filter(p02r01 %in% c("no denomination or religion"))
nr_of_classes <- 3
} else {
nr_of_classes <- 4
}
vector_of_chosen_classes <- 1:nr_of_classes
levels_rel_stat <- 1:4 # note that this is defined manually
#----------------------------------------------------------------------------------
#----------------------------------------------------------------------------------
#----------------------------------------------------------------------------------
# running Latent class growth modelling
lcgm_formula <- as.formula(rel_stat~age + I(age^2) + gender + gender:age)
lcgm <- flexmix::stepFlexmix(.~ .| id,
data=d,
k=nr_of_classes, # would be 1:12 in real analysis
nrep=1, # would be 50 in real analysis to avoid local maxima
control = list(iter.max = 500, minprior = 0),
model = flexmix::FLXMRmultinom(lcgm_formula,varFix=T,fixed = ~0))
#---------------------------------------------------------------------------------
#----------------------------------------------------------------------------------
#----------------------------------------------------------------------------------
#----------------------------------------------------------------------------------
#fitting the values
fitted_lcgm <- fitted(lcgm)
fitted_tibbles <- lapply(fitted_lcgm, function(x) cbind(x,d$age,d$gender))
fitted_tibbles <- lapply(fitted_tibbles,function(x) setNames(as_tibble(x,.name_repair = "minimal"),c(levels_rel_stat,"age","gender") ))
fitted_tibbles_long <-purrr::map(fitted_tibbles, function(x) {
pivot_longer(data=x,cols = -all_of(c("age","gender")), names_to = "rel_stat", values_to = "probability")
} )
fitted_tibbles_long <- purrr::map(fitted_tibbles_long,distinct) # remove duplicate rows
# have tried two altenative approaches to predict the probabilites - predicting values by age and gender as opposed to fitting to original data,and calculating the fitted values by hand (https://www.ibm.com/support/pages/compute-predicted-probabilities-multinomial-logistic-new-cases-or-outside-spss) with identical results
#----------------------------------------------------------------------------------
# helpers for plotting
text_size <- 12
title_size <- text_size*1.2
line_size=2
line_size_legend <- line_size*1.75
gender_labels <- c(`1`="\U2642",`2`="\U2640")
rw <- function() {
randomWords(nr_of_classes)
}
(class_titles <- paste(vector_of_chosen_classes,"class:",rw(),rw(),rw()))
#--------------------------------------------------------------------------------
# plotting
plot_single_class_line <- function(CLASS_DATA,TITLE){
ggplot(CLASS_DATA, aes(x = age,y = probability,color=rel_stat)) +
geom_line(size=line_size) +
scale_color_vibrant() +
guides(color = guide_legend(reverse = TRUE)) +
# common for both plots +
ggtitle(TITLE) +
labs (x = NULL, y=NULL) +
facet_wrap(~ gender,strip.position="left",labeller=labeller(gender=gender_labels))+
theme(text = element_text(size=text_size),
#legend.key.size = unit(1,"pt"),
legend.title=element_blank(),
plot.title = element_text(size =title_size,margin=margin(b=5)),
strip.text.y.left= element_text(angle=0, size=text_size*1.6))
}
plot_single_class_line(fitted_tibbles_long[[1]],TITLE=class_titles[1])
class_plot_list_line<- purrr::map(vector_of_chosen_classes,function(i) plot_single_class_line(fitted_tibbles_long[[i]],TITLE=class_titles[i]))
(plot_lcgm_line <- do.call(ggpubr::ggarrange,c(class_plot_list_line, list(common.legend = TRUE, legend = "bottom",ncol=1))))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment