samuelsaari · November 1, 2022 10:12
diff --git a/z1det_MVE_biofam.R b/z1det_MVE_biofam.R
 #---------------------------------------------------------
 # libraries
 rm(list=ls())

 library(flexmix) # GMM & LCGM
 library(TraMineR) # example data
 library(OpenRepGrid)# random words for headings
 library(khroma) # color palletttes
 library(tidyverse)
 library(car)
 library(jtools) # for a nice theme


 #---------------------------------------------------------
 # parameters & options
 # TRUE: the script later keeps a single p02r01 group and fits 3 classes;
 # FALSE: all rows are used and 4 classes are fitted.
 TEST_RUN <- TRUE
 # fixed seed so everything random further down is reproducible
 set.seed(102)
 # NOTE(review): the 'Consolas' font has to be available on the machine
 theme_set(theme_nice(base_family = 'Consolas'))

 #---------------------------------------------------------
 # loading and wrangling data
 data(biofam)
 biofam # interactive look at the raw data
 biofam <- biofam %>% rename(gender = sex)

 # Long format, one row per person (id) and age: the columns whose names start
 # with 'a' are stacked into age / rel_stat, and the "a" prefix is stripped so
 # age can be stored as an integer.
 d <- biofam %>%
   select(gender, starts_with('a'), p02r01) %>%
   mutate(id = seq_len(n())) %>%
   pivot_longer(cols = starts_with('a'), names_to = "age", values_to = "rel_stat") %>%
   mutate(
     across(c(gender, rel_stat), as.factor),
     age = as.integer(str_replace(age, "a", ""))
   )

 # collapse the raw codes 0..7 into four states:
 # 1 single
 # 2 married
 # 3 child
 # 4 divorced
 d$rel_stat <- car::recode(d$rel_stat, "0=1;1=1;2=2;3=2;4=3;5=3;6=3;7=4")

 #---------------------------------------------------------
 # making alternative ways to run the model
 # the test run fits 3 classes on one p02r01 group, the full run 4 on all rows
 nr_of_classes <- if (TEST_RUN) 3 else 4
 if (TEST_RUN) {
   d <- dplyr::filter(d, p02r01 %in% c("no denomination or religion"))
 }

 # class indices used for titles and plots
 vector_of_chosen_classes <- seq_len(nr_of_classes)
 # column labels for the fitted probabilities; maintained by hand
 levels_rel_stat <- 1:4


 #----------------------------------------------------------------------------------
 #----------------------------------------------------------------------------------
 #----------------------------------------------------------------------------------
 # running Latent class growth modelling
 # class-specific multinomial model: quadratic in age plus gender and a
 # gender-by-age interaction
 lcgm_formula <- rel_stat ~ age + I(age^2) + gender + gender:age

 # "| id" declares id as the grouping variable for the mixture
 lcgm <- flexmix::stepFlexmix(
   . ~ . | id,
   data = d,
   model = flexmix::FLXMRmultinom(lcgm_formula, varFix = TRUE, fixed = ~0),
   k = nr_of_classes, # would be 1:12 in real analysis
   nrep = 1, # would be 50 in real analysis to avoid local maxima
   control = list(iter.max = 500, minprior = 0)
 )
 #---------------------------------------------------------------------------------
 #----------------------------------------------------------------------------------
 #----------------------------------------------------------------------------------


 #----------------------------------------------------------------------------------
 #fitting the values
 # fitted probabilities; presumably one list element per latent class
 fitted_lcgm <- fitted(lcgm)

 # Attach the covariates and label the columns. cbind() drops the factor class,
 # so gender ends up as its integer code.
 fitted_tibbles <- lapply(fitted_lcgm, function(class_probs) {
   with_covariates <- cbind(class_probs, d$age, d$gender)
   wide <- as_tibble(with_covariates, .name_repair = "minimal")
   setNames(wide, c(levels_rel_stat, "age", "gender"))
 })

 # One row per age / gender / rel_stat combination; rows repeated across
 # persons are dropped.
 fitted_tibbles_long <- purrr::map(fitted_tibbles, function(wide) {
   long <- pivot_longer(
     data = wide,
     cols = -all_of(c("age", "gender")),
     names_to = "rel_stat",
     values_to = "probability"
   )
   distinct(long)
 })

 # have tried two alternative approaches to predict the probabilities - predicting values by age and gender as opposed to fitting to original data, and calculating the fitted values by hand (https://www.ibm.com/support/pages/compute-predicted-probabilities-multinomial-logistic-new-cases-or-outside-spss) with identical results

 #----------------------------------------------------------------------------------
 # helpers for plotting

 # base font size; the plot title is scaled up from it
 text_size <- 12
 title_size <- 1.2 * text_size
 # width of the probability curves
 line_size <- 2
 # NOTE(review): presumably meant for legend keys; not used in the visible code
 line_size_legend <- 1.75 * line_size
 # facet strip labels keyed by the values of the gender column (male / female sign)
 gender_labels <- c(`1` = "\U2642", `2` = "\U2640")

 # Returns randomWords(nr_of_classes); used to build placeholder class titles.
 # Reads the global nr_of_classes at call time.
 rw <- function() OpenRepGrid::randomWords(nr_of_classes)

 # placeholder title per class: "<index> class: <word> <word> <word>"
 class_titles <- paste(
   vector_of_chosen_classes, "class:",
   rw(), rw(), rw()
 )
 class_titles


 #--------------------------------------------------------------------------------
 # plotting

 # Line plot of the fitted probabilities of one latent class.
 #
 # CLASS_DATA: data frame with the columns age, probability, rel_stat, gender
 # TITLE:      plot title
 # Returns a ggplot object with one facet per gender value. Sizes and strip
 # labels are read from the globals line_size, text_size, title_size and
 # gender_labels.
 plot_single_class_line <- function(CLASS_DATA, TITLE) {
   # ggplot2 3.4.0 deprecated `size` for lines in favour of `linewidth`; use
   # whichever the installed version expects so no deprecation warning fires
   line_layer <- if (utils::packageVersion("ggplot2") >= "3.4.0") {
     geom_line(linewidth = line_size)
   } else {
     geom_line(size = line_size)
   }

   ggplot(CLASS_DATA, aes(x = age, y = probability, color = rel_stat)) +
     line_layer +
     scale_color_vibrant() +
     guides(color = guide_legend(reverse = TRUE)) +
     ggtitle(TITLE) +
     labs(x = NULL, y = NULL) +
     # gender goes into the strip on the left, labelled via gender_labels
     facet_wrap(~ gender, strip.position = "left", labeller = labeller(gender = gender_labels)) +
     theme(
       text = element_text(size = text_size),
       #legend.key.size = unit(1,"pt"),
       legend.title = element_blank(),
       plot.title = element_text(size = title_size, margin = margin(b = 5)),
       strip.text.y.left = element_text(angle = 0, size = text_size * 1.6)
     )
 }

 # preview of the first class on its own
 plot_single_class_line(fitted_tibbles_long[[1]], TITLE = class_titles[1])

 # one plot per class
 class_plot_list_line <- lapply(vector_of_chosen_classes, function(class_idx) {
   plot_single_class_line(
     fitted_tibbles_long[[class_idx]],
     TITLE = class_titles[class_idx]
   )
 })

 # stack the class plots in one column with a shared legend at the bottom
 plot_lcgm_line <- do.call(
   ggpubr::ggarrange,
   c(
     class_plot_list_line,
     list(common.legend = TRUE, legend = "bottom", ncol = 1)
   )
 )
 plot_lcgm_line
	#---------------------------------------------------------
	# libraries
	rm(list=ls())

	library(flexmix) # GMM & LCGM
	library(TraMineR) # example data
	library(OpenRepGrid)# random words for headings
	library(khroma) # color palletttes
	library(tidyverse)
	library(car)
	library(jtools) # for a nice theme


	#---------------------------------------------------------
	# parameters & options
	# TRUE: the script later keeps a single p02r01 group and fits 3 classes;
	# FALSE: all rows are used and 4 classes are fitted.
	TEST_RUN <- TRUE
	# fixed seed so everything random further down is reproducible
	set.seed(102)
	# NOTE(review): the 'Consolas' font has to be available on the machine
	theme_set(theme_nice(base_family = 'Consolas'))

	#---------------------------------------------------------
	# loading and wrangling data
	data(biofam)
	biofam # interactive look at the raw data
	biofam <- biofam %>% rename(gender = sex)

	# Long format, one row per person (id) and age: the columns whose names start
	# with 'a' are stacked into age / rel_stat, and the "a" prefix is stripped so
	# age can be stored as an integer.
	d <- biofam %>%
	  select(gender, starts_with('a'), p02r01) %>%
	  mutate(id = seq_len(n())) %>%
	  pivot_longer(cols = starts_with('a'), names_to = "age", values_to = "rel_stat") %>%
	  mutate(
	    across(c(gender, rel_stat), as.factor),
	    age = as.integer(str_replace(age, "a", ""))
	  )

	# collapse the raw codes 0..7 into four states:
	# 1 single
	# 2 married
	# 3 child
	# 4 divorced
	d$rel_stat <- car::recode(d$rel_stat, "0=1;1=1;2=2;3=2;4=3;5=3;6=3;7=4")

	#---------------------------------------------------------
	# making alternative ways to run the model
	# the test run fits 3 classes on one p02r01 group, the full run 4 on all rows
	nr_of_classes <- if (TEST_RUN) 3 else 4
	if (TEST_RUN) {
	  d <- dplyr::filter(d, p02r01 %in% c("no denomination or religion"))
	}

	# class indices used for titles and plots
	vector_of_chosen_classes <- seq_len(nr_of_classes)
	# column labels for the fitted probabilities; maintained by hand
	levels_rel_stat <- 1:4


	#----------------------------------------------------------------------------------
	#----------------------------------------------------------------------------------
	#----------------------------------------------------------------------------------
	# running Latent class growth modelling
	# class-specific multinomial model: quadratic in age plus gender and a
	# gender-by-age interaction
	lcgm_formula <- rel_stat ~ age + I(age^2) + gender + gender:age

	# "| id" declares id as the grouping variable for the mixture; the bar must
	# be a plain `|` (a backslash-escaped bar does not parse in R)
	lcgm <- flexmix::stepFlexmix(
	  . ~ . | id,
	  data = d,
	  model = flexmix::FLXMRmultinom(lcgm_formula, varFix = TRUE, fixed = ~0),
	  k = nr_of_classes, # would be 1:12 in real analysis
	  nrep = 1, # would be 50 in real analysis to avoid local maxima
	  control = list(iter.max = 500, minprior = 0)
	)
	#---------------------------------------------------------------------------------
	#----------------------------------------------------------------------------------
	#----------------------------------------------------------------------------------


	#----------------------------------------------------------------------------------
	#fitting the values
	# fitted probabilities; presumably one list element per latent class
	fitted_lcgm <- fitted(lcgm)

	# Attach the covariates and label the columns. cbind() drops the factor class,
	# so gender ends up as its integer code.
	fitted_tibbles <- lapply(fitted_lcgm, function(class_probs) {
	  with_covariates <- cbind(class_probs, d$age, d$gender)
	  wide <- as_tibble(with_covariates, .name_repair = "minimal")
	  setNames(wide, c(levels_rel_stat, "age", "gender"))
	})

	# One row per age / gender / rel_stat combination; rows repeated across
	# persons are dropped.
	fitted_tibbles_long <- purrr::map(fitted_tibbles, function(wide) {
	  long <- pivot_longer(
	    data = wide,
	    cols = -all_of(c("age", "gender")),
	    names_to = "rel_stat",
	    values_to = "probability"
	  )
	  distinct(long)
	})

	# have tried two alternative approaches to predict the probabilities - predicting values by age and gender as opposed to fitting to original data, and calculating the fitted values by hand (https://www.ibm.com/support/pages/compute-predicted-probabilities-multinomial-logistic-new-cases-or-outside-spss) with identical results

	#----------------------------------------------------------------------------------
	# helpers for plotting

	# base font size; the plot title is scaled up from it
	text_size <- 12
	title_size <- 1.2 * text_size
	# width of the probability curves
	line_size <- 2
	# NOTE(review): presumably meant for legend keys; not used in the visible code
	line_size_legend <- 1.75 * line_size
	# facet strip labels keyed by the values of the gender column (male / female sign)
	gender_labels <- c(`1` = "\U2642", `2` = "\U2640")

	# Returns randomWords(nr_of_classes); used to build placeholder class titles.
	# Reads the global nr_of_classes at call time.
	rw <- function() OpenRepGrid::randomWords(nr_of_classes)

	# placeholder title per class: "<index> class: <word> <word> <word>"
	class_titles <- paste(
	  vector_of_chosen_classes, "class:",
	  rw(), rw(), rw()
	)
	class_titles


	#--------------------------------------------------------------------------------
	# plotting

	# Line plot of the fitted probabilities of one latent class.
	#
	# CLASS_DATA: data frame with the columns age, probability, rel_stat, gender
	# TITLE:      plot title
	# Returns a ggplot object with one facet per gender value. Sizes and strip
	# labels are read from the globals line_size, text_size, title_size and
	# gender_labels.
	plot_single_class_line <- function(CLASS_DATA, TITLE) {
	  # ggplot2 3.4.0 deprecated `size` for lines in favour of `linewidth`; use
	  # whichever the installed version expects so no deprecation warning fires
	  line_layer <- if (utils::packageVersion("ggplot2") >= "3.4.0") {
	    geom_line(linewidth = line_size)
	  } else {
	    geom_line(size = line_size)
	  }

	  ggplot(CLASS_DATA, aes(x = age, y = probability, color = rel_stat)) +
	    line_layer +
	    scale_color_vibrant() +
	    guides(color = guide_legend(reverse = TRUE)) +
	    ggtitle(TITLE) +
	    labs(x = NULL, y = NULL) +
	    # gender goes into the strip on the left, labelled via gender_labels
	    facet_wrap(~ gender, strip.position = "left", labeller = labeller(gender = gender_labels)) +
	    theme(
	      text = element_text(size = text_size),
	      #legend.key.size = unit(1,"pt"),
	      legend.title = element_blank(),
	      plot.title = element_text(size = title_size, margin = margin(b = 5)),
	      strip.text.y.left = element_text(angle = 0, size = text_size * 1.6)
	    )
	}

	# preview of the first class on its own
	plot_single_class_line(fitted_tibbles_long[[1]], TITLE = class_titles[1])

	# one plot per class
	class_plot_list_line <- lapply(vector_of_chosen_classes, function(class_idx) {
	  plot_single_class_line(
	    fitted_tibbles_long[[class_idx]],
	    TITLE = class_titles[class_idx]
	  )
	})

	# stack the class plots in one column with a shared legend at the bottom
	plot_lcgm_line <- do.call(
	  ggpubr::ggarrange,
	  c(
	    class_plot_list_line,
	    list(common.legend = TRUE, legend = "bottom", ncol = 1)
	  )
	)
	plot_lcgm_line