djnavarro · July 10, 2019 02:41
diff --git a/censor.R b/censor.R
 library(tidyverse)

 # define the censoring function 
 # Randomly censor (set to NA) entries of the `smoking` column, with a
 # probability that depends on the value of `sex.factor`.
 #
 # Arguments:
 #   df  data frame (or tibble) with columns `sex.factor` and `smoking`.
 #
 # Returns:
 #   A plain data.frame with the same columns as `df`, in which `smoking` is
 #   NA for the censored rows. Rows whose `sex.factor` is neither "Male" nor
 #   "Female" (including NA) are never censored.
 #
 # The result is random; call set.seed() beforehand for reproducible output.
 censor_smoking <- function(df) {
   stopifnot(
     is.data.frame(df),
     all(c("sex.factor", "smoking") %in% names(df))
   )

   # per-row censoring probability: .3 for males, .6 for females, 0 otherwise
   sex <- as.character(df$sex.factor)
   p_censor <- ifelse(sex == "Male", .3, ifelse(sex == "Female", .6, 0))
   p_censor[is.na(p_censor)] <- 0 # missing sex: never censor

   # one uniform draw per row, compared against that row's probability
   censor <- runif(nrow(df)) < p_censor

   # do the censoring
   df$smoking[censor] <- NA

   # convert to pure df
   as.data.frame(df)
 }


 # small data set to use as an example (every row has a unique id)
 df <- tribble(
   ~id, ~sex.factor, ~smoking,
   1,   "Male",      "Non-smoker",
   2,   "Male",      "Non-smoker",
   3,   "Female",    "Non-smoker",
   4,   "Female",    "Smoker",
   5,   "Male",      "Non-smoker",
   6,   "Female",    "Smoker",
   7,   "Male",      "Smoker",
   8,   "Male",      "Non-smoker",
   9,   "Male",      "Non-smoker",
   10,  "Female",    "Non-smoker",
   11,  "Female",    "Non-smoker",
   12,  "Male",      "Non-smoker",
   13,  "Male",      "Non-smoker",
   14,  "Male",      "Non-smoker",
   15,  "Male",      "Smoker",
   16,  "Male",      "Non-smoker",
   17,  "Male",      "Non-smoker",
   18,  "Male",      "Smoker"
 )

 # now actually use it to overwrite the original
 # (the censoring is random, so the printed NAs differ from run to run)
 df <- censor_smoking(df)
 print(df)
	library(tidyverse)

	# define the censoring function
	# Randomly censor (set to NA) entries of the `smoking` column, with a
	# probability that depends on the value of `sex.factor`.
	#
	# Arguments:
	#   df  data frame (or tibble) with columns `sex.factor` and `smoking`.
	#
	# Returns:
	#   A plain data.frame with the same columns as `df`, in which `smoking` is
	#   NA for the censored rows. Rows whose `sex.factor` is neither "Male" nor
	#   "Female" (including NA) are never censored.
	#
	# The result is random; call set.seed() beforehand for reproducible output.
	censor_smoking <- function(df) {
	  stopifnot(
	    is.data.frame(df),
	    all(c("sex.factor", "smoking") %in% names(df))
	  )

	  # per-row censoring probability: .3 for males, .6 for females, 0 otherwise
	  sex <- as.character(df$sex.factor)
	  p_censor <- ifelse(sex == "Male", .3, ifelse(sex == "Female", .6, 0))
	  p_censor[is.na(p_censor)] <- 0 # missing sex: never censor

	  # one uniform draw per row, compared against that row's probability
	  censor <- runif(nrow(df)) < p_censor

	  # do the censoring
	  df$smoking[censor] <- NA

	  # convert to pure df
	  as.data.frame(df)
	}


	# small data set to use as an example (every row has a unique id)
	df <- tribble(
	  ~id, ~sex.factor, ~smoking,
	  1,   "Male",      "Non-smoker",
	  2,   "Male",      "Non-smoker",
	  3,   "Female",    "Non-smoker",
	  4,   "Female",    "Smoker",
	  5,   "Male",      "Non-smoker",
	  6,   "Female",    "Smoker",
	  7,   "Male",      "Smoker",
	  8,   "Male",      "Non-smoker",
	  9,   "Male",      "Non-smoker",
	  10,  "Female",    "Non-smoker",
	  11,  "Female",    "Non-smoker",
	  12,  "Male",      "Non-smoker",
	  13,  "Male",      "Non-smoker",
	  14,  "Male",      "Non-smoker",
	  15,  "Male",      "Smoker",
	  16,  "Male",      "Non-smoker",
	  17,  "Male",      "Non-smoker",
	  18,  "Male",      "Smoker"
	)

	# now actually use it to overwrite the original
	# (the censoring is random, so the printed NAs differ from run to run)
	df <- censor_smoking(df)
	print(df)