johnjosephhorton · February 8, 2025 01:22
diff --git a/student_sample_plot.R b/student_sample_plot.R
 library(ggplot2)
 library(dplyr)
 library(tidyr)
 library(gridExtra)
 library(purrr)

 # Function to compute medians and bootstrap CIs for a single tau value
 get_medians <- function(data, simtot = 150, tau = 0.5) {
  # Data without errors
  data0 <- subset(data, all_errors == 0)
  
  # Get Quantiles
  all <- data %>%
    group_by(taskName) %>%
    summarise(across(c(lottery, mirror, pred, prob), 
                     ~ quantile(., probs = tau)),
              n = n(),
              tau = tau)  # Add tau to output
  
  clean <- data0 %>%
    group_by(taskName) %>%
    summarise(across(c(lottery, mirror, pred, prob), 
                     ~ quantile(., probs = tau)),
              n = n(),
              tau = tau)  # Add tau to output
  
  # Bootstrap CI calculation
  set.seed(nrow(data) + 1234 + simtot)
  uid <- unique(data$ID)
  
  bootstrap_iteration <- function(data, indices) {
    boot_data <- data[data$ID %in% indices, ]
    boot_data %>%
      group_by(taskName) %>%
      summarise(across(c(lottery, mirror), list(median = median, mean = mean)))
  }
  
  # Run bootstrap
  boot_results_all <- replicate(simtot, {
    indices <- sample(uid, replace = TRUE)
    bootstrap_iteration(data, indices)
  }, simplify = FALSE)
  
  boot_results_clean <- replicate(simtot, {
    indices <- sample(uid, replace = TRUE)
    bootstrap_iteration(data0, indices)
  }, simplify = FALSE)
  
  # Calculate CIs and SEs
  process_boot_results <- function(results, original_data) {
    boot_matrix <- do.call(rbind, results)
    
    original_data %>%
      group_by(taskName) %>%
      mutate(
        lottery.SE = sd(boot_matrix$lottery_median),
        mirror.SE = sd(boot_matrix$mirror_median),
        lottery.SE.mean = sd(boot_matrix$lottery_mean),
        mirror.SE.mean = sd(boot_matrix$mirror_mean),
        lottery.ciL = lottery - 2 * lottery.SE,
        lottery.ciH = lottery + 2 * lottery.SE,
        mirror.ciL = mirror - 2 * mirror.SE,
        mirror.ciH = mirror + 2 * mirror.SE
      )
  }
  
  all <- process_boot_results(boot_results_all, all)
  clean <- process_boot_results(boot_results_clean, clean)
  
  return(list(all = all, no.errors = clean))
 }

 # Function to compute medians for multiple tau values
 get_medians_multi_tau <- function(data, simtot = 150, tau_values) {
  # Map over tau values and combine results
  results <- map_dfr(tau_values, function(tau) {
    medians <- get_medians(data, simtot = simtot, tau = tau)
    medians$all  # Using just 'all' results for now
  })
  
  return(results)
 }

 # Function to create the faceted plot
 create_faceted_plot <- function(data, title = "") {
  # Reshape data to long format for faceting
  data_long <- data %>%
    pivot_longer(
      cols = c(lottery, mirror),
      names_to = "plot_type",
      values_to = "value"
    ) %>%
    mutate(
      plot_type = factor(plot_type, 
                         levels = c("lottery", "mirror"),
                         labels = c("Lotteries", "Mirrors")),
      diff_from_pred = value - pred,
      tau_label = paste0("τ = ", format(tau, digits = 2))
    )
  
  # Create base plot
  p <- ggplot(data_long, aes(x = taskNumber, y = diff_from_pred)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "gray44") +
    geom_point(color = "blue4", size = 3) +
    facet_grid(tau_label ~ plot_type) +
    scale_x_continuous(breaks = 1:10,
                       labels = unique(data$taskName)) +
    scale_y_continuous(breaks = seq(-8, 4, 2),
                       labels = paste0("$", seq(-8, 4, 2))) +
    labs(x = "", 
         y = "Valuation - Expected Value",
         title = title) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
      plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
      strip.text = element_text(face = "bold", size = 10),
      panel.spacing = unit(1.5, "lines")
    )
  
  # Add background annotations
  p <- p + 
    geom_rect(data = data_long %>% filter(plot_type == "Lotteries"),
              aes(xmin = 0.5, xmax = 5.5, ymin = -Inf, ymax = 7.5),
              fill = "green", alpha = 0.05, inherit.aes = FALSE) +
    geom_rect(data = data_long %>% filter(plot_type == "Lotteries"),
              aes(xmin = 5.5, xmax = 10.5, ymin = -Inf, ymax = 7.5),
              fill = "red", alpha = 0.05, inherit.aes = FALSE)
  
  # Add prediction annotations (only for top row)
  top_tau <- max(data_long$tau)
  p <- p +
    geom_rect(data = data_long,
              aes(xmin = 0.5, xmax = 10.5, ymin = 7.5, ymax = Inf),
              fill = "gray", alpha = 0.15, inherit.aes = FALSE) +
    geom_text(data = data_frame(
      plot_type = "Lotteries",
      tau_label = paste0("τ = ", format(top_tau, digits = 2)),
      x = 5.25,
      y = max(data_long$diff_from_pred) + 1,
      label = "Prospect Theory predicts"
    ), aes(x = x, y = y, label = label),
    inherit.aes = FALSE, fontface = "bold", size = 3) +
    geom_text(data = data_frame(
      plot_type = "Lotteries",
      tau_label = paste0("τ = ", format(top_tau, digits = 2)),
      x = c(3, 8),
      y = max(data_long$diff_from_pred) + 0.5,
      label = c("risk seeking", "risk averse")
    ), aes(x = x, y = y, label = label),
    inherit.aes = FALSE, fontface = "bold", size = 3) +
    geom_text(data = data_frame(
      plot_type = "Mirrors",
      tau_label = paste0("τ = ", format(top_tau, digits = 2)),
      x = 5.25,
      y = max(data_long$diff_from_pred) + 1,
      label = "Prospect Theory predicts: risk neutral for all"
    ), aes(x = x, y = y, label = label),
    inherit.aes = FALSE, fontface = "bold", size = 3)
  
  return(p)
 }

 # Function to create student-only plot with multiple tau values
 make_student_plot <- function(data, tau_values = c(0.25, 0.50, 0.75)) {
  df.s <- subset(data, treatment == 'student')
  
  # Get results for all tau values
  medians_all <- get_medians_multi_tau(df.s, simtot = 150, tau_values)
  
  task_order <- data.frame(
    taskName = c('G10', 'G25', 'L90', 'L75', 'L50', 'G50', 'G75', 'G90', 'L25', 'L10'),
    taskNumber = 1:10
  )
  
  medians_all <- merge(task_order, medians_all, by = "taskName") %>%
    arrange(taskNumber, tau)
  
  plot <- create_faceted_plot(
    medians_all,
    title = paste0("Student Sample (N=", max(medians_all$n), ")")
  )
  
  return(plot)
 }

 # Example usage:
 p2 <- make_student_plot(df)
 print(p2)
	library(ggplot2)
	library(dplyr)
	library(tidyr)
	library(gridExtra)
	library(purrr)

	# Function to compute medians and bootstrap CIs for a single tau value
	get_medians <- function(data, simtot = 150, tau = 0.5) {
	# Data without errors
	data0 <- subset(data, all_errors == 0)

	# Get Quantiles
	all <- data %>%
	group_by(taskName) %>%
	summarise(across(c(lottery, mirror, pred, prob),
	~ quantile(., probs = tau)),
	n = n(),
	tau = tau) # Add tau to output

	clean <- data0 %>%
	group_by(taskName) %>%
	summarise(across(c(lottery, mirror, pred, prob),
	~ quantile(., probs = tau)),
	n = n(),
	tau = tau) # Add tau to output

	# Bootstrap CI calculation
	set.seed(nrow(data) + 1234 + simtot)
	uid <- unique(data$ID)

	bootstrap_iteration <- function(data, indices) {
	boot_data <- data[data$ID %in% indices, ]
	boot_data %>%
	group_by(taskName) %>%
	summarise(across(c(lottery, mirror), list(median = median, mean = mean)))
	}

	# Run bootstrap
	boot_results_all <- replicate(simtot, {
	indices <- sample(uid, replace = TRUE)
	bootstrap_iteration(data, indices)
	}, simplify = FALSE)

	boot_results_clean <- replicate(simtot, {
	indices <- sample(uid, replace = TRUE)
	bootstrap_iteration(data0, indices)
	}, simplify = FALSE)

	# Calculate CIs and SEs
	process_boot_results <- function(results, original_data) {
	boot_matrix <- do.call(rbind, results)

	original_data %>%
	group_by(taskName) %>%
	mutate(
	lottery.SE = sd(boot_matrix$lottery_median),
	mirror.SE = sd(boot_matrix$mirror_median),
	lottery.SE.mean = sd(boot_matrix$lottery_mean),
	mirror.SE.mean = sd(boot_matrix$mirror_mean),
	lottery.ciL = lottery - 2 * lottery.SE,
	lottery.ciH = lottery + 2 * lottery.SE,
	mirror.ciL = mirror - 2 * mirror.SE,
	mirror.ciH = mirror + 2 * mirror.SE
	)
	}

	all <- process_boot_results(boot_results_all, all)
	clean <- process_boot_results(boot_results_clean, clean)

	return(list(all = all, no.errors = clean))
	}

	# Function to compute medians for multiple tau values
	get_medians_multi_tau <- function(data, simtot = 150, tau_values) {
	# Map over tau values and combine results
	results <- map_dfr(tau_values, function(tau) {
	medians <- get_medians(data, simtot = simtot, tau = tau)
	medians$all # Using just 'all' results for now
	})

	return(results)
	}

	# Function to create the faceted plot
	create_faceted_plot <- function(data, title = "") {
	# Reshape data to long format for faceting
	data_long <- data %>%
	pivot_longer(
	cols = c(lottery, mirror),
	names_to = "plot_type",
	values_to = "value"
	) %>%
	mutate(
	plot_type = factor(plot_type,
	levels = c("lottery", "mirror"),
	labels = c("Lotteries", "Mirrors")),
	diff_from_pred = value - pred,
	tau_label = paste0("τ = ", format(tau, digits = 2))
	)

	# Create base plot
	p <- ggplot(data_long, aes(x = taskNumber, y = diff_from_pred)) +
	geom_hline(yintercept = 0, linetype = "dashed", color = "gray44") +
	geom_point(color = "blue4", size = 3) +
	facet_grid(tau_label ~ plot_type) +
	scale_x_continuous(breaks = 1:10,
	labels = unique(data$taskName)) +
	scale_y_continuous(breaks = seq(-8, 4, 2),
	labels = paste0("$", seq(-8, 4, 2))) +
	labs(x = "",
	y = "Valuation - Expected Value",
	title = title) +
	theme_minimal() +
	theme(
	axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
	plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
	strip.text = element_text(face = "bold", size = 10),
	panel.spacing = unit(1.5, "lines")
	)

	# Add background annotations
	p <- p +
	geom_rect(data = data_long %>% filter(plot_type == "Lotteries"),
	aes(xmin = 0.5, xmax = 5.5, ymin = -Inf, ymax = 7.5),
	fill = "green", alpha = 0.05, inherit.aes = FALSE) +
	geom_rect(data = data_long %>% filter(plot_type == "Lotteries"),
	aes(xmin = 5.5, xmax = 10.5, ymin = -Inf, ymax = 7.5),
	fill = "red", alpha = 0.05, inherit.aes = FALSE)

	# Add prediction annotations (only for top row)
	top_tau <- max(data_long$tau)
	p <- p +
	geom_rect(data = data_long,
	aes(xmin = 0.5, xmax = 10.5, ymin = 7.5, ymax = Inf),
	fill = "gray", alpha = 0.15, inherit.aes = FALSE) +
	geom_text(data = data_frame(
	plot_type = "Lotteries",
	tau_label = paste0("τ = ", format(top_tau, digits = 2)),
	x = 5.25,
	y = max(data_long$diff_from_pred) + 1,
	label = "Prospect Theory predicts"
	), aes(x = x, y = y, label = label),
	inherit.aes = FALSE, fontface = "bold", size = 3) +
	geom_text(data = data_frame(
	plot_type = "Lotteries",
	tau_label = paste0("τ = ", format(top_tau, digits = 2)),
	x = c(3, 8),
	y = max(data_long$diff_from_pred) + 0.5,
	label = c("risk seeking", "risk averse")
	), aes(x = x, y = y, label = label),
	inherit.aes = FALSE, fontface = "bold", size = 3) +
	geom_text(data = data_frame(
	plot_type = "Mirrors",
	tau_label = paste0("τ = ", format(top_tau, digits = 2)),
	x = 5.25,
	y = max(data_long$diff_from_pred) + 1,
	label = "Prospect Theory predicts: risk neutral for all"
	), aes(x = x, y = y, label = label),
	inherit.aes = FALSE, fontface = "bold", size = 3)

	return(p)
	}

	# Function to create student-only plot with multiple tau values
	make_student_plot <- function(data, tau_values = c(0.25, 0.50, 0.75)) {
	df.s <- subset(data, treatment == 'student')

	# Get results for all tau values
	medians_all <- get_medians_multi_tau(df.s, simtot = 150, tau_values)

	task_order <- data.frame(
	taskName = c('G10', 'G25', 'L90', 'L75', 'L50', 'G50', 'G75', 'G90', 'L25', 'L10'),
	taskNumber = 1:10
	)

	medians_all <- merge(task_order, medians_all, by = "taskName") %>%
	arrange(taskNumber, tau)

	plot <- create_faceted_plot(
	medians_all,
	title = paste0("Student Sample (N=", max(medians_all$n), ")")
	)

	return(plot)
	}

	# Example usage:
	p2 <- make_student_plot(df)
	print(p2)