EconomiCurtis · February 22, 2018 09:04
diff --git a/prob_of_winning.r b/prob_of_winning.r


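 #' Original request:
 #' I had a quick chat with Nikos earlier today,
 #' and I need to generate the same analysis Nikos described for three separate data sets
 #' (they're all attached and each includes variables labelled task2score and task3score).
 #' In the end what I'm trying to calculate is the probability a subject's task 3 score is
 #' strictly lower than a randomly selected task 2 score from the same experiment.
 #' Each file contains only the observations I need to include in the analysis.
 #'
 #' Note on direction: the code below reports, for each task 3 score, the share of
 #' task 2 scores from the same experiment that are strictly below it (plotted as the
 #' probability of winning). That is the reverse comparison to the wording above.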


 #packages
 library(readr)
 library(dplyr)
 library(ggplot2)


 # Input files: one CSV per experiment, read from the Downloads folder.
 ADM_Lab <- read_csv(file = "~/Downloads/ADM_Lab.csv")
 Matters <- read_csv(file = "~/Downloads/Matters.csv")
 ADM_MTurk <- read_csv(file = "~/Downloads/ADM_MTurk.csv")

 # Distinct task 3 scores observed in any of the three files, ascending.
 # sort() drops NA by default, so missing scores never enter this grid.
 possible_task3_score <- sort(
   unique(
     c(ADM_Lab$task3score, Matters$task3score, ADM_MTurk$task3score)
   )
 )

 # Probability of winning by task 3 score, per experiment.
 #
 # For every score in `possible_task3_score`, and separately for each data
 # set, this builds one row with:
 #   n                                    task 2 scores equal to that score
 #   scores_below                         task 2 scores strictly below it
 #   probability_of_winning_stricklyLess  scores_below / all non-missing
 #                                        task 2 scores in that data set
 # Ties count as not winning: this does not take into account the tie break
 # rule.
 #
 # Task 2 scores are compared against the grid directly instead of being
 # joined onto it, so a task 2 score that nobody achieved in task 3 still
 # counts towards `scores_below` and the denominator (a left join onto the
 # task 3 grid silently drops those rows). Rows are also counted explicitly
 # rather than through tally(), whose count column can come out as `n` or
 # `nn` depending on the dplyr version and the columns already in the data.
 # NOTE(review): the previous code read `nn` for ADM_Lab only, so that file
 # presumably has its own `n` column; if it was meant as a weight, confirm.

 # One experiment's table. `data` needs a `task2score` column, `label` is
 # the value stored in `group`, `scores` are the task 3 scores to evaluate.
 task2_scores_below <- function(data, label, scores) {
   # Missing task 2 scores cannot be compared, so they are left out.
   task2 <- data$task2score[!is.na(data$task2score)]
   tibble(
     task_3_score = scores,
     n = vapply(scores, function(s) sum(task2 == s), numeric(1)),
     group = label,
     scores_below = vapply(scores, function(s) sum(task2 < s), numeric(1)),
     probability_of_winning_stricklyLess = scores_below / length(task2)
   )
 }

 CDF_data <- bind_rows(
   task2_scores_below(ADM_Lab, "ADM_Lab", possible_task3_score),
   task2_scores_below(Matters, "Matters", possible_task3_score),
   task2_scores_below(ADM_MTurk, "ADM_MTurk", possible_task3_score)
 ) %>%
   # The result stays grouped by experiment, as it was before.
   group_by(group)


 # Plot: one line per experiment, winning probability against task 3 score,
 # with the y axis formatted as percentages.
 ggplot(
   data = CDF_data,
   mapping = aes(
     x = task_3_score,
     y = probability_of_winning_stricklyLess,
     group = group,
     color = group
   )
 ) +
   geom_line() +
   scale_y_continuous(labels = scales::percent) +
   labs(
     x = "Number of successful tasks in part 3",
     y = "Probability of winning tournament"
   )


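	#' Original request:
	#' I had a quick chat with Nikos earlier today,
	#' and I need to generate the same analysis Nikos described for three separate data sets
	#' (they're all attached and each includes variables labelled task2score and task3score).
	#' In the end what I'm trying to calculate is the probability a subject's task 3 score is
	#' strictly lower than a randomly selected task 2 score from the same experiment.
	#' Each file contains only the observations I need to include in the analysis.
	#'
	#' Note on direction: the code below reports, for each task 3 score, the share of
	#' task 2 scores from the same experiment that are strictly below it (plotted as the
	#' probability of winning). That is the reverse comparison to the wording above.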


	#packages
	library(readr)
	library(dplyr)
	library(ggplot2)


	# Input files: one CSV per experiment, read from the Downloads folder.
	ADM_Lab <- read_csv(file = "~/Downloads/ADM_Lab.csv")
	Matters <- read_csv(file = "~/Downloads/Matters.csv")
	ADM_MTurk <- read_csv(file = "~/Downloads/ADM_MTurk.csv")

	# Distinct task 3 scores observed in any of the three files, ascending.
	# sort() drops NA by default, so missing scores never enter this grid.
	possible_task3_score <- sort(
	  unique(
	    c(ADM_Lab$task3score, Matters$task3score, ADM_MTurk$task3score)
	  )
	)

	# Probability of winning by task 3 score, per experiment.
	#
	# For every score in `possible_task3_score`, and separately for each data
	# set, this builds one row with:
	#   n                                    task 2 scores equal to that score
	#   scores_below                         task 2 scores strictly below it
	#   probability_of_winning_stricklyLess  scores_below / all non-missing
	#                                        task 2 scores in that data set
	# Ties count as not winning: this does not take into account the tie break
	# rule.
	#
	# Task 2 scores are compared against the grid directly instead of being
	# joined onto it, so a task 2 score that nobody achieved in task 3 still
	# counts towards `scores_below` and the denominator (a left join onto the
	# task 3 grid silently drops those rows). Rows are also counted explicitly
	# rather than through tally(), whose count column can come out as `n` or
	# `nn` depending on the dplyr version and the columns already in the data.
	# NOTE(review): the previous code read `nn` for ADM_Lab only, so that file
	# presumably has its own `n` column; if it was meant as a weight, confirm.

	# One experiment's table. `data` needs a `task2score` column, `label` is
	# the value stored in `group`, `scores` are the task 3 scores to evaluate.
	task2_scores_below <- function(data, label, scores) {
	  # Missing task 2 scores cannot be compared, so they are left out.
	  task2 <- data$task2score[!is.na(data$task2score)]
	  tibble(
	    task_3_score = scores,
	    n = vapply(scores, function(s) sum(task2 == s), numeric(1)),
	    group = label,
	    scores_below = vapply(scores, function(s) sum(task2 < s), numeric(1)),
	    probability_of_winning_stricklyLess = scores_below / length(task2)
	  )
	}

	CDF_data <- bind_rows(
	  task2_scores_below(ADM_Lab, "ADM_Lab", possible_task3_score),
	  task2_scores_below(Matters, "Matters", possible_task3_score),
	  task2_scores_below(ADM_MTurk, "ADM_MTurk", possible_task3_score)
	) %>%
	  # The result stays grouped by experiment, as it was before.
	  group_by(group)


	# Plot: one line per experiment, winning probability against task 3 score,
	# with the y axis formatted as percentages.
	ggplot(
	  data = CDF_data,
	  mapping = aes(
	    x = task_3_score,
	    y = probability_of_winning_stricklyLess,
	    group = group,
	    color = group
	  )
	) +
	  geom_line() +
	  scale_y_continuous(labels = scales::percent) +
	  labs(
	    x = "Number of successful tasks in part 3",
	    y = "Probability of winning tournament"
	  )