Created
February 22, 2018 09:04
-
-
Save EconomiCurtis/2203342c7b150bb1b3b3febc650f6e5a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#' I had a quick chat with Nikos earlier today,
#' and I need to generate the same analysis Nikos described for three separate data sets
#' (they're all attached and include variables labelled task2score and task3score, respectively).
#' In the end, what I'm trying to calculate is the probability that a subject's task 3 score is
#' strictly lower than a randomly selected task 2 score from the same experiment.
#' Each file contains only the observations I need to include in the analysis.
# packages
library(readr)
library(dplyr)
library(ggplot2)
# Load data ----
# One CSV per experiment. The rest of the script reads the `task2score` and
# `task3score` columns from each of these tables.
# NOTE(review): paths are hard-coded to the author's Downloads folder; adjust
# them if the files live elsewhere.
ADM_Lab <- read_csv("~/Downloads/ADM_Lab.csv")
Matters <- read_csv("~/Downloads/Matters.csv")
ADM_MTurk <- read_csv("~/Downloads/ADM_MTurk.csv")
# Every distinct task 3 score observed in any of the three data sets, in
# ascending order. This is the grid of scores the probabilities are evaluated
# on. sort() drops NA by default, so missing scores never enter the grid.
possible_task3_score <- sort(unique(c(
  ADM_Lab$task3score,
  Matters$task3score,
  ADM_MTurk$task3score
)))
# does not take into account tie break rule

# Build the per-experiment comparison table for one data set.
#
# For every score in `score_grid` this counts, among the non-missing entries
# of `task2_scores`:
#   n            - how many task 2 scores equal that grid score
#   scores_below - how many task 2 scores are strictly below that grid score
# and divides `scores_below` by the number of non-missing task 2 scores.
#
# Counting straight from the score vector (instead of joining a tally onto the
# grid) keeps task 2 scores that never occur as a task 3 score in both the
# numerator and the denominator, and avoids depending on how tally() names its
# count column (`n` vs `nn`) across dplyr versions.
#
# Args:
#   task2_scores: numeric vector of task 2 scores for one experiment.
#   label:        string stored in the `group` column.
#   score_grid:   numeric vector of task 3 scores to evaluate.
# Returns:
#   A tibble with columns task_3_score, n, group, scores_below and
#   probability_of_winning_stricklyLess (NaN when there are no non-missing
#   task 2 scores).
strictly_lower_table <- function(task2_scores, label, score_grid) {
  observed <- task2_scores[!is.na(task2_scores)]

  tibble(
    task_3_score = score_grid,
    n = vapply(score_grid, function(s) sum(observed == s), numeric(1)),
    group = label,
    scores_below = vapply(score_grid, function(s) sum(observed < s), numeric(1)),
    probability_of_winning_stricklyLess = scores_below / length(observed)
  )
}

# One row per (experiment, possible task 3 score). The result is a plain,
# ungrouped tibble; the `group` column identifies the experiment.
CDF_data <- bind_rows(
  strictly_lower_table(ADM_Lab$task2score, "ADM_Lab", possible_task3_score),
  strictly_lower_table(Matters$task2score, "Matters", possible_task3_score),
  strictly_lower_table(ADM_MTurk$task2score, "ADM_MTurk", possible_task3_score)
)
# Plotting ----
# One line per experiment: x is the task 3 score, y is the share of that
# experiment's task 2 scores lying strictly below it, shown as a percentage.
ggplot(
  CDF_data,
  aes(
    x = task_3_score,
    y = probability_of_winning_stricklyLess,
    group = group,
    color = group
  )
) +
  geom_line() +
  labs(
    x = "Number of successful tasks in part 3",
    y = "Probability of winning tournament"
  ) +
  scale_y_continuous(labels = scales::percent)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment