Last active
June 11, 2019 15:33
-
-
Save jkeirstead/7527432 to your computer and use it in GitHub Desktop.
A funnel plot analysis of EPSRC Fellowship success rates
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Analysis of EPSRC fellowship success rates | |
# 18 November 2013 | |
# James Keirstead | |
##' Calculates the bounds for a funnel plot | |
##' | |
##' Calculates the upper and lower confidence interval limits for use | |
##' in a funnel plot. Assumes a binomial discrete distribution with a | |
##' probability of success theta. | |
##' | |
##' @param n the maximum number of trials in the data set | |
##' @param theta the average success rate in the data set | |
##' @param alpha a numeric giving the confidence interval, default = 0.95 | |
##' @param method a numeric giving the method to use ("exact", "spiegelhalter") | |
##' @return a data frame giving the upper and lower confidence | |
##' intervals on success rate | |
##' @source Spiegelhalter, D. J. (2005). Funnel plots for comparing | |
##' institutional performance. Statistics in Medicine, 24(8), | |
##' 1185--202. doi:10.1002/sim.1970 | |
##' | |
calculate_funnel_limits <- function(n, theta, alpha=0.95, method="exact") { | |
## Convert alpha into p | |
p <- (1 - alpha)/2 | |
if (method=="exact") { | |
## Calculate the exact binomial counts | |
## Because this is a discrete distribution | |
## these limits will appear very jumpy | |
upper <- qbinom(1 - p, 1:n, theta)/1:n | |
lower <- qbinom(p, 1:n, theta)/1:n | |
} else if (method=="spiegelhalter") { | |
## Calculate the confidence limits | |
## using Spiegelhalter's interpolation | |
u1 <- qbinom(1-p, 1:n, theta) | |
p.u1 <- pbinom(u1, 1:n, theta) | |
u2 <- qbinom(1-p, 1:n, theta)-1 | |
p.u2 <- pbinom(u2, 1:n, theta) | |
a <- ((1-p) - p.u2)/(p.u1-p.u2) | |
upper <- (u2 + a*(u1-u2))/1:n | |
l1 <- qbinom(p, 1:n, theta) - 1 | |
l1 <- replace(l1, l1<0, 0) | |
p.l1 <- pbinom(l1, 1:n, theta) | |
l2 <- qbinom(p, 1:n, theta) | |
p.l2 <- pbinom(l2, 1:n, theta) | |
a <- (p - p.l1)/(p.l2-p.l1) | |
lower <- (l1 + a*(l2-l1))/1:n | |
} | |
return(data.frame(x=1:n, up=upper, lo=lower)) | |
} | |
## Raw data on success rates in EPSRC Fellowships | |
## http://www.epsrc.ac.uk/skills/fellows/Pages/fellowships.aspx | |
data <- data.frame(rate=c(0.3, 0.17, 0.16, 0.1, 0.37, 0.33, 0.31, 0, 0), | |
n=c(33, 161, 47, 43, 32, 41, 104, 5, 5), | |
label=c("Engineering", "Physical Sciences", "Energy", "Healthcare", | |
"Manufacturing the Future", "ICT", "Mathematics", | |
"Digital Economy", "Living With Environmental Change"), | |
halign=c(0, 1, 0, 0, 0, 0, 0, 0, 0), | |
offset=c(1.5, -1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5), | |
valign=c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 1)) | |
## Calculate the expected response rate as a weighted average | |
n <- max(data$n) | |
theta <- with(data,sum(rate*n)/sum(n)) | |
## Calculate the funnel plot limits and transform | |
require(reshape2) | |
tmp <- calculate_funnel_limits(n, theta, method="spiegelhalter") | |
tmp <- melt(tmp, id="x") | |
## Plot the results | |
require(ggplot2) | |
require(scales) | |
gg <- ggplot(data, aes(x=n, y=rate)) + | |
geom_line(data=tmp, aes(x=x, y=value, group=variable), colour="#AAAAAA", linetype="dashed") + | |
geom_point() + | |
geom_text(aes(x=n+offset, label=label, hjust=halign, vjust=valign), size=3) + | |
geom_hline(data=NULL, aes(yintercept=theta), color="red") + | |
geom_text(data=NULL, aes(x=max(data$n), hjust=1, vjust=0, y=theta, label="Average"), size=4) + | |
labs(x="Number of applications", y="Success rate") + | |
scale_y_continuous(label=percent) + | |
theme_bw() | |
ggsave("epsrc-funnel.png", gg, width=8, height=6, dpi=150) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment