Skip to content

Instantly share code, notes, and snippets.

@jkeirstead
Last active June 11, 2019 15:33
Show Gist options
  • Save jkeirstead/7527432 to your computer and use it in GitHub Desktop.
Save jkeirstead/7527432 to your computer and use it in GitHub Desktop.
A funnel plot analysis of EPSRC Fellowship success rates
# Analysis of EPSRC fellowship success rates
# 18 November 2013
# James Keirstead
##' Calculates the bounds for a funnel plot
##'
##' Calculates the upper and lower confidence interval limits for use
##' in a funnel plot. Assumes a binomial discrete distribution with a
##' probability of success theta.
##'
##' @param n the maximum number of trials in the data set
##' @param theta the average success rate in the data set
##' @param alpha a numeric giving the confidence interval, default = 0.95
##' @param method a numeric giving the method to use ("exact", "spiegelhalter")
##' @return a data frame giving the upper and lower confidence
##' intervals on success rate
##' @source Spiegelhalter, D. J. (2005). Funnel plots for comparing
##' institutional performance. Statistics in Medicine, 24(8),
##' 1185--202. doi:10.1002/sim.1970
##'
calculate_funnel_limits <- function(n, theta, alpha=0.95, method="exact") {
## Convert alpha into p
p <- (1 - alpha)/2
if (method=="exact") {
## Calculate the exact binomial counts
## Because this is a discrete distribution
## these limits will appear very jumpy
upper <- qbinom(1 - p, 1:n, theta)/1:n
lower <- qbinom(p, 1:n, theta)/1:n
} else if (method=="spiegelhalter") {
## Calculate the confidence limits
## using Spiegelhalter's interpolation
u1 <- qbinom(1-p, 1:n, theta)
p.u1 <- pbinom(u1, 1:n, theta)
u2 <- qbinom(1-p, 1:n, theta)-1
p.u2 <- pbinom(u2, 1:n, theta)
a <- ((1-p) - p.u2)/(p.u1-p.u2)
upper <- (u2 + a*(u1-u2))/1:n
l1 <- qbinom(p, 1:n, theta) - 1
l1 <- replace(l1, l1<0, 0)
p.l1 <- pbinom(l1, 1:n, theta)
l2 <- qbinom(p, 1:n, theta)
p.l2 <- pbinom(l2, 1:n, theta)
a <- (p - p.l1)/(p.l2-p.l1)
lower <- (l1 + a*(l2-l1))/1:n
}
return(data.frame(x=1:n, up=upper, lo=lower))
}
## Raw data on success rates in EPSRC Fellowships
## http://www.epsrc.ac.uk/skills/fellows/Pages/fellowships.aspx
data <- data.frame(rate=c(0.3, 0.17, 0.16, 0.1, 0.37, 0.33, 0.31, 0, 0),
n=c(33, 161, 47, 43, 32, 41, 104, 5, 5),
label=c("Engineering", "Physical Sciences", "Energy", "Healthcare",
"Manufacturing the Future", "ICT", "Mathematics",
"Digital Economy", "Living With Environmental Change"),
halign=c(0, 1, 0, 0, 0, 0, 0, 0, 0),
offset=c(1.5, -1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5, 1.5),
valign=c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 1))
## Calculate the expected response rate as a weighted average
n <- max(data$n)
theta <- with(data,sum(rate*n)/sum(n))
## Calculate the funnel plot limits and transform
require(reshape2)
tmp <- calculate_funnel_limits(n, theta, method="spiegelhalter")
tmp <- melt(tmp, id="x")
## Plot the results
require(ggplot2)
require(scales)
gg <- ggplot(data, aes(x=n, y=rate)) +
geom_line(data=tmp, aes(x=x, y=value, group=variable), colour="#AAAAAA", linetype="dashed") +
geom_point() +
geom_text(aes(x=n+offset, label=label, hjust=halign, vjust=valign), size=3) +
geom_hline(data=NULL, aes(yintercept=theta), color="red") +
geom_text(data=NULL, aes(x=max(data$n), hjust=1, vjust=0, y=theta, label="Average"), size=4) +
labs(x="Number of applications", y="Success rate") +
scale_y_continuous(label=percent) +
theme_bw()
ggsave("epsrc-funnel.png", gg, width=8, height=6, dpi=150)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment