Created
December 28, 2016 10:54
-
-
Save padamson/8d9d45fd582695649528085b19b5675f to your computer and use it in GitHub Desktop.
ROC curve calculation for Listing 4.3 of Real-World Machine Learning
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Computes the points of a ROC curve for a binary classifier.
#
# The predicted probabilities are compared against nPoints equally spaced
# thresholds in [0, 1]; at each threshold an observation is called positive
# when its predicted probability is >= the threshold.
#
# Arguments:
#   trueLabels:     vector of true class labels (by default 0=FALSE; 1=TRUE).
#   predictedProbs: vector of predicted probabilities, aligned element-wise
#                   with trueLabels.
#   nPoints:        number of thresholds to evaluate between 0 and 1.
#   posClass:       the value in trueLabels that marks the positive class.
#
# Returns:
#   A data frame with columns fpr (false-positive rate), tpr (true-positive
#   rate) and thr (threshold). Only the first row for each distinct fpr value
#   is kept; since thresholds ascend, that is the lowest threshold producing
#   that fpr. With nPoints = 0 the result has zero rows.
#
# Notes:
#   Observations whose label is NA count as neither positive nor negative.
#   If there are no positive (or no negative) observations the corresponding
#   rate is NaN, because it is a division by zero.
rocCurve <- function(trueLabels, predictedProbs, nPoints = 100, posClass = 1) {
  # Allocates the threshold and ROC vectors
  thr <- seq(0, 1, length.out = nPoints)
  tpr <- numeric(nPoints)
  fpr <- numeric(nPoints)

  # Precalculates values for the positive and negative cases, used in the loop
  pos <- trueLabels == posClass
  neg <- !pos
  nPos <- sum(pos, na.rm = TRUE)
  nNeg <- sum(neg, na.rm = TRUE)

  # For each threshold, calculates the rate of true and false positives.
  # seq_along() (rather than 1:length()) keeps the loop from running when
  # nPoints is 0.
  for (i in seq_along(thr)) {
    meetOrExceedThreshold <- predictedProbs >= thr[i]
    tpr[i] <- sum(meetOrExceedThreshold & pos, na.rm = TRUE) / nPos
    fpr[i] <- sum(meetOrExceedThreshold & neg, na.rm = TRUE) / nNeg
  }

  # Create data frame without duplicated fpr's to return
  keep <- !duplicated(fpr)
  data.frame(fpr = fpr[keep], tpr = tpr[keep], thr = thr[keep])
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment