Created
November 6, 2016 02:24
-
-
Save BioSciEconomist/3ef759ecae7fee9bc42b939e9d418c4e to your computer and use it in GitHub Desktop.
An intuitive approach to ROC curves
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # R Code to support: http://econometricsense.blogspot.com/2011/05/intuitive-approach-to-roc-curves-with.html | |
| # *------------------------------------------------------------------ | |
| # | | |
| # | import scored logit data from SAS - code generated by SAS MACRO %EXPORT_TO_R | |
| # | | |
| # | | |
| # *----------------------------------------------------------------- | |
| # set R working directory | |
| setwd("C:\\Documents and Settings\\wkuuser\\Desktop\\PROJECTS\\Stats Training") | |
| # get data | |
| dat.from.SAS <- read.csv("fromSAS_delete.CSV", header=T) | |
| # check data dimensions | |
| dim(dat.from.SAS) | |
| names(dat.from.SAS) | |
| # *------------------------------------------------------------------ | |
| # | | |
| # | scatter plot with marginal histograms | |
| # | | |
| # | | |
| # *----------------------------------------------------------------- | |
| # | |
| # model predicts P(G) so we want these probabilities for each group | |
| # | |
| # get p(G) data set for the group that is actually green | |
| green <- dat.from.SAS[ dat.from.SAS$class=="G",] | |
| dim(green) | |
| # get p(G) data set for group that is actually red | |
| red <- dat.from.SAS[ dat.from.SAS$class=="R",] | |
| dim(red) | |
| # just look at regular histograms for each group | |
| hist(green$P_G, main = 'histogram for green') | |
| hist(red$P_G, main = 'histogram for red') | |
| # in order to do scatter plots n must be the same for each | |
| # group, randomly sample n = n(green) from red | |
| # Total number of red observations to match green | |
| N <- 24 | |
| print(N) | |
| # Randomly arrange the data and select out N size sample for red | |
| # and test set. | |
| dat <- red[sample(1:N),] | |
| red.rs <- dat[1:N,] | |
| dim(red.rs) | |
| # does the distribution retain original properties? Yes | |
| hist(red.rs$P_G, main = 'histogram for red sample') | |
| plot(green$P_G, red.rs$P_G) | |
| # *------------------------------------------------------------------ | |
| # | | |
| # | create the marginal plots | |
| # | | |
| # | | |
| # *----------------------------------------------------------------- | |
| def.par <- par(no.readonly = TRUE) # save default, for resetting... | |
| # define histograms | |
| Ghist <- hist(green$P_G,plot=FALSE) | |
| Rhist <- hist(red.rs$P_G, plot=FALSE) | |
| top <- max(c(Ghist$counts, Rhist$counts)) | |
| Grange <- c(0,1) | |
| Rrange <- c(0,1) | |
| nf <- layout(matrix(c(2,0,1,3),2,2,byrow=TRUE), c(3,1), c(1,3), TRUE) | |
| #layout.show(nf) | |
| par(mar=c(3,3,1,1)) | |
| plot(green$P_G, red.rs$P_G, xlim=Grange, ylim=Rrange, xlab="green", ylab="red") | |
| par(mar=c(0,3,1,1)) | |
| barplot(Ghist$counts, axes=FALSE, ylim=c(0, top), space=0, main = 'green') | |
| par(mar=c(3,0,1,1)) | |
| barplot(Rhist$counts, axes=FALSE, xlim=c(0, top), space=0, horiz=TRUE, main = 'red') | |
| par(def.par) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment