Created
April 27, 2016 22:02
-
-
Save primaryobjects/eca10eae262dd3250cbce2bc49ae5a90 to your computer and use it in GitHub Desktop.
Healthcare quality and detecting poor care with logistic regression.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Healthcare quality analysis: detecting poor care with logistic regression.
# Dataset: quality.csv — 131 patients, outcome PoorCare (0 = good, 1 = poor).
library("caTools")  # sample.split()
library("ROCR")     # prediction(), performance()

data <- read.csv("quality.csv")

# 98 patients received good care (0), 33 poor care (1).
table(data$PoorCare)

# Quick visual check: color points on a red-blue ramp by outcome.
rbPal <- colorRampPalette(c("red", "blue"))
data$col <- rbPal(10)[as.numeric(cut(data$PoorCare, breaks = 10))]
plot(data$TotalVisits, data$Narcotics, col = data$col, pch = 20)

# Reproducible 75/25 train/test split, stratified on the outcome.
set.seed(88)
split <- sample.split(data$PoorCare, SplitRatio = 0.75)
train <- subset(data, split == TRUE)
test <- subset(data, split == FALSE)

# Logistic regression: predict poor care from office visits and narcotics use.
QualityLog <- glm(PoorCare ~ OfficeVisits + Narcotics,
                  data = train, family = binomial)

# In-sample predicted probabilities (no newdata => predicts on training set).
pred <- predict(QualityLog, type = "response")

# Average prediction for each true outcome.
tapply(pred, train$PoorCare, mean)

# Confusion matrix at a 0.5 threshold.
table(train$PoorCare, pred > 0.5)
#    FALSE TRUE
#  0    70    4
#  1    15   10
# Sensitivity (true positive rate) = TP / (TP + FN)
10 / 25
# Specificity (true negative rate) = TN / (TN + FP)
70 / 74

# Higher threshold (0.7): fewer cases flagged -> lower sensitivity,
# higher specificity.
table(train$PoorCare, pred > 0.7)
#    FALSE TRUE
#  0    73    1
#  1    17    8
8 / 25
73 / 74

# Lower threshold (0.2): more cases flagged as poor care.
table(train$PoorCare, pred > 0.2)
#    FALSE TRUE
#  0    54   20
#  1     9   16

# ROC curve on the training set.
rocpred <- prediction(pred, train$PoorCare)
rocperf <- performance(rocpred, "tpr", "fpr")
# y-axis: true positive rate (poor care correctly identified), so 0.5 means
# half the poor-care patients are correctly flagged.
# x-axis: false positive rate — further right means more good-care patients
# are wrongly flagged as poor care.
plot(rocperf, colorize = TRUE, print.cutoffs.at = seq(0, 1, 0.1),
     text.adj = c(-0.2, 1.7))

# Evaluate on the held-out test set.
predtest <- predict(QualityLog, type = "response", newdata = test)
rocpredtest <- prediction(predtest, test$PoorCare)
# Area under the ROC curve: the probability that a randomly chosen poor-care
# case is ranked above a randomly chosen good-care case.
auc <- as.numeric(performance(rocpredtest, "auc")@y.values)
# 0.799
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment