Skip to content

Instantly share code, notes, and snippets.

@primaryobjects
Created April 27, 2016 22:02
Show Gist options
  • Select an option

  • Save primaryobjects/eca10eae262dd3250cbce2bc49ae5a90 to your computer and use it in GitHub Desktop.

Select an option

Save primaryobjects/eca10eae262dd3250cbce2bc49ae5a90 to your computer and use it in GitHub Desktop.
Healthcare quality and detecting poor care with logistic regression.
# Load packages: caTools for stratified train/test splitting, ROCR for
# ROC-curve analysis.
library("caTools")
library("ROCR")

# Read the quality-of-care dataset.
data <- read.csv("quality.csv")

# Outcome distribution: 98 patients received good care (0), 33 poor care (1).
table(data$PoorCare)

# Color each point on a red-to-blue ramp by outcome, then plot total visits
# against narcotics prescriptions to eyeball any separation.
make_palette <- colorRampPalette(c("red", "blue"))
data$col <- make_palette(10)[as.numeric(cut(data$PoorCare, breaks = 10))]
plot(data$TotalVisits, data$Narcotics, col = data$col, pch = 20)
# Reproducible 75/25 train/test split. sample.split() stratifies on the
# outcome, so both sets keep roughly the same proportion of poor-care cases.
set.seed(88)
split <- sample.split(data$PoorCare, SplitRatio = 0.75)
train <- subset(data, split == TRUE)
test <- subset(data, split == FALSE)
# Logistic regression: model the probability of poor care from the number of
# office visits and narcotics prescriptions (family = binomial -> logit link).
QualityLog <- glm(PoorCare ~ OfficeVisits + Narcotics,
                  data = train, family = binomial)

# In-sample predicted probabilities (no newdata, so these are for `train`).
pred <- predict(QualityLog, type = "response")

# Average predicted probability within each true outcome; a useful model
# should predict higher values for the poor-care (1) group.
tapply(pred, train$PoorCare, mean)

# Confusion matrix at a 0.5 probability threshold (rows = actual outcome).
table(train$PoorCare, pred > 0.5)
# Result of the 0.5-threshold confusion matrix (rows = actual, cols = predicted):
# FALSE TRUE
# 0 70 4
# 1 15 10
# Sensitivity (true positive rate) = TP / (TP+FN)
# i.e. the share of actual poor-care cases the model flags.
10/25
# Specificity (true negative rate) = TN / (TN+FP)
# i.e. the share of actual good-care cases the model clears.
70/74
# Raising the threshold to 0.7 trades sensitivity for specificity:
table(train$PoorCare, pred > 0.7)
# FALSE TRUE
# 0 73 1
# 1 17 8
# Sensitivity falls (8/25), specificity rises (73/74).
8/25
73/74
# Lowering the threshold to 0.2 does the opposite: more poor-care cases are
# caught (16/25) at the cost of more false positives (20 of 74 good-care).
table(train$PoorCare, pred > 0.2)
# FALSE TRUE
# 0 54 20
# 1 9 16
# Build the ROC curve from the in-sample predictions.
roc_pred <- prediction(pred, train$PoorCare)
roc_perf <- performance(roc_pred, "tpr", "fpr")

# y-axis: true positive rate (share of poor-care patients correctly flagged).
# x-axis: false positive rate — the further right, the more good-care
# patients are misclassified as poor care.
plot(roc_perf,
     colorize = TRUE,
     print.cutoffs.at = seq(0, 1, 0.1),
     text.adj = c(-0.2, 1.7))
# Evaluate on the held-out test set.
predtest <- predict(QualityLog, newdata = test, type = "response")
rocpredtest <- prediction(predtest, test$PoorCare)

# Area under the ROC curve: the probability the model ranks a randomly chosen
# poor-care patient above a randomly chosen good-care one. Observed: ~0.799.
auc <- as.numeric(performance(rocpredtest, "auc")@y.values)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment