Created
April 27, 2016 22:02
-
-
Save primaryobjects/eca10eae262dd3250cbce2bc49ae5a90 to your computer and use it in GitHub Desktop.
Healthcare quality and detecting poor care with logistic regression.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Healthcare quality analysis: detecting poor care with logistic regression.
# Dataset: quality.csv — 131 patients, outcome PoorCare (0 = good, 1 = poor).
library("caTools")  # sample.split()
library("ROCR")     # prediction(), performance()

data <- read.csv("quality.csv")

# 98 patients received good care (0), 33 poor care (1).
table(data$PoorCare)

# Quick visual check: color points on a red-blue ramp by outcome.
rbPal <- colorRampPalette(c("red", "blue"))
data$col <- rbPal(10)[as.numeric(cut(data$PoorCare, breaks = 10))]
plot(data$TotalVisits, data$Narcotics, col = data$col, pch = 20)

# Reproducible 75/25 train/test split, stratified on the outcome.
set.seed(88)
split <- sample.split(data$PoorCare, SplitRatio = 0.75)
train <- subset(data, split == TRUE)
test <- subset(data, split == FALSE)

# Logistic regression: predict poor care from office visits and narcotics use.
QualityLog <- glm(PoorCare ~ OfficeVisits + Narcotics,
                  data = train, family = binomial)

# In-sample predicted probabilities (no newdata => predicts on training set).
pred <- predict(QualityLog, type = "response")

# Average prediction for each true outcome.
tapply(pred, train$PoorCare, mean)

# Confusion matrix at a 0.5 threshold.
table(train$PoorCare, pred > 0.5)
#    FALSE TRUE
#  0    70    4
#  1    15   10
# Sensitivity (true positive rate) = TP / (TP + FN)
10 / 25
# Specificity (true negative rate) = TN / (TN + FP)
70 / 74

# Higher threshold (0.7): fewer cases flagged -> lower sensitivity,
# higher specificity.
table(train$PoorCare, pred > 0.7)
#    FALSE TRUE
#  0    73    1
#  1    17    8
8 / 25
73 / 74

# Lower threshold (0.2): more cases flagged as poor care.
table(train$PoorCare, pred > 0.2)
#    FALSE TRUE
#  0    54   20
#  1     9   16

# ROC curve on the training set.
rocpred <- prediction(pred, train$PoorCare)
rocperf <- performance(rocpred, "tpr", "fpr")
# y-axis: true positive rate (poor care correctly identified), so 0.5 means
# half the poor-care patients are correctly flagged.
# x-axis: false positive rate — further right means more good-care patients
# are wrongly flagged as poor care.
plot(rocperf, colorize = TRUE, print.cutoffs.at = seq(0, 1, 0.1),
     text.adj = c(-0.2, 1.7))

# Evaluate on the held-out test set.
predtest <- predict(QualityLog, type = "response", newdata = test)
rocpredtest <- prediction(predtest, test$PoorCare)
# Area under the ROC curve: the probability that a randomly chosen poor-care
# case is ranked above a randomly chosen good-care case.
auc <- as.numeric(performance(rocpredtest, "auc")@y.values)
# 0.799
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment