Created
December 4, 2018 01:48
-
-
Save BioSciEconomist/c05717cc989152b9e5227e24831a1a95 to your computer and use it in GitHub Desktop.
Logistic Regression and Traffic Lighting
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # *------------------------------------------------------------------ | |
| # | PROGRAM NAME: ex_logit_analytics_R | |
| # | DATE: 3/14/11 | |
| # | CREATED BY: Matt Bogard | |
| # | PROJECT FILE:Desktop/R Programs | |
| # *---------------------------------------------------------------- | |
| # | PURPOSE: example of predictive model and reporting | |
| # | from blog post: from http://econometricsense.blogspot.com/2011/03/predictive-modeling-and-custom.html | |
| # | | |
| # *------------------------------------------------------------------ | |
| # | COMMENTS: | |
| # | | |
| # | 1: Reference: R Data Analysis Logistic Regression | |
| # | http://www.ats.ucla.edu/stat/r/dae/logit.htm | |
| # | 2: | |
| # | 3: | |
| # |*------------------------------------------------------------------ | |
| # | DATA USED: data downloaded from reference above | |
| # | | |
| # | | |
| # |*------------------------------------------------------------------ | |
| # | CONTENTS: | |
| # | | |
| # | PART 1: data partition | |
| # | PART 2: build model | |
| # | PART 3: predictions/scoring | |
| # | PART 4: traffic lighting report | |
| # *----------------------------------------------------------------- | |
| # | UPDATES: | |
| # | | |
| # | | |
| # *------------------------------------------------------------------ | |
| # get data | |
| apps <- read.csv(url("http://www.ats.ucla.edu/stat/r/dae/binary.csv")) # read data | |
| names(apps) # list variables in this data set | |
| dim(apps) # number of observations | |
| print(apps) # view | |
| # *------------------------------------------------------------------ | |
| # | | |
| # | data partition | |
| # | | |
| # | | |
| # *----------------------------------------------------------------- | |
| # store total number of observations in your data | |
| N <- 400 | |
| print(N) | |
| # Number of training observations | |
| Ntrain <- N * 0.5 | |
| print(Ntrain) | |
| # add an explicit row number variable for tracking | |
| id <- seq(1,400) | |
| apps2 <- cbind(apps,id) | |
| # Randomly arrange the data and divide it into a training | |
| # and test set. | |
| dat <- apps2[sample(1:N),] | |
| train <- dat[1:Ntrain,] | |
| validate <- dat[(Ntrain+1):N,] | |
| dim(dat) | |
| dim(train) | |
| dim(validate) | |
| # sort and look at data sets to see that they are different | |
| sort_train <- train[order(train$id),] | |
| print(sort_train) | |
| sort_val <- validate[order(validate$id),] | |
| print(sort_val) | |
| # *------------------------------------------------------------------ | |
| # | | |
| # | build model | |
| # | | |
| # | | |
| # *----------------------------------------------------------------- | |
| # logit model | |
| admit_model<- glm(train$admit~train$gre+train$gpa+as.factor(train$rank), family=binomial(link="logit"), na.action=na.pass) | |
| # model results | |
| summary(admit_model) | |
| # odds ratios | |
| exp(admit_model$coefficients) | |
| # *------------------------------------------------------------------ | |
| # | | |
| # | predictions/scoring data | |
| # | | |
| # | | |
| # *----------------------------------------------------------------- | |
| train$score <-predict(admit_model,type="response") # add predictons to training data | |
| sort_train_score <- train[order(train$id),] # sort by observation | |
| print(sort_train_score) # view | |
| validate$score <-predict(admit_model,newdata=validate,type="response") # add predictions to validation data | |
| sort_val_score <- validate[order(validate$id),] # sort by observation | |
| print(sort_val_score) # view | |
| # *------------------------------------------------------------------ | |
| # | | |
| # | create a 'traffic light report' based on predicted probabilities | |
| # | | |
| # | | |
| # *----------------------------------------------------------------- | |
| summary(validate$score) # look at probability ranges | |
| green <- validate[validate$score >=.6,] # subset most likley to be admitted group | |
| dim(green) | |
| green$colorcode <- "green" # add color code variable for this group | |
| yellow <- validate[(validate$score < .6 & validate$score >.5),] # subset intermediate group | |
| dim(yellow) | |
| yellow$colorcode <-"yellow" # add color code | |
| red <- validate[validate$score <=.5,] # subset least likely to be admitted group | |
| dim(red) | |
| red$colorcode <- "red" # add color code | |
| # create distribution list/report | |
| applicants_by_risk<- rbind(red,yellow, green) | |
| dim(applicants_by_risk) | |
| report<-applicants_by_risk[order(applicants_by_risk$id),] # sort by applicant id | |
| print(report[c("id","colorcode", "score")]) # basic unformatted action report can be saved as a data set, and exported for other reports and formatting | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment