Skip to content

Instantly share code, notes, and snippets.

@BioSciEconomist
Created December 4, 2018 01:48
Show Gist options
  • Select an option

  • Save BioSciEconomist/c05717cc989152b9e5227e24831a1a95 to your computer and use it in GitHub Desktop.

Select an option

Save BioSciEconomist/c05717cc989152b9e5227e24831a1a95 to your computer and use it in GitHub Desktop.
Logistic Regression and Traffic Lighting
# *------------------------------------------------------------------
# | PROGRAM NAME: ex_logit_analytics_R
# | DATE: 3/14/11
# | CREATED BY: Matt Bogard
# | PROJECT FILE:Desktop/R Programs
# *----------------------------------------------------------------
# | PURPOSE: example of predictive model and reporting
# | from blog post: from http://econometricsense.blogspot.com/2011/03/predictive-modeling-and-custom.html
# |
# *------------------------------------------------------------------
# | COMMENTS:
# |
# | 1: Reference: R Data Analysis Logistic Regression
# | http://www.ats.ucla.edu/stat/r/dae/logit.htm
# | 2:
# | 3:
# |*------------------------------------------------------------------
# | DATA USED: data downloaded from reference above
# |
# |
# |*------------------------------------------------------------------
# | CONTENTS:
# |
# | PART 1: data partition
# | PART 2: build model
# | PART 3: predictions/scoring
# | PART 4: traffic lighting report
# *-----------------------------------------------------------------
# | UPDATES:
# |
# |
# *------------------------------------------------------------------
# get data
apps <- read.csv(url("http://www.ats.ucla.edu/stat/r/dae/binary.csv")) # read data
names(apps) # list variables in this data set
dim(apps) # number of observations
print(apps) # view
# *------------------------------------------------------------------
# |
# | data partition
# |
# |
# *-----------------------------------------------------------------
# store total number of observations in your data
N <- 400
print(N)
# Number of training observations
Ntrain <- N * 0.5
print(Ntrain)
# add an explicit row number variable for tracking
id <- seq(1,400)
apps2 <- cbind(apps,id)
# Randomly arrange the data and divide it into a training
# and test set.
dat <- apps2[sample(1:N),]
train <- dat[1:Ntrain,]
validate <- dat[(Ntrain+1):N,]
dim(dat)
dim(train)
dim(validate)
# sort and look at data sets to see that they are different
sort_train <- train[order(train$id),]
print(sort_train)
sort_val <- validate[order(validate$id),]
print(sort_val)
# *------------------------------------------------------------------
# |
# | build model
# |
# |
# *-----------------------------------------------------------------
# logit model
admit_model<- glm(train$admit~train$gre+train$gpa+as.factor(train$rank), family=binomial(link="logit"), na.action=na.pass)
# model results
summary(admit_model)
# odds ratios
exp(admit_model$coefficients)
# *------------------------------------------------------------------
# |
# | predictions/scoring data
# |
# |
# *-----------------------------------------------------------------
train$score <-predict(admit_model,type="response") # add predictons to training data
sort_train_score <- train[order(train$id),] # sort by observation
print(sort_train_score) # view
validate$score <-predict(admit_model,newdata=validate,type="response") # add predictions to validation data
sort_val_score <- validate[order(validate$id),] # sort by observation
print(sort_val_score) # view
# *------------------------------------------------------------------
# |
# | create a 'traffic light report' based on predicted probabilities
# |
# |
# *-----------------------------------------------------------------
summary(validate$score) # look at probability ranges
green <- validate[validate$score >=.6,] # subset most likley to be admitted group
dim(green)
green$colorcode <- "green" # add color code variable for this group
yellow <- validate[(validate$score < .6 & validate$score >.5),] # subset intermediate group
dim(yellow)
yellow$colorcode <-"yellow" # add color code
red <- validate[validate$score <=.5,] # subset least likely to be admitted group
dim(red)
red$colorcode <- "red" # add color code
# create distribution list/report
applicants_by_risk<- rbind(red,yellow, green)
dim(applicants_by_risk)
report<-applicants_by_risk[order(applicants_by_risk$id),] # sort by applicant id
print(report[c("id","colorcode", "score")]) # basic unformatted action report can be saved as a data set, and exported for other reports and formatting
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment