BioSciEconomist · December 4, 2018 01:48
diff --git a/ex_logit_analytics_R.R b/ex_logit_analytics_R.R
 # *------------------------------------------------------------------
 # | PROGRAM NAME: ex_logit_analytics_R
 # | DATE: 3/14/11
 # | CREATED BY: Matt Bogard  
 # | PROJECT FILE:Desktop/R Programs            
 # *----------------------------------------------------------------
 # | PURPOSE: example of predictive model and reporting 
 # | from blog post: from http://econometricsense.blogspot.com/2011/03/predictive-modeling-and-custom.html              
 # |
 # *------------------------------------------------------------------
 # | COMMENTS:               
 # |
 # |  1: Reference: R Data Analysis Logistic Regression 
 # |     http://www.ats.ucla.edu/stat/r/dae/logit.htm
 # |  2: 
 # |  3: 
 # |*------------------------------------------------------------------
 # | DATA USED: data downloaded from reference above             
 # |
 # |
 # |*------------------------------------------------------------------
 # | CONTENTS:               
 # |
 # |  PART 1: data partition 
 # |  PART 2: build model
 # |  PART 3: predictions/scoring
 # |     PART 4: traffic lighting report
 # *-----------------------------------------------------------------
 # | UPDATES:               
 # |
 # |
 # *------------------------------------------------------------------
 
 # get data 
 
 apps <- read.csv(url("http://www.ats.ucla.edu/stat/r/dae/binary.csv")) # read data 
 
 
 names(apps) # list variables in this data set
 dim(apps) # number of observations
 print(apps) # view
 
 # *------------------------------------------------------------------
 # |                
 # |    data partition
 # |  
 # |  
 # *-----------------------------------------------------------------
 
 
 # store total number of observations in your data
 N <- 400 
 print(N)
 
 # Number of training observations
 Ntrain <- N * 0.5
 print(Ntrain)
 
 # add an explicit row number variable for tracking
 
 id <- seq(1,400)
 
 apps2 <- cbind(apps,id)
 
 # Randomly arrange the data and divide it into a training
 # and test set.
 
 dat <- apps2[sample(1:N),]
 train <- dat[1:Ntrain,]
 validate <- dat[(Ntrain+1):N,]
 
 dim(dat)
 dim(train)
 dim(validate)
 
 # sort and look at data sets to see that they are different
 
 sort_train <- train[order(train$id),]
 print(sort_train)
 
 sort_val <- validate[order(validate$id),]
 print(sort_val)
 
 # *------------------------------------------------------------------
 # |                
 # |    build model
 # |  
 # |  
 # *-----------------------------------------------------------------
 
 # logit model 
 
 admit_model<- glm(train$admit~train$gre+train$gpa+as.factor(train$rank), family=binomial(link="logit"), na.action=na.pass)
 
 # model results
 
 summary(admit_model)
 
 # odds ratios
 
 exp(admit_model$coefficients)
 
 # *------------------------------------------------------------------
 # |                
 # |   predictions/scoring data
 # |  
 # |  
 # *-----------------------------------------------------------------
 
 train$score <-predict(admit_model,type="response") # add predictons to training data 
 
 sort_train_score <- train[order(train$id),] # sort by observation
 print(sort_train_score) # view
 
 validate$score <-predict(admit_model,newdata=validate,type="response") # add predictions to validation data 
 sort_val_score <- validate[order(validate$id),] # sort by observation
 print(sort_val_score) # view
 
 
 # *------------------------------------------------------------------
 # |                
 # |    create a 'traffic light report' based on predicted probabilities
 # |  
 # |  
 # *----------------------------------------------------------------- 
 
 summary(validate$score) # look at probability ranges
 
 
 green <- validate[validate$score >=.6,] # subset most likley to be admitted group
 dim(green)
 green$colorcode <- "green" # add color code variable for this group
 
 yellow <- validate[(validate$score < .6 & validate$score >.5),] # subset intermediate group
 dim(yellow)
 yellow$colorcode <-"yellow"  # add color code
 
 red <- validate[validate$score <=.5,]  # subset least likely to be admitted group
 dim(red)
 red$colorcode <- "red" # add color code
 
 # create distribution list/report
 
 applicants_by_risk<- rbind(red,yellow, green)
 dim(applicants_by_risk)
 report<-applicants_by_risk[order(applicants_by_risk$id),] # sort by applicant id
 print(report[c("id","colorcode", "score")]) # basic unformatted action report can be saved as a data set, and exported for other reports and formatting
	# *------------------------------------------------------------------
	# \| PROGRAM NAME: ex_logit_analytics_R
	# \| DATE: 3/14/11
	# \| CREATED BY: Matt Bogard
	# \| PROJECT FILE:Desktop/R Programs
	# *----------------------------------------------------------------
	# \| PURPOSE: example of predictive model and reporting
	# \| from blog post: from http://econometricsense.blogspot.com/2011/03/predictive-modeling-and-custom.html
	# \|
	# *------------------------------------------------------------------
	# \| COMMENTS:
	# \|
	# \| 1: Reference: R Data Analysis Logistic Regression
	# \| http://www.ats.ucla.edu/stat/r/dae/logit.htm
	# \| 2:
	# \| 3:
	# \|*------------------------------------------------------------------
	# \| DATA USED: data downloaded from reference above
	# \|
	# \|
	# \|*------------------------------------------------------------------
	# \| CONTENTS:
	# \|
	# \| PART 1: data partition
	# \| PART 2: build model
	# \| PART 3: predictions/scoring
	# \| PART 4: traffic lighting report
	# *-----------------------------------------------------------------
	# \| UPDATES:
	# \|
	# \|
	# *------------------------------------------------------------------

	# get data

	apps <- read.csv(url("http://www.ats.ucla.edu/stat/r/dae/binary.csv")) # read data


	names(apps) # list variables in this data set
	dim(apps) # number of observations
	print(apps) # view

	# *------------------------------------------------------------------
	# \|
	# \| data partition
	# \|
	# \|
	# *-----------------------------------------------------------------


	# store total number of observations in your data
	N <- 400
	print(N)

	# Number of training observations
	Ntrain <- N * 0.5
	print(Ntrain)

	# add an explicit row number variable for tracking

	id <- seq(1,400)

	apps2 <- cbind(apps,id)

	# Randomly arrange the data and divide it into a training
	# and test set.

	dat <- apps2[sample(1:N),]
	train <- dat[1:Ntrain,]
	validate <- dat[(Ntrain+1):N,]

	dim(dat)
	dim(train)
	dim(validate)

	# sort and look at data sets to see that they are different

	sort_train <- train[order(train$id),]
	print(sort_train)

	sort_val <- validate[order(validate$id),]
	print(sort_val)

	# *------------------------------------------------------------------
	# \|
	# \| build model
	# \|
	# \|
	# *-----------------------------------------------------------------

	# logit model

	admit_model<- glm(train$admit~train$gre+train$gpa+as.factor(train$rank), family=binomial(link="logit"), na.action=na.pass)

	# model results

	summary(admit_model)

	# odds ratios

	exp(admit_model$coefficients)

	# *------------------------------------------------------------------
	# \|
	# \| predictions/scoring data
	# \|
	# \|
	# *-----------------------------------------------------------------

	train$score <-predict(admit_model,type="response") # add predictons to training data

	sort_train_score <- train[order(train$id),] # sort by observation
	print(sort_train_score) # view

	validate$score <-predict(admit_model,newdata=validate,type="response") # add predictions to validation data
	sort_val_score <- validate[order(validate$id),] # sort by observation
	print(sort_val_score) # view


	# *------------------------------------------------------------------
	# \|
	# \| create a 'traffic light report' based on predicted probabilities
	# \|
	# \|
	# *-----------------------------------------------------------------

	summary(validate$score) # look at probability ranges


	green <- validate[validate$score >=.6,] # subset most likley to be admitted group
	dim(green)
	green$colorcode <- "green" # add color code variable for this group

	yellow <- validate[(validate$score < .6 & validate$score >.5),] # subset intermediate group
	dim(yellow)
	yellow$colorcode <-"yellow" # add color code

	red <- validate[validate$score <=.5,] # subset least likely to be admitted group
	dim(red)
	red$colorcode <- "red" # add color code

	# create distribution list/report

	applicants_by_risk<- rbind(red,yellow, green)
	dim(applicants_by_risk)
	report<-applicants_by_risk[order(applicants_by_risk$id),] # sort by applicant id
	print(report[c("id","colorcode", "score")]) # basic unformatted action report can be saved as a data set, and exported for other reports and formatting
No results found