Last active
August 29, 2015 14:19
-
-
Save aschleg/febe070b4b6488aa45fe to your computer and use it in GitHub Desktop.
Simple R script for performing logistic regression on binary categorical variables
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Template script: fit a logistic regression to a binary categorical outcome,
# evaluate it on a held-out test set, and score new observations.
# Placeholders to fill in before running: the csv path, the outcome column
# name, the predictor names in the model formula, and the two class labels.
library(readr)
library(caret)

# Enter csv file path. read_csv() only handles delimited text; if the data is
# in an Excel file (xls/xlsx), read it with a dedicated Excel reader instead
# (for example the readxl package).
data <- read_csv("")
# NOTE: attach(data) was removed on purpose. Every later step refers to
# columns through `data$...` or a `data =` argument, and an attached copy
# would not see the renamed / added columns created below.
summary(data)

# Enter the name of the column holding the categorical outcome you want to
# model and predict between the empty quotations.
names(data)[names(data) == ""] <- "category"

# Create factor for the categorical outcome.
data$category.f <- factor(data$category)

# Split the data: 60% of the rows go to the training set, the remaining 40%
# to the test set.
inTrain <- createDataPartition(y = data$category.f, p = 0.60, list = FALSE)
training <- data[inTrain, ]
testing <- data[-inTrain, ]

# Fit logistic model to the training data. Change the 'Predictor' terms to
# variable names in data.
data.fit <- glm(
  category.f ~ Predictor1 + Predictor2 + Predictor_n,
  data = training,
  family = binomial
)
summary(data.fit)

# Predicted probabilities for the held-out test rows. For a factor response,
# glm() treats the first factor level as "failure", so these are the
# probabilities of the other (second) level.
data.prob <- predict(data.fit, testing, type = "response")

# Turn probabilities into class labels to measure performance of the model.
# Change "F" and "M" to your category names: "F" must be the first factor
# level and "M" the second (see levels(data$category.f)).
# The labels are built for, and compared against, the TEST set because that
# is what data.prob was computed on.
data.pred <- rep("F", nrow(testing))
data.pred[data.prob > 0.5] <- "M"
table(data.pred, testing$category.f)   # confusion matrix
mean(data.pred == testing$category.f)  # accuracy
mean(data.pred != testing$category.f)  # misclassification rate

# Predict the probability of new observations falling under the second
# category. newdata must contain one column per predictor used in the model
# formula, with exactly the same names; each position across the vectors is
# one observation (here two observations). Replace the values with the data
# you want to score.
predict(
  data.fit,
  newdata = data.frame(
    Predictor1 = c(2.8, 1.8),
    Predictor2 = c(13, 7),
    Predictor_n = c(0.5, 0.9)
  ),
  type = "response"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment