aschleg · August 29, 2015 14:19
diff --git a/logistic_regression b/logistic_regression
 library(readr)
 library(caret)

 # Template: fit a binary logistic regression on a 60% training split and measure its
 # accuracy on the held-out 40% test split.

 # Enter the csv file path between the quotes. read_csv only parses delimited text; Excel
 # workbooks (xls/xlsx) need a dedicated reader such as readxl::read_excel, because
 # read.table and readr's read_table also expect plain text.
 data <- read_csv("")
 # attach(data) is deliberately not used: every modelling call below receives its data frame
 # explicitly, and attached columns would let predict() silently fall back to the full data
 # set whenever newdata is missing a predictor.
 summary(data)

 # Enter the name of the column holding the categorical outcome between the empty quotations.
 names(data)[names(data) == ""] <- "category"

 # Create factor for the categorical outcome.
 data$category.f <- factor(data$category)
 # A binomial glm models exactly two outcomes; fail early instead of mislabelling predictions.
 stopifnot(nlevels(data$category.f) == 2L)

 # Split data into training (60%) and test (40%) sets. The split is random; call set.seed()
 # beforehand if it has to be reproducible.
 inTrain <- createDataPartition(y = data$category.f, p = .60, list = FALSE)
 training <- data[inTrain, ]
 testing <- data[-inTrain, ]

 # Fit logistic model to the training data. Change 'Predictor' values to variable names in data.
 data.fit <- glm(category.f ~ Predictor1 + Predictor2 + Predictor_n, data = training, family = binomial)
 summary(data.fit)
 # Predicted probabilities for the held-out rows.
 data.prob <- predict(data.fit, newdata = testing, type = "response")

 # Turn probabilities into class labels and measure performance on the test set.
 # glm treats the first factor level as "failure", so data.prob is the probability of the
 # second level; taking the labels from levels() keeps them in sync with the data.
 class.levels <- levels(testing$category.f)
 data.pred <- rep(class.levels[1], nrow(testing))
 data.pred[data.prob > .5] <- class.levels[2]
 # Predictions were made for `testing`, so they must be scored against the test labels.
 table(data.pred, testing$category.f)
 mean(data.pred == testing$category.f)  # accuracy
 mean(data.pred != testing$category.f)  # misclassification rate

 # Predict the probability of new observations falling in the second category. The column
 # names in newdata must match the predictors used in the model formula. Each position across
 # the vectors is one observation; the numbers below are example values to be replaced.
 predict(
   data.fit,
   newdata = data.frame(Predictor1 = c(2.8, 1.8), Predictor2 = c(13, 7), Predictor_n = c(1, 0)),
   type = "response"
 )
	library(readr)
	library(caret)

	# Template: fit a binary logistic regression on a 60% training split and measure its
	# accuracy on the held-out 40% test split.

	# Enter the csv file path between the quotes. read_csv only parses delimited text; Excel
	# workbooks (xls/xlsx) need a dedicated reader such as readxl::read_excel, because
	# read.table and readr's read_table also expect plain text.
	data <- read_csv("")
	# attach(data) is deliberately not used: every modelling call below receives its data frame
	# explicitly, and attached columns would let predict() silently fall back to the full data
	# set whenever newdata is missing a predictor.
	summary(data)

	# Enter the name of the column holding the categorical outcome between the empty quotations.
	names(data)[names(data) == ""] <- "category"

	# Create factor for the categorical outcome.
	data$category.f <- factor(data$category)
	# A binomial glm models exactly two outcomes; fail early instead of mislabelling predictions.
	stopifnot(nlevels(data$category.f) == 2L)

	# Split data into training (60%) and test (40%) sets. The split is random; call set.seed()
	# beforehand if it has to be reproducible.
	inTrain <- createDataPartition(y = data$category.f, p = .60, list = FALSE)
	training <- data[inTrain, ]
	testing <- data[-inTrain, ]

	# Fit logistic model to the training data. Change 'Predictor' values to variable names in data.
	data.fit <- glm(category.f ~ Predictor1 + Predictor2 + Predictor_n, data = training, family = binomial)
	summary(data.fit)
	# Predicted probabilities for the held-out rows.
	data.prob <- predict(data.fit, newdata = testing, type = "response")

	# Turn probabilities into class labels and measure performance on the test set.
	# glm treats the first factor level as "failure", so data.prob is the probability of the
	# second level; taking the labels from levels() keeps them in sync with the data.
	class.levels <- levels(testing$category.f)
	data.pred <- rep(class.levels[1], nrow(testing))
	data.pred[data.prob > .5] <- class.levels[2]
	# Predictions were made for `testing`, so they must be scored against the test labels.
	table(data.pred, testing$category.f)
	mean(data.pred == testing$category.f)  # accuracy
	mean(data.pred != testing$category.f)  # misclassification rate

	# Predict the probability of new observations falling in the second category. The column
	# names in newdata must match the predictors used in the model formula. Each position across
	# the vectors is one observation; the numbers below are example values to be replaced.
	predict(
	  data.fit,
	  newdata = data.frame(Predictor1 = c(2.8, 1.8), Predictor2 = c(13, 7), Predictor_n = c(1, 0)),
	  type = "response"
	)