abhijeetchopra · May 1, 2017 01:39
diff --git a/script.R b/script.R
 # Auth: Abhijeet Chopra
 # CWID: 50180612 
 # Date: 30 Apr 2017
 # Prog: K-means
 # Desc: R script that finds clusters using the k-means algorithm
 # Meth: K-means clustering (kmeans) with an elbow plot to choose k
 # -----------------------------------------

 # ---- Configuration ----
 # Input and output share one directory. Paths are built with file.path()
 # rather than setwd(), so the session's working directory is left untouched.
 data_dir <- "C:/Users/abhij/Desktop"
 input_file <- file.path(data_dir, "mydata.csv")
 output_file <- file.path(data_dir, "output.csv")
 n_clusters <- 3L     # k used for the final cluster solution
 max_clusters <- 15L  # largest k tried for the elbow plot
 n_starts <- 25L      # random restarts per kmeans() call
 # kmeans() draws its initial centres at random; fix the seed so the elbow
 # plot and the cluster labels are the same on every run.
 set.seed(20170430)

 mydata <- read.csv(input_file, header = TRUE, stringsAsFactors = FALSE)

 # ---- Prepare data ----
 mydata <- na.omit(mydata) # listwise deletion of rows with missing values

 # scale() stops on non-numeric input, so cluster on numeric columns only.
 is_num <- vapply(mydata, is.numeric, logical(1))
 if (!any(is_num)) {
   stop("No numeric columns found in ", input_file, call. = FALSE)
 }
 if (!all(is_num)) {
   warning(
     "Ignoring non-numeric column(s): ",
     paste(names(mydata)[!is_num], collapse = ", "),
     call. = FALSE
   )
   mydata <- mydata[, is_num, drop = FALSE]
 }

 mydata <- scale(mydata) # standardize each column (returns a matrix)
 if (!all(is.finite(mydata))) {
   stop(
     "Standardizing produced non-finite values ",
     "(constant column or fewer than two rows?)",
     call. = FALSE
   )
 }

 # ---- Determine number of clusters ----
 # kmeans() rejects more centres than distinct rows (and, with its default
 # algorithm, as many centres as rows), so stay one below the distinct count.
 n_distinct <- nrow(unique(mydata))
 if (n_distinct <= n_clusters) {
   stop(
     "Need more than ", n_clusters, " distinct complete rows; found ",
     n_distinct,
     call. = FALSE
   )
 }
 max_k <- min(max_clusters, n_distinct - 1L)

 # wss[k] is the total within-cluster sum of squares of a k-cluster solution.
 wss <- numeric(max_k)
 # k = 1: one cluster holding every row, i.e. the total sum of squares.
 wss[1] <- (nrow(mydata) - 1) * sum(apply(mydata, 2, var))
 for (k in seq_len(max_k)[-1]) {
   wss[k] <- kmeans(mydata, centers = k, nstart = n_starts)$tot.withinss
 }
 plot(
   seq_len(max_k), wss,
   type = "b",
   xlab = "Number of Clusters",
   ylab = "Within groups sum of squares"
 )

 # ---- K-means cluster analysis ----
 fit <- kmeans(mydata, centers = n_clusters, nstart = n_starts)

 # Cluster means of the standardized variables; print() explicitly so the
 # table also shows up when the script is run through source().
 print(aggregate(mydata, by = list(fit$cluster), FUN = mean))

 # Append the cluster assignment; the column keeps the name "fit.cluster".
 mydata <- data.frame(mydata, fit.cluster = fit$cluster)

 print(head(mydata))

 write.csv(mydata, file = output_file, row.names = FALSE)
	# Auth: Abhijeet Chopra
	# CWID: 50180612
	# Date: 30 Apr 2017
	# Prog: K-means
	# Desc: R script that finds clusters using the k-means algorithm
	# Meth: K-means clustering (kmeans) with an elbow plot to choose k
	# -----------------------------------------

	# ---- Configuration ----
	# Input and output share one directory. Paths are built with file.path()
	# rather than setwd(), so the session's working directory is left untouched.
	data_dir <- "C:/Users/abhij/Desktop"
	input_file <- file.path(data_dir, "mydata.csv")
	output_file <- file.path(data_dir, "output.csv")
	n_clusters <- 3L     # k used for the final cluster solution
	max_clusters <- 15L  # largest k tried for the elbow plot
	n_starts <- 25L      # random restarts per kmeans() call
	# kmeans() draws its initial centres at random; fix the seed so the elbow
	# plot and the cluster labels are the same on every run.
	set.seed(20170430)

	mydata <- read.csv(input_file, header = TRUE, stringsAsFactors = FALSE)

	# ---- Prepare data ----
	mydata <- na.omit(mydata) # listwise deletion of rows with missing values

	# scale() stops on non-numeric input, so cluster on numeric columns only.
	is_num <- vapply(mydata, is.numeric, logical(1))
	if (!any(is_num)) {
	  stop("No numeric columns found in ", input_file, call. = FALSE)
	}
	if (!all(is_num)) {
	  warning(
	    "Ignoring non-numeric column(s): ",
	    paste(names(mydata)[!is_num], collapse = ", "),
	    call. = FALSE
	  )
	  mydata <- mydata[, is_num, drop = FALSE]
	}

	mydata <- scale(mydata) # standardize each column (returns a matrix)
	if (!all(is.finite(mydata))) {
	  stop(
	    "Standardizing produced non-finite values ",
	    "(constant column or fewer than two rows?)",
	    call. = FALSE
	  )
	}

	# ---- Determine number of clusters ----
	# kmeans() rejects more centres than distinct rows (and, with its default
	# algorithm, as many centres as rows), so stay one below the distinct count.
	n_distinct <- nrow(unique(mydata))
	if (n_distinct <= n_clusters) {
	  stop(
	    "Need more than ", n_clusters, " distinct complete rows; found ",
	    n_distinct,
	    call. = FALSE
	  )
	}
	max_k <- min(max_clusters, n_distinct - 1L)

	# wss[k] is the total within-cluster sum of squares of a k-cluster solution.
	wss <- numeric(max_k)
	# k = 1: one cluster holding every row, i.e. the total sum of squares.
	wss[1] <- (nrow(mydata) - 1) * sum(apply(mydata, 2, var))
	for (k in seq_len(max_k)[-1]) {
	  wss[k] <- kmeans(mydata, centers = k, nstart = n_starts)$tot.withinss
	}
	plot(
	  seq_len(max_k), wss,
	  type = "b",
	  xlab = "Number of Clusters",
	  ylab = "Within groups sum of squares"
	)

	# ---- K-means cluster analysis ----
	fit <- kmeans(mydata, centers = n_clusters, nstart = n_starts)

	# Cluster means of the standardized variables; print() explicitly so the
	# table also shows up when the script is run through source().
	print(aggregate(mydata, by = list(fit$cluster), FUN = mean))

	# Append the cluster assignment; the column keeps the name "fit.cluster".
	mydata <- data.frame(mydata, fit.cluster = fit$cluster)

	print(head(mydata))

	write.csv(mydata, file = output_file, row.names = FALSE)
No results found