Last active
August 29, 2015 14:19
-
-
Save talayhan/a86de672d108bf557285 to your computer and use it in GitHub Desktop.
R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # ------------------------------- | |
| # Homework 2 - Machine Learning | |
| # Samet Sait Talayhan | |
| # ------------------------------- | |
| # o / \ //\ | |
| # o |\___/| / \// \\ | |
| # /0 0 \__ / // | \ \ | |
| # / / \/_/ // | \ \ | |
| # @_^_@'/ \/_ // | \ \ | |
| # //_^_/ \/_ // | \ \ | |
| # ( //) | \/// | \ \ | |
| # ( / /) _|_ / ) // | \ _\ | |
| # ( // /) '/,_ _ _/ ( ; -. | _ _\.-~ .-~~~^-. | |
| # (( / / )) ,-{ _ `-.|.-~-. .~ `. | |
| # (( // / )) '/\ / ~-. _ .-~ .-~^-. \ | |
| # (( /// )) `. { } / \ \ | |
| # (( / )) .----~-.\ \-' .~ \ `. \^-. | |
| # ///.----..> \ _ -~ `. ^-` ^-_ | |
| # ///-._ _ _ _ _ _ _}^ - - - - ~ ~-- ,.-~ | |
| # /.-~ | |
| # | |
| # Note: I created this beauty dragon using cowsay app, ~$ cowsay -f dragon "Talayan" | |
## Step 2: Exploring and preparing the data ----
# Import the training data; keep strings as characters so the class
# column can be recoded explicitly below.
data_set <- read.csv("hw2train.csv", stringsAsFactors = FALSE)
# Examine the structure of the data frame.
str(data_set)
# Drop the class features (kept disabled, as in the original):
#data_set <- data_set[,16:43]
# Fix the RNG seed so the row shuffle -- and therefore the
# train/test split built from row positions later on -- is
# reproducible across runs (the original shuffle was unseeded).
set.seed(42)
# Shuffle the rows.
data_set <- data_set[sample(nrow(data_set)), ]
# Table of the rightmost feature (the class label, coded -1 / 1).
table(data_set$Feat.43)
# Recode Feat.43 as a factor with informative labels.
data_set$Feat.43 <- factor(data_set$Feat.43, levels = c(-1, 1),
                           labels = c("Negative", "Positive"))
# Table of proportions with more informative labels.
round(prop.table(table(data_set$Feat.43)) * 100, digits = 1)
# Summarize three numeric features.
summary(data_set[c("Feat.17", "Feat.18", "Feat.19")])
# Min-max normalization: map a numeric vector linearly onto [0, 1].
#
# Args:
#   x     Numeric vector to rescale.
#   na.rm Drop NAs when computing the range? Default FALSE preserves
#         the original behavior (any NA makes the whole result NA).
#
# Returns a numeric vector the same length as x. Note: a constant
# vector yields NaN (0 / 0), same as the original implementation.
normalize <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  (x - rng[1]) / (rng[2] - rng[1])
}
# Sanity-check the normalization function: both calls should return
# the identical vector c(0.00, 0.25, 0.50, 0.75, 1.00).
normalize(c(1, 2, 3, 4, 5))
normalize(c(10, 20, 30, 40, 50))
# Apply min-max normalization to the 42 feature columns.
data_set_n <- as.data.frame(lapply(data_set[seq_len(42)], normalize))
# Confirm that normalization worked (summary should show min 0, max 1).
summary(data_set_n$Feat.17)
# Optional re-shuffle of the normalized data (disabled, as in the original):
#data_set_n_shuffle <- data_set_n[sample(nrow(data_set_n)),]
# Row positions for the training / test partition of the
# already-shuffled data.
train_rows <- 1:99
test_rows <- 100:164
# Create training and test feature sets.
data_set_train <- data_set_n[train_rows, ]
data_set_test <- data_set_n[test_rows, ]
# Class distribution of the full data set.
table(data_set$Feat.43)
# Matching class labels (column 43, the recoded Feat.43 factor) for
# the training and test partitions.
data_set_train_labels <- data_set$Feat.43[train_rows]
data_set_test_labels <- data_set$Feat.43[test_rows]
## Step 3: Training a model on the data ----
# Load the "class" library, which provides knn().
library(class)
# Classify each test row by majority vote of its 10 nearest
# training neighbors.
data_set_test_pred <- knn(train = data_set_train,
                          test = data_set_test,
                          cl = data_set_train_labels,
                          k = 10)
## Step 4: Evaluating model performance ----
# Load the "gmodels" library for CrossTable().
library(gmodels)
# Cross-tabulate actual vs. predicted labels; chi-square
# contributions are suppressed for readability.
CrossTable(x = data_set_test_labels,
           y = data_set_test_pred,
           prop.chisq = FALSE)
## Step 5: Optimize the solution ----
# corrplot visualizes the correlation matrix; caret provides
# findCorrelation(). Installing from inside a script is a heavy side
# effect, so the install is now guarded to run only when the package
# is actually missing (the original installed unconditionally).
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
library(corrplot)
library(mlbench)
library(caret)
# Compute the correlation matrix of the normalized features.
correlation_mat <- cor(data_set_n)
# Summarize the correlation matrix.
print(correlation_mat)
# Find attributes that are highly correlated. The cutoff here is
# 0.5 (0.75 is the commonly recommended stricter threshold).
# Fix: the original passed an undefined variable `correlationMatrix`;
# the matrix computed above is named `correlation_mat`.
highlyCorrelated <- findCorrelation(correlation_mat, cutoff = 0.5)
# Visualize the matrix, clustering features by correlation index.
corrplot(correlation_mat, order = "hclust")
# NOTE(review): these two leftover lines belong at the top of the
# script (or in an interactive console), not here; kept in place to
# preserve the script's original side effects.
setwd("/home/t4/Dropbox/Machine Learning/hw2")
install.packages("plyr_1.8.2", repos = NULL)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment