Created
April 6, 2015 20:47
-
-
Save inkhorn/2497ad497114189dcb8a to your computer and use it in GitHub Desktop.
Cell Phone Analysis
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(jsonlite) | |
cp = fromJSON(txt = "Cell Phone Data.txt", simplifyDataFrame = TRUE) | |
num.atts = c(4,9,11,12,13,14,15,16,18,22) | |
cp[,num.atts] = sapply(cp[,num.atts], function (x) as.numeric(x)) | |
cp$aspect.ratio = cp$att_pixels_y / cp$att_pixels_x | |
cp$isSmartPhone = ifelse(grepl("smart|iphone|blackberry", cp$name, ignore.case=TRUE) == TRUE | cp$att_screen_size >= 4, "Yes", "No") | |
library(ggplot2) | |
library(ggthemr) | |
library(scales) | |
ggthemr("camoflauge") | |
ggplot(cp, aes(x=att_brand, y=price)) + geom_boxplot() + ggtitle("Mobile Phone Price by Brand") + theme(axis.text.x=element_text(angle=90, size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_discrete("Brand") | |
ggplot(cp, aes(x=att_weight, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by Weight") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("Weight (oz)") + stat_smooth(se=FALSE) | |
ggplot(cp, aes(x=att_screen_size, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by Screen Size") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("Screen Size (in)") + stat_smooth(se=FALSE) | |
ggplot(cp, aes(x=att_ram, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by Amount of RAM") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("RAM (gb)") + stat_smooth(se=FALSE) | |
ggplot(cp, aes(x=att_sd_card, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by SD Card Capacity") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("SD Card Capacity (gb)") + stat_smooth(se=FALSE) | |
ggplot(cp, aes(x=ifelse(cp$att_dual_sim == 1, "Yes", "No"), y=price)) + geom_boxplot() + ggtitle("Mobile Phone Price by Dual Sim") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_discrete("Has Dual Sim Card?") | |
ggplot(cp, aes(x=att_storage, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by Storage Capacity") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("Storage Capacity (gb)") + stat_smooth(se=FALSE) | |
ggplot(cp, aes(x=att_battery_mah, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by Battery Capacity") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("Battery Capacity (mAh)") + stat_smooth(se=FALSE) | |
ggplot(cp, aes(x=aspect.ratio, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by Aspect Ratio") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("Aspect Ratio (Y Pixels / X Pixels)") + stat_smooth(se=FALSE) | |
ggplot(cp, aes(x=isSmartPhone, y=price)) + geom_boxplot() + ggtitle("Mobile Phone Price by Smart Phone Status") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_discrete("Is it a Smart Phone?") | |
ggplot(cp, aes(x=att_os, y=price)) + geom_boxplot() + ggtitle("Mobile Phone Price by Operating System") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_discrete("Operating System") | |
library(caret) | |
control = trainControl(method="cv") | |
in_train = createDataPartition(cp$price, p=.8, list=FALSE) | |
model.gbm = train(price ~ att_brand + att_weight + att_screen_size + | |
att_ram + att_sd_card + att_dual_sim + | |
att_storage + att_battery_mah + att_os, data=cp, | |
method="gbm", trControl=control, verbose=FALSE, subset=in_train) | |
cp$att_brand = factor(cp$) | |
cp.test = cp[-in_train,] | |
cp.test = subset(cp.test, att_brand != 'TOTO') | |
cp.test = na.omit(cp.test) | |
cp.test$pred.price = predict(model.gbm, cp.test) | |
ggplot(cp.test, aes(x=pred.price, y=price)) + geom_point(size=3) + ggtitle("Mobile Phone Price by Predicted Price") + theme(axis.text.x=element_text(size=14, vjust=0.5), axis.text.y=element_text(size=14), axis.title.x=element_text(size=15), axis.title.y=element_text(size=15), plot.title=element_text(size=17)) + scale_y_continuous(labels=dollar, name="Price (USD?)") + scale_x_continuous("Predicted Price", labels=dollar) + geom_abline(intercept=0, slope=1, colour="yellow") + stat_smooth(se=FALSE) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello inkhorn, I can't download the file "Cell Phone Data.txt". Can you please share the link? If not, can you please teach how to get the data from the url? Appreciate the guidance. Thanks.