Last active
October 22, 2015 22:39
-
-
Save willium/d86b94a078f50d4157ab to your computer and use it in GitHub Desktop.
Group Project 1, first R project
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
############################################### | |
# SETUP | |
############################################### | |
# move into the project folder so the relative file name below resolves
# NOTE(review): absolute, machine-specific path; the script only runs where
# this folder exists
setwd(
  "/Users/willium/Documents/School/uw/2015-2016/Quarter 1/Stat 311/assignments/group-1"
)
# read data.csv into d (the data): the first row holds the column names,
# strings stay character instead of becoming factors, and both empty cells
# and the text "NA" are treated as missing
d <- read.csv(
  file = "data.csv",
  na.strings = c("", "NA"),
  as.is = TRUE,
  header = TRUE
)
############################################### | |
# DATA MANIPULATION | |
############################################### | |
# backup old columns (the ratings exactly as read from the file)
d$oCollision <- d$Collision
d$oPropertyDamage <- d$Property.Damage
d$oComprehensive <- d$Comprehensive
# adjust columns by -100 so the "average insurance is 0"
# note: Collision and Comprehensive are overwritten in place, while the
# adjusted property damage goes into a NEW column, PropertyDamage (no dot);
# Property.Damage keeps the unadjusted values
d$Collision <- d$Collision - 100
d$PropertyDamage <- d$Property.Damage - 100
d$Comprehensive <- d$Comprehensive - 100
# convert size to a factor with an explicit small -> very large ordering of
# the levels; any value not listed in levels becomes NA
d$Size <- factor(d$Size, levels = c("Small", "Midsize", "Large", "VeryLarge"))
# break data up into subsets by size
# (taken before the WD column is added, so these subsets do not contain WD)
small <- subset(d, Size == "Small")
midsize <- subset(d, Size == "Midsize")
large <- subset(d, Size == "Large")
vlarge <- subset(d, Size == "VeryLarge")
# create WD (wheeldrive) column based on last 3 characters of Vehicle
wheeldrive <- substr(d$Vehicle, nchar(d$Vehicle) - 2, nchar(d$Vehicle))
# blank out every suffix that is not 2WD or 4WD (e.g. the 'ght' cell); this is
# matched by value rather than by a hard-coded row number, so it keeps working
# if rows are reordered, added or removed
wheeldrive[!(wheeldrive %in% c("2WD", "4WD"))] <- NA
d$WD <- factor(wheeldrive, levels = c("2WD", "4WD"))
# break data up into subsets by 2WD vs 4WD (or in this case wd2, wd4);
# rows whose WD is NA land in neither subset
wd4 <- subset(d, WD == "4WD")
wd2 <- subset(d, WD == "2WD")
############################################### | |
# ALL DATA | |
############################################### | |
# lay the next (4) plots out on a 2x2 grid
par(mfrow = c(2, 2))
# (1) side-by-side boxplots of the three adjusted ratings over all rows
boxplot(
  d[["Collision"]], d[["PropertyDamage"]], d[["Comprehensive"]],
  names = c("Collision", "Property", "Comprehensive"),
  main = "All Ratings",
  xlab = "Insurance Claim Type",
  ylab = "Loss Rating",
  ylim = c(-70, 70)
)
abline(h = 0, col = "red", lty = 3) # dotted red reference line at 0
# (2)-(4) one histogram per rating, in the order Collision, Property Damage,
# Comprehensive; all three share the same breaks and axis limits
invisible(Map(
  function(column, title) {
    hist(
      d[[column]],
      main = title,
      xlab = "Loss Rating",
      breaks = 10,
      xlim = c(-60, 80),
      ylim = c(0, 50)
    )
  },
  c("Collision", "PropertyDamage", "Comprehensive"),
  c("Collision", "Property Damage", "Comprehensive")
))
# numeric summary of the Collision rating
# (min, 1st quartile, median, mean, 3rd quartile, max)
summary(d[["Collision"]])
# standard deviation of the Collision rating, ignoring missing values
sd(d[["Collision"]], na.rm = TRUE)
# numeric summary of the Property Damage rating
summary(d[["PropertyDamage"]])
# standard deviation of the Property Damage rating, ignoring missing values
sd(d[["PropertyDamage"]], na.rm = TRUE)
# numeric summary of the Comprehensive rating
summary(d[["Comprehensive"]])
# standard deviation of the Comprehensive rating, ignoring missing values
sd(d[["Comprehensive"]], na.rm = TRUE)
############################################### | |
# RATINGS WITH PLOTS FOR EACH SIZE | |
############################################### | |
# lay the next (4) plots out on a 2x2 grid
par(mfrow = c(2, 2))
# (1)-(4) one panel per size subset, drawn in the order Small, Midsize,
# Large, Very Large; each panel shows the three loss ratings as boxplots
# on a common y range, with a dotted red reference line at 0
invisible(Map(
  function(size_subset, panel_title) {
    boxplot(
      size_subset$Collision,
      size_subset$PropertyDamage,
      size_subset$Comprehensive,
      main = panel_title,
      names = c("Collision", "Property", "Comprehensive"),
      xlab = "Loss Type",
      ylab = "Loss Rating",
      ylim = c(-50, 50)
    )
    abline(h = 0, col = "red", lty = 3)
  },
  list(small, midsize, large, vlarge),
  c("Small", "Midsize", "Large", "Very Large")
))
############################################### | |
# RATINGS WITH PLOTS FOR EACH DRIVE TYPE | |
############################################### | |
# lay the next (2) plots out side by side (1 row, 2 columns)
par(mfrow = c(1, 2))
# (1) the three loss ratings for the 4WD rows only
with(
  wd4,
  boxplot(
    Collision, PropertyDamage, Comprehensive,
    main = "4WD",
    names = c("Collision", "Property", "Comprehensive"),
    xlab = "Loss Type",
    ylab = "Loss Rating",
    ylim = c(-60, 80)
  )
)
abline(h = 0, col = "red", lty = 3) # dotted red reference line at 0
# (2) the three loss ratings for the 2WD rows only
with(
  wd2,
  boxplot(
    Collision, PropertyDamage, Comprehensive,
    main = "2WD",
    names = c("Collision", "Property", "Comprehensive"),
    xlab = "Loss Type",
    ylab = "Loss Rating",
    ylim = c(-60, 80)
  )
)
abline(h = 0, col = "red", lty = 3) # dotted red reference line at 0
############################################### | |
# DRIVE TYPES FOR EACH RATING TYPE | |
############################################### | |
# create 1x3 grid for next (3) plots
par(mfrow = c(1, 3))
# in each panel the x axis is the drive type (2WD vs 4WD), so it is labelled
# "Drive Type"; the panel title names the loss rating being compared
# rows whose WD is NA are left out by the formula interface
# (1) boxplot of Collision rating for 2WD and 4WD
boxplot(
  Collision ~ WD,
  data = d,
  main = "Collision",
  names = c("2WD", "4WD"),
  xlab = "Drive Type",
  ylab = "Loss Rating",
  ylim = c(-60, 80)
)
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (2) boxplot of Property Damage rating for 2WD and 4WD
boxplot(
  PropertyDamage ~ WD,
  data = d,
  main = "Property Damage",
  names = c("2WD", "4WD"),
  xlab = "Drive Type",
  ylab = "Loss Rating",
  ylim = c(-60, 80)
)
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (3) boxplot of Comprehensive rating for 2WD and 4WD
boxplot(
  Comprehensive ~ WD,
  data = d,
  main = "Comprehensive",
  names = c("2WD", "4WD"),
  xlab = "Drive Type",
  ylab = "Loss Rating",
  ylim = c(-60, 80)
)
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
############################################### | |
# EACH RATING TYPE COMPARED THROUGH ALL SIZES | |
############################################### | |
# create 1x3 grid for next (3) plots
par(mfrow = c(1, 3))
# in each panel the x axis is the vehicle size class (the levels of d$Size),
# so it is labelled "Vehicle Size"; the panel title names the loss rating
# (1) boxplot of Collision rating for small, mid, large, vlarge
boxplot(
  Collision ~ Size,
  data = d,
  main = "Collision",
  xlab = "Vehicle Size",
  ylab = "Loss Rating",
  ylim = c(-60, 80)
)
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (2) boxplot of Property Damage rating for small, mid, large, vlarge
boxplot(
  PropertyDamage ~ Size,
  data = d,
  main = "Property Damage",
  xlab = "Vehicle Size",
  ylab = "Loss Rating",
  ylim = c(-60, 80)
)
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# (3) boxplot of Comprehensive rating for small, mid, large, vlarge
boxplot(
  Comprehensive ~ Size,
  data = d,
  main = "Comprehensive",
  xlab = "Vehicle Size",
  ylab = "Loss Rating",
  ylim = c(-60, 80)
)
abline(h = 0, col = "red", lty = 3) # draw dotted red line at 0
# # verbose alternative | |
# # (1) boxplot of Collision rating for small, mid, large, vlarge | |
# boxplot(small$Collision, midsize$Collision, large$Collision, vlarge$Collision, main="Collision", names=c("Small", "Midsize", "Large", "Very Large"), xlab="Loss Type", ylab="Loss Rating", ylim=c(-60,80)) | |
# abline(h=0, col="red", lty=3) # draw dotted red line at 0 | |
# | |
# # (2) boxplot of Property Damage rating for small, mid, large, vlarge | |
# boxplot(small$PropertyDamage, midsize$PropertyDamage, large$PropertyDamage, vlarge$PropertyDamage, main="Property Damage", names=c("Small", "Midsize", "Large", "Very Large"), xlab="Loss Type", ylab="Loss Rating", ylim=c(-60,80)) | |
# abline(h=0, col="red", lty=3) # draw dotted red line at 0 | |
# | |
# # (3) boxplot of Comprehensive rating for small, mid, large, vlarge | |
# boxplot(small$Comprehensive, midsize$Comprehensive, large$Comprehensive, vlarge$Comprehensive, main="Comprehensive", names=c("Small", "Midsize", "Large", "Very Large"), xlab="Loss Type", ylab="Loss Rating", ylim=c(-60,80)) | |
# abline(h=0, col="red", lty=3) # draw dotted red line at 0 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment