# iCHAIT · July 1, 2016 07:08
# diff --git a/R-cheats.r b/R-cheats.r
 # Install the required packages only when they are not available yet, so that
 # re-running the script does not download and reinstall them every time.
 required_pkgs <- c("data.table", "ggplot2")
 missing_pkgs <- required_pkgs[
   !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
 ]
 if (length(missing_pkgs) > 0) {
   install.packages(missing_pkgs)
 }

 # import library
 library(data.table)
 library(ggplot2)

 # Assigning vectors: x is built element by element with c(), y from the
 # integer sequence 1:5
 x <- c(0, 1, 2, 3, 4)
 y <- 1:5

 # Using a function in R
 # square_root(x) returns sqrt(x); called without an argument it uses x = 2.
 square_root <- function(x = 2) {
   sqrt(x)
 }

 # Creating a data table: every named argument becomes a column
 dt <- data.table(name = c("Himanshu", "Abhishek"), age = c(22, 23))

 # Viewing a data table
 # Reading and writing tables, removing row names

 # Convert the diamonds data set (shipped with ggplot2) into a data.table.
 # attach() is intentionally not used: data.table resolves column names
 # inside diamonds[...] on its own, and attaching would only place copies of
 # the columns on the search path, where they can mask other objects.
 diamonds <- data.table(diamonds)

 # show all column names of diamonds
 colnames(diamonds)

 # number of rows
 nrow(diamonds)

 # number of columns
 ncol(diamonds)

 # dimensions (rows, columns)
 dim(diamonds)

 # changing column names in a data table (setnames renames in place)
 setnames(diamonds, c("carat", "cut"), c("Carat", "Cut"))

 colnames(diamonds)

 # rename back so the rest of the script can keep using carat and cut
 setnames(diamonds, c("Carat", "Cut"), c("carat", "cut"))


 # Find the number and types of unique cuts and assign them to variables
 unique_cuts <- unique(diamonds$cut)
 unique_cuts

 len_unique_cuts <- length(unique_cuts)
 len_unique_cuts

 # Find the unique combinations of cut and clarity.
 # NOTE: the variable keeps its existing name (..._and_colors) although the
 # rows are de-duplicated on cut and clarity, so the printout selects
 # clarity; selecting color would not show the de-duplicated combinations.
 unique_cuts_and_colors <- unique(diamonds, by = c("cut", "clarity"))
 unique_cuts_and_colors[, c("cut", "clarity"), with = FALSE]

 # Ideal-cut combinations only; color is the value carried by the row that
 # was kept for each cut/clarity pair
 unique_cuts_and_colors[
   cut == "Ideal",
   c("cut", "color", "clarity"),
   with = FALSE
 ]


 # Filtering - keep only the rows with an Ideal cut and color E
 IdealCut_EColor_diamonds <- diamonds[cut == "Ideal" & color == "E"]
 IdealCut_EColor_diamonds

 # Grouping - number of rows and mean price for every cut
 grouping_on_cut <- diamonds[
   ,
   .(Total_instance = .N, mean_price = mean(price)),
   by = "cut"
 ]

 # Defining a new column in R
 # `:=` adds price_category to diamonds itself; only the rows matching the
 # filter are set to "Cheap". The call hands back the whole table, so
 # price_cat shows all of diamonds, not just the filtered rows.
 # NOTE(review): 3457.42 is presumably meant to be the mean price of Ideal
 # cuts from grouping_on_cut - confirm.
 price_cat <- diamonds[
   cut == "Ideal" & price < 3457.42,
   price_category := "Cheap"
 ]
 View(price_cat)

 # One way to list the cut with the highest mean price
 max_mean_price <- grouping_on_cut[, max(mean_price)]
 cut_highest_price <- grouping_on_cut[mean_price == max_mean_price, unique(cut)]



 # Exercise
 # Find the count of instances and mean of depth of every cut and color combination
 # Find and print the cut and color combination with least depth and its value
 # Find the combination with highest mean depth among the Premium Cut

 grouping_on_cut_color <- diamonds[
   ,
   .(Total_instance = .N, mean_depth = mean(depth)),
   by = c("cut", "color")
 ]

 # Combination with the smallest mean depth, printed together with its value
 min_depth <- min(grouping_on_cut_color$mean_depth)
 grouping_on_cut_color[
   mean_depth == min_depth,
   c("cut", "color", "mean_depth"),
   with = FALSE
 ]

 # Highest mean depth among the Premium cuts. The lookup is restricted to
 # Premium rows so that a different cut that happens to share the same mean
 # depth is not reported as well.
 max_depth_Premium <- max(grouping_on_cut_color[cut == "Premium"]$mean_depth)
 grouping_on_cut_color[
   cut == "Premium" & mean_depth == max_depth_Premium,
   c("cut", "color"),
   with = FALSE
 ]

 # Number of Ideal-cut diamonds priced below 350
 nrow(diamonds[cut == "Ideal" & price < 350])


 # Merging in a Data Table
 cuts <- unique(diamonds$cut)

 # Lookup tables keyed by cut. cutQuality supplies five labels (assumes
 # diamonds has five distinct cuts); cutPriceType covers only the first four
 # cuts, so one cut has no price type.
 cutQuality <- data.table(cut = cuts, quality = paste0("Q", 1:5))
 cutPriceType <- data.table(cut = cuts[1:4], priceType = paste0("P", 1:4))

 # left join: keep every diamonds row, add quality where the cut matches
 diamondsWithQuality <- merge(diamonds, cutQuality, by = "cut", all.x = TRUE)

 # drop the price_category column from the merged result only
 diamondsWithQuality[, price_category := NULL]

 # left join against the incomplete lookup: unmatched cuts get NA priceType
 diamondsWithPrice <- merge(diamonds, cutPriceType, by = "cut", all.x = TRUE)
 naPriceType <- diamondsWithPrice[is.na(priceType)]

 # default (inner) join: only rows whose cut exists in cutPriceType
 diamondsWithPriceOnlyExisting <- merge(diamonds, cutPriceType, by = "cut")

 # Cartesian Join - create all combinations of cut and color
 cutTable <- data.table(cut = unique(diamonds$cut))
 colorTable <- data.table(color = unique(diamonds$color))

 # A constant helper column gives every row of both tables the same join
 # value, so joining on it pairs each cut with each color.
 cutTable[, key := 1]
 colorTable[, key := 1]

 # allow.cartesian = TRUE
 cutColorCombo <- merge(
   cutTable,
   colorTable,
   by = "key",
   allow.cartesian = TRUE
 )

 # deleting a column: the helper column is no longer needed
 cutColorCombo[, key := NULL]

 # Exercise 3
 # Does the data set contain all possible combinations of these
 # cut, color, clarity types?
 # If no, list the values of missing combinations in the diamonds set.
 # Which combination(s) has/have the most records?
 # Which one(s) has/have the least? Print them in your own way

 # One single-column table per attribute, each with a constant join column
 cutTable <- data.table(cut = unique(diamonds$cut))
 colorTable <- data.table(color = unique(diamonds$color))
 clarityTable <- data.table(clarity = unique(diamonds$clarity))
 cutTable[, key := 1]
 colorTable[, key := 1]
 clarityTable[, key := 1]

 # Two cartesian joins on the constant column: cut x color, then x clarity
 cutColorTable <- merge(
   cutTable,
   colorTable,
   by = "key",
   allow.cartesian = TRUE
 )
 cutColorClarityTable <- merge(
   cutColorTable,
   clarityTable,
   by = "key",
   allow.cartesian = TRUE
 )

 # One row per combination that actually occurs in diamonds, flagged present
 diamonds1 <- unique(diamonds, by = c("cut", "color", "clarity"))
 diamonds1[, present := 1]

 # Left join: combinations missing from diamonds end up with present = NA
 final <- merge(
   cutColorClarityTable,
   diamonds1,
   by = c("cut", "color", "clarity"),
   all.x = TRUE
 )
 View(final)
 View(final[is.na(present)])


 # Turn iris into a data.table (this creates a global iris that masks the
 # built-in data set) and inspect it
 iris <- data.table(iris)
 View(iris)
 summary(iris)
 str(iris)

 # Plotting
 # qplot() is deprecated in current ggplot2 releases; the explicit ggplot()
 # calls below draw the same plots.

 # Scatter plot of petal length against sepal length, colored by species
 ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
   geom_point()

 # Histogram of sepal length with the bar outlines colored by species
 ggplot(iris, aes(x = Sepal.Length, color = Species)) +
   geom_histogram()


 # Modelling

 # use the copy function so that the feature table is independent of iris
 FeatureSetTrain <- copy(iris)

 # keep only the response and the single predictor
 FeatureSetTrain <- FeatureSetTrain[
   ,
   c("Sepal.Length", "Petal.Length"),
   with = FALSE
 ]

 # First 100 rows. NOTE: train is not used below; the model is fitted on
 # all rows of FeatureSetTrain.
 train <- FeatureSetTrain[1:100]

 # observed response values
 Target <- FeatureSetTrain$Sepal.Length

 FeatureSetTrain <- data.frame(FeatureSetTrain)

 # simple linear regression of sepal length on petal length
 fit1 <- lm(Sepal.Length ~ Petal.Length, data = FeatureSetTrain)

 print(coef(fit1))

 # only the first coefficient (the intercept) is stored here
 coeff <- data.table(coef(fit1)[[1]])

 # predictions on the same rows the model was fitted on
 res <- predict(fit1, FeatureSetTrain)
 inp <- data.table(Sepal.Length = Target)
 res <- data.table(Sepal.Length.Pred = res)

 # observed minus predicted
 View(inp - res)

 fin_res <- cbind(inp, res)
 View(fin_res)

 # iris already contains Sepal.Length, so only the prediction column is
 # appended; binding fin_res would give two columns named Sepal.Length.
 FinalRes <- cbind(iris, res)
 View(FinalRes)

 # MAE By Mean and ME By Mean Analysis

 # Exercise 4
 # Calculate the Mean Error, Mean Absolute Error and Mean Sepal Length for each species type
 # Then calculate Mean Error By Mean and Mean Absolute Error by Mean for each species type

 # 30 x 3 matrix: one column per normal sample, with means 0, 2 and 5
 m <- cbind(rnorm(30, 0), rnorm(30, 2), rnorm(30, 5))
 View(m)

 # See help of apply function - 1 represents rows and 2 represents columns
 # Here: count the negative entries in every column
 apply(m, 2, function(column) sum(column < 0))

 # sapply and lapply
 # sapply: applies the function to each element and simplifies the result
 sapply(1:3, function(x) x^2)

 # lapply, very similar function but returns a list rather than a vector
 lapply(1:3, function(x) x^2)

 # rbind() on the single list returned by lapply gives a one-row list-matrix
 # with one cell per cubed sepal length
 rbind(lapply(iris$Sepal.Length, function(x) x^3))

 # Add the squared sepal length as column xyz. `:=` already works on the
 # whole column; calling it inside lapply() with one scalar at a time
 # overwrote the entire column on every iteration, leaving all rows with the
 # square of the last value.
 iris[, xyz := Sepal.Length^2]
	# Install the required packages only when they are not available yet, so that
	# re-running the script does not download and reinstall them every time.
	required_pkgs <- c("data.table", "ggplot2")
	missing_pkgs <- required_pkgs[
	  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
	]
	if (length(missing_pkgs) > 0) {
	  install.packages(missing_pkgs)
	}

	# import library
	library(data.table)
	library(ggplot2)

	# Assigning vectors: x is built element by element with c(), y from the
	# integer sequence 1:5
	x <- c(0, 1, 2, 3, 4)
	y <- 1:5

	# Using a function in R
	# square_root(x) returns sqrt(x); called without an argument it uses x = 2.
	square_root <- function(x = 2) {
	  sqrt(x)
	}

	# Creating a data table: every named argument becomes a column
	dt <- data.table(name = c("Himanshu", "Abhishek"), age = c(22, 23))

	# Viewing a data table
	# Reading and writing tables, removing row names

	# Convert the diamonds data set (shipped with ggplot2) into a data.table.
	# attach() is intentionally not used: data.table resolves column names
	# inside diamonds[...] on its own, and attaching would only place copies of
	# the columns on the search path, where they can mask other objects.
	diamonds <- data.table(diamonds)

	# show all column names of diamonds
	colnames(diamonds)

	# number of rows
	nrow(diamonds)

	# number of columns
	ncol(diamonds)

	# dimensions (rows, columns)
	dim(diamonds)

	# changing column names in a data table (setnames renames in place)
	setnames(diamonds, c("carat", "cut"), c("Carat", "Cut"))

	colnames(diamonds)

	# rename back so the rest of the script can keep using carat and cut
	setnames(diamonds, c("Carat", "Cut"), c("carat", "cut"))


	# Find the number and types of unique cuts and assign them to variables
	unique_cuts <- unique(diamonds$cut)
	unique_cuts

	len_unique_cuts <- length(unique_cuts)
	len_unique_cuts

	# Find the unique combinations of cut and clarity.
	# NOTE: the variable keeps its existing name (..._and_colors) although the
	# rows are de-duplicated on cut and clarity, so the printout selects
	# clarity; selecting color would not show the de-duplicated combinations.
	unique_cuts_and_colors <- unique(diamonds, by = c("cut", "clarity"))
	unique_cuts_and_colors[, c("cut", "clarity"), with = FALSE]

	# Ideal-cut combinations only; color is the value carried by the row that
	# was kept for each cut/clarity pair
	unique_cuts_and_colors[
	  cut == "Ideal",
	  c("cut", "color", "clarity"),
	  with = FALSE
	]


	# Filtering - keep only the rows with an Ideal cut and color E
	IdealCut_EColor_diamonds <- diamonds[cut == "Ideal" & color == "E"]
	IdealCut_EColor_diamonds

	# Grouping - number of rows and mean price for every cut
	grouping_on_cut <- diamonds[
	  ,
	  .(Total_instance = .N, mean_price = mean(price)),
	  by = "cut"
	]

	# Defining a new column in R
	# `:=` adds price_category to diamonds itself; only the rows matching the
	# filter are set to "Cheap". The call hands back the whole table, so
	# price_cat shows all of diamonds, not just the filtered rows.
	# NOTE(review): 3457.42 is presumably meant to be the mean price of Ideal
	# cuts from grouping_on_cut - confirm.
	price_cat <- diamonds[
	  cut == "Ideal" & price < 3457.42,
	  price_category := "Cheap"
	]
	View(price_cat)

	# One way to list the cut with the highest mean price
	max_mean_price <- grouping_on_cut[, max(mean_price)]
	cut_highest_price <- grouping_on_cut[mean_price == max_mean_price, unique(cut)]



	# Exercise
	# Find the count of instances and mean of depth of every cut and color combination
	# Find and print the cut and color combination with least depth and its value
	# Find the combination with highest mean depth among the Premium Cut

	grouping_on_cut_color <- diamonds[
	  ,
	  .(Total_instance = .N, mean_depth = mean(depth)),
	  by = c("cut", "color")
	]

	# Combination with the smallest mean depth, printed together with its value
	min_depth <- min(grouping_on_cut_color$mean_depth)
	grouping_on_cut_color[
	  mean_depth == min_depth,
	  c("cut", "color", "mean_depth"),
	  with = FALSE
	]

	# Highest mean depth among the Premium cuts. The lookup is restricted to
	# Premium rows so that a different cut that happens to share the same mean
	# depth is not reported as well.
	max_depth_Premium <- max(grouping_on_cut_color[cut == "Premium"]$mean_depth)
	grouping_on_cut_color[
	  cut == "Premium" & mean_depth == max_depth_Premium,
	  c("cut", "color"),
	  with = FALSE
	]

	# Number of Ideal-cut diamonds priced below 350
	nrow(diamonds[cut == "Ideal" & price < 350])


	# Merging in a Data Table
	cuts <- unique(diamonds$cut)

	# Lookup tables keyed by cut. cutQuality supplies five labels (assumes
	# diamonds has five distinct cuts); cutPriceType covers only the first four
	# cuts, so one cut has no price type.
	cutQuality <- data.table(cut = cuts, quality = paste0("Q", 1:5))
	cutPriceType <- data.table(cut = cuts[1:4], priceType = paste0("P", 1:4))

	# left join: keep every diamonds row, add quality where the cut matches
	diamondsWithQuality <- merge(diamonds, cutQuality, by = "cut", all.x = TRUE)

	# drop the price_category column from the merged result only
	diamondsWithQuality[, price_category := NULL]

	# left join against the incomplete lookup: unmatched cuts get NA priceType
	diamondsWithPrice <- merge(diamonds, cutPriceType, by = "cut", all.x = TRUE)
	naPriceType <- diamondsWithPrice[is.na(priceType)]

	# default (inner) join: only rows whose cut exists in cutPriceType
	diamondsWithPriceOnlyExisting <- merge(diamonds, cutPriceType, by = "cut")

	# Cartesian Join - create all combinations of cut and color
	cutTable <- data.table(cut = unique(diamonds$cut))
	colorTable <- data.table(color = unique(diamonds$color))

	# A constant helper column gives every row of both tables the same join
	# value, so joining on it pairs each cut with each color.
	cutTable[, key := 1]
	colorTable[, key := 1]

	# allow.cartesian = TRUE
	cutColorCombo <- merge(
	  cutTable,
	  colorTable,
	  by = "key",
	  allow.cartesian = TRUE
	)

	# deleting a column: the helper column is no longer needed
	cutColorCombo[, key := NULL]

	# Exercise 3
	# Does the data set contain all possible combinations of these
	# cut, color, clarity types?
	# If no, list the values of missing combinations in the diamonds set.
	# Which combination(s) has/have the most records?
	# Which one(s) has/have the least? Print them in your own way

	# One single-column table per attribute, each with a constant join column
	cutTable <- data.table(cut = unique(diamonds$cut))
	colorTable <- data.table(color = unique(diamonds$color))
	clarityTable <- data.table(clarity = unique(diamonds$clarity))
	cutTable[, key := 1]
	colorTable[, key := 1]
	clarityTable[, key := 1]

	# Two cartesian joins on the constant column: cut x color, then x clarity
	cutColorTable <- merge(
	  cutTable,
	  colorTable,
	  by = "key",
	  allow.cartesian = TRUE
	)
	cutColorClarityTable <- merge(
	  cutColorTable,
	  clarityTable,
	  by = "key",
	  allow.cartesian = TRUE
	)

	# One row per combination that actually occurs in diamonds, flagged present
	diamonds1 <- unique(diamonds, by = c("cut", "color", "clarity"))
	diamonds1[, present := 1]

	# Left join: combinations missing from diamonds end up with present = NA
	final <- merge(
	  cutColorClarityTable,
	  diamonds1,
	  by = c("cut", "color", "clarity"),
	  all.x = TRUE
	)
	View(final)
	View(final[is.na(present)])


	# Turn iris into a data.table (this creates a global iris that masks the
	# built-in data set) and inspect it
	iris <- data.table(iris)
	View(iris)
	summary(iris)
	str(iris)

	# Plotting
	# qplot() is deprecated in current ggplot2 releases; the explicit ggplot()
	# calls below draw the same plots.

	# Scatter plot of petal length against sepal length, colored by species
	ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
	  geom_point()

	# Histogram of sepal length with the bar outlines colored by species
	ggplot(iris, aes(x = Sepal.Length, color = Species)) +
	  geom_histogram()


	# Modelling

	# use the copy function so that the feature table is independent of iris
	FeatureSetTrain <- copy(iris)

	# keep only the response and the single predictor
	FeatureSetTrain <- FeatureSetTrain[
	  ,
	  c("Sepal.Length", "Petal.Length"),
	  with = FALSE
	]

	# First 100 rows. NOTE: train is not used below; the model is fitted on
	# all rows of FeatureSetTrain.
	train <- FeatureSetTrain[1:100]

	# observed response values
	Target <- FeatureSetTrain$Sepal.Length

	FeatureSetTrain <- data.frame(FeatureSetTrain)

	# simple linear regression of sepal length on petal length
	fit1 <- lm(Sepal.Length ~ Petal.Length, data = FeatureSetTrain)

	print(coef(fit1))

	# only the first coefficient (the intercept) is stored here
	coeff <- data.table(coef(fit1)[[1]])

	# predictions on the same rows the model was fitted on
	res <- predict(fit1, FeatureSetTrain)
	inp <- data.table(Sepal.Length = Target)
	res <- data.table(Sepal.Length.Pred = res)

	# observed minus predicted
	View(inp - res)

	fin_res <- cbind(inp, res)
	View(fin_res)

	# iris already contains Sepal.Length, so only the prediction column is
	# appended; binding fin_res would give two columns named Sepal.Length.
	FinalRes <- cbind(iris, res)
	View(FinalRes)

	# MAE By Mean and ME By Mean Analysis

	# Exercise 4
	# Calculate the Mean Error, Mean Absolute Error and Mean Sepal Length for each species type
	# Then calculate Mean Error By Mean and Mean Absolute Error by Mean for each species type

	# 30 x 3 matrix: one column per normal sample, with means 0, 2 and 5
	m <- cbind(rnorm(30, 0), rnorm(30, 2), rnorm(30, 5))
	View(m)

	# See help of apply function - 1 represents rows and 2 represents columns
	# Here: count the negative entries in every column
	apply(m, 2, function(column) sum(column < 0))

	# sapply and lapply
	# sapply: applies the function to each element and simplifies the result
	sapply(1:3, function(x) x^2)

	# lapply, very similar function but returns a list rather than a vector
	lapply(1:3, function(x) x^2)

	# rbind() on the single list returned by lapply gives a one-row list-matrix
	# with one cell per cubed sepal length
	rbind(lapply(iris$Sepal.Length, function(x) x^3))

	# Add the squared sepal length as column xyz. `:=` already works on the
	# whole column; calling it inside lapply() with one scalar at a time
	# overwrote the entire column on every iteration, leaving all rows with the
	# square of the last value.
	iris[, xyz := Sepal.Length^2]