Skip to content

Instantly share code, notes, and snippets.

View cimentadaj's full-sized avatar

Jorge Cimentada cimentadaj

View GitHub Profile
# Build the base scatterplot of home price index against land price index
m <- ggplot(
  data = housing,
  mapping = aes(x = Land.Price.Index, y = Home.Price.Index)
) +
  geom_point()
m

# Name the y axis; breaks and labels are passed as waiver()
m <- m +
  scale_y_continuous(
    name = "Home Price Index",
    breaks = waiver(),
    labels = waiver()
  )
m

# Name the x axis and give it explicit limits, breaks and custom tick labels
m <- m +
  scale_x_continuous(
    name = "Land Price Index",
    limits = c(0, 8),
    breaks = c(0, 2, 4, 6, 8),
    labels = c("Hi", "this", "is", "a", "trial")
  )
m
# geom_bar(): one bar per region (rows with a missing region are dropped first)
ggplot(
  data = housing[!is.na(housing$region), ],
  mapping = aes(x = region)
) +
  geom_bar(color = "blue", fill = "red")

# geom_density(): density of Home.Value, one filled curve per region
ggplot(
  data = housing[!is.na(housing$region), ],
  mapping = aes(x = Home.Value, fill = region)
) +
  geom_density()

# geom_smooth(): smoothed Structure.Cost against Home.Value, with colour and
# line type both mapped to region
ggplot(
  data = housing[!is.na(housing$region), ],
  mapping = aes(
    x = Home.Value,
    y = Structure.Cost,
    color = region,
    linetype = region
  )
) +
  geom_smooth()
# With geom_bar but more complicated
# Start from a plain scatterplot of Structure.Cost against Home.Value
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_point()

# Control the opacity of the dots through alpha. Try different values to see
# which transparency works best for your data.
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_point(alpha = 0.1)

# Same transparency, now with the dots drawn in red
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_point(alpha = 0.1, colour = "red")
library(scales) ## install.packages("scales") if you don't have it installed

## stat = "identity" uses the y values as they are for the bar heights;
## the y axis labels are formatted with scales::comma.
ggplot(data = housing, aes(x = region, y = Home.Value)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = comma)

## What if you want the bar height to be a summary statistic like the mean, max
## or min? It's easy. Specify "summary" as the stat option and use `fun` to say
## which function to apply to y. (`fun` replaces the older `fun.y` argument,
## which is deprecated since ggplot2 3.3.0.)
# This uses the mean of Home.Value as the bar height for each region
ggplot(data = housing, aes(x = region, y = Home.Value)) +
  geom_bar(stat = "summary", fun = mean) +
  scale_y_continuous(labels = comma)
## Two continuous variables
# stat_identity() is the same as geom_point()
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  stat_identity()
# stat_smooth() is the same as geom_smooth()
ggplot(
  data = housing,
  mapping = aes(x = Home.Value, y = Structure.Cost, color = region)
) +
  stat_smooth()

## One continuous and one categorical
# stat_boxplot() is the same as geom_boxplot()
ggplot(data = housing, mapping = aes(x = region, y = Home.Value)) +
  stat_boxplot()
# stat_ydensity() is the same as geom_violin()
ggplot(
  data = housing,
  mapping = aes(x = region, y = Structure.Cost, color = region)
) +
  stat_ydensity()

## One categorical
# stat_count() is the same as geom_bar()
ggplot(data = housing, mapping = aes(x = region)) +
  stat_count()
## This plots the distribution of Structure.Cost.
ggplot(housing, aes(x = Structure.Cost, color = region)) +
  geom_histogram()
## geom_histogram() cuts the continuous variable into 'bins' and then plots the
## number of observations that fall in each bin. (geom_bar() does not do this:
## it uses stat_count(), which counts the rows at each distinct x value.)
## The binning is done by a stat that this package provides, called stat_bin().
## What will the output of this code be?
ggplot(housing, aes(x = Structure.Cost, color = region)) +
  stat_bin()
## Two continuous variables
# Points, then the same mapping drawn as a line
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_point()
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_line()

## One continuous and one categorical
# Boxplot of Home.Value within each region
ggplot(data = housing, mapping = aes(x = region, y = Home.Value)) +
  geom_boxplot()

## One categorical
# Bar chart of counts per region
ggplot(data = housing, mapping = aes(x = region)) +
  geom_bar()

## One continuous
# Histogram of Home.Value, then a dot plot of Land.Value
ggplot(data = housing, mapping = aes(x = Home.Value)) +
  geom_histogram()
ggplot(data = housing, mapping = aes(x = Land.Value)) +
  geom_dotplot()
# Store the base plot first: it holds only the data and the aesthetic mappings
graph1 <- ggplot(data = housing, aes(x = Home.Value, y = Structure.Cost))
# Adding a geom layer to the stored object draws the scatterplot
graph1 + geom_point()
# First let's download some data files which we will use in the tutorial.
library(ggplot2) # install.packages("ggplot2") if you don't have the package
library(downloader) # install.packages("downloader") if you don't have the package
# Create and set a new directory for the tutorial.
# The path is built once with file.path(); showWarnings = FALSE keeps
# dir.create() quiet when the directory already exists.
tutorial_dir <- file.path(getwd(), "ggplot2tutorial")
dir.create(tutorial_dir, showWarnings = FALSE)
setwd(tutorial_dir)
# Download the zip file with all the data and unzip it to the tutorial folder