Skip to content

Instantly share code, notes, and snippets.

View cimentadaj's full-sized avatar

Jorge Cimentada cimentadaj

View GitHub Profile
library(scales) ## install.packages("scales") if you don't have it installed
# Bar chart of Home.Value by region. stat = "identity" plots the values as they
# are in the data rather than counting rows; the y axis uses comma labels.
ggplot(data = housing, aes(x = region, y = Home.Value)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = comma)
## What if you want the bar height to be a summary statistic such as the mean,
## max or min? Set stat = "summary" and pass the summary function through `fun`.
## (`fun` replaced the `fun.y` argument, which is deprecated since ggplot2 3.3.0.)
# This uses the mean of Home.Value as the height of each bar
ggplot(data = housing, aes(x = region, y = Home.Value)) +
  geom_bar(stat = "summary", fun = mean) +
  scale_y_continuous(labels = comma)
# Plain scatterplot: Structure.Cost (y) against Home.Value (x).
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_point()
# Control the opacity of the points. Try different alpha values to see which
# transparency works best for your data.
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_point(alpha = 0.1)
# Same transparency, with the points drawn in red.
ggplot(data = housing, mapping = aes(x = Home.Value, y = Structure.Cost)) +
  geom_point(alpha = 0.1, colour = "red")
# Each example first drops the rows whose region is missing.
# geom_bar(): one bar per region, blue outline and red fill.
ggplot(
  data = housing[!is.na(housing$region), ],
  mapping = aes(x = region)
) +
  geom_bar(color = "blue", fill = "red")
# geom_density(): density of Home.Value, filled by region.
ggplot(
  data = housing[!is.na(housing$region), ],
  mapping = aes(x = Home.Value, fill = region)
) +
  geom_density()
# geom_smooth(): one smoothed line per region, told apart by both colour and
# line type.
ggplot(
  data = housing[!is.na(housing$region), ],
  mapping = aes(
    x = Home.Value,
    y = Structure.Cost,
    color = region,
    linetype = region
  )
) +
  geom_smooth()
# With geom_bar but more complicated
# Build the base scatterplot, store it in `m` and display it.
m <- ggplot(
  data = housing,
  mapping = aes(x = Land.Price.Index, y = Home.Price.Index)
) +
  geom_point()
m
# Name the y axis; waiver() keeps the breaks and labels at their defaults.
m <- m +
  scale_y_continuous(
    name = "Home Price Index",
    breaks = waiver(),
    labels = waiver()
  )
m
# Name the x axis and give it explicit limits, breaks and custom labels.
m <- m +
  scale_x_continuous(
    name = "Land Price Index",
    limits = c(0, 8),
    breaks = c(0, 2, 4, 6, 8),
    labels = c("Hi", "this", "is", "a", "trial")
  )
m
# Manual transformation: take the square root of each variable inside aes().
ggplot(
  data = housing,
  mapping = aes(x = sqrt(Land.Price.Index), y = sqrt(Home.Price.Index))
) +
  geom_point()
# Scale transformation: map the raw variables and let the axis scales apply
# the square root.
ggplot(
  data = housing,
  mapping = aes(x = Land.Price.Index, y = Home.Price.Index)
) +
  geom_point() +
  scale_x_sqrt() +
  scale_y_sqrt()
# Coordinate transformation: the variables and scales stay untransformed and
# the square root is applied by the coordinate system instead.
# The original statement ended in a dangling `+`, which made R try to add the
# plot `m` from the following line to this one.
# NOTE(review): coord_trans() with "sqrt" is assumed from the two sqrt
# examples before it -- confirm this is the intended call.
ggplot(housing, aes(x = Land.Price.Index, y = Home.Price.Index)) +
  geom_point() +
  coord_trans(x = "sqrt", y = "sqrt")
# Ready-made scale transformations, each applied to the scatterplot `m`.
m + scale_x_reverse() # reverses the direction of the x axis
m + scale_x_sqrt() # square-root x axis, as in the scale example
m + scale_x_log10() # log base 10 x axis
library(scales) # install.packages("scales") in case you don't have it
# Put the y axis of `m` on a log base 2 scale.
m +
  scale_y_continuous(
    trans = log2_trans(), # choose the transformation
    breaks = trans_breaks("log2", function(x) 2^x), # where the breaks go
    labels = trans_format("log2", math_format(2^.x)) # how they are labelled
  )
# Custom transformations are built with trans_new() from the scales library,
# so remember to load that library. Wrap the call in a new function whose name
# carries the _trans suffix. trans_new() is given the transformation's name,
# the transformation itself and its inverse; here both simply multiply by 1,
# which is why the same function appears twice (see ?trans_new).
mult_trans <- function() {
  trans_new(
    name = "mult",
    transform = function(x) 1 * x,
    inverse = function(x) 1 * x
  )
}
# To build your own transformation from this template, change the name of the
# function (something_trans), the character string passed to trans_new() and
# the transformation/inverse pair.
# Boxplot of Land.Price.Index by region, leaving out rows with a missing
# region; outlier.shape = NA removes the outlier points from the plot.
o <- ggplot(
  housing[!is.na(housing$region), ],
  aes(x = region, y = Land.Price.Index)
) +
  geom_boxplot(outlier.shape = NA)
# The options of scale_*_discrete are practically the same as those of
# scale_*_continuous, except for limits. On a continuous axis limits sets the
# minimum and maximum; a categorical axis has neither, so there limits
# controls which categories appear and in what order.
# The call below was left unclosed in the original, which swallowed the
# statements that follow it; it is closed here.
# NOTE(review): a limits = c(...) argument listing the regions in the desired
# order was probably intended as well -- add it once that order is known.
o + scale_x_discrete(
  name = "Region",
  breaks = waiver()
)
# Boxplot whose y-axis labels use scientific notation; store it and show it.
o <- ggplot(
  data = housing,
  mapping = aes(x = region, y = Land.Price.Index)
) +
  geom_boxplot(outlier.shape = NA) +
  scale_y_continuous(labels = scientific)
o
# Switch the y-axis labels to standard numbers with comma separators.
o + scale_y_continuous(labels = comma)
# Switch the y-axis labels to percentages.
o + scale_y_continuous(labels = percent)