Skip to content

Instantly share code, notes, and snippets.

View cimentadaj's full-sized avatar

Jorge Cimentada cimentadaj

View GitHub Profile
# Simulate the number of kids (1 to 4) for 100 respondents, drawn with
# replacement. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
kids <- sample(1:4, 100, replace = TRUE)
# Regress income on gender and number of kids. `income` and `gender` are not
# created in this snippet and must already exist in the workspace.
lm(income ~ gender + kids)
# Fix the RNG seed so the simulated data below is reproducible.
set.seed(1)
# Simulated incomes for 100 respondents, drawn with replacement from 1000-5000.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
income <- sample(1000:5000, 100, replace = TRUE)
# Alternating gender dummy: 1, 0, 1, 0, ... (50 of each).
gender <- rep(c(1, 0), 50)
# Two-sample t-test of income by gender.
t <- t.test(income ~ gender)
# Difference between the two group means reported by the test
# (second group minus first group).
unname(t$estimate[2] - t$estimate[1])
# Difference of about 201.46
# NOTE(review): the exact figure depends on the R version, because the default
# algorithm behind sample() changed in R 3.6.0 -- confirm before relying on it.
# The same difference shows up as the `gender` coefficient of a regression.
model <- lm(income ~ gender)
coef(model)
# Let's create a scatterplot with a discrete variable (State) on the x axis.
# Rows with a missing region are dropped before plotting.
# Note that the color is conditional on a continuous variable (Home.Value).
p <- ggplot(
  housing[!is.na(housing$region), ],
  aes(x = State, y = Land.Value, color = Home.Value)
) +
  geom_point(alpha = 0.3)
p
# Here we set the color to be blue when the Home.Value variable is low and red
# when it's high.
p + scale_color_gradient(low = "blue", high = "red", name = "Home Value")
# You can do something similar with scale_color_gradient2(), but it has a
# different default color scheme and also takes a `mid` color.
# muted() comes from the scales package; it is namespace-qualified so this line
# works even when scales has not been attached yet.
p + scale_color_gradient2(
  low = scales::muted("red"),
  mid = "white",
  high = scales::muted("black"),
  name = "Home Value"
)
library(RColorBrewer) # install.packages("RColorBrewer") in case you don't have it installed
# Simple bar graph with the mean Land.Value of each state, colored by region.
# Rows with a missing region are dropped before plotting.
# `fun = mean` replaces the `fun.y` argument, which ggplot2 deprecated in 3.3.0.
g <- ggplot(
  housing[!is.na(housing$region), ],
  aes(x = State, y = Land.Value, fill = region)
) +
  geom_bar(alpha = 0.5, stat = "summary", fun = mean)
g
# To change the colors of a legend manually, you can use scale_fill_manual().
# NOTE(review): no alpha aesthetic is mapped in `g`, so scale_alpha() looks like
# a no-op here -- confirm before removing it.
g +
  scale_fill_manual(values = c("skyblue", "royalblue", "blue", "navy")) +
  scale_alpha(range = 0.1)
# Note that here you can even assign a color to a specific category by naming
# the values, e.g. "Midwest" = "royalblue", etc.
# To set existing group of colors
# Boxplot of Land.Price.Index by region with the outlier points hidden; the
# y axis is labelled in scientific notation.
# The label helpers (scientific, comma, percent) belong to the scales package;
# they are namespace-qualified so these lines work even when scales has not
# been attached yet.
o <- ggplot(housing, aes(x = region, y = Land.Price.Index)) +
  geom_boxplot(outlier.shape = NA) +
  scale_y_continuous(labels = scales::scientific)
o
# Change the labels to standard numbers. `o` already carries a y scale, so the
# one added here takes its place.
o + scale_y_continuous(labels = scales::comma)
# Change the labels to percentages.
o + scale_y_continuous(labels = scales::percent)
# Boxplot of Land.Price.Index per region: rows with a missing region are
# dropped first, and outlier points are not drawn.
o <- ggplot(
  data = housing[!is.na(housing$region), ],
  mapping = aes(x = region, y = Land.Price.Index)
) +
  geom_boxplot(outlier.shape = NA)
# The options for scale_*_discrete are practically the same as for scale_*_continuous, except for limits.
# Remember that limits controls the minimum and maximum of the chosen axis. Given that this is
# a categorical (discrete) axis, it doesn't make sense to define either. Instead, you control the order of categories.
o + scale_x_discrete(name="Region",
breaks=waiver(),
# Custom transformations are built with trans_new() from the scales package.
# We create a new function whose name carries the _trans suffix. Inside that
# function, trans_new() receives the transformation's name, the transformation
# itself and its inverse. Multiplying by 1 is its own inverse, which is why the
# same function appears twice; see the documentation for trans_new().
# trans_new() is namespace-qualified, so mult_trans() works whether or not
# scales has been attached.
mult_trans <- function() {
  scales::trans_new(
    name = "mult",
    transform = function(x) 1 * x,
    inverse = function(x) 1 * x
  )
}
# If you're planning to use this code to build your own transformations,
# remember to change the name of the function (something_trans), the character
# string inside trans_new() and the actual transformation.
library(scales) # install.packages("scales") in case you don't have it
# Log2-transform the y axis of the existing plot `m`, spelling out the three
# pieces of the scale: the transformation, the breaks and the labels.
m + scale_y_continuous(
  trans = log2_trans(),
  breaks = trans_breaks("log2", function(x) 2^x),
  labels = trans_format("log2", math_format(2^.x))
)
# Reverse the order of the x axis.
m + scale_x_reverse()
# Square-root transformation of the x axis.
m + scale_x_sqrt()
# Base-10 logarithm transformation of the x axis.
m + scale_x_log10()
# Manual transformation: the square roots are computed inside aes(), so the
# plot is drawn from the already transformed values.
ggplot(
  data = housing,
  mapping = aes(x = sqrt(Land.Price.Index), y = sqrt(Home.Price.Index))
) +
  geom_point()
# Scale transformation: the raw variables are mapped and the square root is
# applied by the x and y scales instead.
ggplot(
  data = housing,
  mapping = aes(x = Land.Price.Index, y = Home.Price.Index)
) +
  geom_point() +
  scale_x_sqrt() +
  scale_y_sqrt()
# Coordinate trans
ggplot(housing[], aes(x=Land.Price.Index, y=Home.Price.Index)) + geom_point() +