Skip to content

Instantly share code, notes, and snippets.

@BrunoGrandePhD
Created October 26, 2016 00:03
Show Gist options
  • Save BrunoGrandePhD/b9f572676e811282d168a23babc12fbc to your computer and use it in GitHub Desktop.
Save BrunoGrandePhD/b9f572676e811282d168a23babc12fbc to your computer and use it in GitHub Desktop.
R script capturing what was covered in the SciProg.ca workshop on ggplot2 (https://github.com/sciprog-sfu/sciprog-sfu.github.io/issues/143)
## ggplot2 workshop -- SciProg, October 24 2016
# Author: Michelle Jones

# One-time setup: uncomment these lines to install the workshop packages.
# install.packages(c("tidyverse", "ggthemes", "cowplot"))
# install.packages("gapminder")

# Attach the packages used below. ggplot2 is attached on its own first,
# followed by the rest of the workshop packages.
library("ggplot2")
library("tidyverse")
library("ggthemes")
library("cowplot")
library("gapminder")

# Quick look at the first rows of the gapminder data set.
head(gapminder, n = 6)
# Reference: H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
# Springer-Verlag New York, 2009.
#
# ggplot2 works on data stored in a data frame. You must provide:
# (1) data
# (2) aesthetics - these connect columns to the plot ("link columns to
#     visual features"): position, color, size, shape, ...
# (3) a geom - the shapes you actually want to draw, i.e. what people
#     see: points, lines, etc.
#
# Other components are set automatically, but can be overridden manually:
# (1) scale - maps data values to values the computer can draw
# (2) stat  - summarization or transformation; some geoms compute simple
#             statistics for you (for example, geom_boxplot summarizes
#             the median and related statistics)
# (3) facet - splits the data into multiple plots

## Example
# Step one: take the data.
# The aesthetics say which columns are going to be used (e.g. displacement
# and horsepower); the scale then converts those values into actual
# positions on the screen.
head(gapminder)
names(gapminder)

# View() opens the data in a spreadsheet-style viewer (a new tab in
# RStudio, a separate window elsewhere). It needs an interactive session,
# so it is skipped when the script is run non-interactively.
if (interactive()) {
  View(gapminder)
}
# Step one - create a simple plot.
# aes() links columns to visual features.
# With no geom, no data is drawn.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp))

# Add a geom: now the observations are drawn as points.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Save the base plot (data + aesthetics, no geom yet) as an object so that
# different layers can be added to it below.
p <- ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp))

# Maybe we want to change the x scale to something logarithmic.
p + geom_point() + scale_x_log10()

# What if we want to color by continent?
# This is an aesthetic mapping: a column can be mapped to shape, size or
# color. Color makes the most sense here because continent is a discrete
# variable.
# Aesthetics can be given in different locations: x and y are global
# (gdpPercap and lifeExp, set in ggplot()), while an aesthetic given inside
# a geom is local to that layer.
p + geom_point(aes(color = continent)) + scale_x_log10()

# We can also add transparency.
# To change a feature across the board, specify it outside aes(): such
# features are not mapped to data, they are set to a fixed value.
p +
  geom_point(aes(color = continent), size = 0.6, alpha = 0.3) +
  scale_x_log10()
## geom_smooth() adds a fitted line with a standard-error band.
## Use se = FALSE to get rid of the band.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth()

## The fitting method can be specified, for instance a linear model ("lm").
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE)

## color = continent can be added to the points again.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(mapping = aes(color = continent)) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE)

# We can also facet, so that there is one panel per continent:
# add + facet_wrap(~continent)
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(mapping = aes(color = continent)) +
  scale_x_log10() +
  facet_wrap(~continent)

## Adding another layer puts the linear models on top of each panel.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(mapping = aes(color = continent)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  geom_smooth(method = "lm", se = FALSE)

# Say we want to take out Oceania: go back to the beginning and filter
# the data that is handed to ggplot().
ggplot(
  data = subset(gapminder, continent != "Oceania"),
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(mapping = aes(color = continent)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  geom_smooth(method = "lm", se = FALSE)
## Boxplots and strip plots
## Map a factor (continent) to x and life expectancy to y.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_point()

# geom_jitter() adds some random noise to make the points easier to read.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_jitter()

# The amount of jitter can be specified through the position argument.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_jitter(position = position_jitter(height = 0, width = 0.8))

# Then add some transparency.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_jitter(
    position = position_jitter(height = 0, width = 0.8),
    alpha = 0.5
  )

# Save the base plot so that layers can be added to it.
p2 <- ggplot(gapminder, aes(x = continent, y = lifeExp))
p2 + geom_boxplot()

# Think of ggplot2 as different layers: add the raw data on top of the
# boxplots.
p2 +
  geom_boxplot() +
  geom_jitter(position = position_jitter(height = 0, width = 0.2), alpha = 0.5)

# Order matters - later layers are drawn on top, so if the boxplot and the
# points are switched, the points are covered by the boxes.
p2 +
  geom_jitter(
    position = position_jitter(height = 0, width = 0.2),
    alpha = 0.5
  ) +
  geom_boxplot()

# Now let's add some color. "color" is the outline, "fill" is the interior
# of the boxplot (the polygon).
# Coloring by continent maps a column to a visual feature, so it has to be
# given as an aesthetic inside aes().
p2 + geom_boxplot(aes(fill = continent))

# Summaries can be added on top as well. `fun` replaces the deprecated
# `fun.y` argument of stat_summary().
p2 +
  geom_boxplot(aes(fill = continent)) +
  stat_summary(fun = median, color = "red", geom = "point", size = 3)
## Distribution plots in ggplot: histograms, frequency polygons, densities
p3 <- ggplot(data = gapminder, mapping = aes(x = lifeExp))
p3

# A geom still needs to be added.
p3 +
  geom_histogram()

# Specify the bin width.
# binwidth sits outside aes() because it is a fixed property of the layer,
# independent of the data.
p3 +
  geom_histogram(binwidth = 1)

# What if you want to add color for continent?
p3 +
  geom_histogram(binwidth = 1) +
  aes(fill = continent)

## Histograms can also overlap instead of being stacked.
p3 +
  geom_histogram(position = "identity", binwidth = 1) +
  aes(fill = continent)

# The distribution can also be visualized as an outline.
p3 +
  aes(color = continent) +
  geom_freqpoly()
p3 +
  aes(color = continent) +
  geom_freqpoly(binwidth = 1)

## Kernel density plot
p3 +
  aes(color = continent) +
  geom_density()

# `adjust` controls how smooth the curve is.
p3 +
  aes(color = continent) +
  geom_density(adjust = 0.3)

# The inside of the curves can be colored as well ...
p3 +
  aes(fill = continent) +
  geom_density(adjust = 0.3)

# ... but filling hides data behind the front curves - make them
# transparent.
p3 +
  aes(fill = continent) +
  geom_density(alpha = 0.3, adjust = 0.3)

# Faceting works here too.
p3 +
  aes(fill = continent) +
  geom_density(alpha = 0.3, adjust = 0.3) +
  facet_wrap(~continent)
# Boxplots can be problematic when the x variable (year) is treated as a
# numeric variable: without a grouping there is no boxplot per year.
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_boxplot()

# Use the group aesthetic to get one box per year.
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_boxplot(aes(group = year))

# Violin plots
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_violin(aes(group = year))

# Add the yearly median as points. `fun` replaces the deprecated `fun.y`
# argument of stat_summary().
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_violin(aes(group = year)) +
  stat_summary(fun = median, color = "red", geom = "point")

# ... or as a line ...
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_violin(aes(group = year)) +
  stat_summary(fun = median, color = "red", geom = "line")

# ... or as both a line and points.
ggplot(gapminder, aes(x = year, y = lifeExp)) +
  geom_violin(aes(group = year)) +
  stat_summary(fun = median, color = "red", geom = "line") +
  stat_summary(fun = median, color = "red", geom = "point")
## Bar plots
# geom_bar() counts the rows in each category for you.
ggplot(gapminder, aes(x = continent)) +
  geom_bar()

# How can the bars be ordered from lowest to highest (or highest to
# lowest)? Reorder the factor levels by the number of rows per continent.
ggplot(gapminder, aes(x = reorder(continent, continent, length))) +
  geom_bar()

# Add labels.
ggplot(gapminder, aes(x = reorder(continent, continent, length))) +
  geom_bar() +
  labs(title = "continent", x = "continent", y = "count")

# Count the rows per continent ourselves. dplyr::count() is called with an
# explicit namespace, so plyr is not attached here: attaching plyr after
# dplyr masks several dplyr functions and nothing in this script needs it.
continent_freq <- dplyr::count(gapminder, continent)

# geom_bar() automatically counts behind the scenes. Here the data has
# already been counted, and ggplot does not know that, so combining the
# default counting stat with a y aesthetic is an error. The call below is
# expected to fail; try() reports the error and lets the script continue.
try(print(
  ggplot(continent_freq, aes(x = continent, y = n)) +
    geom_bar()
))

# Tell ggplot that the values are already counted with stat = "identity"
# (geom_col() is a shorthand for the same thing in newer ggplot2 versions).
ggplot(continent_freq, aes(x = continent, y = n)) +
  geom_bar(stat = "identity")
## Themes
# Themes change the overall look of a plot. ggplot2 ships some, and
# packages such as ggthemes provide more.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  scale_x_log10() +
  geom_point() +
  theme_dark()

ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  scale_x_log10() +
  geom_point() +
  theme_economist()

ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  scale_x_log10() +
  geom_point() +
  theme_wsj()
# etc.

# cowplot provides a theme that is often used.
library("cowplot")

# cowplot can also combine plots into labelled multi-panel figures.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  scale_x_log10() +
  geom_point()
# Workshop note: when cowplot is loaded it overrides the ggplot2 defaults
# with its own opinion.
# NOTE(review): this may depend on the installed cowplot version - confirm.
# Plot only the countries of the Americas in 1952.
ggplot(
  subset(gapminder, continent == "Americas" & year == 1952),
  aes(x = gdpPercap, y = lifeExp)
) +
  scale_x_log10() +
  geom_point()

# ggrepel draws labels that repel each other so they do not overlap.
# One-time setup: uncomment to install (kept commented out, like the other
# install lines, so that running the script does not reinstall packages).
# install.packages("ggrepel")
library(ggrepel)

ggplot(
  subset(gapminder, continent == "Americas" & year == 1952),
  aes(x = gdpPercap, y = lifeExp)
) +
  scale_x_log10() +
  geom_point() +
  geom_label_repel(aes(label = country))

# Life expectancy over time for a handful of countries.
# Names must match the `country` column exactly: a misspelt name is
# silently dropped by %in%.
countries <- c("Canada", "Rwanda", "Cambodia", "Mexico")
# droplevels() removes the unused country levels so that only the selected
# countries show up in the legend.
gapminder_small <- droplevels(subset(gapminder, country %in% countries))

ggplot(gapminder_small, aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  geom_point()

# Use a ColorBrewer palette for the country colors.
ggplot(gapminder_small, aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  geom_point() +
  scale_color_brewer(palette = "Dark2")
# Label only a subset of the points, chosen dynamically from the data.
# Here: only the countries whose life expectancy is above 65.
america_1952 <- subset(
  x = gapminder,
  subset = continent == "Americas" & year == 1952
)

ggplot(data = america_1952, mapping = aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point() +
  # The label layer gets its own, smaller data set, so only those rows
  # receive a label while all points are still drawn.
  geom_label_repel(
    mapping = aes(label = country),
    data = subset(x = america_1952, subset = lifeExp > 65)
  )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment