Created
October 26, 2016 00:03
-
-
Save BrunoGrandePhD/b9f572676e811282d168a23babc12fbc to your computer and use it in GitHub Desktop.
R script capturing what was covered in the SciProg.ca workshop on ggplot2 (https://github.com/sciprog-sfu/sciprog-sfu.github.io/issues/143)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## GGplot2 workshop Sciprog October 24 2016 | |
# Author: Michelle Jones | |
library(ggplot2) | |
#install the packages for the workshop | |
#install.packages(c("tidyverse","ggthemes","cowplot")) | |
#install.packages("gapminder") | |
#Load packages | |
library(tidyverse) | |
library(ggthemes) | |
library(cowplot) | |
library(gapminder) | |
# Peek at the first rows of the gapminder data used throughout the workshop.
head(gapminder)

# Reference: H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
# Springer-Verlag New York, 2009.
#
# The data is in the form of a data frame. You must provide:
#   (1) data
#   (2) aesthetics - connect columns to the plot (position, color, size,
#       shape, ...): "link columns to visual features"
#   (3) geom - the shapes you actually want to plot, i.e. what people see
#       (points, lines, etc.)
#
# Other things are set automatically, but you can override them manually:
#   (1) scale - maps data values to on-screen values
#   (2) stat  - summarization or transformation; geoms do simple stats for
#       you, e.g. geom_boxplot() summarizes the median and quartiles
#   (3) facet - splits the data into multiple plots

## Example
# Step one: take the data. The aesthetics say which columns are going to be
# used (e.g. displacement and horsepower); the scales then translate those
# values to actual positions on the screen.
head(gapminder)
names(gapminder)

# View() opens the data in a new RStudio tab or a separate viewer window.
# It is only meaningful in an interactive session, so skip it when the
# script is run in batch mode.
if (interactive()) {
  View(gapminder)
}
## Step one - create a simple plot
# aes() links columns to visual features.
# With only data and aesthetics there is no geom yet, so no data is drawn.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp))

# Add the geom: now the points are drawn.
ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Save the base plot as an object. The assignment itself prints nothing;
# layers are added to `p` in the calls below.
p <- ggplot(data = gapminder, aes(x = gdpPercap, y = lifeExp))

# Maybe we want to change the x scale to something logarithmic.
p + geom_point() + scale_x_log10()

# What if we want to color by continent?
# This is aesthetic mapping: we map continent to shape, size or color.
# Color probably makes the most sense because continent is a discrete
# variable.
# Aesthetics can be given in different locations: here x and y are global
# (gdpPercap and lifeExp, set in ggplot()), while color is a local aesthetic
# given inside geom_point().
p + geom_point(aes(color = continent)) + scale_x_log10()

# We can also add transparency.
# To change a feature across the board, specify it outside aes(): such
# features are not mapped to a column, they are set to a fixed value.
p +
  geom_point(aes(color = continent), size = 0.6, alpha = 0.3) +
  scale_x_log10()
## Smoothers and facets
# geom_smooth() adds a fitted line with a standard-error band;
# use se = FALSE to get rid of the band.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth()

# You can specify the fitting method, for instance a linear model ("lm").
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE)

# You can add color = continent to the points again.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  scale_x_log10() +
  geom_smooth(method = "lm", se = FALSE)

# We can also facet so that there is one panel per continent:
# + facet_wrap(~continent)
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  scale_x_log10() +
  facet_wrap(~continent)

# Adding another layer puts the linear model fits on top of each panel.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  geom_smooth(method = "lm", se = FALSE)

# Say we want to take out Oceania: go back to the beginning and filter the
# data that is handed to ggplot().
ggplot(
  subset(gapminder, continent != "Oceania"),
  aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(aes(color = continent)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  geom_smooth(method = "lm", se = FALSE)
## Boxplots and strip plots
# Map a factor (continent) to x and life expectancy to y.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_point()

# geom_jitter() adds some random noise to make the points easier to read.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_jitter()

# Specify the jitter explicitly: no vertical noise, horizontal noise only.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_jitter(position = position_jitter(height = 0, width = 0.8))

# Then add some transparency.
ggplot(gapminder, aes(x = continent, y = lifeExp)) +
  geom_jitter(
    position = position_jitter(height = 0, width = 0.8),
    alpha = 0.5
  )

p2 <- ggplot(gapminder, aes(x = continent, y = lifeExp))
p2 + geom_boxplot()

# Think of ggplot2 as a stack of layers: add the raw data on top of the
# boxplots.
p2 +
  geom_boxplot() +
  geom_jitter(position = position_jitter(height = 0, width = 0.2), alpha = 0.5)

# Order matters: later layers are drawn on top, so if the boxplot and the
# points are switched the points end up covered by the boxes.
p2 +
  geom_jitter(position = position_jitter(height = 0, width = 0.2), alpha = 0.5) +
  geom_boxplot()

# Now add some color. `color` is the outline, `fill` is the interior of the
# boxplot polygon. Coloring by continent maps a column to a visual feature,
# so it has to be given as an aesthetic.
p2 + geom_boxplot(aes(fill = continent))

# Summaries can be layered on as well: mark each median with a red point.
# `fun` is the current argument name; `fun.y` is deprecated since
# ggplot2 3.3.0.
p2 +
  geom_boxplot(aes(fill = continent)) +
  stat_summary(fun = median, color = "red", geom = "point", size = 3)
## Histograms and density plots
p3 <- ggplot(gapminder, aes(x = lifeExp))
# Printing the base object alone draws no data: a geom is still needed.
p3

# Add the geom.
p3 + geom_histogram()

# Specify the bin width. binwidth sits outside aes() because it is a
# universal property, independent of the data.
p3 + geom_histogram(binwidth = 1)

# What if you want to add color for continent?
p3 +
  geom_histogram(binwidth = 1) +
  aes(fill = continent)

# The histograms can also overlap instead of being stacked.
p3 +
  geom_histogram(binwidth = 1, position = "identity") +
  aes(fill = continent)

# You can also visualize the distribution as an outline.
p3 +
  aes(color = continent) +
  geom_freqpoly()
p3 +
  aes(color = continent) +
  geom_freqpoly(binwidth = 1)

## Kernel density plot
p3 +
  aes(color = continent) +
  geom_density()

# `adjust` controls how smooth the density estimate is.
p3 +
  aes(color = continent) +
  geom_density(adjust = 0.3)

# You can also color the inside ...
p3 +
  aes(fill = continent) +
  geom_density(adjust = 0.3)

# ... but a solid fill hides the curves behind it, so make it transparent.
p3 +
  aes(fill = continent) +
  geom_density(adjust = 0.3, alpha = 0.3)

# You can also facet.
p3 +
  aes(fill = continent) +
  geom_density(adjust = 0.3, alpha = 0.3) +
  facet_wrap(~continent)
## Boxplots and violins over a numeric x variable
p4 <- ggplot(gapminder, aes(x = year, y = lifeExp))

# This is problematic: year is a numeric variable, so the data is not split
# into one box per year.
p4 + geom_boxplot()

# Use the group aesthetic to get one box per year.
p4 + geom_boxplot(aes(group = year))

# Violin plots work the same way.
p4 + geom_violin(aes(group = year))

# Overlay the median per year as points, as a line, or as both.
# `fun` is the current argument name; `fun.y` is deprecated since
# ggplot2 3.3.0.
p4 +
  geom_violin(aes(group = year)) +
  stat_summary(fun = median, color = "red", geom = "point")
p4 +
  geom_violin(aes(group = year)) +
  stat_summary(fun = median, color = "red", geom = "line")
p4 +
  geom_violin(aes(group = year)) +
  stat_summary(fun = median, color = "red", geom = "line") +
  stat_summary(fun = median, color = "red", geom = "point")
## Bar plots
# geom_bar() counts the rows in each x category for you.
ggplot(gapminder, aes(x = continent)) +
  geom_bar()

# How can the bars be ordered lowest to highest? reorder() sorts the
# continent levels by the number of rows in each of them.
ggplot(gapminder, aes(x = reorder(continent, continent, length))) +
  geom_bar()

# Add labels.
ggplot(gapminder, aes(x = reorder(continent, continent, length))) +
  geom_bar() +
  labs(title = "continent", x = "continent", y = "count")

# plyr is attached after dplyr here; count() is therefore called with an
# explicit dplyr:: prefix so the dplyr version is used regardless of masking.
library(plyr)
continent_freq <- dplyr::count(gapminder, continent)

# geom_bar() automatically does a count behind the scenes. Here the rows
# have already been counted (column `n`), but ggplot does not know that, so
# combining the default stat with a y aesthetic fails. The failure is the
# point of this step; try() lets the script carry on after showing the error.
try(print(
  ggplot(continent_freq, aes(x = continent, y = n)) +
    geom_bar()
))

# Tell ggplot the data is already counted with stat = "identity"
# (geom_col() is the shorthand for the same thing).
ggplot(continent_freq, aes(x = continent, y = n)) +
  geom_bar(stat = "identity")
## Themes
# Themes change the overall look of a plot. ggplot2 ships some of its own
# and packages such as ggthemes provide more.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  scale_x_log10() +
  geom_point() +
  theme_dark()
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  scale_x_log10() +
  geom_point() +
  theme_economist()
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  scale_x_log10() +
  geom_point() +
  theme_wsj()
# etc.

# cowplot is an often-used theme package; it can also combine plots into
# labelled multi-panel figures.
library(cowplot)
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent)) +
  scale_x_log10() +
  geom_point()

# Workshop note: once cowplot is loaded it replaces the ggplot2 defaults
# with its own opinionated theme.
# NOTE(review): this appears to depend on the installed cowplot version -
# confirm.
ggplot(
  subset(gapminder, continent == "Americas" & year == 1952),
  aes(x = gdpPercap, y = lifeExp)
) +
  scale_x_log10() +
  geom_point()
## Labelling points with ggrepel
# Install ggrepel only when it is missing, so that re-running the script
# does not reinstall the package every time.
if (!requireNamespace("ggrepel", quietly = TRUE)) {
  install.packages("ggrepel")
}
library(ggrepel)

# Label every country of the Americas in 1952; geom_label_repel() moves the
# labels apart so that they do not overlap.
ggplot(
  subset(gapminder, continent == "Americas" & year == 1952),
  aes(x = gdpPercap, y = lifeExp)
) +
  scale_x_log10() +
  geom_point() +
  geom_label_repel(aes(label = country))
# Countries to compare over time. The names must match the spelling in
# gapminder$country exactly, otherwise the country is silently dropped by
# the %in% filter that uses this vector.
countries <- c("Canada", "Rwanda", "Cambodia", "Mexico")
# Keep only the selected countries and drop the factor levels that are no
# longer used.
gapminder_small <- droplevels(
  subset(gapminder, country %in% countries)
)

# Life expectancy over time, one colored line (with points) per country.
country_lines <- ggplot(
  gapminder_small,
  aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  geom_point()
country_lines

# Same plot with a ColorBrewer palette.
country_lines + scale_color_brewer(palette = "Dark2")
# Label only a subset of the points, chosen dynamically from the data:
# here, the countries with a life expectancy higher than 65.
america_1952 <- subset(gapminder, continent == "Americas" & year == 1952)

ggplot(america_1952, aes(x = gdpPercap, y = lifeExp)) +
  scale_x_log10() +
  geom_point() +
  # The label layer gets its own, filtered data; the points still use the
  # full america_1952 data.
  geom_label_repel(
    data = subset(america_1952, lifeExp > 65),
    aes(label = country)
  )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment