MonkmanMH · December 20, 2015 01:19
diff --git a/gistfile1.r b/gistfile1.r
 # ######################
 #
 # Blog with output and discussion:
 # "Fair weather fans? (An R scatter plot matrix)" 2013-07-18
 # http://bayesball.blogspot.ca/2013/07/fair-weather-fans-r-scatter-plot-matrix.html
 #
 # data: pulled from www.harbourcats.com
 # saved on Google Drive:
 # https://docs.google.com/spreadsheet/ccc?key=0Art4wpcrwqkBdHZvTUFzOUo5U3BzMHFveXdYOTdTWUE&usp=sharing
 # File / Download as > Comma Separated Values (CSV)
 #
 # read the csv file
 HarbourCat.attend <- read.csv("HarbourCat_attendance - Sheet1.csv")
 #
 # load ggplot2
 if (!require(ggplot2)) install.packages("ggplot2")
 library(ggplot2)
 #
 # #####
 #
 # simple line plot of data series
 #
 ggplot(HarbourCat.attend, aes(x=num, y=attend)) + 
    geom_point() + 
    geom_line()+
    ggtitle("HarbourCats attendance \n2013 season") +
    annotate("text", label="Opening Night", x=3, y=3050, size=3,    
      fontface="bold.italic")
 #
 # #####
 #
 # prune the extreme outlier that is opening night
 # and structure the data so that attendance is last and will appear as the Y axis on plots
 HarbourCat.attend.data <- (subset(HarbourCat.attend, num > 1, select=c(num, day2, sun, temp.f, attend)))
 # print the data table to screen
 HarbourCat.attend.data
 #
 # ###################
 # scatter plot matrix
 # ###################
 #
 # scatter plot matrix - simple
 # (see Winston Chang, "R Graphics Cookbook", recipe 5.13)
 pairs(HarbourCat.attend.data[,1:5])
 #
 # #####
 #
 # scatter plot matrix - with correlation coefficients
 # define panels (copy-paste from the "pairs" help page)
 panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
 {
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(0, 1, 0, 1))
    r <- abs(cor(x, y))
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste0(prefix, txt)
    if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
    text(0.5, 0.5, txt, cex = cex.cor * r)
 }
 #
 panel.hist <- function(x, ...)
 {
    usr <- par("usr"); on.exit(par(usr))
    par(usr = c(usr[1:2], 0, 1.5) )
    h <- hist(x, plot = FALSE)
    breaks <- h$breaks; nB <- length(breaks)
    y <- h$counts; y <- y/max(y)
    rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
 }
 #
 # run pairs plot
 #
 pairs(HarbourCat.attend.data[,1:5], upper.panel = panel.cor,
                                    diag.panel = panel.hist,
                                    lower.panel = panel.smooth)
 #
 #
 # ######################
 # Spearman's correlation
 # ######################
 #
 # big thanks to r2evans who supplied the code below in the comments to the blog post
 # 
 panel.cor <- function(x, y, digits=3, ..., text.cex, text.col='black') {
 par(usr = c(0, 1, 0, 1))
 numsamples <- sum(! is.na(x) & ! is.na(y))
 r <- cor(x, y, use='complete.obs')
 spearman <- cor.test(x, y, method='spearman', continuity=TRUE, exact=FALSE)
 #
 if (require(RColorBrewer, quietly=TRUE)) {
 colbrew <- 'YlOrRd' ## 9 available colors
 ndiv <- 5 ## can be up to 9+1=10 since first cut has no color
 colors <- c(NA, brewer.pal(ndiv-1, 'YlOrRd'))
 } else {
 ## if RColorBrewer is not available, need to define 'colors' manually
 ndiv <- 4
 colors <- c(NA, 'yellow', 'orange', 'red') ## for ndiv=4
 }
 ## Could use c(0:ndiv/ndiv), but cut() looks at (0,0.2] so a
 ## p-value of 0, though highly unlikely, would break things.
 ## Using anything less than 0 side-steps this problem.
 cuts <- c(-1, 1:ndiv/ndiv)
 if (spearman$p.value <= 0.05)
 polygon( c(-2,2,2,-2,-2), c(-2,-2,2,2,-2),
 col=colors[ cut(abs(r), breaks=cuts, labels=FALSE) ])
 mindig <- max(0.001, 1/10^digits)
 if (spearman$p.value < mindig) {
 spearman$p.value <- mindig
 leq <- '<'
 } else leq <- '='
 ## Can "arbitrarily" add other info to this list for stacked display.
 labels <- list(sprintf('n = %d', numsamples),
 sprintf(paste0('%0.', digits, 'f'), r),
 sprintf(paste0('p %s %0.', digits, 'f'), leq, spearman$p.value))
 nn <- length(labels)
 ## Ensure the text isn't too big for the square in height or width.
 ## 0.9 is just a factor to give a little bit of buffer.
 if (missing(text.cex)) 
 text.cex <- min(0.9/((nn+1) * strheight(labels[[1]]) * 1.3),
 0.9/max(strwidth(labels)))
 text(0.5, (nn:1)/(nn+1), labels, cex=text.cex, col=text.col, adj=0.5)
 }
 #
 #
	# ######################
	#
	# Blog with output and discussion:
	# "Fair weather fans? (An R scatter plot matrix)" 2013-07-18
	# http://bayesball.blogspot.ca/2013/07/fair-weather-fans-r-scatter-plot-matrix.html
	#
	# data: pulled from www.harbourcats.com
	# saved on Google Drive:
	# https://docs.google.com/spreadsheet/ccc?key=0Art4wpcrwqkBdHZvTUFzOUo5U3BzMHFveXdYOTdTWUE&usp=sharing
	# File / Download as > Comma Separated Values (CSV)
	#
	# read the csv file
	HarbourCat.attend <- read.csv("HarbourCat_attendance - Sheet1.csv")
	#
	# load ggplot2
	if (!require(ggplot2)) install.packages("ggplot2")
	library(ggplot2)
	#
	# #####
	#
	# simple line plot of data series
	#
	ggplot(HarbourCat.attend, aes(x=num, y=attend)) +
	geom_point() +
	geom_line()+
	ggtitle("HarbourCats attendance \n2013 season") +
	annotate("text", label="Opening Night", x=3, y=3050, size=3,
	fontface="bold.italic")
	#
	# #####
	#
	# prune the extreme outlier that is opening night
	# and structure the data so that attendance is last and will appear as the Y axis on plots
	HarbourCat.attend.data <- (subset(HarbourCat.attend, num > 1, select=c(num, day2, sun, temp.f, attend)))
	# print the data table to screen
	HarbourCat.attend.data
	#
	# ###################
	# scatter plot matrix
	# ###################
	#
	# scatter plot matrix - simple
	# (see Winston Chang, "R Graphics Cookbook", recipe 5.13)
	pairs(HarbourCat.attend.data[,1:5])
	#
	# #####
	#
	# scatter plot matrix - with correlation coefficients
	# define panels (copy-paste from the "pairs" help page)
	panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
	{
	usr <- par("usr"); on.exit(par(usr))
	par(usr = c(0, 1, 0, 1))
	r <- abs(cor(x, y))
	txt <- format(c(r, 0.123456789), digits = digits)[1]
	txt <- paste0(prefix, txt)
	if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
	text(0.5, 0.5, txt, cex = cex.cor * r)
	}
	#
	panel.hist <- function(x, ...)
	{
	usr <- par("usr"); on.exit(par(usr))
	par(usr = c(usr[1:2], 0, 1.5) )
	h <- hist(x, plot = FALSE)
	breaks <- h$breaks; nB <- length(breaks)
	y <- h$counts; y <- y/max(y)
	rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
	}
	#
	# run pairs plot
	#
	pairs(HarbourCat.attend.data[,1:5], upper.panel = panel.cor,
	diag.panel = panel.hist,
	lower.panel = panel.smooth)
	#
	#
	# ######################
	# Spearman's correlation
	# ######################
	#
	# big thanks to r2evans who supplied the code below in the comments to the blog post
	#
	panel.cor <- function(x, y, digits=3, ..., text.cex, text.col='black') {
	par(usr = c(0, 1, 0, 1))
	numsamples <- sum(! is.na(x) & ! is.na(y))
	r <- cor(x, y, use='complete.obs')
	spearman <- cor.test(x, y, method='spearman', continuity=TRUE, exact=FALSE)
	#
	if (require(RColorBrewer, quietly=TRUE)) {
	colbrew <- 'YlOrRd' ## 9 available colors
	ndiv <- 5 ## can be up to 9+1=10 since first cut has no color
	colors <- c(NA, brewer.pal(ndiv-1, 'YlOrRd'))
	} else {
	## if RColorBrewer is not available, need to define 'colors' manually
	ndiv <- 4
	colors <- c(NA, 'yellow', 'orange', 'red') ## for ndiv=4
	}
	## Could use c(0:ndiv/ndiv), but cut() looks at (0,0.2] so a
	## p-value of 0, though highly unlikely, would break things.
	## Using anything less than 0 side-steps this problem.
	cuts <- c(-1, 1:ndiv/ndiv)
	if (spearman$p.value <= 0.05)
	polygon( c(-2,2,2,-2,-2), c(-2,-2,2,2,-2),
	col=colors[ cut(abs(r), breaks=cuts, labels=FALSE) ])
	mindig <- max(0.001, 1/10^digits)
	if (spearman$p.value < mindig) {
	spearman$p.value <- mindig
	leq <- '<'
	} else leq <- '='
	## Can "arbitrarily" add other info to this list for stacked display.
	labels <- list(sprintf('n = %d', numsamples),
	sprintf(paste0('%0.', digits, 'f'), r),
	sprintf(paste0('p %s %0.', digits, 'f'), leq, spearman$p.value))
	nn <- length(labels)
	## Ensure the text isn't too big for the square in height or width.
	## 0.9 is just a factor to give a little bit of buffer.
	if (missing(text.cex))
	text.cex <- min(0.9/((nn+1) * strheight(labels[[1]]) * 1.3),
	0.9/max(strwidth(labels)))
	text(0.5, (nn:1)/(nn+1), labels, cex=text.cex, col=text.col, adj=0.5)
	}
	#
	#
No results found