Skip to content

Instantly share code, notes, and snippets.

View aaronsaunders's full-sized avatar

Aaron Marc Saunders aaronsaunders

View GitHub Profile
@aaronsaunders
aaronsaunders / UTF-8-sig-decode.py
Created October 1, 2013 08:38
>print data_list '\xef\xbb\xbfsome_data1' source: http://stackoverflow.com/questions/18664712/split-function-add-xef-xbb-xbf-n-to-my-list UTF-8 BOM in the beginning. To get rid of it, first decode your file contents to unicode.
# Read the file as raw bytes, drop a leading UTF-8 BOM if one is present
# (the "utf-8-sig" codec handles that), and keep the content as UTF-8 bytes.
# Binary mode makes read() return bytes, so .decode() works on both Python 2
# and Python 3; the with-block guarantees the file handle is closed.
with open("file.txt", "rb") as fp:
    data = fp.read().decode("utf-8-sig").encode("utf-8")
# Apply the same layer recipe to a base plot: points, one facet column per
# level of `g` (fixed scales), a 1:1 coordinate ratio and a linear-model
# smoother.
add_lm_facets <- function(base_plot) {
  base_plot +
    geom_point() +
    facet_grid(. ~ g, scales = "fixed") +
    coord_equal() +
    stat_smooth(method = "lm")
}
p1 <- add_lm_facets(pw)
p2 <- add_lm_facets(px)
p3 <- add_lm_facets(pz)
# Stack the three plots in a single column.
grid.arrange(p1, p2, p3, ncol = 1)
# Build a MarinerNames column that holds a player's name only on rows whose
# Team.x is "Mariners"; every other row gets NA (technique adapted from
# Winston Chang's R Graphics Cookbook, Recipe 5.11).
idx <- outfield$Team.x == "Mariners"
outfield$MarinerNames <- replace(outfield$Name, !idx, NA)
# Separate table containing only the Mariners rows.
Mariners <- subset(outfield, Team.x == "Mariners")
# Start the wRAAstars column as a full copy of the player names; the original
# notes say the table is then sorted by wRAA and only the top 4 names are
# kept (those steps are not part of this excerpt).
outfield$wRAAstars <- outfield$Name
# Toy data: ten random points, tagged with the letters a through j.
dat <- data.frame(x = rnorm(10), y = rnorm(10), label = letters[1:10])
# Keep only the rows that should be annotated (labels a - e).
labeled.dat <- subset(dat, label %in% letters[1:5])
# Draw every point, then add text for the labelled subset only.
ggplot(dat, aes(x = x, y = y)) +
  geom_point() +
  geom_text(
    data = labeled.dat,
    mapping = aes(x = x, y = y, label = label),
    hjust = 2
  )
#Or add a separate layer for each point you want to label.
ggplot(dat, aes(x,y)) + geom_point() +
# Recently I wanted to recreate assocplot using ggplot2. In the end I propose a simple way to visualize data arranged in two-way tables using geom_tile.
#
# I used Titanic data set as an example combining age and sex dimensions to get two-way data.
#
# I plot residuals of the Chi-squared test (as in assocplot) on the left and the probability of survival on the right. A nice feature of geom_tile is that it clearly highlights missing data (children were not crew members). Here is the code that generates the plots:
library(ggplot2)
library(grid)
library(reshape2)
# Quick reference: row- and column-wise summaries of a matrix-like object `x`.
# `x`, `group`, `X`, `MARGIN` and `FUN` are placeholders to substitute.
colSums(x, na.rm = FALSE, dims = 1)   # sum of each column
rowSums(x, na.rm = FALSE, dims = 1)   # sum of each row
colMeans(x, na.rm = FALSE, dims = 1)  # mean of each column
rowMeans(x, na.rm = FALSE, dims = 1)  # mean of each row
rowsum(x, group, reorder = TRUE) # finds row sums for each level of a grouping variable
# Signature template only; replace X, MARGIN, FUN and ... before running.
apply(X, MARGIN, FUN, ...) # applies the function (FUN) to either rows (1) or columns (2) on object X
apply(x, 1, min) # finds the minimum for each row
apply(x, 2, max) # finds the maximum for each column
# Base R spells this max.col(); there is no col.max() function.
max.col(x) # another way to find which column has the maximum value for each row
which.min(x) # index of the (first) minimum of x
# Quick reference: package and session management commands.
# Install the Hmisc package and request its dependencies as well.
install.packages("Hmisc",dependencies=TRUE)
# Update installed packages.
update.packages()
# Quit the R session.
q()
# Set the working directory.
# NOTE(review): no directory is passed here; supply a path when using it.
setwd()
# Report the current working directory.
getwd()
ls() # lists objects
rm(object) # deletes an object
library(RColorBrewer) # load ColorBrewer palettes
@aaronsaunders
aaronsaunders / aggregation_example.R
Created August 9, 2013 07:06
A Survey of Data Aggregation Techniques in R
#
# Steve Pittard - [email protected], 03/19/12
# Code to illustrate motivations for using apply function
#
# See www.bimcore.emory.edu/bbseries for slides and code downloads
#
# References include:
# http://statland.org/R/R/Rpulse2.htm , http://www.cyclismo.org/tutorial/R/tables.html#manipulations
# http://nsaunders.wordpress.com/2010/08/20/a-brief-introduction-to-apply-in-r/
#
# run a quick demonstration
# Install Hmisc (with its dependencies), update installed packages, then load
# Hmisc so that spss.get() is available.
install.packages("Hmisc",dependencies=TRUE)
update.packages()
library("Hmisc")
# Read the SPSS file from the URL, with use.value.labels switched on.
mydata<-spss.get("http://www.rci.rutgers.edu/~rwomack/R/vermont.sav",use.value.labels=TRUE)
# Inspect the first rows, the last rows and a per-column summary.
head(mydata)
tail(mydata)
summary(mydata)
# attach() puts the columns of mydata on the search path, which is why AGEP
# can be referenced below without the mydata$ prefix.
attach(mydata)
# Histogram of the AGEP column.
hist(AGEP)
library(lattice)
# Download the wine data; the first line of the CSV holds the column names.
my.wines <- read.csv("http://www.bimcore.emory.edu/wine.csv", header = TRUE)
# Look at the correlations
library(gclus)
# Compute the correlation matrix once and reuse it below.
wine.cor <- cor(my.wines)
# Absolute correlations, passed to gclus::dmat.color().
my.abs <- abs(wine.cor)
my.colors <- dmat.color(my.abs)
# gclus::order.single() is applied to the signed correlations.
my.ordered <- order.single(wine.cor)