Steven Worthington stevenworthington

Director of Data Science Services Lead Data Scientist

stevenworthington / barplot_values_ggplot2.R

Created March 29, 2012 19:56

barplot_values_ggplot2

	library(ggplot2)
	library(plyr)
	data(mpg)

	# summarise highway mileage for every class-by-year cell:
	# hwy.avg is the cell mean, hwy.sd the cell standard deviation
	hwy.means <- ddply(
	  .data = mpg,
	  .variables = c("class", "year"),
	  .fun = summarize,
	  hwy.avg = mean(hwy),
	  hwy.sd = sd(hwy)
	)

	# barplot with values over bars
	ggplot(hwy.means) +
	geom_bar(aes(class, hwy.avg, fill = factor(year)), position = "dodge", colour = "black", size = 0.3) +

stevenworthington / formula_creation.R

Created April 4, 2012 15:36

concatenate variables into a formula

	# example of how to concatenate lots of variables into a formula without typing them out

	# High School and Beyond example data, read as a comma-separated table
	hsb_df <- read.table(
	  "http://www.ats.ucla.edu/stat/R/notes/hs0.csv",
	  sep = ",",
	  header = TRUE
	)

	# names of the predictor columns (columns 4:8 and 10:11)
	predictor_names <- names(hsb_df[, c(4:8, 10:11)])

	# glue the intercept-only formula and the predictor names together with " + "
	# and convert the resulting string into a formula object
	hsb_form <- formula(paste(c("math ~ 1", predictor_names), collapse = " + "))

	# fit the linear model described by the generated formula
	fit1 <- lm(hsb_form, data = hsb_df)

stevenworthington / ipak.R

Created July 25, 2012 19:44

Install and load multiple R packages at once

	# ipak: install (when missing) and load multiple R packages.
	#
	# Args:
	#   pkg: character vector of package names.
	#
	# Returns:
	#   A named logical vector with one element per entry of `pkg`, holding the
	#   value require() returned for that package (TRUE when it could be loaded).
	#   An empty `pkg` yields an empty logical vector.
	ipak <- function(pkg) {
	  # entries of `pkg` that do not appear in the installed-package table
	  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
	  if (length(new.pkg) > 0) {
	    install.packages(new.pkg, dependencies = TRUE)
	  }
	  # require() is intentional here: its TRUE/FALSE result is the return value.
	  # vapply() pins the result type to logical, which sapply() does not do
	  # when `pkg` is empty.
	  vapply(pkg, require, logical(1), character.only = TRUE)
	}

stevenworthington / centroid_perm.R

Created July 25, 2012 19:50

Permutation test for group differences using 3D coordinate data

	# ===============================================================================
	# Name : centroid_perm
	# Original author : Steven Worthington ([email protected])
	# Affiliation : IQSS, Harvard University
	# Date (mm/dd/yyyy) : 06/14/2012
	# Version : v0.8
	# Aim : exact permutation test for group differences
	# ===============================================================================

	# Goal:

stevenworthington / k_medoids_uncent_corr.R

Last active March 4, 2021 10:58

Calculate K-medoids using the uncentered correlation distance method

	# example of calculating K-medoids using the uncentered
	# correlation metric as a measure of distance


	# 0) load data
	# data() makes the mtcars data set available under the name `mtcars`
	data(mtcars)


	# 1) create a distance matrix using the "cosine of the angle" method (aka, uncentered correlation)

stevenworthington / lme4_contrast_example.R

Created July 11, 2013 19:23

Example of how to create custom contrasts to test hypotheses in lme4 models.


	# Note: requires loading the "socsub" data frame (not a bundled R dataset)


	# ------------------------------------------------------------------------------------
	# pairwise comparisons including interactions


	# fit an ordinary linear model (sex-by-ageclass interaction plus loggrpmem,
	# with logtimeage as offset); it is used to obtain the design matrix
	model1 <- lm(
	  formula = agro.rec.tot ~ sex * ageclass + loggrpmem,
	  data = socsub,
	  offset = logtimeage
	)

stevenworthington / extract_twitter_text.R

Created July 11, 2013 19:47


	# list with character vectors
	text <- list(a = "all day I play @sworth with R",
	             b = "all night I play @sworth with R")

	# match the whole string: anything up to "@", then capture the run of word
	# characters that follows it, then anything after. Replacing the match with
	# the capture group leaves only the handle. The ".*" quantifiers are
	# required; without them the pattern only matches when "@" is the second
	# character and the input comes back unchanged.
	handle_pattern <- "^.*@(\\w+).*"

	# extract letters after "@" in a single character vector
	sub(handle_pattern, "\\1", text$a)

	# extract letters after "@" in a list of character vectors
	# (the list is coerced to a character vector, one element per entry)
	gsub(handle_pattern, "\\1", text)

stevenworthington / points_on_polygons.R

Created September 3, 2013 14:08

	library(sp)
	library(maptools)

	# locate the North Carolina SIDS shapefile inside the maptools package
	sids_path <- system.file("shapes/sids.shp", package = "maptools")[1]

	# read the polygons, using the FIPSNO column as polygon IDs and the given
	# proj4 string as coordinate reference system
	# NOTE(review): readShapePoly()/maptools may no longer be available from
	# CRAN in current R installations - confirm before relying on this
	NC <- readShapePoly(
	  sids_path,
	  IDvar = "FIPSNO",
	  proj4string = CRS("+proj=longlat +ellps=clrk66")
	)

	# draw the polygon outlines in blue with axes shown
	plot(NC, border = "blue", axes = TRUE, las = 1)

stevenworthington / caliper_text.R

Last active August 29, 2015 13:56

Extract blocks of text based on patterns

	# set working directory
	dir_path <- "path_to_text_files"
	setwd(dir_path)

	# create vector of filenames
	# List the current directory rather than dir_path: after setwd() a relative
	# dir_path would be looked up inside itself and match nothing. For an
	# absolute dir_path the result is the same as before.
	filenames <- list.files()

	# read in files to a list: one character vector per file, split on newlines
	docList <- lapply(filenames, scan, what = "character", sep = "\n")

stevenworthington / R_packages.R

Created May 9, 2014 01:02

	x <- c(
	'knitr', # A general-purpose package for dynamic report generation in R.
	# 'sqldf', # For running SQL statements on R data frames, optimized for convenience.
	'randomForest', # Classification and regression based on a forest of trees using random inputs.
	'arm', # R functions for processing lm, glm, svy.glm, mer and polr outputs.
	'ggplot2', # An implementation of the Grammar of Graphics.
	'gridExtra', # misc. high-level Grid functions
	'plyr', # Tools for splitting, applying and combining data.
	'tree', # Classification and regression trees.
	'gbm', # Generalized Boosted Regression Models