patternproject

Git

Press minus + shift + s and return to chop/fold long lines!

	# Load the "Lahman" package and its "Teams" data set.
	# The package is installed only when it is not already available, so that
	# re-running the script does not download it again (and still works offline
	# once it has been installed).
	if (!requireNamespace("Lahman", quietly = TRUE)) {
	  install.packages("Lahman")
	}
	library("Lahman")
	data(Teams)
	#
	#
	# CREATE LEAGUE SUMMARY TABLES
	# ============================
	#
	# select a sub-set of teams from 1901 [the establishment of the American League] forward to 2012

	# Brian Abelson @brianabelson
	# Harmony Institute
	# December 5, 2012

	# lda is a wrapper for lda.collapsed.gibbs.sampler in the "lda" package
	# it fits topic models using latent dirichlet allocation
	# it provides arguments for cleaning the input text and tuning the parameters of the model
	# it also returns a lot of useful information about the topics/documents, in a format that you can easily join back to your original data
	# this allows you to easily model outcomes based on the distribution of topics within a collection of texts

	# Packages this snippet depends on. Set doInstall to FALSE to skip the
	# installation step entirely and only attach the packages.
	doInstall <- TRUE
	toInstall <- c("ggplot2", "reshape2", "RColorBrewer")
	if (doInstall) {
	  # Install only the packages that are not already available, and fetch them
	  # over HTTPS rather than plain HTTP.
	  missingPkgs <- toInstall[
	    !vapply(toInstall, requireNamespace, logical(1), quietly = TRUE)
	  ]
	  # install.packages() must not be called with an empty package vector.
	  if (length(missingPkgs) > 0) {
	    install.packages(missingPkgs, repos = "https://cloud.r-project.org")
	  }
	}
	# Attach every package; invisible() keeps the list returned by lapply()
	# from being printed at top level.
	invisible(lapply(toInstall, library, character.only = TRUE))

	# Dimensions (rows x columns) for the random matrix of the heatmap example.
	# Any numeric matrix would do, although heatmaps are most often drawn from
	# square correlation matrices.
	nCol <- 16
	nRow <- 9

	library(maps)

	# Read the headerless unemployment file; column names are supplied here in
	# file order and text columns are kept as character rather than factor.
	unemp <- read.csv(
	  file = "unemployment09.csv",
	  header = FALSE,
	  col.names = c(
	    "blsid", "stfips", "cofips", "name", "year",
	    "pop1", "pop2", "unempraw", "unemppct"
	  ),
	  stringsAsFactors = FALSE
	)

	# Build a lower-case "state,county" key for every row of unemp:
	#   * state part  - the two capital letters ending `name` are looked up in
	#                   state.abb and replaced by the matching full state.name;
	#   * county part - `name` with its trailing " County" / " city" / " City" /
	#                   " Parish" and the ", XX" suffix removed.
	# presumably the key is meant to match the region names used by the maps
	# package - TODO confirm.
	# The alternation is written with a plain `|`: "\|" is not a valid escape
	# sequence in an R string literal and prevents the script from parsing.
	unemp$mpname <- tolower(paste(
	  state.name[match(sub("^.*([A-Z][A-Z])$", "\\1", unemp$name), state.abb)],
	  sub("^(.*) (County|[Cc]ity|Parish), ..$", "\\1", unemp$name),
	  sep = ","
	))

	# Bin the unemployment percentage into an index from 1 to 6: five bins of
	# width 2 covering 0-10, plus one open-ended bin for everything above 10.
	# The top break is Inf rather than max(unemp$unemppct) so the bin layout does
	# not change (or fail with non-unique breaks) when the largest value is 10 or
	# less, or when the column contains NA. include.lowest = TRUE puts a value of
	# exactly 0 into the first bin instead of turning it into NA.
	unemp$ri <- as.numeric(cut(
	  unemp$unemppct,
	  breaks = c(seq(0, 10, by = 2), Inf),
	  include.lowest = TRUE
	))