inkhorn’s gists

inkhorn / ebike.r

Created September 13, 2013 00:31

E-bike Survey Analysis

	library(rpart)
	library(plyr)
	library(rpart.plot)

	# Load the raw survey export.
	ebike = read.csv("E-Bike_Survey_Responses.csv")

	# Recode empty-string answers in columns 2 through 10 as missing values (NA).
	answer.cols = 2:10
	blank.answers = ebike[, answer.cols] == ''
	ebike[, answer.cols][blank.answers] = NA

	# In this section we use mapvalues from the plyr package to get rid of blanks, but also

inkhorn / estimate_age.R

Last active December 20, 2015 09:19

Estimate Age from First Name in R

	library(stringr)
	library(plyr)

	# Prerequisite: the SSA files have already been downloaded into the R project directory.
	# The files to process are picked purely by position (entries 3 through 135) from the
	# listing of the current working directory.
	dir_contents = list.files()
	file_listing = dir_contents[3:135]
	for (f in file_listing) {
	year = str_extract(f, "[0-9]{4}")
	if (year == "1880") { # Initializing the very long dataframe
	name_data = read.csv(f, header=FALSE)

inkhorn / casino.geo.r

Last active October 30, 2023 12:44

not in my backyard casino analysis

	library(ff)
	library(ggthemes)
	# Restore the saved ff archive named "casino".
	# NOTE(review): casino.orig is not created anywhere in the visible code; presumably it is
	# one of the objects restored by this ffload call — confirm.
	ffload(file="casino", overwrite=TRUE)

	# Indicator column: 0 when City is "Toronto", 1 for any other city.
	casino.orig$Outside.of.Toronto = as.ff(ifelse(casino.orig[,"City"] == "Toronto",0,1))

	# Two logistic regressions (binomial family, logit link). Each models whether the Q6 answer
	# equals the given option, with Outside.of.Toronto as the only predictor.
	casino.in.toronto = glm(casino.orig[,"Q6"] == "City of Toronto" ~ Outside.of.Toronto, data=casino.orig, family=binomial(logit))
	casino.outside.toronto = glm(casino.orig[,"Q6"] == "Adjacent Municipality" ~ Outside.of.Toronto, data=casino.orig, family=binomial(logit))

	# Print the fitted-model summary for the "City of Toronto" model.
	summary(casino.in.toronto)

inkhorn / neither.casino.glm.r

Created May 17, 2013 19:00

neither casino glm

	Call:
	glm(formula = casino$Q6 == "Neither" ~ GoBigorGoHome + TechnicalDetails +
	Soc.Env.Issues, family = binomial(logit), data = casino)

	Deviance Residuals:
	Min 1Q Median 3Q Max
	-2.4090 -0.7344 -0.3934 0.8966 2.7194

	Coefficients:
	Estimate Std. Error z value Pr(>\|z\|)

inkhorn / adj.mun.cacsino.glm.r

Created May 17, 2013 18:59

adjacent municipality casino glm

	Call:
	glm(formula = casino$Q6 == "Adjacent Municipality" ~ GoBigorGoHome +
	TechnicalDetails + Soc.Env.Issues, family = binomial(logit),
	data = casino)

	Deviance Residuals:
	Min 1Q Median 3Q Max
	-1.0633 -0.7248 -0.5722 -0.3264 2.7136

	Coefficients:

inkhorn / toronto.casino.glm.r

Created May 17, 2013 18:57

toronto casino glm results

	Call:
	glm(formula = casino$Q6 == "City of Toronto" ~ GoBigorGoHome +
	TechnicalDetails + Soc.Env.Issues, family = binomial(logit),
	data = casino)

	Deviance Residuals:
	Min 1Q Median 3Q Max
	-3.6426 -0.4745 -0.1156 0.4236 3.4835

	Coefficients:

inkhorn / toronto_casino.r

Created May 2, 2013 01:01

Casino Analysis

	library(ff)
	library(ffbase)
	library(stringr)
	library(ggplot2)
	library(ggthemes)
	library(reshape2)
	library(RgoogleMaps)

	# Read the casino survey results. The intent is to keep two copies of this data set:
	# one to convert, and the original for its text values.
	survey.path = "/home/inkhorn/Downloads/casino_survey_results20130325.csv"
	casino = read.csv(file = survey.path)

inkhorn / stack multiple copies of the same data type together.r

Created October 26, 2012 01:39

Crazy data reshaping script

	# Take the database IDs from the first column and repeat them 50 times so that the ID
	# column is long enough for the new long-form dataset (596,100 rows).

	id.column = client.data[, 1]
	client.data.new = rep(id.column, times = 50)

	for (i in 2:32){
	# for each column in the first 31 after the ID column, find the 49 matching columns
	# to the right and stack them using melt

	stacked.data = melt(client.data, id.vars="CnBio_ID", measure.vars=seq(i,(i+(31*49)),31), value.name=names(client.data)[i])

inkhorn / penultimax.r

Created September 14, 2012 01:45

Find the second highest value in a vector

	penultimax = function(invector) {
	# If the vector starts off as only having 1 or 0 numbers, return NA
	if (length(invector) <= 1) {
	return(NA)
	}
	first.max = safe.max(invector)
	#Once we get the max, take it out of the vector and make newvector
	newvector = invector[!invector == first.max]
	#If newvector now has nothing in it, return NA
	if (length(newvector) == 0) {

inkhorn / crossbarminmax.r

Created June 10, 2012 02:33

min median max crossbar with dots

	# Data are pasted in from the clipboard as tab-separated text with a header row.
	scents = read.table("clipboard",header=TRUE,sep="\t")

	# Minimum, median and maximum of S.Trial.3 for each Sex (columns "0%", "50%", "100%").
	strial3.by.sex.wide = ddply(scents, 'Sex', function (x) quantile(x$S.Trial.3, c(0,.5,1), na.rm=TRUE))

	# Minimum and maximum of S.Trial.3 for smokers only (Smoker == "Y"), melted to long form:
	# one row per Sex and Percentile, with the value in Time.
	strial3.by.sex.smokers = melt(ddply(subset(scents,Smoker == "Y") , 'Sex', function (x) quantile(x$S.Trial.3, c(0,1), na.rm=TRUE)),variable.name="Percentile",value.name="Time")

	# Crossbar (min / median / max per Sex) with the smokers' min and max overlaid as points.
	# Every continued line must END with `+`: a line that starts with `+` begins a new
	# expression, so the theme and y-axis settings would never be added to the plot.
	# Columns are referenced by (backticked) name inside aes() rather than via data$"col".
	# stat is not an aesthetic, so it is not passed through aes(); geom_point already uses
	# the identity stat by default.
	# theme()/element_text() replace the removed opts()/theme_text() API.
	ggplot() +
		geom_crossbar(data=strial3.by.sex.wide, aes(x=Sex, y=`50%`, ymin=`0%`, ymax=`100%`), fill="#bcc927", width=.75) +
		geom_point(data=strial3.by.sex.smokers, aes(x=Sex, y=Time), size=3) +
		theme(legend.title = element_text(size=10, face="bold"),
			legend.text = element_text(size=10),
			axis.text.x = element_text(size=10),
			axis.text.y = element_text(size=10, hjust=1),
			axis.title.x = element_text(size=12, face="bold"),
			axis.title.y = element_text(size=12, angle=90, face="bold")) +
		scale_y_continuous(name="Time to Completion")