inkhorn’s gists

inkhorn / cellphone analysis.R

Created April 6, 2015 20:47

Cell Phone Analysis

	library(jsonlite)

	# Load the cell phone data; simplifyDataFrame = TRUE asks jsonlite to
	# return a data frame where the JSON structure allows it.
	cp = fromJSON(txt = "Cell Phone Data.txt", simplifyDataFrame = TRUE)

	# Positions of the columns that should be converted to numeric.
	num.atts = c(4,9,11,12,13,14,15,16,18,22)

	# Convert column by column. lapply() always returns a list of columns, so
	# the assignment behaves the same for any number of rows (sapply() may
	# collapse to a matrix or a plain vector depending on the input).
	cp[num.atts] = lapply(cp[num.atts], as.numeric)

	# Screen height divided by screen width, in pixels.
	cp$aspect.ratio = cp$att_pixels_y / cp$att_pixels_x

	# A phone is flagged "Yes" when its name mentions smart/iphone/blackberry
	# (case-insensitive) or its screen size is at least 4. The alternation and
	# the logical OR use a plain `|`; an escaped `\|` is not valid R.
	# A missing screen size yields NA unless the name already matched.
	cp$isSmartPhone = ifelse(grepl("smart|iphone|blackberry", cp$name, ignore.case=TRUE) | cp$att_screen_size >= 4, "Yes", "No")

inkhorn / first nations libraries.r

Created April 9, 2014 01:42

	library(plyr)
	library(ggplot2)
	library(ggmap)

	# Load the library statistics file.
	libraries = read.csv("ontario_library_stats_2010.csv")

	# Indicator column: 1 when the service type is "First Nations Library",
	# 0 otherwise.
	libraries$isFN = ifelse(
		libraries$Library.Service.Type == "First Nations Library",
		1,
		0
	)

	# 'Proportionate' versions of the variables: each of columns 20-142 is
	# divided by column 13 and stored in columns 143-265, named after the
	# source column with a ".P" suffix.
	# NOTE(review): if isFN ends up as column 143 it is overwritten here -
	# confirm against the column count of the CSV.
	libraries[, 143:265] = sapply(
		libraries[, 20:142],
		function (stat) stat / libraries[, 13]
	)
	names(libraries)[143:265] = paste0(names(libraries)[20:142], ".P")

inkhorn / process recipes.py

Last active August 29, 2015 13:56

	import os
	# Collect the full text of every file in the current directory whose name
	# contains '.txt'; each file's contents become one entry in `rc`.
	rfiles = os.listdir('.')
	rc = []
	for f in rfiles:
	    if '.txt' in f:
	        # The recipes come in 3 txt files consisting of 1 recipe per line, the
	        # cuisine of the recipe as the first entry in the line, and all subsequent ingredient
	        # entries separated by a tab
	        # Use a context manager so the file handle is closed after reading.
	        with open(f, 'r') as infile:
	            rc.append(infile.read())

inkhorn / recipe analysis.R

Last active September 1, 2015 05:11

	# Read the combined recipe file; readLines() returns one string per line.
	recipes = readLines('recipes combined.tsv')

	# Once I read it into R, I have to get rid of the \t (tab)
	# characters so that it's more acceptable to the tm package.
	# gsub() is already vectorised over its input, so it is applied to the
	# whole character vector directly; fixed = TRUE treats the tab as a
	# literal character rather than a regular expression.
	recipes.new = gsub('\t', ' ', recipes, fixed = TRUE)

	# Build a corpus with one document per line, then its document-term matrix.
	recipes.corpus = Corpus(VectorSource(recipes.new))
	recipes.dtm = DocumentTermMatrix(recipes.corpus)

inkhorn / scotland.R

Last active August 29, 2015 13:56

	# **Introduction**

	# Data analysis is like an interview. In any interview, the interviewer hopes to use a series of
	# questions in order to discover a story. The questions the interviewer asks, of course, are
	# subjectively chosen. As such, the story that one interviewer gets out of an interviewee might
	# be fairly different from the story that another interviewer gets out of the same person. In the
	# same way, the commands (and thus the analysis) below are not the only way of analyzing the data.
	# When you understand what the commands are doing, you might decide to take a different approach
	# to analyzing the data. Please do so, and be sure to share what you find!

inkhorn / ltep.r

Created December 13, 2013 04:24

LTEP Survey Analysis

	# Load the survey results.
	ltep = read.csv("ltep-survey-results-all.csv")

	library(likert)
	library(ggthemes)

	# Flip the scoring of columns 13-19: each value v becomes 8 - v.
	ltep[, 13:19] = sapply(ltep[, 13:19], function (score) 8 - score)

	# Summarise the flipped items as a likert object and print its summary.
	deal.w.esources = likert(ltep[, 13:19])
	summary(deal.w.esources)

	# Plot the likert object with bold black 14pt axis text and x-axis title,
	# and a bold 18pt plot title.
	plot(deal.w.esources, text.size = 6, text.color = "black") +
		theme(
			axis.text.x = element_text(colour = "black", face = "bold", size = 14),
			axis.text.y = element_text(colour = "black", face = "bold", size = 14),
			axis.title.x = element_text(colour = "black", face = "bold", size = 14),
			plot.title = element_text(size = 18, face = "bold")
		) +
		ggtitle("What guidelines should Ontario use\n for its future mix of energy sources?")

inkhorn / enron corpus processing v2.py

Created November 5, 2013 03:21

Enron Corpus Processing, version 2

	docs = []
	from os import listdir, chdir
	import re


	# Here's the section where I try to filter useless stuff out.
	# Notice near the end all of the regex patterns where I've called
	# "re.DOTALL". This is pretty key here. What it means is that the
	# .+ I have referenced within the regex pattern should be able to
	# pick up alphanumeric characters, in addition to newline characters

inkhorn / enron corpus processing.r

Last active December 27, 2015 03:18

Enron Corpus Processing

	library(stringr)
	library(plyr)
	library(tm)
	library(tm.plugin.mail)
	library(SnowballC)
	library(topicmodels)

	# At this point, the python script should have been run,
	# creating about 126 thousand txt files. I was very much afraid
	# to import that many txt files into the tm package in R (my computer only

inkhorn / enron processing.py

Last active February 5, 2017 18:12

Script to read, filter, and output all enron emails into many files in one directory

	docs = []
	from os import listdir, chdir
	import re


	# Here's my attempt at coming up with regular expressions to filter out
	# parts of the enron emails that I deem as useless.

	# Matches an "@" with at least one character on each side; without
	# re.DOTALL, "." does not match a newline.
	email_pat = re.compile(".+@.+")
	# Matches the literal "To:" followed by one or more non-newline characters
	# and the newline that ends them.
	to_pat = re.compile("To:.+\n")

inkhorn / daycares.R

Created October 17, 2013 02:18

Daycare Analysis

	library(ff)
	library(ffbase)
	library(RgoogleMaps)
	library(plyr)

	addTrans <- function(color,trans)
	{
	# This function adds transparancy to a color.
	# Define transparancy with an integer between 0 and 255
	# 0 being fully transparant and 255 being fully visable