alexhanna’s gists

alexhanna / rupaul.R

Last active December 14, 2015 18:48

RuPaul's Drag Race

alexhanna / sweaveWeb.Rnw

Created March 14, 2013 15:26

When Brett dared me to Sweave Adam's website.

	\documentclass{article}
	\usepackage{graphicx}

	\title{Can I Sweave Adam's Website?}
	\author{Alexander Hanna}

	\begin{document}
	\maketitle

	% put R code here

alexhanna / polClassify.R

Created March 21, 2013 00:43

Political classifier, largely adapted from Machine Learning for Hackers.

	# File-Name: polClassify.R
	# Edited: 2013-03-20
	# Orig.Author: Drew Conway ([email protected])
	#
	# Packages Used: tm, ggplot2
	#

	# All source code is copyright (c) 2012, under the Simplified BSD License.
	# For more information on FreeBSD see: http://www.opensource.org/licenses/bsd-license.php

alexhanna / rupaulModelFit.R

Last active December 15, 2015 09:19

Model fit with residuals

	# Cox model on t.surv using the base covariates plus the *CompLeft columns
	# (WinsCompLeft, HighsCompLeft, LowsCompLeft, LipsyncsCompLeft), with a
	# cluster(ID) term so rows sharing an ID are treated as one cluster.
	# NOTE(review): the *CompLeft columns look like covariate x CompLeft
	# interactions -- confirm against where df is built.
	t.cox2_ph <- coxph(t.surv ~ (Age + PlusSize + PuertoRico + Wins + Highs + Lows + Lipsyncs + CompLeft +
	WinsCompLeft + HighsCompLeft + LowsCompLeft + LipsyncsCompLeft) + cluster(ID), df)
	# Variant that uses LipsyncWithoutOut in place of Lipsyncs and has no
	# *CompLeft columns.
	t.cox3s <- coxph(t.surv ~ (Age + PlusSize + PuertoRico + Wins + Highs + Lows + LipsyncWithoutOut + CompLeft) + cluster(ID), df)

	# Zero-row data frame with ID / Residuals / Model columns.
	# NOTE(review): presumably filled in per model by the loop that follows -- confirm.
	model.df <- data.frame(ID = integer(0), Residuals = double(0), Model = character(0))
	# Fitted models keyed by a short label. t.cox2 and t.cox3 are not defined
	# in the lines shown here and must already exist in the session.
	model.list <- list(c2 = t.cox2, c2ph = t.cox2_ph, c3 = t.cox3, c3s = t.cox3s)

	for (i in 1:length(model.list)) {
	name <- names(model.list[i])
	cMod <- model.list[[i]]

alexhanna / schema.sql

Last active August 29, 2017 07:02

Creating Twitter Hive schema.

	-- Session settings applied before the table definition that follows.
	-- Turn on compression of query output.
	SET hive.exec.compress.output=true;
	-- Upper bound on input split size; presumably bytes (~256 MB) -- TODO confirm.
	SET mapred.max.split.size=256000000;
	-- Compress output with the BLOCK compression type and the Snappy codec.
	SET mapred.output.compression.type=BLOCK;
	SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec;
	-- Enable dynamic partitioning and set its mode to nonstrict.
	SET hive.exec.dynamic.partition.mode=nonstrict;
	SET hive.exec.dynamic.partition=true;

	CREATE EXTERNAL TABLE gh_raw (
	id BIGINT,
	created_at STRING,

alexhanna / sentimentTweet.py

Created April 22, 2014 13:03

Gist for generating sentiment scores for political tweets from the gardenhose and a focused sample

	from __future__ import division

	import csv, logging, math, os.path
	import pickle, random, re, string
	import time
	import numpy as np
	import pandas as pd

	import nltk.data
	from nltk.tokenize.regexp import WordPunctTokenizer

alexhanna / sentiment.R

Created April 22, 2014 13:04

Plot sentiment for candidates

	#!/usr/bin/env Rscript

	library(ggplot2)
	library(grid)
	library(lubridate)
	library(scales)

	# datetimeToEasternDate <- function(x) {
	# ## create as UTC
	# x <- as.POSIXct(x, format="%Y-%m-%d %H:%M", tz="UTC")

alexhanna / asareport2013.csv

Created August 12, 2014 21:56

alexhanna / asareport.R

Created August 12, 2014 23:13


	library(ggplot2)
	library(grid)

	## Data from http://www.asanet.org/documents/research/pdfs/2013_ASA_Job_Bank_Analysis.pdf
	df <- read.csv("../data/asa2013report.csv", header = TRUE)

	## Exclude categories with high volume in jobs but no grads.
	## which() drops rows whose GradCount is NA; indexing with the raw logical
	## comparison would instead insert an all-NA row for each missing count.
	df <- df[which(df$GradCount > 0), ]

alexhanna / gradient-color.R

Last active August 29, 2015 14:06

	# Tile plot: one narrow tile per (Margin, variable) pair, filled by Class,
	# with tile opacity mapped to value.
	p <- ggplot(
	  df.p,
	  aes(x = Margin, y = factor(variable), fill = Class, alpha = value)
	) +
	  theme_bw() +
	  geom_tile(color = NA, width = 0.005) +
	  scale_fill_manual(
	    values = wes.palette(2, "Royal1"),
	    labels = c("False Positives", "True Positives")
	  ) +
	  theme(
	    panel.grid.major = element_blank(),
	    panel.grid.minor = element_blank()
	  ) +
	  theme(axis.text.y = element_text(size = 7)) +
	  ylab("Feature")

	# Name both arguments explicitly: the earlier call passed the plot first and
	# relied on `file` partially matching `filename`, which only worked by
	# accident of argument matching.
	ggsave(
	  filename = "../img/linearsvc_no-fs_top100_fp-v-tp_20140916.png",
	  plot = p,
	  width = 16,
	  height = 9
	)

Alex Hanna alexhanna