This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(RCurl) | |
library(ggplot2) | |
library(survival) | |
## plot Kaplan-Meier curve | |
plotKM <- function(df, metric) { | |
## make age categorical | |
df$Age[df$Age < 25] <- 1 | |
df$Age[df$Age >= 25 & df$Age < 31] <- 2 | |
df$Age[df$Age >= 31] <- 3 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
\documentclass{article} | |
\usepackage{graphicx} | |
\title{Can I Sweave Adam's Website?} | |
\author{Alexander Hanna} | |
\begin{document} | |
\maketitle | |
% put R code here |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# File-Name: polClassify.R | |
# Edited: 2013-03-20 | |
# Orig.Author: Drew Conway ([email protected]) | |
# | |
# Packages Used: tm, ggplot2 | |
# | |
# All source code is copyright (c) 2012, under the Simplified BSD License. | |
# For more information on FreeBSD see: http://www.opensource.org/licenses/bsd-license.php |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Cox model on t.surv with the main effects plus the interaction of
## Wins, Highs, Lows and Lipsyncs with CompLeft, and a cluster(ID) term.
t.cox2_ph <- coxph(
  t.surv ~ (Age + PlusSize + PuertoRico + Wins + Highs + Lows + Lipsyncs + CompLeft +
    Wins * CompLeft + Highs * CompLeft + Lows * CompLeft + Lipsyncs * CompLeft) +
    cluster(ID),
  data = df
)

## Main-effects Cox model that uses LipsyncWithoutOut in place of Lipsyncs.
t.cox3s <- coxph(
  t.surv ~ (Age + PlusSize + PuertoRico + Wins + Highs + Lows + LipsyncWithoutOut + CompLeft) +
    cluster(ID),
  data = df
)

## Zero-row data frame with the columns ID, Residuals and Model.
model.df <- data.frame(
  ID = integer(),
  Residuals = numeric(),
  Model = character()
)

## Named list of the fitted models; t.cox2 and t.cox3 are fitted outside
## this excerpt.
model.list <- list(
  c2 = t.cox2,
  c2ph = t.cox2_ph,
  c3 = t.cox3,
  c3s = t.cox3s
)
for (i in 1:length(model.list)) { | |
name <- names(model.list[i]) | |
cMod <- model.list[[i]] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SET hive.exec.compress.output=true; | |
SET mapred.max.split.size=256000000; | |
SET mapred.output.compression.type=BLOCK; | |
SET mapred.output.compression.codec=org.apache.hadoop.io.compress.SnappyCodec; | |
SET hive.exec.dynamic.partition.mode=nonstrict; | |
SET hive.exec.dynamic.partition=true; | |
CREATE EXTERNAL TABLE gh_raw ( | |
id BIGINT, | |
created_at STRING, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import csv, logging, math, os.path | |
import pickle, random, re, string | |
import time | |
import numpy as np | |
import pandas as pd | |
import nltk.data | |
from nltk.tokenize.regexp import WordPunctTokenizer |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env Rscript | |
library(ggplot2) | |
library(grid) | |
library(lubridate) | |
library(scales) | |
# datetimeToEasternDate <- function(x) { | |
# ## create as UTC | |
# x <- as.POSIXct(x, format="%Y-%m-%d %H:%M", tz="UTC") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Spec | GradCount | JobCount | |
---|---|---|---|
Sex and Gender | 108 | 31 | |
Education | 83 | 9 | |
Medical Sociology | 83 | 15 | |
Race, Class, and Gender | 79 | 15 | |
Racial and Ethnic Relations | 75 | 43 | |
Cultural Sociology | 68 | 30 | |
Crime/Delinquency | 66 | 89 | |
Environmental Sociology | 65 | 34 | |
Social Psychology | 65 | 17 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(ggplot2) | |
library(grid) | |
## data from http://www.asanet.org/documents/research/pdfs/2013_ASA_Job_Bank_Analysis.pdf | |
## Read the report table; the path is relative to the working directory.
df <- read.csv("../data/asa2013report.csv", header = TRUE)
## exclude categories with high volume in jobs but no grads.
## which() keeps only rows where the test is TRUE, so a missing GradCount
## drops the row; a bare logical index would return an all-NA row for it.
df <- df[which(df$GradCount > 0), ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Tile plot of df.p: Margin on the x axis, factor(variable) on the y axis
## (labelled "Feature"), tiles filled by Class and shaded by value.
p <- ggplot(df.p, aes(x = Margin, y = factor(variable), fill = Class, alpha = value))
p <- p + theme_bw() +
  geom_tile(color = NA, width = 0.005) +
  scale_fill_manual(
    values = wes.palette(2, "Royal1"),
    labels = c("False Positives", "True Positives")
  )
## remove major and minor grid lines
p <- p + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())
p <- p + theme(axis.text.y = element_text(size = 7)) + ylab("Feature")
## Name filename and plot explicitly: the previous call, ggsave(p, file = ...),
## only worked because `file` partially matched `filename` and `p` then fell
## into the `plot` slot.
ggsave(
  filename = "../img/linearsvc_no-fs_top100_fp-v-tp_20140916.png",
  plot = p,
  width = 16,
  height = 9
)
OlderNewer