Skip to content

Instantly share code, notes, and snippets.

# File-Name: Scotch_Pref.R
# Date: 2009-11-29
# Author: Drew Conway
# Purpose: Display one-dimensional item response for scotch whiskey preference
# Data Used: whiskey, package=flexmix
# Packages Used: zelig,ggplot
# Output File: scotch_pref.png
# Data Output:
# Machine: Drew Conway's MacBook
library(geoR)
library(geoRglm)
# Perform MCMC simulations
# Model specification handed to glsm.mcmc(): Poisson family, with both
# covariance parameters and beta set to 1.
model <- list(
  cov.pars = c(1, 1),
  beta = 1,
  family = "poisson"
)
# Sampler controls: S.scale of 0.45 and thin = 1.
mcmc.test <- mcmc.control(S.scale = 0.45, thin = 1)
# Run the sampler on haiti.geo, then pass the result through
# prepare.likfit.glsm().
test.tune <- glsm.mcmc(haiti.geo, model = model, mcmc.input = mcmc.test)
haiti.mcmc <- prepare.likfit.glsm(test.tune)
# Prior control object with phi held fixed at 0.1.
prior <- prior.glm.control(phi.prior = "fixed", phi = 0.1)
library(ggplot2)
library(XML)
### Meetup topics word cloud ###
# Get the raw meetup descriptions as a character vector of the distinct lines
# in descriptions.txt (one record per line, because sep = "\n").
# Since R 4.0.0 read.table() no longer converts strings to factors by default,
# so levels() on the column would return NULL; request factors explicitly so
# the result is the same on every R version.
raw_desc <- levels(
  read.table("descriptions.txt", sep = "\n", stringsAsFactors = TRUE)$V1
)
clean_strings<-function(s){
low<-tolower(s)
clean<-gsub("[[:punct:]\n]","",low)
### Meetup activity history ###
# Load the three Meetup export files
joins <- read.csv(
  "New_York_R_Statistical_Programming_Meetup_Groups_Joins.csv"
)
rsvp <- read.csv(
  "New_York_R_Statistical_Programming_Meetup_RSVPs.csv"
)
activity <- read.csv(
  "New_York_R_Statistical_Programming_Meetup_Total_and_Active_Members.csv"
)
# Merge the data into a single frame: fold the tables together left to right,
# keeping every Date that appears in any of them (all = TRUE).
all <- Reduce(
  function(left, right) merge(left, right, by = "Date", all = TRUE),
  list(joins, rsvp, activity)
)
# Plain vector copy of the merged Date column
all_dates <- as.vector(all[["Date"]])
# From the FrumForum.com Tea Party survey we see the following result for
# the questions:
# Question 1: In approximate percentage terms, how much is the U.S. (federal)
# government currently taking out of the U.S. economy in taxation?
#
# Mean: 42.06%
# STD: 19.06%
# Actual: 31.5%
#
# Visualize data and save
# Density of the tax1 responses with a vertical line at the actual value
# (31.5), written to Tax_percentage.png.
# The plot is wrapped in print() because a top-level ggplot expression is only
# drawn by auto-printing; when this script is run through source() nothing
# would be rendered and the PNG would come out empty.
# NOTE(review): opts() and theme_text() are the old ggplot2 theming API and
# are not available in later ggplot2 releases - confirm the installed version.
png("Tax_percentage.png", height = 1000, width = 1000, res = 100)
print(
  ggplot(frum_data, aes(tax1)) +
    stat_density() +
    geom_vline(aes(xintercept = 31.5, colour = "Actual Value")) +
    opts(
      title = "In approximate percentage terms, how much is the U.S. (federal)\ngovernment currently taking out of the U.S. economy in taxation?",
      plot.title = theme_text(size = 12)
    ) +
    xlab("Simulated Tea Party Survey Response Distribution") +
    ylab("Density")
)
dev.off()
# Density of the tax2 responses with a vertical line at the actual value
# (7.5), written to Tax_family.png.
# The plot is wrapped in print() because a top-level ggplot expression is only
# drawn by auto-printing; when this script is run through source() nothing
# would be rendered and the PNG would come out empty.
# NOTE(review): opts() and theme_text() are the old ggplot2 theming API and
# are not available in later ggplot2 releases - confirm the installed version.
png("Tax_family.png", height = 1000, width = 1000, res = 100)
print(
  ggplot(frum_data, aes(tax2)) +
    stat_density() +
    geom_vline(aes(xintercept = 7.5, colour = "Actual Value")) +
    opts(
      title = "How much federal income tax do you think a typical family\nearning $50,000 pays (in 1,000 dollars)?",
      plot.title = theme_text(size = 12)
    ) +
    xlab("Simulated Tea Party Survey Response Distribution") +
    ylab("Density")
)
dev.off()
library(survival)
library(ggplot2)
library(Design)
### Test data generation ###
# Chi-square test: draw `count` values from a chi-square distribution
# with 1 degree of freedom
count <- 1000
cs1 <- rchisq(n = count, df = 1)
### Generate our data ###
# Time to seeing a penis: 30 chi-square draws (df = 2), rounded to whole numbers
penis <- round(rchisq(n = 30, df = 2))
# A constant 1 and a type label, one per draw
p_cens <- rep(1, times = length(penis))
p_type <- rep("penis", times = length(penis))
# Column-bind the three vectors; because p_type is character, cbind() returns
# a character matrix with columns penis, p_cens and p_type
p_bind <- cbind(penis, p_cens, p_type)
# Time to seeing a lonely dude: 40 chi-square draws (df = 1), rounded
lonely <- round(rchisq(n = 40, df = 1))
### Perform analysis and make pretty pictures ###
# Open the PNG device and arrange the panels on a 2 x 2 grid
png("surv_plot.png", height = 1000, width = 1000, res = 100)
par(mfrow = c(2, 2))

# Panel for the "lonely" rows of surv_data
survplot(
  survfit(
    Surv(time, censor) ~ type,
    data = subset(surv_data, type == "lonely")
  ),
  what = "survival",
  conf = "bands",
  xlab = "Minutes"
)
title("Survival function for seeing a lonely guy")

# Panel for the "penis" rows of surv_data
survplot(
  survfit(
    Surv(time, censor) ~ type,
    data = subset(surv_data, type == "penis")
  ),
  what = "survival",
  conf = "bands",
  xlab = "Minutes",
  main = "Survival function for seeing a penis"
)
title("Survival function for seeing a penis")

# Panel for the "drunk" rows of surv_data
survplot(
  survfit(
    Surv(time, censor) ~ type,
    data = subset(surv_data, type == "drunk")
  ),
  what = "survival",
  conf = "bands",
  xlab = "Minutes",
  main = "Survival function for seeing two or more drunk people"
)
title("Survival function for seeing two or more drunk people")
#### The following code produces a crash of ggplot2 ####
# Load data: <hashtag>_infochimps.csv from the working directory
hashtag <- "rstats"
infochimps <- read.csv(paste(hashtag, "_infochimps.csv", sep = ""))
# Produce plot
# NOTE(review): the PNG device opened here is not closed within this excerpt,
# and the plot object is not printed - confirm both happen further down.
png(
  paste(hashtag, "_infochimps_metric.png", sep = ""),
  height = 800, width = 800, res = 100
)
# Screen names drawn as text at (log(followers/friends), trstrank), coloured
# by tweet.hash and sized by replies_out / replies_in
ic.plot <- ggplot(
  infochimps,
  aes(x = log(followers_count / friends_count), y = trstrank)
) +
  geom_text(
    aes(
      label = screen_name,
      color = tweet.hash,
      size = replies_out / replies_in
    )
  )
# Axis labels and title. The pieces are joined with sep = "", so the space
# after "for" has to be part of the literal; without it the title rendered as
# "Key Actor Analysis forrstats with Infochimps.org Data".
ic.plot <- ic.plot +
  xlab(expression(log[frac(Followers, Friends)])) +
  ylab("Infochimps.org trstrank") +
  opts(
    title = paste(
      "Key Actor Analysis for ", hashtag, " with Infochimps.org Data",
      sep = ""
    )
  )