Skip to content

Instantly share code, notes, and snippets.

View revodavid's full-sized avatar

David Smith revodavid

View GitHub Profile
# Fit a logistic regression of the bad_widget flag on x using the training rows.
fit_glm <- glm(
  bad_widget ~ x,
  data = training_set,
  family = binomial(link = "logit")
)

# Score the held-out rows twice: once on the link (log-odds) scale and once on
# the response (probability) scale.
glm_link_scores <- predict(fit_glm, newdata = test_set, type = "link")
glm_response_scores <- predict(fit_glm, newdata = test_set, type = "response")

# Collect both score vectors next to the true label of each test row.
score_data <- data.frame(
  link = glm_link_scores,
  response = glm_response_scores,
  bad_widget = test_set$bad_widget,
  stringsAsFactors = FALSE
)
# Fix the RNG state so the simulated data and the train/test split below are
# reproducible.
set.seed(1)

# Simulate N widgets.
#
# Arguments:
#   N      number of rows to simulate.
#   noise  standard deviation of the Gaussian noise added to y.
#
# Returns a data.frame with columns
#   x           uniform draws on [0, 100],
#   y           122 - x / 2 plus Gaussian noise,
#   bad_widget  factor with levels "FALSE"/"TRUE", TRUE where y > 100.
sim_widget_data <- function(N, noise = 100) {
  x <- runif(N, min = 0, max = 100)
  y <- 122 - x / 2 + rnorm(N, sd = noise)
  # Pin both levels: without `levels`, a sample that happens to contain a
  # single class (or N = 0) would produce a factor with fewer than two levels.
  bad_widget <- factor(y > 100, levels = c(FALSE, TRUE))
  data.frame(x, y, bad_widget)
}

widget_data <- sim_widget_data(500, 10)

# Draw a random quarter of the row indices (without replacement) for the test
# set. seq_len() is used instead of 1:n so an empty data frame cannot yield
# the sequence c(1, 0).
test_set_idx <- sample(
  seq_len(nrow(widget_data)),
  size = floor(nrow(widget_data) / 4)
)
# Compute the points of an empirical ROC curve.
#
# Arguments:
#   labels  logical (or 0/1) vector; TRUE marks a positive case.
#   scores  numeric vector of the same length; higher means "more positive".
#
# Returns a data.frame with one row per observation, ordered by decreasing
# score, with columns
#   TPR     cumulative share of positives seen so far,
#   FPR     cumulative share of negatives seen so far,
#   labels  the reordered labels.
# Tied scores are not collapsed: every observation gets its own row.
simple_roc <- function(labels, scores) {
  # A length mismatch would otherwise be silently absorbed by the indexing
  # below (labels truncated or padded with NA), giving a wrong curve.
  stopifnot(length(labels) == length(scores))

  labels <- labels[order(scores, decreasing = TRUE)]
  data.frame(
    TPR = cumsum(labels) / sum(labels),
    FPR = cumsum(!labels) / sum(!labels),
    labels
  )
}
# The presence of an object named `dataset` is used as the signal that the
# script is running inside Power BI.
inPowerBI <- exists("dataset")

if (inPowerBI) {
  weatherHistory <- dataset

  # Glue the Year/Month/Day columns into "Y/M/D" strings and parse them.
  # "%b" parses the month as a name (e.g. "Jan"), not as a number.
  weatherHistory$Date <- as.Date(
    paste(dataset$Year, dataset$Month, dataset$Day, sep = "/"),
    format = "%Y/%b/%d"
  )

  # Debugging aids, left disabled:
  # warning(paste(dataset$Year, dataset$Month, dataset$Day, sep = "/")[1])
  # warning(weatherHistory$Date[1])
}
# Load the checkpoint package and call it with the snapshot date 2016-04-22
# (pins the package versions used by the script to that date).
library(checkpoint)
checkpoint("2016-04-22")
# NOTE(review): the next two lines repeat the two above verbatim; presumably
# two separate snippets were concatenated here. Confirm before removing.
library(checkpoint)
checkpoint("2016-04-22")
library(weatherData)
# Settings for the weather query.
# NOTE(review): "SJC" is presumably the station/airport code passed to
# weatherData, and the years an inclusive range. Confirm against the code
# that consumes these variables (not visible in this excerpt).
city <- "SJC"
cityLongName <- "San Jose"
yearStart <- 1991
yearEnd <- 2015
rf_ga3
1010 samples
58 predictors
2 classes: 'PS', 'WS'
Maximum generations: 100
Population per generation: 20
Crossover probability: 0.8
Mutation probability: 0.1
Elitism: 0
@revodavid
revodavid / mtcars-cat.R
Created November 30, 2015 17:09 — forked from dill/mtcars-cat.R
categorical emojis -- like this Mark?
library(emoGG)
library(ggplot2)

# Recode the transmission flag `am` into emoji code points: 1 becomes "1f697"
# (car) and 0 becomes "1f68c" (bus). The first assignment turns the column
# into character, so the second comparison matches the string "0".
mtcars$am[mtcars$am == 1] <- "1f697"
mtcars$am[mtcars$am == 0] <- "1f68c"

# Plot mpg against weight, taking the emoji for each point from `am`.
ggplot(data = mtcars, mapping = aes(x = wt, y = mpg, emoji = am)) +
  geom_emoji()
@revodavid
revodavid / turkey.R
Last active December 31, 2015 22:12
## R script and data by Kieran Healy
## https://twitter.com/kjhealy/status/669567682178654208
library(ggplot2)

# Location of the CSV, stored without the URL scheme; "http://" is prepended
# when the file is read.
datafile <- "blog.revolutionanalytics.com/downloads/tdata.csv"
x <- read.csv(paste0("http://", datafile))

# One tile per (H, T) position. scale_fill_identity() uses the values in `tc`
# directly as fill colours instead of mapping them through a palette.
ggplot(x) +
  geom_tile(aes(x = H, y = T, fill = tc)) +
  scale_fill_identity()
## source: https://github.com/toddwschneider/nyc-taxi-data/blob/master/analysis/analysis.R
# Fetch every row of dropoff_by_lat_long_cab_type, ordered by count.
# NOTE(review): `query` is not defined in this excerpt; presumably a database
# helper from the linked source file. Confirm there.
dropoffs = query("SELECT * FROM dropoff_by_lat_long_cab_type ORDER BY count")
# Convert cab_type_id to a factor (it is mapped to point colour below).
# NOTE(review): `mutate` is presumably dplyr::mutate; the library call is not
# visible here.
dropoffs = mutate(dropoffs, cab_type_id = factor(cab_type_id))
# Start the plot: near-black polygons from ex_staten_island_map as the base
# layer, then one point per drop-off row with alpha and size both driven by
# count. `ex_staten_island_map` is defined outside this excerpt.
p = ggplot() +
geom_polygon(data = ex_staten_island_map,
aes(x = long, y = lat, group = group),
fill = "#080808", color = "#080808") +
geom_point(data = dropoffs,
aes(x = dropoff_long, y = dropoff_lat, alpha = count, size = count, color = cab_type_id)) +
# NOTE(review): the excerpt stops after the trailing `+` above, so the plot
# expression is incomplete here; the remaining layers are in the linked source.
# Load the checkpoint package and call it with the snapshot date 2015-03-04
# (pins the package versions used by the script to that date).
library(checkpoint)
checkpoint("2015-03-04")

# library() stops immediately when devtools is missing; require() would only
# warn and return FALSE, leaving install_github() to fail later with a less
# helpful "could not find function" error.
library(devtools)

## no way to install EBImage reproducibly
# NOTE(review): this downloads and executes a remote script over plain HTTP,
# so the code that runs is neither pinned nor integrity-checked. Left as in
# the original; consider a pinned Bioconductor release if one is available.
source("http://bioconductor.org/biocLite.R")
biocLite("EBImage")

# latest commits as of 2015-03-04
install_github(
  "ramnathv/rblocks",
  ref = "a85e748390c17c752cc0ba961120d1e784fb1956"
)