Last active
August 29, 2015 14:26
-
-
Save hafen/8805a032dda8cc73768a to your computer and use it in GitHub Desktop.
A quick datadr / trelliscope demo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## installation
##---------------------------------------------------------
# install devtools only when it is missing, so re-running this script does
# not reinstall it every time
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
# GitHub packages used by the examples below
devtools::install_github("tesseradata/datadr")
devtools::install_github("tesseradata/trelliscope")
devtools::install_github("hafen/housingData")
devtools::install_github("bokeh/rbokeh")
## quick trelliscope example
##---------------------------------------------------------
# load packages
library(trelliscope)
library(rbokeh)
library(housingData)
# plot median monthly list price vs. time
#
# x: data frame for one subset, with columns `time` and `medListPriceSqft`
# returns: the result of adding an ly_points() layer to a new figure()
panel <- function(x) {
  figure() %>%
    ly_points(time, medListPriceSqft, data = x)
}
# apply this plot to each county/state in the data and view
housing %>%
  qtrellis(by = c("county", "state"), panel, layout = c(2, 4))
## more involved example
##---------------------------------------------------------
# divide housing data by county and state
byCounty <- divide(housing, by = c("county", "state"))
# look at byCounty object
byCounty
# look at a subset of byCounty
byCounty[[1]]
# create a panel function of list and sold price vs. time
#
# x: data frame for one subset, with columns `time`, `medListPriceSqft`
#   and `medSoldPriceSqft`
# returns: the lattice plot object built by xyplot()
timePanel <- function(x) {
  # xyplot is namespace-qualified so the panel does not rely on lattice
  # having been attached by another package
  lattice::xyplot(
    medListPriceSqft + medSoldPriceSqft ~ time,
    data = x,
    type = c("p", "g"),
    auto.key = TRUE,
    ylab = "Price / Sq. Ft.",
    par.settings = list(superpose.symbol = list(pch = 19, alpha = 0.7))
  )
}
# test function on a subset
timePanel(byCounty[[20]]$value)
# create a cognostics function of metrics of interest
#
# x: data frame for one subset, with columns `time`, `medListPriceSqft`
#   and `medSoldPriceSqft`; its split variables are read via getSplitVars()
# returns: a named list of cognostics (slope, meanList, meanSold, nObs,
#   zillowHref)
priceCog <- function(x) {
  # paste the split-variable values together and replace spaces with
  # hyphens to form the path segment of the zillow URL
  zillowString <- gsub(" ", "-", do.call(paste, getSplitVars(x)))
  list(
    # second coefficient of the fit, i.e. the coefficient on `time`
    slope = cog(
      coef(lm(medListPriceSqft ~ time, data = x))[2],
      desc = "list price slope"
    ),
    meanList = cogMean(x$medListPriceSqft),
    meanSold = cogMean(x$medSoldPriceSqft),
    nObs = cog(
      sum(!is.na(x$medListPriceSqft)),
      desc = "number of non-NA list prices"
    ),
    zillowHref = cogHref(
      sprintf("http://www.zillow.com/homes/%s_rb/", zillowString),
      desc = "zillow link"
    )
  )
}
# test cognostics function on a subset
priceCog(byCounty[[1]]$value)
# create the display and add to vdb
makeDisplay(byCounty,
  name = "list_sold_vs_time_quickstart",
  desc = "List and sold price over time",
  panelFn = timePanel,
  cogFn = priceCog)
# view the result
view()
# if you want to put these out on shinyapps.io:
# http://shiny.rstudio.com/articles/shinyapps.html
# deployToShinyApps()
## datadr example
##---------------------------------------------------------
# look at housing data
head(housing)
# divide by county and state
byCounty <- divide(housing,
  by = c("county", "state"), update = TRUE)
# look at result
byCounty
# look at a subset
byCounty[[1]]
# look at a subset by key
byCounty[["county=New York County|state=NY"]]
# apply a transformation to get slope of fitted line of list price vs. time
#
# x: data frame with columns `time` and `medListPriceSqft`
# returns: a length-one numeric vector named "time" holding the fitted
#   coefficient on `time`; NA when no row has both values present
lmCoef <- function(x) {
  # lm() stops with an error when there are zero usable rows; return a
  # missing slope instead, named like the fitted coefficient
  if (!any(complete.cases(x[c("time", "medListPriceSqft")]))) {
    return(c(time = NA_real_))
  }
  coef(lm(medListPriceSqft ~ time, data = x))[2]
}
byCountySlope <- addTransform(byCounty, lmCoef)
# look at a subset
byCountySlope[[1]]
# recombine the slope coefficients into a data frame
countySlopes <- recombine(byCountySlope, combRbind)
head(countySlopes)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment