This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(miniCRAN)
library(gender)
library(stringr)

# Get package description data.
# Building it from CRAN took about an hour to run, so the two commands that do
# that are left commented out and the data is loaded directly from a CSV below.
# pkgs <- available.packages("http://cran.rstudio.com/src/contrib")
# desc <- getCranDescription(pkgs, repos = c(CRAN="http://cran.rstudio.com"))
desc <- read.csv("http://www.stat.berkeley.edu/~ledell/data/RStudioCRAN_pkgDesc_20141216.csv")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(h2o)
localH2O <- h2o.init(nthreads = -1)  # Start up H2O cluster using nthreads = ncores

# Get training data:
data <- h2o.importFile(
  "http://www.stat.berkeley.edu/~ledell/data/wisc-diag-breast-cancer-shuffled.csv",
  destination_frame = "breast_cancer"
)

y <- "diagnosis"                       # Response column
x <- setdiff(names(data), c(y, "id"))  # Predictors: drop 'id' and the response col
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Also available here: http://h2o-release.s3.amazonaws.com/h2o/rel-simons/7/index.html#R

# The following two commands remove any previously installed H2O packages for R.
if ("package:h2o" %in% search()) {
  detach("package:h2o", unload = TRUE)
}
if ("h2o" %in% rownames(installed.packages())) {
  remove.packages("h2o")
}

# Next, we download packages that H2O depends on.
# Each dependency is installed only if it is not already present.
for (pkg in c("methods", "statmod", "stats")) {
  if (!(pkg %in% rownames(installed.packages()))) {
    install.packages(pkg)
  }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A small-data example of binary classification on a local machine using H2O Ensemble
library(h2oEnsemble)
localH2O <- h2o.init(nthreads = -1)  # Start an H2O cluster with nthreads = num cores on your machine

# Import a sample binary outcome train/test set into R.
# Both files are read as comma-separated tables into ordinary R data frames.
train <- read.table("http://www.stat.berkeley.edu/~ledell/data/higgs_10k.csv", sep = ",")
test <- read.table("http://www.stat.berkeley.edu/~ledell/data/higgs_test_5k.csv", sep = ",")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install the h2oEnsemble package from the h2oai/h2o-3 GitHub repository
# (subdirectory h2o-r/ensemble/h2oEnsemble-package).
library(devtools)
install_github("h2oai/h2o-3/h2o-r/ensemble/h2oEnsemble-package")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Update for SuperLearner::CVFolds function that enables stratification by outcome and cluster ID | |
CVFolds2 <- function (N, id, Y, cvControl) { | |
if (!is.null(cvControl$validRows)) { | |
return(cvControl$validRows) | |
} | |
stratifyCV <- cvControl$stratifyCV | |
shuffle <- cvControl$shuffle | |
V <- cvControl$V | |
if (!stratifyCV) { ### Not Stratified |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Example of how to train an H2O model with folds that are
# stratified both by outcome and a cluster id
library(cvAUC)
data("adherence")  # Load a dataset with an ID column
df <- adherence

# Load a utility function for creating stratified folds.
# NOTE(review): this executes code fetched from a remote gist; the URL appears
# to point at one specific revision of CVFolds2.R -- confirm before reuse.
source("https://gist.githubusercontent.com/ledell/bd4e227d4e5ff426c41d/raw/708eb429fa1954a140d65a6a42ce93847affd67c/CVFolds2.R")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(h2o)
h2o.init(nthreads = -1)  # This means nthreads = num available cores

# Locations of the gzipped MNIST train/test CSV files
train_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/train.csv.gz"
test_file <- "https://h2o-public-test-data.s3.amazonaws.com/bigdata/laptop/mnist/test.csv.gz"

# Import both files into the H2O cluster
train <- h2o.importFile(train_file)
test <- h2o.importFile(test_file)

# To see a brief summary of the data, run the following command
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### Lending Club example using cleaned up dataset & h2o.ensemble ###
library(h2o)
h2o.init(nthreads = -1, max_mem_size = "8G")

loan_csv <- "https://raw.githubusercontent.com/h2oai/app-consumer-loan/master/data/loan.csv"
data <- h2o.importFile(loan_csv)  # 163994 x 15

# Convert the bad_loan column to a factor
data$bad_loan <- as.factor(data$bad_loan)

# Seeded uniform random column built from the frame
# NOTE(review): presumably used further down to split the data -- confirm
rand <- h2o.runif(data, seed = 1)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# install.packages("h2o", type="source", repos=(c("http://h2o-release.s3.amazonaws.com/h2o/rel-slater/5/R")))
library(h2o)

# Connect to an H2O cluster at the given address.
# NOTE: "XX.XX.XX.XX" is a placeholder and must be replaced with a real IP.
localH2O <- h2o.init(ip = "XX.XX.XX.XX", port = 54321)

#higgs <- h2o.importFile("/home/0xdiag/datasets/higgs/HIGGS.csv", destination_frame = "higgs") #Local copy
higgs <- h2o.importFile(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/00280/HIGGS.csv.gz",
  destination_frame = "higgs"
)
dim(higgs)  # 11M x 29
higgs$C1 <- as.factor(higgs$C1)  # Encode response as categorical
Older Newer