elyase · August 29, 2015 14:10
diff --git a/gistfile1.r b/gistfile1.r
 function (x, y = NULL, xtest = NULL, ytest = NULL, ntree = 500, 
            mtry = if (!is.null(y) && !is.factor(y)) max(floor(ncol(x)/3), 
                                                         1) else floor(sqrt(ncol(x))), replace = TRUE, classwt = NULL, 
            cutoff, strata, sampsize = if (replace) nrow(x) else ceiling(0.632 * 
                                                                           nrow(x)), nodesize = if (!is.null(y) && !is.factor(y)) 5 else 1, 
            maxnodes = NULL, importance = FALSE, localImp = FALSE, nPerm = 1, 
            proximity, oob.prox = proximity, norm.votes = TRUE, do.trace = FALSE, 
            keep.forest = !is.null(y) && is.null(xtest), corr.bias = FALSE, 
            keep.inbag = FALSE, ...) 
  {
    addclass <- is.null(y)
    classRF <- addclass || is.factor(y)
    if (!classRF && length(unique(y)) <= 5) {
      warning("The response has five or fewer unique values.  Are you sure you want to do regression?")
    }
    if (classRF && !addclass && length(unique(y)) < 2) 
      stop("Need at least two classes to do classification.")
    n <- nrow(x)
    p <- ncol(x)
    if (n == 0) 
      stop("data (x) has 0 rows")
    x.row.names <- rownames(x)
    x.col.names <- if (is.null(colnames(x))) 
      1:ncol(x)
    else colnames(x)
    keep.forest <- keep.forest
    testdat <- !is.null(xtest)
    if (testdat) {
      if (ncol(x) != ncol(xtest)) 
        stop("x and xtest must have same number of columns")
      ntest <- nrow(xtest)
      xts.row.names <- rownames(xtest)
    }
    if (mtry < 1 || mtry > p) 
      warning("invalid mtry: reset to within valid range")
    mtry <- max(1, min(p, round(mtry)))
    if (!is.null(y)) {
      if (length(y) != n) 
        stop("length of response must be the same as predictors")
      addclass <- FALSE
    }
    else {
      if (!addclass) 
        addclass <- TRUE
      y <- factor(c(rep(1, n), rep(2, n)))
      x <- rbind(x, x)
    }
    if (any(is.na(x))) 
      stop("NA not permitted in predictors")
    if (testdat && any(is.na(xtest))) 
      stop("NA not permitted in xtest")
    if (any(is.na(y))) 
      stop("NA not permitted in response")
    if (!is.null(ytest) && any(is.na(ytest))) 
      stop("NA not permitted in ytest")
    if (is.data.frame(x)) {
      xlevels <- lapply(x, mylevels)
      ncat <- sapply(xlevels, length)
      ncat <- ifelse(sapply(x, is.ordered), 1, ncat)
      x <- data.matrix(x)
      if (testdat) {
        if (!is.data.frame(xtest)) 
          stop("xtest must be data frame if x is")
        xfactor <- which(sapply(xtest, is.factor))
        if (length(xfactor) > 0) {
          for (i in xfactor) {
            if (any(!levels(xtest[[i]]) %in% xlevels[[i]])) 
              stop("New factor levels in xtest not present in x")
            xtest[[i]] <- factor(xlevels[[i]][match(xtest[[i]], 
                                                    xlevels[[i]])], levels = xlevels[[i]])
          }
        }
        xtest <- data.matrix(xtest)
      }
    }
    else {
      ncat <- rep(1, p)
      xlevels <- as.list(rep(0, p))
    }
    maxcat <- max(ncat)
    if (maxcat > 53) 
      stop("Can not handle categorical predictors with more than 53 categories.")
    if (classRF) {
      nclass <- length(levels(y))
      if (any(table(y) == 0)) 
        stop("Can't have empty classes in y.")
      if (!is.null(ytest)) {
        if (!is.factor(ytest)) 
          stop("ytest must be a factor")
        if (!all(levels(y) == levels(ytest))) 
          stop("y and ytest must have the same levels")
      }
      if (missing(cutoff)) {
        cutoff <- rep(1/nclass, nclass)
      }
      else {
        if (sum(cutoff) > 1 || sum(cutoff) < 0 || !all(cutoff > 
                                                         0) || length(cutoff) != nclass) {
          stop("Incorrect cutoff specified.")
        }
        if (!is.null(names(cutoff))) {
          if (!all(names(cutoff) %in% levels(y))) {
            stop("Wrong name(s) for cutoff")
          }
          cutoff <- cutoff[levels(y)]
        }
      }
      if (!is.null(classwt)) {
        if (length(classwt) != nclass) 
          stop("length of classwt not equal to number of classes")
        if (!is.null(names(classwt))) {
          if (!all(names(classwt) %in% levels(y))) {
            stop("Wrong name(s) for classwt")
          }
          classwt <- classwt[levels(y)]
        }
        if (any(classwt <= 0)) 
          stop("classwt must be positive")
        ipi <- 1
      }
      else {
        classwt <- rep(1, nclass)
        ipi <- 0
      }
    }
    else addclass <- FALSE
    if (missing(proximity)) 
      proximity <- addclass
    if (proximity) {
      prox <- matrix(0, n, n)
      proxts <- if (testdat) 
        matrix(0, ntest, ntest + n)
      else double(1)
    }
    else {
      prox <- proxts <- double(1)
    }
    if (localImp) {
      importance <- TRUE
      impmat <- matrix(0, p, n)
    }
    else impmat <- double(1)
    if (importance) {
      if (nPerm < 1) 
        nPerm <- as.integer(1)
      else nPerm <- as.integer(nPerm)
      if (classRF) {
        impout <- matrix(0, p, nclass + 2)
        impSD <- matrix(0, p, nclass + 1)
      }
      else {
        impout <- matrix(0, p, 2)
        impSD <- double(p)
        names(impSD) <- x.col.names
      }
    }
    else {
      impout <- double(p)
      impSD <- double(1)
    }
    nsample <- if (addclass) 
      2 * n
    else n
    Stratify <- length(sampsize) > 1
    if ((!Stratify) && sampsize > nrow(x)) 
      stop("sampsize too large")
    if (Stratify && (!classRF)) 
      stop("sampsize should be of length one")
    if (classRF) {
      if (Stratify) {
        if (missing(strata)) 
          strata <- y
        if (!is.factor(strata)) 
          strata <- as.factor(strata)
        nsum <- sum(sampsize)
        if (length(sampsize) > nlevels(strata)) 
          stop("sampsize has too many elements.")
        if (any(sampsize <= 0) || nsum == 0) 
          stop("Bad sampsize specification")
        if (!is.null(names(sampsize))) {
          sampsize <- sampsize[levels(strata)]
        }
        if (any(sampsize > table(strata))) 
          stop("sampsize can not be larger than class frequency")
      }
      else {
        nsum <- sampsize
      }
      nrnodes <- 2 * trunc(nsum/nodesize) + 1
    }
    else {
      nrnodes <- 2 * trunc(sampsize/max(1, nodesize - 4)) + 
        1
    }
    if (!is.null(maxnodes)) {
      maxnodes <- 2 * maxnodes - 1
      if (maxnodes > nrnodes) 
        warning("maxnodes exceeds its max value.")
      nrnodes <- min(c(nrnodes, max(c(maxnodes, 1))))
    }
    x <- t(x)
    storage.mode(x) <- "double"
    if (testdat) {
      xtest <- t(xtest)
      storage.mode(xtest) <- "double"
      if (is.null(ytest)) {
        ytest <- labelts <- 0
      }
      else {
        labelts <- TRUE
      }
    }
    else {
      xtest <- double(1)
      ytest <- double(1)
      ntest <- 1
      labelts <- FALSE
    }
    nt <- if (keep.forest) 
      ntree
    else 1
    if (classRF) {
      cwt <- classwt
      threshold <- cutoff
      error.test <- if (labelts) 
        double((nclass + 1) * ntree)
      else double(1)
      rfout <- .C("classRF", x = x, xdim = as.integer(c(p, 
                                                        n)), y = as.integer(y), nclass = as.integer(nclass), 
                  ncat = as.integer(ncat), maxcat = as.integer(maxcat), 
                  sampsize = as.integer(sampsize), strata = if (Stratify) as.integer(strata) else integer(1), 
                  Options = as.integer(c(addclass, importance, localImp, 
                                         proximity, oob.prox, do.trace, keep.forest, replace, 
                                         Stratify, keep.inbag)), ntree = as.integer(ntree), 
                  mtry = as.integer(mtry), ipi = as.integer(ipi), classwt = as.double(cwt), 
                  cutoff = as.double(threshold), nodesize = as.integer(nodesize), 
                  outcl = integer(nsample), counttr = integer(nclass * 
                                                                nsample), prox = prox, impout = impout, impSD = impSD, 
                  impmat = impmat, nrnodes = as.integer(nrnodes), ndbigtree = integer(ntree), 
                  nodestatus = integer(nt * nrnodes), bestvar = integer(nt * 
                                                                          nrnodes), treemap = integer(nt * 2 * nrnodes), 
                  nodepred = integer(nt * nrnodes), xbestsplit = double(nt * 
                                                                          nrnodes), errtr = double((nclass + 1) * ntree), 
                  testdat = as.integer(testdat), xts = as.double(xtest), 
                  clts = as.integer(ytest), nts = as.integer(ntest), 
                  countts = double(nclass * ntest), outclts = as.integer(numeric(ntest)), 
                  labelts = as.integer(labelts), proxts = proxts, errts = error.test, 
                  inbag = if (keep.inbag) matrix(integer(n * ntree), 
                                                 n) else integer(n), DUP = FALSE, PACKAGE = "randomForest")[-1]
      if (keep.forest) {
        max.nodes <- max(rfout$ndbigtree)
        treemap <- aperm(array(rfout$treemap, dim = c(2, 
                                                      nrnodes, ntree)), c(2, 1, 3))[1:max.nodes, , 
                                                                                    , drop = FALSE]
      }
      if (!addclass) {
        out.class <- factor(rfout$outcl, levels = 1:nclass, 
                            labels = levels(y))
        names(out.class) <- x.row.names
        con <- table(observed = y, predicted = out.class)[levels(y), 
                                                          levels(y)]
        con <- cbind(con, class.error = 1 - diag(con)/rowSums(con))
      }
      out.votes <- t(matrix(rfout$counttr, nclass, nsample))[1:n, 
                                                             ]
      oob.times <- rowSums(out.votes)
      if (norm.votes) 
        out.votes <- t(apply(out.votes, 1, function(x) x/sum(x)))
      dimnames(out.votes) <- list(x.row.names, levels(y))
      class(out.votes) <- c(class(out.votes), "votes")
      if (testdat) {
        out.class.ts <- factor(rfout$outclts, levels = 1:nclass, 
                               labels = levels(y))
        names(out.class.ts) <- xts.row.names
        out.votes.ts <- t(matrix(rfout$countts, nclass, ntest))
        dimnames(out.votes.ts) <- list(xts.row.names, levels(y))
        if (norm.votes) 
          out.votes.ts <- t(apply(out.votes.ts, 1, function(x) x/sum(x)))
        class(out.votes.ts) <- c(class(out.votes.ts), "votes")
        if (labelts) {
          testcon <- table(observed = ytest, predicted = out.class.ts)[levels(y), 
                                                                       levels(y)]
          testcon <- cbind(testcon, class.error = 1 - diag(testcon)/rowSums(testcon))
        }
      }
      cl <- match.call()
      cl[[1]] <- as.name("randomForest")
      out <- list(call = cl, type = if (addclass) "unsupervised" else "classification", 
                  predicted = if (addclass) NULL else out.class, err.rate = if (addclass) NULL else t(matrix(rfout$errtr, 
                                                                                                             nclass + 1, ntree, dimnames = list(c("OOB", levels(y)), 
                                                                                                                                                NULL))), confusion = if (addclass) NULL else con, 
                  votes = out.votes, oob.times = oob.times, classes = levels(y), 
                  importance = if (importance) matrix(rfout$impout, 
                                                      p, nclass + 2, dimnames = list(x.col.names, c(levels(y), 
                                                                                                    "MeanDecreaseAccuracy", "MeanDecreaseGini"))) else matrix(rfout$impout, 
                                                                                                                                                              ncol = 1, dimnames = list(x.col.names, "MeanDecreaseGini")), 
                  importanceSD = if (importance) matrix(rfout$impSD, 
                                                        p, nclass + 1, dimnames = list(x.col.names, c(levels(y), 
                                                                                                      "MeanDecreaseAccuracy"))) else NULL, localImportance = if (localImp) matrix(rfout$impmat, 
                                                                                                                                                                                  p, n, dimnames = list(x.col.names, x.row.names)) else NULL, 
                  proximity = if (proximity) matrix(rfout$prox, n, 
                                                    n, dimnames = list(x.row.names, x.row.names)) else NULL, 
                  ntree = ntree, mtry = mtry, forest = if (!keep.forest) NULL else {
                    list(ndbigtree = rfout$ndbigtree, nodestatus = matrix(rfout$nodestatus, 
                                                                          ncol = ntree)[1:max.nodes, , drop = FALSE], 
                         bestvar = matrix(rfout$bestvar, ncol = ntree)[1:max.nodes, 
                                                                       , drop = FALSE], treemap = treemap, nodepred = matrix(rfout$nodepred, 
                                                                                                                             ncol = ntree)[1:max.nodes, , drop = FALSE], 
                         xbestsplit = matrix(rfout$xbestsplit, ncol = ntree)[1:max.nodes, 
                                                                             , drop = FALSE], pid = rfout$classwt, cutoff = cutoff, 
                         ncat = ncat, maxcat = maxcat, nrnodes = max.nodes, 
                         ntree = ntree, nclass = nclass, xlevels = xlevels)
                  }, y = if (addclass) NULL else y, test = if (!testdat) NULL else list(predicted = out.class.ts, 
                                                                                        err.rate = if (labelts) t(matrix(rfout$errts, 
                                                                                                                         nclass + 1, ntree, dimnames = list(c("Test", 
                                                                                                                                                              levels(y)), NULL))) else NULL, confusion = if (labelts) testcon else NULL, 
                                                                                        votes = out.votes.ts, proximity = if (proximity) matrix(rfout$proxts, 
                                                                                                                                                nrow = ntest, dimnames = list(xts.row.names, 
                                                                                                                                                                              c(xts.row.names, x.row.names))) else NULL), 
                  inbag = if (keep.inbag) rfout$inbag else NULL)
    }
    else {
      ymean <- mean(y)
      y <- y - ymean
      ytest <- ytest - ymean
      rfout <- .C("regRF", x, as.double(y), as.integer(c(n, 
                                                         p)), as.integer(sampsize), as.integer(nodesize), 
                  as.integer(nrnodes), as.integer(ntree), as.integer(mtry), 
                  as.integer(c(importance, localImp, nPerm)), as.integer(ncat), 
                  as.integer(maxcat), as.integer(do.trace), as.integer(proximity), 
                  as.integer(oob.prox), as.integer(corr.bias), ypred = double(n), 
                  impout = impout, impmat = impmat, impSD = impSD, 
                  prox = prox, ndbigtree = integer(ntree), nodestatus = matrix(integer(nrnodes * 
                                                                                         nt), ncol = nt), leftDaughter = matrix(integer(nrnodes * 
                                                                                                                                          nt), ncol = nt), rightDaughter = matrix(integer(nrnodes * 
                                                                                                                                                                                            nt), ncol = nt), nodepred = matrix(double(nrnodes * 
                                                                                                                                                                                                                                        nt), ncol = nt), bestvar = matrix(integer(nrnodes * 
                                                                                                                                                                                                                                                                                    nt), ncol = nt), xbestsplit = matrix(double(nrnodes * 
                                                                                                                                                                                                                                                                                                                                  nt), ncol = nt), mse = double(ntree), keep = as.integer(c(keep.forest, 
                                                                                                                                                                                                                                                                                                                                                                                            keep.inbag)), replace = as.integer(replace), 
                  testdat = as.integer(testdat), xts = xtest, ntest = as.integer(ntest), 
                  yts = as.double(ytest), labelts = as.integer(labelts), 
                  ytestpred = double(ntest), proxts = proxts, msets = double(if (labelts) ntree else 1), 
                  coef = double(2), oob.times = integer(n), inbag = if (keep.inbag) matrix(integer(n * 
                                                                                                     ntree), n) else integer(1), DUP = FALSE, PACKAGE = "randomForest")[c(16:28, 
                                                                                                                                                                          36:41)]
      if (keep.forest) {
        max.nodes <- max(rfout$ndbigtree)
        rfout$nodestatus <- rfout$nodestatus[1:max.nodes, 
                                             , drop = FALSE]
        rfout$bestvar <- rfout$bestvar[1:max.nodes, , drop = FALSE]
        rfout$nodepred <- rfout$nodepred[1:max.nodes, , drop = FALSE] + 
          ymean
        rfout$xbestsplit <- rfout$xbestsplit[1:max.nodes, 
                                             , drop = FALSE]
        rfout$leftDaughter <- rfout$leftDaughter[1:max.nodes, 
                                                 , drop = FALSE]
        rfout$rightDaughter <- rfout$rightDaughter[1:max.nodes, 
                                                   , drop = FALSE]
      }
      cl <- match.call()
      cl[[1]] <- as.name("randomForest")
      ypred <- rfout$ypred
      if (any(rfout$oob.times < 1)) {
        ypred[rfout$oob.times == 0] <- NA
      }
      out <- list(call = cl, type = "regression", predicted = structure(ypred + 
                                                                          ymean, names = x.row.names), mse = rfout$mse, rsq = 1 - 
                    rfout$mse/(var(y) * (n - 1)/n), oob.times = rfout$oob.times, 
                  importance = if (importance) matrix(rfout$impout, 
                                                      p, 2, dimnames = list(x.col.names, c("%IncMSE", 
                                                                                           "IncNodePurity"))) else matrix(rfout$impout, 
                                                                                                                          ncol = 1, dimnames = list(x.col.names, "IncNodePurity")), 
                  importanceSD = if (importance) rfout$impSD else NULL, 
                  localImportance = if (localImp) matrix(rfout$impmat, 
                                                         p, n, dimnames = list(x.col.names, x.row.names)) else NULL, 
                  proximity = if (proximity) matrix(rfout$prox, n, 
                                                    n, dimnames = list(x.row.names, x.row.names)) else NULL, 
                  ntree = ntree, mtry = mtry, forest = if (keep.forest) c(rfout[c("ndbigtree", 
                                                                                  "nodestatus", "leftDaughter", "rightDaughter", 
                                                                                  "nodepred", "bestvar", "xbestsplit")], list(ncat = ncat), 
                                                                          list(nrnodes = max.nodes), list(ntree = ntree), 
                                                                          list(xlevels = xlevels)) else NULL, coefs = if (corr.bias) rfout$coef else NULL, 
                  y = y + ymean, test = if (testdat) {
                    list(predicted = structure(rfout$ytestpred + 
                                                 ymean, names = xts.row.names), mse = if (labelts) rfout$msets else NULL, 
                         rsq = if (labelts) 1 - rfout$msets/(var(ytest) * 
                                                               (n - 1)/n) else NULL, proximity = if (proximity) matrix(rfout$proxts/ntree, 
                                                                                                                       nrow = ntest, dimnames = list(xts.row.names, 
                                                                                                                                                     c(xts.row.names, x.row.names))) else NULL)
                  } else NULL, inbag = if (keep.inbag) matrix(rfout$inbag, 
                                                              nrow(rfout$inbag), dimnames = list(x.row.names, 
                                                                                                 NULL)) else NULL)
    }
    class(out) <- "randomForest"
    return(out)
  }
	function (x, y = NULL, xtest = NULL, ytest = NULL, ntree = 500,
	mtry = if (!is.null(y) && !is.factor(y)) max(floor(ncol(x)/3),
	1) else floor(sqrt(ncol(x))), replace = TRUE, classwt = NULL,
	cutoff, strata, sampsize = if (replace) nrow(x) else ceiling(0.632 *
	nrow(x)), nodesize = if (!is.null(y) && !is.factor(y)) 5 else 1,
	maxnodes = NULL, importance = FALSE, localImp = FALSE, nPerm = 1,
	proximity, oob.prox = proximity, norm.votes = TRUE, do.trace = FALSE,
	keep.forest = !is.null(y) && is.null(xtest), corr.bias = FALSE,
	keep.inbag = FALSE, ...)
	{
	addclass <- is.null(y)
	classRF <- addclass \|\| is.factor(y)
	if (!classRF && length(unique(y)) <= 5) {
	warning("The response has five or fewer unique values. Are you sure you want to do regression?")
	}
	if (classRF && !addclass && length(unique(y)) < 2)
	stop("Need at least two classes to do classification.")
	n <- nrow(x)
	p <- ncol(x)
	if (n == 0)
	stop("data (x) has 0 rows")
	x.row.names <- rownames(x)
	x.col.names <- if (is.null(colnames(x)))
	1:ncol(x)
	else colnames(x)
	keep.forest <- keep.forest
	testdat <- !is.null(xtest)
	if (testdat) {
	if (ncol(x) != ncol(xtest))
	stop("x and xtest must have same number of columns")
	ntest <- nrow(xtest)
	xts.row.names <- rownames(xtest)
	}
	if (mtry < 1 \|\| mtry > p)
	warning("invalid mtry: reset to within valid range")
	mtry <- max(1, min(p, round(mtry)))
	if (!is.null(y)) {
	if (length(y) != n)
	stop("length of response must be the same as predictors")
	addclass <- FALSE
	}
	else {
	if (!addclass)
	addclass <- TRUE
	y <- factor(c(rep(1, n), rep(2, n)))
	x <- rbind(x, x)
	}
	if (any(is.na(x)))
	stop("NA not permitted in predictors")
	if (testdat && any(is.na(xtest)))
	stop("NA not permitted in xtest")
	if (any(is.na(y)))
	stop("NA not permitted in response")
	if (!is.null(ytest) && any(is.na(ytest)))
	stop("NA not permitted in ytest")
	if (is.data.frame(x)) {
	xlevels <- lapply(x, mylevels)
	ncat <- sapply(xlevels, length)
	ncat <- ifelse(sapply(x, is.ordered), 1, ncat)
	x <- data.matrix(x)
	if (testdat) {
	if (!is.data.frame(xtest))
	stop("xtest must be data frame if x is")
	xfactor <- which(sapply(xtest, is.factor))
	if (length(xfactor) > 0) {
	for (i in xfactor) {
	if (any(!levels(xtest[[i]]) %in% xlevels[[i]]))
	stop("New factor levels in xtest not present in x")
	xtest[[i]] <- factor(xlevels[[i]][match(xtest[[i]],
	xlevels[[i]])], levels = xlevels[[i]])
	}
	}
	xtest <- data.matrix(xtest)
	}
	}
	else {
	ncat <- rep(1, p)
	xlevels <- as.list(rep(0, p))
	}
	maxcat <- max(ncat)
	if (maxcat > 53)
	stop("Can not handle categorical predictors with more than 53 categories.")
	if (classRF) {
	nclass <- length(levels(y))
	if (any(table(y) == 0))
	stop("Can't have empty classes in y.")
	if (!is.null(ytest)) {
	if (!is.factor(ytest))
	stop("ytest must be a factor")
	if (!all(levels(y) == levels(ytest)))
	stop("y and ytest must have the same levels")
	}
	if (missing(cutoff)) {
	cutoff <- rep(1/nclass, nclass)
	}
	else {
	if (sum(cutoff) > 1 \|\| sum(cutoff) < 0 \|\| !all(cutoff >
	0) \|\| length(cutoff) != nclass) {
	stop("Incorrect cutoff specified.")
	}
	if (!is.null(names(cutoff))) {
	if (!all(names(cutoff) %in% levels(y))) {
	stop("Wrong name(s) for cutoff")
	}
	cutoff <- cutoff[levels(y)]
	}
	}
	if (!is.null(classwt)) {
	if (length(classwt) != nclass)
	stop("length of classwt not equal to number of classes")
	if (!is.null(names(classwt))) {
	if (!all(names(classwt) %in% levels(y))) {
	stop("Wrong name(s) for classwt")
	}
	classwt <- classwt[levels(y)]
	}
	if (any(classwt <= 0))
	stop("classwt must be positive")
	ipi <- 1
	}
	else {
	classwt <- rep(1, nclass)
	ipi <- 0
	}
	}
	else addclass <- FALSE
	if (missing(proximity))
	proximity <- addclass
	if (proximity) {
	prox <- matrix(0, n, n)
	proxts <- if (testdat)
	matrix(0, ntest, ntest + n)
	else double(1)
	}
	else {
	prox <- proxts <- double(1)
	}
	if (localImp) {
	importance <- TRUE
	impmat <- matrix(0, p, n)
	}
	else impmat <- double(1)
	if (importance) {
	if (nPerm < 1)
	nPerm <- as.integer(1)
	else nPerm <- as.integer(nPerm)
	if (classRF) {
	impout <- matrix(0, p, nclass + 2)
	impSD <- matrix(0, p, nclass + 1)
	}
	else {
	impout <- matrix(0, p, 2)
	impSD <- double(p)
	names(impSD) <- x.col.names
	}
	}
	else {
	impout <- double(p)
	impSD <- double(1)
	}
	nsample <- if (addclass)
	2 * n
	else n
	Stratify <- length(sampsize) > 1
	if ((!Stratify) && sampsize > nrow(x))
	stop("sampsize too large")
	if (Stratify && (!classRF))
	stop("sampsize should be of length one")
	if (classRF) {
	if (Stratify) {
	if (missing(strata))
	strata <- y
	if (!is.factor(strata))
	strata <- as.factor(strata)
	nsum <- sum(sampsize)
	if (length(sampsize) > nlevels(strata))
	stop("sampsize has too many elements.")
	if (any(sampsize <= 0) \|\| nsum == 0)
	stop("Bad sampsize specification")
	if (!is.null(names(sampsize))) {
	sampsize <- sampsize[levels(strata)]
	}
	if (any(sampsize > table(strata)))
	stop("sampsize can not be larger than class frequency")
	}
	else {
	nsum <- sampsize
	}
	nrnodes <- 2 * trunc(nsum/nodesize) + 1
	}
	else {
	nrnodes <- 2 * trunc(sampsize/max(1, nodesize - 4)) +
	1
	}
	if (!is.null(maxnodes)) {
	maxnodes <- 2 * maxnodes - 1
	if (maxnodes > nrnodes)
	warning("maxnodes exceeds its max value.")
	nrnodes <- min(c(nrnodes, max(c(maxnodes, 1))))
	}
	x <- t(x)
	storage.mode(x) <- "double"
	if (testdat) {
	xtest <- t(xtest)
	storage.mode(xtest) <- "double"
	if (is.null(ytest)) {
	ytest <- labelts <- 0
	}
	else {
	labelts <- TRUE
	}
	}
	else {
	xtest <- double(1)
	ytest <- double(1)
	ntest <- 1
	labelts <- FALSE
	}
	nt <- if (keep.forest)
	ntree
	else 1
	if (classRF) {
	cwt <- classwt
	threshold <- cutoff
	error.test <- if (labelts)
	double((nclass + 1) * ntree)
	else double(1)
	rfout <- .C("classRF", x = x, xdim = as.integer(c(p,
	n)), y = as.integer(y), nclass = as.integer(nclass),
	ncat = as.integer(ncat), maxcat = as.integer(maxcat),
	sampsize = as.integer(sampsize), strata = if (Stratify) as.integer(strata) else integer(1),
	Options = as.integer(c(addclass, importance, localImp,
	proximity, oob.prox, do.trace, keep.forest, replace,
	Stratify, keep.inbag)), ntree = as.integer(ntree),
	mtry = as.integer(mtry), ipi = as.integer(ipi), classwt = as.double(cwt),
	cutoff = as.double(threshold), nodesize = as.integer(nodesize),
	outcl = integer(nsample), counttr = integer(nclass *
	nsample), prox = prox, impout = impout, impSD = impSD,
	impmat = impmat, nrnodes = as.integer(nrnodes), ndbigtree = integer(ntree),
	nodestatus = integer(nt * nrnodes), bestvar = integer(nt *
	nrnodes), treemap = integer(nt * 2 * nrnodes),
	nodepred = integer(nt * nrnodes), xbestsplit = double(nt *
	nrnodes), errtr = double((nclass + 1) * ntree),
	testdat = as.integer(testdat), xts = as.double(xtest),
	clts = as.integer(ytest), nts = as.integer(ntest),
	countts = double(nclass * ntest), outclts = as.integer(numeric(ntest)),
	labelts = as.integer(labelts), proxts = proxts, errts = error.test,
	inbag = if (keep.inbag) matrix(integer(n * ntree),
	n) else integer(n), DUP = FALSE, PACKAGE = "randomForest")[-1]
	if (keep.forest) {
	max.nodes <- max(rfout$ndbigtree)
	treemap <- aperm(array(rfout$treemap, dim = c(2,
	nrnodes, ntree)), c(2, 1, 3))[1:max.nodes, ,
	, drop = FALSE]
	}
	if (!addclass) {
	out.class <- factor(rfout$outcl, levels = 1:nclass,
	labels = levels(y))
	names(out.class) <- x.row.names
	con <- table(observed = y, predicted = out.class)[levels(y),
	levels(y)]
	con <- cbind(con, class.error = 1 - diag(con)/rowSums(con))
	}
	out.votes <- t(matrix(rfout$counttr, nclass, nsample))[1:n,
	]
	oob.times <- rowSums(out.votes)
	if (norm.votes)
	out.votes <- t(apply(out.votes, 1, function(x) x/sum(x)))
	dimnames(out.votes) <- list(x.row.names, levels(y))
	class(out.votes) <- c(class(out.votes), "votes")
	if (testdat) {
	out.class.ts <- factor(rfout$outclts, levels = 1:nclass,
	labels = levels(y))
	names(out.class.ts) <- xts.row.names
	out.votes.ts <- t(matrix(rfout$countts, nclass, ntest))
	dimnames(out.votes.ts) <- list(xts.row.names, levels(y))
	if (norm.votes)
	out.votes.ts <- t(apply(out.votes.ts, 1, function(x) x/sum(x)))
	class(out.votes.ts) <- c(class(out.votes.ts), "votes")
	if (labelts) {
	testcon <- table(observed = ytest, predicted = out.class.ts)[levels(y),
	levels(y)]
	testcon <- cbind(testcon, class.error = 1 - diag(testcon)/rowSums(testcon))
	}
	}
	cl <- match.call()
	cl[[1]] <- as.name("randomForest")
	out <- list(call = cl, type = if (addclass) "unsupervised" else "classification",
	predicted = if (addclass) NULL else out.class, err.rate = if (addclass) NULL else t(matrix(rfout$errtr,
	nclass + 1, ntree, dimnames = list(c("OOB", levels(y)),
	NULL))), confusion = if (addclass) NULL else con,
	votes = out.votes, oob.times = oob.times, classes = levels(y),
	importance = if (importance) matrix(rfout$impout,
	p, nclass + 2, dimnames = list(x.col.names, c(levels(y),
	"MeanDecreaseAccuracy", "MeanDecreaseGini"))) else matrix(rfout$impout,
	ncol = 1, dimnames = list(x.col.names, "MeanDecreaseGini")),
	importanceSD = if (importance) matrix(rfout$impSD,
	p, nclass + 1, dimnames = list(x.col.names, c(levels(y),
	"MeanDecreaseAccuracy"))) else NULL, localImportance = if (localImp) matrix(rfout$impmat,
	p, n, dimnames = list(x.col.names, x.row.names)) else NULL,
	proximity = if (proximity) matrix(rfout$prox, n,
	n, dimnames = list(x.row.names, x.row.names)) else NULL,
	ntree = ntree, mtry = mtry, forest = if (!keep.forest) NULL else {
	list(ndbigtree = rfout$ndbigtree, nodestatus = matrix(rfout$nodestatus,
	ncol = ntree)[1:max.nodes, , drop = FALSE],
	bestvar = matrix(rfout$bestvar, ncol = ntree)[1:max.nodes,
	, drop = FALSE], treemap = treemap, nodepred = matrix(rfout$nodepred,
	ncol = ntree)[1:max.nodes, , drop = FALSE],
	xbestsplit = matrix(rfout$xbestsplit, ncol = ntree)[1:max.nodes,
	, drop = FALSE], pid = rfout$classwt, cutoff = cutoff,
	ncat = ncat, maxcat = maxcat, nrnodes = max.nodes,
	ntree = ntree, nclass = nclass, xlevels = xlevels)
	}, y = if (addclass) NULL else y, test = if (!testdat) NULL else list(predicted = out.class.ts,
	err.rate = if (labelts) t(matrix(rfout$errts,
	nclass + 1, ntree, dimnames = list(c("Test",
	levels(y)), NULL))) else NULL, confusion = if (labelts) testcon else NULL,
	votes = out.votes.ts, proximity = if (proximity) matrix(rfout$proxts,
	nrow = ntest, dimnames = list(xts.row.names,
	c(xts.row.names, x.row.names))) else NULL),
	inbag = if (keep.inbag) rfout$inbag else NULL)
	}
	else {
	ymean <- mean(y)
	y <- y - ymean
	ytest <- ytest - ymean
	rfout <- .C("regRF", x, as.double(y), as.integer(c(n,
	p)), as.integer(sampsize), as.integer(nodesize),
	as.integer(nrnodes), as.integer(ntree), as.integer(mtry),
	as.integer(c(importance, localImp, nPerm)), as.integer(ncat),
	as.integer(maxcat), as.integer(do.trace), as.integer(proximity),
	as.integer(oob.prox), as.integer(corr.bias), ypred = double(n),
	impout = impout, impmat = impmat, impSD = impSD,
	prox = prox, ndbigtree = integer(ntree), nodestatus = matrix(integer(nrnodes *
	nt), ncol = nt), leftDaughter = matrix(integer(nrnodes *
	nt), ncol = nt), rightDaughter = matrix(integer(nrnodes *
	nt), ncol = nt), nodepred = matrix(double(nrnodes *
	nt), ncol = nt), bestvar = matrix(integer(nrnodes *
	nt), ncol = nt), xbestsplit = matrix(double(nrnodes *
	nt), ncol = nt), mse = double(ntree), keep = as.integer(c(keep.forest,
	keep.inbag)), replace = as.integer(replace),
	testdat = as.integer(testdat), xts = xtest, ntest = as.integer(ntest),
	yts = as.double(ytest), labelts = as.integer(labelts),
	ytestpred = double(ntest), proxts = proxts, msets = double(if (labelts) ntree else 1),
	coef = double(2), oob.times = integer(n), inbag = if (keep.inbag) matrix(integer(n *
	ntree), n) else integer(1), DUP = FALSE, PACKAGE = "randomForest")[c(16:28,
	36:41)]
	if (keep.forest) {
	max.nodes <- max(rfout$ndbigtree)
	rfout$nodestatus <- rfout$nodestatus[1:max.nodes,
	, drop = FALSE]
	rfout$bestvar <- rfout$bestvar[1:max.nodes, , drop = FALSE]
	rfout$nodepred <- rfout$nodepred[1:max.nodes, , drop = FALSE] +
	ymean
	rfout$xbestsplit <- rfout$xbestsplit[1:max.nodes,
	, drop = FALSE]
	rfout$leftDaughter <- rfout$leftDaughter[1:max.nodes,
	, drop = FALSE]
	rfout$rightDaughter <- rfout$rightDaughter[1:max.nodes,
	, drop = FALSE]
	}
	cl <- match.call()
	cl[[1]] <- as.name("randomForest")
	ypred <- rfout$ypred
	if (any(rfout$oob.times < 1)) {
	ypred[rfout$oob.times == 0] <- NA
	}
	out <- list(call = cl, type = "regression", predicted = structure(ypred +
	ymean, names = x.row.names), mse = rfout$mse, rsq = 1 -
	rfout$mse/(var(y) * (n - 1)/n), oob.times = rfout$oob.times,
	importance = if (importance) matrix(rfout$impout,
	p, 2, dimnames = list(x.col.names, c("%IncMSE",
	"IncNodePurity"))) else matrix(rfout$impout,
	ncol = 1, dimnames = list(x.col.names, "IncNodePurity")),
	importanceSD = if (importance) rfout$impSD else NULL,
	localImportance = if (localImp) matrix(rfout$impmat,
	p, n, dimnames = list(x.col.names, x.row.names)) else NULL,
	proximity = if (proximity) matrix(rfout$prox, n,
	n, dimnames = list(x.row.names, x.row.names)) else NULL,
	ntree = ntree, mtry = mtry, forest = if (keep.forest) c(rfout[c("ndbigtree",
	"nodestatus", "leftDaughter", "rightDaughter",
	"nodepred", "bestvar", "xbestsplit")], list(ncat = ncat),
	list(nrnodes = max.nodes), list(ntree = ntree),
	list(xlevels = xlevels)) else NULL, coefs = if (corr.bias) rfout$coef else NULL,
	y = y + ymean, test = if (testdat) {
	list(predicted = structure(rfout$ytestpred +
	ymean, names = xts.row.names), mse = if (labelts) rfout$msets else NULL,
	rsq = if (labelts) 1 - rfout$msets/(var(ytest) *
	(n - 1)/n) else NULL, proximity = if (proximity) matrix(rfout$proxts/ntree,
	nrow = ntest, dimnames = list(xts.row.names,
	c(xts.row.names, x.row.names))) else NULL)
	} else NULL, inbag = if (keep.inbag) matrix(rfout$inbag,
	nrow(rfout$inbag), dimnames = list(x.row.names,
	NULL)) else NULL)
	}
	class(out) <- "randomForest"
	return(out)
	}
No results found