andybega · March 18, 2015 13:26
diff --git a/predict-ebma.r b/predict-ebma.r
 # EBMA ensemble prediction
 #  
 # Aggregate a set of predictions into an ensemble prediction using a fitted
 # EBMA model. Recreates the functionality used to calculate test predictions
 # during a EBMA calibration call.
 # 
 # ebma - A fitted EBMA object; only works with logit EBMA.
 # inputs - Matrix or data frame in which each column is a series of 
 #   probabilities.
 # outcome - Vector of observed outcomes. Lenght must match rows in inputs.
 # useModelParams - Should the constant/predictor parameters of EBMA be used, or 
 #   only the bias reduction transformation? Both, by default. 

 # Function for logit EBMA that will take input predictions and aggregate 
 # them to an EBMA prediction.
 predict.ebma <- function(ebma, inputs, outcome, useModelParams=TRUE) {
  if (!class(ebma)=="FDatFitLogit") stop("Only works with logit EBMA")
  
  inputs <- as.matrix(inputs)
  exp <- ebma@exp
  modelParams <- ebma@modelParams[, , 1]
  W <- ebma@modelWeights
  
  if (useModelParams == TRUE) {
    .adjPred <- .makeAdj(inputs, exp)
    inputsAdj <- array(NA, dim = c(nrow(inputs), ncol(inputs)))
    
    for (k in 1:ncol(inputs)) {
      inputsAdj[, k] <- affineTransform(.adjPred[, k], modelParams[, k])
      inputsAdj[, k] <- plogis(inputsAdj[, k])
    }
  }
  if (useModelParams == FALSE) {
    .adjPred <- .makeAdj(inputs)
    .adjPred[outcome == 0, ] <- (1 - plogis(.adjPred[outcome == 0, ]))
    .adjPred[outcome == 1, ] <- (plogis(.adjPred[outcome == 1, ]))
    inputsAdj <- .adjPred
  }
  # Weight inputs, with handling for missing predictions
  if (!any(is.na(inputsAdj))) {
    bma <- inputsAdj %*% W
  } else {
    # Weigh non-missing components, then adjust denominator to account for 
    # weights not used because of missing values.
    inputsAdjNA <- inputsAdj
    inputsAdjNA[is.na(inputsAdjNA)] <- 0
    bma <- inputsAdjNA %*% W
    denom <- (!is.na(inputsAdj) * 1) %*% W
    denom[denom==0] <- NA  # prevent /0 if entire row is NA
    bma <- bma/denom
  }
  as.vector(bma)
 }

 # Input adjustment
 .makeAdj <- function(x, exp) {
  .adjPred <- qlogis(x)
  .negative <- .adjPred < 0
  .pos <- .adjPred > 1
  .adjPred <- ((1 + abs(.adjPred))^(1/exp)) - 1
  .miss <- is.na(.adjPred)
  .negative[.miss] <- FALSE
  .adjPred[.negative] <- .adjPred[.negative] * (-1)
  .adjPred[.miss] <- NA
  .adjPred
 }

 # Recreate glm() transformation
 affineTransform <- function(x, b) {
  # Affine transform of y = b1 + x*b2
  y <- cbind(1, x) %*% b
  y
 }


 # Testing -----------------------------------------------------------------
 #
 #   Make sure output replicates original EBMA output
 #   Check handling of NA's in input predictions
 #

 setwd("~/Work/ebma-ex/fcast-func")

 # Example with known solution. 
 # `pr_test` has 7 input streams of predicitons, as well as ebma prediction
 # that was calculated as the test portion of the EBMA calibration, thus
 # using the EBMAforecast internal functions.
 # https://dl.dropboxusercontent.com/u/46758293/data/ebma-rig.rda
 load("ebma-rig.rda")

 # Pull out components
 inputs    <- pr_test[, grep("i[0-9]", colnames(pr_test))]
 outcome   <- pr_test[, "y"]
 ebma_orig <- pr_test[, "ebma"]

 # Does the redo match the original EBMA calculations?
 ebma_redo <- predict.ebma(ensemble, inputs, outcome)

 isTRUE(all.equal(ebma_orig, ebma_redo))

 # Handling NA's
 # Scattered NA's, not complete row
 inputs_pNA <- inputs
 inputs_pNA[1, 1] <- NA
 inputs_pNA[2, c(1:3)] <- NA

 ebma_redo2 <- predict.ebma(ensemble, inputs_pNA, outcome)
 head(cbind(ebma_orig, ebma_redo2))

 # Complete row of NA's
 inputs_rowNA <- inputs
 inputs_rowNA[1, ] <- NA

 ebma_redo3 <- predict.ebma(ensemble, inputs_rowNA, outcome)
 head(cbind(ebma_orig, ebma_redo3))
	# EBMA ensemble prediction
	#
	# Aggregate a set of predictions into an ensemble prediction using a fitted
	# EBMA model. Recreates the functionality used to calculate test predictions
	# during a EBMA calibration call.
	#
	# ebma - A fitted EBMA object; only works with logit EBMA.
	# inputs - Matrix or data frame in which each column is a series of
	# probabilities.
	# outcome - Vector of observed outcomes. Lenght must match rows in inputs.
	# useModelParams - Should the constant/predictor parameters of EBMA be used, or
	# only the bias reduction transformation? Both, by default.

	# Function for logit EBMA that will take input predictions and aggregate
	# them to an EBMA prediction.
	predict.ebma <- function(ebma, inputs, outcome, useModelParams=TRUE) {
	if (!class(ebma)=="FDatFitLogit") stop("Only works with logit EBMA")

	inputs <- as.matrix(inputs)
	exp <- ebma@exp
	modelParams <- ebma@modelParams[, , 1]
	W <- ebma@modelWeights

	if (useModelParams == TRUE) {
	.adjPred <- .makeAdj(inputs, exp)
	inputsAdj <- array(NA, dim = c(nrow(inputs), ncol(inputs)))

	for (k in 1:ncol(inputs)) {
	inputsAdj[, k] <- affineTransform(.adjPred[, k], modelParams[, k])
	inputsAdj[, k] <- plogis(inputsAdj[, k])
	}
	}
	if (useModelParams == FALSE) {
	.adjPred <- .makeAdj(inputs)
	.adjPred[outcome == 0, ] <- (1 - plogis(.adjPred[outcome == 0, ]))
	.adjPred[outcome == 1, ] <- (plogis(.adjPred[outcome == 1, ]))
	inputsAdj <- .adjPred
	}
	# Weight inputs, with handling for missing predictions
	if (!any(is.na(inputsAdj))) {
	bma <- inputsAdj %*% W
	} else {
	# Weigh non-missing components, then adjust denominator to account for
	# weights not used because of missing values.
	inputsAdjNA <- inputsAdj
	inputsAdjNA[is.na(inputsAdjNA)] <- 0
	bma <- inputsAdjNA %*% W
	denom <- (!is.na(inputsAdj) * 1) %*% W
	denom[denom==0] <- NA # prevent /0 if entire row is NA
	bma <- bma/denom
	}
	as.vector(bma)
	}

	# Input adjustment
	.makeAdj <- function(x, exp) {
	.adjPred <- qlogis(x)
	.negative <- .adjPred < 0
	.pos <- .adjPred > 1
	.adjPred <- ((1 + abs(.adjPred))^(1/exp)) - 1
	.miss <- is.na(.adjPred)
	.negative[.miss] <- FALSE
	.adjPred[.negative] <- .adjPred[.negative] * (-1)
	.adjPred[.miss] <- NA
	.adjPred
	}

	# Recreate glm() transformation
	affineTransform <- function(x, b) {
	# Affine transform of y = b1 + x*b2
	y <- cbind(1, x) %*% b
	y
	}


	# Testing -----------------------------------------------------------------
	#
	# Make sure output replicates original EBMA output
	# Check handling of NA's in input predictions
	#

	setwd("~/Work/ebma-ex/fcast-func")

	# Example with known solution.
	# `pr_test` has 7 input streams of predicitons, as well as ebma prediction
	# that was calculated as the test portion of the EBMA calibration, thus
	# using the EBMAforecast internal functions.
	# https://dl.dropboxusercontent.com/u/46758293/data/ebma-rig.rda
	load("ebma-rig.rda")

	# Pull out components
	inputs <- pr_test[, grep("i[0-9]", colnames(pr_test))]
	outcome <- pr_test[, "y"]
	ebma_orig <- pr_test[, "ebma"]

	# Does the redo match the original EBMA calculations?
	ebma_redo <- predict.ebma(ensemble, inputs, outcome)

	isTRUE(all.equal(ebma_orig, ebma_redo))

	# Handling NA's
	# Scattered NA's, not complete row
	inputs_pNA <- inputs
	inputs_pNA[1, 1] <- NA
	inputs_pNA[2, c(1:3)] <- NA

	ebma_redo2 <- predict.ebma(ensemble, inputs_pNA, outcome)
	head(cbind(ebma_orig, ebma_redo2))

	# Complete row of NA's
	inputs_rowNA <- inputs
	inputs_rowNA[1, ] <- NA

	ebma_redo3 <- predict.ebma(ensemble, inputs_rowNA, outcome)
	head(cbind(ebma_orig, ebma_redo3))
No results found