Last active
December 4, 2020 18:10
-
-
Save MirzaCengic/fd5a5347f7237e8ad37494c4439b93f1 to your computer and use it in GitHub Desktop.
ensemble_modeling2.R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
cat("BIOMOD_EnsembleModeling2 loaded", "\n") | |
BIOMOD_EnsembleModeling2 <- function (modeling.output, chosen.models = "all", em.by = "PA_dataset+repet", | |
eval.metric = "all", eval.metric.quality.threshold = NULL, | |
models.eval.meth = c("KAPPA", "TSS", "ROC"), prob.mean = TRUE, | |
prob.cv = FALSE, prob.ci = FALSE, prob.ci.alpha = 0.05, prob.median = FALSE, | |
committee.averaging = FALSE, prob.mean.weight = FALSE, prob.mean.weight.decay = "proportional", | |
VarImport = 0) | |
{ | |
biomod2:::.bmCat("Build Ensemble Models") | |
args <- biomod2:::.BIOMOD_EnsembleModeling.check.args(modeling.output, | |
chosen.models, eval.metric, eval.metric.quality.threshold, | |
models.eval.meth, prob.mean, prob.cv, prob.ci, prob.ci.alpha, | |
prob.median, committee.averaging, prob.mean.weight, prob.mean.weight.decay, | |
em.by) | |
modeling.output <- args$modeling.output | |
chosen.models <- args$chosen.models | |
eval.metric <- args$eval.metric | |
eval.metric.quality.threshold <- args$eval.metric.quality.threshold | |
models.eval.meth <- args$models.eval.meth | |
prob.mean <- args$prob.mean | |
prob.cv <- args$prob.cv | |
prob.ci <- args$prob.ci | |
prob.ci.alpha <- args$prob.ci.alpha | |
prob.median <- args$prob.median | |
committee.averaging <- args$committee.averaging | |
prob.mean.weight <- args$prob.mean.weight | |
prob.mean.weight.decay <- args$prob.mean.weight.decay | |
em.by <- args$em.by | |
rm("args") | |
em.avail <- c("prob.mean", "prob.cv", "prob.ci.inf", "prob.ci.sup", | |
"prob.median", "committee.averaging", "prob.mean.weight") | |
em.algo <- em.avail[c(prob.mean, prob.cv, prob.ci, prob.ci, | |
prob.median, committee.averaging, prob.mean.weight)] | |
Options <- list(em.by = em.by) | |
expl_var_type = get_var_type(get_formal_data(modeling.output, | |
"expl.var")) | |
expl_var_range = get_var_range(get_formal_data(modeling.output, | |
"expl.var")) | |
EM <- new("BIOMOD.EnsembleModeling.out", sp.name = [email protected], | |
expl.var.names = [email protected], em.by = em.by, | |
modeling.id = [email protected]) | |
[email protected]@link <- file.path([email protected], | |
paste([email protected], ".", [email protected], | |
".models.out", sep = "")) | |
em.mod.assemb <- biomod2:::.em.models.assembling(chosen.models, em.by) | |
for (assemb in names(em.mod.assemb)) { | |
cat("\n\n >", assemb, "ensemble modeling") | |
models.kept <- em.mod.assemb[[assemb]] | |
if ([email protected]) { | |
eval.obs <- get_formal_data(modeling.output, "eval.resp.var") | |
eval.expl <- get_formal_data(modeling.output, "eval.expl.var") | |
} | |
obs <- get_formal_data(modeling.output, "resp.var") | |
expl <- get_formal_data(modeling.output, "expl.var") | |
if (em.by %in% c("PA_dataset", "PA_dataset+algo", "PA_dataset+repet")) { | |
if (unlist(strsplit(assemb, "_"))[3] != "AllData") { | |
if (inherits(get_formal_data(modeling.output), | |
"BIOMOD.formated.data.PA")) { | |
kept_cells <- get_formal_data(modeling.output)@PA[, | |
unlist(strsplit(assemb, "_"))[3]] | |
} | |
else { | |
kept_cells <- rep(T, length(obs)) | |
} | |
obs <- obs[kept_cells] | |
expl <- expl[kept_cells, , drop = F] | |
} | |
} | |
obs[is.na(obs)] <- 0 | |
needed_predictions <- biomod2:::.get_needed_predictions(modeling.output, | |
em.by, models.kept, eval.metric, eval.metric.quality.threshold) | |
if (!length(needed_predictions)) | |
next | |
for (eval.m in eval.metric) { | |
models.kept <- needed_predictions$models.kept[[eval.m]] | |
models.kept.scores <- needed_predictions$models.kept.scores[[eval.m]] | |
for (algo in em.algo) { | |
if (algo == "prob.mean") { | |
cat("\n > Mean of probabilities...") | |
model_name <- paste([email protected], | |
"_", "EMmeanBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMmean_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMmean", | |
model_options = Options, resp_name = [email protected], | |
expl_var_names = [email protected], | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = [email protected]) | |
} | |
if (algo == "prob.cv") { | |
cat("\n > Coef of variation of probabilities...") | |
model_name <- paste([email protected], | |
"_", "EMcvBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMcv_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMcv", | |
model_options = Options, resp_name = [email protected], | |
expl_var_names = [email protected], | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = [email protected]) | |
} | |
if (algo == "prob.median") { | |
cat("\n > Median of probabilities...") | |
model_name <- paste([email protected], | |
"_", "EMmedianBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMmedian_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMmedian", | |
model_options = Options, resp_name = [email protected], | |
expl_var_names = [email protected], | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = [email protected]) | |
} | |
if (algo == "prob.ci.inf") { | |
cat("\n > Confidence Interval...") | |
model_name <- paste([email protected], | |
"_", "EMciInfBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMci_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMci", | |
model_options = Options, resp_name = [email protected], | |
expl_var_names = [email protected], | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = [email protected], | |
alpha = prob.ci.alpha, side = "inferior") | |
} | |
if (algo == "prob.ci.sup") { | |
model_name <- paste([email protected], | |
"_", "EMciSupBy", eval.m, "_", assemb, sep = "") | |
model.bm <- new("EMci_biomod2_model", model = models.kept, | |
model_name = model_name, model_class = "EMci", | |
model_options = Options, resp_name = [email protected], | |
expl_var_names = [email protected], | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = [email protected], | |
alpha = prob.ci.alpha, side = "superior") | |
} | |
if (algo == "committee.averaging") { | |
cat("\n > Committee averaging...") | |
model_name <- paste([email protected], | |
"_", "EMcaBy", eval.m, "_", assemb, sep = "") | |
models.kept.tresh <- unlist(lapply(models.kept, | |
function(x) { | |
mod <- tail(unlist(strsplit(x, "_")), 3)[3] | |
run <- tail(unlist(strsplit(x, "_")), 3)[2] | |
dat <- tail(unlist(strsplit(x, "_")), 3)[1] | |
return(get_evaluations(modeling.output)[eval.m, | |
"Cutoff", mod, run, dat]) | |
})) | |
names(models.kept.tresh) <- models.kept | |
to_keep <- is.finite(models.kept.tresh) | |
model.bm <- new("EMca_biomod2_model", model = models.kept[to_keep], | |
model_name = model_name, model_class = "EMca", | |
model_options = Options, resp_name = [email protected], | |
expl_var_names = [email protected], | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = [email protected], | |
tresholds = models.kept.tresh[to_keep]) | |
} | |
if (algo == "prob.mean.weight") { | |
cat("\n > Probabilities weighting mean...") | |
model_name <- paste([email protected], | |
"_", "EMwmeanBy", eval.m, "_", assemb, sep = "") | |
models.kept.tmp <- models.kept | |
models.kept.scores.tmp <- models.kept.scores | |
if (eval.m == "ROC") { | |
sre.id <- grep("_SRE", models.kept) | |
if (length(sre.id) > 0) { | |
cat("\n ! SRE modeling were switched off") | |
models.kept.tmp <- models.kept[-sre.id] | |
models.kept.scores.tmp <- models.kept.scores[-sre.id] | |
} | |
} | |
models.kept.tmp <- models.kept.tmp[is.finite(models.kept.scores.tmp)] | |
models.kept.scores.tmp <- models.kept.scores.tmp[is.finite(models.kept.scores.tmp)] | |
models.kept.scores.tmp <- round(models.kept.scores.tmp, | |
3) | |
cat("\n\t\t", " original models scores = ", | |
models.kept.scores.tmp) | |
if (is.numeric(prob.mean.weight.decay)) { | |
DecayCount <- sum(models.kept.scores.tmp > | |
0) | |
WOrder <- order(models.kept.scores.tmp, decreasing = T) | |
Dweights <- models.kept.scores.tmp | |
for (J in 1:DecayCount) Dweights[WOrder[J]] <- I(prob.mean.weight.decay^(DecayCount - | |
J + 1)) | |
for (J in 1:length(models.kept.scores.tmp)) { | |
if (sum(models.kept.scores.tmp[J] == models.kept.scores.tmp) > | |
1) | |
Dweights[which(models.kept.scores.tmp[J] == | |
models.kept.scores.tmp)] <- mean(Dweights[which(models.kept.scores.tmp[J] == | |
models.kept.scores.tmp)]) | |
} | |
models.kept.scores.tmp <- round(Dweights, | |
digits = 3) | |
rm(list = c("Dweights", "DecayCount", "WOrder")) | |
} | |
else if (is.function(prob.mean.weight.decay)) { | |
models.kept.scores.tmp <- sapply(models.kept.scores.tmp, | |
prob.mean.weight.decay) | |
} | |
models.kept.scores.tmp <- round(models.kept.scores.tmp/sum(models.kept.scores.tmp, | |
na.rm = T), digits = 3) | |
cat("\n\t\t", " final models weights = ", models.kept.scores.tmp) | |
model.bm <- new("EMwmean_biomod2_model", model = models.kept.tmp, | |
model_name = model_name, model_class = "EMwmean", | |
model_options = Options, resp_name = [email protected], | |
expl_var_names = [email protected], | |
expl_var_type = expl_var_type, expl_var_range = expl_var_range, | |
modeling.id = [email protected], | |
penalization_scores = models.kept.scores.tmp) | |
} | |
pred.bm <- predict(model.bm, expl, formal_predictions = needed_predictions$predictions[, | |
model.bm@model, drop = F], on_0_1000 = T) | |
pred.bm.name <- paste0(model_name, ".predictions") | |
pred.bm.outfile <- file.path(model.bm@resp_name, | |
".BIOMOD_DATA", [email protected], "ensemble.models", | |
"ensemble.models.predictions", pred.bm.name) | |
dir.create(dirname(pred.bm.outfile), showWarnings = FALSE, | |
recursive = TRUE) | |
assign(pred.bm.name, pred.bm) | |
save(list = pred.bm.name, file = pred.bm.outfile, | |
compress = TRUE) | |
rm(list = pred.bm.name) | |
if (exists("eval.obs") & exists("eval.expl")) { | |
eval_pred.bm <- predict(model.bm, eval.expl) | |
pred.bm.name <- paste0(model_name, ".predictionsEval") | |
assign(pred.bm.name, eval_pred.bm) | |
save(list = pred.bm.name, file = pred.bm.outfile, | |
compress = TRUE) | |
rm(list = pred.bm.name) | |
} | |
if (length(models.eval.meth)) { | |
cat("\n\t\t\tEvaluating Model stuff...") | |
if (algo == "prob.cv") { | |
cross.validation <- matrix(NA, 4, length(models.eval.meth), | |
dimnames = list(c("Testing.data", "Cutoff", | |
"Sensitivity", "Specificity"), models.eval.meth)) | |
} | |
else { | |
if (em.by == "PA_dataset+repet") { | |
calib_lines <- get_calib_lines(modeling.output) | |
pa_dataset_id <- paste("_", unlist(strsplit(assemb, | |
"_"))[3], sep = "") | |
repet_id <- paste("_", unlist(strsplit(assemb, | |
"_"))[2], sep = "") | |
if (repet_id == "_Full") { | |
eval_lines <- rep(T, length(pred.bm)) | |
} | |
else { | |
eval_lines <- !na.omit(calib_lines[, | |
repet_id, pa_dataset_id]) | |
if (all(!eval_lines)) { | |
eval_lines <- !eval_lines | |
} | |
} | |
} | |
else { | |
eval_lines <- rep(T, length(pred.bm)) | |
} | |
cross.validation <- sapply(models.eval.meth, | |
Find.Optim.Stat, Fit = pred.bm[eval_lines], | |
Obs = obs[eval_lines]) | |
rownames(cross.validation) <- c("Testing.data", | |
"Cutoff", "Sensitivity", "Specificity") | |
} | |
if (exists("eval_pred.bm")) { | |
if (algo == "prob.cv") { | |
cross.validation <- matrix(NA, 5, length(models.eval.meth), | |
dimnames = list(c("Testing.data", "Evaluating.data", | |
"Cutoff", "Sensitivity", "Specificity"), | |
models.eval.meth)) | |
} | |
else { | |
true.evaluation <- sapply(models.eval.meth, | |
function(x) { | |
Find.Optim.Stat(Fit = eval_pred.bm * | |
1000, Obs = eval.obs, Fixed.thresh = cross.validation["Cutoff", | |
x]) | |
}) | |
cross.validation <- rbind(cross.validation["Testing.data", | |
], true.evaluation) | |
rownames(cross.validation) <- c("Testing.data", | |
"Evaluating.data", "Cutoff", "Sensitivity", | |
"Specificity") | |
} | |
} | |
cross.validation <- t(round(cross.validation, | |
digits = 3)) | |
model.bm@model_evaluation <- cross.validation | |
rm(list = c("cross.validation")) | |
} | |
if (VarImport > 0) { | |
cat("\n\t\t\tEvaluating Predictor Contributions...", | |
"\n") | |
variables.importance <- variables_importance(model.bm, | |
expl, nb_rand = VarImport) | |
model.bm@model_variables_importance <- variables.importance$mat | |
rm(list = c("variables.importance")) | |
} | |
assign(model_name, model.bm) | |
save(list = model_name, file = file.path([email protected], | |
"models", [email protected], model_name)) | |
[email protected] <- c([email protected], model.bm) | |
[email protected] <- c([email protected], model_name) | |
} | |
} | |
} | |
names([email protected]) <- [email protected] | |
model.name <- paste([email protected], ".", [email protected], "ensemble.models.out", | |
sep = "") | |
assign(x = model.name, value = EM) | |
save(list = model.name, file = file.path([email protected], model.name)) | |
biomod2:::.bmCat("Done") | |
return(EM) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment