Created
December 29, 2017 11:55
-
-
Save philipp-baumann/f7a8d92f93c7262778e9e8c9d6e17364 to your computer and use it in GitHub Desktop.
Predict from spectra and calibrated models
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
################################################################################ | |
## Task: Example script to predict spectra in simplerspec framework | |
################################################################################ | |
# Remove all R objects from memory | |
rm(list = ls()) | |
# Load packages | |
pkgs <- c("tidyverse", "simplerspec") | |
lapply(pkgs, library, character.only = TRUE) | |
## Register parallel backend for using multiple cores ========================== | |
# Allows to tune the models using parallel processing (e.g. use all available | |
# cores of a CPU); caret package automatically detects the registered backend | |
library(doParallel) | |
# Make a cluster with all possible threads (more than physical cores) | |
cl <- makeCluster(detectCores()) | |
# Register backend | |
registerDoParallel(cl) | |
# Return number of parallel workers | |
getDoParWorkers() # 8 threads on MacBook Pro (Retina, 15-inch, Mid 2015); | |
# Quadcore processor | |
## Read spectra from OPUS files ================================================ | |
# Create vector with list of OPUS files to be read | |
lf <- dir(path = "data/spectra", full.names = TRUE) | |
# Read into list | |
spc_l <- read_opus_univ(fnames = lf, parallel = TRUE) | |
# You can also read in single core modus (files are not read in parallel | |
# using all cores of the computer); ~ 6 times slower... | |
# spc_l <- read_opus_univ(fnames = lf, parallel = FALSE) | |
## Gather and process spectra ================================================== | |
# Spectral processing pipe | |
spc_tbl <- spc_l %>% | |
# Gather list of spectra data into tibble data frame | |
gather_spc() | |
# Investigate spectral tibble (data.frame) | |
# Get raw spectrum from first sample | |
dim(spc_tbl$spc[[1]]) | |
spc_tbl <- spc_tbl %>% | |
# Resample spectra to new wavenumber interval | |
resample_spc(wn_lower = 500, wn_upper = 3996, wn_interval = 2) %>% | |
# Average replicate scans per sample_id | |
average_spc() %>% | |
# Preprocess spectra using Savitzky-Golay first derivative with a window size | |
# of 21 points | |
preprocess_spc(select = "sg_1_w21") | |
## Predict spectra based on YAMSYS reference models ============================ | |
# (1) Read model output from reference models ---------------------------------- | |
# Example: model output of total carbon (C) model | |
pls_C <- readRDS("models/yamsys-soil-models/pls_C.Rds") | |
# Check elements of pls_C list | |
ls(pls_C) # list all element names | |
pls_C$data # Spectral tibble | |
pls_C$p_pc # Plot with PC1 and PC2 of spectral PCA | |
pls_C$model # caret::train() output, can e.g. be used for prediction | |
# Check class of model output | |
class(pls_C$model) # object of class "train" | |
pls_C$p_model # Plot model evaluation summary of calibration model | |
# Create list of models; is subsequently used by | |
# simplerspec::predict_from_spc function | |
models <- list( | |
"pls_C" = pls_C | |
) | |
# (2) Predict new spectral data (preprocessed data) based on reference | |
# calibration model ------------------------------------------------------------ | |
# Predict into tibble data frame | |
prediction_df <- predict_from_spc(model_list = models, spc_tbl = spc_tbl) | |
# Extract predictions as vector (column pls_C) | |
prediction_df$pls_C | |
# Check if is data.frame | |
class(prediction_df) | |
is.data.frame(prediction_df) | |
# Save predictions as csv file ------------------------------------------------- | |
# Define helper function to negate is_list | |
is_not_list <- function(x) ! purrr::is_list(x) | |
# Pipe to save selected columns on disk (csv file) | |
prediction_df %>% | |
# First only select columns that are no lists (lists cannot be written to | |
# csv direcly; would only be possible to store in JSON file) | |
select_if(is_not_list) %>% | |
# now write all columns that are not list-columns as csv file | |
write_csv(x = ., path = "predictions/test-soil-pred.csv") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment