Skip to content

Instantly share code, notes, and snippets.

@philipp-baumann
Created December 29, 2017 11:55
Show Gist options
  • Save philipp-baumann/f7a8d92f93c7262778e9e8c9d6e17364 to your computer and use it in GitHub Desktop.
Predict from spectra and calibrated models
################################################################################
## Task: Example script to predict spectra in simplerspec framework
################################################################################
# NOTE(review): `rm(list = ls())` was removed on purpose -- wiping the global
# environment inside a script is an anti-pattern (it does not give a clean
# session and silently destroys the caller's workspace); restart R instead.
# Load packages; `character.only = TRUE` lets library() accept package names
# given as strings. invisible() suppresses printing the list of attached
# package search paths that lapply() would otherwise echo at top level.
pkgs <- c("tidyverse", "simplerspec")
invisible(lapply(pkgs, library, character.only = TRUE))
## Register parallel backend for using multiple cores ==========================
# Allows tuning the models using parallel processing (e.g. use multiple
# cores of a CPU); the caret package automatically detects the registered
# backend
library(doParallel)
# detectCores() counts logical threads (not cores guaranteed to be free) and
# is documented to possibly return NA; guard against NA and leave one thread
# for the operating system so the machine stays responsive
n_workers <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)
cl <- makeCluster(n_workers)
# Register backend
registerDoParallel(cl)
# Return number of parallel workers
getDoParWorkers() # e.g. 7 workers on a quad-core (8-thread) MacBook Pro
# NOTE(review): release the workers when done, e.g. at the end of the script:
# stopCluster(cl)
## Read spectra from OPUS files ================================================
# List all OPUS files to be read
opus_files <- dir(path = "data/spectra", full.names = TRUE)
# Read the files into a list of spectra, distributing the work across cores
spc_l <- read_opus_univ(fnames = opus_files, parallel = TRUE)
# Single-core alternative (files are not read in parallel using all cores of
# the computer); roughly 6 times slower:
# spc_l <- read_opus_univ(fnames = opus_files, parallel = FALSE)
## Gather and process spectra ==================================================
# Collect the list of spectra data into a tibble data frame
spc_tbl <- gather_spc(spc_l)
# Investigate the spectral tibble (data.frame):
# dimensions of the raw spectrum of the first sample
dim(spc_tbl$spc[[1]])
# Processing pipeline: resample -> average replicate scans -> preprocess
spc_tbl <- spc_tbl %>%
  # Resample spectra to a new wavenumber interval
  resample_spc(wn_lower = 500, wn_upper = 3996, wn_interval = 2) %>%
  # Average replicate scans per sample_id
  average_spc() %>%
  # Preprocess spectra with a Savitzky-Golay first derivative using a
  # 21-point window
  preprocess_spc(select = "sg_1_w21")
## Predict spectra based on YAMSYS reference models ============================
# (1) Read model output from reference models ----------------------------------
# Example: model output of the total carbon (C) model
pls_C <- readRDS("models/yamsys-soil-models/pls_C.Rds")
# Explore the components of the pls_C list
ls(pls_C)          # names of all list elements
pls_C$data         # spectral tibble
pls_C$p_pc         # plot with PC1 and PC2 of the spectral PCA
pls_C$model        # caret::train() output; can e.g. be used for prediction
class(pls_C$model) # check class of the model output: "train"
pls_C$p_model      # plot with the evaluation summary of the calibration model
# Collect the calibration model(s) in a named list, as subsequently used by
# the simplerspec::predict_from_spc function
models <- list(pls_C = pls_C)
# (2) Predict new spectral data (preprocessed data) based on reference
# calibration model ------------------------------------------------------------
# Predictions are returned in a tibble data frame
prediction_df <- predict_from_spc(model_list = models, spc_tbl = spc_tbl)
# Extract the predictions as a vector (column pls_C)
prediction_df$pls_C
# Confirm the result is a data.frame
class(prediction_df)
is.data.frame(prediction_df)
# Save predictions as csv file -------------------------------------------------
# Helper predicate: TRUE for columns that are not lists (negation of
# purrr::is_list)
is_not_list <- function(x) {
  !purrr::is_list(x)
}
# Keep only the non-list columns and write them to disk; list-columns cannot
# be written to csv directly (they could only be stored e.g. in a JSON file)
prediction_df %>%
  select_if(is_not_list) %>%
  write_csv(x = ., path = "predictions/test-soil-pred.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment