## Read Nuyina underway (1-minute interval data collection from the ocean)
##
## Reads a cached bulk Parquet snapshot of the ship's underway data from
## GitHub, then queries the AAD geoserver (WFS) for any rows with a datetime
## newer than the cache, and returns the two row-bound together.
##
## @param x unused; kept for backward compatibility (default NULL)
## @return a tibble: the cached rows plus any more recent geoserver rows
get_underway <- function(x = NULL) {
  ## read the bulk (cached snapshot)
  d <- arrow::read_parquet("https://github.com/mdsumner/nuyina.underway/raw/main/data-raw/nuyina_underway.parquet")
  ## latest timestamp in the cache, as the WFS filter expects it.
  ## BUGFIX: the format string must be an argument of format(), not max() —
  ## max(d$datetime, "%Y-...") compared a datetime against a literal string.
  latest <- format(max(d$datetime), "%Y-%m-%dT%H:%M:%SZ")
  ## read the rest: only rows newer than the cache
  d1 <- tibble::as_tibble(
    vapour::vapour_read_fields(
      "WFS:https://data.aad.gov.au/geoserver/ows?service=wfs&version=2.0.0&request=GetCapabilities",
      sql = sprintf("SELECT * FROM \"underway:nuyina_underway\" WHERE datetime > '%s'", latest)
    )
  )
  dplyr::bind_rows(d, d1)
}
## reads a cached Parquet and gets more recent rows from the geoserver
d <- get_underway()

## GHRSST files on source.coop
files <- sds::ghrsst()

## exact date match works because there is one GHRSST file per day
## (not right up to the minute)
d$day <- match(as.Date(d$datetime), as.Date(files$date))

## keep only rows with a matching file (Nuyina just docked this morning
## in Hobart, so the very latest positions have no GHRSST yet)
matched <- !is.na(d$day)
nuy <- d[matched, c("longitude", "latitude", "datetime", "day")]
nuy$file <- files$source[nuy$day]

## one data frame per day, ready for per-file extraction
l <- split(nuy, nuy$day)
## function to apply in parallel to get GHRSST on underway
## x: one day's data frame with columns file, longitude, latitude
##    (all rows share the same file — assumes split by day upstream)
## returns: numeric vector of SST values at each ship location
extractit <- function(x) {
  grid <- terra::rast(x$file[1L])
  pts <- cbind(x$longitude, x$latitude)
  terra::extract(grid, pts)[, 1L, drop = TRUE]
}
## new parallel framework for purrr, via mirai
library(purrr)
library(mirai)
daemons(31)

### takes about 2 minutes, 1e6 ship locations, 725 days of voyaging
## BUGFIX: purrr::map() has no `.parallel` argument — extra named arguments
## are forwarded to .f, and extractit(x) would error with "unused argument".
## The mirai-backed mechanism (purrr >= 1.1.0) is in_parallel(); extractit
## must be exported explicitly so the daemons can see it.
system.time({
  l2 <- map(l, in_parallel(\(x) extractit(x), extractit = extractit))
})
## release the daemons once the parallel work is done
daemons(0)

## align per-day results back onto the rows of nuy — unsplit() inverts
## split(), so this is correct even if nuy is not sorted by day
nuy$ghrsst <- unsplit(l2, nuy$day)

plot(nuy$datetime, nuy$ghrsst, ylab = "ghrsst", xlab = "nuyina time", pch = ".")
plot(d$datetime, d$sea_water_temperature, pch = 19, cex = .2)
lines(nuy$datetime, nuy$ghrsst, col = "red")
title("Nuyina underway water temp vs GHRSST")