Skip to content

Instantly share code, notes, and snippets.

@M3nin0
Created April 25, 2021 12:39
Show Gist options
  • Save M3nin0/dda8fd04be2c211ab546a85376602bf4 to your computer and use it in GitHub Desktop.
Save M3nin0/dda8fd04be2c211ab546a85376602bf4 to your computer and use it in GitHub Desktop.
Exemplo de especificação de um workflow de classificação utilizando Common Workflow Language (CWL).
# Autor: Felipe Menino Carlos
# Data: 25/04/2021
set.seed(777)
library(sits)
#
# Auxiliary function
#
# Extract time series at the sample locations listed in `sample_file`.
#
# A data cube of type "BDC" is declared against the Brazil Data Cube STAC
# endpoint for the given collection, date interval and bands; the cube is then
# handed to sits_get_data() together with the sample file.
#
# Arguments:
#   collection   collection identifier forwarded to sits_cube()
#   start_date   start of the interval forwarded to sits_cube()
#   end_date     end of the interval forwarded to sits_cube()
#   bands        bands forwarded to sits_cube()
#   sample_file  sample file forwarded to sits_get_data() as `file`
#
# Returns whatever sits_get_data() returns for that cube and file.
extract_ts_by_sample_location <- function(collection,
                                          start_date,
                                          end_date,
                                          bands,
                                          sample_file) {
  sample_cube <- sits_cube(
    type = "BDC",
    name = "cube_to_extract_sample",
    url = "https://brazildatacube.dpi.inpe.br/stac/",
    collection = collection,
    start_date = start_date,
    end_date = end_date,
    bands = bands
  )

  sits_get_data(cube = sample_cube, file = sample_file)
}
#
# User Defined parameters
#
# Positional arguments: (1) collection name, (2) comma-separated band names.
# Any additional trailing arguments are ignored.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  stop(
    "Usage: Rscript <script> <collection> <band1,band2,...>",
    call. = FALSE
  )
}
collection <- args[1]
# Split the band list into a character vector; surrounding whitespace is
# dropped so that "B1, B2" and "B1,B2" are treated the same way.
collection_bands <- trimws(strsplit(args[2], ",", fixed = TRUE)[[1]])
#
# General definitions
#
# The date interval comes from the environment. Sys.getenv() returns "" for an
# unset variable, so fail early instead of querying with an empty date.
start_date <- Sys.getenv("START_DATE")
end_date <- Sys.getenv("END_DATE")
if (!nzchar(start_date) || !nzchar(end_date)) {
  stop(
    "Environment variables START_DATE and END_DATE must both be set.",
    call. = FALSE
  )
}
sample_file <- "https://brazildatacube.dpi.inpe.br/geo-knowledge-hub/bdc-article/training-samples/training-samples.csv"
#
# Output directory
#
output_dir <- "training-samples"
dir.create(
  path = output_dir,
  showWarnings = FALSE,
  recursive = TRUE
)
samples_with_ts <- extract_ts_by_sample_location(
  collection = collection,
  start_date = start_date,
  end_date = end_date,
  bands = collection_bands,
  sample_file = sample_file
)
# Result is written to training-samples/<collection>.rds; file.path() avoids
# the doubled separator produced by pasting "dir/" and "/file" together.
saveRDS(samples_with_ts, file.path(output_dir, paste0(collection, ".rds")))
# Autor: Felipe Menino Carlos
# Data: 25/04/2021
set.seed(777)
library(sits)
#
# General definitions
#
# Positional arguments: (1) collection name, (2) path to the RDS file holding
# the training samples. Any additional trailing arguments are ignored.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2L) {
  stop(
    "Usage: Rscript <script> <collection> <samples.rds>",
    call. = FALSE
  )
}
collection <- args[1]
samples_file <- args[2]
if (!file.exists(samples_file)) {
  stop("Samples file not found: ", samples_file, call. = FALSE)
}

# Read an environment variable that must hold a positive integer.
# An unset or malformed value would otherwise become NA and be passed on
# silently; the coercion warning is suppressed because the explicit error
# below replaces it.
read_positive_int_env <- function(name) {
  value <- suppressWarnings(as.integer(Sys.getenv(name)))
  if (is.na(value) || value < 1L) {
    stop(
      "Environment variable ", name, " must be a positive integer.",
      call. = FALSE
    )
  }
  value
}

classification_memsize <- read_positive_int_env("CLASSIFICATION_MEMSIZE")
classification_multicores <- read_positive_int_env("CLASSIFICATION_MULTICORES")

# Sys.getenv() returns "" for an unset variable, so check both dates up front.
start_date <- Sys.getenv("START_DATE")
end_date <- Sys.getenv("END_DATE")
if (!nzchar(start_date) || !nzchar(end_date)) {
  stop(
    "Environment variables START_DATE and END_DATE must both be set.",
    call. = FALSE
  )
}

# Load the training samples and the region-of-interest definition. The ROI
# object provides `search_roi` (used when declaring the cube) and
# `classification_roi` (used when classifying).
samples <- readRDS(samples_file)
roi <- readRDS(url("https://brazildatacube.dpi.inpe.br/geo-knowledge-hub/bdc-article/roi/roi.rds"))
#
# Output directory
#
# Everything is written to a directory named after the collection.
output_dir <- collection
dir.create(
  path = output_dir,
  showWarnings = FALSE,
  recursive = TRUE
)
#
# Load data cube from BDC-STAC
#
cube <- sits_cube(
  type = "BDC",
  name = "cube_to_classify",
  url = "https://brazildatacube.dpi.inpe.br/stac/",
  collection = collection,
  start_date = start_date,
  end_date = end_date,
  roi = roi$search_roi
)
#
# Defining tree model
#
tree_model <- sits_rfor(num_trees = 1000)
#
# Training model
#
trained_model <- sits_train(samples, tree_model)
#
# Classify using the data cube
#
probs <- sits_classify(
  data = cube,
  ml_model = trained_model,
  memsize = classification_memsize,
  multicores = classification_multicores,
  roi = roi$classification_roi,
  output_dir = output_dir
)
#
# Post-processing
#
probs_smoothed <- sits_smooth(probs, type = "bayes", output_dir = output_dir)
labels <- sits_label_classification(probs_smoothed, output_dir = output_dir)
#
# Saving results
#
# labels
saveRDS(labels, file = file.path(output_dir, "labels.rds"))
# probs
saveRDS(probs, file = file.path(output_dir, "probs_cube.rds"))
# smoothed probs
saveRDS(probs_smoothed, file = file.path(output_dir, "probs_smoothed_cube.rds"))
#!/usr/bin/env cwl-runner
# Author: Felipe Menino Carlos
# Date: 25/04/2021
#
# Command-line tool wrapping the classification R script.
# Resulting invocation, by input position:
#   Rscript <classify_datacube_script> <collection_name> <samples_tseries_rds> <outputfile>
cwlVersion: v1.0
class: CommandLineTool
baseCommand: Rscript
inputs:
  # R script to execute (first argument to Rscript).
  classify_datacube_script:
    type: File
    inputBinding:
      position: 0
  # Collection name, passed to the script as its first argument.
  collection_name:
    type: string
    inputBinding:
      position: 1
  # RDS file with the sample time series, passed as the second argument.
  samples_tseries_rds:
    type: File
    inputBinding:
      position: 2
  # Glob pattern used below to collect the outputs.
  outputfile:
    type: string
    inputBinding:
      position: 3 # not used in script
outputs:
  # Every file matching the `outputfile` pattern after the script has run.
  classification:
    type:
      type: array
      items: File
    outputBinding:
      glob: $(inputs.outputfile)
#!/usr/bin/env cwl-runner
# Author: Felipe Menino Carlos
# Date: 25/04/2021
#
# Command-line tool wrapping the time-series extraction R script.
# Resulting invocation, by input position:
#   Rscript <extractseries_script> <collection_name> <collection_bands> <outputfile>
cwlVersion: v1.0
class: CommandLineTool
baseCommand: Rscript
inputs:
  # R script to execute (first argument to Rscript).
  extractseries_script:
    type: File
    inputBinding:
      position: 0
  # Collection name, passed to the script as its first argument.
  collection_name:
    type: string
    inputBinding:
      position: 1
  # Band list as a single string, passed as the second argument.
  collection_bands:
    type: string
    inputBinding:
      position: 2
  # Path used below to collect the output file.
  outputfile:
    type: string
    inputBinding:
      position: 3 # not used in script
outputs:
  # The single file matching `outputfile` after the script has run.
  result:
    type: File
    outputBinding:
      glob: $(inputs.outputfile)
#
# General definitions
#
# R script run by the time-series extraction step.
extractseries_script:
  class: File
  path: "01_ExtractTimeSeries.R"

# R script run by the classification step.
classify_datacube_script:
  class: File
  path: "02_Classification.R"

#
# CBERS-4/AWFI definitions
#
# Collection name and the comma-separated list of bands to extract.
cb4_collection_name: "CB4_64_16D_STK-1"
cb4_collection_bands: "BAND15,BAND14,BAND13,BAND16,NDVI,EVI"
# Path of the extracted samples file and glob for the classification outputs.
cb4_series_output: "training-samples/CB4_64_16D_STK-1.rds"
cb4_classification_output: "CB4_64_16D_STK-1/*.tif"
#!/usr/bin/env cwl-runner
# Autor: Felipe Menino Carlos
# Data: 25/04/2021
# Two-step workflow: extract the sample time series for the CBERS-4/AWFI
# collection, then classify the data cube using those samples.
cwlVersion: v1.0
class: Workflow

hints:
  DockerRequirement:
    dockerPull: m3nin0/sits:0.10.0

requirements:
  ResourceRequirement:
    ramMin: 8192
    coresMin: 6
  # Environment variables read by the R scripts via Sys.getenv().
  EnvVarRequirement:
    envDef:
      START_DATE: "2018-09-01"
      END_DATE: "2019-08-31"
      CLASSIFICATION_MEMSIZE: "8"
      CLASSIFICATION_MULTICORES: "6"
      BDC_ACCESS_KEY: ""

inputs:
  extractseries_script:
    type: File
  classify_datacube_script:
    type: File
  # CBERS-4/AWFI inputs
  cb4_series_output:
    type: string
  cb4_classification_output:
    type: string
  cb4_collection_name:
    type: string
  cb4_collection_bands:
    type: string

outputs:
  # Sample time series file produced by the extraction step.
  result_extract_ts_cb4:
    type: File
    outputSource: cb4_extractseries/result
  # Files produced by the classification step.
  result_cb4classification:
    type:
      type: array
      items: File
    outputSource: cb4_classification/classification

steps:
  cb4_extractseries:
    run: extractseries.tool
    in:
      extractseries_script: extractseries_script
      collection_name: cb4_collection_name
      collection_bands: cb4_collection_bands
      outputfile: cb4_series_output
    out:
      - result

  # Consumes the `result` of cb4_extractseries as its samples file.
  cb4_classification:
    run: classification.tool
    in:
      classify_datacube_script: classify_datacube_script
      samples_tseries_rds: cb4_extractseries/result
      collection_name: cb4_collection_name
      outputfile: cb4_classification_output
    out:
      - classification
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment