rajadain · November 13, 2023 15:42
diff --git a/epr_workflow.sh b/epr_workflow.sh
 #!/usr/bin/env bash

 # Abort on the first failing command (errexit) and trace every command as it
 # runs (xtrace). NOTE: the trace shows expanded arguments, so any command that
 # is passed MMW_API_KEY will reveal it in the trace output.
 set -o errexit -o xtrace

 ###
 # Configuration
 ###

 # Root of the Model My Watershed API that requests are sent to
 MMW_API_URL='http://localhost:8000/api'
 # API key of the user the requests are made as
 MMW_API_KEY='my-api-key'
 # Assembly JAR built from https://github.com/jpolchlo/mmw-etl
 MMW_ETL_JAR='target/scala-2.12/mmw-etl-assembly-0.0.1.jar'

 # S3 bucket holding the MMW layers
 S3_BUCKET='s3://my-bucket-name'
 # Existing layer that newly ingested layers are aligned with
 REF_LAYER='nlcd-2019-30m-epsg5070-512-uint8raw'

 # Folder containing the input data
 IN_PATH='scratch/example_lu_data'

 ###
 # Helper: poll /jobs/<uuid>/ once a second until the reported status is no
 # longer "started", then print the final job response (raw JSON) on stdout.
 # Always polls at least once.
 # Globals:   MMW_API_URL, MMW_API_KEY (read)
 # Arguments: $1 - uuid of the job to wait for
 ###
 function _mmw_poll_job () {
    local job_uuid=$1
    local job_response
    local job_status="started"

    while [[ "${job_status}" == "started" ]]
    do
        sleep 1
        job_response=$(curl -X GET \
            -H "Authorization: Token ${MMW_API_KEY}" \
            "${MMW_API_URL}/jobs/${job_uuid}/")

        job_status=$(printf '%s\n' "${job_response}" | jq -r .status)
    done

    printf '%s\n' "${job_response}"
 }

 ###
 # Function that takes an input to use with /modeling/gwlf-e/prepare endpoint,
 # and returns the output of GWLF-E for that input. To see documentation on how
 # that input should be shaped, reference: https://modelmywatershed.org/api/docs/
 #
 # Globals:   MMW_API_URL, MMW_API_KEY (read)
 # Arguments: $1 - JSON request body for the prepare endpoint
 # Outputs:   the .result of the finished run job on stdout; diagnostics on
 #            stderr, so they are not swallowed by a caller's $(...)
 # Exits:     with status 1 when a job cannot be started or does not end up
 #            "complete". Call it inside $(...) to keep the exit confined to
 #            the substitution subshell.
 ###
 function mmw_run_gwlfe () {
    local prepare_request=$1
    local prepare_response prepare_uuid prepare_status
    local run_request run_start_response run_uuid run_response run_status

    # Post a request to /modeling/gwlf-e/prepare and save the job uuid
    prepare_response=$(curl -X POST \
        -H "Content-Type: application/json" \
        -H "Authorization: Token ${MMW_API_KEY}" \
        -d "${prepare_request}" \
        "${MMW_API_URL}/modeling/gwlf-e/prepare/")

    prepare_uuid=$(printf '%s\n' "${prepare_response}" | jq -r .job_uuid)

    # jq prints "null" for a missing key and nothing at all for empty input
    if [[ -z "${prepare_uuid}" || "${prepare_uuid}" == "null" ]]
    then
        echo "Unable to start prepare job: ${prepare_response}" >&2
        exit 1
    fi

    # Only poll when the job is still reported as "started"
    prepare_status=$(printf '%s\n' "${prepare_response}" | jq -r .status)
    if [[ "${prepare_status}" == "started" ]]
    then
        prepare_response=$(_mmw_poll_job "${prepare_uuid}")
        prepare_status=$(printf '%s\n' "${prepare_response}" | jq -r .status)
    fi

    # If the job status is not "complete", report error and stop
    if [[ "${prepare_status}" != "complete" ]]
    then
        echo "GWLF-E PREPARE job ${prepare_uuid} failed with error: $(printf '%s\n' "${prepare_response}" | jq .)" >&2
        exit 1
    fi

    # Otherwise, post a request to /modeling/gwlf-e/run/ with the prepare job uuid
    run_request="{\"job_uuid\": \"${prepare_uuid}\"}"

    run_start_response=$(curl -X POST \
        -H "Content-Type: application/json" \
        -H "Authorization: Token ${MMW_API_KEY}" \
        -d "${run_request}" \
        "${MMW_API_URL}/modeling/gwlf-e/run/")

    run_uuid=$(printf '%s\n' "${run_start_response}" | jq -r .job_uuid)

    # Same guard as for the prepare job, so /jobs/null/ is never polled
    if [[ -z "${run_uuid}" || "${run_uuid}" == "null" ]]
    then
        echo "Unable to start run job: ${run_start_response}" >&2
        exit 1
    fi

    # The run job is always polled at least once before its status is judged
    run_response=$(_mmw_poll_job "${run_uuid}")
    run_status=$(printf '%s\n' "${run_response}" | jq -r .status)

    # If the job status is not "complete", report error and stop
    if [[ "${run_status}" != "complete" ]]
    then
        echo "GWLF-E RUN job ${run_uuid} failed with error: $(printf '%s\n' "${run_response}" | jq .)" >&2
        exit 1
    fi

    # Report the GWLF-E result; quoting keeps whitespace and glob characters
    # inside the JSON intact
    printf '%s\n' "${run_response}" | jq -r .result
 }

 ###
 # For every .tif file in the specified input data folder, we read the corresponding
 # .geojson shape, run GWLF-E for that shape and save the results to a
 # <label>_gwlfe_baseline.json, then ingest the .tif file, run GWLF-E for that
 # shape with that land layer, and save the results to a <label>_gwlfe_modified.json.
 # All paths are quoted so that folders and files containing spaces work.
 ###
 for filename in "${IN_PATH}"/*.tif
 do
    # An unmatched glob is left as the literal pattern; skip it
    [ -e "$filename" ] || continue

    # Extract name of the .tif file without extension, use that for everything
    IN_LABEL=$(basename "$filename" .tif)

    # The source .tif file
    SOURCE_IMAGE="${IN_PATH}/${IN_LABEL}.tif"
    # The source .geojson file
    SOURCE_SHAPE="${IN_PATH}/${IN_LABEL}.geojson"
    # The name of the output layer to ingest to, and use with MMW
    OUT_LAYER="20231109-tt-${IN_LABEL}"

    # Path of the baseline output
    OUT_GWLFE_BASELINE="${IN_PATH}/${IN_LABEL}_gwlfe_baseline.json"
    # Path of the modified output
    OUT_GWLFE_MODIFIED="${IN_PATH}/${IN_LABEL}_gwlfe_modified.json"

    # Stop with a clear message instead of a bare redirection error
    if [ ! -r "${SOURCE_SHAPE}" ]
    then
        echo "Missing shape file for ${filename}: ${SOURCE_SHAPE}" >&2
        exit 1
    fi

    # Run source shape against vanilla GWLF-E and save a baseline
    MMW_GWLFE_BASELINE_INPUT="{\"area_of_interest\": $(<"${SOURCE_SHAPE}")}"
    MMW_GWLFE_BASELINE_OUTPUT=$(mmw_run_gwlfe "${MMW_GWLFE_BASELINE_INPUT}") || exit 1

    printf '%s\n' "${MMW_GWLFE_BASELINE_OUTPUT}" | jq -S . > "${OUT_GWLFE_BASELINE}"

    # Delete any existing layer with the same name
    aws s3 rm "${S3_BUCKET}/_attributes/metadata__${OUT_LAYER}__0.json"
    aws s3 rm --recursive "${S3_BUCKET}/${OUT_LAYER}"

    # Ingest given .tif into MMW layer
    spark-submit "${MMW_ETL_JAR}" \
        --source-image "${SOURCE_IMAGE}" \
        --reference-catalog "${S3_BUCKET}" \
        --reference-layer "${REF_LAYER}" \
        --output-catalog "${S3_BUCKET}" \
        --output-layer "${OUT_LAYER}"

    # Run source shape with this new ingested layer and save modified output.
    # NOTE: OUT_LAYER is interpolated without JSON escaping, so a label that
    # contains a double quote or backslash yields an invalid request body.
    MMW_GWLFE_MODIFIED_INPUT="{
        \"area_of_interest\": $(<"${SOURCE_SHAPE}"),
        \"layer_overrides\": {
            \"__LAND__\": \"${OUT_LAYER}\"
        }
    }"
    MMW_GWLFE_MODIFIED_OUTPUT=$(mmw_run_gwlfe "${MMW_GWLFE_MODIFIED_INPUT}") || exit 1

    printf '%s\n' "${MMW_GWLFE_MODIFIED_OUTPUT}" | jq -S . > "${OUT_GWLFE_MODIFIED}"
 done
	#!/usr/bin/env bash

	# Abort on the first failing command (errexit) and trace every command as it
	# runs (xtrace). NOTE: the trace shows expanded arguments, so any command that
	# is passed MMW_API_KEY will reveal it in the trace output.
	set -o errexit -o xtrace

	###
	# Configuration
	###

	# Root of the Model My Watershed API that requests are sent to
	MMW_API_URL='http://localhost:8000/api'
	# API key of the user the requests are made as
	MMW_API_KEY='my-api-key'
	# Assembly JAR built from https://github.com/jpolchlo/mmw-etl
	MMW_ETL_JAR='target/scala-2.12/mmw-etl-assembly-0.0.1.jar'

	# S3 bucket holding the MMW layers
	S3_BUCKET='s3://my-bucket-name'
	# Existing layer that newly ingested layers are aligned with
	REF_LAYER='nlcd-2019-30m-epsg5070-512-uint8raw'

	# Folder containing the input data
	IN_PATH='scratch/example_lu_data'

	###
	# Helper: poll /jobs/<uuid>/ once a second until the reported status is no
	# longer "started", then print the final job response (raw JSON) on stdout.
	# Always polls at least once.
	# Globals:   MMW_API_URL, MMW_API_KEY (read)
	# Arguments: $1 - uuid of the job to wait for
	###
	function _mmw_poll_job () {
	    local job_uuid=$1
	    local job_response
	    local job_status="started"

	    while [[ "${job_status}" == "started" ]]
	    do
	        sleep 1
	        job_response=$(curl -X GET \
	            -H "Authorization: Token ${MMW_API_KEY}" \
	            "${MMW_API_URL}/jobs/${job_uuid}/")

	        job_status=$(printf '%s\n' "${job_response}" | jq -r .status)
	    done

	    printf '%s\n' "${job_response}"
	}

	###
	# Function that takes an input to use with /modeling/gwlf-e/prepare endpoint,
	# and returns the output of GWLF-E for that input. To see documentation on how
	# that input should be shaped, reference: https://modelmywatershed.org/api/docs/
	#
	# Globals:   MMW_API_URL, MMW_API_KEY (read)
	# Arguments: $1 - JSON request body for the prepare endpoint
	# Outputs:   the .result of the finished run job on stdout; diagnostics on
	#            stderr, so they are not swallowed by a caller's $(...)
	# Exits:     with status 1 when a job cannot be started or does not end up
	#            "complete". Call it inside $(...) to keep the exit confined to
	#            the substitution subshell.
	###
	function mmw_run_gwlfe () {
	    local prepare_request=$1
	    local prepare_response prepare_uuid prepare_status
	    local run_request run_start_response run_uuid run_response run_status

	    # Post a request to /modeling/gwlf-e/prepare and save the job uuid
	    prepare_response=$(curl -X POST \
	        -H "Content-Type: application/json" \
	        -H "Authorization: Token ${MMW_API_KEY}" \
	        -d "${prepare_request}" \
	        "${MMW_API_URL}/modeling/gwlf-e/prepare/")

	    # Real (unescaped) pipes: the response must be fed through jq
	    prepare_uuid=$(printf '%s\n' "${prepare_response}" | jq -r .job_uuid)

	    # jq prints "null" for a missing key and nothing at all for empty input
	    if [[ -z "${prepare_uuid}" || "${prepare_uuid}" == "null" ]]
	    then
	        echo "Unable to start prepare job: ${prepare_response}" >&2
	        exit 1
	    fi

	    # Only poll when the job is still reported as "started"
	    prepare_status=$(printf '%s\n' "${prepare_response}" | jq -r .status)
	    if [[ "${prepare_status}" == "started" ]]
	    then
	        prepare_response=$(_mmw_poll_job "${prepare_uuid}")
	        prepare_status=$(printf '%s\n' "${prepare_response}" | jq -r .status)
	    fi

	    # If the job status is not "complete", report error and stop
	    if [[ "${prepare_status}" != "complete" ]]
	    then
	        echo "GWLF-E PREPARE job ${prepare_uuid} failed with error: $(printf '%s\n' "${prepare_response}" | jq .)" >&2
	        exit 1
	    fi

	    # Otherwise, post a request to /modeling/gwlf-e/run/ with the prepare job uuid
	    run_request="{\"job_uuid\": \"${prepare_uuid}\"}"

	    run_start_response=$(curl -X POST \
	        -H "Content-Type: application/json" \
	        -H "Authorization: Token ${MMW_API_KEY}" \
	        -d "${run_request}" \
	        "${MMW_API_URL}/modeling/gwlf-e/run/")

	    run_uuid=$(printf '%s\n' "${run_start_response}" | jq -r .job_uuid)

	    # Same guard as for the prepare job, so /jobs/null/ is never polled
	    if [[ -z "${run_uuid}" || "${run_uuid}" == "null" ]]
	    then
	        echo "Unable to start run job: ${run_start_response}" >&2
	        exit 1
	    fi

	    # The run job is always polled at least once before its status is judged
	    run_response=$(_mmw_poll_job "${run_uuid}")
	    run_status=$(printf '%s\n' "${run_response}" | jq -r .status)

	    # If the job status is not "complete", report error and stop
	    if [[ "${run_status}" != "complete" ]]
	    then
	        echo "GWLF-E RUN job ${run_uuid} failed with error: $(printf '%s\n' "${run_response}" | jq .)" >&2
	        exit 1
	    fi

	    # Report the GWLF-E result; quoting keeps whitespace and glob characters
	    # inside the JSON intact
	    printf '%s\n' "${run_response}" | jq -r .result
	}

	###
	# For every .tif file in the specified input data folder, we read the corresponding
	# .geojson shape, run GWLF-E for that shape and save the results to a
	# <label>_gwlfe_baseline.json, then ingest the .tif file, run GWLF-E for that
	# shape with that land layer, and save the results to a <label>_gwlfe_modified.json.
	# All paths are quoted so that folders and files containing spaces work.
	###
	for filename in "${IN_PATH}"/*.tif
	do
	    # An unmatched glob is left as the literal pattern; skip it.
	    # The || must be a real operator, not an escaped argument to [
	    [ -e "$filename" ] || continue

	    # Extract name of the .tif file without extension, use that for everything
	    IN_LABEL=$(basename "$filename" .tif)

	    # The source .tif file
	    SOURCE_IMAGE="${IN_PATH}/${IN_LABEL}.tif"
	    # The source .geojson file
	    SOURCE_SHAPE="${IN_PATH}/${IN_LABEL}.geojson"
	    # The name of the output layer to ingest to, and use with MMW
	    OUT_LAYER="20231109-tt-${IN_LABEL}"

	    # Path of the baseline output
	    OUT_GWLFE_BASELINE="${IN_PATH}/${IN_LABEL}_gwlfe_baseline.json"
	    # Path of the modified output
	    OUT_GWLFE_MODIFIED="${IN_PATH}/${IN_LABEL}_gwlfe_modified.json"

	    # Stop with a clear message instead of a bare redirection error
	    if [ ! -r "${SOURCE_SHAPE}" ]
	    then
	        echo "Missing shape file for ${filename}: ${SOURCE_SHAPE}" >&2
	        exit 1
	    fi

	    # Run source shape against vanilla GWLF-E and save a baseline
	    MMW_GWLFE_BASELINE_INPUT="{\"area_of_interest\": $(<"${SOURCE_SHAPE}")}"
	    MMW_GWLFE_BASELINE_OUTPUT=$(mmw_run_gwlfe "${MMW_GWLFE_BASELINE_INPUT}") || exit 1

	    printf '%s\n' "${MMW_GWLFE_BASELINE_OUTPUT}" | jq -S . > "${OUT_GWLFE_BASELINE}"

	    # Delete any existing layer with the same name
	    aws s3 rm "${S3_BUCKET}/_attributes/metadata__${OUT_LAYER}__0.json"
	    aws s3 rm --recursive "${S3_BUCKET}/${OUT_LAYER}"

	    # Ingest given .tif into MMW layer
	    spark-submit "${MMW_ETL_JAR}" \
	        --source-image "${SOURCE_IMAGE}" \
	        --reference-catalog "${S3_BUCKET}" \
	        --reference-layer "${REF_LAYER}" \
	        --output-catalog "${S3_BUCKET}" \
	        --output-layer "${OUT_LAYER}"

	    # Run source shape with this new ingested layer and save modified output.
	    # NOTE: OUT_LAYER is interpolated without JSON escaping, so a label that
	    # contains a double quote or backslash yields an invalid request body.
	    MMW_GWLFE_MODIFIED_INPUT="{
	        \"area_of_interest\": $(<"${SOURCE_SHAPE}"),
	        \"layer_overrides\": {
	            \"__LAND__\": \"${OUT_LAYER}\"
	        }
	    }"
	    MMW_GWLFE_MODIFIED_OUTPUT=$(mmw_run_gwlfe "${MMW_GWLFE_MODIFIED_INPUT}") || exit 1

	    printf '%s\n' "${MMW_GWLFE_MODIFIED_OUTPUT}" | jq -S . > "${OUT_GWLFE_MODIFIED}"
	done