Created
March 21, 2026 16:14
-
-
Save legel/765e8c2fbad76528c8a68a7b825ca397 to your computer and use it in GitHub Desktop.
Quick proposed modification to Q.E.D. Lab FLUXNET Data Explorer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Bulk-download AmeriFlux data products through the AmeriFlux data_download API.
#
# Proposed modification of the script generated by
# https://www.keenangroup.info/fluxnet-explorer.html; the main change is
# platform-specific guidance for installing `jq` when it is missing.
#
# Usage:
#   script.sh [OUTDIR] [SITES_FILE] [LOGFILE]
#
#   OUTDIR      directory to download into   (default: ameriflux_downloads)
#   SITES_FILE  tab-separated site list      (default: ameriflux_selected_sites.txt)
#   LOGFILE     log file, truncated at start (default: ameriflux_bulk_download.log)
#
# The script changes into OUTDIR before touching SITES_FILE or LOGFILE, so
# relative paths for those two are resolved inside OUTDIR.
#
# SITES_FILE format: one site per line, fields separated by TAB characters:
#   site_id <TAB> data_product <TAB> source_label
# Blank lines and lines whose first field starts with '#' are skipped.
# data_product defaults to "FLUXNET" and source_label to "AmeriFlux".
#
# Environment:
#   AMERIFLUX_USER_ID          value sent as "user_id" in each request
#   AMERIFLUX_USER_EMAIL       value sent as "user_email" in each request
#   AMERIFLUX_V2_DOWNLOAD_URL  endpoint used for every product except FLUXNET2015
#   AMERIFLUX_V1_DOWNLOAD_URL  endpoint used for the FLUXNET2015 product
set -euo pipefail

OUTDIR="${1:-ameriflux_downloads}"
SITES_FILE="${2:-ameriflux_selected_sites.txt}"
LOGFILE="${3:-ameriflux_bulk_download.log}"
USER_ID="${AMERIFLUX_USER_ID:-<insert_ameriflux_username_here>}"
# This value is sent as "user_email", so the placeholder asks for an e-mail
# address (the previous placeholder text asked for a password).
USER_EMAIL="${AMERIFLUX_USER_EMAIL:-<insert_ameriflux_email_here>}"
V2_DOWNLOAD_URL="${AMERIFLUX_V2_DOWNLOAD_URL:-https://amfcdn.lbl.gov/api/v2/data_download}"
V1_DOWNLOAD_URL="${AMERIFLUX_V1_DOWNLOAD_URL:-https://amfcdn.lbl.gov/api/v1/data_download}"

# Print a message to stdout and append it to LOGFILE.
log() {
  printf '%s\n' "$*" | tee -a "$LOGFILE"
}

# Print platform-specific jq installation hints to stderr.
print_jq_install_help() {
  {
    echo "Error: jq (a JSON processor) is required but was not found."
    echo ""
    echo "Install jq for your platform:"
    case "$(uname -s)" in
      Darwin)
        echo "  brew install jq"
        ;;
      Linux)
        echo "  sudo apt-get install jq   # Debian/Ubuntu"
        echo "  sudo dnf install jq       # Fedora/RHEL"
        echo "  sudo pacman -S jq         # Arch"
        ;;
      MINGW* | MSYS* | CYGWIN*)
        echo "  choco install jq          # Chocolatey"
        echo "  scoop install jq          # Scoop"
        echo "  winget install jqlang.jq  # winget"
        ;;
      *)
        echo "  See https://jqlang.github.io/jq/download/"
        ;;
    esac
  } >&2
}

# Emit the JSON request body for one site on stdout.
# Arguments: $1 - site id, $2 - data product
# jq builds the document so that quotes or backslashes in any value are
# escaped instead of corrupting the JSON.
build_request_body() {
  local site=$1
  local product=$2

  if [[ "$product" == "FLUXNET2015" ]]; then
    jq -n \
      --arg user_id "$USER_ID" \
      --arg user_email "$USER_EMAIL" \
      --arg data_product "$product" \
      --arg site_id "$site" \
      --arg description "Download ${product} for ${site}" \
      '{
        user_id: $user_id,
        user_email: $user_email,
        data_product: $data_product,
        data_variant: "FULLSET",
        data_policy: "CCBY4.0",
        site_ids: [$site_id],
        intended_use: "QED Lab FLUXNET Data Explorer",
        description: $description,
        agree_policy: true
      }'
  else
    jq -n \
      --arg user_id "$USER_ID" \
      --arg user_email "$USER_EMAIL" \
      --arg data_product "$product" \
      --arg site_id "$site" \
      --arg description "Request ${product} download for ${site} via the Q.E.D. Lab FLUXNET Data Explorer for Keenan Group research workflows." \
      '{
        user_id: $user_id,
        user_email: $user_email,
        data_policy: "CCBY4.0",
        data_product: $data_product,
        data_variant: "FULLSET",
        site_ids: [$site_id],
        intended_use: "other_research",
        description: $description
      }'
  fi
}

mkdir -p "$OUTDIR"
cd "$OUTDIR"
: > "$LOGFILE"

if ! command -v jq > /dev/null 2>&1; then
  print_jq_install_help
  exit 1
fi

# Warn (but do not abort) when the placeholder credentials were never replaced.
case "${USER_ID}${USER_EMAIL}" in
  *"<insert_ameriflux_"*)
    echo "Warning: AMERIFLUX_USER_ID / AMERIFLUX_USER_EMAIL are not set; sending placeholder values." >&2
    ;;
esac

# Seed a default site list on first run. Fields are written with explicit
# tabs because the reader below splits on tabs only.
if [[ ! -f "$SITES_FILE" ]]; then
  {
    printf '# site_id\tdata_product\tsource_label\n'
    for default_site in US-DPW US-Esm US-KS1 US-KS2 US-KS3; do
      printf '%s\tFLUXNET\tAmeriFlux\n' "$default_site"
    done
  } > "$SITES_FILE"
fi

if [[ ! -s "$SITES_FILE" ]]; then
  log "No AmeriFlux API-backed sites provided in $SITES_FILE."
  exit 0
fi

# The `|| [[ -n ... ]]` clause keeps the final line when the file does not end
# with a newline (read fills the variables but returns non-zero at EOF).
while IFS=$'\t' read -r SITE_ID DATA_PRODUCT SOURCE_LABEL || [[ -n "$SITE_ID" ]]; do
  # Tolerate CRLF line endings.
  SITE_ID="${SITE_ID%$'\r'}"
  DATA_PRODUCT="${DATA_PRODUCT%$'\r'}"
  SOURCE_LABEL="${SOURCE_LABEL%$'\r'}"

  [[ -n "$SITE_ID" ]] || continue
  [[ "$SITE_ID" != \#* ]] || continue

  DATA_PRODUCT="${DATA_PRODUCT:-FLUXNET}"
  SOURCE_LABEL="${SOURCE_LABEL:-AmeriFlux}"

  log "Requesting ${DATA_PRODUCT} URLs for ${SITE_ID} (${SOURCE_LABEL})..."

  # FLUXNET2015 goes to the v1 endpoint; every other product goes to v2.
  if [[ "$DATA_PRODUCT" == "FLUXNET2015" ]]; then
    REQUEST_URL="$V1_DOWNLOAD_URL"
  else
    REQUEST_URL="$V2_DOWNLOAD_URL"
  fi

  REQUEST_BODY=$(build_request_body "$SITE_ID" "$DATA_PRODUCT") || {
    log "Could not build request for ${SITE_ID}; skipping."
    continue
  }

  RESPONSE=$(curl -sS -X POST "$REQUEST_URL" \
    -H "Content-Type: application/json" \
    -H "accept: application/json" \
    --data-binary "$REQUEST_BODY") || {
    log "Request failed for ${SITE_ID}; skipping."
    continue
  }

  # Best effort on purpose: a non-JSON or unexpected response is treated the
  # same as "no URLs" and reported just below.
  URLS=$(printf '%s' "$RESPONSE" | jq -r '.data_urls[]?.url // empty' 2> /dev/null || true)
  if [[ -z "$URLS" ]]; then
    log "No data_urls returned for ${SITE_ID} (${DATA_PRODUCT}); continuing."
    continue
  fi

  while IFS= read -r url; do
    [[ -n "$url" ]] || continue
    # Drop the query string before deriving the local file name.
    clean_url="${url%%\?*}"
    filename="$(basename "$clean_url")"
    if [[ -z "$filename" ]]; then
      log "Could not derive a file name for ${SITE_ID} (${DATA_PRODUCT}): $url"
      continue
    fi
    log "Downloading ${filename} (${SITE_ID}, ${DATA_PRODUCT})"
    # --fail makes HTTP errors a failure instead of saving the error page
    # under the data file's name.
    if ! curl -L --fail "$url" -o "$filename"; then
      log "Download failed for ${SITE_ID} (${DATA_PRODUCT}): $url"
    fi
  done <<< "$URLS"
done < "$SITES_FILE"

log "AmeriFlux API bulk download complete."
Author
Absolutely! Very happy and honored to support.
Looking forward to putting this data downloader to work!
I can also envision us producing and sharing new deep learning representations derived from the original data (e.g., pre-trained self-supervised AI model weights from autoencoding of AmeriFlux streams).
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks so much for the suggestion!
I've updated the explorer, integrating the graceful exit and also adding a python3 fallback for when jq is absent, to avoid install requests where possible.
Thanks again for the feedback.