#!/usr/bin/env bash
set -euo pipefail
# This script reads an attachment field from a SOURCE base, minimizes the images, and
# writes them to a DESTINATION base to reduce attachment storage usage.
# Create a personal access token at https://airtable.com/create/tokens
AIRTABLE_TOKEN="${AIRTABLE_TOKEN:-XXXXXX}" # or export AIRTABLE_TOKEN before running
# ---------- CONFIG (SOURCE) ----------
BASE_ID="${BASE_ID:-appB9RP6Hs87LG5hM}" # from your URL
TABLE_ID_OR_NAME="${TABLE_ID_OR_NAME:-tbl70W0ckkvD1Du8L}" # id or name
VIEW_ID="${VIEW_ID:-viwsvZfi8aGdZ4Qw4}" # optional
SRC_FIELD="${SRC_FIELD:-Photos}" # attachment field with originals
DST_FIELD="${DST_FIELD:-Photos}" # attachment field to write compressed (source, not used when UPDATE_SOURCE=false)
SIZE_THRESHOLD_MB="${SIZE_THRESHOLD_MB:-1.5}" # only compress if > X MB
MAX_DIM="${MAX_DIM:-1600}" # max width/height (px)
QUALITY="${QUALITY:-85}" # JPEG/WebP quality
KEEP_SMALL_OR_NONIMAGE="${KEEP_SMALL_OR_NONIMAGE:-true}" # keep originals if small/non-image
UPDATE_SOURCE="${UPDATE_SOURCE:-false}" # DO NOT update source by default (per request)
CLEAR_SOURCE_AFTER_WRITE="${CLEAR_SOURCE_AFTER_WRITE:-false}" # ignored if UPDATE_SOURCE=false
RATE_LIMIT_SLEEP="${RATE_LIMIT_SLEEP:-0.25}" # seconds between API calls
TMP_DIR="${TMP_DIR:-/tmp/airtable-compress}"
AIRTABLE_API="https://api.airtable.com/v0"
AUTH_HEADER="Authorization: Bearer ${AIRTABLE_TOKEN:?AIRTABLE_TOKEN env var required}"
# ---------- CONFIG (DESTINATION) ----------
DEST_BASE_ID="${DEST_BASE_ID:-appwNbEw0wd5ly7FD}"
DEST_TABLE_ID_OR_NAME="${DEST_TABLE_ID_OR_NAME:-tbl70W0ckkvD1Du8L}"
DEST_VIEW_ID="${DEST_VIEW_ID:-viwsvZfi8aGdZ4Qw4}" # optional
MATCH_FIELD="${MATCH_FIELD:-Name}" # field present in BOTH bases to map records
DEST_DST_FIELD="${DEST_DST_FIELD:-$DST_FIELD}" # where to write minimized images in dest (defaults to source DST_FIELD)
# -----------------------------------------------
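# Example invocation (illustrative values; every setting above can be overridden via env,
# and the script filename here is just a placeholder):
#   AIRTABLE_TOKEN=pat_xxx BASE_ID=appXXXXXXXXXXXXXX DEST_BASE_ID=appYYYYYYYYYYYYYY \
#     SIZE_THRESHOLD_MB=2 MAX_DIM=1200 ./airtable-compress.sh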
mkdir -p "$TMP_DIR"
mb_to_bytes() { python3 - <<EOF
t=float("${1}"); print(int(t*1024*1024))
EOF
}
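# e.g. mb_to_bytes 1.5 -> 1572864 (1.5 * 1024 * 1024, truncated to an integer)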
THRESHOLD_BYTES=$(mb_to_bytes "$SIZE_THRESHOLD_MB")
# ------------- API helpers (source) -------------
api_get() { # path, query
local path="$1"; shift
local qs="${1:-}"
curl -sS -H "$AUTH_HEADER" "${AIRTABLE_API}/${BASE_ID}/${path}${qs:+?$qs}"
}
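# e.g. api_get "$TABLE_ID_OR_NAME" "pageSize=1" | jq -r '.records[0].id'
# (the Airtable list-records endpoint returns {"records": [...], "offset": ...})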
api_patch_record() { # record_id, json_body
local rec="$1"; shift
local body="$1"; shift
curl -sS -X PATCH -H "$AUTH_HEADER" -H "Content-Type: application/json" \
-d "$body" "${AIRTABLE_API}/${BASE_ID}/${TABLE_ID_OR_NAME}/${rec}"
sleep "$RATE_LIMIT_SLEEP"
}
# ------------- API helpers (destination) -------------
api_get_dest() { # path, query
local path="$1"; shift
local qs="${1:-}"
curl -sS -H "$AUTH_HEADER" "${AIRTABLE_API}/${DEST_BASE_ID}/${path}${qs:+?$qs}"
}
api_patch_record_dest() { # record_id, json_body
local rec="$1"; shift
local body="$1"; shift
curl -sS -X PATCH -H "$AUTH_HEADER" -H "Content-Type: application/json" \
-d "$body" "${AIRTABLE_API}/${DEST_BASE_ID}/${DEST_TABLE_ID_OR_NAME}/${rec}"
sleep "$RATE_LIMIT_SLEEP"
}
# Upload a local file to tmpfiles to obtain a public URL Airtable can ingest
upload_tmpfiles() { # filepath -> echo direct_download_url
local fp="$1"
local mime; mime="$(file -b --mime-type "$fp" 2>/dev/null || echo application/octet-stream)"
local resp url
resp=$(curl -sS -F "file=@${fp};type=${mime}" https://tmpfiles.org/api/v1/upload)
url=$(echo "$resp" | jq -r '.data.url // empty')
[[ -z "$url" ]] && return 1
url="${url/http:\/\//https://}"
echo "$url" | sed -E 's#^(https://tmpfiles\.org)/#\1/dl/#'
}
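# e.g. http://tmpfiles.org/1234567/photo.jpg -> https://tmpfiles.org/dl/1234567/photo.jpg
# (the plain tmpfiles.org URL serves an HTML page; the /dl/ form is the direct download,
# which is what Airtable needs to ingest the file)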
is_image_ext() {
local name
name="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
case "$name" in
*.jpg|*.jpeg|*.png|*.webp|*.heic) return 0 ;;
*) return 1 ;;
esac
}
compress_file() { # in out
local in="$1" ; local out="$2"
# Apply EXIF orientation to pixel data first, then strip metadata, then resize/quality.
if command -v magick >/dev/null 2>&1; then
magick "$in" -auto-orient -strip -resize "${MAX_DIM}x${MAX_DIM}>" -quality "$QUALITY" "$out"
else
convert "$in" -auto-orient -strip -resize "${MAX_DIM}x${MAX_DIM}>" -quality "$QUALITY" "$out"
fi
}
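# The trailing '>' in the -resize geometry means "only shrink": images already within
# MAX_DIM x MAX_DIM keep their original dimensions.
# e.g. compress_file ./IMG_0001.HEIC ./out.jpg   (illustrative paths)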
# Build dest record id by matching MATCH_FIELD value via filterByFormula
find_dest_record_id_by_value() { # value -> echo dest_id or empty
local value="$1"
# JSON-encode string literal for safe use in formula
local val_json
val_json=$(printf '%s' "$value" | jq -Rr @json)
# {Field} = "value"
local formula="{${MATCH_FIELD}} = ${val_json}"
local encoded
encoded=$(printf '%s' "$formula" | jq -sRr @uri)
local q="pageSize=1&filterByFormula=${encoded}"
[[ -n "${DEST_VIEW_ID:-}" ]] && q="${q}&view=${DEST_VIEW_ID}"
local resp
resp="$(api_get_dest "${DEST_TABLE_ID_OR_NAME}" "$q")"
echo "$resp" | jq -r '.records[0].id // empty'
}
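# e.g. with MATCH_FIELD=Name and value 'Acme "HQ"', the formula becomes
#   {Name} = "Acme \"HQ\""
# which is then URL-encoded and passed as filterByFormula.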
process_record() { # record JSON (from SOURCE)
local rec_json="$1"
local rec_id; rec_id=$(echo "$rec_json" | jq -r '.id')
local fields_json; fields_json=$(echo "$rec_json" | jq -c '.fields')
# array of src attachments
local files_json
files_json=$(echo "$fields_json" | jq -c --arg f "$SRC_FIELD" '.[$f] // []')
local count; count=$(echo "$files_json" | jq 'length')
[[ "$count" -eq 0 ]] && return
local new_array="[]"
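# Airtable ingests attachments supplied as {url, filename} objects: it downloads the
# file from the URL and re-hosts it, so the temporary link only needs to stay alive
# until Airtable fetches it.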
local changed="false"
# for logging totals
local total_orig=0
local total_min=0
for i in $(seq 0 $((count-1))); do
local item url filename size dl out direct out_size
item=$(echo "$files_json" | jq -c ".[$i]")
url=$(echo "$item" | jq -r '.url')
filename=$(echo "$item" | jq -r '.filename // "file.jpg"')
size=$(echo "$item" | jq -r '.size // 0')
total_orig=$(( total_orig + size ))
if [[ "$size" -le "$THRESHOLD_BYTES" ]] || ! is_image_ext "$filename"; then
if [[ "$KEEP_SMALL_OR_NONIMAGE" == "true" ]]; then
new_array=$(echo "$new_array" | jq --arg u "$url" --arg n "$filename" '. + [{url:$u, filename:$n}]')
fi
continue
fi
dl="$TMP_DIR/${rec_id}_${i}_orig"
curl -sSL "$url" -o "$dl"
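# NOTE: the output is always re-encoded as JPEG, while the original filename (and its
# extension) is reused for the new attachment below.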
out="$TMP_DIR/${rec_id}_${i}_compressed.jpg"
compress_file "$dl" "$out"
if ! direct=$(upload_tmpfiles "$out"); then
# fallback: keep original
new_array=$(echo "$new_array" | jq --arg u "$url" --arg n "$filename" '. + [{url:$u, filename:$n}]')
continue
fi
# get compressed byte size (portable stat)
out_size=$(stat -c%s "$out" 2>/dev/null || stat -f%z "$out" 2>/dev/null || echo 0)
total_min=$(( total_min + out_size ))
new_array=$(echo "$new_array" | jq --arg u "$direct" --arg n "$filename" '. + [{url:$u, filename:$n}]')
changed="true"
done
# Write ONLY to DESTINATION (per request)
if [[ "$changed" == "true" || "$KEEP_SMALL_OR_NONIMAGE" == "true" ]]; then
# Obtain match value from the source record
local match_value
match_value=$(echo "$fields_json" | jq -r --arg f "$MATCH_FIELD" '
if has($f) then
.[$f] | (if type=="string" then . elif type=="number" then tostring else "" end)
else "" end
')
if [[ -z "$match_value" ]]; then
echo "WARN: Source record $rec_id missing MATCH_FIELD '$MATCH_FIELD'; skipping dest update"
return
fi
local dest_id
dest_id="$(find_dest_record_id_by_value "$match_value")"
if [[ -z "$dest_id" ]]; then
echo "WARN: No destination record matching {${MATCH_FIELD}} = '$match_value'; skipping"
return
fi
local body
body=$(jq -n --arg fld "$DEST_DST_FIELD" --argjson arr "$new_array" \
'{fields: {($fld): $arr}}')
echo "Updating DEST record $dest_id (match ${MATCH_FIELD}='$match_value')"
echo "== Files in src record: $count"
echo "== Total original: ${total_orig} bytes"
echo "== Total compressed: ${total_min} bytes"
echo "$body" | jq .
api_patch_record_dest "$dest_id" "$body"
fi
# Optionally update the SOURCE (kept switch, default false)
if [[ "$UPDATE_SOURCE" == "true" ]]; then
local body_src
if [[ "$CLEAR_SOURCE_AFTER_WRITE" == "true" ]]; then
body_src=$(jq -n --arg dst "$DST_FIELD" --arg src "$SRC_FIELD" --argjson arr "$new_array" \
'{fields: {($dst): $arr, ($src): []}}')
else
body_src=$(jq -n --arg dst "$DST_FIELD" --argjson arr "$new_array" \
'{fields: {($dst): $arr}}')
fi
echo "Updating SOURCE record $rec_id"
echo "$body_src" | jq .
api_patch_record "$rec_id" "$body_src"
fi
}
# -------- MAIN: paginate over SOURCE view or whole SOURCE table ----------
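# Airtable paginates list results: each page may include an "offset" token that must be
# sent back to fetch the next page; when the token is absent, the last page was reached.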
echo "Scanning SOURCE table ${TABLE_ID_OR_NAME} in base ${BASE_ID} ..."
OFFSET=""
while : ; do
QUERY="pageSize=50"
[[ -n "${VIEW_ID:-}" ]] && QUERY="${QUERY}&view=${VIEW_ID}"
[[ -n "$OFFSET" ]] && QUERY="${QUERY}&offset=${OFFSET}"
resp="$(api_get "${TABLE_ID_OR_NAME}" "$QUERY")"
echo "$resp" | jq -c '.records[]' | while read -r rec; do
process_record "$rec"
sleep "$RATE_LIMIT_SLEEP"
done
OFFSET=$(echo "$resp" | jq -r '.offset // empty')
[[ -z "$OFFSET" ]] && break
done
echo "Done."