#!/usr/bin/env bash
set -euo pipefail
# This script reads an attachment field from the SOURCE table, minimizes the images,
# and writes them to the DESTINATION table to reduce storage usage.
# Create a personal access token at https://airtable.com/create/tokens
AIRTABLE_TOKEN="XXXXXX"
# ---------- CONFIG (SOURCE) ----------
BASE_ID="${BASE_ID:-appB9RP6Hs87LG5hM}"                       # from your URL
TABLE_ID_OR_NAME="${TABLE_ID_OR_NAME:-tbl70W0ckkvD1Du8L}"     # id or name
VIEW_ID="${VIEW_ID:-viwsvZfi8aGdZ4Qw4}"                       # optional
SRC_FIELD="${SRC_FIELD:-Photos}"                              # attachment field with originals
DST_FIELD="${DST_FIELD:-Photos}"                              # attachment field to write compressed (source; not used when UPDATE_SOURCE=false)
SIZE_THRESHOLD_MB="${SIZE_THRESHOLD_MB:-1.5}"                 # only compress if > X MB
MAX_DIM="${MAX_DIM:-1600}"                                    # max width/height (px)
QUALITY="${QUALITY:-85}"                                      # JPEG/WebP quality
KEEP_SMALL_OR_NONIMAGE="${KEEP_SMALL_OR_NONIMAGE:-true}"      # keep originals if small/non-image
UPDATE_SOURCE="${UPDATE_SOURCE:-false}"                       # DO NOT update source by default (per request)
CLEAR_SOURCE_AFTER_WRITE="${CLEAR_SOURCE_AFTER_WRITE:-false}" # ignored if UPDATE_SOURCE=false
RATE_LIMIT_SLEEP="${RATE_LIMIT_SLEEP:-0.25}"                  # seconds between API calls
TMP_DIR="${TMP_DIR:-/tmp/airtable-compress}"
AIRTABLE_API="https://api.airtable.com/v0"
AUTH_HEADER="Authorization: Bearer ${AIRTABLE_TOKEN:?AIRTABLE_TOKEN env var required}"
# ---------- CONFIG (DESTINATION) ----------
DEST_BASE_ID="${DEST_BASE_ID:-appwNbEw0wd5ly7FD}"
DEST_TABLE_ID_OR_NAME="${DEST_TABLE_ID_OR_NAME:-tbl70W0ckkvD1Du8L}"
DEST_VIEW_ID="${DEST_VIEW_ID:-viwsvZfi8aGdZ4Qw4}"             # optional
MATCH_FIELD="${MATCH_FIELD:-Name}"                            # field present in BOTH bases to map records
DEST_DST_FIELD="${DEST_DST_FIELD:-$DST_FIELD}"                # where to write minimized images in dest (defaults to source DST_FIELD)
# -----------------------------------------------
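# Usage sketch (hedged: the script filename is a placeholder and the IDs above
# are examples from this gist; override any CONFIG value via the environment):
#
#   AIRTABLE_TOKEN="patXXXX" \
#   BASE_ID="appB9RP6Hs87LG5hM" TABLE_ID_OR_NAME="tbl70W0ckkvD1Du8L" \
#   SIZE_THRESHOLD_MB=2 MAX_DIM=1200 QUALITY=80 \
#   ./airtable-compress.sh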
mkdir -p "$TMP_DIR"
mb_to_bytes() { python3 - <<EOF
t=float("${1}"); print(int(t*1024*1024))
EOF
}
THRESHOLD_BYTES=$(mb_to_bytes "$SIZE_THRESHOLD_MB")
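# Example: mb_to_bytes 1.5 -> 1572864 (1.5 * 1024 * 1024, truncated to an int),
# so the default threshold skips any attachment of 1.5 MB or less.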
# ------------- API helpers (source) -------------
api_get() { # path, query
  local path="$1"; shift
  local qs="${1:-}"
  curl -sS -H "$AUTH_HEADER" "${AIRTABLE_API}/${BASE_ID}/${path}${qs:+?$qs}"
}
api_patch_record() { # record_id, json_body
  local rec="$1"; shift
  local body="$1"; shift
  curl -sS -X PATCH -H "$AUTH_HEADER" -H "Content-Type: application/json" \
    -d "$body" "${AIRTABLE_API}/${BASE_ID}/${TABLE_ID_OR_NAME}/${rec}"
  sleep "$RATE_LIMIT_SLEEP"
}
# ------------- API helpers (destination) -------------
api_get_dest() { # path, query
  local path="$1"; shift
  local qs="${1:-}"
  curl -sS -H "$AUTH_HEADER" "${AIRTABLE_API}/${DEST_BASE_ID}/${path}${qs:+?$qs}"
}
api_patch_record_dest() { # record_id, json_body
  local rec="$1"; shift
  local body="$1"; shift
  curl -sS -X PATCH -H "$AUTH_HEADER" -H "Content-Type: application/json" \
    -d "$body" "${AIRTABLE_API}/${DEST_BASE_ID}/${DEST_TABLE_ID_OR_NAME}/${rec}"
  sleep "$RATE_LIMIT_SLEEP"
}
# Upload a local file to tmpfiles.org to obtain a public URL Airtable can ingest
upload_tmpfiles() { # filepath -> echo direct_download_url
  local fp="$1"
  local mime; mime="$(file -b --mime-type "$fp" 2>/dev/null || echo application/octet-stream)"
  local resp url
  resp=$(curl -sS -F "file=@${fp};type=${mime}" https://tmpfiles.org/api/v1/upload)
  url=$(echo "$resp" | jq -r '.data.url // empty')
  [[ -z "$url" ]] && return 1
  url="${url/http:\/\//https://}"
  echo "$url" | sed -E 's#^(https://tmpfiles\.org)/#\1/dl/#'
}
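# Illustrative transformation (URL shape assumed from tmpfiles.org's API
# responses, not verified against current docs):
#   http://tmpfiles.org/1234567/photo.jpg -> https://tmpfiles.org/dl/1234567/photo.jpg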
is_image_ext() {
  local name
  name="$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')"
  case "$name" in
    *.jpg|*.jpeg|*.png|*.webp|*.heic) return 0 ;;
    *) return 1 ;;
  esac
}
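# Example: is_image_ext "IMG_0042.JPG" -> 0 (the name is lowercased before
#          matching), while is_image_ext "notes.pdf" -> 1 (kept as non-image).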
compress_file() { # in out
  local in="$1" ; local out="$2"
  # Apply EXIF orientation to pixel data first, then strip metadata, then resize/quality.
  if command -v magick >/dev/null 2>&1; then
    magick "$in" -auto-orient -strip -resize "${MAX_DIM}x${MAX_DIM}>" -quality "$QUALITY" "$out"
  else
    convert "$in" -auto-orient -strip -resize "${MAX_DIM}x${MAX_DIM}>" -quality "$QUALITY" "$out"
  fi
}
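# The ">" suffix in the -resize geometry tells ImageMagick to only shrink
# images larger than MAX_DIM, never upscale smaller ones. Standalone sketch
# (assumes ImageMagick 7 is installed; file names are hypothetical):
#   magick photo.heic -auto-orient -strip -resize "1600x1600>" -quality 85 photo.jpg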
# Build dest record id by matching MATCH_FIELD value via filterByFormula
find_dest_record_id_by_value() { # value -> echo dest_id or empty
  local value="$1"
  # JSON-encode string literal for safe use in formula
  local val_json
  val_json=$(printf '%s' "$value" | jq -Rr @json)
  # {Field} = "value"
  local formula="{${MATCH_FIELD}} = ${val_json}"
  local encoded
  encoded=$(printf '%s' "$formula" | jq -sRr @uri)
  local q="pageSize=1&filterByFormula=${encoded}"
  [[ -n "${DEST_VIEW_ID:-}" ]] && q="${q}&view=${DEST_VIEW_ID}"
  local resp
  resp="$(api_get_dest "${DEST_TABLE_ID_OR_NAME}" "$q")"
  echo "$resp" | jq -r '.records[0].id // empty'
}
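# Example: with MATCH_FIELD="Name" and value "Lake House", the formula is
#   {Name} = "Lake House"
# which @uri encodes to %7BName%7D%20%3D%20%22Lake%20House%22 before being
# passed as filterByFormula.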
process_record() { # record JSON (from SOURCE)
  local rec_json="$1"
  local rec_id; rec_id=$(echo "$rec_json" | jq -r '.id')
  local fields_json; fields_json=$(echo "$rec_json" | jq -c '.fields')
  # array of src attachments
  local files_json
  files_json=$(echo "$fields_json" | jq -c --arg f "$SRC_FIELD" '.[$f] // []')
  local count; count=$(echo "$files_json" | jq 'length')
  [[ "$count" -eq 0 ]] && return
  local new_array="[]"
  local changed="false"
  # for logging totals
  local total_orig=0
  local total_min=0
  for i in $(seq 0 $((count-1))); do
    local item url filename size dl out direct out_size
    item=$(echo "$files_json" | jq -c ".[$i]")
    url=$(echo "$item" | jq -r '.url')
    filename=$(echo "$item" | jq -r '.filename // "file.jpg"')
    size=$(echo "$item" | jq -r '.size // 0')
    total_orig=$(( total_orig + size ))
    if [[ "$size" -le "$THRESHOLD_BYTES" ]] || ! is_image_ext "$filename"; then
      if [[ "$KEEP_SMALL_OR_NONIMAGE" == "true" ]]; then
        new_array=$(echo "$new_array" | jq --arg u "$url" --arg n "$filename" '. + [{url:$u, filename:$n}]')
      fi
      continue
    fi
    dl="$TMP_DIR/${rec_id}_${i}_orig"
    curl -sSL "$url" -o "$dl"
    out="$TMP_DIR/${rec_id}_${i}_compressed.jpg"
    compress_file "$dl" "$out"
    if ! direct=$(upload_tmpfiles "$out"); then
      # fallback: keep original
      new_array=$(echo "$new_array" | jq --arg u "$url" --arg n "$filename" '. + [{url:$u, filename:$n}]')
      continue
    fi
    # get compressed byte size (portable stat)
    out_size=$(stat -c%s "$out" 2>/dev/null || stat -f%z "$out" 2>/dev/null || echo 0)
    total_min=$(( total_min + out_size ))
    new_array=$(echo "$new_array" | jq --arg u "$direct" --arg n "$filename" '. + [{url:$u, filename:$n}]')
    changed="true"
  done
  # Write ONLY to DESTINATION (per request)
  if [[ "$changed" == "true" || "$KEEP_SMALL_OR_NONIMAGE" == "true" ]]; then
    # Obtain match value from the source record
    local match_value
    match_value=$(echo "$fields_json" | jq -r --arg f "$MATCH_FIELD" '
      if has($f) then
        .[$f] | (if type=="string" then . elif type=="number" then tostring else "" end)
      else "" end
    ')
    if [[ -z "$match_value" ]]; then
      echo "WARN: Source record $rec_id missing MATCH_FIELD '$MATCH_FIELD'; skipping dest update"
      return
    fi
    local dest_id
    dest_id="$(find_dest_record_id_by_value "$match_value")"
    if [[ -z "$dest_id" ]]; then
      echo "WARN: No destination record matching {${MATCH_FIELD}} = '$match_value'; skipping"
      return
    fi
    local body
    body=$(jq -n --arg fld "$DEST_DST_FIELD" --argjson arr "$new_array" \
      '{fields: {($fld): $arr}}')
    echo "Updating DEST record $dest_id (match ${MATCH_FIELD}='$match_value')"
    echo "== Files in src record: $count"
    echo "== Total original: ${total_orig} bytes"
    echo "== Total compressed: ${total_min} bytes"
    echo "$body" | jq .
    api_patch_record_dest "$dest_id" "$body"
  fi
  # Optionally update the SOURCE (kept switch, default false)
  if [[ "$UPDATE_SOURCE" == "true" ]]; then
    local body_src
    if [[ "$CLEAR_SOURCE_AFTER_WRITE" == "true" ]]; then
      body_src=$(jq -n --arg dst "$DST_FIELD" --arg src "$SRC_FIELD" --argjson arr "$new_array" \
        '{fields: {($dst): $arr, ($src): []}}')
    else
      body_src=$(jq -n --arg dst "$DST_FIELD" --argjson arr "$new_array" \
        '{fields: {($dst): $arr}}')
    fi
    echo "Updating SOURCE record $rec_id"
    echo "$body_src" | jq .
    api_patch_record "$rec_id" "$body_src"
  fi
}
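# Note: Airtable's public API is rate limited (about 5 requests/second per
# base), so the default RATE_LIMIT_SLEEP of 0.25s plus the per-record sleep
# in the main loop below should stay comfortably under the limit.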
# -------- MAIN: paginate over SOURCE view or whole SOURCE table ----------
echo "Scanning SOURCE table ${TABLE_ID_OR_NAME} in base ${BASE_ID} ..."
OFFSET=""
while : ; do
  QUERY="pageSize=50"
  [[ -n "${VIEW_ID:-}" ]] && QUERY="${QUERY}&view=${VIEW_ID}"
  [[ -n "$OFFSET" ]] && QUERY="${QUERY}&offset=${OFFSET}"
  resp="$(api_get "${TABLE_ID_OR_NAME}" "$QUERY")"
  echo "$resp" | jq -c '.records[]' | while read -r rec; do
    process_record "$rec"
    sleep "$RATE_LIMIT_SLEEP"
  done
  OFFSET=$(echo "$resp" | jq -r '.offset // empty')
  [[ -z "$OFFSET" ]] && break
done
echo "Done."