@linuxmalaysia
Last active July 11, 2025 13:20
es_ingestion_monitor.sh — a shell script that polls the Elasticsearch node stats API and reports cluster-wide ingestion rates (documents/sec and GB/sec).
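Before running the script, it is worth sanity-checking the host and credentials with a one-off request. A minimal sketch, reusing the placeholder host and token from the configuration below (the -k flag only applies if the cluster uses a self-signed certificate):

curl -sk --header "Authorization: Bearer your-generated-service-account-token" "https://your-es-cluster.example.com:9200/_cluster/health?pretty"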
#!/bin/bash
# Configuration
ES_HOST="https://your-es-cluster.example.com:9200" # Your Elasticsearch host, IP address and port (e.g., "https://127.0.0.1:9200" or "https://<cloud_id>.es.<region>.gcp.cloud.es.io:9243")
# ES_USER="elastic" # If using username/password, uncomment and replace with your username
# ES_PASS="your-elastic-password" # If using username/password, uncomment and replace with your password
ES_TOKEN="your-generated-service-account-token" # Service account token. Takes priority over username/password; leave empty to fall back to ES_USER/ES_PASS above.
INTERVAL_SECONDS=10 # How often to check and calculate (e.g., 10 seconds)
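# A longer interval smooths bursty indexing into a steadier average; a shorter one reacts faster but is noisier.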
# --- Authentication Header Setup ---
# Use an array to properly pass multiple arguments to curl
CURL_AUTH_ARGS=()
# Prioritize token authentication if ES_TOKEN is set
if [ -n "${ES_TOKEN}" ]; then
    CURL_AUTH_ARGS+=("--header" "Authorization: Bearer ${ES_TOKEN}")
elif [ -n "${ES_USER}" ] && [ -n "${ES_PASS}" ]; then
    CURL_AUTH_ARGS+=("-u" "${ES_USER}:${ES_PASS}")
fi
# --- SSL Insecure Flag Setup ---
# Add -k (insecure) when ES_HOST uses HTTPS so self-signed certificates do not break the check.
# Use with caution in production; prefer --cacert with your cluster's CA instead.
# An array is used so that nothing is passed to curl when the flag is not needed
# (a quoted empty string would otherwise be misread by curl as a URL).
CURL_INSECURE_ARGS=()
if [[ "${ES_HOST}" == https://* ]]; then
    CURL_INSECURE_ARGS+=("-k")
fi
# --- Check for jq ---
if ! command -v jq &> /dev/null; then
    echo "Error: 'jq' is not installed. Please install it to parse JSON output."
    echo "  For Ubuntu: sudo apt-get install jq"
    echo "  For AlmaLinux/Rocky Linux: sudo dnf install epel-release && sudo dnf install jq"
    exit 1
fi
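# --- Check for bc (added safeguard; the rate math below assumes it is present) ---
# The calculations pipe arithmetic through 'bc', so verify it exists the same way
# jq is verified above.
if ! command -v bc &> /dev/null; then
    echo "Error: 'bc' is not installed. Please install it to calculate rates."
    echo "  For Ubuntu: sudo apt-get install bc"
    echo "  For AlmaLinux/Rocky Linux: sudo dnf install bc"
    exit 1
fi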
# --- Function to get total indexed documents and store size from a node ---
# We'll target the sum across all nodes for a cluster-wide rate
get_total_indexed_metrics() {
    local response
    # Request both index_total and store.size_in_bytes; filter_path trims the
    # response down to just the fields we sum.
    response=$(curl -s "${CURL_AUTH_ARGS[@]}" "${CURL_INSECURE_ARGS[@]}" "${ES_HOST}/_nodes/stats/indices/indexing,store?filter_path=nodes.*.indices.indexing.index_total,nodes.*.indices.store.size_in_bytes")
    # Check whether the curl command succeeded
    if [ $? -ne 0 ]; then
        echo "Error: Failed to connect to Elasticsearch. Check ES_HOST and connectivity, especially SSL/TLS issues if using HTTPS." >&2
        return 1
    fi
    # Check whether Elasticsearch returned an error object (e.g., an auth failure or a wrong path)
    if echo "$response" | jq -e '.error' > /dev/null 2>&1; then
        echo "Error from Elasticsearch: $(echo "$response" | jq -r '.error.reason // .error.type // "Unknown error"') (HTTP Code: $(echo "$response" | jq -r '.status // "N/A"'))" >&2
        return 1
    fi
    # Sum index_total and store.size_in_bytes across all nodes
    # Output: "total_docs total_bytes"
    echo "$response" | jq -r '
        {
            total_docs: ([.nodes[] | .indices.indexing.index_total] | add // 0),
            total_bytes: ([.nodes[] | .indices.store.size_in_bytes] | add // 0)
        } | "\(.total_docs) \(.total_bytes)"
    '
}
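# Optional (an added convenience, not part of the original flow): exit the
# endless monitoring loop cleanly on Ctrl-C.
trap 'echo; echo "Monitor stopped."; exit 0' INT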
# --- Main loop ---
echo "Starting Elasticsearch Ingestion Rate Monitor..."
echo "Monitoring ${ES_HOST} every ${INTERVAL_SECONDS} seconds."
# Get initial reading. Capture the output first so the function's exit status can
# be checked: after 'read var <<< "$(func)"', $? reflects read, not func.
if ! metrics=$(get_total_indexed_metrics); then
    echo "Initial data collection failed. Exiting."
    exit 1
fi
read initial_docs initial_bytes <<< "$metrics"
echo "Initial indexed documents: $initial_docs, Initial store size: $(echo "scale=2; $initial_bytes / (1024*1024*1024)" | bc -l) GB"
# Start the monitoring loop
while true; do
    sleep "$INTERVAL_SECONDS"
    # Same pattern as above: capture first, then check the function's status.
    if ! metrics=$(get_total_indexed_metrics); then
        echo "Failed to get current data. Retrying in ${INTERVAL_SECONDS} seconds."
        continue # Skip this iteration, try again
    fi
    read current_docs current_bytes <<< "$metrics"
    # index_total is cumulative, so it should only go backwards if nodes restarted
    # or left the cluster; re-baseline when that happens.
    if (( initial_docs > current_docs )); then
        echo "Warning: Current docs ($current_docs) is less than initial docs ($initial_docs). Resetting baseline."
        initial_docs=$current_docs
        initial_bytes=$current_bytes # Reset bytes too
        continue
    fi
    # Calculate difference and rate for documents
    diff_docs=$(( current_docs - initial_docs ))
    ingestion_rate_docs=$(echo "scale=2; $diff_docs / $INTERVAL_SECONDS" | bc -l)
    # Calculate difference and rate for bytes (store size can shrink after merges
    # or deletes, so this is an approximation of ingested volume)
    diff_bytes=$(( current_bytes - initial_bytes ))
    ingestion_rate_gb=$(echo "scale=2; $diff_bytes / (1024 * 1024 * 1024) / $INTERVAL_SECONDS" | bc -l)
    echo "$(date '+%Y-%m-%d %H:%M:%S') - Docs/sec: ${ingestion_rate_docs} (New docs: ${diff_docs}) | GB/sec: ${ingestion_rate_gb} (New data: $(echo "scale=2; $diff_bytes / (1024*1024*1024)" | bc -l) GB)"
    # Update the baseline for the next iteration
    initial_docs=$current_docs
    initial_bytes=$current_bytes
done
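To use: save the script as es_ingestion_monitor.sh, make it executable, and run it; stop it with Ctrl-C.

chmod +x es_ingestion_monitor.sh
./es_ingestion_monitor.sh

Each interval prints one timestamped line with the documents-per-second rate, the raw document delta, the GB-per-second rate, and the data delta in GB.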