Last active
July 11, 2025 13:20
-
-
Save linuxmalaysia/85fa591010db6ab389ed7803c146b818 to your computer and use it in GitHub Desktop.
script for es_ingestion_monitor.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/bash | |
| # Configuration | |
| ES_HOST="https://your-es-cluster.example.com:9200" # Your Elasticsearch host, IP address and port (e.g., "https://127.0.0.1:9200" or "https://<cloud_id>.es.<region>.gcp.cloud.es.io:9243") | |
| # ES_USER="elastic" # If using username/password, uncomment and replace with your username | |
| # ES_PASS="your-elastic-password" # If using username/password, uncomment and replace with your password | |
| ES_TOKEN="your-generated-service-account-token" # If using a service account token, uncomment and replace with your token value | |
| INTERVAL_SECONDS=10 # How often to check and calculate (e.g., 10 seconds) | |
| # --- Authentication Header Setup --- | |
| # Use an array to properly pass multiple arguments to curl | |
| CURL_AUTH_ARGS=() | |
| # Prioritize token authentication if ES_TOKEN is set | |
| if [ -n "${ES_TOKEN}" ]; then | |
| CURL_AUTH_ARGS+=("--header" "Authorization: Bearer ${ES_TOKEN}") | |
| elif [ -n "${ES_USER}" ] && [ -n "${ES_PASS}" ]; then | |
| CURL_AUTH_ARGS+=("-u" "${ES_USER}:${ES_PASS}") | |
| fi | |
| # --- SSL Insecure Flag Setup --- | |
| # Add -k (insecure) flag if ES_HOST uses HTTPS. Use with caution in production. | |
| CURL_INSECURE_FLAG="" | |
| if [[ "${ES_HOST}" == https://* ]]; then | |
| CURL_INSECURE_FLAG="-k" | |
| fi | |
| # --- Check for jq --- | |
| if ! command -v jq &> /dev/null | |
| then | |
| echo "Error: 'jq' is not installed. Please install it to parse JSON output." | |
| echo " For Ubuntu: sudo apt-get install jq" | |
| echo " For AlmaLinux/Rocky Linux: sudo dnf install epel-release && sudo dnf install jq" | |
| exit 1 | |
| fi | |
# --- Cluster-wide indexing metrics ---
# Queries _nodes/stats and sums indexing.index_total and store.size_in_bytes
# across all nodes.
# Globals (read): ES_HOST, CURL_AUTH_ARGS, CURL_INSECURE_FLAG
# Outputs: "total_docs total_bytes" on stdout
# Returns: 0 on success, 1 on connection or Elasticsearch error
get_total_indexed_metrics() {
  local response
  # ${VAR:+...} expands to nothing when the flag is empty, so curl never
  # receives a spurious empty-string argument (which it would otherwise
  # try to treat as a URL and fail on plain-HTTP hosts).
  response=$(curl -s "${CURL_AUTH_ARGS[@]}" ${CURL_INSECURE_FLAG:+"${CURL_INSECURE_FLAG}"} \
    "${ES_HOST}/_nodes/stats/indices/indexing,store?filter_path=nodes.*.indices.indexing.index_total,nodes.*.indices.store.size_in_bytes")
  if [ $? -ne 0 ]; then
    echo "Error: Failed to connect to Elasticsearch. Check ES_HOST and connectivity, especially SSL/TLS issues if using HTTPS." >&2
    return 1
  fi

  # Detect an Elasticsearch error payload structurally. Grepping for the
  # substring "error" could false-positive on legitimate data (e.g. a node
  # or index name containing "error"); jq -e exits 0 only when .error is
  # present and non-null.
  if echo "$response" | jq -e '.error' > /dev/null 2>&1; then
    echo "Error from Elasticsearch: $(echo "$response" | jq -r '.error.reason // .error.type // "Unknown error"') (HTTP Code: $(echo "$response" | jq -r '.status // "N/A"'))" >&2
    return 1
  fi

  # Sum both counters across all nodes; output: "total_docs total_bytes".
  echo "$response" | jq -r '
    {
      total_docs: ([.nodes[] | .indices.indexing.index_total] | add // 0),
      total_bytes: ([.nodes[] | .indices.store.size_in_bytes] | add // 0)
    } | "\(.total_docs) \(.total_bytes)"
  '
}
# --- Main loop ---
echo "Starting Elasticsearch Ingestion Rate Monitor..."
echo "Monitoring ${ES_HOST} every ${INTERVAL_SECONDS} seconds."

# Take the baseline reading. Capture the function's output into a variable
# first so its exit status is observable; `read v <<< "$(fn)"` would report
# the status of `read` (always 0 here), silently masking collection failures.
metrics=$(get_total_indexed_metrics)
if [ $? -ne 0 ]; then
  echo "Initial data collection failed. Exiting." >&2
  exit 1
fi
read -r initial_docs initial_bytes <<< "$metrics"
echo "Initial indexed documents: $initial_docs, Initial store size: $(echo "scale=2; $initial_bytes / (1024*1024*1024)" | bc -l) GB"

# Sample every INTERVAL_SECONDS and report the delta as a per-second rate.
while true; do
  sleep "$INTERVAL_SECONDS"

  metrics=$(get_total_indexed_metrics)
  if [ $? -ne 0 ]; then
    echo "Failed to get current data. Retrying in ${INTERVAL_SECONDS} seconds." >&2
    continue # Skip this iteration, try again
  fi
  read -r current_docs current_bytes <<< "$metrics"

  # Counters can shrink (e.g. node restart or topology change); re-baseline
  # instead of reporting a negative rate.
  if (( initial_docs > current_docs )); then
    echo "Warning: Current docs ($current_docs) is less than initial docs ($initial_docs). Resetting initial_docs."
    initial_docs=$current_docs
    initial_bytes=$current_bytes # Reset bytes too
    continue
  fi

  # Per-interval deltas and per-second rates (bc for fractional output).
  diff_docs=$(( current_docs - initial_docs ))
  ingestion_rate_docs=$(echo "scale=2; $diff_docs / $INTERVAL_SECONDS" | bc -l)
  diff_bytes=$(( current_bytes - initial_bytes ))
  ingestion_rate_gb=$(echo "scale=2; $diff_bytes / (1024 * 1024 * 1024) / $INTERVAL_SECONDS" | bc -l)

  echo "$(date '+%Y-%m-%d %H:%M:%S') - Docs/sec: ${ingestion_rate_docs} (New docs: ${diff_docs}) | GB/sec: ${ingestion_rate_gb} (New data: $(echo "scale=2; $diff_bytes / (1024*1024*1024)" | bc -l) GB)"

  # Carry the current reading forward as the next iteration's baseline.
  initial_docs=$current_docs
  initial_bytes=$current_bytes
done
Author
linuxmalaysia
commented
Jul 11, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment