@linuxmalaysia
Last active July 11, 2025 13:20
es_ingestion_monitor.sh — a shell script that polls the Elasticsearch node stats API and reports cluster-wide ingestion rates (documents/sec and GB/sec).
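Before running the script, it is worth sanity-checking the host and credentials with a one-off request. A minimal sketch, reusing the placeholder host and token from the configuration below (the -k flag only applies if the cluster uses a self-signed certificate):

curl -sk --header "Authorization: Bearer your-generated-service-account-token" "https://your-es-cluster.example.com:9200/_cluster/health?pretty"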
#!/bin/bash
# Configuration
ES_HOST="https://your-es-cluster.example.com:9200" # Your Elasticsearch host, IP address and port (e.g., "https://127.0.0.1:9200" or "https://<cloud_id>.es.<region>.gcp.cloud.es.io:9243")
# ES_USER="elastic" # If using username/password, uncomment and replace with your username
# ES_PASS="your-elastic-password" # If using username/password, uncomment and replace with your password
ES_TOKEN="your-generated-service-account-token" # Service account token. Takes priority over username/password; leave empty to fall back to ES_USER/ES_PASS above.
INTERVAL_SECONDS=10 # How often to check and calculate (e.g., 10 seconds)
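# A longer interval smooths bursty indexing into a steadier average; a shorter one reacts faster but is noisier.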
# --- Authentication Header Setup ---
# Use an array to properly pass multiple arguments to curl
CURL_AUTH_ARGS=()
# Prioritize token authentication if ES_TOKEN is set
if [ -n "${ES_TOKEN}" ]; then
    CURL_AUTH_ARGS+=("--header" "Authorization: Bearer ${ES_TOKEN}")
elif [ -n "${ES_USER}" ] && [ -n "${ES_PASS}" ]; then
    CURL_AUTH_ARGS+=("-u" "${ES_USER}:${ES_PASS}")
fi
# --- SSL Insecure Flag Setup ---
# Add -k (insecure) when ES_HOST uses HTTPS so self-signed certificates do not break the check.
# Use with caution in production; prefer --cacert with your cluster's CA instead.
# An array is used so that nothing is passed to curl when the flag is not needed
# (a quoted empty string would otherwise be misread by curl as a URL).
CURL_INSECURE_ARGS=()
if [[ "${ES_HOST}" == https://* ]]; then
    CURL_INSECURE_ARGS+=("-k")
fi
# --- Check for jq ---
if ! command -v jq &> /dev/null; then
    echo "Error: 'jq' is not installed. Please install it to parse JSON output."
    echo "  For Ubuntu: sudo apt-get install jq"
    echo "  For AlmaLinux/Rocky Linux: sudo dnf install epel-release && sudo dnf install jq"
    exit 1
fi
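# --- Check for bc (added safeguard; the rate math below assumes it is present) ---
# The calculations pipe arithmetic through 'bc', so verify it exists the same way
# jq is verified above.
if ! command -v bc &> /dev/null; then
    echo "Error: 'bc' is not installed. Please install it to calculate rates."
    echo "  For Ubuntu: sudo apt-get install bc"
    echo "  For AlmaLinux/Rocky Linux: sudo dnf install bc"
    exit 1
fi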
# --- Function to get total indexed documents and store size from a node ---
# We'll target the sum across all nodes for a cluster-wide rate
get_total_indexed_metrics() {
    local response
    # Request both index_total and store.size_in_bytes; filter_path trims the
    # response down to just the fields we sum.
    response=$(curl -s "${CURL_AUTH_ARGS[@]}" "${CURL_INSECURE_ARGS[@]}" "${ES_HOST}/_nodes/stats/indices/indexing,store?filter_path=nodes.*.indices.indexing.index_total,nodes.*.indices.store.size_in_bytes")
    # Check whether the curl command succeeded
    if [ $? -ne 0 ]; then
        echo "Error: Failed to connect to Elasticsearch. Check ES_HOST and connectivity, especially SSL/TLS issues if using HTTPS." >&2
        return 1
    fi
    # Check whether Elasticsearch returned an error object (e.g., an auth failure or a wrong path)
    if echo "$response" | jq -e '.error' > /dev/null 2>&1; then
        echo "Error from Elasticsearch: $(echo "$response" | jq -r '.error.reason // .error.type // "Unknown error"') (HTTP Code: $(echo "$response" | jq -r '.status // "N/A"'))" >&2
        return 1
    fi
    # Sum index_total and store.size_in_bytes across all nodes
    # Output: "total_docs total_bytes"
    echo "$response" | jq -r '
        {
            total_docs: ([.nodes[] | .indices.indexing.index_total] | add // 0),
            total_bytes: ([.nodes[] | .indices.store.size_in_bytes] | add // 0)
        } | "\(.total_docs) \(.total_bytes)"
    '
}
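# Optional (an added convenience, not part of the original flow): exit the
# endless monitoring loop cleanly on Ctrl-C.
trap 'echo; echo "Monitor stopped."; exit 0' INT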
# --- Main loop ---
echo "Starting Elasticsearch Ingestion Rate Monitor..."
echo "Monitoring ${ES_HOST} every ${INTERVAL_SECONDS} seconds."
# Get initial reading. Capture the output first so the function's exit status can
# be checked: after 'read var <<< "$(func)"', $? reflects read, not func.
if ! metrics=$(get_total_indexed_metrics); then
    echo "Initial data collection failed. Exiting."
    exit 1
fi
read initial_docs initial_bytes <<< "$metrics"
echo "Initial indexed documents: $initial_docs, Initial store size: $(echo "scale=2; $initial_bytes / (1024*1024*1024)" | bc -l) GB"
# Start the monitoring loop
while true; do
    sleep "$INTERVAL_SECONDS"
    # Same pattern as above: capture first, then check the function's status.
    if ! metrics=$(get_total_indexed_metrics); then
        echo "Failed to get current data. Retrying in ${INTERVAL_SECONDS} seconds."
        continue # Skip this iteration, try again
    fi
    read current_docs current_bytes <<< "$metrics"
    # index_total is cumulative, so it should only go backwards if nodes restarted
    # or left the cluster; re-baseline when that happens.
    if (( initial_docs > current_docs )); then
        echo "Warning: Current docs ($current_docs) is less than initial docs ($initial_docs). Resetting baseline."
        initial_docs=$current_docs
        initial_bytes=$current_bytes # Reset bytes too
        continue
    fi
    # Calculate difference and rate for documents
    diff_docs=$(( current_docs - initial_docs ))
    ingestion_rate_docs=$(echo "scale=2; $diff_docs / $INTERVAL_SECONDS" | bc -l)
    # Calculate difference and rate for bytes (store size can shrink after merges
    # or deletes, so this is an approximation of ingested volume)
    diff_bytes=$(( current_bytes - initial_bytes ))
    ingestion_rate_gb=$(echo "scale=2; $diff_bytes / (1024 * 1024 * 1024) / $INTERVAL_SECONDS" | bc -l)
    echo "$(date '+%Y-%m-%d %H:%M:%S') - Docs/sec: ${ingestion_rate_docs} (New docs: ${diff_docs}) | GB/sec: ${ingestion_rate_gb} (New data: $(echo "scale=2; $diff_bytes / (1024*1024*1024)" | bc -l) GB)"
    # Update the baseline for the next iteration
    initial_docs=$current_docs
    initial_bytes=$current_bytes
done
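To use: save the script as es_ingestion_monitor.sh, make it executable, and run it; stop it with Ctrl-C.

chmod +x es_ingestion_monitor.sh
./es_ingestion_monitor.sh

Each interval prints one timestamped line with the documents-per-second rate, the raw document delta, the GB-per-second rate, and the data delta in GB.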