Last active
July 11, 2025 13:20
-
-
Save linuxmalaysia/85fa591010db6ab389ed7803c146b818 to your computer and use it in GitHub Desktop.
script for es_ingestion_monitor.sh
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Configuration

# Elasticsearch host, IP address and port, including the scheme
# (e.g., "https://127.0.0.1:9200" or "https://<cloud_id>.es.<region>.gcp.cloud.es.io:9243").
ES_HOST="https://your-es-cluster.example.com:9200"

# Username/password authentication: uncomment both lines and replace the values.
# These are only used when ES_TOKEN is empty, because token authentication is
# checked first.
# ES_USER="elastic"
# ES_PASS="your-elastic-password"

# Service account token, sent as "Authorization: Bearer <token>".
# Replace with your token value, or set it to "" to use ES_USER/ES_PASS instead.
ES_TOKEN="your-generated-service-account-token"

# How often to check and calculate, in seconds (e.g., 10 seconds).
INTERVAL_SECONDS=10
# --- Authentication Header Setup ---
# Fills the global array CURL_AUTH_ARGS with the curl arguments used for
# authentication. An array is used so that each argument reaches curl as a
# separate word, even when it contains spaces (the header value does).
#
# Globals:
#   ES_TOKEN, ES_USER, ES_PASS (read; may be unset)
#   CURL_AUTH_ARGS (written)
# Precedence: a non-empty ES_TOKEN wins; otherwise basic auth is used when both
# ES_USER and ES_PASS are non-empty; otherwise the array is left empty.
build_curl_auth_args() {
  CURL_AUTH_ARGS=()
  if [[ -n "${ES_TOKEN:-}" ]]; then
    CURL_AUTH_ARGS+=("--header" "Authorization: Bearer ${ES_TOKEN}")
  elif [[ -n "${ES_USER:-}" && -n "${ES_PASS:-}" ]]; then
    CURL_AUTH_ARGS+=("-u" "${ES_USER}:${ES_PASS}")
  fi
}
build_curl_auth_args
# --- SSL Insecure Flag Setup ---
# Sets the global CURL_INSECURE_FLAG to "-k" (skip certificate verification)
# when ES_HOST uses HTTPS, and to "" otherwise. Use with caution in production:
# with -k the server's identity is not verified, so credentials sent to it can
# be intercepted.
#
# Globals:
#   ES_HOST (read)
#   ES_VERIFY_TLS (read, optional) - set to "true" to keep certificate
#     verification enabled; any other value (or unset) keeps the -k behaviour.
#   CURL_INSECURE_FLAG (written)
set_curl_insecure_flag() {
  CURL_INSECURE_FLAG=""
  if [[ "${ES_HOST:-}" == https://* && "${ES_VERIFY_TLS:-false}" != "true" ]]; then
    CURL_INSECURE_FLAG="-k"
  fi
}
set_curl_insecure_flag
# --- Check for required tools ---
# jq parses the JSON responses; it gets its own message with install hints.
if ! command -v jq &> /dev/null; then
  echo "Error: 'jq' is not installed. Please install it to parse JSON output." >&2
  echo " For Ubuntu: sudo apt-get install jq" >&2
  echo " For AlmaLinux/Rocky Linux: sudo dnf install epel-release && sudo dnf install jq" >&2
  exit 1
fi
# curl performs the HTTP requests and bc does the rate arithmetic; without them
# the script would otherwise fail later with less obvious errors.
for required_cmd in curl bc; do
  if ! command -v "${required_cmd}" &> /dev/null; then
    echo "Error: '${required_cmd}' is not installed. Please install it to run this script." >&2
    exit 1
  fi
done
# --- Function to get total indexed documents and store size ---
# Queries the node stats API once and sums the values over every node in the
# response, giving cluster-wide totals.
#
# Globals:
#   ES_HOST (read), CURL_AUTH_ARGS (read), CURL_INSECURE_FLAG (read)
# Outputs:
#   stdout: "total_docs total_bytes" (sum of indexing.index_total and
#           store.size_in_bytes across nodes)
#   stderr: an error message on failure
# Returns:
#   0 on success; 1 if curl fails or Elasticsearch returns an error object;
#   jq's non-zero status if the response cannot be summed.
get_total_indexed_metrics() {
  local response
  local -a curl_args=(-s)

  curl_args+=("${CURL_AUTH_ARGS[@]}")
  # Only add the flag when it is non-empty: a quoted empty string would be
  # passed to curl as an extra (empty) argument, which curl treats as a URL.
  if [[ -n "${CURL_INSECURE_FLAG:-}" ]]; then
    curl_args+=("${CURL_INSECURE_FLAG}")
  fi

  # Request both index_total and store.size_in_bytes
  if ! response=$(curl "${curl_args[@]}" "${ES_HOST}/_nodes/stats/indices/indexing,store?filter_path=nodes.*.indices.indexing.index_total,nodes.*.indices.store.size_in_bytes"); then
    echo "Error: Failed to connect to Elasticsearch. Check ES_HOST and connectivity, especially SSL/TLS issues if using HTTPS." >&2
    return 1
  fi

  # Detect an error response (e.g., auth failure or wrong path) by looking for a
  # top-level "error" key rather than the substring "error" anywhere in the body.
  if jq -e 'type == "object" and has("error")' <<< "${response}" > /dev/null 2>&1; then
    local reason status
    # .error may be an object (with reason/type) or a plain string.
    reason=$(jq -r '.error | if type == "object" then (.reason // .type // "Unknown error") else tostring end' <<< "${response}")
    status=$(jq -r '.status // "N/A"' <<< "${response}")
    echo "Error from Elasticsearch: ${reason} (HTTP Code: ${status})" >&2
    return 1
  fi

  # Sum index_total and store.size_in_bytes across all nodes
  # Output: "total_docs total_bytes"
  jq -r '
    {
      total_docs: ([.nodes[] | .indices.indexing.index_total] | add // 0),
      total_bytes: ([.nodes[] | .indices.store.size_in_bytes] | add // 0)
    } | "\(.total_docs) \(.total_bytes)"
  ' <<< "${response}"
}
# --- Main loop ---

# Converts a byte count ($1) to GB, printed by bc with scale=2.
bytes_to_gb() {
  echo "scale=2; $1 / (1024*1024*1024)" | bc -l
}

# Takes one sample and stores it in the globals sample_docs / sample_bytes.
# The command substitution is assigned on its own line so that the exit status
# of get_total_indexed_metrics is what gets checked; with
# `read ... <<< "$(cmd)"` followed by `$?`, only the status of `read` is seen.
# Returns non-zero if the query failed or did not yield two non-negative
# integers (which the arithmetic below requires).
take_metrics_sample() {
  local raw
  raw=$(get_total_indexed_metrics) || return 1
  read -r sample_docs sample_bytes <<< "${raw}"
  [[ "${sample_docs}" =~ ^[0-9]+$ && "${sample_bytes}" =~ ^[0-9]+$ ]]
}

echo "Starting Elasticsearch Ingestion Rate Monitor..."
echo "Monitoring ${ES_HOST} every ${INTERVAL_SECONDS} seconds."

# Get initial reading
if ! take_metrics_sample; then
  echo "Initial data collection failed. Exiting." >&2
  exit 1
fi
initial_docs=$sample_docs
initial_bytes=$sample_bytes
echo "Initial indexed documents: $initial_docs, Initial store size: $(bytes_to_gb "$initial_bytes") GB"

# Number of sleep intervals since the baseline (initial_*) was taken. It grows
# past 1 when polls fail, so the rate is averaged over the real window.
intervals_since_baseline=0

# Start the monitoring loop
while true; do
  sleep "$INTERVAL_SECONDS"
  intervals_since_baseline=$(( intervals_since_baseline + 1 ))

  if ! take_metrics_sample; then
    echo "Failed to get current data. Retrying in ${INTERVAL_SECONDS} seconds." >&2
    continue # Skip this iteration, try again
  fi
  current_docs=$sample_docs
  current_bytes=$sample_bytes

  # A lower document total than the baseline means the counter went backwards;
  # start over from the current values instead of reporting a negative rate.
  if (( initial_docs > current_docs )); then
    echo "Warning: Current docs ($current_docs) is less than initial docs ($initial_docs). Resetting initial_docs." >&2
    initial_docs=$current_docs
    initial_bytes=$current_bytes # Reset bytes too
    intervals_since_baseline=0
    continue
  fi

  # Nominal length of the measurement window in seconds.
  window_seconds=$(echo "$INTERVAL_SECONDS * $intervals_since_baseline" | bc -l)

  # Calculate difference and rate for documents
  diff_docs=$(( current_docs - initial_docs ))
  ingestion_rate_docs=$(echo "scale=2; $diff_docs / $window_seconds" | bc -l)

  # Calculate difference and rate for bytes (may be negative if the store shrank)
  diff_bytes=$(( current_bytes - initial_bytes ))
  ingestion_rate_gb=$(echo "scale=2; $diff_bytes / (1024 * 1024 * 1024) / $window_seconds" | bc -l)

  echo "$(date '+%Y-%m-%d %H:%M:%S') - Docs/sec: ${ingestion_rate_docs} (New docs: ${diff_docs}) | GB/sec: ${ingestion_rate_gb} (New data: $(bytes_to_gb "$diff_bytes") GB)"

  # Update the baseline for the next iteration
  initial_docs=$current_docs
  initial_bytes=$current_bytes
  intervals_since_baseline=0
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Appendix: Generating and Using Elasticsearch Service Account Tokens
Instead of using a username and password directly, Elasticsearch allows you to use service account tokens for authentication. These tokens are a more secure and flexible way for applications and services (like Kibana or monitoring scripts) to authenticate with Elasticsearch.
How to Generate a Service Account Token
You can generate a service account token with a curl command such as the one below. The command must be run as a user with sufficient privileges to create tokens (for example, the elastic superuser).
Command Example:
curl -k -u elastic:your-elastic-password -X POST "https://192.168.1.100:9200/_security/service/elastic/kibana/credential/token?pretty"
Explanation of the Command:
curl -k: The curl command with the -k (insecure) flag. This flag is used to bypass SSL certificate validation, which is often necessary when connecting to Elasticsearch clusters with self-signed certificates or certificates not trusted by your system's certificate store.
-u elastic:your-elastic-password: Specifies the username (elastic) and a placeholder password for HTTP Basic Authentication. These are the credentials of a user who has the authority to create service account tokens. Replace your-elastic-password with the actual password of the elastic user when generating a real token.
-X POST: Specifies the HTTP method as POST, indicating that you are sending data to the server to create a new resource.
"https://192.168.1.100:9200/_security/service/elastic/kibana/credential/token?pretty":
https://192.168.1.100:9200: The address and port of your Elasticsearch cluster, using HTTPS.
_security/service/elastic/kibana/credential/token: This is the Elasticsearch Security API endpoint for generating a token for the kibana service account in the elastic namespace (the path has the form _security/service/<namespace>/<service>/credential/token).
?pretty: A query parameter to make the JSON response human-readable.
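Example Output:
{
  "created" : true,
  "token" : {
    "name" : "<generated-token-name>",
    "value" : "<your-service-account-token>"
  }
}
The value field in the token object is your actual service account token.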
How the Token is Used in the Script
In the es_ingestion_monitor.sh script, the ES_TOKEN variable is used to construct the Authorization header for curl requests.
When ES_TOKEN is set, the script will use Authorization: Bearer <your_token_value> in its HTTP requests to Elasticsearch, providing a secure and revocable authentication method.
Impact of Generating a Token
Generating a service account token has no direct negative impact on your Elasticsearch system's performance or on any currently running Kibana instances.
No Performance Impact: It's a single, lightweight API call that creates a new credential entry. It does not consume significant resources or affect ongoing data indexing or searching operations.
No Disruption to Existing Services: If you have Kibana instances or other applications already authenticated with Elasticsearch (using other tokens or username/password), they will continue to function normally. Generating a new token does not invalidate or change existing credentials.
Security Best Practice: Using service account tokens is generally considered a good security practice for machine-to-machine communication as they can be scoped to specific roles and privileges, and they can be revoked independently without affecting other users or services.
The primary "impact" is the creation of a new, valid credential. This token should be treated with the same level of security as a password, as anyone with access to it can authenticate as the service account it represents.