Skip to content

Instantly share code, notes, and snippets.

@fabriziosalmi
Created August 26, 2024 08:23
Show Gist options
  • Save fabriziosalmi/d32ba218b98bde3e196b474bf340b18c to your computer and use it in GitHub Desktop.
Save fabriziosalmi/d32ba218b98bde3e196b474bf340b18c to your computer and use it in GitHub Desktop.
httpd-error-analyzer.sh
#!/bin/bash
#
# Summarise Apache error logs: combines every error_log* file found in
# LOG_DIR, prints statistics to the terminal and mirrors them into OUTPUT_FILE.

# Directory containing the Apache error logs.
LOG_DIR="/var/log/httpd/" # Adjust the path according to your configuration

# Private scratch directory that holds the combined log while the script runs.
# Abort right away if it cannot be created: every later step writes into it.
TMP_DIR=$(mktemp -d) || {
  echo "Error: could not create a temporary directory" >&2
  exit 1
}
# Remove the scratch directory on every exit path, including the early
# "exit 1" branches taken when the log directory or log files are missing.
trap 'rm -rf -- "$TMP_DIR"' EXIT

# Combined copy of all error logs that the analysis steps read.
TMP_LOG_FILE="$TMP_DIR/combined_error.log"

# Report file (relative to the current working directory).
OUTPUT_FILE="apache_error_log_summary.txt"

# Number of rows shown in each "top N" table.
TOP_N=10
# Print a message on the terminal and append the same line to the report.
# Globals:   OUTPUT_FILE (appended to)
# Arguments: $1 - message text
# Outputs:   the message followed by a newline on stdout
log_message() {
  local message="$1"
  # printf instead of echo: echo would treat a message such as "-n" or "-e"
  # as an option and print nothing useful.
  printf '%s\n' "$message"
  printf '%s\n' "$message" >> "$OUTPUT_FILE"
}
# Emit one three-column table row to the report file and to the terminal.
# Globals:   OUTPUT_FILE (appended to)
# Arguments: $1, $2, $3 - cell values, left-aligned in 10/30/40 character
#            columns (longer values are not truncated)
# Outputs:   the formatted row on stdout
print_table_row() {
  local row
  # Format once (printf -v keeps the trailing newline), then emit twice.
  printf -v row "%-10s %-30s %-40s\n" "$1" "$2" "$3"
  printf '%s' "$row" >> "$OUTPUT_FILE"
  printf '%s' "$row"
}
# Emit a table header (blank separator line, column titles, dashed rule) to
# the report file and to the terminal.
# Globals:   OUTPUT_FILE (appended to)
# Arguments: $1, $2, $3 - column titles, left-aligned in 10/30/40 character
#            columns
# Outputs:   the header block on stdout
print_table_header() {
  local header rule
  # The leading blank line separates consecutive tables. It is written to both
  # destinations so the terminal and the report show the same text.
  printf -v header "\n%-10s %-30s %-40s\n" "$1" "$2" "$3"
  printf -v rule "%-10s %-30s %-40s\n" "----------" "------------------------------" "----------------------------------------"
  printf '%s%s' "$header" "$rule" >> "$OUTPUT_FILE"
  printf '%s%s' "$header" "$rule"
}
# Map an Apache "[module:level]" log tag to a one-line troubleshooting hint.
# Arguments: $1 - the tag exactly as it appears in the log, brackets included
#            (for example "[core:error]")
# Outputs:   the hint on stdout; a generic hint for unrecognised tags
#
# The hint is written to stdout only. The callers in this script capture it
# with $(...) and hand it to print_table_row, which already writes the report
# file; appending to the report here as well would put every hint in the
# report twice (once as a stray line above the table row).
provide_guidance() {
  local error="$1"
  local guidance

  case "$error" in
    "[proxy_fcgi:error]")
      guidance="Possible Cause: Issues with FastCGI or PHP-FPM. Check PHP-FPM service status, configuration files, or resource limits."
      ;;
    "[cgid:error]")
      guidance="Possible Cause: Issues with CGI scripts. Ensure scripts have correct permissions and paths."
      ;;
    "[autoindex:error]")
      guidance="Possible Cause: Directory index settings misconfigured. Check your Apache directory settings."
      ;;
    "[authz_core:error]")
      guidance="Possible Cause: Authorization configuration issues. Check .htaccess files or Apache configuration for access rules."
      ;;
    "[mpm_event:notice]")
      guidance="Possible Cause: General notices from the MPM Event module. Typically informational, but review for any anomalies."
      ;;
    "[core:notice]")
      guidance="Possible Cause: General core module notices. Usually benign but review if recurring."
      ;;
    "[core:error]")
      guidance="Possible Cause: General core module errors. Review Apache error log for details."
      ;;
    "[suexec:notice]")
      guidance="Possible Cause: Issues with executing CGI scripts under different user permissions. Check suexec logs."
      ;;
    "[mpm_event:error]")
      guidance="Possible Cause: Issues with the MPM Event module. Review server resource usage and Apache configuration."
      ;;
    "[lbmethod_heartbeat:notice]")
      guidance="Possible Cause: Load balancer heartbeats. Typically benign, but review load balancer settings if unsure."
      ;;
    *)
      guidance="No specific guidance available. Review Apache documentation or logs for more details."
      ;;
  esac

  printf '%s\n' "$guidance"
}
# Create or clear the report before the first log_message call. Clearing it
# later would wipe the messages already logged, and on the error exits below
# the messages would be appended to a stale report from a previous run.
: > "$OUTPUT_FILE" || {
  echo "Error: cannot write to $OUTPUT_FILE" >&2
  exit 1
}

# Ensure the log directory exists
if [ ! -d "$LOG_DIR" ]; then
  log_message "Error: Log directory $LOG_DIR not found!"
  exit 1
fi

# Combine all error logs into the temporary file
log_message "Combining log files from $LOG_DIR into $TMP_LOG_FILE..."

# Collect matches with a glob rather than by parsing ls output, so names with
# whitespace stay intact. Keep regular files only: an unmatched pattern stays
# literal and a directory cannot be concatenated, and both fail the -f test.
log_files=()
for candidate in "$LOG_DIR/error_log"*; do
  if [ -f "$candidate" ]; then
    log_files+=("$candidate")
  fi
done

if [ "${#log_files[@]}" -eq 0 ]; then
  log_message "Error: No log files matching $LOG_DIR/error_log* found!"
  exit 1
fi

cat -- "${log_files[@]}" > "$TMP_LOG_FILE"
# Reduce every entry of the combined log to six columns and sort the result.
# Globals:   TMP_LOG_FILE (read)
# Outputs:   sorted lines "month day time tag f7 f9" on stdout, i.e. input
#            fields 2, 3, 4, 6, 7 and 9
#
# Assumes the default Apache 2.4 ErrorLogFormat, where a line starts with
#   [Fri Sep 09 10:42:29.902022 2011] [core:error] [pid 35708:tid 4328636416]
# so field 4 is the time of day and field 6 the "[module:level]" tag.
# The consumers in this script split the THIRD output column on ":" to get the
# hour and read the FOURTH as the error type, so the time must be emitted
# third. Emitting input fields 1-3 put the day of month there instead.
# NOTE(review): confirm the field positions if a custom ErrorLogFormat is used.
extract_log_entries() {
  awk 'NF {
    # NF skips blank lines, which would otherwise be counted as entries.
    entry = $2 " " $3 " " $4 " " $6 " " $7 " " $9
    sub(/^[ \t]+/, "", entry)
    print entry
  }' "$TMP_LOG_FILE" | sort
}
# Analyze errors by time range.
# Every section below reads the columns produced by extract_log_entries: the
# third column is split on ":" and its first part is used as the hour bucket,
# the fourth column is the error type. Each pipeline ends in "tee -a" so the
# same text reaches the terminal and the report; the awk programs therefore
# print to stdout and never open the report file themselves.
log_message "Analyzing errors by time range:"

# Error Count by Hour of Day and Date
log_message "Error Count by Hour of Day and Date:"
extract_log_entries \
  | awk '{split($3, time, ":"); print $1, $2, time[1]}' \
  | sort | uniq -c | sort -nr \
  | tee -a "$OUTPUT_FILE"

# Identify recurring errors with time patterns
log_message "Recurring Errors with Time Patterns:"
extract_log_entries \
  | awk '{
      split($3, time, ":")
      # Composite (error, hour) key: works in any POSIX awk, unlike the
      # gawk-only arrays of arrays.
      hits[$4, time[1]]++
      errors[$4] = 1
    }
    END {
      for (error in errors) {
        printf "Error: %s\n", error
        for (key in hits) {
          split(key, part, SUBSEP)
          if (part[1] == error) {
            printf " Hour: %s, Count: %d\n", part[2], hits[key]
          }
        }
      }
    }' \
  | tee -a "$OUTPUT_FILE"

# Detect spikes in error occurrences dynamically based on average.
# A bucket is a spike when it holds more than twice the average number of
# entries per bucket. Average, threshold and comparison are all computed in
# one awk program, so no number is pasted into program text or sent through
# bc, and an empty log yields an average of 0 instead of a division by zero.
log_message "Detecting Spikes in Error Occurrences:"
extract_log_entries \
  | awk '{
      split($3, time, ":")
      hour_errors[time[1]]++
    }
    END {
      total = 0
      buckets = 0
      for (hour in hour_errors) {
        total += hour_errors[hour]
        buckets++
      }
      average = (buckets > 0) ? total / buckets : 0
      threshold = average * 2
      printf "Average errors per hour: %s (Spike threshold: %s)\n", average, threshold
      for (hour in hour_errors) {
        if (hour_errors[hour] > threshold) {
          printf "Spike Detected - Hour: %s, Error Count: %d\n", hour, hour_errors[hour]
        }
      }
    }' \
  | tee -a "$OUTPUT_FILE"
# Print the client address of every log line that carries a "[client ...]"
# token; with $1 = 1 the line's sixth field (the "[module:level]" tag) is
# printed after the address.
# Globals:   TMP_LOG_FILE (read)
# Arguments: $1 - 1 to append the error tag, 0 for the address only
#
# The address is located by its "[client" marker because its position varies
# from line to line; field 5 is the year of the timestamp in the default
# Apache ErrorLogFormat, not an address. The closing bracket and a trailing
# ":port" are stripped so one client is not counted once per connection.
# NOTE(review): assumes the logged address always includes a port (Apache 2.4
# "%a"); an IPv6 address logged without a port would lose its last group.
list_client_addresses() {
  awk -v with_type="$1" '{
    for (i = 1; i < NF; i++) {
      if ($i == "[client") {
        ip = $(i + 1)
        sub(/\]$/, "", ip)
        sub(/:[0-9]+$/, "", ip)
        if (with_type) {
          print ip, $6
        } else {
          print ip
        }
        break
      }
    }
  }' "$TMP_LOG_FILE"
}

# Summary Statistics
log_message "Summary Statistics:"
total_errors=$(wc -l < "$TMP_LOG_FILE")
unique_error_types=$(awk '{print $6}' "$TMP_LOG_FILE" | sort -u | wc -l)
# NOTE(review): the "status code" figures are derived from the same sixth
# field as the error types, so the two numbers are always equal; kept so the
# report layout stays unchanged.
unique_status_codes=$unique_error_types
unique_ips=$(list_client_addresses 0 | sort -u | wc -l)
log_message "Total number of errors: $total_errors"
log_message "Number of unique error types: $unique_error_types"
log_message "Number of unique status codes: $unique_status_codes"
log_message "Number of unique remote IPs: $unique_ips"

# Most frequent status codes
print_table_header "Count" "Status Code" "Description"
awk '{print $6}' "$TMP_LOG_FILE" | sort | uniq -c | sort -nr | head -n "$TOP_N" \
  | while read -r count code; do
      print_table_row "$count" "$code" "$(provide_guidance "$code")"
    done

# Most frequent remote IPs
print_table_header "Count" "Remote IP" "Description"
list_client_addresses 0 | sort | uniq -c | sort -nr | head -n "$TOP_N" \
  | while read -r count ip; do
      print_table_row "$count" "$ip" "Most frequent remote IPs."
    done

# Most frequent errors
print_table_header "Count" "Error Type" "Description"
awk '{print $6}' "$TMP_LOG_FILE" | sort | uniq -c | sort -nr | head -n "$TOP_N" \
  | while read -r count error; do
      print_table_row "$count" "$error" "$(provide_guidance "$error")"
    done

# Correlation between IPs and error types
print_table_header "Count" "Remote IP" "Error Type"
list_client_addresses 1 | sort | uniq -c | sort -nr | head -n "$TOP_N" \
  | while read -r count ip error; do
      print_table_row "$count" "$ip" "$error"
    done
# Final step: discard the scratch directory (and the combined log inside it),
# then tell the user where the report was written.
summary_notice="Summary has been written to $OUTPUT_FILE"
rm -rf "$TMP_DIR"
log_message "$summary_notice"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment