@linuxmalaysia
Last active July 9, 2025 22:51
This script automates sifting through potentially large PHP-FPM slow log files to provide actionable insight into your PHP application's performance bottlenecks over time.
#!/bin/bash
# Define the path to your PHP-FPM slow log file
LOG_FILE="/var/log/www.log.slow"
# Check if the log file exists
if [ ! -f "$LOG_FILE" ]; then
    echo "Error: Log file '$LOG_FILE' not found."
    exit 1
fi
echo "--- Detailed PHP-FPM Slow Log Analysis (by Date, Hour, and Script) ---"
echo ""
# Use awk to parse the log file in a single pass.
# It extracts date, hour, and script name for detailed output,
# and simultaneously collects data for summary calculations for slow logs.
# The summary data is printed at the END block with special markers
# so the outer bash script can parse it separately.
awk '
BEGIN {
    total_slow_hits = 0 # Initialize counter for total slow hits
}
# --- Section for PHP-FPM Slow Log Entries ---
# This block processes lines containing "pid" and extracts the timestamp
/pid [0-9]+/ {
    # Extract the full date (DD-Mon-YYYY) and the hour (HH) from the timestamp line
    # Example: [09-Jul-2025 21:51:03] -> "09-Jul-2025" and "21"
    # Note: the three-argument form of match() requires GNU awk (gawk)
    if (match($0, /^\[([0-9]{2}-[A-Za-z]{3}-[0-9]{4}) ([0-9]{2}):/, matches)) {
        current_date = matches[1] # Stores the date (e.g., "09-Jul-2025")
        current_hour = matches[2] # Stores the hour (e.g., "21")
    }
    next # Move to the next line immediately after processing the PID line
}
# This block processes lines containing "script_filename ="
/script_filename =/ {
    # Extract the full path to the script: everything after "script_filename = "
    script_name = substr($0, index($0, "=") + 2)
    # Only count the entry if a date and hour were captured from the preceding PID line
    if (current_date != "" && current_hour != "") {
        # Print for the detailed output; collected by the outer bash script
        print current_date, current_hour, script_name
        # Summary calculations: increment totals for this date and hour
        daily_total[current_date]++  # Total slow hits per day
        hourly_total[current_hour]++ # Total slow hits per hour (across all days)
        # Count per combined date and hour to find the absolute peak
        date_hour_combined_total[current_date " " current_hour]++
        # Overall total and per-script counters
        total_slow_hits++
        script_hits[script_name]++
    }
    # Reset so stale values are not reused for subsequent unrelated lines
    current_date = ""
    current_hour = ""
}
# The END block runs after awk has processed the entire log file.
# It calculates and prints the summary data, marked for easy parsing by the bash script.
END {
    # --- Special marker to indicate the start of summary data ---
    print "---SUMMARY_DATA_START---"
    # --- Slow Log Summary Calculations ---
    # Find the day with the highest number of slow hits
    max_daily_hits = 0
    highest_day = ""
    for (date in daily_total) {
        if (daily_total[date] > max_daily_hits) {
            max_daily_hits = daily_total[date]
            highest_day = date
        }
    }
    # Find the hour with the highest number of slow hits (across all days)
    max_hourly_hits = 0
    highest_hour = ""
    for (hour in hourly_total) {
        if (hourly_total[hour] > max_hourly_hits) {
            max_hourly_hits = hourly_total[hour]
            highest_hour = hour
        }
    }
    # Find the specific date and hour with the highest number of slow hits
    max_date_hour_hits = 0
    highest_date_hour = ""
    for (date_hour_key in date_hour_combined_total) {
        if (date_hour_combined_total[date_hour_key] > max_date_hour_hits) {
            max_date_hour_hits = date_hour_combined_total[date_hour_key]
            highest_date_hour = date_hour_key
        }
    }
    # Print the highest slow log day and hour summary lines
    if (highest_day != "") {
        print "HIGHEST_DAY_SUMMARY", highest_day, max_daily_hits
    }
    if (highest_hour != "") {
        print "HIGHEST_HOUR_SUMMARY", highest_hour, max_hourly_hits
    }
    if (highest_date_hour != "") {
        print "HIGHEST_DATE_HOUR_SPECIFIC_SUMMARY", highest_date_hour, max_date_hour_hits
    }
    # Print total slow hits
    print "TOTAL_SLOW_HITS", total_slow_hits
    # Print all per-script hit counts for sorting in bash
    print "TOP_SLOW_SCRIPTS_START"
    for (script in script_hits) {
        print script_hits[script], script
    }
    print "TOP_SLOW_SCRIPTS_END"
    # --- Special marker to indicate the end of summary data ---
    print "---SUMMARY_DATA_END---"
}' "$LOG_FILE" | {
    # This block is a subshell that receives all output from the awk command.
    # It separates the detailed log entries from the summary data.
    detailed_lines=()                     # Lines for the detailed output
    highest_day_summary=""
    highest_hour_summary=""
    highest_date_hour_specific_summary="" # Specific date/hour summary
    total_slow_hits_summary=""            # Total slow hits
    top_slow_scripts_lines=()             # Per-script hit counts
    summary_data_started=false            # True once the summary data section begins
    top_scripts_section_started=false     # True inside the top-scripts section
    # Read each line from awk's output
    while IFS= read -r line; do
        # Check for the start of the summary data section
        if [[ "$line" == "---SUMMARY_DATA_START---" ]]; then
            summary_data_started=true
            continue # Skip this marker line
        fi
        # Check for the end of the summary data section
        if [[ "$line" == "---SUMMARY_DATA_END---" ]]; then
            summary_data_started=false
            break # Exit the loop; all needed data has been collected
        fi
        # Inside the summary data section, store the specific summary lines
        if "$summary_data_started"; then
            if [[ "$line" == "HIGHEST_DAY_SUMMARY"* ]]; then
                highest_day_summary="$line"
            elif [[ "$line" == "HIGHEST_HOUR_SUMMARY"* ]]; then
                highest_hour_summary="$line"
            elif [[ "$line" == "HIGHEST_DATE_HOUR_SPECIFIC_SUMMARY"* ]]; then
                highest_date_hour_specific_summary="$line"
            elif [[ "$line" == "TOTAL_SLOW_HITS"* ]]; then
                total_slow_hits_summary="$line"
            elif [[ "$line" == "TOP_SLOW_SCRIPTS_START"* ]]; then
                top_scripts_section_started=true
            elif [[ "$line" == "TOP_SLOW_SCRIPTS_END"* ]]; then
                top_scripts_section_started=false
            elif "$top_scripts_section_started"; then
                top_slow_scripts_lines+=("$line")
            fi
        else
            # Otherwise, collect lines for the detailed output
            detailed_lines+=("$line")
        fi
    done
    # Print the collected detailed output, sorted chronologically.
    # Each line is "DD-Mon-YYYY HH script", so sort on the date's year (chars 8-11,
    # numeric), month name (chars 4-6), day (chars 1-2), then the hour field
    # (field 2, numeric). Repeated lines are counted with uniq -c.
    printf "%s\n" "${detailed_lines[@]}" | sort -k1.8,1.11n -k1.4,1.6M -k1.1,1.2n -k2,2n | uniq -c
    echo ""
    echo "--- Summary of Slow Log Hits ---"
    echo ""
    # Display total slow hits
    if [[ -n "$total_slow_hits_summary" ]]; then
        read -r _ total_hits <<< "$total_slow_hits_summary"
        echo "Total Slow Hits Recorded: $total_hits"
    else
        echo "No total slow hits data found."
    fi
    echo ""
    # Display the highest slow log day summary
    if [[ -n "$highest_day_summary" ]]; then
        read -r _ parsed_date parsed_hits <<< "$highest_day_summary"
        echo "Highest Day for Slow Hits:"
        echo "  Date: $parsed_date, Total Hits: $parsed_hits"
    else
        echo "No daily slow log summary data found."
    fi
    echo ""
    # Display the highest slow log hour summary (across all days)
    if [[ -n "$highest_hour_summary" ]]; then
        read -r _ parsed_hour parsed_hits <<< "$highest_hour_summary"
        echo "Highest Hour for Slow Hits (across all days):"
        echo "  Hour: $parsed_hour, Total Hits: $parsed_hits"
    else
        echo "No hourly slow log summary data found."
    fi
    echo ""
    # Display the highest specific date and hour for slow hits
    if [[ -n "$highest_date_hour_specific_summary" ]]; then
        read -r _ date_part hour_part total_hits <<< "$highest_date_hour_specific_summary"
        echo "Highest Specific Date and Hour for Slow Hits:"
        echo "  Date and Hour: $date_part $hour_part, Total Hits: $total_hits"
    else
        echo "No specific date/hour slow log summary data found."
    fi
    echo ""
    # These are the scripts that appear most often in the slow log
    # (the most frequent offenders), not necessarily the single slowest requests.
    echo "--- Top 5 Scripts by Slow-Log Hits (Overall) ---"
    echo ""
    if [ ${#top_slow_scripts_lines[@]} -gt 0 ]; then
        # Print only the top 5, or fewer if less than 5 exist
        printf "%s\n" "${top_slow_scripts_lines[@]}" | sort -rn | head -n 5 | while IFS= read -r line; do
            read -r hits script_name <<< "$line"
            echo "  $hits hits: $script_name"
        done
    else
        echo "No script hit data found."
    fi
}
echo ""
echo "Analysis complete."
# Comment added by Harisfazillah Jamel with the help of Google Gemini on 2025-07-10 04:50 AM +08.
# Explanation added by Harisfazillah Jamel with the help of Google Gemini on 2025-07-10 05:00 AM +08.
linuxmalaysia commented Jul 9, 2025

Guide to Configuring PHP-FPM Slow Logs

To ensure your php-fpm-slowlog.sh script has data to analyze, you need to properly configure PHP-FPM to generate slow request logs. These logs capture details about PHP scripts that take longer than a specified duration to execute.
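For reference, slow-log entries pair a timestamped pid line with a script_filename line, which is exactly what the awk stage of the analysis script keys on. The excerpt below is fabricated for illustration (real entries also include a PHP backtrace, and the exact layout can vary slightly between PHP versions), and the one-liner is a portable sketch of the parsing step: it uses fixed character offsets instead of gawk's three-argument match(), so it runs under any POSIX awk.

```shell
# Fabricated two-line excerpt in the shape the analysis script expects
printf '%s\n' \
  '[09-Jul-2025 21:51:03]  [pool www] pid 12345' \
  'script_filename = /var/www/html/index.php' |
awk '/pid [0-9]+/        { d = substr($0, 2, 11); h = substr($0, 14, 2); next }
     /script_filename =/ { print d, h, substr($0, index($0, "=") + 2) }'
# Prints: 09-Jul-2025 21 /var/www/html/index.php
```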

Based on your provided configuration snippet, here's how to set it up:

Step 1: Locate Your PHP-FPM Pool Configuration File

Your PHP-FPM pool configuration is typically found in a directory like /etc/php/8.3/fpm/pool.d/. From your ls output, your main pool configuration file is www.conf.

  • File Path: /etc/php/8.3/fpm/pool.d/www.conf

Step 2: Edit the www.conf File

Open this file using a text editor (like nano or vi):

sudo nano /etc/php/8.3/fpm/pool.d/www.conf

Step 3: Configure request_slowlog_timeout and slowlog

Inside the www.conf file, you need to find or add the following two directives within the [www] pool section.

  1. request_slowlog_timeout: This directive defines the maximum execution time for a PHP script after which a log entry will be made to the slow log file. It's crucial to set this to a value lower than your request_terminate_timeout (which is often 60s by default) so that slow requests are logged before they are forcefully terminated.

    • Recommendation: Start with a value like 5s or 10s. This allows you to catch scripts that are slow but not necessarily timing out yet.
  2. slowlog: This directive specifies the path to the slow log file.

    • Your current snippet shows: slowlog = /var/log/$pool.log.slow

      • $pool is a variable that PHP-FPM replaces with the name of the current pool (in your case, www). So, the actual path will resolve to /var/log/www.log.slow. This is the file your analysis script is designed to read.

Locate these lines (they might be commented out with a semicolon ;) and uncomment/modify them as follows:

; The timeout for serving a single request after which a PHP backtrace will be
; dumped to the 'slowlog' file. A value of '0' means 'off'.
; Available units: s(econds), m(inutes), h(ours). Default Unit: seconds
; Default Value: 0
; Set this to a reasonable value such as 5s or 10s:
request_slowlog_timeout = 10s

; The log file for slow requests
; Default Value: not set
; Note: slowlog is mandatory if request_slowlog_timeout is set
; Uncomment this line:
slowlog = /var/log/$pool.log.slow

Step 4: Ensure Log Directory Permissions

Make sure the directory where the slow log will be written (/var/log/ in this case) has appropriate permissions for the PHP-FPM user (often www-data or nginx). If PHP-FPM cannot write to this directory, the slow log will not be generated.

You can check this with:

ls -ld /var/log/

And adjust ownership if necessary. Note that the PHP-FPM master process (which typically runs as root) is what writes the slow log, so the default permissions are often sufficient. If you do need to grant access, prefer creating the log file and changing ownership of that file alone (e.g., sudo touch /var/log/www.log.slow followed by sudo chown www-data:www-data /var/log/www.log.slow) rather than changing ownership of the entire /var/log/ directory, which other services depend on.

Step 5: Restart PHP-FPM

After making changes to the configuration file, you must restart the PHP-FPM service for them to take effect. It is worth testing the configuration first so a typo does not take the service down:

sudo php-fpm8.3 -t                 # Test the configuration (adjust the binary name to your install)
sudo systemctl restart php8.3-fpm  # Or whatever service name your system uses (e.g. php-fpm, php-fpm8.3)

Step 6: Monitor the Slow Log

Once PHP-FPM is restarted and your application receives traffic, you should start seeing entries in the slow log file (/var/log/www.log.slow) whenever a script exceeds the request_slowlog_timeout.

You can monitor it in real-time using:

tail -f /var/log/www.log.slow
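Beyond tailing, a quick sanity check is to count entries: each slow-log entry contains exactly one script_filename line, so grep -c on that string counts logged slow requests. The snippet below demonstrates this on a fabricated sample file; in production you would point the same grep at /var/log/www.log.slow.

```shell
# Build a throwaway sample with two fabricated slow-log entries,
# then count them the same way you would on the real log file.
sample=$(mktemp)
printf '%s\n' \
  '[09-Jul-2025 21:51:03]  [pool www] pid 12345' \
  'script_filename = /var/www/html/index.php' \
  '[09-Jul-2025 22:10:44]  [pool www] pid 12346' \
  'script_filename = /var/www/html/checkout.php' > "$sample"
grep -c 'script_filename =' "$sample"   # -> 2
rm -f "$sample"
```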

Now, your php-fpm-slowlog.sh script will have the necessary data to perform its analysis.
