@pepicrft
Created August 29, 2025 11:16
I implemented this script to benchmark connections to S3 and diagnose network issues.
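For example, a typical run against a presigned URL with 8 threads and 50 connections for 60 seconds looks like this (the bucket and object key below are placeholders):

./s3-benchmark.sh 'https://my-bucket.s3.amazonaws.com/test-file.bin' 8 50 60s results.txt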
#!/bin/bash
# S3 Performance Benchmark with wrk
# Usage: ./s3-benchmark.sh <url> [threads] [connections] [duration] [output_file]
URL="$1"
THREADS=${2:-4}
CONNECTIONS=${3:-30}
DURATION=${4:-30s}
OUTPUT_FILE=${5:-"benchmark-$(date +%Y%m%d_%H%M%S).txt"}
TEMP_DIR="/tmp/s3-benchmark-$$"
REPORT_FILE="${OUTPUT_FILE%.*}-report.txt"
if [ -z "$URL" ]; then
  echo "S3 Performance Benchmark Script"
  echo "Usage: $0 <url> [threads] [connections] [duration] [output_file]"
  echo ""
  echo "Examples:"
  echo "  $0 'https://bucket.s3.amazonaws.com/file.zip'"
  echo "  $0 'https://presigned-url' 8 50 60s results.txt"
  echo ""
  echo "Parameters:"
  echo "  url         - S3 presigned URL or public URL"
  echo "  threads     - Number of worker threads (default: 4)"
  echo "  connections - Concurrent connections (default: 30)"
  echo "  duration    - Test duration (default: 30s)"
  echo "  output_file - Output filename (default: benchmark-TIMESTAMP.txt)"
  exit 1
fi
# Check if wrk is installed
if ! command -v wrk &> /dev/null; then
  echo "ERROR: wrk is not installed"
  echo "Install with: brew install wrk (macOS) or your distro's package manager"
  exit 1
fi
mkdir -p "$TEMP_DIR"
# Clean up the temp dir on any exit, including the early exits below
trap 'rm -rf "$TEMP_DIR"' EXIT
echo "S3 Performance Benchmark"
echo "========================="
echo "URL: $URL"
echo "Configuration: $THREADS threads, $CONNECTIONS connections, $DURATION duration"
echo "Start Time: $(date)"
echo "Output: $OUTPUT_FILE"
echo
# Pre-flight checks
echo "Pre-flight Diagnostics:"
echo "------------------------"
# 1. Basic connectivity test
echo -n "Connectivity test: "
if curl -s -I --max-time 10 "$URL" > "$TEMP_DIR/headers.txt" 2>&1; then
  echo "✓ SUCCESS"
  HTTP_STATUS=$(grep "HTTP/" "$TEMP_DIR/headers.txt" | head -1)
  echo "  Status: $HTTP_STATUS"
  # Check content length
  if grep -i "content-length" "$TEMP_DIR/headers.txt" > /dev/null; then
    CONTENT_LENGTH=$(grep -i "content-length" "$TEMP_DIR/headers.txt" | cut -d' ' -f2 | tr -d '\r')
    CONTENT_SIZE_MB=$(echo "scale=2; $CONTENT_LENGTH / 1024 / 1024" | bc -l 2>/dev/null || echo "unknown")
    echo "  File Size: ${CONTENT_SIZE_MB} MB"
  fi
  # Check server info
  if grep -i "server:" "$TEMP_DIR/headers.txt" > /dev/null; then
    SERVER=$(grep -i "server:" "$TEMP_DIR/headers.txt" | cut -d' ' -f2- | tr -d '\r')
    echo "  Server: $SERVER"
  fi
else
  echo "✗ FAILED"
  echo "  Cannot connect to URL. Check network connectivity and URL validity."
  exit 1
fi
# 2. DNS resolution test
echo -n "DNS resolution: "
# Strip the scheme, path, and any port to get a bare hostname
HOSTNAME=$(echo "$URL" | sed -e 's|^https\?://||' -e 's|/.*$||' -e 's|:.*$||')
# Check nslookup's own exit status; piping `time` output through grep/awk
# would always succeed regardless of whether resolution worked
DNS_RAW=$( { time nslookup "$HOSTNAME" > /dev/null 2>&1; } 2>&1 )
if [ $? -eq 0 ]; then
  DNS_TIME=$(echo "$DNS_RAW" | grep "real" | awk '{print $2}')
  echo "✓ SUCCESS (${DNS_TIME})"
else
  echo "⚠ WARNING - DNS resolution issues detected"
fi
# 3. Single request latency test
echo -n "Baseline latency: "
BASELINE_LATENCY=$(curl -o /dev/null -s -w "%{time_total}" --max-time 30 "$URL" 2>/dev/null)
if [ $? -eq 0 ]; then
  echo "✓ ${BASELINE_LATENCY}s"
else
  echo "⚠ WARNING - Baseline request failed"
  BASELINE_LATENCY="unknown"
fi
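# curl's %{time_total} spans DNS lookup, TCP/TLS handshakes, and the full
# body transfer, so this baseline is end-to-end time for one download
# rather than pure network round-trip latency.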
echo
# Create Lua script for detailed reporting
cat > "$TEMP_DIR/report.lua" << 'EOF'
-- Comprehensive benchmark reporting script.
-- wrk runs one Lua VM per thread, so counters updated in response() are
-- per-thread and invisible to done(); setup() records each thread handle
-- so done() can aggregate them via thread:get().
local threads = {}

function setup(thread)
  table.insert(threads, thread)
end

function init(args)
  -- Per-thread globals (thread:get only sees globals, not locals)
  error_count = 0
  status_codes = {}
end

function response(status, headers, body)
  -- Track status code distribution
  status_codes[status] = (status_codes[status] or 0) + 1
  -- Count HTTP-level errors
  if status >= 400 then
    error_count = error_count + 1
  end
end
function done(summary, latency, requests)
  -- Aggregate the per-thread counters collected via setup()
  local error_count = 0
  local status_codes = {}
  for _, thread in ipairs(threads) do
    error_count = error_count + (thread:get("error_count") or 0)
    for status, count in pairs(thread:get("status_codes") or {}) do
      status_codes[status] = (status_codes[status] or 0) + count
    end
  end
  local duration_seconds = summary.duration / 1000000
  print("=== BENCHMARK RESULTS ===")
  print()
  -- Overall Performance
  print("PERFORMANCE SUMMARY:")
  print("Duration: " .. string.format("%.2f", duration_seconds) .. "s")
  print("Total Requests: " .. summary.requests)
  print("Successful Requests: " .. (summary.requests - error_count))
  print("Failed Requests: " .. error_count)
  print("Success Rate: " .. string.format("%.1f", ((summary.requests - error_count) / summary.requests) * 100) .. "%")
  print("Requests/Second: " .. string.format("%.2f", summary.requests / duration_seconds))
  print()
  -- Throughput Analysis
  local mb_transferred = summary.bytes / (1024 * 1024)
  local mb_per_second = mb_transferred / duration_seconds
  print("THROUGHPUT ANALYSIS:")
  print("Total Data: " .. string.format("%.2f", mb_transferred) .. " MB")
  print("Average Throughput: " .. string.format("%.2f", mb_per_second) .. " MB/s")
  -- Rough extrapolation: all bytes delivered at the fastest observed latency
  print("Peak Theoretical: " .. string.format("%.2f", (summary.bytes / (latency.min / 1000000)) / (1024 * 1024)) .. " MB/s")
  print()
  -- Latency Analysis (wrk reports latency in microseconds)
  print("LATENCY ANALYSIS (milliseconds):")
  print("Minimum: " .. string.format("%.2f", latency.min / 1000) .. "ms")
  print("Maximum: " .. string.format("%.2f", latency.max / 1000) .. "ms")
  print("Average: " .. string.format("%.2f", latency.mean / 1000) .. "ms")
  print("Std Dev: " .. string.format("%.2f", latency.stdev / 1000) .. "ms")
  print()
  print("LATENCY PERCENTILES:")
  print("50th percentile (median): " .. string.format("%.2f", latency:percentile(50) / 1000) .. "ms")
  print("75th percentile: " .. string.format("%.2f", latency:percentile(75) / 1000) .. "ms")
  print("90th percentile: " .. string.format("%.2f", latency:percentile(90) / 1000) .. "ms")
  print("95th percentile: " .. string.format("%.2f", latency:percentile(95) / 1000) .. "ms")
  print("99th percentile: " .. string.format("%.2f", latency:percentile(99) / 1000) .. "ms")
  print("99.9th percentile: " .. string.format("%.2f", latency:percentile(99.9) / 1000) .. "ms")
  print()
  -- Request Rate Analysis (per-thread requests/second statistics)
  print("REQUEST RATE ANALYSIS:")
  print("Min req/s: " .. string.format("%.2f", requests.min))
  print("Max req/s: " .. string.format("%.2f", requests.max))
  print("Avg req/s: " .. string.format("%.2f", requests.mean))
  print("Std Dev req/s: " .. string.format("%.2f", requests.stdev))
  print()
  -- Status Code Distribution
  print("HTTP STATUS CODES:")
  for status, count in pairs(status_codes) do
    local percentage = (count / summary.requests) * 100
    print("HTTP " .. status .. ": " .. count .. " requests (" .. string.format("%.1f", percentage) .. "%)")
  end
  print()
  -- Performance Assessment
  print("PERFORMANCE ASSESSMENT:")
  local p95_latency = latency:percentile(95) / 1000
  local error_rate = (error_count / summary.requests) * 100
  if error_rate < 1 then
    print("✓ Error Rate: EXCELLENT (<1%)")
  elseif error_rate < 5 then
    print("⚠ Error Rate: ACCEPTABLE (1-5%)")
  else
    print("✗ Error Rate: POOR (>5%)")
  end
  if p95_latency < 500 then
    print("✓ P95 Latency: EXCELLENT (<500ms)")
  elseif p95_latency < 1000 then
    print("⚠ P95 Latency: ACCEPTABLE (500ms-1s)")
  else
    print("✗ P95 Latency: POOR (>1s)")
  end
  if mb_per_second > 50 then
    print("✓ Throughput: EXCELLENT (>50 MB/s)")
  elseif mb_per_second > 10 then
    print("⚠ Throughput: ACCEPTABLE (10-50 MB/s)")
  else
    print("✗ Throughput: POOR (<10 MB/s)")
  end
  print()
  print("=== END RESULTS ===")
end
EOF
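# Optional sanity check before the run (assumes a standalone Lua toolchain
# with `luac` is installed; purely illustrative, safe to skip):
#   luac -p "$TEMP_DIR/report.lua"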
# Run the benchmark
echo "Running wrk benchmark..."
echo "Command: wrk -t$THREADS -c$CONNECTIONS -d$DURATION -s $TEMP_DIR/report.lua --latency --timeout 60s \"$URL\""
echo
# Execute wrk and capture output
wrk -t"$THREADS" -c"$CONNECTIONS" -d"$DURATION" -s "$TEMP_DIR/report.lua" --latency --timeout 60s "$URL" > "$OUTPUT_FILE" 2>&1
BENCHMARK_EXIT_CODE=$?
echo "Benchmark completed. Results saved to: $OUTPUT_FILE"
echo
# Generate diagnostic report
{
  echo "S3 PERFORMANCE BENCHMARK DIAGNOSTIC REPORT"
  echo "=========================================="
  echo "Generated: $(date)"
  echo "URL: $URL"
  echo "Configuration: $THREADS threads, $CONNECTIONS connections, $DURATION"
  echo "Baseline Latency: $BASELINE_LATENCY"
  echo
  if [ $BENCHMARK_EXIT_CODE -eq 0 ]; then
    echo "Benchmark Status: SUCCESS"
  else
    echo "Benchmark Status: FAILED (exit code: $BENCHMARK_EXIT_CODE)"
  fi
  echo
  # Include the benchmark results
  cat "$OUTPUT_FILE"
  echo
  echo "DIAGNOSTIC RECOMMENDATIONS:"
  echo "============================"
  # Parse results for diagnostics
  if grep -q "POOR" "$OUTPUT_FILE"; then
    echo
    echo "⚠ PERFORMANCE ISSUES DETECTED:"
    if grep -q "Error Rate: POOR" "$OUTPUT_FILE"; then
      echo "• HIGH ERROR RATE detected"
      echo "  - Check network stability"
      echo "  - Verify S3 URL is still valid"
      echo "  - Consider reducing connection count"
      echo "  - Check for rate limiting (HTTP 503 errors)"
    fi
    if grep -q "Latency: POOR" "$OUTPUT_FILE"; then
      echo "• HIGH LATENCY detected"
      echo "  - Check geographic distance to S3 region"
      echo "  - Test from different network location"
      echo "  - Consider using CloudFront CDN"
      echo "  - Check for network congestion"
    fi
    if grep -q "Throughput: POOR" "$OUTPUT_FILE"; then
      echo "• LOW THROUGHPUT detected"
      echo "  - Check local bandwidth limitations"
      echo "  - Test with fewer concurrent connections"
      echo "  - Verify S3 Transfer Acceleration is enabled if needed"
      echo "  - Check for bandwidth throttling"
    fi
  else
    echo "✓ No major performance issues detected"
  fi
  echo
  echo "TROUBLESHOOTING COMMANDS:"
  echo "------------------------"
  echo "# Test single connection:"
  echo "curl -w \"Connect: %{time_connect}s, TTFB: %{time_starttransfer}s, Total: %{time_total}s\\n\" -o /dev/null \"$URL\""
  echo
  echo "# Test from different region/network:"
  echo "# Run this script from different geographic location"
  echo
  echo "# Check S3 server headers:"
  echo "curl -I \"$URL\""
  echo
  echo "# Monitor network during test:"
  echo "# Run: sudo netstat -i 1 (during benchmark execution)"
} > "$REPORT_FILE"
echo "Diagnostic report saved to: $REPORT_FILE"
echo
echo "Quick Summary:"
echo "=============="
tail -20 "$OUTPUT_FILE" | grep -E "(SUCCESS|FAILED|EXCELLENT|ACCEPTABLE|POOR|✓|⚠|✗)"
# CI Integration
if [ "$CI" = "true" ] || [ "$GITHUB_ACTIONS" = "true" ]; then
  # Extract key metrics for CI
  SUCCESS_RATE=$(grep "Success Rate:" "$OUTPUT_FILE" | grep -o "[0-9.]*%" | head -1)
  THROUGHPUT=$(grep "Average Throughput:" "$OUTPUT_FILE" | grep -o "[0-9.]*" | head -1)
  P95_LATENCY=$(grep "95th percentile:" "$OUTPUT_FILE" | grep -o "[0-9.]*ms" | head -1)
  echo
  echo "::group::S3 Benchmark Results"
  echo "Success Rate: $SUCCESS_RATE"
  echo "Throughput: ${THROUGHPUT} MB/s"
  echo "P95 Latency: $P95_LATENCY"
  echo "::endgroup::"
  # Fail CI if performance is poor
  if grep -q "✗" "$OUTPUT_FILE"; then
    echo "❌ Benchmark failed: Performance issues detected"
    exit 1
  fi
fi
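# Example GitHub Actions step invoking this script (hypothetical workflow
# snippet; the secret name is an assumption):
#   - name: Benchmark S3 download
#     run: ./s3-benchmark.sh "$S3_URL" 4 30 30s ci-results.txt
#     env:
#       S3_URL: ${{ secrets.S3_BENCHMARK_URL }}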
echo
echo "Files generated:"
echo "- Raw results: $OUTPUT_FILE"
echo "- Diagnostic report: $REPORT_FILE"