Last active
December 12, 2015 12:49
-
-
Save eqhmcow/4774549 to your computer and use it in GitHub Desktop.
script to kill apache 1.3 / 2.x prefork httpd processes serving preconnect connections, in an attempt to prevent chrome from causing a DoS against the httpd
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/env perl | |
# kill_idle_cms_httpd.pl - kill idle httpd connections | |
# $Id$ | |
use strict; | |
use warnings; | |
use Time::HiRes 'time'; | |
# chrome likes to preconnect to servers, which causes issues since there are | |
# a limited number of httpd slots available, and chrome preconnections can | |
# exhaust them all. this script kills httpds serving preconnect connections, | |
# preventing chrome from causing a DoS against the httpd. | |
# see also https://code.google.com/p/chromium/issues/detail?id=85229 | |
# This script is version 1. For a better (but more complicated) version 2, see
# https://gist.github.com/eqhmcow/5222092 | |
# ---------------------------------------------------------------------------
# tunables
# ---------------------------------------------------------------------------

# URL of the apache server-status page that is polled on every pass
my $status_url = 'http://localhost/server-status';

# total number of available httpd slots
my $expected_total = 10;

# start killing when we have <= this many slots available to receive a
# request
my $waiting_threshold = 8;

# kill connections after they idle for this many seconds
my $let_idle = 2;

# kill up to this many idle connections before requesting new server status
my $kill_idle = 1;

# kill stuck processes (with kill -9) after this many seconds
my $rekill_after = 16;
# ---------------------------------------------------------------------------
# state shared by the subroutines below
#
# These are declared *before* the main loop on purpose: the loop never
# terminates, so any statement placed after it is unreachable.  The subs would
# still see the variables (named subs bind file-scoped lexicals at compile
# time), but an initialiser added to a declaration after the loop would
# silently never run.
# ---------------------------------------------------------------------------

# pids that were killed; each element is [ pid, time the kill was sent ]
my @killed;

# slots that we're tracking, keyed by slot number; each value records the
# pid, mode, request text and the time the slot last changed
my %slots;

# last apache server status data (the filtered raw lines)
my @status;

# main loop: poll the server status once per second, forever
while (1) {
    # $p is the classified slot data, $k is true when slots are running low
    my ($p, $k) = check_connections();
    kill_connections($p) if $k;

    # sometimes when we kill a process, it hangs instead of exiting. kill -9
    # any such processes
    rekill_stuck();

    sleep 1;
}
# Split one server-status scoreboard row into the fields this script uses.
#
# Returns the list ($slot, $pid, $mode, $ss, $rest) on success and an empty
# list when the line does not match the expected layout.  $pid is either a
# run of digits or a single "-".
sub _parse_status_line {
    my ($line) = @_;

    return $line =~
        m!^\s*
        (\d+)               # slot
        -\d+                # generation
        \s+(\d+|-)          # pid
        \s+\d+/\d+/\d+      # accesses per connection / child / slot
        \s+([_SRWKDLG.])    # mode
        \s+\d+\.\d+         # CPU usage
        \s+(\d+)            # SS - seconds since beginning of most recent request (or not, see below)
        \s+\d+              # Milliseconds required to process most recent request
        \s+\d+\.\d+         # Kilobytes transferred this connection
        \s+\d+\.\d+         # Megabytes transferred this child
        \s+\d+\.\d+         # Total megabytes transferred this slot
        (.*)$               # host, vhost, request
        !x;
}

# Fetch the apache server status, classify every slot and decide whether idle
# connections should be killed.
#
# Side effects: refreshes @status with the raw scoreboard lines, updates the
# per-slot bookkeeping in %slots and prints a one-token progress indicator.
#
# Returns a two element list:
#   (\%p, 1)    - slots are running low, the caller should kill idle readers
#   (\%p, 0)    - nothing to do
#   (undef, 0)  - the status could not be used this time round; try again
#
# %p holds 'total' and 'other' counters plus, for each of 'waiting',
# 'reading', 'sending' and 'open', a hash with 'count' and 'pids', where
# 'pids' is a list of [ slot, pid, request text, time of last change ].
#
# Dies when a line that passed the grep filters cannot be parsed.
sub check_connections {
    @status = qx!/usr/bin/elinks -dump 1 $status_url |
        /bin/grep -P '^\\s*\\d+-\\d+\\s' |
        /bin/grep -P '/\\d+\\s+[_SRWKDLG.] \\d'
    !;
    # key:
    # "_" Waiting for Connection, "S" Starting up, "R" Reading Request,
    # "W" Sending Reply, "K" Keepalive (read), "D" DNS Lookup,
    # "L" Logging, "G" Gracefully finishing, "." Open slot with no current
    # process

    my $time = time();

    # scoreboard mode character => bucket name used in %p; every other mode
    # is only counted under 'other'
    my %bucket_for = (
        '_' => 'waiting',
        'R' => 'reading',
        'W' => 'sending',
        '.' => 'open',
    );

    # parse and classify httpd process slots
    my %p;
    foreach my $line (@status) {
        # NOTE: the SS value is not actually usable since it doesn't reset
        # when apache accepts a connection; so we have to keep track of it
        # ourselves and the captured value is discarded here
        my ($slot, $pid, $mode, undef, $rest) = _parse_status_line($line);

        # test for a failed match first, so a bad line dies cleanly instead
        # of first warning about an undefined $pid
        die "Couldn't parse $line" unless defined $pid;

        # try again if the pid is 0; a row of that shape was seen as
        # "4-0 0 0/0/10634 S 0.00 4 0 ..."
        return (undef, 0) if $pid eq '0';

        $p{'total'}++;

        # normalize spaces
        $rest =~ s/\s+/ /g;

        # keep track of when a slot starts processing a new request by
        # tracking changes; a slot we have never seen counts as changed, and
        # is tested first so we never compare against undefined values
        my $prev    = $slots{$slot};
        my $changed = !$prev
            || $pid ne $prev->{'pid'}
            || $mode ne $prev->{'mode'}
            || $rest ne $prev->{'rest'};
        my $slot_time = ($changed || !$prev->{'time'}) ? $time : $prev->{'time'};

        $slots{$slot} = {
            'pid'  => $pid,
            'mode' => $mode,
            'rest' => $rest,
            'time' => $slot_time,
        };

        my $bucket = $bucket_for{$mode};
        if (defined $bucket) {
            $p{$bucket}{'count'}++;
            push @{ $p{$bucket}{'pids'} }, [ $slot, $pid, $rest, $slot_time ];
        }
        else {
            $p{'other'}++;
        }
        # print "pid [$pid] mode [$mode] rest [$rest]\n";
    }

    $p{'total'} += 0;    # make numeric

    # if we didn't parse any lines, the server is probably down; try again
    unless ($p{'total'}) {
        print scalar localtime(), ": couldn't parse any lines, assuming httpd is restarting; trying again\n";
        return (undef, 0);
    }

    # print "Couldn't find all the slots, is apache still initializing?\n@status"
    #     unless $p{'total'} == $expected_total;

    $p{'waiting'}{'count'} ||= 0;
    $p{'open'}{'count'}    ||= 0;

    # update the open count based on the max slot config; when apache has
    # just started this may differ
    if ($p{'total'} < $expected_total) {
        $p{'open'}{'count'} += $expected_total - $p{'total'};
    }

    # print a quick status update: number of slots able to take a request
    my $available = $p{'waiting'}{'count'} + $p{'open'}{'count'};
    print $available, ".";

    # if there are <= threshold waiting slots, and no open slots, then kill
    # any idle reading slots
    if ($p{'waiting'}{'count'} <= $waiting_threshold and not $p{'open'}{'count'}) {
        return (\%p, 1);
    }
    return (\%p, 0);
}
# Kill httpd processes that sit in an idle "reading" slot.
#
# Takes the hash reference produced by check_connections().  Walks its
# 'reading' entries, picks those whose request text shows the idle
# "? ? ..reading.." pattern and that have been unchanged for more than
# $let_idle seconds, and sends them a plain kill via sudo.  At most $kill_idle
# processes are killed per call.  Every pid that was signalled is appended to
# @killed as [ pid, time ] so rekill_stuck() can follow up.
#
# Also prints the last server status and the established port 80/443
# connections for the log.  Returns nothing.
sub kill_connections {
    my $p = shift;

    print "\n", scalar localtime(), ": looking to kill idle read connections\n@status\n";
    system(q!sudo netstat -np | grep 'ESTABLISHED' | egrep ':(80|443) '!);

    # check for idle reading connections
    # idle connections look like:
    # 0-0 30044 0/20/10453 R 1.00 7 0 0.0 1.27 25.39 ? ? ..reading..
    my $reading = $p->{'reading'}{'pids'} ||= [];

    my $kills = 0;
    my $time  = time();
    foreach my $entry (@$reading) {
        # each entry is [ slot, pid, request text, time of last change ]; the
        # slot number is not needed here
        my (undef, $pid, $rest, $slot_time) = @$entry;

        # find a slot that's accepting a request
        next unless $rest =~ m/\s+\?\s+\?\s+\Q..reading..\E/;

        # the status parser also accepts "-" in the pid column; only ever
        # hand a plain positive process id to kill
        next unless $pid =~ /\A[1-9]\d*\z/;

        # make sure it's been idle
        my $idle = $time - $slot_time;
        next unless $idle > $let_idle;

        # skip if we already killed it
        next if grep { $pid == $_->[0] } @killed;

        # kill; the list form of system() runs sudo directly, no shell involved
        print scalar localtime(), ": killing pid: [$pid] idle for: [$idle] info: $rest\n";
        system('sudo', 'kill', $pid);
        push @killed, [ $pid, $time ];

        $kills++;
        last if $kills >= $kill_idle;
    }

    return;
}
# Follow up on processes that were sent a kill earlier.
#
# Every entry of @killed is [ pid, time the kill was sent ].  Entries whose
# /proc/<pid> is no longer readable are dropped; the others are reported,
# kept on the list, and sent a kill -9 once more than $rekill_after seconds
# have passed since the first kill.  Returns nothing.
sub rekill_stuck {
    my $now = time();

    # empty the list in one go; entries still alive are queued again below
    my @pending = splice @killed;

    for my $entry (@pending) {
        my ($pid, $killed_at) = @$entry;

        # use /proc to check pid
        if (-r "/proc/$pid") {
            print "\n", scalar localtime(), ": killed process still active: $pid\n";
            push @killed, $entry;

            if ($now - $killed_at > $rekill_after) {
                print "\n", scalar localtime(), ": kill -9 pid: $pid\n";
                system("sudo kill -9 $pid");
            }
        }
    }

    return;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Seems like this ought to be some kind of `MaxChromeIdlers` setting in httpd.conf, perhaps with `IfModule eqhmcow` around it.