Created
April 22, 2016 04:18
-
-
Save xtetsuji/621da10319ab94a2e597b362d045e94f to your computer and use it in GitHub Desktop.
ModPerl::DenyFakeBot - deny fake bot (currently, only deny fake-googlebot). It is my shorthand script in few minutes, as-is... (probably code has wrong things)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ModPerl::DenyFakeBot; | |
# PerlPostReadRequestHadnler ModPerl::DenyFakeBot | |
use strict; | |
use warnings; | |
use APR::Table (); | |
use Apache2::Connection (); | |
use Apache2::RequestRec (); | |
use Apache2::RequestUtil (); | |
use Apache2::ServerUtil (); | |
use Apache2::Const -compile => qw(FORBIDDEN DECLINED); | |
use Net::DNS; | |
use Time::HiRes qw(ualarm); | |
our $DNS_TIMEOUT_MS = 500; | |
sub handler { | |
my $r = shift; | |
my $p = __PACKAGE__->new( r => $r ); | |
local $@; | |
my $is_fake; | |
eval { | |
local $SIG{ALRM} = sub { die "timeout"; }; | |
ualarm($DNS_TIMEOUT_MS); | |
$is_fake = $p->is_fake_googlebot; | |
ualarm(0); | |
}; | |
if ( $@ ) { | |
# timeout | |
my $warning = "timeout DNS lookup (ms=$DNS_TIMEOUT_MS)"; | |
warn $warning; | |
# for CustomLog %n{MP_DenyFakeBot_warning} | |
$r->notes->set( "MP_DenyFakeBot_warning" => $warning ); | |
} | |
if ( $is_fake ) { | |
return Apache2::Const::FORBIDDEN; | |
} | |
return Apache2::Const::DECLINED; | |
} | |
sub new { | |
my $class = shift; | |
my %arg = @_; | |
my $r = $arg{r} || Apache2::RequestUtil->request; | |
return bless { r => $r }, $class; | |
} | |
sub r { | |
my $self = shift; | |
return $self->{r}; | |
} | |
sub is_fake_googlebot { | |
my $self = shift; | |
my $ua_name = $self->r->headers_in->get("User-Agent"); | |
return 0 if $ua_name !~ /Googlebot/i; | |
my $remote_ip = $self->remote_ip; | |
my $rev_name = $self->resolve_ip($remote_ip); | |
# https://support.google.com/webmasters/answer/182072 | |
if ( $rev_name =~ /\.google(?:bot)?\.com\.?$/) { | |
return 0; # Googlebot の IP アドレスなので OK | |
} else { | |
return 1; # これ UA 偽装 | |
} | |
} | |
sub remote_ip { | |
my $self = shift; | |
my $r = $self->r; | |
if ( my $x_forwarded_for = $r->headers_in->get("X-Forwarded-For") ) { | |
return $x_forwarded_for; | |
} else { | |
return $r->connection->remote_ip; | |
} | |
} | |
sub resolve_ip { | |
my $self = shift; | |
my $ip = shift; | |
if ( $resolve_cache = $self->resolve_cache($ip) ) { | |
return $resolve_cache; | |
} | |
if ( my $query = $self->resolver->search($ip, "PTR") ) { | |
for ($query->answer) { | |
next if $_->type ne "PTR"; | |
my $rdatastr = $_->rdatastr; | |
$self->resolve_cache($ip => $rdatastr); | |
return $rdatastr; | |
} | |
} | |
return; | |
} | |
sub resolve_cache { | |
my $self = shift; | |
my $key = shift; | |
my $value = shift; | |
my $cache = $self->{resolve_ip_cache} ||= {}; | |
if ( $key && $value ) { | |
# set | |
return $cache->{$key} = $value; | |
} elsif ( $key ) { | |
return $cache->{$key}; | |
} else { | |
die "resolve_ip_cache: key is required"; | |
} | |
} | |
sub resolver { | |
my $self = shift; | |
return $self->{resolver} ||= Net::DNS::Resolver->new; | |
} | |
1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment