Skip to content

Instantly share code, notes, and snippets.

@xtetsuji
Created April 22, 2016 04:18
Show Gist options
  • Save xtetsuji/621da10319ab94a2e597b362d045e94f to your computer and use it in GitHub Desktop.
Save xtetsuji/621da10319ab94a2e597b362d045e94f to your computer and use it in GitHub Desktop.
ModPerl::DenyFakeBot - deny fake bot (currently, only deny fake-googlebot). It is my shorthand script in few minutes, as-is... (probably code has wrong things)
package ModPerl::DenyFakeBot;
# PerlPostReadRequestHadnler ModPerl::DenyFakeBot
use strict;
use warnings;
use APR::Table ();
use Apache2::Connection ();
use Apache2::RequestRec ();
use Apache2::RequestUtil ();
use Apache2::ServerUtil ();
use Apache2::Const -compile => qw(FORBIDDEN DECLINED);
use Net::DNS;
use Time::HiRes qw(ualarm);
our $DNS_TIMEOUT_MS = 500;
sub handler {
my $r = shift;
my $p = __PACKAGE__->new( r => $r );
local $@;
my $is_fake;
eval {
local $SIG{ALRM} = sub { die "timeout"; };
ualarm($DNS_TIMEOUT_MS);
$is_fake = $p->is_fake_googlebot;
ualarm(0);
};
if ( $@ ) {
# timeout
my $warning = "timeout DNS lookup (ms=$DNS_TIMEOUT_MS)";
warn $warning;
# for CustomLog %n{MP_DenyFakeBot_warning}
$r->notes->set( "MP_DenyFakeBot_warning" => $warning );
}
if ( $is_fake ) {
return Apache2::Const::FORBIDDEN;
}
return Apache2::Const::DECLINED;
}
sub new {
my $class = shift;
my %arg = @_;
my $r = $arg{r} || Apache2::RequestUtil->request;
return bless { r => $r }, $class;
}
sub r {
my $self = shift;
return $self->{r};
}
sub is_fake_googlebot {
my $self = shift;
my $ua_name = $self->r->headers_in->get("User-Agent");
return 0 if $ua_name !~ /Googlebot/i;
my $remote_ip = $self->remote_ip;
my $rev_name = $self->resolve_ip($remote_ip);
# https://support.google.com/webmasters/answer/182072
if ( $rev_name =~ /\.google(?:bot)?\.com\.?$/) {
return 0; # Googlebot の IP アドレスなので OK
} else {
return 1; # これ UA 偽装
}
}
sub remote_ip {
my $self = shift;
my $r = $self->r;
if ( my $x_forwarded_for = $r->headers_in->get("X-Forwarded-For") ) {
return $x_forwarded_for;
} else {
return $r->connection->remote_ip;
}
}
sub resolve_ip {
my $self = shift;
my $ip = shift;
if ( $resolve_cache = $self->resolve_cache($ip) ) {
return $resolve_cache;
}
if ( my $query = $self->resolver->search($ip, "PTR") ) {
for ($query->answer) {
next if $_->type ne "PTR";
my $rdatastr = $_->rdatastr;
$self->resolve_cache($ip => $rdatastr);
return $rdatastr;
}
}
return;
}
sub resolve_cache {
my $self = shift;
my $key = shift;
my $value = shift;
my $cache = $self->{resolve_ip_cache} ||= {};
if ( $key && $value ) {
# set
return $cache->{$key} = $value;
} elsif ( $key ) {
return $cache->{$key};
} else {
die "resolve_ip_cache: key is required";
}
}
sub resolver {
my $self = shift;
return $self->{resolver} ||= Net::DNS::Resolver->new;
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment