Created
May 17, 2009 19:04
-
-
Save antonlindstrom/113108 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
# | |
# Collecting data from access.log and | |
# presenting data as top 10 lists. | |
# | |
# The lists are: IP addresses, domains,
# files, months, hours and browsers.
# | |
# When a domain does not exist, the IP is printed instead.
# | |
# Author Anton Lindström | |
# [email protected] | |
use strict; | |
use warnings; | |
use HTML::ParseBrowser; | |
# Logfile: the first command-line argument if one is given,
# otherwise "access.log" in the current directory.
my $accessfile = @ARGV ? $ARGV[0] : "access.log";

# Declare global tallies. Each hash maps a value seen in the log
# (IP, file, month, hour, browser name, domain) to its hit count.
my %ipadresses;
my %efiles;
my %pmonth;
my %phours;
my %browsers;
my %pdomains;

# Open Apache logfile.
# Three-argument open with a lexical handle: the file name can never be
# interpreted as an open mode, and a missing or unreadable log stops the
# script with a clear message instead of silently yielding empty lists.
open(my $access_fh, '<', $accessfile)
    or die "Cannot open $accessfile: $!\n";
my @access = <$access_fh>;
close($access_fh);
# Split one line of an Apache "combined" access log into the fields
# this script tallies.
#
# Parameters:
#   $line - one raw log line (a trailing newline is allowed).
#
# Returns the list ($ip, $month, $hour, $file, $agent), or an empty
# list when the line does not look like a combined-format entry.
sub parse_access_line {
    my ($line) = @_;

    return unless $line =~ m{
        \A
        ([0-9.]+)                                 # client IPv4 address
        \s+ \S+ \s+ \S+ \s+                       # identd and auth user
        \[ \d+ / (\w+) / \d+ : (\d+) [^\]]* \]    # [day/month/year:hour:...]
        \s+ " [a-z]+ \s+ (.+?) \s+ HTTP/[\d.]+ "  # "METHOD file HTTP/x.y"
        \s+ \d+ \s+ \S+                           # status and size (may be -)
        \s+ " [^"]* "                             # referer (any value)
        \s+ " ([^"]*) "                           # user agent
    }xi;

    return ($1, $2, $3, $4, $5);
}

print "Collecting data, hold on..\n\n";

# Reverse-DNS answers already obtained, keyed by IP, so the external
# "host" command runs once per distinct address rather than per line.
my %domain_for;

foreach my $line (@access) {
    # Parse logfile; skip lines that are not valid log entries so they
    # cannot be tallied with leftover or undefined values.
    my @fields = parse_access_line($line);
    next unless @fields;
    my ($ip, $month, $hour, $file, $agent) = @fields;

    # Parse Browser from the user agent string recorded in the log line.
    my $ua = HTML::ParseBrowser->new($agent);
    my $browsername = $ua->name;
    # Unrecognised agents get a printable label instead of an undef key.
    $browsername = "Unknown"
        unless defined $browsername && length $browsername;

    # For every occurrence add +1 (a missing key starts at zero).
    $ipadresses{$ip}++;
    $efiles{$file}++;
    $pmonth{$month}++;
    $phours{"$hour:00"}++;
    $browsers{$browsername}++;

    # Domain lookup. $ip consists only of digits and dots (enforced by
    # parse_access_line), so interpolating it into the command is safe.
    if (!exists $domain_for{$ip}) {
        my $answer = `host $ip`;
        # Fall back to the IP itself when there is no PTR record
        # or the lookup command could not be run.
        $domain_for{$ip} =
            (defined $answer && $answer =~ m/pointer\s(.+)/) ? $1 : $ip;
    }

    # For every occurrence of a domain add +1.
    $pdomains{ $domain_for{$ip} }++;
}
# Return the keys of a tally hash ordered by descending count.
#
# Parameters:
#   $count_for - reference to a hash mapping key => hit count.
#   $limit     - maximum number of keys to return (default 10).
#
# Keys with equal counts are ordered alphabetically so that the output
# is the same on every run.
sub top_keys {
    my ($count_for, $limit) = @_;
    $limit = 10 unless defined $limit;

    my @ranked = sort {
        $count_for->{$b} <=> $count_for->{$a} or $a cmp $b
    } keys %{$count_for};

    # Keep only the leading $limit entries.
    splice(@ranked, $limit) if @ranked > $limit;
    return @ranked;
}

# Print a heading followed by the top ten entries of a tally hash,
# one per line as "<TAB>count<TAB> key".
#
# Parameters:
#   $title     - heading line, printed verbatim followed by a newline.
#   $count_for - reference to a hash mapping key => hit count.
sub print_top {
    my ($title, $count_for) = @_;

    print "$title\n";
    foreach my $key (top_keys($count_for, 10)) {
        print "\t$count_for->{$key}\t $key\n";
    }
    return;
}

# Print most frequent domains.
print_top("Most frequent visitors (domain):", \%pdomains);

# Print most frequent IP addresses.
print_top("Most frequent visitors (IP):", \%ipadresses);

# Print most frequent files accessed.
print_top("Most popular files:", \%efiles);

# Print peak month.
print_top("Most popular month to access site:", \%pmonth);

# Print peak hours.
print_top("Most popular hour to access site:", \%phours);

# Print most used browser.
print_top("Top 10 Browsers:", \%browsers);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment