Skip to content

Instantly share code, notes, and snippets.

@mosladil
Created December 1, 2014 12:41
Show Gist options
  • Save mosladil/a1d0f737b2f1502b3b22 to your computer and use it in GitHub Desktop.
Save mosladil/a1d0f737b2f1502b3b22 to your computer and use it in GitHub Desktop.
access log parser
#!/usr/bin/env perl
# Access log parser: reads an Apache access log (lines carrying referrer and
# user-agent fields), counts the records per month and keeps track of how
# many lines were read in total.
#
# Usage: pass the path of the access log as the first argument.
use warnings;
use strict;

# Test for presence rather than truth so that a file literally named "0"
# is still accepted.
die('File not specified') if !defined($ARGV[0]) || $ARGV[0] eq '';

# Three-argument open with a lexical handle: the file name is never
# interpreted as a mode or a pipe, even if it starts with ">" or ends in "|".
open(my $logFh, '<', $ARGV[0]) or die "Cannot open '$ARGV[0]': $!";

my %months = ();                  # month key => number of counted records
my ($firstRecord, $lastRecord);   # date field of the first / last parsed line
my ($records, $total) = (0, 0);   # counted records / every line read
my @excludeIp = ('127.1.1.1', '127.2.2.2', '127.3.3.3', '127.4.4.4');

# Compiled once with qr//. The pattern used to live in a single-quoted
# string, where "\\" collapses to a single backslash, so the escaped-quote
# alternative matched a bare quote instead of backslash-quote.
my $apacheRegex = qr{
    ^(\S+)\s                        # IP
    \S+\s+                          # remote logname
    (?:\S+\s+)+                     # remote user
    \[([^]]+)\]\s                   # date
    "(\S*)\s?                       # method
    (?:((?:[^"]*(?:\\")?)*)\s       # URL
    ([^"]*)"\s|                     # protocol
    ((?:[^"]*(?:\\")?)*)"\s)        # or, possibly URL with no protocol
    (\S+)\s                         # status code
    (\S+)\s                         # bytes
    "((?:[^"]*(?:\\")?)*)"\s        # referrer
    "(.*)"$                         # user agent
}xs;

# Captures the second slash-separated token of the date field (the month).
my $dateRegex = qr{^\S+/(\S+)/\S+\s\S+};

# Loop records
while (my $line = <$logFh>) {
    $total++;

    # Only the first two captures (client IP and date) are needed here.
    my ($ip, $date) = $line =~ $apacheRegex;

    # A line that does not parse still counts towards $total (so it shows
    # up as excluded in the summary) but must not clobber the first/last
    # record markers with an undefined value.
    next if !defined $date;

    $firstRecord = $date if !defined $firstRecord;
    $lastRecord  = $date;

    # The helper subs are defined further down with ($) prototypes, so they
    # are invoked with a leading "&": a plain call placed before the
    # definition would raise a "called too early to check prototype" warning.
    next if &isExcludeIp($ip);

    # Do not create an empty month bucket when no month can be extracted.
    my $month = &getMonthFromDate($date);
    next if !defined($month) || $month eq '';
    &setDownloadCount($month);
}
close($logFh);
# Print month stats
#
# Writes the first/last record dates, one "<month>: <count>" line per month
# and the grand total to STDOUT. $records is (re)computed here as the sum of
# all month counters; the number of excluded lines is $total minus $records.

# An empty or fully unparseable log leaves these undefined; print them as
# empty strings instead of formatting an undefined value.
printf("First record: %s\n", defined $firstRecord ? $firstRecord : '');
printf("Last record: %s\n\n", defined $lastRecord ? $lastRecord : '');

# Hash key order changes from run to run, so order the rows explicitly.
# Assumes the month keys are the English three-letter abbreviations found in
# Apache's default date format; any other key sorts after December,
# alphabetically.
my @calendar = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
my %calendarPos;
@calendarPos{@calendar} = (0 .. $#calendar);
my $unknownPos = scalar @calendar;

my @orderedMonths = sort {
    (exists $calendarPos{$a} ? $calendarPos{$a} : $unknownPos)
        <=> (exists $calendarPos{$b} ? $calendarPos{$b} : $unknownPos)
        or $a cmp $b
} keys %months;

# Make sure both counters are numeric even when nothing was read or counted.
$records = 0 if !defined $records;
$total   = 0 if !defined $total;

foreach my $month (@orderedMonths) {
    printf("%s: %d\n", $month, $months{$month});
    $records += $months{$month};
}
printf("\nTotal records: %d (%d excluded)\n\n", $records, ($total - $records));
#######
## SUBS
# Tell whether a client address is on the exclusion list (@excludeIp).
#
# Parameter: $ip - client address taken from the log line.
# Returns:   1 when the record must be skipped, 0 otherwise.
#
# The address is compared as an exact string. It used to be interpolated
# into /^$ip/, i.e. treated as an unescaped regex prefix, so an unrelated
# client such as "1.7.1.1" matched the excluded "127.1.1.1" because every
# "." matched any character.
#
# An undefined or empty address is still reported as excluded (the empty
# pattern used to match every list entry), so callers keep skipping lines
# whose address could not be parsed.
sub isExcludeIp($) {
    my ($ip) = @_;
    return 1 if !defined($ip) || $ip eq '';
    foreach my $excluded (@excludeIp) {
        return 1 if $ip eq $excluded;
    }
    return 0;
}
# Extract the month token from a log date field.
#
# Parameter: $date - date field of a log record.
# Returns:   the first capture group of $dateRegex, or undef when $date is
#            undefined or does not match.
#
# The capture is taken from the match's own return list. Returning $1
# unconditionally is unsafe: after a failed match $1 still holds the capture
# of an earlier successful match, so a stale value could be returned.
sub getMonthFromDate($) {
    my ($date) = @_;
    my $month;
    if (defined $date) {
        # List context yields the captures, or an empty list on failure,
        # which leaves $month undefined.
        ($month) = $date =~ m/$dateRegex/;
    }
    return $month;
}
# Add one hit to the counter kept in %months for the given key.
#
# Parameter: the key to count under (callers pass the month extracted from
#            the record's date).
# Returns:   the counter value from before the increment.
sub setDownloadCount($) {
    my $monthKey = shift;
    return $months{$monthKey}++;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment