Skip to content

Instantly share code, notes, and snippets.

@matheusfillipe
Last active October 25, 2022 17:05
Show Gist options
  • Save matheusfillipe/8b5e278fa98cdebb2e11eaaa7bc6a337 to your computer and use it in GitHub Desktop.
Save matheusfillipe/8b5e278fa98cdebb2e11eaaa7bc6a337 to your computer and use it in GitHub Desktop.
Parse the Apache access log and count the most frequently accessing IPs while the script is running
#!/usr/bin/python3
import subprocess
from os import system
import re
from dataclasses import dataclass
from datetime import datetime
from collections import Counter
LOGFILE = "/var/log/apache2/live-access_log"
LOG = 10
@dataclass
class Access:
    """One parsed request taken from a line of the access log.

    ``date`` may be given either as a ``datetime`` or as a string in the
    ``%d/%b/%Y:%H:%M:%S`` layout; a string is converted to a ``datetime``
    immediately after construction.
    """

    address: str
    date: datetime
    method: str
    path: str

    def __post_init__(self):
        """Convert a string ``date`` to a ``datetime``; leave anything else alone."""
        if not isinstance(self.date, str):
            return
        timestamp_format = '%d/%b/%Y:%H:%M:%S'
        self.date = datetime.strptime(self.date, timestamp_format)

    def __str__(self):
        """Return the four fields as space-separated ``name=repr(value)`` pairs."""
        field_names = ("address", "date", "method", "path")
        return " ".join(
            f"{name}={getattr(self, name)!r}" for name in field_names
        )
def tailf(filename: str):
    """Follow *filename* with ``tail -f`` and yield its output line by line.

    This is a generator: the ``tail`` child process is started on the first
    ``next()`` call and keeps running for as long as the generator is being
    consumed.  When the generator is closed (or garbage-collected, or an
    exception propagates through it) the child is terminated and its pipe
    is closed.

    Args:
        filename: Path of the file to follow.

    Yields:
        Each line written by ``tail``, as text, including its trailing
        newline.
    """
    # Pass the path as its own argv element: formatting it into a command
    # string and splitting on whitespace breaks paths that contain spaces.
    # The "--" stops a path starting with "-" from being read as an option.
    argv = ["tail", "-f", "--", filename]
    with subprocess.Popen(
        argv, stdout=subprocess.PIPE, universal_newlines=True
    ) as proc:
        try:
            for line in proc.stdout:
                yield line
        finally:
            # `tail -f` never exits on its own, so stop it explicitly;
            # leaving the `with` block then closes the pipe and reaps it.
            proc.terminate()
# Compiled once at import time.  Captures, in order: the first token of the
# line (client address), the first token inside the square brackets (the
# timestamp; the second bracketed token is matched but discarded), and the
# first two tokens of the quoted request (method and path).
_LOG_LINE_RE = re.compile(r'^(\S+) .*\[(\S+) \S+\] "(\S+) (\S+) .*"')


def parse_log_line(line: str) -> "Access | None":
    """Parse one access-log line into an ``Access`` record.

    Args:
        line: A single raw line from the log.

    Returns:
        An ``Access`` built from the address, timestamp, method and path
        found in the line, or ``None`` when the line does not have the
        expected layout or its timestamp cannot be parsed.
    """
    match = _LOG_LINE_RE.match(line)
    if match is None:
        return None
    try:
        return Access(*match.groups())
    except ValueError:
        # Access converts the timestamp with datetime.strptime, which raises
        # ValueError on a malformed date; treat that like any other
        # unparsable line instead of crashing the caller's loop.
        return None
def _print_report(ip_hits, ip_path_hits, path_hits):
    """Clear the terminal and print the current client and path rankings.

    Args:
        ip_hits: Counter of requests per client address.
        ip_path_hits: Mapping of client address to a Counter of the paths
            that client requested.
        path_hits: Counter of requests per path, across all clients.
    """
    system("clear")
    print("CLIENTS")
    # The LOG busiest clients, each followed by its most requested path.
    for ip, count in ip_hits.most_common(LOG):
        print(f"{ip}: {count=}: ", end="")
        # Distinct name so the client's total above is not shadowed.
        for path, path_count in ip_path_hits[ip].most_common(1):
            print(f"{path}: {path_count}, ", end="")
        print()
    print(30 * "-")
    # Most accessed paths overall
    print("MOST ACESSED PATHS")
    for path, count in path_hits.most_common(5):
        print(f"{path} -> {count}")


def main():
    """Follow LOGFILE and redraw the hit statistics after every parsed line.

    Runs until the line source is exhausted or the user presses Ctrl-C;
    in both cases the line generator is closed before returning.
    """
    ip_hits = Counter()
    path_hits = Counter()
    ip_path_hits = {}
    lines = tailf(LOGFILE)
    try:
        for line in lines:
            access = parse_log_line(line)
            if access is None:
                # Line did not parse; nothing to count.
                continue
            ip = access.address
            path_hits[access.path] += 1
            ip_hits[ip] += 1
            paths_for_ip = ip_path_hits.setdefault(ip, Counter())
            paths_for_ip[access.path] += 1
            # Keep only the 100 most requested paths per client so a single
            # address cannot grow its counter without bound.
            ip_path_hits[ip] = Counter(dict(paths_for_ip.most_common(100)))
            _print_report(ip_hits, ip_path_hits, path_hits)
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the monitor: finish the
        # interrupted terminal line and exit without a traceback.
        print()
    finally:
        lines.close()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment