Last active
October 25, 2022 17:05
-
-
Save matheusfillipe/8b5e278fa98cdebb2e11eaaa7bc6a337 to your computer and use it in GitHub Desktop.
Parse the Apache access log and count the most frequently accessing IPs while the script is running.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import subprocess | |
from os import system | |
import re | |
from dataclasses import dataclass | |
from datetime import datetime | |
from collections import Counter | |
# Path of the access log that the script follows for new requests.
LOGFILE = "/var/log/apache2/live-access_log" | |
# How many of the busiest client addresses are listed on each refresh
# (passed to Counter.most_common when printing the CLIENTS section).
LOG = 10 | |
@dataclass
class Access:
    """A single request extracted from one access-log line.

    ``date`` may be passed either as a ``datetime`` or as a string such as
    ``10/Oct/2022:13:55:36``; a string is converted to a ``datetime`` right
    after construction.
    """

    address: str
    date: datetime
    method: str
    path: str

    def __post_init__(self):
        """Turn a string ``date`` into a ``datetime``; leave other values alone."""
        if not isinstance(self.date, str):
            return
        self.date = datetime.strptime(self.date, "%d/%b/%Y:%H:%M:%S")

    def __str__(self):
        """Return the four fields as space-separated ``name=repr`` pairs."""
        address = self.address
        date = self.date
        method = self.method
        path = self.path
        return f"{address=} {date=} {method=} {path=}"
def tailf(filename: str):
    """Yield lines from ``tail -f`` running on *filename*.

    The generator keeps producing lines (each including its trailing
    newline) for as long as the ``tail`` child process writes them.

    Args:
        filename: Path of the file to follow. It is passed to ``tail`` as a
            single argument, so paths containing whitespace work.

    Yields:
        str: One decoded line of ``tail`` output at a time.
    """
    # Build argv as a list instead of splitting a formatted string, which
    # would break a path with spaces into several arguments. "--" keeps a
    # path starting with "-" from being read as an option.
    proc = subprocess.Popen(
        ["tail", "-f", "--", filename],
        stdout=subprocess.PIPE,
        universal_newlines=True,
    )
    try:
        yield from proc.stdout
    finally:
        # Runs when the consumer closes or drops the generator: stop the
        # child and reap it so no stray ``tail`` process or pipe is left.
        proc.terminate()
        proc.wait()
        proc.stdout.close()
# Captures, in order: client address, timestamp inside the brackets (the
# token after it is matched but not captured), request method, request path.
_LOG_LINE_RE = re.compile(r'^(\S+) .*\[(\S+) \S+\] "(\S+) (\S+) .*"')


def parse_log_line(line: str) -> "Access | None":
    """Parse one access-log line into an ``Access`` record.

    Args:
        line: A raw line read from the access log.

    Returns:
        An ``Access`` built from the captured address, timestamp, method and
        path, or ``None`` when the line does not match the expected layout
        or its timestamp cannot be parsed.
    """
    match = _LOG_LINE_RE.match(line)
    if match is None:
        return None
    try:
        return Access(*match.groups())
    except ValueError:
        # Access converts the timestamp with strptime, which raises
        # ValueError for a bracketed token that is not a valid date. Treat
        # such a line like any other unparseable line instead of crashing.
        return None
# Each client's path counter is cut back to this many entries after every
# request, which bounds how many paths are remembered per address.
_MAX_PATHS_PER_IP = 100
# Number of rows printed in the overall most-accessed-paths section.
_TOP_PATHS = 5


def main():
    """Follow LOGFILE and redraw the hit statistics after every parsed request.

    Keeps three tallies: hits per client address, hits per path, and hits
    per path for each client. Lines that cannot be parsed are skipped.
    """
    ip_hits = Counter()
    path_hits = Counter()
    ip_path_hits = {}

    for line in tailf(LOGFILE):
        access = parse_log_line(line)
        if access is None:
            continue

        ip = access.address
        path_hits[access.path] += 1
        ip_hits[ip] += 1

        client_paths = ip_path_hits.setdefault(ip, Counter())
        client_paths[access.path] += 1
        # Keep only this client's most requested paths.
        ip_path_hits[ip] = Counter(
            dict(client_paths.most_common(_MAX_PATHS_PER_IP))
        )

        # Print the most active clients, each with its most accessed path.
        system("clear")
        print("CLIENTS")
        for client, count in ip_hits.most_common(LOG):
            print(f"{client}: {count=}: ", end="")
            # A separate name here avoids shadowing the client's hit count.
            for path, path_count in ip_path_hits[client].most_common(1):
                print(f"{path}: {path_count}, ", end="")
            print()
        print(30 * "-")

        # Most accessed paths overall.
        print("MOST ACESSED PATHS")
        for path, count in path_hits.most_common(_TOP_PATHS):
            print(f"{path} -> {count}")


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # The loop in main() has no exit condition of its own, so Ctrl-C is
        # how the script is stopped; exit quietly instead of with a traceback.
        pass
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment