|
#!/usr/bin/python3 |
|
# -*- coding: UTF-8 -*- |
|
from datetime import datetime |
|
from tinyscript import * |
|
from tinyscript.report import Section |
|
|
|
|
|
# tinyscript metadata ; consumed by initialize() to build the CLI help, version and banner
__script__ = "WordPress Debug Log File Parser"
__version__ = "1.0"
__author__ = "Alexandre D'Hondt"
__email__ = "[email protected]"
__copyright__ = ("A .D'Hondt", 2023)
__license__ = "gpl-3.0"
__docformat__ = "md"
__doc__ = """
This simple tool takes a WordPress debug log file and aggregates all the lines per error with time frames when they
occurred and the number of occurrences.
"""
__examples__ = ["debug.log", "/tmp/wp-errors.log --no-urls --search user"]
|
|
|
|
|
# datetime format of WordPress debug log timestamps, e.g. "01-Jan-2023 12:00:00 UTC"
DT_FORMAT = "%d-%b-%Y %H:%M:%S %Z"
# matches a line that STARTS a new log entry (leading "[DD-Mon-YYYY HH:MM:SS TZ]" tag)
#  NB: raw strings are required ; "\[" and "\d" are invalid escapes in normal strings (SyntaxWarning since Python 3.12)
DTG_LINE = re.compile(r"^\[\d{2}-[A-Z][a-z]{2}-\d{4} \d{2}:\d{2}:\d{2} [A-Z]{3}\] .*$")
# captures the timestamp and the (possibly multi-line, hence DOTALL) message of a complete log entry
LOG_LINE = re.compile(r"^\[(\d{2}-[A-Z][a-z]{2}-\d{4} \d{2}:\d{2}:\d{2} [A-Z]{3})\] (.*)", re.M|re.DOTALL)
# matches log entries that are bare URLs
URL_LINE = re.compile(r"^https?://.*$")
|
|
|
|
|
def parse(logfile, sort="number-of-occurences", show_urls=True, search=r".*"):
    """
    Parse a WordPress debug log file, aggregating identical entries with their time frames and occurrence counts,
     then print the resulting tables ("Errors" and, optionally, "URLs seen").

    :param logfile:   path to the WordPress debug log file
    :param sort:      sort criterion ; "start-date", "end-date" or "number-of-occurences"
                       (spelling kept as-is for backward compatibility with the CLI)
    :param show_urls: whether the "URLs seen" section shall be displayed
    :param search:    regex for filtering log entries (non-matching entries are discarded)
    """
    # entry text => ([first_seen_dt, last_seen_dt], count) ; 'latest' tracks the most recent datetime over all entries
    dat, urls, latest = {}, {}, datetime(1, 1, 1)
    # helper for formatting the result to be displayed for a dictionary (data or URLs)
    def _format(log_data):
        nonlocal latest
        # the first key determines whether this dictionary holds URLs or error entries
        is_url = URL_LINE.match(next(iter(log_data))) is not None
        latest_dt = latest.strftime("%d-%m-%y")
        tdata = [["Count", "Start date", "End date", ["Entry", "URL"][is_url]]]
        # sort descending on the selected criterion, using the two other criteria as tie-breakers
        _sort = lambda x: (-x[1][0][0].timestamp(), -x[1][0][1].timestamp(), -x[1][1]) if sort == "start-date" else \
                          (-x[1][0][1].timestamp(), -x[1][0][0].timestamp(), -x[1][1]) if sort == "end-date" else \
                          (-x[1][1], -x[1][0][0].timestamp(), -x[1][0][1].timestamp())
        for entry, ((start_dt, end_dt), count) in sorted(log_data.items(), key=_sort):
            start, end = start_dt.strftime("%d-%m-%y"), end_dt.strftime("%d-%m-%y")
            # blank the end date when it equals the most recent date seen, to highlight still-occurring entries
            if end == latest_dt:
                end = ""
            tdata.append([str(count), start, end, entry])
        return ts.BorderlessTable(tdata)
    # helper for parsing a single log entry to the right destination dictionary (data or URLs)
    def _parse(log):
        nonlocal latest
        match = LOG_LINE.match(log)
        if match is None:
            # accumulated text did not start with a timestamped line (e.g. junk at the top of the file) ;
            #  ignore it instead of crashing on .groups()
            return
        dt, err = match.groups()
        if re.search(search, err) is None:
            return
        is_url = URL_LINE.match(err) is not None
        if is_url:
            # normalize URLs so that trailing-slash variants aggregate together
            err = err.rstrip("/")
        dt = datetime.strptime(dt, DT_FORMAT)
        d = urls if is_url else dat
        d.setdefault(err, ([dt, dt], 0))
        d[err] = ([min(dt, d[err][0][0]), max(dt, d[err][0][1])], d[err][1] + 1)
        latest = max(latest, d[err][0][1])
    # start parsing the log file ; entries may span multiple lines, hence accumulate until the next timestamped line
    with open(logfile) as f:
        prev = None
        for line in f:
            line = line.strip()
            if line == "":
                continue
            if DTG_LINE.match(line):
                # a new entry starts ; flush the previously accumulated one
                if prev is not None:
                    _parse(prev)
                prev = line
            elif prev is None:
                # continuation line seen before any timestamped entry (e.g. truncated log)
                prev = line
            else:
                prev += "\n" + line
        # do not forget the last accumulated entry
        if prev is not None:
            _parse(prev)
    # print results found
    if len(dat) > 0:
        print(Section("Errors").rst())
        print(_format(dat))
    if show_urls and len(urls) > 0:
        print(Section("URLs seen").rst())
        print(_format(urls))
|
|
|
|
|
if __name__ == '__main__':
    # available sort criteria for the --sort option (spelling kept for CLI backward compatibility)
    sort_modes = ("start-date", "end-date", "number-of-occurences")
    # CLI definition ; 'parser', 'initialize', 'args' and 'logger' are provided by tinyscript
    parser.add_argument("path", type=ts.file_exists, help="path to WordPress log file")
    parser.add_argument("-s", "--search", default=r".*", help="search for specific patterns in log entries")
    parser.add_argument("-u", "--no-urls", action="store_false", help="do not display URLs seen")
    parser.add_argument("--sort", choices=sort_modes, default="number-of-occurences",
                        help="sort by '{}', '{}' or '{}'".format(*sort_modes))
    initialize()
    logger.info("Parsing %s..." % args.path)
    # NB: --no-urls uses store_false, so args.no_urls is True unless the flag is given
    parse(args.path, sort=args.sort, show_urls=args.no_urls, search=args.search)