Created
December 17, 2021 18:30
-
-
Save DMRobertson/133e303f659cace07d7126e82b8b9b62 to your computer and use it in GitHub Desktop.
dirty script to ingest synapse request logs into sqlite
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python3 | |
import argparse | |
import re | |
import sqlite3 | |
import sys | |
# Sorry everyone. | |
# Use nongreedy matches to make this well fast. | |
from datetime import datetime | |
multiple_files_pattern = re.compile(r""" | |
^ | |
([^:]*?):(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
{(.*?)}\ .*?:\ | |
(.*?)sec/(.*?)sec\ | |
\((.*?)sec,\ (.*?)sec\)\ | |
\((.*?)sec/(.*?)sec/(.*?)\)\ | |
(.*?)B\ (.*?)\ | |
\"(.*?)\ (.*?)\ .*?\"\ | |
\"(.*?)\"\ | |
\[(.*?)\ dbevts\] | |
$ | |
""", re.VERBOSE) | |
single_file_pattern = re.compile(r""" | |
^ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
(.*?)\ ?-\ | |
(.*?)\ -\ | |
(.*?)\ -\ | |
{(.*?)}\ .*?:\ | |
(.*?)sec/(.*?)sec\ | |
\((.*?)sec,\ (.*?)sec\)\ | |
\((.*?)sec/(.*?)sec/(.*?)\)\ | |
(.*?)B\ (.*?)\ | |
\"(.*?)\ (.*?)\ .*?\"\ | |
\"(.*?)\"\ | |
\[(.*?)\ dbevts\] | |
""", re.VERBOSE) | |
def log_entries(args): | |
pattern = multiple_files_pattern if args.multiple_files else single_file_pattern | |
with open(args.input, "rt") as f: | |
for i, line in enumerate(f, start=1): | |
if i % 1000 == 0: | |
print(i) | |
match = pattern.match(line) | |
if match is None: | |
print(f"Couldn't parse {repr(line)}", file=sys.stderr) | |
continue | |
parameters = [] | |
# If the input isn't from grepping multiple files, add the filename as source | |
if not args.multiple_files: | |
parameters.append(args.input) | |
parameters.extend(match.groups()) | |
parameters[1] = datetime.strptime(parameters[1], "%Y-%m-%d %H:%M:%S,%f").timestamp() | |
yield parameters | |
parser = argparse.ArgumentParser() | |
parser.add_argument("input", help="Read this log file") | |
parser.add_argument("--output", default="logs.db", help="Write to this sqlite db") | |
parser.add_argument("--table_name", help="Write entries to this table. If omitted, uses " | |
"the filename up until the first fullstop") | |
parser.add_argument("--multiple-files", action="store_true", | |
help="Use this if your output comes from grepping multiple files," | |
"i.e. if lines are prefixed with filename:'") | |
if __name__ == "__main__": | |
args = parser.parse_args() | |
if args.table_name is None: | |
args.table_name = args.input.split(".")[0] | |
con = sqlite3.connect(args.output) | |
cur = con.cursor() | |
# BEWARE: SQL injection in args.table_name here | |
# But not sure there's a better way? C.f. https://stackoverflow.com/a/3247553/5252017 | |
cur.execute(rf""" | |
CREATE TABLE {args.table_name} | |
( | |
filename text, | |
timestamp double, -- seconds since epoch | |
logger text, | |
lineno int, | |
level text, | |
request_id text, | |
ip_addr text, | |
port int, | |
requester text, | |
processing_time double, | |
response_send_time double, | |
ru_utime double, | |
ru_stime double, | |
db_sched_duration_sec double, | |
db_txn_duration_sec double, | |
db_txn_count int, | |
bytes_sent int, | |
status int, | |
method text, | |
url text, | |
user_agent text, | |
db_events int | |
); | |
""") | |
sql = f"INSERT INTO {args.table_name} VALUES ({','.join('?' * 22)});" | |
cur.executemany(sql, log_entries(args)) | |
con.commit() | |
print(f"Wrote {cur.rowcount} rows to table {args.table_name} in {args.output}") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Brief instructions: