Last active
February 20, 2024 17:39
-
-
Save chmduquesne/713c45560dc0a1bb011a to your computer and use it in GitHub Desktop.
Put irssi logs in sqlite
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!env python | |
""" | |
Script to load irssi logs into a sqlite database. | |
This script assumes the following irssi log config: | |
autolog = "Yes"; | |
autolog_path = "~/.irssi/logs/$tag/$0.log"; | |
Launch it without argument. It will create or update the file irssi.db | |
in the current directory. Each channel is loaded into a table, and each | |
row within this table has 4 fields: | |
line: the line number of the message in the file | |
date: the datetime of the message | |
nick: the author of the message | |
mesg: the text of the message | |
If you run it several times, it will only import the new messages. | |
For example, on my system: | |
- 1st run: | |
2501.41s user 15.16s system 99% cpu 42:15.15 total | |
- 2nd run: | |
19.30s user 0.39s system 98% cpu 19.934 total | |
You can then play with | |
$ sqlite3 irssi.db | |
# Report the number of message per month | |
sqlite> select strftime("%Y-%m", date), count(*) from | |
server__channel group by strftime("%Y-%m", date) order by date; | |
# Number of lol per nick and per month | |
sqlite> select nick, strftime("%Y-%m", date), count(*) from | |
server__channel where mesg like '%lol%' group by nick, | |
strftime("%Y-%m", date) order by nick, date; | |
and so on... | |
""" | |
from __future__ import with_statement | |
import os | |
import datetime | |
import dateparser | |
import sqlite3 | |
import re | |
def parse(logs, server, chan, db): | |
connection = sqlite3.connect(db) | |
cursor = connection.cursor() | |
message = re.compile('([0-9]{2}:[0-9]{2}) <([^>]+)> (.*)') | |
# Compute table name | |
table = re.sub("[^0-9a-zA-Z]", "_", server + "/" + chan) | |
print("Creating or updating %s" % table) | |
cursor.execute( | |
'CREATE TABLE IF NOT EXISTS %s ' | |
'(line INT, date TEXT, nick TEXT, mesg TEXT)' % table | |
) | |
# Compute the line of the last message added | |
d = datetime.datetime.min | |
skip_to_line = -1 | |
cursor.execute('SELECT MAX(line), date FROM ' + table) | |
r = cursor.fetchone() | |
if r is not None and r[0] is not None: | |
skip_to_line = int(r[0]) | |
d = dateparser.parse(r[1]) | |
with open(os.path.join(logs, server, chan + ".log")) as f: | |
for line, text in enumerate(f): | |
# Skip lines we already have | |
if line <= skip_to_line: | |
continue | |
text = text.strip('\n') | |
# If we find a new date, that is the current one | |
if text.startswith("--- Log opened "): | |
d = dateparser.parse(text[len("--- Log opened "):]) | |
if text.startswith("--- Day changed "): | |
d = dateparser.parse(text[len("--- Day changed "):]) | |
# Finally if it is a message, save it | |
m = message.match(text) | |
if m: | |
date = datetime.datetime.combine( | |
d.date(), | |
dateparser.parse(m.group(1)).time() | |
) | |
nick = unicode(m.group(2).strip().lstrip('@'), errors='ignore') | |
mesg = unicode(m.group(3), errors='ignore') | |
row = (line, str(date), nick, mesg) | |
cursor.execute('INSERT INTO ' + table + | |
' VALUES (?,?,?,?)', row) | |
connection.commit() | |
connection.close() | |
def main(): | |
db = "irssi.db" | |
logs = os.path.expanduser("~/.irssi/logs/") | |
for server in os.listdir(logs): | |
for c in os.listdir(os.path.join(logs, server)): | |
chan = c[:-len(".log")] | |
parse(logs, server, chan, db) | |
if __name__ == "__main__": | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment