Skip to content

Instantly share code, notes, and snippets.

@chmduquesne
Last active February 20, 2024 17:39
Show Gist options
  • Save chmduquesne/713c45560dc0a1bb011a to your computer and use it in GitHub Desktop.
Save chmduquesne/713c45560dc0a1bb011a to your computer and use it in GitHub Desktop.
Put irssi logs in sqlite
#!env python
"""
Script to load irssi logs into a sqlite database.
This script assumes the following irssi log config:
autolog = "Yes";
autolog_path = "~/.irssi/logs/$tag/$0.log";
Launch it without argument. It will create or update the file irssi.db
in the current directory. Each channel is loaded into a table, and each
row within this table has 4 fields:
line: the line number of the message in the file
date: the datetime of the message
nick: the author of the message
mesg: the text of the message
If you run it several times, it will only import the new messages.
For example, on my system:
- 1st run:
2501.41s user 15.16s system 99% cpu 42:15.15 total
- 2nd run:
19.30s user 0.39s system 98% cpu 19.934 total
You can then play with
$ sqlite3 irssi.db
# Report the number of message per month
sqlite> select strftime("%Y-%m", date), count(*) from
server__channel group by strftime("%Y-%m", date) order by date;
# Number of lol per nick and per month
sqlite> select nick, strftime("%Y-%m", date), count(*) from
server__channel where mesg like '%lol%' group by nick,
strftime("%Y-%m", date) order by nick, date;
and so on...
"""
from __future__ import with_statement
import os
import datetime
import dateparser
import sqlite3
import re
def parse(logs, server, chan, db):
connection = sqlite3.connect(db)
cursor = connection.cursor()
message = re.compile('([0-9]{2}:[0-9]{2}) <([^>]+)> (.*)')
# Compute table name
table = re.sub("[^0-9a-zA-Z]", "_", server + "/" + chan)
print("Creating or updating %s" % table)
cursor.execute(
'CREATE TABLE IF NOT EXISTS %s '
'(line INT, date TEXT, nick TEXT, mesg TEXT)' % table
)
# Compute the line of the last message added
d = datetime.datetime.min
skip_to_line = -1
cursor.execute('SELECT MAX(line), date FROM ' + table)
r = cursor.fetchone()
if r is not None and r[0] is not None:
skip_to_line = int(r[0])
d = dateparser.parse(r[1])
with open(os.path.join(logs, server, chan + ".log")) as f:
for line, text in enumerate(f):
# Skip lines we already have
if line <= skip_to_line:
continue
text = text.strip('\n')
# If we find a new date, that is the current one
if text.startswith("--- Log opened "):
d = dateparser.parse(text[len("--- Log opened "):])
if text.startswith("--- Day changed "):
d = dateparser.parse(text[len("--- Day changed "):])
# Finally if it is a message, save it
m = message.match(text)
if m:
date = datetime.datetime.combine(
d.date(),
dateparser.parse(m.group(1)).time()
)
nick = unicode(m.group(2).strip().lstrip('@'), errors='ignore')
mesg = unicode(m.group(3), errors='ignore')
row = (line, str(date), nick, mesg)
cursor.execute('INSERT INTO ' + table +
' VALUES (?,?,?,?)', row)
connection.commit()
connection.close()
def main():
db = "irssi.db"
logs = os.path.expanduser("~/.irssi/logs/")
for server in os.listdir(logs):
for c in os.listdir(os.path.join(logs, server)):
chan = c[:-len(".log")]
parse(logs, server, chan, db)
if __name__ == "__main__":
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment