Skip to content

Instantly share code, notes, and snippets.

@t33chong
Created June 2, 2014 03:36
Show Gist options
  • Save t33chong/9110e67bf776ad471c92 to your computer and use it in GitHub Desktop.
Save t33chong/9110e67bf776ad471c92 to your computer and use it in GitHub Desktop.
Heuristic to parse IRC logs for humorous content
import os
import re
# --- Configuration -----------------------------------------------------------
DIR = ''  # Directory containing chat logs
FILENAMES = ('#python',)  # Substrings; a log is searched if its name contains any
NICKS = ('tristaneuan',)  # Nicknames expected to be upvoted (as "nick++")
# Regex fragments that count as laughter; matched case-insensitively below.
LAUGHS = ('[ha]{6,}', '[lo]{6,}', 'lmao', 'lmfao', 'rofl')


def _any_of(alternatives, flags=0):
    """Compile a regex that matches any one of *alternatives*.

    alternatives -- sequence of regex fragments, joined with ``|``.
    flags        -- passed through to ``re.compile``.

    An empty sequence yields a pattern that never matches; joining nothing
    would give the empty pattern, which matches every string.
    """
    if not alternatives:
        return re.compile(r'(?!)')
    return re.compile('|'.join(alternatives), flags)


# FILENAMES and NICKS are literal text, so they are escaped before being
# embedded in a pattern (IRC nicks may contain regex metacharacters such as
# "|", "[" or "^").  LAUGHS entries are already regex fragments.
fileregex = _any_of([re.escape(name) for name in FILENAMES])
laughregex = _any_of(LAUGHS, flags=re.I)
upregex = _any_of([re.escape(nick) + r'\+\+' for nick in NICKS])
def _context(lines, index, before):
    """Return ``lines[index]`` plus up to *before* preceding lines as one string."""
    # Clamp the start at zero: a negative slice start wraps around to the end
    # of the list and produces the wrong (usually empty) window when the hit
    # is near the top of the file.
    return ''.join(lines[max(0, index - before):index + 1])


def find_highlights(lines, laugh_re, up_re, laugh_context=10, up_context=20,
                    up_threshold=3, up_gap=10):
    """Yield ``(line_index, context)`` for each probably-humorous spot.

    lines         -- list of log lines (each normally ending in a newline).
    laugh_re      -- compiled regex that recognises laughter in a line.
    up_re         -- compiled regex that recognises an upvote in a line.
    laugh_context -- number of preceding lines included with a laugh hit.
    up_context    -- number of preceding lines included with an upvote hit.
    up_threshold  -- upvotes needed in one streak to report a hit.
    up_gap        -- a streak is reset once this many lines have passed
                     since the most recent upvote.

    Two independent triggers are checked on every line, so a single line
    can produce two results: a laugh match reports immediately, and an
    upvote reports once the streak reaches *up_threshold* (after which the
    streak starts over).
    """
    upcount = 0  # upvotes seen in the current streak
    last = 0     # index of the most recent upvote line
    for n, line in enumerate(lines):
        if laugh_re.search(line):
            yield n, _context(lines, n, laugh_context)
        if up_re.search(line):
            upcount += 1
            last = n
            if upcount >= up_threshold:
                yield n, _context(lines, n, up_context)
                upcount = 0
        # Too long since the last upvote: the streak has gone cold.
        if n >= last + up_gap:
            upcount = 0


def main():
    """Scan every matching log file in ``DIR`` and print each highlight."""
    # An empty DIR is treated as the current directory; os.listdir('')
    # raises instead of listing anything.
    for filename in os.listdir(DIR or os.curdir):
        if not fileregex.search(filename):
            continue
        filepath = os.path.join(DIR, filename)
        # Skip directories (or other non-files) whose name happens to match.
        if not os.path.isfile(filepath):
            continue
        with open(filepath) as log:
            lines = log.readlines()
        for n, context in find_highlights(lines, laughregex, upregex):
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('%s: %d' % (filename, n))
            print(context)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment