Created
June 2, 2014 03:36
-
-
Save t33chong/9110e67bf776ad471c92 to your computer and use it in GitHub Desktop.
Heuristic to parse IRC logs for humorous content
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re

# Heuristic scanner for IRC logs: prints the lines leading up to moments where
# people laughed, or where one of the configured nicknames received a burst of
# "nick++" upvotes.

DIR = ''  # Directory containing chat logs ('' means the current directory)
FILENAMES = ('#python',)  # Substrings that chat logs to be searched should contain
NICKS = ('tristaneuan',)  # Nicknames expected to be upvoted
LAUGHS = ('[ha]{6,}', '[lo]{6,}', 'lmao', 'lmfao', 'rofl')  # Regex fragments

LAUGH_CONTEXT = 10    # Lines of context printed before a laugh
UPVOTE_CONTEXT = 20   # Lines of context printed before an upvote burst
UPVOTE_THRESHOLD = 3  # Upvotes needed to count as a burst
UPVOTE_WINDOW = 10    # Lines without an upvote after which the count resets

# FILENAMES and NICKS are literal text, so escape them: IRC nicks and channel
# names may legally contain regex metacharacters such as '|', '[' or '^'.
fileregex = re.compile('|'.join(re.escape(name) for name in FILENAMES))
laughregex = re.compile('|'.join(LAUGHS), flags=re.I)
upregex = re.compile('|'.join(re.escape(nick) + r'\+\+' for nick in NICKS))


def find_highlights(lines):
    """Yield ``(index, context)`` for every potentially humorous moment.

    ``lines`` is a list of log lines (each normally ending in a newline).
    ``index`` is the 0-based position of the triggering line and ``context``
    is the text of the preceding lines plus the triggering line itself.

    Two triggers are recognised:

    * a line matching ``laughregex`` -- context is up to LAUGH_CONTEXT
      preceding lines;
    * the UPVOTE_THRESHOLD-th line matching ``upregex`` without the counter
      having been reset -- context is up to UPVOTE_CONTEXT preceding lines.
      The counter resets after a burst is reported and whenever
      UPVOTE_WINDOW or more lines have passed since the last upvote.

    A single line may fire both triggers; the laugh is yielded first.
    """
    upcount = 0
    last = 0
    for n, line in enumerate(lines):
        if laughregex.search(line):
            # Clamp the start: a negative start index would wrap around to
            # the end of the list and yield empty or unrelated context.
            yield n, ''.join(lines[max(0, n - LAUGH_CONTEXT):n + 1])
        if upregex.search(line):
            upcount += 1
            last = n
            if upcount >= UPVOTE_THRESHOLD:
                yield n, ''.join(lines[max(0, n - UPVOTE_CONTEXT):n + 1])
                upcount = 0
        if n >= last + UPVOTE_WINDOW:
            # Too long since the last upvote: start counting afresh.
            upcount = 0


def main():
    """Scan every matching log file in DIR and print its highlights."""
    # os.listdir('') raises, whereas os.path.join('', name) is relative to
    # the current directory; fall back to os.curdir so the two agree.
    for filename in os.listdir(DIR or os.curdir):
        if not fileregex.search(filename):
            continue
        filepath = os.path.join(DIR, filename)
        if not os.path.isfile(filepath):
            # Skip directories whose names happen to match.
            continue
        with open(filepath) as log:
            lines = log.readlines()
        for n, context in find_highlights(lines):
            print('%s: %d' % (filename, n))
            print(context)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment