Created
February 15, 2013 03:22
-
-
Save rht/4958340 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import re | |
# Counts noted by the original author for the 'aaronsw.json' dump
# (kept verbatim; they are not recomputed by this script).
#ntotal = 761
#nempty = 384
#wpcomment = 5
#nyan = 7 # by yan

# A message containing any of these substrings is treated as private mail,
# spam or a pingback and is left out of the output.
# NOTE(review): the first two entries are identical and look like an e-mail
# redaction placeholder -- presumably they were two distinct addresses in the
# author's working copy; restore them before relying on this filter.
EXCLUDED_MARKERS = (
    "[email protected]",
    "[email protected]",
    "zyan/www",
    "A new pingback on the post",
)

# The untouched form placeholder followed by one more line and the "--"
# delimiter: the signer left no personal message.
EMPTY_COMMENT_RE = re.compile(
    r"\[Optional: Insert a personal message to the MIT administration here.\]"
    r".*\n.*\n--"
)
# Any line that still carries the form placeholder text.
PLACEHOLDER_LINE_RE = re.compile(
    r".*Insert a personal message to the MIT administration here.*\n"
)
# Zero or more whole lines followed by "--": the personal message.
BODY_RE = re.compile(r'(.*\n)*--')
# A "--" line and every complete line after it: the signature.
SIGNATURE_RE = re.compile(r'--.*\n(.*\n)*')
# Text from "--" to the end of that line, stripped out of the signature.
DASH_LINE_RE = re.compile(r'--.*\n')

# Fields of a WordPress "A new comment" moderation notification.
WP_EMAIL_RE = re.compile(r'E-mail : .*\n')
WP_AUTHOR_RE = re.compile(r'Author : .*\(')
WP_COMMENT_RE = re.compile(r'Comment: (.*\n)*$')
# Moderation links and counters that trail the comment text.
WP_BOILERPLATE_RE = re.compile(
    r'(Approve it:.*\n|Trash it:.*\n|Spam it:.*\n'
    r'|Currently.*comments are waiting.*\n'
    r'|http://open.scripts.mit.edu/blog/wp-admin/edit-comments.php.*\n'
    r'|Comment:.*\n)'
)


def _sender_label(lines):
    """Join signature lines into a one-line label, dropping the second line.

    The second line is removed exactly as the original script did
    (presumably an e-mail or similar contact line -- confirm against the
    data).  Signatures with fewer than two lines are kept whole instead of
    raising IndexError, and an empty signature is labelled "Anonymous".
    """
    lines = list(lines)
    if len(lines) > 1:
        del lines[1]
    return ', '.join(line.strip() for line in lines) or "Anonymous"


def _parse_wp_comment(message):
    """Return (entry, comment_length) for a WordPress comment notification.

    Returns None when the e-mail, author or comment field cannot be found,
    so the caller can fall back to treating the text as a petition message.
    """
    email = WP_EMAIL_RE.search(message)
    author = WP_AUTHOR_RE.search(message)
    body = WP_COMMENT_RE.search(message)
    if email is None or author is None or body is None:
        return None
    email = email.group(0).replace('E-mail : ', '').strip()
    author = author.group(0).replace('(', '').replace('Author : ', '').strip()
    comment = WP_BOILERPLATE_RE.sub('', body.group()).strip()
    return "**%s(%s)**\n%s\n" % (author, email, comment), len(comment)


def _parse_form_message(message):
    """Return (entry, comment_length) for a petition-form message.

    Text before the "--" delimiter is the comment and the lines after it are
    the signature.  Without a delimiter the whole text is reported as
    anonymous with a recorded length of 0.
    """
    text = PLACEHOLDER_LINE_RE.sub('', message)
    body = BODY_RE.search(text)
    if body is None:
        return "**Anonymous**\n%s\n" % text, 0
    signature = SIGNATURE_RE.search(text)
    # No match means the delimiter was the last line with no newline after
    # it, i.e. there is no signature at all.
    signature = '' if signature is None else signature.group()
    sender = _sender_label(DASH_LINE_RE.sub('', signature).strip().split('\n'))
    comment = body.group().replace('--', '').strip()
    return "**%s**\n%s\n" % (sender, comment), len(comment)


def _parse_signature_only(message):
    """Return (entry, 0) for a message whose comment field was left empty."""
    text = PLACEHOLDER_LINE_RE.sub('', message).replace('--', '').strip()
    return "**%s**\n" % _sender_label(text.split('\n')), 0


def sanitize_message(message):
    """Turn one raw message into (entry, comment_length).

    entry is None when the message is filtered out by EXCLUDED_MARKERS;
    otherwise it is the formatted text block for that message.
    """
    if any(marker in message for marker in EXCLUDED_MARKERS):
        return None, 0
    if EMPTY_COMMENT_RE.search(message):
        return _parse_signature_only(message)
    if "A new comment" in message:
        parsed = _parse_wp_comment(message)
        if parsed is not None:
            return parsed
        # The phrase appeared in ordinary petition text: parse it as such.
    return _parse_form_message(message)


def sanitize_petitions(petitions):
    """Format every petition and record the length of each comment.

    petitions is a sequence whose items hold the message text at
    item[1][0][1] (the original notes label item[1][1] as "flags seen" and
    item[1][0] as the rfc822 text).

    Returns (sanitized, lengths): sanitized lists the formatted entries of
    the messages that were kept; lengths has exactly one number per input
    petition, 0 for filtered, anonymous or comment-less messages.
    """
    sanitized = []
    lengths = []
    for petition in petitions:
        entry, length = sanitize_message(petition[1][0][1])
        if entry is not None:
            sanitized.append(entry)
        lengths.append(length)
    return sanitized, lengths


if __name__ == "__main__":
    import sys

    # Binary mode lets json detect the encoding itself and behaves the same
    # on Python 2 and 3; the with-block closes the file.
    with open('aaronsw.json', 'rb') as handle:
        petitions = json.load(handle)
    sanitizedpetitions, msglength = sanitize_petitions(petitions)

    # Write UTF-8 bytes directly so the output does not depend on the
    # console encoding (sys.stdout.buffer exists only on Python 3).
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    for i in sanitizedpetitions:
        out.write(i.encode('utf-8') + b'\n')

    # Debug aid kept from the original: locate the longest comment.
    #maxlength = max(msglength)
    #maxindex = [i for i, j in enumerate(msglength) if j == maxlength][0]
    #print maxindex
    #print maxlength
    #print petitions[maxindex][1][0][1]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment