Skip to content

Instantly share code, notes, and snippets.

@DamianZaremba
Created September 16, 2011 18:18
Show Gist options
  • Save DamianZaremba/1222738 to your computer and use it in GitHub Desktop.
Save DamianZaremba/1222738 to your computer and use it in GitHub Desktop.
Test CBNG scoring from python
import socket
import xml.etree.ElementTree as etree
def _native_str(value):
    """Return *value* as the interpreter's native ``str`` type.

    Works on both Python 2 and Python 3: text that is not already a native
    ``str`` is converted through UTF-8 instead of the implicit ASCII codec
    (Python 2) or the ``b'...'`` repr (Python 3).
    """
    text_type = type(u"")
    if isinstance(value, str):
        return value
    if isinstance(value, text_type):
        # Python 2 ``unicode``: ``str()`` would raise on non-ASCII text.
        return value.encode("utf-8")
    if isinstance(value, bytes):
        # Python 3 ``bytes``: decode rather than emit the ``b'...'`` repr.
        return value.decode("utf-8")
    return str(value)


def xmlize_edit(edit, part=False):
    """Serialize the mapping *edit* into an XML fragment.

    Each key becomes an element named after the key:

    * a dict value is serialized recursively inside the element;
    * a list (or tuple) value produces one element per item, each named
      after the key; dict items are serialized recursively;
    * any other value is converted to text and XML-escaped.

    Args:
        edit: Mapping of element names to values.
        part: When false (the default) the output is prefixed with the XML
            declaration plus ``<WPEditSet><WPEdit>`` and suffixed with
            ``</WPEdit>``.  ``<WPEditSet>`` is left open by this function.
            When true only the inner elements are returned.

    Returns:
        The XML as a native ``str``.
    """
    # Local import keeps this block self-contained.
    from xml.sax.saxutils import escape

    def render(value):
        # Nested mappings recurse; everything else is escaped text so that
        # '<', '>' and '&' in edit text cannot break the document.
        if isinstance(value, dict):
            return xmlize_edit(value, True)
        return escape(_native_str(value))

    parts = []
    if not part:
        parts.append('<?xml version="1.0"?><WPEditSet><WPEdit>')

    for key, val in edit.items():
        tag = _native_str(key)
        # A sequence repeats the element once per item.
        items = val if isinstance(val, (list, tuple)) else [val]
        for item in items:
            parts.append("<%s>%s</%s>" % (tag, render(item), tag))

    if not part:
        parts.append('</WPEdit>')
    return "".join(parts)
def check_vandalism(edit):
    """Send *edit* to the scoring service on localhost:3565 and read its verdict.

    The edit is serialized with ``xmlize_edit``, written to the socket, and
    the XML reply is parsed for the ``score`` and ``think_vandalism``
    children of the ``WPEdit`` element.

    Args:
        edit: Mapping describing the edit, as accepted by ``xmlize_edit``.

    Returns:
        A ``(flag, message)`` tuple.  ``flag`` is ``True`` only when the
        reply's ``think_vandalism`` text is exactly ``"true"``.  ``message``
        is ``"ANN scored at <score>"``; the score is ``"?"`` when the
        service could not be reached or its reply could not be parsed.

    Raises:
        socket.error: If sending or receiving fails after a successful
            connect (the socket is still closed).
    """
    data = {
        "score": "?",
        "think_vandalism": "?",
    }

    csocket = None
    try:
        csocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        csocket.connect(("localhost", 3565))
    except socket.error:
        print("Do some logging about a fail here")
        if csocket is not None:
            csocket.close()
        return (False, "ANN scored at %s" % data['score'])

    reply = bytearray()
    try:
        payload = xmlize_edit(edit)
        if not isinstance(payload, bytes):
            # Sockets carry bytes; encode text once at the I/O boundary.
            payload = payload.encode("utf-8")
        # sendall: plain send() may write only part of the payload.
        csocket.sendall(payload)

        end_edit_set = False
        while True:
            chunk = csocket.recv(4096)
            if not chunk:
                break
            reply += chunk
            # NOTE(review): the set terminator is only sent when the reply
            # so far does NOT contain '</WPEdit>'.  Kept as in the original;
            # confirm against the server protocol that this is intended.
            if not end_edit_set and b'</WPEdit>' not in reply:
                csocket.sendall(b'</WPEditSet>')
                end_edit_set = True
    finally:
        # Always release the socket, even if send/recv raised.
        csocket.close()

    # Load the reply back into an element tree.
    try:
        edit_set = etree.fromstring(bytes(reply))
    except etree.ParseError:
        print("Do some logging here....")
        return (False, "ANN scored at %s" % data['score'])

    # Compare with None explicitly: Element truthiness reflects the number
    # of children, not whether the element was found.
    wp_edit = edit_set.find("WPEdit")
    if wp_edit is not None:
        try:
            data['score'] = float(wp_edit.find('score').text)
            data['think_vandalism'] = str(wp_edit.find('think_vandalism').text)
        except (AttributeError, TypeError, ValueError):
            # Missing child element, empty text, or a non-numeric score.
            print("Do some logging about bad values")
        else:
            if data['think_vandalism'] == "true":
                return (True, "ANN scored at %s" % data['score'])

    return (False, "ANN scored at %s" % data['score'])
# Sample edit passed to check_vandalism() below.
edit = {
    # Fields describing the edit and the editing user.
    'EditType': 'change',
    'EditID': 0,
    'comment': 'abc 123!',
    'user': 'mrBoobs',
    'user_edit_count': '90001',
    'user_distinct_pages': '1',
    'user_warns': '25',
    'prev_user': 'meErrp',
    'user_reg_time': '943920000',
    # Fields describing the page.
    'common': {
        'page_made_time': '943920000',
        'title': 'WIBBLES',
        'namespace': 'Main',
        'creator': 'Meeeps',
        'num_recent_edits': '100000',
        'num_recent_reversions': '355555',
    },
    # Revision after the edit.
    'current': {
        'minor': False,
        'timestamp': 943920035,
        'text': 'BOOOOOOOOOOOOOOS ARE NATZI',
    },
    # Revision before the edit.
    'previous': {
        'timestamp': 943920034,
        'text': 'Monkeys like bannanas.',
    },
}

# Score the sample edit and show the (flag, message) tuple.
print(check_vandalism(edit))
@DamianZaremba
Copy link
Author

damian@delta:~/CBNG$ python test_score.py
(True, 'ANN scored at 0.972285')

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment