Skip to content

Instantly share code, notes, and snippets.

@ianonavy
Created August 9, 2012 01:33
Show Gist options
  • Save ianonavy/3300214 to your computer and use it in GitHub Desktop.
Save ianonavy/3300214 to your computer and use it in GitHub Desktop.
This simple Python script shows the top contributing members of the online community, webdevRefinery. It compiles a list of members with the highest post counts and sorts them by how high their reputations are in proportion to their post counts.
#!/usr/bin/env python
""" wdR Top Contributor List
This simple Python script shows the top contributing members of the online
community, webdevRefinery. It compiles a list of members with the highest post
counts and sorts them by how high their reputations are in proportion to their
post counts.
Author: ianonavy <[email protected]>
Last updated: 08 August 2012
"""
from datetime import datetime
from urllib2 import urlopen
from HTMLParser import HTMLParser
# Configuration
# Member-list URL template, sorted by post count in descending order with 60
# results per page. The trailing %d is filled in with the offset of the first
# member to show.
BASE_URL = ("http://webdevrefinery.com/forums/members/page__sort_key__posts__so"
"rt_order__desc__max_results__60__name_box__begins__st__%d")
HEADER_WIDTH = 63 # number of dashes printed under the table's column headings
MINIMUM_POST_COUNT = 150 # minimum post count required to be listed
MINIMUM_REPUTATION = 20 # minimum reputation required to be listed
NUMBER_OF_PAGES = 5 # number of pages to get from webdevRefinery
class Member(object):
    """
    Simple record describing one forum member, plus a derived score.
    Attributes:
    username - the member's name, always stored as a string
    post_count - the number of posts the member has made (int)
    reputation - the member's reputation (float)
    days_joined - whole days elapsed between date_joined and now (int)
    helpfulness - reputation plus the average number of posts per day of
                  membership (float); this is the sort key of the top list
    """

    # Class-level defaults; every one of them is overwritten in __init__.
    username = ""
    post_count = 0
    reputation = 0.0
    helpfulness = 0.0
    days_joined = 0

    def __init__(self, username, post_count, reputation, date_joined):
        """
        Member class constructor, which normalises the raw values and
        calculates the helpfulness score as
        reputation + post_count / days_joined.
        NOTE(review): earlier documentation described the score as the ratio
        between reputation and post count, which is not what the formula
        computes - confirm which of the two is intended.
        Keyword arguments:
        username - the username of the member
        post_count - the number of posts the member has posted
        reputation - the amount of reputation the member has earned
        date_joined - a Python datetime object representing the date the
                      member joined wdR.
        """
        self.username = str(username)  # just in case someone has a numeric name
        self.post_count = int(post_count)
        self.reputation = float(reputation)
        self.days_joined = int((datetime.now() - date_joined).days)
        # A member who joined less than a day ago has days_joined == 0, which
        # would raise ZeroDivisionError; treat any membership shorter than one
        # day as a full day for the purpose of the average.
        membership_days = max(self.days_joined, 1)
        # float() forces true division so the score is not truncated by
        # Python 2 integer division.
        self.helpfulness = (self.reputation +
                            float(self.post_count) / membership_days)

    def __repr__(self):
        """ Returns a string version of this member for table printing. """
        # %-16.16s left-justifies the name in a column of exactly 16
        # characters, padding short names and truncating long ones.
        return "%-16.16s\t%7.2f\t%d\t%d\t%d" % (
            self.username, self.helpfulness, self.post_count,
            self.reputation, self.days_joined)
class MyHTMLParser(HTMLParser):
    """
    Custom HTML parser which picks member data out of the fed markup and
    appends it to the module-level lists usernames, post_counts, dates_joined
    and reputations.
    """

    # Flags describing what kind of value the next piece of text holds.
    found_username = False
    found_post_count = False
    found_date_joined = False

    def handle_starttag(self, tag, attrs):
        """
        Sets the username flag when the tag is a View Profile anchor and
        clears it for every other start tag. Overrides the HTMLParser method.
        Keyword arguments:
        tag - the tag name
        attrs - list of (name, value) attribute pairs of the tag
        """
        self.found_username = (tag == "a" and
                               ('title', 'View Profile') in attrs)

    def handle_post_count_flag(self, data):
        """
        Parses a piece of data and toggles the post count flag. If the current
        piece of data is "Posts:", the next one is the post count, so set the
        flag to true. Unset the flag when the data contains "Views".
        Keyword argument:
        data - parsed HTML input data used to determine the flag state.
        Returns True when the data is one of the two markers, False otherwise
        """
        if data == "Posts:":
            self.found_post_count = True
            return True
        if "Views" in data:
            self.found_post_count = False
            return True
        return False

    def handle_date_joined_count_flag(self, data):
        """
        Parses a piece of data and toggles the date joined flag. If the current
        piece of data is "Joined:", the next one is the join date, so set the
        flag to true. Unset the flag when the data contains "Group".
        Keyword argument:
        data - parsed HTML input data used to determine the flag state.
        Returns True when the data is one of the two markers, False otherwise
        """
        if data == "Joined:":
            self.found_date_joined = True
            return True
        if "Group" in data:
            self.found_date_joined = False
            return True
        return False

    def handle_data(self, data):
        """
        Parses HTML input data (innerText NOT innerHTML) and uses flags to tell
        itself what kind of data it is. When the correct data type is found, it
        populates the global lists usernames, post_counts, dates_joined and
        reputations. Overrides the HTMLParser method.
        Keyword argument:
        data - the text found between tags.
        """
        data = data.strip()
        # Compare by value: an identity test against "" only works when the
        # interpreter happens to intern the empty string.
        if not data:
            return
        # Marker text only toggles a flag and is never stored. The date check
        # is skipped when the post count check already matched.
        if self.handle_post_count_flag(data):
            return
        if self.handle_date_joined_count_flag(data):
            return
        if self.found_username:
            usernames.append(data)
        elif self.found_post_count:
            post_counts.append(data)
        elif self.found_date_joined:
            dates_joined.append(datetime.strptime(data, '%d-%B %y'))
        elif "Reputation: " in data:
            # Take what follows the label itself rather than whatever follows
            # the first colon in the text.
            reputation_text = data.split("Reputation: ", 1)[1].strip()
            reputations.append(float(reputation_text))
# Global lists
# The first four are filled by MyHTMLParser.handle_data while pages are
# parsed. populate_members treats them as parallel: index i of each list
# describes the same member.
usernames = []
post_counts = []
reputations = []
dates_joined = []
# Member objects that passed the minimum post count and reputation filters.
members = []
def fetch_data():
    """
    Fetches NUMBER_OF_PAGES pages of the member list from wdR and feeds each
    one to a MyHTMLParser, which fills the global lists. No caching yet, but
    that's on the TODO.
    """
    # Imported here so the function brings its own dependency into scope.
    from contextlib import closing

    # Offset step between pages; matches max_results__60 in BASE_URL.
    results_per_page = 60

    print("Fetching data...\n")
    parser = MyHTMLParser()
    for page in range(NUMBER_OF_PAGES):
        # closing() guarantees the HTTP response is released even when
        # reading or parsing raises.
        with closing(urlopen(BASE_URL % (page * results_per_page))) as response:
            parser.feed(response.read())
def populate_members():
    """
    Populates the list of members based on fetched data. Builds one Member
    per parsed row and keeps only those that reach both MINIMUM_POST_COUNT
    and MINIMUM_REPUTATION.
    Raises AssertionError when nothing was fetched or when the parsed lists
    have different lengths.
    """
    # Explicit checks instead of assert statements, which are stripped when
    # Python runs with -O. AssertionError is kept so the exception type seen
    # by callers does not change.
    if not usernames:
        raise AssertionError("no member data fetched; run fetch_data() first")
    sizes = (len(usernames), len(post_counts), len(reputations),
             len(dates_joined))
    if len(set(sizes)) != 1:
        raise AssertionError(
            "parsed lists are misaligned: %d usernames, %d post counts, "
            "%d reputations, %d join dates" % sizes)
    for fields in zip(usernames, post_counts, reputations, dates_joined):
        candidate = Member(*fields)
        if (candidate.post_count >= MINIMUM_POST_COUNT and
                candidate.reputation >= MINIMUM_REPUTATION):
            members.append(candidate)
def sort_members():
    """ Orders the global members list in place, most helpful first. """
    def by_helpfulness(member):
        return member.helpfulness

    members.sort(key=by_helpfulness, reverse=True)
def print_top_list():
    """
    Writes the ranking table to standard output: a title line, the column
    headings underlined with dashes, then one numbered row per member.
    """
    title = "The top %d most helpful members of webdevRefinery are:\n"
    print(title % len(members))
    headings = " Username\t\tScore\tPosts\tRep\tDays Joined\n"
    print(headings + "-" * HEADER_WIDTH)
    for rank, member in enumerate(members, 1):
        print("#%02d: %s" % (rank, member))
def run():
    """ Executes the fetch, populate, sort and print stages in order. """
    stages = (fetch_data, populate_members, sort_members, print_top_list)
    for stage in stages:
        stage()
# Run the whole pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment