Created
August 9, 2012 01:33
-
-
Save ianonavy/3300214 to your computer and use it in GitHub Desktop.
This simple Python script shows the top contributing members of the online community, webdevRefinery. It compiles a list of members with the highest post counts and sorts them by how high their reputations are in proportion to their post counts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" wdR Top Contributor List | |
This simple Python script shows the top contributing members of the online | |
community, webdevRefinery. It compiles a list of members with the highest post | |
counts and sorts them by how high their reputations are in proportion to their | |
post counts. | |
Author: ianonavy <[email protected]> | |
Last updated: 08 August 2012 | |
""" | |
from datetime import datetime | |
from urllib2 import urlopen | |
from HTMLParser import HTMLParser | |
# Configuration

# Member-list URL template, sorted by post count (descending), 60 results per
# page. The trailing %d is filled with the offset of the first result to show.
BASE_URL = (
    "http://webdevrefinery.com/forums/members/"
    "page__sort_key__posts__sort_order__desc"
    "__max_results__60__name_box__begins__st__%d"
)
HEADER_WIDTH = 63  # number of dashes printed under the table header
MINIMUM_POST_COUNT = 150  # minimum post count required to be listed
MINIMUM_REPUTATION = 20  # minimum reputation required to be listed
NUMBER_OF_PAGES = 5  # number of member-list pages to fetch from webdevRefinery
class Member(object):
    """
    Simple class used to represent a member because I hate tuples.

    Attributes:
    username - the member's name, always stored as a string
    post_count - the number of posts, stored as an int
    reputation - the reputation, stored as a float
    days_joined - whole days between date_joined and the moment the object
                  was constructed
    helpfulness - score used for ranking: reputation plus the average number
                  of posts per day since joining
    """
    # Class-level defaults; __init__ overwrites every one of them per instance.
    username = ""
    post_count = 0
    reputation = 0.0
    helpfulness = 0.0
    days_joined = 0

    def __init__(self, username, post_count, reputation, date_joined):
        """
        Member class constructor, which normalises the raw values and
        calculates the helpfulness score.

        NOTE(review): older documentation describes the score as the ratio
        between reputation and post count, but the code has always computed
        reputation + posts per day. The formula is kept as is; confirm which
        one is intended.

        Keyword arguments:
        username - the username of the member
        post_count - the number of posts the member has posted
        reputation - the amount of reputation the member has earned
        date_joined - a Python datetime object representing the date the
                      member joined wdR.
        """
        self.username = str(username)  # just in case someone has a numeric name
        self.post_count = int(post_count)
        self.reputation = float(reputation)
        self.days_joined = (datetime.now() - date_joined).days
        # A member who joined less than a day ago has days_joined == 0, which
        # used to raise ZeroDivisionError. Treat it as one day for the rate.
        active_days = max(self.days_joined, 1)
        # float() forces true division; under Python 2 int / int would
        # silently truncate the posts-per-day rate.
        self.helpfulness = (self.reputation +
                            float(self.post_count) / active_days)

    def __repr__(self):
        """ Returns a string version of this member for table printing. """
        # %-16.16s left-justifies the username and truncates it to exactly 16
        # characters so the tab-separated columns line up.
        return "%-16.16s\t%7.2f\t%d\t%d\t%d" % (
            self.username, self.helpfulness, self.post_count,
            self.reputation, self.days_joined)
class MyHTMLParser(HTMLParser):
    """
    Custom HTML parser which picks member data out of the member-list pages
    and appends it to the module-level lists usernames, post_counts,
    dates_joined and reputations.

    The three found_* flags record what kind of value the next piece of text
    is expected to be.
    """
    found_username = False
    found_post_count = False
    found_date_joined = False

    def handle_starttag(self, tag, attrs):
        """
        Sets the username flag when the tag is a View Profile anchor and
        clears it on any other start tag. Overrides the HTMLParser method.

        Keyword arguments:
        tag - the tag name
        attrs - list of (name, value) attribute pairs of the tag
        """
        self.found_username = (tag == "a" and
                               ('title', 'View Profile') in attrs)

    def handle_post_count_flag(self, data):
        """
        Parses a piece of data and toggles the post count flag. If the current
        piece of data is "Posts:", the next one is the post count, so set the
        flag to true. Unset the flag when the data contains "Views".

        Keyword argument:
        data - parsed HTML input data used to determine the flag state.

        Returns True when the data is one of the two markers, False otherwise
        """
        if data == "Posts:":
            self.found_post_count = True
            return True
        if "Views" in data:
            self.found_post_count = False
            return True
        return False

    def handle_date_joined_count_flag(self, data):
        """
        Parses a piece of data and toggles the date joined flag. If the current
        piece of data is "Joined:", the next one is the join date, so set the
        flag to true. Unset the flag when the data contains "Group".

        Keyword argument:
        data - parsed HTML input data used to determine the flag state.

        Returns True when the data is one of the two markers, False otherwise
        """
        if data == "Joined:":
            self.found_date_joined = True
            return True
        if "Group" in data:
            self.found_date_joined = False
            return True
        return False

    def handle_data(self, data):
        """
        Parses HTML input data (innerText NOT innerHTML) and uses flags to tell
        itself what kind of data it is. When the correct data type is found, it
        appends to the global lists usernames, post_counts, dates_joined or
        reputations. Overrides the HTMLParser method.

        Keyword argument:
        data - a run of text between tags; surrounding whitespace is ignored.
        """
        data = data.strip()
        # Compare by value: the previous `data is ""` tested object identity
        # and only worked because CPython happens to intern the empty string.
        if not data:
            return
        # Marker text ("Posts:", "Joined:", ...) only toggles a flag; it is
        # never stored as a value.
        if self.handle_post_count_flag(data) or \
                self.handle_date_joined_count_flag(data):
            return
        if self.found_username:
            usernames.append(data)
        elif self.found_post_count:
            post_counts.append(data)
        elif self.found_date_joined:
            # e.g. "09-August 12": day, full month name, two-digit year
            dates_joined.append(datetime.strptime(data, '%d-%B %y'))
        elif "Reputation: " in data:
            # Keep whatever follows the first ": " as the numeric value.
            reputation = float(data[(data.find(":") + 2):].strip())
            reputations.append(reputation)
# Global lists
# Parallel columns filled by MyHTMLParser.handle_data; entry i of each list
# is expected to describe the same member.
usernames, post_counts, reputations, dates_joined = [], [], [], []
# Member objects that passed the minimum post count / reputation filter.
members = []
def fetch_data():
    """
    Fetches the member-list pages from wdR and feeds them to the parser.

    Downloads NUMBER_OF_PAGES pages and passes each one to a single
    MyHTMLParser instance, which fills the global lists as a side effect.
    No caching yet, but that's on the TODO.
    """
    # Local import so this function carries its own dependency.
    from contextlib import closing

    results_per_page = 60  # must match max_results__60 in BASE_URL
    print("Fetching data...\n")
    parser = MyHTMLParser()
    for page in range(NUMBER_OF_PAGES):
        page_url = BASE_URL % (page * results_per_page)
        # closing() guarantees the HTTP response is released even if read()
        # or feed() raises; previously the response was never closed.
        with closing(urlopen(page_url)) as response:
            parser.feed(response.read())
def populate_members():
    """
    Populates the list of members based on fetched data.

    Builds a Member from entry i of usernames, post_counts, reputations and
    dates_joined, and appends it to the global members list when it meets
    both MINIMUM_POST_COUNT and MINIMUM_REPUTATION.

    Raises AssertionError when nothing has been fetched or when the parsed
    lists differ in length. The checks are raised explicitly rather than
    written as assert statements so that they still run under python -O,
    while keeping the exception type the assert statements produced.
    """
    columns = (usernames, post_counts, reputations, dates_joined)
    if not usernames:
        raise AssertionError("no member data has been fetched")
    lengths = [len(column) for column in columns]
    if len(set(lengths)) != 1:
        # zip() below would silently drop or mis-pair entries otherwise.
        raise AssertionError(
            "parsed lists differ in length (usernames, post_counts, "
            "reputations, dates_joined): %r" % (lengths,))
    for fields in zip(*columns):
        candidate = Member(*fields)
        if (candidate.post_count >= MINIMUM_POST_COUNT and
                candidate.reputation >= MINIMUM_REPUTATION):
            members.append(candidate)
def sort_members():
    """ Reorders the global members list in place, most helpful first. """
    def score(member):
        return member.helpfulness

    # Slice assignment keeps the same list object, so this is still an
    # in-place update of the global.
    members[:] = sorted(members, key=score, reverse=True)
def print_top_list():
    """ Writes the ranked table of wdR members to standard output. """
    title = "The top %d most helpful members of webdevRefinery are:\n"
    print(title % len(members))
    column_names = " Username\t\tScore\tPosts\tRep\tDays Joined\n"
    print(column_names + "-" * HEADER_WIDTH)
    # Ranks are 1-based and zero-padded to two digits.
    for rank, member in enumerate(members, 1):
        print("#%02d: %s" % (rank, member))
def run():
    """ Executes the whole pipeline: fetch, populate, sort, then print. """
    pipeline = (fetch_data, populate_members, sort_members, print_top_list)
    for stage in pipeline:
        stage()
# Only invoke run() when this file is executed directly, not when imported.
if __name__ == "__main__":
    run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment