Created
December 2, 2013 01:16
-
-
Save rohitdholakia/7743403 to your computer and use it in GitHub Desktop.
Summary of stackexchange site
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Give the top-level XML directory and this script will print:
'''
num users
num epic users
num famous questions
num questions
num answers
num accepted answers
'''
from lxml import etree | |
import sys | |
import Utils | |
import os | |
class Stats:
    """Collect summary counts for one Stack Exchange site dump.

    Three XML inputs are streamed with ``etree.iterparse`` and the results
    are accumulated in ``self.summary`` under these keys:

    * ``users``     -- elements in the users file, document root excluded
    * ``epic``      -- badge elements whose name equals ``EPIC_BADGE_NAME``
    * ``questions`` -- posts whose type id equals ``QUESTION_TYPE_ID``
    * ``famous``    -- questions with more than ``FAMOUS_VIEW_THRESHOLD`` views
    * ``accepted``  -- questions for which ``Utils.getAcceptedId`` is truthy
    * ``answers``   -- posts whose type id equals ``ANSWER_TYPE_ID``

    Args:
        users: source for the users XML, handed to ``etree.iterparse``.
        badges: source for the badges XML, handed to ``etree.iterparse``.
        posts: source for the posts XML, handed to ``etree.iterparse``.
    """

    # Post type ids as strings, matching what the question check compares
    # against. "2" is the answer type in the Stack Exchange dump schema.
    QUESTION_TYPE_ID = "1"
    ANSWER_TYPE_ID = "2"
    # Name of the badge counted under the "epic" key.
    EPIC_BADGE_NAME = "Epic"
    # A question is "famous" once its view count exceeds this value.
    FAMOUS_VIEW_THRESHOLD = 10000

    def __init__(self, users, badges, posts):
        self.users = users
        self.badges = badges
        self.posts = posts
        self.summary = dict.fromkeys(
            ["epic", "famous", "questions", "answers", "accepted", "users"], 0)
        # One streaming parse context per input; each is consumed by the
        # matching counting method below.
        self.userContext = etree.iterparse(self.users)
        self.badgeContext = etree.iterparse(self.badges)
        self.postContext = etree.iterparse(self.posts)

    def numUsers(self):
        """Count the user elements in the users file into ``summary["users"]``."""
        for _event, elem in self.userContext:
            # iterparse also reports the document root when it closes; the
            # root is the container, not a user record, so it is not counted.
            if elem.getparent() is not None:
                self.summary["users"] += 1
            Utils.clearElem(elem)

    def answerFeatures(self):
        """Count questions, famous questions, accepted answers and answers."""
        for _event, elem in self.postContext:
            post_type = Utils.getPostTypeId(elem)
            if post_type == self.QUESTION_TYPE_ID:
                self.summary["questions"] += 1
                # NOTE(review): this comparison relies on Utils.getViewCount
                # returning a number; a raw attribute string would not compare
                # numerically -- confirm against Utils.
                if Utils.getViewCount(elem) > self.FAMOUS_VIEW_THRESHOLD:
                    self.summary["famous"] += 1
                if Utils.getAcceptedId(elem):
                    self.summary["accepted"] += 1
            elif post_type == self.ANSWER_TYPE_ID:
                # Only real answers are counted; the root element and other
                # post types (e.g. tag wikis) no longer inflate this number.
                self.summary["answers"] += 1
            Utils.clearElem(elem)

    def numEpic(self):
        """Count badge elements named ``EPIC_BADGE_NAME`` into ``summary["epic"]``."""
        for _event, elem in self.badgeContext:
            if Utils.getBadgeName(elem) == self.EPIC_BADGE_NAME:
                self.summary["epic"] += 1
            Utils.clearElem(elem)

    def printSummary(self):
        """Print the collected counts to stdout as three lines."""
        counts = self.summary
        # Each line is formatted into a single string so the print call is
        # valid on both Python 2 and Python 3; the spacing reproduces the
        # output of the original comma-separated print statements.
        print('Num Users: num epic Users :  %s \t%s'
              % (counts['users'], counts['epic']))
        print('num questions: answers : accepted answers:  %s %s %s'
              % (counts['questions'], counts['answers'], counts['accepted']))
        print('num famous:  %s' % counts['famous'])

    def generateSummary(self):
        """Run every counting pass, then print the summary."""
        self.numUsers()
        self.numEpic()
        self.answerFeatures()
        self.printSummary()
if __name__ == '__main__':
    # Script entry point: expects the directory holding users.xml,
    # badges.xml and posts.xml as the first command-line argument.
    if len(sys.argv) < 2:
        print('python name.py topLevelDirectory')
        # A missing argument is a usage error, so report failure to the shell.
        sys.exit(1)
    top_dir = sys.argv[1]
    # Binary mode leaves byte decoding to the XML parser, and the with-block
    # closes all three files even if parsing raises.
    with open(os.path.join(top_dir, 'users.xml'), 'rb') as users, \
            open(os.path.join(top_dir, 'badges.xml'), 'rb') as badges, \
            open(os.path.join(top_dir, 'posts.xml'), 'rb') as posts:
        summ = Stats(users, badges, posts)
        summ.generateSummary()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment