Created
October 15, 2012 16:09
-
-
Save mlincoln/3893322 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import flask, flask.views | |
| import os | |
| import functools | |
| import urllib2 | |
| import re | |
| import operator | |
| import webbrowser | |
| from datetime import datetime | |
# Shared URL opener used by the scraping functions; every request made
# through it carries a browser-like User-agent header.
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]

app = flask.Flask(__name__)

# SECURITY: Flask signs the session cookie with this key, so a guessable
# value lets anyone forge a logged-in session.  Set SECRET_KEY in the
# environment for any real deployment; the literal fallback only keeps the
# previous behaviour for local experiments.
app.secret_key = os.environ.get("SECRET_KEY", "bacon")

# SECURITY: username -> password, stored and compared in plain text.
# Acceptable for a demo only.
users = {'miles': 'bacon'}
class Main(flask.views.MethodView):
    """Index view: renders the landing page and handles login/logout posts."""

    def get(self):
        """Render the ``index.html`` template."""
        return flask.render_template('index.html')

    def post(self):
        """Handle a logout or login form submission.

        Every path ends with a redirect to the ``index`` endpoint:

        * a form containing ``logout`` drops ``username`` from the session;
        * a form missing ``username`` or ``passwd`` flashes an error naming
          the first missing field;
        * otherwise the credentials are checked against ``users`` and either
          the session is populated or a failure message is flashed.
        """
        form = flask.request.form

        if 'logout' in form:
            flask.session.pop('username', None)
            return flask.redirect(flask.url_for('index'))

        # Only the first missing field is reported, in declaration order.
        missing = [field for field in ('username', 'passwd')
                   if field not in form]
        if missing:
            flask.flash("Error: {0} is required.".format(missing[0]))
            return flask.redirect(flask.url_for('index'))

        username = form['username']
        passwd = form['passwd']
        credentials_ok = username in users and users[username] == passwd
        if not credentials_ok:
            flask.flash("Username doesn't exist or incorrect password")
        else:
            flask.session['username'] = username
        return flask.redirect(flask.url_for('index'))
def login_required(method):
    """Decorate a view method so it only runs for logged-in sessions.

    When ``username`` is absent from the Flask session, the wrapped method
    is not called; a message is flashed and the client is redirected to the
    ``index`` endpoint instead.
    """
    @functools.wraps(method)
    def wrapper(*args, **kwargs):
        # Guard clause: bounce anonymous visitors before touching the view.
        if 'username' not in flask.session:
            flask.flash("A login is required to see the page!")
            return flask.redirect(flask.url_for('index'))
        return method(*args, **kwargs)
    return wrapper
class Remote(flask.views.MethodView):
    """Login-protected view that profiles a Wikipedia page's edit history."""

    @login_required
    def get(self):
        """Render the ``remote.html`` template."""
        return flask.render_template('remote.html')

    @login_required
    def post(self):
        """Profile the page named in the ``expression`` form field.

        The report produced by ``prepare`` (or an error message) is flashed
        and the client is redirected back to the ``remote`` endpoint.
        """
        # SECURITY: the submitted text is user input.  It is only ever used
        # as a page title -- never pass it to eval().
        page_title = flask.request.form['expression'].strip()
        if not page_title:
            # A blank title would be sent to the scraper as-is; reject it
            # before any network request is made.
            flask.flash("Error: a page title is required.")
            return flask.redirect(flask.url_for('remote'))

        try:
            result = prepare(page_title)
        except urllib2.URLError as error:
            # Network and HTTP failures (HTTPError subclasses URLError) are
            # reported to the user instead of surfacing as a server error.
            flask.flash(
                "Error: could not fetch the page history ({0}).".format(error))
            return flask.redirect(flask.url_for('remote'))

        flask.flash(result)
        return flask.redirect(flask.url_for('remote'))
# URL routing: each class-based view is registered for both GET and POST.
# The name given to as_view() is the endpoint used with flask.url_for().
app.add_url_rule('/', view_func=Main.as_view('index'), methods=["GET", "POST"])
app.add_url_rule('/remote/', view_func=Remote.as_view('remote'), methods=['GET', 'POST'])
def prepare(wikiurl):
    """Start an edit-history profile for the page titled *wikiurl*.

    Seeds the scraper with empty accumulators (no offset, empty match list,
    empty per-day dictionary, zero matches), the start time, and a banner
    line, then returns whatever ``scrapewiki`` returns.
    """
    started_at = datetime.now()
    banner = "Profiling the " + wikiurl + " page...\n"
    return scrapewiki(wikiurl, "", "", {}, 0, started_at, banner)
# History pages are requested with limit=500; a page with at least this many
# edit rows is treated as full, meaning older history may still follow.
_FULL_PAGE_THRESHOLD = 499

# Captures (HH:MM, day, month name, year) from an edit row's date cell.
_EDIT_DATE_RE = re.compile(
    r'mw-changeslist-date">(\d{2}:\d{2}),\s{1}(\d{1,2})\s{1}(\w{3,10})\s{1}(\d{4})')

# English month name -> zero-padded month number ("January" -> "01").
_MONTH_NUMBERS = dict(
    (name, "%02d" % number)
    for number, name in enumerate(
        ("January", "February", "March", "April", "May", "June", "July",
         "August", "September", "October", "November", "December"), 1))


def _scan_history_page(wikiurl, offset, matchlist, matchdict, totalmatches,
                       first_page):
    """Fetch one history page and tally the edit timestamps found on it.

    Parameters:
        wikiurl: page title, inserted into the request URL as given (it is
            not URL-quoted here, so already percent-encoded titles work).
        offset: 14-digit offset string for the request, or "" for the newest
            page.
        matchlist: tab-separated "time, day, month, year" rows collected so
            far.
        matchdict: "day-month-year" -> edit count; updated in place.
        totalmatches: number of edits counted so far.
        first_page: when true, the last offset link seen on the page wins;
            otherwise an offset is only adopted when it sorts below the
            current one.

    Returns:
        ``(offset, matchlist, totalmatches, matchesonpage)`` with the updated
        accumulators and the number of edit rows found on this page.
    """
    url = ("http://en.wikipedia.org/w/index.php?title=" + wikiurl +
           "&offset=" + offset + "&limit=500&action=history")
    # The title is escaped so that regex metacharacters in it (e.g. "+",
    # "(") are matched literally instead of breaking or altering the pattern.
    offset_re = re.compile(re.escape(wikiurl) + r'&offset=(\d{14})')

    rows = []
    matchesonpage = 0
    page = opener.open(url)
    try:
        while True:
            currentline = page.readline()
            if not currentline:
                # readline() returns an empty string at end of response.
                break

            offset_match = offset_re.search(currentline)
            if offset_match:
                candidate = offset_match.group(1)
                # Fixed-width digit strings compare correctly as text.
                if first_page or candidate < offset:
                    offset = candidate

            date_match = _EDIT_DATE_RE.search(currentline)
            if date_match:
                matchesonpage += 1
                edit_time, day, month, year = date_match.groups()
                # Unrecognised month words are kept unchanged.
                month = _MONTH_NUMBERS.get(month, month)
                ddmmyyyy = str(day + "-" + month + "-" + year)
                matchdict[ddmmyyyy] = matchdict.get(ddmmyyyy, 0) + 1
                rows.append(
                    edit_time + "\t" + day + "\t" + month + "\t" + year + "\n")
    finally:
        # Always release the HTTP response, even if parsing fails.
        page.close()

    return (offset, matchlist + "".join(rows),
            totalmatches + matchesonpage, matchesonpage)


def scrapewiki(wikiurl,offset,matchlist,matchdict,totalmatches,startTime,output):
    """Scan the first history page of *wikiurl* and continue as needed.

    If the first page looks full and an offset link was found, the remaining
    pages are processed by ``recursion``; otherwise the report is finished by
    ``dumpresults``.  Returns the report text produced by whichever of the
    two is called.  ``matchdict`` is updated in place.
    """
    offset, matchlist, totalmatches, matchesonpage = _scan_history_page(
        wikiurl, offset, matchlist, matchdict, totalmatches, first_page=True)
    output += "matches found on first page: "+str(matchesonpage)+"\n"
    if matchesonpage >= _FULL_PAGE_THRESHOLD and offset != "":
        return recursion(wikiurl,offset,matchlist,matchdict,totalmatches,startTime,output)
    return dumpresults(wikiurl,offset,matchlist,matchdict,totalmatches,startTime,output)


def recursion(wikiurl,offset,matchlist,matchdict,totalmatches,startTime,output):
    """Process history pages from *offset* onwards until a short page is hit.

    The name is kept for compatibility with existing callers; the pages are
    now walked in a loop, so the number of pages is not limited by Python's
    recursion depth.  Returns the report text produced by ``dumpresults``.
    ``matchdict`` is updated in place.
    """
    while True:
        previous_offset = offset
        offset, matchlist, totalmatches, matchesonpage = _scan_history_page(
            wikiurl, offset, matchlist, matchdict, totalmatches,
            first_page=False)

        if matchesonpage < _FULL_PAGE_THRESHOLD:
            output += "matches on final page: "+str(matchesonpage)+"\n"
            output += "A total of "+str(totalmatches)+" edits have been made to this page.\n"
            return dumpresults(wikiurl,offset,matchlist,matchdict,totalmatches,startTime,output)

        output += "matches found on this page: "+str(matchesonpage)+"\n"
        output += "matches found so far: "+str(totalmatches)+"\n"

        if offset == previous_offset:
            # A full page that yields no older offset would otherwise be
            # fetched and counted again forever; stop and report instead.
            output += "A total of "+str(totalmatches)+" edits have been made to this page.\n"
            return dumpresults(wikiurl,offset,matchlist,matchdict,totalmatches,startTime,output)

        output += "going to the next page\n"
def dumpresults(wikiurl,offset,matchlist,matchdict,totalmatches,startTime,output):
    """Append the summary lines to *output* and return the finished report.

    Parameters:
        wikiurl: page title, used in the summary sentence.
        offset, matchlist, totalmatches: accepted for signature compatibility
            with the scraping functions; not used here.
        matchdict: "day-month-year" -> number of edits on that day.
        startTime: ``datetime`` taken when profiling began.
        output: report text accumulated so far.

    Returns:
        *output* extended with the busiest-day sentence (or a "no edits"
        notice when *matchdict* is empty) and the elapsed-time line.
    """
    if matchdict:
        # max() keeps the first entry encountered among equal counts.
        maxeditday, maxedits = max(matchdict.items(), key=operator.itemgetter(1))
        output += "The highest number of edits ("+ str(maxedits) + ") to the "+wikiurl+" page occurred on " + str(maxeditday) + " (dd/mm/yyyy).\n"
    else:
        # max() raises ValueError on an empty mapping, e.g. when the page
        # yielded no edit rows at all; report that instead of crashing.
        output += "No edits were found for the "+wikiurl+" page.\n"
    timeTotal=datetime.now()-startTime
    output += "This code took "+str(timeTotal)+" seconds to execute\n"
    return output
| ## return | |
| ####not ideal: | |
| ## sys.exit() | |
| ## outfile=open(wikiurl+".html",'w') | |
| ## outfile.write("testing") | |
| ## outfile.close() | |
| ## webbrowser.open(wikiurl+".html") | |
| ####only writes first 500 lines--variables not passing correctly | |
| ## outcsv=open(wikiurl+".csv",'w') | |
| ## outcsv.write(matchlist) | |
| ## outcsv.close() | |
| ##prepare("Heath_Ledger") | |
| ##prepare("Jimmy_John%27s") | |
if __name__ == "__main__":
    # Start the development server only when this file is executed as a
    # script, so importing the module (e.g. from a WSGI server or a test)
    # does not block on app.run().
    # NOTE: debug mode enables Flask's interactive debugger; do not expose
    # it beyond local development.
    app.debug = True
    app.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment