Created
July 28, 2013 05:51
-
-
Save whym/6097573 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def format_isodate(x):
    """Return ``x`` rendered as ``YYYY-MM-DDTHH:MM:SSZ``.

    ``x`` must be a ``datetime`` instance (the unbound ``strftime`` call
    rejects anything else).  The trailing ``Z`` is a literal character of
    the format string; no timezone conversion is performed.
    """
    from datetime import datetime as _datetime

    iso_layout = '%Y-%m-%dT%H:%M:%SZ'
    return _datetime.strftime(x, iso_layout)
def parse_wikidate(x):
    """Parse a ``%Y%m%d%H%M%S`` timestamp string into a ``datetime``.

    Example input: ``'20130728055100'``.  The result carries no timezone
    information.  ``strptime`` raises ``ValueError`` when ``x`` does not
    match the layout.
    """
    import datetime as _dt

    wiki_layout = '%Y%m%d%H%M%S'
    return _dt.datetime.strptime(x, wiki_layout)
def connect(db):
    """Open an ``oursql`` connection to the user database named ``db``.

    The host is ``<db>.userdb.toolserver.org`` with underscores in ``db``
    turned into hyphens; the schema name is the same value with hyphens
    turned into underscores.  Credentials are read from ``~/.my.cnf``.

    Returns the connection object produced by ``oursql.connect``.
    """
    import os
    import oursql

    # Host names are spelled with hyphens, schema names with underscores.
    host_label = db.replace('_', '-')
    schema = host_label.replace('-', '_')
    option_file = os.path.expanduser('~/.my.cnf')

    # NOTE(review): charset=None / use_unicode=False presumably make the
    # driver hand back undecoded byte strings -- confirm against oursql docs.
    return oursql.connect(host=host_label + '.userdb.toolserver.org',
                          read_default_file=option_file,
                          db=schema,
                          charset=None,
                          use_unicode=False)
def render(text, script):
    """Render wikitext to HTML through a MediaWiki ``action=parse`` request.

    Python 2 only: the request is made with ``urllib2``.

    Args:
        text: Wikitext to render; it is percent-quoted into the POST body.
        script: URL of the API endpoint the request is POSTed to.

    Returns:
        The ``['parse']['text']['*']`` member of the UTF-8 decoded JSON reply.

    Raises:
        SystemExit: with status 1, after writing the error to stderr, when
            the request fails with ``urllib2.URLError``.
        KeyError: when the reply lacks the ``parse`` / ``text`` / ``*``
            members.
    """
    import contextlib
    import json
    import sys
    import urllib2

    data = 'format=json&action=parse&text=%s' % (urllib2.quote(text))
    request = urllib2.Request(
        script,
        data=data,
        headers={'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 5.1; it; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11'})
    try:
        # urllib2 responses are not context managers; closing() makes sure
        # the connection is released even when read() fails.
        with contextlib.closing(urllib2.urlopen(request)) as response:
            res = response.read()
    except urllib2.URLError as e:
        sys.stderr.write('%s\n' % (e,))
        # sys.exit rather than the bare exit(): the latter is only injected
        # by the site module and is missing under e.g. ``python -S``.
        sys.exit(1)
    return json.loads(res.decode('utf-8'))['parse']['text']['*']
def render_html(site, wikitext, postprocess=lambda x: x, title='(auto generated HTML)'):
    """Render wikitext via a wiki's API and wrap it in a standalone HTML page.

    Args:
        site: Host name of the wiki.  It is used to build the API URL
            (``http://<site>/w/api.php``), the stylesheet/script URLs and
            the prefix for rewritten links.  Its first dot-separated label
            is used as the page language.
        wikitext: Iterable of wikitext strings; they are joined with
            newlines before being rendered.
        postprocess: Callable applied to the UTF-8 encoded, link-rewritten
            body; whatever it returns is embedded in the page.
        title: Text placed in the ``<title>`` element.  It is inserted
            verbatim (no HTML escaping is applied).

    Returns:
        The page header, the processed body and the footer, separated by
        newlines.
    """
    lang = site.split('.')[0]
    # The header is formatted exactly once.  A second ``%`` pass over the
    # finished header would reinterpret any '%' that came from ``site`` or
    # ``title`` as a format directive and raise or corrupt the output.
    header = '''
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html lang="%(lang)s" dir="ltr" class="client-nojs" xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>%(title)s</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="Content-Style-Type" content="text/css" />
<link rel="copyright" href="//creativecommons.org/licenses/by-sa/3.0/" />
<link rel="stylesheet" href="//bits.wikimedia.org/%(site)s/load.php?debug=false&lang=%(lang)s&modules=site&only=styles&skin=vector&*" type="text/css" media="all" />
<style type="text/css" media="all">a:lang(ar),a:lang(ckb),a:lang(fa),a:lang(kk-arab),a:lang(mzn),a:lang(ps),a:lang(ur){text-decoration:none}a.new,#quickbar a.new{color:#ba0000}
</style>
<script src="//bits.wikimedia.org/%(site)s/load.php?debug=false&lang=%(lang)s&modules=startup&only=scripts&skin=vector&*" type="text/javascript"></script>
<script type="text/javascript" src="//toolserver.org/~whym/jquery/jquery-latest.js"></script>
<script type="text/javascript" src="//toolserver.org/~whym/jquery/jquery.tablesorter.js"></script>
<script type="text/javascript">
$(document).ready(function()
{
$(".sortable").tablesorter();
}
);
</script>
</head>
<body class="mediawiki ltr sitedir-ltr ns-0 ns-subject skin-vector action-view">
''' % {'site': site, 'lang': lang, 'title': title}
    footer = '''
<!-- /footer -->
<script type="text/javascript">if(window.mw){
mw.loader.load(["mediawiki.user","mediawiki.page.ready","mediawiki.legacy.mwsuggest","ext.vector.collapsibleNav","ext.vector.collapsibleTabs","ext.vector.editWarning","ext.vector.simpleSearch"], null, true);
}</script>
</body>
</html>
'''
    script = 'http://%s/w/api.php' % site

    body = render("\n".join(wikitext), script).encode('utf-8')
    # Point root-relative wiki links at the originating site so they keep
    # working when the page is served from somewhere else.
    body = body.replace('="/wiki/', '="//' + site + '/wiki/')
    body = body.replace('="/w/', '="//' + site + '/w/')

    return """
%s
%s
%s
""" % (header,
       postprocess(body),
       footer)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment