Skip to content

Instantly share code, notes, and snippets.

@twneale
Created September 4, 2013 21:42
Show Gist options
  • Save twneale/6443253 to your computer and use it in GitHub Desktop.
Save twneale/6443253 to your computer and use it in GitHub Desktop.
Salty bet search script
from operator import methodcaller
import lxml.html
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.qparser import QueryParser
try:
    _text_type = unicode  # Python 2: the builtin unicode text type
except NameError:
    _text_type = str  # Python 3: str is already unicode text


class Stats(object):
    """Iterate over the records held in the table rows of an HTML page.

    The markup is parsed once in the constructor. Iterating the instance
    walks every ``<tr>`` found under a ``<table>``, skipping the very first
    row (presumably the column-header row), and yields one dict per row with
    the keys ``name``, ``matches``, ``wins`` and ``losses``.
    """

    def __init__(self, html):
        """Keep the raw *html* and parse it with ``lxml.html.fromstring``."""
        self.html = html
        self.doc = lxml.html.fromstring(html)

    def __iter__(self):
        """Yield a record dict for each table row after the first.

        The first four cells of a row are read, in order, as name, matches,
        wins and losses. ``name`` is the cell text with surrounding
        whitespace removed, as a unicode string; the other three values are
        converted with ``int``. Rows with fewer than four cells, or with a
        count that ``int`` cannot parse, are not handled here and raise.
        """
        rows = self.doc.xpath('//table//tr')
        # rows[0] is skipped; only the remaining rows are treated as data.
        for row in rows[1:]:
            yield dict(
                name=_text_type(row[0].text_content().strip()),
                matches=int(row[1].text_content()),
                wins=int(row[2].text_content()),
                losses=int(row[3].text_content()))
class App(object):
    """Whoosh-backed search over the records produced by ``Stats``.

    Constructing an ``App`` (re)creates an index in the ``indexdir``
    directory, adds one document per record yielded by ``Stats(html)`` and
    commits. ``query`` then searches the ``name`` field of that index.
    """

    def __init__(self, html):
        """Build the index from *html* and prepare it for searching.

        The index directory is created when it does not exist yet. The
        writer is used as a context manager so that it commits when every
        record was added and cancels (releasing the write lock) when a
        record fails to parse.
        """
        import os  # local import: leaves the file's import block untouched

        self.schema = Schema(
            name=NGRAM(stored=True, minsize=2),
            matches=NUMERIC(stored=True),
            wins=NUMERIC(stored=True),
            losses=NUMERIC(stored=True))

        # create_in() expects an existing directory.
        if not os.path.isdir("indexdir"):
            os.makedirs("indexdir")
        self.index = create_in("indexdir", self.schema)

        with self.index.writer() as writer:
            for data in Stats(html):
                writer.add_document(**data)

        # The index is not modified after the commit above, so a single
        # parser and a single searcher can serve every query instead of
        # opening (and never closing) a new searcher per call.
        self._parser = QueryParser("name", self.index.schema)
        self._searcher = self.index.searcher()

    def query(self, text):
        """Parse *text* as a query on ``name`` and return the search results.

        The returned results object stays usable for as long as this
        instance has not been closed, because it is backed by the searcher
        the instance keeps open.
        """
        return self._searcher.search(self._parser.parse(text))

    def close(self):
        """Close the searcher held by this instance."""
        self._searcher.close()
def main():
    """Index ``stats.html`` and answer searches typed at the prompt.

    Each input line may hold several comma-separated terms; every non-blank
    term is searched separately and its hits are printed. The loop ends
    cleanly on end-of-input (Ctrl-D) or Ctrl-C instead of with a traceback.
    """
    import io  # local import: leaves the file's import block untouched

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    # io.open decodes while reading and behaves the same on Python 2 and 3.
    with io.open('stats.html', encoding='iso-8859-1') as f:
        html = f.read()
    app = App(html)

    while True:
        try:
            query = read_line('\n\nSearch: ')
        except (EOFError, KeyboardInterrupt):
            break
        for term in query.split(','):
            # "a, b" or a trailing comma would otherwise produce padded or
            # empty terms.
            term = term.strip()
            if not term:
                continue
            # Single-argument print calls give the same output on Python 2
            # and Python 3.
            print('Results for %r' % term)
            for result in app.query(term):
                print(' -  %s' % (result,))


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment