Created
July 12, 2016 17:26
-
-
Save freundTech/ae09311c8c21706a1e27b73d65004a9b to your computer and use it in GitHub Desktop.
Hangoutsbot lookup plugin supporting multiple tables
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import logging | |
import hangups | |
from utils import unicode_to_ascii | |
import urllib.request | |
import plugins | |
logger = logging.getLogger(__name__) | |
def _initialise(bot):
    """Register the ``lookup`` function as a user command."""
    # `bot` is accepted but not used by this hook.
    user_commands = ["lookup"]
    plugins.register_user_command(user_commands)
def lookup(bot, event, *args):
    """find keywords in a specified spreadsheet"""
    # NOTE(review): the docstring above is kept verbatim in case the bot
    # framework surfaces command docstrings as help text -- confirm.
    #
    # Arguments (all taken from *args):
    #   [<N]      optional leading token such as "<10": maximum rows to display
    #   TABLE     key into the conversation's 'spreadsheet_urls' config mapping
    #   KEYWORD   remaining words, joined with spaces; matched case-insensitively
    #             as a substring of each cell, both raw and via unicode_to_ascii
    #
    # Exactly one message is sent to event.conv on every path: either an
    # error/usage notice or the formatted results.
    if not bot.get_config_suboption(event.conv_id, 'multispreadsheet_enabled'):
        yield from bot.coro_send_message(event.conv, _("Spreadsheet function disabled"))
        return

    spreadsheet_urls = bot.get_config_suboption(event.conv_id, 'spreadsheet_urls')
    if not spreadsheet_urls:
        yield from bot.coro_send_message(event.conv, _("Spreadsheet URL not set"))
        return

    # Name of table class to search. Note that 'waffle' seems to be the
    # default for all spreadsheets
    table_class = "waffle"

    # Parse the arguments defensively: a missing table name or a malformed
    # "<N" limit produces a usage message instead of an unhandled exception.
    params = list(args)
    counter_max = 5  # Maximum rows displayed per query
    args_ok = True
    if params and params[0].startswith('<'):
        try:
            counter_max = int(params.pop(0)[1:])
        except ValueError:
            args_ok = False
    if not args_ok or not params:
        yield from bot.coro_send_message(event.conv, _("Usage: /bot lookup TABLE KEYWORD"))
        return

    table_name = params[0].lower()
    keyword = ' '.join(params[1:])

    if table_name not in spreadsheet_urls:
        yield from bot.coro_send_message(event.conv, _("Spreadsheet doesn't exist"))
        return

    logger.debug("%s (%s) has requested to lookup '%s'",
                 event.user.full_name, event.user.id_.chat_id, keyword)

    # NOTE: urlopen is a blocking call; the context manager guarantees the
    # HTTP response is closed even if reading fails.
    with urllib.request.urlopen(spreadsheet_urls[table_name]) as response:
        raw_html = response.read()

    keyword_raw = keyword.strip().lower()
    keyword_ascii = unicode_to_ascii(keyword_raw)

    # Adapted from http://stackoverflow.com/questions/23377533/python-beautifulsoup-parsing-table
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(str(raw_html, 'utf-8'), 'html.parser')
    sheet = soup.find('table', attrs={'class': table_class})
    rows = []
    if sheet is not None:
        # Fall back to the table element itself when the markup has no <tbody>.
        body = sheet.find('tbody')
        rows = (sheet if body is None else body).find_all('tr')

    data = []
    for row in rows:
        cells = [ele.text.strip() for ele in row.find_all('td')]
        data.append([text for text in cells if text])  # Get rid of empty values

    def cell_matches(cell):
        """Return True when the keyword occurs in the cell (raw or ASCII-folded)."""
        content_raw = str(cell).lower().strip()
        if keyword_raw in content_raw:
            return True
        return keyword_ascii in unicode_to_ascii(content_raw)

    parts = [_('Results for keyword <b>{}</b> in table <b>{}</b>:<br />').format(keyword, table_name)]
    counter = 0
    for row in data:
        # Each row is counted at most once, however many of its cells match,
        # so the "rows found" total stays accurate beyond the display limit.
        if not any(cell_matches(cell) for cell in row):
            continue
        counter += 1
        if counter <= counter_max:
            parts.append(_('<br />Row {}: ').format(counter))
            parts.extend('{} | '.format(datapoint) for datapoint in row)
            parts.append('<br />')

    if counter > counter_max:
        parts.append(_('<br />{0} rows found. Only returning first {1}.').format(counter, counter_max))
        if counter_max == 5:
            # The hint must repeat the table name, otherwise the suggested
            # command would treat the keyword as the table.
            parts.append(
                _('<br />Hint: Use <b>/bot lookup <{0} {1} {2}</b> to view {0} rows').format(
                    counter_max * 2, table_name, keyword))

    if counter == 0:
        parts.append(_('No match found'))

    yield from bot.coro_send_message(event.conv, ''.join(parts))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment