Skip to content

Instantly share code, notes, and snippets.

@sbp
Created July 16, 2013 17:13
Show Gist options
  • Save sbp/6010647 to your computer and use it in GitHub Desktop.
Save sbp/6010647 to your computer and use it in GitHub Desktop.
Webpage title summariser
# Copyright 2013, Sean B. Palmer
# Source: http://inamidst.com/saxo/
import re
import saxo
# Matches a message that consists solely of an http(s) URL, optionally
# followed by one trailing "." or ",". The URL body is matched lazily so that
# the trailing punctuation is left out of group 1 rather than being swallowed
# by the character class.
regex_link = re.compile(r"^(http[s]?://[^<> \"\x01]+?)[,.]?$")

# Captures the contents of the first <title> element. Case-insensitive,
# spans newlines, and tolerates attributes on the opening tag.
regex_title = re.compile(r"<title(?:\s[^>]*)?>(.*?)</title>", re.I | re.M | re.S)

# Matches any markup tag, so tags nested inside a title can be removed.
regex_tag = re.compile(r"<[^>]+>")
def longest(input, sep):
    """Return the longest piece of *input* after splitting it on *sep*.

    When several pieces share the maximum length, the first one wins.
    If *sep* does not occur, *input* itself is returned; an empty *input*
    yields the empty string.
    """
    # max() returns the first maximal element, which matches a manual scan
    # that only replaces the candidate on a strictly greater length.
    return max(input.split(sep), key=len)
# Substrings that, when present anywhere in a URL, suppress title lookup.
blacklist = (
    "github.com",
    "swhack.com",
    "translate.google.com",
    "tumbolia.appspot.com",
    "twitter.com",
    "wikia.com",
    "wikipedia.org",
)
@saxo.event("PRIVMSG")
def link(irc):
    """Say a condensed page title when a message is nothing but a URL.

    Registered for "PRIVMSG" events. The message text must match
    ``regex_link`` in full; messages from the nick "yoleaux" and URLs
    containing any ``blacklist`` entry are ignored. The page is fetched via
    ``saxo.request`` with ``limit=262144``, the first ``<title>`` is
    extracted, reduced to its longest segment around common site-name
    separators, and sent back wrapped in double quotes.
    """
    # Messages from this nick are never summarised.
    if irc.nick == "yoleaux":
        return

    match = regex_link.match(irc.text)
    if not match:
        return

    # Drop any fragment identifier before requesting the page.
    url = match.group(1).split("#", 1)[0]

    if any(blacklisted in url for blacklisted in blacklist):
        return

    page = saxo.request(url, limit=262144)
    # NOTE(review): assumes the response always carries a "text" entry —
    # confirm against saxo.request for non-text responses.
    found = regex_title.search(page["text"])
    if not found:
        return

    title = regex_tag.sub("", found.group(1))
    # Collapse line breaks and whitespace runs into single spaces so that
    # words on adjacent lines are not glued together.
    title = " ".join(title.split())

    # Keep only the longest segment around each common separator; this
    # usually discards the site name and keeps the page-specific part.
    for sep in (" : ", " | ", "| ", " — "):
        title = longest(title, sep)
    if "youtube.com" not in url:
        title = longest(title, " - ")
    elif title.endswith(" - YouTube"):
        # On YouTube only the site suffix is removed, since video titles
        # may themselves contain " - ".
        title = title[:-len(" - YouTube")]

    # The reply is wrapped in double quotes, so inner ones become single.
    title = title.replace('"', "'").strip()
    if title:
        irc.say('"' + title + '"')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment