Created
February 3, 2010 11:55
-
-
Save peritus/293563 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# vim: set fileencoding=utf-8 | |
__author__ = 'Filip Noetzel <http://github.com/peritus/>' | |
__url__ = 'http://longurlspleasebot.appspot.com/' | |
import logging | |
import re | |
import urllib2 | |
from api import simplejson | |
from api import events | |
from api import robot | |
from api import document | |
from google.appengine.api import memcache | |
# Name handed to robot.Robot when the robot is set up.
ROBOT_NAME = 'Long URLs Please Bot'

# Host names of the URL-shortening services whose links are expanded.
SUPPORTED_SERVICES = [
    'bit.ly', 'cli.gs', 'digg.com', 'is.gd', 'kl.am', 'su.pr', 'tinyurl.com',
    '307.to', 'adjix.com', 'b23.ru', 'bacn.me', 'bloat.me', 'budurl.com',
    'clipurl.us', 'cort.as', 'dwarfurl.com', 'ff.im', 'fff.to', 'href.in',
    'idek.net', 'korta.nu', 'lin.cr', 'ln-s.net', 'loopt.us', 'lost.in',
    'memurl.com', 'merky.de', 'migre.me', 'moourl.com', 'nanourl.se', 'ow.ly',
    'peaurl.com', 'ping.fm', 'piurl.com', 'plurl.me', 'pnt.me', 'poprl.com',
    'post.ly', 'rde.me', 'reallytinyurl.com', 'redir.ec', 'rubyurl.com',
    'short.ie', 'short.to', 'smallr.com', 'sn.im', 'sn.vc', 'snipr.com',
    'snipurl.com', 'snurl.com', 'tiny.cc', 'tinysong.com', 'togoto.us', 'tr.im',
    'tra.kz', 'trg.li', 'twurl.cc', 'twurl.nl', 'u.mavrev.com', 'u.nu', 'ur1.ca',
    'url.az', 'url.ie', 'urlx.ie', 'w34.us', 'xrl.us', 'yep.it', 'zi.ma',
    'zurl.ws', 'chilp.it', 'notlong.com', 'qlnk.net', 'trim.li',
]

# Matches an http:// link on one of SUPPORTED_SERVICES, up to the next
# whitespace character.  Every host name goes through re.escape so that its
# dots only match a literal '.'; unescaped, 'bit.ly' would also match
# 'bitXly'.  strip() keeps the pattern correct even if an entry is added
# with surrounding whitespace.
FINDURLS_REGEX = re.compile(
    r'(http://(?:'
    + '|'.join(re.escape(service.strip()) for service in SUPPORTED_SERVICES)
    + r')/[^ \t\n\r]+)',
    re.MULTILINE)
def get_longurls_for(list_of_urls):
    '''
    Resolve shortened URLs through the longurlplease.com web service.

    Uses http://www.longurlplease.com/docs#api

    ``list_of_urls`` is a list of short URL strings.  Returns the decoded
    JSON answer of the service (presumably a mapping from each short URL to
    its long form -- confirm against the API docs), an empty dict when
    ``list_of_urls`` is empty, or None when the HTTP request fails.
    Answers are stored in memcache for one hour, keyed by the request URL.
    '''
    if not list_of_urls:
        # Nothing to resolve: skip the cache lookup and the HTTP round trip.
        return {}

    # Function-scope import so this block needs no new file-level import.
    import urllib

    # Percent-encode every short URL before putting it into the query
    # string; a raw '&' or '#' inside one would otherwise cut the request
    # apart.
    quoted = []
    for short_url in list_of_urls:
        if isinstance(short_url, unicode):
            # urllib.quote works on byte strings.
            short_url = short_url.encode('utf-8')
        quoted.append(urllib.quote(short_url, safe=''))
    url = 'http://www.longurlplease.com/api/v1.1?q=' + '&q='.join(quoted)

    from_cache = memcache.get(url)
    if from_cache is not None:
        logging.debug("Found %s in cache", url)
        return from_cache

    try:
        result = urllib2.urlopen(url)
    except urllib2.URLError:
        # logging.exception already records the active traceback.
        logging.exception("HTTP ERROR!!!")
        return None

    try:
        answer = simplejson.loads(result.read())
    finally:
        # Release the connection even when the body cannot be read/decoded.
        result.close()

    logging.info("Fetched %s: %s", url, answer)
    memcache.add(url, answer, 60*60)
    return answer
def get_substitution_plan(string):
    '''
    Work out which short URLs in ``string`` should be replaced by what.

    Returns a list of ``(tinyurl, longurl, start, end)`` tuples in order of
    appearance, where ``start``/``end`` are the offsets of ``tinyurl`` in
    ``string``.  Short URLs for which no different, non-empty long URL is
    known are left out.  An empty list is returned when ``string`` holds no
    supported short URL or when the lookup fails.

    The examples below go through get_longurls_for and therefore depend on
    the live lookup service.

    >>> get_substitution_plan('baz http://example.com/foo bar')
    []
    >>> get_substitution_plan('baz http://tinyurl.com/THISTHINGDOESNOTEXIST baz')
    []
    >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 bar')
    [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46)]
    >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 http://tinyurl.com/longurlpleasebot-test-2 bar')
    [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46), (u'http://tinyurl.com/longurlpleasebot-test-2', u'http://example.com/2.html', 47, 89)]
    '''
    # find all tinyurls
    tosubstitute = [(unicode(found.group()), found.start(), found.end())
                    for found in FINDURLS_REGEX.finditer(string)]
    if not tosubstitute:
        # No short URL in the text: do not bother the lookup service.
        return []

    # get the corresponding long equivalents
    longurls = get_longurls_for([tinyurl for tinyurl, _, _ in tosubstitute])
    if not longurls:
        # get_longurls_for returns None when the HTTP request fails; there
        # is nothing to substitute then.
        return []

    # make a plan, skipping steps where the long URL is unknown, empty/None
    # or identical to the short one (no need for replacing)
    plan = []
    for tinyurl, start, end in tosubstitute:
        longurl = longurls.get(tinyurl)
        if longurl and longurl != tinyurl:
            plan.append((tinyurl, longurl, start, end))
    return plan
def on_blip_change(properties, context):
    '''
    Event handler: replace supported short URLs in every blip of ``context``
    with the long URLs returned by get_substitution_plan.

    ``properties`` is accepted because handlers are called with it, but it
    is not used here.
    '''
    blips = context.GetBlips()
    logging.debug("GetBlips %s", repr(blips))
    logging.debug("GetChildBlipIds %s", repr([blip.GetChildBlipIds() for blip in blips]))
    logging.debug("GetRootWavelet %s", repr(context.GetRootWavelet()))
    logging.debug("GetWavelets %s", repr(context.GetWavelets()))
    logging.debug("GetWaves %s", repr(context.GetWaves()))

    for blip in blips:
        doc = blip.GetDocument()
        text = doc.GetText()
        logging.debug("Text: '%s'", text)

        # The offsets in the plan refer to the unmodified text.  Replacing
        # from the last match backwards keeps the offsets of the matches
        # still to be processed valid, even though a long URL has a
        # different length than the short one it replaces.
        for tinyurl, longurl, start, end in reversed(get_substitution_plan(text)):
            logging.debug("Substituting: '%s', s/%s/%s/ (from %d, %d)", text, repr(tinyurl), repr(longurl), start, end)
            doc.SetTextInRange(document.Range(start, end), longurl)
if __name__ == '__main__':
    # Set up the robot and route every event that can introduce new text to
    # on_blip_change.  ROBOT_NAME is passed unchanged: str.capitalize()
    # would lower-case everything after the first character and turn
    # 'Long URLs Please Bot' into 'Long urls please bot'.
    bot = robot.Robot(ROBOT_NAME, '1', profile_url='_wave/profile.xml')
    bot.RegisterHandler(events.WAVELET_SELF_ADDED, on_blip_change)
    bot.RegisterHandler(events.DOCUMENT_CHANGED, on_blip_change)
    bot.RegisterHandler(events.BLIP_SUBMITTED, on_blip_change)
    bot.Run(debug=True)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment