Skip to content

Instantly share code, notes, and snippets.

@peritus
Created February 3, 2010 11:55
Show Gist options
  • Save peritus/293563 to your computer and use it in GitHub Desktop.
Save peritus/293563 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# vim: set fileencoding=utf-8
__author__ = 'Filip Noetzel <http://github.com/peritus/>'
__url__ = 'http://longurlspleasebot.appspot.com/'
import logging
import re
import urllib2
from api import simplejson
from api import events
from api import robot
from api import document
from google.appengine.api import memcache
# Display name of the robot.
ROBOT_NAME = 'Long URLs Please Bot'

# Hostnames of the URL shortening services this robot expands.
# Most entries carry a stray leading space; the values are left untouched
# here and stripped when the regex below is built.
SUPPORTED_SERVICES = [
    'bit.ly',' cli.gs',' digg.com',' is.gd',' kl.am',' su.pr',' tinyurl.com',
    '307.to',' adjix.com',' b23.ru',' bacn.me',' bloat.me',' budurl.com',
    'clipurl.us',' cort.as',' dwarfurl.com',' ff.im',' fff.to',' href.in',
    'idek.net',' korta.nu',' lin.cr',' ln-s.net',' loopt.us',' lost.in',
    'memurl.com',' merky.de',' migre.me',' moourl.com',' nanourl.se',' ow.ly',
    'peaurl.com',' ping.fm',' piurl.com',' plurl.me',' pnt.me',' poprl.com',
    'post.ly',' rde.me',' reallytinyurl.com',' redir.ec',' rubyurl.com',
    'short.ie',' short.to',' smallr.com',' sn.im',' sn.vc',' snipr.com',
    'snipurl.com',' snurl.com',' tiny.cc',' tinysong.com',' togoto.us',' tr.im',
    'tra.kz',' trg.li',' twurl.cc',' twurl.nl',' u.mavrev.com',' u.nu',' ur1.ca',
    'url.az',' url.ie',' urlx.ie',' w34.us',' xrl.us',' yep.it',' zi.ma',
    'zurl.ws',' chilp.it',' notlong.com',' qlnk.net',' trim.li',
]

# Matches "http://<supported host>/<anything up to the next whitespace>".
# Each hostname is passed through re.escape so that '.' only matches a
# literal dot; unescaped, 'bit.ly' would also match e.g. 'bitxly'.
FINDURLS_REGEX = re.compile(
    r'(http://(?:'
    + "|".join(re.escape(service.strip()) for service in SUPPORTED_SERVICES)
    + r')/[^ \t\n\r]+)',
    re.MULTILINE)
def get_longurls_for(list_of_urls):
    '''
    Look up the long targets of short URLs via longurlplease.com.

    Uses http://www.longurlplease.com/docs#api

    ``list_of_urls`` is a list of short URL strings.

    Returns the decoded JSON answer of the service (the caller in this file
    indexes it by short URL), an empty dict when ``list_of_urls`` is empty,
    or ``None`` when the HTTP request fails. Answers are stored in memcache
    for one hour, keyed by the request URL.
    '''
    if not list_of_urls:
        # Nothing to resolve: skip the cache lookup and the HTTP round trip.
        return {}

    # Function-scope import so the block brings its own dependency along.
    import urllib

    encoded = []
    for short_url in list_of_urls:
        if isinstance(short_url, unicode):
            # Percent-encode the UTF-8 bytes of unicode input.
            short_url = short_url.encode('utf-8')
        # safe='' escapes every reserved character, so '&', '#' or '?' inside
        # a short URL cannot break up the query string.
        encoded.append(urllib.quote(short_url, safe=''))
    url = 'http://www.longurlplease.com/api/v1.1?q=' + "&q=".join(encoded)

    from_cache = memcache.get(url)
    if from_cache is not None:
        logging.debug("Found %s in cache", url)
        return from_cache

    try:
        result = urllib2.urlopen(url)
    except urllib2.URLError:
        # logging.exception already attaches the active traceback.
        logging.exception("HTTP ERROR!!!")
        return None

    try:
        answer = simplejson.loads(result.read())
    finally:
        # Always release the HTTP response, even if decoding fails.
        result.close()

    logging.info("Fetched %s: %s", url, answer)
    memcache.add(url, answer, 60*60)
    return answer
def get_substitution_plan(string):
    '''
    Work out which short URLs in ``string`` should be replaced, and by what.

    Returns a list of ``(tinyurl, longurl, start, end)`` tuples in order of
    appearance, where ``start``/``end`` are the offsets of ``tinyurl`` in
    ``string``. URLs for which no different, non-empty long URL is known are
    left out. Returns an empty list when nothing matches or when the lookup
    yields nothing.

    >>> get_substitution_plan('baz http://example.com/foo bar')
    []
    >>> get_substitution_plan('baz http://tinyurl.com/THISTHINGDOESNOTEXIST baz')
    []
    >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 bar')
    [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46)]
    >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 http://tinyurl.com/longurlpleasebot-test-2 bar')
    [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46), (u'http://tinyurl.com/longurlpleasebot-test-2', u'http://example.com/2.html', 47, 89)]
    '''
    # find all tinyurls
    tosubstitute = [(unicode(found.group()), found.start(), found.end())
                    for found in FINDURLS_REGEX.finditer(string)]
    if not tosubstitute:
        # No short URLs in the text: no need to ask the lookup service.
        return []

    # get the corresponding long equivalents
    longurls = get_longurls_for([tinyurl for tinyurl, _, _ in tosubstitute])
    if not longurls:
        # get_longurls_for returns None when the HTTP request failed;
        # treat that (and an empty answer) as "nothing to substitute".
        return []

    # make a plan, skipping URLs that are unknown, map to an empty value,
    # or map to themselves (no need for replacing)
    plan = []
    for tinyurl, start, end in tosubstitute:
        longurl = longurls.get(tinyurl)
        if longurl and longurl != tinyurl:
            plan.append((tinyurl, longurl, start, end))
    return plan
def on_blip_change(properties, context):
    '''
    Event handler: expand supported short URLs in every blip of ``context``.

    For each blip returned by ``context.GetBlips()`` the document text is
    scanned with get_substitution_plan() and every planned short URL is
    overwritten with its long URL via ``SetTextInRange``. ``properties`` is
    accepted for the handler signature but not used.
    '''
    blips = context.GetBlips()
    logging.debug("GetBlips %s", repr(context.GetBlips()))
    logging.debug("GetChildBlipIds %s", repr( [blip.GetChildBlipIds() for blip in context.GetBlips()] ))
    logging.debug("GetRootWavelet %s", repr(context.GetRootWavelet()))
    logging.debug("GetWavelets %s", repr(context.GetWavelets()))
    logging.debug("GetWaves %s", repr(context.GetWaves()))
    for blip in blips:
        doc = blip.GetDocument()
        text = doc.GetText()
        logging.debug("Text: '%s'", text)
        # The plan's offsets refer to the text as it is now. Replacing from
        # the last match to the first keeps the offsets of the matches still
        # to be processed valid even when a replacement changes the length.
        # NOTE(review): assumes each SetTextInRange acts on the text as left
        # by the previous one - confirm against the robot API.
        for tinyurl, longurl, start, end in reversed(get_substitution_plan(text)):
            logging.debug("Substituting: '%s', s/%s/%s/ (from %d, %d)", doc.GetText(), repr(tinyurl), repr(longurl), start, end)
            doc.SetTextInRange(
                document.Range(start, start + len(tinyurl)),
                longurl
            )
if __name__ == '__main__':
    # Script entry point: build the robot, hook the handler up, start serving.
    # NOTE(review): str.capitalize() lower-cases everything after the first
    # character, so the name passed here is 'Long urls please bot' - confirm
    # that this is intended.
    bot = robot.Robot(
        ROBOT_NAME.capitalize(),
        '1',
        profile_url='_wave/profile.xml',
    )
    # The same handler serves all three events.
    for event in (
        events.WAVELET_SELF_ADDED,
        events.DOCUMENT_CHANGED,
        events.BLIP_SUBMITTED,
    ):
        bot.RegisterHandler(event, on_blip_change)
    bot.Run(debug=True)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment