peritus · February 3, 2010 11:55
diff --git a/longurlspleasebot.py b/longurlspleasebot.py
 #!/usr/bin/env python
 # vim: set fileencoding=utf-8

 __author__ = 'Filip Noetzel <http://github.com/peritus/>'
 __url__ = 'http://longurlspleasebot.appspot.com/'

 import logging
 import re
 import urllib2

 from api import simplejson
 from api import events
 from api import robot
 from api import document

 from google.appengine.api import memcache

 ROBOT_NAME = 'Long URLs Please Bot'

 SUPPORTED_SERVICES = [
  'bit.ly',' cli.gs',' digg.com',' is.gd',' kl.am',' su.pr',' tinyurl.com',
  '307.to',' adjix.com',' b23.ru',' bacn.me',' bloat.me',' budurl.com',
  'clipurl.us',' cort.as',' dwarfurl.com',' ff.im',' fff.to',' href.in',
  'idek.net',' korta.nu',' lin.cr',' ln-s.net',' loopt.us',' lost.in',
  'memurl.com',' merky.de',' migre.me',' moourl.com',' nanourl.se',' ow.ly',
  'peaurl.com',' ping.fm',' piurl.com',' plurl.me',' pnt.me',' poprl.com',
  'post.ly',' rde.me',' reallytinyurl.com',' redir.ec',' rubyurl.com',
  'short.ie',' short.to',' smallr.com',' sn.im',' sn.vc',' snipr.com',
  'snipurl.com',' snurl.com',' tiny.cc',' tinysong.com',' togoto.us',' tr.im',
  'tra.kz',' trg.li',' twurl.cc',' twurl.nl',' u.mavrev.com',' u.nu',' ur1.ca',
  'url.az',' url.ie',' urlx.ie',' w34.us',' xrl.us',' yep.it',' zi.ma',
  'zurl.ws',' chilp.it',' notlong.com',' qlnk.net',' trim.li',
 ]

 FINDURLS_REGEX = re.compile(r'(http://(?:' + "|".join(SUPPORTED_SERVICES).replace(" ", "") + ')/[^ \t\n\r]+)', re.MULTILINE)

 def get_longurls_for(list_of_urls):
     '''
     Look up the long equivalents of short URLs on longurlplease.com.

     Uses http://www.longurlplease.com/docs#api

     list_of_urls -- the short URLs to resolve.

     Returns the decoded JSON answer of the service (taken from memcache
     when the identical query is cached), an empty dict when list_of_urls
     is empty, or None when the HTTP request fails.
     '''

     # Nothing to resolve: skip both the cache lookup and the HTTP round trip.
     if not list_of_urls:
         return {}

     # NOTE(review): the URLs are joined into the query string verbatim (no
     # percent-encoding) -- confirm the service copes with '&' or '#' inside
     # a short URL.
     url = 'http://www.longurlplease.com/api/v1.1?q=' + "&q=".join(list_of_urls)

     # The full request URL doubles as the cache key.
     from_cache = memcache.get(url)

     if from_cache is not None:
         logging.debug("Found %s in cache", url)
         return from_cache

     try:
         result = urllib2.urlopen(url)
     except urllib2.URLError:
         # logging.exception() records the active traceback on its own; no
         # exc_info keyword is needed.
         logging.exception("HTTP ERROR!!!")
         return None

     try:
         answer = simplejson.loads(result.read())
     finally:
         # Release the connection even when reading or decoding fails.
         result.close()

     logging.info("Fetched %s: %s", url, answer)

     # 60*60 is passed as the expiry argument (one hour, in seconds).
     memcache.add(url, answer, 60*60)
     return answer

 def get_substitution_plan(string):
     '''
     Work out which short URLs in string should be replaced by which long URL.

     Returns a list of (tinyurl, longurl, start, end) tuples in order of
     appearance, where start and end are the offsets of tinyurl in string.
     URLs for which no different, non-empty long URL is known are left out.
     An empty list is returned when string holds no supported short URL or
     when the lookup fails.

     The examples below query the live service:

     >>> get_substitution_plan('baz http://example.com/foo bar')
     []
     >>> get_substitution_plan('baz http://tinyurl.com/THISTHINGDOESNOTEXIST baz')
     []
     >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 bar')
     [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46)]
     >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 http://tinyurl.com/longurlpleasebot-test-2 bar')
     [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46), (u'http://tinyurl.com/longurlpleasebot-test-2', u'http://example.com/2.html', 47, 89)]
     '''

     # find all tinyurls
     tosubstitute = [(unicode(found.group()), found.start(), found.end())
                     for found in FINDURLS_REGEX.finditer(string)]
     if not tosubstitute:
         # no short URL in the text, so there is nothing to look up
         return []

     # get the corresponding long equivalents
     longurls = get_longurls_for([tinyurl for tinyurl, _start, _end in tosubstitute])
     if not longurls:
         # get_longurls_for() returns None when the HTTP request fails;
         # treat that (and an empty answer) as "nothing to substitute"
         return []

     # make a plan, skipping every step where the long URL is unknown, empty
     # or identical to the short one
     plan = []
     for tinyurl, start, end in tosubstitute:
         longurl = longurls.get(tinyurl)
         if longurl and longurl != tinyurl:
             plan.append((tinyurl, longurl, start, end))
     return plan

 def on_blip_change(properties, context):
     '''
     Event handler: replace supported short URLs in every blip of context
     by their long equivalents.

     properties -- event properties; not used by this handler.
     context    -- event context; its blips are read and modified.
     '''
     blips = context.GetBlips()

     logging.debug("GetBlips %s", repr(blips))
     logging.debug("GetChildBlipIds %s", repr([blip.GetChildBlipIds() for blip in blips]))
     logging.debug("GetRootWavelet %s", repr(context.GetRootWavelet()))
     logging.debug("GetWavelets %s", repr(context.GetWavelets()))
     logging.debug("GetWaves %s", repr(context.GetWaves()))

     for blip in blips:
         doc = blip.GetDocument()
         text = doc.GetText()
         logging.debug("Text: '%s'", text)

         # The offsets in the plan refer to the unmodified text.  A long URL
         # usually differs in length from the short one, so the replacements
         # are applied from the last match to the first: the text in front
         # of a replacement never moves, which keeps the remaining offsets
         # valid.
         for tinyurl, longurl, start, end in reversed(get_substitution_plan(text)):

             logging.debug("Substituting: '%s', s/%s/%s/ (from %d, %d)", doc.GetText(), repr(tinyurl), repr(longurl), start, end)

             doc.SetTextInRange(
                 document.Range(start, start + len(tinyurl)),
                 longurl
             )

 if __name__ == '__main__':
     # Script entry point: create the robot, hook on_blip_change up to every
     # event that can bring new text, then start serving.
     # NOTE: str.capitalize() lower-cases everything after the first
     # character, so the name handed to Robot is 'Long urls please bot'.
     dummy = robot.Robot(ROBOT_NAME.capitalize(), '1', profile_url='_wave/profile.xml')
     for event in (events.WAVELET_SELF_ADDED, events.DOCUMENT_CHANGED, events.BLIP_SUBMITTED):
         dummy.RegisterHandler(event, on_blip_change)
     dummy.Run(debug=True)
	#!/usr/bin/env python
	# vim: set fileencoding=utf-8

	__author__ = 'Filip Noetzel <http://github.com/peritus/>'
	__url__ = 'http://longurlspleasebot.appspot.com/'

	import logging
	import re
	import urllib2

	from api import simplejson
	from api import events
	from api import robot
	from api import document

	from google.appengine.api import memcache

	ROBOT_NAME = 'Long URLs Please Bot'

	# Host names of the URL shortening services whose links get expanded.
	SUPPORTED_SERVICES = [
	    'bit.ly', 'cli.gs', 'digg.com', 'is.gd', 'kl.am', 'su.pr', 'tinyurl.com',
	    '307.to', 'adjix.com', 'b23.ru', 'bacn.me', 'bloat.me', 'budurl.com',
	    'clipurl.us', 'cort.as', 'dwarfurl.com', 'ff.im', 'fff.to', 'href.in',
	    'idek.net', 'korta.nu', 'lin.cr', 'ln-s.net', 'loopt.us', 'lost.in',
	    'memurl.com', 'merky.de', 'migre.me', 'moourl.com', 'nanourl.se', 'ow.ly',
	    'peaurl.com', 'ping.fm', 'piurl.com', 'plurl.me', 'pnt.me', 'poprl.com',
	    'post.ly', 'rde.me', 'reallytinyurl.com', 'redir.ec', 'rubyurl.com',
	    'short.ie', 'short.to', 'smallr.com', 'sn.im', 'sn.vc', 'snipr.com',
	    'snipurl.com', 'snurl.com', 'tiny.cc', 'tinysong.com', 'togoto.us', 'tr.im',
	    'tra.kz', 'trg.li', 'twurl.cc', 'twurl.nl', 'u.mavrev.com', 'u.nu', 'ur1.ca',
	    'url.az', 'url.ie', 'urlx.ie', 'w34.us', 'xrl.us', 'yep.it', 'zi.ma',
	    'zurl.ws', 'chilp.it', 'notlong.com', 'qlnk.net', 'trim.li',
	]

	# Matches an http:// URL on one of the supported hosts, up to (not including)
	# the next space, tab, newline or carriage return.  The hosts are joined
	# with a plain '|' (regex alternation), and every host name goes through
	# re.escape() so that its dots only match a literal '.'.
	FINDURLS_REGEX = re.compile(
	    r'(http://(?:'
	    + '|'.join(re.escape(service.strip()) for service in SUPPORTED_SERVICES)
	    + r')/[^ \t\n\r]+)',
	    re.MULTILINE)

	def get_longurls_for(list_of_urls):
	    '''
	    Look up the long equivalents of short URLs on longurlplease.com.

	    Uses http://www.longurlplease.com/docs#api

	    list_of_urls -- the short URLs to resolve.

	    Returns the decoded JSON answer of the service (taken from memcache
	    when the identical query is cached), an empty dict when list_of_urls
	    is empty, or None when the HTTP request fails.
	    '''

	    # Nothing to resolve: skip both the cache lookup and the HTTP round trip.
	    if not list_of_urls:
	        return {}

	    # NOTE(review): the URLs are joined into the query string verbatim (no
	    # percent-encoding) -- confirm the service copes with '&' or '#' inside
	    # a short URL.
	    url = 'http://www.longurlplease.com/api/v1.1?q=' + "&q=".join(list_of_urls)

	    # The full request URL doubles as the cache key.
	    from_cache = memcache.get(url)

	    if from_cache is not None:
	        logging.debug("Found %s in cache", url)
	        return from_cache

	    try:
	        result = urllib2.urlopen(url)
	    except urllib2.URLError:
	        # logging.exception() records the active traceback on its own; no
	        # exc_info keyword is needed.
	        logging.exception("HTTP ERROR!!!")
	        return None

	    try:
	        answer = simplejson.loads(result.read())
	    finally:
	        # Release the connection even when reading or decoding fails.
	        result.close()

	    logging.info("Fetched %s: %s", url, answer)

	    # 60*60 is passed as the expiry argument (one hour, in seconds).
	    memcache.add(url, answer, 60*60)
	    return answer

	def get_substitution_plan(string):
	    '''
	    Work out which short URLs in string should be replaced by which long URL.

	    Returns a list of (tinyurl, longurl, start, end) tuples in order of
	    appearance, where start and end are the offsets of tinyurl in string.
	    URLs for which no different, non-empty long URL is known are left out.
	    An empty list is returned when string holds no supported short URL or
	    when the lookup fails.

	    The examples below query the live service:

	    >>> get_substitution_plan('baz http://example.com/foo bar')
	    []
	    >>> get_substitution_plan('baz http://tinyurl.com/THISTHINGDOESNOTEXIST baz')
	    []
	    >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 bar')
	    [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46)]
	    >>> get_substitution_plan('baz http://tinyurl.com/longurlpleasebot-test-1 http://tinyurl.com/longurlpleasebot-test-2 bar')
	    [(u'http://tinyurl.com/longurlpleasebot-test-1', u'http://example.com/1.html', 4, 46), (u'http://tinyurl.com/longurlpleasebot-test-2', u'http://example.com/2.html', 47, 89)]
	    '''

	    # find all tinyurls
	    tosubstitute = [(unicode(found.group()), found.start(), found.end())
	                    for found in FINDURLS_REGEX.finditer(string)]
	    if not tosubstitute:
	        # no short URL in the text, so there is nothing to look up
	        return []

	    # get the corresponding long equivalents
	    longurls = get_longurls_for([tinyurl for tinyurl, _start, _end in tosubstitute])
	    if not longurls:
	        # get_longurls_for() returns None when the HTTP request fails;
	        # treat that (and an empty answer) as "nothing to substitute"
	        return []

	    # make a plan, skipping every step where the long URL is unknown, empty
	    # or identical to the short one
	    plan = []
	    for tinyurl, start, end in tosubstitute:
	        longurl = longurls.get(tinyurl)
	        if longurl and longurl != tinyurl:
	            plan.append((tinyurl, longurl, start, end))
	    return plan

	def on_blip_change(properties, context):
	    '''
	    Event handler: replace supported short URLs in every blip of context
	    by their long equivalents.

	    properties -- event properties; not used by this handler.
	    context    -- event context; its blips are read and modified.
	    '''
	    blips = context.GetBlips()

	    logging.debug("GetBlips %s", repr(blips))
	    logging.debug("GetChildBlipIds %s", repr([blip.GetChildBlipIds() for blip in blips]))
	    logging.debug("GetRootWavelet %s", repr(context.GetRootWavelet()))
	    logging.debug("GetWavelets %s", repr(context.GetWavelets()))
	    logging.debug("GetWaves %s", repr(context.GetWaves()))

	    for blip in blips:
	        doc = blip.GetDocument()
	        text = doc.GetText()
	        logging.debug("Text: '%s'", text)

	        # The offsets in the plan refer to the unmodified text.  A long URL
	        # usually differs in length from the short one, so the replacements
	        # are applied from the last match to the first: the text in front
	        # of a replacement never moves, which keeps the remaining offsets
	        # valid.
	        for tinyurl, longurl, start, end in reversed(get_substitution_plan(text)):

	            logging.debug("Substituting: '%s', s/%s/%s/ (from %d, %d)", doc.GetText(), repr(tinyurl), repr(longurl), start, end)

	            doc.SetTextInRange(
	                document.Range(start, start + len(tinyurl)),
	                longurl
	            )

	if __name__ == '__main__':
	    # Script entry point: create the robot, hook on_blip_change up to every
	    # event that can bring new text, then start serving.
	    # NOTE: str.capitalize() lower-cases everything after the first
	    # character, so the name handed to Robot is 'Long urls please bot'.
	    dummy = robot.Robot(ROBOT_NAME.capitalize(), '1', profile_url='_wave/profile.xml')
	    dummy.RegisterHandler(events.WAVELET_SELF_ADDED, on_blip_change)
	    dummy.RegisterHandler(events.DOCUMENT_CHANGED, on_blip_change)
	    dummy.RegisterHandler(events.BLIP_SUBMITTED, on_blip_change)
	    dummy.Run(debug=True)