clehner · July 26, 2010 04:21
diff --git a/append_content.py b/append_content.py
 from BeautifulSoup import BeautifulSoup, NavigableString
 from waveapi import element
 import re
 import htmlentitydefs

 # Text marker that stands in for each <img> tag while the markup is appended;
 # it is later looked up in the blip and replaced with an image element.
 IMAGE_PLACEHOLDER = '***{{((IMAGE_ELEMENT))}}***'

 def append_content_to_blip(blip, content, type=None):
  '''
  Appends content to a blip, either as plain text or as HTML markup.

  blip: object the content is added to. It must provide append() for the
    plain text case, and append_markup() and first() for the HTML case.
  content: the string to append.
  type: when equal to 'text/plain' the content is appended as text, with
    tabs turned into spaces and CR / CRLF turned into a single newline.
    Any other value (including None) makes the content be treated as HTML.

  For HTML the entities are unescaped, the result is parsed with
  BeautifulSoup, every <img> is swapped for IMAGE_PLACEHOLDER, '&' in link
  hrefs is re-escaped, tabs become spaces and CR / LF are removed. The
  markup is appended with append_markup() and each placeholder is then
  replaced with an element.Image.

  Returns None.
  '''
  if type == 'text/plain':
    # Replace characters that Wave breaks on. CRLF pairs are collapsed first
    # so that a Windows line ending yields one newline instead of two.
    text = content.replace('\r\n', '\n').replace('\r', '\n')
    blip.append(text.replace('\t', ' '))
    return

  imgs = []

  # originally by Pamela Fox (Google)
  # http://google-wave-resources.googlecode.com/svn/trunk/samples/extensions/robots/python/maildigester/handler.py
  def cleanup(soup):
    for tag in soup:
      if isinstance(tag, NavigableString):
        continue
      if tag.name == 'img':
        imgs.append({'url': tag.get('src'),
                     'width': tag.get('width'),
                     'height': tag.get('height')})
        # replace it with an image element later
        tag.replaceWith(IMAGE_PLACEHOLDER)
      elif tag.name == 'a':
        # Anchors without an href (e.g. <a name="...">) are left untouched
        # instead of failing on the missing attribute.
        href = tag.get('href')
        if href is not None:
          tag['href'] = href.replace('&', '&amp;')
      cleanup(tag)

  # Entities are decoded before parsing, so escaped markup in the content
  # (e.g. &lt;b&gt;) is parsed as real tags.
  soup = BeautifulSoup(unescape(content).strip())
  cleanup(soup)
  html = unicode(soup).replace('\t', ' ')
  # Since it's HTML, it should use <br>s instead of line breaks.
  html = html.replace('\r', '').replace('\n', '')

  blip.append_markup(html)

  # Because append_markup doesn't accept images, we replace img tags in the
  # html with placeholders and then replace them with image elements.
  # Placeholders are resolved first-to-first, in the order the images were
  # collected.
  for img in imgs:
    image = element.Image(url=img['url'],
                          width=img['width'],
                          height=img['height'])
    placeholder = blip.first(IMAGE_PLACEHOLDER)
    # Image elements don't allow links on them.
    # So insert an extra space after images so that a link can still
    # be clicked if it would normally be on the image.
    placeholder.insert_after(' ')
    placeholder.replace(image)


 def unescape(text):
  '''
  Replaces HTML entities with unicode characters

  Numeric character references (decimal "&#65;", hexadecimal "&#x41;" or
  "&#X41;") and named entities ("&amp;") are replaced by the character
  they denote. Anything that cannot be resolved (unknown name, malformed
  or out-of-range number) is left in the text unchanged.

  text: string that may contain HTML entities.
  Returns the string with every resolvable entity replaced.

  by Fredrik Lundh
  http://effbot.org/zone/re-sub.htm#unescape-html
  '''
  # Resolve the entity table and the code point -> character function
  # locally, falling back to the Python 3 names when the Python 2 ones
  # are not available.
  try:
    from htmlentitydefs import name2codepoint
  except ImportError:
    from html.entities import name2codepoint
  try:
    to_char = unichr
  except NameError:
    to_char = chr

  def fixup(m):
    entity = m.group(0)
    if entity[:2] == "&#":
      # character reference
      digits = entity[2:-1]
      try:
        if digits[:1] in ("x", "X"):
          return to_char(int(digits[1:], 16))
        return to_char(int(digits))
      except (ValueError, OverflowError):
        # malformed number, or a code point that cannot be represented
        pass
    else:
      # named entity
      try:
        return to_char(name2codepoint[entity[1:-1]])
      except KeyError:
        pass
    return entity # leave as is
  return re.sub(r"&#?\w+;", fixup, text)
	from BeautifulSoup import BeautifulSoup, NavigableString
	from waveapi import element
	import re
	import htmlentitydefs

	# Text marker that stands in for each <img> tag while the markup is appended;
	# it is later looked up in the blip and replaced with an image element.
	# NOTE(review): the single asterisks may be what is left of a longer marker
	# after the page was rendered -- confirm against the original file.
	IMAGE_PLACEHOLDER = '*{{((IMAGE_ELEMENT))}}*'

	def append_content_to_blip(blip, content, type=None):
	  '''
	  Appends content to a blip, either as plain text or as HTML markup.

	  blip: object the content is added to. It must provide append() for the
	    plain text case, and append_markup() and first() for the HTML case.
	  content: the string to append.
	  type: when equal to 'text/plain' the content is appended as text, with
	    tabs turned into spaces and CR / CRLF turned into a single newline.
	    Any other value (including None) makes the content be treated as HTML.

	  For HTML the entities are unescaped, the result is parsed with
	  BeautifulSoup, every <img> is swapped for IMAGE_PLACEHOLDER, '&' in link
	  hrefs is re-escaped, tabs become spaces and CR / LF are removed. The
	  markup is appended with append_markup() and each placeholder is then
	  replaced with an element.Image.

	  Returns None.
	  '''
	  if type == 'text/plain':
	    # Replace characters that Wave breaks on. CRLF pairs are collapsed first
	    # so that a Windows line ending yields one newline instead of two.
	    text = content.replace('\r\n', '\n').replace('\r', '\n')
	    blip.append(text.replace('\t', ' '))
	    return

	  imgs = []

	  # originally by Pamela Fox (Google)
	  # http://google-wave-resources.googlecode.com/svn/trunk/samples/extensions/robots/python/maildigester/handler.py
	  def cleanup(soup):
	    for tag in soup:
	      if isinstance(tag, NavigableString):
	        continue
	      if tag.name == 'img':
	        imgs.append({'url': tag.get('src'),
	                     'width': tag.get('width'),
	                     'height': tag.get('height')})
	        # replace it with an image element later
	        tag.replaceWith(IMAGE_PLACEHOLDER)
	      elif tag.name == 'a':
	        # Anchors without an href (e.g. <a name="...">) are left untouched
	        # instead of failing on the missing attribute.
	        href = tag.get('href')
	        if href is not None:
	          # Re-escape ampersands; replacing '&' with itself did nothing.
	          tag['href'] = href.replace('&', '&amp;')
	      cleanup(tag)

	  # Entities are decoded before parsing, so escaped markup in the content
	  # (e.g. &lt;b&gt;) is parsed as real tags.
	  soup = BeautifulSoup(unescape(content).strip())
	  cleanup(soup)
	  html = unicode(soup).replace('\t', ' ')
	  # Since it's HTML, it should use <br>s instead of line breaks.
	  html = html.replace('\r', '').replace('\n', '')

	  blip.append_markup(html)

	  # Because append_markup doesn't accept images, we replace img tags in the
	  # html with placeholders and then replace them with image elements.
	  # Placeholders are resolved first-to-first, in the order the images were
	  # collected.
	  for img in imgs:
	    image = element.Image(url=img['url'],
	                          width=img['width'],
	                          height=img['height'])
	    placeholder = blip.first(IMAGE_PLACEHOLDER)
	    # Image elements don't allow links on them.
	    # So insert an extra space after images so that a link can still
	    # be clicked if it would normally be on the image.
	    placeholder.insert_after(' ')
	    placeholder.replace(image)


	def unescape(text):
	  '''
	  Replaces HTML entities with unicode characters

	  Numeric character references (decimal "&#65;", hexadecimal "&#x41;" or
	  "&#X41;") and named entities ("&amp;") are replaced by the character
	  they denote. Anything that cannot be resolved (unknown name, malformed
	  or out-of-range number) is left in the text unchanged.

	  text: string that may contain HTML entities.
	  Returns the string with every resolvable entity replaced.

	  by Fredrik Lundh
	  http://effbot.org/zone/re-sub.htm#unescape-html
	  '''
	  # Resolve the entity table and the code point -> character function
	  # locally, falling back to the Python 3 names when the Python 2 ones
	  # are not available.
	  try:
	    from htmlentitydefs import name2codepoint
	  except ImportError:
	    from html.entities import name2codepoint
	  try:
	    to_char = unichr
	  except NameError:
	    to_char = chr

	  def fixup(m):
	    entity = m.group(0)
	    if entity[:2] == "&#":
	      # character reference
	      digits = entity[2:-1]
	      try:
	        if digits[:1] in ("x", "X"):
	          return to_char(int(digits[1:], 16))
	        return to_char(int(digits))
	      except (ValueError, OverflowError):
	        # malformed number, or a code point that cannot be represented
	        pass
	    else:
	      # named entity
	      try:
	        return to_char(name2codepoint[entity[1:-1]])
	      except KeyError:
	        pass
	    return entity # leave as is
	  return re.sub(r"&#?\w+;", fixup, text)
No results found