Skip to content

Instantly share code, notes, and snippets.

@dmiro
Created February 27, 2015 17:39
Show Gist options
  • Select an option

  • Save dmiro/d5c6738ed11c7dfc8495 to your computer and use it in GitHub Desktop.

Select an option

Save dmiro/d5c6738ed11c7dfc8495 to your computer and use it in GitHub Desktop.
HTML2Text
def HTML2Text(html):
    """Return the visible text of *html* as a single space-separated string.

    The markup is parsed with ``HTMLParser`` (which must be importable in
    the enclosing module scope).  Every run of character data is split on
    whitespace and the resulting words are joined with single spaces, so
    all original whitespace and line breaks are collapsed.  Character data
    inside ``<script>`` and ``<style>`` elements is ignored.

    Args:
        html: The HTML document or fragment to convert.

    Returns:
        The extracted words joined by single spaces; an empty string when
        the input contains no text outside script/style elements.
    """

    class _HTMLParser(HTMLParser):
        """Collect whitespace-separated words found outside script/style."""

        # Elements whose character data is code, not readable text.
        _SKIPPED_TAGS = ('script', 'style')

        def __init__(self):
            HTMLParser.__init__(self)
            self._text = []
            # Number of currently open script/style elements.
            self._skip_depth = 0

        def handle_starttag(self, tag, attrs):
            # Exact name match, so tags that merely begin with "style" or
            # "script" (e.g. custom elements) are not treated as skipped.
            if tag in self._SKIPPED_TAGS:
                self._skip_depth += 1

        def handle_endtag(self, tag):
            # Track the closing tag explicitly: relying on the last start
            # tag seen would keep suppressing text that follows
            # </script> or </style> until another element is opened.
            if tag in self._SKIPPED_TAGS and self._skip_depth:
                self._skip_depth -= 1

        def handle_data(self, data):
            if not self._skip_depth:
                self._text.extend(data.split())

    h = _HTMLParser()
    h.feed(html)
    # Flush anything the parser is still buffering (e.g. trailing text that
    # ends in what might be an incomplete character reference).
    h.close()
    return ' '.join(h._text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment