Skip to content

Instantly share code, notes, and snippets.

@alexras
Created June 22, 2012 23:49
Show Gist options
  • Select an option

  • Save alexras/2975814 to your computer and use it in GitHub Desktop.

Select an option

Save alexras/2975814 to your computer and use it in GitHub Desktop.
Quick-and-dirty HTML tag stripper
#!/usr/bin/env python
import os
import sys

try:
    from HTMLParser import HTMLParser  # Python 2 module name
except ImportError:
    from html.parser import HTMLParser  # Python 3 module name
class MLStripper(HTMLParser):
    """HTML parser that throws away markup and keeps only the text content.

    Every run of character data seen while parsing is appended to
    ``self.fed``; ``get_data()`` returns those pieces joined together.
    """

    def __init__(self):
        # Call the base-class initializer instead of reset() directly: it
        # performs the reset itself and also sets up any extra parser state
        # (on Python 3's html.parser, the ``convert_charrefs`` option) that
        # feed() relies on.  The explicit form is used rather than super()
        # because Python 2's HTMLParser is an old-style class.
        HTMLParser.__init__(self)
        # Text fragments collected so far, in document order.
        self.fed = []

    def handle_data(self, d):
        """Parser callback: record one run of text found between tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)
def strip_tags(html):
    """Return the text content of ``html`` with all tags removed.

    The whole string is fed through a fresh ``MLStripper`` and the text it
    collected is returned as one string.
    """
    stripper = MLStripper()
    stripper.feed(html)
    text = stripper.get_data()
    return text
# Script body: print the visible text of the HTML file named on the command
# line, one non-blank line at a time.
if len(sys.argv) < 2:
    # Exit with a readable message (and non-zero status) instead of an
    # IndexError traceback when no input file is given.
    sys.exit("usage: %s HTML_FILE" % sys.argv[0])

# The context manager guarantees the file handle is closed after reading.
with open(sys.argv[1], 'r') as html_file:
    # <br> tags contribute no text of their own, so turn them into line
    # breaks before the markup is stripped.
    markup = html_file.read().replace("<br>", "\r\n")

for line in strip_tags(markup).splitlines():
    text = line.strip()
    # Skip lines that are empty once surrounding whitespace is removed.
    if text:
        # Single-argument parenthesised form prints identically on
        # Python 2 and Python 3.
        print(text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment