Skip to content

Instantly share code, notes, and snippets.

@hktechn0
Created February 18, 2010 20:32
Show Gist options
  • Save hktechn0/308029 to your computer and use it in GitHub Desktop.
Save hktechn0/308029 to your computer and use it in GitHub Desktop.
Pick out the title from an HTML page
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import HTMLParser
import htmlentitydefs
import sys
import urllib2
# Fetch the page whose URL is given as the first command-line argument.
# Without an argument, exit with a usage message instead of an IndexError.
if len(sys.argv) < 2:
    sys.exit("usage: %s URL" % sys.argv[0])
response = urllib2.urlopen(sys.argv[1])
try:
    # Raw, still-encoded bytes of the response body; decoding to unicode
    # happens later, when the parser collects the title text.
    html = response.read()
finally:
    # Close the connection explicitly so the socket is released even if
    # read() fails part-way through.
    response.close()
class titleparser(HTMLParser.HTMLParser):
    """Collect the text found inside ``<title>`` elements of an HTML document.

    Pass markup to ``feed()``; the text gathered so far is available as the
    unicode string ``title``.

    Attributes:
        title: Unicode text collected from inside ``<title>`` so far.
        titleflg: True while the parser is between ``<title>`` and
            ``</title>``.
        encoding: Codec used to decode byte-string data (``"utf-8"`` by
            default). Assign another codec name on the instance for pages
            served in a different charset.
    """

    title = u""
    titleflg = False
    encoding = "utf-8"

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a ``title`` tag opens."""
        if tag == "title":
            self.titleflg = True

    def handle_endtag(self, tag):
        """Stop collecting text when the ``title`` tag closes."""
        if tag == "title":
            self.titleflg = False

    def handle_data(self, data):
        """Append a run of text to ``title`` while inside ``<title>``.

        Byte strings are decoded with ``self.encoding``; undecodable bytes
        become U+FFFD rather than raising, so a page in an unexpected
        charset still yields a (partially garbled) title instead of a crash.
        Text that is already unicode is appended untouched.
        """
        if not self.titleflg:
            return
        if isinstance(data, bytes):
            data = data.decode(self.encoding, "replace")
        self.title += data

    def handle_charref(self, ref):
        """Append the character for a numeric reference (``&#65;``, ``&#x41;``).

        ``ref`` is the text between ``&#`` and ``;``. A leading ``x``/``X``
        marks a hexadecimal code point. References that cannot be converted
        to a character are kept literally so no title text is lost.
        """
        if not self.titleflg:
            return
        try:
            if ref[:1] in ("x", "X"):
                codepoint = int(ref[1:], 16)
            else:
                codepoint = int(ref, 10)
            self.title += unichr(codepoint)
        except (ValueError, OverflowError):
            # Malformed number or a code point unichr() cannot represent.
            self.title += u"&#%s;" % ref

    def handle_entityref(self, name):
        """Append the character for a named entity such as ``&amp;``.

        ``name`` is the entity name without ``&`` and ``;``. Names missing
        from the standard HTML entity table are kept literally.
        """
        if not self.titleflg:
            return
        codepoint = htmlentitydefs.name2codepoint.get(name)
        if codepoint is None:
            self.title += u"&%s;" % name
        else:
            self.title += unichr(codepoint)
# Parse the downloaded page and print the title text it contains.
p = titleparser()
p.feed(html)
# p.title is a unicode string. Encode it explicitly before printing: when
# stdout is a pipe or file, sys.stdout.encoding is None on Python 2 and an
# implicit ASCII encode would raise UnicodeEncodeError on non-ASCII titles.
# Characters the target encoding cannot represent are replaced, not fatal.
print(p.title.encode(sys.stdout.encoding or "utf-8", "replace"))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment