jepler · March 19, 2013 03:01
diff --git a/gistfile1.py b/gistfile1.py
 import os
 import re
 import sys
 import xml.etree.ElementTree
 import BeautifulSoup

 def exc_wrap(f):
    """Return a wrapper around *f* that routes every call through one
    try/except hook.

    The wrapper forwards all positional and keyword arguments to *f* and
    returns its result.  Any ``Exception`` raised by *f* is re-raised
    unchanged.  The wrapper keeps the name and docstring of *f*.
    """
    import functools

    @functools.wraps(f)
    def inner(*args, **kw):
        try:
            return f(*args, **kw)
        except Exception:
            # NOTE(review): the handler only re-raises, so the decorator is
            # currently transparent.  It looks like a placeholder for
            # debugging/logging -- confirm before removing it.
            raise
    return inner
    
 @exc_wrap
 def get_title(e):
    """Return the text of the Atom ``title`` child of *e*.

    The lookup is delegated to ``e.findtext``; whatever that returns for a
    missing child is passed through unchanged.
    """
    title_tag = '{http://www.w3.org/2005/Atom}title'
    title_text = e.findtext(title_tag)
    return title_text
            
 @exc_wrap
 def get_link(e):
    """Return the ``href`` attribute of the first Atom ``link`` child of *e*.

    Returns None when *e* has no ``link`` child, instead of raising
    AttributeError on the missing element, so one link-less entry does not
    abort processing of the whole feed.
    """
    link = e.find('{http://www.w3.org/2005/Atom}link')
    if link is None:
        return None
    return link.get('href')

 def normtext(el):
    """Return the text of *el* and its descendants as one normalized string.

    The ``text`` of every element yielded by ``el.iter()`` is joined with
    single spaces, the result is stripped, and every run of whitespace is
    collapsed to one space.  Elements without text contribute an empty
    string.  A ``normtext <el>`` trace line is written to stderr.
    """
    # Debug trace: same output as the former ``print >>sys.stderr`` statement,
    # but valid on both Python 2 and Python 3.
    sys.stderr.write("normtext %s\n" % (el,))
    # Element.iter() replaces getiterator(), which is deprecated and was
    # removed in Python 3.9.
    # NOTE(review): only ``.text`` is collected, so text held in ``.tail``
    # (text that follows a child element) is dropped -- confirm this is
    # intended before switching to itertext().
    pieces = [node.text or '' for node in el.iter()]
    return re.sub(r"\s+", " ", " ".join(pieces).strip())
            
 def bsparse(text):
    """Parse *text* with BeautifulSoup and rebuild it as an ElementTree tree.

    The soup is created with ``convertEntities`` set to
    ``BeautifulSoup.HTML_ENTITIES`` and then walked recursively: tags become
    elements carrying the tag's name and attributes, string nodes become
    character data, and comment nodes are skipped.  The walk starts at the
    soup object itself, so it forms the outermost element.  Returns whatever
    ``TreeBuilder.close()`` yields for the finished tree.
    """
    builder = xml.etree.ElementTree.TreeBuilder()

    def walk(node):
        # Tags (and the soup object itself) open an element, recurse into
        # their children, then close the element again.
        if not isinstance(node, BeautifulSoup.NavigableString):
            builder.start(node.name, dict(node.attrs))
            for child in node:
                walk(child)
            builder.end(node.name)
        # Comment is tested only for string nodes; comments emit nothing.
        elif not isinstance(node, BeautifulSoup.Comment):
            builder.data(node)

    document = BeautifulSoup.BeautifulSoup(
        text,
        convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
    walk(document)
    return builder.close()

 @exc_wrap
 def get_comment(e):
    """Return the sharer's comment embedded in the Atom ``summary`` of *e*.

    The summary text is parsed as HTML via ``bsparse``.  A comment is
    recognized only when the first child of the parsed tree is a ``table``
    with ``cellpadding="0"`` and the exact expected ``style``, that table has
    exactly two children, and the normalized text of the first child is
    ``'jepler shared this story'``.  The normalized text of the second child
    is then returned.  In every other case -- including an entry with no
    ``summary`` element or with an empty one -- None is returned.

    Side effects: writes trace lines to stderr and stores the parsed tree in
    the module-level global ``gb``.
    """
    global gb

    summary = e.find('{http://www.w3.org/2005/Atom}summary')
    # An entry without a (non-empty) summary simply carries no comment.  The
    # previous code dereferenced ``.text`` unconditionally and raised
    # AttributeError when the element was missing.
    if summary is None or summary.text is None:
        return None

    raw = summary.text
    sys.stderr.write("<<< %s\n" % repr(raw))
    tree = bsparse(raw)
    sys.stderr.write(">>> %s\n" % (tree,))

    # Explicit length test: truth-testing an Element is deprecated.
    if len(tree) == 0:
        return None

    # NOTE(review): debugging aid kept for compatibility -- exposes the last
    # parsed tree with children as the global ``gb``.
    gb = tree

    table = tree[0]
    if table.tag != 'table':
        return None
    if table.get('cellpadding') != "0":
        return None
    if table.get('style') != 'border: 1px solid #E0E0E0; margin: 0; padding: 0; background-color: #F0F0F0':
        return None

    # list(element) replaces getchildren(), which was removed in Python 3.9.
    rows = list(table)
    if len(rows) != 2:
        return None

    # Both children are normalized before the header check so the stderr
    # trace stays the same as before.
    t0 = normtext(rows[0])
    t1 = normtext(rows[1])
    if t0 != 'jepler shared this story':
        return None
    return t1

 def parse_rss(u):
    """Yield ``(title, link, comment)`` for each Atom ``entry`` in *u*.

    *u* is the feed document as a string.  It is parsed with
    ``xml.etree.ElementTree.fromstring`` and the ``entry`` elements found by
    ``findall`` on the root are processed in order.  Entries for which title,
    link and comment are all falsy are skipped.  The repr of every comment is
    written to stderr as a trace line.
    """
    feed = xml.etree.ElementTree.fromstring(u)
    for entry in feed.findall('{http://www.w3.org/2005/Atom}entry'):
        title = get_title(entry)
        link = get_link(entry)
        comment = get_comment(entry)
        # Same output as the former ``print >>sys.stderr`` statement, but
        # valid on both Python 2 and Python 3.
        sys.stderr.write("%s\n" % repr(comment))
        if title or link or comment:
            yield title, link, comment
	import os
	import re
	import sys
	import xml.etree.ElementTree
	import BeautifulSoup

	def exc_wrap(f):
	    """Return a wrapper around *f* that routes every call through one
	    try/except hook.

	    The wrapper forwards all positional and keyword arguments to *f* and
	    returns its result.  Any ``Exception`` raised by *f* is re-raised
	    unchanged.  The wrapper keeps the name and docstring of *f*.
	    """
	    import functools

	    # The ``*``/``**`` markers are restored here: the pasted form
	    # ``inner(args, *kw)`` could not accept keyword arguments or be called
	    # with no arguments.
	    @functools.wraps(f)
	    def inner(*args, **kw):
	        try:
	            return f(*args, **kw)
	        except Exception:
	            # NOTE(review): the handler only re-raises, so the decorator
	            # is currently transparent.  It looks like a placeholder for
	            # debugging/logging -- confirm before removing it.
	            raise
	    return inner

	@exc_wrap
	def get_title(e):
	    """Return the text of the Atom ``title`` child of *e*.

	    The lookup is delegated to ``e.findtext``; whatever that returns for
	    a missing child is passed through unchanged.
	    """
	    return e.findtext('{http://www.w3.org/2005/Atom}title')

	@exc_wrap
	def get_link(e):
	    """Return the ``href`` attribute of the first Atom ``link`` child of *e*.

	    Returns None when *e* has no ``link`` child, instead of raising
	    AttributeError on the missing element, so one link-less entry does
	    not abort processing of the whole feed.
	    """
	    link = e.find('{http://www.w3.org/2005/Atom}link')
	    if link is None:
	        return None
	    return link.get('href')

	def normtext(el):
	    """Return the text of *el* and its descendants as one normalized string.

	    The ``text`` of every element yielded by ``el.iter()`` is joined with
	    single spaces, the result is stripped, and every run of whitespace is
	    collapsed to one space.  Elements without text contribute an empty
	    string.  A ``normtext <el>`` trace line is written to stderr.
	    """
	    # Debug trace: same output as the former ``print >>sys.stderr``
	    # statement, but valid on both Python 2 and Python 3.
	    sys.stderr.write("normtext %s\n" % (el,))
	    # Element.iter() replaces getiterator(), which is deprecated and was
	    # removed in Python 3.9.
	    # NOTE(review): only ``.text`` is collected, so text held in ``.tail``
	    # (text that follows a child element) is dropped -- confirm this is
	    # intended before switching to itertext().
	    pieces = [node.text or '' for node in el.iter()]
	    return re.sub(r"\s+", " ", " ".join(pieces).strip())

	def bsparse(text):
	    """Parse *text* with BeautifulSoup and rebuild it as an ElementTree tree.

	    The soup is created with ``convertEntities`` set to
	    ``BeautifulSoup.HTML_ENTITIES`` and then walked recursively: tags
	    become elements carrying the tag's name and attributes, string nodes
	    become character data, and comment nodes are skipped.  The walk starts
	    at the soup object itself, so it forms the outermost element.  Returns
	    whatever ``TreeBuilder.close()`` yields for the finished tree.
	    """
	    soup = BeautifulSoup.BeautifulSoup(
	        text,
	        convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
	    builder = xml.etree.ElementTree.TreeBuilder()

	    def emit(node):
	        if isinstance(node, BeautifulSoup.NavigableString):
	            # Comments are string nodes too, but must not become text.
	            if isinstance(node, BeautifulSoup.Comment):
	                return
	            builder.data(node)
	        else:
	            builder.start(node.name, dict(node.attrs))
	            for child in node:
	                emit(child)
	            builder.end(node.name)

	    emit(soup)
	    return builder.close()

	@exc_wrap
	def get_comment(e):
	    """Return the sharer's comment embedded in the Atom ``summary`` of *e*.

	    The summary text is parsed as HTML via ``bsparse``.  A comment is
	    recognized only when the first child of the parsed tree is a ``table``
	    with ``cellpadding="0"`` and the exact expected ``style``, that table
	    has exactly two children, and the normalized text of the first child
	    is ``'jepler shared this story'``.  The normalized text of the second
	    child is then returned.  In every other case -- including an entry
	    with no ``summary`` element or with an empty one -- None is returned.

	    Side effects: writes trace lines to stderr and stores the parsed tree
	    in the module-level global ``gb``.
	    """
	    global gb

	    summary = e.find('{http://www.w3.org/2005/Atom}summary')
	    # An entry without a (non-empty) summary simply carries no comment.
	    # The previous code dereferenced ``.text`` unconditionally and raised
	    # AttributeError when the element was missing.
	    if summary is None or summary.text is None:
	        return None

	    raw = summary.text
	    sys.stderr.write("<<< %s\n" % repr(raw))
	    tree = bsparse(raw)
	    sys.stderr.write(">>> %s\n" % (tree,))

	    # Explicit length test: truth-testing an Element is deprecated.
	    if len(tree) == 0:
	        return None

	    # NOTE(review): debugging aid kept for compatibility -- exposes the
	    # last parsed tree with children as the global ``gb``.
	    gb = tree

	    table = tree[0]
	    if table.tag != 'table':
	        return None
	    if table.get('cellpadding') != "0":
	        return None
	    if table.get('style') != 'border: 1px solid #E0E0E0; margin: 0; padding: 0; background-color: #F0F0F0':
	        return None

	    # list(element) replaces getchildren(), removed in Python 3.9.
	    rows = list(table)
	    if len(rows) != 2:
	        return None

	    # Both children are normalized before the header check so the stderr
	    # trace stays the same as before.
	    t0 = normtext(rows[0])
	    t1 = normtext(rows[1])
	    if t0 != 'jepler shared this story':
	        return None
	    return t1

	def parse_rss(u):
	    """Yield ``(title, link, comment)`` for each Atom ``entry`` in *u*.

	    *u* is the feed document as a string.  It is parsed with
	    ``xml.etree.ElementTree.fromstring`` and the ``entry`` elements found
	    by ``findall`` on the root are processed in order.  Entries for which
	    title, link and comment are all falsy are skipped.  The repr of every
	    comment is written to stderr as a trace line.
	    """
	    feed = xml.etree.ElementTree.fromstring(u)
	    for entry in feed.findall('{http://www.w3.org/2005/Atom}entry'):
	        title = get_title(entry)
	        link = get_link(entry)
	        comment = get_comment(entry)
	        # Same output as the former ``print >>sys.stderr`` statement, but
	        # valid on both Python 2 and Python 3.
	        sys.stderr.write("%s\n" % repr(comment))
	        if title or link or comment:
	            yield title, link, comment