jeremyfelt · January 21, 2013 07:46
diff --git a/a_better_opml.py b/a_better_opml.py
 import tweepy
 from BeautifulSoup import BeautifulSoup as parser
 import urllib
 import sys

 # OAuth consumer credentials; they are passed to tweepy.OAuthHandler further
 # down. Left empty here - presumably filled in with real values before running.
 consumer_key=''
 consumer_secret=''

 # OAuth access token pair, handed to auth.set_access_token further down.
 access_token=''
 access_token_secret=''

 def detect_feeds_in_HTML(input_stream):
    """Return the feed URLs advertised by an HTML document.

    Feeds are discovered through ``<link rel="alternate">`` tags whose ``type``
    attribute names a syndication format (RSS, Atom or RDF).  Other alternate
    links (translations, mobile versions, ...) are skipped so that they are not
    mistaken for feeds.

    :param input_stream: an opened input stream that has a :func:`read` method.
    :type input_stream: an input stream (e.g. open file or URL)
    :return: the ``href`` value of every feed link that was found
    :rtype: ``list(str)``
    :raises TypeError: if *input_stream* has no ``read`` attribute
    """
    # MIME types used for feed autodiscovery
    feed_types = (
        "application/rss+xml",
        "application/atom+xml",
        "application/rdf+xml",
    )
    # fail early with a clear message when something other than a stream is given
    if not hasattr(input_stream, "read"):
        raise TypeError("An opened input *stream* should be given, was %s instead!" % type(input_stream))
    # parse the HTML read from the input stream
    html = parser(input_stream.read())
    result = []
    for feed_link in html.findAll("link", rel="alternate"):
        # rel="alternate" alone does not make a feed, the type decides;
        # parameters such as "; charset=utf-8" are ignored for the comparison
        link_type = (feed_link.get("type", None) or "").split(";")[0].strip().lower()
        url = feed_link.get("href", None)
        # keep only feed links that actually carry a URL
        if url and link_type in feed_types:
            result.append(url)
    return result

 def chunks(l, n):
    """Yield successive n-sized chunks from l.

    The last chunk is shorter when ``len(l)`` is not a multiple of ``n``.

    :param l: a sliceable sequence with a length (e.g. a list)
    :param n: maximum number of items per chunk; must be at least 1
    :return: a generator over slices of ``l``
    :raises ValueError: on first iteration, if ``n`` is smaller than 1
    """
    # a non-positive size would either fail obscurely or silently yield nothing
    if n < 1:
        raise ValueError("chunk size must be at least 1, was %r" % (n,))
    # ``range`` instead of the Python-2-only ``xrange`` keeps this helper usable
    # on Python 2 and Python 3 alike
    for start in range(0, len(l), n):
        yield l[start:start + n]

 # Build the OAuth handler from the consumer credentials and attach the access
 # token pair; both come from the constants defined near the top of the file.
 auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
 auth.set_access_token(access_token, access_token_secret)

 # API client used for every request made by the rest of the script.
 api = tweepy.API(auth)
 # NOTE(review): `me` is not referenced anywhere else in the visible code.
 me = api.me()
 # Ids that the main loop later looks up in batches of 100.
 # NOTE(review): presumably this returns a single page of ids - confirm
 # against the tweepy paging documentation for accounts following many users.
 friends = api.friends_ids()

 # Opening part of the OPML document: XML declaration, <head> with the title
 # and the start of the single outline element that encloses all feed entries.
 opml_start = """<?xml version="1.0" encoding="UTF-8"?>
 <opml version="1.1">
 <head>
 <title>People I follow</title>
 </head>
 <body>
 <outline text="People I follow" title="People I follow">"""

 # Closing part of the OPML document; closes the tags opened by opml_start.
 opml_end = """</outline>
 </body>
 </opml>"""

 # %-format template for one feed entry; expects a mapping with the keys
 # 'title', 'html_url' and 'xml_url'.
 opml_outline_feed = '<outline text="%(title)s" title="%(title)s" type="rss" version="RSS" htmlUrl="%(html_url)s" xmlUrl="%(xml_url)s" />'

 # Local imports keep this section self-contained; ``urlparse`` is the
 # Python 2 module name, matching the rest of the script (urllib.urlopen).
 from urlparse import urljoin
 from xml.sax.saxutils import escape


 def _xml_attr(value):
    """Escape *value* so it is safe inside a double-quoted XML attribute."""
    return escape(value, {'"': '&quot;'})


 def _decode_entities(text):
    """Parse *text* with BeautifulSoup, converting HTML entities, and return the first parsed node."""
    return parser(text, convertEntities=parser.HTML_ENTITIES).contents[0]


 # Emit the OPML document on stdout: header, one <outline> per discovered
 # feed, footer.  Problems with a single site are reported on stderr and do
 # not abort the run.
 print(opml_start)
 # users are looked up in batches of 100 ids per request
 for id_batch in chunks(friends, 100):
    for user in api.lookup_users(id_batch):
        # users without a profile URL have no site to scan
        if not user.url:
            continue
        print("<!-- %s -->" % user.screen_name)
        try:
            site = urllib.urlopen(user.url)
            try:
                # the profile URL may redirect, so relative feed links are
                # resolved against the URL that was actually served
                base_url = site.geturl()
                feed_links = detect_feeds_in_HTML(site)
            finally:
                # always release the connection, even if parsing fails
                site.close()
            # the same for every feed of this user, so computed only once
            html_url = _decode_entities(user.url)
            for link in feed_links:
                # urljoin handles absolute, root-relative and relative hrefs
                xml_url = urljoin(base_url, _decode_entities(link))
                # values go into XML attributes and therefore must be escaped
                line = opml_outline_feed % {
                    'title': _xml_attr(user.name),
                    'html_url': _xml_attr(html_url),
                    'xml_url': _xml_attr(xml_url),
                }
                # Python 2 falls back to ASCII when stdout is redirected;
                # encode explicitly to the UTF-8 promised by the XML header
                if isinstance(line, unicode):
                    line = line.encode("utf-8")
                print(line)
        except Exception as err:
            # deliberate best effort: one unreachable or broken site must not
            # stop the export, so report it and carry on with the next user
            sys.stderr.write('ERROR: %s\n' % str(err))

 print(opml_end)
	import tweepy
	from BeautifulSoup import BeautifulSoup as parser
	import urllib
	import sys

	# OAuth consumer credentials; they are passed to tweepy.OAuthHandler further
	# down. Left empty here - presumably filled in with real values before running.
	consumer_key=''
	consumer_secret=''

	# OAuth access token pair, handed to auth.set_access_token further down.
	access_token=''
	access_token_secret=''

	def detect_feeds_in_HTML(input_stream):
	    """Return the feed URLs advertised by an HTML document.

	    Feeds are discovered through ``<link rel="alternate">`` tags whose ``type``
	    attribute names a syndication format (RSS, Atom or RDF).  Other alternate
	    links (translations, mobile versions, ...) are skipped so that they are not
	    mistaken for feeds.

	    :param input_stream: an opened input stream that has a :func:`read` method.
	    :type input_stream: an input stream (e.g. open file or URL)
	    :return: the ``href`` value of every feed link that was found
	    :rtype: ``list(str)``
	    :raises TypeError: if *input_stream* has no ``read`` attribute
	    """
	    # MIME types used for feed autodiscovery
	    feed_types = (
	        "application/rss+xml",
	        "application/atom+xml",
	        "application/rdf+xml",
	    )
	    # fail early with a clear message when something other than a stream is given
	    if not hasattr(input_stream, "read"):
	        raise TypeError("An opened input stream should be given, was %s instead!" % type(input_stream))
	    # parse the HTML read from the input stream
	    html = parser(input_stream.read())
	    result = []
	    for feed_link in html.findAll("link", rel="alternate"):
	        # rel="alternate" alone does not make a feed, the type decides;
	        # parameters such as "; charset=utf-8" are ignored for the comparison
	        link_type = (feed_link.get("type", None) or "").split(";")[0].strip().lower()
	        url = feed_link.get("href", None)
	        # keep only feed links that actually carry a URL
	        if url and link_type in feed_types:
	            result.append(url)
	    return result

	def chunks(l, n):
	    """Yield successive n-sized chunks from l.

	    The last chunk is shorter when ``len(l)`` is not a multiple of ``n``.

	    :param l: a sliceable sequence with a length (e.g. a list)
	    :param n: maximum number of items per chunk; must be at least 1
	    :return: a generator over slices of ``l``
	    :raises ValueError: on first iteration, if ``n`` is smaller than 1
	    """
	    # a non-positive size would either fail obscurely or silently yield nothing
	    if n < 1:
	        raise ValueError("chunk size must be at least 1, was %r" % (n,))
	    # ``range`` instead of the Python-2-only ``xrange`` keeps this helper usable
	    # on Python 2 and Python 3 alike
	    for start in range(0, len(l), n):
	        yield l[start:start + n]

	# Build the OAuth handler from the consumer credentials and attach the access
	# token pair; both come from the constants defined earlier in this listing.
	auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
	auth.set_access_token(access_token, access_token_secret)

	# API client used for every request made by the rest of the script.
	api = tweepy.API(auth)
	# NOTE(review): `me` is not referenced anywhere else in the visible code.
	me = api.me()
	# Ids that the main loop later looks up in batches of 100.
	# NOTE(review): presumably this returns a single page of ids - confirm
	# against the tweepy paging documentation for accounts following many users.
	friends = api.friends_ids()

	# Opening part of the OPML document: XML declaration, <head> with the title
	# and the start of the single outline element that encloses all feed entries.
	opml_start = """<?xml version="1.0" encoding="UTF-8"?>
	<opml version="1.1">
	<head>
	<title>People I follow</title>
	</head>
	<body>
	<outline text="People I follow" title="People I follow">"""

	# Closing part of the OPML document; closes the tags opened by opml_start.
	opml_end = """</outline>
	</body>
	</opml>"""

	# %-format template for one feed entry; expects a mapping with the keys
	# 'title', 'html_url' and 'xml_url'.
	opml_outline_feed = '<outline text="%(title)s" title="%(title)s" type="rss" version="RSS" htmlUrl="%(html_url)s" xmlUrl="%(xml_url)s" />'

	# Local imports keep this section self-contained; ``urlparse`` is the
	# Python 2 module name, matching the rest of the script (urllib.urlopen).
	from urlparse import urljoin
	from xml.sax.saxutils import escape


	def _xml_attr(value):
	    """Escape *value* so it is safe inside a double-quoted XML attribute."""
	    return escape(value, {'"': '&quot;'})


	def _decode_entities(text):
	    """Parse *text* with BeautifulSoup, converting HTML entities, and return the first parsed node."""
	    return parser(text, convertEntities=parser.HTML_ENTITIES).contents[0]


	# Emit the OPML document on stdout: header, one <outline> per discovered
	# feed, footer.  Problems with a single site are reported on stderr and do
	# not abort the run.
	print(opml_start)
	# users are looked up in batches of 100 ids per request
	for id_batch in chunks(friends, 100):
	    for user in api.lookup_users(id_batch):
	        # users without a profile URL have no site to scan
	        if not user.url:
	            continue
	        print("<!-- %s -->" % user.screen_name)
	        try:
	            site = urllib.urlopen(user.url)
	            try:
	                # the profile URL may redirect, so relative feed links are
	                # resolved against the URL that was actually served
	                base_url = site.geturl()
	                feed_links = detect_feeds_in_HTML(site)
	            finally:
	                # always release the connection, even if parsing fails
	                site.close()
	            # the same for every feed of this user, so computed only once
	            html_url = _decode_entities(user.url)
	            for link in feed_links:
	                # urljoin handles absolute, root-relative and relative hrefs
	                xml_url = urljoin(base_url, _decode_entities(link))
	                # values go into XML attributes and therefore must be escaped
	                line = opml_outline_feed % {
	                    'title': _xml_attr(user.name),
	                    'html_url': _xml_attr(html_url),
	                    'xml_url': _xml_attr(xml_url),
	                }
	                # Python 2 falls back to ASCII when stdout is redirected;
	                # encode explicitly to the UTF-8 promised by the XML header
	                if isinstance(line, unicode):
	                    line = line.encode("utf-8")
	                print(line)
	        except Exception as err:
	            # deliberate best effort: one unreachable or broken site must not
	            # stop the export, so report it and carry on with the next user
	            sys.stderr.write('ERROR: %s\n' % str(err))

	print(opml_end)