Skip to content

Instantly share code, notes, and snippets.

@jeremyfelt
Forked from selenamarie/a_better_opml.py
Created January 21, 2013 07:46
Show Gist options
  • Save jeremyfelt/4584337 to your computer and use it in GitHub Desktop.
Save jeremyfelt/4584337 to your computer and use it in GitHub Desktop.
import contextlib
import sys
import urllib
import urlparse
from xml.sax import saxutils

import tweepy
from BeautifulSoup import BeautifulSoup as parser
# Twitter OAuth credentials.  They are handed to tweepy's OAuthHandler and
# set_access_token() further down; all four are blank in this file, so
# presumably they have to be filled in before the script can authenticate.
consumer_key = ""
consumer_secret = ""
access_token = ""
access_token_secret = ""
def detect_feeds_in_HTML(input_stream):
    """Return the feed URLs advertised by an HTML document.

    The document is read from *input_stream* and scanned for
    ``<link rel="alternate">`` tags.  Only links whose ``type`` attribute is
    a syndication-feed MIME type (RSS, Atom or RDF) are kept; other
    ``alternate`` links, e.g. translations declared with ``hreflang``, are
    not feeds and are skipped.

    :param input_stream: an arbitrary opened input stream that has a :func:`read` method.
    :type input_stream: an input stream (e.g. open file or URL)
    :return: the ``href`` value of every feed link found, exactly as written
        in the document (it may be relative and may still contain HTML
        entities); an empty list when there is none
    :rtype: ``list(str)``
    :raises TypeError: if *input_stream* has no ``read`` attribute
    """
    # check if really an input stream
    if not hasattr(input_stream, "read"):
        raise TypeError("An opened input *stream* should be given, was %s instead!" % type(input_stream))

    # MIME types that mark an "alternate" link as a syndication feed.
    feed_types = (
        "application/rss+xml",
        "application/atom+xml",
        "application/rdf+xml",
    )

    # get the textual data (the HTML) from the input stream
    html = parser(input_stream.read())

    result = []
    # find all links that have rel="alternate"
    for feed_link in html.findAll("link", rel="alternate"):
        # MIME types are case-insensitive; tolerate stray whitespace too.
        link_type = (feed_link.get("type", None) or "").strip().lower()
        url = feed_link.get("href", None)
        # keep the link only if it is a feed and actually carries a URL
        if url and link_type in feed_types:
            result.append(url)
    return result
def chunks(l, n):
    """Yield successive n-sized chunks from l.

    :param l: a sequence supporting ``len()`` and slicing (list, tuple, str)
    :param n: maximum number of items per chunk; the last chunk holds
        whatever is left and may be shorter
    :return: a generator of slices of *l*, in order; nothing is yielded for
        an empty sequence
    :raises ValueError: on first iteration if *n* is zero (a zero step is
        rejected by ``range``)
    """
    # range() instead of xrange(): the chunks are the same, and the name
    # exists on both Python 2 and Python 3.
    for i in range(0, len(l), n):
        yield l[i:i + n]
def _decode_entities(text):
    """Return *text* with its HTML entities replaced by the characters they name."""
    # BeautifulSoup decodes entities while parsing; the decoded string is the
    # first node of the parsed document.
    return parser(text, convertEntities=parser.HTML_ENTITIES).contents[0]


def _xml_attr(value):
    """Escape *value* so it can sit inside a double-quoted XML attribute."""
    return saxutils.escape(value, {'"': '&quot;'})


def _emit(line):
    """Print *line* to stdout, encoding text as UTF-8 first."""
    # The OPML header declares UTF-8, so encode explicitly instead of relying
    # on the encoding Python guesses for stdout.
    if not isinstance(line, bytes):
        line = line.encode("utf-8")
    print(line)


# --- Twitter session -------------------------------------------------------
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
# NOTE(review): `me` is not used anywhere below; presumably the call is kept
# so that bad credentials fail early -- confirm before removing.
me = api.me()
friends = api.friends_ids()

# --- OPML templates --------------------------------------------------------
opml_start = """<?xml version="1.0" encoding="UTF-8"?>
<opml version="1.1">
<head>
<title>People I follow</title>
</head>
<body>
<outline text="People I follow" title="People I follow">"""
opml_end = """</outline>
</body>
</opml>"""
opml_outline_feed = '<outline text="%(title)s" title="%(title)s" type="rss" version="RSS" htmlUrl="%(html_url)s" xmlUrl="%(xml_url)s" />'

# --- Export ----------------------------------------------------------------
# Single-argument print(...) is used throughout because it parses the same
# way with and without the print statement.
print(opml_start)
# Users are looked up 100 ids per lookup_users() call.
for c in chunks(friends, 100):
    users = api.lookup_users(c)
    for u in users:
        # Only accounts that list a website can advertise a feed.
        if not u.url:
            continue
        print("<!-- %s -->" % u.screen_name)
        try:
            # closing() guarantees the HTTP response is released even when
            # parsing fails.
            with contextlib.closing(urllib.urlopen(u.url)) as site:
                # Relative feed links must be resolved against the URL that
                # was actually served (after redirects), not the profile URL.
                base_url = site.geturl()
                feed_links = detect_feeds_in_HTML(site)
            # The profile URL is the same for every feed of this user.
            html_url = _decode_entities(u.url)
            for link in feed_links:
                # urljoin leaves absolute links alone and resolves relative,
                # root-relative and protocol-relative ones correctly.
                xml_url = urlparse.urljoin(base_url, _decode_entities(link))
                _emit(opml_outline_feed % {
                    'title': _xml_attr(u.name),
                    'html_url': _xml_attr(html_url),
                    'xml_url': _xml_attr(xml_url),
                })
        except Exception as err:
            # Best effort: one unreachable or malformed site must not abort
            # the whole export, so report it and move on to the next user.
            sys.stderr.write('ERROR: %s\n' % str(err))
print(opml_end)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment