Skip to content

Instantly share code, notes, and snippets.

@scturtle
Created March 19, 2013 14:10
Show Gist options
  • Save scturtle/5196402 to your computer and use it in GitHub Desktop.
Save scturtle/5196402 to your computer and use it in GitHub Desktop.
Get web page content via Instapaper (slow but quick)
# coding: utf-8
import requests
from urllib import quote_plus
from bs4 import BeautifulSoup
def get_html(url, timeout=30):
    """Fetch Instapaper's text-only rendering of *url* and extract the article.

    Args:
        url: Address of the page to retrieve through Instapaper's text view.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of three UTF-8 encoded values, in order: the title text, the
        story element rendered as markup, and the story's plain text.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        AttributeError: If the response lacks the ``titlebar`` heading or
            the ``story`` container this function looks up.
    """
    # Without a timeout, requests waits indefinitely on a stalled server.
    response = requests.get(
        'http://www.instapaper.com/text?u=' + quote_plus(url),
        timeout=timeout,
    )
    # Name the parser explicitly so the parse tree does not depend on which
    # optional parsers happen to be installed alongside BeautifulSoup.
    soup = BeautifulSoup(response.content, 'html.parser')
    title = soup.find('div', {'id': 'titlebar'}).find('h1').get_text()
    content = soup.find('div', {'id': 'story'})
    return [part.encode('utf-8') for part in (title, content, content.text)]
if __name__=='__main__':
t, c, ct = get_html('http://www.solidot.org/story?sid=33854')
print t, ct
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment