tazjel · July 5, 2013 06:10
diff --git a/gistfile1.py b/gistfile1.py
 from datetime import datetime
 import BeautifulSoup as soup
 import requests

 # Base URL; get_news() appends a page path to it for every request.
 host = 'http://news.ycombinator.com'

 def chunks(l, n):
    """Yield successive n-sized chunks from l.

    l must support len() and slicing.  The final chunk is shorter when
    len(l) is not a multiple of n, and an empty l yields nothing.
    """
    # range() rather than the Python 2-only xrange(): the name exists on
    # both Python 2 and Python 3, so this helper no longer raises
    # NameError when run under Python 3.
    for start in range(0, len(l), n):
        yield l[start:start + n]

 def get_news(depth):
    """Collect (title, href) pairs from consecutive listing pages.

    Starts at '/' on ``host`` and moves from page to page by following
    the link inside the last element of class 'title' on each page.

    depth -- number of pages to fetch.  The first page is always
             fetched, so values below 1 behave like 1.

    Returns a list of (title, href) tuples in page order.  Collection
    stops early when a page offers no link to follow.

    Raises requests.RequestException when a page cannot be fetched
    (connection failure, timeout, or HTTP error status).
    """
    acc = []
    page = '/'
    while True:
        # Followed hrefs may lack a leading slash; host has no trailing one.
        if not page.startswith('/'):
            page = '/' + page

        # Bound the wait and fail loudly on an error status instead of
        # hanging forever or trying to parse an error page.
        r = requests.get(host + page, timeout=30)
        r.raise_for_status()
        doc = soup.BeautifulSoup(r.content)

        table = doc.table
        titles = []
        if table is not None:
            titles = table.findAll(True, {'class': 'title'})

        # Every element but the last is consumed in pairs; only the
        # second element of each pair is read for the story link.
        for pair in chunks(titles[:-1], 2):
            if len(pair) != 2:
                # Trailing unpaired element: nothing to read from it.
                continue
            link = pair[1].a
            if link is None:
                continue
            acc.append((link.string, link['href']))

        depth -= 1
        if depth < 1:
            return acc

        # The last 'title' element is taken to hold the next-page link.
        more = titles[-1].a if titles else None
        if more is None:
            # Nothing to follow; return what has been collected so far.
            return acc
        page = more['href']

 def main():
    """Print a numbered listing of the stories returned by get_news(5).

    Output is a heading, a generation timestamp, a blank line, then one
    'N. TITLE (HREF)' line per story, numbered from 1.
    """
    # A single parenthesised argument parses as a print statement on
    # Python 2 and as a function call on Python 3, with identical output.
    print('Hacker News Scrap')
    print('generated at %s' % datetime.now().strftime('%a, %d %b %Y %H:%M:%S'))
    print('')
    for no, (title, href) in enumerate(get_news(5), 1):
        print('%d. %s (%s)' % (no, title, href))

 # Run the scraper only when this file is executed as a script.
 if __name__ == '__main__':
    main()
	from datetime import datetime
	import BeautifulSoup as soup
	import requests

	# Base URL; get_news() appends a page path to it for every request.
	host = 'http://news.ycombinator.com'

	def chunks(l, n):
		"""Yield successive n-sized chunks from l.

		l must support len() and slicing.  The final chunk is shorter when
		len(l) is not a multiple of n, and an empty l yields nothing.
		"""
		# range() rather than the Python 2-only xrange(): the name exists on
		# both Python 2 and Python 3, so this helper no longer raises
		# NameError when run under Python 3.
		for start in range(0, len(l), n):
			yield l[start:start + n]

	def get_news(depth):
		"""Collect (title, href) pairs from consecutive listing pages.

		Starts at '/' on ``host`` and moves from page to page by following
		the link inside the last element of class 'title' on each page.

		depth -- number of pages to fetch.  The first page is always
		         fetched, so values below 1 behave like 1.

		Returns a list of (title, href) tuples in page order.  Collection
		stops early when a page offers no link to follow.

		Raises requests.RequestException when a page cannot be fetched
		(connection failure, timeout, or HTTP error status).
		"""
		acc = []
		page = '/'
		while True:
			# Followed hrefs may lack a leading slash; host has no trailing one.
			if not page.startswith('/'):
				page = '/' + page

			# Bound the wait and fail loudly on an error status instead of
			# hanging forever or trying to parse an error page.
			r = requests.get(host + page, timeout=30)
			r.raise_for_status()
			doc = soup.BeautifulSoup(r.content)

			table = doc.table
			titles = []
			if table is not None:
				titles = table.findAll(True, {'class': 'title'})

			# Every element but the last is consumed in pairs; only the
			# second element of each pair is read for the story link.
			for pair in chunks(titles[:-1], 2):
				if len(pair) != 2:
					# Trailing unpaired element: nothing to read from it.
					continue
				link = pair[1].a
				if link is None:
					continue
				acc.append((link.string, link['href']))

			depth -= 1
			if depth < 1:
				return acc

			# The last 'title' element is taken to hold the next-page link.
			more = titles[-1].a if titles else None
			if more is None:
				# Nothing to follow; return what has been collected so far.
				return acc
			page = more['href']

	def main():
		"""Print a numbered listing of the stories returned by get_news(5).

		Output is a heading, a generation timestamp, a blank line, then one
		'N. TITLE (HREF)' line per story, numbered from 1.
		"""
		# A single parenthesised argument parses as a print statement on
		# Python 2 and as a function call on Python 3, with identical output.
		print('Hacker News Scrap')
		print('generated at %s' % datetime.now().strftime('%a, %d %b %Y %H:%M:%S'))
		print('')
		for no, (title, href) in enumerate(get_news(5), 1):
			print('%d. %s (%s)' % (no, title, href))

	# Run the scraper only when this file is executed as a script.
	if __name__ == '__main__':
		main()