Skip to content

Instantly share code, notes, and snippets.

@abhiomkar
Created April 30, 2013 09:52
Show Gist options
  • Save abhiomkar/5487747 to your computer and use it in GitHub Desktop.
Save abhiomkar/5487747 to your computer and use it in GitHub Desktop.
The Hindu Quick Scraper
#!/usr/bin/python
# Abhinay Omkar
from urllib2 import urlopen
import simplejson as json
from lxml import etree
# Fetch The Hindu's RSS feed and print its items as a pretty-printed JSON
# array. Each array entry is an object holding the text of the item's
# <title>, <description>, <link> and <pubDate> children.
FEED_URL = "http://www.thehindu.com/?service=rss"
ITEM_FIELDS = ("title", "description", "link", "pubDate")

response = urlopen(FEED_URL)
try:
    root = etree.fromstring(response.read())
finally:
    # Close explicitly so the connection is released even if parsing fails.
    response.close()

# Look the items up by path rather than via root[0]: the first child node of
# the document element is not guaranteed to be <channel> (it could be a
# comment or processing instruction, for example).
items = root.findall('channel/item')

output = []
for item in items:
    item_dict = {}
    for field in ITEM_FIELDS:
        node = item.find(field)
        # A missing child is recorded as None (JSON null) instead of
        # aborting the whole run with AttributeError on `.text`.
        item_dict[field] = node.text if node is not None else None
    output.append(item_dict)

# Single-argument print(...) is valid under both Python 2 and Python 3.
print(json.dumps(output, indent=4, separators=(',', ': ')))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment