# Gist h3xstream/1531808 -- created December 29, 2011 04:06.
# Note (from the GitHub page): this file contains bidirectional Unicode text
# that may be interpreted or compiled differently than it appears; review it
# in an editor that reveals hidden Unicode characters.
import re
import sys
import urllib
import xml.dom.minidom as dom
# Feeds: exactly one URL is active; move the comment marker to pick another
# category.
url = "http://www.teamalexandriz.org/category/policier/feed/"
#url = "http://www.teamalexandriz.org/category/aventure/feed/"
#url = "http://www.teamalexandriz.org/category/science-fiction/feed/"

# Download the raw feed and always release the connection, even when the
# read fails.
sock = urllib.urlopen(url)
try:
    resp = sock.read()
    content_type = sock.headers.get('Content-type') or ''
finally:
    sock.close()

# Take the charset parameter from the Content-type header. When the server
# does not announce one (or sends no Content-type at all), fall back to UTF-8
# instead of failing with IndexError/KeyError.
charset = content_type.partition('charset=')[2]
encoding = charset.split(';')[0].strip().strip('"\'') or 'utf-8'

# Transcoding (needed for xml.dom): decode with the announced charset,
# dropping undecodable bytes, then re-encode as plain ASCII with every
# non-ASCII character written as a numeric XML character reference.
resp = resp.decode(encoding, 'ignore').encode('ascii', 'xmlcharrefreplace')

# Parse under a separate name so the `dom` module alias is not overwritten.
feed = dom.parseString(resp)
items = feed.getElementsByTagName("item")
# Extract the first node with this name
def extractUtfNode(parent, name):
    """Return the UTF-8 encoded text of the first ``name`` element in ``parent``.

    Only the first child of that element is read, so the result is the data
    of its leading text/CDATA node.

    Args:
        parent: node to search; must provide ``getElementsByTagName`` (in this
            script, an ``xml.dom.minidom`` element).
        name: tag name to look up, e.g. ``"title"`` or ``"content:encoded"``.

    Returns:
        The first child's ``data`` encoded as UTF-8, or an empty byte string
        when the element has no children (e.g. ``<title/>``).

    Raises:
        IndexError: if ``parent`` contains no element named ``name``.
    """
    first_child = parent.getElementsByTagName(name)[0].firstChild
    if first_child is None:
        # Empty element: there is no text node to read `.data` from.
        return b''
    return first_child.data.encode('UTF-8')
# Print the title and the first Megaupload link of every feed item.
# The pattern is compiled once, with the dots escaped so they only match a
# literal "."; the link ends at the first double quote.
link_pattern = re.compile(r'(http://www\.megaupload\.com/[^"]+)')
for item in items:
    title = extractUtfNode(item, "title")
    match = link_pattern.search(extractUtfNode(item, "content:encoded"))
    if match is None:
        # Items without a Megaupload link are skipped rather than crashing
        # on `None.group(0)`.
        continue
    # Single-argument print(...) produces the same output as the print
    # statement. The link is printed directly so the feed `url` defined
    # earlier in the script is not overwritten.
    print("-----")
    print(title)
    print(match.group(0))
# (GitHub page footer: sign up for free to join this conversation on GitHub;
# already have an account? Sign in to comment.)