Created
August 22, 2011 17:45
-
-
Save bltavares/1163016 to your computer and use it in GitHub Desktop.
Crawler de twitter para buscar todos os tweets (requested by @elvisoliveira)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import urllib
from xml.dom import minidom

try:  # Python 3 module layout
    from urllib.parse import urlencode
    from urllib.request import urlopen
except ImportError:  # Python 2 module layout
    from urllib import urlencode, urlopen
class crawlXML:
    """Fetch XML documents about a single Twitter user from the REST API.

    Every public method returns (or yields) the raw response bodies exactly
    as received; the XML is parsed only internally, to decide whether
    another page has to be requested.
    """

    # Common prefix of every endpoint requested by this class.
    API_ROOT = "http://api.twitter.com/1/"

    def __init__(self, user):
        """Bind the crawler to ``user`` (a screen name) and announce it."""
        self.user = user
        print("Crawl setado para o usuario " + self.user)

    def _url(self, path, *extra):
        """Build the URL for ``path`` with ``screen_name`` plus ``extra`` pairs.

        ``extra`` is a sequence of ``(name, value)`` tuples appended after
        ``screen_name``. Values are URL-encoded so that a screen name
        containing ``&`` or ``=`` cannot inject additional parameters.
        """
        query = [("screen_name", self.user)]
        query.extend(extra)
        return self.API_ROOT + path + "?" + urlencode(query)

    def _fetch(self, url):
        """Return the body served at ``url``, always closing the response."""
        response = urlopen(url)
        try:
            return response.read()
        finally:
            response.close()

    def _nextCursor(self, body):
        """Return the integer in the first ``next_cursor`` element of ``body``.

        Returns 0 (the value that ends the pagination loop) when the
        element is absent or empty, e.g. because the body is an error
        document.
        """
        found = minidom.parseString(body).getElementsByTagName('next_cursor')
        if not found or found[0].firstChild is None:
            return 0
        return int(found[0].firstChild.data)

    def _hasStatuses(self, body):
        """Tell whether the first ``statuses`` element of ``body`` has entries.

        Only element children count: whitespace between tags shows up as
        text nodes and must not be mistaken for a status.
        """
        found = minidom.parseString(body).getElementsByTagName('statuses')
        if not found:
            return False
        return any(
            child.nodeType == minidom.Node.ELEMENT_NODE
            for child in found[0].childNodes
        )

    def getProfile(self):
        """Return the raw ``users/show.xml`` document for the user."""
        return self._fetch(self._url("users/show.xml"))

    def getFollowersId(self):
        """Yield each raw ``followers/ids.xml`` page for the user.

        Pages are yielded as soon as they arrive instead of being collected
        into a list, so the caller does not wait for the whole follower set.
        Paging starts at cursor -1 and ends once the reported
        ``next_cursor`` is 0.
        """
        cursor = -1
        while cursor != 0:
            body = self._fetch(self._url("followers/ids.xml", ("cursor", cursor)))
            yield body
            cursor = self._nextCursor(body)

    def getAllTweets(self):
        """Yield each raw ``statuses/user_timeline.xml`` page for the user.

        Pages are requested from page 1 upwards. The first page without any
        status entry is still yielded (the caller sees every response) and
        then the iteration ends.
        """
        page = 1
        while True:
            body = self._fetch(
                self._url("statuses/user_timeline.xml", ("page", page))
            )
            yield body
            if not self._hasStatuses(body):
                break
            page += 1
# Demo run: crawl one account and dump every timeline page to stdout.
crawler = crawlXML("elvisoliveira")

# Alternative demos, left disabled:
#   print crawler.getProfile()
#   for x in crawler.getFollowersId(): print x

for x in crawler.getAllTweets():
    print(x)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment