Skip to content

Instantly share code, notes, and snippets.

@cbertelegni
Created January 26, 2015 18:59
Show Gist options
  • Select an option

  • Save cbertelegni/f64698bd736b843c63e6 to your computer and use it in GitHub Desktop.

Select an option

Save cbertelegni/f64698bd736b843c63e6 to your computer and use it in GitHub Desktop.
Search all results and save into file.
#!/usr/bin/python
from twitter import * # need install twitter.py: `pip install twitter`
import json
import time
from urlparse import urlparse, parse_qs
"""SETTINGS"""
from twitter_keys import OAUTH_TOKEN, OAUTH_SECRET, TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET
# File that save_data overwrites with the max_id of the next page to request.
LAST_METADATA_SEARCH_FILE = "app/data/last_metadata_search.txt"
# File holding the JSON array of statuses collected so far.
TUITS_FILE = "app/data/test.txt"
# Passed as the `q` argument of every search call.
QUERY_TW = "developers"
SLEEP_TIME = 10 # seconds to sleep between requests
# Passed as the `count` argument of every search call.
COUNT = 100
# Passed as the `include_entities` argument of every search call.
INCLUDE_ENTITIES = 0
# OAuth-authenticated client; get_data issues its searches through it.
"""Def twitter instance"""
t = Twitter(auth=OAuth(OAUTH_TOKEN, OAUTH_SECRET, TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET))
def get_data(max_id = False):
    """Run one search for QUERY_TW and pass the response to save_data.

    max_id -- when truthy it is echoed to stdout and forwarded to the search
              call as ``max_id``; when falsy (the default) the search is
              issued without that argument.
    """
    search_args = {
        "q": QUERY_TW,
        "count": COUNT,
        "include_entities": INCLUDE_ENTITIES,
    }
    if max_id:
        # Single-argument print(): same output as the Python 2 statement form.
        print(max_id)
        search_args["max_id"] = max_id
    response = t.search.tweets(**search_args)
    save_data(response)
def save_data(q):
    """Persist one page of search results, then continue with the next page.

    The statuses in ``q`` are appended to the JSON array stored in TUITS_FILE;
    when that file cannot be read yet it is created with just this page.
    If the response advertises a following page, that page's ``max_id`` is
    written to LAST_METADATA_SEARCH_FILE and, after sleeping SLEEP_TIME
    seconds, get_data is called with it.

    q -- search response, read as a mapping with a "statuses" list and a
         "search_metadata" mapping.
    """
    statuses = q["statuses"]
    if not statuses:
        # An empty page used to raise IndexError on ``statuses[0]``, which
        # made this fallback unreachable; dump the raw response instead.
        print(q)
        return

    # Only the read is guarded: an IOError here means "no file yet".
    try:
        with open(TUITS_FILE, "r") as stored_file:
            stored = json.loads(stored_file.read())
    except IOError:
        print("\n")
        print("\n")
        print("*****Crear archivo****")
        print("\n")
        print("\n")
        merged = statuses
    else:
        print("Cantidad de tuits almacenados: %s" % len(stored))
        merged = stored + statuses

    # Serialise before opening in "w" mode so a serialisation failure cannot
    # leave the existing file truncated.
    payload = json.dumps(merged)
    with open(TUITS_FILE, "w") as out_file:
        out_file.write(payload)

    # "next_results" may be missing on the last page, so do not index it.
    next_results = q["search_metadata"].get("next_results")
    if next_results:
        max_id = parse_qs(next_results.replace('?', ''))['max_id'][0]
        with open(LAST_METADATA_SEARCH_FILE, "w") as meta_file:
            meta_file.write(json.dumps(max_id))
        print(max_id)
        print("speep %s" % SLEEP_TIME)
        time.sleep(SLEEP_TIME)
        get_data(max_id)
# Script entry point: issue the first search without a max_id.
get_data()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment