Skip to content

Instantly share code, notes, and snippets.

@sbp
Last active December 17, 2015 14:59
Show Gist options
  • Save sbp/5628555 to your computer and use it in GitHub Desktop.
Save sbp/5628555 to your computer and use it in GitHub Desktop.
Scrape twitter
#!/usr/bin/env python3
import re
import time
from saxo_web import request
# ATTENTION, USER!
# Adjust the following mysterious variables to your taste
poll = 20  # seconds to sleep between two fetches of the search page
since = 3600  # tweets older than this many seconds are never printed
# Search term; appended verbatim to the search URL, so it must already be
# URL-safe (it is not percent-encoded here).
account = "reuters"

# ATTENTION, USER!
# Don't touch this stuff
# Whole "<a ...a>" or "<p ...p>" spans (case-insensitive, "." spans newlines).
r_element = re.compile(r"(?ims)(<a\b.*?\ba>|<p\b.*?\bp>)")
# First "/segment/" in an element; used as the nick.
r_nick = re.compile(r"/([^/]+)/")
# Any markup tag, stripped to leave the plain text.
r_tag = re.compile(r"<[^>]+>")
# A double-quoted run of exactly ten digits; used as the Unix timestamp.
r_unixtime = re.compile(r"(?ims)\"([0-9]{10})\"")


def parse_tweets(html):
    """Yield ``(unixtime, nick, text)`` for every tweet text found in *html*.

    The page is scanned element by element. An element containing
    "tweet-timestamp" updates the current timestamp and nick; an element
    containing "tweet-text" is emitted with whatever timestamp and nick were
    seen most recently. Before any timestamp element has been seen the
    timestamp is 0 and the nick is None.

    NOTE(review): this presumes the timestamp link precedes the text
    paragraph of the same tweet in the markup -- confirm against a live page.
    """
    unixtime = 0
    nick = None
    for element in r_element.findall(html):
        if "tweet-timestamp" in element:
            # Only the first match matters; when nothing matches, the
            # previous value is deliberately left in place.
            stamp = r_unixtime.search(element)
            if stamp is not None:
                unixtime = int(stamp.group(1))
            owner = r_nick.search(element)
            if owner is not None:
                nick = owner.group(1)
        if "tweet-text" in element:
            yield unixtime, nick, r_tag.sub("", element)


def select_fresh(tweets, recent, cutoff):
    """Pick the tweets that have not been shown yet and are still in range.

    Args:
        tweets: iterable of ``(unixtime, nick, text)`` tuples.
        recent: newest timestamp printed by earlier polls (0 if none).
        cutoff: timestamps at or below this value are too old to print.

    Returns:
        ``(fresh, newest)``: the accepted tweets in their input order, and
        the high-water mark to pass as *recent* on the next poll.
    """
    fresh = []
    newest = recent
    for tweet in tweets:
        stamp = tweet[0]
        if stamp > recent and stamp > cutoff:
            fresh.append(tweet)
            # Advance the mark only via `newest`: comparing against a mark
            # that moves mid-page would hide every tweet after the first on
            # a newest-first page.
            newest = max(newest, stamp)
    return fresh, newest


def main():
    """Poll the search page forever, printing each new tweet once."""
    # Kept outside the loop so tweets printed by one poll are not printed
    # again by the next.
    recent = 0
    while True:
        page = request("https://twitter.com/search/realtime?q=" + account)
        # NOTE(review): assumes page["text"] is the decoded page body --
        # confirm against saxo_web.request.
        fresh, recent = select_fresh(
            parse_tweets(page["text"]), recent, time.time() - since
        )
        for _, nick, text in fresh:
            print("<%s> %s" % (nick, text))
        time.sleep(poll)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment