Last active
August 29, 2015 14:17
-
-
Save om-henners/fe173f5d1c2dd667b3d4 to your computer and use it in GitHub Desktop.
Tweepy streaming to Fiona
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Stream Twitter data directly to a spatial dataset with Fiona and Tweepy.
"""
import fiona | |
import fiona.crs | |
from tweepy.streaming import StreamListener | |
class FionaStreaming(StreamListener):
    """
    Stream Listener subclass that wraps a Fiona dataset to stream individual
    tweets with coordinates to a spatial dataset.

    Only statuses that carry a ``coordinates`` value are written; all other
    statuses are skipped. The listener owns the output dataset, so it must be
    closed once streaming has finished, either by calling :meth:`close` or by
    using the listener as a context manager.

    Examples
    --------

    >>> auth = tweepy.OAuthHandler(client_key, client_secret_key)
    >>> auth.set_access_token(access_key, access_key_secret)
    >>> api = tweepy.API(auth)
    >>> l = FionaStreaming("melbourne_tweets.shp")
    >>> stream = tweepy.Stream(auth, l)
    >>> stream.filter(locations=(144.463056,-38.313611,145.463056,-37.313611))
    >>> l.close()

    Or, so the dataset is closed even if streaming raises:

    >>> with FionaStreaming("melbourne_tweets.shp") as l:
    ...     tweepy.Stream(auth, l).filter(track=["melbourne"])
    """

    def __init__(self, out_path, mode="w", driver='ESRI Shapefile', api=None):
        """
        Create the dataset object and the stream listener.

        :param out_path: Path to write to on disk
        :type out_path: str
        :param mode: File open mode. Must be either "w" or "a"
        :type mode: str
        :param driver: Fiona (OGR) driver to write the dataset
        :type driver: str
        :param api: tweepy API (optional - will be created automatically if not
                    provided)
        :type api: tweepy.API
        :raises ValueError: If ``mode`` is not a writable mode
        """
        # Fail fast: a read-only dataset would otherwise only blow up when the
        # first geotagged tweet arrives, possibly long after start-up.
        if mode not in ("w", "a"):
            raise ValueError(
                'mode must be either "w" or "a", got {0!r}'.format(mode)
            )

        super(FionaStreaming, self).__init__(api)

        # NOTE(review): some OGR drivers restrict field-name length and
        # date/time support (the ESRI Shapefile DBF format is the usual
        # culprit) - confirm these fields survive with the chosen driver.
        schema = {
            "geometry": "Point",
            "properties": {
                "id": "str",
                "username": "str",
                "name": "str",
                "text": "str:144",
                "created_at": "datetime",
                "orig_username": "str",
                "orig_name": "str",
                "orig_created_at": "datetime"
            }
        }
        # Tweet coordinates are longitude/latitude, hence EPSG:4326.
        crs = fiona.crs.from_epsg(4326)
        self.dest = fiona.open(
            out_path, mode, driver=driver, crs=crs, schema=schema
        )

    def __enter__(self):
        """
        Enter a ``with`` block.

        :return: The listener itself
        :rtype: FionaStreaming
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Close the dataset on leaving a ``with`` block.

        Exceptions raised inside the block are not suppressed.

        :rtype: None
        """
        self.close()

    def on_status(self, status):
        """
        When a new status comes in, write it to the output dataset if it has
        coordinates available.

        For retweets the original author's screen name, display name and the
        original tweet's creation time are recorded in the ``orig_*`` fields;
        for all other statuses those fields are left as ``None``.

        :param status: The twitter status object from tweepy
        :type status: tweepy.Status
        :return: Whether to keep the stream open (always ``True``)
        :rtype: bool
        """
        if not status.coordinates:
            return True

        properties = {
            "id": status.id_str,
            "username": status.user.screen_name,
            "name": status.user.name,
            "text": status.text,
            "created_at": status.created_at.isoformat(),
            "orig_username": None,
            "orig_name": None,
            "orig_created_at": None
        }

        # A retweet is identified by the presence of ``retweeted_status``.
        # The ``retweeted`` flag is not suitable: it reports whether the
        # authenticating user has retweeted the tweet.
        original = getattr(status, "retweeted_status", None)
        if original is not None:
            properties.update({
                "orig_username": original.user.screen_name,
                "orig_name": original.user.name,
                # Creation time of the original tweet, not of the original
                # author's account.
                "orig_created_at": original.created_at.isoformat()
            })

        feature = {
            "geometry": status.coordinates,
            "properties": properties,
            "id": "-1"
        }
        self.dest.write(feature)
        # Flush after every record so an interrupted stream loses as little
        # data as possible.
        self.dest.flush()
        return True

    def close(self):
        """
        Make sure to close the dataset when you've finished streaming to the
        file

        :rtype: None
        """
        self.dest.close()
# Module metadata: author handle, license name and version string.
__author__ = "om_henners"
__license__ = "Apache 2.0"
__version__ = "0.0.1"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment