Skip to content

Instantly share code, notes, and snippets.

@fclairamb
Created March 2, 2013 22:28
Show Gist options
  • Save fclairamb/5073559 to your computer and use it in GitHub Desktop.
Save fclairamb/5073559 to your computer and use it in GitHub Desktop.
A basic implementation of time series in python with pycassa. The idea is to explain how simple this can be.
#!/usr/bin/python
import pycassa
import datetime
import json
# This is a basic (and yet generic) implementation of time series using pycassa
class TimeSerie:
    """Store time-stamped JSON values for one subject using pycassa.

    Two column families are used:

    * ``ts_index`` -- one row per subject; each column name is an index
      bucket, i.e. the first ``indexSize`` characters of the timestamp
      string (for an ``indexSize`` of 8: ``"20130302"``).
    * ``ts_data`` -- one row per ``"<subject>-<bucket>"``; each column name
      is the remainder of the timestamp string and its value is the JSON
      encoded payload.
    """

    def __init__(self, keyspace, subject, sys=None):
        """Open the connection pool and optionally create the schema.

        Args:
            keyspace: Name of the keyspace holding both column families.
            subject: Identifier of the series (the name of a stock, a
                device sensor, etc.).
            sys: Optional system manager (the demo below passes a pycassa
                ``SystemManager``). When given, the keyspace and the two
                column families are created if they do not exist yet.
        """
        # To make things easier, we can create the keyspace and column
        # families ourselves.
        if sys is not None:
            if keyspace not in sys.list_keyspaces():
                sys.create_keyspace(keyspace, strategy_options={"replication_factor": "1"})
            # Creating "ts_index" cannot make "ts_data" appear, so a single
            # listing is enough for both membership checks.
            existing = sys.get_keyspace_column_families(keyspace)
            if "ts_index" not in existing:
                sys.create_column_family(keyspace, "ts_index")
            if "ts_data" not in existing:
                sys.create_column_family(keyspace, "ts_data")

        # Initializing data
        self.pool = pycassa.ConnectionPool(keyspace)
        self.cf_index = pycassa.ColumnFamily(self.pool, "ts_index")
        self.cf_data = pycassa.ColumnFamily(self.pool, "ts_data")
        self.subject = subject
        # Timestamps are formatted with "%Y%m%d%H%M%S%f", which gives a 20
        # character string ("20130302224430123456"). indexSize is the number
        # of leading characters used as the index bucket; 8 means one bucket
        # per day.
        self.indexSize = 8
        # Last bucket written to ts_index by this instance, so the index
        # column is not rewritten on every store() call.
        self.last_index = ''

    def store(self, time, data):
        """Store one value of the series.

        Args:
            time: Object with a ``strftime`` method (e.g. a
                ``datetime.datetime``) giving the timestamp of the value.
            data: JSON-serializable object to store.
        """
        data = json.dumps(data)  # The received object is stored as JSON
        date_s = time.strftime("%Y%m%d%H%M%S%f")  # Timestamp as a sortable string
        date_si = date_s[:self.indexSize]  # Indexed part (indexSize 8: "20130302")
        date_sd = date_s[self.indexSize:]  # Value part (indexSize 8: "224430123456")

        if self.last_index != date_si:  # Avoid writing the index column every time
            print("ts_index['{key}']='{value}':''".format(key=self.subject, value=date_si))
            # The index row is keyed by the subject and lists its buckets as
            # column names: ts_index['subject'] = {'20130302': ''}.
            self.cf_index.insert(self.subject, {date_si: ''})
            self.last_index = date_si

        key = self.subject + "-" + date_si
        print("ts_date['{key}']='{col}':'{value}'".format(key=key, col=date_sd, value=data))
        # The data row: ts_data['subject-20130302'] = {'224430123456': 'data'}
        self.cf_data.insert(key, {date_sd: data})
# Demo: connect to the node on localhost, let TimeSerie create the "ks"
# keyspace and its column families if they are missing, then store 1000
# values, each stamped with the current time.
sys = pycassa.system_manager.SystemManager("localhost")
ts = TimeSerie("ks", "subject", sys)
for i in range(1000):
    ts.store(datetime.datetime.now(), {"key": "value%d" % i})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment