Created
March 2, 2013 22:28
-
-
Save fclairamb/5073559 to your computer and use it in GitHub Desktop.
A basic implementation of time series in Python with pycassa. The idea is to show how simple this can be.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import pycassa | |
import datetime | |
import json | |
# This is a basic (and yet generic) implementation of time series using pycassa | |
class TimeSerie:
    """Store time-stamped JSON values for a single subject using pycassa.

    Two column families are used:

    * ``ts_index`` -- row key is the subject; each column name is a time
      bucket (the first ``indexSize`` characters of the timestamp string)
      and the column value is empty.
    * ``ts_data`` -- row key is ``"<subject>-<bucket>"``; each column name is
      the remainder of the timestamp string and the value is the JSON text.
    """

    def __init__(self, keyspace, subject, sys=None):
        """Open the column families, optionally creating the schema first.

        keyspace -- name of the keyspace to connect to.
        subject  -- identifier of the series (a stock name, a sensor id, ...).
        sys      -- optional system manager object; when given, the keyspace
                    and the two column families are created if missing.
        """
        # To make things easier, we can create the keyspace and column
        # families ourselves when a system manager is supplied.
        if sys is not None:
            if keyspace not in sys.list_keyspaces():
                sys.create_keyspace(keyspace, strategy_options={"replication_factor": "1"})
            # One lookup is enough: creating "ts_index" does not change
            # whether "ts_data" already existed.
            existing = sys.get_keyspace_column_families(keyspace)
            for cf_name in ("ts_index", "ts_data"):
                if cf_name not in existing:
                    sys.create_column_family(keyspace, cf_name)

        # Initializing data
        self.pool = pycassa.ConnectionPool(keyspace)
        self.cf_index = pycassa.ColumnFamily(self.pool, "ts_index")
        self.cf_data = pycassa.ColumnFamily(self.pool, "ts_data")
        self.subject = subject
        # Timestamps are formatted with "%Y%m%d%H%M%S%f", i.e. a 20 character
        # string such as "20130302224430123456". indexSize is the number of
        # leading characters used as the bucket (8 -> one bucket per day).
        self.indexSize = 8
        # Last bucket written to the index by this instance; lets store()
        # skip rewriting the same index column on every call.
        self.last_index = ''

    def store(self, time, data):
        """Serialize ``data`` to JSON and write it under the given ``time``.

        time -- object providing ``strftime`` (e.g. ``datetime.datetime``).
        data -- any value accepted by ``json.dumps``.
        """
        payload = json.dumps(data)
        stamp = time.strftime("%Y%m%d%H%M%S%f")
        bucket = stamp[:self.indexSize]   # e.g. "20130302" for indexSize 8
        column = stamp[self.indexSize:]   # e.g. "224430123456"

        if self.last_index != bucket:
            # Only touch the index when the bucket changes.
            print("ts_index['{key}']='{value}':''".format(key=self.subject, value=bucket))
            # The index row is keyed by subject and lists its buckets as
            # column names, i.e. ts_index['subject'] = {'20130302': ''}.
            self.cf_index.insert(self.subject, {bucket: ''})
            self.last_index = bucket

        key = self.subject + "-" + bucket
        print("ts_date['{key}']='{col}':'{value}'".format(key=key, col=column, value=payload))
        # ts_data['subject-20130302'] = {'224430123456': '<json>'}
        self.cf_data.insert(key, {column: payload})
# Demo: connect to a Cassandra node on localhost, make sure the schema
# exists, then record 1000 sample points stamped with the current time.
sys = pycassa.system_manager.SystemManager("localhost")
ts = TimeSerie("ks", "subject", sys)
for i in range(1000):
    now = datetime.datetime.now()
    sample = {"key": "value{i}".format(i=i)}
    ts.store(now, sample)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment