Created
January 15, 2010 06:45
-
-
Save wroscoe/277865 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Author: William Roscoe | |
Date: 2010.1 | |
This is an attempt at a flat database with single and dynamically created indexes to entity ids. | |
If you know of a better or comprable simple system please let me know. | |
This idea is a simple mimic of Google's BigFile system and Friend Feed's python/mysql implementation. | |
""" | |
import os, cPickle, time, sqlite3, uuid | |
import config | |
INDEX_PREFIX = "index_" | |
class Datastore(): | |
"""Datastore is a way to store data in a blob it is called through indexes""" | |
def __init__(self, path, indexed_properties): | |
self.conn = sqlite3.connect(path) | |
self.indexed_properties = indexed_properties | |
#create datastore and index tables if they don't exist | |
c = self.conn.cursor() | |
if not self.table_exists('datastore'): | |
c.execute('create table datastore ( entity_id TEXT, blob BLOB, updated INTEGER)' ) | |
for property_name in indexed_properties: | |
if not self.table_exists(INDEX_PREFIX+property_name): | |
c.execute('create table %s ( entity_id TEXT, %s TEXT)' %(INDEX_PREFIX+property_name, property_name) ) | |
self.conn.commit() | |
def table_exists(self, table_name): | |
"""check if table_name exists""" | |
c = self.conn.cursor() | |
c.execute('SELECT name from sqlite_master WHERE name=?', [table_name]) | |
if c.fetchone() > 0: | |
return True | |
return False | |
def put(self, new_entity): | |
"""Insert new_entity into datastore table and add indexed properties to index tables. | |
Create a unique entity id with uuid1 and an integer time value.""" | |
new_entity['id'] = uuid.uuid1().get_hex() | |
new_entity['updated'] = int(time.time()) | |
c = self.conn.cursor() | |
c.execute('INSERT INTO datastore(entity_id, blob, updated) VALUES(?, ?, ?);', [new_entity['id'], cPickle.dumps(new_entity), int(time.time()) ] ) | |
self.conn.commit() | |
for property_name in self.indexed_properties: | |
if new_entity.has_key(property_name): | |
self.put_in_index(property_name, new_entity['id'], new_entity[property_name]) | |
return new_entity['id'] | |
def put_in_index(self, property_name, id, property_value): | |
"""put an (entity_id, property_name) row into the property's index""" | |
query = 'INSERT INTO %s (entity_id, %s) VALUES(?, ?);'%(INDEX_PREFIX+property_name, property_name) | |
c = self.conn.cursor() | |
c.execute(query, [id, property_value]) | |
self.conn.commit() | |
def get_id(self, id): | |
"""SELECT an entity with its id""" | |
c = self.conn.cursor() | |
c.execute('SELECT blob FROM datastore WHERE entity_id = ?', [id]) | |
self.conn.commit() | |
return cPickle.loads(str(c.fetchone()[0])) | |
def get_id_list(self, id_list): | |
"""get a list of entities by passing a list of entity ids""" | |
in_list = self.make_list_a_string(id_list) | |
print in_list | |
c = self.conn.cursor() | |
query = 'SELECT blob FROM datastore WHERE entity_id IN (%s)' % (in_list) | |
c.execute(query) | |
self.conn.commit() | |
results = [] | |
for row in c.fetchall(): | |
results.append(cPickle.loads(str(row[0]))) | |
return results | |
def get_where(self, property_name, property_value_list): | |
"""get a list of entities that match a passed list of property names""" | |
in_list = self.make_list_a_string(property_value_list) | |
c = self.conn.cursor() | |
query = 'SELECT * FROM %s WHERE %s IN (%s)' %(INDEX_PREFIX+property_name, property_name, in_list) | |
c.execute(query) | |
self.conn.commit() | |
out_list = [] | |
for row in c.fetchall(): | |
out_list.append(row[0]) | |
print out_list | |
return self.get_id_list(out_list) | |
def get_index_older(self, property_name, older): | |
"""SELECT entities indexted in the properties index. Limit the number of values returned""" | |
index_name = INDEX_PREFIX+property_name | |
query = 'SELECT datastore.blob FROM datastore, %s WHERE %s.entity_id = datastore.entity_id AND updated < %s LIMIT 20' %(index_name, index_name, older) | |
c = self.conn.cursor() | |
c.execute(query) | |
results = [] | |
for row in c.fetchall(): | |
results.append(cPickle.loads(str(row[0]))) | |
return results | |
def make_list_a_string(self, list1): | |
"""convert a list to a string that can be inserted into an SQL "IN" statment""" | |
list2 = [] | |
for i in list1: | |
list2.append(str(i)) | |
return str(list2).strip("[]") | |
if __name__ == '__main__': | |
d = Datastore(config.PATH_DB, ['test']) | |
d.put({'test':'hello', 'other':'blob..lasdfasdf;lasdjflasd'}) | |
print d.get_where('test', ['hello']) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment