Last active
March 11, 2018 15:34
-
-
Save asdine/21f258dfbfdf6670e885699ce3e528d2 to your computer and use it in GitHub Desktop.
Brainstorming on Storm v3 query system
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package storm | |
// The scan function is here to illustrate how the selection part of the query system of Storm v3 could work. | |
// All Storm queries, using indexes or not, will rely on the query system. | |
// This function is an attempt to describe the flow of the scan and the role of each external component in the system. | |
// The goal is to have a core that doesn't care about: | |
// - encoding format | |
// - types (struct or maps should work seamlessly) | |
// - index optimization | |
// - sorting | |
// The scan function focuses on the algorithm of selection of records. | |
// | |
// Proposed flow of the query: | |
// 1. User runs a query (example, using SQL to illustrate: `SELECT FROM bucket-a WHERE age > 10 && name != "john"`) | |
// 2. We analyse the matching tree and the indexes to optimize the query. | |
// a. The query can't use indexes -> we generate a cursor that will scan the entire bucket. | |
// b. The query can use indexes -> we generate a cursor that will scan a list of predefined keys. | |
// 3. We run the scan function to fetch all the results. | |
// 4. We apply the transformation on the results (return results to the user, update or delete) | |
// 5. End | |
func scan(query *query, cursor cursor, codec codec, factory factory, matcher matcher, sink sink) error { | |
for { | |
// the cursor decides where to start, where to finish and what's the next key. | |
// Depending on if indexes are used or not, it can scan a bucket entirely or just some keys. | |
// Indexes can perform query optimisation based on the matching tree by generating cursors | |
// before the scan function being called. | |
k, v := cursor.Next() | |
if k == nil { | |
break | |
} | |
// the factory returns instances. it can be fresh instances or pointers to | |
// already allocated resources. | |
elem := factory.New() | |
// the codec knows how to decode a record for a specific bucket. | |
// a bucket is tied to a single codec otherwise we would have to store some metadata | |
// with content-type for each record. | |
err := codec.Decode(v, elem) | |
if err != nil { | |
return err | |
} | |
// Matchers are organized as a tree that analyses the record and executes all the | |
// user selected checks (field greater than, equal, AND, OR etc.) | |
// This is basically the WHERE clause of SQL. | |
// Indexes can disable some nodes of the tree that where already used to select indexed keys. | |
match, err := matcher.Match(elem) | |
if err != nil { | |
return err | |
} | |
// no match, we skip the record | |
if !match { | |
continue | |
} | |
// the sink is where the matched elements are stored until the end of the query. | |
// it will then be used to return the result to the user or to update or delete the records. | |
sink.Add(elem) | |
} | |
// once the query is over, we sort the sink if the user used an OrderBy clause | |
if query.orderBy != "" { | |
sink.Sort(query.orderBy) | |
} | |
return nil | |
} | |
type cursor interface { | |
Next() (key []byte, value []byte) | |
} | |
type codec interface { | |
Encode(interface{}) ([]byte, error) | |
Decode([]byte, interface{}) error | |
} | |
type factory interface { | |
New() interface{} | |
} | |
type matcher interface { | |
Match(interface{}) (bool, error) | |
} | |
type sink interface { | |
Add(interface{}) | |
Sort(field string) | |
} | |
type query struct { | |
orderBy string | |
limit int | |
offset int | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment