Created
May 29, 2014 13:39
-
-
Save avovsya/e23aaea3e8364934a58b to your computer and use it in GitHub Desktop.
Helper for working with Cassandra's inverted index tables. Allows partitioning a single index key across multiple row keys.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"use strict"; | |
var slug = require('slug'); | |
/**
 * Registry of hash functions used to spread one logical index key over
 * several rows. Every hash `<name>` has a companion `<name>_range` entry
 * that lists all values the hash can produce.
 */
var hashFunctions = {
  /**
   * Return a two-letter hash based on the first two symbols of a string.
   *
   * The string is passed through `slug`, lower-cased, and stripped of a
   * leading "www." / "wwwN." prefix. Every symbol outside a-z (dots, digits,
   * dashes, underscores, quotes, ...) is mapped to 'z', and inputs shorter
   * than two symbols are padded with 'z'. The result is therefore always one
   * of the values listed by `two_symbol_hash_range`; a hash outside that
   * range would name a row that a range lookup never visits.
   * @param {string} str String to calculate the hash for
   * @return {string} Exactly two lower-case latin letters, e.g. 'ab'
   */
  "two_symbol_hash": function (str) {
    var normalized = slug(str).toLowerCase().replace(/^www\d*\./, '');
    // Each replacement is one symbol for one symbol, so slicing first gives
    // the same result as replacing over the whole string.
    var head = normalized.slice(0, 2).replace(/[^a-z]/g, 'z');
    return (head + 'zz').slice(0, 2);
  },
  /**
   * Return every value `two_symbol_hash` can produce: 'aa', 'ab', ... 'zz'.
   *
   * The list is built once and cached on the function object; the same
   * array instance is returned on every call, so callers must not mutate it.
   * @return {Array} The 676 two-letter combinations in alphabetical order
   */
  "two_symbol_hash_range": function twoSymbolHashRange() {
    if (twoSymbolHashRange.result) {
      return twoSymbolHashRange.result;
    }
    var first = 'a'.charCodeAt(0);
    var last = 'z'.charCodeAt(0);
    var results = [];
    for (var ch = first; ch <= last; ch++) {
      for (var sch = first; sch <= last; sch++) {
        results.push(String.fromCharCode(ch) + String.fromCharCode(sch));
      }
    }
    twoSymbolHashRange.result = results;
    return results;
  }
};
/**
 * Build the row key under which a value is stored in a Cassandra index table.
 * The key is the prefix, an underscore, and the hash of the value.
 * @param {string} keyPrefix Key prefix. e.g. 'some_tag', 'another_tag', etc.
 * @param {string} value Value whose hash selects the partition
 * @param {string} [hashFunction='two_symbol_hash'] Name of the entry in
 *                 `hashFunctions` to use; any falsy value selects the default
 * @return {string} Partition key to use in the index table. e.g.
 *                  'some_tag_ab', 'another_tag_jj', etc.
 */
function createPartitionKey(keyPrefix, value, hashFunction) {
  var hashName = hashFunction ? hashFunction : 'two_symbol_hash';
  var suffix = hashFunctions[hashName](value);
  return keyPrefix + '_' + suffix;
}
/**
 * Get every row key that may hold entries for the given index key, already
 * formatted as single-quoted CQL string literals.
 *
 * NOTE: the misspelt function name is kept as-is because other code in this
 * module calls it by that name.
 * @param {string} keyPrefix Index key to build the row keys for. 'some_tag',
 *                 'another_tag', etc. Single quotes in it are escaped.
 * @param {string} [hashFunction='two_symbol_hash'] Which hash function to
 *                 use; its `<name>_range` companion supplies the suffixes
 * @return {Array} Quoted row keys to look up in Cassandra. e.g.
 *                 ["'some_tag_aa'", "'some_tag_ab'", "'some_tag_ac'", ...]
 */
function getPartiotionKeyRange(keyPrefix, hashFunction) {
  var hashName = hashFunction || 'two_symbol_hash';
  // CQL escapes a single quote inside a string literal by doubling it;
  // without this a prefix containing ' would terminate the literal early.
  var escapedPrefix = String(keyPrefix).replace(/'/g, "''");
  return hashFunctions[hashName + '_range']().map(function (hashKey) {
    return "'" + escapedPrefix + '_' + hashKey + "'";
  });
}
/**
 * Build the query that fetches all values stored for a key in an inverted
 * index. The key is expanded to its full range of partition row keys and
 * all of them are selected with a single `IN (...)` clause.
 * @param {string} indexName Name of the table to query; inserted verbatim
 * @param {string} key Name of the key to get from the index
 * @param {string} [hashFunction='two_symbol_hash'] Name of the hash function
 *                 used to partition keys in the index
 * @return {string} Query to get key values from the inverted index
 */
function getFromInvertedIndexQuery(indexName, key, hashFunction) {
  var quotedKeys = getPartiotionKeyRange(key, hashFunction).join(',');
  return "SELECT * FROM " + indexName + " WHERE key IN (" + quotedKeys + ');';
}
/**
 * Build the query that stores a value under a key in an inverted index.
 * The row key is derived from the key and the value via
 * `createPartitionKey`, and the statement writes to the `key` and `value`
 * columns of the table.
 *
 * Single quotes in the row key and in the value are doubled so that the
 * generated CQL string literals stay well-formed (e.g. for "O'Reilly").
 * `indexName` is an identifier and is inserted verbatim; it must come from
 * trusted code.
 * @param {string} indexName Name of the table to put the value into
 * @param {string} key Name of the key to put
 * @param {string} value Value to put into the key
 * @param {string} [hashFunction='two_symbol_hash'] Name of the hash function
 *                 used to partition keys in the index
 * @return {string} Query to put the value into the key in the inverted index
 */
function putToInvertedIndexQuery(indexName, key, value, hashFunction) {
  var rowKey = createPartitionKey(key, value, hashFunction);
  // CQL escapes a single quote inside a string literal by doubling it.
  var quotedKey = "'" + String(rowKey).replace(/'/g, "''") + "'";
  var quotedValue = "'" + String(value).replace(/'/g, "''") + "'";
  return "INSERT INTO " + indexName + " (key, value) VALUES (" + quotedKey + ", " + quotedValue + ");";
}
module.exports = { | |
getFromInvertedIndexQuery: getFromInvertedIndexQuery, | |
putToInvertedIndexQuery: putToInvertedIndexQuery | |
}; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment