Created
September 23, 2020 08:00
-
-
Save knazarov/8102e558bac54cdd255e9025eee8aa11 to your computer and use it in GitHub Desktop.
Full text search example for Tarantool
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env tarantool | |
local pickle = require('pickle') | |
local yaml = require('yaml') | |
--- Build a 64-bit trigram signature for a string.
-- The input is lower-cased, every window of three consecutive bytes is
-- hashed to a bit position in 0..63, and all those bits are OR-ed into a
-- single mask. The mask is a lossy summary: different trigrams can share a
-- bit, so callers must re-check candidates against the real text.
-- @param str string to summarise
-- @return the mask packed with pickle.pack('Q', ...), or "" when `str` has
--         fewer than three bytes (no trigram exists)
function trivec(str)
    str = string.lower(str)
    if #str < 3 then
        return ""
    end
    -- 64-bit cdata accumulator so every one of the 64 bit positions is usable.
    local res = 0ULL
    for i = 1, #str - 2 do
        -- One call yields the three bytes of the window starting at i.
        local b1, b2, b3 = string.byte(str, i, i + 2)
        local val = b1 * 10000 + b2 * 100 + b3
        res = bit.bor(res, bit.lshift(1ULL, val % 64))
    end
    return pickle.pack('Q', res)
end
-- Start the database with default settings and make sure the 'account'
-- space exists. Every step uses if_not_exists so the script can be re-run.
box.cfg({})
box.schema.space.create('account', { if_not_exists = true })

local account_space = box.space.account

-- Tuple layout: numeric id, display name, packed trigram mask.
account_space:format({
    { name = 'id', type = 'unsigned' },
    { name = 'name', type = 'string' },
    { name = 'trivec', type = 'string' },
})

-- Unique primary key on the id field.
account_space:create_index('primary', {
    unique = true,
    parts = { { field = 'id', type = 'unsigned' } },
    if_not_exists = true,
})

-- Non-unique BITSET index over the third field (the trigram mask).
account_space:create_index('hash', {
    unique = false,
    type = 'BITSET',
    parts = { 3, type = 'string' },
    if_not_exists = true,
})
--- Insert or overwrite an account together with its trigram mask.
-- @param id   numeric account id (first tuple field)
-- @param name account name; its trivec() result is stored as the third field
-- @return whatever box.space.account:put returns
function put_user(id, name)
    local accounts = box.space.account
    local signature = trivec(name)
    return accounts:put({ id, name, signature })
end
--- Find accounts whose name contains `name_part`, ignoring ASCII case.
-- The BITSET index only narrows the search to tuples whose stored mask has
-- every bit of the query mask set; each candidate is then confirmed with a
-- literal substring check, because distinct trigrams can share a bit.
-- @param name_part text to look for; taken literally, not as a Lua pattern
-- @return array of matching tuples (empty table when nothing matches)
function find_user(name_part)
    -- NOTE(review): for name_part shorter than three bytes trivec returns "";
    -- confirm how the BITSET iterator treats an empty key.
    local vec = trivec(name_part)
    -- Lower-case the needle once instead of on every candidate.
    local needle = string.lower(name_part)
    local res = {}
    for _, v in box.space.account.index.hash:pairs(vec, {iterator='BITS_ALL_SET'}) do
        -- Plain find (4th argument true): characters such as '.', '%', '('
        -- or '-' in the search text must not be read as pattern syntax.
        if string.find(string.lower(v.name), needle, 1, true) then
            table.insert(res, v)
        end
    end
    return res
end
-- Demo: store two sample accounts (ids 1 and 2), then print every account
-- whose name contains 'onst' as YAML.
local sample_names = { "Konstantin Nazarov", "Konstantin Osipov" }
for id = 1, #sample_names do
    put_user(id, sample_names[id])
end

local matches = find_user('onst')
print(yaml.encode(matches))
filonenko-mikhail
commented
Nov 3, 2020
•
WTF? Why is it working?
Modifying constants in hash function seems to change nothing!
#!/usr/bin/env tarantool
require 'strict'.on()
local utf8 = require 'utf8'
-- Start the database with default settings.
box.cfg({})

-- One-time schema bootstrap, registered with box.once under 'schema:v1'.
box.once('schema:v1', function()
    box.schema.space.create('accounts', {
        format = {
            { name = 'id', type = 'unsigned' },
            { name = 'name', type = 'string' },
            { name = 'bpf', type = 'string' },
        },
    })

    local accounts = box.space.accounts

    -- Unique primary key on the id field.
    accounts:create_index('primary', {
        unique = true,
        parts = { 'id' },
    })

    -- Non-unique BITSET index over the packed trigram mask.
    accounts:create_index('bpf', {
        unique = false,
        type = 'BITSET',
        parts = { 'bpf' },
    })

    print("space accounts created")
end)
-- Table that collects the functions this script exposes (register, find).
local api = {}
-- Publish the table as the global `api`. rawset writes the key directly,
-- without invoking any __newindex metamethod on _G.
-- NOTE(review): presumably done this way because 'strict' is switched on
-- earlier in the script; confirm.
rawset(_G, 'api', api)
--- Build a 64-bit trigram signature for a string.
-- Every window of three consecutive bytes is hashed to a bit position in
-- 0..63 and all those bits are OR-ed into one mask. The mask is lossy
-- (different trigrams can share a bit), so callers must re-check candidates
-- against the real text. No case folding happens here; callers lower-case
-- the input first.
-- @param str string to summarise
-- @return the mask packed with pickle.pack('Q', ...), or "" when `str` has
--         fewer than three bytes (no trigram exists)
local function trivec(str)
    if #str < 3 then return "" end
    -- The accumulator and the shifted 1 must both be 64-bit cdata. With plain
    -- Lua numbers LuaJIT's bit library works on 32 bits and masks the shift
    -- count to 5 bits, so positions 32..63 would alias onto 0..31 and the
    -- result could turn negative.
    local res = 0ULL
    for i = 1, #str - 2 do
        -- One call yields the three bytes of the window starting at i.
        local b1, b2, b3 = str:byte(i, i + 2)
        local val = b1 * 0xffff + b2 * 0xff + b3
        res = bit.bor(res, bit.lshift(1ULL, val % 64))
    end
    return require('pickle').pack('Q', res)
end
--- Insert or overwrite an account together with its trigram mask.
-- @param id   numeric account id (first tuple field)
-- @param name account name; the mask is computed from its lower-cased form
-- @return whatever box.space.accounts:replace returns
function api.register(id, name)
    local accounts = box.space.accounts
    local signature = trivec(utf8.lower(name))
    return accounts:replace({ id, name, signature })
end
--- Find accounts whose name contains `name`, ignoring case.
-- The BITSET index narrows the search to tuples whose stored mask has every
-- bit of the query mask set; each candidate is then confirmed with a literal
-- substring check, because distinct trigrams can share a bit.
-- @param name text to look for; taken literally, not as a Lua pattern
-- @return array of matching account tuples
function api.find(name)
    name = utf8.lower(name)
    return box.space.accounts.index.bpf
        :pairs({ trivec(name) }, { iterator = 'BITS_ALL_SET' })
        -- The cap applies to index candidates, before the substring filter,
        -- so at most 1000 tuples are examined per call.
        :take(1000) -- hard limit
        :grep(function(account)
            -- Plain find (4th argument true): characters such as '.', '%',
            -- '(' or '-' in the query must not be read as pattern syntax.
            return utf8.lower(account.name):find(name, 1, true)
        end)
        :totable()
end
-- Hand control to the console; once it returns, exit with status 0.
local console = require('console')
console.start()
os.exit(0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment