Last active
October 6, 2016 15:28
-
-
Save Apkawa/74ea2dffafdd8fd95e8be3072aa8e8f9 to your computer and use it in GitHub Desktop.
Example fuzzy search for postgresql
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE EXTENSION pg_trgm; | |
CREATE EXTENSION fuzzystrmatch; | |
-- word index | |
TRUNCATE TABLE "fuzzy_search_wordlist"; | |
INSERT INTO "fuzzy_search_wordlist" (word, norm_word, stem_word) ( | |
SELECT | |
W.word, | |
( | |
SELECT array_to_string(array_agg(sorted_word.x), '') | |
FROM ( | |
SELECT unnest(string_to_array(W.word, NULL)) AS x | |
ORDER BY x | |
) AS sorted_word | |
) AS norm_word, | |
W.stem_word | |
FROM ( | |
SELECT | |
word, | |
COALESCE(NULLIF(array_to_string(ts_lexize('russian_stem', word), ''), ''), word) AS stem_word | |
FROM ( | |
SELECT word | |
FROM ts_stat('SELECT | |
to_tsvector(''simple'', title|| '' '' ||description|| '' '' ||short_description) | |
FROM table1;') | |
UNION SELECT word | |
FROM ts_stat('SELECT | |
to_tsvector(''simple'', title) | |
FROM table2;') | |
) AS T | |
) AS W | |
); | |
-- match word | |
SELECT * | |
FROM ( | |
SELECT | |
T.letter, | |
word, | |
similarity(word, T.letter) AS sml, | |
levenshtein(word, T.letter, 1, 2, 2) AS lev, | |
levenshtein(norm_word, T.norm_letter, 1, 2, 2) AS norm_lev | |
FROM fuzzy_search_wordlist | |
JOIN ( | |
SELECT | |
*, | |
(SELECT array_to_string(array_agg(x), '') | |
FROM ( | |
SELECT unnest(string_to_array(T2.letter, NULL)) AS x | |
ORDER BY x | |
) AS _) AS norm_letter | |
FROM ( | |
SELECT | |
letter, | |
COALESCE(NULLIF(array_to_string(ts_lexize('russian_stem', letter), ''), ''), | |
letter) AS stem_letter | |
FROM (VALUES ('Корова', 'Кавоор')) AS t (letter) | |
) AS T2 | |
) AS T ON norm_word % T.norm_letter | |
) AS T3 | |
WHERE lev < 4 AND norm_lev < 4 | |
ORDER BY norm_lev ASC, lev ASC |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment