Skip to content

Instantly share code, notes, and snippets.

@Apkawa
Last active October 6, 2016 15:28
Show Gist options
  • Save Apkawa/74ea2dffafdd8fd95e8be3072aa8e8f9 to your computer and use it in GitHub Desktop.
Save Apkawa/74ea2dffafdd8fd95e8be3072aa8e8f9 to your computer and use it in GitHub Desktop.
Example fuzzy search for postgresql
CREATE EXTENSION pg_trgm;
CREATE EXTENSION fuzzystrmatch;
-- word index
TRUNCATE TABLE "fuzzy_search_wordlist";
INSERT INTO "fuzzy_search_wordlist" (word, norm_word, stem_word) (
SELECT
W.word,
(
SELECT array_to_string(array_agg(sorted_word.x), '')
FROM (
SELECT unnest(string_to_array(W.word, NULL)) AS x
ORDER BY x
) AS sorted_word
) AS norm_word,
W.stem_word
FROM (
SELECT
word,
COALESCE(NULLIF(array_to_string(ts_lexize('russian_stem', word), ''), ''), word) AS stem_word
FROM (
SELECT word
FROM ts_stat('SELECT
to_tsvector(''simple'', title|| '' '' ||description|| '' '' ||short_description)
FROM table1;')
UNION SELECT word
FROM ts_stat('SELECT
to_tsvector(''simple'', title)
FROM table2;')
) AS T
) AS W
);
-- match word
SELECT *
FROM (
SELECT
T.letter,
word,
similarity(word, T.letter) AS sml,
levenshtein(word, T.letter, 1, 2, 2) AS lev,
levenshtein(norm_word, T.norm_letter, 1, 2, 2) AS norm_lev
FROM fuzzy_search_wordlist
JOIN (
SELECT
*,
(SELECT array_to_string(array_agg(x), '')
FROM (
SELECT unnest(string_to_array(T2.letter, NULL)) AS x
ORDER BY x
) AS _) AS norm_letter
FROM (
SELECT
letter,
COALESCE(NULLIF(array_to_string(ts_lexize('russian_stem', letter), ''), ''),
letter) AS stem_letter
FROM (VALUES ('Корова', 'Кавоор')) AS t (letter)
) AS T2
) AS T ON norm_word % T.norm_letter
) AS T3
WHERE lev < 4 AND norm_lev < 4
ORDER BY norm_lev ASC, lev ASC
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment