Created
October 19, 2022 23:31
-
-
Save brito/f21c835b95008608ec7f85ac6e9b4fd4 to your computer and use it in GitHub Desktop.
Test multiple configurations of tsqueries and ranked search scores
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- TEST ts_rank functions and (some) configs
--
-- Cross-joins three sample documents, four tsqueries and six normalization
-- flags (2^0 .. 2^5), and reports ts_rank / ts_rank_cd for every combination,
-- highest score first. The remaining FROM items are function calls, which
-- PostgreSQL treats as implicitly LATERAL, so each one can reference the
-- aliases introduced before it.
select *
from (
         -- txt is displayed with every line break (and the whitespace around it)
         -- collapsed to one space and the ends trimmed; tsv is built from the raw text.
         -- NOTE(review): to_tsvector(txt) relies on default_text_search_config;
         -- the 'receiv' stems in the queries below presumably assume an
         -- English configuration -- confirm.
         select trim(regexp_replace(txt, E'\\s*\n\\s*', ' ', 'g')) as txt,
                tsv
         from (values ('my phone will not make or receive calls'),
                      -- long document: same key words buried in unrelated text
                      ($$ no matter what I do, my phone will not make or receive
calls even when I've asked nicely and it all goes well in the day
and then one day I did this other unrelated thing
and it still wouldn't work AT ALL so I asked Ben
who probably doesn't either but he has interesting answers $$),
                      ('call my phone')) test_values (txt),
              to_tsvector(txt) tsv
     ) v,
     -- Queries are cast straight to tsquery, so the lexemes are used as written.
     (values ('call|make|phone|receiv'::tsquery),
             ('call&make&phone&receiv'::tsquery),
             ('phone & (make|receiv) & call'::tsquery),
             ('phone & make & receiv & call'::tsquery)) test_queries (tsq),
     -- One row per normalization bit: flag takes the values 1, 2, 4, 8, 16, 32.
     generate_series(0, 5) bits,
     cast(pow(2, bits) as int) flag,
     -- Same flag rendered as a 6-bit string, for readability in the output.
     cast(flag as bit(6)) bitstring,
     round(ts_rank(tsv, tsq, flag)::numeric, 3) tsr,
     round(ts_rank_cd(tsv, tsq, flag)::numeric, 3) tscd,
     -- Character length of the displayed text, shown only for flags 1 and 2
     -- (NULL otherwise).
     -- NOTE(review): log() here is base 10 and length() counts characters;
     -- this is a rough guide, not necessarily the exact divisor ts_rank uses.
     length(case when flag in (1, 2) then txt end) doclen,
     round(log(doclen)::numeric) log_doclen,
     -- Number of entries in the tsvector, shown only for flags 4, 8 and 16
     -- (NULL otherwise).
     lateral (select case when flag in (4, 8, 16) then count(*) end as uniqlex
              from unnest(tsv)) l,
     round(log(uniqlex)::numeric, 1) log_uniqlex
order by greatest(tsr, tscd) desc,
         bits;
-- visualize these results
-- https://docs.google.com/spreadsheets/d/1FFbqwmiZ4W-ZENwzUzrJnVmwcGDYozq4AwoMbPx7SJw/edit#gid=974618960
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment