Last active
May 10, 2025 19:27
-
-
Save rawnly/39797585ecc4379c6d1e7cd2945e2a42 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Hybrid search over `source`: blends a vector-distance score with a
-- full-text rank and returns one row per source, best match first.
--
-- Placeholders (filled in by the host application):
--   ${query}           the user's search text
--   ${OPENAI_API_KEY}  API key passed to ai.openai_embed
--
-- Output columns: id, url, title, description, distance, rn_count,
-- text_rank, hybrid_score.
WITH
-- Embed the search text once so every row is compared to the same vector.
query_embedding AS (
    SELECT ai.openai_embed(
        'text-embedding-3-small',
        ${query},
        ${OPENAI_API_KEY}
    ) AS embedding
),
-- Parse the search text into a tsquery once instead of once per use.
query_terms AS (
    SELECT plainto_tsquery('english', ${query}) AS tsq
),
-- One row per (source, embedding) pair; sources without any embedding
-- still appear once (LEFT JOIN) with a NULL distance.
scored AS (
    SELECT
        s.id,
        s.url,
        s.title,
        s.description,
        s.keywords,
        -- NOTE(review): `<=>` is presumably pgvector's cosine distance
        -- operator -- confirm against the type of source_embedding.embedding.
        se.embedding <=> qe.embedding AS distance,
        -- Number of joined rows for this source. Because of the LEFT JOIN
        -- this is 1 (not 0) for a source that has no embedding row.
        count(*) OVER (PARTITION BY s.id) AS rn_count,
        -- rn = 1 marks the closest embedding per source (ascending order
        -- places NULL distances last in PostgreSQL).
        row_number() OVER (
            PARTITION BY s.id
            ORDER BY se.embedding <=> qe.embedding
        ) AS rn
    FROM source AS s
    LEFT JOIN source_embedding AS se
        ON s.id = se.id
    CROSS JOIN query_embedding AS qe
),
-- Keep only the closest embedding per source, then build the text-search
-- document; doing it after the rn filter avoids running to_tsvector on
-- rows that are discarded anyway.
best_match AS (
    SELECT
        id,
        url,
        title,
        description,
        distance,
        rn_count,
        to_tsvector(
            'english',
            coalesce(title, '') || ' '
            || coalesce(description, '') || ' '
            || coalesce(keywords, '')
        ) AS document
    FROM scored
    WHERE rn = 1
),
-- Evaluate the full-text rank and match flag once per surviving row.
ranked AS (
    SELECT
        bm.id,
        bm.url,
        bm.title,
        bm.description,
        bm.distance,
        bm.rn_count,
        ts_rank(bm.document, qt.tsq) AS text_rank,
        bm.document @@ qt.tsq AS is_text_match
    FROM best_match AS bm
    CROSS JOIN query_terms AS qt
)
SELECT
    id,
    url,
    title,
    description,
    distance,
    rn_count,
    text_rank,
    -- Equal 0.5 / 0.5 weighting of text rank and vector similarity.
    -- Distance is capped at 1 so the similarity term never goes negative;
    -- a source with no embedding contributes 0 for that term.
    (coalesce(text_rank, 0) * 0.5)
    + (CASE
           WHEN distance IS NOT NULL THEN (1 - least(distance, 1)) * 0.5
           ELSE 0
       END) AS hybrid_score
FROM ranked
-- A source qualifies if it has an embedding or matches the text query.
WHERE distance IS NOT NULL
   OR is_text_match
-- id breaks ties so the result order is deterministic.
ORDER BY hybrid_score DESC, id;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment