Skip to content

Instantly share code, notes, and snippets.

@rawnly
Last active May 10, 2025 19:27
Show Gist options
  • Save rawnly/39797585ecc4379c6d1e7cd2945e2a42 to your computer and use it in GitHub Desktop.
Save rawnly/39797585ecc4379c6d1e7cd2945e2a42 to your computer and use it in GitHub Desktop.
-- Hybrid search over source rows.
-- Each source is scored by combining
--   * vector distance between the query embedding and the closest of its
--     stored embeddings (the <=> operator, cosine distance if these are
--     pgvector columns - confirm against the schema), and
--   * PostgreSQL full-text rank of title + description + keywords.
-- Both signals are weighted 0.5. A source is returned when it has at least
-- one embedding or when its text matches the query terms.
-- Output columns: id, url, title, description, distance, rn_count,
-- text_rank, hybrid_score.
-- NOTE for reviewers - there is no LIMIT, so every qualifying source is returned.
WITH
-- Embed the search text. MATERIALIZED stops the planner from inlining this
-- single-reference CTE, so the embedding call is evaluated once instead of
-- being folded into the per-row distance expression.
query_embedding AS MATERIALIZED (
    SELECT ai.openai_embed(
        'text-embedding-3-small',
        ${query},
        ${OPENAI_API_KEY}
    ) AS embedding
),
-- Parse the search text into a tsquery in one place so that filtering and
-- ranking are guaranteed to use the same terms.
query_terms AS (
    SELECT plainto_tsquery('english', ${query}) AS tsq
),
-- One row per (source, embedding) pair. Sources without any embedding are
-- kept by the LEFT JOIN with a NULL distance.
scored AS (
    SELECT
        s.id,
        s.url,
        s.title,
        s.description,
        se.embedding <=> qe.embedding AS distance,
        to_tsvector(
            'english',
            coalesce(s.title, '') || ' '
            || coalesce(s.description, '') || ' '
            || coalesce(s.keywords, '')
        ) AS document,
        -- Rows in this source's partition. Because of the LEFT JOIN a source
        -- with no embeddings still counts as 1 (its NULL-extended row).
        count(*) OVER (PARTITION BY s.id) AS rn_count,
        -- rn = 1 marks the closest embedding of each source. Ascending order
        -- puts NULL distances last by default in PostgreSQL.
        row_number() OVER (
            PARTITION BY s.id
            ORDER BY se.embedding <=> qe.embedding
        ) AS rn
    FROM source AS s
    LEFT JOIN source_embedding AS se
        ON s.id = se.id
    CROSS JOIN query_embedding AS qe
),
-- Best row per source, restricted to sources that have an embedding or a
-- full-text match. The text rank is computed once here and reused below.
ranked AS (
    SELECT
        sc.id,
        sc.url,
        sc.title,
        sc.description,
        sc.distance,
        sc.rn_count,
        ts_rank(sc.document, qt.tsq) AS text_rank
    FROM scored AS sc
    CROSS JOIN query_terms AS qt
    WHERE sc.rn = 1
        AND (
            sc.distance IS NOT NULL
            OR sc.document @@ qt.tsq
        )
)
SELECT
    id,
    url,
    title,
    description,
    distance,
    rn_count,
    text_rank,
    -- Text half: the rank, or 0 when it is NULL.
    (coalesce(text_rank, 0) * 0.5)
    -- Vector half: distance is clamped to 1 so the similarity stays within
    -- 0..1, and a source with no embedding contributes 0.
    + (
        CASE
            WHEN distance IS NOT NULL THEN (1 - least(distance, 1)) * 0.5
            ELSE 0
        END
    ) AS hybrid_score
FROM ranked
-- id breaks ties so that rows with equal scores come back in a stable order.
ORDER BY hybrid_score DESC, id;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment