Skip to content

Instantly share code, notes, and snippets.

@RomanSteinberg
Last active January 1, 2025 14:22
Show Gist options
  • Save RomanSteinberg/1533848a240658aadc42150599638327 to your computer and use it in GitHub Desktop.
Save RomanSteinberg/1533848a240658aadc42150599638327 to your computer and use it in GitHub Desktop.
options = SyncClientOptions(
postgrest_client_timeout=Timeout(60.0)
)
self._store = create_client(
cfg.supabase.url,
cfg.supabase.key,
options = options
)
response = self._store.rpc(
f'match_documents',
{
'query_embedding': embed_query(text),
'match_count': k,
'metadata_filter': metadata_filter,
'similarity_threshold': similarity_threshold,
'target_table' : collection_name,
}
).execute()
CREATE OR REPLACE FUNCTION match_documents(
query_embedding VECTOR(1536),
match_count INT,
metadata_filter JSONB DEFAULT NULL,
similarity_threshold FLOAT DEFAULT 2.0,
target_table TEXT DEFAULT 'message_documents'
)
RETURNS TABLE(id BIGINT, content TEXT, metadata JSONB, similarity FLOAT)
LANGUAGE plpgsql
AS $$
DECLARE
sql_query TEXT;
filter_condition TEXT := NULL;
BEGIN
SET LOCAL statement_timeout = '300s';
-- Check table is specified correct
IF target_table NOT IN ('topic_documents', 'message_documents') THEN
RAISE EXCEPTION 'Invalid target_table: %', target_table;
END IF;
-- Create filters due to metadata_filter
IF metadata_filter IS NOT NULL THEN
SELECT string_agg(
CASE
-- Operator $gte
WHEN inner_key = '$gte' THEN format('(metadata->>%L)::FLOAT >= %L', outer_key, inner_value::TEXT)
-- Operator $eq
WHEN inner_key = '$eq' THEN format('(metadata->>%L)::FLOAT = %L', outer_key, inner_value::TEXT)
-- Operator $in
WHEN inner_key = '$in' THEN format(
'(metadata->>%L) = ANY(SELECT jsonb_array_elements_text(%L::JSONB)::TEXT)',
outer_key,
inner_value
)
ELSE NULL
END, ' AND ')
INTO filter_condition
FROM (
SELECT outer_key, inner_key, inner_value
FROM (
-- Parse top level keys
SELECT key AS outer_key, value
FROM jsonb_each(metadata_filter)
) t,
-- Parse internal dictionary Разбираем вложенный словарь внутри значения верхнего уровня
LATERAL jsonb_each_text(t.value) AS subquery(inner_key, inner_value)
) conditions;
END IF;
-- Create dynamic SQL
sql_query := format(
'SELECT id, content, metadata, (embedding <-> $1) AS similarity
FROM %I
WHERE (embedding <-> $1) <= $2
%s
ORDER BY similarity ASC
LIMIT $3',
target_table,
CASE WHEN filter_condition IS NOT NULL THEN 'AND ' || filter_condition ELSE '' END
);
-- Execute dynamic SQL
RETURN QUERY EXECUTE sql_query
USING query_embedding, similarity_threshold, match_count;
END;
$$;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment