Last active
January 1, 2025 14:22
-
-
Save RomanSteinberg/1533848a240658aadc42150599638327 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
options = SyncClientOptions( | |
postgrest_client_timeout=Timeout(60.0) | |
) | |
self._store = create_client( | |
cfg.supabase.url, | |
cfg.supabase.key, | |
options = options | |
) | |
response = self._store.rpc( | |
f'match_documents', | |
{ | |
'query_embedding': embed_query(text), | |
'match_count': k, | |
'metadata_filter': metadata_filter, | |
'similarity_threshold': similarity_threshold, | |
'target_table' : collection_name, | |
} | |
).execute() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE OR REPLACE FUNCTION match_documents( | |
query_embedding VECTOR(1536), | |
match_count INT, | |
metadata_filter JSONB DEFAULT NULL, | |
similarity_threshold FLOAT DEFAULT 2.0, | |
target_table TEXT DEFAULT 'message_documents' | |
) | |
RETURNS TABLE(id BIGINT, content TEXT, metadata JSONB, similarity FLOAT) | |
LANGUAGE plpgsql | |
AS $$ | |
DECLARE | |
sql_query TEXT; | |
filter_condition TEXT := NULL; | |
BEGIN | |
SET LOCAL statement_timeout = '300s'; | |
-- Check table is specified correct | |
IF target_table NOT IN ('topic_documents', 'message_documents') THEN | |
RAISE EXCEPTION 'Invalid target_table: %', target_table; | |
END IF; | |
-- Create filters due to metadata_filter | |
IF metadata_filter IS NOT NULL THEN | |
SELECT string_agg( | |
CASE | |
-- Operator $gte | |
WHEN inner_key = '$gte' THEN format('(metadata->>%L)::FLOAT >= %L', outer_key, inner_value::TEXT) | |
-- Operator $eq | |
WHEN inner_key = '$eq' THEN format('(metadata->>%L)::FLOAT = %L', outer_key, inner_value::TEXT) | |
-- Operator $in | |
WHEN inner_key = '$in' THEN format( | |
'(metadata->>%L) = ANY(SELECT jsonb_array_elements_text(%L::JSONB)::TEXT)', | |
outer_key, | |
inner_value | |
) | |
ELSE NULL | |
END, ' AND ') | |
INTO filter_condition | |
FROM ( | |
SELECT outer_key, inner_key, inner_value | |
FROM ( | |
-- Parse top level keys | |
SELECT key AS outer_key, value | |
FROM jsonb_each(metadata_filter) | |
) t, | |
-- Parse internal dictionary Разбираем вложенный словарь внутри значения верхнего уровня | |
LATERAL jsonb_each_text(t.value) AS subquery(inner_key, inner_value) | |
) conditions; | |
END IF; | |
-- Create dynamic SQL | |
sql_query := format( | |
'SELECT id, content, metadata, (embedding <-> $1) AS similarity | |
FROM %I | |
WHERE (embedding <-> $1) <= $2 | |
%s | |
ORDER BY similarity ASC | |
LIMIT $3', | |
target_table, | |
CASE WHEN filter_condition IS NOT NULL THEN 'AND ' || filter_condition ELSE '' END | |
); | |
-- Execute dynamic SQL | |
RETURN QUERY EXECUTE sql_query | |
USING query_embedding, similarity_threshold, match_count; | |
END; | |
$$; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment