Created
October 24, 2024 08:47
-
-
Save jkatz/a723c4014a57d0c9543edc93c90a9469 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- pgvector provides the vector type, l2_normalize(), the <=> cosine-distance
-- operator and the ivfflat index access method that this script relies on.
CREATE EXTENSION IF NOT EXISTS vector;
-- Returns a random vector with `dim` components, scaled to unit L2 length.
--
-- Parameters:
--   dim  number of components to generate.
-- Returns:
--   a pgvector `vector`; NULL when dim < 1, because generate_series() then
--   yields no rows and array_agg() over zero rows is NULL.
-- Notes:
--   * random() returns values in [0.0, 1.0), so every component is
--     non-negative before normalization.
--   * l2_normalize() is referenced in the public schema, i.e. the extension
--     is expected to be installed there.
CREATE OR REPLACE FUNCTION public.generate_random_normalized_vector(dim integer)
RETURNS vector
LANGUAGE SQL
AS $function$
    SELECT public.l2_normalize(array_agg(random()::real)::vector)
    FROM generate_series(1, dim);
$function$;
-- Demo table: one 1536-dimensional embedding per row, keyed by id.
CREATE TABLE vectors (
    id bigint PRIMARY KEY,
    embedding vector(1536)
);
-- Populate the table with 5 million random unit vectors (ids 1..5000000).
-- The row count is written without digit separators because underscores in
-- numeric literals are only accepted by PostgreSQL 16 and later.
INSERT INTO vectors (id, embedding)
SELECT
    n,
    generate_random_normalized_vector(1536)
FROM generate_series(1, 5000000) AS n;
-- Let the index build use up to 3 parallel maintenance workers (session-level setting).
SET max_parallel_maintenance_workers TO 3;
-- Cosine-distance ivfflat index; fewer lists == larger clusters.
CREATE INDEX ON vectors USING ivfflat (embedding vector_cosine_ops) WITH (lists = 500);
-- Generate one random query vector and store it in the psql variable "v".
-- \gset is a psql meta-command, so this script must be run through psql;
-- nothing may follow \gset on its line except an optional variable prefix.
SELECT generate_random_normalized_vector(1536) AS v \gset
-- More probes means searching more lists.
SET ivfflat.probes TO 10;
-- Ten rows closest to the query vector by cosine distance (<=>).
-- Rows with equal distance come back in no guaranteed order.
SELECT
    id,
    embedding <=> :'v' AS distance
FROM vectors
ORDER BY distance
LIMIT 10;
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment