Skip to content

Instantly share code, notes, and snippets.

@jkatz
Created October 24, 2024 08:47
Show Gist options
  • Save jkatz/a723c4014a57d0c9543edc93c90a9469 to your computer and use it in GitHub Desktop.
Save jkatz/a723c4014a57d0c9543edc93c90a9469 to your computer and use it in GitHub Desktop.
-- Install the "vector" extension if it is not already present in this database.
-- The vector type, the ivfflat index method and the <=> operator used further
-- down are expected to come from it (presumably pgvector -- confirm).
CREATE EXTENSION IF NOT EXISTS vector;
-- generate_random_normalized_vector(dim)
--
-- Returns a vector with `dim` components. Each component is an independent
-- random() draw cast to real; the collected array is cast to vector and
-- passed through l2_normalize.
--
-- Notes:
--   * random() only yields non-negative values, so every component of the
--     result is non-negative as well.
--   * For dim < 1 the series is empty and array_agg yields NULL rather than
--     an empty array.
CREATE OR REPLACE FUNCTION public.generate_random_normalized_vector(dim integer)
    RETURNS vector
    LANGUAGE sql
AS $function$
    SELECT
        public.l2_normalize(
            CAST(array_agg(CAST(random() AS real)) AS vector)
        )
    FROM generate_series(1, dim) AS component_idx;
$function$;
-- One row per embedding: a caller-supplied bigint key (no default or identity)
-- plus a fixed-width 1536-dimension vector.
CREATE TABLE vectors (
    id        bigint,
    embedding vector(1536),
    PRIMARY KEY (id)
);
-- Load 5,000,000 rows: ids 1..5,000,000, each paired with a freshly generated
-- 1536-dimension normalized random vector.
--
-- The target columns are listed explicitly so the load does not depend on the
-- physical column order of the table.
--
-- NOTE: the 5_000_000 literal uses underscore digit separators, which require
-- PostgreSQL 16 or newer.
INSERT INTO vectors (id, embedding)
SELECT
    n,
    generate_random_normalized_vector(1536)
FROM generate_series(1, 5_000_000) AS n;
-- Let a single maintenance command (such as the index build below) start up to
-- 3 parallel workers for the rest of this session.
SET max_parallel_maintenance_workers = 3;

-- IVFFlat index over the embeddings using cosine-distance operators.
-- Fewer lists means larger clusters, i.e. more rows stored per list.
CREATE INDEX ON vectors
    USING ivfflat (embedding vector_cosine_ops)
    WITH (lists = 500);
-- Generate one random query vector and stash it in the psql variable "v"
-- (the gset meta-command stores each output column under its column name).
SELECT generate_random_normalized_vector(1536) AS v \gset

-- More probes means more lists are searched per query.
SET ivfflat.probes = 10;

-- The 10 rows with the smallest cosine distance to the query vector.
SELECT
    vectors.id,
    :'v' <=> vectors.embedding AS distance
FROM vectors
ORDER BY distance
LIMIT 10;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment