jonatas · August 21, 2024 19:22 · jonatas · Aug 20, 2024 · jonatas · Aug 20, 2024
diff --git a/semantic_tracking.sql b/semantic_tracking.sql
 CREATE EXTENSION IF NOT EXISTS ai CASCADE;
 DROP TABLE IF EXISTS vecs CASCADE;
 DROP TABLE IF EXISTS interests CASCADE;
 DROP TABLE IF EXISTS notifications CASCADE;

 -- Create or replace the function to embed content
 CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$
 DECLARE
    vectorized_content VECTOR;
 BEGIN
    vectorized_content := openai_embed(
        'text-embedding-ada-002',
        content
    )::VECTOR;
    RETURN vectorized_content;
 END;
 $$ IMMUTABLE LANGUAGE plpgsql;

 CREATE TABLE vecs (
  content TEXT PRIMARY KEY,
  embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
 );

 CREATE TABLE interests (
  content TEXT PRIMARY KEY,
  embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
 );


 CREATE TABLE notifications (
  id SERIAL PRIMARY KEY,
  created_at TIMESTAMP DEFAULT clock_timestamp(),
  content TEXT REFERENCES vecs(content),
  summary JSONB
 );

 -- fetch all interests in vectors and calculate the similarity
 -- insert notifications in the table in case find cosine similarity < 0.2
 CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$
 DECLARE
  interest RECORD;
  similarity_percent NUMERIC;
  distance FLOAT;
  summary jsonb := '{}'::jsonb;
 BEGIN
  for interest in select * from interests
  loop
    distance := NEW.embeds <=> interest.embeds;
    if (distance < 0.2) THEN
      similarity_percent := ((1.0 - distance) * 100)::numeric(10,2);
      summary := summary || jsonb_build_object(
        interest.content, similarity_percent
      );
    END IF;

  end loop;

  -- check if summary has any elements and persist it in the notifications table
 IF (summary != '{}'::jsonb) THEN
      insert into notifications (content, summary) values
      (NEW.content, summary);
  end if;
  RETURN NEW;
 END;
 $$ LANGUAGE plpgsql;


 CREATE TRIGGER check_interests_similarity_trigger
 AFTER INSERT OR UPDATE ON vecs
 FOR EACH ROW EXECUTE FUNCTION check_interests_similarity();


 INSERT INTO interests (content) VALUES
  ('Satisfied customer'),
  ('Unsatisfied customer'),
  ('Product return');

 insert into vecs (content) VALUES
 ('The bike looks good but I want to return it and get my money back'),
 ('Works perfectly for my running days'),
 ('This shoes are terrible, pay me back now!'),
 ('Shoes dont fit so I want to return it'),
 ('The bike is very good for my commuting days');

 table notifications;
	CREATE EXTENSION IF NOT EXISTS ai CASCADE;
	DROP TABLE IF EXISTS vecs CASCADE;
	DROP TABLE IF EXISTS interests CASCADE;
	DROP TABLE IF EXISTS notifications CASCADE;

	-- Create or replace the function to embed content
	CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$
	DECLARE
	vectorized_content VECTOR;
	BEGIN
	vectorized_content := openai_embed(
	'text-embedding-ada-002',
	content
	)::VECTOR;
	RETURN vectorized_content;
	END;
	$$ IMMUTABLE LANGUAGE plpgsql;

	CREATE TABLE vecs (
	content TEXT PRIMARY KEY,
	embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
	);

	CREATE TABLE interests (
	content TEXT PRIMARY KEY,
	embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
	);


	CREATE TABLE notifications (
	id SERIAL PRIMARY KEY,
	created_at TIMESTAMP DEFAULT clock_timestamp(),
	content TEXT REFERENCES vecs(content),
	summary JSONB
	);

	-- fetch all interests in vectors and calculate the similarity
	-- insert notifications in the table in case find cosine similarity < 0.2
	CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$
	DECLARE
	interest RECORD;
	similarity_percent NUMERIC;
	distance FLOAT;
	summary jsonb := '{}'::jsonb;
	BEGIN
	for interest in select * from interests
	loop
	distance := NEW.embeds <=> interest.embeds;
	if (distance < 0.2) THEN
	similarity_percent := ((1.0 - distance) * 100)::numeric(10,2);
	summary := summary \|\| jsonb_build_object(
	interest.content, similarity_percent
	);
	END IF;

	end loop;

	-- check if summary has any elements and persist it in the notifications table
	IF (summary != '{}'::jsonb) THEN
	insert into notifications (content, summary) values
	(NEW.content, summary);
	end if;
	RETURN NEW;
	END;
	$$ LANGUAGE plpgsql;


	CREATE TRIGGER check_interests_similarity_trigger
	AFTER INSERT OR UPDATE ON vecs
	FOR EACH ROW EXECUTE FUNCTION check_interests_similarity();


	INSERT INTO interests (content) VALUES
	('Satisfied customer'),
	('Unsatisfied customer'),
	('Product return');

	insert into vecs (content) VALUES
	('The bike looks good but I want to return it and get my money back'),
	('Works perfectly for my running days'),
	('This shoes are terrible, pay me back now!'),
	('Shoes dont fit so I want to return it'),
	('The bike is very good for my commuting days');

	table notifications;