
@jonatas
Created August 20, 2024 17:05
version: '3.8'
services:
  main:
    image: timescale/timescaledb-ha:pg16.3-ts2.15.3-all
    container_name: vectorized_content_demo
    ports:
      - "5432:5432"
    volumes:
      - ./data_main:/var/lib/postgresql/data
    environment:
      - POSTGRES_PASSWORD=your_password
      - POSTGRES_DB=main_db
      - POSTGRES_USER=main_user
    command: "postgres -c ai.ollama_host=http://host.docker.internal:11434"
volumes:
  main:
    driver: local
CREATE TABLE IF NOT EXISTS playbooks (
  id SERIAL PRIMARY KEY,
  team TEXT,
  instructions TEXT,
  embeds VECTOR GENERATED ALWAYS AS (embed(instructions)) STORED
);
-- Populate playbooks with some example entries
INSERT INTO playbooks (team, instructions) VALUES
  ('customer service', 'When a customer is unsatisfied, offer a refund or replacement.'),
  ('customer service', 'When a customer is satisfied, ask for a review.'),
  ('product', 'Positive feedback should be shared with the product team.'),
  ('returns', 'For returns, check if the product is within the return period and undamaged.');
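-- Example (illustrative): a semantic lookup against the playbooks, the same
-- pattern the RAG trigger below uses. <=> is pgvector's cosine distance
-- operator, so the closest instruction comes first; the query text is just a
-- made-up sample.
SELECT team, instructions
FROM playbooks
ORDER BY embeds <=> embed('the customer wants their money back')
LIMIT 1;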
CREATE TABLE tasks (
  id SERIAL PRIMARY KEY,
  context TEXT,
  description TEXT,
  notification_id BIGINT REFERENCES notifications(id)
);
-- Now, let's create a new function for the RAG process
CREATE OR REPLACE FUNCTION create_tasks_from_notifications() RETURNS TRIGGER AS $$
DECLARE
  most_relevant_context TEXT;
  api_response JSONB;
  task_description TEXT;
BEGIN
  -- Retrieve the most relevant playbook context for the notification summary
  SELECT instructions INTO most_relevant_context
  FROM playbooks
  ORDER BY embeds <=> embed(NEW.summary::text)
  LIMIT 1;

  -- Generate a task description using the pgai chat completion
  api_response := openai_chat_complete('gpt-4o-mini'::text,
    jsonb_build_array(
      jsonb_build_object('role', 'system', 'content',
        'You are a helpful assistant that generates ' ||
        'concise task descriptions based on user input and context. ' ||
        'Return only the task description.'
      ),
      jsonb_build_object('role', 'user', 'content',
        format('Based on this user input: "%s" and this context: "%s",' ||
               ' generate a brief task description for our team.',
               NEW.comment_body, most_relevant_context)
      )
    )
  );

  task_description := api_response->'choices'->0->'message'->>'content';

  INSERT INTO tasks (context, description, notification_id)
  VALUES (most_relevant_context, task_description, NEW.id);

  RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS perform_rag_trigger ON notifications;

-- Create a new trigger for the RAG process
CREATE TRIGGER perform_rag_trigger
  AFTER INSERT ON notifications
  FOR EACH ROW
  EXECUTE FUNCTION create_tasks_from_notifications();
INSERT INTO comments (body) VALUES
  ('I am very satisfied with the high quality product, can you send me a swag to record a video about your product?');

TABLE notifications ORDER BY id DESC LIMIT 1;
TABLE tasks ORDER BY id DESC LIMIT 1;
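-- Example (illustrative): join the generated task back to the notification
-- that produced it, assuming the insert above went through the RAG pipeline.
SELECT n.comment_body, t.context, t.description
FROM tasks t
JOIN notifications n ON n.id = t.notification_id
ORDER BY t.id DESC
LIMIT 1;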
CREATE EXTENSION IF NOT EXISTS ai CASCADE;
DROP TABLE IF EXISTS comments CASCADE;
DROP TABLE IF EXISTS interests CASCADE;
DROP TABLE IF EXISTS notifications CASCADE;
-- Create or replace the function to embed content.
-- Marked IMMUTABLE so it can be used in generated columns, even though it
-- calls the OpenAI embeddings API under the hood.
CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$
DECLARE
  vectorized_content VECTOR;
BEGIN
  vectorized_content := openai_embed(
    'text-embedding-ada-002',
    content
  )::VECTOR;
  RETURN vectorized_content;
END;
$$ IMMUTABLE LANGUAGE plpgsql;
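-- Example (illustrative): text-embedding-ada-002 produces 1536-dimensional
-- vectors, so this should return 1536. Assumes pgvector's vector_dims() is
-- available and ai.openai_api_key is configured in the session.
SELECT vector_dims(embed('hello world'));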
CREATE TABLE comments (
  body TEXT PRIMARY KEY,
  embeds VECTOR GENERATED ALWAYS AS (embed(body)) STORED
);

CREATE TABLE interests (
  subject TEXT PRIMARY KEY,
  embeds VECTOR GENERATED ALWAYS AS (embed(subject)) STORED
);
-- Map interests based on cosine similarity, filtering by the threshold percentage.
-- The function returns a JSONB object with each matching interest and its similarity percentage.
CREATE OR REPLACE FUNCTION map_interests(
  subject text,
  input_vector vector DEFAULT NULL,
  threshold float DEFAULT 80.0) RETURNS jsonb AS $$
DECLARE
  interest RECORD;
  similarity_percent NUMERIC;
  distance FLOAT;
  summary jsonb := '{}'::jsonb;
BEGIN
  IF (input_vector IS NULL) THEN
    input_vector := embed(subject);
  END IF;

  FOR interest IN SELECT * FROM interests
  LOOP
    -- <=> is the cosine distance; similarity = 1 - distance
    distance := input_vector <=> interest.embeds;
    similarity_percent := ((1.0 - distance) * 100)::numeric(10,2);
    IF (similarity_percent >= threshold) THEN
      summary := summary || jsonb_build_object(
        interest.subject, similarity_percent
      );
    END IF;
  END LOOP;

  RETURN summary;
END;
$$ LANGUAGE plpgsql;
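-- Example (illustrative): once the interests table below is populated, this
-- returns the matching subjects keyed by similarity percentage,
-- e.g. {"Product return": 84.12}; exact scores depend on the embedding model.
SELECT map_interests('I want to send this back and get a refund');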
CREATE TABLE notifications (
  id SERIAL PRIMARY KEY,
  created_at TIMESTAMP DEFAULT clock_timestamp(),
  comment_body TEXT REFERENCES comments(body),
  summary JSONB
);
-- Compare the new comment against every interest embedding and insert a
-- notification whenever at least one interest passes the similarity
-- threshold (80% by default, i.e. cosine distance <= 0.2).
CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$
DECLARE
  summary jsonb := '{}'::jsonb;
BEGIN
  summary := map_interests(NEW.body, NEW.embeds);
  IF (summary != '{}'::jsonb) THEN
    INSERT INTO notifications (comment_body, summary)
    VALUES (NEW.body, summary);
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER check_interests_similarity_trigger
  AFTER INSERT OR UPDATE ON comments
  FOR EACH ROW EXECUTE FUNCTION check_interests_similarity();
INSERT INTO interests (subject) VALUES
  ('Satisfied customer'),
  ('Unsatisfied customer'),
  ('Product return');

INSERT INTO comments (body) VALUES
  ('The bike looks good but I regret to buy it and want to return it and get my money back'),
  ('Works perfectly for my running days'),
  ('This shoes are terrible, pay me back now!'),
  ('Shoes dont fit so I want to return it'),
  ('I loved my new bike, your product is amazing!');

TABLE notifications;
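-- Example (illustrative): expand each notification's JSONB summary into one
-- row per matched interest with its similarity percentage.
SELECT n.comment_body, s.key AS interest, s.value AS similarity
FROM notifications n, jsonb_each(n.summary) AS s;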

jonatas commented Aug 20, 2024

I published a short video covering this script: 📺 https://www.youtube.com/watch?v=Jy2BP26Y8nI 🍿

Run docker-compose up to set up the backend:

Configure the environment

Create a .envrc file with your keys and the right configuration:

export OPENAI_API_KEY=sk-proj-....
export PG_URI="postgres://main_user:your_password@localhost:5432/main_db"
export PGOPTIONS="-c ai.openai_api_key=$OPENAI_API_KEY" 

Use a tool like direnv to load your env:

direnv allow

Because PGOPTIONS is declared, psql will start with the OpenAI key available as the ai.openai_api_key setting. Then load semantic_tracking.sql:

psql $PG_URI -f semantic_tracking.sql
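
To double-check that the key was picked up (this assumes the ai.openai_api_key setting from PGOPTIONS above), you can run the following inside psql:

SHOW ai.openai_api_key;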
