Skip to content

Instantly share code, notes, and snippets.

@jonatas
Last active August 21, 2024 19:22
Show Gist options
  • Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.
Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.
Semantic tracking with PGAI
CREATE EXTENSION IF NOT EXISTS ai CASCADE;
DROP TABLE IF EXISTS vecs CASCADE;
DROP TABLE IF EXISTS interests CASCADE;
DROP TABLE IF EXISTS notifications CASCADE;
-- Create or replace the function to embed content
CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$
DECLARE
vectorized_content VECTOR;
BEGIN
vectorized_content := openai_embed(
'text-embedding-ada-002',
content
)::VECTOR;
RETURN vectorized_content;
END;
$$ IMMUTABLE LANGUAGE plpgsql;
CREATE TABLE vecs (
content TEXT PRIMARY KEY,
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
);
CREATE TABLE interests (
content TEXT PRIMARY KEY,
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
);
CREATE TABLE notifications (
id SERIAL PRIMARY KEY,
created_at TIMESTAMP DEFAULT clock_timestamp(),
content TEXT REFERENCES vecs(content),
summary JSONB
);
-- fetch all interests in vectors and calculate the similarity
-- insert notifications in the table in case find cosine similarity < 0.2
CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$
DECLARE
interest RECORD;
similarity_percent NUMERIC;
distance FLOAT;
summary jsonb := '{}'::jsonb;
BEGIN
for interest in select * from interests
loop
distance := NEW.embeds <=> interest.embeds;
if (distance < 0.2) THEN
similarity_percent := ((1.0 - distance) * 100)::numeric(10,2);
summary := summary || jsonb_build_object(
interest.content, similarity_percent
);
END IF;
end loop;
-- check if summary has any elements and persist it in the notifications table
IF (summary != '{}'::jsonb) THEN
insert into notifications (content, summary) values
(NEW.content, summary);
end if;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER check_interests_similarity_trigger
AFTER INSERT OR UPDATE ON vecs
FOR EACH ROW EXECUTE FUNCTION check_interests_similarity();
INSERT INTO interests (content) VALUES
('Satisfied customer'),
('Unsatisfied customer'),
('Product return');
insert into vecs (content) VALUES
('The bike looks good but I want to return it and get my money back'),
('Works perfectly for my running days'),
('This shoes are terrible, pay me back now!'),
('Shoes dont fit so I want to return it'),
('The bike is very good for my commuting days');
table notifications;
@jonatas
Copy link
Author

jonatas commented Aug 20, 2024

Output:

-[ RECORD 1 ]-----------------------------------------------------------------
id         | 1
created_at | 2024-08-20 00:52:59.383766
content    | The bike looks good but I want to return it and get my money back
summary    | {"Unsatisfied customer": 80.62}
-[ RECORD 2 ]-----------------------------------------------------------------
id         | 2
created_at | 2024-08-20 00:52:59.384473
content    | Works perfectly for my running days
summary    | {"Satisfied customer": 80.87}
-[ RECORD 3 ]-----------------------------------------------------------------
id         | 3
created_at | 2024-08-20 00:52:59.384746
content    | Shoes dont fit so I want to return it
summary    | {"Product return": 80.52, "Unsatisfied customer": 82.38}

@jonatas
Copy link
Author

jonatas commented Aug 20, 2024

Expand to RAG follow up on the notifications

Continuing after detect an interest, so let's create agent instructions to also assign tasks from the notifications:

CREATE TABLE ai_instructions (
  id SERIAL PRIMARY KEY,
  category TEXT,
  content TEXT,
  embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
);

-- Populate ai_instructions with some example entries
INSERT INTO ai_instructions (category, content) VALUES
('customer_service', 'When a customer is unsatisfied, offer a refund or replacement.'),
('product_feedback', 'Positive feedback should be shared with the product team.'),
('returns', 'For returns, check if the product is within the return period and undamaged.');


CREATE TABLE tasks (
  id SERIAL PRIMARY KEY,
  context TEXT,
  description TEXT,
  notification_id BIGINT REFERENCES notifications(id)
);

-- Now, let's create a new function for the RAG process
CREATE OR REPLACE FUNCTION create_tasks_from_notifications() RETURNS TRIGGER AS $$
DECLARE
    most_relevant_context TEXT;
    api_response JSONB;
    task_description TEXT;
BEGIN
    -- Retrieve most relevant context
    SELECT content INTO most_relevant_context
    FROM ai_instructions
    ORDER BY embeds <=> embed(NEW.content)
    LIMIT 1;

    -- Generate task using PGAI autocomplete
    api_response := openai_chat_complete('gpt-4o-mini'::text,
        jsonb_build_array(
            json_build_object('role', 'system', 'content',
            'You are a helpful assistant that generates concise task descriptions based on user input and context. Return only the task description.'
            ),
            json_build_object('role', 'user', 'content',
            format('Based on this user input: "%s" and this context: "%s", generate a brief task description for our team.', NEW.content, most_relevant_context)
            )
        )
    );

    task_description := api_response->'choices'->0->'message'->>'content';

    INSERT INTO tasks (context, description, notification_id)
    VALUES (most_relevant_context, task_description, NEW.id);

    RETURN NEW;
END;
$$ LANGUAGE plpgsql;

-- Create a new trigger for the RAG process
CREATE TRIGGER perform_rag_trigger
AFTER INSERT ON notifications
FOR EACH ROW
EXECUTE FUNCTION create_tasks_from_notifications();

Testing

insert into vecs (content) VALUES
('I am very satisfied with the high quality product you developed, can you send me a swag to record a video about your product?');

querying it:

table notifications order by id desc limit 1;
-[ RECORD 1 ]-----------------------------------------------------------------------------------------------------------------------------
id         | 4
created_at | 2024-08-20 00:53:19.554133
content    | I am very satisfied with the high quality product you developed, can you send me a swag to record a video about your product?
summary    | {"Satisfied customer": 82.12}

Generated task

table tasks order by id desc limit 1;

-[ RECORD 1 ]---+--------------------------------------------------------------------------------------------------
id              | 3
context         | Positive feedback should be shared with the product team.
description     | Share the positive feedback with the product team and arrange to send swag for a video recording.
notification_id | 4

@jgpruitt
Copy link

I had a lot of trouble understanding the data model at first. If you named the vecs table customer_feedback and the interests table either category or sentiment it would help a great deal to convey the intention.

@jgpruitt
Copy link

It's unfortunate that the openai_embed function isn't immutable. it will be in the next version and then wrapping it in the immutable embed function won't be necessary. I guess we need to review all our examples on the next release.

@jgpruitt
Copy link

This is really cool.

@jonatas
Copy link
Author

jonatas commented Aug 21, 2024

Thanks @jgpruitt ! I already renamed it! Thanks for all suggestions!

Latest version: https://gist.github.com/jonatas/6b4070b687578856c3cb98fb61d68171#comments

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment