-
-
Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.
CREATE EXTENSION IF NOT EXISTS ai CASCADE; | |
DROP TABLE IF EXISTS vecs CASCADE; | |
DROP TABLE IF EXISTS interests CASCADE; | |
DROP TABLE IF EXISTS notifications CASCADE; | |
-- Create or replace the function to embed content | |
CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$ | |
DECLARE | |
vectorized_content VECTOR; | |
BEGIN | |
vectorized_content := openai_embed( | |
'text-embedding-ada-002', | |
content | |
)::VECTOR; | |
RETURN vectorized_content; | |
END; | |
$$ IMMUTABLE LANGUAGE plpgsql; | |
CREATE TABLE vecs ( | |
content TEXT PRIMARY KEY, | |
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED | |
); | |
CREATE TABLE interests ( | |
content TEXT PRIMARY KEY, | |
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED | |
); | |
CREATE TABLE notifications ( | |
id SERIAL PRIMARY KEY, | |
created_at TIMESTAMP DEFAULT clock_timestamp(), | |
content TEXT REFERENCES vecs(content), | |
summary JSONB | |
); | |
-- fetch all interests in vectors and calculate the similarity | |
-- insert notifications in the table in case find cosine similarity < 0.2 | |
CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$ | |
DECLARE | |
interest RECORD; | |
similarity_percent NUMERIC; | |
distance FLOAT; | |
summary jsonb := '{}'::jsonb; | |
BEGIN | |
for interest in select * from interests | |
loop | |
distance := NEW.embeds <=> interest.embeds; | |
if (distance < 0.2) THEN | |
similarity_percent := ((1.0 - distance) * 100)::numeric(10,2); | |
summary := summary || jsonb_build_object( | |
interest.content, similarity_percent | |
); | |
END IF; | |
end loop; | |
-- check if summary has any elements and persist it in the notifications table | |
IF (summary != '{}'::jsonb) THEN | |
insert into notifications (content, summary) values | |
(NEW.content, summary); | |
end if; | |
RETURN NEW; | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE TRIGGER check_interests_similarity_trigger | |
AFTER INSERT OR UPDATE ON vecs | |
FOR EACH ROW EXECUTE FUNCTION check_interests_similarity(); | |
INSERT INTO interests (content) VALUES | |
('Satisfied customer'), | |
('Unsatisfied customer'), | |
('Product return'); | |
insert into vecs (content) VALUES | |
('The bike looks good but I want to return it and get my money back'), | |
('Works perfectly for my running days'), | |
('This shoes are terrible, pay me back now!'), | |
('Shoes dont fit so I want to return it'), | |
('The bike is very good for my commuting days'); | |
table notifications; | |
Expand to RAG follow up on the notifications
Continuing after detect an interest, so let's create agent instructions to also assign tasks from the notifications:
CREATE TABLE ai_instructions (
id SERIAL PRIMARY KEY,
category TEXT,
content TEXT,
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
);
-- Populate ai_instructions with some example entries
INSERT INTO ai_instructions (category, content) VALUES
('customer_service', 'When a customer is unsatisfied, offer a refund or replacement.'),
('product_feedback', 'Positive feedback should be shared with the product team.'),
('returns', 'For returns, check if the product is within the return period and undamaged.');
CREATE TABLE tasks (
id SERIAL PRIMARY KEY,
context TEXT,
description TEXT,
notification_id BIGINT REFERENCES notifications(id)
);
-- Now, let's create a new function for the RAG process
CREATE OR REPLACE FUNCTION create_tasks_from_notifications() RETURNS TRIGGER AS $$
DECLARE
most_relevant_context TEXT;
api_response JSONB;
task_description TEXT;
BEGIN
-- Retrieve most relevant context
SELECT content INTO most_relevant_context
FROM ai_instructions
ORDER BY embeds <=> embed(NEW.content)
LIMIT 1;
-- Generate task using PGAI autocomplete
api_response := openai_chat_complete('gpt-4o-mini'::text,
jsonb_build_array(
json_build_object('role', 'system', 'content',
'You are a helpful assistant that generates concise task descriptions based on user input and context. Return only the task description.'
),
json_build_object('role', 'user', 'content',
format('Based on this user input: "%s" and this context: "%s", generate a brief task description for our team.', NEW.content, most_relevant_context)
)
)
);
task_description := api_response->'choices'->0->'message'->>'content';
INSERT INTO tasks (context, description, notification_id)
VALUES (most_relevant_context, task_description, NEW.id);
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create a new trigger for the RAG process
CREATE TRIGGER perform_rag_trigger
AFTER INSERT ON notifications
FOR EACH ROW
EXECUTE FUNCTION create_tasks_from_notifications();
Testing
insert into vecs (content) VALUES
('I am very satisfied with the high quality product you developed, can you send me a swag to record a video about your product?');
querying it:
table notifications order by id desc limit 1;
-[ RECORD 1 ]-----------------------------------------------------------------------------------------------------------------------------
id | 4
created_at | 2024-08-20 00:53:19.554133
content | I am very satisfied with the high quality product you developed, can you send me a swag to record a video about your product?
summary | {"Satisfied customer": 82.12}
Generated task
table tasks order by id desc limit 1;
-[ RECORD 1 ]---+--------------------------------------------------------------------------------------------------
id | 3
context | Positive feedback should be shared with the product team.
description | Share the positive feedback with the product team and arrange to send swag for a video recording.
notification_id | 4
I had a lot of trouble understanding the data model at first. If you named the vecs
table customer_feedback
and the interests
table either category
or sentiment
it would help a great deal to convey the intention.
It's unfortunate that the openai_embed function isn't immutable. it will be in the next version and then wrapping it in the immutable embed
function won't be necessary. I guess we need to review all our examples on the next release.
This is really cool.
Thanks @jgpruitt ! I already renamed it! Thanks for all suggestions!
Latest version: https://gist.github.com/jonatas/6b4070b687578856c3cb98fb61d68171#comments
Output: