
@jonatas
Created August 20, 2024 17:05
version: '3.8'
services:
  main:
    image: timescale/timescaledb-ha:pg16.3-ts2.15.3-all
    container_name: vectorized_content_demo
    ports:
      - "5432:5432"
    volumes:
      - ./data_main:/var/lib/postgresql/data
    environment:
      - POSTGRES_PASSWORD=your_password
      - POSTGRES_DB=main_db
      - POSTGRES_USER=main_user
    command: "postgres -c ai.ollama_host=http://host.docker.internal:11434"
volumes:
  main:
    driver: local
CREATE TABLE IF NOT EXISTS playbooks (
  id SERIAL PRIMARY KEY,
  team TEXT,
  instructions TEXT,
  embeds VECTOR GENERATED ALWAYS AS (embed(instructions)) STORED
);
-- Populate playbooks with some example entries
INSERT INTO playbooks (team, instructions) VALUES
  ('customer service', 'When a customer is unsatisfied, offer a refund or replacement.'),
  ('customer service', 'When a customer is satisfied, ask for a review.'),
  ('product', 'Positive feedback should be shared with the product team.'),
  ('returns', 'For returns, check if the product is within the return period and undamaged.');
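-- Example (illustrative): a semantic lookup against the playbooks, the same
-- pattern the RAG trigger below uses. <=> is pgvector's cosine distance
-- operator, so the closest instruction comes first; the query text is just a
-- made-up sample.
SELECT team, instructions
FROM playbooks
ORDER BY embeds <=> embed('the customer wants their money back')
LIMIT 1;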
CREATE TABLE tasks (
  id SERIAL PRIMARY KEY,
  context TEXT,
  description TEXT,
  notification_id BIGINT REFERENCES notifications(id)
);
-- Now, let's create a new function for the RAG process
CREATE OR REPLACE FUNCTION create_tasks_from_notifications() RETURNS TRIGGER AS $$
DECLARE
  most_relevant_context TEXT;
  api_response JSONB;
  task_description TEXT;
BEGIN
  -- Retrieve the most relevant playbook context for the notification summary
  SELECT instructions INTO most_relevant_context
  FROM playbooks
  ORDER BY embeds <=> embed(NEW.summary::text)
  LIMIT 1;

  -- Generate a task description using the pgai chat completion
  api_response := openai_chat_complete('gpt-4o-mini'::text,
    jsonb_build_array(
      jsonb_build_object('role', 'system', 'content',
        'You are a helpful assistant that generates ' ||
        'concise task descriptions based on user input and context. ' ||
        'Return only the task description.'
      ),
      jsonb_build_object('role', 'user', 'content',
        format('Based on this user input: "%s" and this context: "%s",' ||
               ' generate a brief task description for our team.',
               NEW.comment_body, most_relevant_context)
      )
    )
  );

  task_description := api_response->'choices'->0->'message'->>'content';

  INSERT INTO tasks (context, description, notification_id)
  VALUES (most_relevant_context, task_description, NEW.id);

  RETURN NEW;
END;
$$ LANGUAGE plpgsql;
DROP TRIGGER IF EXISTS perform_rag_trigger ON notifications;

-- Create a new trigger for the RAG process
CREATE TRIGGER perform_rag_trigger
  AFTER INSERT ON notifications
  FOR EACH ROW
  EXECUTE FUNCTION create_tasks_from_notifications();
INSERT INTO comments (body) VALUES
  ('I am very satisfied with the high quality product, can you send me a swag to record a video about your product?');

TABLE notifications ORDER BY id DESC LIMIT 1;
TABLE tasks ORDER BY id DESC LIMIT 1;
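-- Example (illustrative): join the generated task back to the notification
-- that produced it, assuming the insert above went through the RAG pipeline.
SELECT n.comment_body, t.context, t.description
FROM tasks t
JOIN notifications n ON n.id = t.notification_id
ORDER BY t.id DESC
LIMIT 1;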
CREATE EXTENSION IF NOT EXISTS ai CASCADE;
DROP TABLE IF EXISTS comments CASCADE;
DROP TABLE IF EXISTS interests CASCADE;
DROP TABLE IF EXISTS notifications CASCADE;
-- Create or replace the function to embed content.
-- Marked IMMUTABLE so it can be used in generated columns, even though it
-- calls the OpenAI embeddings API under the hood.
CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$
DECLARE
  vectorized_content VECTOR;
BEGIN
  vectorized_content := openai_embed(
    'text-embedding-ada-002',
    content
  )::VECTOR;
  RETURN vectorized_content;
END;
$$ IMMUTABLE LANGUAGE plpgsql;
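-- Example (illustrative): text-embedding-ada-002 produces 1536-dimensional
-- vectors, so this should return 1536. Assumes pgvector's vector_dims() is
-- available and ai.openai_api_key is configured in the session.
SELECT vector_dims(embed('hello world'));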
CREATE TABLE comments (
  body TEXT PRIMARY KEY,
  embeds VECTOR GENERATED ALWAYS AS (embed(body)) STORED
);

CREATE TABLE interests (
  subject TEXT PRIMARY KEY,
  embeds VECTOR GENERATED ALWAYS AS (embed(subject)) STORED
);
-- Map interests based on cosine similarity, filtering by the threshold percentage.
-- The function returns a JSONB object with each matching interest and its similarity percentage.
CREATE OR REPLACE FUNCTION map_interests(
  subject text,
  input_vector vector DEFAULT NULL,
  threshold float DEFAULT 80.0) RETURNS jsonb AS $$
DECLARE
  interest RECORD;
  similarity_percent NUMERIC;
  distance FLOAT;
  summary jsonb := '{}'::jsonb;
BEGIN
  IF (input_vector IS NULL) THEN
    input_vector := embed(subject);
  END IF;

  FOR interest IN SELECT * FROM interests
  LOOP
    -- <=> is the cosine distance; similarity = 1 - distance
    distance := input_vector <=> interest.embeds;
    similarity_percent := ((1.0 - distance) * 100)::numeric(10,2);
    IF (similarity_percent >= threshold) THEN
      summary := summary || jsonb_build_object(
        interest.subject, similarity_percent
      );
    END IF;
  END LOOP;

  RETURN summary;
END;
$$ LANGUAGE plpgsql;
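-- Example (illustrative): once the interests table below is populated, this
-- returns the matching subjects keyed by similarity percentage,
-- e.g. {"Product return": 84.12}; exact scores depend on the embedding model.
SELECT map_interests('I want to send this back and get a refund');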
CREATE TABLE notifications (
  id SERIAL PRIMARY KEY,
  created_at TIMESTAMP DEFAULT clock_timestamp(),
  comment_body TEXT REFERENCES comments(body),
  summary JSONB
);
-- Compare the new comment against every interest embedding and insert a
-- notification whenever at least one interest passes the similarity
-- threshold (80% by default, i.e. cosine distance <= 0.2).
CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$
DECLARE
  summary jsonb := '{}'::jsonb;
BEGIN
  summary := map_interests(NEW.body, NEW.embeds);
  IF (summary != '{}'::jsonb) THEN
    INSERT INTO notifications (comment_body, summary)
    VALUES (NEW.body, summary);
  END IF;
  RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER check_interests_similarity_trigger
  AFTER INSERT OR UPDATE ON comments
  FOR EACH ROW EXECUTE FUNCTION check_interests_similarity();
INSERT INTO interests (subject) VALUES
  ('Satisfied customer'),
  ('Unsatisfied customer'),
  ('Product return');

INSERT INTO comments (body) VALUES
  ('The bike looks good but I regret to buy it and want to return it and get my money back'),
  ('Works perfectly for my running days'),
  ('This shoes are terrible, pay me back now!'),
  ('Shoes dont fit so I want to return it'),
  ('I loved my new bike, your product is amazing!');

TABLE notifications;
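-- Example (illustrative): expand each notification's JSONB summary into one
-- row per matched interest with its similarity percentage.
SELECT n.comment_body, s.key AS interest, s.value AS similarity
FROM notifications n, jsonb_each(n.summary) AS s;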

jonatas commented Aug 20, 2024

I published a short video covering this script: 📺 https://www.youtube.com/watch?v=Jy2BP26Y8nI 🍿

Run docker-compose up to set up the backend:

Configure the environment

Create a .envrc file with your keys and the right configuration:

export OPENAI_API_KEY=sk-proj-....
export PG_URI="postgres://main_user:your_password@localhost:5432/main_db"
export PGOPTIONS="-c ai.openai_api_key=$OPENAI_API_KEY" 

Use a tool like direnv to load your env:

direnv allow

Because PGOPTIONS is declared, psql will start with the OpenAI key available as the ai.openai_api_key setting. Then load semantic_tracking.sql:

psql $PG_URI -f semantic_tracking.sql
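
To double-check that the key was picked up (this assumes the ai.openai_api_key setting from PGOPTIONS above), you can run the following inside psql:

SHOW ai.openai_api_key;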
