Skip to content

Instantly share code, notes, and snippets.

@jonatas
Last active August 21, 2024 19:22
Show Gist options
  • Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.
Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.
Semantic tracking with PGAI
CREATE EXTENSION IF NOT EXISTS ai CASCADE;
DROP TABLE IF EXISTS vecs CASCADE;
DROP TABLE IF EXISTS interests CASCADE;
DROP TABLE IF EXISTS notifications CASCADE;
-- Create or replace the function to embed content
CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$
DECLARE
vectorized_content VECTOR;
BEGIN
vectorized_content := openai_embed(
'text-embedding-ada-002',
content
)::VECTOR;
RETURN vectorized_content;
END;
$$ IMMUTABLE LANGUAGE plpgsql;
CREATE TABLE vecs (
content TEXT PRIMARY KEY,
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
);
CREATE TABLE interests (
content TEXT PRIMARY KEY,
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED
);
CREATE TABLE notifications (
id SERIAL PRIMARY KEY,
created_at TIMESTAMP DEFAULT clock_timestamp(),
content TEXT REFERENCES vecs(content),
summary JSONB
);
-- fetch all interests in vectors and calculate the similarity
-- insert notifications in the table in case find cosine similarity < 0.2
CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$
DECLARE
interest RECORD;
similarity_percent NUMERIC;
distance FLOAT;
summary jsonb := '{}'::jsonb;
BEGIN
for interest in select * from interests
loop
distance := NEW.embeds <=> interest.embeds;
if (distance < 0.2) THEN
similarity_percent := ((1.0 - distance) * 100)::numeric(10,2);
summary := summary || jsonb_build_object(
interest.content, similarity_percent
);
END IF;
end loop;
-- check if summary has any elements and persist it in the notifications table
IF (summary != '{}'::jsonb) THEN
insert into notifications (content, summary) values
(NEW.content, summary);
end if;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER check_interests_similarity_trigger
AFTER INSERT OR UPDATE ON vecs
FOR EACH ROW EXECUTE FUNCTION check_interests_similarity();
INSERT INTO interests (content) VALUES
('Satisfied customer'),
('Unsatisfied customer'),
('Product return');
insert into vecs (content) VALUES
('The bike looks good but I want to return it and get my money back'),
('Works perfectly for my running days'),
('This shoes are terrible, pay me back now!'),
('Shoes dont fit so I want to return it'),
('The bike is very good for my commuting days');
table notifications;
@jgpruitt
Copy link

I had a lot of trouble understanding the data model at first. If you named the vecs table customer_feedback and the interests table either category or sentiment it would help a great deal to convey the intention.

@jgpruitt
Copy link

It's unfortunate that the openai_embed function isn't immutable. it will be in the next version and then wrapping it in the immutable embed function won't be necessary. I guess we need to review all our examples on the next release.

@jgpruitt
Copy link

This is really cool.

@jonatas
Copy link
Author

jonatas commented Aug 21, 2024

Thanks @jgpruitt ! I already renamed it! Thanks for all suggestions!

Latest version: https://gist.github.com/jonatas/6b4070b687578856c3cb98fb61d68171#comments

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment