Last active
August 21, 2024 19:22
-
-
Save jonatas/2d3f03e7b9cc8c979b609eecc9088914 to your computer and use it in GitHub Desktop.
Semantic tracking with PGAI
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
CREATE EXTENSION IF NOT EXISTS ai CASCADE; | |
DROP TABLE IF EXISTS vecs CASCADE; | |
DROP TABLE IF EXISTS interests CASCADE; | |
DROP TABLE IF EXISTS notifications CASCADE; | |
-- Create or replace the function to embed content | |
CREATE OR REPLACE FUNCTION embed(content TEXT) RETURNS VECTOR AS $$ | |
DECLARE | |
vectorized_content VECTOR; | |
BEGIN | |
vectorized_content := openai_embed( | |
'text-embedding-ada-002', | |
content | |
)::VECTOR; | |
RETURN vectorized_content; | |
END; | |
$$ IMMUTABLE LANGUAGE plpgsql; | |
CREATE TABLE vecs ( | |
content TEXT PRIMARY KEY, | |
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED | |
); | |
CREATE TABLE interests ( | |
content TEXT PRIMARY KEY, | |
embeds VECTOR GENERATED ALWAYS AS (embed(content)) STORED | |
); | |
CREATE TABLE notifications ( | |
id SERIAL PRIMARY KEY, | |
created_at TIMESTAMP DEFAULT clock_timestamp(), | |
content TEXT REFERENCES vecs(content), | |
summary JSONB | |
); | |
-- fetch all interests in vectors and calculate the similarity | |
-- insert notifications in the table in case find cosine similarity < 0.2 | |
CREATE OR REPLACE FUNCTION check_interests_similarity() RETURNS TRIGGER AS $$ | |
DECLARE | |
interest RECORD; | |
similarity_percent NUMERIC; | |
distance FLOAT; | |
summary jsonb := '{}'::jsonb; | |
BEGIN | |
for interest in select * from interests | |
loop | |
distance := NEW.embeds <=> interest.embeds; | |
if (distance < 0.2) THEN | |
similarity_percent := ((1.0 - distance) * 100)::numeric(10,2); | |
summary := summary || jsonb_build_object( | |
interest.content, similarity_percent | |
); | |
END IF; | |
end loop; | |
-- check if summary has any elements and persist it in the notifications table | |
IF (summary != '{}'::jsonb) THEN | |
insert into notifications (content, summary) values | |
(NEW.content, summary); | |
end if; | |
RETURN NEW; | |
END; | |
$$ LANGUAGE plpgsql; | |
CREATE TRIGGER check_interests_similarity_trigger | |
AFTER INSERT OR UPDATE ON vecs | |
FOR EACH ROW EXECUTE FUNCTION check_interests_similarity(); | |
INSERT INTO interests (content) VALUES | |
('Satisfied customer'), | |
('Unsatisfied customer'), | |
('Product return'); | |
insert into vecs (content) VALUES | |
('The bike looks good but I want to return it and get my money back'), | |
('Works perfectly for my running days'), | |
('This shoes are terrible, pay me back now!'), | |
('Shoes dont fit so I want to return it'), | |
('The bike is very good for my commuting days'); | |
table notifications; | |
Thanks @jgpruitt ! I already renamed it! Thanks for all suggestions!
Latest version: https://gist.github.com/jonatas/6b4070b687578856c3cb98fb61d68171#comments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is really cool.