Skip to content

Instantly share code, notes, and snippets.

@YangKeao
Created March 9, 2026 06:13
Show Gist options
  • Select an option

  • Save YangKeao/b15cd4e3f05a7bdd777bc8112534b280 to your computer and use it in GitHub Desktop.

Select an option

Save YangKeao/b15cd4e3f05a7bdd777bc8112534b280 to your computer and use it in GitHub Desktop.
Mnemis with TiDB
-- This query answers 'Which cities did Dave travel to in 2023?'
CREATE TABLE episodes (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
content TEXT NOT NULL,
valid_at DATETIME NOT NULL,
session_id BIGINT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
INDEX idx_episodes_valid (valid_at)
);
INSERT INTO episodes (id, content, valid_at) VALUES
(1, 'Dave lives in Boston and works as a software engineer.', '2023-01-15 10:00:00'),
(2, 'Dave attended a rock concert in Boston on September 15, 2023.', '2023-09-15 20:00:00'),
(3, 'Dave attended a car workshop in San Francisco on August 14, 2023.', '2023-08-14 14:00:00'),
(4, 'Dave attended a conference in Detroit on October 25, 2023.', '2023-10-25 09:00:00'),
(5, 'Dave went on a road trip to the countryside with friends in July 2023.', '2023-07-20 08:00:00'),
(6, 'Dave traveled to Tokyo for vacation in December 2022.', '2022-12-10 00:00:00'),
(7, 'Dave mentioned his love for music and frequently visits music festivals.', '2023-03-26 18:00:00'),
(8, 'Calvin is Dave''s friend and they often discuss music together.', '2023-06-01 15:00:00');
CREATE TABLE entities (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
summary TEXT,
tag VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
INDEX idx_entities_tag (tag),
INDEX idx_entities_name (name)
);
INSERT INTO entities (id, name, summary, tag) VALUES
(1, 'Dave', 'A software engineer living in Boston', 'person'),
(2, 'Boston', 'A city where Dave lives', 'location'),
(3, 'San Francisco', 'A city Dave visited for a workshop', 'location'),
(4, 'Detroit', 'A city Dave visited for a conference', 'location'),
(5, 'Countryside', 'Rural areas Dave explored on a road trip', 'location'),
(6, 'Tokyo', 'A city Dave visited for vacation', 'location'),
(7, 'Calvin', 'Dave''s friend', 'person'),
(8, 'Music Festival', 'An event Dave attended in Boston', 'event'),
(9, 'Car Workshop', 'A workshop Dave attended in San Francisco', 'event'),
(10, 'Conference', 'An event Dave attended in Detroit', 'event');
-- the edge between entities and entities
CREATE TABLE edges (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
source_id BIGINT NOT NULL,
target_id BIGINT NOT NULL,
fact TEXT NOT NULL,
valid_at DATETIME,
invalid_at DATETIME,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
INDEX idx_edges_source (source_id),
INDEX idx_edges_target (target_id),
INDEX idx_edges_valid (valid_at, invalid_at)
);
INSERT INTO edges (source_id, target_id, fact, valid_at) VALUES
(1, 2, 'Dave lives in Boston', '2023-01-15'),
(1, 2, 'Dave attended a music festival in Boston', '2023-03-26'),
(1, 2, 'Dave attended a rock concert in Boston', '2023-09-15'),
(1, 3, 'Dave attended a car workshop in San Francisco', '2023-08-14'),
(1, 4, 'Dave attended a conference in Detroit', '2023-10-25'),
(1, 5, 'Dave went on a road trip to the countryside', '2023-07-20'),
(1, 6, 'Dave traveled to Tokyo for vacation', '2022-12-10'),
(1, 7, 'Dave is friends with Calvin', '2023-06-01');
-- the edge between entities and episodic
CREATE TABLE episodic_edges (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
entity_id BIGINT NOT NULL,
episode_id BIGINT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE INDEX uniq_entity_episode (entity_id, episode_id),
INDEX idx_episodic_episode (episode_id)
);
INSERT INTO episodic_edges (entity_id, episode_id) VALUES
(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8),
(2, 1), (2, 2), (2, 7),
(3, 3),
(4, 4),
(5, 5),
(6, 6),
(7, 8);
CREATE TABLE categories (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(255) NOT NULL,
summary TEXT,
tag VARCHAR(100),
layer INT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
INDEX idx_categories_layer (layer),
INDEX idx_categories_name (name)
);
-- Top layer catagories
INSERT INTO categories (id, name, layer, tag) VALUES
(100, 'Geography', 2, 'domain'),
(101, 'People', 2, 'domain'),
(102, 'Events', 2, 'domain');
-- Second layer catagories
INSERT INTO categories (id, name, layer, tag) VALUES
(200, 'Geographical Locations', 1, 'category'),
(201, 'Friends', 1, 'category'),
(202, 'Social Events', 1, 'category'),
(203, 'Professional Events', 1, 'category');
CREATE TABLE category_edges (
id BIGINT AUTO_INCREMENT PRIMARY KEY,
parent_id BIGINT NOT NULL,
child_id BIGINT NOT NULL,
child_type ENUM('category', 'entity') NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE INDEX uniq_parent_child (parent_id, child_id, child_type),
INDEX idx_category_edges_child (child_id, child_type)
);
INSERT INTO category_edges (parent_id, child_id, child_type) VALUES
(100, 200, 'category'),
(101, 201, 'category'),
(102, 202, 'category'),
(102, 203, 'category');
INSERT INTO category_edges (parent_id, child_id, child_type) VALUES
(200, 2, 'entity'),
(200, 3, 'entity'),
(200, 4, 'entity'),
(200, 5, 'entity'),
(200, 6, 'entity'),
(201, 7, 'entity'),
(202, 8, 'entity'),
(203, 9, 'entity'),
(203, 10, 'entity');
WITH RECURSIVE hierarchy AS (
-- From Top Layer
SELECT
c.id,
c.name,
c.layer,
CAST(c.id AS CHAR(1000)) AS path
FROM categories c
WHERE c.name = 'Geography' AND c.layer = 2
UNION ALL
-- Traverse down multiple category layer
SELECT
cat.id,
cat.name,
cat.layer,
CONCAT(h.path, '->', cat.id)
FROM hierarchy h
JOIN category_edges ce ON ce.parent_id = h.id AND ce.child_type = 'category'
JOIN categories cat ON cat.id = ce.child_id
WHERE h.layer > 1
),
selected_entities AS (
-- From smallest layer, get all geography entities
SELECT e.id, e.name, e.tag, h.path
FROM hierarchy h
JOIN category_edges ce ON ce.parent_id = h.id AND ce.child_type = 'entity'
JOIN entities e ON e.id = ce.child_id
WHERE h.layer = 1
),
one_hop_episodes AS (
-- Get associated episodes (2023)
SELECT DISTINCT
'episode' AS type,
ep.content,
ep.valid_at,
se.name AS entity_name
FROM selected_entities se
JOIN episodic_edges ee ON ee.entity_id = se.id
JOIN episodes ep ON ep.id = ee.episode_id
WHERE ep.valid_at >= '2023-01-01' AND ep.valid_at < '2024-01-01'
),
one_hop_edges AS (
-- Get associated edges (Dave's travels, 2023)
SELECT DISTINCT
'edge' AS type,
ed.fact AS content,
ed.valid_at,
se.name AS entity_name
FROM selected_entities se
JOIN edges ed ON ed.target_id = se.id
WHERE ed.source_id = 1 -- Dave
AND ed.valid_at >= '2023-01-01'
AND ed.valid_at < '2024-01-01'
)
SELECT entity_name, type, content, valid_at
FROM one_hop_edges
ORDER BY entity_name, valid_at;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment