YangKeao · March 9, 2026 06:13
diff --git a/test.sql b/test.sql
 -- This query answers 'Which cities did Dave travel to in 2023?'

 CREATE TABLE episodes (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    content TEXT NOT NULL,
    valid_at DATETIME NOT NULL,
    session_id BIGINT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_episodes_valid (valid_at)
 );

 INSERT INTO episodes (id, content, valid_at) VALUES
 (1, 'Dave lives in Boston and works as a software engineer.', '2023-01-15 10:00:00'),
 (2, 'Dave attended a rock concert in Boston on September 15, 2023.', '2023-09-15 20:00:00'),
 (3, 'Dave attended a car workshop in San Francisco on August 14, 2023.', '2023-08-14 14:00:00'),
 (4, 'Dave attended a conference in Detroit on October 25, 2023.', '2023-10-25 09:00:00'),
 (5, 'Dave went on a road trip to the countryside with friends in July 2023.', '2023-07-20 08:00:00'),
 (6, 'Dave traveled to Tokyo for vacation in December 2022.', '2022-12-10 00:00:00'),
 (7, 'Dave mentioned his love for music and frequently visits music festivals.', '2023-03-26 18:00:00'),
 (8, 'Calvin is Dave''s friend and they often discuss music together.', '2023-06-01 15:00:00');

 CREATE TABLE entities (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    summary TEXT,
    tag VARCHAR(100),
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    INDEX idx_entities_tag (tag),
    INDEX idx_entities_name (name)
 );

 INSERT INTO entities (id, name, summary, tag) VALUES
 (1, 'Dave', 'A software engineer living in Boston', 'person'),
 (2, 'Boston', 'A city where Dave lives', 'location'),
 (3, 'San Francisco', 'A city Dave visited for a workshop', 'location'),
 (4, 'Detroit', 'A city Dave visited for a conference', 'location'),
 (5, 'Countryside', 'Rural areas Dave explored on a road trip', 'location'),
 (6, 'Tokyo', 'A city Dave visited for vacation', 'location'),
 (7, 'Calvin', 'Dave''s friend', 'person'),
 (8, 'Music Festival', 'An event Dave attended in Boston', 'event'),
 (9, 'Car Workshop', 'A workshop Dave attended in San Francisco', 'event'),
 (10, 'Conference', 'An event Dave attended in Detroit', 'event');

 -- the edge between entities and entities
 CREATE TABLE edges (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    source_id BIGINT NOT NULL,
    target_id BIGINT NOT NULL,
    fact TEXT NOT NULL,
    valid_at DATETIME,
    invalid_at DATETIME,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_edges_source (source_id),
    INDEX idx_edges_target (target_id),
    INDEX idx_edges_valid (valid_at, invalid_at)
 );

 INSERT INTO edges (source_id, target_id, fact, valid_at) VALUES
 (1, 2, 'Dave lives in Boston', '2023-01-15'),
 (1, 2, 'Dave attended a music festival in Boston', '2023-03-26'),
 (1, 2, 'Dave attended a rock concert in Boston', '2023-09-15'),
 (1, 3, 'Dave attended a car workshop in San Francisco', '2023-08-14'),
 (1, 4, 'Dave attended a conference in Detroit', '2023-10-25'),
 (1, 5, 'Dave went on a road trip to the countryside', '2023-07-20'),
 (1, 6, 'Dave traveled to Tokyo for vacation', '2022-12-10'),
 (1, 7, 'Dave is friends with Calvin', '2023-06-01');

 -- the edge between entities and episodic
 CREATE TABLE episodic_edges (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    entity_id BIGINT NOT NULL,
    episode_id BIGINT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE INDEX uniq_entity_episode (entity_id, episode_id),
    INDEX idx_episodic_episode (episode_id)
 );

 INSERT INTO episodic_edges (entity_id, episode_id) VALUES
 (1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8),
 (2, 1), (2, 2), (2, 7),
 (3, 3),
 (4, 4),
 (5, 5),
 (6, 6),
 (7, 8);

 CREATE TABLE categories (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(255) NOT NULL,
    summary TEXT,
    tag VARCHAR(100),
    layer INT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    INDEX idx_categories_layer (layer),
    INDEX idx_categories_name (name)
 );

 -- Top layer catagories
 INSERT INTO categories (id, name, layer, tag) VALUES
 (100, 'Geography', 2, 'domain'),
 (101, 'People', 2, 'domain'),
 (102, 'Events', 2, 'domain');

 -- Second layer catagories
 INSERT INTO categories (id, name, layer, tag) VALUES
 (200, 'Geographical Locations', 1, 'category'),
 (201, 'Friends', 1, 'category'),
 (202, 'Social Events', 1, 'category'),
 (203, 'Professional Events', 1, 'category');

 CREATE TABLE category_edges (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    parent_id BIGINT NOT NULL,
    child_id BIGINT NOT NULL,
    child_type ENUM('category', 'entity') NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    UNIQUE INDEX uniq_parent_child (parent_id, child_id, child_type),
    INDEX idx_category_edges_child (child_id, child_type)
 );

 INSERT INTO category_edges (parent_id, child_id, child_type) VALUES
 (100, 200, 'category'),
 (101, 201, 'category'),
 (102, 202, 'category'),
 (102, 203, 'category');

 INSERT INTO category_edges (parent_id, child_id, child_type) VALUES
 (200, 2, 'entity'),
 (200, 3, 'entity'),
 (200, 4, 'entity'),
 (200, 5, 'entity'),
 (200, 6, 'entity'),
 (201, 7, 'entity'),
 (202, 8, 'entity'),
 (203, 9, 'entity'),
 (203, 10, 'entity');

 WITH RECURSIVE hierarchy AS (
    -- From Top Layer
    SELECT 
        c.id, 
        c.name, 
        c.layer, 
        CAST(c.id AS CHAR(1000)) AS path
    FROM categories c
    WHERE c.name = 'Geography' AND c.layer = 2
    
    UNION ALL
    
    -- Traverse down multiple category layer
    SELECT 
        cat.id, 
        cat.name, 
        cat.layer, 
        CONCAT(h.path, '->', cat.id)
    FROM hierarchy h
    JOIN category_edges ce ON ce.parent_id = h.id AND ce.child_type = 'category'
    JOIN categories cat ON cat.id = ce.child_id
    WHERE h.layer > 1
 ),
 selected_entities AS (
    -- From smallest layer, get all geography entities
    SELECT e.id, e.name, e.tag, h.path
    FROM hierarchy h
    JOIN category_edges ce ON ce.parent_id = h.id AND ce.child_type = 'entity'
    JOIN entities e ON e.id = ce.child_id
    WHERE h.layer = 1
 ),
 one_hop_episodes AS (
    -- Get associated episodes (2023)
    SELECT DISTINCT
        'episode' AS type,
        ep.content,
        ep.valid_at,
        se.name AS entity_name
    FROM selected_entities se
    JOIN episodic_edges ee ON ee.entity_id = se.id
    JOIN episodes ep ON ep.id = ee.episode_id
    WHERE ep.valid_at >= '2023-01-01' AND ep.valid_at < '2024-01-01'
 ),
 one_hop_edges AS (
    -- Get associated edges (Dave's travels, 2023)
    SELECT DISTINCT
        'edge' AS type,
        ed.fact AS content,
        ed.valid_at,
        se.name AS entity_name
    FROM selected_entities se
    JOIN edges ed ON ed.target_id = se.id
    WHERE ed.source_id = 1  -- Dave
      AND ed.valid_at >= '2023-01-01' 
      AND ed.valid_at < '2024-01-01'
 )
 SELECT entity_name, type, content, valid_at
 FROM one_hop_edges
 ORDER BY entity_name, valid_at;
	-- This query answers 'Which cities did Dave travel to in 2023?'

	CREATE TABLE episodes (
	id BIGINT AUTO_INCREMENT PRIMARY KEY,
	content TEXT NOT NULL,
	valid_at DATETIME NOT NULL,
	session_id BIGINT,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	INDEX idx_episodes_valid (valid_at)
	);

	INSERT INTO episodes (id, content, valid_at) VALUES
	(1, 'Dave lives in Boston and works as a software engineer.', '2023-01-15 10:00:00'),
	(2, 'Dave attended a rock concert in Boston on September 15, 2023.', '2023-09-15 20:00:00'),
	(3, 'Dave attended a car workshop in San Francisco on August 14, 2023.', '2023-08-14 14:00:00'),
	(4, 'Dave attended a conference in Detroit on October 25, 2023.', '2023-10-25 09:00:00'),
	(5, 'Dave went on a road trip to the countryside with friends in July 2023.', '2023-07-20 08:00:00'),
	(6, 'Dave traveled to Tokyo for vacation in December 2022.', '2022-12-10 00:00:00'),
	(7, 'Dave mentioned his love for music and frequently visits music festivals.', '2023-03-26 18:00:00'),
	(8, 'Calvin is Dave''s friend and they often discuss music together.', '2023-06-01 15:00:00');

	CREATE TABLE entities (
	id BIGINT AUTO_INCREMENT PRIMARY KEY,
	name VARCHAR(255) NOT NULL,
	summary TEXT,
	tag VARCHAR(100),
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
	INDEX idx_entities_tag (tag),
	INDEX idx_entities_name (name)
	);

	INSERT INTO entities (id, name, summary, tag) VALUES
	(1, 'Dave', 'A software engineer living in Boston', 'person'),
	(2, 'Boston', 'A city where Dave lives', 'location'),
	(3, 'San Francisco', 'A city Dave visited for a workshop', 'location'),
	(4, 'Detroit', 'A city Dave visited for a conference', 'location'),
	(5, 'Countryside', 'Rural areas Dave explored on a road trip', 'location'),
	(6, 'Tokyo', 'A city Dave visited for vacation', 'location'),
	(7, 'Calvin', 'Dave''s friend', 'person'),
	(8, 'Music Festival', 'An event Dave attended in Boston', 'event'),
	(9, 'Car Workshop', 'A workshop Dave attended in San Francisco', 'event'),
	(10, 'Conference', 'An event Dave attended in Detroit', 'event');

	-- the edge between entities and entities
	CREATE TABLE edges (
	id BIGINT AUTO_INCREMENT PRIMARY KEY,
	source_id BIGINT NOT NULL,
	target_id BIGINT NOT NULL,
	fact TEXT NOT NULL,
	valid_at DATETIME,
	invalid_at DATETIME,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	INDEX idx_edges_source (source_id),
	INDEX idx_edges_target (target_id),
	INDEX idx_edges_valid (valid_at, invalid_at)
	);

	INSERT INTO edges (source_id, target_id, fact, valid_at) VALUES
	(1, 2, 'Dave lives in Boston', '2023-01-15'),
	(1, 2, 'Dave attended a music festival in Boston', '2023-03-26'),
	(1, 2, 'Dave attended a rock concert in Boston', '2023-09-15'),
	(1, 3, 'Dave attended a car workshop in San Francisco', '2023-08-14'),
	(1, 4, 'Dave attended a conference in Detroit', '2023-10-25'),
	(1, 5, 'Dave went on a road trip to the countryside', '2023-07-20'),
	(1, 6, 'Dave traveled to Tokyo for vacation', '2022-12-10'),
	(1, 7, 'Dave is friends with Calvin', '2023-06-01');

	-- the edge between entities and episodic
	CREATE TABLE episodic_edges (
	id BIGINT AUTO_INCREMENT PRIMARY KEY,
	entity_id BIGINT NOT NULL,
	episode_id BIGINT NOT NULL,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	UNIQUE INDEX uniq_entity_episode (entity_id, episode_id),
	INDEX idx_episodic_episode (episode_id)
	);

	INSERT INTO episodic_edges (entity_id, episode_id) VALUES
	(1, 1), (1, 2), (1, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8),
	(2, 1), (2, 2), (2, 7),
	(3, 3),
	(4, 4),
	(5, 5),
	(6, 6),
	(7, 8);

	CREATE TABLE categories (
	id BIGINT AUTO_INCREMENT PRIMARY KEY,
	name VARCHAR(255) NOT NULL,
	summary TEXT,
	tag VARCHAR(100),
	layer INT NOT NULL,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	INDEX idx_categories_layer (layer),
	INDEX idx_categories_name (name)
	);

	-- Top layer catagories
	INSERT INTO categories (id, name, layer, tag) VALUES
	(100, 'Geography', 2, 'domain'),
	(101, 'People', 2, 'domain'),
	(102, 'Events', 2, 'domain');

	-- Second layer catagories
	INSERT INTO categories (id, name, layer, tag) VALUES
	(200, 'Geographical Locations', 1, 'category'),
	(201, 'Friends', 1, 'category'),
	(202, 'Social Events', 1, 'category'),
	(203, 'Professional Events', 1, 'category');

	CREATE TABLE category_edges (
	id BIGINT AUTO_INCREMENT PRIMARY KEY,
	parent_id BIGINT NOT NULL,
	child_id BIGINT NOT NULL,
	child_type ENUM('category', 'entity') NOT NULL,
	created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
	UNIQUE INDEX uniq_parent_child (parent_id, child_id, child_type),
	INDEX idx_category_edges_child (child_id, child_type)
	);

	INSERT INTO category_edges (parent_id, child_id, child_type) VALUES
	(100, 200, 'category'),
	(101, 201, 'category'),
	(102, 202, 'category'),
	(102, 203, 'category');

	INSERT INTO category_edges (parent_id, child_id, child_type) VALUES
	(200, 2, 'entity'),
	(200, 3, 'entity'),
	(200, 4, 'entity'),
	(200, 5, 'entity'),
	(200, 6, 'entity'),
	(201, 7, 'entity'),
	(202, 8, 'entity'),
	(203, 9, 'entity'),
	(203, 10, 'entity');

	WITH RECURSIVE hierarchy AS (
	-- From Top Layer
	SELECT
	c.id,
	c.name,
	c.layer,
	CAST(c.id AS CHAR(1000)) AS path
	FROM categories c
	WHERE c.name = 'Geography' AND c.layer = 2

	UNION ALL

	-- Traverse down multiple category layer
	SELECT
	cat.id,
	cat.name,
	cat.layer,
	CONCAT(h.path, '->', cat.id)
	FROM hierarchy h
	JOIN category_edges ce ON ce.parent_id = h.id AND ce.child_type = 'category'
	JOIN categories cat ON cat.id = ce.child_id
	WHERE h.layer > 1
	),
	selected_entities AS (
	-- From smallest layer, get all geography entities
	SELECT e.id, e.name, e.tag, h.path
	FROM hierarchy h
	JOIN category_edges ce ON ce.parent_id = h.id AND ce.child_type = 'entity'
	JOIN entities e ON e.id = ce.child_id
	WHERE h.layer = 1
	),
	one_hop_episodes AS (
	-- Get associated episodes (2023)
	SELECT DISTINCT
	'episode' AS type,
	ep.content,
	ep.valid_at,
	se.name AS entity_name
	FROM selected_entities se
	JOIN episodic_edges ee ON ee.entity_id = se.id
	JOIN episodes ep ON ep.id = ee.episode_id
	WHERE ep.valid_at >= '2023-01-01' AND ep.valid_at < '2024-01-01'
	),
	one_hop_edges AS (
	-- Get associated edges (Dave's travels, 2023)
	SELECT DISTINCT
	'edge' AS type,
	ed.fact AS content,
	ed.valid_at,
	se.name AS entity_name
	FROM selected_entities se
	JOIN edges ed ON ed.target_id = se.id
	WHERE ed.source_id = 1 -- Dave
	AND ed.valid_at >= '2023-01-01'
	AND ed.valid_at < '2024-01-01'
	)
	SELECT entity_name, type, content, valid_at
	FROM one_hop_edges
	ORDER BY entity_name, valid_at;
No results found