liling · May 5, 2026 15:42
diff --git a/switch_native_to_vchord.sql b/switch_native_to_vchord.sql
 -- Switch text search from native (tsvector) to vchord (bm25)
 --
 -- Prerequisites:
 --   1. Add to postgresql.conf:
 --      shared_preload_libraries = 'vchord,vchord_bm25,pg_tokenizer'
 --   2. Restart PostgreSQL
 --   3. Run this script: psql -d hindsight_dev -f switch_native_to_vchord.sql
 --
 -- This script is idempotent — safe to run multiple times.

 \echo 'Switching text search: native → vchord'

 -- Step 1: Install extensions
 CREATE EXTENSION IF NOT EXISTS vchord CASCADE;
 CREATE EXTENSION IF NOT EXISTS pg_tokenizer CASCADE;
 CREATE EXTENSION IF NOT EXISTS vchord_bm25 CASCADE;

 -- Step 2: Create tokenizer (needed by INSERT queries)
 -- NOTE: config is TOML — values must be quoted: key = "value"
 DO $$
 BEGIN
    IF NOT EXISTS (SELECT 1 FROM tokenizer_catalog.tokenizer WHERE name = 'llmlingua2') THEN
        PERFORM tokenizer_catalog.create_tokenizer('llmlingua2', 'model = "llmlingua2"');
        RAISE NOTICE 'Created llmlingua2 tokenizer';
    END IF;
 END $$;

 -- Step 3: Set search_path so bare tokenize() / to_bm25query() calls resolve correctly.
 -- Application code (ops_postgresql.py, consolidation, migrations) uses unqualified
 -- tokenize() which requires bm25_catalog and tokenizer_catalog on search_path.
 SELECT current_database() AS db \gset
 ALTER DATABASE :db SET search_path TO bm25_catalog, tokenizer_catalog, "$user", public;

 -- Step 4: Rebuild search_vector on memory_units
 DO $$
 BEGIN
    -- Drop existing GIN index
    DROP INDEX IF EXISTS idx_memory_units_text_search;

    -- Drop existing tsvector column (GENERATED columns can't be altered, must drop)
    ALTER TABLE memory_units DROP COLUMN IF EXISTS search_vector;

    -- Add bm25vector column
    ALTER TABLE memory_units ADD COLUMN search_vector bm25_catalog.bm25vector;

    -- Create BM25 index
    CREATE INDEX idx_memory_units_text_search
        ON memory_units USING bm25 (search_vector bm25_catalog.bm25_ops);

    -- Backfill existing rows
    UPDATE memory_units
    SET search_vector = tokenizer_catalog.tokenize(
        COALESCE(text, '') || ' ' || COALESCE(context, ''),
        'llmlingua2'
    )::bm25_catalog.bm25vector
    WHERE search_vector IS NULL;

    RAISE NOTICE 'memory_units: done';
 END $$;

 -- Step 4: Rebuild search_vector on reflections (if table exists)
 DO $$
 BEGIN
    IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'reflections') THEN
        DROP INDEX IF EXISTS idx_reflections_text_search;
        ALTER TABLE reflections DROP COLUMN IF EXISTS search_vector;
        ALTER TABLE reflections ADD COLUMN search_vector bm25_catalog.bm25vector;

        CREATE INDEX idx_reflections_text_search
            ON reflections USING bm25 (search_vector bm25_catalog.bm25_ops);

        UPDATE reflections
        SET search_vector = tokenizer_catalog.tokenize(
            COALESCE(name, '') || ' ' || COALESCE(content, ''),
            'llmlingua2'
        )::bm25_catalog.bm25vector
        WHERE search_vector IS NULL;

        RAISE NOTICE 'reflections: done';
    ELSE
        RAISE NOTICE 'reflections: table not found, skipping';
    END IF;
 END $$;

 \echo 'Done. Update HINDSIGHT_API_TEXT_SEARCH_EXTENSION=vchord in .env and restart the app.'
	-- Switch text search from native (tsvector) to vchord (bm25)
	--
	-- Prerequisites:
	-- 1. Add to postgresql.conf:
	-- shared_preload_libraries = 'vchord,vchord_bm25,pg_tokenizer'
	-- 2. Restart PostgreSQL
	-- 3. Run this script: psql -d hindsight_dev -f switch_native_to_vchord.sql
	--
	-- This script is idempotent — safe to run multiple times.

	\echo 'Switching text search: native → vchord'

	-- Step 1: Install extensions
	CREATE EXTENSION IF NOT EXISTS vchord CASCADE;
	CREATE EXTENSION IF NOT EXISTS pg_tokenizer CASCADE;
	CREATE EXTENSION IF NOT EXISTS vchord_bm25 CASCADE;

	-- Step 2: Create tokenizer (needed by INSERT queries)
	-- NOTE: config is TOML — values must be quoted: key = "value"
	DO $$
	BEGIN
	IF NOT EXISTS (SELECT 1 FROM tokenizer_catalog.tokenizer WHERE name = 'llmlingua2') THEN
	PERFORM tokenizer_catalog.create_tokenizer('llmlingua2', 'model = "llmlingua2"');
	RAISE NOTICE 'Created llmlingua2 tokenizer';
	END IF;
	END $$;

	-- Step 3: Set search_path so bare tokenize() / to_bm25query() calls resolve correctly.
	-- Application code (ops_postgresql.py, consolidation, migrations) uses unqualified
	-- tokenize() which requires bm25_catalog and tokenizer_catalog on search_path.
	SELECT current_database() AS db \gset
	ALTER DATABASE :db SET search_path TO bm25_catalog, tokenizer_catalog, "$user", public;

	-- Step 4: Rebuild search_vector on memory_units
	DO $$
	BEGIN
	-- Drop existing GIN index
	DROP INDEX IF EXISTS idx_memory_units_text_search;

	-- Drop existing tsvector column (GENERATED columns can't be altered, must drop)
	ALTER TABLE memory_units DROP COLUMN IF EXISTS search_vector;

	-- Add bm25vector column
	ALTER TABLE memory_units ADD COLUMN search_vector bm25_catalog.bm25vector;

	-- Create BM25 index
	CREATE INDEX idx_memory_units_text_search
	ON memory_units USING bm25 (search_vector bm25_catalog.bm25_ops);

	-- Backfill existing rows
	UPDATE memory_units
	SET search_vector = tokenizer_catalog.tokenize(
	COALESCE(text, '') \|\| ' ' \|\| COALESCE(context, ''),
	'llmlingua2'
	)::bm25_catalog.bm25vector
	WHERE search_vector IS NULL;

	RAISE NOTICE 'memory_units: done';
	END $$;

	-- Step 4: Rebuild search_vector on reflections (if table exists)
	DO $$
	BEGIN
	IF EXISTS (SELECT 1 FROM information_schema.tables WHERE table_name = 'reflections') THEN
	DROP INDEX IF EXISTS idx_reflections_text_search;
	ALTER TABLE reflections DROP COLUMN IF EXISTS search_vector;
	ALTER TABLE reflections ADD COLUMN search_vector bm25_catalog.bm25vector;

	CREATE INDEX idx_reflections_text_search
	ON reflections USING bm25 (search_vector bm25_catalog.bm25_ops);

	UPDATE reflections
	SET search_vector = tokenizer_catalog.tokenize(
	COALESCE(name, '') \|\| ' ' \|\| COALESCE(content, ''),
	'llmlingua2'
	)::bm25_catalog.bm25vector
	WHERE search_vector IS NULL;

	RAISE NOTICE 'reflections: done';
	ELSE
	RAISE NOTICE 'reflections: table not found, skipping';
	END IF;
	END $$;

	\echo 'Done. Update HINDSIGHT_API_TEXT_SEARCH_EXTENSION=vchord in .env and restart the app.'
No results found