Last active
November 29, 2023 23:53
Revisions
-
louismullie revised this gist
Nov 29, 2023 . 1 changed file with 8 additions and 5 deletions.There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -39,6 +39,7 @@ BEGIN RETURN END -- Create tables CREATE TABLE synonyms ( term VARCHAR(255), longest_form VARCHAR(255) @@ -49,6 +50,9 @@ CREATE TABLE abbreviations ( full_form VARCHAR(255) ); CREATE VIRTUAL TABLE documents USING fts5(title, body); -- Example data INSERT INTO synonyms (term, longest_form) VALUES ('anca', 'antineutrophil cytoplasmic antibodies'), ('anca associated vasculitis', 'antineutrophil cytoplasmic antibodies associated vasculitis'); @@ -57,7 +61,9 @@ INSERT INTO abbreviations (abbreviation, full_form) VALUES ('anca', 'antineutrophil cytoplasmic antibodies'), ('aav', 'anca associated vasculitis'); INSERT INTO documents (title, body) VALUES ('Example Title', 'Example body text'); -- Declare input query DECLARE @input_query VARCHAR(255); SET @input_query = 'your input query here'; @@ -108,9 +114,6 @@ SELECT * FROM contracted_query UNION SELECT * FROM nested_contracted_query) as search_queries; -- Step 7: Query Documents using FTS5 SELECT * FROM documents WHERE documents MATCH (group_concat(search_queries, ' OR ')); -
louismullie created this gist
Nov 29, 2023 .There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,116 @@ -- Function to split the input query and generate all contiguous combinations CREATE FUNCTION split_to_table(@input_query VARCHAR(MAX)) RETURNS @result TABLE (combination VARCHAR(MAX)) AS BEGIN DECLARE @words TABLE (id INT IDENTITY(1,1), word VARCHAR(255)) DECLARE @total_words INT, @i INT, @j INT, @current_combination VARCHAR(MAX) -- Step 1: Splitting the input string into words INSERT INTO @words (word) SELECT value FROM STRING_SPLIT(@input_query, ' ') -- This function varies based on SQL dialect SELECT @total_words = COUNT(*) FROM @words -- Step 2: Generating all contiguous combinations SET @i = 1 WHILE @i <= @total_words BEGIN SET @j = @i SET @current_combination = '' WHILE @j <= @total_words BEGIN SELECT @current_combination = @current_combination + ' ' + word FROM @words WHERE id = @j -- Insert the current combination into the result table INSERT INTO @result (combination) VALUES (LTRIM(@current_combination)) SET @j = @j + 1 END SET @i = @i + 1 END RETURN END CREATE TABLE synonyms ( term VARCHAR(255), longest_form VARCHAR(255) ); CREATE TABLE abbreviations ( abbreviation VARCHAR(255), full_form VARCHAR(255) ); INSERT INTO synonyms (term, longest_form) VALUES ('anca', 'antineutrophil cytoplasmic antibodies'), ('anca associated vasculitis', 'antineutrophil cytoplasmic antibodies associated vasculitis'); INSERT INTO abbreviations (abbreviation, full_form) VALUES ('anca', 'antineutrophil cytoplasmic antibodies'), ('aav', 'anca associated vasculitis'); -- Assuming you have an input query DECLARE @input_query VARCHAR(255); SET @input_query = 'your input query here'; -- Step 1: Breaking down the input query WITH split_terms AS ( SELECT term FROM split_to_table(@input_query, ' ') ) -- Step 2: Matching terms against synonyms , expanded_terms AS ( SELECT s.longest_form FROM split_terms st LEFT JOIN synonyms s ON st.term = s.term ) -- Step 3: Apply expansion , expanded_query AS ( SELECT DISTINCT longest_form FROM expanded_terms ORDER BY LENGTH(longest_form) DESC ) -- Step 4: Apply contraction , contracted_query AS ( SELECT DISTINCT a.abbreviation FROM abbreviations a WHERE a.full_form IN (SELECT longest_form FROM expanded_terms) ORDER BY LENGTH(a.full_form) DESC ) -- Step 5: Nested contraction , nested_contracted_query AS ( SELECT DISTINCT a.abbreviation FROM abbreviations a WHERE a.full_form IN ( SELECT abbreviation FROM abbreviations WHERE full_form IN ( SELECT longest_form FROM expanded_terms ) ) ORDER BY LENGTH(a.full_form) DESC ) -- Step 6: Final Combined Query WITH (SELECT * FROM expanded_query UNION SELECT * FROM contracted_query UNION SELECT * FROM nested_contracted_query) as search_queries; -- Step 7: Query Documents CREATE VIRTUAL TABLE documents USING fts5(title, body); INSERT INTO documents (title, body) VALUES ('Example Title', 'Example body text'); SELECT * FROM documents WHERE documents MATCH (group_concat(search_queries, ' OR '));