Last active
November 29, 2023 23:53
-
-
Save louismullie/9b0b96ec61d424ed10035efb7aec0270 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Function to split the input query and generate all contiguous combinations | |
CREATE FUNCTION split_to_table(@input_query VARCHAR(MAX)) | |
RETURNS @result TABLE (combination VARCHAR(MAX)) | |
AS | |
BEGIN | |
DECLARE @words TABLE (id INT IDENTITY(1,1), word VARCHAR(255)) | |
DECLARE @total_words INT, @i INT, @j INT, @current_combination VARCHAR(MAX) | |
-- Step 1: Splitting the input string into words | |
INSERT INTO @words (word) | |
SELECT value | |
FROM STRING_SPLIT(@input_query, ' ') -- This function varies based on SQL dialect | |
SELECT @total_words = COUNT(*) FROM @words | |
-- Step 2: Generating all contiguous combinations | |
SET @i = 1 | |
WHILE @i <= @total_words | |
BEGIN | |
SET @j = @i | |
SET @current_combination = '' | |
WHILE @j <= @total_words | |
BEGIN | |
SELECT @current_combination = @current_combination + ' ' + word | |
FROM @words | |
WHERE id = @j | |
-- Insert the current combination into the result table | |
INSERT INTO @result (combination) | |
VALUES (LTRIM(@current_combination)) | |
SET @j = @j + 1 | |
END | |
SET @i = @i + 1 | |
END | |
RETURN | |
END | |
CREATE TABLE synonyms ( | |
term VARCHAR(255), | |
longest_form VARCHAR(255) | |
); | |
CREATE TABLE abbreviations ( | |
abbreviation VARCHAR(255), | |
full_form VARCHAR(255) | |
); | |
INSERT INTO synonyms (term, longest_form) VALUES | |
('anca', 'antineutrophil cytoplasmic antibodies'), | |
('anca associated vasculitis', 'antineutrophil cytoplasmic antibodies associated vasculitis'); | |
INSERT INTO abbreviations (abbreviation, full_form) VALUES | |
('anca', 'antineutrophil cytoplasmic antibodies'), | |
('aav', 'anca associated vasculitis'); | |
-- Assuming you have an input query | |
DECLARE @input_query VARCHAR(255); | |
SET @input_query = 'your input query here'; | |
-- Step 1: Breaking down the input query | |
WITH split_terms AS ( | |
SELECT term | |
FROM split_to_table(@input_query, ' ') | |
) | |
-- Step 2: Matching terms against synonyms | |
, expanded_terms AS ( | |
SELECT s.longest_form | |
FROM split_terms st | |
LEFT JOIN synonyms s ON st.term = s.term | |
) | |
-- Step 3: Apply expansion | |
, expanded_query AS ( | |
SELECT DISTINCT longest_form | |
FROM expanded_terms | |
ORDER BY LENGTH(longest_form) DESC | |
) | |
-- Step 4: Apply contraction | |
, contracted_query AS ( | |
SELECT DISTINCT a.abbreviation | |
FROM abbreviations a | |
WHERE a.full_form IN (SELECT longest_form FROM expanded_terms) | |
ORDER BY LENGTH(a.full_form) DESC | |
) | |
-- Step 5: Nested contraction | |
, nested_contracted_query AS ( | |
SELECT DISTINCT a.abbreviation | |
FROM abbreviations a | |
WHERE a.full_form IN ( | |
SELECT abbreviation FROM abbreviations WHERE full_form IN ( | |
SELECT longest_form FROM expanded_terms | |
) | |
) | |
ORDER BY LENGTH(a.full_form) DESC | |
) | |
-- Step 6: Final Combined Query | |
WITH (SELECT * FROM expanded_query | |
UNION | |
SELECT * FROM contracted_query | |
UNION | |
SELECT * FROM nested_contracted_query) as search_queries; | |
-- Step 7: Query Documents | |
CREATE VIRTUAL TABLE documents USING fts5(title, body); | |
INSERT INTO documents (title, body) VALUES ('Example Title', 'Example body text'); | |
SELECT * FROM documents | |
WHERE documents MATCH (group_concat(search_queries, ' OR ')); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment