Last active
August 25, 2025 17:19
-
-
Save sany2k8/c7a631b7aae30f1048d72376418b4fa5 to your computer and use it in GitHub Desktop.
All the working queries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- CREATE EXTENSION IF NOT EXISTS postgis; | |
| -- CREATE EXTENSION IF NOT EXISTS pgvector; | |
| ------------------------------ Exercise 1 ------------------------------ | |
| -- Table setup | |
| CREATE TABLE products ( | |
| id SERIAL PRIMARY KEY, | |
| sku VARCHAR(20) UNIQUE, | |
| name VARCHAR(200), | |
| category VARCHAR(50), | |
| price DECIMAL(10,2), | |
| description TEXT | |
| ); | |
| -- Sample data | |
| INSERT INTO products (sku, name, category, price, description) VALUES | |
| ('LAPTOP001', 'MacBook Pro 16-inch', 'Electronics', 2499.99, 'Powerful laptop with M3 Max chip, perfect for professional video editing and software development'), | |
| ('PHONE001', 'iPhone 15 Pro', 'Electronics', 999.99, 'Latest smartphone with titanium design, advanced camera system, and A17 Pro chip'), | |
| ('BOOK001', 'PostgreSQL Guide', 'Books', 49.99, 'Complete guide to database management and advanced PostgreSQL techniques'); | |
| -- Exact match queries | |
| SELECT * FROM products WHERE sku = 'LAPTOP001'; | |
| SELECT * FROM products WHERE category = 'Electronics'; | |
| SELECT * FROM products WHERE price = 999.99; | |
| -- With index for performance | |
| CREATE INDEX idx_products_sku ON products(sku); | |
| CREATE INDEX idx_products_category ON products(category); | |
| ------------------------------ Exercise 2 ------------------------------ | |
| -- Pattern matching queries | |
| SELECT * FROM products WHERE name LIKE 'MacBook%'; -- Starts with 'MacBook' | |
| SELECT * FROM products WHERE name LIKE '%Pro%'; -- Contains 'Pro' | |
| SELECT * FROM products WHERE name LIKE '%Guide'; -- Ends with 'Guide' | |
| SELECT * FROM products WHERE name ILIKE '%MACBOOK%'; -- Case-insensitive | |
| -- More complex patterns | |
| SELECT * FROM products WHERE name LIKE 'iPhone __ Pro'; -- Two characters between iPhone and Pro | |
| SELECT * FROM products WHERE sku LIKE 'LAPTOP___'; -- Exactly 9 characters, starting with LAPTOP | |
| -- Performance optimization with trigram indexes | |
| CREATE EXTENSION IF NOT EXISTS pg_trgm; | |
| CREATE INDEX idx_products_name_trgm ON products USING gin(name gin_trgm_ops); | |
| -- Optimized for prefix searches | |
| CREATE INDEX idx_products_name_prefix ON products(name text_pattern_ops); | |
| ------------------------------ Exercise 3 ------------------------------ | |
| -- Add full-text search columns (if not already added during table creation) | |
| ALTER TABLE products ADD COLUMN search_vector tsvector; | |
| -- Sample data is already rich with descriptions from the initial INSERT | |
| -- Update search vectors | |
| UPDATE products SET search_vector = | |
| setweight(to_tsvector('english', coalesce(name,'')), 'A') || | |
| setweight(to_tsvector('english', coalesce(description,'')), 'B'); | |
| -- Create GIN index for fast full-text search | |
| CREATE INDEX idx_products_search ON products USING gin(search_vector); | |
| -- Full-text search queries | |
| SELECT name, description, ts_rank(search_vector, query) AS rank | |
| FROM products, plainto_tsquery('english', 'professional laptop') AS query | |
| WHERE search_vector @@ query | |
| ORDER BY rank DESC; | |
| -- Phrase search | |
| SELECT name, description | |
| FROM products | |
| WHERE search_vector @@ phraseto_tsquery('english', 'video editing'); | |
| -- Advanced query with operators | |
| SELECT name, description, ts_rank_cd(search_vector, query) AS rank | |
| FROM products, | |
| to_tsquery('english', 'laptop & (professional | development)') AS query | |
| WHERE search_vector @@ query | |
| ORDER BY rank DESC; | |
| -- Search with highlighting | |
| SELECT name, | |
| ts_headline('english', description, plainto_tsquery('english', 'chip camera')) AS highlighted | |
| FROM products | |
| WHERE search_vector @@ plainto_tsquery('english', 'chip camera'); | |
| ------------------------------ Exercise 4 ------------------------------ | |
| -- Install pgvector extension | |
| CREATE EXTENSION IF NOT EXISTS vector; | |
| -- Add embedding column | |
| ALTER TABLE products ADD COLUMN embedding vector(4); | |
| -- In a real application, you'd generate embeddings using a model like: | |
| -- - sentence-transformers/all-MiniLM-L6-v2 | |
| -- - OpenAI text-embedding-ada-002 | |
| -- - Google Universal Sentence Encoder | |
| -- Simulated embeddings (in practice, generated by ML models) | |
| UPDATE products SET embedding = '[0.1, -0.2, 0.5, 0.3]'::vector | |
| WHERE sku = 'LAPTOP001'; | |
| UPDATE products SET embedding = '[0.2, -0.1, 0.4, 0.2]'::vector | |
| WHERE sku = 'PHONE001'; | |
| UPDATE products SET embedding = '[0.0, 0.1, -0.3, 0.7]'::vector | |
| WHERE sku = 'BOOK001'; | |
| -- Vector similarity search with actual query vector | |
| WITH query_embedding AS ( | |
| SELECT '[0.15, -0.15, 0.45, 0.25]'::vector AS embedding | |
| ) | |
| SELECT p.name, p.description, | |
| 1 - (p.embedding <=> q.embedding) AS similarity | |
| FROM products p, query_embedding q | |
| WHERE p.embedding IS NOT NULL | |
| ORDER BY p.embedding <=> q.embedding | |
| LIMIT 10; | |
| ------------------------------ Exercise 5 ------------------------------ | |
| -- Install extensions for fuzzy search | |
| CREATE EXTENSION IF NOT EXISTS pg_trgm; | |
| CREATE EXTENSION IF NOT EXISTS fuzzystrmatch; | |
| -- Trigram similarity (0-1, higher is more similar) | |
| SELECT name, similarity(name, 'MacBok Pro') AS similarity | |
| FROM products | |
| WHERE similarity(name, 'MacBok Pro') > 0.3 | |
| ORDER BY similarity DESC; | |
| -- Levenshtein distance (lower is more similar) | |
| SELECT name, levenshtein(name, 'iPhone 15 Pro Max') AS distance | |
| FROM products | |
| WHERE levenshtein(name, 'iPhone 15 Pro Max') <= 3 | |
| ORDER BY distance; | |
| -- Soundex for phonetic matching | |
| SELECT name, soundex(name), soundex('Aifon') | |
| FROM products | |
| WHERE soundex(name) = soundex('Aifon'); | |
| -- Double Metaphone for better phonetic matching | |
| SELECT name, dmetaphone(name), dmetaphone('PostgreSQL') | |
| FROM products | |
| WHERE dmetaphone(name) = dmetaphone('PostgreSQL'); | |
| -- Combined fuzzy search with ranking | |
| WITH fuzzy_results AS ( | |
| SELECT name, description, | |
| similarity(name, 'MacBok Pro') as name_sim, | |
| similarity(description, 'laptap') as desc_sim, | |
| levenshtein_less_equal(name, 'MacBok Pro', 3) as lev_dist | |
| FROM products | |
| WHERE similarity(name, 'MacBok Pro') > 0.1 | |
| OR similarity(description, 'laptap') > 0.1 | |
| OR levenshtein_less_equal(name, 'MacBok Pro', 3) >= 0 | |
| ) | |
| SELECT name, description, | |
| GREATEST(name_sim, desc_sim) + | |
| CASE WHEN lev_dist >= 0 THEN (3.0 - lev_dist) / 3.0 ELSE 0 END AS fuzzy_score | |
| FROM fuzzy_results | |
| ORDER BY fuzzy_score DESC; | |
| ------------------------------ Exercise 6 ------------------------------ | |
| -- Benchmark setup | |
| CREATE TABLE large_products AS | |
| SELECT | |
| generate_series(1, 1000000) as id, | |
| 'PRODUCT' || lpad(generate_series(1, 1000000)::text, 6, '0') as sku, | |
| (array['MacBook', 'iPhone', 'iPad', 'Watch', 'Studio', 'Pro', 'Air', 'Mini'])[ceil(random()*8)] || ' ' || | |
| (array['Pro', 'Max', 'Air', 'Mini', 'Standard'])[ceil(random()*5)] as name, | |
| 'Category' || ceil(random()*10) as category; | |
| -- Performance comparison queries | |
| -- Add search_vector column to large_products | |
| ALTER TABLE large_products ADD COLUMN search_vector tsvector; | |
| -- Populate the search vectors | |
| UPDATE large_products SET search_vector = | |
| setweight(to_tsvector('english', coalesce(name,'')), 'A') || | |
| setweight(to_tsvector('english', coalesce(category,'')), 'B'); | |
| -- Create the GIN index | |
| CREATE INDEX idx_large_products_search ON large_products USING gin(search_vector); | |
| -- Exact match with index (fastest) | |
| SELECT * FROM large_products WHERE sku = 'PRODUCT000001'; | |
| -- Pattern match prefix (fast with right index) | |
| SELECT * FROM large_products WHERE name LIKE 'MacBook%'; | |
| -- Pattern match contains (slower) | |
| SELECT * FROM large_products WHERE name LIKE '%Pro%'; | |
| -- Full-text search (fast with GIN index) | |
| SELECT * FROM large_products WHERE search_vector @@ plainto_tsquery('MacBook'); | |
| -- Fuzzy search (slowest) | |
| SELECT * FROM large_products WHERE similarity(name, 'MacBok') > 0.3; | |
| ------------------------------ Exercise 7 ------------------------------ | |
| -- Multi-tier search strategy with consistent columns | |
| WITH exact_matches AS ( | |
| SELECT id, sku, name, category, price, description, 1.0 as score, 'exact' as match_type | |
| FROM products | |
| WHERE name = 'MacBook Pro' OR sku = 'MacBook Pro' | |
| ), | |
| prefix_matches AS ( | |
| SELECT id, sku, name, category, price, description, 0.8 as score, 'prefix' as match_type | |
| FROM products | |
| WHERE name LIKE 'MacBook Pro' || '%' | |
| AND NOT EXISTS (SELECT 1 FROM exact_matches WHERE exact_matches.id = products.id) | |
| ), | |
| fulltext_matches AS ( | |
| SELECT id, sku, name, category, price, description, ts_rank(search_vector, query) * 0.6 as score, 'fulltext' as match_type | |
| FROM products, plainto_tsquery('english', 'MacBook Pro') AS query | |
| WHERE search_vector @@ query | |
| AND NOT EXISTS ( | |
| SELECT 1 FROM exact_matches WHERE exact_matches.id = products.id | |
| UNION | |
| SELECT 1 FROM prefix_matches WHERE prefix_matches.id = products.id | |
| ) | |
| ), | |
| fuzzy_matches AS ( | |
| SELECT id, sku, name, category, price, description, similarity(name, 'MacBook Pro') * 0.4 as score, 'fuzzy' as match_type | |
| FROM products | |
| WHERE similarity(name, 'MacBook Pro') > 0.3 | |
| AND NOT EXISTS ( | |
| SELECT 1 FROM exact_matches WHERE exact_matches.id = products.id | |
| UNION | |
| SELECT 1 FROM prefix_matches WHERE prefix_matches.id = products.id | |
| UNION | |
| SELECT 1 FROM fulltext_matches WHERE fulltext_matches.id = products.id | |
| ) | |
| ) | |
| SELECT * FROM exact_matches | |
| UNION ALL SELECT * FROM prefix_matches | |
| UNION ALL SELECT * FROM fulltext_matches | |
| UNION ALL SELECT * FROM fuzzy_matches | |
| ORDER BY score DESC, match_type | |
| LIMIT 20; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment