Skip to content

Instantly share code, notes, and snippets.

@sany2k8
Last active August 25, 2025 17:19
Show Gist options
  • Save sany2k8/c7a631b7aae30f1048d72376418b4fa5 to your computer and use it in GitHub Desktop.
Save sany2k8/c7a631b7aae30f1048d72376418b4fa5 to your computer and use it in GitHub Desktop.
All the working queries
-- CREATE EXTENSION IF NOT EXISTS postgis;
-- CREATE EXTENSION IF NOT EXISTS pgvector;
------------------------------ Exercise 1 ------------------------------
-- Table setup
CREATE TABLE products (
id SERIAL PRIMARY KEY,
sku VARCHAR(20) UNIQUE,
name VARCHAR(200),
category VARCHAR(50),
price DECIMAL(10,2),
description TEXT
);
-- Sample data
INSERT INTO products (sku, name, category, price, description) VALUES
('LAPTOP001', 'MacBook Pro 16-inch', 'Electronics', 2499.99, 'Powerful laptop with M3 Max chip, perfect for professional video editing and software development'),
('PHONE001', 'iPhone 15 Pro', 'Electronics', 999.99, 'Latest smartphone with titanium design, advanced camera system, and A17 Pro chip'),
('BOOK001', 'PostgreSQL Guide', 'Books', 49.99, 'Complete guide to database management and advanced PostgreSQL techniques');
-- Exact match queries
SELECT * FROM products WHERE sku = 'LAPTOP001';
SELECT * FROM products WHERE category = 'Electronics';
SELECT * FROM products WHERE price = 999.99;
-- With index for performance
CREATE INDEX idx_products_sku ON products(sku);
CREATE INDEX idx_products_category ON products(category);
------------------------------ Exercise 2 ------------------------------
-- Pattern matching queries
SELECT * FROM products WHERE name LIKE 'MacBook%'; -- Starts with 'MacBook'
SELECT * FROM products WHERE name LIKE '%Pro%'; -- Contains 'Pro'
SELECT * FROM products WHERE name LIKE '%Guide'; -- Ends with 'Guide'
SELECT * FROM products WHERE name ILIKE '%MACBOOK%'; -- Case-insensitive
-- More complex patterns
SELECT * FROM products WHERE name LIKE 'iPhone __ Pro'; -- Two characters between iPhone and Pro
SELECT * FROM products WHERE sku LIKE 'LAPTOP___'; -- Exactly 9 characters, starting with LAPTOP
-- Performance optimization with trigram indexes
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX idx_products_name_trgm ON products USING gin(name gin_trgm_ops);
-- Optimized for prefix searches
CREATE INDEX idx_products_name_prefix ON products(name text_pattern_ops);
------------------------------ Exercise 3 ------------------------------
-- Add full-text search columns (if not already added during table creation)
ALTER TABLE products ADD COLUMN search_vector tsvector;
-- Sample data is already rich with descriptions from the initial INSERT
-- Update search vectors
UPDATE products SET search_vector =
setweight(to_tsvector('english', coalesce(name,'')), 'A') ||
setweight(to_tsvector('english', coalesce(description,'')), 'B');
-- Create GIN index for fast full-text search
CREATE INDEX idx_products_search ON products USING gin(search_vector);
-- Full-text search queries
SELECT name, description, ts_rank(search_vector, query) AS rank
FROM products, plainto_tsquery('english', 'professional laptop') AS query
WHERE search_vector @@ query
ORDER BY rank DESC;
-- Phrase search
SELECT name, description
FROM products
WHERE search_vector @@ phraseto_tsquery('english', 'video editing');
-- Advanced query with operators
SELECT name, description, ts_rank_cd(search_vector, query) AS rank
FROM products,
to_tsquery('english', 'laptop & (professional | development)') AS query
WHERE search_vector @@ query
ORDER BY rank DESC;
-- Search with highlighting
SELECT name,
ts_headline('english', description, plainto_tsquery('english', 'chip camera')) AS highlighted
FROM products
WHERE search_vector @@ plainto_tsquery('english', 'chip camera');
------------------------------ Exercise 4 ------------------------------
-- Install pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;
-- Add embedding column
ALTER TABLE products ADD COLUMN embedding vector(4);
-- In a real application, you'd generate embeddings using a model like:
-- - sentence-transformers/all-MiniLM-L6-v2
-- - OpenAI text-embedding-ada-002
-- - Google Universal Sentence Encoder
-- Simulated embeddings (in practice, generated by ML models)
UPDATE products SET embedding = '[0.1, -0.2, 0.5, 0.3]'::vector
WHERE sku = 'LAPTOP001';
UPDATE products SET embedding = '[0.2, -0.1, 0.4, 0.2]'::vector
WHERE sku = 'PHONE001';
UPDATE products SET embedding = '[0.0, 0.1, -0.3, 0.7]'::vector
WHERE sku = 'BOOK001';
-- Vector similarity search with actual query vector
WITH query_embedding AS (
SELECT '[0.15, -0.15, 0.45, 0.25]'::vector AS embedding
)
SELECT p.name, p.description,
1 - (p.embedding <=> q.embedding) AS similarity
FROM products p, query_embedding q
WHERE p.embedding IS NOT NULL
ORDER BY p.embedding <=> q.embedding
LIMIT 10;
------------------------------ Exercise 5 ------------------------------
-- Install extensions for fuzzy search
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE EXTENSION IF NOT EXISTS fuzzystrmatch;
-- Trigram similarity (0-1, higher is more similar)
SELECT name, similarity(name, 'MacBok Pro') AS similarity
FROM products
WHERE similarity(name, 'MacBok Pro') > 0.3
ORDER BY similarity DESC;
-- Levenshtein distance (lower is more similar)
SELECT name, levenshtein(name, 'iPhone 15 Pro Max') AS distance
FROM products
WHERE levenshtein(name, 'iPhone 15 Pro Max') <= 3
ORDER BY distance;
-- Soundex for phonetic matching
SELECT name, soundex(name), soundex('Aifon')
FROM products
WHERE soundex(name) = soundex('Aifon');
-- Double Metaphone for better phonetic matching
SELECT name, dmetaphone(name), dmetaphone('PostgreSQL')
FROM products
WHERE dmetaphone(name) = dmetaphone('PostgreSQL');
-- Combined fuzzy search with ranking
WITH fuzzy_results AS (
SELECT name, description,
similarity(name, 'MacBok Pro') as name_sim,
similarity(description, 'laptap') as desc_sim,
levenshtein_less_equal(name, 'MacBok Pro', 3) as lev_dist
FROM products
WHERE similarity(name, 'MacBok Pro') > 0.1
OR similarity(description, 'laptap') > 0.1
OR levenshtein_less_equal(name, 'MacBok Pro', 3) >= 0
)
SELECT name, description,
GREATEST(name_sim, desc_sim) +
CASE WHEN lev_dist >= 0 THEN (3.0 - lev_dist) / 3.0 ELSE 0 END AS fuzzy_score
FROM fuzzy_results
ORDER BY fuzzy_score DESC;
------------------------------ Exercise 6 ------------------------------
-- Benchmark setup
CREATE TABLE large_products AS
SELECT
generate_series(1, 1000000) as id,
'PRODUCT' || lpad(generate_series(1, 1000000)::text, 6, '0') as sku,
(array['MacBook', 'iPhone', 'iPad', 'Watch', 'Studio', 'Pro', 'Air', 'Mini'])[ceil(random()*8)] || ' ' ||
(array['Pro', 'Max', 'Air', 'Mini', 'Standard'])[ceil(random()*5)] as name,
'Category' || ceil(random()*10) as category;
-- Performance comparison queries
-- Add search_vector column to large_products
ALTER TABLE large_products ADD COLUMN search_vector tsvector;
-- Populate the search vectors
UPDATE large_products SET search_vector =
setweight(to_tsvector('english', coalesce(name,'')), 'A') ||
setweight(to_tsvector('english', coalesce(category,'')), 'B');
-- Create the GIN index
CREATE INDEX idx_large_products_search ON large_products USING gin(search_vector);
-- Exact match with index (fastest)
SELECT * FROM large_products WHERE sku = 'PRODUCT000001';
-- Pattern match prefix (fast with right index)
SELECT * FROM large_products WHERE name LIKE 'MacBook%';
-- Pattern match contains (slower)
SELECT * FROM large_products WHERE name LIKE '%Pro%';
-- Full-text search (fast with GIN index)
SELECT * FROM large_products WHERE search_vector @@ plainto_tsquery('MacBook');
-- Fuzzy search (slowest)
SELECT * FROM large_products WHERE similarity(name, 'MacBok') > 0.3;
------------------------------ Exercise 7 ------------------------------
-- Multi-tier search strategy with consistent columns
WITH exact_matches AS (
SELECT id, sku, name, category, price, description, 1.0 as score, 'exact' as match_type
FROM products
WHERE name = 'MacBook Pro' OR sku = 'MacBook Pro'
),
prefix_matches AS (
SELECT id, sku, name, category, price, description, 0.8 as score, 'prefix' as match_type
FROM products
WHERE name LIKE 'MacBook Pro' || '%'
AND NOT EXISTS (SELECT 1 FROM exact_matches WHERE exact_matches.id = products.id)
),
fulltext_matches AS (
SELECT id, sku, name, category, price, description, ts_rank(search_vector, query) * 0.6 as score, 'fulltext' as match_type
FROM products, plainto_tsquery('english', 'MacBook Pro') AS query
WHERE search_vector @@ query
AND NOT EXISTS (
SELECT 1 FROM exact_matches WHERE exact_matches.id = products.id
UNION
SELECT 1 FROM prefix_matches WHERE prefix_matches.id = products.id
)
),
fuzzy_matches AS (
SELECT id, sku, name, category, price, description, similarity(name, 'MacBook Pro') * 0.4 as score, 'fuzzy' as match_type
FROM products
WHERE similarity(name, 'MacBook Pro') > 0.3
AND NOT EXISTS (
SELECT 1 FROM exact_matches WHERE exact_matches.id = products.id
UNION
SELECT 1 FROM prefix_matches WHERE prefix_matches.id = products.id
UNION
SELECT 1 FROM fulltext_matches WHERE fulltext_matches.id = products.id
)
)
SELECT * FROM exact_matches
UNION ALL SELECT * FROM prefix_matches
UNION ALL SELECT * FROM fulltext_matches
UNION ALL SELECT * FROM fuzzy_matches
ORDER BY score DESC, match_type
LIMIT 20;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment