A comprehensive practical guide to essential SQL commands with concrete examples and detailed explanations.
- ๐ข Beginner Level
- ๐ก Intermediate Level
- ๐ด Advanced Level
- โก Quick Reference
- ๐ ๏ธ Practical Examples
CREATE DATABASE - Database Creation
Usage: Creates a new database to organize your tables and data
-- Create a database
CREATE DATABASE my_store;
-- Use the database
USE my_store;
-- Check existing databases
SHOW DATABASES;๐ก Tip: Use explicit names and avoid spaces (use _ instead)
CREATE TABLE - Table Creation
Usage: Defines table structure with columns and constraints
-- Simple table
CREATE TABLE clients (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100) NOT NULL,
email VARCHAR(255) UNIQUE,
age INT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Table with foreign key
CREATE TABLE orders (
id INT AUTO_INCREMENT PRIMARY KEY,
client_id INT,
amount DECIMAL(10,2) NOT NULL,
order_date DATE,
FOREIGN KEY (client_id) REFERENCES clients(id)
);๐ก Tip: Always define a primary key and use appropriate data types
INSERT INTO - Data Insertion
Usage: Adds new records to a table
-- Simple insertion
INSERT INTO clients (name, email, age)
VALUES ('John Doe', '[email protected]', 25);
-- Multiple insertion (more efficient)
INSERT INTO clients (name, email, age) VALUES
('Mary Smith', '[email protected]', 30),
('Paul Johnson', '[email protected]', 35),
('Sophie Brown', '[email protected]', 28);
-- Insertion without specifying columns (table order)
INSERT INTO clients VALUES
(NULL, 'Alice Green', '[email protected]', 32, NOW());๐ก Tip: Always specify columns to avoid errors when modifying table structure
SELECT - Data Reading
Usage: Retrieves data from one or more tables
-- Basic selection
SELECT name, email FROM clients;
-- All columns
SELECT * FROM clients;
-- With aliases to rename columns
SELECT
name AS client_name,
email AS email_address,
age AS client_age
FROM clients;
-- With calculations
SELECT
name,
TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) AS age,
YEAR(birth_date) AS birth_year,
MONTH(birth_date) AS birth_month,
DAY(birth_date) AS birth_day,
CONCAT(name, ' (', TIMESTAMPDIFF(YEAR, birth_date, CURDATE()), ' years old)') AS description
FROM clients;
๐ก Tip: Avoid SELECT * in production, select only necessary columns
WHERE - Data Filtering
Usage: Defines conditions to filter results
-- Simple conditions
SELECT * FROM clients WHERE age > 25;
SELECT * FROM clients WHERE name = 'John Doe';
SELECT * FROM clients WHERE email IS NOT NULL;
-- Multiple conditions with AND/OR
SELECT * FROM clients WHERE age > 25 AND age < 40;
SELECT * FROM clients WHERE name = 'John' OR name = 'Mary';
-- Range of values
SELECT * FROM clients WHERE age BETWEEN 25 AND 40;
-- List of values
SELECT * FROM clients WHERE name IN ('John', 'Mary', 'Paul');
-- Pattern searching with LIKE
SELECT * FROM clients WHERE name LIKE 'John%'; -- Starts with "John"
SELECT * FROM clients WHERE email LIKE '%@gmail.com'; -- Ends with "@gmail.com"
SELECT * FROM clients WHERE name LIKE '_ohn'; -- 4 letters ending with "ohn"๐ก Tip: Use indexes on frequently filtered columns to improve performance
ORDER BY - Result Sorting
Usage: Organizes results according to one or more criteria
-- Ascending sort (default)
SELECT * FROM clients ORDER BY age;
-- Descending sort
SELECT * FROM clients ORDER BY age DESC;
-- Multiple sort (priority from left to right)
SELECT * FROM clients ORDER BY age DESC, name ASC;
-- Sort by column number (less readable)
SELECT name, age FROM clients ORDER BY 2; -- Sort by age (2nd column)
-- Sort with expression
SELECT name, age FROM clients ORDER BY LENGTH(name);๐ก Tip: Sorting can be expensive on large tables, use appropriate indexes
UPDATE - Data Modification
Usage: Modifies existing records in a table
-- Simple update
UPDATE clients
SET age = 26
WHERE name = 'John Doe';
-- Multiple columns update
UPDATE clients
SET age = age + 1,
email = LOWER(email)
WHERE age < 30;
-- Update with calculation
UPDATE clients
SET email = CONCAT(LOWER(REPLACE(name, ' ', '.')), '@company.com')
WHERE email IS NULL;DELETE - Data Deletion
Usage: Removes records from a table
-- Deletion with condition
DELETE FROM clients WHERE age < 18;
-- Multiple deletion with conditions
DELETE FROM clients
WHERE age > 65 AND created_at < '2020-01-01';
-- Delete all records (careful!)
DELETE FROM clients;SHOW - Information Display
Usage: Shows information about database structure
-- List databases
SHOW DATABASES;
-- List tables in current database
SHOW TABLES;
-- Table structure
DESCRIBE clients;
-- or
DESC clients;
-- Complete table creation
SHOW CREATE TABLE clients;
-- Column information
SHOW COLUMNS FROM clients;
-- Table indexes
SHOW INDEX FROM clients;๐ก Tip: These commands are essential for exploring an unknown database
INNER JOIN - Inner Join
Usage: Combines rows from two tables that have matching values
-- Basic join
SELECT c.name, o.amount, o.order_date
FROM clients c
INNER JOIN orders o ON c.id = o.client_id;
-- Join with additional condition
SELECT c.name, o.amount
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
WHERE o.amount > 100;
-- Join with table aliases
SELECT
client.name AS client_name,
ord.amount AS order_amount
FROM clients client
INNER JOIN orders ord ON client.id = ord.client_id;๐ก Tip: INNER JOIN only returns rows that have a match in both tables
LEFT JOIN - Left Join
Usage: Returns all records from left table, with matches from right if they exist
-- All clients, even without orders
SELECT c.name, o.amount
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id;
-- Find clients who never ordered
SELECT c.name, c.email
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
WHERE o.client_id IS NULL;
-- Count orders per client (0 if none)
SELECT
c.name,
COUNT(o.id) AS order_count,
COALESCE(SUM(o.amount), 0) AS total_spent
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;๐ก Tip: LEFT JOIN is perfect for "all X with their Y (if it exists)" reports
RIGHT JOIN - Right Join
Usage: Returns all records from right table, with matches from left if they exist
-- All orders, even without associated client
SELECT c.name, o.amount, o.order_date
FROM clients c
RIGHT JOIN orders o ON c.id = o.client_id;
-- Equivalent with LEFT JOIN (more common)
SELECT c.name, o.amount, o.order_date
FROM orders o
LEFT JOIN clients c ON o.client_id = c.id;๐ก Tip: RIGHT JOIN is less used than LEFT JOIN, we generally prefer reversing table order
GROUP BY - Data Grouping
Usage: Groups rows that have the same values in specified columns
-- Count clients by age
SELECT age, COUNT(*) as client_count
FROM clients
GROUP BY age
ORDER BY age;
-- Order statistics by client
SELECT
c.name,
COUNT(o.id) as order_count,
SUM(o.amount) as total_purchases,
AVG(o.amount) as average_basket,
MIN(o.order_date) as first_order,
MAX(o.order_date) as last_order
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;
-- Grouping by multiple columns
SELECT
YEAR(order_date) as year,
MONTH(order_date) as month,
COUNT(*) as order_count,
SUM(amount) as monthly_revenue
FROM orders
GROUP BY YEAR(order_date), MONTH(order_date)
ORDER BY year DESC, month DESC;๐ก Tip: All SELECT columns must be in GROUP BY or be aggregation functions
HAVING - Post-Grouping Filtering
Usage: Filters groups created by GROUP BY (equivalent of WHERE for groups)
-- Ages represented by more than 2 people
SELECT age, COUNT(*) as count
FROM clients
GROUP BY age
HAVING COUNT(*) > 2;
-- Clients with more than 3 orders and total > $500
SELECT
c.name,
COUNT(o.id) as order_count,
SUM(o.amount) as total
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name
HAVING COUNT(o.id) > 3 AND SUM(o.amount) > 500;
-- Combination WHERE + GROUP BY + HAVING
SELECT
c.name,
AVG(o.amount) as average_basket
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
WHERE o.order_date > '2023-01-01' -- Filter BEFORE grouping
GROUP BY c.id, c.name
HAVING AVG(o.amount) > 100; -- Filter AFTER grouping๐ก Tip: WHERE filters rows, HAVING filters groups
Aggregate Functions - Group Calculations
Usage: Performs calculations on a set of values to return a single value
-- Basic functions
SELECT
COUNT(*) as total_clients, -- Count all rows
COUNT(email) as clients_with_email, -- Count non-NULL
COUNT(DISTINCT age) as different_ages, -- Count unique values
SUM(age) as age_sum, -- Sum
AVG(age) as average_age, -- Average
MIN(age) as youngest, -- Minimum
MAX(age) as oldest, -- Maximum
STDDEV(age) as age_stddev -- Standard deviation
FROM clients;
-- With conditions
SELECT
COUNT(CASE WHEN age < 30 THEN 1 END) as young_people,
COUNT(CASE WHEN age >= 30 THEN 1 END) as adults,
AVG(CASE WHEN age < 30 THEN age END) as young_avg_age
FROM clients;
-- On joins
SELECT
c.name,
COUNT(o.id) as order_count,
COALESCE(SUM(o.amount), 0) as total_spent,
COALESCE(AVG(o.amount), 0) as average_basket
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;๐ก Tip: COUNT(*) counts all rows, COUNT(column) ignores NULL values
String Functions - Text Manipulation
Usage: Transforms and manipulates character strings
-- Case and cleaning
SELECT
name,
UPPER(name) as name_upper,
LOWER(name) as name_lower,
TRIM(name) as name_trimmed,
LENGTH(name) as name_length
FROM clients;
-- Concatenation and extraction
SELECT
name,
email,
CONCAT(name, ' - ', email) as full_info,
SUBSTRING(email, 1, POSITION('@' IN email) - 1) as username,
RIGHT(email, 10) as email_end,
LEFT(name, 3) as initials
FROM clients;
-- Replacement and search
SELECT
name,
REPLACE(name, ' ', '_') as name_underscore,
REPLACE(email, '@', ' AT ') as email_readable,
POSITION('gmail' IN email) as gmail_position,
CASE
WHEN email LIKE '%gmail%' THEN 'Gmail'
WHEN email LIKE '%outlook%' THEN 'Outlook'
ELSE 'Other'
END as email_provider
FROM clients;๐ก Tip: These functions are very useful for cleaning and formatting data
Date Functions - Time Manipulation
Usage: Works with dates and times to extract, calculate and format
-- Current dates
SELECT
NOW() as current_datetime,
CURDATE() as current_date,
CURTIME() as current_time,
UNIX_TIMESTAMP() as unix_timestamp;
-- Part extraction
SELECT
order_date,
YEAR(order_date) as year,
MONTH(order_date) as month,
DAY(order_date) as day,
DAYNAME(order_date) as day_name,
MONTHNAME(order_date) as month_name,
QUARTER(order_date) as quarter
FROM orders;
-- Date calculations
SELECT
order_date,
DATE_ADD(order_date, INTERVAL 30 DAY) as due_date,
DATE_SUB(order_date, INTERVAL 1 YEAR) as one_year_ago,
DATEDIFF(NOW(), order_date) as days_elapsed,
TIMESTAMPDIFF(MONTH, order_date, NOW()) as months_elapsed
FROM orders;
-- Formatting
SELECT
order_date,
DATE_FORMAT(order_date, '%d/%m/%Y') as french_format,
DATE_FORMAT(order_date, '%W %M %Y') as long_format,
DATE_FORMAT(order_date, '%Y-Q%q') as quarter_format
FROM orders;๐ก Tip: Use indexes on date columns to optimize time-based queries
CASE WHEN - Conditional Logic
Usage: Creates logical conditions to return different values based on cases
-- Simple conditions
SELECT
name,
age,
CASE
WHEN age < 18 THEN 'Minor'
WHEN age < 65 THEN 'Adult'
ELSE 'Senior'
END as age_category
FROM clients;
-- Multiple and complex conditions
SELECT
c.name,
COUNT(o.id) as order_count,
SUM(o.amount) as total_purchases,
CASE
WHEN COUNT(o.id) = 0 THEN 'Inactive'
WHEN COUNT(o.id) < 3 THEN 'Occasional'
WHEN COUNT(o.id) < 10 AND SUM(o.amount) > 1000 THEN 'VIP'
WHEN COUNT(o.id) >= 10 THEN 'Loyal'
ELSE 'Standard'
END as client_profile
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;
-- CASE in calculations
SELECT
name,
age,
CASE
WHEN age < 18 THEN 0
WHEN age < 25 THEN 0.05
ELSE 0.10
END as discount_rate,
100 * CASE
WHEN age < 18 THEN 0
WHEN age < 25 THEN 0.05
ELSE 0.10
END as discount_amount
FROM clients;๐ก Tip: CASE can be used in SELECT, WHERE, ORDER BY and even in aggregation functions
ALTER TABLE - Table Modification
Usage: Modifies existing table structure (columns, constraints, indexes)
-- Add a column
ALTER TABLE clients ADD COLUMN phone VARCHAR(20);
ALTER TABLE clients ADD COLUMN birth_date DATE AFTER age;
-- Modify a column
ALTER TABLE clients MODIFY COLUMN name VARCHAR(150) NOT NULL;
ALTER TABLE clients CHANGE COLUMN name full_name VARCHAR(150);
-- Drop a column
ALTER TABLE clients DROP COLUMN phone;
-- Add constraints
ALTER TABLE clients ADD CONSTRAINT uk_email UNIQUE (email);
ALTER TABLE clients ADD CONSTRAINT chk_age CHECK (age >= 0 AND age <= 120);
-- Drop constraints
ALTER TABLE clients DROP CONSTRAINT uk_email;
ALTER TABLE clients DROP CONSTRAINT chk_age;
-- Add foreign key
ALTER TABLE orders
ADD CONSTRAINT fk_client
FOREIGN KEY (client_id) REFERENCES clients(id);
-- Rename table
ALTER TABLE clients RENAME TO users;Scalar Subqueries - Single Value Return
Usage: Uses the result of a query as a single value in another query
-- In SELECT (comparative calculation)
SELECT
name,
age,
(SELECT AVG(age) FROM clients) as global_avg_age,
age - (SELECT AVG(age) FROM clients) as age_difference
FROM clients;
-- In WHERE (dynamic filtering)
SELECT * FROM clients
WHERE age > (SELECT AVG(age) FROM clients);
-- Clients who spent more than average
SELECT c.name, SUM(o.amount) as total_spent
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name
HAVING SUM(o.amount) > (
SELECT AVG(client_total)
FROM (
SELECT SUM(amount) as client_total
FROM orders
GROUP BY client_id
) as averages
);
-- In UPDATE with subquery
UPDATE orders
SET status = 'priority'
WHERE amount > (
SELECT AVG(amount) * 2
FROM (SELECT amount FROM orders) as temp
);๐ก Tip: Scalar subqueries must return exactly one value (one row, one column)
IN / NOT IN - Subqueries with Lists
Usage: Tests if a value belongs to a set of values returned by a subquery
-- Clients who placed at least one order
SELECT * FROM clients
WHERE id IN (SELECT DISTINCT client_id FROM orders);
-- Clients who never ordered
SELECT * FROM clients
WHERE id NOT IN (
SELECT client_id
FROM orders
WHERE client_id IS NOT NULL -- Important with NOT IN!
);
-- Products never ordered
SELECT * FROM products
WHERE id NOT IN (
SELECT DISTINCT product_id
FROM order_items
WHERE product_id IS NOT NULL
);
-- Clients who ordered luxury products (>$1000)
SELECT DISTINCT c.*
FROM clients c
WHERE c.id IN (
SELECT o.client_id
FROM orders o
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE p.price > 1000
);EXISTS / NOT EXISTS - Existence Test
Usage: Tests the existence of rows in a subquery (more efficient than IN/NOT IN)
-- Clients with orders (more efficient than IN)
SELECT * FROM clients c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.client_id = c.id
);
-- Clients without orders (safer than NOT IN)
SELECT * FROM clients c
WHERE NOT EXISTS (
SELECT 1 FROM orders o
WHERE o.client_id = c.id
);
-- Clients who ordered this month
SELECT * FROM clients c
WHERE EXISTS (
SELECT 1 FROM orders o
WHERE o.client_id = c.id
AND MONTH(o.order_date) = MONTH(NOW())
AND YEAR(o.order_date) = YEAR(NOW())
);
-- Out-of-stock products with pending orders
SELECT p.*
FROM products p
WHERE p.stock = 0
AND EXISTS (
SELECT 1
FROM order_items oi
JOIN orders o ON oi.order_id = o.id
WHERE oi.product_id = p.id
AND o.status = 'pending'
);๐ก Tip: EXISTS is generally more performant than IN and avoids NULL problems
Correlated Subqueries - Reference to Outer Query
Usage: Subquery that references columns from the outer query (executed for each row)
-- Clients with an order above their personal average
SELECT DISTINCT c.name
FROM clients c
WHERE EXISTS (
SELECT 1 FROM orders o1
WHERE o1.client_id = c.id
AND o1.amount > (
SELECT AVG(o2.amount)
FROM orders o2
WHERE o2.client_id = c.id -- Correlation here
)
);
-- Last order of each client
SELECT c.name, o.order_date, o.amount
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.order_date = (
SELECT MAX(o2.order_date)
FROM orders o2
WHERE o2.client_id = c.id -- Correlation
);
-- Employees earning more than their department average
SELECT e.name, e.salary, e.department
FROM employees e
WHERE e.salary > (
SELECT AVG(e2.salary)
FROM employees e2
WHERE e2.department = e.department -- Correlation
);
-- Rank of each order by client (ROW_NUMBER simulation)
SELECT
c.name,
o.order_date,
o.amount,
(SELECT COUNT(*)
FROM orders o2
WHERE o2.client_id = o.client_id
AND o2.order_date <= o.order_date) as rank_num
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;ROW_NUMBER() / RANK() - Numbering and Ranking
Usage: Assigns rank numbers to rows according to a sort criterion
-- Simple numbering
SELECT
name,
age,
ROW_NUMBER() OVER (ORDER BY age DESC) as row_number,
RANK() OVER (ORDER BY age DESC) as rank_with_ties,
DENSE_RANK() OVER (ORDER BY age DESC) as dense_rank
FROM clients;
-- Ranking by group
SELECT
c.name,
o.amount,
o.order_date,
ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY o.amount DESC) as order_rank,
RANK() OVER (PARTITION BY c.id ORDER BY o.order_date) as chronological_order
FROM clients c
JOIN orders o ON c.id = o.client_id;
-- Top 3 orders per client
SELECT *
FROM (
SELECT
c.name,
o.amount,
o.order_date,
ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY o.amount DESC) as rank_num
FROM clients c
JOIN orders o ON c.id = o.client_id
) as rankings
WHERE rank_num <= 3;
-- Percentiles
SELECT
name,
age,
NTILE(4) OVER (ORDER BY age) as quartile,
PERCENT_RANK() OVER (ORDER BY age) as percentile_rank
FROM clients;๐ก Tip: ROW_NUMBER() gives unique numbers, RANK() can have ties, DENSE_RANK() avoids gaps
Windowed Aggregation Functions - Window Calculations
Usage: Applies aggregation functions over a "window" of data without GROUP BY
-- Running totals and moving averages
SELECT
c.name,
o.order_date,
o.amount,
SUM(o.amount) OVER (PARTITION BY c.id ORDER BY o.order_date) as running_total,
AVG(o.amount) OVER (PARTITION BY c.id ORDER BY o.order_date) as moving_average,
COUNT(*) OVER (PARTITION BY c.id ORDER BY o.order_date) as total_orders
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;
-- Comparison with global totals
SELECT
c.name,
o.amount,
SUM(o.amount) OVER () as global_total,
o.amount * 100.0 / SUM(o.amount) OVER () as percentage_of_total,
SUM(o.amount) OVER (PARTITION BY c.id) as client_total,
AVG(o.amount) OVER (PARTITION BY c.id) as client_average
FROM clients c
JOIN orders o ON c.id = o.client_id;
-- Rolling windows
SELECT
order_date,
amount,
AVG(amount) OVER (
ORDER BY order_date
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as three_day_avg,
SUM(amount) OVER (
ORDER BY order_date
RANGE BETWEEN INTERVAL 7 DAY PRECEDING AND CURRENT ROW
) as week_total
FROM orders
ORDER BY order_date;๐ก Tip: Window functions allow keeping row details while having aggregations
LAG / LEAD - Access to Previous/Next Rows
Usage: Accesses values from previous or next rows in ordered results
-- Evolution of amounts between orders
SELECT
c.name,
o.order_date,
o.amount,
LAG(o.amount, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as previous_amount,
LEAD(o.amount, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as next_amount,
o.amount - LAG(o.amount, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as evolution
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;
-- Time between orders
SELECT
c.name,
o.order_date,
LAG(o.order_date, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as previous_order,
DATEDIFF(
o.order_date,
LAG(o.order_date, 1) OVER (PARTITION BY c.id ORDER BY o.order_date)
) as days_between_orders
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;
-- First and last value of each group
SELECT
c.name,
o.order_date,
o.amount,
FIRST_VALUE(o.amount) OVER (PARTITION BY c.id ORDER BY o.order_date) as first_order,
LAST_VALUE(o.amount) OVER (
PARTITION BY c.id
ORDER BY o.order_date
ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) as last_order
FROM clients c
JOIN orders o ON c.id = o.client_id;๐ก Tip: LAG/LEAD are perfect for analyzing trends and temporal evolution
CREATE INDEX - Index Creation
Usage: Creates structures to speed up queries on certain columns
-- Simple index on frequently filtered column
CREATE INDEX idx_clients_age ON clients(age);
-- Composite index (column order matters)
CREATE INDEX idx_orders_client_date ON orders(client_id, order_date);
-- Unique index (constraint + performance)
CREATE UNIQUE INDEX idx_clients_email ON clients(email);
-- Partial index (with condition)
CREATE INDEX idx_orders_active
ON orders(order_date)
WHERE status = 'active';
-- Index on expression
CREATE INDEX idx_clients_name_lower ON clients(LOWER(name));
-- Descending index for ORDER BY DESC
CREATE INDEX idx_orders_date_desc ON orders(order_date DESC);
-- Check existing indexes
SHOW INDEX FROM clients;
-- Analyze index usage
EXPLAIN SELECT * FROM clients WHERE age > 25;๐ก Tip: Indexes speed up SELECT but slow down INSERT/UPDATE/DELETE
EXPLAIN - Performance Analysis
Usage: Shows query execution plan to understand performance
-- Simple analysis
EXPLAIN SELECT * FROM clients WHERE age > 25;
-- Detailed analysis
EXPLAIN EXTENDED
SELECT c.name, COUNT(o.id)
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;
-- JSON format for more details
EXPLAIN FORMAT=JSON
SELECT c.name, o.amount
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.order_date > '2023-01-01';
-- Analyze slow queries
EXPLAIN
SELECT c.name, SUM(o.amount)
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE YEAR(o.order_date) = 2023 -- Avoid functions on indexed columns
GROUP BY c.id
HAVING SUM(o.amount) > 1000;
-- Optimized version
EXPLAIN
SELECT c.name, SUM(o.amount)
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.order_date >= '2023-01-01'
AND o.order_date < '2024-01-01'
GROUP BY c.id
HAVING SUM(o.amount) > 1000;๐ก Tip: Look for "filesort", "using temporary" and high costs in EXPLAIN
ACID Transactions - Atomicity, Consistency, Isolation, Durability
Usage: Groups multiple operations into a logical unit that succeeds or fails completely
-- Simple transaction
START TRANSACTION;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
-- Check balances before validation
SELECT id, balance FROM accounts WHERE id IN (1, 2);
-- If everything is OK
COMMIT;
-- In case of error
-- ROLLBACK;
-- Transaction with savepoints
START TRANSACTION;
INSERT INTO clients (name, email) VALUES ('Test Client', '[email protected]');
SAVEPOINT point1;
INSERT INTO orders (client_id, amount) VALUES (LAST_INSERT_ID(), 150);
SAVEPOINT point2;
-- In case of error on the order
ROLLBACK TO point1; -- Keep client, cancel order
-- Or go back to beginning
-- ROLLBACK TO point1;
COMMIT;
-- Error handling with procedure
DELIMITER //
CREATE PROCEDURE TransferMoney(IN from_account INT, IN to_account INT, IN amount DECIMAL(10,2))
BEGIN
DECLARE EXIT HANDLER FOR SQLEXCEPTION
BEGIN
ROLLBACK;
RESIGNAL;
END;
START TRANSACTION;
UPDATE accounts SET balance = balance - amount WHERE id = from_account;
UPDATE accounts SET balance = balance + amount WHERE id = to_account;
-- Check no account is overdrawn
IF (SELECT MIN(balance) FROM accounts WHERE id IN (from_account, to_account)) < 0 THEN
SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = 'Insufficient balance';
END IF;
COMMIT;
END //
DELIMITER ;๐ก Tip: Always use transactions for critical operations involving multiple tables
Isolation Levels - Concurrency Management
Usage: Controls how transactions interact with each other
-- Check current isolation level
SELECT @@transaction_isolation;
-- Set isolation level for session
SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;
-- Transaction with specific level
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
START TRANSACTION;
SELECT * FROM accounts WHERE id = 1;
-- ... other operations ...
COMMIT;
-- Example of consistent read (REPEATABLE READ)
START TRANSACTION;
SELECT COUNT(*) FROM clients; -- Result: 100
-- In another session, someone inserts a client
SELECT COUNT(*) FROM clients; -- Result: still 100 (repeatable read)
COMMIT;
-- Explicit locks
START TRANSACTION;
SELECT * FROM accounts WHERE id = 1 FOR UPDATE; -- Exclusive lock
-- or
SELECT * FROM accounts WHERE id = 1 LOCK IN SHARE MODE; -- Shared lock
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;CREATE VIEW - Virtual Views
Usage: Creates a "virtual table" based on a reusable SQL query
-- Simple view for client statistics
CREATE VIEW client_stats_view AS
SELECT
c.id,
c.name,
c.email,
COUNT(o.id) as order_count,
COALESCE(SUM(o.amount), 0) as total_purchases,
COALESCE(AVG(o.amount), 0) as average_basket,
MAX(o.order_date) as last_order,
CASE
WHEN COUNT(o.id) = 0 THEN 'Inactive'
WHEN COUNT(o.id) < 3 THEN 'Occasional'
WHEN SUM(o.amount) > 1000 THEN 'VIP'
ELSE 'Standard'
END as profile
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name, c.email;
-- Using the view
SELECT * FROM client_stats_view WHERE profile = 'VIP';
SELECT profile, COUNT(*) FROM client_stats_view GROUP BY profile;
-- View with complex joins
CREATE VIEW order_details_view AS
SELECT
o.id as order_id,
c.name as client_name,
c.email,
o.order_date,
o.status,
oi.quantity,
p.name as product_name,
p.price,
(oi.quantity * oi.unit_price) as subtotal,
cat.name as category
FROM orders o
JOIN clients c ON o.client_id = c.id
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
LEFT JOIN categories cat ON p.category_id = cat.id;
-- View with parameters via WHERE
CREATE VIEW recent_sales_view AS
SELECT * FROM order_details_view
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY);
-- Modify a view
ALTER VIEW client_stats_view AS
SELECT
c.id,
c.name,
COUNT(o.id) as order_count,
SUM(o.amount) as total_purchases,
-- New calculation
DATEDIFF(CURDATE(), MAX(o.order_date)) as days_since_last_order
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;
-- Drop a view
DROP VIEW recent_sales_view;๐ก Tip: Views simplify complex queries and centralize business logic
Stored Procedures - Reusable Server-Side Logic
Usage: Stores reusable SQL instruction sequences with parameters and logic
-- Simple procedure with input parameters
DELIMITER //
CREATE PROCEDURE GetClientOrders(IN p_client_id INT)
BEGIN
SELECT
c.name,
o.order_date,
o.amount,
o.status
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE c.id = p_client_id
ORDER BY o.order_date DESC;
END //
DELIMITER ;
-- Execution
CALL GetClientOrders(1);
-- Procedure with input and output parameters
DELIMITER //
CREATE PROCEDURE CalculateClientStats(
IN p_client_id INT,
OUT p_order_count INT,
OUT p_total_purchases DECIMAL(10,2),
OUT p_average_basket DECIMAL(10,2)
)
BEGIN
SELECT
COUNT(id),
COALESCE(SUM(amount), 0),
COALESCE(AVG(amount), 0)
INTO p_order_count, p_total_purchases, p_average_basket
FROM orders
WHERE client_id = p_client_id;
END //
DELIMITER ;
-- Usage with variables
CALL CalculateClientStats(1, @count, @total, @average);
SELECT @count as orders, @total as total, @average as average;
-- Procedure with conditional logic
DELIMITER //
CREATE PROCEDURE CreateOrder(
IN p_client_id INT,
IN p_amount DECIMAL(10,2),
OUT p_order_id INT,
OUT p_message VARCHAR(255)
)
BEGIN
DECLARE v_order_count INT DEFAULT 0;
DECLARE v_client_status VARCHAR(50);
-- Check if client exists
IF NOT EXISTS (SELECT 1 FROM clients WHERE id = p_client_id) THEN
SET p_order_id = 0;
SET p_message = 'Client does not exist';
ELSE
-- Count existing orders
SELECT COUNT(*) INTO v_order_count
FROM orders
WHERE client_id = p_client_id;
-- Determine status
IF v_order_count = 0 THEN
SET v_client_status = 'new';
ELSEIF v_order_count < 5 THEN
SET v_client_status = 'regular';
ELSE
SET v_client_status = 'vip';
END IF;
-- Create order
INSERT INTO orders (client_id, amount, order_date, status)
VALUES (p_client_id, p_amount, NOW(), 'pending');
SET p_order_id = LAST_INSERT_ID();
SET p_message = CONCAT('Order created for ', v_client_status, ' client');
-- Log activity
INSERT INTO activity_logs (table_name, operation, timestamp)
VALUES ('orders', 'INSERT', NOW());
END IF;
END //
DELIMITER ;
-- Procedure with error handling
DELIMITER //
CREATE PROCEDURE TransferStock(
IN p_source_product INT,
IN p_dest_product INT,
IN p_quantity INT
)
BEGIN
DECLARE v_source_stock INT;
DECLARE EXIT HANDLER FOR SQLEXCEPTION
BEGIN
ROLLBACK;
RESIGNAL;
END;
START TRANSACTION;
-- Check source stock
SELECT stock INTO v_source_stock
FROM products
WHERE id = p_source_product;
IF v_source_stock < p_quantity THEN
SIGNAL SQLSTATE '45000'
SET MESSAGE_TEXT = 'Insufficient stock for transfer';
END IF;
-- Perform transfer
UPDATE products
SET stock = stock - p_quantity
WHERE id = p_source_product;
UPDATE products
SET stock = stock + p_quantity
WHERE id = p_dest_product;
COMMIT;
SELECT 'Transfer completed successfully' as message;
END //
DELIMITER ;
-- List procedures
SHOW PROCEDURE STATUS WHERE db = 'my_store';
-- Show procedure definition
SHOW CREATE PROCEDURE GetClientOrders;
-- Drop procedure
DROP PROCEDURE TransferStock;๐ก Tip: Stored procedures centralize business logic and improve performance for complex operations
SELECT columns
FROM table1
JOIN table2 ON condition
WHERE row_condition
GROUP BY columns
HAVING group_condition
ORDER BY columns
LIMIT number;๐ Aggregation
COUNT(*), COUNT(column), COUNT(DISTINCT column)
SUM(column), AVG(column), MIN(column), MAX(column)
STDDEV(column), VARIANCE(column)
GROUP_CONCAT(column SEPARATOR ', ') -- MySQL
STRING_AGG(column, ', ') -- PostgreSQL๐ String Functions
UPPER(str), LOWER(str), TRIM(str)
CONCAT(str1, str2), SUBSTRING(str, pos, len)
LENGTH(str), POSITION('sub' IN str)
REPLACE(str, 'old', 'new'), LEFT(str, len), RIGHT(str, len)๐ Date and Time
NOW(), CURDATE(), CURTIME()
YEAR(date), MONTH(date), DAY(date), DAYNAME(date)
DATE_ADD(date, INTERVAL n DAY/MONTH/YEAR)
DATEDIFF(date1, date2), TIMESTAMPDIFF(MONTH, date1, date2)
DATE_FORMAT(date, '%d/%m/%Y')๐ข Mathematical
ROUND(num, decimals), CEIL(num), FLOOR(num)
ABS(num), POWER(num, exp), SQRT(num)
RAND(), MOD(num, divisor)
GREATEST(val1, val2, ...), LEAST(val1, val2, ...)INNER JOIN -- โฉ (intersection)
LEFT JOIN -- A โช (A โฉ B)
RIGHT JOIN -- (A โฉ B) โช B
FULL JOIN -- A โช B (complete union)
CROSS JOIN -- A ร B (cartesian product)๐๏ธ Base Structure
-- Complete e-commerce database
CREATE DATABASE online_store;
USE online_store;
-- Categories table
CREATE TABLE categories (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(100) NOT NULL,
description TEXT,
parent_id INT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (parent_id) REFERENCES categories(id)
);
-- Products table
CREATE TABLE products (
id INT AUTO_INCREMENT PRIMARY KEY,
name VARCHAR(200) NOT NULL,
description TEXT,
price DECIMAL(10,2) NOT NULL,
stock INT DEFAULT 0,
alert_threshold INT DEFAULT 5,
category_id INT,
weight DECIMAL(8,2),
active BOOLEAN DEFAULT TRUE,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
FOREIGN KEY (category_id) REFERENCES categories(id),
INDEX idx_category (category_id),
INDEX idx_price (price),
INDEX idx_stock (stock)
);
-- Clients table
CREATE TABLE clients (
id INT AUTO_INCREMENT PRIMARY KEY,
first_name VARCHAR(100) NOT NULL,
last_name VARCHAR(100) NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
phone VARCHAR(20),
birth_date DATE,
address TEXT,
city VARCHAR(100),
postal_code VARCHAR(10),
country VARCHAR(50) DEFAULT 'USA',
registration_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
last_login TIMESTAMP NULL,
active BOOLEAN DEFAULT TRUE,
INDEX idx_email (email),
INDEX idx_city (city)
);
-- Orders table
CREATE TABLE orders (
id INT AUTO_INCREMENT PRIMARY KEY,
order_number VARCHAR(20) UNIQUE NOT NULL,
client_id INT NOT NULL,
order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
status ENUM('pending', 'confirmed', 'prepared', 'shipped', 'delivered', 'cancelled') DEFAULT 'pending',
subtotal DECIMAL(10,2) NOT NULL,
shipping_cost DECIMAL(8,2) DEFAULT 0,
tax DECIMAL(8,2) DEFAULT 0,
total DECIMAL(10,2) NOT NULL,
shipping_address TEXT NOT NULL,
carrier VARCHAR(50),
tracking_number VARCHAR(100),
shipped_date TIMESTAMP NULL,
delivered_date TIMESTAMP NULL,
FOREIGN KEY (client_id) REFERENCES clients(id),
INDEX idx_client (client_id),
INDEX idx_status (status),
INDEX idx_order_date (order_date)
);
-- Order items table
CREATE TABLE order_items (
id INT AUTO_INCREMENT PRIMARY KEY,
order_id INT NOT NULL,
product_id INT NOT NULL,
quantity INT NOT NULL,
unit_price DECIMAL(10,2) NOT NULL,
subtotal DECIMAL(10,2) NOT NULL,
FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE,
FOREIGN KEY (product_id) REFERENCES products(id),
INDEX idx_order (order_id),
INDEX idx_product (product_id)
);
-- Sample data
INSERT INTO categories (name, description) VALUES
('Electronics', 'Electronic devices and gadgets'),
('Clothing', 'Fashion and accessories'),
('Home', 'Decoration and furniture'),
('Books', 'Literature and documentation'),
('Sports', 'Sports equipment and fitness');
INSERT INTO products (name, price, stock, category_id) VALUES
('Smartphone XY', 599.99, 50, 1),
('Bluetooth Headphones', 89.99, 25, 1),
('Premium T-shirt', 29.99, 100, 2),
('Slim Jeans', 79.99, 30, 2),
('Desk Lamp', 45.99, 15, 3),
('Bestseller Novel', 19.99, 200, 4),
('Football', 24.99, 40, 5);
INSERT INTO clients (first_name, last_name, email, city) VALUES
('John', 'Doe', '[email protected]', 'New York'),
('Mary', 'Smith', '[email protected]', 'Los Angeles'),
('Paul', 'Johnson', '[email protected]', 'Chicago'),
('Sophie', 'Brown', '[email protected]', 'Houston'),
('Peter', 'Wilson', '[email protected]', 'Phoenix');๐ Business Analysis Queries
-- 1. Top 10 best-selling products
SELECT
p.name,
SUM(oi.quantity) as quantity_sold,
SUM(oi.subtotal) as revenue,
AVG(oi.unit_price) as average_price,
COUNT(DISTINCT oi.order_id) as order_count
FROM products p
JOIN order_items oi ON p.id = oi.product_id
JOIN orders o ON oi.order_id = o.id
WHERE o.status NOT IN ('cancelled')
GROUP BY p.id, p.name
ORDER BY quantity_sold DESC
LIMIT 10;
-- 2. Monthly revenue evolution
SELECT
YEAR(order_date) as year,
MONTH(order_date) as month,
MONTHNAME(order_date) as month_name,
COUNT(*) as order_count,
SUM(total) as monthly_revenue,
AVG(total) as average_basket,
SUM(SUM(total)) OVER (
ORDER BY YEAR(order_date), MONTH(order_date)
) as cumulative_revenue
FROM orders
WHERE status NOT IN ('cancelled')
GROUP BY YEAR(order_date), MONTH(order_date)
ORDER BY year DESC, month DESC;
-- 3. RFM Client Segmentation (Recency, Frequency, Monetary)
SELECT
c.first_name,
c.last_name,
c.email,
-- Recency (days since last order)
DATEDIFF(CURDATE(), MAX(o.order_date)) as recency,
-- Frequency (number of orders)
COUNT(o.id) as frequency,
-- Monetary (total spent)
SUM(o.total) as monetary_value,
-- RFM Scores (1-5)
CASE
WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 30 THEN 5
WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 90 THEN 4
WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 180 THEN 3
WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 365 THEN 2
ELSE 1
END as recency_score,
CASE
WHEN COUNT(o.id) >= 10 THEN 5
WHEN COUNT(o.id) >= 5 THEN 4
WHEN COUNT(o.id) >= 3 THEN 3
WHEN COUNT(o.id) >= 2 THEN 2
ELSE 1
END as frequency_score,
CASE
WHEN SUM(o.total) >= 1000 THEN 5
WHEN SUM(o.total) >= 500 THEN 4
WHEN SUM(o.total) >= 200 THEN 3
WHEN SUM(o.total) >= 100 THEN 2
ELSE 1
END as monetary_score
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id AND o.status NOT IN ('cancelled')
GROUP BY c.id, c.first_name, c.last_name, c.email
HAVING COUNT(o.id) > 0
ORDER BY monetary_value DESC;
-- 4. Cart abandonment analysis (orders pending for more than 24h)
SELECT
c.first_name,
c.last_name,
c.email,
o.order_number,
o.order_date,
o.total,
TIMESTAMPDIFF(HOUR, o.order_date, NOW()) as hours_elapsed,
-- Products in cart
GROUP_CONCAT(
CONCAT(p.name, ' (', oi.quantity, ')')
SEPARATOR ', '
) as products
FROM clients c
JOIN orders o ON c.id = o.client_id
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE o.status = 'pending'
AND o.order_date < DATE_SUB(NOW(), INTERVAL 24 HOUR)
GROUP BY c.id, o.id
ORDER BY o.order_date DESC;
-- 5. Products requiring restocking
SELECT
p.name,
p.stock as current_stock,
p.alert_threshold,
(p.alert_threshold - p.stock) as quantity_needed,
c.name as category,
-- Sales in last 30 days
COALESCE(sales.total_sold, 0) as sold_last_30_days,
-- Estimated days of stock remaining
CASE
WHEN COALESCE(sales.total_sold, 0) = 0 THEN NULL
ELSE ROUND(p.stock / (sales.total_sold / 30), 1)
END as estimated_days_remaining
FROM products p
JOIN categories c ON p.category_id = c.id
LEFT JOIN (
SELECT
oi.product_id,
SUM(oi.quantity) as total_sold
FROM order_items oi
JOIN orders o ON oi.order_id = o.id
WHERE o.order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
AND o.status NOT IN ('cancelled')
GROUP BY oi.product_id
) sales ON p.id = sales.product_id
WHERE p.stock <= p.alert_threshold
ORDER BY (p.alert_threshold - p.stock) DESC;
-- 6. Customer lifetime value (CLV)
SELECT
c.first_name,
c.last_name,
c.email,
c.registration_date,
DATEDIFF(CURDATE(), c.registration_date) as customer_age_days,
COUNT(o.id) as total_orders,
SUM(o.total) as total_spent,
AVG(o.total) as average_order_value,
-- CLV calculation (simple version)
SUM(o.total) / NULLIF(DATEDIFF(CURDATE(), c.registration_date) / 365, 0) as annual_value,
-- Customer segment
CASE
WHEN SUM(o.total) >= 2000 THEN 'VIP'
WHEN SUM(o.total) >= 1000 THEN 'High Value'
WHEN SUM(o.total) >= 500 THEN 'Medium Value'
WHEN SUM(o.total) > 0 THEN 'Low Value'
ELSE 'No Purchase'
END as customer_segment
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id AND o.status NOT IN ('cancelled')
GROUP BY c.id, c.first_name, c.last_name, c.email, c.registration_date
ORDER BY total_spent DESC;
-- 7. Cohort analysis (monthly retention)
SELECT
cohort_month,
period_number,
customers_in_period,
LAG(customers_in_period, 1) OVER (PARTITION BY cohort_month ORDER BY period_number) as previous_period,
ROUND(
customers_in_period * 100.0 /
FIRST_VALUE(customers_in_period) OVER (PARTITION BY cohort_month ORDER BY period_number),
2
) as retention_rate
FROM (
SELECT
cohort_month,
period_number,
COUNT(DISTINCT client_id) as customers_in_period
FROM (
SELECT
c.id as client_id,
DATE_FORMAT(c.registration_date, '%Y-%m') as cohort_month,
TIMESTAMPDIFF(MONTH, c.registration_date, o.order_date) as period_number
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.status NOT IN ('cancelled')
) cohort_data
GROUP BY cohort_month, period_number
) cohort_table
ORDER BY cohort_month, period_number;๐ฏ Advanced Analytics
-- 8. Sales forecasting with trend analysis
SELECT
DATE_FORMAT(order_date, '%Y-%m') as month,
SUM(total) as monthly_sales,
AVG(SUM(total)) OVER (
ORDER BY DATE_FORMAT(order_date, '%Y-%m')
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) as three_month_avg,
(SUM(total) - LAG(SUM(total), 1) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m'))) /
LAG(SUM(total), 1) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) * 100 as growth_rate,
-- Simple linear trend
ROW_NUMBER() OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) as period_number
FROM orders
WHERE status NOT IN ('cancelled')
AND order_date >= DATE_SUB(CURDATE(), INTERVAL 24 MONTH)
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
ORDER BY month;
-- 9. Product affinity analysis (market basket)
SELECT
p1.name as product1,
p2.name as product2,
COUNT(*) as times_bought_together,
COUNT(*) * 100.0 / (
SELECT COUNT(DISTINCT order_id)
FROM order_items oi3
WHERE oi3.product_id = p1.id
) as lift_percentage
FROM order_items oi1
JOIN order_items oi2 ON oi1.order_id = oi2.order_id AND oi1.product_id < oi2.product_id
JOIN products p1 ON oi1.product_id = p1.id
JOIN products p2 ON oi2.product_id = p2.id
GROUP BY p1.id, p1.name, p2.id, p2.name
HAVING COUNT(*) > 2
ORDER BY times_bought_together DESC
LIMIT 20;
-- 10. Customer churn prediction
SELECT
c.first_name,
c.last_name,
c.email,
MAX(o.order_date) as last_order_date,
DATEDIFF(CURDATE(), MAX(o.order_date)) as days_since_last_order,
COUNT(o.id) as total_orders,
SUM(o.total) as total_spent,
AVG(DATEDIFF(
o.order_date,
LAG(o.order_date, 1) OVER (PARTITION BY c.id ORDER BY o.order_date)
)) as average_days_between_orders,
-- Churn risk score
CASE
WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) > 365 THEN 'High Risk'
WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) > 180 THEN 'Medium Risk'
WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) > 90 THEN 'Low Risk'
ELSE 'Active'
END as churn_risk
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.status NOT IN ('cancelled')
GROUP BY c.id, c.first_name, c.last_name, c.email
HAVING COUNT(o.id) >= 2 -- At least 2 orders to calculate patterns
ORDER BY days_since_last_order DESC;- ๐ MySQL Documentation
- ๐ PostgreSQL Documentation
- ๐ SQL Server Documentation
- ๐ SQLite Documentation
- ๐ฎ SQLBolt - Interactive SQL Tutorial
- ๐ W3Schools SQL Tutorial
- ๐ SQL Performance Tuning Guide
"This guide accompanies you in your SQL progression. From beginner to expert, each query is a step towards data mastery!"
โจ Created with passion by DorianABDS
๐ Thanks again, @DorianABDS, for sharing and compiling what you have learned so far about SQL in this gist.
<pedantic>The way this
SELECTSQL query calculatesbirth_yearmakes it brittle. It should be reworked, IMO.The year is hardcoded, and the age calculation does not take the month into account.
</pedantic>;-)