📊 SQL Guide

A comprehensive practical guide to essential SQL commands with concrete examples and detailed explanations.

📋 Table of Contents

🟢 Beginner Level

Initial Setup and Data Types

CREATE DATABASE - Database Creation

Usage: Creates a new database to organize your tables and data

-- Create a database
CREATE DATABASE my_store;

-- Use the database
USE my_store;

-- Check existing databases
SHOW DATABASES;

💡 Tip: Use explicit names and avoid spaces (use _ instead)

CREATE TABLE - Table Creation

Usage: Defines table structure with columns and constraints

-- Simple table
CREATE TABLE clients (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    email VARCHAR(255) UNIQUE,
    age INT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Table with foreign key
CREATE TABLE orders (
    id INT AUTO_INCREMENT PRIMARY KEY,
    client_id INT,
    amount DECIMAL(10,2) NOT NULL,
    order_date DATE,
    FOREIGN KEY (client_id) REFERENCES clients(id)
);

💡 Tip: Always define a primary key and use appropriate data types

INSERT INTO - Data Insertion

Usage: Adds new records to a table

-- Simple insertion
INSERT INTO clients (name, email, age) 
VALUES ('John Doe', '[email protected]', 25);

-- Multiple insertion (more efficient)
INSERT INTO clients (name, email, age) VALUES 
('Mary Smith', '[email protected]', 30),
('Paul Johnson', '[email protected]', 35),
('Sophie Brown', '[email protected]', 28);

-- Insertion without specifying columns (table order)
INSERT INTO clients VALUES 
(NULL, 'Alice Green', '[email protected]', 32, NOW());

💡 Tip: Always specify columns to avoid errors when modifying table structure

SELECT - Data Reading

Usage: Retrieves data from one or more tables

-- Basic selection
SELECT name, email FROM clients;

-- All columns
SELECT * FROM clients;

-- With aliases to rename columns
SELECT 
    name AS client_name, 
    email AS email_address,
    age AS client_age
FROM clients;

-- With calculations
SELECT 
    name,
    TIMESTAMPDIFF(YEAR, birth_date, CURDATE()) AS age,
    YEAR(birth_date)  AS birth_year,
    MONTH(birth_date) AS birth_month,
    DAY(birth_date)   AS birth_day,
    CONCAT(name, ' (', TIMESTAMPDIFF(YEAR, birth_date, CURDATE()), ' years old)') AS description
FROM clients;

💡 Tip: Avoid SELECT * in production, select only necessary columns

WHERE - Data Filtering

Usage: Defines conditions to filter results

-- Simple conditions
SELECT * FROM clients WHERE age > 25;
SELECT * FROM clients WHERE name = 'John Doe';
SELECT * FROM clients WHERE email IS NOT NULL;

-- Multiple conditions with AND/OR
SELECT * FROM clients WHERE age > 25 AND age < 40;
SELECT * FROM clients WHERE name = 'John' OR name = 'Mary';

-- Range of values
SELECT * FROM clients WHERE age BETWEEN 25 AND 40;

-- List of values
SELECT * FROM clients WHERE name IN ('John', 'Mary', 'Paul');

-- Pattern searching with LIKE
SELECT * FROM clients WHERE name LIKE 'John%';        -- Starts with "John"
SELECT * FROM clients WHERE email LIKE '%@gmail.com'; -- Ends with "@gmail.com"
SELECT * FROM clients WHERE name LIKE '_ohn';         -- 4 letters ending with "ohn"

💡 Tip: Use indexes on frequently filtered columns to improve performance

ORDER BY - Result Sorting

Usage: Organizes results according to one or more criteria

-- Ascending sort (default)
SELECT * FROM clients ORDER BY age;

-- Descending sort
SELECT * FROM clients ORDER BY age DESC;

-- Multiple sort (priority from left to right)
SELECT * FROM clients ORDER BY age DESC, name ASC;

-- Sort by column number (less readable)
SELECT name, age FROM clients ORDER BY 2; -- Sort by age (2nd column)

-- Sort with expression
SELECT name, age FROM clients ORDER BY LENGTH(name);

💡 Tip: Sorting can be expensive on large tables, use appropriate indexes

UPDATE - Data Modification

Usage: Modifies existing records in a table

-- Simple update
UPDATE clients 
SET age = 26 
WHERE name = 'John Doe';

-- Multiple columns update
UPDATE clients 
SET age = age + 1, 
    email = LOWER(email)
WHERE age < 30;

-- Update with calculation
UPDATE clients 
SET email = CONCAT(LOWER(REPLACE(name, ' ', '.')), '@company.com')
WHERE email IS NULL;

⚠️ Warning: Always use WHERE otherwise ALL records will be modified!

DELETE - Data Deletion

Usage: Removes records from a table

-- Deletion with condition
DELETE FROM clients WHERE age < 18;

-- Multiple deletion with conditions
DELETE FROM clients 
WHERE age > 65 AND created_at < '2020-01-01';

-- Delete all records (careful!)
DELETE FROM clients;

⚠️ Warning: DELETE without WHERE removes ALL records! Use TRUNCATE to completely empty a table

Information Commands

SHOW - Information Display

Usage: Shows information about database structure

-- List databases
SHOW DATABASES;

-- List tables in current database
SHOW TABLES;

-- Table structure
DESCRIBE clients;
-- or
DESC clients;

-- Complete table creation
SHOW CREATE TABLE clients;

-- Column information
SHOW COLUMNS FROM clients;

-- Table indexes
SHOW INDEX FROM clients;

💡 Tip: These commands are essential for exploring an unknown database

🟡 Intermediate Level

Joins and Relationships

INNER JOIN - Inner Join

Usage: Combines rows from two tables that have matching values

-- Basic join
SELECT c.name, o.amount, o.order_date
FROM clients c
INNER JOIN orders o ON c.id = o.client_id;

-- Join with additional condition
SELECT c.name, o.amount
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
WHERE o.amount > 100;

-- Join with table aliases
SELECT 
    client.name AS client_name,
    ord.amount AS order_amount
FROM clients client
INNER JOIN orders ord ON client.id = ord.client_id;

💡 Tip: INNER JOIN only returns rows that have a match in both tables

LEFT JOIN - Left Join

Usage: Returns all records from left table, with matches from right if they exist

-- All clients, even without orders
SELECT c.name, o.amount
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id;

-- Find clients who never ordered
SELECT c.name, c.email
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
WHERE o.client_id IS NULL;

-- Count orders per client (0 if none)
SELECT 
    c.name,
    COUNT(o.id) AS order_count,
    COALESCE(SUM(o.amount), 0) AS total_spent
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;

💡 Tip: LEFT JOIN is perfect for "all X with their Y (if it exists)" reports

RIGHT JOIN - Right Join

Usage: Returns all records from right table, with matches from left if they exist

-- All orders, even without associated client
SELECT c.name, o.amount, o.order_date
FROM clients c
RIGHT JOIN orders o ON c.id = o.client_id;

-- Equivalent with LEFT JOIN (more common)
SELECT c.name, o.amount, o.order_date
FROM orders o
LEFT JOIN clients c ON o.client_id = c.id;

💡 Tip: RIGHT JOIN is less used than LEFT JOIN, we generally prefer reversing table order

GROUP BY - Data Grouping

Usage: Groups rows that have the same values in specified columns

-- Count clients by age
SELECT age, COUNT(*) as client_count
FROM clients
GROUP BY age
ORDER BY age;

-- Order statistics by client
SELECT 
    c.name,
    COUNT(o.id) as order_count,
    SUM(o.amount) as total_purchases,
    AVG(o.amount) as average_basket,
    MIN(o.order_date) as first_order,
    MAX(o.order_date) as last_order
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;

-- Grouping by multiple columns
SELECT 
    YEAR(order_date) as year,
    MONTH(order_date) as month,
    COUNT(*) as order_count,
    SUM(amount) as monthly_revenue
FROM orders
GROUP BY YEAR(order_date), MONTH(order_date)
ORDER BY year DESC, month DESC;

💡 Tip: All SELECT columns must be in GROUP BY or be aggregation functions

HAVING - Post-Grouping Filtering

Usage: Filters groups created by GROUP BY (equivalent of WHERE for groups)

-- Ages represented by more than 2 people
SELECT age, COUNT(*) as count
FROM clients
GROUP BY age
HAVING COUNT(*) > 2;

-- Clients with more than 3 orders and total > $500
SELECT 
    c.name,
    COUNT(o.id) as order_count,
    SUM(o.amount) as total
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name
HAVING COUNT(o.id) > 3 AND SUM(o.amount) > 500;

-- Combination WHERE + GROUP BY + HAVING
SELECT 
    c.name,
    AVG(o.amount) as average_basket
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
WHERE o.order_date > '2023-01-01'  -- Filter BEFORE grouping
GROUP BY c.id, c.name
HAVING AVG(o.amount) > 100;        -- Filter AFTER grouping

💡 Tip: WHERE filters rows, HAVING filters groups

Functions and Expressions

Aggregate Functions - Group Calculations

Usage: Performs calculations on a set of values to return a single value

-- Basic functions
SELECT 
    COUNT(*) as total_clients,                    -- Count all rows
    COUNT(email) as clients_with_email,           -- Count non-NULL
    COUNT(DISTINCT age) as different_ages,        -- Count unique values
    SUM(age) as age_sum,                         -- Sum
    AVG(age) as average_age,                     -- Average
    MIN(age) as youngest,                        -- Minimum
    MAX(age) as oldest,                          -- Maximum
    STDDEV(age) as age_stddev                    -- Standard deviation
FROM clients;

-- With conditions
SELECT 
    COUNT(CASE WHEN age < 30 THEN 1 END) as young_people,
    COUNT(CASE WHEN age >= 30 THEN 1 END) as adults,
    AVG(CASE WHEN age < 30 THEN age END) as young_avg_age
FROM clients;

-- On joins
SELECT 
    c.name,
    COUNT(o.id) as order_count,
    COALESCE(SUM(o.amount), 0) as total_spent,
    COALESCE(AVG(o.amount), 0) as average_basket
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;

💡 Tip: COUNT(*) counts all rows, COUNT(column) ignores NULL values

String Functions - Text Manipulation

Usage: Transforms and manipulates character strings

-- Case and cleaning
SELECT 
    name,
    UPPER(name) as name_upper,
    LOWER(name) as name_lower,
    TRIM(name) as name_trimmed,
    LENGTH(name) as name_length
FROM clients;

-- Concatenation and extraction
SELECT 
    name,
    email,
    CONCAT(name, ' - ', email) as full_info,
    SUBSTRING(email, 1, POSITION('@' IN email) - 1) as username,
    RIGHT(email, 10) as email_end,
    LEFT(name, 3) as initials
FROM clients;

-- Replacement and search
SELECT 
    name,
    REPLACE(name, ' ', '_') as name_underscore,
    REPLACE(email, '@', ' AT ') as email_readable,
    POSITION('gmail' IN email) as gmail_position,
    CASE 
        WHEN email LIKE '%gmail%' THEN 'Gmail'
        WHEN email LIKE '%outlook%' THEN 'Outlook'
        ELSE 'Other'
    END as email_provider
FROM clients;

💡 Tip: These functions are very useful for cleaning and formatting data

Date Functions - Time Manipulation

Usage: Works with dates and times to extract, calculate and format

-- Current dates
SELECT 
    NOW() as current_datetime,
    CURDATE() as current_date,
    CURTIME() as current_time,
    UNIX_TIMESTAMP() as unix_timestamp;

-- Part extraction
SELECT 
    order_date,
    YEAR(order_date) as year,
    MONTH(order_date) as month,
    DAY(order_date) as day,
    DAYNAME(order_date) as day_name,
    MONTHNAME(order_date) as month_name,
    QUARTER(order_date) as quarter
FROM orders;

-- Date calculations
SELECT 
    order_date,
    DATE_ADD(order_date, INTERVAL 30 DAY) as due_date,
    DATE_SUB(order_date, INTERVAL 1 YEAR) as one_year_ago,
    DATEDIFF(NOW(), order_date) as days_elapsed,
    TIMESTAMPDIFF(MONTH, order_date, NOW()) as months_elapsed
FROM orders;

-- Formatting
SELECT 
    order_date,
    DATE_FORMAT(order_date, '%d/%m/%Y') as french_format,
    DATE_FORMAT(order_date, '%W %M %Y') as long_format,
    DATE_FORMAT(order_date, '%Y-Q%q') as quarter_format
FROM orders;

💡 Tip: Use indexes on date columns to optimize time-based queries

CASE WHEN - Conditional Logic

Usage: Creates logical conditions to return different values based on cases

-- Simple conditions
SELECT 
    name,
    age,
    CASE 
        WHEN age < 18 THEN 'Minor'
        WHEN age < 65 THEN 'Adult'
        ELSE 'Senior'
    END as age_category
FROM clients;

-- Multiple and complex conditions
SELECT 
    c.name,
    COUNT(o.id) as order_count,
    SUM(o.amount) as total_purchases,
    CASE 
        WHEN COUNT(o.id) = 0 THEN 'Inactive'
        WHEN COUNT(o.id) < 3 THEN 'Occasional'
        WHEN COUNT(o.id) < 10 AND SUM(o.amount) > 1000 THEN 'VIP'
        WHEN COUNT(o.id) >= 10 THEN 'Loyal'
        ELSE 'Standard'
    END as client_profile
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;

-- CASE in calculations
SELECT 
    name,
    age,
    CASE 
        WHEN age < 18 THEN 0
        WHEN age < 25 THEN 0.05
        ELSE 0.10
    END as discount_rate,
    100 * CASE 
        WHEN age < 18 THEN 0
        WHEN age < 25 THEN 0.05
        ELSE 0.10
    END as discount_amount
FROM clients;

💡 Tip: CASE can be used in SELECT, WHERE, ORDER BY and even in aggregation functions

Structure Modification

ALTER TABLE - Table Modification

Usage: Modifies existing table structure (columns, constraints, indexes)

-- Add a column
ALTER TABLE clients ADD COLUMN phone VARCHAR(20);
ALTER TABLE clients ADD COLUMN birth_date DATE AFTER age;

-- Modify a column
ALTER TABLE clients MODIFY COLUMN name VARCHAR(150) NOT NULL;
ALTER TABLE clients CHANGE COLUMN name full_name VARCHAR(150);

-- Drop a column
ALTER TABLE clients DROP COLUMN phone;

-- Add constraints
ALTER TABLE clients ADD CONSTRAINT uk_email UNIQUE (email);
ALTER TABLE clients ADD CONSTRAINT chk_age CHECK (age >= 0 AND age <= 120);

-- Drop constraints
ALTER TABLE clients DROP CONSTRAINT uk_email;
ALTER TABLE clients DROP CONSTRAINT chk_age;

-- Add foreign key
ALTER TABLE orders 
ADD CONSTRAINT fk_client 
FOREIGN KEY (client_id) REFERENCES clients(id);

-- Rename table
ALTER TABLE clients RENAME TO users;

⚠️ Warning: Structure modifications can be long on large tables and may block operations

🔴 Advanced Level

Subqueries and Complex Queries

Scalar Subqueries - Single Value Return

Usage: Uses the result of a query as a single value in another query

-- In SELECT (comparative calculation)
SELECT 
    name,
    age,
    (SELECT AVG(age) FROM clients) as global_avg_age,
    age - (SELECT AVG(age) FROM clients) as age_difference
FROM clients;

-- In WHERE (dynamic filtering)
SELECT * FROM clients
WHERE age > (SELECT AVG(age) FROM clients);

-- Clients who spent more than average
SELECT c.name, SUM(o.amount) as total_spent
FROM clients c
INNER JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name
HAVING SUM(o.amount) > (
    SELECT AVG(client_total) 
    FROM (
        SELECT SUM(amount) as client_total
        FROM orders 
        GROUP BY client_id
    ) as averages
);

-- In UPDATE with subquery
UPDATE orders 
SET status = 'priority'
WHERE amount > (
    SELECT AVG(amount) * 2 
    FROM (SELECT amount FROM orders) as temp
);

💡 Tip: Scalar subqueries must return exactly one value (one row, one column)

IN / NOT IN - Subqueries with Lists

Usage: Tests if a value belongs to a set of values returned by a subquery

-- Clients who placed at least one order
SELECT * FROM clients
WHERE id IN (SELECT DISTINCT client_id FROM orders);

-- Clients who never ordered
SELECT * FROM clients
WHERE id NOT IN (
    SELECT client_id 
    FROM orders 
    WHERE client_id IS NOT NULL  -- Important with NOT IN!
);

-- Products never ordered
SELECT * FROM products
WHERE id NOT IN (
    SELECT DISTINCT product_id 
    FROM order_items 
    WHERE product_id IS NOT NULL
);

-- Clients who ordered luxury products (>$1000)
SELECT DISTINCT c.*
FROM clients c
WHERE c.id IN (
    SELECT o.client_id
    FROM orders o
    JOIN order_items oi ON o.id = oi.order_id
    JOIN products p ON oi.product_id = p.id
    WHERE p.price > 1000
);

⚠️ Warning: NOT IN with NULL values can give unexpected results, prefer NOT EXISTS

EXISTS / NOT EXISTS - Existence Test

Usage: Tests the existence of rows in a subquery (more efficient than IN/NOT IN)

-- Clients with orders (more efficient than IN)
SELECT * FROM clients c
WHERE EXISTS (
    SELECT 1 FROM orders o 
    WHERE o.client_id = c.id
);

-- Clients without orders (safer than NOT IN)
SELECT * FROM clients c
WHERE NOT EXISTS (
    SELECT 1 FROM orders o 
    WHERE o.client_id = c.id
);

-- Clients who ordered this month
SELECT * FROM clients c
WHERE EXISTS (
    SELECT 1 FROM orders o
    WHERE o.client_id = c.id
    AND MONTH(o.order_date) = MONTH(NOW())
    AND YEAR(o.order_date) = YEAR(NOW())
);

-- Out-of-stock products with pending orders
SELECT p.*
FROM products p
WHERE p.stock = 0
AND EXISTS (
    SELECT 1 
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.id
    WHERE oi.product_id = p.id
    AND o.status = 'pending'
);

💡 Tip: EXISTS is generally more performant than IN and avoids NULL problems

Correlated Subqueries - Reference to Outer Query

Usage: Subquery that references columns from the outer query (executed for each row)

-- Clients with an order above their personal average
SELECT DISTINCT c.name
FROM clients c
WHERE EXISTS (
    SELECT 1 FROM orders o1
    WHERE o1.client_id = c.id
    AND o1.amount > (
        SELECT AVG(o2.amount)
        FROM orders o2
        WHERE o2.client_id = c.id  -- Correlation here
    )
);

-- Last order of each client
SELECT c.name, o.order_date, o.amount
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.order_date = (
    SELECT MAX(o2.order_date)
    FROM orders o2
    WHERE o2.client_id = c.id  -- Correlation
);

-- Employees earning more than their department average
SELECT e.name, e.salary, e.department
FROM employees e
WHERE e.salary > (
    SELECT AVG(e2.salary)
    FROM employees e2
    WHERE e2.department = e.department  -- Correlation
);

-- Rank of each order by client (ROW_NUMBER simulation)
SELECT 
    c.name,
    o.order_date,
    o.amount,
    (SELECT COUNT(*) 
     FROM orders o2 
     WHERE o2.client_id = o.client_id 
     AND o2.order_date <= o.order_date) as rank_num
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;

⚠️ Performance: Correlated subqueries can be slow as they execute for each row of the outer query

Window Functions

ROW_NUMBER() / RANK() - Numbering and Ranking

Usage: Assigns rank numbers to rows according to a sort criterion

-- Simple numbering
SELECT 
    name,
    age,
    ROW_NUMBER() OVER (ORDER BY age DESC) as row_number,
    RANK() OVER (ORDER BY age DESC) as rank_with_ties,
    DENSE_RANK() OVER (ORDER BY age DESC) as dense_rank
FROM clients;

-- Ranking by group
SELECT 
    c.name,
    o.amount,
    o.order_date,
    ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY o.amount DESC) as order_rank,
    RANK() OVER (PARTITION BY c.id ORDER BY o.order_date) as chronological_order
FROM clients c
JOIN orders o ON c.id = o.client_id;

-- Top 3 orders per client
SELECT *
FROM (
    SELECT 
        c.name,
        o.amount,
        o.order_date,
        ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY o.amount DESC) as rank_num
    FROM clients c
    JOIN orders o ON c.id = o.client_id
) as rankings
WHERE rank_num <= 3;

-- Percentiles
SELECT 
    name,
    age,
    NTILE(4) OVER (ORDER BY age) as quartile,
    PERCENT_RANK() OVER (ORDER BY age) as percentile_rank
FROM clients;

💡 Tip: ROW_NUMBER() gives unique numbers, RANK() can have ties, DENSE_RANK() avoids gaps

Windowed Aggregation Functions - Window Calculations

Usage: Applies aggregation functions over a "window" of data without GROUP BY

-- Running totals and moving averages
SELECT 
    c.name,
    o.order_date,
    o.amount,
    SUM(o.amount) OVER (PARTITION BY c.id ORDER BY o.order_date) as running_total,
    AVG(o.amount) OVER (PARTITION BY c.id ORDER BY o.order_date) as moving_average,
    COUNT(*) OVER (PARTITION BY c.id ORDER BY o.order_date) as total_orders
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;

-- Comparison with global totals
SELECT 
    c.name,
    o.amount,
    SUM(o.amount) OVER () as global_total,
    o.amount * 100.0 / SUM(o.amount) OVER () as percentage_of_total,
    SUM(o.amount) OVER (PARTITION BY c.id) as client_total,
    AVG(o.amount) OVER (PARTITION BY c.id) as client_average
FROM clients c
JOIN orders o ON c.id = o.client_id;

-- Rolling windows
SELECT 
    order_date,
    amount,
    AVG(amount) OVER (
        ORDER BY order_date 
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) as three_day_avg,
    SUM(amount) OVER (
        ORDER BY order_date 
        RANGE BETWEEN INTERVAL 7 DAY PRECEDING AND CURRENT ROW
    ) as week_total
FROM orders
ORDER BY order_date;

💡 Tip: Window functions allow keeping row details while having aggregations

LAG / LEAD - Access to Previous/Next Rows

Usage: Accesses values from previous or next rows in ordered results

-- Evolution of amounts between orders
SELECT 
    c.name,
    o.order_date,
    o.amount,
    LAG(o.amount, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as previous_amount,
    LEAD(o.amount, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as next_amount,
    o.amount - LAG(o.amount, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as evolution
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;

-- Time between orders
SELECT 
    c.name,
    o.order_date,
    LAG(o.order_date, 1) OVER (PARTITION BY c.id ORDER BY o.order_date) as previous_order,
    DATEDIFF(
        o.order_date, 
        LAG(o.order_date, 1) OVER (PARTITION BY c.id ORDER BY o.order_date)
    ) as days_between_orders
FROM clients c
JOIN orders o ON c.id = o.client_id
ORDER BY c.name, o.order_date;

-- First and last value of each group
SELECT 
    c.name,
    o.order_date,
    o.amount,
    FIRST_VALUE(o.amount) OVER (PARTITION BY c.id ORDER BY o.order_date) as first_order,
    LAST_VALUE(o.amount) OVER (
        PARTITION BY c.id 
        ORDER BY o.order_date 
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
    ) as last_order
FROM clients c
JOIN orders o ON c.id = o.client_id;

💡 Tip: LAG/LEAD are perfect for analyzing trends and temporal evolution

Optimization and Indexes

CREATE INDEX - Index Creation

Usage: Creates structures to speed up queries on certain columns

-- Simple index on frequently filtered column
CREATE INDEX idx_clients_age ON clients(age);

-- Composite index (column order matters)
CREATE INDEX idx_orders_client_date ON orders(client_id, order_date);

-- Unique index (constraint + performance)
CREATE UNIQUE INDEX idx_clients_email ON clients(email);

-- Partial index (with condition)
CREATE INDEX idx_orders_active 
ON orders(order_date) 
WHERE status = 'active';

-- Index on expression
CREATE INDEX idx_clients_name_lower ON clients(LOWER(name));

-- Descending index for ORDER BY DESC
CREATE INDEX idx_orders_date_desc ON orders(order_date DESC);

-- Check existing indexes
SHOW INDEX FROM clients;

-- Analyze index usage
EXPLAIN SELECT * FROM clients WHERE age > 25;

💡 Tip: Indexes speed up SELECT but slow down INSERT/UPDATE/DELETE

EXPLAIN - Performance Analysis

Usage: Shows query execution plan to understand performance

-- Simple analysis
EXPLAIN SELECT * FROM clients WHERE age > 25;

-- Detailed analysis
EXPLAIN EXTENDED 
SELECT c.name, COUNT(o.id)
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;

-- JSON format for more details
EXPLAIN FORMAT=JSON
SELECT c.name, o.amount
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.order_date > '2023-01-01';

-- Analyze slow queries
EXPLAIN
SELECT c.name, SUM(o.amount)
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE YEAR(o.order_date) = 2023  -- Avoid functions on indexed columns
GROUP BY c.id
HAVING SUM(o.amount) > 1000;

-- Optimized version
EXPLAIN
SELECT c.name, SUM(o.amount)
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.order_date >= '2023-01-01' 
  AND o.order_date < '2024-01-01'
GROUP BY c.id
HAVING SUM(o.amount) > 1000;

💡 Tip: Look for "filesort", "using temporary" and high costs in EXPLAIN

Transactions and Locks

ACID Transactions - Atomicity, Consistency, Isolation, Durability

Usage: Groups multiple operations into a logical unit that succeeds or fails completely

-- Simple transaction
START TRANSACTION;

UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;

-- Check balances before validation
SELECT id, balance FROM accounts WHERE id IN (1, 2);

-- If everything is OK
COMMIT;

-- In case of error
-- ROLLBACK;

-- Transaction with savepoints
START TRANSACTION;

INSERT INTO clients (name, email) VALUES ('Test Client', '[email protected]');
SAVEPOINT point1;

INSERT INTO orders (client_id, amount) VALUES (LAST_INSERT_ID(), 150);
SAVEPOINT point2;

-- In case of error on the order
ROLLBACK TO point1;  -- Keep client, cancel order

-- Or go back to beginning
-- ROLLBACK TO point1;

COMMIT;

-- Error handling with procedure
DELIMITER //
CREATE PROCEDURE TransferMoney(IN from_account INT, IN to_account INT, IN amount DECIMAL(10,2))
BEGIN
    DECLARE EXIT HANDLER FOR SQLEXCEPTION
    BEGIN
        ROLLBACK;
        RESIGNAL;
    END;

    START TRANSACTION;

    UPDATE accounts SET balance = balance - amount WHERE id = from_account;
    UPDATE accounts SET balance = balance + amount WHERE id = to_account;

    -- Check no account is overdrawn
    IF (SELECT MIN(balance) FROM accounts WHERE id IN (from_account, to_account)) < 0 THEN
        SIGNAL SQLSTATE '45000' SET MESSAGE_TEXT = 'Insufficient balance';
    END IF;

    COMMIT;
END //
DELIMITER ;

💡 Tip: Always use transactions for critical operations involving multiple tables

Isolation Levels - Concurrency Management

Usage: Controls how transactions interact with each other

-- Check current isolation level
SELECT @@transaction_isolation;

-- Set isolation level for session
SET SESSION TRANSACTION ISOLATION LEVEL READ UNCOMMITTED;
SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
SET SESSION TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SET SESSION TRANSACTION ISOLATION LEVEL SERIALIZABLE;

-- Transaction with specific level
SET TRANSACTION ISOLATION LEVEL READ COMMITTED;
START TRANSACTION;
SELECT * FROM accounts WHERE id = 1;
-- ... other operations ...
COMMIT;

-- Example of consistent read (REPEATABLE READ)
START TRANSACTION;
SELECT COUNT(*) FROM clients;  -- Result: 100
-- In another session, someone inserts a client
SELECT COUNT(*) FROM clients;  -- Result: still 100 (repeatable read)
COMMIT;

-- Explicit locks
START TRANSACTION;
SELECT * FROM accounts WHERE id = 1 FOR UPDATE;  -- Exclusive lock
-- or
SELECT * FROM accounts WHERE id = 1 LOCK IN SHARE MODE;  -- Shared lock
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
COMMIT;

⚠️ Warning: Higher isolation levels increase deadlock and contention risks

Views and Stored Procedures

CREATE VIEW - Virtual Views

Usage: Creates a "virtual table" based on a reusable SQL query

-- Simple view for client statistics
CREATE VIEW client_stats_view AS
SELECT 
    c.id,
    c.name,
    c.email,
    COUNT(o.id) as order_count,
    COALESCE(SUM(o.amount), 0) as total_purchases,
    COALESCE(AVG(o.amount), 0) as average_basket,
    MAX(o.order_date) as last_order,
    CASE 
        WHEN COUNT(o.id) = 0 THEN 'Inactive'
        WHEN COUNT(o.id) < 3 THEN 'Occasional'
        WHEN SUM(o.amount) > 1000 THEN 'VIP'
        ELSE 'Standard'
    END as profile
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name, c.email;

-- Using the view
SELECT * FROM client_stats_view WHERE profile = 'VIP';
SELECT profile, COUNT(*) FROM client_stats_view GROUP BY profile;

-- View with complex joins
CREATE VIEW order_details_view AS
SELECT 
    o.id as order_id,
    c.name as client_name,
    c.email,
    o.order_date,
    o.status,
    oi.quantity,
    p.name as product_name,
    p.price,
    (oi.quantity * oi.unit_price) as subtotal,
    cat.name as category
FROM orders o
JOIN clients c ON o.client_id = c.id
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
LEFT JOIN categories cat ON p.category_id = cat.id;

-- View with parameters via WHERE
CREATE VIEW recent_sales_view AS
SELECT * FROM order_details_view
WHERE order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY);

-- Modify a view
ALTER VIEW client_stats_view AS
SELECT 
    c.id,
    c.name,
    COUNT(o.id) as order_count,
    SUM(o.amount) as total_purchases,
    -- New calculation
    DATEDIFF(CURDATE(), MAX(o.order_date)) as days_since_last_order
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id
GROUP BY c.id, c.name;

-- Drop a view
DROP VIEW recent_sales_view;

💡 Tip: Views simplify complex queries and centralize business logic

Stored Procedures - Reusable Server-Side Logic

Usage: Stores reusable SQL instruction sequences with parameters and logic

-- Simple procedure with input parameters
DELIMITER //
CREATE PROCEDURE GetClientOrders(IN p_client_id INT)
BEGIN
    SELECT 
        c.name,
        o.order_date,
        o.amount,
        o.status
    FROM clients c
    JOIN orders o ON c.id = o.client_id
    WHERE c.id = p_client_id
    ORDER BY o.order_date DESC;
END //
DELIMITER ;

-- Execution
CALL GetClientOrders(1);

-- Procedure with input and output parameters
DELIMITER //
CREATE PROCEDURE CalculateClientStats(
    IN p_client_id INT,
    OUT p_order_count INT,
    OUT p_total_purchases DECIMAL(10,2),
    OUT p_average_basket DECIMAL(10,2)
)
BEGIN
    SELECT 
        COUNT(id),
        COALESCE(SUM(amount), 0),
        COALESCE(AVG(amount), 0)
    INTO p_order_count, p_total_purchases, p_average_basket
    FROM orders
    WHERE client_id = p_client_id;
END //
DELIMITER ;

-- Usage with variables
CALL CalculateClientStats(1, @count, @total, @average);
SELECT @count as orders, @total as total, @average as average;

-- Procedure with conditional logic
DELIMITER //
CREATE PROCEDURE CreateOrder(
    IN p_client_id INT,
    IN p_amount DECIMAL(10,2),
    OUT p_order_id INT,
    OUT p_message VARCHAR(255)
)
BEGIN
    DECLARE v_order_count INT DEFAULT 0;
    DECLARE v_client_status VARCHAR(50);

    -- Check if client exists
    IF NOT EXISTS (SELECT 1 FROM clients WHERE id = p_client_id) THEN
        SET p_order_id = 0;
        SET p_message = 'Client does not exist';
    ELSE
        -- Count existing orders
        SELECT COUNT(*) INTO v_order_count
        FROM orders 
        WHERE client_id = p_client_id;

        -- Determine status
        IF v_order_count = 0 THEN
            SET v_client_status = 'new';
        ELSEIF v_order_count < 5 THEN
            SET v_client_status = 'regular';
        ELSE
            SET v_client_status = 'vip';
        END IF;

        -- Create order
        INSERT INTO orders (client_id, amount, order_date, status)
        VALUES (p_client_id, p_amount, NOW(), 'pending');

        SET p_order_id = LAST_INSERT_ID();
        SET p_message = CONCAT('Order created for ', v_client_status, ' client');

        -- Log activity
        INSERT INTO activity_logs (table_name, operation, timestamp)
        VALUES ('orders', 'INSERT', NOW());
    END IF;
END //
DELIMITER ;

-- Procedure with error handling
DELIMITER //
CREATE PROCEDURE TransferStock(
    IN p_source_product INT,
    IN p_dest_product INT,
    IN p_quantity INT
)
BEGIN
    DECLARE v_source_stock INT;
    DECLARE EXIT HANDLER FOR SQLEXCEPTION
    BEGIN
        ROLLBACK;
        RESIGNAL;
    END;

    START TRANSACTION;

    -- Check source stock
    SELECT stock INTO v_source_stock 
    FROM products 
    WHERE id = p_source_product;

    IF v_source_stock < p_quantity THEN
        SIGNAL SQLSTATE '45000' 
        SET MESSAGE_TEXT = 'Insufficient stock for transfer';
    END IF;

    -- Perform transfer
    UPDATE products 
    SET stock = stock - p_quantity 
    WHERE id = p_source_product;

    UPDATE products 
    SET stock = stock + p_quantity 
    WHERE id = p_dest_product;

    COMMIT;

    SELECT 'Transfer completed successfully' as message;
END //
DELIMITER ;

-- List procedures
SHOW PROCEDURE STATUS WHERE db = 'my_store';

-- Show procedure definition
SHOW CREATE PROCEDURE GetClientOrders;

-- Drop procedure
DROP PROCEDURE TransferStock;

💡 Tip: Stored procedures centralize business logic and improve performance for complex operations

⚡ Quick Reference

SQL Clause Order

SELECT columns
FROM table1
JOIN table2 ON condition
WHERE row_condition
GROUP BY columns
HAVING group_condition
ORDER BY columns
LIMIT number;

Common Functions by Category

📊 Aggregation

COUNT(*), COUNT(column), COUNT(DISTINCT column)
SUM(column), AVG(column), MIN(column), MAX(column)
STDDEV(column), VARIANCE(column)
GROUP_CONCAT(column SEPARATOR ', ')  -- MySQL
STRING_AGG(column, ', ')             -- PostgreSQL

📝 String Functions

UPPER(str), LOWER(str), TRIM(str)
CONCAT(str1, str2), SUBSTRING(str, pos, len)
LENGTH(str), POSITION('sub' IN str)
REPLACE(str, 'old', 'new'), LEFT(str, len), RIGHT(str, len)

📅 Date and Time

NOW(), CURDATE(), CURTIME()
YEAR(date), MONTH(date), DAY(date), DAYNAME(date)
DATE_ADD(date, INTERVAL n DAY/MONTH/YEAR)
DATEDIFF(date1, date2), TIMESTAMPDIFF(MONTH, date1, date2)
DATE_FORMAT(date, '%d/%m/%Y')

🔢 Mathematical

ROUND(num, decimals), CEIL(num), FLOOR(num)
ABS(num), POWER(num, exp), SQRT(num)
RAND(), MOD(num, divisor)
GREATEST(val1, val2, ...), LEAST(val1, val2, ...)

Join Types Visual

INNER JOIN    -- ∩ (intersection)
LEFT JOIN     -- A ∪ (A ∩ B)
RIGHT JOIN    -- (A ∩ B) ∪ B  
FULL JOIN     -- A ∪ B (complete union)
CROSS JOIN    -- A × B (cartesian product)

🛠️ Practical Examples

Complete E-commerce Use Case

🏗️ Base Structure

-- Complete e-commerce database
CREATE DATABASE online_store;
USE online_store;

-- Categories table
CREATE TABLE categories (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(100) NOT NULL,
    description TEXT,
    parent_id INT,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (parent_id) REFERENCES categories(id)
);

-- Products table
CREATE TABLE products (
    id INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(200) NOT NULL,
    description TEXT,
    price DECIMAL(10,2) NOT NULL,
    stock INT DEFAULT 0,
    alert_threshold INT DEFAULT 5,
    category_id INT,
    weight DECIMAL(8,2),
    active BOOLEAN DEFAULT TRUE,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    FOREIGN KEY (category_id) REFERENCES categories(id),
    INDEX idx_category (category_id),
    INDEX idx_price (price),
    INDEX idx_stock (stock)
);

-- Clients table
CREATE TABLE clients (
    id INT AUTO_INCREMENT PRIMARY KEY,
    first_name VARCHAR(100) NOT NULL,
    last_name VARCHAR(100) NOT NULL,
    email VARCHAR(255) UNIQUE NOT NULL,
    phone VARCHAR(20),
    birth_date DATE,
    address TEXT,
    city VARCHAR(100),
    postal_code VARCHAR(10),
    country VARCHAR(50) DEFAULT 'USA',
    registration_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    last_login TIMESTAMP NULL,
    active BOOLEAN DEFAULT TRUE,
    INDEX idx_email (email),
    INDEX idx_city (city)
);

-- Orders table
CREATE TABLE orders (
    id INT AUTO_INCREMENT PRIMARY KEY,
    order_number VARCHAR(20) UNIQUE NOT NULL,
    client_id INT NOT NULL,
    order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    status ENUM('pending', 'confirmed', 'prepared', 'shipped', 'delivered', 'cancelled') DEFAULT 'pending',
    subtotal DECIMAL(10,2) NOT NULL,
    shipping_cost DECIMAL(8,2) DEFAULT 0,
    tax DECIMAL(8,2) DEFAULT 0,
    total DECIMAL(10,2) NOT NULL,
    shipping_address TEXT NOT NULL,
    carrier VARCHAR(50),
    tracking_number VARCHAR(100),
    shipped_date TIMESTAMP NULL,
    delivered_date TIMESTAMP NULL,
    FOREIGN KEY (client_id) REFERENCES clients(id),
    INDEX idx_client (client_id),
    INDEX idx_status (status),
    INDEX idx_order_date (order_date)
);

-- Order items table
CREATE TABLE order_items (
    id INT AUTO_INCREMENT PRIMARY KEY,
    order_id INT NOT NULL,
    product_id INT NOT NULL,
    quantity INT NOT NULL,
    unit_price DECIMAL(10,2) NOT NULL,
    subtotal DECIMAL(10,2) NOT NULL,
    FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE,
    FOREIGN KEY (product_id) REFERENCES products(id),
    INDEX idx_order (order_id),
    INDEX idx_product (product_id)
);

-- Sample data
INSERT INTO categories (name, description) VALUES
('Electronics', 'Electronic devices and gadgets'),
('Clothing', 'Fashion and accessories'),
('Home', 'Decoration and furniture'),
('Books', 'Literature and documentation'),
('Sports', 'Sports equipment and fitness');

INSERT INTO products (name, price, stock, category_id) VALUES
('Smartphone XY', 599.99, 50, 1),
('Bluetooth Headphones', 89.99, 25, 1),
('Premium T-shirt', 29.99, 100, 2),
('Slim Jeans', 79.99, 30, 2),
('Desk Lamp', 45.99, 15, 3),
('Bestseller Novel', 19.99, 200, 4),
('Football', 24.99, 40, 5);

INSERT INTO clients (first_name, last_name, email, city) VALUES
('John', 'Doe', '[email protected]', 'New York'),
('Mary', 'Smith', '[email protected]', 'Los Angeles'),
('Paul', 'Johnson', '[email protected]', 'Chicago'),
('Sophie', 'Brown', '[email protected]', 'Houston'),
('Peter', 'Wilson', '[email protected]', 'Phoenix');

📊 Business Analysis Queries

-- 1. Top 10 best-selling products
SELECT 
    p.name,
    SUM(oi.quantity) as quantity_sold,
    SUM(oi.subtotal) as revenue,
    AVG(oi.unit_price) as average_price,
    COUNT(DISTINCT oi.order_id) as order_count
FROM products p
JOIN order_items oi ON p.id = oi.product_id
JOIN orders o ON oi.order_id = o.id
WHERE o.status NOT IN ('cancelled')
GROUP BY p.id, p.name
ORDER BY quantity_sold DESC
LIMIT 10;

-- 2. Monthly revenue evolution
SELECT 
    YEAR(order_date) as year,
    MONTH(order_date) as month,
    MONTHNAME(order_date) as month_name,
    COUNT(*) as order_count,
    SUM(total) as monthly_revenue,
    AVG(total) as average_basket,
    SUM(SUM(total)) OVER (
        ORDER BY YEAR(order_date), MONTH(order_date)
    ) as cumulative_revenue
FROM orders
WHERE status NOT IN ('cancelled')
GROUP BY YEAR(order_date), MONTH(order_date)
ORDER BY year DESC, month DESC;

-- 3. RFM Client Segmentation (Recency, Frequency, Monetary)
SELECT 
    c.first_name,
    c.last_name,
    c.email,
    -- Recency (days since last order)
    DATEDIFF(CURDATE(), MAX(o.order_date)) as recency,
    -- Frequency (number of orders)
    COUNT(o.id) as frequency,
    -- Monetary (total spent)
    SUM(o.total) as monetary_value,
    -- RFM Scores (1-5)
    CASE 
        WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 30 THEN 5
        WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 90 THEN 4
        WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 180 THEN 3
        WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) <= 365 THEN 2
        ELSE 1
    END as recency_score,
    CASE 
        WHEN COUNT(o.id) >= 10 THEN 5
        WHEN COUNT(o.id) >= 5 THEN 4
        WHEN COUNT(o.id) >= 3 THEN 3
        WHEN COUNT(o.id) >= 2 THEN 2
        ELSE 1
    END as frequency_score,
    CASE 
        WHEN SUM(o.total) >= 1000 THEN 5
        WHEN SUM(o.total) >= 500 THEN 4
        WHEN SUM(o.total) >= 200 THEN 3
        WHEN SUM(o.total) >= 100 THEN 2
        ELSE 1
    END as monetary_score
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id AND o.status NOT IN ('cancelled')
GROUP BY c.id, c.first_name, c.last_name, c.email
HAVING COUNT(o.id) > 0
ORDER BY monetary_value DESC;

-- 4. Cart abandonment analysis (orders pending for more than 24h)
SELECT 
    c.first_name,
    c.last_name,
    c.email,
    o.order_number,
    o.order_date,
    o.total,
    TIMESTAMPDIFF(HOUR, o.order_date, NOW()) as hours_elapsed,
    -- Products in cart
    GROUP_CONCAT(
        CONCAT(p.name, ' (', oi.quantity, ')') 
        SEPARATOR ', '
    ) as products
FROM clients c
JOIN orders o ON c.id = o.client_id
JOIN order_items oi ON o.id = oi.order_id
JOIN products p ON oi.product_id = p.id
WHERE o.status = 'pending'
AND o.order_date < DATE_SUB(NOW(), INTERVAL 24 HOUR)
GROUP BY c.id, o.id
ORDER BY o.order_date DESC;

-- 5. Products requiring restocking
SELECT 
    p.name,
    p.stock as current_stock,
    p.alert_threshold,
    (p.alert_threshold - p.stock) as quantity_needed,
    c.name as category,
    -- Sales in last 30 days
    COALESCE(sales.total_sold, 0) as sold_last_30_days,
    -- Estimated days of stock remaining
    CASE 
        WHEN COALESCE(sales.total_sold, 0) = 0 THEN NULL
        ELSE ROUND(p.stock / (sales.total_sold / 30), 1)
    END as estimated_days_remaining
FROM products p
JOIN categories c ON p.category_id = c.id
LEFT JOIN (
    SELECT 
        oi.product_id,
        SUM(oi.quantity) as total_sold
    FROM order_items oi
    JOIN orders o ON oi.order_id = o.id
    WHERE o.order_date >= DATE_SUB(CURDATE(), INTERVAL 30 DAY)
    AND o.status NOT IN ('cancelled')
    GROUP BY oi.product_id
) sales ON p.id = sales.product_id
WHERE p.stock <= p.alert_threshold
ORDER BY (p.alert_threshold - p.stock) DESC;

-- 6. Customer lifetime value (CLV)
SELECT 
    c.first_name,
    c.last_name,
    c.email,
    c.registration_date,
    DATEDIFF(CURDATE(), c.registration_date) as customer_age_days,
    COUNT(o.id) as total_orders,
    SUM(o.total) as total_spent,
    AVG(o.total) as average_order_value,
    -- CLV calculation (simple version)
    SUM(o.total) / NULLIF(DATEDIFF(CURDATE(), c.registration_date) / 365, 0) as annual_value,
    -- Customer segment
    CASE 
        WHEN SUM(o.total) >= 2000 THEN 'VIP'
        WHEN SUM(o.total) >= 1000 THEN 'High Value'
        WHEN SUM(o.total) >= 500 THEN 'Medium Value'
        WHEN SUM(o.total) > 0 THEN 'Low Value'
        ELSE 'No Purchase'
    END as customer_segment
FROM clients c
LEFT JOIN orders o ON c.id = o.client_id AND o.status NOT IN ('cancelled')
GROUP BY c.id, c.first_name, c.last_name, c.email, c.registration_date
ORDER BY total_spent DESC;

-- 7. Cohort analysis (monthly retention)
SELECT 
    cohort_month,
    period_number,
    customers_in_period,
    LAG(customers_in_period, 1) OVER (PARTITION BY cohort_month ORDER BY period_number) as previous_period,
    ROUND(
        customers_in_period * 100.0 / 
        FIRST_VALUE(customers_in_period) OVER (PARTITION BY cohort_month ORDER BY period_number),
        2
    ) as retention_rate
FROM (
    SELECT 
        cohort_month,
        period_number,
        COUNT(DISTINCT client_id) as customers_in_period
    FROM (
        SELECT 
            c.id as client_id,
            DATE_FORMAT(c.registration_date, '%Y-%m') as cohort_month,
            TIMESTAMPDIFF(MONTH, c.registration_date, o.order_date) as period_number
        FROM clients c
        JOIN orders o ON c.id = o.client_id
        WHERE o.status NOT IN ('cancelled')
    ) cohort_data
    GROUP BY cohort_month, period_number
) cohort_table
ORDER BY cohort_month, period_number;

🎯 Advanced Analytics

-- 8. Sales forecasting with trend analysis
SELECT 
    DATE_FORMAT(order_date, '%Y-%m') as month,
    SUM(total) as monthly_sales,
    AVG(SUM(total)) OVER (
        ORDER BY DATE_FORMAT(order_date, '%Y-%m')
        ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
    ) as three_month_avg,
    (SUM(total) - LAG(SUM(total), 1) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m'))) / 
    LAG(SUM(total), 1) OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) * 100 as growth_rate,
    -- Simple linear trend
    ROW_NUMBER() OVER (ORDER BY DATE_FORMAT(order_date, '%Y-%m')) as period_number
FROM orders
WHERE status NOT IN ('cancelled')
AND order_date >= DATE_SUB(CURDATE(), INTERVAL 24 MONTH)
GROUP BY DATE_FORMAT(order_date, '%Y-%m')
ORDER BY month;

-- 9. Product affinity analysis (market basket)
SELECT 
    p1.name as product1,
    p2.name as product2,
    COUNT(*) as times_bought_together,
    COUNT(*) * 100.0 / (
        SELECT COUNT(DISTINCT order_id) 
        FROM order_items oi3 
        WHERE oi3.product_id = p1.id
    ) as lift_percentage
FROM order_items oi1
JOIN order_items oi2 ON oi1.order_id = oi2.order_id AND oi1.product_id < oi2.product_id
JOIN products p1 ON oi1.product_id = p1.id
JOIN products p2 ON oi2.product_id = p2.id
GROUP BY p1.id, p1.name, p2.id, p2.name
HAVING COUNT(*) > 2
ORDER BY times_bought_together DESC
LIMIT 20;

-- 10. Customer churn prediction
SELECT 
    c.first_name,
    c.last_name,
    c.email,
    MAX(o.order_date) as last_order_date,
    DATEDIFF(CURDATE(), MAX(o.order_date)) as days_since_last_order,
    COUNT(o.id) as total_orders,
    SUM(o.total) as total_spent,
    AVG(DATEDIFF(
        o.order_date,
        LAG(o.order_date, 1) OVER (PARTITION BY c.id ORDER BY o.order_date)
    )) as average_days_between_orders,
    -- Churn risk score
    CASE 
        WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) > 365 THEN 'High Risk'
        WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) > 180 THEN 'Medium Risk'
        WHEN DATEDIFF(CURDATE(), MAX(o.order_date)) > 90 THEN 'Low Risk'
        ELSE 'Active'
    END as churn_risk
FROM clients c
JOIN orders o ON c.id = o.client_id
WHERE o.status NOT IN ('cancelled')
GROUP BY c.id, c.first_name, c.last_name, c.email
HAVING COUNT(o.id) >= 2  -- At least 2 orders to calculate patterns
ORDER BY days_since_last_order DESC;

📖 Additional Resources

"This guide accompanies you in your SQL progression. From beginner to expert, each query is a step towards data mastery!"

✨ Created with passion by DorianABDS

DorianABDS/SQL_Guide.md

📊 SQL Guide

📋 Table of Contents

🟢 Beginner Level

Initial Setup and Data Types

Information Commands

🟡 Intermediate Level

Joins and Relationships

Functions and Expressions

Structure Modification

🔴 Advanced Level

Subqueries and Complex Queries

Window Functions

Optimization and Indexes

Transactions and Locks

Views and Stored Procedures

⚡ Quick Reference

SQL Clause Order

Common Functions by Category

Join Types Visual

🛠️ Practical Examples

Complete E-commerce Use Case

📖 Additional Resources

ebouchut commented Sep 17, 2025 •

edited

Loading

Uh oh!

DorianABDS commented Sep 19, 2025 •

edited

Loading

Uh oh!

DorianABDS/SQL_Guide.md

📊 SQL Guide

📋 Table of Contents

🟢 Beginner Level

Initial Setup and Data Types

Information Commands

🟡 Intermediate Level

Joins and Relationships

Functions and Expressions

Structure Modification

🔴 Advanced Level

Subqueries and Complex Queries

Window Functions

Optimization and Indexes

Transactions and Locks

Views and Stored Procedures

⚡ Quick Reference

SQL Clause Order

Common Functions by Category

Join Types Visual

🛠️ Practical Examples

Complete E-commerce Use Case

📖 Additional Resources

ebouchut commented Sep 17, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

DorianABDS commented Sep 19, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

ebouchut commented Sep 17, 2025 •

edited

Loading

DorianABDS commented Sep 19, 2025 •

edited

Loading