IllusiveMilkman · October 11, 2018 13:28 · motdde · Oct 27, 2020 · IllusiveMilkman · Oct 27, 2020
diff --git a/Postgres Window Functions.sql b/Postgres Window Functions.sql
 -- Window Function examples
 -- PostgreSQL conference South Africa 2018
 -- By Willem Booysen 
 -- Youtube: https://www.youtube.com/watch?v=blHEnrYwySE


 -- Create database and templates for demo
 DROP DATABASE IF EXISTS WindowFunctions;
 CREATE DATABASE WindowFunctions;

 -- Create table Payroll
 DROP TABLE IF EXISTS Payroll CASCADE;
 CREATE TABLE Payroll (
 	emp_no		INTEGER,
 	emp_name 	VARCHAR(20) NOT NULL,
 	dept_name 	VARCHAR(15) NOT NULL,
 	salary_amt 	DECIMAL (8,2) NOT NULL CHECK (salary_amt > 0.00)
 );


 -- Populate table Payroll
 INSERT INTO Payroll (emp_no, emp_name, dept_name, salary_amt ) VALUES
 	(1, 'Mark Stone', 'Accounting', 16000.00),
 	(2, 'Maria Stone', 'Accounting', 13000.00),
 	(3, 'Geetha Singh', 'Accounting', 13000.00),
 	(4, 'Richard Hathaway', 'Accounting', 14000.00),
 	(5, 'Joseph Bastion', 'Accounting', 14000.00),

 	(6, 'Arthur Prince', 'Production',12000.00),
 	(7, 'Adele Morse', 'Production', 13000.00),
 	(8, 'Sheamus O Kelly', 'Production', 24000.00),
 	(9, 'Sheilah Flask', 'Production', 24000.00),
 	(10, 'Brian James', 'Production', 16000.00),
 	(11, 'Adam Scott', 'Production', 16000.00),

 	(12, 'Maurice Moss', 'IT',12000.00),
 	(13, 'Roy', 'IT', 12001.00),
 	(14, 'Jen Barber', 'IT', 28000.00),
 	(15, 'Richard Hammond', 'IT', 10000.00),
 	(16, 'James May', 'IT', 10000.00),
 	(18, 'Jeremy Clarkson', 'IT', 10000.00),
 	(17, 'John Doe', 'IT', 100000.00) 	--Note how emp_no was switched around here...
 ;

 SELECT * FROM Payroll;





 -- Examples:  First the traditional method

 -- Basic aggregate functions, using the department as the Group
 SELECT
 	dept_name,
 	COUNT(*) AS dept_employee_count,
 	MIN(salary_amt) AS min_dept_salary,
 	MAX(salary_amt) AS max_dept_salary,
 	AVG(salary_amt)::DECIMAL(8,2) AS average_dept_salary,
 	SUM(salary_amt) AS total_dept_salaries
 FROM Payroll
 GROUP BY dept_name
 ORDER BY dept_name
 ;
 -- The individual rows have been grouped into subsets and returned in Department Groups as single rows.


 -- If we wanted this information to be compared to each salaried employee, we would normally JOIN with a CTE
 WITH Dept_stats AS (
 	SELECT
 		dept_name,
 		COUNT(*) AS dept_employee_count,
 		MIN(salary_amt) AS min_dept_salary,
 		MAX(salary_amt) AS max_dept_salary,
 		AVG(salary_amt)::DECIMAL(8,2) AS average_dept_salary,
 		SUM(salary_amt) AS total_dept_salaries
 	FROM Payroll
 	GROUP BY dept_name
 	ORDER BY dept_name
 	)
 SELECT 
 	Payroll.*,
 	(Select count(*) from Payroll) AS total_employee_count,
 	Dept_stats.dept_employee_count,
 	Dept_stats.min_dept_salary,
 	Dept_stats.max_dept_salary,
 	Dept_stats.average_dept_salary,
 	Dept_stats.total_dept_salaries
 FROM Payroll
 LEFT OUTER JOIN Dept_stats ON (Payroll.dept_name = Dept_stats.dept_name)
 ORDER BY Payroll.dept_name, emp_name
 ;


 -- Window Function --> How clean is this in comparison?

 SELECT 
 	*,
 	COUNT(*) 		OVER () AS total_employee_count,
 	COUNT(*) 		OVER (PARTITION BY dept_name) AS dept_employee_count, 
 	MIN(salary_amt) 	OVER (PARTITION BY dept_name) AS min_dept_salary,
 	MAX(salary_amt) 	OVER (PARTITION BY dept_name) AS max_dept_salary, 
 	AVG(salary_amt) 	OVER (PARTITION BY dept_name)::DECIMAL(8,2) AS avg_dept_sal, 
 	SUM(salary_amt) 	OVER (PARTITION BY dept_name) AS total_dept_salaries
 FROM Payroll
 ORDER BY dept_name, emp_name;

 -- DEMO 1 complete









 -- Basic Syntax:
 -- A window function call always contains an OVER clause.
 -- "( )" --> This is the Window

 SELECT 	
 	*,
 	COUNT(*) OVER () AS total_employee_count,								
 	COUNT(*) OVER (PARTITION BY dept_name) AS dept_employee_count
 FROM Payroll
 ;




 SELECT 	
 	*,
 	COUNT(*) OVER () AS total_employee_count,
 	COUNT(*) OVER (PARTITION BY dept_name) AS dept_employee_count
 FROM 	Payroll
 WHERE 	dept_name = 'IT'  --> Windows into your data are affected by the WHERE clause.  Windows cannot work outside your base data set.
 ;



 SELECT 
 	*,
 	COUNT(*) 		OVER () AS total_employee_count,
 	COUNT(*) 		OVER (PARTITION BY dept_name) AS dept_employee_count, 
 	MIN(salary_amt) 	OVER (PARTITION BY dept_name) AS min_dept_salary,
 	MAX(salary_amt) 	OVER (PARTITION BY dept_name) AS max_dept_salary, 
 	AVG(salary_amt) 	OVER (PARTITION BY dept_name)::DECIMAL(8,2) AS avg_dept_sal, 
 	SUM(salary_amt) 	OVER (PARTITION BY dept_name) AS total_dept_salaries
 FROM Payroll
 ORDER BY dept_name, emp_name;



 -- And now for some handy comparison figures...
 SELECT 
 	*, 
 	(salary_amt / (SUM(salary_amt) OVER (PARTITION BY dept_name)) * 100)::DECIMAL(18,2) AS Dept_Percentage, 
 	(salary_amt / (SUM(salary_amt) OVER () ) * 100)::DECIMAL(18,2) AS Company_Percentage
 FROM Payroll
 ORDER BY dept_name, dept_percentage;



 -- ORDER BY examples here --> Skipping to ROW_NUMBER for the moment as this best illustrates the various Window orders.
 -- and how a Window order is not affected by the outer SELECT order.
 
 SELECT 
 	*,
 	-- Note the difference between row_number and the emp_no in terms of when it was captured.
 	ROW_NUMBER() OVER () AS "Base Row No",													
 	
 	-- No Partition, just an ORDER BY, thus the whole base resultset is used.	
 	ROW_NUMBER() OVER (ORDER BY salary_amt) AS "Salary Row No",	
 	
 	-- Order each partition first, then assign row numbers.
 	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt) AS "Dept,Salary Row No"	
 FROM Payroll
 ORDER BY emp_no;

 -- DEMO 2 complete







 -- Row_Order, Rank & Dense_rank:

 -- The natural order of the table as we declared it.
 SELECT 	* 
 FROM  	Payroll
 ORDER BY emp_no;

 -- ROW_NUMBER a really good way to explain the relationship between Windows and Partitions:

 SELECT 	*,
 	-- Row_Number assigns a unique integer to each row within your partition within your window.
 	ROW_NUMBER() OVER (),							-- Note the difference between row_number and the emp_no in terms of when it was captured.
 	ROW_NUMBER() OVER (ORDER BY salary_amt),				-- No Partition, just an ORDER BY, thus the whole base resultset is used.
 	ROW_NUMBER() OVER (PARTITION BY dept_name),				
 	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt),

 	-- Ranks --> Equal values are ranked the same, creating gaps in numbering
 	RANK() OVER (),								-- Ranks are useless without an ORDER BY
 	RANK() OVER (PARTITION BY dept_name),	
 	RANK() OVER (ORDER BY salary_amt),
 	RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt),
 	
 	-- Dense_Ranks --> Equal values are ranked the same, without gaps in numbering
 	DENSE_RANK() OVER (), 
 	DENSE_RANK() OVER (PARTITION BY dept_name),
 	DENSE_RANK() OVER (ORDER BY salary_amt),
 	DENSE_RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt)
 FROM  	Payroll
 ORDER BY emp_no
 ;



 -- GREAT! Let's find the top 2 earners in every department
 -- =======================================================


 -- Solution with ROW_NUMBERS
 WITH 	ctePayroll AS (
 	SELECT 	*,
 		ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_row
 	FROM 	Payroll
 	--WHERE 	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) <= 2   --> WFs not allowed in WHERE clauses
 	)
 SELECT 	*
 FROM 	ctePayroll
 WHERE 	dept_row <= 2
 ; --> BUT hold on!  What about people who earn the same salaries?

 -- Solution with RANK
 WITH 	ctePayroll AS (
 	SELECT 	*,
 		RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_rank
 	FROM 	Payroll
 	)
 SELECT 	*
 FROM 	ctePayroll
 WHERE 	dept_rank <= 2
 ; --> HOWEVER, Production is just plain wrong???

 -- Solution with DENSE_RANK
 WITH 	ctePayroll AS (
 	SELECT 	*,
 		DENSE_RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_rank
 	FROM 	Payroll
 	)
 SELECT 	*
 FROM 	ctePayroll
 WHERE 	dept_rank <= 2
 --ORDER BY dept_rank		--> Note how we can order based on CTE name, cannot do this without CTE
 ;



 -- DEMO 3 complete








 -- percent_rank and cume_rank
 WITH base_data AS (
 	SELECT generate_series(1,10) AS series
 )
 SELECT 	*,
 	(PERCENT_RANK() OVER (ORDER BY series))::DEC(8,2),	-- to determine the relative standing of a value within a set or rows
 	(CUME_DIST() 	OVER (ORDER BY series))::DEC(8,2), 	-- Perfect for a 10-90 split in data
 	NTILE(2) 	OVER (ORDER BY series),			-- Split your dataset into x parts
 	NTILE(3) 	OVER (ORDER BY series)
 FROM base_data
 ORDER BY series
 ;


 --ntile with partitions
 SELECT 	*,

 	--ntile(bucket size)
 	ntile(2) OVER (PARTITION BY dept_name ORDER BY salary_amt),
 	ntile(3) OVER (PARTITION BY dept_name ORDER BY salary_amt)

 FROM  	Payroll
 ORDER BY dept_name, salary_amt;


 -- DEMO 4 complete









 -- LAG(), LEAD() --> Offset from current row

 -- Let's return the employee names before and after the current row:

 SELECT 	emp_no,
 		dept_name,
 		emp_name,
 		LAG(emp_name) 	OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Employee",
 		LEAD(emp_name) 	OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Next Employee"
 FROM 	Payroll
 ORDER BY emp_no;


 -- The syntax of LAG and LEAD
 -- LAG/LEAD  (column or expression [,offset] [,default])

 SELECT 	*,
 	LAG(emp_name) OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Employee",
 	LAG(emp_name, 2) OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Offset 2",
 	LAG(emp_name, 2, 'nada...') OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Offset 2 with Defaults"	
 FROM 	Payroll
 ORDER BY emp_no;


 -- FIRST_VALUE(), LAST_VALUE() ... Offset relative to beginning/end of the window frame

 SELECT 	*,
 	FIRST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Higest Earner",
 	MAX(salary_amt) 			OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Highest Salary",   
 	
 	LAST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Lowest Earner",  --> Notice how LAST and MIN move with Window Frame
 	MIN(salary_amt) 			OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Lowest Salary",
 	
 	LAST_VALUE(emp_name) 	OVER (PARTITION BY dept_name) AS "Lowest Earner for real",
 	MIN(salary_amt)			OVER (PARTITION BY dept_name) "Lowest Salary for real"
 FROM 	Payroll
 ORDER BY dept_name, salary_amt DESC
 ;

 -- Now the previous one failed miserably, so why the Highest Earner worked, but not the Lowest?
 -- Because the ORDER BY initiates the Roller Blind effect, thus MIN/MAX/FIRST/LAST is calculated as the Window Frame grows.


 -- Demo Complete



 -- UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING
 -- and.. RANGE vs ROWS

 SELECT 	*,

 	LAST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "ORDER BY - DEFAULT BEHAVIOUR",

 	LAST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
 							RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Lowest Earner - RANGE to Current Row",  --> So ORDER BY default is RANGE

 	LAST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
 							ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Lowest Earner - ROWS to Current Row", --> Overriding default behaviour here

 	LAST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC 
 							ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS "Lowest Earner"  --> Window functions are easy, there's almost no syntax or variations to remember...	
 FROM 	Payroll
 ;

 -- The catch-all phrase "ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING"

 SELECT 	*,
 	FIRST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC 
 							ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "Higest Earner",

 	LAST_VALUE(emp_name) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC 
 							ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS "Lowest Earner"  	
 FROM 	Payroll
 ;


 -- To clearly illustrate the difference between ROWS and RANGE:

 SELECT 	*,
 	SUM(salary_amt) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC 
 					RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Running Totals with RANGE",

 	SUM(salary_amt) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC 
 					ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Running Totals with ROWS"  	--> BTW This is how we do running totals easily.
 FROM 	Payroll
 ;


 -- Getting really specific about what rows to include...

 SELECT 	*,
 	sum(salary_amt) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
 					ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS "The rest",
 					
 	sum(salary_amt) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
 					ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS "Prev, Current & Next",

 	sum(salary_amt) 	OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
 					ROWS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) AS "Next 2"
 FROM 	Payroll
 ;


 -- Demo complete




 /*
 And now for Running Totals...
 */

 DROP TABLE IF EXISTS running_totals;
 CREATE TABLE running_totals AS (
 	SELECT 	generate_series(1,5000) AS my_id,
 		(random()* 100)::decimal(15,2) AS Amount	
 )
 ;

 -- Let's take a look at our data
 SELECT * FROM running_totals;
 SELECT count(*), sum(amount)::money FROM running_totals;


 -- Traditional method
 SELECT 	t1.my_id, 
 		t1.amount, 
 		SUM(t2.amount) AS running_total
 FROM running_totals t1
 INNER JOIN running_totals t2 ON t1.my_id >= t2.my_id
 GROUP BY t1.my_id, t1.amount
 ORDER BY t1.my_id;

 -- Window Functions
 SELECT 	*,
 	SUM(amount) OVER (ORDER BY my_id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
 FROM 	running_totals
 ORDER BY my_id;


 --- DEMO COMPLETE


 -- Watch out!

 -- Issues with Distinct...
 SELECT 
 	*,
 	COUNT(DISTINCT dept_name) OVER ()
 FROM Payroll
 ORDER BY dept_name, emp_name;

 -- You'd have to use GROUP BY (and list every column in the table), or some trickery...
 WITH dept_stats AS (
 	SELECT 
 		COUNT(DISTINCT dept_name) AS dept_count
 	FROM Payroll
 	)
 SELECT	*,
 	(SELECT * FROM dept_stats)
 FROM Payroll;


 --Using Window Funcitons in WHERE ... You shall not pass!
 SELECT 	*,
 		ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_row
 FROM 	Payroll
 WHERE 	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) <= 2   --> WFs not allowed in WHERE clauses
 ;


 -- thanks :)
	-- Window Function examples
	-- PostgreSQL conference South Africa 2018
	-- By Willem Booysen
	-- Youtube: https://www.youtube.com/watch?v=blHEnrYwySE


	-- Create database and templates for demo
	DROP DATABASE IF EXISTS WindowFunctions;
	CREATE DATABASE WindowFunctions;

	-- Create table Payroll
	DROP TABLE IF EXISTS Payroll CASCADE;
	CREATE TABLE Payroll (
	emp_no INTEGER,
	emp_name VARCHAR(20) NOT NULL,
	dept_name VARCHAR(15) NOT NULL,
	salary_amt DECIMAL (8,2) NOT NULL CHECK (salary_amt > 0.00)
	);


	-- Populate table Payroll
	INSERT INTO Payroll (emp_no, emp_name, dept_name, salary_amt ) VALUES
	(1, 'Mark Stone', 'Accounting', 16000.00),
	(2, 'Maria Stone', 'Accounting', 13000.00),
	(3, 'Geetha Singh', 'Accounting', 13000.00),
	(4, 'Richard Hathaway', 'Accounting', 14000.00),
	(5, 'Joseph Bastion', 'Accounting', 14000.00),

	(6, 'Arthur Prince', 'Production',12000.00),
	(7, 'Adele Morse', 'Production', 13000.00),
	(8, 'Sheamus O Kelly', 'Production', 24000.00),
	(9, 'Sheilah Flask', 'Production', 24000.00),
	(10, 'Brian James', 'Production', 16000.00),
	(11, 'Adam Scott', 'Production', 16000.00),

	(12, 'Maurice Moss', 'IT',12000.00),
	(13, 'Roy', 'IT', 12001.00),
	(14, 'Jen Barber', 'IT', 28000.00),
	(15, 'Richard Hammond', 'IT', 10000.00),
	(16, 'James May', 'IT', 10000.00),
	(18, 'Jeremy Clarkson', 'IT', 10000.00),
	(17, 'John Doe', 'IT', 100000.00) --Note how emp_no was switched around here...
	;

	SELECT * FROM Payroll;





	-- Examples: First the traditional method

	-- Basic aggregate functions, using the department as the Group
	SELECT
	dept_name,
	COUNT(*) AS dept_employee_count,
	MIN(salary_amt) AS min_dept_salary,
	MAX(salary_amt) AS max_dept_salary,
	AVG(salary_amt)::DECIMAL(8,2) AS average_dept_salary,
	SUM(salary_amt) AS total_dept_salaries
	FROM Payroll
	GROUP BY dept_name
	ORDER BY dept_name
	;
	-- The individual rows have been grouped into subsets and returned in Department Groups as single rows.


	-- If we wanted this information to be compared to each salaried employee, we would normally JOIN with a CTE
	WITH Dept_stats AS (
	SELECT
	dept_name,
	COUNT(*) AS dept_employee_count,
	MIN(salary_amt) AS min_dept_salary,
	MAX(salary_amt) AS max_dept_salary,
	AVG(salary_amt)::DECIMAL(8,2) AS average_dept_salary,
	SUM(salary_amt) AS total_dept_salaries
	FROM Payroll
	GROUP BY dept_name
	ORDER BY dept_name
	)
	SELECT
	Payroll.*,
	(Select count(*) from Payroll) AS total_employee_count,
	Dept_stats.dept_employee_count,
	Dept_stats.min_dept_salary,
	Dept_stats.max_dept_salary,
	Dept_stats.average_dept_salary,
	Dept_stats.total_dept_salaries
	FROM Payroll
	LEFT OUTER JOIN Dept_stats ON (Payroll.dept_name = Dept_stats.dept_name)
	ORDER BY Payroll.dept_name, emp_name
	;


	-- Window Function --> How clean is this in comparison?

	SELECT
	*,
	COUNT(*) OVER () AS total_employee_count,
	COUNT(*) OVER (PARTITION BY dept_name) AS dept_employee_count,
	MIN(salary_amt) OVER (PARTITION BY dept_name) AS min_dept_salary,
	MAX(salary_amt) OVER (PARTITION BY dept_name) AS max_dept_salary,
	AVG(salary_amt) OVER (PARTITION BY dept_name)::DECIMAL(8,2) AS avg_dept_sal,
	SUM(salary_amt) OVER (PARTITION BY dept_name) AS total_dept_salaries
	FROM Payroll
	ORDER BY dept_name, emp_name;

	-- DEMO 1 complete









	-- Basic Syntax:
	-- A window function call always contains an OVER clause.
	-- "( )" --> This is the Window

	SELECT
	*,
	COUNT(*) OVER () AS total_employee_count,
	COUNT(*) OVER (PARTITION BY dept_name) AS dept_employee_count
	FROM Payroll
	;




	SELECT
	*,
	COUNT(*) OVER () AS total_employee_count,
	COUNT(*) OVER (PARTITION BY dept_name) AS dept_employee_count
	FROM Payroll
	WHERE dept_name = 'IT' --> Windows into your data are affected by the WHERE clause. Windows cannot work outside your base data set.
	;



	SELECT
	*,
	COUNT(*) OVER () AS total_employee_count,
	COUNT(*) OVER (PARTITION BY dept_name) AS dept_employee_count,
	MIN(salary_amt) OVER (PARTITION BY dept_name) AS min_dept_salary,
	MAX(salary_amt) OVER (PARTITION BY dept_name) AS max_dept_salary,
	AVG(salary_amt) OVER (PARTITION BY dept_name)::DECIMAL(8,2) AS avg_dept_sal,
	SUM(salary_amt) OVER (PARTITION BY dept_name) AS total_dept_salaries
	FROM Payroll
	ORDER BY dept_name, emp_name;



	-- And now for some handy comparison figures...
	SELECT
	*,
	(salary_amt / (SUM(salary_amt) OVER (PARTITION BY dept_name)) * 100)::DECIMAL(18,2) AS Dept_Percentage,
	(salary_amt / (SUM(salary_amt) OVER () ) * 100)::DECIMAL(18,2) AS Company_Percentage
	FROM Payroll
	ORDER BY dept_name, dept_percentage;



	-- ORDER BY examples here --> Skipping to ROW_NUMBER for the moment as this best illustrates the various Window orders.
	-- and how a Window order is not affected by the outer SELECT order.

	SELECT
	*,
	-- Note the difference between row_number and the emp_no in terms of when it was captured.
	ROW_NUMBER() OVER () AS "Base Row No",

	-- No Partition, just an ORDER BY, thus the whole base resultset is used.
	ROW_NUMBER() OVER (ORDER BY salary_amt) AS "Salary Row No",

	-- Order each partition first, then assign row numbers.
	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt) AS "Dept,Salary Row No"
	FROM Payroll
	ORDER BY emp_no;

	-- DEMO 2 complete







	-- Row_Order, Rank & Dense_rank:

	-- The natural order of the table as we declared it.
	SELECT *
	FROM Payroll
	ORDER BY emp_no;

	-- ROW_NUMBER a really good way to explain the relationship between Windows and Partitions:

	SELECT *,
	-- Row_Number assigns a unique integer to each row within your partition within your window.
	ROW_NUMBER() OVER (), -- Note the difference between row_number and the emp_no in terms of when it was captured.
	ROW_NUMBER() OVER (ORDER BY salary_amt), -- No Partition, just an ORDER BY, thus the whole base resultset is used.
	ROW_NUMBER() OVER (PARTITION BY dept_name),
	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt),

	-- Ranks --> Equal values are ranked the same, creating gaps in numbering
	RANK() OVER (), -- Ranks are useless without an ORDER BY
	RANK() OVER (PARTITION BY dept_name),
	RANK() OVER (ORDER BY salary_amt),
	RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt),

	-- Dense_Ranks --> Equal values are ranked the same, without gaps in numbering
	DENSE_RANK() OVER (),
	DENSE_RANK() OVER (PARTITION BY dept_name),
	DENSE_RANK() OVER (ORDER BY salary_amt),
	DENSE_RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt)
	FROM Payroll
	ORDER BY emp_no
	;



	-- GREAT! Let's find the top 2 earners in every department
	-- =======================================================


	-- Solution with ROW_NUMBERS
	WITH ctePayroll AS (
	SELECT *,
	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_row
	FROM Payroll
	--WHERE ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) <= 2 --> WFs not allowed in WHERE clauses
	)
	SELECT *
	FROM ctePayroll
	WHERE dept_row <= 2
	; --> BUT hold on! What about people who earn the same salaries?

	-- Solution with RANK
	WITH ctePayroll AS (
	SELECT *,
	RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_rank
	FROM Payroll
	)
	SELECT *
	FROM ctePayroll
	WHERE dept_rank <= 2
	; --> HOWEVER, Production is just plain wrong???

	-- Solution with DENSE_RANK
	WITH ctePayroll AS (
	SELECT *,
	DENSE_RANK() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_rank
	FROM Payroll
	)
	SELECT *
	FROM ctePayroll
	WHERE dept_rank <= 2
	--ORDER BY dept_rank --> Note how we can order based on CTE name, cannot do this without CTE
	;



	-- DEMO 3 complete








	-- percent_rank and cume_rank
	WITH base_data AS (
	SELECT generate_series(1,10) AS series
	)
	SELECT *,
	(PERCENT_RANK() OVER (ORDER BY series))::DEC(8,2), -- to determine the relative standing of a value within a set or rows
	(CUME_DIST() OVER (ORDER BY series))::DEC(8,2), -- Perfect for a 10-90 split in data
	NTILE(2) OVER (ORDER BY series), -- Split your dataset into x parts
	NTILE(3) OVER (ORDER BY series)
	FROM base_data
	ORDER BY series
	;


	--ntile with partitions
	SELECT *,

	--ntile(bucket size)
	ntile(2) OVER (PARTITION BY dept_name ORDER BY salary_amt),
	ntile(3) OVER (PARTITION BY dept_name ORDER BY salary_amt)

	FROM Payroll
	ORDER BY dept_name, salary_amt;


	-- DEMO 4 complete









	-- LAG(), LEAD() --> Offset from current row

	-- Let's return the employee names before and after the current row:

	SELECT emp_no,
	dept_name,
	emp_name,
	LAG(emp_name) OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Employee",
	LEAD(emp_name) OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Next Employee"
	FROM Payroll
	ORDER BY emp_no;


	-- The syntax of LAG and LEAD
	-- LAG/LEAD (column or expression [,offset] [,default])

	SELECT *,
	LAG(emp_name) OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Employee",
	LAG(emp_name, 2) OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Offset 2",
	LAG(emp_name, 2, 'nada...') OVER (PARTITION BY dept_name ORDER BY emp_no) AS "Previous Offset 2 with Defaults"
	FROM Payroll
	ORDER BY emp_no;


	-- FIRST_VALUE(), LAST_VALUE() ... Offset relative to beginning/end of the window frame

	SELECT *,
	FIRST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Higest Earner",
	MAX(salary_amt) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Highest Salary",

	LAST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Lowest Earner", --> Notice how LAST and MIN move with Window Frame
	MIN(salary_amt) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "Lowest Salary",

	LAST_VALUE(emp_name) OVER (PARTITION BY dept_name) AS "Lowest Earner for real",
	MIN(salary_amt) OVER (PARTITION BY dept_name) "Lowest Salary for real"
	FROM Payroll
	ORDER BY dept_name, salary_amt DESC
	;

	-- Now the previous one failed miserably, so why the Highest Earner worked, but not the Lowest?
	-- Because the ORDER BY initiates the Roller Blind effect, thus MIN/MAX/FIRST/LAST is calculated as the Window Frame grows.


	-- Demo Complete



	-- UNBOUNDED PRECEDING and UNBOUNDED FOLLOWING
	-- and.. RANGE vs ROWS

	SELECT *,

	LAST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS "ORDER BY - DEFAULT BEHAVIOUR",

	LAST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Lowest Earner - RANGE to Current Row", --> So ORDER BY default is RANGE

	LAST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Lowest Earner - ROWS to Current Row", --> Overriding default behaviour here

	LAST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS "Lowest Earner" --> Window functions are easy, there's almost no syntax or variations to remember...
	FROM Payroll
	;

	-- The catch-all phrase "ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING"

	SELECT *,
	FIRST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS "Higest Earner",

	LAST_VALUE(emp_name) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS "Lowest Earner"
	FROM Payroll
	;


	-- To clearly illustrate the difference between ROWS and RANGE:

	SELECT *,
	SUM(salary_amt) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Running Totals with RANGE",

	SUM(salary_amt) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS "Running Totals with ROWS" --> BTW This is how we do running totals easily.
	FROM Payroll
	;


	-- Getting really specific about what rows to include...

	SELECT *,
	sum(salary_amt) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS "The rest",

	sum(salary_amt) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) AS "Prev, Current & Next",

	sum(salary_amt) OVER (PARTITION BY dept_name ORDER BY salary_amt DESC
	ROWS BETWEEN 1 FOLLOWING AND 2 FOLLOWING) AS "Next 2"
	FROM Payroll
	;


	-- Demo complete




	/*
	And now for Running Totals...
	*/

	DROP TABLE IF EXISTS running_totals;
	CREATE TABLE running_totals AS (
	SELECT generate_series(1,5000) AS my_id,
	(random()* 100)::decimal(15,2) AS Amount
	)
	;

	-- Let's take a look at our data
	SELECT * FROM running_totals;
	SELECT count(*), sum(amount)::money FROM running_totals;


	-- Traditional method
	SELECT t1.my_id,
	t1.amount,
	SUM(t2.amount) AS running_total
	FROM running_totals t1
	INNER JOIN running_totals t2 ON t1.my_id >= t2.my_id
	GROUP BY t1.my_id, t1.amount
	ORDER BY t1.my_id;

	-- Window Functions
	SELECT *,
	SUM(amount) OVER (ORDER BY my_id ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
	FROM running_totals
	ORDER BY my_id;


	--- DEMO COMPLETE


	-- Watch out!

	-- Issues with Distinct...
	SELECT
	*,
	COUNT(DISTINCT dept_name) OVER ()
	FROM Payroll
	ORDER BY dept_name, emp_name;

	-- You'd have to use GROUP BY (and list every column in the table), or some trickery...
	WITH dept_stats AS (
	SELECT
	COUNT(DISTINCT dept_name) AS dept_count
	FROM Payroll
	)
	SELECT *,
	(SELECT * FROM dept_stats)
	FROM Payroll;


	--Using Window Funcitons in WHERE ... You shall not pass!
	SELECT *,
	ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) AS dept_row
	FROM Payroll
	WHERE ROW_NUMBER() OVER (PARTITION BY dept_name ORDER BY salary_amt DESC) <= 2 --> WFs not allowed in WHERE clauses
	;


	-- thanks :)