jrjames83 · February 21, 2018 15:40
diff --git a/course2_examples.sql b/course2_examples.sql
 -- want to get counts of people who's last name starts with a vowel (AEIOU)
 -- case, substring

 -- SUM(CASE WHEN x.thing = 'whatever' THEN 1 ELSE 0 END) as counts_of_whatever

 SELECT t.my_case_outcome, count(*) FROM (
 SELECT c.*, substring(c.last_name, '^[AEIOUaeiou]') as x,

 CASE
 WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN 'last_starts_vow'
 ELSE 'novowel' 
 END as my_case_outcome,
 
 CASE
 WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN  substring(c.last_name, '^[AEIOUaeiou]') 
 ELSE 'novowel' 
 END as the_letter_or_not
 
 FROM customer c
 )t

 GROUP BY 1


 ----

 SELECT t.*, 
 t.payment_date - t.prior_order as some_interval, -- raw interval
 EXTRACT(epoch FROM t.payment_date - t.prior_order ) / 3600 as hours_since-- interval to hours
 
 FROM (
 	SELECT p.*, 
 		lag(p.payment_date) OVER (PARTITION BY p.customer_id) as prior_order
 	FROM payment p
 )t

 -- Alternate Syntax and Some Moving Calculations

 SELECT p.* , 
 	avg(p.amount) over w2 as avg_over_prior7,
 	avg(p.amount) over w2 as back3_fwd_3_avg
 FROM payment p

 WINDOW w AS (PARTITION BY p.customer_id ORDER BY p.payment_id ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING),
       w2 AS (PARTITION BY p.customer_id  ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING)
       
       
       
       
 -----

 -- CREATE TABLE IF NOT EXISTS customer_sources (
 --    customer_id integer REFERENCES customer(customer_id) ON DELETE RESTRICT,
 --    traffic_source text,
 --    PRIMARY KEY(customer_id)
 -- );

 SELECT c.customer_id, c.first_name, c.email, cs.*
 FROM customer c JOIN customer_sources cs ON cs.customer_id = c.customer_id

 -- Source / medium, cost, month

 DROP TABLE source_spend_all;

 CREATE TABLE IF NOT EXISTS source_spend_all (
   spend_source text,
   spend integer,
   visits integer
 );

 SELECT t.spend_source, max(t.spend)::money as spend, 
 	count(*) as customers, 
 	(max(t.spend)/count(*))::money as CPA, 
 	(SUM(t.LTV) / 3)::money as total_gross_margin  

 	FROM (
 	SELECT ssa.*,cs.*, (
         SELECT sum(p.amount) FROM payment p WHERE cs.customer_id = p.customer_id
 	) as LTV
 	FROM source_spend_all ssa 
 	JOIN customer_sources cs ON cs.traffic_source = ssa.spend_source
 )t 

 GROUP BY 1 ORDER BY 2 DESC



 ----

 -- First, there's no right way to do this! Just try to get 
 -- data in a format that you can understand, then iterate


 -- I like breaking it into pieces, buyerid/email, first order, last order, total spend

 WITH base_table AS (
 	SELECT p.customer_id, p.payment_date, p.payment_id, 
 	row_number() OVER(partition by p.customer_id ORDER BY p.payment_date ASC) as order_rank_early,
 	row_number() OVER(partition by p.customer_id ORDER BY p.payment_date DESC) as order_rank_late,
 	(
 	  SELECT SUM(p2.amount) FROM payment p2 WHERE p2.customer_id = p.customer_id
 	) as LTV
 	FROM payment p
 	GROUP BY 1,2,3
 	ORDER BY 1,2
 ), second_table AS (

 	SELECT bt.* 
 	FROM 
 	base_table bt 
 	WHERE bt.order_rank_early = 1 OR bt.order_rank_late = 1
 )

 SELECT st.customer_id, min(st.payment_date), max(st.payment_date), min(st.ltv)
 FROM second_table st
 GROUP BY 1 ORDER BY 1



 -- still need their top rating as well as all rating rented from (R, PG, etc)
 SELECT r.customer_id, r.inventory_id, i.film_id, f.rating
 FROM rental r -- start with the activity
 	JOIN inventory i on i.inventory_id = r.inventory_id
 	JOIN film f ON f.film_id = i.film_id


 -- still need their top rating as well as all rating rented from (R, PG, etc)
 -- not worry about ties:

 SELECT * FROM (
 SELECT r.customer_id, f.rating, COUNT(*),
 row_number() OVER(partition by r.customer_id ORDER BY COUNT(*) DESC) as rental_freq_rank
 FROM rental r -- start with the activity
 	JOIN inventory i on i.inventory_id = r.inventory_id
 	JOIN film f ON f.film_id = i.film_id
 GROUP BY 1,2
 ORDER BY 1, 3 DESC
 )t WHERE t.rental_freq_rank = 1




 SELECT r.customer_id, array_agg(distinct f.rating)
 FROM rental r -- start with the activity
 	JOIN inventory i on i.inventory_id = r.inventory_id
 	JOIN film f ON f.film_id = i.film_id
 GROUP BY 1
	-- want to get counts of people who's last name starts with a vowel (AEIOU)
	-- case, substring

	-- SUM(CASE WHEN x.thing = 'whatever' THEN 1 ELSE 0 END) as counts_of_whatever

	SELECT t.my_case_outcome, count(*) FROM (
	SELECT c.*, substring(c.last_name, '^[AEIOUaeiou]') as x,

	CASE
	WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN 'last_starts_vow'
	ELSE 'novowel'
	END as my_case_outcome,

	CASE
	WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN substring(c.last_name, '^[AEIOUaeiou]')
	ELSE 'novowel'
	END as the_letter_or_not

	FROM customer c
	)t

	GROUP BY 1


	----

	SELECT t.*,
	t.payment_date - t.prior_order as some_interval, -- raw interval
	EXTRACT(epoch FROM t.payment_date - t.prior_order ) / 3600 as hours_since-- interval to hours

	FROM (
	SELECT p.*,
	lag(p.payment_date) OVER (PARTITION BY p.customer_id) as prior_order
	FROM payment p
	)t

	-- Alternate Syntax and Some Moving Calculations

	SELECT p.* ,
	avg(p.amount) over w2 as avg_over_prior7,
	avg(p.amount) over w2 as back3_fwd_3_avg
	FROM payment p

	WINDOW w AS (PARTITION BY p.customer_id ORDER BY p.payment_id ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING),
	w2 AS (PARTITION BY p.customer_id ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING)




	-----

	-- CREATE TABLE IF NOT EXISTS customer_sources (
	-- customer_id integer REFERENCES customer(customer_id) ON DELETE RESTRICT,
	-- traffic_source text,
	-- PRIMARY KEY(customer_id)
	-- );

	SELECT c.customer_id, c.first_name, c.email, cs.*
	FROM customer c JOIN customer_sources cs ON cs.customer_id = c.customer_id

	-- Source / medium, cost, month

	DROP TABLE source_spend_all;

	CREATE TABLE IF NOT EXISTS source_spend_all (
	spend_source text,
	spend integer,
	visits integer
	);

	SELECT t.spend_source, max(t.spend)::money as spend,
	count(*) as customers,
	(max(t.spend)/count(*))::money as CPA,
	(SUM(t.LTV) / 3)::money as total_gross_margin

	FROM (
	SELECT ssa.,cs., (
	SELECT sum(p.amount) FROM payment p WHERE cs.customer_id = p.customer_id
	) as LTV
	FROM source_spend_all ssa
	JOIN customer_sources cs ON cs.traffic_source = ssa.spend_source
	)t

	GROUP BY 1 ORDER BY 2 DESC



	----

	-- First, there's no right way to do this! Just try to get
	-- data in a format that you can understand, then iterate


	-- I like breaking it into pieces, buyerid/email, first order, last order, total spend

	WITH base_table AS (
	SELECT p.customer_id, p.payment_date, p.payment_id,
	row_number() OVER(partition by p.customer_id ORDER BY p.payment_date ASC) as order_rank_early,
	row_number() OVER(partition by p.customer_id ORDER BY p.payment_date DESC) as order_rank_late,
	(
	SELECT SUM(p2.amount) FROM payment p2 WHERE p2.customer_id = p.customer_id
	) as LTV
	FROM payment p
	GROUP BY 1,2,3
	ORDER BY 1,2
	), second_table AS (

	SELECT bt.*
	FROM
	base_table bt
	WHERE bt.order_rank_early = 1 OR bt.order_rank_late = 1
	)

	SELECT st.customer_id, min(st.payment_date), max(st.payment_date), min(st.ltv)
	FROM second_table st
	GROUP BY 1 ORDER BY 1



	-- still need their top rating as well as all rating rented from (R, PG, etc)
	SELECT r.customer_id, r.inventory_id, i.film_id, f.rating
	FROM rental r -- start with the activity
	JOIN inventory i on i.inventory_id = r.inventory_id
	JOIN film f ON f.film_id = i.film_id


	-- still need their top rating as well as all rating rented from (R, PG, etc)
	-- not worry about ties:

	SELECT * FROM (
	SELECT r.customer_id, f.rating, COUNT(*),
	row_number() OVER(partition by r.customer_id ORDER BY COUNT(*) DESC) as rental_freq_rank
	FROM rental r -- start with the activity
	JOIN inventory i on i.inventory_id = r.inventory_id
	JOIN film f ON f.film_id = i.film_id
	GROUP BY 1,2
	ORDER BY 1, 3 DESC
	)t WHERE t.rental_freq_rank = 1




	SELECT r.customer_id, array_agg(distinct f.rating)
	FROM rental r -- start with the activity
	JOIN inventory i on i.inventory_id = r.inventory_id
	JOIN film f ON f.film_id = i.film_id
	GROUP BY 1