Last active
February 21, 2018 15:40
-
-
Save jrjames83/b496877766194f7a59bd72876134c7e8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| -- want to get counts of people who's last name starts with a vowel (AEIOU) | |
| -- case, substring | |
| -- SUM(CASE WHEN x.thing = 'whatever' THEN 1 ELSE 0 END) as counts_of_whatever | |
| SELECT t.my_case_outcome, count(*) FROM ( | |
| SELECT c.*, substring(c.last_name, '^[AEIOUaeiou]') as x, | |
| CASE | |
| WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN 'last_starts_vow' | |
| ELSE 'novowel' | |
| END as my_case_outcome, | |
| CASE | |
| WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN substring(c.last_name, '^[AEIOUaeiou]') | |
| ELSE 'novowel' | |
| END as the_letter_or_not | |
| FROM customer c | |
| )t | |
| GROUP BY 1 | |
| ---- | |
| SELECT t.*, | |
| t.payment_date - t.prior_order as some_interval, -- raw interval | |
| EXTRACT(epoch FROM t.payment_date - t.prior_order ) / 3600 as hours_since-- interval to hours | |
| FROM ( | |
| SELECT p.*, | |
| lag(p.payment_date) OVER (PARTITION BY p.customer_id) as prior_order | |
| FROM payment p | |
| )t | |
| -- Alternate Syntax and Some Moving Calculations | |
| SELECT p.* , | |
| avg(p.amount) over w2 as avg_over_prior7, | |
| avg(p.amount) over w2 as back3_fwd_3_avg | |
| FROM payment p | |
| WINDOW w AS (PARTITION BY p.customer_id ORDER BY p.payment_id ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING), | |
| w2 AS (PARTITION BY p.customer_id ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING) | |
| ----- | |
| -- CREATE TABLE IF NOT EXISTS customer_sources ( | |
| -- customer_id integer REFERENCES customer(customer_id) ON DELETE RESTRICT, | |
| -- traffic_source text, | |
| -- PRIMARY KEY(customer_id) | |
| -- ); | |
| SELECT c.customer_id, c.first_name, c.email, cs.* | |
| FROM customer c JOIN customer_sources cs ON cs.customer_id = c.customer_id | |
| -- Source / medium, cost, month | |
| DROP TABLE source_spend_all; | |
| CREATE TABLE IF NOT EXISTS source_spend_all ( | |
| spend_source text, | |
| spend integer, | |
| visits integer | |
| ); | |
| SELECT t.spend_source, max(t.spend)::money as spend, | |
| count(*) as customers, | |
| (max(t.spend)/count(*))::money as CPA, | |
| (SUM(t.LTV) / 3)::money as total_gross_margin | |
| FROM ( | |
| SELECT ssa.*,cs.*, ( | |
| SELECT sum(p.amount) FROM payment p WHERE cs.customer_id = p.customer_id | |
| ) as LTV | |
| FROM source_spend_all ssa | |
| JOIN customer_sources cs ON cs.traffic_source = ssa.spend_source | |
| )t | |
| GROUP BY 1 ORDER BY 2 DESC | |
| ---- | |
| -- First, there's no right way to do this! Just try to get | |
| -- data in a format that you can understand, then iterate | |
| -- I like breaking it into pieces, buyerid/email, first order, last order, total spend | |
| WITH base_table AS ( | |
| SELECT p.customer_id, p.payment_date, p.payment_id, | |
| row_number() OVER(partition by p.customer_id ORDER BY p.payment_date ASC) as order_rank_early, | |
| row_number() OVER(partition by p.customer_id ORDER BY p.payment_date DESC) as order_rank_late, | |
| ( | |
| SELECT SUM(p2.amount) FROM payment p2 WHERE p2.customer_id = p.customer_id | |
| ) as LTV | |
| FROM payment p | |
| GROUP BY 1,2,3 | |
| ORDER BY 1,2 | |
| ), second_table AS ( | |
| SELECT bt.* | |
| FROM | |
| base_table bt | |
| WHERE bt.order_rank_early = 1 OR bt.order_rank_late = 1 | |
| ) | |
| SELECT st.customer_id, min(st.payment_date), max(st.payment_date), min(st.ltv) | |
| FROM second_table st | |
| GROUP BY 1 ORDER BY 1 | |
| -- still need their top rating as well as all rating rented from (R, PG, etc) | |
| SELECT r.customer_id, r.inventory_id, i.film_id, f.rating | |
| FROM rental r -- start with the activity | |
| JOIN inventory i on i.inventory_id = r.inventory_id | |
| JOIN film f ON f.film_id = i.film_id | |
| -- still need their top rating as well as all rating rented from (R, PG, etc) | |
| -- not worry about ties: | |
| SELECT * FROM ( | |
| SELECT r.customer_id, f.rating, COUNT(*), | |
| row_number() OVER(partition by r.customer_id ORDER BY COUNT(*) DESC) as rental_freq_rank | |
| FROM rental r -- start with the activity | |
| JOIN inventory i on i.inventory_id = r.inventory_id | |
| JOIN film f ON f.film_id = i.film_id | |
| GROUP BY 1,2 | |
| ORDER BY 1, 3 DESC | |
| )t WHERE t.rental_freq_rank = 1 | |
| SELECT r.customer_id, array_agg(distinct f.rating) | |
| FROM rental r -- start with the activity | |
| JOIN inventory i on i.inventory_id = r.inventory_id | |
| JOIN film f ON f.film_id = i.film_id | |
| GROUP BY 1 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment