Skip to content

Instantly share code, notes, and snippets.

@jrjames83
Last active February 21, 2018 15:40
Show Gist options
  • Save jrjames83/b496877766194f7a59bd72876134c7e8 to your computer and use it in GitHub Desktop.
Save jrjames83/b496877766194f7a59bd72876134c7e8 to your computer and use it in GitHub Desktop.
-- want to get counts of people who's last name starts with a vowel (AEIOU)
-- case, substring
-- SUM(CASE WHEN x.thing = 'whatever' THEN 1 ELSE 0 END) as counts_of_whatever
SELECT t.my_case_outcome, count(*) FROM (
SELECT c.*, substring(c.last_name, '^[AEIOUaeiou]') as x,
CASE
WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN 'last_starts_vow'
ELSE 'novowel'
END as my_case_outcome,
CASE
WHEN substring(c.last_name, '^[AEIOUaeiou]') IS NOT NULL THEN substring(c.last_name, '^[AEIOUaeiou]')
ELSE 'novowel'
END as the_letter_or_not
FROM customer c
)t
GROUP BY 1
----
SELECT t.*,
t.payment_date - t.prior_order as some_interval, -- raw interval
EXTRACT(epoch FROM t.payment_date - t.prior_order ) / 3600 as hours_since-- interval to hours
FROM (
SELECT p.*,
lag(p.payment_date) OVER (PARTITION BY p.customer_id) as prior_order
FROM payment p
)t
-- Alternate Syntax and Some Moving Calculations
SELECT p.* ,
avg(p.amount) over w2 as avg_over_prior7,
avg(p.amount) over w2 as back3_fwd_3_avg
FROM payment p
WINDOW w AS (PARTITION BY p.customer_id ORDER BY p.payment_id ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING),
w2 AS (PARTITION BY p.customer_id ROWS BETWEEN 7 PRECEDING AND 0 FOLLOWING)
-----
-- CREATE TABLE IF NOT EXISTS customer_sources (
-- customer_id integer REFERENCES customer(customer_id) ON DELETE RESTRICT,
-- traffic_source text,
-- PRIMARY KEY(customer_id)
-- );
SELECT c.customer_id, c.first_name, c.email, cs.*
FROM customer c JOIN customer_sources cs ON cs.customer_id = c.customer_id
-- Source / medium, cost, month
DROP TABLE source_spend_all;
CREATE TABLE IF NOT EXISTS source_spend_all (
spend_source text,
spend integer,
visits integer
);
SELECT t.spend_source, max(t.spend)::money as spend,
count(*) as customers,
(max(t.spend)/count(*))::money as CPA,
(SUM(t.LTV) / 3)::money as total_gross_margin
FROM (
SELECT ssa.*,cs.*, (
SELECT sum(p.amount) FROM payment p WHERE cs.customer_id = p.customer_id
) as LTV
FROM source_spend_all ssa
JOIN customer_sources cs ON cs.traffic_source = ssa.spend_source
)t
GROUP BY 1 ORDER BY 2 DESC
----
-- First, there's no right way to do this! Just try to get
-- data in a format that you can understand, then iterate
-- I like breaking it into pieces, buyerid/email, first order, last order, total spend
WITH base_table AS (
SELECT p.customer_id, p.payment_date, p.payment_id,
row_number() OVER(partition by p.customer_id ORDER BY p.payment_date ASC) as order_rank_early,
row_number() OVER(partition by p.customer_id ORDER BY p.payment_date DESC) as order_rank_late,
(
SELECT SUM(p2.amount) FROM payment p2 WHERE p2.customer_id = p.customer_id
) as LTV
FROM payment p
GROUP BY 1,2,3
ORDER BY 1,2
), second_table AS (
SELECT bt.*
FROM
base_table bt
WHERE bt.order_rank_early = 1 OR bt.order_rank_late = 1
)
SELECT st.customer_id, min(st.payment_date), max(st.payment_date), min(st.ltv)
FROM second_table st
GROUP BY 1 ORDER BY 1
-- still need their top rating as well as all rating rented from (R, PG, etc)
SELECT r.customer_id, r.inventory_id, i.film_id, f.rating
FROM rental r -- start with the activity
JOIN inventory i on i.inventory_id = r.inventory_id
JOIN film f ON f.film_id = i.film_id
-- still need their top rating as well as all rating rented from (R, PG, etc)
-- not worry about ties:
SELECT * FROM (
SELECT r.customer_id, f.rating, COUNT(*),
row_number() OVER(partition by r.customer_id ORDER BY COUNT(*) DESC) as rental_freq_rank
FROM rental r -- start with the activity
JOIN inventory i on i.inventory_id = r.inventory_id
JOIN film f ON f.film_id = i.film_id
GROUP BY 1,2
ORDER BY 1, 3 DESC
)t WHERE t.rental_freq_rank = 1
SELECT r.customer_id, array_agg(distinct f.rating)
FROM rental r -- start with the activity
JOIN inventory i on i.inventory_id = r.inventory_id
JOIN film f ON f.film_id = i.film_id
GROUP BY 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment