Skip to content

Instantly share code, notes, and snippets.

@snakers4
Last active March 3, 2017 12:03
Show Gist options
  • Save snakers4/fc8f6de705a79a6a846ce11a1dc6e73c to your computer and use it in GitHub Desktop.
Save snakers4/fc8f6de705a79a6a846ce11a1dc6e73c to your computer and use it in GitHub Desktop.
Client pipeline example
-- using multiple subqueries to ensure no mistakes
SELECT
average2.weekly as week,
ROUND(AVG(average2.backlog),1) as backlog
FROM
(
SELECT
SUM(average.is_backlog) as backlog,
average.weekly,
average.current_day
FROM
(
SELECT
final_calcs.customer_id as customer_id,
final_calcs.stats_applicable * final_calcs.is_backlog as is_backlog,
final_calcs.current_day as current_day,
date_trunc('week', final_calcs.current_day::date)::DATE AS weekly
FROM
(
SELECT
--raw_data.*,
/*
raw_data.current_day
*/
raw_data.customer_id as customer_id,
-- we cannot apply our stats to the future that "has not happened yet"
CASE WHEN
(raw_data.current_day - date_trunc('day', raw_data.state_1)) < ('0 days'::INTERVAL)
THEN 0
ELSE 1
END as stats_applicable,
raw_data.current_day as current_day,
CASE
-- If customer reaches stage #2 on the same day or the next day he never appears in the backlog
WHEN raw_data.never_in_bl = 1 THEN 0
-- We consider the customer being in a backlog if he doesn't reach stage #2 between 1 and 14 days after reaching stage #1
ELSE
CASE
WHEN raw_data.never_st_2 = 1 AND ( raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day'::INTERVAL AND raw_data.max_bl_day_never_reach ) THEN 1
ELSE
-- If customer reaches stage #2 later than 14 days after stage #1 he's appearing in backlog only until day 14
CASE
WHEN raw_data.reached_late = 1 AND ( raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day'::INTERVAL AND raw_data.max_bl_day_never_reach ) THEN 1
ELSE
CASE
WHEN raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day'::INTERVAL AND raw_data.state_2 - '1 day'::INTERVAL THEN 1
ELSE 0
END
END
END
END as is_backlog
FROM
(
SELECT
*
FROM
(
-- Generate basic data markers
SELECT
d."id" as customer_id,
d.state_1,
d.state_2,
-- If customer reaches stage #2 on the same day or the next day he never appears in the backlog
CASE WHEN (date_trunc('day', d.state_2) - date_trunc('day', d.state_1)) < ('2 day'::INTERVAL)
THEN 1
ELSE 0
END as never_in_bl,
-- If customer doesn't reach stage #2 at all he's in the backlog until day 14, then we forget about this customer
CASE WHEN d.state_2 ISNULL
THEN 1
ELSE 0
END as never_st_2,
d.state_1 + '14 days'::INTERVAL as max_bl_day_never_reach,
-- If customer reaches stage #2 later than 14 days after stage #1 he's appearing in backlog only until day 14
CASE WHEN (date_trunc('day', d.state_2) - date_trunc('day', d.state_1)) > ('14 days'::INTERVAL)
THEN 1
ELSE 0
END as reached_late,
d.state_1 + '14 days'::INTERVAL as max_bl_day_late_reach,
-- We consider the customer being in a backlog if he doesn't reach stage #2 between 1 and 14 days after reaching stage #1
-- here we need the number of days he needed to reach stage 2
EXTRACT(day FROM date_trunc('day', d.state_2) - date_trunc('day', d.state_1)) as day_diff
FROM
question_data d
/*
WHERE
d.id = 1989
ORDER BY
(date_trunc('day', d.state_1) - date_trunc('day', d.state_2)) DESC
*/
) basic_data
JOIN (
-- Generate day list series
SELECT
i::date as current_day
FROM
generate_series(
(SELECT min(d.state_1)::DATE FROM question_data d),
(SELECT max(d.state_1)::DATE FROM question_data d),
'1 day'::interval
) i
) days_list ON 1=1
) raw_data
ORDER BY
customer_id ASC,
current_day ASC
) final_calcs
) average
GROUP BY
average.current_day,
average.weekly
ORDER BY
average.current_day ASC
) average2
GROUP BY
average2.weekly
WITH
days_list AS
(
SELECT
i :: DATE AS current_day
FROM
generate_series (
(
SELECT
MIN (d.state_1) :: DATE
FROM
question_data d
),
(
SELECT
MAX (d.state_1) :: DATE
FROM
question_data d
),
'1 day' :: INTERVAL
) i
),
basic_data AS
(
SELECT
d."id" AS customer_id,
d.state_1,
d.state_2,
CASE
WHEN (
date_trunc('day', d.state_2) - date_trunc('day', d.state_1)
) < ('2 day' :: INTERVAL) THEN 1
ELSE 0
END AS never_in_bl,
CASE
WHEN d.state_2 ISNULL THEN 1
ELSE 0
END AS never_st_2,
d.state_1 + '14 days' :: INTERVAL AS max_bl_day_never_reach,
CASE
WHEN (
date_trunc('day', d.state_2) - date_trunc('day', d.state_1)
) > ('14 days' :: INTERVAL) THEN 1
ELSE 0
END AS reached_late,
d.state_1 + '14 days' :: INTERVAL AS max_bl_day_late_reach
FROM
question_data d
),
raw_data AS
(
SELECT
*
FROM
basic_data
JOIN days_list ON 1 = 1
),
final_calcs AS (
SELECT
raw_data.customer_id AS customer_id,
CASE
WHEN (
raw_data.current_day - date_trunc('day', raw_data.state_1)
) < ('0 days' :: INTERVAL) THEN 0
ELSE
1
END AS stats_applicable,
raw_data.current_day AS current_day,
CASE
WHEN raw_data.never_in_bl = 1 THEN 0
ELSE
CASE
WHEN raw_data.never_st_2 = 1
AND (
raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day' :: INTERVAL
AND raw_data.max_bl_day_never_reach
) THEN 1
ELSE
CASE
WHEN raw_data.reached_late = 1
AND (
raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day' :: INTERVAL
AND raw_data.max_bl_day_never_reach
) THEN 1
ELSE
CASE
WHEN raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day' :: INTERVAL
AND raw_data.state_2 - '1 day' :: INTERVAL THEN 1
ELSE 0
END
END
END
END AS is_backlog
FROM
raw_data
ORDER BY
customer_id ASC,
current_day ASC
)
-------------------------
SELECT
average2.weekly AS week,
ROUND(AVG(average2.backlog), 1) AS backlog
FROM
(
SELECT
SUM (average.is_backlog) AS backlog,
average.weekly,
average.current_day
FROM
(
SELECT
final_calcs.customer_id AS customer_id,
final_calcs.stats_applicable * final_calcs.is_backlog AS is_backlog,
final_calcs.current_day AS current_day,
date_trunc(
'week',
final_calcs.current_day :: DATE
) :: DATE AS weekly
FROM
final_calcs
) average
GROUP BY
average.current_day,
average.weekly
ORDER BY
average.current_day ASC
) average2
GROUP BY
average2.weekly
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment