Last active
March 3, 2017 12:03
-
-
Save snakers4/fc8f6de705a79a6a846ce11a1dc6e73c to your computer and use it in GitHub Desktop.
Client pipeline example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- using multiple subqueries to ensure no mistakes | |
SELECT | |
average2.weekly as week, | |
ROUND(AVG(average2.backlog),1) as backlog | |
FROM | |
( | |
SELECT | |
SUM(average.is_backlog) as backlog, | |
average.weekly, | |
average.current_day | |
FROM | |
( | |
SELECT | |
final_calcs.customer_id as customer_id, | |
final_calcs.stats_applicable * final_calcs.is_backlog as is_backlog, | |
final_calcs.current_day as current_day, | |
date_trunc('week', final_calcs.current_day::date)::DATE AS weekly | |
FROM | |
( | |
SELECT | |
--raw_data.*, | |
/* | |
raw_data.current_day | |
*/ | |
raw_data.customer_id as customer_id, | |
-- we cannot apply our stats to the future that "has not happened yet" | |
CASE WHEN | |
(raw_data.current_day - date_trunc('day', raw_data.state_1)) < ('0 days'::INTERVAL) | |
THEN 0 | |
ELSE 1 | |
END as stats_applicable, | |
raw_data.current_day as current_day, | |
CASE | |
-- If customer reaches stage #2 on the same day or the next day he never appears in the backlog | |
WHEN raw_data.never_in_bl = 1 THEN 0 | |
-- We consider the customer being in a backlog if he doesn't reach stage #2 between 1 and 14 days after reaching stage #1 | |
ELSE | |
CASE | |
WHEN raw_data.never_st_2 = 1 AND ( raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day'::INTERVAL AND raw_data.max_bl_day_never_reach ) THEN 1 | |
ELSE | |
-- If customer reaches stage #2 later than 14 days after stage #1 he's appearing in backlog only until day 14 | |
CASE | |
WHEN raw_data.reached_late = 1 AND ( raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day'::INTERVAL AND raw_data.max_bl_day_never_reach ) THEN 1 | |
ELSE | |
CASE | |
WHEN raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day'::INTERVAL AND raw_data.state_2 - '1 day'::INTERVAL THEN 1 | |
ELSE 0 | |
END | |
END | |
END | |
END as is_backlog | |
FROM | |
( | |
SELECT | |
* | |
FROM | |
( | |
-- Generate basic data markers | |
SELECT | |
d."id" as customer_id, | |
d.state_1, | |
d.state_2, | |
-- If customer reaches stage #2 on the same day or the next day he never appears in the backlog | |
CASE WHEN (date_trunc('day', d.state_2) - date_trunc('day', d.state_1)) < ('2 day'::INTERVAL) | |
THEN 1 | |
ELSE 0 | |
END as never_in_bl, | |
-- If customer doesn't reach stage #2 at all he's in the backlog until day 14, then we forget about this customer | |
CASE WHEN d.state_2 ISNULL | |
THEN 1 | |
ELSE 0 | |
END as never_st_2, | |
d.state_1 + '14 days'::INTERVAL as max_bl_day_never_reach, | |
-- If customer reaches stage #2 later than 14 days after stage #1 he's appearing in backlog only until day 14 | |
CASE WHEN (date_trunc('day', d.state_2) - date_trunc('day', d.state_1)) > ('14 days'::INTERVAL) | |
THEN 1 | |
ELSE 0 | |
END as reached_late, | |
d.state_1 + '14 days'::INTERVAL as max_bl_day_late_reach, | |
-- We consider the customer being in a backlog if he doesn't reach stage #2 between 1 and 14 days after reaching stage #1 | |
-- here we need the number of days he needed to reach stage 2 | |
EXTRACT(day FROM date_trunc('day', d.state_2) - date_trunc('day', d.state_1)) as day_diff | |
FROM | |
question_data d | |
/* | |
WHERE | |
d.id = 1989 | |
ORDER BY | |
(date_trunc('day', d.state_1) - date_trunc('day', d.state_2)) DESC | |
*/ | |
) basic_data | |
JOIN ( | |
-- Generate day list series | |
SELECT | |
i::date as current_day | |
FROM | |
generate_series( | |
(SELECT min(d.state_1)::DATE FROM question_data d), | |
(SELECT max(d.state_1)::DATE FROM question_data d), | |
'1 day'::interval | |
) i | |
) days_list ON 1=1 | |
) raw_data | |
ORDER BY | |
customer_id ASC, | |
current_day ASC | |
) final_calcs | |
) average | |
GROUP BY | |
average.current_day, | |
average.weekly | |
ORDER BY | |
average.current_day ASC | |
) average2 | |
GROUP BY | |
average2.weekly |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
WITH | |
days_list AS | |
( | |
SELECT | |
i :: DATE AS current_day | |
FROM | |
generate_series ( | |
( | |
SELECT | |
MIN (d.state_1) :: DATE | |
FROM | |
question_data d | |
), | |
( | |
SELECT | |
MAX (d.state_1) :: DATE | |
FROM | |
question_data d | |
), | |
'1 day' :: INTERVAL | |
) i | |
), | |
basic_data AS | |
( | |
SELECT | |
d."id" AS customer_id, | |
d.state_1, | |
d.state_2, | |
CASE | |
WHEN ( | |
date_trunc('day', d.state_2) - date_trunc('day', d.state_1) | |
) < ('2 day' :: INTERVAL) THEN 1 | |
ELSE 0 | |
END AS never_in_bl, | |
CASE | |
WHEN d.state_2 ISNULL THEN 1 | |
ELSE 0 | |
END AS never_st_2, | |
d.state_1 + '14 days' :: INTERVAL AS max_bl_day_never_reach, | |
CASE | |
WHEN ( | |
date_trunc('day', d.state_2) - date_trunc('day', d.state_1) | |
) > ('14 days' :: INTERVAL) THEN 1 | |
ELSE 0 | |
END AS reached_late, | |
d.state_1 + '14 days' :: INTERVAL AS max_bl_day_late_reach | |
FROM | |
question_data d | |
), | |
raw_data AS | |
( | |
SELECT | |
* | |
FROM | |
basic_data | |
JOIN days_list ON 1 = 1 | |
), | |
final_calcs AS ( | |
SELECT | |
raw_data.customer_id AS customer_id, | |
CASE | |
WHEN ( | |
raw_data.current_day - date_trunc('day', raw_data.state_1) | |
) < ('0 days' :: INTERVAL) THEN 0 | |
ELSE | |
1 | |
END AS stats_applicable, | |
raw_data.current_day AS current_day, | |
CASE | |
WHEN raw_data.never_in_bl = 1 THEN 0 | |
ELSE | |
CASE | |
WHEN raw_data.never_st_2 = 1 | |
AND ( | |
raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day' :: INTERVAL | |
AND raw_data.max_bl_day_never_reach | |
) THEN 1 | |
ELSE | |
CASE | |
WHEN raw_data.reached_late = 1 | |
AND ( | |
raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day' :: INTERVAL | |
AND raw_data.max_bl_day_never_reach | |
) THEN 1 | |
ELSE | |
CASE | |
WHEN raw_data.current_day BETWEEN date_trunc('day', raw_data.state_1) + '1 day' :: INTERVAL | |
AND raw_data.state_2 - '1 day' :: INTERVAL THEN 1 | |
ELSE 0 | |
END | |
END | |
END | |
END AS is_backlog | |
FROM | |
raw_data | |
ORDER BY | |
customer_id ASC, | |
current_day ASC | |
) | |
------------------------- | |
SELECT | |
average2.weekly AS week, | |
ROUND(AVG(average2.backlog), 1) AS backlog | |
FROM | |
( | |
SELECT | |
SUM (average.is_backlog) AS backlog, | |
average.weekly, | |
average.current_day | |
FROM | |
( | |
SELECT | |
final_calcs.customer_id AS customer_id, | |
final_calcs.stats_applicable * final_calcs.is_backlog AS is_backlog, | |
final_calcs.current_day AS current_day, | |
date_trunc( | |
'week', | |
final_calcs.current_day :: DATE | |
) :: DATE AS weekly | |
FROM | |
final_calcs | |
) average | |
GROUP BY | |
average.current_day, | |
average.weekly | |
ORDER BY | |
average.current_day ASC | |
) average2 | |
GROUP BY | |
average2.weekly |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment