Last active
March 12, 2019 05:52
-
-
Save sheldonhull/8c7235ecb75bb91833e1 to your computer and use it in GitHub Desktop.
Eliminate Overlapping Dates, with subgroupings (such as overlap only eliminated at process > task level, instead of just process level)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if object_id('tempdb..#Awesome') is not null | |
drop table #awesome; | |
create table #awesome | |
( | |
unique_join_k int | |
,process varchar(10) | |
,start_date date | |
,end_date date | |
,grouper_id as convert(uniqueidentifier, hashbytes('sha1', | |
isnull(cast(unique_join_k as varchar(max)), '|') | |
+ '|' + isnull(cast(process as varchar(max)), '|') | |
)) | |
) | |
insert into #awesome | |
select | |
* | |
from | |
( | |
values | |
(1000, 'process 1', cast('20131006' as date), cast('20131213' as date)), | |
(1000, 'process 1', cast('20131225' as date), cast('20140215' as date)), | |
(1000, 'process 1', cast('20140123' as date), cast('20140215' as date)), | |
(1000, 'process 2', cast('20140328' as date), cast('20140617' as date)), | |
(1001, 'process 1', cast('20130214' as date), cast('20130421' as date)), | |
(1001, 'process 1', cast('20130827' as date), cast('20130926' as date)), | |
(1001, 'process 1', cast('20131211' as date), cast('20131221' as date)), | |
(1001, 'process 1', cast('20140227' as date), cast('20140405' as date)), | |
(1002, 'process 1', cast('20130518' as date), cast('20130622' as date)), | |
(1002, 'process 2', cast('20130527' as date), cast('20130618' as date)), | |
(1003, 'process 1', cast('20130312' as date), cast('20130428' as date)), | |
(1003, 'process 1', cast('20130510' as date), cast('20130614' as date)), | |
(1003, 'process 1', cast('20130725' as date), cast('20131115' as date)), | |
(1003, 'process 1', cast('20131010' as date), cast('20131014' as date)), | |
(1003, 'process 2', cast('20140413' as date), cast('20140614' as date)), | |
(1004, 'process 1', cast('20130116' as date), cast('20130425' as date)), | |
(1004, 'process 1', cast('20130828' as date), cast('20130926' as date)), | |
(1004, 'process 1', cast('20140101' as date), cast('20140210' as date)), | |
(1004, 'process 1', cast('20140312' as date), cast('20140408' as date)), | |
(1004, 'process 2', cast('20140513' as date), cast('20140806' as date)), | |
(1004, 'process 2', cast('20140515' as date), cast('20140818' as date)), | |
(1005, 'process 1', cast('20130606' as date), cast('20130724' as date)), | |
(1005, 'process 2', cast('20131125' as date), cast('20140217' as date)), | |
(1005, 'process 3', cast('20131220' as date), cast('20140131' as date)), | |
(1005, 'process 3', cast('20140503' as date), cast('20140605' as date))) as vtable (unique_join_k, process, start_date, end_date); | |
select | |
* | |
from | |
#awesome as a | |
order by | |
a.unique_join_k | |
,a.[start_date]; | |
with c1 as -- let e = end ordinals, let s = start ordinals | |
( | |
select | |
grouper_id | |
,unique_join_k | |
,time_start = start_date | |
,is_start = 1 | |
,sqid_end = null | |
,sqid_start = row_number() over (partition by grouper_id order by start_date) | |
from | |
#awesome cs | |
union all | |
select | |
grouper_id | |
,unique_join_k | |
,time_start = end_date | |
,is_start = 0 | |
,sqid_end = row_number() over (partition by grouper_id order by end_date) | |
,sqid_start = null | |
from | |
#awesome cs | |
), | |
c2 as (-- let se = start or end ordinal, namely, how many events (start or end) happened so far | |
select | |
c1.* | |
,sqid_all_is_starts = row_number() over (partition by grouper_id order by time_start, is_start desc) | |
from | |
c1 | |
), --select * from c2 | |
c3 as ( | |
/******************************************************* | |
For start events, the expression sqid_start - (sqid_all_is_starts - sqid_start) - 1 represents how many sessions were active | |
just before the current (hence - 1) | |
For end events, the expression (sqid_all_is_starts - sqid_end) - sqid_end ) represents how many sessions are active | |
right after this one | |
The above two expressions are 0 exactly when a group of packed intervals | |
either starts or ends, respectively | |
After filtering only events when a group of packed intervals either starts or ends, | |
group each pair of adjacent start/end events | |
*******************************************************/ | |
select | |
grouper_id | |
,unique_join_k | |
,time_start | |
,date_grouping = floor((row_number() over (partition by grouper_id order by time_start) - 1) / 2 + 1) | |
from | |
c2 | |
where | |
coalesce(sqid_start - (sqid_all_is_starts - sqid_start) - 1, | |
(sqid_all_is_starts - sqid_end) - sqid_end) = 0 | |
) -- select * from c3 | |
/******************************************************* | |
OVERLAPPING DATES ON SAME GROUP RESOLVED IN DISTINCT RANGES | |
*******************************************************/ | |
, | |
overlappedgone as ( | |
select | |
* | |
,sqid = dense_rank() over (partition by grouper_id, date_grouping order by time_start asc) | |
from | |
c3 | |
) | |
select | |
unique_join_k_start = og1.unique_join_k | |
,unique_join_k_end = og2.unique_join_k | |
,calc_start_date = og1.time_start | |
,calc_end_date = og2.time_start | |
,unique_non_overlapping_days = datediff(day, og1.time_start, og2.time_start) | |
,grouper_id = og1.grouper_id | |
from | |
overlappedgone og1 | |
inner join overlappedgone og2 | |
on og1.date_grouping = og2.date_grouping | |
and og1.grouper_id = og2.grouper_id | |
and og2.sqid = 2 | |
where | |
og1.sqid = 1 | |
order by | |
og1.unique_join_k | |
,calc_start_date |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment