Home Grouping consecutive date periods with criteria
Reply: 4

Grouping consecutive date periods with criteria

nbot
1#
nbot Published in 2018-02-14 05:12:52Z

SQL Server

Data:

 Row | Type | Start Date | End Date   | Dollars
------------------------------------------
1    |  1   | 01/01/2017 | 01/05/2017 |  10
2    |  1   | 01/08/2017 | 01/12/2017 |  10
3    |  1   | 01/17/2017 | 01/19/2017 |  10
4    |  1   | 01/28/2017 | 02/10/2017 |  10
5    |  1   | 02/20/2017 | 03/10/2017 |  10
6    |  2   | 10/01/2017 | 10/03/2017 |  10
7    |  2   | 10/20/2017 | 10/23/2017 |  10
8    |  2   | 10/25/2017 | 10/29/2017 |  10

Within types, I need to group consecutive date periods as long as they are separated by less than 7 days, summing the dollars and the total gap days within each group.

Intermediate Table:

 Row | Type | Start Date | End Date   | Dollars | Grouping | GapDays
------------------------------------------
1    |  1   | 01/01/2017 | 01/05/2017 |  10     |  1       |  null
2    |  1   | 01/08/2017 | 01/12/2017 |  10     |  1       |  3
3    |  1   | 01/17/2017 | 01/19/2017 |  10     |  1       |  5
4    |  1   | 01/28/2017 | 02/10/2017 |  10     |  2       |  9
5    |  1   | 02/20/2017 | 03/10/2017 |  10     |  3       |  10
6    |  2   | 10/01/2017 | 10/03/2017 |  10     |  1       |  null
7    |  2   | 10/20/2017 | 10/23/2017 |  10     |  2       |  17
8    |  2   | 10/25/2017 | 10/29/2017 |  10     |  2       |  2

Result:

----------------------------------------------------
 Type | Start Date |  End Date  | Dollars | GapDays
----------------------------------------------------
  1   | 01/01/2017 | 01/19/2017 |   30    |   8
  1   | 01/28/2017 | 02/10/2017 |   10    |   0
  1   | 02/20/2017 | 03/10/2017 |   10    |   0
  2   | 10/01/2017 | 10/03/2017 |   10    |   0
  2   | 10/20/2017 | 10/29/2017 |   20    |   2

Solution: Use binary grouping for "consecutive" date criteria (in this case < 7 days) and then use that grouping in a sum over with rows unbounded preceding

with cte as (
select
  *,
  COALESCE(DATEDIFF(dd, LAG(EndDate, 1, NULL) OVER (PARTITION BY [Type] ORDER BY StartDate), StartDate),0) AS GapDays
from
  #data
),
cte2 as (
select
  *,
  case when GapDays < 7 then 0 else 1 end as group1
from
  cte
),
cte3 as (
select
  *,
  sum(group1) over (partition by [type] order by startDate, endDate rows unbounded preceding) as group2
from
  cte2
)
select
  [TYPE],
  MIN(StartDate) AS StartDate,
  MAX(EndDate) AS EndDate,
  SUM(Dollars) AS Dollars,
  SUM(CASE WHEN GapDays > 7 THEN 0 ELSE GapDays END) AS GapDays
from
  cte3
group by
  [Type], group2
Sandip Patel
2#
Sandip Patel Reply to 2018-02-14 06:07:01Z

You have to first get the date difference group by type and then apply aggregate functions on column as below:

Look at here for DENSE_RANK (Returns the rank of rows within the partition of a result set, without any gaps in the ranking. The rank of a row is one plus the number of distinct ranks that come before the row in question)

;WITH T AS
(
    SELECT
        *,      
        CASE WHEN Type=LAG(Type) OVER (ORDER BY Type) AND DATEDIFF(d,StartDate,EndDate) < 7 THEN DATEDIFF(d,lag(EndDate)  OVER (ORDER BY Type),StartDate) ELSE 0 END  AS GapDays,
        DENSE_RANK() OVER(ORDER BY Type,CASE WHEN DATEDIFF(d,StartDate,EndDate) < 7 THEN 1 ELSE 2 END) AS PartNo
    FROM @tblTest
)

SELECT
    Type,
    MIN(StartDate) AS StartDate,
    MAX(EndDate) AS EndDate, 
    ISNULL(SUM(Dollars),0) AS Dollars, 
    --DATEDIFF(d,MIN(StartDate),MAX(EndDate)) AS GapDays
    SUM(GapDays) AS GapDays
FROM T
GROUP BY Type,PartNo

Output:

Note: It seems you have given wrong result in your question

DhruvJoshi
3#
DhruvJoshi Reply to 2018-02-14 05:51:20Z

you can use a query like below: See working demo

create table data (Type int,StartDate date,EndDate date,  Dollars int)
insert into data values
(1,'01/01/2017','01/05/2017',10)
,(1,'01/08/2017','01/12/2017',10)
,(1,'01/17/2017','01/19/2017',10)
,(1,'01/28/2017','02/10/2017',10)
,(2,'10/01/2017','10/03/2017',10)
,(2,'10/20/2017','10/23/2017',10)
,(2,'10/25/2017','10/29/2017',10)

; with uniquerowset as
(
    select
    days=datediff(d,startdate,endDate)+1,
    grouped= 
        case 
            when 
                datediff(d,lag(EndDate) over(partition by Type order by StartDate asc),StartDate) >7
            then 0
            else 1
        end ,
    days_missed=
        datediff(d,lag(EndDate) over(partition by Type order by StartDate asc),StartDate),
    *
    from data
 ),
 finalresult as
 (
     select 
         *,
         rn =row_number() over ( partition by Type order by StartDate asc),
         rn2= row_number() over ( partition by Type order by grouped asc),
         days_missed_corrected= grouped * isnull(days_missed,0)
    from 
         uniquerowset
     )

 select 
     Type,
     StartDate = Min(StartDate),
     EndDate = Max(EndDate),
     Dollars = Sum(Dollars),
     GapDays = Sum(days_missed_corrected)
 from finalresult 
 group by rn2-rn, type
 order by min(StartDate)
KumarHarsh
4#
KumarHarsh Reply to 2018-02-14 08:36:37Z

If output is correct across all sample data then optimization can be discuss and implemented.

Try this script,

create table #data (Type int,StartDate date,EndDate date,  Dollars int)
insert into #data values
(1,'01/01/2017','01/05/2017',10)
,(1,'01/08/2017','01/12/2017',10)
,(1,'01/17/2017','01/19/2017',10)
,(1,'01/28/2017','02/10/2017',10)
,(2,'10/01/2017','10/03/2017',10)
,(2,'10/20/2017','10/23/2017',10)
,(2,'10/25/2017','10/29/2017',10)

;with CTE as
(
select [Type],StartDate,EndDate,Dollars
,ROW_NUMBER()over(PARTITION by [Type] order by StartDate)rn

from #data  
)
,CTE1 AS
(
select [Type],StartDate,EndDate,Dollars,rn 
,1 grp
,0 gapDays
from cte
where rn=1

union ALL

select c.[Type],c.StartDate,c.EndDate,c.Dollars
,c.rn  
,case when DATEDIFF(day,c1.EndDate,c.StartDate)<7 
then grp else grp+1 end
,case when DATEDIFF(day,c1.EndDate,c.StartDate)<7 
then DATEDIFF(day,c1.EndDate,c.StartDate)
 else 0 end
from CTE c
inner join cte1 c1
on c.[type]=c1.[type]
and c.rn=c1.rn+1
)
,CTE2 AS(
select [type],[grp]
,sum(Dollars)Dollars
,sum(gapDays)gapDays
 from cte1
group by [type],[grp]
)

select c.[type],
c1.startDate, c1.EndDate 
,c.[grp] 
,c.Dollars,c.gapDays
from cte2 c
cross apply(
select min(startDate) startDate
,max(EndDate) EndDate from cte1 c1
where c1.[type]=c.[type]
and c1.grp=c.grp
)c1
--order by [type]
drop table #data
nbot
5#
nbot Reply to 2018-02-14 21:08:32Z

Solution: Use binary grouping for "consecutive" date criteria (in this case < 7 days) and then use that grouping in a sum over with rows unbounded preceding

with cte as (
select
  *,
  COALESCE(DATEDIFF(dd, LAG(EndDate, 1, NULL) OVER (PARTITION BY [Type] ORDER BY StartDate), StartDate),0) AS GapDays
from
  #data
),
cte2 as (
select
  *,
  case when GapDays < 7 then 0 else 1 end as group1
from
  cte
),
cte3 as (
select
  *,
  sum(group1) over (partition by [type] order by startDate, endDate rows unbounded preceding) as group2
from
  cte2
)
select
  [TYPE],
  MIN(StartDate) AS StartDate,
  MAX(EndDate) AS EndDate,
  SUM(Dollars) AS Dollars,
  SUM(CASE WHEN GapDays > 7 THEN 0 ELSE GapDays END) AS GapDays
from
  cte3
group by
  [Type], group2
You need to login account before you can post.

About| Privacy statement| Terms of Service| Advertising| Contact us| Help| Sitemap|
Processed in 0.351197 second(s) , Gzip On .

© 2016 Powered by mzan.com design MATCHINFO