Is it possible to create counts by date on historic events table? - sql

I have an events table which contains the date of status changes. What I'm trying to achieve is to produce summary counts for each date, however I'm struggling as it is not a straight count by date but instead a count based on the last time the status changed.
The data is as follows:
------------------------------------------
IT_ID NEW_STATUS OLD_STATUS TIMESTAMP
------------------------------------------
100 4 3 06/05/2019
100 3 2 04/05/2019
200 2 1 03/05/2019
100 2 1 02/05/2019
300 2 1 02/05/2019
200 1 - 01/05/2019
100 1 - 01/05/2019
300 1 - 01/05/2019
-------------------------------------------
I've tried grouping, but this hasn't worked due to the above, SQL below for the straight count.
-- Straight count of status-change events per calendar day and new status
-- (counts events only; it does not carry a status forward to later days).
select trunc(timestamp), new_status, count(new_status)
from status_hist
-- A DATE literal avoids the NLS_DATE_FORMAT-dependent implicit string-to-date conversion.
-- Comparing the raw column against midnight is equivalent to comparing trunc(timestamp).
where timestamp >= DATE '2019-01-01'
group by trunc(timestamp), new_status
Ideally I would like the data in the following format; the key point is that the counts are shown against each date. Note that, as no status changes took place on 05/05/19, that date shows the same counts as 04/05/19:
---------------------------------------------------------
Date Status 1 Status 2 Status 3 Status 4
---------------------------------------------------------
06/05/2019 0 2 0 1
05/05/2019 0 2 1 0
04/05/2019 0 2 1 0
03/05/2019 0 3 0 0
02/05/2019 1 2 0 0
01/05/2019 3 0 0 0
--------------------------------------------------------
Any help would be gratefully received.
Thanks

I think about handling this problem by getting the status of each person on each date. That requires a cross join to get the person/dates combinations and then some aggregation:
-- Status of every item on every calendar day between the first and the last event.
WITH calendar AS (
    -- one row per day from MIN(ts) to MAX(ts)
    SELECT first_day + LEVEL - 1 AS dt
    FROM (
        SELECT MIN(ts) AS first_day,
               MAX(ts) AS last_day
        FROM test_data
    )
    CONNECT BY first_day + LEVEL - 1 <= last_day
)
SELECT c.dt,
       items.it_id,
       -- new_status of the newest event dated on or before this day
       MAX(ev.new_status) KEEP (DENSE_RANK FIRST ORDER BY ev.ts DESC) AS status
FROM calendar c
CROSS JOIN (SELECT DISTINCT it_id FROM test_data) items
LEFT JOIN test_data ev
    ON ev.it_id = items.it_id
   AND ev.ts <= c.dt
GROUP BY c.dt, items.it_id;
The dates CTE is just calculating all dates. The rest is bringing in the latest status.
This can then be expanded to aggregate (or pivot) the results:
-- Daily number of items in each status: resolve every item's latest status per day, then pivot.
WITH calendar AS (
    -- one row per day from MIN(ts) to MAX(ts)
    SELECT first_day + LEVEL - 1 AS dt
    FROM (
        SELECT MIN(ts) AS first_day,
               MAX(ts) AS last_day
        FROM test_data
    )
    CONNECT BY first_day + LEVEL - 1 <= last_day
),
item_day AS (
    -- new_status of each item's newest event dated on or before the day
    SELECT c.dt,
           items.it_id,
           MAX(ev.new_status) KEEP (DENSE_RANK FIRST ORDER BY ev.ts DESC) AS status
    FROM calendar c
    CROSS JOIN (SELECT DISTINCT it_id FROM test_data) items
    LEFT JOIN test_data ev
        ON ev.it_id = items.it_id
       AND ev.ts <= c.dt
    GROUP BY c.dt, items.it_id
)
SELECT dt,
       COUNT(CASE WHEN status = 1 THEN 1 END) AS num_1,
       COUNT(CASE WHEN status = 2 THEN 1 END) AS num_2,
       COUNT(CASE WHEN status = 3 THEN 1 END) AS num_3,
       COUNT(CASE WHEN status = 4 THEN 1 END) AS num_4
FROM item_day
GROUP BY dt
ORDER BY dt DESC;
Here is a db<>fiddle.

You can do it using windowed aggregation functions:
Oracle Setup:
-- Sample data with the same rows as the question's table; the date column is created
-- under the quoted identifier "TIMESTAMP".
CREATE TABLE test_data ( IT_ID, NEW_STATUS, OLD_STATUS, "TIMESTAMP" ) AS
SELECT 100, 4, 3, DATE '2019-05-06' FROM DUAL UNION ALL
SELECT 100, 3, 2, DATE '2019-05-04' FROM DUAL UNION ALL
SELECT 200, 2, 1, DATE '2019-05-03' FROM DUAL UNION ALL
SELECT 100, 2, 1, DATE '2019-05-02' FROM DUAL UNION ALL
SELECT 300, 2, 1, DATE '2019-05-02' FROM DUAL UNION ALL
SELECT 200, 1, NULL, DATE '2019-05-01' FROM DUAL UNION ALL
SELECT 100, 1, NULL, DATE '2019-05-01' FROM DUAL UNION ALL
SELECT 300, 1, NULL, DATE '2019-05-01' FROM DUAL;
Query:
-- Per-day totals for each status: cumulative number of events that entered the status
-- (new_status) minus the cumulative number that left it (old_status).
-- DISTINCT collapses the several joined rows that share a day into one output row.
SELECT DISTINCT
dt AS "TIMESTAMP",
-- RANGE ... CURRENT ROW covers every row with the same dt, so all events of a day are counted together
COUNT( CASE new_status WHEN 1 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
- COUNT( CASE old_status WHEN 1 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
AS Status1,
COUNT( CASE new_status WHEN 2 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
- COUNT( CASE old_status WHEN 2 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
AS Status2,
COUNT( CASE new_status WHEN 3 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
- COUNT( CASE old_status WHEN 3 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
AS Status3,
COUNT( CASE new_status WHEN 4 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
- COUNT( CASE old_status WHEN 4 THEN IT_ID END ) OVER ( ORDER BY dt RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW )
AS Status4
FROM test_data t
-- the generated calendar is the preserved side of the join, so days without events still produce a row
RIGHT OUTER JOIN (
SELECT min_dt + LEVEL - 1 AS dt
FROM ( SELECT MIN("TIMESTAMP") AS min_dt,
MAX("TIMESTAMP") AS max_dt
FROM test_data
)
CONNECT BY min_dt + LEVEL - 1 <= max_dt
) c
ON ( c.dt = t."TIMESTAMP" )
ORDER BY "TIMESTAMP" DESC
Output:
TIMESTAMP | STATUS1 | STATUS2 | STATUS3 | STATUS4
:-------- | ------: | ------: | ------: | ------:
06-MAY-19 | 0 | 2 | 0 | 1
05-MAY-19 | 0 | 2 | 1 | 0
04-MAY-19 | 0 | 2 | 1 | 0
03-MAY-19 | 0 | 3 | 0 | 0
02-MAY-19 | 1 | 2 | 0 | 0
01-MAY-19 | 3 | 0 | 0 | 0
db<>fiddle here

You can use the pivot function of SQL.
I don't have an oracle DB to test this:
-- Oracle-flavoured attempt at the SQL Server script that follows; the author states it was not tested.
-- NOTE(review): it still relies on SQL Server constructs (table variable, sys.columns, [test].dbo)
-- and is unlikely to run on Oracle as written.
declare #dates table(Date timestamp(3), NEW_STATUS number(10))
v_StartDate DATE := (SELECT MIN(timestamp) FROM [test].dbo)
v_EndDate DATE := (SELECT MAX(timestamp) FROM [test].dbo)
insert into #dates
SELECT nbr * INTERVAL '1' DAY(5) - 1 + v_StartDate as 'Date', null as NEW_STATUS
FROM ( SELECT ROW_NUMBER() OVER ( ORDER BY c.object_id ) AS Nbr
FROM sys.columns c
) nbrs
WHERE nbr - 1 <= v_EndDate - v_StartDate
SELECT timestamp as 'Date', 1 AS 'Status 1', 2 AS 'Status 2', 3 AS 'Status 3', 4 AS 'Status 4'
FROM
(SELECT Date as 'timestamp', NVL(NVL(d.new_status, t.NEW_STATUS),t2.NEW_STATUS) as new_status
FROM #dates d
left outer join Table_test t on d.Date = t.TIMESTAMP
left outer join Table_test t2 on INTERVAL '-1' DAY(5) +d.Date = t2.TIMESTAMP and NVL(d.new_status, t.NEW_STATUS) is null ) p
PIVOT
(
COUNT (new_status)
FOR new_status IN
( 1, 2, 3, 4 )
) AS pvt
ORDER BY pvt.TIMESTAMP desc
My Microsoft SQL Syntax is:
-- Calendar of every day between the first and the last event.
-- T-SQL variables (scalar and table) must be prefixed with @; a # prefix names a temp table
-- and cannot be used with DECLARE.
DECLARE @dates TABLE ([Date] datetime, [NEW_STATUS] int)
DECLARE @StartDate DATE = (SELECT MIN(timestamp) FROM [test].[dbo].[Table_test])
DECLARE @EndDate DATE = (SELECT MAX(timestamp) FROM [test].[dbo].[Table_test])
-- sys.columns serves only as a row source for consecutive numbers, so the calendar
-- can hold at most as many days as sys.columns has rows
INSERT INTO @dates
SELECT DATEADD(DAY, nbr - 1, @StartDate) as 'Date', null as NEW_STATUS
FROM ( SELECT ROW_NUMBER() OVER ( ORDER BY c.object_id ) AS Nbr
       FROM sys.columns c
     ) nbrs
WHERE nbr - 1 <= DATEDIFF(DAY, @StartDate, @EndDate)
-- Pivot the per-day statuses into one column per status value
SELECT timestamp as 'Date', [1] AS 'Status 1', [2] AS 'Status 2', [3] AS 'Status 3', [4] AS 'Status 4'
FROM
(SELECT Date as 'timestamp', ISNULL(ISNULL(d.new_status, t.NEW_STATUS),t2.NEW_STATUS) as new_status
 FROM @dates d
 left outer join Table_test t on d.Date = t.TIMESTAMP
 -- fall back to the previous day's events only when the day itself has none
 left outer join Table_test t2 on DATEADD(DAY,-1,d.Date) = t2.TIMESTAMP and ISNULL(d.new_status, t.NEW_STATUS) is null ) p
PIVOT
(
COUNT (new_status)
FOR new_status IN
( [1], [2], [3], [4] )
) AS pvt
ORDER BY pvt.TIMESTAMP desc

Related

Loop within id and combine dates between rows in SQL [duplicate]

I have a table in the following format
Id StartDate EndDate Type
1 2012-02-18 2012-03-18 1
1 2012-03-17 2012-06-29 1
1 2012-06-27 2012-09-27 1
1 2014-08-23 2014-09-24 3
1 2014-09-23 2014-10-24 3
1 2014-10-23 2014-11-24 3
2 2015-07-04 2015-08-06 1
2 2015-08-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
I found similar questions here, but nothing that could help me solve my problem. I want to merge rows that have the same Id and Type and overlapping date periods.
The result from the above table should be
Id StartDate EndDate Type
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
In another server, I was able to do it with the following restrictions and the query below:
Didn't care about the Type column, but just the Id
Had a newer version of SQL Server (2012), but now I have 2008 which the code is not compatible
-- Merge consecutive rows per Id: a running sum numbers the groups, then each group is collapsed.
SELECT Id
     , MIN(StartDate) AS StartDate
     , MAX(EndDate) AS EndDate
FROM (
    SELECT *
         -- a row continues the current group only when it starts exactly where the previous row ended
         , SUM(CASE WHEN a.PrevEndDate = a.StartDate THEN 0
                    ELSE 1
               END
              ) OVER (ORDER BY Id, StartDate) sm
    FROM (
        SELECT Id
             , StartDate
             , EndDate
             -- the lagged value needs its own name: a derived table cannot expose two columns called EndDate
             , LAG(EndDate, 1, NULL) OVER (PARTITION BY Id ORDER BY Id, EndDate) PrevEndDate
        FROM #temptable
    ) a
) b
GROUP BY Id, sm
Any advice how I can
Include Type on the process
Make it work on SQL Server 2008
This approach uses an additional temp table to identify the groups of overlapping dates, and then performs a quick aggregate based on the groupings.
-- Seed: copy the rows into #G; UID and GroupId are both taken from ROW_NUMBER() OVER (ORDER BY Id, Type).
SELECT *, ROW_NUMBER() OVER (ORDER BY Id, Type) AS UID,
       ROW_NUMBER() OVER (ORDER BY Id, Type) AS GroupId
INTO #G
FROM #TempTable
-- Repeat until an UPDATE changes no rows. The system function is @@ROWCOUNT; on first entry it
-- holds the row count of the SELECT ... INTO above.
WHILE @@ROWCOUNT <> 0 BEGIN
    -- Pull each row's GroupId down to the smaller GroupId of any overlapping row
    -- with the same Id and Type.
    UPDATE T1 SET
        GroupId = T2.GroupId
    FROM #G T1
    INNER JOIN (
        SELECT T1.UID, CASE WHEN T1.GroupId < T2.GroupId THEN T1.GroupId ELSE T2.GroupId END
        FROM #G T1
        LEFT OUTER JOIN #G T2
            ON T1.Id = T2.Id AND T1.Type = T2.Type AND T1.GroupId <> T2.GroupId
            AND T1.StartDate <= T2.EndDate AND T2.StartDate <= T1.EndDate
    ) T2 (UID, GroupId)
        ON T1.UID = T2.UID
    WHERE T1.GroupId <> T2.GroupId
END
-- Collapse every group into a single interval
SELECT Id, MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate, Type
FROM #G G GROUP BY GroupId, Id, Type
This returns the expected values
Id StartDate EndDate Type
----------- ---------- ---------- -----------
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
This is 2008 compatible. A CTE really is the best way to link up all overlapping records in my opinion. The date overlap logic came from this thread: SO Date Overlap
I added extra data that's more complex to make sure that it's working as expected.
-- Table variables must be named with an @ prefix (DECLARE cannot create a #-named object).
DECLARE @Data TABLE (Id INT, StartDate DATE, EndDate DATE, Type INT)
INSERT INTO @Data
SELECT 1,'2/18/2012' ,'3/18/2012', 1 UNION ALL
select 1,'3/17/2012','6/29/2012',1 UNION ALL
select 1,'6/27/2012','9/27/2012',1 UNION ALL
select 1,'8/23/2014','9/24/2014',3 UNION ALL
select 1,'9/23/2014','10/24/2014',3 UNION ALL
select 1,'10/23/2014','11/24/2014',3 UNION ALL
select 2,'7/4/2015','8/6/2015',1 UNION ALL
select 2,'8/4/2015','9/6/2015',1 UNION ALL
select 3,'11/1/2013','12/1/2013',0 UNION ALL
select 3,'1/9/2018','2/9/2018',0 UNION ALL
select 4,'1/1/2018','1/2/2018',0 UNION ALL --many non overlapping dates
select 4,'1/4/2018','1/5/2018',0 UNION ALL
select 4,'1/7/2018','1/9/2018',0 UNION ALL
select 4,'1/11/2018','1/13/2018',0 UNION ALL
select 4,'2/7/2018','2/8/2018',0 UNION ALL --many overlapping dates
select 4,'2/8/2018','2/9/2018',0 UNION ALL
select 4,'2/9/2018','2/10/2018',0 UNION all
select 4,'2/10/2018','2/11/2018',0 UNION all
select 4,'2/11/2018','2/12/2018',0 UNION all
select 4,'2/12/2018','2/13/2018',0 UNION all
select 4,'3/7/2018','3/8/2018',0 UNION ALL --many overlapping dates, second instance of id 4, type 0
select 4,'3/8/2018','3/9/2018',0 UNION ALL
select 4,'3/9/2018','3/10/2018',0 UNION all
select 4,'3/10/2018','3/11/2018',0 UNION all
select 4,'3/11/2018','3/12/2018',0 UNION all
select 4,'3/12/2018','3/13/2018',0
;
WITH cdata
AS (
    -- anchor: rows that no earlier-starting row of the same Id/Type overlaps
    SELECT Id,
           d.Type,
           d.StartDate,
           d.EndDate,
           CurrentStart = d.StartDate
    FROM @Data d
    WHERE NOT EXISTS (
              SELECT *
              FROM @Data x
              WHERE x.StartDate < d.StartDate
                AND d.StartDate <= x.EndDate
                AND d.EndDate >= x.StartDate
                AND d.Id = x.Id
                AND d.Type = x.Type --get first records for overlapping ranges
          )
    UNION ALL
    -- recursion: widen the accumulated range with each overlapping row that starts after CurrentStart
    SELECT d.Id,
           d.Type,
           StartDate = CASE WHEN d2.StartDate < d.StartDate THEN d2.StartDate ELSE d.StartDate END,
           EndDate = CASE WHEN d2.EndDate > d.EndDate THEN d2.EndDate ELSE d.EndDate END,
           CurrentStart = d2.StartDate
    FROM cdata d
    INNER JOIN @Data d2
        ON (
               d.StartDate <= d2.EndDate
               AND d.EndDate >= d2.StartDate
           )
       AND d2.Id = d.Id
       AND d2.Type = d.Type
       AND d2.StartDate > d.CurrentStart)
-- keep the furthest end reached for every range start
SELECT cdata.Id, cdata.Type, cdata.StartDate, EndDate = MAX(cdata.EndDate)
FROM cdata
GROUP BY cdata.Id, cdata.Type, cdata.StartDate
This looks like a Packing Intervals problem. See the post by Itzik Ben-Gan for all the details and what indexes he recommends to make it work efficiently. He presents a solution without recursive CTE.
Two notes.
The query below assumes that intervals are [closed; open), i.e. StartDate is inclusive and EndDate is exclusive. This way to represent such data is often the most convenient. (in the same sense as having arrays as zero-based instead of 1-based is usually more convenient in programming languages).
I added a RowID column to have unambiguous sorting.
Sample data
-- DECLARE only accepts @-prefixed names, so "DECLARE #T TABLE" does not run.
-- A temp table is created instead because every later statement refers to the table as #T.
CREATE TABLE #T
(
    RowID int IDENTITY,
    id int,
    StartDate date,
    EndDate date,
    tp int
);
INSERT INTO #T(Id, StartDate, EndDate, tp) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
-- Make EndDate an opened interval, make it exclusive
-- [Start; End)
-- (intentionally updates every row: there is no WHERE clause)
UPDATE #T
SET EndDate = DATEADD(day, 1, EndDate)
;
Recommended indexes
-- indexes to support solutions
-- (created on #T, the name the sample data and the query use; no table called T exists in this script)
CREATE UNIQUE INDEX idx_start_id ON #T(id, tp, StartDate, RowID);
CREATE UNIQUE INDEX idx_end_id ON #T(id, tp, EndDate, RowID);
Query
Read the Itzik's post to understand what is going on. He has nice illustrations there. In short, each timestamp (start or end) is treated as an event. Each event has a + or - type. Each time we encounter a + event (some interval starts) we increase the running counter. Each time we encounter a - event (some interval ends) we decrease the running counter. When the running counter is 0 it means that the streak of overlapping intervals is over.
I took Itzik's query as is and simply changed the column names to match your names.
-- Packs overlapping intervals per (id, tp): every start and end date becomes an event,
-- the events are numbered, and only the events that open or close a packed group are kept.
WITH C1 AS
-- let e = end ordinals, let s = start ordinals
(
SELECT
RowID, id, tp, StartDate AS ts, +1 AS EventType,
NULL AS e,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY StartDate, RowID) AS s
FROM #T
UNION ALL
SELECT
RowID, id, tp, EndDate AS ts, -1 AS EventType,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY EndDate, RowID) AS e,
NULL AS s
FROM #T
),
C2 AS
-- let se = start or end ordinal, namely, how many events (start or end) happened so far
(
SELECT C1.*,
-- EventType DESC orders a start (+1) before an end (-1) that falls on the same ts
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts, EventType DESC, RowID) AS se
FROM C1
),
C3 AS
-- For start events, the expression s - (se - s) - 1 represents how many sessions were active
-- just before the current (hence - 1)
--
-- For end events, the expression (se - e) - e represents how many sessions are active
-- right after this one
--
-- The above two expressions are 0 exactly when a group of packed intervals
-- either starts or ends, respectively
--
-- After filtering only events when a group of packed intervals either starts or ends,
-- group each pair of adjacent start/end events
(
SELECT id, tp, ts,
((ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts) - 1) / 2 + 1)
AS grpnum
FROM C2
WHERE COALESCE(s - (se - s) - 1, (se - e) - e) = 0
)
-- one day is subtracted from the latest event of each group when reporting EndDate
SELECT id, tp, MIN(ts) AS StartDate, DATEADD(day, -1, MAX(ts)) AS EndDate
FROM C3
GROUP BY id, tp, grpnum
ORDER BY id, tp, StartDate;
Result
+----+----+------------+------------+
| id | tp | StartDate | EndDate |
+----+----+------------+------------+
| 1 | 1 | 2012-02-18 | 2012-09-27 |
| 1 | 3 | 2014-08-23 | 2014-11-24 |
| 2 | 1 | 2015-07-04 | 2015-09-06 |
| 3 | 0 | 2013-11-01 | 2013-12-01 |
| 3 | 0 | 2018-01-09 | 2018-02-09 |
+----+----+------------+------------+
-- Temp table with sample rows.
-- NOTE(review): the second row ('2012-03-19' .. '2012-06-19') differs from the question's data
-- ('2012-03-17' .. '2012-06-29'), and every value is supplied as a string literal.
create table #table
(Id int,StartDate date, EndDate date, Type int)
insert into #table
values
('1','2012-02-18','2012-03-18','1'),('1','2012-03-19','2012-06-19','1'),
('1','2012-06-27','2012-09-27','1'),('1','2014-08-23','2014-09-24','3'),
('1','2014-09-23','2014-10-24','3'),('1','2014-10-23','2014-11-24','3'),
('2','2015-07-04','2015-08-06','1'),('2','2015-08-04','2015-09-06','1'),
('3','2013-11-01','2013-12-01','0'),('3','2018-01-09','2018-02-09','0')
-- NOTE(review): rows are grouped by the calendar year of StartDate and of EndDate;
-- no overlap test between rows is performed.
select ID,MIN(startdate)sd,MAX(EndDate)ed,type from #table
group by ID,TYPE,YEAR(startdate),YEAR(EndDate)
this can be easily achieved by using some window-functions and CTE's. Here is the solution
-- Table variables must be named with an @ prefix (DECLARE cannot create a #-named object).
DECLARE @table TABLE
(id INT,
 StartDate DATE,
 EndDate DATE,
 [Type] INT
);
INSERT INTO @table(Id, StartDate, EndDate, [Type]) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
WITH C1 AS
(
    -- PrevEnd = latest EndDate among all earlier rows of the same Id/Type (NULL for the first row)
    SELECT *,
           MAX(EndDate) OVER(PARTITION BY Id, [Type]
                             ORDER BY StartDate, EndDate
                             ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS PrevEnd
    FROM @table
),
C2 AS
(
    -- StartFlag is 1 when the row does not overlap anything before it;
    -- the running sum of the flags numbers the merged groups
    SELECT *,
           SUM(StartFlag) OVER(PARTITION BY Id, [Type]
                               ORDER BY StartDate, EndDate
                               ROWS UNBOUNDED PRECEDING) AS GroupID
    FROM C1
    CROSS APPLY ( VALUES(CASE WHEN StartDate <= PrevEnd THEN NULL ELSE 1 END) ) AS A(StartFlag)
)
SELECT Id, [Type], MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate
FROM C2
GROUP BY Id, [Type], GroupID;

Overlapping between first record enddate and next record start date in SQL Server

I have the below kind of data and I need below kind of output.
Input:
id startdate enddate
1 21/01/2019 23/01/2019
1 23/01/2019 24/01/2019
1 24/01/2019 27/01/2019
1 29/01/2019 02/02/2019
Output:
id startdate enddate
1 21/01/2019 27/01/2019
1 29/01/2019 02/02/2019
We need to use the logic of matching the first record enddate and nth record startdate.
This is a gaps-and-islands problem, where you want to group together "adjacent" dates. Here is one approach using window functions: the idea is to compare the current start date to the end date of the "previous" row, and use a window sum to define the groups:
-- Gaps and islands: merge rows whose startdate equals the previous row's enddate.
WITH with_prev AS (
    -- attach the enddate of the preceding row (by startdate) within the same id
    SELECT src.*,
           LAG(src.enddate) OVER (PARTITION BY src.id ORDER BY src.startdate) AS lag_enddate
    FROM mytable src
),
islands AS (
    -- the running sum grows by one whenever a row does not continue the previous one
    SELECT wp.*,
           SUM(CASE WHEN wp.startdate = wp.lag_enddate THEN 0 ELSE 1 END)
               OVER (PARTITION BY wp.id ORDER BY wp.startdate) AS grp
    FROM with_prev wp
)
SELECT id,
       MIN(startdate) AS startdate,
       MAX(enddate) AS enddate
FROM islands
GROUP BY id, grp
Demo on DB Fiddle - with credits to Sander for creating the DDL statements in the first place:
id | startdate | enddate
-: | :--------- | :---------
1 | 2019-01-21 | 2019-01-27
1 | 2019-01-29 | 2019-02-02
have a look at
NEXT VALUE FOR method, works 2016 and later
Use a CTE or subquery (works in 2008) where you join on your own table using the previous value as a join. Here a sample script I use showing backup growth
-- Backup history with the size of the previous backup and the growth between the two.
-- T-SQL variables must be prefixed with @.
declare @backupType char(1)
      , @DatabaseName sysname
set @DatabaseName = db_name() --> Name of current database, null for all databases on server
set @backupType ='D' /* valid options are:
D = Database
I = Database Differential
L = Log
F = File or Filegroup
G = File Differential
P = Partial
Q = Partial Differential
*/
select backup_start_date
     , backup_finish_date
     , DurationSec
     , database_name,backup_size
     , PreviouseBackupSize
     , backup_size-PreviouseBackupSize as growth
     ,KbSec= format(KbSec,'N2')
FROM (
    select backup_start_date
         , backup_finish_date
         , datediff(second,backup_start_date,b.backup_finish_date) as DurationSec
         , b.database_name
         , b.backup_size/1024./1024. as backup_size
         -- guard against division by zero for backups that finish within the same second
         ,case when datediff(second,backup_start_date,b.backup_finish_date) >0
               then ( b.backup_size/1024.)/datediff(second,backup_start_date,b.backup_finish_date)
               else 0 end as KbSec
         -- , b.compressed_backup_size
         -- size of the closest earlier backup of the same database and backup type
         , (
             select top (1) p.backup_size/1024./1024.
             from msdb.dbo.backupset p
             where p.database_name = b.database_name
               and p.database_backup_lsn< b.database_backup_lsn
               and type=@backupType
             order by p.database_backup_lsn desc
           ) as PreviouseBackupSize
    from msdb.dbo.backupset as b
    -- parentheses are required: AND binds tighter than OR, so without them the backup-type
    -- filter would be skipped whenever @DatabaseName is NULL
    where (@DatabaseName IS NULL OR database_name =@DatabaseName)
      and type=@backupType
)as A
order by backup_start_date desc
Use a "cursor local fast_forward" to loop over the data row by row, together with a temporary table in which you store and compare the previous value.
Here is a solution with common table expressions that could work.
Sample data
-- Sample table and rows used by the solution that follows (dates as ISO yyyy-mm-dd literals).
create table data
(
id int,
startdate date,
enddate date
);
insert into data (id, startdate, enddate) values
(1, '2019-01-21', '2019-01-23'),
(1, '2019-01-23', '2019-01-24'),
(1, '2019-01-24', '2019-01-27'),
(1, '2019-01-29', '2019-02-02');
Solution
-- period starts: rows whose startdate is not the enddate of another row with the same id
WITH cte_start AS
(
    SELECT first_row.id,
           first_row.startdate
    FROM data first_row
    WHERE NOT EXISTS ( SELECT 1
                       FROM data prev_row
                       WHERE prev_row.id = first_row.id
                         AND prev_row.enddate = first_row.startdate )
),
-- pair every period start with the start of the following period (NULL for the last one)
cte_startnext AS
(
    SELECT st.id,
           st.startdate,
           LEAD(st.startdate) OVER (PARTITION BY st.id ORDER BY st.startdate) AS startdate_next
    FROM cte_start st
)
-- period end: the latest enddate among the rows that start inside the period's boundaries
SELECT bounds.id,
       bounds.startdate,
       last_row.enddate
FROM cte_startnext bounds
CROSS APPLY ( SELECT TOP 1 member.enddate
              FROM data member
              WHERE member.id = bounds.id
                AND member.startdate >= bounds.startdate
                AND (member.startdate < bounds.startdate_next OR bounds.startdate_next IS NULL)
              ORDER BY member.enddate DESC ) last_row
ORDER BY bounds.id,
         bounds.startdate;
Result
id startdate enddate
-- ---------- ----------
1 2019-01-21 2019-01-27
1 2019-01-29 2019-02-02
Fiddle to see build up of solution and intermediate CTE results.

Merge rows if date columns are overlapping in TSQL

I have a table in the following format
Id StartDate EndDate Type
1 2012-02-18 2012-03-18 1
1 2012-03-17 2012-06-29 1
1 2012-06-27 2012-09-27 1
1 2014-08-23 2014-09-24 3
1 2014-09-23 2014-10-24 3
1 2014-10-23 2014-11-24 3
2 2015-07-04 2015-08-06 1
2 2015-08-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
I found similar questions here, but nothing that could help me solve my problem. I want to merge rows that have the same Id and Type and overlapping date periods.
The result from the above table should be
Id StartDate EndDate Type
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
In another server, I was able to do it with the following restrictions and the query below:
Didn't care about the Type column, but just the Id
Had a newer version of SQL Server (2012), but now I have 2008 which the code is not compatible
-- Merge consecutive rows per Id: a running sum numbers the groups, then each group is collapsed.
SELECT Id
     , MIN(StartDate) AS StartDate
     , MAX(EndDate) AS EndDate
FROM (
    SELECT *
         -- a row continues the current group only when it starts exactly where the previous row ended
         , SUM(CASE WHEN a.PrevEndDate = a.StartDate THEN 0
                    ELSE 1
               END
              ) OVER (ORDER BY Id, StartDate) sm
    FROM (
        SELECT Id
             , StartDate
             , EndDate
             -- the lagged value needs its own name: a derived table cannot expose two columns called EndDate
             , LAG(EndDate, 1, NULL) OVER (PARTITION BY Id ORDER BY Id, EndDate) PrevEndDate
        FROM #temptable
    ) a
) b
GROUP BY Id, sm
Any advice how I can
Include Type on the process
Make it work on SQL Server 2008
This approach uses an additional temp table to identify the groups of overlapping dates, and then performs a quick aggregate based on the groupings.
-- Seed: copy the rows into #G; UID and GroupId are both taken from ROW_NUMBER() OVER (ORDER BY Id, Type).
SELECT *, ROW_NUMBER() OVER (ORDER BY Id, Type) AS UID,
       ROW_NUMBER() OVER (ORDER BY Id, Type) AS GroupId
INTO #G
FROM #TempTable
-- Repeat until an UPDATE changes no rows. The system function is @@ROWCOUNT; on first entry it
-- holds the row count of the SELECT ... INTO above.
WHILE @@ROWCOUNT <> 0 BEGIN
    -- Pull each row's GroupId down to the smaller GroupId of any overlapping row
    -- with the same Id and Type.
    UPDATE T1 SET
        GroupId = T2.GroupId
    FROM #G T1
    INNER JOIN (
        SELECT T1.UID, CASE WHEN T1.GroupId < T2.GroupId THEN T1.GroupId ELSE T2.GroupId END
        FROM #G T1
        LEFT OUTER JOIN #G T2
            ON T1.Id = T2.Id AND T1.Type = T2.Type AND T1.GroupId <> T2.GroupId
            AND T1.StartDate <= T2.EndDate AND T2.StartDate <= T1.EndDate
    ) T2 (UID, GroupId)
        ON T1.UID = T2.UID
    WHERE T1.GroupId <> T2.GroupId
END
-- Collapse every group into a single interval
SELECT Id, MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate, Type
FROM #G G GROUP BY GroupId, Id, Type
This returns the expected values
Id StartDate EndDate Type
----------- ---------- ---------- -----------
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
This is 2008 compatible. A CTE really is the best way to link up all overlapping records in my opinion. The date overlap logic came from this thread: SO Date Overlap
I added extra data that's more complex to make sure that it's working as expected.
-- Table variables must be named with an @ prefix (DECLARE cannot create a #-named object).
DECLARE @Data TABLE (Id INT, StartDate DATE, EndDate DATE, Type INT)
INSERT INTO @Data
SELECT 1,'2/18/2012' ,'3/18/2012', 1 UNION ALL
select 1,'3/17/2012','6/29/2012',1 UNION ALL
select 1,'6/27/2012','9/27/2012',1 UNION ALL
select 1,'8/23/2014','9/24/2014',3 UNION ALL
select 1,'9/23/2014','10/24/2014',3 UNION ALL
select 1,'10/23/2014','11/24/2014',3 UNION ALL
select 2,'7/4/2015','8/6/2015',1 UNION ALL
select 2,'8/4/2015','9/6/2015',1 UNION ALL
select 3,'11/1/2013','12/1/2013',0 UNION ALL
select 3,'1/9/2018','2/9/2018',0 UNION ALL
select 4,'1/1/2018','1/2/2018',0 UNION ALL --many non overlapping dates
select 4,'1/4/2018','1/5/2018',0 UNION ALL
select 4,'1/7/2018','1/9/2018',0 UNION ALL
select 4,'1/11/2018','1/13/2018',0 UNION ALL
select 4,'2/7/2018','2/8/2018',0 UNION ALL --many overlapping dates
select 4,'2/8/2018','2/9/2018',0 UNION ALL
select 4,'2/9/2018','2/10/2018',0 UNION all
select 4,'2/10/2018','2/11/2018',0 UNION all
select 4,'2/11/2018','2/12/2018',0 UNION all
select 4,'2/12/2018','2/13/2018',0 UNION all
select 4,'3/7/2018','3/8/2018',0 UNION ALL --many overlapping dates, second instance of id 4, type 0
select 4,'3/8/2018','3/9/2018',0 UNION ALL
select 4,'3/9/2018','3/10/2018',0 UNION all
select 4,'3/10/2018','3/11/2018',0 UNION all
select 4,'3/11/2018','3/12/2018',0 UNION all
select 4,'3/12/2018','3/13/2018',0
;
WITH cdata
AS (
    -- anchor: rows that no earlier-starting row of the same Id/Type overlaps
    SELECT Id,
           d.Type,
           d.StartDate,
           d.EndDate,
           CurrentStart = d.StartDate
    FROM @Data d
    WHERE NOT EXISTS (
              SELECT *
              FROM @Data x
              WHERE x.StartDate < d.StartDate
                AND d.StartDate <= x.EndDate
                AND d.EndDate >= x.StartDate
                AND d.Id = x.Id
                AND d.Type = x.Type --get first records for overlapping ranges
          )
    UNION ALL
    -- recursion: widen the accumulated range with each overlapping row that starts after CurrentStart
    SELECT d.Id,
           d.Type,
           StartDate = CASE WHEN d2.StartDate < d.StartDate THEN d2.StartDate ELSE d.StartDate END,
           EndDate = CASE WHEN d2.EndDate > d.EndDate THEN d2.EndDate ELSE d.EndDate END,
           CurrentStart = d2.StartDate
    FROM cdata d
    INNER JOIN @Data d2
        ON (
               d.StartDate <= d2.EndDate
               AND d.EndDate >= d2.StartDate
           )
       AND d2.Id = d.Id
       AND d2.Type = d.Type
       AND d2.StartDate > d.CurrentStart)
-- keep the furthest end reached for every range start
SELECT cdata.Id, cdata.Type, cdata.StartDate, EndDate = MAX(cdata.EndDate)
FROM cdata
GROUP BY cdata.Id, cdata.Type, cdata.StartDate
This looks like a Packing Intervals problem. See the post by Itzik Ben-Gan for all the details and what indexes he recommends to make it work efficiently. He presents a solution without recursive CTE.
Two notes.
The query below assumes that intervals are [closed; open), i.e. StartDate is inclusive and EndDate is exclusive. This way to represent such data is often the most convenient. (in the same sense as having arrays as zero-based instead of 1-based is usually more convenient in programming languages).
I added a RowID column to have unambiguous sorting.
Sample data
-- DECLARE only accepts @-prefixed names, so "DECLARE #T TABLE" does not run.
-- A temp table is created instead because every later statement refers to the table as #T.
CREATE TABLE #T
(
    RowID int IDENTITY,
    id int,
    StartDate date,
    EndDate date,
    tp int
);
INSERT INTO #T(Id, StartDate, EndDate, tp) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
-- Make EndDate an opened interval, make it exclusive
-- [Start; End)
-- (intentionally updates every row: there is no WHERE clause)
UPDATE #T
SET EndDate = DATEADD(day, 1, EndDate)
;
Recommended indexes
-- indexes to support solutions
-- (created on #T, the name the sample data and the query use; no table called T exists in this script)
CREATE UNIQUE INDEX idx_start_id ON #T(id, tp, StartDate, RowID);
CREATE UNIQUE INDEX idx_end_id ON #T(id, tp, EndDate, RowID);
Query
Read the Itzik's post to understand what is going on. He has nice illustrations there. In short, each timestamp (start or end) is treated as an event. Each event has a + or - type. Each time we encounter a + event (some interval starts) we increase the running counter. Each time we encounter a - event (some interval ends) we decrease the running counter. When the running counter is 0 it means that the streak of overlapping intervals is over.
I took Itzik's query as is and simply changed the column names to match your names.
-- Packs overlapping intervals per (id, tp): every start and end date becomes an event,
-- the events are numbered, and only the events that open or close a packed group are kept.
WITH C1 AS
-- let e = end ordinals, let s = start ordinals
(
SELECT
RowID, id, tp, StartDate AS ts, +1 AS EventType,
NULL AS e,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY StartDate, RowID) AS s
FROM #T
UNION ALL
SELECT
RowID, id, tp, EndDate AS ts, -1 AS EventType,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY EndDate, RowID) AS e,
NULL AS s
FROM #T
),
C2 AS
-- let se = start or end ordinal, namely, how many events (start or end) happened so far
(
SELECT C1.*,
-- EventType DESC orders a start (+1) before an end (-1) that falls on the same ts
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts, EventType DESC, RowID) AS se
FROM C1
),
C3 AS
-- For start events, the expression s - (se - s) - 1 represents how many sessions were active
-- just before the current (hence - 1)
--
-- For end events, the expression (se - e) - e represents how many sessions are active
-- right after this one
--
-- The above two expressions are 0 exactly when a group of packed intervals
-- either starts or ends, respectively
--
-- After filtering only events when a group of packed intervals either starts or ends,
-- group each pair of adjacent start/end events
(
SELECT id, tp, ts,
((ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts) - 1) / 2 + 1)
AS grpnum
FROM C2
WHERE COALESCE(s - (se - s) - 1, (se - e) - e) = 0
)
-- one day is subtracted from the latest event of each group when reporting EndDate
SELECT id, tp, MIN(ts) AS StartDate, DATEADD(day, -1, MAX(ts)) AS EndDate
FROM C3
GROUP BY id, tp, grpnum
ORDER BY id, tp, StartDate;
Result
+----+----+------------+------------+
| id | tp | StartDate | EndDate |
+----+----+------------+------------+
| 1 | 1 | 2012-02-18 | 2012-09-27 |
| 1 | 3 | 2014-08-23 | 2014-11-24 |
| 2 | 1 | 2015-07-04 | 2015-09-06 |
| 3 | 0 | 2013-11-01 | 2013-12-01 |
| 3 | 0 | 2018-01-09 | 2018-02-09 |
+----+----+------------+------------+
-- Temp table with sample rows.
-- NOTE(review): the second row ('2012-03-19' .. '2012-06-19') differs from the question's data
-- ('2012-03-17' .. '2012-06-29'), and every value is supplied as a string literal.
create table #table
(Id int,StartDate date, EndDate date, Type int)
insert into #table
values
('1','2012-02-18','2012-03-18','1'),('1','2012-03-19','2012-06-19','1'),
('1','2012-06-27','2012-09-27','1'),('1','2014-08-23','2014-09-24','3'),
('1','2014-09-23','2014-10-24','3'),('1','2014-10-23','2014-11-24','3'),
('2','2015-07-04','2015-08-06','1'),('2','2015-08-04','2015-09-06','1'),
('3','2013-11-01','2013-12-01','0'),('3','2018-01-09','2018-02-09','0')
-- NOTE(review): rows are grouped by the calendar year of StartDate and of EndDate;
-- no overlap test between rows is performed.
select ID,MIN(startdate)sd,MAX(EndDate)ed,type from #table
group by ID,TYPE,YEAR(startdate),YEAR(EndDate)
this can be easily achieved by using some window-functions and CTE's. Here is the solution
-- Table variables must be named with an @ prefix (DECLARE cannot create a #-named object).
DECLARE @table TABLE
(id INT,
 StartDate DATE,
 EndDate DATE,
 [Type] INT
);
INSERT INTO @table(Id, StartDate, EndDate, [Type]) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
WITH C1 AS
(
    -- PrevEnd = latest EndDate among all earlier rows of the same Id/Type (NULL for the first row)
    SELECT *,
           MAX(EndDate) OVER(PARTITION BY Id, [Type]
                             ORDER BY StartDate, EndDate
                             ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS PrevEnd
    FROM @table
),
C2 AS
(
    -- StartFlag is 1 when the row does not overlap anything before it;
    -- the running sum of the flags numbers the merged groups
    SELECT *,
           SUM(StartFlag) OVER(PARTITION BY Id, [Type]
                               ORDER BY StartDate, EndDate
                               ROWS UNBOUNDED PRECEDING) AS GroupID
    FROM C1
    CROSS APPLY ( VALUES(CASE WHEN StartDate <= PrevEnd THEN NULL ELSE 1 END) ) AS A(StartFlag)
)
SELECT Id, [Type], MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate
FROM C2
GROUP BY Id, [Type], GroupID;

SQL - Find if column dates include at least partially a date range

I need to create a report and I am struggling with the SQL script.
The table I want to query is a company_status_history table which has entries like the following (the ones that I can't figure out)
Table company_status_history
Columns:
| id | company_id | status_id | effective_date |
Data:
| 1 | 10 | 1 | 2016-12-30 00:00:00.000 |
| 2 | 10 | 5 | 2017-02-04 00:00:00.000 |
| 3 | 11 | 5 | 2017-06-05 00:00:00.000 |
| 4 | 11 | 1 | 2018-04-30 00:00:00.000 |
I want to answer to the question "Get all companies that have been at least for some point in status 1 inside the time period 01/01/2017 - 31/12/2017"
Above are the cases that I don't know how to handle since I need to add some logic of type :
"If this row is status 1 and its date is before the date range, check whether the next row has a date inside the date range."
"If this row is status 1 and its date is after the date range, check whether the previous row has a date inside the date range."
I think this can be handled as a gaps and islands problem. Consider the following input data: (same as sample data of OP plus two additional rows)
id company_id status_id effective_date
-------------------------------------------
1 10 1 2016-12-15
2 10 1 2016-12-30
3 10 5 2017-02-04
4 10 4 2017-02-08
5 11 5 2017-06-05
6 11 1 2018-04-30
You can use the following query:
-- For every history row, count the status-1 rows of the same company dated strictly earlier.
SELECT cur.id,
       cur.company_id,
       cur.status_id,
       cur.effective_date,
       seen.cnt
FROM company_status_history AS cur
OUTER APPLY
(
    SELECT COUNT(*) AS cnt
    FROM company_status_history AS prior
    WHERE prior.company_id = cur.company_id
      AND prior.status_id = 1
      AND prior.effective_date < cur.effective_date
) AS seen
ORDER BY company_id, effective_date
to get:
id company_id status_id effective_date cnt
-----------------------------------------------
1 10 1 2016-12-15 0
2 10 1 2016-12-30 1
3 10 5 2017-02-04 2
4 10 4 2017-02-08 2
5 11 5 2017-06-05 0
6 11 1 2018-04-30 0
Now you can identify status = 1 islands using:
-- grp = row position within the company minus the number of earlier status-1 rows
;WITH counted AS
(
    SELECT cur.id,
           cur.company_id,
           cur.status_id,
           cur.effective_date,
           seen.cnt
    FROM company_status_history AS cur
    OUTER APPLY
    (
        SELECT COUNT(*) AS cnt
        FROM company_status_history AS prior
        WHERE prior.company_id = cur.company_id
          AND prior.status_id = 1
          AND prior.effective_date < cur.effective_date
    ) AS seen
)
SELECT id,
       company_id,
       status_id,
       effective_date,
       ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) - cnt AS grp
FROM counted
Output:
id company_id status_id effective_date grp
-----------------------------------------------
1 10 1 2016-12-15 1
2 10 1 2016-12-30 1
3 10 5 2017-02-04 1
4 10 4 2017-02-08 2
5 11 5 2017-06-05 1
6 11 1 2018-04-30 2
Calculated field grp will help us identify those islands:
-- Date range of every island that contains at least one status-1 row.
;WITH counted AS
(
    -- number of earlier status-1 rows of the same company, per history row
    SELECT cur.id,
           cur.company_id,
           cur.status_id,
           cur.effective_date,
           seen.cnt
    FROM company_status_history AS cur
    OUTER APPLY
    (
        SELECT COUNT(*) AS cnt
        FROM company_status_history AS prior
        WHERE prior.company_id = cur.company_id
          AND prior.status_id = 1
          AND prior.effective_date < cur.effective_date
    ) AS seen
), grouped AS
(
    -- rows sharing a grp value belong to the same island
    SELECT id,
           company_id,
           status_id,
           effective_date,
           ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) - cnt AS grp
    FROM counted
)
SELECT company_id,
       MIN(effective_date) AS start_date,
       -- a multi-row island ends the day before its last row; a single-row island ends on its own date
       CASE
           WHEN COUNT(*) > 1 THEN DATEADD(DAY, -1, MAX(effective_date))
           ELSE MIN(effective_date)
       END AS end_date
FROM grouped
GROUP BY company_id, grp
HAVING MAX(CASE WHEN status_id = 1 THEN 1 ELSE 0 END) = 1
Output:
company_id start_date end_date
-----------------------------------
10 2016-12-15 2017-02-03
11 2018-04-30 2018-04-30
All you need now is the records from the above result that overlap with the specified interval.
Demo here with somewhat more complicated use case.
Maybe this is what you are looking for? For these kind of questions, you need to join two instance of your table, in this case I am just joining with next record by Id, which probably is not totally correct. To do it better, you can create a new Id using a windowed function like row_number, ordering the table by your requirement criteria
If this row is status 1 and it's date is before the date range check
the next row if it has a date inside the date range
-- Rule: a status-1 row dated before the range qualifies when the NEXT row falls inside the range.
-- T-SQL variables must be prefixed with @.
declare @range_st date = '2017-01-01'
declare @range_en date = '2017-12-31'
select
    case
        when csh1.status_id=1 and csh1.effective_date<@range_st
        then
            -- T-SQL has no true/false literals in this context; 1 and 0 stand in for them
            case
                when csh2.effective_date between @range_st and @range_en then 1
                else 0
            end
        else NULL
    end
from company_status_history csh1
-- csh2 is the row with the next id (id + 1). The original condition csh1.id = csh2.id + 1
-- picked the previous row instead.
-- NOTE(review): the join matches on id only; it does not check that both rows belong
-- to the same company_id.
left join company_status_history csh2
    on csh2.id=csh1.id+1
Implementing second criteria:
"If this row is status 1 and it's date is after the date range check
the row before if it has a date inside the date range."
-- Both criteria combined:
--   * status-1 row dated before the range -> is the NEXT row inside the range?
--   * status-1 row dated after the range  -> is the PREVIOUS row inside it?
-- Result: 1 = neighbour is in range, 0 = it is not (or does not exist),
-- NULL = neither criterion applies to this row.
DECLARE @range_st date = '2017-01-01';  -- T-SQL variables start with @, not #
DECLARE @range_en date = '2017-12-31';

SELECT
    CASE
        WHEN csh1.status_id = 1 AND csh1.effective_date < @range_st
        THEN
            CASE
                -- T-SQL has no TRUE/FALSE literals; use bit values instead.
                WHEN next_row.effective_date BETWEEN @range_st AND @range_en
                    THEN CAST(1 AS bit)
                ELSE CAST(0 AS bit)
            END
        WHEN csh1.status_id = 1 AND csh1.effective_date > @range_en
        THEN
            CASE
                WHEN prev_row.effective_date BETWEEN @range_st AND @range_en
                    THEN CAST(1 AS bit)
                ELSE CAST(0 AS bit)
            END
        ELSE NULL  -- rows inside the range or not in status 1 are not classified here
    END AS in_range
FROM company_status_history AS csh1
LEFT JOIN company_status_history AS next_row
    -- Row that FOLLOWS csh1. The original join conditions were inverted and
    -- compared each criterion with the wrong neighbour.
    ON next_row.id = csh1.id + 1
   AND next_row.company_id = csh1.company_id
LEFT JOIN company_status_history AS prev_row
    -- Row that PRECEDES csh1.
    ON prev_row.id = csh1.id - 1
   AND prev_row.company_id = csh1.company_id
-- NOTE(review): assumes consecutive ids follow a company's history order;
-- a ROW_NUMBER() per company is more robust.
I would suggest using a CTE and the window function ROW_NUMBER. With these you can find the desired records. An example:
-- Sample data held in a table variable (table variables and scalar variables
-- are prefixed with @; # denotes a temp table and cannot follow DECLARE).
DECLARE @t TABLE(
    id INT
   ,company_id INT
   ,status_id INT
   ,effective_date DATETIME
);
INSERT INTO @t (id, company_id, status_id, effective_date) VALUES
 (1, 10, 1, '2016-12-30 00:00:00.000')
,(2, 10, 5, '2017-02-04 00:00:00.000')
,(3, 11, 5, '2017-06-05 00:00:00.000')
,(4, 11, 1, '2018-04-30 00:00:00.000');

-- Inclusive bounds of the period being examined.
DECLARE @StartDate DATETIME = '2017-01-01';
DECLARE @EndDate DATETIME = '2017-12-31';

WITH cte AS (
    -- Number each company's rows in chronological order.
    SELECT *
          ,ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) AS rn
    FROM @t
),
cteLeadLag AS (
    -- Attach the previous (lag) and next (lead) effective date of the same
    -- company; when no neighbour exists, fall back to the row's own date.
    SELECT c.*
          ,ISNULL(c2.effective_date, c.effective_date) AS LagEffective
          ,ISNULL(c3.effective_date, c.effective_date) AS LeadEffective
    FROM cte c
    LEFT JOIN cte c2 ON c2.company_id = c.company_id AND c2.rn = c.rn - 1
    LEFT JOIN cte c3 ON c3.company_id = c.company_id AND c3.rn = c.rn + 1
)
-- Status-1 rows dated inside the period.
SELECT 'Included' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
  AND effective_date BETWEEN @StartDate AND @EndDate
UNION ALL
-- Status-1 rows dated after the period whose previous row is inside it.
SELECT 'Following' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
  AND effective_date > @EndDate
  AND LagEffective BETWEEN @StartDate AND @EndDate
UNION ALL
-- Status-1 rows dated before the period whose next row is inside it.
-- The bound is @StartDate: comparing with @EndDate would also match rows that
-- are already reported as 'Included' and return them twice.
SELECT 'Trailing' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
  AND effective_date < @StartDate
  AND LeadEffective BETWEEN @StartDate AND @EndDate
I first select all records with their leading and lagging Dates and then I perform your checks on the inclusion in the desired timespan.
Try this; it is self-explanatory. It responds to this part of your question:
I want to answer to the question "Get all companies that have been at
least for some point in status 1 inside the time period 01/01/2017 -
31/12/2017"
Case where you want to find the companies that have been in status 1 at any moment and that have records in the requested period:
-- History rows dated inside 2017 (bounds inclusive) for every company that has
-- been in status 1 at any time.
SELECT *
FROM company_status_history
WHERE company_id IN
      -- Match on company_id, not on the row key id: filtering on id only ever
      -- matches the row itself, which reduces to a plain status_id = 1 filter.
      ( SELECT company_id
        FROM company_status_history
        WHERE status_id = 1 )
  AND effective_date BETWEEN '2017-01-01' AND '2017-12-31'
Case where you want to find the rows that are in status 1 and inside the period:
-- Status-1 history rows dated within 2017 (both bounds inclusive).
SELECT *
FROM company_status_history
WHERE effective_date >= '2017-01-01'
  AND effective_date <= '2017-12-31'
  AND status_id = 1

SQL Server Select the most recent past date if no future date available

I have a table structure as below,
-- Sample data: one row per customer order, held in a session temp table.
CREATE TABLE #CustOrder
(
    CustId    INT,
    OrderDate DATE
)
INSERT INTO #CustOrder (CustId, OrderDate)
VALUES
    (1, '2016-11-01'),
    (1, '2019-09-01'),
    (2, '2019-07-01'),
    (2, '2019-11-01'),
    (3, '2017-01-01'),
    (4, '2016-12-01'),
    (4, '2017-01-01')
I want to list the customer with their future order dates, if they do not have a future order I want to list their last or most recent order. I have the following query.
-- Ranks each customer's orders by the absolute number of days between the
-- order date and the current UTC time, and returns the closest order per
-- customer (past or future alike).
; WITH RankedOrders AS
(
    SELECT
        Ord.CustId,
        Ord.OrderDate,
        ROW_NUMBER() OVER (
            PARTITION BY Ord.CustId
            ORDER BY ABS(DATEDIFF(DAY, Ord.OrderDate, GETUTCDATE()))
        ) AS RowNum
    FROM #CustOrder AS Ord
)
SELECT Ranked.CustId, Ranked.OrderDate
FROM RankedOrders AS Ranked
WHERE Ranked.RowNum = 1
This query gives me the result as,
CustId | OrderDate
--------+-------------
1 | 2016-11-01
2 | 2019-07-01
3 | 2017-01-01
4 | 2017-01-01
However, I need the result as,
CustId | OrderDate
--------+-------------
1 | 2019-09-01
2 | 2019-07-01
3 | 2017-01-01
4 | 2017-01-01
As
Customer 1 has a future order on 2019-09-01
Customer 2 has two future order but the first one is on 2019-07-01
Customer 3 has no more than 1 order, it should just return 2017-01-01
Customer 4 has two past orders but the most recent is 2017-01-01
rextester: http://rextester.com/PBKNA95127
-- Sample data: one row per customer order, held in a session temp table.
CREATE TABLE #CustOrder
(
    CustId    INT,
    OrderDate DATE
)
INSERT INTO #CustOrder (CustId, OrderDate)
VALUES
    (1, '2016-11-01'),
    (1, '2019-09-01'),
    (2, '2019-07-01'),
    (2, '2019-11-01'),
    (3, '2017-01-01'),
    (4, '2016-12-01'),
    (4, '2017-01-01')
-- Returns one order per customer: the nearest future order when one exists,
-- otherwise the most recent past order.
; WITH RankedOrders AS
(
    SELECT
        Ord.CustId,
        Ord.OrderDate,
        ROW_NUMBER() OVER (
            PARTITION BY Ord.CustId
            ORDER BY
                -- Future orders (0) sort ahead of past orders (1).
                CASE WHEN Ord.OrderDate > GETDATE() THEN 0 ELSE 1 END,
                -- Within each bucket, prefer the date closest to now.
                ABS(DATEDIFF(DAY, GETDATE(), Ord.OrderDate))
        ) AS RowNum
    FROM #CustOrder AS Ord
)
SELECT Ranked.CustId, Ranked.OrderDate
FROM RankedOrders AS Ranked
WHERE Ranked.RowNum = 1
results:
+--------+------------+
| CustId | OrderDate |
+--------+------------+
| 1 | 2019-09-01 |
| 2 | 2019-07-01 |
| 3 | 2017-01-01 |
| 4 | 2017-01-01 |
+--------+------------+
You can use the MAX function to check if the latest date is in the future. If so, get the MIN date after today using MIN. Else get the latest date.
-- For every customer, computes the target date (earliest future order when the
-- newest order is in the future, otherwise the newest order) and returns the
-- order rows that fall on that date.
;WITH OrdersWithTarget AS
(
    SELECT CustId,
           OrderDate,
           CASE
               WHEN MAX(OrderDate) OVER (PARTITION BY CustId) > GETUTCDATE()
                   -- Earliest order dated after now; past orders yield NULL and
                   -- are ignored by MIN.
                   THEN MIN(CASE WHEN OrderDate > GETUTCDATE() THEN OrderDate END)
                            OVER (PARTITION BY CustId)
               ELSE MAX(OrderDate) OVER (PARTITION BY CustId)
           END AS latest_date
    FROM #CustOrder
)
SELECT CUSTID, OrderDate
FROM OrdersWithTarget
WHERE OrderDate = latest_date
Min, Max, UNION approach
-- Returns one row per customer: the earliest future order when the customer
-- has one, otherwise the most recent past order.
-- "Today" is taken from the server clock instead of a hard-coded literal, so
-- the query does not go stale.
DECLARE @today date = CAST(GETDATE() AS date);

-- Customers with at least one future order: earliest future order.
SELECT custID, MIN(OrderDate) AS OrderDate
FROM #CustOrder
WHERE OrderDate > @today
GROUP BY custID
UNION ALL
-- Customers without any future order: latest order on record.
SELECT co1.custID, MAX(co1.OrderDate)
FROM #CustOrder AS co1
WHERE NOT EXISTS ( SELECT 1
                   FROM #CustOrder AS co2
                   WHERE co2.CustId = co1.CustId
                     AND co2.OrderDate > @today
                 )
GROUP BY co1.custID
Start your ORDER BY with a CASE expression that prefers future over past, and then use the ABS DATEDIFF (like you have now) as the second condition in the ORDER BY.
Maybe create another column and use the LAG() window function to grab the previous date, and then put a conditional/CASE expression in the SELECT list? https://msdn.microsoft.com/en-us/library/hh231256.aspx