SQL: CTE query Speed - sql

I am using SQL Server 2008 and am trying to increase the speed of my query below. The query assigns points to patients based on readmission dates.
Example: A patient is seen on 1/2, 1/5, 1/7, 1/8, 1/9, 2/4. I want to first group visits within 3 days of each other. 1/2-5 are grouped, 1/7-9 are grouped. 1/5 is NOT grouped with 1/7 because 1/5's actual visit date is 1/2. 1/7 would receive 3 points because it is a readmit from 1/2. 2/4 would also receive 3 points because it is a readmit from 1/7. When the dates are grouped the first date is the actual visit date.
Most articles suggest limiting the data set or adding indexes to increase speed. I have limited the amount of rows to about 15,000 and added a index. When running the query with 45 test visit dates/ 3 test patients, the query takes 1.5 min to run. With my actual data set it takes > 8 hrs.
How can I get this query to run < 1 hr? Is there a better way to write my query? Does my Index look correct? Any help would be greatly appreciated.
Example expected results below query.
;CREATE TABLE RiskReadmits(MRN INT, VisitDate DATE, Category VARCHAR(15))
;CREATE CLUSTERED INDEX Risk_Readmits_Index ON RiskReadmits(VisitDate)
;INSERT RiskReadmits(MRN,VisitDate,CATEGORY)
VALUES
(1, '1/2/2016','Inpatient'),
(1, '1/5/2016','Inpatient'),
(1, '1/7/2016','Inpatient'),
(1, '1/8/2016','Inpatient'),
(1, '1/9/2016','Inpatient'),
(1, '2/4/2016','Inpatient'),
(1, '6/2/2016','Inpatient'),
(1, '6/3/2016','Inpatient'),
(1, '6/5/2016','Inpatient'),
(1, '6/6/2016','Inpatient'),
(1, '6/8/2016','Inpatient'),
(1, '7/1/2016','Inpatient'),
(1, '8/1/2016','Inpatient'),
(1, '8/4/2016','Inpatient'),
(1, '8/15/2016','Inpatient'),
(1, '8/18/2016','Inpatient'),
(1, '8/28/2016','Inpatient'),
(1, '10/12/2016','Inpatient'),
(1, '10/15/2016','Inpatient'),
(1, '11/17/2016','Inpatient'),
(1, '12/20/2016','Inpatient')
;WITH a AS (
SELECT
z1.VisitDate
, z1.MRN
, (SELECT MIN(VisitDate) FROM RiskReadmits WHERE VisitDate > DATEADD(day, 3, z1.VisitDate)) AS NextDay
FROM
RiskReadmits z1
WHERE
CATEGORY = 'Inpatient'
), a1 AS (
SELECT
MRN
, MIN(VisitDate) AS VisitDate
, MIN(NextDay) AS NextDay
FROM
a
GROUP BY
MRN
), b AS (
SELECT
VisitDate
, MRN
, NextDay
, 1 AS OrderRow
FROM
a1
UNION ALL
SELECT
a.VisitDate
, a.MRN
, a.NextDay
, b.OrderRow +1 AS OrderRow
FROM
a
JOIN b
ON a.VisitDate = b.NextDay
), c AS (
SELECT
MRN,
VisitDate
, (SELECT MAX(VisitDate) FROM b WHERE b1.VisitDate > VisitDate AND b.MRN = b1.MRN) AS PreviousVisitDate
FROM
b b1
)
SELECT distinct
c1.MRN,
c1.VisitDate
, CASE
WHEN DATEDIFF(day,c1.PreviousVisitDate,c1.VisitDate) < 30 THEN PreviousVisitDate
ELSE NULL
END AS ReAdmissionFrom
, CASE
WHEN DATEDIFF(day,c1.PreviousVisitDate,c1.VisitDate) < 30 THEN 3
ELSE 0
END AS Points
FROM
c c1
ORDER BY c1.MRN
Expected Results:
MRN VisitDate ReAdmissionFrom Points
1 2016-01-02 NULL 0
1 2016-01-07 2016-01-02 3
1 2016-02-04 2016-01-07 3
1 2016-06-02 NULL 0
1 2016-06-06 2016-06-02 3
1 2016-07-01 2016-06-06 3
1 2016-08-01 NULL 0
1 2016-08-15 2016-08-01 3
1 2016-08-28 2016-08-15 3
1 2016-10-12 NULL 0
1 2016-11-17 NULL 0
1 2016-12-20 NULL 0

oops I changed the names of a few cte's (and the post messed up what was code)
It should be like this:
b AS (
SELECT
VisitDate
, MRN
, NextDay
, 1 AS OrderRow
FROM
a1
UNION ALL
SELECT
a.VisitDate
, a.MRN
, a.NextDay
, b.OrderRow +1 AS OrderRow
FROM
a AS a
JOIN b
ON a.VisitDate = b.NextDay AND a.MRN = b.MRN
)

I'm going to take a wild guess here and say you want to change the b cte to
have AND a.MRN = b.MRN as a second condition in the second select query like this:
, b AS (
SELECT
VisitDate
, MRN
, NextDay
, 1 AS OrderRow
FROM
firstVisitAndFollowUp
UNION ALL
SELECT
a.VisitDate
, a.MRN
, a.NextDay
, b.OrderRow +1 AS OrderRow
FROM
visitsDistance3daysOrMore AS a
JOIN b
ON a.VisitDate = b.NextDay AND a.MRN = b.MRN
)

Related

Rolling total in SQL that Resets to 0 when going above 90

First time post. Learning SQL over the past 6 months so help is appreciated. I have data structured as below:
DECLARE #tmp4 as TABLE (
AccountNumber int,
Date date,
DateRank int
)
INSERT INTO #tmp4
VALUES (001, '11/13/2018' , 1)
, (002, '12/19/2018', 2)
, (003, '1/23/2019' , 3)
, (004, '2/5/2019' , 4)
, (005, '3/10/2019' , 5)
, (006, '3/20/2019' , 6)
, (007, '4/8/2019' , 7)
, (008, '5/20/2019' , 8)
What I need to do with this data is calculate a rolling total that resets to 0 once a threshold of 90 days is reached. I have used the DateDiff function to calculate the DateDiffs between consecutive dates and have tried multiple things using LAG and other window functions but can't make it reset. The goal is to find "index visits" which can only occur once every 90 days. So my plan is to have a field that reads 0 on the first visit and resets to 0 for the next stay after 90 days is up from the first visit then only pull visits with a value of 0.
One solution I tried was correct for most sets but did not return the right values for the above set (rows 4 and 8 should start over as "index visits").
The results I would expect for this query would be:
Account Date DateRank RollingTotal
001 |'11/13/2018' | 1 | 0
002 |'12/19/2018' | 2 | 35
003 |'1/23/2019' | 3 | 71
004 |'2/5/2019' | 4 | 84
005 |'3/10/2019' | 5 | 0 (not 117)
006 |'3/20/2019' | 6 | 10
007 |'4/8/2019' | 7 | 29
008 |'5/20/2019' | 8 | 71
Thanks for any help.
Here's the code I tried:
DECLARE #tmp2 as TABLE
(EmrNumber varchar(255)
, AdmitDateTime datetime
, DateRank int
, LagDateDiff int
, RunningTotal int
)
INSERT INTO #tmp2
SELECT tmp1.EmrNumber
, tmp1.AdmitDateTime
, tmp1.DateRank
--, LAG(tmp1.AdmitDateTime) OVER(PARTITION BY tmp1.EmrNumber ORDER BY tmp1.DateRank) as NextAdmitDate
, -DATEDIFF(DAY, tmp1.AdmitDateTime, LAG(tmp1.AdmitDateTime) OVER(PARTITION BY tmp1.EmrNumber ORDER BY tmp1.DateRank)) LagDateDiff
, IIF((SELECT SUM(sumt.total)
FROM (
SELECT -DATEDIFF(DAY, tmpsum.AdmitDateTime, LAG(tmpsum.AdmitDateTime) OVER(PARTITION BY tmpsum.EmrNumber ORDER BY tmpsum.DateRank)) total
FROM #tmp tmpsum
WHERE tmp1.EmrNumber = tmpsum.EmrNumber
AND tmpsum.AdmitDateTime <= tmp1.AdmitDateTime
) sumt) IS NULL, 0, (SELECT SUM(sumt.total)
FROM (
SELECT -DATEDIFF(DAY, tmpsum.AdmitDateTime, LAG(tmpsum.AdmitDateTime) OVER(PARTITION BY tmpsum.EmrNumber ORDER BY tmpsum.DateRank)) total
FROM #tmp tmpsum
WHERE tmp1.EmrNumber = tmpsum.EmrNumber
AND tmpsum.AdmitDateTime <= tmp1.AdmitDateTime
) sumt) ) as RunningTotal
FROM #tmp tmp1
SELECT *
, CASE WHEN LagDateDiff >90 THEN 0
WHEN RunningTotal = 0 THEN 0
ELSE LAG(LagDateDiff) OVER(PARTITION BY EmrNumber ORDER BY DateRank) + RunningTotal END AS RollingTotal
FROM #tmp2
You need a recursive query for this, because the running total has to be checked iteratively, row after row:
with cte as (
select
Account,
Date,
DateRank,
0 RollingTotal
from #tmp4
where DateRank = 1
union all
select
t.Account,
t.Date,
t.DateRank,
case when RollingTotal + datediff(day, c.Date, t.Date) > 90
then 0
else RollingTotal + datediff(day, c.Date, t.Date)
end
from cte c
inner join #tmp4 t on t.DateRank = c.DateRank + 1
)
select * from cte
The anchor of the cte selects the first record (as indicated by DateRank. Then, the recursive part processes rows one by one, and resets the running count when it crosses 90.

Get sum of entries over last 6 months (incomplete months)

My data looks something like this
ProductNumber | YearMonth | Number
1 201803 1
1 201804 3
1 201810 6
2 201807 -3
2 201809 5
Now what I want to have is add an additional entry "6MSum" which is the sum of the last 6 months per ProductNumber (not the last 6 entries).
Please be aware the YearMonth data is not complete, for every ProductNumber there are gaps in between so I cant just use the last 6 entries for the sum. The final result should look something like this.
ProductNumber | YearMonth | Number | 6MSum
1 201803 1 1
1 201804 3 4
1 201810 6 9
2 201807 -3 -3
2 201809 5 2
Additionally I don't want to insert the sum to the table but instead use it in a query like:
SELECT [ProductNumber],[YearMonth],[Number],
6MSum = CONVERT(INT,SUM...)
FROM ...
I found a lot off solutions that use a "sum over period" but only for the last X entries and not for the actual conditional statement of "YearMonth within last 6 months".
Any help would be much appreciated!
Its a SQL Database
EDIT/Answer
It seems to be the case that the gaps within the months have to be filled with data, afterwards something like
sum(Number) OVER (PARTITION BY category
ORDER BY year, week
ROWS 6 PRECEDING) AS 6MSum
Should work.
Reference to the solution : https://dba.stackexchange.com/questions/181773/sum-of-previous-n-number-of-columns-based-on-some-category
You could go the OUTER APPLY route. The following produces your required results exactly:
-- prep data
SELECT
ProductNumber , YearMonth , Number
into #t
FROM ( values
(1, 201803 , 1 ),
(1, 201804 , 3 ),
(1, 201810 , 6 ),
(2, 201807 , -3 ),
(2, 201809 , 5 )
) s (ProductNumber , YearMonth , Number)
-- output
SELECT
ProductNumber
,YearMonth
,Number
,[6MSum]
FROM #t t
outer apply (
SELECT
sum(number) as [6MSum]
FROM #t it
where
it.ProductNumber = t.ProductNumber
and it.yearmonth <= t.yearmonth
and t.yearmonth - it.yearmonth between 0 and 6
) tt
drop table #t
Use outer apply and convert yearmonth to a date, something like this:
with t as (
select t.*,
convert(date, convert(varchar(255), yearmonth) + '01')) as ymd
from yourtable t
)
select t.*, t2.sum_6m
from t outer apply
(select sum(t2.number) as sum_6m
from t t2
where t2.productnumber = t.productnumber and
t2.ymd <= t.ymd and
t2.ymd > dateadd(month, -6, ymd)
) t2;
Just to provide one more option. You can use DATEFROMPARTS to build valid dates from the YearMonth value and then search for values within date ranges.
Testable here: https://rextester.com/APJJ99843
SELECT
ProductNumber , YearMonth , Number
INTO #t
FROM ( values
(1, 201803 , 1 ),
(1, 201804 , 3 ),
(1, 201810 , 6 ),
(2, 201807 , -3 ),
(2, 201809 , 5 )
) s (ProductNumber , YearMonth , Number)
SELECT *
,[6MSum] = (SELECT SUM(number) FROM #t WHERE
ProductNumber = t.ProductNumber
AND DATEFROMPARTS(LEFT(YearMonth,4),RIGHT(YearMonth,2),1) --Build a valid start of month date
BETWEEN
DATEADD(MONTH,-6,DATEFROMPARTS(LEFT(t.YearMonth,4),RIGHT(t.YearMonth,2),1)) --Build a valid start of month date 6 months back
AND DATEFROMPARTS(LEFT(t.YearMonth,4),RIGHT(t.YearMonth,2),1)) --Build a valid end of month date
FROM #t t
DROP TABLE #t
So a working query (provided by a colleauge of mine) can look like this
SELECT [YearMonth]
,[Number]
,[ProductNumber]
, (Select Sum(Number) from [...] DPDS_1 where DPDS.ProductNumber =
DPDS_1.ProductNumber and DPDS_1.YearMonth <= DPDS.YearMonth and DPDS_1.YearMonth >=
convert (int, left (convert (varchar, dateadd(mm, -6, DPDS.YearMonth + '01'), 112),
6)))FROM [...] DPDS

T-SQL - Select patients who are readmitted (within some duration) with the same diagnosis

I have a table with the following schema:
CREATE TABLE Codes
(
diagnosis_code CHAR,
visit_date DATE,
visit_id INT,
patient_id int
);
I would like to output the patient_ids where the patient is readmitted (so a different visit_id) with the same diagnosis_code within a certain time (say 15 days). For example, if I have the following entries in the table:
diagnosis_code visit_date visit_id patient_id
-------------- ---------- ----------- -----------
A 2018-01-01 1 1
B 2018-01-01 1 1
A 2018-01-07 2 1
C 2018-01-01 3 2
D 2018-01-01 4 3
D 2018-01-20 5 3
E 2018-01-01 6 4
E 2018-01-01 6 4
A 2018-01-07 7 1
The query would return only patient_id = 1, and the rationales are as follows:
1, because between visit_id 1 and 2, this patient shared diagnosis code A.
Not 2 because this patient was only admitted once.
Not 3 because this patient, although readmitted for the same diagnosis, was not readmitted within 15 days of their initial visit.
Not 4 because this patient has a duplicated diagnosis code in the same visit.
Notice that patient_id = 1 is readmitted for the same diagnosis during visit_id = 7, but he was already counted once before.
You could try a simple join, adding the conditions you described:
select
distinct c.patient_id
from codes c
join codes d on d.patient_id = c.patient_id
and d.visit_id <> c.visit_id
and d.diagnosis_code = c.diagnosis_code
and d.visit_date between c.visit_date
and dateadd(day, 15, c.visit_date)
i used lag.
declare #Codes table
(
diagnosis_code CHAR,
visit_date DATE,
visit_id INT,
patient_id int
);
insert into #Codes
values
('A', '2018-01-01' ,1, 1)
,('B' , '2018-01-01', 1, 1)
,('A' , '2018-01-07', 2, 1)
,('C' ,'2018-01-01', 3, 2)
/*
D 2018-01-01 4 3
D 2018-01-15 5 3
E 2018-01-01 6 4
E 2018-01-01 6 4
A 2018-01-07 7 1
*/
select *
from (
select *
--,rn=row_number() over (partition by patient_ID,diagnosis_code order by visit_date)
,DaysSince = datediff(day,lag(visit_date,1) over (partition by patient_ID,diagnosis_code order by visit_date),visit_date)
from #Codes
) a
where a.DaysSince<=15
You can also use inbuilt FIRST_VALUE and DATEADD functions to achieve this:
SELECT
DISTINCT patient_id,diagnosis_code
FROM
(SELECT
FIRST_VALUE(visit_date) OVER (PARTITION BY patient_id,diagnosis_code ORDER BY visit_id ASC) AS Initial_Visit,
DATEADD(DAY,15,first_value(visit_date) OVER (PARTITION BY patient_id,diagnosis_code ORDER BY visit_id ASC)) Window
,* FROM Codes
)m
WHERE
Initial_Visit <> visit_date
AND visit_date <= Window

Getting a date + 3 days ( using specific date) SQL

I'm trying to get the next available day after a result set.
This is the query I'm using but is totally wrong:
SELECT DateID = ROW_NUMBER() over (order by B.Date_Key) , B.ClosingDate, C.dates AS RecDay
FROM DIM_DATE B JOIN [dbo].[WorkDay_Calendar] C on C.dates = DATEADD(DAY,3, B.ClosingDate) WHERE YEAR(B.ClosingDate) >= '2018'
AND C.[Sentday] = 0 and C.[RecDay] = 0
This query is
retrieving the RecDay when Closingdate +3 days = to Sentday AND What I want is
when Closingdate + 3(Sentday) then pick the next RecDay,
something like C.dates = DATEADD(DAY,3(Sentday), B.ClosingDate).
This is how are looking my tables:
Dim_Date table
WorkDay_Calendar Table
Notice that when Sentday and RecDay are valid when = 0 if 1 is not valid because is a weekend or holiday.
Based on this information for example if I pick from the Dim_Date table 2018-02-02 as one of the Closingdate then the RecDay should be:
DateID RecDay
------------------------
1 2018-02-07
And with the current query is retrieving this which is totally wrong:
DateID RecDay
-----------------------
1 2018-02-05
Graphic explanation below and please follow the 0 in Bold:
More output examples:
Using the dates below as ClosingDate:
Date_Key ClosingDate:
38284 2018-07-24
38287 2018-01-10
38290 2018-03-08
38291 2018-07-13
38293 2018-02-08
Using the same order of the ClosingDates these should be the outputs, I incluided the ClosingDate column so you can follow the order:
OUTPUTS:
DateID ClosingDate RecDay (output)
1 2018-07-24 2018-07-30
2 2018-01-10 2018-01-16
3 2018-03-08 2018-03-13
4 2018-07-13 2018-07-18
5 2018-02-08 2018-02-13
I'm not sure If if followed you correctly, but based on your condition, you want to check the date dimension table based on calendar table. If ClosingDate + 3 days is equal to SentDay then you need to get the ReceiveDay. if that's what you need. then try this out :
UPDATED
SELECT
ROW_NUMBER() OVER (ORDER BY Date_key) DateID,
ClosingDateOLD,
C.Dates
FROM (
SELECT
Date_key,
ClosingDate AS ClosingDateOLD,
CASE
WHEN DATENAME(dw, DATEADD(DAY, 4, ClosingDate)) IN ('Saturday') THEN DATEADD(DAY, 6, ClosingDate)
WHEN DATENAME(dw, DATEADD(DAY, 4, ClosingDate)) IN ('Sunday') THEN DATEADD(DAY, 5, ClosingDate)
ELSE DATEADD(DAY, 5, ClosingDate)
END AS ClosingDate
FROM
#DIM_DATE
WHERE
ClosingDate IS NOT NULL
) D
JOIN #Calendar C ON C.Dates = ClosingDate
As I understand the requirements it would be something like this.
I am posting a full working example in case somebody wants to take a crack at this.
create table #DIM_DATE
(
DateKey int
, ClosingDate date
)
insert #DIM_DATE values
(1, NULL)
, (2, '2018-01-02')
, (3, NULL)
, (4, NULL)
create table #CalendarTable
(
ID int
, SentDay date
, ReceiveDay date
)
insert #CalendarTable values
(1, '2018-01-03', '2018-01-02')
, (2, '2018-01-04', '2018-01-03')
, (3, '2018-01-05', '2018-01-08')
SELECT DateID = ROW_NUMBER() over (order by d.DateKey)
, ct.ReceiveDay
FROM #DIM_DATE d
join #CalendarTable ct on ct.SentDay = dateadd(day, 3, d.ClosingDate)
drop table #DIM_DATE
drop table #CalendarTable

Remove duplicate data in 'date-ranged' rows

I have a table like the following.
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 3/1/2009 1 C
1 3/1/2009 4/1/2009 1 C
2 1/1/2010 2/1/2010 0 D
2 3/1/2010 4/1/2010 1 D
The date range is used to know for what time period the rest of the Attributes were valid, the problem i have is that there are several consecutive time ranges where the Attributes ramain the same, what I would like is to obtain the same data but without the duplicate rows.
From the previous example, my expected end result would be like this:
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 4/1/2009 1 C
2 1/1/2010 2/1/2010 0 D
2 3/1/2010 4/1/2010 1 D
What I did was merge the 2nd and 3rd row into one (All attribute except the date were the same), but I kept the StartDate of the 2nd and the endDate of the 3rd row.
I first thought of grouping by the values obtaining the MAX and MIN like this
SELECT ID, MIN(StartDate), MAX(EndDate), attributeA, attributeB
FROM MyTable
Group BY ID, AttributeA, AttributeB
But as soon as I run it I realized that when the attributes change several times and go back to their original value I'd end up with overlapping intervals. I've been stuck for a while now trying to figure out how to fix this problem.
Here's an example of what I meant in my previous statement.
When the initial data looks like the following:
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 3/1/2009 0 D
1 3/1/2009 4/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
1 6/1/2010 6/1/2009 0 D
Grouping the results would end up like the following
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 6/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
And what I'd like to obtain is this
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 4/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
1 6/1/2010 6/1/2009 0 D
Any help would be welcomed :)
EDIT: I'll be uploading some sample data soon to make my problem a bit easier to understand.
EDIT2: Here's a script with some of my data. From that sample what I'd like to obtain are the following rows.
ID StartDate EndDate A B C D E F
-- --------- ------- -- -- -- -- -- --
708513 1980-01-01 2006-07-23 15 ASDB A ACT 130 0
708513 2006-07-24 2009-12-08 15 ASDB A ACT 130 2
708513 2009-12-09 2010-01-12 0 ASDB A ACT 130 2
708513 2010-01-13 2079-05-30 15 ASDB A ACT 130 2
EDITED, following comments. Try:
;with cte as (
select m1.ID, m1.StartDate, m1.EndDate, m1.a, m1.b, m1.c, m1.d, m1.e, m1.f
from sampledata m1
where not exists
(select null from sampledata m0
where m1.ID = m0.ID and
m1.a = m0.a and
m1.b = m0.b and
m1.c = m0.c and
m1.d = m0.d and
m1.e = m0.e and
m1.f = m0.f and
dateadd(day, -1, m1.StartDate) = m0.EndDate)
union all
select m1.ID, m1.StartDate, m2.EndDate, m1.a, m1.b, m1.c, m1.d, m1.e, m1.f
from cte m1
join sampledata m2
on m1.ID = m2.ID and
m1.a = m2.a and
m1.b = m2.b and
m1.c = m2.c and
m1.d = m2.d and
m1.e = m2.e and
m1.f = m2.f and
dateadd(day, 1, m1.EndDate) = m2.StartDate)
select ID, StartDate, max(EndDate) EndDate, a, b, c, d, e, f
from cte
group by ID, StartDate, a, b, c, d, e, f
OPTION (MAXRECURSION 32767)
I made a version without recursion if someone is interested. I didn't really figure out how to add extra columns not used in comparison in the previous example.
IF OBJECT_ID('tempdb..#test') IS NOT NULL drop table #test
create table #test (
id int identity(1, 1)
, ship nvarchar(64)
, color nvarchar(16)
, [length] int
, height int
, [type] nvarchar(16)
, country nvarchar(16)
, StartDate date
)
insert into #test(ship, color, [length], height, [type], country, StartDate)
values
('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-01-01')
, ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-02-01')
, ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-03-01')
, ('Ship 1', 'Red', 200, 13, 'motorboat', 'sweden', '2019-11-01')
, ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-12-01')
, ('Ship 2', 'Green', 400, 27, 'RoRo', 'denmark', '2019-02-01')
;
with step1 as (
select t.*
, [EndDate] = dateadd(day, -1, lead(t.StartDate, 1, '9999-12-31') over(partition by t.ship order by t.StartDate))
from #test t
where 1 = 1
)
, step2 as (
select t.*
-- Check if preceeding row with same attribute has enddate between this startdate
, [IdenticalPreceeding] = case
when t.StartDate = dateadd(day, 1, lag(t.EndDate, 1, '1900-01-01') over (partition by t.ship, t.color, t.[length], t.height, t.[type], t.country order by t.Startdate)) then 1
else 0
end
from step1 t
)
select t.*
, [EndDateFinal] = dateadd(day, -1, lead(t.StartDate, 1, '9999-12-31') over(partition by t.ship order by t.StartDate))
from step2 t
where 1 = 1
-- Remove rows with identical preceeders
and t.IdenticalPreceeding = 0
order by t.ship
, t.StartDate