Remove duplicate data in 'date-ranged' rows - sql

I have a table like the following.
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 3/1/2009 1 C
1 3/1/2009 4/1/2009 1 C
2 1/1/2010 2/1/2010 0 D
2 3/1/2010 4/1/2010 1 D
The date range is used to know for what time period the rest of the attributes were valid. The problem I have is that there are several consecutive time ranges where the attributes remain the same; what I would like is to obtain the same data but without the duplicate rows.
From the previous example, my expected end result would be like this:
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 4/1/2009 1 C
2 1/1/2010 2/1/2010 0 D
2 3/1/2010 4/1/2010 1 D
What I did was merge the 2nd and 3rd row into one (All attribute except the date were the same), but I kept the StartDate of the 2nd and the endDate of the 3rd row.
I first thought of grouping by the values obtaining the MAX and MIN like this
-- Naive attempt: collapse all rows sharing (ID, AttributeA, AttributeB)
-- into a single range from the earliest start to the latest end.
SELECT
    ID,
    MIN(StartDate),
    MAX(EndDate),
    attributeA,
    attributeB
FROM MyTable
GROUP BY
    ID,
    AttributeA,
    AttributeB
But as soon as I ran it I realized that when the attributes change several times and go back to their original value, I'd end up with overlapping intervals. I've been stuck for a while now trying to figure out how to fix this problem.
Here's an example of what I meant in my previous statement.
When the initial data looks like the following:
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 3/1/2009 0 D
1 3/1/2009 4/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
1 6/1/2010 6/1/2009 0 D
Grouping the results would end up like the following
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 6/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
And what I'd like to obtain is this
ID StartDate EndDate AttributeA AttributeB
-- --------- ------- ---------- ----------
1 1/1/2009 2/1/2009 0 C
1 2/1/2009 4/1/2009 0 D
1 4/1/2009 5/1/2009 1 D
1 6/1/2010 6/1/2009 0 D
Any help would be welcomed :)
EDIT: I'll be uploading some sample data soon to make my problem a bit easier to understand.
EDIT2: Here's a script with some of my data. From that sample what I'd like to obtain are the following rows.
ID StartDate EndDate A B C D E F
-- --------- ------- -- -- -- -- -- --
708513 1980-01-01 2006-07-23 15 ASDB A ACT 130 0
708513 2006-07-24 2009-12-08 15 ASDB A ACT 130 2
708513 2009-12-09 2010-01-12 0 ASDB A ACT 130 2
708513 2010-01-13 2079-05-30 15 ASDB A ACT 130 2

EDITED, following comments. Try:
-- Merge runs of rows that share ID and attributes a..f and whose ranges are
-- contiguous (one row's EndDate is the day before the next row's StartDate).
;WITH cte AS (
    -- Anchor: rows that open a run, i.e. no matching row ends on the day
    -- before this row starts.
    SELECT head.ID, head.StartDate, head.EndDate,
           head.a, head.b, head.c, head.d, head.e, head.f
    FROM sampledata AS head
    WHERE NOT EXISTS (
        SELECT NULL
        FROM sampledata AS prev
        WHERE prev.ID = head.ID
          AND prev.a = head.a
          AND prev.b = head.b
          AND prev.c = head.c
          AND prev.d = head.d
          AND prev.e = head.e
          AND prev.f = head.f
          AND prev.EndDate = DATEADD(DAY, -1, head.StartDate)
    )
    UNION ALL
    -- Recursive step: keep the run's StartDate and extend EndDate with the
    -- matching row that starts the day after the current EndDate.
    SELECT run.ID, run.StartDate, nxt.EndDate,
           run.a, run.b, run.c, run.d, run.e, run.f
    FROM cte AS run
    INNER JOIN sampledata AS nxt
        ON  nxt.ID = run.ID
        AND nxt.a = run.a
        AND nxt.b = run.b
        AND nxt.c = run.c
        AND nxt.d = run.d
        AND nxt.e = run.e
        AND nxt.f = run.f
        AND nxt.StartDate = DATEADD(DAY, 1, run.EndDate)
)
-- Each run appears once per extension step; keep only its furthest EndDate.
SELECT ID, StartDate, MAX(EndDate) AS EndDate, a, b, c, d, e, f
FROM cte
GROUP BY ID, StartDate, a, b, c, d, e, f
OPTION (MAXRECURSION 32767)

I made a version without recursion if someone is interested. I didn't really figure out how to add extra columns not used in comparison in the previous example.
-- Rebuild the demo table on every run.
IF OBJECT_ID('tempdb..#test') IS NOT NULL
    DROP TABLE #test

CREATE TABLE #test (
    id        INT IDENTITY(1, 1),
    ship      NVARCHAR(64),
    color     NVARCHAR(16),
    [length]  INT,
    height    INT,
    [type]    NVARCHAR(16),
    country   NVARCHAR(16),
    StartDate DATE
)

INSERT INTO #test (ship, color, [length], height, [type], country, StartDate)
VALUES
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-01-01'),
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-02-01'),
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-03-01'),
    ('Ship 1', 'Red', 200, 13, 'motorboat', 'sweden', '2019-11-01'),
    ('Ship 1', 'Blue', 200, 13, 'sailboat', 'sweden', '2019-12-01'),
    ('Ship 2', 'Green', 400, 27, 'RoRo', 'denmark', '2019-02-01')
;
WITH step1 AS (
    -- A row ends the day before the same ship's next StartDate; the last row
    -- of a ship uses the '9999-12-31' default as its "next" start.
    SELECT
        t.*,
        DATEADD(DAY, -1,
                LEAD(t.StartDate, 1, '9999-12-31')
                    OVER (PARTITION BY t.ship ORDER BY t.StartDate)) AS [EndDate]
    FROM #test AS t
),
step2 AS (
    -- Flag a row when the previous row with identical attributes ended the
    -- day before this row starts, i.e. this row merely continues that one.
    SELECT
        t.*,
        CASE
            WHEN t.StartDate = DATEADD(DAY, 1,
                     LAG(t.EndDate, 1, '1900-01-01')
                         OVER (PARTITION BY t.ship, t.color, t.[length], t.height, t.[type], t.country
                               ORDER BY t.Startdate))
                THEN 1
            ELSE 0
        END AS [IdenticalPreceeding]
    FROM step1 AS t
)
-- Keep only the rows that open a run and recompute the end date against the
-- next surviving row of the same ship.
SELECT
    t.*,
    DATEADD(DAY, -1,
            LEAD(t.StartDate, 1, '9999-12-31')
                OVER (PARTITION BY t.ship ORDER BY t.StartDate)) AS [EndDateFinal]
FROM step2 AS t
WHERE t.IdenticalPreceeding = 0
ORDER BY
    t.ship,
    t.StartDate

Related

How to display multiple rows based row difference data In SQL?

I have a table as follows:
EmpID FromYM ToYM EmpYl EmpX1
----- ------ ----- ------ ----------
1001 202101 202101 20210103 20210103
1001 202102 202103 20210103 20210103
I want to split this data based on above data.
If the range from FromYM to ToYM (both are year-month values, yyyymm) covers two months, the row should be returned twice, numbered 1 and 2 in DiffNoCount:
Example result :
EmpID FromYM ToYM EmpYl DiffNoCount
------ ------ ---- ----- ------
1001 202101 202101 20210103 1
1001 202102 202103 20210103 1
1001 202102 202103 20210103 2
Tried Code
-- Drop leftovers from a previous run so the script can be re-executed.
IF OBJECT_ID(N'tempdb..#rawdata1') IS NOT NULL
BEGIN
    DROP TABLE #rawdata1
END
IF OBJECT_ID(N'tempdb..#rawdata2') IS NOT NULL
BEGIN
    DROP TABLE #rawdata2
END
go
-- Local variables use the @ sigil; # is only valid for temp-table names.
declare @Max as int
declare @Kount as int
Set @Kount = 1
-- Number the source rows by EmpID and stage them in #rawdata1.
SELECT row_number() Over (order by EmpID) as row
    , EmpID
    , FromYM
    , ToYM
    , EmpYl
    , EmpX1
into #rawdata1
FROM [dbo].[ASAAValue1]
order by EmpID
-- NOTE(review): @Max is the largest FromYM value (a yyyymm number), not a row
-- count, so the loop below iterates that many times - confirm this is intended.
set @Max = (Select Max(FromYM) from #rawdata1)
Create Table #Rawdata2
(
    [Row] int,
    Rolling int,
    RollingAvg decimal(15,2),
    RollingFinal int
)
while (@Kount < @Max)
Begin
    -- For counter value @Kount, copy the rows numbered @Kount-12 .. @Kount.
    Insert into #rawdata2
    select @Kount as Row
        , FromYM as Rolling
        , ToYM as RollingAvg
        , Case
              When Convert(int, Convert(nvarchar(6), EmpYl)) >= FromYM Then FromYM
              Else FromYM + 1
          End
    from #rawdata1
    where row between @Kount - 12 and @Kount
    set @Kount = @Kount + 1
end
-- Pair every staged row with the RollingFinal values produced for its row number.
select rd1.row,
    rd1.EmpID, Rd1.FromYM, Rd1.ToYM, Rd1.EmpYl, rd2.RollingFinal as Final
from #rawdata2 rd2
inner join #rawdata1 rd1
    on rd1.row = rd2.row
You can do it using recursive cte:
-- Emit each (f, t) year-month range once per month it covers; n numbers the copies.
WITH data AS (
    SELECT '202101' AS f, '202101' AS t
    UNION ALL
    SELECT '202102' AS f, '202103' AS t
),
rcte AS (
    -- Anchor: every range once, with n = 1.
    SELECT *, 1 AS n
    FROM data
    UNION ALL
    -- Recurse while n has not yet reached the month difference between f and t.
    SELECT f, t, n + 1
    FROM rcte
    WHERE DATEDIFF(MONTH,
                   CONVERT(DATE, f + '01', 112),
                   CONVERT(DATE, t + '01', 112)) >= n
)
SELECT *
FROM rcte
You can test on this db<>fiddle

T-SQL - Select patients who are readmitted (within some duration) with the same diagnosis

I have a table with the following schema:
-- Diagnosis codes recorded for patient visits.
CREATE TABLE Codes (
    diagnosis_code CHAR,
    visit_date     DATE,
    visit_id       INT,
    patient_id     INT
);
I would like to output the patient_ids where the patient is readmitted (so a different visit_id) with the same diagnosis_code within a certain time (say 15 days). For example, if I have the following entries in the table:
diagnosis_code visit_date visit_id patient_id
-------------- ---------- ----------- -----------
A 2018-01-01 1 1
B 2018-01-01 1 1
A 2018-01-07 2 1
C 2018-01-01 3 2
D 2018-01-01 4 3
D 2018-01-20 5 3
E 2018-01-01 6 4
E 2018-01-01 6 4
A 2018-01-07 7 1
The query would return only patient_id = 1, and the rationales are as follows:
1, because between visit_id 1 and 2, this patient shared diagnosis code A.
Not 2 because this patient was only admitted once.
Not 3 because this patient, although readmitted for the same diagnosis, was not readmitted within 15 days of their initial visit.
Not 4 because this patient has a duplicated diagnosis code in the same visit.
Notice that patient_id = 1 is readmitted for the same diagnosis during visit_id = 7, but he was already counted once before.
You could try a simple join, adding the conditions you described:
-- Patients having two different visits with the same diagnosis code, where
-- the second visit falls on or up to 15 days after the first.
SELECT DISTINCT
    first_visit.patient_id
FROM codes AS first_visit
INNER JOIN codes AS readmit
    ON  readmit.patient_id = first_visit.patient_id
    AND readmit.diagnosis_code = first_visit.diagnosis_code
    AND readmit.visit_id <> first_visit.visit_id
    AND readmit.visit_date BETWEEN first_visit.visit_date
                               AND DATEADD(DAY, 15, first_visit.visit_date)
i used lag.
-- Table variables are declared with the @ sigil (# names a temp table and is
-- not valid in DECLARE).
declare @Codes table
(
diagnosis_code CHAR,
visit_date DATE,
visit_id INT,
patient_id int
);
insert into @Codes (diagnosis_code, visit_date, visit_id, patient_id)
values
('A', '2018-01-01' ,1, 1)
,('B' , '2018-01-01', 1, 1)
,('A' , '2018-01-07', 2, 1)
,('C' ,'2018-01-01', 3, 2)
/*
D 2018-01-01 4 3
D 2018-01-15 5 3
E 2018-01-01 6 4
E 2018-01-01 6 4
A 2018-01-07 7 1
*/
-- DaysSince = days between a row and the previous row (by visit_date) of the
-- same patient and diagnosis; it is NULL for the first such row, which the
-- outer filter therefore drops.
-- NOTE(review): visit_id is never compared, so a diagnosis code repeated
-- within one visit (DaysSince = 0) also passes the filter.
select *
from (
select *
--,rn=row_number() over (partition by patient_ID,diagnosis_code order by visit_date)
,DaysSince = datediff(day,lag(visit_date,1) over (partition by patient_ID,diagnosis_code order by visit_date),visit_date)
from @Codes
) a
where a.DaysSince<=15
You can also use inbuilt FIRST_VALUE and DATEADD functions to achieve this:
-- Patient/diagnosis pairs with a row dated after the pair's initial visit
-- (first row by visit_id) but no later than 15 days after it.
SELECT DISTINCT
    patient_id,
    diagnosis_code
FROM (
    SELECT
        FIRST_VALUE(visit_date) OVER (PARTITION BY patient_id, diagnosis_code ORDER BY visit_id ASC) AS Initial_Visit,
        DATEADD(DAY, 15, FIRST_VALUE(visit_date) OVER (PARTITION BY patient_id, diagnosis_code ORDER BY visit_id ASC)) AS Window,
        *
    FROM Codes
) AS m
WHERE visit_date <> Initial_Visit
  AND visit_date <= Window

SQL - Find if column dates include at least partially a date range

I need to create a report and I am struggling with the SQL script.
The table I want to query is a company_status_history table which has entries like the following (the ones that I can't figure out)
Table company_status_history
Columns:
| id | company_id | status_id | effective_date |
Data:
| 1 | 10 | 1 | 2016-12-30 00:00:00.000 |
| 2 | 10 | 5 | 2017-02-04 00:00:00.000 |
| 3 | 11 | 5 | 2017-06-05 00:00:00.000 |
| 4 | 11 | 1 | 2018-04-30 00:00:00.000 |
I want to answer to the question "Get all companies that have been at least for some point in status 1 inside the time period 01/01/2017 - 31/12/2017"
Above are the cases that I don't know how to handle since I need to add some logic of type :
"If this row is status 1 and it's date is before the date range check the next row if it has a date inside the date range."
"If this row is status 1 and it's date is after the date range check the row before if it has a date inside the date range."
I think this can be handled as a gaps and islands problem. Consider the following input data: (same as sample data of OP plus two additional rows)
id company_id status_id effective_date
-------------------------------------------
1 10 1 2016-12-15
2 10 1 2016-12-30
3 10 5 2017-02-04
4 10 4 2017-02-08
5 11 5 2017-06-05
6 11 1 2018-04-30
You can use the following query:
-- For every history row, count the same company's status-1 rows dated
-- strictly earlier.
SELECT
    h.id,
    h.company_id,
    h.status_id,
    h.effective_date,
    earlier.cnt
FROM company_status_history AS h
OUTER APPLY (
    SELECT COUNT(*) AS cnt
    FROM company_status_history AS s
    WHERE s.company_id = h.company_id
      AND s.status_id = 1
      AND s.effective_date < h.effective_date
) AS earlier
ORDER BY company_id, effective_date
to get:
id company_id status_id effective_date cnt
-----------------------------------------------
1 10 1 2016-12-15 0
2 10 1 2016-12-30 1
3 10 5 2017-02-04 2
4 10 4 2017-02-08 2
5 11 5 2017-06-05 0
6 11 1 2018-04-30 0
Now you can identify status = 1 islands using:
-- grp = position of the row within its company (by effective_date) minus the
-- number of earlier status-1 rows of that company.
;WITH CTE AS (
    SELECT
        h.id,
        h.company_id,
        h.status_id,
        h.effective_date,
        earlier.cnt
    FROM company_status_history AS h
    OUTER APPLY (
        SELECT COUNT(*) AS cnt
        FROM company_status_history AS s
        WHERE s.company_id = h.company_id
          AND s.status_id = 1
          AND s.effective_date < h.effective_date
    ) AS earlier
)
SELECT
    id,
    company_id,
    status_id,
    effective_date,
    ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) - cnt AS grp
FROM CTE
Output:
id company_id status_id effective_date grp
-----------------------------------------------
1 10 1 2016-12-15 1
2 10 1 2016-12-30 1
3 10 5 2017-02-04 1
4 10 4 2017-02-08 2
5 11 5 2017-06-05 1
6 11 1 2018-04-30 2
Calculated field grp will help us identify those islands:
-- One row per (company, grp) group that contains at least one status-1 row,
-- with the group's start and end dates.
;WITH CTE AS (
    -- cnt = number of the company's status-1 rows dated strictly earlier.
    SELECT
        h.id,
        h.company_id,
        h.status_id,
        h.effective_date,
        earlier.cnt
    FROM company_status_history AS h
    OUTER APPLY (
        SELECT COUNT(*) AS cnt
        FROM company_status_history AS s
        WHERE s.company_id = h.company_id
          AND s.status_id = 1
          AND s.effective_date < h.effective_date
    ) AS earlier
),
CTE2 AS (
    -- grp = row position within the company minus cnt.
    SELECT
        id,
        company_id,
        status_id,
        effective_date,
        ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) - cnt AS grp
    FROM CTE
)
SELECT
    company_id,
    MIN(effective_date) AS start_date,
    -- A multi-row group ends the day before its last row's date;
    -- a single-row group starts and ends on the same date.
    CASE
        WHEN COUNT(*) > 1 THEN DATEADD(DAY, -1, MAX(effective_date))
        ELSE MIN(effective_date)
    END AS end_date
FROM CTE2
GROUP BY company_id, grp
HAVING COUNT(CASE WHEN status_id = 1 THEN 1 END) > 0
Output:
company_id start_date end_date
-----------------------------------
10 2016-12-15 2017-02-03
11 2018-04-30 2018-04-30
All you need now is to select those records from the above that overlap with the specified interval.
Demo here with somewhat more complicated use case.
Maybe this is what you are looking for? For this kind of question you need to join two instances of your table. In this case I am just joining with the next record by id, which is probably not totally correct. To do it better, you can create a new id using a window function such as ROW_NUMBER, ordering the table by your required criteria.
If this row is status 1 and it's date is before the date range check
the next row if it has a date inside the date range
-- Rule: "if this row is status 1 and its date is before the range, check
-- whether the NEXT row has a date inside the range".
-- Result per row: 1 = next row is inside the range, 0 = it is not,
-- NULL = the rule does not apply to this row.
-- Variables take the @ sigil, and T-SQL has no true/false literals, so 1/0
-- are returned instead.
declare @range_st date = '2017-01-01'
declare @range_en date = '2017-12-31'
select
    case
        when csh1.status_id = 1 and csh1.effective_date < @range_st
        then
            case
                when csh2.effective_date between @range_st and @range_en then 1
                else 0
            end
        else NULL
    end
from company_status_history csh1
left join company_status_history csh2
    -- csh2 is the row AFTER csh1 (the original condition picked the row before).
    on csh2.id = csh1.id + 1
Implementing second criteria:
"If this row is status 1 and it's date is after the date range check
the row before if it has a date inside the date range."
-- Two rules for status-1 rows:
--   * dated before the range -> is the NEXT row's date inside the range?
--   * dated after the range  -> is the PREVIOUS row's date inside the range?
-- Result per row: 1 = yes, 0 = no, NULL = neither rule applies.
-- Variables take the @ sigil, and T-SQL has no true/false literals, so 1/0
-- are returned instead.
declare @range_st date = '2017-01-01'
declare @range_en date = '2017-12-31'
select
    case
        when csh1.status_id = 1 and csh1.effective_date < @range_st
        then
            case
                when csh2.effective_date between @range_st and @range_en then 1
                else 0
            end
        when csh1.status_id = 1 and csh1.effective_date > @range_en
        then
            case
                when csh3.effective_date between @range_st and @range_en then 1
                else 0
            end
        else null -- row is not status 1, or is status 1 inside the range
    end
from company_status_history csh1
left join company_status_history csh2
    -- csh2 is the row AFTER csh1.
    on csh2.id = csh1.id + 1
left join company_status_history csh3
    -- csh3 is the row BEFORE csh1.
    on csh3.id = csh1.id - 1
I would suggest the use of a cte and the window functions ROW_NUMBER. With this you can find the desired records. An example:
-- Sample data in a table variable (table variables and scalars use the @ sigil).
DECLARE @t TABLE(
id INT
,company_id INT
,status_id INT
,effective_date DATETIME
)
INSERT INTO @t (id, company_id, status_id, effective_date) VALUES
(1, 10, 1, '2016-12-30 00:00:00.000')
,(2, 10, 5, '2017-02-04 00:00:00.000')
,(3, 11, 5, '2017-06-05 00:00:00.000')
,(4, 11, 1, '2018-04-30 00:00:00.000')
DECLARE @StartDate DATETIME = '2017-01-01';
DECLARE @EndDate DATETIME = '2017-12-31';
WITH cte AS(
-- Number each company's rows chronologically.
SELECT *
,ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) AS rn
FROM @t
),
cteLeadLag AS(
-- Attach the previous and next effective_date of the same company; when there
-- is no neighbour the row's own date is used.
SELECT c.*, ISNULL(c2.effective_date, c.effective_date) LagEffective, ISNULL(c3.effective_date, c.effective_date)LeadEffective
FROM cte c
LEFT JOIN cte c2 ON c2.company_id = c.company_id AND c2.rn = c.rn-1
LEFT JOIN cte c3 ON c3.company_id = c.company_id AND c3.rn = c.rn+1
)
-- Status-1 rows dated inside the range.
SELECT 'Included' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date BETWEEN @StartDate AND @EndDate
UNION ALL
-- Status-1 rows dated after the range whose previous row is inside it.
SELECT 'Following' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date > @EndDate
AND LagEffective BETWEEN @StartDate AND @EndDate
UNION ALL
-- Status-1 rows dated BEFORE the range whose next row is inside it. Comparing
-- against @StartDate (not @EndDate) keeps rows already reported as 'Included'
-- from being listed a second time.
SELECT 'Trailing' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date < @StartDate
AND LeadEffective BETWEEN @StartDate AND @EndDate
I first select all records with their leading and lagging Dates and then I perform your checks on the inclusion in the desired timespan.
Try with this, self-explanatory. Responds to this part of your question:
I want to answer to the question "Get all companies that have been at
least for some point in status 1 inside the time period 01/01/2017 -
31/12/2017"
Case where you want to find the ids that have been in status 1 at some point and have records in the requested period:
-- Rows dated inside the period that belong to a company which has been in
-- status 1 at some point. The membership test is on company_id: testing the
-- row id instead would only match rows that are themselves status 1.
SELECT *
FROM company_status_history
WHERE company_id IN
      ( SELECT company_id
        FROM company_status_history
        WHERE status_id = 1 )
  AND effective_date BETWEEN '2017-01-01' AND '2017-12-31'
Case that you want to find id's in status 1 and inside the period:
-- Rows that are themselves status 1 and dated inside the period.
SELECT *
FROM company_status_history
WHERE effective_date BETWEEN '2017-01-01' AND '2017-12-31'
  AND status_id = 1

SQL: CTE query Speed

I am using SQL Server 2008 and am trying to increase the speed of my query below. The query assigns points to patients based on readmission dates.
Example: A patient is seen on 1/2, 1/5, 1/7, 1/8, 1/9, 2/4. I want to first group visits within 3 days of each other. 1/2-5 are grouped, 1/7-9 are grouped. 1/5 is NOT grouped with 1/7 because 1/5's actual visit date is 1/2. 1/7 would receive 3 points because it is a readmit from 1/2. 2/4 would also receive 3 points because it is a readmit from 1/7. When the dates are grouped the first date is the actual visit date.
Most articles suggest limiting the data set or adding indexes to increase speed. I have limited the amount of rows to about 15,000 and added a index. When running the query with 45 test visit dates/ 3 test patients, the query takes 1.5 min to run. With my actual data set it takes > 8 hrs.
How can I get this query to run < 1 hr? Is there a better way to write my query? Does my Index look correct? Any help would be greatly appreciated.
Example expected results below query.
-- Test fixture: one row per patient visit.
;CREATE TABLE RiskReadmits(MRN INT, VisitDate DATE, Category VARCHAR(15))
-- Lead the clustered key with MRN so per-patient date lookups can seek
-- instead of scanning every patient's visits.
;CREATE CLUSTERED INDEX Risk_Readmits_Index ON RiskReadmits(MRN, VisitDate)
-- Dates are written as yyyy-mm-dd so they do not depend on the session's
-- DATEFORMAT / language setting (the m/d/yyyy form does).
;INSERT RiskReadmits(MRN,VisitDate,CATEGORY)
VALUES
(1, '2016-01-02','Inpatient'),
(1, '2016-01-05','Inpatient'),
(1, '2016-01-07','Inpatient'),
(1, '2016-01-08','Inpatient'),
(1, '2016-01-09','Inpatient'),
(1, '2016-02-04','Inpatient'),
(1, '2016-06-02','Inpatient'),
(1, '2016-06-03','Inpatient'),
(1, '2016-06-05','Inpatient'),
(1, '2016-06-06','Inpatient'),
(1, '2016-06-08','Inpatient'),
(1, '2016-07-01','Inpatient'),
(1, '2016-08-01','Inpatient'),
(1, '2016-08-04','Inpatient'),
(1, '2016-08-15','Inpatient'),
(1, '2016-08-18','Inpatient'),
(1, '2016-08-28','Inpatient'),
(1, '2016-10-12','Inpatient'),
(1, '2016-10-15','Inpatient'),
(1, '2016-11-17','Inpatient'),
(1, '2016-12-20','Inpatient')
-- Groups each patient's inpatient visits into chains (a visit within 3 days
-- of a chain's first date belongs to that chain) and gives 3 points to a
-- chain start that is less than 30 days after the previous chain start.
;WITH a AS (
    -- Every inpatient visit plus NextDay: the same patient's first inpatient
    -- visit more than 3 days later. The subquery is correlated on MRN and
    -- filtered on category so NextDay always points at a row of this CTE
    -- that belongs to the same patient.
    SELECT
        z1.VisitDate
        , z1.MRN
        , (
            SELECT MIN(z2.VisitDate)
            FROM RiskReadmits z2
            WHERE z2.MRN = z1.MRN
              AND z2.CATEGORY = 'Inpatient'
              AND z2.VisitDate > DATEADD(day, 3, z1.VisitDate)
          ) AS NextDay
    FROM
        RiskReadmits z1
    WHERE
        z1.CATEGORY = 'Inpatient'
), a1 AS (
    -- Each patient's first visit and the start of the following chain.
    SELECT
        MRN
        , MIN(VisitDate) AS VisitDate
        , MIN(NextDay) AS NextDay
    FROM
        a
    GROUP BY
        MRN
), b AS (
    -- Walk from chain start to chain start.
    SELECT
        VisitDate
        , MRN
        , NextDay
        , 1 AS OrderRow
    FROM
        a1
    UNION ALL
    SELECT
        a.VisitDate
        , a.MRN
        , a.NextDay
        , b.OrderRow + 1 AS OrderRow
    FROM
        a
        JOIN b
            -- Match within the same patient only; joining on the date alone
            -- pairs every patient with every other patient sharing that date.
            ON a.VisitDate = b.NextDay
           AND a.MRN = b.MRN
), c AS (
    -- Previous chain start of the same patient, if any.
    SELECT
        b1.MRN
        , b1.VisitDate
        , (
            SELECT MAX(b2.VisitDate)
            FROM b b2
            WHERE b2.VisitDate < b1.VisitDate
              AND b2.MRN = b1.MRN
          ) AS PreviousVisitDate
    FROM
        b b1
)
SELECT DISTINCT
    c1.MRN
    , c1.VisitDate
    , CASE
        WHEN DATEDIFF(day, c1.PreviousVisitDate, c1.VisitDate) < 30 THEN c1.PreviousVisitDate
        ELSE NULL
      END AS ReAdmissionFrom
    , CASE
        WHEN DATEDIFF(day, c1.PreviousVisitDate, c1.VisitDate) < 30 THEN 3
        ELSE 0
      END AS Points
FROM
    c c1
ORDER BY c1.MRN, c1.VisitDate
Expected Results:
MRN VisitDate ReAdmissionFrom Points
1 2016-01-02 NULL 0
1 2016-01-07 2016-01-02 3
1 2016-02-04 2016-01-07 3
1 2016-06-02 NULL 0
1 2016-06-06 2016-06-02 3
1 2016-07-01 2016-06-06 3
1 2016-08-01 NULL 0
1 2016-08-15 2016-08-01 3
1 2016-08-28 2016-08-15 3
1 2016-10-12 NULL 0
1 2016-11-17 NULL 0
1 2016-12-20 NULL 0
oops I changed the names of a few cte's (and the post messed up what was code)
It should be like this:
-- Recursive CTE fragment: starts from the rows of a1 and repeatedly follows
-- NextDay to the matching row of a, counting the steps in OrderRow.
b AS (
-- Anchor member: rows of a1, numbered 1.
SELECT
VisitDate
, MRN
, NextDay
, 1 AS OrderRow
FROM
a1
UNION ALL
-- Recursive member: the row of a whose VisitDate equals the current NextDay.
SELECT
a.VisitDate
, a.MRN
, a.NextDay
, b.OrderRow +1 AS OrderRow
FROM
a AS a
JOIN b
-- The MRN condition keeps the walk within a single patient.
ON a.VisitDate = b.NextDay AND a.MRN = b.MRN
)
I'm going to take a wild guess here and say you want to change the b cte to
have AND a.MRN = b.MRN as a second condition in the second select query like this:
-- Recursive CTE fragment: starts from the rows of firstVisitAndFollowUp and
-- repeatedly follows NextDay to the matching row of visitsDistance3daysOrMore,
-- counting the steps in OrderRow.
, b AS (
-- Anchor member: rows of firstVisitAndFollowUp, numbered 1.
SELECT
VisitDate
, MRN
, NextDay
, 1 AS OrderRow
FROM
firstVisitAndFollowUp
UNION ALL
-- Recursive member: the row whose VisitDate equals the current NextDay.
SELECT
a.VisitDate
, a.MRN
, a.NextDay
, b.OrderRow +1 AS OrderRow
FROM
visitsDistance3daysOrMore AS a
JOIN b
-- The MRN condition keeps the walk within a single patient.
ON a.VisitDate = b.NextDay AND a.MRN = b.MRN
)

Merge two records date if dates are continuous and key values are same

I have two different scenarios. In the first scenario I need something like:
-- Scenario 1: item validity periods with their type.
CREATE TABLE test (
    ItemID        INT,
    ItemStartDate DATETIME,
    ItemEndDate   DATETIME,
    itemType      VARCHAR(100)
)
Table test:
ItemID ItemStartDate ItemEndDate itemType
------ ------------- ----------- --------
item_1 1/1/2011 3/2/2011 value A
item_1 3/3/2011 12/31/2011 value A
item_2 1/3/2011 12/31/2011 value B
It should show only two records:
ItemID ItemStartDate ItemEndDate itemType
------ ------------- ----------- --------
item_1 1/1/2011 12/31/2011 value A
item_2 1/1/2011 12/31/2011 value B
Scenario 2.
Here I would like to split data value to separate year periods if it's across multiple years.
Table test
-- Scenario 2: price periods that may span several calendar years.
CREATE TABLE #Scenario_2 (
    ItemID         INT,
    priceStartDate DATETIME,
    priceEndDate   DATETIME,
    price          INT
)
item startdate enddate value
---- --------- ---------- -----
11 1/1/2011 5/4/2013 500
12 7/1/2013 11/12/2013 600
It should show like
item startdate enddate value
---- --------- ---------- -----
11 1/1/2011 12/31/2011 500
11 1/1/2012 12/31/2012 500
11 1/1/2013 5/4/2013 500
12 7/1/2013 11/12/2013 600
Please advise how I can achieve this.
Try this. This is what I understood from your question.
SCENARIO 2
----------
-- Sample data: one price period per item.
CREATE TABLE #datt
(
    itemid INT,
    startd DATE,
    endat  DATE,
    price  INT
)
INSERT INTO #datt (itemid, startd, endat, price)
VALUES (11, '2011-01-01', '2013-05-04', 500),
       (12, '2013-07-01', '2013-11-12', 600)
-- Split every period into one slice per calendar year it touches.
;WITH cte
     AS (-- Anchor: first slice runs from the period start to Dec 31 of the
         -- start year, or to the period end when both fall in the same year.
         SELECT itemid,
                startd st,
                CASE
                  -- Date value -1 is 1899-12-31; adding (year - 1899) years
                  -- gives Dec 31 of the start year.
                  WHEN Year(endat) <> Year(startd) THEN Dateadd(yy, Year(startd) - 1899, -1)
                  ELSE endat
                END ed,
                price
         FROM   #datt
         UNION ALL
         -- Recursive step: the next slice starts the day after the previous
         -- slice ended. (Adding a year to the previous start is only correct
         -- for periods that begin on Jan 1.) The cast keeps the column type
         -- equal to the anchor's DATE, as recursive CTEs require.
         SELECT a.itemid,
                Cast(Dateadd(dd, 1, a.ed) AS DATE),
                CASE
                  WHEN Dateadd(yy, 1, a.ed) > b.endat THEN b.endat
                  ELSE Dateadd(yy, 1, a.ed)
                END,
                a.price
         FROM   cte a
                JOIN #datt b
                  ON a.itemid = b.itemid
                     AND a.ed < b.endat)
SELECT *
FROM   cte
ORDER  BY itemid, st
For scenario1 you could see this answer.
For scenario2 there also have a similar answer you could reference.
But your question can be simplified like this:
-- Split every Scenario_2 period into one row per calendar year it touches.
WITH dates AS (
    -- One row per year between the earliest start year and the latest end
    -- year, with dt = Dec 31 of that year (built from the number yyyy1231).
    SELECT
        number,
        CAST(LTRIM(number * 10000 + 1231) AS DATE) AS dt
    FROM master..spt_values
    INNER JOIN (
        SELECT
            MIN(YEAR(startdate)) AS s_year,
            MAX(YEAR(enddate))   AS e_year
        FROM Scenario_2
    ) AS y
        ON  number BETWEEN y.s_year AND y.e_year
        AND TYPE = 'P'
)
SELECT
    s.item,
    -- Slice start: the period start in its first year, otherwise Jan 1.
    CASE
        WHEN YEAR(dt) = YEAR(startdate) THEN startdate
        ELSE DATEADD(YEAR, -1, DATEADD(DAY, 1, dt)) --or cast(ltrim(year(dt)*10000+101) as date)
    END,
    -- Slice end: the period end in its last year, otherwise Dec 31.
    CASE
        WHEN YEAR(dt) = YEAR(enddate) THEN enddate
        ELSE dt
    END,
    s.value
FROM Scenario_2 AS s
INNER JOIN dates AS d
    ON d.number BETWEEN YEAR(s.startdate) AND YEAR(s.enddate)
SQL FIDDLE DEMO