I need to handle overlapping dates, but if the end date is null it should be assumed that the process has not stopped - sql

I have the start date and end date of a process from two different sources. These two sources will be merged, and the dates need to be reconciled in case of conflicts.
Dataset1
P_startDate P_EndDate
1-Jan-07 1-Jun-15
Dataset2
P_Start Date P_End Date
1-Mar-15 1-Jan-17
2-Jan-17 Null
Merged Dataset / Expected Dataset
| Process Start Date | Process End Date |
| 1-Jan-07 | 1-Mar-15 |
| 1-Mar-15 | 1-Jan-17 |
| 2-Jan-17 | Null |
I wrote a query, but its result does not take the null (no end date) condition into account, and my output comes out as
| Process Start Date | Process End Date |
| 1-Jan-07 | 1-Mar-15 |
| 1-Mar-15 | 1-Jan-17 |
| 1-Jan-17 | 2-Jan-17 |
I have followed the guidelines from here
http://www.schemamania.org/sql/#overlapping.dates
-- Splits the [StartDate, EndDate] ranges of so.A and so.B into consecutive
-- segments per ID by pairing every range boundary with the next larger one.
-- D: one row per boundary (each start and each end) across both tables.
with D (ID, bound) as (
select ID
, case T when 's' then StartDate else EndDate end as bound
from (
select ID, StartDate, EndDate from so.A
UNION
select ID, StartDate, EndDate from so.B
) as U
-- two-row helper: 's' emits the start bound of a range, 'e' emits its end bound
cross join (select 's' as T union select 'e') as T
)
select P.*
from (
-- pair every bound with the smallest strictly greater bound of the same ID
select s.ID, s.bound as StartDate, min(e.bound) as EndDate
from D as s join D as e
on s.ID = e.ID
-- a NULL bound never satisfies "<", so an open-ended (NULL) EndDate can
-- neither start a segment nor be chosen as a segment end here
and s.bound < e.bound
group by s.ID, s.bound
) as P
-- a and b are joined but none of their columns are selected (only P.*)
left join so.A as a
on P.ID = a.ID
and a.StartDate <= P.StartDate and P.EndDate <= a.EndDate
left join so.B as b
on P.ID = b.ID
and b.StartDate <= P.StartDate and P.EndDate <= b.EndDate
order by P.ID, P.StartDate, P.EndDate

This looks more like a merge overlapping interval problem. Here is one solution that keeps a running count of starts and ends:
-- Sample data for the two sources. A NULL P_ENDDATE marks a process that is still running.
CREATE TABLE ds1 (P_STARTDATE DATE, P_ENDDATE DATE);
CREATE TABLE ds2 (P_STARTDATE DATE, P_ENDDATE DATE);
-- Explicit column lists keep the inserts valid if columns are later added or reordered.
INSERT INTO ds1 (P_STARTDATE, P_ENDDATE) VALUES
    ('2007-01-01', '2015-06-01');
INSERT INTO ds2 (P_STARTDATE, P_ENDDATE) VALUES
    ('2015-03-01', '2017-01-01'),
    ('2017-01-02', NULL);
-- Merges overlapping periods from ds1 and ds2 by keeping a running count of open periods.
-- cte1: every start date is an event of +1, every end date an event of -1
--       (an open-ended period contributes an event whose date is NULL).
WITH cte1(d, v) AS (
    SELECT P_startDate, +1 FROM ds1 UNION ALL
    SELECT P_EndDate, -1 FROM ds1 UNION ALL
    SELECT P_startDate, +1 FROM ds2 UNION ALL
    SELECT P_EndDate, -1 FROM ds2
), cte2(d, c) AS (
    -- c: running total of the events in date order; the CASE sorts the NULL date last
    SELECT d, SUM(SUM(v)) OVER (ORDER BY CASE WHEN d IS NULL THEN 2 ELSE 1 END, d)
    FROM cte1
    GROUP BY d
), cte3(d, c, f) AS (
    -- f = 1 when the previous running total is not above zero (or there is no previous row),
    -- i.e. this date opens a new merged period; otherwise 0
    SELECT d, c, CASE WHEN LAG(c) OVER (ORDER BY CASE WHEN d IS NULL THEN 2 ELSE 1 END, d) > 0 THEN 0 ELSE 1 END
    FROM cte2
), cte4(d, c, g) AS (
    -- g: running sum of f, used as the number of the merged period each date belongs to
    SELECT d, c, SUM(f) OVER (ORDER BY CASE WHEN d IS NULL THEN 2 ELSE 1 END, d)
    FROM cte3
)
-- COUNT(d) < COUNT(*) means the group contains the NULL date, so the period is
-- open-ended and ToDate is reported as NULL instead of MAX(d).
SELECT MIN(d) AS FromDate, CASE WHEN COUNT(d) = COUNT(*) THEN MAX(d) END AS ToDate
FROM cte4
GROUP BY g
-- explicit ordering: without it the row order of the result is not guaranteed
ORDER BY FromDate;
Result:
FromDate ToDate
01/01/2007 00:00:00 01/01/2017 00:00:00
02/01/2017 00:00:00 NULL

Related

Query to select same event code with at least one hour interval

I have a sample table
-- Sample event log: one row per occurrence of an event code (txt) at time dt.
CREATE TABLE dbo.wt
(
    id  int          NULL,
    dt  datetime     NULL,
    txt nvarchar(50) NULL
) ON [PRIMARY]
GO
INSERT INTO dbo.wt (id, dt, txt)
VALUES
    (1,  '2017-01-01 00:01:00.000', 't1'),
    (2,  '2017-01-01 00:03:00.000', 't1'),
    (3,  '2017-01-01 00:02:00.000', 't1'),
    (4,  '2017-01-01 01:04:00.000', 't1'),
    (5,  '2017-01-01 02:10:00.000', 't1'),
    (6,  '2017-01-01 00:01:00.000', 't1'),
    (7,  '2017-01-01 01:05:00.000', 't1'),
    (8,  '2017-01-01 02:10:00.000', 't2'),
    (9,  '2017-01-01 00:03:00.000', 't2'),
    (10, '2017-01-01 01:04:00.000', 't2'),
    (11, '2017-01-01 00:52:00.000', 't1')
I would like to have a list of txt codes and dt dates, grouped by txt code, where the interval between consecutive listed occurrences of the same txt is at least one hour, skipping everything in between.
To clarify when t1 first occures at '2017-01-01 00:01:00.000'
then next occurrence I am looking for is after at least one hour
which will be '2017-01-01 01:04:00.000'
third occurrence I am looking for is after at least one hour from '2017-01-01 01:04:00.000' and so on.
After some searching I found something like this
-- Walks the rows of each txt in dt order and flags rows that follow their
-- immediate predecessor by 60 minutes or more.
-- a: rows of wt numbered per txt by ascending dt.
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from [wt]),
-- b: recursive walk that starts at rn = 1 and steps to the next rn of the same txt
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
-- tm: minutes from the current row back to the previous row (zero or negative)
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
-- gap to the previous row under 60 minutes: increment the counter; otherwise reset it to 0.
-- The comparison is against the previous row, not against the last row that was kept.
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
-- keep only the rows where the counter is 0
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
but this wasn't good because the interval isn't counted from first occurrence but from last, so I got
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 02:10:00.000 8 -65 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
I think I found a workaround because in this case I could group record within same hour but I am not happy with that solution.
-- Workaround: first reduce wt to the earliest row per txt and clock hour, then run
-- the same recursive walk over that reduced set.
-- The GROUP BY key is the first 14 characters of the style-120 timestamp text
-- ('yyyy-mm-dd hh:') padded with '00:00.000', i.e. the start of the clock hour.
select txt, min(dt) dt
into #ttwt
from [wt]
group by txt, substring(convert(varchar,dt,120),1,14)+'00:00.000'
-- a: rows of #ttwt numbered per txt by ascending dt
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from #ttwt),
-- b: recursive walk from rn = 1 to each following row of the same txt
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
-- counter resets to 0 when the row is 60 minutes or more after the previous row
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
-- remove the scratch table created by SELECT ... INTO above
drop table #ttwt
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 01:04:00.000 2 -63 0
t1 2017-01-01 02:10:00.000 3 -66 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
Any suggestions to improve the script so it will let the interval be any entered value in minutes would be appreciated.
If I have understood correctly I think the following does what you need.
-- Lists, per txt, the first occurrence and then each next occurrence that lies at least
-- @IntervalMinutes after the previously listed one.
-- Minimum gap, in minutes, between two listed occurrences of the same txt.
DECLARE @IntervalMinutes INT = 60;

-- Allow the script to be re-run in the same session.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T (id INT, rn INT, txt VARCHAR(10), dt DATETIME, lagDiff INT, runningDiff INT);

-- rn:          zero-based position of the row within its txt, ordered by dt then id
-- lagDiff:     minutes since the previous row of the same txt (0 for the first row)
-- runningDiff: minutes since the first row of the same txt
INSERT INTO #T (id, rn, txt, dt, lagDiff, runningDiff)
SELECT id
     , ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id) - 1 AS rn
     , txt
     , dt
     , DATEDIFF(MINUTE, COALESCE(LAG(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt) AS lagDiff
     , DATEDIFF(MINUTE, COALESCE(FIRST_VALUE(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt) AS runningDiff
FROM wt;

WITH CTE AS (
    -- Level 1: the first occurrence of every txt
    SELECT *, 1 AS Level
    FROM #T
    WHERE rn = 0
    UNION ALL
    -- Level n + 1: every later row that is at least @IntervalMinutes after a level-n row.
    -- ">=" (not ">") so that a gap of exactly the interval qualifies ("at least").
    SELECT T.*, CTE.Level + 1
    FROM #T T
    INNER JOIN CTE
        ON CTE.txt = T.txt
       AND CTE.rn < T.rn
       AND T.runningDiff - @IntervalMinutes >= CTE.runningDiff
    WHERE T.rn > 0
)
, X AS (
    -- the earliest candidate of each level is the occurrence to report
    SELECT txt
         , Level
         , MIN(rn) AS rn
    FROM CTE
    GROUP BY txt, Level
)
SELECT #T.*
FROM X
INNER JOIN #T ON #T.txt = X.txt AND #T.rn = X.rn
ORDER BY #T.txt, #T.dt;
Output
+----+----+-----+-------------------------+---------+-------------+
| id | rn | txt | dt | lagDiff | runningDiff |
+----+----+-----+-------------------------+---------+-------------+
| 1 | 0 | t1 | 2017-01-01 00:01:00.000 | 0 | 0 |
| 4 | 5 | t1 | 2017-01-01 01:04:00.000 | 12 | 63 |
| 5 | 7 | t1 | 2017-01-01 02:10:00.000 | 65 | 129 |
| 9 | 0 | t2 | 2017-01-01 00:03:00.000 | 0 | 0 |
| 10 | 1 | t2 | 2017-01-01 01:04:00.000 | 61 | 61 |
| 8 | 2 | t2 | 2017-01-01 02:10:00.000 | 66 | 127 |
+----+----+-----+-------------------------+---------+-------------+
I rather like a method that works like a bubble sort. The problem I have found with recursive operations is that they work great for small sets (think fewer than 5 or 10k rows) and then behave horribly as the set gets larger. For this reason I like a cursor-style approach where you are essentially asking of each row: "Does it meet the criterion? Yes or no. Insert or ignore, delete, move on." This way you evaluate every item once and once only, rather than every combination the recursion generates.
-- Row-by-row pass over wt: keeps the first row of every txt and then each later row
-- that is at least @IntervalMinutes after the previously kept row of the same txt.
-- Minimum gap, in minutes, between two kept rows of the same txt.
DECLARE @IntervalMinutes INT = 60;

-- Work queue: every row of wt, numbered per txt in (dt, id) order.
DECLARE @Temp TABLE
(
    id INT
  , dt DATETIME
  , txt VARCHAR(8)
  , rwn INT
);

-- Result: the kept rows; Dif is the number of minutes since the previously kept row
-- of the same txt (NULL for the first row of a txt).
DECLARE @Holder TABLE
(
    id INT
  , dt DATETIME
  , txt VARCHAR(8)
  , Dif INT
);

DECLARE
    @CurId INT
  , @CurDt DATETIME
  , @CurTxt VARCHAR(8)
  , @CurRwn INT
  , @LastDate DATETIME   -- dt of the most recently kept row of the current txt
;

INSERT INTO @Temp (id, dt, txt, rwn)
SELECT id, dt, txt, ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id)
FROM wt;

WHILE EXISTS (SELECT 1 FROM @Temp)
BEGIN
    -- next unprocessed row: lowest txt first, then in time order within the txt
    SELECT TOP (1) @CurId = id, @CurDt = dt, @CurTxt = txt, @CurRwn = rwn
    FROM @Temp
    ORDER BY txt, rwn;

    IF @CurRwn = 1
    BEGIN
        -- first row of a txt group: always kept, and it becomes the reference point
        INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @CurTxt, NULL);
        SET @LastDate = @CurDt;
    END
    -- Compare elapsed minutes against the kept row. DATEDIFF(HOUR, ...) would only count
    -- crossed hour boundaries (00:59 -> 01:01 counts as 1 hour).
    ELSE IF DATEDIFF(MINUTE, @LastDate, @CurDt) >= @IntervalMinutes
    BEGIN
        INSERT INTO @Holder (id, dt, txt, Dif)
        VALUES (@CurId, @CurDt, @CurTxt, DATEDIFF(MINUTE, @LastDate, @CurDt));
        SET @LastDate = @CurDt;
    END

    -- remove the processed row so the loop advances; (txt, rwn) identifies it uniquely
    DELETE FROM @Temp WHERE txt = @CurTxt AND rwn = @CurRwn;
END

SELECT id, dt, txt, Dif
FROM @Holder
ORDER BY txt, dt;

SQL get data from column(YYY0) with same number as different column(XXXX0) with the maximum date

I am looking for a query that, for each row, finds the column (YYY.) holding the highest/most recent date and returns the value of the corresponding column (XXXX.).
Finding the column with the most recent date was possible, but getting the corresponding column left me clueless... All suggestions are welcome!
So from the table:
| id | XXXX0| YYY0 | XXXX1| YYY1| XXXX9| YYY9|
---------------------------------------------------------------------------------------
| A | 3 | 10-10-2009| 4 |10-10-2010| 1 | 10-10-2011|
| B | 2 | 10-10-2010| 3 |10-10-2012| 6 | 10-10-2011|
| C | 4 | 10-10-2011| 1 |10-10-2010| 7 | 10-10-2012|
| D | 1 | 10-10-2010| 8 |10-10-2013| 9 | 10-10-2012|
I would like to end up with:
| id | LabelX| LabelY|
--------------------------------------
| A | 1 | 10-10-2011|
| B | 3 | 10-10-2012|
| C | 7 | 10-10-2012|
| D | 8 | 10-10-2013|
Added:
This was what I tried to determine the maximum value:
-- Row-wise maximum over the YYY columns: the VALUES constructor turns the date columns of
-- the current row into a one-column derived table so that MAX can aggregate across them.
-- The commented placeholder stands for the columns YYY2 through YYY8, elided in the question.
SELECT LTRIM(A) AS A, LTRIM(B) AS B, LTRIM(C) AS C,
       (SELECT MAX(v)
        FROM (VALUES (YYY0), (YYY1), /* (YYY2), ..., (YYY8), */ (YYY9)) AS value(v)) AS [MaxDate]
FROM [Table]
-- Sketch: for each row pick the XXXX value paired with the greatest YYY date (LabelX)
-- and that date itself (LabelY). Each WHEN tests whether one YYY column beats every later
-- column; earlier columns were already ruled out by the preceding branches.
-- "..." marks the comparisons and branches for the remaining columns, elided for brevity.
SELECT id,
CASE
WHEN YYY0 > YYY1 AND YYY0 > YYY2 ... AND YYY0 > YYY9 THEN XXXX0
WHEN YYY1 > YYY2 ... AND YYY1 > YYY9 THEN XXXX1
...
ELSE XXXX9 END AS LabelX,
CASE
WHEN YYY0 > YYY1 AND YYY0 > YYY2 ... AND YYY0 > YYY9 THEN YYY0
WHEN YYY1 > YYY2 ... AND YYY1 > YYY9 THEN YYY1
...
ELSE YYY9 END AS LabelY,
...
and replace > by >= depending on which you want to win if they're equal.
If it's a SQL Server 2005 and above you can do it this way (it assumes that dates are unique in each column for specific id):
-- Unpivots each (xxxxN, yyyN) column pair into its own row, finds the latest date per id,
-- then joins back to recover the value stored next to that date.
;with pairs as (
    select id, xxxx0 as LabelX, yyy0 as LabelY from tab
    union all
    select id, xxxx1, yyy1 from tab
    union all
    select id, xxxx9, yyy9 from tab
),
latest as (
    -- most recent date per id
    select p.id, max(p.LabelY) as LabelY
    from pairs p
    group by p.id
)
select latest.id, pairs.LabelX, latest.LabelY
from latest
join pairs
    on pairs.id = latest.id
   and pairs.LabelY = latest.LabelY
Live SQL Fiddle example
Here's a simplified example for you. I'm using SQL Server 2008, but this SQL is pretty standard and should work fine on most modern implementations (famous last words).
So, given this table schema:
-- Demo table: three (Xn, Yn) value/date pairs per id.
-- Drop only when the table exists so the script also runs against a fresh database.
if object_id('dbo.foobar', 'U') is not null
    drop table dbo.foobar
go
create table dbo.foobar
(
  id char(1) not null primary key ,
  X1 int not null , Y1 date not null ,
  X2 int not null , Y2 date not null ,
  X3 int not null , Y3 date not null
)
go
And some sample data:
-- Sample rows; the column list makes the value-to-column mapping explicit.
insert into dbo.foobar ( id , X1 , Y1 , X2 , Y2 , X3 , Y3 ) values ( 'A' , 1 , '1 Jan 2013' , 2 , '1 Feb 2013' , 3 , '1 Mar 2013' )
insert into dbo.foobar ( id , X1 , Y1 , X2 , Y2 , X3 , Y3 ) values ( 'B' , 1 , '1 Mar 2013' , 2 , '1 Jan 2013' , 3 , '1 Feb 2013' )
insert into dbo.foobar ( id , X1 , Y1 , X2 , Y2 , X3 , Y3 ) values ( 'C' , 1 , '1 Feb 2013' , 2 , '1 Mar 2013' , 3 , '1 Jan 2013' )
go
Depending on the nature of your data and the desired semantics of the query and results, either this approach:
--
-- This approach pushes evaluation of the corresponding X to the output column list
--
-- 1. Construct a UNION ALL to normalize the table into a set of id/date pairs
-- 2. Compute max(date) for each id
-- 3. Join back against the original table to recover the source row
-- 4. Use the max(date) value to identify the corresponding X
--    (when several Y columns hold the max date, the first matching WHEN wins)
--
select t.id ,
       MaxY = t.y ,
       X = case
             when t.y = src.Y1 then src.X1
             when t.y = src.Y2 then src.X2
             when t.y = src.Y3 then src.X3
           end
from ( select u.id ,
              y = max( u.y )
       from ( select id , y=Y1 from dbo.foobar
              union all select id , y=Y2 from dbo.foobar
              union all select id , y=Y3 from dbo.foobar
            ) u
       group by u.id
     ) t
join dbo.foobar src on src.id = t.id
-- order by output column names rather than positions
order by t.id , MaxY , X
go
Or this approach
--
-- This approach looks at each X/Y pair as its own "table" as it were:
-- the max date per id is left-joined once per Y column, and COALESCE picks the
-- X of the first column whose date equals the max.
--
select t.id ,
       MaxY = t.y ,
       X = coalesce( t1.X1 , t2.X2 , t3.X3 )
from ( select u.id ,
              y = max( u.y )
       from ( select id , y=Y1 from dbo.foobar
              union all select id , y=Y2 from dbo.foobar
              union all select id , y=Y3 from dbo.foobar
            ) u
       group by u.id
     ) t
left join dbo.foobar t1 on t1.id = t.id and t1.Y1 = t.y
left join dbo.foobar t2 on t2.id = t.id and t2.Y2 = t.y
left join dbo.foobar t3 on t3.id = t.id and t3.Y3 = t.y
-- order by output column names rather than positions
order by t.id , MaxY , X
should work for you. In either event, both queries produce an identical result set:
id MaxY X
-- ---------- -
A 2013-03-01 3
B 2013-03-01 1
C 2013-03-01 2
Good Luck!
[Have you considered normalizing your database design? Third Normal Form makes life a lot easier and usually more efficient.]

Joining values with DateDim, where null'd dates' value will take the last non-null value in the table

I need to do a join but I'm not sure which type. I have a table like this:
Date Amount | FOO
------------------
2012-01-12 x
2012-03-14 y
2012-05-06 z
2012-05-14 aa
2012-09-02 bb
I am joining this with DateDim (Google "DATE DIM"), which is a table of dates (historical and future).
I need a query that would display data like this:
datedim.Date foo.Amount | FOO x DATEDIM
------------------------------------------
2012-01-12 x
2012-01-13 x
2012-01-14 x
... etc...
2012-03-14 y
2012-03-15 y
2012-03-16 y
2012-03-17 y
... etc...
2012-05-06 z
... etc...
Basically, I need the values to persist (were it a left join, it would be NULLs) until the next non-null value. That will persist too... etc..
What I have so far...
-- For every DATEDIM date, shows the Amount of the FOO row with the same date; when no
-- such row exists (Amount is NULL after the LEFT JOIN), falls back to the Amount of the
-- latest FOO row dated on or before that date.
SELECT datedim.Date
,CASE
WHEN Amount IS NULL
-- correlated lookup: newest FOO row not later than the current DATEDIM date
THEN (SELECT TOP 1 Amount
FROM FOO WHERE foo.Date <= datedim.Date
ORDER BY Date DESC)
ELSE Amount END AS Amount
FROM DATEDIM datedim
LEFT JOIN FOO foo
ON foo.Date = datedim.Date
I need to create a view out of this. I'm getting an error saying ORDER BY is invalid for views, unless specified by TOP??? I do have a TOP in the subquery...
In SQLServer2005+ use recursive CTE
-- Expands each source row into one row per day up to the day before the next source row,
-- repeating the source row's Amount.
-- cte: source rows numbered by ascending date.
;WITH cte (id, [Date], Amount) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY [Date] ASC) AS id,
[Date], Amount
FROM dbo.your_table t1
), cte2 (id, [Date], [LevelDate], Amount) AS
(
-- LevelDate: days from this row to the next row (NULL for the last row, which has no successor)
SELECT c1.id, c1.[Date], DATEDIFF(day, c1.[Date], c2.[Date]) AS [LevelDate], c1.Amount
FROM cte c1 LEFT JOIN cte c2 ON c1.id = c2.id - 1
), cte3 (id, [Date], Amount, [Level]) AS
(
-- anchor: every source row itself, at Level 1
SELECT id, [Date], Amount, 1 AS [Level]
FROM cte2 c
UNION ALL
-- recursive step: add one day with the same Amount while the gap to the next row
-- is larger than the number of days generated so far
SELECT c.id, DATEADD(day, 1, ct.[Date]) AS [Date], c.Amount, ct.[Level] + 1
FROM cte2 c JOIN cte3 ct ON c.id = ct.id
WHERE c.[LevelDate] > ct.[Level]
)
SELECT [Date], Amount
FROM cte3
ORDER BY Date
-- 0 lifts the default cap on recursion depth
OPTION (maxrecursion 0)
Demo on SQLFiddle
I'm sure there are more efficient ways using a CTE, or window functions, but something along these lines should work:
-- For every DateDim date, carries forward the amount of the most recent Foo row.
-- Dates earlier than the first Foo row produce no output row.
Select
    latest.[Date],
    latest.FooDate,
    f.Amount
From
    Foo f
Inner Join (
    -- latest: for each calendar date, the newest Foo date on or before it
    Select
        d.[Date],
        Max(f2.[Date]) as FooDate
    From
        Foo f2
    Inner Join
        DateDim d
        -- "<=" (not "<"): a date that has its own Foo row must show that row's amount,
        -- not the amount of the row before it
        On f2.[Date] <= d.[Date]
    Group By
        d.[Date]
) latest
    On latest.[FooDate] = f.[Date]
http://sqlfiddle.com/#!3/3c7d5/10

Creating groups of consecutive days meeting a given criteria

I have table the following data structure in SQL Server:
ID Date Allocation
1, 2012-01-01, 0
2, 2012-01-02, 2
3, 2012-01-03, 0
4, 2012-01-04, 0
5, 2012-01-05, 0
6, 2012-01-06, 5
etc.
What I need to do is get all consecutive day periods where Allocation = 0, and in the following form:
Start Date End Date DayCount
2012-01-01 2012-01-01 1
2012-01-03 2012-01-05 3
etc.
Is it possible to do this in SQL, and if so how?
In this answer, I'll assume that the "id" field numbers the rows consecutively when sorted by increasing date, like it does in the example data. (Such a column can be created if it does not exist).
This is an example of a technique described here and here.
1) Join the table to itself on adjacent "id" values. This pairs adjacent rows. Select rows where the "allocation" field has changed. Store the result in a temporary table, also keeping a running index.
-- MySQL. @idx is a user-defined session variable used as a running row counter
-- (a leading "#" would start a comment in MySQL and break the statement).
SET @idx = 0;
-- boundaries: one row per pair of adjacent ids whose allocation differs, holding the end
-- of the previous period, the start of the next one, and the previous period's allocation.
CREATE TEMPORARY TABLE boundaries
SELECT
    (@idx := @idx + 1) AS idx,
    a1.date AS prev_end,
    a2.date AS next_start,
    a1.allocation AS allocation
FROM allocations a1
JOIN allocations a2
    ON (a2.id = a1.id + 1)
WHERE a1.allocation != a2.allocation;
This gives you a table having "the end of the previous period", "the start of the next period", and "the value of 'allocation' in the previous period" in each row:
+------+------------+------------+------------+
| idx | prev_end | next_start | allocation |
+------+------------+------------+------------+
| 1 | 2012-01-01 | 2012-01-02 | 0 |
| 2 | 2012-01-02 | 2012-01-03 | 2 |
| 3 | 2012-01-05 | 2012-01-06 | 0 |
+------+------------+------------+------------+
2) We need the start and end of each period in the same row, so we need to combine adjacent rows again. Do this by creating a second temporary table like boundaries but having an idx field 1 greater:
+------+------------+------------+
| idx | prev_end | next_start |
+------+------------+------------+
| 2 | 2012-01-01 | 2012-01-02 |
| 3 | 2012-01-02 | 2012-01-03 |
| 4 | 2012-01-05 | 2012-01-06 |
+------+------------+------------+
Now join on the idx field and we get the answer:
-- Pairs each boundary row with the shifted copy that has the same idx, so that one row
-- holds the start (from boundaries2) and the end (from boundaries) of a period.
SELECT
    b2.next_start AS start,
    b1.prev_end AS end,
    b1.allocation
FROM boundaries AS b1
JOIN boundaries2 AS b2
    ON b2.idx = b1.idx;
+------------+------------+------------+
| start | end | allocation |
+------------+------------+------------+
| 2012-01-02 | 2012-01-02 | 2 |
| 2012-01-03 | 2012-01-05 | 0 |
+------------+------------+------------+
** Note that this answer gets the "internal" periods correctly but misses the two "edge" periods where allocation = 0 at the beginning and allocation = 5 at the end. Those can be pulled in using UNION clauses but I wanted to present the core idea without that complication.
Following would be one way to do it. The gist of this solution is
Use a CTE to get a list of all consecutive start and enddates with Allocation = 0
Use the ROW_NUMBER window function to assign rownumbers depending on both start- and enddates.
Select only those records where both ROW_NUMBERS equal 1.
Use DATEDIFF to calculate the DayCount
SQL Statement
-- Finds the maximal runs of consecutive days with Allocation = 0.
-- r: every range that starts on a zero-allocation day and is extended one day at a time
--    while the following day also has Allocation = 0 (so r holds all partial ranges too).
;WITH r AS (
-- anchor: each zero-allocation day as a one-day range
SELECT StartDate = Date, EndDate = Date
FROM YourTable
WHERE Allocation = 0
UNION ALL
-- recursive step: extend the range to the next calendar day when it is also zero
SELECT r.StartDate, q.Date
FROM r
INNER JOIN YourTable q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT [Start Date] = s.StartDate
, [End Date ] = s.EndDate
, [DayCount] = DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
-- rn1 = 1: the longest range for a given start date
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
-- rn2 = 1: the earliest start for a given end date
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
-- both conditions together keep only ranges that cannot be extended in either direction
WHERE s.rn1 = 1
AND s.rn2 = 1
-- 0 lifts the default cap on recursion depth
OPTION (MAXRECURSION 0)
Test script
-- Self-contained test: the same range query run against inline sample rows.
-- q: the six sample rows, supplied through a VALUES table constructor.
;WITH q (ID, Date, Allocation) AS (
SELECT * FROM (VALUES
(1, '2012-01-01', 0)
, (2, '2012-01-02', 2)
, (3, '2012-01-03', 0)
, (4, '2012-01-04', 0)
, (5, '2012-01-05', 0)
, (6, '2012-01-06', 5)
) a (a, b, c)
)
-- r: all ranges of consecutive zero-allocation days, grown one day at a time
, r AS (
SELECT StartDate = Date, EndDate = Date
FROM q
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT s.StartDate, s.EndDate, DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
-- rn1 = 1: longest range per start date; rn2 = 1: earliest start per end date
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
WHERE s.rn1 = 1
AND s.rn2 = 1
OPTION (MAXRECURSION 0)
Alternative way with CTE but without ROW_NUMBER(),
Sample data:
-- Rebuilds the scratch table #tab with ten days of sample allocations.
if object_id('tempdb..#tab') is not null
    drop table #tab

create table #tab (id int, date datetime, allocation int)

insert into #tab (id, date, allocation)
values
    (1,  '2012-01-01', 0),
    (2,  '2012-01-02', 2),
    (3,  '2012-01-03', 0),
    (4,  '2012-01-04', 0),
    (5,  '2012-01-05', 0),
    (6,  '2012-01-06', 5),
    (7,  '2012-01-07', 0),
    (8,  '2012-01-08', 5),
    (9,  '2012-01-09', 0),
    (10, '2012-01-10', 0)
Query:
-- Groups consecutive zero-allocation days of #tab into periods without ROW_NUMBER().
-- cte: one row per zero-allocation day s, with
--      e_id = id of the next calendar day when that day is also zero (else NULL),
--      b_id = id of the previous calendar day when that day is also zero (else NULL).
;with cte(s_id, e_id, b_id) as (
select s.id, e.id, b.id
from #tab s
left join #tab e on dateadd(dd, 1, s.date) = e.date and e.allocation = 0
left join #tab b on dateadd(dd, -1, s.date) = b.date and b.allocation = 0
where s.allocation = 0
)
select ts.date as [start date], te.date as [end date], count(*) as [day count] from (
-- a period starts at a day with no zero predecessor (b_id is null) and ends at the
-- first day at or after it that has no zero successor (e_id is null)
select c1.s_id as s, (
select min(s_id) from cte c2
where c2.e_id is null and c2.s_id >= c1.s_id
) as e
from cte c1
where b_id is null
) t
-- t1 supplies the zero-allocation rows inside the period, which count(*) counts
-- NOTE(review): comparing ids with >= / between assumes ids ascend with date -- confirm
join #tab t1 on t1.id between t.s and t.e and t1.allocation = 0
join #tab ts on ts.id = t.s
join #tab te on te.id = t.e
group by t.s, t.e, ts.date, te.date
Live example at data.SE.
Using this sample data:
-- Sample data: one row per day with its allocation.
CREATE TABLE MyTable (ID INT, Date DATETIME, Allocation INT);
-- {d '...'} is the ODBC escape syntax for a date literal
INSERT INTO MyTable VALUES (1, {d '2012-01-01'}, 0);
INSERT INTO MyTable VALUES (2, {d '2012-01-02'}, 2);
INSERT INTO MyTable VALUES (3, {d '2012-01-03'}, 0);
INSERT INTO MyTable VALUES (4, {d '2012-01-04'}, 0);
INSERT INTO MyTable VALUES (5, {d '2012-01-05'}, 0);
INSERT INTO MyTable VALUES (6, {d '2012-01-06'}, 5);
GO
Try this:
-- Groups consecutive days with Allocation = 0 and reports each group's first day,
-- last day and number of days.
WITH DateGroups (ID, Date, Allocation, SeedID) AS (
    -- anchor: zero-allocation days whose previous day is missing or not zero;
    -- each such day seeds a group identified by its own ID
    SELECT cur.ID, cur.Date, cur.Allocation, cur.ID
    FROM MyTable AS cur
    LEFT JOIN MyTable AS Prev
        ON Prev.Date = DATEADD(d, -1, cur.Date)
       AND Prev.Allocation = 0
    WHERE cur.Allocation = 0
      AND Prev.ID IS NULL
    UNION ALL
    -- recursive step: pull in the following day while it is also zero, keeping the seed
    SELECT nxt.ID, nxt.Date, nxt.Allocation, grp.SeedID
    FROM MyTable AS nxt
    JOIN DateGroups AS grp
        ON nxt.Date = DATEADD(d, 1, grp.Date)
    WHERE nxt.Allocation = 0
), StartDates (ID, StartDate, DayCount) AS (
    -- first day and size of every group
    SELECT SeedID, MIN(Date), COUNT(ID)
    FROM DateGroups
    GROUP BY SeedID
), EndDates (ID, EndDate) AS (
    -- last day of every group
    SELECT SeedID, MAX(Date)
    FROM DateGroups
    GROUP BY SeedID
)
SELECT sd.StartDate, ed.EndDate, sd.DayCount
FROM StartDates AS sd
JOIN EndDates AS ed
    ON sd.ID = ed.ID;
The first section of the query is a recursive SELECT, which is anchored by all rows that are allocation = 0, and whose previous day either doesn't exist or has allocation != 0. This effectively returns IDs: 1 and 3 which are the starting dates of the periods of time you want to return.
The recursive part of this same query starts from the anchor rows, and finds all subsequent dates that also have allocation = 0. The SeedID keeps track of the anchored ID through all the iterations.
The result so far is this:
ID Date Allocation SeedID
----------- ----------------------- ----------- -----------
1 2012-01-01 00:00:00.000 0 1
3 2012-01-03 00:00:00.000 0 3
4 2012-01-04 00:00:00.000 0 3
5 2012-01-05 00:00:00.000 0 3
The next sub query uses a simple GROUP BY to filter out all the start dates for each SeedID, and also counts the days.
The last sub query does the same thing with the end dates, but this time the day count isn't needed as we already have this.
The final SELECT query joins these two together to combine the start and end dates, and returns them along with the day count.
Give it a try if it works for you
Here SDATE for your DATE remains same as your table.
-- Pure correlated-subquery solution over TABLE1 (SDATE is the date column).
-- Returns one row per zero-allocation row that begins a run (see the WHERE clause below);
-- EDATE and DAYCOUNT are derived from the ID of the next row with ALLOCATION != 0,
-- falling back to the highest later ID when no such row exists.
SELECT SDATE,
CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) >0 THEN(
CASE WHEN (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) IS NULL THEN SDATE
ELSE (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) END
)ELSE (SELECT SDATE FROM TABLE1 WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID ))END AS EDATE
,CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) <0 THEN
(SELECT COUNT(*) FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID )) ELSE
(SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) END AS DAYCOUNT
-- keep only zero-allocation rows whose immediately preceding row (by ID) either has a
-- non-zero allocation or does not exist, i.e. the first row of each run
FROM TABLE1 TBL1 WHERE ALLOCATION = 0
AND (((SELECT ALLOCATION FROM TABLE1 WHERE ID=(SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID))<> 0 ) OR (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID)IS NULL);
A solution without CTE:
-- Runs of consecutive rows with allocation = 0, without CTEs or window functions.
-- Each of the four derived tables is the same query: idn is the number of rows dated on
-- or before the row, which serves as a row number in date order.
SELECT a.aDate AS StartDate
, MIN(c.aDate) AS EndDate
, (datediff(day, a.aDate, MIN(c.aDate)) + 1) AS DayCount
-- a: candidate first row of a run
FROM (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS a
-- b: the row directly before a with the same allocation (must be absent for a run start)
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS b ON a.idn = b.idn + 1 AND b.allocation = a.allocation
-- c: any row at or after a with the same allocation (candidate last row of a run)
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS c ON a.idn <= c.idn AND c.allocation = a.allocation
-- d: the row directly after c with the same allocation (must be absent for a run end)
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS d ON c.idn = d.idn - 1 AND d.allocation = c.allocation
-- a starts a run, c ends a run; MIN(c.aDate) above picks the first run end at or after a
WHERE b.idn IS NULL AND c.idn IS NOT NULL AND d.idn IS NULL AND a.allocation = 0
GROUP BY a.aDate
Example

I wish I could correlate an "inline view"

I have a Patient table:
PatientId Admitted
--------- ---------------
1 d/m/yy hh:mm:ss
2 d/m/yy hh:mm:ss
3 d/m/yy hh:mm:ss
I have a PatientMeasurement table (0 to many):
PatientId MeasurementId Recorded Value
--------- ------------- --------------- -----
1 A d/h/yy hh:mm:ss 100
1 A d/h/yy hh:mm:ss 200
1 A d/h/yy hh:mm:ss 300
2 A d/h/yy hh:mm:ss 10
2 A d/h/yy hh:mm:ss 20
1 B d/h/yy hh:mm:ss 1
1 B d/h/yy hh:mm:ss 2
I am trying to create a result set that resembles:
PatientId Numerator Denominator
--------- -------- -----------
1 1 1
2 0 1
3 0 1
Essentially, a patient will have a 1 in the numerator if they have at least one value for measurement A and one value for measurement B. In this example, patient 1 has 3 A measurements and 2 B measurements, so the numerator is 1. Patient 2 has 2 A measurements, but no B measurements, so the numerator is 0. Patient 3 has neither an A measurement nor a B measurement, so the numerator is 0.
My query thus far is:
-- One row per patient with a 0/1 Numerator and a constant Denominator of 1.
-- Numerator is 1 only when a.cnt + b.cnt > 2; a patient without A or without B rows gets
-- a NULL count from the LEFT JOIN, which makes the sum NULL and the result 0.
SELECT PatientId, CASE WHEN a.cnt+b.cnt>2 THEN 1 ELSE 0 END Numerator, 1 Denominator
FROM patient p
-- a: number of 'A' measurements per patient
LEFT OUTER JOIN (
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE MeasurementId='A'
--AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) a ON p.PatientId=a.PatientId
-- b: number of 'B' measurements per patient
LEFT OUTER JOIN (
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE MeasurementId='B'
--AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) b ON p.PatientId=b.PatientId
This works as expected as long as I don't include the correlated date restriction (Recorded <= dateadd(hh, 12, Admitted)). Unfortunately, correlating an 'inline view' in this manner is not syntactically valid.
This has forced me to re-write the SQL to:
-- Rewrite that moves the two counts into correlated subqueries in the select list of the
-- derived table v, so that each can reference the patient's Admitted time.
-- NOTE(review): each subquery selects two columns (PatientId, cnt); a subquery used as a
-- select-list expression is normally limited to one column -- confirm this runs as posted.
SELECT PatientId, CASE WHEN v.a+v.b>2 THEN 1 ELSE 0 END Numerator, 1 Denominator
FROM (
SELECT PatientId,
-- a: 'A' measurements recorded no later than 12 hours after admission
(
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE PatientId=p.PatientId
AND MeasurementId='A'
AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) a,
-- b: 'B' measurements recorded no later than 12 hours after admission
(
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE PatientId=p.PatientId
AND MeasurementId='B'
AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) b
FROM Patient p
) v
My question: Is there a better, more-efficient way to do this?
Thanks for your time.
Try this :
-- GroupPatients: per patient and measurement type, the number of A/B measurements
-- recorded no later than 12 hours after the patient's admission.
WITH GroupPatients AS
(
    -- columns are qualified because PatientId exists in both joined tables
    SELECT pm.MeasurementId, pm.PatientId, COUNT(*) AS cnt
    FROM PatientMeasurement AS pm
    INNER JOIN Patient AS p ON pm.PatientId = p.PatientId
    WHERE pm.MeasurementId IN ('A', 'B')
      AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
    GROUP BY pm.MeasurementId, pm.PatientId
)
-- Numerator is 1 only when the patient has at least one A and at least one B measurement.
SELECT p.PatientID,
       CASE
           WHEN COALESCE(GPA.cnt, 0) > 0 AND COALESCE(GPB.cnt, 0) > 0 THEN 1
           ELSE 0
       END AS Numerator,
       1 AS Denominator
FROM Patient AS p
-- the same CTE is joined twice, once per measurement type
LEFT JOIN GroupPatients AS GPA ON p.PatientID = GPA.PatientId AND GPA.MeasurementId = 'A'
LEFT JOIN GroupPatients AS GPB ON p.PatientID = GPB.PatientId AND GPB.MeasurementId = 'B'
I've made one tweak to the business logic too - your spec says Numerator should be one if a patient has both A and B measurements - however, your clause of a.cnt+b.cnt>2 will erroneously return one if either a.cnt or b.cnt are 3 or more and the other is zero.
Another solution can be close to your original attempt using OUTER APPLY:
-- One row per patient. OUTER APPLY lets each count reference the patient's own Admitted
-- time, which a plain derived table cannot do.
-- Numerator is 1 only when both counts are positive; testing the sum (a.cnt + b.cnt > 2)
-- would reject a patient with exactly one A and one B and accept one with three A and no B.
SELECT p.PatientId,
       CASE WHEN a.cnt > 0 AND b.cnt > 0 THEN 1 ELSE 0 END AS Numerator,
       1 AS Denominator
FROM patient p
OUTER APPLY (
    -- 'A' measurements recorded no later than 12 hours after admission
    SELECT count(*) cnt
    FROM PatientMeasurement pm
    WHERE pm.MeasurementId = 'A'
      AND pm.Recorded <= dateadd(hh, 12, p.Admitted)
      AND pm.PatientId = p.PatientId
) AS a(cnt)
OUTER APPLY (
    -- 'B' measurements recorded no later than 12 hours after admission
    SELECT count(*) cnt
    FROM PatientMeasurement pm
    WHERE pm.MeasurementId = 'B'
      AND pm.Recorded <= dateadd(hh, 12, p.Admitted)
      AND pm.PatientId = p.PatientId
) AS b(cnt)
-- One row per patient; Numerator is 1 when the patient has at least one A and at least
-- one B measurement recorded no later than 12 hours after admission.
-- Column names follow the schema given in the question (PatientId, MeasurementId).
SELECT p.*,
       CASE WHEN
           EXISTS
           (
               SELECT NULL
               FROM PatientMeasurement pm
               WHERE pm.PatientId = p.PatientId
                 AND pm.MeasurementId = 'A'
                 AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
           ) AND EXISTS (
               SELECT NULL
               FROM PatientMeasurement pm
               WHERE pm.PatientId = p.PatientId
                 AND pm.MeasurementId = 'B'
                 AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
           ) THEN 1 ELSE 0 END AS Numerator,
       1 AS Denominator
FROM Patient p
Assuming you are using Sql 2005 or 2008, the entire query can be simplified using some window functions and a pivot:
-- pData: one row per (patient, qualifying measurement) carrying the count of qualifying
-- measurements of that type for the patient. Starting from Patient with a LEFT JOIN keeps
-- patients that have no qualifying measurement (their MeasurementId is NULL, cnt is 0)
-- and brings Admitted, which lives on Patient, into scope.
with pData as
(
    select p.PatientId,
           pm.MeasurementId,
           count(pm.MeasurementId) over (partition by p.PatientId, pm.MeasurementId) as cnt
    from Patient p
    left join PatientMeasurement pm
        on pm.PatientId = p.PatientId
       and pm.MeasurementId in ('A','B')
       and pm.Recorded <= dateadd(hh, 12, p.Admitted)
)
-- The pivot turns the per-type counts into columns [A] and [B]; a missing type is NULL.
-- Numerator requires both counts to be positive rather than testing their sum.
select PatientId, coalesce([A],0) as cntA, coalesce([B],0) as cntB,
       case when coalesce([A],0) > 0 and coalesce([B],0) > 0 then 1 else 0 end as Numerator,
       1 as Denominator
from pData
pivot (max(cnt) for MeasurementId in ([A],[B])) pvt
-- Width, in hours after admission, of the window in which a measurement counts.
DECLARE @TimeSlot int;
SET @TimeSlot = 12;
WITH
-- pt: every patient with each of their measurements (if any), flagged 1 when the
-- measurement was recorded no later than @TimeSlot hours after admission
pt AS (
    SELECT p.PatientID, p.Admitted, m.MeasurementID, m.Recorded,
           CASE
               WHEN m.Recorded <= dateadd(hh, @TimeSlot, p.Admitted) THEN 1
               ELSE 0
           END AS InTimeSlot
    FROM Patient AS p
    LEFT JOIN PatientMeasurement AS m ON p.PatientID = m.PatientID
),
-- cntA / cntB: number of in-window A and B measurements per patient
cntA AS (
    SELECT PatientID, count(*) AS A_count
    FROM pt
    WHERE MeasurementID = 'A' AND InTimeSlot = 1
    GROUP BY PatientID
),
cntB AS (
    SELECT PatientID, count(*) AS B_count
    FROM pt
    WHERE MeasurementID = 'B' AND InTimeSlot = 1
    GROUP BY PatientID
),
-- cntAB: both counts for every patient, 0 when a patient has none of that type
cntAB AS (
    SELECT p.PatientID
         , coalesce(a.A_count, 0) AS A_cnt
         , coalesce(b.B_count, 0) AS B_cnt
    FROM Patient AS p
    LEFT JOIN cntA AS a ON p.PatientID = a.PatientID
    LEFT JOIN cntB AS b ON p.PatientID = b.PatientID
),
-- cntN: 1 when the patient has at least one A and at least one B in the window
cntN AS (
    SELECT PatientID,
           CASE WHEN A_cnt > 0 AND B_cnt > 0 THEN 1 ELSE 0 END AS Numerator
    FROM cntAB
)
SELECT PatientID, Numerator, 1 AS Denominator FROM cntN