SQL get data from column(YYY0) with same number as different column(XXXX0) with the maximum date - sql

I am looking for a query to for each row to find the column (YYY.) with the highest/most recent date and would like to find the corresponding column (XXXX.)
Finding the column with the most recent date was possible, but getting the corresponding column left me clueless... All suggestions are welcome!!
So from the table:
| id | XXXX0| YYY0 | XXXX1| YYY1| XXXX9| YYY9|
---------------------------------------------------------------------------------------
| A | 3 | 10-10-2009| 4 |10-10-2010| 1 | 10-10-2011|
| B | 2 | 10-10-2010| 3 |10-10-2012| 6 | 10-10-2011|
| C | 4 | 10-10-2011| 1 |10-10-2010| 7 | 10-10-2012|
| D | 1 | 10-10-2010| 8 |10-10-2013| 9 | 10-10-2012|
I would like to end up with:
| id | LabelX| LabelY|
--------------------------------------
| A | 1 | 10-10-2011|
| B | 3 | 10-10-2012|
| C | 7 | 10-10-2012|
| D | 8 | 10-10-2013|
Added:
This was what I tried to determine the maximum value:
SELECT LTRIM(A) AS A, LTRIM(B) AS B, LTRIM(C)
(Select Max(v)
FROM (VALUES (YYY0), (YYY1), …..(YYY9) AS value(v)) as [MaxDate]
FROM Table

SELECT id,
CASE
WHEN YYYY0 > YYY1 AND YYY0 > YYY2 ... AND YYY0 > YYY9 THEN XXX0
WHEN YYY1 > YYY2 ... AND YYY0 > YYY9 THEN XXX1
...
ELSE XXX9 AS LabelX,
CASE
WHEN YYYY0 > YYY1 AND YYY0 > YYY2 ... AND YYY0 > YYY9 THEN YYY0
WHEN YYY1 > YYY2 ... AND YYY0 > YYY9 THEN YYY1
...
ELSE YYY9 AS LabelY,
...
and replace > by >= depending on which you want to win if they're equal.

If it's a SQL Server 2005 and above you can do it this way (it assumes that dates are unique in each column for specific id):
;with cte as (
select id, xxxx0 as LabelX, yyy0 as LabelY from tab union all
select id, xxxx1, yyy1 from tab union all
select id, xxxx9, yyy9 from tab
)
select t.id, x.LabelX, t.LabelY from (
select t1.id, max(t1.LabelY) as LabelY
from cte t1
group by t1.id
) t
join cte x on t.id = x.id and t.LabelY = x.LabelY
Live SQL Fiddle example

Here's a simplified example for you. I'm using SQL Server 2008, but this SQL is pretty standard and should work fine on most modern implementations (famous last words).
So, given this table schema:
drop table dbo.foobar
go
create table dbo.foobar
(
id char(1) not null primary key ,
X1 int not null , Y1 date not null ,
X2 int not null , Y2 date not null ,
X3 int not null , Y3 date not null ,
)
go
And some sample data:
insert dbo.foobar values ( 'A' , 1 , '1 Jan 2013' , 2 , '1 Feb 2013' , 3 , '1 Mar 2013' )
insert dbo.foobar values ( 'B' , 1 , '1 Mar 2013' , 2 , '1 Jan 2013' , 3 , '1 Feb 2013' )
insert dbo.foobar values ( 'C' , 1 , '1 Feb 2013' , 2 , '1 Mar 2013' , 3 , '1 Jan 2013' )
go
Depending on the nature of your data and the desired semantics of the query and results, either this approach:
--
-- This approach pushes evaluation of the corresponding X to the output column list
--
-- 1. Construct a UNION ALL to normalize the table into a set of id/date pairs
-- 2. Compute max(date) for each id
-- 3. Join back against the original table to recover the source row
-- 4. Use the max(date) value to identify the corresponding X
--
select t.id ,
MaxY = t.y ,
X = case
when t.Y = x.Y1 then x.X1
when t.Y = x.Y2 then x.X2
when t.Y = x.Y3 then x.X3
end
from ( select x.id ,
y = max( x.y )
from ( select id , y=y1 from dbo.foobar
union all select id , y=y2 from dbo.foobar
union all select id , y=y3 from dbo.foobar
) x
group by x.id
) t
join dbo.foobar x on x.id = t.id
order by 1,2,3
go
Or this approach
--
-- This approach looks at each X/Y pair as its own "table" as it were
--
select t.id ,
MaxY = t.y ,
X = coalesce( t1.X1 , t2.X2 , t3.X3 )
from ( select x.id ,
y = max( x.y )
from ( select id , y=Y1 from dbo.foobar
union all select id , y=Y2 from dbo.foobar
union all select id , y=Y3 from dbo.foobar
) x
group by x.id
) t
left join dbo.foobar t1 on t1.id = t.id and t1.y1 = t.Y
left join dbo.foobar t2 on t2.id = t.id and t2.y2 = t.Y
left join dbo.foobar t3 on t3.id = t.id and t3.y3 = t.Y
order by 1,2,3
should work for you. In either event, both queries produce an identical result set:
id MaxY X
-- ---------- -
A 2013-03-01 3
B 2013-03-01 1
C 2013-03-01 2
Good Luck!
[Have you considered normalizing your database design? Third Normal Form makes life a lot easier and usually more efficient.]

Related

How put grouping variable to columns in SQL/

I have following dataset
and want to get this
How can I do it?
Using SQL Server, you can use a PIVOT, such as :
SELECT Time, [a],[b],[c]
FROM
(
SELECT time, [group],value
FROM dataset) d
PIVOT
(
SUM(value)
FOR [group] IN ([a],[b],[c])
) AS pvt
You can try it on the following fiddle.
Changed the column names to not conflict with reserved words. You would have to put them into single quotes otherwise.
WITH
-- the input
indata(grp,tm,val) AS (
SELECT 'a',1,44
UNION ALL SELECT 'a',2,22
UNION ALL SELECT 'a',3, 1
UNION ALL SELECT 'b',1, 1
UNION ALL SELECT 'b',2, 5
UNION ALL SELECT 'b',3, 6
UNION ALL SELECT 'c',1, 7
UNION ALL SELECT 'c',2, 8
UNION ALL SELECT 'c',3, 9
)
SELECT tm
, SUM(CASE grp WHEN 'a' THEN val END) AS a
, SUM(CASE grp WHEN 'b' THEN val END) AS b
, SUM(CASE grp WHEN 'c' THEN val END) AS c
FROM indata
GROUP BY tm
;
tm | a | b | c
----+----+---+---
1 | 44 | 1 | 7
2 | 22 | 5 | 8
3 | 1 | 6 | 9
select * from
(
select
time,[group],value
from yourTable
group by time,[group],value
)
as table
pivot
(
sum([value])
for [group] in ([a],[b],[c])
) as p
order by time
This is the result
for Vertica,
SELECT time
, SUM(value) FILTER (WHERE group = a) a
, SUM(value) FILTER (WHERE group = b) b
, SUM(value) FILTER (WHERE group = c) c
FROM yourTable
GROUP BY time

Adding new column with set of rules in SQL Server

I have a somewhat-complex set of rules that I need to run against a table. The problem is as follows: I have a table that stores medical records and I need to identify the first site that a person goes to after their discharge date. The discharge date is the end_date with a location of 'initial' (this will be the first row for every group). The table is grouped by ID and sorted in the format shown below.
There are 3 rules: (1) within the group of ID, if any of the rows have a begin_date that matches the first rows end_date, return that location as the first site (if there are two rows that meet this condition, either are correct, the first instance is preferred). (2) if the first option does not exist, then if there is an instance that the patient had location 'Health', then return 'Health'. (3) else, if conditions 1 and 2 do not exist, then return 'Home'
table
ID color begin_date end_date location
1 red 2017-01-01 2017-01-07 initial
1 green 2017-01-05 2017-01-07 nursing
1 blue 2017-01-07 2017-01-15 rehab
1 red 2017-01-11 2017-01-22 Health
2 red 2017-02-22 2017-02-26 initial
2 green 2017-02-26 2017-02-28 nursing
2 blue 2017-02-26 2017-02-28 rehab
3 red 2017-03-11 2017-03-22 initial
4 red 2017-04-01 2017-04-07 initial
4 green 2017-04-05 2017-04-07 nursing
4 blue 2017-04-10 2017-04-15 Health
final result:
ID first_site
1 rehab
2 nursing
3 home
4 Health
In sql-server 2008 my attempt: (side note: I considered adding a helper column which would be the end_date of the 'initial' location to each row so its easier to compare within a row. Not sure if this is necessary). I appreciate any guidance!
SELECT
ID,
OVER( PARTITION ID CASE WHEN end_date[0] = begin_date THEN location
WHEN location = 'Health' THEN 'Health'
ELSE 'Home' end) AS [first_site]
FROM table
In python, I was able to get this answer with:
def conditions(x):
#compare each group first
val = x.loc[x['begin_date'] == x['end_date'].iloc[0], 'location']
#if at least one match (not return empty `Series` get first value)
if not val.empty:
return val.iloc[0]
#if value is empty, check if value 'Health' exists within the group
elif (x['location'] == 'Health').any():
return 'Health'
else:
return 'Home'
final = df.groupby('ID').apply(conditions).reset_index(name='first_site')
This can be achieved with a windowed function to rank the visits that start on the same day as the initial end and then a couple simple joins:
declare #t table(ID int,color varchar(20),begin_date date,end_date date,location varchar(20));
insert into #t values(1,'red','20170101','20170107','initial'),(1,'green','20170105','20170107','nursing'),(1,'blue','20170107','20170115','rehab'),(1,'red','20170111','20170122','Health'),(2,'red','20170222','20170226','initial'),(2,'green','20170226','20170228','nursing'),(2,'blue','20170226','20170228','rehab'),(3,'red','20170311','20170322','initial'),(4,'red','20170401','20170407','initial'),(4,'green','20170405','20170407','nursing'),(4,'blue','20170410','20170415','Health');
with d as
(
select ID
,color
,begin_date
,end_date
,location
,row_number() over (partition by ID
,begin_date
order by case when location = 'initial' then '29990101' else begin_date end
) as r
from #t
)
select i.ID
,isnull(d.location,isnull(h.location,'Home')) as first_site
from d as i
left join d
on i.end_date = d.begin_date
and d.r = 1
left join d as h
on i.ID = h.ID
and h.location = 'Health'
where i.location = 'initial'
;
Output:
+----+------------+
| ID | first_site |
+----+------------+
| 1 | rehab |
| 2 | nursing |
| 3 | Home |
| 4 | Health |
+----+------------+
declare #example table (
ExampleID int identity(1,1) not null primary key clustered
, ID int not null
, Color nvarchar(255) not null
, BeginDate date not null
, EndDate date not null
, Loc nvarchar(255) not null
);
insert into #example (ID, color, begindate, enddate, loc)
select 1, 'red' , '2017-01-01', '2017-01-07', 'initial' union all
select 1, 'green' , '2017-01-05', '2017-01-07', 'nursing' union all
select 1, 'blue' , '2017-01-07', '2017-01-15', 'rehab' union all
select 1, 'red' , '2017-01-11', '2017-01-22', 'Health' union all
select 2, 'red' , '2017-02-22', '2017-02-26', 'initial' union all
select 2, 'green' , '2017-02-26', '2017-02-28', 'nursing' union all
select 2, 'blue' , '2017-02-26', '2017-02-28', 'rehab' union all
select 3, 'red' , '2017-03-11', '2017-03-22', 'initial' union all
select 4, 'red' , '2017-04-01', '2017-04-07', 'initial' union all
select 4, 'green' , '2017-04-05', '2017-04-07', 'nursing' union all
select 4, 'blue' , '2017-04-10', '2017-04-15', 'Health';
with cte as (
select a.ID
, a.Color
, a.BeginDate
, a.EndDate
, b.Loc
, rank() over(partition by a.ID order by a.ID, a.begindate, b.enddate desc, b.loc) Ranking
from #example a
left join #example b
on a.EndDate = b.BeginDate
)
, cte2 as (
select id
, Loc
from #example
where loc = 'health'
)
select a.ID
, COALESCE(a.loc, b.loc, 'Home') as Loc
from cte a
left join cte2 b
on a.id = b.id
where Ranking = 1
Output:
ID Loc
1 rehab
2 nursing
3 home
4 Health

Reversed group by with multiple columns?

Hi i'm not sure if this possible in oracle database or any one but is possible to make this:
What i have:
Document | Volume | BAC | CO
-----------|-----------|---------|---------
TA1 | 4 | 2 | 0
What i want:
Document | Volume | BAC | CO | ID
-----------|-----------|---------|---------|---------
TA1 | 1 | 0 | 0 | 1
TA1 | 1 | 0 | 0 | 2
TA1 | 1 | 0 | 0 | 3
TA1 | 1 | 0 | 0 | 4
TA1 | 0 | 1 | 0 | 5
TA1 | 0 | 1 | 0 | 6
I tried using WITH but it's just mess in my Sqldevelopper now couldn't even come close to it knowing that WITH can't be used twice or been in UNION.
PS: Number of rows need to be equal to (Volume + Bac + CO).
Is this operation possible in ORACLE 12?
This should work and it only goes over the data once. It's a simple application of hierarchical query.
I added more test data; note that in the case of TA3, there should be no rows in the output (because all three values in the row are 0).
with
test_data ( document, volume, bac, co ) as (
select 'TA1', 4, 2, 0 from dual union all
select 'TA2', 0, 0, 1 from dual union all
select 'TA5', 0, 0, 0 from dual
)
-- end of test data; actual solution (SQL query) begins below this line
select document,
case when level <= volume then 1 else 0 end as volume,
case when level > volume and level <= volume + bac then 1 else 0 end as bac,
case when level > volume + bac then 1 else 0 end as co,
level as id
from test_data
where volume + bac + co > 0
connect by level <= volume + bac + co
and prior document = document
and prior sys_guid() is not null
order by document, id -- ORDER BY is optional
;
DOC VOLUME BAC CO ID
--- ---------- ---------- ---------- ----------
TA1 1 0 0 1
TA1 1 0 0 2
TA1 1 0 0 3
TA1 1 0 0 4
TA1 0 1 0 5
TA1 0 1 0 6
TA2 0 0 1 1
7 rows selected
You can try this -> First, create a temporary derived table containing the amount of rows equal to the maximum number allowed :
CREATE TABLE TMP_TABLE AS
SELECT s.num_col,ROW_NUMBER() OVER(ORDER BY 1) as rnk
FROM (
SELECT 1 as num_col FROM dual
UNION ALL SELECT 1 as num_col FROM dual
UNION ALL SELECT 1 as num_col FROM dual
UNION ALL SELECT 1 as num_col FROM dual
...... As many necessary) s
Then, use this:
SELECT p.num_col as volume,0 as BAC,0 as CO
FROM TMP_TABLE p
JOIN YourTable t
ON(t.Volume >= p.rnk)
UNION ALL
SELECT 0 as volume,p.num_col as BAC,0 as CO
FROM TMP_TABLE p
JOIN YourTable t
ON(t.BAC >= p.rnk)
UNION ALL
SELECT 0 as volume,0 as BAC,p.num_col as CO
FROM TMP_TABLE p
JOIN YourTable t
ON(t.CO >= p.rnk)
select t.document
,decode(t.col,'V',1,0) as volume
,decode(t.col,'B',1,0) as bac
,decode(t.col,'C',1,0) as co
,row_number () over
(
partition by t.document order by decode(t.col,'V',1,'B',2,'C',3)
) as id
from t unpivot (n for col in (volume as 'V',bac as 'B',co as 'C')) t
join (select level as n from dual connect by level <= (select max(greatest(volume,bac,co)) from t)) c
on c.n <= t.n
;
or
select t.document
,decode(t.col,'V',1,0) as volume
,decode(t.col,'B',1,0) as bac
,decode(t.col,'C',1,0) as co
,t.pre + c.n as id
from (select t.*,0 as pre_v,volume as pre_b,volume+bac as pre_c from t) t
unpivot ((n,pre) for col in ((volume,pre_v) as 'V',(bac,pre_b) as 'B',(co,pre_c) as 'C')) t
join (select level as n from dual connect by level <= (select max(greatest(volume,bac,co)) from t)) c
on c.n <= t.n
order by 1,id
;
or
with r (col,Document,col_val,n,id) as
(
select c.col,t.Document,decode (c.col,1,t.Volume,2,t.BAC,3,t.CO),1,decode (c.col,1,1,2,t.Volume+1,3,t.Volume+t.BAC+1)
from t cross join (select level as col from dual connect by level <= 3) c
where decode (c.col,1,t.Volume,2,t.BAC,3,t.CO) > 0
union all
select r.col,r.Document,r.col_val,r.n+1,r.id+1
from r join (select level as col from dual connect by level <= 3) c
on c.col = r.col and r.n < r.col_val
)
select Document
,decode (col,1,1,0) as Volume
,decode (col,2,1,0) as BAC
,decode (col,3,1,0) as CO
,id
from r
order by 1,5
;
or
with r_Volume (Document,col_val,Volume,Bac,Co,n) as (select Document,Volume,1,0,0,1 from t where Volume > 0 union all select Document,col_val,Volume,Bac,Co,n+1 from r_Volume where n < col_val)
,r_Bac (Document,col_val,Volume,Bac,Co,n) as (select Document,Bac ,0,1,0,1 from t where Bac > 0 union all select Document,col_val,Volume,Bac,Co,n+1 from r_Bac where n < col_val)
,r_Co (Document,col_val,Volume,Bac,Co,n) as (select Document,Co ,0,0,1,1 from t where Co > 0 union all select Document,col_val,Volume,Bac,Co,n+1 from r_Co where n < col_val)
select Document,Volume,Bac,Co,row_number () over (partition by Document order by Volume desc,Bac desc,Co desc) as id
from ( select Document,Volume,Bac,Co from r_Volume
union all select Document,Volume,Bac,Co from r_Bac
union all select Document,Volume,Bac,Co from r_Co
) r
;
Here's another way to solve this problem:
WITH sample_data( document, volume, bac, co ) AS (
SELECT 'TA1', 4, 2, 0
FROM dual
)
, recursive( document, col_id, col_cnt, id ) AS (
SELECT document -- First unpivot the data for each document
, col_id
, col_cnt
, SUM( col_cnt ) over( partition BY document order by col_id ) - col_cnt + 1
FROM sample_data UNPIVOT( col_cnt FOR col_id IN( volume AS 1,
bac AS 2,
co AS 3 ) )
WHERE col_cnt > 0 -- But throw away rows with zero col_cnts.
UNION ALL
SELECT document
, col_id
, col_cnt - 1 -- Recursively decrement the col_cnt
, id + 1 -- and increment id
FROM recursive
WHERE col_cnt > 1 -- until col_cnt is no longer > 1
)
SELECT document -- Finally pivot the recursive data
, volume -- back to its original columns
, bac
, co
, id
FROM recursive PIVOT( COUNT( * ) FOR col_id IN( 1 AS volume, 2 AS bac, 3 AS co ) )
ORDER BY document
, id;

How to count most consecutive occurrences of a value in a Column in SQL Server

I have a table Attendance in my database.
Date | Present
------------------------
20/11/2013 | Y
21/11/2013 | Y
22/11/2013 | N
23/11/2013 | Y
24/11/2013 | Y
25/11/2013 | Y
26/11/2013 | Y
27/11/2013 | N
28/11/2013 | Y
I want to count the most consecutive occurrence of a value Y or N.
For example in the above table Y occurs 2, 4 & 1 times. So I want 4 as my result.
How to achieve this in SQL Server?
Any help will be appreciated.
Try this:-
The difference between the consecutive date will remain constant
Select max(Sequence)
from
(
select present ,count(*) as Sequence,
min(date) as MinDt, max(date) as MaxDt
from (
select t.Present,t.Date,
dateadd(day,
-(row_number() over (partition by present order by date))
,date
) as grp
from Table1 t
) t
group by present, grp
)a
where Present ='Y'
SQL FIDDLE
You can do this with a recursive CTE:
;WITH cte AS (SELECT Date,Present,ROW_NUMBER() OVER(ORDER BY Date) RN
FROM Table1)
,cte2 AS (SELECT Date,Present,RN,ct = 1
FROM cte
WHERE RN = 1
UNION ALL
SELECT a.Date,a.Present,a.RN,ct = CASE WHEN a.Present = b.Present THEN ct + 1 ELSE 1 END
FROM cte a
JOIN cte2 b
ON a.RN = b.RN+1)
SELECT TOP 1 *
FROM cte2
ORDER BY CT DESC
Demo: SQL Fiddle
Note, the date's in the demo got altered due to the format you posted the dates in your question.

Creating groups of consecutive days meeting a given criteria

I have table the following data structure in SQL Server:
ID Date Allocation
1, 2012-01-01, 0
2, 2012-01-02, 2
3, 2012-01-03, 0
4, 2012-01-04, 0
5, 2012-01-05, 0
6, 2012-01-06, 5
etc.
What I need to do is get all consecutive day periods where Allocation = 0, and in the following form:
Start Date End Date DayCount
2012-01-01 2012-01-01 1
2012-01-03 2012-01-05 3
etc.
Is it possible to do this in SQL, and if so how?
In this answer, I'll assume that the "id" field numbers the rows consecutively when sorted by increasing date, like it does in the example data. (Such a column can be created if it does not exist).
This is an example of a technique described here and here.
1) Join the table to itself on adjacent "id" values. This pairs adjacent rows. Select rows where the "allocation" field has changed. Store the result in a temporary table, also keeping a running index.
SET #idx = 0;
CREATE TEMPORARY TABLE boundaries
SELECT
(#idx := #idx + 1) AS idx,
a1.date AS prev_end,
a2.date AS next_start,
a1.allocation as allocation
FROM allocations a1
JOIN allocations a2
ON (a2.id = a1.id + 1)
WHERE a1.allocation != a2.allocation;
This gives you a table having "the end of the previous period", "the start of the next period", and "the value of 'allocation' in the previous period" in each row:
+------+------------+------------+------------+
| idx | prev_end | next_start | allocation |
+------+------------+------------+------------+
| 1 | 2012-01-01 | 2012-01-02 | 0 |
| 2 | 2012-01-02 | 2012-01-03 | 2 |
| 3 | 2012-01-05 | 2012-01-06 | 0 |
+------+------------+------------+------------+
2) We need the start and end of each period in the same row, so we need to combine adjacent rows again. Do this by creating a second temporary table like boundaries but having an idx field 1 greater:
+------+------------+------------+
| idx | prev_end | next_start |
+------+------------+------------+
| 2 | 2012-01-01 | 2012-01-02 |
| 3 | 2012-01-02 | 2012-01-03 |
| 4 | 2012-01-05 | 2012-01-06 |
+------+------------+------------+
Now join on the idx field and we get the answer:
SELECT
boundaries2.next_start AS start,
boundaries.prev_end AS end,
allocation
FROM boundaries
JOIN boundaries2
USING(idx);
+------------+------------+------------+
| start | end | allocation |
+------------+------------+------------+
| 2012-01-02 | 2012-01-02 | 2 |
| 2012-01-03 | 2012-01-05 | 0 |
+------------+------------+------------+
** Note that this answer gets the "internal" periods correctly but misses the two "edge" periods where allocation = 0 at the beginning and allocation = 5 at the end. Those can be pulled in using UNION clauses but I wanted to present the core idea without that complication.
Following would be one way to do it. The gist of this solution is
Use a CTE to get a list of all consecutive start and enddates with Allocation = 0
Use the ROW_NUMBER window function to assign rownumbers depending on both start- and enddates.
Select only those records where both ROW_NUMBERS equal 1.
Use DATEDIFFto calculate the DayCount
SQL Statement
;WITH r AS (
SELECT StartDate = Date, EndDate = Date
FROM YourTable
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN YourTable q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT [Start Date] = s.StartDate
, [End Date ] = s.EndDate
, [DayCount] = DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
WHERE s.rn1 = 1
AND s.rn2 = 1
OPTION (MAXRECURSION 0)
Test script
;WITH q (ID, Date, Allocation) AS (
SELECT * FROM (VALUES
(1, '2012-01-01', 0)
, (2, '2012-01-02', 2)
, (3, '2012-01-03', 0)
, (4, '2012-01-04', 0)
, (5, '2012-01-05', 0)
, (6, '2012-01-06', 5)
) a (a, b, c)
)
, r AS (
SELECT StartDate = Date, EndDate = Date
FROM q
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT s.StartDate, s.EndDate, DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
WHERE s.rn1 = 1
AND s.rn2 = 1
OPTION (MAXRECURSION 0)
Alternative way with CTE but without ROW_NUMBER(),
Sample data:
if object_id('tempdb..#tab') is not null
drop table #tab
create table #tab (id int, date datetime, allocation int)
insert into #tab
select 1, '2012-01-01', 0 union
select 2, '2012-01-02', 2 union
select 3, '2012-01-03', 0 union
select 4, '2012-01-04', 0 union
select 5, '2012-01-05', 0 union
select 6, '2012-01-06', 5 union
select 7, '2012-01-07', 0 union
select 8, '2012-01-08', 5 union
select 9, '2012-01-09', 0 union
select 10, '2012-01-10', 0
Query:
;with cte(s_id, e_id, b_id) as (
select s.id, e.id, b.id
from #tab s
left join #tab e on dateadd(dd, 1, s.date) = e.date and e.allocation = 0
left join #tab b on dateadd(dd, -1, s.date) = b.date and b.allocation = 0
where s.allocation = 0
)
select ts.date as [start date], te.date as [end date], count(*) as [day count] from (
select c1.s_id as s, (
select min(s_id) from cte c2
where c2.e_id is null and c2.s_id >= c1.s_id
) as e
from cte c1
where b_id is null
) t
join #tab t1 on t1.id between t.s and t.e and t1.allocation = 0
join #tab ts on ts.id = t.s
join #tab te on te.id = t.e
group by t.s, t.e, ts.date, te.date
Live example at data.SE.
Using this sample data:
CREATE TABLE MyTable (ID INT, Date DATETIME, Allocation INT);
INSERT INTO MyTable VALUES (1, {d '2012-01-01'}, 0);
INSERT INTO MyTable VALUES (2, {d '2012-01-02'}, 2);
INSERT INTO MyTable VALUES (3, {d '2012-01-03'}, 0);
INSERT INTO MyTable VALUES (4, {d '2012-01-04'}, 0);
INSERT INTO MyTable VALUES (5, {d '2012-01-05'}, 0);
INSERT INTO MyTable VALUES (6, {d '2012-01-06'}, 5);
GO
Try this:
WITH DateGroups (ID, Date, Allocation, SeedID) AS (
SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, MyTable.ID
FROM MyTable
LEFT JOIN MyTable Prev ON Prev.Date = DATEADD(d, -1, MyTable.Date)
AND Prev.Allocation = 0
WHERE Prev.ID IS NULL
AND MyTable.Allocation = 0
UNION ALL
SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, DateGroups.SeedID
FROM MyTable
JOIN DateGroups ON MyTable.Date = DATEADD(d, 1, DateGroups.Date)
WHERE MyTable.Allocation = 0
), StartDates (ID, StartDate, DayCount) AS (
SELECT SeedID, MIN(Date), COUNT(ID)
FROM DateGroups
GROUP BY SeedID
), EndDates (ID, EndDate) AS (
SELECT SeedID, MAX(Date)
FROM DateGroups
GROUP BY SeedID
)
SELECT StartDates.StartDate, EndDates.EndDate, StartDates.DayCount
FROM StartDates
JOIN EndDates ON StartDates.ID = EndDates.ID;
The first section of the query is a recursive SELECT, which is anchored by all rows that are allocation = 0, and whose previous day either doesn't exist or has allocation != 0. This effectively returns IDs: 1 and 3 which are the starting dates of the periods of time you want to return.
The recursive part of this same query starts from the anchor rows, and finds all subsequent dates that also have allocation = 0. The SeedID keeps track of the anchored ID through all the iterations.
The result so far is this:
ID Date Allocation SeedID
----------- ----------------------- ----------- -----------
1 2012-01-01 00:00:00.000 0 1
3 2012-01-03 00:00:00.000 0 3
4 2012-01-04 00:00:00.000 0 3
5 2012-01-05 00:00:00.000 0 3
The next sub query uses a simple GROUP BY to filter out all the start dates for each SeedID, and also counts the days.
The last sub query does the same thing with the end dates, but this time the day count isn't needed as we already have this.
The final SELECT query joins these two together to combine the start and end dates, and returns them along with the day count.
Give it a try if it works for you
Here SDATE for your DATE remains same as your table.
SELECT SDATE,
CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) >0 THEN(
CASE WHEN (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) IS NULL THEN SDATE
ELSE (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) END
)ELSE (SELECT SDATE FROM TABLE1 WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID ))END AS EDATE
,CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) <0 THEN
(SELECT COUNT(*) FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID )) ELSE
(SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) END AS DAYCOUNT
FROM TABLE1 TBL1 WHERE ALLOCATION = 0
AND (((SELECT ALLOCATION FROM TABLE1 WHERE ID=(SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID))<> 0 ) OR (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID)IS NULL);
A solution without CTE:
SELECT a.aDate AS StartDate
, MIN(c.aDate) AS EndDate
, (datediff(day, a.aDate, MIN(c.aDate)) + 1) AS DayCount
FROM (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS a
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS b ON a.idn = b.idn + 1 AND b.allocation = a.allocation
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS c ON a.idn <= c.idn AND c.allocation = a.allocation
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS d ON c.idn = d.idn - 1 AND d.allocation = c.allocation
WHERE b.idn IS NULL AND c.idn IS NOT NULL AND d.idn IS NULL AND a.allocation = 0
GROUP BY a.aDate
Example