Query: How can I enumerate rows based on a condition? - sql

Having the following table:
-- Rebuild the scratch table that holds the sample rows for the question.
DROP TABLE IF EXISTS #Data;

CREATE TABLE #Data
(
    Code   VARCHAR(10),
    Fee    VARCHAR(3),
    AValue DECIMAL(10, 4)
);

-- One Code with seven fees: two positive, two negative, then three positive values.
INSERT INTO #Data (Code, Fee, AValue)
VALUES
    ('A001', '001',  100),
    ('A001', '002',  200),
    ('A001', '003',  -50),
    ('A001', '004', -250),
    ('A001', '005',  340),
    ('A001', '006',  500),
    ('A001', '007',  600);
I need to get the following result (a sequence number that increases each time the value switches between positive and negative):
Code Fee Value Row
A001 001 100.0000 P1
A001 002 200.0000 P1
A001 003 -50.0000 N1
A001 004 -250.0000 N1
A001 005 340.0000 P2
A001 006 500.0000 P2
A001 007 600.0000 P2
I tried this:
-- Attempt from the question. The first ROW_NUMBER partitions by Code and a
-- positive / non-positive flag only, so it keeps counting across separate runs
-- of the same sign instead of restarting for each run.
SELECT Code, Fee, AValue, ROW_NUMBER() OVER(PARTITION BY Code, (CASE WHEN AValue > 0 THEN 1 ELSE 2 END) ORDER BY Fee) 'nRow',
-- The second ROW_NUMBER additionally partitions by Fee; its result is passed to
-- FORMAT with a 'POS00' or 'NEG00' format string chosen by the sign of AValue.
FORMAT(ROW_NUMBER() OVER(PARTITION BY Code, Fee, CASE WHEN AValue > 0 THEN 1 ELSE 2 END ORDER BY Fee), CASE WHEN AValue > 0 THEN 'POS00' ELSE 'NEG00' END)
FROM #Data
But it returns:
Code Fee Value Row
A001 001 100.0000 P1
A001 002 200.0000 P1
A001 003 -50.0000 N1
A001 004 -250.0000 N1
A001 005 340.0000 P1

This is a type of gaps-and-islands problem, but it is tricky. Assuming you have no 0 values, then sign() is your friend.
Here is an approach that uses the fact that the difference of row numbers is constant when values on adjacent rows should be combined:
-- Gaps-and-islands labelling: rows that continue a run of the same sign share
-- the same (seqnum - seqnum_2) difference, so that difference identifies the run.
-- Assumes AValue is never 0 or NULL (SIGN() would put those rows in groups of their own).
SELECT d.Code,
       d.Fee,
       d.AValue,
       -- Number the runs 1, 2, 3, ... separately for each Code and each sign.
       DENSE_RANK() OVER (PARTITION BY d.Code, SIGN(d.AValue)
                          ORDER BY d.seqnum - d.seqnum_2) AS num
FROM (SELECT src.Code,
             src.Fee,
             src.AValue,
             -- position of the row among rows of the same Code and sign
             ROW_NUMBER() OVER (PARTITION BY src.Code, SIGN(src.AValue) ORDER BY src.Fee) AS seqnum_2,
             -- position of the row among all rows of the same Code
             ROW_NUMBER() OVER (PARTITION BY src.Code ORDER BY src.Fee) AS seqnum
      FROM #Data AS src
     ) AS d
ORDER BY d.Code, d.Fee;
You can incorporate this into your string using CONCAT() or whatever.
Here is a db<>fiddle.

This is somewhat of a blind guess, but perhaps this is what you are after:
-- Label every row with P / N (Z for zero) followed by the ordinal of the
-- consecutive same-sign run it belongs to. Numbering restarts for each Code.
WITH Grps AS (
    SELECT Code,
           Fee,
           AValue,
           -- The difference of the two row numbers is constant within one run of equal sign.
           ROW_NUMBER() OVER (PARTITION BY Code ORDER BY Fee) -
           ROW_NUMBER() OVER (PARTITION BY Code,
                                           CASE WHEN AValue > 0 THEN 1 WHEN AValue < 0 THEN -1 END
                              ORDER BY Fee) AS Grp
    FROM #Data
)
SELECT Code,
       Fee,
       AValue,
       CONCAT(CASE WHEN AValue > 0 THEN 'P' WHEN AValue < 0 THEN 'N' ELSE 'Z' END,
              -- Rank the runs per Code and sign so the first positive run is P1, the next P2, ...
              DENSE_RANK() OVER (PARTITION BY Code,
                                              CASE WHEN AValue > 0 THEN 1 WHEN AValue < 0 THEN -1 END
                                 ORDER BY Grp)) AS [Row]
FROM Grps
ORDER BY Code, Fee;

Related

SQL- Cumulative sum based on condition

I have a scenario in which I have to calculate a counter based on the data below. If the status is A, B or C then the counter should be 0, which is working fine.
If STATUS is D the counter should be a cumulative count, with the exception that if the status changes in between (as in 201907), the counter should reset and the count should start again with 1, 2, 3 and so on. Any help is appreciated.
Input - 3 columns - Customer_No, Date, Status
CUSTOMER_NO Date STATUS
1234 201901 A
1234 201902 B
1234 201903 C
1234 201904 D
1234 201905 D
1234 201906 D
1234 201907 C
1234 201908 D
1234 201910 D
1234 201911 D
1234 201912 D
expected Output - Input columns + Counter Column
CUSTOMER_NO Date STATUS COUNTER
----------------------------------------
1234 201901 A 0
1234 201902 B 0
1234 201903 C 0
1234 201904 D 1
1234 201905 D 2
1234 201906 D 3
1234 201907 C 0
1234 201908 D 1
1234 201910 D 2
1234 201911 D 3
1234 201912 D 4
Sample data
Thanks
You can create a numbering like a serial number for the ordering purpose using the ROW_NUMBER() function as shown below.
-- Sample table for this answer. It stores CUSTOMER_NO, STATUS and COUNTER only;
-- there is no date column, and COUNTER is loaded with the expected values.
create table SampleData(CUSTOMER_NO int
, STATUS char(1)
, COUNTER int)
insert into SampleData Values
(1234, 'A', 0),
(1234, 'B', 0),
(1234, 'C', 0),
(1234, 'D', 1),
(1234, 'D', 2),
(1234, 'D', 3),
(1234, 'C', 0),
(1234, 'D', 1),
(1234, 'D', 2),
(1234, 'D', 3),
(1234, 'D', 4)
-- Number the rows. ORDER BY (SELECT NULL) supplies no ordering column, so RN is
-- not tied to any column of the table.
;with cte as(
Select *
, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RN
from SampleData
)
select CUSTOMER_NO
, STATUS
, COUNTER
-- Needed = running total of COUNTER over all rows with RN up to the current one,
-- adding only the rows whose STATUS is 'D'.
, (SELECT SUM(case STATUS when 'D' then Counter else 0 end) FROM cte t2 WHERE t2.RN <= cte.RN) AS Needed
from cte
Live db<>fiddle demo.
This is a similar approach to Gordon's; however, it uses CTEs and ROW_NUMBER to make the islands first, and then returns 0 if there is only 1 row in that island, using a windowed COUNT and a CASE expression:
-- Build islands of consecutive rows (by ID) that share the same STATUS for a
-- customer, then derive a counter inside each island.
WITH Grps AS(
SELECT ID,
CUSTOMER_NO,
[STATUS],
-- Row number per customer minus row number per customer+status: the difference
-- stays constant while the status does not change.
ROW_NUMBER() OVER (PARTITION BY CUSTOMER_NO ORDER BY ID) -
ROW_NUMBER() OVER (PARTITION BY CUSTOMER_NO, [STATUS] ORDER BY ID) AS Grp
-- Inline sample rows; ID provides the ordering.
FROM (VALUES(1,1234,'A'),
(2,1234,'B'),
(3,1234,'C'),
(4,1234,'D'),
(5,1234,'D'),
(6,1234,'D'),
(7,1234,'C'),
(8,1234,'D'),
(9,1234,'D'),
(10,1234,'D'),
(11,1234,'D'))V(ID,CUSTOMER_NO,[STATUS]))
SELECT ID,
CUSTOMER_NO,
[STATUS],
Grp,
-- Islands with a single row get 0; otherwise the counter is the zero-based
-- position of the row inside its island.
-- NOTE(review): for the first D island (IDs 4-6) this yields 0, 1, 2, while the
-- expected output in the question shows 1, 2, 3 for those rows -- confirm which
-- numbering is intended.
CASE WHEN COUNT(ID) OVER (PARTITION BY CUSTOMER_NO, [STATUS], Grp) = 1 THEN 0
ELSE ROW_NUMBER() OVER (PARTITION BY CUSTOMER_NO, [STATUS], Grp ORDER BY ID) - 1
END AS [COUNTER]
FROM Grps;
As Gordon also mentioned, if you don't have some kind of sequential ID/key, you can't do this with your data. You will need to implement some kind of sequential ID, and hope that your data retains its "insert order".
This is a variant of a gaps-and-islands problem. For this particular incarnation, you can identify the islands by counting the number of non-D statuses before a given row.
After identifying the groups, use case and row_number():
-- grp counts the non-'D' rows seen so far (per customer, in date order). It does
-- not change along a run of consecutive 'D' rows, so it tags each such run.
WITH tagged AS (
    SELECT src.*,
           SUM(CASE WHEN src.status <> 'D' THEN 1 ELSE 0 END)
               OVER (PARTITION BY src.customer_no ORDER BY src.date) AS grp
    FROM t AS src
)
SELECT tagged.*,
       -- 'D' rows are numbered inside their run; every other status gets 0.
       CASE
           WHEN tagged.status = 'D'
               THEN ROW_NUMBER() OVER (PARTITION BY tagged.customer_no, tagged.grp, tagged.status
                                       ORDER BY tagged.date)
           ELSE 0
       END AS counter
FROM tagged

MSSQL distribute negative amount to positive amount rows

I think this is possible, but I am stuck on the cumulative sums. I have positive amounts and a single negative amount joined at the top. Is there any way to distribute this negative amount across the positive amount rows so that it reduces them in turn?
ROW_NUM AMOUNT N_AMOUNT NEEDED_RESULT
------- ------ -------- -------------
1 100.00 NULL 100
2 300.00 NULL 200
3 300.00 -400.00 0
table example
-- Sample data: positive AMOUNT rows plus one negative N_AMOUNT that has to be
-- absorbed by them. NEEDED_RESULT is the expected outcome, kept for comparison.
DECLARE @T TABLE (ROW_NUM INT, AMOUNT MONEY, N_AMOUNT MONEY, NEEDED_RESULT MONEY);

INSERT INTO @T (ROW_NUM, AMOUNT, N_AMOUNT, NEEDED_RESULT)
VALUES
    (1, 100.00, NULL,    100),
    (2, 300.00, NULL,    200),
    (3, 300.00, -400.00, 0);

WITH x AS
(
    SELECT ROW_NUM,
           AMOUNT,
           N_AMOUNT,
           NEEDED_RESULT,
           -- R = sum of this row and all rows with a higher ROW_NUM, plus the total
           -- of the negative amounts (treated as 0 when there is none).
           R = SUM(AMOUNT) OVER (ORDER BY ROW_NUM DESC ROWS UNBOUNDED PRECEDING)
               + COALESCE(SUM(N_AMOUNT) OVER (), 0)
    FROM @T
)
SELECT ROW_NUM,
       AMOUNT,
       N_AMOUNT,
       NEEDED_RESULT,
       -- Clamp R to the range [0, AMOUNT] to get what remains of this row.
       CASE
           WHEN R < 0 THEN 0
           WHEN R > AMOUNT THEN AMOUNT
           ELSE R
       END AS RESULT
FROM x
ORDER BY ROW_NUM;

Query to select same event code with at least one hour interval

I have a sample table
-- Sample table for the question: one row per occurrence of an event code (txt).
CREATE TABLE dbo.wt
(
    id  INT          NULL,
    dt  DATETIME     NULL,
    txt NVARCHAR(50) NULL
) ON [PRIMARY]
GO

-- Occurrences are deliberately not inserted in chronological order.
INSERT INTO dbo.wt (id, dt, txt)
VALUES
    ( 1, '2017-01-01 00:01:00.000', 't1'),
    ( 2, '2017-01-01 00:03:00.000', 't1'),
    ( 3, '2017-01-01 00:02:00.000', 't1'),
    ( 4, '2017-01-01 01:04:00.000', 't1'),
    ( 5, '2017-01-01 02:10:00.000', 't1'),
    ( 6, '2017-01-01 00:01:00.000', 't1'),
    ( 7, '2017-01-01 01:05:00.000', 't1'),
    ( 8, '2017-01-01 02:10:00.000', 't2'),
    ( 9, '2017-01-01 00:03:00.000', 't2'),
    (10, '2017-01-01 01:04:00.000', 't2'),
    (11, '2017-01-01 00:52:00.000', 't1')
I would like to have a list of txt codes and dt dates, grouped by txt code, where the interval between listed occurrences of the same txt is at least one hour, ignoring anything in between.
To clarify: when t1 first occurs at '2017-01-01 00:01:00.000',
then the next occurrence I am looking for is the first one at least one hour later,
which will be '2017-01-01 01:04:00.000'.
The third occurrence I am looking for is the first one at least one hour after '2017-01-01 01:04:00.000', and so on.
After some searching I found something like this
-- Attempt from the question.
-- a: number the occurrences of each txt in chronological order.
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from [wt]),
-- b: walk the numbered rows one by one, starting from the first row of each txt.
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
-- b.dt is the dt of the immediately preceding row (b.rn = a.rn - 1), so the
-- 60-minute test is made against the previous row rather than the last row
-- that was reported with recurrence = 0.
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
-- Report only the rows whose counter was reset to 0.
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
but this wasn't good because the interval isn't counted from first occurrence but from last, so I got
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 02:10:00.000 8 -65 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
I think I found a workaround because in this case I could group record within same hour but I am not happy with that solution.
-- Workaround from the question: first keep only the earliest dt per txt and
-- per hour bucket (the first 14 characters of the style-120 string plus
-- '00:00.000'), then run the same recursive walk over that reduced set.
select txt, min(dt) dt
into #ttwt
from [wt]
group by txt, substring(convert(varchar,dt,120),1,14)+'00:00.000'
-- a: number the remaining rows of each txt in chronological order.
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from #ttwt),
-- b: walk the rows one by one, comparing each row with the one just before it.
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
-- Remove the intermediate temp table.
drop table #ttwt
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 01:04:00.000 2 -63 0
t1 2017-01-01 02:10:00.000 3 -66 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
Any suggestions to improve the script so it will let the interval be any entered value in minutes would be appreciated.
If I have understood correctly I think the following does what you need.
-- Minimum gap, in minutes, required between two reported occurrences of the same txt.
DECLARE @IntervalMinutes INT = 60;

IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

-- Working copy of wt with a zero-based position per txt and minute offsets.
-- txt uses the same type as wt.txt so long codes are not rejected.
CREATE TABLE #T (id INT, rn INT, txt NVARCHAR(50), dt DATETIME, lagDiff INT, runningDiff INT);

INSERT INTO #T (id, rn, txt, dt, lagDiff, runningDiff)
SELECT id,
       ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id) - 1,
       txt,
       dt,
       -- minutes since the previous occurrence of the same txt (0 for the first one)
       DATEDIFF(MINUTE, COALESCE(LAG(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt),
       -- minutes since the first occurrence of the same txt
       DATEDIFF(MINUTE, COALESCE(FIRST_VALUE(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt)
FROM wt;

-- Level 1 is the first occurrence of each txt. Level n+1 holds every later row
-- that lies at least @IntervalMinutes after some row of level n.
WITH CTE AS (
    SELECT id, rn, txt, dt, lagDiff, runningDiff, 1 AS Level
    FROM #T
    WHERE rn = 0
    UNION ALL
    SELECT T.id, T.rn, T.txt, T.dt, T.lagDiff, T.runningDiff, CTE.Level + 1
    FROM #T AS T
    INNER JOIN CTE
        ON  CTE.txt = T.txt
        AND CTE.rn < T.rn
        AND T.runningDiff - CTE.runningDiff >= @IntervalMinutes
    WHERE T.rn > 0
)
-- The earliest row of each level is the occurrence to report for that level.
, X AS (
    SELECT txt,
           Level,
           MIN(rn) AS rn
    FROM CTE
    GROUP BY txt, Level
)
SELECT T.id, T.rn, T.txt, T.dt, T.lagDiff, T.runningDiff
FROM X
INNER JOIN #T AS T
    ON  T.txt = X.txt
    AND T.rn = X.rn
ORDER BY T.txt, T.dt
-- One recursion level per reported occurrence; lift the default cap of 100.
OPTION (MAXRECURSION 0);
Output
+----+----+-----+-------------------------+---------+-------------+
| id | rn | txt | dt | lagDiff | runningDiff |
+----+----+-----+-------------------------+---------+-------------+
| 1 | 0 | t1 | 2017-01-01 00:01:00.000 | 0 | 0 |
| 4 | 5 | t1 | 2017-01-01 01:04:00.000 | 12 | 63 |
| 5 | 7 | t1 | 2017-01-01 02:10:00.000 | 65 | 129 |
| 9 | 0 | t2 | 2017-01-01 00:03:00.000 | 0 | 0 |
| 10 | 1 | t2 | 2017-01-01 01:04:00.000 | 61 | 61 |
| 8 | 2 | t2 | 2017-01-01 02:10:00.000 | 66 | 127 |
+----+----+-----+-------------------------+---------+-------------+
I kind of like a method that works like a bubble sort. The problem I have found with recursive operations is that they work great for small sets (think fewer than 5 or 10k rows), then behave horribly when the sets get larger. For this reason I like a cursor-style approach where you are essentially saying: "Are you larger than a criterion? Yes or no. Insert or ignore, delete, move on." This way you evaluate every item once and only once, not every variation on a theme of recursion.
-- Minimum gap, in minutes, required between two kept occurrences of the same txt.
DECLARE @IntervalMinutes INT = 60;

-- Work queue: every wt row with its chronological position inside its txt group.
DECLARE @Temp TABLE
(
      id  INT
    , dt  DATETIME
    , txt NVARCHAR(50)
    , rwn INT
);

-- Result: the kept occurrences. Dif is the gap in minutes to the previously
-- kept row of the same txt, NULL for the first row of a group.
DECLARE @Holder TABLE
(
      id  INT
    , dt  DATETIME
    , txt NVARCHAR(50)
    , Dif INT
);

DECLARE
      @CurId    INT
    , @CurDt    DATETIME
    , @Curtxt   NVARCHAR(50)
    , @LastDate DATETIME;

INSERT INTO @Temp (id, dt, txt, rwn)
SELECT id, dt, txt, ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id)
FROM wt;

-- Each row is visited exactly once, in (txt, chronological) order.
-- Assumes wt.id is unique and not NULL; otherwise the DELETE below cannot
-- remove exactly the current row.
WHILE EXISTS (SELECT 1 FROM @Temp)
BEGIN
    SELECT TOP (1) @CurId = id, @CurDt = dt, @Curtxt = txt
    FROM @Temp
    ORDER BY txt, rwn;

    IF NOT EXISTS (SELECT 1 FROM @Holder WHERE txt = @Curtxt)
    BEGIN
        -- First occurrence of this txt: always kept.
        INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @Curtxt, NULL);
    END
    ELSE
    BEGIN
        -- Compare against the most recent kept occurrence of the SAME txt.
        SELECT @LastDate = MAX(dt) FROM @Holder WHERE txt = @Curtxt;

        -- Compare in minutes: DATEDIFF(HOUR, ...) counts clock-hour boundaries,
        -- so 00:59 -> 01:00 would count as one hour.
        IF DATEDIFF(MINUTE, @LastDate, @CurDt) >= @IntervalMinutes
        BEGIN
            INSERT INTO @Holder (id, dt, txt, Dif)
            VALUES (@CurId, @CurDt, @Curtxt, DATEDIFF(MINUTE, @LastDate, @CurDt));
        END
    END

    -- Remove the processed row and loop again.
    DELETE FROM @Temp WHERE id = @CurId;
END

SELECT id, dt, txt, Dif
FROM @Holder
ORDER BY txt, dt;

Grouping by an aggregate column

I have a requirement to show a listing of data associated with a given widget, however I can only aggregate on the widget while it's in sequence (essentially breaking the running total when the widget changes).
Here's a bit of a break down of what I mean...
Example data:
ID WIDGET PART
1 A000 B22
2 A000 B23
3 A002 B24
4 A001 B25
5 A001 B26
6 A000 B27
Desired output:
WIDGET MINPART COUNT
A000 B22 2
A002 B24 1
A001 B25 2
A000 B27 1
In SQL Server I've tried running the following:
-- Attempt from the question: MINPART is the smallest PART of each WIDGET over
-- the whole table, and each source row contributes 1 to the summed count.
SELECT per_row.WIDGET,
       per_row.MINPART,
       SUM(per_row.tcount)
FROM (
    SELECT WIDGET,
           MIN(PART) OVER (PARTITION BY WIDGET) AS MINPART,
           1 AS tcount
    FROM test
) AS per_row
GROUP BY per_row.WIDGET, per_row.MINPART
But this just results in the usual aggregation you might expect. I.E.:
WIDGET MINPART COUNT
A000 B22 3
A002 B24 1
A001 B25 2
Does this work for you?
-- Collapse consecutive rows (in id order) that share a widget into one output row.
;WITH x AS (
    SELECT id,
           widget,
           part,
           -- widget of the previous row; NULL on the first row
           LAG(widget) OVER (ORDER BY id) AS lg
    FROM #t
),
y AS (
    SELECT id,
           widget,
           part,
           -- running count of widget changes = identifier of the consecutive run
           SUM(CASE WHEN widget <> lg THEN 1 ELSE 0 END) OVER (ORDER BY id) AS grp
    FROM x
)
SELECT widget,
       MIN(part) AS minpart,
       COUNT(*)  AS [count]
FROM y
GROUP BY widget, grp
-- Return the runs in the order in which they appear in the source data.
ORDER BY MIN(id);
Don't you mean to use the named columns?, i.e.
-- Tag every Widget row with the smallest PART of its WIDGET, then count the
-- rows per (WIDGET, MINPART) pair.
WITH widget_rows AS (
    SELECT w.WIDGET,
           MIN(w.PART) OVER (PARTITION BY w.WIDGET) AS MINPART,
           1 AS tcount
    FROM Widget AS w
)
SELECT widget_rows.WIDGET,
       widget_rows.MINPART,
       SUM(widget_rows.tcount)
FROM widget_rows
GROUP BY widget_rows.WIDGET,
         widget_rows.MINPART;
SqlFiddle here
We can also write this another way, using a derived table:
-- Sample data held in a table variable.
DECLARE @Widget TABLE
(
    ID     INT,
    Widget NVARCHAR(10),
    PART   NVARCHAR(10)
);

INSERT INTO @Widget (ID, Widget, PART)
VALUES
    (1, 'A000', 'B22'),
    (2, 'A000', 'B23'),
    (3, 'A002', 'B24'),
    (4, 'A001', 'B25'),
    (5, 'A001', 'B26'),
    (6, 'A000', 'B27');

IF OBJECT_ID('tempdb..#t1') IS NOT NULL
    DROP TABLE #t1;

-- One row per source row, carrying the smallest PART of its Widget and a unit counter.
-- The window is partitioned by Widget only, so the final result has one row per
-- widget overall, not one row per consecutive run.
SELECT F.Widget,
       F.PART,
       1 AS t
INTO #t1
FROM (
    SELECT Widget,
           MIN(PART) OVER (PARTITION BY Widget) AS PART
    FROM @Widget
    GROUP BY Widget, ID, PART
) AS F;

SELECT Widget,
       PART,
       SUM(t) AS [COUNT]
FROM #t1
GROUP BY Widget, PART;
@dean's solution is neat, but a pure SQL Server 2008 solution is also possible. The idea is the same: the ranker formula
sum(case when widget<>lg then 1 else 0 end) over(order by id) as grp
can be faked by a running total, for the record the full query is
-- SQL Server 2008 variant (no LAG, no windowed running SUM).
-- part: flag each row whose widget differs from the widget of the previous row.
WITH part AS (
    SELECT a.id, a.widget, a.part
         , breaker = CASE WHEN a.widget <> COALESCE(b.widget, a.widget)
                          THEN 1
                          ELSE 0
                     END
    FROM Widgets a
    -- Previous row = the row with the highest id below the current one, so gaps
    -- in the id sequence do not hide a change of widget.
    LEFT JOIN Widgets b
        ON b.id = (SELECT MAX(p.id) FROM Widgets p WHERE p.id < a.id)
)
-- DATA: the running total of the flags numbers the consecutive runs.
, DATA AS (
    SELECT a.id, a.widget, a.part
         , run_no = (SELECT SUM(b.breaker) FROM part b WHERE a.id >= b.id)
    FROM part a
)
SELECT widget
     , minpart = MIN(part)
     , [count] = COUNT(1)
FROM DATA
GROUP BY widget, run_no
-- Return the runs in the order in which they appear in the source data.
ORDER BY MIN(id);

Creating groups of consecutive days meeting a given criteria

I have a table with the following data structure in SQL Server:
ID Date Allocation
1, 2012-01-01, 0
2, 2012-01-02, 2
3, 2012-01-03, 0
4, 2012-01-04, 0
5, 2012-01-05, 0
6, 2012-01-06, 5
etc.
What I need to do is get all consecutive day periods where Allocation = 0, and in the following form:
Start Date End Date DayCount
2012-01-01 2012-01-01 1
2012-01-03 2012-01-05 3
etc.
Is it possible to do this in SQL, and if so how?
In this answer, I'll assume that the "id" field numbers the rows consecutively when sorted by increasing date, like it does in the example data. (Such a column can be created if it does not exist).
This is an example of a technique described here and here.
1) Join the table to itself on adjacent "id" values. This pairs adjacent rows. Select rows where the "allocation" field has changed. Store the result in a temporary table, also keeping a running index.
-- MySQL syntax. @idx is a user variable used as a running row counter.
SET @idx = 0;
-- One row for every pair of adjacent ids whose allocation differs: the last
-- date of the earlier period, the first date of the later period, and the
-- allocation of the earlier period.
CREATE TEMPORARY TABLE boundaries
SELECT
    (@idx := @idx + 1) AS idx,
    a1.date AS prev_end,
    a2.date AS next_start,
    a1.allocation AS allocation
FROM allocations a1
JOIN allocations a2
    ON (a2.id = a1.id + 1)
WHERE a1.allocation != a2.allocation;
This gives you a table having "the end of the previous period", "the start of the next period", and "the value of 'allocation' in the previous period" in each row:
+------+------------+------------+------------+
| idx | prev_end | next_start | allocation |
+------+------------+------------+------------+
| 1 | 2012-01-01 | 2012-01-02 | 0 |
| 2 | 2012-01-02 | 2012-01-03 | 2 |
| 3 | 2012-01-05 | 2012-01-06 | 0 |
+------+------------+------------+------------+
2) We need the start and end of each period in the same row, so we need to combine adjacent rows again. Do this by creating a second temporary table like boundaries but having an idx field 1 greater:
+------+------------+------------+
| idx | prev_end | next_start |
+------+------------+------------+
| 2 | 2012-01-01 | 2012-01-02 |
| 3 | 2012-01-02 | 2012-01-03 |
| 4 | 2012-01-05 | 2012-01-06 |
+------+------------+------------+
Now join on the idx field and we get the answer:
-- Pair each boundary row with the shifted copy that has the same idx: the
-- shifted row supplies the start of the period, the original row its end.
SELECT
    shifted.next_start AS start,
    orig.prev_end      AS end,
    allocation
FROM boundaries AS orig
INNER JOIN boundaries2 AS shifted
    ON shifted.idx = orig.idx;
+------------+------------+------------+
| start | end | allocation |
+------------+------------+------------+
| 2012-01-02 | 2012-01-02 | 2 |
| 2012-01-03 | 2012-01-05 | 0 |
+------------+------------+------------+
** Note that this answer gets the "internal" periods correctly but misses the two "edge" periods where allocation = 0 at the beginning and allocation = 5 at the end. Those can be pulled in using UNION clauses but I wanted to present the core idea without that complication.
Following would be one way to do it. The gist of this solution is
Use a CTE to get a list of all consecutive start and end dates with Allocation = 0.
Use the ROW_NUMBER window function to assign row numbers depending on both start and end dates.
Select only those records where both ROW_NUMBERs equal 1.
Use DATEDIFF to calculate the DayCount.
SQL Statement
-- r: every stretch of consecutive days with Allocation = 0, grown one day at a
-- time from each zero-allocation day (this includes all partial stretches).
;WITH r AS (
    SELECT seed.Date AS StartDate,
           seed.Date AS EndDate
    FROM YourTable AS seed
    WHERE seed.Allocation = 0
    UNION ALL
    SELECT r.StartDate,
           nxt.Date
    FROM r
    INNER JOIN YourTable AS nxt
        ON DATEDIFF(dd, r.EndDate, nxt.Date) = 1
    WHERE nxt.Allocation = 0
)
-- ranked: rn1 = 1 marks the latest end for a given start, rn2 = 1 the earliest
-- start for a given end; a stretch with both is a complete period.
, ranked AS (
    SELECT r.StartDate,
           r.EndDate,
           ROW_NUMBER() OVER (PARTITION BY r.StartDate ORDER BY r.EndDate DESC) AS rn1,
           ROW_NUMBER() OVER (PARTITION BY r.EndDate ORDER BY r.StartDate ASC)  AS rn2
    FROM r
)
SELECT ranked.StartDate AS [Start Date]
     , ranked.EndDate   AS [End Date ]
     , DATEDIFF(dd, ranked.StartDate, ranked.EndDate) + 1 AS [DayCount]
FROM ranked
WHERE ranked.rn1 = 1
  AND ranked.rn2 = 1
OPTION (MAXRECURSION 0)
Test script
-- Self-contained test: q supplies the six sample rows inline.
;WITH q (ID, Date, Allocation) AS (
SELECT * FROM (VALUES
(1, '2012-01-01', 0)
, (2, '2012-01-02', 2)
, (3, '2012-01-03', 0)
, (4, '2012-01-04', 0)
, (5, '2012-01-05', 0)
, (6, '2012-01-06', 5)
) a (a, b, c)
)
-- r: start from every zero-allocation day and extend by one day at a time
-- while the next day also has Allocation = 0.
, r AS (
SELECT StartDate = Date, EndDate = Date
FROM q
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT s.StartDate, s.EndDate, DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
-- rn1 = 1: latest end for this start; rn2 = 1: earliest start for this end.
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
-- Keep only the stretches that are maximal in both directions.
WHERE s.rn1 = 1
AND s.rn2 = 1
-- Remove the recursion limit.
OPTION (MAXRECURSION 0)
Alternative way with CTE but without ROW_NUMBER(),
Sample data:
-- Recreate the scratch table with ten sample days.
IF OBJECT_ID('tempdb..#tab') IS NOT NULL
    DROP TABLE #tab;

CREATE TABLE #tab (id INT, date DATETIME, allocation INT);

INSERT INTO #tab (id, date, allocation)
VALUES
    ( 1, '2012-01-01', 0),
    ( 2, '2012-01-02', 2),
    ( 3, '2012-01-03', 0),
    ( 4, '2012-01-04', 0),
    ( 5, '2012-01-05', 0),
    ( 6, '2012-01-06', 5),
    ( 7, '2012-01-07', 0),
    ( 8, '2012-01-08', 5),
    ( 9, '2012-01-09', 0),
    (10, '2012-01-10', 0);
Query:
-- cte: for every zero-allocation row s, e_id is the zero-allocation row dated
-- one day later and b_id the zero-allocation row dated one day earlier
-- (NULL when there is none).
;with cte(s_id, e_id, b_id) as (
select s.id, e.id, b.id
from #tab s
left join #tab e on dateadd(dd, 1, s.date) = e.date and e.allocation = 0
left join #tab b on dateadd(dd, -1, s.date) = b.date and b.allocation = 0
where s.allocation = 0
)
select ts.date as [start date], te.date as [end date], count(*) as [day count] from (
-- s = id of a row with no zero-allocation predecessor (start of a period);
-- e = smallest id at or after s that has no zero-allocation successor (its end).
-- NOTE(review): comparing ids here presumes ids increase with date -- confirm.
select c1.s_id as s, (
select min(s_id) from cte c2
where c2.e_id is null and c2.s_id >= c1.s_id
) as e
from cte c1
where b_id is null
) t
-- t1 contributes one row per zero-allocation id inside [s, e]; counted as the day count.
join #tab t1 on t1.id between t.s and t.e and t1.allocation = 0
-- ts / te look up the dates of the start and end ids.
join #tab ts on ts.id = t.s
join #tab te on te.id = t.e
group by t.s, t.e, ts.date, te.date
Live example at data.SE.
Using this sample data:
-- Sample table with the six days from the question; dates use ODBC date literals.
CREATE TABLE MyTable (ID INT, Date DATETIME, Allocation INT);

INSERT INTO MyTable (ID, Date, Allocation)
VALUES
    (1, {d '2012-01-01'}, 0),
    (2, {d '2012-01-02'}, 2),
    (3, {d '2012-01-03'}, 0),
    (4, {d '2012-01-04'}, 0),
    (5, {d '2012-01-05'}, 0),
    (6, {d '2012-01-06'}, 5);
GO
Try this:
-- DateGroups: anchor on every zero-allocation day whose previous day is missing
-- or has a non-zero allocation, then follow the chain of zero-allocation days.
-- SeedID carries the ID of the anchor row through the recursion.
WITH DateGroups (ID, Date, Allocation, SeedID) AS (
    SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, MyTable.ID
    FROM MyTable
    LEFT JOIN MyTable Prev
        ON  Prev.Date = DATEADD(d, -1, MyTable.Date)
        AND Prev.Allocation = 0
    WHERE Prev.ID IS NULL
      AND MyTable.Allocation = 0
    UNION ALL
    SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, DateGroups.SeedID
    FROM MyTable
    JOIN DateGroups
        ON MyTable.Date = DATEADD(d, 1, DateGroups.Date)
    WHERE MyTable.Allocation = 0
-- StartDates: first day and number of days for each seed.
), StartDates (ID, StartDate, DayCount) AS (
    SELECT SeedID, MIN(Date), COUNT(ID)
    FROM DateGroups
    GROUP BY SeedID
-- EndDates: last day for each seed.
), EndDates (ID, EndDate) AS (
    SELECT SeedID, MAX(Date)
    FROM DateGroups
    GROUP BY SeedID
)
SELECT StartDates.StartDate, EndDates.EndDate, StartDates.DayCount
FROM StartDates
JOIN EndDates
    ON StartDates.ID = EndDates.ID
ORDER BY StartDates.StartDate
-- The recursion advances one day per level; lift the default cap of 100 levels
-- so that long periods do not abort the query.
OPTION (MAXRECURSION 0);
The first section of the query is a recursive SELECT, which is anchored by all rows that are allocation = 0, and whose previous day either doesn't exist or has allocation != 0. This effectively returns IDs: 1 and 3 which are the starting dates of the periods of time you want to return.
The recursive part of this same query starts from the anchor rows, and finds all subsequent dates that also have allocation = 0. The SeedID keeps track of the anchored ID through all the iterations.
The result so far is this:
ID Date Allocation SeedID
----------- ----------------------- ----------- -----------
1 2012-01-01 00:00:00.000 0 1
3 2012-01-03 00:00:00.000 0 3
4 2012-01-04 00:00:00.000 0 3
5 2012-01-05 00:00:00.000 0 3
The next sub query uses a simple GROUP BY to filter out all the start dates for each SeedID, and also counts the days.
The last sub query does the same thing with the end dates, but this time the day count isn't needed as we already have this.
The final SELECT query joins these two together to combine the start and end dates, and returns them along with the day count.
Give it a try if it works for you
Here SDATE for your DATE remains same as your table.
-- Returns one row per period start: rows with ALLOCATION = 0 whose predecessor
-- by ID (the highest ID below the current one) either has a non-zero allocation
-- or does not exist (see the final WHERE / AND).
-- EDATE and DAYCOUNT are derived with correlated subqueries that look for the
-- next row with a non-zero allocation (MIN(ID) above the current ID); when no
-- such row exists they fall back to the highest ID above the current row.
-- NOTE(review): all comparisons are on ID, so this presumes ID follows date
-- order -- confirm against the real table.
SELECT SDATE,
CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) >0 THEN(
CASE WHEN (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) IS NULL THEN SDATE
ELSE (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) END
)ELSE (SELECT SDATE FROM TABLE1 WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID ))END AS EDATE
,CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) <0 THEN
(SELECT COUNT(*) FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID )) ELSE
(SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) END AS DAYCOUNT
FROM TABLE1 TBL1 WHERE ALLOCATION = 0
AND (((SELECT ALLOCATION FROM TABLE1 WHERE ID=(SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID))<> 0 ) OR (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID)IS NULL);
A solution without CTE:
-- Each of the four derived tables (a, b, c, d) is the same query: it gives every
-- row an idn equal to the number of rows whose aDate is on or before its own,
-- i.e. its position in date order.
-- a = candidate start row, b = the row one position before a,
-- c = rows at or after a, d = the row one position after c;
-- b, c and d are restricted to the same allocation as the row they are joined to.
SELECT a.aDate AS StartDate
-- earliest qualifying end at or after the start
, MIN(c.aDate) AS EndDate
, (datediff(day, a.aDate, MIN(c.aDate)) + 1) AS DayCount
FROM (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS a
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS b ON a.idn = b.idn + 1 AND b.allocation = a.allocation
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS c ON a.idn <= c.idn AND c.allocation = a.allocation
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS d ON c.idn = d.idn - 1 AND d.allocation = c.allocation
-- b missing: a has no same-allocation row right before it (a starts a period).
-- d missing: c has no same-allocation row right after it (c ends a period).
WHERE b.idn IS NULL AND c.idn IS NOT NULL AND d.idn IS NULL AND a.allocation = 0
GROUP BY a.aDate
Example