TSQL : the top records of a given partition (conditional)

TSQL : the top records of a given partition (conditional) - sql

I need to get all the records in TABLE_A where at least the 2 last Status are vacant (relative to Inspection_Date) and the Room_ID does not exist in TABLE_B.
This is a simplified table I am using as an example:
TABLE_A:
Room_Id Status Inspection_Date
-------------------------------------
1 vacant 5/15/2015
2 occupied 5/21/2015
2 vacant 1/19/2016
1 occupied 12/16/2015
4 vacant 3/25/2016
3 vacant 8/27/2015
1 vacant 4/17/2016
3 vacant 12/12/2015
3 vacant 3/22/2016
4 occupied 2/2/2015
4 vacant 3/24/2015
TABLE_B:
Room_Id Status Inspection_Date
------------------------------------
1 vacant 5/15/2015
2 occupied 5/21/2015
2 vacant 1/19/2016
1 vacant 12/16/2015
1 vacant 4/17/2016
My result should look like this:
Room_Id Status Inspection_Date
---------------------------------
3 vacant 8/27/2015
3 vacant 12/12/2015
3 vacant 3/22/2016
4 occupied 2/2/2015
4 vacant 3/24/2015
4 vacant 3/25/2016
I have tried it this way, it works with the example but is not working with my data .. the logic is not complete:
With cteA As
(
Select *, Row_Number() Over (Partition By Room_ID, Status Order By Inspection_Date Desc) RowNum From Table_A
)
Select * From Table_A Where Room_Id In
(
Select Room_Id
From cteA
Where Room_Id Not In (Select Room_Id From Table_B)
And Status = 'vacant' And RowNum > 1
)
Order By Room_Id, Inspection_Date
Here is the schema:
CREATE TABLE TABLE_A (`Room_Id` int,
`Status` varchar(55),
`Inspection_Date` Date
);
INSERT INTO TABLE_A (Room_Id, Status, Inspection_Date)
VALUES (1, 'vacant', '5/15/2015'),
(2, 'occupied', '5/21/2015'),
(2, 'vacant', '1/19/2016'),
(1, 'occupied', '12/16/2015'),
(4, 'vacant', '3/25/2016'),
(3, 'vacant', '8/27/2015'),
(1, 'vacant', '4/17/2016'),
(3, 'vacant', '12/12/2015'),
(3, 'vacant', '3/22/2016'),
(4, 'occupied', '2/2/2015'),
(4, 'vacant', '3/24/2015');
CREATE TABLE TABLE_B (`Room_Id` int,
`Status` varchar(55),
`Inspection_Date` Date
);
INSERT INTO TABLE_B (Room_Id, Status, Inspection_Date)
VALUES
(1, 'vacant', '5/15/2015'),
(2, 'occupied', '5/21/2015'),
(2, 'vacant', '1/19/2016'),
(1, 'vacant', '12/16/2015'),
(1, 'vacant', '4/17/2016'),;

PLAIN
For each room in TABLE_A select the last date (as lastDate)
for each room in TABLE_A select previous date (as prevLastDate)
Get room_ids from lastDate which has the status 'vacant' (as lastDateVacant)
Get room_ids from prevLastDate which has the status 'vacant' (as prevLastDateVacant)
Filter TABLE_A to have only IDs which are there in lastDateVacant and prevLastDateVacant (inner)
Filter TABLE_A to have only IDs which are not in TABLE_B (left outer + IS NULL)
As the result you have:
WITH lastDate AS (
SELECT room_id AS room,MAX(inspection_date) AS date
FROM "TABLE_A"
GROUP BY room_id
), prevLastDate AS (
SELECT room_id AS room,MAX(inspection_date) AS date
FROM "TABLE_A" a
INNER JOIN lastDate ON a.room_id = lastDate.room and a.inspection_date < lastDate.date
GROUP BY room_id
), lastDateVacant AS (
SELECT room_id AS room FROM "TABLE_A"
WHERE (room_id,inspection_date) IN (
SELECT room, date FROM lastDate
) AND status = 'vacant'
), prevLastDateVacant AS (
SELECT room_id AS room FROM "TABLE_A"
WHERE (room_id,inspection_date) IN (
SELECT room, date FROM prevLastDate
) AND status = 'vacant'
)
SELECT a.* FROM "TABLE_A" a
INNER JOIN lastDateVacant
ON a.room_id = lastDateVacant.room
INNER JOIN prevLastDateVacant
ON a.room_id = prevLastDateVacant.room
LEFT OUTER JOIN "TABLE_B" AS b
ON a.room_id = b.room_id
WHERE b.room_id IS NULL
ORDER BY a.room_id ASC, a.inspection_date DESC
Window Function
Not sure if the syntax for TSQL is the same, but here is the shorter variant:
Ranking with partion by room and or order by date
Check for IDs with rank 1 and 2 having 'vacant' status, grouping by ID and having them occured more than once
WITH room AS (
select room from (
select room_id as room,status,inspection_date as date,
RANK() OVER (PARTITION BY room_id ORDER BY inspection_date DESC) AS RANK
from "TABLE_A"
)
where (rank in ( 1,2) and status = 'vacant')
group by room
having count() > 1
)
SELECT a. FROM "TABLE_A" a
INNER JOIN room
ON a.room_id = room.room
LEFT OUTER JOIN "TABLE_B" AS b
ON a.room_id = b.room_id
WHERE b.room_id IS NULL
ORDER BY a.room_id ASC, a.inspection_date DESC

Your conditions translate almost directly into a query. You can use window functions for the vacant count and not exists for the relationship to table_b:
select a.*
from (select a.*,
sum(case when status = 'vacant' then 1 else 0 end) over (partition by room_id) as num_vacant
from table_a a
where not exists (select 1
from table_b b
where b.room_id = a.room_id
)
) a
where num_vacant >= 2;
EDIT:
If you want the last two to be vacant, you can do find that last record that is non-vacant and then count the ones bigger than that:
select a.*
from (select a.*,
sum(case when a2.max_nonvacant > a.inspection_date then 0 else 1) over (partition by room_id) as num_vacant_last
from table_a a outer apply
(select max(inspection_date) as max_nonvacant
from table_a a2
where a2.room_id = a.room_id and a2.status <> 'vacant'
) a2
where not exists (select 1
from table_b b
where b.room_id = a.room_id
)
) a
where num_vacant_last >= 2;

This worked for me and I have checked again and again.
with Rooms as (
select
Room_Id, Status,
row_number() over (partition by Room_Id order by Inspection_Date desc) as rn
from TABLE_A
), Candidates as (
select Room_Id from Rooms group by Room_Id
having sum(case when rn in (1, 2) and Status = 'vacant' then 1 else null end) = 2
)
select * from TABLE_A
where Room_Id in (select Room_Id from Candidates except select Room_Id from TABLE_B)
order by Room_Id, Inspection_Date desc

I did this test:
extracts all the room_id that considering the last two Status (equal status) in relation to the inspection_date (descending order):
select * from TABLE_A WHERE [Room_Id] IN
(
SELECT [Room_Id] FROM
(SELECT ROW_NUMBER() OVER(PARTITION BY [Room_Id] ORDER BY [Inspection_Date] DESC ) AS id,
[Room_Id],[Status],[Inspection_Date]
FROM TABLE_A
) AA
WHERE AA.ID <=2
--selecting the last two Inspection_Date
and [Status] = 'vacant'
GROUP BY [Room_Id],[Status] HAVING COUNT(*) >1
)
AND
[Room_Id] NOT IN (SELECT Room_Id FROM TABLE_B)
order by Room_Id, Inspection_Date desc

Related

Throw error , if duplicate data (not duplicate records) found in a table

I have a table which contains information about products, I need to check for duplicate records and throw an error. There are two columns ,product key and product value, where product value contains both semi-colon separate and normal values. The sample data is given below. (No constraints )
source_id |Product_key | Product_value
-----------------------------------------------------------
1 xzy PRODUCT_TAG=SCENT;CODE=123;PRICE=234
1 xhmr POWDER
1 abc PRODUCT_TAG=COMB;CODE=123;PRICE=234
1 xhmr OIL
1 zrmt 123
Now i have to check if any two rows have the same product_key and product_tag value, also if the product_key is xhmr , then product_value should be considered as product_tag value. The query which i have written is given below
select source_id, PRODUCT_KEY, rec , (case when instr(rec,'PRODUCT_TAG')<>0 THEN regexp_substr(TRIM(rec), '[^=]+', 1,2)
ELSE rec
end) as PRODUCT_TAG
from
(select source_id,PRODUCT_KEY ,regexp_substr(TRIM(PRODUCT_VALUE), '[^;]+', 1,LEVEL) AS rec from products
connect by regexp_substr(TRIM(PRODUCT_VALUE), '[^;]+', 1,LEVEL) is not null
AND prior source_id = source_id
AND PRIOR SYS_GUID() IS NOT NULL) where instr(rec,'PRODUCT_TAG')<>0 or PRODUCT_KEY in('xhmr');
Output
source_id |Product_key | Product_value
-----------------------------------------------------------
1 xzy SCENT
1 xhmr POWDER
1 abc COMB
1 xhmr OIL
After this, I am taking a count of all the rows and all the distinct rows . If both counts are not equal, then throwing error.I was wondering , if all this could be done in a concise way.

I think you might do the following
Get the main query and put into a with clause
Then you can operate with it as many times as needed
You could write it like this:
Update
with main_query
as (
select source_id, PRODUCT_KEY, rec , (case when instr(rec,'PRODUCT_TAG')<>0 THEN regexp_substr(TRIM(rec), '[^=]+', 1,2)
ELSE rec
end) as PRODUCT_TAG
from
(select source_id,PRODUCT_KEY ,regexp_substr(TRIM(PRODUCT_VALUE), '[^;]+', 1,LEVEL) AS rec from products
connect by regexp_substr(TRIM(PRODUCT_VALUE), '[^;]+', 1,LEVEL) is not null
AND prior source_id = source_id
AND PRIOR SYS_GUID() IS NOT NULL) where instr(rec,'PRODUCT_TAG')<>0 or PRODUCT_KEY in('xhmr')
)
select
case when total_value = tot_dist then 'OK' -- whatever you want here
else 'ERROR' -- whatever you want here
end as result
from
( ( select count(*) as total_value from main_query ) ,
( select count(*) as total_dist from ( select distinct * from main_query ) )
)
Example in my case
SQL> desc my_test
Name Null? Type
----------------------------------------- -------- ----------------------------
C1 NUMBER
C2 NUMBER
SQL> select * from my_test ;
C1 C2
---------- ----------
1 1
1 1
2 2
SQL> with main_query as ( select c1 , c2 from my_test )
select a , b
from
( select count(*) as a from main_query ) ,
( select count(*) as b from ( select distinct * from main_query ) )
/ 2 3 4 5 6
A B
---------- ----------
3 2
SQL> with main_query as ( select c1 , c2 from my_test )
select case when a = b then 'OK' else 'ERROR' end as result
from
( select count(*) as a from main_query ) ,
( select count(*) as b from ( select distinct * from main_query ) )
/
RESUL
-----
ERROR

Avoid Unions to get TOP count

Here are two tables:
LocationId Address City State Zip
1 2100, 1st St Austin TX 76819
2 2200, 2nd St Austin TX 76829
3 2300, 3rd St Austin TX 76839
4 2400, 4th St Austin TX 76849
5 2500, 5th St Austin TX 76859
6 2600, 6th St Austin TX 76869
TripId PassengerId FromLocationId ToLocationId
1 746896 1 2
2 746896 2 1
3 234456 1 3
4 234456 3 1
5 234456 1 4
6 234456 4 1
7 234456 1 6
8 234456 6 1
9 746896 1 2
10 746896 2 1
11 746896 1 2
12 746896 2 1
I want TOP 5 locations which each passenger has traveled to (does not matter if its from or to location). I can get it using a UNION, but was wondering if there was a better way to do this.
My Solution:
select top 5 *
from
(select count(l.LocationId) as cnt, l.LocationId, l.Address1, l.Address2, l.City, St.State , l.Zip
from
Trip t
join LOCATION l on t.FromLocationId = l.LocationId
where t.PassengerId = 746896
group by count(l.LocationId) as cnt, l.LocationId, l.Address1, l.Address2, l.City, St.State , l.Zip
UNION
select count(l.LocationId) as cnt, l.LocationId, l.Address1, l.Address2, l.City, St.State , l.Zip
from
Trip t
join LOCATION l on t.ToLocationId = l.LocationId
where t.PassengerId = 746896
group by count(l.LocationId) as cnt, l.LocationId, l.Address1, l.Address2, l.City, St.State , l.Zip
) as tbl
order by cnt desc

This will give you top 5 location.
SELECT TOP 5 tmp.fromlocationid AS locationid,
Count(tmp.fromlocationid) AS Times
FROM (SELECT fromlocationid
FROM trip
UNION ALL
SELECT tolocationid
FROM trip) tmp
GROUP BY tmp.fromlocationid
Method 1: This will give you top 5 location of each passenger.
WITH cte AS
( SELECT passengerid,
locationid,
Count(locationid) AS Times,
Row_number() OVER(partition BY passengerid ORDER BY passengerid ASC) AS RowNum
FROM (SELECT tripid, passengerid, fromlocationid AS locationid
FROM trip
UNION ALL
SELECT tripid, passengerid, tolocationid AS locationid
FROM trip) tmp
GROUP BY passengerid, locationid )
SELECT *
FROM cte
WHERE rownum <= 5
ORDER BY passengerid, Times DESC
Method 2: Same result without Union Operator (Top 5 location of each passenger)
WITH cte AS
( SELECT passengerid,
locationid,
Count(locationid) AS Times,
Row_number() OVER(partition BY passengerid ORDER BY passengerid ASC) AS RowNum
FROM trip
UNPIVOT ( locationid
FOR subject IN (fromlocationid, tolocationid) ) u
GROUP BY passengerid, locationid )
SELECT *
FROM cte
WHERE rownum <= 5
ORDER BY passengerid, times DESC
If you also want to get the location details, you can simply join the location table.
SELECT cte.* , location.*
FROM cte
INNER JOIN location ON location.locationid = cte.locationid
WHERE rownum <= 5
ORDER BY passengerid, times DESC
Reference
- https://stackoverflow.com/a/19056083/6327676

YOou'll need to replace the SELECT *'s with the columns you need, however, something like this should work:
WITH Visits AS (
SELECT *,
COUNT(*) OVER (PARTITION BY t.PassengerID, L.LocationID) AS Visits
FROM Trip T
JOIN [Location] L ON T.FromLocationId = L.LocationId),
Rankings AS (
SELECT *,
DENSE_RANK() OVER (PARTITION BY V.PassengerID ORDER BY Visits DESC) AS Ranking
FROM Visits V)
SELECT *
FROM Rankings
WHERE Ranking <= 5;

Further simplified solution
select top 3 * from
(
Select distinct count(locationId) as cnt, locationId from trip
unpivot
(
locationId
for direction in (fromLocationId, toLocationId)
)u
where passengerId IN (746896, 234456)
group by direction, locationId
)as tbl2
order by cnt desc;
Solution combining columns
The main issue for me is avoiding union to combine the two columns.
The UNPIVOT command can do this.
select top 3 * from (
select count(locationId) cnt, locationId
from
(
Select valu as locationId, passengerId from trip
unpivot
(
valu
for loc in (fromLocationId, toLocationId)
)u
)united
where passengerId IN (746896, 234456)
group by locationId
) as tbl
order by cnt desc;
http://sqlfiddle.com/#!18/cec8b/136
If you want to get the counts by direction:
select top 3 * from (
select count(locationId) cnt, locationId, direction
from
(
Select valu as locationId, direction, passengerId from trip
unpivot
(
valu
for direction in (fromLocationId, toLocationId)
)u
)united
where passengerId IN (746896, 234456)
group by locationId, direction
) as tbl
order by cnt desc;
http://sqlfiddle.com/#!18/cec8b/139
Same Results as you ( minus some minor descriptions )
select top 3 * from
(
select distinct * from (
select count(locationId) cnt, locationId
from
(
Select valu as locationId, direction, passengerId from trip
unpivot
(
valu
for direction in (fromLocationId, toLocationId)
)u
)united
where passengerId IN (746896, 234456)
group by locationId, direction
) as tbl
)as tbl2
order by cnt desc;

You can do this without union all:
select top (5) t.passengerid, v.locationid, count(*)
from trip t cross apply
(values (fromlocationid), (tolocationid)) v(locationid) join
location l
on v.locationid = l.locationid
where t.PassengerId = 746896
group by t.passengerid, v.locationid
order by count(*) desc;
If you want an answer for all passengers, it would be a similar idea, using row_number(), but your query suggests you want the answer only for one customer at a time.
You can include additional fields from location as well.
Here is a SQL Fiddle.

get the most two recent dates for each customer

basically, I need to retrieve the last two dates for customers who purchased in at least two different dates, implying there are some customer who had purchased only in one date, the data has the following form
client_id date
1 2016-07-02
1 2016-07-02
1 2016-06-01
2 2015-06-01
and I would like to get it in the following form
client_id previous_date last_date
1 2016-06-01 2016-07-02
remarques:
a client can have multiple entries for the same date
a client can have entries only for one date, such customer should be discarded

Rank your dates with DENSE_RANK. Then group by client_id and show the last dates (ranked #1 and #2).
select
client_id,
max(case when rn = 2 then date end) as previous_date,
max(case when rn = 1 then date end) as last_date
from
(
select
client_id,
date,
dense_rank() over (partition by client_id order by date desc) as rn
from mytable
)
group by client_id
having max(rn) > 1;

build up:
t=# create table s153 (c int, d date);
CREATE TABLE
t=# insert into s153 values (1,'2016-07-02'), (1,'2016-07-02'),(1,'2016-06-01'),(2,'2016-06-01');
INSERT 0 4
query:
t=# with a as (
select distinct c,d from s153
)
, b as (
select c,nth_value(d,1) over (partition by c order by d) last_date, nth_value(d,2) over (partition by c order by d) prev_date
from a
)
select * from b where prev_date is not null
;
c | last_date | prev_date
---+------------+------------
1 | 2016-06-01 | 2016-07-02
(1 row)

UNTESTED:
We use a common table expression to assign a row number based on the date in descending order and then only include those records having a row number <=2 and then ensure that those having 1 row are excluded by the having.
WITH CTE AS (
SELECT Distinct Client_ID
, Date
, row_number() over (partition by clientID order by date desc) rn
FROM Table)
SELECT Client_ID, min(date) previous_date, max(date) last_date)
FROM CTE
WHERE RN <=2
GROUP BY Client_ID
HAVING max(RN) > 1

All you need is a group by...
--test date
declare #tablename TABLE
(
client_id int,
[date] datetime
);
insert into #tablename
values( 1 , '2016-07-02'),
(1 , '2016-07-02'),
(1 , '2016-06-01'),
(2 , '2015-06-01');
--query
SELECT client_id,MIN([DATE]) AS [PREVIOUS_DATE], MAX([DATE]) AS [LAST_DATE]
FROM #tablename
GROUP BY client_id
Updated
-- create data
create table myTable
(
client_id integer,
given_date date
);
insert into myTable
values( 1 , '2016-07-02'),
(1 , '2016-07-02'),
(1 , '2016-06-01'),
(1 , '2016-06-03'),
(1 , '2016-06-09'),
(2 , '2015-06-01'),
(3 , '2016-06-03'),
(3 , '2016-06-09');
-- query
SELECT sub.client_id, sub.PREVIOUS_DATE, sub.LAST_DATE
FROM
(select
ROW_NUMBER() OVER (PARTITION BY a.client_id order by b.given_date desc,(MAX(b.given_date) - a.given_date)) AS ROW_NUMBER,
a.client_id,a.given_date AS PREVIOUS_DATE, MAX(b.given_date) - a.given_date AS diff, (b.given_date) AS LAST_DATE
FROM myTable AS a
JOIN myTable AS b
ON b.client_id = a.client_id
WHERE a.given_date <> b.given_date
group by a.client_id, a.given_date, b.given_date) AS sub
WHERE sub.ROW_NUMBER = 1

TSQL getting max and min date with a seperate but not unique record

example table:
test_date | test_result | unique_ID
12/25/15 | 100 | 50
12/01/15 | 150 | 75
10/01/15 | 135 | 75
09/22/14 | 99 | 50
04/10/13 | 125 | 50
I need to find the first and last test date as well as the test result to match said date by user. So, I can group by ID, but not test result.
SELECT MAX(test_date)[need matching test_result],
MIN(test_date) [need matching test_result],
unique_id
from [table]
group by unique_id
THANKS!

Create TABLE #t
(
test_date date ,
Test_results int,
Unique_id int
)
INSERT INTO #t
VALUES ( '12/25/15',100,50 ),
( '12/01/15',150,75 ),
( '10/01/15',135,75 ),
( '09/22/14',99,50 ),
( '04/10/13',125,50 )
select 'MinTestDate' as Type, a.test_date, a.Test_results, a.Unique_id
from #t a inner join (
select min(test_date) as test_datemin, max(test_date) as test_datemax, unique_id from #t
group by unique_ID) b
on a.test_date = b.test_datemin
union all
select 'MaxTestDate' as Type, a.test_date, a.Test_results, a.Unique_id from #t a
inner join (
select min(test_date) as test_datemin, max(test_date) as test_datemax, unique_id from #t
group by unique_ID) b
on a.test_date = b.test_datemax

I would recommend window functions. The following returns the information on 2 rows per id:
select t.*
from (select t.*,
row_number() over (partition by unique_id order by test_date) as seqnum_asc,
row_number() over (partition by unique_id order by test_date desc) as seqnum_desc
from table t
) t;
For one row, use conditional aggregation (or pivot if you prefer):
select unique_id,
min(test_date), max(case when seqnum_asc = 1 then test_result end),
max(test_date), max(case when seqnum_desc = 1 then test_result end)
from (select t.*,
row_number() over (partition by unique_id order by test_date) as seqnum_asc,
row_number() over (partition by unique_id order by test_date desc) as seqnum_desc
from table t
) t
group by unique_id;

Consider using a combination of self-joins and derived tables:
SELECT t1.unique_id, minTable.MinOftest_date, t1.test_result As Mintestdate_result,
maxTable.MaxOftest_date, t2.test_result As Maxtestdate_result
FROM TestTable AS t1
INNER JOIN
(
SELECT Min(TestTable.test_date) AS MinOftest_date,
TestTable.unique_ID
FROM TestTable
GROUP BY TestTable.unique_ID
) As minTable
ON (t1.test_date = minTable.MinOftest_date
AND t1.unique_id = minTable.unique_id)
INNER JOIN TestTable As t2
INNER JOIN
(
SELECT Max(TestTable.test_date) AS MaxOftest_date,
TestTable.unique_ID
FROM TestTable
GROUP BY TestTable.unique_ID
) AS maxTable
ON t2.test_date = maxTable.MaxOftest_date
AND t2.unique_ID = maxTable.unique_ID
ON minTable.unique_id = maxTable.unique_id;
OUTPUT
unique_id MinOftest_date Mintestdate_result MaxOftest_date Maxtestdate_result
50 4/10/2013 125 12/25/2015 100
75 10/1/2015 135 12/1/2015 150

Creating groups of consecutive days meeting a given criteria

I have table the following data structure in SQL Server:
ID Date Allocation
1, 2012-01-01, 0
2, 2012-01-02, 2
3, 2012-01-03, 0
4, 2012-01-04, 0
5, 2012-01-05, 0
6, 2012-01-06, 5
etc.
What I need to do is get all consecutive day periods where Allocation = 0, and in the following form:
Start Date End Date DayCount
2012-01-01 2012-01-01 1
2012-01-03 2012-01-05 3
etc.
Is it possible to do this in SQL, and if so how?

In this answer, I'll assume that the "id" field numbers the rows consecutively when sorted by increasing date, like it does in the example data. (Such a column can be created if it does not exist).
This is an example of a technique described here and here.
1) Join the table to itself on adjacent "id" values. This pairs adjacent rows. Select rows where the "allocation" field has changed. Store the result in a temporary table, also keeping a running index.
SET #idx = 0;
CREATE TEMPORARY TABLE boundaries
SELECT
(#idx := #idx + 1) AS idx,
a1.date AS prev_end,
a2.date AS next_start,
a1.allocation as allocation
FROM allocations a1
JOIN allocations a2
ON (a2.id = a1.id + 1)
WHERE a1.allocation != a2.allocation;
This gives you a table having "the end of the previous period", "the start of the next period", and "the value of 'allocation' in the previous period" in each row:
+------+------------+------------+------------+
| idx | prev_end | next_start | allocation |
+------+------------+------------+------------+
| 1 | 2012-01-01 | 2012-01-02 | 0 |
| 2 | 2012-01-02 | 2012-01-03 | 2 |
| 3 | 2012-01-05 | 2012-01-06 | 0 |
+------+------------+------------+------------+
2) We need the start and end of each period in the same row, so we need to combine adjacent rows again. Do this by creating a second temporary table like boundaries but having an idx field 1 greater:
+------+------------+------------+
| idx | prev_end | next_start |
+------+------------+------------+
| 2 | 2012-01-01 | 2012-01-02 |
| 3 | 2012-01-02 | 2012-01-03 |
| 4 | 2012-01-05 | 2012-01-06 |
+------+------------+------------+
Now join on the idx field and we get the answer:
SELECT
boundaries2.next_start AS start,
boundaries.prev_end AS end,
allocation
FROM boundaries
JOIN boundaries2
USING(idx);
+------------+------------+------------+
| start | end | allocation |
+------------+------------+------------+
| 2012-01-02 | 2012-01-02 | 2 |
| 2012-01-03 | 2012-01-05 | 0 |
+------------+------------+------------+
** Note that this answer gets the "internal" periods correctly but misses the two "edge" periods where allocation = 0 at the beginning and allocation = 5 at the end. Those can be pulled in using UNION clauses but I wanted to present the core idea without that complication.

Following would be one way to do it. The gist of this solution is
Use a CTE to get a list of all consecutive start and enddates with Allocation = 0
Use the ROW_NUMBER window function to assign rownumbers depending on both start- and enddates.
Select only those records where both ROW_NUMBERS equal 1.
Use DATEDIFFto calculate the DayCount
SQL Statement
;WITH r AS (
SELECT StartDate = Date, EndDate = Date
FROM YourTable
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN YourTable q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT [Start Date] = s.StartDate
, [End Date ] = s.EndDate
, [DayCount] = DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
WHERE s.rn1 = 1
AND s.rn2 = 1
OPTION (MAXRECURSION 0)
Test script
;WITH q (ID, Date, Allocation) AS (
SELECT * FROM (VALUES
(1, '2012-01-01', 0)
, (2, '2012-01-02', 2)
, (3, '2012-01-03', 0)
, (4, '2012-01-04', 0)
, (5, '2012-01-05', 0)
, (6, '2012-01-06', 5)
) a (a, b, c)
)
, r AS (
SELECT StartDate = Date, EndDate = Date
FROM q
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT s.StartDate, s.EndDate, DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
WHERE s.rn1 = 1
AND s.rn2 = 1
OPTION (MAXRECURSION 0)

Alternative way with CTE but without ROW_NUMBER(),
Sample data:
if object_id('tempdb..#tab') is not null
drop table #tab
create table #tab (id int, date datetime, allocation int)
insert into #tab
select 1, '2012-01-01', 0 union
select 2, '2012-01-02', 2 union
select 3, '2012-01-03', 0 union
select 4, '2012-01-04', 0 union
select 5, '2012-01-05', 0 union
select 6, '2012-01-06', 5 union
select 7, '2012-01-07', 0 union
select 8, '2012-01-08', 5 union
select 9, '2012-01-09', 0 union
select 10, '2012-01-10', 0
Query:
;with cte(s_id, e_id, b_id) as (
select s.id, e.id, b.id
from #tab s
left join #tab e on dateadd(dd, 1, s.date) = e.date and e.allocation = 0
left join #tab b on dateadd(dd, -1, s.date) = b.date and b.allocation = 0
where s.allocation = 0
)
select ts.date as [start date], te.date as [end date], count(*) as [day count] from (
select c1.s_id as s, (
select min(s_id) from cte c2
where c2.e_id is null and c2.s_id >= c1.s_id
) as e
from cte c1
where b_id is null
) t
join #tab t1 on t1.id between t.s and t.e and t1.allocation = 0
join #tab ts on ts.id = t.s
join #tab te on te.id = t.e
group by t.s, t.e, ts.date, te.date
Live example at data.SE.

Using this sample data:
CREATE TABLE MyTable (ID INT, Date DATETIME, Allocation INT);
INSERT INTO MyTable VALUES (1, {d '2012-01-01'}, 0);
INSERT INTO MyTable VALUES (2, {d '2012-01-02'}, 2);
INSERT INTO MyTable VALUES (3, {d '2012-01-03'}, 0);
INSERT INTO MyTable VALUES (4, {d '2012-01-04'}, 0);
INSERT INTO MyTable VALUES (5, {d '2012-01-05'}, 0);
INSERT INTO MyTable VALUES (6, {d '2012-01-06'}, 5);
GO
Try this:
WITH DateGroups (ID, Date, Allocation, SeedID) AS (
SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, MyTable.ID
FROM MyTable
LEFT JOIN MyTable Prev ON Prev.Date = DATEADD(d, -1, MyTable.Date)
AND Prev.Allocation = 0
WHERE Prev.ID IS NULL
AND MyTable.Allocation = 0
UNION ALL
SELECT MyTable.ID, MyTable.Date, MyTable.Allocation, DateGroups.SeedID
FROM MyTable
JOIN DateGroups ON MyTable.Date = DATEADD(d, 1, DateGroups.Date)
WHERE MyTable.Allocation = 0
), StartDates (ID, StartDate, DayCount) AS (
SELECT SeedID, MIN(Date), COUNT(ID)
FROM DateGroups
GROUP BY SeedID
), EndDates (ID, EndDate) AS (
SELECT SeedID, MAX(Date)
FROM DateGroups
GROUP BY SeedID
)
SELECT StartDates.StartDate, EndDates.EndDate, StartDates.DayCount
FROM StartDates
JOIN EndDates ON StartDates.ID = EndDates.ID;
The first section of the query is a recursive SELECT, which is anchored by all rows that are allocation = 0, and whose previous day either doesn't exist or has allocation != 0. This effectively returns IDs: 1 and 3 which are the starting dates of the periods of time you want to return.
The recursive part of this same query starts from the anchor rows, and finds all subsequent dates that also have allocation = 0. The SeedID keeps track of the anchored ID through all the iterations.
The result so far is this:
ID Date Allocation SeedID
----------- ----------------------- ----------- -----------
1 2012-01-01 00:00:00.000 0 1
3 2012-01-03 00:00:00.000 0 3
4 2012-01-04 00:00:00.000 0 3
5 2012-01-05 00:00:00.000 0 3
The next sub query uses a simple GROUP BY to filter out all the start dates for each SeedID, and also counts the days.
The last sub query does the same thing with the end dates, but this time the day count isn't needed as we already have this.
The final SELECT query joins these two together to combine the start and end dates, and returns them along with the day count.

Give it a try if it works for you
Here SDATE for your DATE remains same as your table.
SELECT SDATE,
CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) >0 THEN(
CASE WHEN (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) IS NULL THEN SDATE
ELSE (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) END
)ELSE (SELECT SDATE FROM TABLE1 WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID ))END AS EDATE
,CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) <0 THEN
(SELECT COUNT(*) FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID )) ELSE
(SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) END AS DAYCOUNT
FROM TABLE1 TBL1 WHERE ALLOCATION = 0
AND (((SELECT ALLOCATION FROM TABLE1 WHERE ID=(SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID))<> 0 ) OR (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID)IS NULL);

A solution without CTE:
SELECT a.aDate AS StartDate
, MIN(c.aDate) AS EndDate
, (datediff(day, a.aDate, MIN(c.aDate)) + 1) AS DayCount
FROM (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS a
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS b ON a.idn = b.idn + 1 AND b.allocation = a.allocation
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS c ON a.idn <= c.idn AND c.allocation = a.allocation
LEFT JOIN (
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS d ON c.idn = d.idn - 1 AND d.allocation = c.allocation
WHERE b.idn IS NULL AND c.idn IS NOT NULL AND d.idn IS NULL AND a.allocation = 0
GROUP BY a.aDate
Example

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

TSQL : the top records of a given partition (conditional) - sql

Related

Throw error , if duplicate data (not duplicate records) found in a table

Avoid Unions to get TOP count

get the most two recent dates for each customer

TSQL getting max and min date with a seperate but not unique record

Creating groups of consecutive days meeting a given criteria

Categories

Resources