Oracle SQL: Find max overlap - sql

I'm looking for a way to find the maximum concurrent capacity of an institution (hospital) in terms of the number of studies it can run parallelly.
Even if there is 1 day overlap, the studies are considered to be overlapping. In the below data, there are 2 batches of overlaps at hospital "I1"- in the first batch there are 4 studies overlapping and in the second there are 2 studies overlapping. In summary, the maximum concurrent capacity of I1 is 4 (meaning it can handle 4 studies parallelly).
Can you help/guide with an efficient SQL query for this?
Script to create test data is available below. Note: INST_ID is the hospital id.
-- Test data: one row per study; INST_ID is the hospital that runs the study.
CREATE TABLE TEST_INST_DT(INST_ID VARCHAR2(10), STUDY_ID VARCHAR2(10), STUDY_START_DATE DATE, STUDY_END_DATE DATE);
-- ANSI DATE literals replace TO_DATE(..., 'DD-MON-YYYY') so the script does not
-- depend on the session's NLS_DATE_LANGUAGE month abbreviations, and every INSERT
-- names its target columns so it keeps working if the table gains a column.
-- Overlap (4 studies)
INSERT INTO TEST_INST_DT (INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE) VALUES ('I1', 'S1', DATE '2021-12-31', DATE '2022-01-02');
INSERT INTO TEST_INST_DT (INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE) VALUES ('I1', 'S2', DATE '2022-01-01', DATE '2022-01-05');
INSERT INTO TEST_INST_DT (INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE) VALUES ('I1', 'S3', DATE '2022-01-02', DATE '2022-01-03');
INSERT INTO TEST_INST_DT (INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE) VALUES ('I1', 'S4', DATE '2022-01-04', DATE '2022-01-10');
-- Overlap (2 studies)
INSERT INTO TEST_INST_DT (INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE) VALUES ('I1', 'S5', DATE '2022-02-01', DATE '2022-02-05');
INSERT INTO TEST_INST_DT (INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE) VALUES ('I1', 'S6', DATE '2022-02-02', DATE '2022-02-03');

With MATCH_RECOGNIZE
-- Collapses each institution's studies into batches of overlapping studies and
-- returns one row per batch: its first start date, its latest end date and the
-- number of studies it contains.
select inst_id, study_start_date, study_end_date, nstudies
from test_inst_dt
match_recognize (
  partition by inst_id
  order by study_start_date, study_end_date
  measures
    first(study_start_date) as study_start_date,
    max(study_end_date)     as study_end_date,
    count(*)                as nstudies
  -- zero or more rows that reach into the following row, closed by the batch's last row
  pattern( merged* strt )
  define
    -- >= (not >): a study that starts on the very day the running batch ends
    -- still shares that day, which the requirement counts as an overlap
    merged as max(study_end_date) >= next(study_start_date)
);

You could try this:
-- Outer level: groups the rows by institution and by the month-year of the derived
-- CONCURRENCY date, returning the earliest start, latest end and the number of rows
-- with a non-null CONCURRENCY in each group.
-- NOTE(review): because the grouping key is a month, two separate overlap batches that
-- fall in the same month would land in one group, and a batch whose derived dates span
-- two months would be split - confirm this is acceptable for real data.
SELECT INST_ID, To_Char(CONCURRENCY, 'MON-yyyy') "CONCURRENCY_MONTH",
MIN(STUDY_START_DATE) "START_DATE", MAX(STUDY_END_DATE) "END_DATE", COUNT(CONCURRENCY) "CONCURRENCY_COUNT"
-- Middle level: derives CONCURRENCY for every study. The first CASE compares the row's
-- dates with the previous/next row's dates; Nvl falls back to the second CASE when the
-- first yields NULL.
FROM ( SELECT INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE,
Nvl( CASE WHEN INST_ID = PREV_INST_ID And STUDY_START_DATE Between PREV_START_DATE And PREV_END_DATE
THEN LEAST(STUDY_END_DATE, PREV_END_DATE)
WHEN INST_ID = PREV_INST_ID And NEXT_START_DATE Between STUDY_START_DATE And STUDY_END_DATE
THEN Nvl(NEXT_END_DATE, STUDY_END_DATE)
WHEN INST_ID = NEXT_INST_ID And STUDY_END_DATE Between NEXT_START_DATE And NEXT_END_DATE
THEN LEAST(NEXT_END_DATE, STUDY_END_DATE)
WHEN INST_ID = NEXT_INST_ID And STUDY_START_DATE Between PREV_START_DATE And PREV_END_DATE
THEN LEAST(NEXT_END_DATE, STUDY_END_DATE)
END,
-- Fallback: running MIN start / MAX end over all rows up to the current one
-- (ordered by INST_ID, STUDY_ID), counting only rows whose previous row has the same INST_ID.
CASE WHEN STUDY_START_DATE Between MIN(CASE WHEN INST_ID = PREV_INST_ID THEN STUDY_START_DATE END) OVER(Order By INST_ID, STUDY_ID Rows Between Unbounded Preceding And Current Row) And
MAX(CASE WHEN INST_ID = PREV_INST_ID THEN STUDY_END_DATE END) OVER(Order By INST_ID, STUDY_ID Rows Between Unbounded Preceding And Current Row)
THEN MAX(CASE WHEN INST_ID = PREV_INST_ID THEN STUDY_END_DATE END) OVER(Order By INST_ID, STUDY_ID Rows Between Unbounded Preceding And Current Row)
END
) "CONCURRENCY"
-- Inner level: attaches the previous and next row's INST_ID and dates to each row.
-- The windows are ordered by INST_ID, STUDY_ID and have no PARTITION BY, which is why
-- the CASE branches above compare INST_ID with PREV_INST_ID / NEXT_INST_ID explicitly.
-- NOTE(review): neighbours are chosen by STUDY_ID order, not by date - presumably this
-- relies on study ids being assigned in start-date order; verify.
FROM ( Select INST_ID, LAG(INST_ID) OVER(Order By INST_ID, STUDY_ID) "PREV_INST_ID", LEAD(INST_ID) OVER(Order By INST_ID, STUDY_ID) "NEXT_INST_ID",
STUDY_ID, LAG(STUDY_START_DATE) OVER(Order By INST_ID, STUDY_ID) "PREV_START_DATE", STUDY_START_DATE, LEAD(STUDY_START_DATE) OVER(Order By INST_ID, STUDY_ID) "NEXT_START_DATE",
LAG(STUDY_END_DATE) OVER(Order By INST_ID, STUDY_ID) "PREV_END_DATE", STUDY_END_DATE, LEAD(STUDY_END_DATE) OVER(Order By INST_ID, STUDY_ID) "NEXT_END_DATE"
From TEST_INST_DT
Order By INST_ID, STUDY_ID
)
ORDER BY INST_ID, STUDY_ID
)
GROUP BY INST_ID, To_Char(CONCURRENCY, 'MON-yyyy')
Which, with your sample data, results as:
INST_ID
CONCURRENCY_MONTH
START_DATE
END_DATE
CONCURRENCY_COUNT
I1
JAN-2022
31-DEC-21
10-JAN-22
4
I1
FEB-2022
01-FEB-22
05-FEB-22
2
Here the innermost query collects previous and next row's data into every row:
-- Pairs every study with the institution id and dates of its neighbouring rows,
-- where "neighbouring" means adjacent in (INST_ID, STUDY_ID) order.
SELECT
    inst_id,
    LAG(inst_id)           OVER (ORDER BY inst_id, study_id) AS prev_inst_id,
    LEAD(inst_id)          OVER (ORDER BY inst_id, study_id) AS next_inst_id,
    study_id,
    LAG(study_start_date)  OVER (ORDER BY inst_id, study_id) AS prev_start_date,
    study_start_date,
    LEAD(study_start_date) OVER (ORDER BY inst_id, study_id) AS next_start_date,
    LAG(study_end_date)    OVER (ORDER BY inst_id, study_id) AS prev_end_date,
    study_end_date,
    LEAD(study_end_date)   OVER (ORDER BY inst_id, study_id) AS next_end_date
FROM test_inst_dt
ORDER BY
    inst_id,
    study_id
INST_ID PREV_INST_ID NEXT_INST_ID STUDY_ID PREV_START_DATE STUDY_START_DATE NEXT_START_DATE PREV_END_DATE STUDY_END_DATE NEXT_END_DATE
---------- ------------ ------------ ---------- --------------- ---------------- --------------- ------------- -------------- -------------
I1 I1 S1 31-DEC-21 01-JAN-22 02-JAN-22 05-JAN-22
I1 I1 I1 S2 31-DEC-21 01-JAN-22 02-JAN-22 02-JAN-22 05-JAN-22 03-JAN-22
I1 I1 I1 S3 01-JAN-22 02-JAN-22 04-JAN-22 05-JAN-22 03-JAN-22 10-JAN-22
I1 I1 I1 S4 02-JAN-22 04-JAN-22 01-FEB-22 03-JAN-22 10-JAN-22 05-FEB-22
I1 I1 I1 S5 04-JAN-22 01-FEB-22 02-FEB-22 10-JAN-22 05-FEB-22 03-FEB-22
I1 I1 S6 01-FEB-22 02-FEB-22 05-FEB-22 03-FEB-22
... its resulting dataset is transformed with another query ...
-- Derives a CONCURRENCY date for every study from the neighbouring rows' dates.
-- The first CASE compares the row with its previous/next row; Nvl falls back to the
-- second CASE when the first yields NULL.
SELECT INST_ID, STUDY_ID, STUDY_START_DATE, STUDY_END_DATE,
Nvl( CASE WHEN INST_ID = PREV_INST_ID And STUDY_START_DATE Between PREV_START_DATE And PREV_END_DATE
THEN LEAST(STUDY_END_DATE, PREV_END_DATE)
WHEN INST_ID = PREV_INST_ID And NEXT_START_DATE Between STUDY_START_DATE And STUDY_END_DATE
THEN Nvl(NEXT_END_DATE, STUDY_END_DATE)
WHEN INST_ID = NEXT_INST_ID And STUDY_END_DATE Between NEXT_START_DATE And NEXT_END_DATE
THEN LEAST(NEXT_END_DATE, STUDY_END_DATE)
WHEN INST_ID = NEXT_INST_ID And STUDY_START_DATE Between PREV_START_DATE And PREV_END_DATE
THEN LEAST(NEXT_END_DATE, STUDY_END_DATE)
END,
-- Fallback: running MIN start / MAX end over the rows seen so far in
-- (INST_ID, STUDY_ID) order, counting only rows whose previous row has the same INST_ID.
CASE WHEN STUDY_START_DATE Between MIN(CASE WHEN INST_ID = PREV_INST_ID THEN STUDY_START_DATE END) OVER(Order By INST_ID, STUDY_ID Rows Between Unbounded Preceding And Current Row) And
MAX(CASE WHEN INST_ID = PREV_INST_ID THEN STUDY_END_DATE END) OVER(Order By INST_ID, STUDY_ID Rows Between Unbounded Preceding And Current Row)
THEN MAX(CASE WHEN INST_ID = PREV_INST_ID THEN STUDY_END_DATE END) OVER(Order By INST_ID, STUDY_ID Rows Between Unbounded Preceding And Current Row)
END
) "CONCURRENCY"
-- Source rows: each study with its previous/next row's INST_ID and dates; the windows
-- have no PARTITION BY, hence the explicit INST_ID comparisons in the CASE branches.
FROM ( Select INST_ID, LAG(INST_ID) OVER(Order By INST_ID, STUDY_ID) "PREV_INST_ID", LEAD(INST_ID) OVER(Order By INST_ID, STUDY_ID) "NEXT_INST_ID",
STUDY_ID, LAG(STUDY_START_DATE) OVER(Order By INST_ID, STUDY_ID) "PREV_START_DATE", STUDY_START_DATE, LEAD(STUDY_START_DATE) OVER(Order By INST_ID, STUDY_ID) "NEXT_START_DATE",
LAG(STUDY_END_DATE) OVER(Order By INST_ID, STUDY_ID) "PREV_END_DATE", STUDY_END_DATE, LEAD(STUDY_END_DATE) OVER(Order By INST_ID, STUDY_ID) "NEXT_END_DATE"
From TEST_INST_DT
Order By INST_ID, STUDY_ID
)
ORDER BY INST_ID, STUDY_ID
INST_ID STUDY_ID STUDY_START_DATE STUDY_END_DATE CONCURRENCY
---------- ---------- ---------------- -------------- -----------
I1 S1 31-DEC-21 02-JAN-22 02-JAN-22
I1 S2 01-JAN-22 05-JAN-22 02-JAN-22
I1 S3 02-JAN-22 03-JAN-22 03-JAN-22
I1 S4 04-JAN-22 10-JAN-22 10-JAN-22
I1 S5 01-FEB-22 05-FEB-22 03-FEB-22
I1 S6 02-FEB-22 03-FEB-22 03-FEB-22
... and this is aggregated and grouped by like already shown above.

Sorry, didn't pay attention to 11g criteria, then try
-- Recursive-CTE alternative to MATCH_RECOGNIZE: merges overlapping studies into
-- batches per institution and returns each batch's span and study count.
with rtest_inst_dt(rn, inst_id,study_id,study_start_date,study_end_date) as (
  -- number the studies of each institution by start date, then end date
  select row_number() over(partition by inst_id order by study_start_date,study_end_date),
         inst_id,study_id, study_start_date,study_end_date
  from test_inst_dt
)
,cte(rn, root, inst_id,study_id,study_start_date,study_end_date) as (
  -- anchor: studies that overlap no earlier study of the SAME institution start a batch.
  -- rn restarts at 1 for every institution, so the inst_id correlation is required;
  -- without it rows of different institutions are compared with each other.
  select rn, rn, inst_id,study_id,study_start_date,study_end_date
  from rtest_inst_dt d1
  where not exists(
    select 1 from rtest_inst_dt d2
    where d2.inst_id = d1.inst_id
      and d2.rn < d1.rn
      and (
        d2.study_end_date between d1.study_start_date and d1.study_end_date
        or d2.study_start_date between d1.study_start_date and d1.study_end_date
        or (d2.study_start_date < d1.study_start_date and d2.study_end_date > d1.study_end_date)
      )
  )
  union all
  -- recursion: pull in later studies of the same institution that start inside the
  -- interval accumulated so far, widening that interval as it goes
  select d1.rn, c.root, d1.inst_id, d1.study_id, least(c.study_start_date, d1.study_start_date),
         greatest(c.study_end_date, d1.study_end_date)
  from cte c
  join rtest_inst_dt d1 on d1.inst_id = c.inst_id
   and d1.rn > c.rn
   and d1.study_start_date between c.study_start_date and c.study_end_date
)
-- a study can be reached along several paths, hence count(distinct rn)
select inst_id, min(study_start_date) as study_start_date, max(study_end_date) as study_end_date, count(distinct rn) as n
from cte
group by inst_id, root
;
I1 31/12/21 10/01/22 4
I1 01/02/22 05/02/22 2

Related

SQL logic to achieve the below mentioned scenario

I would like to know about the SQL logic to achieve the below scenario.
From the source I need to load the data to target as described below.
Source
ID Name Place Date
1 User 1 Chennai 01-Jun-22
1 User 1 Chennai 02-Jun-22
2 User 2 Bangalore 03-Jun-22
2 User 2 Bangalore 04-Jun-22
1 User 1 Bangalore 05-Jun-22
1 User 1 Bangalore 06-Jun-22
1 User 1 Bangalore 07-Jun-22
1 User 1 Chennai 08-Jun-22
Target
ID Name Place From Date To Date
1 User 1 Chennai 01-Jun-22 02-Jun-22
2 User 2 Bangalore 03-Jun-22 04-Jun-22
1 User 1 Bangalore 05-Jun-22 07-Jun-22
1 User 1 Chennai 08-Jun-22 08-Jun-22
Solution for your problem:
-- Gaps-and-islands: collapses consecutive rows (in "Date" order) that share the same
-- ID and Place into one row carrying the island's first and last date.
WITH CT1 AS
(
    -- t = 1 on every row whose ID/Place differs from the previous row's (island start).
    -- The LAG default '0' makes the very first row compare as different.
    SELECT ID, Name, Place, "Date",
           CASE WHEN CONCAT(ID,Place) != LAG(CONCAT(ID,Place),1,'0') OVER(ORDER BY "Date") THEN 1 ELSE 0 END as t
    FROM Table1
),
CT2 AS
(
    -- the running total of island starts gives every island its own group number
    SELECT ID, Name, Place, "Date",
           SUM(t) OVER(ORDER BY "Date") as grp
    FROM CT1
)
SELECT ID, Name, Place,
       MIN("Date") as From_Date,
       MAX("Date") as To_Date
FROM CT2
GROUP BY ID, Name, Place,grp
ORDER BY From_Date;
Working Example : db<>fiddle Link
-- Sample data for the islands example (1-8 June 2022).
-- Dates use the 'YYYYMMDD' form, which SQL Server reads the same way under every
-- DATEFORMAT/language setting; '01-06-2022' is read as 6 January under mdy settings.
-- IDs are written as integers to match the INT column.
CREATE TABLE #Temp([ID] INT,[Name] VARCHAR(100),[Place] VARCHAR(100),[Date] DATETIME)
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(1,'User1','Chennai','20220601')
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(1,'User1','Chennai','20220602')
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(2,'User2','Bangalore','20220603')
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(2,'User2','Bangalore','20220604')
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(1,'User1','Bangalore','20220605')
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(1,'User1','Bangalore','20220606')
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(1,'User1','Bangalore','20220607')
INSERT INTO #Temp([ID],[Name],[Place],[Date]) VALUES(1,'User1','Chennai','20220608')
-- Collapses consecutive rows (in [Date] order) with the same Name and Place into one
-- row per run, reporting the run's first and last date.
;WITH A AS(
    -- F1 = 1 when the next row continues the current run, F2 = 1 when the previous row does
    SELECT
        ROW_NUMBER() OVER(ORDER BY [Date]) [Rono],
        [ID],[Name],[Place],[Date],
        CASE WHEN LEAD([Name]) OVER(ORDER BY [Date])=[Name] AND LEAD([Place]) OVER(ORDER BY [Date])=[Place] THEN 1 ELSE 0 END F1,
        CASE WHEN LAG([Name]) OVER(ORDER BY [Date])=[Name] AND LAG([Place]) OVER(ORDER BY [Date])=[Place] THEN 1 ELSE 0 END F2
    FROM #Temp
),
B AS(
    -- Interior rows of a run (F1 = 1 and F2 = 1) are dropped by the WHERE clause, so only
    -- run starts, run ends and single-row runs remain. FF is:
    --   * for a run start: the row number of the next remaining row, i.e. the run's end;
    --   * for a single-row run: NULL, so the run ends on its own date. Taking LEAD here
    --     would attach the NEXT run's date to any single-row run that is not last;
    --   * for a run end: 0, which the final filter removes.
    SELECT A.[Rono],A.[ID],A.[Name],A.[Place],A.[Date],
        CASE
            WHEN A.F1=1 AND A.F2=0 THEN LEAD(A.[Rono]) OVER(ORDER BY A.[Date])
            WHEN A.F1=0 AND A.F2=0 THEN NULL
            ELSE 0
        END [FF]
    FROM A
    WHERE A.F1+A.F2!=2
)
SELECT
    B.[ID],B.[Name],B.[Place],
    B.[Date] [StrtDate],
    ISNULL(AB.[Date],B.[Date]) [EndDate]
FROM B
LEFT JOIN B AB ON B.FF=AB.Rono
WHERE B.FF!=0 OR B.FF IS NULL
ORDER BY B.[Date]

DENSE RANK should return NULL in case of previous duplicate value

How below one can be achieved ?
-- Numbers flights by scheduled arrival and flight key; rows that tie on both
-- receive the same SNo.
select
    DENSE_RANK() over(order by leg.DateSTA, flt.FlightKey) SNo,
    flt.Flight_ID flightID,
    flt.FlightKey flight
from Flt_OperativeFlight flt
inner join Flt_OperativeFlight_Legs leg
    on leg.Flight_ID = flt.Flight_ID
inner join Flt_OperativeFlight_Segments seg
    on seg.Flight_ID = flt.Flight_ID
where leg.DateATA >= '2020-05-25T09:00:00'
  and leg.DateATA <= '2020-05-25T19:49:00'
Current Results:
SNo flightID flight
---------------------
1 757625 ET0567
1 757625 ET0567
2 744592 SQ0425
3 744390 MI0321
3 744390 MI0321
Expecting Results:
SNo flightID flight
---------------------
1 757625 ET0567
NULL 757625 ET0567
2 744592 SQ0425
3 744390 MI0321
NULL 744390 MI0321
You can mix with another analytical function row_number, which guarantees unique numbers.
I partition by the key, so only one row number is 1
-- Shows the dense rank only on one row of every group of equal values;
-- the remaining rows of the group get NULL.
SELECT
    v.n,
    CASE
        WHEN ROW_NUMBER() OVER (PARTITION BY v.n ORDER BY v.n) = 1
            THEN DENSE_RANK() OVER (ORDER BY v.n)
        ELSE NULL
    END AS rn
FROM (VALUES ('a'), ('a'), ('b'), ('c'), ('c'), ('c'), ('d')) AS v (n)
I'm comparing every FlightDetails.Flight to the FlightDetails.Flight before it and if they're the same then null.
-- Blanks out SNo on a row whose flightID equals that of the row before it
-- (rows compared in descending flightID order).
select
    case
        when lag(flightID) over (order by flightID desc) = flightID then null
        else SNo
    end as SNo,
    flightID,
    flight
from t
SNo
flightID
flight
1
757625
ET0567
null
757625
ET0567
2
744592
SQ0425
3
744390
MI0321
null
744390
MI0321
Fiddle
I don't have any test data to test this with but the idea is it should be using the LAG analytical function
-- Sketch of the LAG approach: the innermost query computes the dense rank, the
-- middle one exposes the previous row's rank, and the outer CASE returns NULL
-- whenever the rank repeats.
-- NOTE(review): the fragment is incomplete as posted - the innermost select has no
-- FROM clause and the statement ends on a dangling comma.
select case when prev_sno = sno then NULL
else sno end as sno from (
select lag(sno) over(order by sno) prev_sno, sno from (
select
DENSE_RANK() over(
order by
FlightLegInformation.DateSTA,
IncomingFlightSegments.FlightKey
) SNo)) ,

Doing a cross pivot in Google BigQuery

I have asked a previous question about doing a multi-level aggregation query on the X-axis here: Get the top patent countries, codes in a BQ public dataset.
Here is how the query (copied from the accepted answer works) to get:
Top 2 Countries by Count, and within those countries, top 2 Codes by Count
-- Top 2 countries by publication count and, within each of them,
-- the application kinds whose count ranks first or second.
WITH top_countries AS (
  -- the two countries with the most publications
  SELECT country_code
  FROM `patents-public-data.patents.publications`
  GROUP BY country_code
  ORDER BY COUNT(*) DESC
  LIMIT 2
), kind_totals AS (
  -- publications per application kind, restricted to those countries
  SELECT
    pub.country_code,
    pub.application_kind,
    COUNT(*) AS application_kind_count
  FROM `patents-public-data.patents.publications` AS pub
  WHERE pub.country_code IN (SELECT country_code FROM top_countries)
  GROUP BY pub.country_code, pub.application_kind
), ranked_kinds AS (
  -- rank the kinds inside every country, largest count first
  SELECT
    country_code,
    application_kind,
    application_kind_count,
    DENSE_RANK() OVER (
      PARTITION BY country_code
      ORDER BY application_kind_count DESC
    ) AS application_kind_rank
  FROM kind_totals
)
SELECT
  country_code,
  application_kind,
  application_kind_count
FROM ranked_kinds
WHERE application_kind_rank <= 2
And I get something like:
country_code application_kind count
JP A 125
JP U 124
CN A 118
CN U 101
Now I would like to add the following pivot on the y-axis: to get the following:
X: Top 2 Countries by Count, and within those countries, top 2 Codes by Count
Y: Top 2 family_id by Count, Top 2 priority_date by Count
The final results would then look like:
I am able to build the Y-query in a second query --
-- Top 2 families by publication count and, within each of them,
-- the priority dates whose count ranks first or second.
WITH top_families AS (
  -- the two families with the most publications
  SELECT family_id
  FROM `patents-public-data.patents.publications`
  GROUP BY family_id
  ORDER BY COUNT(*) DESC
  LIMIT 2
), date_totals AS (
  -- publications per priority date, restricted to those families
  SELECT
    pub.family_id,
    pub.priority_date,
    COUNT(*) AS priority_date_count
  FROM `patents-public-data.patents.publications` AS pub
  WHERE pub.family_id IN (SELECT family_id FROM top_families)
  GROUP BY pub.family_id, pub.priority_date
), ranked_dates AS (
  -- rank the dates inside every family, largest count first
  SELECT
    family_id,
    priority_date,
    priority_date_count,
    DENSE_RANK() OVER (
      PARTITION BY family_id
      ORDER BY priority_date_count DESC
    ) AS priority_date_rank
  FROM date_totals
)
SELECT
  family_id,
  priority_date,
  priority_date_count
FROM ranked_dates
WHERE priority_date_rank <= 2
However, I am not quite sure how to merge them together, in a single query or in two.
Below is for BigQuery Standard SQL and is just demo of the approach and not pretending to be 100% representing requested logic
-- Cross pivot: rows are the X pairs (top countries x their top application kinds),
-- columns are the Y pairs (top families x their top priority dates).
WITH A_X AS (
  -- X step 1: the two countries with the most publications
  SELECT country_code FROM `patents-public-data.patents.publications`
  GROUP BY country_code ORDER BY COUNT(1) DESC LIMIT 2
), B_X AS (
  -- X step 2: publication count per application kind within those countries
  SELECT country_code, application_kind, COUNT(1) application_kind_count
  FROM `patents-public-data.patents.publications` WHERE country_code IN (SELECT country_code FROM A_X)
  GROUP BY country_code, application_kind
), C_X AS (
  SELECT country_code, application_kind, application_kind_count,
    DENSE_RANK() OVER(PARTITION BY country_code ORDER BY application_kind_count DESC) AS application_kind_rank
  FROM B_X
), X AS (
  -- X axis: (country, kind) pairs whose kind ranks first or second in its country
  SELECT country_code, application_kind, application_kind_count
  FROM C_X WHERE application_kind_rank <= 2
), A_Y AS (
  -- Y step 1: the two families with the most publications among the X pairs
  SELECT family_id FROM `patents-public-data.patents.publications`
  JOIN X USING(country_code, application_kind)
  GROUP BY family_id
  ORDER BY COUNT(1) DESC LIMIT 2
), B_Y AS (
  SELECT family_id, priority_date, COUNT(1) priority_date_count
  FROM `patents-public-data.patents.publications` WHERE family_id IN (SELECT family_id FROM A_Y)
  GROUP BY family_id, priority_date
), C_Y AS (
  SELECT family_id, priority_date, priority_date_count,
    DENSE_RANK() OVER(PARTITION BY family_id ORDER BY priority_date_count DESC) AS pos_date
  FROM B_Y
), Y AS (
  -- Y axis: pos_family numbers the families, pos_date ranks the dates inside a family
  SELECT family_id, priority_date, pos_date, DENSE_RANK() OVER(ORDER BY family_id) pos_family
  FROM C_Y WHERE pos_date <= 2
)
SELECT country_code, application_kind,
  COUNTIF(pos_family = 1 AND pos_date = 1) `family1_date1`,
  COUNTIF(pos_family = 1 AND pos_date = 2) `family1_date2`,
  COUNTIF(pos_family = 2 AND pos_date = 1) `family2_date1`,
  COUNTIF(pos_family = 2 AND pos_date = 2) `family2_date2`
FROM `patents-public-data.patents.publications`
JOIN Y USING(family_id, priority_date)
-- Match the (country, kind) PAIR against X, the same way A_Y does. Two independent
-- IN tests would also admit a kind that is top-ranked only in the other country.
JOIN X USING(country_code, application_kind)
GROUP BY country_code, application_kind
the result is
Obviously, there are number of zeroes above because of intersection logic

TSQL : the top records of a given partition (conditional)

I need to get all the records in TABLE_A where at least the 2 last Status are vacant (relative to Inspection_Date) and the Room_ID does not exist in TABLE_B.
This is a simplified table I am using as an example:
TABLE_A:
Room_Id Status Inspection_Date
-------------------------------------
1 vacant 5/15/2015
2 occupied 5/21/2015
2 vacant 1/19/2016
1 occupied 12/16/2015
4 vacant 3/25/2016
3 vacant 8/27/2015
1 vacant 4/17/2016
3 vacant 12/12/2015
3 vacant 3/22/2016
4 occupied 2/2/2015
4 vacant 3/24/2015
TABLE_B:
Room_Id Status Inspection_Date
------------------------------------
1 vacant 5/15/2015
2 occupied 5/21/2015
2 vacant 1/19/2016
1 vacant 12/16/2015
1 vacant 4/17/2016
My result should look like this:
Room_Id Status Inspection_Date
---------------------------------
3 vacant 8/27/2015
3 vacant 12/12/2015
3 vacant 3/22/2016
4 occupied 2/2/2015
4 vacant 3/24/2015
4 vacant 3/25/2016
I have tried it this way, it works with the example but is not working with my data .. the logic is not complete:
-- Returns every Table_A row of the rooms whose two most recent inspections are both
-- 'vacant' and which do not appear in Table_B.
With cteA As
(
    -- Number each room's inspections from newest to oldest. Partitioning by Room_Id
    -- only (not by Status) makes RowNum 1 and 2 the room's last two inspections.
    Select Room_Id, Status,
           Row_Number() Over (Partition By Room_Id Order By Inspection_Date Desc) RowNum
    From Table_A
)
Select a.*
From Table_A a
Where a.Room_Id In
(
    -- both of the two latest inspections must be vacant
    Select Room_Id
    From cteA
    Where RowNum <= 2
    Group By Room_Id
    Having Count(Case When Status = 'vacant' Then 1 End) = 2
)
-- NOT EXISTS instead of NOT IN: a NULL Room_Id in Table_B would make NOT IN reject every row
And Not Exists (Select 1 From Table_B b Where b.Room_Id = a.Room_Id)
Order By a.Room_Id, a.Inspection_Date
Here is the schema:
-- Sample schema and data for the room-inspection question.
-- Identifiers are unquoted (backticks are MySQL-only) and dates use the ISO
-- 'YYYY-MM-DD' form so the script does not depend on a session date format.
CREATE TABLE TABLE_A (
    Room_Id int,
    Status varchar(55),
    Inspection_Date date
);
INSERT INTO TABLE_A (Room_Id, Status, Inspection_Date)
VALUES (1, 'vacant', '2015-05-15'),
       (2, 'occupied', '2015-05-21'),
       (2, 'vacant', '2016-01-19'),
       (1, 'occupied', '2015-12-16'),
       (4, 'vacant', '2016-03-25'),
       (3, 'vacant', '2015-08-27'),
       (1, 'vacant', '2016-04-17'),
       (3, 'vacant', '2015-12-12'),
       (3, 'vacant', '2016-03-22'),
       (4, 'occupied', '2015-02-02'),
       (4, 'vacant', '2015-03-24');
CREATE TABLE TABLE_B (
    Room_Id int,
    Status varchar(55),
    Inspection_Date date
);
-- the last VALUES row must not be followed by a comma
INSERT INTO TABLE_B (Room_Id, Status, Inspection_Date)
VALUES (1, 'vacant', '2015-05-15'),
       (2, 'occupied', '2015-05-21'),
       (2, 'vacant', '2016-01-19'),
       (1, 'vacant', '2015-12-16'),
       (1, 'vacant', '2016-04-17');
PLAIN
For each room in TABLE_A select the last date (as lastDate)
for each room in TABLE_A select previous date (as prevLastDate)
Get room_ids from lastDate which has the status 'vacant' (as lastDateVacant)
Get room_ids from prevLastDate which has the status 'vacant' (as prevLastDateVacant)
Filter TABLE_A to have only IDs which are there in lastDateVacant and prevLastDateVacant (inner)
Filter TABLE_A to have only IDs which are not in TABLE_B (left outer + IS NULL)
As the result you have:
-- Rooms whose latest and second-latest inspection dates both carry a 'vacant' row
-- and which have no row in TABLE_B; returns all TABLE_A rows of those rooms.
WITH newest AS (
    -- latest inspection date per room
    SELECT room_id AS room, MAX(inspection_date) AS seen_on
    FROM "TABLE_A"
    GROUP BY room_id
),
second_newest AS (
    -- latest inspection date per room that is strictly before the room's newest one
    SELECT room_id AS room, MAX(inspection_date) AS seen_on
    FROM "TABLE_A" ta
    INNER JOIN newest
        ON ta.room_id = newest.room
       AND ta.inspection_date < newest.seen_on
    GROUP BY room_id
),
newest_vacant AS (
    SELECT room_id AS room
    FROM "TABLE_A"
    WHERE status = 'vacant'
      AND (room_id, inspection_date) IN (SELECT room, seen_on FROM newest)
),
second_newest_vacant AS (
    SELECT room_id AS room
    FROM "TABLE_A"
    WHERE status = 'vacant'
      AND (room_id, inspection_date) IN (SELECT room, seen_on FROM second_newest)
)
SELECT a.*
FROM "TABLE_A" a
INNER JOIN newest_vacant
    ON newest_vacant.room = a.room_id
INNER JOIN second_newest_vacant
    ON second_newest_vacant.room = a.room_id
-- anti-join: keep only rooms that have no row in TABLE_B
WHERE NOT EXISTS (
    SELECT 1
    FROM "TABLE_B" b
    WHERE b.room_id = a.room_id
)
ORDER BY a.room_id ASC, a.inspection_date DESC
Window Function
Not sure if the syntax for TSQL is the same, but here is the shorter variant:
Rank the rows with a partition by room and an order by date.
Check for IDs whose rows ranked 1 and 2 have 'vacant' status, grouping by ID and keeping those that occurred more than once.
-- Window-function variant: rooms whose inspections ranked 1 and 2 (newest first)
-- are vacant and which have no row in TABLE_B.
WITH room AS (
    SELECT ranked.room
    FROM (
        SELECT room_id AS room, status,
               RANK() OVER (PARTITION BY room_id ORDER BY inspection_date DESC) AS date_rank
        FROM "TABLE_A"
    ) ranked  -- a derived table needs an alias
    WHERE ranked.date_rank IN (1, 2) AND ranked.status = 'vacant'
    GROUP BY ranked.room
    -- more than one vacant row among the two newest ranks
    HAVING COUNT(*) > 1
)
SELECT a.* FROM "TABLE_A" a
INNER JOIN room
    ON a.room_id = room.room
LEFT OUTER JOIN "TABLE_B" AS b
    ON a.room_id = b.room_id
-- keep only rooms without any TABLE_B row
WHERE b.room_id IS NULL
ORDER BY a.room_id ASC, a.inspection_date DESC
Your conditions translate almost directly into a query. You can use window functions for the vacant count and not exists for the relationship to table_b:
-- Rows of rooms that are absent from table_b and have at least two 'vacant' rows.
with unlisted as (
    -- rooms with no row in table_b, each row tagged with the room's vacant-row count
    select ta.*,
           sum(case when ta.status = 'vacant' then 1 else 0 end)
               over (partition by ta.room_id) as num_vacant
    from table_a ta
    where not exists (select 1
                      from table_b tb
                      where tb.room_id = ta.room_id
                     )
)
select u.*
from unlisted u
where u.num_vacant >= 2;
EDIT:
If you want the last two to be vacant, you can find the last record that is non-vacant and then count the ones later than that:
-- Rows of rooms that are absent from table_b and whose inspections since the last
-- non-vacant one number at least two.
select a.*
from (select a.*,
             -- Count the rows that are not older than the room's latest non-vacant
             -- inspection. When the room was never non-vacant, max_nonvacant is NULL,
             -- the comparison is unknown and every row counts.
             sum(case when a2.max_nonvacant > a.inspection_date then 0 else 1 end)
                 over (partition by a.room_id) as num_vacant_last
      from table_a a outer apply
           (select max(inspection_date) as max_nonvacant
            from table_a a2
            where a2.room_id = a.room_id and a2.status <> 'vacant'
           ) a2
      where not exists (select 1
                        from table_b b
                        where b.room_id = a.room_id
                       )
     ) a
where num_vacant_last >= 2;
This worked for me and I have checked again and again.
-- All TABLE_A rows of rooms whose two newest inspections are both vacant and
-- whose Room_Id does not appear in TABLE_B.
with Rooms as (
    -- rn = 1 is the room's newest inspection
    select
        Room_Id,
        Status,
        row_number() over (partition by Room_Id order by Inspection_Date desc) as rn
    from TABLE_A
), Candidates as (
    -- rooms for which both of the two newest rows are vacant
    select Room_Id
    from Rooms
    where rn <= 2
      and Status = 'vacant'
    group by Room_Id
    having count(*) = 2
)
select *
from TABLE_A
where Room_Id in (
    select Room_Id from Candidates
    except
    select Room_Id from TABLE_B
)
order by Room_Id, Inspection_Date desc
I did this test:
extracts all the room_id that considering the last two Status (equal status) in relation to the inspection_date (descending order):
-- All TABLE_A rows of rooms whose two most recent inspections are both 'vacant'
-- and which have no row in TABLE_B.
select a.* from TABLE_A a WHERE a.[Room_Id] IN
(
SELECT [Room_Id] FROM
(SELECT ROW_NUMBER() OVER(PARTITION BY [Room_Id] ORDER BY [Inspection_Date] DESC ) AS id,
[Room_Id],[Status],[Inspection_Date]
FROM TABLE_A
) AA
WHERE AA.ID <=2
--selecting the last two Inspection_Date
and [Status] = 'vacant'
-- both of the two selected rows must be vacant
GROUP BY [Room_Id],[Status] HAVING COUNT(*) >1
)
-- NOT EXISTS instead of NOT IN: Room_Id is nullable in TABLE_B, and a single NULL
-- there would make NOT IN filter out every row
AND NOT EXISTS (SELECT 1 FROM TABLE_B b WHERE b.Room_Id = a.[Room_Id])
order by a.Room_Id, a.Inspection_Date desc

TSQL getting max and min date with a seperate but not unique record

example table:
test_date | test_result | unique_ID
12/25/15 | 100 | 50
12/01/15 | 150 | 75
10/01/15 | 135 | 75
09/22/14 | 99 | 50
04/10/13 | 125 | 50
I need to find the first and last test date as well as the test result to match said date by user. So, I can group by ID, but not test result.
-- Asker's starting point: first and last test date per unique_id. The bracketed
-- text after each aggregate marks where the test_result belonging to that date
-- is still wanted; the query as written does not return it.
SELECT MAX(test_date)[need matching test_result],
MIN(test_date) [need matching test_result],
unique_id
from [table]
group by unique_id
THANKS!
-- Sample data for the first/last test date question.
Create TABLE #t
(
test_date date ,
Test_results int,
Unique_id int
)
-- 'YYYYMMDD' literals: the original 'mm/dd/yy' strings depend on the session's
-- DATEFORMAT and on the two-digit-year cutoff setting
INSERT INTO #t (test_date, Test_results, Unique_id)
VALUES ( '20151225',100,50 ),
( '20151201',150,75 ),
( '20151001',135,75 ),
( '20140922',99,50 ),
( '20130410',125,50 )
-- Returns, for every Unique_id, the row holding its earliest test date (Type
-- 'MinTestDate') and the row holding its latest test date (Type 'MaxTestDate').
select 'MinTestDate' as Type, a.test_date, a.Test_results, a.Unique_id
from #t a inner join (
select min(test_date) as test_datemin, max(test_date) as test_datemax, unique_id from #t
group by unique_ID) b
-- join on the id as well as the date: matching on the date alone also returns rows
-- of other ids that happen to have a test on the same day
on a.Unique_id = b.unique_id
and a.test_date = b.test_datemin
union all
select 'MaxTestDate' as Type, a.test_date, a.Test_results, a.Unique_id from #t a
inner join (
select min(test_date) as test_datemin, max(test_date) as test_datemax, unique_id from #t
group by unique_ID) b
on a.Unique_id = b.unique_id
and a.test_date = b.test_datemax
I would recommend window functions. The following returns the information on 2 rows per id:
-- Returns the first and the last test row of every unique_id
-- (a single row when the id has only one test).
select t.*
from (select t.*,
             row_number() over (partition by unique_id order by test_date) as seqnum_asc,
             row_number() over (partition by unique_id order by test_date desc) as seqnum_desc
      -- [table] is the asker's placeholder name; TABLE is a reserved word and must be quoted
      from [table] t
     ) t
-- keep only the earliest and the latest row per id
where t.seqnum_asc = 1 or t.seqnum_desc = 1;
For one row, use conditional aggregation (or pivot if you prefer):
-- One row per unique_id: first and last test date, each with the result recorded on it.
select unique_id,
       min(test_date) as first_test_date,
       -- seqnum_asc = 1 marks the earliest row, so this picks that row's result
       max(case when seqnum_asc = 1 then test_result end) as first_test_result,
       max(test_date) as last_test_date,
       max(case when seqnum_desc = 1 then test_result end) as last_test_result
from (select t.*,
             row_number() over (partition by unique_id order by test_date) as seqnum_asc,
             row_number() over (partition by unique_id order by test_date desc) as seqnum_desc
      -- [table] is the asker's placeholder name; TABLE is a reserved word and must be quoted
      from [table] t
     ) t
group by unique_id;
Consider using a combination of self-joins and derived tables:
-- One row per unique_id: earliest and latest test date with the result of each,
-- obtained by joining the per-id extremes back to the test rows.
SELECT first_test.unique_id,
       earliest.MinOftest_date,
       first_test.test_result AS Mintestdate_result,
       latest.MaxOftest_date,
       last_test.test_result AS Maxtestdate_result
FROM (
        -- earliest test date per id
        SELECT src.unique_ID,
               Min(src.test_date) AS MinOftest_date
        FROM TestTable AS src
        GROUP BY src.unique_ID
     ) AS earliest
INNER JOIN (
        -- latest test date per id
        SELECT src.unique_ID,
               Max(src.test_date) AS MaxOftest_date
        FROM TestTable AS src
        GROUP BY src.unique_ID
     ) AS latest
    ON latest.unique_ID = earliest.unique_ID
INNER JOIN TestTable AS first_test
    ON first_test.test_date = earliest.MinOftest_date
   AND first_test.unique_id = earliest.unique_ID
INNER JOIN TestTable AS last_test
    ON last_test.test_date = latest.MaxOftest_date
   AND last_test.unique_ID = latest.unique_ID;
OUTPUT
unique_id MinOftest_date Mintestdate_result MaxOftest_date Maxtestdate_result
50 4/10/2013 125 12/25/2015 100
75 10/1/2015 135 12/1/2015 150