T-SQL: Conditional NULL removal - sql

I need to select only the Room_IDs that have no instances where the Status is NULL.
For example here :
TABLE_A
Room_Id Status Inspection_Date
-----------------------------------
1 NULL 5/15/2015
2 occupied 5/21/2015
2 NULL 1/19/2016
1 occupied 12/16/2015
4 NULL 3/25/2016
3 vacant 8/27/2015
1 vacant 4/17/2016
3 vacant 12/12/2015
3 vacant 3/22/2016
4 vacant 2/2/2015
4 vacant 3/24/2015
My result should look like this:
Room_Id Status Inspection_Date
-----------------------------------
3 vacant 8/27/2015
3 vacant 12/12/2015
3 vacant 3/22/2016
Because Room_ID '3' has no instances where the Status is NULL

Quick example of how to do it:
DECLARE #tTable TABLE(
Room_Id INT,
Status VARCHAR(20),
Inspection_Date DATETIME)
INSERT INTO #tTable VALUES
(1, NULL, '5/15/2015'),
(1,NULL, '5/15/2015'),
(2,'occupied', '5/21/2015'),
(2,NULL, '1/19/2016'),
(1,'occupied', '12/16/2015'),
(4,NULL, '3/25/2016'),
(3,'vacant', '8/27/2015'),
(1,'vacant', '4/17/2016'),
(3,'vacant', '12/12/2015'),
(3,'vacant', '3/22/2016'),
(4,'vacant', '2/2/2015'),
(4,'vacant', '3/24/2015')
SELECT * FROM #tTable T1
WHERE Room_Id NOT IN (SELECT Room_ID FROM #tTable WHERE Status IS NULL)
Gives :
Room_Id | Status | Inspection_Date |
-------------------------------------------------
3 | vacant | 2015-08-27 00:00:00.000
3 | vacant | 2015-12-12 00:00:00.000
3 | vacant | 2016-03-22 00:00:00.000

Try this out:
SELECT *
FROM Table1
WHERE Room_ID NOT IN
(
SELECT DISTINCT Room_ID
FROM Table1
WHERE Status IS NULL
)
The sub query returns a list of unique room id's that, at one time or another, had a NULL status. The outer query looks at that list, and says "Return * where the room_ID IS NOT one those in the subquery.
If you want to try it in SQL Fiddle, here is the Schema:
CREATE TABLE Table1
(Room_ID int, Status varchar(8), Inspection_Date datetime)
;
INSERT INTO Table1
(Room_ID, Status, Inspection_Date)
VALUES
(1, NULL, '2015-05-15 00:00:00'),
(2, 'occupied', '2015-05-21 00:00:00'),
(2, NULL, '2016-01-19 00:00:00'),
(1, 'occupied', '2015-12-16 00:00:00'),
(4, NULL, '2016-03-25 00:00:00'),
(4, 'vacant', '2015-08-27 00:00:00'),
(1, 'vacant', '2016-04-17 00:00:00'),
(3, 'vacant', '2015-12-12 00:00:00'),
(3, 'vacant', '2016-03-22 00:00:00'),
(4, 'vacant', '2015-02-02 00:00:00'),
(4, 'vacant', '2015-03-24 00:00:00'),
(2, NULL, '2015-05-22 00:00:00')
;

As alternative to Hashman, I just prefer to use not exists over not in for these types of queries.
Creating some test data
Note that I just kept the same date for everything since it's not imperative to the question.
create table #table_a (
Room_Id int,
Status varchar(32),
Inspection_Date date);
insert #table_a (Room_Id, Status, Inspection_Date)
values
(1, null, getdate()),
(2, 'occupied', getdate()),
(2, null, getdate()),
(1, 'occupied', getdate()),
(4, null, getdate()),
(3, 'vacant', getdate()),
(1, 'vacant', getdate()),
(3, 'vacant', getdate()),
(3, 'vacant', getdate()),
(4, 'vacant', getdate()),
(4, 'vacant', getdate());
The query
select *
from #table_a t1
where not exists (
select *
from #table_a t2
where t1.Room_Id = t2.Room_Id
and Status is null);
The results
Room_Id Status Inspection_Date
----------- -------------------------------- ---------------
3 vacant 2016-06-17
3 vacant 2016-06-17
3 vacant 2016-06-17

You can use CTE and NOT EXIST like below code
WITH bt
AS ( SELECT RoomId ,
Status,
Inspection_Date
FROM dbo.Table_1
)
SELECT *
FROM bt AS a
WHERE NOT EXISTS ( SELECT 1
FROM bt
WHERE bt.RoomId = a.RoomId
AND bt.Status IS NULL );

Related

Trying to include rows with a Full Outer Join & Comparing in the same table

I'm attempting to return the First person to check in in each Room_id by joining the PERSON and CHECK_IN tables
http://sqlfiddle.com/#!17/35d930 > Select PostGreSQL 9.6 > Build Schema > Paste Query
CREATE TABLE person
("id" int)
;
INSERT INTO person
("id")
VALUES
(1),
(2),
(3),
(4),
(5),
(6)
;
CREATE TABLE check_in
("id" int, "person_id" int, "room_id" int, "check_in_date" timestamp, "check_out_date" timestamp)
;
INSERT INTO check_in
("id", "person_id", "room_id", "check_in_date", "check_out_date")
VALUES
(100, 1, 202, '2020-10-01 00:00:00', '2021-09-05 00:00:00'),
(101, 2, 201, '2020-12-15 00:00:00', '2021-02-15 00:00:00'),
(104, 3, 204, '2021-05-20 00:00:00', '2021-07-04 00:00:00'),
(106, 4, 202, '2022-08-01 00:00:00', NULL),
(108, 3, 204, '2021-08-15 00:00:00', NULL)
;
select c1.person_id, c1.room_id, c1.check_in_date
from check_in c1
FULL OUTER JOIN check_in c2 on c2.room_id = c1.room_id
where c1.check_in_date < c2.check_in_date
order by c1.room_id
I'm returning room_ids 202 and 204, but cannot get the select to return 201.. Should I not be using a full outer join?
We don't need to join the person table as we have all the info we need in the check_in table.
select id
,person_id
,room_id
,check_in_date
,check_out_date
from (
select *
,row_number() over(partition by room_id order by check_in_date desc) as rn
from check_in
) t
where rn = 1
id
person_id
room_id
check_in_date
check_out_date
101
2
201
2020-12-15 00:00:00
2021-02-15 00:00:00
106
4
202
2022-08-01 00:00:00
null
108
3
204
2021-08-15 00:00:00
null
Fiddle
Your where condition transforms the outer join into an inner one. See the answer with window function above as well
select c1.person_id, c1.room_id, c1.check_in_date
from check_in c1
where c1.check_in_date = (select min(c2.check_in_date) from check_in c2
where c2.room_id = c1.room_id )
order by c1.room_id
select c1.person_id, c1.room_id, c1.check_in_date
from check_in as c1
where c1.check_in_date in (select min(check_in_date)
from check_in as c2
join person as p
on p.id = c2.person_id
group by c2.room_id)
group by 2,1,3
order by c1.room_id

Function that returns MAX OR MIN dates based on ID count

I have a task in SQL Server where I need to return the RESULT_DATE column using ID, PRODUCT_ID and DATE columns. Task criteria:
If DATE column is filled once for each PRODUCT_ID then I need to return the only date (like for PRODUCT_ID 1 and 3). Let`s say its MIN date.
If DATE column is filled more than one time (like for PRODUCT_ID 2) then I need to return the next filled DATE row.
Data:
CREATE TABLE #temp (
ID INT,
PRODUCT_ID INT,
[DATE] DATETIME
)
INSERT #temp (ID, PRODUCT_ID, DATE) VALUES
(1, 1, '2008-04-24 00:00:00.000'),
(2, 1, NULL),
(3, 2, '2015-12-09 00:00:00.000'),
(4, 2, NULL),
(5, 2, NULL),
(6, 2, '2022-01-01 13:06:45.253'),
(7, 2, NULL),
(8, 2, '2022-01-19 13:06:45.253'),
(9, 3, '2018-04-25 00:00:00.000'),
(10,3, NULL),
(11,3, NULL)
ID
PRODUCT_ID
DATE
RESULT_DATE
1
1
2008-04-24 00:00:00.000
2008-04-24 00:00:00.000
2
1
NULL
2008-04-24 00:00:00.000
3
2
2015-12-09 00:00:00.000
2022-01-01 13:06:45.253
4
2
NULL
2022-01-01 13:06:45.253
5
2
NULL
2022-01-01 13:06:45.253
6
2
2022-01-01 13:06:45.253
2022-01-19 13:06:45.253
7
2
NULL
2022-01-19 13:06:45.253
8
2
2022-01-19 13:06:45.253
2022-01-19 13:06:45.253
9
3
2018-04-25 00:00:00.000
2018-04-25 00:00:00.000
10
3
NULL
2018-04-25 00:00:00.000
11
3
NULL
2018-04-25 00:00:00.000
I have tried different techniques, for example using LEAD and LAG SQL function combinations. The latest script: (However, still not working)
SELECT
COALESCE(DATE,
CAST(
SUBSTRING(
MAX(CAST(DATE AS BINARY(4)) + CAST(DATE AS BINARY(4))) OVER ( PARTITION BY PRODUCT_ID ORDER BY DATE ROWS UNBOUNDED PRECEDING)
,5,4)
AS INT)
) AS RESULT_DATE,
*
FROM TABLE
You can use a CTE, Select all rows with a non-NULL Date giving each a row_number, then use a second CTE to fetch all rows from the first CTE equivalent to the date with the largest row number per product_id that is less than 3. Finally join this CTE to the original table to supply the 2nd Date to each row:
Set Up
CREATE TABLE #temp (
ID INT,
PRODUCT_ID INT,
MyDATE DATETIME
)
INSERT #temp (ID, PRODUCT_ID, MyDate)
VALUES
(1, 1, '2008-04-24 00:00:00.000'),
(2, 1, NULL),
(3, 2, '2015-12-09 00:00:00.000'),
(4, 2, NULL),
(5, 2, NULL),
(6, 2, '2022-01-01 13:06:45.253'),
(7, 2, NULL),
(8, 2, '2022-01-19 13:06:45.253'),
(9, 3, '2018-04-25 00:00:00.000'),
(10,3, NULL),
(11,3, NULL);
Query:
;WITH CTE
AS
(
SELECT ID, Product_ID, MyDate,
ROW_NUMBER() OVER (PARTITION BY Product_ID ORDER BY Id) AS rn
from #temp
WHERE MyDate IS NOT NULL
),
CTE2
AS
(
SELECT *
FROM CTE C1
WHERE C1.rn < 3
AND
C1.rn =
(SELECT MAX(rn) FROM CTE WHERE Product_Id = C1.Product_Id AND rn<3)
)
SELECT T.Id, T.Product_Id, T.MyDate, C.MyDate As Result_date
FROM #temp T
INNER JOIN CTE2 C
ON T.Product_Id = C.Product_Id
ORDER BY T.Id;
Results:
Id Product_Id MyDate Result_Date
1 1 2008-04-24 00:00:00.000 2008-04-24 00:00:00.000
2 1 NULL 2008-04-24 00:00:00.000
3 2 2015-12-09 00:00:00.000 2022-01-01 13:06:45.253
4 2 NULL 2022-01-01 13:06:45.253
5 2 NULL 2022-01-01 13:06:45.253
6 2 2022-01-01 13:06:45.253 2022-01-01 13:06:45.253
7 2 NULL 2022-01-01 13:06:45.253
8 2 2022-01-19 13:06:45.253 2022-01-01 13:06:45.253
9 3 2018-04-25 00:00:00.000 2018-04-25 00:00:00.000
10 3 NULL 2018-04-25 00:00:00.000
11 3 NULL 2018-04-25 00:00:00.000

postgresql How show most frequent value per day date

I've got a problem with a query that is supposed to return the value which occur most per date
+------------+------------------+
| Date | value |
+------------+------------------+
| 2020-01-01 | Programmer |
| 2020-01-02 | Technician |
| 2020-01-03 | Business Analyst |
+------------+------------------+
So far I have done
select count(headline) as asd, publication_date, employer -> 'name' as dsa from jobhunter
group by publication_date,dsa
ORDER BY publication_date DESC
But it shows 2020-12-31 19:06:00 instead of just YYYY-MM-DD
Any idea on how to fix this?
enter image description here
Test data:
create table tbl (
id serial primary key,
row_datetime TIMESTAMP,
row_val VARCHAR(60)
);
insert into tbl (row_datetime, row_val) values ('2021-01-01 00:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-01 01:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-01 02:00:00', 'b');
insert into tbl (row_datetime, row_val) values ('2021-01-02 00:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-02 01:00:00', 'b');
insert into tbl (row_datetime, row_val) values ('2021-01-02 02:00:00', 'b');
Example query:
SELECT dt, val, cnt
FROM (
SELECT dt, val, cnt, ROW_NUMBER() OVER (PARTITION BY dt ORDER BY cnt DESC) AS row_num
FROM (
SELECT dt, val, COUNT(val) AS cnt
FROM (
SELECT DATE(row_datetime) AS dt, row_val AS val FROM tbl
) AS T1 GROUP BY dt, val
) AS T2
) AS T3
WHERE row_num=1
ORDER BY dt ASC
You can additionally customize your query to optimize the performance, get more fields, etc.

SQL Server Query for average value over a date period

DECLARE #SampleOrderTable TABLE
(
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
)
INSERT INTO #SampleOrderTable (pkPersonID, OrderDate, Amount)
VALUES (1, '12/10/2019', '762.84'),
(2, '11/10/2019', '886.32'),
(3, '11/9/2019', '10245.00')
How do I select the the last 4 days prior to OrderDate and the average Amount over that period?
So result data would be:
pkPersonID Date Amount
------------------------------------
1 '12/7/2019' 190.71
1 '12/8/2019' 190.71
1 '12/9/2019' 190.71
1 '12/10/2019' 190.71
2 '12/7/2019' 221.58
2 '12/8/2019' 221.58
2 '12/9/2019' 221.58
2 '12/10/2019' 221.58
3 '11/6/2019' 2561.25
3 '11/7/2019' 2561.25
3 '11/8/2019' 2561.25
3 '11/9/2019' 2561.25
You may try with the following approach, using DATEADD(), windowed COUNT() and VALUES() table value constructor:
Table:
DECLARE #SampleOrderTable TABLE (
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
)
INSERT INTO #SampleOrderTable (pkPersonID, OrderDate, Amount)
VALUES (1, '20191210', '762.84'),
(2, '20191210', '886.32'),
(3, '20191109', '10245.00')
Statement:
SELECT
t.pkPersonID,
DATEADD(day, -v.Day, t.OrderDate) AS [Date],
CONVERT(numeric(18, 6), Amount / COUNT(Amount) OVER (PARTITION BY t.pkPersonID)) AS Amount
FROM #SampleOrderTable t
CROSS APPLY (VALUES (0), (1), (2), (3)) v(Day)
ORDER BY t.pkPersonID, [Date]
Result:
pkPersonID Date Amount
1 07/12/2019 00:00:00 190.710000
1 08/12/2019 00:00:00 190.710000
1 09/12/2019 00:00:00 190.710000
1 10/12/2019 00:00:00 190.710000
2 07/12/2019 00:00:00 221.580000
2 08/12/2019 00:00:00 221.580000
2 09/12/2019 00:00:00 221.580000
2 10/12/2019 00:00:00 221.580000
3 06/11/2019 00:00:00 2561.250000
3 07/11/2019 00:00:00 2561.250000
3 08/11/2019 00:00:00 2561.250000
3 09/11/2019 00:00:00 2561.250000
You can use sql functions like AVG, DATEADD and GETDATE.
SELECT AVG(Amount) as AverageAmount
FROM #SampleOrderTable
WHERE OrderDate >= DATEADD(DAY, -4, GETDATE())
DECLARE #SampleOrderTable TABLE (
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
);
INSERT INTO #SampleOrderTable
(pkPersonID, OrderDate, Amount)
VALUES
(1, '12/20/2019', 762.84),
(2, '12/20/2019', 886.32),
(3, '12/20/2019', 10245.00),
(4, '12/19/2019', 50.00),
(5, '12/19/2019', 100.00),
(6, '09/01/2019', 200.00),
(7, '09/01/2019', 300.00),
(8, '12/15/2019', 400.00),
(9, '12/15/2019', 500.00),
(10, '09/02/2019', 150.00),
(11, '09/02/2019', 1100.00),
(12, '09/02/2019', 1200.00),
(13, '09/02/2019', 1300.00),
(14, '09/02/2019', 1400.00),
(15, '09/02/2019', 1500.00);
SELECT OrderDate,AVG(Amount) AS Average_Value
FROM #SampleOrderTable
WHERE DATEDIFF(DAY, CAST(OrderDate AS DATETIME), CAST(GETDATE() AS Datetime)) <= 4
GROUP BY OrderDate;

SQL Server episode identification

I am working with a blood pressure database in SQL Server which contains patient_id, timestamp (per minute) and systolicBloodPressure.
My goals are to find:
the number of episodes in which a patient is under a certain blood pressure threshold
An episode consists of the timestmap where the patient drops below a certain threshold until the timestamp where the patient comes above the threshold.
the mean blood pressure per episode per patient
the duration of the episode per episode per patient
What I have tried so far:
I am able to identify episodes by just making a new column which sets to 1 if threshold is reached.
select *
, CASE
when sys < threshold THEN '1'
from BPDATA
However , I am not able to 'identify' different episodes within the patient; episode1 episode 2 with their relative timestamps.
Could someone help me with this? Or is there someone with a better different solution?
EDIT: Sample data with example threshold 100
ID Timestamp SysBP below Threshold
----------------------------------------------------
1 9:38 110 Null
1 9:39 105 Null
1 9:40 96 1
1 9:41 92 1
1 9:42 102 Null
2 12:23 95 1
2 12:24 98 1
2 12:25 102 Null
2 12:26 104 Null
2 12:27 94 1
2 12:28 88 1
2 12:29 104 Null
Thanks for the sample data.
This should work:
declare #t table (ID int, Timestamp time, SysBP int, belowThreshold bit)
insert #t
values
(1, '9:38', 110, null),
(1, '9:39', 105, null),
(1, '9:40', 96, 1),
(1, '9:41', 92, 1),
(1, '9:42', 102, null),
(2, '12:23', 95, 1),
(2, '12:24', 98, 1),
(2, '12:25', 102, null),
(2, '12:26', 104, null),
(2, '12:27', 94, 1),
(2, '12:28', 88, 1),
(2, '12:29', 104, null)
declare #treshold int = 100
;with y as (
select *, case when lag(belowThreshold, 1, 0) over(partition by id order by timestamp) = belowThreshold then 0 else 1 end epg
from #t
),
z as (
select *, sum(epg) over(partition by id order by timestamp) episode
from y
where sysbp < #treshold
)
select id, episode, count(episode) over(partition by id) number_of_episodes_per_id, avg(sysbp) avg_sysbp, datediff(minute, min(timestamp), max(timestamp))+1 episode_duration
from z
group by id, episode
This answer relies on LEAD() and LAG() functions so only works on 2012 or later:
Setup:
CREATE TABLE #bloodpressure
(
Patient_id int,
[TimeStamp] SmallDateTime,
SystolicBloodPressure INT
)
INSERT INTO #bloodpressure
VALUES
(1, '2017-01-01 09:01', 60),
(1, '2017-01-01 09:02', 55),
(1, '2017-01-01 09:03', 60),
(1, '2017-01-01 09:04', 70),
(1, '2017-01-01 09:05', 72),
(1, '2017-01-01 09:06', 75),
(1, '2017-01-01 09:07', 60),
(1, '2017-01-01 09:08', 50),
(1, '2017-01-01 09:09', 52),
(1, '2017-01-01 09:10', 53),
(1, '2017-01-01 09:11', 65),
(1, '2017-01-01 09:12', 71),
(1, '2017-01-01 09:13', 73),
(1, '2017-01-01 09:14', 74),
(2, '2017-01-01 09:01', 70),
(2, '2017-01-01 09:02', 75),
(2, '2017-01-01 09:03', 80),
(2, '2017-01-01 09:04', 70),
(2, '2017-01-01 09:05', 72),
(2, '2017-01-01 09:06', 75),
(2, '2017-01-01 09:07', 60),
(2, '2017-01-01 09:08', 50),
(2, '2017-01-01 09:09', 52),
(2, '2017-01-01 09:10', 53),
(2, '2017-01-01 09:11', 65),
(2, '2017-01-01 09:12', 71),
(2, '2017-01-01 09:13', 73),
(2, '2017-01-01 09:14', 74),
(3, '2017-01-01 09:12', 71),
(3, '2017-01-01 09:13', 60),
(3, '2017-01-01 09:14', 74)
Now using Lead And Lag to find the previous rows values, to find whether this is the beginning or end of a sequence of low blood pressures, in combination with a common table expression. Using a UNION of start and end events ensures that an event which covers just one minute is recorded as both a start and an end event.
;WITH CTE
AS
(
SELECT *,
LAG(SystolicBloodPressure,1)
OVER (PaRTITION BY Patient_Id ORDER BY TimeStamp) As PrevValue,
Lead(SystolicBloodPressure,1)
OVER (PaRTITION BY Patient_Id ORDER BY TimeStamp) As NextValue
FROM #bloodpressure
),
CTE2
AS
(
-- Get Start Events (EventType 1)
SELECT 1 As [EventType], Patient_id, TimeStamp,
ROW_NUMBER() OVER (ORDER BY Patient_id, TimeStamp) AS RN
FROM CTE
WHERE (PrevValue IS NULL AND SystolicBloodPressure < 70) OR
(PrevValue >= 70 AND SystolicBloodPressure < 70)
UNION
-- Get End Events (EventType 2)
SELECT 2 As [EventType], Patient_id, TimeStamp,
ROW_NUMBER() OVER (ORDER BY Patient_id, TimeStamp) AS RN
FROM CTE
WHERE (NextValue IS NULL AND SystolicBloodPressure < 70 ) OR
(NextValue >= 70 AND SystolicBloodPressure < 70)
)
SELECT C1.Patient_id, C1.TimeStamp As EventStart, C2.TimeStamp As EventEnd
FROM CTE2 C1
INNER JOIN CTE2 C2
ON C1.Patient_id = C2.Patient_id AND C1.RN = C2.RN
WHERE C1.EventType = 1 AND C2.EventType = 2
ORDER BY C1.Patient_id, C1.TimeStamp