PostgreSQL: how to show the most frequent value per day (date) - SQL

I've got a problem with a query that is supposed to return the value which occur most per date
+------------+------------------+
| Date | value |
+------------+------------------+
| 2020-01-01 | Programmer |
| 2020-01-02 | Technician |
| 2020-01-03 | Business Analyst |
+------------+------------------+
So far I have done
-- Asker's attempt: count job postings per (publication_date, employer name).
-- NOTE(review): the output shows a full timestamp (see below), so each distinct
-- time makes its own group; casting to a date (e.g. publication_date::date)
-- would bucket by calendar day -- TODO confirm column type in the real schema.
select count(headline) as asd, publication_date, employer -> 'name' as dsa from jobhunter
group by publication_date,dsa
ORDER BY publication_date DESC
But it shows 2020-12-31 19:06:00 instead of just YYYY-MM-DD
Any idea on how to fix this?
(screenshot of the query output omitted)

Test data:
-- Test data: timestamped rows whose value should be ranked per calendar day.
create table tbl (
id serial primary key,
row_datetime TIMESTAMP,
row_val VARCHAR(60)
);
-- 2021-01-01: 'a' twice, 'b' once -> most frequent is 'a'.
insert into tbl (row_datetime, row_val) values ('2021-01-01 00:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-01 01:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-01 02:00:00', 'b');
-- 2021-01-02: 'b' twice, 'a' once -> most frequent is 'b'.
insert into tbl (row_datetime, row_val) values ('2021-01-02 00:00:00', 'a');
insert into tbl (row_datetime, row_val) values ('2021-01-02 01:00:00', 'b');
insert into tbl (row_datetime, row_val) values ('2021-01-02 02:00:00', 'b');
Example query:
-- Most frequent value per day: count occurrences per (day, value), rank the
-- values within each day by count, and keep the top-ranked value per day.
SELECT dt, val, cnt
FROM (
    SELECT dt, val, cnt,
           -- Tie-break on val so the winner is deterministic when two values
           -- share the same count on a day (the original pick was arbitrary).
           ROW_NUMBER() OVER (PARTITION BY dt ORDER BY cnt DESC, val ASC) AS row_num
    FROM (
        -- One row per (day, value) with its occurrence count.  DATE() strips
        -- the time-of-day portion so rows bucket by calendar day; the original
        -- extra projection subquery around this was redundant and is merged in.
        SELECT DATE(row_datetime) AS dt, row_val AS val, COUNT(row_val) AS cnt
        FROM tbl
        GROUP BY DATE(row_datetime), row_val
    ) AS T2
) AS T3
WHERE row_num = 1
ORDER BY dt ASC
You can additionally customize your query to optimize the performance, get more fields, etc.

Related

risk_score result for each of month

I want to generate highest risk_score result for each of month (Jan, Feb & Mar)
Displaying the following columns: Firm_id_1, risk_score_Jan, risk_score_Feb, risk_score_Mar
-- Question schema: one risk assessment per firm per date.
CREATE table firm_risk (
firm_id_1 INT,
assessment_date DATE,
risk_score FLOAT
);
-- NOTE(review): these literals are M/D/YYYY; in Postgres they only parse this
-- way under DateStyle MDY.  The answer further down re-creates the data with
-- unambiguous ISO dates instead.
INSERT INTO firm_risk (firm_id_1, assessment_date, risk_score)
VALUES (123, '1/01/2018', 0.43),
(123, '1/28/2018', 0.80),
(123, '2/11/2018', 0.28),
(123, '2/23/2018', 0.91),
(123, '3/11/2018', 0.08),
(123, '3/31/2018', 0.60),
(456, '1/4/2018', 0.87),
(456, '1/6/2018', 0.02),
(456, '1/20/2018', 0.39),
(456, '2/3/2018', 0.10),
(456, '3/1/2018', 0.12),
(789, '1/1/2018', 0.20),
(789, '3/1/2018', 0.17);
-- Sanity check: view the inserted rows.
SELECT * FROM firm_risk;
-- Attempt 1: the uncorrelated subquery returns the single latest date across
-- ALL firms, so this keeps only rows matching that one date -- it does not
-- produce a per-month (or per-firm) maximum.
SELECT firm_id_1, date_part('month', assessment_date) AS AD
FROM firm_risk
WHERE assessment_date = (SELECT MAX (assessment_date) FROM firm_risk)
GROUP BY firm_id_1, risk_score, assessment_date;
-- Target table for the pivoted result: one row per firm, one column per month.
CREATE table latest_risk_score (
firm_id_2 integer,
latest_risk_score_Jan float,
latest_risk_score_Feb float,
latest_risk_score_Mar float
);
SELECT * FROM latest_risk_score;
-- Seed one row per firm; the month columns start out NULL.
INSERT INTO latest_risk_score (firm_id_2)
VALUES (123),
(456),
(789);
-- Attempt 2: the join adds nothing (latest_risk_score only supplies the same
-- firm ids), and grouping by (firm, score, date) still yields one row per
-- distinct assessment rather than one value per month.
SELECT firm_risk.firm_id_1, date_part('month', assessment_date), firm_risk.risk_score
FROM firm_risk
INNER JOIN latest_risk_score
ON firm_risk.firm_id_1 = latest_risk_score.firm_id_2
GROUP BY firm_risk.firm_id_1, firm_risk.risk_score, assessment_date;
-- Attempt 3: again compares against the single global MAX date, and LIKE on a
-- DATE relies on an implicit text cast -- '_%-01-2018%' will not match
-- Postgres's default YYYY-MM-DD output.
SELECT firm_risk.firm_id_1, date_part('month', assessment_date), firm_risk.risk_score
FROM firm_risk
WHERE assessment_date = (SELECT MAX (assessment_date) FROM firm_risk)
AND assessment_date LIKE '_%-01-2018%';
-- Attempt 4: compares a DATE against date_part()'s numeric result, and
-- ('month', assessment_date) in GROUP BY is a row constructor, not a
-- date_part() call -- this does not express "last 3 months".
SELECT firm_risk.firm_id_1, date_part('month', assessment_date)
FROM firm_risk
WHERE assessment_date >= date_part('month', assessment_date - '3 months')
GROUP BY firm_risk.firm_id_1, ('month', assessment_date);
-- Attempts 5-7 (one per month): these fail because "firm_risk.firm_id_1" names
-- a column as if it were schema.table, and double quotes denote identifiers in
-- Postgres, so "2018-01-%" is read as a column name rather than a string
-- pattern.  The ORDER BY inside a scalar MAX() subquery is also redundant.
UPDATE latest_risk_score SET latest_risk_score_Jan = (SELECT Risk_Score FROM firm_risk.firm_id_1 WHERE Assessment_Date = (SELECT MAX(Assessment_Date)
FROM firm_risk.firm_id_1 WHERE firm_id_1 = 123 AND Assessment_Date LIKE "2018-01-%" ORDER BY Assessment_Date))
WHERE firm_id_1 = 123;
update latest_risk_score
set latest_risk_score_Feb = (select Risk_Score from firm_risk.firm_id_1 where Assessment_Date = (select max(Assessment_Date)
from firm_risk.firm_id_1 where firm_id_1 = 123 and Assessment_Date like "2018-02-%" order by Assessment_Date))
where firm_id_1 = 123;
update latest_risk_score
set latest_risk_score_Mar = (select Risk_Score from firm_risk.firm_id_1 where Assessment_Date = (select max(Assessment_Date)
from firm_risk.firm_id_1 where firm_id_1 = 123 and Assessment_Date like "2018-03-%" order by Assessment_Date))
where firm_id_1 = 123;
select * from latest_risk_score;
Assuming postgres is relevant (due to existence of "date_part" in question)
-- Answer: same schema, re-created with unambiguous ISO-8601 date literals.
CREATE table firm_risk (
firm_id_1 INT,
assessment_date DATE,
risk_score FLOAT
);
INSERT INTO firm_risk (firm_id_1, assessment_date, risk_score)
VALUES (123, '2018-01-01', 0.43),
(123, '2018-01-28', 0.80),
(123, '2018-02-11', 0.28),
(123, '2018-02-23', 0.91),
(123, '2018-03-11', 0.08),
(123, '2018-03-31', 0.60),
(456, '2018-01-04', 0.87),
(456, '2018-01-06', 0.02),
(456, '2018-01-20', 0.39),
(456, '2018-02-03', 0.10),
(456, '2018-03-01', 0.12),
(789, '2018-01-01', 0.20),
(789, '2018-03-01', 0.17);
-- Conditional-aggregation pivot: one row per firm, max risk score per month.
-- Each CASE passes risk_score through only for its target month; MAX then
-- picks the highest such score (aggregates ignore the NULLs from other months).
-- NOTE(review): the month filter matches months 1-3 of ANY year; fine for this
-- 2018-only dataset, but add a year predicate for multi-year data.
SELECT
firm_risk.firm_id_1
, max(case when date_part('month',assessment_date) = 1 then firm_risk.risk_score end) jan_risk
, max(case when date_part('month',assessment_date) = 2 then firm_risk.risk_score end) feb_risk
, max(case when date_part('month',assessment_date) = 3 then firm_risk.risk_score end) mar_risk
FROM firm_risk
WHERE date_part('month',assessment_date) in (1,2,3)
GROUP BY
firm_risk.firm_id_1
firm_id_1 | jan_risk | feb_risk | mar_risk
--------: | :------- | :------- | :-------
789 | 0.2 | null | 0.17
456 | 0.87 | 0.1 | 0.12
123 | 0.8 | 0.91 | 0.6
db<>fiddle here

Writing single SQL query satisfying two cases

After the comments received, I am rephrasing this question with required data.
Reference: SQL query to exclude some records from the output
Vertica analytical functions: https://www.vertica.com/blog/analytic-queries-vertica/
Table 1:
-- Table 1: maps a group to its member devices (one row per membership).
create table etl_group_membership
(
group_item_id int not null,
member_item_id int not null
);
-- Group 335640 (the group used in the question) has members 117722, 104151, 5316.
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335640, 117722);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335640, 104151);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335640, 5316);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335641, 117723);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335641, 104152);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335641, 5317);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335642, 117724);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335642, 104153);
INSERT INTO etl_group_membership (group_item_id, member_item_id) VALUES (335642, 5318);
Table 2:
-- Table 2: maps each device to the items polled on it (one row per item).
create table v_poll_item
(
device_item_id int not null,
item_id int not null
);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (117722, 273215);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (117722, 117936);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (117722, 117873);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (117722, 123305);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (104151, 240006);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (104151, 240005);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (104151, 239415);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (104151, 239414);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (5316, 118310);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (5316, 130627);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (5316, 298564);
INSERT INTO v_poll_item (device_item_id, item_id) VALUES (5316, 118311);
Table 3: Note that im_utilization can be NULL as well
-- Table 3: CPU utilization samples per item; im_utilization can be NULL.
create table nrm_cpustats_rate
(
item_id int not null,
tstamp datetime not null,
-- Fixed: the original had a trailing comma after this column definition,
-- which is a syntax error in the column list.
im_utilization float
);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (273215, '2021-06-28 19:55:00.000000', 2);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (273215, '2021-06-27 23:35:00.000000', 24);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (273215, '2021-06-26 14:05:00.000000', 27);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (273215, '2021-06-25 09:05:00.000000', 29);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (117936, '2021-06-28 19:30:00.000000', 17);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (117936, '2021-06-28 19:15:00.000000', 35);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (117936, '2021-06-28 19:05:00.000000', 50);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (117936, '2021-06-27 05:45:00.000000', 89);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (117936, '2021-06-25 09:20:00.000000', 37);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (117936, '2021-06-25 09:10:00.000000', 51);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (117936, '2021-06-25 08:50:00.000000', 90);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (118310, '2021-06-23 04:10:00.000000', 51);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (118310, '2021-06-23 03:15:00.000000', 48);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (118310, '2021-06-22 22:20:00.000000', 19);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (239414, '2021-06-22 17:10:00.000000', 11);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (239414, '2021-06-22 16:30:00.000000', 37);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (239414, '2021-06-22 16:35:00.000000', 38);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (239414, '2021-06-28 18:45:00.000000', 74);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (239414, '2021-06-28 18:48:00.000000', 76);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (239414, '2021-06-28 18:50:00.000000', 77);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (118311, '2021-06-28 00:40:00.000000', 29);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (118311, '2021-06-23 22:30:00.000000', 37);
INSERT INTO nrm_cpustats_rate (item_id, tstamp, im_utilization) VALUES (118311, '2021-06-23 22:25:00.000000', 92);
To get the device items ids in a group:
SELECT member_item_id FROM etl_group_membership WHERE group_item_id = 335640;
From the list of device item ids retrieved, to get the list of item_ids:
SELECT item_id FROM v_poll_item WHERE device_item_id IN (<devices retrieved from previous query>);
Inputs:
Two time ranges: yesterday until 7 days back (AND tstamp > '2021-06-22 00:00:00.000000'AND tstamp <= '2021-06-28 23:59:59.000000')
Group id: 335640
breach_threshold: 25
Minimum number of breaches each day: 2
Expected output:
breached means im_utilization is >= 25
Pick only those records where count_of_breached in a given day >= 2
That is, records with item_id 273215 are excluded because even though the number of breaches (>= 25) are 2, there's only one each day
device_item_id | item_id | count_of_breach | date_when_breached | max_utilization | max_utilization_tstamp
=====================================================================================================================
117722 | 117936 | 2 | 2021-06-28 | 90 | 2021-06-25 08:50:00.000000
117722 | 117936 | 3 | 2021-06-25 | 90 | 2021-06-25 08:50:00.000000
5316 | 118310 | 2 | 2021-06-23 | 51 | 2021-06-23 04:10:00.000000
5316 | 118311 | 2 | 2021-06-23 | 92 | 2021-06-23 22:25:00.000000
104151 | 239414 | 2 | 2021-06-22 | 77 | 2021-06-28 18:50:00.000000
104151 | 239414 | 3 | 2021-06-28 | 77 | 2021-06-28 18:50:00.000000
Even if a single SQL query cannot be written to produce this output, can two optimized queries be suggested. #marcothesane pointed that even the query to get daily breaches can be written in a better way.
UPDATE:
This query has worked for me for finding the max im_utilization and the tstamp when it was max. I am not sure, though, why I had to use the timestamp range in two places!
-- Max im_utilization per item within the window, joined back to the base table
-- to recover the timestamp(s) at which that max occurred.  The time-range
-- predicate must appear BOTH in the aggregate subquery and in the join: the
-- join re-reads the base table and could otherwise match an equal utilization
-- value recorded outside the window.
SELECT t.item_id, t.tstamp, t.im_utilization
FROM (
    SELECT item_id, MAX(im_utilization) AS max_cpu
    FROM nrm_cpustats_rate
    -- NOTE(review): the doubly nested v_poll_item lookup feeds item_ids back in
    -- as device_item_ids; a single level (WHERE device_item_id IN (117722,
    -- 104151, 5316)) looks like what was intended -- TODO confirm.
    WHERE item_id IN (SELECT item_id from v_poll_item WHERE device_item_id IN (SELECT item_id FROM v_poll_item WHERE device_item_id IN (117722, 104151, 5316)))
    -- Fixed: both timestamp literals below were missing their closing quote.
    AND tstamp > '2021-06-22 00:00:00.000000'
    AND tstamp <= '2021-06-28 23:59:59.000000'
    GROUP BY item_id
) AS m
INNER JOIN nrm_cpustats_rate AS t
    ON t.item_id = m.item_id
    AND t.im_utilization = m.max_cpu
    AND tstamp > '2021-06-22 00:00:00.000000'
    AND tstamp <= '2021-06-28 23:59:59.000000'
-- Order by column names rather than positions for readability.
ORDER BY t.item_id, t.tstamp DESC, t.im_utilization DESC
To start, all your preliminary work relies specifically with the cpustats rate table. But as you mentioned, the breach consideration is based on more than 1 (hence 2 or more) ON THE SAME DAY. So, on a per item, per day (hence date( ncr.tstamp ) field to get just the DATE portion regardless of time, we can apply a HAVING clause so only those when grouped by item and date have more than one breach.
Then, from that, we can join to the poll item table for the specific device ID. The following should work for you.
-- Per (device, item, calendar day): count breaches (im_utilization >= 25) and
-- that day's max utilization; HAVING keeps only days with 2+ breaches.
-- NOTE: no timestamp-range filter yet; the revised query below adds it.
select
vpi.device_item_id,
ncr.item_id,
count(*) count_of_breach,
-- date() strips the time-of-day so rows bucket by calendar day.
date( ncr.tstamp ) date_when_breached,
max( ncr.im_utilization ) max_utilization
from
nrm_cpustats_rate ncr
join v_poll_item vpi
on ncr.item_id = vpi.item_id
where
ncr.im_utilization >= 25
group by
vpi.device_item_id,
ncr.item_id,
date( ncr.tstamp )
having
count(*) > 1
FEEDBACK - MODIFICATION
Removed the second table from query and trying with the TRUNC() function within query instead of date(). Also added in your date filter restriction.
-- Revised version: v_poll_item join removed, the asker's time-range filter
-- added, and trunc(tstamp, 'DD') used to truncate to the day -- presumably
-- because date() was unavailable in the target engine (Vertica/Oracle style).
select
ncr.item_id,
count(*) count_of_breach,
trunc( ncr.tstamp, 'DD' ) date_when_breached,
max( ncr.im_utilization ) max_utilization
from
nrm_cpustats_rate ncr
where
ncr.tstamp > '2021-06-22 00:00:00.000000'
AND ncr.tstamp <= '2021-06-28 23:59:59.000000'
AND ncr.im_utilization >= 25
group by
ncr.item_id,
trunc( ncr.tstamp, 'DD' )
having
count(*) > 1
You can hard-cast a TIMESTAMP to a DATE with the ::DATE operation.
As the others did it:
You can get all the first five columns in a grouping query, grouping by device_item_id,v_poll_item.item_id,tstamp::DATE . I put that as a common Table Expression (CTE) into a WITH clause. Then, I joined that CTE back with nrm_cpustats_rate over item_id and an equi predicate over the previously obtained max_utilization and im_utilization; and I finally filter away the rows with less than 2 breach counts and filter again by the timestamp range.
WITH
-- grp: one row per (device, item, calendar day) with the day's breach count
-- (im_utilization >= 25) and max utilization.  ::DATE hard-casts the
-- timestamp to a date; membership narrows items to devices in a group.
grp AS (
SELECT
device_item_id
, v_poll_item.item_id
-- Conditional count: 1 per breach row, 0 otherwise.
, SUM(
CASE
WHEN im_utilization >= 25 THEN 1
ELSE 0
END
) AS count_of_breach
, tstamp::DATE as date_when_breached
-- NOTE(review): this MAX is over ALL of the day's samples, breach or not,
-- which is why some rows differ from the question's expected output.
, MAX(im_utilization) AS max_utilization
FROM nrm_cpustats_rate
JOIN v_poll_item USING(item_id)
JOIN etl_group_membership ON member_item_id=device_item_id
WHERE tstamp > '2021-06-22 00:00:00.000000'AND tstamp <= '2021-06-28 23:59:59.000000'
GROUP BY
device_item_id
, v_poll_item.item_id
, tstamp::DATE
)
-- Join back to the raw samples to recover the timestamp of the max reading;
-- the time-range filter is repeated because this re-reads the base table.
SELECT
grp.*
, o.tstamp AS max_utilization_tstamp
FROM grp
JOIN nrm_cpustats_rate o
ON o.item_id=grp.item_id
AND max_utilization=im_utilization
WHERE tstamp > '2021-06-22 00:00:00.000000'AND tstamp <= '2021-06-28 23:59:59.000000'
AND count_of_breach >= 2
;
-- out device_item_id | item_id | count_of_breach | date_when_breached | max_utilization | max_utilization_tstamp
-- out ----------------+---------+-----------------+--------------------+-----------------+------------------------
-- out 5316 | 118310 | 2 | 2021-06-23 | 51 | 2021-06-23 04:10:00
-- out 104151 | 239414 | 3 | 2021-06-28 | 77 | 2021-06-28 18:50:00
-- out 104151 | 239414 | 2 | 2021-06-22 | 38 | 2021-06-22 16:35:00
-- out 117722 | 117936 | 3 | 2021-06-25 | 90 | 2021-06-25 08:50:00
-- out 5316 | 118311 | 2 | 2021-06-23 | 92 | 2021-06-23 22:25:00
-- out 117722 | 117936 | 2 | 2021-06-28 | 50 | 2021-06-28 19:05:00

Postgres query different COUNT and ROW_NUMBER()

I have a table messages with the following columns
group_id BIGINT,
user_id BIGINT,
message_date timestamp
For the right user_id I would like to be able to count the total rows with that user_id, the distinct groups with that user_id, and considering a leaderboard made by the count of user_id, the position too.
I tried this query
-- Asker's attempt.  BUG (explained further down): grouping by
-- (user_id, group_id) makes COUNT(user_id) a per-(user, group) message count,
-- not a per-user total, and COUNT(group_id) is the same number rather than a
-- distinct-group count.  PARTITION BY COUNT(user_id) also restarts the row
-- numbering within each count value, so pos is not a leaderboard position.
SELECT main.total_m, main.group_number, main.pos
FROM (
SELECT user_id, COUNT(group_id) AS group_number, COUNT(user_id) AS total_m,
ROW_NUMBER() OVER (
PARTITION BY COUNT(user_id)
ORDER BY COUNT(user_id) DESC
) AS pos
FROM messages
WHERE message_date > date_trunc('week', now())
GROUP BY user_id, group_id
) AS main
WHERE user_id = %s
But I don't get the result I would like to have. Where am I wrong?
The power of "sample data" and "expected result" is it enables others to answer efficiently. The following is a complete guess, but perhaps it will prompt you to prepare a "Minimal, Complete, and Verifiable Example" (MCVE)
The details below can be accessed at SQL Fiddle
PostgreSQL 9.6 Schema Setup:
-- Sample schema and data for the leaderboard question.
CREATE TABLE Messages
(USER_ID int, GROUP_ID int, MESSAGE_DATE timestamp)
;
-- User 1: 5 messages across 4 distinct groups; user 2: 6 messages across
-- 5 distinct groups (matches the results table shown below).
INSERT INTO Messages
(USER_ID, GROUP_ID, MESSAGE_DATE)
VALUES
(1, 7, '2017-09-01 10:00:00'),
(1, 6, '2017-09-02 10:00:00'),
(1, 5, '2017-09-03 10:00:00'),
(1, 4, '2017-09-04 10:00:00'),
(1, 7, '2017-09-05 10:00:00'),
(2, 6, '2017-09-01 10:00:00'),
(2, 5, '2017-09-02 10:00:00'),
(2, 7, '2017-09-03 10:00:00'),
(2, 6, '2017-09-04 10:00:00'),
(2, 4, '2017-09-05 10:00:00'),
(2, 8, '2017-09-11 10:00:00')
;
Query 1:
-- Aggregate once per user (distinct-group count, message count, latest
-- message), then rank with DENSE_RANK; max_date and user_id act as
-- tie-breakers so the ranking is deterministic.
select
user_id
, num_grps
, num_msgs
, dense_rank() over(order by num_grps DESC, num_msgs DESC, max_date DESC, user_id) rnk
from (
-- One row per user with their totals.
select
user_id
, count(distinct group_id) num_grps
, count(*) num_msgs
, max(message_date) max_date
from messages
group by
user_id
) d
Results:
| user_id | num_grps | num_msgs | rnk |
|---------|----------|----------|-----|
| 2 | 5 | 6 | 1 |
| 1 | 4 | 5 | 2 |
Looking at just the inner query, I see this in the select:
SELECT user_id, COUNT(group_id), ...
But this in the GROUP BY:
GROUP BY user_id, group_id
Put those together, and you'll never have a COUNT() result of anything other than 1, because each group_id gets its own group. The same applies to the total_m column.

How to find records with recursively overlapping date ranges in Oracle DB

I have a table like this:
| ID | DSTART | DEND
+------+------------+-----------
| fat1 | 01/01/2017 | 31/01/2017
| fat2 | 01/02/2017 | 28/02/2017
| fat3 | 01/03/2017 | 31/03/2017
| fat4 | 01/04/2017 | 30/04/2017
| fat5 | 01/02/2017 | 31/03/2017
| fat6 | 01/01/2017 | 28/02/2017
| fat7 | 01/03/2017 | 30/04/2017
| fat8 | 01/06/2017 | 30/06/2017
| fat9 | 28/04/2017 | 02/05/2017
given a record I want to find all the overlapping records and all the records overlapping the overlapping records.
e.g. searching for overlapping records of fat7 should return
fat5 (overlaps fat7)
fat4 (overlaps fat7)
fat3 (overlaps fat7)
fat2 (*overlaps fat5)
fat6 (*overlaps fat5)
fat1 (**overlaps fat6)
to create the dataset:
-- Dataset for the overlapping-ranges question (to_date masks are mm/dd/yyyy,
-- so e.g. fat7 runs 01 Mar - 30 Apr, matching the listing above).
create table zz_fatt
( id varchar2(100) primary key,
dstart date,
dend date);
-- NOTE(review): the listing above also shows fat9 (28/04 - 02/05), but no
-- INSERT for it appears here -- presumably an omission in the question.
insert into zz_fatt (id, dstart, dend) values ('fat7', to_date('03/01/2017', 'mm/dd/yyyy'), to_date('04/30/2017', 'mm/dd/yyyy'));
insert into zz_fatt (id, dstart, dend) values ('fat1', to_date('01/01/2017', 'mm/dd/yyyy'), to_date('01/31/2017', 'mm/dd/yyyy'));
insert into zz_fatt (id, dstart, dend) values ('fat2', to_date('02/01/2017', 'mm/dd/yyyy'), to_date('02/28/2017', 'mm/dd/yyyy'));
insert into zz_fatt (id, dstart, dend) values ('fat3', to_date('03/01/2017', 'mm/dd/yyyy'), to_date('03/31/2017', 'mm/dd/yyyy'));
insert into zz_fatt (id, dstart, dend) values ('fat4', to_date('04/01/2017', 'mm/dd/yyyy'), to_date('04/30/2017', 'mm/dd/yyyy'));
insert into zz_fatt (id, dstart, dend) values ('fat5', to_date('02/01/2017', 'mm/dd/yyyy'), to_date('03/31/2017', 'mm/dd/yyyy'));
insert into zz_fatt (id, dstart, dend) values ('fat6', to_date('01/01/2017', 'mm/dd/yyyy'), to_date('02/28/2017', 'mm/dd/yyyy'));
insert into zz_fatt (id, dstart, dend) values ('fat8', to_date('06/01/2017', 'mm/dd/yyyy'), to_date('06/15/2017', 'mm/dd/yyyy'));
You can assign a group identifier to the records. The idea is to find records that do not overlap, and use them as the beginning of a group.
The following assigns the groups to each record:
-- Assign a group id to each record: a record whose start is not covered by any
-- earlier record starts a new group (group_start = 1); the running SUM ordered
-- by dstart turns those start-flags into consecutive group numbers, so all
-- transitively-overlapping records share one grp value.
-- NOTE(review): 't' here is a placeholder for the actual table (zz_fatt).
select t.*, sum(group_start) over (order by dstart) as grp
from (select t.*,
(case when not exists (select 1
from t t2
where t2.dstart < t.dstart and t2.dend >= t.dstart
)
then 1 else 0
end) group_start
from t
) t
If you only want the groups for a certain record then there are several ways, such as:
-- To get only the records transitively overlapping one given record: compute
-- the groups once, then keep every row sharing that record's grp value.
with overlaps as (
<query above>
)
select o.*
from overlaps o
where o.grp = (select o2.grp from overlaps o2 where o2.id = ???);

T-SQL: Conditional NULL removal

I need to select only the Room_IDs that have no instances where the Status is NULL.
For example here :
TABLE_A
Room_Id Status Inspection_Date
-----------------------------------
1 NULL 5/15/2015
2 occupied 5/21/2015
2 NULL 1/19/2016
1 occupied 12/16/2015
4 NULL 3/25/2016
3 vacant 8/27/2015
1 vacant 4/17/2016
3 vacant 12/12/2015
3 vacant 3/22/2016
4 vacant 2/2/2015
4 vacant 3/24/2015
My result should look like this:
Room_Id Status Inspection_Date
-----------------------------------
3 vacant 8/27/2015
3 vacant 12/12/2015
3 vacant 3/22/2016
Because Room_ID '3' has no instances where the Status is NULL
Quick example of how to do it:
-- Fixed: T-SQL table variables are declared with '@', not '#' -- '#' denotes a
-- temp table, which is created with CREATE TABLE, not DECLARE, so the original
-- did not compile.
DECLARE @tTable TABLE(
Room_Id INT,
Status VARCHAR(20),
Inspection_Date DATETIME)
-- NOTE(review): (1, NULL, '5/15/2015') is inserted twice, unlike the question
-- data; harmless here because room 1 is excluded either way.
INSERT INTO @tTable VALUES
(1, NULL, '5/15/2015'),
(1,NULL, '5/15/2015'),
(2,'occupied', '5/21/2015'),
(2,NULL, '1/19/2016'),
(1,'occupied', '12/16/2015'),
(4,NULL, '3/25/2016'),
(3,'vacant', '8/27/2015'),
(1,'vacant', '4/17/2016'),
(3,'vacant', '12/12/2015'),
(3,'vacant', '3/22/2016'),
(4,'vacant', '2/2/2015'),
(4,'vacant', '3/24/2015')
-- Exclude every room that has at least one NULL-status row.
SELECT * FROM @tTable T1
WHERE Room_Id NOT IN (SELECT Room_ID FROM @tTable WHERE Status IS NULL)
Gives :
Room_Id | Status | Inspection_Date |
-------------------------------------------------
3 | vacant | 2015-08-27 00:00:00.000
3 | vacant | 2015-12-12 00:00:00.000
3 | vacant | 2016-03-22 00:00:00.000
Try this out:
-- Keep only rooms that never had a NULL status.
-- NOTE: NOT IN misbehaves if the subquery can return a NULL Room_ID (no rows
-- would match); the NOT EXISTS variant shown further down avoids that trap.
SELECT *
FROM Table1
WHERE Room_ID NOT IN
(
-- Every room id that had a NULL status at least once.
SELECT DISTINCT Room_ID
FROM Table1
WHERE Status IS NULL
)
The sub query returns a list of unique room ids that, at one time or another, had a NULL status. The outer query looks at that list and says "Return * where the Room_ID is not one of those in the subquery."
If you want to try it in SQL Fiddle, here is the Schema:
-- SQL Fiddle schema for the NOT IN answer.
-- NOTE(review): this data differs slightly from the question's table (e.g.
-- room 4 rather than 3 on 2015-08-27, plus an extra (2, NULL) row) -- verify
-- against the original data before comparing outputs.
CREATE TABLE Table1
(Room_ID int, Status varchar(8), Inspection_Date datetime)
;
INSERT INTO Table1
(Room_ID, Status, Inspection_Date)
VALUES
(1, NULL, '2015-05-15 00:00:00'),
(2, 'occupied', '2015-05-21 00:00:00'),
(2, NULL, '2016-01-19 00:00:00'),
(1, 'occupied', '2015-12-16 00:00:00'),
(4, NULL, '2016-03-25 00:00:00'),
(4, 'vacant', '2015-08-27 00:00:00'),
(1, 'vacant', '2016-04-17 00:00:00'),
(3, 'vacant', '2015-12-12 00:00:00'),
(3, 'vacant', '2016-03-22 00:00:00'),
(4, 'vacant', '2015-02-02 00:00:00'),
(4, 'vacant', '2015-03-24 00:00:00'),
(2, NULL, '2015-05-22 00:00:00')
;
As alternative to Hashman, I just prefer to use not exists over not in for these types of queries.
Creating some test data
Note that I just kept the same date for everything since it's not imperative to the question.
-- Test fixture (temp table): same rooms/statuses as the question, with
-- today's date for every row since the dates don't affect the result.
create table #table_a (
Room_Id int,
Status varchar(32),
Inspection_Date date);
insert #table_a (Room_Id, Status, Inspection_Date)
values
(1, null, getdate()),
(2, 'occupied', getdate()),
(2, null, getdate()),
(1, 'occupied', getdate()),
(4, null, getdate()),
(3, 'vacant', getdate()),
(1, 'vacant', getdate()),
(3, 'vacant', getdate()),
(3, 'vacant', getdate()),
(4, 'vacant', getdate()),
(4, 'vacant', getdate());
The query
-- NOT EXISTS variant: keep a row only if its room has no NULL-status row.
-- The subquery is correlated on Room_Id.
-- NOTE(review): the unqualified 'Status' resolves to the inner table t2 here;
-- qualifying it as t2.Status would be clearer.
select *
from #table_a t1
where not exists (
select *
from #table_a t2
where t1.Room_Id = t2.Room_Id
and Status is null);
The results
Room_Id Status Inspection_Date
----------- -------------------------------- ---------------
3 vacant 2016-06-17
3 vacant 2016-06-17
3 vacant 2016-06-17
You can use CTE and NOT EXIST like below code
-- CTE + NOT EXISTS variant of the same exclusion.
-- NOTE(review): the columns here are RoomId against dbo.Table_1, whereas the
-- schemas above use Room_Id / Table1 -- adjust names to the actual table
-- before running.
WITH bt
AS ( SELECT RoomId ,
Status,
Inspection_Date
FROM dbo.Table_1
)
SELECT *
FROM bt AS a
WHERE NOT EXISTS ( SELECT 1
FROM bt
WHERE bt.RoomId = a.RoomId
AND bt.Status IS NULL );