How to get specific records in Postgres - sql

In Postgres I have two tables:
Table A { int keyA, Text name}
Table B { int keyB, int keyA, char mark, date start, date end}
Mark from Table B could be 'X', 'Y', 'Z'.
I want to get every 'X' record with its dates, but only one record from 'Y' and 'Z'. Also, if a keyA has 'X', 'Y' and 'Z' records, I want only the 'X' records.
From:
keyB
keyA
mark
start
end
1
1
X
15-01-2023
16-01-2023
2
1
X
17-01-2023
18-01-2023
3
1
Y
null
null
4
1
Z
null
null
5
2
Y
null
null
6
2
Z
null
null
7
2
Y
null
null
8
3
Z
null
null
9
3
Y
null
null
10
4
X
19-01-2023
20-01-2023
I want to get
keyB
keyA
mark
start
end
1
1
X
15-01-2023
16-01-2023
2
1
X
17-01-2023
18-01-2023
5
2
Y
null
null
8
3
Z
null
null
10
4
X
19-01-2023
20-01-2023
I tried:
1.
-- Attempt 1: fetch the 'X' start/end dates of each A row through scalar subqueries.
Select A.name,
-- A scalar subquery may return at most one row, so a keyA with several 'X' rows does not fit here.
(select b2.start from B b2 where b2.keyA = A.keyA and b2.mark = 'X') as Start,
(select b2.end from B b2 where b2.keyA = A.keyA and b2.mark = 'X') as End,
-- NOTE(review): the comma after "as End" directly precedes FROM, which is a syntax error as written.
from A order by name;
Order is important. I need to have name first.
There is a problem. The subqueries return more than one record, so I would have to add LIMIT 1. But I want to get every X, not only one.
If I do this
-- Attempt 2: plain inner join with no filter on mark, so rows of every mark (X, Y and Z) come back.
Select A.name, B.start, B.end
-- NOTE(review): this compares A.keyA with B.keyB; table B also has a keyA column, which looks like the intended join column -- confirm.
from A inner join B on A.keyA = B.keyB
I'll have X, Y, Z and as I mentioned I want only X or one from Y or Z.
Any idea how should I solve this?

Use the row_number function with your join query as the following:
-- Keep every 'X' row; for a keyA that has no 'X' row keep only its first row by keyB.
WITH ranked AS (
    SELECT
        a.name,
        b.*,
        -- 'X' rows sort first inside each keyA, so rn = 1 lands on a non-'X' row
        -- only when that keyA has no 'X' row at all.
        ROW_NUMBER() OVER (
            PARTITION BY b.keyA
            ORDER BY CASE WHEN b.mark = 'X' THEN 1 ELSE 2 END, b.keyB
        ) AS rn
    FROM tableB AS b
    INNER JOIN tableA AS a
        ON b.keyA = a.keyA
)
SELECT name, keyB, keyA, mark, start_dt, end_dt
FROM ranked
WHERE mark = 'X' OR rn = 1
ORDER BY keyB
See demo

Related

query to flag a column based on multiple conditions

I want to flag a column based on interdependent conditions. My input data is as below
id
status
rnk
A
Open
1
A
Delay
2
A
In
3
B
In
1
B
Out
2
B
Delay
3
B
count
4
C
In
1
C
Close
2
C
out
3
D
Close
1
D
Open
2
D
Delay
3
D
In
4
My output should look like
id
status
rnk
flag
A
Open
1
N
A
Delay
2
Y
A
In
3
N
B
In
1
N
B
Out
2
N
B
Delay
3
N
B
count
4
N
C
In
1
N
C
Close
2
N
C
out
3
N
D
Close
1
N
D
Open
2
N
D
Delay
3
Y
D
In
4
N
Logic - if status column is anything other than Delay then the flag will be N.
If the status column is Delay and if the status is either 'Open' or 'Close' for records which are having less rnk than that of Delay within the same ID then flag will be Y else N.
Example - for ID 'A' we have a status 'Delay' and its rank is 2, now we need to check if the status of A with rank < 2 is either 'Open' or 'Close' then flag 'Delay' to 'Y'
please note: rnk column is already populated in the table based on different logic
Below is the query I have tried, but I am getting flag 'N' for all the records,
-- Attempt: compare each row's rnk with the highest 'Delay' rnk of its id.
SELECT
*,
-- Every non-'Delay' row is sent to 'N' by this first branch ...
CASE WHEN status != 'Delay' THEN 'N'
-- ... so the next branch is only reached by 'Delay' rows, for which status IN ('Open','Close')
-- can never hold; as a result no row is ever flagged 'Y'.
WHEN rnk < (COALESCE(MAX(CASE WHEN status = 'Delay' THEN rnk ELSE -1 END) OVER(PARTITION BY id)))
AND status IN ('Open','Close') THEN 'Y'
ELSE 'N'
END AS flag
FROM TABLE
A correlated subquery is more helpful here:
-- Flag a 'Delay' row with 'Y' when a row of the same id with a lower rnk is 'Open' or 'Close'.
SELECT
    t1.*,
    CASE
        WHEN t1.status <> 'Delay' THEN 'N'
        WHEN EXISTS (
            SELECT 1
            FROM Table1 AS prev
            WHERE prev.id = t1.id
              AND prev.rnk < t1.rnk
              AND prev.status IN ('Open', 'Close')
        ) THEN 'Y'
        ELSE 'N'
    END AS flag
FROM Table1 AS t1
id
status
rnk
flag
A
Open
1
N
A
Delay
2
Y
A
In
3
N
B
In
1
N
B
Out
2
N
B
Delay
3
N
B
count
4
N
C
In
1
N
C
Close
2
N
C
out
3
N
D
Close
1
N
D
Open
2
N
D
Delay
3
Y
D
In
4
N
Consider below simple approach
-- A 'Delay' row gets 'Y' when the running count of 'Open'/'Close' rows within its id,
-- ordered by rnk up to and including the current row, is above zero.
select
  *,
  case
    when status = 'Delay'
      and countif(status in ('Open', 'Close')) over (partition by id order by rnk) > 0
      then 'Y'
    else 'N'
  end as flag
from your_table
if applied to sample data in your question - output is
Use a grouped Common Table expression with the maximum rank and row count of all rows with status Open or Close, and left-join the base table with it:
-- your input, don't use in real query...
WITH
indata(id,status,rnk) AS (
          SELECT 'A','Open',1
UNION ALL SELECT 'A','Delay',2
UNION ALL SELECT 'A','In',3
UNION ALL SELECT 'B','In',1
UNION ALL SELECT 'B','Out',2
UNION ALL SELECT 'B','Delay',3
UNION ALL SELECT 'B','count',4
UNION ALL SELECT 'C','In',1
UNION ALL SELECT 'C','Close',2
UNION ALL SELECT 'C','out',3
UNION ALL SELECT 'D','Close',1
UNION ALL SELECT 'D','Open',2
UNION ALL SELECT 'D','Delay',3
UNION ALL SELECT 'D','In',4
)
-- input ends here, real query starts below
-- replace following comma with "WITH" ...
,
-- Per id: the LOWEST rnk at which an 'Open' or 'Close' row occurs, plus the number of such rows.
-- The lowest rnk is the one that matters: a 'Delay' row qualifies as soon as ANY
-- 'Open'/'Close' row precedes it. Using the highest rnk would miss a 'Delay' that sits
-- between an earlier 'Open' and a later 'Close'.
prev_stats AS (
  SELECT
    id
  , MIN(rnk) AS first_rnk
  , COUNT(*) AS num
  FROM indata
  WHERE status IN ('Open','Close')
  GROUP BY id
)
SELECT
  indata.*
, CASE
    WHEN indata.status <> 'Delay' THEN 'N'
    -- The LEFT JOIN leaves num NULL when no 'Open'/'Close' row precedes this row.
    WHEN prev_stats.num > 0 THEN 'Y'
    ELSE 'N'
  END AS flag
FROM indata
LEFT JOIN prev_stats
  ON  indata.id  = prev_stats.id
  AND indata.rnk > prev_stats.first_rnk
;
Result:
id
status
rnk
flag
A
Open
1
N
A
Delay
2
Y
A
In
3
N
B
In
1
N
B
Out
2
N
B
Delay
3
N
B
count
4
N
C
In
1
N
C
Close
2
N
C
out
3
N
D
Close
1
N
D
Open
2
N
D
Delay
3
Y
D
In
4
N

Easiest way to select distinct with least number of null

I want to create a view over a table that has 500k rows and 10 columns. In that table there are duplicate ids, but with different amounts of information, because some of the columns are NULL. My objective is to keep one row in case of duplicates, namely the one with the fewest NULL values.
Let me explain it with a quick example. I am working with a query similar to this.
-- Sample table: several rows per ID, each with a different number of NULL columns.
CREATE TABLE test (ID INT, b char(1), c char (1), d char(1))
INSERT INTO test(ID,b,c,d) VALUES
(1,NULL,NULL,NULL),
(1,'B', NULL,NULL),
(1,'B','C',NULL),
(1,'B','C','D'),
(2,'E','F',NULL),
(2,'E',NULL,NULL),
(3,NULL,NULL,NULL),
(3,'G',NULL,NULL)
-- DISTINCT only removes rows that are identical in every selected column, so all eight rows come back.
SELECT DISTINCT ID,b,c,d FROM test
DROP TABLE test
The result is
ID b c d
--------------------
1 NULL NULL NULL
1 B NULL NULL
1 B C NULL
1 B C D
2 E F NULL
2 E NULL NULL
3 NULL NULL NULL
3 G NULL NULL
However, the output I want to see is
ID b c d
--------------------
1 B C D
2 E F NULL
3 G NULL NULL
So, based on the id and if there are duplicates, I want to have the row with the least number of nulls. How is it possible?
Thank you very much
If you want the row with the least number of NULLs, then you would basically count them:
-- Return the single row of the whole table with the fewest NULLs among b, c and d.
select t.*
from test t
order by ( (case when b is null then 1 else 0 end) +
           (case when c is null then 1 else 0 end) +
           (case when d is null then 1 else 0 end)
         ) asc  -- ascending NULL count: the row with the fewest NULLs comes first
fetch first 1 row only;
However, if you want one row per id with a non-NULL value in each column (if available), then @maSTAShuFu's answer is appropriate.
EDIT:
If you want one row per client, then simply use row_number():
-- One row per id: number the rows of each id by their NULL count (fewest first)
-- and keep only the first one. Table and key names follow the question's test(ID, b, c, d).
select t.*
from (select t.*,
             row_number() over (partition by id
                                order by ( (case when b is null then 1 else 0 end) +
                                           (case when c is null then 1 else 0 end) +
                                           (case when d is null then 1 else 0 end)
                                         ) asc  -- fewest NULLs gets seqnum = 1
                               ) as seqnum
      from test t
     ) t
where seqnum = 1;
using MAX.
-- One row per ID. MAX() ignores NULLs, so each column yields a non-NULL value whenever
-- any row of that ID has one. Note the values may come from different source rows.
SELECT
ID,
MAX(B) B,
MAX(C) C,
MAX(D) D
FROM test
GROUP BY ID

Merge multiple columns into one column with multiple rows

In PostgreSQL, how can I merge multiple columns into one column with multiple rows?
The columns are all boolean, so I want to:
Filter for true values only
Replace the true value (1) with the name of the column (A, B or C)
I have this table:
ID | A | B | C
1 0 1 0
2 1 1 0
3 0 0 1
4 1 0 1
5 1 0 0
6 0 1 1
I want to get this table:
ID | Letter
1 B
2 A
2 B
3 C
4 A
4 C
5 A
6 B
6 C
I think you need something like this:
-- One output row per (ID, column set to 1): each branch emits the letter of its column.
SELECT
    ID,
    'A' AS Letter
FROM table
WHERE A = 1

UNION ALL

SELECT
    ID,
    'B' AS Letter
FROM table
WHERE B = 1

UNION ALL

SELECT
    ID,
    'C' AS Letter
FROM table
WHERE C = 1

-- The ORDER BY applies to the combined result of all three branches.
ORDER BY
    ID,
    Letter
-- Returns exactly one row per input row with a single letter.
-- NOTE: CASE stops at the first matching branch, so an ID with several columns set to 1
-- (e.g. A and B) yields only the first letter, and a row with none set yields NULL.
SELECT ID,
(CASE
WHEN TABLE.A = 1 then 'A'
WHEN TABLE.B = 1 then 'B'
WHEN TABLE.C = 1 then 'C'
ELSE NULL END) AS LETTER
from TABLE
You may try this.
-- Fill t2 (id, letter) from the source table t1, one pass per column.
-- All three statements must read from t1: t2 is the target and has no A/B/C columns.
insert into t2 select id, 'A' from t1 where A=1;
insert into t2 select id, 'B' from t1 where B=1;
insert into t2 select id, 'C' from t1 where C=1;
If you care about the order, then you can do this.
-- Copy the (id, letter) rows from t2 into t3, reading them in id, letter order.
-- NOTE(review): a table has no inherent row order; an ORDER BY is still needed when selecting from t3.
insert into t3 select id, letter from t2 order by id, letter;
W/o UNION
You can use a single query to get the desired output. Real-time example:
-- Per row: join the letters of the columns that are not 0 into one comma-separated string
-- (concat_ws skips the NULL arguments), then split that string into one output row per letter.
select
    id,
    regexp_split_to_table(
        concat_ws(
            ',',
            case when a = 0 then null else 'a' end,
            case when b = 0 then null else 'b' end,
            case when c = 0 then null else 'c' end
        ),
        ','
    ) as l
from c1;
regexp_split_to_table() & concat_ws()

SQL Rows to Separate Columns

I realise this may be similar to other questions, but I am stuck!
I am having trouble organising some data into an appropriate format to export to another tool. Basically I have an ID column and then 2 response columns. I would like to separate the ID and then list the responses under each. See the example below for clarification.
I have played around with Pivot and UnPivot but can't get it quite right.
Here is how the data looks now.
ID X1 X2
1 2 Y
1 5 Y
1 3 N
1 7 N
1 6 Y
2 5 N
2 4 Y
2 8 Y
2 3 N
3 5 Y
3 1 N
3 9 N
Here is how I would like the data to look
ID1_X1 ID1_X2 ID2_X1 ID2_X2 ID3_X1 ID3_X2
2 Y 5 N 5 Y
5 Y 4 Y 1 N
3 N 8 Y 9 N
7 N 3 N null null
6 Y null null null null
Here is the code to create/populate the table.
-- Sample data: an ID column plus two response columns, X1 (int) and X2 (one character).
create table #test (ID int, X1 int, X2 varchar(1))
-- NOTE(review): ID and X1 are declared int but the values below are quoted strings,
-- so the insert relies on implicit string-to-int conversion.
insert into #test values
('1','2','Y'),('1','5','Y'),('1','3','N'),('1','7','N'),
('1','6','Y'),('2','5','N'),('2','4','Y'),('2','8','Y'),
('2','3','N'),('3','5','Y'),('3','1','N'),('3','9','N')
You can do this using aggregation and row_number() . . . assuming you know the ids in advance:
-- Pivot: seqnum numbers the rows inside each id; grouping by seqnum then places the
-- n-th row of every id on the same output row. An id with fewer rows yields NULLs.
select max(case when id = 1 then x1 end) as x1_1,
       max(case when id = 1 then x2 end) as x2_1,
       max(case when id = 2 then x1 end) as x1_2,
       max(case when id = 2 then x2 end) as x2_2,
       max(case when id = 3 then x1 end) as x1_3,
       max(case when id = 3 then x2 end) as x2_3
from (select t.*,
             -- order by (select null): no ordering column is available, so the
             -- numbering within an id is arbitrary.
             row_number() over (partition by id order by (select null)) as seqnum
      from #test t
     ) t
group by seqnum;
I should note that SQL tables represent unordered sets. Your original data doesn't have an indication of the ordering, so this is not guaranteed to put the values in the same order as the original data (actually, there is no such order, so that statement is a tautology). If you have another column with the ordering, then you can use that.
Here is an alternative approach to Gordan's good answer, using OUTER JOINs.
It assumes that there is an identity column in your table to define the order of X1 within each ID, and a fixed number of IDs.
-- Number the rows of each ID separately (ordered by IDENTITY_COL), then line the three
-- numbered sets up side by side on their row number.
;WITH
first_id AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY IDENTITY_COL) AS RN,
        X1 AS ID1_X1,
        X2 AS ID1_X2
    FROM #TEST
    WHERE ID = 1
),
second_id AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY IDENTITY_COL) AS RN,
        X1 AS ID2_X1,
        X2 AS ID2_X2
    FROM #TEST
    WHERE ID = 2
),
third_id AS (
    SELECT
        ROW_NUMBER() OVER (ORDER BY IDENTITY_COL) AS RN,
        X1 AS ID3_X1,
        X2 AS ID3_X2
    FROM #TEST
    WHERE ID = 3
)
SELECT ID1_X1, ID1_X2, ID2_X1, ID2_X2, ID3_X1, ID3_X2
FROM first_id AS f
FULL OUTER JOIN second_id AS s
    ON f.RN = s.RN
-- COALESCE: match on the second set's row number, falling back to the first set's
-- when the second set has no row at that position.
FULL OUTER JOIN third_id AS t
    ON t.RN = COALESCE(s.RN, f.RN)

"Cluster" Code Help in SQL

I am a relative newcomer to SQL, but have gained many useful ideas through the site. Now I'm stuck on a piece of code that seems simple enough, but for some reason I can't wrap my head around it.
I am trying to create a third column (Column Z) based off of the first two columns below:
Column X Column Y
-------------------
1 a
1 b
1 c
2 a
2 d
2 e
2 f
4 b
5 i
5 c
3 g
3 h
6 j
6 k
6 l
What i need to have happen in Column Z:
For each individual value found in Column Y, note the value of Column X
Likewise, for each individual value in Column X, note the value of Column Y
Then, cluster (RANK/ROW_NUMBER?) these into groups seen below:
Column X Column Y Column Z
-----------------------------
1 a 1
1 b 1
1 c 1
2 a 1
2 d 1
2 e 1
2 f 1
4 b 1
5 i 1
5 c 1
3 g 2
3 h 2
6 j 3
6 k 3
6 l 3
I hope I've been clear enough without over-complicating things. My head has been spinning all morning. Let me know if anyone needs any more info.
Greatly appreciated in advance!
I have faced exactly this problem for some analyses in the past. The only way I could get it to work is by doing a loop, that incrementally adds in the information.
The loop assigns the minimum "x" value within each group as the group id. By your rules, this is guaranteed to be unique. It starts by assigning the current x value to z. It then finds the minimum z along the x and y dimensions. It repeats this process until no records change.
Given your data, the following is an outline of how to do it:
-- Start with every row labelled by its own x value.
update t set z = x
while 1=1
begin
    -- For each row, find the smallest z among the rows sharing its x (idx) and among
    -- the rows sharing its y (idy), then pull the row down to the smaller of the two.
    with toupdate as (
        select t.*,
               min(z) over (partition by x) as idx,
               min(z) over (partition by y) as idy
        from t
    )
    update toupdate
        set z = (case when idx < idy then idx else idy end)
        where z > idx or z > idy;
    -- @@ROWCOUNT is the number of rows changed by the UPDATE above; stop once a pass changes nothing.
    if (@@ROWCOUNT = 0) break;
end;
-- Renumber the resulting group labels to consecutive values 1, 2, 3, ...
;with a as
(
select z, dense_rank() over (order by z) newZ from t
)
update a set z = newZ
Maybe not the best way, but it works
SQLFiddle http://sqlfiddle.com/#!3/99532/1
-- cte: attach a row number to every row of #t.
-- ORDER BY (SELECT NULL) defines no real order, so the numbering is whatever the engine produces.
;WITH cte AS (
SELECT *, ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS row_nb
FROM #t
)
-- c2: ex = 1 when a row with a lower row number has the same Y, otherwise 0.
, c2 AS (
SELECT e1.*
,CASE WHEN EXISTS(SELECT * FROM cte e2 WHERE e1.Y = e2.Y and e2.row_nb < e1.row_nb) THEN 1 ELSE 0 END as ex
FROM cte e1
)
-- c3: one row per X. ex becomes 1 when none of the X's rows had ex = 1 (no Y seen earlier), else 0;
-- max_row_nb is the highest row number inside the X group.
, c3 AS (
SELECT X,1 - SIGN(SUM(ex)) as ex,MAX(row_nb) as max_row_nb
FROM c2
GROUP BY X
)
-- Z is the running total of c3.ex over all X groups whose max_row_nb is not above the current group's.
-- NOTE(review): the result depends on the row numbering above, which has no guaranteed order.
SELECT
cte.X,cte.Y
,(SELECT SUM(cc3.ex) FROM c3 cc3 where cc3.max_row_nb<= c3.max_row_nb) AS Z
FROM cte
INNER JOIN c3
ON c3.X = cte.X
ORDER BY cte.row_nb
-- Sample data in a table variable: x and y are given, z is the cluster id to compute.
-- (A table variable must be named @t; "declare #t table" is not valid T-SQL.)
declare @t table (x tinyint, y char(1), z tinyint)
insert @t (x,y) values(1,'a'),(1,'b'),(1,'c'),(2,'a'),(2,'d'),(2,'e'),(2,'c'),
(2,'f'),(4,'b'),(5,'i'),(5,'c'),(3,'g'),(3,'h'),(6,'j'),(6,'k'),(6,'l'),(7,'v')
-- a: for every y, link each x to the smallest x sharing that y (only where it is smaller).
;with a as
(
select x,parent from
(
select x, min(x) over (partition by y) parent from @t
) a
where x > parent
),
-- b: follow the links recursively, so x also reaches the parents of its parents.
b as
(
select x, parent from a
union all
select a.x, b.parent
from a join b on a.parent = b.x
),
-- c: the smallest parent reachable from each x.
c as
(
select x, min(parent) parent
from b
group by x
),
-- d: rows whose x has no parent keep x itself as group key; dense_rank turns the keys into 1, 2, 3, ...
d as
(
select t.x,t.y, t.z,
dense_rank() over (order by coalesce(c.parent, t.x)) calculatedZ
from @t t
left join c on t.x = c.x
)
select x,y,calculatedZ as z from d
-- if you want to update instead of selecting, replace the select above with:
-- update d set z = calculatedZ
-- select x,y,z from @t
option (maxrecursion 0)
Result:
x y z
1 a 1
1 b 1
1 c 1
2 a 1
2 d 1
2 e 1
2 c 1
2 f 1
4 b 1
5 i 1
5 c 1
3 g 2
3 h 2
6 j 3
6 k 3
6 l 3
8 j 3
7 v 4