SQL sort by max from children - sql

I have this kind of schema in my database:
-- Root of the A -> B -> C hierarchy.
-- PRIMARY KEY gives b.aId a unique, non-null column to reference.
CREATE TABLE a (
    id serial PRIMARY KEY
);

-- Child of a.
CREATE TABLE b (
    id  serial PRIMARY KEY,
    aId integer REFERENCES a(id)
);

-- Child of b; "date" is the value the A rows are ultimately sorted by.
CREATE TABLE c (
    id   serial PRIMARY KEY,
    bId  integer REFERENCES b(id),
    date datetime  -- NOTE(review): type name kept as written; confirm it exists in the target engine
);
I would like to get all A sorted by the date in C (DESC) following this relation : A -> B -> C.
I came up with this solution :
-- All rows of a, newest first by the latest c.date reachable through b.
-- Each derived table groups on its own key, so sub2 yields one row per aId
-- and no GROUP BY is needed on the outer query.
SELECT
    a.*
FROM
    a
JOIN (
    -- latest date per a: the max over the per-b maxima
    SELECT
        b.aId,
        max(sub1.max) AS max
    FROM
        b
    JOIN (
        -- latest date per b
        SELECT
            c.bId AS id,
            max(c.date) AS max
        FROM
            c
        GROUP BY
            c.bId
    ) AS sub1 ON sub1.id = b.id
    GROUP BY
        b.aId
) AS sub2 ON sub2.aId = a.id
ORDER BY
    sub2.max DESC
Is this the most suitable way to handle this ?
Thanks

You can use a correlated subquery in the order by:
-- Every row of a, ordered by the newest c.date found under it (via b).
-- The scalar subquery is correlated on a.id.
SELECT a.*
FROM a
ORDER BY (
    SELECT MAX(c.date)
    FROM b
    INNER JOIN c
        ON c.bid = b.id
    WHERE b.aid = a.id
) DESC;
Alternatively, if you only want aid and insist that there be matches, you can just use aggregation:
-- One row per aid that has at least one c row beneath it, newest first.
SELECT b.aid
FROM b
INNER JOIN c
    ON c.bid = b.id
GROUP BY b.aid
ORDER BY MAX(c.date) DESC;

Related

How do I solve this unique constraint error?

I am running a python code in jupyter lab. I am trying to create a table from other tables in a database file. This is my code:
# Alternative INVENTORY_PARENTLOT table
# Rebuilds inventory_parentlot from scratch: drop any existing copy, then run a
# multi-statement SQL script.
# NOTE(review): `c` is presumably a sqlite3 cursor (it exposes executescript) — confirm.
c.execute("DROP TABLE IF EXISTS inventory_parentlot")
# Script outline:
#   1. tmp_inv_parents      - (id, type, parentid) rows whose parent has inventorytype > 12
#   2. tmp_inv_parents2..5  - each self-joins the previous table to step one parent further up
#   3. tmp_inv_parentall    - UNION of tmp_inv_parents .. tmp_inv_parents4
#   4. inventory_parentlot  - per id, the table_inventory row(s) whose idserial equals the
#                             smallest parent idserial; then a UNIQUE index on id
# NOTE(review): tmp_inv_parents5 is created but not included in the UNION — confirm intended.
# NOTE(review): the final join can emit several rows per id if idserial is not unique in
# table_inventory, which would make the UNIQUE index creation fail — verify.
query = """
CREATE TEMPORARY TABLE tmp_inv_parents AS SELECT a.id, b.inventorytype AS type, a.parentid FROM inventory_parents AS a JOIN table_inventory AS b on a.id=b.id JOIN table_inventory AS c on a.parentid=c.id WHERE c.inventorytype > 12 ;
CREATE TEMPORARY TABLE tmp_inv_parents2 AS SELECT a.id, a.type, b.parentid FROM tmp_inv_parents AS a JOIN tmp_inv_parents AS b on a.parentid = b.id;
CREATE TEMPORARY TABLE tmp_inv_parents3 AS SELECT a.id, a.type, b.parentid FROM tmp_inv_parents2 AS a JOIN tmp_inv_parents2 AS b on a.parentid = b.id;
CREATE TEMPORARY TABLE tmp_inv_parents4 AS SELECT a.id, a.type, b.parentid FROM tmp_inv_parents3 AS a JOIN tmp_inv_parents3 AS b on a.parentid = b.id;
CREATE TEMPORARY TABLE tmp_inv_parents5 AS SELECT a.id, a.type, b.parentid FROM tmp_inv_parents4 AS a JOIN tmp_inv_parents4 AS b on a.parentid = b.id ;
CREATE TEMPORARY TABLE tmp_inv_parentall AS SELECT * from tmp_inv_parents UNION SELECT * from tmp_inv_parents2 UNION SELECT * from tmp_inv_parents3 UNION SELECT * from tmp_inv_parents4;
CREATE TABLE inventory_parentlot AS SELECT a.id, b.id AS parentlot FROM (select id, min(parentserial) as firstparentserial FROM (select a.id, a.parentid, b.idserial AS parentserial from tmp_inv_parentall AS a JOIN table_inventory AS b on a.parentid = b.id ORDER BY a.id) GROUP BY id) AS a JOIN table_inventory AS b on a.firstparentserial = b.idserial;
CREATE UNIQUE INDEX inventory_parentlot_id ON inventory_parentlot(id);
ANALYZE inventory_parentlot;
"""
# Run every statement in the script in one call.
c.executescript(query)
When this code runs, I get a unique constraint error. However, I have checked each table and the ids are unique.
How can I solve this? Is there a way I can just ignore the error?
Make sure that the id is unique in the outer query of the SQL that creates the table.
-- Build inventory_parentlot with exactly one row per id.
-- Grouping the outer query on id (and collapsing candidates with MAX) keeps id
-- unique even when several table_inventory rows share the chosen idserial.
CREATE TABLE inventory_parentlot AS
SELECT
    first_parent.id AS id,
    MAX(inv.id)     AS parentlot
FROM (
    -- smallest parent serial seen for each id
    SELECT
        id,
        MIN(parentserial) AS firstparentserial
    FROM (
        SELECT
            pa.id,
            pa.parentid,
            pinv.idserial AS parentserial
        FROM tmp_inv_parentall AS pa
        JOIN table_inventory AS pinv
            ON pa.parentid = pinv.id
    )
    GROUP BY id
) AS first_parent
JOIN table_inventory AS inv
    ON first_parent.firstparentserial = inv.idserial
GROUP BY first_parent.id;

Group select statements together by common column in sql-server

I need to combine two tables together based on a common column. Normally I would just use an inner join on the specific column (let's call it ParentID), but I need the results to be in separate rows.
Table A:
ID, ...
Table B:
ID, ParentID, SomeColB, ...
Table C:
ID, ParentID, SomeColC, ...
ParentID points to the ID of table A. The result should look as follows:
ParentID ID_A ID_B SomeColB SomeColC
1 10 20 'VAL_B1' NULL
1 10 20 NULL 'VAL_C1'
2 11 21 'VAL_B2' NULL
2 11 21 NULL 'VAL_C2'
...
So I want to alternate between selecting values from Table B and C and leave the remaining columns on null. How would I do that?
I tried joining them together but this results in results being put into a single row.
EDIT: Both Table B and Table C have a 1-n relationship to Table A (one entry in Table A can be referenced from multiple entries in Table B and Table C). Table B and Table C don't reference each other and are completely independent of each other.
Would something like this work for you? I've used a UNION to get both sets of data per ParentID:
-- Stack the B-side and C-side rows, padding the other side's column with NULL.
-- NOTE(review): table and column names are placeholders taken from the answer;
-- map them onto the real schema before running.
SELECT
    *
FROM (
    SELECT
        ParentID,
        ID_A,
        ID_B,
        SomeColB,
        NULL AS SomeColC
    FROM
        TableA
    UNION
    SELECT
        ParentID,
        ID_A,
        ID_B,
        NULL AS SomeColB,
        SomeColC
    FROM
        TableB
) AS combined  -- SQL Server requires an alias on a derived table
ORDER BY
    ParentID,
    SomeColB,
    SomeColC
You should use union operator
-- Rows of both tables stacked into one three-column result; UNION removes duplicates.
SELECT
    IDA,
    ParentIDA,
    SomeColA
FROM first_table
UNION
SELECT
    IDB,
    ParentIDB,
    SomeColB
FROM second_table
UNION will skip duplicate records.
If you want to show the duplicate records, you should use the UNION ALL operator.
Looks like what you really want is a LEFT OUTER JOIN.
A slimmed down version of your select with the pertinent fields would look like this...
-- One row per tableA row (more when it has several children), with the matching
-- B and C values side by side; NULL where a child table has no match.
-- Children are matched through their ParentID, which points at tableA.ID.
SELECT
    a.ID       AS ParentID,
    b.SomeColB AS SomeColB,
    c.SomeColC AS SomeColC
FROM tableA a
LEFT OUTER JOIN tableB b
    ON b.ParentID = a.ID
LEFT OUTER JOIN tableC c
    ON c.ParentID = a.ID
;
Left outer joins include non-matching rows from the left table in the join, providing NULL values for the fields coming from the unmatched records in the table to the right in the join.
Bit of a stab in the dark, but it gets you the result set you want based on the sample data:
-- Self-contained demo: sample data as CTEs, then every parent row is emitted
-- twice (once per tag 'B' / 'C') so that only the child table matching the tag
-- joins on each copy and the other side stays NULL.
WITH A AS (
    SELECT ID
    FROM (VALUES (1), (2)) AS SampleA(ID)
),
B AS (
    SELECT SampleB.ID,
           SampleB.ParentID,
           SampleB.ColB
    FROM (VALUES (1, 10, 'Val_B1'),
                 (2, 11, 'Val_B2')) AS SampleB(ParentID, ID, ColB)
),
C AS (
    SELECT SampleC.ID,
           SampleC.ParentID,
           SampleC.ColC
    FROM (VALUES (1, 20, 'Val_C1'),
                 (2, 21, 'Val_C2')) AS SampleC(ParentID, ID, ColC)
)
SELECT A.ID AS ParentID,
       B.ID AS ID_A,
       C.ID AS ID_B,
       B.ColB,
       C.ColC
FROM A
CROSS APPLY (VALUES ('B'), ('C')) AS Tag(T)
LEFT JOIN B
    ON A.ID = B.ParentID
   AND Tag.T = 'B'   -- B joins only on the 'B' copy of the parent row
LEFT JOIN C
    ON A.ID = C.ParentID
   AND Tag.T = 'C'   -- C joins only on the 'C' copy of the parent row
ORDER BY A.ID,
         Tag.T;
DB<>fiddle
My guess is that it's a UNION of JOINs:
-- B rows and C rows of each parent as separate result rows; the columns that
-- belong to the other child table are padded with NULL.
-- UNION ALL: the branches cannot yield the same row (ID_B is set only in the
-- first, ID_C only in the second), so UNION's de-duplication pass is unnecessary.
-- NOTE(review): assumes B.ID and C.ID are non-null keys — confirm.
SELECT A.ID AS ParentID, B.ID AS ID_B, NULL AS ID_C, B.SomeColB, NULL AS SomeColC --, ..
FROM A
JOIN B ON A.ID = B.ParentID
UNION ALL
SELECT A.ID AS ParentID, NULL, C.ID AS ID_C, NULL, C.SomeColC --, ..
FROM A
JOIN C ON A.ID = C.ParentID
ORDER BY ParentID, ID_B, ID_C;
To repeat ids wrap it with one more SELECT:
-- Same stacked B/C rows, with ID_B and ID_C repeated on every row of a parent
-- by taking the per-parent maximum through a window function.
-- UNION ALL: the inner branches cannot yield the same row (ID_B is set only in
-- the first, ID_C only in the second), so de-duplication is unnecessary.
-- NOTE(review): assumes B.ID and C.ID are non-null keys — confirm.
SELECT ParentID
     , MAX(ID_B) OVER (PARTITION BY ParentID) AS ID_B
     , MAX(ID_C) OVER (PARTITION BY ParentID) AS ID_C
     , SomeColB, SomeColC --, --
FROM (
    SELECT A.ID AS ParentID, B.ID AS ID_B, NULL AS ID_C, B.SomeColB, NULL AS SomeColC --, ..
    FROM A
    JOIN B ON A.ID = B.ParentID
    UNION ALL
    SELECT A.ID AS ParentID, NULL, C.ID AS ID_C, NULL, C.SomeColC --, ..
    FROM A
    JOIN C ON A.ID = C.ParentID) t
ORDER BY ParentID, ID_B, ID_C;

Return matched-pair and the non-matched pair for each ID only once

Any help would be appreciated.
I have two sample tables here.
Table A:
ID |Name
123|REG
123|ERT
124|REG
124|ACR
Table B
ID |Name
123|REG
123|WWW
124|REG
124|ADR
Here is the simple join output and I will explain my question in the comments:
*Yes -- I want this row
*No -- I don't want this row
AID|Aname|BID|Bname
123|REG |123|REG --Yes-- Matched-pair for id '123'
123|ERT |123|REG --No--'REG' already had one match. 'ERT' should pair with 'WWW' for id '123'
123|REG |123|WWW --No--The same reason as above
123|ERT |123|WWW --Yes--non-matched pair for id '123'
124|REG |124|REG
124|ACR |124|REG
124|REG |124|ADR
124|ACR |124|ADR
My desired result:
AID|Aname|BID|Bname
123|ERT |123|WWW
123|REG |123|REG
124|REG |124|REG
124|ACR |124|ADR
SQL server 2017.
Thank you in advance.
My approach (inspired by the post from @The Impaler):
-- Per id: the pairs whose names match, plus the pairs built only from names
-- that have no match under that same id.
;WITH all_pairs AS (
    -- every A/B combination sharing an id
    SELECT A.id AS AID, A.NAME AS Aname, B.id AS BID, B.NAME AS Bname
    FROM A
    INNER JOIN B ON A.id = B.id
),
matched AS (
    -- combinations that also share the name
    SELECT A.id AS AID, A.NAME AS Aname, B.id AS BID, B.NAME AS Bname
    FROM A
    INNER JOIN B ON A.id = B.id AND A.NAME = B.NAME
)
SELECT p.AID, p.Aname, p.BID, p.Bname
FROM all_pairs AS p
-- The exclusions are correlated on the id: a name matched under one id must not
-- suppress the same name where it is unmatched under another id.
WHERE NOT EXISTS (SELECT 1 FROM matched AS m
                  WHERE m.AID = p.AID AND m.Aname = p.Aname)
  AND NOT EXISTS (SELECT 1 FROM matched AS m
                  WHERE m.BID = p.BID AND m.Bname = p.Bname)
UNION ALL
SELECT AID, Aname, BID, Bname
FROM matched
ORDER BY AID
Often when you think about the logic you want in a different way, the answer (or at least AN answer) becomes obvious.
I am thinking of your logic this way:
JOIN Table A to Table B such that A.ID=B.ID (always) AND EITHER
A.Name=B.Name OR A.Name doesn't have a Match in B, and B.Name doesn't
have a match in A.
This logic is pretty easy to express in SQL
-- Filter fragment (the SELECT/FROM over aliases a and b is not shown here).
-- Keeps a pair when the IDs agree and either the names match, or neither name
-- has a same-ID match on the other side.
WHERE a.ID=b.ID
AND (
a.Name=b.Name OR (
-- a's name does not occur in TableB under this ID
NOT EXISTS(SELECT * FROM TableB b2 WHERE b2.ID=a.ID AND b2.Name=a.Name)
AND
-- b's name does not occur in TableA under this ID
NOT EXISTS(SELECT * FROM TableA a2 WHERE a2.ID=b.ID AND a2.Name=b.Name)
)
)
I would do:
-- Matched pairs, plus unmatched A rows paired with unmatched B rows of the same
-- id by their position (rn) in name order.
WITH
m AS (  -- matched rows: same id and same name on both sides
    SELECT a.id AS aid, a.name AS aname, b.id AS bid, b.name AS bname
    FROM table_a a
    JOIN table_b b ON a.id = b.id AND a.name = b.name
),
l AS (  -- unmatched "left rows", numbered per id
    SELECT a.id, a.name,
           -- columns are qualified: both joined tables expose id and name
           ROW_NUMBER() OVER (PARTITION BY a.id ORDER BY a.name) AS rn
    FROM table_a a
    LEFT JOIN table_b b ON a.id = b.id AND a.name = b.name
    WHERE b.id IS NULL
),
r AS (  -- unmatched "right rows", numbered per id
    SELECT b.id, b.name,
           ROW_NUMBER() OVER (PARTITION BY b.id ORDER BY b.name) AS rn
    FROM table_b b
    LEFT JOIN table_a a ON a.id = b.id AND a.name = b.name
    WHERE a.id IS NULL
)
SELECT aid, aname, bid, bname
FROM m
UNION ALL
SELECT l.id, l.name, r.id, r.name
FROM l
JOIN r ON r.id = l.id AND r.rn = l.rn
Note: This solution may be a little bit overkill, since it pairs up all unmatched rows when there are multiple ones per ID, which is not necessary here. Per the OP's comments, there will always be a single unmatched row per ID.

Efficient way to get entire rows with MAX(id) when grouping by a foreign key

Consider tables A, B and C. B and C are related to A through a foreign key, and there are many Bs and Cs with the same A foreign key.
Suppose the following query:
-- Highest B.id and highest C.id for every A row that has at least one of each.
-- Join conditions and grouping reference A.pk directly: the select-list alias
-- pk_a is not visible inside ON.
SELECT
    A.pk      AS pk_a,
    MAX(B.id) AS new_b,
    MAX(C.id) AS new_c
FROM A
INNER JOIN B ON B.fk_a = A.pk
INNER JOIN C ON C.fk_a = A.pk
GROUP BY A.pk
I would like to retrieve the entire new_b and new_c rows from B and C for each GROUP BY pk_a.
Surely I could wrap this as a subselect and JOIN B ON b.id = new_b, and the same for C, but B and C are huge and I would like to avoid this.
I could also use SELECT DISTINCT ON (A.pk) A.pk, B.*, C.* and ORDER BY A.pk, B.id, C.id, but that would only guarantee the latest B row, not the latest C row.
Is there any other way I'm missing?
For few rows (like 2 or 3 or 5 on avg., depends) in B and C per row in A, DISTINCT ON is typically fastest.
For many rows per row in A, there are (much) more efficient solutions. And your information: "B and C are huge" indicates as much.
I suggest LATERAL subqueries with ORDER BY and LIMIT 1, backed by a matching index.
-- For each A row: the B row and the C row with the highest id, fetched by one
-- LIMIT 1 lateral subquery per child table. LEFT JOIN keeps A rows that have
-- no B or no C.
SELECT
    A.pk AS pk_a,
    latest_b.*,
    latest_c.*
FROM A
LEFT JOIN LATERAL (
    SELECT *
    FROM B
    WHERE B.fk_a = A.pk          -- lateral reference to the current A row
    ORDER BY B.id DESC
    LIMIT 1
) AS latest_b ON true
LEFT JOIN LATERAL (
    SELECT *
    FROM C
    WHERE C.fk_a = A.pk          -- lateral reference to the current A row
    ORDER BY C.id DESC
    LIMIT 1
) AS latest_c ON true;
Assuming B.id and C.id are NOT NULL.
You need at least indexes on the FK columns. Ideally, multi-column indexes on B (fk_a, id DESC) and C (fk_a, id DESC) though.
Use LEFT JOIN! to not exclude rows from A that are not referenced in either B or C. It would be an evil trap to use [INNER] JOIN here, since you join to two unrelated tables.
Detailed explanation:
Optimize GROUP BY query to retrieve latest record per user
Related:
Select first row in each GROUP BY group?
What is the difference between LATERAL and a subquery in PostgreSQL?
Simpler syntax with smart naming convention
The result of above query has pk_a once and fk_a twice. Useless ballast - and the same column name twice may be an actual problem, depending on your client.
You can spell out a column list in the outer SELECT (instead of the syntax shortcut A.*, B.*) to avoid redundancies. You may have to do that either way if there are more duplicate names or if you don't want all columns.
But with a smart naming convention, the USING clause can fold the redundant PK and FK columns for you:
-- Same "highest-id B and C per A" lookup, assuming the key is named a_id in all
-- three tables. The USING joins emit a_id once instead of once per table.
SELECT *
FROM A
LEFT JOIN LATERAL (
    SELECT *
    FROM B
    WHERE B.a_id = A.a_id
    ORDER BY B.id DESC
    LIMIT 1
) AS latest_b USING (a_id)
LEFT JOIN LATERAL (
    SELECT *
    FROM C
    WHERE C.a_id = A.a_id
    ORDER BY C.id DESC
    LIMIT 1
) AS latest_c USING (a_id);
Logically, USING (a_id) is redundant here, since WHERE B.a_id = A.a_id in the subquery already filters the same way. But the additional effect of USING is that joining columns are folded to one instance. So only one a_id remains in the result. The manual:
Furthermore, the output of JOIN USING suppresses redundant columns:
there is no need to print both of the matched columns, since they must
have equal values. While JOIN ON produces all columns from T1 followed
by all columns from T2, JOIN USING produces one output column for each
of the listed column pairs (in the listed order), followed by any
remaining columns from T1, followed by any remaining columns from T2.
It also typically makes a lot of sense to use the same name for the same data. So: a_id for PK and FK columns.
Is this what you are asking for?
-- For each A row, the single joined row carrying both its highest-id B and its
-- highest-id C.
-- The join yields every (b, c) combination per A row. DENSE_RANK marks every
-- row holding the top b.id (resp. c.id) with 1, so requiring both ranks to be 1
-- keeps exactly the combination of the two newest rows.
-- NOTE(review): assumes id is unique within B and within C — confirm.
SELECT abc.*
FROM (SELECT A.pk AS pk_a, b.*, c.*,
             DENSE_RANK() OVER (PARTITION BY a.pk ORDER BY b.id DESC) AS seqnum_b,
             DENSE_RANK() OVER (PARTITION BY a.pk ORDER BY c.id DESC) AS seqnum_c
      FROM A
      INNER JOIN B
          ON B.fk_a = A.pk      -- the alias pk_a is not visible inside ON
      INNER JOIN C
          ON C.fk_a = A.pk
     ) abc
WHERE seqnum_b = 1 AND seqnum_c = 1;
Actually, I think the above is on the right track, but you probably want:
-- For each A row that has both: its highest-id B row and highest-id C row.
-- Each derived table is first reduced to one row per fk_a with DISTINCT ON.
SELECT a.pk, b.*, c.*
FROM A
INNER JOIN (
    -- newest B row per fk_a
    SELECT DISTINCT ON (b.fk_a) b.*
    FROM b
    ORDER BY b.fk_a, b.id DESC
) b
    ON b.fk_a = a.pk      -- join on the real column; no pk_a alias exists in this query
INNER JOIN (
    -- newest C row per fk_a
    SELECT DISTINCT ON (c.fk_a) c.*
    FROM c
    ORDER BY c.fk_a, c.id DESC
) c
    ON c.fk_a = a.pk;
In Postgres 9.5, you can also use lateral joins for a similar effect.
How about this:
-- Highest B.id and C.id per A row, computed with window functions; DISTINCT
-- collapses the repeated joined rows to one per A.pk.
-- A.pk is referenced directly: the select-list alias pk_a is not visible in ON
-- or inside the window definitions.
SELECT DISTINCT
    A.pk AS pk_a,
    MAX(B.id) OVER (PARTITION BY A.pk) AS new_b,
    MAX(C.id) OVER (PARTITION BY A.pk) AS new_c
FROM A
INNER JOIN B ON B.fk_a = A.pk
INNER JOIN C ON C.fk_a = A.pk

SQL multiple shared WHERE criteria

I'm trying to find a good, efficient way to run a query like this:
-- Rows of tableA whose manager, team lead or creator is 'managerBob' or anyone
-- below 'managerBob' in the tableB hierarchy. The hierarchy subquery is written
-- out once per column.
SELECT *
FROM tableA a
WHERE a.manager IN (
          SELECT id
          FROM tableB b
          START WITH b.id = 'managerBob'
          CONNECT BY PRIOR b.id = b.manager_id
      )
   OR a.teamLead IN (
          SELECT id
          FROM tableB b
          START WITH b.id = 'managerBob'
          CONNECT BY PRIOR b.id = b.manager_id
      )
   OR a.creator IN (
          SELECT id
          FROM tableB b
          START WITH b.id = 'managerBob'
          CONNECT BY PRIOR b.id = b.manager_id
      )
As you can see, I'm trying to use multiple WHERE clauses, but each clause is using the same dataset on the right-hand side of the equation. It seems to run very slowly if I use more than one clause, and I'm pretty sure that it's because Oracle is running each subquery. Is there a way to make something like this work?
-- Pseudo-SQL: illustrates the shape the asker would like (several columns tested
-- against one shared subquery). The comma-separated list before IN is not valid
-- syntax and is not meant to run as written.
SELECT *
FROM tableA a
WHERE a.manager,
a.teamLead,
a.creator in ( SELECT id
FROM tableB b
CONNECT BY PRIOR b.id = b.manager_id
START WITH b.id = 'managerBob')
By the way, I'm sorry if this is something I could have Googled, I'm not sure what to call this.
Subquery factoring may help:
-- The hierarchy under 'managerBob' is written once as a factored subquery and
-- referenced by each of the three membership tests.
WITH people AS (
    SELECT id
    FROM tableB b
    START WITH b.id = 'managerBob'
    CONNECT BY PRIOR b.id = b.manager_id
)
SELECT *
FROM tableA a
WHERE a.manager IN (SELECT id FROM people)
   OR a.teamLead IN (SELECT id FROM people)
   OR a.creator IN (SELECT id FROM people)
You can do:
-- 'managerBob' and everyone beneath them in tableB, defined once and reused for
-- all three membership tests.
WITH bob_subordinates AS (
    SELECT id
    FROM tableB b
    CONNECT BY PRIOR b.id = b.manager_id
    START WITH b.id = 'managerBob'
)
SELECT *
FROM tableA a
WHERE a.manager IN (SELECT id FROM bob_subordinates)
   OR a.teamlead IN (SELECT id FROM bob_subordinates)
   OR a.creator IN (SELECT id FROM bob_subordinates)
Alternative (check the use of DISTINCT: if ids are not unique in table B then this is not equivalent):
-- Join form: a tableA row qualifies when any of its three person columns is in
-- the hierarchy under 'managerBob'. DISTINCT on the outer query removes the
-- repeats produced when more than one of the columns matches.
WITH bob_subordinates AS (
    SELECT DISTINCT id
    FROM tableB b
    CONNECT BY PRIOR b.id = b.manager_id
    START WITH b.id = 'managerBob'
)
SELECT DISTINCT a.*
FROM tableA a
JOIN bob_subordinates b
    ON b.id IN (a.manager, a.teamlead, a.creator);
UPDATE as per comments - try
-- tableA rows where the manager, team lead or creator is in the hierarchy under
-- 'managerBob'; the hierarchy is an inline view joined with an IN condition.
SELECT A.*
FROM (
    SELECT bb.id
    FROM tableB bb
    CONNECT BY PRIOR bb.id = bb.manager_id
    START WITH bb.id = 'managerBob'
) B
INNER JOIN TABLEA A
    ON B.ID IN (A.MANAGER, A.TEAMLEAD, A.CREATOR)