Left join tables in sqlite ordered by desc limit - sql

I want to left join two tables in sqlite. Short summary:
I have a table named "_Menu", one of the fields in this table is named "menu_id" with unique numbers.
Another table is called "_Approvals". This table has a history of which items have been "approved" or "unapproved". This table also has a field named "menu_id".
I want to get the lowest row (if there is any) from "_Approvals" for a given menu_id and join the two tables.
What I have so far is:
SELECT m.menu_id, m.p_id AS parent_id, m.name, m.url, a.status, a.auth
FROM _Menu AS m
LEFT JOIN (
SELECT * FROM _Approvals ORDER BY _Approvals.approval_id DESC LIMIT 1) as a
ON a.menu_id = m.menu_id
GROUP BY m.menu_id
ORDER BY m.menu_id
My problem is only the absolute last row in "_Approvals" gets joined. I.e I only know the status of the last item approved/unapproved.
Any help is greatly appreciated!

You want the lowest row (if there is any) from "_Approvals" for a given menu_id, not the lowest row from "_Approvals" overall, which is what your code returns.
One way to do what you need is by using NOT EXISTS in the subquery that you join:
SELECT m.menu_id, m.p_id AS parent_id, m.name, m.url, a.status, a.auth
FROM _Menu AS m
LEFT JOIN (
SELECT * FROM _Approvals t
WHERE NOT EXISTS (
SELECT 1 FROM _Approvals
WHERE menu_id = t.menu_id AND approval_id > t.approval_id
)
) AS a
ON a.menu_id = m.menu_id
GROUP BY m.menu_id
ORDER BY m.menu_id
Another way with a CTE:
-- Every menu item with the status/auth of its most recent approval row
-- (highest approval_id); menus without any approval keep NULLs.
WITH cte AS (
    SELECT t.*
    FROM _Approvals t
    INNER JOIN (
        -- Highest approval_id recorded for each menu_id.
        SELECT menu_id, MAX(approval_id) AS max_approval_id
        FROM _Approvals
        GROUP BY menu_id
    ) g
        ON g.menu_id = t.menu_id
        -- Without this condition every approval row of the menu survives the join.
        AND g.max_approval_id = t.approval_id
)
SELECT m.menu_id, m.p_id AS parent_id, m.name, m.url, a.status, a.auth
FROM _Menu AS m
LEFT JOIN cte AS a
    ON a.menu_id = m.menu_id
GROUP BY m.menu_id
ORDER BY m.menu_id
Or with window function ROW_NUMBER() if your version of SQLite is 3.25.0+:
WITH cte AS (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY menu_id ORDER BY approval_id DESC) rn
FROM _Approvals
)
SELECT m.menu_id, m.p_id AS parent_id, m.name, m.url, a.status, a.auth
FROM _Menu AS m LEFT JOIN (
SELECT * FROM cte
WHERE rn = 1
) AS a
ON a.menu_id = m.menu_id
GROUP BY m.menu_id
ORDER BY m.menu_id

I would recommend writing this using row_number():
SELECT m.menu_id, m.p_id AS parent_id, m.name, m.url,
a.status, a.auth
FROM _Menu m LEFT JOIN
(SELECT a.*,
ROW_NUMBER() OVER (PARTITION BY a.menu_id ORDER BY a.approval_id DESC) as seqnum
FROM _Approvals a
) a
ON a.menu_id = m.menu_id AND a.seqnum = 1
ORDER BY m.menu_id;
No aggregation should be needed for the query, assuming that menu_id is the primary key in _Menu.
If you are using an old version of SQLite that doesn't support window functions, then there are several options. Probably the simplest is:
-- Variant for SQLite versions without window functions: look up each menu's
-- highest approval_id first, then fetch only the approval row carrying it.
SELECT m.menu_id, m.p_id AS parent_id, m.name, m.url,
       a.status, a.auth
FROM _Menu m LEFT JOIN
     (SELECT a.menu_id, MAX(a.approval_id) AS max_approval_id
      FROM _Approvals a
      GROUP BY a.menu_id
     ) aa
     ON aa.menu_id = m.menu_id LEFT JOIN
     _Approvals a
     -- Joining _Approvals on the maximum (rather than before it) is what
     -- discards the older approval rows.
     ON a.menu_id = aa.menu_id AND
        a.approval_id = aa.max_approval_id
ORDER BY m.menu_id;

Related

How optimize select with max subquery on the same table?

We have many old selects like this:
-- Mappings, with person and datasource details, whose (R_PERSONES, MATCHCODE)
-- pair also appears on a row holding the highest ID of some MATCHCODE.
SELECT
    tm."ID",tm."R_PERSONES",tm."R_DATASOURCE",tm."MATCHCODE",
    d.NAME AS DATASOURCE,
    p.PDID
FROM TABLE_MAPPINGS tm,
     PERSONES p,
     DATASOURCES d,
     (select ID
      from TABLE_MAPPINGS
      where (R_PERSONES, MATCHCODE)
            in (select
                    R_PERSONES, MATCHCODE
                from TABLE_MAPPINGS
                where
                    -- newest row (max id) of every MATCHCODE
                    id in (select max(id)
                           from TABLE_MAPPINGS
                           group by MATCHCODE)
               )
     ) tm2
WHERE tm.R_PERSONES = p.ID
  AND tm.R_DATASOURCE=d.ID
  and tm2.id = tm.id;
These are large tables, and queries take a long time.
How to rebuild them?
Thank you
You can query the table only once using something like (untested as you have not provided a minimal example of your create table statements or sample data):
SELECT *
FROM (
SELECT m.*,
COUNT(CASE WHEN rnk = 1 THEN 1 END)
OVER (PARTITION BY r_persones, matchcode) AS has_max_id
FROM (
SELECT tm.ID,
tm.R_PERSONES,
tm.R_DATASOURCE,
tm.MATCHCODE,
d.NAME AS DATASOURCE,
p.PDID,
RANK() OVER (PARTITION BY tm.matchcode ORDER BY tm.id DESC) As rnk
FROM TABLE_MAPPINGS tm
INNER JOIN PERSONES p ON tm.R_PERSONES = p.ID
INNER JOIN DATASOURCES d ON tm.R_DATASOURCE = d.ID
) m
)
WHERE has_max_id > 0;
First finding the maximum ID using the RANK analytic function and then finding all the relevant r_persones, matchcode pairs using conditional aggregation in a COUNT analytic function.
Note: you want to use the RANK or DENSE_RANK analytic functions to match the maximums as it can match multiple rows per partition; whereas ROW_NUMBER will only ever put a single row per partition first.
You're querying table_mappings 3 times; how about doing it only once?
-- Reads table_mappings once: number the rows of each matchcode from newest id
-- to oldest and keep only the newest one.
WITH
   tab_map
   AS
      (SELECT a.id,
              a.r_persones,
              a.matchcode,
              -- must be projected here: the outer query joins datasources on it
              a.r_datasource,
              ROW_NUMBER ()
                 OVER (PARTITION BY a.matchcode ORDER BY a.id DESC) rn
         FROM table_mappings a)
SELECT tm.id,
       tm.r_persones,
       tm.matchcode,
       d.name AS datasource,
       p.pdid
  FROM tab_map tm
       JOIN persones p ON p.id = tm.r_persones
       JOIN datasources d ON d.id = tm.r_datasource
 WHERE tm.rn = 1

JOIN 2 tables ORDER BY SUM value

I have 2 tables: 1st is comment, 2nd is rating
SELECT * FROM comment_table a
INNER JOIN (SELECT comment_id, SUM(rating_value) AS total_rating FROM rating_table GROUP BY comment_id) b
ON a.comment_id = b.comment_id
ORDER BY b.total_rating DESC
I tried the above SQL but doesn't work!
Object is to display a list of comments order by rating points of each comments.
-- Comments together with their summed rating, highest total first.
SELECT s.*
FROM (
    -- a.* plus total_rating only: selecting * from both join sides would
    -- expose comment_id twice, which several engines reject in a derived table.
    SELECT a.*, b.total_rating
    FROM comment_table a
    INNER JOIN (
        SELECT comment_id, SUM(rating_value) AS total_rating
        FROM rating_table
        GROUP BY comment_id
    ) b
        ON a.comment_id = b.comment_id
) AS s
ORDER BY s.total_rating DESC
Nest it inside another select. It will then output the data in the correct order.

Select second most recent date from inner join

I have this query :
SELECT
companies.display_name, companies.pay_schedule_id,
pay_schedule_periods.schedule_id,
pay_schedule_periods.created_at
FROM
companies
INNER JOIN
pay_schedule_periods ON pay_schedule_id = pay_schedule_periods.schedule_id
ORDER BY
companies.display_name, pay_schedule_periods.created_at DESC;
I get this result :
How can I select only the second most recent created_at date from each unique display_name ?
You could use row_number to assign a sequence to your dates and apply this before joining, then include as part of your join criteria, such as:
-- Number each schedule's periods from newest to oldest before joining, then
-- keep only the second newest (rn = 2) for every company.
select c.display_name, c.pay_schedule_id, psp.schedule_id, psp.created_at
from companies c
join (
    -- pay_schedule_periods is keyed by schedule_id; pay_schedule_id is the
    -- matching column on companies.
    select schedule_id, created_at,
           Row_Number() over(partition by schedule_id order by created_at desc) rn
    from pay_schedule_periods
) psp on psp.schedule_id = c.pay_schedule_id and psp.rn = 2
order by c.display_name, psp.created_at desc;
You could also apply this using a lateral join which would simplify further.

Get Min date as condition

I have a table that contains invoices for all phone numbers, and each number has several invoices. I want to display only the first invoice for a specific number, but I don't know how to get only the first invoice. This is my query:
SELECT
b.contrno
a.AR_INVDATE
FROM P_STG_TABS.IVM_INVOICE_RECORD a
INNER JOIN P_EDW_TMP.invoice b
ON b.contrno=a.contrno
WHERE a.AR_INVDATE< (SELECT AR_INVDATE FROM P_STG_TABS.IVM_INVOICE_RECORD WHERE contrno=b.contrno )
Teradata supports a QUALIFY clause to filter the result of an OLAP function (similar to HAVING after GROUP BY), which greatly simplifies Tim Biegeleisen's answer:
SELECT *
FROM P_STG_TABS.IVM_INVOICE_RECORD a
INNER JOIN P_EDW_TMP.invoice b
ON b.contrno = a.contrno
QUALIFY
ROW_NUMBER()
OVER (PARTITION BY b.contrno
ORDER BY a.AR_INVDATE) = 1
Additionally you can apply the ROW_NUMBER before the join (might be more efficient depending on additional conditions):
-- Reduce the invoice records to the earliest AR_INVDATE per contrno first,
-- then join the single remaining row to the invoice table.
SELECT *
FROM
 ( SELECT *
   FROM P_STG_TABS.IVM_INVOICE_RECORD a
   QUALIFY
      ROW_NUMBER()
      -- only alias a is in scope inside this derived table
      OVER (PARTITION BY a.contrno
            ORDER BY a.AR_INVDATE) = 1
 ) AS a
INNER JOIN P_EDW_TMP.invoice b
ON b.contrno = a.contrno
Use ROW_NUMBER():
SELECT
t.contrno,
t.AR_INVDATE
FROM
(
SELECT
b.contrno,
a.AR_INVDATE,
ROW_NUMBER() OVER (PARTITION BY b.contrno ORDER BY a.AR_INVDATE) rn
FROM P_STG_TABS.IVM_INVOICE_RECORD a
INNER JOIN P_EDW_TMP.invoice b
ON b.contrno = a.contrno
) t
WHERE t.rn = 1;
If you are worried about ties, and you want to display all ties, then you can replace ROW_NUMBER with either RANK or DENSE_RANK.
If I correctly understand, then one way is to use group by with min(a.AR_INVDATE):
SELECT
b.contrno,
min(a.AR_INVDATE)
FROM P_STG_TABS.IVM_INVOICE_RECORD a
INNER JOIN P_EDW_TMP.invoice b
ON b.contrno=a.contrno
group by b.contrno

How can I fix my GROUP BY clause

I have 2 tables as seen below:
Now the question is :
How can I have a view which shows the details of the last Owner? in other words I need the details of person who has MAX(StartDate) in tbl_Owners table?
I want to find the latest owner of each apartment.
I tried different approaches but I couldn't find the way to do that.
I know I need to get the personID in a Group By clause which groups records by AppID but I can't do that
Thank you
Try this
select t1.* from tbl_persons as t1 inner join
(
select t1.* from tbl_owners as t1 inner join
(
select appid,max(startdate) as startdate from tbl_owners group by appid
) as t2
on t1.appid=t2.appid and t1.startdate=t2.startdate
) as t2
on t1.personid=t2.personid
Add this to your query:
-- Join fragment: o2 holds one row per AppId with that apartment's latest
-- StartDate, so the join keeps only the most recent owner row per apartment.
JOIN (select AppId, MAX(StartDate) as MAxStartDate
      from dbo.tbl_Owners
      -- group by the selected key; grouping by PersonId would not give one row per AppId
      group by AppId) o2
ON dbo.tbl_Owners.AppId= o2.AppId and
   dbo.tbl_Owners.StartDate = o2.MAxStartDate
The sub-query above returns every AppId together with its latest StartDate. Self-joining with that result will give you what you want.
You can USE CTE for this purpose
;WITH CTE AS
(
SELECT AppID,PersonID,StartDate,
ROW_NUMBER() OVER (PARTITION BY AppID ORDER BY StartDate DESC) RN
FROM TableNAme
GROUP BY AppID,PersonID,StartDate
)
SELECT * FROM CTE
WHERE RN=1
Using row_number
select t.*, p.* -- change as needed
from (select *, rn= row_number() over(partition by AppID order by StartDate desc)
from dbo.tbl_Owners
) t
join dbo.tbl_Persons p on t.rn=1 and t.PersonId = p.PersonId
using cross apply
-- Latest owner of each apartment: for every AppID pick its newest owner row,
-- then attach that person's details.
select t.*, p.* -- change as needed
from (select distinct AppID from dbo.tbl_Owners) a
cross apply (
    select top(1) *
    from dbo.tbl_Owners o
    -- correlate on the apartment; correlating on PersonId would instead
    -- return each person's latest ownership
    where o.AppID = a.AppID
    order by o.StartDate desc
) t
join dbo.tbl_Persons p on p.PersonId = t.PersonId
-- Joins every owner row to its person and groups the result by StartDate.
-- NOTE(review): the select list contains columns that are neither grouped nor
-- aggregated, and HAVING MAX(StartDate) has no comparison operator; SQL Server
-- is expected to reject both - confirm before relying on this query.
SELECT dbo.tbl_Owners.*,dbo.tbl_Persons.PersonFullname FROM dbo.tbl_Owners
INNER JOIN
dbo.tbl_Persons ON dbo.tbl_Owners.PersonID=dbo.tbl_Persons.PersonID
GROUP BY dbo.tbl_Owners.StartDate HAVING MAX(StartDate);
Use GROUP BY on StartDate instead of PersonID.