Postgres sub query returns 1 row, but exists returns false - sql

I'm not sure how this is possible. This is an inner query for a NOT EXISTS test.
-- Diagnostic query: the inner SELECT keeps one row per "contractId" from the
-- join of "contractHistory", "clients" and "contracts"; the outer SELECT then
-- keeps that row only if its status is 'Signed' and its client id is 12345.
-- NOTE(review): DISTINCT ON is used without an ORDER BY, so which history row
-- survives for each "contractId" is not pinned down by the query itself --
-- confirm against the PostgreSQL SELECT documentation.
SELECT subq.* FROM(
-- "window" carries the id of the newest history row per contract (ordered by
-- "timeStamp" desc); the outer WHERE clause never filters on it.
SELECT distinct on("contractId") "contractId", clients.id, clients.name, "contractHistory".status, "contractHistory"."timeStamp", first_value("contractHistory"."id") over(partition by "contractId" order by "timeStamp" desc) as window
FROM "contractHistory", "clients", "contracts"
WHERE "contractHistory"."contractId" = "contracts"."id"
AND "clients"."id" = "contracts"."clientId"
-- Only contracts that have already opened and expire more than one day from now.
AND contracts.opened < now()
AND contracts.expires > now() + '1 day'::interval
) AS subq
WHERE subq.status = 'Signed'
AND subq.id = 12345
;
(1 row)
If I change the outer SELECT to count(subq.*) I get:
count
1
So far so good. But wrap the entire original query in SELECT EXISTS and:
exists
f
Why is this? I need this query to be wrapped in an outer query:
-- Returns every 'Active' client c for which the correlated subquery yields no
-- row, i.e. no per-contract row with status 'Signed' whose client id equals c.id.
SELECT * FROM "clients" AS c WHERE status = 'Active' AND NOT EXISTS(
SELECT subq.* FROM(
-- NOTE(review): DISTINCT ON without ORDER BY -- the row kept per "contractId"
-- (and therefore its status) is not fixed by the query; confirm against the
-- PostgreSQL SELECT documentation.
SELECT distinct on("contractId") "contractId", clients.id, clients.name, "contractHistory".status, "contractHistory"."timeStamp", first_value("contractHistory"."id") over(partition by "contractId" order by "timeStamp" desc) as window
-- The inner "clients" reference is independent of the outer alias c; the two
-- are tied together only by subq.id = c.id below.
FROM "contractHistory", "clients", "contracts"
WHERE "contractHistory"."contractId" = "contracts"."id"
AND "clients"."id" = "contracts"."clientId"
AND contracts.opened < now()
AND contracts.expires > now() + '1 day'::interval
) AS subq
WHERE subq.status = 'Signed'
AND subq.id = c.id
);
It is returning a row for the outer query even though the inner query returns 1 row.
Edit to add SELECT EXISTS:
-- Same probe wrapped in EXISTS: yields a single boolean telling whether the
-- filtered subquery produces at least one row for client id 12345.
SELECT EXISTS(
SELECT subq.* FROM(
-- NOTE(review): no ORDER BY accompanies DISTINCT ON here, so the surviving row
-- per "contractId" is not determined by the query text -- TODO confirm whether
-- this explains the differing results.
SELECT distinct on("contractId") "contractId", clients.id, clients.name, "contractHistory".status, "contractHistory"."timeStamp", first_value("contractHistory"."id") over(partition by "contractId" order by "timeStamp" desc) as window
FROM "contractHistory", "clients", "contracts"
WHERE "contractHistory"."contractId" = "contracts"."id"
AND "clients"."id" = "contracts"."clientId"
AND contracts.opened < now()
AND contracts.expires > now() + '1 day'::interval
) AS subq
WHERE subq.status = 'Signed'
AND subq.id = 12345
);
exists
f
(1 row)
And just for completeness:
-- Variant that projects the constant 1 instead of subq.*; the derived table
-- and both outer filters are unchanged from the earlier probes.
SELECT 1 FROM(
-- NOTE(review): DISTINCT ON has no matching ORDER BY, so the row chosen per
-- "contractId" is left open by the query -- verify against the PostgreSQL docs.
SELECT distinct on("contractId") "contractId", clients.id, clients.name, "contractHistory".status, "contractHistory"."timeStamp", first_value("contractHistory"."id") over(partition by "contractId" order by "timeStamp" desc) as window
FROM "contractHistory", "clients", "contracts"
WHERE "contractHistory"."contractId" = "contracts"."id"
AND "clients"."id" = "contracts"."clientId"
AND contracts.opened < now()
AND contracts.expires > now() + '1 day'::interval
) AS subq
WHERE subq.status = 'Signed'
AND subq.id = 12345
?column?
(0 rows)

Related

Converting Subquery into Single Query

I've a query that has multiple subqueries with parameters as follows:
-- Loan report: one output row per LOAN_ADJ_DETAILS_NEW row (alias m) dated
-- '01-AUG-2022' with a positive RECAMOUNT, joined to its loan master record and
-- employee. Each derived column is a separate correlated scalar subquery
-- against LOAN_ADJ_DETAILS_NEW for the same APP_NO.
-- NOTE(review): RECDATE is compared with the string literal '01-AUG-2022';
-- presumably this relies on an implicit string-to-date conversion -- confirm
-- the column type and session date format.
SELECT
V.EMPNO,
V.FIRST_NAME || ' ' || V.MIDDLE_NAME
|| ' '
|| V.LAST_NAME FULLNAME,
M.APP_NO,
K.TOTAL_AMT,
-- REMAINING: loan total minus the sum of RECAMOUNT up to and including the date.
K.TOTAL_AMT - (SELECT
SUM(Q.RECAMOUNT)
FROM
LOAN_ADJ_DETAILS_NEW q
WHERE
Q.APP_NO = M.APP_NO
AND Q.RECDATE <= '01-AUG-2022') REMAINING,
-- TOTALADJ: the same SUM as the one subtracted for REMAINING.
(SELECT
SUM(P.RECAMOUNT)
FROM
LOAN_ADJ_DETAILS_NEW p
WHERE
P.APP_NO = M.APP_NO
AND P.RECDATE <= '01-AUG-2022') TOTALADJ,
-- INSTALLMENT: number of rows with a non-null RECDATE up to the date.
(SELECT
COUNT(*)
FROM
LOAN_ADJ_DETAILS_NEW p
WHERE
P.APP_NO = M.APP_NO
AND P.RECDATE IS NOT NULL
AND P.RECDATE <= '01-AUG-2022') INSTALLMENT,
-- LASTINSTALL: largest RECAMOUNT among rows dated exactly on the date
-- (equality, unlike the <= used by the other subqueries).
(SELECT
MAX(Q.RECAMOUNT)
FROM
LOAN_ADJ_DETAILS_NEW q
WHERE
Q.APP_NO = M.APP_NO
AND Q.RECDATE = '01-AUG-2022') LASTINSTALL,
-- LASTDATE: most recent RECDATE up to the date.
(SELECT
MAX(S.RECDATE)
FROM
LOAN_ADJ_DETAILS_NEW s
WHERE
S.APP_NO = M.APP_NO
AND S.RECDATE <= '01-AUG-2022') LASTDATE,
-- PAID: identical to the TOTALADJ subquery.
(SELECT
SUM(P.RECAMOUNT)
FROM
LOAN_ADJ_DETAILS_NEW p
WHERE
P.APP_NO = M.APP_NO
AND P.RECDATE <= '01-AUG-2022') PAID
FROM
LOAN_ADJ_DETAILS_NEW m,
TBL_LOAN_MASTER k,
EMP_PERSONAL v
WHERE
V.EMPNO = M.EMPNO
AND K.LOAN_ID = M.APP_NO
AND M.RECAMOUNT > 0
-- The IS NOT NULL and >= predicates are already implied by the equality on
-- M.RECDATE between them.
AND M.RECDATE IS NOT NULL
AND M.RECDATE = '01-AUG-2022'
AND M.RECDATE >= '01-AUG-2022';
The query is simple: it just retrieves loan information per user. What I need now is to turn it into a single query, something like the following:
-- Desired shape of the final statement: one relation filtered by one parameter.
-- TABLE, COLUMN and PARAMETER are placeholders, not real identifiers.
SELECT * FROM TABLE WHERE COLUMN <= PARAMETER;
The query above will be used as a dynamic query from the database to the front end. I was hoping this could be converted into a single query or a view.
You can use analytic functions and combine the sub-queries into a single one in the FROM clause:
-- Single-pass version of the loan report: every per-loan figure is computed
-- once with analytic (window) functions over LOAN_ADJ_DETAILS_NEW instead of
-- one correlated scalar subquery per column.
SELECT V.EMPNO,
       V.FIRST_NAME || ' ' || V.MIDDLE_NAME || ' ' || V.LAST_NAME
           AS FULLNAME,
       M.APP_NO,
       K.TOTAL_AMT,
       K.TOTAL_AMT - m.paid AS REMAINING,
       m.totaladj,
       m.paid,
       m.installment,
       m.lastinstall,
       m.lastdate
FROM   ( SELECT app_no,
                empno,
                recamount,
                recdate,
                -- totaladj and paid are the same running total in the
                -- correlated-subquery form; both names are kept for callers.
                SUM(RECAMOUNT) OVER (PARTITION BY app_no) AS totaladj,
                SUM(RECAMOUNT) OVER (PARTITION BY app_no) AS paid,
                -- The WHERE clause below already drops NULL RECDATE rows, so
                -- COUNT(*) counts only dated installments.
                COUNT(*)       OVER (PARTITION BY app_no) AS installment,
                -- LASTINSTALL only considers rows dated exactly on the cut-off
                -- day, so the MAX is restricted with a CASE rather than taken
                -- over every row up to that day.
                MAX(CASE WHEN RECDATE = DATE '2022-08-01' THEN RECAMOUNT END)
                               OVER (PARTITION BY app_no) AS lastinstall,
                MAX(RECDATE)   OVER (PARTITION BY app_no) AS lastdate
         FROM   LOAN_ADJ_DETAILS_NEW
         WHERE  RECDATE <= DATE '2022-08-01'
       ) m
       INNER JOIN TBL_LOAN_MASTER k
           ON (K.LOAN_ID = M.APP_NO)
       INNER JOIN EMP_PERSONAL v
           ON (V.EMPNO = M.EMPNO)
-- Row-level filters are applied after the window functions have seen every
-- row up to the cut-off day.
WHERE  M.RECAMOUNT > 0
AND    M.RECDATE = DATE '2022-08-01';

Left Join Lateral is Very Slow

I have the following query
-- For each hourly timestamp of the last day, counts instances per version,
-- using the newest known version of every instance as of that timestamp.
-- time_series: hourly timestamps from one day ago up to now.
WITH time_series AS (
SELECT *
FROM generate_series(now() - interval '1days', now(), INTERVAL '1 hour') AS ts
-- recent_instances: instances of group $1 that checked for updates within the
-- last day. The second column is last_update_granted_ts, falling back to
-- created_ts when it is NULL; status is the constant 4.
), recent_instances AS (
SELECT instance_id,
(CASE WHEN last_update_granted_ts IS NOT NULL THEN last_update_granted_ts ELSE created_ts END),
version,
4 status
FROM instance_application
WHERE group_id=$1
AND last_check_for_updates >= now() - interval '1days'
-- NOTE(review): this ordering is presumably irrelevant to the final result,
-- since the CTE is only consumed through IN (...) and UNION -- confirm.
ORDER BY last_update_granted_ts DESC
-- instance_versions: status-4 history rows of those instances, merged (UNION,
-- so duplicates are removed) with the recent_instances rows themselves.
-- Column names come from the first SELECT of the UNION.
), instance_versions AS (
SELECT instance_id, created_ts, version, status
FROM instance_status_history
WHERE instance_id IN (SELECT instance_id
FROM recent_instances)
AND status = 4
UNION
(SELECT * FROM recent_instances)
ORDER BY created_ts DESC
)
-- Timestamps with no matching instance yield version '' and total 0.
SELECT ts,
(CASE WHEN version IS NULL THEN '' ELSE version END),
sum(CASE WHEN version IS NOT null THEN 1 ELSE 0 END) total
FROM (
SELECT *
FROM time_series
-- The lateral subquery references time_series.ts, so it is correlated with
-- each generated timestamp: DISTINCT ON + ORDER BY keeps, per instance, the
-- row with the greatest created_ts not later than that timestamp.
LEFT JOIN LATERAL (
SELECT distinct ON (instance_id) instance_Id, version, created_ts
FROM instance_versions
WHERE created_ts <= time_series.ts
ORDER BY instance_Id, created_ts DESC
) _ ON true
) AS _
GROUP BY 1,2
ORDER BY ts DESC;
So the subquery over instance_versions is executed for every timestamp generated by the time_series query (see the last SELECT statement). For some reason the lateral join is very slow. The lateral subquery returns roughly 12k–15k rows for a single timestamp from time_series, which is not a big number, and the final number of rows after the lateral join ranges from 250k to 350k. Is there a way I can optimize this?

Divide results from two query by another query in SQL

I have this query in Metabase:
-- Both CTEs are declared under a single WITH, separated by a comma; repeating
-- the WITH keyword for the second CTE is a syntax error.
-- l1: number of chat tickets per day in the 'transfer_investigations' queue.
with l1 as (SELECT date_trunc ('day', Ticket_Escalated_At) as time_scale, count (Ticket_ID) as chat_per_day
FROM CHAT_TICKETS where SUPPORT_QUEUE = 'transfer_investigations'
and date_trunc('month', TICKET_ESCALATED_AT) > now() - interval '6' Month
GROUP by 1),
-- l2: number of Jira issues per day in project 'TI2', excluding the two listed
-- issue types.
l2 as (SELECT date_trunc('day', created_date) as week, count(*) as TI_watchman_ticket
FROM jira_issues
WHERE issue_type NOT IN ('Transfer - General', 'TI - Advanced')
and date_trunc('month', created_date) > now() - interval '6' Month
and project_key = 'TI2'
GROUP BY 1)
-- The two daily series are stacked into one result; UNION also removes
-- identical (day, count) rows.
SELECT l1.* from l1
UNION SELECT l2.* from l2
ORDER by 1
and this one:
-- hours: per day of shift start, the total shift length in hours
-- (minutes_between / 60) summed over the 'TI - Non-watchman' and
-- 'TI - Watchman' positions; each partial sum defaults to 0 via ifnull.
-- NOTE(review): only the CTE is shown here -- no main SELECT follows it.
with hours as (SELECT date_trunc('day', ws.start_time) as date_
,(ifnull(sum((case when ws.shift_position = 'TI - Non-watchman' then (minutes_between(ws.end_time, ws.start_time)/60) end)),0) + ifnull(sum((case when ws.shift_position = 'TI - Watchman' then (minutes_between(ws.end_time, ws.start_time)/60) end)),0) ) as total
from chat_agents a
join wiw_shifts ws on a.email = ws.user_email
-- Matches employees whose email prefix (the part before '#revolut') equals the
-- agent login.
left join people_ops.employees h on substr(h.email,1, instr(h.email,'#revolut') - 1) = a.login
-- NOTE(review): seniority is unqualified; which table it comes from is not
-- visible here -- confirm.
where (seniority != 'Lead' or seniority is null)
and date_trunc('month', ws.start_time) > now() - interval '6' Month
GROUP BY 1)
I would like to divide the output of the UNION in the first query by the result of the second one. Any ideas?

How to calculate running sums with append-only rows

I have a table where rows are never mutated but only inserted; they are immutable records. It has the following fields:
id: int
user_id: int
created: datetime
is_cool: boolean
likes_fruits: boolean
An object is tied to a user, and the "current" object for a given user is the one that has the latest created date. E.g. if I want to update is_cool for a user, I'd append a record with a new created timestamp and is_cool=true.
I want to calculate how many users are is_cool at the end of each day. I.e. I'd like the output table to have the columns:
day: some kind of date_trunc('day', created)
cool_users_count: number of users that have is_cool at the end of this day.
What SQL query can I write that does this? FWIW, I'm using Presto (or Redshift if need be).
Note that there are other columns, e.g. likes_fruits, which means a record where is_cool is false does not mean is_cool was just changed to false - it could have been false for a while.
This is what procedural pseudo-code would look like to represent what I'd want to do in SQL:
// Pseudo-code: for every date between the first and last record, take each
// user's latest record created up to that date and count those with is_cool.
// rows = ...
min_date = min([row.created for row in rows])
max_date = max([row.created for row in rows])
counts_by_day = {}
for date in range(min_date, max_date):
// Only records created up to this date are considered.
rows_up_until_date = [row for row in rows if row.created <= date]
// Later records overwrite earlier ones under the same user_id key
// (presumably rows are iterated in creation order -- confirm).
latest_row_by_user = rows_up_until_date.reduce(
{},
(acc, row) => acc[row.user_id] = row,
)
counts_by_day[date] = latest_row_by_user.filter(row => row.is_cool).length
You can do this using just a query: try summing over the boolean and grouping by the date
-- Number of is_cool records inserted per calendar day (counts rows, not
-- distinct users). SUM needs a numeric argument in Presto and Redshift, so the
-- boolean is mapped to 1/0 before being summed.
select date(created), sum(case when is_cool then 1 else 0 end)
from my_table
group by date(created)
or if you need the number of users
-- Number of (day, user) combinations per day that have at least one record
-- with is_cool = TRUE created on that day.
select per_user.date_created,
       count(*) num_user
from (
    -- One row per day and user; grouping collapses duplicates the same way
    -- a DISTINCT over the two columns does.
    select date(created) date_created,
           user_id
    from my_table
    where is_cool = TRUE
    group by date(created), user_id
) per_user
group by per_user.date_created
or if need the last value for is_cool
-- Per day, the number of users whose last record created on that day has
-- is_cool = TRUE. Days on which a user inserted nothing are not carried
-- forward from earlier days.
select date(latest.max_created) as day,
       count(*) as cool_users_count
from my_table m
inner join (
    -- Latest record timestamp per user and calendar day. The table column is
    -- "created"; there is no "date_created" column in the described schema.
    select user_id,
           max(created) as max_created
    from my_table
    group by user_id, date(created)
) latest
    on latest.user_id = m.user_id
   and latest.max_created = m.created
-- Only cool "last records" survive, so counting rows replaces the SUM over the
-- boolean column, which Presto and Redshift do not accept.
where m.is_cool = TRUE
group by date(latest.max_created)
A correlated subquery might be the simplest solution. The following gets the value of is_cool for each user on each date:
-- One row per (user, date) pair from the cross join below, with the is_cool
-- value of that user's most recent record created before the end of that date.
select u.user_id, d.date,
-- Correlated lookup: newest row of the user strictly before the start of the
-- following day. NOTE(review): presumably yields NULL when the user has no
-- record yet by that date -- confirm on the target engine.
(select t.is_cool
from t
where t.user_id = u.user_id and
t.created < dateadd(day, 1, d.date)
order by t.created desc
limit 1
) as is_cool
-- d: every calendar date on which at least one record was created.
from (select distinct date(created) as date
from t
) d cross join
-- u: every user that appears in the table.
(select distinct user_id
from t
) u ;
Then aggregate:
-- Number of users whose most recent record as of the end of each date has
-- is_cool set. The boolean is mapped to 1/0 before summing because SUM needs a
-- numeric argument in Presto and Redshift.
select date,
       sum(case when is_cool then 1 else 0 end) as cool_users_count
from (select u.user_id, d.date,
             -- Newest record of the user created strictly before the start of
             -- the following day.
             (select t.is_cool
              from t
              where t.user_id = u.user_id and
                    t.created < dateadd(day, 1, d.date)
              order by t.created desc
              limit 1
             ) as is_cool
      -- Every date with at least one record, paired with every user.
      from (select distinct date(created) as date
            from t
           ) d cross join
           (select distinct user_id
            from t
           ) u
     ) ud
group by date;

Convert a nested subquery into normal query

I have a problem with the following query, in which the nested query should be
converted to a normal query:
-- Per formatted update timestamp: the number of report-history rows and the
-- summed minutes between procedure time and report update, cast to integer
-- and divided by the row count.
select
count(*) as count,
-- NOTE(review): 'HH:MI:SS' is used here while the TO_DATE calls below use
-- 'HH24:MI:SS' -- confirm which clock format is intended.
TO_CHAR(RH.updated_datetime,'DD-MM-YYYY HH:MI:SS') as date,
SUM(
extract (
epoch from (
RH.updated_datetime - PRI.procedure_performed_datetime
)
)/60
)::integer/count(*) as diff
from
procedure_runtime_information PRI,
study S,
report R,
report_history RH
where
RH.report_fk = R.pk AND
R.study_fk = S.pk AND
S.procedure_runtime_fk = PRI.pk AND
RH.old_status_fk = 21 AND
-- NOTE(review): the subquery's table has no alias, so both sides of its WHERE
-- clause name the outer RH row; the predicate compares a column with itself
-- and does not restrict the subquery to the current report.
RH.revision = (select max(revision) from report_history where RH.report_fk = RH.report_fk) AND
RH.updated_datetime > TO_DATE('22-01-2013 00:00:00', 'DD-MM-YYYY HH24:MI:SS') AND RH.updated_datetime < TO_DATE('22-01-2014 00:00:00', 'DD-MM-YYYY HH24:MI:SS')
group by date order by date asc;
Assuming this
(select max(revision) from report_history where RH.report_fk = RH.report_fk)
should really be:
(select max(revision) from report_history x where x.report_fk = RH.report_fk)
You could transform the nested (correlated) subquery into a plain subquery like this (one way of many):
-- Same report with the "latest revision" lookup expressed as a join: the
-- newest revision per report is computed once in a derived table and matched
-- against report_history, instead of being evaluated inside the WHERE clause.
SELECT count(*) AS ct
     -- Open question for the asker: should the format be HH24 rather than HH?
     , to_char(rh.updated_datetime,'DD-MM-YYYY HH:MI:SS') AS date
     , sum(extract(epoch FROM (rh.updated_datetime - pri.procedure_performed_datetime)) / 60)::int
       / count(*) AS diff
FROM   report_history rh
JOIN  (
   -- Highest revision number per report.
   SELECT report_fk, max(revision) AS revision
   FROM   report_history
   GROUP  BY report_fk
   ) latest ON latest.report_fk = rh.report_fk
           AND latest.revision = rh.revision
JOIN   report r ON r.pk = rh.report_fk
JOIN   study s ON s.pk = r.study_fk
JOIN   procedure_runtime_information pri ON pri.pk = s.procedure_runtime_fk
WHERE  rh.old_status_fk = 21
-- Open questions for the asker: should the lower bound be inclusive (>=), and
-- should to_timestamp be used instead of to_date?
AND    rh.updated_datetime > to_date('22-01-2013', 'DD-MM-YYYY')
AND    rh.updated_datetime < to_date('22-01-2014', 'DD-MM-YYYY')
-- Open question for the asker: which "date" is meant here -- the output alias
-- above or a table column?
GROUP  BY date
ORDER  BY date;