How to optimize this query for my school project - sql

This is my assignment; kindly help me optimize the two queries below.
Optimize assignment 1:
-- Query to be optimized: per node, the minimum of LEAST(n.date, ec.date) over the
-- ec rows that reference the node in node_id_from or node_id_to.
SELECT
n.node_id,
MIN(LEAST(n.date,ec.date)) date
FROM
n, ec -- comma (implicit) join; the pairing is done by the WHERE clause
WHERE
(n.node_id = ec.node_id_from OR n.node_id = ec.node_id_to)
AND n.date - ec.date > 0 -- keeps only pairs where n.date is later than ec.date
GROUP BY
n.node_id;
Optimize assignment 2:
-- Query to be optimized: per Los Angeles calendar month of tableA."date" (values are
-- converted from UTC), count the distinct tableB and tableC ids whose Los Angeles
-- calendar day equals that of a tableA row.
SELECT
TO_CHAR(CONVERT_TIMEZONE ('UTC','America/Los_Angeles', tableA."date"), 'YYYY-MM') AS "date_month",
COUNT(DISTINCT CASE WHEN (tableB."date" IS NOT NULL) THEN tableB._id ELSE NULL END) AS "tableB.countB",
COUNT(DISTINCT CASE WHEN (tableC."date" IS NOT NULL) THEN tableC._id ELSE NULL END) AS "tableC.countC"
FROM
tableA AS tableA
LEFT JOIN
tableB AS tableB ON (DATE (CONVERT_TIMEZONE ('UTC', 'America/Los_Angeles',tableB."date"))) = (DATE (CONVERT_TIMEZONE ('UTC', 'America/Los_Angeles',tableA."date")))
LEFT JOIN
tableC AS tableC ON (DATE (CONVERT_TIMEZONE ('UTC', 'America/Los_Angeles',tableC."date"))) = (DATE (CONVERT_TIMEZONE ('UTC', 'America/Los_Angeles',tableA."date")))
WHERE
-- lower bound: first instant of the Los Angeles month 17 months back, expressed in UTC
-- (the closing parenthesis of the outer CONVERT_TIMEZONE was missing)
tableA."date" >= CONVERT_TIMEZONE ('America/Los_Angeles', 'UTC', DATEADD (month, -17, DATE_TRUNC('month', DATE_TRUNC('day', CONVERT_TIMEZONE ('UTC', 'America/Los_Angeles',GETDATE ())))))
GROUP BY
1
ORDER BY
1 DESC
LIMIT 500;

Use short aliases; they make the SQL query shorter and cleaner.
Here is the optimized version of the second query:
-- Simplified version of the second query. It assumes the three date columns share one
-- time zone, so rows are matched on the raw values, and it uses a fixed 8-hour offset
-- in place of the UTC -> America/Los_Angeles conversion.
-- NOTE(review): a fixed offset ignores daylight saving time — confirm this is acceptable.
-- NOTE(review): both joins fan out, so SUM counts b x c row combinations where the
-- original counted distinct ids — verify the totals against COUNT(DISTINCT ...).
SELECT TO_CHAR(DATEADD(hour, -8, a.date), 'YYYY-MM') AS date_month, -- year + month: the window spans more than 12 months, so a bare month number would merge different years
sum(case when b.date is Not null then 1 else 0 end) AS countb,
sum(case when c.date is Not null then 1 else 0 end) AS countc
FROM tableA a
LEFT JOIN tableB b
ON b.Date = a.Date -- Timezone offsets are not necessary,
LEFT JOIN tableC c
ON c.date = a.date -- both in same timezone
WHERE a.date >= DateAdd(hour, 8,
DATEADD (month,-17,DATE_TRUNC('month',
GETDATE () )))
GROUP BY 1
ORDER BY 1 DESC LIMIT 500;

Very simple solution for assignment #1
-- Earliest ec.date per node, taken over the ec rows that reference the node in
-- node_id_from or node_id_to and are dated strictly before the node's own date.
SELECT
    n.node_id,
    MIN(ec.date) AS date
FROM n
INNER JOIN ec
    ON ec.date < n.date
    AND n.node_id IN (ec.node_id_from, ec.node_id_to)
GROUP BY
    n.node_id;
It simply uses MIN(ec.date) instead of MIN(LEAST(n.date,ec.date)),
because the join condition already forces ec.date to be lower than n.date, so LEAST would always return ec.date anyway.
Also note that a where clause like
where (x >= y and x <= z)
can be changed to
where (x between y and z)

Related

How to apply filter using a joined table

I'm trying to apply a filter to my query (accounts.provider = 'z') using the accounts table. The query I have at the moment is not applying the filter correctly, the full list of payments is being added up, regardless of the provider condition. The reason why I'm using table x to join the accounts table is because table t doesn't have the account_id column to allow me to join it with the accounts table.
This is my current query
-- Question's first attempt (kept as posted): one row per day with the pending payment
-- total. The total is computed in derived table t from all payments; accounts is only
-- joined afterwards, in the outer query.
SELECT
distinct on (x.day) x.day,
coalesce(pending_payments,0)
from
(( SELECT day::date
FROM generate_series(timestamp '2017-03-13', current_date + interval '1 week', interval '1 day') day
) d
left JOIN (
-- daily sum over every payments row; no account or provider filter is applied here
SELECT date_trunc('day', payment_date)::date AS day,
sum(case when payment_amount > 0
and description not ilike '%credit%'
and state = 'pending'
then payment_amount end) as pending_payments
FROM payments
GROUP BY 1
) t USING (day) inner join payments on payments.payment_date = t.day) x
-- the provider filter is applied here, after pending_payments has already been summed
inner join accounts on accounts.id = x.account_id and accounts.provider = 'z'
where day <= current_date + interval '1 week'
and day >= current_date - interval'6 months'
ORDER BY x.day desc
Thanks for your help
Updated query based on suggestions in the comments but it's not producing the right outcome (see comments).
-- Question's updated attempt (kept as posted): the accounts join is moved inside the
-- aggregating sub-query, reached through a self-join of payments on the payment day.
SELECT
distinct on (t.day) t.day as day,
coalesce(pending_payments,0)
from
( SELECT day::date
FROM generate_series(timestamp '2017-03-13', current_date + interval '1 week', interval '1 day') day
) d
left JOIN (
SELECT date_trunc('day', t.payment_date)::date AS day,
sum(case when t.payment_amount > 0
and t.description not ilike '%credit%'
and t.state = 'success' -- NOTE(review): the first attempt filtered on 'pending' — confirm which state is intended
then t.payment_amount end) as pending_payments
FROM payments t
-- pairs every payment t with every payment p made on the same day
inner join payments p on p.payment_date = date_trunc('day', t.payment_date)::date
-- the provider filter applies to p, not to the row t whose amount is summed
inner join accounts on accounts.id = p.account_id and accounts.provider = 'z'
where date_trunc('day', t.payment_date)::date <= current_date + interval '1 week'
and date_trunc('day', t.payment_date)::date >= current_date - interval'1 months'
GROUP BY 1
) t USING (day)
ORDER BY day desc
You are calculating the pending_payments (In sub-query) before applying the accounts.provider = 'z' condition.
You should replace this code:
....
....
-- Fragment quoted from the question: the sum runs over all payments, and accounts
-- (with the provider filter) is joined only after the aggregation.
left JOIN (
SELECT date_trunc('day', payment_date)::date AS day,
sum(case when payment_amount > 0
and description not ilike '%credit%'
and state = 'pending'
then payment_amount end) as pending_payments
FROM payments
GROUP BY 1
) t USING (day) inner join payments on payments.payment_date = t.day) x
inner join accounts on accounts.id = x.account_id and accounts.provider = 'z'
....
....
with
....
....
-- Restrict the payments to provider 'z' accounts before aggregating, so the daily
-- sum only contains that provider's payments.
left JOIN (
    SELECT date_trunc('day', pay.payment_date)::date AS day,
           sum(case when pay.payment_amount > 0
                     and pay.description not ilike '%credit%'
                     and pay.state = 'pending'
                    then pay.payment_amount end) as pending_payments
    FROM payments pay
    -- join accounts straight from the payment row: a self-join of payments on the
    -- day would repeat each amount once per matching same-day payment and would
    -- not filter the summed row by provider
    inner join accounts on accounts.id = pay.account_id and accounts.provider = 'z'
    GROUP BY 1
) t
....
....

Update with inner join not working in db2

I have a structure similar to a working query in mysql that is an update based on an inner join with counts
-- Question's statement (kept as posted): an UPDATE ... INNER JOIN ... SET in the form
-- that works in MySQL, which the asker reports failing in db2 near the final ON.
update schema.daily_totals ct
inner JOIN (
-- per-user contact count, follow-ups due within the next 7 days, and their percentage
SELECT
COUNT (*) AS contacted,
SUM( CASE WHEN f.follow_up_date BETWEEN CURRENT_DATE AND CURRENT_DATE + 7 DAYS THEN 1 ELSE 0 END ) AS potentials,
CAST (ROUND((SUM( CASE WHEN f.follow_up_date BETWEEN CURRENT_DATE AND CURRENT_DATE + 7 DAYS THEN 1.0 ELSE 0 END )/ COUNT (*)) * 100.00, 2) AS DECIMAL (12, 2)) AS PERCENT,
u.user_id as userID, -- NOTE(review): trailing comma directly before FROM
FROM schema.users u
INNER JOIN schema.notated n
ON n.user_identifier = u.user_id
INNER JOIN schema.comms m
ON n.comms_ID = m.comms_ID
LEFT JOIN schema.FDates f
ON f.dNumber = n.dNumber
WHERE code <> 'none'
AND n.created_at >= CURRENT_DATE - 1 DAYS
GROUP BY u.user_id, u.first_name, u.last_name
) as cu
on cu.userID = ct.ext_id
set ct.contacted_contacted = cu.contacted,
ct.percent_up_to_date = cu.percent
where ct.date_of_report >= current_date;
But it won't run, it seems to break around the final 'on' where I'm joining on the subquery.
Am I not able to run this in db2 at all?
Use MERGE statement instead.
-- Update today's (and later) daily_totals rows from per-user statistics.
-- The source yields one row per user_id: contact count, follow-ups due within the next
-- 7 days, and that share as a percentage rounded to two decimals.
MERGE INTO schema.daily_totals ct
USING (
SELECT
COUNT (*) AS contacted,
SUM( CASE WHEN f.follow_up_date BETWEEN CURRENT_DATE AND CURRENT_DATE + 7 DAYS THEN 1 ELSE 0 END ) AS potentials,
CAST (ROUND((SUM( CASE WHEN f.follow_up_date BETWEEN CURRENT_DATE AND CURRENT_DATE + 7 DAYS THEN 1.0 ELSE 0 END )/ COUNT (*)) * 100.00, 2) AS DECIMAL (12, 2)) AS PERCENT,
u.user_id as userID
FROM schema.users u
INNER JOIN schema.notated n
ON n.user_identifier = u.user_id
INNER JOIN schema.comms m
ON n.comms_ID = m.comms_ID
LEFT JOIN schema.FDates f
ON f.dNumber = n.dNumber
WHERE code <> 'none'
AND n.created_at >= CURRENT_DATE - 1 DAYS
-- group by the join key only: MERGE needs at most one source row per target row,
-- and no name column is selected
GROUP BY u.user_id
) as cu
on cu.userID = ct.ext_id and ct.date_of_report >= current_date
WHEN MATCHED THEN UPDATE
set contacted_contacted = cu.contacted, percent_up_to_date = cu.percent;

SQL Sub-query Error

I think there is an issue with the A2 set, where I am only including RestartCase = 'Y', but I am not sure; please help. I am getting this error: An unexpected token "WHERE" was found following "_ID) AS Max_DateBegin". Expected tokens may include...
Thank you
-- Question's failing query (kept as posted): in the first derived table the WHERE
-- appears before its FROM, which is where the reported "unexpected token WHERE" arises.
SELECT A2.* FROM
(SELECT A1.*, min(BeginDate) OVER (PARTITION BY Per_ID) AS Min_BeginDate,
MAX(BeginDate) OVER (PARTITION BY Per_ID) AS Max_BeginDate
WHERE RestartCase = 'Y'
From) A2
(SELECT distinct C.Per_ID, P.DOB, C.BeginDate, C.EndDate, C.RestartCase, P.per_type
FROM CaseSum C LEFT JOIN PERSON p on C.ID_PRSN = P.ID_PRSN) A1
WHERE per_Type = 1 AND BeginDate <= '9/30/2017' AND (EndDate >= '10/01/2017' OR EndDate IS NULL)
ORDER BY A1.Per_ID
Consider a conditional CASE inline aggregate in your window functions to combine both attempted resultsets:
-- One row per distinct case/person combination, plus the earliest and latest
-- BeginDate among that Per_ID's rows flagged RestartCase = 'Y' (NULL when none).
SELECT DISTINCT
    cs.Per_ID,
    pr.DOB,
    cs.BeginDate,
    cs.EndDate,
    cs.RestartCase,
    pr.per_type,
    MIN(CASE WHEN cs.RestartCase = 'Y' THEN cs.BeginDate ELSE NULL END)
        OVER (PARTITION BY cs.Per_ID) AS Min_BeginDate,
    MAX(CASE WHEN cs.RestartCase = 'Y' THEN cs.BeginDate ELSE NULL END)
        OVER (PARTITION BY cs.Per_ID) AS Max_BeginDate
FROM CaseSum cs
LEFT JOIN PERSON pr
    ON cs.ID_PRSN = pr.ID_PRSN
WHERE pr.per_Type = 1
  AND cs.BeginDate <= '9/30/2017'
  AND (cs.EndDate IS NULL OR cs.EndDate >= '10/01/2017')
ORDER BY cs.Per_ID
I believe this is what you want (or something like it):
-- Rows with RestartCase = 'Y' for per_Type 1 persons whose BeginDate is on or before
-- 9/30/2017 and whose EndDate is on or after 10/01/2017 (or open), each with the
-- minimum and maximum BeginDate of its Per_ID among those rows.
SELECT
A2.*
FROM
(SELECT
A1.*,
MIN(BeginDate) OVER (PARTITION BY Per_ID) AS Min_BeginDate,
MAX(BeginDate) OVER (PARTITION BY Per_ID) AS Max_BeginDate
FROM
(SELECT DISTINCT
C.Per_ID,
P.DOB,
C.BeginDate,
C.EndDate,
C.RestartCase,
P.per_type
FROM CaseSum C
LEFT JOIN PERSON P
on C.ID_PRSN = P.ID_PRSN
WHERE P.per_Type = 1
AND C.BeginDate <= '9/30/2017'
AND (C.EndDate >= '10/01/2017' OR C.EndDate IS NULL)
) A1
WHERE A1.RestartCase = 'Y'
) A2
-- sort on the outermost query: an ORDER BY inside the derived table does not
-- guarantee the order of the final result
ORDER BY A2.Per_ID

Are two inner joins best for optimizing this query?

I just got a challenge from school: optimise this query. This is a theoretical question.
Challenge :
-- Challenge query: per Los Angeles calendar month of tableA."date" (values converted
-- from UTC), count the distinct tableB and tableC ids whose Los Angeles calendar day
-- equals that of a tableA row.
SELECT TO_CHAR(CONVERT_TIMEZONE ('UTC','America/Los_Angeles',tableA."date"),'YYYY-MM') AS "date_month",
COUNT(DISTINCT CASE WHEN (tableB."date" IS NOT NULL) THEN tableB._id ELSE NULL END) AS "tableB.countB",
COUNT(DISTINCT CASE WHEN (tableC."date" IS NOT NULL) THEN tableC._id ELSE NULL END) AS "tableC.countC"
FROM tableA AS tableA
LEFT JOIN tableB AS tableB ON (DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',tableB."date"))) = (DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',tableA."date")))
LEFT JOIN tableC AS tableC ON (DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',tableC."date"))) = (DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',tableA."date")))
-- lower bound: first instant of the Los Angeles month 17 months back, expressed in UTC
-- (the closing parenthesis of the outer CONVERT_TIMEZONE was missing)
WHERE tableA."date" >= CONVERT_TIMEZONE ('America/Los_Angeles','UTC',DATEADD (month,-17,DATE_TRUNC('month',DATE_TRUNC('day',CONVERT_TIMEZONE ('UTC','America/Los_Angeles',GETDATE ())))))
GROUP BY 1
ORDER BY 1 DESC LIMIT 500;
To optimize it, I just removed the CASE expressions from the query above; I think this will also improve the efficiency of the query:
-- CASE-free variant of the challenge query. NVL2(x, a, b) returns a when x is not
-- NULL and b otherwise, which is the test the original CASE expressions performed;
-- DECODE cannot express it because it only compares for equality.
SELECT To_char(Convert_timezone ('UTC','America/Los_Angeles',tablea."date"),'YYYY-MM') AS "date_month",
Count(DISTINCT
NVL2(tableb."date", tableb._id, NULL))
AS "tableB.countB",
Count(DISTINCT
NVL2(tablec."date", tablec._id, NULL))
AS "tableC.countC"
FROM tablea AS tablea
LEFT JOIN tableb AS tableb
ON (
Date (Convert_timezone ('UTC','America/Los_Angeles',tableb."date"))) = (Date (Convert_timezone ('UTC','America/Los_Angeles',tablea."date")))
LEFT JOIN tablec AS tablec
ON (
Date (Convert_timezone ('UTC','America/Los_Angeles',tablec."date"))) = (Date (Convert_timezone ('UTC','America/Los_Angeles',tablea."date")))
-- lower bound: first instant of the Los Angeles month 17 months back, expressed in UTC
WHERE tablea."date" >= convert_timezone ('America/Los_Angeles','UTC',Dateadd (month,-17,Date_trunc('month',Date_trunc('day',Convert_timezone ('UTC','America/Los_Angeles',Getdate ())))))
GROUP BY 1
ORDER BY 1 DESC
LIMIT 500;
What would you suggest if we remove one left join and merge the statements?
Is that fine for optimization?
... or, use a shorter alias that actually makes the SQL shorter and cleaner. This also helps readability. Also, format it to separate the clauses (Select, From, Join, Where, Order By, Group By, Having, etc.) so they are easy to distinguish with the eye, and use indentation consistent with the logical structure, so that it supports, and does not hinder, your ability to separate those sections from one another.
Just as an example, here's your first SQL query reformatted, but identical in logical structure to what you posted:
-- Same logic as the posted challenge query, with short aliases and one clause per line.
SELECT TO_CHAR(CONVERT_TIMEZONE ('UTC','America/Los_Angeles', a."date"),'YYYY-MM') AS date_month,
COUNT(DISTINCT CASE WHEN (b."date" IS NOT NULL) THEN b._id ELSE NULL END) AS countB,
COUNT(DISTINCT CASE WHEN (c."date" IS NOT NULL) THEN c._id ELSE NULL END) AS countC
FROM tableA a
LEFT JOIN tableB b
ON (DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',b."date"))) =
(DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',a."date")))
LEFT JOIN tableC c
ON (DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',c."date"))) =
(DATE (CONVERT_TIMEZONE ('UTC','America/Los_Angeles',a."date")))
-- lower bound: first instant of the Los Angeles month 17 months back, expressed in UTC
WHERE a."date" >= CONVERT_TIMEZONE ('America/Los_Angeles', 'UTC',
DATEADD (month,-17,DATE_TRUNC('month',
DATE_TRUNC('day',CONVERT_TIMEZONE ('UTC','America/Los_Angeles',
GETDATE ())))))
GROUP BY 1
ORDER BY 1 DESC LIMIT 500;
Here is an optimized version
-- Simplified version of the query. It assumes the three date columns share one time
-- zone, so rows are matched on the raw values, and it uses a fixed 8-hour offset in
-- place of the UTC -> America/Los_Angeles conversion.
-- NOTE(review): a fixed offset ignores daylight saving time — confirm this is acceptable.
-- NOTE(review): both joins fan out, so SUM counts b x c row combinations where the
-- original counted distinct ids — verify the totals against COUNT(DISTINCT ...).
SELECT TO_CHAR(DATEADD(hour, -8, a.date), 'YYYY-MM') AS date_month, -- year + month: the window spans more than 12 months, so a bare month number would merge different years
sum(case when b.date is Not null then 1 else 0 end) AS countb,
sum(case when c.date is Not null then 1 else 0 end) AS countc
FROM tableA a
LEFT JOIN tableB b
ON b.Date = a.Date -- Timezone offsets are not necessary,
LEFT JOIN tableC c
ON c.date = a.date -- both in same timezone
WHERE a.date >= DateAdd(hour, 8,
DATEADD (month,-17,DATE_TRUNC('month',
GETDATE () )))
GROUP BY 1
ORDER BY 1 DESC LIMIT 500;
Presumably, the _id columns are unique. So:
-- Variant that replaces COUNT(DISTINCT ...) with SUM over a 0/1 flag, on the stated
-- presumption that the _id columns are unique.
-- NOTE(review): both joins fan out per day, so SUM counts b x c row combinations —
-- verify the totals against the COUNT(DISTINCT ...) original.
SELECT TO_CHAR(CONVERT_TIMEZONE('UTC','America/Los_Angeles', a."date"), 'YYYY-MM') AS date_month,
SUM(CASE WHEN b."date" IS NOT NULL THEN 1 ELSE 0 END) AS tableB_countB,
SUM(CASE WHEN c."date" IS NOT NULL THEN 1 ELSE 0 END) AS tableC_countC
FROM tableA a LEFT JOIN
tableB b
-- compare b against a; comparing b."date" with itself matched every non-NULL b row
ON DATE(CONVERT_TIMEZONE ('UTC', 'America/Los_Angeles', b."date")) = DATE(CONVERT_TIMEZONE ('UTC', 'America/Los_Angeles', a."date")) LEFT JOIN
tableC c
ON DATE(CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', c."date")) = DATE(CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', a."date"))
-- lower bound: first instant of the Los Angeles month 17 months back, expressed in UTC
WHERE a."date" >= CONVERT_TIMEZONE('America/Los_Angeles', 'UTC',
DATEADD(month, -17, DATE_TRUNC('month', DATE_TRUNC('day', CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', GETDATE ())))))
GROUP BY 1
ORDER BY 1 DESC
LIMIT 500;
Then, the date conversions in the ON clause don't seem necessary, because the two sides are being converted from the same time zone. If the values have no time component (as suggested by a name like date), then the DATE() is not needed either:
-- Same as the SUM variant, but joining on the raw "date" values: both sides would be
-- converted from the same time zone, and DATE() is dropped on the assumption that
-- the values carry no time component.
-- NOTE(review): both joins fan out per day, so SUM counts b x c row combinations —
-- verify the totals against the COUNT(DISTINCT ...) original.
SELECT TO_CHAR(CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', a."date"), 'YYYY-MM') AS date_month,
SUM(CASE WHEN b."date" IS NOT NULL THEN 1 ELSE 0 END) AS tableB_countB,
SUM(CASE WHEN c."date" IS NOT NULL THEN 1 ELSE 0 END) AS tableC_countC
FROM tableA a LEFT JOIN
tableB b
-- compare b against a; comparing b."date" with itself matched every non-NULL b row
ON b."date" = a."date" LEFT JOIN
tableC c
ON c."date" = a."date"
-- lower bound: first instant of the Los Angeles month 17 months back, expressed in UTC
WHERE a."date" >= CONVERT_TIMEZONE('America/Los_Angeles', 'UTC',
DATEADD(month, -17, DATE_TRUNC('month', DATE_TRUNC('day', CONVERT_TIMEZONE('UTC', 'America/Los_Angeles', GETDATE ())))))
GROUP BY 1
ORDER BY 1 DESC
LIMIT 500;
The WHERE clause is fine. It can take advantage of an index on a(date).

How to Compare values between rows in access query

I have a table table1 like below. I want to find all H in Status field that is in between two L. The output for mentioned criteria should be 04/01/15, 05/01/15 and 07/01/15. How can I solve this?
Date Status
01/01/15 A
02/01/15 H
03/01/15 L
04/01/15 H
05/01/15 H
06/01/15 L
07/01/15 H
08/01/15 L
I think you can use a query like this:
-- Rows with Status 'H' dated between the first and the last 'L' row.
-- [Date] is bracketed because Date is a reserved word in Access, and the join is
-- spelled INNER JOIN because Access SQL does not accept a bare JOIN.
SELECT
    t.[Date],
    t.Status
FROM
    yourTable AS t
    INNER JOIN (
        -- date range spanned by the 'L' rows
        SELECT MIN([Date]) AS minDate, MAX([Date]) AS maxDate
        FROM yourTable
        WHERE Status = 'L'
    ) AS l
    ON (t.[Date] >= l.minDate AND t.[Date] <= l.maxDate)
-- only 'H' rows are wanted; "<> 'L'" would also return other statuses such as 'A'
WHERE t.Status = 'H';