Correlated subquery to handle multiple rows - sql

Consider a query:
WITH RECURSIVE dates0(date) AS
(
VALUES('2022-03-01')
UNION ALL SELECT date(date, '+1 day')
FROM dates0
WHERE date < DATE('2022-03-03')),
q0 AS
(SELECT d.date,
(
--pretend this is a result of an expensive subquery for which we need d.date
SELECT 'Need to insert result of another query here'
----pretend this is a result of an expensive subquery for which we need d.date
) AS vegetables
FROM dates0 d)
SELECT *
FROM q0;
Instead of 'SELECT' between 2 commented lines I need to incorporate result of the following subquery:
SELECT name, SUM(amount) AS amount FROM (
SELECT 'POTATO' AS name,
'2022-03-03' AS date,
3 AS amount
UNION
SELECT 'TURNIP' AS name,
'2022-03-03' AS date,
15 AS amount
UNION
SELECT 'TURNIP' AS name,
'2022-03-03' AS date,
25 AS amount) GROUP BY name
For the end result:
date potato turnip
3/1/2022 0 0
3/2/2022 0 0
3/3/2022 3 40
PlaceHolderPlaceHolderPlaceHolderPlaceHolderPlaceHolder (sorry, SO otherwise considers the post to have insufficient amount of text)

Assuming that your query is a CTE named vegetables, you can do it with a LEFT join of dates0 to vegetables and conditional aggregation:
WITH
RECURSIVE dates0(date) AS (
VALUES('2022-03-01')
UNION ALL
SELECT date(date, '+1 day')
FROM dates0
WHERE date < DATE('2022-03-03')
),
vegetables AS (
SELECT 'POTATO' AS name, '2022-03-03' AS date, 3 AS amount
UNION ALL
SELECT 'TURNIP' AS name, '2022-03-03' AS date, 15 AS amount
UNION ALL
SELECT 'TURNIP' AS name, '2022-03-03' AS date, 25 AS amount
),
q0 AS (
SELECT d.date,
TOTAL(CASE WHEN v.name = 'POTATO' THEN v.amount END) AS potato,
TOTAL(CASE WHEN v.name = 'TURNIP' THEN v.amount END) AS turnip
FROM dates0 d LEFT JOIN vegetables v
ON v.date = d.date
GROUP BY d.date
)
SELECT *
FROM q0;
See the demo.

Related

Select data where sum for last 7 from max-date is greater than x

I have a data set as such:
Date Value Type
2020-06-01 103 B
2020-06-01 100 A
2020-06-01 133 A
2020-06-11 150 A
2020-07-01 1000 A
2020-07-21 104 A
2020-07-25 140 A
2020-07-28 1600 A
2020-08-01 100 A
Like this:
Type ISHIGH
A 1
B 0
Here's the query i tried,
select type, case when sum(value) > 10 then 1 else 0 end as total_usage
from table_a
where (select sum(value) as usage from tableA where date = max(date)-7)
group by type, date
This is clearly not right. What is a simple way to do this?
It is a simply group by except that you need to be able to access max date before grouping:
select type
, max(date) as last_usage_date
, sum(value) as total_usage
, case when sum(case when date >= cutoff_date then value end) >= 1000 then 'y' end as [is high!]
from t
cross apply (
select dateadd(day, -6, max(date))
from t as x
where x.type = t.type
) as ca(cutoff_date)
group by type, cutoff_date
If you want just those two columns then a simpler approach is:
select t.type, case when sum(value) >= 1000 then 'y' end as [is high!]
from t
left join (
select type, dateadd(day, -6, max(date)) as cutoff_date
from t
group by type
) as a on t.type = a.type and t.date >= a.cutoff_date
group by t.type
Find the max date by type. Then used it to find last 7 days and sum() the value.
with
cte as
(
select [type], max([Date]) as MaxDate
from tableA
group by [type]
)
select c.[type], sum(a.Value),
case when SUM(a.Value) > 1000 then 1 else 0 end as ISHIGH
from cte c
inner join tableA a on a.[type] = c.[type]
and a.[Date] >= DATEADD(DAY, -7, c.MaxDate)
group by c.[type]
This can be done through a cumulative total as follows:
;With CTE As (
Select [type], [date],
SUM([value]) Over (Partition by [type] Order by [date] Desc) As Total,
Row_Number() Over (Partition by [type] Order by [date] Desc) As Row_Num
From Tbl)
Select Distinct CTE.[type], Case When C.[type] Is Not Null Then 1 Else 0 End As ISHIGH
From CTE Left Join CTE As C On (CTE.[type]=C.[type]
And DateDiff(dd,CTE.[date],C.[date])<=7
And C.Total>1000)
Where CTE.Row_Num=1
I think you are quite close with you initial attempt to solve this. Just a tiny edit:
select type, case when sum(value) > 1000 then 1 else 0 end as total_usage
from tableA
where date > (select max(date)-7 from tableA)
group by type

Presto SQL to find number of transactions in the year before the current transaction

I have a (simplified) transaction table of customer and order date. For each row/order I want to find the number of orders the year before the current order. I can do this with a self join, but when my transactions table is far bigger, it gets inefficient. I think I really want to use a window function with range between on the date field, but this isn't implemented in Presto yet. Any ideas of how I can do this more efficiently?
with
transactions as (
select
1 as customer,
date '2020-01-01' as order_date
union all
select
1 as customer,
date '2020-01-26' as order_date
union all
select
1 as customer,
date '2020-02-01' as order_date
union all
select
1 as customer,
date '2020-02-02' as order_date
)
select
t1.*,
count(case when t2.order_date between date_add('day', -14, t1.order_date) and date_add('day', -1, t1.order_date) then t2.order_date else null end) as orders_14_days_before
from
transactions t1
left join
transactions t2 on t1.customer = t2.customer
group by
t1.customer,
t1.order_date
Result:
customer order_date orders_14_days_before
1 2020-01-01 0
1 2020-01-26 0
1 2020-02-01 1
1 2020-02-02 2
Presto does not seem to fully support the range window specification. So you can do this another way . . . by doings ins-and-outs:
with cd as (
select customer, order_date as dte, 1 as inc
from transactions
union all
select customer, order_date + interval '1' year, -1 inc
from transactions
)
select t.*, cd.one_year_count
from (select customer, dte,
sum(sum(inc)) over (partition by customer order by dte) as one_year_count
from cd
group by customer, date
) cd join
transactions t
on cd.dte = t.order_date;
You should find that this is much faster.
Thanks to Gordon Linoff's answer above, I tweaked it to get the correct answer (at least in Athena). You don't need the sum(sum()) over ..., just sum() over ... is sufficient.
with
transactions as (
select
1 as customer,
date '2020-01-01' as order_date
union all
select
1 as customer,
date '2020-01-26' as order_date
union all
select
1 as customer,
date '2020-02-01' as order_date
union all
select
1 as customer,
date '2020-02-02' as order_date
),
cd as (
select
customer,
order_date as dte,
1 as inc
from
transactions
union all
select
customer,
order_date + interval '13' day,
-1 inc
from
transactions
),
cd2 as (
select
customer,
dte,
inc,
sum(inc) over (partition by customer order by dte rows between unbounded preceding and 1 preceding) as one_year_count
from
cd
)
select
t.*,
coalesce(cd2.one_year_count, 0) as one_year_count
from
cd2
inner join
transactions t
on cd2.dte = t.order_date
where
cd2.inc = 1
order by
2 asc

sql get balance at end of year

I have a transactions table for a single year with the amount indicating the debit transaction if the value is negative or credit transaction values are positive.
Now in a given month if the number of debit records is less than 3 or if the sum of debits for a month is less than 100 then I want to charge a fee of 5.
I want to build and sql query for this in postgre:
select sum(amount), count(1), date_part('month', date) as month from transactions where amount < 0 group by month;
I am able get records per month level, I am stuck on how to proceed further and get the result.
You can start by generating the series of month with generate_series(). Then join that with an aggregate query on transactions, and finally implement the business logic in the outer query:
select sum(t.balance)
- 5 * count(*) filter(where coalesce(t.cnt, 0) < 3 or coalesce(t.debit, 0) < 100) as balance
from generate_series(date '2020-01-01', date '2020-12-01', '1 month') as d(dt)
left join (
select date_trunc('month', date) as dt, count(*) cnt, sum(amount) as balance,
sum(-amount) filter(where amount < 0) as debit
from transactions t
group by date_trunc('month', date)
) t on t.dt = d.dt
Demo on DB Fiddle:
| balance |
| ------: |
| 2746 |
How about this approach?
SELECT
SUM(
CASE
WHEN usage.amount_s > 100
OR usage.event_c > 3
THEN 0
ELSE 5
END
) AS YEAR_FEE
FROM (SELECT 1 AS month UNION
SELECT 2 UNION
SELECT 3 UNION
SELECT 4 UNION
SELECT 5 UNION
SELECT 6 UNION
SELECT 7 UNION
SELECT 8 UNION
SELECT 9 UNION
SELECT 10 UNION
SELECT 11 UNION
SELECT 12
) months
LEFT OUTER JOIN
(
SELECT
sum(amount) AS amount_s,
count(1) event_c,
date_part('month', date) AS month
FROM transactions
WHERE amount < 0
GROUP BY month
) usage ON months.month = usage.month;
First you must use a resultset that returns all the months (1-12) and join it with a LEFT join to your table.
Then aggregate to get the the sum of each month's amount and with conditional aggregation subtract 5 from the months that meet your conditions.
Finally use SUM() window function to sum the result of each month:
SELECT DISTINCT SUM(
COALESCE(SUM(t.Amount), 0) -
CASE
WHEN SUM((t.Amount < 0)::int) < 3
OR SUM(CASE WHEN t.Amount < 0 THEN -t.Amount ELSE 0 END) < 100 THEN 5
ELSE 0
END
) OVER () total
FROM generate_series(1, 12, 1) m(month) LEFT JOIN transactions t
ON m.month = date_part('month', t.date) AND date_part('year', t.date) = 2020
GROUP BY m.month
See the demo.
Results:
> | total |
> | ----: |
> | 2746 |
I think you can use the hanving clause.
Select ( sum(a.total) - (12- count(b.cnt ))*5 ) as result From
(Select sum(amount) as total , 'A' as name from transactions ) as a left join
(Select count(amount) as cnt , 'A' as name
From transactions
where amount <0
group by month(date)
having not(count(amount) <3 or sum(amount) >-100) ) as b
on a.name = b.name
select
sum(amount) - 5*(12-(
select count(*)
from(select month, count(amount),sum(amount)
from transactions
where amount<0
group by month
having Count(amount)>=3 And Sum(amount)<=-100))) as balance
from transactions ;

MIN value from data set and sum

I have data:
ID DUE AMT
4 2018-03-10 335.75
3 2018-04-10 334.75
1 2018-05-10 333.75
2 2018-06-10 332.75
I need to extract:
least due (03-10)
amt for least due (335.75)
sum of amt column.
Could it be done in single query?
Try keep dense rank:
with tt as (
select 4 id, date '2018-03-10' due, 335.75 amt from dual union all
select 3 id, date '2018-04-10' due, 334.75 amt from dual union all
select 1 id, date '2018-05-10' due, 333.75 amt from dual union all
select 2 id, date '2018-06-10' due, 332.75 amt from dual
)
select min(due) least_due,
min(amt) keep (dense_rank first order by due) amt_for_least_due,
sum(amt) sum_amt
from tt
We can try using analytic functions here:
WITH cte AS (
SELECT ID, DUE, AMOUNT,
SUM(AMOUNT) OVER () AS TOTALAMOUNT,
ROW_NUMBER() OVER (ORDER BY DUE) rn
FROM yourTable
)
SELECT ID, DUE, AMOUNT, TOTALAMOUNT
FROM cte
WHERE rn = 1;

SQL Join two tables by unrelated date

I’m looking to join two tables that do not have a common data point, but common value (date). I want a table that lists the date and total number of hired/terminated employees on that day. Example is below:
Table 1
Hire Date Employee Number Employee Name
--------------------------------------------
5/5/2018 10078 Joe
5/5/2018 10077 Adam
5/5/2018 10078 Steve
5/8/2018 10079 Jane
5/8/2018 10080 Mary
Table 2
Termination Date Employee Number Employee Name
----------------------------------------------------
5/5/2018 10010 Tony
5/6/2018 10025 Jonathan
5/6/2018 10035 Mark
5/8/2018 10052 Chris
5/9/2018 10037 Sam
Desired result:
Date Total Hired Total Terminated
--------------------------------------
5/5/2018 3 1
5/6/2018 0 2
5/7/2018 0 0
5/8/2018 2 1
5/9/2018 0 1
Getting the total count is easy, just unsure as the best approach from the standpoint of "adding" a date column
If you need all dates within some window then you need to join the data to a calendar. You can then left join and sum flags for data points.
DECLARE #StartDate DATETIME = (SELECT MIN(ActionDate) FROM(SELECT ActionDate = MIN(HireDate) FROM Table1 UNION SELECT ActionDate = MIN(TerminationDate) FROM Table2)AS X)
DECLARE #EndDate DATETIME = (SELECT MAX(ActionDate) FROM(SELECT ActionDate = MAX(HireDate) FROM Table1 UNION SELECT ActionDate = MAX(TerminationDate) FROM Table2)AS X)
;WITH AllDates AS
(
SELECT CalendarDate=#StartDate
UNION ALL
SELECT DATEADD(DAY, 1, CalendarDate)
FROM AllDates
WHERE DATEADD(DAY, 1, CalendarDate) <= #EndDate
)
SELECT
CalendarDate,
TotalHired = SUM(CASE WHEN H.HireDate IS NULL THEN NULL ELSE 1 END),
TotalTerminated = SUM(CASE WHEN T.TerminationDate IS NULL THEN NULL ELSE 1 END)
FROM
AllDates D
LEFT OUTER JOIN Table1 H ON H.HireDate = D.CalendarDate
LEFT OUTER JOIN Table2 T ON T.TerminationDate = D.CalendarDate
/* If you only want dates with data points then uncomment out the where clause
WHERE
NOT (H.HireDate IS NULL AND T.TerminationDate IS NULL)
*/
GROUP BY
CalendarDate
I would do this with a union all and aggregations:
select dte, sum(is_hired) as num_hired, sum(is_termed) as num_termed
from (select hiredate as dte, 1 as is_hired, 0 as is_termed from table1
union all
select terminationdate, 0 as is_hired, 1 as is_termed from table2
) ht
group by dte
order by dte;
This does not include the "missing" dates. If you want those, a calendar or recursive CTE works. For instance:
with ht as (
select dte, sum(is_hired) as num_hired, sum(is_termed) as num_termed
from (select hiredate as dte, 1 as is_hired, 0 as is_termed from table1
union all
select terminationdate, 0 as is_hired, 1 as is_termed from table2
) ht
group by dte
),
d as (
select min(dte) as dte, max(dte) as max_dte)
from ht
union all
select dateadd(day, 1, dte), max_dte
from d
where dte < max_dte
)
select d.dte, coalesce(ht.num_hired, 0) as num_hired, coalesce(ht.num_termed) as num_termed
from d left join
ht
on d.dte = ht.dte
order by dte;
Try this one
SELECT ISNULL(a.THE_DATE, b.THE_DATE) as Date,
ISNULL(a.Total_Hire,0) as Total_Hire,
ISNULL (b.Total_Terminate,0) as Total_terminate
FROM (SELECT Hire_date as the_date, COUNT(1) as Total_Hire
FROM TABLE_HIRE GROUP BY HIRE_DATE) a
FULL OUTER JOIN (SELECT Termination_Date as the_date, COUNT(1) as Total_Terminate
FROM TABLE_TERMINATE GROUP BY HIRE_DATE) a
ON a.the_date = b.the_date