Calculating % of only each group by value - sql

I have query below as
-- Share of rows per (month, type) among rows with type A or B in the selected hour range.
-- over() has no PARTITION BY, so every count is divided by the total across all months.
select month, type, count(*) / sum(count(*)) over() as ratio
from
(
select
ID,
-- hour is parsed with the 'YYYYMMDDHH24' format and truncated to the start of its month
date_trunc('month',to_date(hour,'YYYYMMDDHH24')) as month,
-- NOTE(review): the trailing comma before FROM is rejected by most SQL dialects
timestamp,
from tbl
where hour between '2021060100' and '2021073123'
and (type IN('A','B') or type is null))
-- NOTE(review): type is filtered here but is not projected by the subquery above
where type IN('A','B')
group by 1,2;
Current output:
month | type | ratio
2021-06-01 A .60
2021-06-01 B .05
2021-07-01 A .30
2021-07-01 B .05
Desired Output:
month | type | ratio
2021-06-01 A .60
2021-06-01 B .40
2021-07-01 A .85
2021-07-01 B .15
I want the desired output to show each type's percentage within its own month only. What am I missing? Thank you

I think you need to partition by month in your over:
-- Partition the window by month so each count is divided by that month's total only.
-- Only the select list changes; the rest of the query stays as it was.
select month, type, count(*) / sum(count(*)) over(partition by month) as ratio

Related

SQL query to generate summary file base on change in price per item

I need help writing a query to generate a summary file of quantity purchase per item, and per cost from a purchase history file. To run the query the ORDER BY would be ITEM_NO, PO_DATE, AND COST.
SAMPLE DATA - PURCHASE HISTORY
OUTPUT FILE - SUMMARY
We can group by item_no and cost and get all the info we need.
-- Summarise purchases per item and price run: consecutive purchases of an item
-- (in po_date order) at the same cost collapse into a single row.
with price_changes as (
    -- chng is 1 on rows whose cost differs from the item's previous purchase, otherwise null
    select
        item_no,
        cost,
        po_date,
        qty,
        case
            when lag(cost) over (partition by item_no order by po_date) <> cost then 1
        end as chng
    from t
),
price_runs as (
    -- the running count of changes gives every run of equal cost its own grp number
    select
        item_no,
        cost,
        po_date,
        qty,
        count(chng) over (partition by item_no order by po_date) as grp
    from price_changes
)
select
    item_no,
    cost,
    min(po_date) as start_date,
    max(po_date) as end_date,
    sum(qty) as qty
from price_runs
group by item_no, cost, grp
order by item_no, start_date
item_no
cost
start_date
end_date
qty
12345
1.25
2021-01-02 00:00:00
2021-01-04 00:00:00
150
12345
2.00
2021-02-01 00:00:00
2021-02-03 00:00:00
60
78945
5.25
2021-06-10 00:00:00
2021-06-12 00:00:00
90
78945
4.50
2021-10-18 00:00:00
2021-10-19 00:00:00
150
Fiddle

Group by date and find median of processing time

I select input date and output date from a database. I use a formula to calculate the processing time. Now I would like the values to be grouped by the date of receipt, and the median of the processing time to be output for each date of receipt. Something like this:
The data I select:
input date | output date | processing time
2022-01-03 | 2022-01-03 | 0
2022-01-03 | 2022-01-06 | 3
2022-01-03 | 2022-01-11 | 8
2022-01-05 | 2022-01-10 | 5
2022-01-05 | 2022-01-15 | 10
The output I want:
input date | processing time
2022-01-03 | 3
2022-01-05 | 7.5
My SQL Code:
-- Lists requests received in 2022 with their export date and the processing time in days.
-- NOTE(review): this GROUP BY only collapses duplicate (received_date, exported_on) pairs;
-- it does not aggregate the processing times per received_date.
SELECT [received_date]
,CONVERT(date, [exported_on])
,DATEDIFF(day, [received_date], [exported_on]) AS processing_time
FROM [request] WHERE YEAR (received_date) = 2022
GROUP BY received_date, [exported_on]
ORDER BY received_date
How can I do this? Do I need a temp table to do this, or can I modify my query?
You could try using PERCENTILE_CONT
-- Median processing time per input_date.
-- PERCENTILE_CONT is evaluated as a window function here (one value per input row),
-- so DISTINCT collapses the repeated values to a single row per date.
SELECT DISTINCT
    input_date,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY processing_time)
        OVER (PARTITION BY input_date) AS Median_Process_Time
FROM tableA
db fiddle
Also, check out the discussion here: How to find the SQL medians for a grouping
Here is my solution. Thank you for your help.
-- Median processing time (in days) per entry date for requests received in 2022.
SET NOCOUNT ON;

-- Table variables are declared with @; a #name is temp-table syntax and is not valid in DECLARE.
DECLARE @working TABLE (entry_date date, exit_date date, work_time int);

-- One row per distinct (received, exported) pair, with the day difference between them.
-- NOTE(review): the original filter referenced exportiert_am while the rest of the snippet
-- uses [exported]; assumed to be the same column - confirm against the real table.
INSERT INTO @working (entry_date, exit_date, work_time)
SELECT [received]
      ,CONVERT(date, [exported])
      ,DATEDIFF(day, [received], [exported])
FROM [zsdt].[dbo].[antrag]
-- half-open date range instead of YEAR([received]) = 2022 so an index on [received] stays usable
WHERE [received] >= '20220101'
  AND [received] <  '20230101'
  AND scanner_name IS NOT NULL
  AND [exported] IS NOT NULL
  AND scanner_name <> 'AP99'
GROUP BY [received], [exported];

-- Split each entry date's rows into a lower half (ascending NTILE) and an upper half
-- (descending NTILE). With an odd row count both halves contain the middle row, so
-- averaging the top of the lower half and the bottom of the upper half yields the median.
WITH halves AS
(   SELECT  entry_date,
            work_time,
            NTILE(2) OVER (PARTITION BY entry_date ORDER BY work_time)      AS half1,
            NTILE(2) OVER (PARTITION BY entry_date ORDER BY work_time DESC) AS half2
    FROM    @working
    WHERE   work_time IS NOT NULL
)
SELECT  entry_date,
        (MAX(CASE WHEN half1 = 1 THEN work_time END) +
         MIN(CASE WHEN half2 = 1 THEN work_time END)) / 2.0 AS median_work_time
FROM    halves
GROUP BY entry_date
ORDER BY entry_date;

How can I select records from the last value accumulated

I have the next data: TABLE_A
RegisteredDate
Quantity
2022-03-01 13:00
100
2022-03-01 13:10
20
2022-03-01 13:20
-80
2022-03-01 13:30
-40
2022-03-02 09:00
10
2022-03-02 22:00
-5
2022-03-03 02:00
-5
2022-03-03 03:00
25
2022-03-03 03:20
-10
If I add cumulative column
-- Running total of Quantity in RegisteredDate order, accumulated row by row (explicit ROWS frame).
select RegisteredDate, Quantity
, sum(Quantity) over ( order by RegisteredDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as Summary
from TABLE_A
RegisteredDate
Quantity
Summary
2022-03-01 13:00
100
100
2022-03-01 13:10
20
120
2022-03-01 13:20
-80
40
2022-03-01 13:30
-40
0
2022-03-02 09:00
10
10
2022-03-02 22:00
-5
5
2022-03-03 02:00
-5
0
2022-03-03 03:00
25
25
2022-03-03 03:20
-10
15
Is there a way to get the following result with a query?
RegisteredDate
Quantity
Summary
2022-03-03 03:00
25
25
2022-03-03 03:20
-10
15
This result is the last records after the last zero.
EDIT:
What I really need for the solution is the date 2022-03-03 03:00, which is the first date among the records after the last zero.
You can use the SUM window function to compute a grp column that counts, from the newest row backwards, how many zero totals have been seen so far; the rows where grp is 0 are the ones after the last zero.
Query 1:
-- Return the rows that come after the last point where the running total was zero.
WITH running AS (
    -- cumulative Quantity in RegisteredDate order
    SELECT
        RegisteredDate,
        Quantity,
        SUM(Quantity) OVER (
            ORDER BY RegisteredDate
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS Summary
    FROM TABLE_A
),
flagged AS (
    -- walking from the newest row backwards, grp stays 0 until the latest zero total is reached
    SELECT
        running.*,
        SUM(CASE WHEN Summary = 0 THEN 1 ELSE 0 END)
            OVER (ORDER BY RegisteredDate DESC) AS grp
    FROM running
)
SELECT
    RegisteredDate,
    Quantity
FROM flagged
WHERE grp = 0
ORDER BY RegisteredDate
Results:
| RegisteredDate | Quantity |
|----------------------|----------|
| 2022-03-03T03:00:00Z | 25 |
| 2022-03-03T03:20:00Z | -10 |
Use a CTE that returns the summary column and NOT EXISTS to filter out the rows that you don't need:
-- Keep only the rows that have no zero running total at or after them,
-- i.e. the rows that follow the last zero.
WITH cte AS (
    SELECT *,
           SUM(Quantity) OVER (ORDER BY RegisteredDate) AS Summary
    FROM TABLE_A
)
SELECT cur.*
FROM cte AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM cte AS zero_row
    WHERE zero_row.Summary = 0
      AND zero_row.RegisteredDate >= cur.RegisteredDate
)
ORDER BY cur.RegisteredDate;
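The ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW clause can be omitted here: with an ORDER BY and no explicit frame, the window defaults to RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, which produces the same running total as long as the RegisteredDate values are unique (rows sharing the same RegisteredDate would all receive the same sum).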
See the demo.
Try this:
-- Rows dated at or after every zero running total, excluding the zero rows themselves.
WITH running AS (
    SELECT
        RegisteredDate,
        Quantity,
        SUM(Quantity) OVER (
            ORDER BY RegisteredDate
            ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
        ) AS Summary
    FROM TABLE_A
)
SELECT *
FROM running
WHERE Summary <> 0
  AND RegisteredDate >= ALL (
        SELECT RegisteredDate
        FROM running
        WHERE Summary = 0
      );
Fiddle
Basically, you want RegisteredDate to be >= every RegisteredDate where Summary = 0, and you also want Summary <> 0.
When using window functions, take into account that the RegisteredDate column may not be unique in TABLE_A; in that case, ordering only by RegisteredDate is not enough to get a stable result on the same dataset.
-- Return the first row after the last zero running total.
With A As (
    -- ID gives the rows a total order; Quantity breaks ties between equal RegisteredDate values
    Select ROW_NUMBER() Over (Order by RegisteredDate, Quantity) As ID, RegisteredDate, Quantity
    From TABLE_A),
B As (
    -- running total accumulated in ID order
    Select A.*, SUM(Quantity) Over (Order by ID) As Summary
    From A)
Select Top 1 *
From B
Where ID > (Select MAX(ID) From B Where Summary=0)
-- TOP without ORDER BY may return any qualifying row; order by ID to get the first one
Order by ID
ID
RegisteredDate
Quantity
Summary
8
2022-03-03 03:00
25
25

Calculate percentage change of price based on Category with SQL

I am writing a Query with SQL and couldn't figure it out yet...
My table looks like this:
Category Price Date
Cat1 20 2019-04
Cat2 12 2019-04
Cat3 5 2019-04
Cat1 23 2020-04
Cat2 17 2020-04
Cat3 8 2020-04
I would like to get a table that shows this:
Cat Pct_change Period
Cat 1 0.15 2019-2020
Cat 2 0.41 "
And so on.
I can get this category by category but I have like 100 categories, cant do this manually. It would be great, too, to see both prices side by side. What I don't (can't) allow is to generate new tables just saving the data to join separate tables...
Thank you!!
Use LEAD() window function to get the price and date of the next date for each category:
-- Percentage change in Price between consecutive dates of each category.
WITH with_next AS (
    -- attach the price and date of the next (later) row within the same category
    SELECT
        Category,
        Price,
        Date,
        LEAD(Price) OVER (PARTITION BY Category ORDER BY Date) AS next_price,
        LEAD(Date)  OVER (PARTITION BY Category ORDER BY Date) AS next_date
    FROM tablename
)
SELECT
    Category,
    ROUND(1.0 * next_price / Price - 1, 2) AS Pct_change,
    SUBSTR(Date, 1, 4) || '-' || SUBSTR(next_date, 1, 4) AS Period
FROM with_next
-- the latest row of each category has no successor and is dropped
WHERE next_date IS NOT NULL
See the demo.
Results:
Category
Pct_change
Period
Cat1
0.15
2019-2020
Cat2
0.42
2019-2020
Cat3
0.6
2019-2020
You can use first_value():
-- Percentage change from each category's earliest price to its latest price.
-- min/max are written as window functions: plain aggregates cannot be combined with
-- window functions and an ungrouped column unless the query has a GROUP BY.
select distinct
    category,
    min(date) over (partition by category) as first_date,
    max(date) over (partition by category) as last_date,
    -- 1.0 * forces non-integer division when price is an integer column
    (-1 + 1.0 * first_value(price) over (partition by category order by date desc) /
                first_value(price) over (partition by category order by date asc)
    ) as percent_change
from t;

Postgres Bank Account Transaction Balance

Here's an example "transactions" table where each row is a record of an amount and the date of the transaction.
+--------+------------+
| amount | date |
+--------+------------+
| 1000 | 2020-01-06 |
| -10 | 2020-01-14 |
| -75 | 2020-01-20 |
| -5 | 2020-01-25 |
| -4 | 2020-01-29 |
| 2000 | 2020-03-10 |
| -75 | 2020-03-12 |
| -20 | 2020-03-15 |
| 40 | 2020-03-15 |
| -50 | 2020-03-17 |
| 200 | 2020-10-10 |
| -200 | 2020-10-10 |
+--------+------------+
The goal is to return one column "balance" with the balance of all transactions. Only catch is that there is a monthly fee of $5 for each month that there are not at least THREE payment transactions (represented by a negative value in the amount column) that total at least $100. So in the example, the only month where you wouldn't have a $5 fee is March because there were 3 payments (negative amount transactions) that totaled $145. So the final balance would be $2,746. The sum of the amounts is $2,801 minus the $55 monthly fees (11 months X 5). I'm not a postgres expert by any means, so if anyone has any pointers on how to get started solving this problem or what parts of the postgres documentation which help me most with this problem that would be much appreciated.
The expected output would be:
+---------+
| balance |
+---------+
| 2746 |
+---------+
This is rather complicated. You can calculate the total span of months and then subtract out the one where the fee is cancelled:
-- Balance = sum of all amounts minus a $5 fee for every month in the observed span
-- (first to last transaction) that does not qualify for the fee waiver.
-- NOTE(review): age() counts whole elapsed months, so a span such as Jan 31 -> Feb 1
-- is counted as one month even though it touches two calendar months.
select amount, (extract(year from age) * 12 + extract(month from age)), cnt,
       amount - 5 * (extract(year from age) * 12 + extract(month from age) + 1 - cnt) as balance
from (select sum(amount) as amount,
             age(max(date), min(date)) as age
      from transactions
     ) totals cross join
     -- cnt = number of months whose fee is waived
     (select count(*) as cnt
      from (select date_trunc('month', date) as yyyymm
            from transactions
            where amount < 0
            group by date_trunc('month', date)
            -- waiver rule: at least three payments totalling at least $100,
            -- so a total of exactly -100 must qualify (<=, not <)
            having count(*) >= 3 and sum(amount) <= -100
           ) waived_months
     ) waived;
Here is a db<>fiddle.
This calculates 2756, which appears to follow your rules. If you want the full year, you can just use 12 instead of calculating the number of months with age().
I would first left join with a generate_series that represents the months you are interested in (in this case, all in the year 2020). That adds the missing months with a balance of 0.
Then I aggregate these values per month and add the negative balance per month and the number of negative balances.
Finally, I calculate the grand total and subtract the fee for each month that does not meet the criteria.
-- Balance for 2020: total of all amounts minus a $5 fee for every month that has fewer
-- than three payments or whose payments total less than $100.
SELECT sum(amount_per_month) -
       -- coalesce: the filtered sum is NULL when no month is charged a fee
       coalesce(sum(5) FILTER (WHERE negative_per_month > -100 OR negative_count < 3), 0) AS balance
FROM (SELECT sum(amount) AS amount_per_month,
             -- NULL for months without payments; negative_count = 0 still triggers the fee
             sum(amount) FILTER (WHERE amount < 0) AS negative_per_month,
             month_start,
             count(*) FILTER (WHERE amount < 0) AS negative_count
      FROM (SELECT coalesce(t.amount, 0) AS amount,
                   coalesce(date_trunc('month', CAST (t.date AS timestamp)), dates.d) AS month_start
            -- one row per month of 2020, so months without transactions are still counted
            FROM generate_series(
                    TIMESTAMP '2020-01-01',
                    TIMESTAMP '2020-12-01',
                    INTERVAL '1 month'
                 ) AS dates (d)
               -- the transactions table exposes the column "date" (the original referenced t.day)
               LEFT JOIN transactions AS t
                  ON dates.d = date_trunc('month', CAST (t.date AS timestamp))
           ) AS gaps_filled
      GROUP BY month_start
     ) AS sums_per_month;
This would be my solution by simply using cte.
DB fiddle here.
balance
2746
Code:
-- Balance = total of all amounts minus $5 for each of the 12 months that does not have
-- at least three payments (negative amounts) totalling at least 100.
WITH monthly_payments AS (
    -- per calendar month: total paid (as a positive number) and number of payments
    SELECT
        date_part('month', date) AS cred_month,
        sum(CASE WHEN amount < 0 THEN abs(amount) ELSE 0 END) AS credited_amount,
        sum(CASE WHEN amount < 0 THEN 1 ELSE 0 END) AS credited_cnt
    FROM transactions
    GROUP BY date_part('month', date)
),
fee AS (
    -- months meeting both thresholds are fee-free; the remaining months of 12 cost 5 each
    SELECT
        (12 - count(1)) * 5 AS fee,
        1 AS id
    FROM monthly_payments
    WHERE credited_cnt >= 3
      AND credited_amount >= 100
),
total AS (
    SELECT
        sum(amount) AS amount,
        1 AS id
    FROM transactions
)
SELECT total.amount - fee.fee AS balance
FROM total
LEFT JOIN fee
    ON total.id = fee.id
The query below worked for me (I adapted my answer from @GordonLinoff's):
-- Balance = total of all amounts minus a $5 fee for every month from the month of the
-- first transaction through December, except the months whose fee is waived.
select cast(totalamount - 5 * (12 - extract(month from firstt) + 1 - nofeemonths) as int) as balance
from (select sum(amount) as totalamount, min(date) as firstt
      from transactions
     ) totals cross join
     -- nofeemonths = number of months whose fee is waived
     (select count(*) as nofeemonths
      from (select date_trunc('month', date) as month_start
            from transactions
            where amount < 0
            group by date_trunc('month', date)
            -- waiver rule: at least three payments totalling at least $100,
            -- so a total of exactly -100 must qualify (<=, not <)
            having count(*) >= 3 and sum(amount) <= -100
           ) waived_months
     ) waived;
The firstt is the date of first transaction in that year and 12 - extract(month from firstt) + 1 - nofeemonths are the number of months for which the credit card fees of 5 will be charged.