PostgreSQL: get min, max and count of invoice records for each partner, plus the last payment - sql

I have a table invoice like this :
id, partner_id, number, invoice_date
And a Payment Table like this:
id, payment_date, partner_id
I want to get min and max for both number & invoice_date, and count invoices, and last payment for each partner, something like this :
partner_id, min number, min date, max number, max date, count, last_pay
1, INV-2017-003, 02-01-2017, INV-2020-010, 01-01-2020, 142, 02-12-2019
5, INV-2019-124, 05-03-2019, INV-2020-005, 01-01-2020, 150, 01-01-2020
....

You can join the invoice and payment data to the partners table and aggregate per partner id:
-- Per-partner invoice statistics plus the most recent payment date.
-- Invoices and payments are aggregated on their own before being joined:
-- joining both detail tables to partners first pairs every invoice with every
-- payment of the same partner, which inflates count(*).
select pr.id,
       i.min_number,
       i.min_invoice_date,
       i.max_number,
       i.max_invoice_date,
       coalesce(i.cnt, 0) as cnt,  -- partners without invoices report 0
       p.last_pay
from partners pr
left join (
    select partner_id,
           min(number)       as min_number,
           min(invoice_date) as min_invoice_date,
           max(number)       as max_number,
           max(invoice_date) as max_invoice_date,
           count(*)          as cnt
    from invoices
    group by partner_id
) i on i.partner_id = pr.id
left join (
    select partner_id,
           max(payment_date) as last_pay
    from payments
    group by partner_id
) p on p.partner_id = pr.id
Update: you can use analytic (window) functions such as min() over (), max() over () and row_number() to get the code (invoice number) that belongs to the max and min dates:
-- One row per partner: earliest/latest invoice date, the invoice number on
-- each of those dates, the invoice count and the last payment date.
select q.id,
       q.min_invoice_date,
       q.max_invoice_date,
       q.max_code,
       q.min_code,
       q.cnt,
       q.last_pay
from
(
    select pr.id,
           min(i.invoice_date) over (partition by pr.id) as min_invoice_date,
           max(i.invoice_date) over (partition by pr.id) as max_invoice_date,
           -- invoice number on the latest / earliest invoice date
           first_value(i.number) over (partition by pr.id order by i.invoice_date desc nulls last) as max_code,
           first_value(i.number) over (partition by pr.id order by i.invoice_date) as min_code,
           -- count(i.id) instead of count(*): partners without invoices give 0
           count(i.id) over (partition by pr.id) as cnt,
           p.last_pay,
           row_number() over (partition by pr.id order by i.invoice_date desc nulls last) as rn
    from partners pr
    left join invoices i on i.partner_id = pr.id
    -- payments are collapsed to one row per partner first, so the join cannot
    -- multiply invoice rows and distort cnt
    left join (
        select partner_id,
               max(payment_date) as last_pay
        from payments
        group by partner_id
    ) p on p.partner_id = pr.id
) q
where q.rn = 1

Why isn't this simple aggregation?
-- Plain aggregation per partner: smallest/largest invoice number and date,
-- number of invoices, and the latest payment date.
select i.partner_id,
       min(i.number) as min_number,
       min(i.invoice_date) as min_invoice_date,
       max(i.number) as max_number,
       max(i.invoice_date) as max_invoice_date,
       -- distinct, because the join repeats each invoice once per payment
       count(distinct i.id) as num_invoices,
       max(p.payment_date) as max_payment_date
from invoices i left join
     payments p
     -- the payment table described in the question only links to the partner
     on p.partner_id = i.partner_id
group by i.partner_id;
If you want the number on the earliest invoice (and min() doesn't work), then you can do this with a "first" aggregation function. Unfortunately, Postgres doesn't directly support one. But it does through array functions:
-- Per partner: the invoice number on the earliest and on the latest invoice
-- date (emulating a "first" aggregate with an ordered array_agg), the date
-- range, the number of invoices and the latest payment date.
select i.partner_id,
       (array_agg(i.number order by i.invoice_date asc))[1] as min_number,
       min(i.invoice_date) as min_invoice_date,
       (array_agg(i.number order by i.invoice_date desc))[1] as max_number,
       max(i.invoice_date) as max_invoice_date,
       -- distinct, because the join repeats each invoice once per payment
       count(distinct i.id) as num_invoices,
       max(p.payment_date) as max_payment_date
from invoices i left join
     payments p
     on p.partner_id = i.partner_id
group by i.partner_id;

This is similar to @Barbaros Özhan's answer, but it calculates the min/max before the join (otherwise, if you have multiple rows per partner in both invoices and payments, the COUNT will be wrong). Additionally, there is only a single PARTITION/ORDER, which should result in a more efficient plan.
-- Per-partner invoice summary joined to the partner's most recent payment date.
WITH invoice_summary AS (
    -- Rows are numbered by invoice_date within each partner; the row numbered 1
    -- is the earliest invoice, so its own number/date serve as the min values.
    SELECT
        partner_id,
        number       AS min_code,
        invoice_date AS min_invoice_date,
        -- the frame spans the whole partition, so Last_Value reads the row
        -- with the latest invoice_date
        Last_Value(number)
            OVER (by_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS max_code,
        Last_Value(invoice_date)
            OVER (by_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS max_invoice_date,
        Count(*) OVER (PARTITION BY partner_id) AS Cnt,
        Row_Number() OVER by_date AS rn
    FROM invoices
    WINDOW by_date AS (PARTITION BY partner_id ORDER BY invoice_date)
),
last_payment AS (
    -- max payment date per partner
    SELECT
        partner_id,
        Max(payment_date) AS last_pay
    FROM payments
    GROUP BY partner_id
)
SELECT i.*, p.last_pay
FROM invoice_summary AS i
LEFT JOIN last_payment AS p
    ON p.partner_id = i.partner_id
WHERE i.rn = 1

Related

Create a column with daily count in impala

I want to create a count column which will hold the count per day. I have managed to do it like this:
-- Books ordered on 20220122 with the customer's region and the number of
-- customer rows (customer != 'null') that share the customer's order date.
SELECT
    book,
    orders,
    s.common_id,
    s.order_date,
    d.customer_region,
    daily.cnt
FROM books_tbt AS s
INNER JOIN customer_tbt AS d
    ON s.common_id = d.common_id
INNER JOIN (
    -- one row per order_date with its customer-row count
    SELECT
        COUNT(*) AS cnt,
        order_date
    FROM customer_tbt
    WHERE customer != 'null'
    GROUP BY order_date
) AS daily
    ON daily.order_date = d.order_date
WHERE d.customer != 'null'
  AND s.order_date = 20220122
GROUP BY
    book,
    orders,
    s.common_id,
    s.order_date,
    d.customer_region,
    daily.cnt;
I want to ask if there is a more efficient way to do it?
You can simply use the COUNT(*) OVER (PARTITION BY order_date) window function to calculate the count for each order date.
-- Same result as the join against a per-day GROUP BY subquery, but the daily
-- count is produced by a window function in a single pass over customer_tbt.
SELECT
    book,
    orders,
    s.common_id,
    s.order_date,
    d.customer_region,
    d.cnt
FROM books_tbt AS s
INNER JOIN (
    -- COUNT(*) OVER cannot be combined with the outer GROUP BY, hence the
    -- subquery. The customer filter is applied here, before counting, so cnt
    -- only counts the rows the original per-day subquery counted.
    SELECT
        c.*,
        COUNT(*) OVER (PARTITION BY c.order_date) AS cnt
    FROM customer_tbt AS c
    WHERE c.customer != 'null'
) AS d
    ON s.common_id = d.common_id
WHERE s.order_date = 20220122
GROUP BY
    book,
    orders,
    s.common_id,
    s.order_date,
    d.customer_region,
    d.cnt;

SQL get top 3 values / bottom 3 values with group by and sum

I am working on a restaurant management system. There I have two tables
order_details(orderId,dishId,createdAt)
dishes(id,name,imageUrl)
My customer wants to see a report top 3 selling items / least selling 3 items by the month
For the moment I did something like this
-- Quantity sold per dish and month name, listed from lowest to highest qty.
SELECT *
FROM (
    SELECT
        SUM(qty) AS qty,
        od.dishId,
        MONTHNAME(od.createdAt) AS mon,
        d.name,
        d.imageUrl
    FROM rms.order_details AS od
    INNER JOIN dishes AS d
        ON od.dishId = d.id
    GROUP BY
        od.dishId,
        MONTHNAME(od.createdAt)
) t
ORDER BY t.qty
This gives me all the dishes sold count order by qty.
I have to manually filter max 3 records and reject the rest. There should be a SQL way of doing this. How do I do this in SQL?
You would use row_number() for this purpose. You don't specify the database you are using, so I am guessing at the appropriate date functions. I also assume that you mean a month within a year, so you need to take the year into account as well:
-- Top 3 and bottom 3 selling dishes for every year/month.
-- Each dish gets two rankings inside its month: one by descending quantity
-- (best sellers first) and one by ascending quantity (worst sellers first).
SELECT ym.*
FROM (SELECT YEAR(od.createdAt) AS yyyy,
             MONTH(od.createdAt) AS mm,
             SUM(od.qty) AS qty,
             od.dishId, d.name, d.imageUrl,
             ROW_NUMBER() OVER (PARTITION BY YEAR(od.createdAt), MONTH(od.createdAt)
                                ORDER BY SUM(od.qty) DESC) AS seqnum_desc,
             -- ascending order here; ordering both rankings DESC would make
             -- the "bottom 3" filter select the top 3 again
             ROW_NUMBER() OVER (PARTITION BY YEAR(od.createdAt), MONTH(od.createdAt)
                                ORDER BY SUM(od.qty) ASC) AS seqnum_asc
      FROM rms.order_details od INNER JOIN
           dishes d
           ON od.dishId = d.id
      -- d.name / d.imageUrl are listed so the query is valid under strict
      -- GROUP BY rules; they are selected alongside od.dishId = d.id
      GROUP BY YEAR(od.createdAt), MONTH(od.createdAt), od.dishId, d.name, d.imageUrl
     ) ym
WHERE seqnum_asc <= 3 OR
      seqnum_desc <= 3;
Using the above info, I used a combination of group by, order by and limit,
as shown below. I hope this is what you are looking for.
-- The three dish/month combinations with the highest quantity sold, with the
-- dish name and image attached.
-- NOTE(review): LIMIT 3 ranks across all months together; a per-month top 3
-- needs a window function such as ROW_NUMBER().
SELECT
    t.qty,
    t.dishId,
    t.`month`,
    d.name,
    d.imageUrl
FROM (
    SELECT
        od.dishId,
        -- quantity sold, matching the SUM(qty) used in the question
        SUM(od.qty) AS qty,
        DATE_FORMAT(od.createdAt, '%Y-%m') AS `month`
    FROM rms.order_details od
    GROUP BY DATE_FORMAT(od.createdAt, '%Y-%m'), od.dishId
    ORDER BY SUM(od.qty) DESC
    LIMIT 3
) t
JOIN rms.dishes d ON t.dishId = d.id
-- the join does not guarantee the subquery's order, so sort again here
ORDER BY t.qty DESC

getting difference between two invoices by ranking and subtracting one from the other

Trying to grab difference in invoices
Attempted using cte's for ranks 1 and 2, but they have a subquery in them and cant be done!
the second query looks the same, but with rank=2.
-- Invoices that rank first by created_at (newest) within their subscription,
-- listed newest first.
SELECT *
FROM (
    SELECT
        i.id,
        i.subtotal/100 AS subtotal,
        i.created_at,
        i.paid_at,
        RANK() OVER (
            PARTITION BY i.subscription_id
            ORDER BY i.created_at DESC
        ) AS Rank
    FROM Invoices i
) AS ranked
WHERE ranked.rank = 1
ORDER BY ranked.created_at DESC;
Following the path that you are on (using row_number()/rank()), you can use conditional aggregation. Assuming you want the difference of the subtotal, then:
-- Difference between the latest and the second-latest invoice subtotal of
-- each subscription (conditional aggregation over the two newest invoices).
-- A subscription with a single invoice yields that invoice's subtotal.
select i.subscription_id,
       sum(case when i.seqnum = 1 then i.subtotal_amount
                else - i.subtotal_amount
           end) as difference
from (select inv.subscription_id,
             -- named separately from the raw column so the outer query has
             -- exactly one subtotal value to reference
             inv.subtotal/100 as subtotal_amount,
             row_number() over (partition by inv.subscription_id order by inv.created_at desc) as seqnum
      from Invoices inv
     ) i
where i.seqnum in (1, 2)
group by i.subscription_id;

How do I get the value associated with a MIN or MAX

I'm in the middle of creating a query and it already returns the other values I need; in it I am pulling a MIN and MAX date for each individual patient_id. I'm wondering how I would pull a value associated with that MIN or MAX date as well. I'm looking for the value of the column provider_id, which will show which doctor the patient saw on that MIN or MAX date. Here is what I have so far:
-- Production total and first/last transaction date per patient and clinic
-- (transactions with impacts = 'P'), with the patient's demographic columns.
WITH patient_totals AS (
    SELECT
        patient_id,
        clinic,
        SUM(amount)    AS production,
        MIN(tran_date) AS first_visit,
        MAX(tran_date) AS last_visit
    FROM transactions
    WHERE impacts = 'P'
    GROUP BY
        patient_id,
        clinic
)
SELECT
    t.patient_id,
    t.clinic,
    p.city,
    p.state,
    p.zipcode,
    p.sex,
    t.production,
    t.first_visit,
    t.last_visit
FROM patient_totals t
LEFT JOIN patient p
    ON t.patient_id = p.patient_id
   AND t.clinic = p.clinic
I believe that this will get what you're looking for:
-- Per patient and clinic: production total, first/last visit date, and the
-- provider seen on each of those visits.
;WITH CTE_Transactions AS (
    -- Every transaction row carries the partition-wide values; the outer query
    -- keeps only the row numbered 1, so no DISTINCT is needed.
    SELECT
        patient_id,
        clinic,
        SUM(amount) OVER (PARTITION BY patient_id, clinic) AS production,
        FIRST_VALUE(tran_date) OVER (PARTITION BY patient_id, clinic ORDER BY tran_date) AS first_visit,
        FIRST_VALUE(provider_id) OVER (PARTITION BY patient_id, clinic ORDER BY tran_date) AS first_provider_id,
        -- LAST_VALUE needs an explicit full-partition frame: the default frame
        -- ends at the current row, which would return the first visit's values
        -- on the row_num = 1 row.
        LAST_VALUE(tran_date) OVER (PARTITION BY patient_id, clinic ORDER BY tran_date
                                    ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_visit,
        LAST_VALUE(provider_id) OVER (PARTITION BY patient_id, clinic ORDER BY tran_date
                                      ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_provider_id,
        ROW_NUMBER() OVER (PARTITION BY patient_id, clinic ORDER BY tran_date) AS row_num
    FROM Transactions
    WHERE impacts = 'P'
)
SELECT
    w.patient_id,
    w.clinic,
    p.city,
    p.state,
    p.zipcode,
    p.sex,
    w.production,
    w.first_visit,
    w.last_visit,
    -- the providers seen on the first and last visit
    w.first_provider_id,
    w.last_provider_id
FROM
    CTE_Transactions w
    LEFT JOIN Patient p ON
        w.patient_id = p.patient_id AND
        w.clinic = p.clinic
    -- NOTE(review): the Provider schema is not shown; select whatever provider
    -- columns are needed from first_prov / last_prov.
    INNER JOIN Provider first_prov ON
        first_prov.provider_id = w.first_provider_id
    INNER JOIN Provider last_prov ON
        last_prov.provider_id = w.last_provider_id
WHERE
    w.row_num = 1
I assume you are referring to the CTE. You can use conditional aggregation along with window functions. For instance, to get the amount for the first visit:
-- Replacement for the "test" CTE: adds first_amount, the total amount on the
-- patient's first visit date at the clinic, via conditional aggregation.
WITH test AS (
    SELECT patient_id, clinic,
           SUM(amount) AS production,
           MIN(tran_date) AS first_visit,
           MAX(tran_date) AS last_visit,
           -- only rows dated on the partition's earliest tran_date contribute
           SUM(CASE WHEN tran_date = min_tran_date THEN amount END) AS first_amount
    FROM (SELECT t.*,
                 MIN(t.tran_date) OVER (PARTITION BY t.patient_id, t.clinic) AS min_tran_date
          FROM transactions t
          WHERE t.impacts = 'P'
         ) t
    GROUP BY patient_id, clinic
)

How to pull the whole row which has the latest date in SQL?

I’m trying to select the pair of product – distribution center attached with the most recent order (based on order date). For one order I can have multiple products, but the whole order will be shipped from one specific distribution center.
How do I select the specific product-distribution center attached with the latest order?
My structure is basically like this:
data.orderdetail table has ordernum, orderdate, distributioncenter
I tried to pull like this, but it doesn’t give me the desired result. I’m using sql server 2008:
-- Distinct order/product/distribution-center rows, joined to the per-order
-- maximum order date. The join matches on OrderNum only, so the computed
-- maximum date does not restrict which detail rows are returned.
SELECT DISTINCT
    detail.OrderNum,
    detail.Product,
    detail.DistributionCenter,
    CAST(detail.OrderDate AS DATE) AS Orderdate
FROM data.OrderDetail AS detail
INNER JOIN (
    SELECT
        OrderNum,
        MAX(CAST(Orderdate AS date)) AS orderdate
    FROM data.OrderDetail
    GROUP BY OrderNum
) AS latest
    ON detail.OrderNum = latest.OrderNum
It looks as if you need one more clause in your join Condition
You've got
ON y.OrderNum = x.OrderNum
Which will return all the orders that match the Order number in the subquery
But you'll need
ON y.OrderNum = x.OrderNum
AND y.OrderDate = x.orderdate
Which will return all the orders that match the Order number in the subquery and the maximum date for that order number
-- Detail rows whose order date equals the latest order date of their OrderNum.
SELECT DISTINCT
    y.OrderNum,
    y.Product,
    y.DistributionCenter,
    CAST(y.OrderDate AS DATE) AS Orderdate
FROM (
    -- latest calendar date per order number
    SELECT
        OrderNum,
        MAX(CAST(Orderdate AS date)) AS orderdate
    FROM data.OrderDetail
    GROUP BY OrderNum
) AS x
INNER JOIN
    data.OrderDetail AS y
    ON y.OrderNum = x.OrderNum
    -- x.orderdate is a DATE; cast the detail value too so a time-of-day part
    -- in OrderDate cannot prevent the match
    AND CAST(y.OrderDate AS DATE) = x.orderdate
I believe what you are looking for is row_number. This will partition your result set by OrderNum then rank the sets by the OrderDate. You can then filter off the extra rows in another where clause.
-- Latest detail row per OrderNum: rows are numbered by order date (newest
-- first) inside each OrderNum and only the row numbered 1 is kept.
select result.OrderNum,
    result.Product,
    result.DistributionCenter,
    CAST(result.OrderDate as date) as Orderdate
from (
    select y.*,
        row_number() over (
            partition by y.OrderNum order by CAST(y.OrderDate as date) desc
            ) rank_
    -- row_number() already isolates the newest row, so no join against a
    -- per-order MAX(date) subquery is needed
    from data.OrderDetail as y
    ) result
where result.rank_ = 1;
-- Newest detail row per OrderNum, chosen with ROW_NUMBER().
select xxx.OrderNum,
       xxx.Product,
       xxx.DistributionCenter,
       xxx.OrderDate,
       xxx.rownum
from
(
    SELECT OrderNum, Product, DistributionCenter, OrderDate
         , ROW_NUMBER() over (partition by OrderNum order by OrderDate desc) as rownum
    -- schema-qualified: the table is data.OrderDetail in the question
    FROM data.OrderDetail
) as xxx
where xxx.rownum = 1
ROW_NUMBER (Transact-SQL)
Try this.
-- For every product / distribution-center pair, return the row belonging to
-- its most recent order date.
; WITH ranked_orders AS (
    SELECT
        od.OrderNum,
        od.Product,
        od.DistributionCenter,
        CAST(od.OrderDate AS DATE) AS OrderDate,
        -- 1 = newest order date within the product / distribution-center pair
        ROW_NUMBER() OVER (
            PARTITION BY od.Product, od.DistributionCenter
            ORDER BY CAST(od.OrderDate AS DATE) DESC
        ) AS RowNumber
    FROM data.OrderDetail od
)
SELECT
    OrderNum,
    Product,
    DistributionCenter,
    OrderDate
FROM ranked_orders
WHERE RowNumber = 1