How to get Top 10 for a grouped column?

How to get Top 10 for a grouped column? - sql

My data is a list of customers and products, and the cost for each product
Member Product Cost
Bob A123 $25
Bob A123 $25
Bob A123 $75
Joe A789 $50
Joe A789 $50
Bob C321 $50
Joe A123 $50
etc, etc, etc
My current query grabs each customer, product and cost, and also the total cost for that customer. It gives results like this:
Member Product Cost Total Cost
Bob A123 $125 $275
Bob A1433 $100 $275
Bob C321 $50 $275
Joe A123 $150 $250
Joe A789 $100 $250
How can I get the top 10 by Total Cost, not just the top 10 records overall? My query is:
SELECT a.Member
,a.Product
,SUM(a.Cost)
,(SELECT SUM(b.Cost) from MyTable b WHERE b.Member = a.Member) as 'Total Cost'
FROM MyTable a
GROUP BY a.Member
,a.Product
ORDER BY [Total Cost] DESC
If I do a SELECT TOP 10 it only gives me the first 10 rows. The actual Top 10 would end up being more like 40 or 50 rows.
Thanks!

Try this one.
-- Per member/product cost for the ten members with the highest total cost.
-- Output: one row per (member, product), ordered by the member's position
-- in the total-cost ranking.
SELECT tbl.member,
tbl.product,
Sum(tbl.cost) AS cost,
-- totalcost is constant within a member; Max() only satisfies GROUP BY
Max(stbl.totalcost) AS totalcost
FROM mytable tbl
-- stbl: one row per member with its total cost, numbered 1..n by descending
-- total. Row_number() gives distinct numbers, so members tied on total at
-- the cut-off are picked in an unspecified order.
INNER JOIN (SELECT member,
Sum(cost) AS totalcost,
Row_number() OVER (ORDER BY Sum(cost) DESC) AS rn
FROM mytable
GROUP BY member) stbl
ON stbl.member = tbl.member
-- keep only the ten highest-ranked members (all of their product rows)
WHERE stbl.rn <= 10
GROUP BY tbl.member, tbl.product
ORDER BY Max(stbl.rn)
Online Demo: http://sqlfiddle.com/#!18/87857/1/0
Table structure & Sample Data
CREATE TABLE mytable
(
member NVARCHAR(50),
product NVARCHAR(10),
cost INT
)
-- Sample data for the demo. The column list is explicit so the statement
-- keeps working if columns are added or reordered, and cost is supplied as
-- integer literals to match the INT column instead of relying on implicit
-- string-to-int conversion.
INSERT INTO mytable (member, product, cost)
VALUES ('Bob', 'A123', 25),
       ('Bob', 'A123', 25),
       ('Bob', 'A123', 75),
       ('Joe', 'A789', 50),
       ('Joe', 'A789', 50),
       ('Bob', 'C321', 50),
       ('Joe', 'A123', 50),
       ('Rock', 'A123', 50),
       ('Anord', 'A100', 50),
       ('Jack', 'A123', 50),
       ('Anord', 'A123', 50),
       ('Joe', 'A123', 50),
       ('Karma', 'A123', 50),
       ('Seetha', 'A123', 50),
       ('Aruna', 'A123', 50),
       ('Jake', 'A123', 50),
       ('Paul', 'A123', 50),
       ('Logan', 'A123', 50),
       ('Joe', 'A123', 50);
Subquery - Total cost per customer
SELECT member,
Sum(cost) AS totalcost,
Row_number() OVER (ORDER BY Sum(cost) DESC) AS rn
FROM mytable
GROUP BY member
Subquery: Output
+---------+------------+----+
| member | totalcost | rn |
+---------+------------+----+
| Joe | 250 | 1 |
| Bob | 175 | 2 |
| Anord | 100 | 3 |
| Aruna | 50 | 4 |
| Jack | 50 | 5 |
| Jake | 50 | 6 |
| Karma | 50 | 7 |
| Logan | 50 | 8 |
| Paul | 50 | 9 |
| Rock | 50 | 10 |
| Seetha | 50 | 11 |
+---------+------------+----+
Record Count: 11
Main Query
SELECT tbl.member,
tbl.product,
Sum(tbl.cost) AS cost,
Max(stbl.totalcost) AS totalcost,
Max(stbl.rn) AS rn
FROM mytable tbl
INNER JOIN (SELECT member,
Sum(cost) AS totalcost,
Row_number() OVER (ORDER BY Sum(cost) DESC) AS rn
FROM mytable
GROUP BY member) stbl
ON stbl.member = tbl.member
GROUP BY tbl.member, tbl.product
ORDER BY Max(stbl.rn)
Main Query: Output
+---------+----------+-------+------------+----+
| member | product | cost | totalcost | rn |
+---------+----------+-------+------------+----+
| Joe | A123 | 150 | 250 | 1 |
| Joe | A789 | 100 | 250 | 1 |
| Bob | C321 | 50 | 175 | 2 |
| Bob | A123 | 125 | 175 | 2 |
| Anord | A100 | 50 | 100 | 3 |
| Anord | A123 | 50 | 100 | 3 |
| Aruna | A123 | 50 | 50 | 4 |
| Jack | A123 | 50 | 50 | 5 |
| Jake | A123 | 50 | 50 | 6 |
| Karma | A123 | 50 | 50 | 7 |
| Logan | A123 | 50 | 50 | 8 |
| Paul | A123 | 50 | 50 | 9 |
| Rock | A123 | 50 | 50 | 10 |
| Seetha | A123 | 50 | 50 | 11 |
+---------+----------+-------+------------+----+
Record Count: 14

You can rank the rows with a window function such as rank() or dense_rank(), applied on top of your existing query:
-- Keep every member/product row that belongs to one of the ten highest
-- member totals.
-- temp:   the original query; every column is named, as a CTE requires.
-- ranked: ranks rows by the member's total (1 = highest). The ranking is
--         global rather than partitioned by member, because the total is
--         constant within a member and a per-member rank would always be 1.
--         DENSE_RANK leaves no gaps, so "rnk <= 10" means the ten highest
--         totals; members tied on a total share a rank and are all kept.
WITH temp AS (
    SELECT a.Member
          ,a.Product
          ,SUM(a.Cost) AS Cost
          ,(SELECT SUM(b.Cost) FROM MyTable b WHERE b.Member = a.Member) AS [Total Cost]
    FROM MyTable a
    GROUP BY a.Member, a.Product
),
ranked AS (
    SELECT t.Member
          ,t.Product
          ,t.Cost
          ,t.[Total Cost]
          ,DENSE_RANK() OVER (ORDER BY t.[Total Cost] DESC) AS rnk
    FROM temp t
)
SELECT r.Member
      ,r.Product
      ,r.Cost
      ,r.[Total Cost]
      ,r.rnk
FROM ranked r
WHERE r.rnk <= 10   -- filter on the rank: LIMIT is not T-SQL and would cap rows, not members
ORDER BY r.rnk, r.Member, r.Cost DESC;

You can use dense_rank() with apply :
-- Cost per member/product plus the member's total, restricted to the ten
-- highest totals.
-- The CROSS APPLY computes each member's total; grouping by
-- (Member, Product, TotalCost) yields one row per member/product, and
-- dense_rank() numbers the distinct totals from highest (1) downwards.
-- Columns are listed explicitly: "mt.*" together with "... as Cost" would
-- define the column Cost twice in the derived table.
select ranked.Member, ranked.Product, ranked.Cost, ranked.TotalCost
from (select mt.Member, mt.Product,
             sum(mt.Cost) as Cost,
             tot.TotalCost,
             dense_rank() over (order by tot.TotalCost desc) as seq
      from MyTable mt cross apply
           (select sum(mt1.Cost) as TotalCost
            from MyTable mt1
            where mt1.Member = mt.Member
           ) tot
      group by mt.Member, mt.Product, tot.TotalCost
     ) ranked
where ranked.seq <= 10
order by ranked.seq, ranked.Member, ranked.Cost desc;

Use a subquery to get the TOP 10 total costs and join to your query:
SELECT
t.Member, t.Product, t.Cost, g.[Total Cost]
FROM (
SELECT Member, Product, SUM(Cost) as Cost
FROM MyTable
GROUP BY Member, Product
) t INNER JOIN (
SELECT TOP (10) Member, SUM(Cost) as [Total Cost]
FROM MyTable
GROUP BY Member
ORDER BY [Total Cost] DESC
) g on g.Member = t.Member
ORDER BY g.[Total Cost] DESC, t.Member, t.Cost DESC
Depending on your requirement you may use:
SELECT TOP (10) WITH TIES...

You don't have to select from the same table twice. Use SUM OVER to get the total per member.
Use DENSE_RANK to get the totals ranked (highest total = 1, second highest total = 2, ...).
Use TOP(10) WITH TIES to get all rows having the top ten totals.
The query:
-- One pass over mytable: cost per member/product, the member's total via
-- SUM(SUM(cost)) OVER, and a dense rank of that total (1 = highest total).
-- The rank is filtered with WHERE rather than TOP(10) WITH TIES, because
-- TOP ... WITH TIES returns only the first ten ROWS plus rows tied with the
-- tenth, which is not the same as all rows of the ten highest totals.
-- Members tied on a total share a rank and are all returned.
select member, product, cost, total_cost
from
(
  select
    results.member,
    results.product,
    results.cost,
    results.total_cost,
    dense_rank() over (order by results.total_cost desc) as total_rank
  from
  (
    select
      member,
      product,
      sum(cost) as cost,   -- derived-table columns must be named
      sum(sum(cost)) over (partition by member) as total_cost
    from mytable
    group by member, product
  ) results
) ranked
where total_rank <= 10
order by total_rank, member, cost desc;

If you want exactly 10 customers even when there are ties, then a slight variation on Thorsten's method will work:
-- member is a second key INSIDE the ranking, so no two members share a
-- rank even when their totals tie. "member_rank <= 10" therefore keeps at
-- most ten customers (exactly ten when at least ten exist), with all of
-- their product rows.
select member, product, cost, total_cost
from (select s.member, s.product, s.cost, s.total_cost,
             dense_rank() over (order by s.total_cost desc, s.member) as member_rank
      from (select member, product, sum(cost) as cost,
                   sum(sum(cost)) over (partition by member) as total_cost
            from t
            group by member, product
           ) s
     ) ranked
where member_rank <= 10
order by member_rank, cost desc;
The addition of member as a second key may seem like a minor addition. However, it ensures that the dense_rank() is unique for each member (of course ordered by total_cost). This, in turn, guarantees that you get exactly 10 customers.

You can use dense_rank() like below. Worked in SQL Server 2016. Change the value of limit variable to filter number of rows returned.
-- Number of distinct total-cost ranks to return. T-SQL local variables are
-- prefixed with @; # denotes a temporary table.
declare @limit int = 10;

-- x: cost per member/product plus the member's overall total.
-- y: adds a dense rank of the total (1 = highest); tied totals share a rank.
-- There is no ORDER BY inside the derived table: SQL Server rejects it
-- without TOP, and the final ORDER BY determines the result order anyway.
SELECT y.Member
      ,y.Product
      ,y.Cost
      ,y.TotalCost
      ,y.rn
FROM
(
    select x.Member
          ,x.Product
          ,x.Cost
          ,x.TotalCost
          ,rn = dense_rank() over (order by x.TotalCost desc)
    from (
        SELECT a.Member
              ,a.Product
              ,SUM(a.Cost) as Cost
              ,(SELECT SUM(b.Cost) from MyTable b WHERE b.Member = a.Member) as TotalCost
        FROM MyTable a
        GROUP BY a.Member
                ,a.Product
    ) x
) y
where y.rn <= @limit
order by y.rn, y.Member, y.Cost desc;

Related

SQL - get rid of the nested aggregate select

There is a table Payment, which for example tracks the amount of money user puts into account, simplified as
===================================
Id | UserId | Amount | PayDate |
===================================
1 | 42 | 11 | 01.02.99 |
2 | 42 | 31 | 05.06.99 |
3 | 42 | 21 | 04.11.99 |
4 | 24 | 12 | 05.11.99 |
What is needed is a table that also shows the balance before each payment, e.g.:
===============================================
Id | UserId | Amount | PayDate | Balance |
===============================================
1 | 42 | 11 | 01.02.99 | 0 |
2 | 42 | 31 | 05.06.99 | 11 |
3 | 42 | 21 | 04.11.99 | 42 |
4 | 24 | 12 | 05.11.99 | 0 |
Currently the select statement looks something like
-- Balance before each payment: the sum of the same user's earlier payments.
-- The column is PayDate, as in the table above and in the subquery.
-- COALESCE turns the NULL produced for a user's first payment (no earlier
-- rows) into 0, matching the expected output.
SELECT
    outerp.Id,
    outerp.UserId,
    outerp.Amount,
    outerp.PayDate,
    COALESCE((SELECT sum(nestedp.Amount)
              FROM Payments nestedp
              WHERE nestedp.UserId = outerp.UserId AND
                    nestedp.PayDate < outerp.PayDate), 0) as Balance
FROM
    Payments outerp
How can I rewrite this select to get rid of the nested aggregate selection? The database in question is SQL Server 2019.

You can use a CTE with some custom logic to handle this type of problem.
-- PaymentCte numbers each user's payments by Id and computes the running
-- total up to and including each payment. The balance *before* a payment is
-- therefore the running total of the same user's previous row.
WITH PaymentCte
AS (
    SELECT ROW_NUMBER() OVER (
            PARTITION BY UserId ORDER BY Id
            ) AS RowId
        ,Id
        ,UserId
        ,PayDate
        ,Amount
        ,SUM(Amount) OVER (
            PARTITION BY UserId ORDER BY Id
            ) AS Balance
    FROM Payment
    )
-- Payments that have a predecessor: take the predecessor's running total.
SELECT X.Id
    ,X.UserId
    ,X.Amount
    ,X.PayDate
    ,Y.Balance
FROM PaymentCte x
INNER JOIN PaymentCte y ON x.userId = y.UserId
    AND X.RowId = Y.RowId + 1
-- The two branches are disjoint (RowId > 1 vs. RowId = 1), so UNION ALL
-- avoids the de-duplication step that a plain UNION performs.
UNION ALL
-- Each user's first payment has no predecessor, so its prior balance is 0.
SELECT X.Id
    ,X.UserId
    ,X.Amount
    ,X.PayDate
    ,0 AS Balance
FROM PaymentCte x
WHERE X.RowId = 1
This provides the desired output

You can try the following using lag with a cumulative sum
with b as (
select * , isnull(lag(amount) over (partition by userid order by id),0) Amt
from t
)
select Id, UserId, Amount, PayDate,
Sum(Amt) over (partition by userid order by id) Balance
from b
order by Id

Thanks to the other participants' leads, I came up with a query that seems to work:
SELECT
Id,
UserId,
Amount,
PayDate,
COALESCE(sum(Amount) over (partition by UserId
order by PayDate
rows between unbounded preceding and 1 preceding), 0) as Balance
FROM
Payments
ORDER BY
UserId, PayDate
Lots of related examples can be found here

How to find the next highest value in a new column? (SQL)

I have a table with multiple work orders for the same product with each row showing a different hour meter for how long the product has been run. I would like to create a new column that shows the next highest hour meter reading for the product next to the highest one but have not had much luck. I've been trying to rank the entries by hour meter but have not had any luck past that
("ROW_NUMBER() OVER(PARTITION BY Product ORDER BY Current_Counter_Reading DESC") 'Ranking'
Does anyone have any advice on how to approach this? Below is an example of what I am trying to do:
Product | Work Order | Hour Meter
--------+------------+------------
Car1 1 100
Car1 2 200
Product | Higher Hour Meter | Lower Hour Meter
--------+-------------------+-----------------
Car1 200 100
Thanks!

See also LEAD or LAG window functions:
WITH cte1 AS (
SELECT t.*
, LEAD(HourMeter) OVER (PARTITION BY Product ORDER BY HourMeter DESC) AS NextLower
, ROW_NUMBER() OVER (PARTITION BY Product ORDER BY HourMeter DESC) AS rn
FROM work_orders AS t
)
SELECT Product, HourMeter, NextLower
FROM cte1
WHERE rn = 1
ORDER BY Product
;
Given the following data:
+-----------+---------+-----------+
| WorkOrder | Product | HourMeter |
+-----------+---------+-----------+
| 2 | Car1 | 200 |
| 1 | Car1 | 100 |
| 3 | Car1 | 50 |
| 5 | Car2 | 66 |
| 4 | Car2 | 55 |
| 6 | Car2 | 45 |
+-----------+---------+-----------+
The result:
+---------+-----------+-----------+
| Product | HourMeter | NextLower |
+---------+-----------+-----------+
| Car1 | 200 | 100 |
| Car2 | 66 | 55 |
+---------+-----------+-----------+
Working Test Case

WITH CTE AS (
SELECT 'Car1' as Product,1 as WorkOrder,100 as HourMeter
union all
select 'Car1',2,200
union all
select 'Car1',3,20
)
SELECT
c1.Product,
MAX(c1.HourMeter) as HigherHourMeter,
MAX(c2.HourMeter) LowerHourMeter
FROM CTE c1
INNER JOIN CTE c2 ON c2.Product=c1.Product and c2.HourMeter<c1.HourMeter
GROUP BY c1.Product;

Assuming lower meter is the 2nd highest
Here we use the window function row_number() over() in concert with a conditional aggregation.
Select Product
,Higher_Meter = max( case when RN=1 then [Hour Meter] end)
,Lower_Meter = max( case when RN=2 then [Hour Meter] end)
From (
Select *
,RN = row_number() over (partition by Product order by [Hour Meter] desc)
From YourTable
) src
Group By Product

Get total count of duplicates in column

I need a query to count the total number of duplicates in a table, is there any way to do this?
If I have a table like this:
+------------+----------+
| item_name |quantity |
+------------+----------+
| Calculator | 89 |
| Notebooks | 40 |
| Pencil | 40 |
| Pens | 32 |
| Shirts | 29 |
| Shoes | 29 |
| Trousers | 29 |
+------------+----------+
I can't use SELECT COUNT(quantity) because it returns 2. (40 | 29)
How can I return 5? (40 | 40 | 29 | 29 | 29)

Using analytic functions:
WITH cte AS (
SELECT *, COUNT(*) OVER (PARTITION BY quantity) cnt
FROM yourTable
)
SELECT COUNT(*)
FROM cte
WHERE cnt > 1;

One method uses two levels of aggregation:
select sum(cnt)
from (select quantity, count(*) as cnt
from t
group by quantity
) t
where cnt > 1;
Interestingly, if you wanted "3" -- the number of rows that have duplicates, you could express this as:
select count(*) - count(distinct quantity)
from t;
But that is not what you are asking for.

Greatest count for each customer in PostgreSQL

customer | category | count
------------+---------------+-------
4846 | Vegetables | 1
1687 | Fast-Food | 7
2654 | Drink | 2
2654 | Vegetables | 3
1597 | Vegetables | 1
4846 | Drink | 2
2654 | Fast-Food | 1
1597 | Drink | 6
1597 | Snack | 3
how can i select the category which has greatest count for each customer for this table?

This is called the mode. You can use distinct on:
select distinct on (customer) t.*
from t
order by customer, count desc;

You can use window function row_number().
select
customer,
category,
count
from
(
select
*,
row_number() over (partition by customer order by count desc) as rnk
from yourTable
) val
where rnk = 1

Here is a simple query you can try:
SELECT c.*
FROM (SELECT customer, max(count) as max_count
FROM customers
GROUP BY customer) as max_count_table
JOIN customers as c on max_count_table.customer = c.customer and max_count_table.max_count = c.count
Result:

Retrieve the minimal create date with multiple rows

I have an issue with an SQL query that I am trying to write. I am trying to retrieve the row that has the minimal create_dt for each inst (see table) and amount (which isn't unique).
Unfortunately I can't use group by as the amount column isn't unique.
+--------------+--------+------+-------------+
| Company_Name | Amount | inst | Create Date |
+--------------+--------+------+-------------+
| Company A | 1000 | 4545 | 01/10/2018 |
| Company A | 400 | 4545 | 01/11/2018 |
| Company A | 200 | 4545 | 31/10/2018 |
| Company B | 2000 | 4893 | 01/10/2016 |
| Company B | 212 | 4893 | 04/10/2016 |
| Company B | 100 | 4893 | 10/10/2017 |
| Company B | 20 | 4893 | 04/10/2018 |
+--------------+--------+------+-------------+
In the above example I expect to see:
+--------------+--------+------+-------------+
| Company_Name | Amount | inst | Create Date |
+--------------+--------+------+-------------+
| Company A | 1000 | 4545 | 01/10/2018 |
| Company B | 2000 | 4893 | 01/10/2016 |
+--------------+--------+------+-------------+
Code:
SELECT
bill_company, bill_name, account_no
FROM
dbo.customer_information;
SELECT
balance_id, balance_id2, minus_balance,new_balance,
create_date, account_no
FROM
dbo.btr
SELECT
balance_id, balance_id2, expired_Date, amount, balance_type, account_no
FROM
dbo.btr_balance
SELECT
balance_ist, expired_date, account_no, balance_type
FROM
dbo.BALANCE_inst
Retrieve the minimal create date for a balance instance with the lowest balance for a balance inst.
(SELECT
bill_company,
bill_name,
account_no,
balance_ist,
amount,
MIN(create_date)
FROM
dbo.mtr btr
LEFT JOIN
btr_balance btrb ON btr.balance_id = btrb.balance_id
AND btr.balance_id2 = btrb.balance_id2
LEFT JOIN
balance_inst bali ON btr.account_no = bali.account_no
AND btrb.expired_date = bali.expired_date
GROUP BY
bill_company, bill_name, account_no,amount, balance_ist)
I have seen some solutions that use a correlated query, but I can't seem to get my head around it.

Common Table Expression (CTE) will help you.
;with cte as (
select *, row_number() over(partition by company_name order by create_date) rn
from dbo.myTable
)
select * from cte
where rn = 1;

Use row_number(); I assumed bill_company is your company name:
select * from
( SELECT bill_company,
bill_name,
account_no,
balance_ist,
amount,
create_date,
row_number() over(partition by bill_company order by create_date) rn
FROM dbo.mtr btr left join btr_balance btrb
on btr.balance_id = btrb.balance_id and btr.balance_id2 = btrb.balance_id2
left join balance_inst bali
on btr.account_no = bali.account_no and btrb.expired_date = bali.expired_date
) t where t.rn=1

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

How to get Top 10 for a grouped column? - sql

Related

SQL - get rid of the nested aggregate select

How to find the next highest value in a new column? (SQL)

Get total count of duplicates in column

Greatest count for each customer in PostgreSQL

Retrieve the minimal create date with multiple rows

Categories

Resources