How to find max value from each group and display their information when using "group by" - sql

For example, i create a table about people contribue to 2 campaigns
+-------------------------------------+
| ID Name Campaign Amount (USD) |
+-------------------------------------+
| 1 A 1 10 |
| 2 B 1 5 |
| 3 C 2 7 |
| 4 D 2 9 |
+-------------------------------------+
Task: For each campaign, find the person (Name, ID) who contribute the most to
Expected result is
+-----------------------------------------+
| Campaign Name ID |
+-----------------------------------------+
| 1 A 1 |
| 2 D 4 |
+-----------------------------------------+
I used "group by Campaign" but the result have 2 columns "Campagin" and "max value" when I need "Name" and "ID"
Thanks for your help.
Edited: I fix some values, really sorry

You can use analytic functions for this:
select name, id, amount
from (select t.*, max(amount) over (partition by campaign) as max_amount
from t
) t
where amount = max_amount;

You can also do it by giving a rank/row_number partiton by campaign and order by descending order of amount.
Query
;with cte as(
select [num] = dense_rank() over(
partition by [Campaign]
order by [Amount] desc
), *
from [your_table_name]
)
select [Campaign], [Name], [ID]
from cte
where [num] = 1;

Try the next query:-
SELECT Campaign , Name , ID
FROM (
SELECT Campaign , Name , ID , MAX (Amount)
FROM MyTable
GROUP BY Campaign , Name , ID
) temp;

Simply use Where Clause with the max of amount group by Campaign:-
As following generic code:-
select a, b , c
from tablename
where d in
(
select max(d)
from tablename
group by a
)
Demo:-
Create table #MyTable (ID int , Name char(1), Campaign int , Amount int)
go
insert into #MyTable values (1,'A',1,10)
insert into #MyTable values (2,'B',1,5)
insert into #MyTable values (3,'C',2,7)
insert into #MyTable values (4,'D',2,9)
go
select Campaign, Name , ID
from #MyTable
where Amount in
(
select max(Amount)
from #MyTable
group by Campaign
)
drop table #MyTable
Result:-

Please find the below code for the same
SELECT *
FROM #MyTable T
OUTER APPLY (
SELECT COUNT(1) record
FROM #MyTable T1
where t.Campaign = t1.Campaign
and t.amount < t1.amount
)E
where E.record = 0

Related

Return the highest SUM value of all donors by designations

I have the following script:
SELECT DISTINCT GIFT_ID, GIFT_DESG, SUM(GIFT_AMT)
FROM GIFT_TABLE
GROUP BY GIFT_ID, GIFT_DESG
It will return something like this:
GIFT_ID GIFT_DESG SUM(GIFT_AMT)
1 A 25
1 B 500
1 C 75
2 A 100
2 B 200
2 C 300
...
My desired outcome is:
GIFT_ID GIFT_DESG SUM(GIFT_AMT)
1 B 500
2 C 300
How would I do that?
Possibly row_number() right? I think it's something with the summing of gift amounts by designation that is throwing me off.
Thank you.
if your DBMS support ROW_NUMBER window function you can try to make row number by GIFT_ID order by SUM(GIFT_AMT) then get rn = 1 row.
SELECT t1.GIFT_ID,t1.GIFT_DESG,t1.GIFT_AMT
FROM (
SELECT t1.*,ROW_NUMBER() OVER(PARTITION BY GIFT_ID ORDER BY GIFT_AMT DESC) rn
FROM (
SELECT GIFT_ID, GIFT_DESG, SUM(GIFT_AMT) GIFT_AMT
FROM GIFT_TABLE
GROUP BY GIFT_ID, GIFT_DESG
) t1
) t1
where rn =1
Note
You already use GROUP BY the DISTINCT keyword is no sense, you can remove it from your query.
Here is a sample
CREATE TABLE T(
GIFT_ID int,
GIFT_DESG varchar(5),
GIFT_AMT int
);
insert into t values (1,'A' ,25);
insert into t values (1,'B' ,500);
insert into t values (1,'C' ,75);
insert into t values (2,'A' ,100);
insert into t values (2,'B' ,200);
insert into t values (2,'C' ,300);
Query 1:
SELECT t1.GIFT_ID,t1.GIFT_DESG,t1.GIFT_AMT
FROM (
SELECT t1.*,ROW_NUMBER() OVER(PARTITION BY GIFT_ID ORDER BY GIFT_AMT DESC) rn
FROM T t1
) t1
where rn =1
Results:
| GIFT_ID | GIFT_DESG | GIFT_AMT |
|---------|-----------|----------|
| 1 | B | 500 |
| 2 | C | 300 |
You can do this with no subquery:
SELECT TOP (1) WITH TIES GIFT_ID, GIFT_DESG, SUM(GIFT_AMT)
FROM GIFT_TABLE
GROUP BY GIFT_ID, GIFT_DESG
ORDER BY ROW_NUMBER() OVER (PARTITION BY GIFT_ID ORDER BY SUM(GIFT_AMT) DESC);
You can do it also like this
WITH t as
SELECT GIFT_ID, GIFT_DESG, SUM(GIFT_AMT) AS GIFT_AMT
FROM GIFT_TABLE
GROUP BY GIFT_ID, GIFT_DESG)
SELECT GIFT_ID,
max(GIFT_DESG) KEEP (DENSE_RANK LAST ORDER BY GIFT_AMT),
max(GIFT_AMT) GIFT_AMT
FROM T
GROUP BY GIFT_ID;

Count of duplicate values by two columns in SQL Server

From this table:
Number Value
1 a
2 b
3 a
2 c
2 b
3 a
2 b
I need to get count of all duplicate rows by Number and Value, i.e. 5.
Thanks.
I think this query is what you want:
SELECT SUM(t.cnt)
FROM
(
SELECT COUNT(*) cnt
FROM table_name
GROUP BY number, value
HAVING COUNT(*) > 1
)t;
Maybe something like this?
select value,number,max(cnt) as Count_distinct from (
select *,row_number () over (partition by value,number order by number) as cnt
from #sample
)t
group by value,number
Output
+---------------------------------+
| Value | Number | Count_Distinct |
| a | 1 | 1 |
| b | 2 | 3 |
| c | 2 | 1 |
| a | 3 | 2 |
+---------------------------------+
Select
count(distinct Number) as Distinct_Numbers,
count(distinct Value) as Distinct_Values
from
Table
This shows how many distinct values are in each column. Does this help?
Give a row number partition by both the columns and order by both the columns. Then count the number of rows where row number greater than 1.
Query
;with cte as(
select [rn] = row_number() over(
partition by [Number], [Value]
order by [Number], [Value]
), *
from [your_table_name]
)
select count(*) from cte
where [rn] > 1;
I think you mean number of unique number - value pairs, you can use:
SELECT count(*)
FROM
(SELECT ROW_NUMBER() OVER (PARTITION BY number, value ORDER BY (select 1)) from mytable rnk) i
where i.rnk = 1
May be this query may help you
select * from [dbo].[Sample_table1]
;WITH
DupContactRecords(number,value,DupsCount)
AS
(
SELECT number,value, COUNT() AS TotalCount FROM [Sample_table1] GROUP BY number,value HAVING COUNT() > 1
)
--to get the duplicats
/*select * from DupContactRecords*/
SELECT sum(DupsCount) FROM DupContactRecords

Get specific row from a subquery using aggregate function

I am trying to get a specific row from a subquery, but I cannot use an aggregate function in a WHERE clause and I have read that I should be using a HAVING clause but I have no idea where to start.
This is my current sql statement:
SELECT *
FROM
(
select ID, SUM(BALANCE) AS Balance FROM bankacc GROUP BY ID
)A
I will get :
ID | Balance
1 | 30
2 | 40
3 | 50
4 | 50
I need the rows with the MAX(Balance), but I have no idea where to start, please help.
With window function:
DECLARE #t TABLE ( ID INT, Amount MONEY )
INSERT INTO #t
VALUES ( 1, 10 ),
( 1, 10 ),
( 1, 10 ),
( 2, 5 ),
( 2, 20 ),
( 3, 50 )
SELECT ID ,
Amount
FROM ( SELECT ID ,
SUM(Amount) AS Amount ,
RANK() OVER ( ORDER BY SUM(Amount) DESC ) AS rn
FROM #t
GROUP BY ID
) t
WHERE rn = 1
With TOP and TIES:
SELECT TOP 1 WITH TIES
ID ,
SUM(Amount) AS Amount
FROM #t
GROUP BY ID
ORDER BY Amount desc
These versions will return rows where sum will be max, not just top 1 row.
Output:
ID Amount
3 50.00
you can wrap it in a subquery:
SELECT q.id, max(q.b)
FROM
(
select ID, SUM(BALANCE) b FROM bankacc GROUP BY ID
) q
group by q.id
or order them in dessending order and get first record:
select top 1 ID, SUM(BALANCE) b FROM bankacc GROUP BY ID order by b desc
in MySQL you need to use limit 1 instead of top 1
I think this should be simple.
-- This will return only 1 record, even if there are 2 records for MAX same amount
SELECT top 1 ID ,
Amount
FROM ( SELECT ID ,
SUM(Amount) AS Amount
FROM Table
GROUP BY ID
) t
Order by Amount desc,ID asc
Using Window function : This will return what you want.
SELECT ID ,
Amount
FROM ( SELECT ID ,
SUM(Amount) AS Amount ,
RANK() OVER ( ORDER BY SUM(Amount) DESC ) AS rnk
FROM Table
GROUP BY ID
) t
WHERE rnk = 1

Find and delete duplicate friend connections in SQL Server

I need a script to find Member who has a friend connection more than once so it doesn't show their friendship more than once on the website.
The MemberConnection table is as follows:
+----------------------+------------+----------------+
| MemberConnectionID | MemberID | ConnMemberID |
+----------------------+------------+----------------+
| 25 | 33 | 43 |
| 26 | 43 | 33 |
| 27 | 13 | 143 |
| 28 | 143 | 13 |
| 29 | 33 | 43 |
+----------------------+------------+----------------+
As you can see rows 25 and 29 are identical and I need to find these duplicates so they can be deleted.
How can I write a script for this?
Any help that anyone can provide would be greatly appreciated.
Many thanks in advance
neojakey
Try this if you want to leave one connection:
Use this to select connections:
;WITH CTE
AS
(
SELECT
MemberConnectionID,
ROW_NUMBER() OVER (PARTITION BY MemberID, ConnMemberID ORDER BY MemberConnectionID) RN
FROM MemberConnection
)
SELECT
MemberConnectionID
FROM CTE
WHERE RN > 1
or use this to delete from table:
;WITH CTE
AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY MemberID, ConnMemberID ORDER BY MemberConnectionID) RN
FROM MemberConnection
)
DELETE
FROM CTE
WHERE RN > 1
You can do something like :
SELECT * FROM
(SELECT MemberID,ConnMemberID ,count( MemberID,ConnMemberID ) as c FROM MemberConnection group by MemberID,ConnMemberID ) x
WHERE x.c >1
This will show you all repeated rows.
Using this as a test:
DECLARE #Table AS TABLE
(
MemberConnectionID INT ,
MemberID INT ,
ConMemberID INT
)
INSERT INTO #Table
SELECT 1 ,
2 ,
3
INSERT INTO #Table
SELECT 2 ,
3 ,
4
INSERT INTO #Table
SELECT 3 ,
2 ,
3
Select occurence of member connections
SELECT COUNT(MemberConnectionID) AS Occurence ,
MemberID ,
ConMemberID
FROM #Table
GROUP BY MemberID ,
ConMemberID
Delete Statement
DELETE FROM #Table
FROM #Table t
INNER JOIN ( SELECT MemberID ,
ConMemberID
FROM ( SELECT COUNT(MemberConnectionID) AS Occurence ,
MemberID ,
ConMemberID
FROM #Table
GROUP BY MemberID ,
ConMemberID
) t2
WHERE t2.Occurence > 1
) t3 ON t3.MemberID = T.MemberID
AND t3.ConMemberID = t.ConMemberID
I always use this little query (changed to meet your table).
Delete MemberConnection
from MemberConnection
join
(select max(MemberConnectionId)as CountIt, MemberId, ConnMemberId
from #MemberConnection
group by MemberId, ConnMemberId
having count(1) > 1
) as derived
on MemberConnection.MemberId = derived.MemberId
and MemberConnection.ConnMemberId = derived.ConnMemberId
and CountIt > MemberConnectionId

Group data by the change of grouping column value in order

With the following data
create table #ph (product int, [date] date, price int)
insert into #ph select 1, '20120101', 1
insert into #ph select 1, '20120102', 1
insert into #ph select 1, '20120103', 1
insert into #ph select 1, '20120104', 1
insert into #ph select 1, '20120105', 2
insert into #ph select 1, '20120106', 2
insert into #ph select 1, '20120107', 2
insert into #ph select 1, '20120108', 2
insert into #ph select 1, '20120109', 1
insert into #ph select 1, '20120110', 1
insert into #ph select 1, '20120111', 1
insert into #ph select 1, '20120112', 1
I would like to produce the following output:
product | date_from | date_to | price
1 | 20120101 | 20120105 | 1
1 | 20120105 | 20120109 | 2
1 | 20120109 | 20120112 | 1
If I group by price and show the max and min date then I will get the following which is not what I want (see the over lapping of dates).
product | date_from | date_to | price
1 | 20120101 | 20120112 | 1
1 | 20120105 | 20120108 | 2
So essentially what I'm looking to do is group by the step change in data based on group columns product and price.
What is the cleanest way to achieve this?
There's a (more or less) known technique of solving this kind of problem, involving two ROW_NUMBER() calls, like this:
WITH marked AS (
SELECT
*,
grp = ROW_NUMBER() OVER (PARTITION BY product ORDER BY date)
- ROW_NUMBER() OVER (PARTITION BY product, price ORDER BY date)
FROM #ph
)
SELECT
product,
date_from = MIN(date),
date_to = MAX(date),
price
FROM marked
GROUP BY
product,
price,
grp
ORDER BY
product,
MIN(date)
Output:
product date_from date_to price
------- ---------- ------------- -----
1 2012-01-01 2012-01-04 1
1 2012-01-05 2012-01-08 2
1 2012-01-09 2012-01-12 1
I'm new to this forum so hope my contribution is helpful.
If you really don't want to use a CTE (although I think thats probably the best approach) you can get a solution using set based code. You will need to test the performance of this code!.
I have added in an extra temp table so that I can use a unique identifier for each record but I suspect you will already have this column in you source table. So heres the temp table.
If Exists (SELECT Name FROM tempdb.sys.tables WHERE name LIKE '#phwithId%')
DROP TABLE #phwithId
CREATE TABLE #phwithId
(
SaleId INT
, ProductID INT
, Price Money
, SaleDate Date
)
INSERT INTO #phwithId SELECT row_number() over(partition by product order by [date] asc) as SalesId, Product, Price, Date FROM ph
Now the main body of the Select statement
SELECT
productId
, date_from
, date_to
, Price
FROM
(
SELECT
dfr.ProductId
, ROW_NUMBER() OVER (PARTITION BY ProductId ORDER BY ChangeDate) AS rowno1
, ChangeDate AS date_from
, dfr.Price
FROM
(
SELECT
sl1.ProductId AS ProductId
, sl1.SaleDate AS ChangeDate
, sl1.price
FROM
#phwithId sl1
LEFT JOIN
#phwithId sl2
ON sl1.SaleId = sl2.SaleId + 1
WHERE
sl1.Price <> sl2.Price OR sl2.Price IS NULL
) dfr
) da1
LEFT JOIN
(
SELECT
ROW_NUMBER() OVER (PARTITION BY ProductId ORDER BY ChangeDate) AS rowno2
, ChangeDate AS date_to
FROM
(
SELECT
sl1.ProductId
, sl1.SaleDate AS ChangeDate
FROM
#phwithId sl1
LEFT JOIN
#phwithId sl3
ON sl1.SaleId = sl3.SaleId - 1
WHERE
sl1.Price <> sl3.Price OR sl3.Price IS NULL
) dto
) da2
ON da1.rowno1 = da2.rowno2
By binding the data source offset by 1 record (+or-) we can identify when the price buckets change and then its just a matter of getting the start and end dates for the buckets back into a single record.
All a bit fiddly and I'm not sure its going to give better performance but I enjoyed the challenge.
WITH marked AS (
SELECT
*,
case
when (lag(price,1,'') over (partition by product order by date_from)) = price
then 0 else 1
end is_price_change
FROM #ph
),
marked_as_group AS
( SELECT m.*,
SUM(is_price_change) over (PARTITION BY product order by date_from ROWS
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS price_change_group
FROM marked m
),
SELECT
product,
date_from = MIN(date_from),
date_to = MAX(date_to),
price = MIN(price)
FROM marked_as_group
GROUP BY
product,
price_change_group
ORDER BY
product,
date_to
One solution I have come up with which is relatively "clean" is:
;with cte_sort (product, [date], price, [row])
as
(select product, [date], price, row_number() over(partition by product order by [date] asc) as row
from #ph)
select a.product, a.[date] as date_from, c.[date] as date_to, a.price
from cte_sort a
left outer join cte_sort b on a.product = b.product and (a.row+1) = b.row and a.price = b.price
outer apply (select top 1 [date] from cte_sort z where z.product = a.product and z.row > a.row order by z.row) c
where b.row is null
order by a.[date]
I used a CTE with row_number because you then don't need to worry about whether any dates are missing if you use functions like dateadd. You obviously only need the outer apply if you want to have the date_to column (which I do).
This solution does solve my problem, I am however having a slight issue getting it to perform as quickly as I'd like on my table of 5 million rows.
Create function [dbo].[AF_TableColumns](#table_name nvarchar(55))
returns nvarchar(4000) as
begin
declare #str nvarchar(4000)
select #str = cast(rtrim(ltrim(column_name)) as nvarchar(500)) + coalesce(' ' + #str , ' ')
from information_schema.columns
where table_name = #table_name
group by table_name, column_name, ordinal_position
order by ordinal_position DESC
return #str
end
--select dbo.AF_TableColumns('YourTable') Select * from YourTable