Get the row with max(column) for distinct key - sql

I have some data like
code amount month
aaa1 100 1
aaa1 200 2
aaa1 300 3
aaa4 450 2
aaa4 400 3
aaa6 0 2
From the above, for each code I want to get the row with max(month)
code amount month
aaa1 300 3
aaa4 400 3
aaa6 0 2
How can I create a ms sql query for this?

-- Latest row per code: number each code's rows from the highest month
-- downwards, then keep only the first row of every partition.
;WITH MyCTE AS
(
    SELECT code,
           amount,
           month,
           -- code is constant inside its own partition, so month alone decides
           -- the order; DESC puts the latest month at rownum = 1.
           ROW_NUMBER() OVER (PARTITION BY code ORDER BY month DESC) AS rownum
    FROM [table] -- TABLE is a reserved keyword in T-SQL; the placeholder name must be delimited
)
SELECT *
FROM MyCTE
WHERE rownum = 1

You can use the ranking function ROW_NUMBER() with PARTITION BY code ORDER BY month DESC to do this:
-- Keep, for every code, the single row holding its highest month.
WITH ranked_by_month AS (
    SELECT
        src.code,
        src.amount,
        src.month,
        -- 1 = the row with the largest month inside this code
        ROW_NUMBER() OVER (
            PARTITION BY src.code
            ORDER BY src.month DESC
        ) AS month_rank
    FROM Tablename AS src
)
SELECT
    r.code,
    r.amount,
    r.month
FROM ranked_by_month AS r
WHERE r.month_rank = 1;
This will give you the maximum month for each code.
SQL Fiddle Demo

Try this
-- For each code find its highest month, then join back to the base table to
-- pick up the rest of that row. Only the base-table columns are projected:
-- SELECT * over this join would return code and month twice.
SELECT table1.code,
       table1.amount,
       table1.month
FROM
    (SELECT MAX(month) AS month, code
     FROM table1
     GROUP BY code) res
JOIN table1
    ON res.month = table1.month
   AND res.code = table1.code
Here is the SQLfiddle

Related

How to Rank By Partition with island and gap issue

Is it possible to rank items by partition without using a CTE?
Expected Table
item
value
ID
A
10
1
A
20
1
B
30
2
B
40
2
C
50
3
C
60
3
A
70
4
A
80
4
Giving each partition an ID lets the aggregate functions work the way I want:
item
MIN
MAX
ID
A
10
20
1
B
30
40
2
C
50
60
3
A
70
80
4
SQL Version: Microsoft SQL Server 2017
Assuming that the value column provides the intended ordering of the records which we see in your question above, we can try using the difference in row numbers method here. Your problem is a type of gaps and islands problem.
-- Gaps-and-islands by row-number difference: within one unbroken run of the
-- same item, the overall position and the per-item position advance together,
-- so their difference stays constant and identifies the run.
WITH numbered AS (
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY value) AS seq_all,
        ROW_NUMBER() OVER (PARTITION BY item ORDER BY value) AS seq_in_item
    FROM yourTable
)
SELECT
    item,
    MIN(value) AS [MIN],
    MAX(value) AS [MAX],
    MIN(ID) AS ID
FROM numbered
GROUP BY
    item,
    seq_all - seq_in_item
ORDER BY
    MIN(value);
Demo
If you don't want to use a CTE here, for whatever reason, you may simply inline the SQL code in the CTE into the bottom query, as a subquery:
-- Same row-number-difference technique, with the numbering step written as a
-- derived table instead of a CTE.
SELECT
    islands.item,
    MIN(islands.value) AS [MIN],
    MAX(islands.value) AS [MAX],
    MIN(islands.ID) AS ID
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY value) AS seq_all,
        ROW_NUMBER() OVER (PARTITION BY item ORDER BY value) AS seq_in_item
    FROM yourTable
) AS islands
GROUP BY
    islands.item,
    islands.seq_all - islands.seq_in_item
ORDER BY
    MIN(islands.value);
You can generate group IDs by analyzing the previous row item value that could be obtained with the LAG function and finally use GROUP BY to get the minimum and maximum value in item groups.
-- Builds a group id per run of equal items and aggregates each run.
--   items : every row plus the item of the previous row (ordered by value);
--           the first row falls back to its own item, so it opens group 0.
--   groups: running count of item changes = zero-based group id.
SELECT
    item,
    MIN(value) AS "min",
    MAX(value) AS "max",
    group_id + 1 AS id -- expose the group id one-based
FROM (
    SELECT
        *,
        SUM(CASE WHEN item = prev_item THEN 0 ELSE 1 END) OVER (ORDER BY value) AS group_id
    FROM (
        SELECT
            *,
            LAG(item, 1, item) OVER (ORDER BY value) AS prev_item
        FROM t
    ) items
) groups
GROUP BY item, group_id
-- Without an ORDER BY the row order of the result is not guaranteed.
ORDER BY group_id
Query produces output
item
min
max
id
A
10
20
1
B
30
40
2
C
50
60
3
A
70
80
4
You can check a working demo here

Selecting rows that have row_number more than 1

I have a table as following (using bigquery):
id
year
month
sales
row_number
111
2020
11
1000
1
111
2020
12
2000
2
112
2020
11
3000
1
113
2020
11
1000
1
Is there a way in which I can select rows that have row numbers more than one?
For example, my desired output is:
id
year
month
sales
row_number
111
2020
11
1000
1
111
2020
12
2000
2
I don't want to just exclusively select rows with row_number = 2 but also row_number = 1 as well.
The original code block I used for the first table result is:
-- Sales summed per (id, year, month), with the resulting rows numbered inside
-- each id.
SELECT
  id,
  year,
  month,
  SUM(sales) AS sales,
  -- NOTE(review): the ORDER BY uses the partition key itself, so which row of
  -- an id gets number 1 is arbitrary; only the count per id is reliable.
  ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
FROM
  table
GROUP BY
  id, year, month
You can use window functions:
-- Keep only rows whose id occurs more than once; the helper count is removed
-- from the output again.
SELECT counted.* EXCEPT (rows_per_id)
FROM (
  SELECT
    t.*,
    COUNT(*) OVER (PARTITION BY id) AS rows_per_id
  FROM t
) AS counted
WHERE rows_per_id > 1;
As applied to your aggregation query:
-- Monthly totals per id, restricted to ids that have more than one
-- (id, year, month) row. cnt is only a filter helper and is excluded from the
-- output.
SELECT iym.* EXCEPT (cnt)
FROM (SELECT id, year, month,
             SUM(sales) AS sales,
             ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number,
             -- No ORDER BY here: the count must cover the whole partition.
             COUNT(*) OVER (PARTITION BY id) AS cnt
      FROM table
      GROUP BY id, year, month
     ) iym
WHERE cnt > 1;
You can wrap your query as in below example
-- Template: flag every id that has at least one row numbered above 1, keep
-- all rows of flagged ids, and hide the flag column.
SELECT * EXCEPT (has_repeat)
FROM (
  SELECT
    *,
    COUNTIF(row_number > 1) OVER (PARTITION BY id) > 0 AS has_repeat
  FROM (YOUR_ORIGINAL_QUERY)
)
WHERE has_repeat
so it can look as
-- Monthly totals per id, keeping only ids that own a row numbered above 1.
SELECT * EXCEPT (has_repeat)
FROM (
  SELECT
    *,
    COUNTIF(row_number > 1) OVER (PARTITION BY id) > 0 AS has_repeat
  FROM (
    SELECT
      id,
      year,
      month,
      SUM(sales) AS sales,
      ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
    FROM table
    GROUP BY id, year, month
  )
)
WHERE has_repeat
so when applied to sample data in your question - it will produce below output
Try this:
-- Monthly totals per id; a row is kept when its id also owns a row numbered 2.
WITH monthly AS (
  SELECT
    id,
    year,
    month,
    SUM(sales) AS sales,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
  FROM table
  GROUP BY id, year, month
)
SELECT *
FROM monthly AS cur
WHERE EXISTS (
  SELECT 1
  FROM monthly AS other
  WHERE other.id = cur.id
    AND other.row_number = 2
)
This is a straightforward use of an EXISTS subquery.
This is what I use. It's similar to @ElapsedSoul's answer, but from my understanding "IN" is better than "EXISTS" for a static list; I'm not sure whether the performance difference, if any, is significant:
Difference between EXISTS and IN in SQL?
-- Monthly totals per id; keeps every row of an id that has any row numbered
-- above 1.
WITH per_month AS (
  SELECT
    id,
    year,
    month,
    SUM(sales) AS sales,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS ROW_NUM
  FROM table
  GROUP BY id, year, month
)
SELECT *
FROM per_month
WHERE id IN (
  SELECT id
  FROM per_month
  WHERE ROW_NUM > 1
);

Get specific row from a subquery using aggregate function

I am trying to get a specific row from a subquery, but I cannot use an aggregate function in a WHERE clause and I have read that I should be using a HAVING clause but I have no idea where to start.
This is my current sql statement:
-- Total balance per ID, exposed through a derived table.
SELECT *
FROM (
    SELECT
        ID,
        SUM(BALANCE) AS Balance
    FROM bankacc
    GROUP BY ID
) totals
I will get :
ID | Balance
1 | 30
2 | 40
3 | 50
4 | 50
I need the rows with the MAX(Balance), but I have no idea where to start, please help.
With window function:
-- Sample data in a table variable. Table variables are named with '@';
-- a '#' prefix denotes a temp table and cannot be used with DECLARE.
DECLARE @t TABLE ( ID INT, Amount MONEY );

INSERT INTO @t ( ID, Amount )
VALUES ( 1, 10 ),
       ( 1, 10 ),
       ( 1, 10 ),
       ( 2, 5 ),
       ( 2, 20 ),
       ( 3, 50 );

-- Sum per ID and rank the sums from highest to lowest. RANK gives tied sums
-- the same number, so every ID sharing the maximum is returned.
SELECT ID ,
       Amount
FROM   ( SELECT ID ,
                SUM(Amount) AS Amount ,
                RANK() OVER ( ORDER BY SUM(Amount) DESC ) AS rn
         FROM   @t
         GROUP BY ID
       ) t
WHERE  rn = 1;
With TOP and TIES:
-- Total per ID, highest first. WITH TIES also returns every further ID whose
-- total equals that of the top row.
-- @t: table variable holding the (ID, Amount) sample rows.
SELECT TOP 1 WITH TIES
        ID ,
        SUM(Amount) AS Amount
FROM    @t
GROUP BY ID
ORDER BY Amount DESC
These versions return every row whose sum equals the maximum, not just a single top row.
Output:
ID Amount
3 50.00
you can wrap it in a subquery:
-- Rows whose total balance equals the highest total.
-- Re-grouping the per-ID totals by ID and taking MAX just returns every ID
-- again; the totals have to be compared against the overall maximum instead.
SELECT q.ID, q.b
FROM
(
    SELECT ID, SUM(BALANCE) AS b
    FROM bankacc
    GROUP BY ID
) q
WHERE q.b = (
    -- highest per-ID total
    SELECT MAX(totals.b)
    FROM
    (
        SELECT SUM(BALANCE) AS b
        FROM bankacc
        GROUP BY ID
    ) totals
)
or order them in descending order and get the first record:
-- Highest total first; TOP 1 keeps a single row even when several IDs tie.
SELECT TOP 1
    ID,
    SUM(BALANCE) AS b
FROM bankacc
GROUP BY ID
ORDER BY b DESC
in MySQL you need to use limit 1 instead of top 1
I think this should be simple.
-- Returns exactly one record, even when two IDs share the same maximum total;
-- the lowest ID wins such a tie because of the secondary sort key.
SELECT TOP 1
       ID ,
       Amount
FROM   ( SELECT ID ,
                SUM(Amount) AS Amount
         FROM   [Table] -- TABLE is a reserved keyword in T-SQL; the placeholder name must be delimited
         GROUP BY ID
       ) t
ORDER BY Amount DESC, ID ASC
Using Window function : This will return what you want.
-- Sum per ID, ranked from highest to lowest. RANK gives tied sums the same
-- number, so every ID sharing the maximum total is returned.
SELECT ID ,
       Amount
FROM   ( SELECT ID ,
                SUM(Amount) AS Amount ,
                RANK() OVER ( ORDER BY SUM(Amount) DESC ) AS rnk
         FROM   [Table] -- TABLE is a reserved keyword in T-SQL; the placeholder name must be delimited
         GROUP BY ID
       ) t
WHERE  rnk = 1

How to filter out the first and last entry from a table using RANK?

I've this data:
Id Date Value
'a' 2000 55
'a' 2001 3
'a' 2012 2
'a' 2014 5
'b' 1999 10
'b' 2014 110
'b' 2015 8
'c' 2011 4
'c' 2012 33
I want to filter out the first and the last value (when the table is sorted on the Date column), and only keep the other values. In case there are only two entries, nothing is returned. (Example for Id = 'c')
ID Date Value
'a' 2001 3
'a' 2012 2
'b' 2014 110
I tried to use order by (RANK() OVER (PARTITION BY [Id] ORDER BY Date ...)) in combination with this article (http://blog.sqlauthority.com/2008/03/02/sql-server-how-to-retrieve-top-and-bottom-rows-together-using-t-sql/) but I can't get it to work.
[UPDATE]
All three answers seem fine. But I'm not a SQL expert, so my question is: which one performs fastest if the table has around 800000 rows and there are no indexes on any column?
You can use row_number twice to determine the min and max dates and then filter accordingly:
-- Number each id's rows from both ends of the date order; a row is dropped
-- when it is first in either direction (earliest or latest date).
WITH numbered AS (
    SELECT
        id,
        [date],
        value,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY [date] ASC)  AS from_first,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY [date] DESC) AS from_last
    FROM data
)
SELECT
    id,
    [date],
    value
FROM numbered
WHERE NOT (from_first = 1 OR from_last = 1)
SQL Fiddle Demo
Here's another approach using min and max for this without needing to use a ranking function:
-- Earliest and latest date per id; rows sitting on either bound are excluded.
WITH date_bounds AS (
    SELECT
        id,
        MIN([date]) AS mindate,
        MAX([date]) AS maxdate
    FROM data
    GROUP BY id
)
SELECT *
FROM data AS d
WHERE NOT EXISTS (
    SELECT 1
    FROM date_bounds AS b
    WHERE b.id = d.id
      AND (d.[date] = b.mindate OR d.[date] = b.maxdate)
)
More Fiddle
Here is a similar solution with row_number and count :
-- Position of each row inside its id (by dat) plus the id's row count; rows
-- at position 1 or at the last position (= the count) are filtered out.
SELECT
    ranked.id,
    ranked.dat,
    ranked.value
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY dat) AS pos,
        COUNT(*) OVER (PARTITION BY id) AS total
    FROM #table
) AS ranked
WHERE ranked.pos NOT IN (1, ranked.total)
You can do this with EXISTS:
-- A row survives only if the same ID has both an earlier and a later row.
SELECT *
FROM Table1 AS cur
WHERE EXISTS (
          SELECT 1
          FROM Table1 AS earlier
          WHERE earlier.ID = cur.ID
            AND earlier.Date < cur.Date
      )
  AND EXISTS (
          SELECT 1
          FROM Table1 AS later
          WHERE later.ID = cur.ID
            AND later.Date > cur.Date
      )
Demo: SQL Fiddle

Rows inside the greatest streak?

Given the Rows
symbol_id profit date
1 100 2009-08-18 01:01:00
1 100 2009-08-18 01:01:01
1 156 2009-08-18 01:01:04
1 -56 2009-08-18 01:01:06
1 18 2009-08-18 01:01:07
How would I most efficiently select the rows that are involved in the greatest streak (of profit).
The greatest streak would be the first 3 rows, and I would want those rows. The query I came up with is just a bunch of nested queries and derived tables. I am looking for an efficient way to do this using common table expressions or something more advanced.
You haven't defined how 0 profit should be treated or what happens if there is a tie for longest streak. But something like...
-- Longest run of consecutive rows with the same profit sign, per symbol.
--   T1: Grp = overall position minus position among rows of the same sign;
--       it stays constant along an unbroken run of one sign.
--   T2: length of the run each row belongs to.
;WITH T1 AS
(
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY symbol_id ORDER BY date) -
           ROW_NUMBER() OVER (PARTITION BY symbol_id, SIGN(profit)
                              ORDER BY date) AS Grp
    FROM Data
), T2 AS
(
    SELECT *,
           -- Grp is only unique within one sign (e.g. +,-,+ yields 0,1,1), so
           -- the sign must be part of the partition or runs of opposite sign
           -- can be counted together.
           COUNT(*) OVER (PARTITION BY symbol_id, SIGN(profit), Grp) AS StreakLen
    FROM T1
)
SELECT TOP 1 WITH TIES *
FROM T2
ORDER BY StreakLen DESC
Or - if you are looking for most profitable streak
-- Most profitable run of consecutive rows, per symbol.
--   T1: rows are bucketed as non-negative profit (1) or negative (NULL);
--       Grp = overall position minus position inside the bucket, constant
--       along an unbroken run of one bucket.
--   T2: total profit of the run each row belongs to.
;WITH T1 AS
(
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY symbol_id ORDER BY date) -
           ROW_NUMBER() OVER (PARTITION BY symbol_id, CASE WHEN profit >= 0 THEN 1 END
                              ORDER BY date) AS Grp
    FROM Data
), T2 AS
(
    SELECT *,
           -- Grp is only unique within one bucket (e.g. +10,-1,+100 yields
           -- 0,1,1), so the bucket must be part of the partition or a loss
           -- can be summed into a neighbouring profitable run.
           SUM(profit) OVER (PARTITION BY symbol_id,
                                          CASE WHEN profit >= 0 THEN 1 END,
                                          Grp) AS StreakProfit
    FROM T1
)
SELECT TOP 1 WITH TIES *
FROM T2
ORDER BY StreakProfit DESC
-- Sample data in a table variable. Table variables are named with '@';
-- a '#' prefix denotes a temp table and cannot be used with DECLARE.
declare @T table
(
  symbol_id int,
  profit int,
  [date] datetime
);

insert into @T values
(1, 100, '2009-08-18 01:01:00'),
(1, 100, '2009-08-18 01:01:01'),
(1, 156, '2009-08-18 01:01:04'),
(1, -56, '2009-08-18 01:01:06'),
(1, 18 , '2009-08-18 01:01:07');

-- C1 numbers all rows chronologically.
-- C2 keeps only rows with profit >= 0 and renumbers them; rn minus the new
-- number stays constant while the kept rows were adjacent in C1, so grp
-- identifies each unbroken run of non-negative rows.
;with C1 as
(
  select *,
         row_number() over(order by [date]) as rn
  from @T
),
C2 as
(
  select *,
         rn - row_number() over(order by rn) as grp
  from C1
  where profit >= 0
)
-- Rows of the run with the highest total profit (all of its rows tie on the sum).
select top 1 with ties *
from C2
order by sum(profit) over(partition by grp) desc
Result:
symbol_id profit date rn grp
----------- ----------- ----------------------- -------------------- --------------------
1 100 2009-08-18 01:01:00.000 1 0
1 100 2009-08-18 01:01:01.000 2 0
1 156 2009-08-18 01:01:04.000 3 0
If that's a MSSQL server then you want to consider using TOP 3 in your select clause
and ORDER BY PROFIT DESC.
If mysql/postgres you might want to consider using limit in your select clause with
the same order by too.
hope this helps.