Max values on distinct groups - sql

Column1 Column2 Column3
------- ------- -------
jim 1788 5F
jim 2000 9F
jim 500 9F
ben 190 4H
matt 400 46
matt 20 3G
I need to run a query that outputs:
Column1 MaxValue PL
------- ------- -------
jim 2000 9F
jim 2000 NULL
ben 190 4H
matt 400 46
matt 400 NULL
For each value in Column1 (e.g. jim, ben, matt): we group the data by Column1 and for each group we display the row that has the maximum value on column2.
Then, for each row found in this manner, it displays the row again but with NULL in column3, if the grouping by Column1 returns more than 1 row and there are smaller values in Column2 than the maximum found in the previous step.
ben 190 NULL is not displayed because we have ben only once on Column1.
Thank you in advance for any tips or suggestions.
This is what I tried so far but I receive an error prompting me to include Column2 and Column3 in the GROUP By clause, but if I do so I don't reach the desired output as shown above.
-- Asker's attempt: one row per Column1 carrying the group maximum of Column2 and
-- the Column3 of the row that holds that maximum.
-- As reported above, the server rejects it: the CASE reads Column2 and Column3
-- outside an aggregate while the query groups by Column1 only.
CREATE VIEW VIEWB AS
SELECT DISTINCT t1.Column1,
/* MAX_Value */
(MAX(t1.[Column2])) AS [MAX Value],
/* PL */
(CASE
WHEN t1.[Column2] = MAX(t1.[Column2]) THEN t1.[Column3]
ELSE NULL
END) AS PL
FROM TABLEA AS t1
GROUP BY t1.Column1;

Try this code:
-- Sample rows from the question, held in a table variable.
DECLARE @t TABLE (Column1 VARCHAR(50), Column2 INT, Column3 VARCHAR(50))
INSERT @t (Column1, Column2, Column3)
VALUES
    ('jim',  1788, '5F'),
    ('jim',  2000, '9F'),
    ('jim',  500,  '9F'),
    ('ben',  190,  '4H'),
    ('matt', 400,  '46'),
    ('matt', 20,   '3G')
-- a: every row numbered inside its Column1 group (largest Column2 first),
--    together with the smallest and largest Column2 of that group.
;WITH a AS (
    SELECT Column1,
        Column2,
        Column3,
        ROW_NUMBER() OVER (PARTITION BY Column1 ORDER BY Column2 DESC) AS RowNum,
        MAX(Column2) OVER (PARTITION BY Column1) AS Maximum,
        MIN(Column2) OVER (PARTITION BY Column1) AS Minimum
    FROM @t
)
SELECT Column1,
    Maximum AS MaxValue,
    -- only the top row keeps its Column3; the repeated row shows NULL
    CASE WHEN RowNum = 1 THEN Column3 END AS PL
FROM a
WHERE RowNum = 1
    -- repeat the maximum once more only when the group really holds a smaller value
    OR (RowNum = 2 AND Minimum < Maximum)
-- keep each group together, the row carrying Column3 first
ORDER BY Column1, RowNum
If you need to, you can put this in a view.

try this
-- cte: rows of table_A numbered inside each Column1 group, largest Column2 first.
;with cte as (
    select Column1, Column2, Column3,
        ROW_NUMBER() over (partition by Column1 order by Column2 desc) as row_num
    from table_A
),
-- a: one extra row (Column3 = NULL) per group that holds a value below its maximum.
a as (
    select Column1, MAX(Column2) as [Column2], null as [Column3]
    from table_A
    group by Column1
    -- "more than one row" alone is not enough: a smaller value must exist
    having MIN(Column2) < MAX(Column2)
),
-- b: the top row of every group plus the NULL repeats.
b as (
    select Column1, Column2, Column3 from cte where row_num = 1
    union all
    select Column1, Column2, Column3 from a
)
select Column1, Column2 as [MaxValue], Column3 as [PL]
from b
order by Column2 desc, Column1, ISNULL(Column3, '') desc

-- data: inline copy of the question's sample rows.
with data (col1, col2, col3) as (
    select v.col1, v.col2, v.col3
    from (
        values
            ('jim',  1788, '5F'),
            ('jim',  2000, '9F'),
            ('jim',  500,  '9F'),
            ('ben',  190,  '4H'),
            ('matt', 400,  '46'),
            ('matt', 20,   '3G')
    ) as v (col1, col2, col3)
),
-- group_max: largest col2 of every col1.
group_max as (
    select col1, max(col2) as col2
    from data
    group by col1
),
-- maxes: the rows of data that hold their group's largest col2.
maxes as (
    select d.col1, d.col2, d.col3
    from data as d
    inner join group_max as m
        on m.col1 = d.col1
        and m.col2 = d.col2
)
select col1, col2, col3
from maxes
union all
-- repeat a maximum with NULL when its group also contains a smaller col2
select mx.col1, mx.col2, null
from maxes as mx
where exists (
    select 0
    from data as d
    where d.col1 = mx.col1
        and d.col2 < mx.col2
)
order by col1, col3 desc

-- Sample rows from the question, held in a table variable.
declare @t table (column1 varchar(10), column2 int, column3 varchar(10))
insert into @t (column1, column2, column3)
select 'jim', 1788, '5F' union all
select 'jim', 2000, '9F' union all
select 'jim', 500, '9F' union all
select 'ben', 190, '4H' union all
select 'matt', 400, '46' union all
select 'matt', 20, '3G'
-- first branch: the row with the largest column2 of every column1 group
select column1, column2, column3
from (
    select column1, column2, column3,
        row_number() over (partition by column1 order by column2 desc) as sno
    from @t
) as t
where sno = 1
union all
-- second branch: the same maximum once more with NULL, only for groups that
-- also hold a smaller value; aggregating directly yields one row per group, so
-- no join back to the table (and no UNION de-duplication) is needed
select column1, max(column2) as column2, NULL
from @t
group by column1
having min(column2) < max(column2)

Might not be the most efficient way of doing this, so if you have a really large table this may not be an ideal solution, but an option:
-- Scratch copy of the sample rows in a temporary table (dropped at the end).
CREATE TABLE #temp (column1 varchar(10), column2 float, column3 varchar(10))
INSERT #temp
SELECT 'jim', 1788, '5F' UNION ALL
SELECT 'jim', 2000, '9F' UNION ALL
SELECT 'jim', 500, '9F' UNION ALL
SELECT 'ben', 190, '4H' UNION ALL
SELECT 'matt', 400, '46' UNION ALL
SELECT 'matt', 20, '3G'
-- rows that hold the largest column2 of their column1 group
SELECT cur.column1, cur.column2 AS MaxValue, cur.column3 AS PL
FROM #temp AS cur
WHERE cur.column2 = (
    SELECT MAX(grp.column2)
    FROM #temp AS grp
    WHERE grp.column1 = cur.column1
)
UNION
-- the same rows with NULL, when the group also holds a different column2
SELECT cur.column1, cur.column2, NULL
FROM #temp AS cur
WHERE cur.column2 = (
    SELECT MAX(grp.column2)
    FROM #temp AS grp
    WHERE grp.column1 = cur.column1
)
    AND EXISTS (
        SELECT 1
        FROM #temp AS other
        WHERE other.column2 <> cur.column2
            AND other.column1 = cur.column1
    )
DROP TABLE #temp

Related

Transform rows to columns(probably pivot)

Here is my requirement:
-- Sample table for the question: several rows may share one KEY_VALUE.
CREATE TABLE #TEMP
(
    KEY_VALUE VARCHAR(100),
    NAME      VARCHAR(100),
    AMOUNT    INT,
    QUANTITY  INT
)
INSERT INTO #TEMP (KEY_VALUE, NAME, AMOUNT, QUANTITY)
VALUES
    ('K1', 'ABC', 100, 10000),
    ('K2', 'XYZ', 200, 20000),
    ('K1', 'ABC', 50,  5000),
    ('K2', 'XYZ', 300, 30000),
    ('K3', 'MNO', 50,  500)
-- show what was loaded
SELECT KEY_VALUE, NAME, AMOUNT, QUANTITY
FROM #TEMP
Because the KEY_VALUE COLUMN matches for 2 rows(K1 and K2), I want to transform it to something as below:
KEY_VALUE NAME AMOUNT_1 AMOUNT_2 QUANTITY_1 QUANTITY_2
K1 ABC 100 50 10000 5000
K2 XYZ 200 300 20000 30000
K3 MNO 50 NULL 500 NULL
What/How do I do that? Please let me know if my question is not clear.
You can use ROW_NUMBER() & do conditional aggregation :
-- Number the rows of every (KEY_VALUE, NAME) group, then spread rows 1 and 2
-- into separate columns with conditional aggregation.
SELECT KEY_VALUE, NAME,
    MAX(CASE WHEN seq = 1 THEN AMOUNT END) AS AMOUNT_1,
    MAX(CASE WHEN seq = 2 THEN AMOUNT END) AS AMOUNT_2,
    MAX(CASE WHEN seq = 1 THEN QUANTITY END) AS QUANTITY_1,
    MAX(CASE WHEN seq = 2 THEN QUANTITY END) AS QUANTITY_2
FROM (
    SELECT t.KEY_VALUE, t.NAME, t.AMOUNT, t.QUANTITY,
        -- partition on the same columns the outer query groups by, so every
        -- output row owns a seq = 1; QUANTITY breaks ties on AMOUNT
        ROW_NUMBER() OVER (
            PARTITION BY t.KEY_VALUE, t.NAME
            ORDER BY t.AMOUNT, t.QUANTITY
        ) AS seq
    FROM #TEMP t
) t
GROUP BY KEY_VALUE, NAME;
EDIT : If you want to do further calculation then you can use CTE :
-- <query> is a placeholder for the SELECT that produces AMOUNT_1 / AMOUNT_2.
WITH CTE AS (
<query>
)
SELECT C.*,
    -- difference between the first and the second amount of the row
    C.AMOUNT_1 - C.AMOUNT_2 AS Diff_Amt
FROM CTE C;
Prepare data
-- Temporary table holding the sample rows used by the query below.
create table #t (
    key_value varchar(10),
    name      varchar(10),
    amount    int,
    quantity  int
);
insert into #t (key_value, name, amount, quantity)
values
    ('K1', 'ABC', 100, 10000),
    ('K2', 'XYZ', 200, 20000),
    ('K1', 'ABC', 50,  5000),
    ('K2', 'XYZ', 300, 30000),
    ('K3', 'MNO', 50,  500);
Querying
-- t1: every row gets an id; inside a key the larger amount gets the smaller id,
--     which makes "first" and "second" row of a key deterministic.
WITH t1 (id, key_value, name, amount, quantity)
AS (
    SELECT ROW_NUMBER() OVER (ORDER BY key_value, amount DESC, quantity DESC),
        key_value, name, amount, quantity
    FROM #t
),
-- t2: lowest and highest id of every (key_value, name) group.
t2
AS (
    SELECT MIN(id) AS min_id, MAX(id) AS max_id, key_value, name
    FROM t1
    GROUP BY key_value, name
),
-- t3: the min_id row supplies *_1, the max_id row supplies *_2.
t3
AS (
    SELECT t2.key_value, t2.name,
        t11.amount AS amount_1, t11.quantity AS quantity_1,
        t12.amount AS amount_2, t12.quantity AS quantity_2
    FROM t2
    INNER JOIN
        t1 t11 ON t11.key_value = t2.key_value AND t11.name = t2.name
        AND t11.id = t2.min_id
    -- left join + "<> min_id": single-row groups get NULL in the *_2 columns
    LEFT JOIN
        t1 t12 ON t12.key_value = t2.key_value AND t12.name = t2.name
        AND t12.id = t2.max_id AND t12.id <> t2.min_id
)
SELECT key_value, name, amount_1, quantity_1, amount_2, quantity_2
FROM t3
Result
key_value name amount_1 quantity_1 amount_2 quantity_2
---------- ---------- ----------- ----------- ----------- -----------
K1 ABC 100 10000 50 5000
K2 XYZ 300 30000 200 20000
K3 MNO 50 500 NULL NULL

MS SQL Server select rows with max values

Good day
I have Table1:
COLUMN1 COLUMN2 Column3
----------------------------
Eva Apple 1
Eva Apple 2
Eva Apple 3
Eva Apple 4
Eva Apple 5
Eva Apple 6
Bob Apple 1
Bob Samsung 1
Bob Samsung 2
... ... ...
I need
COLUMN1 COLUMN2 Column3
----------------------------
Eva Apple 6
Bob Samsung 2
Bob Apple 1
... ... ...
How can I write a query that selects only the rows with the MAX value in Column3?
My version of string is :
-- Asker's attempt at selecting the rows with the largest Column3.
-- NOTE(review): MAX is used in the WHERE clause without an argument, and
-- [column2] is listed twice where COLUMN1 was presumably intended — confirm.
SELECT MAX(Column3) , [column2], [Column2]
FROM Table1
WHERE Column3 = MAX ;
Thanks for Opinions
You can use row_number
-- Rows are sorted by their rank inside the (Column1, Column2) group, largest
-- Column3 first; TOP (1) WITH TIES then keeps every row that shares rank 1.
SELECT TOP (1) WITH TIES
    *
FROM table1
ORDER BY
    ROW_NUMBER() OVER (
        PARTITION BY Column1, Column2
        ORDER BY Column3 DESC
    )
Other way is to use outer query:
-- Same idea with a derived table: number the rows of each (Column1, Column2)
-- group by descending Column3 and keep number 1.
SELECT *
FROM (
    SELECT
        *,
        RowN = ROW_NUMBER() OVER (
            PARTITION BY Column1, Column2
            ORDER BY Column3 DESC
        )
    FROM table1
) ranked
WHERE ranked.RowN = 1
You want to find the maximum Column3 for each combination of Column1 and Column2.
You can achieve this with a GROUP BY
-- Largest Column3 for each (Column1, Column2) pair.
select
    src.Column1,
    src.Column2,
    max(src.Column3)
from Table1 as src
group by
    src.Column1,
    src.Column2
See https://learn.microsoft.com/en-us/sql/t-sql/queries/select-group-by-transact-sql
-- Keep, for each (COLUMN1, COLUMN2) pair, the row with the largest Column3.
select * from (
    SELECT *,
        -- Column3 must not be part of the partition: with it, every distinct
        -- row would form its own partition and always receive rn = 1
        rn = ROW_NUMBER() over (partition by COLUMN1, COLUMN2 order by Column3 desc)
    FROM Table1
) as ranked  -- a derived table needs an alias
WHERE rn = 1
Please try the following:
-- ranked: rows of Table1 numbered inside each (Column1, Column2) group,
--         largest Column3 first.
with ranked as (
    select
        Column1,
        Column2,
        Column3,
        row_number() over (
            partition by Column1, Column2
            order by Column3 desc
        ) as rn
    from Table1
)
-- number 1 of every group is the row with the maximum Column3
select Column1, Column2, Column3
from ranked
where rn = 1
You need to add a group by. In queries of this type, you have a set of columns you want the values from (these are the columns you group by) and you have other columns most of whose values you'll throw away. You use a function like MAX, MIN, SUM, AVG to specify what to do with the data from rows that are "thrown away". The result is a unique set of values from the columns that were grouped, and a single value corresponding to the min/max/avg etc from the columns that were not grouped:
-- Group by the columns whose values are kept; Column3 is the aggregated column.
SELECT [column1], [Column2], MAX(Column3) as Column3
FROM Table1
GROUP BY [column1], [Column2] ;
Add a Group by on column 3.
-- The GROUP BY must list the non-aggregated columns (col1, col2); grouping by
-- col3, the column inside MAX(), leaves col1 and col2 outside the grouping.
Select col1, col2, max(col3)
from test
Group By col1, col2
You can use a function like DENSE_RANK().
In this example, if there are duplicate values you want to retrieve.
-- Sample rows, held in a table variable.
declare @t as table (COLUMN1 char(3), COLUMN2 varchar(10), COLUMN3 int)
INSERT @t (COLUMN1 , COLUMN2 , COLUMN3 ) select 'Eva', 'Apple' ,1
INSERT @t (COLUMN1 , COLUMN2 , COLUMN3 ) select 'Eva', 'Apple' ,2
INSERT @t (COLUMN1 , COLUMN2 , COLUMN3 ) select 'Eva', 'Apple' ,3
INSERT @t (COLUMN1 , COLUMN2 , COLUMN3 ) select 'Bob', 'Apple' ,1
INSERT @t (COLUMN1 , COLUMN2 , COLUMN3 ) select 'Bob', 'Samsung' ,1
INSERT @t (COLUMN1 , COLUMN2 , COLUMN3 ) select 'Bob', 'Samsung' ,2
SELECT * FROM (
    -- rank 1 = largest COLUMN3 of the (COLUMN1, COLUMN2) group; DENSE_RANK gives
    -- rows with equal COLUMN3 the same rank, so duplicates are all returned
    SELECT DENSE_RANK() OVER (PARTITION BY COLUMN1, COLUMN2 ORDER BY COLUMN3 desc) [Max] , *
    from @t
) as T
WHERE [Max] = 1 -- Set here what position do you want
Order by COLUMN3

Joining two tables with many to many relationship in sql

I have two tables with many to many relationship. I need to join them and get the matched records.
Table 1
Column1 | column 2| column 3|
1|p1|1.0
1|p1|1.1
1|p1|1.2
Table 2
Column1 | column 2| column 3|
1|p1|2.0
1|p1|2.1
1|p1|2.2
Now I want the result as
1|p1|1.0|2.0
1|p1|1.1|2.1
1|p1|1.2|2.2
I mean column1 and column2 matching and showing values from both columns for column3
Edit 1:
I have one issue after trying MT0 query. I am very much satisfied with his answer but still need some changes to be done:
Table 1
Column1 | column 2| column 3|
1|p1|1.0
1|p1|1.1
1|p1|1.2
Table 2
Column1 | column 2| column 3|
1|p1|1.0
1|p1|1.2
Now I want the result as
1|p1|1.0|1.0
1|p1|1.1|NULL
1|p1|1.2|1.2
But I am getting as
1|p1|1.0|1.0
1|p1|1.1|1.2
1|p1|1.2|NULL
Please do some help on this
If you have unequal numbers of rows for each partition then you can do:
Oracle Setup:
-- Oracle sample data: both tables are created straight from literal rows.
CREATE TABLE table1 AS
    SELECT 1 AS col1, 'P1' AS col2, '1.0' AS col3 FROM DUAL UNION ALL
    SELECT 1, 'P1', '1.1' FROM DUAL UNION ALL
    SELECT 1, 'P1', '1.2' FROM DUAL UNION ALL
    SELECT 1, 'P2', '1.0' FROM DUAL UNION ALL
    SELECT 1, 'P2', '1.2' FROM DUAL UNION ALL
    SELECT 2, 'P1', '1.0' FROM DUAL;
CREATE TABLE table2 AS
    SELECT 1 AS col1, 'P1' AS col2, '2.0' AS col3 FROM DUAL UNION ALL
    SELECT 1, 'P1', '2.1' FROM DUAL UNION ALL
    SELECT 1, 'P1', '2.2' FROM DUAL UNION ALL
    SELECT 1, 'P2', '2.1' FROM DUAL UNION ALL
    SELECT 2, 'P1', '2.0' FROM DUAL UNION ALL
    SELECT 2, 'P1', '2.1' FROM DUAL;
Query:
-- t1 / t2: rows of each table numbered inside their (col1, col2) partition by col3.
WITH t1 AS (
    SELECT t.*,
        ROW_NUMBER() OVER (PARTITION BY col1, col2 ORDER BY col3) AS rn
    FROM table1 t
),
t2 AS (
    SELECT t.*,
        ROW_NUMBER() OVER (PARTITION BY col1, col2 ORDER BY col3) AS rn
    FROM table2 t
)
-- pair the n-th row of one side with the n-th row of the other; the full outer
-- join keeps the surplus rows of whichever side has more
SELECT COALESCE(t1.col1, t2.col1) AS col1,
    COALESCE(t1.col2, t2.col2) AS col2,
    t1.col3 AS t1col3,
    t2.col3 AS t2col3
FROM t1
FULL OUTER JOIN t2
    ON t1.col1 = t2.col1
    AND t1.col2 = t2.col2
    AND t1.rn = t2.rn
ORDER BY col1, col2, t1col3 NULLS LAST, t2col3 NULLS LAST;
Output:
COL1 COL2 T1COL3 T2COL3
---------- ---- ------ ------
1 P1 1.0 2.0
1 P1 1.1 2.1
1 P1 1.2 2.2
1 P2 1.0 2.1
1 P2 1.2
2 P1 1.0 2.0
2 P1 2.1
You could add a calculated column with the numbering, using the row_number window function and use that in the join:
-- Number the rows of each table inside their (column1, column2) group by column3,
-- then match rows that share group and number.
SELECT lhs.column1, rhs.column2, lhs.column3, rhs.column3
FROM (
    SELECT column1, column2, column3,
        ROW_NUMBER() OVER (
            PARTITION BY column1, column2
            ORDER BY column3
        ) AS rn
    FROM table1
) lhs
INNER JOIN (
    SELECT column1, column2, column3,
        ROW_NUMBER() OVER (
            PARTITION BY column1, column2
            ORDER BY column3
        ) AS rn
    FROM table2
) rhs
    ON lhs.column1 = rhs.column1
    AND lhs.column2 = rhs.column2
    AND lhs.rn = rhs.rn
Create one more column only having the matching ID in both tables like
-- add the extra key column (column4) to both tables
alter table table1
add column4 char(100)
alter table table2
add column4 char(100)
so table 1
Column1 | column 2| column 3|column4
1|p1|1.0
1|p1|1.1
1|p1|1.2
-- fill the extra key column from column1 (UPDATE takes the table name directly)
update table1
set column4 = left(column1,4)
Do the same in table 2 and use that as a key for left join.
Please try below.
-- Sample tables from the question.
create table tab1(Col1 int,col2 varchar(10), col3 varchar(10))
insert into tab1 (Col1, col2, col3)
values(1,'p1','1.0'),
(1,'p1','1.1'),
(1,'p1','1.2')
create table tab2(Col1 int,col2 varchar(10), col3 varchar(10))
insert into tab2 (Col1, col2, col3)
values(1,'p1','2.0'),
(1,'p1','2.1'),
(1,'p1','2.2')
-- Pair the n-th row of tab1 with the n-th row of tab2 inside each (Col1, col2)
-- group. Numbering is partitioned by the group and ordered by col3 so that the
-- pairing is deterministic and restarts for every group.
SELECT a.col1,a.col2,a.col3,b.col3
FROM
(
    select Col1, col2, col3,
        ROW_NUMBER() over(partition by Col1, col2 order by col3) as rownum
    from tab1
)a
inner join
(
    select Col1, col2, col3,
        ROW_NUMBER() over(partition by Col1, col2 order by col3) as rownum
    from tab2
)b ON a.rownum = b.rownum and a.Col1 = b.Col1 and a.col2 = b.col2

Get row for each user where the count of a value in a column is maximum

My column structure:
Column0 Column1
aaa abc
aaa abc
aaa xyx
aaa NA
bbb fgh
bbb NA
bbb NA
bbb NA
ccc NA
ccc NA
ccc NA
ccc NA
What I wish to get is foreach distinct 'Column0' data 'Column1' data whose count is max unless that data is NA in which case get the second highest.
If for a 'Column0' data all values of 'Column1' are NA then the value can be NA
So expected value:
Column0 Column1
aaa abc
bbb fgh
ccc NA
This will give the correct result:
-- Sample rows from the question, held in a table variable.
DECLARE @t table(Column0 char(3), Column1 varchar(3))
INSERT @t (Column0, Column1) values
('aaa','abc'),('aaa','abc'),('aaa','xyx'),('aaa','NA')
,('bbb','fgh'),('bbb','NA'),('bbb','NA'),('bbb','NA')
,('ccc','NA'),('ccc','NA'),('ccc','NA'),('ccc','NA')
;WITH CTE as
(
    -- cnt = how often the value occurs inside its Column0 group;
    -- 'NA' rows are not counted, so 'NA' always ends up with cnt = 0
    SELECT
        column0,
        column1,
        count(case when column1 <> 'NA' THEN 1 end) over (partition by column0, column1) cnt
    FROM @t
), CTE2 as
(
    -- highest cnt first; column1 breaks ties so the pick is repeatable
    SELECT
        column0,
        column1,
        row_number() over (partition by column0 order by cnt desc, column1) rn
    FROM CTE
)
SELECT column0, column1
FROM CTE2
WHERE rn = 1
Result:
column0 column1
aaa abc
bbb fgh
ccc NA
You can use two CTEs and the ranking function ROW_NUMBER:
-- counted: every row with the number of occurrences of its (Column0, Column1) pair.
WITH counted AS
(
    SELECT
        Column0,
        Column1,
        COUNT(*) OVER (PARTITION BY Column0, Column1) AS Cnt
    FROM dbo.TableName
),
-- ranked: inside each Column0, non-'NA' values come first, then the higher count.
ranked AS
(
    SELECT
        Column0,
        Column1,
        ROW_NUMBER() OVER (
            PARTITION BY Column0
            ORDER BY
                CASE WHEN Column1 = 'NA' THEN 1 ELSE 0 END ASC,
                Cnt DESC
        ) AS RN
    FROM counted
)
SELECT Column0, Column1
FROM ranked
WHERE RN = 1
Demo
How about something like this?
-- One row per Column0: the most frequent non-'NA' Column1 of that group,
-- or 'NA' when the group has no other value.
SELECT
    grp.Column0,
    ISNULL(
        (
            SELECT TOP (1) cand.Column1
            FROM dbo.YourTable AS cand
            WHERE grp.Column0 = cand.Column0
                AND cand.Column1 <> 'NA'
            GROUP BY cand.Column1
            ORDER BY COUNT(*) DESC
        ),
        'NA'
    ) AS Column1
FROM dbo.YourTable AS grp
GROUP BY grp.Column0
SQL Fiddle
And with an index
-- Index on (Column0, Column1), the two columns the query above reads.
CREATE INDEX IX_YourTable_Column0
    ON YourTable (Column0, Column1)
You get a nice looking query plan.
And a version that deals with NULL values in Column0.
-- Same query, but the groups are matched with INTERSECT, which treats two NULL
-- Column0 values as equal (a plain "=" would not).
SELECT
    grp.Column0,
    ISNULL(
        (
            SELECT TOP (1) cand.Column1
            FROM dbo.YourTable AS cand
            WHERE EXISTS (SELECT grp.Column0 INTERSECT SELECT cand.Column0)
                AND cand.Column1 <> 'NA'
            GROUP BY cand.Column1
            ORDER BY COUNT(*) DESC
        ),
        'NA'
    ) AS Column1
FROM dbo.YourTable AS grp
GROUP BY grp.Column0
The query plan for this version is the same as the one above.
You can use row_number() with an aggregation:
-- One row per column0: its most frequent column1, where 'NA' is chosen only
-- when the group contains no other value.
select column0, column1
from (select column0, column1,
        row_number() over (partition by column0
            -- real values sort before 'NA'; within each class the higher count wins
            order by case when column1 = 'NA' then 1 else 0 end,
                     count(*) desc
        ) as seqnum
    from [table]
    group by column0, column1
) t
where seqnum = 1;
If you want to allow duplicates in the case of ties, then use rank() or dense_rank() instead of row_number().

select column name from max query

I have a query that goes something like this :
-- t: two literal sample rows.
;WITH t (RowNumber, ObjectID, [Col1], [Col2], [Col3], [Col4]) AS
(
    SELECT 1, 1, 10, 20, 20, 20
    UNION ALL
    SELECT 2, 2, 20, 30, 40, 50
)
SELECT
    RowNumber,
    ObjectID,
    (
        -- largest positive value among the four columns of the current row
        SELECT MAX(d.Amount)
        FROM (VALUES ([Col1]), ([Col2]), ([Col3]), ([Col4])) AS d (Amount)
        WHERE d.Amount > 0
    )
FROM t
The query works fine, but what I want to know is which column the Max(Amount) comes from.
So in my result set, on top of having (RowNumber, ObjectId, Amount) I want the name of the column (Col1, Col2, Col3, Col4) as a String.
Is there any way to do that?
EDIT
Question from the comments : If two columns have the same max, it could be either one? Yes, it could be either one. Any column name will do as long as I know where it could be coming from.
Using SQL Server 2008
Don't MAX: use TOP which avoids the aggregate/GROUP BY.
It can also deal with duplicates using WITH TIES
I'm not sure if what you had was pseudo-code or a sub-query, but this should do what you want
-- Stack the four columns into (Amount, ColName) rows and keep the row with the
-- largest positive Amount; ColName records which column the value came from.
-- NOTE(review): TOP 1 applies to the whole stacked set, so this returns a single
-- row overall rather than one per RowNumber/ObjectID — confirm that is intended.
-- NOTE(review): "table" looks like a placeholder for the real table name.
SELECT TOP 1 -- WITH TIES if needed
*
FROM
(
SELECT RowNumber, ObjectID, [Col1] AS Amount, 'Col1' AS ColName
FROM table
UNION ALL
SELECT RowNumber, ObjectID, [Col2], 'Col2' AS ColName
FROM table
UNION ALL
SELECT RowNumber, ObjectID, [Col3], 'Col3' AS ColName
FROM table
UNION ALL
SELECT RowNumber, ObjectID, [Col4], 'Col4' AS ColName
FROM table
) foo
WHERE Amount > 0
ORDER BY Amount DESC
Your main problem is that you'll have to touch the table 4 times no matter how you do it because a subquery only returns one value. I can't see a ROW_NUMBER solution either (but there probably is one though... :-)
This is untested; however, to see what's going on with your data, this might help. Not really production code quality:
-- Pack each amount together with its column name into one string, so that a
-- single MAX() returns both the value and where it came from.
-- NOTE(review): relies on STR() producing right-aligned, equal-width text so
-- that string order follows numeric order — confirm for the value range in use.
-- "table" stands for the real table name.
SELECT RowNumber, ObjectID,
(
    SELECT MAX(Amount)
    FROM (
        -- the "> 0" filter is applied to the numeric column, before it is turned
        -- into text; non-positive values become NULL and are ignored by MAX()
        SELECT CASE WHEN [Col1] > 0 THEN str([Col1]) + ', col1' END AS Amount
        UNION ALL
        SELECT CASE WHEN [Col2] > 0 THEN str([Col2]) + ', col2' END
        UNION ALL
        SELECT CASE WHEN [Col3] > 0 THEN str([Col3]) + ', col3' END
        UNION ALL
        SELECT CASE WHEN [Col4] > 0 THEN str([Col4]) + ', col4' END
    ) AS d
)
FROM table
str() is the "toString()" function of your DBMS.
Your SQL seems pretty weird, what DBMS are you using?
Adding a step to user202553's answer
-- Returns, for every row of t1, the largest positive amount among Col1..Col4
-- together with the name of the column it came from.
-- t1: two literal sample rows.
;WITH t1 as(
select 1 as RowNumber, 1 as ObjectID, 10 as [Col1], 20 as [Col2], 20 as [Col3], 20 as [Col4] UNION ALL
select 2 as RowNumber, 2 as ObjectID, 20 as [Col1], 30 as [Col2], 40 as [Col3], 50000045 as [Col4]
),
-- t2: per row, a scalar subquery picks the (name, amount) pair with the largest
-- positive amount and returns both packed into one binary value (Top1):
-- the column name in the first 4 bytes, the amount in the following 4 bytes.
t2 as(
SELECT RowNumber, ObjectID,
(
SELECT TOP 1 CAST(C AS BINARY(4)) + CAST(Amount as BINARY(4))
FROM (
SELECT 'Col1' AS C, [Col1] AS Amount
UNION ALL
SELECT 'Col2' AS C, [Col2]
UNION ALL
SELECT 'Col3' AS C, [Col3]
UNION ALL
SELECT 'Col4' AS C, [Col4]
) d
WHERE Amount > 0
ORDER BY Amount desc
) AS Top1
FROM t1
)
-- Unpack Top1 again: bytes 1-4 become the column name, bytes 5-8 the amount.
SELECT RowNumber,
ObjectID,
CAST(Left(Top1, 4) AS CHAR(4)) AS Col,
CAST(SUBSTRING(Top1,5,4) AS INT) AS Amount
FROM t2
You can use a combination of UNPIVOT and OUTER APPLY:
-- t: two literal sample rows.
;WITH t AS (
    SELECT 1 AS RowNumber, 1 AS ObjectID, 10 AS [Col1], 20 AS [Col2], 20 AS [Col3], 20 AS [Col4]
    UNION ALL
    SELECT 2 AS RowNumber, 2 AS ObjectID, 20 AS [Col1], 30 AS [Col2], 40 AS [Col3], 50 AS [Col4]
)
SELECT
    RowNumber,
    ObjectID,
    ColName,
    ColAmount
FROM t
-- for every row of t: turn its four columns into (ColName, ColAmount) rows and
-- keep the one with the largest positive amount
OUTER APPLY (
    SELECT TOP 1
        ColName,
        ColAmount
    FROM (
        SELECT Col1, Col2, Col3, Col4
    ) src
    UNPIVOT (
        ColAmount FOR ColName IN (Col1, Col2, Col3, Col4)
    ) unpvt
    WHERE ColAmount > 0
    ORDER BY ColAmount DESC
) best
Results:
RowNumber ObjectID ColName ColAmount
----------- ----------- --------- -----------
1 1 Col2 20
2 2 Col4 50