Getting "High Score" table from score table - sql

I've got the following sql table setup for tracking the scores of different players.
-- One row per recorded score; a player may appear any number of times.
CREATE TABLE scoreTable (
    userName VARCHAR2(100),
    score    NUMBER
)
/
-- Sample data: several scores for Andy and Charlie, a single score for Bob.
INSERT INTO scoreTable (userName, score) VALUES ('Andy', 200);
INSERT INTO scoreTable (userName, score) VALUES ('Andy', 33);
INSERT INTO scoreTable (userName, score) VALUES ('Bob', 444);
INSERT INTO scoreTable (userName, score) VALUES ('Charlie', 213);
INSERT INTO scoreTable (userName, score) VALUES ('Charlie', 4);
INSERT INTO scoreTable (userName, score) VALUES ('Charlie', 777);
Now I want to return each player's highest score using a select statement. I'd like the result to be
NAME SCORE
_____ ____
Andy 200
Bob 444
Charlie 777
I'd prefer to just have one select statement that gives the top result for any number of distinct name values. Is such a thing possible?

Is such a thing possible?
Yes.
Simple aggregation:
-- Highest score per player: one output row for each distinct userName.
SELECT
    userName,
    MAX(score) AS score
FROM scoreTable
GROUP BY userName
ORDER BY userName;
LiveDemo
EDIT:
Return entire row using RANK function:
-- Return the entire top-scoring row for each player.
-- RANK() assigns 1 to every row tied for a player's best score, so ties are all returned.
WITH cte AS
(
    SELECT
        s.*,  -- qualified star: Oracle rejects a bare * followed by further select items
        RANK() OVER (PARTITION BY s.userName ORDER BY s.score DESC) AS rn
    FROM scoreTable s
)
SELECT *
FROM cte
WHERE rn = 1
ORDER BY userName;
or using SELF JOIN:
-- Anti-join: keep a row only when no other row for the same player
-- has a strictly higher score (the outer join then finds no partner).
SELECT best.*
FROM scoreTable best
LEFT JOIN scoreTable higher
    ON  higher.userName = best.userName
    AND higher.score > best.score
WHERE higher.userName IS NULL
ORDER BY best.userName;
LiveDemo

Related

SQL query to filter records based on count and status

I have to filter records based on the status when the count is more than 1.
Column names: Student_id, Status, term, and course.
DB: Postgres
Condition to filter:
If there exists only one record for the student then the status(true or false) does not matter. Fetch the record.
If record count for a student is more than one then fetch only those students whose status is true.
(More than one record would mean, same Student_id, term, and course). At any given time there will be only one record with status as true.
How do I write the SQL query for this?
You can try below query:
-- Demo data in a temp table, one row per (student, term, course) record.
CREATE TABLE #TableA (
    id         INT,
    Student_id VARCHAR(100),
    [Status]   BIT,
    term       INT,
    course     VARCHAR(10)
);

INSERT INTO #TableA (id, Student_id, [Status], term, course) VALUES (1, 1, 1, 3, 'C#');
INSERT INTO #TableA (id, Student_id, [Status], term, course) VALUES (2, 2, 0, 6, 'Php');
INSERT INTO #TableA (id, Student_id, [Status], term, course) VALUES (3, 2, 0, 6, 'Php');
INSERT INTO #TableA (id, Student_id, [Status], term, course) VALUES (4, 2, 1, 6, 'Php');
INSERT INTO #TableA (id, Student_id, [Status], term, course) VALUES (5, 2, 1, 7, 'Php');

-- Keep a row when its Status is 1, or when it is the only row
-- in its (Student_id, term, course) group.
SELECT
    counted.id,
    counted.Student_id,
    counted.Status,
    counted.term,
    counted.course
FROM (
    SELECT
        *,
        COUNT(*) OVER (PARTITION BY Student_id, term, course) AS row_count
    FROM #TableA
) counted
WHERE counted.[Status] = 1
   OR counted.row_count = 1
Result will be like below:
id Student_id Status term course
1 1 1 3 C#
4 2 1 6 Php
5 2 1 7 Php
Hmmm . . . You seem to want:
-- Keep a row when it is the only record in its (student_id, term, course) group;
-- otherwise keep only the rows whose status is true.
select t.*
from (select t.*,
             -- the question defines "more than one record" as the same
             -- student_id, term and course, so count within that group
             count(*) over (partition by student_id, term, course) as cnt
      from t
     ) t
where cnt = 1 or status;
This filters out students with more than one record and not true status.
If you really want one row per student, even students with no true status, then use distinct on:
-- One row per student_id: DISTINCT ON keeps the first row of each group in
-- ORDER BY order, and status DESC sorts true ahead of false.
select distinct on (t.student_id)
       t.*
from t
order by t.student_id, t.status desc;
CASE WHEN status='x' AND count>1 THEN 1 ELSE 0 END

Grouping records - ROW NUMBER and PARTITION BY

Please see the DDL below:
-- Demo table: ID is an identity column used as the primary key.
CREATE TABLE #Test (
    ID   INT IDENTITY NOT NULL,
    name VARCHAR(100),
    PRIMARY KEY (id)
);

-- Sample rows, inserted one at a time in the order shown in the expected output.
INSERT INTO #Test (name) VALUES ('Ian');
INSERT INTO #Test (name) VALUES ('Ian');
INSERT INTO #Test (name) VALUES ('Ian');
INSERT INTO #Test (name) VALUES ('Mark');
INSERT INTO #Test (name) VALUES ('James');
INSERT INTO #Test (name) VALUES ('James');
INSERT INTO #Test (name) VALUES ('Henry');
I am looking for the output below:
Ian 1
Ian 1
Ian 1
Mark 2
James 3
James 3
Henry 4
All the Ians' have the same number. All the James' have the same number. I have been experimenting with ROW NUMBER and PARTITION BY but I have been unsuccessful so far.
You can do this in a few ways, but row_number() per se is not one of them.
Here is a method:
-- Tag every row with the smallest id seen for its name, then rank by that id,
-- so all rows of one name share a number.
with first_seen as (
    select
        t.*,
        min(id) over (partition by name) as nameid
    from #test t
)
select
    first_seen.name,
    dense_rank() over (order by first_seen.nameid)
from first_seen;
This calculates the min id for each name and then uses that for dense_rank().
If you don't care about the particular ordering, you can use dense_rank() on the name:
-- Rank on the name itself: equal names share a number, numbered in name order.
select
    t.name,
    dense_rank() over (order by t.name)
from #test t;
Use DENSE_RANK instead.
-- DENSE_RANK gives equal names the same number and leaves no gaps between groups.
SELECT
    Name,
    DENSE_RANK() OVER (ORDER BY Name)
FROM #Test
You can get the desired output with:
-- prep: one row per name with a gap-free rank derived from the name's lowest id.
with prep as
(
    select a.name
         , DENSE_RANK() over (order by a.first_id) as [rank]
    from (
        -- GROUP BY already yields one row per name, so no DISTINCT is needed
        select name, min(id) as first_id
        from #Test
        group by name
    ) a
)
-- Join back to the base table so that every original row carries its name's rank.
select T.name
     , prep.[rank]
from #Test T
inner join prep on prep.name = T.name
The first (sub)query selects the lowest ID per name, DENSE_RANK then ensures the numbering is sequential, and the final query joins those prepared results back to the original #Test table so that every row carries its rank.
You can find out more about the ranking functions here: https://msdn.microsoft.com/en-us/library/ms189798.aspx

how to insert many records excluding some

I want to create a table with a subset of records from a master table.
for example, i have:
id name code
1 peter 73
2 carl 84
3 jack 73
I want to store peter and carl but not jack, because jack has the same code as peter.
I need high performance because I have 20M records.
I try this:
SELECT id, name, DISTINCT(code) INTO new_tab
FROM old_tab
WHERE (conditions)
but it doesn't work.
Assuming you want to pick the row with the maximum id per code, then this should do it:
-- Copy one row per code into new_tab: the row with the highest id for that code.
insert into new_tab (id, name, code)
select ranked.id, ranked.name, ranked.code
from
(
    select id, name, code,
           -- the column alias goes after the complete OVER (...) clause
           rank() over (partition by code order by id desc) as rnk
    from old_tab
) ranked
-- a window result can only be filtered one query level above where it is computed
where ranked.rnk = 1
and for the minimum id per code, just change the sort order in the rank from DESC to ASC:
-- Copy one row per code into new_tab: the row with the lowest id for that code.
insert into new_tab (id, name, code)
select ranked.id, ranked.name, ranked.code
from
(
    select id, name, code,
           -- the column alias goes after the complete OVER (...) clause
           rank() over (partition by code order by id asc) as rnk
    from old_tab
) ranked
-- a window result can only be filtered one query level above where it is computed
where ranked.rnk = 1
Using a derived table, you can find the minID for each code, then join back to that in the outer to get the rest of the columns for that ID from oldTab.
-- Copy into new_tab the row holding the smallest id for each code.
-- x supplies (minId, code) per code; joining on id pulls the remaining columns.
insert into new_tab (id, name, code)
select t.id, t.name, t.code  -- qualified: code exists in both t and x
from old_tab t inner join
     (SELECT min(id) as minId, code
      from old_tab group by code) x
     on t.id = x.minId
WHERE (conditions)
Try this:
-- Demo table mirroring the question's sample data.
CREATE TABLE #Temp
(
    ID INT,
    Name VARCHAR(50),
    Code INT
)
INSERT INTO #Temp (ID, Name, Code) VALUES (1, 'Peter', 73)
INSERT INTO #Temp (ID, Name, Code) VALUES (2, 'Carl', 84)
INSERT INTO #Temp (ID, Name, Code) VALUES (3, 'Jack', 73)

-- Keep, for each Code, the row with the lowest ID.
SELECT t2.ID, t2.Name, t2.Code
FROM #Temp t2
JOIN (
    -- GROUP BY already produces one row per Code, so no extra join against
    -- a DISTINCT list of codes is needed.
    SELECT t.Code, MIN(t.ID) AS ID
    FROM #Temp t
    -- rows with a NULL Code are left out, as an equality join on Code would do
    WHERE t.Code IS NOT NULL
    GROUP BY t.Code
) b
    ON t2.ID = b.ID

How to select top 3 values from each group in a table with SQL which have duplicates [duplicate]

This question already has answers here:
Select top 10 records for each category
(14 answers)
Closed 5 years ago.
Assume we have a table which has two columns, one column contains the names of some people and the other column contains some values related to each person. One person can have more than one value. Each value has a numeric type. The question is we want to select the top 3 values for each person from the table. If one person has less than 3 values, we select all the values for that person.
The issue can be solved if there are no duplicates in the table by the query provided in this article Select top 3 values from each group in a table with SQL . But if there are duplicates, what is the solution?
For example, if for one name John, he has 5 values related to him. They are 20,7,7,7,4. I need to return the name/value pairs as below order by value descending for each name:
-----------+-------+
| name | value |
-----------+-------+
| John | 20 |
| John | 7 |
| John | 7 |
-----------+-------+
Only 3 rows should be returned for John even though there are three 7s for John.
In many modern DBMS (e.g. Postgres, Oracle, SQL-Server, DB2 and many others), the following will work just fine. It uses CTEs and ranking function ROW_NUMBER() which is part of the latest SQL standard:
-- Number each person's values from highest to lowest, then keep the first three.
-- ROW_NUMBER() numbers tied values separately, so duplicates count toward the limit.
WITH ranked AS (
    SELECT
        name,
        value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM t
)
SELECT
    ranked.name,
    ranked.value,
    ranked.rn
FROM ranked
WHERE ranked.rn <= 3
ORDER BY ranked.name, ranked.rn;
Without CTE, only ROW_NUMBER():
-- Same top-three-per-name logic, with the numbering done in a derived table instead of a CTE.
SELECT
    numbered.name,
    numbered.value,
    numbered.rn
FROM (
    SELECT
        name,
        value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM t
) numbered
WHERE numbered.rn <= 3
ORDER BY numbered.name, numbered.rn;
Tested in:
Postgres
Oracle
SQL-Server
In MySQL and other DBMS that do not have ranking functions, one has to use either derived tables, correlated subqueries or self-joins with GROUP BY.
The (tid) is assumed to be the primary key of the table:
-- Self-join + GROUP BY variant for engines without ranking functions.
-- Each row is paired with every row of the same name that sorts at or ahead of it
-- (greater value, or equal value with tid not greater); the pair count is its position.
SELECT
    cur.tid,
    cur.name,
    cur.value,
    COUNT(*) AS rn
FROM t AS cur
JOIN t AS ahead
    ON  ahead.name = cur.name
    AND (   ahead.value > cur.value
         OR (ahead.value = cur.value AND ahead.tid <= cur.tid)
        )
GROUP BY cur.tid, cur.name, cur.value
HAVING COUNT(*) <= 3
ORDER BY name, rn ;
-- Correlated-subquery variant: rn is the number of same-name rows that sort
-- at or ahead of the current row (greater value, or equal value with tid not greater).
SELECT
    pos.tid,
    pos.name,
    pos.value,
    pos.rn
FROM (
    SELECT
        cur.tid,
        cur.name,
        cur.value,
        (
            SELECT COUNT(*)
            FROM t AS ahead
            WHERE ahead.name = cur.name
              AND (   ahead.value > cur.value
                   OR (ahead.value = cur.value AND ahead.tid <= cur.tid)
                  )
        ) AS rn
    FROM t AS cur
) AS pos
WHERE pos.rn <= 3
ORDER BY pos.name, pos.rn ;
Tested in MySQL
I was going to downvote the question. However, I realized that it might really be asking for a cross-database solution.
Assuming you are looking for a database independent way to do this, the only way I can think of uses correlated subqueries (or non-equijoins). Here is an example:
-- Portable "top 3 distinct values per person" using only a correlated subquery.
-- val_rank counts the distinct values of the same person that are >= this row's
-- value, so equal values share a rank and the outer DISTINCT collapses them.
select distinct t.personid, t.val, t.val_rank
from (select t.*,
             (select count(distinct t2.val)
              from t t2
              where t2.personid = t.personid and t2.val >= t.val
             ) as val_rank  -- not named "rank": that is a reserved word in some engines (e.g. MySQL 8)
      from t
     ) t
where t.val_rank in (1, 2, 3)
However, each database that you mention (and I note, Hadoop is not a database) has a better way of doing this. Unfortunately, none of them are standard SQL.
Here is an example of it working in SQL Server:
-- Inline sample data for one person, including a duplicated value (6).
with t as (
    select 1 as personid, 5 as val union all
    select 1 as personid, 6 as val union all
    select 1 as personid, 6 as val union all
    select 1 as personid, 7 as val union all
    select 1 as personid, 8 as val
)
-- val_rank counts the distinct values of the same person that are >= this row's
-- value; the outer DISTINCT collapses rows that share a value.
select distinct t.personid, t.val, t.val_rank
from (select t.*,
             (select count(distinct t2.val)
              from t t2
              where t2.personid = t.personid and t2.val >= t.val
             ) as val_rank  -- not named "rank": that is a reserved word in some engines (e.g. MySQL 8)
      from t
     ) t
where t.val_rank in (1, 2, 3);
Using GROUP_CONCAT and FIND_IN_SET you can do that. Check SQLFIDDLE.
-- For each row, build the comma-separated list of the top three values for the
-- row's name (GROUP_CONCAT ordered descending, cut to three items by
-- SUBSTRING_INDEX) and keep the row when its value appears in that list.
-- NOTE(review): the match is on value only, so when a value inside the top three
-- is duplicated (e.g. 20,7,7,7) every row carrying that value matches and more
-- than three rows can come back for one name — confirm this is acceptable.
SELECT *
FROM tbl t
WHERE FIND_IN_SET(t.value,(SELECT
SUBSTRING_INDEX(GROUP_CONCAT(t1.value ORDER BY VALUE DESC),',',3)
FROM tbl t1
WHERE t1.name = t.name
GROUP BY t1.name)) > 0
ORDER BY t.name,t.value desc
If your result set is not too large, you can write a stored procedure (or an anonymous PL/SQL block) that iterates over the result set and finds the three biggest values with a simple comparison algorithm.
Try this -
-- Demo data: John's values from the question, including the triple 7.
CREATE TABLE #list (
    [name]  [varchar](100) NOT NULL,
    [value] [int] NOT NULL
)
INSERT INTO #list ([name], [value])
VALUES ('John', 20),
       ('John', 7),
       ('John', 7),
       ('John', 7),
       ('John', 4);

-- Number each name's values from highest to lowest and keep positions 1 to 3.
WITH numbered AS (
    SELECT
        NAME,
        value,
        ROW_NUMBER() OVER (PARTITION BY NAME ORDER BY (value) DESC) AS RN
    FROM #list
)
SELECT
    numbered.NAME,
    numbered.value
FROM numbered
WHERE numbered.RN < 4
ORDER BY numbered.value DESC
This works for MS SQL. It should be workable in any other SQL dialect that can assign row numbers in a GROUP BY or OVER clause (or equivalent).
-- Start from a clean slate: drop the temp table if an earlier run left it behind.
IF OBJECT_ID('tempdb..#Data') IS NOT NULL
    DROP TABLE #Data;
GO
CREATE TABLE #data (name VARCHAR(25), value INTEGER);
GO
-- Load the sample rows; NOCOUNT suppresses the per-statement row-count messages.
SET NOCOUNT ON;
INSERT INTO #data (name, value)
VALUES ('John', 20),
       ('John', 7),
       ('John', 7),
       ('John', 7),
       ('John', 5),
       ('Jack', 5),
       ('Jane', 30),
       ('Jane', 21),
       ('John', 5),
       ('John', -1),
       ('John', -1),
       ('Jane', 18);
SET NOCOUNT OFF;
GO
-- Number each name's values from highest to lowest, then keep the first three.
WITH D AS (
    SELECT
        name,
        Value,
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS rn
    FROM #Data
)
SELECT
    D.Name,
    D.Value
FROM D
WHERE D.RN <= 3
ORDER BY D.Name, D.Value DESC
Name Value
Jack 5
Jane 30
Jane 21
Jane 18
John 20
John 7
John 7

SQL Server Distinct Question

I need to be able to select only the first row for each name that has the greatest value.
I have a table with the following:
id name value
0 JOHN 123
1 STEVE 125
2 JOHN 127
3 JOHN 126
So I am looking to return:
id name value
1 STEVE 125
2 JOHN 127
Any idea on the MSSQL Syntax on how to perform this operation?
While you specified SQL Server, you did not specify the version. If you are using SQL Server 2005 or later, you can do something like:
-- Rank each name's rows by value (highest first); id breaks ties so exactly
-- one row per name gets ItemRank = 1.
With RankedItems As
    (
    Select id, name, value
        , Row_Number() Over ( Partition By name Order By value Desc, id Asc ) As ItemRank
    From YourTable  -- placeholder name; TABLE itself is a reserved word and cannot be used unquoted
    )
Select id, name, value
From RankedItems
Where ItemRank = 1
try:
-- name_max: the greatest value for each name.
-- Joining back finds the rows holding that value; MIN(id) keeps the lowest id among them.
SELECT
    MIN(t.id) AS id,
    name_max.name,
    name_max.value
FROM (
    SELECT
        name,
        MAX(value) AS value
    FROM YourTable
    GROUP BY name
) name_max
INNER JOIN YourTable t
    ON  t.name  = name_max.name
    AND t.value = name_max.value
GROUP BY name_max.name, name_max.value
try it out:
-- Table variable holding the sample rows (table variables are named with @).
DECLARE @YourTable table (id int, name varchar(10), value int)
INSERT INTO @YourTable (id, name, value) VALUES (0, 'JOHN', 123)
INSERT INTO @YourTable (id, name, value) VALUES (1, 'STEVE', 125)
INSERT INTO @YourTable (id, name, value) VALUES (2, 'JOHN', 127)
INSERT INTO @YourTable (id, name, value) VALUES (3, 'JOHN', 126)
--extra data not in the question, shows why you need the outer group by
INSERT INTO @YourTable (id, name, value) VALUES (4, 'JOHN', 127)
INSERT INTO @YourTable (id, name, value) VALUES (5, 'JOHN', 127)
INSERT INTO @YourTable (id, name, value) VALUES (6, 'JOHN', 127)
INSERT INTO @YourTable (id, name, value) VALUES (7, 'JOHN', 127)

-- dt holds the greatest value per name; the join finds every row with that value
-- and the outer GROUP BY / MIN(id) reduces ties to the lowest id.
SELECT
    MIN(id) as id,dt.name,dt.value
    FROM (SELECT
              name,MAX(value) as value
              FROM @YourTable
              GROUP BY name
         ) dt
        INNER JOIN @YourTable t ON dt.name=t.name and dt.value=t.value
    GROUP BY dt.name,dt.value
    ORDER BY id
output:
id name value
----------- ---------- -----------
1 STEVE 125
2 JOHN 127
(2 row(s) affected)
You could do something like
-- Number each name's rows from the greatest value down and keep the first one.
SELECT x.id, x.name, x.value
FROM (SELECT id, name, value,
             ROW_NUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS r
      FROM YourTable  -- placeholder name; TABLE itself is a reserved word
     ) AS x
WHERE x.r = 1 ;
This will not work in SQL Server 2000 and earlier, but it will be incredibly fast in SQL Server 2005 and 2008
How about:
-- Greatest value per name, joined back to fetch the row(s) that hold it.
-- The grouping key must be name: grouping by a per-row id would make every row
-- its own group and filter nothing.
SELECT a.id, a.name, b.maxvalue
FROM mytbl a
INNER JOIN (SELECT name, max(value) as maxvalue
            FROM mytbl
            GROUP BY name) b ON b.name = a.name AND b.maxvalue = a.value
-- top_by_name: the greatest value for each name.
-- The join keeps only the rows whose value equals their name's maximum.
SELECT
    a.id,
    a.name,
    a.value
FROM mytbl a
INNER JOIN (
    SELECT
        name,
        MAX(value) AS maxvalue
    FROM mytbl
    GROUP BY name
) top_by_name
    ON  top_by_name.name     = a.name
    AND top_by_name.maxvalue = a.value