retrieve the most recent record for each customer - sql

I have this data:
ID NAME DATE
3 JOHN 2011-08-08
2 YOKO 2010-07-07
1 JOHN 2009-06-06
Code (for SQL Server 2005):
DECLARE #TESTABLE TABLE (id int, name char(4), date smalldatetime)
INSERT INTO #TESTABLE VALUES (3, 'JOHN', '2011-08-08')
INSERT INTO #TESTABLE VALUES (2, 'YOKO', '2010-07-07')
INSERT INTO #TESTABLE VALUES (1, 'JOHN', '2009-06-06')
I want to get, for each NAME, the ID that has the most recent DATE. Like this:
3 JOHN 2011-08-08
2 YOKO 2010-07-07
What is the most elegant way of accomplishing this?

;WITH x AS
(
SELECT ID, NAME, [DATE],
rn = ROW_NUMBER() OVER
(PARTITION BY NAME ORDER BY [DATE] DESC)
FROM #TESTABLE
)
SELECT ID, NAME, [DATE] FROM x WHERE rn = 1
ORDER BY [DATE] DESC;
Try to avoid reserved words (and vague column names) like [DATE]...

SELECT <fields>
FROM SourceTable st
INNER JOIN (SELECT name, MAX(Datefield) as Datefield
FROM SourceTable
GROUP BY name) x
ON x.Name = st.name
AND x.datefield = st.datefield

below is a possible solution:
Select c.CustomerID, c.CustomerName, c.CustomerOrder, c.CustomerOrderDate, c.CustomerQty
from tblCustomer c
inner join (select c2.CustomerName, MAX(c2.CustomerOrderDate) as MaxDate from tblCustomer c2 group by c2.CustomerName) c2
on c.CustomerName = c2.CustomerName
where c.CustomerOrderDate = c2.MaxDate

Related

SQL:How do i get the row that has the max value of a column in SQL Server

I have a data a set which is already grouped by Person and Class columns and I use this query for this process:
SELECT Person,Class, MAX(TimeSpent) as MaxTimeSpent
FROM Persons
GROUP BY Person,Class
Output:
Person Class MaxTimeSpent
--------|--------|-------------|
MJ | 0 | 0 |
MJ | 1 | 659 |
MJ | 2 | 515 |
What I want to do is to get the row that has the maximum Class value in this data set (which is the 3rd row for this example).
How can I do this ? Any help would be appreciated.
Try this one
SELECT T.*
FROM
(SELECT Person,
Class,
MAX(TimeSpent) AS MaxTimeSpent
FROM Persons AS P
WHERE Person = 'MJ'
GROUP BY Person, Class) AS T
WHERE T.class = (
SELECT MAX(class) FROM Persons AS P
WHERE P.person = T.person)
You can use cte for that.
declare #Persons table (person nvarchar(10),Class int ,TimeSpent int)
insert into #Persons
select 'MJ',0,0 union all
select 'MJ',1,659 union all
select 'MJ',2,515
;with cte
as(
SELECT Person,Class,TimeSpent , row_number() over(partition by Person order by Class desc ) as RN
FROM #Persons
)
select * from cte where RN=1
Solution 2 :With Out Cte:
SELECT * FROM (
SELECT Person
,Class
,TimeSpent
,row_number() OVER (PARTITION BY Person ORDER BY Class DESC) AS RN FROM #Persons
) t WHERE t.RN = 1
TRY This:
declare #t table (Person varchar (20),Class int ,MaxTimeSpent int )
insert into #t VALUES ('MJ',0,0)
insert into #t VALUES ('MJ',1,659)
insert into #t VALUES ('MJ',2,515)
SELECT TOP 1 * FROM #t ORDER BY 2 DESC
--OR
SELECT * FROM #t WHERE Class = (SELECT max(class) FROM #t)
-- OR
SELECT TOP 1 * FROM (
SELECT *
,ROW_NUMBER() OVER (PARTITION BY person ORDER BY Class DESC) Record_Count FROM #t
) a
SELECT Top 1 Person,Class, MAX(TimeSpent) as MaxTimeSpent
FROM Persons
GROUP BY Person,Class order by Class desc

how to insert many records excluding some

I want to create a table with a subset of records from a master table.
for example, i have:
id name code
1 peter 73
2 carl 84
3 jack 73
I want to store peter and carl but not jack because has same peter's code.
I need hight performance because i have 20M records.
I try this:
SELECT id, name, DISTINCT(code) INTO new_tab
FROM old_tab
WHERE (conditions)
but don't work.
Assuming you want to pick the row with the maximum id per code, then this should do it:
insert into new_tab (id, name, code)
(SELECT id, name, code
FROM
(
SELECT id, name, code, rank() as rnk OVER (PARTITION BY code ORDER BY id DESC)
FROM old_tab WHERE rnk = 1
)
)
and for the minimum id per code, just change the sort order in the rank from DESC to ASC:
insert into new_tab (id, name, code)
(SELECT id, name, code
FROM
(
SELECT id, name, code, rank() as rnk OVER (PARTITION BY code ORDER BY id ASC)
FROM old_tab WHERE rnk = 1
)
)
Using a derived table, you can find the minID for each code, then join back to that in the outer to get the rest of the columns for that ID from oldTab.
select id,name,code
insert into newTabFROM
from old_tab t inner join
(SELECT min(id) as minId, code
from old_tab group by code) x
on t.id = x.minId
WHERE (conditions)
Try this:
CREATE TABLE #Temp
(
ID INT,
Name VARCHAR(50),
Code INT
)
INSERT #Temp VALUES (1, 'Peter', 73)
INSERT #Temp VALUES (2, 'Carl', 84)
INSERT #Temp VALUES (3, 'Jack', 73)
SELECT t2.ID, t2.Name, t2.Code
FROM #Temp t2
JOIN (
SELECT t.Code, MIN(t.ID) ID
FROM #temp t
JOIN (
SELECT DISTINCT Code
FROM #Temp
) d
ON t.Code = d.Code
GROUP BY t.Code
) b
ON t2.ID = b.ID

How to select top 3 values from each group in a table with SQL which have duplicates [duplicate]

This question already has answers here:
Select top 10 records for each category
(14 answers)
Closed 5 years ago.
Assume we have a table which has two columns, one column contains the names of some people and the other column contains some values related to each person. One person can have more than one value. Each value has a numeric type. The question is we want to select the top 3 values for each person from the table. If one person has less than 3 values, we select all the values for that person.
The issue can be solved if there are no duplicates in the table by the query provided in this article Select top 3 values from each group in a table with SQL . But if there are duplicates, what is the solution?
For example, if for one name John, he has 5 values related to him. They are 20,7,7,7,4. I need to return the name/value pairs as below order by value descending for each name:
-----------+-------+
| name | value |
-----------+-------+
| John | 20 |
| John | 7 |
| John | 7 |
-----------+-------+
Only 3 rows should be returned for John even though there are three 7s for John.
In many modern DBMS (e.g. Postgres, Oracle, SQL-Server, DB2 and many others), the following will work just fine. It uses CTEs and ranking function ROW_NUMBER() which is part of the latest SQL standard:
WITH cte AS
( SELECT name, value,
ROW_NUMBER() OVER (PARTITION BY name
ORDER BY value DESC
)
AS rn
FROM t
)
SELECT name, value, rn
FROM cte
WHERE rn <= 3
ORDER BY name, rn ;
Without CTE, only ROW_NUMBER():
SELECT name, value, rn
FROM
( SELECT name, value,
ROW_NUMBER() OVER (PARTITION BY name
ORDER BY value DESC
)
AS rn
FROM t
) tmp
WHERE rn <= 3
ORDER BY name, rn ;
Tested in:
Postgres
Oracle
SQL-Server
In MySQL and other DBMS that do not have ranking functions, one has to use either derived tables, correlated subqueries or self-joins with GROUP BY.
The (tid) is assumed to be the primary key of the table:
SELECT t.tid, t.name, t.value, -- self join and GROUP BY
COUNT(*) AS rn
FROM t
JOIN t AS t2
ON t2.name = t.name
AND ( t2.value > t.value
OR t2.value = t.value
AND t2.tid <= t.tid
)
GROUP BY t.tid, t.name, t.value
HAVING COUNT(*) <= 3
ORDER BY name, rn ;
SELECT t.tid, t.name, t.value, rn
FROM
( SELECT t.tid, t.name, t.value,
( SELECT COUNT(*) -- inline, correlated subquery
FROM t AS t2
WHERE t2.name = t.name
AND ( t2.value > t.value
OR t2.value = t.value
AND t2.tid <= t.tid
)
) AS rn
FROM t
) AS t
WHERE rn <= 3
ORDER BY name, rn ;
Tested in MySQL
I was going to downvote the question. However, I realized that it might really be asking for a cross-database solution.
Assuming you are looking for a database independent way to do this, the only way I can think of uses correlated subqueries (or non-equijoins). Here is an example:
select distinct t.personid, val, rank
from (select t.*,
(select COUNT(distinct val) from t t2 where t2.personid = t.personid and t2.val >= t.val
) as rank
from t
) t
where rank in (1, 2, 3)
However, each database that you mention (and I note, Hadoop is not a database) has a better way of doing this. Unfortunately, none of them are standard SQL.
Here is an example of it working in SQL Server:
with t as (
select 1 as personid, 5 as val union all
select 1 as personid, 6 as val union all
select 1 as personid, 6 as val union all
select 1 as personid, 7 as val union all
select 1 as personid, 8 as val
)
select distinct t.personid, val, rank
from (select t.*,
(select COUNT(distinct val) from t t2 where t2.personid = t.personid and t2.val >= t.val
) as rank
from t
) t
where rank in (1, 2, 3);
Using GROUP_CONCAT and FIND_IN_SET you can do that.Check SQLFIDDLE.
SELECT *
FROM tbl t
WHERE FIND_IN_SET(t.value,(SELECT
SUBSTRING_INDEX(GROUP_CONCAT(t1.value ORDER BY VALUE DESC),',',3)
FROM tbl t1
WHERE t1.name = t.name
GROUP BY t1.name)) > 0
ORDER BY t.name,t.value desc
If your result set is not so heavy, you can write a stored procedure (or an anonymous PL/SQL-block) for that problem which iterates the result set and finds the bigges three by a simple comparing algorithm.
Try this -
CREATE TABLE #list ([name] [varchar](100) NOT NULL, [value] [int] NOT NULL)
INSERT INTO #list VALUES ('John', 20), ('John', 7), ('John', 7), ('John', 7), ('John', 4);
WITH cte
AS (
SELECT NAME
,value
,ROW_NUMBER() OVER (
PARTITION BY NAME ORDER BY (value) DESC
) RN
FROM #list
)
SELECT NAME
,value
FROM cte
WHERE RN < 4
ORDER BY value DESC
This works for MS SQL. Should be workable in any other SQL dialect that has the ability to assign row numbers in a group by or over clause (or equivelant)
if object_id('tempdb..#Data') is not null drop table #Data;
GO
create table #data (name varchar(25), value integer);
GO
set nocount on;
insert into #data values ('John', 20);
insert into #data values ('John', 7);
insert into #data values ('John', 7);
insert into #data values ('John', 7);
insert into #data values ('John', 5);
insert into #data values ('Jack', 5);
insert into #data values ('Jane', 30);
insert into #data values ('Jane', 21);
insert into #data values ('John', 5);
insert into #data values ('John', -1);
insert into #data values ('John', -1);
insert into #data values ('Jane', 18);
set nocount off;
GO
with D as (
SELECT
name
,Value
,row_number() over (partition by name order by value desc) rn
From
#Data
)
SELECT Name, Value
FROM D
WHERE RN <= 3
order by Name, Value Desc
Name Value
Jack 5
Jane 30
Jane 21
Jane 18
John 20
John 7
John 7

SQL cross apply

I have a SQL table which contains audit information:
GroupId AuditDate ID FirstName LastName
1 01/06/2011 123 Michael Jackson
1 01/09/2010 123 M J
1 01/06/2009 123 Mike J
and trying show the differences between the audit records:
GroupId AuditDate ID Attribute From To
1 01/06/2011 123 FirstName M Michael
1 01/06/2011 123 LastName J Jackson
1 01/09/2010 123 FirstName Mike M
1 01/06/2009 123 FirstName NULL Mike
1 01/06/2009 123 LastName NULL J
I am using the following SQL query:
WITH result AS (
SELECT [Current].Id,
[Current].GroupId,
[Current].AuditDate,
[Current].FirstName,
[Current].LastName
Previous.FirstName AS PFirstName,
Previous.LastName AS PLastName,
FROM
(SELECT
*, ROW_NUMBER() OVER(PARTITION BY GroupId ORDER BY AuditDate ASC) AS RowNumber
FROM
AuditTable
WHERE
Id = #ID
) AS [Current]
LEFT JOIN
(SELECT
*, ROW_NUMBER() OVER(PARTITION BY GroupId ORDER BY AuditDate ASC) AS RowNumber
FROM
AuditTable
WHERE
Id = #ID
) AS [Previous]
ON
[Current].RowNumber = [Previous].RowNumber + 1
)
SELECT r.Id,r.GroupId, r.AuditDate
x.Attribute,
x.[From],
x.[To]
FROM result r
CROSS APPLY
(
VALUES
('FirstName', t.FirstName, t.PFirstName),
('LastName', t.LastName, t.PLastName),
) x (Attribute, [To], [From])
where
ISNULL(x.[From],'') <> ISNULL(x.[To],'')
ORDER BY r.AuditDate asc;
Is it possible to merge two select queries to improve the performance?
Try this query
WITH result AS (
SELECT Id,
GroupId,
AuditDate,
FirstName,
LastName,
ROW_NUMBER() OVER(PARTITION BY GroupId ORDER BY AuditDate ASC) AS RowNumber
FROM AuditTable
WHERE Id = #ID
)
SELECT r.Id,r.GroupId, r.AuditDate,
x.Attribute,
x.[From],
x.[To]
FROM result r LEFT JOIN result r2 ON r.RowNumber = r2.RowNumber + 1
CROSS APPLY (
VALUES ('FirstName', r.FirstName, r2.FirstName),
('LastName', r.LastName, r2.LastName)
) x (Attribute, [To], [From])
WHERE ISNULL(x.[From],'') <> ISNULL(x.[To],'')
ORDER BY r.AuditDate ASC;
Demo on SQLFiddle
You can eliminate both subqueries entirely by using lag():
WITH result AS (
SELECT Id,
GroupId,
AuditDate,
FirstName,
LastName,
lag(FirstName) over (PARTITION BY GroupId ORDER BY AuditDate ASC)
AS PFirstName,
lag(LastName) over (PARTITION BY GroupId ORDER BY AuditDate ASC)
AS PLastName
FROM AuditTable
WHERE Id = #ID
)
...
Here is the relevant documentation.
Update: However, this is only available in SQL Server 2012, unfortunately. If you have an earlier version, you will need some sort of self join.
If you can't use lag(), you should at least be able to reduce your code from 3 queries to 2: include the row number in your first select statement, and left join one subquery to it rather than having two subqueries. I'm not sure whether this way or Chris Moutray's way would be faster.
WITH result AS (
SELECT ROW_NUMBER() OVER(PARTITION BY GroupId ORDER BY AuditDate ASC) AS RowNumber
[Current].Id,
[Current].GroupId,
[Current].AuditDate,
[Current].FirstName,
[Current].LastName
[Previous].FirstName AS PFirstName,
[Previous].LastName AS PLastName,
FROM AuditTable as [Current]
LEFT JOIN
(SELECT
*, ROW_NUMBER() OVER(PARTITION BY GroupId ORDER BY AuditDate ASC) AS RowNumber
FROM
AuditTable
WHERE
Id = #ID
) AS [Previous]
ON
[Current].RowNumber = [Previous].RowNumber + 1
)
You can use LAG in SQL Server 2012. I've used UNION ALL here to unpivot columns into rows.
Depending on how you filter and what your group level is, add/modify the PARTITION BY
DECLARE #foo TABLE (GroupId tinyint, AuditDate date, ID tinyint, FirstName varchar(100), LastName varchar(100));
INSERT #foo VALUES (1, '20110601', 123, 'Michael', 'Jackson'), (1, '20100901', 123, 'M', 'J'), (1, '20090601', 123, 'Mike', 'J');
SELECT
X.GroupId, X.AuditDate, X.ID, X.[From], X.[To]
FROM
(
SELECT
F.GroupId, F.AuditDate, F.ID, 'FirstName' AS Attribute, LAG(F.FirstName) OVER (/*PARTITION BY GroupId, ID*/ ORDER BY AuditDate) AS [From], F.FirstName AS [To]
FROM
#foo F
UNION ALL
SELECT
F.GroupId, F.AuditDate, F.ID, 'LastName' AS Attribute, LAG(F.LastName) OVER (/*PARTITION BY GroupId, ID*/ ORDER BY AuditDate) AS [From], F.LastName AS [To]
FROM
#foo F
) X
WHERE
ISNULL(X.[From], '') <> ISNULL(X.[To], '')
ORDER BY
X.AuditDate DESC, X.Attribute

SQL Server Distinct Question

I need to be able to select only the first row for each name that has the greatest value.
I have a table with the following:
id name value
0 JOHN 123
1 STEVE 125
2 JOHN 127
3 JOHN 126
So I am looking to return:
id name value
1 STEVE 125
2 JOHN 127
Any idea on the MSSQL Syntax on how to perform this operation?
While you specified SQL Server, you did not specify the version. If you are using SQL Server 2005 or later, you can do something like:
With RankedItems As
(
Select id, name, value
, Row_Number() Over ( Partition By name Order By value Desc, id Asc ) As ItemRank
From Table
)
Select id, name, value
From RankedItems
Where ItemRank = 1
try:
SELECT
MIN(id) as id,dt.name,dt.value
FROM (SELECT
name,MAX(value) as value
FROM YourTable
GROUP BY name
) dt
INNER JOIN YourTable t ON dt.name=t.name and dt.value=t.value
GROUP BY dt.name,dt.value
try it out:
DECLARE #YourTable table (id int, name varchar(10), value int)
INSERT #YourTable VALUES (0, 'JOHN', 123)
INSERT #YourTable VALUES (1, 'STEVE', 125)
INSERT #YourTable VALUES (2, 'JOHN', 127)
INSERT #YourTable VALUES (3, 'JOHN', 126)
--extra data not in the question, shows why you need the outer group by
INSERT #YourTable VALUES (4, 'JOHN', 127)
INSERT #YourTable VALUES (5, 'JOHN', 127)
INSERT #YourTable VALUES (6, 'JOHN', 127)
INSERT #YourTable VALUES (7, 'JOHN', 127)
SELECT
MIN(id) as id,dt.name,dt.value
FROM (SELECT
name,MAX(value) as value
FROM #YourTable
GROUP BY name
) dt
INNER JOIN #YourTable t ON dt.name=t.name and dt.value=t.value
GROUP BY dt.name,dt.value
ORDER BY id
output:
id name value
----------- ---------- -----------
1 STEVE 125
2 JOHN 127
(2 row(s) affected)
You could do something like
SELECT id, name, value
FROM (SELECT id, name, value
ROWNUMBER() OVER (PARTITION BY name ORDER BY value DESC) AS r
FROM table) AS x
WHERE x.r = 1 ;
This will not work in SQL Server 2000 and earlier, but it will be incredibly fast in SQL Server 2005 and 2008
How about:
SELECT a.id, a.name, b.maxvalue
FROM mytbl a
INNER JOIN (SELECT id, max(value) as maxvalue
FROM mytbl
GROUP BY id) b ON b.id = a.id
SELECT a.id, a.name, a.value
FROM mytbl a
INNER JOIN (SELECT name, max(value) as maxvalue
FROM mytbl
GROUP BY name) b ON b.name = a.name and b.maxvalue = a.value