Select top dates grouped by ID's

Select top dates grouped by ID's - sql

I have a table as follows:
DECLARE #tmp TABLE
(
userID int,
testID int,
someDate datetime
)
Within it I store dates along with two ID values, e.g.
INSERT INTO #tmp (userID, testID, someDate) VALUES (1, 50, '2010-10-01')
INSERT INTO #tmp (userID, testID, someDate) VALUES (1, 50, '2010-11-01')
INSERT INTO #tmp (userID, testID, someDate) VALUES (1, 50, '2010-12-01')
INSERT INTO #tmp (userID, testID, someDate) VALUES (2, 20, '2010-10-01')
INSERT INTO #tmp (userID, testID, someDate) VALUES (2, 20, '2010-11-01')
I need to select the latest date per userID/testID combination. So, the result would be
userID testID someDate
1 50 2010-12-01
2 20 2010-11-01
It sounds really easy but I can't figure it out. SQL Fiddle Here.

SELECT userID, testID, MAX(someDate)
FROM #tmp
GROUP BY testId,userID;
fiddle

Try
SELECT t1.* FROM #tmp t1
INNER JOIN (SELECT userId, MAX(someDate) someDate
FROM #tmp
GROUP BY userId) t2
ON t1.userId = t2.userId
AND t1.someDate = t2.someDate

SELECT userId, testId, MAX(someDate)
FROM #tmp
GROUP BY testId, userId
http://www.sqlfiddle.com/#!6/d41d8/5205

Related

How to use DateDiff into only one SELECT statement?

I want to make a short version on my DATEDIFF function on my SQL Query. In my code, I created two temporary tables then there, I select and use the DATEDIFF funtion.
I would want this code to be simplified and only use ONE SELECT statement that will provide the same results. Is it possible?
Here is my result:
This is my SQL Query
DECLARE #Temp TABLE (ID int, Stamp datetime)
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3)
SELECT ROW_NUMBER() OVER (ORDER BY ID) as c, ID, Stamp INTO #Temp2
FROM #Temp
SELECT ROW_NUMBER() OVER (ORDER BY ID) as d, ID, Stamp INTO #Temp3
FROM #Temp
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM #Temp2 as temp2
LEFT JOIN #Temp3 as temp3 on temp2.ID = temp3.ID and temp2.c = temp3.d + 1
Thanks!

If you are using SQL Server 2012:
select * ,isnull(datediff(day,lag(stamp) over(partition by id order by stamp),stamp) ,0)
from #temp t1
Else use this..
;with cte
as
(select * ,row_number() over (partition by id order by stamp ) as rownum
from #temp t1
)
select c1.id,c1.stamp,isnull(datediff(day,c2.stamp,c1.stamp),0) as datee
from cte c1
left join
cte c2
on c1.id=c2.id and c1.rownum=c2.rownum+1

You could remove insert into the temp-tables and use subselects within the final query:
DECLARE #Temp TABLE (ID int, Stamp datetime)
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3)
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM (SELECT ROW_NUMBER() OVER (ORDER BY ID) as c, ID, Stamp FROM #Temp) as temp2
LEFT JOIN (SELECT ROW_NUMBER() OVER (ORDER BY ID) as d, ID, Stamp FROM #Temp) as temp3
on temp2.ID = temp3.ID and temp2.c = temp3.d + 1

In SQL Server 2012+, you would just use lag():
select t.*
isnull(datediff(day, lag(stamp) over (partition by id order by stamp), stamp), 0)
from #temp t;
In earlier versions, I would use outer apply:
select t.*,
isnull(datediff(day, t2.stamp, t.stamp), 0)
from #temp t outer apply
(select top 1 t2.*
from #temp t2
where t2.id = t.id and t2.stamp < t.stamp
order by t2.stamp desc
) t2;

try a cte,
DECLARE #Temp TABLE (ID int, Stamp datetime)
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3)
;WITH CTE AS
(
SELECT ROW_NUMBER() OVER (ORDER BY ID) as RowNo, ID, Stamp
FROM #Temp
)
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM CTE as temp2
LEFT JOIN CTE as temp3 on temp2.ID = temp3.ID
AND temp2.RowNo = temp3.RowNo + 1

Select MAX dates plus ID value

Please consider the following table...
DECLARE #tmp TABLE
(
ID int,
userID int,
testID int,
someDate datetime
)
...containing the following values:
INSERT INTO #tmp (ID, userID, testID, someDate) VALUES (1, 1, 50, '2010-10-01')
INSERT INTO #tmp (ID, userID, testID, someDate) VALUES (2, 1, 50, '2010-11-01')
INSERT INTO #tmp (ID, userID, testID, someDate) VALUES (3, 1, 50, '2010-12-01')
INSERT INTO #tmp (ID, userID, testID, someDate) VALUES (4, 2, 20, '2010-10-01')
INSERT INTO #tmp (ID, userID, testID, someDate) VALUES (5, 2, 30, '2010-11-01')
INSERT INTO #tmp (ID, userID, testID, someDate) VALUES (6, 2, 20, '2012-11-01')
I need to retrieve the maximum date for each userID/testID combination of values, and also the accompanying ID value. The results should be:
ID userID testID someDate
-------------------------------
3 1 50 2010-12-01
5 2 30 2010-11-01
6 2 20 2012-11-01
When I try the following query, the result set becomes incorrect and all rows are shown. I cannot omit ID from the GROUP BY clause because it causes and error. Can anyone help please? It seems long-winded to join the table to itself to get these values.
SELECT ID, userID, testID, MAX(someDate)
FROM #tmp
GROUP BY testId,userID,ID;
http://www.sqlfiddle.com/#!6/d41d8/5219

Please try:
select * from (
select
*,
ROW_NUMBER() over (partition by userID, testID order by SomeDate desc) Rnum
From #tmp
)x where Rnum=1

Select rows with duplicate values in 2 columns

This is my table:
CREATE TABLE [Test].[dbo].[MyTest]
(
[Id] BIGINT NOT NULL,
[FId] BIGINT NOT NULL,
[SId] BIGINT NOT NULL
);
And some data:
INSERT INTO [Test].[dbo].[MyTest] ([Id], [FId], [SId]) VALUES (1, 100, 11);
INSERT INTO [Test].[dbo].[MyTest] ([Id], [FId], [SId]) VALUES (2, 200, 12);
INSERT INTO [Test].[dbo].[MyTest] ([Id], [FId], [SId]) VALUES (3, 100, 21);
INSERT INTO [Test].[dbo].[MyTest] ([Id], [FId], [SId]) VALUES (4, 200, 22);
INSERT INTO [Test].[dbo].[MyTest] ([Id], [FId], [SId]) VALUES (5, 300, 13);
INSERT INTO [Test].[dbo].[MyTest] ([Id], [FId], [SId]) VALUES (6, 200, 12);
So I need 2 select query,
First Select FId, SId that like a distinct in both column so the result is:
100, 11
200, 12
100, 21
200, 22
300, 13
As you see the values of 200, 12 returned once.
Second query is the Id's of that columns whose duplicated in both FId, SId So the result is:
2
6
Does any one have any idea about it?

Standard SQL
SELECT
M.ID
FROM
( -- note all duplicate FID, SID pairs
SELECT FID, SID
FROM MyTable
GROUP BY FID, SID
HAVING COUNT(*) > 1
) T
JOIN -- back onto main table using these duplicate FID, SID pairs
MyTable M ON T.FID = M.FID AND T.SID = M.SID
Using windowing:
SELECT
T.ID
FROM
(
SELECT
ID,
COUNT(*) OVER (PARTITION BY FID, SID) AS CountPerPair
FROM
MyTable
) T
WHERE
T.CountPerPair > 1

First query:
SELECT DISTINCT Fid,SId
FROM MyTest
Second query:
SELECT DISTINCT a1.Id
FROM MyTest a1 INNER JOIN MyTest a2
ON a1.Fid = a2.Fid
AND a1.SId = a2.SId
AND a1.Id <> a2.Id
I cannot test them, but I think they should work...

first:
select distinct FId,SId from [Test].[dbo].[MyTest]
second query
select distinct t.Id
from [Test].[dbo].[MyTest] t
inner join [Test].[dbo].[MyTest] t2
on t.Id<>t2.Id and t.FId=t2.FId and t.SId=t2.SId

Part 1 is as mentioned above distinct.
This will resolve second part.
select id from [Test].[dbo].[MyTest] a
where exists(select 1 from [Test].[dbo].[MyTest] where a.[SId] = [SId] and a.[FId] = [FId] and a.id <> id)

How do I remove all but some records based on a threshold?

I have a table like this:
CREATE TABLE #TEMP(id int, name varchar(100))
INSERT INTO #TEMP VALUES(1, 'John')
INSERT INTO #TEMP VALUES(1, 'Adam')
INSERT INTO #TEMP VALUES(1, 'Robert')
INSERT INTO #TEMP VALUES(1, 'Copper')
INSERT INTO #TEMP VALUES(1, 'Jumbo')
INSERT INTO #TEMP VALUES(2, 'Jill')
INSERT INTO #TEMP VALUES(2, 'Rocky')
INSERT INTO #TEMP VALUES(2, 'Jack')
INSERT INTO #TEMP VALUES(2, 'Lisa')
INSERT INTO #TEMP VALUES(3, 'Amy')
SELECT *
FROM #TEMP
DROP TABLE #TEMP
I am trying to remove all but some records for those that have more than 3 names with the same id. Therefore, I am trying to get something like this:
id name
1 Adam
1 Copper
1 John
2 Jill
2 Jack
2 Lisa
3 Amy
I am not understanding how to write this query. I have gotten to the extent of preserving one record but not a threshold of records:
;WITH FILTER AS
(
SELECT id
FROM #TEMP
GROUP BY id
HAVING COUNT(id) >=3
)
SELECT id, MAX(name)
FROM #TEMP
WHERE id IN (SELECT * FROM FILTER)
GROUP BY id
UNION
SELECT id, name
FROM #TEMP
WHERE id NOT IN (SELECT * FROM FILTER)
Gives me:
1 Robert
2 Rocky
3 Amy
Any suggestions? Oh by the way, I don't care what records are preserved while merging.

You can do it using CTE
CREATE TABLE #TEMP(id int, name varchar(100))
INSERT INTO #TEMP VALUES(1, 'John')
INSERT INTO #TEMP VALUES(1, 'Adam')
INSERT INTO #TEMP VALUES(1, 'Robert')
INSERT INTO #TEMP VALUES(1, 'Copper')
INSERT INTO #TEMP VALUES(1, 'Jumbo')
INSERT INTO #TEMP VALUES(2, 'Jill')
INSERT INTO #TEMP VALUES(2, 'Rocky')
INSERT INTO #TEMP VALUES(2, 'Jack')
INSERT INTO #TEMP VALUES(2, 'Lisa')
INSERT INTO #TEMP VALUES(3, 'Amy')
SELECT *
FROM #TEMP;
WITH CTE(N) AS
(
SELECT ROW_NUMBER() OVER(PARTITION BY id ORDER BY id)
FROM #Temp
)
DELETE CTE WHERE N>3;
SELECT *
FROM #TEMP;
DROP TABLE #TEMP

I will change your select like this (not tested)
select name from #temp group by name having count(id) > 3
then you can implement your query in a delete statement using your select as a where clause

in inner query you can use row_number function over (partition by id)
and then in outer query you have to give condition like below
select id,name from (
SELECT id,name, row_number() over (partition by id order by 1) count_id FROM #test
group by id, name )
where count_id <=3

If i got your question right, you need to get rows when id occurrence 3 or more times
select t1.name,t1.id from tbl1 t1
inner join tbl1 t2 on t1.id = t2.id
group by t1.name, t1.id
having count(t1.id) > 2

Merging records based on a time difference?

I have the following table:
CREATE TABLE #TEMP (id int, name varchar(255), startdate datetime, enddate datetime)
INSERT INTO #TEMP VALUES(1, 'John', '2011-01-11 00:00:00.000','2011-01-11 00:01:10.000')
INSERT INTO #TEMP VALUES(2, 'John', '2011-01-11 00:00:20.000','2011-01-11 00:01:50.000')
INSERT INTO #TEMP VALUES(3, 'John', '2011-01-11 00:01:40.000','2011-01-11 00:01:50.000')
INSERT INTO #TEMP VALUES(4, 'Adam', '2011-01-11 00:00:40.000','2011-01-11 00:01:20.000')
INSERT INTO #TEMP VALUES(5, 'Adam', '2011-01-11 00:00:10.000','2011-01-11 00:01:30.000')
SELECT * FROM #TEMP
DROP TABLE #TEMP
I am trying to merge all records with the same name within a range of 60 seconds to each other to get the following:
John 2011-01-11 00:00:00.000 2011-01-11 00:01:10.000
John 2011-01-11 00:01:40.000 2011-01-11 00:01:50.000
Adam 2011-01-11 00:00:10.000 2011-01-11 00:01:20.000
Any suggestions on how to do this on a table with about 50K records? Currently, I managed to get to this:
SELECT * FROM #TEMP
CREATE TABLE #Merge(id1 int, id2 int)
INSERT INTO #Merge
SELECT id, uuid
FROM
(
SELECT t.id, u.uuid, t.name, t.startdate, t.enddate, u.ustartdate, u.uenddate,
(CASE WHEN (DATEDIFF(second, t.startdate, u.ustartdate) <= 60 AND DATEDIFF(second, t.startdate, u.ustartdate) >= 0) then 1 else 0 END) Flag
FROM #Temp t
INNER JOIN
(SELECT id AS uuid, name, startdate AS ustartdate, enddate AS uenddate
FROM #Temp) u
ON t.name = u.name AND t.startdate != u.ustartdate AND t.id != u.uuid
) w
WHERE Flag = 1
SELECT * FROM #Merge
-- Insert non-mergable records
CREATE TABLE #TEMP2 (id int, name varchar(255), membergroup varchar(255), startdate datetime, enddate datetime)
INSERT INTO #TEMP2
SELECT * FROM #TEMP
WHERE id NOT IN (SELECT id1 FROM #Merge UNION SELECT id2 FROM #Merge)
SELECT * FROM #TEMP2
Of course, I am not sure how to proceed from here. The #Merge table gives me rows that are to be merged. What I did was to insert non-mergable rows first into #Temp2 first.
EDIT:
Updated set of rows, just in case:
INSERT INTO #TEMP VALUES(1, 'John', 'A', '2011-01-11 00:00:00.000','2011-01-11 00:01:10.000')
INSERT INTO #TEMP VALUES(2, 'John', 'A', '2011-01-11 00:00:01.000','2011-01-11 00:01:10.000')
INSERT INTO #TEMP VALUES(3, 'John', 'B', '2011-01-11 00:00:20.000','2011-01-11 00:01:50.000')
INSERT INTO #TEMP VALUES(4, 'John', 'C', '2011-01-11 00:01:40.000','2011-01-11 00:01:50.000')
INSERT INTO #TEMP VALUES(5, 'John', 'C', '2011-01-11 00:01:50.000','2011-01-11 00:02:20.000')
INSERT INTO #TEMP VALUES(6, 'Adam', 'A', '2011-01-11 00:00:40.000','2011-01-11 00:01:20.000')
INSERT INTO #TEMP VALUES(7, 'Adam', 'B', '2011-01-11 00:00:10.000','2011-01-11 00:01:30.000')
INSERT INTO #TEMP VALUES(8, 'Adam', 'B', '2011-01-11 00:03:10.000','2011-01-11 00:04:30.000')

The code below manage's to show both merged rows (rows 1-2,4-5) and unique rows (row 3)
SELECT DISTINCT a.id,a.name,a.startdate,a.enddate
FROM temp a
LEFT JOIN temp b ON a.name = b.name AND a.id < b.id AND DATEDIFF(s,a.startdate,b.startdate)<=60
LEFT JOIN temp c ON c.name = a.name AND c.id < a.id AND DATEDIFF(s,c.startdate,a.startdate)<=60
WHERE (b.id IS NOT NULL OR c.id IS NULL) AND a.id <= COALESCE(c.id,a.id)

Given you haven't said how to use the 60 second interval and your sample code showed only a startdate comparison, here you go
SELECT
*
FROM
#Temp t1
CROSS APPLY
(SELECT TOP 1*
FROM #Temp t2
WHERE t1.name = t2.name AND DATEDIFF(second, t1.startdate, t2.startdate) < 60 AND t1.id < t2.id
ORDER BY id DESC
) t2x
Based on startdate only, row pairs 1/2 and 4/5 make it into the output. Row 3 doesn't so you'll have to explain why you added it.
That is, row id = 3 is not within 60 seconds of row 1 or 2 based on startdate. So it shouldn't be in the output.
This assumes that id and startdate are both increasing.
Edit, after chat:
SELECT
*
FROM
#Temp t1
CROSS APPLY
(SELECT TOP 1 *
FROM #Temp t2
WHERE t1.name = t2.name AND DATEDIFF(second, t1.startdate, t2.startdate) < 60 AND t1.id < t2.id
ORDER BY t2.id DESC
) t2x
UNION ALL
SELECT
t1.*, t1.*
FROM
#Temp t1
WHERE NOT EXISTS
(
SELECT
t1ZZ.id, t2xZZ.id
FROM
#Temp t1ZZ
CROSS APPLY
(SELECT TOP 1 *
FROM #Temp t2ZZ
WHERE t1ZZ.name = t2ZZ.name AND DATEDIFF(second, t1ZZ.startdate, t2ZZ.startdate) < 60 AND t1ZZ.id < t2ZZ.id
ORDER BY t2ZZ.id DESC
) t2xZZ
WHERE
t1.id IN (t1ZZ.id, t2xZZ.id)
)

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Select top dates grouped by ID's - sql

SELECT userID, testID, MAX(someDate) FROM #tmp GROUP BY testId,userID; fiddle

Try SELECT t1.* FROM #tmp t1 INNER JOIN (SELECT userId, MAX(someDate) someDate FROM #tmp GROUP BY userId) t2 ON t1.userId = t2.userId AND t1.someDate = t2.someDate

SELECT userId, testId, MAX(someDate) FROM #tmp GROUP BY testId, userId http://www.sqlfiddle.com/#!6/d41d8/5205

Related

How to use DateDiff into only one SELECT statement?

Select MAX dates plus ID value

Select rows with duplicate values in 2 columns

How do I remove all but some records based on a threshold?

Merging records based on a time difference?

Categories

Resources