Select MAX dates plus ID value - sql

Please consider the following table...
-- Sample table for the "latest row per userID/testID" question.
-- Created as a local temporary table: a name starting with # is a temp-table
-- name and is not accepted by DECLARE ... TABLE (table variables start with @).
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL DROP TABLE #tmp;
CREATE TABLE #tmp
(
    ID       int,
    userID   int,
    testID   int,
    someDate datetime
);
...containing the following values:
-- Six sample rows: three dates for (userID 1, testID 50), two for
-- (userID 2, testID 20) and one for (userID 2, testID 30).
INSERT INTO #tmp (ID, userID, testID, someDate)
VALUES
    (1, 1, 50, '2010-10-01'),
    (2, 1, 50, '2010-11-01'),
    (3, 1, 50, '2010-12-01'),
    (4, 2, 20, '2010-10-01'),
    (5, 2, 30, '2010-11-01'),
    (6, 2, 20, '2012-11-01');
I need to retrieve the maximum date for each userID/testID combination of values, and also the accompanying ID value. The results should be:
ID userID testID someDate
-------------------------------
3 1 50 2010-12-01
5 2 30 2010-11-01
6 2 20 2012-11-01
When I try the following query, the result set is incorrect and all rows are shown. I cannot omit ID from the GROUP BY clause because that causes an error. Can anyone help, please? It seems long-winded to join the table to itself to get these values.
-- Attempted query from the question. Because ID is part of the GROUP BY and
-- ID is different on every sample row, each row forms its own group, so
-- MAX(someDate) is computed per row and all six rows come back.
SELECT ID, userID, testID, MAX(someDate)
FROM #tmp
GROUP BY testId,userID,ID;
http://www.sqlfiddle.com/#!6/d41d8/5219

Please try:
-- Latest row per (userID, testID): number the rows of each pair from newest
-- to oldest and keep only the first one. Columns are listed explicitly so the
-- helper column Rnum does not appear in the result.
SELECT
    ranked.ID,
    ranked.userID,
    ranked.testID,
    ranked.someDate
FROM (
    SELECT
        t.ID,
        t.userID,
        t.testID,
        t.someDate,
        -- ID DESC is a tiebreaker so the pick is deterministic when two rows
        -- of the same pair share the same someDate.
        ROW_NUMBER() OVER (
            PARTITION BY t.userID, t.testID
            ORDER BY t.someDate DESC, t.ID DESC
        ) AS Rnum
    FROM #tmp AS t
) AS ranked
WHERE ranked.Rnum = 1;

Related

How to use DateDiff into only one SELECT statement?

I want to shorten my SQL query that uses the DATEDIFF function. In my code, I create two temporary tables, then select from them and use the DATEDIFF function.
I would want this code to be simplified and only use ONE SELECT statement that will provide the same results. Is it possible?
Here is my result:
This is my SQL Query
-- Sample data for the DATEDIFF question. #Temp is created as a local temporary
-- table: DECLARE ... TABLE only accepts table-variable names starting with @.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp;
CREATE TABLE #Temp (ID int, Stamp datetime);
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17');
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE());
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5);  -- datetime + 0.5 adds half a day
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16');
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE());
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3);    -- datetime + 3 adds three days
-- Number all rows by ID and then Stamp, so consecutive numbers within an ID
-- are also consecutive in time. Ordering by ID alone leaves the order of an
-- ID's rows undefined, which makes the pairing below arbitrary.
SELECT ROW_NUMBER() OVER (ORDER BY ID, Stamp) AS c, ID, Stamp
INTO #Temp2
FROM #Temp;

SELECT ROW_NUMBER() OVER (ORDER BY ID, Stamp) AS d, ID, Stamp
INTO #Temp3
FROM #Temp;

-- Pair each row with the row numbered one lower within the same ID and report
-- the gap in days; the first row of each ID has no partner and gets 0.
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp), 0) AS DateDiff
FROM #Temp2 AS temp2
LEFT JOIN #Temp3 AS temp3
    ON temp2.ID = temp3.ID
   AND temp2.c = temp3.d + 1;
Thanks!
If you are using SQL Server 2012:
-- Days elapsed since the previous stamp of the same id; 0 on the first row of
-- each id, where LAG() has no previous row and returns NULL.
select
    t1.*,
    isnull(datediff(day, lag(t1.stamp) over (partition by t1.id order by t1.stamp), t1.stamp), 0) as datee
from #temp t1;
Else use this..
-- Alternative without LAG(): number each id's rows by stamp, then pair every
-- row with the row numbered one lower within the same id.
;with numbered as
(
    select t1.*,
           row_number() over (partition by id order by stamp) as rownum
    from #temp t1
)
select cur.id,
       cur.stamp,
       isnull(datediff(day, prev.stamp, cur.stamp), 0) as datee
from numbered cur
left join numbered prev
    on prev.id = cur.id
   and prev.rownum + 1 = cur.rownum
You could remove insert into the temp-tables and use subselects within the final query:
-- Same sample data, created as a temporary table (DECLARE ... TABLE requires a
-- table-variable name starting with @, not #).
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp;
CREATE TABLE #Temp (ID int, Stamp datetime);
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17');
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE());
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5);
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16');
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE());
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3);

-- Single statement: both numbered row sets are inline subselects instead of
-- temp tables. Rows are numbered by ID and then Stamp so that row n-1 is the
-- previous stamp of the same ID (ordering by ID alone leaves that undefined).
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp), 0) AS DateDiff
FROM (SELECT ROW_NUMBER() OVER (ORDER BY ID, Stamp) AS c, ID, Stamp FROM #Temp) AS temp2
LEFT JOIN (SELECT ROW_NUMBER() OVER (ORDER BY ID, Stamp) AS d, ID, Stamp FROM #Temp) AS temp3
    ON temp2.ID = temp3.ID
   AND temp2.c = temp3.d + 1;
In SQL Server 2012+, you would just use lag():
-- Days since the previous stamp of the same id (0 when there is no previous
-- row). The comma after t.* is required; without it the statement does not parse.
select t.*,
       isnull(datediff(day, lag(stamp) over (partition by id order by stamp), stamp), 0) as date_diff
from #temp t;
In earlier versions, I would use outer apply:
-- For every row, look up the latest earlier stamp of the same id and report
-- the gap in days; rows with no earlier stamp get 0.
select cur.*,
       isnull(datediff(day, prev.stamp, cur.stamp), 0)
from #temp as cur
outer apply (
    select top 1 older.stamp
    from #temp as older
    where older.id = cur.id
      and older.stamp < cur.stamp
    order by older.stamp desc
) as prev;
try a cte,
-- Same sample data, created as a temporary table (DECLARE ... TABLE requires a
-- table-variable name starting with @, not #).
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp;
CREATE TABLE #Temp (ID int, Stamp datetime);
INSERT INTO #Temp (ID, Stamp) VALUES (1, '2016-08-17');
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE());
INSERT INTO #Temp (ID, Stamp) VALUES (1, GETDATE()+0.5);
INSERT INTO #Temp (ID, Stamp) VALUES (2, '2016-08-16');
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE());
INSERT INTO #Temp (ID, Stamp) VALUES (2, GETDATE()+3);

-- Number the rows once in a CTE and join the CTE to itself. Rows are numbered
-- by ID and then Stamp so that RowNo - 1 is the previous stamp of the same ID
-- (ordering by ID alone leaves the order within an ID undefined).
;WITH CTE AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY ID, Stamp) AS RowNo, ID, Stamp
    FROM #Temp
)
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp), 0) AS DateDiff
FROM CTE AS temp2
LEFT JOIN CTE AS temp3
    ON temp2.ID = temp3.ID
   AND temp2.RowNo = temp3.RowNo + 1;

TSQL - Delete All Rows Except 1 Per Group

Let's say I have 5 workcenters (Workcenter 1, Workcenter 2, Workcenter 3, Workcenter 4, Workcenter 5)
Each workcenter has several rows of notes that are ordered by the date the data was entered. I would like to delete all rows per workcenter except the row of data that was entered last.
If my columns are: ID | Workcenter | Note | Log_Date
How would I go about doing this?
My code is only giving me the most current note entry for the entire table, but I want one per workcenter.
This is what I have right now:
-- Attempted delete from the question. TOP 1 makes the subquery return a single
-- ID, so NOT IN keeps only that one row for the whole table instead of one row
-- per Workcenter.
-- NOTE(review): Log_Date is neither grouped nor aggregated, which SQL Server
-- normally rejects in the ORDER BY of a grouped query -- confirm this runs as posted.
DELETE FROM #Table
WHERE ID NOT IN (SELECT TOP 1 ID FROM #Table
GROUP BY Workcenter, ID
ORDER BY Log_Date DESC)
try this:
-- Delete every note that is not the newest one of its workcenter.
-- The derived table t2 holds the newest Log_Date per Workcenter; a row survives
-- only if it matches that date. If several rows of a workcenter share the
-- newest Log_Date, all of them are kept.
-- `table` is a reserved word and cannot be used as a table name unquoted, so
-- the question's #Table is referenced instead.
DELETE t1
FROM #Table AS t1
WHERE NOT EXISTS (
    SELECT 1
    FROM (
        SELECT Workcenter, MAX(Log_Date) AS Log_Date
        FROM #Table
        GROUP BY Workcenter
    ) AS t2
    WHERE t1.Workcenter = t2.Workcenter
      AND t1.Log_Date = t2.Log_Date
);
This uses a NOT EXISTS subquery: the inner query gets the max log_date for each workcenter, and every row that does not match its workcenter's max log_date is deleted.
using CTE we can achieve this:
-- Number each workgroup's rows from newest to oldest and delete, through the
-- CTE, every row that is not number 1 of its name.
;WITH ranked AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY name ORDER BY createdate DESC) AS rowno, *
    FROM workgroups
)
DELETE FROM ranked
WHERE rowno <> 1;
-- Demo table: id is an IDENTITY column, createdate defaults to the insert time.
CREATE TABLE [dbo].[workgroups](id INT IDENTITY(1,1),name VARCHAR(50), createdate DATETIME DEFAULT GETDATE())
-- The inserts below supply explicit id values. SQL Server rejects explicit
-- values for an IDENTITY column unless IDENTITY_INSERT is ON for that table.
SET IDENTITY_INSERT [dbo].[workgroups] ON
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (1, N'workgroup1', CAST(0x0000A60F011F7840 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (2, N'workgroup1', CAST(0x0000A60F011F7F8E AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (3, N'workgroup1', CAST(0x0000A60F011F8728 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (4, N'workgroup2', CAST(0x0000A60F011F92B9 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (5, N'workgroup2', CAST(0x0000A60F011F97C0 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (6, N'workgroup3', CAST(0x0000A60F011FA443 AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (7, N'workgroup3', CAST(0x0000A60F011FA73B AS DateTime))
INSERT [dbo].[workgroups] ([id], [name], [createdate]) VALUES (8, N'workgroup3', CAST(0x0000A60F011FA9FB AS DateTime))
SET IDENTITY_INSERT [dbo].[workgroups] OFF
-- Preview: rowno 1 marks the newest row of each workgroup name.
SELECT
    ROW_NUMBER() OVER (PARTITION BY name ORDER BY createdate DESC) AS rowno,
    *
FROM workgroups
-- Delete through the CTE every row that is not number 1 of its name.
;WITH ranked AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY name ORDER BY createdate DESC) AS rowno,
        *
    FROM workgroups
)
DELETE FROM ranked
WHERE rowno <> 1;

Select top dates grouped by ID's

I have a table as follows:
-- Sample table holding one date per (userID, testID) entry.
-- Created as a local temporary table: a name starting with # is a temp-table
-- name and is not accepted by DECLARE ... TABLE (table variables start with @).
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL DROP TABLE #tmp;
CREATE TABLE #tmp
(
    userID   int,
    testID   int,
    someDate datetime
);
Within it I store dates along with two ID values, e.g.
-- Five sample rows: three dates for (userID 1, testID 50) and two for
-- (userID 2, testID 20).
INSERT INTO #tmp (userID, testID, someDate)
VALUES
    (1, 50, '2010-10-01'),
    (1, 50, '2010-11-01'),
    (1, 50, '2010-12-01'),
    (2, 20, '2010-10-01'),
    (2, 20, '2010-11-01');
I need to select the latest date per userID/testID combination. So, the result would be
userID testID someDate
1 50 2010-12-01
2 20 2010-11-01
It sounds really easy but I can't figure it out. SQL Fiddle Here.
-- Latest someDate for every (userID, testID) pair.
SELECT
    t.userID,
    t.testID,
    MAX(t.someDate)
FROM #tmp AS t
GROUP BY
    t.testId,
    t.userID;
fiddle
Try
-- Rows carrying the latest someDate of their (userID, testID) pair.
-- The derived table must group by BOTH ids and the join must match on both:
-- grouping by userId alone yields one date per user, which drops every test
-- of that user except the one holding the user's overall latest date.
-- If a pair has several rows with the same latest date, all of them are returned.
SELECT t1.*
FROM #tmp AS t1
INNER JOIN (
    SELECT userId, testId, MAX(someDate) AS someDate
    FROM #tmp
    GROUP BY userId, testId
) AS t2
    ON t1.userId = t2.userId
   AND t1.testId = t2.testId
   AND t1.someDate = t2.someDate;
-- Latest someDate per (userId, testId) pair. The aggregate is aliased so the
-- result column is named someDate instead of being left unnamed.
SELECT userId, testId, MAX(someDate) AS someDate
FROM #tmp
GROUP BY testId, userId;
http://www.sqlfiddle.com/#!6/d41d8/5205

T-SQL: Paging WITH TIES

I am trying to implement a paging routine that's a little different.
For the sake of a simple example, let's assume that I have a table defined and populated as follows:
-- Sample data for the paging question: three ParentIds, each with the same
-- three hourly timestamps.
-- Created as a local temporary table: DECLARE ... TABLE only accepts
-- table-variable names starting with @, not #.
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp;
CREATE TABLE #Temp
(
    ParentId    INT,
    [TimeStamp] DATETIME,
    Value       INT
);
-- Column lists are spelled out so the inserts do not depend on column order.
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (1, '1/1/2013 00:00', 6);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (1, '1/1/2013 01:00', 7);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (1, '1/1/2013 02:00', 8);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (2, '1/1/2013 00:00', 6);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (2, '1/1/2013 01:00', 7);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (2, '1/1/2013 02:00', 8);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (3, '1/1/2013 00:00', 6);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (3, '1/1/2013 01:00', 7);
INSERT INTO #Temp (ParentId, [TimeStamp], Value) VALUES (3, '1/1/2013 02:00', 8);
TimeStamp will always be the same interval, e.g. daily data, 1 hour data, 1 minute data, etc. It will not be mixed.
For reporting and presentation purposes, I want to implement paging that:
Orders by TimeStamp
Starts out using a suggested pageSize (say 4), but will automatically adjust to include additional records matching on TimeStamp. In other words, if 1/1/2013 01:00 is included for one ParentId, the suggested pageSize will be overridden and all records for hour 01:00 will be included for all ParentId's. It's almost like the TOP WITH TIES option.
So running this query with pageSize of 4 would return 6 records. There are 3 hour 00:00 and 1 hour 01:00 by default, but because there are more hour 01:00's, the pageSize would be overridden to return all hour 00:00 and 01:00.
Here's what I have so far. I think I'm close, as it works for the first iteration, but subsequent queries for the next pageSize+ rows don't work.
-- Attempt from the question. rnk is the RANK() by [TimeStamp] (rows with the
-- same timestamp share a rank); rownum is the ROW_NUMBER() by [TimeStamp].
-- The filter keeps a row when either its rownum or its rnk falls in 1..4.
WITH CTE AS
(
SELECT ParentId, [TimeStamp], Value,
RANK() OVER(ORDER BY [TimeStamp]) AS rnk,
ROW_NUMBER() OVER(ORDER BY [TimeStamp]) AS rownum
FROM #Temp
)
SELECT *
FROM CTE
WHERE (rownum BETWEEN 1 AND 4) OR (rnk BETWEEN 1 AND 4)
ORDER BY TimeStamp, ParentId
The ROW_NUMBER ensures the minimum pageSize is met, but the RANK will include additional ties.
-- Sample data as a local temporary table (a # name cannot be declared as a
-- table variable).
if object_id('tempdb..#Temp') is not null drop table #Temp;
create table #Temp ( ParentId Int, [TimeStamp] DateTime, [Value] Int );
insert into #Temp ( ParentId, [TimeStamp], [Value] ) values
  (1, '1/1/2013 00:00', 6),
  (1, '1/1/2013 01:00', 7),
  (1, '1/1/2013 02:00', 8),
  (2, '1/1/2013 00:00', 6),
  (2, '1/1/2013 01:00', 7),
  (2, '1/1/2013 02:00', 8),
  (3, '1/1/2013 00:00', 6),
  (3, '1/1/2013 01:00', 7),
  (3, '1/1/2013 02:00', 8);

-- Scalar variables must start with @; #PageSize / #Page are not valid names.
declare @PageSize as Int = 4;  -- suggested (minimum) number of rows per page
declare @Page as Int = 1;      -- 1-based page number

with Alpha as (
  -- Every row with its rank (ties on TimeStamp share a rank) and row number.
  select ParentId, [TimeStamp], Value,
    Rank() over ( order by [TimeStamp] ) as Rnk,
    Row_Number() over ( order by [TimeStamp] ) as RowNum
    from #Temp ),
  Beta as (
  -- Lowest and highest rank among the rows whose row number lies inside the
  -- requested page window.
  select Min( Rnk ) as MinRnk, Max( Rnk ) as MaxRnk
    from Alpha
    where ( @Page - 1 ) * @PageSize < RowNum and RowNum <= @Page * @PageSize )
  -- Return every row whose rank is inside that range, so rows tied on
  -- TimeStamp with a row in the window are included as well.
  select A.*
    from Alpha as A inner join
      Beta as B on B.MinRnk <= A.Rnk and A.Rnk <= B.MaxRnk
    order by [TimeStamp], ParentId;
EDIT:
An alternative query that assigns page numbers as it goes, so that next/previous page can be implemented without overlapping rows:
-- Requires the table #Temp and an Int variable @PageSize declared earlier in
-- the same batch (scalar variables must start with @; #PageSize is not valid).
with Alpha as (
  -- Every row with its rank and row number by TimeStamp.
  select ParentId, [TimeStamp], Value,
    Rank() over ( order by [TimeStamp] ) as Rnk,
    Row_Number() over ( order by [TimeStamp] ) as RowNum
    from #Temp ),
  Beta as (
  -- Anchor: the first row opens page 1 as its first row.
  select ParentId, [TimeStamp], Value, Rnk, RowNum, 1 as Page, 1 as PageRow
    from Alpha
    where RowNum = 1
  union all
  -- Step: take the next row by RowNum. A new page starts only when the current
  -- page already holds at least @PageSize rows AND the TimeStamp changes, so
  -- rows sharing a TimeStamp always stay on the same page.
  select A.ParentId, A.[TimeStamp], A.Value, A.Rnk, A.RowNum,
    case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then B.Page + 1 else B.Page end,
    case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then 1 else B.PageRow + 1 end
    from Alpha as A inner join
      Beta as B on B.RowNum + 1 = A.RowNum
  )
  select * from Beta
  -- One recursion level is used per row, so the recursion limit is lifted.
  option ( MaxRecursion 0 )
Note that recursive CTEs often scale poorly.
I think your strategy of using row_number() and rank() is overcomplicating things.
Just pick the top 4 timestamps from the data. Then choose any timestamps that match those:
-- Take the [timestamp] values of the first 4 rows by [TimeStamp], then return
-- every row whose [timestamp] equals one of them.
select t.*
from #temp as t
where exists (
    select 1
    from (select top 4 [timestamp] from #temp order by [TimeStamp]) as first_page
    where first_page.[timestamp] = t.[timestamp]
)

How do I remove all but some records based on a threshold?

I have a table like this:
-- Build the sample table (five names for id 1, four for id 2, one for id 3),
-- show its contents, then drop it again.
CREATE TABLE #TEMP
(
    id   int,
    name varchar(100)
);

INSERT INTO #TEMP (id, name)
VALUES
    (1, 'John'),
    (1, 'Adam'),
    (1, 'Robert'),
    (1, 'Copper'),
    (1, 'Jumbo'),
    (2, 'Jill'),
    (2, 'Rocky'),
    (2, 'Jack'),
    (2, 'Lisa'),
    (3, 'Amy');

SELECT t.*
FROM #TEMP AS t;

DROP TABLE #TEMP;
I am trying to remove all but some records for those that have more than 3 names with the same id. Therefore, I am trying to get something like this:
id name
1 Adam
1 Copper
1 John
2 Jill
2 Jack
2 Lisa
3 Amy
I am not understanding how to write this query. I have gotten to the extent of preserving one record but not a threshold of records:
-- Attempt from the question. FILTER holds the ids that occur at least 3 times.
-- For those ids the first branch returns a single row per id (MAX(name));
-- the second branch returns every row of the remaining ids unchanged.
;WITH FILTER AS
(
SELECT id
FROM #TEMP
GROUP BY id
HAVING COUNT(id) >=3
)
SELECT id, MAX(name)
FROM #TEMP
WHERE id IN (SELECT * FROM FILTER)
GROUP BY id
UNION
SELECT id, name
FROM #TEMP
WHERE id NOT IN (SELECT * FROM FILTER)
Gives me:
1 Robert
2 Rocky
3 Amy
Any suggestions? Oh by the way, I don't care what records are preserved while merging.
You can do it using CTE
-- Sample table: five names for id 1, four for id 2, one for id 3.
CREATE TABLE #TEMP
(
    id   int,
    name varchar(100)
);
INSERT INTO #TEMP (id, name) VALUES (1, 'John');
INSERT INTO #TEMP (id, name) VALUES (1, 'Adam');
INSERT INTO #TEMP (id, name) VALUES (1, 'Robert');
INSERT INTO #TEMP (id, name) VALUES (1, 'Copper');
INSERT INTO #TEMP (id, name) VALUES (1, 'Jumbo');
INSERT INTO #TEMP (id, name) VALUES (2, 'Jill');
INSERT INTO #TEMP (id, name) VALUES (2, 'Rocky');
INSERT INTO #TEMP (id, name) VALUES (2, 'Jack');
INSERT INTO #TEMP (id, name) VALUES (2, 'Lisa');
INSERT INTO #TEMP (id, name) VALUES (3, 'Amy');

-- Contents before the delete.
SELECT before_rows.*
FROM #TEMP AS before_rows;

-- Number the rows of each id and delete, through the CTE, every row numbered
-- above 3. The window orders by id only, so which rows of an id get numbers
-- 1..3 is not specified.
WITH numbered (N) AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY id ORDER BY id)
    FROM #Temp
)
DELETE FROM numbered
WHERE N > 3;

-- Contents after the delete.
SELECT after_rows.*
FROM #TEMP AS after_rows;

DROP TABLE #TEMP;
I will change your select like this (not tested)
-- ids that have more than 3 rows (names). Grouping must be by id: grouping by
-- name counts how often each name occurs, not how many names an id has.
select id from #temp group by id having count(*) > 3
then you can implement your query in a delete statement using your select as a where clause
in inner query you can use row_number function over (partition by id)
and then in outer query you have to give condition like below
-- Keep at most three distinct names per id: number each id's names and return
-- only numbers 1..3.
select id, name
from (
    select id, name,
           -- A window ORDER BY cannot use an integer position such as 1 in
           -- SQL Server, so the names themselves are used as the sort key.
           row_number() over (partition by id order by name) as count_id
    from #TEMP
    group by id, name
) as numbered  -- SQL Server requires an alias on a derived table
where count_id <= 3
If I got your question right, you need the rows whose id occurs 3 or more times:
-- Self-join on id so each (name, id) group contains one joined row per row
-- that shares its id; keep the groups with more than two such rows.
select a.name,
       a.id
from tbl1 as a
inner join tbl1 as b
    on b.id = a.id
group by a.name,
         a.id
having count(a.id) > 2