select the latest result based on DateTime field - sql

I have a simple table with only 4 fields.
http://sqlfiddle.com/#!3/06d7d/1
-- Sample schema: one row per assessment taken by a person.
CREATE TABLE Assessment (
-- Auto-incrementing surrogate key.
id INTEGER IDENTITY(1,1) PRIMARY KEY,
personId INTEGER NOT NULL,
-- When the assessment was taken; the queries below pick the latest per person.
dateTaken DATETIME,
outcomeLevel VARCHAR(2)
)
-- Sample data: two assessments each for persons 1 and 2.
INSERT INTO Assessment (personId, dateTaken, outcomeLevel)
VALUES (1, '2014-04-01', 'L1')
INSERT INTO Assessment (personId, dateTaken, outcomeLevel)
VALUES (1, '2014-04-05', 'L2')
INSERT INTO Assessment (personId, dateTaken, outcomeLevel)
VALUES (2, '2014-04-03', 'E3')
INSERT INTO Assessment (personId, dateTaken, outcomeLevel)
VALUES (2, '2014-04-07', 'L1')
I am trying to select for each "personId" their latest assessment result based on the dateTaken.
So my desired output for the following data would be.
[personId, outcomeLevel]
[1, L2]
[2, L1]
Thanks,
Danny

Try this:
-- Latest assessment per person: compute each person's most recent dateTaken,
-- then join back to Assessment to pick up the outcomeLevel recorded on that date.
;WITH latest_per_person (pid, maxdate) AS
(
    SELECT personId, MAX(dateTaken)
    FROM assessment
    GROUP BY personId
)
SELECT
    personId,
    outcomeLevel
FROM assessment AS a
INNER JOIN latest_per_person AS lp
    ON lp.pid = a.personId
   AND lp.maxdate = a.dateTaken
ORDER BY a.personId

-- Number each person's assessments from newest to oldest and keep only the
-- newest one (rn = 1). Reads the Assessment table defined in the question and
-- returns just the two requested columns, not the helper row number.
;WITH ranked AS (
    SELECT
        personId,
        outcomeLevel,
        ROW_NUMBER() OVER (PARTITION BY personId ORDER BY dateTaken DESC) AS rn
    FROM Assessment
)
SELECT
    personId,
    outcomeLevel
FROM ranked
WHERE rn = 1
Sample here

-- Keep an assessment row only when its dateTaken equals the latest dateTaken
-- recorded for the same person (correlated subquery).
SELECT
    cur.personId,
    cur.outcomeLevel
FROM dbo.Assessment AS cur
WHERE cur.dateTaken = (
    SELECT MAX(same_person.dateTaken)
    FROM assessment AS same_person
    WHERE same_person.personId = cur.personid
)
ORDER BY cur.personId
Result will be like this
personId outcomeLevel
1 L2
2 L1

Here is a possible solution using common table expression:
-- CTE numbers each person's assessments newest-first; the outer query keeps
-- the row numbered 1 for every person.
WITH ranked_assessments (personId, outcomeLevel, rn) AS (
    SELECT
        a.personId,
        a.outcomeLevel,
        ROW_NUMBER() OVER (PARTITION BY a.personId ORDER BY a.dateTaken DESC)
    FROM [dbo].[Assessment] AS a
)
SELECT
    r.personId,
    r.outcomeLevel
FROM ranked_assessments AS r
WHERE r.rn = 1
About CTEs
A common table expression (CTE) can be thought of as a temporary result set that is defined within the execution scope of a single SELECT, INSERT, UPDATE, DELETE, or CREATE VIEW statement. A CTE is similar to a derived table in that it is not stored as an object and lasts only for the duration of the query. Unlike a derived table, a CTE can be self-referencing and can be referenced multiple times in the same query. From MSDN: Using Common Table Expressions

try this:
-- Derive each person's latest dateTaken, then join back to Assessment to
-- fetch the outcomeLevel recorded on that date.
SELECT a.personId, a.outcomeLevel
FROM Assessment AS a
INNER JOIN
(
    SELECT personId, MAX(dateTaken) AS datetaken1
    FROM Assessment
    GROUP BY personId
) AS b
    -- Match on the person as well as the date: joining on the date alone would
    -- also return any other person's row that happens to share that date.
    ON a.personId = b.personId
   AND a.dateTaken = b.datetaken1
demo: http://sqlfiddle.com/#!3/06d7d/9
Idea is to first derive a table with the max dates per person and then join that with the original table on the date field so you can get the outcome level for this maxed date...

This should work perfectly without cte :
-- For every distinct person, CROSS APPLY fetches that person's single most
-- recent assessment (TOP 1 ordered by dateTaken descending).
SELECT
    latest.[personId],
    latest.[outcomeLevel]
FROM (
    SELECT DISTINCT src.[personId]
    FROM [Assessment] AS src
) AS people
CROSS APPLY (
    SELECT TOP (1)
        a.[personId],
        a.[outcomeLevel],
        a.[dateTaken]
    FROM [Assessment] AS a
    WHERE a.[personId] = people.[personId]
    ORDER BY a.[dateTaken] DESC
) AS latest
ORDER BY latest.[dateTaken] DESC

Related

Turn these temp tables into one longer subquery (can't use Temp tables in Power BI)

Currently I have created these temp tables to get the desired output I need. However, Power BI doesn't allow the use of temp tables so I need to get this all into 1 query using inner selects.
drop table if exists #RowNumber
Select Date, ID, ListID
, row_number() over (partition by ID order by ID) as rownum
into #RowNumber
from Table
where Date= cast(getdate()-1 as date)
group by Date, ID, ListID
order by ID
drop table if exists #1stListIDs
select ListID as FirstID, ID, Date
into #1stListIDs
from #RowNumber
where rownum = 1
drop table if exists #2ndlistids
Select ListID as SecondListID, ID, Date
into #2ndlistids
from #RowNumber
where rownum = 2
--Joins the two tables back together so both list ids sit in the same row.
--#1stListIDs exposes its list id as FirstID, so it is re-aliased to
--FirstListID here for the final table.
drop table if exists #FinalTableWithTwoListIDs
select f.FirstID as FirstListID, s.SecondListID, s.ID, s.Date
into #FinalTableWithTwoListIDs
from #2ndlistids s
join #1stListIDs f on s.ID = f.ID
order by s.ID
This code is simple and straightforward. However, I can't seem to figure out how to do the same thing using a subquery. Here is what I have. It works for the FirstListID select statement, but not for the SecondListID portion. I believe this is because you can't reference the innermost select statement from multiple different outer select statements, but I could be wrong.
Select a.ListId as SecondListID, a.ID, a.Date
from (
select a.ListId as FirstListID, a.ID, a.Date
from (
Select Date, ID, ListId
, row_number() over (partition by ID order by ID) as rownum
from Table
where Date = cast(getdate()-1 as date)
group by Date, ID, ListId
order by ID) a
where a.rownum = 1) b
where a.rownum = 2) c
Just to show, for completeness, how you could use CTEs to replace the #temp tables, it would be something along the lines of
-- Stage 1: number yesterday's distinct (Date, ID, ListID) rows within each ID.
WITH RowNumber AS (
    SELECT
        Date,
        ID,
        ListID,
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID) AS rownum
    FROM Table
    WHERE Date = CAST(DATEADD(day, -1, GETDATE()) AS date)
    GROUP BY Date, ID, ListID
),
-- Stage 2: the row numbered 1 for each ID.
FirstListIDs AS (
    SELECT ListID AS FirstID, ID, Date
    FROM RowNumber
    WHERE rownum = 1
),
-- Stage 3: the row numbered 2 for each ID.
SecondListIDs AS (
    SELECT ListID AS SecondID, ID, Date
    FROM RowNumber
    WHERE rownum = 2
)
-- Put both list ids on one row; IDs without a second row drop out of the join.
SELECT
    f.FirstID,
    s.SecondID,
    s.ID,
    s.Date
FROM SecondListIDs AS s
INNER JOIN FirstListIDs AS f
    ON f.ID = s.ID
ORDER BY s.ID
Note the use of dateadd, which is recommended over the ambiguous date +/- value form (where the value is merely assumed to be days), and, where relevant, meaningful table aliases.
You could do it with a CTE and joining the two together, but that is inefficient and unnecessary.
It looks like you just need LAG to get the previous ListID
I note that PARTITION BY ID ORDER BY ID is non-deterministic and the ordering will be random. I strongly suggest you find a deterministic ordering.
-- Return, for each ID, its second row together with the ListID of the row
-- before it (via LAG), so both list ids appear on one row without a self-join.
SELECT
    PrevID AS FirstListID,   -- column aliases cannot carry a table qualifier
    ListID AS SecondListID,
    ID,
    Date
FROM (
    SELECT
        Date,
        ID,
        ListID,
        -- NOTE: ORDER BY ID inside PARTITION BY ID gives no defined order
        -- within a partition; substitute a deterministic ordering column.
        ROW_NUMBER() OVER (PARTITION BY ID ORDER BY ID) AS rownum,
        LAG(ListID) OVER (PARTITION BY ID ORDER BY ID) AS PrevID
    FROM [Table]
    WHERE Date = CAST(GETDATE() - 1 AS date)
    GROUP BY Date, ID, ListID
) AS WithRowAndLag
WHERE rownum = 2
ORDER BY ID;

Select the newest record

I would like to run a select statement that returns only the newest record, according to the Recored_timestamp field, for the keys teacher_id and student_id. Whenever it runs, it should return only one record for those keys. How could I do this, please? The output does not need to include the Recored_timestamp field. Thanks
Using the window function,partitioned by teacher_id and student_id and sorting it by recorded_timestamp will give you the desired result.
-- Number the rows of every (teacher_id, student_id) pair newest-first and
-- keep only the first row of each pair.
select *
from (
    select
        teacher_id,
        student_id,
        teacher_name,
        comment,
        recorded_timestamp,
        row_number() over (
            partition by teacher_id, student_id
            order by recorded_timestamp desc
        ) as rownum
    from temp0607
) out1
where rownum = 1
Also you may have to look at the way recorded_timestamp is stored. If it is stored as string, you can convert it into timestamp using from_unixtime(unix_timestamp(recorded_timestamp,'dd/MM/yyyy HH:mm'),'dd/MM/yyyy HH:mm')
First, arrange the record by datetime
SELECT *,RANK() OVER (PARTITION BY student_id ORDER BY Recored_timestamp desc) as ranking
FROM #temp
Then, if you want to know the newest record with student_id which is not null, then you can use OUTER APPLY to add a column which is non-NULL student_id.
OUTER APPLY (SELECT student_id
FROM #temp
WHERE #temp.teacher_id = ranktable.teacher_id
AND student_id IS NOT NULL
) AS jointable
Here is an example:
Create Table #temp
(
teacher_id int
,student_id int
,teacher_name varchar(40)
,comment varchar(100)
,Recored_timestamp datetime
)
INSERT INTO #temp
VALUES
(449,111,'lucy','Could be better','2021-05-04 07:00:00.000')
,(449,null,'lucy','smily','2021-05-11 07:00:00.000')
,(449,111,'lucy','not listening','2021-05-08 07:00:00.000')
,(448,null,'Toni','Good','2021-06-04 09:00:00.000')
,(448,222,'Toni','not doing as expected','2021-06-04 08:00:00.000')
-- Newest record per teacher, with student_id taken from that teacher's rows
-- whose student_id is not NULL.
-- DISTINCT collapses the repeats produced when OUTER APPLY returns the same
-- student_id more than once for a teacher.
SELECT DISTINCT teacher_id,
jointable.student_id,
teacher_name,
comment,
Recored_timestamp,
ranking
FROM
(
-- Rank each teacher's rows newest-first; rows with equal timestamps share a rank.
SELECT *,RANK() OVER (PARTITION BY teacher_id ORDER BY Recored_timestamp DESC) AS ranking
FROM #temp
) AS ranktable
-- For each ranked row, fetch every non-NULL student_id stored for the same
-- teacher; OUTER APPLY keeps the ranked row even when no such student_id exists.
OUTER APPLY (SELECT student_id
FROM #temp
WHERE #temp.teacher_id = ranktable.teacher_id
AND student_id IS NOT NULL
) AS jointable
WHERE ranking = 1 --only newest record will be extracted
Drop table #temp
You can use this query as a starting point to get the newest data.
-- Join each row to its teacher's latest Recored_timestamp so only the newest
-- row per teacher survives, then return the single newest of those.
SELECT TOP 1 *
FROM tablename T1
INNER JOIN (
    SELECT teacher_id, MAX(Recored_timestamp) AS MaxDate
    FROM tablename
    GROUP BY teacher_id
) T2
    ON T2.teacher_id = T1.teacher_id
   AND T1.Recored_timestamp = T2.MaxDate
-- TOP without ORDER BY returns an arbitrary row; order newest-first, with
-- teacher_id as a tie-breaker, so the chosen row is predictable.
ORDER BY T1.Recored_timestamp DESC, T1.teacher_id

SQL - All values from first table joined with first value from second one and null otherwise

is it possible to do a join like this in SQL:
We want to join the two tables on SessionID, but the resulting table should return full table "Pageviews" combined with only a single value of Sessions for each unique SessionID (for others the value should be NULL, please see the 'Desired Result' table for explanation).
Thank you.
Based on your requirement (and strictly on the data provided), the code below would do the job -
-- Demo using table variables (these must be named with @, not #).
-- Run the whole script as one batch: table variables do not outlive the batch.
declare @pageviews table(sessionid char(10), pageviews tinyint)
declare @sessions table(sessionid char(10), sessions tinyint)
insert into @pageviews values
('FA-1',34),
('FA-1',36),
('FA-2',23),
('FA-3',11),
('FA-3',32),
('FA-3',25)
insert into @sessions values
('FA-1',23),
('FA-2',14),
('FA-3',9)
-- Number the pageview rows within each session; only the row numbered 1 shows
-- the session's value, every other row of that session shows NULL.
;with cte as
(select sessionid,pageviews,ROW_NUMBER() OVER(PARTITION BY sessionid ORDER BY sessionid desc) rn
from @pageviews)
select p.sessionid,p.pageviews,case when rn = 1 then s.sessions else null end as sessions
from cte p inner join @sessions s
on p.sessionid = s.sessionid
You can do like this.
-- Keep every pageviews row; attach the session value only to the row
-- numbered 1 within each sessionid (all other rows get NULL).
SELECT
    pv.SessionID,
    pv.pageviews,
    s.Sessions
FROM (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY sessionid ORDER BY sessionid DESC) AS ROWNUM,
        sessionid,
        pageviews
    FROM pageviews
) AS pv
LEFT JOIN sessions AS s
    ON s.sessionid = pv.sessionid
   AND pv.ROWNUM = 1
SQLFiddle

SQL - Finding the most common value, in a column, for different sets in a table

I have a database with two tables; [Clients] and [Transactions]. I have a foreign key assigned to either one or multiple records in the [Transactions] table that relates to the [Clients] table.
In the [Transactions] table, I have a field, called 'URL', that is populated with URLs (it is what it says on the tin). I want to go through all the records in the [Transactions] table and find out which is the most common value in the 'URL' field for each of the sets associated with the [Clients] table.
Once I have the most common values, I want to insert them into the [Clients] table, in a field called 'URL' (just like in the [Transactions] table), against the associated Client record.
I'm sure I could figure most of it out, my only problem is with finding the most common values for many different small groups. Any help is appreciated!
SAMPLE DATA:
[Clients]
ID Name URL
-----------------------------------
999999999 Testing Client 1 NULL
999999998 Testing Client 2 NULL
999999997 Testing Client 3 NULL
999999996 Testing Client 4 NULL
999999995 Testing Client 5 NULL
[Transactions]
ID ClientID URL
-----------------------------------------
73611 999999999 http://www.google.com
73612 999999999 http://www.yahoo.com
73613 999999999 http://www.google.com
73626 999999998 http://www.stackoverflow.com
73627 999999998 http://www.stackoverflow.com
73628 999999998 http://www.slack.com
73629 999999997 http://www.dotnetpearls.com
73630 999999997 http://www.c-sharpcorner.com
73631 999999996 http://www.roastmymealdeal.co.uk
73632 999999996 http://www.roastmymealdeal.co.uk
73633 999999996 http://www.roastmymealdeal.co.uk
73634 999999996 NULL
73635 999999995 NULL
73636 999999995 http://www.w3schools.com
73637 999999995 http://www.w3schools.com
This will update the Clients table with the most common URL:
-- Set each client's URL to the URL that occurs most often in its transactions.
;with url_counts as (
    -- One row per client/URL combo, with how many times that combo occurs.
    select t.ClientID, t.URL, count(1) as URLCount
    from Transactions t
    group by t.ClientID, t.Url
),
url_ranks as (
    -- Rank the combos per client by URLCount (descending); the highest count
    -- gets URLRank 1, and the rank restarts for every client.
    select ClientID, URL,
        row_number() over (partition by clientID order by URLCount desc) as URLRank
    from url_counts
)
update Clients
set Clients.URL = r.URL
from url_ranks r
where r.URLRank = 1          -- only the highest-ranking URL
  and Clients.ID = r.ClientID
If you just want to see what the most common URL is for each client, use this:
-- Show the most common URL for each client without changing any data.
;with url_counts as (
    select t.ClientID, t.URL, count(1) as URLCount
    from Transactions t
    group by t.ClientID, t.Url
),
url_ranks as (
    select ClientID, URL,
        row_number() over (partition by clientID order by URLCount desc) as URLRank
    from url_counts
)
select ClientID, URL, URLRank
from url_ranks
where URLRank = 1
You could use 2 CTE like this
-- First CTE: transaction count per client/URL (clients without transactions
-- are kept by the LEFT JOIN, with a count of 0).
;WITH client_url_counts AS
(
    SELECT
        cl.ID AS ClientID,
        t.URL,
        COUNT(t.ID) AS NumberTransactions
    FROM Clients AS cl
    LEFT JOIN [Transactions] AS t
        ON t.ClientID = cl.ID
    GROUP BY cl.ID, t.URL
),
-- Second CTE: number each client's URLs from most to least used.
ranked_urls AS
(
    SELECT
        cuc.ClientID,
        cuc.URL,
        ROW_NUMBER() OVER (PARTITION BY cuc.ClientID ORDER BY cuc.NumberTransactions DESC) AS Rn
    FROM client_url_counts AS cuc
)
SELECT
    r.ClientID,
    r.URL
FROM ranked_urls AS r
WHERE r.Rn = 1
Given this data:
IF OBJECT_ID('tempdb..#client') IS NOT NULL DROP TABLE #client
IF OBJECT_ID('tempdb..#transactions') IS NOT NULL DROP TABLE #transactions
CREATE TABLE #client (id INT, name VARCHAR(100), url VARCHAR(100));
CREATE TABLE #transactions (id INT IDENTITY(1, 1), clientID INT, url VARCHAR(100));
INSERT #client
( id, name, url )
VALUES ( 9, 'a', null ),
( 8, 'b', null ),
( 7, 'c', null ),
( 6, 'd', null ),
( 5, 'e', null );
INSERT #transactions
( clientID, url )
VALUES (9, 'http://www.google.com' ),
(9, 'http://www.yahoo.com' ),
(9, 'http://www.google.com' ),
(8, 'http://www.stackoverflow.com' ),
(8, 'http://www.stackoverflow.com' ),
(8, 'http://www.slack.com' ),
(7, 'http://www.dotnetpearls.com' ),
(7, 'http://www.c-sharpcorner.com' ),
(6, 'http://www.roastmymealdeal.co.uk' ),
(6, 'http://www.roastmymealdeal.co.uk' ),
(6, 'http://www.roastmymealdeal.co.uk' ),
(6, NULL ),
(5, NULL ),
(5, 'http://www.w3schools.com' ),
(5, 'http://www.w3schools.com' );
This query would get the count of each url per clientID. We do however get one row per transaction
SELECT
urlCount = COUNT(*) OVER (PARTITION BY clientID, url)
, transactionURL = t.URL
, t.clientID
FROM #transactions t
From that query we only want the highest urlCount, which we get by ordering on urlCount DESC and getting the TOP 1. We do want to get this top counted URL per client.
We can do that by using cross apply, to run this count and top 1 for each client. Cross apply will run the inner query once per row in the client table. The inner query is filtered on client.ID to get the urlCount for that client.
The inner query is sorted by urlCount DESC to get the highest counted URL (per client). As a tie-breaker it is also sorted on transactions.ID - this is done so that it would produce the same result each time it was run.
SELECT c.url, transURLs.transactionURL, c.id
FROM #client c
CROSS APPLY (
SELECT TOP 1
urlCount = COUNT(*) OVER (PARTITION BY clientID, url)
, transactionURL = t.URL
FROM #transactions t
WHERE t.clientID = c.ID
ORDER BY urlCount DESC, t.id
) transURLs
To wrap it up we simply update
-- Pair every #client row with its most frequent transaction URL, then write
-- that URL back to #client by updating through the CTE.
WITH baseData AS (
SELECT c.url, transURLs.transactionURL, c.id
FROM #client c
-- Runs once per client row; clients with no transactions yield no row here
-- and are therefore left unchanged by the UPDATE.
CROSS APPLY (
SELECT TOP 1
-- Number of this client's transactions sharing the same url.
urlCount = COUNT(*) OVER (PARTITION BY clientID, url)
, transactionURL = t.URL
FROM #transactions t
WHERE t.clientID = c.ID
-- Highest count first; t.id breaks ties so repeated runs pick the same row.
ORDER BY urlCount DESC, t.id
) transURLs
)
-- baseData.url is #client.url, so this assignment updates the #client table.
UPDATE baseData
SET url = transactionURL;
SELECT * FROM #client
-- Count each client/URL pair, number the pairs per client from most to least
-- frequent, and copy the top-ranked URL onto the matching clients row.
with ctetbl (clientid, url, cnt, rowid) as
(
    select
        counted.clientid,
        counted.url,
        counted.cnt,
        row_number() over (partition by counted.clientid order by counted.cnt desc)
    from (
        select clientid, url, count(1) as cnt
        from transactions
        group by clientid, url
    ) as counted
)
update c
set url = top_url.url
from clients as c
inner join ctetbl as top_url
    on top_url.clientid = c.id
where top_url.rowid = 1
/*************Script to recreate the scenario *************/
Create table [Clients]
(
Id bigint PRIMARY KEY,
Name nvarchar(100),
URL nvarchar(1000)
)
Insert into Clients VALUES
(999999999,'Testing Client 1',NULL),
(999999998,'Testing Client 2',NULL),
(999999997,'Testing Client 3',NULL),
(999999996,'Testing Client 4',NULL),
(999999995,'Testing Client 5',NULL)
Create table Transactions
(
ID bigint,
ClientID bigint FOREIGN KEY REFERENCES Clients(ID),
URL nvarchar(1000)
)
Insert into Transactions VALUES
(73611, 999999999,'http://www.google.com'),
(73612, 999999999,'http://www.yahoo.com'),
(73613, 999999999,'http://www.google.com'),
(73626, 999999998,'http://www.stackoverflow.com'),
(73627, 999999998,'http://www.stackoverflow.com'),
(73628, 999999998,'http://www.slack.com'),
(73629, 999999997,'http://www.dotnetpearls.com'),
(73630, 999999997,'http://www.c-sharpcorner.com'),
(73631, 999999996,'http://www.roastmymealdeal.co.uk'),
(73632, 999999996,'http://www.roastmymealdeal.co.uk'),
(73633, 999999996,'http://www.roastmymealdeal.co.uk'),
(73634, 999999996,NULL),
(73635, 999999995,NULL),
(73636, 999999995,'http://www.w3schools.com'),
(73637, 999999995,'http://www.w3schools.com')
/***************List the tables *****************/
Select * from dbo.Clients
Select * from dbo.Transactions
/***************************************************************************************
cte_grp1 --
Group Transactions by ClientID and URL (NULL URLs are ignored) and count how many
times each URL occurs for each client.
Final SELECT --
Rank each client's URLs by that count (Rnk 1 = most common; tied URLs share a rank)
and store the result in #Temp_Resultant. The count is exposed as MaxCount.
**************************************************************************************/
-- Drop any copy left over from an earlier run so SELECT ... INTO can recreate it.
IF OBJECT_ID('tempdb..#Temp_Resultant') IS NOT NULL DROP TABLE #Temp_Resultant
;with cte_grp1
as
(
SELECT
ClientID,
URL,
Count(URL) as CountOfURL
FROM Transactions
WHERE URL IS NOT NULL
GROUP BY ClientID,URL
)
-- cte_grp1 already holds exactly one row per (ClientID, URL), so its count can
-- be ranked directly; no second aggregation or self-join is needed.
Select ClientID,URL,CountOfURL as MaxCount,
DENSE_RANK() OVER (PARTITION BY ClientId ORDER BY CountOfURL DESC) as Rnk
INTO #Temp_Resultant
from cte_grp1
/*******************************************************************
Using the temp table, build a comma-separated list (FOR XML PATH) of every
URL ranked 1 for a client. When a client visited several links equally
often, they all qualify to be written to the Clients table.
At last.. Update based on ClientID
************************************************************************/
;with resultant
as
(
Select distinct t2.ClientID,STUFF((SELECT ','+ t1.URL
FROM #Temp_Resultant t1
WHERE Rnk=1
AND t1.ClientID=t2.ClientID
-- TYPE + .value() returns the text un-escaped; plain FOR XML PATH('') would
-- turn characters such as & in a URL into XML entities (&amp;).
FOR XML PATH(''), TYPE).value('.', 'nvarchar(max)'),1,1,'') as CommonURL
From #Temp_Resultant t2
)
Update A
SET A.URL=B.CommonURL
FROM Clients A INNER JOIN resultant B
ON A.Id=B.ClientID
---Check the results
Select * from Clients
Select * from Transactions

Remove duplicates before insert sql

I have temporary table #tempRD and I am trying to insert the resultset into a table as follows:
-- Insert the routing rows built from #tempRD joined to the service's routing keys.
-- @ID is a variable holding the service id; it must be declared or passed in
-- by the surrounding batch (a # prefix would name a temp table, not a variable).
insert into Routing (RoutingKeyID, LocationID, Data, ServiceID, CountryID)
select
    rk.ID, rd.LocationID, rd.Data, rd.service, rd.CountryID
from
    #tempRD rd
inner join
    RoutingKey rk on rk.serviceID = @ID and rk.Name = rd.[Key]
Now when this happens I get duplicate key errors
Cannot insert duplicate key row in object 'dbo.Routing' with unique index 'UIX_Routing_RoutingKeyID_CountryID'. The duplicate key value is (51, 433)
How can I check if a row by routingkeyid and countryid already exists before I do an insert?
I have used a cursor to do this but it takes a long long time.
Try this?
-- Insert only the rows whose (RoutingKeyID, CountryID) is not already in Routing.
-- @ID is the service id variable supplied by the surrounding batch.
-- Note: this checks against Routing only; two #tempRD rows that map to the
-- same (RoutingKeyID, CountryID) both pass the check and would still collide.
INSERT INTO
    Routing (
        RoutingKeyID,
        LocationID,
        Data,
        ServiceID,
        CountryID)
SELECT
    rk.ID,
    rd.LocationID,
    rd.Data,
    rd.[service],
    rd.CountryID
FROM
    #tempRD rd
    INNER JOIN RoutingKey rk ON rk.serviceID = @ID AND rk.Name = rd.[Key]
    -- Anti-join: r is NULL-extended when no matching Routing row exists yet.
    LEFT JOIN Routing r ON r.RoutingKeyID = rk.ID AND r.CountryID = rd.CountryID
WHERE
    r.RoutingKeyID IS NULL;
-- Insert one row per (RoutingKeyID, CountryID) combination found in #tempRD.
-- @ID is the service id variable supplied by the surrounding batch.
insert into Routing (RoutingKeyID, LocationID, Data, ServiceID, CountryID)
-- Explicit column list: the helper column rn must not reach the INSERT, which
-- accepts exactly five columns.
select a.ID, a.LocationID, a.Data, a.service, a.CountryID
from (
    select rk.ID, rd.LocationID, rd.Data, rd.service, rd.CountryID,
        -- row_number() gives exactly one row the value 1 per combination;
        -- rank() would give 1 to every fully identical row and let the
        -- duplicates through.
        row_number() over (
            partition by rk.ID, rd.CountryID
            order by rd.LocationID, rd.Data, rd.Service
        ) as rn
    from #tempRD rd
    inner join RoutingKey rk on rk.serviceID = @ID and rk.Name = rd.[Key]
) a
where a.rn = 1
With a ranking window function over(partition by ... order by ....) you number the records that share a duplicate combination of rk.ID, rd.CountryID. At the end you apply the WHERE-clause to keep only the first occurrence of each combination.
You could also use a select distinct if the other fields are also duplicates. I didn't try the code, so there may be typos ;-)
Using the ROW_NUMBER window function you can find the duplicates. Note that I have ordered by id desc to keep the latest record of each set of duplicates.
-- Number the candidate rows within each (RoutingKeyID, CountryID) combination
-- and insert only the first of each, so the batch contains no duplicate keys.
-- @ID is the service id variable supplied by the surrounding batch.
;WITH cte
     AS (SELECT Row_number() OVER(
                    PARTITION BY rk.ID, rd.CountryID
                    -- assumes #tempRD has an ID column where higher = later;
                    -- TODO confirm, or substitute the column that defines "latest"
                    ORDER BY rd.ID DESC) AS rn,
                rk.ID AS RoutingKeyID,
                rd.LocationID,
                rd.Data,
                rd.service,
                rd.CountryID
         FROM   #tempRD rd
                -- RoutingKey must be joined here: it is the source of RoutingKeyID.
                INNER JOIN RoutingKey rk
                        ON rk.serviceID = @ID
                       AND rk.Name = rd.[Key])
INSERT INTO Routing
            (RoutingKeyID,LocationID,Data,ServiceID,CountryID)
-- Outside the CTE only its own column names are visible, not the rk/rd aliases.
SELECT RoutingKeyID,
       LocationID,
       Data,
       service,
       CountryID
FROM   cte
WHERE  rn = 1
Can use something like
-- Insert a single routing row only when its key is not present yet.
-- The @ variables hold the candidate row's values and are supplied by the caller.
MERGE INTO Routing
USING (SELECT @RoutingKeyID, @LocationID, @Data, @ServiceID, @CountryID)
    AS source (RoutingKeyID, LocationID, Data, ServiceID, CountryID)
-- Match on both columns of the unique index (RoutingKeyID, CountryID);
-- matching on RoutingKeyID alone would skip valid rows for other countries.
ON (Routing.RoutingKeyID = source.RoutingKeyID
    AND Routing.CountryID = source.CountryID)
WHEN NOT MATCHED THEN
    INSERT (RoutingKeyID, LocationID, Data, ServiceID, CountryID)
    VALUES (source.RoutingKeyID, source.LocationID, source.Data, source.ServiceID, source.CountryID);
-- A MERGE statement must end with a semicolon.