SQL Select Best practice

SQL Select Best practice - sql

The following works, I'm just wondering if this is the correct approach to finding the latest value for each audit field.
USE tempdb
CREATE Table Tbl(
TblID Int,
AuditFieldID Int,
AuditValue Int,
AuditDate Date
)
GO
INSERT INTO Tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(1,10,101,'1/1/2001')
INSERT INTO Tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(2,10,102,'1/1/2002')
INSERT INTO Tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(3,20,201,'1/1/2001')
INSERT INTO Tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(4,20,202,'1/1/2009')
SELECT AuditFieldID,AuditValue,AuditDate
FROM Tbl A
WHERE TblID=
(SELECT TOP 1 TblID
FROM Tbl
WHERE AuditFieldID=A.AuditFieldID
ORDER BY AuditDate DESC
)

Aggregate/ranking to get key and latest date, join back to get value.
This assumes SQL Server 2005+
DECLARE #tbl Table (
TblID Int,
AuditFieldID Int,
AuditValue Int,
AuditDate Date
)
INSERT INTO #tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(1,10,101,'1/1/2001')
INSERT INTO #tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(2,10,102,'1/1/2002')
INSERT INTO #tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(3,20,201,'1/1/2001')
INSERT INTO #tbl(TblID,AuditFieldID,AuditValue,AuditDate) VALUES(4,20,202,'1/1/2009')
;WITH cLatest AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY AuditFieldID ORDER BY AuditDate DESC) AS Ranking,
AuditFieldID, AuditDate
FROM
#tbl
)
SELECT
A.AuditFieldID, A.AuditValue, A.AuditDate
FROM
#tbl A
JOIN
cLatest C ON A.AuditFieldID = C.AuditFieldID AND A.AuditDate = C.AuditDate
WHERE
C.Ranking = 1

Simpler:
SELECT top 1 AuditFieldID,AuditValue,AuditDate
FROM Tbl
order by AuditDate DES

There are various methods for doing this. Different methods perform differently. I encourage you to look at this blog which explains the various methods.
Including an Aggregated Column's Related Values

you don't need the where statement as you are already selecting from tbl A AND selecting on the same field.

Related

How to rebuild a record from a change log

I have a table of changes to an entity. I am trying to rebuild the data from the changes.
This is my Changes table:
CREATE TABLE Changes
(
Id IDENTITY,
RecordId INT,
Field INT,
Val VARCHAR(MAX),
DateOfChange DATETIME
);
Field column is a reference to what field changed, Val is the new value, RecordId is the Id of the record that changed. Ideally the Record table would contain the latest values but I am not that lucky. There are 10 different fields that changes are tracked, mostly dates but some other types are thrown in there.
This is my Record table:
CREATE TABLE Records
(
Id IDENTITY,
AUserGeneratedIdentifer VARCHAR(12)
)
I'd like to have a view to query by the rolled up values.
SELECT
AUserGeneratedIdentitfier, DateOpened, DateClosed, etc
FROM
RecordView
WHERE
AUserGeneratedIdentitfier = 'something'
I am trying to implemented it with CTEs but I am wondering if this is the correct way. I am using a CTE per field I am trying to get to.
WITH DateOpened AS
(
SELECT
RecordId, Val,
ROW_NUMBER() OVER (PARTITION BY RecordId ORDER BY DateOfChanged DESC) Rank
FROM
Changes
WHERE
FieldId = #DateOpenedId
) --- ... Repeat for every field
SELECT (my fields)
FROM Records
INNER JOIN <all ctes> on Record Id
But this method feels wrong to me, possibly due to my lack of SQL experience. Is there a better way that I am missing here? What are the performance implications of having multiple CTEs on the same table and joining with them?
Please excuse the hastily thrown together pseudo code, I hope it illustrates my problem accurately

I think you should be able to use a single ROW_NUMBER and pivot it as below.
DECLARE #Records TABLE (Id INT IDENTITY, AUserGeneratedIdentifer VARCHAR(12))
DECLARE #Changes TABLE (Id INT IDENTITY, RecordId INT, Field INT, Val VARCHAR(MAX), DateOfChange DATETIME);
INSERT INTO #Records (AUserGeneratedIdentifer)
VALUES ('qwer'), ('asdf')
INSERT INTO #Changes (RecordId, Field, Val, DateOfChange)
VALUES (1, 1, 'foo', '2021-01-01' ),
(2, 1, 'fooz', '2021-01-01' ),
(2, 2, 'barz', '2021-01-01' ),
(1, 2, 'bar', '2021-01-01' ),
(1, 1, 'foo2', '2021-01-02' ),
(2, 2, 'barz2', '2021-01-02' )
SELECT piv.RecordId, piv.[1], piv.[2]
FROM (
SELECT C.RecordId, C.Field, C.Val, ROW_NUMBER() OVER (PARTITION BY C.RecordId, C.Field ORDER BY C.DateOfChange DESC) RowNum
FROM #Changes C
) sub
PIVOT (
MAX(Val)
FOR Field IN ([1], [2])
) piv
WHERE piv.RowNum = 1

Get SQL Distinct Row Without NULL Values

I'm trying to get a distinct row using SQL from set of records that have matching key/id value, but NULLs in different columns. Hard to explain so please see screenshot. Any ideas?
create temporary table my_table (
id varchar(30), segmentdate1 date, converted1 varchar(10), segmentdate2 date, converted2 varchar(10)
);
insert into my_table (
id, segmentdate1, converted1, segmentdate2, converted2
)
values
('Michael','9/15/2020','No',NULL,NULL),
('Michael',NULL,NULL,'7/1/2019','Yes')
;

You seem to want aggregation:
select id, max(segmentdate1) as segmentdate1, max(converted1) as converted1,
max(segmentdate2) as segmentdate2, max(converted2) as converted2
from t
group by id;
Note: I made up names for the columns so they are unique.
This is probably a result set created from another query. That query probably has the wrong group by keys. You should probably fix that query.

declare #my_table table (
id varchar(30), segmentdate1 date, converted1 varchar(10), segmentdate2 date, converted2 varchar(10)
);
insert into #my_table (
id, segmentdate1, converted1, segmentdate2, converted2
)
values
('Michael','9/15/2020','No',NULL,NULL),
('Michael',NULL,NULL,'7/1/2019','Yes')
;
select id,max(isnull(segmentdate1,'1200-01-01')) segmentdate2
,max(isnull(converted1,'')) converted1, max(isnull(segmentdate2,'1200-01-01')) segmentdate2
,max(isnull(converted2,'')) converted2
from #my_table
group by id

How to get the each record with some condition

I have following data:
DECLARE #temp TABLE (
ID int
,sn varchar(200)
,comment varchar(2000)
,rownumber int
)
insert into #temp values(1,'sn1',NULL,1)
insert into #temp values(2,'sn1','aaa',2)
insert into #temp values(3,'sn1','bbb',3)
insert into #temp values(4,'sn1',NULL,4)
insert into #temp values(5,'sn2',NULL,1)
insert into #temp values(6,'sn2',NULL,2)
insert into #temp values(7,'sn2',NULL,3)
select * from #temp
And I want to output like this:
2 sn1 aaa 2
5 sn2 NULL 1
same sn, if comment have value, get this lower rownumber's record. For sn1, have two records with comment value, so here, get the the record with rownumber=2
If comment doesn't have value, get the lower rownumber's record. For sn2, get the record with rownumber=1
May I know how to write this SQL?

This is a prioritization query. I think row_number() is the simplest method:
select t.*
from (select t.*,
row_number() over (partition by sn
order by (case when comment is not null then 1 else 2 end),
rownumber
) as seqnum
from #temp t
) t
where seqnum = 1;
Here is a db<>fiddle.

sorting SQL with substring on string

i have the data like this:
CODE_VD
N_10_19_xxx
N_0_3_xxx
N_121_131_xxx
N_100_120_xxx
N_80_90_xxx
N_20_29_xxx
as you can see i need to sort just the first number after N_,i don't know how can i get this number.
i have tried with susbsting(CODE_VD,2,3) but not exactly what i expected.
i want to get this:
CODE_VD
N_0_3_xxx
N_10_19_xxx
N_20_29_xxx
N_80_90_xxx
N_100_120_xxx
N_121_131_xxx
how can i do that ?

DECLARE #MyTable TABLE
(
CODE_VD VARCHAR(20)
)
INSERT INTO #MyTable
( CODE_VD )
VALUES
('N_10_19_xxx'),
('N_0_3_xxx'),
('N_121_131_xxx'),
('N_100_120_xxx'),
('N_80_90_xxx'),
('N_20_29_xxx');
SELECT * FROM
(
SELECT
*,
CONVERT(INT,
SUBSTRING(mt.CODE_VD,
3,
CHARINDEX('_', mt.CODE_VD, 3) - 3)) ConvCol
FROM #MyTable mt
) mt
ORDER BY mt.ConvCol
I converted to int to get the sort to work correctly, because 100 > 20

SELECT SUBSTRING(CODE_VD,3, CHARINDEX('_',CODE_VD, 3)-3)

declare #t Table (CODE_VD VARCHAR(MAX))
INSERT INTO #t (CODE_VD)VALUES ('N_10_19_xxx')
INSERT INTO #t (CODE_VD)VALUES ('N_0_3_xxx')
INSERT INTO #t (CODE_VD)VALUES ('N_121_131_xxx')
INSERT INTO #t (CODE_VD)VALUES ('N_100_120_xxx')
;WITH
sorted
AS
(
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY SUBSTRING(CODE_VD,3, CHARINDEX('_',CODE_VD, 3)-3) ORDER BY CODE_VD) AS sequence_id
FROM
#t
)
SELECT
CODE_VD
FROM
sorted
WHERE
sequence_id = 1

optimizing SQL query with multiple keys

I have a table in a database with a primary key, and a 'second' key as well. This second key can have the same value occur more than once in the table, but often i only want to return the most recent row for that second key. I have an existing query that works below, but I feel like it's very ugly and there should be a simpler way to do this instead of creating a table variable, going through a loop, and inserting 1 row into the table variable on each pass through the loop. Am i making this too hard?
declare #RowCnt int
declare #MaxRows int
declare #secondID as uniqueidentifier
DECLARE #retList TABLE(
firstGUID uniqueidentifier,
secondGUID uniqueidentifier,
name nvarchar(50),
DateCreated datetime
)
select #RowCnt = 1
declare #Import table (rownum int IDENTITY (1, 1) Primary key NOT NULL , secondGUID uniqueidentifier)
insert into #Import (secondGUID) SELECT DISTINCT dbo.TestTable.secondGUID FROM dbo.TestTable
select #MaxRows=count(*) from #Import
while #RowCnt <= #MaxRows
begin
select #secondID=secondGUID from #Import where rownum = #RowCnt
INSERT INTO #retList
SELECT TOP (1) firstGUID,secondGUID,name,datecreated
FROM dbo.TestTable
WHERE dbo.TestTable.secondGUID = #secondID
ORDER BY DateCreated Desc
Set #RowCnt = #RowCnt + 1
END
select * from #retList
EDIT:
for example, imagine the table has these values
firstGUID secondGUID Name DateCreated
EAD50999-E9B1-43F0-9FA6-615405FA5A9A 6163B6ED-6AF4-494E-ACE6-184F4804847B Test1 2014-04-11 15:12:36.303
A9645486-1021-4E98-92AC-1205CC3FB9D3 6163B6ED-6AF4-494E-ACE6-184F4804847B Test2 2014-04-10 15:21:46.087
DEE375BB-BFAF-44BE-AC64-06D7702E2ACB 3BD0A2F0-4E44-43B9-BD24-003B518609C7 Test3
2014-04-11 15:22:37.097
I only want the Test1 and Test3 rows to be returned.

You could use SQLServer's analytical functions:
select firstGUID, secondGUID, name, datecreated
from (select t.*,
rank() over (partition by secondGUID order by datecreated desc) r
from TestTable t) ilv
where r=1

I'm not 100% sure I understand what you're asking, but it sounds like you want to select only the rows containing the max DateCreated. The normal way of doing that is to join with a subselect that uses a group by clause, eg.:
select tt.*
from TestTable tt
join (
select firstguid, max(DateCreated) as maxdate
from TestTable
group by firstguid
) gtmp on tt.firstguid = gtmp.firstguid and tt.dateCreated = gtmp.maxdate

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL Select Best practice - sql

Simpler: SELECT top 1 AuditFieldID,AuditValue,AuditDate FROM Tbl order by AuditDate DES

There are various methods for doing this. Different methods perform differently. I encourage you to look at this blog which explains the various methods. Including an Aggregated Column's Related Values

you don't need the where statement as you are already selecting from tbl A AND selecting on the same field.

Related

How to rebuild a record from a change log

Get SQL Distinct Row Without NULL Values

How to get the each record with some condition

sorting SQL with substring on string

optimizing SQL query with multiple keys

Categories

Resources