SQL Server : ROW_NUMBER() OVER on table variable to amend duplicate columns - sql

Let's say I have a table variable declared like so...
-- Staging table with two NVARCHAR(MAX) columns: an id and a name.
-- NOTE(review): T-SQL table variables are named with a leading @ (DECLARE @LocalTable TABLE);
-- a leading # denotes a temporary table, which is created with CREATE TABLE. Confirm which was intended.
DECLARE #LocalTable TABLE
(
IdField NVARCHAR(MAX),
NameField NVARCHAR(MAX)
)
And I populate it like so...
-- Copy every (id, name) pair from SourceTable into the staging table.
INSERT INTO #LocalTable (IdField, NameField)
SELECT
    src.IdColumn,
    src.NameColumn
FROM SourceTable AS src
NameColumn in the source table may have duplicate values, and therefore NameField in the local table will have the same duplicate values.
And let's say I want to insert the local table into a target table like so...
-- Push the staged rows into the target table unchanged.
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT
    staged.IdField,
    staged.NameField
FROM #LocalTable AS staged
BUT: NewNameColumn in TargetTable has a UNIQUE constraint, and so duplicates cause an exception.
I want to apply this example,
ROW_NUMBER() OVER(PARTITION BY NameField ORDER BY NameField)
Such that the NameField is appended/suffixed with a number digit indicating its duplication.
I have this working example that can select correctly appended values, but I cannot get this to work in an update statement like this:
-- Attempted update: AppendedNameField is not a column of #LocalTable and nothing in this
-- statement produces it, so the SET has no source for the new value.
UPDATE localtable
SET NameField = AppendedNameField
FROM #LocalTable AS localtable
-- Separate SELECT that computes the suffixed names. The self-join keeps only rows whose
-- NameField also appears on a row with a different IdField.
SELECT
CONCAT(Ref.NameField, ROW_NUMBER() OVER (PARTITION BY Ref.NameField
ORDER BY Source.IdField)), *
FROM
#LocalTable AS Source
INNER JOIN
#LocalTable AS Ref ON Ref.NameField = Source.NameField
AND Ref.IdField != Source.IdField
Thanks in advance.

If I have understood what you are trying to do.
-- Number the rows inside each NameField group (ordered by IdField) and write the
-- suffixed name back to the base table through the updatable CTE.
WITH CTE AS
(
    SELECT
        NameField,
        CONCAT(NameField,
               ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField)) AS NewName
    FROM #LocalTable
)
UPDATE CTE
SET NameField = NewName; -- the updatable column is NameField; the table has no column called Name
If you only want to do this for duplicated names, you can add COUNT(*) OVER (PARTITION BY NameField) to the CTE and apply conditional logic based on it.

You don't necessarily need to update the table, couldn't you just add the suffix when inserting?
-- Sample data: one unique name and one name that occurs twice.
-- Table variables are declared with an @ prefix.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));

INSERT INTO @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

-- Suffix each duplicated name with its position inside its group; names that occur
-- once are inserted unchanged.
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT
    IdField,
    CONCAT(NameField,
           CASE WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                -- Cast so both CASE branches are strings. With a bare ROW_NUMBER() (bigint)
                -- the '' branch is implicitly converted to 0 and unique names get a '0' suffix.
                THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
                ELSE ''
           END)
FROM @LocalTable
ORDER BY IdField;
Alternatively, you can update by simply wrapping the above select in a subquery, and updating that:
-- Sample data: one unique name and one name that occurs twice.
-- Table variables are declared with an @ prefix.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));

INSERT INTO @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

-- Update through a derived table that exposes both the current and the suffixed name.
UPDATE t
SET NameField = NewNameField
FROM
(
    SELECT
        IdField,
        NameField,
        NewNameField = CONCAT(NameField,
            CASE WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                 -- Cast so both CASE branches are strings. With a bare ROW_NUMBER() (bigint)
                 -- the '' branch is implicitly converted to 0 and unique names get a '0' suffix.
                 THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
                 ELSE ''
            END)
    FROM @LocalTable
) AS t;

SELECT * FROM @LocalTable;

Just add an identity field to the temp table.
-- Staging table variable with an identity column so every inserted row gets a sequence number.
-- Table variables are declared with an @ prefix.
DECLARE @LocalTable TABLE
(
    ix INT IDENTITY PRIMARY KEY,
    IdField NVARCHAR(MAX),
    NameField NVARCHAR(MAX)
);

-- The insert column list must name the table variable's own columns (IdField, NameField),
-- not the source table's columns.
INSERT INTO @LocalTable (IdField, NameField)
SELECT
    IdColumn,
    NameColumn
FROM SourceTable
-- Make sure same names are consecutive in the table
ORDER BY NameColumn;

-- Then set NameField like so:
UPDATE lt
SET NameField = lt.NameField
    -- Add a number based on the ix, minus the lowest ix entry for the same name
    + CAST(
        (SELECT lt.ix - MIN(lt2.ix) + 1
         FROM @LocalTable AS lt2
         WHERE lt2.NameField = lt.NameField)
      AS NVARCHAR(10))
FROM @LocalTable AS lt
-- Only do those with duplicated names
WHERE lt.NameField IN (
    SELECT NameField
    FROM @LocalTable
    GROUP BY NameField
    HAVING COUNT(*) > 1
);

First, consider using a temp table instead of a table variable.
Second, try to replace NVARCHAR(MAX) with something smaller, such as INT.
Here is the code to include only unique values in the NameField:
-- Temp table holding the candidate rows.
CREATE TABLE #LocalTable
(
    IdField   NVARCHAR(MAX),
    NameField NVARCHAR(MAX)
);

INSERT INTO #LocalTable (IdField, NameField)
VALUES
    (1, 'A'),
    (2, 'B'),
    (3, 'B');

-- Insert one row per distinct NameField: the row holding that name's minimum IdField.
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT
    lt.IdField,
    lt.NameField
FROM #LocalTable AS lt
WHERE lt.IdField IN (
    SELECT MIN(grp.IdField)
    FROM #LocalTable AS grp
    GROUP BY grp.NameField
);
Be aware that "IdField" of duplicate records will be ignored and not inserted into target table.

Try using below code:
-- Number every row inside its NameField group and record the group size, then append
-- the number to names that occur more than once. Unique names are left untouched.
-- Every column is qualified with the CTE's only alias (L), and the CTE is not named
-- TargetTable so that it cannot shadow the real target table.
WITH NumberedNames AS (
    SELECT
        L.NameField,
        ROW_NUMBER() OVER (PARTITION BY L.NameField ORDER BY L.IdField) AS UniqueID,
        COUNT(*) OVER (PARTITION BY L.NameField) AS NameCount
    FROM #LocalTable AS L
)
UPDATE NumberedNames
SET NameField = CONCAT(NameField, UniqueID)
WHERE NameCount > 1;

Related

SQL Dynamically update duplicate row values to be unique

The Problem
I need to update a table so that any duplicate rows are updated to have unique values.
The Catch
I need to dynamically ensure that the value I am updating the duplicate row to is also unique.
My Solution So Far (with test case)
-- Test data: two rows named 'Duplicate' and one already named 'Duplicate (2)', all sharing one ID.
CREATE TABLE #temp (name nvarchar(100), ID uniqueidentifier);

INSERT INTO #temp (Name, ID)
VALUES
    ('Duplicate',     '32208C09-C0C3-408C-AB60-273811722194'),
    ('Duplicate',     '32208C09-C0C3-408C-AB60-273811722194'),
    ('Duplicate (2)', '32208C09-C0C3-408C-AB60-273811722194');

-- Number rows per (Name, ID) and append ' (n)' to every row after the first in its group.
WITH cte AS (
    SELECT
        Name,
        ROW_NUMBER() OVER (PARTITION BY Name, ID ORDER BY Name) AS RowNum
    FROM #temp
)
UPDATE cte
SET Name = CONCAT(Name, ' (', RowNum, ')')
WHERE RowNum > 1;

SELECT * FROM #temp;
DROP TABLE #temp;
As you can tell, this will update the table so there is only one row with the name 'Duplicate' but two rows with the name 'Duplicate (2)'. How can I check and account for duplicates in the value I am updating to?
You could use another CTE that gets the highest existing number and then use it to generate the "next" number.
For numbers with two or more digits you will need to adapt the patterns.
-- Test data: two rows named 'Duplicate' and one already named 'Duplicate (2)', all sharing one AssetMakeID.
CREATE TABLE #temp (name nvarchar(100), AssetMakeID uniqueidentifier)
INSERT INTO #temp (Name, AssetMakeID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194')
INSERT INTO #temp (Name, AssetMakeID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194')
INSERT INTO #temp (Name, AssetMakeID)
VALUES ('Duplicate (2)', '32208C09-C0C3-408C-AB60-273811722194')
-- CTE1: groups rows by the leading part of the name up to the first ' (<digit>)' match and
-- takes, per group, the maximum of the text found between the parentheses as hinum.
-- The [0-9] patterns match a single digit only.
-- NOTE(review): for names without a '(n)' suffix both SUBSTRING calls appear to yield '' rather
-- than NULL, so the IS NOT NULL filter may not exclude them — confirm.
;WITH CTE1 AS (SELECT
MAX( COALESCE(REPLACE(REPLACE(SUBSTRING(name, PATINDEX('%([0-9])%', name), PATINDEX('%)%', name + 't') - PATINDEX('%(%',
name) + 1),'(','') ,')','') ,0)) hinum
,SUBSTRING(name,1, PATINDEX('% ([0-9])%', name) ) name
FROM #temp
WHERE SUBSTRING(name,1, PATINDEX('% ([0-9])%', name) ) IS NOT NULL
GROUP BY SUBSTRING(name,1, PATINDEX('% ([0-9])%', name) ) ),
-- cte: row number per (Name, AssetMakeID); for rows after the first in a group the number is
-- shifted by hinum - 1, the first row keeps its plain row number.
cte AS (
SELECT #temp.Name
, CASE WHEN ROW_NUMBER() OVER (PARTITION BY #temp.Name, AssetMakeID ORDER BY #temp.Name) > 1 THEN
ROW_NUMBER() OVER (PARTITION BY #temp.Name, AssetMakeID ORDER BY #temp.Name) + hinum -1
ELSe ROW_NUMBER() OVER (PARTITION BY #temp.Name, AssetMakeID ORDER BY #temp.Name) END RowNum
FROM #temp LEFT JOIN CTE1 ON #temp.name = CTE1.name
)
-- Only rows whose computed RowNum exceeds 1 are renamed.
UPDATE cte
SET Name = CONCAT(Name, ' (', RowNum, ')')
WHERE RowNum > 1
SELECT * FROM #temp
name
AssetMakeID
Duplicate
32208c09-c0c3-408c-ab60-273811722194
Duplicate (3)
32208c09-c0c3-408c-ab60-273811722194
Duplicate (2)
32208c09-c0c3-408c-ab60-273811722194
fiddle
Well the easy way is to use a unique string in the update so there is no way your update can cause a duplicate. The current timestamp (with milliseconds) works well. Like this:
-- Append the row number plus the current timestamp to every row after the first in its group.
-- Style 126 produces yyyy-mm-ddThh:mi:ss.mmm, which is up to 23 characters; VARCHAR(22)
-- would cut off the last millisecond digit.
UPDATE cte
SET Name = CONCAT(Name, ' (', RowNum, ') at ', CONVERT(VARCHAR(23), GETDATE(), 126))
WHERE RowNum > 1
This will cope with one level of duplication e.g. 'Duplicate (2)' but not two e.g. 'Duplicate (2) (2)'.
Essentially just apply the same logic again in a second cte. In fact you should be able to do this using a recursive CTE to get it to work for all levels.
That said you could use a more unique method of de-duplicating names e.g. just add a guid and it will be unique.
-- First pass: number rows per (Name, ID) and give every row a surrogate key.
WITH cte1 AS (
    SELECT
        Name,
        Id,
        ROW_NUMBER() OVER (PARTITION BY Name, ID ORDER BY Name) AS RowNum,
        -- You should already have one, but if not generate it
        ROW_NUMBER() OVER (ORDER BY Name) AS UniqueId
    FROM #temp
),
-- Second pass: apply the first-pass suffix, then number the resulting names again.
cte2 AS (
    SELECT
        renamed.NewName AS Name,
        RowNum,
        UniqueId,
        ROW_NUMBER() OVER (PARTITION BY renamed.NewName, ID ORDER BY renamed.NewName) AS RowNum2
    FROM cte1
    CROSS APPLY (
        VALUES (CASE WHEN RowNum = 1 THEN Name ELSE CONCAT(Name, ' (', RowNum, ')') END)
    ) AS renamed (NewName)
)
-- Write back the second-pass name, adding a second suffix where the first one still collides.
UPDATE firstPass
SET Name = CASE
               WHEN RowNum2 = 1 THEN secondPass.Name
               ELSE CONCAT(secondPass.Name, ' (', RowNum2, ')')
           END
FROM cte1 AS firstPass
INNER JOIN cte2 AS secondPass
    ON secondPass.UniqueId = firstPass.UniqueId
WHERE firstPass.RowNum > 1
   OR RowNum2 > 1;
I am going to choose another answer as the correct answer since I personally prefer it, but I thought I'd post what I ended up doing myself.
-- Test data: two rows named 'Duplicate' and one already named 'Duplicate (2)', all sharing one ID.
DROP TABLE IF EXISTS #temp;
CREATE TABLE #temp (name nvarchar(100), ID uniqueidentifier);

INSERT INTO #temp (Name, ID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194');
INSERT INTO #temp (Name, ID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194');
INSERT INTO #temp (Name, ID)
VALUES ('Duplicate (2)', '32208C09-C0C3-408C-AB60-273811722194');

-- Local variables take a single @ prefix and the row-count function is @@ROWCOUNT.
DECLARE @doWhileTrueFlag bit = 1;

-- Repeat the rename pass until a pass changes no rows, i.e. no (Name, ID) duplicates remain.
WHILE (@doWhileTrueFlag = 1)
BEGIN
    ;WITH cte AS (
        SELECT
            Name,
            ROW_NUMBER() OVER (PARTITION BY Name, ID ORDER BY Name) RowNum
        FROM #temp
    )
    UPDATE cte
    SET Name = CONCAT(Name, ' (', RowNum, ')')
    WHERE RowNum > 1;

    -- Must read @@ROWCOUNT directly after the UPDATE; any later statement resets it.
    SET @doWhileTrueFlag = CASE
                               WHEN @@ROWCOUNT > 0 THEN 1
                               ELSE 0
                           END;
END;

SELECT * FROM #temp;
DROP TABLE #temp;
This performs the update I was already doing in a loop until no more updates are done. A rather inelegant solution, but the names created are prettier for the clients.

How to get the each record with some condition

I have following data:
-- Sample data for the "one row per sn" question.
-- A #-prefixed name is a temporary table, which is created with CREATE TABLE
-- (DECLARE ... TABLE only accepts @-prefixed table variables).
CREATE TABLE #temp (
    ID int
    ,sn varchar(200)
    ,comment varchar(2000)
    ,rownumber int
);

INSERT INTO #temp (ID, sn, comment, rownumber)
VALUES
    (1, 'sn1', NULL,  1),
    (2, 'sn1', 'aaa', 2),
    (3, 'sn1', 'bbb', 3),
    (4, 'sn1', NULL,  4),
    (5, 'sn2', NULL,  1),
    (6, 'sn2', NULL,  2),
    (7, 'sn2', NULL,  3);

SELECT * FROM #temp;
And I want to output like this:
2 sn1 aaa 2
5 sn2 NULL 1
For each sn: if any row has a comment, return the commented row with the lowest rownumber. For sn1, two rows have a comment, so the row with rownumber = 2 is returned.
If no row has a comment, return the row with the lowest rownumber. For sn2, that is the row with rownumber = 1.
May I know how to write this SQL?
This is a prioritization query. I think row_number() is the simplest method:
-- Rank each sn's rows: rows with a comment first, then by ascending rownumber,
-- and keep only the top-ranked row per sn.
WITH ranked AS (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY src.sn
            ORDER BY
                CASE WHEN src.comment IS NOT NULL THEN 1 ELSE 2 END,
                src.rownumber
        ) AS seqnum
    FROM #temp AS src
)
SELECT ranked.*
FROM ranked
WHERE ranked.seqnum = 1;
Here is a db<>fiddle.

How to update only MAX column in SQL Server

I want to update the MAX value's status column with static values in SQL Server:
Script to generate sample data:
-- Table A: four sequenced rows per transaction id; the highest seq carries status 'P'.
CREATE TABLE A
(
    seq    INT,
    TrnId  NVARCHAR(MAX),
    Status NVARCHAR(10)
);

INSERT INTO A (seq, TrnId, Status)
VALUES
    (1, 'A1', 'A'),
    (2, 'A1', 'A'),
    (3, 'A1', 'A'),
    (4, 'A1', 'P'),
    (1, 'B1', 'A'),
    (2, 'B1', 'A'),
    (3, 'B1', 'A'),
    (4, 'B1', 'P');

-- #temp: the transaction ids whose rows should be updated.
CREATE TABLE #temp
(
    TrnId NVARCHAR(MAX)
);

INSERT INTO #temp (TrnId)
VALUES
    ('A1'),
    ('B1');
I have the TrnId values in the #temp table, and for each of them (A1 and B1) I want to update only the row with the MAX seq value, setting its status to 'A'.
An example without a CTE:
-- For each TrnId listed in #temp, set Status = 'A' on the row of A holding that TrnId's highest seq.
UPDATE A
SET A.Status = 'A'
FROM A
INNER JOIN #temp AS wanted
    ON wanted.TrnId = A.TrnId
INNER JOIN (
    SELECT TrnId, MAX(seq) AS MaxSeq
    FROM A
    GROUP BY TrnId
) AS latest
    ON latest.TrnId = wanted.TrnId
   AND latest.MaxSeq = A.seq
One method uses an updatable CTE:
-- Rank each TrnId's rows from highest seq to lowest and update only the top row
-- for the TrnIds listed in #temp.
WITH toupdate AS (
    SELECT
        a.*,
        -- Order by seq, not status: the row to change is the one with the MAX seq,
        -- regardless of which status value it currently holds.
        ROW_NUMBER() OVER (PARTITION BY a.trnid ORDER BY a.seq DESC) AS seqnum
    FROM a
)
UPDATE toupdate
SET status = 'A'
FROM toupdate
INNER JOIN #temp AS t
    ON toupdate.trnid = t.trnid
WHERE seqnum = 1;

Deleting records that are similar with previous one SQL Server

I am looking for a query which fetches me the data that is different compared to the previous row,
A sample code (with table creation and data)
-- Sample data: ids 1-10 belong to eid 1, ids 11-20 to eid 2, with the same pattern of
-- estid/ecid changes repeated in both halves.
CREATE TABLE #temp
(
    id    int,
    eid   int,
    name  char(10),
    estid int,
    ecid  int,
    epid  int,
    etc   char(5)
);

INSERT INTO #temp (id, eid, name, estid, ecid, epid, etc)
VALUES
    (1,  1, 'a', 1, 1, 1, 'a'),
    (2,  1, 'a', 1, 1, 1, 'a'),
    (3,  1, 'a', 2, 1, 1, 'a'),
    (4,  1, 'a', 1, 1, 1, 'a'),
    (5,  1, 'a', 1, 1, 1, 'a'),
    (6,  1, 'a', 1, 2, 1, 'a'),
    (7,  1, 'a', 1, 1, 1, 'a'),
    (8,  1, 'a', 2, 1, 1, 'a'),
    (9,  1, 'a', 1, 1, 1, 'a'),
    (10, 1, 'a', 1, 1, 1, 'a'),
    (11, 2, 'a', 1, 1, 1, 'a'),
    (12, 2, 'a', 1, 1, 1, 'a'),
    (13, 2, 'a', 2, 1, 1, 'a'),
    (14, 2, 'a', 1, 1, 1, 'a'),
    (15, 2, 'a', 1, 1, 1, 'a'),
    (16, 2, 'a', 1, 2, 1, 'a'),
    (17, 2, 'a', 1, 1, 1, 'a'),
    (18, 2, 'a', 2, 1, 1, 'a'),
    (19, 2, 'a', 1, 1, 1, 'a'),
    (20, 2, 'a', 1, 1, 1, 'a');
I tried with some ways of getting the data as the way that i expected
-- Copy into #Temp_Final every row of #temp whose %%physloc%% value is not the minimum one
-- within its (eid, name, estid, ecid, epid, etc) group.
-- NOTE(review): the grouping covers the whole table and does not look at the preceding id,
-- which is presumably why the result differs from the "changed since previous row" expectation.
SELECT * INTo #Temp_Final
FROM #temp
WHERE #temp.%%physloc%%
NOT IN (SELECT Min(b.%%physloc%%)
FROM #temp b
GROUP BY eid,name,estid,ecid,epid,etc)
ORDER BY id
-- List the rows of #temp whose id was not copied into #Temp_Final.
SELECT * FROM #temp WHERE id not in (SELECT id FROM #Temp_Final) ORDER BY id
But i wasn't getting the result as i expected...
This is how the result needs to be
select * from #temp where id in (1,3,4,6,7,8,9,11,13,14,16,17,18,19)
You can do this with a simple self-join and appropriate comparison:
-- Return every row that differs from the row with the immediately preceding id
-- (or that has no preceding row at all).
SELECT t.*
FROM #temp AS t
LEFT JOIN #temp AS tprev
    ON t.id = tprev.id + 1
WHERE tprev.id IS NULL
   -- eid must be compared too: row 11 differs from row 10 only in eid and is part of
   -- the expected result.
   OR t.eid <> tprev.eid
   OR t.name <> tprev.name
   OR t.estid <> tprev.estid
   OR t.ecid <> tprev.ecid
   OR t.epid <> tprev.epid
   OR t.etc <> tprev.etc;
This assumes that the ids are sequential with no gaps. If the ids are not, you can get the previous id using a correlated subquery or the lag() function.
Your title says "delete" but the question seems to just want the list of such rows. You can phrase this as a delete query if you need to.
For SQL Server 2012 (SQL Fiddle)
-- Attach to each row the six compared values of the previous row in id order (NULL for the first row).
WITH CTE
AS (SELECT *,
LAG(eid) OVER (ORDER BY id) AS prev_eid,
LAG(name) OVER (ORDER BY id) AS prev_name,
LAG(estid) OVER (ORDER BY id) AS prev_estid,
LAG(ecid) OVER (ORDER BY id) AS prev_ecid,
LAG(epid) OVER (ORDER BY id) AS prev_epid,
LAG(etc) OVER (ORDER BY id) AS prev_etc
FROM #temp)
-- Delete through the CTE every row whose current values match the previous row's values.
-- The INTERSECT of the two one-row SELECTs is non-empty only when all six pairs match;
-- INTERSECT compares NULLs as equal, unlike the = operator.
DELETE FROM CTE
WHERE EXISTS (SELECT eid,
name,
estid,
ecid,
epid,
etc
INTERSECT
SELECT prev_eid,
prev_name,
prev_estid,
prev_ecid,
prev_epid,
prev_etc)
-- Keep each row for which no row with the immediately preceding id carries the same six values.
SELECT
    cur.id,
    cur.eid,
    cur.name,
    cur.estid,
    cur.ecid,
    cur.epid,
    cur.etc
FROM #temp AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM #temp AS prev
    WHERE prev.id    = cur.id - 1
      AND prev.eid   = cur.eid
      AND prev.name  = cur.name
      AND prev.estid = cur.estid
      AND prev.ecid  = cur.ecid
      AND prev.epid  = cur.epid
      AND prev.etc   = cur.etc
)

SQL Query using distinct and max

I have a dataset like:
type seqID text
A 1 Text1a
A 2 Text2a
A 3 Text3a
B 1 Text1b
B 2 Text2b
How do I get the row back by type with the highest seqID grouped by type? So in the above example I would want the row that has A, 3, Text3a and B, 2, Text2b returned.
-- Return the rows for which no other row of the same type has a higher seqID.
SELECT cur.*
FROM tmp AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM tmp AS higher
    WHERE higher.type = cur.type
      AND higher.seqID > cur.seqID
)
There must not exist any other row with the same type and a higher seqID.
You kind of need an ID, but since "Text" seems unique for this example
-- Sample data matching the question's dataset.
CREATE TABLE #TMP
(
    type   VARCHAR(3),
    seqID  INT,
    [text] VARCHAR(256)
);

INSERT INTO #TMP (type, seqID, [text])
VALUES
    ('A', 1, 'Text1a'),
    ('A', 2, 'Text2a'),
    ('A', 3, 'Text3a'),
    ('B', 1, 'Text1b'),
    ('B', 2, 'Text2b');

-- For each row, look up the [text] of the highest-seqID row of the same type and keep
-- the row only if its own [text] is that value.
SELECT candidate.*
FROM #TMP AS candidate
WHERE candidate.[text] IN (
    SELECT TOP 1 top_row.[text]
    FROM #TMP AS top_row
    WHERE top_row.type = candidate.type
    ORDER BY top_row.seqID DESC
)
-- Join the table to its per-type maximum seqID to get the full row for each maximum.
-- tbl must appear in FROM for tbl.* to resolve, and the aggregate needs an alias
-- so the join can reference it.
SELECT tbl.*
FROM tbl
INNER JOIN (
    SELECT type, MAX(seqID) AS seqID
    FROM tbl
    GROUP BY type
) AS maxes
    ON tbl.type = maxes.type
   AND tbl.seqID = maxes.seqID
-- Join the table to its per-type maximum seqID.
-- The join must match on type as well as seqID; matching on seqID alone also returns
-- rows of other types that happen to share the number (e.g. A/2 because B's maximum is 2).
-- [Table] is bracketed because TABLE is a reserved word; substitute the real table name.
SELECT t.*
FROM (
    SELECT type, MAX(seqID) AS maxId
    FROM [Table]
    GROUP BY type
) AS m
INNER JOIN [Table] AS t
    ON m.type = t.type
   AND m.maxId = t.seqID
Using CTE
-- Same approach with a CTE. The CTE column list must name both selected columns,
-- and the join must match on type as well as seqID.
-- [Table] is bracketed because TABLE is a reserved word; substitute the real table name.
;WITH maxIds (type, maxId)
AS
(
    SELECT type, MAX(seqID) AS maxId
    FROM [Table]
    GROUP BY type
)
SELECT t.*
FROM [Table] AS t
INNER JOIN maxIds AS m
    ON m.type = t.type
   AND m.maxId = t.seqID
If you are on SQL Server 2005+, you could use a ranking function (more specifically, ROW_NUMBER()):
-- Rank each type's rows from highest seqID to lowest and keep the first of each type.
WITH ranked AS (
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY type ORDER BY seqID DESC) AS rnk
    FROM atable
)
SELECT
    type,
    seqID,
    text
FROM ranked
WHERE rnk = 1
-- Result table for the highest-seqID row of each type.
CREATE TABLE #tlb1 (
    [type] VARCHAR(3), seqID INT, [text] VARCHAR(MAX)
);

-- Local variables take an @ prefix (# denotes temp tables), and the cursor status
-- function is @@FETCH_STATUS.
DECLARE @type VARCHAR(3), @text VARCHAR(MAX);
DECLARE @seqID INT;

-- Walk the (type, max seqID) pairs and look up the matching text for each.
DECLARE seq_cursor CURSOR FOR
    SELECT [type], MAX(seqID) FROM tbl GROUP BY [type];
OPEN seq_cursor;
FETCH NEXT FROM seq_cursor INTO @type, @seqID;
WHILE (@@FETCH_STATUS = 0)
BEGIN
    SET @text = (SELECT [text] FROM tbl WHERE [type] = @type AND seqID = @seqID);
    INSERT INTO #tlb1 VALUES (@type, @seqID, @text);
    FETCH NEXT FROM seq_cursor INTO @type, @seqID;
END;

SELECT * FROM #tlb1;
CLOSE seq_cursor;
DEALLOCATE seq_cursor;
TRUNCATE TABLE #tlb1;
Try:
-- Return, for each type, the row holding that type's highest seqID.
-- A plain GROUP BY type cannot also return text (it is neither grouped nor aggregated),
-- and the table must be an identifier rather than the string literal 'db'.
SELECT t.type, t.seqID, t.text
FROM db AS t
WHERE t.seqID = (SELECT MAX(m.seqID) FROM db AS m WHERE m.type = t.type)
As easy as that.
EDITED solution. Consider this pseudo-code (since I am not familiar with SQL Server syntax):
-- Join each row to its type's maximum seqID, matching on both seqID and type.
-- NOTE(review): "table" looks like a placeholder for the real table name; as a reserved
-- word it would need quoting or replacing before this runs.
SELECT a.type, a.seqID, a.text FROM table a
JOIN
(SELECT type, max(seqID) seqID FROM table GROUP BY type) b
ON a.seqID = b.seqID AND a.type=b.type