The Problem
I need to update a table so that any duplicate rows are updated to have unique values.
The Catch
I need to dynamically ensure that the value I am updating the duplicate row to is also unique.
My Solution So Far (with test case)
-- Repro: three rows share one ID; two are named 'Duplicate' and one is
-- already named 'Duplicate (2)'.
CREATE TABLE #temp
(
    name nvarchar(100),
    ID   uniqueidentifier
);

INSERT INTO #temp (Name, ID)
VALUES
    ('Duplicate',     '32208C09-C0C3-408C-AB60-273811722194'),
    ('Duplicate',     '32208C09-C0C3-408C-AB60-273811722194'),
    ('Duplicate (2)', '32208C09-C0C3-408C-AB60-273811722194');

-- Number the rows of every (Name, ID) group and suffix each row after the first.
WITH cte AS
(
    SELECT
        Name,
        ROW_NUMBER() OVER (PARTITION BY Name, ID ORDER BY Name) AS RowNum
    FROM #temp
)
UPDATE cte
SET Name = CONCAT(Name, ' (', RowNum, ')')
WHERE RowNum > 1;

-- Shows the problem: the renamed row collides with the existing 'Duplicate (2)'.
SELECT * FROM #temp;

DROP TABLE #temp;
As you can tell, this will update the table so there is only one row with the name 'Duplicate' but two rows with the name 'Duplicate (2)'. How can I check and account for duplicates in the value I am updating to?
You could use another CTE that finds the highest number already used as a suffix, and then use it to generate the "next" number.
As written, this only handles single-digit suffixes; for two or more digits you need to adapt the pattern.
-- Test data: three rows with the same AssetMakeID; one of them is already
-- named 'Duplicate (2)'.
CREATE TABLE #temp (name nvarchar(100), AssetMakeID uniqueidentifier)
INSERT INTO #temp (Name, AssetMakeID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194')
INSERT INTO #temp (Name, AssetMakeID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194')
INSERT INTO #temp (Name, AssetMakeID)
VALUES ('Duplicate (2)', '32208C09-C0C3-408C-AB60-273811722194')
-- CTE1: grouped by the text in front of a ' (d)' suffix, take the largest
-- value found between the parentheses (hinum). The patterns contain a single
-- [0-9], so only a one-digit number inside the parentheses is matched.
-- NOTE(review): SUBSTRING(name, 1, PATINDEX(...)) keeps the space that precedes
-- '(' in CTE1.name; the join further down presumably relies on trailing spaces
-- being ignored by '=' - confirm.
;WITH CTE1 AS (SELECT
MAX( COALESCE(REPLACE(REPLACE(SUBSTRING(name, PATINDEX('%([0-9])%', name), PATINDEX('%)%', name + 't') - PATINDEX('%(%',
name) + 1),'(','') ,')','') ,0)) hinum
,SUBSTRING(name,1, PATINDEX('% ([0-9])%', name) ) name
FROM #temp
WHERE SUBSTRING(name,1, PATINDEX('% ([0-9])%', name) ) IS NOT NULL
GROUP BY SUBSTRING(name,1, PATINDEX('% ([0-9])%', name) ) ),
-- cte: number the rows of each (Name, AssetMakeID) group. Rows after the first
-- are shifted by hinum - 1 so the generated suffix continues past the highest
-- suffix CTE1 found; the first row of a group keeps its plain row number.
cte AS (
SELECT #temp.Name
, CASE WHEN ROW_NUMBER() OVER (PARTITION BY #temp.Name, AssetMakeID ORDER BY #temp.Name) > 1 THEN
ROW_NUMBER() OVER (PARTITION BY #temp.Name, AssetMakeID ORDER BY #temp.Name) + hinum -1
ELSe ROW_NUMBER() OVER (PARTITION BY #temp.Name, AssetMakeID ORDER BY #temp.Name) END RowNum
FROM #temp LEFT JOIN CTE1 ON #temp.name = CTE1.name
)
-- Only rows with RowNum > 1 are renamed; with the sample data the second
-- 'Duplicate' becomes 'Duplicate (3)'.
UPDATE cte
SET Name = CONCAT(Name, ' (', RowNum, ')')
WHERE RowNum > 1
SELECT * FROM #temp
name
AssetMakeID
Duplicate
32208c09-c0c3-408c-ab60-273811722194
Duplicate (3)
32208c09-c0c3-408c-ab60-273811722194
Duplicate (2)
32208c09-c0c3-408c-ab60-273811722194
fiddle
Well the easy way is to use a unique string in the update so there is no way your update can cause a duplicate. The current timestamp (with milliseconds) works well. Like this:
-- Replacement for the UPDATE in the question: append the current timestamp so
-- the generated name cannot collide with an existing "(n)" name.
-- Style 126 renders yyyy-mm-ddThh:mi:ss.mmm, which is 23 characters long, so
-- the target must be varchar(23); varchar(22) cut off the last millisecond digit.
UPDATE cte
SET Name = CONCAT(Name, ' (', RowNum, ') at ', CONVERT(varchar(23), GETDATE(), 126))
WHERE RowNum > 1
This will cope with one level of duplication e.g. 'Duplicate (2)' but not two e.g. 'Duplicate (2) (2)'.
Essentially just apply the same logic again in a second cte. In fact you should be able to do this using a recursive CTE to get it to work for all levels.
That said you could use a more unique method of de-duplicating names e.g. just add a guid and it will be unique.
-- Two-pass de-duplication.
--   cte1: number the rows of every (Name, ID) group and give each row a key.
--   cte2: build the first-pass name, then number the rows again by that new
--         name to detect collisions the first pass created.
-- The UPDATE applies a second "(n)" suffix where the first-pass name collides.
WITH cte1 AS
(
    SELECT
        Name,
        Id,
        ROW_NUMBER() OVER (PARTITION BY Name, ID ORDER BY Name) AS RowNum,
        -- You should already have one, but if not generate it
        ROW_NUMBER() OVER (ORDER BY Name) AS UniqueId
    FROM #temp
),
cte2 AS
(
    SELECT
        n.NewName AS Name,
        RowNum,
        UniqueId,
        ROW_NUMBER() OVER (PARTITION BY n.NewName, ID ORDER BY n.NewName) AS RowNum2
    FROM cte1
    CROSS APPLY
    (
        VALUES (CASE WHEN RowNum = 1 THEN Name ELSE CONCAT(Name, ' (', RowNum, ')') END)
    ) AS n (NewName)
)
UPDATE src
SET Name = CASE
               WHEN RowNum2 = 1 THEN renamed.Name
               ELSE CONCAT(renamed.Name, ' (', RowNum2, ')')
           END
FROM cte1 AS src
INNER JOIN cte2 AS renamed
    ON renamed.UniqueId = src.UniqueId
WHERE src.RowNum > 1
   OR RowNum2 > 1;
I am going to choose another answer as the correct answer since I personally prefer it, but I thought I'd post what I ended up doing myself.
-- Re-run the "suffix the duplicates" UPDATE until a pass changes nothing, so a
-- freshly generated name that collides with an existing one (for example the
-- pre-existing 'Duplicate (2)') is itself renamed on the next pass.
DROP TABLE IF EXISTS #temp;

CREATE TABLE #temp (name nvarchar(100), ID uniqueidentifier);

INSERT INTO #temp (Name, ID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194');
INSERT INTO #temp (Name, ID)
VALUES ('Duplicate', '32208C09-C0C3-408C-AB60-273811722194');
INSERT INTO #temp (Name, ID)
VALUES ('Duplicate (2)', '32208C09-C0C3-408C-AB60-273811722194');

-- Local variables take the @ sigil (a # prefix denotes a temp object).
DECLARE @doWhileTrueFlag bit = 1;

WHILE (@doWhileTrueFlag = 1)
BEGIN
    ;WITH cte AS (
        SELECT
            Name,
            ROW_NUMBER() OVER (PARTITION BY Name, ID ORDER BY Name) RowNum
        FROM #temp
    )
    UPDATE cte
    SET Name = CONCAT(Name, ' (', RowNum, ')')
    WHERE RowNum > 1;

    -- @@ROWCOUNT is read in the statement directly after the UPDATE so that it
    -- reports the rows that UPDATE changed; zero rows ends the loop.
    SET @doWhileTrueFlag = CASE
        WHEN @@ROWCOUNT > 0 THEN 1
        ELSE 0
    END;
END

SELECT * FROM #temp;

DROP TABLE #temp;
This performs the update I was already doing in a loop until no more updates are done. A rather inelegant solution, but the names created are prettier for the clients.
Related
I have following data:
-- Sample data held in a table variable (table variables take the @ sigil).
-- rownumber restarts at 1 for every sn; comment is NULL for most rows.
DECLARE @temp TABLE (
ID int
,sn varchar(200)
,comment varchar(2000)
,rownumber int
)
insert into @temp (ID, sn, comment, rownumber) values(1,'sn1',NULL,1)
insert into @temp (ID, sn, comment, rownumber) values(2,'sn1','aaa',2)
insert into @temp (ID, sn, comment, rownumber) values(3,'sn1','bbb',3)
insert into @temp (ID, sn, comment, rownumber) values(4,'sn1',NULL,4)
insert into @temp (ID, sn, comment, rownumber) values(5,'sn2',NULL,1)
insert into @temp (ID, sn, comment, rownumber) values(6,'sn2',NULL,2)
insert into @temp (ID, sn, comment, rownumber) values(7,'sn2',NULL,3)
select * from @temp
And I want to output like this:
2 sn1 aaa 2
5 sn2 NULL 1
For each sn: if any record has a comment, return the record with the lowest rownumber among those that have a comment. For sn1, two records have a comment, so return the one with rownumber=2.
If no record has a comment, return the record with the lowest rownumber. For sn2, return the record with rownumber=1.
May I know how to write this SQL?
This is a prioritization query. I think row_number() is the simplest method:
-- One row per sn: rows that have a comment outrank rows that do not, and within
-- the same priority the lowest rownumber wins.
-- The data lives in the table variable from the question, so it is read as
-- @temp (a # prefix would look for a temp table instead).
select ranked.*
from (select t.*,
             row_number() over (partition by sn
                                order by (case when comment is not null then 1 else 2 end),
                                         rownumber
                               ) as seqnum
      from @temp t
     ) ranked
where seqnum = 1;
Here is a db<>fiddle.
Lets say I have table variable declared like so...
-- Table variable that stages the source rows (table variables take the @ sigil).
DECLARE @LocalTable TABLE
(
IdField NVARCHAR(MAX),
NameField NVARCHAR(MAX)
)
And I populate it like so...
-- Copy the source rows into the table variable. The target columns are named
-- explicitly so the mapping does not depend on column order.
INSERT INTO @LocalTable (IdField, NameField)
SELECT
IdColumn,
NameColumn
FROM SourceTable
NameColumn in the source table may have duplicate values, and therefore NameField in the local table will have the same duplicate values.
And let's say I want to insert the local table into a target table like so...
-- Load the staged rows from the table variable into the target table.
-- NameField is inserted unchanged, so duplicate names are passed through as-is.
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT
IdField,
NameField
FROM
@LocalTable
BUT: NewNameColumn in TargetTable has a UNIQUE constraint, and so duplicates cause an exception.
I want to apply this example,
-- Numbers the rows within each group of equal NameField values.
-- NOTE(review): the ORDER BY uses the partition column itself, so which row of
-- a group receives which number is presumably arbitrary - confirm.
ROW_NUMBER() OVER(PARTITION BY NameField ORDER BY NameField)
Such that the NameField is appended/suffixed with a number digit indicating its duplication.
I have this working example that can select correctly appended values, but I cannot get this to work in an update statement like this:
-- Asker's attempt, kept as posted apart from restoring the @ sigil of the
-- table variable. It does not work: the UPDATE assigns AppendedNameField, but
-- nothing in its FROM clause defines a column of that name - the suffixed
-- value is only computed by the SELECT that follows.
UPDATE localtable
SET NameField = AppendedNameField
FROM @LocalTable AS localtable
-- Produces the suffixed names (base name + row number) for rows that share a
-- NameField with a different IdField.
SELECT
CONCAT(Ref.NameField, ROW_NUMBER() OVER (PARTITION BY Ref.NameField
ORDER BY Source.IdField)), *
FROM
@LocalTable AS Source
INNER JOIN
@LocalTable AS Ref ON Ref.NameField = Source.NameField
AND Ref.IdField != Source.IdField
Thanks in advance.
If I have understood what you are trying to do.
-- Suffix every NameField with its row number inside its group of equal names
-- (ordered by IdField) by updating through the CTE.
-- The table variable's column is NameField, so that is the column assigned;
-- the leading semicolon keeps WITH valid after an unterminated statement.
;WITH CTE AS
(
    SELECT
        NameField,
        CONCAT(NameField, ROW_NUMBER()
            OVER (PARTITION BY NameField ORDER BY IdField)) AS NewName
    FROM @LocalTable
)
-- Intentionally no WHERE: every row, duplicated or not, receives a suffix.
UPDATE CTE
SET NameField = NewName;
If you only want to do it to duplicated names you can add a COUNT(*) OVER (PARTITION BY Name) into the CTE and conditional logic using that.
You don't necessarily need to update the table, couldn't you just add the suffix when inserting?
-- Suffix duplicated names while inserting, leaving unique names untouched.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));

INSERT @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT IdField,
       CONCAT(NameField,
              -- Both CASE branches must be character data. With a bare
              -- ROW_NUMBER() the CASE is typed as a number, the '' branch
              -- becomes 0, and unique names would get "0" appended.
              CASE WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                   THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
                   ELSE ''
              END)
FROM @LocalTable
ORDER BY IdField;
Alternatively, you can update by simply wrapping the above select in a subquery, and updating that:
-- Same suffixing logic applied in place: update through a derived table that
-- exposes both the current and the new name.
DECLARE @LocalTable TABLE (IdField INT, NameField VARCHAR(50));

INSERT @LocalTable (IdField, NameField)
VALUES (1, 'Not Duplicate'), (2, 'Duplicate'), (3, 'Duplicate');

UPDATE t
SET NameField = NewNameField
FROM
(
    SELECT IdField, NameField,
           NewNameField = CONCAT(NameField,
                -- Both CASE branches must be character data. With a bare
                -- ROW_NUMBER() the CASE is typed as a number, the '' branch
                -- becomes 0, and unique names would get "0" appended.
                CASE WHEN COUNT(*) OVER (PARTITION BY NameField) > 1
                     THEN CAST(ROW_NUMBER() OVER (PARTITION BY NameField ORDER BY IdField) AS VARCHAR(20))
                     ELSE ''
                END)
    FROM @LocalTable
) AS t;

SELECT * FROM @LocalTable;
Just add an identity field to the temp table.
-- Table variable with an identity column; ix later serves as a per-row counter.
DECLARE @LocalTable TABLE
(
ix int identity primary key,
IdField NVARCHAR(MAX),
NameField NVARCHAR(MAX)
)
-- The column list must name the table variable's own columns
-- (IdField / NameField); IdColumn / NameColumn belong to SourceTable.
Insert into @LocalTable(IdField, NameField)
SELECT
IdColumn,
NameColumn
FROM SourceTable
-- Make sure same names are consecutive in the table
ORDER BY NameColumn
Then set the name column like so:
-- Append a counter to every duplicated name. The columns referenced are the
-- table variable's own (NameField, ix), not the source table's NameColumn.
update lt set
NameField = lt.NameField
-- Add a number based on the ix, minus the lowest ix entry for the same name
+ cast(
lt.ix - (select min(lt2.ix)
from @LocalTable lt2 where lt2.NameField = lt.NameField) + 1
as nvarchar(10))
from @LocalTable lt
-- Only do those with duplicated names
where lt.NameField in (
select NameField from @LocalTable group by NameField having count(1) > 1
)
First, consider using a temp table instead of a table variable.
Second, try to replace NVARCHAR(MAX) with something smaller, like INT.
Here is the code to include only unique values in the NameField:
-- Temp table holding the staged rows.
CREATE TABLE #LocalTable
(
    IdField   NVARCHAR(MAX),
    NameField NVARCHAR(MAX)
);

INSERT INTO #LocalTable (IdField, NameField)
VALUES
    (1, 'A'),
    (2, 'B'),
    (3, 'B');

-- Insert a single row per NameField: the one holding that group's MIN(IdField).
INSERT INTO TargetTable (NewIdColumn, NewNameColumn)
SELECT
    src.IdField,
    src.NameField
FROM #LocalTable AS src
WHERE src.IdField IN
(
    SELECT MIN(firstPerName.IdField)
    FROM #LocalTable AS firstPerName
    GROUP BY firstPerName.NameField
);
Be aware that "IdField" of duplicate records will be ignored and not inserted into target table.
Try using below code:
-- Suffix duplicated names with their position inside the group of equal names.
-- A single numbered CTE replaces the earlier pair of CTEs: the first of those
-- referenced aliases (Ref, Source) that its FROM clause never defined, and the
-- self-join in the second produced several rows per source row once a name
-- occurred three or more times.
;WITH Numbered AS (
    SELECT L.NameField,
           -- position of the row among the rows sharing its name
           ROW_NUMBER() OVER (PARTITION BY L.NameField ORDER BY L.IdField) AS UniqueID,
           -- how many rows share the name; 1 means the name is already unique
           COUNT(*) OVER (PARTITION BY L.NameField) AS NameCount
    FROM @LocalTable AS L
)
UPDATE Numbered
SET NameField = CONCAT(NameField, UniqueID)
-- Only names that occur more than once receive a suffix.
WHERE NameCount > 1;
I have a scenario where I have to select multiple rows from a table. One record can have multiple rows, each with a different status.
At times I have two rows with identical data, including the status. In that case I want to select the non-zero price for the first occurrence and set 0 for the remaining occurrences.
Below is the Image to show and I have marked strike-out and marked 0 for the remaining occurrence.
Could anybody suggest a better SQL query?
Here is the Query: I am getting zero value for status 1 for ID =1 but I need to show first as regular and then 0 if that status repeats again.
-- Sample data: several rows per ID, some repeating the same Status on a later date.
CREATE TABLE #Temp
(
    ID       INT,
    ItemName varchar(10),
    Price    Money,
    [Status] INT,
    [Date]   Datetime
);

INSERT INTO #Temp (ID, ItemName, Price, [Status], [Date])
VALUES
    (1, 'ABC', 10, 1, '2014-08-27'),
    (1, 'ABC', 10, 2, '2014-08-27'),
    (1, 'ABC', 10, 1, '2014-08-28'),
    (2, 'DEF', 25, 1, '2014-08-26'),
    (2, 'DEF', 25, 3, '2014-08-27'),
    (2, 'DEF', 25, 1, '2014-08-28'),
    (3, 'GHI', 30, 1, '2014-08-27');

-- Asker's attempt: zeroes the price of every Status = 1 row, first occurrence included.
SELECT
    CASE WHEN Temp.Status = 1 THEN 0 ELSE Temp.Price END AS Price,
    *
FROM #Temp AS Temp;

DROP TABLE #Temp;
Here is the result:
You might modify your inner select using Row_Number() and set price to Zero for RowNumber > 1.
-- Number the rows of each (ID, Status) pair by date, then zero the price of
-- every row after the first.
;WITH Numbered AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY ID, Status ORDER BY ID, Date) AS RowNumber
    FROM #Temp
)
SELECT
    CASE WHEN Temp.RowNumber > 1 THEN 0 ELSE Temp.Price END AS Price,
    *
FROM Numbered AS Temp
ORDER BY ID, Date
You can try this:
-- Rank rows that are identical in ID, ItemName, Price and Status by Date;
-- only rank 1 keeps its price, later ones show 0.
SELECT
    ranked.[ID],
    ranked.[ItemName],
    CASE WHEN ranked.[RankID] = 1 THEN ranked.[Price] ELSE 0 END,
    ranked.[Status],
    ranked.[Date]
FROM
(
    SELECT
        RANK() OVER (PARTITION BY [ID], [ItemName], [Price], [Status] ORDER BY [Date]) AS [RankID],
        *
    FROM #Temp
) AS ranked
ORDER BY
    ranked.[ID],
    ranked.[Date]
Here is the output:
Please try the code below. It is working for me.
-- Sample data from the question.
CREATE TABLE #Temp
(ID INT,
ItemName varchar(10),
Price Money,
[Status] INT,
[Date] Datetime)
INSERT INTO #Temp VALUES(1,'ABC',10,1,'2014-08-27')
INSERT INTO #Temp VALUES(1,'ABC',10,2,'2014-08-27')
INSERT INTO #Temp VALUES(1,'ABC',10,1,'2014-08-28')
INSERT INTO #Temp VALUES(2,'DEF',25,1,'2014-08-26')
INSERT INTO #Temp VALUES(2,'DEF',25,3,'2014-08-27')
INSERT INTO #Temp VALUES(2,'DEF',25,1,'2014-08-28')
INSERT INTO #Temp VALUES(3,'GHI',30,1,'2014-08-27')
-- Keep the price on the first occurrence of each (ID, Status) pair and show 0
-- on later ones. The row number is therefore partitioned by ID and Status and
-- ordered by Date; partitioning by (status, date) would number rows of
-- different IDs together. No status is excluded: a status that occurs only
-- once for an ID keeps its price.
select *,case when a.rn=1 then price else 0 end as price from
(select *,ROW_NUMBER() over(partition by ID,[Status] order by [Date] asc) rn from #Temp) a
order by ItemName asc, [Date] asc
You can do this with UNION:
-- First occurrence of each (ID, Status): no earlier row exists -> keep the price.
SELECT t.ID, t.ItemName, t.Price, t.[Status], t.[Date]
FROM #Temp AS t
WHERE NOT EXISTS
      (SELECT *
       FROM #Temp AS earlier
       WHERE earlier.ID = t.ID
         AND earlier.[Status] = t.[Status]
         AND earlier.[Date] < t.[Date])
UNION ALL
-- Repeated occurrence: an earlier row with the same ID and Status exists -> price 0.
-- (This branch needs its own FROM clause; t is not visible across UNION ALL.)
SELECT t.ID, t.ItemName, 0 AS Price, t.[Status], t.[Date]
FROM #Temp AS t
WHERE EXISTS
      (SELECT *
       FROM #Temp AS earlier
       WHERE earlier.ID = t.ID
         AND earlier.[Status] = t.[Status]
         AND earlier.[Date] < t.[Date])
Or subquery:
-- Price is 0 as soon as at least one earlier row with the same ID and Status
-- exists. EXISTS expresses "one or more"; counting with "> 1" would leave the
-- second occurrence at full price.
SELECT CASE
         WHEN EXISTS (SELECT *
                      FROM #Temp AS earlier
                      WHERE earlier.ID = t.ID
                        AND earlier.[Status] = t.[Status]
                        AND earlier.[Date] < t.[Date])
         THEN 0
         ELSE t.Price
       END AS NewPrice,
       t.*
FROM #Temp AS t
Or possibly RANK() function:
-- Rank the rows of each (id, status) pair by date in a derived table, then
-- show 0 for every row ranked after the first.
SELECT
    CASE WHEN ranked.OccurrenceRank > 1 THEN 0 ELSE ranked.Price END,
    ranked.ID,
    ranked.ItemName,
    ranked.Price,
    ranked.[Status],
    ranked.[Date]
FROM
(
    SELECT
        t.*,
        RANK() OVER (PARTITION BY t.id, t.status ORDER BY t.date) AS OccurrenceRank
    FROM #Temp AS t
) AS ranked
I am looking for a query which fetches me the data that is different compared to the previous row,
A sample code (with table creation and data)
-- Sample data: ids 1-10 belong to eid 1 and ids 11-20 to eid 2; within each
-- eid the rows occasionally change estid or ecid.
create table #temp
(
    id    int,
    eid   int,
    name  char(10),
    estid int,
    ecid  int,
    epid  int,
    etc   char(5)
);

insert into #temp (id, eid, name, estid, ecid, epid, etc)
values
    ( 1, 1, 'a', 1, 1, 1, 'a'),
    ( 2, 1, 'a', 1, 1, 1, 'a'),
    ( 3, 1, 'a', 2, 1, 1, 'a'),
    ( 4, 1, 'a', 1, 1, 1, 'a'),
    ( 5, 1, 'a', 1, 1, 1, 'a'),
    ( 6, 1, 'a', 1, 2, 1, 'a'),
    ( 7, 1, 'a', 1, 1, 1, 'a'),
    ( 8, 1, 'a', 2, 1, 1, 'a'),
    ( 9, 1, 'a', 1, 1, 1, 'a'),
    (10, 1, 'a', 1, 1, 1, 'a'),
    (11, 2, 'a', 1, 1, 1, 'a'),
    (12, 2, 'a', 1, 1, 1, 'a'),
    (13, 2, 'a', 2, 1, 1, 'a'),
    (14, 2, 'a', 1, 1, 1, 'a'),
    (15, 2, 'a', 1, 1, 1, 'a'),
    (16, 2, 'a', 1, 2, 1, 'a'),
    (17, 2, 'a', 1, 1, 1, 'a'),
    (18, 2, 'a', 2, 1, 1, 'a'),
    (19, 2, 'a', 1, 1, 1, 'a'),
    (20, 2, 'a', 1, 1, 1, 'a');
I tried a few ways of getting the data the way I expected:
-- Attempt from the question: copy into #Temp_Final every row of #temp that is
-- NOT the minimum-%%physloc%% row of its (eid, name, estid, ecid, epid, etc)
-- group. The grouping spans the whole table, so rows are compared per distinct
-- combination of values, not against the row that precedes them.
-- NOTE(review): %%physloc%% is presumably SQL Server's undocumented physical
-- row locator - confirm.
SELECT * INTo #Temp_Final
FROM #temp
WHERE #temp.%%physloc%%
NOT IN (SELECT Min(b.%%physloc%%)
FROM #temp b
GROUP BY eid,name,estid,ecid,epid,etc)
ORDER BY id
-- List the rows of #temp whose id was not copied into #Temp_Final.
SELECT * FROM #temp WHERE id not in (SELECT id FROM #Temp_Final) ORDER BY id
But I wasn't getting the result I expected...
This is how the result needs to be
-- Expected output, spelled out by id.
select *
from #temp
where id in (1, 3, 4, 6, 7, 8, 9, 11, 13, 14, 16, 17, 18, 19)
You can do this with a simple self-join and appropriate comparison:
-- Return each row that has no predecessor (id - 1) or differs from it in any
-- compared column. eid is part of the comparison: in the sample data ids 10
-- and 11 differ only in eid, and id 11 belongs to the expected result.
select t.*
from #temp t left outer join
     #temp tprev
     on t.id = tprev.id + 1
where tprev.id is null or
      t.eid <> tprev.eid or
      t.name <> tprev.name or
      t.estid <> tprev.estid or
      t.ecid <> tprev.ecid or
      t.epid <> tprev.epid or
      t.etc <> tprev.etc;
This assumes that the ids are sequential with no gaps. If the ids are not, you can get the previous id using a correlated subquery or the lag() function.
Your title says "delete" but the question seems to just want the list of such rows. You can phrase this as a delete query if you need to.
For SQL Server 2012 (SQL Fiddle)
-- Pair every row with the values of the row before it (by id), then delete the
-- rows whose six compared columns all equal those previous values. The
-- comparison is written as an INTERSECT between the current and the previous
-- column lists.
;WITH CTE AS
(
    SELECT
        *,
        LAG(eid)   OVER (ORDER BY id) AS prev_eid,
        LAG(name)  OVER (ORDER BY id) AS prev_name,
        LAG(estid) OVER (ORDER BY id) AS prev_estid,
        LAG(ecid)  OVER (ORDER BY id) AS prev_ecid,
        LAG(epid)  OVER (ORDER BY id) AS prev_epid,
        LAG(etc)   OVER (ORDER BY id) AS prev_etc
    FROM #temp
)
DELETE FROM CTE
WHERE EXISTS
(
    SELECT eid, name, estid, ecid, epid, etc
    INTERSECT
    SELECT prev_eid, prev_name, prev_estid, prev_ecid, prev_epid, prev_etc
)
-- Keep a row only when no row with id - 1 carries the same eid, name, estid,
-- ecid, epid and etc values.
select
    t.id,
    t.eid,
    t.name,
    t.estid,
    t.ecid,
    t.epid,
    t.etc
from #temp as t
where not exists
(
    select 1
    from #temp as prev
    where prev.id    = t.id - 1
      and prev.eid   = t.eid
      and prev.name  = t.name
      and prev.estid = t.estid
      and prev.ecid  = t.ecid
      and prev.epid  = t.epid
      and prev.etc   = t.etc
)
This question already has answers here:
How can I remove duplicate rows?
(43 answers)
Closed 9 years ago.
I have a table that I need to delete duplicates. However, the table is designed so that each customer can have identical items. Here is a screen shot for example:
When I try to delete duplicates I also get Book Number that are the same for both customer.
The Green area shows actual duplicates, however the blue shows books that are not duplicate because customers can borrow the same books.
How do I delete only where the row is the same for each customer? So the green area.
Here is my code: It does not work if both customer has the same Book number.
-- Asker's query: rows are numbered per BookNumber only, so the same book held
-- by different customers is also reported with DUPS > 1.
SELECT numbered.*
FROM
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY BookNumber ORDER BY BookNumber DESC) AS DUPS
    FROM Store.Books
) AS numbered
WHERE numbered.DUPS > 1
The query below should give you a list of all the duplicate book numbers together with their customer IDs. All you then need is a simple DELETE statement based on those results to remove the duplicate records.
-- One row per (booknumber, customerID) pair that occurs more than once.
SELECT
    COUNT(bookNumber),
    booknumber,
    customerID
FROM TableName
GROUP BY
    booknumber,
    customerID
HAVING COUNT(booknumber) > 1
-- Number rows per (BookNumber, Customerid) so that only repeats of the same
-- book for the same customer get DUPS > 1.
;WITH CTE AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY BookNumber, Customerid ORDER BY BookNumber) AS DUPS
    FROM Store.Books
)
SELECT *
FROM CTE
WHERE DUPS > 1
-- if you want to delete, replace last line with this:
--DELETE FROM CTE WHERE DUPS > 1
I should mention that order by booknumber desc wasn't necessary so I removed the 'desc' part
Brothers, this is one way to find duplicate data, try it =)
-- Sample loans held in a table variable (table variables take the @ sigil).
DECLARE @tempTable TABLE(
    CustomerID SMALLINT,
    BookLoan NVARCHAR(255),
    BookNumber INT,
    BookAuthor NVARCHAR(255)
)
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'Clash Of Titans',12345,'Dick VanDyke ')
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'Clash Of Titans',12345,'Dick VanDyke ')
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'Clash Of Titans',23457,'Dick VanDyke ')
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'History of Soda',99899,'Brian Adams ')

-- Report: every fully identical row that occurs more than once.
Select CustomerID, BookLoan, BookNumber, BookAuthor, Count(*) 'Occurrance'
From @tempTable
Group by CustomerID, BookLoan, BookNumber, BookAuthor
having count(*) > 1

-- Remove the surplus copies but keep one row of each duplicated group.
-- Comparing each column with "= (subquery)" fails as soon as more than one
-- duplicated group exists and would delete every copy; numbering the rows of
-- each group and deleting numbers above 1 handles any number of groups.
;WITH Numbered AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY CustomerID, BookLoan, BookNumber, BookAuthor
                              ORDER BY BookNumber) AS DUPS
    FROM @tempTable
)
DELETE FROM Numbered
WHERE DUPS > 1

Select * from @tempTable
Or alternative way as below here
-- Sample loans held in a table variable (table variables take the @ sigil).
-- 'History of Soda' shares BookNumber 12345 with another title, which shows
-- that only fully identical rows count as duplicates.
DECLARE @tempTable TABLE(
    CustomerID SMALLINT,
    BookLoan NVARCHAR(255),
    BookNumber INT,
    BookAuthor NVARCHAR(255)
)
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'Clash Of Titans',12345,'Dick VanDyke ')
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'Clash Of Titans',12345,'Dick VanDyke ')
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'Clash Of Titans',23457,'Dick VanDyke ')
INSERT INTO @tempTable (CustomerID, BookLoan, BookNumber, BookAuthor) Values(112,'History of Soda',12345,'Brian Adams ')

-- Number the rows of every group of fully identical rows; delete through the
-- CTE so that row 1 of each group is kept and only the surplus copies go.
-- Comparing each column with "= (SELECT ... WHERE DUPS > 1)" fails when more
-- than one surplus row exists and would also delete the copy to keep.
;WITH CTE AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY CustomerID, BookLoan, BookNumber, BookAuthor
                              ORDER BY BookNumber) AS DUPS
    FROM @tempTable
)
DELETE FROM CTE
WHERE DUPS > 1

SELECT * FROM @tempTable