What is wrong with the CTE syntax in this Sql Server query? - sql

Can anyone explain why Sql Server is complaining about the syntax around the "WITH" clause?
Thanks for any help.
CREATE TABLE TestTable1 (
Id int not null,
Version int not null constraint d_Ver default (0),
[Name] nvarchar(50) not null,
CONSTRAINT pk_TestTable1 PRIMARY KEY (Id, Version)
);
GO
CREATE TRIGGER trg_iu_UniqueActiveName
ON [dbo].[TestTable1]
AFTER INSERT, UPDATE
AS
IF(UPDATE([Name]))
BEGIN
IF(
(
WITH MaxVers AS
(SELECT Id, Max(Version) AS MaxVersion
FROM [dbo].[TestTable1]
GROUP BY Id)
SELECT Count(1)
FROM [dbo].[TestTable1] t
INNER JOIN MaxVers ON t.Id = MaxVers.Id AND t.Version = MaxVers.MaxVersion
WHERE t.[Name] = inserted.[Name]
)
> 0
)
BEGIN
DECLARE #name nvarchar(50)
SELECT #name = [Name] FROM inserted;
RAISERROR('The name "%s" is already in use.', 16, 1, #name);
END
END;
GO
Edit 2:
For anyone who is curious, here is the CTE version that incorporates all of the great comments below. I think I will switch to the sub-query approach so that I can use the "EXISTS" as suggested.
CREATE TRIGGER trg_iu_UniqueActiveName
ON [dbo].[TestTable1]
AFTER INSERT, UPDATE
AS
IF(UPDATE([Name]))
BEGIN
DECLARE #cnt [int];
WITH MaxVers AS
(SELECT Id, Max(Version) AS MaxVersion
FROM [dbo].[TestTable1]
GROUP BY Id)
SELECT #cnt = COUNT(1)
FROM [dbo].[TestTable1] t
INNER JOIN MaxVers ON t.Id = MaxVers.Id AND t.Version = MaxVers.MaxVersion
INNER JOIN [inserted] i ON t.[Id] = MaxVers.[Id]
WHERE t.[Name] = i.[Name] AND NOT [t].[Id] = [i].[Id] ;
IF( #cnt > 0)
BEGIN
DECLARE #name nvarchar(50)
SELECT #name = [Name] FROM inserted;
RAISERROR('The name "%s" is already in use by an active entity.', 16, 1, #name);
ROLLBACK TRANSACTION;
END
END;
GO
Edit 3: Here is the "Exists" version (Note, I think that the select in the error handling part would not work correctly with more than one inserted record):
CREATE TRIGGER trg_iu_UniqueActiveName
ON [dbo].[TestTable1]
AFTER INSERT, UPDATE
AS
IF(UPDATE([Name]))
BEGIN
IF(EXISTS (
SELECT t.Id
FROM [dbo].[TestTable1] t
INNER JOIN (
SELECT Id, Max(Version) AS MaxVersion
FROM [dbo].[TestTable1]
GROUP BY Id) maxVer
ON t.[Id] = [maxVer].[Id] AND [t].[Version] = [maxVer].[MaxVersion]
INNER JOIN [inserted] i ON t.[Id] = MaxVer.[Id]
WHERE [t].[Name] = [i].[Name] AND NOT [t].[Id] = [i].[Id]
))
BEGIN
DECLARE #name nvarchar(50)
SELECT #name = [Name] FROM inserted;
RAISERROR('The name "%s" is already in use by an active entity.', 16, 1, #name);
ROLLBACK TRANSACTION;
END
END;
GO

The only thing I can figure is that the statement "When a CTE is used in a statement that is part of a batch, the statement before it must be followed by a semicolon." (Transact SQL Reference) means that a CTE can not be used within an IF statement.
BTW, you have two other errors: 1) inserted pseudo table is not included in the first sub-query, even though you reference it in the were clause. 2) Your trigger is assuming a single row is being inserted or updated. It is possible that there would be multiple duplicate names but the raiserror will only report one of them.
EDIT And avoid (select count(*) ...) > when exists (select * ....) will do The exists can stop at the first row.
EDIT 2 Crap. SQL Server trigges default to after triggers. So the row you are checking for existence on already exists in the table when the trigger fire:
CREATE TRIGGER trg_iu_UniqueActiveName
ON [dbo].[TestTable1]
AFTER INSERT, UPDATE
AS
IF(UPDATE([Name]))
BEGIN
IF EXISTS
(
SELECT *
FROM [dbo].[TestTable1] t
INNER JOIN inserted i on i.[NAME] = t.[NAME]
INNER JOIN (SELECT Id, Max(Version) AS MaxVersion
FROM [dbo].[TestTable1]
GROUP BY Id) MaxVers ON t.Id = MaxVers.Id AND t.Version = MaxVers.MaxVersion
)
BEGIN
DECLARE #name nvarchar(50)
SELECT #name = [Name] FROM inserted;
RAISERROR('The name "%s" is already in use.', 16, 1, #name);
END
END;
GO
insert into testTable1 (name) values ('Hello')
results in:
Msg 50000, Level 16, State 1, Procedure trg_iu_UniqueActiveName, Line 20
The name "Hello" is already in use.
(1 row(s) affected)
Plus, the raiserror does not perform a rollback, so the row is still there.

I don't think that you can use CTEs with inner queries.
Use this as workaround:
DECLARE #cnt int;
WITH MaxVers AS
(SELECT Id, Max(Version) AS MaxVersion
FROM [dbo].[TestTable1]
GROUP BY Id)
SELECT #cnt = Count(1)
FROM [dbo].[TestTable1] t
INNER JOIN MaxVers ON t.Id = MaxVers.Id AND t.Version = MaxVers.MaxVersion
WHERE t.[Name] = inserted.[Name];
IF #cnt > 0
BEGIN
DECLARE #name nvarchar(50)
SELECT #name = [Name] FROM inserted;
RAISERROR('The name "%s" is already in use.', 16, 1, #name);
END

Doesn't appear to like the WITH statement inside an IF does it.
Try the following SQL instead:
SELECT COUNT(1)
FROM TestTable1 t1
WHERE t.Name = (SELECT [Name] FROM inserted)
AND t.Version = (SELECT MAX(Version) FROM TestTable1 t2 WHERE t2.Id = t.Id)
Much simpler in my opinion. This doesn't account for multiple rows in the inserted table however. Change it to an IN rather than an = would probably do that.
As others have noted sometimes putting a semi-colon in from of the WITH statement works, but I couldn't get it to in this instance.

Related

Trigger that prevents update of column based on result of the user defined function

We have DVD Rental company. In this particular scenario we consider only Member, Rental and Membership tables.
The task is to write a trigger that prevents a customer from being shipped a DVD
if they have reached their monthly limit for DVD rentals as per their membership contract using the function.
My trigger leads to infinite loop. It works without While loop, but then it does not work properly, if I consider multiple updates to the Rental table. Where I am wrong?
-- do not run, infinite loop
CREATE OR ALTER TRIGGER trg_Rental_StopDvdShip
ON RENTAL
FOR UPDATE
AS
BEGIN
DECLARE #MemberId INT
DECLARE #RentalId INT
SELECT * INTO #TempTable FROM inserted
WHILE (EXISTS (SELECT RentalId FROM #TempTable))
BEGIN
IF UPDATE(RentalShippedDate)
BEGIN
IF (SELECT TotalDvdLeft FROM dvd_numb_left(#MemberId)) <= 0
BEGIN
ROLLBACK
RAISERROR ('YOU HAVE REACHED MONTHLY LIMIT FOR DVD RENTALS', 16, 1)
END;
END;
DELETE FROM #TempTable WHERE RentalID = #RentalId
END;
END;
My function looks as follows:
CREATE OR ALTER FUNCTION dvd_numb_left(#member_id INT)
RETURNS #tab_dvd_numb_left TABLE(MemberId INT, Name VARCHAR(50), TotalDvdLeft INT, AtTimeDvdLeft INT)
AS
BEGIN
DECLARE #name VARCHAR(50)
DECLARE #dvd_total_left INT
DECLARE #dvd_at_time_left INT
DECLARE #dvd_limit INT
DECLARE #dvd_rented INT
DECLARE #dvd_at_time INT
DECLARE #dvd_on_rent INT
SET #dvd_limit = (SELECT Membership.MembershipLimitPerMonth FROM Membership
WHERE Membership.MembershipId = (SELECT Member.MembershipId FROM Member WHERE Member.MemberId = #member_id))
SET #dvd_rented = (SELECT COUNT(Rental.MemberId) FROM Rental
WHERE CONCAT(month(Rental.RentalShippedDate), '.', year(Rental.RentalShippedDate)) = CONCAT(month(GETDATE()), '.', year(GETDATE())) AND Rental.MemberId = #member_id)
SET #dvd_at_time = (SELECT Membership.DVDAtTime FROM Membership
WHERE Membership.MembershipId = (SELECT Member.MembershipId FROM Member WHERE Member.MemberId = #member_id))
SET #dvd_on_rent = (SELECT COUNT(Rental.MemberId) FROM Rental
WHERE Rental.MemberId = #member_id AND Rental.RentalReturnedDate IS NULL)
SET #name = (SELECT CONCAT(Member.MemberFirstName, ' ', Member.MemberLastName) FROM Member WHERE Member.MemberId = #member_id)
SET #dvd_total_left = #dvd_limit - #dvd_rented
SET #dvd_at_time_left = #dvd_at_time - #dvd_on_rent
IF #dvd_total_left < 0
BEGIN
SET #dvd_total_left = 0
SET #dvd_at_time_left = 0
INSERT INTO #tab_dvd_numb_left(MemberId, Name, TotalDvdLeft, AtTimeDvdLeft)
VALUES(#member_id, #name, #dvd_total_left, #dvd_at_time_left)
RETURN;
END
INSERT INTO #tab_dvd_numb_left(MemberId, Name, TotalDvdLeft, AtTimeDvdLeft)
VALUES(#member_id, #name, #dvd_total_left, #dvd_at_time_left)
RETURN;
END;
Will be glad for any advice.
Your main issue is that even though you populate #TempTable you never pull any values from it.
CREATE OR ALTER TRIGGER trg_Rental_StopDvdShip
ON RENTAL
FOR UPDATE
AS
BEGIN
DECLARE #MemberId INT, #RentalId INT;
-- Move test for column update to the first test as it applies to the entire update, not per row.
IF UPDATE(RentalShippedDate)
BEGIN
SELECT * INTO #TempTable FROM inserted;
WHILE (EXISTS (SELECT RentalId FROM #TempTable))
BEGIN
-- Actually pull some information from #TempTable - this wasn't happening before
SELECT TOP 1 #RentalID = RentalId, #MemberId = MemberId FROM #TempTable;
-- Select our values to its working
-- SELECT #RentalID, #MemberId;
IF (SELECT TotalDvdLeft FROM dvd_numb_left(#MemberId)) <= 0
BEGIN
ROLLBACK
RAISERROR ('YOU HAVE REACHED MONTHLY LIMIT FOR DVD RENTALS', 16, 1)
END;
-- Delete the current handled row
DELETE FROM #TempTable WHERE RentalID = #RentalId
END;
-- For neatness I always drop temp tables, makes testing easier also
DROP TABLE #TempTable;
END;
END;
An easy way to debug simply triggers like this is to copy the T-SQL out and then create an #Inserted table variable e.g.
DECLARE #Inserted table (RentalId INT, MemberId INT);
INSERT INTO #Inserted (RentalId, MemberId)
VALUES (1, 1), (2, 2);
DECLARE #MemberId INT, #RentalId INT;
-- Move test for column update to the first test as it applies to the entire update, not per row.
-- IF UPDATE(RentalShippedDate)
BEGIN
SELECT * INTO #TempTable FROM #inserted;
WHILE (EXISTS (SELECT RentalId FROM #TempTable))
BEGIN
-- Actually pull some information from #TempTable - this wasn't happening before
SELECT TOP 1 #RentalID = RentalId, #MemberId = MemberId FROM #TempTable;
-- Select our values to its working
SELECT #RentalID, #MemberId;
-- IF (SELECT TotalDvdLeft FROM dvd_numb_left(#MemberId)) <= 0
-- BEGIN
-- ROLLBACK
-- RAISERROR ('YOU HAVE REACHED MONTHLY LIMIT FOR DVD RENTALS', 16, 1)
-- END;
-- Delete the current handled row
DELETE FROM #TempTable WHERE RentalID = #RentalId
END;
-- For neatness I always drop temp tables, makes testing easier also
DROP TABLE #TempTable;
END;
Note: throw is the recommended way to throw an error instead of raiserror.
Another thing to consider is that you must try to transform your UDF into an inline TVF because of some side effects.
Like this one:
CREATE OR ALTER FUNCTION dvd_numb_left(#member_id INT)
RETURNS TABLE
AS
RETURN
(
WITH
TM AS
(SELECT Membership.MembershipLimitPerMonth AS dvd_limit,
Membership.DVDAtTime AS dvd_at_time,
CONCAT(Member.MemberFirstName, ' ', Member.MemberLastName) AS [name]
FROM Membership AS MS
JOIN Member AS M
ON MS.MembershipId = M.MembershipId
WHERE M.MemberId = #member_id
),
TR AS
(SELECT COUNT(Rental.MemberId) AS dvd_rented
FROM Rental
WHERE YEAR(Rental.RentalShippedDate ) = YEAR(GETDATE)
AND MONTH(Rental.RentalShippedDate ) = MONTH(GETDATE)
AND Rental.MemberId = #member_id
)
SELECT MemberId, [Name],
CASE WHEN dvd_limit - dvd_rented < 0 THEN 0 ELSE dvd_limit - dvd_rented END AS TotalDvdLeft,
CASE WHEN dvd_limit - dvd_rented < 0 THEN 0 ELSE dvd_at_time - dvd_on_rent END AS AtTimeDvdLeft
FROM TM CROSS JOIN TR
);
GO
Which will be much more efficient.
The absolute rule to have performances is: TRY TO STAY IN A "SET BASED" CODE instead of iterative code.
The above function can be optimized by the optimzer whilet yours cannot and will needs 4 access to the same tables.

SQL - After insert Same Table

So I understand recursive triggers. Got to be careful of deadlocks etc. However this is only after an insert not after insert and update. Also, I have an audit trigger table that I am updating to make sure all is well. And querying after to double check. All looks fine but no update happens.
if exists (select 'a' from sys.triggers where name = 'invoicememologic')
begin
drop trigger invoicememologic
end
go
create trigger invoicememologic
on transactiontable
after insert
as
begin
declare #inum varchar(1000)
select #inum = min(transactioninvnum)
from
(select transactioninvnum
from inserted i
inner join project p on left(i.projectid, charindex(':', i.projectid)) = p.projectid
where right(i.projectid, 1) <> ':'
and abs(p.UseProjectMemoOnInv) = 1
group by transactioninvnum) b
while #inum is not null
begin
declare #rCount int
select #rCount = count(*)
from transactiontable
where TransactionInvNum = #inum
if #rCount = 1
begin
declare #tid varchar(100)
select #tid = transactionid
from transactiontable
where TransactionInvNum = #inum
declare #pmemo varchar(MAX)
select #pmemo = p.projectMemo
from transactiontable tt
inner join project p on left(tt.projectid, charindex(':', tt.projectid)) = p.projectid
where transactionInvNum = #inum
insert into audittrigger
values (#pmemo, #tid)
update transactiontable
set transactionmemo2 = #pmemo
where ltrim(rtrim(transactionid)) = ltrim(rtrim(#tid))
end
select #inum = min(transactioninvnum)
from
(select transactioninvnum
from inserted i
inner join project p on left(i.projectid, charindex(':', i.projectid)) = p.projectid
where abs(transactionjointinv) = 1
and right(i.projectid, 1) <> ':'
and abs(p.UseProjectMemoOnInv) = 1
group by transactioninvnum ) a
where transactioninvnum > #inum
end
end
Reason for trigger. 1 Invoice can be multiple rows in the database. 3 rows. So it only should update any one of the 3 rows. Doesn't matter. And it must grab the memo from the parent project of the phases that are being inserted into the database. hence the inner join on the left with charindex.
So I check the audit table. All looks well there. MEMO is there and the transactionid is there. I query after the trigger fires. TransactionID exists in the transactiontable but the memo2 is not being updated.
TransactionMemo2 is type of ntext. I thought it might be varchar with a direct update command will fail. I tried to update manually through setting a var as the text string and call the update manually with the transactionid being required. all worked fine. I am lost

SQL insert trigger condition statement and multiple rows

Could you please help me to finish my trigger. What i got so far:
CREATE TRIGGER [dbo].[atbl_Sales_OrdersLines_ITrigGG]
ON [dbo].[atbl_Sales_OrdersLines]
FOR INSERT
AS
BEGIN
DECLARE #ID INT = (SELECT ProductID
FROM INSERTED)
DECLARE #OrderedQ INT = (SELECT SUM(Amount)
FROM atbl_Sales_OrdersLines
WHERE ProductID = #ID)
DECLARE #CurrentQ INT = (SELECT Quantity
FROM atbl_Sales_Products
WHERE ProductID = #ID)
DECLARE #PossibleQ INT = (SELECT Amount
FROM INSERTED
WHERE ProductID = #ID)
IF (#CurrentQ - #OrderedQ >= #PossibleQ)
ELSE
END
I need to complete the code. Can not figure out how to do it. I need that if condition is met - trigger would allow the insert. If else, trigger would stop the insert/or rollback and prompt a message that quantity is not sufficient.
Also, will this code work if insert is multiple lines with different product ids?
Thanks.
Something like this might work. This trigger checks the products that are in the insert, summing the total that have been ordered (now and in the past), and if any of them exceed the available quantity, the whole transaction is rolled back. Whenever writing triggers, you want to avoid any assumptions that there is a single row being inserted/updated/deleted, and avoid cursors. You want to just use basic set based operations.
CREATE TRIGGER [dbo].[atbl_Sales_OrdersLines_ITrigGG]
ON [dbo].[atbl_Sales_OrdersLines]
FOR INSERT
AS
BEGIN
IF (exists (select 1 from (
select x.ProductId, totalOrdersQty, ISNULL(asp.Quantity, 0) PossibleQty from (
select i.ProductId, sum(aso.Amount) totalOrdersQty
from (select distinct ProductId from inserted) i
join atbl_Sales_OrdersLines aso on aso.ProductId = i.ProductId
group by productId) x
left join atbl_Sales_Product asp on asp.ProductId = x.ProductId
) x
where PossibleQty < totalOrdersQty))
BEGIN
RAISERROR ('Quantity is not sufficient' ,10,1)
ROLLBACK TRANSACTION
END
END
I still think this is a horrible idea.
Try this,
CREATE TRIGGER [dbo].[atbl_Sales_OrdersLines_ITrigGG]
ON [dbo].[atbl_Sales_OrdersLines]
INSTEAD OF INSERT --FOR INSERT
AS
BEGIN
DECLARE #ID INT = (SELECT ProductID
FROM INSERTED)
DECLARE #OrderedQ INT = (SELECT SUM(Amount)
FROM atbl_Sales_OrdersLines
WHERE ProductID = #ID)
DECLARE #CurrentQ INT = (SELECT Quantity
FROM atbl_Sales_Products
WHERE ProductID = #ID)
DECLARE #PossibleQ INT = (SELECT Amount
FROM INSERTED
WHERE ProductID = #ID)
IF (#CurrentQ - #OrderedQ >= #PossibleQ)
BEGIN
INSERT INTO YOURTABLE (COLUMN1, COLUMN2, COLUMN3, ..)
SELECT COLUMN1, COLUMN2, COLUMN3, ..
FROM inserted
END
ELSE
BEGIN
RAISERROR ('Quantity is not sufficient' ,10,1)
ROLLBACK TRANSACTION
END

Count difference between distinct and join over distinct

How do I count the number of distinct rows minus a join over those same distinct rows?
I'm writing after triggers where I need to raise an error if the user does not have rights to the rows submitted. I can do this in two statements but this seems inefficient.
DECLARE #AccessibleCount INT =
(
SELECT
COUNT(DISTINCT i.[ParentId])
FROM
inserted i
INNER JOIN [SuperSecret].[Parent] AS p ON
p.[Id] = i.[ParentId] AND
p.[LockedBy] = #UserId
);
DECLARE #ActualCount INT = (SELECT COUNT(DISTINCT [ParentId]) FROM inserted);
IF (#AccessibleCount <> #ActualCount)
BEGIN
RAISERROR(...);
ROLLBACK TRANSACTION;
END
For performance sake, it seems like I should use a subquery over the distinct inserted.ParentId for both counts. I tried the following but it resulted in "Invalid object name 'i'."
DECLARE #ActualMinusAccessible INT =
(
SELECT
COUNT(*)
-
(
SELECT
COUNT(*)
FROM
i
INNER JOIN [SuperSecret].[Parent] AS p ON
p.[Id] = i.[ParentId] AND
p.[LockedBy] = #UserId
)
FROM
(
SELECT DISTINCT [ParentId] FROM inserted
) AS i
);
IF (#ActualMinusAccessible <> 0)
BEGIN
RAISERROR (...);
ROLLBACK TRANSACTION;
END
If am not wrong you want to Raise Error if a [ParentId] is inserted which is not present in [SuperSecret].[Parent] table. Try changing your SQL query like this.
IF EXISTS (SELECT 1
FROM inserted i
WHERE NOT EXISTS (SELECT 1
FROM [SuperSecret].[Parent] a
WHERE i.[ParentId] = a.[ParentId] AND a.[LockedBy] = #UserId))
BEGIN
RAISERROR (...);
ROLLBACK TRANSACTION;
END
OR
IF (SELECT Count(DISTINCT [ParentId]) - (SELECT Count(DISTINCT i.[ParentId])
FROM inserted i
INNER JOIN [SuperSecret].[Parent] AS p
ON p.[Id] = i.[ParentId]
AND p.[LockedBy] = #UserId)
FROM inserted) <> 0
BEGIN
RAISERROR (...);
ROLLBACK TRANSACTION;
END

Delete duplicate records in SQL Server?

Consider a column named EmployeeName table Employee. The goal is to delete repeated records, based on the EmployeeName field.
EmployeeName
------------
Anand
Anand
Anil
Dipak
Anil
Dipak
Dipak
Anil
Using one query, I want to delete the records which are repeated.
How can this be done with TSQL in SQL Server?
You can do this with window functions. It will order the dupes by empId, and delete all but the first one.
delete x from (
select *, rn=row_number() over (partition by EmployeeName order by empId)
from Employee
) x
where rn > 1;
Run it as a select to see what would be deleted:
select *
from (
select *, rn=row_number() over (partition by EmployeeName order by empId)
from Employee
) x
where rn > 1;
Assuming that your Employee table also has a unique column (ID in the example below), the following will work:
delete from Employee
where ID not in
(
select min(ID)
from Employee
group by EmployeeName
);
This will leave the version with the lowest ID in the table.
Edit
Re McGyver's comment - as of SQL 2012
MIN can be used with numeric, char, varchar, uniqueidentifier, or datetime columns, but not with bit columns
For 2008 R2 and earlier,
MIN can be used with numeric, char, varchar, or datetime columns, but not with bit columns (and it also doesn't work with GUID's)
For 2008R2 you'll need to cast the GUID to a type supported by MIN, e.g.
delete from GuidEmployees
where CAST(ID AS binary(16)) not in
(
select min(CAST(ID AS binary(16)))
from GuidEmployees
group by EmployeeName
);
SqlFiddle for various types in Sql 2008
SqlFiddle for various types in Sql 2012
You could try something like the following:
delete T1
from MyTable T1, MyTable T2
where T1.dupField = T2.dupField
and T1.uniqueField > T2.uniqueField
(this assumes that you have an integer based unique field)
Personally though I'd say you were better off trying to correct the fact that duplicate entries are being added to the database before it occurs rather than as a post fix-it operation.
DELETE
FROM MyTable
WHERE ID NOT IN (
SELECT MAX(ID)
FROM MyTable
GROUP BY DuplicateColumn1, DuplicateColumn2, DuplicateColumn3)
WITH TempUsers (FirstName, LastName, duplicateRecordCount)
AS
(
SELECT FirstName, LastName,
ROW_NUMBER() OVER (PARTITIONBY FirstName, LastName ORDERBY FirstName) AS duplicateRecordCount
FROM dbo.Users
)
DELETE
FROM TempUsers
WHERE duplicateRecordCount > 1
WITH CTE AS
(
SELECT EmployeeName,
ROW_NUMBER() OVER(PARTITION BY EmployeeName ORDER BY EmployeeName) AS R
FROM employee_table
)
DELETE CTE WHERE R > 1;
The magic of common table expressions.
Try
DELETE
FROM employee
WHERE rowid NOT IN (SELECT MAX(rowid) FROM employee
GROUP BY EmployeeName);
If you're looking for a way to remove duplicates, yet you have a foreign key pointing to the table with duplicates, you could take the following approach using a slow yet effective cursor.
It will relocate the duplicate keys on the foreign key table.
create table #properOlvChangeCodes(
id int not null,
name nvarchar(max) not null
)
DECLARE #name VARCHAR(MAX);
DECLARE #id INT;
DECLARE #newid INT;
DECLARE #oldid INT;
DECLARE OLVTRCCursor CURSOR FOR SELECT id, name FROM Sales_OrderLineVersionChangeReasonCode;
OPEN OLVTRCCursor;
FETCH NEXT FROM OLVTRCCursor INTO #id, #name;
WHILE ##FETCH_STATUS = 0
BEGIN
-- determine if it should be replaced (is already in temptable with name)
if(exists(select * from #properOlvChangeCodes where Name=#name)) begin
-- if it is, finds its id
Select top 1 #newid = id
from Sales_OrderLineVersionChangeReasonCode
where Name = #name
-- replace terminationreasoncodeid in olv for the new terminationreasoncodeid
update Sales_OrderLineVersion set ChangeReasonCodeId = #newid where ChangeReasonCodeId = #id
-- delete the record from the terminationreasoncode
delete from Sales_OrderLineVersionChangeReasonCode where Id = #id
end else begin
-- insert into temp table if new
insert into #properOlvChangeCodes(Id, name)
values(#id, #name)
end
FETCH NEXT FROM OLVTRCCursor INTO #id, #name;
END;
CLOSE OLVTRCCursor;
DEALLOCATE OLVTRCCursor;
drop table #properOlvChangeCodes
delete from person
where ID not in
(
select t.id from
(select min(ID) as id from person
group by email
) as t
);
Please see the below way of deletion too.
Declare #Employee table (EmployeeName varchar(10))
Insert into #Employee values
('Anand'),('Anand'),('Anil'),('Dipak'),
('Anil'),('Dipak'),('Dipak'),('Anil')
Select * from #Employee
Created a sample table named #Employee and loaded it with given data.
Delete aliasName from (
Select *,
ROW_NUMBER() over (Partition by EmployeeName order by EmployeeName) as rowNumber
From #Employee) aliasName
Where rowNumber > 1
Select * from #Employee
Result:
I know, this is asked six years ago, posting just incase it is helpful for anyone.
Here's a nice way of deduplicating records in a table that has an identity column based on a desired primary key that you can define at runtime. Before I start I'll populate a sample data set to work with using the following code:
if exists (select 1 from sys.all_objects where type='u' and name='_original')
drop table _original
declare #startyear int = 2017
declare #endyear int = 2018
declare #iterator int = 1
declare #income money = cast((SELECT round(RAND()*(5000-4990)+4990 , 2)) as money)
declare #salesrepid int = cast(floor(rand()*(9100-9000)+9000) as varchar(4))
create table #original (rowid int identity, monthyear varchar(max), salesrepid int, sale money)
while #iterator<=50000 begin
insert #original
select (Select cast(floor(rand()*(#endyear-#startyear)+#startyear) as varchar(4))+'-'+ cast(floor(rand()*(13-1)+1) as varchar(2)) ), #salesrepid , #income
set #salesrepid = cast(floor(rand()*(9100-9000)+9000) as varchar(4))
set #income = cast((SELECT round(RAND()*(5000-4990)+4990 , 2)) as money)
set #iterator=#iterator+1
end
update #original
set monthyear=replace(monthyear, '-', '-0') where len(monthyear)=6
select * into _original from #original
Next I'll create a Type called ColumnNames:
create type ColumnNames AS table
(Columnnames varchar(max))
Finally I will create a stored proc with the following 3 caveats:
1. The proc will take a required parameter #tablename that defines the name of the table you are deleting from in your database.
2. The proc has an optional parameter #columns that you can use to define the fields that make up the desired primary key that you are deleting against. If this field is left blank, it is assumed that all the fields besides the identity column make up the desired primary key.
3. When duplicate records are deleted, the record with the lowest value in it's identity column will be maintained.
Here is my delete_dupes stored proc:
create proc delete_dupes (#tablename varchar(max), #columns columnnames readonly)
as
begin
declare #table table (iterator int, name varchar(max), is_identity int)
declare #tablepartition table (idx int identity, type varchar(max), value varchar(max))
declare #partitionby varchar(max)
declare #iterator int= 1
if exists (select 1 from #columns) begin
declare #columns1 table (iterator int, columnnames varchar(max))
insert #columns1
select 1, columnnames from #columns
set #partitionby = (select distinct
substring((Select ', '+t1.columnnames
From #columns1 t1
Where T1.iterator = T2.iterator
ORDER BY T1.iterator
For XML PATH ('')),2, 1000) partition
From #columns1 T2 )
end
insert #table
select 1, a.name, is_identity from sys.all_columns a join sys.all_objects b on a.object_id=b.object_id
where b.name = #tablename
declare #identity varchar(max)= (select name from #table where is_identity=1)
while #iterator>=0 begin
insert #tablepartition
Select distinct case when #iterator=1 then 'order by' else 'over (partition by' end ,
substring((Select ', '+t1.name
From #table t1
Where T1.iterator = T2.iterator and is_identity=#iterator
ORDER BY T1.iterator
For XML PATH ('')),2, 5000) partition
From #table T2
set #iterator=#iterator-1
end
declare #originalpartition varchar(max)
if #partitionby is null begin
select #originalpartition = replace(b.value+','+a.type+a.value ,'over (partition by','') from #tablepartition a cross join #tablepartition b where a.idx=2 and b.idx=1
select #partitionby = a.type+a.value+' '+b.type+a.value+','+b.value+') rownum' from #tablepartition a cross join #tablepartition b where a.idx=2 and b.idx=1
end
else
begin
select #originalpartition=b.value +','+ #partitionby from #tablepartition a cross join #tablepartition b where a.idx=2 and b.idx=1
set #partitionby = (select 'OVER (partition by'+ #partitionby + ' ORDER BY'+ #partitionby + ','+b.value +') rownum'
from #tablepartition a cross join #tablepartition b where a.idx=2 and b.idx=1)
end
exec('select row_number() ' + #partitionby +', '+#originalpartition+' into ##temp from '+ #tablename+'')
exec(
'delete a from _original a
left join ##temp b on a.'+#identity+'=b.'+#identity+' and rownum=1
where b.rownum is null')
drop table ##temp
end
Once this is complied, you can delete all your duplicate records by running the proc. To delete dupes without defining a desired primary key use this call:
exec delete_dupes '_original'
To delete dupes based on a defined desired primary key use this call:
declare #table1 as columnnames
insert #table1
values ('salesrepid'),('sale')
exec delete_dupes '_original' , #table1