Delete duplicate rows from a table and referenced table microsoft sql server

Delete duplicate rows from a table and referenced table microsoft sql server - sql

I have table Person :
PersonId | FirstName | LastName |
1 | 'John' | 'Doe' |
2 | 'Mike' | 'Test' |
3 | 'John' | 'Doe' |
4 | 'Mike' | 'Test' |
5 | 'John' | 'Doe' |
6 | 'John' | 'Doe' |
Table Customer :
CustomerId | PersonId |
1001 | 1 |
1002 | 2 |
1003 | 3 |
1004 | 4 |
1005 | 5 |
1006 | 6 |
I want to delete Customers 1003, 1004, 1005 and 1006 because their Persons are duplicates, even though the PersonIds are not the same.
The query should compare FirstName and LastName in the Person table, delete the customers that reference duplicate persons from the Customer table, and then delete the duplicate rows from the Person table (PersonIds 3, 4, 5, 6).
Sorry if a similar question has been asked before, but I couldn't work this out.

Check this.
We delete from the Customer table first. We find the duplicate records with ROW_NUMBER() and delete every PersonId whose row number is greater than 1.
The query below shows the duplicate records:
-- Number every person inside its (FirstName, LastName) group, lowest PersonId first.
-- RID = 1 is the first row of a name; RID > 1 marks the duplicates of that name.
SELECT
    ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY PersonId) AS RID,
    PersonId,
    FirstName,
    LastName
FROM #Person
After finding the duplicates, we delete them from the Customer table and then from the Person table.
-- Step 1: delete the customers that reference a duplicate person.
-- A person is a duplicate when another row with the same FirstName and LastName
-- has a lower PersonId (row number > 1 within the name group).
-- The duplicates are computed from Person itself, the same table that step 2
-- deletes from, so both statements agree on which PersonIds are duplicates.
DELETE FROM Customer
WHERE PersonId IN
(
    SELECT d.PersonId
    FROM
    (
        SELECT
            PersonId,
            ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY PersonId) AS RID
        FROM Person
    ) AS d
    WHERE d.RID > 1
);

-- Step 2: delete the duplicate persons themselves. This runs after step 1 so
-- that no Customer row is left pointing at a removed Person.
DELETE FROM Person
WHERE PersonId IN
(
    SELECT d.PersonId
    FROM
    (
        SELECT
            PersonId,
            ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY PersonId) AS RID
        FROM Person
    ) AS d
    WHERE d.RID > 1
);

Use this query to view the duplicates:
-- List the customers whose person is a duplicate (same FirstName and LastName
-- as a person with a lower personid).
-- The partition uses the two columns separately: concatenating them would put
-- different pairs such as ('Jo', 'hnDoe') and ('John', 'Doe') in one group, and
-- a NULL in either column would make the whole key NULL.
-- The leading semicolon terminates any preceding statement, which WITH requires.
;WITH duplicatecte (personid, rownum) AS
(
    SELECT
        personid,
        ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY personid)
    FROM #person
)
SELECT
    b.personid,
    b.customerid
FROM duplicatecte AS a
INNER JOIN #customer AS b
    ON a.personid = b.personid
WHERE a.rownum > 1;
Modify this query to delete as below
-- Delete the customers whose person is a duplicate (same FirstName and LastName
-- as a person with a lower personid). Only #customer rows are removed here; the
-- duplicate #person rows are left in place.
-- The partition uses the two columns separately: concatenating them would put
-- different pairs such as ('Jo', 'hnDoe') and ('John', 'Doe') in one group, and
-- a NULL in either column would make the whole key NULL.
-- The leading semicolon terminates any preceding statement, which WITH requires.
;WITH duplicatecte (personid, rownum) AS
(
    SELECT
        personid,
        ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY personid)
    FROM #person
)
DELETE b
FROM duplicatecte AS a
INNER JOIN #customer AS b
    ON a.personid = b.personid
WHERE a.rownum > 1;

-- Self-contained demo: builds the sample data in temp tables, removes the
-- duplicate persons and the customers that referenced them, shows the result,
-- and rolls everything back so nothing is left behind.
BEGIN TRAN

CREATE TABLE #Person (PersonId INT, FirstName NVARCHAR(50), LastName NVARCHAR(50))
CREATE TABLE #Customer (CustomerId INT, PersonId INT)

INSERT INTO #Person (PersonId, FirstName, LastName)
SELECT 1, 'John', 'Doe' UNION ALL
SELECT 2, 'Mike', 'Test' UNION ALL
SELECT 3, 'John', 'Doe' UNION ALL
SELECT 4, 'Mike', 'Test' UNION ALL
SELECT 5, 'John', 'Doe' UNION ALL
SELECT 6, 'John', 'Doe'

INSERT INTO #Customer (CustomerId, PersonId)
SELECT 1001, 1 UNION ALL
SELECT 1002, 2 UNION ALL
SELECT 1003, 3 UNION ALL
SELECT 1004, 4 UNION ALL
SELECT 1005, 5 UNION ALL
SELECT 1006, 6
GO
-- Number the persons inside each (FirstName, LastName) group by PersonId.
-- The first CTE column is really PersonId (the original selected FirstName
-- under that name). To preview instead of delete, replace the DELETE below
-- with: SELECT * FROM CTE WHERE DuplicateCount > 1
WITH CTE (PersonId, DuplicateCount)
AS
(
    SELECT
        PersonId,
        ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY PersonId) AS DuplicateCount
    FROM #Person
)
DELETE FROM CTE
WHERE DuplicateCount > 1;

-- Remove customers whose person no longer exists. NOT EXISTS is used instead
-- of NOT IN because NOT IN matches nothing as soon as #Person contains a NULL
-- PersonId. Customers with a NULL PersonId are left alone, as before.
DELETE c
FROM #Customer AS c
WHERE c.PersonId IS NOT NULL
  AND NOT EXISTS (SELECT 1 FROM #Person AS p WHERE p.PersonId = c.PersonId);

SELECT PersonId, FirstName, LastName FROM #Person
SELECT CustomerId, PersonId FROM #Customer
ROLLBACK TRAN

-- Table variable holding the PersonIds that are duplicates.
-- Table variables must start with @; "#tbl" is not valid in DECLARE.
DECLARE @tbl TABLE
(
    pid INT
);

-- Collect every person that is not the first (lowest personid) row of its
-- (firstname, lastname) group.
;WITH cte AS
(
    SELECT
        t1.personid,
        ROW_NUMBER() OVER (PARTITION BY t1.firstname, t1.lastname ORDER BY t1.personid) AS rownum
    FROM person AS t1
)
INSERT INTO @tbl (pid)
SELECT personid
FROM cte
WHERE rownum > 1;

-- Delete the referencing customers first, then the duplicate persons, so no
-- customer row points at a person that has already been removed.
-- The subqueries select pid, the only column of @tbl. Selecting personid there
-- would bind to the outer table's own column and match every row.
DELETE FROM customer
WHERE personid IN (SELECT pid FROM @tbl);

DELETE FROM person
WHERE personid IN (SELECT pid FROM @tbl);

This will work for you:
-- Sample data held in table variables (table variable names start with @).
DECLARE @person TABLE
(
    PersonId int,
    FirstName varchar(25),
    LastName varchar(25)
);
DECLARE @customer TABLE
(
    CustomerId int,
    PersonId int
);

INSERT INTO @person (PersonId, FirstName, LastName) VALUES (1, 'John', 'Doe');
INSERT INTO @person (PersonId, FirstName, LastName) VALUES (2, 'Mike', 'Test');
INSERT INTO @person (PersonId, FirstName, LastName) VALUES (3, 'John', 'Doe');
INSERT INTO @person (PersonId, FirstName, LastName) VALUES (4, 'Mike', 'Test');
INSERT INTO @person (PersonId, FirstName, LastName) VALUES (5, 'John', 'Doe');
INSERT INTO @person (PersonId, FirstName, LastName) VALUES (6, 'John', 'Doe');

INSERT INTO @customer (CustomerId, PersonId) VALUES (1001, 1);
INSERT INTO @customer (CustomerId, PersonId) VALUES (1002, 2);
INSERT INTO @customer (CustomerId, PersonId) VALUES (1003, 3);
INSERT INTO @customer (CustomerId, PersonId) VALUES (1004, 4);
INSERT INTO @customer (CustomerId, PersonId) VALUES (1005, 5);
INSERT INTO @customer (CustomerId, PersonId) VALUES (1006, 6);

-- #temp is a real temp table because SELECT ... INTO cannot target a table
-- variable. Drop any left-over copy so the script can be re-run in one session.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp;

-- Collect the duplicate PersonIds: every row after the first of its
-- (FirstName, LastName) group. The numbered rows already carry PersonId, so
-- no join back to @person is needed.
SELECT a.PersonId
INTO #temp
FROM
(
    SELECT
        PersonId,
        ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY PersonId) AS rownumber
    FROM @person
) AS a
WHERE a.rownumber > 1;

-- Customers first, then persons.
DELETE FROM @customer WHERE PersonId IN (SELECT PersonId FROM #temp);
DELETE FROM @person WHERE PersonId IN (SELECT PersonId FROM #temp);

SELECT CustomerId, PersonId FROM @customer;
SELECT PersonId, FirstName, LastName FROM @person;

Related

How to populate given SQL records to all UserID

I have a table in this form:
id | firstname | lastname | userid
---+-----------+------------------------
1 | john | smith | 545868-5434-343435-35353
2 | adam | finger | 545868-5434-343435-35353
3 | teri | marti | 545868-5434-343435-35353
4 | pei | port | 545868-5434-343435-35353
The database contains many userids, and I need to populate the very same firstname and lastname values for every userid found in the database.
Here is my SQl Query
-- Customers joined to [s] where the owner, the assignee and the creator are
-- all the same user.
-- The Customer table is aliased as c; the original used c without declaring it.
-- NOTE(review): the original select list began with "cID"; it is read here as
-- c.id (the id column of the table shown above) - confirm against the schema.
SELECT
    c.id,
    c.firstname,
    c.lastname,
    [s].UserID,
    c.OwnerID
FROM Customer AS c
INNER JOIN [s]
    ON c.OwnerID = [s].UserID
    AND c.AssignedtoID = [s].UserID
    AND c.CreatedByUserID = [s].UserID
AssignedtoID is the same as UserID

is this helpful for you.?
-- Demo: copy the name of the first row (lowest id) of each userid onto every
-- row that shares that userid.
CREATE TABLE #tmpCustomer (id int, firstname VARCHAR(50), lastname VARCHAR(50), userid VARCHAR(100))

-- UNION ALL: the rows are distinct literals, so no de-duplication is needed.
INSERT INTO #tmpCustomer (id, firstname, lastname, userid)
SELECT 1, 'john', 'smith', '545868-5434-343435-35353'
UNION ALL
SELECT 2, 'adam', 'finger', '545868-5434-343435-35353'
UNION ALL
SELECT 3, 'teri', 'marti', '545868-5434-343435-35353'
UNION ALL
SELECT 4, 'pei', 'port', '545868-5434-343435-35353'
UNION ALL
SELECT 5, 'abc', 'xyz', '545868-5434-343435-35354'
UNION ALL
SELECT 6, 'mno', 'ert', '545868-5434-343435-35354'

-- cte1 numbers the rows of each userid by id; cte2 keeps the first row of each.
;WITH cte1 AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY userid ORDER BY id) AS rn,
        id,
        firstname,
        lastname,
        userid
    FROM #tmpCustomer
),
cte2 AS
(
    SELECT firstname, lastname, userid
    FROM cte1
    WHERE rn = 1
)
-- Both name columns are copied; the original updated firstname only, which
-- left first and last names from different rows combined.
UPDATE t
SET t.firstname = c.firstname,
    t.lastname = c.lastname
FROM #tmpCustomer AS t
INNER JOIN cte2 AS c
    ON t.userid = c.userid

SELECT id, firstname, lastname, userid FROM #tmpCustomer
DROP TABLE #tmpCustomer

I don't know if I understood your question correctly; try the solution posted below.
-- Demo: pair every id row with every customer name using a cross join.
-- Table variables must start with @; "#cust" / "#id" are not valid in DECLARE.
DECLARE @cust AS TABLE (firstname varchar(20), lastname varchar(20))

INSERT @cust (firstname, lastname)
VALUES
    ('Suzan', 'Smith')

DECLARE @id AS TABLE
(
    id int IDENTITY,
    anything varchar(20),
    row_inserted datetime2 DEFAULT (CAST(SYSDATETIME() AS datetime2))
)

INSERT @id (anything, row_inserted)
SELECT 'x', '20180305'
UNION ALL
SELECT 'y', '20180305'
UNION ALL
SELECT 'z', '20180305'

-- One output row per (id row, customer) combination.
SELECT
    s.id,
    c.firstname,
    c.lastname
FROM @id AS s
CROSS JOIN @cust AS c

Conditional selection of RowNum in SQL

I have written a query which returns me following data.
ID EmpFirstName EmpLastName RowNum
1 X Y 1
2 A B 1
3 A B 2
Now I want all records that belong to a group in which RowNum goes above 1. For example, in this case I need records 2 and 3 in the output.
If I add the condition RowNum > 1, I get only the third record, but I want the second as well.

Assuming your query is this:
-- Number the rows of each (EmpFirstName, EmpLastName) group by ID.
SELECT
    ID,
    EmpFirstName,
    EmpLastName,
    ROW_NUMBER() OVER (
        PARTITION BY EmpFirstName, EmpLastName
        ORDER BY ID
    ) AS RowNum
FROM aTable
This is a classic query used to filter out any duplicate values.
To select all the records that have duplicate values, I suggest using the COUNT() window function:
-- Alongside the row number, cnt carries the size of each
-- (EmpFirstName, EmpLastName) group; cnt > 1 keeps every row of a group that
-- has more than one row.
;WITH a AS
(
    SELECT
        ID,
        EmpFirstName,
        EmpLastName,
        ROW_NUMBER() OVER (PARTITION BY EmpFirstName, EmpLastName ORDER BY ID) AS RowNum,
        COUNT(*) OVER (PARTITION BY EmpFirstName, EmpLastName) AS cnt
    FROM aTable
)
SELECT *
FROM a
WHERE cnt > 1
ORDER BY
    EmpFirstName,
    EmpLastName
To test it use this query:
-- Test script for the COUNT() window approach.
-- The drop is guarded so the script also works the first time it is run,
-- when #tmp does not exist yet.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
    DROP TABLE #tmp
CREATE TABLE #tmp (ID int, EmpFirstName varchar(10), EmpLastName varchar(10))
go
INSERT INTO #tmp (ID, EmpFirstName, EmpLastName) VALUES
 (1, 'X', 'Y')
,(2, 'A', 'B')
,(3, 'A', 'B')
,(4, 'A', 'C')
,(5, 'B', 'C')
,(6, 'B', 'C')
-- cnt is the number of rows in each (EmpFirstName, EmpLastName) group.
-- COUNT(*) counts rows, so a NULL ID cannot shrink the count.
;WITH a AS
(
    SELECT
        ID,
        EmpFirstName,
        EmpLastName,
        ROW_NUMBER() OVER (PARTITION BY EmpFirstName, EmpLastName ORDER BY ID) AS RowNum,
        COUNT(*) OVER (PARTITION BY EmpFirstName, EmpLastName) AS cnt
    FROM #tmp
)
SELECT ID, EmpFirstName, EmpLastName, RowNum, cnt
FROM a
WHERE cnt > 1
ORDER BY EmpFirstName, EmpLastName
Result:
ID EmpFirstName EmpLastName RowNum cnt
----------- ------------ ----------- -------------------- -----------
2 A B 1 2
3 A B 2 2
5 B C 1 2
6 B C 2 2

I make sample data and use this query
-- Sample data that already contains the RowNum column from the question.
CREATE TABLE #tmp (ID int, EmpFirstName varchar(10), EmpLastName varchar(10), RowNum int)
INSERT INTO #tmp (ID, EmpFirstName, EmpLastName, RowNum) VALUES
 (1, 'X', 'Y', 1)
,(2, 'A', 'B', 1)
,(3, 'A', 'B', 2)

-- Return every row whose (EmpFirstName, EmpLastName) occurs more than once.
-- The original filtered on ROW_NUMBER() OVER (ORDER BY ID) > 1, which only
-- skips the first row of the table and matched the sample by coincidence;
-- counting the rows per name identifies the duplicates themselves.
SELECT ID, EmpFirstName, EmpLastName, RowNum
FROM
(
    SELECT
        ID,
        EmpFirstName,
        EmpLastName,
        RowNum,
        COUNT(*) OVER (PARTITION BY EmpFirstName, EmpLastName) AS name_count
    FROM #tmp
) AS q
WHERE q.name_count > 1

try this,
-- Sample data in a table variable (table variable names start with @;
-- "#Result" is not valid in DECLARE).
DECLARE @Result TABLE (ID INT, EmpFirstName VARCHAR(10), EmpLastName VARCHAR(10), RowNum INT)

INSERT INTO @Result (ID, EmpFirstName, EmpLastName, RowNum)
VALUES
 (1, 'X', 'Y', 1)
,(2, 'A', 'B', 1)
,(3, 'A', 'B', 2)

-- A name is duplicated exactly when it has a row with RowNum = 2; joining back
-- on the name returns all rows of those names.
SELECT r1.*
FROM @Result AS r1
INNER JOIN
(
    SELECT EmpFirstName, EmpLastName -- names that have a second row
    FROM @Result
    WHERE RowNum = 2
) AS r2
    ON r1.EmpFirstName = r2.EmpFirstName
    AND r1.EmpLastName = r2.EmpLastName

SQL: Deleting row which values already exist

I have a table that look like this:
ID | DATE | NAME | VALUE_1 | VALUE_2
1 | 27.11.2015 | Homer | A | B
2 | 27.11.2015 | Bart | C | B
3 | 28.11.2015 | Homer | A | C
4 | 28.11.2015 | Maggie | C | B
5 | 28.11.2015 | Bart | C | B
I currently delete duplicate rows (thank to this thread) using this code :
-- Number the rows of each (VALUE_1, VALUE_2) group from newest to oldest DATE
-- and delete everything except the newest row of each group.
WITH cte AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY [VALUE_1], [VALUE_2]
            ORDER BY [DATE] DESC
        ) AS RN
    FROM [MY_TABLE]
)
DELETE FROM cte
WHERE RN > 1
But this code doesn't delete exactly the rows I want. I would like to delete only rows whose values already exist, so in my example I would like to delete only row 5, because row 2 has the same values and is older.
Code to create my table and insert values:
-- Table and sample rows for the question; id is generated by IDENTITY.
CREATE TABLE [t_diff_values]
(
    [id]   INT IDENTITY NOT NULL PRIMARY KEY,
    [date] DATETIME     NOT NULL,
    [name] VARCHAR(255) NOT NULL DEFAULT '',
    [val1] CHAR(1)      NOT NULL DEFAULT '',
    [val2] CHAR(1)      NOT NULL DEFAULT ''
);

INSERT INTO [t_diff_values] ([date], [name], [val1], [val2])
VALUES
    ('2015-11-27', 'Homer',  'A', 'B'),
    ('2015-11-27', 'Bart',   'C', 'B'),
    ('2015-11-28', 'Homer',  'A', 'C'),
    ('2015-11-28', 'Maggie', 'C', 'B'),
    ('2015-11-28', 'Bart',   'C', 'B');

You need to add one more CTE where you will index all islands and then apply your duplicate logic in second CTE:
-- Sample data in a table variable (table variable names start with @;
-- "#t" is not valid in DECLARE).
DECLARE @t TABLE
(
    ID INT,
    [DATE] DATE,
    VALUE_1 CHAR(1),
    VALUE_2 CHAR(1)
);

INSERT INTO @t (ID, [DATE], VALUE_1, VALUE_2)
VALUES ( 1, '20151127', 'A', 'B' ),
       ( 2, '20151128', 'C', 'B' ),
       ( 3, '20151129', 'A', 'B' ),
       ( 4, '20151130', 'A', 'B' );

-- cte1: gr is the overall position by date minus the position within the
-- (VALUE_1, VALUE_2) group. Rows of a group that are consecutive by date share
-- one gr value, which identifies each run ("island").
-- cte2: numbers the rows inside each island by date; rn > 1 are the repeats.
WITH cte1 AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY [DATE])
        - ROW_NUMBER() OVER (PARTITION BY VALUE_1, VALUE_2 ORDER BY [DATE]) AS gr
    FROM @t
),
cte2 AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY VALUE_1, VALUE_2, gr ORDER BY [DATE]) AS rn
    FROM cte1
)
DELETE FROM cte2
WHERE rn > 1;

SELECT ID, [DATE], VALUE_1, VALUE_2
FROM @t;

Try this
-- Demo table and rows.
CREATE TABLE [dbo].[Employee](
    [ID] INT NOT NULL,
    [Date] DateTime NOT NULL,
    [VAL1] varchar(20) NOT NULL,
    [VAL2] varchar(20) NOT NULL
);

INSERT INTO [dbo].[Employee] ([ID], [Date], [VAL1], [VAL2]) VALUES
(1, '2015-11-27 10:44:33.087', 'A', 'B');
INSERT INTO [dbo].[Employee] ([ID], [Date], [VAL1], [VAL2]) VALUES
(2, '2015-11-28 10:44:33.087', 'C', 'B');
INSERT INTO [dbo].[Employee] ([ID], [Date], [VAL1], [VAL2]) VALUES
(3, '2015-11-29 10:44:33.087', 'A', 'B');
INSERT INTO [dbo].[Employee] ([ID], [Date], [VAL1], [VAL2]) VALUES
(4, '2015-11-30 10:44:33.087', 'A', 'B');

-- rn numbers each (VAL1, VAL2) group from newest to oldest; cc is the group size.
-- "rn > 1 and rn < cc" deletes the rows strictly between the newest (rn = 1)
-- and the oldest (rn = cc) row of each group.
-- The statement before WITH must be terminated with a semicolon.
WITH cte AS
(
    SELECT
        *,
        rn = ROW_NUMBER() OVER (PARTITION BY [VAL1], [VAL2] ORDER BY [Date] DESC),
        cc = COUNT(*) OVER (PARTITION BY [VAL1], [VAL2])
    FROM [Employee]
)
DELETE
FROM cte
WHERE rn > 1 AND rn < cc;

SELECT [ID], [Date], [VAL1], [VAL2] FROM [Employee];

You could use this query:
-- Number the rows by ID, then delete each row c1 whose immediate successor c2
-- (RN + 1) has the same VALUE_1 and VALUE_2.
-- The source is the table variable @data (table variable names start with @).
-- To preview the affected rows, replace "DELETE c1" with "SELECT *".
;WITH cte AS
(
    SELECT
        RN = ROW_NUMBER() OVER (ORDER BY ID),
        *
    FROM @data
)
DELETE c1
FROM cte AS c1
INNER JOIN cte AS c2
    ON c1.RN + 1 = c2.RN
    AND c1.VALUE_1 = c2.VALUE_1
    AND c1.VALUE_2 = c2.VALUE_2
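Here the rows are numbered by ID. A row is deleted when the row immediately after it (RN + 1) has the same VALUE_1 and VALUE_2, so the last row of each consecutive run is the one that is kept.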
Output:
ID DATE VALUE_1 VALUE_2
1 2015-11-27 A B
2 2015-11-28 C B
4 2015-11-30 A B
Data:
-- Sample data in a table variable (table variable names start with @;
-- "#data" is not valid in DECLARE).
DECLARE @data TABLE (ID int, [DATE] date, VALUE_1 char(1), VALUE_2 char(1));

INSERT INTO @data (ID, [DATE], VALUE_1, VALUE_2) VALUES
(1, '20151127', 'A', 'B'),
(2, '20151128', 'C', 'B'),
(3, '20151129', 'A', 'B'),
(4, '20151130', 'A', 'B');

TSQL Distinct Counts

I have a table that looks like this:
ID SuppressionTypeID PersonID
------------------------------
1 1 123
2 1 456
3 2 456
I want to get a rolling count (distinct people) rather than a normal group by count.
e.g. not this:
SuppressionTypeID Count
---------------------------
1 2
2 1
This:
SuppressionTypeID RecordsLost
----------------------------------
1 2
2 0
The latter is zero because we already lost person 456 on SuppressionTypeID 1.
Thanks in advance.

You may need to use a temporary table or a table variable as shown below
-- Sample data in a table variable (table variable names start with @;
-- "#t" / "#t1" are not valid in DECLARE).
DECLARE @t TABLE (
    ID INT
    ,SuppressionTypeID INT
    ,PersonID INT
    )

INSERT INTO @t (ID, SuppressionTypeID, PersonID)
SELECT 1, 1, 123
UNION ALL
SELECT 2, 1, 456
UNION ALL
SELECT 3, 2, 456

-- Working copy with an extra column: firstid will hold the lowest
-- SuppressionTypeID on which each person appears.
DECLARE @t1 TABLE (
    ID INT
    ,SuppressionTypeID INT
    ,PersonID INT
    ,firstid INT
    )

INSERT INTO @t1 (ID, SuppressionTypeID, PersonID, firstid)
SELECT ID, SuppressionTypeID, PersonID, NULL
FROM @t

-- Fill firstid with the minimum SuppressionTypeID per person.
UPDATE t1
SET t1.firstid = t2.firstid
FROM @t1 AS t1
INNER JOIN (
    SELECT personid
        ,min(SuppressionTypeID) AS firstid
    FROM @t1
    GROUP BY personid
    ) AS t2 ON t1.PersonID = t2.PersonID

-- A person is counted only on the suppression type where they first appear:
-- the left join matches a row only when its SuppressionTypeID equals the
-- person's firstid, so later types count 0 for that person.
SELECT coalesce(t2.firstid, t1.SuppressionTypeID) AS SuppressionTypeID
    ,count(DISTINCT t2.personid) AS [count]
FROM @t1 AS t1
LEFT JOIN @t1 AS t2 ON t1.personid = t2.personid
    AND t1.SuppressionTypeID = t2.firstid
GROUP BY coalesce(t2.firstid, t1.SuppressionTypeID)
The result is
SuppressionTypeID count
----------------- -----------
1 2
2 0

You can try;
-- tmp_tbl: for each suppression type, the number of people whose lowest
-- SuppressionTypeID is that type (the people first "lost" there).
-- The inner query finds each person's first type; the outer one groups by that
-- type. The original grouped by PersonID, which does not match the selected
-- column, and had an ORDER BY inside the CTE, which SQL Server rejects.
;WITH tmp_tbl AS (
    SELECT
        x.SuppressionTypeID,
        COUNT(x.PersonID) AS RecordsLost
    FROM (
        SELECT
            MIN(SuppressionTypeID) AS SuppressionTypeID,
            PersonID
        FROM tbl
        GROUP BY PersonID
    ) AS x
    GROUP BY x.SuppressionTypeID
)
-- Every suppression type is listed once; types on which nobody was first
-- lost get 0.
SELECT DISTINCT
    t.SuppressionTypeID,
    COALESCE(tmp.RecordsLost, 0) AS RecordsLost
FROM tbl AS t
LEFT JOIN tmp_tbl AS tmp
    ON tmp.SuppressionTypeID = t.SuppressionTypeID
ORDER BY t.SuppressionTypeID

How do you order a group of records then insert their order placement too?

I have a table of logs that contain a ID and TIMESTAMP. I want to ORDER BY ID and then TIMESTAMP.
For example, this is what the result set would look like:
12345 05:40
12345 05:50
12345 06:22
12345 07:55
12345 08:33
Once that's done, I want to INSERT an order value in a third column that signifies its placement in the group from earliest to latest.
So, you would have something like this:
12345 05:40 1 <---First entry
12345 05:50 2
12345 06:22 3
12345 07:55 4
12345 08:33 5 <---Last entry
How can I do that in a SQL statement? I can select the data and ORDER BY ID, TIMESTAMP. But, I can't seem to INSERT a order value based on the groupings. :(

Try this; it is an update rather than an insert:
Fiddle demo here:
-- Number all rows by (id, yourdate), then write that number into thirdCol of
-- the matching row, matched on id and yourdate.
;WITH numbered AS
(
    SELECT
        id,
        yourdate,
        ROW_NUMBER() OVER (ORDER BY id, yourdate) AS rn
    FROM yourTable
)
UPDATE ut
SET thirdCol = numbered.rn
FROM yourTable AS ut
INNER JOIN numbered
    ON ut.Id = numbered.id
    AND ut.yourdate = numbered.yourdate
NOTE: if you need the third column to be numbered separately for each id, partition the row number by id: row_number() over (partition by id order by yourdate)
Results:
| ID | YOURDATE | THIRDCOL |
|-------|----------|----------|
| 12345 | 05:40 | 1 |
| 12345 | 05:50 | 2 |
| 12345 | 06:22 | 3 |
| 12345 | 07:55 | 4 |
| 12345 | 08:33 | 5 |

Using a derived table and an update.
-- Start clean: drop the temp table if an earlier run left it behind.
IF OBJECT_ID('tempdb..#TableOne') IS NOT NULL
BEGIN
    DROP TABLE #TableOne
END

CREATE TABLE #TableOne
(
    SomeColumnA int,
    LetterOfAlphabet varchar(12),
    PositionOrdinal int NOT NULL DEFAULT 0
)

INSERT INTO #TableOne (SomeColumnA, LetterOfAlphabet)
SELECT 123, 'x'
UNION ALL SELECT 123, 'b'
UNION ALL SELECT 123, 'z'
UNION ALL SELECT 123, 't'
UNION ALL SELECT 123, 'c'
UNION ALL SELECT 123, 'd'
UNION ALL SELECT 123, 'e'
UNION ALL SELECT 123, 'a'

-- State before the update: PositionOrdinal still holds its default.
SELECT 'pre' AS SpaceTimeContinium, * FROM #TableOne ORDER BY LetterOfAlphabet

-- The derived table numbers the rows by LetterOfAlphabet; joining it back on
-- both columns writes each row's number into PositionOrdinal.
UPDATE t1
SET PositionOrdinal = derived1.rowid
FROM #TableOne AS t1
INNER JOIN
(
    SELECT
        innerT1.SomeColumnA,
        innerT1.LetterOfAlphabet,
        ROW_NUMBER() OVER (ORDER BY innerT1.LetterOfAlphabet ASC) AS rowid
    FROM #TableOne AS innerT1
) AS derived1
    ON t1.LetterOfAlphabet = derived1.LetterOfAlphabet
    AND t1.SomeColumnA = derived1.SomeColumnA

-- State after the update.
SELECT 'post' AS SpaceTimeContinium, * FROM #TableOne ORDER BY LetterOfAlphabet

-- Clean up.
IF OBJECT_ID('tempdb..#TableOne') IS NOT NULL
BEGIN
    DROP TABLE #TableOne
END

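To get the order you desire without doing an insert and an update, you can let a clustered index store the rows in that order. The example below creates a clustered primary key. (A clustered index only controls how the rows are stored; a query is guaranteed to return them in that order only when it includes an ORDER BY clause.)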
To do this you must remove any clustered index that you already have on the table because you can only have one clustered index per table.
-- Log table keyed by (ID, DTStamp).
CREATE TABLE dbo.Table_1
(
    ID      INT      NOT NULL,
    DTStamp DATETIME NOT NULL
)

-- Clustered primary key over both columns.
ALTER TABLE dbo.Table_1
    ADD CONSTRAINT PK_Table_1
    PRIMARY KEY CLUSTERED (ID, DTStamp)
Insert some random data to test with...
-- Sample rows for three IDs, each stamped a few minutes after the current time.
-- ID 12346: now and +1..+4 minutes.
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12346, GETDATE());
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12346, DATEADD(MINUTE, 1, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12346, DATEADD(MINUTE, 2, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12346, DATEADD(MINUTE, 3, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12346, DATEADD(MINUTE, 4, GETDATE()));
-- ID 12340: +5..+8 minutes.
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12340, DATEADD(MINUTE, 5, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12340, DATEADD(MINUTE, 6, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12340, DATEADD(MINUTE, 7, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12340, DATEADD(MINUTE, 8, GETDATE()));
-- ID 12344: +1..+8 minutes.
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 1, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 2, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 3, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 4, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 5, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 6, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 7, GETDATE()));
INSERT INTO dbo.Table_1 (ID, DTStamp) VALUES (12344, DATEADD(MINUTE, 8, GETDATE()));
Now query your table and check out the order...
-- Result order is only guaranteed by ORDER BY; the clustered key on
-- (ID, DTStamp) can make this sort cheap but does not replace it.
SELECT [ID], [DTStamp]
FROM [Table_1]
ORDER BY [ID], [DTStamp]
If you need the order to display in a query, you can add the row number with an over clause.
-- SortOdr is the position of each row within its ID, earliest DTStamp first.
-- ID is constant inside each partition, so ordering by DTStamp alone gives
-- the same numbering.
SELECT
    [ID],
    [DTStamp],
    ROW_NUMBER() OVER (PARTITION BY [ID] ORDER BY [DTStamp]) AS SortOdr
FROM [Table_1]

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Delete duplicate rows from a table and referenced table microsoft sql server - sql

-- Table variable holding the PersonIds that are duplicates.
-- Table variables must start with @; "#tbl" is not valid in DECLARE.
DECLARE @tbl TABLE (pid INT);

-- Collect every person that is not the first (lowest personid) row of its
-- (firstname, lastname) group.
;WITH cte AS
(
    SELECT
        t1.personid,
        ROW_NUMBER() OVER (PARTITION BY t1.firstname, t1.lastname ORDER BY t1.personid) AS rownum
    FROM person AS t1
)
INSERT INTO @tbl (pid)
SELECT personid
FROM cte
WHERE rownum > 1;

-- Customers first, then persons. The subqueries select pid, the only column of
-- @tbl; selecting personid there would bind to the outer table's own column
-- and match every row.
DELETE FROM customer WHERE personid IN (SELECT pid FROM @tbl);
DELETE FROM person WHERE personid IN (SELECT pid FROM @tbl);

Related

How to populate given SQL records to all UserID

Conditional selection of RowNum in SQL

SQL: Deleting row which values already exist

TSQL Distinct Counts

How do you order a group of records then insert their order placement too?

Categories

Resources