Delete Duplicate rows from table which have same Id - sql

I have a table Emp which has records like this:
Id Name
1 A
2 B
3 C
1 A
1 A
2 B
3 C
Now I want to delete the duplicate rows from the table
I am using this query to select or count number of duplicate records
-- List each NameCol value that occurs more than once, most frequent first.
SELECT
    NameCol,
    COUNT(*) AS TotalCount
FROM TestTable
GROUP BY NameCol
HAVING COUNT(*) > 1
ORDER BY TotalCount DESC
What query should I write to delete the duplicate rows from the table?
If I write this query to delete the duplicate records, it reports (0) rows affected.
`DELETE FROM TestTable
-- NOTE(review): this keeps only rows whose ID equals MAX(ID) of their NameCol group.
-- In the sample data every duplicate carries the same ID, so each row's ID is
-- already its group's MAX(ID); NOT IN therefore excludes nothing and 0 rows are deleted.
WHERE ID NOT IN ( SELECT MAX(ID) FROM
TestTable
GROUP BY NameCol
)`

For sqlserver 2005+
Testdata:
-- Test data. A local temporary table is used because #-prefixed names are temp
-- tables in SQL Server; DECLARE ... TABLE only accepts @-prefixed variables.
-- The multi-row VALUES list needs SQL Server 2008 or later.
create table #t (Id int, Name char(1))
insert #t (Id, Name) values
(1,'A'),(2,'B'),(3,'C'),(1,'A'),(1,'A'),(2,'B'),(3,'C')
Delete statement(replace #t with your Emp table)
-- Number the rows inside each (id, name) group, then remove every row after the first.
;WITH numbered AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY id, name ORDER BY id) AS rn
    FROM #t
)
DELETE FROM numbered
WHERE rn > 1

-- Show what is left.
SELECT * FROM #t

**Q How to Remove duplicate data with help of Rowid**
-- Sample table in which every (id, name) pair is inserted twice.
-- Each statement is terminated so the snippet can be run as a script.
create table abcd (id number(10), name varchar2(20));
insert into abcd (id, name) values (1, 'abc');
insert into abcd (id, name) values (2, 'pqr');
insert into abcd (id, name) values (3, 'xyz');
insert into abcd (id, name) values (1, 'abc');
insert into abcd (id, name) values (2, 'pqr');
insert into abcd (id, name) values (3, 'xyz');
select id, name from abcd;
id Name
1 abc
2 pqr
3 xyz
1 abc
2 pqr
3 xyz
Delete Duplicate record but keep Distinct Record in table
-- Keep the row with the lowest ROWID in each (id, name) group and delete the rest.
-- Both columns are compared so rows that share an id but differ in name survive.
DELETE
FROM abcd a
WHERE a.ROWID > (SELECT MIN(b.ROWID) FROM abcd b
WHERE b.id = a.id
AND b.name = a.name
);
Running the above query deletes 3 rows.
select * from abcd
id Name
1 abc
2 pqr
3 xyz

Related

Delete Rows based on two columns

How can I delete rows based on just two column conditions.
Example
Table 1
id name phone
1 aa 123
1 aa 345
1 bb 123
2 aa 456
1 NULL 123
1 123
My Expected output
id name phone
1 bb 123
2 aa 456
My condition to delete: if id and name is same, delete the rows
If one of the value in a condition is null or blank it should also delete the row as given in the input.
-- Delete every row whose (id, name) pair occurs more than once in table1.
DELETE FROM table1 t
WHERE EXISTS (
    SELECT *
    FROM (
        SELECT id, name
        FROM table1
        GROUP BY id, name
        HAVING COUNT(*) > 1
    ) dup
    WHERE dup.id = t.id
      AND dup.name = t.name
)
This should do what you want. You can run the SELECT first for testing purposes, then remove the SELECT and uncomment the DELETE.
-- Joins Table1 to the set of (ID, name) pairs that occur on more than one row,
-- so the result is exactly the rows that share a duplicated ID and name.
-- To delete instead of preview, replace the SELECT * line with the DELETE line below.
--DELETE t1
SELECT *
FROM Table1 T1
INNER JOIN (
-- Every (ID, name) pair that appears on more than one row.
SELECT ID, name
FROM Table1
GROUP BY ID, name
HAVING COUNT(*) > 1
) ToDelete ON T1.ID = ToDelete.ID
AND T1.name = ToDelete.name
-- Demo: remove rows whose (id, name) pair is duplicated, and also rows whose
-- id or name is NULL or blank, as the question requires.
create table #tablea (
id int,
name varchar(3),
phone int
)
-- phone is an int column, so numeric literals are used (no implicit conversion).
-- The last two rows cover the NULL-name and blank-name cases from the question.
insert into #tablea (id, name, phone)
values
(1,'aa',123),
(1,'aa',345),
(1,'bb',123),
(2,'aa',456),
(1,NULL,123),
(1,'',123)
select * from #tablea
delete a
from #tablea a
left join (
-- every (id, name) pair that appears on more than one row
select id, name
from #tablea
group by id, name
having COUNT(*) > 1
) b on a.id = b.id and a.name = b.name
-- b.id is non-NULL only when the row matched a duplicated pair; an equality
-- join can never match NULLs, so those rows are picked up explicitly.
where b.id is not null
or a.id is null
or a.name is null
or a.name = ''
select * from #tablea
drop table #tablea

SQL adding two columns and group by count

I want to add two columns and group by the count.
For example say I have the following table:
ID -------- value1 ---------- value2
A ------------ 2 -----------------3
B ------------ 1 -----------------4
c ------------ 2 -----------------2
D ------------ 3 -----------------3
E ------------ 2 -----------------1
F ------------ 1 -----------------3
If you compute the sum (select value1 + value2) for each row you will get 5,5,4,6,3,4.
I would like to get the following result.
5 ------ 2
4 ------ 2
3 ------ 1
6 ------ 1
You can just do:
-- Count how many rows share each val1 + val2 total, largest count first.
SELECT
    val1 + val2,
    COUNT(*)
FROM t
GROUP BY val1 + val2
ORDER BY COUNT(*) DESC;
This is one solution: make the SUM of the two columns in CTE and then just COUNT the id's grouped by the sum amount:
-- Sample data. #-prefixed names are temp tables in SQL Server, so the table is
-- created with CREATE TABLE (DECLARE ... TABLE requires an @-prefixed variable).
create table #tbl (
id varchar(1)
,val1 int
,val2 int
)
insert into #tbl (id, val1, val2) values ('A',2,3)
insert into #tbl (id, val1, val2) values ('B',1,4)
insert into #tbl (id, val1, val2) values ('C',2,2)
insert into #tbl (id, val1, val2) values ('D',3,3)
insert into #tbl (id, val1, val2) values ('E',2,1)
insert into #tbl (id, val1, val2) values ('F',1,3)
-- Compute each row's total once in the CTE, then count the rows per total.
;WITH CTE AS (
SELECT
id
,val1+val2 as [sum]
FROM #tbl
)
SELECT
[sum]
,count(id) as [count]
FROM CTE
GROUP BY [sum]
-- Most frequent totals first; the total breaks ties so the order is deterministic.
ORDER BY [count] DESC, [sum] DESC

SQL Server show non matching records

I have table like below .
-- Sample data: several rows per NAME, some differing in TAG / checkVAL.
create table #test (NAME varchar(100),TAG int,checkVAL varchar(1),CATEGORY int)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('jkl',1,'y',100)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('abc',1,'y',100)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('abc',1,'y',101)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('abc',2,'n',102)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('abc',3,'n',103)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('xyz',2,'y',104)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('xyz',1,'y',105)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('pqr',1,'y',105)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('pqr',1,'y',106)
insert into #test (NAME, TAG, checkVAL, CATEGORY) values('pqr',1,'y',106)
Now I want to show those records which have different values in the columns NAME, TAG and checkVAL.
This is what I have done.
-- Show the raw rows first.
SELECT * FROM #test

-- Keep the lowest-CATEGORY row of each (NAME, TAG, checkVAL) combination.
;WITH ranked AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY NAME, TAG, checkVAL ORDER BY CATEGORY) AS rownum
    FROM #test
)
SELECT *
FROM ranked
WHERE rownum = 1
This is what is being returned
NAME TAG checkVAL CATEGORY rownum
-----------------------------------------
abc 1 y 100 1
abc 2 n 102 1
abc 3 n 103 1
jkl 1 y 100 1 --> This row should not come
pqr 1 y 105 1 --> This row should not come
xyz 1 y 105 1
xyz 2 y 104 1
What I am trying is that for any value in column NAME , if values are different in TAG or checkVAL or both , then those rows should only be shown.
Below row
jkl 1 y 100 1
Should not be shown because jkl has no other row to match.
Below row should not be shown
pqr 1 y 105 1
because all rows with NAME column value as pqr have same values in TAG and checkVAL columns
I want to preferably approach using CTE .
How about this -
-- Return each row for which another row of the same name differs in tag or checkVAL.
SELECT *
FROM #test AS cur
WHERE EXISTS
(
    SELECT *
    FROM #test AS other
    WHERE other.name = cur.name
      AND (other.tag <> cur.tag OR other.checkVAL <> cur.checkVAL)
)

update oldID field based on fields in the same table

I need help with the following query.
-- Sample table: id is generated by the identity column, oldID starts out empty.
CREATE TABLE #table1
(
    id            INT NOT NULL PRIMARY KEY IDENTITY,
    customer_name VARCHAR(25),
    usage         FLOAT,
    oldID         INT NULL
)

-- One INSERT per row, in this order, so the ids are handed out as 1..6.
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('ABC', 46.5, NULL)
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('ABC', 46.5, NULL)
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('DEF', 36.8, NULL)
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('XYZ', 50.1, NULL)
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('DEF', 36.8, NULL)
INSERT INTO #table1 (customer_name, usage, oldID) VALUES ('XYZ', 50.1, NULL)

SELECT * FROM #table1
I want my table to be updated like this
id customer_name usage oldID
----------- ------------------------- ---------------------- -----------
1 ABC 46.5 NULL
2 ABC 46.5 1
3 DEF 36.8 NULL
4 XYZ 50.1 NULL
5 DEF 36.8 3
6 XYZ 50.1 4
The two records with the same name and usage means the later record was renewed.
In the new record the oldID field should point to its old record (ID).
Although in my actual table, I have a bunch of date fields which I probably can use but this would help me for now.
Try this using a CTE:
-- The data CTE pairs every row with the smallest id of its customer_name;
-- rows whose own id is not that smallest id get it written into OldID.
;WITH data AS
(
    SELECT
        src.id,
        src.customer_name,
        OldID = (
            SELECT MIN(earliest.id)
            FROM #table1 earliest
            WHERE earliest.customer_name = src.customer_name
        )
    FROM #table1 src
)
UPDATE #table1
SET OldID = data.OldID
FROM data
WHERE #table1.customer_name = data.customer_name
  AND #table1.ID <> data.OldID

SELECT * FROM #table1
The Data CTE basically just determines the minimum ID for each customer, and if that customer's ID isn't that minimum ID, then OldID is set to that ID value.
When I run this, I get a resulting output:
id customer_name usage oldID
1 ABC 46.5 NULL
2 ABC 46.5 1
3 DEF 36.8 NULL
4 XYZ 50.1 NULL
5 DEF 36.8 3
6 XYZ 50.1 4
With a CTE, without subqueries, updating only customers with several rows:
-- A renewal is a later row with the same customer_name and usage, so the
-- earliest id is taken per (customer_name, usage) pair, and only for pairs that
-- occur more than once. The leading semicolon terminates any preceding
-- statement, which T-SQL requires before WITH.
;with cte as (
select customer_name, usage, min( id ) as id
from #table1
group by customer_name, usage
having count(*) > 1
)
update #table1
set oldID = cte.id
from cte
where #table1.customer_name = cte.customer_name
and #table1.usage = cte.usage
-- the earliest row of each pair keeps oldID = NULL
and #table1.id != cte.id

SQL Delete duplicate records and leave the rest

I have 2 tables, A and B. A has 5 records and B has the same records as A, but 7 rows; that is, the same values repeated in 7 rows. I want to delete only the first 5 records in B, since their row numbers match those in A. How can I do this? Please help me.
table :A
col1 col2 col3 DuplicateCount
1 2 n 1
1 2 n 2
1 2 n 3
1 2 n 4
2 2 m 1
2 2 m 2
table b:
col1 col2 col3 DuplicateCount
1 2 n 1
1 2 n 2
1 2 n 3
1 2 n 4
1 2 n 5
1 2 n 6
desired data should reside in table b is
col1 col2 col3 DuplicateCount
1 2 n 5
1 2 n 6
which is nothing but the last 2 rows in the table b.
Try this :
-- Remove every TableB row whose Id also exists in TableA.
DELETE FROM TableB
WHERE Id IN
(
    SELECT b.Id
    FROM TableB AS b
    INNER JOIN TableA AS a
        ON a.ID = b.Id
)
I added id column to identify rows in table B, I am not sure how to delete only some of duplicate rows without id column:
-- Work tables; the id column lets individual duplicate rows be told apart.
-- #-prefixed names are temp tables in SQL Server, hence CREATE TABLE rather than
-- DECLARE ... TABLE (which requires an @-prefixed variable).
create table #a
(
id int primary key,
col1 int,
col2 int,
col3 varchar(1)
)
create table #b
(
id int primary key,
col1 int,
col2 int,
col3 varchar(1)
)
insert into #a (id, col1, col2, col3) values (1,1,2,'n')
insert into #a (id, col1, col2, col3) values (2,1,2,'n')
insert into #a (id, col1, col2, col3) values (3,1,2,'n')
insert into #a (id, col1, col2, col3) values (4,1,2,'n')
insert into #a (id, col1, col2, col3) values (5,2,2,'n')
insert into #a (id, col1, col2, col3) values (6,2,2,'n')
insert into #b (id, col1, col2, col3) values (10,1,2,'n')
insert into #b (id, col1, col2, col3) values (20,1,2,'n')
insert into #b (id, col1, col2, col3) values (30,1,2,'n')
insert into #b (id, col1, col2, col3) values (40,1,2,'n')
insert into #b (id, col1, col2, col3) values (50,1,2,'n')
insert into #b (id, col1, col2, col3) values (60,1,2,'n')
-- For each (col1, col2, col3) group, delete as many of #b's rows (lowest ids
-- first) as #a holds for that same group; #b rows beyond that count are kept.
delete from #b
where id in
(
select t1.id from
(
-- #b rows numbered within their group, plus the group's size
select
id,
col1,
col2,
col3,
cnt = count(*) over(partition by col1, col2, col3),
rn = row_number() over(partition by col1, col2, col3 order by id)
from #b
) t1
inner join
(
-- #a rows with the size of their group
select
col1,
col2,
col3,
cnt = count(*) over(partition by col1, col2, col3)
from #a
) t2 on
-- compare like with like: without the column match every #a group would
-- be applied to every #b group
t1.col1 = t2.col1 and t1.col2 = t2.col2 and t1.col3 = t2.col3
and t1.cnt > 1 and t1.rn <= t2.cnt
)
select * from #b
You can use TOP key word for deleting first five records
-- TOP needs a row count, not a result set, and the rows to remove live in TableB.
-- Count the TableA rows that have a counterpart in TableB, then delete that many
-- matching rows from TableB.
-- NOTE: DELETE TOP removes rows in no particular order, and the count is pooled
-- across all (col1, col2, col3) groups, so this is exact only when the matching
-- rows form a single group.
DECLARE @rows_to_delete int
SELECT @rows_to_delete = COUNT(*)
FROM TableA a
WHERE EXISTS (
    SELECT * FROM TableB b
    WHERE a.col1=b.col1 AND a.col2=b.col2 AND a.col3=b.col3
)
DELETE TOP (@rows_to_delete)
FROM TableB
WHERE EXISTS (
    SELECT * FROM TableA a
    WHERE a.col1=TableB.col1 AND a.col2=TableB.col2 AND a.col3=TableB.col3
)
or
Note: The below is an example for deleting one or more records based on their IDs
DELETE From yourTable where ID in (2,3,4,5,6)