Delete duplicate rows in different columns - sql

Table1:
id name address1 address2 address3
------------------------------------------
1 Jenny A B NULL
2 John C NULL NULL
3 Jenny B A NULL
4 John NULL NULL C
.....
id1 and id3 are the same in this condition, id2 and id4 are the same too.
Can I delete id3 and id4? I'm using SQL Server 2019.

You need to define a condition for duplicate rows. One possible approach to define such a condition is to aggregate ordered addresses. The following statement demonstrates this approach:
Table:
CREATE TABLE Data (
id int,
name varchar(10),
address1 varchar(100),
address2 varchar(100),
address3 varchar(100)
)
INSERT INTO Data
(id, name, address1, address2, address3)
VALUES
(1, 'Jenny', 'A', 'B', NULL),
(2, 'John', 'C', NULL, NULL),
(3, 'Jenny', 'B', 'A', NULL),
(4, 'John ', NULL, NULL, 'C')
Statement:
-- Deletes every row whose address values (compared regardless of which of the
-- three columns they sit in) repeat those of a row with a lower id.
-- Rows are numbered per partition in id order and only rn = 1 survives.
DELETE x
FROM (
SELECT
d.*,
-- Condition for equal addresses
ROW_NUMBER() OVER (PARTITION BY c.CheckCondtition ORDER BY d.id) AS rn
-- Condition for equal name and addresses
-- ROW_NUMBER() OVER (PARTITION BY d.name, c.CheckCondtition ORDER BY d.id) AS rn
FROM Data d
CROSS APPLY (
-- Builds the comparison key: the three address values, each prefixed with ','
-- and concatenated in sorted order into a single string by FOR XML PATH('').
-- CONCAT turns a NULL address into an empty string, so a NULL contributes just ','.
-- NOTE(review): an address that itself contains ',' could make two different
-- address sets produce the same key -- confirm the data rules this out.
SELECT CONCAT(',', [address])
FROM (VALUES (d.address1), (d.address2), (d.address3)) v([address])
ORDER BY [address]
FOR XML PATH('')
) c(CheckCondtition)
) x
WHERE x.rn > 1

If you're using SQL Server 2017 or later, you can build up a unique identifier for each row based on the name and each of the address fields using STRING_AGG; then find row numbers for each occurrence of that value (ordering by id), and then you can delete the rows with row number > 1:
-- Deletes every row whose name and order-insensitive set of addresses repeat
-- those of a row with a lower id; the lowest id of each group is kept.
WITH CTE AS (
    -- Unpivot only the address columns. The name travels alongside as its own
    -- column so that a name can never be matched against another row's address.
    SELECT d.id, d.name, a.value
    FROM data d
    CROSS APPLY (VALUES (d.address1), (d.address2), (d.address3)) a(value)
),
CTE2 AS (
    -- One key per row: the non-NULL addresses in sorted order.
    -- STRING_AGG skips NULLs, so empty address slots do not affect the key.
    -- NOTE(review): '%' is assumed never to occur inside an address value.
    SELECT id, name, STRING_AGG(value, '%') WITHIN GROUP (ORDER BY value) AS v
    FROM CTE
    GROUP BY id, name
),
CTE3 AS (
    -- Number the rows of each (name, address key) group in id order.
    SELECT id,
           ROW_NUMBER() OVER (PARTITION BY name, v ORDER BY id) AS rn
    FROM CTE2
)
DELETE d
FROM data d
JOIN CTE3 ON CTE3.id = d.id
WHERE CTE3.rn > 1
Demo on SQLFiddle

I don't think fancy manipulation of the three columns is needed. Assuming you do not have duplicate values in the columns:
-- Deletes a row t1 when an earlier row tt1 (lower id) exists such that:
--   * every non-NULL address of t1 appears among tt1's three addresses, and
--   * both rows have the same number of NULL address columns.
-- The name column is not compared, and the approach relies on a row not
-- holding the same address value in more than one column.
delete t1 from table1 t1
where exists (select 1
from table1 tt1
where tt1.id < t1.id and
(t1.address1 in (tt1.address1, tt1.address2, tt1.address3) or t1.address1 is null
) and
(t1.address2 in (tt1.address1, tt1.address2, tt1.address3) or t1.address2 is null
) and
(t1.address3 in (tt1.address1, tt1.address2, tt1.address3) or t1.address3 is null
) and
-- Equal NULL counts on both sides: together with the membership checks above
-- this makes the two address sets equivalent.
( (case when t1.address1 is null then 1 else 0 end +
case when t1.address2 is null then 1 else 0 end +
case when t1.address3 is null then 1 else 0 end
) =
(case when tt1.address1 is null then 1 else 0 end +
case when tt1.address2 is null then 1 else 0 end +
case when tt1.address3 is null then 1 else 0 end
)
)
);
This checks that each non-NULL value matches and the number of NULL values is the same. This implies that the two sets of values are equivalent.
The advantage is that you don't have to worry about separators -- which is a concern for either string_agg() or XML.

Related

Group by a column and build a single result row based on condition

I need to group rows by account.
If there's only one row in a group, select it.
If there are multiple rows per group, select columns of the row with order_number equal to 4 but set order_number to 1.
myTable data:
account order_number status state
1111 4 ok full
2256 4 ok full
3344 1 NULL NULL
1111 1 NULL NULL
8743 4 ok full
2256 1 NULL NULL
Here's what I've tried:
select
account,
order_number,
status,
state,
case
when order_number = '1' then 'pass'
when order_number = '4' then 'fail'
end as ' TEST RESULTS '
from myTable
This is the result I'm trying to achieve:
account order_number status state
1111 1 ok full
2256 1 ok full
3344 1 NULL NULL
8743 4 ok full
here is simplest way & probably most performant solution:
-- One row per account: prefer the order_number = 4 row when it exists, and
-- report order_number as 1 whenever the account has more than one row.
select account
     , case when cnt > 1 then 1 else order_number end as order_number
     , status
     , state
from (
    select account
         , order_number
         , status
         , state
           -- the order_number = 4 row sorts first within each account
         , row_number() over (partition by account
                              order by case when order_number = 4 then 1 else 0 end desc) as rn
         , count(*) over (partition by account) as cnt
    from myTable   -- the derived table needs a source; it was missing
) t
where rn = 1
You can do it this way:
Create dummy table for testing
-- Dummy table and sample rows used by the queries below.
create table test1 (accounts varchar(10), order_number int, status varchar(10), state varchar(10))
insert into test1 (accounts, order_number, status, state) values ('1111', 4, 'ok', 'full')
insert into test1 (accounts, order_number, status, state) values ('2256', 4, 'ok', 'full')
insert into test1 (accounts, order_number, status, state) values ('3344', 1, NULL, NULL)
insert into test1 (accounts, order_number, status, state) values ('1111', 1, NULL, NULL)
insert into test1 (accounts, order_number, status, state) values ('8743', 4, 'ok', 'full')
insert into test1 (accounts, order_number, status, state) values ('2256', 1, NULL, NULL)
Query, no hard coded values
Select accounts,
order_number,
status,
state
from (
select row_number() over(partition by t1.accounts order by t1.order_number desc) rnum,
t1.accounts,
isnull(t2.order_number,t1.order_number) order_number ,
t1.status,
t1.state
from test1 t1
left join (select * from test1 where order_number=1) t2 on t1.accounts = t2.accounts and t1.order_number <> t2.order_number
) a
where rnum = 1
Result set
accounts order_number status state
---------- ------------ ---------- ----------
1111 1 ok full
2256 1 ok full
3344 1 NULL NULL
8743 4 ok full
UPDATE: Adding Test Result Column
Select accounts,
order_number,
status,
state,
[TEST RESULTS]
from (
select row_number() over(partition by t1.accounts order by t1.order_number desc) rnum,
t1.accounts,
isnull(t2.order_number,t1.order_number) order_number ,
t1.status,
t1.state,
case
when isnull(t2.order_number,t1.order_number) = '1' then 'pass'
when isnull(t2.order_number,t1.order_number) = '4' then 'fail'
end as 'TEST RESULTS'
from test1 t1
left join (select * from test1 where order_number=1) t2 on t1.accounts = t2.accounts and t1.order_number <> t2.order_number
) a
where rnum = 1
Just another option using WITH TIES in concert with the window functions min() over() and row_number() over()
Example
-- Keep, for every account, the row with the highest order_number (it gets
-- row number 1, and WITH TIES returns all rows tied on that sort value),
-- while reporting the smallest order_number seen for the account.
select top 1 with ties
       account,
       min(order_number) over (partition by account) as order_number,
       status,
       state
from myTable
order by row_number() over (partition by account order by order_number desc)
Results
account order_number status state
1111 1 ok full
2256 1 ok full
3344 1 NULL NULL
8743 4 ok full
I only have Access to work with. Output accomplished with:
Query1:
SELECT Q1.account, Q1.order_number, Q2.status, Q2.state
FROM (SELECT DISTINCT account, order_number FROM myTable WHERE order_number = 1) AS Q1
INNER JOIN (SELECT DISTINCT account, status, state FROM myTable WHERE order_number=4) AS Q2
ON Q1.account = q2.account;
Query2:
SELECT account, order_number, status, state FROM Query1
UNION SELECT account, order_number, status, state FROM myTable WHERE NOT account IN(SELECT account FROM Query1);
This query has the desired result, but I have doubts about the WHERE NOT EXISTS part, because I don't know what is the meaning of order_number and, if your real problem is more complex than your question, it may become complicated.
Just changed ok to 1 and full to 1
SELECT [T1].[account], [T1].[order_number], COALESCE([T2].[status], [T1].[status]) AS [status], COALESCE([T2].[state], [T1].[state]) AS [state]
FROM [dbo].[myTable] [T1]
LEFT JOIN [dbo].[myTable] [T2]
ON [T2].[account] = [T1].[account]
AND [T2].[order_number] = 4
WHERE NOT EXISTS (
SELECT 1
FROM [dbo].[myTable] [T3]
WHERE [T3].[account] = [T1].[account]
AND [T1].[order_number] = 4
AND [T3].[order_number] = 1
);
If order_number is always 1 or 4, then the query below could be the most optimized solution for your problem. First, I assign an account-wise sequence number to all rows, starting from 1, in descending order of order_number. So if there is only one row for an account, its sequence number (rn) will be 1, and if there is more than one row, the row with order_number 4 will have sequence number (rn) 1.
So we got our row to select. And to replace order_number 4 with 1 if it's not the only row we calculated row count for each account as column cnt. If cnt>1 and order_number is 4 then we replaced order_number with 1 using case when.
Schema and insert statements:
create table myTable(account int, order_number int, status varchar(10), state varchar(10));
insert into myTable values(1111, 4, 'ok', 'full');
insert into myTable values(2256, 4, 'ok', 'full');
insert into myTable values(3344, 1, NULL, NULL);
insert into myTable values(1111, 1, NULL, NULL);
insert into myTable values(8743, 4, 'ok', 'full');
insert into myTable values(2256, 1, NULL, NULL);
Query:
-- Rank the rows of each account by order_number (highest first) and count them;
-- then keep the top row, showing order_number 1 instead of 4 when the account
-- has more than one row.
with ranked as
(
    select account,
           order_number,
           status,
           state,
           row_number() over (partition by account order by order_number desc) as rn,
           count(order_number) over (partition by account) as cnt
    from mytable
)
select account,
       case
           when cnt > 1 and order_number = 4 then 1
           else order_number
       end as order_number,
       status,
       state
from ranked
where rn = 1
Output:
account
order_number
status
state
1111
1
ok
full
2256
1
ok
full
3344
1
null
null
8743
4
ok
full
db<>fiddle here
You can achieve the above result by using Aggregate Function Like MIN & MAX
-- Table variables are declared with '@' ('#' is only valid for temp tables
-- created with CREATE TABLE).
DECLARE @myTable TABLE (account int, order_number int, status varchar(10), state varchar(10))
INSERT INTO @myTable VALUES(1111, 4, 'ok', 'full');
INSERT INTO @myTable VALUES(2256, 4, 'ok', 'full');
INSERT INTO @myTable VALUES(3344, 1, NULL, NULL);
INSERT INTO @myTable VALUES(1111, 1, NULL, NULL);
INSERT INTO @myTable VALUES(8743, 4, 'ok', 'full');
INSERT INTO @myTable VALUES(2256, 1, NULL, NULL);
-- Query:
-- MIN(order_number) yields 1 when an account has both a 1 and a 4 row;
-- MAX ignores NULLs, so status/state come from the row that has values.
-- NOTE(review): assumes at most one distinct non-NULL status/state per account.
SELECT account,MIN(order_number) order_number,MAX(status) status,MAX(State) State
FROM @myTable
GROUP BY account

Swap two adjacent rows of a column in sql

I'm trying to solve this following problem:
Write a sql query to swap two adjacent rows in a column of a table.
Input table
Name Id
A 1
B 2
C 3
D 4
E 5
Output table
Name Id
A 2
B 1
C 4
D 3
E 5
Description: 1 is associated with A and 2 with B; swap them so that 1 is now associated with B and 2 with A. Do the same for C and D. Since E doesn't have a pair, leave it as it is.
Note:- This may be solved using CASE Statements, but I am trying for a generalized solution, Say currently it is only 5 rows, it may be 10,20 etc..
Eg:
SELECT
*,CASE WHEN Name = A then 2 ELSEIF Name = B then 1 etc...
FROM YourTable
You can use window functions to solve this.
on MySQL (>= 8.0):
SELECT ID, IFNULL(CASE WHEN t.rn % 2 = 0 THEN LAG(Name) OVER (ORDER BY ID) ELSE LEAD(Name) OVER (ORDER BY ID) END, Name) AS Name
FROM (
SELECT ID, Name, ROW_NUMBER() OVER (ORDER BY ID) AS rn
FROM table_name
) t
demo on dbfiddle.uk
on SQL-Server:
SELECT ID, ISNULL(CASE WHEN t.rn % 2 = 0 THEN LAG(Name) OVER (ORDER BY ID) ELSE LEAD(Name) OVER (ORDER BY ID) END, Name) AS Name
FROM (
SELECT ID, Name, ROW_NUMBER() OVER (ORDER BY ID) AS rn
FROM table_name
) t
demo on dbfiddle.uk
If you have sql-server, you can try this.
-- Table variables are declared with '@', not '#'.
DECLARE @YourTable TABLE (Name VARCHAR(10), Id INT)
INSERT INTO @YourTable VALUES
('A', 1),
('B', 2),
('C', 3),
('D', 4),
('E', 5)
;WITH CTE AS (
    -- Number the rows so that adjacent pairs are (1,2), (3,4), ...
    SELECT *, ROW_NUMBER() OVER (ORDER BY Name) AS RN FROM @YourTable
)
-- Each row takes the Id of its pair partner: even rows look one row back, odd
-- rows one row ahead. A trailing unpaired row finds no partner and keeps its Id.
SELECT T1.Name, ISNULL(T2.Id, T1.Id) Id
FROM CTE T1
LEFT JOIN CTE T2 ON T1.RN + CASE WHEN T1.RN % 2 = 0 THEN -1 ELSE 1 END = T2.RN
ORDER BY T1.RN
Result:
Name Id
---------- -----------
A 2
B 1
C 4
D 3
E 5
You didn't specify your DBMS, but the following is standard ANSI SQL.
You can use a values() clause to provide the mapping of the IDs and then join against that:
with id_map (source_id, target_id) as (
values
(1, 2),
(2, 1)
)
select t.name, coalesce(m.target_id, t.id) as mapped_id
from the_table t
left join id_map m on m.source_id = t.id
order by name;
Alternatively if you only want to specify the mapping once for one direction, you can use this:
with id_map (source_id, target_id) as (
values
(1, 2)
)
select t.name,
case id
when m.source_id then m.target_id
when m.target_id then m.source_id
else id
end as mapped_id
from the_table t
left join id_map m on t.id in (m.source_id, m.target_id)
order by name;
Online example: https://rextester.com/FBFH52231

Need query for following scenario

I have a result like followed by
ID Name Status
1 A Y
2 A N
3 B Y
4 B Y
5 C N
In this case, if Name A has two different statuses, then I need a select query that produces the following output:
ID Name Status
1 A N
2 A N
3 B Y
4 B Y
5 C N
And sorry, I dont know how ask question for this scenario..
please provide the solution thanks in advance
This following script will select data as per your requirement-
-- Names that carry more than one distinct status are reported as 'N' on every
-- row; all other rows keep their own status.
SELECT yt.ID,
       yt.Name,
       CASE
           WHEN mixed.status_count > 1 THEN 'N'
           ELSE yt.Status
       END AS Status
FROM your_table AS yt
LEFT JOIN (
    -- only names with at least two different statuses
    SELECT src.Name,
           COUNT(DISTINCT src.Status) AS status_count
    FROM your_table AS src
    GROUP BY src.Name
    HAVING COUNT(DISTINCT src.Status) > 1
) AS mixed
    ON yt.Name = mixed.Name
Using LEFT JOIN with COALESCE in the SELECT will work in this case.
Demo with sample data:
-- Table variables are declared with '@', not '#'.
DECLARE @TestTable TABLE (ID INT, [Name] VARCHAR (1), [Status] VARCHAR (1));
INSERT INTO @TestTable(ID, [Name], [Status]) VALUES
(1, 'A', 'Y'),
(2, 'A', 'N'),
(3, 'B', 'Y'),
(4, 'B', 'Y'),
(5, 'C', 'N');
-- Q holds one ('Name', 'N') row for every name that has at least one 'N';
-- rows whose name is in Q take 'N', all others keep their own status.
SELECT T.ID,
COALESCE(Q.[Name], T.[Name]) AS [Name],
COALESCE(Q.[Status], T.[Status]) AS [Status]
FROM @TestTable T
LEFT JOIN (
SELECT DISTINCT [Name], 'N' AS [Status]
FROM @TestTable
WHERE [Status] = 'N'
) AS Q ON Q.[Name] = T.[Name]
Output:
ID Name Status
1 A N
2 A N
3 B Y
4 B Y
5 C N
Use a RANK in separate query to get the status for the latest id and left join on name against that query to use latest status for all rows for a name
SELECT a.id, a.name, b.status
FROM dbo.Table_3 a
LEFT JOIN (SELECT id, name, status, RANK() OVER (Partition BY name ORDER BY id desc) AS rnk
FROM dbo.table_3) b ON a.name = b.name AND b.rnk = 1
You can use a Windowed function so that you don't need to scan the table twice:
SELECT ID,
[Name],
CASE COUNT(CASE WHEN [Status] = 'N' THEN 1 END) OVER (PARTITION BY [Name]) WHEN 0 THEN [Status] ELSE 'N' END AS [Status]
FROM (VALUES(1,'A','Y'),
(2,'A','N'),
(3,'B','Y'),
(4,'B','Y'),
(5,'C','N')) V(ID, [Name], [Status]);
In below query the derived table a pulls the distinct record that has 'N' . Then joined it with main table and using case statement pulled the status.
Using Derived Table
select *,
case when a.name is not null then 'N' else #temp.status end [status]
from #temp
Left join (select distinct name from #temp where status ='N' )a on a.name = #temp.name
Using Case Statement
-- Report 'N' for every row whose name has at least one 'N' row.
-- EXISTS is used rather than comparing a count to exactly 1, so names with
-- two or more 'N' rows are handled as well.
select *,
       case
           when exists (select 1 from #temp t where t.status = 'N' and t.Name = #temp.Name) then 'N'
           else status
       end [status]
from #temp
OR
select *,
case when (select count(*) from #temp t where status='N' and t.Name = #temp.Name) > 0 then 'N'
else status
end [status]
from #temp
Output
ID Name Status name status
1 A Y A N
2 A N A N
3 B Y NULL Y
4 B Y NULL Y
5 C N C N
For your particular example, you can just use a window function:
-- The smallest status within each name is shown on every row of that name.
select t.ID,
       t.Name,
       min(t.Status) over (partition by t.name) as status
from t;
This works because 'N' is less than 'Y', so the MIN() will return 'N' if any values are 'N'.

How to delete duplicate records in SQL when similar in two columns and different in one column

I have a table like this:
ID Name Family Phone_Number
1 A B 123456
2 c d 321456
3 A B
4 A B 456789
I want to delete records 3 and 4.
Try to figure out duplicates and then delete the duplicate rows:
-- Number the rows of each (FirstName, LastName) group by ID and delete all but
-- the first. Ordering by ID makes the survivor deterministic: the lowest ID.
WITH cte AS (
    SELECT
        ID
      , FirstName
      , LastName
      , ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY ID) AS RN
    FROM YourTABLE
)
DELETE FROM cte WHERE RN > 1
An example:
-- Table variables are declared with '@', not '#'. The declaration and the
-- statements that use it must run in the same batch.
DECLARE @table TABLE
(
ID INT,
FirstName VARCHAR(10),
LastName VARCHAR(10)
);
INSERT INTO @table
(
ID,
FirstName,
LastName
)
VALUES
(1, 'A' , 'B')
, (2, 'c' , 'd')
, (3, 'A' , 'B')
, (4, 'A' , 'B')
-- Query to delete:
-- Rows are numbered by ID within each (FirstName, LastName) group so that the
-- lowest ID is always the one kept.
;WITH cte AS (
SELECT
ID
, FirstName
, LastName
, ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY ID) AS RN
FROM @table
)
DELETE FROM cte WHERE RN > 1
SELECT * FROM @table
OUTPUT:
ID FirstName LastName
1 A B
2 c d
Write sql and execute
-- Number all rows by (Name, Family, ID) and delete every row that is not the
-- first of its (Name, Family) group, so the lowest ID of each group is kept.
; WITH TableBWithRowID AS
(
SELECT ROW_NUMBER() OVER (ORDER BY Name, Family, ID) AS RowID, Name, Family
FROM TABLE1
)
DELETE o
FROM TableBWithRowID o
WHERE o.RowID > (SELECT MIN(i.RowID) FROM TableBWithRowID i WHERE i.Name = o.Name AND i.Family = o.Family)
replace TABLE1 with your table name
The below query will delete all the duplicates records based on the first and last name column. Assuming there is no null in the first and last name column.
You just need to provide/change at two places in below query
DELETE FROM <YourTableName>
where Id not in (
SELECT MIN(ID) as RowId
FROM <YourTableName>
GROUP BY FirstName, LastName
)
With EXISTS:
-- Remove every row for which a row with the same name and family but a
-- smaller id exists, leaving the lowest id of each group.
delete t
from tablename as t
where exists (
    select 1
    from tablename as earlier
    where earlier.name = t.name
      and earlier.family = t.family
      and earlier.id < t.id
)
See the demo

Combine rows if value is blank

I'm using SQL-Server 2008. I need to combine rows with the same Name and increase counter when:
1 or more Id's for the same Name is blank
NOT merge rows if Id is NULL!
NOT merge rows if have the same Name, but different Ids
Output for now:
Name Id Cnt
John 1 1
Peter 2 2 -- This Peter with the same Id have 2 entries so Cnt = 2
Peter 3 1 -- This is other Peter with 1 entry so Cnt = 1
Lisa 4 1
Lisa NULL 1
David 5 1
David 1 -- here Id is blank ''
Ralph 2 -- Ralph have both rows with blank Id so Cnt = 2
Desired output:
Name Id Cnt
John 1 1
Peter 2 2
Peter 3 1
Lisa 4 1
Lisa NULL 1 -- null still here
David 5 2 -- merged with blank '' so Cnt = 2
Ralph 2 -- merged both blanks '' so Cnt = 2
SQL-Query:
This is sample query what I'm using for now:
SELECT Name,
Id,
COUNT(Id) AS Cnt
FROM Employees
WHERE Condition = 1
GROUP BY Name, Id
What I have tried:
I added the aggregate MAX to Id in the SELECT clause and grouped by Name only, but this merges rows with NULL values and rows that have the same name but different Ids, which is wrong for me.
SELECT Name,
MAX(Id), -- added aggregate
COUNT(Id) AS Cnt
FROM Employees
WHERE Condition = 1
GROUP BY Name -- grouped by Name only
Have you any ideas? If anything is not clear about problem - ask me, I will provide more details.
UPDATE:
DDL
CREATE TABLE Employees
(
Name NVARCHAR(40),
Id NVARCHAR(40)
);
DML
INSERT INTO Employees VALUES
('John' , '1')
,('Peter', '2')
,('Peter', '2')
,('Peter', '3')
,('Lisa' , '4')
,('Lisa' , NULL)
,('David', '5')
,('David', '')
,('Ralph', '')
,('Ralph', '')
DEMO: SQL FIDDLE
Edit
-- Table variables are declared with '@', not '#'.
DECLARE @Data table (Name varchar(10), Id varchar(10)) -- Id must be varchar for blank value
INSERT @Data VALUES
('John', '1'),
('Peter', '2'),('Peter', '2'),
('Peter', '3'),--('Peter', ''), --For test
('Lisa', '4'),
('Lisa', NULL),
('David', '5'),
('David', ''),
('Ralph', ''), ('Ralph', '')
SELECT
d.Name,
d.Id,
-- Rows of the group itself, plus the blank-Id rows of the same name when this
-- group has a real (non-blank, non-NULL) Id. COUNT(*) never returns NULL, so
-- no ISNULL wrapper is needed.
COUNT(*) + (SELECT COUNT(*) FROM @Data b WHERE b.Name = d.Name AND b.Id = '' AND d.Id <> '') AS Cnt
FROM @Data d
WHERE
d.Id IS NULL
OR d.Id <> ''
-- blank rows are kept as their own group only when the name has no real Id
OR NOT EXISTS(SELECT * FROM @Data x WHERE x.Name = d.Name AND x.Id <> '')
GROUP BY d.Name, d.Id
You can use CASE statement inside your SELECT. It allows you to set Id = [some value] for employees where it is blank. Query can be something like this:
-- Resolve each blank Id to a real Id of the same name first, then count.
-- The resolution is done in a derived table because T-SQL can neither group by
-- a select-list alias nor by an expression that contains a subquery.
SELECT r.Name,
       r.Idx,
       COUNT(*) AS Cnt   -- COUNT(*) so that the NULL-Id row is counted too
FROM (
    SELECT E.Name,
           CASE
               WHEN E.Id = ''
               THEN ISNULL(
                        -- MIN keeps the subquery single-valued when a name has
                        -- several different non-blank Ids
                        (SELECT MIN(x.Id)
                         FROM Employees AS x
                         WHERE x.Name = E.Name
                           AND x.Id <> ''
                           AND x.Condition = 1),
                        '')   -- no real Id for this name: stay blank
               ELSE E.Id
           END AS Idx
    FROM Employees AS E
    WHERE E.Condition = 1
) AS r
GROUP BY r.Name, r.Idx
A version with window functions:
-- AmtBlank is 1 for a blank ('') Id and 0 otherwise.
-- Cnt = non-blank rows of the (Name, ID) group + all blank rows of the Name.
-- The blank count uses AmtBlank directly; comparing the NVARCHAR id with the
-- integer 0 would force an implicit conversion that fails on non-numeric Ids.
SELECT Name, ID, Cnt from
( select *, sum(1-AmtBlank) over (partition by Name, ID) + sum(AmtBlank) over (partition by Name) Cnt,
-- rnk = 1 keeps blank rows only when the name has no non-blank row
rank() over (partition by Name order by AmtBlank ) rnk,
-- rnr = 1 keeps a single row per (Name, ID) group
row_number() over (partition by Name, ID order by AmtBlank) rnr
FROM (select * , case id when '' then 1 else 0 end AmtBlank from Employees /*WHERE Condition = 1*/ ) e
) c where rnr=1 and rnk = 1
This uses case id when '' then 1 else 0 end AmtBlank to keep an amount for the blank amounts per row (making the amount for non blanks 1-AmtBlank) and 2 window functions, one with id for a count per name and id (sum(1-AmtBlank) over (partition by Name, ID)) and a count for all blanks in a name section (sum(case id when 0 then 1 else 0 end) over (partition by Name))
The row_number is used to subsequently fetch only the first rows of a group and rank is used to only include the blank records when there are no records with an id.
Try this. using cte and joins
;with cte as (
SELECT Name,
Id,
COUNT(*) AS Cnt
FROM Employees
WHERE isnull(Id,1)<>''
GROUP BY Name, Id
),
cte2 as (SELECT Name,id, COUNT(*) AS Cnt FROM Employees WHERE Id='' GROUP BY Name,id)
select cte.Name,cte.Id,(cte.cnt + ISNULL(cte2.Cnt,0)) as cnt
from cte
left JOIN cte2
on cte.Name = cte2.Name
union all
select cte2.Name,cte2.Id,cte2.cnt
from cte2
left JOIN cte
on cte.Name = cte2.Name
where cte.Name is null
You could try something like this.
;WITH NonBlanks AS
(
SELECT Name,
Id,
COUNT(ISNULL(Id, 1)) AS Cnt
FROM Employees
WHERE ISNULL(Id,0) <> ''
GROUP BY Name, Id
)
,Blanks AS
(
SELECT Name,
Id,
COUNT(ISNULL(Id, 1)) AS Cnt
FROM Employees
WHERE ID = ''
GROUP BY Name, Id
)
SELECT CASE WHEN nb.NAME IS NULL THEN b.NAME ELSE nb.NAME END NAME,
CASE WHEN nb.NAME IS NULL THEN b.Id ELSE nb.Id END Id,
(ISNULL(nb.Cnt,0) + ISNULL(b.Cnt,0)) Cnt
FROM NonBlanks nb FULL JOIN Blanks b
ON nb.Name = b.Name
This simple syntax is compatible with older versions or other RDBMSs
-- Self explained on comments
edited:
select name, id, count(*) from (
-- adds "normal" records
select name, id from Employees where id is null or id <> ''
-- adds one record to each name-notBlankId for each blank id (David, Peter if you add 'Peter','')
-- uncomment /*or id is null*/ if you want even null ids to recieve merged blanks
union all
select e1.name, e1.id
from (select distinct name, id from Employees where id <> '' /*or id is null*/ ) as e1
inner join (select name, id from Employees where id = '') as e2 on e1.name = e2.name
-- adds records that can't be merged (Ralph)
union all
select name, id from Employees e1
where e1.id = ''
and not exists(select * from Employees e2 where e1.name = e2.name and e2.id <> '')
) as fullrecords
group by name, id