Combine rows if value is blank - sql

I'm using SQL-Server 2008. I need to combine rows with the same Name and increase counter when:
1 or more Id's for the same Name is blank
NOT merge rows if Id is NULL!
NOT merge rows if have the same Name, but different Ids
Output for now:
Name Id Cnt
John 1 1
Peter 2 2 -- This Peter with the same Id have 2 entries so Cnt = 2
Peter 3 1 -- This is other Peter with 1 entry so Cnt = 1
Lisa 4 1
Lisa NULL 1
David 5 1
David 1 -- here Id is blank ''
Ralph 2 -- Ralph have both rows with blank Id so Cnt = 2
Desired output:
Name Id Cnt
John 1 1
Peter 2 2
Peter 3 1
Lisa 4 1
Lisa NULL 1 -- null still here
David 5 2 -- merged with blank '' so Cnt = 2
Ralph 2 -- merged both blanks '' so Cnt = 2
SQL-Query:
This is sample query what I'm using for now:
-- Current query: one output row per distinct (Name, Id) pair;
-- Cnt is the number of non-NULL Id values in that pair.
SELECT
    e.Name,
    e.Id,
    COUNT(e.Id) AS Cnt
FROM Employees AS e
WHERE e.Condition = 1
GROUP BY
    e.Name,
    e.Id
What I have tried:
I added the aggregate MAX to Id in the SELECT clause and grouped by Name only, but this also merged the rows with NULL values and the rows that have the same Name but different Ids, which is wrong for me.
-- Attempt: collapse all rows of a Name into a single group.
SELECT
    e.Name,
    MAX(e.Id),          -- added aggregate
    COUNT(e.Id) AS Cnt
FROM Employees AS e
WHERE e.Condition = 1
GROUP BY
    e.Name              -- grouped by Name only
Have you any ideas? If anything is not clear about problem - ask me, I will provide more details.
UPDATE:
DDL
-- Sample table; Id is text so that it can hold a blank '' value.
CREATE TABLE Employees (
    Name NVARCHAR(40),
    Id   NVARCHAR(40)
);
DML
-- Sample rows. Lisa has a NULL Id; David and Ralph have blank ('') Ids.
-- The column list is explicit so the insert does not depend on column order
-- or on columns added to the table later.
INSERT INTO Employees (Name, Id) VALUES
 ('John' , '1')
,('Peter', '2')
,('Peter', '2')
,('Peter', '3')
,('Lisa' , '4')
,('Lisa' , NULL)
,('David', '5')
,('David', '')
,('Ralph', '')
,('Ralph', '')
DEMO: SQL FIDDLE

Edit
-- Table variables are declared with '@' (a '#' prefix is only valid for temp tables
-- created with CREATE TABLE). Id must be varchar for the blank value.
DECLARE @Data TABLE (Name varchar(10), Id varchar(10));

INSERT @Data (Name, Id) VALUES
('John', '1'),
('Peter', '2'),('Peter', '2'),
('Peter', '3'),--('Peter', ''), --For test
('Lisa', '4'),
('Lisa', NULL),
('David', '5'),
('David', ''),
('Ralph', ''), ('Ralph', '');

SELECT
    d.Name,
    d.Id,
    -- rows of this (Name, Id) group, plus the blank-Id rows of the same Name
    -- when this group's own Id is non-blank (for a NULL Id the comparison is
    -- UNKNOWN, so nothing is added)
    COUNT(*) + ISNULL(
        (SELECT COUNT(*) FROM @Data AS b WHERE b.Name = d.Name AND b.Id = '' AND d.Id <> '')
    , 0) AS Cnt
FROM @Data AS d
WHERE
    d.Id IS NULL
    OR d.Id <> ''
    -- blank rows survive only when the Name has no non-blank Id at all
    OR NOT EXISTS (SELECT * FROM @Data AS nb WHERE nb.Name = d.Name AND nb.Id <> '')
GROUP BY d.Name, d.Id;

You can use CASE statement inside your SELECT. It allows you to set Id = [some value] for employees where it is blank. Query can be something like this:
-- Re-labels blank Ids with a non-blank Id of the same Name, then counts per (Name, Idx).
-- Changes from the original form:
--   * Idx is computed in a derived table, because SQL Server accepts neither a
--     select-list alias nor a subquery in GROUP BY.
--   * MAX() keeps the lookup scalar when a Name has several non-blank rows; with
--     several distinct Ids the blanks are attributed to the greatest one.
--   * COALESCE keeps '' for Names that have only blank Ids, so they are not
--     turned into NULL.
--   * COUNT(*) counts rows, so a NULL-Id group reports its rows instead of 0.
SELECT R.Name,
       R.Idx,
       COUNT(*) AS Cnt
FROM (
    SELECT E.Name,
           CASE
               WHEN E.Id = ''
               THEN COALESCE(
                        (SELECT MAX(NB.Id)
                         FROM Employees AS NB
                         WHERE NB.Id <> '' AND NB.Name = E.Name),
                        '')
               ELSE E.Id
           END AS Idx
    FROM Employees AS E
    WHERE Condition = 1
) AS R
GROUP BY R.Name, R.Idx

A version with window functions:
-- Count per (Name, ID), with the blank-ID rows of a Name folded into its other rows.
SELECT Name, ID, Cnt
FROM (
    SELECT
        e.Name,
        e.ID,
        -- non-blank rows of this (Name, ID) plus all blank rows of the Name.
        -- The blank count reuses AmtBlank: comparing the text ID to the integer 0
        -- forces an implicit conversion that fails on non-numeric IDs and would
        -- also treat an ID of '0' as blank.
        SUM(1 - e.AmtBlank) OVER (PARTITION BY e.Name, e.ID)
            + SUM(e.AmtBlank) OVER (PARTITION BY e.Name) AS Cnt,
        -- rank 1 = the non-blank rows of a Name, or the blank rows if there are no others
        RANK() OVER (PARTITION BY e.Name ORDER BY e.AmtBlank) AS rnk,
        -- used to keep a single row per (Name, ID)
        ROW_NUMBER() OVER (PARTITION BY e.Name, e.ID ORDER BY e.AmtBlank) AS rnr
    FROM (
        SELECT Name, ID, CASE ID WHEN '' THEN 1 ELSE 0 END AS AmtBlank
        FROM Employees /*WHERE Condition = 1*/
    ) AS e
) AS c
WHERE rnr = 1 AND rnk = 1
This uses case id when '' then 1 else 0 end AmtBlank to flag blank rows (1 for a blank Id, 0 otherwise, so 1-AmtBlank is 1 for every non-blank row). Two windowed sums are then added together: sum(1-AmtBlank) over (partition by Name, ID) counts the non-blank rows per Name and Id, and a second sum over (partition by Name) counts all the blank rows of that Name.
The row_number is used to subsequently fetch only the first rows of a group and rank is used to only include the blank records when there are no records with an id.

Try this. using cte and joins
;WITH cte AS (
    -- counts per (Name, Id) for NULL and non-blank Ids
    SELECT Name,
           Id,
           COUNT(*) AS Cnt
    FROM Employees
    WHERE Id IS NULL OR Id <> ''
    GROUP BY Name, Id
),
cte2 AS (
    -- counts of blank Ids per Name
    SELECT Name,
           Id,
           COUNT(*) AS Cnt
    FROM Employees
    WHERE Id = ''
    GROUP BY Name, Id
)
-- non-blank groups, with the blank count of the same Name added
SELECT nb.Name,
       nb.Id,
       (nb.Cnt + COALESCE(bl.Cnt, 0)) AS cnt
FROM cte AS nb
LEFT JOIN cte2 AS bl
    ON nb.Name = bl.Name

UNION ALL

-- blank groups of Names that have no other group
SELECT bl.Name,
       bl.Id,
       bl.Cnt
FROM cte2 AS bl
WHERE NOT EXISTS (SELECT 1 FROM cte AS nb WHERE nb.Name = bl.Name)

You could try something like this.
;WITH NonBlanks AS (
    -- rows whose Id is NULL or non-blank, counted per (Name, Id)
    SELECT Name,
           Id,
           COUNT(*) AS Cnt
    FROM Employees
    WHERE Id IS NULL OR Id <> ''
    GROUP BY Name, Id
),
Blanks AS (
    -- rows whose Id is blank, counted per Name
    SELECT Name,
           Id,
           COUNT(*) AS Cnt
    FROM Employees
    WHERE Id = ''
    GROUP BY Name, Id
)
-- FULL JOIN keeps Names that appear on only one side
SELECT COALESCE(nb.Name, b.Name) AS NAME,
       CASE WHEN nb.Name IS NOT NULL THEN nb.Id ELSE b.Id END AS Id,
       (COALESCE(nb.Cnt, 0) + COALESCE(b.Cnt, 0)) AS Cnt
FROM NonBlanks AS nb
FULL JOIN Blanks AS b
    ON nb.Name = b.Name

This simple syntax is compatible with older versions or other RDBMSs
-- Self explained on comments
edited:
select name, id, count(*)
from (
    -- 1) "normal" records: every row whose id is NULL or non-blank
    select name, id
    from Employees
    where id is null or id <> ''

    union all

    -- 2) one extra record for each distinct non-blank (name, id) per blank id of
    --    the same name (David; Peter too if you add 'Peter','')
    --    uncomment /*or id is null*/ if you want even null ids to receive merged blanks
    select target.name, target.id
    from (select distinct name, id from Employees where id <> '' /*or id is null*/ ) as target
    inner join (select name, id from Employees where id = '') as blank
        on target.name = blank.name

    union all

    -- 3) blank records that cannot be merged (Ralph)
    select lonely.name, lonely.id
    from Employees as lonely
    where lonely.id = ''
      and not exists (select *
                      from Employees as other
                      where lonely.name = other.name and other.id <> '')
) as fullrecords
group by name, id

Related

Delete duplicate rows in different columns

Table1:
id name address1 address2 address3
------------------------------------------
1 Jenny A B NULL
2 John C NULL NULL
3 Jenny B A NULL
4 John NULL NULL C
.....
id1 and id3 are the same in this condition, id2 and id4 are the same too.
Can I delete id3 and id4? I'm using SQL Server 2019.
You need to define a condition for duplicate rows. One possible approach to define such a condition is to aggregate ordered addresses. The following statement demonstrates this approach:
Table:
-- Sample table: one name and up to three address slots per row.
CREATE TABLE Data (
    id int,
    name varchar(10),
    address1 varchar(100),
    address2 varchar(100),
    address3 varchar(100)
)

-- Rows 1/3 and 2/4 hold the same addresses in different slots.
-- Row 4 is stored as 'John' with no trailing space, so it matches row 2 exactly.
INSERT INTO Data
    (id, name, address1, address2, address3)
VALUES
    (1, 'Jenny', 'A', 'B', NULL),
    (2, 'John', 'C', NULL, NULL),
    (3, 'Jenny', 'B', 'A', NULL),
    (4, 'John', NULL, NULL, 'C')
Statement:
-- Deletes every row except the lowest-id row of each group of rows that hold
-- the same set of addresses, regardless of which address column holds which value.
DELETE x
FROM (
SELECT
d.*,
-- Condition for equal addresses
-- rn numbers the rows that share the same CheckCondtition string, lowest id first
ROW_NUMBER() OVER (PARTITION BY c.CheckCondtition ORDER BY d.id) AS rn
-- Condition for equal name and addresses
-- ROW_NUMBER() OVER (PARTITION BY d.name, c.CheckCondtition ORDER BY d.id) AS rn
FROM Data d
-- Builds one string per row: the three address values, each prefixed with ',',
-- concatenated in sorted order so the column position does not matter
CROSS APPLY (
SELECT CONCAT(',', [address])
FROM (VALUES (d.address1), (d.address2), (d.address3)) v([address])
ORDER BY [address]
FOR XML PATH('')
) c(CheckCondtition)
) x
-- rn = 1 is the row that is kept
WHERE x.rn > 1
If you're using SQL Server 2017 or later, you can build up a unique identifier for each row based on the name and each of the address fields using STRING_AGG; then find row numbers for each occurrence of that value (ordering by id), and then you can delete the rows with row number > 1:
-- One row per (id, value): the name plus each of the three address columns.
WITH unpivoted AS (
    SELECT id, name AS value FROM data
    UNION ALL
    SELECT id, address1 FROM data
    UNION ALL
    SELECT id, address2 FROM data
    UNION ALL
    SELECT id, address3 FROM data
),
-- Per id, the values joined with '%' in sorted order.
signatures AS (
    SELECT id,
           STRING_AGG(value, '%') WITHIN GROUP (ORDER BY value) AS v
    FROM unpivoted
    GROUP BY id
),
-- Number the ids that share a signature, lowest id first.
numbered AS (
    SELECT id,
           v,
           ROW_NUMBER() OVER (PARTITION BY v ORDER BY id) AS rn
    FROM signatures
)
-- Keep the first id of each signature, delete the rest.
DELETE d
FROM data AS d
INNER JOIN numbered AS n
    ON n.id = d.id
WHERE n.rn > 1
Demo on SQLFiddle
I don't think fancy manipulation of the three columns is needed. Assuming you do not have duplicate values in the columns:
-- Deletes a row t1 when an earlier row tt1 (smaller id) exists whose three
-- address columns hold the same values in any order.
delete t1 from table1 t1
where exists (select 1
from table1 tt1
where tt1.id < t1.id and
-- each address of t1 is either NULL or appears in one of tt1's address columns
(t1.address1 in (tt1.address1, tt1.address2, tt1.address3) or t1.address1 is null
) and
(t1.address2 in (tt1.address1, tt1.address2, tt1.address3) or t1.address2 is null
) and
(t1.address3 in (tt1.address1, tt1.address2, tt1.address3) or t1.address3 is null
) and
-- both rows must have the same number of NULL address columns
( (case when t1.address1 is null then 1 else 0 end +
case when t1.address2 is null then 1 else 0 end +
case when t1.address3 is null then 1 else 0 end
) =
(case when tt1.address1 is null then 1 else 0 end +
case when tt1.address2 is null then 1 else 0 end +
case when tt1.address3 is null then 1 else 0 end
)
)
);
This checks that each non-NULL value matches and the number of NULL values is the same. This implies that the two sets of values are equivalent.
The advantage is that you don't have to worry about separators -- which is a concern for either string_agg() or XML.

Need query for following scenario

I have a result like followed by
ID Name Status
1 A Y
2 A N
3 B Y
4 B Y
5 C N
In this case, if a Name (such as A) has two different statuses, then I need a select query that produces the following output:
ID Name Status
1 A N
2 A N
3 B Y
4 B Y
5 C N
And sorry, I dont know how ask question for this scenario..
please provide the solution thanks in advance
This following script will select data as per your requirement-
-- Names with more than one distinct Status are reported as 'N' on every row.
SELECT
    yt.ID,
    yt.Name,
    CASE
        WHEN mixed.N > 1 THEN 'N'
        ELSE Status
    END AS Status
FROM your_table AS yt
LEFT JOIN (
    -- only the names that carry more than one distinct Status
    SELECT
        Name,
        COUNT(DISTINCT Status) AS N
    FROM your_table
    GROUP BY Name
    HAVING COUNT(DISTINCT Status) > 1
) AS mixed
    ON yt.Name = mixed.Name
Using LEFT JOIN with COALESCE in the SELECT will work in this case.
Demo with sample data:
-- Table variables must be declared and referenced with '@'.
DECLARE @TestTable TABLE (ID INT, [Name] VARCHAR (1), [Status] VARCHAR (1));

INSERT INTO @TestTable (ID, [Name], [Status]) VALUES
(1, 'A', 'Y'),
(2, 'A', 'N'),
(3, 'B', 'Y'),
(4, 'B', 'Y'),
(5, 'C', 'N');

-- Q holds one ('N') row per Name that has at least one 'N' status;
-- COALESCE falls back to the row's own values when the Name is not in Q.
SELECT T.ID,
       COALESCE(Q.[Name], T.[Name]) AS [Name],
       COALESCE(Q.[Status], T.[Status]) AS [Status]
FROM @TestTable AS T
LEFT JOIN (
    SELECT DISTINCT [Name], 'N' AS [Status]
    FROM @TestTable
    WHERE [Status] = 'N'
) AS Q ON Q.[Name] = T.[Name]
Output:
ID Name Status
1 A N
2 A N
3 B Y
4 B Y
5 C N
Use a RANK in separate query to get the status for the latest id and left join on name against that query to use latest status for all rows for a name
-- Every row of a name gets the status of that name's highest-id row.
SELECT
    cur.id,
    cur.name,
    latest.status
FROM dbo.Table_3 AS cur
LEFT JOIN (
    SELECT
        id,
        name,
        status,
        RANK() OVER (PARTITION BY name ORDER BY id DESC) AS rnk
    FROM dbo.table_3
) AS latest
    ON cur.name = latest.name
   AND latest.rnk = 1
You can use a Windowed function so that you don't need to scan the table twice:
-- A single pass: count the 'N' rows per Name with a windowed COUNT.
SELECT
    ID,
    [Name],
    CASE
        WHEN COUNT(CASE WHEN [Status] = 'N' THEN 1 END) OVER (PARTITION BY [Name]) > 0 THEN 'N'
        ELSE [Status]
    END AS [Status]
FROM (VALUES
        (1, 'A', 'Y'),
        (2, 'A', 'N'),
        (3, 'B', 'Y'),
        (4, 'B', 'Y'),
        (5, 'C', 'N')
     ) V(ID, [Name], [Status]);
In below query the derived table a pulls the distinct record that has 'N' . Then joined it with main table and using case statement pulled the status.
Using Derived Table
-- n_names = the names that have at least one 'N' row
select *,
       case
           when n_names.name is null then #temp.status
           else 'N'
       end [status]
from #temp
left join (select distinct name from #temp where status = 'N') n_names
    on n_names.name = #temp.name
Using Case Statement
-- 'N' for every row of a name that has at least one 'N' row.
-- The count is tested against 0 rather than 1: with "when 1" a name holding two
-- or more 'N' rows fell through to ELSE and kept its original statuses.
select *,
       case (select count(*) from #temp t where t.status = 'N' and t.Name = #temp.Name)
           when 0 then status
           else 'N'
       end [status]
from #temp
OR
-- 'N' for every row of a name that has at least one 'N' row.
-- EXISTS can stop at the first matching row instead of counting them all.
select *,
       case
           when exists (select 1 from #temp t where t.status = 'N' and t.Name = #temp.Name) then 'N'
           else status
       end [status]
from #temp
Output
ID Name Status name status
1 A Y A N
2 A N A N
3 B Y NULL Y
4 B Y NULL Y
5 C N C N
For your particular example, you can just use a window function:
-- Smallest Status value within each name, repeated on every row of that name.
select
    ID,
    Name,
    min(Status) over (partition by name) as status
from t;
This works because 'N' is less than 'Y', so the MIN() will return 'N' if any values are 'N'.

How to delete duplicate records in SQL when similar in two columns and different in one column

I have a table like this:
ID Name Family Phone_Number
1 A B 123456
2 c d 321456
3 A B
4 A B 456789
I want to delete records 3 and 4.
Try to figure out duplicates and then delete the duplicate rows:
-- Numbers the rows of each (FirstName, LastName) group and deletes all but the first.
-- Ordering by ID makes the kept row deterministic (the lowest ID); ordering by a
-- partition column left the choice of the surviving row to the engine.
;WITH cte AS (
    SELECT
        FirstName
        , LastName
        , ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY ID) AS RN
    FROM YourTABLE
)
DELETE FROM cte
WHERE RN > 1
An example:
-- Table variables must be declared and referenced with '@'.
DECLARE @table TABLE
(
    ID INT,
    FirstName VARCHAR(10),
    LastName VARCHAR(10)
);

INSERT INTO @table
(
    ID,
    FirstName,
    LastName
)
VALUES
  (1, 'A' , 'B')
, (2, 'c' , 'd')
, (3, 'A' , 'B')
, (4, 'A' , 'B');

-- Query to delete:
-- ORDER BY ID keeps the lowest ID of each (FirstName, LastName) group, which is
-- what the expected output below shows (rows 1 and 2 remain).
;WITH cte AS (
    SELECT
        FirstName
        , LastName
        , ROW_NUMBER() OVER (PARTITION BY FirstName, LastName ORDER BY ID) AS RN
    FROM @table
)
DELETE FROM cte
WHERE RN > 1;

SELECT ID, FirstName, LastName FROM @table
OUTPUT:
ID FirstName LastName
1 A B
2 c d
Write sql and execute
-- Numbers the rows inside each (Name, Family) group by ID and deletes every row
-- after the first, so the lowest ID of each group is kept.
-- The numbering is partitioned and ordered by ID so it is deterministic and the
-- CTE is read only once; a global numbering ordered by (Name, Family) alone left
-- the order of duplicates undefined and kept an arbitrary "last" row.
;WITH TableBWithRowID AS
(
    SELECT ROW_NUMBER() OVER (PARTITION BY Name, Family ORDER BY ID) AS RowID, Name, Family
    FROM TABLE1
)
DELETE FROM TableBWithRowID
WHERE RowID > 1
replace TABLE1 with your table name
The below query will delete all the duplicates records based on the first and last name column. Assuming there is no null in the first and last name column.
You just need to provide/change at two places in below query
-- Keeps the smallest ID of each (FirstName, LastName) group and deletes the rest.
DELETE FROM <YourTableName>
WHERE Id NOT IN (
    SELECT MIN(ID) AS RowId
    FROM <YourTableName>
    GROUP BY
        FirstName,
        LastName
)
With EXISTS:
-- Deletes a row when another row with the same name and family has a smaller id.
delete t
from tablename t
where exists (
    select 1
    from tablename
    where name = t.name
      and family = t.family
      and id < t.id
)
See the demo

Find non Identical values using Group by in SQL Server

I need to find the Non-Identical values are exist in a particular Group. Kindly have a look in the following Table Contact
ContactId FirstName LastName Mobile
_________________________________________________
1 Emma Watsan 9991234567
2 Jhon Wick 8887654321
1 Emma Watsan 9990001111
Here I need to fetch the Emma Watsan and need to find the Mobile numbers are Identical (bool - bit) If both Mobile numbers are Identical than 1 otherwise 0.
I tried the following Query
-- Row count of every contact that appears more than once.
SELECT COUNT(*)
FROM Contact c
GROUP BY
    c.ContactId,
    c.FirstName,
    c.LastName
HAVING COUNT(*) > 1
Kindly assist me how to find the result.
To get the information related to Emma Watsan you could use:
-- All rows of the contacts whose ContactId occurs more than once.
select *
from Contact
where ContactId in (
    select c.ContactId
    from Contact c
    group by c.ContactId
    having count(*) > 1
)
To get the contacts that have more than one distinct mobile number (i.e. whose numbers are not all identical):
-- ContactIds with more than one distinct Mobile, and how many they have.
select
    c.ContactId,
    count(distinct Mobile)
from Contact c
group by c.ContactId
having count(distinct Mobile) > 1
Use Count(Distinct [Mobile]) to get the number of distinct mobile numbers per ContactId, and then use a CASE expression to return 0 or 1 based on that count: if the count is greater than 1 then 0, else 1.
Query
-- One row per ContactId: 1 when the contact has at most one distinct Mobile, else 0.
select
    t.[Name],
    case when t.[Mobile] <= 1 then 1 else 0 end as [Mobile_Identity]
from (
    select
        ContactId,
        max([FirstName] + ' ' + [LastName]) as [Name],
        count(distinct [Mobile]) as [Mobile]
    from contacts
    group by ContactId
) t;
And if you want to retrieve only the rows with multiple mobile numbers, then use a having clause.
-- Same as above, restricted by HAVING to contacts with more than one distinct Mobile.
select
    t.[Name],
    case when t.[Mobile] <= 1 then 1 else 0 end as [Mobile_Identity]
from (
    select
        ContactId,
        max([FirstName] + ' ' + [LastName]) as [Name],
        count(distinct [Mobile]) as [Mobile]
    from contacts
    group by ContactId
    having count(distinct [Mobile]) > 1
) t;
Try This:
-- Sample table and rows.
create table Contacts(ContactId int, FirstName varchar(15), LastName varchar(15), Mobile bigint)

insert into Contacts (ContactId, FirstName, LastName, Mobile)
select 1, 'Emma', 'Watsan', 9991234567
union all
select 2, 'Jhon', 'Wick', 8887654321
union all
select 1, 'Emma', 'Watsan', 9990001111

-- ISIdentitcal = 1 when all Mobile values of the contact are the same, else 0.
-- The flag is derived from the number of DISTINCT mobiles: a plain row count > 1
-- only says the contact has several rows, and marked Emma (two different
-- numbers) as identical.
select c.ContactId, c.FirstName, c.LastName,
       case when c.cnt > 1 then 0 else 1 end as ISIdentitcal
from (
    select ContactId, FirstName, LastName,
           count(distinct Mobile) as cnt
    from Contacts
    group by ContactId, FirstName, LastName
) c
How about:
-- is_duplicate = 1 when another row (different id) has the same ContactId and mobile.
select distinct
    a.ContactId,
    case when b.mobile is not null then 1 else 0 end as [is_duplicate]
from Contact a
left join Contact b
    on  a.ContactId = b.ContactId
    and a.mobile = b.mobile
    and a.id <> b.id
Where [id] is the primary key column in the table (you should have one).
Hope this helps.
PS: the table isn't normalized properly - if the ContactIDs repeat, the first and last name should not be in the same table.
I would do it something like this...(sample table variable with data included)
-- Table variables must be declared and referenced with '@'.
DECLARE @TABLE TABLE (ContactID INT, Firstname VARCHAR(55), Lastname VARCHAR(55), Mobile VARCHAR(55));
INSERT INTO @TABLE VALUES (1, 'Emma', 'Watsan', '9991234567');
INSERT INTO @TABLE VALUES (2, 'Jhon', 'Wick', '8887654321');
INSERT INTO @TABLE VALUES (1, 'Emma', 'Watsan', '9990001111');
INSERT INTO @TABLE VALUES (1, 'Emma', 'Watsan', '9990001111');

-- Duplicate = 1 when another row (different RowID) has the same ContactID and Mobile.
SELECT
    T1.FirstName + ' ' + T1.LastName AS Name
    ,T1.Mobile
    ,MAX(CASE WHEN T2.RowID IS NULL THEN 0 ELSE 1 END) AS Duplicate
FROM
(
    -- RowID gives every row an identity so a row is never matched with itself
    SELECT
        ROW_NUMBER() OVER (ORDER BY FirstName, LastName, Mobile) AS RowID
        ,*
    FROM @TABLE
) T1
LEFT JOIN
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY FirstName, LastName, Mobile) AS RowID
        ,*
    FROM @TABLE
) T2
    ON T1.ContactID = T2.ContactID
    AND T1.Mobile = T2.Mobile
    AND T1.RowID <> T2.RowID
GROUP BY T1.FirstName + ' ' + T1.LastName, T1.Mobile
;
If the actual table already has a row ID column, then the row_number() function can be skipped and the table's own row ID used in its place.
In the example here, Emma Watsan has the same number twice (on purpose), and another number that shows only once in the table. The duplicate mobile number is marked (Duplicate = 1), but the other numbers are not, as desired.

CASE condition and how to code it?

I am creating a column in a new table( Table B) called Number of different locations. This column is derived from two columns in a Table A - Customer and Location.
Sample Data from Table A .
Customer Location
Mr James Smith Los Angeles
Mr David Jones London
Mr James Smith Paris
So the pseudo code ?
[Number of Different Locations] =
CASE
When Customer has more than one location ( count greater than 1 of for distinct customer)
Then populate those entries as 'Y'
Else 'N'
Now I have tried a few ways to code the 1st condition but it does not work .
CASE
When EXISTS ( select distinct customer, count ( Location ) from Table B
group by customer)
then 'Y'
Else 'N'
What am I doing wrong ? All the values are coming out in resultant table as 'Y'
-- Flags each row with 'Y' when its customer has more than one location row.
-- The subquery is correlated on Customer: an uncorrelated EXISTS is true for
-- every row as soon as any customer has more than one location, so every row
-- came out as 'Y'. The subquery reads the same [Table] as the outer query.
SELECT t.Customer,
       t.Location,
       [Number of different locations] =
           CASE
               WHEN EXISTS (SELECT 1
                            FROM [Table] AS other
                            WHERE other.Customer = t.Customer
                            GROUP BY other.Customer
                            HAVING COUNT(other.Location) > 1)
               THEN 'Y'
               ELSE 'N'
           END
FROM [Table] AS t
you didn't specify "Having > 1"
You don't really need a separate subselect for that. This can be handled entirely within a GROUP BY clause and using the count of locations to determine if there are multiple locations.
-- One row per customer: 'Y' when the customer has more than one non-NULL Location row.
SELECT
    Customer,
    CASE
        WHEN COUNT(Location) > 1 THEN 'Y'
        ELSE 'N'
    END AS MultipleLocations
FROM YourTable
GROUP BY Customer
Should your table contain multiple records for a customer with the same location, you can add DISTINCT inside the COUNT to accommodate this.
-- One row per customer: 'Y' when the customer has more than one distinct Location.
SELECT
    Customer,
    CASE
        WHEN COUNT(DISTINCT Location) > 1 THEN 'Y'
        ELSE 'N'
    END AS MultipleLocations
FROM YourTable
GROUP BY Customer
Maybe this will help; it does the same thing in another way, using a windowed count:
-- Table variables must be declared and referenced with '@'.
DECLARE @tbl TABLE
(
    Customer VARCHAR(100),
    Location VARCHAR(100)
)

INSERT INTO @tbl (Customer, Location)
SELECT 'Mr James Smith','Los Angeles'
UNION ALL
SELECT 'Mr David Jones','London'
UNION ALL
SELECT 'Mr James Smith','Paris'

-- NbrOf = number of rows of the same customer, repeated on each of its rows
;WITH CTE AS
(
    SELECT
        COUNT(*) OVER(PARTITION BY tbl.Customer) AS NbrOf,
        tbl.Customer,
        tbl.Location
    FROM
        @tbl AS tbl
)
SELECT
    CTE.Customer,
    CTE.Location,
    (
        CASE
            WHEN CTE.NbrOf>1
            THEN 'Y'
            ELSE 'N'
        END
    ) AS newColumn
FROM
    CTE
-- Inline sample data.
WITH TableA AS (
    SELECT *
    FROM (
        VALUES ('Mr James Smith', 'Los Angeles'),
               ('Mr David Jones', 'London'),
               ('Mr James Smith', 'Paris')
    ) AS T (Customer, Location)
),
-- Number of distinct locations per customer.
TableACustomerTallies AS (
    SELECT Customer,
           COUNT(DISTINCT Location) AS Tally
    FROM TableA
    GROUP BY Customer
)
-- One row per customer: 'Y' for a tally above 1, otherwise 'N'.
SELECT Customer,
       CASE
           WHEN Tally > 1 THEN 'Y'
           ELSE 'N'
       END AS HasMultipleLocations
FROM TableACustomerTallies;