How to check duplicate column values? - sql

I have created a stored procedure to select data, and the result is:
ID NAME EMAIL
1 John asd#asd.com
2 Sam asd#asd.com
3 Tom asd#asd.com
4 Bob bob#asd.com
5 Tom asc#asd.com
and I would like to get result like:
ID NAME EMAIL
1 John asd#asd.com
2 Sam asd#asd.com
3 Tom asd#asd.com, asc#asd.com
4 Bob bob#asd.com
so, how can I do it?
Thanks.

-- Returns one row per name (the row with the lowest id), with every email
-- stored for that name joined into a single ', '-separated string.
select
    id,
    name,
    email
from (
    select
        -- rn = 1 marks the lowest-id row of each name
        rn = row_number() over (partition by t1.name order by t1.id asc),
        t1.id,
        t1.name,
        -- FOR XML PATH('') concatenates the emails. TYPE + .value() returns the
        -- plain text, so characters such as '&' are not entity-escaped.
        -- varchar(max) is explicit because CONVERT(varchar, ...) without a
        -- length silently truncates to 30 characters.
        -- ORDER BY t2.id makes the concatenation order deterministic.
        email = stuff((select ', ' + convert(varchar(max), t2.email)
                       from #table_var t2
                       where t1.name = t2.name
                       order by t2.id
                       for xml path(''), type).value('.', 'varchar(max)')
                      , 1, 2, '')
    from #table_var t1
    group by t1.id, t1.name
) t
where rn = 1
order by id

GROUP BY is what you're after.
For example
-- Count the non-null emails in each (name, email) group.
SELECT
    name,
    email,
    COUNT(email)
FROM table
GROUP BY
    name,
    email
will return something like
1 John asd#asd.com 1
2 Sam asd#asd.com 1
3 Tom asd#asd.com 2
4 Bob bob#asd.com 1
adding
HAVING count(email) > 1
to the end will result in
1 Tom asd#asd.com 2

Just another way, could help
-- Keep only the lowest-Id row of every distinct (Name, Email) pair.
SELECT
    ranked.Id,
    ranked.Name,
    ranked.Email
FROM (
    SELECT
        Id,
        Name,
        Email,
        -- numbers the rows of each (Name, Email) pair starting at 1, by Id
        ROW_NUMBER() OVER (PARTITION BY Name, Email ORDER BY Id) AS rowNum
    FROM Table
) AS ranked
WHERE ranked.rowNum = 1;

A solution is :
-- Pair each email with any different email stored under the same name and
-- emit the pair as "larger,smaller". When the self-join finds no other email
-- for the name, the single email is returned unchanged.
SELECT DISTINCT
    a.Name,
    CASE
        WHEN b.Email IS NULL THEN a.Email
        WHEN a.Email > b.Email THEN a.Email + ',' + b.Email
        ELSE b.Email + ',' + a.Email
    END
FROM MyTable AS a
LEFT JOIN MyTable AS b
    ON a.Name = b.Name
    AND a.Email <> b.Email

Related

SQL query to find same ID but different names

I have a table like this:
ID | name
45 Alex
98 Diana
32 Peter
98 Daniel
45 Alex
23 Bob
98 Jake
I need to find all rows that share the same ID but have different names.
You could use first/last value() window functions here:
-- Rows whose id is shared by at least two different names.
-- MIN and MAX of name within an id differ exactly when that id holds more
-- than one distinct name. FIRST_VALUE/LAST_VALUE ordered by (select null)
-- depend on an arbitrary row order, so they can report equal values even
-- when the names differ.
with n as (
    select
        t.*,
        min(name) over (partition by id) as n1,
        max(name) over (partition by id) as n2
    from t
)
select Id, Name
from n
where n1 <> n2
You can use exists:
-- Keep each row that has a sibling row with the same id but another name.
SELECT cur.*
FROM mytable AS cur
WHERE EXISTS (
    SELECT 1
    FROM mytable AS other
    WHERE other.id = cur.id
      AND other.name <> cur.name
);
A windowed count is usually the most efficient:
-- Rows whose partition (as written: rows sharing the same name) contains
-- more than one row.
SELECT
    t.ID,
    t.name
FROM (
    SELECT *,
        -- COUNT needs an argument; COUNT(*) counts every row in the partition
        c = COUNT(*) OVER (PARTITION BY t.name)
    FROM YourTable t
) t
WHERE c > 1;
-- GROUP BY collapses the table to one row per (ID, name) pair; the windowed
-- COUNT then counts how many such pairs share an ID. Exactly one pair is
-- flagged 'OK', anything else 'NOT OK'.
SELECT
    ID,
    name,
    CASE COUNT(ID) OVER (PARTITION BY ID)
        WHEN 1 THEN 'OK'
        ELSE 'NOT OK'
    END AS CountID
FROM Table
GROUP BY
    ID,
    name
ORDER BY ID
Example:

sql get duplicate column values grouped by another column

I've got the following temp table as an output from a query:
FacilityID UserID User_Name
1046 105 John Smith
1046 106 John Smith
1046 110 Jack Welsh
1091 107 Ana Romero
1091 248 Rebecca Cruz
1095 418 Alex Sterling
I need to display only these facilities that have users with the same name, and only these names should pass the query filter. This is to find out if any facility has users with exactly same name (even though these are different people). So, considering table above, I need to display only the following:
FacilityID UserID User_Name
1046 105 John Smith
1046 106 John Smith
I would use exists :
-- Return every user row that shares both facility and name with a different
-- user (a different userid).
SELECT t.*
FROM table AS t
WHERE EXISTS (
    SELECT 1
    FROM table AS dup
    WHERE dup.FacilityID = t.FacilityID
      AND dup.User_Name = t.User_Name
      AND dup.userid <> t.userid
);
You can use exists:
-- Rows for which another row exists with the same facility and user name but
-- a different UserId. (The dangling trailing "and" that made the statement
-- unparseable has been removed.)
select t.*
from t
where exists (
    select 1
    from t t2
    where t2.FacilityID = t.FacilityID
      and t2.user_name = t.user_name
      and t2.UserId <> t.userId
);
If you have a query returning results, then window functions are also a good choice:
-- For each (FacilityID, user_name) group, compute the smallest and largest
-- userid. The two differ only when the group holds at least two distinct
-- userids, so the filter keeps exactly those groups' rows.
with t as (<your query here>)
select ranged.*
from (
    select
        t.*,
        min(userid) over (partition by FacilityID, user_name) as min_ui,
        max(userid) over (partition by FacilityID, user_name) as max_ui
    from t
) as ranged
where ranged.min_ui <> ranged.max_ui;
I would use the EXISTS clause:
(Example uses a CTE [TEMP] as a test)
-- Self-contained demo: TEMP is an inline copy of the sample rows.
;WITH TEMP (FacilityID, UserID, User_Name) AS (
    SELECT
        A.Column1,
        A.Column2,
        A.Column3
    FROM (
        VALUES
            ('1046', '105', 'John Smith'),
            ('1046', '106', 'John Smith'),
            ('1046', '110', 'Jack Welsh'),
            ('1091', '107', 'Ana Romero'),
            ('1091', '248', 'Rebecca Cruz'),
            ('1095', '418', 'Alex Sterling')
    ) AS A (Column1, Column2, Column3)
)
-- Keep rows that have a namesake (different UserID) in the same facility.
SELECT TEMP.*
FROM TEMP
WHERE EXISTS (
    SELECT 1
    FROM TEMP AS other_user
    WHERE other_user.FACILITYID = TEMP.FACILITYID
      AND other_user.USER_NAME = TEMP.USER_NAME
      AND other_user.USERID <> TEMP.USERID
)
I'll chip in my solution:
-- Users whose name occurs more than once within the same facility.
select FacilityID, UserID, User_Name
from (
    select
        FacilityID,
        UserID,
        User_Name,  -- this comma was missing, which broke the statement
        -- Count per facility AND name: partitioning by name alone would also
        -- flag same-named users who belong to different facilities.
        count(*) over (partition by FacilityID, User_Name) cnt
    from MY_TABLE
) a
where cnt > 1

Merge duplicate rows

I have a Customer table which contains an ID and Email field. I've written the following query to return all duplicate Customers with the same Email:
-- Customers whose Email occurs exactly twice (non-null) in the table.
SELECT
    a.ID,
    a.Email
FROM Customer AS a
WHERE EXISTS (
    SELECT 1
    FROM Customer AS b
    WHERE b.Email = a.Email
    GROUP BY b.Email
    HAVING COUNT(b.Email) = 2
)
ORDER BY a.Email
This is returning records that look like the following:
ID Email
1 a#hotmail.com
2 a#hotmail.com
3 b#gmail.com
4 b#gmail.com
While this works, I actually need the data in the following format:
ID1 Email1 ID2 Email2
1 a#hotmail.com 2 a#hotmail.com
3 b#gmail.com 4 b#gmail.com
What is the best way to achieve this?
One method is conditional aggregation . . . assuming you have at most two emails:
-- Number the rows of each email by id, then pivot the first two ids of every
-- email onto a single row. id_2 is NULL when the email has only one row.
with numbered as (
    select
        t.*,
        row_number() over (partition by email order by id) as seqnum
    from t
)
select
    max(case when seqnum = 1 then id end) as id_1,
    email as email_1,
    max(case when seqnum = 2 then id end) as id_2,
    email as email_2
from numbered
group by email;
Actually, why not just do:
-- Per email: the row count, the smallest id, and the largest id (reported
-- only when the email has more than one row).
select
    email,
    count(*) as num_dups,
    min(id) as id_1,
    case
        when count(*) > 1 then max(id)
    end as id_2
from t
group by email;
Try:
-- One row per Email with its smallest and largest customer ID.
SELECT
    MIN(ID) AS ID,
    Email,
    MAX(ID) AS ID2,
    Email AS EMAIL2
FROM Customer
GROUP BY Email
if you want HAVING COUNT(Email) = 2, it will be like this
-- One row per Email that occurs exactly twice, with its smallest and largest
-- customer ID.
SELECT
    MIN(ID) AS ID,
    Email,
    MAX(ID) AS ID2,
    Email AS EMAIL2
FROM Customer
GROUP BY Email
HAVING COUNT(Email) = 2
Your layout assumes that you can only have a total of 2 duplicates.
Maybe list the IDs instead like below?
-- Table variable holding one row per duplicated email. Customers is filled
-- below with the comma-separated IDs of the customers sharing that email.
-- (A table variable must use the @ prefix; "declare #Duplicates table" is
-- not valid T-SQL.)
DECLARE @Duplicates TABLE (Email varchar(50), Customers varchar(100));

-- Seed with every email that occurs more than once.
INSERT INTO @Duplicates (Email, Customers)
SELECT Email, ''
FROM Customer
GROUP BY Email
HAVING COUNT(*) > 1;

-- FOR XML PATH('') concatenates ',<ID>' for each matching customer;
-- STUFF removes the leading comma.
UPDATE d
SET Customers = STUFF((SELECT ',' + CAST(c.ID AS varchar(10))
                       FROM Customer c
                       WHERE c.Email = d.Email
                       FOR XML PATH(''), TYPE).value('.', 'VARCHAR(max)'), 1, 1, '')
FROM @Duplicates AS d;

SELECT Email, Customers
FROM @Duplicates
ORDER BY Email;

Need SQL to get top row

I'm using a SQL Server database and have this data:
Loc dept deptdesc
-----------------------
1 201 ccccc
1 201 fffff
1 201 uuu
2 202 lllll
3 203 ooo
3 203 yyy
3 203 mmm
3 203 bbbb
I need help with the SQL query to get data:
Loc dept deptdesc
----------------------------
1 201 ccccc
2 202 lllll
3 203 ooo
You stated in your comments that it can be any of the descriptions, assuming this is true a simple group by will work.
-- One row per (Loc, dept) pair, showing the lowest deptdesc of that pair.
SELECT
    Loc,
    dept,
    MIN(deptdesc)
FROM YourTable
GROUP BY
    Loc,
    dept
You can do this using Row_Number() and only taking the first of each group.
-- Number the rows of each Dept in arbitrary order (ORDER BY (SELECT NULL))
-- and keep the row numbered 1, i.e. one unspecified row per Dept.
select
    numbered.Loc,
    numbered.Dept,
    numbered.DeptDesc
from (
    select
        *,
        row_number() over (partition by Dept order by (select null)) as RN
    from YourTable
) as numbered
where numbered.RN = 1
Use the below query.. You can use CTE.
-- Number the rows of each (Loc, Dept) pair in arbitrary order and keep the
-- row numbered 1, i.e. one unspecified row per pair.
WITH numbered AS (
    SELECT
        Loc,
        Dept,
        DeptDesc,
        ROW_NUMBER() OVER (PARTITION BY Loc, Dept ORDER BY (SELECT 1)) AS RNO
    FROM YourTable
)
SELECT
    Loc,
    Dept,
    DeptDesc
FROM numbered
WHERE RNO = 1
Have assumed there is a surrogate id
-- Sample data, declared once. column4 is the assumed surrogate id.
-- The derived table gets an alias and explicit column names, which a VALUES
-- constructor in FROM needs to be referenced reliably.
WITH vals AS (
    SELECT *
    FROM (
        VALUES
            (1, 201, 'ccccc', 100),
            (1, 201, 'fffff', 101),
            (1, 201, 'uuu',   102),
            (2, 202, 'lllll', 103),
            (3, 203, 'ooo',   104),
            (3, 203, 'yyy',   105),
            (3, 203, 'mmm',   106),
            (3, 203, 'bbbb',  107)
    ) AS v (column1, column2, column3, column4)
),
-- Lowest surrogate id per column1 value. Ordering by column4 (unique)
-- rather than column2 (identical within each partition) makes the chosen
-- row deterministic.
firstVal AS (
    SELECT DISTINCT
        first_value(column4) OVER (PARTITION BY column1 ORDER BY column4) AS id
    FROM vals
)
SELECT
    vals.column1,
    vals.column2,
    vals.column3
FROM vals
INNER JOIN firstVal
    ON firstVal.id = vals.column4

Make Two Queries into 1 result set with 2 columns

Say I have a table that looks like this:
Person Table
ID AccountID Name
1 6 Billy
2 6 Joe
3 6 Tom
4 8 Jamie
5 8 Jake
6 8 Sam
I have two queries that I know work by themselves:
Select Name Group1 from person where accountid = 6
Select Name Group2 from person where accountid = 8
But I want a single Result Set to look like this:
Group1 Group2
Billy Jamie
Joe Jake
Tom Sam
You can use row_number() to assign a distinct value to each row, and then use a FULL OUTER JOIN to join the two subqueries:
-- Number the names of each account by id, then line the two lists up by
-- row number. FULL OUTER JOIN keeps the surplus rows of the longer list.
with first_group as (
    select
        name as group1,
        row_number() over (order by id) as rn
    from yourtable
    where accountid = 6
),
second_group as (
    select
        name as group2,
        row_number() over (order by id) as rn
    from yourtable
    where accountid = 8
)
select
    first_group.group1,
    second_group.group2
from first_group
full outer join second_group
    on first_group.rn = second_group.rn;
See SQL Fiddle with Demo
I agree you should do this client side. But it can be done in T/SQL:
-- Show the names of account 6 and account 8 side by side, paired by their
-- position (by ID) within each account.
-- GROUP is a reserved keyword, so the table name must be written as the
-- delimited identifier [Group]; unquoted it is a syntax error.
select  G1.Name as Group1
,       G2.Name as Group2
from    (
        select  row_number() over (order by ID) as rn
        ,       *
        from    [Group]
        where   AccountID = 6
        ) as G1
full outer join
        (
        select  row_number() over (order by ID) as rn
        ,       *
        from    [Group]
        where   AccountID = 8
        ) as G2
on      G1.rn = G2.rn
-- one side's rn is NULL when the lists differ in length, hence COALESCE
order by
        coalesce(G1.rn, G2.rn)