Find duplicates, display each result in sql - sql

I want to write something like this :
select t.id, t.name, from table t
group by t.name having count(t.name) > 1
To produce the following :
id name count
904834 jim 2
904835 jim 2
90145 Fred 3
90132 Fred 3
90133 Fred 3

For SQL Server 2005+, you can do the following:
SELECT *
FROM (SELECT id, Name, COUNT(*) OVER(PARTITION BY Name) [Count]
FROM table) t
WHERE [Count]>1

If you remove the ID column then you can get all the names that have multiple entries
select t.name
from table t
group by t.name
having count(t.name) > 1
For each name, if you want the minimum or maximum id you can do this
select t.id, t.name, min (t.id) as min_id, max (t.id) as max_id
from table t
group by t.name
having count(t.name) > 1
For each name, if you want all the ids that are duplicates, then you have to use a subquery
select t.id, t.name
from table t
where name in
(
select t1.name
from table t1
group by t1.name
having count(t1.name) > 1
)

Just join the table to a subquery pulling the count for each name
SELECT t.ID, t.Name, d.Count
FROM #MyTable t
JOIN
(
SELECT name, COUNT(*) as Count
FROM #MyTable
GROUP BY Name
HAVING COUNT(*) > 1
) D
ON t.Name = d.Name

Assuming mysql (when I wrote the answer, I do not think the person specified the dbms)
SELECT t.id, t.name, (SELECT COUNT(t2.name) FROM test t2 ) AS t_count
FROM test t
HAVING t_count > 1;

Similar to previous answers with less code. Tested on SQL Server 2008:
SELECT t.id, t.name,COUNT(*)
FROM table t
GROUP BY t.id, t.name
HAVING COUNT(t.id) > 1

Please Check it once .... in SQL Server 2008
SELECT t.id,
t.NAME,
Count(t.id) AS duplicate id,
count(t.NAME) AS duplicate names
FROM t
GROUP BY t.id,
t.NAME
HAVING count(t.NAME) > 1

Related

How to count count customers that have only 1 product / all products

I have table like this:
id
name
product
1
Ben
x
2
Ann
y
3
Kate
y
4
John
x
4
John
y
I wonder:
How to count customers who have only x product?
How to count customers who have both products?
You could use these two queries to get job done
How to count customers who have only x product?
select t.ID, t.NAME
from t
group by t.ID, t.NAME
having count(distinct t.PRODUCT) = 1
and max(t.PRODUCT) = 'x'
;
How to count customers who have both products?
You could use this analytic solution for that
select distinct ID, NAME
from (
select t.ID, t.NAME, t.PRODUCT, count(distinct t.PRODUCT)over(partition by t.ID ) cnt
from t
where t.PRODUCT in ('x', 'y')
)
where cnt > 1
or you could also use this aggregate solution
select t.ID, t.NAME
from t
where t.PRODUCT in ('x', 'y')
group by t.ID, t.NAME
having listagg(t.PRODUCT, ',')within group (order by t.PRODUCT) like '%x,y%'
;
Here I used a comma separator for the listagg aggregate function because I assumed none of your product names contain a comma ",". Otherwise you need to choose properly your owner separator for listagg function.
demo
For only x product:
SELECT COUNT(*) FROM TABLE_NAME
WHERE PRODUCT='x'
For both products:
SELECT COUNT(DISTINCT(NAME)) FROM TABLE_NAME
For customer that has only x product:
SELECT PRODUCT, COUNT(NAME) FROM TABLE_NAME
WHERE PRODUCT='x' GROUP BY PRODUCT

SQL - Removing Row Groups

I have a table with the following information:
Is there a way to remove all groups which have multiple IDs? For example group 3 would be removed because it consists of ID 1 and 2.
Thank you!
A simple, portable and efficient approach is not exists:
select t.*
from mytable t
where not exists (
select 1
from mytable t1
where t1.group = t.group and t1.id <> t.id
)
For performance, consider an index on (group, id).
Side note: group is a SQL keyword (as in group by), hence not a good choice for a column name.
You can use below query to remove all groups having multiple IDs
Delete from <your_table_name> where Group in (select Group from <your_table_name> group by Group,ID having count(*) > 1)
inner query will return Group having multiple IDs.
select * from temp where group in (
select groups from temp group by id,group having count(1)<3)
delete from temp where group in (
select groups from temp group by id,group having count(1)<3)
Try to execute below query:
select id,group from table where group in
(
select group from(
select group,count(distinct id) as cn from table group by 1 having cn=1) a
)

Identify duplicates rows based on multiple columns

#SQL Experts,
I am trying to fetch duplicate records from SQL table where 1st Column and 2nd Column values are same but 3rd column values should be different.
Below is my table
ID NAME DEPT
--------------------
1 VRK CSE
1 VRK ECE
2 AME MEC
3 BMS CVL
From the above table , i am trying to fetch first 2 rows, below is the Query, suggest me why isn't give correct results.
SELECT A.ID, A.NAME, A.DEPT
FROM TBL A
INNER JOIN TBL B ON A.ID = B.ID
AND A.NAME = B.NAME
AND A.DEPT <> B.DEPT
Somehow I am not getting the expected results.
Your sample data does not make it completely clear what you want here. Assuming you want to target groups of records having duplicate first/second columns with all third column values being unique, then we may try:
SELECT ID, NAME, DEPT
FROM
(
SELECT ID, NAME, DEPT,
COUNT(*) OVER (PARTITION BY ID, NAME) cnt,
MIN(DEPT) OVER (PARTITION BY ID, NAME) min_dept,
MAX(DEPT) OVER (PARTITION BY ID, NAME) max_dept
FROM yourTable
) t
WHERE cnt > 1 AND min_dept = max_dept;
UPDATE
select *
from
(
select *,
COUNT(*) over (partition by id, [name]) cnt1,
COUNT(*) over (partition by id, [name], dept) cnt2
from dbo.T
) x
where x.cnt1 > 1 and x.cnt2 < x.cnt1;
For find duplicate column
select x.id, x.name, count(*)
from
(select distinct a.id, a.name, a.dept
from tab a) x
group by x.id, x.name
having count(*) > 1
If you want the original rows, I would just go for exists:
select t.*
from tbl t
where exists (select 1
from tbl t
where t2.id = t.id and t2.name = t.name and
t2.dept <> t.dept
);
If you just want the id/name pairs:
select t.id, t.name
from tbl t
group by t.id, t.name
having min(t.dept) <> max(t.dept);

SQL Server query to select local maximums

I have this data. I need to get the lowest $ full rows for each person.
Amount Date Name
$123 Jun 1 Peter
$120 Jun 5 Peter
$123 Jun 5 Paul
$100 Jun 1 Paul
$220 Jun 3 Paul
The result of the SQl Server query should be:
$120 Jun 5 Peter
$100 Jun 1 Paul
SQL Server 2005+ Version
;WITH CTE AS
(
SELECT
Amount, [Date], Name,
ROW_NUMBER() OVER (PARTITION BY Name ORDER BY [Amount]) AS RowNum
FROM Table
)
SELECT *
FROM CTE
WHERE RowNum = 1
Alternative Version
SELECT t.Amount, t.[Date], t.Name
FROM
(
SELECT Name, MIN(Amount) AS MinAmount
FROM Table
GROUP BY Name
) m
INNER JOIN Table t
ON t.Name = m.Name
AND t.Amount = m.Amount
One way which works on SQL Server 7 and up
select t1.*
from(select min(amount) Minamount,name
from Yourtable
group by name) t2
join Yourtable t1 on t1.name = t2.name
and t1.amount = t2.Minamount
There are a couple of ways to solve this, see here: Including an Aggregated Column's Related Values
SELECT * FROM TableName T1 WHERE NOT EXISTS
(SELECT * FROM TableName T2
WHERE T2.Name = T1.Name AND T2.Amount < T1.Amount)
In the event of ties, both rows will be shown in this scenario.
Group on the person to get the lowest amount for each person, then join the table to get the date for each row:
select y.Amount, y.Date, y.Name
from (
select min(Amount), Name
from TheTable
group by Name
) x
inner join TheTable y on x.Name = y.Name and x.Amount = y.Amount
If the amount can exist on more than one date for a person, pick one of the dates, for example the first:
select y.Amount, min(y.Date), y.Name
from (
select min(Amount), Name
from TheTable
group by Name
) x
inner join TheTable y on x.Name = y.Name and x.Amount = y.Amount
group by y.Amount, y.Name
Not quite the most efficient possible, but simpler to read:
SELECT DISTINCT [Name], [Date], MIN([Amount]) OVER(PARTITION BY [Name])
FROM #Table

Select count(*) from multiple tables

How can I select count(*) from two different tables (call them tab1 and tab2) having as result:
Count_1 Count_2
123 456
I've tried this:
select count(*) Count_1 from schema.tab1 union all select count(*) Count_2 from schema.tab2
But all I have is:
Count_1
123
456
SELECT (
SELECT COUNT(*)
FROM tab1
) AS count1,
(
SELECT COUNT(*)
FROM tab2
) AS count2
FROM dual
As additional information, to accomplish same thing in SQL Server, you just need to remove the "FROM dual" part of the query.
Just because it's slightly different:
SELECT 'table_1' AS table_name, COUNT(*) FROM table_1
UNION
SELECT 'table_2' AS table_name, COUNT(*) FROM table_2
UNION
SELECT 'table_3' AS table_name, COUNT(*) FROM table_3
It gives the answers transposed (one row per table instead of one column), otherwise I don't think it's much different. I think performance-wise they should be equivalent.
My experience is with SQL Server, but could you do:
select (select count(*) from table1) as count1,
(select count(*) from table2) as count2
In SQL Server I get the result you are after.
Other slightly different methods:
with t1_count as (select count(*) c1 from t1),
t2_count as (select count(*) c2 from t2)
select c1,
c2
from t1_count,
t2_count
/
select c1,
c2
from (select count(*) c1 from t1) t1_count,
(select count(*) c2 from t2) t2_count
/
select
t1.Count_1,t2.Count_2
from
(SELECT count(1) as Count_1 FROM tab1) as t1,
(SELECT count(1) as Count_2 FROM tab2) as t2
A quick stab came up with:
Select (select count(*) from Table1) as Count1, (select count(*) from Table2) as Count2
Note: I tested this in SQL Server, so From Dual is not necessary (hence the discrepancy).
For a bit of completeness - this query will create a query to give you a count of all of the tables for a given owner.
select
DECODE(rownum, 1, '', ' UNION ALL ') ||
'SELECT ''' || table_name || ''' AS TABLE_NAME, COUNT(*) ' ||
' FROM ' || table_name as query_string
from all_tables
where owner = :owner;
The output is something like
SELECT 'TAB1' AS TABLE_NAME, COUNT(*) FROM TAB1
UNION ALL SELECT 'TAB2' AS TABLE_NAME, COUNT(*) FROM TAB2
UNION ALL SELECT 'TAB3' AS TABLE_NAME, COUNT(*) FROM TAB3
UNION ALL SELECT 'TAB4' AS TABLE_NAME, COUNT(*) FROM TAB4
Which you can then run to get your counts. It's just a handy script to have around sometimes.
As I can't see any other answer bring this up.
If you don't like sub-queries and have primary keys in each table you can do this:
select count(distinct tab1.id) as count_t1,
count(distinct tab2.id) as count_t2
from tab1, tab2
But performance wise I believe that Quassnoi's solution is better, and the one I would use.
SELECT (SELECT COUNT(*) FROM table1) + (SELECT COUNT(*) FROM table2) FROM dual;
Here is from me to share
Option 1 - counting from same domain from different table
select distinct(select count(*) from domain1.table1) "count1", (select count(*) from domain1.table2) "count2"
from domain1.table1, domain1.table2;
Option 2 - counting from different domain for same table
select distinct(select count(*) from domain1.table1) "count1", (select count(*) from domain2.table1) "count2"
from domain1.table1, domain2.table1;
Option 3 - counting from different domain for same table with "union all" to have rows of count
select 'domain 1'"domain", count(*)
from domain1.table1
union all
select 'domain 2', count(*)
from domain2.table1;
Enjoy the SQL, I always do :)
select (select count(*) from tab1) count_1, (select count(*) from tab2) count_2 from dual;
--============= FIRST WAY (Shows as Multiple Row) ===============
SELECT 'tblProducts' [TableName], COUNT(P.Id) [RowCount] FROM tblProducts P
UNION ALL
SELECT 'tblProductSales' [TableName], COUNT(S.Id) [RowCount] FROM tblProductSales S
--============== SECOND WAY (Shows in a Single Row) =============
SELECT
(SELECT COUNT(Id) FROM tblProducts) AS ProductCount,
(SELECT COUNT(Id) FROM tblProductSales) AS SalesCount
If the tables (or at least a key column) are of the same type just make the union first and then count.
select count(*)
from (select tab1key as key from schema.tab1
union all
select tab2key as key from schema.tab2
)
Or take your satement and put another sum() around it.
select sum(amount) from
(
select count(*) amount from schema.tab1 union all select count(*) amount from schema.tab2
)
Declare #all int
SET #all = (select COUNT(*) from tab1) + (select count(*) from tab2)
Print #all
or
SELECT (select COUNT(*) from tab1) + (select count(*) from tab2)
JOIN with different tables
SELECT COUNT(*) FROM (
SELECT DISTINCT table_a.ID FROM table_a JOIN table_c ON table_a.ID = table_c.ID );
SELECT (
SELECT COUNT(*)
FROM tbl1
)
+
(
SELECT COUNT(*)
FROM tbl2
)
as TotalCount
If you're using Google BigQuery this will work.
SELECT
date,
SUM(Table_1_Id_Count) AS Table_1_Id_Count,
SUM(Table_2_Id_Count) AS Table_2_Id_Count
FROM
(
SELECT
Id AS Table_1_Id,
date,
COUNT(Id) AS Table_1_Id_Count,
0 AS Table_2_Id_Count
FROM
`your_project_name.Table_1`
GROUP BY
Id,
date
UNION ALL
SELECT
Id AS Table_2_Id,
date,
0 AS Table_1_Id_Count,
COUNT(Id) AS Table_2_Id_Count
FROM
`your_project_name.Table_2`
GROUP BY
Id,
date
)
GROUP BY
date
select
(select count() from tab1 where field like 'value') +
(select count() from tab2 where field like 'value')
count
select #count = sum(data) from
(
select count(*) as data from #tempregion
union
select count(*) as data from #tempmetro
union
select count(*) as data from #tempcity
union
select count(*) as data from #tempzips
) a