Deleting duplicate record from table - SQL query - sql

I need to delete duplicate rows only from the table, like I have 3 duplicate rows in the table, my query will delete 2 rows from 3 duplicated rows.
How can I get this? Please help me.

Please try the below query, it will definitely meet your objective
-- Delete surplus rows one at a time until every name occurs exactly once.
-- Each pass removes a single row whose name still occurs more than once;
-- @@ROWCOUNT becomes 0 once no duplicated name is left, which ends the loop.
-- DELETE TOP (1) is used instead of SET ROWCOUNT, which is deprecated for
-- INSERT/UPDATE/DELETE statements.
DELETE TOP (1) a
FROM test AS a
WHERE (SELECT COUNT(*) FROM test AS b WHERE b.name = a.name) > 1
WHILE @@ROWCOUNT > 0
    DELETE TOP (1) a
    FROM test AS a
    WHERE (SELECT COUNT(*) FROM test AS b WHERE b.name = a.name) > 1
where test is your table name

This works in SQL Server although it isn't a single statement:
-- Keep one row of the duplicate set and delete the rest.
-- Assumes you are trying to delete the duplicates where some condition
-- (e.g. Col1=1) is true; the same condition must appear in both statements,
-- otherwise the DELETE may remove rows that are not part of the duplicate set.
Declare @cnt int;
Select @cnt=COUNT(*) From DupTable Where (Col1=1);
-- Nothing to remove when fewer than two rows match; TOP also rejects a negative count.
If @cnt > 1
    Delete Top (@cnt-1) From DupTable Where (Col1=1);
It also doesn't require any extra assumptions (like the existence of another column that makes each row unique). After all, Santanu did say that the rows were duplicates, not just the one column.
However, the right answer, in my view, is to get a real table structure. That is, add an IDENTITY column to this table so that you can use a single SQL command to do your work. Like this:
-- Add a surrogate key column so that otherwise identical rows can be told apart.
-- IDENTITY (1, 1): seed 1, increment 1.
ALTER TABLE dbo.DupTable ADD
IDCol int NOT NULL IDENTITY (1, 1)
GO
Then the delete is trivial:
-- For each (Col1, Col2, Col3) group keep the row with the highest IDCol and
-- delete every other row: a row is deleted when its IDCol is not one of the
-- per-group maxima.
DELETE d
FROM DupTable AS d
LEFT JOIN (
    SELECT MAX(IDCol) AS keep_id
    FROM DupTable
    GROUP BY Col1, Col2, Col3
) AS k
    ON k.keep_id = d.IDCol
WHERE k.keep_id IS NULL

-- Self-join delete: the alias named right after DELETE (t1) is the side rows
-- are removed from. A row is deleted when another row with the same colDup
-- has a later date. "Table" is a placeholder; quote the real name if it is a
-- reserved word in your database.
DELETE t1
FROM Table AS t1
INNER JOIN Table AS t2
    ON t1.colDup = t2.colDup
WHERE t1.date < t2.date
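Will delete every duplicate row from Table (on column colDup) except the most recent one (i.e. the one with the highest date), because a row is removed whenever another row with the same colDup has a later date.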

-- NOTE: this statement does not work; the answer retracts it directly below.
-- The derived table selects only the constant 1 and has no alias, so it
-- exposes no `id` column for USING(`id`) to join on.
DELETE FROM `mytbl`
INNER JOIN (
SELECT 1 FROM `mytbl`
GROUP BY `duplicated_column` HAVING COUNT(*)=2
) USING(`id`)
Edit:
My bad, the above query won't work.
Assuming table structure:
id int auto_increment
num int # <-- this is the column with duplicated values
The following query would work in MySQL (i checked):
-- Multi-table DELETE: rows are removed from `mytbl`, the table named before FROM.
-- `tmp` lists every `num` value that occurs exactly twice; joining back on
-- `num` matches both rows of each such pair, so both copies are deleted.
DELETE `mytbl` FROM `mytbl`
INNER JOIN
(
SELECT `num` FROM `mytbl`
GROUP BY `num` HAVING COUNT(*)=2
) AS `tmp` USING (`num`)
The query would delete the rows that have 2 (not more or else) duplicated values in the num column.
Edit (again):
I suggest to add a key on the num column.
Edit(#3):
In case that the author wanted to delete the duplicated rows, the following should work for MySQL (it worked for me):
-- Rows are removed from `delete_duplicated_rows`, the table named before FROM.
-- `der` holds one row per `num1` value that occurs exactly twice; NATURAL JOIN
-- matches on every column name the two sides share.
-- NOTE(review): SELECT * with GROUP BY `num1` returns a non-grouped column;
-- presumably this relies on MySQL running without ONLY_FULL_GROUP_BY - confirm.
DELETE `delete_duplicated_rows` FROM `delete_duplicated_rows`
NATURAL JOIN (
SELECT *
FROM `delete_duplicated_rows`
GROUP BY `num1` HAVING COUNT(*)=2
) AS `der`
While assuming table structure is:
-- Two-column table without a key, used to demonstrate whole-row duplicates.
-- DEFAULT requires a value, so "DEFAULT NOT NULL" is not valid column syntax;
-- the columns are declared plain NOT NULL instead.
CREATE TABLE `delete_duplicated_rows` (
`num1` tinyint(4) NOT NULL,
`num2` tinyint(4) NOT NULL
) ENGINE=MyISAM;

If you have the id's of the rows you want to delete then...
DELETE FROM table WHERE id IN (1, 4, 7, [id numbers to delete...])

I think each table has unique identifier.
So if it exists then you can write following query:
-- Delete every row of a duplicated group except the one with the highest id.
-- The count test has to read "at least 2 rows share dupColumn"; written as
-- 2 >= count it skipped every group with three or more copies.
Delete t1 from Table1 t1 where (select count(id) from Table1 where dupColumn = t1.dupColumn) >= 2 and
t1.id not in (select max (id) from Table1 where dupColumn = t1.dupColumn)
OOps. It seems it is possible to use second filter only
-- Delete each row whose id is not the highest id among the rows that share its
-- dupColumn value. A row that is alone in its group is its own maximum and is kept.
Delete Table1 from Table1 t1 where
t1.id not in (select max (id) from Table1 where dupColumn = t1.dupColumn)

-- Just to demonstrate Mark's example
.
-- START === 1.0.dbo..DuplicatesTable.TableCreate.sql
/****** Object: Table [dbo].[DuplicatesTable]
Script Date: 03/29/2010 21:24:02 ******/
-- Drop a previous copy of the user table, if there is one, so the script can be re-run.
IF OBJECT_ID(N'[dbo].[DuplicatesTable]', N'U') IS NOT NULL
    DROP TABLE [dbo].[DuplicatesTable]
GO
/****** Object: Table [dbo].[DuplicatesTable]
Script Date: 03/29/2010 21:24:02 ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE TABLE [dbo].[DuplicatesTable](
[ColA] [varchar](10) NOT NULL, -- the name stored in the DuplicatesTable row
[ColB] [varchar](10) NULL, -- the description stored in the DuplicatesTable row
)
/*
<doc>
Models a DuplicatesTable: a keyless two-column table that can hold identical rows.
</doc>
*/
GO
--============================================================ DuplicatesTable START
-- Post-create check: prints the lookup being made, runs it, and reports
-- PASSED when exactly one table named DuplicatesTable exists, FAILED otherwise.
-- Variables use the @ prefix; a # prefix would denote a temporary table.
-- NOTE(review): $(ScriptFileName) looks like a sqlcmd scripting variable - confirm
-- the script is run through a tool that substitutes it.
DECLARE @ScriptFileName varchar(2000)
SELECT @ScriptFileName = '$(ScriptFileName)'
SELECT @ScriptFileName + ' --- DuplicatesTable START ========================================='
DECLARE @TableName varchar(200)
SELECT @TableName = 'DuplicatesTable'
SELECT 'SELECT name from sys.tables where name =''' + @TableName + ''''
SELECT name FROM sys.tables
WHERE name = @TableName
DECLARE @TableCount INT
SELECT @TableCount = COUNT(name) FROM sys.tables
WHERE name = @TableName
IF @TableCount = 1
SELECT ' DuplicatesTable PASSED. The Table ' + @TableName + ' EXISTS '
ELSE
SELECT ' DuplicatesTable FAILED. The Table ' + @TableName + ' DOES NOT EXIST '
SELECT @ScriptFileName + ' --- DuplicatesTable END ========================================='
--============================================================ DuplicatesTable END
GO
-- END === 1.0.dbo..DuplicatesTable.TableCreate.sql
.
-- START === 1.1..dbo..DuplicatesTable.TableInsert.sql
-- Seed data: seven identical ('ColA', 'ColB') rows followed by seven identical
-- ('ColA1', 'ColB1') rows, inserted in one transaction.
BEGIN TRANSACTION;
INSERT INTO [dbo].[DuplicatesTable] ([ColA], [ColB])
VALUES
    (N'ColA', N'ColB'),
    (N'ColA', N'ColB'),
    (N'ColA', N'ColB'),
    (N'ColA', N'ColB'),
    (N'ColA', N'ColB'),
    (N'ColA', N'ColB'),
    (N'ColA', N'ColB'),
    (N'ColA1', N'ColB1'),
    (N'ColA1', N'ColB1'),
    (N'ColA1', N'ColB1'),
    (N'ColA1', N'ColB1'),
    (N'ColA1', N'ColB1'),
    (N'ColA1', N'ColB1'),
    (N'ColA1', N'ColB1')
COMMIT;
-- Severity 10 makes this an informational message; NOWAIT sends it immediately.
RAISERROR (N'[dbo].[DuplicatesTable]: Insert Batch: 1.....Done!', 10, 1) WITH NOWAIT;
GO
-- END === 1.1..dbo..DuplicatesTable.TableInsert.sql
.
-- START === 2.0.RemoveDuplicates.Script.sql
-- Step 1: add an identity column so identical (ColA, ColB) rows become distinguishable.
-- The GO is required: the new column must exist before the next batch is compiled.
ALTER TABLE dbo.DuplicatesTable ADD
DuplicatesTableId int NOT NULL IDENTITY (1, 1)
GO
-- Then the delete is trivial:
-- Step 2: keep the row with the highest DuplicatesTableId of every (ColA, ColB)
-- group and delete all the others.
DELETE FROM dbo.DuplicatesTable WHERE DuplicatesTableId NOT IN
(SELECT MAX(DuplicatesTableId) FROM dbo.DuplicatesTable GROUP BY ColA , ColB)
-- Step 3: show what is left.
Select * from DuplicatesTable ;
-- END === 2.0.RemoveDuplicates.Script.sql

Related

Check if a temp table exists when I only know part of the name?

I have a function for checking if certain tables exist in my database, using part of the table name as a key to match (my table naming conventions include unique table name prefixes). It uses a select statement as below, where @TablePrefix is a parameter to the function and contains the first few characters of the table name:
-- Function body: returns 1 when at least one table in sys.tables has a name
-- starting with @TablePrefix, otherwise 0. TOP(1) caps the inner result at a
-- single row, so the COUNT can only be 0 or 1.
-- Variables use the @ prefix; a # prefix would denote a temporary table.
DECLARE @R bit;
SELECT @R = COUNT(X.X)
FROM (
SELECT TOP(1) 1 X FROM sys.tables WHERE [name] LIKE @TablePrefix + '%'
) AS X;
RETURN @R;
My question is, how can I extend this function to work for #temp tables too?
I have tried checking the first char of the name for # then using the same logic to select from tempdb.sys.tables, but this seems to have a fatal flaw - it returns a positive result when any temp table exists with a matching name, even if not created by the current session - and even if created by SPs in a different database. There does not seem to be any straightforward way to narrow the selection down to only those temp tables that exist in the context of the current session.
I cannot use the other method that seems universally to be suggested for checking temp tables - IF OBJECT_ID('tempdb..#temp1') IS NOT NULL - because that requires me to know the full name of the table, not just a prefix.
-- Demo: list the temp table names that the current session can resolve.
-- Setup: four temp tables whose names differ only in trailing underscores.
create table #abc(id bit);
create table #abc_(id bit);
create table #def__(id bit);
create table #xyz___________(id bit);
go
-- For every name in tempdb.sys.tables that starts with '#', try each prefix of
-- length 1..116 (n.r) and keep the prefixes that OBJECT_ID can resolve.
-- NOTE(review): presumably this works because tempdb stores temp table names
-- with a system-added suffix while OBJECT_ID('tempdb..#name') resolves only
-- tables visible to the calling session, and 116 is the longest allowed
-- temp table name - confirm both against the SQL Server documentation.
select distinct (left(t.name, n.r)) as tblname
from tempdb.sys.tables as t with(nolock)
cross join (select top(116) row_number() over(order by(select null)) as r from sys.all_objects with(nolock)) as n
where t.name like '#%'
and object_id('tempdb..'+left(t.name, n.r)) is not null;
-- Cleanup.
drop table #abc;
drop table #abc_;
drop table #def__;
drop table #xyz___________;
Try something like this:
-- Report whether any table name starts with @TablePrefix. A prefix that begins
-- with '#' is looked up in tempdb.sys.tables, anything else in the current
-- database's sys.tables.
-- Variables use the @ prefix; '#temp' below is a literal temp-table name prefix.
DECLARE @TablePrefix VARCHAR(50) = '#temp';
-- One character wider than @TablePrefix so the appended '%' is never truncated.
DECLARE @R BIT, @pre VARCHAR(51) = @TablePrefix + '%';
SELECT @R = CASE LEFT ( @pre, 1 )
WHEN '#' THEN (
SELECT CASE WHEN EXISTS ( SELECT * FROM tempdb.sys.tables WHERE [name] LIKE @pre ) THEN 1
ELSE 0
END )
ELSE (
SELECT CASE WHEN EXISTS ( SELECT * FROM sys.tables WHERE [name] LIKE @pre ) THEN 1
ELSE 0
END )
END;
SELECT @R AS TableExists;

how to get a select count(x) from a query of table names

I have a query that brings back a list of tables and the error counts for those tables.
select *
from error
with a result of
tablename | errorcnt
----------+---------
table1 | 5
table2 | 256
and so on.
I need to do a join so I can get another count from each table as to the records that have been corrected example
select count(fixed)
from table1
so my new result would be
tablename | errorcnt | fixed
----------+----------+------
table1 | 5 | 3
table2 | 256 | 239
and so on.
Without doing a cursor how could I do? I guess a sub query using 'tablename'.
The comment you made:
This is how i populate my errortable SELECT T.name TableName,i.Rows
NumberOfRows FROM sys.tables T JOIN sys.sysindexes I ON T.OBJECT_ID =
I.ID WHERE indid IN (0,1) ORDER BY i.Rows DESC,T.name
This means you are looking for tables, and their respective indexes, that are either a heap (i.e. have no clustered index) or have a clustered index. I'm not sure why this would classify as an "error". I'd expect you to want to look only for heaps, i.e. where indid = 0. Regardless, I suppose "fixed" would mean returning tables that, for example, didn't have a clustered index and now do. In that case I don't understand the schema, and I think you have asked an XY question.
With that being said,based off the other comments, you could use derived tables and join on the literal values of error.tablename to prevent the use of a cursor.
-- One output row per row of error. The fixed column is filled by the branch
-- whose literal equals error.tablename; rows that match no branch get NULL.
select
error.tablename
,error.errorcnt
,fixed = case error.tablename
    when 'table1' then (select count(fixed) from table1 where fixed = 'Y')
    when 'table2' then (select count(fixed) from table2 where fixed = 'Y')
    --continue with one branch for each value in error.tablename
end
from
error
A cursor would be less code, and dynamic, but you asked for a way without a cursor.
You can use a temp table and a WHILE loop to avoid a cursor:
-- Cursor-free demo: fill in a "fixed" total for every table named in #error by
-- running one dynamic query per table inside a WHILE loop.
-- Variables use the @ prefix; names starting with # are temporary tables.
DECLARE
    @SQLQuery NVARCHAR(MAX),   -- wide enough that a long table name cannot truncate the statement
    @Tablename VARCHAR(100)

-- Sample data: the error list and the two tables it refers to.
CREATE TABLE #error (tablename VARCHAR(100), errorcnt INT)
CREATE TABLE #Table1 (fixed INT)
CREATE TABLE #Table2 (fixed INT)
-- Scratch table that receives the one-row result of each dynamic query.
CREATE TABLE #Temp_fixed (fixed INT)

INSERT INTO #error (tablename, errorcnt) VALUES ('#Table1', 5), ('#Table2', 256)
INSERT INTO #Table1 (fixed) VALUES (3)
INSERT INTO #Table2 (fixed) VALUES (239)

-- Working copy of the list; fixed = -1 marks a row that is not processed yet.
SELECT
    tablename,
    errorcnt,
    -1 AS fixed
INTO #Temp_error
FROM #error

WHILE EXISTS (SELECT TOP 1 1 FROM #Temp_error WHERE fixed = -1)
BEGIN
    SET @Tablename = (SELECT TOP 1 tablename FROM #Temp_error WHERE fixed = -1)
    -- The table name is concatenated into the statement, so #error must only
    -- ever contain trusted table names.
    SET @SQLQuery = N'SELECT SUM(fixed) FROM ' + @Tablename

    INSERT INTO #Temp_fixed (fixed)
    EXECUTE sp_executesql @SQLQuery

    -- SUM over an empty table is NULL; store 0 so the row leaves the -1 state.
    UPDATE #Temp_error
    SET fixed = ISNULL((SELECT TOP 1 fixed FROM #Temp_fixed), 0)
    WHERE tablename = @Tablename

    TRUNCATE TABLE #Temp_fixed
END

SELECT
    tablename,
    errorcnt,
    fixed
FROM #Temp_error

DROP TABLE #error
DROP TABLE #Table1
DROP TABLE #Table2
DROP TABLE #Temp_error
DROP TABLE #Temp_fixed

How to interrogate multiple tables with different structure?

I am using Sql-Server 2016 in a C# application.
Let's say I have two tables:
-- First sample table of the question.
-- NOTE(review): NVARCHAR2 and NUMBER are Oracle type names, although the
-- question says SQL Server 2016 - confirm which engine is really targeted.
CREATE TABLE Table_A
(
UserID NVARCHAR2(15),
FullName NVARCHAR2(25),
Available NUMBER(1),
MachineID NVARCHAR2(20),
myDate date
);
and
-- Second sample table of the question; it shares UserID, FullName and MachineID
-- with Table_A and adds Team and Stuff.
-- NOTE(review): NVARCHAR2 and NUMBER are Oracle type names - confirm the target engine.
CREATE TABLE Table_B
(
UserID NVARCHAR2(15),
FullName NVARCHAR2(25),
Team NVARCHAR2(15),
MachineID NVARCHAR2(20),
Stuff NUMBER(2)
);
I want to perform a global select so that I will get as result data from both tables, somehow concatenated and of course, when a column does not exist in one of the tables, that column to be automatically populated with NULL, and if a column exists on both tables the results must be merged in a single column.
The first solution that pops-up is a UNION with NULL aliases for the missing columns, sure. The problem is that at runtime I will not be able to know in advance which tables are interrogated so that I could anticipate the column names. I need a more general solution.
The expected result from the two tables must look like this:
user_Table_A; fullName_Table_A; 1; machineID_Table_A; 12-JUN-18; NULL; 10;
user_Table_B; fullName_Table_B; NULL; machineID_Table_B; NULL; team_Table_B; 20;
The data for the two tables is inserted with the following commands:
INSERT INTO Table_A VALUES ('user_Table_A', 'fullName_Table_A', 1, 'machineID_Table_A', TO_DATE('12-06-2018', 'DD-MM-YYYY'));
INSERT INTO Table_B VALUES ('user_Table_B', 'fullName_Table_B', 'team_Table_B', 'machineID_Table_B', 20);
You can do something like this. I haven't had time to fully polish it, so the order of the columns may be off, but perhaps it can get you started:
You also use Oracle syntax in the question - I'm not sure which engine you wanted, but this is a pure SQL Server version.
SQL:
-- Start clean: drop the scratch table left over from a previous run.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
/*Then it exists*/
DROP TABLE #temp;
GO
-- Build and run "select <list A> from Table_a union all select <list B> from table_b",
-- where a column missing from one table is emitted as "NULL as <column>".
-- Variables use the @ prefix; #temp is a real temporary table.
DECLARE @SQLList nvarchar(max)
DECLARE @SQLList2 nvarchar(max)
DECLARE @SQL nvarchar(max);   -- the terminator is required: WITH must follow a terminated statement

WITH table_a AS (
    SELECT COLUMN_NAME AS Table_aColumnName, ORDINAL_POSITION
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = 'table_a'
),
table_b AS (
    SELECT COLUMN_NAME AS Table_bColumnName, ORDINAL_POSITION
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = 'table_b'
),
-- One row per distinct column name; the side that lacks the column gets a NULL placeholder.
preresult AS (
    SELECT
        CASE WHEN a.Table_aColumnName IS NULL THEN 'NULL as ' + b.Table_bColumnName ELSE a.Table_aColumnName END AS Table_a_ColumnName,
        CASE WHEN b.Table_bColumnName IS NULL THEN 'NULL as ' + a.Table_aColumnName ELSE b.Table_bColumnName END AS Table_b_ColumnName,
        a.ORDINAL_POSITION,
        b.ORDINAL_POSITION AS Table_b_Ordinal
    FROM table_a AS a
    FULL JOIN table_b AS b
        ON a.Table_aColumnName = b.Table_bColumnName
)
SELECT * INTO #temp FROM preresult

-- Both lists must use exactly the same ordering so the UNION ALL columns line up:
-- Table_a's columns in their own order first, then the columns only Table_b has.
SET @SQLList = STUFF((
    SELECT ',' + Table_a_ColumnName
    FROM #temp
    ORDER BY CASE WHEN ORDINAL_POSITION IS NULL THEN 1 ELSE 0 END, ORDINAL_POSITION, Table_b_Ordinal
    FOR XML PATH('')), 1, 1, '')
SET @SQLList2 = STUFF((
    SELECT ',' + Table_b_ColumnName
    FROM #temp
    ORDER BY CASE WHEN ORDINAL_POSITION IS NULL THEN 1 ELSE 0 END, ORDINAL_POSITION, Table_b_Ordinal
    FOR XML PATH('')), 1, 1, '')
SET @SQL = 'select ' + @SQLList + ' from dbo.Table_a union all select ' + @SQLList2 + ' from dbo.table_b'
EXEC(@SQL)
Result:

SQL Loop through tables and columns to find which columns are NOT empty

I created a temp table #test containing 3 fields: ColumnName, TableName, and Id.
I would like to see which rows in the #test table (columns in their respective tables) are not empty? I.e., for every column name that i have in the ColumnName field, and for the corresponding table found in the TableName field, i would like to see whether the column is empty or not. Tried some things (see below) but didn't get anywhere. Help, please.
-- The asker's attempt, with the variable sigils restored (@ for variables;
-- #test is the temp table holding ColumnName, TableName and Id).
-- It walks #test by id and prints the column name, the table name and a count.
-- As the answers explain, count(@test2) counts a variable, not the column it
-- names, so @check only reflects how many rows the derived table returns;
-- inspecting the named column requires dynamic SQL.
declare @LoopCounter INT = 1, @maxloopcounter int, @test varchar(100),
@test2 varchar(100), @check int
set @maxloopcounter = (select count(TableName) from #test)
while @LoopCounter <= @maxloopcounter
begin
DECLARE @PropIDs TABLE (tablename varchar(max), id int )
Insert into @PropIDs (tablename, id)
SELECT [tableName], id FROM #test
where id = @LoopCounter
set @test2 = (select columnname from #test where id = @LoopCounter)
declare @sss varchar(max)
set @sss = (select tablename from @PropIDs where id = @LoopCounter)
set @check = (select count(@test2)
from (select tablename
from @PropIDs
where id = @LoopCounter) A
)
print @test2
print @sss
print @check
set @LoopCounter = @LoopCounter + 1
end
In order to use variables as column names and table names in your @check query, you will need to use dynamic SQL.
There is most likely a better way to do this, but I can't think of one offhand. Here is what I would do.
Use the select to declare a cursor rather than a WHILE loop as you have it. That way you don't have to count on sequential ids. The cursor would fetch the fields columnname, id and tablename.
In the loop build a dynamic sql statement
-- Once, before the cursor loop: create the result table in the calling scope.
-- A temp table created by SELECT ... INTO inside Exec() disappears when the
-- dynamic batch ends, so the caller could never read it.
Create Table #Temp2 (Cnt int)
-- Inside the loop, for each fetched @TableName / @columnname:
Truncate Table #Temp2
-- The "<> ''" test is part of the generated statement, so it sits inside the
-- string with its quotes doubled. The names are concatenated as-is, so they
-- must come from a trusted source.
Set @Sql = 'Insert Into #Temp2 (Cnt) Select Count(*) From ' + @TableName + ' Where ' + @columnname + ' Is not null And ' + @columnname + ' <> '''''
Exec(@Sql)
Then check #Temp2 for a value greater than 0 and, if that is what you want, use the @id that was fetched to update your #Temp table. Putting the result into a scalar variable rather than a temp table would be preferable, but I can't remember the best way to do that, and using a temp table allows you to use an update join, so it works well in my opinion.
https://www.mssqltips.com/sqlservertip/1599/sql-server-cursor-example/
http://www.sommarskog.se/dynamic_sql.html
Found a way to extract all non-empty tables from the schema, then just joined with the initial temp table that I had created.
-- Row totals per user table, taken from the heap (index_id 0) or clustered
-- index (index_id 1) partitions, restricted to tables with at least one row.
WITH non_empty AS (
    SELECT
        OBJECT_NAME(s.[object_id]) AS table_name,
        SUM(s.row_count) AS row_count
    FROM sys.dm_db_partition_stats AS s
    INNER JOIN sys.tables AS t
        ON t.[object_id] = s.[object_id]
    WHERE s.index_id IN (0, 1)
    GROUP BY s.[object_id]
    HAVING SUM(s.row_count) > 0
)
-- Only #test rows whose table is non-empty are returned, largest table first.
SELECT A.tablename, B.row_count
FROM #test AS A
INNER JOIN non_empty AS B
    ON A.[TableName] = B.table_name
ORDER BY B.row_count DESC
How about this one - bitmask computed column checks for NULLability. Value in the bitmask tells you if a column is NULL or not. Counting base 2.
-- NotEmpty is a computed bitmask with one bit per column:
-- 1 = ID is not NULL, 2 = val is not NULL, 4 = valstr is not NULL.
CREATE TABLE FindNullComputedMask
(
    ID int,
    val int,
    valstr varchar(3),
    NotEmpty AS
        (CASE WHEN ID IS NOT NULL THEN 1 ELSE 0 END)
      | (CASE WHEN val IS NOT NULL THEN 2 ELSE 0 END)
      | (CASE WHEN valstr IS NOT NULL THEN 4 ELSE 0 END)
)
-- Four sample rows with different NULL patterns.
INSERT INTO FindNullComputedMask (ID, val, valstr)
VALUES
    (1, 1, NULL),
    (NULL, 2, NULL),
    (2, NULL, NULL),
    (3, 3, 3)
SELECT ID, val, valstr, NotEmpty
FROM FindNullComputedMask

Collation Conflict can not solve in SQL server

This is my query:
SELECT
CASE WHEN (
select Count(*) From (
select * from [mslccard08].[carekey].dbo.EXTERNAL_MEMBER_DATA
union
select * from [vmslcsql11].[HSRTest].dbo.External_Member_data) as t
)
<>
(
Select count(*) From [mslccard08].[carekey].dbo.EXTERNAL_MEMBER_DATA
)
THEN 'Data is not Identical'
ELSE 'Date is identical'
END AS RowCountResult
I am getting Following error
cannot resolve the collation conflict between sql_latin1_general_cp1_ci_as and sql_latin1_general_cp1_ci_ai
I know that the error is caused by a collation mismatch in one of my columns in EXTERNAL_MEMBER_DATA, which is of type nvarchar.
I could solve the error by using COLLATE DATABASE_DEFAULT, but as far as I understand it can only be applied to a named column, and I am using * here. I don't know how to solve this error in this scenario.
Please find below the solution for your problem.
Change the table names with your tables.
-- Compare the number of distinct rows in the UNION of two tables with the row
-- count of the first table. Broker stands in for both of your tables.
-- Variables use the @ prefix; a # prefix would denote a temporary table.
declare @count as int
set @count =
(Select Count(*) from
(select * from Broker
union
select * from Broker) as t)
SELECT
CASE WHEN
(
@count <> (select count(*) from Broker)
)
THEN 'Data is not Identical'
ELSE 'Date is identical'
END AS RowCountResult
Your issue is related to using select *. Always avoid using this.
You were trying to union two columns with different collations. You wouldn't know which ones though because you were using select *
The statement below doesn't use select * at all and is likely to be much faster as it should work out the count on the remote server and return only three rows in total. Also it does not use union which is an expensive operation.
The important question is: did you mean to use union instead of union all ?
-- Returns the two counts side by side plus a verdict, without select * or union.
-- RowCount1 = rows in the carekey table + rows in the HSRTest table.
-- RowCount2 = rows in the carekey table alone.
-- NOTE(review): with these definitions the verdict is 'Data is not Identical'
-- whenever the HSRTest table holds at least one row - confirm this is the
-- comparison that is wanted.
SELECT
RowCount1,
RowCount2,
CASE WHEN RowCount1 <> RowCount2
THEN 'Data is not Identical'
ELSE 'Date is identical'
END AS RowCountResult
FROM
(
SELECT
(select Count(*) From mslccard08.[carekey].dbo.EXTERNAL_MEMBER_DATA)
+
(select Count(*) from [vmslcsql11].[HSRTest].dbo.External_Member_data)
As RowCount1,
(
Select count(*) From [mslccard08].[carekey].dbo.EXTERNAL_MEMBER_DATA
)
As RowCount2
) As SubTable
-- Same comparison as in the question, but every text column is listed
-- explicitly and given the same collation on both sides of the UNION.
-- field1 / field2 are placeholders for the real column names.
SELECT
CASE WHEN (
select Count(1) From (
select field1 COLLATE sql_latin1_general_cp1_ci_as field1, field2 COLLATE sql_latin1_general_cp1_ci_as field2 from [mslccard08].[carekey].dbo.EXTERNAL_MEMBER_DATA
union
select field1 COLLATE sql_latin1_general_cp1_ci_as field1, field2 COLLATE sql_latin1_general_cp1_ci_as field2 from [vmslcsql11].[HSRTest].dbo.External_Member_data
) t)
<>
(
Select count(*) From [mslccard08].[carekey].dbo.EXTERNAL_MEMBER_DATA
)
THEN 'Data is not Identical'
ELSE 'Date is identical'
END AS RowCountResult
You need to correct the field names, it should work in union.
Changing collation with * does not seem to be possible.
EDIT:Temp Table Option to change COLLATE when using * in select
Create a temp table for first statement of the union then upsert records from second statement of the union and use this temp table for getting both table's count, refer code example below.
create table t1(name varchar(10) COLLATE French_CI_AS);
create table t2(name varchar(10) COLLATE Latin1_General_CI_AS);
insert into t1 values ('`ffffn1');
insert into t2 values('general');
select * into #t from t1; -- First table of union
Insert into #t select * from t2; -- second table of union
select count(*) from #t; -- working
------------------------------------------
So your query will change to;
-- Copy the carekey rows into a temp table, append the HSRTest rows to it, then
-- compare the temp table's row count with the carekey table's row count.
-- NOTE(review): the INSERT appends every HSRTest row without de-duplication,
-- so the two counts match only when the second table contributes no rows;
-- this is not equivalent to the UNION in the original query - confirm intent.
select * into #EXTERNAL_MEMBER_DATA from [mslccard08].[carekey].dbo.EXTERNAL_MEMBER_DATA ; -- Create temp table having carekey records
Insert into #EXTERNAL_MEMBER_DATA select * from [vmslcsql11].[HSRTest].dbo.External_Member_data; -- add records from HSRTest
-- Use the temp table in query
SELECT CASE WHEN (select Count(1) From #EXTERNAL_MEMBER_DATA )
<>
(Select count(*) From [mslccard08].[carekey].dbo.EXTERNAL_MEMBER_DATA)
THEN 'Data is not Identical'
ELSE 'Date is identical'
END AS RowCountResult