SQL Delete duplicate records and leave the rest

SQL Delete duplicate records and leave the rest - sql

I have two tables, A and B. A has 5 records, and B has the same records as A but with 7 rows; that is, the same values repeated in 7 rows. I want to delete only the first 5 records in B, since their row numbers match those in A. How can I do this? Please help me.
table :A
col1 col2 col3 DuplicateCount
1 2 n 1
1 2 n 2
1 2 n 3
1 2 n 4
2 2 m 1
2 2 m 2
table b:
col1 col2 col3 DuplicateCount
1 2 n 1
1 2 n 2
1 2 n 3
1 2 n 4
1 2 n 5
1 2 n 6
desired data should reside in table b is
col1 col2 col3 DuplicateCount
1 2 n 5
1 2 n 6
which is nothing but the last 2 rows in the table b.

Try this :
-- Remove every TableB row whose Id also occurs as an ID in TableA.
DELETE FROM TableB
WHERE EXISTS (
    SELECT 1
    FROM TableA AS a
    WHERE a.ID = TableB.Id
);

I added an id column to identify rows in table B; I am not sure how to delete only some of the duplicate rows without an id column:
-- Demo: for every (col1, col2, col3) group, delete from @b as many of its
-- lowest-id rows as @a holds for that same group, leaving only the surplus.
-- Table variables must be named with '@'; a '#' name is not valid in DECLARE.
DECLARE @a TABLE
(
    id   INT PRIMARY KEY,
    col1 INT,
    col2 INT,
    col3 VARCHAR(1)
);

DECLARE @b TABLE
(
    id   INT PRIMARY KEY,
    col1 INT,
    col2 INT,
    col3 VARCHAR(1)
);

INSERT INTO @a (id, col1, col2, col3)
VALUES
    (1, 1, 2, 'n'),
    (2, 1, 2, 'n'),
    (3, 1, 2, 'n'),
    (4, 1, 2, 'n'),
    (5, 2, 2, 'n'),
    (6, 2, 2, 'n');

INSERT INTO @b (id, col1, col2, col3)
VALUES
    (10, 1, 2, 'n'),
    (20, 1, 2, 'n'),
    (30, 1, 2, 'n'),
    (40, 1, 2, 'n'),
    (50, 1, 2, 'n'),
    (60, 1, 2, 'n');

WITH a_counts AS (
    -- How many rows @a holds per value group.
    SELECT
        col1,
        col2,
        col3,
        COUNT(*) AS cnt
    FROM @a
    GROUP BY col1, col2, col3
),
b_ranked AS (
    -- Position of each @b row inside its value group (lowest id first),
    -- together with the size of that group.
    SELECT
        id,
        col1,
        col2,
        col3,
        COUNT(*) OVER (PARTITION BY col1, col2, col3) AS cnt,
        ROW_NUMBER() OVER (PARTITION BY col1, col2, col3 ORDER BY id) AS rn
    FROM @b
)
DELETE FROM @b
WHERE id IN (
    SELECT br.id
    FROM b_ranked AS br
    INNER JOIN a_counts AS ac
        -- Match on the value columns so that a count taken from one group
        -- of @a can never remove rows belonging to a different group of @b.
        ON  ac.col1 = br.col1
        AND ac.col2 = br.col2
        AND ac.col3 = br.col3
    WHERE br.cnt > 1          -- only groups that are duplicated in @b
      AND br.rn <= ac.cnt     -- the first ac.cnt rows of the group
);

-- Rows left over after the delete (ids 50 and 60 for the sample data).
SELECT
    id,
    col1,
    col2,
    col3
FROM @b
ORDER BY id;

You can use the TOP keyword to delete the first five records:
-- DELETE TOP (n) takes a numeric row count (not a rowset) and removes
-- arbitrary rows, so it cannot express "as many rows as TableA has per group".
-- Instead, number TableB's rows inside each (col1, col2, col3) group and
-- delete those whose number does not exceed TableA's count for that group.
WITH b_numbered AS (
    SELECT
        col1,
        col2,
        col3,
        -- No ordering column is known here, so numbering within a group is arbitrary.
        ROW_NUMBER() OVER (
            PARTITION BY col1, col2, col3
            ORDER BY (SELECT NULL)
        ) AS rn
    FROM TableB
),
a_counts AS (
    SELECT
        col1,
        col2,
        col3,
        COUNT(*) AS cnt
    FROM TableA
    GROUP BY col1, col2, col3
)
DELETE bn
FROM b_numbered AS bn
INNER JOIN a_counts AS ac
    ON  ac.col1 = bn.col1
    AND ac.col2 = bn.col2
    AND ac.col3 = bn.col3
WHERE bn.rn <= ac.cnt;
or
Note: The below is an example for deleting one or more records based on their IDs
-- Delete the rows whose ID is one of the listed values.
DELETE FROM yourTable
WHERE ID IN (2, 3, 4, 5, 6);

Related

Case when duplicate add one more letter

For example: I have a table with these records below
1 A
2 A
3 B
4 C
...
and I need to migrate these record in to another table
1 AA
2 AB
3 B
4 C
...
Meaning if the record is duplicate, it will automatically add one more letter alphabetically.

Just a slightly different approach
Example
-- Append a letter (A, B, C, ...) to every value that occurs more than once;
-- values that occur only once are left unchanged.
-- Table variables must be named with '@'.
DECLARE @YourTable TABLE (ID INT, [SomeCol] VARCHAR(50));

INSERT INTO @YourTable (ID, SomeCol)
VALUES
    (1, 'A'),
    (2, 'A'),
    (3, 'B'),
    (4, 'C');

SELECT
    ID,
    SomeCol,
    CONCAT(
        SomeCol,
        CASE
            WHEN COUNT(*) OVER (PARTITION BY SomeCol) = 1 THEN ''
            -- CHAR(65) is 'A', so the n-th duplicate gets the n-th letter.
            ELSE CHAR(64 + ROW_NUMBER() OVER (PARTITION BY SomeCol ORDER BY ID))
        END
    ) AS NewVal
FROM @YourTable
ORDER BY ID;
Returns
ID SomeCol NewVal
1 A AA
2 A AB
3 B B
4 C C
EDIT - Requested UPDATE
-- Keep the first occurrence of each value unchanged and append a letter
-- (A, B, C, ...) to the second and later occurrences.
-- Table variables must be named with '@'.
DECLARE @YourTable TABLE (ID INT, [SomeCol] VARCHAR(50));

INSERT INTO @YourTable (ID, SomeCol)
VALUES
    (1, 'A'),
    (2, 'A'),
    (3, 'B'),
    (4, 'C');

WITH numbered AS (
    -- Position of each row among the rows sharing its SomeCol value.
    SELECT
        ID,
        SomeCol,
        ROW_NUMBER() OVER (PARTITION BY SomeCol ORDER BY ID) AS rn
    FROM @YourTable
)
SELECT
    ID,
    SomeCol,
    CONCAT(
        SomeCol,
        CASE
            -- The first row of a group gets no suffix. Handling it here avoids
            -- generating CHAR(64) ('@') and then having to strip it again.
            WHEN rn = 1 THEN ''
            -- CHAR(65) is 'A', so the second row gets 'A', the third 'B', ...
            ELSE CHAR(63 + rn)
        END
    ) AS NewVal
FROM numbered
ORDER BY ID;
Returns
ID SomeCol NewVal
1 A A
2 A AA
3 B B
4 C C

We might be able to handle this requirement with the help of a lookup table mapping duplicate sequence numbers to secondary letters:
-- Lookup of sequence number -> suffix letter. Extend the list if any value
-- can occur more than 26 times; beyond that no letter is found and the
-- concatenation below yields NULL for that row.
WITH letters AS (
    SELECT
        v.seq,
        v.let
    FROM (
        VALUES
            (1, 'A'),  (2, 'B'),  (3, 'C'),  (4, 'D'),  (5, 'E'),
            (6, 'F'),  (7, 'G'),  (8, 'H'),  (9, 'I'),  (10, 'J'),
            (11, 'K'), (12, 'L'), (13, 'M'), (14, 'N'), (15, 'O'),
            (16, 'P'), (17, 'Q'), (18, 'R'), (19, 'S'), (20, 'T'),
            (21, 'U'), (22, 'V'), (23, 'W'), (24, 'X'), (25, 'Y'),
            (26, 'Z')
    ) AS v (seq, let)
),
cte AS (
    -- rn: position of the row among rows sharing its letter; cnt: group size.
    SELECT
        id,
        let,
        ROW_NUMBER() OVER (PARTITION BY let ORDER BY id) AS rn,
        COUNT(*) OVER (PARTITION BY let) AS cnt
    FROM yourTable
)
SELECT
    t1.id,
    t1.let + CASE WHEN t1.cnt > 1 THEN t2.let ELSE '' END AS let
FROM cte AS t1
LEFT JOIN letters AS t2
    -- Join on the position inside the duplicate group, not on the row id:
    -- ids are unrelated to how many duplicates precede a row.
    ON t1.rn = t2.seq
ORDER BY t1.id;
Demo

SQL how to highlight duplicates under some conditions

I need to mark duplicates in the data, but only under some complex conditions. Let's say I have a table like this:
col1 col2
1 a
1 a
1 a
2 #B
2 #B
1 a
3 #B
3 #B
2 #B
1 a
4 #A
4 #A
5 c
I need to mark those records where:
value in col2 begins with a '#' AND ( it is a duplicate value in col2 AND it is under different values in col1).
so I need to get this:
col1 col2 newcol
1 a
1 a
1 a
2 #B 1
2 #B 1
1 a
3 #B 1
3 #B 1
2 #B 1
1 a
4 #A
4 #A
5 c
The reason why rows with "#B" in col2 are marked is that it is a duplicate in col2 AND "#B" can be found under "3" and "2" (so 2 or more different values) in col1. The reason why records with "#A" are NOT marked is that, while they are a duplicate in col2, they are only under one value ("4") in col1.
I am working in dashDB

I think DashDB supports window functions. If so, you can do:
-- Flag a row with 1 when its col2 value starts with '#' and that same col2
-- value appears under at least two different col1 values; otherwise NULL.
SELECT
    s.col1,
    s.col2,
    CASE
        WHEN s.col2 LIKE '#%'
             -- min <> max over the col2 group means more than one distinct col1.
             AND s.min_col1 <> s.max_col1
            THEN 1
    END AS flag
FROM (
    SELECT
        t.col1,
        t.col2,
        MIN(t.col1) OVER (PARTITION BY t.col2) AS min_col1,
        MAX(t.col1) OVER (PARTITION BY t.col2) AS max_col1
    FROM t
) AS s;
You can also do something similar without window functions.
Here is an alternative method:
-- Same flag computed without window functions: aggregate once per col2 value
-- and join the result back to every row.
SELECT
    t.*,
    t2.flag
FROM t
-- LEFT JOIN keeps rows whose col2 is NULL (they can never match on equality);
-- such rows simply get a NULL flag.
LEFT JOIN (
    SELECT
        col2,
        CASE
            WHEN col2 LIKE '#%'
                 -- min <> max means the value occurs under several col1 values.
                 AND MIN(col1) <> MAX(col1)
                THEN 1
        END AS flag
    FROM t
    GROUP BY col2
) AS t2
    ON t.col2 = t2.col2;

Pivoting row's to columns

How can I achieve the output below?
Can anyone help me out?
col_1 col_2
A 1
B 1
C 1
B 2
C 4
A 2
A 6
Output:
A B C
1 1 1
2 2 4
6

This will do the job, but it seems like quite an odd thing to want to do, so I am probably missing something?
-- Sample data in a temporary table.
CREATE TABLE #table (col1 CHAR(1), col2 INT);

INSERT INTO #table (col1, col2)
VALUES
    ('A', 1),
    ('B', 1),
    ('C', 1),
    ('B', 2),
    ('C', 4),
    ('A', 2),
    ('A', 6);

-- Number the values of each col1 letter in ascending col2 order, then put the
-- n-th value of every letter on output row n. Grouping directly on rank_id
-- avoids the helper numbers list, which only capped the output at 50 rows.
WITH Ranked AS (
    SELECT
        col1,
        col2,
        ROW_NUMBER() OVER (PARTITION BY col1 ORDER BY col2) AS rank_id
    FROM #table
)
SELECT
    MAX(CASE WHEN col1 = 'A' THEN col2 END) AS [A],
    MAX(CASE WHEN col1 = 'B' THEN col2 END) AS [B],
    MAX(CASE WHEN col1 = 'C' THEN col2 END) AS [C]
FROM Ranked
GROUP BY rank_id
-- Without ORDER BY the row order of the result is not guaranteed.
ORDER BY rank_id;
Results are:
A B C
1 1 1
2 2 4
6 NULL NULL

Looks like you are trying to pivot without aggregation? Here is another option:
-- Same reshaping with the PIVOT operator. dr is not named in the PIVOT clause,
-- so it acts as the implicit grouping column: one output row per dr value.
SELECT
    pvt.A,
    pvt.B,
    pvt.C
FROM (
    SELECT
        col1,
        col2,
        DENSE_RANK() OVER (PARTITION BY col1 ORDER BY col2) AS dr
    FROM #table
) AS t
PIVOT (
    -- Column names are written unqualified inside the PIVOT clause.
    MAX(col2) FOR col1 IN (A, B, C)
) AS pvt
-- Without ORDER BY the row order of the result is not guaranteed.
ORDER BY pvt.dr;
Also check this out for more examples/discussion: TSQL Pivot without aggregate function

Assigning a Row Number in SQL Server, but grouped on a value

I want to select 2 columns from a table, and assign a int value to each value. However, I want the 1st column ID to be the same for all values that are the same.
For the 2nd column, I want each value to numbered as well, but partitioned by the first column. I have figured this piece out, but I can't get the first part to work.
Here is the test scenario I'm using.
-- Test scenario from the question. Table variables must be named with '@'.
DECLARE @TestTable AS TABLE (Column1 CHAR(1), Column2 CHAR(1));

INSERT INTO @TestTable (Column1, Column2)
VALUES
    ('A', 'A'),
    ('A', 'B'),
    ('A', 'C'),
    ('B', 'D'),
    ('B', 'E'),
    ('B', 'F'),
    ('B', 'G'),
    ('B', 'H'),
    ('C', 'A'),
    ('C', 'B'),
    ('C', 'C');

SELECT
    -- Numbering restarts inside every Column1 group, so this does not give one
    -- shared ID per Column1 value; that is the behaviour the question asks about.
    ROW_NUMBER() OVER (PARTITION BY Column1 ORDER BY Column1) AS Column1_ID,
    Column1,
    ROW_NUMBER() OVER (PARTITION BY Column1 ORDER BY Column1, Column2) AS Column2_ID,
    Column2
FROM @TestTable;
When I run this, the values in Column2_ID are correct, but I would like the values for Column1_ID to be as follows.
Column1_ID Column1 Column2_ID Column2
1 A 1 A
1 A 2 B
1 A 3 C
2 B 1 D
2 B 2 E
2 B 3 F
2 B 4 G
2 B 5 H
3 C 1 A
3 C 2 B
3 C 3 C

You just need to use a different ranking function,
-- No PARTITION BY: ranks are assigned across all rows, so equal Column1 values
-- share one number and DENSE_RANK leaves no gaps between consecutive values.
dense_rank() OVER (ORDER BY Column1) as Column1_ID
http://msdn.microsoft.com/en-us/library/ms173825.aspx
SQL Fiddle : http://www.sqlfiddle.com/#!6/d41d8/1832

T-SQL Distinct column problem when trying to filter duplicates out

I have the following data
COL-1 COL-2
1 0TY/OK
1 0TY/OK
1 0TY/OK
1 0TY/OK
1 0TY/OK
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
3 7U5/2M
3 7U5/2M
3 7U5/2M
3 7U5/2M
And i want to construct a select query to retrieve that data in the output below
COL-1 COL-2 COL-3
1 0TY/OK 0TY/OK
1 0TY/OK 2KP/L
1 0TY/OK 7U5/2M
1 0TY/OK
1 0TY/OK
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
2 2KP/L
3 7U5/2M
3 7U5/2M
3 7U5/2M
3 7U5/2M
I want COL3 to return the distinct values of COL2
Using SELECT COL1, COL2, DISTINCT COL2 AS COL3 FROM MYTable does not work in SQL Server.

Although I'm sure that some SQL wizard will be able to construct a way to do this, I feel the need to point out that conceptually this doesn't make sense - the values in the rows of column 3 are completely unrelated to the row values in columns 1 and 2.
Can you not simply return the distinct values of COL2 in a separate query?
-- One row per different COL2 value.
SELECT COL2
FROM MyTable
GROUP BY COL2
(Note that you can return multiple resultsets from a single SQL query)

This is really unusual, and I can't see why you want this in one result set, as it does not make any sense: there is no reason to associate the rows of the distinct query with the rows of the non-distinct query. But what you have to do is simply run both queries
-- All rows, in a stable order. TABLE is a reserved word and cannot be used
-- unquoted as a table name, so the question's table name is used instead.
SELECT
    Col1,
    Col2
FROM MyTable
ORDER BY Col1, Col2
And
-- The distinct Col2 values. TABLE is a reserved word and cannot be used
-- unquoted as a table name, so the question's table name is used instead.
SELECT DISTINCT Col2
FROM MyTable
and join them together. To join them on row number, add a Row_Number() function to each query:
-- Pair the n-th row of the full data set with the n-th distinct Col2 value.
-- Rows beyond the number of distinct values get NULL in Col3.
SELECT
    T1.Col1,
    T1.Col2,
    T2.Col3
FROM (
    -- Every row, numbered in (Col1, Col2) order.
    SELECT
        ROW_NUMBER() OVER (ORDER BY Col1, Col2) AS RowNum,
        Col1,
        Col2
    FROM MyTable
) AS T1
LEFT JOIN (
    -- One row per distinct Col2 value, numbered in Col2 order. The window
    -- function is evaluated after GROUP BY, so a derived table never has to
    -- refer to its own alias to compute the number.
    SELECT
        Col2 AS Col3,
        ROW_NUMBER() OVER (ORDER BY Col2) AS RowNum
    FROM MyTable
    GROUP BY Col2
) AS T2
    ON T2.RowNum = T1.RowNum
ORDER BY T1.RowNum

Try this out..
-- Inline sample data standing in for the real table.
WITH MyTable AS
(
    SELECT
        v.Col1,
        CAST(v.Col2 AS VARCHAR(25)) AS Col2
    FROM (
        VALUES
            (1, '0TY/OK'),
            (1, '0TY/OK'),
            (1, '0TY/OK'),
            (1, '0TY/OK'),
            (1, '0TY/OK'),
            (2, '2KP/L'),
            (2, '2KP/L'),
            (2, '2KP/L'),
            (2, '2KP/L'),
            (2, '2KP/L'),
            (3, '7U5/2M'),
            (3, '7U5/2M'),
            (3, '7U5/2M'),
            (3, '7U5/2M')
    ) AS v (Col1, Col2)
),
-- Every row, numbered in Col2 order (Col1 breaks ties).
AllData AS
(
    SELECT
        Col1,
        Col2,
        ROW_NUMBER() OVER (ORDER BY Col2, Col1) AS Id
    FROM MyTable
),
-- One row per distinct Col2 value, numbered in Col2 order.
DistinctWithRowNumber AS
(
    SELECT
        Col2 AS Col3,
        ROW_NUMBER() OVER (ORDER BY Col2) AS Id
    FROM MyTable
    GROUP BY Col2
)
SELECT
    AllData.Col1,
    AllData.Col2,
    DistinctWithRowNumber.Col3
FROM AllData
LEFT JOIN DistinctWithRowNumber
    ON AllData.Id = DistinctWithRowNumber.Id
-- Without ORDER BY the row order of the result is not guaranteed.
ORDER BY AllData.Id
returns this result
Col1 Col2 Col3
----------- ------------------------- -------------------------
1 0TY/OK 0TY/OK
1 0TY/OK 2KP/L
1 0TY/OK 7U5/2M
1 0TY/OK NULL
1 0TY/OK NULL
2 2KP/L NULL
2 2KP/L NULL
2 2KP/L NULL
2 2KP/L NULL
2 2KP/L NULL
3 7U5/2M NULL
3 7U5/2M NULL
3 7U5/2M NULL
3 7U5/2M NULL

You can use CTEs to create a ROW_NUMBER and JOIN over those virtual columns.
-- Sample data. Table variables must be named with '@'.
DECLARE @t TABLE (
     Col1 INT
    ,Col2 VARCHAR(10)
);

INSERT INTO @t (Col1, Col2)
VALUES
     (1, '0TY/OK')
    ,(1, '0TY/OK')
    ,(1, '0TY/OK')
    ,(1, '0TY/OK')
    ,(1, '0TY/OK')
    ,(2, '2KP/L')   -- no trailing comma inside the literal: it would create a fourth distinct value
    ,(2, '2KP/L')
    ,(2, '2KP/L')
    ,(2, '2KP/L')
    ,(2, '2KP/L')
    ,(3, '7U5/2M')
    ,(3, '7U5/2M')
    ,(3, '7U5/2M')
    ,(3, '7U5/2M');

WITH all_data AS (
    -- Every row, numbered in a defined order so the pairing is repeatable.
    SELECT
         Col1
        ,Col2
        ,ROW_NUMBER() OVER (ORDER BY Col1, Col2) AS RowNum
    FROM @t
),
distinct_data AS (
    -- One row per distinct Col2 value, numbered in Col2 order.
    SELECT
         Col2
        ,ROW_NUMBER() OVER (ORDER BY Col2) AS RowNum
    FROM @t
    GROUP BY
        Col2
)
SELECT
     all_data.Col1
    ,all_data.Col2
    ,distinct_data.Col2 AS Col3   -- aliased so the two output columns have different names
FROM all_data
LEFT JOIN distinct_data
    ON all_data.RowNum = distinct_data.RowNum
ORDER BY all_data.RowNum;

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL Delete duplicate records and leave the rest - sql

Try this : delete from TableB WHERE Id IN ( select b.id from TableB b, TableA a WHERE b.Id = a.ID )

You can use TOP key word for deleting first five records DELETE TOP (select * from TableA a,TableB b where a.col1=b.col1 AND a.col2=b.col2 AND a.col3=b.col3) FROM TableA or Note: The below is an example for deleting one or more records based on their IDs DELETE From yourTable where ID in (2,3,4,5,6)

Related

Case when duplicate add one more letter

SQL how to highlight duplicates under some conditions

Pivoting row's to columns

Assigning a Row Number in SQL Server, but grouped on a value

T-SQL Distinct column problem when trying to filter duplicates out

Categories

Resources