Delete duplicates, and reassign FK value in SQL - sql

I have two tables:
Table1:
[id], [ColA]
Table2:
[id], [Table1Id]
A previous update created duplicates in Table1, and those duplicates are now associated with rows in Table2. It looks like this in the db:
Table1
1, 89
2, 89
Table2
6, 1
7, 2
I would like to delete the duplicates and reassign Table2.Table1Id to the one Table1.id row that should be left. Is this possible in a single statement? I've seen this post on how to delete all but one, but I am lost on how to reassign the Table2.Table1Id value.
edit:
The end result should look like this:
Table1
2, 89
Table2
6, 2
7, 2

-- Demo: collapse rows of dbo.Table1 that share a ColA value into one survivor
-- per value, repointing dbo.Table2.Table1ID at the survivor before the
-- duplicates are deleted.

-- Setup: drop the child first because of the foreign key, then rebuild.
drop table if exists dbo.Table2;
drop table if exists dbo.Table1;

create table dbo.Table1 (
    ID int primary key
    , ColA int
);

create table dbo.Table2 (
    ID int
    , Table1ID int
);

alter table dbo.Table2
    add constraint FK_Table2_Table1
    foreign key (Table1ID)
    references dbo.Table1 (ID);

insert into dbo.Table1 (ID, ColA)
values (1, 89), (2, 89);

insert into dbo.Table2 (ID, Table1ID)
values (6, 1), (7, 2);

-- #temp holds one row per duplicate to remove, together with the ID of the
-- row that survives for the same ColA value.
drop table if exists #temp;

select
    dup.ID
    , dup.KeepID
into #temp
from (
    select
        t.ID
        -- The window is partitioned by ColA so every duplicate group gets its
        -- own survivor; the highest ID is kept, matching the requested end
        -- result (Table1 keeps row 2).
        , max(t.ID) over (partition by t.ColA) as KeepID
    from dbo.Table1 t
    -- NULL never equals NULL, so rows with a NULL ColA are not duplicates.
    where t.ColA is not null
) dup
where dup.ID <> dup.KeepID;

-- Repoint and delete atomically: the FK forbids deleting a Table1 row that is
-- still referenced, and a half-applied change must not be left behind.
set xact_abort on;

begin transaction;

update t2
set
    Table1ID = d.KeepID
from dbo.Table2 t2
inner join #temp d on d.ID = t2.Table1ID;

delete t1
from dbo.Table1 t1
inner join #temp d on d.ID = t1.ID;

commit transaction;

-- Show the result.
select
    t.ID
    , t.ColA
from dbo.Table1 t
order by t.ID;

select
    t.ID
    , t.Table1ID
from dbo.Table2 t
order by t.ID;

Related

How to run only the first conditional OR in WHERE?

I have a query and I would like to get only the rows that satisfy the first OR condition that matches anything. That is, if the first OR condition matches no rows, the second one should be evaluated, and so on.
This is a simple example. However, the real tables are very big, with about 30 million rows, so I cannot use a full join. Also, my code has to be put into a function.
My sample data:
-- Sample tables. DECLARE ... TABLE only accepts @-prefixed variable names, so
-- the #-prefixed names are created as temp tables instead; this keeps the
-- #tbl_N names that the statements below refer to.
CREATE TABLE #tbl_1 (Id INT);
CREATE TABLE #tbl_2 (Id INT);
CREATE TABLE #tbl_3 (Id INT);
CREATE TABLE #tbl_4 (Id INT);
and its data:
-- First data set: #tbl_2 holds no id that exists in #tbl_1.
INSERT INTO #tbl_1 ([Id])
VALUES (1), (2), (3), (4), (5), (6);

INSERT INTO #tbl_2 ([Id]) VALUES (8);
INSERT INTO #tbl_3 ([Id]) VALUES (3);
INSERT INTO #tbl_4 ([Id]) VALUES (4);

-- A plain OR keeps every row that appears in any of the three lists.
SELECT *
FROM #tbl_1 AS src
WHERE src.Id IN (SELECT a.Id FROM #tbl_2 AS a)
   OR src.Id IN (SELECT b.Id FROM #tbl_3 AS b)
   OR src.Id IN (SELECT c.Id FROM #tbl_4 AS c);
Then I would like to see just one item:
3
But it gives: 3, 4
Or another example:
-- Second data set: #tbl_2 now shares ids 1 and 2 with #tbl_1.
INSERT INTO #tbl_1 ([Id])
VALUES (1), (2), (3), (4), (5), (6);

INSERT INTO #tbl_2 ([Id]) VALUES (1), (2);
INSERT INTO #tbl_3 ([Id]) VALUES (3);
INSERT INTO #tbl_4 ([Id]) VALUES (4);

-- Same OR query as before: any match in any list qualifies a row.
SELECT *
FROM #tbl_1 AS src
WHERE src.Id IN (SELECT a.Id FROM #tbl_2 AS a)
   OR src.Id IN (SELECT b.Id FROM #tbl_3 AS b)
   OR src.Id IN (SELECT c.Id FROM #tbl_4 AS c);
Then I would like to see just two first items:
1, 2
But it gives: 1, 2, 3, 4
I've tried to use OR. However, it takes all items which satisfy condition.
How is it possible to write WHERE statement where just one first OR statement will be executed?
You want a ranking. Rows from #tbl_2 have precedence over rows from table #tbl_3 and these again have precedence over rows from table #tbl_4.
Ideally you would just have one table instead of three and store the priority along:
-- One lookup table that stores each id together with its priority.
INSERT INTO #ids (id, priority)
VALUES
    (1, 1),
    (2, 1),
    (3, 2),
    (4, 3);
As we are looking for the best overall priority (in contrast to, say, looking for the best match per product or the like), we can join, order our rows by priority and use TOP WITH TIES to only keep the rows with the best priority.
-- Keep the #tbl_1 rows that tie for first place when sorted by priority.
SELECT TOP (1) WITH TIES
    src.*
FROM #tbl_1 AS src
INNER JOIN #ids AS p
    ON p.id = src.id
ORDER BY p.priority;
If you want to stick with the three tables, then add the priority on-the-fly:
-- Tag every id with a priority number derived from the table it comes from,
-- then keep the #tbl_1 rows that tie for the lowest priority number.
WITH ids (id, priority) AS
(
    SELECT id, 1 FROM #tbl_2
    UNION ALL
    SELECT id, 2 FROM #tbl_3
    UNION ALL
    SELECT id, 3 FROM #tbl_4
)
SELECT TOP (1) WITH TIES
    src.*
FROM #tbl_1 AS src
INNER JOIN ids
    ON ids.id = src.id
ORDER BY ids.priority;
Try this:
-- Return the tbl_1 rows matched by the first of tbl_2, tbl_3, tbl_4 that has
-- any id in common with tbl_1. A later table is considered only when every
-- earlier table has no match at all.
--
-- The "does table X match anything?" checks use EXISTS / NOT EXISTS rather
-- than COUNT(1) compared to 0, so they can stop at the first matching row.
-- The former "COUNT(1) > 0" checks are dropped: the correlated EXISTS in the
-- same branch already proves that the table has at least one match.
SELECT *
FROM tbl_1 AS t
WHERE
    -- Priority 1: rows matched by tbl_2.
    EXISTS (SELECT 1 FROM tbl_2 t2 WHERE t2.id = t.id)
    -- Priority 2: rows matched by tbl_3, only if tbl_2 matches nothing.
    OR (
        EXISTS (SELECT 1 FROM tbl_3 t3 WHERE t3.id = t.id)
        AND NOT EXISTS (
            SELECT 1
            FROM tbl_1 t1
            INNER JOIN tbl_2 t2
                ON t2.id = t1.id
        )
    )
    -- Priority 3: rows matched by tbl_4, only if tbl_2 and tbl_3 match nothing.
    OR (
        EXISTS (SELECT 1 FROM tbl_4 t4 WHERE t4.id = t.id)
        AND NOT EXISTS (
            SELECT 1
            FROM tbl_1 t1
            INNER JOIN tbl_2 t2
                ON t2.id = t1.id
        )
        AND NOT EXISTS (
            SELECT 1
            FROM tbl_1 t1
            INNER JOIN tbl_3 t3
                ON t3.id = t1.id
        )
    );
In this way you have the OR exclusive
Sql Fiddle #1 (with dataset 1, 2; 3; 4)
Sql Fiddle #2 (with dataset 8; 3; 4)
A TOP (1) WITH TIES with an ORDER BY over a CASE WHEN for the 3 table id's would do the job.
If nothing matched with #tbl_2 then the matches of #tbl_3 will come on top. And if no match with #tbl_2 & #tbl_3 then #tbl_4 will come on top.
-- Fiddle 1: #tbl_2 has no id in common with #tbl_1.
-- DECLARE ... TABLE only accepts @-prefixed names, so the #-prefixed tables
-- are created as temp tables; dropping them first makes the script rerunnable.
DROP TABLE IF EXISTS #tbl_1, #tbl_2, #tbl_3, #tbl_4;

CREATE TABLE #tbl_1 (Id INT);
CREATE TABLE #tbl_2 (Id INT);
CREATE TABLE #tbl_3 (Id INT);
CREATE TABLE #tbl_4 (Id INT);

INSERT INTO #tbl_1 ([Id]) VALUES (1), (2), (3), (4), (5), (6);
INSERT INTO #tbl_2 ([Id]) VALUES (8);
INSERT INTO #tbl_3 ([Id]) VALUES (3);
INSERT INTO #tbl_4 ([Id]) VALUES (4);

-- Rank each matched row by the first table that matched it and keep the rows
-- tied for the best rank.
SELECT TOP (1) WITH TIES
    t.*
FROM #tbl_1 AS t
LEFT JOIN #tbl_2 t2 ON t2.Id = t.Id
LEFT JOIN #tbl_3 t3 ON t3.Id = t.Id
LEFT JOIN #tbl_4 t4 ON t4.Id = t.Id
-- A NULL in the list never compares equal, so rows without any match drop out.
WHERE t.Id IN (t2.Id, t3.Id, t4.Id)
ORDER BY
    CASE
        WHEN t2.Id IS NOT NULL THEN 2
        WHEN t3.Id IS NOT NULL THEN 3
        WHEN t4.Id IS NOT NULL THEN 4
    END;
Id
3
-- Fiddle 2: #tbl_2 shares ids 1 and 2 with #tbl_1.
-- DECLARE ... TABLE only accepts @-prefixed names, so the #-prefixed tables
-- are created as temp tables; dropping them first makes the script rerunnable.
DROP TABLE IF EXISTS #tbl_1, #tbl_2, #tbl_3, #tbl_4;

CREATE TABLE #tbl_1 (Id INT);
CREATE TABLE #tbl_2 (Id INT);
CREATE TABLE #tbl_3 (Id INT);
CREATE TABLE #tbl_4 (Id INT);

INSERT INTO #tbl_1 ([Id]) VALUES (1), (2), (3), (4), (5), (6);
INSERT INTO #tbl_2 ([Id]) VALUES (1), (2);
INSERT INTO #tbl_3 ([Id]) VALUES (3);
INSERT INTO #tbl_4 ([Id]) VALUES (4);

-- Rank each matched row by the first table that matched it and keep the rows
-- tied for the best rank.
SELECT TOP (1) WITH TIES
    t.*
FROM #tbl_1 AS t
LEFT JOIN #tbl_2 t2 ON t2.Id = t.Id
LEFT JOIN #tbl_3 t3 ON t3.Id = t.Id
LEFT JOIN #tbl_4 t4 ON t4.Id = t.Id
-- A NULL in the list never compares equal, so rows without any match drop out.
WHERE t.Id IN (t2.Id, t3.Id, t4.Id)
ORDER BY
    CASE
        WHEN t2.Id IS NOT NULL THEN 2
        WHEN t3.Id IS NOT NULL THEN 3
        WHEN t4.Id IS NOT NULL THEN 4
    END;
Id
1
2
Test on db<>fiddle here
Assuming t2, t3 & t4 have unique values.
You would probably like to verify that the execution plan is based on merge joins / hash joins and not nested loops.
-- Outer-join the three priority tables, drop rows that matched none of them,
-- and keep the rows tied for the best (lowest) priority number.
SELECT TOP (1) WITH TIES
    base.*
FROM #tbl_1 AS base
LEFT JOIN #tbl_2 AS p1 ON p1.id = base.id
LEFT JOIN #tbl_3 AS p2 ON p2.id = base.id
LEFT JOIN #tbl_4 AS p3 ON p3.id = base.id
WHERE COALESCE(p1.id, p2.id, p3.id) IS NOT NULL
ORDER BY
    CASE
        WHEN p1.Id IS NOT NULL THEN 1
        WHEN p2.Id IS NOT NULL THEN 2
        WHEN p3.Id IS NOT NULL THEN 3
    END
fiddle
Another solution with no JOINs at all. Only GROUP BY :-)
-- Join-free variant: stack all ids with a priority marker and group by id.
-- 999 marks "present in #tbl_1"; 1, 2, 3 mark the priority tables.
select top 1 with ties
    id
from (
    select id, 999 from #tbl_1
    union all select id, 1 from #tbl_2
    union all select id, 2 from #tbl_3
    union all select id, 3 from #tbl_4
) t (id, priority)
group by id
-- max = 999 keeps only ids present in #tbl_1. min < 999 additionally requires
-- a match in a priority table; without it, when nothing matches at all, every
-- #tbl_1 id ties at 999 and would be returned.
having max(priority) = 999
   and min(priority) < 999
order by min(priority)
fiddle
One other idea you might entertain, although not exactly elegant, is a more procedural approach so you only end up touching a table if you need to which will be more performant if that's a concern.
Insert qualifying IDs into a temp table and only move on if there are no results at each stage, for example
-- Collect qualifying ids step by step: a lower-priority table is probed only
-- when the previous insert added no rows.
create table #results (id int);

insert into #results (id)
select id
from t1
where exists (select * from t2 where t2.id = t1.id);

-- @@ROWCOUNT reports the rows affected by the last statement, so it must be
-- read immediately after the insert it refers to.
if @@ROWCOUNT = 0
begin
    insert into #results (id)
    select id
    from t1
    where exists (select * from t3 where t3.Id = t1.Id);

    if @@ROWCOUNT = 0
    begin
        insert into #results (id)
        select id
        from t1
        where exists (select * from t4 where t4.Id = t1.Id);
    end
end

select id
from #results;
Example Fiddle

Update table using like operator

I have 2 tables
table 1
Diagnosis GroupID
155.0 - blaaaaaa GAS
table 2
Code GroupID
155.0
155.0
155.0
155.0
I did try to update GroupID of second table from first
update Table2 set GroupID= GroupID from table 1 where Diagnosis like '%'+Code+'%'
but it's not working.
Your current syntax for an update join is slightly off. Try using this instead:
-- Copy GroupID from each Table1 row onto the Table2 rows whose Code occurs
-- anywhere inside that row's Diagnosis text.
UPDATE dst
SET GroupID = src.GroupID
FROM Table2 AS dst
INNER JOIN Table1 AS src
    ON src.Diagnosis LIKE '%' + dst.Code + '%'
You can use the following code:
-- Set table2.GroupId from the table1 row whose Diagnosis contains the Code.
UPDATE target
SET target.GroupId = source.GroupId
FROM table2 AS target
INNER JOIN table1 AS source
    ON source.Diagnosis LIKE '%'+target.Code+'%';
This would help, http://rextester.com/NILBI39557
-- Self-contained demo: build both tables, show Table2 before and after the
-- LIKE-based update join.
CREATE TABLE Table1
(
    Diagnosis VARCHAR(255),
    GROUPID   VARCHAR(255)
);

INSERT INTO Table1 (Diagnosis, GROUPID)
VALUES ('155.0 - blaaaaaa', 'GAS');

CREATE TABLE Table2
(
    Code    VARCHAR(255),
    GROUPID VARCHAR(255)
);

-- Four identical codes, all without a group yet.
INSERT INTO Table2 (Code, GROUPID)
VALUES
    ('155.0', NULL),
    ('155.0', NULL),
    ('155.0', NULL),
    ('155.0', NULL);

-- Before.
SELECT * FROM Table2;

UPDATE dst
SET dst.GroupId = src.GroupId
FROM Table2 AS dst
INNER JOIN Table1 AS src
    ON src.Diagnosis LIKE '%'+dst.Code+'%';

-- After.
SELECT * FROM Table2;

How to left join to first row in SQL Server

How to left join two tables, selecting from second table only the first row?
My question is a follow up of:
SQL Server: How to Join to first row
I used the query suggested in that thread.
-- Parent table: ids 1, 2 and 3.
CREATE TABLE table1 (
    id INT NOT NULL
);

INSERT INTO table1 (id)
VALUES (1), (2), (3);
GO

-- Child table: several categories for ids 1 and 3, none for id 2.
CREATE TABLE table2 (
    id INT NOT NULL,
    category VARCHAR(1)
);

INSERT INTO table2 (id, category)
VALUES
    (1, 'A'),
    (1, 'B'),
    (1, 'C'),
    (3, 'X'),
    (3, 'Y');
GO
------------------
-- For every table1 row, fetch one matching table2 row. CROSS APPLY returns a
-- table1 row only when the applied query yields a row for it.
SELECT
    t1.*
    , FirstMatch.category
FROM table1 AS t1
CROSS APPLY (
    SELECT TOP 1
        t2.id
        , t2.category
    FROM table2 AS t2
    WHERE t2.id = t1.id
    ORDER BY t2.id
) AS FirstMatch
However, with this query, I get inner join results. I want to get left join results: the desired output should also contain table1.id = 2, with NULL as its category. How can I do that?
use row_number and left join
-- Number the table2 rows per id by category, then left join only row 1 so
-- that table1 rows without a match are still returned.
WITH ranked AS (
    SELECT
        id
        , category
        , ROW_NUMBER() OVER (PARTITION BY id ORDER BY category) AS rn
    FROM table2
)
SELECT
    t.id
    , ranked.category
FROM table1 AS t
LEFT JOIN ranked
    ON ranked.id = t.id
    AND ranked.rn = 1
OUTPUT:
id category
1 A
2 (null)
3 X
SQLFIDDLE DEMO
-- Look up the alphabetically first category per table1 row with a scalar
-- subquery; it evaluates to NULL when table2 has no row for that id.
SELECT
    p.id
    , (
        SELECT TOP 1 c.category
        FROM table2 AS c
        WHERE c.id = p.id
        ORDER BY c.category ASC
    ) AS category
FROM table1 AS p
-- Left join all categories, then keep only the first one per id (or the
-- NULL-extended row when table2 has no match).
SELECT
    table1.id
    , table2.category
FROM table1
LEFT JOIN table2
    ON table1.id = table2.id
WHERE table2.category = (
        SELECT TOP 1 t.category
        FROM table2 AS t
        WHERE table1.id = t.id
        -- TOP without ORDER BY may return any matching row; ordering makes
        -- the chosen category deterministic.
        ORDER BY t.category
    )
    OR table2.category IS NULL
Following the comment of t-clausen.dk this does the job:
change CROSS APPLY to OUTER APPLY

SQL Join query to show records if exists in master table or not

Table1
id name color
1,'a','red'
2,'a','blue'
3,'b','red'
4,'c','red'
5,'d','red'
6,'a','green'
-- Sample data for Table1. DECLARE ... TABLE only accepts @-prefixed names, so
-- the #-prefixed table is created as a temp table.
create table #t1 (id int, name varchar(10), color varchar(5));

insert into #t1 (id, name, color) values (1, 'a', 'red');
insert into #t1 (id, name, color) values (2, 'a', 'blue');
insert into #t1 (id, name, color) values (3, 'b', 'red');
insert into #t1 (id, name, color) values (4, 'c', 'red');
insert into #t1 (id, name, color) values (5, 'd', 'red');
-- Row 6 is part of the Table1 listing above and is needed for name 'a' to own
-- all three master colors.
insert into #t1 (id, name, color) values (6, 'a', 'green');
table t2 (master table )
color
red
blue
green
-- Master list of colors. DECLARE ... TABLE only accepts @-prefixed names, so
-- the #-prefixed table is created as a temp table.
create table #t2 (color varchar(5));

insert into #t2 (color) values ('red');
insert into #t2 (color) values ('blue');
insert into #t2 (color) values ('green');
The output will be
'a','red'
'a','blue'
'a','green'
We need to retrieve the names from table 1 that have all of the colors listed in t2.
You can get the names in t1 that match all master colors using group by, having, and join:
-- Names whose number of distinct matched colors equals the number of rows in
-- the master table t2.
SELECT
    item.name
FROM t1 AS item
INNER JOIN t2 AS master
    ON item.color = master.color
GROUP BY item.name
HAVING COUNT(DISTINCT item.color) = (SELECT COUNT(*) FROM t2);
This returns the names. If you want the detailed rows, then use this as a subquery or CTE and join t1 back to these results.
And to get the detailed rows:
-- Step 1: names that match every color in t2.
-- Step 2: return the full t1 rows belonging to those names.
WITH complete_names AS (
    SELECT
        item.name
    FROM t1 AS item
    INNER JOIN t2 AS master
        ON item.color = master.color
    GROUP BY item.name
    HAVING COUNT(DISTINCT item.color) = (SELECT COUNT(*) FROM t2)
)
SELECT
    t1.*
FROM t1
INNER JOIN complete_names
    ON t1.name = complete_names.name;

left outer join in t-sql

I have the following two tables. I am using SQL Server 2008 R2
-- #tmp1: the full list of letters.
CREATE TABLE #tmp1 (
    a CHAR(1)
)

-- #tmp2: values recorded per (id, letter); not every letter occurs per id.
CREATE TABLE #tmp2 (
    id INT,
    a CHAR(1),
    val INT
)

INSERT INTO #tmp1 VALUES ('A'), ('B'), ('C')

INSERT INTO #tmp2 VALUES
    (1, 'A', 10),
    (1, 'B', 20),
    (2, 'A', 30),
    (2, 'C', 40)

-- Joining on the letter alone returns one row per existing #tmp2 row.
SELECT *
FROM #tmp1 letters
LEFT OUTER JOIN #tmp2 vals
    ON letters.a = vals.a
ORDER BY vals.id
This returns the result set
A 1 A 10
B 1 B 20
C 2 C 40
A 2 A 30
I would like to have the following result set
A 1 A 10
B 1 B 20
C 1 null null
A 2 A 30
B 2 null null
C 2 C 40
Right now I am achieving this by creating a new table with a cross join, like this, and then doing an outer join:
-- Materialise every (letter, id) combination ...
SELECT *
INTO #tmp3
FROM #tmp1
CROSS JOIN (
    SELECT DISTINCT ID
    FROM #tmp2
) ids

-- ... then outer join the recorded values onto the combinations.
SELECT *
FROM #tmp3 combos
LEFT OUTER JOIN #tmp2 vals
    ON combos.a = vals.a
    AND combos.id = vals.id
Is there a better way to do this ?
Thanks
To get what you want, you need a "driving" table. That is, you want a complete list of all combinations, and then to join to the other tables to get the matches. Here is one way:
-- Build the complete list of (letter, id) combinations as a "driving" set and
-- left join the recorded values onto it.
-- The letter and id are taken from the driving set, not from #tmp2, so they
-- are filled in even for combinations that have no #tmp2 row.
select
    driving.a
    , driving.id
    , t2.a
    , t2.val
from (
    select
        t1.a as a
        , t2.id as id
    from (select distinct a from #tmp1) t1
    cross join (select distinct id from #tmp2) t2
) driving
left outer join #tmp2 t2
    on t2.id = driving.id
    and t2.a = driving.a
-- Sorting by t2.id would sort the unmatched rows by NULL instead of their id.
order by driving.id, driving.a
What you are looking for is a cartesian product of the values in #tmp1 and the values in the id column of #tmp2. Because the values in #tmp2.id are not unique, it might be a better design to have an additional table with one row for each #tmp2.id value. Then you can use this solution:
-- Letters.
CREATE TABLE #tmp1 (
    a CHAR(1)
)

-- Values per (id, letter).
CREATE TABLE #tmp2 (
    id INT,
    a CHAR(1),
    val INT
)

-- One row per id value.
CREATE TABLE #tmp3 (
    id INT
)

INSERT INTO #tmp1 VALUES ('A'), ('B'), ('C')

INSERT INTO #tmp3 VALUES (1), (2)

INSERT INTO #tmp2 VALUES
    (1, 'A', 10),
    (1, 'B', 20),
    (2, 'A', 30),
    (2, 'C', 40)

-- Every (id, letter) pair, with val where #tmp2 has a row and NULL otherwise.
SELECT
    ids.id,
    letters.a,
    vals.val
FROM #tmp1 AS letters
CROSS JOIN #tmp3 AS ids
LEFT OUTER JOIN #tmp2 AS vals
    ON letters.a = vals.a
    AND ids.id = vals.id
ORDER BY ids.id, letters.a;
If that is not an option use this instead:
-- Same result without a separate id table: derive the distinct ids from
-- #tmp2 itself.
SELECT
    ids.id,
    letters.a,
    vals.val
FROM #tmp1 AS letters
CROSS JOIN (
    SELECT DISTINCT id
    FROM #tmp2
) AS ids
LEFT OUTER JOIN #tmp2 AS vals
    ON letters.a = vals.a
    AND ids.id = vals.id
ORDER BY ids.id, letters.a;