left outer join in t-sql - sql

I have the following two tables. I am using SQL Server 2008 R2
-- Sample data: #tmp1 lists every letter, #tmp2 holds a value per (id, letter) pair.
CREATE TABLE #tmp1 (
    a CHAR(1)
);

CREATE TABLE #tmp2 (
    id  INT,
    a   CHAR(1),
    val INT
);

INSERT INTO #tmp1 (a)
VALUES ('A'), ('B'), ('C');

INSERT INTO #tmp2 (id, a, val)
VALUES (1, 'A', 10),
       (1, 'B', 20),
       (2, 'A', 30),
       (2, 'C', 40);

-- Left join on the letter alone: one output row per matching #tmp2 row, so a
-- letter that is missing under a particular id never gets a row of its own.
SELECT *
FROM #tmp1 AS t1
LEFT JOIN #tmp2 AS t2
    ON t2.a = t1.a
ORDER BY t2.id;
This returns the result set
A 1 A 10
B 1 B 20
C 2 C 40
A 2 A 30
I would like to have the following result set
A 1 A 10
B 1 B 20
C 1 null null
A 2 A 30
B 2 null null
C 2 C 40
Right now I am achieving this by creating a new table with a cross join like this and then doing an outer join
-- Step 1: materialise every (letter, id) combination into #tmp3.
SELECT *
INTO #tmp3
FROM #tmp1
CROSS JOIN (SELECT DISTINCT ID FROM #tmp2) AS t;

-- Step 2: hang the real #tmp2 rows onto the combinations; gaps come back as NULLs.
SELECT *
FROM #tmp3 AS t1
LEFT JOIN #tmp2 AS t2
    ON t2.a = t1.a
   AND t2.id = t1.id;
Is there a better way to do this ?
Thanks

To get what you want, you need a "driving" table. That is, you want a complete list of all combinations, and then to join to the other tables to get the matches. Here is one way:
-- Build the "driving" set of every (a, id) combination, then left join the real
-- #tmp2 rows onto it. The first two columns come from the driving set so that
-- unmatched combinations still show their id, and the sort uses the driving
-- columns because t2.id is NULL for exactly those unmatched rows.
SELECT
    driving.a,
    driving.id,
    t2.a,
    t2.val
FROM (
    SELECT
        t1.a  AS a,
        ids.id AS id
    FROM (SELECT DISTINCT a FROM #tmp1) AS t1
    CROSS JOIN (SELECT DISTINCT id FROM #tmp2) AS ids
) AS driving
LEFT JOIN #tmp2 AS t2
    ON t2.id = driving.id
   AND t2.a = driving.a
ORDER BY driving.id, driving.a;

What you are looking for is a cartesian product of the values in #tmp1 and the values in the id column of #tmp2. Because the values in #tmp2.id are not unique, it might be a better design to have an additional table with one row for each #tmp2.id value. Then you can use this solution:
-- Letters that must appear for every id.
CREATE TABLE #tmp1 (a CHAR(1));

-- Actual values per (id, letter).
CREATE TABLE #tmp2 (id INT, a CHAR(1), val INT);

-- One row per id, so the id list no longer has to be derived from #tmp2.
CREATE TABLE #tmp3 (id INT);

INSERT INTO #tmp1 (a)
VALUES ('A'), ('B'), ('C');

INSERT INTO #tmp3 (id)
VALUES (1), (2);

INSERT INTO #tmp2 (id, a, val)
VALUES (1, 'A', 10),
       (1, 'B', 20),
       (2, 'A', 30),
       (2, 'C', 40);

-- Every letter for every id; val is NULL where #tmp2 has no such pair.
SELECT
    t3.id,
    t1.a,
    t2.val
FROM #tmp1 AS t1
CROSS JOIN #tmp3 AS t3
LEFT JOIN #tmp2 AS t2
    ON t2.id = t3.id
   AND t2.a = t1.a
ORDER BY t3.id, t1.a;
If that is not an option use this instead:
-- Same result without an extra table: derive the id list from #tmp2 itself.
SELECT
    ids.id,
    letters.a,
    vals.val
FROM #tmp1 AS letters
CROSS JOIN (
    SELECT DISTINCT id
    FROM #tmp2
) AS ids
LEFT JOIN #tmp2 AS vals
    ON vals.a = letters.a
   AND vals.id = ids.id
ORDER BY ids.id, letters.a;

Related

How to run only the first conditional OR in WHERE?

I have a query and I would like to get only the rows that satisfy the first OR condition that matches anything. That is, if no row satisfies the first OR condition, then the second OR condition should be checked, and so on.
This is a simple example. However, these are really big tables with 30 millions of row. I cannot use full join. And my code should be put into function.
My sample data:
-- Table variables for the sample data (T-SQL variable names must start with @).
DECLARE @tbl_1 TABLE (Id INT)
DECLARE @tbl_2 TABLE (Id INT)
DECLARE @tbl_3 TABLE (Id INT)
DECLARE @tbl_4 TABLE (Id INT)
-- and its data:
INSERT INTO @tbl_1 ([Id]) VALUES
(1), (2), (3), (4), (5), (6)
INSERT INTO @tbl_2 ([Id]) VALUES (8)
INSERT INTO @tbl_3 ([Id]) VALUES (3)
INSERT INTO @tbl_4 ([Id]) VALUES (4)
-- Plain OR: a row qualifies if it matches ANY of the three lookups, so this
-- returns 3 and 4 rather than stopping at the first lookup that has a match.
SELECT * FROM @tbl_1 AS t
WHERE t.Id IN (SELECT Id FROM @tbl_2 AS t2)
OR t.Id IN (SELECT Id FROM @tbl_3 AS t2)
OR t.Id IN (SELECT Id FROM @tbl_4 AS t2)
Then I would like to see just one item:
3
But it gives: 3, 4
Or another example:
-- Second sample data set.
-- NOTE(review): presumably meant to be loaded into freshly declared, empty tables — confirm.
INSERT INTO #tbl_1 ([Id]) VALUES
(1), (2), (3), (4), (5), (6)
INSERT INTO #tbl_2 ([Id]) VALUES (1), (2)
INSERT INTO #tbl_3 ([Id]) VALUES (3)
INSERT INTO #tbl_4 ([Id]) VALUES (4)
-- Plain OR filter: an Id qualifies when it appears in any of the three lookup tables.
SELECT * FROM #tbl_1 AS t
WHERE t.Id IN (SELECT Id FROM #tbl_2 AS t2)
OR t.Id IN (SELECT Id FROM #tbl_3 AS t2)
OR t.Id IN (SELECT Id FROM #tbl_4 AS t2)
Then I would like to see just two first items:
1, 2
But it gives: 1, 2, 3, 4
I've tried to use OR. However, it takes all items which satisfy condition.
How is it possible to write WHERE statement where just one first OR statement will be executed?
You want a ranking. Rows from #tbl_2 have precedence over rows from table #tbl_3 and these again have precedence over rows from table #tbl_4.
Ideally you would just have one table instead of three and store the priority along:
-- One lookup table instead of three: each id is stored with its priority (1, 2 or 3).
INSERT INTO #ids (id, priority) VALUES (1, 1), (2, 1), (3, 2), (4, 3);
As we are looking for the best overall priority (in contrast to, say, looking for the best match per product or the like), we can join, order our rows by priority and use TOP WITH TIES to only keep the rows with the best priority.
-- Sort matches by priority and keep every row tied with the first one.
SELECT TOP (1) WITH TIES
    t.*
FROM #tbl_1 AS t
INNER JOIN #ids AS ids
    ON t.id = ids.id
ORDER BY ids.priority ASC;
If you want to stick with the three tables, then add the priority on the fly:
-- Tag each lookup table's ids with a priority inline, then keep the rows of
-- #tbl_1 that tie for the best (lowest) priority.
SELECT TOP (1) WITH TIES
    t.*
FROM #tbl_1 AS t
INNER JOIN (
    SELECT id, 1 AS priority FROM #tbl_2
    UNION ALL
    SELECT id, 2 AS priority FROM #tbl_3
    UNION ALL
    SELECT id, 3 AS priority FROM #tbl_4
) AS ids
    ON t.id = ids.id
ORDER BY ids.priority ASC;
Try this:
-- "Exclusive OR" across the three lookup tables: a lookup table is only
-- consulted when no row of tbl_1 matched any earlier lookup table.
-- EXISTS / NOT EXISTS replace the COUNT(..) > 0 / = 0 checks so the engine can
-- stop at the first matching row instead of counting the whole join.
SELECT t.*
FROM tbl_1 AS t
WHERE
    -- Branch 1: the row itself matches tbl_2.
    EXISTS (SELECT 1 FROM tbl_2 AS t2 WHERE t2.id = t.id)
    -- Branch 2: the row matches tbl_3 and nothing in tbl_1 matches tbl_2.
    OR (
        EXISTS (SELECT 1 FROM tbl_3 AS t3 WHERE t3.id = t.id)
        AND NOT EXISTS (
            SELECT 1
            FROM tbl_1 AS t1
            INNER JOIN tbl_2 AS t2 ON t2.id = t1.id
        )
    )
    -- Branch 3: the row matches tbl_4 and nothing in tbl_1 matches tbl_2 or tbl_3.
    OR (
        EXISTS (SELECT 1 FROM tbl_4 AS t4 WHERE t4.id = t.id)
        AND NOT EXISTS (
            SELECT 1
            FROM tbl_1 AS t1
            INNER JOIN tbl_2 AS t2 ON t2.id = t1.id
        )
        AND NOT EXISTS (
            SELECT 1
            FROM tbl_1 AS t1
            INNER JOIN tbl_3 AS t3 ON t3.id = t1.id
        )
    )
In this way you have the OR exclusive
Sql Fiddle #1 (with dataset 1, 2; 3; 4)
Sql Fiddle #2 (with dataset 8; 3; 4)
A TOP (1) WITH TIES with an ORDER BY over a CASE WHEN for the 3 table id's would do the job.
If nothing matched with #tbl_2 then the matches of #tbl_3 will come on top. And if no match with #tbl_2 & #tbl_3 then #tbl_4 will come on top.
-- Data set 1: @tbl_2 has no id in common with @tbl_1, so @tbl_3 decides.
-- (Table variables must be named with a leading @.)
DECLARE @tbl_1 TABLE (Id INT)
DECLARE @tbl_2 TABLE (Id INT)
DECLARE @tbl_3 TABLE (Id INT)
DECLARE @tbl_4 TABLE (Id INT)
INSERT INTO @tbl_1 ([Id]) VALUES (1), (2), (3), (4), (5), (6)
INSERT INTO @tbl_2 ([Id]) VALUES (8)
INSERT INTO @tbl_3 ([Id]) VALUES (3)
INSERT INTO @tbl_4 ([Id]) VALUES (4)
-- Left join all three lookups, keep rows that matched at least one of them,
-- rank each row by the first lookup it matched and return the best-ranked ties.
SELECT TOP (1) WITH TIES t.*
FROM @tbl_1 AS t
LEFT JOIN @tbl_2 t2 ON t2.Id = t.Id
LEFT JOIN @tbl_3 t3 ON t3.Id = t.Id
LEFT JOIN @tbl_4 t4 ON t4.Id = t.Id
WHERE t.Id IN (t2.Id, t3.Id, t4.Id)
ORDER BY CASE
WHEN t2.Id IS NOT NULL THEN 2
WHEN t3.Id IS NOT NULL THEN 3
WHEN t4.Id IS NOT NULL THEN 4
END;
Id
3
-- Data set 2: @tbl_2 matches ids 1 and 2, so only those two rows are returned.
-- (Table variables must be named with a leading @.)
DECLARE @tbl_1 TABLE (Id INT)
DECLARE @tbl_2 TABLE (Id INT)
DECLARE @tbl_3 TABLE (Id INT)
DECLARE @tbl_4 TABLE (Id INT)
INSERT INTO @tbl_1 ([Id]) VALUES (1), (2), (3), (4), (5), (6)
INSERT INTO @tbl_2 ([Id]) VALUES (1), (2)
INSERT INTO @tbl_3 ([Id]) VALUES (3)
INSERT INTO @tbl_4 ([Id]) VALUES (4)
-- Left join all three lookups, keep rows that matched at least one of them,
-- rank each row by the first lookup it matched and return the best-ranked ties.
SELECT TOP (1) WITH TIES t.*
FROM @tbl_1 AS t
LEFT JOIN @tbl_2 t2 ON t2.Id = t.Id
LEFT JOIN @tbl_3 t3 ON t3.Id = t.Id
LEFT JOIN @tbl_4 t4 ON t4.Id = t.Id
WHERE t.Id IN (t2.Id, t3.Id, t4.Id)
ORDER BY CASE
WHEN t2.Id IS NOT NULL THEN 2
WHEN t3.Id IS NOT NULL THEN 3
WHEN t4.Id IS NOT NULL THEN 4
END;
Id
1
2
Test on db<>fiddle here
Assuming t2, t3 & t4 have unique values.
You would probably like to verify that the execution plan is based on merge joins / hash joins and not nested loops.
-- Rank each #tbl_1 row by the first lookup table it matches and return the
-- rows tied for the best rank; rows matching no lookup table are filtered out.
SELECT TOP (1) WITH TIES
    t1.*
FROM #tbl_1 AS t1
LEFT JOIN #tbl_2 AS t2
    ON t1.id = t2.id
LEFT JOIN #tbl_3 AS t3
    ON t1.id = t3.id
LEFT JOIN #tbl_4 AS t4
    ON t1.id = t4.id
WHERE t2.id IS NOT NULL
   OR t3.id IS NOT NULL
   OR t4.id IS NOT NULL
ORDER BY
    CASE
        WHEN t2.Id IS NOT NULL THEN 1
        WHEN t3.Id IS NOT NULL THEN 2
        ELSE 3  -- only t4 can have matched here, given the WHERE clause
    END
fiddle
Another solution with no JOINs at all. Only GROUP BY :-)
-- Stack all four tables into one (id, priority) list and resolve per id:
--   priority 999 is a sentinel meaning "this id is present in #tbl_1";
--   priorities 1..3 identify the lookup table, lowest = highest precedence.
select top 1 with ties
    id
from (
              select id, 999 from #tbl_1
    union all select id, 1   from #tbl_2
    union all select id, 2   from #tbl_3
    union all select id, 3   from #tbl_4
) t (id, priority)
group by id
having max(priority) = 999   -- the id exists in #tbl_1 ...
   and min(priority) < 999   -- ... and in at least one lookup table; without this,
                             -- "no match anywhere" would return all of #tbl_1
order by min(priority)
fiddle
One other idea you might entertain, although not exactly elegant, is a more procedural approach so you only end up touching a table if you need to which will be more performant if that's a concern.
Insert qualifying IDs into a temp table and only move on if there are no results at each stage, for example
-- Procedural fallback: try each lookup table in precedence order and stop at
-- the first one that yields any rows, so later tables are never touched
-- unless they are needed.
create table #results (id int);

insert into #results (id)
select id
from t1 where exists (select * from t2 where t2.id = t1.id);

-- @@ROWCOUNT must be read immediately after the INSERT it refers to.
if @@ROWCOUNT = 0
begin
    insert into #results (id)
    select id
    from t1 where exists (select * from t3 where t3.Id = t1.Id);

    if @@ROWCOUNT = 0
    begin
        insert into #results (id)
        select id
        from t1 where exists (select * from t4 where t4.Id = t1.Id);
    end
end

select id
from #results;
Example Fiddle

Skip all rows if one row id is present in another table

I have three tables with this relation:
T1.journo = T2.journo
T2.recid = T3.spid
T1
ticketno journo
1 A1
2 A2
T2
journo recid
A1 1
A1 2
A1 3
A2 4
A2 5
A2 6
T3
spid
2
I want only those entries of T1 where T2.recid is not present in T3.spid.
Below query will just omit the 2nd row of T2. But I want all the rows of T2 with journo=A1 omitted because one of the recid of A1 is present in T3.
-- Row-level filter: only the individual T2 rows whose recid appears in T3 are
-- dropped, so the remaining T2 rows of the same journo still join to T1.
select T1.* from T1 join T2 on T1.journo = T2.journo
where T2.recid not in (select spid from T3)
Desired Output:
ticketno journo
2 A2
Any tips?
To me, this sounds like not exists:
-- Keep a t1 row only when none of its journo's t2 rows has a recid listed in t3.
SELECT t1.*
FROM t1
WHERE NOT EXISTS (
    SELECT 1
    FROM t2
    INNER JOIN t3
        ON t3.spid = t2.recid
    WHERE t2.journo = t1.journo
);
Should be able to use not in with a sub query.
-- Sample data as table variables (names must start with @); the identity
-- columns generate ticketno 1..2 and recid 1..6 in insert order.
declare @t1 table (ticketno int identity(1,1), journo varchar(2))
declare @t2 table (journo varchar(2), recid int identity(1,1))
declare @t3 table (spid int)

insert into @t1 (journo)
values
('A1'),
('A2')

insert into @t2 (journo)
values
('A1'),
('A1'),
('A1'),
('A2'),
('A2'),
('A2')

insert into @t3 (spid)
values
(2)

-- Exclude every journo that has at least one recid listed in @t3.
-- The NOT IN subquery would match nothing if it ever returned a NULL journo.
select T1.* , T2.*
from @t1 T1
inner join @t2 T2 on T1.journo = T2.journo
where T2.journo not in (
    select t22.journo
    from @t2 t22
    where t22.recid in (select t3.spid from @t3 t3)
)
Or, not exists correlated
where not exists(select t22.journo from #t2 t22 where t22.recid in (select * from #t3) and t22.journo = T2.journo)
Join T2 to T3 with a left join, group on the T1 columns, then add a HAVING check of COUNT(t3.spid) = 0.
NULLs are not counted, so a count of zero is what you're looking for.
-- COUNT(t3.spid) ignores the NULLs produced by the left join, so a count of
-- zero means no recid of this journo was found in #t3.
SELECT
    t1.ticketno,
    t1.journo
FROM #t1 AS t1
INNER JOIN #t2 AS t2
    ON t1.journo = t2.journo
LEFT JOIN #t3 AS t3
    ON t2.recid = t3.spid
GROUP BY
    t1.ticketno,
    t1.journo
HAVING COUNT(t3.spid) = 0
SQL, the ugly but necessary part of our lives:
-- Flag each T2 row with 1 when its recid is listed in T3, total the flags per
-- journo, and keep only the T1 rows whose journo totals zero.
-- (T2 has no ticketno column, so the inner query selects journo and the flag only.)
SELECT *
FROM T1
INNER JOIN (
    SELECT
        a1.journo,
        SUM(a1.spid_present) AS total_spids
    FROM (
        SELECT
            T2.journo,
            CASE
                WHEN T3.spid IS NOT NULL THEN 1
                ELSE 0
            END AS spid_present
        FROM T2
        LEFT JOIN T3
            ON T2.recid = T3.spid
    ) AS a1
    GROUP BY a1.journo
) AS a2
    ON T1.journo = a2.journo
   AND a2.total_spids = 0
Using EXCEPT:
-- Sample tables and rows from the question.
CREATE TABLE t1 (ticketno INT, journo CHAR(2));
INSERT INTO t1 (ticketno, journo)
VALUES (1, 'A1'), (2, 'A2');

CREATE TABLE t2 (journo CHAR(2), recid INT);
INSERT INTO t2 (journo, recid)
VALUES ('A1', 1), ('A1', 2), ('A1', 3),
       ('A2', 4), ('A2', 5), ('A2', 6);

CREATE TABLE t3 ([SPID] INT);
INSERT INTO t3 ([SPID])
VALUES (2);

-- All tickets minus the tickets whose journo has a recid listed in t3.
SELECT t1.*
FROM t1
EXCEPT
SELECT t1.*
FROM t1
INNER JOIN t2
    ON t1.journo = t2.journo
INNER JOIN t3
    ON t3.[SPID] = t2.recid;
You can try the below SQL query.
-- Keep a T1 row when its journo has rows in T2 and none of those rows has a
-- recid listed in T3. The check is correlated on journo (not on the single
-- recid) so that one hit removes the whole journo, as the question requires.
select T1.*
from T1
where exists (
          select 1
          from T2 a
          where a.journo = T1.journo
      )
  and not exists (
          select 1
          from T2
          inner join T3
              on T2.recid = T3.spid
          where T2.journo = T1.journo
      );
By my understanding, simple join
-- Returns a t1 row once for every t2 row that belongs to a DIFFERENT journo
-- and whose recid is listed in t3.
-- NOTE(review): this gives the desired rows only when exactly one journo has
-- recids in t3. With two or more such journos each is returned because of the
-- other, and with none the result is empty — confirm before relying on it.
select t1.* from #t1 t1
join #t2 t2
on t1.journo <> t2.journo
join #t3 t3
on t2.recid = t3.spid

Delete duplicates, and reassign FK value in SQL

I have two tables:
Table1:
[id], [ColA]
Table2:
[id], [Table1Id]
A previous update caused duplicates in Table1, and those duplicates are associated with rows in Table2. It looks like this in the db:
Table1
1, 89
2, 89
Table2
6, 1
7, 2
I would like to delete the duplicates and reassign the value of the Table2.Table1Id to the one Table1.id row that should be left. Is this possible in a single statement? Ive seen this post on how to delete all but one, but I am lost on how to reassign the Table2.Table1Id value.
edit:
The end result should look like this:
Table1
2, 89
Table2
6, 2
7, 2
-- Demo: remove duplicate Table1 rows (same ColA) and repoint Table2's foreign
-- key at the surviving row of each duplicate group.

drop table if exists dbo.Table2;
drop table if exists dbo.Table1;

create table dbo.Table1 (
      ID int primary key
    , ColA int
);

create table dbo.Table2 (
      ID int
    , Table1ID int
);

alter table dbo.Table2
    add constraint FK_Table2_Table1
    foreign key (Table1ID)
    references dbo.Table1 (ID);

insert into dbo.Table1 (ID, ColA)
values (1, 89), (2, 89);

insert into dbo.Table2 (ID, Table1ID)
values (6, 1), (7, 2);

drop table if exists #temp;

-- #temp maps every row to delete (ID) to the survivor of its group (KeepID).
-- The survivor is chosen per ColA group (partition by), so several duplicate
-- groups are handled independently. It is the highest ID, which matches the
-- expected end result in the question (row 2 stays). Rows with a NULL ColA
-- are not treated as duplicates of each other.
select
      d.ID
    , d.KeepID
into #temp
from (
    select
          t.ID
        , max(t.ID) over (partition by t.ColA) as KeepID
    from dbo.Table1 t
    where t.ColA is not null
) d
where d.ID <> d.KeepID;

-- Repoint and delete atomically: either both steps succeed or neither does.
set xact_abort on;
begin transaction;

    -- Repoint children first, otherwise the FK would block the delete.
    update t2
    set
        Table1ID = t.KeepID
    from dbo.Table2 t2
    inner join #temp t on t.ID = t2.Table1ID;

    delete t1
    from dbo.Table1 t1
    inner join #temp t on t.ID = t1.ID;

commit transaction;

select
    *
from dbo.Table1 t;

select
    *
from dbo.Table2 t;

T-SQL Group Check

I have three tables - Table_1, Table_2 and Table_3. Here's what each looks like:
Table_1
ID | TicketID | Rule
---------------------------
1 | 101 | NULL
Table_2
TicketID | Location
---------------------------
101 | A
101 | B
Table_3
Location | Rule
--------------------
A | R1
B | R1
A | R2
B | R2
C | R2
My goal is to populate the Rule column in Table_1 (which should be a select distinct of the Rule column in Table_3). The process should be:
Take the TicketID from Table_1. Join Table_1 to Table_2 on TicketID and get the Locations associated with that ticket.
Look up Table_3 and check what Rule needs to be applied. The check needs to be at the group level. For instance, in this example, Ticket 101 has locations A,B. Table_3 has A,B against Rule R1 but also has A,B,C against Rule R2. The correct rule should be R1 as Ticket 101 has no allocation for location C. Hope this make sense. What would be the easiest way of achieving this? Thanks in advance!
Please try the below code. Its working fine in SQL server 2012.
-- Sample data as table variables (names must start with @).
DECLARE @table_1 TABLE
(ID int, TicketID int, [Rule] Varchar(10))
DECLARE @table_2 TABLE
(TicketID int, Location Varchar(10))
DECLARE @table_3 TABLE
(Location Varchar(10),[Rule] Varchar(10))
INSERT @table_1
(ID,TicketID,[Rule])
VALUES
(1,101,NULL)
INSERT @table_2
(TicketID,Location)
VALUES
(101,'A'),
(101,'B')
INSERT @table_3
(Location,[Rule])
VALUES
('A','R1'),
('B','R1'),
('A','R2'),
('B','R2'),
('C','R2')
-- Rules that cover at least one ticket location, minus any rule that also
-- needs a location the tickets do not have (R2 needs C, so only R1 remains).
SELECT DISTINCT [RULE] FROM @table_3 t3 WHERE t3.Location IN
(SELECT t2.Location FROM @table_2 t2 INNER JOIN @table_1 t1 ON t1.TicketID = t2.TicketID)
AND [RULE] NOT IN
(SELECT t.[RULE] FROM @table_3 t WHERE t.Location NOT IN
(SELECT t2.Location FROM @table_2 t2 INNER JOIN @table_1 t1 ON t1.TicketID = t2.TicketID))
TRY THIS..
-- Lists, per ticket, every rule that shares at least one location with the ticket.
-- NOTE(review): for the sample data this returns both R1 and R2 for ticket 101,
-- because it never checks that ALL of a rule's locations are covered — confirm
-- whether that is the intended result.
SELECT t1.TicketID
,t2.[Rule]
FROM (SELECT DISTINCT t2.TicketID,[Rule]
FROM #table_3 t3 INNER JOIN
#table_2 t2 ON t2.Location = t3.Location) t2
INNER JOIN #table_1 t1 ON t2.TicketID = t1.TicketID
Use this select statement with the above declared table variable
--http://stackoverflow.com/questions/37344006/t-sql-group-check
-- Matches tickets to rules by comparing location sets: a rule fits a ticket
-- when every rule location is one of the ticket's locations and both sides
-- have the same number of locations (obs).
set nocount on

-- Drop the scratch table only if an earlier run left it behind; an
-- unconditional DROP fails on the first run.
if object_id('tempdb..#temp') is not null
    drop table #temp

-- Table variables must be named with a leading @.
DECLARE @table_1 TABLE
(ID int, TicketID int, [Rule] Varchar(10))
DECLARE @table_2 TABLE
(TicketID int, Location Varchar(10),obs int)
DECLARE @table_3 TABLE
(Location Varchar(10),[Rule] Varchar(10), obs int)

INSERT @table_1
(ID,TicketID,[Rule])
VALUES
(1,101,NULL)

INSERT @table_2
(TicketID,Location)
VALUES
(101,'A'),
(101,'B'),
--(101,'C')
(102,'A'),
(102,'B'),
(102,'C'),
(102,'S')

INSERT @table_3
(Location,[Rule])
VALUES
('A','R1'),
('B','R1'),
('A','R2'),
('B','R2'),
('C','R2'),
('S','R2')

-- @table_4 holds the number of locations per ticket (src = 't2') and per
-- rule (src = 't3'); id stores either a ticket id or a rule name as text.
declare @table_4 table (src varchar(2), id varchar(3), obs int)

insert into @table_4 (src, id, obs)
select 't2', [ticketid], count(*) from @table_2 group by [ticketid]

insert into @table_4 (src, id, obs)
select 't3', [rule], count(*) from @table_3 group by [rule]

-- Compare as text: @table_4.id also contains rule names such as 'R1', and an
-- implicit varchar-to-int conversion of those values would raise an error.
update t2
set obs = t4.obs
from @table_2 t2
join @table_4 t4 on t4.id = cast(t2.ticketid as varchar(3))
where t4.src = 't2'

update t3
set obs = t4.obs
from @table_3 t3
join @table_4 t4 on rtrim(t4.id) = rtrim(t3.[rule])
where t4.src = 't3'

-- Pair ticket locations with rule locations; a rule location that no ticket
-- has shows up with a NULL ticketid.
select t2.ticketid,t2.location as t2location,t2.obs as t2obs,t3.location as t3location,t3.[Rule],t3.obs as t3obs
into #temp
from @table_2 t2
full join @table_3 t3 on t3.location = t2.location

-- Discard rules that require a location no ticket has.
delete #temp
where [rule] in (select [rule] from #temp where ticketid is null)

--select t.*
--from #temp t

-- A rule applies to a ticket when the locations pair up and the counts agree.
select distinct ticketid,[rule]
from #temp
where t2location = t3location and t2obs = t3obs
Try this code :
-- Rules that cover at least one location of the tickets in Table_1, excluding
-- any rule that also lists a location those tickets do not have.
SELECT DISTINCT [RULE]
FROM Table_3 AS t3
WHERE t3.Location IN (
          SELECT t2.Location
          FROM Table_2 AS t2
          INNER JOIN Table_1 AS t1
              ON t1.TicketID = t2.TicketID
      )
  AND [RULE] NOT IN (
          SELECT t.[RULE]
          FROM Table_3 AS t
          WHERE t.Location NOT IN (
                    SELECT t2.Location
                    FROM Table_2 AS t2
                    INNER JOIN Table_1 AS t1
                        ON t1.TicketID = t2.TicketID
                )
      )

SQL Join query to show records if exists in master table or not

Table1
id name color
1,'a','red'
2,'a','blue'
3,'b','red'
4,'c','red'
5,'d','red'
6,'a','green'
-- Table1 sample rows as a table variable (names must start with @).
-- Row 6 ('a', 'green') is part of the listed data and is what gives 'a' all
-- three master colors, so it is inserted as well.
declare @t1 table (id int, name varchar(10), color varchar(5))
insert into @t1 (id, name, color) values (1, 'a', 'red')
insert into @t1 (id, name, color) values (2, 'a', 'blue')
insert into @t1 (id, name, color) values (3, 'b', 'red')
insert into @t1 (id, name, color) values (4, 'c', 'red')
insert into @t1 (id, name, color) values (5, 'd', 'red')
insert into @t1 (id, name, color) values (6, 'a', 'green')
table t2 (master table )
color
red
blue
green
-- Master list of colors as a table variable (names must start with @).
declare @t2 table (color varchar(5))
insert into @t2 (color) values ('red')
insert into @t2 (color) values ('blue')
insert into @t2 (color) values ('green')
The output will be
'a','red'
'a','blue'
'a','green'
We need to retrieve from table 1 the names that have all of the colors listed in t2.
You can get the names in t1 that match all master colors using group by, having, and join:
-- Relational division: a name qualifies when the number of distinct master
-- colors it has equals the total number of master colors.
SELECT t1.name
FROM t1
INNER JOIN t2
    ON t2.color = t1.color
GROUP BY t1.name
HAVING COUNT(DISTINCT t1.color) = (SELECT COUNT(*) FROM t2);
This returns the names. If you want the detailed rows, then use this as a subquery or CTE and join t1 back to these results.
And to get the detailed rows:
-- Detail rows for every name that has all master colors: the subquery finds
-- the qualifying names, the outer query returns their t1 rows.
SELECT t1.*
FROM t1
WHERE t1.name IN (
    SELECT named.name
    FROM t1 AS named
    INNER JOIN t2
        ON t2.color = named.color
    GROUP BY named.name
    HAVING COUNT(DISTINCT named.color) = (SELECT COUNT(*) FROM t2)
);