SQL Join data and get rows that don't match with NULL - sql

I have two tables that I want join as follows:
Table 1
Code1 | Code2 | Date(1) | Amount(1)
A | AA | 201802 | 100
A | AA | 201803 | 50
A | AA | 201804 | 30
Table 2
Code1 | Code2 | Date(2) | Amount(2)
A | AA | 201801 | 20
A | AA | 201802 | 10
A | AA | 201803 | 10
And I want the resulting table to look like this:
Result
Code1 | Code2 | Date(1) | Date(2) | Amount(1) | Amount(2)
A | AA | NULL | 201801 | NULL | 20
A | AA | 201802 | 201802 | 100 | 10
A | AA | 201803 | 201803 | 50 | 10
A | AA | 201804 | NULL | 30 | NULL
So I need to join these two tables
on table1.Code1 = table2.Code1 AND table1.Code2 = table2.Code2 AND table1.Date(1) = table2.Date(2)
But I also want the rows where the dates don't match, with NULL in the columns that belong to the non-matching table (such as the row for Date(1) = 201804 in my example).
I have tried joining that two tables with left, right and outer join but I still am not successful in getting the rows with the nulls (probably because Code1 and Code2 don't exist for that particular missing row)
Maybe a cross apply could work, but I am not sure how to execute it.
I want the most efficient way in terms of performance because this is a part of a big query containing lots of data and lots of calculations.
UPDATE:
The code I used is:
Select table1.Code1, table1.Code2, Table1.Date(1), table2.Date(2), table1.Amount(1), table2.amount(2)
FROM Table1
Full Outer Join
table2 ON
table1.Code1 = table2.Code1
AND table1.Code2 = table2.Code2
AND table1.date(1) = table2.date(2)
Which gives me the following result:
Code1 | Code2 | Date(1) | Date(2) | Amount(1) | Amount(2)
A | AA | 201802 | 201802 | 100 | 10
A | AA | 201803 | 201803 | 50 | 10
Which is missing these two rows:
A | AA | NULL | 201801 | NULL | 20
A | AA | 201804 | NULL | 30 | NULL

You may try this.
--sample dataset
-- "#" names are temp tables, which are created with CREATE TABLE
-- (DECLARE ... TABLE is only valid for @table variables).
CREATE TABLE #tab1 (
    Code1   varchar(10),
    Code2   varchar(10),
    Date1   int,
    Amount1 int
);

INSERT INTO #tab1 (Code1, Code2, Date1, Amount1)
VALUES
    ('A', 'AA', 201802, 100),
    ('A', 'AA', 201803, 50),
    ('A', 'AA', 201804, 30),
    ('B', 'AA', 201802, 100); --additional

CREATE TABLE #tab2 (
    Code1   varchar(10),
    Code2   varchar(10),
    Date2   int,
    Amount2 int
);

-- Amounts follow Table 2 of the question (20 / 10 / 10).
INSERT INTO #tab2 (Code1, Code2, Date2, Amount2)
VALUES
    ('A', 'AA', 201802, 10),
    ('A', 'AA', 201803, 10),
    ('A', 'AA', 201801, 20);
query
-- The full outer join keeps periods that exist on only one side (the other
-- side's columns come back NULL). The CROSS APPLY is used purely as a filter:
-- a row survives only if its code pair occurs in #tab2.
-- Columns are listed explicitly so Code1 is not returned twice (once from the
-- derived table and once from the apply).
SELECT
    t1.Code1,
    t1.Code2,
    t1.Date1,
    t1.Date2,
    t1.Amount1,
    t1.Amount2
FROM (
    SELECT
        COALESCE(table1.Code1, table2.Code1) AS Code1,
        COALESCE(table1.Code2, table2.Code2) AS Code2,
        table1.Date1,
        table2.Date2,
        table1.Amount1,
        table2.Amount2
    FROM #tab1 AS table1
    FULL OUTER JOIN #tab2 AS table2
        ON  table1.Code1 = table2.Code1
        AND table1.Code2 = table2.Code2
        AND table1.Date1 = table2.Date2
) AS t1
CROSS APPLY ( --to exclude records not matched by "Code 1 and Code 2"
    SELECT TOP 1
        t.Code1
    FROM #tab2 AS t
    WHERE t.Code1 = t1.Code1
      AND t.Code2 = t1.Code2
) AS c
ORDER BY t1.Date1;
or like this:
-- Same result as a full outer join on code pair + period, restricted to code
-- pairs that occur in #tab2.
SELECT
    COALESCE(a.Code1, b.Code1) AS Code1,
    COALESCE(a.Code2, b.Code2) AS Code2,
    a.Date1,
    b.Date2,
    a.Amount1,
    b.Amount2
FROM #tab1 AS a
FULL OUTER JOIN #tab2 AS b
    ON  b.Code1 = a.Code1
    AND b.Code2 = a.Code2
    AND b.Date2 = a.Date1
WHERE EXISTS (
    --to exclude records not matched by "Code 1 and Code 2"
    SELECT 1
    FROM #tab2 AS probe
    WHERE probe.Code1 = COALESCE(a.Code1, b.Code1)
      AND probe.Code2 = COALESCE(a.Code2, b.Code2)
)
ORDER BY a.Date1

My suggested solution involves a full join and another join to a derived table that contains all the combinations of code1 and code2 that exists in both tables, using the intersect operator.
First, create and populate sample data (Please save us this step in your future questions):
-- Sample data for the query that follows. "#" names are temp tables, so they
-- are created with CREATE TABLE (DECLARE ... TABLE only works for @variables).
CREATE TABLE #T1
(
    Code1 char(1),
    Code2 char(2),
    Date1 char(6),
    Amount1 int
);

CREATE TABLE #T2
(
    Code1 char(1),
    Code2 char(2),
    Date2 char(6),
    Amount2 int
);

INSERT INTO #T1 (Code1, Code2, Date1, Amount1) VALUES
 ('A', 'AA', '201802', 100)
,('A', 'AA', '201803', 50)
,('A', 'AA', '201804', 30)
,('B', 'AA', '201802', 30); -- Note: Added to the original sample data

INSERT INTO #T2 (Code1, Code2, Date2, Amount2) VALUES
 ('A', 'AA', '201801', 20)
,('A', 'AA', '201802', 10)
,('A', 'AA', '201803', 10)
,('A', 'AB', '201802', 10); -- Note: Added to the original sample data
The query:
-- Full join on code pair + period, then an inner join to the set of code
-- pairs found in both tables, which drops rows whose codes exist on one side only.
SELECT
    ISNULL(lhs.Code1, rhs.Code1) AS Code1,
    ISNULL(lhs.Code2, rhs.Code2) AS Code2,
    lhs.Date1,
    rhs.Date2,
    lhs.Amount1,
    rhs.Amount2
FROM #T1 AS lhs
FULL OUTER JOIN #T2 AS rhs
    ON  rhs.Code1 = lhs.Code1
    AND rhs.Code2 = lhs.Code2
    AND rhs.Date2 = lhs.Date1
-- Remove this next join if you want to get rows where codes don't match
INNER JOIN (
    SELECT Code1, Code2
    FROM #T1
    INTERSECT
    SELECT Code1, Code2
    FROM #T2
) AS CommonCodes
    ON  ISNULL(lhs.Code1, rhs.Code1) = CommonCodes.Code1
    AND ISNULL(lhs.Code2, rhs.Code2) = CommonCodes.Code2
ORDER BY lhs.Date1
Results:
Code1 Code2 Date1 Date2 Amount1 Amount2
A AA NULL 201801 NULL 20
A AA 201802 201802 100 10
A AA 201803 201803 50 10
A AA 201804 NULL 30 NULL
You can see a live demo on rextester.

Your updated query should work if you ISNULL the CodeX columns.
-- Setup: "#" names are temp tables, so they are created with CREATE TABLE.
CREATE TABLE #t1 (Code1 varchar(4), Code2 varchar(4), Date1 date, Amount1 int);
CREATE TABLE #t2 (Code1 varchar(4), Code2 varchar(4), Date2 date, Amount2 int);

INSERT INTO #t1 (Code1, Code2, Date1, Amount1)
VALUES
    ('A', 'AA', '2018-02-01', 100),
    ('A', 'AA', '2018-03-01', 50),
    ('A', 'AA', '2018-04-01', 30);

INSERT INTO #t2 (Code1, Code2, Date2, Amount2)
VALUES
    ('A', 'AA', '2018-01-01', 20),
    ('A', 'AA', '2018-02-01', 10),
    ('A', 'AA', '2018-03-01', 10);

-- t0: code pairs present in both tables.
-- tt: for each such pair, the full outer join of both tables on codes + date,
--     so a date found on one side only comes back with NULLs for the other side.
SELECT
     t0.code1
    ,t0.code2
    ,tt.date1
    ,tt.date2
    ,tt.amount1
    ,tt.amount2
FROM (
    SELECT code1, code2 FROM #t1
    INTERSECT
    SELECT code1, code2 FROM #t2
) t0
CROSS APPLY (
    SELECT
        t1.date1, t2.date2, t1.amount1, t2.amount2
    FROM #t1 t1
    FULL OUTER JOIN #t2 t2
        ON  t1.Code1 = t2.Code1
        AND t1.Code2 = t2.Code2
        AND t1.Date1 = t2.Date2
    WHERE
        -- ISNULL picks whichever side of the full join actually has the codes
        t0.code1 = ISNULL(t1.Code1, t2.Code1)
        AND t0.code2 = ISNULL(t1.Code2, t2.Code2)
) tt
ORDER BY
    tt.date1, tt.date2;

Related

Join tables without primary key, foreign key that will also return the null values when joining the table

I have these tables:
Table 1:
Code1 Code2 Code3 Code4 ISCode5
-----------------------------------
xx NULL TEST1 TEST1 1
yy zzz TEST2 TEST2 1
NULL ss TEST3 TEST3 1
NULL aaa TEST4 TEST4 0
Table 2:
ID ColTest1 ColTest2 Code1 Code2
-----------------------------------------
1 2 3 xx NULL
1 3 4 yy zzz
2 5 6 NULL ss
2 5 6 NULL aaa
Expected output:
ID ColTest1 ColTest2 Code1 Code2
------------------------------------------
1 2 3 xx NULL
1 3 4 yy zzz
2 5 6 NULL ss
I need to join table1 and table 2 where IsCode5 = 1. But there is no primary key and foreign key for both tables. The only common columns for both tables is Code1 and Code2.
I have tried this:
-- Attempted join on both code columns. A pair in which either code is NULL
-- never satisfies "=", so only rows with both codes populated can match.
SELECT
T2.ID, T2.ColTest1, T2.ColTest2, T2.Code1, T2.Code2
FROM
[dbo].[table2] T2
INNER JOIN
[dbo].[table1] T1 ON T1.Code1 = T2.Code1
AND T1.Code2 = T2.Code2 -- Code2 is compared with Code2 (not Code1 with Code2)
WHERE
T1.ISCode5 = 1
But it only returns:
ID ColTest1 ColTest2 Code1 Code2
-----------------------------------------
1 3 4 yy zzz
Do you have any idea to return the expected result? Thanks.
I think you just need OR instead of AND
-- DDL
-- "#" names are temp tables, so they are created with CREATE TABLE.
CREATE TABLE #Table1 (Code1 varchar(2), Code2 varchar(3), Code3 varchar(5), Code4 varchar(5), ISCode5 bit);
CREATE TABLE #Table2 (ID int, ColTest1 int, ColTest2 int, Code1 varchar(2), Code2 varchar(3));
-- DML
INSERT INTO #Table1 (Code1, Code2, Code3, Code4, ISCode5)
VALUES
    ('xx', NULL, 'TEST1', 'TEST1', 1),
    ('yy', 'zzz', 'TEST2', 'TEST2', 1),
    (NULL, 'ss', 'TEST3', 'TEST3', 1),
    (NULL, 'aaa', 'TEST4', 'TEST4', 0);
INSERT INTO #Table2 (ID, ColTest1, ColTest2, Code1, Code2)
VALUES
    (1, 2, 3, 'xx', NULL),
    (1, 3, 4, 'yy', 'zzz'),
    (2, 5, 6, NULL, 'ss'),
    (2, 5, 6, NULL, 'aaa');
-- Query
-- NOTE: with OR, two rows are paired as soon as either code matches, so rows
-- that share only one of the two codes are joined as well.
SELECT T2.ID, T2.ColTest1, T2.ColTest2, T1.Code1, T1.Code2
FROM #Table1 T1
INNER JOIN #Table2 T2
    ON T1.Code1 = T2.Code1 OR T1.Code2 = T2.Code2 -- <= OR not AND
WHERE T1.ISCode5 = 1;
Results:
ID
ColTest1
ColTest2
Code1
Code2
1
2
3
xx
NULL
1
3
4
yy
zzz
2
5
6
NULL
ss
Note: if you add the DDL+DML as shown here you make it much easier for people to answer.
It seems your real problem is that NULL = NULL never returns TRUE, it returns UNKNOWN, which means the join fails.
There are a number of ways around this, primarily using OR ... IS NULL AND ... IS NULL, however a much neater solution involving INTERSECT is available
-- Keep a Table2 row when its (Code1, Code2) pair equals the pair of some
-- Table1 row with ISCode5 = 1. INTERSECT compares NULL with NULL as equal,
-- which a plain "=" join does not.
SELECT *
FROM Table2 AS t2
WHERE EXISTS (
    SELECT t1.Code1, t1.Code2
    FROM Table1 AS t1
    WHERE t1.ISCode5 = 1
    INTERSECT
    SELECT t2.Code1, t2.Code2
);
-- alternately
-- Same NULL-tolerant comparison, written as a join condition. The expected
-- output consists of Table2's columns, so the select list takes t2.*.
SELECT t2.*
FROM Table2 t2
JOIN Table1 t1 ON EXISTS (
    SELECT t2.Code1, t2.Code2
    INTERSECT
    SELECT t1.Code1, t1.Code2
)
WHERE t1.ISCode5 = 1;
db<>fiddle
This should optimize pretty well, as the compiler recognizes this construct and transforms it into an IS equality check, as documented in various places.

Join with dynamic Table and Column names

I'm trying to join from a table where the tables and fields are defined within the data instead of keys. So here is what I have
Table Root:
ID | Table | Field
---+---------+-----------
1 | Tab1 | Field1
2 | Tab2 | Field2
3 | Tab1 | Field2
4 | Tab3 | Field4
5 | Tab1 | Field1
Tab1
ID | Field1
---+---------
1 | A
2 | B
3 | C
4 | D
Tab2
ID | Field1 |Field2
---+--------+-----------
1 | X | Bla
2 | Y | 123
3 | Z | 456
Tab3 does not exist
I'd like to have a result like that one:
ID | Value
---+---------
1 | A
2 | 123
3 | NULL -- Field does not match
4 | NULL -- Tables does not exist
5 | NULL -- ID does not exist
Basically, I am trying to join using the ID, targeting a dynamic table and field.
My starting point is somewhere around here, but this is just for a single specific table. I can't figure out how to join dynamically, or whether it is even possible without dynamic SQL like exec.
you could solve this with a case expression and subqueries, like this example
-- Sample data. "#" names are temp tables, so they are created with
-- CREATE TABLE (DECLARE ... TABLE only works for @table variables).
CREATE TABLE #root (id int, [table] varchar(10), Field varchar(10));
CREATE TABLE #tab1 (id int, Field1 varchar(10));
CREATE TABLE #tab2 (id int, Field1 varchar(10), Field2 varchar(10));

INSERT INTO #root (id, [table], Field)
VALUES (1, 'Tab1', 'Field1'), (2, 'Tab2', 'Field2'), (3, 'Tab1', 'Field2'), (4, 'Tab3', 'Field4'), (5, 'Tab1', 'Field1');

INSERT INTO #tab1 (id, Field1)
VALUES (1, 'A'), (2, 'B'), (3, 'C'), (4, 'D');

INSERT INTO #tab2 (id, Field1, Field2)
VALUES (1, 'X', 'Bla'), (2, 'Y', '123'), (3, 'Z', '456');
-- One branch per known (table, field) pair; each branch looks the value up by
-- id. Any pair without a branch, or an id missing from the target table,
-- yields NULL.
SELECT
    r.id,
    CASE r.[Table]
        WHEN 'Tab1' THEN
            CASE r.Field
                WHEN 'Field1' THEN (SELECT t1.Field1 FROM #tab1 t1 WHERE t1.ID = r.ID)
            END
        WHEN 'Tab2' THEN
            CASE r.Field
                WHEN 'Field1' THEN (SELECT t2.Field1 FROM #tab2 t2 WHERE t2.id = r.id)
                WHEN 'Field2' THEN (SELECT t2.Field2 FROM #tab2 t2 WHERE t2.id = r.id)
            END
    END AS Value
FROM #root r
the result is
id Value
-- -------
1 A
2 123
3 null
4 null
5 null

Use data to name column

I have 2 tables and I want to run a query where I use a value in one of the tables to change what column dateadd uses.
table1
id value date1 date2 date3
-------|-------|------------|------------|-----------|
1 | 10 | 04/03/2018 | 04/03/2017 |01/03/2016 |
2 | 1 | 04/03/2018 | 05/03/2015 |02/03/2018 |
3 | 2 | 04/03/2016 | 06/03/2016 |03/03/2018 |
4 | 1 | 04/03/2015 | 07/03/2018 |04/03/2017 |
5 | 2 | 04/03/2017 | 09/03/2018 |05/03/2019 |
table2
id value
-------|-------|
1 | date1 |
2 | date3 |
3 | date3 |
4 | date2 |
5 | date1 |
The normal way to do ID 1 would be something like dateadd(month,10,date1). I'm not sure how to do this without me writing it every single time though.
-- Pseudo-query showing the goal: use the column named in table2.value
-- (date1 / date2 / date3) as the date argument of DATEADD.
-- NOTE(review): table1.[table2.value] refers to a column literally named
-- "table2.value"; a column cannot be chosen from row data this way.
select *
from table1
join table2 on table1.id = table2.id
where DATEADD(month, table1.value, table1.[table2.value]) between '1/1/18' and '12/31/18'
Twelfth's answer is correct. I just wanted to see if his theory works, and it does - here's a working implementation.
-- Sample data. "#" names are temp tables, so they are created with
-- CREATE TABLE (DECLARE ... TABLE only works for @table variables).
CREATE TABLE #table1 (id int, value int, date1 date, date2 date, date3 date);
CREATE TABLE #table2 (id int, colname varchar(5));

-- NOTE(review): these date literals depend on the session's date format;
-- presumably month/day/year is intended — confirm.
INSERT INTO #table1 (id, value, date1, date2, date3)
VALUES
    (1, 10, '04/03/2018', '04/03/2017', '01/03/2016'),
    (2, 1,  '04/03/2018', '05/03/2015', '02/03/2018'),
    (3, 2,  '04/03/2016', '06/03/2016', '03/03/2018'),
    (4, 1,  '04/03/2015', '07/03/2018', '04/03/2017'),
    (5, 2,  '04/03/2017', '09/03/2018', '05/03/2019');

-- colname names the #table1 date column to use for each id.
INSERT INTO #table2 (id, colname)
VALUES
    (1, 'date1'),
    (2, 'date3'),
    (3, 'date3'),
    (4, 'date2'),
    (5, 'date1');
-- Unpivot the three date columns into (colname, dn) rows, keep the row whose
-- colname is the one #table2 names for that id, shift it by "value" months,
-- and filter on the shifted date.
SELECT
    shifted.id,
    shifted.colname,
    shifted.newdate
FROM (
    SELECT
        u.id,
        u.colname,
        DATEADD(month, u.value, u.dn) AS newdate
    FROM #table1 AS src
    UNPIVOT (
        dn FOR colname IN ([date1], [date2], [date3])
    ) AS u
    INNER JOIN #table2 AS picked
        ON  picked.id = u.id
        AND picked.colname = u.colname
) AS shifted
WHERE shifted.newdate BETWEEN '1/1/2018' AND '12/31/2018'
Output:
id colname newdate
2 date3 2018-03-03
3 date3 2018-05-03
4 date2 2018-08-03
I've had this as theory, you're actually the first questioner I can try to apply it with. The idea is to unpivot your data and then join on the value column.
-- Unpivot table1's date columns into (column_name, date_value) rows, keep the
-- row whose column name is the one table2.value names for that id, and filter
-- on the date shifted by table1.value months.
-- The unpivoted value column is called date_value because UNPIVOT may not reuse
-- the name of an existing column (table1 already has "value"). After UNPIVOT
-- the source columns are addressed through the unpivot alias (a).
select
    a.id,
    a.column_name,
    dateadd(month, a.value, a.date_value) as new_date
from table1 t1
unpivot (
    date_value
    for column_name in (date1, date2, date3) -- list every date column table1 has
) a
inner join table2 t2
    on  t2.id = a.id
    and t2.value = a.column_name
where dateadd(month, a.value, a.date_value)
    between '1/1/18' and '12/31/18'
I can't guarantee that will work and am curious how it does for you.

SQL Server: Select all records from a table when values from three columns are in the columns of a previous query's result

I have a table with columns:
ID Date Code Amt Type
1 6/1/17 56 100.0 A
2 6/3/17 57 200.0 B
3 6/5/17 58 300.0 C
4 6/7/17 59 400.0 D
Based on a previous query ran, I received this result:
ID Date Code
1 6/1/17 56
2 6/3/17 57
I need to run a select query that grabs all the records from the first table based on the results of the query above, that is, a select that would only return:
ID Date Code Amt Type
1 6/1/17 56 100.0 A
2 6/3/17 57 200.0 B
Assuming you have a table TblYours
you could write a query like below
-- Join the base table to the earlier query's result on all three columns.
SELECT *
FROM TblYours AS T
INNER JOIN (
    -- replace the inner query with your query
    SELECT ID, Date, Code
    FROM #yourQuery
) AS T2
    ON  T2.ID = T.ID
    AND T2.Date = T.Date
    AND T2.Code = T.Code
You can do it with a CTE like:
-- "#" names are temp tables, so the sample table is created with CREATE TABLE.
CREATE TABLE #T (ID INT, Date DATE, Code INT, Amt DECIMAL(4,1), Type CHAR(1));
INSERT INTO #T (ID, Date, Code, Amt, Type) VALUES
(1, '2017-01-06', 56, 100, 'A'),
(2, '2017-03-06', 57, 200, 'B'),
(3, '2017-05-06', 58, 300, 'C'),
(4, '2017-07-06', 59, 400, 'D');
WITH CTE AS(
    SELECT ID, DATE, CODE /* I don't know how your query looks like*/
    FROM #T
    WHERE ID IN (1,2)
)
-- A row qualifies only when ID, Date and Code all appear together in the
-- previous query's result (matching on ID alone would ignore Date and Code).
SELECT t.ID, t.Date, t.Code, t.Amt, t.Type
FROM #T AS t
WHERE EXISTS (
    SELECT 1
    FROM CTE AS c
    WHERE c.ID = t.ID
      AND c.Date = t.Date
      AND c.Code = t.Code
);
Result:
+----+---------------------+------+-------+------+
| ID | Date | Code | Amt | Type |
+----+---------------------+------+-------+------+
| 1 | 06.01.2017 00:00:00 | 56 | 100,0 | A |
| 2 | 06.03.2017 00:00:00 | 57 | 200,0 | B |
+----+---------------------+------+-------+------+
Or using a subquery with INNER JOIN like:
-- "#" names are temp tables, so the sample table is created with CREATE TABLE.
CREATE TABLE #T (ID INT, Date DATE, Code INT, Amt DECIMAL(4,1), Type CHAR(1));
INSERT INTO #T (ID, Date, Code, Amt, Type) VALUES
(1, '2017-01-06', 56, 100, 'A'),
(2, '2017-03-06', 57, 200, 'B'),
(3, '2017-05-06', 58, 300, 'C'),
(4, '2017-07-06', 59, 400, 'D');
-- Join on all three columns returned by the previous query, not on ID alone.
SELECT TT.*
FROM #T TT INNER JOIN
    (SELECT ID, DATE, CODE /* I don't know how your query looks like*/
     FROM #T
     WHERE ID IN (1,2)) T
    ON  TT.ID = T.ID
    AND TT.Date = T.Date
    AND TT.Code = T.Code;
Result:
+----+---------------------+------+-------+------+
| ID | Date | Code | Amt | Type |
+----+---------------------+------+-------+------+
| 1 | 06.01.2017 00:00:00 | 56 | 100,0 | A |
| 2 | 06.03.2017 00:00:00 | 57 | 200,0 | B |
+----+---------------------+------+-------+------+

How to merge ranges from different tables

Giving the following 2 tables:
T1
------------------
From | To | Value
------------------
10 | 20 | XXX
20 | 30 | YYY
30 | 40 | ZZZ
T2
------------------
From | To | Value
------------------
10 | 15 | AAA
15 | 19 | BBB
19 | 39 | CCC
39 | 40 | DDD
What is the best way to get the result below, using T-SQL on SQL Server 2008?
The From/To ranges are sequential (there are no gaps) and the next From always has the same value as the previous To
Desired result
-------------------------------
From | To | Value1 | Value2
-------------------------------
10 | 15 | XXX | AAA
15 | 19 | XXX | BBB
19 | 20 | XXX | CCC
20 | 30 | YYY | CCC
30 | 39 | ZZZ | CCC
39 | 40 | ZZZ | DDD
First I declare data that looks like the data you posted. Please correct me if any assumptions I have made are wrong. Better would be to post your own declaration in the question so we are all working with the same data.
-- Sample ranges from the question. "#" names are temp tables, so they are
-- created with CREATE TABLE (DECLARE ... TABLE only works for @table variables).
CREATE TABLE #T1 (
    [From]  INT,
    [To]    INT,
    [Value] CHAR(3)
);

INSERT INTO #T1 ([From], [To], [Value])
VALUES
    (10, 20, 'XXX'),
    (20, 30, 'YYY'),
    (30, 40, 'ZZZ');

CREATE TABLE #T2 (
    [From]  INT,
    [To]    INT,
    [Value] CHAR(3)
);

INSERT INTO #T2 ([From], [To], [Value])
VALUES
    (10, 15, 'AAA'),
    (15, 19, 'BBB'),
    (19, 39, 'CCC'),
    (39, 40, 'DDD');
Here is my select query to generate your expected result:
-- Pair every #T1 range with every #T2 range it overlaps and emit the
-- intersection: the later of the two starts and the earlier of the two ends.
-- The value columns are named Value1 / Value2 as in the desired result, and
-- the output is ordered by range so it is deterministic.
SELECT
    CASE
        WHEN a.[From] > b.[From] THEN a.[From]
        ELSE b.[From]
    END AS [From],
    CASE
        WHEN a.[To] < b.[To] THEN a.[To]
        ELSE b.[To]
    END AS [To],
    a.[Value] AS Value1,
    b.[Value] AS Value2
FROM #T1 AS a
INNER JOIN #T2 AS b
    ON (
        -- the #T2 range starts inside the #T1 range
        a.[From] <= b.[From] AND
        a.[To] > b.[From]
    ) OR
    (
        -- the #T1 range starts inside the #T2 range
        b.[From] <= a.[From] AND
        b.[To] > a.[From]
    )
ORDER BY [From], [To];
Stealing #isme's data setup, I wrote the following:
-- EPs: every distinct range boundary from either table.
-- OrderedEndpoints: those boundaries numbered in ascending order.
-- The final select pairs each boundary with its successor and attaches the
-- row from each table whose range overlaps that pair.
;WITH EPs AS (
    SELECT [From] AS EP FROM #T1
    UNION
    SELECT [To] FROM #T1
    UNION
    SELECT [From] FROM #T2
    UNION
    SELECT [To] FROM #T2
),
OrderedEndpoints AS (
    SELECT
        EP,
        ROW_NUMBER() OVER (ORDER BY EP) AS rn
    FROM EPs
)
SELECT
    oe1.EP,
    oe2.EP,
    t1.Value,
    t2.Value
FROM OrderedEndpoints AS oe1
INNER JOIN OrderedEndpoints AS oe2
    ON oe2.rn = oe1.rn + 1
INNER JOIN #T1 AS t1
    ON  t1.[To] > oe1.EP
    AND t1.[From] < oe2.EP
INNER JOIN #T2 AS t2
    ON  t2.[To] > oe1.EP
    AND t2.[From] < oe2.EP
That is, you create a set containing all of the possible end points of periods (EPs), then you "sort" those and assign each one a row number (OrderedEndpoints).
Then the final query assembles each "adjacent" pair of rows together, and joins back to the original tables to find which rows from each one overlap the selected range.
The below query finds the smallest ranges, then picks the values back out the tables again:
-- ranges: for every distinct start point, the nearest end point after it,
-- i.e. the smallest sub-ranges. Each sub-range is then matched back to the
-- T1 row and the T2 row that contain it.
-- FROM and TO are reserved words in T-SQL, so the column names are bracketed.
SELECT ranges.[From], ranges.[To], T1.Value, T2.Value
FROM (
    SELECT all_from.[From], MIN(all_to.[To]) AS [To]
    FROM (
        SELECT T1.[From]
        FROM T1
        UNION
        SELECT T2.[From]
        FROM T2
    ) all_from
    JOIN (
        -- end points come from the [To] columns of both tables
        SELECT T1.[To]
        FROM T1
        UNION
        SELECT T2.[To]
        FROM T2
    ) all_to ON all_from.[From] < all_to.[To]
    GROUP BY all_from.[From]
) ranges
JOIN T1 ON ranges.[From] >= T1.[From] AND ranges.[To] <= T1.[To]
JOIN T2 ON ranges.[From] >= T2.[From] AND ranges.[To] <= T2.[To]
ORDER BY ranges.[From]
Thanks for the answers, but I ended up using a CTE, which I think is cleaner.
-- "#" names are temp tables, so they are created with CREATE TABLE.
CREATE TABLE #T1 ([From] INT, [To] INT, [Value] CHAR(3));
CREATE TABLE #T2 ([From] INT, [To] INT, [Value] CHAR(3));
INSERT INTO #T1 ([From], [To], [Value]) VALUES (10, 20, 'XXX'), (20, 30, 'YYY'), (30, 40, 'ZZZ');
INSERT INTO #T2 ([From], [To], [Value]) VALUES (10, 15, 'AAA'), (15, 19, 'BBB'), (19, 39, 'CCC'), (39, 40, 'DDD');

-- merged1: every pair of ranges that really overlap. Both comparisons are
-- strict, so ranges that merely touch (one ends where the other starts) are
-- not paired and no zero-length range is produced.
;WITH merged1 AS
(
    SELECT
        t1.[From] AS from1,
        t1.[To]   AS to1,
        t1.Value  AS Value1,
        t2.[From] AS from2,
        t2.[To]   AS to2,
        t2.Value  AS Value2
    FROM #T1 t1
    INNER JOIN #T2 t2
        ON  t1.[From] < t2.[To]
        AND t1.[To] > t2.[From]
)
-- merged2: the intersection of each pair (later start, earlier end).
,merged2 AS
(
    SELECT
         CASE WHEN from2 >= from1 THEN from2 ELSE from1 END AS [From]
        ,CASE WHEN to2 <= to1 THEN to2 ELSE to1 END AS [To]
        ,Value1
        ,Value2
    FROM merged1
)
SELECT [From], [To], Value1, Value2
FROM merged2
ORDER BY [From], [To];