Full Outer Join Using Each Row Once - sql

I'm wondering if anyone's come across a neat solution to this problem. I'm trying to select data from a couple of tables, having the records match up row by row. I'm basically after a full outer join, but there's one crucial difference. If I have four rows with a particular value in the column I'm joining on in one table, and three rows with this value in another, I only want the first three results to be joined, and the fourth to act as if there had been no match.
The reason for this is to create a reconciliation report which ensures transactions are not counted multiple times when comparing results. I can get around this issue by using a bit of grouping and some aggregate functions, but this hides some of the detail which I'd like to keep.
Below is an example to show the sort of thing I'm after, with the invalid/pseudo code in the comments illustrating how I'm thinking of this as working:
-- Sample data for the "join each row at most once" example.
-- Local temp tables are used (rather than table variables) so that the
-- names #t1 / #t2 referenced by the queries below resolve as written.
IF OBJECT_ID('tempdb..#t1') IS NOT NULL DROP TABLE #t1;
IF OBJECT_ID('tempdb..#t2') IS NOT NULL DROP TABLE #t2;

CREATE TABLE #t1
(
    id           bigint IDENTITY(1,1) PRIMARY KEY CLUSTERED,
    foreignKeyId bigint,
    otherData    nvarchar(10)
);

CREATE TABLE #t2
(
    id           bigint IDENTITY(1,1) PRIMARY KEY CLUSTERED,
    foreignKeyId bigint,
    moreData     nvarchar(10)
);

-- id is generated by IDENTITY, so only the two data columns are listed.
INSERT INTO #t1 (foreignKeyId, otherData)
VALUES (1, '1.1.1'),
       (1, '1.1.2'),
       (1, '1.1.3'),
       (3, '1.3.1'),
       (3, '1.3.2'),
       (3, '1.3.3'),
       (4, '1.4.3');

INSERT INTO #t2 (foreignKeyId, moreData)
VALUES (1, '2.1.1'),
       (1, '2.1.2'),
       (1, '2.1.3'),
       (2, '2.2.1'),
       (3, '2.3.1'),
       (3, '2.3.2'),
       (5, '2.5.1'),
       (5, '2.5.2');
--demo of the functionality I'm hoping to achieve
--
/*
select t1.id id1
, t2.id id2
, t1.foreignKeyId fk1
, t2.foreignKeyId fk2
, t1.otherData otherData
, t2.moreData moreData
from #t1 t1
full funky join #t2 t2
on t1.foreignKeyId = t2.foreignKeyId
order by t1.id, t2.id --we'd need an order by to ensure the match could be applied in a predictable manner
*/
--
-- Row-by-row emulation of a full outer join in which every source row is
-- used at most once. Reads #t1 and #t2 (columns id, foreignKeyId and
-- otherData / moreData) and builds the result in #funkyjoin.
-- NOTE: the loop consumes (deletes) the rows of #t2 as it goes.
IF OBJECT_ID('tempdb..#funkyjoin') IS NOT NULL DROP TABLE #funkyjoin;

CREATE TABLE #funkyjoin
(
    id1       bigint,
    id2       bigint,
    fk1       bigint,
    fk2       bigint,
    otherData nvarchar(10),
    moreData  nvarchar(10)
);

DECLARE @id1 bigint, @id2 bigint;

-- Seed the result with every #t1 row; the #t2 side starts out unmatched (NULL).
INSERT INTO #funkyjoin (id1, fk1, otherData)
SELECT id, foreignKeyId, otherData
FROM #t1;

WHILE EXISTS (SELECT 1 FROM #t2)
BEGIN
    -- Take the remaining #t2 row with the lowest id.
    SELECT TOP (1) @id2 = id
    FROM #t2
    ORDER BY id;

    -- Reset first: an assignment SELECT that returns no rows leaves the
    -- variable holding its previous value.
    SET @id1 = NULL;

    -- Find the lowest-id #t1 row with the same key that has not been paired yet.
    -- The ORDER BY makes the pairing deterministic.
    SELECT TOP (1) @id1 = id1
    FROM #funkyjoin
    WHERE fk2 IS NULL
      AND fk1 IN (SELECT foreignKeyId FROM #t2 WHERE id = @id2)
    ORDER BY id1;

    IF @id1 IS NULL
    BEGIN
        -- No free partner: the #t2 row goes in on its own.
        INSERT INTO #funkyjoin (id2, fk2, moreData)
        SELECT id, foreignKeyId, moreData
        FROM #t2
        WHERE id = @id2;
    END
    ELSE
    BEGIN
        UPDATE #funkyjoin
        SET id2      = @id2
          , fk2      = fk1 --since we're joining on this we can just match it
          , moreData = (SELECT moreData FROM #t2 WHERE id = @id2)
        WHERE id1 = @id1;
    END

    DELETE FROM #t2 WHERE id = @id2; --since this is only an example let's not worry about keeping our source data
END

SELECT id1, id2, fk1, fk2, otherData, moreData
FROM #funkyjoin
ORDER BY COALESCE(id1, id2);
I've written a similar solution for when this scenario occurs on spreadsheets previously: http://officemacros.codeplex.com/#WorksheetMergeMacro

If I understand correctly, this may be what you're after:
-- Number the rows inside each foreignKeyId group on both sides, then full
-- outer join on (foreignKeyId, n) so the k-th row of a group in #t1 pairs
-- only with the k-th row of the same group in #t2.
;WITH numbered_t1 AS
(
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY foreignKeyId ORDER BY id) AS n
    FROM #t1
),
numbered_t2 AS
(
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY foreignKeyId ORDER BY id) AS n
    FROM #t2
)
SELECT *
FROM numbered_t1 AS t1
FULL OUTER JOIN numbered_t2 AS t2
    ON  t2.foreignKeyId = t1.foreignKeyId
    AND t2.n = t1.n

The best way to use up the rows is to add a pseudo-row number (using ROW_NUMBER) and include that in the join.

Related

How to loop through table using while loop and create another table with values needed

I have two tables: MainTable and MyTable. MyTable has unique ControlNo and ID. I need to add the very first EffDate from MainTable to MyTable, based on ID and ControlNo.
For that I need to look at PreviousID column, then see if that PreviousID is in ID column and so on.
Desired output should look like this:
The below is an example with dummy data of getting proper EffDate by supplying an ID value. It works, but how can I loop through the whole MainTable, retrieve ID's and EffDate into separate table, then join that table to MyTable?
-- function returns PreviousID based on ID
-- Scalar function: returns the PreviousID stored in MainTable for the row
-- whose ID equals @ID, or NULL when no such row exists.
-- Parameters and local variables must be prefixed with @ in T-SQL.
-- Run this statement in its own batch (CREATE FUNCTION must be the first
-- statement of a batch).
CREATE FUNCTION [dbo].[GetPriorQuoteID](@ID varchar(50))
RETURNS varchar(50)
AS
BEGIN
    DECLARE @RetVal varchar(50);
    SET @RetVal = NULL;

    -- TOP (1) without ORDER BY: if several rows share the same ID, which
    -- PreviousID is returned is not defined.
    SELECT TOP (1) @RetVal = MainTable.PreviousID
    FROM MainTable
    WHERE MainTable.ID = @ID;

    RETURN @RetVal;
END
-- create sample table
-- NOTE: EffDate is created as a character column here (SELECT ... INTO from
-- string literals), so the style 101 conversion further down returns the
-- text unchanged.
IF OBJECT_ID('MainTable') IS NOT NULL DROP TABLE MainTable;
select 3333 as ControlNo, 'QuoteID3' as ID, 'QuoteID2' as PreviousID, '2020-08-25' as EffDate
into MainTable
union all select 2222 as ControlNo, 'QuoteID2', 'QuoteID1', '2019-08-25'
union all select 1111 as ControlNo, 'QuoteID1', NULL, '2018-08-25'
union all select 7777 as ControlNo, 'QuoteID6', 'QuoteID5', '2020-02-10'
union all select 6666 as ControlNo, 'QuoteID5', NULL, '2019-02-10';

select * from MainTable;

-- Walk the PreviousID chain from @ID back to the first quote and return
-- that quote's EffDate.
DECLARE @PriorQuote varchar(50);
DECLARE @NextQuote  varchar(50);
DECLARE @RetVal     VARCHAR(50) = '';
DECLARE @ControlNo  INT;
DECLARE @ID         varchar(50) = 'QuoteID3';

SELECT TOP 1 @ControlNo = MainTable.ControlNo FROM MainTable WHERE MainTable.ID = @ID;
SET @PriorQuote = @ID;
SELECT TOP 1 @PriorQuote = MainTable.ID FROM MainTable WHERE MainTable.ControlNo = @ControlNo;

-- Look up the predecessor once per step instead of three times.
-- The <> test stops the loop if a row names itself as its own predecessor.
SET @NextQuote = dbo.GetPriorQuoteID(@PriorQuote);
WHILE @NextQuote IS NOT NULL AND @NextQuote <> @PriorQuote
BEGIN
    SET @PriorQuote = @NextQuote;
    SET @NextQuote = dbo.GetPriorQuoteID(@PriorQuote);
END

SELECT TOP 1 @RetVal = CONVERT(VARCHAR(10), MainTable.EffDate, 101)
FROM MainTable
WHERE MainTable.ID = @PriorQuote;

SELECT @RetVal;

-- clean up
drop table MainTable;
drop function GetPriorQuoteID;
UPDATE: Adding dummy data tables
-- Sample table #MainTable: one row per quote; PreviousID names the quote it
-- follows (NULL for the first quote in a chain).
IF OBJECT_ID('tempdb..#MainTable') IS NOT NULL
    DROP TABLE #MainTable;

CREATE TABLE #MainTable
(
    ControlNo  int,
    ID         varchar(50),
    PreviousID varchar(50),
    EffDate    date
);

INSERT INTO #MainTable (ControlNo, ID, PreviousID, EffDate)
VALUES (3333, 'QuoteID3', 'QuoteID2', '2020-08-25'),
       (2222, 'QuoteID2', 'QuoteID1', '2019-08-25'),
       (1111, 'QuoteID1', NULL,       '2018-08-25'),
       (7777, 'QuoteID6', 'QuoteID5', '2020-02-10'),
       (6666, 'QuoteID5', NULL,       '2019-02-10');
--select * from #MainTable

-- Sample table #MyTable: the rows whose EffDate is still to be filled in.
IF OBJECT_ID('tempdb..#MyTable') IS NOT NULL
    DROP TABLE #MyTable;

CREATE TABLE #MyTable
(
    ControlNo int,
    ID        varchar(50),
    EffDate   date
);

INSERT INTO #MyTable (ControlNo, ID, EffDate)
VALUES (3333, 'QuoteID3', NULL),
       (7777, 'QuoteID6', NULL);
--select * from #MyTable
You can use a recursive query to traverse the hierarchy.
I would start by joining the original table with the main table, which restricts the paths to just the rows we are interested in. Then, you can recurse towards the parent. Finally, we need to filter on the top parent per path: top() and row_number() come handy for this.
Consider:
-- Start from the rows of #mytable (matched to #maintable on controlno and id),
-- then repeatedly step to the row named by previousid, counting the depth in lvl.
-- The starting controlno / id are carried along unchanged on every step.
with lineage (controlno, id, previousid, effdate, lvl) as (
    select start_row.controlno, start_row.id, main_row.previousid, main_row.effdate, 1
    from #mytable as start_row
    inner join #maintable as main_row
        on  main_row.controlno = start_row.controlno
        and main_row.id = start_row.id

    union all

    select step.controlno, step.id, parent_row.previousid, parent_row.effdate, step.lvl + 1
    from lineage as step
    inner join #maintable as parent_row
        on parent_row.id = step.previousid
)
-- row_number() is 1 for the deepest row of each (controlno, id) path;
-- top (1) with ties keeps every row that is numbered 1.
select top (1) with ties
       controlno,
       id,
       effdate
from lineage
order by row_number() over (partition by controlno, id order by lvl desc)
Demo on DB Fiddle:
controlno | id | effdate
--------: | :------- | :---------
3333 | QuoteID3 | 2018-08-25
7777 | QuoteID6 | 2019-02-10
using CTE like below you can get the desired results.
See live demo
Learn more about recursive CTEs here
-- Anchor on the rows with no PreviousID, then follow the chain forward
-- (child.PreviousID = parent.ID), carrying the anchor's EffDate to every
-- later row. Finally attach that EffDate to MyTable by ID.
;WITH first_eff AS
(
    SELECT EffDate,
           ControlNo,
           ID,
           1 AS [Level]
    FROM MainTable
    WHERE PreviousID IS NULL

    UNION ALL

    SELECT parent.EffDate,
           child.ControlNo,
           child.ID,
           parent.[Level] + 1 AS [Level]
    FROM MainTable AS child
    INNER JOIN first_eff AS parent
        ON parent.ID = child.PreviousID
)
SELECT MyTable.*,
       first_eff.EffDate
FROM first_eff
INNER JOIN MyTable
    ON MyTable.ID = first_eff.ID
You can use a recursive CTE for this:
-- Pairs every non-root row's ID with the ID of the root of its chain.
-- Anchor: direct children of root rows (rows whose parent has no PreviousID).
-- Recursive step: rows whose PreviousID is an ID already found; the root
-- value held in the PreviousID column is passed down unchanged.
WITH descendants (ID, PreviousID) AS
(
    SELECT child.ID,
           child.PreviousID
    FROM MainTable AS child
    INNER JOIN MainTable AS root_row
        ON root_row.ID = child.PreviousID
    WHERE root_row.PreviousID IS NULL

    UNION ALL

    SELECT next_row.ID,
           descendants.PreviousID
    FROM descendants
    INNER JOIN MainTable AS next_row
        ON next_row.PreviousID = descendants.ID
)
SELECT *
FROM descendants;
Here is a working example of the CTE approach with the table provided
-- Anchor: rows of MainTable whose ID is not used as any row's PreviousID
-- (the newest quote of each chain). The ID column is emitted as a typed NULL.
-- Recursive step: move to the row named by PreviousID, provided its EffDate
-- is earlier, keeping the anchor's ControlNo and incrementing HLevel.
;WITH recur_cte (ControlNo, ID, PreviousID, EffDate, HLevel) AS
(
    SELECT latest.ControlNo,
           CAST(NULL AS varchar(100)),
           latest.PreviousID,
           latest.EffDate,
           1
    FROM MainTable AS latest
    WHERE NOT EXISTS (SELECT 1
                      FROM MainTable AS successor
                      WHERE successor.PreviousID = latest.ID)

    UNION ALL

    SELECT walk.ControlNo,
           walk.ID,
           earlier.PreviousID,
           earlier.EffDate,
           walk.HLevel + 1
    FROM recur_cte AS walk
    INNER JOIN MainTable AS earlier
        ON  earlier.ID = walk.PreviousID
        AND earlier.EffDate < walk.EffDate
)
SELECT *
FROM recur_cte;
Results
ControlNo ID PreviousID EffDate HLevel
3333 NULL QuoteID2 2020-08-25 1
7777 NULL QuoteID5 2020-02-10 1
7777 NULL NULL 2019-02-10 2
3333 NULL QuoteID1 2019-08-25 2
3333 NULL NULL 2018-08-25 3

Looping over the insert data into multiple tables

I have a query that I currently run by hand to do manual inserts.
I can do it manually, but there are many records, so I was wondering whether I could build something to automate it.
I have a structure somewhat like this:
Have 4 id of a table - primary key values as:
var ids = "1,2,3,4";
loop over ids {
insert into table1(col1,col2,col3)
select col1,newid(),getdate() from table1 where id = ids - 1 at a time
var selectedID = get the id of the inserted row and then insert into another table as:
insert into table2(col1,col2,col3,col4)
select selectedID, getdate(),getdate(),4 from table2 where fkID = ids - one at a time
}
You can use both loops and cursors but often they can be avoided.
Is there a specific reason you note you want them inserted one at a time? An alternative would be to have the IDs staged, in a temp table, or CTE, e.g.
-- Stage the source ids in a CTE and copy the matching [Table1] rows in one
-- set-based INSERT (a new NEWID() / GETDATE() per copied row).
-- The ids are integer literals so the join compares like with like.
-- NOTE(review): assumes [Table1].[Id] is an integer key - confirm.
;WITH [Ids] ([ID]) AS
(
    SELECT v.[ID]
    FROM (VALUES (1), (2), (3), (4)) AS v ([ID])
)
INSERT INTO [Table1]
(
    [Col1],
    [Col2],
    [Col3]
)
SELECT T.[Col1],
       NEWID(),
       GETDATE()
FROM [Table1] AS T
INNER JOIN [Ids] AS I
    ON I.[ID] = T.[Id];
Which avoids the need for any loops, and should perform much better.
Edit
The way I would structure this, to make the query reusable would be as follows:
-- Reusable script: stage the source ids in #IDS, copy the matching [Table1]
-- rows, capture the ids of the new rows in #Inserted_IDS through the OUTPUT
-- clause, then insert one [table2] row per captured id.
IF OBJECT_ID('tempdb..#IDS') IS NOT NULL
    DROP TABLE #IDS;

IF OBJECT_ID('tempdb..#Inserted_IDS') IS NOT NULL
    DROP TABLE #Inserted_IDS;

CREATE TABLE #IDS (ID INT);
CREATE TABLE #Inserted_IDS (ID INT);

-- Only this list needs editing between runs.
INSERT INTO #IDS (ID)
VALUES (1), (2), (3), (4);

-- Copy the selected rows; OUTPUT records the ID of each row just inserted.
INSERT INTO [Table1] ([Col1], [Col2], [Col3])
OUTPUT Inserted.ID
INTO #Inserted_IDS
SELECT [Col1],
       NEWID(),
       GETDATE()
FROM [Table1] AS src
INNER JOIN #IDS AS staged
    ON staged.[ID] = src.[Id];

INSERT INTO [table2] ([col1], [col2], [col3], [col4])
SELECT new_row.[ID],
       GETDATE(),
       GETDATE(),
       4
FROM #Inserted_IDS AS new_row;

DROP TABLE #IDS;
DROP TABLE #Inserted_IDS;
Therefore you only need to amend the IDs being entered into the temp table each time you need to do the inserts.

conditional union multiple tables

This is based on union tables on value
-- Sample data: three tables with the same shape. Each table sets a different
-- datatypeN flag to 1 for its own rows.
-- Local temp tables are used so the names #t1 / #t2 / #t3 in the query below
-- resolve as written.
IF OBJECT_ID('tempdb..#t1') IS NOT NULL DROP TABLE #t1;
IF OBJECT_ID('tempdb..#t2') IS NOT NULL DROP TABLE #t2;
IF OBJECT_ID('tempdb..#t3') IS NOT NULL DROP TABLE #t3;

CREATE TABLE #t1 (val int, datatype1 int, datatype2 int, datatype3 int);
CREATE TABLE #t2 (val int, datatype1 int, datatype2 int, datatype3 int);
CREATE TABLE #t3 (val int, datatype1 int, datatype2 int, datatype3 int);

INSERT INTO #t1 (val, datatype1, datatype2, datatype3)
VALUES (10, 1, 0, 0), (31, 1, 0, 0), (20, 1, 0, 0);

INSERT INTO #t2 (val, datatype1, datatype2, datatype3)
VALUES (31, 0, 1, 0), (4, 0, 1, 0);

INSERT INTO #t3 (val, datatype1, datatype2, datatype3)
VALUES (31, 0, 0, 1), (5, 0, 0, 1);
Below is the changes in requirement(case):
1. need to union #t1,#t2 & #t3
(if same value exist #t1 & #t2 multiple rows and #t2 & t3 only 1 row)
2. if any duplicate value (there is no chance dup in same table)
i) suppose 31 in #t1 , 31 in #t2 then multiple rows are allowed
ii) suppose 31 in #t2 & #t3 then only one records i.e #t3 updated to #t2
iii) if 31 in #t1 ,#t2,#t3 only 2 records i.e #t1,#t2 records with #t3 details updated to #t2 records
Now i) & iii) are working fine
-- Stack the three tables, tagging each row with its source table, and take
-- the per-group maximum of every flag.
-- The second grouping key is 1 for rows from t2 and NULL for everything else,
-- so for a given val the t2 rows form one group and the t1 / t3 rows share another.
;WITH combined AS
(
    SELECT 't1' AS tab_name, val, datatype1, datatype2, datatype3 FROM #t1
    UNION ALL
    SELECT 't2' AS tab_name, val, datatype1, datatype2, datatype3 FROM #t2
    UNION ALL
    SELECT 't3' AS tab_name, val, datatype1, datatype2, datatype3 FROM #t3
)
SELECT val,
       MAX(datatype1) AS datatype1,
       MAX(datatype2) AS datatype2,
       MAX(datatype3) AS datatype3
FROM combined
GROUP BY val,
         CASE WHEN tab_name = 't2' THEN 1 END
ORDER BY val;
However, the current result also shows multiple records for case ii). Any help would be appreciated.
Expected Result:
Your question is very hard to follow. I'm a bit lost on the conditions, but this rather simple query returns the results that you specify:
-- Stack t1, t2 and t3 and collapse to one row per val, keeping the maximum
-- of each flag column.
-- NOTE(review): datatype4 is not part of the three-flag table definitions
-- shown in the question - confirm the tables queried here really have it.
WITH data AS
(
    SELECT 't1' AS tab_name, t1.* FROM t1
    UNION ALL
    SELECT 't2' AS tab_name, t2.* FROM t2
    UNION ALL
    SELECT 't3' AS tab_name, t3.* FROM t3
)
SELECT val,
       MAX(datatype1) AS datatype1,
       MAX(datatype2) AS datatype2,
       MAX(datatype3) AS datatype3,
       MAX(datatype4) AS datatype4
FROM data
GROUP BY val;
Here is a db<>fiddle.
I wonder if the culmination of all your results is a relatively simple aggregation.

Select record when it is the only record and not linked to in another table to a particular record

I am looking to select the id of a record from #table1 when that record is the only record in that table and is not currently linked in #t1Tot2 to a particular id from another table.
The following query below works, but I am wondering if there is a better way. It is setup to currently to return 55 the id of the only record added to table #table1. Inserting another record into #table1 would cause it to return no records ( good ), and linking #t2id in #t1Tot2 would make it return none as well ( good ). Is there a better way? Thanks.
-- Returns the t1id of the single row in #table1, but only when that row is
-- not already linked to @t2id in #t1Tot2; otherwise returns no row.
DECLARE @t2id INT;
SET @t2id = 1; --Record to link to

-- Local temp tables, so the names #table1 / #t1Tot2 resolve as written.
IF OBJECT_ID('tempdb..#table1') IS NOT NULL DROP TABLE #table1;
IF OBJECT_ID('tempdb..#t1Tot2') IS NOT NULL DROP TABLE #t1Tot2;

CREATE TABLE #table1
(
    t1id INT
);

CREATE TABLE #t1Tot2
(
    t1id INT,
    t2id INT
);

INSERT INTO #table1 (t1id)
VALUES (55);

--Will cause the query below to return no records because of having more than 1 record to be linked to
-- INSERT INTO #table1 (t1id)
-- VALUES (2);

--Will cause the query below to return no records because of already being linked to the t1id
-- INSERT INTO #t1Tot2 (t1id, t2id)
-- VALUES (55, @t2id);

-- The link filter uses @t2id (it was a hard-coded 1), so changing the
-- variable above changes which link is checked.
-- HAVING without GROUP BY treats the whole joined set as one group:
--   COUNT(1) = 1        -> exactly one joined row
--   COUNT(b.t2id) = 0   -> that row found no link to @t2id
SELECT MAX(a.t1id)
FROM #table1 AS a
LEFT JOIN #t1Tot2 AS b
    ON  a.t1id = b.t1id
    AND b.t2id = @t2id
HAVING COUNT(1) = 1
   AND COUNT(b.t2id) = 0;
--declare #table1 table (t1id int)
--declare #t1Tot2 table (t1id int)
-- Returns the row of #table1 only when the table holds exactly one row
-- (counting at most two rows is enough to tell) and no row in #t1Tot2 has
-- the same t1id.
SELECT only_row.t1id
FROM #table1 AS only_row
WHERE NOT EXISTS (SELECT *
                  FROM #t1Tot2 AS link
                  WHERE link.t1id = only_row.t1id)
  AND 1 = (SELECT COUNT(*)
           FROM (SELECT TOP 2 * FROM #table1) AS first_two)
-- Rows of a that have no matching bid in b, reduced to the aid values that
-- occur exactly once among those unmatched rows.
SELECT a.aid
FROM a
WHERE NOT EXISTS (SELECT 1
                  FROM b
                  WHERE b.bid = a.bid)
GROUP BY a.aid
HAVING COUNT(*) = 1
This should work as well. Note that it is SQL Server-specific SQL, by the way.

Finding One record from table having unique PK and duplicate FK

I want one record from a table having unique Primary Key and duplicate Foreign Key
Please see attached image below
alt text http://img413.imageshack.us/img413/9940/findduplicate.png
Thanks
-- One row per fk value that appears on more than one row of table1,
-- together with how many rows carry it.
SELECT fk,
       COUNT(*)
FROM table1
GROUP BY fk
HAVING COUNT(*) > 1
A primary key by definition means there will only be one row, so your question actually appears to be: are there any rows with more than one child row?
-- Rows of table1 that are referenced by more than one row of table2.
-- The child rows are grouped by the foreign key (fkid): grouping by table2's
-- own id would put every child in a group of one, so COUNT(*) > 1 could
-- never be true.
select *
from table1 t
where exists (
    select 1
    from table2 t2
    where t2.fkid = t.id
    group by t2.fkid
    having count(*) > 1
)
This would retrieve all unique fk and textVal values from the table:
-- One row per distinct (fk, textVal) combination in myTable.
SELECT fk,
       textVal
FROM myTable
GROUP BY fk,
         textVal
Have a look at this example.
This will find you all IDs from TABLE1 where it is duplicated in TABLE2 as a FOREIGN KEY
-- Self-contained example: lists every fkid that occurs on more than one row
-- of @Table2. Table variables must be declared with an @ prefix.
DECLARE @Table1 TABLE
(
    id INT
);

DECLARE @Table2 TABLE
(
    id   INT,
    fkid INT
);

INSERT INTO @Table1 (id)
VALUES (1), (2), (3);

INSERT INTO @Table2 (id, fkid)
VALUES (1, 1),
       (2, 2),
       (3, 2),
       (4, 3),
       (5, 3),
       (6, 3);

-- COUNT(t2.fkid) counts only non-NULL fkid values, so rows with a NULL
-- fkid are never reported as duplicates.
SELECT t2.fkid
FROM @Table2 AS t2
GROUP BY t2.fkid
HAVING COUNT(t2.fkid) > 1;