Find lowest common parent in recursive SQL table - sql

Suppose I have a recursive table (e.g. employees with managers) and a list of size 0..n of ids. How can I find the lowest common parent for these ids?
For example, if my table looks like this:
Id | ParentId
---|---------
1 | NULL
2 | 1
3 | 1
4 | 2
5 | 2
6 | 3
7 | 3
8 | 7
Then the following sets of ids lead to the following results (the first one is a corner case):
[] => 1 (or NULL, doesn't really matter)
[1] => 1
[2] => 2
[1,8] => 1
[4,5] => 2
[4,6] => 1
[6,7,8] => 3
How to do this?
EDIT: Note that parent isn't the correct term in all cases. It's the lowest common node in all paths up the tree. The lowest common node can also be a node itself (for example in the case [1,8] => 1, node 1 is not a parent of node 1 but node 1 itself).
Kind regards,
Ronald

Here's one way of doing it: it uses a recursive CTE to find the ancestry of a node, and CROSS APPLY over the input values to get the common ancestry; you only need to change the values inserted into the ids table variable:
----------------------------------------- SETUP
-- Sample hierarchy: every row points at its parent; the root row has ParentId NULL.
CREATE TABLE MyData (
    Id int NOT NULL CONSTRAINT PK_MyData PRIMARY KEY, -- the recursive lookups join on Id
    ParentId int NULL
);
-- Explicit column lists keep the inserts valid if columns are added or reordered.
INSERT INTO MyData (Id, ParentId) VALUES (1, NULL);
INSERT INTO MyData (Id, ParentId) VALUES (2, 1);
INSERT INTO MyData (Id, ParentId) VALUES (3, 1);
INSERT INTO MyData (Id, ParentId) VALUES (4, 2);
INSERT INTO MyData (Id, ParentId) VALUES (5, 2);
INSERT INTO MyData (Id, ParentId) VALUES (6, 3);
INSERT INTO MyData (Id, ParentId) VALUES (7, 3);
INSERT INTO MyData (Id, ParentId) VALUES (8, 7);
GO
-- Inline table-valued function.
-- Returns one row (Id) for the node @Id itself and for every ancestor found by
-- following ParentId upwards in MyData. Returns no rows when @Id is not in MyData.
CREATE FUNCTION AncestorsUdf (@Id int)
RETURNS TABLE
AS
RETURN (
    WITH Ancestors (Id, ParentId)
    AS (
        -- Anchor: the starting node.
        SELECT md.Id, md.ParentId
        FROM MyData AS md
        WHERE md.Id = @Id
        UNION ALL
        -- Recursive step: move from the current node to its parent.
        -- Stops at the root because its ParentId is NULL and matches nothing.
        SELECT md.Id, md.ParentId
        FROM MyData AS md
        INNER JOIN Ancestors AS a
            ON md.Id = a.ParentId
    )
    SELECT Id FROM Ancestors
);
GO
----------------------------------------- ACTUAL QUERY
DECLARE @ids TABLE (Id int NOT NULL);
DECLARE @Count int;
-- your data (perhaps via a "split" udf)
INSERT INTO @ids (Id) VALUES (6);
INSERT INTO @ids (Id) VALUES (7);
INSERT INTO @ids (Id) VALUES (8);
SELECT @Count = COUNT(*) FROM @ids;

-- A node is a common ancestor when it shows up in the ancestry of every input id,
-- i.e. its group holds exactly @Count rows.
-- ORDER BY a.Id DESC picks the lowest node only if children always have a larger Id
-- than their parents.
SELECT TOP (1) a.Id
FROM @ids AS i
CROSS APPLY AncestorsUdf(i.Id) AS a
GROUP BY a.Id
HAVING COUNT(*) = @Count
ORDER BY a.Id DESC;
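Update: if the node ids aren't strictly ascending (a child may have a smaller Id than its parent), use the depth of each node instead of its Id to pick the lowest one: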
-- Multi-statement table-valued function.
-- Returns the node @Id and all of its ancestors together with their [Level],
-- where the topmost node reached has Level 0 and each step down adds 1.
-- NOTE: CREATE fails if a function with this name already exists; drop the
-- earlier version first.
CREATE FUNCTION AncestorsUdf (@Id int)
RETURNS @result TABLE (Id int, [Level] int)
AS
BEGIN
    WITH Ancestors (Id, ParentId, RelLevel)
    AS (
        -- Anchor: the starting node, at relative level 0.
        SELECT md.Id, md.ParentId, 0
        FROM MyData AS md
        WHERE md.Id = @Id
        UNION ALL
        -- Recursive step: each parent is one level above (-1) its child.
        SELECT md.Id, md.ParentId, a.RelLevel - 1
        FROM MyData AS md
        INNER JOIN Ancestors AS a
            ON md.Id = a.ParentId
    )
    INSERT INTO @result (Id, [Level])
    SELECT Id, RelLevel FROM Ancestors;

    -- Shift the relative (zero or negative) levels so the topmost node becomes 0.
    DECLARE @Min int;
    SELECT @Min = MIN([Level]) FROM @result;
    -- Intentionally no WHERE clause: every row is rebased.
    UPDATE @result SET [Level] = [Level] - @Min;
    RETURN;
END
GO
and
-- Same idea as before, but the lowest common node is chosen by its depth
-- ([Level], larger = further from the top) rather than by its Id value.
-- Expects @ids and @Count to be declared and filled earlier in the same batch.
SELECT TOP (1) a.Id
FROM @ids AS i
CROSS APPLY AncestorsUdf(i.Id) AS a
GROUP BY a.Id, a.[Level]
HAVING COUNT(*) = @Count
ORDER BY a.[Level] DESC;

After doing some thinking and some hints in the right direction from Marc's answer (thanks), I came up with another solution myself:
DECLARE @parentChild TABLE (Id INT NOT NULL, ParentId INT NULL);
INSERT INTO @parentChild (Id, ParentId) VALUES (1, NULL);
INSERT INTO @parentChild (Id, ParentId) VALUES (2, 1);
INSERT INTO @parentChild (Id, ParentId) VALUES (3, 1);
INSERT INTO @parentChild (Id, ParentId) VALUES (4, 2);
INSERT INTO @parentChild (Id, ParentId) VALUES (5, 2);
INSERT INTO @parentChild (Id, ParentId) VALUES (6, 3);
INSERT INTO @parentChild (Id, ParentId) VALUES (7, 3);
INSERT INTO @parentChild (Id, ParentId) VALUES (8, 7);
DECLARE @ids TABLE (Id INT NOT NULL);
INSERT INTO @ids (Id) VALUES (6);
INSERT INTO @ids (Id) VALUES (7);
INSERT INTO @ids (Id) VALUES (8);
DECLARE @count INT;
SELECT @count = COUNT(*) FROM @ids;
WITH Nodes (Id, ParentId, Depth) AS
(
    -- Start from every node in the @ids collection.
    SELECT pc.Id, pc.ParentId, 0 AS Depth
    FROM @parentChild AS pc
    INNER JOIN @ids AS i ON pc.Id = i.Id
    UNION ALL
    -- Recursively find parent nodes for each starting node;
    -- Depth decreases by one for every step up the tree.
    SELECT pc.Id, pc.ParentId, n.Depth - 1
    FROM @parentChild AS pc
    INNER JOIN Nodes AS n ON pc.Id = n.ParentId
)
-- A node reached from every starting id (@count rows) is common to all paths.
-- Ordering by MIN(Depth) DESC puts the common node closest to the starting
-- nodes first; TOP (1) keeps only that lowest common node instead of the whole
-- path up to the root.
SELECT TOP (1) n.Id
FROM Nodes AS n
GROUP BY n.Id
HAVING COUNT(*) = @count
ORDER BY MIN(n.Depth) DESC;
Without a TOP 1 the query returns the entire path from the lowest common parent up to the root node; adding TOP 1 to the select returns only the lowest common node.

Related

Find data by multiple Lookup table clauses

-- Sample data: characters and the link table that assigns names to characters.
declare @Character table (id int, [name] varchar(12));
insert into @Character (id, [name])
values
    (1, 'tom'),
    (2, 'jerry'),
    (3, 'dog');
declare @NameToCharacter table (id int, nameId int, characterId int);
insert into @NameToCharacter (id, nameId, characterId)
values
    (1, 1, 1),
    (2, 1, 3),
    (3, 1, 2),
    (4, 2, 1);
The Name Table has more than just 1,2,3 and the list to parse on is dynamic
NameTable
id | name
----------
1 foo
2 bar
3 steak
CharacterTable
id | name
---------
1 tom
2 jerry
3 dog
NameToCharacterTable
id | nameId | characterId
1 1 1
2 1 3
3 1 2
4 2 1
I am looking for a query that will return a character that has two names. For example
With the above data only "tom" will be returned.
-- Attempted query: IN keeps a row when its nameId equals ANY value in the list.
SELECT ntc.*
FROM nameToCharacterTable AS ntc
WHERE ntc.nameId IN (1, 2)
The IN clause returns every row that has a 1 or a 2. I want to return only the characters that have both a 1 and a 2.
I am stumped; I have tried everything I know and do not want to resort to dynamic SQL. Any help would be great.
The 1,2 in this example will be a dynamic list of integers; for example, it could be 1,3,4,5,...
Count how many rows each character has in the NameToCharacter table that match the list you are providing (which I have assumed you can convert into a table variable or temp table), and keep only the characters whose count equals the number of required names, e.g.
declare @Character table (id int, [name] varchar(12));
insert into @Character (id, [name])
values
    (1, 'tom'),
    (2, 'jerry'),
    (3, 'dog');
declare @NameToCharacter table (id int, nameId int, characterId int);
insert into @NameToCharacter (id, nameId, characterId)
values
    (1, 1, 1),
    (2, 1, 3),
    (3, 1, 2),
    (4, 2, 1);
-- The dynamic list of names a character must have.
declare @RequiredNames table (nameId int);
insert into @RequiredNames (nameId)
values
    (1),
    (2);
-- Relational division: keep a character when the number of distinct required
-- names it is linked to equals the number of distinct required names.
-- Comparing against the count of @RequiredNames (instead of a literal) makes the
-- query follow the list size; DISTINCT keeps duplicate rows from skewing either side.
select C.id, C.[name]
from @Character as C
where (
    select count(distinct NC.nameId)
    from @NameToCharacter as NC
    where NC.characterId = C.id
      and NC.nameId in (select RN.nameId from @RequiredNames as RN)
) = (select count(distinct RN.nameId) from @RequiredNames as RN);
Returns:
id
name
1
tom
Note: Providing DDL+DML as shown here makes it much easier for people to assist you.
This is classic Relational Division With Remainder.
There are a number of different solutions. @DaleK has given you an excellent one: inner-join everything, then check that each set has the right number of rows. This is normally the fastest solution.
If you want to ensure it works with a dynamic amount of rows, just change the last line to
) = (SELECT COUNT(*) FROM #RequiredNames);
Two other common solutions exist.
Left-join and check that all rows were joined
-- Keep a character when every required name found a matching link row.
-- COUNT(*) counts all required names; COUNT(nc.nameId) counts only those that
-- joined, because unmatched rows of the LEFT JOIN carry NULL.
-- Expects @Character, @RequiredNames and @NameToCharacter from the same batch.
SELECT c.id, c.[name]
FROM @Character AS c
WHERE EXISTS (SELECT 1
    FROM @RequiredNames AS rn
    LEFT JOIN @NameToCharacter AS nc
        ON nc.nameId = rn.nameId AND nc.characterId = c.id
    HAVING COUNT(*) = COUNT(nc.nameId) -- all rows are joined
);
Double anti-join, in other words: there are no "required" that are "not in the set"
-- Keep a character when there is no required name that it lacks.
-- Expects @Character, @RequiredNames and @NameToCharacter from the same batch.
SELECT c.id, c.[name]
FROM @Character AS c
WHERE NOT EXISTS (SELECT 1
    FROM @RequiredNames AS rn
    WHERE NOT EXISTS (SELECT 1
        FROM @NameToCharacter AS nc
        WHERE nc.nameId = rn.nameId AND nc.characterId = c.id
    )
);
A variation on the one from the other answer uses a windowed aggregate instead of a subquery. I don't think this is performant, but it may have uses in certain cases.
-- Each required-name row carries the total number of required names (cnt);
-- a character qualifies when the number of joined rows equals that total.
-- Expects @Character, @RequiredNames and @NameToCharacter from the same batch.
SELECT c.id, c.[name]
FROM @Character AS c
WHERE EXISTS (SELECT 1
    FROM (
        SELECT r.nameId, COUNT(*) OVER () AS cnt
        FROM @RequiredNames AS r
    ) AS rn
    INNER JOIN @NameToCharacter AS nc
        ON nc.nameId = rn.nameId AND nc.characterId = c.id
    HAVING COUNT(*) = MIN(rn.cnt)
);
db<>fiddle

SQL select -one to many joins want to have the manys

I have two tables, TBL_PARENT (parentID, ParentName) and TBL_CHILDREN (ParentID,Child_Name)
A Parent can have 0 to many children
What I want is a query to give me a list of parent and their children in single row per parent.
For example
Parent1 John,Mary
Parent2 jane,steve,jana
And the number of rows to be the total number of parents
Try this query.
It uses three temp tables: two of them (#parant and #ch) stand in for the tables that already exist in your database,
and the third (#testTable) is a temp table that holds the joined result.
-- Stand-ins for the existing parent and child tables.
create table #parant (id int, name varchar(10));
create table #ch (id int, name varchar(10), pid int);
insert into #parant (id, name) values (1, 'PA');
insert into #parant (id, name) values (2, 'PB');
insert into #parant (id, name) values (3, 'PC');
insert into #ch (id, name, pid) values (1, 'Ca', 1);
insert into #ch (id, name, pid) values (1, 'Cb', 1);
insert into #ch (id, name, pid) values (1, 'Cc', 1);
insert into #ch (id, name, pid) values (1, 'Cd', 3);
insert into #ch (id, name, pid) values (1, 'Cf', 3);
insert into #ch (id, name, pid) values (1, 'Ch', 1);
-- Joined result: one row per parent/child pair; a parent without children gets
-- a single row with an empty child name (LEFT JOIN + coalesce).
create table #testTable (id int, name varchar(10), chid int, chname varchar(10), cpid int);
insert into #testTable (id, name, chid, chname, cpid)
select x.id, x.name, coalesce(y.id, 0), coalesce(y.name, ''), coalesce(y.pid, 0)
from #parant as x
left join #ch as y
    on x.id = y.pid;
-- One row per parent with its children as a comma-separated list.
-- FOR XML PATH('') concatenates the names; TYPE + .value() returns them as plain
-- text so characters such as & or < are not turned into XML entities.
-- STUFF removes the leading comma. ORDER BY makes the list order deterministic.
select t.id, t.name, stuff(
    (select ',' + s.chname
     from #testTable as s
     where s.id = t.id
     order by s.chname
     for xml path(''), type).value('.', 'varchar(max)'), 1, 1, '') as CSV
from #testTable as t
group by t.id, t.name;
GO
drop table #testTable
drop table #ch
drop table #parant
for the above data i got the following result
1 PA Ca,Cb,Cc,Ch
2 PB
3 PC Cd,Cf
-- One row per parent: how many children it has and their names as a
-- comma-separated list.
-- Fixes over the earlier form: the join condition lives in ON and uses "="
-- ("==" is not a T-SQL operator), child names are aggregated instead of being
-- selected outside the GROUP BY, and LEFT JOIN keeps parents with no children.
SELECT
    COUNT(C.Child_Name) AS ChildCount,
    P.ParentName,
    STUFF((
        SELECT ',' + C2.Child_Name
        FROM TBL_CHILDREN AS C2
        WHERE C2.ParentID = P.parentID
        ORDER BY C2.Child_Name
        -- nvarchar(max) is a safe target; TODO confirm Child_Name's real type
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 1, '') AS Child_Name
FROM TBL_PARENT AS P
LEFT JOIN TBL_CHILDREN AS C
    ON P.parentID = C.ParentID
-- Group by the key as well, so two parents sharing a name stay separate rows.
GROUP BY P.parentID, P.ParentName;
The join condition on parentID matches each child row to its parent, COUNT counts the matched rows for each parent, and GROUP BY groups the rows by parent so that each parent appears once together with its children.

distance between given set and some other sets - sql server 2005

I have a table which contains the item and category ids:
-- Link table: one row per (item, category) association.
CREATE TABLE SomeTable
(
    ItemId     int,
    CategoryId int
)
Given some category ids (Set X) I would like to determine all item ids that share at least one category id and some stats for each of these item ids:
A – Number of category ids of item id that are not in set x
B – Number of category ids shared between item id and set x
C – Number of category ids in set x but which are not associated with item id
I have written some tsql code which involves a cross join and several ctes plus left joins. It works but is fairly slow.
I am sure someone must have encountered a similar problem. I would provide the code but the above description is simplified. Thanks.
Here's a couple of ideas. (I don't know how they'll compare performance wise with what you have already. Left for you to benchmark.)
set nocount on;
-- create a sample table (ItemId is an identity, so each sample item has one category)
declare @T table ( ItemId int identity(1,1), CategoryId int );
insert into @T (CategoryId) values ( 100 );
insert into @T (CategoryId) values ( 100 );
insert into @T (CategoryId) values ( 100 );
insert into @T (CategoryId) values ( 100 );
insert into @T (CategoryId) values ( 100 );
insert into @T (CategoryId) values ( 200 );
insert into @T (CategoryId) values ( 200 );
insert into @T (CategoryId) values ( 300 );
insert into @T (CategoryId) values ( 300 );
insert into @T (CategoryId) values ( 300 );
insert into @T (CategoryId) values ( 300 );
insert into @T (CategoryId) values ( 500 );
insert into @T (CategoryId) values ( 500 );
insert into @T (CategoryId) values ( 500 );
insert into @T (CategoryId) values ( 600 );
insert into @T (CategoryId) values ( 700 );
insert into @T (CategoryId) values ( 800 );
insert into @T (CategoryId) values ( 800 );
insert into @T (CategoryId) values ( 800 );
insert into @T (CategoryId) values ( 900 );
-- grab some CategoryIDs to work with (set X)
declare @X table ( CategoryId int );
insert into @X (CategoryId)
select CategoryID=200 union
select CategoryID=400 union
select CategoryID=600 union
select CategoryID=800;
-- NOTE: the queries below list category ids and compare set X against ALL rows
-- of @T; none of them filters or groups by ItemId. Wrap a query in COUNT(*) to
-- get the number instead of the list.
-- A. Category ids in @T that are not in set x
select distinct t.CategoryID from @T as t
where not exists(select 1 from @X as x where t.CategoryID = x.CategoryID);
-- or, using the set difference operator
select CategoryID from @T
except
select CategoryID from @X;
-- B. Category ids shared between @T and set x
select distinct x.CategoryID from @X as x
inner join @T as t on t.CategoryID = x.CategoryID;
-- or, using set intersection
select CategoryID from @T
intersect
select CategoryID from @X;
-- C. Category ids in set x that do not occur in @T
select distinct x.CategoryID from @X as x
where not exists(select 1 from @T as t where t.CategoryID = x.CategoryID);
-- or, using the set difference operator
select CategoryID from @X
except
select CategoryID from @T;
The problem with CTE is they are run each time they are referenced and do not have constraints. Load your Set X into a temporary table with primary key on ID. Then run the same joins against the temporary and you should see big performance gain. SQL does much better when joins are based on primary keys.

Recursive select in SQL

I have an issue I just can't get my head around. I know what I want, just simply can't get it out on the screen.
What I have is a table looking like this:
Id, PK UniqueIdentifier, NotNull
Name, nvarchar(255), NotNull
ParentId, UniqueIdentifier, Null
ParentId have a FK to Id.
What I want to accomplish is to get a flat list of all the id's below the Id I pass in.
example:
1 TestName1 NULL
2 TestName2 1
3 TestName3 2
4 TestName4 NULL
5 TestName5 1
The tree would look like this:
-1
-> -2
-> -3
-> -5
-4
If I now ask for 4, I would only get 4 back, but if I ask for 1 I would get 1, 2, 3 and 5.
If I ask for 2, I would get 2 and 3 and so on.
Is there anyone who can point me in the right direction. My brain is fried so I appreciate all help I can get.
declare @T table(
    Id int primary key,
    Name nvarchar(255) not null,
    ParentId int);
insert into @T (Id, Name, ParentId) values
    (1, 'TestName1', NULL),
    (2, 'TestName2', 1),
    (3, 'TestName3', 2),
    (4, 'TestName4', NULL),
    (5, 'TestName5', 1);
-- The node whose subtree is requested.
declare @Id int = 1;
with cte as
(
    -- Anchor: the requested node itself.
    select T.Id, T.Name, T.ParentId
    from @T as T
    where T.Id = @Id
    union all
    -- Recursive step: rows whose parent is already in the result.
    select T.Id, T.Name, T.ParentId
    from @T as T
    inner join cte as C
        on T.ParentId = C.Id
)
select Id, Name, ParentId
from cte;
Result
Id Name ParentId
----------- -------------------- -----------
1 TestName1 NULL
2 TestName2 1
5 TestName5 1
3 TestName3 2
Here's a working example:
declare @t table (id int, name nvarchar(255), ParentID int);
insert into @t (id, name, ParentID) values
    (1, 'TestName1', NULL),
    (2, 'TestName2', 1 ),
    (3, 'TestName3', 2 ),
    (4, 'TestName4', NULL),
    (5, 'TestName5', 1 );
with rec as
(
    -- Anchor: every row starts its own tree; baseid remembers that start.
    select t.name
    , t.id as baseid
    , t.id
    , t.parentid
    from @t as t
    union all
    -- Recursive step: add the children of the current row, keeping the
    -- baseid of the tree they were reached from.
    select t.name
    , r.baseid
    , t.id
    , t.parentid
    from rec as r
    inner join @t as t
        on t.ParentID = r.id
)
-- The CTE builds the subtree of every node; the filter keeps the one asked for.
select name, baseid, id, parentid
from rec
where baseid = 1;
You can filter on baseid, which contains the start of the tree you're querying for.
Try this:
-- Id of the node whose subtree should be returned. The type follows the table
-- definition in the question (UniqueIdentifier); assign the Id you are asking for.
DECLARE @RootId UNIQUEIDENTIFIER;
WITH RecQry AS
(
    -- Anchor: only the requested node. Without this filter every row would start
    -- its own recursion and descendants would be returned many times over.
    SELECT *
    FROM MyTable
    WHERE Id = @RootId
    UNION ALL
    -- Recursive step: children of rows already in the result.
    SELECT a.*
    FROM MyTable a INNER JOIN RecQry b
        ON a.ParentID = b.Id
)
SELECT *
FROM RecQry;
Here is a good article about Hierarchy ID models. It goes right from the start of the data right through to the query designs.
Also, you could use a Recursive Query using a Common Table Expression.
I'm guessing that the easiest way to accomplish what you're looking for would be to write a recursive query using a Common Table Expression:
MSDN - Recursive Queries Using Common Table Expressions

SQL Server: How to get all child records given a parent id in a self referencing table

Hi, I have a table which references itself and I need to be able to select the parent and all its child records from a given parent Id.
My table is as follows:
ID | ParentID | Name
-----------------------
1 NULL A
2 1 B-1
3 1 B-2
4 2 C-1
5 2 C-2
So for the above example I'd like to be able to pass in a value of 1 and get all the records above.
So far, I've come up with the following recursive table-valued-function but it's not behaving as expected (only returning the first record).
-- Returns a branch of tblLinkAdvertiserCity as rows of (ID, ParentID, Name).
--   @id       : when not NULL, the branch starts at the row with this ID
--               (that row and all of its descendants are returned).
--   @parentId : used only when @id is NULL; the branch then starts at the rows
--               whose ParentID equals @parentId.
-- Replaces the self-calling version, which tested an undeclared variable and
-- never walked down to the child rows.
CREATE FUNCTION [dbo].[SelectBranches]
(
    @id INT
    ,@parentId INT
)
RETURNS @branchTable TABLE
(
    ID INT
    ,ParentID INT
    -- Name holds text such as 'B-1', so it cannot be INT.
    -- NOTE(review): length/type chosen here; match the real column definition.
    ,Name NVARCHAR(255)
)
AS
BEGIN
    WITH Branches (ID, ParentID, Name) AS
    (
        -- Anchor: the starting row(s).
        SELECT t.ID, t.ParentID, t.Name
        FROM tblLinkAdvertiserCity AS t
        WHERE t.ID = @id
           OR (@id IS NULL AND t.ParentID = @parentId)
        UNION ALL
        -- Recursive step: children of rows already collected.
        SELECT c.ID, c.ParentID, c.Name
        FROM tblLinkAdvertiserCity AS c
        INNER JOIN Branches AS b
            ON c.ParentID = b.ID
    )
    INSERT INTO @branchTable (ID, ParentID, Name)
    SELECT ID, ParentID, Name
    FROM Branches;
    RETURN;
END
GO
You can try this
DECLARE @Table TABLE(
    ID INT,
    ParentID INT,
    NAME VARCHAR(20)
);
INSERT INTO @Table (ID,ParentID,[NAME]) SELECT 1, NULL, 'A';
INSERT INTO @Table (ID,ParentID,[NAME]) SELECT 2, 1, 'B-1';
INSERT INTO @Table (ID,ParentID,[NAME]) SELECT 3, 1, 'B-2';
INSERT INTO @Table (ID,ParentID,[NAME]) SELECT 4, 2, 'C-1';
INSERT INTO @Table (ID,ParentID,[NAME]) SELECT 5, 2, 'C-2';
-- The parent whose branch is requested.
DECLARE @ID INT;
SELECT @ID = 2;
WITH ret AS(
    -- Anchor: the requested row.
    SELECT t.ID, t.ParentID, t.[NAME]
    FROM @Table AS t
    WHERE t.ID = @ID
    UNION ALL
    -- Recursive step: rows whose parent is already in the result.
    SELECT t.ID, t.ParentID, t.[NAME]
    FROM @Table AS t
    INNER JOIN ret AS r
        ON t.ParentID = r.ID
)
SELECT ID, ParentID, [NAME]
FROM ret;
Recursion in a CTE looked a bit expensive to me, so I wrote this function, which uses recursive function calls instead and is much faster than CTE recursion.
-- Returns the Id of the category @p_ParentCategoryId together with the Ids of
-- all categories below it (children, grandchildren, ...), each Id once.
-- Works by calling itself for every direct child that has children of its own.
CREATE FUNCTION [dbo].[Fn_GetSubCategories]
(
    @p_ParentCategoryId INT
) RETURNS @ResultTable TABLE
(
    Id INT
)
AS
BEGIN
    --Insert the category itself and its first level subcategories.
    INSERT INTO @ResultTable (Id)
    SELECT DISTINCT Id
    FROM Category
    WHERE ParentCategoryId = @p_ParentCategoryId OR Id = @p_ParentCategoryId;

    DECLARE @Id INT;
    -- Direct children only; "Id != parent" skips a row that is its own parent,
    -- which would otherwise recurse forever.
    DECLARE cur_categories CURSOR
    LOCAL STATIC READ_ONLY FORWARD_ONLY FOR
    SELECT Id FROM Category WHERE ParentCategoryId = @p_ParentCategoryId AND Id != @p_ParentCategoryId;
    OPEN cur_categories;
    -- On an empty cursor the first FETCH sets @@FETCH_STATUS to a non-zero
    -- value, so the loop body is simply skipped.
    FETCH NEXT FROM cur_categories INTO @Id;
    WHILE @@FETCH_STATUS = 0
    BEGIN
        --Insert remaining level sub categories.
        IF EXISTS(SELECT 1 FROM Category WHERE ParentCategoryId = @Id AND Id != @Id)
        BEGIN
            -- Add only Ids that are not in the result yet, so no duplicate
            -- clean-up pass is needed afterwards.
            INSERT INTO @ResultTable (Id)
            SELECT DISTINCT C.Id
            FROM dbo.Fn_GetSubCategories(@Id) AS C
            WHERE NOT EXISTS (SELECT 1 FROM @ResultTable AS R WHERE R.Id = C.Id);
        END
        FETCH NEXT FROM cur_categories INTO @Id;
    END
    CLOSE cur_categories;
    DEALLOCATE cur_categories;
    RETURN;
END
Unless you are using Oracle, your table structure is not suitable for the problem described. What you are attempting to do is grab a hierarchy (traversing a tree structure).
There is an article, More Trees & Hierarchies in SQL, that describes one method of solving the hierarchy problem. He basically adds a "lineage" column describing the hierarchy to every row.