Recursive CTE for URL path generation - sql

We have a parent child relationship for pages in our content management system. The table is as follows
ContentID
Parent
Title
1
0
This Page
2
1
That Page
3
0
Another Page
4
3
Child of Another
5
4
Child of child
A parent of 0 indicates the ultimate parent.
I want to output just the parents of a given contentid in a path. As an example, the output for ContentID = 5 would be:
/another-page/child-of-another/
I've tried many recursive CTE examples that generate breadcrumbs and such like, but they always output the whole path:
/another-page/child-of-another/child-of-child/
I have a function to replace the spaces in the URL, so I'm just interested in the SQL to achieve the above.

One method, using an rCTE and STRING_AGG:
-- Demo setup: adjacency list of CMS pages. Parent = 0 marks a top-level page.
CREATE TABLE dbo.YourTable (
    ContentID int IDENTITY(1,1),
    Parent    int,
    Title     varchar(50)
);
INSERT INTO dbo.YourTable (Parent, Title)
VALUES (0, 'This Page'),
       (1, 'That Page'),
       (0, 'Another Page'),
       (3, 'Child of Another'),
       (4, 'Child of child');
GO
-- Page whose ancestor path is wanted (T-SQL variables use the @ sigil).
DECLARE @ID int = 5;
WITH rCTE AS (
    -- Anchor: the requested page itself, at Level 1.
    SELECT ContentID,
           Parent,
           Title,
           1 AS Level
    FROM dbo.YourTable
    WHERE ContentID = @ID
    UNION ALL
    -- Recursive step: climb to the parent of the previous row.
    -- Level increases toward the root.
    SELECT YT.ContentID,
           YT.Parent,
           YT.Title,
           r.Level + 1 AS Level
    FROM dbo.YourTable YT
    INNER JOIN rCTE r ON YT.ContentID = r.Parent
)
-- The highest Level is the topmost ancestor, so ORDER BY Level DESC lists the
-- root first. CONCAT treats NULL as an empty string, so a top-level page
-- (no ancestors, STRING_AGG returns NULL) yields '/' instead of NULL.
SELECT CONCAT('/',
              STRING_AGG(REPLACE(Title, ' ', '-'), '/')
                  WITHIN GROUP (ORDER BY Level DESC) + '/') AS Path
FROM rCTE
WHERE ContentID != @ID;  -- exclude the requested page; only its ancestors form the path
GO
DROP TABLE dbo.YourTable;
An alternative method, without using STRING_AGG, and instead using an additional JOIN in the rCTE:
-- Page whose ancestor path is wanted (T-SQL variables use the @ sigil).
DECLARE @ID int = 5;
WITH rCTE AS (
    -- Anchor: the direct parent of @ID. P is the requested page, YT its parent,
    -- so the requested page's own title never enters the path.
    SELECT YT.ContentID,
           YT.Parent,
           CONVERT(varchar(8000), '/' + YT.Title) AS Title,
           1 AS Level
    FROM dbo.YourTable YT
    INNER JOIN dbo.YourTable P ON YT.ContentID = P.Parent
    WHERE P.ContentID = @ID
    UNION ALL
    -- Recursive step: prepend each higher ancestor to the path built so far.
    -- The CONVERT keeps the column type identical to the anchor's.
    SELECT YT.ContentID,
           YT.Parent,
           CONVERT(varchar(8000), '/' + YT.Title + r.Title),
           r.Level + 1
    FROM dbo.YourTable YT
    INNER JOIN rCTE r ON YT.ContentID = r.Parent
)
-- The row with the highest Level carries the complete path from the root.
-- No row is returned when @ID has no parent row.
SELECT TOP (1) REPLACE(Title, ' ', '-') + '/' AS Path
FROM rCTE
ORDER BY Level DESC;

Related

Conversion of CTE to temp table in SQL Server to get All Possible Parents

I have one user table in which I maintain parent child relationship and I want to generate the result with all user id along with its parentid and all possible hierarchical parents as comma separated strings.
My table structure is as follows.
-- User table: the queries in this thread join REFERENCEID to another row's ID
-- to walk from a user to its parent.
CREATE TABLE hybarmoney.Users (
    ID          bigint IDENTITY(1,1) NOT NULL,
    USERID      nvarchar(100)        NULL,
    REFERENCEID bigint               NULL
)
and I am getting the result using the below CTE
-- Lists every user with its parent id and a comma-separated list of all
-- ancestor ids, ordered from the topmost ancestor down to the direct parent.
;WITH Hierarchy (ChildId, ChildName, ParentId, Parents)
AS (
    -- Anchor: users whose REFERENCEID is 0; their ancestor list is empty.
    SELECT TopUser.ID,
           TopUser.USERID,
           TopUser.REFERENCEID,
           CAST('' AS VARCHAR(MAX))
    FROM hybarmoney.Users AS TopUser
    WHERE TopUser.REFERENCEID = 0
    UNION ALL
    -- Recursive step: users referred by a row already in the hierarchy.
    -- A comma is inserted only when the ancestor list is non-empty.
    SELECT Child.ID,
           Child.USERID,
           Ancestor.ChildId,
           CAST(Ancestor.Parents
                + CASE WHEN Ancestor.Parents = '' THEN '' ELSE ',' END
                + CAST(Child.REFERENCEID AS VARCHAR(MAX))
                AS VARCHAR(MAX))
    FROM Hierarchy AS Ancestor
    INNER JOIN hybarmoney.Users AS Child
        ON Child.REFERENCEID = Ancestor.ChildId
)
SELECT ChildId,
       ChildName,
       ParentId,
       Parents
FROM Hierarchy
ORDER BY ChildId
OPTION (MAXRECURSION 0)
But MAXRECURSION is a limitation here, and when I searched I learned that temp tables are an alternative solution; however, I was not able to make that work.
Also, I don't want all possible top-level parents: for my purpose I want to find 15 levels of hierarchical parents for each user. Is it possible to use temp tables for this and, if so, how?
What you could do, in order to get only N levels of your CTE is to create an additional column where you keep track of each level.
-- Same hierarchy walk as the unbounded version, but limited to 15 levels below
-- the anchor rows. LEVEL is 0 for the anchor rows and grows by 1 per generation.
;WITH Hierarchy (ChildId, ChildName, ParentId, LEVEL, Parents)
AS (
    -- Anchor: users whose REFERENCEID is 0; their ancestor list is empty.
    SELECT Id,
           USERID,
           REFERENCEID,
           0 AS LEVEL,
           CAST('' AS VARCHAR(MAX))
    FROM hybarmoney.Users AS FirstGeneration
    WHERE REFERENCEID = 0
    UNION ALL
    -- Recursive step: users referred by a row already in the hierarchy.
    SELECT NextGeneration.ID,
           NextGeneration.UserID,
           Parent.ChildId,
           Parent.LEVEL + 1 AS LEVEL,
           CAST(CASE
                    WHEN Parent.Parents = ''
                        THEN CAST(NextGeneration.REFERENCEID AS VARCHAR(MAX))
                    ELSE Parent.Parents + ',' + CAST(NextGeneration.REFERENCEID AS VARCHAR(MAX))
                END AS VARCHAR(MAX))
    FROM hybarmoney.Users AS NextGeneration
    INNER JOIN Hierarchy AS Parent
        ON NextGeneration.REFERENCEID = Parent.ChildId
    -- Stop descending here rather than filtering afterwards: a filter on the
    -- outer SELECT would still make the CTE walk the whole tree. Returned rows
    -- are the same (LEVEL 0..15).
    WHERE Parent.LEVEL < 15
)
SELECT ChildId,
       ChildName,
       ParentId,
       LEVEL,
       Parents
FROM Hierarchy
ORDER BY ChildId
-- No MAXRECURSION hint needed: depth is capped at 15, below the default limit of 100.
This works, assuming I understood correctly your following statement: "for my purpose I want to find 15 levels of hierarchical parents for each users", where you actually meant 15 levels of hierarchical parents for a single user (in your case REFERENCEID=0).
If you want this to generate a list of 15 hierarchical parents for each user in your hybarmoney.Users table, then move your CTE in a table valued function and implement a similar solution as explained here.
I got the same result using the following query, though there may be a better solution:
-- Returns one row: @UserID and a comma-separated list of up to 15 ancestor
-- REFERENCEIDs, nearest ancestor first.
-- @UserID: ID of the user whose ancestor chain is walked.
CREATE PROC UspUpdateUserAndFiftenLevelIDs (@UserID BIGINT)
AS
BEGIN
    DECLARE @REFERENCEIDString NVARCHAR(MAX) = '';
    DECLARE @ReferenceID BIGINT = @UserID;   -- row currently being looked up
    DECLARE @Count INT = 0;

    WHILE (@Count < 15)
    BEGIN
        -- Step one level up: remember the parent id and append it to the list.
        SELECT @ReferenceID = U.REFERENCEID
              ,@REFERENCEIDString = @REFERENCEIDString
                    + CASE WHEN @REFERENCEIDString = '' THEN '' ELSE ',' END
                    + CAST(U.REFERENCEID AS NVARCHAR(20))
        FROM hybarmoney.Users U
        WHERE U.ID = @ReferenceID
          -- A NULL parent would turn the whole concatenated string into NULL.
          AND U.REFERENCEID IS NOT NULL;

        -- No matching row means the chain has ended; further iterations would
        -- change nothing. Must be checked immediately after the SELECT.
        IF @@ROWCOUNT = 0
            BREAK;

        SET @Count = @Count + 1;
    END

    SELECT @UserID
          ,@REFERENCEIDString;
END

How to traverse a path in a table with id & parentId?

Suppose I have a table like:
id | parentId | name
1 NULL A
2 1 B
3 2 C
4 1 E
5 3 E
I am trying to write a scalar function I can call as:
SELECT dbo.GetId('A/B/C/E') which would produce "5" if we use the above reference table. The function would do the following steps:
Find the ID of 'A' which is 1
Find the ID of 'B' whose parent is 'A' (id:1) which would be id:2
Find the ID of 'C' whose parent is 'B' (id:2) which would be id:3
Find the ID of 'E' whose parent is 'C' (id:3) which would be id:5
I was trying to do it with a WHILE loop but it was getting very complicated very fast... Just thinking there must be a simple way to do this.
A recursive CTE is not the most efficient way to retrieve hierarchical data (refer to the MSDN blog).
You should do something like the following instead. It was tested on 10 million records and was 300 times faster than the CTE version :)
-- Sample tree held in a table variable (table variables use the @ sigil;
-- #ITEM below is a genuine temp table).
DECLARE @table TABLE (Id int, ParentId int, Name varchar(10));
INSERT INTO @table (Id, ParentId, Name) VALUES (1, NULL, 'A');
INSERT INTO @table (Id, ParentId, Name) VALUES (2, 1, 'B');
INSERT INTO @table (Id, ParentId, Name) VALUES (3, 2, 'C');
INSERT INTO @table (Id, ParentId, Name) VALUES (4, 1, 'E');
INSERT INTO @table (Id, ParentId, Name) VALUES (5, 3, 'E');

-- Current depth being filled; int so trees deeper than 255 levels do not overflow.
DECLARE @Counter int = 0;

IF OBJECT_ID('TEMPDB..#ITEM') IS NOT NULL
    DROP TABLE #ITEM;

-- Result table: every node with its depth (lvl) and the id of its root.
CREATE TABLE #ITEM
(
    ID       int NOT NULL
   ,ParentID int
   ,Name     VARCHAR(MAX)
   ,lvl      int NOT NULL
   ,RootID   int NOT NULL
);

-- Level 0: root nodes (no parent); each root is its own RootID.
INSERT INTO #ITEM (ID, lvl, ParentID, Name, RootID)
SELECT Id
      ,0 AS LVL
      ,ParentId
      ,Name
      ,Id AS RootID
FROM @table
WHERE ISNULL(ParentId, -1) = -1;

-- @@ROWCOUNT is read after the INSERT that precedes each evaluation of the
-- condition, so the INSERT must stay the last statement in the loop body.
-- The loop ends once a level adds no rows.
WHILE @@ROWCOUNT > 0
BEGIN
    SET @Counter += 1;

    -- Add the children of the nodes inserted at the previous level;
    -- they inherit the parent's RootID.
    INSERT INTO #ITEM (ID, ParentId, Name, lvl, RootID)
    SELECT ci.ID
          ,ci.ParentId
          ,ci.Name
          ,@Counter AS cntr
          ,pr.RootID
    FROM @table AS ci
    INNER JOIN #ITEM AS pr
        ON ci.ParentId = pr.ID
    WHERE ISNULL(ci.ParentId, -1) > 0
      AND pr.lvl = @Counter - 1;
END

SELECT ID, ParentID, Name, lvl, RootID
FROM #ITEM;
Here is an example of functional rcte based on your sample data and requirements as I understand them.
-- Sample tree in a temp table.
if OBJECT_ID('tempdb..#Something') is not null
    drop table #Something;

create table #Something
(
    id int
  , parentId int
  , name char(1)
);

insert #Something (id, parentId, name)
select 1, NULL, 'A' union all
select 2, 1, 'B' union all
select 3, 2, 'C' union all
select 4, 1, 'E' union all
select 5, 3, 'E';

-- Name of the node(s) to start from (T-SQL variables use the @ sigil).
declare @Root char(1) = 'A';

with MyData as
(
    -- Anchor: every row named @Root.
    select s.id, s.parentId, s.name
    from #Something s
    where s.name = @Root
    union all
    -- Recursive step: children of rows already collected.
    select s.id, s.parentId, s.name
    from #Something s
    join MyData d on d.id = s.parentId
)
select id, parentId, name
from MyData;
Note that if you change the value of your variable the output will adjust. I would make this an inline table valued function.
I think I have it, based on @SeanLange's recommendation to use a recursive CTE (above in the comments):
-- Resolves a '/'-separated path of node names (e.g. 'A/B/C/E') to the id of the
-- node at the end of that path.
--   @path   full path starting at a root node (a row whose parentId IS NULL)
--   returns the matching id, or NULL when no node has that path
-- Builds the full path of every node in dbo.tblT and then filters by @path.
CREATE FUNCTION GetID
(
    @path VARCHAR(MAX)
)
/* TEST:
SELECT dbo.GetID('A/B/C/E')
*/
RETURNS INT
AS
BEGIN
    DECLARE @ID INT;   -- stays NULL if no row matches

    WITH cte AS (
        -- Anchor: root nodes; their path is just their own name.
        SELECT p.id,
               p.parentId,
               CAST(p.name AS VARCHAR(MAX)) AS name
        FROM dbo.tblT p
        WHERE p.parentId IS NULL
        UNION ALL
        -- Recursive step: append each child's name to its parent's path.
        SELECT p.id,
               p.parentId,
               CAST(pcte.name + '/' + p.name AS VARCHAR(MAX)) AS name
        FROM dbo.tblT p
        INNER JOIN cte pcte
            ON pcte.id = p.parentId
    )
    SELECT @ID = id
    FROM cte
    WHERE name = @path;

    RETURN @ID;
END

SQL Server Recursive Hierarchy Query

This is for SQL Server 2012. I need to generate a dataset containing links, and all links of links from a given starting ParentId given the following table
-- Link table: each row connects one ParentId to one (optional) ChildId.
CREATE TABLE Relations (
    Id       int NOT NULL PRIMARY KEY,
    ParentId int NOT NULL,
    ChildId  int
);
So for the following dataset:
1 A B
2 B C
3 C D
4 F D
5 F G
6 X Y
7 Y Z
Starting with C, I'd expected to get back rows 1 to 5 as they're all linked to C through either parent or child hierarchies. E.g. G has parent F, which is parent of D, which is child of C.
It's not a standard hierarchy query as there's no real root, and I need to get links in both directions. So this means I can't use the CTE recursion trick.. here was my attempt:
-- Hierarchical query using a common table expression.
-- NOTE: this is the asker's non-working attempt, kept as posted. Visible problems:
--   * the recursive member references ReportingTree twice (r1 and r2);
--     presumably this is the reported error, since SQL Server allows only one
--     recursive reference per recursive member -- confirm against the docs
--   * the first join condition uses the un-aliased name ReportingTree.Parent
--   * the anchor reads Relations while the recursive member reads
--     [dbo].[CompanyLinks], and cl.Child / cl.FromCompanyId are not columns of
--     the Relations table defined above
WITH ReportingTree (Id, Parent, Child, Lvl)
AS
(
-- Anchor member: links whose parent is the starting id (9488), at level 0.
SELECT Id,
ParentId,
ChildId,
0 as Lvl
FROM Relations WHERE ParentId = 9488
UNION ALL
-- Recursive member: intended to follow links in both directions.
SELECT
cl.Id,
cl.ParentId,
cl.ChildId,
r1.Lvl+1
FROM [dbo].[CompanyLinks] cl
INNER JOIN ReportingTree r1 ON ReportingTree.Parent = cl.Child
INNER JOIN ReportingTree r2 ON cl.FromCompanyId = r2.Parent <-- errors
)
SELECT * FROM ReportingTree
My second attempt involved a temp table and while loop. This works but turns out to be very slow:
-- Collects every Relations row connected (in either direction, through any
-- number of hops) to node 9488, growing the set in #R until no new rows appear.
BEGIN
    CREATE TABLE #R (
        Id       INT NOT NULL PRIMARY KEY NONCLUSTERED,
        ParentId INT NOT NULL,
        ChildId  INT
    );
    CREATE CLUSTERED INDEX IX_Parent ON #R (ParentId);
    CREATE INDEX IX_Child ON #R (ChildId);

    -- Seed: links that touch the starting node as parent or child.
    INSERT INTO #R (Id, ParentId, ChildId)
    SELECT Id, ParentId, ChildId
    FROM Relations
    WHERE ParentId = 9488 OR ChildId = 9488;

    -- @@ROWCOUNT reflects the INSERT executed just before each evaluation of
    -- the condition, so the loop stops once a pass adds nothing.
    WHILE @@ROWCOUNT > 0
    BEGIN
        -- Add links that share a parent or child node with any collected link
        -- and are not collected yet. Id is the primary key, so checking Id is
        -- enough to skip rows already in #R.
        INSERT INTO #R (Id, ParentId, ChildId)
        SELECT cl.Id, cl.ParentId, cl.ChildId
        FROM Relations AS cl
        WHERE EXISTS (SELECT 1
                      FROM #R AS r
                      WHERE cl.ChildId  IN (r.ChildId, r.ParentId)
                         OR cl.ParentId IN (r.ParentId, r.ChildId))
          AND NOT EXISTS (SELECT 1
                          FROM #R AS seen
                          WHERE seen.Id = cl.Id);
    END

    SELECT cl.Id, cl.ParentId, cl.ChildId
    FROM Relations AS cl
    INNER JOIN #R AS r ON cl.Id = r.Id;

    DROP TABLE #R;
END
Can anyone suggest a workable solution for this?
We match every row with every other row on every combination of parent and child ids, saving the path along the way. We repeat this matching recursively, extending the path each time; to avoid infinite loops we check that a link has not already been traversed on the current path. In the end we have all nodes that have a path to the desired node (@Id):
-- Finds every node connected to @Id when links are followed in both directions.
-- @Id (the starting node id) must be declared before this statement.
-- Path records the links visited so far as '(parent,child)' pairs so that a
-- link is never followed twice on the same path.
WITH cte AS (
    -- Anchor: links that touch the starting node.
    SELECT CompanyLinks.*,
           CAST('(' + CAST(ParentId AS nvarchar(max)) + ','
                    + CAST(ChildId AS nvarchar(max)) + ')' AS nvarchar(max)) AS Path
    FROM CompanyLinks
    WHERE ParentId = @Id OR ChildId = @Id
    UNION ALL
    -- Recursive step: any link sharing a node with the previous link.
    SELECT a.*,
           CAST(c.Path + '('
                + CAST(a.ParentId AS nvarchar(max)) + ','
                + CAST(a.ChildId AS nvarchar(max)) + ')'
                AS nvarchar(max)) AS Path
    FROM CompanyLinks a
    JOIN cte c
        ON a.ParentId = c.ChildId
        OR c.ParentId = a.ChildId
        OR c.ParentId = a.ParentId
        OR c.ChildId = a.ChildId
    -- Skip links already present in this path; this is what ends the recursion.
    WHERE c.Path NOT LIKE CAST('%('
                               + CAST(a.ParentId AS nvarchar(max)) + ','
                               + CAST(a.ChildId AS nvarchar(max))
                               + ')%' AS nvarchar(max))
)
-- Both ends of every reached link, with the company name and the path taken.
SELECT DISTINCT a.id, Company.Name, path
FROM (
    SELECT DISTINCT ParentId AS id, path FROM cte
    UNION ALL
    SELECT DISTINCT ChildId AS id, path FROM cte
) a
INNER JOIN Company ON Company.Id = a.Id
Here is a fiddle for it.
If you want distinct nodes just use:
-- Distinct node ids seen on either end of any link in cte.
-- UNION removes duplicates across both branches.
SELECT ParentId AS id FROM cte
UNION
SELECT ChildId FROM cte
at the end of query.
This query is actually a Breadth First Search on an un-directed graph.
Note: Based on Hogan's comment, there is no need to check the path: because the relation table has a primary key (which I had not noticed), we can look for that primary key in prior recursions to avoid infinite loops.

Writing a recursive SQL query on a self-referencing table

I have a database with a table called Items, that contains these columns:
ID - primary key, uniqueidentifier
Name - nvarchar(256)
ParentID - uniqueidentifier
The name field can be used to build out a path to the item, by iterating through each ParentId until it equals '11111111-1111-1111-1111-111111111111', which is a root item.
So if you had a table that had rows like
ID Name ParentID
-------------------------------------------------------------------------------------
11111111-1111-1111-1111-111111111112 grandparent 11111111-1111-1111-1111-111111111111
22222222-2222-2222-2222-222222222222 parent 11111111-1111-1111-1111-111111111112
33333333-3333-3333-3333-333333333333 widget 22222222-2222-2222-2222-222222222222
So if I looked up an item with id '33333333-3333-3333-3333-333333333333' in the example above, i'd want the path
/grandparent/parent/widget
returned. I've attempted to write a CTE, as it looks like that's how you'd normally accomplish something like this, but as I don't do very much SQL, I can't quite figure out where I'm going wrong. I've looked at some examples, and this is as close as I seem to be able to get; it only returns the child row.
-- Asker's attempt, kept as posted apart from restoring the @ variable sigils.
-- It returns only the anchor row: the recursive member joins Items to ItemPath
-- on a.Id = parent.id (a row to itself) and then also requires
-- parent.ParentId = a.Id, so nothing is added unless a row is its own parent.
declare @id uniqueidentifier
set @id = '10071886-A354-4BE6-B55C-E5DBCF633FE6'
;with ItemPath as (
    -- Anchor: the item being looked up.
    select a.[Id], a.[Name], a.ParentID
    from Items a
    where Id = @id
    union all
    -- Recursive member (faulty, see note above).
    select parent.[Id], parent.[Name], parent.ParentID
    from Items parent
    inner join ItemPath as a
        on a.Id = parent.id
    where parent.ParentId = a.[Id]
)
select * from ItemPath
I have no idea how I'd declare a local variable for the path and keep appending to it in the recursive query. I was going to try to at least get all the rows up to the parent before going after that. If anyone could help with that as well, I'd appreciate it.
well here's working solution
SQL FIDDLE EXAMPLE
-- Builds the path of one item by walking from the item up to its top ancestor.
declare @id uniqueidentifier
set @id = '33333333-3333-3333-3333-333333333333'
;with ItemPath as
(
    -- Anchor: the item itself. The cast makes the column type match the
    -- concatenation in the recursive member (anchor and recursive types must
    -- be identical).
    select a.[Id], cast(a.[Name] as nvarchar(max)) as [Name], a.ParentID
    from Items a
    where a.Id = @id
    union all
    -- Recursive step: move to the parent and prepend its name.
    select parent.[Id], parent.[Name] + '/' + a.[Name], parent.ParentID
    from ItemPath as a
    inner join Items as parent on parent.id = a.parentID
)
select Id, [Name], ParentID
from ItemPath
-- Keep only the row that reached the top: its parent is the root marker id.
-- This replaces a filter on one hard-coded top-level item id.
where ParentID = '11111111-1111-1111-1111-111111111111'
I don't like it much; I think a better solution is to do it the other way around. Give me a minute and I'll try to write another query :)
UPDATE here it is
SQL FIDDLE EXAMPLE
-- View exposing every item with its full path in Name ('grandparent/parent/widget'),
-- built top-down from the top-level items.
create view vw_Names
as
with ItemPath as
(
    -- Anchor: top-level items, i.e. rows whose parent is the root marker id
    -- (generalizes the original filter on one hard-coded top-level item).
    select a.[Id], cast(a.[Name] as nvarchar(max)) as Name, a.ParentID
    from Items a
    where a.ParentID = '11111111-1111-1111-1111-111111111111'
    union all
    -- Recursive step: append each child's name to its parent's path.
    select a.[Id], parent.[Name] + '/' + a.[Name], a.ParentID
    from Items as a
    inner join ItemPath as parent on parent.id = a.parentID
)
select Id, Name, ParentID
from ItemPath
and now you can use this view
-- Look up the path of a single item through the view.
declare @id uniqueidentifier
set @id = '33333333-3333-3333-3333-333333333333'
select Id, Name, ParentID
from vw_Names
where Id = @id
I needed a slightly different version of this answer as I wanted to produce a list of all lineages in the tree. I also wanted to know the depth of each node. I added a temporary table of top level parents that I could loop through and a temporary table to build the result set.
-- Produces every item with its full lineage path and depth by looping over the
-- top-level items (ParentID = 0) and expanding each one with a recursive CTE.
use Items

-- Work queue of top-level items still to be expanded; only the Id is needed.
Select Id
Into #Temp
From Items
where ParentID = 0

Declare @Id int

create table #Results
(
    Id int,
    Name nvarchar(max),
    ParentId int,
    Depth int
)

While Exists (Select 1 From #Temp)
Begin
    Select Top 1 @Id = Id From #Temp;

    with ItemPath as
    (
        -- Anchor: the current top-level item, at depth 1.
        select a.[Id], cast(a.[Name] as nvarchar(max)) as Name, a.ParentID, 1 as Depth
        from Items a
        where a.ID = @Id
        union all
        -- Recursive step: children extend the parent's path and depth.
        select a.[Id], parent.[Name] + '/' + a.[Name], a.ParentID, parent.Depth + 1
        from Items as a
        inner join ItemPath as parent on parent.id = a.parentID
    )
    insert into #Results (Id, Name, ParentId, Depth)
    select Id, Name, ParentID, Depth
    from ItemPath;

    -- Remove the processed item so the loop ends.
    Delete #Temp Where Id = @Id;
End

drop table #Temp
select Id, Name, ParentId, Depth from #Results
drop table #Results
If we start from the following table...
Id Name ParentID
1 Fred 0
2 Mary 0
3 Baker 1
4 Candle 2
5 Stick 4
6 Maker 5
We would get this result table.
Id Name ParentID Depth
1 Fred 0 1
2 Mary 0 1
3 Fred/Baker 1 2
4 Mary/Candle 2 2
5 Mary/Candle/Stick 4 3
6 Mary/Candle/Stick/Maker 5 4

SQL Split - insert into hierarchical table structure

I'm using a Split function (found on social.msdn.com) and when executing manually in a query window
SELECT * FROM dbo.Split('/ABC/DEF/GHI/JKL', '/')
I get the following
Id Name
-- ----
1
2 ABC
3 DEF
4 GHI
5 JKL
Where Id is just a sequential number indicating position within the original string and Name is the name of that node. No hierarchical information yet.
Now, the next step is to put this into a hierarchical data structure in the DB. I'm trying to do this in a stored proc and with my SQL skills being what they are, I've hit a wall. Here's what I'd like to have: (NOTE that the Id column above is not related to the Id or the ParentId column here.)
Id ParentId Name FullName
-- -------- ---- --------
1 NULL ABC /ABC
2 1 DEF /ABC/DEF
3 2 GHI /ABC/DEF/GHI
4 3 JKL /ABC/DEF/GHI/JKL
I've got this far with my SP (called GetId, with parameter @FullName). GetId should return the Id associated with this node. If the node doesn't exist, it should be created and the Id of the new row returned; in other words, the consumer of this SP shouldn't need to know or care whether the node existed before calling it:
-- Body of the GetId procedure (asker's incomplete draft); @FullName is the
-- procedure parameter holding the node's full path, e.g. '/ABC/DEF'.
DECLARE @count int
-- // is there already a row for this node?
-- COUNT(*) is used because Category, as defined in this thread, has no CatId column.
SELECT @count = COUNT(*)
FROM Category
WHERE FullName = @FullName
-- // if no row for this node, create the row
-- // and perhaps create multiple rows in hierarchy up to root
IF (@count = 0)
BEGIN
    SELECT * FROM dbo.Split(@FullName, '/')
    -- // NOW WHAT ???
    -- // need to insert row (and perhaps parents up to root)
END
-- // at this point, there should be a row for this node
-- // return the Id associated with this node
SELECT Id
FROM Category
WHERE FullName = @FullName
The Category table (adjacency list) where these items will eventually end up, through a series of inserts, has the following structure.
-- Adjacency list of category nodes: ParentId is NULL for a top-level node and
-- FullName stores the node's complete path.
CREATE TABLE Category (
    Id       int IDENTITY(1,1) NOT NULL PRIMARY KEY,
    ParentId int               NULL,
    Name     nvarchar(255)     NOT NULL,
    FullName nvarchar(255)     NOT NULL
)
As result, I do not want to generate a value for the Id column in the Category table and need to get the appropriate ParentId for each node.
After the paths '/ABC/DEF/GHI/JKL' and '/ABC/DEF/XYZ/LMN/OPQ' were processed and I did a SELECT * FROM Category, I would expect to see the following:
Id ParentId Name FullName
-- -------- ---- --------
1 NULL ABC /ABC
2 1 DEF /ABC/DEF
3 2 GHI /ABC/DEF/GHI
4 3 JKL /ABC/DEF/GHI/JKL
5 2 XYZ /ABC/DEF/XYZ
6 5 LMN /ABC/DEF/XYZ/LMN
7 6 OPQ /ABC/DEF/XYZ/LMN/OPQ
Q: would it be possible to call back into the SP recursively starting at the outer most node, until the node existed or we were at the ultimate parent? Something to the effect of:
GetId(@FullName)
{
If Category exists with @FullName
return CatId
Else // row doesn't exist for this node
Split @FullName, order by Id DESC so we get the leaf node first
Create Category row
@FullName,
@Name,
@ParentId = Id of next FullName (call GetId with FullName of next row from Split)
}
You can use CTE to achieve this, in combination with RowNumbering
-- Turns a '/'-separated path into one row per segment, each with its position
-- (RN), the position of its parent segment, and the cumulative path so far.
WITH TMP AS (
    -- Non-empty segments numbered 1..n in their original order (the leading
    -- '/' produces an empty first segment, which is dropped).
    -- Column names follow the Split output shown in the question (Id, Name).
    SELECT Id,
           Name,
           ROW_NUMBER() OVER (ORDER BY Id ASC) AS RN
    FROM dbo.Split('/ABC/DEF/GHI/JKL', '/')
    WHERE Name > ''
), TMP2 AS (
    -- Anchor: the first segment, which has no parent. Selected with RN = 1
    -- because TOP 1 ... ORDER BY cannot be placed before UNION ALL.
    SELECT RN,
           CONVERT(bigint, NULL) AS ParentId,
           Name,
           CONVERT(nvarchar(max), '/' + Name) AS FullName
    FROM TMP
    WHERE RN = 1
    UNION ALL
    -- Recursive step: the next segment extends the previous row's path.
    SELECT n.RN,
           t.RN,
           n.Name,
           t.FullName + '/' + n.Name
    FROM TMP2 t
    INNER JOIN TMP n ON n.RN = t.RN + 1
)
SELECT RN, ParentId, Name, FullName
FROM TMP2
ORDER BY RN
Now for the 2nd part, this inserts the entire hierarchy, but starts with ID=1
-- Creates the missing Category rows for @FullName, from the top of the path
-- down, reusing any prefix rows that already exist.
-- Relies on @count and @FullName from the enclosing GetId procedure.
IF (@count = 0)
BEGIN
    -- One row per path segment: position, name and cumulative path.
    DECLARE @Nodes TABLE (
        RN       bigint        NOT NULL PRIMARY KEY,
        Name     nvarchar(255) NOT NULL,
        FullName nvarchar(max) NOT NULL
    );

    WITH TMP AS (
        -- Non-empty segments numbered 1..n in their original order.
        -- Column names follow the Split output shown in the question (Id, Name).
        SELECT Id,
               Name,
               ROW_NUMBER() OVER (ORDER BY Id ASC) AS RN
        FROM dbo.Split(@FullName, '/')
        WHERE Name > ''
    ), TMP2 AS (
        -- Anchor selected with RN = 1: TOP 1 ... ORDER BY cannot be placed
        -- before UNION ALL.
        SELECT RN,
               Name,
               CONVERT(nvarchar(max), '/' + Name) AS FullName
        FROM TMP
        WHERE RN = 1
        UNION ALL
        SELECT n.RN,
               n.Name,
               t.FullName + '/' + n.Name
        FROM TMP2 t
        INNER JOIN TMP n ON n.RN = t.RN + 1
    )
    INSERT INTO @Nodes (RN, Name, FullName)
    SELECT RN, Name, FullName
    FROM TMP2;

    DECLARE @RN bigint = 1;
    DECLARE @LastRN bigint = (SELECT MAX(RN) FROM @Nodes);  -- NULL for an empty path: loop is skipped

    -- Insert one level at a time so each new row can look up the identity value
    -- its parent received. Category.Id is an IDENTITY column and is never
    -- supplied here; the segment position RN is not a valid Id once the table
    -- already holds rows.
    WHILE @RN <= @LastRN
    BEGIN
        INSERT INTO Category (ParentId, Name, FullName)
        SELECT parent.Id,          -- NULL for the first segment (no parent prefix)
               n.Name,
               n.FullName
        FROM @Nodes AS n
        LEFT JOIN @Nodes AS pn
            ON pn.RN = n.RN - 1
        LEFT JOIN Category AS parent
            ON parent.FullName = pn.FullName
        WHERE n.RN = @RN
          -- Skip prefixes already created by an earlier path.
          AND NOT EXISTS (SELECT 1
                          FROM Category AS c
                          WHERE c.FullName = n.FullName);

        SET @RN += 1;
    END
END