Recursive query to check that all parents are enabled - sql-server-2005

I have a CMS which has a sitemap table with a parent-child relationship and a content table. Sometimes I don't want to include content in queries if its corresponding sitemap entry or any of its parents is disabled.
The basic table structure is:
tb_Sitemap: id, parent_id, enabled
tb_Content: id, sitemap_id
So I want to be able to add something to my queries like this:
SELECT * FROM tb_Content WHERE {tb_Sitemap.enabled and any or all parents are also enabled}
I know I need to use a CTE but I am unsure about how to add these to a WHERE clause or how to go about it.
I am guessing I need to do something like the following, but I am not sure how to add it to a WHERE clause:
-- Walks tb_Sitemap from the root entries downwards. For every entry, "enabled"
-- ends up 1 only when the entry itself and every ancestor above it are enabled.
;WITH cte (id, enabled)
AS
(
    -- Anchor: root entries have no parent, so their own flag is the answer.
    -- The explicit cast keeps the anchor and recursive column types identical.
    SELECT id, CAST(enabled AS bit)
    FROM tb_Sitemap
    WHERE parent_id IS NULL
    UNION ALL
    -- Recursive step: a child is disabled as soon as its parent chain is.
    SELECT a.id, CAST(CASE WHEN b.enabled <> 1 THEN 0 ELSE a.enabled END AS bit)
    FROM tb_Sitemap a
    INNER JOIN cte b ON a.parent_id = b.id
)
SELECT enabled FROM cte
Sample data:
tb_Sitemap
id: 1, parent_id: null, enabled: 1
id: 2, parent_id: 1, enabled: 1
id: 3, parent_id: 2, enabled: 1
id: 4, parent_id: 1, enabled: 0
id: 5, parent_id: 4, enabled: 1
id: 6, parent_id: 5, enabled: 1
tb_Content
sitemap_id: 3 (this would appear because sitemap_id:3 is enabled as is all of its parents)
sitemap_id: 6 (this will not appear because although sitemap_id:6 is enabled, one of its parents is not)

-- A little test data.
-- Table variables use the @ sigil (a # name is a temp table and cannot be DECLAREd).
-- Multi-row VALUES lists need SQL Server 2008 or later.
declare @tb_Sitemap as table ( id int, parent_id int null, enabled bit )
insert into @tb_Sitemap ( id, parent_id, enabled ) values
  ( 1, NULL, 1 ), ( 2, 1, 1 ), ( 3, 2, 1 ),
  ( 4, 1, 0 ), ( 5, 4, 1 ), ( 6, 5, 1 )
declare @tb_Content as table ( sitemap_id int )
insert into @tb_Content ( sitemap_id ) values ( 3 ), ( 6 )
-- Query the little beggars.
-- summary_enabled is 1 only when the entry and all of its ancestors are enabled.
; with CTE as (
  -- Start at the root(s).
  select id, parent_id, enabled, enabled as summary_enabled
    from @tb_Sitemap
    where parent_id is NULL
  union all
  -- Add one generation at a time, carrying the parent's summary flag down.
  -- The cast keeps the column type (bit) identical to the anchor's.
  select S.id, S.parent_id, S.enabled, cast( case when S.enabled = 1 and CTE.summary_enabled = 1 then 1 else 0 end as bit )
    from CTE inner join
      @tb_Sitemap as S on S.parent_id = CTE.id
  )
  -- Every sitemap entry is listed; entries that have content and a fully
  -- enabled ancestor chain are flagged in the "include" column.
  select *, case when summary_enabled = 1 and sitemap_id is not NULL then '< winner!' else '' end as include
    from CTE left outer join
      @tb_Content as C on C.sitemap_id = CTE.id

Related

Query returning all the children, sibling and parent from a single Id

I have a table, Company:
CompanyId MotherCompanyId CompanyName
---------- ------------ ------------
1 NULL HpTopMother
2 1 HpTopDaughter1
3 1 HpTopDaughter2
4 NULL HpTopDaughter3
5 2 HpTopDaughter4
6 1 HpTopDaughter5
What I want to do is, from an Id, to return the parent of that Id (if any), its children, and its siblings.
So if I have CompanyId = 1, the query will return me
Id: 2,3,6
And if I have CompanyId = 2, the query will return me
Id: 1,3,5,6
I have tried something like:
-- Company whose relatives are wanted (local variables use the @ sigil).
DECLARE @cmpId BIGINT
SET @cmpId = 14085;
WITH CTE_FAMILY as(
    -- To get the siblings: every company sharing this company's mother.
    -- Note that this branch also returns the company itself.
    select CompanyId
    from Company
    where MotherCompanyId = (select MotherCompanyId from Company where CompanyId = @cmpId)
    -- To get the daughters: every company whose mother is this company.
    UNION ALL
    SELECT CompanyId
    FROM Company
    WHERE MotherCompanyId = @cmpId
)
SELECT *
FROM CTE_FAMILY
But this is returning only Id: 3,6 when I have CompanyId = 2.
Any idea of how to do what I want please?
Find each type you need and union all together
-- Returns the parent, siblings and children of the company identified by
-- @cmpId (a variable the caller must declare), one row per relative, tagged
-- with its relationship type.
WITH CTE_FAMILY as
(
    -- Parent: the mother of the subject company, if it has one.
    select CompanyId = c.MotherCompanyId, type = 'Parent'
    from Company c
    where c.CompanyId = @cmpId
    and c.MotherCompanyId is not null
    union all
    -- Siblings: other companies sharing the subject's mother.
    select CompanyId = s.CompanyId, type = 'Sibling'
    from Company c
    inner join Company s on c.MotherCompanyId = s.MotherCompanyId
    where c.CompanyId = @cmpId
    and c.MotherCompanyId is not null
    and c.CompanyId <> s.CompanyId
    union all
    -- Children: companies whose mother is the subject company.
    select CompanyId = c.CompanyId, type = 'Child'
    from Company c
    where c.MotherCompanyId = @cmpId
)
select *
from CTE_FAMILY
Like an alternative, a query can be like this
-- Builds the family of the company identified by @cmpId (a variable the
-- caller must declare) from named steps: the subject row, its parent, its
-- direct children and its siblings.
WITH subject AS (
    -- The company the lookup starts from.
    SELECT *
    FROM Company
    WHERE CompanyId = @cmpId
),
parent AS (
    -- The subject's mother company (no row when the subject is a root).
    SELECT Company.*
    FROM Company
    INNER JOIN subject
        ON Company.CompanyId = subject.MotherCompanyId
),
direct_children AS (
    -- Companies whose mother is the subject.
    SELECT Company.*
    FROM Company
    INNER JOIN subject
        ON Company.MotherCompanyId = subject.CompanyId
),
siblings AS (
    -- Companies sharing the subject's mother, excluding the subject itself.
    SELECT Company.*
    FROM Company
    INNER JOIN parent
        ON Company.MotherCompanyId = parent.CompanyId
    INNER JOIN subject
        ON Company.CompanyId <> subject.CompanyId
),
family AS (
    SELECT * FROM parent
    UNION ALL
    SELECT * FROM direct_children
    UNION ALL
    SELECT * FROM siblings
)
SELECT * FROM family
Demo
To present an alternative approach, here's a solution that uses hierarchyid. First, the setup:
-- Start from a clean slate so the script can be re-run.
drop table if exists #d;

with company_rows as (
    -- Inline copy of the sample Company data.
    select src.CompanyId, src.MotherCompanyId, src.CompanyName
    from (values
        (1, NULL, 'HpTopMother'),
        (2, 1, 'HpTopDaughter1'),
        (3, 1, 'HpTopDaughter2'),
        (4, NULL, 'HpTopDaughter3'),
        (5, 2, 'HpTopDaughter4'),
        (6, 1, 'HpTopDaughter5')
    ) as src(CompanyId, MotherCompanyId, CompanyName)
),
company_paths as (
    -- Anchor: root companies get a one-segment path such as '/1/'.
    select
        r.CompanyId,
        r.MotherCompanyId,
        r.CompanyName,
        cast(concat('/', r.CompanyId, '/') as varchar(100)) as [path]
    from company_rows as r
    where r.MotherCompanyId is null

    union all

    -- Recursive step: append the child's id to its mother's path.
    select
        kid.CompanyId,
        kid.MotherCompanyId,
        kid.CompanyName,
        cast(concat(mom.[path], kid.CompanyId, '/') as varchar(100)) as [path]
    from company_rows as kid
    inner join company_paths as mom
        on mom.CompanyId = kid.MotherCompanyId
)
-- Persist the rows with the textual path converted to a hierarchyid.
select
    CompanyId,
    MotherCompanyId,
    CompanyName,
    cast([path] as hierarchyid) as [path]
into #d
from company_paths;
By way of brief explanation, all I'm doing here is recreating your data and then doing the same recursive CTE dance to get the hierarchy. Where this approach starts to diverge is that I'm persisting the results of that. This could be done once in your actual table and maintained by your application as and when data changes. Note that this doesn't mean running the recursive CTE again; rather, if you know the parent (which already has a value for [path]) and the child, you can construct a path value for the child with no need to traverse the hierarchy back to the root.
Now that the hard work is done, we can query up (for parents, grandparents, etc), down (for children, grandchildren, etc), or laterally (for siblings).
-- Company whose relatives are wanted (local variables use the @ sigil;
-- #d is the temp table holding each company's hierarchyid path).
declare @CompanyID int = 2;
declare @CompanyPath hierarchyid = (
    select [path]
    from #d
    where CompanyId = @CompanyID
);
-- Path of the immediate parent (one level up).
declare @ParentCompanyPath hierarchyid = @CompanyPath.GetAncestor(1);
select *
from #d as d
where d.CompanyId <> @CompanyID /* don't return self */
    and (
        /* find any child, grandchild, etc companies:
           rows whose path lies underneath ours */
        d.[path].IsDescendantOf(@CompanyPath) = 1
        /* find any parent, grandparent, etc companies:
           rows whose path ours lies underneath */
        or @CompanyPath.IsDescendantOf(d.[path]) = 1
        or (
            /* find any row that has a shared parent */
            d.[path].IsDescendantOf(@ParentCompanyPath) = 1
            /* and whose 'distance' from the root
               (and by extension from the parent) is the same as ours */
            and d.[path].GetLevel() = @CompanyPath.GetLevel()
        )
    );
Note, you have choices as to how to write this. For example, I could have used join syntax instead (obviating the need to determine #CompanyPath or #ParentCompanyPath separately). Or each predicate could have been its own query and use union to jam them all together.
I didn't do it here because the result set is trivially sized, but you can put an index on hierarchyid columns which makes these sorts of queries efficient over non-trivial data sets.

Get root and top level from SQL tree

I have a tree table. And, I am going to get root and top level on this tree.
Please help with a solution; you can use any approach you want.
-- Sample tree: rows with a NULL parent are roots, every other row points at
-- its parent's id. A #-prefixed name is a temp table, so it is created with
-- CREATE TABLE (DECLARE only works for @table variables).
create table #disc (
    id int,
    parent int,
    label varchar(50)
)
insert into #disc (id, parent, label)
select id, parent, label
from (
    values (1, null, 'q_1'),
           (2, 1, 'a_1_1'),
           (3, 2, 'a_1_1_1'),
           (4, 1, 'a_1_2'),
           (5, null, 'q_5'),
           (6, 5, 'a_5_1'),
           (7, 5, 'a_5_2')
) x (id, parent, label);
1. q_1
2. a_1_1
3. a_1_1_1
4. a_1_2
5. q_5
6. a_5_1
7. a_5_2
And, my result should be like this:
1: 1, null, q_1
2: 2, 1, a_1_1
3: 5, null, q_5
4: 6, 5, a_5_1
or
1: 1, null, q_1
2: 5, null, q_5
3: 2, 1, a_1_1
4: 6, 5, a_5_1
I only found one way, but I believe there is a better solution:
-- Walks the #disc tree from the roots down, tagging each row with its depth
-- (lvl) and a row number (rnk), then keeps the roots plus one level-2 row.
with rec as (
-- Anchor: root rows (parent is null), numbered by id, at level 1.
select id, parent, label,
row_number() over(order by id) rnk,
1 lvl
from #disc
where parent is null
union all
-- Recursive step: rows whose parent is already in rec, one level deeper.
-- NOTE(review): the final filter appears to rely on row_number() restarting
-- for each parent row inside the recursive member - confirm on your server.
select d.id, d.parent, d.label,
row_number() over(order by d.id) rnk,
r.lvl + 1
from rec r
join #disc d on r.id = d.parent
)
select *
from rec
-- Keep every root, plus level-2 rows numbered 1.
where parent is null or (rnk = 1 and lvl = 2)
If I understand this, the parent value will be null in the root nodes. The next level down will have a root node as parent. So ...
-- Root nodes have no parent; the next level down is every node whose parent
-- is one of those roots. No recursion is needed for just these two levels.
;with roots as
(
    select d.id, d.parent, d.label
    from #disc as d
    where d.parent is null
)
-- Level 1: the roots themselves.
select r.id, r.parent, r.label
from roots as r
union
-- Level 2: direct children of any root.
select d.id, d.parent, d.label
from #disc as d
where d.parent in (select r.id from roots as r)
It doesn't look like you actually want to recurse here.
You can just do a self-join inside an apply.
-- For every root row p (parent is null), emits the root itself plus one of
-- its children, and numbers the combined rows so each root is followed by
-- its child.
select
-- Orders by the root id (a root's own id, or a child's parent), then by id.
row_number() over (order by isnull(c.parent, c.id), c.id),
c.id,
c.parent,
c.label
from #disc p
cross apply (
-- The root row itself.
select p.id, p.parent, p.label
union all
-- One child of that root. Inside this branch "c" is the inner #disc alias,
-- not the apply alias of the same name.
-- NOTE(review): intended to be the lowest-id child; confirm the ORDER BY
-- below binds to this TOP 1 branch rather than to the whole UNION ALL.
select top 1 c.id, c.parent, c.label
from #disc c
where p.id = c.parent
order by c.id
) c
where p.parent is null;
db<>fiddle

How to find equal subsets?

I have a table with subsets. How to find reader id's with the same subsets as given id? For example:
Input reader = 4
The expected output: reader 1 and 5.
Subsets size is not always = 3 as in the example it can be dynamic. What is correct SQL query?
-- Sample data: one row per (reader, book, pages). A #-prefixed name is a temp
-- table, so it is created with CREATE TABLE (DECLARE only works for @table
-- variables).
create table #t (
    reader int not null,
    book int,
    pages int
)
insert into #t (reader, book, pages)
select 1, 1, 100 union
select 1, 2, 201 union
select 1, 3, 301 union
select 2, 1, 100 union
select 2, 3, 101 union
select 2, 3, 301 union
select 3, 1, 100 union
select 3, 2, 101 union
select 3, 3, 301 union
select 4, 1, 100 union
select 4, 2, 201 union
select 4, 3, 301 union
select 5, 1, 100 union
select 5, 2, 201 union
select 5, 3, 301
select * from #t
This is a bit of a pain, but you can use a self-join:
-- Finds the readers whose set of (book, pages) rows is exactly the same as
-- the set belonging to @reader. The reference reader is excluded from the
-- result, so for reader 4 in the sample data the output is readers 1 and 5.
declare @reader int = 4;
with t as (
    -- Every row plus the size of its reader's set.
    select t.*, count(*) over (partition by reader) as cnt
    from #t t
)
select t.reader
from t left join
     t t2
     -- Match each row against the reference reader's rows; sets of a
     -- different size can never match.
     on t2.book = t.book and
        t2.pages = t.pages and
        t2.cnt = t.cnt and
        t2.reader = @reader
where t.reader <> @reader  -- don't report the reference reader itself
group by t.reader, t.cnt
-- Every row of the candidate must have found a partner (no unmatched rows),
-- and the number of joined rows must equal the candidate's set size.
having count(*) = t.cnt and
       count(*) = count(t2.reader);
The left join is needed to avoid a subsetting relationship. That is, having all the books for "4" plus additional books.
This is a generic approach to handle relational division. It checks if set x contains all elements from set y (and perhaps more):
-- Relational division: list the readers whose rows cover every (book, pages)
-- pair owned by reader 1. Readers holding extra rows still qualify.
with reqd as (
    -- The pairs that must all be present.
    select src.book, src.pages
    from #t as src
    where src.reader = 1
)
select cand.reader
from #t as cand
inner join reqd
    on reqd.book = cand.book
   and reqd.pages = cand.pages
group by cand.reader
-- Qualifies when the number of matched rows equals the number of required pairs.
having count(reqd.book) = (select count(*) from reqd)

Where clause on Running total

I have this table which stores containers by region and the number of coffee pouches in each of the containers.
-- Recreate dbo.Container from scratch so the script can be re-run.
IF OBJECT_ID('dbo.Container') IS NOT NULL
    DROP TABLE dbo.Container
GO
-- One row per container: the region it sits in and how many pouches it holds.
CREATE TABLE dbo.Container
(
    Id                    INT NOT NULL,
    Region                INT NOT NULL,
    NumberOfCoffeePouches INT NOT NULL,
    CONSTRAINT pkc_Container__Id PRIMARY KEY CLUSTERED (Id ASC)
)
GO
-- Sample data: six containers in region 1, three in region 2.
INSERT INTO dbo.Container (Id, Region, NumberOfCoffeePouches)
VALUES
    (1, 1, 10),
    (2, 1, 30),
    (3, 1, 5),
    (4, 1, 7),
    (5, 1, 1),
    (6, 1, 3),
    (7, 2, 4),
    (8, 2, 4),
    (9, 2, 4)
I need to list out the container Ids that will be used to fulfill an order of, say 50, coffee pouches. Over supplying is OK.
Here is query I have come up with
-- Size of the order to fulfil (local variables use the @ sigil).
declare @RequiredCoffeePouches int = 50
-- Lists region 1 containers in Id order, keeping each container whose
-- preceding running total has not yet exceeded the order size.
select
    sq2.Id,
    sq2.NumberOfCoffeePouches,
    sq2.RunningTotal,
    sq2.LagRunningTotal
from
(
    select
        sq1.Id,
        sq1.NumberOfCoffeePouches,
        sq1.RunningTotal,
        -- Running total as it stood before this container (0 for the first).
        lag(sq1.RunningTotal, 1, 0) over (order by sq1.Id asc)
            as 'LagRunningTotal'
    from
    (
        select
            c.Id,
            c.NumberOfCoffeePouches,
            -- Cumulative pouch count up to and including this container.
            sum(c.NumberOfCoffeePouches)
                over (order by c.Id asc) as 'RunningTotal'
        from
            dbo.Container as c
        where
            c.Region = 1
    ) as sq1
) as sq2
where
    sq2.LagRunningTotal <= @RequiredCoffeePouches
It gives the expected result
Id NumberOfCoffeePouches RunningTotal LagRunningTotal
----------- --------------------- ------------ ---------------
1 10 10 0
2 30 40 10
3 5 45 40
4 7 52 45
Question:
Is there a better and more optimized way to achieve this?
In particular, the Container table is very large, and I think the subquery sq1 will unnecessarily calculate the RunningTotal for all the containers in the region. I was wondering if there is any way to have sq1 stop processing more rows once the RunningTotal exceeds @RequiredCoffeePouches.
Two things:
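Filtering as early as possible, inside the relevant sub-select, can greatly increase the speed of the query because it pulls less data. Note, however, that a filter on a window-function result such as LagRunningTotal can only be applied one level outside the sub-select that computes it. Using your example: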
-- Lists region 1 containers in Id order until the order size held in
-- @RequiredCoffeePouches (declared by the caller) has been covered.
SELECT
    sq2.Id,
    sq2.NumberOfCoffeePouches,
    sq2.RunningTotal,
    sq2.LagRunningTotal
FROM
(
    SELECT
        sq1.Id,
        sq1.NumberOfCoffeePouches,
        sq1.RunningTotal,
        -- Running total as it stood before this container (0 for the first).
        LAG(sq1.RunningTotal, 1, 0) OVER (ORDER BY sq1.Id ASC) AS LagRunningTotal
    FROM
    (
        SELECT
            c.Id,
            c.NumberOfCoffeePouches,
            -- Cumulative pouch count up to and including this container.
            SUM(c.NumberOfCoffeePouches) OVER (ORDER BY c.Id ASC) AS RunningTotal
        FROM dbo.Container AS c
        -- The region filter is applied as early as possible.
        WHERE c.Region = 1
    ) AS sq1
) AS sq2
-- LagRunningTotal is a window-function alias defined in sq2's select list, so
-- it can only be filtered here, one level outside the sub-select that
-- computes it; the alias sq2 is not visible inside its own definition.
WHERE sq2.LagRunningTotal <= @RequiredCoffeePouches
CTEs can also improve performance:
-- Same running-total query written with CTEs. @RequiredCoffeePouches is the
-- order size declared by the caller.
;WITH sql1CTE AS (
    -- Cumulative pouch count per region 1 container, in Id order.
    SELECT
        c.Id,
        c.NumberOfCoffeePouches,
        SUM(c.NumberOfCoffeePouches) OVER (ORDER BY c.Id ASC) AS RunningTotal
    FROM dbo.Container AS c
    WHERE c.Region = 1
),
sql2CTE AS (
    -- Adds the running total as it stood before each container (0 for the first).
    SELECT
        Id,
        NumberOfCoffeePouches,
        RunningTotal,
        LAG(RunningTotal, 1, 0) OVER (ORDER BY Id ASC) AS LagRunningTotal
    FROM sql1CTE
)
SELECT
    Id,
    NumberOfCoffeePouches,
    RunningTotal,
    LagRunningTotal
FROM sql2CTE
-- The filter must sit outside sql2CTE: LagRunningTotal is a window-function
-- alias and cannot be referenced in the WHERE clause of the query defining it.
WHERE LagRunningTotal <= @RequiredCoffeePouches
SQL Server CTE Basics
If you're using SSMS, select "Include Client Statistics" and "Include Actual Execution Plan" to keep track of how your query performs while you're crafting it.

SQL: Get last referring and post referring page during a signup process

I'm trying to write an efficient SQL query to select 'before' and 'after' pages for the signup process. I have a solution using for loops which doesn't scale and am hoping to get a SQL native solution.
For a single clientId, I would want to get the latest pages before sign up and after signup (only 1 from each side of the join process).
The join process ALWAYS has /join/complete
Input:
clientId time path
1 0 /page1
1 10 /page2
1 20 /join/<random_token_id>
1 30 /join/<random_token_id>/step2
1 40 /join/complete
1 50 /page2
2 0 /page3
2 10 /join/complete
Output
ClientId Before After
1 /page2 /page2
2 /page3 null
I would be grateful if there is an easy solution in SQL. If it's complex, just leave it out. I will leave the code running overnight.
#standardSQL
WITH lineup AS (
  -- Number each client's page views in time order.
  SELECT
    clientId,
    time,
    path,
    ROW_NUMBER() OVER (PARTITION BY clientId ORDER BY time) AS pos
  FROM `project.dataset.table`
), start AS (
  -- Position of each client's first page under /join/.
  SELECT
    clientId,
    MIN(pos) AS pos
  FROM lineup
  WHERE STARTS_WITH(path, '/join/')
  GROUP BY clientId
), complete AS (
  -- Position of the page that finishes the signup.
  SELECT
    clientId,
    pos
  FROM lineup
  WHERE path = '/join/complete'
), before AS (
  -- The page viewed immediately before the signup started.
  SELECT
    lineup.clientId,
    path
  FROM lineup
  JOIN start
    ON start.clientId = lineup.clientId
   AND start.pos - 1 = lineup.pos
), after AS (
  -- The page viewed immediately after the signup completed.
  SELECT
    lineup.clientId,
    path
  FROM lineup
  JOIN complete
    ON complete.clientId = lineup.clientId
   AND complete.pos + 1 = lineup.pos
)
-- One row per client; either side is NULL when no such page exists.
SELECT
  clientId,
  before.path AS before,
  after.path AS after
FROM before
FULL OUTER JOIN after USING (clientId)
You can test / play with above using dummy data from your question as below
#standardSQL
WITH `project.dataset.table` AS (
  -- Dummy page views from the question, plus a client 3 that starts on
  -- /join/complete.
  SELECT clientId, time, path
  FROM UNNEST([
    STRUCT(1 AS clientId, 0 AS time, '/page1' AS path),
    (1, 10, '/page2'),
    (1, 20, '/join/<random_token_id>'),
    (1, 30, '/join/<random_token_id>/step2'),
    (1, 40, '/join/complete'),
    (1, 50, '/page2'),
    (2, 0, '/page3'),
    (2, 10, '/join/complete'),
    (3, 0, '/join/complete'),
    (3, 10, '/page4')
  ])
), lineup AS (
  -- Number each client's page views in time order.
  SELECT
    clientId,
    time,
    path,
    ROW_NUMBER() OVER (PARTITION BY clientId ORDER BY time) AS pos
  FROM `project.dataset.table`
), start AS (
  -- Position of each client's first page under /join/.
  SELECT
    clientId,
    MIN(pos) AS pos
  FROM lineup
  WHERE STARTS_WITH(path, '/join/')
  GROUP BY clientId
), complete AS (
  -- Position of the page that finishes the signup.
  SELECT
    clientId,
    pos
  FROM lineup
  WHERE path = '/join/complete'
), before AS (
  -- The page viewed immediately before the signup started.
  SELECT
    lineup.clientId,
    path
  FROM lineup
  JOIN start
    ON start.clientId = lineup.clientId
   AND start.pos - 1 = lineup.pos
), after AS (
  -- The page viewed immediately after the signup completed.
  SELECT
    lineup.clientId,
    path
  FROM lineup
  JOIN complete
    ON complete.clientId = lineup.clientId
   AND complete.pos + 1 = lineup.pos
)
-- One row per client; either side is NULL when no such page exists.
SELECT
  clientId,
  before.path AS before,
  after.path AS after
FROM before
FULL OUTER JOIN after USING (clientId)
with result as
Row clientId before after
1 1 /page2 /page2
2 2 /page3 null
3 3 null /page4