Calculations through a hierarchy in SQL - sql

I am trying to perform a calculation by navigating through a hierarchy. In the simple example below, where organizations have a headcount and can be associated with parent organizations, the headcount is only specified for "leaf" organizations. I want to calculate the headcount all the way up the hierarchy using the simple rule: parent_headcount = sum(children_headcount).
I liked the idea of using SQL Common Table Expression for this, but this does not quite work. The determination of the level works (as it follows the natural top-down order of navigation), but not the headcount determination.
How would you fix this, or is there a better way to perform this calculation bottom-up?
-- Recreate the self-referencing Org table: every row may point at a parent row
-- of the same table through the "parent" column.
DROP TABLE IF EXISTS Org;
CREATE TABLE Org (
    ID        INT IDENTITY (1,1) NOT NULL,
    Name      NVARCHAR(50),
    parent    INT NULL,
    employees INT,
    CONSTRAINT [PK_Org] PRIMARY KEY CLUSTERED (ID),
    CONSTRAINT [FK_Parent] FOREIGN KEY (parent) REFERENCES Org (ID)
);

-- Sample hierarchy. Each insert looks its parent up by name, so the rows must
-- be inserted parent-first, one statement at a time.
INSERT INTO Org (name, parent, employees)
VALUES ('ACME', NULL, 0);

INSERT INTO Org (name, parent, employees)
VALUES ('ACME France', (SELECT Org.ID FROM Org WHERE Name = 'ACME'), 0);

INSERT INTO Org (name, parent, employees)
VALUES ('ACME UK', (SELECT Org.ID FROM Org WHERE Name = 'ACME'), 0);

INSERT INTO Org (name, parent, employees)
VALUES ('ACME Paris', (SELECT Org.ID FROM Org WHERE Name = 'ACME France'), 200);

INSERT INTO Org (name, parent, employees)
VALUES ('ACME Lyons', (SELECT Org.ID FROM Org WHERE Name = 'ACME France'), 100);

INSERT INTO Org (name, parent, employees)
VALUES ('ACME London', (SELECT Org.ID FROM Org WHERE Name = 'ACME UK'), 150);

-- Show the loaded rows.
SELECT * FROM Org;
-- Try to determine the total number of employees at any level of the hierarchy
-- (asker's attempt, kept as posted: the level comes out right, the headcount does not).
with Orgs as (
-- Anchor member: the rows without a parent, seeded with employees = 0 and level = 0.
select
ID, name, parent, 0 as employees, 0 as level from Org where parent is NULL
union all
-- Recursive member: attaches each child to its already-produced parent row and
-- adds the child's own employees to the value carried by that parent row.
-- Values therefore flow from the root DOWN to the leaves; nothing is ever
-- added back onto a parent row.
select
child.ID, child.name, child.parent, Orgs.employees + child.employees, level + 1 from Org child
join Orgs on child.parent = Orgs.ID
)
select * from Orgs;
This query returns:
The determination of the level is correct, but the calculation of the headcount is not (UK should be 150, France 300, and 450 at the top of the hierarchy). It seems that a CTE is suitable for top-down navigation, but not bottom-up?

Just another option using the datatype hierarchyid
Note: the @Top variable and the nesting of the names are optional
Example
-- Subtree headcount using hierarchyid paths.
-- @Top is optional: NULL starts from every row whose Parent is NULL; otherwise
-- the traversal starts from the row whose ID equals @Top.
-- (T-SQL local variables must start with '@'; '#Top' is not a valid variable name.)
Declare @Top int = null
;with cteP as (
      -- Anchor member: the starting row(s), each given the path '/<ID>/'.
      Select ID
            ,Parent
            ,Name
            ,HierID = convert(hierarchyid,concat('/',ID,'/'))
            ,employees
      From   Org
      Where  IsNull(@Top,-1) = case when @Top is null then isnull(Parent ,-1) else ID end
      Union  All
      -- Recursive member: append the child's ID to its parent's path.
      Select ID  = r.ID
            ,Parent  = r.Parent
            ,Name   = r.Name
            ,HierID = convert(hierarchyid,concat(p.HierID.ToString(),r.ID,'/'))
            ,r.employees
      From   Org r
      Join   cteP p on r.Parent  = p.ID)
-- Pair every node A with every node B whose path starts with A's path
-- (A itself and all of its descendants) and sum B's employees per A.
Select Lvl   = A.HierID.GetLevel()
      ,A.ID
      ,A.Parent
      -- Indent the name by depth to show the nesting.
      ,Name  = Replicate('|---',A.HierID.GetLevel()-1) + A.Name
      ,Employees = sum(B.Employees)
From  cteP A
Join  cteP B on B.HierID.ToString() like A.HierID.ToString()+'%'
Group By A.ID,A.Parent,A.Name,A.HierID
Order By A.HierID
Returns

You need to traverse the hierarchy from every non-leaf node and then sum up all the paths that start at that node.
-- Headcount below every non-leaf org.
-- One traversal is started at each org that has at least one child ([top] = that
-- org); every descendant reached is tagged with the [top] it was reached from.
with Orgs as (
    -- Anchor member: each non-leaf org, contributing 0 itself.
    select
        g.ID as [top], g.ID, g.name, g.parent, 0 as employees, 0 as level
    from Org g
    where exists (select 1 from Org g2 where g.ID = g2.parent)
    union all
    -- Recursive member: carry only the child's OWN employees. The outer query
    -- already sums every row per [top]; accumulating the parent's running value
    -- here as well would count an intermediate org's headcount once per descendant.
    select
        Orgs.[top], child.ID, child.name, child.parent, child.employees, Orgs.level + 1
    from Org child
    join Orgs on child.parent = Orgs.ID
)
select [top] as id, sum(employees) employees
from Orgs
group by [top];
Db fiddle

Try this:
/***** DATA *************/
-- Define the hierarchical table Org.
-- IF EXISTS keeps the script re-runnable: a bare DROP TABLE fails the first
-- time, when Org does not exist yet.
drop table if exists Org;
create table Org (
ID int identity (1,1) not null, Name nvarchar(50), parent int null, employees int,
constraint [PK_Org] primary key clustered (ID),
constraint [FK_Parent] foreign key (parent) references Org(ID)
);
-- Fill it in with a simple example (parents first: each row looks its parent up by name)
insert into Org (name, parent, employees) values ('ACME', NULL, 0);
insert into Org (name, parent, employees) values ('ACME France', (select Org.ID from Org where Name = 'ACME'), 0);
insert into Org (name, parent, employees) values ('ACME UK', (select Org.ID from Org where Name = 'ACME'), 0);
insert into Org (name, parent, employees) values ('ACME Paris', (select Org.ID from Org where Name = 'ACME France'), 200);
insert into Org (name, parent, employees) values ('ACME Lyons', (select Org.ID from Org where Name = 'ACME France'), 100);
insert into Org (name, parent, employees) values ('ACME London', (select Org.ID from Org where Name = 'ACME UK'), 150);
select * from Org;
/******** END DATA ***********/
/******** QUERY ******/
-- Total number of employees at every level of the hierarchy.
-- Orgs starts one traversal at EVERY org (RootID = that org, level 0) and walks
-- down to all of its descendants, so each org appears once for itself and once
-- for each of its ancestors.
with Orgs as (
    select ID, name, parent, employees, ID as RootID, 0 as level
    from Org
    union all
    select child.ID, child.name, child.parent, child.employees, Orgs.RootID, Orgs.level + 1
    from Org child
    join Orgs on child.parent = Orgs.ID
)
select Org.Id,
       Org.Parent,
       Org.Name,
       Org.employees,
       D.[Level],
       -- NOTE(review): the alias looks copied from another query; it holds the
       -- employee total of the org's whole subtree. Kept so the output columns
       -- do not change.
       S.ProductCountIncludingChildren
from Org
-- Subtree total: all rows reached from a given root.
inner join (
    select RootID,
           sum(employees) as ProductCountIncludingChildren
    from Orgs
    group by RootID
) as S
    on Org.Id = S.RootID
-- Depth of each org: its largest distance from any root that reaches it.
-- Computed once per org here instead of with a correlated subquery per output row.
inner join (
    select ID,
           max(level) as [Level]
    from Orgs
    group by ID
) as D
    on D.ID = Org.Id
order by Org.Id;
/**** END QUERY ******/

Related

SQL to Select only full groups of data

Let's say I have three sample tables for groups of people as shown below.
Table users:
id
name
available
1
John
true
2
Nick
true
3
Sam
false
Table groups:
id
name
1
study
2
games
Table group_users:
group_id
user_id
role
1
1
teach
1
2
stdnt
1
3
stdnt
2
1
tank
2
2
heal
I need to show a user all the groups that he participates in and that are also available right now, which means that all users in the group have users.available = true.
I tried something like:
-- Asker's attempt (kept as posted). It lists every group joined to its
-- membership rows and users, then keeps only the rows for user 1 when that
-- user is available.
SELECT `groups`.*, `users`.* , `group_users`.*
FROM `groups`
LEFT JOIN `group_users` ON `groups`.`id` = `group_users`.`group_id`
LEFT JOIN `users` ON `users`.`id` = `group_users`.`user_id`
-- These predicates test columns of the LEFT JOINed users table, so rows with no
-- matching user are discarded as well. Each row is judged on its own; nothing
-- here looks at the other members of the same group.
WHERE `users`.`available` = true AND `users`.`id` = 1
But it just shows groups and part of their users, that are available. And I need to have ONLY the groups that have all their users available.
If I were to find all available groups as User 1, I should get only group 2 and its users. How do I do this the right way?
Tables DDL:
-- Sample schema: users, teams, and the team_users membership table.
CREATE TABLE users (
    id        int PRIMARY KEY,
    name      varchar(256) NOT NULL,
    available bool
);
CREATE TABLE teams (
    id   int PRIMARY KEY,
    name varchar(256) NOT NULL
);
CREATE TABLE team_users (
    team_id int NOT NULL,
    user_id int NOT NULL,
    role    varchar(64)
);

-- Sample rows. Column lists are spelled out so the inserts do not depend on the
-- physical column order of the tables.
INSERT INTO users (id, name, available) VALUES
    (1, 'John', true ),
    (2, 'Nick', true ),
    (3, 'Sam' , false);
INSERT INTO teams (id, name) VALUES
    (1, 'study'),
    (2, 'games');
INSERT INTO team_users (team_id, user_id, role) VALUES
    (1, 1, 'teach'),
    (1, 2, 'stdnt'),
    (1, 3, 'stdnt'),
    (2, 1, 'tank' ),
    (2, 2, 'heal' );
mySQL select version() output:
10.8.3-MariaDB-1:10.8.3+maria~jammy
Check whether this is what you need:
-- Members of every team that contains the requested user and no unavailable user.
-- @user_name is a user-defined variable and must be set beforehand,
-- e.g. SET @user_name = 'John';
-- (It has to be written with '@': in MySQL/MariaDB a '#' starts a comment and
-- would cut the statement off in the middle of the SUM(...) expression.)
WITH cte AS (
    SELECT users.name AS username,
           teams.id   AS teamid,
           teams.name AS teamname,
           -- number of unavailable members in the row's team
           SUM(NOT users.available) OVER (PARTITION BY teams.id) AS non_available_present,
           -- number of members of the row's team whose name is @user_name
           SUM(users.name = @user_name) OVER (PARTITION BY teams.id) AS needed_user_present
    FROM team_users
    JOIN users ON team_users.user_id = users.id
    JOIN teams ON team_users.team_id = teams.id
)
SELECT username, teamid, teamname
FROM cte
-- both counters are used as booleans: non-zero is true, zero is false
WHERE needed_user_present
  AND NOT non_available_present;
https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=605cf10d147fd904fb2d4a6cd5968302
PS. I use user name as a criteria, you may edit and use user's identifier, of course.
Join the tables and aggregate with the conditions in the HAVING clause:
-- Teams in which every member is available and user 1 is a member.
SELECT t.id, t.name
FROM teams t
INNER JOIN team_users tu ON t.id = tu.team_id
INNER JOIN users u ON u.id = tu.user_id
-- t.name is grouped as well so the query is also accepted when
-- ONLY_FULL_GROUP_BY is enabled; t.id is the primary key, so the groups are unchanged.
GROUP BY t.id, t.name
-- MIN(u.available) is false as soon as one member is unavailable;
-- SUM(u.id = 1) counts the membership rows that belong to user 1.
HAVING MIN(u.available) AND SUM(u.id = 1);
The HAVING clause is a simplification of:
HAVING MIN(u.available) = true AND SUM(u.id = 1) > 0
See the demo.
first you need to find those group which users is available. then find the all the group details of those group which is not related to those group which user is available.
-- All membership rows (with team and user details) of the teams that user 1
-- belongs to and in which no member is unavailable.
SELECT * FROM team_users a
JOIN teams b ON a.team_id=b.id
JOIN users c ON a.user_id=c.id
-- Reject a team as soon as it has one UNAVAILABLE member. Testing for an
-- available member here (u.available = 1) would instead drop every team that
-- has anyone available, which is the opposite of what was asked.
WHERE NOT EXISTS
(
SELECT 1 FROM team_users tu
JOIN users u ON tu.user_id=u.id AND u.available = 0
WHERE tu.team_id=a.team_id
)
-- Keep only the teams the requesting user (id 1 in the question) participates in.
AND EXISTS
(
SELECT 1 FROM team_users me
WHERE me.team_id=a.team_id AND me.user_id = 1
)

How to request JSON among three tables

Suppose I have 3 tables employee, child and employee_child.
The first table contains id and name columns, the second contains id and name as well, and the last contains id, employee_id and child_id.
Those tables (employee and child) are associated in the table employee_child.
To help, I let these queries to populate those tables:
-- Employees.
CREATE TABLE employee (
    id   INTEGER PRIMARY KEY,
    name VARCHAR(128)
);

INSERT INTO employee (id, name)
VALUES (1, 'Josh'),
       (2, 'Michel'),
       (3, 'Will');

-- Children.
CREATE TABLE child (
    id   INTEGER PRIMARY KEY,
    name VARCHAR(128)
);

INSERT INTO child (id, name)
VALUES (1, 'Karen'),
       (2, 'Adam'),
       (3, 'Ellen');

-- Association table linking an employee to a child.
CREATE TABLE employee_child (
    id          INTEGER PRIMARY KEY,
    employee_id INTEGER NOT NULL,
    child_id    INTEGER NOT NULL
);

ALTER TABLE employee_child
    ADD FOREIGN KEY (employee_id) REFERENCES employee (id);

ALTER TABLE employee_child
    ADD FOREIGN KEY (child_id) REFERENCES child (id);

-- Josh has Karen and Adam, Michel has Ellen, Will has no children.
INSERT INTO employee_child (id, employee_id, child_id)
VALUES (1, 1, 1),
       (2, 1, 2),
       (3, 2, 3);
Results of selects from tables
I need to get the employee's name and his child's names by employee.id using JSON request, and get the following answer:
{
"id":1,
"name":"Josh",
"children":[
{
"id":1,
"name":"Karen"
},
{
"id":2,
"name":"Adam"
}
]
}
I've tried another way, but the most right answer that I got was this following query:
-- Asker's attempt (kept as posted): a single-row SELECT with two columns, each
-- filled by its own FOR JSON PATH subquery, serialized again by the outer
-- FOR JSON PATH.
-- First column: id and name of employee 1.
SELECT [''] = (SELECT e.id, e.name
FROM employee e
WHERE e.id = 1
FOR JSON PATH),
-- Second column: id and name of every child linked to employee 1.
[children] = (SELECT c.id, c.name
FROM child c
INNER JOIN employee_child ec ON ec.child_id= c.id
WHERE ec.employee_id = 1
FOR JSON PATH)
FOR JSON PATH
I'd appreciate help getting the complete result shown above.
FOR JSON AUTO seems to get you exactly what you want:
-- Employee 1 with the joined child rows nested under a "children" property.
-- FOR JSON AUTO nests the output by the table aliases in the select list, so
-- the alias "children" becomes the name of the nested array.
SELECT e.id,
       e.name,
       children.id,
       children.name
FROM dbo.employee e
     JOIN dbo.employee_child ec ON e.id = ec.employee_id
     JOIN dbo.child children ON ec.child_id = children.id
WHERE e.id = 1
-- WITHOUT_ARRAY_WRAPPER drops the outer [ ] so that the single employee is
-- returned as one JSON object, as in the requested output.
FOR JSON AUTO, WITHOUT_ARRAY_WRAPPER;

SQL get Parent where Children have specific values

Given is a Parent with the field id and Child relation with parent_id and name. How would a query look like to get all Parents which have two children, one with the name 'John' and one with the name 'Mike'. My problem is, that I am not able to build a query which returns the Parents having both children. I used Where IN ('John', 'Mike') so I get also the Parents returned which have also one child with the name 'John' or 'Mike'. But I want only the Parents with both children only.
-- Asker's attempt (kept as posted): returns one row per matching child.
SELECT * FROM Parent
JOIN Child ON Child.parent_id = Parent.id
-- IN is satisfied by a child named 'John' OR a child named 'Mike', so a parent
-- that has only one of the two is returned as well.
WHERE Child.name IN ('John', 'Mike')
My query is of course more complex and this is only an abstraction for what I want to achieve. I have in mind, that I first need to join the children on parent_id and make something with that, but no idea.
You can do two joins and look for your specific records. This example shows that parent 1 will return with both kiddos, but not parent 2 that only has a Mike.
-- Demo data held in table variables. T-SQL variables, including table
-- variables, must be named with a leading '@'; 'DECLARE #parent TABLE' does not parse.
DECLARE @parent TABLE (ID INT)
DECLARE @child TABLE (ID INT, parentID INT, name VARCHAR(100))

INSERT INTO @parent
VALUES
(1),
(2),
(3),
(4),
(5),
(6)

INSERT INTO @child (ID, parentID, name)
VALUES
(1, 1, 'Mike'),
(2, 1, 'John'),
(3, 2, 'Mike'),
(4, 2, 'Bill'),
(5, 3, 'Dave'),
(6, 4, 'Sam')

-- Join the child table twice: once for 'Mike' and once for 'John'. Both inner
-- joins must find a row, so only parents that have both children survive.
SELECT p.*
FROM @parent p
INNER JOIN @child c1
ON c1.parentID = p.id
AND c1.name = 'Mike'
INNER JOIN @child c2
ON c2.parentID = p.ID
AND c2.name = 'John'
Try having two steps in the where clause. Both conditions will have to be true to return a parent record.
-- WHERE-clause fragment: append it to a query that selects from parent.
-- Each IN test must hold on its own, so the parent needs a child named 'John'
-- and a child named 'Mike'.
where parent.id in (select parent_id from child where child.name='John')
and parent.id in (select parent_id from child where child.name='Mike')
Something like this would work in Postgres, using HAVING:
-- Parents that have both a child named 'John' and a child named 'Mike'.
SELECT parent_id, SUM(num)
FROM (
    -- DISTINCT: a parent contributes at most one row per name, even when it
    -- has several children with that name.
    SELECT DISTINCT parent_id, 1 AS num FROM Child WHERE name = 'John'
    -- UNION ALL, not UNION: both branches produce identical (parent_id, 1)
    -- rows, so a de-duplicating UNION would merge them into a single row and
    -- SUM(num) could never reach 2.
    UNION ALL
    SELECT DISTINCT parent_id, 1 AS num FROM Child WHERE name = 'Mike'
) parents
GROUP BY parent_id
HAVING SUM(num) = 2
So,
added the solution with the double join into an Ecto query and it passed my tests :)
# Count the parents that have one child named "John" (c) and one named "Mike" (cc).
# Ecto query expressions compare with `==` (`=` is not valid inside a query),
# and every keyword clause, including the last `where:`, is separated by a comma.
from c in Child,
  join: p in Parent, on: c.parent_id == p.id,
  join: cc in Child, on: p.id == cc.parent_id,
  where: c.name == ^"John",
  where: cc.name == ^"Mike",
  select: count(p.id)
Thanks for the ideas and the fast help :)

Insert multiple references in a nested table

I have the table customer_table containing a list (nested table) of references toward rows of the account_table.
Here are my declarations :
Customer type:
-- Object type for a customer: an id, a reference to a person object and a
-- nested table of account references.
CREATE TYPE customer AS OBJECT (
    custid   INTEGER,
    infos    REF type_person,
    accounts accounts_list
);
accounts_list type:
-- Nested-table type whose elements are references to account objects.
CREATE TYPE accounts_list AS TABLE OF REF account;
Table:
-- Object table of customers; the account references of each customer are
-- stored out of line in accounts_refs_nt_table.
CREATE TABLE customer_table OF customer (
    custid PRIMARY KEY,
    CONSTRAINT c_inf CHECK (infos IS NOT NULL),
    CONSTRAINT c_acc CHECK (accounts IS NOT NULL)
)
NESTED TABLE accounts STORE AS accounts_refs_nt_table;
So I would like to insert multiple refs in my nested table when I create a customer, as an account can be shared.
I can't find out how to do that.
I tried:
-- Asker's failing attempt (kept as posted): build the new customer row from
-- person 0 and a collection holding references to accounts 0 and 1.
INSERT INTO customer_table(
SELECT 0,
ref(p),
-- A query is passed directly as the argument of the accounts_list constructor.
accounts_list(
SELECT ref(a) FROM account_table a WHERE a.accid = 0
UNION ALL
SELECT ref(a) FROM account_table a WHERE a.accid = 1
)
-- NOTE(review): the statement has two FROM clauses in a row (DUAL, then
-- person_table); presumably one reason it is rejected - confirm.
FROM DUAL
FROM person_table p
WHERE p.personid = 0
);
With no success.
Thank you
You can use the collect() function, e.g. in a subquery:
-- Insert customer 0 for person 0; the scalar subquery gathers the references
-- to accounts 0 and 1 into one accounts_list value.
INSERT INTO customer_table
SELECT
    0,
    REF(per),
    (
        SELECT CAST(COLLECT(REF(acct)) AS accounts_list)
        FROM account_table acct
        WHERE acct.accid IN (0, 1)
    )
FROM person_table per
WHERE per.personid = 0;
As the documentation says, "To get accurate results from this function you must use it within a CAST function", so I've explicitly cast it to your accounts_list type.
If you don't want a subquery you could instead do:
-- Same insert without a scalar subquery: pair the person with each wanted
-- account reference, then collapse the pairs into one row per person.
INSERT INTO customer_table
SELECT
    0,
    REF(per),
    CAST(COLLECT(acc_refs.r) AS accounts_list)
FROM person_table per
CROSS JOIN (
    SELECT REF(acct) AS r
    FROM account_table acct
    WHERE acct.accid IN (0, 1)
) acc_refs
WHERE per.personid = 0
GROUP BY REF(per);
but I think that's a bit messier; check the performance of both though...

How to show fields from most recently added detail in a view?

QUERY:
-- Master table. IF EXISTS keeps the script runnable the first time, when the
-- temporary table does not exist yet and a bare DROP TABLE would fail.
drop table if exists #foot
create table #foot (
id int primary key not null,
name varchar(50) not null
)
go
-- Detail table: zero or more notes per #foot row.
drop table if exists #note
create table #note (
id int primary key not null,
note varchar(MAX) not null,
foot_id int not null references #foot(id)
)
go
-- Sample data: Joe gets two notes, Mike one, Rob none.
insert into #foot values
(1, 'Joe'), (2, 'Mike'), (3, 'Rob')
go
insert into #note (id, note, foot_id) values (1, 'Joe note 1', 1)
go
insert into #note (id, note, foot_id) values(2, 'Joe note 2', 1)
go
insert into #note (id, note, foot_id) values(3, 'Mike note 1', 2)
go
-- Every master row with each of its notes (NULLs when it has none).
select F.name, N.note, N.id
from #foot F left outer join #note N on N.foot_id=F.id
RESULT:
QUESTION:
How can I create a view/query resulting in one row for each master record (#foot) along with fields from the most recently inserted detail (#note), if any?
GOAL:
(NOTE: the way I would tell which one is most recent is the id which would be higher for newer records)
-- One row per #foot record carrying its newest note (highest note id), or
-- NULLs when the record has no note.
SELECT latest.name, latest.note, latest.id
FROM (
    SELECT f.name,
           n.note,
           n.id,
           -- number each record's notes from newest to oldest
           ROW_NUMBER() OVER (PARTITION BY f.id ORDER BY n.id DESC) AS note_rank
    FROM #foot AS f
    LEFT JOIN #note AS n
        ON n.foot_id = f.id
) AS latest
WHERE latest.note_rank = 1
Assuming the ID created in the #note table is always increasing (enforced by using IDENTITY, or by controlling the inserts so that each new ID is higher than the current maximum), you can use the following query (which uses a ranking function):
-- One row per #foot record with its newest note (highest note id), if any.
-- The statement before WITH must be terminated with a semicolon.
WITH Dat AS
(
    SELECT f.name,
           n.note,
           n.id,
           -- Partition on the master key f.id rather than on n.foot_id:
           -- n.foot_id is NULL for every record without notes, which puts all
           -- of those records into one shared partition. ROW_NUMBER guarantees
           -- exactly one rn = 1 row per record.
           ROW_NUMBER() OVER (PARTITION BY f.id ORDER BY n.id DESC) AS rn
    FROM #foot f
    LEFT OUTER JOIN #note n
        ON n.foot_id = f.id
)
SELECT name, note, id, rn
FROM Dat
WHERE rn = 1