SQL: Optimizing Recursive CTE - sql

Example table structure:
EmployeeId TeamleaderId TopTeamleaderId LEVEL ParentTree CompanyId
1 0 0 0 NULL 1
2 1 1 1 2>1 1
3 2 1 2 3>2>1 1
TeamleaderId is foreignKey reference to EmployeeId in the same table
Goal:
Whenever a row is inserted in the table with EmployeeId, TeamleaderId, CompanyId automatically populate TopTeamleaderId, LEVEL and ParentTree with AFTER INSERT trigger
Code:
-- Recompute [LEVEL], ParentTree and TopTeamleaderId for the rows just inserted.
-- Meant to run inside the AFTER INSERT trigger, where the INSERTED pseudo-table is in scope.
WITH CTE AS (
    -- Anchor: the top-level employees (no team leader) of every company touched by the insert.
    -- The root's own EmployeeId is carried down as TopTeamleaderId so that every
    -- descendant ends up pointing at the root (matches the sample rows: 2 -> 1, 3 -> 1).
    SELECT
        root.EmployeeId,
        root.TeamleaderId,
        0 AS [Level],
        CAST(root.EmployeeId AS varchar(100)) AS Heirarchy,
        root.EmployeeId AS TopTeamleaderId
    FROM dbo.Employee AS root
    WHERE root.TeamleaderId IS NULL
      AND root.CompanyId IN (SELECT CompanyId FROM INSERTED)
    UNION ALL
    -- Step: attach each employee to the CTE row of its team leader, one level deeper,
    -- prefixing its own id to the leader's path ("3>2>1").
    SELECT
        emp.EmployeeId,
        emp.TeamleaderId,
        CTE.[Level] + 1 AS [Level],
        CAST((CAST(emp.EmployeeId AS varchar(100)) + '>' + CTE.Heirarchy) AS varchar(100)) AS Heirarchy,
        CTE.TopTeamleaderId
    FROM CTE
    INNER JOIN dbo.Employee AS emp
        ON CTE.EmployeeId = emp.TeamleaderId
)
-- Only the freshly inserted, non-root rows are written back.
UPDATE emp
SET [LEVEL] = TC.[Level],
    ParentTree = TC.Heirarchy,
    TopTeamleaderId = TC.TopTeamleaderId
FROM dbo.Employee AS emp
INNER JOIN CTE AS TC
    ON emp.EmployeeId = TC.EmployeeId
WHERE TC.TeamleaderId IS NOT NULL
  AND TC.EmployeeId IN (SELECT EmployeeId FROM INSERTED)
Problem:
Imagine a company with 1,000,000 employees: this query would take a long time to execute. How can it be optimized so that only the parents of the inserted row are taken into account?

Recursive CTEs are great, but as you can see, performance can suffer with larger hierarchies. It is my firm belief that there is no shame in temp tables.
The following will generate a 200K point hierarchy in 0.784 seconds.
Example
-- Build the hierarchy level by level in a temp table instead of a recursive CTE.
-- Seed: employees without a team leader become level 1.
Select EmployeeId
      ,TeamleaderId
      ,Lvl=1
      ,TopTeamleaderId = 0
      ,ParentTree=cast(EmployeeId as varchar(500))
      ,CompanyID
Into  #TempBld
From  Employee
Where TeamleaderId is null

Declare @Cnt int=1
While @Cnt<=30 --<< Set Your Max Level
Begin
    -- Add the direct reports of everyone placed at the current level.
    Insert Into #TempBld (EmployeeId, TeamleaderId, Lvl, TopTeamleaderId, ParentTree, CompanyID)
    Select A.EmployeeId
          ,A.TeamleaderId
          ,B.Lvl+1
          ,IIF(B.Lvl=1,B.EmployeeId,B.TopTeamleaderId)  -- level-1 rows are the top team leaders
          ,concat(A.EmployeeId,'>',B.ParentTree)
          ,A.CompanyID
    From  Employee A
    Join  #TempBld B on (B.Lvl=@Cnt and A.TeamleaderId=B.EmployeeId)

    -- A level that added nothing means every deeper level is empty too; stop early.
    If @@ROWCOUNT = 0 Break

    Set @Cnt=@Cnt+1
End
--Select * from #TempBld Order by ParentTree
Returns

Related

SQL - get all parents/childs?

Hopefully someone can help with this. I have received a table of data which I need to restructure and build a denormalized table out of. The table structure is as follows:
UserID Logon ParentID
2344 Test1 2000
2345 Test2 2000
The issue I have is the ParentID is also a UserID of its own and in the same table.
SELECT * FROM tbl where ParentID=2000 gives the below output
UserID Logon ParentID
2000 Test Team 2500
Again, the ParentID of this is also stored as a UserID..
SELECT * FROM tbl where ParentID=2500 gives the below output
UserID Logon ParentID
2500 Test Division NULL
I want a query that will pull all of these relationships and the logons into one row, with my output looking like the below.
UserID Username Parent1 Parent2 Parent3 Parent4
2344 Test1 Test Team Test Division NULL NULL
2345 Test2 Test Team Test Division NULL NULL
The maximum number of parents a user can have is 4, in this case there is only 2. Can someone help me with the query needed to build this?
Appreciate any help
Thanks
Jess
You can basically use LEFT JOINs. If there are at most 4 parent levels, this will work. If the number of parent levels is unknown, you will need a dynamic query.
-- One self-join per ancestor level: each join follows ParentId one step further up.
-- Levels that do not exist come back as NULL because every join is a LEFT JOIN.
SELECT
    usr.UserId,
    usr.UserName,
    p1.UserName AS Parent1,
    p2.UserName AS Parent2,
    p3.UserName AS Parent3,
    p4.UserName AS Parent4
FROM Users AS usr
LEFT JOIN Users AS p1
    ON p1.UserId = usr.ParentId
LEFT JOIN Users AS p2
    ON p2.UserId = p1.ParentId
LEFT JOIN Users AS p3
    ON p3.UserId = p2.ParentId
LEFT JOIN Users AS p4
    ON p4.UserId = p3.ParentId
EDIT : Additional(to exclude parent users from the list) :
-- Keeps only users that nobody references as a parent: a row is dropped when its UserId appears as another row's ParentId.
WHERE NOT EXISTS (SELECT 1 FROM Users UC WHERE U1.UserId = UC.ParentId)
-- Four self-joins on tbl, each one climbing a single ParentID step.
-- Column references use the aliases actually declared below (t1..t5), and the
-- name column is Logon, as in the question's table (UserID, Logon, ParentID).
select
    t1.UserID as UserId,
    t1.Logon  as UserName,
    t2.Logon  as Parent1,
    t3.Logon  as Parent2,
    t4.Logon  as Parent3,
    t5.Logon  as Parent4
from tbl t1
left join tbl t2 on t2.UserID = t1.ParentID
left join tbl t3 on t3.UserID = t2.ParentID
left join tbl t4 on t4.UserID = t3.ParentID
left join tbl t5 on t5.UserID = t4.ParentID;
you need to do 4 left joins in order to fetch 4 parent details
Use a recursive CTE to get the levels then pivot to put them in columns:
-- Pair every user with each of its ancestors (and the distance to it), then
-- pivot the distance into the Parent1..Parent4 columns.
-- NOTE(review): the "value AS alias" form inside PIVOT ... IN (...) looks like Oracle syntax — confirm the target engine.
WITH lineage (UserID, Logon, ParentID, ParentLogon, ParentLevel) AS
(
    -- Anchor: every user starts as its own level-0 entry.
    SELECT UserID, Logon, ParentID, Logon, 0
    FROM users
    UNION ALL
    -- Step: a child inherits the ancestor recorded on its parent's row, one level further away.
    SELECT
        child.UserID,
        child.Logon,
        child.ParentID,
        up.ParentLogon,
        up.ParentLevel + 1
    FROM users child
    JOIN lineage up
        ON up.UserID = child.ParentID
)
SELECT UserId, Logon, Parent1, Parent2, Parent3, Parent4
FROM lineage
PIVOT (
    MAX(ParentLogon)
    FOR ParentLevel IN (
        1 AS Parent1,
        2 AS Parent2,
        3 AS Parent3,
        4 AS Parent4
    )
)
See SQL Fiddle example
In order to get all parent or child, it's efficient to use a recursive function which would fetch the whole hierarchy.
Sample Table:
-- Source table read by fnRecursive. It is a permanent table because a
-- user-defined function cannot read a temp table, and it has only the two
-- columns that the sample INSERT supplies.
CREATE TABLE Test
(
    [Name] varchar(100),
    ManagerName varchar(100)
)
Insert some values
-- Each pair is (employee, manager); the explicit column list keeps the insert
-- valid even if the table has further columns.
Insert into Test ([Name], ManagerName) values
('a','b'), ('b','c'), ('c','d'), ('d','e'), ('e','f'), ('f','g')
Create recursive function as below
-- Returns the chain of managers above @EmpName, one row per step.
--   @EmpName     employee whose managers are wanted
--   @incremental number stored with the first manager found; each step up adds 1
-- The last row carries a NULL ManagerName (the point where no manager was found).
-- SQL Server limits nested function calls to 32 levels, so deeper chains will fail.
CREATE FUNCTION [dbo].[fnRecursive] (@EmpName Varchar(100), @incremental int)
RETURNS @ret TABLE
(
    ManagerName varchar(100),
    Number int
)
AS
BEGIN
    Declare @MgrName varchar(100)

    SET @MgrName = (Select ManagerName from test where [name] = @EmpName)

    Insert into @ret values (@MgrName, @incremental)

    -- Keep climbing while there is a manager to look up.
    if(@MgrName is not null)
    BEGIN
        SET @incremental = @incremental + 1;
        Insert into @ret
        Select ManagerName, Number from [fnRecursive](@MgrName, @incremental)
    END
    RETURN;
END
If this function is joined with table, it should list the hierarchy for all employees
-- Result table: one row per (employee, manager above them, distance).
CREATE TABLE #TEST
(
    [Name]      varchar(100),
    ManagerName varchar(100),
    Number      int
)

-- Run fnRecursive once per employee; OUTER APPLY keeps an employee even when
-- the function returns no rows for it.
INSERT INTO #TEST ([Name], ManagerName, Number)
SELECT
    emp.[Name],
    chain.ManagerName,
    chain.Number
FROM TEST AS emp
OUTER APPLY [fnRecursive](emp.[Name], 1) AS chain

SELECT * FROM #Test
If we do a pivot on the table (excluding the 'Number' column). Assuming we store in the table "#temp" it should list all the managers as a column.
-- Build the pivot column list from the distinct manager names in #temp,
-- then run the PIVOT as dynamic SQL.
DECLARE @cols AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX);

-- FOR XML PATH('') concatenates the quoted names; STUFF removes the leading comma.
-- QUOTENAME brackets each name so it is safe to use as a column identifier.
SET @cols = STUFF((SELECT distinct ',' + QUOTENAME(c.[ManagerName] )
                   FROM #temp c
                   FOR XML PATH(''), TYPE
                  ).value('.', 'NVARCHAR(MAX)')
                 ,1,1,'')

set @query = 'select * from #temp
pivot
(
min([managername])
for managername in (' + @cols + ')
) p '

execute(@query)
But this doesn't name the column as 'Parent1', 'Parent2' instead with the dynamic column name.
Link below should help to set custom column name for the dynamic pivot table
https://stackoverflow.com/questions/16614994/sql-server-pivot-with-custom-column-names

How to avoid Recursive CTE repeating the anchor values

I am using the following CTE to get the hierarchical structure of managers and employees. An employee can have multiple managers; in that case I do not want the CTE to repeat that employee again and again, as it does in my code:
getemp() is a simple function returning employeeid, name and managerID
-- Walk down from the direct reports of manager 1, numbering each level.
;WITH hierarchy AS
(
    -- Level 1: everyone reporting to manager 1.
    SELECT 1 AS [Level], root.*
    FROM dbo.getemp() AS root
    WHERE root.managerid = 1

    UNION ALL

    -- Next level: employees whose manager is already in the hierarchy.
    SELECT h.[Level] + 1, e.*
    FROM getemp() AS e
    INNER JOIN hierarchy AS h
        ON h.employeeid = e.managerid
)
SELECT *
FROM hierarchy
After edit -
Following approach working for me. Is it possible with CTE ?
SET NOCOUNT ON;
-- Breadth-first walk of the manager/employee graph, one level per loop pass.
-- NOTE(review): @Top1 is not declared in this snippet — presumably a parameter of the enclosing procedure; confirm.
DECLARE @Rows int

-- Level 1: direct reports of @Top1.
-- NOTE(review): the ISNULL wrappers presumably make SELECT ... INTO create NOT NULL columns — confirm.
SELECT [Level] = ISNULL(1,0),
       employeeid = ISNULL(employeeid, 0 ),
       empname = CAST(empname as varchar(10)),
       managerid = ISNULL(managerid,0)
into #Temp1
from dbo.getemp() as a1
where a1.managerid = @Top1
--select * from #Temp1
SELECT @Rows=@@ROWCOUNT

DECLARE @I INT = 2;
while @Rows > 0
BEGIN
    -- Add the reports of everyone found at the previous level, skipping
    -- (employee, manager) pairs that are already recorded.
    Insert into #Temp1
    select @I as Level, b.employeeid, b.empname, b.managerid
    from #Temp1 as e
    inner join (select [employeeid], [empname], [managerid] from dbo.GetEmp()) as b
        on b.managerid = e.employeeid
    where e.Level = @I - 1
      and not exists (
            SELECT 1 FROM #Temp1 t
            WHERE b.employeeid = t.employeeid
              AND b.managerid = t.managerid);

    -- The loop ends as soon as a pass adds no rows.
    SELECT @Rows=@@ROWCOUNT
    --SELECT @Rows AS Rows
    IF @Rows > 0
    BEGIN
        SELECT @I = @I + 1;
    END
END
select distinct * from #Temp1
END
Since you have several managers, which means that people can also be in several different levels due to having different levels in the manager, you could just take the minimum levels for each branch with something like this:
-- Same walk as before, but an employee reachable through several managers is
-- reported once per (employee, manager) pair with the shallowest level found.
;WITH hierarchy AS
(
    -- Level 1: everyone reporting to manager 1.
    SELECT 1 AS [Level], root.*
    FROM dbo.getemp() AS root
    WHERE root.managerid = 1

    UNION ALL

    -- Next level: employees whose manager is already in the hierarchy.
    SELECT h.[Level] + 1, e.*
    FROM getemp() AS e
    INNER JOIN hierarchy AS h
        ON h.employeeid = e.managerid
)
SELECT
    MIN(Level) AS Level,
    employeeid,
    name,
    managerid
FROM hierarchy
GROUP BY
    employeeid,
    name,
    managerid
Using a function to return all the employees in every recursion might not be the best solution with regard to performance, especially if it's not an inline function. You might want to consider using, for example, a temp table if you can't read the tables themselves directly.
I couldn't find a solution using a CTE, so I used a WHILE loop to avoid the repeating anchors. Here is the code:
-- Breadth-first walk of the manager/employee graph, one level per loop pass.
-- NOTE(review): @Top1 is not declared in this snippet — presumably a parameter of the enclosing procedure; confirm.
DECLARE @Rows int

-- Level 1: direct reports of @Top1.
-- NOTE(review): the ISNULL wrappers presumably make SELECT ... INTO create NOT NULL columns — confirm.
SELECT [Level] = ISNULL(1,0),
       employeeid = ISNULL(employeeid, 0 ),
       empname = CAST(empname as varchar(10)),
       managerid = ISNULL(managerid,0)
into #Temp1
from dbo.getemp() as a1
where a1.managerid = @Top1
--select * from #Temp1
SELECT @Rows=@@ROWCOUNT

DECLARE @I INT = 2;
while @Rows > 0
BEGIN
    -- Add the reports of everyone found at the previous level, skipping
    -- (employee, manager) pairs that are already recorded.
    Insert into #Temp1
    select @I as Level, b.employeeid, b.empname, b.managerid
    from #Temp1 as e
    inner join (select [employeeid], [empname], [managerid] from dbo.GetEmp()) as b
        on b.managerid = e.employeeid
    where e.Level = @I - 1
      and not exists (
            SELECT 1 FROM #Temp1 t
            WHERE b.employeeid = t.employeeid
              AND b.managerid = t.managerid);

    -- The loop ends as soon as a pass adds no rows.
    SELECT @Rows=@@ROWCOUNT
    --SELECT @Rows AS Rows
    IF @Rows > 0
    BEGIN
        SELECT @I = @I + 1;
    END
END
select distinct * from #Temp1
END

TSQL Multiple count using same table with different JOIN

I have a weird situation and not too sure how to approach it.
I have 2 separate tables:
Table A is submissions
id
submitterQID
nomineeQID
story
Table B is employees
QID
Name
Department
I am trying to get the total number of submissions grouped by department as well as the total number of nominations.
This is what my Stored procedure looks like:
BEGIN
    -- Counts per department for submissions with statusID = 3 at location @locale,
    -- returned as XML (<root><data>...</data></root>).
    -- NOTE: the join is on nomineeQID only, so both counts are attributed to the
    -- nominee's department; submitter departments are not counted separately.
    SELECT TOP 50
        COUNT(A.[nomineeQID])   AS totalNominations,
        COUNT(A.[submitterQID]) AS totalSubmissions,
        B.[DepartmentDesc]      AS department
    FROM empowermentSubmissions AS A
    INNER JOIN empTable AS B
        ON B.[qid] = A.[nomineeQID]
    WHERE A.[statusID] = 3
      AND A.[locationID] = @locale
    -- Must group by the same column that is selected.
    GROUP BY B.[DepartmentDesc]
    ORDER BY totalNominations DESC
    FOR XML PATH ('data'), TYPE, ELEMENTS, ROOT ('root');
END
This issue with this is that the JOIN is joining by the nomineeQID only and not the subQID as well.
My end result I am looking for is:
Department Customer Service has 25 submissions and 90 nominations
ORDERED BY the SUM of both counts...
I tried to just JOIN again on the subQID but was told I cant join on the same table twice.
Is there an easier way to accomplish this?
This is a situation where you'll need to gather your counts independently of each other. Using two left joins will cause some rows to be counted twice in the first left join when the join condition is met for both. Your scenario can be solved using either correlated subqueries or an OUTER APPLY gathering the counts on different criteria. I did not present a COUNT(CASE ... ) option here, because you don't have an either-or scenario in the data; you have two foreign keys to the employees table. So, setting up sample data:
-- Sample data held in table variables: 49 employees and 499 random submissions.
declare @empowermentSubmissions table (submissionID int primary key identity(1,1), submissionDate datetime, nomineeQID INT, submitterQID INT, statusID INT, locationID INT)
declare @empTable table (QID int primary key identity(1,1), AreaDesc varchar(10), DepartmentDesc varchar(20))
declare @locale INT = 0
declare @n int = 1

-- Employees cycle through 'Area 1'..'Area 2' and 'Department 1'..'Department 4'.
while @n < 50
begin
    insert into @empTable (AreaDesc, DepartmentDesc) values ('Area ' + cast((@n % 2)+1 as varchar(1)), 'Department ' + cast((@n % 4)+1 as varchar(1)))
    set @n = @n + 1
end

set @n = 1
-- Each submission gets a random nominee and submitter (order by newid()) and a date within the last 600 days.
while @n < 500
begin
    insert into @empowermentSubmissions (submissionDate, nomineeQID, submitterQID, StatusID, locationID) values (dateadd(dd,-(cast(rand()*600 as int)),getdate()), (select top 1 QID from @empTable order by newid()), (select top 1 QID from @empTable order by newid()), 3 + (@n % 2) - (@n % 3), (@n % 2) )
    set @n = @n + 1
end
And now the OUTER APPLY option:
-- Nominations and submissions are counted independently per employee, then
-- summed per department, so neither count can inflate the other.
SELECT TOP 50
    E.DepartmentDesc,
    SUM(N.Nominations)      AS Nominations,
    SUM(S.TotalSubmissions) AS TotalSubmissions
FROM @empTable E
OUTER APPLY (
    -- Submissions in which this employee is the nominee.
    SELECT COUNT(submissionID) AS Nominations
    FROM @empowermentSubmissions A
    WHERE A.statusID = 3
      AND A.nomineeQID = E.QID
      AND A.locationID = @locale
) N
OUTER APPLY (
    -- Submissions in which this employee is the submitter.
    SELECT COUNT(submissionID) AS TotalSubmissions
    FROM @empowermentSubmissions A
    WHERE A.statusID = 3
      AND A.submitterQID = E.QID
      AND A.locationID = @locale
) S
GROUP BY E.DepartmentDesc
-- Qualified so the aggregates clearly read the APPLY columns, not the select-list aliases.
ORDER BY SUM(N.Nominations) + SUM(S.TotalSubmissions) DESC

Apply Case to get all records from SQL

I am applying one query where I need to either get results for single department or all department.
PersonID PersonName
1 'Abc'
2 'CDE'
3 'xyz'
DepartmentID DepartmentName
1 'Accounts'
2 'Finance'
HirarchyID personID DepartmentID
1 1 1
2 1 1
3 2 1
Now I want my SQL query to take a parameter where 1 = 'Accounts', 2 = 'Finance' and 0 = 'Both'.
How would I apply this @department parameter in my query?
I tried using CASE, but it only lets me select either Accounts or Finance, not both.
select * from persons p join
Hierarchy h on h.PersonID = p.PersonID JOIN
Department d on d.DepartmentID = h.DepartmentID
where case ???
My sample where clause in my query goes something like this :
-- NOTE: the ELSE branch yields the single string 'abc Mode,xyz Mode', not two
-- values, so it matches neither mode.
WHERE (sa.Area in (case when @myMode = 1 THEN 'abc Mode'
                        when @myMode = 2 THEN 'XYZ Mode'
                        ELSE
                        'abc Mode,xyz Mode'
                   END))
Invert the condition by supplying the variable as the left hand side of an in():
-- A row matches when @department equals its DepartmentID, or when @department is 0 (all departments).
select * from mytable
where @department in (DepartmentID, 0)
@DepartmentID is the parameter you pass in as the department ID.
-- One branch per accepted parameter value: 1, 2, or 0 for both departments.
If(@DepartmentID=1)
begin
    --your code
    select * from Department where DepartmentID =1
end
else if(@DepartmentID=2)
begin
    --your code
    select * from Department where DepartmentID =2
end
else if(@DepartmentID=0)
begin
    select * from Department where DepartmentID in(1,2)
end
I hope this will help....
Try this:
-- populate a temp table for demo purposes only
select *
into #departments
from (
    select 1 as DepartmentId, 'Accounts' as DepartmentName
    union all
    select 2 as DepartmentId, 'Finance' as DepartmentName
) as q1

declare @deptName nvarchar(50) = 'Accounts';

-- Resolve the name to an id; an unknown name falls back to 0, which means "all departments".
declare @deptId int = coalesce((
    select DepartmentId
    from #departments
    where DepartmentName = @deptName
),0);

select *
from #departments -- or dbo.someOtherTable ... which would make more sense
where @deptId in(DepartmentId,0)
go
This will return records for Accounts department only.
Conversely, if you want all departments, change this line of code as follows:
declare @deptName nvarchar(50) = '';  -- matches no department name, so the lookup falls back to 0 (all departments)
try this
-- Persons linked to a department through the hierarchy table.
-- When :PASSEDPARM is '0' the CASE compares the column with itself, so every department passes.
SELECT person_table.*
FROM person_table
INNER JOIN heirarchy_table
    ON person_table.person_id = heirarchy_table.person_id
INNER JOIN department_table
    ON department_table.department_id = heirarchy_table.department_id
WHERE department_table.department_id =
      CASE
          WHEN (:PASSEDPARM = '0') THEN department_table.department_id
          ELSE :PASSEDPARM
      END;
db2 syntax used

sql query help - trying to get rid of temp tables

I have the following tables -
Resource
--------------------
Id, ProjectId, Hours, ApproverId
Project
--------------------
Id, Name
The input is ApproverId. I need to retrieve all the rows that have matching ApproverId (simple enough). And for every resource that I get back, I also need to get their hours (same table) whose approverId is not the one that is passed in (business requirement, to be grayed out in the UI). What I'm doing right now is - get all resources based on ApproverId, stored them in a temp table, then do a distinct on Resource.Id, store it in a different temp table, and then for every Resource.Id, get the rows where the ApproverId is not the one that is passed. Can I combine it all in a single query instead of using temp tables?
Thanks!
Edit: I'm using SQL Server 2008 R2.
Edit 2: Here's my stored procedure. I have changed the logic slightly after reading the comments. Can we get rid of all temp tables and make it faster -
-- Returns two result sets for the given approver:
--   1. the distinct resources that have at least one row approved by @ApproverId
--      (Id is a generated sequence number);
--   2. every resource/project row for those resources, whoever the approver is.
ALTER PROCEDURE GetResourceDataByApprover
    @ApproverId UNIQUEIDENTIFIER
AS
    CREATE TABLE #Table1
        (
          Id SMALLINT PRIMARY KEY
                      IDENTITY(1, 1) ,
          ResourceId UNIQUEIDENTIFIER
        )
    CREATE TABLE #Table2
        (
          ResourceId UNIQUEIDENTIFIER ,
          ProjectId UNIQUEIDENTIFIER ,
          ProjectName NVARCHAR(1024)
        )

    -- Resources that belong to this approver.
    INSERT  INTO #Table1 ( ResourceId )
            SELECT DISTINCT
                    T.ResourceId
            FROM    dbo.[Resource] T
            WHERE   T.ApproverId = @ApproverId

    -- One set-based insert replaces the former row-by-row WHILE loop: fetch the
    -- project rows of every collected resource in a single pass.
    INSERT  INTO #Table2 ( ResourceId, ProjectId, ProjectName )
            SELECT  T.ResourceId ,
                    T.ProjectId ,
                    P.Name AS ProjectName
            FROM    dbo.[Resource] T
                    INNER JOIN dbo.Project P ON T.ProjectId = P.ProjectId
            WHERE   EXISTS ( SELECT 1
                             FROM   #Table1 R
                             WHERE  R.ResourceId = T.ResourceId )

    SELECT  *
    FROM    #Table1
    SELECT  *
    FROM    #Table2

    DROP TABLE #Table1
    DROP TABLE #Table2
This query should return two rows for every resource, one for the specified approver and one for all other approvers.
-- Total hours per resource, split into the rows approved by @approverId and
-- the rows approved by anyone else (at most two rows per Id).
SELECT
    Id,
    CASE
        WHEN ApproverId=@approverId THEN 'SpecifiedApprover'
        ELSE 'OtherApprover'
    END AS Approver,
    SUM(Hours) AS Hours
FROM Resource
GROUP BY
    Id,
    -- Must repeat the CASE expression: a select-list alias cannot be used in GROUP BY.
    CASE
        WHEN ApproverId=@approverId THEN 'SpecifiedApprover'
        ELSE 'OtherApprover'
    END
Do you want to know how concrete Approver wastes his time?
-- Hours booked per project for one approver; projects whose total is not above zero are dropped.
-- %ConcreteApproverId% is a placeholder to be replaced with the real approver id.
SELECT
    p.Id,
    p.Name,
    SUM(r.Hours) AS TotalHours
FROM Resource AS r
LEFT JOIN Project AS p
    ON p.Id = r.ProjectId
WHERE ApproverId = %ConcreteApproverId%
GROUP BY
    p.Id,
    p.Name
HAVING SUM(r.Hours) > 0
This query will produce this table example:
+-----+----------+-------+
| Id | Project | Hours |
+-----+----------+-------+
| 203 | ProjectA | 25 |
| 202 | ProjectB | 34 |
| 200 | ProjectC | 46 |
+-----+----------+-------+