sql get average salary in self reference table - sql

I've seen several questions/answers on how to recursively query a self-referencing table, but I am struggling to apply the answers I've found to aggregate up to each parent, grandparent, etc. regardless of where the item sits in the hierarchy.
Need to get an average salary for each department including hierarchy.
It means department should include the average salary of each sub-department and so on.
I've got the following db schema:
-- Employees. Department_Id holds a Department.Id value (no foreign key is declared here).
CREATE TABLE Employee
(
Id INT NOT NULL ,
Name VARCHAR(200) NOT NULL ,
Department_Id INT NOT NULL ,
PRIMARY KEY ( Id )
);
-- Self-referencing department hierarchy. Parent_Id is nullable;
-- the queries below treat Parent_Id IS NULL as a top-level department.
CREATE TABLE Department
(
Id INT NOT NULL ,
DepartmentName VARCHAR(200) NOT NULL ,
Parent_Id INT ,
PRIMARY KEY ( Id )
);
-- Salary payments. Employee_Id holds an Employee.Id value (no foreign key is declared here).
CREATE TABLE Salary
(
Id INT NOT NULL ,
Date DATETIME NOT NULL ,
Amount INT NOT NULL ,
Employee_Id INT NOT NULL ,
PRIMARY KEY ( Id )
);
I've tried something like this, but it only includes the first level of the hierarchy.
-- Attempt: average salary per top-level department.
-- The self-join (dd -> sdd) reaches only the DIRECT children of each department,
-- so salaries from deeper levels (grandchildren and below) are never included.
SELECT d.Id ,
d.DepartmentName ,
( SELECT AVG(s.Amount)
FROM dbo.Department dd
-- sdd = departments whose parent is dd (one level down only)
LEFT JOIN dbo.Department sdd ON dd.Id = sdd.Parent_Id
-- employees of either the child department or the department itself
JOIN dbo.Employee e ON e.Department_Id = sdd.Id
OR e.Department_Id = dd.Id
JOIN dbo.Salary s ON s.Employee_Id = e.Id
WHERE dd.Id = d.Id
) AS avg_dep_salary
FROM dbo.Department d
-- only top-level departments are reported
WHERE d.Parent_Id IS NULL;
How can I get the average salary across all levels of the hierarchy?
EDIT: Added some inserts
INSERT INTO Employee
( Id, Name, Department_Id )
VALUES ( 1, 'Peter', 1 ),
( 2, 'Alex', 1 ),
( 3, 'Sam', 2 ),
( 4, 'James', 2 ),
( 5, 'Anna', 3 ),
( 6, 'Susan', 3 ),
( 7, 'Abby', 4 ),
( 8, 'Endy', 4 );
INSERT INTO Department
( Id, DepartmentName, Parent_Id )
VALUES ( 1, 'IT', NULL ),
( 2, 'HR', NULL),
( 3, 'SubIT', 1 ),
( 4, 'SubSubIT', 3 );
INSERT INTO Salary
( Id, Date, Amount, Employee_Id )
VALUES ( 1, '2013-01-09 16:03:50.003', 3000, 1 ),
( 2, '2013-01-11 16:03:50.003', 5000, 2 ),
( 3, '2013-01-09 16:03:50.003', 2000, 3 ),
( 4, '2013-01-11 16:03:50.003', 1000, 4 ),
( 5, '2013-01-09 16:03:50.003', 4000, 5 ),
( 6, '2013-01-11 16:03:50.003', 6000, 6 ),
( 7, '2013-01-09 16:03:50.003', 7000, 7 ),
( 8, '2013-01-13 16:03:50.003', 9000, 8 );
Expected result is:
Department | Average_Salary
__________________________________
IT | ( X1 + X2 + X3 ) / 3
HR | ( Y1 ) / 1
SubIT | ( X2 + X3 ) / 2
SubSubIT | ( X3 ) / 1
Where:
X1 - Average salary of IT department
X2 - Average salary of SubIT department
X3 - Average salary of SubSubIT department
Y1 - Average salary of HR department

Sample data
I've added a few rows with a wider tree structure.
-- Sample-data tables, created as session-local temporary tables so that the
-- #Employee / #Department / #Salary references in the following statements resolve.
-- ("DECLARE #name TABLE" is not valid T-SQL: DECLARE ... TABLE requires an @variable name.)
CREATE TABLE #Employee
(
    Id INT NOT NULL,
    Name VARCHAR(200) NOT NULL,
    Department_Id INT NOT NULL,  -- holds a #Department.Id value
    PRIMARY KEY (Id)
);
CREATE TABLE #Department
(
    Id INT NOT NULL,
    DepartmentName VARCHAR(200) NOT NULL,
    Parent_Id INT,               -- NULL for a top-level department
    PRIMARY KEY (Id)
);
CREATE TABLE #Salary
(
    Id INT NOT NULL,
    Date DATETIME NOT NULL,
    Amount INT NOT NULL,
    Employee_Id INT NOT NULL,    -- holds an #Employee.Id value
    PRIMARY KEY (Id)
);
INSERT INTO #Employee
( Id, Name, Department_Id )
VALUES
( 1, 'Peter', 1 ),
( 2, 'Alex', 1 ),
( 3, 'Sam', 2 ),
( 4, 'James', 2 ),
( 5, 'Anna', 3 ),
( 6, 'Susan', 3 ),
( 7, 'Abby', 4 ),
( 8, 'Endy', 4 ),
(10, 'e_A', 10),
(11, 'e_AB', 11),
(12, 'e_AC', 12),
(13, 'e_AD', 13),
(14, 'e_ACE', 14),
(15, 'e_ACF', 15),
(16, 'e_ACG', 16);
INSERT INTO #Department
( Id, DepartmentName, Parent_Id )
VALUES
( 1, 'IT', NULL ),
( 2, 'HR', NULL),
( 3, 'SubIT', 1 ),
( 4, 'SubSubIT', 3 ),
(10, 'A', NULL ),
(11, 'AB', 10),
(12, 'AC', 10),
(13, 'AD', 10),
(14, 'ACE', 12),
(15, 'ACF', 12),
(16, 'ACG', 12);
INSERT INTO #Salary
( Id, Date, Amount, Employee_Id )
VALUES
( 1, '2013-01-09 16:03:50.003', 3000, 1 ),
( 2, '2013-01-11 16:03:50.003', 5000, 2 ),
( 3, '2013-01-09 16:03:50.003', 2000, 3 ),
( 4, '2013-01-11 16:03:50.003', 1000, 4 ),
( 5, '2013-01-09 16:03:50.003', 4000, 5 ),
( 6, '2013-01-11 16:03:50.003', 6000, 6 ),
( 7, '2013-01-09 16:03:50.003', 7000, 7 ),
( 8, '2013-01-13 16:03:50.003', 9000, 8 ),
(10, '2013-01-13 16:03:50', 100, 10),
(11, '2013-01-13 16:03:50', 100, 11),
(12, '2013-01-13 16:03:50', 100, 12),
(13, '2013-01-13 16:03:50', 100, 13),
(14, '2013-01-13 16:03:50', 100, 14),
(15, '2013-01-13 16:03:50', 100, 15),
(16, '2013-01-13 16:03:50', 100, 16);
Query
WITH
-- Per-department totals: sum of salary amounts and number of salary rows.
-- LEFT JOINs keep departments that have no employees (or no salary rows) with
-- zero totals; with INNER JOINs such a department would vanish from this CTE and
-- the recursion below could not pass through it to reach its sub-departments.
CTE_Departments
AS
(
    SELECT
        D.Id
        ,D.Parent_Id
        ,D.DepartmentName
        ,COALESCE(SUM(S.Amount), 0) AS DepartmentAmount
        ,COUNT(S.Id) AS DepartmentCount
    FROM
        #Department AS D
        LEFT JOIN #Employee AS E ON E.Department_Id = D.Id
        LEFT JOIN #Salary AS S ON S.Employee_Id = E.Id
    GROUP BY
        D.Id
        ,D.Parent_Id
        ,D.DepartmentName
)
-- For every department (OriginalID) emit the department itself (Lvl = 1) and then,
-- recursively, every department below it, each carrying its own totals.
,CTE_Recursive
AS
(
    SELECT
        CTE_Departments.Id AS OriginalID
        ,CTE_Departments.DepartmentName AS OriginalName
        ,CTE_Departments.Id
        ,CTE_Departments.Parent_Id
        ,CTE_Departments.DepartmentName
        ,CTE_Departments.DepartmentAmount
        ,CTE_Departments.DepartmentCount
        ,1 AS Lvl
    FROM CTE_Departments

    UNION ALL

    SELECT
        CTE_Recursive.OriginalID
        ,CTE_Recursive.OriginalName
        ,CTE_Departments.Id
        ,CTE_Departments.Parent_Id
        ,CTE_Departments.DepartmentName
        ,CTE_Departments.DepartmentAmount
        ,CTE_Departments.DepartmentCount
        ,CTE_Recursive.Lvl + 1 AS Lvl
    FROM
        CTE_Departments
        INNER JOIN CTE_Recursive ON CTE_Recursive.Id = CTE_Departments.Parent_Id
)
-- Collapse each subtree back to its starting department.
SELECT
    OriginalID
    ,OriginalName
    ,SUM(DepartmentAmount) AS SumAmount
    ,SUM(DepartmentCount) AS SumCount
    -- Integer division (Amount is INT), so the average is truncated.
    -- NULLIF yields NULL instead of a divide-by-zero error for a subtree with no salaries.
    ,SUM(DepartmentAmount) / NULLIF(SUM(DepartmentCount), 0) AS AvgAmount
FROM CTE_Recursive
GROUP BY
    OriginalID
    ,OriginalName
ORDER BY OriginalID
;
Result
+------------+--------------+-----------+----------+-----------+
| OriginalID | OriginalName | SumAmount | SumCount | AvgAmount |
+------------+--------------+-----------+----------+-----------+
| 1 | IT | 34000 | 6 | 5666 |
| 2 | HR | 3000 | 2 | 1500 |
| 3 | SubIT | 26000 | 4 | 6500 |
| 4 | SubSubIT | 16000 | 2 | 8000 |
| 10 | A | 700 | 7 | 100 |
| 11 | AB | 100 | 1 | 100 |
| 12 | AC | 400 | 4 | 100 |
| 13 | AD | 100 | 1 | 100 |
| 14 | ACE | 100 | 1 | 100 |
| 15 | ACF | 100 | 1 | 100 |
| 16 | ACG | 100 | 1 | 100 |
+------------+--------------+-----------+----------+-----------+
Run the query step-by-step, CTE-by-CTE to understand how it works.
CTE_Departments gives total amount and number of people for each department.
CTE_Recursive recursively generates child rows for each department, while keeping the OriginalID - the ID of the department where the recursion started.
Final query simply groups everything by this OriginalID.

Here is one way
with avg_per_dep as (
    -- average salary per department and month (a month is identified by its last day)
    select
        [Month] = eomonth(s.date), d.Id, d.DepartmentName
        , avgDep = avg(s.Amount * 1.0)   -- * 1.0 avoids integer averaging
    from
        Salary s
        join Employee e on s.Employee_Id = e.Id
        join Department d on e.Department_Id = d.Id
    group by d.Id, d.DepartmentName, eomonth(s.date)
)
, rcte as (
    -- one row per (department i, department Id inside i's subtree), i itself included.
    -- Joining on these pairs covers every branch of the tree; keeping only the single
    -- longest path per department would drop sibling subtrees.
    select
        i = Id, Id
    from
        Department
    union all
    select
        a.i, b.Id
    from
        rcte a
        join Department b on a.Id = b.Parent_Id
)
select
    d.DepartmentName, c.[Month]
    , Average_Salary = avg(c.avgDep)     -- average of the per-department averages
from
    rcte t
    join avg_per_dep c on c.Id = t.Id
    join Department d on t.i = d.Id
group by t.i, d.DepartmentName, c.[Month]
Output
DepartmentName [Month] Average_Salary
---------------------------------------------
IT 2013-01-31 5666.666666
HR 2013-01-31 1500.000000
SubIT 2013-01-31 6500.000000
SubSubIT 2013-01-31 8000.000000
Idea:
1. Calculate the average salary per department.
2. Use a recursive CTE to list, for each department, the department itself and all of its children.
3. Join the two results and calculate the average including the children.

You could also use below query to get the expected result
-- Department_Path: for every department, the '#'-delimited chain of ids from its
-- top-level ancestor down to itself, e.g. '#1#3#4#'.
-- VARCHAR(MAX) is used on both branches so that deep hierarchies are not truncated.
WITH Department_Path
AS (SELECT Id, CAST(CONCAT('#', Id, '#') AS VARCHAR(MAX)) AS Path
    FROM Department
    WHERE Parent_Id IS NULL
    UNION ALL
    SELECT Child.Id, CAST(CONCAT(Parent.Path, Child.Id, '#') AS VARCHAR(MAX)) AS Path
    FROM Department Child
        INNER JOIN Department_Path Parent
            ON Parent.Id = Child.Parent_Id)
SELECT Department.Id,
       Department.DepartmentName,
       -- cast first: AVG over an INT column would truncate the result
       AVG(CAST(Salary.Amount AS DECIMAL(18, 2))) AS Average_Salary,
       -- DISTINCT: an employee with several salary rows is still one employee
       COUNT(DISTINCT Employee.Id) AS Employee_Count
FROM Department
    -- a department "owns" every path that contains its id, i.e. itself and all descendants
    INNER JOIN Department_Path
        ON CHARINDEX(CONCAT('#', Department.Id, '#'), Department_Path.Path) > 0
    INNER JOIN Employee
        ON Employee.Department_Id = Department_Path.Id
    INNER JOIN Salary
        ON Salary.Employee_Id = Employee.Id
GROUP BY Department.Id,
         Department.DepartmentName;
The idea is that each employee belongs to a chain of departments in the hierarchy. For each department, we can retrieve all the employees who belong to it (directly or through a sub-department) and then calculate the average salary.

Related

How to change a value rank in a column MS SQL

I've a table with a column which is defining a rank value for display position:
Unid | Rank | Name
10 | 1 | A
20 | 2 | B
30 | 3 | C
40 | 4 | D
50 | 5 | E
How can I update the table so that Name E is at the top of the list, followed by the names A, B, C and D?
One possible solution is to use ROW_NUMBER() with appropriate ORDER BY clause:
Table:
-- Fixture: five rows whose [Rank] gives the display position.
CREATE TABLE Data
(
    [Unid] int,
    [Rank] int,
    [Name] varchar(1)
);

INSERT INTO Data
    ([Unid], [Rank], [Name])
VALUES
    (10, 1, 'A'), (20, 2, 'B'), (30, 3, 'C'), (40, 4, 'D'), (50, 5, 'E');
Statement:
-- Move 'E' to rank 1 and renumber all other rows after it.
-- The remaining rows keep their CURRENT display order (tie-break on [Rank]);
-- ordering by [Name] would only work while ranks happen to follow the alphabet.
-- Every row is renumbered on purpose, hence no WHERE clause.
UPDATE d
SET d.[Rank] = d.[NewRank]
FROM (
    SELECT
        [Rank],
        ROW_NUMBER() OVER (
            ORDER BY CASE WHEN [Name] = 'E' THEN 0 ELSE 1 END, [Rank]
        ) AS [NewRank]
    FROM Data
) d;
Result:
Unid Rank Name
10 2 A
20 3 B
30 4 C
40 5 D
50 1 E

SQL select parent-child recursively based on a reference table

I saw many questions related to a recursive query but couldn't find any that shows how to use it based on a reference table.
I have a MasterTable where Id, ParentId columns are establishing the parent/child relation.
I have a SubTable where I have a bunch of Ids which could be a parent Id or child Id.
I would like to retrieve all related records (parent or child, recursively) from the MasterTable based on given SubTable
Current output:
id parentId
----------- -----------
1 NULL
2 1
3 1
4 NULL
5 4
6 5
7 6
Expected output
id parentId
----------- -----------
1 NULL
2 1
3 1
4 NULL
5 4
6 5
7 6
8 9
9 NULL
10 NULL
11 10
13 11
14 10
15 16
16 NULL
Comparison of actual vs expected:
Code:
-- Session-local temporary tables, so that the #MasterTable / #SubTable references
-- in the statements below resolve.
-- ("DECLARE #name TABLE" is not valid T-SQL: DECLARE ... TABLE requires an @variable name.)
CREATE TABLE #MasterTable
(
    id INT NOT NULL,
    parentId INT NULL   -- NULL marks a root row
);
CREATE TABLE #SubTable
(
    id INT NOT NULL     -- an id to look up in #MasterTable (may be a parent or a child)
);
INSERT INTO #MasterTable (id, parentId)
VALUES (1, NULL), (2, 1), (3, 1), (4, NULL), (5, 4), (6, 5),
(7, 6), (8, 9), (9, NULL), (10, NULL), (11, 10), (12, NULL),
(13, 11), (13, 11), (14, 10), (15, 16), (16, NULL);
INSERT INTO #SubTable (id)
VALUES (1), (2), (3), (4), (6), (5), (7),
(8), -- it does not show
(13), -- it does not show
(15); -- it does not show
/* beside 8,13,15 it should add 9,11,14 and 10,16 */
-- Attempt: start from root rows and walk down to all descendants.
-- The anchor only accepts roots (parentId IS NULL) that are THEMSELVES listed in
-- #SubTable, so a tree whose listed ids are all non-root rows (8, 13, 15 in the
-- sample data) is never started and none of its rows are returned.
;WITH cte AS
(
-- anchor: root rows whose own id appears in #SubTable
SELECT
mt1.id,
mt1.parentId
FROM
#MasterTable AS mt1
WHERE
mt1.parentId IS NULL
AND EXISTS (SELECT NULL AS empty
FROM #SubTable AS st
WHERE st.Id = mt1.id)
UNION ALL
-- recursive step: children of rows already collected
SELECT
mt2.id,
mt2.parentId
FROM
#MasterTable AS mt2
INNER JOIN
cte AS c1 ON c1.id = mt2.parentId
)
-- DISTINCT collapses rows that occur more than once in the result
SELECT DISTINCT
c2.id,
c2.parentId
FROM
cte AS c2
ORDER BY
id;
Is the following query suitable for the issue in question?
-- Starting from every id listed in #SubTable, climb to the root of its tree and
-- then descend through the whole tree.
-- p_flag = 0: keep climbing to the parent; p_flag = 1: descend to the children.
with
r as (
    -- anchor: the rows named in #SubTable; a root starts in "descend" mode
    select
        m.id,
        m.parentId,
        case when m.parentId is null then 1 else 0 end as p_flag
    from #SubTable s
    inner join #MasterTable m
        on m.id = s.id
    union all
    -- step: climb while p_flag = 0, descend once p_flag = 1;
    -- arriving at a root switches the walk to "descend" mode
    select
        m.id,
        m.parentId,
        case when m.parentId is null then 1 else r.p_flag end
    from r
    inner join #MasterTable m
        on (r.p_flag = 0 and m.id = r.parentid)
        or (r.p_flag = 1 and m.parentid = r.id)
)
select distinct
    r.id,
    r.parentid
from r
order by r.id;
Output:
| id | parentid |
+----+----------+
| 1 | NULL |
| 2 | 1 |
| 3 | 1 |
| 4 | NULL |
| 5 | 4 |
| 6 | 5 |
| 7 | 6 |
| 8 | 9 |
| 9 | NULL |
| 10 | NULL |
| 11 | 10 |
| 13 | 11 |
| 14 | 10 |
| 15 | 16 |
| 16 | NULL |
Test it online with rextester.com.
-- Return every row of each tree that contains at least one id listed in #SubTable.
-- Each row is tagged with the id of its root while walking down from the roots;
-- a tree qualifies when any of its rows is referenced by #SubTable.
-- (Testing only "has children / shares a parentId with some row" would accept every
-- non-root row, including rows of trees that #SubTable never mentions.)
;WITH cte
AS (
    -- anchor: every root is the start of its own tree
    SELECT mt1.id,
           mt1.parentId,
           mt1.id AS rootId
    FROM #MasterTable AS mt1
    WHERE mt1.parentId IS NULL
    UNION ALL
    -- recursive step: children inherit the root id of their parent
    SELECT mt2.id,
           mt2.parentId,
           c1.rootId
    FROM #MasterTable AS mt2
        INNER JOIN cte AS c1
            ON c1.id = mt2.parentId
)
SELECT DISTINCT c2.id,
                c2.parentId
FROM cte AS c2
WHERE EXISTS (
          SELECT 1
          FROM cte AS c3
              INNER JOIN #SubTable AS st
                  ON st.id = c3.id
          WHERE c3.rootId = c2.rootId
      )
ORDER BY c2.id;
You may try this....
-- PId = 0: keep climbing towards the root; PId = 1: descend through the children.
; with cte as(
    -- anchor: rows whose own id or whose parent id is listed in #SubTable
    select distinct mas.id, mas.parentId, iif(mas.parentid is null, 1, 0) PId
    from #MasterTable mas inner join #SubTable sub
        on sub.id in (mas.id, mas.parentid)
    union all
    -- On reaching a root the flag must flip to 1; if the climbing flag were simply
    -- carried over, the walk would stop at the root and never visit the root's other
    -- branches (e.g. id 14 when only id 13 is listed).
    select mas.id, mas.parentId, iif(mas.parentid is null, 1, ct.PId)
    from cte ct inner join #MasterTable mas
        on (ct.PId = 1 and mas.parentid = ct.id) or   -- descend to children
           (ct.PId = 0 and ct.parentid = mas.id)      -- climb to parent
)
select distinct id, parentid from cte order by id
option (MAXRECURSION 100); -- 100 is also the default cap; raise it for deeper hierarchies
See the linked reference for more information on recursive queries in SQL; Example E and the examples before it are the relevant ones.

Merging groups of interval data - SQL Server

I have two sets of interval data I.E.
Start End Type1 Type2
0 2 L NULL
2 5 L NULL
5 7 L NULL
7 10 L NULL
2 3 NULL S
3 5 NULL S
5 8 NULL S
11 12 NULL S
What I'd like to do is merge these sets into one. This seems possible by utilising an islands and gaps solution but due to the non-continuous nature of the intervals I'm not sure how to go about applying it... The output I'm expecting would be:
Start End Type1 Type2
0 2 L NULL
2 3 L S
3 5 L S
5 7 L S
7 8 L S
8 10 L NULL
11 12 NULL S
Anyone out there done something like this before??? Thanks!
Create script below:
CREATE TABLE Table1
([Start] int, [End] int, [Type1] varchar(4), [Type2] varchar(4))
;
INSERT INTO Table1
([Start], [End], [Type1], [Type2])
VALUES
(0, 2, 'L', NULL),
(2, 3, NULL, 'S'),
(2, 5, 'L', NULL),
(3, 5, NULL, 'S'),
(5, 7, 'L', NULL),
(5, 8, NULL, 'S'),
(7, 10, 'L', NULL),
(11, 12, NULL, 'S')
;
I assume that Start is inclusive, End is exclusive and given intervals do not overlap.
CTE_Number is a table of numbers. Here it is generated on the fly. I have it as a permanent table in my database.
CTE_T1 and CTE_T2 expand each interval into the corresponding number of rows using a table of numbers. For example, interval [2,5) generates rows with Values
2
3
4
This is done twice: for Type1 and Type2.
Results for Type1 and Type2 are FULL JOINed together on Value.
Finally, a gaps-and-islands pass groups/collapses intervals back.
Run the query step-by-step, CTE-by-CTE and examine intermediate results to understand how it works.
Sample data
I added a few rows to illustrate the case where there is a gap between values.
-- Session-local temporary table, so that the #Table1 references in the query below resolve.
-- ("DECLARE #name TABLE" is not valid T-SQL: DECLARE ... TABLE requires an @variable name.)
CREATE TABLE #Table1
    ([Start] int, [End] int, [Type1] varchar(4), [Type2] varchar(4))
;
INSERT INTO #Table1 ([Start], [End], [Type1], [Type2]) VALUES
    ( 0,  2, 'L', NULL),
    ( 2,  3, NULL, 'S'),
    ( 2,  5, 'L', NULL),
    ( 3,  5, NULL, 'S'),
    ( 5,  7, 'L', NULL),
    ( 5,  8, NULL, 'S'),
    ( 7, 10, 'L', NULL),
    (11, 12, NULL, 'S'),
    (15, 20, 'L', NULL),
    (15, 20, NULL, 'S');
Query
-- Merge the Type1 and Type2 interval sets: expand every interval into one row per
-- integer value, FULL JOIN the two expansions on that value, then collapse runs of
-- consecutive values with identical (Type1, Type2) back into intervals.
WITH
-- e1..e3: row generators (10, 100 and 1000 rows) built from cross joins
e1(n) AS
(
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
) -- 10
,e2(n) AS (SELECT 1 FROM e1 CROSS JOIN e1 AS b) -- 10*10
,e3(n) AS (SELECT 1 FROM e1 CROSS JOIN e2) -- 10*100
-- Numbers 1..1000. NOTE: an interval longer than 1000 cannot be fully expanded
-- below, because TOP(...) can return at most the 1000 rows available here.
,CTE_Numbers
AS
(
SELECT ROW_NUMBER() OVER (ORDER BY n) AS Number
FROM e3
)
-- One row per integer in [Start, End) for every Type1 interval
,CTE_T1
AS
(
SELECT
T1.[Start] + CA.Number - 1 AS Value
,T1.Type1
FROM
#Table1 AS T1
CROSS APPLY
(
-- the first (End - Start) numbers: 1, 2, ..., End - Start
SELECT TOP(T1.[End] - T1.[Start]) CTE_Numbers.Number
FROM CTE_Numbers
ORDER BY CTE_Numbers.Number
) AS CA
WHERE
T1.Type1 IS NOT NULL
)
-- Same expansion for the Type2 intervals
,CTE_T2
AS
(
SELECT
T2.[Start] + CA.Number - 1 AS Value
,T2.Type2
FROM
#Table1 AS T2
CROSS APPLY
(
SELECT TOP(T2.[End] - T2.[Start]) CTE_Numbers.Number
FROM CTE_Numbers
ORDER BY CTE_Numbers.Number
) AS CA
WHERE
T2.Type2 IS NOT NULL
)
-- Values covered by either type, with whichever types cover them; rn numbers the
-- rows in Value order.
-- NOTE(review): relies on intervals of the same type not overlapping, so that each
-- Value appears once here - confirm for your data.
,CTE_Values
AS
(
SELECT
ISNULL(CTE_T1.Value, CTE_T2.Value) AS Value
,CTE_T1.Type1
,CTE_T2.Type2
,ROW_NUMBER() OVER (ORDER BY ISNULL(CTE_T1.Value, CTE_T2.Value)) AS rn
FROM
CTE_T1
FULL JOIN CTE_T2 ON CTE_T2.Value = CTE_T1.Value
)
-- rn - Value stays constant while Values are consecutive and changes at a gap;
-- rn2 numbers the rows inside each (rn - Value, Type1, Type2) partition.
,CTE_Groups
AS
(
SELECT
Value
,Type1
,Type2
,rn
,ROW_NUMBER() OVER
(PARTITION BY rn - Value, Type1, Type2 ORDER BY Value) AS rn2
FROM CTE_Values
)
-- rn - rn2 is the island key; End is exclusive, hence MAX(Value) + 1
SELECT
MIN(Value) AS [Start]
,MAX(Value) + 1 AS [End]
,Type1
,Type2
FROM CTE_Groups
GROUP BY rn-rn2, Type1, Type2
ORDER BY [Start];
Result
+-------+-----+-------+-------+
| Start | End | Type1 | Type2 |
+-------+-----+-------+-------+
| 0 | 2 | L | NULL |
| 2 | 8 | L | S |
| 8 | 10 | L | NULL |
| 11 | 12 | NULL | S |
| 15 | 20 | L | S |
+-------+-----+-------+-------+
A step-by-step way is:
-- Finding all break points
;WITH breaks AS (
SELECT Start
FROM yourTable
UNION
SELECT [End]
FROM yourTable
) -- Finding Possible Ends
, ends AS (
SELECT Start
, (SELECT Min([End]) FROM yourTable WHERE yourTable.Start = breaks.Start) End1
, (SELECT Max([End]) FROM yourTable WHERE yourTable.Start < breaks.Start) End2
FROM breaks
) -- Finding periods
, periods AS (
SELECT Start,
CASE
WHEN End1 > End2 And End2 > Start THEN End2
WHEN End1 IS NULL THEN End2
ELSE End1
END [End]
FROM Ends
WHERE NOT(End1 IS NULL AND Start = End2)
) -- Generating results
SELECT p.Start, p.[End], Max(Type1) Type1, Max(Type2) Type2
FROM periods p, yourTable t
WHERE p.start >= t.Start AND p.[End] <= t.[End]
GROUP BY p.Start, p.[End];
The query above may not handle every possible situation; you can extend it as needed ;).
First getting all the numbers of start and end via a Union.
Then joining those numbers on both the 'L' and 'S' records.
Uses a table variable for the test.
-- Table variable holding the test intervals (Start inclusive, [End] exclusive).
DECLARE @Table1 TABLE (Start int, [End] int, Type1 varchar(4), Type2 varchar(4));
INSERT INTO @Table1 (Start, [End], Type1, Type2)
VALUES (0, 2, 'L', NULL),(2, 3, NULL, 'S'),(2, 5, 'L', NULL),(3, 5, NULL, 'S'),
       (5, 7, 'L', NULL),(5, 8, NULL, 'S'),(7, 10, 'L', NULL),(11, 12, NULL, 'S');

-- Every Start/End value is a break point; each output row runs from one break point
-- to the NEXT one. Taking the next break point (rather than the smaller of the two
-- matching interval ends) also splits correctly when an interval of the other type
-- starts in the middle, e.g. L [0,10) with S [3,5).
select
    n.Num as Start,
    n.NextNum as [End],
    l.Type1,
    s.Type2
from
    (
        select b.Num, lead(b.Num) over (order by b.Num) as NextNum
        from (select Start as Num from @Table1 union select [End] from @Table1) b
    ) n
    -- the 'L' interval (if any) and the 'S' interval (if any) covering the break point
    left join @Table1 l on (n.Num >= l.Start and n.Num < l.[End] and l.Type1 = 'L')
    left join @Table1 s on (n.Num >= s.Start and n.Num < s.[End] and s.Type2 = 'S')
-- drop break points that fall in a gap covered by neither type
where (l.Start is not null or s.Start is not null)
order by Start, [End];
Output:
Start End Type1 Type2
0 2 L NULL
2 3 L S
3 5 L S
5 7 L S
7 8 L S
8 10 L NULL
11 12 NULL S

SQL query to reconstruct inherited EAV model

I have 5 tables in my database representing an inherited EAV model:
CREATE TABLE AttributeNames
("ID" int, "Name" varchar(8))
;
INSERT INTO AttributeNames
("ID", "Name")
VALUES
(1, 'Color'),
(2, 'FuelType'),
(3, 'Doors'),
(4, 'Price')
;
CREATE TABLE MasterCars
("ID" int, "Name" varchar(10))
;
INSERT INTO MasterCars
("ID", "Name")
VALUES
(5, 'BMW'),
(6, 'Audi'),
(7, 'Ford')
;
CREATE TABLE MasterCarAttributes
("ID" int, "AttributeNameId" int, "Value" varchar(10), "MasterCarId" int)
;
INSERT INTO MasterCarAttributes
("ID", "AttributeNameId", "Value", "MasterCarId")
VALUES
(100, 1, 'Red', 5),
(101, 2, 'Gas', 5),
(102, 3, '4', 5),
(102, 4, '$100K', 5),
(103, 1, 'Blue', 6),
(104, 2, 'Diesel', 6),
(105, 3, '3', 6),
(106, 4, '$80k', 6),
(107, 1, 'Green', 7),
(108, 2, 'Diesel', 7),
(109, 3, '5', 7),
(110, 4, '$60k', 7)
;
-- Local cars. "MasterCarId" is NULL when the car inherits from no master car.
CREATE TABLE LocalCars
("ID" int, "MasterCarId" int)
;
-- "MasterCarId" is an int column: insert integer literals, not strings,
-- so that nothing depends on implicit string-to-int conversion.
INSERT INTO LocalCars
("ID", "MasterCarId")
VALUES
(8, 5),
(9, 6),
(10, NULL)
;
CREATE TABLE LocalCarAttributes
("ID" int, "AttributeNameId" int, "Value" varchar(6), "LocalCarId" int)
;
INSERT INTO LocalCarAttributes
("ID", "AttributeNameId", "Value", "LocalCarId")
VALUES
(43, 1, 'Yellow', 8),
(44, 3, '6', 9),
(45, 1, 'Red', 10),
(46, 2, 'Gas', 10),
(47, 3, '2', 10),
(48, 4, '$60k', 10)
;
I can retrieve all of master car attributes as follows:
SELECT MC.ID, MCA.AttributeNameId, MCA.Value
FROM MasterCars MC
left join MasterCarAttributes MCA on MC.ID = MCA.MasterCarId
order by MC.ID;
Likewise, I can retrieve all of the local car attributes as follows:
SELECT LC.ID, LCA.AttributeNameId, LCA.Value
FROM LocalCars LC
left join LocalCarAttributes LCA on LC.ID = LCA.LocalCarId
order by LC.ID;
If LocalCars.MasterCarId is not NULL, then that local car can inherit the attributes of that master car. A local car attribute with the same AttributeNameId overrides any master attribute with the same AttributeNameId.
So given the data above, I have 3 local cars each with 4 attributes (color, fuelType, doors, price). Inherited attribute values in bold:
Local Car Id = 1 (Yellow, Gas, 4, $100K)
Local Car Id = 2 (Blue, Diesel, 6, $80k)
Local Car Id = 3 (Red, Gas, 2, $60k)
I'm trying to find the necessary joins required to join the two queries above together to give a complete set of local cars attributes, some inherited:
LocalCarId AttributeNameId Value
------------------------------------------
1 1 Yellow
1 2 Gas
1 3 4
1 4 $100K
2 1 Blue
2 2 Diesel
2 3 6
2 4 $80K
3 1 Red
3 2 Gas
3 3 2
3 4 $60K
or possibly even:
LocalCarId AttributeNameId LocalValue MasterValue
-------------------------------------------------------------
1 1 Yellow Red
1 2 NULL Gas
1 3 NULL 4
1 4 NULL $100K
2 1 NULL Blue
2 2 NULL Diesel
2 3 6 3
2 4 NULL $80K
3 1 Red NULL
3 2 Gas NULL
3 3 2 NULL
3 4 $60K NULL
The problem can be solved by performing a union on all of your local car attributes and master car attributes. Each record is marked with an [IsMasterAttribute] flag. The next step is then use the ROW_NUMBER() window function to rank each of the duplicate attributes. The final step is to only select attributes which has a rank of 1.
-- Effective attributes per local car: a local value overrides the inherited master
-- value for the same AttributeNameId.
;WITH CTE_CombinedAttributes
AS
(
    -- Attributes inherited from the master car the local car points at.
    -- INNER JOINs: an outer join would add a row with a NULL AttributeNameId for a
    -- master car that has no attributes at all.
    SELECT 1 AS IsMasterAttribute
          ,LC.ID
          ,MC.ID AS MasterCarId
          ,MCA.AttributeNameId
          ,MCA.Value
    FROM LocalCars LC
    INNER JOIN MasterCars MC ON MC.ID = LC.MasterCarId
    INNER JOIN MasterCarAttributes MCA ON MCA.MasterCarId = MC.ID
    UNION ALL
    -- Attributes defined directly on the local car (same reasoning for the INNER JOIN).
    SELECT 0 AS IsMasterAttribute
          ,LC.ID
          ,LC.MasterCarId
          ,LCA.AttributeNameId
          ,LCA.Value
    FROM LocalCars LC
    INNER JOIN LocalCarAttributes LCA ON LCA.LocalCarId = LC.ID
)
,
CTE_RankedAttributes
AS
(
    -- Rank 1 is the local value when one exists (IsMasterAttribute = 0 sorts first),
    -- otherwise the master value.
    SELECT [IsMasterAttribute]
          ,[ID]
          ,[AttributeNameId]
          ,[Value]
          ,ROW_NUMBER() OVER (PARTITION BY [ID], [AttributeNameId] ORDER BY [IsMasterAttribute]) AS [AttributeRank]
    FROM CTE_CombinedAttributes
)
SELECT [IsMasterAttribute]
      ,[ID]
      ,[AttributeNameId]
      ,[Value]
FROM CTE_RankedAttributes
WHERE [AttributeRank] = 1
ORDER BY [ID], [AttributeNameId]
The second output is also possible by performing a simple pivot on the final result:
-- Local and master value side by side for every (local car, attribute) pair.
;WITH CTE_CombinedAttributes
AS
(
    -- Attributes inherited from the master car the local car points at.
    -- INNER JOINs: an outer join would add a row with a NULL AttributeNameId for a
    -- master car that has no attributes at all.
    SELECT 1 AS IsMasterAttribute
          ,LC.ID
          ,MC.ID AS MasterCarId
          ,MCA.AttributeNameId
          ,MCA.Value
    FROM LocalCars LC
    INNER JOIN MasterCars MC ON MC.ID = LC.MasterCarId
    INNER JOIN MasterCarAttributes MCA ON MCA.MasterCarId = MC.ID
    UNION ALL
    -- Attributes defined directly on the local car (same reasoning for the INNER JOIN).
    SELECT 0 AS IsMasterAttribute
          ,LC.ID
          ,LC.MasterCarId
          ,LCA.AttributeNameId
          ,LCA.Value
    FROM LocalCars LC
    INNER JOIN LocalCarAttributes LCA ON LCA.LocalCarId = LC.ID
)
-- Pivot: each group holds at most one local and one master row, so MAX simply picks
-- the value that is present. (No ranking step is needed here: both values are shown.)
SELECT [ID]
      ,[AttributeNameId]
      ,MAX(CASE [IsMasterAttribute] WHEN 0 THEN [Value] END) AS LocalValue
      ,MAX(CASE [IsMasterAttribute] WHEN 1 THEN [Value] END) AS MasterValue
FROM CTE_CombinedAttributes
GROUP BY [ID], [AttributeNameId]
ORDER BY [ID], [AttributeNameId]
SQL Fiddle Demo
-- Effective attributes per local car: the local value when present, else the master value.
SELECT LC."ID" as LocalCarID,
       COALESCE(LCA."AttributeNameId", MCA."AttributeNameId") as "AttributeNameId",
       COALESCE(LCA."Value", MCA."Value") as "Value"
FROM LocalCars LC
LEFT JOIN MasterCars MC
       ON LC."MasterCarId" = MC."ID"
LEFT JOIN MasterCarAttributes MCA
       ON MC."ID" = MCA."MasterCarId"
LEFT JOIN LocalCarAttributes LCA
       -- This is the important part:
       -- pair each master attribute with the local attribute of the same name; when the
       -- car has no master attributes at all, take every local attribute of the car.
       ON (   MCA."AttributeNameId" = LCA."AttributeNameId"
           OR MCA."AttributeNameId" IS NULL)
       -- Match on the owning car. LCA."ID" is the attribute row's own id, so comparing
       -- it with LC."ID" would never pick up the car's local attributes.
      AND LC."ID" = LCA."LocalCarId"
-- NOTE(review): a local attribute that has no master counterpart on a car that DOES
-- have master attributes is not returned by this join shape - confirm whether needed.
ORDER BY LocalCarID, "AttributeNameId"
OUTPUT
| LocalCarID | AttributeNameId | Value |
|------------|-----------------|--------|
| 1 | 1 | Blue |
| 1 | 2 | Gas |
| 2 | 1 | Green |
| 2 | 2 | Diesel |

SQL : how to find leaf rows?

i have a self related table myTable like :
ID | RefID
----------
1 | NULL
2 | 1
3 | 2
4 | NULL
5 | 2
6 | 5
7 | 5
8 | NULL
9 | 7
i need to get leaf rows on any depth
based on the table above, the result must be :
ID | RefID
----------
3 | 2
4 | NULL
6 | 5
8 | NULL
9 | 7
thank you
PS: the depth may vary , here is very small example
Try:
-- Leaf rows: rows whose id is not used as refid by any row.
SELECT
    leaf.id,
    leaf.refid
FROM mytable AS leaf
WHERE NOT EXISTS (
    SELECT 1
    FROM mytable AS child
    WHERE child.refid = leaf.id
)
-- Table variable with a sample hierarchy (RefID NULL marks a root).
DECLARE @t TABLE (id int NOT NULL, RefID int NULL);
INSERT @t (id, RefID) VALUES (1, NULL), (2, 1), (3, 2), (5, NULL),
    (6, 5), (4, NULL), (7, 5), (8, NULL), (9, 8), (10, 7);
WITH CTE AS
(
    -- top level
    SELECT id, RefID, id AS RootId, 0 AS CTELevel FROM @t WHERE RefID IS NULL
    UNION ALL
    -- children inherit the root id; the level grows by one per step
    SELECT T.id, T.RefID, CTE.RootId, CTE.CTELevel + 1 FROM @t T JOIN CTE ON T.RefID = CTE.id
), Leafs AS
(
    -- A leaf is a row that no other row references, at ANY depth. Keeping only the
    -- deepest level per root would miss leaves on shorter branches of the same tree.
    SELECT
        CTE.id, CTE.RefID
    FROM CTE
    WHERE NOT EXISTS (SELECT 1 FROM @t child WHERE child.RefID = CTE.id)
)
SELECT
    id, RefID
FROM
    Leafs
ORDER BY
    id
-- Anti-join: keep the t1 rows for which no row t2 references t1.ID.
-- Columns must be qualified: ID and RefID exist on both sides of the self-join,
-- so unqualified names are ambiguous.
select t1.ID, t1.RefID
from myTable t1
left join myTable t2 on t1.ID = t2.RefID
where t2.RefID is null
try this:
-- Leaf rows: ids that never occur as another row's RefID.
-- The IS NOT NULL filter is essential: a single NULL in the NOT IN list would make
-- the predicate UNKNOWN for every row and the query would return nothing.
SELECT
    t.ID,
    t.RefID
FROM
    myTable AS t
WHERE
    t.ID NOT IN
    (
        SELECT
            c.RefID
        FROM
            myTable AS c
        WHERE
            c.RefID IS NOT NULL
    )