To arrange the hierarchy in a table - sql

I have a table dbo.Hierarchy that contains the following data:
Level1 Level2 Level3 Level4 Level5 Level6 Level7 Level8 Level9 Level10
-----------------------------------------------------------------------
a b c d e f g h i j
k l m n o
There are a total of 10 levels and any item can have hierarchy upto any level. In the above data a is the parent of b, b is the parent of c and so on. j and o are the last levels in their respective hierarchies. How can I get the output in the below format:
Name ParentName LevelID
-------------------------------
a NULL 1
b a 2
j i 10
k NULL 1
l k 2
o n 5

Something like (untested)
with t(L1,L2,L3,L4,L5,L6,L7,L8,L9,L10) as (
values ('a','b','c','d','e','f','g','h','i','j')
, ('k','l','m','n','o',null,null,null,null,null)
)
select x.*
from t
cross apply (
values (L1,null,1),(L2,L1,2),(L3,L2,3),(L4,L3,4),(L5,L4,5)
, (L6,L5,6),(L7,L6,7),(L8,L7,8),(L9,L8,9),(L10,L9,10)) x (name, parentname, levelid)
where name is not null

Try this:
;with base as
(select *, row_number() over (order by level1) rn from tbl),
src as
(
select
valname as name,
cast(substring(colname,6,len(colname)) as int) as level,
rn from
(select * from base) s
unpivot
(
valname
for colname in ([level1],[level2],[level3],[level4],[level5],[level6],[level7], [level8],[level9],[level10])
) u
),
cte as
(select * from src
where level = 1
union all
select s.* from src s
inner join cte c on s.level = c.level + 1 and s.rn = c.rn)
select distinct s.name, t.name parentname, s.level levelid from
cte s
left join cte t on s.rn = t.rn and s.level = t.level + 1
Breakdown:
CTE base is used to generate row number as a derived column. We will use this column to keep track of which values belong to which row. This will help in uniquely mapping children to their parents.
CTE src is where we transform the table from this denormalised structure to a normalised one. Using UNPIVOT, we reduce the data set to 3 columns - name, level and the row number rn.
CTE cte is a recursive CTE that we use to get all possible combinations of parents and children (including immediate parents as well as ancestors).
Finally, we LEFT JOIN cte to itself on the condition that row number is same on both sides of the join i.e. the values belong to same record from the base table, and also that the value from the right side is the immediate ancestor (parent) of the value on the left side.
Demo
The huge mass of code above can be avoided, if you were to choose a normalised structure for your table. I would suggest something like this:
CREATE TABLE tbl
(ID int, --Keep track of values that are related to each other
Name varchar(100), --Name
Level int --The level for a particular value
)
With this proposed structure, all you would need is the recursive CTE (cte from the above code) and the left join to get the parent-child data. The beauty of this approach is that you can extend it to any number of levels that you like, without having to hard-code the level numbers.
Demo

Related

Combine rows from Mulitple tables into single table

I have one parent table Products with multiple child tables -Hoses,Steeltubes,ElectricCables,FiberOptics.
ProductId -Primary key field in Product table
ProductId- ForeignKey field in Hoses,Steeltubes,ElectricCables,FiberOptics.
Product table has 1 to many relationship with Child tables
I want to combine result of all tables .
For eg - Product P1 has PK field ProductId which is used in all child tables as FK.
If Hoses table has 4 record with ProductId 50 and Steeltubes table has 2 records with ProductId 50 when I perform left join then left join is doing cartesian product of records showing 8 record as result But it should be 4 records .
;with HOSESTEELCTE
as
(
select '' as ModeType, '' as FiberOpticQty , '' as NumberFibers, '' as FiberLength, '' as CableType , '' as Conductorsize , '' as Voltage,'' as ElecticCableLength , s.TubeMaterial , s.TubeQty, s.TubeID , s.WallThickness , s.DWP ,s.Length as SteelLength , h.HoseSeries, h.HoseLength ,h.ProductId
from Hoses h
left join
(
--'' as HoseSeries,'' as HoseLength ,
select TubeMaterial , TubeQty, TubeID , WallThickness , DWP , Length,ProductId from SteelTubes
) s on (s.ProductId = h.ProductId)
) select * from HOSESTEELCTE
Assuming there are no relationships between child tables and you simply want a list of all child entities which make up a product you could generate a cte which has a number of rows which are equal to the largest number of entries across all the child tables for a product. In the example below I have used a dates table to simplify the example.
so for this data
create table products(pid int);
insert into products values
(1),(2);
create table hoses (pid int,descr varchar(2));
insert into hoses values (1,'h1'),(1,'h2'),(1,'h3'),(1,'h4');
create table steeltubes (pid int,descr varchar(2));
insert into steeltubes values (1,'t1'),(1,'t2');
create table electriccables(pid int,descr varchar(2));
truncate table electriccables
insert into electriccables values (1,'e1'),(1,'e2'),(1,'e3'),(2,'e1');
this cte
;with cte as
(select row_number() over(partition by p.pid order by datekey) rn, p.pid
from dimdate, products p
where datekey < 20050105)
select * from cte
create a cartesian join (one of the rare ocassions where an implicit join helps) pid to rn
result
rn pid
-------------------- -----------
1 1
2 1
3 1
4 1
1 2
2 2
3 2
4 2
And if we add the child tables
;with cte as
(select row_number() over(partition by p.pid order by datekey) rn, p.pid
from dimdate, products p
where datekey < 20050106)
select c.pid,h.descr hoses,s.descr steeltubes,e.descr electriccables from cte c
left join (select h.*, row_number() over(order by h.pid) rn from hoses h) h on h.rn = c.rn and h.pid = c.pid
left join (select s.*, row_number() over(order by s.pid) rn from steeltubes s) s on s.rn = c.rn and s.pid = c.pid
left join (select e.*, row_number() over(order by e.pid) rn from electriccables e) e on e.rn = c.rn and e.pid = c.pid
where h.rn is not null or s.rn is not null or e.rn is not null
order by c.pid,c.rn
we get this
pid hoses steeltubes electriccables
----------- ----- ---------- --------------
1 h1 t1 e1
1 h2 t2 e2
1 h3 NULL e3
1 h4 NULL NULL
2 NULL NULL e1
In fact, the result having 8 rows can be expected to be the result, since your four records are joined with the first record in the other table and then your four records are joined with the second record of the other table, making it 4 + 4 = 8.
The very fact that you expect 4 records to be in the result instead of 8 shows that you want to use some kind of grouping. You can group your inner query issued for SteelTubes by ProductId, but then you will need to use aggregate functions for the other columns. Since you have only explained the structure of the desired output, but not the semantics, I am not able with my current knowledge about your problem to determine what aggregations you need.
Once you find out the answer for the first table, you will be able to easily add the other tables into the selection as well, but in case of large data you might get some scaling problems, so you might want to have a table where you store these groups, maintain it when something changes and use it for these selections.

How to do conditional update on columns using CTE?

I have a table CUST with following layout. There are no constraints. I do see that one ChildID has more than one ParentID associated with it. (Please see the records for ChildID = 115)
Here is what I need -
Wherever one child has more than 1 parent, I want to update those ParentID and ParentName with the ParentID and ParentName which has max match_per. So in the below image, I want ParentID 1111 and ParentName LEE YOUNG WOOK to update all records where ChildId = 115 (since the match_per 0.96 is maximum within the given set). In case there are two parents with equal max match_per, then I want to pick any 1 one of them.
I know it is possible using CTE but I don't know how to update CTE. Can anybody help?
One way of doing it
WITH CTE1 AS
(
SELECT *,
CASE WHEN match_per =
MAX(match_per) OVER (PARTITION BY ChildId)
THEN CAST(ParentId AS CHAR(10)) + ParentName
END AS parentDetailsForMax
FROM CUST
), CTE2 AS
(
SELECT *,
MAX(parentDetailsForMax) OVER (PARTITION BY ChildId) AS maxParentDetailsForMax
FROM CTE1
)
UPDATE CTE2
SET ParentId = CAST(LEFT(maxParentDetailsForMax,10) AS int),
ParentName = SUBSTRING(maxParentDetailsForMax,10,8000)
Getting both the parent id and parent name is a bit tricky. I think the logic is easiest using cross apply:
with toupdate as (
select t.*, p.parentId as new_parentId, p.parentName as new_parentName
max(match_per) over (partition by childid) as max_match_per,
count(*) over (partition by childid) as numparents
from table t cross apply
(select top 1 p.*
from table p
where p.childid = t.childid
order by match_per desc
) p
)
update toupdate
set parentId = new_ParentId,
parentName = new_ParentName
where numparents > 1;
As a note: the fact that parent id and parent name are both stored in the table, potentially multiple times seems like a problem. I would expect to look up the name, given the id, to reduce data redundancy.
Try something like this?? The first CTE will get Max(match_per) for each ChildID. Then, the second will use the new MaxMatchPer to find what its corresponding ParentID should be.
; WITH CTE AS (
SELECT ChildID,MAX(match_per) AS MaxMatchPer
FROM tbl
GROUP BY ChildID
), CTE1 AS (
SELECT t.ParentID, c.ChildID
FROM tbl t
JOIN CTE c
ON c.ChildID = t.ChildID
AND c.MaxMatchPer = t.match_per
)
UPDATE t
SET ParentID = c.ParentID
FROM tbl t
LEFT JOIN CTE1 c
ON c.ChildID = t.ChildID
Also, this is poor normalization. You should not have ParentName nor ChildName in this table.

SQL Server: querying hierarchical and referenced data

I'm working on an asset database that has a hierarchy. Also, there is a "ReferenceAsset" table, that effectively points back to an asset. The Reference Asset basically functions as an override, but it is selected as if it were a unique, new asset. One of the overrides that gets set, is the parent_id.
Columns that are relevant to selecting the heirarchy:
Asset: id (primary), parent_id
Asset Reference: id (primary), asset_id (foreignkey->Asset), parent_id (always an Asset)
---EDITED 5/27----
Sample Relevent Table Data (after joins):
id | asset_id | name | parent_id | milestone | type
3 3 suit null march shape
4 4 suit_banker 3 april texture
5 5 tie null march shape
6 6 tie_red 5 march texture
7 7 tie_diamond 5 june texture
-5 6 tie_red 4 march texture
the id < 0 (like the last row) signify assets that are referenced. Referenced assets have a few columns that are overidden (in this case, only parent_id is important).
The expectation is that if I select all assets from april, I should do a secondary select to get the entire tree branches of the matching query:
so initially the query match would result in:
4 4 suit_banker 3 april texture
Then after the CTE, we get the complete hierarchy and our result should be this (so far this is working)
3 3 suit null march shape
4 4 suit_banker 3 april texture
-5 6 tie_red 4 march texture
and you see, the parent of id:-5 is there, but what is missing, that is needed, is the referenced asset, and the parent of the referenced asset:
5 5 tie null march shape
6 6 tie_red 5 march texture
Currently my solution works for this, but it is limited to only a single depth of references (and I feel the implementation is quite ugly).
---Edited----
Here is my primary Selection Function. This should better demonstrate where the real complication lies: the AssetReference.
Select A.id as id, A.id as asset_id, A.name,A.parent_id as parent_id, A.subPath, T.name as typeName, A2.name as parent_name, B.name as batchName,
L.name as locationName,AO.owner_name as ownerName, T.id as typeID,
M.name as milestoneName, A.deleted as bDeleted, 0 as reference, W.phase_name, W.status_name
FROM Asset as A Inner Join Type as T on A.type_id = T.id
Inner Join Batch as B on A.batch_id = B.id
Left Join Location L on A.location_id = L.id
Left Join Asset A2 on A.parent_id = A2.id
Left Join AssetOwner AO on A.owner_id = AO.owner_id
Left Join Milestone M on A.milestone_id = M.milestone_id
Left Join Workflow as W on W.asset_id = A.id
where A.deleted <= #showDeleted
UNION
Select -1*AR.id as id, AR.asset_id as asset_id, A.name, AR.parent_id as parent_id, A.subPath, T.name as typeName, A2.name as parent_name, B.name as batchName,
L.name as locationName,AO.owner_name as ownerName, T.id as typeID,
M.name as milestoneName, A.deleted as bDeleted, 1 as reference, NULL as phase_name, NULL as status_name
FROM Asset as A Inner Join Type as T on A.type_id = T.id
Inner Join Batch as B on A.batch_id = B.id
Left Join Location L on A.location_id = L.id
Left Join Asset A2 on AR.parent_id = A2.id
Left Join AssetOwner AO on A.owner_id = AO.owner_id
Left Join Milestone M on A.milestone_id = M.milestone_id
Inner Join AssetReference AR on AR.asset_id = A.id
where A.deleted <= #showDeleted
I have a stored procedure that takes a temp table (#temp) and finds all the elements of the hierarchy. The strategy I employed was this:
Select the entire system heirarchy into a temp table (#treeIDs) represented by a comma separated list of each entire tree branch
Get entire heirarchy of assets matching query (from #temp)
Get all reference assets pointed to by Assets from heirarchy
Parse the heirarchy of all reference assets
This works for now because reference assets are always the last item on a branch, but if they weren't, i think i would be in trouble. I feel like i need some better form of recursion.
Here is my current code, which is working, but i am not proud of it, and I know it is not robust (because it only works if the references are at the bottom):
Step 1. build the entire hierarchy
;WITH Recursive_CTE AS (
SELECT Cast(id as varchar(100)) as Hierarchy, parent_id, id
FROM #assetIDs
Where parent_id is Null
UNION ALL
SELECT
CAST(parent.Hierarchy + ',' + CAST(t.id as varchar(100)) as varchar(100)) as Hierarchy, t.parent_id, t.id
FROM Recursive_CTE parent
INNER JOIN #assetIDs t ON t.parent_id = parent.id
)
Select Distinct h.id, Hierarchy as idList into #treeIDs
FROM ( Select Hierarchy, id FROM Recursive_CTE ) parent
CROSS APPLY dbo.SplitIDs(Hierarchy) as h
Step 2. Select the branches of all assets that match the query
Select DISTINCT L.id into #RelativeIDs FROM #treeIDs
CROSS APPLY dbo.SplitIDs(idList) as L
WHERE #treeIDs.id in (Select id FROM #temp)
Step 3. Get all Reference Assets in the branches
(Reference assets have negative id values, hence the id < 0 part)
Select asset_id INTO #REFLinks FROM #AllAssets WHERE id in
(Select #AllAssets.asset_id FROM #AllAssets Inner Join #RelativeIDs
on #AllAssets.id = #RelativeIDs.id Where #RelativeIDs.id < 0)
Step 4. Get the branches of anything found in step 3
Select DISTINCT L.id into #extraRelativeIDs FROM #treeIDs
CROSS APPLY dbo.SplitIDs(idList) as L
WHERE
exists (Select #REFLinks.asset_id FROM #REFLinks WHERE #REFLinks.asset_id = #treeIDs.id)
and Not Exists (select id FROM #RelativeIDs Where id = #treeIDs.id)
I've tried to just show the relevant code. I am super grateful to anyone who can help me find a better solution!
--getting all of the children of a root node ( could be > 1 ) and it would require revising the query a bit
DECLARE #AssetID int = (select AssetId from Asset where AssetID is null);
--algorithm is relational recursion
--gets the top level in hierarchy we want. The hierarchy column
--will show the row's place in the hierarchy from this query only
--not in the overall reality of the row's place in the table
WITH Hierarchy(Asset_ID, AssetID, Levelcode, Asset_hierarchy)
AS
(
SELECT AssetID, Asset_ID,
1 as levelcode, CAST(Assetid as varchar(max)) as Asset_hierarchy
FROM Asset
WHERE AssetID=#AssetID
UNION ALL
--joins back to the CTE to recursively retrieve the rows
--note that treelevel is incremented on each iteration
SELECT A.Parent_ID, B.AssetID,
Levelcode + 1 as LevelCode,
A.assetID + '\' + cast(A.Asset_id as varchar(20)) as Asset_Hierarchy
FROM Asset AS a
INNER JOIN dbo.Batch AS Hierarchy
--use to get children, since the parentId of the child will be set the value
--of the current row
on a.assetId= b.assetID
--use to get parents, since the parent of the Asset_Hierarchy row will be the asset,
--not the parent.
on Asset.AssetId= Asset_Hierarchy.parentID
SELECT a.Assetid,a.name,
Asset_Hierarchy.LevelCode, Asset_Hierarchy.hierarchy
FROM Asset AS a
INNER JOIN Asset_Hierarchy
ON A.AssetID= Asset_Hierarchy.AssetID
ORDER BY Hierarchy ;
--return results from the CTE, joining to the Asset data to get the asset name
---that is the structure you will want. I would need a little more clarification of your table structure
It would help to know your underlying table structure. There are two approaches which should work depending on your environment: SQL understands XML so you could have your SQL as an xml structure or simply have a single table with each row item having a unique primary key id and a parentid. id is the fk for the parentid. The data for the node are just standard columns. You can use a cte or a function powering a calculated column to determin the degree of nesting for each node. The limit is that a node can only have one parent.

Recursive query in SQL Server

I have a table with following structure
Table name: matches
That basically stores which product is matching which product. I need to process this table
And store in a groups table like below.
Table Name: groups
group_ID stores the MIN Product_ID of the Product_IDS that form a group. To give an example let's say
If A is matching B and B is Matching C then three rows should go to group table in format (A, A), (A, B), (A, C)
I have tried looking into co-related subqueries and CTE, but not getting this to implement.
I need to do this all in SQL.
Thanks for the help .
Try this:
;WITH CTE
AS
(
SELECT DISTINCT
M1.Product_ID Group_ID,
M1.Product_ID
FROM matches M1
LEFT JOIN matches M2
ON M1.Product_Id = M2.matching_Product_Id
WHERE M2.matching_Product_Id IS NULL
UNION ALL
SELECT
C.Group_ID,
M.matching_Product_Id
FROM CTE C
JOIN matches M
ON C.Product_ID = M.Product_ID
)
SELECT * FROM CTE ORDER BY Group_ID
You can use OPTION(MAXRECURSION n) to control recursion depth.
SQL FIDDLE DEMO
Something like this (not tested)
with match_groups as (
select product_id,
matching_product_id,
product_id as group_id
from matches
where product_id not in (select matching_product_id from matches)
union all
select m.product_id, m.matching_product_id, p.group_id
from matches m
join match_groups p on m.product_id = p.matching_product_id
)
select group_id, product_id
from match_groups
order by group_id;
Sample of the Recursive Level:
DECLARE #VALUE_CODE AS VARCHAR(5);
--SET #VALUE_CODE = 'A' -- Specify a level
WITH ViewValue AS
(
SELECT ValueCode
, ValueDesc
, PrecedingValueCode
FROM ValuesTable
WHERE PrecedingValueCode IS NULL
UNION ALL
SELECT A.ValueCode
, A.ValueDesc
, A.PrecedingValueCode
FROM ValuesTable A
INNER JOIN ViewValue V ON
V.ValueCode = A.PrecedingValueCode
)
SELECT ValueCode, ValueDesc, PrecedingValueCode
FROM ViewValue
--WHERE PrecedingValueCode = #VALUE_CODE -- Specific level
--WHERE PrecedingValueCode IS NULL -- Root

Common Table Expression Counters with 2 Unions

If I have a common table expression for a family with mother & father, how can I increment the 'Generation' counter? A family should have the child as generation zero, parents as generation 1, and the four grandparents as generation 2. But the loop is performed twice, one for each set of grandparents.
;WITH FamilyTree
AS
(
SELECT *, 0 AS Generation
FROM myTable
WHERE [id] = 99
UNION ALL
SELECT name, Generation + 1
FROM myTable AS Fam
INNER JOIN FamilyTree
ON Fam.[id] = FamilyTree.[motherid]
UNION ALL
SELECT name, Generation + 1
FROM myTable AS Fam
INNER JOIN FamilyTree
ON Fam.[id] = FamilyTree.[fatherid]
)
SELECT generation, name FROM FamilyTree
Change the join look at relatives in one generation gap in one go, rather then have 2 recursive clauses in the CTE. The 2 clauses form a a partial cross join which is why you have extra rows
;WITH FamilyTree
AS
(
SELECT *, 0 AS Generation
FROM myTable
WHERE [id] = 99
UNION ALL
SELECT name, Generation + 1
FROM myTable AS Fam
INNER JOIN FamilyTree
ON Fam.[id] IN (FamilyTree.[motherid], FamilyTree.[fatherid])
)
SELECT generation, name FROM FamilyTree