How to group nodes with relationships in SQL - sql

I have the following table which lists related nodes:
;WITH CTE AS
( SELECT *
FROM (VALUES (1,2)
,(2,1)
,(3,4)
,(3,5)
,(4,3)
,(4,5)
,(5,3)
,(5,4)
,(6,NULL)
,(7,NULL)
,(8,9)
,(9,8)
) AS ValuesTable(ID,RelatedID)
)
SELECT *
FROM CTE
How can I assign unique IDS (GUID or integer GroupID) to each group, So, 1 and 2 will be on the same group, 3, 4, 5 on a different group, 6 is alone in it's group and so is 7, and 8 and 9 are one more group?
My answer so far seems very cumbersome:
;WITH CTE AS
( SELECT *
FROM (VALUES (1,2)
,(2,1)
,(3,4)
,(3,5)
,(4,3)
,(4,5)
,(5,3)
,(5,4)
,(6,NULL)
,(7,NULL)
,(8,9)
,(9,8)
) AS ValuesTable(ID,RelatedID)
)
SELECT DENSE_RANK() OVER(ORDER BY CA.IDList) AS GroupID,
ID,
RelatedID
FROM CTE
CROSS APPLY (SELECT STUFF((SELECT ',' + CONVERT(NVARCHAR(255), ID)
FROM CTE AS CTEInner
WHERE CTEInner.ID = CTE.ID
OR CTEInner.ID = CTE.RelatedID
OR CTEInner.RelatedID = CTE.RelatedID
OR CTEInner.RelatedID = CTE.ID
FOR XML PATH(''),TYPE).value('(./text())[1]','NVARCHAR(MAX)'),1,1,'') AS IDList) AS CA
But it provides the correct answer:
GroupID ID RelatedID
1 1 2
1 2 1
2 3 4
2 3 5
2 4 3
2 4 5
2 5 3
2 5 4
3 6 NULL
4 7 NULL
5 8 9
5 9 8

Adding a unique number for each group is not hard but it does require a few steps.
The first step would be to select unique values for the groups - so for instance the group with (1, 2) and (2, 1) will contain only a single record - (1, 2).
The next step is to get rid of the records that creates multiple paths for the same relationship - in this case - (3, 4), (4, 5), (3, 5) - means that 5 is the related to both 3 and 4, but for the recursive cte to work, we only need a single relationship path - so either (3, 4), (4, 5) or (3, 4), (3, 5) but not both.
The next step is to create a recursive cte based on these unique values, so that each group can get it's unique number.
After that, you can select from the original cte joined to the recursive cte and get the unique group numbers:
;WITH CTE AS
( SELECT *
FROM (VALUES (1,2)
,(2,1)
,(3,4)
,(3,5)
,(4,3)
,(4,5)
,(5,3)
,(5,4)
,(6,NULL)
,(7,NULL)
,(8,9)
,(9,8)
) AS ValuesTable(ID,RelatedID)
)
, PreUniqueValues AS
(
SELECT MIN(ID) AS ID,
MAX(RelatedID) As RelatedID
FROM CTE AS B
GROUP BY (ID + ISNULL(RelatedID, 0)) + (ID * ISNULL(RelatedID, 0))
)
, UniqueValues AS
(
SELECT ID, MIN(RelatedID) As RelatedID
FROM PreUniqueValues
GROUP BY ID
)
, Recursive AS
(
SELECT ID, RelatedId, DENSE_RANK() OVER(ORDER BY ID) As GroupID
FROM UniqueValues AS T0
WHERE NOT EXISTS
(
SELECT 1
FROM UniqueValues AS T1
WHERE T1.ID = T0.RelatedID
)
UNION ALL
SELECT UV.ID, UV.RelatedID, GroupID
FROM UniqueValues As UV
JOIN Recursive As Re
ON UV.ID = Re.RelatedId
)
SELECT CTE.ID, CTE.RelatedID, GroupID
FROM CTE
JOIN Recursive
ON CTE.ID = Recursive.ID OR CTE.ID = ISNULL(Recursive.RelatedID, 0)
ORDER BY ID
Results:
ID RelatedID GroupID
1 2 1
2 1 1
4 3 2
4 5 2
5 3 2
5 4 2
6 NULL 3
7 NULL 4
8 9 5
9 8 5

This is a graph-walking problem and you would seem to need recursive CTEs. The logic looks like this:
WITH t AS (
SELECT *
FROM (VALUES (1,2)
,(2,1)
,(3,4)
,(3,5)
,(4,3)
,(4,5)
,(5,3)
,(5,4)
,(6,NULL)
,(7,NULL)
,(8,9)
,(9,8)
) AS ValuesTable(ID,RelatedID)
),
cte as (
select distinct id, id as relatedId, ',' + convert(varchar(max), id) + ',' as relatedIds
from t
union all
select cte.id, t.relatedId, cte.relatedIds + convert(varchar(max), t.relatedId) + ','
from cte join
t
on cte.relatedId = t.id
where cte.relatedId is not null and
cte.relatedIds not like '%,' + convert(varchar(max), t.relatedId) + ',%'
)
SELECT id, min(relatedId) as grp,
dense_rank() over (order by min(relatedId)) as grp_number
FROM cte
GROUP BY id;
Here is a db<>fiddle.

Related

Sql Server 2014 - Deep Recursive Parent-Child Self Join

I am trying to build a deep recursive self-join query. Having the table like:
Id | ParentId
1 | NULL
2 | 1
3 | 1
4 | 2
5 | 3
6 | 8
7 | 9
For Id 1 my query should be fetching 1,2,3,4,5 since they are either the children of 1 or children of the children of 1. In the given example 6 and 7 should not be included in the query result.
I tried using CTE but I am getting tons of duplicates:
WITH CTE AS (
SELECT Id, ParentId
FROM dbo.Table
WHERE ParentId IS NULL
UNION ALL
SELECT t.Id, t.ParentId
FROM dbo.Table t
INNER JOIN CTE c ON t.ParentId = c.Id
)
SELECT * FROM CTE
Ideas?
You can try to use DISTINCT to filter duplicate rows.
;WITH CTE AS (
SELECT Id, ParentId
FROM T
WHERE ParentId IS NULL
UNION ALL
SELECT t.Id, t.ParentId
FROM T
INNER JOIN CTE c ON t.ParentId = c.Id
)
SELECT DISTINCT Id, ParentId
FROM CTE
Try the following query with CTE where you can set parentId by #parentID:
DECLARE #parentID INT = 1
;WITH cte AS
(
SELECT
t.ID
, t.ParentId
FROM #table t
),
cteParent AS
(
SELECT
t.ID
, t.ParentId
FROM #table t
WHERE t.ParentId IN (SELECT t1.ID FROM #table t1 WHERE T1.ParentId = #parentID)
)
SELECT
DISTINCT c1.ID
, c1.ParentId
FROM cte c1
INNER JOIN cte c2 ON c2.ParentId = c1.ID
UNION ALL
SELECT *
FROM cteParent
And the sample data:
DECLARE #table TABLE
(
ID INT
, ParentId INT
)
INSERT INTO #table
(
ID,
ParentId
)
VALUES
(1, NULL )
, (2, 1 )
, (3, 1 )
, (4, 2 )
, (5, 3 )
, (6, 8 )
, (7, 9 )
OUTPUT:
ID ParentId
1 NULL
2 1
3 1
4 2
5 3
I don't see duplicates.
Your code returns the following on the data you provided:
Id ParentId
1
2 1
3 1
5 3
4 2
which is what you want.
Here is a db<>fiddle.
Here is the code:
WITH t as (
SELECT *
FROM (VALUES (1, NULL), (2, 1), (3, 1), (4, 2), (5, 3), (6, 8), (7, 9)
) v(id, parentId)
),
CTE AS (
SELECT Id, ParentId
FROM t
WHERE ParentId IS NULL
UNION ALL
SELECT t.Id, t.ParentId
FROM t
INNER JOIN CTE c ON t.ParentId = c.Id
)
SELECT *
FROM CTE;
If you are getting duplicates in your actual result set, then you presumably have duplicates in your original table. I would recommend removing them before doing the recursive logic:
with t as (
select distinct id, parentid
from <your query>
),
. . .
Then run the recursive logic.
Try this sql script which result Parent Child Hierarchy
;WITH CTE(Id , ParentId)
AS
(
SELECT 1 , NULL UNION ALL
SELECT 2 , 1 UNION ALL
SELECT 3 , 1 UNION ALL
SELECT 4 , 2 UNION ALL
SELECT 5 , 3 UNION ALL
SELECT 6 , 8 UNION ALL
SELECT 7 , 9
)
,Cte2
AS
(
SELECT Id ,
ParentId ,
CAST('\'+ CAST(Id AS VARCHAR(MAX))AS VARCHAR(MAX)) AS [Hierarchy]
FROM CTE
WHERE ParentId IS NULL
UNION ALL
SELECT c1.Id ,
c1.ParentId ,
[Hierarchy]+'\'+ CAST(c1.Id AS VARCHAR(MAX)) AS [Hierarchy]
FROM Cte2 c2
INNER JOIN CTE c1
ON c1.ParentId = c2.Id
)
SELECT Id,
RIGHT([Hierarchy],LEN([Hierarchy])-1) AS ParentChildHierarchy
FROM Cte2
GO
Result
Id ParentChildHierarchy
-------------------------
1 1
2 1\2
3 1\3
5 1\3\5
4 1\2\4
This query will help you
CREATE TABLE #table( ID INT, ParentId INT )
INSERT INTO #table(ID,ParentId)
VALUES (1, NULL ), (2, 1 ), (3, 1 ), (4, 2 ), (5, 3 ), (6, 8 ), (7, 9 )
;WITH CTE AS (
SELECT ID FROM #table WHERE PARENTID IS NULL
UNION ALL
SELECT T.ID FROM #table T
INNER JOIN #table T1 ON T.PARENTID =T1.ID
) SELECT * FROM CTE

Rows Columns Traverse

I have data in the below format
id idnew
1 2
3 4
2
4 7
6 8
7
Result Should be something like this
ID should be followed by idnew
1
2
3
4
2
4
7
6
8
7
Thanks in advance
This should maintain the order:
SELECT id
FROM (
SELECT id, ROW_NUMBER() OVER (ORDER BY id) AS RowNumber
FROM myTable
UNION ALL
SELECT idnew, ROW_NUMBER() OVER (ORDER BY idnew) +
(SELECT COUNT(*) FROM dbo.myTable) AS RowNumber
FROM myTable
WHERE idnew IS NOT NULL
) a
ORDER BY RowNumber
I am assuming the id column is NOT NULL-able.
NOTE: If you want to keep the NULL values from the idnew column AND maintain the order, then remove the WHERE clause and ORDER BY id in the second select:
SELECT id
FROM (
SELECT id, ROW_NUMBER() OVER (ORDER BY id) AS RowNumber
FROM myTable
UNION ALL
SELECT idnew, ROW_NUMBER() OVER (ORDER BY id) +
(SELECT COUNT(*) FROM dbo.myTable) AS RowNumber
FROM myTable
) a
ORDER BY RowNumber
This is fully tested, try it here: https://rextester.com/DVZXO21058
Setting up the table as you described:
CREATE TABLE myTable (id INT, idnew INT);
INSERT INTO myTable (id, idnew)
VALUES (1, 2),
(3, 4),
(2, NULL),
(4, 7),
(6, 8),
(7, NULL);
SELECT * FROM myTable;
Here is the query to do the trick:
SELECT mixed_id FROM
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS row_num,
id,
idnew
FROM myTable
) AS x
UNPIVOT
(
mixed_id for item in (id, idnew)
) AS y
WHERE mixed_id IS NOT NULL
ORDER BY row_num, mixed_id;
In order not to further complicate the query, this is taking advantage of 'id' would rank ahead of 'idnew' as a string. I believe string ranking is not the key issue here.
Using Cross Apply
;WITH CTE (id,idnew)
AS
(
SELECT 1,2 UNION ALL
SELECT 3,4 UNION ALL
SELECT 2,NULL UNION ALL
SELECT 4,7 UNION ALL
SELECT 6,8 UNION ALL
SELECT 7,NULL
)
SELECT New
FROM CTE
CROSS APPLY ( VALUES (id),(idnew))AS Dt (New)
WHERE dt.New IS NOT NULL
Result
New
---
1
2
3
4
2
4
7
6
8
7

SQL Grouping by first digit from sets of record

I need your help in SQL
I have a set of records of Cost center ID below.
what I want to do is to segregate/group them by inserting column to distinguish the category.
as you can see all digits start in 7 is belong to the bold digits.
my expected out is on below image also.
You can as the below:
DECLARE #Tbl TABLE (ID INT)
INSERT INTO #Tbl
VALUES
(735121201),
(735120001),
(5442244),
(735141094),
(735141097),
(4008060),
(735117603),
(40100000),
(735142902),
(735151199),
(4010070)
;WITH TableWithRowId
AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY(SELECT NULL)) RowId,
ID
FROM
#Tbl
), TempTable
AS
(
SELECT T.RowId + 1 AS RowId FROM TableWithRowId T
WHERE
LEFT(T.ID, 1) != 7
), ResultTable
AS
(
SELECT
T.RowId ,
T.ID,
DENSE_RANK() OVER (ORDER BY (SELECT TOP 1 A.RowId FROM TempTable A WHERE A.RowId > T.RowId ORDER BY A.RowId)) AS Flag
FROM TableWithRowId T
)
SELECT * FROM ResultTable
Result:
RowId ID Flag
----------- ----------- ----------
1 735121201 1
2 735120001 1
3 5442244 1
4 735141094 2
5 735141097 2
6 4008060 2
7 735117603 3
8 40100000 3
9 735142902 4
10 735151199 4
11 4010070 4
The following query is similer with NEER's
;WITH test_table(CenterID)AS(
SELECT '735121201' UNION ALL
SELECT '735120001' UNION ALL
SELECT '5442244' UNION ALL
SELECT '735141094' UNION ALL
SELECT '735141097' UNION ALL
SELECT '4008060' UNION ALL
SELECT '735117603' UNION ALL
SELECT '40100000' UNION ALL
SELECT '735142902' UNION ALL
SELECT '735151199' UNION ALL
SELECT '4010070'
),t1 AS (
SELECT *,ROW_NUMBER()OVER(ORDER BY(SELECT 1)) AS rn,CASE WHEN LEFT(t.CenterID,1)='7' THEN 1 ELSE 0 END AS isSeven
FROM test_table AS t
),t2 AS(
SELECT t1.*,ROW_NUMBER()OVER(ORDER BY t1.rn) AS toFilter
FROM t1 LEFT JOIN t1 AS pt ON pt.rn=t1.rn-1
WHERE pt.CenterID IS NULL OR (t1.isSeven=1 AND pt.isSeven=0)
)
SELECT t1.CenterID,x.toFilter FROM t1
CROSS APPLY(SELECT TOP 1 t2.toFilter FROM t2 WHERE t2.rn<=t1.rn ORDER BY rn desc) x
CenterID toFilter
--------- --------------------
735121201 1
735120001 1
5442244 1
735141094 2
735141097 2
4008060 2
735117603 3
40100000 3
735142902 4
735151199 4
4010070 4

select based on specific values

I have this table:
ID NO.
111 6
222 7
333 9
111 8
333 4
222 3
111 7
222 5
333 2
I want to select only 2 ID numbers from table where NO. column equal specific values.
For example i tried this query but i didn't get the expected result:
SELECT top 2 * FROM mytable where NO. in
(select NO. from mytable )
Expected result:
111 6
111 8
222 7
222 3
333 9
333 3
You seem to want to select two rows in the table for each id, based on a condition on the No column. For this, one method uses row_number():
select t.*
from (select t.*, row_number() over (partition by id order by id) as seqnum
from mytable t
where <condition goes here>
) t
where seqnum <= 2;
I'm guessing (333,3) is a mistake and you expect (333,2). If not I have no idea.
SELECT
ua.ID
, ua.[NO.]
FROM (
SELECT
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY t.[NO.] ASC) AS RowNum
, t.ID
, t.[NO.]
FROM dbo.t1 AS t
UNION ALL
SELECT
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY t.[NO.] DESC)
, ID
, t.[NO.]
FROM dbo.t1 AS t
) ua
WHERE ua.RowNum = 1
ORDER BY ID, ua.[NO.] DESC
If you're just trying to get top 2 values for each group, you need something to define the order, ie. a third column. Then you don't need UNION ALL, just use WHERE ua.RowNum < 3.
/*Select 2 random rows per id where the number of rows per id can vary between 1 and infinity
A good article for this:-*/
--https://www.mssqltips.com/sqlservertip/3157/different-ways-to-get-random-data-for-sql-server-data-sampling/
DECLARE #TABLE TABLE(ID INT,NO INT)
INSERT INTO #TABLE
VALUES
(111, 6),
(222, 7),
(333 , 9),
(111 , 8),
(333 , 4),
(222 , 3),
(111 , 7),
(222 , 5),
(333 , 2)
select t.* from
(
Select s.* ,ROW_NUMBER() OVER(PARTITION BY ID ORDER BY randomnumber) ROWNUMBER
from
(
SELECT ID,NO,
(ABS(CHECKSUM(NEWID())) % 100001) + ((ABS(CHECKSUM(NEWID())) % 100001) * 0.00001) [randomnumber]
FROM #TABLE
) s
) t
where t.rownumber < 3

SQL Server Create Grouping For Related Records

I'm running into an interesting scenario trying to assign an arbitrary FamilyId to fields that are related to each other.
Here is the structure that we're currently working with:
DataId OriginalDataId
3 1
4 1
5 1
6 1
3 2
4 2
5 2
6 2
7 10
8 10
9 10
11 15
What we're attempting to do is add a FamilyId column to all DataIds that have a relationship between each other.
In this case, Id's 3, 4, 5, and 6 have a relationship to 1. But 3, 4, 5, and 6 also have a relationship with 2. So 1, 2, 3, 4, 5, and 6 should all be considered to be in the same FamilyId.
7, 8, and 9 only have a relationship to 10, which puts this into a separate FamilyId. Same for 11 and 15.
What I am expecting as a result from this are the following results:
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3
Sample data, structure, and queries:
Declare #Results_Stage Table
(
DataId BigInt Not Null,
OriginalDataId BigInt Null
)
Insert #Results_Stage
Values (3,1), (4,1), (5,1), (6,1), (3,2), (4,2), (5,2), (6,2), (7,10), (8, 10), (9, 10), (11, 15)
Select DataId, Row_Number() Over(Partition By DataId Order By OriginalDataId Asc) FamilyId
From #Results_Stage R
Union
Select OriginalDataId, Row_Number() Over(Partition By DataId Order By OriginalDataId Asc) FamilyId
From #Results_Stage
I'm positive my attempt is nowhere near correct, but I'm honestly not sure where to even start on this -- or if it's even possible in SQL Server.
Does anyone have an idea on how to tackle this issue, or at least, something to point me in the right direction?
Edit Below is a query I've come up with so far to identify the other DataId records that should belong to the same FamilyId
Declare #DataId BigInt = 1
;With Children As
(
Select Distinct X.DataId
From #Results_Stage S
Outer Apply
(
Select Distinct DataId
From #Results_Stage R
Where R.OriginalDataId = S.DataId
Or R.OriginalDataId = S.OriginalDataId
) X
Where S.DataId = #DataId
Or S.OriginalDataId = #DataId
)
Select Distinct O.OriginalDataId
From Children C
Outer Apply
(
Select S.OriginalDataId
From #Results_Stage S
Where S.DataId = C.DataId
) O
Union
Select DataId
From Children
The following query, which employs FOR XML PATH:
SELECT R.OriginalDataId,
STUFF((
SELECT ', ' + + CAST([DataId] AS VARCHAR(MAX))
FROM #Results_Stage
WHERE (OriginalDataId = R.OriginalDataId)
FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
,1,2,'') AS GroupValues
FROM #Results_Stage R
GROUP BY R.OriginalDataId
can be used to produce this output:
OriginalDataId GroupValues
===========================
1 3, 4, 5, 6
2 3, 4, 5, 6
10 7, 8, 9
15 11
Using the above result set, we can easily identify each group and thus have something upon which DENSE_RANK() can be applied:
;WITH GroupedData AS (
SELECT R.OriginalDataId,
STUFF((
SELECT ', ' + + CAST([DataId] AS VARCHAR(MAX))
FROM #Results_Stage
WHERE (OriginalDataId = R.OriginalDataId)
FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
,1,2,'') AS GroupValues
FROM #Results_Stage R
GROUP BY R.OriginalDataId
), Families AS (
SELECT OriginalDataId, DENSE_RANK() OVER (ORDER BY GroupValues) AS FamilyId
FROM GroupedData
)
SELECT OriginalDataId AS DataId, FamilyId
FROM Families
UNION
SELECT DataId, F.FamilyId
FROM #Results_Stage R
INNER JOIN Families F ON R.OriginalDataId = F.OriginalDataId
ORDER BY FamilyId
Output from above is:
DataId FamilyId
===================
11 1
15 1
1 2
2 2
3 2
4 2
5 2
6 2
7 3
8 3
9 3
10 3
Check this ... it doesn't look too nice but is doing the job :)
DECLARE #T TABLE (DataId INT, OriginalDataId INT)
INSERT INTO #T(DataId , OriginalDataId)
select 3,1
union all select 4,1
union all select 5,1
union all select 6,1
union all select 3,2
union all select 4,2
union all select 5,2
union all select 6,2
union all select 7,10
union all select 8,10
union all select 9,10
union all select 11,15
SELECT * FROM #T
;WITH f AS (
SELECT DISTINCT OriginalDataId FROM #T
)
, m AS (
SELECT DISTINCT
DataId , OriginalDataId = MIN(OriginalDataId)
FROM #T
GROUP BY DataId
)
, m2 AS (
SELECT DISTINCT
x.DataId , x.OriginalDataId
FROM #T AS x
LEFT OUTER JOIN m ON x.DataId = m.DataId AND x.OriginalDataId = m.OriginalDataId
WHERE m.DataId IS NULL
)
, m3 AS (
SELECT DISTINCT DataId = x.OriginalDataId , m.OriginalDataId
FROM m2 AS x
INNER JOIN m ON x.DataId = m.DataId
)
, m4 AS (
SELECT DISTINCT
DataId = OriginalDataId , OriginalDataId
FROM #T
WHERE OriginalDataId NOT IN(SELECT DataId FROM m3)
UNION
SELECT DISTINCT
x.DataId , f.OriginalDataId
FROM f
INNER JOIN m AS x on x.OriginalDataId = f.OriginalDataId
WHERE x.DataId NOT IN(SELECT DataId FROM m3)
UNION
SELECT DataId , OriginalDataId FROM m3
)
, list AS (
SELECT
x.DataId, FamilyId = DENSE_RANK() OVER(ORDER BY x.OriginalDataId )
FROM m4 AS x
)
SELECT * FROM list
-- OUTPUT
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3