T-SQL results into columns - sql

I have a table (t1) like below
Id Name RelId
1 a 2
2 b 3
3 c 4
4 d 3
5 e 6
The other table (t2)
Id data FK Order
1 aa 2 2
2 bb 2 3
3 cc 2 1
4 dd 2 4
5 ee 2 5
6 ff 3 3
7 gg 3 2
8 hh 3 1
9 ii 4 7
10 jj 4 4
11 kk 4 1
12 ll 4 3
13 mm 6 1
14 nn 6 2
15 oo 6 3
16 pp 6 4
The output I am looking for is
+----+------+-------+-------+------+----------+
| id | name | RelId | Col 1 | Col2 | Col-Oth |
+----+------+-------+-------+------+----------+
| 1 | a | 2 | cc | aa | bb,dd,ee |
| 2 | b | 3 | hh | gg | ff |
| 3 | c | 4 | kk | ll | jj,ii |
| 4 | d | 3 | hh | gg | ff |
| 5 | e | 6 | mm | nn | oo,pp |
+----+------+-------+-------+------+----------+
Based on the RelId in table t1, join with the FK column in t2 and populate Col 1 with the data having the lowest Order, Col2 with the data having the next higher Order, and Col-Oth with the remaining data as a comma-separated list in Order sequence.
I need your help with this. My attempt so far:
-- Asker's attempt (does not compile): ROW_NUMBER() is written as rownumber(), the OVER keyword is
-- missing before (partition by ...), the reserved word order is not bracketed, and the scalar
-- subquery in the select list returns two columns.
SELECT id,name,RelId, (select data,rownumber() (partition by data order by order asc) from t2 inner join t1 on t1.relid= t2.FK) from t1

Try following query:
-- Sample data: @TEMP holds the parent rows, @TEMP1 the detail rows linked through FK = RelId.
-- (Table variables must be named with a leading @ in T-SQL.)
DECLARE @TEMP TABLE
(
    Id INT,
    Name VARCHAR(10),
    RelId INT
);
INSERT INTO @TEMP (Id, Name, RelId)
VALUES (1,'a',2),(2,'b',3),(3,'c',4),(4,'d',3),(5,'e',6);
DECLARE @TEMP1 TABLE
(
    Id INT,
    Data VARCHAR(10),
    FK INT,
    [order] INT
);
INSERT INTO @TEMP1 (Id, Data, FK, [order])
VALUES
(1 ,'aa',2,2),(2 ,'bb',2,3),(3 ,'cc',2,1),(4 ,'dd',2,4),(5 ,'ee',2,5),
(6 ,'ff',3,3),(7 ,'gg',3,2),(8 ,'hh',3,1),(9 ,'ii',4,7),(10,'jj',4,4),
(11,'kk',4,1),(12,'ll',4,3),(13,'mm',6,1),(14,'nn',6,2),(15,'oo',6,3),(16,'pp',6,4);
-- Number the detail rows of every FK group once, by ascending [order].
WITH Ranked AS
(
    SELECT
        t2.FK,
        t2.Data,
        ROW_NUMBER() OVER (PARTITION BY t2.FK ORDER BY t2.[order]) AS RowNo
    FROM @TEMP1 AS t2
)
SELECT
    t1.Id,
    t1.Name,
    t1.RelId,
    -- Data with the lowest [order] in the group.
    (SELECT r.Data FROM Ranked AS r WHERE r.FK = t1.RelId AND r.RowNo = 1) AS Col1,
    -- Data with the second-lowest [order].
    (SELECT r.Data FROM Ranked AS r WHERE r.FK = t1.RelId AND r.RowNo = 2) AS Col2,
    -- Everything else, comma separated. The list must be ordered explicitly: the former
    -- SELECT DISTINCT sorted the strings alphabetically ('ii,jj' instead of 'jj,ii').
    -- STUFF removes the leading comma; the result is NULL when the group has no third row.
    STUFF((SELECT ',' + r.Data
           FROM Ranked AS r
           WHERE r.FK = t1.RelId AND r.RowNo > 2
           ORDER BY r.RowNo
           FOR XML PATH('')), 1, 1, '') AS ColOth
FROM @TEMP AS t1
ORDER BY t1.Id;

Using PIVOT:
-- Sample data (table variables must be named with a leading @ in T-SQL).
DECLARE @t1 TABLE
(
    ID INT ,
    Name CHAR(1) ,
    RelID INT
);
DECLARE @t2 TABLE
(
    ID INT ,
    Data CHAR(2) ,
    RelID INT ,
    Ordering INT
);
INSERT INTO @t1 ( ID, Name, RelID )
VALUES ( 1, 'a', 2 ),
       ( 2, 'b', 3 ),
       ( 3, 'c', 4 ),
       ( 4, 'd', 3 ),
       ( 5, 'e', 6 );
INSERT INTO @t2 ( ID, Data, RelID, Ordering )
VALUES ( 1, 'aa', 2, 2 ),
       ( 2, 'bb', 2, 3 ),
       ( 3, 'cc', 2, 1 ),
       ( 4, 'dd', 2, 4 ),
       ( 5, 'ee', 2, 5 ),
       ( 6, 'ff', 3, 3 ),
       ( 7, 'gg', 3, 2 ),
       ( 8, 'hh', 3, 1 ),
       ( 9, 'ii', 4, 7 ),
       ( 10, 'jj', 4, 4 ),
       ( 11, 'kk', 4, 1 ),
       ( 12, 'll', 4, 3 ),
       ( 13, 'mm', 6, 1 ),
       ( 14, 'nn', 6, 2 ),
       ( 15, 'oo', 6, 3 ),
       ( 16, 'pp', 6, 4 );
-- cte1: every parent row joined to its detail rows, numbered per parent by Ordering.
WITH cte1
AS ( SELECT t1.ID ,
            t1.Name ,
            t1.RelID ,
            t2.Data ,
            ROW_NUMBER() OVER ( PARTITION BY t1.ID ORDER BY t2.Ordering ) AS rn
     FROM @t1 AS t1
     INNER JOIN @t2 AS t2 ON t1.RelID = t2.RelID
   ),
-- cte2: the rows that feed the pivot (rn 1 and 2), each carrying the comma-separated
-- list of the parent's remaining Data values.
cte2
AS ( SELECT co.ID ,
            co.Name ,
            co.RelID ,
            co.Data ,
            co.rn ,
            -- ORDER BY makes the list follow Ordering; without it the order is unspecified.
            STUFF(( SELECT ',' + ci.Data
                    FROM cte1 AS ci
                    WHERE ci.ID = co.ID
                          AND ci.rn > 2
                    ORDER BY ci.rn
                    FOR XML PATH('')
                  ), 1, 1, '') AS Col3
     FROM cte1 AS co
     -- Only rn 1 and 2 are pivoted, so the list is not built for the other rows.
     WHERE co.rn <= 2
   )
SELECT ID ,
       Name ,
       RelID ,
       [1] AS Col1 ,
       [2] AS Col2 ,
       Col3
FROM cte2 PIVOT( MAX(Data) FOR rn IN ( [1], [2] ) ) p
ORDER BY ID;
Output:
ID Name RelID Col1 Col2 Col3
1 a 2 cc aa bb,dd,ee
2 b 3 hh gg ff
3 c 4 kk ll jj,ii
4 d 3 hh gg ff
5 e 6 mm nn oo,pp
Execution plan of my statement
Execution plan of accepted statement:
Which is better? :)

Related

Update the Unique number for the co-related records between two columns in the group

I need to identify and update co-related records associated rank under Req_Result column as depicted below.
Table name is tblSource.
+------+-----+-----------------+---------+
| Item | key | DenseRankWrtKey | Req_Res |
+------+-----+-----------------+---------+
| a | 1 | 1 | 1 |
+------+-----+-----------------+---------+
| a | 2 | 2 | 1 |
+------+-----+-----------------+---------+
| a | 3 | 3 | 1 |
+------+-----+-----------------+---------+
| b | 2 | 2 | 1 |
+------+-----+-----------------+---------+
| b | 9 | 7 | 1 |
+------+-----+-----------------+---------+
| c | 1 | 1 | 1 |
+------+-----+-----------------+---------+
| c | 6 | 5 | 1 |
+------+-----+-----------------+---------+
| d | 5 | 4 | 4 |
+------+-----+-----------------+---------+
| e | 8 | 6 | 6 |
+------+-----+-----------------+---------+
| f | 2 | 2 | 1 |
+------+-----+-----------------+---------+
| f | 6 | 5 | 1 |
+------+-----+-----------------+---------+
Item and Key are co-related columns and DenseRankWrtKey is created by using Dense rank with respect to key. I need to assign the same DenseRankWrtKey values to all the co-related values.
Scenario explained:
Item a has the key value 1 and 1 is co-related with c as well, so all related values for a and 1 are a,b,c,f,2,3,7,6,5 hence all these values are assigned as 1 by referring DenseRank column, d and e are not further related to any other values hence its value is kept as is from DenseRank column.
I tried the queries
-- Asker's attempt: the self-join matches rows on equal DenseRankWrtKey, so every row
-- simply receives its own DenseRankWrtKey.
UPDATE tgt
SET tgt.Req_Res = src.DenseRankWrtKey
FROM tblSource AS tgt
INNER JOIN tblSource AS src
    ON src.DenseRankWrtKey = tgt.DenseRankWrtKey
which is not sufficient.
-- Just try for this table too:
-- (table variables must be named with a leading @ in T-SQL; Req_Res is left NULL by the insert)
DECLARE @Table AS TABLE
(
    Id INT IDENTITY(1,1) PRIMARY KEY
    ,Item varchar(100)
    ,[key] INT
    ,DenseRankWrtKey INT
    ,Req_Res INT
);
INSERT INTO @Table
(
    Item
    ,[key]
    ,DenseRankWrtKey
)
VALUES
('p', 10 ,1 ),
('q', 10 ,1 ),
('r', 20 ,2 ),
('s', 30 ,3 ),
('t', 30 ,3 ),
('u', 40 ,4 ),
('v', 40 ,4 ),
('w', 40 ,4 ),
('p', 50 ,5 ),
('q', 50 ,5 ),
('r', 50 ,5 ),
('s', 50 ,5 ),
('t', 50 ,5 ),
('u', 50 ,5 ),
('v', 50 ,5 ),
('w', 50 ,5 );
I find this way easier to read and maintain
-- Sample data; Req_Res starts out NULL and is filled in group by group below.
DECLARE @TestTable TABLE (Item CHAR(1), ItemKey INT, DenseRankWrtKey INT, Req_Res INT);
INSERT @TestTable (Item, ItemKey, DenseRankWrtKey) VALUES
  ('a' , 1 , 1)
, ('a' , 2 , 2)
, ('a' , 3 , 3)
, ('b' , 2 , 2)
, ('b' , 9 , 7)
, ('c' , 1 , 1)
, ('c' , 6 , 5)
, ('d' , 5 , 4)
, ('e' , 8 , 6)
, ('f' , 2 , 2)
, ('f' , 6 , 5);
-- Rows already assigned to the group that is currently being expanded.
DECLARE @OtpTable TABLE (Item CHAR(1), ItemKey INT);
-- Value written to Req_Res for every row of the current group: the lowest
-- DenseRankWrtKey that is still unassigned when the group is started.
DECLARE @GroupRank INT;
-- One pass per group. Rows with a NULL rank are never used as a seed, otherwise the
-- seed's Req_Res would stay NULL and the loop would never end.
WHILE EXISTS (SELECT 1 FROM @TestTable WHERE Req_Res IS NULL AND DenseRankWrtKey IS NOT NULL)
BEGIN
    -- Intentionally unfiltered: the work list is emptied before each new group.
    DELETE FROM @OtpTable;
    SELECT @GroupRank = MIN(DenseRankWrtKey)
    FROM @TestTable
    WHERE Req_Res IS NULL;
    -- Seed the group with one deterministic row carrying that rank.
    WITH Seed AS (
        SELECT TOP (1) Item, ItemKey, Req_Res
        FROM @TestTable
        WHERE Req_Res IS NULL
          AND DenseRankWrtKey = @GroupRank
        ORDER BY Item, ItemKey
    )
    UPDATE Seed
    SET Req_Res = @GroupRank
    OUTPUT inserted.Item, inserted.ItemKey INTO @OtpTable (Item, ItemKey);
    -- Keep pulling in unassigned rows that share an Item or an ItemKey with any row
    -- already in the group, until a pass adds nothing.
    WHILE @@ROWCOUNT > 0
        UPDATE T
        SET Req_Res = @GroupRank
        OUTPUT inserted.Item, inserted.ItemKey INTO @OtpTable (Item, ItemKey)
        FROM @TestTable AS T
        WHERE T.Req_Res IS NULL
          AND EXISTS (SELECT 1
                      FROM @OtpTable AS OT
                      WHERE T.Item = OT.Item OR T.ItemKey = OT.ItemKey);
END;
SELECT Item, ItemKey, DenseRankWrtKey, Req_Res
FROM @TestTable;
You cannot do the update in a single statement.
-- Work table. Id is not supplied by the INSERT; it is filled in afterwards from a
-- RANK() over (DenseRankWrtKey, Item).
CREATE TABLE #Table
(
Id INT
,Item varchar(30)
,[key] INT
,DenseRankWrtKey INT
,Req_Res INT
)
INSERT INTO #Table
(
Item
,[key]
,DenseRankWrtKey
)
VALUES
<YOUR DATA>
;WITH CTE
AS
(
SELECT
T.Item
,T.[Key]
,Id = RANK() OVER(order by T.DenseRankWrtKey,T.Item)
FROM
#Table AS T
)
UPDATE
T
SET
T.Id = CTE.Id
FROM
CTE
INNER JOIN #Table AS T ON T.Item = CTE.Item AND T.[key] = CTE.[key]
-- Scalar variables must be named with a leading @ in T-SQL.
-- @LoopVal: the DenseRankWrtKey handled in the current pass (starts at 0, so only ranks > 0 are visited).
-- @LoopReq: the Req_Res found on the row picked for that rank (NULL when not assigned yet).
DECLARE @LoopVal INT = 0
,@LoopReq INT = NULL
WHILE 1 = 1
BEGIN
-- Pick the next higher DenseRankWrtKey.
SELECT TOP 1
@LoopVal = T.DenseRankWrtKey
,@LoopReq = T.Req_Res
FROM
#Table AS T
WHERE
T.DenseRankWrtKey > @LoopVal
ORDER BY
T.DenseRankWrtKey ASC
-- No row was selected, so every rank has been visited.
IF @@ROWCOUNT = 0
BREAK;
-- Rows that share [key] with the current rank and are still unassigned take @LoopReq
-- when it is set, otherwise the current rank itself.
UPDATE T2
SET Req_Res = CASE WHEN @LoopReq IS NOT NULL THEN @LoopReq ELSE T.DenseRankWrtKey END
FROM
#Table AS T
INNER JOIN #Table AS T2 ON T.[key] = T2.[key]
WHERE
T.DenseRankWrtKey = @LoopVal
AND T2.Req_Res IS NULL
-- Unassigned rows take the value of an already assigned row with the same Item
-- (or @LoopReq when it is set).
UPDATE
T
SET
T.Req_Res = CASE WHEN @LoopReq IS NOT NULL THEN @LoopReq ELSE T2.Req_Res END
FROM
#Table AS T
INNER JOIN #Table AS T2 ON T.Item = T2.Item
AND T2.Req_Res IS NOT NULL
AND T.Req_Res IS NULL
END
SELECT * FROM #Table
ORDER BY
DenseRankWrtKey
DROP TABLE #Table
GO

SQL select parent-child recursively based on a reference table

I saw many questions related to a recursive query but couldn't find any that shows how to use it based on a reference table.
I have a MasterTable where Id, ParentId columns are establishing the parent/child relation.
I have a SubTable where I have a bunch of Ids which could be a parent Id or child Id.
I would like to retrieve all related records (parent or child, recursively) from the MasterTable based on given SubTable
Current output:
id parentId
----------- -----------
1 NULL
2 1
3 1
4 NULL
5 4
6 5
7 6
Expected output
id parentId
----------- -----------
1 NULL
2 1
3 1
4 NULL
5 4
6 5
7 6
8 9
9 NULL
10 NULL
11 10
13 11
14 10
15 16
16 NULL
Comparison of actual vs expected:
Code:
-- Names starting with # are temp tables and are created with CREATE TABLE
-- (DECLARE ... TABLE only accepts an @ name). Dropped first so the script can be re-run.
IF OBJECT_ID('tempdb..#MasterTable') IS NOT NULL DROP TABLE #MasterTable;
IF OBJECT_ID('tempdb..#SubTable') IS NOT NULL DROP TABLE #SubTable;
CREATE TABLE #MasterTable
(
    id INT NOT NULL,
    parentId INT NULL
);
CREATE TABLE #SubTable
(
    id INT NOT NULL
);
INSERT INTO #MasterTable (id, parentId)
VALUES (1, NULL), (2, 1), (3, 1), (4, NULL), (5, 4), (6, 5),
       (7, 6), (8, 9), (9, NULL), (10, NULL), (11, 10), (12, NULL),
       (13, 11), (13, 11), (14, 10), (15, 16), (16, NULL);
INSERT INTO #SubTable (id)
VALUES (1), (2), (3), (4), (6), (5), (7),
       (8), -- it does not show
       (13), -- it does not show
       (15); -- it does not show
/* beside 8,13,15 it should add 9,11,14 and 10,16 */
-- Asker's attempt, kept as posted: the anchor only accepts roots (parentId IS NULL)
-- that are themselves listed in #SubTable, so trees referenced only through a child
-- (8, 13, 15) are never started.
;WITH cte AS
(
    SELECT
        mt1.id,
        mt1.parentId
    FROM
        #MasterTable AS mt1
    WHERE
        mt1.parentId IS NULL
        AND EXISTS (SELECT NULL AS empty
                    FROM #SubTable AS st
                    WHERE st.Id = mt1.id)
    UNION ALL
    SELECT
        mt2.id,
        mt2.parentId
    FROM
        #MasterTable AS mt2
    INNER JOIN
        cte AS c1 ON c1.id = mt2.parentId
)
SELECT DISTINCT
    c2.id,
    c2.parentId
FROM
    cte AS c2
ORDER BY
    id;
Is the following query suitable for the issue in question?
-- Start from every #SubTable row. While p_flag = 0 the recursion climbs to the parent;
-- once a root (parentid IS NULL) is reached p_flag becomes 1 and the recursion descends
-- to the children instead.
with
related (id, parentid, p_flag) as (
    select
        m.id,
        m.parentId,
        case when m.parentId is null then 1 else 0 end
    from #SubTable s
    inner join #MasterTable m
        on m.id = s.id
    union all
    select
        m.id,
        m.parentId,
        case when m.parentId is null then 1 else related.p_flag end
    from related
    inner join #MasterTable m
        on (related.p_flag = 0 and related.parentid = m.id)
        or (related.p_flag = 1 and m.parentId = related.id)
)
select distinct
    id,
    parentid
from related
order by id;
Output:
| id | parentid |
+----+----------+
| 1 | NULL |
| 2 | 1 |
| 3 | 1 |
| 4 | NULL |
| 5 | 4 |
| 6 | 5 |
| 7 | 6 |
| 8 | 9 |
| 9 | NULL |
| 10 | NULL |
| 11 | 10 |
| 13 | 11 |
| 14 | 10 |
| 15 | 16 |
| 16 | NULL |
Test it online with rextester.com.
-- Build every tree from its root, tagging each node with the id of that root, then keep
-- all nodes of any tree that contains at least one #SubTable id.
-- (The previous filter also returned any node that merely had children or siblings,
-- without checking #SubTable at all.)
;WITH tree
AS (
    SELECT mt1.id,
           mt1.parentId,
           mt1.id AS rootId
    FROM #MasterTable AS mt1
    WHERE mt1.parentId IS NULL
    UNION ALL
    SELECT mt2.id,
           mt2.parentId,
           t.rootId
    FROM #MasterTable AS mt2
    INNER JOIN tree AS t
        ON t.id = mt2.parentId
)
SELECT DISTINCT c2.id,
                c2.parentId
FROM tree AS c2
WHERE EXISTS (
          SELECT 1
          FROM tree AS member
          INNER JOIN #SubTable AS st
              ON st.id = member.id
          WHERE member.rootId = c2.rootId
      )
ORDER BY c2.id;
You may try this....
-- Anchor: every #MasterTable row that is listed in #SubTable or whose parent is listed.
-- PId = 1 marks a root (parentId IS NULL); PId = 0 means the walk is still climbing.
; with cte as (
    select distinct
        mas.id,
        mas.parentId,
        case when mas.parentId is null then 1 else 0 end as PId
    from #MasterTable mas
    inner join #SubTable sub
        on sub.id in (mas.id, mas.parentId)
    union all
    select
        mas.id,
        mas.parentId,
        -- The flag has to flip to 1 when the climb reaches a root; carrying ct.PId
        -- through unchanged stops the walk there and the root's other children
        -- (e.g. 14) are never visited.
        case when mas.parentId is null then 1 else ct.PId end
    from cte ct
    inner join #MasterTable mas
        on (ct.PId = 1 and mas.parentId = ct.id)    -- descend from a root
        or (ct.PId = 0 and ct.parentId = mas.id)    -- climb towards the root
)
select distinct id, parentId
from cte
order by id
option (maxrecursion 100); -- 100 is also the default; cyclic data fails with an error instead of looping forever
For more information on recursive queries in SQL Server, see the linked documentation (Example E and the examples above it).

How to exclude certain rows from sql select

How do I exclude certain rows?
For example, I have the following table:
+------+------+------+
| Col1 | Col2 | Col3 |
+------+------+------+
| 1 | 1 | R |
| 1 | 2 | D |
| 2 | 3 | R |
| 2 | 4 | R |
| 3 | 5 | R |
| 4 | 6 | D |
+------+------+------+
I need to select only:
| 2 | 3 | R |
| 2 | 4 | R |
| 3 | 5 | R |
My select that does not work properly:
-- Asker's attempt: keeps only the 'R' rows whose c1 value occurs exactly once.
with t (c1, c2, c3) as (
    select 1, 1, 'R' from dual union all
    select 1, 2, 'D' from dual union all
    select 2, 3, 'R' from dual union all
    select 2, 4, 'R' from dual union all
    select 3, 5, 'R' from dual union all
    select 4, 6, 'D' from dual
),
counted as (
    select t.*,
           count(*) over (partition by c1) cc
    from t
)
select *
from counted
where c3 = 'R'
  and cc = 1;
Thanks in advance!
-- The R/D marker is stored in col3 (col2 is numeric); table_name stands in for the real table.
select * from table_name where col3 = 'R'
or if you want to exclude rows with D value just
-- The R/D marker is stored in col3 (col2 is numeric); table_name stands in for the real table.
select * from table_name where col3 != 'D'
It depends on your requirements but you can do in this way:
SELECT * FROM `table` WHERE col1 = 2 AND col3 = "R"
if you want to exclude just do it like WHERE col1 != 1
You can also use the IN clause, e.g.
SELECT column_name(s)
FROM table_name
WHERE column_name IN (value1, value2, ...);
This syntax is for MySql, but you can modify it as per your requirement or database you are using.
this will work :
-- Rows 3 to 5 in col2 order: the first five rows minus the first two.
-- ROWNUM is assigned before ORDER BY is applied, so the ordering has to happen inside
-- the inline view; without it the rows picked are arbitrary.
select * from (select * from table_name order by col2) where rownum <= 5
minus
select * from (select * from table_name order by col2) where rownum <= 2
My guess is that you want all rows for a col1 value where no row with that col1 has col3 = 'D' and at least one row with that col1 has col3 = 'R'. WHERE [NOT] EXISTS may do:
-- Rebuild the demo table from scratch.
DROP TABLE T;
CREATE TABLE T
(
    Col1 NUMBER,
    Col2 NUMBER,
    Col3 VARCHAR(1)
);
INSERT INTO T (Col1, Col2, Col3) VALUES (1, 1, 'R');
INSERT INTO T (Col1, Col2, Col3) VALUES (1, 2, 'D');
INSERT INTO T (Col1, Col2, Col3) VALUES (2, 3, 'R');
INSERT INTO T (Col1, Col2, Col3) VALUES (2, 4, 'R');
INSERT INTO T (Col1, Col2, Col3) VALUES (3, 5, 'R');
INSERT INTO T (Col1, Col2, Col3) VALUES (3, 6, 'D');
INSERT INTO T (Col1, Col2, Col3) VALUES (4, 5, 'X');
INSERT INTO T (Col1, Col2, Col3) VALUES (4, 6, 'Y');
INSERT INTO T (Col1, Col2, Col3) VALUES (5, 6, 'X');
INSERT INTO T (Col1, Col2, Col3) VALUES (5, 5, 'R');
INSERT INTO T (Col1, Col2, Col3) VALUES (5, 6, 'Y');
-- Every row whose Col1 group contains at least one 'R' row and no 'D' row.
SELECT o.*
FROM T o
WHERE EXISTS (SELECT 1 FROM T r WHERE r.COL1 = o.COL1 AND r.COL3 = 'R')
  AND NOT EXISTS (SELECT 1 FROM T d WHERE d.COL1 = o.COL1 AND d.COL3 = 'D');
Result
COL1 COL2 COL3
---------- ---------- ----
5 6 X
5 5 R
5 6 Y
2 3 R
2 4 R
use row_number() window function
-- Number all rows by c2 and return the third through fifth.
with t (c1, c2, c3) as (
    select 1, 1, 'R' from dual union all
    select 1, 2, 'D' from dual union all
    select 2, 3, 'R' from dual union all
    select 2, 4, 'R' from dual union all
    select 3, 5, 'R' from dual union all
    select 4, 6, 'D' from dual
),
numbered as (
    select c1,
           c2,
           c3,
           row_number() over (order by c2) rn
    from t
)
select *
from numbered
where rn between 3 and 5
demo link
C1 C2 C3
2 3 R
2 4 R
3 5 R
You can try using correlated subquery
-- Keep the rows whose col1 group contains no 'D' row (2/3/R, 2/4/R and 3/5/R in the sample).
-- The posted version had a stray FROM, no FROM inside the subquery, and tested
-- count(*) > 1, which selects the wrong groups.
select a.*
from tablename a
where not exists (select 1 from tablename b where b.col1 = a.col1 and b.col3 = 'D')
Based on what you have provided, I can only surmise that the only requirement is for COL1 to be equal to 2 or 3. In that case all you have to do is the following (assuming that you actually have a table):
SELECT * FROM <table_name>
WHERE col1 IN (2,3);
This will give you the desired output for the particular example provided in the question. If there is a selection requirement that goes beyond retrieving data where column 1 is either 2 or 3, then a more specific or precise answer can be provided.

SQL server and STUFF with two tables

I'm facing a problem. I have two tables as below.
table 1
+----+------+
| ks | keys |
+----+------+
| 11 | 1122|
+----+------+
| 12 | 2211|
+----+------+
| 13 | 2233|
+----+------+
| 14 | 3322|
+----+------+
table 2
+----+--+-------+
| Id | ks|codes|
+----+-----------+
| 1 | 11 |aaaaa|
+----+-----------+
| 2 | 11 |bbbbb|
+----+-----------+
| 3 | 12 |aaaaa|
+----+-----------+
| 3 | 13 |ccccc|
+----+-----------+
| 4 | 12 |bbbbb|
+----+-----------+
I tried the following query to get my required output, but it did not work:
-- Asker's attempt: one row per ks with the matching table2 codes concatenated.
SELECT
    ks,
    STUFF(
        (
            SELECT ', ' + t2.codes AS [text()]
            FROM table2 AS t2
            WHERE t1.ks = t2.ks
            FOR XML PATH('')
        ),
        1, 1, ''
    ) AS "codes"
FROM table1 t1
GROUP BY ks;
I get this table as result:
+----+------+
| ks | codes|
+----+------+
| 11 | aaaa |
+----+------+
| 11 | bbbb |
+----+------+
| 12 | cccc |
+----+------+
| 12 | dddd |
+----+------+
then this image below shows my required output:
required result
I did something wrong but I do not know what could be. Any chance someone help me? Thanks!
Try this. I think you posted the wrong output.
-- Sample data in temp tables.
CREATE TABLE #tbl (ks INT, codes VARCHAR(10));
INSERT INTO #tbl (ks, codes) VALUES
(11, 'aaaa'),
(12, 'bbbb'),
(13, 'cccc'),
(14, 'dddd');
CREATE TABLE #tbl2 (id INT, ks INT, codes VARCHAR(10));
INSERT INTO #tbl2 (id, ks, codes) VALUES
(1, 11, 'aaaaa'),
(2, 11, 'bbbbb'),
(3, 12, 'aaaaa'),
(3, 13, 'ccccc'),
(4, 12, 'bbbbb');
-- The statement before WITH must end with a semicolon, otherwise the CTE does not parse.
-- cte: the #tbl2 codes for every ks that exists in #tbl.
WITH cte AS
(
    SELECT t1.ks, t2.codes
    FROM #tbl AS t1
    INNER JOIN #tbl2 AS t2 ON t1.ks = t2.ks
)
SELECT
    c2.ks,
    -- Comma-separated codes per ks; STUFF strips the leading comma and ORDER BY
    -- keeps the list order stable.
    STUFF(
        (SELECT ',' + c1.codes
         FROM cte AS c1
         WHERE c1.ks = c2.ks
         ORDER BY c1.codes
         FOR XML PATH ('')), 1, 1, ''
    ) AS codes
FROM cte AS c2
GROUP BY c2.ks
ORDER BY c2.ks;
Output:
ks
11 aaaaa,bbbbb
12 aaaaa,bbbbb
13 ccccc
I cannot say that I fully understand what is going on in your tables--especially given your output image appears to have no relation to your sample tables--but it looks like you want a comma-delimited list of sub-values from table2 that are associated with table1.
Here's a working example that I think addresses your need. You can use CROSS APPLY in these situations. Doing so allows you to return all values from table1 regardless of a matching record in table2.
-- Table variables must be named with a leading @ in T-SQL.
DECLARE @table1 TABLE ( [ks] INT, [code] VARCHAR(10) );
DECLARE @table2 TABLE ( [id] INT, [ks] INT, [code] VARCHAR(10) );
-- populate table1 --
INSERT INTO @table1 (
    [ks], [code]
)
VALUES
    ( 11, 'aaaa' )
    , ( 12, 'bbbb' )
    , ( 13, 'cccc' )
    , ( 14, 'dddd' );
-- populate table two --
INSERT INTO @table2 (
    [id], [ks], [code]
)
VALUES
    ( 1, 11, 'aaaaa' )
    , ( 2, 11, 'bbbbb' )
    , ( 3, 12, 'aaaaa' )
    , ( 3, 13, 'ccccc' )
    , ( 4, 12, 'bbbbb' );
-- One row per table1 ks. The applied SELECT always yields exactly one row, so a ks
-- without matches is kept with a NULL list.
SELECT
    t1.ks, codes.codes
FROM @table1 AS t1
CROSS APPLY (
    SELECT (
        -- The separator is two characters (', '), hence STUFF(..., 1, 2, '').
        -- ORDER BY t2.id keeps the list order stable.
        STUFF(
            ( SELECT ', ' + t2.code AS "text()" FROM @table2 AS t2 WHERE t2.ks = t1.ks ORDER BY t2.id FOR XML PATH ( '' ) )
            , 1, 2, ''
        )
    ) AS [codes]
) AS codes
ORDER BY
    t1.ks;
Resulting Output:
ks codes
11 aaaaa, bbbbb
12 aaaaa, bbbbb
13 ccccc
14 NULL

SQL Server Create Grouping For Related Records

I'm running into an interesting scenario trying to assign an arbitrary FamilyId to fields that are related to each other.
Here is the structure that we're currently working with:
DataId OriginalDataId
3 1
4 1
5 1
6 1
3 2
4 2
5 2
6 2
7 10
8 10
9 10
11 15
What we're attempting to do is add a FamilyId column to all DataIds that have a relationship between each other.
In this case, Id's 3, 4, 5, and 6 have a relationship to 1. But 3, 4, 5, and 6 also have a relationship with 2. So 1, 2, 3, 4, 5, and 6 should all be considered to be in the same FamilyId.
7, 8, and 9 only have a relationship to 10, which puts this into a separate FamilyId. Same for 11 and 15.
What I am expecting as a result from this are the following results:
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3
Sample data, structure, and queries:
-- Names starting with # are temp tables and are created with Create Table
-- (Declare ... Table only accepts an @ name). Dropped first so the script can be re-run.
If Object_Id('tempdb..#Results_Stage') Is Not Null Drop Table #Results_Stage;
Create Table #Results_Stage
(
    DataId BigInt Not Null,
    OriginalDataId BigInt Null
);
Insert #Results_Stage (DataId, OriginalDataId)
Values (3,1), (4,1), (5,1), (6,1), (3,2), (4,2), (5,2), (6,2), (7,10), (8, 10), (9, 10), (11, 15);
-- Asker's attempt, kept as posted: Row_Number() restarts for every DataId, so the number
-- only counts the OriginalDataIds of one DataId and does not identify a family.
Select DataId, Row_Number() Over(Partition By DataId Order By OriginalDataId Asc) FamilyId
From #Results_Stage R
Union
Select OriginalDataId, Row_Number() Over(Partition By DataId Order By OriginalDataId Asc) FamilyId
From #Results_Stage
I'm positive my attempt is nowhere near correct, but I'm honestly not sure where to even start on this -- or if it's even possible in SQL Server.
Does anyone have an idea on how to tackle this issue, or at least, something to point me in the right direction?
Edit Below is a query I've come up with so far to identify the other DataId records that should belong to the same FamilyId
-- Id whose related records are looked up (scalar variables must be named with a leading @).
Declare @DataId BigInt = 1
-- Children: every DataId that hangs under the same OriginalDataId as a row touching @DataId,
-- or that has such a row's DataId as its OriginalDataId.
;With Children As
(
    Select Distinct X.DataId
    From #Results_Stage S
    Outer Apply
    (
        Select Distinct DataId
        From #Results_Stage R
        Where R.OriginalDataId = S.DataId
        Or R.OriginalDataId = S.OriginalDataId
    ) X
    Where S.DataId = @DataId
    Or S.OriginalDataId = @DataId
)
-- The OriginalDataIds of those children, together with the children themselves.
Select Distinct O.OriginalDataId
From Children C
Outer Apply
(
    Select S.OriginalDataId
    From #Results_Stage S
    Where S.DataId = C.DataId
) O
Union
Select DataId
From Children
The following query, which employs FOR XML PATH:
-- One row per OriginalDataId with the comma-separated list of its DataIds.
-- ORDER BY keeps the list in ascending DataId order so that equal sets always
-- produce the same string; STUFF strips the leading ', '.
SELECT R.OriginalDataId,
       STUFF((
              SELECT ', ' + CAST(S.[DataId] AS VARCHAR(MAX))
              FROM #Results_Stage AS S
              WHERE (S.OriginalDataId = R.OriginalDataId)
              ORDER BY S.[DataId]
              FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
       ,1,2,'') AS GroupValues
FROM #Results_Stage R
GROUP BY R.OriginalDataId
can be used to produce this output:
OriginalDataId GroupValues
===========================
1 3, 4, 5, 6
2 3, 4, 5, 6
10 7, 8, 9
15 11
Using the above result set, we can easily identify each group and thus have something upon which DENSE_RANK() can be applied:
-- GroupedData: one row per OriginalDataId with the ordered, comma-separated list of its DataIds.
-- Families: OriginalDataIds with an identical list share a FamilyId, which is why the
-- list must be built in a fixed order (ORDER BY S.[DataId]).
-- NOTE(review): only OriginalDataIds with exactly the same DataId set are merged;
-- partially overlapping sets end up in different families.
;WITH GroupedData AS (
    SELECT R.OriginalDataId,
           STUFF((
                  SELECT ', ' + CAST(S.[DataId] AS VARCHAR(MAX))
                  FROM #Results_Stage AS S
                  WHERE (S.OriginalDataId = R.OriginalDataId)
                  ORDER BY S.[DataId]
                  FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
           ,1,2,'') AS GroupValues
    FROM #Results_Stage R
    GROUP BY R.OriginalDataId
), Families AS (
    SELECT OriginalDataId, DENSE_RANK() OVER (ORDER BY GroupValues) AS FamilyId
    FROM GroupedData
)
-- The OriginalDataIds themselves plus every DataId below them, each with its family.
SELECT OriginalDataId AS DataId, FamilyId
FROM Families
UNION
SELECT DataId, F.FamilyId
FROM #Results_Stage R
INNER JOIN Families F ON R.OriginalDataId = F.OriginalDataId
ORDER BY FamilyId
Output from above is:
DataId FamilyId
===================
11 1
15 1
1 2
2 2
3 2
4 2
5 2
6 2
7 3
8 3
9 3
10 3
Check this ... it doesn't look too nice but is doing the job :)
-- Sample data (table variables must be named with a leading @ in T-SQL).
DECLARE @T TABLE (DataId INT, OriginalDataId INT)
INSERT INTO @T(DataId , OriginalDataId)
select 3,1
union all select 4,1
union all select 5,1
union all select 6,1
union all select 3,2
union all select 4,2
union all select 5,2
union all select 6,2
union all select 7,10
union all select 8,10
union all select 9,10
union all select 11,15
-- Show the input rows.
SELECT * FROM @T
-- f: the distinct OriginalDataIds.
;WITH f AS (
    SELECT DISTINCT OriginalDataId FROM @T
)
-- m: for every DataId, the smallest OriginalDataId it is linked to.
, m AS (
    SELECT DISTINCT
        DataId , OriginalDataId = MIN(OriginalDataId)
    FROM @T
    GROUP BY DataId
)
-- m2: the remaining links, i.e. pairs whose OriginalDataId is not that smallest one.
, m2 AS (
    SELECT DISTINCT
        x.DataId , x.OriginalDataId
    FROM @T AS x
    LEFT OUTER JOIN m ON x.DataId = m.DataId AND x.OriginalDataId = m.OriginalDataId
    WHERE m.DataId IS NULL
)
-- m3: each non-smallest OriginalDataId, mapped to the smallest OriginalDataId of a DataId it shares.
, m3 AS (
    SELECT DISTINCT DataId = x.OriginalDataId , m.OriginalDataId
    FROM m2 AS x
    INNER JOIN m ON x.DataId = m.DataId
)
-- m4: every id paired with the OriginalDataId that represents its family:
--     OriginalDataIds not remapped by m3 (paired with themselves), the DataIds from m
--     that are not in m3, and the m3 remappings.
-- NOTE(review): m3.DataId is taken from the nullable OriginalDataId column; a NULL there
-- would make both NOT IN filters return no rows.
, m4 AS (
    SELECT DISTINCT
        DataId = OriginalDataId , OriginalDataId
    FROM @T
    WHERE OriginalDataId NOT IN(SELECT DataId FROM m3)
    UNION
    SELECT DISTINCT
        x.DataId , f.OriginalDataId
    FROM f
    INNER JOIN m AS x on x.OriginalDataId = f.OriginalDataId
    WHERE x.DataId NOT IN(SELECT DataId FROM m3)
    UNION
    SELECT DataId , OriginalDataId FROM m3
)
-- list: number the families in ascending order of their representative OriginalDataId.
, list AS (
    SELECT
        x.DataId, FamilyId = DENSE_RANK() OVER(ORDER BY x.OriginalDataId )
    FROM m4 AS x
)
SELECT * FROM list
-- OUTPUT
DataId FamilyId
1 1
2 1
3 1
4 1
5 1
6 1
7 2
8 2
9 2
10 2
11 3
15 3