Simplest way to do a recursive self-join? - sql

What is the simplest way of doing a recursive self-join in SQL Server? I have a table like this:
PersonID | Initials | ParentID
1 CJ NULL
2 EB 1
3 MB 1
4 SW 2
5 YT NULL
6 IS 5
And I want to be able to get the records only related to a hierarchy starting with a specific person. So If I requested CJ's hierarchy by PersonID=1 I would get:
PersonID | Initials | ParentID
1 CJ NULL
2 EB 1
3 MB 1
4 SW 2
And for EB's I'd get:
PersonID | Initials | ParentID
2 EB 1
4 SW 2
I'm a bit stuck on this can can't think how to do it apart from a fixed-depth response based on a bunch of joins. This would do as it happens because we won't have many levels but I would like to do it properly.
Thanks! Chris.

WITH q AS
(
SELECT *
FROM mytable
WHERE ParentID IS NULL -- this condition defines the ultimate ancestors in your chain, change it as appropriate
UNION ALL
SELECT m.*
FROM mytable m
JOIN q
ON m.parentID = q.PersonID
)
SELECT *
FROM q
By adding the ordering condition, you can preserve the tree order:
WITH q AS
(
SELECT m.*, CAST(ROW_NUMBER() OVER (ORDER BY m.PersonId) AS VARCHAR(MAX)) COLLATE Latin1_General_BIN AS bc
FROM mytable m
WHERE ParentID IS NULL
UNION ALL
SELECT m.*, q.bc + '.' + CAST(ROW_NUMBER() OVER (PARTITION BY m.ParentID ORDER BY m.PersonID) AS VARCHAR(MAX)) COLLATE Latin1_General_BIN
FROM mytable m
JOIN q
ON m.parentID = q.PersonID
)
SELECT *
FROM q
ORDER BY
bc
By changing the ORDER BY condition you can change the ordering of the siblings.

Using CTEs you can do it this way
DECLARE #Table TABLE(
PersonID INT,
Initials VARCHAR(20),
ParentID INT
)
INSERT INTO #Table SELECT 1,'CJ',NULL
INSERT INTO #Table SELECT 2,'EB',1
INSERT INTO #Table SELECT 3,'MB',1
INSERT INTO #Table SELECT 4,'SW',2
INSERT INTO #Table SELECT 5,'YT',NULL
INSERT INTO #Table SELECT 6,'IS',5
DECLARE #PersonID INT
SELECT #PersonID = 1
;WITH Selects AS (
SELECT *
FROM #Table
WHERE PersonID = #PersonID
UNION ALL
SELECT t.*
FROM #Table t INNER JOIN
Selects s ON t.ParentID = s.PersonID
)
SELECT *
FROm Selects

The Quassnoi query with a change for large table. Parents with more childs then 10: Formating as str(5) the row_number()
WITH q AS
(
SELECT m.*, CAST(str(ROW_NUMBER() OVER (ORDER BY m.ordernum),5) AS VARCHAR(MAX)) COLLATE Latin1_General_BIN AS bc
FROM #t m
WHERE ParentID =0
UNION ALL
SELECT m.*, q.bc + '.' + str(ROW_NUMBER() OVER (PARTITION BY m.ParentID ORDER BY m.ordernum),5) COLLATE Latin1_General_BIN
FROM #t m
JOIN q
ON m.parentID = q.DBID
)
SELECT *
FROM q
ORDER BY
bc

SQL 2005 or later, CTEs are the standard way to go as per the examples shown.
SQL 2000, you can do it using UDFs -
CREATE FUNCTION udfPersonAndChildren
(
#PersonID int
)
RETURNS #t TABLE (personid int, initials nchar(10), parentid int null)
AS
begin
insert into #t
select * from people p
where personID=#PersonID
while ##rowcount > 0
begin
insert into #t
select p.*
from people p
inner join #t o on p.parentid=o.personid
left join #t o2 on p.personid=o2.personid
where o2.personid is null
end
return
end
(which will work in 2005, it's just not the standard way of doing it. That said, if you find that the easier way to work, run with it)
If you really need to do this in SQL7, you can do roughly the above in a sproc but couldn't select from it - SQL7 doesn't support UDFs.

Check following to help the understand the concept of CTE recursion
DECLARE
#startDate DATETIME,
#endDate DATETIME
SET #startDate = '11/10/2011'
SET #endDate = '03/25/2012'
; WITH CTE AS (
SELECT
YEAR(#startDate) AS 'yr',
MONTH(#startDate) AS 'mm',
DATENAME(mm, #startDate) AS 'mon',
DATEPART(d,#startDate) AS 'dd',
#startDate 'new_date'
UNION ALL
SELECT
YEAR(new_date) AS 'yr',
MONTH(new_date) AS 'mm',
DATENAME(mm, new_date) AS 'mon',
DATEPART(d,#startDate) AS 'dd',
DATEADD(d,1,new_date) 'new_date'
FROM CTE
WHERE new_date < #endDate
)
SELECT yr AS 'Year', mon AS 'Month', count(dd) AS 'Days'
FROM CTE
GROUP BY mon, yr, mm
ORDER BY yr, mm
OPTION (MAXRECURSION 1000)

DELIMITER $$
DROP PROCEDURE IF EXISTS `myprocDURENAME`$$
CREATE DEFINER=`root`#`%` PROCEDURE `myprocDURENAME`( IN grp_id VARCHAR(300))
BEGIN
SELECT h.ID AS state_id,UPPER(CONCAT( `ACCNAME`,' [',b.`GRPNAME`,']')) AS state_name,h.ISACTIVE FROM accgroup b JOIN (SELECT get_group_chield (grp_id) a) s ON FIND_IN_SET(b.ID,s.a) LEFT OUTER JOIN acc_head h ON b.ID=h.GRPID WHERE h.ID IS NOT NULL AND H.ISACTIVE=1;
END$$
DELIMITER ;
////////////////////////
DELIMITER $$
DROP FUNCTION IF EXISTS `get_group_chield`$$
CREATE DEFINER=`root`#`%` FUNCTION `get_group_chield`(get_id VARCHAR(999)) RETURNS VARCHAR(9999) CHARSET utf8
BEGIN
DECLARE idd VARCHAR(300);
DECLARE get_val VARCHAR(300);
DECLARE get_count INT;
SET idd=get_id;
SELECT GROUP_CONCAT(id)AS t,COUNT(*) t1 INTO get_val,get_count FROM accgroup ag JOIN (SELECT idd AS n1) d ON FIND_IN_SET(ag.PRNTID,d.n1);
SELECT COUNT(*) INTO get_count FROM accgroup WHERE PRNTID IN (idd);
WHILE get_count >0 DO
SET idd=CONCAT(idd,',', get_val);
SELECT GROUP_CONCAT(CONCAT('', id ,'' ))AS t,COUNT(*) t1 INTO get_val,get_count FROM accgroup ag JOIN (SELECT get_val AS n1) d ON FIND_IN_SET(ag.PRNTID,d.n1);
END WHILE;
RETURN idd;
-- SELECT id FROM acc_head WHERE GRPID IN (idd);
END$$
DELIMITER ;

Related

SQL multiplying rows in select

I would like to select some rows multiple-times, depending on the column's value.
Source table
Article | Count
===============
A | 1
B | 4
C | 2
Wanted result
Article
===============
A
B
B
B
B
C
C
Any hints or samples, please?
You could use:
SELECT m.Article
FROM mytable m
CROSS APPLY (VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)) AS s(n)
WHERE s.n <= m.[Count];
LiveDemo
Note: CROSS APLLY with any tally table. Here values up to 10.
Related: What is the best way to create and populate a numbers table?
You could also use a recursive CTE which works with numbers > 10 (here up to 1000):
With NumberSequence( Number ) as
(
Select 0 as Number
union all
Select Number + 1
from NumberSequence
where Number BETWEEN 0 AND 1000
)
SELECT Article
FROM ArticleCounts
CROSS APPLY NumberSequence
WHERE Number BETWEEN 1 AND [Count]
ORDER BY Article
Option (MaxRecursion 0)
Demo
A number-table will certainly be the best option.
http://sqlperformance.com/2013/01/t-sql-queries/generate-a-set-2
Please check following SQL script
Before executing the SELECT statement, note that I used a user function which is used to simulate a numbers table
You can find the sql codes of numbers table in SQL Server at referred tutorial
----create table myTempTbl (Article varchar(10), Count int)
--insert into myTempTbl select 'A',1
--insert into myTempTbl select 'B',4
--insert into myTempTbl select 'C',2
select t.*
from myTempTbl t
cross apply dbo.NumbersTable(1,100,1) n
where n.i <= t.Count
order by t.Article
one more CTE
with cte_t as (
select c as c, 1 as i
from mytable
group by c
union all
select t.c, ctet.i + 1
from mytable t
join cte_t ctet
on ctet.c = t.c
and ctet.i < t.i
)
select cte_t.c
from cte_t
order by cte_t.c
Can obtain the output using simple WHILE LOOP
DECLARE #table TABLE
(ID int ,Article varchar(5),[Count] int)
INSERT INTO #table
(ID,Article,Count)
VALUES
(1,'A',1),(2,'B',4),(3,'C',2)
DECLARE #temp TABLE
(Article varchar(5))
DECLARE #Cnt1 INT
DECLARE #Cnt2 INT
DECLARE #Check INT
DECLARE #max INT
SET #max =0
SET #Cnt1 = (SELECT Count(Article) FROM #table)
WHILE (#max < #Cnt1)
BEGIN
SET #max = #max +1
SET #Cnt2 = (SELECT [Count] FROM #table WHERE ID =#max)
SET #Check =(SELECT [Count] FROM #table WHERE ID =#max)
WHILE (#Cnt2 > 0)
BEGIN
INSERT INTO #temp
SELECT Article FROM #table WHERE [Count] =#Check
SET #Cnt2 = #Cnt2 -1
END
END
SELECT * FROM #temp

How to traverse a path in a table with id & parentId?

Suppose I have a table like:
id | parentId | name
1 NULL A
2 1 B
3 2 C
4 1 E
5 3 E
I am trying to write a scalar function I can call as:
SELECT dbo.GetId('A/B/C/E') which would produce "5" if we use the above reference table. The function would do the following steps:
Find the ID of 'A' which is 1
Find the ID of 'B' whose parent is 'A' (id:1) which would be id:2
Find the ID of 'C' whose parent is 'B' (id:2) which would be id:3
Find the ID of 'E' whose parent is 'C' (id:3) which would be id:5
I was trying to do it with a WHILE loop but it was getting very complicated very fast... Just thinking there must be a simple way to do this.
CTE version is not optimized way to get the hierarchical data. (Refer MSDN Blog)
You should do something like as mentioned below. It's tested for 10 millions of records and is 300 times faster than CTE version :)
Declare #table table(Id int, ParentId int, Name varchar(10))
insert into #table values(1,NULL,'A')
insert into #table values(2,1,'B')
insert into #table values(3,2,'C')
insert into #table values(4,1,'E')
insert into #table values(5,3,'E')
DECLARE #Counter tinyint = 0;
IF OBJECT_ID('TEMPDB..#ITEM') IS NOT NULL
DROP TABLE #ITEM
CREATE TABLE #ITEM
(
ID int not null
,ParentID int
,Name VARCHAR(MAX)
,lvl int not null
,RootID int not null
)
INSERT INTO #ITEM
(ID,lvl,ParentID,Name,RootID)
SELECT Id
,0 AS LVL
,ParentId
,Name
,Id AS RootID
FROM
#table
WHERE
ISNULL(ParentId,-1) = -1
WHILE ##ROWCOUNT > 0
BEGIN
SET #Counter += 1
insert into #ITEM(ID,ParentId,Name,lvl,RootID)
SELECT ci.ID
,ci.ParentId
,ci.Name
,#Counter as cntr
,ch.RootID
FROM
#table AS ci
INNER JOIN
#ITEM AS pr
ON
CI.ParentId=PR.ID
LEFT OUTER JOIN
#ITEM AS ch
ON ch.ID=pr.ID
WHERE
ISNULL(ci.ParentId, -1) > 0
AND PR.lvl = #Counter - 1
END
select * from #ITEM
Here is an example of functional rcte based on your sample data and requirements as I understand them.
if OBJECT_ID('tempdb..#Something') is not null
drop table #Something
create table #Something
(
id int
, parentId int
, name char(1)
)
insert #Something
select 1, NULL, 'A' union all
select 2, 1, 'B' union all
select 3, 2, 'C' union all
select 4, 1, 'E' union all
select 5, 3, 'E'
declare #Root char(1) = 'A';
with MyData as
(
select *
from #Something
where name = #Root
union all
select s.*
from #Something s
join MyData d on d.id = s.parentId
)
select *
from MyData
Note that if you change the value of your variable the output will adjust. I would make this an inline table valued function.
I think I have it based on #SeanLange's recommendation to use a recursive CTE (above in the comments):
CREATE FUNCTION GetID
(
#path VARCHAR(MAX)
)
/* TEST:
SELECT dbo.GetID('A/B/C/E')
*/
RETURNS INT
AS
BEGIN
DECLARE #ID INT;
WITH cte AS (
SELECT p.id ,
p.parentId ,
CAST(p.name AS VARCHAR(MAX)) AS name
FROM tblT p
WHERE parentId IS NULL
UNION ALL
SELECT p.id ,
p.parentId ,
CAST(pcte.name + '/' + p.name AS VARCHAR(MAX)) AS name
FROM dbo.tblT p
INNER JOIN cte pcte ON
pcte.id = p.parentId
)
SELECT #ID = id
FROM cte
WHERE name = #path
RETURN #ID
END

Find the missing number group by category

I want to find the missing batchNo group by each category
I try this and it work but I get all the missing number for all categories
how to group by the data ?
CREATE TABLE #tmp (BatchNo INT, Category VARCHAR(15))
INSERT INTO #tmp
SELECT 94, 'A01'
UNION ALL
SELECT 97, 'A01'
UNION ALL
SELECT 100, 'A02'
UNION ALL
SELECT 105, 'A02'
declare #valmax INT, #valmin INT, #i INT;
select #valmax=max(BatchNo) from #tmp;
select #valmin=min(BatchNo) from #tmp;
set #i=#valmin;
while (#i<#valmax) begin
if (not exists(select * from #tmp where BatchNo=#i)) begin
-- SELECT #i, Category FROM #tmp GROUP BY Category
SELECT #i
end;
set #i=#i+1
end;
the out put shold be like
95 A01
96 A01
101 A02
102 A02
103 A02
104 A02
You can do this by joining with a number table. This query uses thespt_valuestable and should work:
;with cte as (
select category , min(batchno) min_batch, max(batchno) max_batch
from #tmp
group by category
)
select number, category
from master..spt_values
cross join cte
where type = 'p'
and number > min_batch
and number < max_batch
group by category, number
Sample SQL Fiddle
Note that this table only has a sequence of numbers 0-2047so if yourBatchNocan be higher you need another source for the query (could be another table or a recursive cte); something like this would work:
;with
cte (category, min_batch, max_batch) as (
select category , min(batchno), max(batchno)
from #tmp
group by category
),
numbers (number, max_number) as (
select 1 as number, (select MAX(batchno) from #tmp) max_number
union all
select number + 1, max_number
from numbers
where number < max_number
)
select number, category
from numbers cross join cte
where number > min_batch
and number < max_batch
group by category, number
option (maxrecursion 0)
Amended my answer. Probably better than using spt_values just because there is a limit. Thanks for jpw for a better way of doing it
Declare #Start int
Declare #End int
Select #Start = Min(BatchNo), #End = Max(BatchNo) from #tmp;
with nums as (
select #Start as n
union all
select n+1
from nums
where n < #End
)
Select n, Category from nums
cross join #tmp t
where n > (select Min(BatchNo) from #tmp where Category = t.Category group by Category)
and n < (select Max(BatchNo) from #tmp where Category = t.Category group by Category)
group by category, n

find circular transactions in database table

I have a table in sql server database in which records of transactions are stored. Table consists of user id of buyer and user id of seller of product. I have to find the circles in the table for example-
I have to get the records of type- A sells to B, B sells to C, C sells to D AND D sells to A.
Please help.
Use following function:
CREATE FUNCTION dbo.CheckIsCircular(#SellerId INT)
RETURNS BIT
AS BEGIN
DECLARE #IsCircular BIT = 0
DECLARE #Sellers TABLE(Id INT)
DECLARE #TempSellers TABLE(Id INT)
DECLARE #Buyers TABLE(Id INT)
INSERT INTO #TempSellers(Id)VALUES(#SellerId)
WHILE EXISTS(SELECT * FROM #TempSellers)BEGIN
IF EXISTS(SELECT *
FROM #Sellers s
INNER JOIN #TempSellers t ON t.Id = s.Id)BEGIN
SET #IsCircular = 1
BREAK;
END
INSERT INTO #Sellers(Id)
SELECT Id FROM #TempSellers
INSERT INTO #Buyers(Id) SELECT BuyerId FROM YourTable
DELETE #TempSellers
INSERT Into #TempSellers(Id)
SELECT YourTable.SellerId
FROM YourTable
INNER JOIN #Buyers ON [#Buyers].Id = YourTable.SellerId
END
RETURN #IsCircular
END
Your problem is a graph traversal challenge; this is not natively supported in TSQL, but you can simulate it.
This is a skeleton how I do it in Teradata, so syntax must be slightly modified for SQL Server:
WITH RECURSIVE cte (..., Path, isCycle) AS
(
SELECT
...
,',' || CAST(seller AS VARCHAR(1000)) || ',' AS path
,0 AS isCycle
FROM tab
UNION ALL
SELECT
...
,cte.Path || cte.buyer || ',',
,case when cte.Path LIKE '%,' || TRIM(tab.buyer) || ',%' then 1 else 0 end
FROM cte, tab
WHERE cte.buyer = tab.seller
AND cte.isCycle <> 1
)
SELECT ...
,Path || Destination
,isCycle
FROM cte
WHERE isCycle = 1
Build a materialized path of the graph while traversing and check if the next buyer is already in this path.
With a recursive cte
declare #trans table (seller int, buyer int)
insert #trans
values (1,2),(2,3),(3,4),(4,1),(1,5),(2,6),(3,5)
begin try
;with cte as
(
select *, convert(varchar(500),'') as route from #trans
union all
select cte.seller, t1.buyer, convert(varchar(500),route + CONVERT(varchar(5),t1.seller)) from cte
inner join #trans t1 on cte.buyer = t1.seller
)
select * from cte
where seller=buyer
option (maxrecursion 50)
end try
begin catch
print 'loops'
end catch

How to find missing id in the table

I have column looks like below
SID101
SID102
SID103
SID105
SID107
In the above criteria i need to find missed SID numbers. SID104 and SID 106 are missed while ordering.
How can i find the missed id numbers.Could any one help me finding it.
Thanks in advance.
If your table contains gaps with length more than 1 item, you can use this query:
declare #t table(s varchar(20))
insert #t values ('SID101'),('SID102'),('SID103'),('SID105'),('SID108');
with cte as
(
select substring(t.s, 4, len(t.s)) [i]
from #t t
)
select 'SID' + cast(m.number as varchar(20))
from master..spt_values m
left join cte c on c.i = m.number
where [Type] = 'P'
and m.number >= (select min(i) from cte)
and m.number <= (select max(i) from cte)
and c.i is null
Output:
-----------------------
SID104
SID106
SID107
Something like this should work:
DECLARE #i INT;
SET #i = 100;
CREATE TABLE #idsToCheck (checkId varchar(100));
WHILE (#i < 200)
BEGIN
INSERT INTO #idsToCheck VALUES ('SID' + CONVERT(varchar(100), #i));
SET #i = #i + 1;
END
SELECT * FROM #idsToCheck itc
LEFT OUTER JOIN MainTable mt ON itc.checkId = mt.realId
WHERE mt.realId = NULL
DROP TABLE #idsToCheck
... where MainTable is your table containing the SID101, SID102, etc. column values, and MainTable.realId is the column containing those IDs. Modify the #i initial value and number in the while loop condition based on which SIDs you want to check from/to.
It's difficult. With
SELECT COUNT(*),MAX(CAST(REPLACE(y.name,'SID','') AS INT)) AS col_max FROM
sys.objects x INNER JOIN sys.columns y ON x.object_id=y.object_id
WHERE x.name='<TABLE_NAME>'
you should know, how much columns are missing (i.e. COUNT(*) is 5 and col_max is 107)
When you have a table, which contains only one column with all possible IDs from 1 to max (i.e. 100,101,102,103,104,...,132) then you could do
SELECT * FROM (
SELECT CAST(REPLACE(y.name,'SID','') AS INT) AS col_id FROM
sys.objects x INNER JOIN sys.columns y ON x.object_id=y.object_id
WHERE x.name='<TABLE_NAME>'
) a
RIGHT JOIN <TABLE_IDS> b ON a.col_id=b.id
WHERE a.col_id IS NULL AND b.id<=(
SELECT MAX(CAST(REPLACE(y.name,'SID','') AS INT)) AS col_max FROM
sys.objects x INNER JOIN sys.columns y ON x.object_id=y.object_id
WHERE x.name='<TABLE_NAME>'
)
EDIT: sorry, I've seen just now, that these values aren't column names, but values. My solution will find missing column names
 Declare #St int
declare #end int
set #st = CAST( (select RIGHT( max(data),4) from orderno)as int)
set #end = CAST( (select RIGHT( min(data),4) from orderno)as int)
create table #temp(data int)
while(#St <= #end )
begin
insert into #temp values(#St)
set #St = #St +1
end
select * from orderno
select * from #temp
select data from #temp where data not in (select cast(RIGHT(data,4))
declare #t table(s varchar(20))
insert #t values ('SID101'),('SID102'),('SID103'),('SID105'),('SID107');
with cte as
(
select substring(t.s, 4, len(t.s)) [i]
from #t t
)
select 'SID' + cast(t1.i + 1 as varchar(20))
from cte t1
join cte t2 on t2.i > t1.i
and not exists(
select 1
from cte c3
where c3.i > t1.i and c3.i < t2.i
)
where t2.i <> t1.i + 1
Output:
-----------------------
SID104
SID106