How to get the deepest levels of a hierarchical sql query - sql

I'm using SQLServer 2008.
Say I have a recursive hierarchy table, SalesRegion, whit SalesRegionId and ParentSalesRegionId. What I need is, given a specific SalesRegion (anywhere in the hierarchy), retrieve ALL the records at the BOTTOM level.
I.E.:
SalesRegion, ParentSalesRegionId
1, null
1-1, 1
1-2, 1
1-1-1, 1-1
1-1-2, 1-1
1-2-1, 1-2
1-2-2, 1-2
1-1-1-1, 1-1-1
1-1-1-2, 1-1-1
1-1-2-1, 1-1-2
1-2-1-1, 1-2-1
(in my table I have sequencial numbers, this dashed numbers are only to be clear)
So, if the user enters 1-1, I need to retrieve al records with SalesRegion 1-1-1-1 or 1-1-1-2 or 1-1-2-1 (and NOT 1-2-2). Similarly, if the user enters 1-1-2-1, I need to retrieve just 1-1-2-1
I have a CTE query that retrieves everything below 1-1, but that includes rows that I don't want:
WITH SaleLocale_CTE AS (
SELECT SL.SaleLocaleId, SL.SaleLocaleName, SL.AccountingLocationID, SL.LocaleTypeId, SL.ParentSaleLocaleId, 1 AS Level /*Added as a workaround*/
FROM SaleLocale SL
WHERE SL.Deleted = 0
AND (#SaleLocaleId IS NULL OR SaleLocaleId = #SaleLocaleId)
UNION ALL
SELECT SL.SaleLocaleId, SL.SaleLocaleName, SL.AccountingLocationID, SL.LocaleTypeId, SL.ParentSaleLocaleId, Level + 1 AS Level
FROM SaleLocale SL
INNER JOIN SaleLocale_CTE SLCTE ON SLCTE.SaleLocaleId = SL.ParentSaleLocaleId
WHERE SL.Deleted = 0
)
SELECT *
FROM SaleLocale_CTE
Thanks in advance!
Alejandro.

I found a quick way to do this, but I'd rather the answer to be in a single query. So if you can think of one, please share! If I like it better, I'll vote for it as the best answer.
I added a "Level" column in my previous query (I'll edit the question so this answer is clear), and used it to get the last level and then delete the ones I don't need.
INSERT INTO #SaleLocales
SELECT *
FROM SaleLocale_GetChilds(#SaleLocaleId)
SELECT #LowestLevel = MAX(Level)
FROM #SaleLocales
DELETE #SaleLocales
WHERE Level <> #LowestLevel

Building off your post:
; WITH CTE AS
(
SELECT *
FROM SaleLocale_GetChilds(#SaleLocaleId)
)
SELECT
FROM CTE a
JOIN
(
SELECT MAX(level) AS level
FROM CTE
) b
ON a.level = b.level
There were a few edits in there. Kept hitting post...

Are you looking for something like this:
declare #SalesRegion as table ( SalesRegion int, ParentSalesRegionId int )
insert into #SalesRegion ( SalesRegion, ParentSalesRegionId ) values
( 1, NULL ), ( 2, 1 ), ( 3, 1 ),
( 4, 3 ), ( 5, 3 ),
( 6, 5 )
; with CTE as (
-- Get the root(s).
select SalesRegion, CAST( SalesRegion as varchar(1024) ) as Path
from #SalesRegion
where ParentSalesRegionId is NULL
union all
-- Add the children one level at a time.
select SR.SalesRegion, CAST( CTE.Path + '-' + cast( SR.SalesRegion as varchar(10) ) as varchar(1024) )
from CTE inner join
#SalesRegion as SR on SR.ParentSalesRegionId = CTE.SalesRegion
)
select *
from CTE
where Path like '1-3%'

I haven't tried this on a serious dataset, so I'm not sure how it'll perform, but I believe it solves your problem:
WITH SaleLocale_CTE AS (
SELECT SL.SaleLocaleId, SL.SaleLocaleName, SL.AccountingLocationID, SL.LocaleTypeId, SL.ParentSaleLocaleId, CASE WHEN EXISTS (SELECT 1 FROM SaleLocal SL2 WHERE SL2.ParentSaleLocaleId = SL.SaleLocaleID) THEN 1 ELSE 0 END as HasChildren
FROM SaleLocale SL
WHERE SL.Deleted = 0
AND (#SaleLocaleId IS NULL OR SaleLocaleId = #SaleLocaleId)
UNION ALL
SELECT SL.SaleLocaleId, SL.SaleLocaleName, SL.AccountingLocationID, SL.LocaleTypeId, SL.ParentSaleLocaleId, CASE WHEN EXISTS (SELECT 1 FROM SaleLocal SL2 WHERE SL2.ParentSaleLocaleId = SL.SaleLocaleID) THEN 1 ELSE 0 END as HasChildren
FROM SaleLocale SL
INNER JOIN SaleLocale_CTE SLCTE ON SLCTE.SaleLocaleId = SL.ParentSaleLocaleId
WHERE SL.Deleted = 0
)
SELECT *
FROM SaleLocale_CTE
WHERE HasChildren = 0

Related

How to add rows to a specific number multiple times in the same query

I already asked for help on a part of my problem here.
I used to get 10 rows no matter if there are filled or not. But now I'm facing something else where I need to do it multiple times in the same query result.
WITH NUMBERS AS
(
SELECT 1 rowNumber
UNION ALL
SELECT 2
UNION ALL
SELECT 3
UNION ALL
SELECT 4
UNION ALL
SELECT 5
UNION ALL
SELECT 6
UNION ALL
SELECT 7
UNION ALL
SELECT 8
UNION ALL
SELECT 9
UNION ALL
SELECT 10
)
SELECT DISTINCT sp.SLC_ID, c.rowNumber, c.PCE_ID
FROM SELECT_PART sp
LEFT JOIN (
SELECT b.*
FROM NUMBERS
LEFT OUTER JOIN (
SELECT a.*
FROM (
SELECT SELECT_PART.SLC_ID, ROW_NUMBER() OVER (ORDER BY SELECT_PART.SLC_ID) as
rowNumber, SELECT_PART.PCE_ID
FROM SELECT_PART
WHERE SELECT_PART.SLC_ID = (must be the same as sp.SLC_ID and can''t hardcode it)
) a
) b
ON b.rowNumber = NUMBERS.rowNumber
) c ON c.SLC_ID = sp.SLC_ID
ORDER BY sp.SLC_ID, c.rowNumber
It works fine for the first 10 lines, but next SLC_ID only got 1 empty line
I need it to be like that
SLC_ID rowNumer PCE_ID
1 1 0001
1 2 0002
1 3 NULL
1 ... ...
1 10 NULL
2 1 0011
2 2 0012
2 3 0013
2 ... ...
2 10 0020
3 1 0021
3 ... ...
Really need it that way to build a report.
Instead of manually building a query-specific number list where you have to include every possible number you need (1 through 10 in this case), create a numbers table.
DECLARE #UpperBound INT = 1000000;
;WITH cteN(Number) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY s1.[object_id]) - 1
FROM sys.all_columns AS s1
CROSS JOIN sys.all_columns AS s2
)
SELECT [Number] INTO dbo.Numbers
FROM cteN WHERE [Number] <= #UpperBound;
CREATE UNIQUE CLUSTERED INDEX CIX_Number ON dbo.Numbers([Number])
WITH
(
FILLFACTOR = 100, -- in the event server default has been changed
DATA_COMPRESSION = ROW -- if Enterprise & table large enough to matter
);
Source: mssqltips
Alternatively, since you can't add data, use a table that already exists in SQL Server.
WITH NUMBERS AS
(
SELECT DISTINCT Number as rowNumber FROM master..spt_values where type = 'P'
)
SELECT DISTINCT sp.SLC_ID, c.rowNumber, c.PCE_ID
FROM SELECT_PART sp
LEFT JOIN (
SELECT b.*
FROM NUMBERS
LEFT OUTER JOIN (
SELECT a.*
FROM(
SELECT SELECT_PART.SLC_ID, ROW_NUMBER() OVER (ORDER BY SELECT_PART.SLC_ID) as
rowNumber, SELECT_PART.PCE_ID
FROM SELECT_PART
WHERE SELECT_PART.SLC_ID = (must be the same as sp.SLC_ID and can''t hardcode it)
) a
) b
ON b.rowNumber = NUMBERS.rowNumber
) c ON c.SLC_ID = sp.SLC_ID
ORDER BY sp.SLC_ID, c.rowNumber
NOTE: Max value for this solution is 2047

Using recursive sql query not for parent-child

I'm not new in sql and t-sql, but at past I've never used recursive query - all problems were solved with WHILE or CURSOR. I just got 1 question - how to organaze recursion query for following problem: I want to manipulate with last row of data in certain partition. Can't understand how to stop my recursion at last level of partition.
CREATE TABLE #temp
(i int
, s int
, v int);
INSERT INTO #temp
SELECT 1, 1, 10
UNION
SELECT 1, 2, 20
UNION
SELECT 2, 1, 5
UNION
SELECT 2, 2, 5
UNION
SELECT 2, 3, 2
WITH CTE AS
(
SELECT i
, s
, v
FROM #temp
WHERE s=1
UNION ALL
SELECT t.i
, t.s
, t.v + cte.v as new_v
FROM #temp t
INNER JOIN cte
ON (cte.i=t.i)
WHERE t.s>1
)
SELECT *
FROM cte
OPTION(MAXRECURSION 0)
I want to get 5 rows as result:
result
I know that it could be solved with OUTER APPLY, JOINS, WHILE or CURSOR methods. Could you please share any features for my to understand how to get same result with recurcive cte query? SUM function there is just for example - for that problem recurcive query is best way cause I will use many scalar functions in big CASE which will use value from last row in partition and value of current row partition.
Thanks.
Sorry for my bad english level.
Will it be correctly if I'll try same problem with following example? I guess that need to correctly say in which order way recursive query gonna do any data manipulating. So below code which will help you understand what did I want to solve:
CREATE TABLE #temp
(i_key int
, step int
, step_h int
, value int);
INSERT INTO #temp
SELECT 1, 1, NULL, 20
UNION
SELECT 1, 2, 1, 20
UNION
SELECT 2, 1, NULL, 10
UNION
SELECT 2, 2, 1, 10
UNION
SELECT 2, 3, 2, 5
WITH CTE AS
(
SELECT i_key
, step
, value
FROM #temp
WHERE step=1
--AND i_key=2
UNION ALL
SELECT t.i_key
, t.step
, CASE
WHEN cte.value - t.value <=0 THEN 0
ELSE cte.value - t.value
END as value
FROM #temp t
INNER JOIN cte
ON (cte.i_key=t.i_key
AND cte.step=t.step_h)
--WHERE t.step>1
)
SELECT *
FROM CTE
OPTION(MAXRECURSION 0)
Is parent-child structure always need for solving this problems?
So i guess it could be done with another join (without column of parent-child).
AND cte.step=t.step-1
For your particular example, recursion is unnecessary. All you need is SQL Server 2012 or later version:
select t.*,
sum(t.v) over(partition by t.i order by t.s) as [RT]
from #temp t
order by t.i, t.s;
If you need to access previos / next row, there are lag() / lead() ranking functions that were introduced in the same aforementioned version of SQL Server.
EDIT: Ah, I see. You simply want to know how to write recursive CTEs properly. Here is a (seemingly) correct code for your second example:
with cte as (
select t.i_key, t.step, t.value
from #temp t
where t.step_h is null
union all
select c.i_key, t.step, case
when c.value < t.value then 0
else c.value - t.value
end as [Value]
from #temp t
inner join cte c on c.step = t.step_h
and c.i_key = t.i_key
)
select *
from cte c
order by c.i_key, c.step;
In the end, it stops by itself when an iteration does not produce any new rows.

SQL: I want a row to be return with NULL even if there is no match to my IN clause

I would like my SQL query to return a row even if there is no row matching in my IN clause.
For exemple this query:
SELECT id, foo
FROM table
WHERE id IN (0, 1, 2, 3)
would return:
id|foo
0|bar
1|bar
2|bar
3|null
But instead I have (because no row with id 3):
id|foo
0|bar
1|bar
2|bar
I have been able to find this trick:
SELECT tmpTable.id, table.bar
FROM (
SELECT 0 as id
UNION SELECT 1
UNION SELECT 2
UNION SELECT 3
) tmpTable
LEFT JOIN
(
SELECT table.foo, table.id
FROM table
WHERE table.id IN (0, 1, 2, 3)
) table
on table.id = tmpTable.id
Is there a better way?
Bonus: How to make it work with myBatis's list variable?
overslacked is right. Most SQL developers use an auxiliary table that stores integers (and one that stores dates). This is outlined in an entire chapter of Joe Celko's "SQL for Smarties".
Example:
CREATE TABLE numeri ( numero INTEGER PRIMARY KEY )
DECLARE #x INTEGER
SET #x = 0
WHILE #x < 1000
BEGIN
INSERT INTO numeri ( numero ) VALUES ( #x )
SET #x = #x + 1
END
SELECT
numero AS id,
foo
FROM
numeri
LEFT OUTER JOIN my_table
ON my_table.id = numero
WHERE
numero BETWEEN 0 AND 3
Main Goal of Programming minimal code high performance no need this things just remove id 3 from in clause
What about just saying:
SELECT id, foo
FROM table
WHERE id >= 0 AND <= 3

Find overlapping sets of data in a table

I need to identify duplicate sets of data and give those sets who's data is similar a group id.
id threshold cost
-- ---------- ----------
1 0 9
1 100 7
1 500 6
2 0 9
2 100 7
2 500 6
I have thousands of these sets, most are the same with different id's. I need find all the like sets that have the same thresholds and cost amounts and give them a group id. I'm just not sure where to begin. Is the best way to iterate and insert each set into a table and then each iterate through each set in the table to find what already exists?
This is one of those cases where you can try to do something with relational operators. Or, you can just say: "let's put all the information in a string and use that as the group id". SQL Server seems to discourage this approach, but it is possible. So, let's characterize the groups using:
select d.id,
(select cast(threshold as varchar(8000)) + '-' + cast(cost as varchar(8000)) + ';'
from data d2
where d2.id = d.id
for xml path ('')
order by threshold
) as groupname
from data d
group by d.id;
Oh, I think that solves your problem. The groupname can serve as the group id. If you want a numeric id (which is probably a good idea, use dense_rank():
select d.id, dense_rank() over (order by groupname) as groupid
from (select d.id,
(select cast(threshold as varchar(8000)) + '-' + cast(cost as varchar(8000)) + ';'
from data d2
where d2.id = d.id
for xml path ('')
order by threshold
) as groupname
from data d
group by d.id
) d;
Here's the solution to my interpretation of the question:
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
;
WITH BaseTable AS
(
SELECT 1 id, 0 as threshold, 9 as cost
UNION SELECT 1, 100, 7
UNION SELECT 1, 500, 6
UNION SELECT 2, 0, 9
UNION SELECT 2, 100, 7
UNION SELECT 2, 500, 6
UNION SELECT 3, 1, 9
UNION SELECT 3, 100, 7
UNION SELECT 3, 500, 6
)
, BaseCTE AS
(
SELECT
id
--,dense_rank() over (order by threshold, cost ) as GroupId
,
(
SELECT CAST(TblGrouping.threshold AS varchar(8000)) + '/' + CAST(TblGrouping.cost AS varchar(8000)) + ';'
FROM BaseTable AS TblGrouping
WHERE TblGrouping.id = BaseTable.id
ORDER BY TblGrouping.threshold, TblGrouping.cost
FOR XML PATH ('')
) AS MultiGroup
FROM BaseTable
GROUP BY id
)
,
CTE AS
(
SELECT
*
,DENSE_RANK() OVER (ORDER BY MultiGroup) AS GroupId
FROM BaseCTE
)
SELECT *
INTO #tempGrouping
FROM CTE
-- SELECT * FROM #tempGrouping;
UPDATE BaseTable
SET BaseTable.GroupId = #tempGrouping.GroupId
FROM BaseTable
INNER JOIN #tempGrouping
ON BaseTable.Id = #tempGrouping.Id
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
Where BaseTable is your table, and and you don't need the CTE "BaseTable", because you have a data table.
You may need to take extra-precautions if your threshold and cost fields can be NULL.

How do you find a missing number in a table field starting from a parameter and incrementing sequentially?

Let's say I have an sql server table:
NumberTaken CompanyName
2 Fred 3 Fred 4 Fred 6 Fred 7 Fred 8 Fred 11 Fred
I need an efficient way to pass in a parameter [StartingNumber] and to count from [StartingNumber] sequentially until I find a number that is missing.
For example notice that 1, 5, 9 and 10 are missing from the table.
If I supplied the parameter [StartingNumber] = 1, it would check to see if 1 exists, if it does it would check to see if 2 exists and so on and so forth so 1 would be returned here.
If [StartNumber] = 6 the function would return 9.
In c# pseudo code it would basically be:
int ctr = [StartingNumber]
while([SELECT NumberTaken FROM tblNumbers Where NumberTaken = ctr] != null)
ctr++;
return ctr;
The problem with that code is that is seems really inefficient if there are thousands of numbers in the table. Also, I can write it in c# code or in a stored procedure whichever is more efficient.
Thanks for the help
A solution using JOIN:
select min(r1.NumberTaken) + 1
from MyTable r1
left outer join MyTable r2 on r2.NumberTaken = r1.NumberTaken + 1
where r1.NumberTaken >= 1 --your starting number
and r2.NumberTaken is null
I called my table Blank, and used the following:
declare #StartOffset int = 2
; With Missing as (
select #StartOffset as N where not exists(select * from Blank where ID = #StartOffset)
), Sequence as (
select #StartOffset as N from Blank where ID = #StartOffset
union all
select b.ID from Blank b inner join Sequence s on b.ID = s.N + 1
)
select COALESCE((select N from Missing),(select MAX(N)+1 from Sequence))
You basically have two cases - either your starting value is missing (so the Missing CTE will contain one row), or it's present, so you count forwards using a recursive CTE (Sequence), and take the max from that and add 1
Edit from comment. Yes, create another CTE at the top that has your filter criteria, then use that in the rest of the query:
declare #StartOffset int = 2
; With BlankFilters as (
select ID from Blank where hasEntered <> 1
), Missing as (
select #StartOffset as N where not exists(select * from BlankFilters where ID = #StartOffset)
), Sequence as (
select #StartOffset as N from BlankFilters where ID = #StartOffset
union all
select b.ID from BlankFilters b inner join Sequence s on b.ID = s.N + 1
)
select COALESCE((select N from Missing),(select MAX(N)+1 from Sequence))
this may now return a row that does exist in the table, but hasEntered=1
Tables:
create table Blank (
ID int not null,
Name varchar(20) not null
)
insert into Blank(ID,Name)
select 2 ,'Fred' union all
select 3 ,'Fred' union all
select 4 ,'Fred' union all
select 6 ,'Fred' union all
select 7 ,'Fred' union all
select 8 ,'Fred' union all
select 11 ,'Fred'
go
Try the set based approach - should be faster
select min(t1.NumberTaken)+1 as "min_missing" from t t1
where not exists (select 1 from t t2
where t1.NumberTaken = t2.NumberTaken+1)
and t1.NumberTaken > #StartingNumber
This is Sybase syntax, so massage for SQL server consumption if needed.
Create a temp table with all numbers from StartingValue to EndValue and LEFT OUTER JOIN to your data table.