SQL Server: stored procedure using recursive CTE finding values matching a total - sql

I need to find, within a stored procedure, which values add up to a wanted total, following valex's solution to "recursive query in SQL Server".
The following works pretty well assuming the CTE anchor recordset is very small
-- Find the first combination of rows in #t whose [num] values add up to @wanted.
-- The recursive CTE enumerates every ascending-id combination whose running total
-- does not exceed @wanted, and the final SELECT keeps one that hits it exactly.
CREATE TABLE #t ([id] INT, [num] FLOAT);

DECLARE @wanted FLOAT = 100000;

INSERT INTO #t ([id], [num])
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
(6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
(11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
(16, 10000), (17, 74000)
/* when you add more records the query becomes too slow, remove this comment
to test*/
/*,(18,10000),(19,78000),(20,10000),(21,10000),(22,80000),(23,19000),
(24,8000),(25,5000),(26,10000),(27,4000),(28,46000),(29,48000),(30,20000),
(31,10000),(32,25000),(33,10000),(34,13000),(35,16000),(36,10000),
(37,5000),(38,5000),(39,30000),(40,15000),(41,10000)*/
;

CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);

WITH CTE AS
(
    -- Anchor: every single row that does not already exceed the target.
    SELECT
        id,
        num AS CSum,
        CAST(id AS VARCHAR(MAX)) AS path
    FROM #t
    WHERE num <= @wanted

    UNION ALL

    -- Recursive step: extend each partial combination with a row of higher id
    -- (so each combination is generated once), as long as the running total
    -- stays within the target.
    SELECT
        t.id,
        t.num + CTE.CSum AS CSum,
        CTE.path + ',' + CAST(t.id AS VARCHAR(MAX)) AS path
    FROM #t AS t
    INNER JOIN CTE
        ON CTE.id < t.id
        AND t.num + CTE.CSum <= @wanted
)
-- id here is the last (highest) id of the combination.
SELECT TOP 1 Path
FROM CTE
WHERE CTE.CSum = @wanted
ORDER BY id;

DROP TABLE #t;
It will return 3,4, which are the first two rows whose [num] values add up to the wanted total.
This works reasonably fast when there are just a few records in the temp table #t, but when you remove the comment and add all the remaining records (from id 18 to id 41), the query takes forever because the CTE grows exponentially.
Is there a way to speed up the code? I just need the first matching total (the list anchor dataset is ordered, so a result like 3,4 is better than 8,20,22).

What if you took an iterative approach? This would be pretty simple to give the ability to stop as soon as a solution is found.
This was put together quickly, so you may be able to optimize it further. I tested it on your example (it ran in less than 1 second) and on several other combinations and levels of depth.
Result Depth Total IdList NumList
------ ----------- ----------- ---------- -------------
Found 1 100000 3,4 53000,47000
Full Code:
-- Iteratively search #T for a combination of rows whose Num values sum to @wanted.
-- #U holds every combination of the current size; each pass extends them by one
-- row with a higher Id, and the loop stops as soon as any combination hits the target.

-- Configuration
DECLARE @wanted FLOAT = 100000;
DECLARE @MaxDepth INT = 10; -- Customize how many levels you want to look

SET NOCOUNT ON;

-- Source data (recreated on every run)
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T;
CREATE TABLE #T (Id INT, Num INT);
INSERT INTO #T (Id, Num)
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
(6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
(11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
(16, 10000), (17, 74000);
CREATE NONCLUSTERED INDEX [idx_id] ON #T (Id);

-- Setup processing table
IF OBJECT_ID('tempdb..#U') IS NOT NULL DROP TABLE #U;
CREATE TABLE #U (
    MaxId INT,            -- highest Id already used by the combination
    Total INT,            -- running sum of the combination
    IdList VARCHAR(MAX),  -- comma-separated Ids
    NumList VARCHAR(MAX)  -- comma-separated Num values
);

-- Initial population from source table (combinations of one row)
INSERT INTO #U (MaxId, Total, IdList, NumList)
SELECT
    Id,
    Num,
    CONVERT(VARCHAR(10), Id),
    CONVERT(VARCHAR(10), Num)
FROM #T;

-- Iterative approach
DECLARE @Depth INT = 0;
WHILE NOT EXISTS (SELECT * FROM #U WHERE Total = @wanted)
BEGIN
    -- Increment depth
    SET @Depth = @Depth + 1;
    IF @Depth >= @MaxDepth
    BEGIN
        PRINT 'Max depth reached';
        RETURN; -- Stop processing further
    END;

    -- Calculate sum for this depth
    IF OBJECT_ID('tempdb..#V') IS NOT NULL DROP TABLE #V;

    SELECT
        T.Id AS MaxId,
        U.Total + T.Num AS Total,
        U.IdList + ',' + CONVERT(VARCHAR(10), T.Id) AS IdList,
        U.NumList + ',' + CONVERT(VARCHAR(10), T.Num) AS NumList
    INTO #V
    FROM #U AS U
    INNER JOIN #T AS T
        ON U.MaxId < T.Id;

    -- Replace data for next iteration
    TRUNCATE TABLE #U;
    INSERT INTO #U (MaxId, Total, IdList, NumList)
    SELECT MaxId, Total, IdList, NumList
    FROM #V;

    -- Check if no more combinations available.
    -- @@ROWCOUNT must be read immediately after the INSERT above.
    IF @@ROWCOUNT = 0
    BEGIN
        PRINT 'All combinations tested';
        RETURN; -- Stop processing further
    END;
END;

-- Return result. TOP without ORDER BY returns an arbitrary match, so order
-- explicitly: lowest last Id first, IdList as a deterministic tie-breaker.
SELECT TOP 1
    'Found' AS [Result],
    @Depth AS Depth,
    Total,
    IdList,
    NumList
FROM #U
WHERE Total = @wanted
ORDER BY MaxId, IdList;

Related

Pivot values on column based on grouped columns in SQL

I want to pivot values to columns based on a group. However, I do not know the values beforehand.
A query gives me this result.
Id Code EntityId
----------- ------------ -------------
3 22209776 1
4 143687971 3
4 143687971 4
4 143687971 5
4 143687971 15
5 143658155 7
5 143658155 8
I would like to output this
Id Code EntityId1 EntityId2 EntityId3 EntityId4
----------- ------------ ------------- ------------- ------------- -------------
3 22209776 1 NULL NULL NULL
4 143687971 3 4 5 15
5 143658155 7 8 NULL NULL
If you do not know how many columns you are going to have in the result, you need to use a dynamic T-SQL statement to build the PIVOT. For example:
-- Dynamic PIVOT: spreads the EntityId values of each Code across columns
-- EntityId1..EntityIdN, where N is the largest number of rows any Code has.
IF OBJECT_ID('tempdb..#DataSource') IS NOT NULL
BEGIN
    DROP TABLE #DataSource;
END;

CREATE TABLE #DataSource
(
    [id] INT
   ,[Code] INT
   ,[EntityId] INT
);

DECLARE @DynamicTSQLStatement NVARCHAR(MAX)
       ,@Columns NVARCHAR(MAX);
DECLARE @MaxColumns INT;

INSERT INTO #DataSource ([id], [Code], [EntityId])
VALUES (3, 22209776 , 1)
      ,(4, 143687971, 3)
      ,(4, 143687971, 4)
      ,(4, 143687971, 5)
      ,(4, 143687971, 15)
      ,(5, 143658155, 7)
      ,(5, 143658155, 8)
      ,(4, 143687971, 25)
      ,(4, 143687971, 26);

-- we need to know how many columns are going to be shown
SELECT TOP 1 @MaxColumns = COUNT(*)
FROM #DataSource
GROUP BY [Code]
ORDER BY COUNT(*) DESC;

-- we are building here the following string
-- '[EntityId1],[EntityId2],...,[EntityIdN]' (N = @MaxColumns);
-- this will change depending on the input data
WITH gen AS
(
    SELECT 1 AS num
    UNION ALL
    SELECT num + 1
    FROM gen
    WHERE num + 1 <= @MaxColumns
)
SELECT @Columns = STUFF
(
    (
        SELECT ',[EntityId' + CAST([num] AS VARCHAR(12)) + ']'
        FROM gen
        ORDER BY [num] -- keep the generated columns in numeric order
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)')
   ,1
   ,1
   ,''
)
OPTION (MAXRECURSION 10000);

-- [RowID] labels each row 'EntityId<n>' within its Code so that it lines up
-- with the generated column names.
SET @DynamicTSQLStatement = N'
SELECT *
FROM
(
    SELECT [id]
          ,[Code]
          ,[EntityId]
          ,''EntityId'' + CAST(ROW_NUMBER() OVER(PARTITION BY [Code] ORDER BY [EntityId]) AS VARCHAR(12))
    FROM #DataSource
) DS ([id], [Code], [EntityId], [RowID])
PIVOT
(
    MAX([EntityId]) for [RowID] in (' + @Columns + N')
) PVT;';

EXEC sp_executesql @DynamicTSQLStatement;
You could try using the pivot function:
-- Static PIVOT for at most four EntityId values per Code.
-- EntityId is INT so that ROW_NUMBER orders it numerically (3, 4, 5, 15);
-- as NVARCHAR it would sort as text and put '15' before '3'.
declare @tmp table (id int, Code int, EntityId int);

insert into @tmp (id, Code, EntityId)
values (3, 22209776 , 1),
       (4, 143687971, 3),
       (4, 143687971, 4),
       (4, 143687971, 5),
       (4, 143687971, 15),
       (5, 143658155, 7),
       (5, 143658155, 8);

select
    pvt.id
   ,pvt.Code
   ,[1] as EntityID1
   ,[2] as EntityID2
   ,[3] as EntityID3
   ,[4] as EntityID4
from (
    -- number the entities of each Code; the number becomes the pivot column
    select
        id, Code, EntityId
       ,ROW_NUMBER() over (partition by Code order by EntityId) as RowNum
    from
        @tmp
) a
pivot (MAX(EntityId) for RowNum in ([1], [2], [3], [4])) as pvt;

SQL merging tables [duplicate]

This question already has answers here:
Efficiently convert rows to columns in sql server
(5 answers)
Closed 8 years ago.
I am trying to merge a few tables in order to get the output as outlined in the image below.
My issue is that I am not sure what type of joins to use to achieve that
Can someone please help me with the syntax.
You could do something like this; it's a dynamic pivot, since you might add or take away users.
-- Dynamic PIVOT: one row per test, one column per user, showing the Completed flag.
CREATE TABLE #Tests (
    Test_ID INT,
    TestName VARCHAR(50));
INSERT INTO #Tests (Test_ID, TestName) VALUES (1, 'SQL Test');
INSERT INTO #Tests (Test_ID, TestName) VALUES (2, 'C# Test');
INSERT INTO #Tests (Test_ID, TestName) VALUES (3, 'Java Test');

CREATE TABLE #Users (
    [User_ID] INT,
    UserName VARCHAR(50));
INSERT INTO #Users ([User_ID], UserName) VALUES (1, 'Joe');
INSERT INTO #Users ([User_ID], UserName) VALUES (2, 'Jack');
INSERT INTO #Users ([User_ID], UserName) VALUES (3, 'Jane');

CREATE TABLE #UserTests (
    ID INT,
    [User_ID] INT,
    Test_ID INT,
    Completed INT);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (1, 1, 1, 0);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (2, 1, 2, 1);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (3, 1, 3, 1);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (4, 2, 1, 0);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (5, 2, 2, 0);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (6, 2, 3, 0);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (7, 3, 1, 1);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (8, 3, 2, 1);
INSERT INTO #UserTests (ID, [User_ID], Test_ID, Completed) VALUES (9, 3, 3, 1);

-- Build the pivot column list from the user names, e.g. [Jack],[Jane],[Joe].
-- QUOTENAME brackets each name so it is a valid identifier in the dynamic SQL.
DECLARE @Cols NVARCHAR(MAX);
SELECT @Cols = STUFF((SELECT DISTINCT ',' + QUOTENAME(u.UserName)
                      FROM #Users u
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)')
                    ,1,1,'');

DECLARE @Query NVARCHAR(MAX);
SELECT @Query = N'SELECT TestName, ' + @Cols + N' FROM
(
    SELECT
        t.TestName,
        u.UserName,
        ut.Completed
    FROM
        #Tests t
        INNER JOIN #UserTests ut ON ut.Test_ID = t.Test_ID
        INNER JOIN #Users u ON u.[User_ID] = ut.[User_ID]) x
PIVOT (
    MAX(Completed)
    FOR UserName IN (' + @Cols + N')
) AS pt';

EXEC (@Query);
Results are:
TestName Jack Jane Joe
C# Test 0 1 1
Java Test 0 1 1
SQL Test 0 1 0
(Same results as yours, but in a different sort order.)

T-SQL: Paging WITH TIES

I am trying to implement a paging routine that's a little different.
For the sake of a simple example, let's assume that I have a table defined and populated as follows:
-- Sample data: three parents, each with one reading per hour for 00:00-02:00.
DECLARE @Temp TABLE
(
    ParentId INT,
    [TimeStamp] DATETIME,
    Value INT
);

INSERT INTO @Temp (ParentId, [TimeStamp], Value)
VALUES (1, '1/1/2013 00:00', 6),
       (1, '1/1/2013 01:00', 7),
       (1, '1/1/2013 02:00', 8),
       (2, '1/1/2013 00:00', 6),
       (2, '1/1/2013 01:00', 7),
       (2, '1/1/2013 02:00', 8),
       (3, '1/1/2013 00:00', 6),
       (3, '1/1/2013 01:00', 7),
       (3, '1/1/2013 02:00', 8);
TimeStamp will always be the same interval, e.g. daily data, 1 hour data, 1 minute data, etc. It will not be mixed.
For reporting and presentation purposes, I want to implement paging that:
Orders by TimeStamp
Starts out using a suggested pageSize (say 4), but will automatically adjust to include additional records matching on TimeStamp. In other words, if 1/1/2013 01:00 is included for one ParentId, the suggested pageSize will be overridden and all records for hour 01:00 will be included for all ParentId's. It's almost like the TOP WITH TIES option.
So running this query with pageSize of 4 would return 6 records. There are 3 hour 00:00 and 1 hour 01:00 by default, but because there are more hour 01:00's, the pageSize would be overridden to return all hour 00:00 and 01:00.
Here's what I have so far, and I think I'm close, as it works for the first iteration, but subsequent queries for the next pageSize+ rows don't work.
-- First-page attempt: the bounds 1..4 are hard-coded.
-- rownum numbers the rows in TimeStamp order; rnk gives tied timestamps the same rank.
WITH CTE AS
(
    SELECT ParentId, [TimeStamp], Value,
        RANK() OVER(ORDER BY [TimeStamp]) AS rnk,
        ROW_NUMBER() OVER(ORDER BY [TimeStamp]) AS rownum
    FROM @Temp
)
SELECT *
FROM CTE
WHERE (rownum BETWEEN 1 AND 4) OR (rnk BETWEEN 1 AND 4)
ORDER BY [TimeStamp], ParentId;
The ROW_NUMBER ensures the minimum pageSize is met, but the RANK will include additional ties.
-- Paging "with ties": take the rows that fall in the requested page by row number,
-- then widen the page to every row sharing a rank (timestamp) with any of them.
declare @Temp as Table ( ParentId Int, [TimeStamp] DateTime, [Value] Int );
insert into @Temp ( ParentId, [TimeStamp], [Value] ) values
  (1, '1/1/2013 00:00', 6),
  (1, '1/1/2013 01:00', 7),
  (1, '1/1/2013 02:00', 8),
  (2, '1/1/2013 00:00', 6),
  (2, '1/1/2013 01:00', 7),
  (2, '1/1/2013 02:00', 8),
  (3, '1/1/2013 00:00', 6),
  (3, '1/1/2013 01:00', 7),
  (3, '1/1/2013 02:00', 8);

declare @PageSize as Int = 4;  -- suggested (minimum) number of rows per page
declare @Page as Int = 1;      -- 1-based page number

with Alpha as (
  -- Rnk is shared by rows with the same timestamp; RowNum is unique per row
  select ParentId, [TimeStamp], Value,
    Rank() over ( order by [TimeStamp] ) as Rnk,
    Row_Number() over ( order by [TimeStamp] ) as RowNum
    from @Temp ),
  Beta as (
  -- rank range covered by the row-number window of the requested page
  select Min( Rnk ) as MinRnk, Max( Rnk ) as MaxRnk
    from Alpha
    where ( @Page - 1 ) * @PageSize < RowNum and RowNum <= @Page * @PageSize )
  select A.*
    from Alpha as A inner join
      Beta as B on B.MinRnk <= A.Rnk and A.Rnk <= B.MaxRnk
    order by [TimeStamp], ParentId;
EDIT:
An alternative query that assigns page numbers as it goes, so that next/previous page can be implemented without overlapping rows:
-- Walks the rows in order and assigns a page number to each one. A new page starts
-- only when the current page already holds @PageSize rows AND the timestamp changes,
-- so rows with equal timestamps never straddle a page boundary.
-- Expects @Temp and @PageSize to be declared earlier in the same batch.
with Alpha as (
  -- ParentId is added as a tie-breaker so RowNum is assigned deterministically;
  -- the recursion below joins on RowNum and needs a stable numbering.
  select ParentId, [TimeStamp], Value,
    Rank() over ( order by [TimeStamp] ) as Rnk,
    Row_Number() over ( order by [TimeStamp], ParentId ) as RowNum
    from @Temp ),
  Beta as (
  -- anchor: the first row opens page 1
  select ParentId, [TimeStamp], Value, Rnk, RowNum, 1 as Page, 1 as PageRow
    from Alpha
    where RowNum = 1
  union all
  -- step: take the next row and decide whether it stays on the current page
  select A.ParentId, A.[TimeStamp], A.Value, A.Rnk, A.RowNum,
    case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then B.Page + 1 else B.Page end,
    case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then 1 else B.PageRow + 1 end
    from Alpha as A inner join
      Beta as B on B.RowNum + 1 = A.RowNum
  )
  select * from Beta
  option ( MaxRecursion 0 ) -- 0 = no recursion limit; one level per source row
Note that recursive CTEs often scale poorly.
I think your strategy of using row_number() and rank() is overcomplicating things.
Just pick the top 4 timestamps from the data. Then choose any timestamps that match those:
-- Return every row whose timestamp is among those of the first 4 rows in
-- timestamp order. Expects the table variable @Temp to be declared earlier in the batch.
select *
from @Temp
where [TimeStamp] in (select top 4 [TimeStamp] from @Temp order by [TimeStamp])

SQL Randomise rows based on date and int to only change results order daily

I would like to generate a "random" integer for each row returned from a select statement where the random int only changes once per day (before and after 4am).
Example
-- First attempt (does not produce the desired ordering): seeds RAND with a number
-- built from the date (yyyymmdd) followed by a 0/1 flag for "hour > 4".
declare @Date datetime
set @Date = dateadd(dd, 8, GETDATE())
declare @DateNumber int
set @DateNumber = LEFT(CONVERT(VARCHAR(8), @Date, 112),10)
    + cast(CASE WHEN DATEPART(hh, @Date) > 4 THEN 1 ELSE 0 END as varchar(1))
declare @Customers table (Id int, Customer varchar(150), OrderNo int)
insert into @Customers (Id, Customer) values (1, 'Cust A'), (2, 'Cust B'),
    (3, 'Cust C'), (4, 'Cust D'), (5, 'Cust E'), (6, 'Cust F'),
    (7, 'Cust G'), (8, 'Cust H'), (9, 'Cust I')
-- my 1st attempt which doesnt work
update @Customers set OrderNo = rand(@DateNumber) / rand(Id) * 100
select * from
    @Customers order by OrderNo
The order of the results should remain constant until I change the dd value in the set #Date statement at the top.
Any ideas? Is this possible?
(outside of calculating this daily via a SQL job)
Updated solution with HABO's recommendation:
-- Assigns each customer a pseudo-random OrderNo (1..100) that stays the same for a
-- whole "day" running from 04:00 to 04:00.
declare @Date datetime = dateadd(hh, 36, GETDATE())

-- Seed = yyyymmdd of the date shifted back 4 hours, so it changes exactly once a
-- day, at 04:00. (Appending an "hour > 4" flag to the calendar date made the seed
-- change twice a day: at midnight and again at 05:00.)
declare @DateNumber int = cast(convert(varchar(8), dateadd(hh, -4, @Date), 112) as int)

declare @Customers table (Id int, Customer varchar(150), OrderNo int)
insert into @Customers (Id, Customer) values (1, 'Cust A'), (2, 'Cust B'),
    (3, 'Cust C'), (4, 'Cust D'), (5, 'Cust E'), (6, 'Cust F'),
    (7, 'Cust G'), (8, 'Cust H'), (9, 'Cust I')

-- Seeding once fixes the sequence returned by the unseeded Rand() calls below.
declare @ThrowAway as Float = Rand(@DateNumber)

-- Update one row per statement, in Id order: Rand() is evaluated once per
-- statement, so a single set-based UPDATE would give every row the same value.
declare @ID int
set @ID = (select min(ID) from @Customers)
while @ID is not null
begin
    update @Customers set OrderNo = Floor(Rand() * 100) + 1 where ID = @ID
    set @ID = (select min(ID) from @Customers where ID > @ID)
end

select * from @Customers order by OrderNo
When you provide a seed to RAND( Seed ) it will return the same result. If you use a seed value prior to your UPDATE query it will initialize the sequence. Thereafter just use RAND() without an argument. Something like:
-- Seed once, then draw one value per row. Expects @DateNumber and @Customers to be
-- declared earlier in the batch.
declare @ThrowAway as Float = Rand( @DateNumber )

-- Rand() without a seed is evaluated once per statement, so a single UPDATE over
-- the whole table would store the same number in every row. Update one row at a
-- time, in Id order, so each row consumes the next value of the seeded sequence.
declare @RowId as Int = ( select Min( Id ) from @Customers )
while @RowId is not null
begin
  update @Customers
    set OrderNo = Floor( Rand() * 100 ) + 1
    where Id = @RowId
  set @RowId = ( select Min( Id ) from @Customers where Id > @RowId )
end
Do keep in mind that random values include duplicates.

Complex SQL query for inventory app

Given the following 2 tables, I need to find the warehouses that have all the parts in the right quantity to build a particular kit, or more appropriately, how many kits each warehouse can build.
Inventory table: Warehouse, Part, and QuantityOnHand
Kit table: Kit, Part, QuantityForKit
For example: Kit1 requires 1 of Part1, 2 of Part2, and 1 of Part3. Warehouse A has 20 Part1, 5 Part2 and 3 Part3. Warehouse B has 5 Part1, 10 Part2, and no Part3.
Warehouse A can only build 2 of Kit1 because it doesn't have enough Part2 to make more than 2 kits. Warehouse B can't build any Kit1 because it doesn't have all the necessary parts.
I've got the following demo that works, but it seems really cumbersome and uses mostly table/index scans. Our inventory table is large and this just runs too slow. I'm looking for a better way to accomplish the same thing. In the demo there's an unbounded cross join, but in the actual app, it's limited to a single kit.
-- Demo: for every warehouse/kit pair, compute how many complete kits the warehouse
-- can build from its stock on hand.
CREATE TABLE #warehouse
(
    Warehouse CHAR(1) NOT NULL PRIMARY KEY
);

INSERT INTO #warehouse (Warehouse)
VALUES ('A'), ('B'), ('C'), ('D');

CREATE TABLE #inventory
(
    Warehouse CHAR(1) NOT NULL,
    Part INT NOT NULL,
    OnHand INT NOT NULL,
    CONSTRAINT pk_inventory PRIMARY KEY CLUSTERED (Part, Warehouse)
);

INSERT INTO #inventory (Warehouse, Part, OnHand)
VALUES ('A', 1, 20),
       ('A', 2, 5),
       ('A', 3, 3),
       ('B', 1, 5),
       ('B', 2, 10),
       ('C', 1, 1),
       ('C', 3, 1),
       ('D', 1, 1),
       ('D', 2, 2),
       ('D', 3, 1);

CREATE TABLE #kit
(
    Kit INT NOT NULL,
    Part INT NOT NULL,
    Quantity INT NOT NULL,
    CONSTRAINT pk_kit PRIMARY KEY CLUSTERED (Kit, Part)
);

INSERT INTO #kit (Kit, Part, Quantity)
VALUES (1, 1, 1),
       (1, 2, 2),
       (1, 3, 1),
       (2, 1, 1),
       (2, 2, 1);

-- Here's the statement I need to optimize
WITH requirement AS
(
    -- Every part of every kit, repeated for every warehouse. In the real app this
    -- is limited to a single kit (a filter such as k.Kit = @Kit).
    SELECT
        w.Warehouse,
        k.Kit,
        k.Part,
        k.Quantity
    FROM #warehouse AS w
    CROSS JOIN #kit AS k
)
SELECT
    r.Warehouse,
    r.Kit,
    -- kits buildable from one part = on-hand / required (integer division);
    -- a part with no inventory row counts as 0, and the scarcest part decides
    MIN(ISNULL(i.OnHand, 0) / r.Quantity) AS [Capacity]
FROM requirement AS r
LEFT OUTER JOIN #inventory AS i
    ON r.Warehouse = i.Warehouse
    AND r.Part = i.Part
GROUP BY
    r.Warehouse,
    r.Kit
;
Suggestions appreciated.
Try this:
-- For one kit (@kit, expected to be declared or passed in by the caller), list the
-- warehouses that hold enough of every part and how many kits each can build.
SELECT c.warehouse, MIN(c.capacity) AS capacity
FROM (
    -- one row per kit part the warehouse has enough of for at least one kit
    SELECT i.warehouse, i.onhand / k.quantity AS capacity
    FROM #kit k
    JOIN #inventory i
        ON k.part = i.part AND k.quantity <= i.onhand
    WHERE k.kit = @kit) c
GROUP BY c.warehouse
-- keep only warehouses that matched every part of the kit
HAVING COUNT(*) = (SELECT COUNT(*) FROM #kit WHERE kit = @kit)
sqlfiddle here