SQL Server - Grouping Combination of possibilities by fixed value - sql

I have to create the cheapest basket that includes a fixed number of items.
For example for a basket which have (5) items
1 and 4 = (1 * 50) + (1 * 100) = 150
2 and 3 = (1 * 60) + (1 * 80) = 140 -- this is my guy
2 and 2 and 1 = (1 * 60) + (1 * 60) + (1 * 50) = 170
3 and 3 = (1 * 80) + (1 * 80) = 160 **** this 6 items but total item can exceed min items. The important thing is total cost...
....
Also this is valid for any number of items a basket may have. Also there are lots of stores and each stores have different package may include several items.
How can handle this issue with SQL?
UPDATE
Here is example data generation code. Recursive CTE solutions are more expensive. I need to finish the job in under 500-600 ms across 600-700 stores each time; this is a package search engine. Manual scenario creation using `#temp` tables or `UNION` is 15-20 times cheaper than a recursive CTE.
Also concatenating Item or PackageId is very expensive. I can found required package id or item after selecting cheapest package with join to source table.
I am hoping for a magical solution that is ultra fast and still returns the correct option.
Only cheapest basket required for each store. Manual scenario creation is very fast but sometimes fail for correct cheapest basket.
-- Test-data generator: fills the temp table #storePackages with a random
-- number of stores, each with a random set of packages.
-- Local variables use the @ sigil (T-SQL requires it; # is reserved for temp tables).
CREATE TABLE #storePackages(
    StoreId int not null,
    PackageId int not null,
    ItemType int not null, -- there are three item types: 0 is a normal item, 1 is a discounted item, 2 is a free item
    ItemCount int not null,
    ItemPrice decimal(18,8) not null,
    MaxItemQouta int not null, -- in general a package has a quota between 1 and 6, but in rare cases up to 20-25
    MaxFullQouta int not null -- MaxItemQouta plus any additional free/discounted item quota (the generator below adds 0 to 2)
)

declare @totalStores int
-- TOP 1 ... ORDER BY NEWID() picks one random value from the given range.
set @totalStores = (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 200 AND 400 ORDER BY NEWID())
declare @storeId int;
declare @packageId int;
declare @maxPackageForStore int;
declare @itemMinPrice decimal(18,8);
set @storeId = 1;
set @packageId = 1

while(@storeId <= @totalStores)
BEGIN
    set @maxPackageForStore = (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 2 AND 6 ORDER BY NEWID())
    set @itemMinPrice = (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 40 AND 100 ORDER BY NEWID())
    BEGIN
        -- Explicit column list so the insert does not depend on table column order.
        INSERT INTO #storePackages (StoreId, PackageId, ItemType, ItemCount, ItemPrice, MaxItemQouta, MaxFullQouta)
        -- Normal packages (ItemType 0): sizes 1 .. @maxPackageForStore.
        SELECT DISTINCT
            StoreId = @storeId
            ,PackageId = CAST(@packageId + number AS int)
            ,ItemType = 0
            ,ItemCount = number
            ,ItemPrice = @itemMinPrice + (10 * (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN pkgNo.number AND pkgNo.number + 2 ORDER BY NEWID()))
            ,MaxItemQouta = @maxPackageForStore
            ,MaxFullQouta = @maxPackageForStore + (CASE WHEN number > 1 AND number < 4 THEN 1 ELSE 0 END)
        FROM master..[spt_values] pkgNo
        WHERE number BETWEEN 1 AND @maxPackageForStore
        UNION ALL
        -- Discounted single-item packages (ItemType 1).
        SELECT DISTINCT
            StoreId = @storeId
            ,PackageId = CAST(@packageId + number AS int)
            ,ItemType = 1
            ,ItemCount = 1
            ,ItemPrice = (@itemMinPrice / 2) + (10 * (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN pkgNo.number AND pkgNo.number + 2 ORDER BY NEWID()))
            ,MaxItemQouta = @maxPackageForStore
            ,MaxFullQouta = @maxPackageForStore + (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 0 AND 2 ORDER BY NEWID())
        FROM master..[spt_values] pkgNo
        WHERE number BETWEEN 2 AND (CASE WHEN @maxPackageForStore > 4 THEN 4 ELSE @maxPackageForStore END)

        -- NOTE(review): both branches derive PackageId from @packageId + number,
        -- so the same PackageId can appear more than once within a store.
        set @packageId = @packageId + @maxPackageForStore;
    END
    set @storeId = @storeId + 1;
END

SELECT * FROM #storePackages
drop table #storePackages
MY SOLUTION
First of all I am thankful for everyone who try to help me. However all suggested solutions are based on CTE. As I said before recursive CTEs cause performace problems when hunderds of stores are considered. Also multiple packages are requested for one time. This means, I request can include mutiple baskets. One is 5 items other is 3 items and another one is 7 items...
Last Solution
First of all I generates all possible scenarios in a table by item size... By this way, I have option eleminate unwanted scenarios.
-- Pre-generated basket scenarios, one row per package slot of a scenario.
CREATE TABLE ItemScenarios
(
    Item           INT, -- item count the scenario is generated for
    ScenarioId     INT, -- rows sharing a ScenarioId belong to the same scenario
    CalculatedItem INT  -- this will be joined with Store Item
)
Then I generated all possible scenario from 2 item to 25 item and insert to the ItemScenarios table. Scenarios can be genereated one time by using WHILE or recursive CTE. The advantage of this way, scenarios generated only for one time.
Resuls are like below.
Item | ScenarioId | CalculatedItem
--------------------------------------------------------
2 1 2
2 2 3
2 3 1
2 3 1
3 4 5
3 5 4
3 6 3
3 7 2
3 7 2
3 8 2
3 8 1
3 9 1
3 9 1
3 9 1
....
.....
......
25 993 10
By this way, I can restrict scenario sizes, Max different store, max different package etc.
Also I can eleminate some scenarios which matematically impossible cheapest then other. For example for 4 items request, some scenario
Scenario 1 : 2+2
Scenario 2: 2+1+1
Scenario 3: 1+1+1+1
Among these scenarios; It is impossible Scenario 2 would be cheapest basket. Because,
If Scenario 2 < Scenario 3 --> Scenario 1 would be lower then Scenario 2. Because the thing decreasing cost is 2 item price and **Scenario 1* have double 2 items
Also If Scenario 2 < Scenario 1 --> Scenario 3 would be lower then Scenario 2
Now, If I delete scenarios like Scenario 2 I would gain some performance advantages.
Now I can chose chepest item prices among stores
-- Picks the cheapest basket per store for a requested item count by joining
-- store packages against the pre-generated ItemScenarios table.
DECLARE @requestedItems int;
SET @requestedItems = 5;

-- Step 1: for every (store, scenario, package size) keep the cheapest package.
CREATE TABLE #JoinedPackageItemWithScenarios(
    StoreId int not null,
    PackageId int not null,
    ItemCount int not null,
    ItemPrice decimal(18,8), -- the comma after this column was missing, which made the DDL invalid
    ScenarioId int not null
)

INSERT INTO #JoinedPackageItemWithScenarios (StoreId, PackageId, ItemCount, ItemPrice, ScenarioId)
SELECT
    SPM.StoreId
    ,SPM.PackageId
    ,SPM.ItemCount
    ,SPM.ItemPrice
    ,SPM.ScenarioId
FROM (
    SELECT
        SP.StoreId
        ,SP.PackageId
        ,SP.ItemCount
        ,SP.ItemPrice
        ,SC.ScenarioId
        ,RowNumber = ROW_NUMBER() OVER (PARTITION BY SP.StoreId,SC.ScenarioId,SP.ItemCount ORDER BY SP.ItemPrice)
    FROM ItemScenarios SC
    -- INNER JOIN: a scenario row without a matching package would otherwise
    -- yield NULL StoreId/PackageId/ItemCount and violate the NOT NULL columns above.
    INNER JOIN StorePackages AS SP ON SP.ItemCount = SC.CalculatedItem
    WHERE SC.Item = @requestedItems
) SPM
WHERE SPM.RowNumber = 1
-- NOTE(review): the ROW_NUMBER filter keeps one row per (store, scenario, package size),
-- so a scenario that lists the same CalculatedItem twice (e.g. 3+3) contributes
-- only one row here - confirm this is intended.

-- NOW I HAVE CHEAPEST PRICE FOR EACH ITEM, I CAN CREATE BASKET
-- Step 2: total each scenario per store and keep the cheapest one that
-- reaches the requested item count.
CREATE TABLE #selectedScenarios(
    StoreId int not null,
    ScenarioId int not null,
    TotalItem int not null,
    TotalCost decimal(18,8)
)

INSERT INTO #selectedScenarios (StoreId, ScenarioId, TotalItem, TotalCost)
SELECT
    StoreId
    ,ScenarioId
    ,TotalItem
    ,TotalCost
FROM (
    SELECT
        StoreId
        ,ScenarioId
        -- Package ids are deliberately not concatenated here (it is expensive);
        -- they are looked up after the cheapest scenario has been selected.
        ,TotalItem = SUM(ItemCount)
        ,TotalCost = SUM(ItemPrice)
        ,RowNumber = ROW_NUMBER() OVER (PARTITION BY StoreId ORDER BY SUM(ItemPrice))
    FROM #JoinedPackageItemWithScenarios JPS
    GROUP BY StoreId,ScenarioId
    HAVING(SUM(ItemCount) >= @requestedItems)
) Selected
WHERE RowNumber = 1

-- NOW WE CAN POPULATE PackageIds if needed
-- Step 3: attach the package ids of the winning scenario.
-- dbo.GROUP_CONCAT is a user-defined aggregate that must exist in the database.
SELECT
    SS.StoreId
    ,SS.ScenarioId
    ,TotalItem = MAX(SS.TotalItem)
    ,TotalCost = MAX(SS.TotalCost)
    ,PackageIds = dbo.GROUP_CONCAT(CAST(JPS.PackageId AS nvarchar(20)))
FROM #selectedScenarios SS
JOIN #JoinedPackageItemWithScenarios AS JPS ON JPS.StoreId = SS.StoreId AND JPS.ScenarioId = SS.ScenarioId
GROUP BY SS.StoreId,SS.ScenarioId
SUM
In my tests, this approach is at least 10 times faster than a recursive CTE, especially as the number of stores and requested items increases, because the recursive CTE attempts millions of unnecessary JOINs in that case. It also returns 100% correct results.

If you want combinations, you'll need a recursive CTE. Preventing infinite recursion is a challenge. Here is one method:
-- Enumerates package combinations with a recursive CTE and returns the
-- cheapest one that contains at least 5 items.
with cte as (
      -- anchor: every single package is a combination of its own
      select cast(packageid as nvarchar(4000)) as packs, item, cost
      from t
      union all
      -- recursive step: append one more package to an existing combination
      select concat(cte.packs, ',', t.packageid), cte.item + t.item, cte.cost + t.cost
      from cte join
           t
           on cte.item + t.item < 10  -- some "reasonable" stop condition
     )
select top 1 cte.*
from cte
where cte.item >= 5
-- ascending: the cheapest qualifying basket comes first
-- (the original "desc" returned the most expensive one)
order by cost asc;
I'm not 100% sure that SQL Server will accept the join condition, but this should work.

Assuming you want to compare all possible permutations of items until the total items in the basket exceeds your total basket number, something like the following would do what you want.
-- Finds the cheapest combination of packages whose total item count reaches @N.
-- Variables and the table variable use the @ sigil (required by T-SQL DECLARE).
DECLARE @N INT = 1; -- minimum number of items the basket must contain
DECLARE @myTable TABLE (storeID INT DEFAULT(1), packageID INT IDENTITY(1, 1), item INT, cost INT);
INSERT @myTable (item, cost) VALUES (1, 50), (2, 60), (3, 80), (4, 100), (5, 169), (5, 165), (4, 101), (2, 61);

WITH CTE1 AS (
    -- cheapest package for each package size
    SELECT item, cost
    FROM (
        SELECT item, cost, ROW_NUMBER() OVER (PARTITION BY item ORDER BY cost) RN
        FROM @myTable) T
    WHERE RN = 1)
, CTE2 AS (
    -- anchor: baskets made of a single package
    SELECT CAST('items'+CAST(C1.item AS VARCHAR(10)) AS VARCHAR(4000)) items, C1.cost totalCost, C1.item totalItems
    FROM CTE1 C1
    UNION ALL
    -- recursive step: extend any basket that is still below @N items
    SELECT CAST(C2.items + ' + items' + CAST(C1.item AS VARCHAR(10)) AS VARCHAR(4000)), C1.cost + C2.totalCost, C1.item + C2.totalItems
    FROM CTE2 C2
    CROSS JOIN CTE1 C1
    WHERE C2.totalItems < @N)
-- cheapest basket first; on equal cost prefer the one with more items
SELECT TOP 1 *
FROM CTE2
WHERE totalItems >= @N
ORDER BY totalCost, totalItems DESC;
Edited to deal with the issue @Matt mentioned.

First we need to find all combinations, and then select the one with the minimal price for the requested item count.
-- Builds all package combinations recursively and returns the cheapest one
-- that contains at least @MinItemCount items.
-- Variables and the table variable use the @ sigil (required by T-SQL DECLARE).
DECLARE @Table as TABLE (StoreId INT, PackageId INT, Item INT, Cost INT)
INSERT INTO @Table (StoreId, PackageId, Item, Cost) VALUES (1,1,1,50),(1,2,2,60),(1,3,3,80),(1,4,4,100)
DECLARE @MinItemCount INT = 5;

WITH cteCombinationTable AS (
    -- anchor: every single package
    SELECT cast(PackageId as NVARCHAR(4000)) as Package, Item, Cost
    FROM @Table
    UNION ALL
    -- recursive step: append one package to combinations still below the minimum
    -- NOTE(review): the ON predicate appears to be always true (the concatenated
    -- string is always longer than Package), so this is effectively a cross join
    -- bounded only by the WHERE clause.
    SELECT CONCAT(o.Package,',',c.PackageId), c.Item + o.Item, c.Cost + o.Cost
    FROM @Table as c
    join cteCombinationTable as o on CONCAT(o.Package,',',c.PackageId) <> Package
    where o.Item < @MinItemCount
)
select top 1 *
from cteCombinationTable
where item >= @MinItemCount
order by cast(cost as decimal)/@MinItemCount

-- Returns, per store, every distinct package combination that has the lowest
-- combined cost while containing at least @MinItemCount items.
-- #TestResults is a real temp table; @MinItemCount is a local variable.
IF OBJECT_ID('tempdb..#TestResults') IS NOT NULL
BEGIN
    DROP TABLE #TestResults
END

DECLARE @MinItemCount INT = 5

;WITH cteMaxCostToConsider AS (
    -- Upper bound on the cost worth exploring per store: the cheaper of
    -- (a) one of every package, when together they reach the minimum count, and
    -- (b) enough copies of a single package to reach the minimum count.
    SELECT
        StoreId
        ,CASE
            WHEN (SUM(ItemCount) >= @MinItemCount) AND
                SUM(ItemPrice) < MIN(((@MinItemCount / ItemCount) + IIF((@MinItemCount % ItemCount) > 0, 1,0)) * ItemPrice) THEN SUM(ItemPrice)
            ELSE MIN(((@MinItemCount / ItemCount) + IIF((@MinItemCount % ItemCount) > 0, 1,0)) * ItemPrice)
        END AS MaxCostToConsider
    FROM
        storePackages
    GROUP BY
        StoreId
)
, cteRecursive AS (
    -- anchor: every single package; ids are collected as XML elements so they
    -- can be split and re-sorted later
    SELECT
        StoreId
        ,'<PackageId>' + CAST(PackageId AS VARCHAR(MAX)) + '</PackageId>' AS PackageIds
        ,ItemCount AS CombinedItemCount
        ,CAST(ItemPrice AS decimal(18,8)) AS CombinedCost
    FROM
        storePackages
    UNION ALL
    -- recursive step: add one more package of the same store while the
    -- combined cost stays within the store's upper bound
    SELECT
        r.StoreId
        ,r.PackageIds + '<PackageId>' + CAST(t.PackageId AS VARCHAR(MAX)) + '</PackageId>'
        ,r.CombinedItemCount + t.ItemCount
        ,CAST(r.CombinedCost + t.ItemPrice AS decimal(18,8))
    FROM
        cteRecursive r
        INNER JOIN storePackages t
        ON r.StoreId = t.StoreId
        INNER JOIN cteMaxCostToConsider m
        ON r.StoreId = m.StoreId
        AND r.CombinedCost + t.ItemPrice <= m.MaxCostToConsider
)
, cteCombinedCostRowNum AS (
    -- rank qualifying combinations by cost; DENSE_RANK keeps ties together
    SELECT
        StoreId
        ,CAST(PackageIds AS XML) AS PackageIds
        ,CombinedCost
        ,CombinedItemCount
        ,DENSE_RANK() OVER (PARTITION BY StoreId ORDER BY CombinedCost) AS CombinedCostRowNum
        ,ROW_NUMBER() OVER (PARTITION BY StoreId ORDER BY CombinedCost) AS PseudoCartId
    FROM
        cteRecursive
    WHERE
        CombinedItemCount >= @MinItemCount
)
SELECT DISTINCT
    c.StoreId
    ,x.PackageIds
    ,c.CombinedItemCount
    ,c.CombinedCost
INTO #TestResults
FROM
    cteCombinedCostRowNum c
    -- split the XML into rows, sort the ids and concatenate them again so that
    -- "2,3" and "3,2" become the same string and DISTINCT can collapse them
    CROSS APPLY (
        SELECT( STUFF ( (
            SELECT ',' + PackageId
            FROM
                (SELECT T.N.value('.','VARCHAR(100)') as PackageId FROM c.PackageIds.nodes('PackageId') as T(N)) p
            ORDER BY
                PackageId
            FOR XML PATH(''), TYPE ).value('.','NVARCHAR(MAX)'), 1, 1, '')
        ) as PackageIds
    ) x
WHERE
    CombinedCostRowNum = 1

SELECT *
FROM
    #TestResults
Takes about 1000-2000 MS varies widely depending on combinations that have to be considered within test data (e.g. some times more or less data is generate by your script).
this answer no doubt looks a bit more complicated than Gordon's or ZLKs but it handles Ties, repeated values, 1 package meeting the criteria and a few other things. The main difference however is really in the last query where I take the XML that was build during the recursive query split it and then re-combined in order so that you can use DISTINCT and get a unique pairing e.g. package 2 + package 3 = 140 & package 3 + package 2 = 140 would be the first 2 results in all of the queries so using the XML to split then recombine allows that to be a single row. But lets say you also had another row such as (1,5,2,60) that had 2 items and a cost of 60 this query will return that combination too.
You can cherry pick between the answers and use their method to get to the combinations and my methods to get to the final results etc.... But to explain the process of my query.
cteMaxCostToConsider - this is just a way of getting a cost to contain the recursive query to so that less records have to be considered. what it does is determines the cost of all of the packages together or the cost if you bought all of the same package to satisfy the minimum count.
cteRecursive - this is similar to ZLKs answer and a litte like Gordon's but what it does is goes out and continues to add items & item combinations until it reaches MaxCostToConsider. If I limit to look at item count it could miss a situation where 7 items would be cheaper than 5 so by constraining to the determined Combined Cost it limits the recursion and performs better.
cteCombinedCostRowNum - This simply finds the lowest Combined Cost and at least the minimum item count.
The final query is a bit trickier but the cross apply splits the XML string build in the recursive cte to different rows re-orders those rows and then concatenates them again so that the reverse combination e.g. Package 2 & Package 3 reverse Package 3 & Package 2 becomes the same record and then calls distinct.
This is a bit more flexible than SELECT top N. To see the difference add the following test cases to your test data 1 at a time:
(StoreId, PackageId, Item, Cost)
(1,5,2,60)
(1,6,1,1),(1,7,1,1)
(1,8,50,1)
Edited. The above will give you every combination of a store that will have the lowest combined cost. The bug that you noted was due to cteMaxCostToConsider. I was using SUM(ItemPrice) but sometimes SUM(ItemCount) related to it didn't have enough items in it to allow it to be considered for the MaxCostToConsider. I modified the case statement to correct that issue.
I have also modified to work with your data example your provided. NOTE you should change your PackageId in that to an IDENTITY column though because I was getting the duplicate PackageIds within a store with the method you used.
Here is a modified version of your script to see what I am talking about:
-- Test-data generator writing to the permanent table storePackages.
-- PackageId is an IDENTITY column, so every generated package gets a unique id.
IF OBJECT_ID('storePackages') IS NOT NULL
BEGIN
    DROP TABLE storePackages
END

CREATE TABLE storePackages(
    StoreId int not null,
    PackageId int not null IDENTITY(1,1),
    ItemType int not null, -- there are three item types: 0 is a normal item, 1 is a discounted item, 2 is a free item
    ItemCount int not null,
    ItemPrice decimal(18,8) not null,
    MaxItemQouta int not null, -- in general a package has a quota between 1 and 6, but in rare cases up to 20-25
    MaxFullQouta int not null -- MaxItemQouta plus any additional free/discounted item quota (the generator below adds 0 to 2)
)

declare @totalStores int
-- TOP 1 ... ORDER BY NEWID() picks one random value from the given range.
set @totalStores = (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 200 AND 400 ORDER BY NEWID())
declare @storeId int;
declare @maxPackageForStore int;
declare @itemMinPrice decimal(18,8);
set @storeId = 1;

while(@storeId <= @totalStores)
BEGIN
    set @maxPackageForStore = (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 2 AND 6 ORDER BY NEWID())
    set @itemMinPrice = (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 40 AND 100 ORDER BY NEWID())
    BEGIN
        -- The column list must follow the SELECT order: MaxItemQouta first, then
        -- MaxFullQouta. The original list had the two swapped, which stored each
        -- value in the other column.
        INSERT INTO storePackages (StoreId, ItemType, ItemCount, ItemPrice, MaxItemQouta, MaxFullQouta)
        -- Normal packages (ItemType 0): sizes 1 .. @maxPackageForStore.
        SELECT DISTINCT
            StoreId = @storeId
            ,ItemType = 0
            ,ItemCount = number
            ,ItemPrice = @itemMinPrice + (10 * (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN pkgNo.number AND pkgNo.number + 2 ORDER BY NEWID()))
            ,MaxItemQouta = @maxPackageForStore
            ,MaxFullQouta = @maxPackageForStore + (CASE WHEN number > 1 AND number < 4 THEN 1 ELSE 0 END)
        FROM master..[spt_values] pkgNo
        WHERE number BETWEEN 1 AND @maxPackageForStore
        UNION ALL
        -- Discounted single-item packages (ItemType 1).
        SELECT DISTINCT
            StoreId = @storeId
            ,ItemType = 1
            ,ItemCount = 1
            ,ItemPrice = (@itemMinPrice / 2) + (10 * (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN pkgNo.number AND pkgNo.number + 2 ORDER BY NEWID()))
            ,MaxItemQouta = @maxPackageForStore
            ,MaxFullQouta = @maxPackageForStore + (SELECT TOP 1 n = number FROM master..[spt_values] WHERE number BETWEEN 0 AND 2 ORDER BY NEWID())
        FROM master..[spt_values] pkgNo
        WHERE number BETWEEN 2 AND (CASE WHEN @maxPackageForStore > 4 THEN 4 ELSE @maxPackageForStore END)
    END
    set @storeId = @storeId + 1;
END

SELECT * FROM storePackages
-- (the table is not dropped here)
No PackageIds Simply StoreId and Lowest CombinedCost - ~200-300MS depending on data
Next if you don't care what Packages are in there and you only want 1 row per store you can do the following:
-- Returns one row per store: the lowest combined cost of any package
-- combination with at least @MinItemCount items (no package ids).
-- #TestResults is a real temp table; @MinItemCount is a local variable.
IF OBJECT_ID('tempdb..#TestResults') IS NOT NULL
BEGIN
    DROP TABLE #TestResults
END

DECLARE @MinItemCount INT = 5

;WITH cteMaxCostToConsider AS (
    -- Upper bound on the cost worth exploring per store: the cheaper of
    -- (a) one of every package, when together they reach the minimum count, and
    -- (b) enough copies of a single package to reach the minimum count.
    SELECT
        StoreId
        ,CASE
            WHEN (SUM(ItemCount) >= @MinItemCount) AND
                SUM(ItemPrice) < MIN(((@MinItemCount / ItemCount) + IIF((@MinItemCount % ItemCount) > 0, 1,0)) * ItemPrice) THEN SUM(ItemPrice)
            ELSE MIN(((@MinItemCount / ItemCount) + IIF((@MinItemCount % ItemCount) > 0, 1,0)) * ItemPrice)
        END AS MaxCostToConsider
    FROM
        storePackages
    GROUP BY
        StoreId
)
, cteRecursive AS (
    -- anchor: every single package
    SELECT
        StoreId
        ,ItemCount AS CombinedItemCount
        ,CAST(ItemPrice AS decimal(18,8)) AS CombinedCost
    FROM
        storePackages
    UNION ALL
    -- recursive step: add one more package of the same store while the
    -- combined cost stays within the store's upper bound
    SELECT
        r.StoreId
        ,r.CombinedItemCount + t.ItemCount
        ,CAST(r.CombinedCost + t.ItemPrice AS decimal(18,8))
    FROM
        cteRecursive r
        INNER JOIN storePackages t
        ON r.StoreId = t.StoreId
        INNER JOIN cteMaxCostToConsider m
        ON r.StoreId = m.StoreId
        AND r.CombinedCost + t.ItemPrice <= m.MaxCostToConsider
)
SELECT
    StoreId
    ,MIN(CombinedCost) as CombinedCost
INTO #TestResults
FROM
    cteRecursive
WHERE
    CombinedItemCount >= @MinItemCount
GROUP BY
    StoreId

SELECT *
FROM
    #TestResults
WITH PackageIds Only 1 Record Per StoreId - Varries widely depending on test data/combinations to consider ~600-1300MS
Or if you still want package ids but you don't care which combination you choose and you only want 1 record then you can do:
-- Returns one row per store: a single cheapest package combination with at
-- least @MinItemCount items, including its comma-separated package ids.
-- #TestResults is a real temp table; @MinItemCount is a local variable.
IF OBJECT_ID('tempdb..#TestResults') IS NOT NULL
BEGIN
    DROP TABLE #TestResults
END

DECLARE @MinItemCount INT = 5

;WITH cteMaxCostToConsider AS (
    -- Upper bound on the cost worth exploring per store: the cheaper of
    -- (a) one of every package, when together they reach the minimum count, and
    -- (b) enough copies of a single package to reach the minimum count.
    SELECT
        StoreId
        ,CASE
            WHEN (SUM(ItemCount) >= @MinItemCount) AND
                SUM(ItemPrice) < MIN(((@MinItemCount / ItemCount) + IIF((@MinItemCount % ItemCount) > 0, 1,0)) * ItemPrice) THEN SUM(ItemPrice)
            ELSE MIN(((@MinItemCount / ItemCount) + IIF((@MinItemCount % ItemCount) > 0, 1,0)) * ItemPrice)
        END AS MaxCostToConsider
    FROM
        storePackages
    GROUP BY
        StoreId
)
, cteRecursive AS (
    -- anchor: every single package
    SELECT
        StoreId
        ,CAST(PackageId AS VARCHAR(MAX)) AS PackageIds
        ,ItemCount AS CombinedItemCount
        ,CAST(ItemPrice AS decimal(18,8)) AS CombinedCost
    FROM
        storePackages
    UNION ALL
    -- recursive step: add one more package of the same store while the
    -- combined cost stays within the store's upper bound
    SELECT
        r.StoreId
        ,r.PackageIds + ',' + CAST(t.PackageId AS VARCHAR(MAX))
        ,r.CombinedItemCount + t.ItemCount
        ,CAST(r.CombinedCost + t.ItemPrice AS decimal(18,8))
    FROM
        cteRecursive r
        INNER JOIN storePackages t
        ON r.StoreId = t.StoreId
        INNER JOIN cteMaxCostToConsider m
        ON r.StoreId = m.StoreId
        AND r.CombinedCost + t.ItemPrice <= m.MaxCostToConsider
)
, cteCombinedCostRowNum AS (
    -- number qualifying combinations by cost; ties are broken arbitrarily
    SELECT
        StoreId
        ,PackageIds
        ,CombinedCost
        ,CombinedItemCount
        ,ROW_NUMBER() OVER (PARTITION BY StoreId ORDER BY CombinedCost) AS RowNumber
    FROM
        cteRecursive
    WHERE
        CombinedItemCount >= @MinItemCount
)
SELECT DISTINCT
    c.StoreId
    ,c.PackageIds
    ,c.CombinedItemCount
    ,c.CombinedCost
INTO #TestResults
FROM
    cteCombinedCostRowNum c
WHERE
    RowNumber = 1

SELECT *
FROM
    #TestResults
Note all bench marking is done on a 4 year old laptop Intel i7-3520M CPU 2.9 GHz with 8 GB of RAM and SAMSUNG 500 GB EVO SSD. So if you run this on an appropriately resourced server I would expect exponentially faster. There is also no doubt that adding indexes on storePackages would expedite the answer as well.

MY SOLUTION
First of all I am thankful for everyone who try to help me. However all suggested solutions are based on CTE. As I said before recursive CTEs cause performace problems when hunderds of stores are considered. Also multiple packages are requested for one time. This means, A request can include mutiple baskets. One is 5 items other is 3 items and another one is 7 items...
Last Solution
First of all I generates all possible scenarios in a table by item size... By this way, I have option eleminate unwanted scenarios.
-- Pre-generated basket scenarios, one row per package slot of a scenario.
CREATE TABLE ItemScenarios
(
    Item           INT, -- item count the scenario is generated for
    ScenarioId     INT, -- rows sharing a ScenarioId belong to the same scenario
    CalculatedItem INT  -- this will be joined with Store Item
)
Then I generated all possible scenario from 2 item to 25 item and insert to the ItemScenarios table. Scenarios can be genereated one time by using WHILE or recursive CTE. The advantage of this way, scenarios generated only for one time.
Resuls are like below.
Item | ScenarioId | CalculatedItem
--------------------------------------------------------
2 1 2
2 2 3
2 3 1
2 3 1
3 4 5
3 5 4
3 6 3
3 7 2
3 7 2
3 8 2
3 8 1
3 9 1
3 9 1
3 9 1
....
.....
......
25 993 10
By this way, I can restrict scenario sizes, Max different store, max different package etc.
Also I can eleminate some scenarios which matematically impossible cheapest then other. For example for 4 items request, some scenario
Scenario 1 : 2+2
Scenario 2: 2+1+1
Scenario 3: 1+1+1+1
Among these scenarios; It is impossible Scenario 2 would be cheapest basket. Because,
If Scenario 2 < Scenario 3 --> Scenario 1 would be lower then Scenario 2. Because the thing decreasing cost is 2 item price and **Scenario 1* have double 2 items
Also If Scenario 2 < Scenario 1 --> Scenario 3 would be lower then Scenario 2
Now, If I delete scenarios like Scenario 2 I would gain some performance advantages.
Now I can chose chepest item prices among stores
-- Picks the cheapest basket per store for a requested item count by joining
-- store packages against the pre-generated ItemScenarios table.
DECLARE @requestedItems int;
SET @requestedItems = 5;

-- Step 1: for every (store, scenario, package size) keep the cheapest package.
CREATE TABLE #JoinedPackageItemWithScenarios(
    StoreId int not null,
    PackageId int not null,
    ItemCount int not null,
    ItemPrice decimal(18,8), -- the comma after this column was missing, which made the DDL invalid
    ScenarioId int not null
)

INSERT INTO #JoinedPackageItemWithScenarios (StoreId, PackageId, ItemCount, ItemPrice, ScenarioId)
SELECT
    SPM.StoreId
    ,SPM.PackageId
    ,SPM.ItemCount
    ,SPM.ItemPrice
    ,SPM.ScenarioId
FROM (
    SELECT
        SP.StoreId
        ,SP.PackageId
        ,SP.ItemCount
        ,SP.ItemPrice
        ,SC.ScenarioId
        ,RowNumber = ROW_NUMBER() OVER (PARTITION BY SP.StoreId,SC.ScenarioId,SP.ItemCount ORDER BY SP.ItemPrice)
    FROM ItemScenarios SC
    -- INNER JOIN: a scenario row without a matching package would otherwise
    -- yield NULL StoreId/PackageId/ItemCount and violate the NOT NULL columns above.
    INNER JOIN StorePackages AS SP ON SP.ItemCount = SC.CalculatedItem
    WHERE SC.Item = @requestedItems
) SPM
WHERE SPM.RowNumber = 1
-- NOTE(review): the ROW_NUMBER filter keeps one row per (store, scenario, package size),
-- so a scenario that lists the same CalculatedItem twice (e.g. 3+3) contributes
-- only one row here - confirm this is intended.

-- NOW I HAVE CHEAPEST PRICE FOR EACH ITEM, I CAN CREATE BASKET
-- Step 2: total each scenario per store and keep the cheapest one that
-- reaches the requested item count.
CREATE TABLE #selectedScenarios(
    StoreId int not null,
    ScenarioId int not null,
    TotalItem int not null,
    TotalCost decimal(18,8)
)

INSERT INTO #selectedScenarios (StoreId, ScenarioId, TotalItem, TotalCost)
SELECT
    StoreId
    ,ScenarioId
    ,TotalItem
    ,TotalCost
FROM (
    SELECT
        StoreId
        ,ScenarioId
        -- Package ids are deliberately not concatenated here (it is expensive);
        -- they are looked up after the cheapest scenario has been selected.
        ,TotalItem = SUM(ItemCount)
        ,TotalCost = SUM(ItemPrice)
        ,RowNumber = ROW_NUMBER() OVER (PARTITION BY StoreId ORDER BY SUM(ItemPrice))
    FROM #JoinedPackageItemWithScenarios JPS
    GROUP BY StoreId,ScenarioId
    HAVING(SUM(ItemCount) >= @requestedItems)
) Selected
WHERE RowNumber = 1

-- NOW WE CAN POPULATE PackageIds if needed
-- Step 3: attach the package ids of the winning scenario.
-- dbo.GROUP_CONCAT is a user-defined aggregate that must exist in the database.
SELECT
    SS.StoreId
    ,SS.ScenarioId
    ,TotalItem = MAX(SS.TotalItem)
    ,TotalCost = MAX(SS.TotalCost)
    ,PackageIds = dbo.GROUP_CONCAT(CAST(JPS.PackageId AS nvarchar(20)))
FROM #selectedScenarios SS
JOIN #JoinedPackageItemWithScenarios AS JPS ON JPS.StoreId = SS.StoreId AND JPS.ScenarioId = SS.ScenarioId
GROUP BY SS.StoreId,SS.ScenarioId
SUM
In my tests, this approach is at least 10 times faster than a recursive CTE, especially as the number of stores and requested items increases, because the recursive CTE attempts millions of unnecessary JOINs in that case. It also returns 100% correct results.

Related

Get rows in SQL by summing up a until certain value is exceeded and stop retrieving

I have to return rows from the database when the value exceeds a certain point.
I should get enough rows to sum up to a value that is greater than my quantity and stop retrieving rows.
Is this possible and does it makes sense?
Can this be transferred into LINQ for EF core?
I am currently stuck with query that will return all the rows...
-- Asker's current query (presumably generated by EF Core - see the question text).
-- The SUM subquery is not correlated to the outer row [i]: it totals the quantity
-- over all matching rows once, so the "> 10" test has the same outcome for every
-- row and the query returns either all matching rows or none.
-- NOTE(review): the outer filter uses <= 1500 while the subquery uses < 1500 -
-- confirm which bound is intended.
SELECT [i].[InventoryArticleId], [i].[ArticleId], [i].[ArticleQuantity], [i].[InventoryId]
FROM [InventoryArticle] AS [i]
INNER JOIN [Article] AS [a] ON [i].[ArticleId] = [a].[ArticleId]
WHERE (([i].[ArticleId] = 1) AND ([a].[ArticlePrice] <= 1500))
AND ((
SELECT COALESCE(SUM([i0].[ArticleQuantity]), 0)
FROM [InventoryArticle] AS [i0]
INNER JOIN [Article] AS [a0] ON [i0].[ArticleId] = [a0].[ArticleId]
WHERE ([i0].[ArticleId] = 1) AND ([a0].[ArticlePrice] < 1500)) > 10)
Expected result is one row. If number would be greater than 34, more rows should be added.
You can use a windowed SUM to calculate a running sum ArticleQuantity. It is likely to be far more efficient than self-joining.
The trick is that you need all rows where the running sum up to the previous row is less than the requirement.
You could utilize a ROWS clause of ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING. But then you need to deal with possible NULLs on the first row.
In any event, even a regular running sum should always use ROWS UNBOUNDED PRECEDING, because the default is RANGE UNBOUNDED PRECEDING, which is subtly different and can cause incorrect results, as well as being slower.
-- Returns just enough inventory rows (ordered by InventoryArticleId) for the
-- summed ArticleQuantity to reach @requirement.
-- @requirement is a local variable (T-SQL variables use the @ sigil).
DECLARE @requirement int = 10;

SELECT
    i.InventoryArticleId,
    i.ArticleId,
    i.ArticleQuantity,
    i.InventoryId
FROM (
    SELECT
        i.*,
        -- running total including the current row
        RunningSum = SUM(i.ArticleQuantity) OVER (PARTITION BY i.ArticleId ORDER BY i.InventoryArticleId ROWS UNBOUNDED PRECEDING)
    FROM InventoryArticle i
    INNER JOIN Article a ON i.ArticleId = a.ArticleId
    WHERE i.ArticleId = 1
      AND a.ArticlePrice <= 1500
) i
-- RunningSum - ArticleQuantity is the total before this row: keep the row
-- while the requirement was not yet met by the preceding rows.
WHERE i.RunningSum - i.ArticleQuantity < @requirement;
You may want to choose a better ordering clause.
EF Core cannot use window functions, unless you specifically define a SqlExpression for it.
My approach would be to:
Filter for the eligible records.
Calculate the running total.
Identify the first record where the running total satisfies your criteria.
Perform a final select of all eligible records up to that point.
Something like the following somewhat stripped down example:
-- Some useful generated data
-- Self-contained example: generate inventory rows, then return the rows of one
-- article up to and including the first row whose running total reaches
-- @QuantityNeeded.
-- Variables and the table variable use the @ sigil (required by T-SQL DECLARE).

-- Some useful generated data
DECLARE @Inventory TABLE (InventoryArticleId INT, ArticleId INT, ArticleQuantity INT)
INSERT @Inventory(InventoryArticleId, ArticleId, ArticleQuantity)
SELECT TOP 1000
    InventoryArticleId = N.n,
    ArticleId = N.n % 5,
    ArticleQuantity = 5 * N.n
FROM (
    -- Generate a range of integers (0 .. 9999).
    -- No ORDER BY here: SQL Server rejects ORDER BY in a derived table without
    -- TOP/OFFSET; the outer ORDER BY N.n already fixes which rows TOP keeps.
    SELECT n = ones.n + 10*tens.n + 100*hundreds.n + 1000*thousands.n
    FROM (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) ones(n)
    CROSS JOIN (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) tens(n)
    CROSS JOIN (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) hundreds(n)
    CROSS JOIN (VALUES(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) thousands(n)
) N
ORDER BY N.n

SELECT * FROM @Inventory

DECLARE @ArticleId INT = 2
DECLARE @QuantityNeeded INT = 500
;
WITH isum as (
    -- eligible rows with a running total; the explicit ROWS frame avoids the
    -- default RANGE frame, which treats rows with equal sort keys as one group
    SELECT i.*, runningTotalQuantity = SUM(i.ArticleQuantity) OVER(ORDER BY i.InventoryArticleId ROWS UNBOUNDED PRECEDING)
    FROM @Inventory i
    WHERE i.ArticleId = @ArticleId
)
SELECT isum.*
FROM (
    -- first row at which the running total satisfies the requirement
    SELECT TOP 1 InventoryArticleId
    FROM isum
    WHERE runningTotalQuantity >= @QuantityNeeded
    ORDER BY InventoryArticleId
) selector
-- all eligible rows up to and including that row
JOIN isum ON isum.InventoryArticleId <= selector.InventoryArticleId
ORDER BY isum.InventoryArticleId
Results:
InventoryArticleId
ArticleId
ArticleQuantity
runningTotalQuantity
2
2
10
10
7
2
35
45
12
2
60
105
17
2
85
190
22
2
110
300
27
2
135
435
32
2
160
595
All of the ORDER BY clauses in the running total calculation, selector, and final select must be consistent and unambiguous (no dups). If a more complex order or preference is needed, it may be necessary to assign a rank value the eligible records before calculating the running total.

CTE multiplication resulting in multiple result rows

EDIT - The problem was that the same item showed up multiple times in different spots in the BOM which then produced a result row for every instance instead of just one. This has been resolved. Thanks
I have the following code
-- Walks the bill of materials below item '10054471' recursively.
-- ActualQuantity is the child's quantity multiplied by the accumulated quantity
-- of its parent path.
-- @tItemDenomalized is a table variable (the stored procedure shown with this
-- question declares it with DECLARE ... TABLE), hence the @ sigil.
WITH tBomCTE (ParentItem, ChildItem, WorkCentre, Operation, Quantity, ActualQuantity, ParentUnitWeight, ParentWeightUnitOfMeasure, ChildUnitWeight, ChildWeightUnitOfMeasure, BomLevel, MaterialClass, ParentItemSource) AS
(
    -- anchor: direct children of the requested top-level item
    SELECT
        id.parentitem, id.ChildItem, id.WorkCentre, id.Operation,
        id.Quantity, id.Quantity, id.ParentUnitWeight,
        id.ParentWeightUnitOfMeasure, id.ChildUnitWeight,
        id.ChildWeightUnitOfMeasure,
        0 as BomLevel, id.MaterialClassCode, id.ParentItemSource
    FROM
        @tItemDenomalized id
    WHERE
        id.parentitem = '10054471'
    UNION ALL
    -- recursive step: children of every row found so far
    -- NOTE(review): the join matches on item number only, so an item that occurs
    -- in several places of the BOM produces one output row per occurrence.
    SELECT
        id.parentitem, id.ChildItem, id.WorkCentre, id.Operation,
        id.Quantity, CAST((id.Quantity * b.ActualQuantity) AS DECIMAL(19,8)), id.ParentUnitWeight,
        id.ParentWeightUnitOfMeasure, id.ChildUnitWeight,
        id.ChildWeightUnitOfMeasure,
        b.BomLevel + 1,
        id.MaterialClassCode, id.ParentItemSource
    FROM
        tBomCTE b
    JOIN
        @tItemDenomalized id ON b.ChildItem = id.parentitem
)
SELECT DISTINCT
    'T1', ParentItem, ChildItem, WorkCentre, Operation, Quantity,
    ActualQuantity,
    COALESCE(ParentUnitWeight, 0), ParentWeightUnitOfMeasure,
    COALESCE(ChildUnitWeight, 0), ChildWeightUnitOfMeasure, BomLevel,
    MaterialClass, ParentItemSource
FROM
    tBomCTE
The problem is that this code is producing multiple result rows. I have isolated it down to the `cast((id.Quantity * b.ActualQuantity) as decimal(19,8))` line.
Basically i am trying to build a Bill Of Materials (BOM) and we had a problem with the quantity not being added up appropriately. For example if we needed 2 of the parent item, the child item quantity only reflected what we needed for 1. Which was messing up costs.
So that line was added. It has never caused a problem but we just ran a test and now it is causing issues.
Specifically. I have a parent item, then child 1, then child 2. When I run this code i get 3 results for child 2 that all have the same path from the parent. So that doesn't make sense. And then the quantities are 22, 44, 66 for the 3 child 2 items respectively.
If i had to guess it looks like whats happening is that the quantity does get multiplied to the parent. Which then turns into 22. Then i gets multiplied by the next parent and instead of multiplying, it is creating a new row entirely.
Right now my solution is to update the quantities with code and then to delete all duplicate rows to get rid of the extra rows. But this is bad practice.
Why is it producing multiple rows instead of multiplying the parent to the current item?
Edit.
Here is my entire stored procedure that is causing the problem:
-- Drop the procedure if it already exists so the script can be re-run.
IF EXISTS (SELECT *
           FROM sysobjects
           WHERE id = object_id(N'[dbo].[spSAL_BomRecursive]')
             AND OBJECTPROPERTY(id, N'IsProcedure') = 1 )
BEGIN
    DROP PROCEDURE [dbo].[spSAL_BomRecursive];
END
GO

-- spSAL_BomRecursive
-- Expands the bill of materials below @Item and stores the rows in tSAL_Bom,
-- tagged with @SessionId.
--   @SessionId            value written to tSAL_Bom.SessionId for every row
--   @Item                 top-level item whose BOM is expanded
--   @DebugLevel           not referenced in the body
--   @CurrentOrStandardBOM 'C' selects job suffix 0, any other value suffix 1
-- Returns 0.
-- Parameters, variables and the table variable use the @ sigil, which T-SQL requires.
CREATE PROCEDURE [dbo].[spSAL_BomRecursive]
(
    @SessionId varchar(50)
    ,@Item [dbo].[ItemType] = NULL
    ,@DebugLevel BIT = 0
    ,@CurrentOrStandardBOM nvarchar(1) = 'C'
)
AS
BEGIN
    SET NOCOUNT ON;

    Declare @tItemDenomalized TABLE (ParentItem nvarchar(30),
                                     ChildItem nvarchar(30),
                                     WorkCentre nvarchar(30) ,
                                     Operation nvarchar(30),
                                     Quantity decimal(19,8),
                                     ParentUnitWeight decimal(18,9),
                                     ParentWeightUnitOfMeasure nvarchar(3),
                                     ChildUnitWeight decimal(18,9),
                                     ChildWeightUnitOfMeasure nvarchar(3),
                                     MaterialClassCode nvarchar(30),
                                     ParentItemSource nvarchar(30));

    Declare @CurrentOrStandardSuffix int
    Set @CurrentOrStandardSuffix = Case when @CurrentOrStandardBOM = 'C' then 0 else 1 end

    -- populate a table with all of the items, merging data to make the recursive SQL easier
    Insert into @tItemDenomalized
        (ParentItem, ChildItem, WorkCentre, Operation, Quantity, ParentUnitWeight, ParentWeightUnitOfMeasure, ChildUnitWeight, ChildWeightUnitOfMeasure, MaterialClassCode, ParentItemSource)
    select distinct i.item, coalesce(jm.item, ''), jr.wc, jr.oper_num, coalesce(jm.matl_qty, 0) as qty ,i.unit_weight as ParentUnitWeight, i.weight_units as ParentWeightUnitOfMeasure, i2.unit_weight as ChildUnitWeight, i2.weight_units as ChildWeightUnitOfMeasure, i.Uf_SalMaterialClassCode as MaterialClass, i.p_m_t_code
    from item_mst i
    left join jobroute_mst jr on i.job = jr.job and jr.suffix = @CurrentOrStandardSuffix
    left join jobmatl_mst jm on jr.job = jm.job and jr.oper_num = jm.oper_num and jr.suffix = jm.suffix
    left join item_mst i2 on coalesce(jm.item, '') = i2.item;

    WITH tBomCTE ( ParentItem, ChildItem, WorkCentre, Operation, Quantity, ActualQuantity, ParentUnitWeight, ParentWeightUnitOfMeasure, ChildUnitWeight, ChildWeightUnitOfMeasure, BomLevel, MaterialClass, ParentItemSource )
    AS
    (
        -- anchor: direct children of the requested item
        select id.parentitem, id.ChildItem, id.WorkCentre, id.Operation, id.Quantity, id.Quantity, id.ParentUnitWeight, id.ParentWeightUnitOfMeasure, id.ChildUnitWeight, id.ChildWeightUnitOfMeasure, 0 as BomLevel, id.MaterialClassCode, id.ParentItemSource
        from @tItemDenomalized id
        where id.parentitem = @Item
        UNION ALL
        -- recursive step: children of every row found so far; ActualQuantity is the
        -- child's quantity multiplied by the accumulated quantity of its parent
        -- NOTE(review): the join matches on item number only, so an item that occurs
        -- in several places of the BOM produces one output row per occurrence.
        select id.parentitem, id.ChildItem, id.WorkCentre, id.Operation, id.Quantity, cast((id.Quantity * b.ActualQuantity) as decimal(19,8)) , id.ParentUnitWeight, id.ParentWeightUnitOfMeasure, id.ChildUnitWeight, id.ChildWeightUnitOfMeasure, b.BomLevel+1, id.MaterialClassCode, id.ParentItemSource
        from tBomCTE b
        join @tItemDenomalized id on b.ChildItem = id.parentitem
    )
    insert into tSAL_Bom
        ([SessionId],[ParentItem],[ChildItem],[WorkCentre],[Operation],[Quantity],[ActualQuantity],[ParentUnitWeight],[ParentWeightUnitOfMeasure],[ChildUnitWeight],[ChildWeightUnitOfMeasure],[BomLevel],[MaterialClassCode],[ParentItemSource])
    SELECT distinct @SessionId, ParentItem, ChildItem, WorkCentre, Operation, Quantity, ActualQuantity, coalesce(ParentUnitWeight, 0), ParentWeightUnitOfMeasure, coalesce(ChildUnitWeight, 0), ChildWeightUnitOfMeasure, BomLevel, MaterialClass, ParentItemSource
    FROM tBomCTE

    -- cleanup the table from yesterday: remove rows created more than one day ago
    delete from tSAL_Bom
    where CreatedOn < Getdate()-1

    RETURN 0;
END
GO
The problematic data rows are the following
SortingOrder DepthLevel ItemOrWorkCenterNumber BaseQuantity Quantity
[10054471] 0 10054471 1 1
[10054471][1605][10008773] 1 10008773 1 2
[10054471][1605][10008773][1100][10024306] 2 10024306 2 4
[10054471][1605][10008773][1100][10024306][1005][10030273] 3 10030273 11 22
[10054471][1605][10008773][1100][10024306][1005][10030273] 3 10030273 11 44
[10054471][1605][10008773][1100][10024306][1005][10030273] 3 10030273 11 66
So as you can hopefully see. There should be 1 row that looks like this
[10054471][1605][10008773][1100][10024306][1005][10030273] 3 10030273 11 88
Because the main parent requires 1 and the first child requires 2, our multiplier is 2. Our next parent normally requires 2, but with the multiplier it requires 4, which makes our multiplier now 8. This line is correct. Then the next line should be the base quantity of 11 times the multiplier of 8, so 88. But instead I am getting rows that are multiplied by 2, 4 and 6.
This isn't really an answer to your question, but an attempt to break down the massive amount of logic into something reproducible.
I wrote a very quick, self-encapsulated query that "sort of" does what I think you are trying to do. Maybe you could do something similar to explain what the differences between my logic and your logic are?
-- Self-contained demo: accumulate a multiplier down a three-node chain, then
-- push each accumulated value back up towards the root and keep the deepest one.
WITH Base (id, parent, multiplier) AS (
    -- Test data: 1 is the root, 2 is its child, 3 is the child of 2.
    SELECT v.id, v.parent, v.multiplier
    FROM (VALUES (1, NULL, 1),
                 (2, 1,    2),
                 (3, 2,    4)) AS v (id, parent, multiplier)
),
Recurs AS (
    -- Walk down from the root, multiplying as we go.
    SELECT root.id,
           1 AS depth,
           root.multiplier
    FROM Base AS root
    WHERE root.id = 1
    UNION ALL
    SELECT child.id,
           walked.depth + 1 AS depth,
           child.multiplier * walked.multiplier AS multiplier
    FROM Base AS child
    INNER JOIN Recurs AS walked
        ON walked.id = child.parent
),
SecondRecurs AS (
    -- Start from every row produced above ...
    SELECT r.id,
           r.depth,
           r.multiplier
    FROM Recurs AS r
    UNION ALL
    -- ... and re-emit its depth/multiplier against the parent of its parent.
    SELECT up.parent,
           s.depth,
           s.multiplier
    FROM SecondRecurs AS s
    INNER JOIN Base AS cur
        ON cur.id = s.id
    INNER JOIN Base AS up
        ON up.id = cur.parent
),
Ordered AS (
    -- Deepest rows first; id breaks ties.
    SELECT sr.id,
           sr.depth,
           sr.multiplier,
           ROW_NUMBER() OVER (ORDER BY sr.depth DESC, sr.id) AS order_id
    FROM SecondRecurs AS sr
)
SELECT o.id,
       o.depth,
       o.multiplier
FROM Ordered AS o
WHERE o.order_id = 1;
So how does this work?
First I make some test data:
id parent multiplier
1 NULL 1
2 1 2
3 2 4
Then I use a recursive CTE to get the depth/ multiplier, using similar logic to your example:
id depth multiplier
1 1 1
2 2 2
3 3 8
But I deliberately let this run without worrying about rolling up children into parents, so now I have a second stage to get this into some sort of order:
id depth multiplier order_id
1 3 8 1
3 3 8 2
NULL 2 2 3
2 2 2 4
1 1 1 5
Finally I can pick out the row I want, ignoring the "partial" results that you seem to be getting in your query?
id depth multiplier
1 3 8
Does this help at all?

Alternative: Sql - SELECT rows until the sum of a row is a certain value

My question is very similar to my previous one posted here:
Sql - SELECT rows until the sum of a row is a certain value
To sum it up, I need to return rows until a certain sum is reached. The difference this time is that I need to find the best fit for this sum; that is, the rows don't have to be sequential. For example:
Let's say I have 5 unpaid receipts from customer 1:
Receipt_id: 1 | Amount: 110€
Receipt_id: 2 | Amount: 110€
Receipt_id: 3 | Amount: 130€
Receipt_id: 4 | Amount: 110€
Receipt_id: 5 | Amount: 190€
So, customer 1 ought to pay me 220€.
Now I need to select the receipts, until this 220€ sum is met and it might be in a straight order, like (receipt 1 + receipt 2) or not in a specific order, like (receipt 1 + receipt 4), any of these situations would be suitable.
I am using SQL Server 2016.
Any additional questions, feel free to ask.
Thanks in advance for all your help.
This query should solve it.
It is a quite dangerous query (containing a recursive CTE), so please be careful!
You can find some documentation here: https://www.essentialsql.com/recursive-ctes-explained/
WITH the_data AS (
    -- Inline sample receipts for two users.
    SELECT t.user_id, t.receipt_id, t.amount
    FROM (
        VALUES (1, 1, 110), (1, 2, 110), (1, 3, 130), (1, 4, 110), (1, 5, 190),
               (2, 1, 10),  (2, 2, 20),  (2, 3, 200), (2, 4, 190)
    ) AS t (user_id, receipt_id, amount)
),
-- Recursive CTE: builds every combination of a user's receipts together with
-- its running total and a comma-separated list of the receipts used.
permutation AS (
    -- Seed: each receipt on its own (receipts with no positive amount are skipped).
    SELECT
        d.user_id,
        d.amount                           AS sum_amount,
        CAST(d.receipt_id AS varchar(max)) AS visited_receipt_id,
        d.receipt_id                       AS max_receipt_id,
        1                                  AS i
    FROM the_data AS d
    WHERE d.amount > 0
    UNION ALL
    -- Extend a combination with one more receipt of the same user.
    SELECT
        d.user_id,
        p.sum_amount + d.amount AS sum_amount,
        CAST(concat(p.visited_receipt_id, ',', CAST(d.receipt_id AS varchar)) AS varchar(max)) AS visited_receipt_id,
        d.receipt_id AS max_receipt_id,
        p.i + 1
    FROM the_data AS d
    JOIN permutation AS p
        ON d.user_id = p.user_id
    WHERE p.i < 1000 -- cap on combination size: at most 1000 receipts per combination
      -- Addition is commutative, so each set of receipts only needs to be visited
      -- in one order; taking them by ascending receipt_id avoids duplicates.
      AND d.receipt_id > p.max_receipt_id
      -- Optional pruning, disabled: AND p.sum_amount + d.amount <= 220
      -- (drops every combination that already exceeds the expected value)
)
SELECT *
FROM permutation
WHERE sum_amount = 220
In order to select only one combination per user_id, replace the last three lines of the previous query with:
-- Final SELECT for the recursive "permutation" query: keeps a single matching
-- combination per user, picked at random. NEWID() is used as the random sort
-- key because SQL Server has no random() function.
SELECT *
FROM (
    SELECT *, row_number() OVER (PARTITION BY user_id ORDER BY NEWID()) AS r
    FROM permutation
    WHERE sum_amount = 220
) AS t
WHERE r = 1
If your target is to sum only 2 receipts in order to reach your value, this could be a solution:
-- Pairs the first receipt (lowest RECEIPT_ID) with another receipt whose
-- amount completes @TARGET, and returns that second receipt (no row if none).
DECLARE @TARGET INT = 220 --SET YOUR TARGET
      , @FIRSTID INT
      , @FIRSTVAL INT;

-- Remember both the id and the amount of the first receipt.
SELECT TOP 1
       @FIRSTID = RECEIPT_ID
     , @FIRSTVAL = AMOUNT
FROM myRECEIPTS
ORDER BY RECEIPT_ID ASC;

SELECT TOP 1 *
FROM myRECEIPTS
WHERE AMOUNT = @TARGET - @FIRSTVAL
  -- Exclude the first receipt itself; otherwise a single 110 receipt would be
  -- reported as its own partner for a target of 220.
  AND RECEIPT_ID <> @FIRSTID
ORDER BY RECEIPT_ID ASC;
this code will do it:
-- Returns the leading rows of receipts whose running total of sum1 first
-- reaches @target (or every row when the whole table stays below it).
-- NOTE(review): ordering by receipt_id is an assumption - use whatever column
-- defines "first" in your table; without an ORDER BY, TOP picks arbitrary rows.
declare @target int = 10;
declare @sum1 int = 0;
declare @numrows int = 0;
declare @totalrows int = (select count(*) from receipts);

-- Stop when the target is reached or the table is exhausted, so the loop
-- always terminates.
while (@sum1 < @target and @numrows < @totalrows)
begin
    set @numrows += 1;
    -- TOP has to be applied before the aggregate; "select top (n) sum(...)"
    -- sums the whole table and then keeps the single resulting row.
    select @sum1 = coalesce(sum(t.sum1), 0)
    from (select top (@numrows) sum1
          from receipts
          order by receipt_id) as t;
end

select top (@numrows) *
from receipts
order by receipt_id;

SQL query to select percentage of total

I have an MSSQL table named stores with the following columns:
Storeid, NumEmployees
1 125
2 154
3 10
4 698
5 54
6 98
7 87
8 100
9 58
10 897
Can someone help me with the SQL query to produce the top stores (StoreID) that have 30% of the total employees (NumEmployees)?
-- Each store's share of the total headcount, as a percentage; only stores
-- holding at least 30 percent on their own are returned, largest first.
-- NOTE(review): if numemployees is an integer column the division truncates.
WITH cte AS (
    SELECT
        s.storeid,
        s.numemployees,
        (s.numemployees * 100) / SUM(s.numemployees) OVER () AS percentofstores
    FROM stores AS s
)
SELECT
    cte.storeid,
    cte.numemployees,
    cte.percentofstores
FROM cte
WHERE cte.percentofstores >= 30
ORDER BY cte.numemployees DESC
Working Demo
Alternative that doesn't use SUM/OVER
-- Same filter without a window function: pair every store with the single-row
-- grand total and compare the ratio as a decimal fraction.
SELECT
    s.storeid,
    s.numemployees
FROM stores AS s
CROSS JOIN (
    SELECT SUM(numemployees) AS [tots]
    FROM stores
) AS t
WHERE CAST(s.numemployees AS DECIMAL(15, 5)) / t.tots >= .3
ORDER BY s.numemployees DESC
Working Demo
Note that in the second version I decided not to multiply by 100 before dividing. This requires a cast to decimal; otherwise the division would be done in integer arithmetic, resulting in no records being returned.
Also, I'm not completely clear that you want this, but you can add TOP 1 to both queries and it will limit the results to just the store with the greatest number of employees among those with at least 30%.
UPDATE
Based on your comments, it sounds like (to paraphrase Kevin):
You want the rows, starting at the store with the most employees and working down until you have at least 30%.
This is difficult because it requires a running percentage and it's a bin-packing problem; however, this does work. Note I've included two other test cases (where the percent exactly equals the top two stores combined, and where it is just over the top two combined).
Working Demo
-- Returns stores from the largest headcount downwards until their combined
-- share of all employees reaches @percent (a fraction, e.g. 0.3 = 30 %).
DECLARE @percent DECIMAL (20, 16)
SET @percent = 0.3
--Other test values
--SET @percent = 0.6992547128452433
--SET @percent = 0.6992547128452434
;WITH sums
     -- runningsum = the store's own headcount plus the headcount of every
     -- strictly larger store. DISTINCT collapses the fan-out of the self-join.
     -- NOTE(review): the window is partitioned by s.numemployees, so stores
     -- with equal headcounts share a partition - verify the result with ties.
     AS (SELECT DISTINCT s.storeid,
                         s.numemployees,
                         s.numemployees + Coalesce(SUM(s2.numemployees) OVER (
                                                   PARTITION BY s.numemployees), 0)
                         runningsum
         FROM stores s
              LEFT JOIN stores s2
                ON s.numemployees < s2.numemployees),
     -- running_percent = runningsum as a fraction of the grand total;
     -- rn numbers the stores by ascending runningsum (storeid breaks ties).
     percents
     AS (SELECT storeid,
                numemployees,
                runningsum,
                CAST(runningsum AS DECIMAL(15, 5)) / tots.total
                running_percent,
                Row_number() OVER (ORDER BY runningsum, storeid ) rn
         FROM sums,
              (SELECT SUM(numemployees) total
               FROM stores) AS tots)
-- NOTE(review): running_percent is selected twice; one of the two was
-- possibly meant to be runningsum - left as written.
SELECT p.storeID,
       p.numemployees,
       p.running_percent,
       p.running_percent,
       p.rn
FROM percents p
     -- exactpercent.rn is NULL unless some row lands exactly on @percent.
     CROSS JOIN (SELECT MAX(rn) rn
                 FROM percents
                 WHERE running_percent = @percent) exactpercent
     -- underpercent.rn is the last row still at or below @percent (NULL if none).
     LEFT JOIN (SELECT MAX(rn) rn
                FROM percents
                WHERE running_percent <= @percent) underpercent
       -- Keep every row up to that one, plus one extra row to cross the
       -- threshold when no row hits it exactly.
       ON p.rn <= underpercent.rn
          OR ( exactpercent.rn IS NULL
               AND p.rn <= underpercent.rn + 1 )
WHERE
     -- When even the first store exceeds @percent, return that store alone.
     underpercent.rn is not null or p.rn = 1

SQL - Subtracting a depleting value from rows

I have a situation where I need to take a "quantity consumed" from one table, and apply it against a second table that has 1 or more rows that are "pooled lots" of quantities. I'm not sure how to describe it better, here's what I mean from a table perspective:
Table Pooled_Lots
----------------------------
Id Pool Lot Quantity
1 1 1 5
2 1 2 10
3 1 3 4
4 2 1 7
5 3 1 1
6 3 2 5
Table Pool_Consumption
----------------------------
Id PoolId QuantityConsumed
1 1 17
2 2 8
3 3 10
I need a resulting rowset from a SQL query that would look like:
Pool Lot Quantity QuantityConsumed RunningQuantity RemainingDemand SurplusOrDeficit
1 1 5 17 0 12 NULL
1 2 10 17 0 2 NULL
1 3 4 17 2 0 2
2 1 7 8 0 1 -1
3 1 1 10 0 9 NULL
3 2 5 10 0 4 -4
So, Pool_Consumption.QuantityConsumed needs to be a "depleting value" subtracted over the rows from Pooled_Lots where Pool_Consumption.PoolId = Pooled_Lots.Pool. I can't figure out how you would state a query that says:
If not on the last row, AmtConsumedFromLot = Quantity - QuantityConsumed if QuantityConsumed < Quantity, else Quantity
If more rows, QuantityConsumed = QuantityConsumed - Quantity
Loop until last row
If last row, AmtConsumedFromLot = QuantityConsumed
Assume Id is a primary key, and the target DB is SQL 2005.
Edit: Since people are proclaiming I am "not giving enough information, please close this" Here is more: There is NO set lot that the Pool_Consumption draws from, it needs to draw from all lots where Pool_Consumption.PoolId = Pooled_Lots.Pool, until QuantityConsumed is either completely depleted or I am subtracting against the last subset of Pooled_Lots rows where Pool_Consumption.PoolId = Pooled_Lots.Pool
I don't know how more to explain this. This is not a homework question, this is not a made-up "thought exercise". I need help trying to figure out how to properly subtract QuantityConsumed against multiple rows!
Left as an exercise to the OP: Figuring out the correct results given the sample data and summarizing the results of the following query:
-- Create some test data (table variables, so the script is self-contained).
declare @Pooled_Lots as table ( Id int, Pool int, Lot int, Quantity int );
insert into @Pooled_Lots ( Id, Pool, Lot, Quantity ) values
  ( 1, 1, 1, 5 ), ( 2, 1, 2, 10 ), ( 3, 1, 3, 4 ),
  ( 4, 2, 1, 7 ),
  ( 5, 3, 1, 1 ), ( 6, 3, 2, 5 );
declare @Pool_Consumption as table ( Id int, Pool int, QuantityConsumed int );
insert into @Pool_Consumption ( Id, Pool, QuantityConsumed ) values
  ( 1, 1, 17 ), ( 2, 2, 8 ), ( 3, 3, 10 );

select * from @Pooled_Lots order by Pool, Lot;
select * from @Pool_Consumption order by Pool;

-- Walk the lots of each pool in Lot order, carrying two running values:
--   RunningQuantity  quantity left over once the demand has been met
--   RemainingDemand  demand still unmet after the lots seen so far
with Amos as (
  -- Start with Lot 1 for each Pool.
  select PL.Pool, PL.Lot, PL.Quantity, PC.QuantityConsumed,
    case
      when PC.QuantityConsumed is NULL then PL.Quantity  -- pool has no consumption row
      when PL.Quantity >= PC.QuantityConsumed then PL.Quantity - PC.QuantityConsumed
      when PL.Quantity < PC.QuantityConsumed then 0
      end as RunningQuantity,
    case
      when PC.QuantityConsumed is NULL then 0
      when PL.Quantity >= PC.QuantityConsumed then 0
      when PL.Quantity < PC.QuantityConsumed then PC.QuantityConsumed - PL.Quantity
      end as RemainingDemand
    from @Pooled_Lots as PL left outer join
      @Pool_Consumption as PC on PC.Pool = PL.Pool
    where Lot = 1
  union all
  -- Add the next Lot for each Pool (the join relies on Lot numbers being consecutive).
  select PL.Pool, PL.Lot, PL.Quantity, CTE.QuantityConsumed,
    case
      when CTE.RunningQuantity + PL.Quantity >= CTE.RemainingDemand then CTE.RunningQuantity + PL.Quantity - CTE.RemainingDemand
      when CTE.RunningQuantity + PL.Quantity < CTE.RemainingDemand then 0
      end,
    case
      when CTE.RunningQuantity + PL.Quantity >= CTE.RemainingDemand then 0
      when CTE.RunningQuantity + PL.Quantity < CTE.RemainingDemand then CTE.RemainingDemand - CTE.RunningQuantity - PL.Quantity
      end
    from Amos as CTE inner join
      @Pooled_Lots as PL on PL.Pool = CTE.Pool and PL.Lot = CTE.Lot + 1
  )
-- SurplusOrDeficit is reported only on the last lot of each pool.
select *,
  case
    when Lot = ( select max( Lot ) from @Pooled_Lots where Pool = Amos.Pool ) then RunningQuantity - RemainingDemand
    else NULL end as SurplusOrDeficit
  from Amos
  order by Pool, Lot;
(based on version 4 of question as my WiFi went down for quite some time)
-- Derived-table fragment (not a standalone statement): total quantity per pool.
(SELECT
     pl.Pool,
     SUM(pl.Quantity) AS Pool_Quantity
 FROM Pooled_Lots AS pl
 GROUP BY pl.Pool) AS Pool_Quantity_Table
Now you have a table with the Pool Quantity rolled up into a single value.
Now the complete query:
-- One row per consumed pool: total quantity available, quantity consumed,
-- and the difference between the two (negative means a deficit).
SELECT
    pc.PoolID                                AS Pool,
    pq.Pool_Quantity                         AS Quantity,
    pc.QuantityConsumed                      AS AmtConsumedFromLot,
    (pq.Pool_Quantity - pc.QuantityConsumed) AS SurplusOrDefecit
FROM Pool_Consumption AS pc
INNER JOIN (
    -- Roll the lots of each pool up into a single quantity.
    SELECT
        pl.Pool,
        SUM(pl.Quantity) AS Pool_Quantity
    FROM Pooled_Lots AS pl
    GROUP BY pl.Pool
) AS pq
    ON pc.PoolID = pq.Pool;
Habo's answer looks like it works well, but I wanted to provide a variation that doesn't require recursion but is still set-based. Without recursion, the solution is simpler and will probably perform better (with the right indexing strategy).
/*Raw data setup*/
CREATE TABLE #Pool_Consumption (Id int, [Pool] int, QuantityConsumed int);
CREATE TABLE #Pooled_Lots (Id int, [Pool] int, Lot int, Quantity int);

INSERT INTO #Pool_Consumption (Id, [Pool], QuantityConsumed)
VALUES (1, 1, 17), (2, 2, 8), (3, 3, 10);

INSERT INTO #Pooled_Lots (Id, [Pool], Lot, Quantity)
VALUES (1, 1, 1, 5), (2, 1, 2, 10), (3, 1, 3, 4),
       (4, 2, 1, 7),
       (5, 3, 1, 1), (6, 3, 2, 5);

/* Work tables.
   Every unit of quantity gets a position number inside its pool:
   a consumption covers positions 1..QuantityConsumed, and each lot covers the
   block of positions that follows the lots before it (in Lot order). */
SELECT pc.Id,
       pc.[Pool],
       pc.QuantityConsumed,
       1                   AS LowerRange,
       pc.QuantityConsumed AS UpperRange
INTO #Pool_Consumption_Range
FROM #Pool_Consumption AS pc;

SELECT pl.Id,
       pl.[Pool],
       pl.Lot,
       pl.Quantity,
       SUM(pl.Quantity) OVER (PARTITION BY pl.[Pool] ORDER BY pl.Lot) - pl.Quantity + 1 AS LowerRange,
       SUM(pl.Quantity) OVER (PARTITION BY pl.[Pool] ORDER BY pl.Lot)                   AS UpperRange
INTO #Pooled_Lots_Range
FROM #Pooled_Lots AS pl;

/* One row per lot touched by the consumption of its pool. */
SELECT cons.[Pool],
       lot.Lot,
       lot.Quantity,
       cons.QuantityConsumed,
       SUM(ovl.ReservedQty) OVER (PARTITION BY cons.[Pool] ORDER BY lot.ID) AS QtyFulfilled,
       lot.Quantity - ovl.ReservedQty AS RemainderInLot,
       cons.QuantityConsumed
           - SUM(ovl.ReservedQty) OVER (PARTITION BY cons.[Pool] ORDER BY lot.ID) AS RemainingDemand,
       /* Only filled in on the last lot row of the pool */
       CASE
           WHEN lot.ID = MAX(lot.ID) OVER (PARTITION BY cons.[Pool])
               THEN SUM(lot.Quantity) OVER (PARTITION BY cons.[Pool] ORDER BY lot.ID)
                    - cons.QuantityConsumed
       END AS SurplusOrDefecit
FROM #Pool_Consumption_Range AS cons
LEFT JOIN #Pooled_Lots_Range AS lot
    ON cons.[Pool] = lot.[Pool]
   AND (
           /* Consumption range starts or ends inside the lot range */
           cons.LowerRange BETWEEN lot.LowerRange AND lot.UpperRange
        OR cons.UpperRange BETWEEN lot.LowerRange AND lot.UpperRange
           /* Lot range ends inside the consumption range */
        OR lot.UpperRange BETWEEN cons.LowerRange AND cons.UpperRange
       )
CROSS APPLY (
    /* Size of the overlap between the consumption range and the lot range.
       The four end points are sorted and the two middle ones bound the overlap.
       Example: ranges 2..5 and 3..6 sort as 2,3,5,6.
       OFFSET 1 / FETCH 1 keeps the second point (3); LEAD is evaluated before
       OFFSET is applied, so it still sees the third point (5).
       5 - 3 + 1 = 3 positions (3, 4, 5) are shared by the two ranges. */
    SELECT LEAD(pts.Points, 1) OVER (ORDER BY pts.Points) - pts.Points + 1 AS ReservedQty
    FROM (VALUES (cons.LowerRange),
                 (cons.UpperRange),
                 (lot.LowerRange),
                 (lot.UpperRange)) AS pts (Points)
    ORDER BY pts.Points
    OFFSET 1 ROW FETCH NEXT 1 ROW ONLY
) AS ovl
ORDER BY cons.[Pool], lot.Lot;