SQL Group By Sum of Column Value Not Fixed Value - sql

I have a table of data like this:
CommonURN GiftAidAmount GA_Status
14013084 2.00 45
14637494 20.00 45
14637496 1.00 45
14637508 5.00 45
14637520 10.00 45
14637525 19.00 45
14637562 10.00 45
14637586 12.00 45
14637590 3.00 45
14637649 5.00 45
I want to group my rows into blocks totalling £1000 or less, where the GA_Status value equals 45.
What I have so far is this:
-- Staging table: one row per qualifying gift, plus its running total and group.
CREATE TABLE #Temp
(CommonURN int,
GiftAidAmount money,
RunningSum money,
GroupID int);
-- Running sum per row: for each row, total of GiftAidAmount over all rows with
-- a lower-or-equal CommonURN (a correlated triangular subquery, O(n^2)).
WITH cte
(commonURN,giftAidAmount,running_sum)
AS
(SELECT
c.commonURN,
c.giftAidAmount,
(select sum(c2.giftAidAmount)
from dbo.[Campaign_Data] as c2
where
GA_Status = 45 and
c2.commonURN <= c.commonURN)
FROM
[dbo].[Campaign_Data] c
WHERE GA_Status = 45)
INSERT INTO #Temp (CommonURN,GiftAidAmount,RunningSum,GroupID)
SELECT
commonURN,
giftAidAmount,
running_sum,
-- NOTE(review): ceiling(running_sum/1000) slices the running total at fixed
-- 1000 boundaries. A row that straddles a boundary is pushed into the next
-- group, so a group's own total can exceed 1000 -- this is the reported bug.
ceiling(running_sum/1000) as GroupID
FROM
cte
ORDER BY
commonURN
-- Report each group's row count and total.
SELECT
GroupID,
COUNT(*) NumRows,
SUM(GiftAidAmount) TotalGiftAid
FROM
#Temp
GROUP BY GroupID
This works pretty well except that one of the groups in the result set is over £1000:
GroupID NumRows TotalGiftAid
1 100 999.83
2 107 999.57
3 91 990.82
4 114 1009.34
5 114 995.40
6 58 455.65
I don't understand why this is. Can anyone explain and help me fix it? Or suggest a better approach altogether?

You can get a running value with single table update using a variable, and condition the variable to reset when the running value exceeds 1000. You can also update a GroupID based on this. Here is how I would do it. I insert a bunch of random amounts here anywhere from 5 to 30. The larger the amounts, the more likely you will run into a scenario where the running value is well below 1000, but the next value puts the running value over 1000:
-- insert test data: ~2000 random amounts between 5 and 30 in a table variable
-- (the original post had the variable sigils mangled from @ to #; restored here)
declare @Campaign_Data table
(
    GiftAidAmount money,
    RunningValue money,
    GroupID int
)
while (select count(*) from @Campaign_Data) < 2000
begin
    insert into @Campaign_Data (GiftAidAmount)
    values (round(rand()*25,2)+5)
end
-- update Running Value and GroupID; restart when amount exceeds 1000.
-- This is the "quirky update" pattern: variable assignment inside UPDATE
-- carries state row to row.
-- NOTE(review): the row order of a quirky update is not guaranteed by the
-- engine; on a real table it should be driven by a clustered index -- confirm
-- before relying on it in production.
declare @RunningValue money,
        @GroupID int
set @RunningValue = 0
set @GroupID = 1
update @Campaign_Data
set
    -- start a new group when adding this amount would push the total past 1000
    @GroupID = GroupID = case when @RunningValue + GiftAidAmount > 1000
                              then @GroupID + 1
                              else @GroupID
                         end,
    -- reset the running value to this amount on rollover, else accumulate
    @RunningValue = RunningValue = case when @RunningValue + GiftAidAmount > 1000
                                        then GiftAidAmount
                                        else @RunningValue + GiftAidAmount
                                   end
-- return values grouped by GroupID
select
    GroupID,
    count(GroupID) NumRows,
    sum(GiftAidAmount) TotalGiftAid
from @Campaign_Data
group by
    GroupID
order by
    GroupID

Try this:
-- Report groups, keeping only those whose total stayed under 1000.
-- NOTE(review): HAVING filters *after* aggregation, so this drops the
-- over-1000 group from the report instead of re-balancing its rows --
-- the rows in that group are simply not shown.
SELECT
GroupID,
COUNT(*) NumRows,
SUM(GiftAidAmount) TotalGiftAid
FROM
#Temp
GROUP BY GroupID
HAVING SUM(GiftAidAmount) < 1000

Related

How to split the number for each row in SQL?

I am actually trying to split the quantity number for each of the rows.
Example :
Total Quantity for each container: 29
For some reason, the max limit for the 1st batch is 10, and the second batch is 69
I have tried to write the query like below, but it return wrongly.
-- NOTE(review): both branches of this CASE return Quantity, so the CASE has
-- no effect -- the whole expression is equivalent to just Quantity. That is
-- why the query "returns wrongly": no split is ever performed.
select case when totalQ < 29.5 then Quantity else Quantity
end as Quantity, lotno, totalQ from (
select quantity - 29.5 as totalQ,
* from TestB where id in (
select id from TestA where id =20))A
However, I am expecting something like below:
Could anyone please help me to solve this?
Thanks in advance!
Try something like this:
-- Test fixture: batches with total quantities to split into chunks.
create table TestB ( BatchID varchar(10), TotalQuantity int )
insert into TestB ( BatchID, TotalQuantity ) values
( 'A', 70 ), ( 'B', 10 ), ( 'C', 69 ), ( 'D', 100 ),
( 'E', 0 ), ( 'F', 29 ), ( 'G', 58 ), ( 'H', 200 )
-- Recursive CTE: the anchor peels the first chunk of at most 29 from each
-- batch and carries the leftover in Remaining; each recursive step peels
-- another chunk until Remaining reaches 0. The chunk size 29 is hard-coded
-- in four places -- parameterize if the batch size needs to vary.
; with
A as
( select BatchID,
case when TotalQuantity >= 29 then 29 else TotalQuantity end as Quantity,
case when TotalQuantity >= 29 then TotalQuantity - 29 else 0 end as Remaining
from TestB
union all
select BatchID,
case when Remaining >= 29 then 29 else Remaining end as Quantity,
case when Remaining >= 29 then Remaining - 29 else 0 end as Remaining
from A
where Remaining > 0 )
select BatchID,
Quantity
from A
order by BatchID,
Quantity desc
-- raise the recursion cap from the default 100 so large quantities
-- (many chunks per batch) can finish
option ( MaxRecursion 1000 )
David's answer is pretty nice. This is a longer answer using SQL Stored Procedure.
Example is here: http://rextester.com/VUQXVQ46335
Stored Procedure
-- Test fixture.
create table test (batch varchar(20), quantity int);
insert into test values ('lot0', 29), ('lot1', 30), ('lot2', 28), ('lot3', 100);
go
-- Splits every row of [test] into chunks of at most @BatchSize and returns
-- one row per chunk. (The original post had @ sigils mangled to #; variables
-- and the @@fetch_status function are restored here -- #tempTest is a genuine
-- temp table and keeps its #.)
create procedure CreateBatches (@BatchSize int)
as
begin
    -- declare variables and create a temporary table
    set nocount on
    declare @v_batch varchar(20), @v_quantity int
    create table #tempTest (batch varchar(20), quantity int)
    -- loop through all records
    declare testCursor cursor for select * from test
    open testCursor
    fetch next from testCursor into @v_batch, @v_quantity
    -- process each record
    while @@fetch_status = 0
    begin
        -- if quantity is larger than the bucket, insert the batch size in the
        -- table, reduce the quantity and continue looping
        while @v_quantity > @BatchSize
        begin
            insert into #tempTest values (@v_batch, @BatchSize)
            set @v_quantity = @v_quantity - @BatchSize
        end
        -- store the remainder (at most @BatchSize) in the table
        insert into #tempTest values (@v_batch, @v_quantity)
        fetch next from testCursor into @v_batch, @v_quantity
    end
    select * from #tempTest
    drop table #tempTest
    close testCursor
    deallocate testCursor
    set nocount off
end;
go
Result
exec CreateBatches 32;
# batch quantity
1 lot0 29
2 lot1 30
3 lot2 28
4 lot3 32
5 lot3 32
6 lot3 32
7 lot3 4
Another run
exec CreateBatches 29;
# batch quantity
1 lot0 29
2 lot1 29
3 lot1 1
4 lot2 28
5 lot3 29
6 lot3 29
7 lot3 29
8 lot3 13
This method gives you a bit of flexibility and a little more understanding of how the batching system works. It is possible that the Stored Procedure will be slow when processing a large amount of data.
Comparison
I did a comparison of David's recursive CTE and Stored Procedure. I created 3031 lots/batches/records starting with 500 quantity for lot 1, 600 for lot 2...303500 for lot 3031.
Results
Results are in seconds. Dash means the query was aborted after 12s.
Batch size CTE SP
---------- ----- -----
300000 1.46s 1.66s
200000 1.61s 1.88s
100000 2.27s 2.47s
50000 5.00s 5.41s
25000 7.71s 8.05s
12500 - -
These are just crude results testing on rextester. You can see that stored procedure is slower than CTE.
use self join and union
-- NOTE(review): presumably t has columns batch, quantity and totalQ -- verify
-- against the asker's schema. The self-join on batch only duplicates rows
-- here (no extra columns from t1 are used), and UNION then de-duplicates the
-- combined result; confirm this actually produces the intended split.
select t.batch,t.quantity from t join t t1 on t.batch=t1.batch
union
select t.batch,t.totalQ from t join t t1 on t.batch=t1.batch

TSQL Divide Up a Table evenly based on a Sort

How can I split a table evenly based on a sort? Here is a mock up script of what I am talking about:
Edit: I want to split the table evenly by balance into 4 different groups. (or any number of groups). It's important so that each group has their fair share of high and low balances.
-- Mock-up: a work list of 49 accounts with ascending balances, shown in
-- descending order. (Variable sigils restored from the mangled # to @.)
DECLARE @WorkList TABLE
(
    account_number VARCHAR(10),
    balance MONEY,
    assigned_to INT
)
DECLARE @Loop INT
DECLARE @TotalPartsToSplitEvenly INT
SET @TotalPartsToSplitEvenly = 4
SET @Loop = 1
-- Generate rows 1..49 with balance = loop * 1000.
WHILE @Loop < 50
BEGIN
    INSERT INTO @WorkList (account_number, balance, assigned_to)
    VALUES ((@Loop * 5) * 1234, @Loop * 1000, NULL)
    SET @Loop = @Loop + 1
END
SELECT *
FROM @WorkList
ORDER BY balance DESC
I want to split the result set evenly so that everyone gets their fair share of balance.
account_number balance assigned_to
-------------- --------------------- -----------
302330 49000.00 1
296160 48000.00 2
289990 47000.00 3
283820 46000.00 4
277650 45000.00 1
271480 44000.00 2
265310 43000.00 3
259140 42000.00 4
252970 41000.00 1
246800 40000.00 2
240630 39000.00 3
NTILE doesn't work for this. I am out of ideas.
You seem to want row_number() mod 4:
-- Round-robin assignment: rank rows by balance (highest first) and cycle
-- 1,2,3,4 so each of the 4 buckets gets an even mix of high and low balances.
-- (Reference restored to the @WorkList table variable; the sigil was mangled.)
select wl.*,
       (1 + (row_number() over (order by balance desc) - 1) % 4) as assigned_to
from @WorkList wl;

SQL subset sum negative values

I have a table valued function that return the set of rows that matches a given sum, It works fine with positive values but not with negatives one.
Can someone modify this function to work with both positive and negative values (price field)
The function take a table with decimal values, then return the first combination of rows that match a given sum in the parameter :
For example if the #psum = 9 and the given table below :
n id price
1 1 4.00
2 2 4.00
3 3 5.00
4 4 6.00
5 5 8.00
The out put is :
select * from SubsetSum2(9)
n id price
3 3 5.00
2 2 4.00
-- Table-valued function: depth-first search for the first subset of the
-- (hard-coded) price list whose prices sum exactly to @psum; returns that
-- subset, or an empty set if none exists.
-- (All @ sigils restored from the mangled #.)
alter FUNCTION [dbo].[SubsetSum2](@psum int )
RETURNS @tt table (n int,id int, price numeric(20,2))
AS
BEGIN
    declare @t table (n int IDENTITY(1,1), id int, price numeric(20,2))
    insert into @t -- note asc order of book prices
    select 1, 4 union all
    select 2, 4 union all
    select 3, 5 union all
    select 4, 6 union all
    select 5, 8
    declare @rows int, @p numeric(20,2), @sum numeric(20,2)
    -- BUG FIX: the original hard-coded "set @sum = 9" and ignored the @psum
    -- parameter entirely; use the parameter so any target sum works.
    set @sum = @psum
    -- a price larger than the target can never be part of a solution
    delete from @t where price>@sum
    set @p=(select sum(price) from @t)
    if @p>= @sum -- solvable only if the remaining total can reach the target
    begin --1
        set @rows=(select max(n) from @t)
        declare @n int, @s numeric(20,2)
        set @n=@rows+1 set @s=0
        while 0=0
        begin --2
            -- greedy descent: walk candidates from high n to low, taking any
            -- price that still fits and leaves enough below it to reach @sum
            while @n>1
            begin --3
                set @n=@n-1
                if @s+(select price from @t where n=@n)<=@sum
                and @s+(select sum(price) from @t where n<=@n)>=@sum
                begin --4
                    set @s=@s+(select price from @t where n=@n)
                    insert into @tt select n, id, price from @t where n=@n
                    if @s=@sum return ;
                end --4
            end --3
            -- backtrack: drop the smallest chosen row and resume below it
            set @n=(select min(n) from @tt)
            set @s=@s-(select price from @tt where n=@n)
            delete from @tt where n=@n
            -- exhausted: nothing chosen and the remaining prefix cannot reach @sum
            if @s=0 and (select sum(price) from @t where n<@n)<@sum break
        end --2
    end --1
    return
END
Use the absolute value function, ABS(price), to treat the negative values as positives.

How to distribute sql results over column x?

I have a table of houses.
City | Address | Price | NewRelease
Auckland, 18 Queen St, 1000000, true
Auckland, 21 Queen St, 1100000, false
Auckland, 23 Queen St, 1150000, false
Wellington, 23 Cuba St, 850000, false
.....
What I want to do is be able to query that list and return a list of x houses that are in a particular city AND I want the list to be as evenly balanced as possible between NewReleases, so theres roughly half true and half false. The function needs to be able to accept an input as to how many houses from a region to return. Something like houses_in_region(amount)
Sometimes there might not be enough of each to get a 50/50 split then its most important that the overall right number is just returned. For example if you ask for 10 houses in Wellington but there are only 12 houses in wellington in the db and all of them are NewRelease=false then it should just return 10 of those. However if you ask the same in Auckland where there might be 1000, then you should return 10 houses with 5 of them being NewRelease and 5 that are not.
Is this possible with sql?
Thanks for your help guys!
You can use this:
-- Return @i houses, split as evenly as possible between NewRelease values.
-- (Variable sigils restored from the mangled # to @.)
DECLARE @i int = 6, -- How many rows we want to show
        @s int      -- Which NewRelease value will be inserted first
DECLARE @Results TABLE ( -- Result table
    City nvarchar(100),
    [Address] nvarchar(100),
    [Price] int,
    [NewRelease] bit
)
-- Pick the minority NewRelease value to insert first, so the scarcer kind
-- gets its half before the plentiful kind tops the result up.
SELECT @s = CASE WHEN SUM(CASE WHEN NewRelease = 1 THEN 1 ELSE 0 END) > SUM(CASE WHEN NewRelease = 0 THEN 1 ELSE 0 END) THEN 0 ELSE 1 END
FROM YourTable
INSERT INTO @Results -- Insert half of the needed rows (fewer if scarce)
SELECT TOP (@i/2) *
FROM YourTable
WHERE NewRelease = @s
SELECT @i = @i-COUNT(*) -- Re-count how many rows we still need
FROM @Results
INSERT INTO @Results -- Fill the remainder from the other kind
SELECT TOP (@i) *
FROM YourTable
WHERE NewRelease != @s
SELECT * -- show data selected
FROM @Results
ORDER BY NewRelease DESC

SQL Query to retrieve the last records till the quantity purchased reaches the total quantity in stock

I have a table that have the ItemCode and Quantity in stock and another table that contains the purchases.
I want a query to get the Quantity in stock (ex. Qty = 5) and to take the purchase table to get the purchase invoices by descending order and take the Item Prices.
The Query has to keep retrieving records from the Purchase table according to the Quantity till we reach sum of Quantity in stock = 5.
ex.
**Purchase No ItemCode Qty Cost Price**
2 123 2 100
3 123 10 105
6 123 2 100
8 123 1 90
9 123 2 120
---------------------------------------------
**ItemCode Qty in Stock**
123 5
--------------------------------------------
In this example I want the query to retrieve for me the last 3 invoices (9,8 and 6) because the Qty (2+1+2 = 5)
Does anyone have a suggestion?
Thank you in advance
This script should do the job.
/* SQL SCRIPT BEGIN */
-- Running totals per purchase: for each purchase, Qty becomes the sum of the
-- quantities of that purchase and every later one (PurchaseNo >=) for the
-- same item -- a triangular self-join, so totals grow toward older purchases.
create table #tmp (PurchaseNo int, ItemCode int, Qty int)
insert into #tmp (PurchaseNo, ItemCode, Qty)
select
p1.PurchaseNo, p1.ItemCode, sum(t.Qty) as Qty
from
Purchases p1
join
(
select
p2.PurchaseNo,
p2.ItemCode, p2.Qty
from
Purchases p2
) t on p1.PurchaseNo <= t.PurchaseNo and p1.ItemCode = t.ItemCode
group by p1.PurchaseNo, p1.ItemCode
-- NOTE(review): ORDER BY on an INSERT ... SELECT does not guarantee stored
-- order; harmless, but it has no effect here.
order by p1.ItemCode, sum(t.Qty) asc
-- Purchases whose running total is still below the stock level (5) ...
select * From #tmp
where
ItemCode = 123
and
Qty < 5
union
-- ... plus the single purchase that first reaches/exceeds it.
-- NOTE(review): the trailing ORDER BY belongs to the whole UNION, so this
-- TOP 1 has no ORDER BY of its own and its pick is not guaranteed
-- deterministic -- confirm it selects the intended boundary row.
select top 1 * From #tmp
where
ItemCode = 123
and
Qty >= 5
order by PurchaseNo desc
drop table #tmp
/* SQL SCRIPT END */
This can be the solution:
Here I have Used Result Table which will store the result.
I have used three tables Purchage(PurchageNo,ItemCode,Qty) , Stock(ItemCode,QtyInStock) and result(PurchageNo).
Full Workable Code is Here:
-- Walk the purchases of one item in PurchaseNo order, accumulating quantities
-- while they still fit within the quantity in stock; record each contributing
-- PurchaseNo in [Result]. (Variable sigils restored from the mangled # to @;
-- #PurchageTemp is a genuine temp table and keeps its #. Table/column names
-- keep the original "Purchage" spelling to match the poster's schema.)
DECLARE @ItemCode int;
DECLARE @AvailableQty int;
SET @ItemCode = 123 ;
SET @AvailableQty = (select QtyInStock from Stock where ItemCode = @ItemCode);
-- Number the purchases so the loop can visit them one by one.
SELECT
RowNum = ROW_NUMBER() OVER(ORDER BY PurchageNo),*
INTO #PurchageTemp
FROM Purchage
DECLARE @MaxRownum INT;
SET @MaxRownum = (select COUNT(*) from #PurchageTemp);
DECLARE @Iter INT;
SET @Iter = 1;
DECLARE @QtySum int = 0;
DECLARE @QtySumTemp int = 0;
DECLARE @CurrentItem int;
WHILE (@Iter <= @MaxRownum and @QtySum <= @AvailableQty)
BEGIN
    set @QtySumTemp = @QtySum;
    set @QtySumTemp = @QtySumTemp + (SELECT Qty FROM #PurchageTemp WHERE RowNum = @Iter and ItemCode = @ItemCode);
    -- only take this purchase if it still fits within the available stock
    IF @QtySumTemp <= @AvailableQty
    BEGIN
        set @QtySum = @QtySumTemp;
        set @CurrentItem = (SELECT PurchageNo FROM #PurchageTemp WHERE RowNum = @Iter and ItemCode = @ItemCode);
        insert into [Result] values (@CurrentItem);
    END
    SET @Iter = @Iter + 1
END
DROP TABLE #PurchageTemp