SQL Group By Modulo of Row Count

SQL Group By Modulo of Row Count - sql

I have the following sample data:
Id Name Quantity
1 Red 1
2 Red 3
3 Blue 1
4 Red 1
5 Yellow 3
So for this example, there are a total of 5 Red, 1 Blue, and 3 Yellow. I am looking for a way to group them by Color, but with a maximum of 2 items per group (sorting is not important). Like so:
Name QuantityInPackage
Red 2
Red 2
Red 1
Blue 1
Yellow 2
Yellow 1
Any suggestions on how to accomplish this using T-SQL on MS-SQL 2005?

I would define a table containing sequential numbers, say 1 to 1000 and join that table (unless your database supports generating these numbers in the query like Oracle using CONNECT BY):
Table num
n
1
2
3
...
I tried the following query using Oracle (should work with TSQL too):
With summed_colors As (
Select name, Sum(quantity) quantity
From colors
Group By name
)
Select
name,
Case When n*2-1 = quantity Then 1 Else 2 End quantityInPackage
From summed_colors
Join nums On ( n*2-1 <= quantity )
Order By name, quantityInPackage Desc
and it returns
Blue 1
Red 2
Red 2
Red 1
Yellow 2
Yellow 1

You need to use a numbers table to unpivot your data to make multiple rows:
DECLARE #PackageSize AS int
SET #PackageSize = 2
DECLARE #numbers AS TABLE (Number int)
INSERT INTO #numbers
VALUES (1)
INSERT INTO #numbers
VALUES (2)
INSERT INTO #numbers
VALUES (3)
INSERT INTO #numbers
VALUES (4)
INSERT INTO #numbers
VALUES (5)
INSERT INTO #numbers
VALUES (6)
INSERT INTO #numbers
VALUES (7)
INSERT INTO #numbers
VALUES (8)
INSERT INTO #numbers
VALUES (9)
INSERT INTO #numbers
VALUES (10)
DECLARE #t AS TABLE
(
Id int
,Nm varchar(6)
,Qty int
)
INSERT INTO #t
VALUES (1, 'Red', 1)
INSERT INTO #t
VALUES (2, 'Red', 3)
INSERT INTO #t
VALUES (3, 'Blue', 1)
INSERT INTO #t
VALUES (4, 'Red', 1)
INSERT INTO #t
VALUES (5, 'Yellow', 3) ;
WITH Totals
AS (
SELECT Nm
,SUM(Qty) AS TotalQty
,SUM(Qty) / #PackageSize AS NumCompletePackages
,SUM(Qty) % #PackageSize AS PartialPackage
FROM #t
GROUP BY Nm
)
SELECT Totals.Nm
,#PackageSize AS QuantityInPackage
FROM Totals
INNER JOIN #numbers AS numbers
ON numbers.Number <= Totals.NumCompletePackages
UNION ALL
SELECT Totals.Nm
,PartialPackage AS QuantityInPackage
FROM Totals
WHERE PartialPackage <> 0

It's not grouping or modulo/division that's the hard part here, it's the fact that you need to do an aggregate (sum) and then explode the data again. There aren't actually any "Red 2" rows, you have to create them somehow.
For SQL Server 2005+, I would probably use a function do the "exploding":
CREATE FUNCTION dbo.CreateBuckets
(
#Num int,
#MaxPerGroup int
)
RETURNS TABLE
AS RETURN
WITH First_CTE AS
(
SELECT CASE
WHEN #MaxPerGroup < #Num THEN #MaxPerGroup
ELSE #Num
END AS Seed
),
Sequence_CTE AS
(
SELECT Seed AS [Current], Seed AS Total
FROM First_CTE
UNION ALL
SELECT
CASE
WHEN (Total + #MaxPerGroup) > #Num THEN (#Num - Total)
ELSE #MaxPerGroup
END,
Total + #MaxPerGroup
FROM Sequence_CTE
WHERE Total < #Num
)
SELECT [Current] AS Num
FROM Sequence_CTE
Then, in the main query, group (sum) the data first and then use the bucket function:
WITH Totals AS
(
SELECT Name, SUM(Quantity) AS Total
FROM Table
GROUP BY Name
)
SELECT Name, b.Num AS QuantityInPackage
FROM Totals
CROSS APPLY dbo.CreateBuckets(Total, 2) b
This should work for any bucket size, doesn't have to be 2 (just change the parameter).

This is very crude, but it works.
CREATE TABLE #Colors
(
Id int,
Name varchar(50),
Quantity int
)
INSERT INTO #Colors VALUES (1, 'Red', 1)
INSERT INTO #Colors VALUES (2, 'Red', 3)
INSERT INTO #Colors VALUES (3, 'Blue', 1)
INSERT INTO #Colors VALUES (4, 'Red', 1)
INSERT INTO #Colors VALUES (5, 'Yellow', 3)
INSERT INTO #Colors VALUES (6, 'Green', 2)
SELECT
Name,
SUM(Quantity) AS TotalQuantity
INTO #Summed
FROM
#Colors
GROUP BY
Name
SELECT
Name,
TotalQuantity / 2 AS RecordsWithQuantity2,
TotalQuantity % 2 AS RecordsWithQuantity1
INTO #SortOfPivot
FROM
#Summed
ORDER BY
Name
DECLARE #RowCount int
SET #RowCount = (SELECT COUNT(*) FROM #SortOfPivot)
DECLARE #Name varchar(50)
DECLARE #TwosInsertCount int
DECLARE #OnesInsertCount int
CREATE TABLE #Result (Name varchar(50), Quantity int)
WHILE #RowCount > 0
BEGIN
SET #Name = (SELECT TOP 1 Name FROM #SortOfPivot)
SET #TwosInsertCount = (SELECT TOP 1 RecordsWithQuantity2 FROM #SortOfPivot)
SET #OnesInsertCount = (SELECT TOP 1 RecordsWithQuantity1 FROM #SortOfPivot)
WHILE #TwosInsertCount > 0
BEGIN
INSERT INTO #Result (Name, Quantity) VALUES (#Name, 2)
SET #TwosInsertCount = #TwosInsertCount - 1
END
WHILE #OnesInsertCount > 0
BEGIN
INSERT INTO #Result (Name, Quantity) VALUES (#Name, 1)
SET #OnesInsertCount = #OnesInsertCount - 1
END
DELETE FROM #SortOfPivot WHERE Name = #Name
SET #RowCount = (SELECT COUNT(*) FROM #SortOfPivot)
END
SELECT * FROM #Result
DROP TABLE #Colors
DROP TABLE #Result
DROP TABLE #Summed
DROP TABLE #SortOfPivot

Related

Find missing numbers in a sequence in MS SQL

Say i have a table with an integer column Id. I need to find the missing numbers in a sequence with a maximum returned amount.
If the table is empty and i'm asking for 10, it should return the numbers 1-10.
If the table has 1-5 and i'm asking for 10, it should return the numbers 6,7,8,9,10,11,12,13,14,15.
If the table has 1,2,4,6,9 and im asking for 10, it should return the numbers 3,5,7,8,10,11,12,13,14,15
How can i achive this in one single query using MS SQL?
Thanks in advance!

Try this:
If you need to get more numbers, just increase the WHERE Number<=100.
DECLARE #Tab1 TABLE (ID INT)
INSERT INTO #Tab1 VALUES(1)
INSERT INTO #Tab1 VALUES(3)
INSERT INTO #Tab1 VALUES(5)
INSERT INTO #Tab1 VALUES(7)
INSERT INTO #Tab1 VALUES(9)
;WITH CTE AS
(
SELECT 1 AS Number
UNION ALL
SELECT Number + 1 FROM CTE
WHERE Number<=100
)
SELECT TOP 5 *
FROM CTE
WHERE Number NOT IN(SELECT ID FROM #Tab1)
ORDER BY Number
OPTION (maxrecursion 0);
Existing values:
Number
1
3
5
7
9
OutPut:
Number
2
4
6
8
10
Hope this helps you.

This should work
There are also a system table with numbers
declare #T table (i int primary key);
insert into #T values (1), (2), (4), (6), (9);
declare #count int = 10;
declare #size int = (select count(*) from #T);
with cte as
( select 1 as num
union all
select num + 1
from cte
where num + 1 <= (#count + #size)
)
select top (#count) cte.num
from cte
left join #T t
on t.i = cte.num
where t.i is null
order by cte.num
option ( MaxRecursion 0 );

Replace cursors with queries

Let's say I have a booking covering 6 hours and 3 discounts covering 2 hours each. I want to split my booking into 3 parts so I can allocate 2 hours per discount.
It would return something like this:
BookingId 1 | DiscountId 1 | Qty 2
BookingId 1 | DiscountId 2 | Qty 2
BookingId 1 | DiscountId 3 | Qty 2
I would then insert those records this into another table.
I'm using an heavily optimized query to determine the number of hours available for each discount. However, I can't find a "good" way to allocate my booking to each discount without using a cursor.
(...)
WHILE ##FETCH_STATUS = 0
BEGIN
IF #RequiredQty = 0
RETURN
IF #RequiredQty <= #AvailableQty
BEGIN
INSERT INTO discount.Usage (DiscountId, BookingId, Quantity)
VALUES (#DiscountId, #BookingId, #RequiredQty)
SET #RequiredQty = 0
END
IF #RequiredQty > #AvailableQty
BEGIN
INSERT INTO discount.Usage (DiscountId, BookingId, Quantity)
VALUES (#DiscountId, #BookingId, #AvailableQty)
SET #RequiredQty -= #AvailableQty
END
FETCH NEXT FROM ecursor INTO #DiscountId, #AvailableQty
END
DEALLOCATE ecursor
I tried building the corresponding query but I can't select and assign variables at the same time. Using a cursor is not really a problem (besides some potential performance issues) but I was just curious to see if with the newest SQL Server we can convert our old cursors to something better?
Thanks,
Seb

You can useCTE RECURSIVE to make a Table.
like this.
DECLARE #BookingId INT = 1;
DECLARE #RequiredQty INT = 2;
DECLARE #Hours INT = 7;
CREATE TABLE #T
(
BookingId INT,
DiscountId INT,
Quantity INT
)
;WITH CTE([Count],[Quantity],Rk) AS
(
SELECT
CASE
WHEN [HOURS] - #RequiredQty > #RequiredQty THEN #RequiredQty
ELSE [HOURS] - #RequiredQty
END ,
T.HOURS,1
FROM
(
SELECT #Hours [HOURS]
) AS T
UNION ALL
SELECT CASE
WHEN CTE.[Quantity] - #RequiredQty > #RequiredQty THEN #RequiredQty
ELSE CTE.[Quantity] - #RequiredQty
END AS [Count],
CTE.[Quantity] - #RequiredQty,
RK + 1
FROM CTE
WHERE CTE.[Quantity] - #RequiredQty > 0
)
INSERT INTO #T(BookingId,DiscountId,Quantity)
SELECT #BookingId,Rk,[Count] FROM CTE
option (maxrecursion 0)
select * from #T
SQLDEMO

This is another approach, but don't know if this code has better performance than cursor.
DECLARE #DiscountStocks TABLE (Id INT IDENTITY(1,1), DiscountId INT, LastQty INT)
INSERT INTO #DiscountStocks (DiscountId, LastQty) VALUES (1, 5)
INSERT INTO #DiscountStocks (DiscountId, LastQty) VALUES (2, 2)
INSERT INTO #DiscountStocks (DiscountId, LastQty) VALUES (3, 1)
DECLARE #DiscountBookings TABLE (Id INT IDENTITY(1,1), DiscountId INT, BookingId INT, Qty INT)
DECLARE #BookingDiscount TABLE (Id INT IDENTITY(1,1), BookingId INT, DiscountId INT, Qty INT)
INSERT INTO #BookingDiscount (BookingId, DiscountId, Qty) VALUES (1, 1, 4)
INSERT INTO #BookingDiscount (BookingId, DiscountId, Qty) VALUES (1, 2, 2)
INSERT INTO #BookingDiscount (BookingId, DiscountId, Qty) VALUES (1, 3, 1)
INSERT INTO #BookingDiscount (BookingId, DiscountId, Qty) VALUES (2, 1, 1)
INSERT INTO #BookingDiscount (BookingId, DiscountId, Qty) VALUES (2, 2, 2)
SELECT BD.Id AS BDId, DS.Id AS DSId, DS.LastQty, BD.Qty
, DS.LastQty - (SELECT SUM(Qty) FROM #BookingDiscount WHERE Id <= BD.Id AND DiscountId = BD.DiscountId) AS QtyAfterSubstract
INTO #LastDiscountStock
FROM #DiscountStocks DS
INNER JOIN #BookingDiscount BD ON DS.DiscountId = BD.DiscountId
ORDER BY BD.Id, DS.Id
INSERT INTO #DiscountBookings (DiscountId, BookingId, Qty)
SELECT DSId, BDId, Qty
FROM #LastDiscountStock
WHERE QtyAfterSubstract >= 0
DROP TABLE #LastDiscountStock
SELECT * FROM #DiscountBookings

Adding total row and Increasing quality for an sql query based on multiple parameters

I have a table in Microsoft Sql Server 2008 like:
id name timelong
1 Eray 2
1 Jack 1
1 Ali 7
1 john 3
1 Roby 5
1 Mike 4
1 Josh 11
What I want to do is to select data based on user multi-selectable parameters. Think that there are 4 checkboxes: 0-3,4-6,7-9,10-12 and users can select more than one checkbox. Only those data user selected should be seen and a TOTAL row needs to be added at the bottom.
What I tried is on the bottom but it is not working well - TOTAL row is not there. My question is how I can add the Total row there, and is there any more professional way to provide this query. Thanks.
declare #interval03 bit -- 0 to 3
declare #interval06 bit -- 4 to 6
declare #interval09 bit -- 7 to 9
declare #interval12 bit -- 10 to 12
Select *, sum(timelong)
From myTable
Where (#interval03=1 and timelong<4)
or
(#interval06=1 and timelong>3 and timelong<7)
or
(#interval09=1 and timelong>6 and timelong<10)
or
(#interval12=1 and timelong>9 and timelong<13)
group by id, name

Try grouping sets:
Select ID, isnull(Name, 'TOTAL'), sum(timelong)
From myTable
Where (#interval03=1 and timelong <= 3)
or
(#interval06=1 and timelong between 4 and 6)
or
(#interval09=1 and timelong between 7 and 9)
or
(#interval12=1 and timelong >= 10)
group by grouping sets ((ID, name), ())
Assuming from your query that timelong is an int, I've simplified your where a little as well.
More information on grouping sets, rollup, and cube: https://technet.microsoft.com/en-us/library/bb522495(v=sql.105).aspx

TRY This..One more way to add totals...
declare #table Table (id INT,name VARCHAR(10), timelong INT)
insert into #table (id ,name, timelong) VALUES (1, 'Eray', 2)
insert into #table (id ,name, timelong) VALUES (1 ,'Jack' ,1)
insert into #table (id ,name, timelong) VALUES (1 ,'Ali' , 7)
insert into #table (id ,name, timelong) VALUES (1 ,'john' ,3)
insert into #table (id ,name, timelong) VALUES (1 ,'Roby' , 5)
insert into #table (id ,name, timelong) VALUES (1 ,'Mike' ,4)
insert into #table (id ,name, timelong) VALUES (1 ,'Josh' ,11)
declare #interval03 bit=1 -- 0 to 3
declare #interval06 bit -- 4 to 6
declare #interval09 bit -- 7 to 9
declare #interval12 bit -- 10 to 12
DECLARE #result TABLE (ID INT,Name VARCHAR (30),TimeLong INT)
INSERT INTO #result
Select id, name, sum(timelong) timelong
From #table
Where (#interval03=1 and timelong<4)
or (#interval06=1 and timelong>3 and timelong<7)
or (#interval09=1 and timelong>6 and timelong<10)
or (#interval12=1 and timelong>9 and timelong<13)
group by id, name
INSERT INTO #result
SELECT MAX(ID) +100 ID,'Total' Name,SUM(TimeLong) TimeLong
FROM #result
HAVING COUNT(*)<>0
SELECT * FROM #result

Identifying repeated fields in SQL query

I have an SQL query that returns a column like this:
foo
-----------
1200
1200
1201
1200
1200
1202
1202
1202
It has already been ordered in a specific way, and I would like to perform another query on this result set to ID the repeated data like this:
foo ID
---- ----
1200 1
1200 1
1201 2
1200 3
1200 3
1202 4
1202 4
1202 4
It's important that the second group of 1200 is identified as separate from the first. Every variation of OVER/PARTITION seems to want to lump both groups together. Is there a way to window the partition to only these repeated groups?
Edit:
This is for Microsoft SQL Server 2012

Not sure this will be the fastest results...
select main.num, main.id from
(select x.num,row_number()
over (order by (select 0)) as id
from (select distinct num from num) x) main
join
(select num, row_number() over(order by (select 0)) as ordering
from num) x2 on
x2.num=main.num
order by x2.ordering
Assuming the table "num" has a column "num" that contains your data, in the order-- of course num could be made into a view or a "with" for your original query.
Please see the following sqlfiddle

Here is one way to do this without a CURSOR
-- Create a temporay table
DECLARE #table TABLE
(
SeqID INT IDENTITY(1,1) NOT NULL PRIMARY KEY,
foo INT,
id int null
)
DECLARE #i INT
DECLARE #j INT
DECLARE #k INT
declare #tFoo INT
declare #oldFoo INT
SET #k = 0
set #oldFoo = 0
-- Insert data into the temporary table
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1201
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1200
INSERT INTO #table(foo)
SELECT 1202
INSERT INTO #table(foo)
SELECT 1202
INSERT INTO #table(foo)
SELECT 1202
-- Get the max and min SeqIDs to loop through
SELECT #i = MIN(SeqID) FROM #table
SELECT #j = MAX(SeqID) FROM #table
-- Loop through the temp table using the SeqID indentity column
WHILE (#i <= #j)
BEGIN
SELECT #tFoo = foo FROM #table WHERE SeqID = #i
if #oldFoo <> #tFoo
set #k = #k + 1
update #table set id = #k where SeqID = #i
SET #oldFoo = #tFoo
-- Increment the counter
SET #i = #i + 1
END
SELECT * from #table

You can do it without using cursors but it does not look nice (at least what I came up with). So 1) I assume you have PK column which orders your main values.
Then 2) I assume you have an ID column which you want to set.
create table tbl(foo int, pk int, id int);
insert into tbl(foo, pk) values (1100, 5);
insert into tbl(foo, pk) values (1200, 10);
insert into tbl(foo, pk) values (1200, 20);
insert into tbl(foo, pk) values (1201, 30);
insert into tbl(foo, pk) values (1200, 40);
insert into tbl(foo, pk) values (1200, 50);
insert into tbl(foo, pk) values (1202, 60);
insert into tbl(foo, pk) values (1202, 70);
insert into tbl(foo, pk) values (1202, 80);
insert into tbl(foo, pk) values (1202, 90);
SQL Fiddle here: http://sqlfiddle.com/#!6/fdaaa/2
update tbl
set
ID = 1
update t
set
t.ID = m.RN2
from
tbl t
join
(
select
y1.RN as RN1, y1.PK as PK1,
y2.RN as RN2, y2.PK as PK2
FROM
(
SELECT
ROW_NUMBER() OVER(ORDER BY x.pk1 ASC) AS rn,
x.pk1 AS pk
FROM
(
SELECT t1.pk AS pk1, t2.pk AS pk2
FROM
tbl t1
LEFT JOIN tbl t2 ON
(
(t1.pk < t2.pk AND t1.foo = t2.foo)
AND
(
NOT EXISTS
(
SELECT tMid.pk FROM
tbl tMid WHERE
tMid.pk < t2.pk
AND
tMid.pk > t1.pk
)
)
)
) x WHERE x.pk2 IS NULL
) y1
left join
(
SELECT
ROW_NUMBER() OVER(ORDER BY x.pk1 ASC) AS rn,
x.pk1 AS pk
FROM
(
SELECT t1.pk AS pk1, t2.pk AS pk2
FROM
tbl t1
LEFT JOIN tbl t2 ON
(
(t1.pk < t2.pk AND t1.foo = t2.foo)
AND
(
NOT EXISTS
(
SELECT tMid.pk FROM
tbl tMid WHERE
tMid.pk < t2.pk
AND
tMid.pk > t1.pk
)
)
)
) x WHERE x.pk2 IS NULL
) y2 on y1.RN = y2.RN - 1
) m on
(
(t.pk > m.pk1 and ((m.pk2 is not null ) and (t.pk <= m.pk2)))
-- or
-- (t.pk<=m.pk1)
)

This is my solution using a cursor and a temporary table to hold the results.
DECLARE #foo INT
DECLARE #previousfoo INT = -1
DECLARE #id INT = 0
DECLARE #getid CURSOR
DECLARE #resultstable TABLE
(
primaryId INT IDENTITY(1, 1) NOT NULL PRIMARY KEY,
foo INT,
id int null
)
SET #getid = CURSOR FOR
SELECT originaltable.foo
FROM originaltable
OPEN #getid
FETCH NEXT
FROM #getid INTO #foo
WHILE ##FETCH_STATUS = 0
BEGIN
IF (#foo <> #previousfoo)
BEGIN
SET #id = #id + 1
END
INSERT INTO #resultstable VALUES (#foo, #id)
SET #previousfoo = #foo
FETCH NEXT
FROM #getid INTO #foo
END
CLOSE #getid
DEALLOCATE #getid

Turn value into singular row

Current
Name Quantity
---------------
Stella 2
Jennifer 2
Greg 3
Requested result
Name Quantity
---------------
Stella 1
Stella 1
Jennifer 1
Jennifer 1
Greg 1
Greg 1
Greg 1
How should I do it?
declare #T table
(
Name varchar(50),
Sales int
)
insert into #T values
('Stella', '2'),
('Jennifer', '2'),
('Greg', '3')

If the maximum value in the quantity column is known to be less than 32,767, you can use Recursion to generate numbers and join the Numbers to achieve your result.
/*******************************************
Max Recursion Count in SQL Server is 32767
Limitation of 32767 Numbers!
******************************************/
;WITH Numbers (Number) AS
(
SELECT 1
UNION ALL
SELECT 1 + Number FROM Numbers WHERE Number < 100
)
SELECT m.Name,
Quantity = 1
FROM MyTable m
JOIN #numbers n ON m.Quantity <= n.Number
OPTION (MAXRECURSION 32767);

Using recursion and borrowing Michael Fredrickson's setup code:
declare #T table (
Name varchar(50),
Sales int
)
insert into #T values ('Stella', '2')
insert into #T values ('Jennifer', '2')
insert into #T values ('Greg', '3')
-- Recursive verion
;with People (Name, Sales) as
(
select Name, Sales
from #T
union all
select Name, Sales - 1
from People
where Sales - 1 > 0
)
select Name, 1 as Quantity
from People
option (maxrecursion 0) -- Recurse without limit
This seems to run faster on my box (5x faster than Michael Fredrickson's according to query plan, but with many more logical reads), not that it matters much.

You'll probably want to have a pre-populated numbers table to do this:
declare #T table (
Name varchar(50),
Sales int
)
declare #numbers table (
Number int
)
insert into #numbers values (1)
insert into #numbers values (2)
insert into #numbers values (3)
insert into #numbers values (4)
-- Etc... up to however many numbers is the max possible value for sales...
insert into #T values ('Stella', '2')
insert into #T values ('Jennifer', '2')
insert into #T values ('Greg', '3')
SELECT
t.Name,
1 AS Sales
FROM
#T t JOIN
#numbers n ON
t.Sales >= n.Number
ORDER BY t.Name
That's how you could do it, but I'm not sure on why you would want to do it.

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL Group By Modulo of Row Count - sql

Related

Find missing numbers in a sequence in MS SQL

Replace cursors with queries

Adding total row and Increasing quality for an sql query based on multiple parameters

Identifying repeated fields in SQL query

Turn value into singular row

Categories

Resources