SQL Server grouping rows - sql

I have this data:
Id | Name | count | Group_number
------+-------+-------+--------------
1 | cdd | 50 | 0
2 | cdd | 15 | 0
3 | cdd | 0 | 0
4 | cdd | 25 | 0
5 | cdd | 11 | 0
I want a script that makes three or four groups on condition: Sum(count) for each group < 50
I want this output:
1 | cdd | 50 | 1
2 | cdd | 15 | 2
3 | cdd | 0 | 2
4 | cdd | 25 | 2
5 | cdd | 11 | 3

Assuming this has to be done for each name, you can use a recursive cte.
with rownums as (select t.*,row_number() over(partition by name order by id) as rnum from t)
,cte(rnum,id,name,cnt,runningsum,grp) as
(select rnum,id,name,cnt,cnt,1 from rownums where rnum=1
union all
select t.rnum,t.id,t.name,t.cnt
,case when c.runningsum+t.cnt > 50 then t.cnt else c.runningsum+t.cnt end
,case when c.runningsum+t.cnt > 50 then t.id else c.grp end
from cte c
join rownums t on t.rnum=c.rnum+1 and t.name=c.name
)
select id,cnt,name,dense_rank() over(partition by name order by grp) as grp
from cte
Sample Demo
Keep track of the running sum and reset it when it goes over 50. Also remember the id when the sum goes over 50. This can be used to assign group numbers.

For records where the count is less than 50 we can simply generate a grouping id by calculating a running total on the count and then divide this running total by 50. However, since some records may already have a count that is greater than or equal to 50 might generate an incorrect id. To solve this problem, we need to somehow force the generation of a new grouping id on the next record. This can be done by simply adjusting the count the next record by 50 if the current records count is 50 or greater.
The following example demonstrates how this can be done:
CREATE TABLE #Items
(
[Id] INT NOT NULL PRIMARY KEY
,[Name] VARCHAR(50) NOT NULL
,[Count] INT NOT NULL
)
INSERT INTO #Items
VALUES
(1, 'cdd', 50 ),
(2, 'cdd', 15 ),
(3, 'cdd', 0 ),
(4, 'cdd', 25 ),
(5, 'cdd', 11 );
;WITH CTE_ItemCountsAdjusted
AS
(
SELECT [Id]
,[Name]
,[Count]
,LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) AS PrevCount
,(
CASE
WHEN LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) >= 50 THEN [Count] + 50
ELSE [Count]
END
) AdjustedCount
FROM #Items
)
SELECT [Id]
,[Name]
,[Count]
,SUM([AdjustedCount]) OVER (PARTITION BY [Name] ORDER BY [Id] ROWS UNBOUNDED PRECEDING) / 50 AS [Group_number]
FROM CTE_ItemCountsAdjusted
ORDER BY [Id]
This method eliminates the need for recursive calls. Note if you need the group id to be strictly sequential (no gaps between group numbers) then you can make use of the DENSE_RANK() windowing function to achieve this as per following example:
INSERT INTO #Items
VALUES
(1, 'cdd', 50 ),
(2, 'cdd', 15 ),
(3, 'cdd', 0 ),
(4, 'cdd', 25 ),
(5, 'cdd', 11 ),
(6, 'cdd', 200 ),
(7, 'cdd', 10 );
;WITH CTE_ItemCountsAdjusted
AS
(
SELECT [Id]
,[Name]
,[Count]
,LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) AS PrevCount
,(
CASE
WHEN LAG([Count], 1, 0) OVER (PARTITION BY [Name] ORDER BY [Id]) >= 50 THEN [Count] + 50
ELSE [Count]
END
) AdjustedCount
FROM #Items
),CTE_ItemCountsWithGroupID
AS
(
SELECT [Id]
,[Name]
,[Count]
,SUM([AdjustedCount]) OVER (PARTITION BY [Name] ORDER BY [Id] ROWS UNBOUNDED PRECEDING) / 50 AS [Group_number]
FROM CTE_ItemCountsAdjusted
)
SELECT [Id]
,[Name]
,[Count]
,[Group_number]

Related

Issue with Row_Number() Over Partition

I've been trying to reset the row_number when the value changes on Column Value and I have no idea on how should i do this.
This is my SQL snippet:
WITH Sch(SubjectID, VisitID, Scheduled,Actual,UserId,RLev,SubjectTransactionID,SubjectTransactionTypeID,TransactionDateUTC,MissedVisit,FieldId,Value) as
(
select
svs.*,
CASE WHEN stdp.FieldID = 'FrequencyRegime' and svs.SubjectTransactionTypeID in (2,3) THEN
stdp.FieldID
WHEN stdp.FieldID is NULL and svs.SubjectTransactionTypeID = 1
THEN NULL
WHEN stdp.FieldID is NULL
THEN 'FrequencyRegime'
ELSE stdp.FieldID
END AS [FieldID],
CASE WHEN stdp.Value is NULL and svs.SubjectTransactionTypeID = 1
THEN NULL
WHEN stdp.Value IS NULL THEN
(SELECT TOP 1 stdp.Value from SubjectTransaction st
JOIN SubjectTransactionDataPoint STDP on stdp.SubjectTransactionID = st.SubjectTransactionID and stdp.FieldID = 'FrequencyRegime'
where st.SubjectID = svs.SubjectID
order by st.ServerDateST desc)
ELSE stdp.Value END AS [Value]
from SubjectVisitSchedule svs
left join SubjectTransactionDataPoint stdp on svs.SubjectTransactionID = stdp.SubjectTransactionID and stdp.FieldID = 'FrequencyRegime'
)
select
Sch.*,
CASE WHEN sch.Value is not NULL THEN
ROW_NUMBER() over(partition by Sch.Value, Sch.SubjectID order by Sch.SubjectID, Sch.VisitID)
ELSE NULL
END as [FrequencyCounter],
CASE WHEN Sch.Value = 1 THEN 1--v.Quantity
WHEN Sch.Value = 2 and (ROW_NUMBER() over(partition by Sch.Value, Sch.SubjectID order by Sch.SubjectID, Sch.VisitID) % 2) <> 0
THEN 0
WHEN Sch.Value = 2 and (ROW_NUMBER() over(partition by Sch.Value, Sch.SubjectID order by Sch.SubjectID, Sch.VisitID) % 2) = 0
THEN 1
ELSE NULL
END AS [DispenseQuantity]
from Sch
--left join VisitDrugAssignment v on v.VisitID = Sch.VisitID
where SubjectID = '4E80718E-D0D8-4250-B5CF-02B7A259CAC4'
order by SubjectID, VisitID
This is my Dataset:
Based on the Dataset, I am trying to reset the FrequencyCounter to 1 every time the value changes for each subject, Right now it does 50% of what I want, It is counting when the value 1 or 2 is found, but when value 1 comes again after value 2 it continues the count from where it left. I want every time the value is changes the count to also start from the beginning.
It's difficult to reproduce and test without sample data, but if you want to know how to number rows based on change in column value, next approach may help. It's probably not the best one, but at least will give you a good start. Of course, I hope I understand your question correctly.
Data:
CREATE TABLE #Data (
[Id] int,
[Subject] varchar(3),
[Value] int
)
INSERT INTO #Data
([Id], [Subject], [Value])
VALUES
(1, '801', 1),
(2, '801', 2),
(3, '801', 2),
(4, '801', 2),
(5, '801', 1),
(6, '801', 2),
(7, '801', 2),
(8, '801', 2)
Statement:
;WITH ChangesCTE AS (
SELECT
*,
CASE
WHEN LAG([Value]) OVER (PARTITION BY [Subject] ORDER BY [Id]) <> [Value] THEN 1
ELSE 0
END AS [Change]
FROM #Data
), GroupsCTE AS (
SELECT
*,
SUM([Change]) OVER (PARTITION BY [Subject] ORDER BY [Id]) AS [GroupID]
FROM ChangesCTE
)
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY [GroupID] ORDER BY [Id]) AS Rn
FROM GroupsCTE
Result:
--------------------------------------
Id Subject Value Change GroupID Rn
--------------------------------------
1 801 1 0 0 1
2 801 2 1 1 1
3 801 2 0 1 2
4 801 2 0 1 3
5 801 1 1 2 1
6 801 2 1 3 1
7 801 2 0 3 2
8 801 2 0 3 3
As per my understanding, you need DENSE_RANK as you are looking for the row number will only change when value changed. The syntax will be as below-
WITH your_table(your_column)
AS
(
SELECT 2 UNION ALL
SELECT 10 UNION ALL
SELECT 2 UNION ALL
SELECT 11
)
SELECT *,DENSE_RANK() OVER (ORDER BY your_column)
FROM your_table

how to separate and sum 2 columns based on condition

I'm doing a select statement and I have a column I would like to separate into 2 columns based on their type, and then get the sum of the amounts grouped by an ID
I want all the gold and platinum types in one column, and all the silver and bronze in a 2nd column, then summed and grouped by the ID so it looks like this :
I tried doing a union like this:
SELECT
ID,
SUM(Amount) AS "Gold/Platinum",
0 AS "Bronze/Silver"
FROM
table
WHERE
Type IN ('gold', 'platinum')
GROUP BY
ID
UNION ALL
SELECT
ID,
SUM(Amount) AS "Bronze/Silver",
0 AS "Gold/Platinum"
FROM
table
WHERE
Type IN ('bronze', 'silver')
GROUP BY
ID
The gold/platinum column will be correct, but I get nothing in the bronze/silver column
Use conditional aggregation:
select id,
sum(case when Type in ('gold', 'platinum') then amount else 0 end) as gold_platinum,
sum(case when Type in ('bronze', 'silver') then amount else 0 end) as bronze_silver
from t
group by id
order by id;
You can run this in SSMS:
DECLARE #data TABLE( [ID] INT, [Type] VARCHAR(10), [Amount] INT );
INSERT INTO #data ( [ID], [Type], [Amount] ) VALUES
( 1, 'gold', 100 )
, ( 1, 'gold', 50 )
, ( 1, 'bronze', 75 )
, ( 2, 'silver', 10 )
, ( 2, 'bronze', 20 )
, ( 3, 'gold', 35 )
, ( 4, 'silver', 20 )
, ( 4, 'platinum', 30 );
SELECT
[ID]
, SUM( CASE WHEN [Type] IN ( 'gold', 'platinum' ) THEN Amount ELSE 0 END ) AS [Gold/Platinum]
, SUM( CASE WHEN [Type] IN ( 'bronze', 'silver' ) THEN Amount ELSE 0 END ) AS [Bronze/Silver]
FROM #data
GROUP BY [ID]
ORDER BY [ID];
Returns
+----+---------------+---------------+
| ID | Gold/Platinum | Bronze/Silver |
+----+---------------+---------------+
| 1 | 150 | 75 |
| 2 | 0 | 30 |
| 3 | 35 | 0 |
| 4 | 30 | 20 |
+----+---------------+---------------+

SQL Order By On two columns but same prority

I'm stuck on this simple select and don't know what to do.
I Have this:
ID | Group
===========
1 | NULL
2 | 100
3 | 100
4 | 100
5 | 200
6 | 200
7 | 100
8 | NULL
and want this:
ID | Group
===========
1 | NULL
2 | 100
3 | 100
4 | 100
7 | 100
5 | 200
6 | 200
8 | NULL
all group members keep together, but others order by ID.
I can not write this script because of that NULL records. NULL means that there is not any group for this record.
First you want to order your rows by the minimum ID of their group - or their own ID in case they belong to no group.Then you want to order by ID. That is:
order by min(id) over (partition by case when grp is null then id else grp end), id
If IDs and groups can overlap (i.e. the same number can be used for an ID and for a group, e.g. add a record for ID 9 / group 1 to your sample data) you should change the partition clause to something like
order by min(id) over (partition by case when grp is null
then 'ID' + cast(id as varchar)
else 'GRP' + cast(grp as varchar) end),
id;
Rextester demo: http://rextester.com/GPHBW5600
What about data after a null? In a comment you said don't sort the null.
declare #T table (ID int primary key, grp int);
insert into #T values
(1, NULL)
, (3, 100)
, (5, 200)
, (6, 200)
, (7, 100)
, (8, NULL)
, (9, 200)
, (10, 100)
, (11, NULL)
, (12, 150);
select ttt.*
from ( select tt.*
, sum(ff) over (order by tt.ID) as sGrp
from ( select t.*
, iif(grp is null or lag(grp) over (order by id) is null, 1, 0) as ff
from #T t
) tt
) ttt
order by ttt.sGrp, ttt.grp, ttt.id
ID grp ff sGrp
----------- ----------- ----------- -----------
1 NULL 1 1
3 100 1 2
7 100 0 2
5 200 0 2
6 200 0 2
8 NULL 1 3
10 100 0 4
9 200 1 4
11 NULL 1 5
12 150 1 6

GroupBy with respect to record intervals on another table

I prepared a sql fiddle for my question. Here it is There is a working code here. I am asking whether there exists an alternative solution which I did not think.
CREATE TABLE [Product]
([Timestamp] bigint NOT NULL PRIMARY KEY,
[Value] float NOT NULL
)
;
CREATE TABLE [PriceTable]
([Timestamp] bigint NOT NULL PRIMARY KEY,
[Price] float NOT NULL
)
;
INSERT INTO [Product]
([Timestamp], [Value])
VALUES
(1, 5),
(2, 3),
(4, 9),
(5, 2),
(7, 11),
(9, 3)
;
INSERT INTO [PriceTable]
([Timestamp], [Price])
VALUES
(1, 1),
(3, 4),
(7, 2.5),
(10, 3)
;
Query:
SELECT [Totals].*, [PriceTable].[Price]
FROM
(
SELECT [PriceTable].[Timestamp]
,SUM([Value]) AS [TotalValue]
FROM [Product],
[PriceTable]
WHERE [PriceTable].[Timestamp] <= [Product].[Timestamp]
AND NOT EXISTS (SELECT * FROM [dbo].[PriceTable] pt
WHERE pt.[Timestamp] <= [Product].[Timestamp]
AND pt.[Timestamp] > [PriceTable].[Timestamp])
GROUP BY [PriceTable].[Timestamp]
) AS [Totals]
INNER JOIN [dbo].[PriceTable]
ON [PriceTable].[Timestamp] = [Totals].[Timestamp]
ORDER BY [PriceTable].[Timestamp]
Result
| Timestamp | TotalValue | Price |
|-----------|------------|-------|
| 1 | 8 | 1 |
| 3 | 11 | 4 |
| 7 | 14 | 2.5 |
Here, my first table [Product] contains the product values for different timestamps. And second table [PriceTable] contains the prices for different time intervals. A given price is valid until a new price is set. Therefore the price with timestamp 1 is valid for Products with timestamps 1 and 2.
I am trying to get the total number of products with respect to given prices. The SQL on the fiddle produces what I expect.
Is there a smarter way to get the same result?
By the way, I am using SQLServer 2014.
DECLARE #Product TABLE
(
[Timestamp] BIGINT NOT NULL
PRIMARY KEY ,
[Value] FLOAT NOT NULL
);
DECLARE #PriceTable TABLE
(
[Timestamp] BIGINT NOT NULL
PRIMARY KEY ,
[Price] FLOAT NOT NULL
);
INSERT INTO #Product
( [Timestamp], [Value] )
VALUES ( 1, 5 ),
( 2, 3 ),
( 4, 9 ),
( 5, 2 ),
( 7, 11 ),
( 9, 3 );
INSERT INTO #PriceTable
( [Timestamp], [Price] )
VALUES ( 1, 1 ),
( 3, 4 ),
( 7, 2.5 ),
( 10, 3 );
WITH cte
AS ( SELECT * ,
LEAD(pt.[Timestamp]) OVER ( ORDER BY pt.[Timestamp] ) AS [lTimestamp]
FROM #PriceTable pt
)
SELECT cte.[Timestamp] ,
( SELECT SUM(Value)
FROM #Product
WHERE [Timestamp] >= cte.[Timestamp]
AND [Timestamp] < cte.[lTimestamp]
) AS [TotalValue],
cte.[Price]
FROM cte
Idea is to generate intervals from price table like:
1 - 3
3 - 7
7 - 10
and sum up all values in those intervals.
Output:
Timestamp TotalValue Price
1 8 1
3 11 4
7 14 2.5
10 NULL 3
You can simply add WHERE clause if you want to filter out rows where no orders are sold.
Also you can indicate the default value for LEAD window function if you want to close the last interval like:
LEAD(pt.[Timestamp], 1, 100)
and I guess it would be something like this in production:
LEAD(pt.[Timestamp], 1, GETDATE())
I think I've got a query which is easier to read. Does this work for you?
select pt.*,
(select sum(P.Value) from Product P where
P.TimeStamp between pt.TimeStamp and (
--get the next time stamp
select min(TimeStamp)-1 from PriceTable where TimeStamp > pt.TimeStamp
)) as TotalValue from PriceTable pt
--exclude entries with timestamps greater than those in Product table
where pt.TimeStamp < (select max(TimeStamp) from Product)
Very detailed question BTW
You could use a cte
;with cte as
(
select p1.[timestamp] as lowval,
case
when p2.[timestamp] is not null then p2.[timestamp] - 1
else 999999
end hival,
p1.price
from
(
select p1.[timestamp],p1.price,
row_number() over (order by p1.[timestamp]) rn
from pricetable p1 ) p1
left outer join
(select p1.[timestamp],p1.price,
row_number() over (order by p1.[timestamp]) rn
from pricetable p1) p2
on p2.rn = p1.rn + 1
)
select cte.lowval as 'timestamp',sum(p1.value) TotalValue,cte.price
from product p1
join cte on p1.[Timestamp] between cte.lowval and cte.hival
group by cte.lowval,cte.price
order by cte.lowval
It's a lot easier to understand and the execution plan compares favourably with your query (about 10%) cheaper

How to pivot rows to columns with known max number of columns

I have a table structured as such:
Pricing_Group
GroupID | QTY
TestGroup1 | 1
TestGroup1 | 2
TestGroup1 | 4
TestGroup1 | 8
TestGroup1 | 22
TestGroup2 | 2
TestGroup3 | 2
TestGroup3 | 5
What I'm looking for is a result like this:
Pricing_Group
GroupID | QTY1 | QTY2 | QTY3 | QTY4 | QTY5
TestGroup1 | 1 | 2 | 4 | 8 | 22
TestGroup2 | 2 | NULL | NULL | NULL | NULL
TestGroup3 | 2 | 5 | NULL | NULL | NULL
Note that there can only ever be a maximum of 5 different quantities for a given GroupID, there's just no knowing what those 5 quantities will be.
This seems like an application of PIVOT, but I can't quite wrap my head around the syntax that would be required for an application like this.
Thanks for taking the time to look into this!
Perfect case for pivot and you don't need a CTE:
Declare #T Table (GroupID varchar(10) not null,
QTY int)
Insert Into #T
Values ('TestGroup1', 1),
('TestGroup1', 2),
('TestGroup1', 4),
('TestGroup1', 8),
('TestGroup1', 22),
('TestGroup2', 2),
('TestGroup3', 2),
('TestGroup3', 5)
Select GroupID, [QTY1], [QTY2], [QTY3], [QTY4], [QTY5]
From (Select GroupID, QTY,
RowID = 'QTY' + Cast(ROW_NUMBER() Over (Partition By GroupID Order By QTY) as varchar)
from #T) As Pvt
Pivot (Min(QTY)
For RowID In ([QTY1], [QTY2], [QTY3], [QTY4], [QTY5])
) As Pvt2
You can pivot on a generated rank;
;with T as (
select
rank() over (partition by GroupID order by GroupID, QTY) as rank,
GroupID,
QTY
from
THE_TABLE
)
select
*
from
T
pivot (
max(QTY)
for rank IN ([1],[2],[3],[4],[5])
) pvt
>>
GroupID 1 2 3 4 5
----------------------------------------
TestGroup1 1 2 4 8 22
TestGroup2 2 NULL NULL NULL NULL
TestGroup3 2 5 NULL NULL NULL
You can also use case statement to perform the pivot:
declare #t table ( GroupID varchar(25), QTY int)
insert into #t
values ('TestGroup1', 1),
('TestGroup1', 2),
('TestGroup1', 4),
('TestGroup1', 8),
('TestGroup1', 22),
('TestGroup2', 2),
('TestGroup3', 2),
('TestGroup3', 5)
;with cte_Stage (r, GroupId, QTY)
as ( select row_number() over(partition by GroupId order by QTY ),
GroupId,
QTY
from #t
)
select GroupId,
[QTY1] = sum(case when r = 1 then QTY else null end),
[QTY2] = sum(case when r = 2 then QTY else null end),
[QTY3] = sum(case when r = 3 then QTY else null end),
[QTY4] = sum(case when r = 4 then QTY else null end),
[QTY5] = sum(case when r = 5 then QTY else null end),
[QTYX] = sum(case when r > 5 then QTY else null end)
from cte_Stage
group
by GroupId;