Break up running sum into maximum group size / length - sql

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
DECLARE #maxGroupSize INT
SET #maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/#maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.

A recusive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
create table data
(
id int,
description nvarchar(20)
);
insert into data (id, description) values
( 1, 'qmlsdkjfqmsldk'),
( 2, 'mldskjf'),
( 3, 'qmsdlfkqjsdm'),
( 4, 'fmqlsdkfq'),
( 5, 'qdsfqsdfqq'),
( 6, 'mds'),
( 7, 'qmsldfkqsjdmfqlkj'),
( 8, 'qdmsl'),
( 9, 'mqlskfjqmlkd'),
(10, 'qsdqfdddffd');
Solution
For every recursion step evaluate (r.group_running_length + len(d.description) <= #group_max_length) if the previous group must be extended or a new group must be started in a case expression.
Set group target size to 40 to better fit the sample data.
declare #group_max_length int = 40;
with rcte as
(
select d.id,
d.description,
len(d.description) as description_length,
len(d.description) as running_length,
1 as group_id,
len(d.description) as group_running_length
from data d
where d.id = 1
union all
select d.id,
d.description,
len(d.description),
r.running_length + len(d.description),
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_id
else r.group_id + 1
end,
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_running_length + len(d.description)
else len(d.description)
end
from rcte r
join data d
on d.id = r.id + 1
)
select r.id,
r.description,
r.description_length,
r.running_length,
r.group_id,
r.group_running_length,
gs.group_sum
from rcte r
cross apply ( select max(r2.group_running_length) as group_sum
from rcte r2
where r2.group_id = r.group_id ) gs -- group sum
order by r.id;
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

Related

Is it possible to use a aggregate function over partition by as a case condition in SQL?

Problem statement is to calculate median from a table that has two columns. One specifying a number and the other column specifying the frequency of the number.
For e.g.
Table "Numbers":
Num
Freq
1
3
2
3
This median needs to be found for the flattened array with values:
1,1,1,2,2,2
Query:
with ct1 as
(select num,frequency, sum(frequency) over(order by num) as sf from numbers o)
select case when count(num) over(order by num) = 1 then num
when count(num) over (order by num) > 1 then sum(num)/2 end median
from ct1 b where sf <= (select max(sf)/2 from ct1) or (sf-frequency) <= (select max(sf)/2 from ct1)
Is it not possible to use count(num) over(order by num) as the condition in the case statement?
Find the relevant row / 2 rows based of the accumulated frequencies, and take the average of num.
The example and Fiddle will also show you the
computations leading to the result.
If you already know that num is unique, rowid can be removed from the ORDER BY clauses
with
t1 as
(
select t.*
,nvl(sum(freq) over (order by num,rowid rows between unbounded preceding and 1 preceding),0) as freq_acc_sum_1
,sum(freq) over (order by num, rowid) as freq_acc_sum_2
,sum(freq) over () as freq_sum
from t
)
select t1.*
,case
when freq_sum/2 between freq_acc_sum_1 and freq_acc_sum_2
then 'V'
end as relevant_record
from t1
order by num, rowid
Fiddle
Example:
ID
NUM
FREQ
FREQ_ACC_SUM_1
FREQ_ACC_SUM_2
FREQ_SUM
RELEVANT_RECORD
7
8
1
0
1
18
5
10
1
1
2
18
1
29
3
2
5
18
6
31
1
5
6
18
3
33
2
6
8
18
4
41
1
8
9
18
V
9
49
2
9
11
18
V
2
52
1
11
12
18
8
56
3
12
15
18
10
92
3
15
18
18
MEDIAN
45
Fiddle for 1M records
You can find the one (or two) middle value(s) and then average:
SELECT AVG(num) AS median
FROM (
SELECT num,
freq,
SUM(freq) OVER (ORDER BY num) AS cum_freq,
(SUM(freq) OVER () + 1)/2 AS median_freq
FROM table_name
)
WHERE cum_freq - freq < median_freq
AND median_freq < cum_freq + 1
Or, expand the values using a LATERAL join to a hierarchical query and then use the MEDIAN function:
SELECT MEDIAN(num) AS median
FROM table_name t
CROSS JOIN LATERAL (
SELECT LEVEL
FROM DUAL
WHERE freq > 0
CONNECT BY LEVEL <= freq
)
Which, for the sample data:
CREATE TABLE table_name (Num, Freq) AS
SELECT 1, 3 FROM DUAL UNION ALL
SELECT 2, 3 FROM DUAL;
Outputs:
MEDIAN
1.5
(Note: For your sample data, there are 6 items, an even number, so the MEDIAN will be half way between the value of 3rd and 4rd items; so half way between 1 and 2 = 1.5.)
db<>fiddle here

SQL Server - Building out a dynamic range of numbers while grouping by a specific column

I have the following data:
ID Days
----------------------- --------
1 5
1 10
1 15
2 5
2 13
2 15
I am trying to build out a range of numbers based on the days while grouping by their ID.
For ID Group 1: The range would start at 5 and end at 9. The next range would be 10-14, and then the final range would be 15-9999
For ID Group 2: The range would start at 5 and end at 12. The next range would be 13-14, and then the final range would be 15-9999
The resulting table would look something like this:
RangeStart RangeEnd RangeText ID
----------- ----------- --------- ----
5 9 5 - 9 1
10 14 10 - 14 1
15 9999 15 - 9999 1
5 12 5 - 12 2
13 14 13 - 14 2
15 9999 15 - 9999 2
I have attempted to use a CTE which works but only when I am not grouping by ID's.
Declare #RangeTable Table
(
ID Int,
RangeStart INT,
RangeEnd INT,
RangeText Varchar(50),
);with CTE as (
SELECT temp.Days,
rn = ROW_NUMBER() over(order by temp.Days asc),
temp.ID
FROM #TableWithDays temp)
INSERT #RangeTable
SELECT
ID= d1.ID,
RangeStart= ISNULL(d1.Days, 0),
RangeEnd = ISNULL(d2.Days- 1, 9999),
RangeText =
CASE WHEN (d1.Days = d2.Days - 1)
THEN CAST(d1.Days AS VARCHAR(100))
ELSE
ISNULL(CAST(d1.Days AS VARCHAR(100)),'0') + ISNULL(' - '+
CAST(d2.Days - 1 AS VARCHAR(100)),' - 9999')END
FROM
CTE d1 LEFT JOIN
CTE d2
ON d1.rn = d2.rn - 1
You can use a recursive CTE. This would be simpler with lead(), but that is not available. So:
with t as (
select t.*, t2.days as next_days
from #TableWithDays t outer apply
(select top (1) t2.*
from #TableWithDays t2
where t2.id = t.id and t2.days > t.days
order by t2.days desc
) t2
),
cte as (
select t.id, t.days, t.next_days
from t
union all
select cte.id, cte.days + 1, cte.next_days
from cte
where cte.days < cte.next_days or
(cte.days < 9999 and cte.next_days is null)
)
select *
from cte
with option (maxrecursion 0);

grouping results based on time diff in sql

I have results like this
TimeDiffMin | OrdersCount
10 | 2
12 | 5
09 | 6
20 | 15
27 | 11
I would like the following
TimeDiffMin | OrdersCount
05 | 0
10 | 8
15 | 5
20 | 15
25 | 0
30 | 11
So you can see that i want the grouping of every 5 minutes and show the total order count in those 5 minutes. eg. 0-5 minutes 0 orders, 5-10 minutes 8 orders
any help would be appreciated.
current query:
SELECT TimeDifferenceInMinutes, count(OrderId) NumberOfOrders FROM (
SELECT AO.OrderID, AO.OrderDate, AON.CreatedDate AS CancelledDate, DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) AS TimeDifferenceInMinutes
FROM
(SELECT OrderID, OrderDate FROM AC_Orders) AO
JOIN
(SELECT OrderID, CreatedDate FROM AC_OrderNotes WHERE Comment LIKE '%has been cancelled.') AON
ON AO.OrderID = AON.OrderID
WHERE DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) <= 100 AND AO.OrderDate >= '2016-12-01'
) AS Temp1
GROUP BY TimeDifferenceInMinutes
Now, if you are open to a TVF.
I use this UDF to create dynamic Date/Time Ranges. You supply the range and increment
Declare #YourTable table (TimeDiffMin int,OrdersCount int)
Insert Into #YourTable values
(10, 2),
(12, 5),
(09, 6),
(20,15),
(27,11)
Select TimeDiffMin = cast(R2 as int)
,OrdersCount = isnull(sum(OrdersCount),0)
From (Select R1=RetVal,R2=RetVal+5 From [dbo].[udf-Range-Number](0,25,5)) A
Left Join (
-- Your Complicated Query
Select * From #YourTable
) B on TimeDiffMin >= R1 and TimeDiffMin<R2
Group By R1,R2
Order By 1
Returns
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11
The UDF if interested
CREATE FUNCTION [dbo].[udf-Range-Number] (#R1 money,#R2 money,#Incr money)
Returns Table
Return (
with cte0(M) As (Select cast((#R2-#R1)/#Incr as int)),
cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (Select M from cte0) Row_Number() over (Order By (Select NULL)) From cte1 a,cte1 b,cte1 c,cte1 d,cte1 e,cte1 f,cte1 g,cte1 h )
Select RetSeq=1,RetVal=#R1 Union All Select N+1,(N*#Incr)+#R1
From cte2
)
-- Max 100 million observations
-- Select * from [dbo].[udf-Range-Number](0,4,0.25)
You can do this using a derived table to first build up your time difference windows and then joining from that to sum up all the Orders that fall within that window.
declare #t table(TimeDiffMin int
,OrdersCount int
);
insert into #t values
(10, 2)
,(12, 5)
,(09, 6)
,(20,15)
,(27,11);
declare #Increment int = 5; -- Set your desired time windows here.
with n(n)
as
( -- Select 10 rows to start with:
select n from(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as n(n)
),n2 as
( -- CROSS APPLY these 10 rows to get 10*10=100 rows we can use to generate incrementing ROW_NUMBERs. Use more CROSS APPLYs to get more rows:
select (row_number() over (order by (select 1))-1) * #Increment as StartMin
,(row_number() over (order by (select 1))) * #Increment as EndMin
from n -- 10 rows
cross apply n n2 -- 100 rows
--cross apply n n3 -- 1000 rows
--cross apply n n4 -- 10000 rows
)
select m.EndMin as TimeDiffMin
,isnull(sum(t.OrdersCount),0) as OrdersCount
from n2 as m
left join #t t
on(t.TimeDiffMin >= m.StartMin
and t.TimeDiffMin < m.EndMin
)
where m.EndMin <= 30 -- Filter as required
group by m.EndMin
order by m.EndMin
Query result:
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11

How to get the second largest Time for a certain Foreign Key in SQL Server

For example if I have
I want to return the second largest time for each "FkID".
In this case it have to return the values with ID = 1, 2, 3, 4, 20, 23, 26,
with the associated time.
Here is some sample data:
ID FkID Time
1 1 13:22.9
2 2 14:44.8
3 3 15:43.4
4 4 16:31.0
19 11 06:20.6
20 11 06:28.6
21 11 06:36.3
22 12 02:34.9
23 12 02:39.5
24 12 02:44.3
25 13 22:47.2
26 13 22:56.3
27 13 23:01.8
28 14 13:03.3
Try this :
select max(date)
from table_name t1
where date < (select max(date) from table_name where FkID= t1.FkID)
group by FkID
Give ROW_NUMBER with CTE.
Query
;with cte as
(
select rn = row_number() over
(
partition by fkid
order by [Date] desc
), *
from your_table_name
)
select fkid, min([Date]) as [Date]
from cte
where rn < 3
group by fkid;

SQL Server query for finding the sum of 4 consecutive values

Can somebody help me in finding the sum of 4 consecutive values i.e rolling sum of last 4 values.
Like:
VALUE SUM
1 NULL
2 NULL
3 NULL
4 10
5 14
6 18
7 22
8 26
9 30
10 34
11 38
12 42
13 46
14 50
15 54
16 58
17 62
18 66
19 70
20 74
21 78
22 82
23 86
24 90
25 94
26 98
27 102
28 106
29 110
30 114
31 118
32 122
33 126
34 130
35 134
36 138
37 142
38 146
Thanks,
select sum(select top 4 Value from [table] order by Value Desc)
or, perhaps
select sum(value)
from [Table]
where Value >= (Max(Value) - 4)
I haven't actually tried either of those- and can't at the moment, but they should get you pretty close.
Quick attempt, which gets the results you've posted in your question (except the 1st 3 rows are not NULL). Assumes that VALUE field is unique and in ascending order:
-- Create test TABLE with 38 values in
DECLARE #T TABLE (Value INTEGER)
DECLARE #Counter INTEGER
SET #Counter = 1
WHILE (#Counter <= 38)
BEGIN
INSERT #T VALUES(#Counter)
SET #Counter = #Counter + 1
END
-- This gives the results
SELECT t1.VALUE, x.Val
FROM #T t1
OUTER APPLY(SELECT SUM(VALUE) FROM (SELECT TOP 4 VALUE FROM #T t2 WHERE t2.VALUE <= t1.VALUE ORDER BY t2.VALUE DESC) x) AS x(Val)
ORDER BY VALUE
At the very least, you should see the kind of direction I was heading in.
Assuming ID can give you the last 4 rows.
SELECT SUM([SUM])
FROM
(
SELECT TOP 4 [SUM] FROM myTable ORDER BY ID DESC
) foo
Each time you query it, it will read the last 4 rows.
If this is wrong (e.g. you want the sum of each consecutive 4 rows), then please give sample output
Following would work if your Value column is sequential
;WITH q (Value) AS (
SELECT 1
UNION ALL
SELECT q.Value + 1
FROM q
WHERE q.Value < 38
)
SELECT q.Value
, CASE WHEN q.Value >= 4 THEN q.Value * 4 - 6 ELSE NULL END
FROM q
otherwise you might use something like this
;WITH q (Value) AS (
SELECT 1
UNION ALL
SELECT q.Value + 1
FROM q
WHERE q.Value < 38
)
, Sequential (ID, Value) AS (
SELECT ID = ROW_NUMBER() OVER (ORDER BY Value)
, Value
FROM q
)
SELECT s1.Value
, [SUM] = s1.Value + s2.Value + s3.Value + s4.Value
FROM Sequential s1
LEFT OUTER JOIN Sequential s2 ON s2.ID = s1.ID - 1
LEFT OUTER JOIN Sequential s3 ON s3.ID = s2.ID - 1
LEFT OUTER JOIN Sequential s4 ON s4.ID = s3.ID - 1
Note that the table qin the examples is a stub for your actual table. The actual statement then becomes
;WITH Sequential (ID, Value) AS (
SELECT ID = ROW_NUMBER() OVER (ORDER BY Value)
, Value
FROM YourTable
)
SELECT s1.Value
, [SUM] = s1.Value + s2.Value + s3.Value + s4.Value
FROM Sequential s1
LEFT OUTER JOIN Sequential s2 ON s2.ID = s1.ID - 1
LEFT OUTER JOIN Sequential s3 ON s3.ID = s2.ID - 1
LEFT OUTER JOIN Sequential s4 ON s4.ID = s3.ID - 1