grouping results based on time diff in sql - sql

I have results like this
TimeDiffMin | OrdersCount
10 | 2
12 | 5
09 | 6
20 | 15
27 | 11
I would like the following
TimeDiffMin | OrdersCount
05 | 0
10 | 8
15 | 5
20 | 15
25 | 0
30 | 11
As you can see, I want the rows grouped into 5-minute intervals, showing the total order count for each interval, e.g. 0-5 minutes: 0 orders, 5-10 minutes: 8 orders.
Any help would be appreciated.
current query:
-- Number of cancelled orders per whole-minute gap between the order date and
-- the cancellation note. Only orders dated 2016-12-01 or later, whose
-- cancellation note was created at most 100 minutes after the order, are kept.
SELECT
    Cancelled.TimeDifferenceInMinutes,
    COUNT(Cancelled.OrderID) AS NumberOfOrders
FROM (
    SELECT
        AO.OrderID,
        AO.OrderDate,
        AON.CreatedDate AS CancelledDate,
        DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) AS TimeDifferenceInMinutes
    FROM AC_Orders AS AO
    INNER JOIN AC_OrderNotes AS AON
        ON AON.OrderID = AO.OrderID
    WHERE AON.Comment LIKE '%has been cancelled.'   -- cancellation notes only
      AND AO.OrderDate >= '2016-12-01'
      AND DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) <= 100
) AS Cancelled
GROUP BY Cancelled.TimeDifferenceInMinutes

If you are open to using a table-valued function (TVF):
I use this UDF to create dynamic date/time/number ranges. You supply the range bounds and the increment.
-- Demo data standing in for the output of the real (more complicated) query.
-- Table variables must be named with a leading @.
Declare @YourTable table (TimeDiffMin int, OrdersCount int)
Insert Into @YourTable (TimeDiffMin, OrdersCount) values
 (10, 2),
 (12, 5),
 ( 9, 6),
 (20,15),
 (27,11)

-- Build the buckets [R1, R2) = [0,5), [5,10), ... [25,30) from the range
-- function and LEFT JOIN the data onto them, so that empty buckets are still
-- returned (with a total of 0). Each bucket is labelled by its upper bound R2.
Select TimeDiffMin = cast(A.R2 as int)
      ,OrdersCount = isnull(sum(B.OrdersCount),0)
 From (Select R1=RetVal, R2=RetVal+5 From [dbo].[udf-Range-Number](0,25,5)) A
 Left Join (
            -- Your Complicated Query
            Select TimeDiffMin, OrdersCount From @YourTable
           ) B on B.TimeDiffMin >= A.R1 and B.TimeDiffMin < A.R2
 Group By A.R1, A.R2
 Order By A.R2
Returns
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11
The UDF if interested
-- Inline table-valued function returning an arithmetic sequence.
--   @R1   : first value of the sequence
--   @R2   : upper bound; the step count is cast((@R2-@R1)/@Incr as int)
--   @Incr : step size (used as a divisor, so it must be non-zero)
-- Returns (RetSeq, RetVal): row 1 is (1, @R1), followed by one row
-- (N+1, N*@Incr + @R1) for N = 1 .. step count.
CREATE FUNCTION [dbo].[udf-Range-Number] (@R1 money, @R2 money, @Incr money)
Returns Table
Return (
    -- cte0: number of steps between @R1 and @R2
    with cte0(M) As (Select cast((@R2-@R1)/@Incr as int)),
    -- cte1: ten constant rows used as the seed for the tally
         cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
    -- cte2: tally 1..M, built by cross joining the seed eight times (10^8 rows available)
         cte2(N) As (Select Top (Select M from cte0) Row_Number() over (Order By (Select NULL))
                     From cte1 a,cte1 b,cte1 c,cte1 d,cte1 e,cte1 f,cte1 g,cte1 h )
    Select RetSeq=1, RetVal=@R1
    Union All
    Select N+1, (N*@Incr)+@R1
    From cte2
)
-- Max 100 million observations
-- Select * from [dbo].[udf-Range-Number](0,4,0.25)

You can do this using a derived table to first build up your time difference windows and then joining from that to sum up all the Orders that fall within that window.
-- Sample data; table variables and scalar variables must be named with a leading @.
declare @t table(TimeDiffMin int
                ,OrdersCount int
                );
insert into @t (TimeDiffMin, OrdersCount) values
 (10, 2)
,(12, 5)
,( 9, 6)
,(20,15)
,(27,11);

declare @Increment int = 5;  -- Set your desired time windows here.

with n(n)
as
(   -- Select 10 rows to start with:
    select n from(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as n(n)
),n2 as
(   -- CROSS APPLY these 10 rows to get 10*10=100 rows we can use to generate incrementing ROW_NUMBERs. Use more CROSS APPLYs to get more rows:
    -- Each generated row is one window [StartMin, EndMin) of width @Increment.
    select (row_number() over (order by (select 1))-1) * @Increment as StartMin
          ,(row_number() over (order by (select 1))) * @Increment as EndMin
    from n              -- 10 rows
    cross apply n n2    -- 100 rows
    --cross apply n n3  -- 1000 rows
    --cross apply n n4  -- 10000 rows
)
-- LEFT JOIN keeps windows that contain no rows; ISNULL turns their NULL sum into 0.
select m.EndMin as TimeDiffMin
      ,isnull(sum(t.OrdersCount),0) as OrdersCount
from n2 as m
    left join @t t
        on(t.TimeDiffMin >= m.StartMin
           and t.TimeDiffMin < m.EndMin
          )
where m.EndMin <= 30    -- Filter as required
group by m.EndMin
order by m.EndMin
Query result:
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
-- Build a temp table of up to 999 rows with random-length descriptions (0-29 chars).
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
       LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects

-- Scalar variables must be named with a leading @ (#t above is a genuine temp table).
DECLARE @maxGroupSize INT
SET @maxGroupSize = 100

;WITH t AS (
    SELECT
        *,
        LEN(Description) AS DescriptionLength,
        SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
        -- GroupID is the running total integer-divided by @maxGroupSize: it says which
        -- multiple of @maxGroupSize the running total has reached. It does not restart
        -- the sum at a group boundary, so a group's own sum is not capped.
        SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/@maxGroupSize AS GroupID
    FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
However, I am getting groups that are larger than the maximum group size (length) of 100.
A recursive common table expression (rCTE) is one way to resolve this.
Sample data
Limited set of fixed sample data.
-- Fixed sample rows: sequential id plus a free-text description whose
-- length is what gets accumulated into groups.
CREATE TABLE data
(
    id          INT,
    description NVARCHAR(20)
);

INSERT INTO data (id, description)
VALUES ( 1, 'qmlsdkjfqmsldk'),
       ( 2, 'mldskjf'),
       ( 3, 'qmsdlfkqjsdm'),
       ( 4, 'fmqlsdkfq'),
       ( 5, 'qdsfqsdfqq'),
       ( 6, 'mds'),
       ( 7, 'qmsldfkqsjdmfqlkj'),
       ( 8, 'qdmsl'),
       ( 9, 'mqlskfjqmlkd'),
       (10, 'qsdqfdddffd');
Solution
At every recursion step, a case expression evaluates (r.group_running_length + len(d.description) <= @group_max_length) to decide whether the current group is extended or a new group is started.
The maximum group length is set to 40 to better fit the sample data.
-- Maximum total description length allowed within one group.
-- (Variables must be named with a leading @.)
declare @group_max_length int = 40;

-- Walk the rows in id order (the anchor is id = 1, each step joins id + 1, so ids
-- are expected to be contiguous starting at 1). For each row carry:
--   running_length       : total length over all rows so far
--   group_id             : current group number
--   group_running_length : total length within the current group
with rcte as
(
  select d.id,
         d.description,
         len(d.description) as description_length,
         len(d.description) as running_length,
         1 as group_id,
         len(d.description) as group_running_length
  from data d
  where d.id = 1
    union all
  select d.id,
         d.description,
         len(d.description),
         r.running_length + len(d.description),
         -- stay in the current group while the row still fits, otherwise open a new one
         case
           when r.group_running_length + len(d.description) <= @group_max_length
           then r.group_id
           else r.group_id + 1
         end,
         -- the group total either grows or restarts at this row's length
         case
           when r.group_running_length + len(d.description) <= @group_max_length
           then r.group_running_length + len(d.description)
           else len(d.description)
         end
  from rcte r
  join data d
    on d.id = r.id + 1
)
select r.id,
       r.description,
       r.description_length,
       r.running_length,
       r.group_id,
       r.group_running_length,
       -- The group total is the largest running value reached inside the group.
       -- A window aggregate avoids evaluating the recursive CTE a second time.
       max(r.group_running_length) over (partition by r.group_id) as group_sum
from rcte r
order by r.id
-- The default recursion limit is 100 levels; lift it so longer inputs work.
option (maxrecursion 0);
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

SQL Server - Building out a dynamic range of numbers while grouping by a specific column

I have the following data:
ID Days
----------------------- --------
1 5
1 10
1 15
2 5
2 13
2 15
I am trying to build out a range of numbers based on the days while grouping by their ID.
For ID Group 1: The range would start at 5 and end at 9. The next range would be 10-14, and then the final range would be 15-9999
For ID Group 2: The range would start at 5 and end at 12. The next range would be 13-14, and then the final range would be 15-9999
The resulting table would look something like this:
RangeStart RangeEnd RangeText ID
----------- ----------- --------- ----
5 9 5 - 9 1
10 14 10 - 14 1
15 9999 15 - 9999 1
5 12 5 - 12 2
13 14 13 - 14 2
15 9999 15 - 9999 2
I have attempted to use a CTE which works but only when I am not grouping by ID's.
-- Target table for the computed ranges (table variables must be named with a leading @).
Declare @RangeTable Table
(
    ID Int,
    RangeStart INT,
    RangeEnd INT,
    RangeText Varchar(50)
);

-- Number the rows by Days and pair every row with the following row: a range runs
-- from this row's Days to the next row's Days - 1 (or 9999 when there is no next row).
-- NOTE: rn is numbered over the whole set and the self-join matches on rn only, so
-- rows are chained across different IDs -- this is the limitation described in the
-- question. #TableWithDays is referenced as written; it is not defined in this snippet.
with CTE as (
    SELECT temp.Days,
           rn = ROW_NUMBER() over(order by temp.Days asc),
           temp.ID
    FROM #TableWithDays temp
)
INSERT @RangeTable (ID, RangeStart, RangeEnd, RangeText)
SELECT
    ID         = d1.ID,
    RangeStart = ISNULL(d1.Days, 0),
    RangeEnd   = ISNULL(d2.Days - 1, 9999),
    RangeText  =
        CASE WHEN (d1.Days = d2.Days - 1)
             -- single-day range: show just the one value
             THEN CAST(d1.Days AS VARCHAR(100))
             ELSE ISNULL(CAST(d1.Days AS VARCHAR(100)),'0')
                  + ISNULL(' - ' + CAST(d2.Days - 1 AS VARCHAR(100)),' - 9999')
        END
FROM CTE d1
LEFT JOIN CTE d2
    ON d1.rn = d2.rn - 1
You can use a recursive CTE. This would be simpler with lead(), but that is not available. So:
-- t: every row plus the next larger Days value for the same id (NULL for the last row).
with t as (
      select t.*, nxt.days as next_days
      from #TableWithDays t outer apply
           (select top (1) t2.days
            from #TableWithDays t2
            where t2.id = t.id and t2.days > t.days
            -- ascending: the smallest later value is the *next* one
            order by t2.days asc
           ) nxt
     ),
     -- cte: expand each starting row into one row per day of its range. The range
     -- stops one day before next_days (that day starts the following range), or
     -- at 9999 when there is no following row.
     cte as (
      select t.id, t.days, t.next_days
      from t
      union all
      select cte.id, cte.days + 1, cte.next_days
      from cte
      where cte.days + 1 < cte.next_days or
            (cte.days < 9999 and cte.next_days is null)
     )
select id, days, next_days
from cte
-- the open-ended range recurses up to 9999, well past the default limit of 100
option (maxrecursion 0);

Get Interval Percentage

Please consider this Table:
FileName FileSize
----------------------------
1 33
2 198
3 10
4 127
5 85
6 23
7 105
8 158
9 78
10 90
and I want to produce a result like this:
FileSize Percentage
--------------------------------
1-50 30%
50-150 50%
150-200 20%
How can I group the rows by interval and compute the percentage of files in each interval?
Thanks
For a query without bounds table you can create a table inline using VALUES constructor like below
-- Share of files per size interval, with the intervals supplied inline via VALUES.
-- Intervals are half-open: LowNumber <= FileSize < HighNumber.
-- GROUP BY is used instead of SELECT DISTINCT over per-row window counts: SQL Server
-- rejects ORDER BY on a column (LowNumber) that is not in a DISTINCT select list.
select
    CAST(I.LowNumber as VARCHAR(12)) + ' - ' + CAST(I.HighNumber as VARCHAR(12)) as FileSize,
    -- files in this interval / files in any interval; 100.00 forces decimal division
    COUNT(*) * 100.00 / SUM(COUNT(*)) OVER () as percentage
from TblFile F
join (values (1, 50),(50, 150),(150, 200)) as I(LowNumber, HighNumber)
    on F.FileSize >= I.LowNumber and F.FileSize < I.HighNumber
group by I.LowNumber, I.HighNumber
order by I.LowNumber
Your query should look like
-- Share of files per size interval, with the intervals read from TblInterval.
-- Intervals are half-open: LowNumber <= FileSize < HighNumber.
-- One output row per interval via GROUP BY (rather than DISTINCT over per-row
-- window counts), ordered by the interval's lower bound for a stable result.
select
    CAST(I.LowNumber as VARCHAR(12)) + ' - ' + CAST(I.HighNumber as VARCHAR(12)) as FileSize,
    -- files in this interval / files in any interval; 100.00 forces decimal division
    COUNT(*) * 100.00 / SUM(COUNT(*)) OVER () as percentage
from TblFile F
join TblInterval I
    on F.FileSize >= I.LowNumber and F.FileSize < I.HighNumber
group by I.LowNumber, I.HighNumber
order by I.LowNumber
Explanation:
Ideally you should use a set-based approach and store the range values in a table. This allows for faster processing and gives you a single place to maintain the limits, outside the procedure rather than hard-coded in it. This is also in line with the dependency injection principle.
For inline anonymous tables use VALUES constructor. More on this at this msdn link
PS: Insert scripts for table
-- Optional interval lookup table (left commented out, as in the original script):
--create table tblInterval (LowNumber Int, HighNumber Int)
--insert into tblInterval values
--(1,50),(50,150),(150,200)

-- Sample files: a numeric file name and its size.
CREATE TABLE tblFile
(
    fileName INT,
    fileSize INT
)

INSERT INTO tblFile (fileName, fileSize)
VALUES ( 1,  33),
       ( 2, 198),
       ( 3,  10),
       ( 4, 127),
       ( 5,  85),
       ( 6,  23),
       ( 7, 105),
       ( 8, 158),
       ( 9,  78),
       (10,  90)
Assuming you have a table like below
TblInterval
LowNumber HighNumber
1 50
50 150
150 200
You can use case statement to make the file size range then get percentage by the count like this
-- Label every file with its size range, then report each range's share of all
-- rows in MyTable as a whole-number percentage (integer division).
-- The ranges share their boundary values (50, 150); the first matching WHEN wins.
select
    labelled.range as FileSize,
    (Count(*) * 100 / (Select Count(*) From MyTable)) as Percentage
from (
    select
        case
            when FileSize >= 1   and FileSize <= 50  then '1-50'
            when FileSize >= 50  and FileSize <= 150 then '50-150'
            when FileSize >= 150 and FileSize <= 200 then '150-200'
        end as range
    from MyTable
) labelled
group by labelled.range
-- Temp table holding the sample files.
CREATE TABLE #A
(
    FILENAME INT,
    FILESIZE INT
)

INSERT INTO #A VALUES
    ( 1,  33),
    ( 2, 198),
    ( 3,  10),
    ( 4, 127),
    ( 5,  85),
    ( 6,  23),
    ( 7, 105),
    ( 8, 158),
    ( 9,  78),
    (10,  90)

-- Whole-number percentage of rows per size range, listed from the smallest
-- range to the largest. Boundary values belong to the first matching range.
SELECT
    RANGE,
    COUNT(*) * 100 / (SELECT COUNT(*) FROM #A) AS PERCENTAGE
FROM (
    SELECT
        CASE
            WHEN FILESIZE >= 1   AND FILESIZE <= 50  THEN '1-50'
            WHEN FILESIZE >= 50  AND FILESIZE <= 150 THEN '50-150'
            WHEN FILESIZE >= 150 AND FILESIZE <= 200 THEN '150-200'
        END AS RANGE
    FROM #A
) A
GROUP BY A.RANGE
ORDER BY
    CASE RANGE
        WHEN '1-50'    THEN 1
        WHEN '50-150'  THEN 2
        WHEN '150-200' THEN 3
    END
output
RANGE PERCENTAGE
1-50 30
50-150 50
150-200 20
You can use AVG together with partition by:
-- Percentage of rows per size range using window aggregates with PARTITION BY.
-- The share is (rows in the range) / (all rows); an aggregate over FileSize itself
-- (such as its average) would describe the file sizes, not how many files fall in
-- each range. One CASE expression is used for both the label and the partition so
-- the two cannot drift apart.
select distinct
       b.range
     , count(*) over (partition by b.range) * 100.0 / count(*) over () as percentage
from (
      select case
               when FileSize between 1 and 50 then '1-50'
               when FileSize between 50 and 150 then '50-150'
               when FileSize between 150 and 200 then '150-200' end as range
      from mytable
     ) b

sql : get consecutive group 'n' rows (could be inbetween)

Below is my theater table:
-- Seats in a theater: srno is the running row number, seatno the seat label,
-- available flags whether the seat can still be booked.
CREATE TABLE theater
(
    srno      integer,
    seatno    integer,
    available boolean
);

INSERT INTO theater (srno, seatno, available)
VALUES (1, 100, true),
       (2, 200, true),
       (3, 300, true),
       (4, 400, false),
       (5, 500, true),
       (6, 600, true),
       (7, 700, true),
       (8, 800, true);
I want a SQL query that takes 'n' as input and returns the first 'n' consecutive available seats, for example:
if n = 2 output should be 100,200
if n = 4 output should be 500,600,700,800
NOTE: I am trying to build an query for postgres 9.3
In SQL Server you can do it as follows:
-- Number of consecutive seats wanted (variables must be named with a leading @).
DECLARE @num INT = 4

;WITH cte AS
(
    -- pt = number of available seats that share the same cnt, i.e. the size of
    -- the run of available seats this row belongs to.
    SELECT *, COUNT(1) OVER(PARTITION BY cnt) pt FROM
    (
        SELECT tt.*
            -- cnt = how many unavailable seats precede this one; seats in the same
            -- uninterrupted run of available seats get the same value
            ,(SELECT COUNT(srno) FROM theater t WHERE available <> 'true' and srno < tt.srno) AS cnt
        FROM theater tt
        WHERE available = 'true'
    ) t1
)
-- ORDER BY makes TOP return the *first* qualifying seats; without it the rows
-- chosen by TOP are not guaranteed.
SELECT TOP (@num) srno, seatno, available
FROM cte
WHERE pt >= @num
ORDER BY srno
OUTPUT
srno seatno available
5 500 true
6 600 true
7 700 true
8 800 true
This will find the available seats. Written for SQL Server 2008+:
-- Number of consecutive seats wanted (variables must be named with a leading @).
DECLARE @num INT = 4

;WITH CTE as
(
  SELECT
    -- srno minus its rank among rows with the same availability is constant
    -- within an unbroken run of such rows, so it identifies the run
    srno-row_number() over (partition by available order by srno) grp,
    srno, seatno, available
  FROM theater
), CTE2 as
(
  -- cnt = length of the run of available seats this row belongs to
  SELECT grp, count(*) over (partition by grp) cnt,
    srno, seatno, available
  FROM CTE
  WHERE available = 'true'
)
-- first @num seats, in seat order, taken from runs that are long enough
SELECT top(@num)
  srno, seatno, available
FROM CTE2
WHERE cnt >= @num
ORDER BY srno
Result:
srno seatno available
5 500 1
6 600 1
7 700 1
8 800 1
-- naive solution without window using functions
-- [the funny +-100 constants are caused by
-- "consecutive" seats being 100 apart]
-- -------------------------------------------
-- Overall: list every maximal run ("island") of available seats as
-- (bot, top, cnt) = first seat number, last seat number, number of seats.
WITH bot AS ( -- start of an island --
-- an available seat with no available seat 100 below it
SELECT seatno FROM theater t
WHERE t.available
AND NOT EXISTS (select * from theater x
where x.available AND x.seatno = t.seatno -100)
)
, top AS ( -- end of an island --
-- an available seat with no available seat 100 above it
SELECT seatno FROM theater t
WHERE t.available
AND NOT EXISTS (select * from theater x
where x.available AND x.seatno = t.seatno +100)
)
, mid AS ( -- [start,end] without intervening gaps --
-- pair each start with every end at or after it, keeping only pairs
-- with no unavailable seat between the two (inclusive)
SELECT l.seatno AS bot, h.seatno AS top
FROM bot l
JOIN top h ON h.seatno >= l.seatno
AND NOT EXISTS (
SELECT * FROM theater x
WHERE NOT x.available
AND x.seatno >= l.seatno AND x.seatno <= h.seatno)
)
-- all the consecutive ranges
-- [ the end query should select from this
-- , using "cnt >= xxx" ]
SELECT bot, top
-- seats are 100 apart, so the seat count is the span / 100, plus one
, 1+(top-bot)/100 AS cnt
FROM mid;
Result:
bot | top | cnt
-----+-----+-----
100 | 300 | 3
500 | 800 | 4
(2 rows)
Thanks, everyone, but I have solved it as shown below:
-- First 2 consecutive available seats (PostgreSQL).
--   group_flag : 1 on every row whose availability differs from the previous row
--                (and on the first row), NULL otherwise
--   grp        : running sum of group_flag = id of the run the row belongs to
--   grp1       : number of rows in that run
select tr.srno, tr.seatno
from (
    -- PARTITION BY grp counts the rows of each run separately; ordering the window
    -- by grp instead would give a running count that also includes earlier runs.
    select tx.*,
           count(*) over (partition by tx.grp) as grp1
    from (
        select t1.*,
               sum(t1.group_flag) over (order by t1.srno) as grp
        from (
            select th.*,
                   case
                       when lag(th.available) over (order by th.srno) = th.available then null
                       else 1
                   end as group_flag
            from theater th
        ) t1
    ) tx
) tr
where tr.available = true
  and tr.grp1 >= 2
-- order before limiting so the *first* matching seats are returned
order by tr.srno
limit 2

How to limit the selection in SQL Server by sum of a column?

Can I limit rows by sum of a column in a SQL Server database?
For example:
Type | Time (in minutes)
-------------------------
A | 50
B | 10
C | 30
D | 20
E | 70
...
And I want to limit the selection by sum of time. For example maximum of 100 minutes. Table must look like this:
Type | Time (in minutes)
-------------------------
A | 50
B | 10
C | 30
Any ideas? Thanks.
-- Sample data (table variables must be named with a leading @).
DECLARE @T TABLE
(
    [Type] CHAR(1) PRIMARY KEY,
    [Time] INT
)

INSERT INTO @T ([Type], [Time])
SELECT 'A',50 UNION ALL
SELECT 'B',10 UNION ALL
SELECT 'C',30 UNION ALL
SELECT 'D',20 UNION ALL
SELECT 'E',70;

-- Walk the rows in [Type] order, carrying the running total, and stop at the
-- first row that would push the total above 100.
WITH RecursiveCTE
AS (
    -- anchor: the first row by [Type]
    SELECT TOP 1 [Type], [Time], CAST([Time] AS BIGINT) AS Total
    FROM @T
    ORDER BY [Type]
    UNION ALL
    -- step: of all rows after the current one, take the next (rn = 1) and
    -- keep going only while the new total stays within the limit
    SELECT R.[Type], R.[Time], R.Total
    FROM (
        SELECT T.*,
               T.[Time] + Total AS Total,
               rn = ROW_NUMBER() OVER (ORDER BY T.[Type])
        FROM @T T
        JOIN RecursiveCTE R
            ON R.[Type] < T.[Type]
    ) R
    WHERE R.rn = 1 AND Total <= 100
)
SELECT [Type], [Time], Total
FROM RecursiveCTE
OPTION (MAXRECURSION 0);

-- Or if your table is small
-- (self-join: each row is joined to itself and every earlier row, and the row
--  is kept while that cumulative sum is at most 100)
SELECT t1.[Type],
       t1.[Time],
       SUM(t2.[Time]) AS Total
FROM @T t1
JOIN @T t2
    ON t2.[Type] <= t1.[Type]
GROUP BY t1.[Type], t1.[Time]
HAVING SUM(t2.[Time]) <= 100
ORDER BY t1.[Type]