SQL Server - Building out a dynamic range of numbers while grouping by a specific column - sql

I have the following data:
ID Days
----------------------- --------
1 5
1 10
1 15
2 5
2 13
2 15
I am trying to build out a range of numbers based on the days while grouping by their ID.
For ID Group 1: The range would start at 5 and end at 9. The next range would be 10-14, and then the final range would be 15-9999
For ID Group 2: The range would start at 5 and end at 12. The next range would be 13-14, and then the final range would be 15-9999
The resulting table would look something like this:
RangeStart RangeEnd RangeText ID
----------- ----------- --------- ----
5 9 5 - 9 1
10 14 10 - 14 1
15 9999 15 - 9999 1
5 12 5 - 12 2
13 14 13 - 14 2
15 9999 15 - 9999 2
I have attempted to use a CTE which works but only when I am not grouping by ID's.
Declare #RangeTable Table
(
ID Int,
RangeStart INT,
RangeEnd INT,
RangeText Varchar(50),
);with CTE as (
SELECT temp.Days,
rn = ROW_NUMBER() over(order by temp.Days asc),
temp.ID
FROM #TableWithDays temp)
INSERT #RangeTable
SELECT
ID= d1.ID,
RangeStart= ISNULL(d1.Days, 0),
RangeEnd = ISNULL(d2.Days- 1, 9999),
RangeText =
CASE WHEN (d1.Days = d2.Days - 1)
THEN CAST(d1.Days AS VARCHAR(100))
ELSE
ISNULL(CAST(d1.Days AS VARCHAR(100)),'0') + ISNULL(' - '+
CAST(d2.Days - 1 AS VARCHAR(100)),' - 9999')END
FROM
CTE d1 LEFT JOIN
CTE d2
ON d1.rn = d2.rn - 1

You can use a recursive CTE. This would be simpler with lead(), but that is not available. So:
with t as (
select t.*, t2.days as next_days
from #TableWithDays t outer apply
(select top (1) t2.*
from #TableWithDays t2
where t2.id = t.id and t2.days > t.days
order by t2.days desc
) t2
),
cte as (
select t.id, t.days, t.next_days
from t
union all
select cte.id, cte.days + 1, cte.next_days
from cte
where cte.days < cte.next_days or
(cte.days < 9999 and cte.next_days is null)
)
select *
from cte
with option (maxrecursion 0);

Related

Break up running sum into maximum group size / length

I am trying to break up a running (ordered) sum into groups of a max value. When I implement the following example logic...
IF OBJECT_ID(N'tempdb..#t') IS NOT NULL DROP TABLE #t
SELECT TOP (ABS(CHECKSUM(NewId())) % 1000) ROW_NUMBER() OVER (ORDER BY name) AS ID,
LEFT(CAST(NEWID() AS NVARCHAR(100)),ABS(CHECKSUM(NewId())) % 30) AS Description
INTO #t
FROM sys.objects
DECLARE #maxGroupSize INT
SET #maxGroupSize = 100
;WITH t AS (
SELECT
*,
LEN(Description) AS DescriptionLength,
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID) AS [RunningLength],
SUM(LEN(Description)) OVER (/*PARTITION BY N/A */ ORDER BY ID)/#maxGroupSize AS GroupID
FROM #t
)
SELECT *, SUM(DescriptionLength) OVER (PARTITION BY GroupID) AS SumOfGroup
FROM t
ORDER BY GroupID, ID
I am getting groups that are larger than the maximum group size (length) of 100.
A recusive common table expression (rcte) would be one way to resolve this.
Sample data
Limited set of fixed sample data.
create table data
(
id int,
description nvarchar(20)
);
insert into data (id, description) values
( 1, 'qmlsdkjfqmsldk'),
( 2, 'mldskjf'),
( 3, 'qmsdlfkqjsdm'),
( 4, 'fmqlsdkfq'),
( 5, 'qdsfqsdfqq'),
( 6, 'mds'),
( 7, 'qmsldfkqsjdmfqlkj'),
( 8, 'qdmsl'),
( 9, 'mqlskfjqmlkd'),
(10, 'qsdqfdddffd');
Solution
For every recursion step evaluate (r.group_running_length + len(d.description) <= #group_max_length) if the previous group must be extended or a new group must be started in a case expression.
Set group target size to 40 to better fit the sample data.
declare #group_max_length int = 40;
with rcte as
(
select d.id,
d.description,
len(d.description) as description_length,
len(d.description) as running_length,
1 as group_id,
len(d.description) as group_running_length
from data d
where d.id = 1
union all
select d.id,
d.description,
len(d.description),
r.running_length + len(d.description),
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_id
else r.group_id + 1
end,
case
when r.group_running_length + len(d.description) <= #group_max_length
then r.group_running_length + len(d.description)
else len(d.description)
end
from rcte r
join data d
on d.id = r.id + 1
)
select r.id,
r.description,
r.description_length,
r.running_length,
r.group_id,
r.group_running_length,
gs.group_sum
from rcte r
cross apply ( select max(r2.group_running_length) as group_sum
from rcte r2
where r2.group_id = r.group_id ) gs -- group sum
order by r.id;
Result
Contains both the running group length as well as the group sum for every row.
id description description_length running_length group_id group_running_length group_sum
-- ---------------- ------------------ -------------- -------- -------------------- ---------
1 qmlsdkjfqmsldk 14 14 1 14 33
2 mldskjf 7 21 1 21 33
3 qmsdlfkqjsdm 12 33 1 33 33
4 fmqlsdkfq 9 42 2 9 39
5 qdsfqsdfqq 10 52 2 19 39
6 mds 3 55 2 22 39
7 qmsldfkqsjdmfqlkj 17 72 2 39 39
8 qdmsl 5 77 3 5 28
9 mqlskfjqmlkd 12 89 3 17 28
10 qsdqfdddffd 11 100 3 28 28
Fiddle to see things in action (includes random data version).

Generate group of data based on sum of values

How to calculate the sum and total files.
here is my table
**FileId** **FileSize(MB)**
1 5
2 4
3 1
4 6
5 8
6 1
7 7
8 2
Expected result
BatchNo StartId EndId BatchSize
1 1 3 10
2 4 4 6
3 5 6 9
4 7 8 9
If File Size >= 10 then start new batch
also file count per batch is >= 10 then start new batch
StartId and EndId based on FileId
and BatchNo Is AutoIncrement
You can use recursive query like this
with rdata as
(
select row_number() over (order by fileId) rn, * from data
), rcte as
(
select 1 no, 1 gr, fileSize fileSizeSum , *
from rdata where fileid = 1
union all
select case when fileSizeSum + d.fileSize > 10 or r.no = 10 then 1 else r.no + 1 end gr,
case when fileSizeSum + d.fileSize > 10 or r.no = 10 then r.gr + 1 else r.gr end gr,
case when fileSizeSum + d.fileSize > 10 or r.no = 10 then d.fileSize else d.fileSize + fileSizeSum end fileSizeSum,
d.*
from rcte r
join rdata d on r.rn + 1 = d.rn
)
select r.gr,
min(fileId),
max(fileId),
max(fileSizeSum)
from rcte r
group by r.gr
dbfiddle
Here is another solution, with different approach:
INSERT INTO #batchdetails (FileID, FileSizeTotal, GroupID)
SELECT FileID, (
SELECT SUM(filesize) FROM #filedetails f2
WHERE f1.fileid >= f2.fileid ) AS FileSizeTotal,
1+CONVERT(INT,(
SELECT SUM(filesize) FROM #filedetails f2
WHERE f1.fileid >= f2.fileid
)/(#filesizepergroup+0.1)) AS GroupID
FROM #filedetails f1
SELECT DISTINCT
BatchDetails.GroupID AS BatchNo,
MIN(BatchDetails.FileID) OVER (PARTITION BY BatchDetails.GroupID ORDER BY BatchDetails.GroupID) AS StartID,
MAX(BatchDetails.FileID) OVER (PARTITION BY BatchDetails.GroupID ORDER BY BatchDetails.GroupID) AS EndID,
BatchSizeGroup.BatchSize
FROM #batchdetails BatchDetails
INNER JOIN (
SELECT GroupID, (GroupFileSizeTotal - LAG(GroupFileSizeTotal,1,0) OVER (ORDER BY GroupID)) AS BatchSize
FROM (
SELECT DISTINCT
GroupID,
MAX(FileSizeTotal) OVER (PARTITION BY GroupID ORDER BY GroupID) AS GroupFileSizeTotal
FROM #batchdetails
GROUP BY GroupID, FileSizeTotal
)A
)BatchSizeGroup ON BatchDetails.GroupID = BatchSizeGroup.GroupID
GROUP BY BatchDetails.GroupID, BatchDetails.FileID, BatchSizeGroup.BatchSize
Demo is Here : dbfiddle

grouping results based on time diff in sql

I have results like this
TimeDiffMin | OrdersCount
10 | 2
12 | 5
09 | 6
20 | 15
27 | 11
I would like the following
TimeDiffMin | OrdersCount
05 | 0
10 | 8
15 | 5
20 | 15
25 | 0
30 | 11
So you can see that i want the grouping of every 5 minutes and show the total order count in those 5 minutes. eg. 0-5 minutes 0 orders, 5-10 minutes 8 orders
any help would be appreciated.
current query:
SELECT TimeDifferenceInMinutes, count(OrderId) NumberOfOrders FROM (
SELECT AO.OrderID, AO.OrderDate, AON.CreatedDate AS CancelledDate, DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) AS TimeDifferenceInMinutes
FROM
(SELECT OrderID, OrderDate FROM AC_Orders) AO
JOIN
(SELECT OrderID, CreatedDate FROM AC_OrderNotes WHERE Comment LIKE '%has been cancelled.') AON
ON AO.OrderID = AON.OrderID
WHERE DATEDIFF(minute, AO.OrderDate, AON.CreatedDate) <= 100 AND AO.OrderDate >= '2016-12-01'
) AS Temp1
GROUP BY TimeDifferenceInMinutes
Now, if you are open to a TVF.
I use this UDF to create dynamic Date/Time Ranges. You supply the range and increment
Declare #YourTable table (TimeDiffMin int,OrdersCount int)
Insert Into #YourTable values
(10, 2),
(12, 5),
(09, 6),
(20,15),
(27,11)
Select TimeDiffMin = cast(R2 as int)
,OrdersCount = isnull(sum(OrdersCount),0)
From (Select R1=RetVal,R2=RetVal+5 From [dbo].[udf-Range-Number](0,25,5)) A
Left Join (
-- Your Complicated Query
Select * From #YourTable
) B on TimeDiffMin >= R1 and TimeDiffMin<R2
Group By R1,R2
Order By 1
Returns
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11
The UDF if interested
CREATE FUNCTION [dbo].[udf-Range-Number] (#R1 money,#R2 money,#Incr money)
Returns Table
Return (
with cte0(M) As (Select cast((#R2-#R1)/#Incr as int)),
cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
cte2(N) As (Select Top (Select M from cte0) Row_Number() over (Order By (Select NULL)) From cte1 a,cte1 b,cte1 c,cte1 d,cte1 e,cte1 f,cte1 g,cte1 h )
Select RetSeq=1,RetVal=#R1 Union All Select N+1,(N*#Incr)+#R1
From cte2
)
-- Max 100 million observations
-- Select * from [dbo].[udf-Range-Number](0,4,0.25)
You can do this using a derived table to first build up your time difference windows and then joining from that to sum up all the Orders that fall within that window.
declare #t table(TimeDiffMin int
,OrdersCount int
);
insert into #t values
(10, 2)
,(12, 5)
,(09, 6)
,(20,15)
,(27,11);
declare #Increment int = 5; -- Set your desired time windows here.
with n(n)
as
( -- Select 10 rows to start with:
select n from(values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as n(n)
),n2 as
( -- CROSS APPLY these 10 rows to get 10*10=100 rows we can use to generate incrementing ROW_NUMBERs. Use more CROSS APPLYs to get more rows:
select (row_number() over (order by (select 1))-1) * #Increment as StartMin
,(row_number() over (order by (select 1))) * #Increment as EndMin
from n -- 10 rows
cross apply n n2 -- 100 rows
--cross apply n n3 -- 1000 rows
--cross apply n n4 -- 10000 rows
)
select m.EndMin as TimeDiffMin
,isnull(sum(t.OrdersCount),0) as OrdersCount
from n2 as m
left join #t t
on(t.TimeDiffMin >= m.StartMin
and t.TimeDiffMin < m.EndMin
)
where m.EndMin <= 30 -- Filter as required
group by m.EndMin
order by m.EndMin
Query result:
TimeDiffMin OrdersCount
5 0
10 6
15 7
20 0
25 15
30 11

Getting the minimum of column on the basis of other field

I have a scenario wherein I have
Id|rank| date
1 | 7 |07/08/2015
1 | 7 |09/08/2015
1 | 8 |16/08/2015
1 | 8 |17/08/2015
1 | 7 |19/08/2015
1 | 7 |15/08/2015
2 | 7 |01/08/2015
2 | 7 |02/08/2015
2 | 8 |16/08/2015
2 | 8 |17/08/2015
2 | 7 |26/08/2015
2 | 7 |28/08/2015
My desired solution is
1 | 7 |07/08/2015
1 | 8 |16/08/2015
1 | 7 |15/08/2015
2 | 7 |01/08/2015
2 | 8 |16/08/2015
2 | 7 |26/08/2015
i.e for each block of id and rank I want the minimum of date.
I have tried using while loop as there are thousands of records it is taking 2 hours to load.Is there any other way to do please suggest.
For each row give unique row number using necessary order. (As I get Id is more important than date and date is more important than rank).
Join resulting table to itself using row numbers shifted by one row (d1.RowNum = d2.RowNum+1).
Select only rows that are joined to "other block" rows (d1.Id <> d2.Id or d1.Rank <> d2.rank).
Depending on shifting direction and selected table either maximal or minimal date will be selected.
Don't forget "edge case" - row that due to shifting can't be joined (that's why not inner join and d1.RowNum = 1 condition used).
;WITH dataWithRowNums as (
select Id, Rank, Date,
RowNum = ROW_NUMBER() OVER (ORDER BY Id,date,rank)
from YourTable
)
select d1.Id, d1.Rank, d1.Date
from dataWithRowNums d1
left join dataWithRowNums d2
on d1.RowNum = d2.RowNum+1 and (d1.Id <> d2.Id or d1.Rank <> d2.rank)
where not d2.Id is null or d1.RowNum = 1
This code returns result bit different from yours:
Id Rank Date
1 7 2015-08-07
1 8 2015-08-16
1 7 2015-08-19 <-- you've got here 2015-08-15
2 7 2015-08-01
2 8 2015-08-16
2 7 2015-08-26
As block (Rank 8 Id 1) have started at 16/08 so row 15/08 for rank 7 is related to first block (rank7 Id1).
If you still need your sorting (so 15/08 rank 7 is related to second block (rank7 id1)) then you should provide your own RowSorting data and then ask here about another solution for another task )
Here is the query using row_number()
;WITH cte_rec
as (SELECT Id,Rank,Date
,ROW_NUMBER()OVER (partition by Id,Rank ORDER BY date) as RNO
FROM YourTable)
SELECT Id,Rank,Date
FROM cte_rec
WHERE RNO =1
This is what I have tried and is running as expected
create table #temp
(
iden int identity(1,1),
ID int,
[rank] int,
[date] date,
dr_id int,
rownum_id int,
grouprecord int
)
Insert into #temp(id,rank,date)
select 1 , 7 ,'07/08/2015'
union all select 1 , 7 ,'09/08/2015'
union all select 1 , 8 ,'08/16/2015'
union all select 1 , 8 ,'08/17/2015'
union all select 1 , 7 ,'08/19/2015'
union all select 1 , 7 ,'08/15/2015'
union all select 2 , 7 ,'08/01/2015'
union all select 2 , 7 ,'08/02/2015'
union all select 2 , 8 ,'08/16/2015'
union all select 2 , 8 ,'08/17/2015'
union all select 2 , 7 ,'08/26/2015'
union all select 2 , 7 ,'08/28/2015'
update t1
set dr_id = t2.rn
from #temp t1 inner join
(select iden, dense_rank() over(order by id) as rn from #temp) t2
on t1.iden = t2.iden
update t1
set rownum_id = t2.rn
from #temp t1 inner join
(select iden, row_number() over(partition by dr_id order by id) as rn from #temp) t2
on t1.iden = t2.iden
select *,row_number() over(order by iden)rn into #temp1 from
(
select t2.*
from #temp t1 inner join #temp t2
on (t1.dr_id = t2.dr_id or t2.dr_id = (t1.dr_id +1) ) and ( t1.rank<>t2.rank or t2.dr_id = (t1.dr_id +1) )
and t2.iden = t1.iden + 1
)a
declare #id int,#miniden int,#maxiden int,#maxid int
set #id = 1
select #maxid = max(iden) from #temp
while exists(select 1 from #temp1 where rn = #id)
begin
Select #miniden = iden from #temp1
where rn = #id
Select #maxiden = iden from #temp1
where rn = #id+1
update #temp
set grouprecord = #id +1
where iden between #miniden and #maxiden
IF(#maxiden IS NULL)
BEGIN
Update #temp
set grouprecord = #id +1
where iden between #miniden and #maxid
END
set #id = #id + 1
SET #miniden =NULL
SET #maxiden = NULL
end
UPDATE #TEMP
SET GROUPRECORD = 1
WHERE GROUPRECORD IS NULL
select min(date) as mindate,grouprecord from #temp
group by grouprecord
Thanks everyone the help :)

What is the SQL for 'next' and 'previous' in a table?

I have a table of items, each of which has a date associated with it. If I have the date associated with one item, how do I query the database with SQL to get the 'previous' and 'subsequent' items in the table?
It is not possible to simply add (or subtract) a value, as the dates do not have a regular gap between them.
One possible application would be 'previous/next' links in a photo album or blog web application, where the underlying data is in a SQL table.
I think there are two possible cases:
Firstly where each date is unique:
Sample data:
1,3,8,19,67,45
What query (or queries) would give 3 and 19 when supplied 8 as the parameter? (or the rows 3,8,19). Note that there are not always three rows to be returned - at the ends of the sequence one would be missing.
Secondly, if there is a separate unique key to order the elements by, what is the query to return the set 'surrounding' a date? The order expected is by date then key.
Sample data:
(key:date) 1:1,2:3,3:8,4:8,5:19,10:19,11:67,15:45,16:8
What query for '8' returns the set:
2:3,3:8,4:8,16:8,5:19
or what query generates the table:
key date prev-key next-key
1 1 null 2
2 3 1 3
3 8 2 4
4 8 3 16
5 19 16 10
10 19 5 11
11 67 10 15
15 45 11 null
16 8 4 5
The table order is not important - just the next-key and prev-key fields.
Both TheSoftwareJedi and Cade Roux have solutions that work for the data sets I posted last night. For the second question, both seem to fail for this dataset:
(key:date) 1:1,2:3,3:8,4:8,5:19,10:19,11:67,15:45,16:8
The order expected is by date then key, so one expected result might be:
2:3,3:8,4:8,16:8,5:19
and another:
key date prev-key next-key
1 1 null 2
2 3 1 3
3 8 2 4
4 8 3 16
5 19 16 10
10 19 5 11
11 67 10 15
15 45 11 null
16 8 4 5
The table order is not important - just the next-key and prev-key fields.
Select max(element) From Data Where Element < 8
Union
Select min(element) From Data Where Element > 8
But generally it is more usefull to think of sql for set oriented operations rather than iterative operation.
Self-joins.
For the table:
/*
CREATE TABLE [dbo].[stackoverflow_203302](
[val] [int] NOT NULL
) ON [PRIMARY]
*/
With parameter #val
SELECT cur.val, MAX(prv.val) AS prv_val, MIN(nxt.val) AS nxt_val
FROM stackoverflow_203302 AS cur
LEFT JOIN stackoverflow_203302 AS prv
ON cur.val > prv.val
LEFT JOIN stackoverflow_203302 AS nxt
ON cur.val < nxt.val
WHERE cur.val = #val
GROUP BY cur.val
You could make this a stored procedure with output parameters or just join this as a correlated subquery to the data you are pulling.
Without the parameter, for your data the result would be:
val prv_val nxt_val
----------- ----------- -----------
1 NULL 3
3 1 8
8 3 19
19 8 45
45 19 67
67 45 NULL
For the modified example, you use this as a correlated subquery:
/*
CREATE TABLE [dbo].[stackoverflow_203302](
[ky] [int] NOT NULL,
[val] [int] NOT NULL,
CONSTRAINT [PK_stackoverflow_203302] PRIMARY KEY CLUSTERED (
[ky] ASC
)
)
*/
SELECT cur.ky AS cur_ky
,cur.val AS cur_val
,prv.ky AS prv_ky
,prv.val AS prv_val
,nxt.ky AS nxt_ky
,nxt.val as nxt_val
FROM (
SELECT cur.ky, MAX(prv.ky) AS prv_ky, MIN(nxt.ky) AS nxt_ky
FROM stackoverflow_203302 AS cur
LEFT JOIN stackoverflow_203302 AS prv
ON cur.ky > prv.ky
LEFT JOIN stackoverflow_203302 AS nxt
ON cur.ky < nxt.ky
GROUP BY cur.ky
) AS ordering
INNER JOIN stackoverflow_203302 as cur
ON cur.ky = ordering.ky
LEFT JOIN stackoverflow_203302 as prv
ON prv.ky = ordering.prv_ky
LEFT JOIN stackoverflow_203302 as nxt
ON nxt.ky = ordering.nxt_ky
With the output as expected:
cur_ky cur_val prv_ky prv_val nxt_ky nxt_val
----------- ----------- ----------- ----------- ----------- -----------
1 1 NULL NULL 2 3
2 3 1 1 3 8
3 8 2 3 4 19
4 19 3 8 5 67
5 67 4 19 6 45
6 45 5 67 NULL NULL
In SQL Server, I prefer to make the subquery a Common table Expression. This makes the code seem more linear, less nested and easier to follow if there are a lot of nestings (also, less repetition is required on some re-joins).
Firstly, this should work (the ORDER BY is important):
select min(a)
from theTable
where a > 8
select max(a)
from theTable
where a < 8
For the second question that I begged you to ask...:
select *
from theTable
where date = 8
union all
select *
from theTable
where key = (select min(key)
from theTable
where key > (select max(key)
from theTable
where date = 8)
)
union all
select *
from theTable
where key = (select max(key)
from theTable
where key < (select min(key)
from theTable
where date = 8)
)
order by key
SELECT 'next' AS direction, MIN(date_field) AS date_key
FROM table_name
WHERE date_field > current_date
GROUP BY 1 -- necessity for group by varies from DBMS to DBMS in this context
UNION
SELECT 'prev' AS direction, MAX(date_field) AS date_key
FROM table_name
WHERE date_field < current_date
GROUP BY 1
ORDER BY 1 DESC;
Produces:
direction date_key
--------- --------
prev 3
next 19
My own attempt at the set solution, based on TheSoftwareJedi.
First question:
select date from test where date = 8
union all
select max(date) from test where date < 8
union all
select min(date) from test where date > 8
order by date;
Second question:
While debugging this, I used the data set:
(key:date) 1:1,2:3,3:8,4:8,5:19,10:19,11:67,15:45,16:8,17:3,18:1
to give this result:
select * from test2 where date = 8
union all
select * from (select * from test2
where date = (select max(date) from test2
where date < 8))
where key = (select max(key) from test2
where date = (select max(date) from test2
where date < 8))
union all
select * from (select * from test2
where date = (select min(date) from test2
where date > 8))
where key = (select min(key) from test2
where date = (select min(date) from test2
where date > 8))
order by date,key;
In both cases the final order by clause is strictly speaking optional.
If your RDBMS supports LAG and LEAD, this is straightforward (Oracle, PostgreSQL, SQL Server 2012)
These allow to choose the row either side of any given row in a single query
Try this...
SELECT TOP 3 * FROM YourTable
WHERE Col >= (SELECT MAX(Col) FROM YourTable b WHERE Col < #Parameter)
ORDER BY Col