How to Create Times Based on an Interval and Start Time? - sql

I am performing some scheduling optimization in my database.
For the job schedule, many records appear like this
Time NextRunTime
Every 3 hours 2019-06-03 10:00:00
Every 3 hours 2019-05-28 20:00:00
Every 4 hours 2017-07-31 18:00:00
Every 1 hours 2019-06-03 14:00:00
Every 4 hours 2017-06-08 16:00:00
What is an efficient means to split the "every" records into separate records within the 24 hour day?
For example, for the first record (every 3 hours from 10:00), I need it to insert the following into a table.
Time
13:00:00
16:00:00
19:00:00
22:00:00
01:00:00
04:00:00
07:00:00
10:00:00
I need to repeat this for every record in the first table with "every".
Can anybody help?

If you want to do this correctly you will need a tally table. First let's look at the logic required to solve this.
DECLARE #starttime DATETIME = '2019-06-03 10:00:00', #hours INT = 3;
SELECT t.N, Tm = CAST(DATEADD(HOUR,t.N*3,#startTime) AS TIME)
FROM
(
SELECT TOP (24/#hours) ROW_NUMBER() OVER (ORDER BY (SELECT 1))
FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS a(x),
(VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS b(x)
) AS t(N);
Returns:
N Tm
------- ----------------
1 13:00:00.0000000
2 16:00:00.0000000
3 19:00:00.0000000
4 22:00:00.0000000
5 01:00:00.0000000
6 04:00:00.0000000
7 07:00:00.0000000
8 10:00:00.0000000
Now for some sample data and a record identifier (named "someId") so we can calculate this for all rows in your table.
-- Sample Data
DECLARE #yourTable TABLE (someId INT IDENTITY PRIMARY KEY, freq INT, NextRunTime DATETIME);
INSERT #yourTable(freq, NextRunTime) VALUES (3, '2019-06-03 10:00:00'),
(3, '2019-05-28 20:00:00'),(4, '2017-07-31 18:00:00'),
(1, '2019-06-03 14:00:00'),(4, '2017-06-08 16:00:00');
-- Solution
SELECT yt.someId, f.Tm
FROM #yourTable AS yt
CROSS APPLY
(
SELECT t.N, CAST(DATEADD(HOUR,t.N*yt.freq,yt.NextRunTime) AS TIME)
FROM
(
SELECT TOP (24/yt.freq) ROW_NUMBER() OVER (ORDER BY (SELECT 1))
FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS a(x),
(VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS b(x)
) AS t(N)
) AS f(N,Tm);
Returns:
someId Tm
----------- ----------------
1 13:00:00.0000000
1 16:00:00.0000000
1 19:00:00.0000000
1 22:00:00.0000000
1 01:00:00.0000000
1 04:00:00.0000000
1 07:00:00.0000000
1 10:00:00.0000000
2 23:00:00.0000000
2 02:00:00.0000000
2 05:00:00.0000000
2 08:00:00.0000000
2 11:00:00.0000000
2 14:00:00.0000000
2 17:00:00.0000000
2 20:00:00.0000000
3 22:00:00.0000000
3 02:00:00.0000000
3 06:00:00.0000000
3 10:00:00.0000000
3 14:00:00.0000000
3 18:00:00.0000000
4 15:00:00.0000000
4 16:00:00.0000000
4 17:00:00.0000000
4 18:00:00.0000000
4 19:00:00.0000000
4 20:00:00.0000000
4 21:00:00.0000000
4 22:00:00.0000000
4 23:00:00.0000000
4 00:00:00.0000000
4 01:00:00.0000000
4 02:00:00.0000000
4 03:00:00.0000000
4 04:00:00.0000000
4 05:00:00.0000000
4 06:00:00.0000000
4 07:00:00.0000000
4 08:00:00.0000000
4 09:00:00.0000000
4 10:00:00.0000000
4 11:00:00.0000000
4 12:00:00.0000000
4 13:00:00.0000000
4 14:00:00.0000000
5 20:00:00.0000000
5 00:00:00.0000000
5 04:00:00.0000000
5 08:00:00.0000000
5 12:00:00.0000000
5 16:00:00.0000000

I have always thought that a recursive cte is a nice way to solve your query:
-- the sample data
declare #data as table (freq varchar(100), tmst datetime, each_ int)
insert into #data
select s.freq,s.tmst,
convert(int,replace(replace(freq,'Every ',''),' hours','')) as each_
from (
select 'Every 4 hours' as freq, convert(datetime,'2019-06-02 10:00:00') as tmst union all
select 'Every 3 hours' as freq, convert(datetime,'2019-06-02 11:00:00') as tmst union all
select 'Every 2 hours' as freq, convert(datetime,'2019-06-02 10:00:00') as tmst
) s
-- the query
;with cte as (
select freq, tmst, each_, null t1 from #data
union all
select freq, tmst, each_, isnull(t1,datepart(hour,tmst)) + each_
from cte
where isnull(t1,datepart(hour,tmst)) + each_ <= 23
)
select freq,
isnull(convert(datetime, convert(varchar(8),tmst,112) + ' ' + (convert(varchar(100),t1) + ':00:00' ), 120),tmst)
from cte
order by 1, 2
With this second version, you can get all the ranges from 0 to 23 (in the previous example you got just from the starting point to the 23)
-- the query
;with findfirst as (
select freq, tmst, datepart(hour,tmst) as fhour, datepart(hour,tmst) as init, each_ from #data
union all
select freq, tmst, fhour, init - each_, each_ from findfirst where init - each_ >= 0
),
cte as (
select min(init) as init, freq, tmst, each_, fhour from findfirst group by freq, tmst, each_, fhour
union all
select init + each_, freq, tmst, each_, fhour from cte where init + each_ <= 23
)
select freq,tmst,convert(time, right('0' + convert(varchar(2),init), 2) + ':00:00')
from cte order by freq,init,each_
Remember to continue using the #data table.
Output:

First drop any temp tables that have the names of the temp tables you will create:
IF OBJECT_ID(N'tempdb..#Interval', N'U') IS NOT NULL DROP TABLE #Interval
GO
IF OBJECT_ID(N'tempdb..#Interval2', N'U') IS NOT NULL DROP TABLE #Interval2
GO
IF OBJECT_ID(N'tempdb..#Runstart', N'U') IS NOT NULL DROP TABLE #Runstart
GO
Then create, and insert data into, your temp tables:
CREATE TABLE #Interval
(
_Time NVARCHAR(13),
NextRunTime DATETIME
)
GO
INSERT INTO #Interval VALUES ('Every 3 hours','2019-06-03 10:00:00')
INSERT INTO #Interval VALUES ('Every 3 hours','2019-05-28 20:00:00')
INSERT INTO #Interval VALUES ('Every 4 hours','2017-07-31 18:00:00')
INSERT INTO #Interval VALUES ('Every 1 hours','2019-06-03 14:00:00')
INSERT INTO #Interval VALUES ('Every 4 hours','2017-06-08 16:00:00')
GO
CREATE TABLE #Interval2
(
RunID INT IDENTITY(10001,1) NOT NULL PRIMARY KEY,
_Time INT NOT NULL,
NextRunTime DATETIME NOT NULL
)
GO
The code below ensures that you get the right interval for each run start, note: If the number of hours are ever 10 or more, it would be necessary to add some more code here to make the number of digits selected conditional upon the length of the string which contains them, let me know if you need this code too.
INSERT INTO #Interval2 (_TIME,NextRunTime) SELECT SUBSTRING(_Time,7,1),NextRunTime FROM #Interval WHERE LEFT(_Time,5) = 'Every'
GO
CREATE TABLE #Runstart
(
StartID INT IDENTITY(10001,1) NOT NULL PRIMARY KEY,
RunID INT NOT NULL,
[Start_DTTM] DATETIME
)
GO
Next populate the #RUNSTART table using this loop:
DECLARE #RunID INT = 10001
DECLARE #RunTime INT = (SELECT _TIME FROM #Interval2 WHERE RunID = #RunID)
DECLARE #NextRun DATETIME = (SELECT NextRunTime FROM #Interval2 WHERE RunID = #RunID)
WHILE #RunID <= (SELECT MAX(RunID) FROM #Interval2)
BEGIN
WHILE #NextRun < (SELECT DATEADD(DD,1,NextRunTime) FROM #Interval2 WHERE RunID = #RunID)
BEGIN
INSERT INTO #Runstart (RunID,[Start_DTTM]) SELECT #RunID,DATEADD(HH,#RunTime,#NextRun)
SET #NextRun = (SELECT DATEADD(HH,#RunTime,#NextRun))
END
SET #RunID = #RunID+1
SET #RunTime = (SELECT _TIME FROM #Interval2 WHERE RunID = #RunID)
SET #NextRun = (SELECT NextRunTime FROM #Interval2 WHERE RunID = #RunID)
END
GO
SELECT StartID, RunID,CONVERT(VARCHAR,START_DTTM,108) AS Start_time FROM #RUNSTART

Related

Summarising a table containing timestamps into 10-minute periods

I have a SQL-Server table called UserConnections that is structured like this:
ID
User
From
To
1
Bob
31-jan-2023 09:00:00
31-jan-2023 10:00:00
2
Bob
31-jan-2023 12:00:00
31-jan-2023 15:00:00
3
Sally
31-jan-2023 14:00:00
31-jan-2023 16:00:00
and I want to create a summary table for the previous day specifying the number of users connected during each 10-minute period. So it would look something like this
Period Start
User Count
31-jan-2023 00:00:00
0
31-jan-2023 00:10:00
0
...
...
31-jan-2023 09:00:00
1
31-jan-2023 09:10:00
1
...
...
31-jan-2023 12:00:00
1
31-jan-2023 12:10:00
1
...
...
31-jan-2023 14:00:00
2
31-jan-2023 14:10:00
2
...
...
31-jan-2023 15:00:00
1
31-jan-2023 15:10:00
1
...
...
31-jan-2023 16:00:00
0
31-jan-2023 16:10:00
0
So I need to get the start of each 10-minute period, and then count the number of connections where the [from] <= PeriodStart and [To] >= PeriodEnd
Given the start and end I can probably do the counting but I have no idea how to get the 10-minute periods (I am not experienced with complex SQL!).
I've looked at a few Date/Time functions but really don't know where to start.
I've also looked at this: MSSQL Start and End Time Grouped by 10 Minute, for Elapsed Time
which looks similar but I'm having difficulty seeing how to adjust it for my data.
The first trick here, which is really the basis for the entire solution, is to generate a list of all the 10 minute intervals for the day. To accomplish this I used a tally table. I keep one as a view on my system like this. I got my version from Jeff Moden who has a great article on the topic.
create View [dbo].[cteTally] as
WITH
E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))dt(n)),
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
select N from cteTally
Next I need to create a table so I have something to work with on my machine.
declare #Something table
(
ID int
, [User] varchar(10)
, [From] datetime
, [To] datetime
)
insert #Something values
(1, 'Bob', '31-jan-2023 09:00:00', '31-jan-2023 10:00:00')
, (2, 'Bob', '31-jan-2023 12:00:00', '31-jan-2023 15:00:00')
, (3, 'Sally', '31-jan-2023 14:00:00', '31-jan-2023 16:00:00')
Now that I have the foundation setup the query is fairly straight forward.
with DateVals as --used a cte here so I don't have to write out the date math over and over. ;)
(
select PeriodStart = dateadd(minute, (t.N - 1) * 10, convert(datetime, convert(date, getdate())))
from cteTally t
where t.N <= 144 --24 hours a day, 6 times an hour
)
select dv.PeriodStart
, UserCount = count(s.[From])
from DateVals dv
left join #Something s on s.[From] <= dv.PeriodStart and s.[To] > dv.PeriodStart
group by dv.PeriodStart
order by dv.PeriodStart

Time until midnight between two date and time columns

I need help with my query to calculate the time until midnight between two date and time columns
break down by day
This is the main table:
ID
Start_Time
End_time
DateDiff
32221
01-01-2022 13:10:00
01-03-2022 13:10:00
2880
My query:
SELECT
start_time.ID,
start_time.Date_Time AS Start_time,
end_time.Date_Time AS End_time,
DATEDIFF(minute, start_time.Date_Time, end_time.Date_Time) AS DateDiff
FROM
Main
what I need is similar to this:
ID
Date_start
End_time
DateDiff
32221
01-01-2022 13:10:00
01-01-2022 23:59:59
654
32221
01-02-2022 00:00:00
01-02-2022 23:59:59
1440
32221
01-03-2022 00:00:00
01-03-2022 13:10:00
781
how i can do that?
You can loop through the times, always adding the time untill midnight, untill your 'start_time + 1 day' is bigger than your end_time.
The below code can be run directly in SQL (mind the date notation, my SQL is in united states notation, so if yours is in Europe this will give you back results for 3 months instead of 3 days);
DECLARE #start_time datetime2 = '01/01/2022 13:00:00';
DECLARE #end_time datetime2 = '03/01/2022 14:00:00';
DECLARE #daily_end_time datetime2=NULL;
DECLARE #Table Table (start_time datetime2, end_time datetime2, diff nvarchar(8));
DECLARE #diff_minutes_start int = DATEDIFF(MINUTE,#start_time,DateDiff(day,0,dateadd(day,1,#start_time)));
DECLARE #diff_minutes_end int = DATEDIFF(minute,#end_time,DateDiff(day,0,dateadd(day,1,#end_time)))
SET #daily_end_time = DATEADD(mi,#diff_minutes_start,#start_time)
WHILE #daily_end_time < #end_time
BEGIN
INSERT INTO #Table (start_time,end_time,diff)
VALUES (
#start_time,
CASE WHEN DATEADD(day,1,#daily_end_time) > #end_time THEN #end_time ELSE
#daily_end_time END,
CASE WHEN DATEADD(day,1,#daily_end_time) > #end_time THEN #diff_minutes_end ELSE
#diff_minutes_start END )
SET #daily_end_time = DATEADD(mi,#diff_minutes_start,#start_time)
SET #start_time = DATEADD(mi,1,#daily_end_time);
select #diff_minutes_start =
DATEDIFF(MINUTE,#start_time,DateDiff(day,0,dateadd(day,1,#start_time)));
select #diff_minutes_end = DATEDIFF(minute,#end_time,DateDiff(day,0,dateadd(day,1,#end_time)))
END
SELECT * FROM #Table
And the results:
You may use a recursive CTE as the following:
With CTE As
(
Select ID, Start_Time, End_time, DATEADD(Second, -1, DATEADD(Day, DATEDIFF(Day,0, Start_Time), 1)) et
From main
Union All
Select C.ID, DATEADD(Second, 1, C.et), C.End_time, DATEADD(Day, 1, C.et)
From CTE C Join main T
On C.ID = T.ID
Where DATEADD(Second, 1, C.et) <= C.End_time
)
Select ID, Start_Time,
Case When End_Time <= et Then End_Time Else et End As End_Time,
DATEDIFF(Minute, Start_Time, DATEADD(Second, 1, Case When End_Time <= et Then End_Time Else et End)) As [DateDiff]
From CTE
Order By ID, Start_Time
See a demo with extended data sample from db<>fiddle.
You can also solve this with a tally table, using the expanded (to show different cases) sample data
ID
StartTime
EndTime
32221
2022-01-01 13:10:00
2022-01-03 13:10:00
32222
2022-02-02 10:10:00
2022-02-02 17:10:00
32223
2022-03-03 19:10:00
2022-03-04 08:10:00
32224
2022-04-04 19:10:00
2022-04-08 08:10:00
and the code
with cteSampleData as ( --Enter some sample data, include spans of 0, 1, and >1 days
SELECT * --Note that we need CONVERT to make sure the dates are treated as datetime not string!
FROM (VALUES(32221, CONVERT(datetime2(0), '01-01-2022 13:10:00'), CONVERT(datetime2(0), '01-03-2022 13:10:00') )
, (32222, '02-02-2022 10:10:00', '02-02-2022 17:10:00')
, (32223, '03-03-2022 19:10:00', '03-04-2022 08:10:00')
, (32224, '04-04-2022 19:10:00', '04-08-2022 08:10:00')
) as Samp(ID, StartTime, EndTime)
), cteWithControl as ( --Add some fields to make testing cledarer - you could do this as part of a subsequent step instead
SELECT *
, CONVERT(date, StartTime) as StartDate , CONVERT(date, EndTime) as EndDate
, DATEDIFF(day, StartTime , EndTime) as DiffDays
--, DATEDIFF(day, CONVERT(date, StartTime) , CONVERT(date, EndTime)) as DiffDays
FROM cteSampleData
), cteTally as ( --Get a list of integers to represent days, assume nothing lasts longer than a year
SELECT top 365 ROW_NUMBER() over (ORDER BY name) as Tally
FROM sys.objects --just a table we know has over 300 rows, look up tally tables for other generation methods
)--The real work begins below, partition the data into "same day" and "multi-day" spans
, cteSet as (
SELECT ID, StartTime, EndTime, DiffDays, 1 as DayNumber
FROM cteWithControl WHERE DiffDays = 0
UNION ALL
SELECT ID --For multi-day, cross with the tally table and treat first and last days special
, CASE WHEN T.Tally = 1 THEN StartTime --For the first day the start time is the real time
ELSE DATEADD (day, T.Tally - 1, startdate) END as StartTime --Otherwise it's the start of the day
, CASE WHEN T.Tally = DiffDays + 1 THEN EndTime --For the last day the end is the real end
ELSE DATEADD (second, -1, CONVERT(DATETIME2(0), DATEADD (day, T.Tally, startdate)))
END as EndTime --otherwise 1 second less than the next day
, DiffDays, Tally as DayNumber
FROM cteWithControl as D CROSS JOIN cteTally as T
WHERE DiffDays > 0 AND T.Tally <= D.DiffDays + 1
)--Now we display the results and calculate the length (in minutes) of each span
SELECT *
, DATEDIFF(MINUTE, StartTime, EndTime) as DateDiff
FROM cteSet
ORDER BY ID, DayNumber
we get the output
ID
StartTime
EndTime
DiffDays
DayNumber
DateDiff
32221
2022-01-01 13:10:00
2022-01-01 23:59:59
2
1
649
32221
2022-01-02 00:00:00
2022-01-02 23:59:59
2
2
1439
32221
2022-01-03 00:00:00
2022-01-03 13:10:00
2
3
790
32222
2022-02-02 10:10:00
2022-02-02 17:10:00
0
1
420
32223
2022-03-03 19:10:00
2022-03-03 23:59:59
1
1
289
32223
2022-03-04 00:00:00
2022-03-04 08:10:00
1
2
490
32224
2022-04-04 19:10:00
2022-04-04 23:59:59
4
1
289
32224
2022-04-05 00:00:00
2022-04-05 23:59:59
4
2
1439
32224
2022-04-06 00:00:00
2022-04-06 23:59:59
4
3
1439
32224
2022-04-07 00:00:00
2022-04-07 23:59:59
4
4
1439
32224
2022-04-08 00:00:00
2022-04-08 08:10:00
4
5
490

Determine contiguous date intervals

I have the following table structure:
id int -- more like a group id, not unique in the table
AddedOn datetime -- when the record was added
For a specific id there is at most one record each day. I have to write a query that returns contiguous (at day level) date intervals for each id.
The expected result structure is:
id int
StartDate datetime
EndDate datetime
Note that the time part of AddedOn is available but it is not important here.
To make it clearer, here is some input data:
with data as
(
select * from
(
values
(0, getdate()), --dummy record used to infer column types
(1, '20150101'),
(1, '20150102'),
(1, '20150104'),
(1, '20150105'),
(1, '20150106'),
(2, '20150101'),
(2, '20150102'),
(2, '20150103'),
(2, '20150104'),
(2, '20150106'),
(2, '20150107'),
(3, '20150101'),
(3, '20150103'),
(3, '20150105'),
(3, '20150106'),
(3, '20150108'),
(3, '20150109'),
(3, '20150110')
) as d(id, AddedOn)
where id > 0 -- exclude dummy record
)
select * from data
And the expected result:
id StartDate EndDate
1 2015-01-01 2015-01-02
1 2015-01-04 2015-01-06
2 2015-01-01 2015-01-04
2 2015-01-06 2015-01-07
3 2015-01-01 2015-01-01
3 2015-01-03 2015-01-03
3 2015-01-05 2015-01-06
3 2015-01-08 2015-01-10
Although it looks like a common problem I couldn't find a similar enough question. Also I'm getting closer to a solution and I will post it when (and if) it works but I feel that there should be a more elegant one.
Here's answer without any fancy joining, but simply using group by and row_number, which is not only simple but also more efficient.
WITH CTE_dayOfYear
AS
(
SELECT id,
AddedOn,
DATEDIFF(DAY,'20000101',AddedOn) dyID,
ROW_NUMBER() OVER (ORDER BY ID,AddedOn) row_num
FROM data
)
SELECT ID,
MIN(AddedOn) StartDate,
MAX(AddedOn) EndDate,
dyID-row_num AS groupID
FROM CTE_dayOfYear
GROUP BY ID,dyID - row_num
ORDER BY ID,2,3
The logic is that the dyID is based on the date so there are gaps while row_num has no gaps. So every time there is a gap in dyID, then it changes the difference between row_num and dyID. Then I simply use that difference as my groupID.
In Sql Server 2008 it is a little bit pain without LEAD and LAG functions:
WITH data
AS ( SELECT * ,
ROW_NUMBER() OVER ( ORDER BY id, AddedOn ) AS rn
FROM ( VALUES ( 0, GETDATE()), --dummy record used to infer column types
( 1, '20150101'), ( 1, '20150102'), ( 1, '20150104'),
( 1, '20150105'), ( 1, '20150106'), ( 2, '20150101'),
( 2, '20150102'), ( 2, '20150103'), ( 2, '20150104'),
( 2, '20150106'), ( 2, '20150107'), ( 3, '20150101'),
( 3, '20150103'), ( 3, '20150105'), ( 3, '20150106'),
( 3, '20150108'), ( 3, '20150109'), ( 3, '20150110') )
AS d ( id, AddedOn )
WHERE id > 0 -- exclude dummy record
),
diff
AS ( SELECT d1.* ,
CASE WHEN ISNULL(DATEDIFF(dd, d2.AddedOn, d1.AddedOn),
1) = 1 THEN 0
ELSE 1
END AS diff
FROM data d1
LEFT JOIN data d2 ON d1.id = d2.id
AND d1.rn = d2.rn + 1
),
parts
AS ( SELECT * ,
( SELECT SUM(diff)
FROM diff d2
WHERE d2.rn <= d1.rn
) AS p
FROM diff d1
)
SELECT id ,
MIN(AddedOn) AS StartDate ,
MAX(AddedOn) AS EndDate
FROM parts
GROUP BY id ,
p
Output:
id StartDate EndDate
1 2015-01-01 00:00:00.000 2015-01-02 00:00:00.000
1 2015-01-04 00:00:00.000 2015-01-06 00:00:00.000
2 2015-01-01 00:00:00.000 2015-01-04 00:00:00.000
2 2015-01-06 00:00:00.000 2015-01-07 00:00:00.000
3 2015-01-01 00:00:00.000 2015-01-01 00:00:00.000
3 2015-01-03 00:00:00.000 2015-01-03 00:00:00.000
3 2015-01-05 00:00:00.000 2015-01-06 00:00:00.000
3 2015-01-08 00:00:00.000 2015-01-10 00:00:00.000
Walkthrough:
diff
This CTE returns data:
1 2015-01-01 00:00:00.000 1 0
1 2015-01-02 00:00:00.000 2 0
1 2015-01-04 00:00:00.000 3 1
1 2015-01-05 00:00:00.000 4 0
1 2015-01-06 00:00:00.000 5 0
You are joining same table on itself to get the previous row. Then you calculate difference in days between current row and previous row and if the result is 1 day then pick 0 else pick 1.
parts
This CTE selects result from previous step and sums up the new column(it is a cumulative sum. sum of all values of new column from starting till current row), so you are getting partitions to group by:
1 2015-01-01 00:00:00.000 1 0 0
1 2015-01-02 00:00:00.000 2 0 0
1 2015-01-04 00:00:00.000 3 1 1
1 2015-01-05 00:00:00.000 4 0 1
1 2015-01-06 00:00:00.000 5 0 1
2 2015-01-01 00:00:00.000 6 0 1
2 2015-01-02 00:00:00.000 7 0 1
2 2015-01-03 00:00:00.000 8 0 1
2 2015-01-04 00:00:00.000 9 0 1
2 2015-01-06 00:00:00.000 10 1 2
2 2015-01-07 00:00:00.000 11 0 2
3 2015-01-01 00:00:00.000 12 0 2
3 2015-01-03 00:00:00.000 13 1 3
The last step is just a grouping by ID and new column and picking min and max values for dates.
I took the "Islands Solution #3 from SQL MVP Deep Dives" solution from https://www.simple-talk.com/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/ and applied to your test data:
with
data as
(
select * from
(
values
(0, getdate()), --dummy record used to infer column types
(1, '20150101'),
(1, '20150102'),
(1, '20150104'),
(1, '20150105'),
(1, '20150106'),
(2, '20150101'),
(2, '20150102'),
(2, '20150103'),
(2, '20150104'),
(2, '20150106'),
(2, '20150107'),
(3, '20150101'),
(3, '20150103'),
(3, '20150105'),
(3, '20150106'),
(3, '20150108'),
(3, '20150109'),
(3, '20150110')
) as d(id, AddedOn)
where id > 0 -- exclude dummy record
)
,CTE_Seq
AS
(
SELECT
ID
,SeqNo
,SeqNo - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY SeqNo) AS rn
FROM
data
CROSS APPLY
(
SELECT DATEDIFF(day, '20150101', AddedOn) AS SeqNo
) AS CA
)
SELECT
ID
,DATEADD(day, MIN(SeqNo), '20150101') AS StartDate
,DATEADD(day, MAX(SeqNo), '20150101') AS EndDate
FROM CTE_Seq
GROUP BY ID, rn
ORDER BY ID, StartDate;
Result set
ID StartDate EndDate
1 2015-01-01 00:00:00.000 2015-01-02 00:00:00.000
1 2015-01-04 00:00:00.000 2015-01-06 00:00:00.000
2 2015-01-01 00:00:00.000 2015-01-04 00:00:00.000
2 2015-01-06 00:00:00.000 2015-01-07 00:00:00.000
3 2015-01-01 00:00:00.000 2015-01-01 00:00:00.000
3 2015-01-03 00:00:00.000 2015-01-03 00:00:00.000
3 2015-01-05 00:00:00.000 2015-01-06 00:00:00.000
3 2015-01-08 00:00:00.000 2015-01-10 00:00:00.000
I'd recommend you to examine the intermediate results of CTE_Seq to understand how it actually works. Just put
select * from CTE_Seq
instead of the final SELECT ... GROUP BY .... You'll get this result set:
ID SeqNo rn
1 0 -1
1 1 -1
1 3 0
1 4 0
1 5 0
2 0 -1
2 1 -1
2 2 -1
2 3 -1
2 5 0
2 6 0
3 0 -1
3 2 0
3 4 1
3 5 1
3 7 2
3 8 2
3 9 2
Each date is converted into a sequence number by DATEDIFF(day, '20150101', AddedOn). ROW_NUMBER() generates a set of sequential numbers without gaps, so when these numbers are subtracted from a sequence with gaps the difference jumps/changes. The difference stays the same until the next gap, so in the final SELECT GROUP BY ID, rn brings all rows from the same island together.
Here is a simple solution that does not use analytics. I tend not to use analytics because I work with many different DBMSs and many don't (yet) have them emplemented and even those who do have different syntaxes. I just have the habit of writing generic code whenever possible.
with
Data( ID, AddedOn )as(
select 1, convert( date, '20150101' ) union all
select 1, '20150102' union all
select 1, '20150104' union all
select 1, '20150105' union all
select 1, '20150106' union all
select 2, '20150101' union all
select 2, '20150102' union all
select 2, '20150103' union all
select 2, '20150104' union all
select 2, '20150106' union all
select 2, '20150107' union all
select 3, '20150101' union all
select 3, '20150103' union all
select 3, '20150105' union all
select 3, '20150106' union all
select 3, '20150108' union all
select 3, '20150109' union all
select 3, '20150110'
)
select d.ID, d.AddedOn StartDate, IsNull( d1.AddedOn, '99991231' ) EndDate
from Data d
left join Data d1
on d1.ID = d.ID
and d1.AddedOn =(
select Min( AddedOn )
from data
where ID = d.ID
and AddedOn > d.AddedOn );
In your situation I assume that ID and AddedOn form a composite PK and so are indexed. Thus, the query will run impressively fast even on very large tables.
Also, I used the outer join because it seemed like the last AddedOn date of each ID should be seen in the StartDate column. Instead of NULL I used a common MaxDate value. The NULL could work just as well as a "this is the latest StartDate row" flag.
Here is the output for ID=1:
ID StartDate EndDate
----------- ---------- ----------
1 2015-01-01 2015-01-02
1 2015-01-02 2015-01-04
1 2015-01-04 2015-01-05
1 2015-01-05 2015-01-06
1 2015-01-06 9999-12-31
I'd like to post my own solution too because it's yet another approach:
with data as
(
...
),
temp as
(
select d.id
,d.AddedOn
,dprev.AddedOn as PrevAddedOn
,dnext.AddedOn as NextAddedOn
FROM data d
left JOIN
data dprev on dprev.id = d.id
and dprev.AddedOn = dateadd(d, -1, d.AddedOn)
left JOIN
data dnext on dnext.id = d.id
and dnext.AddedOn = dateadd(d, 1, d.AddedOn)
),
starts AS
(
select id
,AddedOn
from temp
where PrevAddedOn is NULL
),
ends as
(
select id
,AddedOn
from temp
where NextAddedon is NULL
)
SELECT s.id as id
,s.AddedOn as StartDate
,(select min(e.AddedOn) from ends e where e.id = s.id and e.AddedOn >= s.AddedOn) as EndDate
from starts s

SQL query stuck - comparison on different lines

I m working on a very weird problem with SQL where I have to compare previous rows
Number start_date end_date
----- ------- ------------
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000
2 2011-10-11 00:00:00.000 2011-10-11 00:00:00.000
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000
4 2011-10-29 00:00:00.000 2011-11-15 00:00:00.000
Here , I have to compare the start_date and end_date on the two different line and create a view out of it.
(If the start_date is less than the previous end_date , then criteria is set to 1).
Well it should compare 2011-10-26 00:00:00.000 for 3 and 2011-10-27 00:00:00.000 on 2 for 30 days
Number start_date end_date Criteria
----- ----------- ---------------- ------------
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000 0
2 2011-10-11 00:00:00.000 2011-10-11 00:00:00.000 0
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000 1
4 2011-10-30 00:00:00.000 2011-11-15 00:00:00.000 1
I m confused how should I proceed with this.
Any help would be helpful !!!!
Thanks !!!
The most straightforward way to do this is to use a subquery:
select A.number, a.start_date, a.end_date,
CASE WHEN start_date < dateadd(d,30,(select TOP(1) b.end_date
from mytable B
where B.number < A.number
order by B.number desc)) then 1 else 0 end Criteria
from mytable A
Note: If the start date is the 29th day following the previous row's end date, Criteria becomes 1. By the 30th day onwards, it is 0. Tweak the 30 in the query as required.
Sample:
create table mytable (
Number int primary key,
start_date datetime,
end_date datetime);
insert mytable
select 1, '2011-06-07', '2011-07-10' union all
select 2, '2011-10-11', '2011-10-27' union all
select 3, '2011-10-26', '2011-10-29' union all
select 4, '2011-10-29', '2011-11-15'
Result:
number start_date end_date Criteria
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000 0
2 2011-10-11 00:00:00.000 2011-10-27 00:00:00.000 0
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000 1
4 2011-10-29 00:00:00.000 2011-11-15 00:00:00.000 0
Try using case like this:
create view vDates as
select Number,start_date,end_date,
case
when start_date<end_date
then 0
else 1
end as Criteria
from tab
SQL Fiddle Demo
A more readable way is create a function and send the correct dates:
Function:
create function [dbo].[CompareDates] (
#START_DATE datetime,
#PREVIOUS_END_DATE datetime
)
RETURNS int
AS
BEGIN
if #START_DATE < #PREVIOUS_END_DATE
return 1
return 0
END
Query (using subquery):
declare #dates table
(
number int,
start datetime,
end_date datetime
)
insert into #dates values
(1, '2011-06-07 00:00:00.000', '2011-07-10 00:00:00.000'),
(2, '2011-10-11 00:00:00.000', '2011-10-27 00:00:00.000'),
(3, '2011-10-26 00:00:00.000', '2011-10-29 00:00:00.000'),
(4, '2011-10-29 00:00:00.000', '2011-11-15 00:00:00.000')
select *, dbo.CompareDates(dates.end_date, dates.previous_end_date) from
(
select number, start, end_date,
(select TOP 1 end_date
from #dates d2
where d2.number < d1.number
order by d2.number desc) as previous_end_date
from #dates d1
) dates

data grouping according to relevant values in sql server

the data is in 15 minute interval:
Time Value
2010-01-01 00:15 3
2010-01-01 00:30 2
2010-01-01 00:45 4
2010-01-01 01:00 5
2010-01-01 01:15 1
2010-01-01 01:30 3
2010-01-01 01:45 4
2010-01-01 02:00 12
2010-01-01 02:15 13
2010-01-01 02:30 12
2010-01-01 02:45 14
2010-01-01 03:00 15
2010-01-01 03:15 3
2010-01-01 03:30 2
2010-01-01 03:45 3
2010-01-01 04:00 5
..........
..........
..........
2010-01-02 00:00
Typically there will be 96 points.
According to the values, we may notice that the values from 00:15 to 01:45 are close to each other, and from 02:00 to 03:00 they are close to each other, and from 03:15 to 04:00 they are close to each other.
Based on the "close to each other" rule, I want the data to be "grouped" into 3 parts:
00:15 to 01:45
02:00 to 03:00
03:15 to 04:00
Please consider that the data could be random, and could be grouped into more than 3 parts according to the rule defined above, but maximum should not be more than 10 parts. And the grouping must honor the time sequence, for example, you cannot just put 00:15/02:30/04:45 into 1 group because these 3 points are NOT consecutive.
Please give some thoughts how to implement it in t-sql.
updated:
The value could be:
Time Value
2010-01-01 00:15 3
2010-01-01 00:30 2
2010-01-01 00:45 4
2010-01-01 01:00 5
2010-01-01 01:15 1
2010-01-01 01:30 3
2010-01-01 01:45 4
2010-01-01 02:00 12
2010-01-01 02:15 13
2010-01-01 02:30 4 --suddenly decreased
2010-01-01 02:45 14
2010-01-01 03:00 15
2010-01-01 03:15 3
2010-01-01 03:30 2
2010-01-01 03:45 3
2010-01-01 04:00 5
..........
..........
..........
2010-01-02 00:00
for these kinds of situation, we should not group 02:30 separately, because we want the group size has to be at least 3 points, and we will put that point (02:30) to the previous group (from 02:00 to 03:00).
Declare and populate testdata:
set nocount on
declare #result table(mintime datetime, maxtime datetime)
declare #t table(time datetime, value int)
-- variation is how much difference will be allowed from one row to the next
declare #variation int
set #variation = 5
insert #t values('2010-01-01 00:15',3)
insert #t values('2010-01-01 00:30',2)
insert #t values('2010-01-01 00:45',4)
insert #t values('2010-01-01 01:00',5)
insert #t values('2010-01-01 01:15',1)
insert #t values('2010-01-01 01:30',3)
insert #t values('2010-01-01 01:45',4)
insert #t values('2010-01-01 02:00',12)
insert #t values('2010-01-01 02:15',13)
insert #t values('2010-01-01 02:30',12)
insert #t values('2010-01-01 02:45',14)
insert #t values('2010-01-01 03:00',15)
insert #t values('2010-01-01 03:15',3)
insert #t values('2010-01-01 03:30',2)
insert #t values('2010-01-01 03:45',3)
insert #t values('2010-01-01 04:00',5)
Code:
a:
;with t as
( -- add a rownumber
select *, rn = row_number() over(order by time) from #t
), a as
(-- increase group if current row's value varies more than #variation from last row's value
select time, value, rn, 0 grp from t where rn = 1
union all
select t.time, t.value, t.rn, case when t.value between
a.value - #variation and a.value +#variation
then grp else grp+1 end
from t join a on
t.rn = a.rn +1
)
insert #result
select min(time), max(time) from a group by grp
if ##rowcount > 10
begin
-- this will activate if more than 10 groups of numbers are found
-- start over with higher tolerance for variation
set #variation=#variation + 1
delete #result
goto a
end
select convert(char(5), mintime,114) + ' to ' + convert(char(5), maxtime,114)
from #result
Result here:
https://data.stackexchange.com/stackoverflow/q/110891/declare-and-populate-testdata
Since your question changed so much, here is a new answer to the new question, I only included the code part.
declare #t table(time datetime, value int)
declare #variation float
set #variation = 2
set nocount on
insert #t values('2010-01-01 00:15',3)
insert #t values('2010-01-01 00:30',2)
insert #t values('2010-01-01 00:45',4)
insert #t values('2010-01-01 01:00',5)
insert #t values('2010-01-01 01:15',1)
insert #t values('2010-01-01 01:30',3)
insert #t values('2010-01-01 01:45',4)
insert #t values('2010-01-01 02:00',52)
insert #t values('2010-01-01 02:15',5)
insert #t values('2010-01-01 02:30',52)
insert #t values('2010-01-01 02:45',54)
insert #t values('2010-01-01 03:00',55)
insert #t values('2010-01-01 03:15',3)
insert #t values('2010-01-01 03:30',2)
insert #t values('2010-01-01 03:45',3)
insert #t values('2010-01-01 04:00',5)
declare #result table(mintime datetime, maxtime datetime)
a:
delete #result
;with t as
(
select *, rn = row_number() over(order by time), log(value) lv from #t where datediff(day, time, '2010-01-01') = 0
), a as
(
select time, lv, rn, 0 grp from t where rn = 1
union all
select t1.time, a.lv, t1.rn,
case when exists (select 1 from t t2 where t1.rn between rn + 1 and rn + 3 and
lv between t1.lv - #variation and t1.lv +#variation) then grp else grp + 1 end
from t t1 join a on
t1.rn = a.rn +1
)
insert #result
select min(time), max(time) from a group by grp
if ##rowcount > 10
begin
set #variation=#variation + .5
goto a
end
select * from #result
Result:
mintime maxtime
2010-01-01 00:15:00.000 2010-01-01 01:45:00.000
2010-01-01 02:00:00.000 2010-01-01 03:00:00.000
2010-01-01 03:15:00.000 2010-01-01 04:00:00.000