Sql Server - Count occurrences of datetime group by specific interval - sql

I have this table with sample data:
MY_TABLE
------------------------------------------
ID DateVal other columns
------------------------------------------
1 2017-01-14 11:00:00 ...
2 2017-01-14 11:01:00 ...
3 2017-01-14 11:02:00 ...
4 2017-01-14 11:03:00 ...
5 2017-01-14 11:11:00 ...
6 2017-01-14 11:11:30 ...
7 2017-01-14 11:15:00 ...
8 2017-01-14 11:15:01 ...
9 2017-01-14 11:18:00 ...
I need to have this kind of result:
start end occurrences
-----------------------------------------------------------
2017-01-14 11:00 2017-01-14 11:05 4
2017-01-14 11:05 2017-01-14 11:10 0
2017-01-14 11:10 2017-01-14 11:15 3
2017-01-14 11:15 2017-01-14 11:20 2
...
In specific I need a query that extracts all the occurrences of raws in MY_TABLE in 5 mins range (range value is variable).
Someone could help me?
Best regards,

You need to generate the timeframes you want and then left join. Here is one method:
select v.dt, dateadd(minute, 5, v.dt) as end_dt, count(t.id)
from (values (convert(datetime, '2017-01-14 11:00')),
(convert(datetime, '2017-01-14 11:05')),
. . .
) v(dt) left join
my_table t
on t.dateval >= v.dt and
t.dateval < dateadd(minute, 5, v.dt)
group by v.dt;
Note: If you want to do this for a wider range of time, then using a tally table or recursive CTE is handy.

Let's have a row generator, that generates dates every rangeSize from a startDate:
DECLARE #rangeSize INT = 5;
DECLARE #startDate DATETIME = '2020-01-01 00:00';
WITH RG(D,D2) AS (
SELECT #startDate AS D, DATEADD(MINUTE, #rangeSize, #startDate) AS D2
UNION ALL
SELECT DATEADD(MINUTE, #rangeSize, D), DATEADD(MINUTE, #rangeSize, D2)
FROM RG a
WHERE D < DATEADD(MINUTE, #rangeSize * 100, #startDate)
)
SELECT D,D2
FROM RG
OPTION (MAXRECURSION 100);
Now let's hook it up to your data and count the data:
DECLARE #rangeSize INT = 5;
DECLARE #startDate DATETIME = '2020-01-01 00:00';
WITH RG(D,D2) AS (
SELECT #startDate AS D, DATEADD(MINUTE, #rangeSize, #startDate) AS D2
UNION ALL
SELECT DATEADD(MINUTE, #rangeSize, D), DATEADD(MINUTE, #rangeSize, D2)
FROM RG a
WHERE D < DATEADD(MINUTE, #rangeSize * 100, #startDate)
)
SELECT r.D as StartDate, r.D2 as EndDate, COUNT(m.ID) as Count as EndDate
FROM
RG r
LEFT JOIN
my_table m ON m.dateval > r.D AND m.dateval <= r.D2
GROUP BY r.D, r.D2
OPTION (MAXRECURSION 100);
Note I used > and <= for the range because you seem to classify from e.g 15:01 to 20:00 as a range, whereas it feels more natural to me to have 15:00 to 19:59 as "belonging to the 15-20 range"

If you don't need your start and end to reflect the exact range:
You can use something like DATEDIFF(minute, '2000-01-01', DateVal)/5 as your grouping, then use MIN(DateVal) and MAX(DateVal) for your start and end; but those values will be of the first and last transaction in the interval, not the bounds of the interval.
Alternatively, you can use a recursive CTE to generate the intervals, and then join that to your data:
; WITH intervals AS (
SELECT CAST('2017-11-01 00:00:00' AS DATETIME) AS `start`
, CAST ('2017-11-01 00:05:00' AS DATETIME) AS `end`
UNION ALL
SELECT DATEADD(minute, 5, `start`) AS, DATEADD(minute, 5, `end`) AS end
FROM intervals
WHERE intervals.end < '2017-11-02 00:00:00'
)
SELECT i.`start`, i.`end`, COUNT(t.ID) AS occurrences
FROM intervals AS i
INNER JOIN MY_TABLE AS t ON t.DateVal >= i.`start` AND t.DateVal < i.End
GROUP BY i.`start`, i.`end`
ORDER BY i.`start`, i.`end`
;
Notes:
the literal date values can be adjusted to reflect the actual range you want to query
If you want intervals without activity to be included, the INNER can be changed to LEFT
Since your question as stated has overlapping intervals, I went with the assumption that a DateVal on the 5 minute mark belongs to the interval that starts with that value.

Related

SQL number of rows valid in a time range grouped by time

SQL Server
I have a table with 2 time stamps, time_start and time_end.
e.g.
ID time_start time_end
---- ------------------- -------------------
1 2019-01-01 08:30:00 2019-01-01 09:40:00
2 2019-01-01 09:10:24 2019-01-01 15:14:19
3 2019-01-01 09:21:15 2019-01-01 09:21:19
4 2019-01-01 10:39:45 2019-01-01 10:58:12
5 2019-01-01 11:39:45 2019-01-01 11:40:10
and I would like to group them so I can have the number of rows grouped by a variable time interval.
e.g.
time_interval row_count
------------------- ---------
2019-01-01 07:00:00 0
2019-01-01 08:00:00 1
2019-01-01 09:00:00 3
2019-01-01 10:00:00 2
2019-01-01 11:00:00 1
2019-01-01 12:00:00 0
My interval could be 1 hour, 1 minute, 30 minutes, 1 day, etc...
Think of it as a log-in/log-out situation, and I want to see how may users were logged at any given minute, hour, day, etc...
Try this,
DECLARE #start_date datetime='2019-01-01',
#end_date datetime='2019-01-02',
#i_minutes int=60
DECLARE #t TABLE
(
id int identity(1,1),time_start datetime,time_end datetime
)
INSERT INTO #t(time_start,time_end)VALUES
('2019-01-01 08:30:00','2019-01-01 09:40:00'),
('2019-01-01 09:10:24','2019-01-01 15:14:19'),
('2019-01-01 09:21:15','2019-01-01 09:21:19'),
('2019-01-01 10:39:45','2019-01-01 10:58:12'),
('2019-01-01 11:39:45','2019-01-01 11:40:10')
--SELECT #start_date=min(time_start),#end_date=max(time_end)
--FROM #t
;WITH CTE_time_Interval AS
(
SELECT #start_date AS time_int,#i_minutes AS i_minutes
UNION ALL
SELECT dateadd(minute,#i_minutes,time_int),i_minutes+ #i_minutes
FROM CTE_time_Interval
WHERE time_int<=#end_date
)
,CTE1 AS
(
SELECT ROW_NUMBER()OVER(ORDER BY time_int)AS r_no,time_int
FROM CTE_time_Interval
)
,CTE2 AS
(
SELECT a.time_int AS Int_start_time,b.time_int AS Int_end_time
FROM CTE1 a
INNER JOIN CTE1 b ON a.r_no+1=b.r_no
)
SELECT a.Int_start_time,a.Int_end_time,sum(iif(b.time_start is not null,1,0)) AS cnt
FROM CTE2 a
LEFT JOIN #t b ON
(
b.time_start BETWEEN a.Int_start_time AND a.Int_end_time
OR
b.time_end BETWEEN a.Int_start_time AND a.Int_end_time
OR
a.Int_start_time BETWEEN b.time_start AND b.time_end
OR
a.Int_end_time BETWEEN b.time_start AND b.time_end
)
GROUP BY a.Int_start_time,a.Int_end_time
Hi this is my workaround.
I created a table "test" with your data.
First I get the min and max intervals and after, I get all the intervals between these values with a CTE.
Finally, with this CTE and a left join with the intervals between time_start and time_end I got the answer.
This is for intervals of 1 hour
DECLARE #minDate AS DATETIME;
DECLARE #maxDate AS DATETIME;
SET #minDate = (select case
when (select min(time_start) from test) < (select min(time_end) from test)
then (select min(time_start) from test)
else (select min(time_end) from test) end )
SET #minDate = FORMAT(#minDate, 'dd-MM.yyyy HH:00:00')
SET #maxDate = (select case
when (select max(time_start) from test) > (select max(time_end) from test)
then (select max(time_start) from test)
else (select max(time_end) from test) end )
SET #maxDate = FORMAT(#maxDate, 'dd-MM.yyyy HH:00:00')
;WITH Dates_CTE
AS (SELECT #minDate AS Dates
UNION ALL
SELECT Dateadd(hh, 1, Dates)
FROM Dates_CTE
WHERE Dates < #maxDate)
SELECT d.Dates as time_interval, count(*) as row_count
FROM Dates_CTE d
LEFT JOIN test t on d.Dates
between (FORMAT(t.time_start, 'dd-MM.yyyy HH:00:00'))
and (FORMAT(t.time_end, 'dd-MM.yyyy HH:00:00'))
GROUP BY d.Dates
For intervals of 10 minutes you need some changes.
First I format dates getting minutes (dd-MM.yyyy HH:mm:00 instead of dd-MM.yyyy HH:00:00)
and in the left join I approach time_start and time_end to their 10 minutes time (9:32:00 in 9:30:00) (dateadd(minute, 10 * (datediff(minute, 0, time_start) / 10), 0)):
DECLARE #minDate AS DATETIME;
DECLARE #maxDate AS DATETIME;
SET #minDate = (select case
when (select min(time_start) from test) < (select min(time_end) from test)
then (select min(time_start) from test)
else (select min(time_end) from test) end )
SET #minDate = FORMAT(#minDate, 'dd-MM.yyyy HH:mm:00')
SET #maxDate = (select case
when (select max(time_start) from test) > (select max(time_end) from test)
then (select max(time_start) from test)
else (select max(time_end) from test) end )
SET #maxDate = FORMAT(#maxDate, 'dd-MM.yyyy HH:mm:00')
;WITH Dates_CTE
AS (SELECT #minDate AS Dates
UNION ALL
SELECT Dateadd(minute, 10, Dates)
FROM Dates_CTE
WHERE Dates < #maxDate)
SELECT d.Dates as time_interval, count(*) as row_count
FROM Dates_CTE d
LEFT JOIN test t on d.Dates
between dateadd(minute, 10 * (datediff(minute, 0, time_start) / 10), 0)
and dateadd(minute, 10 * (datediff(minute, 0, time_end) / 10), 0)
GROUP BY d.Dates
And finally I get this results for 1 hour intervals:
+---------------------+-----------+
| time_interval | row_count |
+---------------------+-----------+
| 01/01/2019 08:00:00 | 1 |
| 01/01/2019 09:00:00 | 3 |
| 01/01/2019 10:00:00 | 2 |
| 01/01/2019 11:00:00 | 2 |
| 01/01/2019 12:00:00 | 1 |
| 01/01/2019 13:00:00 | 1 |
| 01/01/2019 14:00:00 | 1 |
| 01/01/2019 15:00:00 | 1 |
+---------------------+-----------+
I hope it works for you.
You need to specify the time intervals. The rest is a LEFT JOIN/GROUP BY or correlated subquery:
with dates as (
select convert(datetime, '2019-01-01 07:00:00') as dt
union all
select dateadd(hour, 1, dt)
from dates
where dt < '2019-01-01 12:00:00'
)
select dates.dt, count(t.id)
from dates left join
t
on dates.dt < t.time_end and
dates.dt >= dateadd(hour, 1, t.time_start)
group by dates.dt
order by dates.dt;
If you have lots of data and lots of time periods, you might find that this has poor performance. If this is the case, ask a new question, with more information about sizing and performance.

How to assign shift based on punch time

Based on punch time shift automatically assigned to employee
Table Trnevents:
emp_reader_id EVENTID DT
3 1 2019-07-14 17:00:00.000
3 0 2019-07-14 10:00:00.000
3 1 2019-07-13 17:50:00.000
3 0 2019-07-13 10:05:00.000
3 1 2019-07-12 16:00:00.000
3 0 2019-07-12 08:55:00.000
declare
#start_date date='2019-07-12'
,#end_date date ='2019-07-14'
;WITH ByDays AS
( -- Number the entry register in each day
SELECT
emp_reader_id,
dt AS T,
CONVERT(VARCHAR(10),dt,102) AS Day,
FLOOR(CONVERT(FLOAT,dt)) DayNumber,
ROW_NUMBER() OVER(PARTITION BY FLOOR(CONVERT(FLOAT,dt)) ORDER BY dt) InDay
FROM trnevents
where
(
CONVERT(VARCHAR(26), dt, 23) >= CONVERT(VARCHAR(26), #start_date, 23)
and CONVERT(VARCHAR(26), dt, 23) <=CONVERT(VARCHAR(26), #end_date, 23)
)
)
,Diffs AS
(
SELECT
E.Day,
E.emp_Reader_id,
E.T ET,
O.T OT,
O.T-E.T Diff,
DATEDIFF(S,E.T,O.T) DiffSeconds -- difference in seconds
FROM
(
SELECT
BE.emp_Reader_id,
BE.T,
BE.Day,
BE.InDay
FROM ByDays BE
WHERE BE.InDay % 2 = 1
) E -- Even rows
INNER JOIN
(
SELECT
BO.emp_reader_id,
BO.T,
BO.Day,
BO.InDay
FROM ByDays BO
WHERE BO.InDay % 2 = 0
) O -- Odd rows
ON E.InDay + 1 = O.InDay -- Join rows (1,2), (3,4) and so on
AND E.Day = O.Day -- in the same day
)
SELECT * FROM Diffs
DECLARE #start TIME(0) = '9:00 AM', #end TIME(0) = '18:00 PM';
WITH x(n) AS
(
SELECT TOP (DATEDIFF(HOUR, #start, #end) + 1)
rn = ROW_NUMBER() OVER (ORDER BY [object_id])
FROM sys.all_columns
ORDER BY [object_id]
)
SELECT
t = DATEADD(HOUR, n-1, #start)
,cast(DATEADD(HOUR, n-1, #start) as varchar(50))+' shift'
FROM x
ORDER BY t;
If employee punch in time between 8.30 to 9.30 am , it assigned to 9.00 shift
if 9.30 to 10.30. it assigned to 10.00 shift
Expected output:
Day emp_Reader_id ET OT Diff DiffSeconds Shift
2019.07.12 3 2019-07-12 08:55:00.000 2019-07-12 16:00:00.000 1900-01-01 07:05:00.000 25500 09:00:00 shift
2019.07.13 3 2019-07-13 10:05:00.000 2019-07-13 17:50:00.000 1900-01-01 07:45:00.000 27900 10:00:00 shift
2019.07.14 3 2019-07-14 12:00:00.000 2019-07-14 21:00:00.000 1900-01-01 07:00:00.000 25200 12:00:00 shift
Two solutions, one with LEAD.
First is without LEAD:
select
CAST(t1.DT as date) AS "Day",
t1.emp_reader_id AS emp_Reader_id,
t1.DT AS ET,
t2.DT AS OT,
t1.DT - t2.DT As Diff,
DATEDIFF(s, t1.DT, t2.DT) As DiffSeconds,
cast(dateadd(HOUR,datepart(HH,t1.DT)+ round(datepart(MINUTE,t1.dt)/60.0,0),0) as time) as Shift
from trnevents t1
inner join trnevents t2 on t2.emp_reader_id=t1.emp_reader_id and t2.EVENTID=1 and CAST(t2.DT as date)= CAST(t1.DT as date)
where t1.eventID=0
order by t1.DT
or:
SELECT
Day,
emp_reader_id,
ET,
OT,
ET-OT AS Diff ,
DATEDIFF(s,ET,OT) as DiffSeconds,
cast(dateadd(HOUR,datepart(HH,ET)+ round(datepart(MINUTE,ET)/60.0,0),0) as time) as Shift
FROM (
select
CAST(t1.DT as date) AS "Day",
t1.emp_reader_id AS emp_Reader_id,
t1.DT AS ET,
LEAD(t1.DT) over (order by emp_reader_id,dt) AS OT,
eventid,
--t1.DT - t2.DT As Diff,
--DATEDIFF(s, t1.DT, t2.DT) As DiffSeconds,
cast(dateadd(HOUR,datepart(HH,t1.DT)+ round(datepart(MINUTE,t1.dt)/60.0,0),0) as time) as Shift
from trnevents t1) x
where x.EVENTID=0
Both query produce same result (second one is probably quicker)
If employee punch in time between 8.30 to 9.30 am , it assigned to 9.00 shift if 9.30 to 10.30. it assigned to 10.00 shift
If I understand this correctly, you can use a case expression:
select e.*,
(case when dt >= '08:30:00' and dt < '09:30:00'
then 'Shift 09:00'
when dt >= '09:30:00' and dt < '10:30:00'
then 'Shift 10:00'
end) as shift
from Trnevents e
If you want a more general solution where the breaks are at 30 minute intervals throughout the day, then subtract 30 minutes and extract the hour:
select e.*,
datepart(hour, dateadd(minute, -30, dt)) as shift
from e;

Group time series by time intervals (e.g. days) with aggregate of duration

I have a table containing a time series with following information. Each record represents the event of "changing the mode".
Timestamp | Mode
------------------+------
2018-01-01 12:00 | 1
2018-01-01 18:00 | 2
2018-01-02 01:00 | 1
2018-01-02 02:00 | 2
2018-01-04 04:00 | 1
By using the LEAD function, I can create a query with the following result. Now each record contains the information, when and how long the "mode was active".
Please check the 2nd and the 4th record. They "belong" to multiple days.
StartDT | EndDT | Mode | Duration
------------------+------------------+------+----------
2018-01-01 12:00 | 2018-01-01 18:00 | 1 | 6:00
2018-01-01 18:00 | 2018-01-02 01:00 | 2 | 7:00
2018-01-02 01:00 | 2018-01-02 02:00 | 1 | 1:00
2018-01-02 02:00 | 2018-01-04 04:00 | 2 | 50:00
2018-01-04 04:00 | (NULL) | 1 | (NULL)
Now I would like to have a query that groups the data by day and mode and aggregates the duration.
This result table is needed:
Date | Mode | Total
------------+------+-------
2018-01-01 | 1 | 6:00
2018-01-01 | 2 | 6:00
2018-01-02 | 1 | 1:00
2018-01-02 | 2 | 23:00
2018-01-03 | 2 | 24:00
2018-01-04 | 2 | 04:00
I didn't known how to handle the records that "belongs" to multiple days. Any ideas?
create table ChangeMode ( ModeStart datetime2(7), Mode int )
insert into ChangeMode ( ModeStart, Mode ) values
( '2018-11-15T21:00:00.0000000', 1 ),
( '2018-11-16T17:18:19.1231234', 2 ),
( '2018-11-16T18:00:00.5555555', 1 ),
( '2018-11-16T18:00:01.1234567', 2 ),
( '2018-11-16T19:02:22.8888888', 1 ),
( '2018-11-16T20:00:00.9876543', 2 ),
( '2018-11-17T09:00:00.0000000', 1 ),
( '2018-11-17T23:23:23.0230450', 2 ),
( '2018-11-19T17:00:00.0172839', 1 ),
( '2018-11-20T03:07:00.7033077', 2 )
;
with
-- Determine the earliest and latest dates.
-- Cast to date to remove the time portion.
-- Cast results back to datetime because we're going to add hours later.
MinMaxDates
as
(select cast(min(cast(ModeStart as date))as datetime) as MinDate,
cast(max(cast(ModeStart as date))as datetime) as MaxDate from ChangeMode),
-- How many days have passed during that period
Dur
as
(select datediff(day,MinDate,MaxDate) as Duration from MinMaxDates),
-- Create a list of numbers.
-- These will be added to MinDate to get a list of dates.
NumList
as
( select 0 as Num
union all
select Num+1 from NumList,Dur where Num<Duration ),
-- Create a list of dates by adding those numbers to MinDate
DayList
as
( select dateadd(day,Num,MinDate)as ModeDate from NumList, MinMaxDates ),
-- Create a list of day periods
PeriodList
as
( select ModeDate as StartTime,
dateadd(day,1,ModeDate) as EndTime
from DayList ),
-- Use LEAD to get periods for each record
-- Final record would return NULL for ModeEnd
-- We replace that with end of last day
ModePeriodList
as
( select ModeStart,
coalesce( lead(ModeStart)over(order by ModeStart),
dateadd(day,1,MaxDate) ) as ModeEnd,
Mode from ChangeMode, MinMaxDates ),
ModeDayList
as
( select * from ModePeriodList, PeriodList
where ModeStart<=EndTime and ModeEnd>=StartTime
),
-- Keep the later of the mode start time, and the day start time
-- Keep the earlier of the mode end time, and the day end time
ModeDayPeriod
as
( select case when ModeStart>=StartTime then ModeStart else StartTime end as StartTime,
case when ModeEnd<=EndTime then ModeEnd else EndTime end as EndTime,
Mode from ModeDayList ),
SumDurations
as
( select cast(StartTime as date) as ModeDate,
Mode,
DateDiff_Big(nanosecond,StartTime,EndTime)
/3600000000000
as DurationHours from ModeDayPeriod )
-- List the results in order
-- Use MaxRecursion option in case there are more than 100 days
select ModeDate as [Date], Mode, sum(DurationHours) as [Total Duration Hours]
from SumDurations
group by ModeDate, Mode
order by ModeDate, Mode
option (maxrecursion 0)
Result is:
Date Mode Total Duration Hours
---------- ----------- ---------------------------------------
2018-11-15 1 3.00000000000000
2018-11-16 1 18.26605271947221
2018-11-16 2 5.73394728052777
2018-11-17 1 14.38972862361111
2018-11-17 2 9.61027137638888
2018-11-18 2 24.00000000000000
2018-11-19 1 6.99999519891666
2018-11-19 2 17.00000480108333
2018-11-20 1 3.11686202991666
2018-11-20 2 20.88313797008333
you could use a CTE to create a table of days then join the time slots to it
DECLARE #MAX as datetime2 = (SELECT MAX(CAST(Timestamp as date)) MX FROM process);
WITH StartEnd AS (select p1.Timestamp StartDT,
P2.Timestamp EndDT ,
p1.mode
from process p1
outer apply
(SELECT TOP 1 pOP.* FROM
process pOP
where pOP.Timestamp > p1.Timestamp
order by pOP.Timestamp asc) P2
),
CAL AS (SELECT (SELECT MIN(cast(StartDT as date)) MN FROM StartEnd) DT
UNION ALL
SELECT DATEADD(day,1,DT) DT FROM CAL WHERE CAL.DT < #MAX
),
TMS AS
(SELECT CASE WHEN S.StartDT > C.DT THEN S.StartDT ELSE C.DT END AS STP,
CASE WHEN S.EndDT < DATEADD(day,1,C.DT) THEN S.ENDDT ELSE DATEADD(day,1,C.DT) END AS STE
FROM StartEnd S JOIN CAL C ON NOT(S.EndDT <= C.DT OR S.StartDT>= DATEADD(day,1,C.dt))
)
SELECT *,datediff(MI ,TMS.STP, TMS.ste) as x from TMS
The following uses recursive CTE to build a list of dates (a calendar or number table works equally well). It then intersect the dates with date times so that missing dates are populated with matching data. The important bit is that for each row, if start datetime belongs to previous day then it is clamped to 00:00. Likewise for end datetime.
DECLARE #t TABLE (timestamp DATETIME, mode INT);
INSERT INTO #t VALUES
('2018-01-01 12:00', 1),
('2018-01-01 18:00', 2),
('2018-01-02 01:00', 1),
('2018-01-02 02:00', 2),
('2018-01-04 04:00', 1);
WITH cte1 AS (
-- the min and max dates in your data
SELECT
CAST(MIN(timestamp) AS DATE) AS mindate,
CAST(MAX(timestamp) AS DATE) AS maxdate
FROM #t
), cte2 AS (
-- build all dates between min and max dates using recursive cte
SELECT mindate AS day_start, DATEADD(DAY, 1, mindate) AS day_end, maxdate
FROM cte1
UNION ALL
SELECT DATEADD(DAY, 1, day_start), DATEADD(DAY, 2, day_start), maxdate
FROM cte2
WHERE day_start < maxdate
), cte3 AS (
-- pull end datetime from next row into current
SELECT
timestamp AS dt_start,
LEAD(timestamp) OVER (ORDER BY timestamp) AS dt_end,
mode
FROM #t
), cte4 AS (
-- join datetime with date using date overlap query
-- then clamp start datetime to 00:00 of the date
-- and clamp end datetime to 00:00 of next date
SELECT
IIF(dt_start < day_start, day_start, dt_start) AS dt_start_fix,
IIF(dt_end > day_end, day_end, dt_end) AS dt_end_fix,
mode
FROM cte2
INNER JOIN cte3 ON day_end > dt_start AND dt_end > day_start
)
SELECT dt_start_fix, dt_end_fix, mode, datediff(minute, dt_start_fix, dt_end_fix) / 60.0 AS total
FROM cte4
DB Fiddle
Thanks everybody!
The answer from Cato put me on the right track. Here my final solution:
DECLARE #Start AS datetime;
DECLARE #End AS datetime;
DECLARE #Interval AS int;
SET #Start = '2018-01-01';
SET #End = '2018-01-05';
SET #Interval = 24 * 60 * 60;
WITH
cteDurations AS
(SELECT [Timestamp] AS StartDT,
LEAD ([Timestamp]) OVER (ORDER BY [Timestamp]) AS EndDT,
Mode
FROM tblLog
WHERE [Timestamp] BETWEEN #Start AND #End
),
cteTimeslots AS
(SELECT #Start AS StartDT,
DATEADD(SECOND, #Interval, #Start) AS EndDT
UNION ALL
SELECT EndDT,
DATEADD(SECOND, #Interval, EndDT)
FROM cteTimeSlots WHERE StartDT < #End
),
cteDurationsPerTimesplot AS
(SELECT CASE WHEN S.StartDT > C.StartDT THEN S.StartDT ELSE C.StartDT END AS StartDT,
CASE WHEN S.EndDT < C.EndDT THEN S.EndDT ELSE C.EndDT END AS EndDT,
C.StartDT AS Slot,
S.Mode
FROM cteDurations S
JOIN cteTimeslots C ON NOT(S.EndDT <= C.StartDT OR S.StartDT >= C.EndDT)
)
SELECT Slot,
Mode,
SUM(DATEDIFF(SECOND, StartDT, EndDT)) AS Duration
FROM cteDurationsPerTimesplot
GROUP BY Slot, Mode
ORDER BY Slot, Mode;
With the variable #Interval you are able to define the size of the timeslots.
The CTE cteDurations creates a subresult with the durations of all necessary entries by using the TSQL function LEAD (available in MSSQL >= 2012). This will be a lot faster than an OUTER APPLY.
The CTE cteTimeslots generates a list of timeslots with start time and end time.
The CTE cteDurationsPerTimesplot is a subresult with a JOIN between cteDurations and cteTimeslots. This this the magic JOIN statement from Cato!
And finally the SELECT statement will do the grouping and sum calculation per Slot and Mode.
Once again: Thanks a lot to everybody! Especially to Cato! You saved my weekend!
Regards
Oliver

How to set a specific time interval for different work shifts to retrieve data

I have two working shifts: 8:00:00 to 16:30:00 and 20:00:00 to 06:00:00.
I want to create a stored procedure that will retrieve data from an SQL table when I pass the date
this are my tables
Table1 Emp
ID DateTime EmpID
47 2014-12-05 08:00:00 1111
47 2014-12-05 08:25:00 1235
47 2014-12-05 23:55:00 4569
47 2014-12-06 00:00:00 4563
47 2014-12-06 02:00:00 7412
59 2014-12-06 04:00:00 8523
59 2014-12-05 10:30:00 5632
Table2 Product
ID DateTime ProductMade
47 2014-12-05 11:00:00 Milk
47 2014-12-05 08:00:00 Juice
47 2014-12-06 00:00:00 Bread
47 2014-12-06 06:00:00 Cakes
query for shift 2 18:00 to 06:00
SELECT *
FROM Table 1 as T1
INNER JOIN Table_Product as Prod ON t1.ID=Prod.ID
WHERE T1.DateTime BETWEEN DATEADD(DAY, DATEDIFF(DAY, 0, GETDATE()-8), 0) + '18:00'
AND DATEADD(DAY, DATEDIFF(DAY, 0, GETDATE()-7), 0) + '06:00'
so this will get all the records that has the same ID matching
then i have to do another query for the first shift.
between 08:00 to 16:30
SELECT *
FROM Table 1 AS T1
INNER JOIN
Table_Product AS Prod ON t1.ID=Prod.ID
WHERE DATEDIFF(day, CONVERT(VARCHAR(10), GETDATE(),110), CONVERT(VARCHAR(10), T1.DateTime,110))=-1 AND DATEPART(HOUR,T1.DateTime) BETWEEN '07' AND '16'
How do i make this into one stored procdure and elminate having two queries.
Try this if you want it for a specific shift. Then you have to specify #Shift
Declare #Shift char(1),
#days int
Set #Shift = 'A' -- will only get information for SHIFT A. Change to B if you want the rest
Set #days = 1
Select *
from Table1 t
where t.DateTime between
case when #Shift = 'A' then DateAdd(hour, 8, Convert(date, GetDate() - #days))
else DateAdd(hour, 20, Convert(date, GetDate() - #days)) end
and
case when #Shift = 'A' then DateAdd(hour, 16, Convert(date, GetDate() - #days))
else DateAdd(hour, 30, Convert(date, GetDate() - #days)) end
Specify the Shift and a Date, and it should work.
You can always do something like this as well. This you only have to specify the number of days in the past, and it will retrieve the information and specify the Shift in the first Column
DECLARE #days int
SET #days = 1
Select case when DATEPART(hour, t.DateTime) between 8 and 16 then 'A' else 'B' end AS Shift, *
from Table1 t
where t.DateTime between DateAdd(hour, 8, Convert(date, GetDate() - #days))
and DateAdd(hour, 30, Convert(date, GetDate() - #days))
ORDER BY 1, t.DateTime
It seems that you have two shifts per day and the day shift begins before the night shift. So, let's enumerate the shifts and let you choose the one(s) you want that way:
select t.*
from (select t.*,
row_number() over (partition by cast(sp.datetime as date)
order by sp.datetime
) as shiftnumber
from table t
) t
where DATEDIFF(day, CAST(GETDATE() as DATE), CAST(SP.DateTime as DATE)) = -1 and
shiftnumber = 1;
Note that I also changed the date arithmetic. The conversion to dates uses the built-in DATE type. Converting a date to a string and back to a date is inelegant.

SQL Server : Gap / Island, datetime, contiguous block 365 day block

I have a table that looks like this:-
tblMeterReadings
id meter period_start period_end amount
1 1 2014-01-01 00:00 2014-01-01 00:29:59 100.3
2 1 2014-01-01 00:30 2014-01-01 00:59:59 50.5
3 1 2014-01-01 01:00 2014-01-01 01:29:59 70.7
4 1 2014-01-01 01:30 2014-01-01 01:59:59 900.1
5 1 2014-01-01 02:00 2014-01-01 02:29:59 400.0
6 1 2014-01-01 02:30 2014-01-01 02:59:59 200.3
7 1 2014-01-01 03:00 2014-01-01 03:29:59 100.8
8 1 2014-01-01 03:30 2014-01-01 03:59:59 140.3
This is a tiny "contiguous block" from '2014-01-01 00:00' to '2014-01-01 3:59:59'.
In the real table there are "contiguous blocks" of years in length.
I need to find the the period_start and period_end of the most recent CONTINUOUS 365 COMPLETE DAYs (fileterd by meter column).
When I say COMPLETE DAYs I mean a day that has entries spanning 00:00 to 23:59.
When I say CONTINUOUS I mean there must be no days missing.
I would like to select all the rows that make up this block of CONTINUOUS COMPLETE DAYs.
I also need an output like:
block_start block_end total_amount_for_block
2013-02-26 00:00 2014-02-26 23:59:59 1034234.5
This is beyond me, so if someone can solve... I will be very impressed.
Since your granularity is 1 second, you need to expand your periods into all the date/times between the start and end at 1 second intervals. To do this you need to cross join with a numbers table (The numbers table is generated on the fly by ranking object ids from an arbitrary system view, I have limited it to TOP 86400 since this is the number of seconds in a day, and you have stated your time periods never span more than one day):
WITH Numbers AS
( SELECT TOP (86400)
Number = ROW_NUMBER() OVER(ORDER BY a.object_id) - 1
FROM sys.all_objects a
CROSS JOIN sys.all_objects b
ORDER BY a.object_id
)
SELECT r.ID, r.meter, dt.[DateTime]
FROM tblMeterReadings r
CROSS JOIN Numbers n
OUTER APPLY
( SELECT [DateTime] = DATEADD(SECOND, n.Number, r.period_start)
) dt
WHERE dt.[DateTime] <= r.Period_End;
You then have your continuous range in which to perform the normal gaps and islands grouping:
WITH Numbers AS
( SELECT TOP (86400)
Number = ROW_NUMBER() OVER(ORDER BY a.object_id) - 1
FROM sys.all_objects a
CROSS JOIN sys.all_objects b
ORDER BY a.object_id
), Grouped AS
( SELECT r.meter,
Amount = CASE WHEN Number = 1 THEN r.Amount ELSE 0 END,
dt.[DateTime],
GroupingSet = DATEADD(SECOND,
-DENSE_RANK() OVER(PARTITION BY r.Meter
ORDER BY dt.[DateTime]),
dt.[DateTime])
FROM tblMeterReadings r
CROSS JOIN Numbers n
OUTER APPLY
( SELECT [DateTime] = DATEADD(SECOND, n.Number, r.period_start)
) dt
WHERE dt.[DateTime] <= r.Period_End
)
SELECT meter,
PeriodStart = MIN([DateTime]),
PeriodEnd = MAX([DateTime]),
Amount = SUM(Amount)
FROM Grouped
GROUP BY meter, GroupingSet
HAVING DATEADD(YEAR, 1, MIN([DateTime])) < MAX([DateTime]);
N.B. Since the join to Number causes amounts to be duplicated, it is necessary to set all duplicates to 0 using CASE WHEN Number = 1 THEN r.Amount ELSE 0 END, i.e only include the amount for the first row for each ID
Removing the Having clause for your sample data will give:
meter | PeriodStart | PeriodEnd | Amount
------+---------------------+---------------------+----------
1 | 2014-01-01 00:00:00 | 2014-01-01 03:59:59 | 1963
Example on SQL Fiddle
You could try this:
Select MIN(period_start) as "block start"
, MAX(period_end) as "block end"
, SUM(amount) as "total amount"
FROM YourTable
GROUP BY datepart(year, period_start)
, datepart(month, period_start)
, datepart(day, period_start)
, datepart(year, period_end)
, datepart(month, period_end)
, datepart(day, period_end)
Having datepart(year, period_start) = datepart(year, period_end)
AND datepart(month, period_start) = datepart(month, period_end)
AND datepart(day, period_start) = datepart(day, period_end)
AND datepart(hour, MIN(period_start)) = 0
AND datepart(minute,MIN(period_start)) = 0
AND datepart(hour, MAX(period_end)) = 23
AND datepart(minute,MIN(period_end)) = 59