Group by data to get count between the datetime range - sql

Let's say I've a table like below
start_time end_time user_name
2019-01-01 00:00:05 2019-01-01 00:05:05 user1
2019-01-01 00:01:35 2019-01-01 00:06:05 user2
2019-01-01 00:02:05 2019-01-01 00:07:05 user3
2019-01-01 00:03:05 2019-01-01 00:08:05 user1
2019-01-01 00:04:05 2019-01-01 00:09:05 user2
My objective is find out how many users were logged in for a MINUTE. Say like below
time active no of users
2019-01-01 00:00:00 1
2019-01-01 00:01:00 2
2019-01-01 00:02:00 3
2019-01-01 00:03:00 3
2019-01-01 00:04:00 3
Now I first tried to round of time for a new column dateadd(mi, datediff(mi, 0, dateadd(s, 30, start_time)), 0). So, I will receive like above table time column
Next I tried to find the count for rounded datetime like below
SELECT
dateadd(mi, datediff(mi, 0, dateadd(s, 30, start_time)), 0) as RoundedDateTime,
(
SELECT count(distinct(user_name))
FROM entrytable sh
WHERE (sh.end_time > dateadd(mi, datediff(mi, 0, dateadd(s, 30, t.start_time)), 0)
and sh.start_time <= dateadd(mi, datediff(mi, 0, dateadd(s, 30, t.start_time)), 0))
) as usercounter
FROM entrytable t
But, above SQL query is running for longer time and goes to not responding mode.
I could not fix the issue. Can someone help?
Thanks in advance!

The most trivial solution is this:
DECLARE #t TABLE (start_time datetime, end_time datetime, user_name varchar(10));
INSERT INTO #t VALUES
('2019-01-01 00:00:05', '2019-01-01 00:05:05', 'user1'),
('2019-01-01 00:01:35', '2019-01-01 00:06:05', 'user2'),
('2019-01-01 00:02:05', '2019-01-01 00:07:05', 'user3'),
('2019-01-01 00:03:05', '2019-01-01 00:08:05', 'user1'),
('2019-01-01 00:04:05', '2019-01-01 00:09:05', 'user2');
SELECT dt AS date_time, SUM(SUM(val)) OVER (ORDER BY dt) AS active_count
FROM (
SELECT start_time, +1 FROM #t UNION ALL
SELECT end_time, -1 FROM #t
) cte1(dt, val)
GROUP BY dt
This will give you the number of active users whenever there was a change (someone logged in or logged out). Result:
| date_time | active_count |
|-------------------------|--------------|
| 2019-01-01 00:00:05.000 | 1 |
| 2019-01-01 00:01:35.000 | 2 |
| 2019-01-01 00:02:05.000 | 3 |
| 2019-01-01 00:03:05.000 | 4 |
| 2019-01-01 00:04:05.000 | 5 |
| 2019-01-01 00:05:05.000 | 4 |
| 2019-01-01 00:06:05.000 | 3 |
| 2019-01-01 00:07:05.000 | 2 |
| 2019-01-01 00:08:05.000 | 1 |
| 2019-01-01 00:09:05.000 | 0 |
Be advised that the result does not contain the "in-between" dates.

This question was originally tagged for SQL Server 2012, so this answer is for SQL Server.
One method is to generate a list of minutes and then:
with minutes as (
select cast('2019-01-01 00:00:00' as datetime) as mm
union all
select dateadd(minute, 1, minute)
from cte
where mm < '2019-01-01 00:00:05'
)
select m.*,
(select count(*)
from entrytable et
where et.start_time <= m.mm and
et.end_time > m.mm
) as num_actives
from minutes m;

Related

Splitting up events that occur over the day boundary

I have a table of events with a start time and an end time, with some events that have a start time before midnight and an end time after midnight. I'd like to produce output that splits up these events at the midnight barrier so they can be counted toward their respective date.
| EVENT_ID | START_TIME | END_TIME |
|----------|-------------------------|-------------------------|
| 1001 | 2021-02-21 14:00:00.000 | 2021-02-21 18:00:00.000 |
| 1002 | 2021-02-21 17:00:00.000 | 2021-02-22 03:00:00.000 |
| 1003 | 2021-02-21 18:00:00.000 | 2021-02-21 22:00:00.000 |
| 1004 | 2021-02-21 22:00:00.000 | 2021-02-22 07:00:00.000 |
The above table could be produced by the query:
SELECT EVENT_ID,
START_TIME,
END_TIME
FROM EVENTS
WHERE START_TIME BETWEEN '2021-02-21 00:00:00.000' AND '2021-02-21 23:59:59.999'
;
My desired output will split up the events that span multiple days at midnight:
| EVENT_ID | START_TIME | END_TIME |
|----------|-------------------------|-------------------------|
| 1001 | 2021-02-21 14:00:00.000 | 2021-02-21 18:00:00.000 |
| 1002 | 2021-02-21 17:00:00.000 | 2021-02-21 23:59:59.999 |
| 1002 | 2021-02-22 00:00:00.000 | 2021-02-22 03:00:00.000 |
| 1003 | 2021-02-21 18:00:00.000 | 2021-02-21 22:00:00.000 |
| 1004 | 2021-02-21 22:00:00.000 | 2021-02-21 23:59:59.999 |
| 1004 | 2021-02-22 00:00:00.000 | 2021-02-22 07:00:00.000 |
Any help would be greatly appreciated. Ideally I'd like to produce this without functions or the creation of new tables.
Note that I'm using SQL Server 2016
Using table of numbers
with t0(n) as (
select n
from (
values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
) t(n)
),nmbs as(
select row_number() over(order by t1.n) - 1 n
from t0 t1 cross join t0 t2 cross join t0 t3
)
select event_id,
case when n = 0
then start_time
else dateadd(day, n, convert(date, start_time))
end start_time,
case when datediff(day, start_time, end_time) = n
then end_time
else dateadd(second, -1, dateadd(day, n + 1, convert(datetime, convert(date, start_time))))
end as end_time
from Events
cross apply (
select top (datediff(day, start_time, end_time) + 1) n
from nmbs) ns
You can use a recursive CTE for this:
with cte as (
select event_id, start_time,
(case when datediff(day, start_time, end_time) = 0 then end_time
else dateadd(day, 1, convert(date, start_time))
end) as end_time,
end_time as real_end_time
from t
union all
select event_id, end_time,
(case when dateadd(day, 1, convert(date, end_time)) > real_end_time
then real_end_time
else dateadd(day, 1, convert(date, end_time))
end),
real_end_time
from cte
where end_time < real_end_time
)
select *
from cte;
Here is a db<>fiddle.
The following method solves for the case of midnight between START_TIME and END_TIME. The "desired output" above indicates only a single midnight occurs between START_TIME and END_TIME.
IF OBJECT_ID('tempdb..#t') IS NOT NULL DROP TABLE #t
CREATE TABLE #t ( Event_ID INT, START_TIME DATETIME2, END_TIME DATETIME2)
INSERT INTO #t (Event_ID, START_TIME, END_TIME)
VALUES
( 1001, '2021-02-21 14:00:00.000', '2021-02-21 18:00:00.000' )
, ( 1002, '2021-02-21 17:00:00.000', '2021-02-22 03:00:00.000' )
, ( 1003, '2021-02-21 18:00:00.000', '2021-02-21 22:00:00.000' )
, ( 1004, '2021-02-21 22:00:00.000', '2021-02-22 07:00:00.000' )
-- get original data plus midnight after START_TIME
IF OBJECT_ID('tempdb..#stage') IS NOT NULL DROP TABLE #stage
SELECT *
, CONVERT(DATETIME2, CONVERT(DATE, DATEADD(DAY, 1, t.START_TIME))) d
INTO #stage
FROM #t t
-- get all rows
SELECT Event_ID, START_TIME
, CASE WHEN d > END_TIME THEN END_TIME ELSE d END END_TIME
FROM #stage
UNION ALL
-- get rows where midnight occurs between START_TIME and END_TIME
SELECT Event_ID
, CASE WHEN d > END_TIME THEN START_TIME ELSE d END START_TIME
, END_TIME
FROM #stage
WHERE d < END_TIME
ORDER BY Event_ID

Time difference between two times in Hours in SQL Server

I need the time difference between two times in Hours. I have the start time and end time as shown below:
Start time | End Time
-----------+----------
23:00:00 | 19:00:00
23:00:00 | 07:00:00
I need the output for first row as 20, for second row 8.
Try this:
Schema:
create table a(Starttime time,Endtime time)
INSERT INTO a VALUES ('23:00:00','19:00:00')
INSERT INTO a VALUES ('09:00:00','19:00:00')
INSERT INTO a VALUES ('23:00:00','07:00:00')
Query:
select Starttime,Endtime,
CASE WHEN datediff(HOUR,Starttime,Endtime)<0 THEN 24+datediff(HOUR,Starttime,Endtime)
ELSE datediff(HOUR,Starttime,Endtime) END Diff
FROM A
Output:
| Starttime | Endtime | Diff |
|------------------|------------------|------|
| 23:00:00.0000000 | 19:00:00.0000000 | 20 |
| 09:00:00.0000000 | 19:00:00.0000000 | 10 |
| 23:00:00.0000000 | 07:00:00.0000000 | 8 |
Use DATEDIFF:
SELECT
start_time,
end_time,
24 + DATEDIFF(HOUR, start_time, end_time) AS diff_in_hours
FROM yourTable;
Demo
Query as per your requirement, just put your table name at the place of "YourTable"
SELECT Starttime
,Endtime
,CASE
WHEN DATEDIFF(HOUR, Starttime, Endtime) < 0
THEN 24 + DATEDIFF(HOUR, Starttime, Endtime)
ELSE DATEDIFF(HOUR, Starttime, Endtime)
END Time_Difference
FROM YourTable
Use select case
select case when start_time > end_time
then datediff(hour, start_time , dateadd(hh, 24, end_Time))
else datediff(hh, start_time , end_Time) end

SQL Query to convert number value into date

In my transaction table has id Number(11), name Varchar2(25) , transactiondate number(22).
Need to write SQL query to fetch the transaction details. transactiondate should be return as date & time format instead of number.
transaction table
ID Name transactiondate
1 AAA 2458010
2 BBB 2458351
3 CCC 2458712
I got the below result when i execute the below query
Select * from transaction where transactiondate <= TOCHAR(todate('2019/09/17 00:00:00', 'YYYY/MM/DD hh24:mi:ss') , 'J');
ID Name transactiondate
1 AAA 2458010
2 BBB 2458351
I got the query syntax error when i tried execute the below query
Select name, convert(datetime, convert(varchar(10), transactiondate)) as txndateformat
from transaction;
Expecting query that has to be return name and transactiondate as date format instead of number.
I got below result when i execute the below query
Desc transaction;
Name Null? Type
Id Not Null Number(19)
Name Not Null VarChar2(100)
transactiondate Not Null Number(22)
It all depends on when you are measuring time zero from and what your units are.
Here are some typical solutions:
Oracle Setup:
CREATE TABLE transaction ( ID, Name, transactiondate ) AS
SELECT 1, 'AAA', 2456702 FROM DUAL UNION ALL
SELECT 2, 'BBB', 2456703 FROM DUAL
Query:
SELECT name,
TO_DATE( transactiondate, 'J' )
AS julian_date,
DATE '1970-01-01' + NUMTODSINTERVAL( transactiondate / 1000, 'SECOND' )
AS unix_timestamp,
DATE '1970-01-01' + NUMTODSINTERVAL( transactiondate, 'SECOND' )
AS seconds_since_1970,
DATE '1970-01-01' + NUMTODSINTERVAL( transactiondate, 'MINUTE' )
AS minutes_since_1970,
DATE '1970-01-01' + NUMTODSINTERVAL( transactiondate, 'HOUR' )
AS hours_since_1970,
DATE '1900-01-01' + NUMTODSINTERVAL( transactiondate, 'HOUR' )
AS hours_since_1900,
DATE '1899-12-30' + transactiondate
AS excel_date
FROM transaction
Output:
NAME | JULIAN_DATE | UNIX_TIMESTAMP | SECONDS_SINCE_1970 | MINUTES_SINCE_1970 | HOURS_SINCE_1970 | HOURS_SINCE_1900 | EXCEL_DATE
:--- | :------------------ | :------------------ | :------------------ | :------------------ | :------------------ | :------------------ | :------------------
AAA | 2014-02-13 00:00:00 | 1970-01-01 00:40:56 | 1970-01-29 10:25:02 | 1974-09-03 01:02:00 | 2250-04-05 14:00:00 | 2180-04-04 14:00:00 | 8626-03-21 00:00:00
BBB | 2014-02-14 00:00:00 | 1970-01-01 00:40:56 | 1970-01-29 10:25:03 | 1974-09-03 01:03:00 | 2250-04-05 15:00:00 | 2180-04-04 15:00:00 | 8626-03-22 00:00:00
db<>fiddle here
(Note: Excel dates are slightly more complicated if you want to support values before 1900-03-01 but most people do not need this so there is only the simplified version included above.)
I assume that numbers are epoch numbers.
For SQL Server:
SELECT DATEADD(ss, 2456702, '19700101') --ss means interval = seconds
For Oracle:
select to_date('19700101', 'YYYYMMDD') + ( 1 / 24 / 60 / 60) * 2456702
from dual;

Is there a way to group timestamp data by 30 day intervals starting from the min(date) and add them as columns

I am trying to use the min() value of a timestamp as a starting point and then group data by 30 day intervals in order to get a count of occurrences for each unique value within the timestamp date range as columns
i have two tables that i am joining together to get a count. Table 1 (page_creation) has 2 columns labeled link and dt_crtd. Table 2(page visits) has 2 other columns labeled url and date. the tables are being joined by joining table1.link = table2.pagevisits.
After the join i get a table similar to this:
+-------------------+------------------------+
| url | date |
+-------------------+------------------------+
| www.google.com | 2018-01-01 00:00:00' |
| www.google.com | 2018-01-02 00:00:00' |
| www.google.com | 2018-02-01 00:00:00' |
| www.google.com | 2018-02-05 00:00:00' |
| www.google.com | 2018-03-04 00:00:00' |
| www.facebook.com | 2014-01-05 00:00:00' |
| www.facebook.com | 2014-01-07 00:00:00' |
| www.facebook.com | 2014-04-02 00:00:00' |
| www.facebook.com | 2014-04-10 00:00:00' |
| www.facebook.com | 2014-04-11 00:00:00' |
| www.facebook.com | 2014-05-01 00:00:00' |
| www.twitter.com | 2016-02-01 00:00:00' |
| www.twitter.com | 2016-03-04 00:00:00' |
+---------------------+----------------------+
what i am trying to get is results that pull this :
+-------------------+------------------------+------------+------------+-------------+
| url | MIN_Date | Interval 1 | Interval 2| Interval 3 |
+-------------------+------------------------+-------------+-----------+-------------+
| www.google.com | 2018-01-01 00:00:00' | 2 | 2 | 1
| www.facebook.com | 2014-01-05 00:00:00' | 2 | 0 | 1
| www.twitter.com | 2016-02-01 00:00:00' | 1 | 1 | 0
+---------------------+----------------------+-------------+-----------+-------------+
So the 30 day intervals begin from the min(date) as shown in Interval 1 and are counted every 30 days.
Ive looked at other questions such as :
Group rows by 7 days interval starting from a certain date
MySQL query to select min datetime grouped by 30 day intervals
However it did not seem to answer my specific problem.
Ive also looked into pivot syntax but noticed it is only supported for certain DBMS.
Any help would be greatly appreciated.
Thank you.
If I understood your question clearly, you want to calculate page visits between 30 , 60 , 90 days intervals after page creation. If it's the requirement, try below SQL code :-
select a11.url
,Sum(case when a12.date between a11.dt_crtd and a11.dt_crtd+30 then 1 else 0) Interval_1
,Sum(case when a12.date between a11.dt_crtd+31 and a11.dt_crtd+60 then 1 else 0) Interval_2
,Sum(case when a12.date between a11.dt_crtd+61 and a11.dt_crtd+90 then 1 else 0) Interval_3
from page_creation a11
join page_visits a12
on a11.link = a12.url
group by a11.url
If you are using BigQuery, I would recommend:
countif() to count a boolean value
timestamp_add() to add intervals to timestamps
The exact boundaries are a bit vague, but I would go for:
select pc.url,
countif(pv.date >= pc.dt_crtd and
pv.date < timestamp_add(pc.dt_crtd, interval 30 day
) as Interval_00_29,
countif(pv.date >= timestamp_add(pc.dt_crtd, interval 30 day) and
pv.date < timestamp_add(pc.dt_crtd, interval 60 day
) as Interval_30_59,
countif(pv.date >= timestamp_add(pc.dt_crtd, interval 60 day) and
pv.date < timestamp_add(pc.dt_crtd, interval 90 day
) as Interval_60_89
from page_creation pc join
page_visits pv
on pc.link = pv.url
group by pc.url
The way I am reading your scenario and especially based on example of After the join i get a table similar to ... is that you have two tables that you need to UNION - not to JOIN
So, based on that reading below example is for BigQuery Standard SQL (project.dataset.page_creation and project.dataset.page_visits are here just to mimic your Table 1 and Table2)
#standardSQL
WITH `project.dataset.page_creation` AS (
SELECT 'www.google.com' link, TIMESTAMP '2018-01-01 00:00:00' dt_crtd UNION ALL
SELECT 'www.facebook.com', '2014-01-05 00:00:00' UNION ALL
SELECT 'www.twitter.com', '2016-02-01 00:00:00'
), `project.dataset.page_visits` AS (
SELECT 'www.google.com' url, TIMESTAMP '2018-01-02 00:00:00' dt UNION ALL
SELECT 'www.google.com', '2018-02-01 00:00:00' UNION ALL
SELECT 'www.google.com', '2018-02-05 00:00:00' UNION ALL
SELECT 'www.google.com', '2018-03-04 00:00:00' UNION ALL
SELECT 'www.facebook.com', '2014-01-07 00:00:00' UNION ALL
SELECT 'www.facebook.com', '2014-04-02 00:00:00' UNION ALL
SELECT 'www.facebook.com', '2014-04-10 00:00:00' UNION ALL
SELECT 'www.facebook.com', '2014-04-11 00:00:00' UNION ALL
SELECT 'www.facebook.com', '2014-05-01 00:00:00' UNION ALL
SELECT 'www.twitter.com', '2016-03-04 00:00:00'
), `After the join` AS (
SELECT url, dt FROM `project.dataset.page_visits` UNION DISTINCT
SELECT link, dt_crtd FROM `project.dataset.page_creation`
)
SELECT
url, min_date,
COUNTIF(dt BETWEEN min_date AND TIMESTAMP_ADD(min_date, INTERVAL 29 DAY)) Interval_1,
COUNTIF(dt BETWEEN TIMESTAMP_ADD(min_date, INTERVAL 30 DAY) AND TIMESTAMP_ADD(min_date, INTERVAL 59 DAY)) Interval_2,
COUNTIF(dt BETWEEN TIMESTAMP_ADD(min_date, INTERVAL 60 DAY) AND TIMESTAMP_ADD(min_date, INTERVAL 89 DAY)) Interval_3
FROM (
SELECT url, dt, MIN(dt) OVER(PARTITION BY url ORDER BY dt) min_date
FROM `After the join`
)
GROUP BY url, min_date
with result as
Row url min_date Interval_1 Interval_2 Interval_3
1 www.facebook.com 2014-01-05 00:00:00 UTC 2 0 1
2 www.google.com 2018-01-01 00:00:00 UTC 2 2 1
3 www.twitter.com 2016-02-01 00:00:00 UTC 1 1 0

Average Duration in Status - Gaps and Islands

I'm trying to calculate the average turnover time of a piece of equipment in REPAIR status.
I was able to create a query containing a list of equipments with their snapshotted status on each day.
+-----------------+--------------+--------+----------------------+------------+------------------+
| equipmentNumber | snapshotDate | status | previousSnapshotDate | prevStatus | statusChangeFlag |
+-----------------+--------------+--------+----------------------+------------+------------------+
| 123456 | 2018-04-29 | ONHIRE | 2018-04-28 | AVAILABLE | 1 |
| 123456 | 2018-04-30 | ONHIRE | 2018-04-29 | ONHIRE | 0 |
| 123456 | 2018-05-01 | ONHIRE | 2018-04-30 | ONHIRE | 0 |
| 123456 | 2018-05-02 | REPAIR | 2018-05-01 | ONHIRE | 1 |
| 123456 | 2018-05-03 | REPAIR | 2018-05-02 | REPAIR | 0 |
| 123456 | 2018-05-04 | ONHIRE | 2018-05-03 | REPAIR | 1 |
| 654321 | 2018-04-30 | REPAIR | 2018-04-29 | AVAILABLE | 1 |
| 654321 | 2018-05-01 | REPAIR | 2018-04-30 | REPAIR | 0 |
| 654321 | 2018-05-02 | REPAIR | 2018-05-01 | REPAIR | 0 |
+-----------------+--------------+--------+----------------------+------------+------------------+
So, in this example, we have 2 equipments, "123456" was in REPAIR status 2 days on 5/2 and 5/3, and "654321" was in REPAIR status 3 days on 4/30, 5/1, and 5/2. That would be an average repair turnaround time of (2+3) / 2 = 2.5 days.
I tried this algorithm (Detect consecutive dates ranges using SQL) but it doesn't seem to be quite working for my needs.
I attempt to answer Gaps and Islands using an Incrementing ID column, create one if one doesn't exist, and the ROW_NUMBER window function
CREATE TABLE T1
([equipmentNumber] int, [snapshotDate] datetime, [status] varchar(6), [previousSnapshotDate] datetime, [prevStatus] varchar(9), [statusChangeFlag] int)
;
INSERT INTO T1
([equipmentNumber], [snapshotDate], [status], [previousSnapshotDate], [prevStatus], [statusChangeFlag])
VALUES
(123456, '2018-04-29 00:00:00', 'ONHIRE', '2018-04-28 00:00:00', 'AVAILABLE', 1),
(123456, '2018-04-30 00:00:00', 'ONHIRE', '2018-04-29 00:00:00', 'ONHIRE', 0),
(123456, '2018-05-01 00:00:00', 'ONHIRE', '2018-04-30 00:00:00', 'ONHIRE', 0),
(123456, '2018-05-02 00:00:00', 'REPAIR', '2018-05-01 00:00:00', 'ONHIRE', 1),
(123456, '2018-05-03 00:00:00', 'REPAIR', '2018-05-02 00:00:00', 'REPAIR', 0),
(123456, '2018-05-04 00:00:00', 'ONHIRE', '2018-05-03 00:00:00', 'REPAIR', 1),
(654321, '2018-04-30 00:00:00', 'REPAIR', '2018-04-29 00:00:00', 'AVAILABLE', 1),
(654321, '2018-05-01 00:00:00', 'REPAIR', '2018-04-30 00:00:00', 'REPAIR', 0),
(654321, '2018-05-02 00:00:00', 'REPAIR', '2018-05-01 00:00:00', 'REPAIR', 0)
;
;WITH cteX
AS(
SELECT
Id = ROW_NUMBER()OVER(ORDER BY T.equipmentNumber, T.snapshotDate)
,T.equipmentNumber
,T.snapshotDate
,T.[status]
,T.previousSnapshotDate
,T.prevStatus
,T.statusChangeFlag
FROM dbo.T1 T
),cteIsland
AS(
SELECT
Island = X.Id - ROW_NUMBER()OVER(ORDER BY X.Id)
,*
FROM cteX X
WHERE X.[status] = 'REPAIR'
)
SELECT * FROM cteIsland
Note the Island Column
Island Id equipmentNumber status
3 4 123456 REPAIR
3 5 123456 REPAIR
4 7 654321 REPAIR
4 8 654321 REPAIR
4 9 654321 REPAIR
Using the Island Column you can get the answer you need with this TSQL
;WITH cteX
AS(
SELECT
Id = ROW_NUMBER()OVER(ORDER BY T.equipmentNumber, T.snapshotDate)
,T.equipmentNumber
,T.snapshotDate
,T.[status]
,T.previousSnapshotDate
,T.prevStatus
,T.statusChangeFlag
FROM dbo.T1 T
),cteIsland
AS(
SELECT
Island = X.Id - ROW_NUMBER()OVER(ORDER BY X.Id)
,*
FROM cteX X
WHERE X.[status] = 'REPAIR'
)
SELECT
AvgDuration =SUM(Totals.IslandCounts) / (COUNT(Totals.IslandCounts) * 1.0)
FROM
(
SELECT
IslandCounts = COUNT(I.Island)
,I.equipmentNumber
FROM cteIsland I
GROUP BY I.equipmentNumber
) Totals
Answer
AvgDuration
2.50000000000000
Here's the SQLFiddle
That method should work to identify the repair periods:
select equipmentNumber, min(snapshotDate), max(snapshotDate)
from (select t.*,
row_number() over (partition by equipmentNumber order by snapshotDate) as seqnum
from t
) t
where status = 'REPAIR'
group by equipmentNumber, dateadd(day, - seqnum, snapshotDate);
You can get the average using a subquery:
select avg(datediff(day, minsd, maxsd) * 1.0)
from (select equipmentNumber, min(snapshotDate) as minsd, max(snapshotDate) as maxsd
from (select t.*,
row_number() over (partition by equipmentNumber order by snapshotDate) as seqnum
from t
) t
where status = 'REPAIR'
group by equipmentNumber, dateadd(day, - seqnum, snapshotDate)
) e;