Oracle query - select date and time - Overlapping - sql

Oracle query for date and time with overlapping ranges:
ID startdate enddate hours
a124 10/10/2019 07:30:00 10/10/2019 11:30:00 4
a124 10/10/2019 07:00:00 10/10/2019 15:10:00 8.17
bc24 10/10/2019 07:30:00 10/10/2019 11:30:00 4
bc24 10/10/2019 10:30:00 10/10/2019 15:30:00 5
er67 10/10/2019 09:30:00 10/10/2019 11:30:00 2
er67 10/10/2019 15:30:00 10/10/2019 16:30:00 1
Desired Output :
ID startdate enddate hours
a124 10/10/2019 07:00:00 10/10/2019 15:10:00 8.17
bc24 10/10/2019 07:30:00 10/10/2019 15:30:00 8
er67 10/10/2019 09:30:00 10/10/2019 11:30:00 2
er67 10/10/2019 15:30:00 10/10/2019 16:30:00 1

I would approach this using lag(), a cumulative sum() and aggregation. Here is a step-by-step explanation.
First, you can use lag() to recover the previous start and end date for the same id:
select
    t.*,
    lag(startdate) over(partition by id order by startdate) lagstartdate,
    lag(enddate) over(partition by id order by startdate) lagenddate
from mytable t
ID | STARTDATE | ENDDATE | HOURS | LAGSTARTDATE | LAGENDDATE
:--- | :------------------ | :------------------ | ----: | :------------------ | :------------------
a124 | 2019-10-10 07:00:00 | 2019-10-10 15:10:00 | 8.17 | null | null
a124 | 2019-10-10 07:30:00 | 2019-10-10 11:30:00 | 4 | 2019-10-10 07:00:00 | 2019-10-10 15:10:00
bc24 | 2019-10-10 07:30:00 | 2019-10-10 11:30:00 | 4 | null | null
bc24 | 2019-10-10 10:30:00 | 2019-10-10 15:30:00 | 5 | 2019-10-10 07:30:00 | 2019-10-10 11:30:00
er67 | 2019-10-10 09:30:00 | 2019-10-10 11:30:00 | 2 | null | null
er67 | 2019-10-10 15:30:00 | 2019-10-10 16:30:00 | 1 | 2019-10-10 09:30:00 | 2019-10-10 11:30:00
Then, you can set up the cumulative sum to split records having the same id into groups (that will later on be aggregated). When the dates do not overlap, a new group starts:
select
    t.*,
    sum(
        case when startdate <= lagenddate or enddate <= lagstartdate
            then 0
            else 1
        end
    ) over(partition by id order by startdate) grp
from (
    select
        t.*,
        lag(startdate) over(partition by id order by startdate) lagstartdate,
        lag(enddate) over(partition by id order by startdate) lagenddate
    from mytable t
) t
ID | STARTDATE | ENDDATE | HOURS | LAGSTARTDATE | LAGENDDATE | GRP
:--- | :------------------ | :------------------ | ----: | :------------------ | :------------------ | --:
a124 | 2019-10-10 07:00:00 | 2019-10-10 15:10:00 | 8.17 | null | null | 1
a124 | 2019-10-10 07:30:00 | 2019-10-10 11:30:00 | 4 | 2019-10-10 07:00:00 | 2019-10-10 15:10:00 | 1
bc24 | 2019-10-10 07:30:00 | 2019-10-10 11:30:00 | 4 | null | null | 1
bc24 | 2019-10-10 10:30:00 | 2019-10-10 15:30:00 | 5 | 2019-10-10 07:30:00 | 2019-10-10 11:30:00 | 1
er67 | 2019-10-10 09:30:00 | 2019-10-10 11:30:00 | 2 | null | null | 1
er67 | 2019-10-10 15:30:00 | 2019-10-10 16:30:00 | 1 | 2019-10-10 09:30:00 | 2019-10-10 11:30:00 | 2
Finally, you can group the records by id and grp: min() and max() give you the date range, then you can compute the date difference.
Final query:
select
    id,
    min(startdate) startdate,
    max(enddate) enddate,
    round((max(enddate) - min(startdate)) * 24, 2) hours
from (
    select
        t.*,
        sum(
            case when startdate <= lagenddate or enddate <= lagstartdate
                then 0
                else 1
            end
        ) over(partition by id order by startdate) grp
    from (
        select
            t.*,
            lag(startdate) over(partition by id order by startdate) lagstartdate,
            lag(enddate) over(partition by id order by startdate) lagenddate
        from mytable t
    ) t
) t
group by id, grp
order by id, grp
ID | STARTDATE | ENDDATE | HOURS
:--- | :------------------ | :------------------ | ----:
a124 | 2019-10-10 07:00:00 | 2019-10-10 15:10:00 | 8.17
bc24 | 2019-10-10 07:30:00 | 2019-10-10 15:30:00 | 8
er67 | 2019-10-10 09:30:00 | 2019-10-10 11:30:00 | 2
er67 | 2019-10-10 15:30:00 | 2019-10-10 16:30:00 | 1
Demo on DB Fiddle
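For reference, here is a minimal setup sketch for the sample data (table and column names are taken from the queries above; the exact column types are an assumption):
create table mytable (
    id        varchar2(10),
    startdate date,
    enddate   date,
    hours     number
);
insert into mytable values ('a124', to_date('2019-10-10 07:30:00', 'yyyy-mm-dd hh24:mi:ss'), to_date('2019-10-10 11:30:00', 'yyyy-mm-dd hh24:mi:ss'), 4);
insert into mytable values ('a124', to_date('2019-10-10 07:00:00', 'yyyy-mm-dd hh24:mi:ss'), to_date('2019-10-10 15:10:00', 'yyyy-mm-dd hh24:mi:ss'), 8.17);
insert into mytable values ('bc24', to_date('2019-10-10 07:30:00', 'yyyy-mm-dd hh24:mi:ss'), to_date('2019-10-10 11:30:00', 'yyyy-mm-dd hh24:mi:ss'), 4);
insert into mytable values ('bc24', to_date('2019-10-10 10:30:00', 'yyyy-mm-dd hh24:mi:ss'), to_date('2019-10-10 15:30:00', 'yyyy-mm-dd hh24:mi:ss'), 5);
insert into mytable values ('er67', to_date('2019-10-10 09:30:00', 'yyyy-mm-dd hh24:mi:ss'), to_date('2019-10-10 11:30:00', 'yyyy-mm-dd hh24:mi:ss'), 2);
insert into mytable values ('er67', to_date('2019-10-10 15:30:00', 'yyyy-mm-dd hh24:mi:ss'), to_date('2019-10-10 16:30:00', 'yyyy-mm-dd hh24:mi:ss'), 1);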

You can use analytical functions (LAG and SUM) as follows to make groups of dates without gaps:
SELECT
    ID,
    MIN(STARTDATE),
    MAX(ENDDATE),
    ROUND(SUM(CASE
                  WHEN PREV_ENDDATE BETWEEN STARTDATE AND ENDDATE THEN ENDDATE - PREV_ENDDATE
                  ELSE ENDDATE - STARTDATE
              END) * 24, 2) AS HOURS
FROM
    (
        SELECT
            T.*,
            SUM(CASE
                    WHEN T.PREV_ENDDATE < T.STARTDATE THEN 1
                END) OVER(
                PARTITION BY ID
                ORDER BY
                    STARTDATE, ENDDATE
            ) SM
        FROM
            (
                SELECT
                    ID,
                    STARTDATE,
                    ENDDATE,
                    LAG(ENDDATE) OVER(
                        PARTITION BY ID
                        ORDER BY
                            STARTDATE, ENDDATE
                    ) AS PREV_ENDDATE
                FROM
                    T TOUT
                WHERE
                    NOT EXISTS (
                        SELECT 1
                        FROM T TIN
                        WHERE
                            TIN.ID = TOUT.ID
                            AND TOUT.STARTDATE BETWEEN TIN.STARTDATE AND TIN.ENDDATE
                            AND TOUT.ENDDATE BETWEEN TIN.STARTDATE AND TIN.ENDDATE
                            AND TOUT.ROWID <> TIN.ROWID
                    )
            ) T
    )
GROUP BY
    ID,
    SM;
ID MIN(START MAX(ENDDA HOURS
---- --------- --------- ----------
a124 10-OCT-19 10-OCT-19 8.17
bc24 10-OCT-19 10-OCT-19 8
er67 10-OCT-19 10-OCT-19 1
er67 10-OCT-19 10-OCT-19 2
Cheers!!

Related

SQL - Split open & Close time Into intervals of 30 minutes

Purpose: I work in the hospitality industry. I want to understand at what time the restaurant is full and at what time it is less busy. I have the opening and closing times, and I want to split them into 30-minute intervals.
I would really appreciate your help.
Thanking you in advance
Table
Check# Open CloseTime
25484 17:34 18:06
25488 18:04 21:22
Output
Check# Open Close Duration
25484 17:34 18:00 0:25
25484 18:00 18:30 0:30
25488 18:08 18:30 0:21
25488 18:30 19:00 0:30
25488 19:00 19:30 0:30
25488 19:30 20:00 0:30
25488 20:00 20:30 0:30
25488 20:30 21:00 0:30
25488 21:00 21:30 0:30
I am new to SQL. I am good at Excel, but due to its limitations I want to use SQL; I just know the basics of SQL.
I have searched on Google but could not find a solution. All I can see is the use of date keywords, but not the field name in the code, so I am unable to use them.
Could you try this, it works in MySQL 8.0:
WITH RECURSIVE times AS (
SELECT time '0:00' AS `Open`, time '0:30' as `Close`
UNION ALL
SELECT addtime(`Open`, '0:30'), addtime(`Close`, '0:30')
FROM times
WHERE `Open` < time '23:30'
)
SELECT c.`Check`,
greatest(t.`Open`, c.`Open`) `Open`,
least(t.`Close`, c.`CloseTime`) `Close`,
timediff(least(t.`Close`, c.`CloseTime`), greatest(t.`Open`, c.`Open`)) `Duration`
FROM times t
JOIN checks c ON (c.`Open` < t.`Close` AND c.`CloseTime` > t.`Open`);
| Check | Open | Close | Duration |
| ----- | -------- | -------- | -------- |
| 25484 | 17:34:00 | 18:00:00 | 00:26:00 |
| 25484 | 18:00:00 | 18:06:00 | 00:06:00 |
| 25488 | 18:04:00 | 18:30:00 | 00:26:00 |
| 25488 | 18:30:00 | 19:00:00 | 00:30:00 |
| 25488 | 19:00:00 | 19:30:00 | 00:30:00 |
| 25488 | 19:30:00 | 20:00:00 | 00:30:00 |
| 25488 | 20:00:00 | 20:30:00 | 00:30:00 |
| 25488 | 20:30:00 | 21:00:00 | 00:30:00 |
| 25488 | 21:00:00 | 21:22:00 | 00:22:00 |
->Fiddle
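For context, here is a minimal sketch of the checks table that both the MySQL query above and the SQL Server query below assume (column names are taken from the queries; the types are an assumption):
create table checks (`Check` int, `Open` time, CloseTime time);
insert into checks values (25484, '17:34:00', '18:06:00');
insert into checks values (25488, '18:04:00', '21:22:00');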
This works for SQL Server 2019:
WITH times([Open], [Close]) AS (
SELECT cast({t'00:00:00'} as time) as "Open",
cast({t'00:30:00'} as time) as "Close"
UNION ALL
SELECT dateadd(minute, 30, [Open]), dateadd(minute, 30, [Close])
FROM times
WHERE [Open] < cast({t'23:30:00'} as time)
)
SELECT c.[Check],
iif(t.[Open] > c.[Open], t.[Open], c.[Open]) as [Open],
iif(t.[Close] < c.[CloseTime], t.[Close], c.[CloseTime]) as [Close],
datediff(minute,
iif(t.[Open] > c.[Open], t.[Open], c.[Open]),
iif(t.[Close] < c.[CloseTime], t.[Close], c.[CloseTime])) Duration
FROM times t
JOIN checks c ON (c.[Open] < t.[Close] AND c.[CloseTime] > t.[Open]);
Check | Open | Close | Duration
25484 | 17:34:00.0000000 | 18:00:00.0000000 | 26
25484 | 18:00:00.0000000 | 18:06:00.0000000 | 6
25488 | 18:04:00.0000000 | 18:30:00.0000000 | 26
25488 | 18:30:00.0000000 | 19:00:00.0000000 | 30
25488 | 19:00:00.0000000 | 19:30:00.0000000 | 30
25488 | 19:30:00.0000000 | 20:00:00.0000000 | 30
25488 | 20:00:00.0000000 | 20:30:00.0000000 | 30
25488 | 20:30:00.0000000 | 21:00:00.0000000 | 30
25488 | 21:00:00.0000000 | 21:22:00.0000000 | 22
->Fiddle

SQL to show overlapping time periods

How can I check in PostgreSQL 9.2 (with a SQL command) whether any timestamp period overlaps another one for the same id_user? I need to correct an existing table.
For example, the query should show rows 1, 3 and 4.
id | id_user | timedate0 | timedate2
---------------------------------------------------
1 | 1 | 2020-04-20 12:00:00 | 2020-04-20 14:00:00
2 | 1 | 2020-04-20 17:00:00 | 2020-04-20 19:30:00
3 | 1 | 2020-04-20 14:30:00 | 2020-04-20 15:40:00
4 | 1 | 2020-04-20 13:00:00 | 2020-04-20 15:00:00
5 | 1 | 2020-04-21 13:00:00 | 2020-04-21 14:00:00
6 | 1 | 2020-04-21 14:00:00 | 2020-04-21 15:00:00
You can use exists:
select t.*
from t
where exists (select 1
from t t2
where t2.timedate0 < t.timedate2 and
t2.timedate2 > t.timedate0 and
t2.id_user = t.id_user and t2.id <> t.id
);
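As a side note, PostgreSQL also has an OVERLAPS operator; for non-empty periods it behaves like the strict-inequality check above, so this is an equivalent sketch rather than a different method:
select t.*
from t
where exists (select 1
              from t t2
              where (t2.timedate0, t2.timedate2) overlaps (t.timedate0, t.timedate2) and
                    t2.id_user = t.id_user and t2.id <> t.id
             );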

How to calculate a future datetime after a few working hours

I am trying to calculate what the date and time will be after 2 or more working hours from now, even if I start calculating on a weekend or after working hours. It should work like this:
working hours are from 8 am to 4 pm
if I start calculating on Friday at 3 pm, the result should be Monday 9 am
if(@data_przyj>@WorkStart AND DATEPART(DATEADD(MINUTE,@ileNaZapytanie,@data_przyj)<@WorkFinish)
BEGIN
    while (DATEPART(dw, @CurrentDate)!=1 AND DATEPART(dw, @CurrentDate)!=7))
    BEGIN
        SET @CurrentDate = DATEADD(day, 1, @CurrentDate)
        SET @czyBylPrzeskok = 1
    END
    if (@czyBylPrzeskok = 1)
    BEGIN
        SET @LastDay = @CurrentDate
        SET @LastDay = DATEADD(MINUTE, datediff(MINUTE,DATEADD(dd, 0, DATEDIFF(MINUTE, 0, @data_przyj)),@WorkStart), @LastDay)
        SET @LastDay = DATEADD(HOUR, datediff(MINUTE,DATEADD(dd, 0, DATEDIFF(HOUR, 0, @data_przyj)),@WorkStart), @LastDay)
    END
    ELSE
    BEGIN
        SET @LastDay = DATEADD(MINUTE,@ileNaZapytanie,@data_przyj)
    END
    SET @IsCalculated = 1
END
else if(@data_przyj>@WorkStart AND DATEADD(MINUTE,@ileNaZapytanie,@data_przyj)>@WorkFinish)
BEGIN
    SET @LastDay = DATEADD(DD,3,GETDATE());
    SET @IsCalculated = 1
END
else if(@data_przyj<@WorkStart)
BEGIN
    SET @LastDay = GETDATE();
    SET @IsCalculated = 1
END
END
EDIT:
For example, with working hours 8:00-16:00: for the date '2019-09-06 15:00', adding 2 working hours should give '2019-09-09 09:00'; for '2019-09-06 13:00' it should give '2019-09-06 15:00', etc.
The following solution uses a calendar table with working hours, then uses a rolling sum to accumulate each day's business hours and find the day on which you end up.
Using a calendar table gives you the flexibility of having different business time periods and of very easily adding or removing holidays (see the sketch after the generated calendar below).
Setup (calendar table):
IF OBJECT_ID('tempdb..#WorkingCalendar') IS NOT NULL
DROP TABLE #WorkingCalendar
CREATE TABLE #WorkingCalendar (
Date DATE PRIMARY KEY,
IsWorkingDay BIT,
WorkingStartTime DATETIME,
WorkingEndTime DATETIME)
SET DATEFIRST 1 -- 1: Monday, 7: Sunday
DECLARE @StartDate DATE = '2019-01-01'
DECLARE @EndDate DATE = '2030-01-01'
;WITH RecursiveDates AS
(
SELECT
GeneratedDate = @StartDate
UNION ALL
SELECT
GeneratedDate = DATEADD(DAY, 1, R.GeneratedDate)
FROM
RecursiveDates AS R
WHERE
R.GeneratedDate < @EndDate
)
INSERT INTO #WorkingCalendar (
Date,
IsWorkingDay,
WorkingStartTime,
WorkingEndTime)
SELECT
Date = R.GeneratedDate,
IsWorkingDay = CASE
WHEN DATEPART(WEEKDAY, R.GeneratedDate) BETWEEN 1 AND 5 THEN 1 -- From Monday to Friday
ELSE 0 END,
WorkingStartTime = CASE
WHEN DATEPART(WEEKDAY, R.GeneratedDate) BETWEEN 1 AND 5
THEN CONVERT(DATETIME, R.GeneratedDate) + CONVERT(DATETIME, '08:00:00') END,
WorkingEndTime = CASE
WHEN DATEPART(WEEKDAY, R.GeneratedDate) BETWEEN 1 AND 5
THEN CONVERT(DATETIME, R.GeneratedDate) + CONVERT(DATETIME, '16:00:00') END
FROM
RecursiveDates AS R
OPTION
(MAXRECURSION 0)
Generates a table like the following:
+------------+--------------+-------------------------+-------------------------+
| Date | IsWorkingDay | WorkingStartTime | WorkingEndTime |
+------------+--------------+-------------------------+-------------------------+
| 2019-01-01 | 1 | 2019-01-01 08:00:00.000 | 2019-01-01 16:00:00.000 |
| 2019-01-02 | 1 | 2019-01-02 08:00:00.000 | 2019-01-02 16:00:00.000 |
| 2019-01-03 | 1 | 2019-01-03 08:00:00.000 | 2019-01-03 16:00:00.000 |
| 2019-01-04 | 1 | 2019-01-04 08:00:00.000 | 2019-01-04 16:00:00.000 |
| 2019-01-05 | 0 | NULL | NULL |
| 2019-01-06 | 0 | NULL | NULL |
| 2019-01-07 | 1 | 2019-01-07 08:00:00.000 | 2019-01-07 16:00:00.000 |
| 2019-01-08 | 1 | 2019-01-08 08:00:00.000 | 2019-01-08 16:00:00.000 |
| 2019-01-09 | 1 | 2019-01-09 08:00:00.000 | 2019-01-09 16:00:00.000 |
| 2019-01-10 | 1 | 2019-01-10 08:00:00.000 | 2019-01-10 16:00:00.000 |
| 2019-01-11 | 1 | 2019-01-11 08:00:00.000 | 2019-01-11 16:00:00.000 |
| 2019-01-12 | 0 | NULL | NULL |
| 2019-01-13 | 0 | NULL | NULL |
| 2019-01-14 | 1 | 2019-01-14 08:00:00.000 | 2019-01-14 16:00:00.000 |
| 2019-01-15 | 1 | 2019-01-15 08:00:00.000 | 2019-01-15 16:00:00.000 |
| 2019-01-16 | 1 | 2019-01-16 08:00:00.000 | 2019-01-16 16:00:00.000 |
| 2019-01-17 | 1 | 2019-01-17 08:00:00.000 | 2019-01-17 16:00:00.000 |
+------------+--------------+-------------------------+-------------------------+
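For example, marking a public holiday as non-working is just an update to the calendar table (a sketch; the date is only a hypothetical example):
UPDATE #WorkingCalendar
SET IsWorkingDay = 0,
    WorkingStartTime = NULL,
    WorkingEndTime = NULL
WHERE Date = '2019-12-25' -- hypothetical holiday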
Proposed Solution:
DECLARE @v_BusinessHoursToAdd INT = 2
DECLARE @v_CurrentDateTimeHour DATETIME = '2019-09-06 15:00'
;WITH CalendarFromNow AS
(
SELECT
T.Date,
WorkingStartTime = CASE
WHEN @v_CurrentDateTimeHour BETWEEN T.WorkingStartTime AND T.WorkingEndTime THEN @v_CurrentDateTimeHour
ELSE T.WorkingStartTime END,
WorkingEndTime = T.WorkingEndTime
FROM
#WorkingCalendar AS T
WHERE
T.Date >= CONVERT(DATE, @v_CurrentDateTimeHour) AND
T.IsWorkingDay = 1
),
RollingBusinessSum AS
(
SELECT
C.Date,
C.WorkingStartTime,
C.WorkingEndTime,
AmountBusinessHours = DATEDIFF(HOUR, C.WorkingStartTime, C.WorkingEndTime),
RollingBusinessHoursSum = SUM(DATEDIFF(HOUR, C.WorkingStartTime, C.WorkingEndTime)) OVER (ORDER BY C.Date),
PendingHours = @v_BusinessHoursToAdd - SUM(DATEDIFF(HOUR, C.WorkingStartTime, C.WorkingEndTime)) OVER (ORDER BY C.Date)
FROM
CalendarFromNow AS C
)
SELECT TOP 1
EndingHour = DATEADD(
HOUR,
R.PendingHours,
R.WorkingEndTime)
FROM
RollingBusinessSum AS R
WHERE
R.PendingHours < 0
ORDER BY
R.Date
Explanation:
The first CTE CalendarFromNow simply filters the calendar dates from the current date onward and clamps the working start time to the current time, since this is going to be the starting point to count hours from.
+------------+-------------------------+-------------------------+
| Date | WorkingStartTime | WorkingEndTime |
+------------+-------------------------+-------------------------+
| 2019-09-06 | 2019-09-06 15:00:00.000 | 2019-09-06 16:00:00.000 |
| 2019-09-09 | 2019-09-09 08:00:00.000 | 2019-09-09 16:00:00.000 |
| 2019-09-10 | 2019-09-10 08:00:00.000 | 2019-09-10 16:00:00.000 |
| 2019-09-11 | 2019-09-11 08:00:00.000 | 2019-09-11 16:00:00.000 |
| 2019-09-12 | 2019-09-12 08:00:00.000 | 2019-09-12 16:00:00.000 |
| 2019-09-13 | 2019-09-13 08:00:00.000 | 2019-09-13 16:00:00.000 |
| 2019-09-16 | 2019-09-16 08:00:00.000 | 2019-09-16 16:00:00.000 |
+------------+-------------------------+-------------------------+
The second CTE RollingBusinessSum calculates the number of business hours on each day and accumulates them over the days. The last column, PendingHours, is the number of hours we need to add from now minus the running sum of business hours over the days.
+------------+-------------------------+-------------------------+---------------------+-------------------------+--------------+
| Date | WorkingStartTime | WorkingEndTime | AmountBusinessHours | RollingBusinessHoursSum | PendingHours |
+------------+-------------------------+-------------------------+---------------------+-------------------------+--------------+
| 2019-09-06 | 2019-09-06 15:00:00.000 | 2019-09-06 16:00:00.000 | 1 | 1 | 1 |
| 2019-09-09 | 2019-09-09 08:00:00.000 | 2019-09-09 16:00:00.000 | 8 | 9 | -7 |
| 2019-09-10 | 2019-09-10 08:00:00.000 | 2019-09-10 16:00:00.000 | 8 | 17 | -15 |
| 2019-09-11 | 2019-09-11 08:00:00.000 | 2019-09-11 16:00:00.000 | 8 | 25 | -23 |
| 2019-09-12 | 2019-09-12 08:00:00.000 | 2019-09-12 16:00:00.000 | 8 | 33 | -31 |
| 2019-09-13 | 2019-09-13 08:00:00.000 | 2019-09-13 16:00:00.000 | 8 | 41 | -39 |
+------------+-------------------------+-------------------------+---------------------+-------------------------+--------------+
Finally, the first day on which the PendingHours column is negative is the day on which we reach the number of hours we wanted to add; this is what the TOP 1 with ORDER BY selects. To get the final datetime, we simply add the (negative) pending hours to the working end time of that particular day (here, 16:00 + (-7) hours = 09:00).
+------------+-------------------------+-------------------------+---------------------+-------------------------+--------------+-------------------------+
| Date | WorkingStartTime | WorkingEndTime | AmountBusinessHours | RollingBusinessHoursSum | PendingHours | EndingHour |
+------------+-------------------------+-------------------------+---------------------+-------------------------+--------------+-------------------------+
| 2019-09-09 | 2019-09-09 08:00:00.000 | 2019-09-09 16:00:00.000 | 8 | 9 | -7 | 2019-09-09 09:00:00.000 |
+------------+-------------------------+-------------------------+---------------------+-------------------------+--------------+-------------------------+
You might have to tune performance and do boundary tests, but this should give you a flexible way of working with business hours, holidays, or different time periods.

How to generate a series for a date range with a minutes interval in Oracle?

In Postgres, the query below works using the generate_series function:
SELECT dates
FROM generate_series(CAST('2019-03-01' as TIMESTAMP), CAST('2019-04-01' as TIMESTAMP), interval '30 mins') AS dates
The query below also works in Oracle, but only for a whole-day interval:
select to_date('2019-03-01','YYYY-MM-DD') + rownum -1 as dates
from all_objects
where rownum <= to_date('2019-03-06','YYYY-MM-DD')-to_date('2019-03-01','YYYY-MM-DD')+1
I want the same result in Oracle for the query below:
SELECT dates
FROM generate_series(CAST('2019-03-01' as TIMESTAMP), CAST('2019-04-01' as TIMESTAMP), interval '30 mins') AS dates
Use a hierarchical query:
SELECT DATE '2019-03-01' + ( LEVEL - 1 ) * INTERVAL '30' MINUTE AS dates
FROM DUAL
CONNECT BY DATE '2019-03-01' + ( LEVEL - 1 ) * INTERVAL '30' MINUTE <= DATE '2019-04-01';
Output:
| DATES |
| :------------------ |
| 2019-03-01 00:00:00 |
| 2019-03-01 00:30:00 |
| 2019-03-01 01:00:00 |
| 2019-03-01 01:30:00 |
| 2019-03-01 02:00:00 |
| 2019-03-01 02:30:00 |
| 2019-03-01 03:00:00 |
| 2019-03-01 03:30:00 |
| 2019-03-01 04:00:00 |
| 2019-03-01 04:30:00 |
| 2019-03-01 05:00:00 |
| 2019-03-01 05:30:00 |
...
| 2019-03-31 19:30:00 |
| 2019-03-31 20:00:00 |
| 2019-03-31 20:30:00 |
| 2019-03-31 21:00:00 |
| 2019-03-31 21:30:00 |
| 2019-03-31 22:00:00 |
| 2019-03-31 22:30:00 |
| 2019-03-31 23:00:00 |
| 2019-03-31 23:30:00 |
| 2019-04-01 00:00:00 |
db<>fiddle here
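If you prefer ANSI-style recursion, Oracle 11gR2 and later also support a recursive WITH clause; this sketch should be equivalent to the hierarchical query above:
WITH dates (dt) AS (
    SELECT DATE '2019-03-01' FROM DUAL
    UNION ALL
    SELECT dt + INTERVAL '30' MINUTE
    FROM dates
    WHERE dt + INTERVAL '30' MINUTE <= DATE '2019-04-01'
)
SELECT dt AS dates FROM dates;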

SQL query for setting column based on last seven entries

Problem
I am having trouble figuring out how to create a query that can tell whether any user entry is preceded by 7 days without any activity (secondsPlayed == 0) and, if so, indicate it with the value 1, otherwise 0.
This also means that if the user has fewer than 7 entries, the value will be 0 across all entries.
Input table:
+------------------------------+-------------------------+---------------+
| userid | estimationDate | secondsPlayed |
+------------------------------+-------------------------+---------------+
| a | 2016-07-14 00:00:00 UTC | 192.5 |
| a | 2016-07-15 00:00:00 UTC | 357.3 |
| a | 2016-07-16 00:00:00 UTC | 0 |
| a | 2016-07-17 00:00:00 UTC | 0 |
| a | 2016-07-18 00:00:00 UTC | 0 |
| a | 2016-07-19 00:00:00 UTC | 0 |
| a | 2016-07-20 00:00:00 UTC | 0 |
| a | 2016-07-21 00:00:00 UTC | 0 |
| a | 2016-07-22 00:00:00 UTC | 0 |
| a | 2016-07-23 00:00:00 UTC | 0 |
| a | 2016-07-24 00:00:00 UTC | 0 |
| ---------------------------- | ---------------------- | ---- |
| b | 2016-07-02 00:00:00 UTC | 31.2 |
| b | 2016-07-03 00:00:00 UTC | 42.1 |
| b | 2016-07-04 00:00:00 UTC | 41.9 |
| b | 2016-07-05 00:00:00 UTC | 43.2 |
| b | 2016-07-06 00:00:00 UTC | 91.5 |
| b | 2016-07-07 00:00:00 UTC | 0 |
| b | 2016-07-08 00:00:00 UTC | 0 |
| b | 2016-07-09 00:00:00 UTC | 239.1 |
| b | 2016-07-10 00:00:00 UTC | 0 |
+------------------------------+-------------------------+---------------+
The intended output table should look like this:
Output table:
+------------------------------+-------------------------+---------------+----------+
| userid | estimationDate | secondsPlayed | inactive |
+------------------------------+-------------------------+---------------+----------+
| a | 2016-07-14 00:00:00 UTC | 192.5 | 0 |
| a | 2016-07-15 00:00:00 UTC | 357.3 | 0 |
| a | 2016-07-16 00:00:00 UTC | 0 | 0 |
| a | 2016-07-17 00:00:00 UTC | 0 | 0 |
| a | 2016-07-18 00:00:00 UTC | 0 | 0 |
| a | 2016-07-19 00:00:00 UTC | 0 | 0 |
| a | 2016-07-20 00:00:00 UTC | 0 | 0 |
| a | 2016-07-21 00:00:00 UTC | 0 | 0 |
| a | 2016-07-22 00:00:00 UTC | 0 | 1 |
| a | 2016-07-23 00:00:00 UTC | 0 | 1 |
| a | 2016-07-24 00:00:00 UTC | 0 | 1 |
| ---------------------------- | ----------------------- | ----- | ----- |
| b | 2016-07-02 00:00:00 UTC | 31.2 | 0 |
| b | 2016-07-03 00:00:00 UTC | 42.1 | 0 |
| b | 2016-07-04 00:00:00 UTC | 41.9 | 0 |
| b | 2016-07-05 00:00:00 UTC | 43.2 | 0 |
| b | 2016-07-06 00:00:00 UTC | 91.5 | 0 |
| b | 2016-07-07 00:00:00 UTC | 0 | 0 |
| b | 2016-07-08 00:00:00 UTC | 0 | 0 |
| b | 2016-07-09 00:00:00 UTC | 239.1 | 0 |
| b | 2016-07-10 00:00:00 UTC | 0 | 0 |
+------------------------------+-------------------------+---------------+----------+
Thoughts
At first I was thinking about using the lag function with an offset of 7, but this would obviously not take into account any of the entries in between.
I was also thinking about creating a rolling window/average over a period of 7 days and evaluating whether it is above 0. However, this might be a bit above my skill level.
Does anyone have a good solution to this problem?
Assuming that you have data every day (which seems like a reasonable assumption), you can sum a window function:
select t.*,
(case when sum(secondsplayed) over (partition by userid
order by estimationdate
rows between 6 preceding and current row
) = 0 and
row_number() over (partition by userid order by estimationdate) >= 7
then 1
else 0
end) as inactive
from t;
In addition to no holes in the dates, this also assumes that secondsplayed is never negative. (Negative values can easily be incorporated into the logic, but that seems unnecessary.)
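If negative values were possible, one option (a sketch along the same lines, not part of the answer above) is to count rows with nonzero activity in the window instead of summing the seconds:
select t.*,
       (case when sum(case when secondsplayed <> 0 then 1 else 0 end) over
                  (partition by userid
                   order by estimationdate
                   rows between 6 preceding and current row
                  ) = 0 and
                  row_number() over (partition by userid order by estimationdate) >= 7
             then 1
             else 0
        end) as inactive
from t;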
In my experience, this type of input table does not contain inactivity entries and usually looks like this (only activity entries are present here):
Input table:
+------------------------------+-------------------------+---------------+
| userid | estimationDate | secondsPlayed |
+------------------------------+-------------------------+---------------+
| a | 2016-07-14 00:00:00 UTC | 192.5 |
| a | 2016-07-15 00:00:00 UTC | 357.3 |
| ---------------------------- | ---------------------- | ---- |
| b | 2016-07-02 00:00:00 UTC | 31.2 |
| b | 2016-07-03 00:00:00 UTC | 42.1 |
| b | 2016-07-04 00:00:00 UTC | 41.9 |
| b | 2016-07-05 00:00:00 UTC | 43.2 |
| b | 2016-07-06 00:00:00 UTC | 91.5 |
| b | 2016-07-09 00:00:00 UTC | 239.1 |
+------------------------------+-------------------------+---------------+
So, below is for BigQuery Standard SQL and input as above
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'a' userid, TIMESTAMP '2016-07-14 00:00:00 UTC' estimationDate, 192.5 secondsPlayed UNION ALL
SELECT 'a', '2016-07-15 00:00:00 UTC', 357.3 UNION ALL
SELECT 'b', '2016-07-02 00:00:00 UTC', 31.2 UNION ALL
SELECT 'b', '2016-07-03 00:00:00 UTC', 42.1 UNION ALL
SELECT 'b', '2016-07-04 00:00:00 UTC', 41.9 UNION ALL
SELECT 'b', '2016-07-05 00:00:00 UTC', 43.2 UNION ALL
SELECT 'b', '2016-07-06 00:00:00 UTC', 91.5 UNION ALL
SELECT 'b', '2016-07-09 00:00:00 UTC', 239.1
), time_frame AS (
SELECT day
FROM UNNEST(GENERATE_DATE_ARRAY('2016-07-02', '2016-07-24')) day
)
SELECT
users.userid,
day,
IFNULL(secondsPlayed, 0) secondsPlayed,
CAST(1 - SIGN(SUM(IFNULL(secondsPlayed, 0))
OVER(
PARTITION BY users.userid
ORDER BY UNIX_DATE(day)
RANGE BETWEEN 6 PRECEDING AND CURRENT ROW
)) AS INT64) AS inactive
FROM time_frame tf
CROSS JOIN (SELECT DISTINCT userid FROM `project.dataset.table`) users
LEFT JOIN `project.dataset.table` t
ON day = DATE(estimationDate) AND users.userid = t.userid
ORDER BY userid, day
with the result:
Row userid day secondsPlayed inactive
...
13 a 2016-07-14 192.5 0
14 a 2016-07-15 357.3 0
15 a 2016-07-15 357.3 0
16 a 2016-07-16 0.0 0
17 a 2016-07-17 0.0 0
18 a 2016-07-18 0.0 0
19 a 2016-07-19 0.0 0
20 a 2016-07-20 0.0 0
21 a 2016-07-21 0.0 0
22 a 2016-07-22 0.0 1
23 a 2016-07-23 0.0 1
24 a 2016-07-24 0.0 1
25 b 2016-07-02 31.2 0
26 b 2016-07-03 42.1 0
27 b 2016-07-04 41.9 0
28 b 2016-07-05 43.2 0
29 b 2016-07-06 91.5 0
30 b 2016-07-07 0.0 0
31 b 2016-07-08 0.0 0
32 b 2016-07-09 239.1 0
33 b 2016-07-10 0.0 0
...