Related
I have a table with id and date I need two outputs as below. I will pass an input date and I need records according to below conditions
Output 1: For the first row I need the latest date that is less than or equal to the input date, whether or not it is a month end. The next rows must be the month-end data for the first-row date minus 1 year and for the first-row date minus 2 years; if no data is available for a particular year, return NULL on that row.
Output 2: Same as output 1, but with a twist: for the first-row date minus 1 year and minus 2 years I need whatever data is available, and I do not need NULL rows.
-- Sample data for the question.
-- A name that starts with # denotes a temporary table, and DECLARE ... TABLE
-- only accepts @-prefixed table variables, so the fixture is created with
-- CREATE TABLE. The query that follows can keep reading from #tbl.
create table #tbl (id int, marketdate date);

insert into #tbl (id, marketdate)
values (1, '2018-05-31'),
       (1, '2017-05-29'),
       (1, '2016-05-31'),
       (2, '2018-02-28'),
       (2, '2017-02-28'),
       (2, '2016-02-29'),
       (2, '2016-02-28');
My query :
-- Per id, start from the latest marketdate on or before the input date, then
-- repeatedly step to the latest marketdate that is at least one year older
-- than the row found in the previous step. Recursion stops for an id as soon
-- as no older row qualifies.
;with yearly_steps as (
    -- Anchor: most recent marketdate per id not after the input date.
    select src.id,
           max(src.marketdate) as marketdate
    from #tbl as src
    where src.marketdate <= '2018-06-05'
    group by src.id

    union all

    -- Recursive step: newest row at least one year before the previous pick.
    select ranked.id,
           ranked.marketdate
    from (
        select older.id,
               older.marketdate,
               row_number() over (partition by older.id
                                  order by older.marketdate desc) as pick
        from #tbl as older
        inner join yearly_steps as prev
            on older.id = prev.id
        where older.marketdate <= dateadd(year, -1, prev.marketdate)
    ) as ranked
    where ranked.pick = 1
)
select id,
       marketdate
from yearly_steps
order by id, marketdate desc
output 1:
1 2018-05-31
1 NULL
1 2016-05-31
2 2018-02-28
2 2017-02-28
2 2016-02-29
output 2:
1 2018-05-31
1 2017-05-29
1 2016-05-31
2 2018-02-28
2 2017-02-28
2 2016-02-29
Please help.
This is in reference to below Question
Loop through each value to the seq num
But now Client want to see the data differently and started a new thread for this question.
below is the requirement.
This is the data .
ID seqNum DOS Service End Date
1 1 1/1/2017 1/15/2017
1 2 1/16/2017 1/16/2017
1 3 1/17/2017 1/21/2017
1 4 1/22/2017 2/13/2017
1 5 2/14/2017 3/21/2017
1 6 2/16/2017 3/21/2017
Expected outPut:
ID SeqNum DOSBeg DOSEnd
1 1 1/1/2017 1/30/2017
1 2 1/31/2017 3/1/2017
1 3 3/2/2017 3/31/2017
For each DOSBeg, add 29 days to get DOSEnd. Then add 1 day to DOSEnd (1/30/2017) to get the new DOSBeg (1/31/2017).
Now add 29 days to 1/31/2017, which gives 3/1/2017 as the next DOSEnd. Repeat this until DOSEnd >= Max End Date, i.e. 3/21/2017.
Basically, we need episode of 29 days for each ID.
I tried with this code and it is giving me duplicates.
-- Builds consecutive 30-day episodes (DOSBeg .. DOSBeg + 29 days) per ID.
--
-- Fixes relative to the first attempt:
--   * the anchor yields exactly one row per ID (earliest DOS), so the output
--     no longer contains one chain of episodes per source row (duplicates);
--   * the recursive member stops once an episode's DOSEnd reaches the latest
--     ServiceEndDate of that ID; with MAXRECURSION 0 and no stop condition
--     the recursion would never end;
--   * minDate / maxDate are derived here from #temp (DOS, ServiceEndDate)
--     instead of being assumed to exist as columns.
;with bounds as (
    select ID,
           min(DOS)            as minDate,
           max(ServiceEndDate) as maxDate
    from #temp
    group by ID
),
cte as (
    select ID,
           minDate                   as DOSBeg,
           dateadd(day, 29, minDate) as DOSEnd,
           maxDate
    from bounds

    union all

    -- Next episode starts the day after the previous one ends.
    select ID,
           dateadd(day, 1, DOSEnd)  as DOSBeg,
           dateadd(day, 30, DOSEnd) as DOSEnd,
           maxDate
    from cte
    where DOSEnd < maxDate  -- repeat until DOSEnd >= max end date
)
select ID, DOSBeg, DOSEnd
from cte
order by ID, DOSBeg
OPTION (MAXRECURSION 0)
Here mindate is Minimum DOS for this ID i.e. 1/1/2017
I came up with below logic and this is working fine for me. Is there any better way than this ?
-- Sample data. #-prefixed names are temporary tables and must be created with
-- CREATE TABLE (DECLARE ... TABLE is only valid for @-prefixed table
-- variables). seqNum is an identity column, so it is left out of the insert.
create table #table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date);

insert into #table (id, DOS, ServiceEndDate)
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118');

-- Working copy used by the statements that follow.
select * into #temp from #table;
--drop table #data
-- Materialises one row per #temp row into #data, each carrying the per-ID
-- earliest DOS (minDate), the per-ID latest ServiceEndDate (maxDate) and a
-- per-ID sequence number ordered by DOS. SeqNum is unique within an ID, so
-- the rows are already distinct.
select
    src.ID,
    cast(min(src.DOS) over (partition by src.ID) as date) as minDate,
    row_number() over (partition by src.ID order by src.DOS) as SeqNum,
    src.DOS,
    max(src.ServiceEndDate) over (partition by src.ID) as maxDate
into #data
from #temp as src
--drop table #StartDateLogic
-- Generates the start date of every 30-day episode per ID into
-- #StartDateLogic: minDate, minDate + 30, ... while the start date does not
-- pass maxDate.
--
-- The leading semicolon is required because WITH is not the first statement
-- of the batch. The anchor is de-duplicated because #data holds one row per
-- source row, and every duplicate anchor row would otherwise spawn its own
-- identical chain of recursive rows.
;with cte as (
    select distinct ID, minDate as startdate, maxDate
    from #data

    union all

    select ID, dateadd(day, 30, startdate) as startdate, maxDate
    from cte
    where maxDate >= dateadd(day, 30, startdate)
)
select ID, startdate
into #StartDateLogic
from cte
OPTION (MAXRECURSION 0)
--final Result set
-- Final result: number the episode start dates per ID and derive each
-- episode's end date as start date + 29 days.
select
    episodes.ID,
    row_number() over (partition by episodes.ID
                       order by episodes.StartDate) as SeqNum,
    episodes.StartDate,
    dateadd(day, 29, episodes.StartDate) as EndDate
from #StartDateLogic as episodes
You were on the right track with the recursive CTE, but you forgot the anchor.
-- Sample data. Created as a temporary table because a #-prefixed name cannot
-- be used with DECLARE ... TABLE (that form requires an @-prefixed variable).
-- seqNum is an identity column and is therefore omitted from the insert.
create table #table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date);

insert into #table (id, DOS, ServiceEndDate)
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118');
-- 30-day episodes per id, driven by dates rather than by source rows.
--
-- The earlier form advanced the recursion by joining to the next seqNum, so
-- an id could never get more episodes than it has rows, seqNum in the output
-- was the table-wide identity value (not 1, 2, 3 per id), and surplus
-- episodes were trimmed with a "+1 month" heuristic. Here each id starts at
-- its earliest DOS and keeps adding episodes until an episode's DOSEnd
-- reaches the latest ServiceEndDate of that id.
;with bounds as (
    select id,
           min(DOS)            as first_dos,
           max(ServiceEndDate) as last_end
    from #table
    group by id
),
dates as (
    select id,
           seqNum = 1,
           DOSBeg = first_dos,
           DOSEnd = dateadd(day, 29, first_dos),
           last_end
    from bounds

    union all

    select d.id,
           d.seqNum + 1,
           dateadd(day, 1, d.DOSEnd),   -- day after the previous episode
           dateadd(day, 30, d.DOSEnd),  -- new DOSBeg + 29 days
           d.last_end
    from dates d
    where d.DOSEnd < d.last_end         -- stop once DOSEnd >= max end date
)
select id, seqNum, DOSBeg, DOSEnd
from dates
order by id, seqNum
option (maxrecursion 0)
I am in a situation where I need to find-out total time spent in office for some internal application.
I have sample data like this:
Id EmployeeId ScanDateTime Status
7 87008 2018-08-02 16:03:00.227 1
8 87008 2018-08-02 16:06:17.277 2
9 87008 2018-08-02 16:10:37.107 3
10 87008 2018-08-02 16:20:17.277 2
11 87008 2018-08-02 16:30:37.107 3
12 87008 2018-08-02 20:06:00.000 4
Here Status have different meanings:
1- Start
2- Pause
3- Resume
4- End
Means Employees start their work at ScanDateTime when status is 1. They can go for break(status 2) and come back and resume their work(Status 3) and with status 4 means they are ending their job.
Note: There could be multiple breaks during work hours.
Expected Output:
EmployeeId StartTime EndTime BreakInMins
87008 2018-08-02 16:03:00.227 2018-08-02 20:06:00.000 14
I have tried to follow some example to calculate the expected result set but not helping.
I could not find any such example where this similar example available.
Any help would be appreciated.
Please try this. Handles multiple breaks/employees and cases, when break is still in progress or session is not finished
-- One row per session start (Status = 1) with its end time and total break
-- minutes.
--   [et] : the first End scan (Status = 4) of the same employee with a
--          higher Id than the start scan; NULL columns when the session is
--          still open.
--   [b]  : sum over every Pause scan (Status = 2) between the start and the
--          end of the minutes until the next Resume/End scan; a pause with no
--          following Resume/End is measured up to getdate().
-- BreakInMins is reported as 0 (not NULL) when the session has no pauses,
-- because SUM over zero rows returns NULL. Every column reference is
-- qualified so none of them depends on implicit scope resolution.
select
    [EmployeeId] = [s].[EmployeeId]
    ,[StartTime] = [s].[ScanDateTime]
    ,[EndTime] = [et].[ScanDateTime]
    ,[BreakInMins] = isnull([b].[BreakInMins], 0)
from
    [Scans] as [s] -- here is your table
    outer apply
    (
        select top 1 [e].[ScanDateTime], [e].[Id]
        from [Scans] as [e]
        where [e].[Id] > [s].[Id]
            and [e].[EmployeeId] = [s].[EmployeeId]
            and [e].[Status] = 4
        order by [e].[ScanDateTime] asc
    ) as [et]
    outer apply
    (
        select
            [BreakInMins] = sum(isnull([r].[mins], datediff(mi, [sp].[ScanDateTime], getdate())))
        from
            [Scans] as [sp]
            outer apply
            (
                select top 1 [mins] = datediff(mi, [sp].[ScanDateTime], [n].[ScanDateTime])
                from [Scans] as [n]
                where [n].[Id] > [sp].[Id]
                    and [n].[EmployeeId] = [sp].[EmployeeId]
                    and [n].[Status] in (3, 4)
                order by [n].[ScanDateTime] asc
            ) as [r]
        where
            [sp].[Id] > [s].[Id]
            -- open session: no upper bound on the pause Id
            and ([et].[Id] is null or [sp].[Id] < [et].[Id])
            and [sp].[EmployeeId] = [s].[EmployeeId]
            and [sp].[Status] = 2
    ) as [b]
where
    [s].[Status] = 1;
Here is test-friendly script: script
i consider multiple breaks per day of employee you can check below i also provided fiddle link
-- Start/end time per employee joined to the break minutes of the day on
-- which the employee's StartTime falls.
;with session_bounds as (
    -- Earliest Start scan and latest End scan per employee.
    select EmployeeId,
           min(case when status = 1 then ScanDateTime end) as StartTime,
           max(case when status = 4 then ScanDateTime end) as EndTime
    from emp
    group by EmployeeId
),
break_events as (
    -- Distinct Pause (2) and Resume (3) scans.
    select EmployeeId,
           ScanDateTime as event_time,
           status
    from emp
    where status in (2, 3)
      and ScanDateTime is not null
    group by EmployeeId, ScanDateTime, status
),
events_with_next as (
    -- LAG over descending time = the next later event of the same employee.
    select EmployeeId,
           event_time,
           status,
           lag(event_time) over (partition by EmployeeId
                                 order by event_time desc) as next_event_time
    from break_events
),
daily_breaks as (
    -- Minutes from each Pause to the event after it, summed per calendar day.
    select EmployeeId,
           convert(date, event_time) as break_day,
           sum(case when status = 2
                    then datediff(minute, event_time, next_event_time)
               end) as breakmins
    from events_with_next
    group by EmployeeId, convert(date, event_time)
)
select sb.EmployeeId,
       sb.StartTime,
       sb.EndTime,
       db.breakmins
from session_bounds as sb
inner join daily_breaks as db
    on sb.EmployeeId = db.EmployeeId
   and convert(date, sb.StartTime) = db.break_day
EmployeeId StartTime EndTime breakmins
87008 2018-08-02T16:03:00.227Z 2018-08-02T20:06:00Z 12
http://sqlfiddle.com/#!18/ae60f/6
You can try this.
Compute a ROW_NUMBER per Status in a CTE, because we need to know which Pause time corresponds to which Resume time. Then self-join the CTE on EmployeeId and that row number.
-- Scan log fixture: one employee, one working day.
-- Status: 1 = Start, 2 = Pause, 3 = Resume, 4 = End.
CREATE TABLE T (
    Id           INT,
    EmployeeId   INT,
    ScanDateTime DATETIME,
    Status       INT
);

INSERT INTO T (Id, EmployeeId, ScanDateTime, Status)
VALUES
    (7,  87008, '2018-08-02 16:03:00.227', 1),
    (8,  87008, '2018-08-02 16:06:17.277', 2),
    (9,  87008, '2018-08-02 16:10:37.107', 3),
    (10, 87008, '2018-08-02 16:20:17.277', 2),
    (11, 87008, '2018-08-02 16:30:37.107', 3),
    (12, 87008, '2018-08-02 20:06:00.000', 4);
Query 1:
-- Total break minutes per employee.
-- Each scan gets the employee's first and last scan time plus a rank within
-- (EmployeeId, Status). The n-th Pause (Status 2) is then paired with the
-- n-th Resume (Status 3) of the same employee.
--
-- The rank must be partitioned by EmployeeId as well as Status: ranking by
-- Status alone numbers pauses and resumes across all employees, so with more
-- than one employee the n-th pause of one employee no longer lines up with
-- that employee's n-th resume.
;with cte as (
    SELECT s.Id,
           s.EmployeeId,
           s.ScanDateTime,
           s.Status,
           MIN(s.ScanDateTime) OVER (PARTITION BY s.EmployeeId) AS StartTime,
           MAX(s.ScanDateTime) OVER (PARTITION BY s.EmployeeId) AS EndTime,
           ROW_NUMBER() OVER (PARTITION BY s.EmployeeId, s.Status ORDER BY s.Id) AS rn
    FROM T AS s
)
select p.EmployeeId,
       p.StartTime,
       p.EndTime,
       SUM(datediff(minute, p.ScanDateTime, r.ScanDateTime)) AS BreakInMins
from cte AS p            -- pause scans
inner join cte AS r      -- matching resume scans
    on p.rn = r.rn
   and p.EmployeeId = r.EmployeeId
   and p.Status = 2
   and r.Status = 3
group by p.EmployeeId,
         p.StartTime,
         p.EndTime
Results:
| EmployeeId | StartTime | EndTime | BreakInMins |
|------------|--------------------------|----------------------|-------------|
| 87008 | 2018-08-02T16:03:00.227Z | 2018-08-02T20:06:00Z | 14 |
EDIT
you can try this query if there are different day in your data. just group by the date.
-- Scan log fixture: one employee over two working days.
-- Status: 1 = Start, 2 = Pause, 3 = Resume, 4 = End.
CREATE TABLE T (
    Id           INT,
    EmployeeId   INT,
    ScanDateTime DATETIME,
    Status       INT
);

INSERT INTO T (Id, EmployeeId, ScanDateTime, Status)
VALUES
    -- 2018-08-02
    (7,   87008, '2018-08-02 16:03:00.227', 1),
    (8,   87008, '2018-08-02 16:06:17.277', 2),
    (9,   87008, '2018-08-02 16:10:37.107', 3),
    (10,  87008, '2018-08-02 16:20:17.277', 2),
    (11,  87008, '2018-08-02 16:30:37.107', 3),
    (12,  87008, '2018-08-02 20:06:00.000', 4),
    -- 2018-08-03
    (27,  87008, '2018-08-03 16:03:00.227', 1),
    (28,  87008, '2018-08-03 16:06:17.277', 2),
    (29,  87008, '2018-08-03 16:11:37.107', 3),
    (210, 87008, '2018-08-03 16:20:17.277', 2),
    (211, 87008, '2018-08-03 16:30:37.107', 3),
    (212, 87008, '2018-08-03 20:06:00.000', 4);
Query 1:
-- Break minutes per employee per calendar day.
--   cte  : Start (Status 1) and End (Status 4) time per employee and day.
--   cte2 : Pause/Resume scans of that day, ranked within
--          (EmployeeId, day, Status).
-- The n-th Pause of a day is paired with the n-th Resume of the same day.
--
-- The rank is reset per day and the pairing also matches on the day. With a
-- single running rank per employee, one pause without a resume (for example
-- a day that ends while paused) shifts every later pause onto the wrong
-- resume on all following days.
;with cte as (
    SELECT EmployeeId,
           MAX(CASE WHEN Status = 1 THEN ScanDateTime END) AS StartTime,
           MIN(CASE WHEN Status = 4 THEN ScanDateTime END) AS EndTime,
           CAST(ScanDateTime AS date) AS dt
    FROM T
    GROUP BY EmployeeId, CAST(ScanDateTime AS date)
),
cte2 as (
    SELECT s.Id,
           s.EmployeeId,
           s.ScanDateTime,
           s.Status,
           ROW_NUMBER() OVER (PARTITION BY s.EmployeeId, d.dt, s.Status
                              ORDER BY s.Id) AS rn,
           d.StartTime,
           d.EndTime,
           d.dt
    FROM cte AS d
    INNER JOIN T AS s
        ON d.EmployeeId = s.EmployeeId
       AND s.Status IN (2, 3)
       AND d.dt = CAST(s.ScanDateTime AS date)
)
select p.EmployeeId,
       p.StartTime,
       p.EndTime,
       SUM(datediff(minute, p.ScanDateTime, r.ScanDateTime)) AS BreakInMins
from cte2 AS p           -- pause scans
inner join cte2 AS r     -- matching resume scans of the same day
    on p.rn = r.rn
   and p.EmployeeId = r.EmployeeId
   and p.dt = r.dt
   and p.Status = 2
   and r.Status = 3
group by p.EmployeeId,
         p.dt,
         p.StartTime,
         p.EndTime
order by p.EmployeeId,
         p.StartTime
Results:
| EmployeeId | StartTime | EndTime | BreakInMins |
|------------|--------------------------|----------------------|-------------|
| 87008 | 2018-08-02T16:03:00.227Z | 2018-08-02T20:06:00Z | 14 |
| 87008 | 2018-08-03T16:03:00.227Z | 2018-08-03T20:06:00Z | 15 |
Try below query: http://sqlfiddle.com/#!18/6fe11/3
-- Per id: first Start time (status 1), first End time (status 4) and the sum
-- of the minutes between every Pause scan (status 2) and the scan after it.
;with gaps as (
    -- Minutes from each scan to the next scan of the same id (NULL for the
    -- last scan).
    select id,
           stattime,
           status,
           datediff(minute,
                    stattime,
                    lead(stattime) over (partition by id order by stattime)) as gap_minutes
    from ForgeRock
)
select id,
       min(case when status = 1 then stattime end) as starttime,
       min(case when status = 4 then stattime end) as endtime,
       sum(case when status = 2 then gap_minutes end) as breakinmin
from gaps
group by id
id starttime endtime breakinmin
87008 2018-08-02T16:03:00.227Z 2018-08-02T20:06:00Z 14
I have a table with following format:
ID ID1 ID2 DATE
1 1 1 2018-03-01
2 1 1 2018-03-02
3 1 1 2018-03-05
4 1 1 2018-03-06
5 1 1 2018-03-07
6 2 2 2018-03-05
7 2 2 2018-03-05
8 2 2 2018-03-06
9 2 2 2018-03-07
10 2 2 2018-03-08
From this table I have to get all records where ID1 and ID2 are the same in that column and where DATE is 5 consecutive work days (5 dates in a row, ignoring missing dates for Saturday/Sunday; ignore holidays).
I have really no idea how to achieve this. I did search around, but couldn't find anything that helped me. So my question is, how can I achieve following output?
ID ID1 ID2 DATE
1 1 1 2018-03-01
2 1 1 2018-03-02
3 1 1 2018-03-05
4 1 1 2018-03-06
5 1 1 2018-03-07
SQLFiddle to mess around
Assuming you have no duplicates and work is only on weekdays, then there is a simplish solution for this particular case. We can identify the date 4 rows ahead. For a complete week, it is either 4 days ahead or 6 days ahead:
-- Returns the first row of every run of 5 consecutive workdays.
-- dat_4 is the date four rows ahead within the same (id1, id2) group; a full
-- Mon-Fri week spans 4 calendar days, a run that crosses a weekend spans 6.
-- Assumes no duplicate dates and weekday-only rows, as stated in the answer.
--
-- The window is partitioned by id1, id2. Ordering by id2, dat without a
-- partition lets LEAD reach into the next group, so the last rows of one
-- group were compared with dates that belong to a different group.
select w.*
from (select t.*,
             lead(t.dat, 4) over (partition by t.id1, t.id2 order by t.dat) as dat_4
      from t
     ) w
where datediff(day, w.dat, w.dat_4) in (4, 6);
This happens to work because you are looking for a complete week.
Here is the SQL Fiddle.
-- All rows of t whose (id1, id2) group contains exactly 5 distinct dates.
select t.*
from t
inner join (
    -- groups with exactly five distinct dat values
    select id1, id2
    from t
    group by id1, id2
    having count(distinct dat) = 5
) as five_day_groups
    on t.id1 = five_day_groups.id1
   and t.id2 = five_day_groups.id2
Check this-
Dates of Two weeks with 10 valid dates
http://sqlfiddle.com/#!18/76556/1
Dates of Two weeks with 10 non-unique dates
http://sqlfiddle.com/#!18/b4299/1
and
Dates of Two weeks with less than 10 but unique
http://sqlfiddle.com/#!18/f16cb/1
This query is very verbose without LEAD or LAG and it is the best I could do on my lunch break. You can probably improve on it given the time.
-- Sample data. #T is created as a temporary table: DECLARE ... TABLE is only
-- valid for @-prefixed table variables, and the statements that follow read
-- from #T. Dates use the unseparated yyyymmdd form so they parse the same way
-- under every language / DATEFORMAT setting.
CREATE TABLE #T
(
ID INT,
ID1 INT,
ID2 INT,
TheDate DATETIME
)
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (1,1,1,'20180301')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (2,1,1,'20180302')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (3,1,1,'20180305')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (4,1,1,'20180306')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (5,1,1,'20180307')
--INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (5,1,1,'20180309')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (6,2,2,'20180302')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (7,2,2,'20180305')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (8,2,2,'20180305')
--INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (9,2,2,'20180306')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (10,2,2,'20180307')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (11,2,2,'20180308')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (12,2,2,'20180315')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (13,1,1,'20180401')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (14,1,1,'20180402')
INSERT INTO #T (ID, ID1, ID2, TheDate) VALUES (15,1,1,'20180405')
--SELECT * FROM #T
-- Finds rows of #T that belong to a run of at least @DaysThreshold
-- consecutive workdays within the same (ID1, ID2), without LEAD/LAG.
--
-- Scalar variables carry the @ sigil and the DATEFIRST setting is read
-- through @@DATEFIRST; a # prefix is only valid for temporary tables.
-- The query logic itself is unchanged.
DECLARE @LowDate DATETIME = DATEADD(DAY,-1,(SELECT MIN(TheDate) FROM #T))   -- day before the earliest date
DECLARE @HighDate DATETIME = DATEADD(DAY,1,(SELECT MAX(TheDate) FROM #T))   -- day after the latest date
DECLARE @DaysThreshold INT = 5                                              -- minimum run length
;
WITH Dates AS
(
    -- Calendar: one row per day from @LowDate up to, but excluding, @HighDate.
    SELECT DateValue=@LowDate
    UNION ALL
    SELECT DateValue + 1 FROM Dates
    WHERE DateValue + 1 < @HighDate
),
Joined AS
(
    -- Calendar days with the #T rows that fall on them (NULLs where none).
    SELECT * FROM Dates LEFT OUTER JOIN #T T ON T.TheDate=Dates.DateValue
),
Calculations AS
(
    -- Per (ID1, ID2, TheDate): the latest earlier date of the same group,
    -- the gap to it in days, and whether the day before TheDate is a weekend
    -- day. (DW + @@DATEFIRST) % 7 is 0 for Saturday and 1 for Sunday
    -- regardless of the DATEFIRST setting.
    SELECT
        ID=MAX(J1.ID),
        J1.ID1,J1.ID2,
        J1.TheDate,
        LastDate=MAX(J2.TheDate),
        LastDateWasWeekend = CASE WHEN ((DATEPART(DW,DATEADD(DAY,-1,J1.TheDate) ) + @@DATEFIRST) % 7) NOT IN (0, 1) THEN 0 ELSE 1 END,
        Offset = DATEDIFF(DAY,MAX(J2.TheDate),J1.TheDate)
    FROM
        Joined J1
        LEFT OUTER JOIN Joined J2 ON J2.ID1=J1.ID1 AND J2.ID2=J1.ID2 AND J2.TheDate<J1.TheDate
    WHERE
        NOT J1.ID IS NULL
    GROUP BY J1.ID1,J1.ID2,J1.TheDate
)
,FindValid AS
(
    -- IsValid = 1 when the row continues (or starts) a run: first date of the
    -- group, a 1-day gap, or a gap of up to 3 days whose preceding day is a
    -- weekend day. The UNION adds one IsValid = 0 sentinel per (ID1, ID2) at
    -- @HighDate.
    SELECT
        ID,ID1,ID2,TheDate,
        IsValid=CASE
            WHEN LastDate=TheDate THEN 0
            WHEN LastDate IS NULL THEN 1
            WHEN Offset=1 THEN 1
            WHEN Offset>3 THEN 0
            WHEN Offset<=3 THEN
                LastDateWasWeekend
        END
    FROM
        Calculations
    UNION
    SELECT DISTINCT ID=NULL,ID1,ID2, TheDate=@HighDate,IsValid=0 FROM #T
),
FindMax As
(
    -- MaxRange: the earliest later date of the same group with IsValid = 0.
    SELECT
        This.ID,This.ID1,This.ID2,This.TheDate,MaxRange=MIN(Next.TheDate)
    FROM
        FindValid This
        LEFT OUTER JOIN FindValid Next ON Next.ID2=This.ID2 AND Next.ID1=This.ID1 AND This.TheDate<Next.TheDate AND Next.IsValid=0
    GROUP BY
        This.ID,This.ID1,This.ID2,This.TheDate
),
FindMin AS
(
    -- MinRange: the earliest date among rows of the same group whose
    -- MaxRange lies after this row's date.
    SELECT
        This.ID,This.ID1,This.ID2,This.TheDate,This.MaxRange,MinRange=MIN(Next.TheDate)
    FROM
        FindMax This
        LEFT OUTER JOIN FindMax Next ON Next.ID2=This.ID2 AND Next.ID1=This.ID1 AND This.TheDate<Next.MaxRange-- AND Next.IsValid=0 OR Next.TheDate IS NULL
    GROUP BY
        This.ID,This.ID1,This.ID2,This.TheDate,This.MaxRange
)
,Final AS
(
    -- Number of rows sharing each (ID1, ID2, MinRange, MaxRange) range.
    SELECT
        ID1,ID2,MinRange,MaxRange,SequentialCount=COUNT(*)
    FROM
        FindMin
    GROUP BY
        ID1,ID2,MinRange,MaxRange
)
-- Rows of #T that fall inside a range holding at least @DaysThreshold rows.
SELECT
    T.ID,
    T.ID1,
    T.ID2,
    T.TheDate
FROM #T T
    INNER JOIN Final ON T.TheDate>= Final.MinRange AND T.TheDate < Final.MaxRange AND T.ID1=Final.ID1 AND T.ID2=Final.ID2
WHERE
    SequentialCount>=@DaysThreshold
OPTION (MAXRECURSION 0)
I have a sample table
-- Sample table: occurrences of a txt code at a given time.
CREATE TABLE dbo.wt (
    id  int          NULL,
    dt  datetime     NULL,
    txt nvarchar(50) NULL
) ON [PRIMARY]
GO
INSERT INTO dbo.wt (id, dt, txt)
VALUES
    (1,  '2017-01-01 00:01:00.000', 't1'),
    (2,  '2017-01-01 00:03:00.000', 't1'),
    (3,  '2017-01-01 00:02:00.000', 't1'),
    (4,  '2017-01-01 01:04:00.000', 't1'),
    (5,  '2017-01-01 02:10:00.000', 't1'),
    (6,  '2017-01-01 00:01:00.000', 't1'),
    (7,  '2017-01-01 01:05:00.000', 't1'),
    (8,  '2017-01-01 02:10:00.000', 't2'),
    (9,  '2017-01-01 00:03:00.000', 't2'),
    (10, '2017-01-01 01:04:00.000', 't2'),
    (11, '2017-01-01 00:52:00.000', 't1')
I would like to have a list of txt codes and dt dates, grouped by txt code, where the interval between kept txt occurrences is at least one hour and nothing in between is kept.
To clarify: when t1 first occurs at '2017-01-01 00:01:00.000',
then the next occurrence I am looking for is at least one hour later,
which will be '2017-01-01 01:04:00.000'.
The third occurrence I am looking for is at least one hour after '2017-01-01 01:04:00.000', and so on.
After some searching I found something like this
-- Keeps, per txt, the first occurrence and then every occurrence that is at
-- least 60 minutes after the previously KEPT occurrence.
--
-- dt2 carries the time of the last kept occurrence through the recursion.
-- Comparing each row with the row immediately before it measures the
-- interval from the last row instead of from the last kept one, so a chain
-- of closely spaced rows suppresses rows that should be kept.
--   tm         : minutes from the current row back to the last kept row
--                (negative, same sign convention as before)
--   recurrence : 0 for kept rows, otherwise the count of rows skipped since
--                the last kept row
;with a as (
    select txt, dt,
           rn = row_number() over (partition by txt order by dt asc, id asc)
    from [wt]
),
b as (
    select txt, dt, dt as dt2, rn, cast(null as int) tm, 0 recurrence
    from a
    where rn = 1

    union all

    select a.txt, a.dt,
           case when dateadd(MINUTE, -60, a.dt) < b.dt2 then b.dt2 else a.dt end,
           a.rn,
           datediff(MINUTE, a.dt, b.dt2) tm,
           case when dateadd(MINUTE, -60, a.dt) < b.dt2 then b.recurrence + 1 else 0 end
    from b
    join a
        on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
option (maxrecursion 0)  -- one recursion level per row of a txt; default cap is 100
but this wasn't good because the interval isn't counted from first occurrence but from last, so I got
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 02:10:00.000 8 -65 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
I think I found a workaround because in this case I could group record within same hour but I am not happy with that solution.
-- Keeps, per txt, the first occurrence and then every occurrence that is at
-- least @interval_minutes after the previously KEPT occurrence.
--
-- This replaces the clock-hour bucketing workaround. Reducing the data to
-- the earliest row of each clock hour can discard the very row that should
-- be kept (e.g. kept 00:50, rows 01:10 and 01:55: 01:55 qualifies but is not
-- the earliest row of its hour), and it only works for a 60-minute interval.
-- The interval is now a parameter and the comparison is always made against
-- the last kept occurrence (dt2), so no intermediate temp table is needed.
declare @interval_minutes int = 60;

;with a as (
    select txt, dt,
           rn = row_number() over (partition by txt order by dt asc, id asc)
    from [wt]
),
b as (
    select txt, dt, dt as dt2, rn, cast(null as int) tm, 0 recurrence
    from a
    where rn = 1

    union all

    select a.txt, a.dt,
           -- last kept occurrence: unchanged when this row is skipped
           case when a.dt >= dateadd(MINUTE, @interval_minutes, b.dt2) then a.dt else b.dt2 end,
           a.rn,
           datediff(MINUTE, a.dt, b.dt2) tm,
           case when a.dt >= dateadd(MINUTE, @interval_minutes, b.dt2) then 0 else b.recurrence + 1 end
    from b
    join a
        on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
option (maxrecursion 0)  -- one recursion level per row of a txt; default cap is 100
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 01:04:00.000 2 -63 0
t1 2017-01-01 02:10:00.000 3 -66 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
Any suggestions to improve the script so it will let the interval be any entered value in minutes would be appreciated.
If I have understood correctly I think the following does what you need.
-- Picks, per txt, the first occurrence and then each first occurrence that
-- is at least 60 minutes after the previously picked one.
--
-- #T holds every wt row with a zero-based position per txt (rn), the minutes
-- since the previous row (lagDiff) and since the first row of that txt
-- (runningDiff). txt is NVARCHAR(50) to match wt.txt; a shorter column could
-- fail on longer codes.
CREATE TABLE #T (id INT, rn INT, txt NVARCHAR(50), dt DATETIME, lagDiff INT, runningDiff INT)

INSERT INTO #T (id, rn, txt, dt, lagDiff, runningDiff)
SELECT id
    , ROW_NUMBER() OVER( PARTITION BY txt ORDER BY dt, id) -1 rn
    , txt
    , dt
    , DATEDIFF(MINUTE, COALESCE(LAG(dt) OVER( PARTITION BY txt ORDER BY dt, id), dt), dt) Diff
    , DATEDIFF(MINUTE, COALESCE(FIRST_VALUE(dt) OVER( PARTITION BY txt ORDER BY dt, id), dt), dt) RunningDiff
FROM wt

-- Level 1 is the first row of each txt. Level n + 1 contains every later row
-- that is at least 60 minutes after some level-n row; the earliest row of
-- each level is the one to keep. ">=" is used so that a gap of exactly one
-- hour counts as "at least one hour".
; WITH CTE AS (
    SELECT id, rn, txt, dt, lagDiff, runningDiff, 1 AS Level
    FROM #T
    WHERE rn = 0
    UNION ALL
    SELECT T.id, T.rn, T.txt, T.dt, T.lagDiff, T.runningDiff, CTE.Level + 1
    FROM #T T
    INNER JOIN CTE ON CTE.txt = T.txt AND CTE.rn < T.rn AND T.runningDiff - 60 >= CTE.runningDiff
    WHERE T.rn > 0
)
, X AS (
    -- Earliest row position per txt and level.
    SELECT txt
        , Level
        , MIN(rn) rn
    FROM CTE
    GROUP BY txt, Level
)
SELECT #T.id, #T.rn, #T.txt, #T.dt, #T.lagDiff, #T.runningDiff
FROM X
INNER JOIN #T ON #T.txt = X.txt AND #T.rn = X.rn
ORDER BY #T.txt, #T.rn
Output
+----+----+-----+-------------------------+---------+-------------+
| id | rn | txt | dt | lagDiff | runningDiff |
+----+----+-----+-------------------------+---------+-------------+
| 1 | 0 | t1 | 2017-01-01 00:01:00.000 | 0 | 0 |
| 4 | 5 | t1 | 2017-01-01 01:04:00.000 | 12 | 63 |
| 5 | 7 | t1 | 2017-01-01 02:10:00.000 | 65 | 129 |
| 9 | 0 | t2 | 2017-01-01 00:03:00.000 | 0 | 0 |
| 10 | 1 | t2 | 2017-01-01 01:04:00.000 | 61 | 61 |
| 8 | 2 | t2 | 2017-01-01 02:10:00.000 | 66 | 127 |
+----+----+-----+-------------------------+---------+-------------+
I kind of like a method that resembles a bubble sort. The problem I have found with recursive operations is that they work great for small sets (think fewer than 5 or 10k rows) and then behave horribly as the sets get larger. For this reason I like a cursor-style approach where you are essentially saying: "Are you larger than a criterion? Yes or no. Insert or ignore, delete, move on." This way you evaluate every item once and only once, not every variation of a recursive theme.
-- Row-by-row pass over wt: per txt, keep the first occurrence and then every
-- occurrence at least @IntervalMinutes after the last KEPT occurrence.
--
-- Changes relative to the earlier form:
--   * variables and table variables use the @ sigil (a # prefix is only
--     valid for temporary tables);
--   * the gap test uses minutes; DATEDIFF(HOUR, ...) counts crossed hour
--     boundaries, so 00:59 -> 01:01 was treated as a full hour;
--   * the last kept date is tracked in @LastDate and reset at the first row
--     of each txt, instead of being read back from the holder by highest id,
--     which is unrelated to time order and to the current txt;
--   * the processed row is removed by its (txt, rwn) position, which is
--     unique, instead of by id, which wt allows to be NULL or repeated;
--   * txt is NVARCHAR(50) to match wt.txt.
DECLARE @IntervalMinutes INT = 60;

DECLARE @Temp TABLE
(
id INT
, dt DATETIME
, txt NVARCHAR(50)
, rwn INT
)
DECLARE @Holder TABLE
(
id INT
, dt DATETIME
, txt NVARCHAR(50)
, Dif int
)

INSERT INTO @Temp (id, dt, txt, rwn)
SELECT id, dt, txt, row_number() over (partition by txt order by dt, id) AS rn
From wt

DECLARE
    @CurId INT
    , @CurDt DATETIME
    , @Curtxt NVARCHAR(50)
    , @CurRwn INT
    , @LastDate DATETIME
;

WHILE EXISTS (SELECT 1 FROM @Temp)
BEGIN
    -- Next unprocessed row in (txt, time) order.
    SELECT TOP 1 @CurId = id, @CurDt = dt, @Curtxt = txt, @CurRwn = rwn
    FROM @Temp
    ORDER BY txt, rwn

    IF @CurRwn = 1
    BEGIN
        -- First row of a txt group: always kept, and it restarts the interval.
        INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @Curtxt, null)
        SET @LastDate = @CurDt
    END
    ELSE IF DATEDIFF(MINUTE, @LastDate, @CurDt) >= @IntervalMinutes
    BEGIN
        -- Far enough from the last kept row: keep it and move the reference.
        INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @Curtxt, DATEDIFF(MINUTE, @LastDate, @CurDt))
        SET @LastDate = @CurDt
    END

    -- Remove the processed row and loop again.
    DELETE FROM @Temp
    WHERE rwn = @CurRwn
      AND (txt = @Curtxt OR (txt IS NULL AND @Curtxt IS NULL))
END

Select id, dt, txt, Dif
From @Holder
Order By txt, dt