Query to select same event code with at least one hour interval - sql

I have a sample table
CREATE TABLE [dbo].[wt](
[id] [int] NULL,
[dt] [datetime] NULL,
[txt] [nvarchar](50) NULL
) ON [PRIMARY]
GO
INSERT INTO [dbo].[wt]
([id]
,[dt]
,[txt])
VALUES
(1, '2017-01-01 00:01:00.000', 't1'),
(2, '2017-01-01 00:03:00.000', 't1'),
(3, '2017-01-01 00:02:00.000', 't1'),
(4, '2017-01-01 01:04:00.000', 't1'),
(5, '2017-01-01 02:10:00.000', 't1'),
(6, '2017-01-01 00:01:00.000', 't1'),
(7, '2017-01-01 01:05:00.000', 't1'),
(8, '2017-01-01 02:10:00.000', 't2'),
(9, '2017-01-01 00:03:00.000', 't2'),
(10,'2017-01-01 01:04:00.000', 't2'),
(11,'2017-01-01 00:52:00.000', 't1')
I would like to get a list of txt codes and their dt dates, per txt code, where the interval between reported occurrences of the same txt is at least one hour; any occurrences in between are skipped.
To clarify: when t1 first occurs at '2017-01-01 00:01:00.000'
then next occurrence I am looking for is after at least one hour
which will be '2017-01-01 01:04:00.000'
third occurrence I am looking for is after at least one hour from '2017-01-01 01:04:00.000' and so on.
After some searching I found something like this
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from [wt]),
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
but this wasn't good because the interval isn't counted from first occurrence but from last, so I got
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 02:10:00.000 8 -65 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
I think I found a workaround because in this case I could group record within same hour but I am not happy with that solution.
select txt, min(dt) dt
into #ttwt
from [wt]
group by txt, substring(convert(varchar,dt,120),1,14)+'00:00.000'
;with a as (
select txt, dt,
rn = row_number() over (partition by txt order by dt asc)
from #ttwt),
b as (
select txt, dt, dt as dt2, rn, null tm, 0 recurrence
from a
where rn = 1
union all
select a.txt, a.dt, a.dt,
a.rn, datediff(MINUTE,a.dt,b.dt) tm,
case when dateadd(MINUTE,-60,a.dt) < b.dt then recurrence + 1 else 0 end
from b join a
on b.rn = a.rn - 1 and b.txt = a.txt
)
select txt, dt, rn, tm, recurrence
from b
where recurrence = 0
order by txt, dt
drop table #ttwt
txt dt rn tm recurrence
t1 2017-01-01 00:01:00.000 1 NULL 0
t1 2017-01-01 01:04:00.000 2 -63 0
t1 2017-01-01 02:10:00.000 3 -66 0
t2 2017-01-01 00:03:00.000 1 NULL 0
t2 2017-01-01 01:04:00.000 2 -61 0
t2 2017-01-01 02:10:00.000 3 -66 0
Any suggestions to improve the script so it will let the interval be any entered value in minutes would be appreciated.

If I have understood correctly I think the following does what you need.
-- Minimum gap, in minutes, that must separate two reported rows of the same txt.
DECLARE @IntervalMinutes INT = 60;

-- Staging table: one row per source row, numbered per txt in (dt, id) order.
--   rn          zero-based position of the row inside its txt group
--   lagDiff     minutes since the previous row of the same txt (0 for the first row)
--   runningDiff minutes since the first row of the same txt
-- txt is NVARCHAR(50) to match wt.txt, so long codes are not rejected on insert.
CREATE TABLE #T (id INT, rn INT, txt NVARCHAR(50), dt DATETIME, lagDiff INT, runningDiff INT);

INSERT INTO #T (id, rn, txt, dt, lagDiff, runningDiff)
SELECT id
, ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id) - 1
, txt
, dt
, DATEDIFF(MINUTE, COALESCE(LAG(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt)
, DATEDIFF(MINUTE, COALESCE(FIRST_VALUE(dt) OVER (PARTITION BY txt ORDER BY dt, id), dt), dt)
FROM wt;

-- Walk each txt group: start at its first row, then repeatedly jump to the
-- earliest later row that is at least @IntervalMinutes after the row just kept.
WITH Picked AS (
    SELECT id, rn, txt, dt, lagDiff, runningDiff
    FROM #T
    WHERE rn = 0
    UNION ALL
    SELECT nxt.id, nxt.rn, nxt.txt, nxt.dt, nxt.lagDiff, nxt.runningDiff
    FROM (
        SELECT T.id, T.rn, T.txt, T.dt, T.lagDiff, T.runningDiff
        -- pick = 1 marks the earliest qualifying row for this txt, so only one
        -- row per txt survives each step instead of every later row.
        , ROW_NUMBER() OVER (PARTITION BY T.txt ORDER BY T.rn) AS pick
        FROM Picked P
        INNER JOIN #T T
            ON T.txt = P.txt
            AND T.rn > P.rn
            -- ">=" because the requirement is "at least" the interval.
            AND T.runningDiff - P.runningDiff >= @IntervalMinutes
    ) nxt
    WHERE nxt.pick = 1
)
SELECT id, rn, txt, dt, lagDiff, runningDiff
FROM Picked
ORDER BY txt, rn
-- One recursion level per kept row; lift the default limit of 100 levels.
OPTION (MAXRECURSION 0);
Output
+----+----+-----+-------------------------+---------+-------------+
| id | rn | txt | dt | lagDiff | runningDiff |
+----+----+-----+-------------------------+---------+-------------+
| 1 | 0 | t1 | 2017-01-01 00:01:00.000 | 0 | 0 |
| 4 | 5 | t1 | 2017-01-01 01:04:00.000 | 12 | 63 |
| 5 | 7 | t1 | 2017-01-01 02:10:00.000 | 65 | 129 |
| 9 | 0 | t2 | 2017-01-01 00:03:00.000 | 0 | 0 |
| 10 | 1 | t2 | 2017-01-01 01:04:00.000 | 61 | 61 |
| 8 | 2 | t2 | 2017-01-01 02:10:00.000 | 66 | 127 |
+----+----+-----+-------------------------+---------+-------------+

I kind of like a method that works like a bubble sort. The problem I have found with recursive operations is that they work great for small sets (think fewer than 5 or 10k rows) and then behave horribly as the set grows. For this reason I like a cursor approach where you are essentially saying: "Is this row past the criteria? Yes or no. Insert or ignore, delete, move on." This way you evaluate every item once and only once, rather than every variation of a recursive theme.
-- Minimum gap, in minutes, that must separate two kept rows of the same txt.
DECLARE @IntervalMinutes INT = 60;

-- Work queue: every source row, numbered per txt in (dt, id) order.
DECLARE @Temp TABLE
(
id INT
, dt DATETIME
, txt NVARCHAR(50)
, rwn INT
)
-- Result: the rows that were kept, plus the gap (minutes) to the previously kept row.
DECLARE @Holder TABLE
(
id INT
, dt DATETIME
, txt NVARCHAR(50)
, Dif INT
)
INSERT INTO @Temp (id, dt, txt, rwn)
SELECT id, dt, txt, ROW_NUMBER() OVER (PARTITION BY txt ORDER BY dt, id)
FROM wt

DECLARE
@CurId INT
, @CurDt DATETIME
, @CurTxt NVARCHAR(50)
, @CurRwn INT
, @LastDate DATETIME -- dt of the most recently kept row of the current txt
;
WHILE EXISTS (SELECT 1 FROM @Temp)
BEGIN
SELECT TOP (1) @CurId = id, @CurDt = dt, @CurTxt = txt, @CurRwn = rwn
FROM @Temp
ORDER BY txt, rwn
-- The first row of every txt group is always kept and restarts the interval.
IF @CurRwn = 1
BEGIN
INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @CurTxt, NULL)
SET @LastDate = @CurDt
END
-- Otherwise keep the row only if enough minutes have elapsed since the last kept row.
-- DATEDIFF(HOUR, ...) is not used because it counts hour boundaries crossed
-- (00:59 -> 01:01 would count as one hour).
ELSE IF DATEDIFF(MINUTE, @LastDate, @CurDt) >= @IntervalMinutes
BEGIN
INSERT INTO @Holder (id, dt, txt, Dif) VALUES (@CurId, @CurDt, @CurTxt, DATEDIFF(MINUTE, @LastDate, @CurDt))
SET @LastDate = @CurDt
END
-- Remove the processed row. It is identified by (txt, rwn), which is unique in
-- @Temp even when wt.id is NULL or duplicated, so the loop always makes progress.
DELETE FROM @Temp
WHERE rwn = @CurRwn
AND (txt = @CurTxt OR (txt IS NULL AND @CurTxt IS NULL))
END
SELECT id, dt, txt, Dif
FROM @Holder
ORDER BY txt, dt

Related

SQL: Repeat patterns between date range

-- Inclusive date range for which one row per employee per day is wanted.
DECLARE
@startDate date = '2020-07-03',
@endDate date = '2020-07-06';
I have a table as below
---------------------------------------------------------
|EmployeeID | EmpName |Pattern | Frequency |
---------------------------------------------------------
| 11 | X | 1,2,3 | 1 |
| 12 | Y | 4,5 | 1 |
| 13 | Y | 1,2 | 3 |
| 14 | Z | 1,2 | 2 |
---------------------------------------------------------
AND I want to generate dates between given date range.
I want a result table as below:
--------------------------------
| EmpId | Dates | Pattern |
--------------------------------
| 11 |2020-07-03 | 1 |
| 11 |2020-07-04 | 2 |
| 11 |2020-07-05 | 3 |
| 11 |2020-07-06 | 1 |
| 12 |2020-07-03 | 4 |
| 12 |2020-07-04 | 5 |
| 12 |2020-07-05 | 4 |
| 12 |2020-07-06 | 5 |
| 13 |2020-07-03 | 1 |
| 13 |2020-07-04 | 1 |
| 13 |2020-07-05 | 1 |
| 13 |2020-07-06 | 2 |
| 14 |2020-07-03 | 1 |
| 14 |2020-07-04 | 1 |
| 14 |2020-07-05 | 2 |
| 14 |2020-07-06 | 2 |
Generate the dates as per given date range for each employee and repeat the pattern for each employee as per their pattern and frequency(days).
means as per frequency(days) pattern will change.
What I have achieved:
Able to generate the records for each employees between the given date range.
What I am not able to get:
I am not able to repeat the pattern based on the frequency for each employee between the date range.
I am able achieve everything but need little help while repeating the pattern based on frequency.*
Note:
Data are storing in this way only.. now I won't change existing schema...
I've come up with this. It's basically a splitter, a tally table and some logic.
Joining (Frequency)-Amount of Tally-datasets with the splitted pattern for the correct amount of pattern-values. Sorting them by their position in the pattern-string.
Join everything together and repeat the pattern by using modulo.
DECLARE #t TABLE( EmployeeID INT
, EmpName VARCHAR(20)
, Pattern VARCHAR(255)
, Frequency INT )
DECLARE #startDate DATE = '2020-07-03'
DECLARE #endDate DATE = '2020-07-09'
INSERT INTO #t
VALUES (11, 'X', '1,2,3', 1),
(12, 'Y', '4,5', 1),
(13, 'Y', '1,2', 3),
(14, 'Z', '1,2', 2)
DECLARE #delimiter CHAR(1) = ',';
WITH split(Txt
, i
, elem
, EmployeeID)
AS (SELECT STUFF(Pattern, 1, CHARINDEX(#delimiter, Pattern+#delimiter+'~'), '')
, 1
, CAST(LEFT(Pattern, CHARINDEX(#delimiter, Pattern+#delimiter+'~')-1) AS VARCHAR(MAX))
, EmployeeID
FROM #t
UNION ALL
SELECT STUFF(Txt, 1, CHARINDEX(#delimiter, Txt+#delimiter+'~'), '')
, i + 1
, CAST(LEFT(Txt, CHARINDEX(#delimiter, Txt+#delimiter+'~')-1) AS VARCHAR(MAX))
, EmployeeID
FROM split
WHERE Txt > ''),
E1(N) AS (SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 AS a, E1 AS b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 AS a, E2 AS b), --10E+4 or 10,000 rows
E8(N) AS (SELECT 1 FROM E4 AS a , E4 AS b), --10E+8 or 100,000,000 rows
PatternXFrequency(EmployeeID
, Sort
, elem)
AS (SELECT split.EmployeeID
, ROW_NUMBER() OVER(PARTITION BY split.EmployeeID ORDER BY i) - 1
, elem
FROM split
INNER JOIN #t AS t ON t.EmployeeID = split.EmployeeID
CROSS APPLY (SELECT TOP (t.Frequency) 1
FROM E8
) AS Freq(Dummy))
SELECT EmployeeID
, DATEADD(DAY, i_count, #startDate) AS Dates
, elem
FROM (SELECT DATEDIFF(DAY, #startDate, #endDate) + 1) AS t_datediff(t_days)
CROSS APPLY (SELECT TOP (t_days) ROW_NUMBER() OVER(ORDER BY (SELECT 0) ) - 1 FROM E8
) AS t_dateadd(i_count)
CROSS APPLY (SELECT PatternXFrequency.*
FROM (SELECT DISTINCT EmployeeID FROM #t) AS t(EmpID)
CROSS APPLY (SELECT COUNT(Sort)
FROM PatternXFrequency
WHERE EmployeeID = EmpID
) AS EmpPattern(sortCount)
CROSS APPLY (SELECT *
FROM PatternXFrequency
WHERE EmployeeID = EmpID
AND Sort = ((i_count % sortCount))
) AS PatternXFrequency
) AS t
ORDER BY t.EmployeeID
, Dates
This isn't particularly pretty, but it avoids the recursion of a rCTE, so should provide a faster experience. As STRING_SPLIT still doesn't know what ordinal position means, we have to use something else here; I use DelimitedSplit8k_LEAD.
I also assume your expected results are wrong, as they stop short of your end date (20200709). This results in the below:
CREATE TABLE dbo.YourTable (EmployeeID int,
EmpName char(1),
Pattern varchar(8000), --This NEEDS fixing
Frequency tinyint);
INSERT INTO dbo.YourTable
VALUES(11,'X','1,2,3',1),
(12,'Y','4,5',1),
(13,'Y','1,2',3),
(14,'Z','1,2',2);
GO
-- Inclusive date range to generate.
DECLARE @StartDate date = '20200703',
@EndDate date = '20200709';
-- CTE: one row per (employee, pattern element); ItemNumber is the element's
-- 1-based position in Pattern and MaxItemNumber is the pattern length.
WITH CTE AS(
SELECT YT.EmployeeID,
YT.Frequency,
DS.ItemNumber,
DS.Item,
MAX(DS.ItemNumber) OVER (PARTITION BY YT.EmployeeID) AS MaxItemNumber
FROM dbo.YourTable YT
CROSS APPLY dbo.DelimitedSplit8K_LEAD(YT.Pattern,',') DS),
-- N: 10 placeholder rows, cross joined three times below for up to 1000 rows.
N AS(
SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
-- Tally: zero-based day offsets 0 .. (days in range - 1).
Tally AS(
SELECT TOP (SELECT DATEDIFF(DAY,@StartDate, @EndDate)+1)
ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS I
FROM N N1, N N2, N N3) --1000 Rows
SELECT C.EmployeeID,
DATEADD(DAY,T.I, @StartDate) AS Dates,
C.Item AS Pattern
FROM CTE C
-- Each pattern element is held for Frequency consecutive days, then the pattern
-- advances and wraps: element index = (day offset / Frequency) mod pattern length.
-- NULLIF avoids a divide-by-zero; an employee with Frequency 0 or NULL yields no rows.
JOIN Tally T ON (T.I / NULLIF(C.Frequency,0)) % C.MaxItemNumber + 1 = C.ItemNumber
ORDER BY C.EmployeeID,
T.I;
GO
DROP TABLE dbo.YourTable;
Like mentioned in the comments fix your data model.
Your output pattern is a little bit strange.
But is it something like this you are looking for?
-- Inclusive date range to generate.
DECLARE @startDate date = '2020-07-03'
DECLARE @endDate date = '2020-07-09'
-- One row per calendar day in the range.
DECLARE @Dates TABLE([Date] Date)
;WITH seq(n) AS
(
-- n = day offset from @startDate, 0 .. number of days between the two dates
SELECT 0 UNION ALL SELECT n + 1 FROM seq
WHERE n < DATEDIFF(DAY, @startDate, @endDate)
)
INSERT INTO @Dates ([Date])
-- Offsets are applied to @startDate (not to today's date) so the generated
-- days are exactly the requested range.
SELECT DATEADD(DAY, n, @startDate)
FROM seq
OPTION (MAXRECURSION 0);
SELECT e.EmployeeId, d.[Date], x.Value Pattern
FROM Employee e
CROSS APPLY STRING_SPLIT(e.Pattern, ',') x
INNER JOIN @Dates d on 1=1
-- Correct for the first iteration of the pattern
-- NOTE(review): this matches a pattern value to the day whose 1-based offset in
-- the range equals that value; it does not repeat the pattern or use Frequency.
AND DATEDIFF(DAY, DATEADD(DAY, -1, @startDate), d.[Date]) = x.Value

How to repeat values in a table in SQL Server?

I have a table in Microsoft SQL Server that logged some values on data change triggers. Now, in order to display some graphs, I would like to get (or repeat) a value per 10 minutes from each column(for example).
I would try to avoid, if possible, an INSERT command modifying the table itself.
Original table:
Time Stamp---- | A | B | C |
---------------+---+---+---+
01-01-19 10:20 | 1 | 0 | 0 |
01-01-19 15:30 | 0 | 0 | 1 |
01-01-19 22:50 | 0 | 1 | 0 |
02-01-19 01:40 | 1 | 0 | 0 |
...
Result I would like to achieve:
Time Stamp---- | A | B | C |
---------------+---+---+---+
01-01-19 10:20 | 1 | 0 | 0 |
01-01-19 10:30 | 1 | 0 | 0 |
01-01-19 10:40 | 1 | 0 | 0 |
01-01-19 10:50 | 1 | 0 | 0 |
...
01-01-19 15:30 | 0 | 0 | 1 |
01-01-19 15:40 | 0 | 0 | 1 |
01-01-19 15:50 | 0 | 0 | 1 |
01-01-19 16:00 | 0 | 0 | 1 |
...
Assuming your dates are mm-dd-yy and times are hh:mm...
create table #Original (
[Time Stamp----] datetime2,
A int,
B int,
C int
)
insert #Original
values ({ts '2019-01-01 10:20:00.000'}, 1, 0, 0)
, ({ts '2019-01-01 15:30:00.000'}, 0, 0, 1)
, ({ts '2019-01-01 22:50:00.000'}, 0, 1, 0)
, ({ts '2019-01-02 01:40:00.000'}, 1, 0, 0)
;
with
-- boundaries: first logged timestamp (s) and ten minutes past the last one (e).
boundaries as (
select min(o.[Time Stamp----]) as s
, dateadd(minute, 10, max(o.[Time Stamp----])) as e
from #Original o
),
-- timeslist: one row per ten-minute slot (d), starting at s and stepping while d < e.
timeslist as (
select (select s from boundaries) as d
union all
select dateadd(minute, 10, t.d)
from timeslist t
where t.d < (select e from boundaries)
),
-- result: each slot paired with the logged row in effect at that moment, i.e. the
-- row whose timestamp is <= the slot and whose successor (OldTs) is after it.
result as (
select
t.d as SlotTime -- real datetime, kept for chronological ordering
, right('0' + cast(MONTH(t.d) as varchar(2)), 2) + '-' +
right('0' + cast(DAY(t.d) as varchar(2)), 2) + '-' +
right('0' + cast(year(t.d) % 100 as varchar(2)), 2) + ' ' +
right('0' + cast(datepart(hour, t.d) as varchar(2)), 2) + ':' +
right('0' + cast(datepart(minute, t.d) as varchar(2)), 2) as [Time Stamp----]
, o2.A
, o2.B
, o2.C
from timeslist t
inner join (
select o.[Time Stamp----]
, o.A
, o.B
, o.C
-- The last logged row has no successor; treat it as valid for ten more minutes.
, lead (o.[Time Stamp----], 1, dateadd(minute, 10, o.[Time Stamp----])) over (order by o.[Time Stamp----]) as OldTs
from #Original o
) o2 on o2.[Time Stamp----] <= t.d and o2.OldTs > t.d
)
select r.[Time Stamp----], r.A, r.B, r.C
from result r
-- Sort on the datetime, not on the formatted mm-dd-yy text, which would
-- misorder rows across a year boundary.
order by r.SlotTime
-- One recursion level per slot; the default limit of 100 only covers about 16.5 hours.
option (maxrecursion 0)
drop table #Original
To select records with manufactured duplicates, try
SELECT Dateadd(mi, DQ.T,TimeStamp) as 'TimeStamp', A, B, C From YourTable
CROSS JOIN (Select 0 T UNION ALL
Select 10 T UNION ALL
Select 20 T UNION ALL
Select 30 T) DQ
or to insert duplicates, try
INSERT YourTable
SELECT Dateadd(mi, DQ.T,TimeStamp) as 'TimeStamp', A, B, C From YourTable
CROSS JOIN (
Select 10 T UNION ALL
Select 20 T UNION ALL
Select 30 T) DQ
Personally I recommend making a "Time Table", but I do this on the fly here using a Tally. Anyway, I think this is what you're after?
USE Sandbox;
GO
CREATE TABLE dbo.YourTable ([timestamp] datetime2(0), --This is a bad name for a column, as timestamp means soemthing else in SQL Server
A bit,
B bit,
C bit);
INSERT INTO dbo.YourTable ([timestamp],
A,
B,
C)
VALUES ('2019-01-01T10:20:00',1,0,0),
('2019-01-01T15:30:00',0,0,1),
('2019-01-01T22:50:00',0,1,0),
('2019-01-02T01:40:00',1,0,0);
GO
WITH N AS
(SELECT N
FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
Tally AS(
SELECT TOP(144) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) -1 AS I
FROM N N1, N N2, N N3),
Times AS(
SELECT DATEADD(MINUTE,T.I * 10,CONVERT(time(0),'00:00:00')) AS TimeSlot
FROM Tally T),
DateTimes AS(
SELECT DISTINCT
CONVERT(datetime,CONVERT(date,YT.[timestamp])) + CONVERT(datetime,T.TimeSlot) AS DateTimeSlot
FROM dbo.YourTable YT
CROSS JOIN Times T),
Groups AS(
SELECT DT.DateTimeSlot,
CONVERT(tinyint,YT.A) AS A, --Can't aggregate Bits
CONVERT(tinyint,YT.B) AS B,
CONVERT(tinyint,YT.C) AS C,
COUNT(YT.A) OVER (ORDER BY DT.DateTimeSlot ASC ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Grp
FROM DateTimes DT
LEFT JOIN dbo.YourTable YT ON DT.DateTimeSlot = YT.[timestamp])
SELECT G.DateTimeSlot,
MAX(G.A) OVER (PARTITION BY G.Grp) AS A,
MAX(G.B) OVER (PARTITION BY G.Grp) AS B,
MAX(G.C) OVER (PARTITION BY G.Grp) AS C
FROM Groups G
ORDER BY G.DateTimeSlot;
GO
DROP TABLE dbo.YourTable;
You can use SQL RECURSION and CROSS JOIN
SQL FIDDLE
Demo
-- Sample data: the four logged rows from the question.
-- ISO 8601 literals are used so the values do not depend on the session's
-- DATEFORMAT/language settings; the last row belongs to the following day.
declare @mytable as table([timestamp] datetime,A int,B int,C int)
insert into @mytable ([timestamp],A,B,C) values
('2019-01-01T10:20:00',1,0,0),('2019-01-01T15:30:00',0,0,1),
('2019-01-01T22:50:00',0,1,0),('2019-01-02T01:40:00',1,0,0)
-- cte: minute offsets 0, 10, 20, 30
;with cte as(
select 0 n
union all
select n+10 from cte where n+10 <40)
-- Repeat every logged row once per offset, shifting its timestamp by that offset.
select dateadd(mi,cte.n,t1.[timestamp]) as [TIMESTAMP],t1.A,t1.B,t1.C
from @mytable t1 cross join cte
order by [TIMESTAMP]

Find time spent in a day along with work break taken

I am in a situation where I need to find-out total time spent in office for some internal application.
I have sample data like this:
Id EmployeeId ScanDateTime Status
7 87008 2018-08-02 16:03:00.227 1
8 87008 2018-08-02 16:06:17.277 2
9 87008 2018-08-02 16:10:37.107 3
10 87008 2018-08-02 16:20:17.277 2
11 87008 2018-08-02 16:30:37.107 3
12 87008 2018-08-02 20:06:00.000 4
Here Status have different meanings:
1- Start
2- Pause
3- Resume
4- End
Means Employees start their work at ScanDateTime when status is 1. They can go for break(status 2) and come back and resume their work(Status 3) and with status 4 means they are ending their job.
Note: There could be multiple breaks during work hours.
Expected Output:
EmployeeId StartTime EndTime BreakInMins
87008 2018-08-02 16:03:00.227 2018-08-02 20:06:00.000 14
I have tried to follow some example to calculate the expected result set but not helping.
I could not find any such example where this similar example available.
Any help would be appreciated.
Please try this. Handles multiple breaks/employees and cases, when break is still in progress or session is not finished
select
[EmployeeId] = [s].[EmployeeId]
,[StartTime] = [s].[ScanDateTime]
,[EndTime] = [et].[ScanDateTime]
,[BreakInMins] = [b].[BreakInMins]
from
[Scans] as [s] -- here is your table
outer apply
(
select top 1 [ScanDateTime], [Id] from [Scans] where [Id] > [s].[Id] and [EmployeeId] = [s].[EmployeeId] and [Status] = 4 order by [ScanDateTime] asc
) as [et]
outer apply
(
select
[BreakInMins] = sum(isnull([r].[mins], datediff(mi, [sp].[ScanDateTime], getdate())))
from
[Scans] as [sp]
outer apply
(
select top 1 [mins] = datediff(mi, [sp].[ScanDateTime], [ScanDateTime]) from [Scans] where [Id] > [sp].[Id] and [EmployeeId] = [sp].[EmployeeId] and [Status] IN (3, 4) order by [ScanDateTime] asc
) as [r]
where
[sp].[id] > [s].[id] and [sp].[id] < isnull([et].[id], [id] + 1)
and [sp].[EmployeeId] = [s].[EmployeeId]
and [sp].[Status] = 2
) as [b]
where
[Status] = 1;
Here is test-friendly script: script
i consider multiple breaks per day of employee you can check below i also provided fiddle link
select t1.*,t5.breakmins from
(
select EmployeeId,min(StartTime) as StartTime,max(EndTime) as EndTime from
(
select EmployeeId,(case when status=1 then ScanDateTime end) as StartTime,
(case when status=4 then ScanDateTime end) as EndTime,
case when status=3 then ScanDateTime end as ResumeWork,
case when status=2 then ScanDateTime end as pauseTime
from emp
) as t group by EmployeeId
) t1
inner join
(
select EmployeeId, convert(date,ResumeWork) as day ,
sum(case when status=2 then datediff(minute,ResumeWork,res) end ) as breakmins from
(
select EmployeeId,ResumeWork,status ,
lag(ResumeWork) over(PARTITION BY EmployeeId order by ResumeWork desc) as res from
(
select * from
(
select EmployeeId, case when status=3 then ScanDateTime end as ResumeWork,status from emp
) as t1 where ResumeWork is not null
union all
select * from
(
select EmployeeId,case when status=2 then ScanDateTime end as pauseTime,status from emp
) as t2 where pauseTime is not null
) as t3 group by EmployeeId,ResumeWork,status
) t4 group by EmployeeId, convert(date,ResumeWork)
)t5 on t1.EmployeeId=t5.EmployeeId
and convert(date,t1.StartTime)=t5.day
EmployeeId StartTime EndTime breakmins
87008 2018-08-02T16:03:00.227Z 2018-08-02T20:06:00Z 12
http://sqlfiddle.com/#!18/ae60f/6
You can try this.
make a row_number in CTE by Status, because we need to know which Pause time correspond which Resume time. then self join in the CTE by EmployeeId
CREATE TABLE T(
Id INT,
EmployeeId INT,
ScanDateTime DATETIME,
Status INT
);
INSERT INTO T VALUES (7 ,87008 ,'2018-08-02 16:03:00.227',1);
INSERT INTO T VALUES (8 ,87008 ,'2018-08-02 16:06:17.277',2);
INSERT INTO T VALUES (9 ,87008 ,'2018-08-02 16:10:37.107',3);
INSERT INTO T VALUES (10,87008 ,'2018-08-02 16:20:17.277',2);
INSERT INTO T VALUES (11,87008 ,'2018-08-02 16:30:37.107',3);
INSERT INTO T VALUES (12,87008 ,'2018-08-02 20:06:00.000',4);
Query 1:
-- cte: every scan annotated with the employee's first and last scan time and with
-- rn, the scan's sequence number among that employee's scans of the same Status.
;with cte as(
SELECT Id,
EmployeeId,
ScanDateTime,
Status,
MIN(ScanDateTime) over(partition by EmployeeId) StartTime,
MAX(ScanDateTime) over(partition by EmployeeId) EndTime,
-- Numbered per employee AND status so the n-th pause of an employee lines up
-- with that same employee's n-th resume, even with several employees in the table.
ROW_NUMBER() OVER(PARTITION BY EmployeeId, Status order by Id) rn
FROM t
)
-- Pair each pause (Status 2) with the resume (Status 3) of the same rank and
-- add up the minutes between them.
select p.EmployeeId,
p.StartTime,
p.EndTime,
SUM(datediff(minute,p.ScanDateTime,r.ScanDateTime)) BreakInMins
from
cte p
inner join cte r
on p.EmployeeId = r.EmployeeId and p.rn = r.rn and p.Status = 2 and r.Status = 3
group by p.EmployeeId,
p.StartTime,
p.EndTime
Results:
| EmployeeId | StartTime | EndTime | BreakInMins |
|------------|----------------------|--------------------------|-------------|
| 87008 | 2018-08-02T16:03:00.227Z | 2018-08-02T20:06:00Z | 14 |
EDIT
you can try this query if there are different day in your data. just group by the date.
CREATE TABLE T(
Id INT,
EmployeeId INT,
ScanDateTime DATETIME,
Status INT
);
INSERT INTO T VALUES (7 ,87008 ,'2018-08-02 16:03:00.227',1);
INSERT INTO T VALUES (8 ,87008 ,'2018-08-02 16:06:17.277',2);
INSERT INTO T VALUES (9 ,87008 ,'2018-08-02 16:10:37.107',3);
INSERT INTO T VALUES (10,87008 ,'2018-08-02 16:20:17.277',2);
INSERT INTO T VALUES (11,87008 ,'2018-08-02 16:30:37.107',3);
INSERT INTO T VALUES (12,87008 ,'2018-08-02 20:06:00.000',4);
INSERT INTO T VALUES (27 ,87008 ,'2018-08-03 16:03:00.227',1);
INSERT INTO T VALUES (28 ,87008 ,'2018-08-03 16:06:17.277',2);
INSERT INTO T VALUES (29 ,87008 ,'2018-08-03 16:11:37.107',3);
INSERT INTO T VALUES (210,87008 ,'2018-08-03 16:20:17.277',2);
INSERT INTO T VALUES (211,87008 ,'2018-08-03 16:30:37.107',3);
INSERT INTO T VALUES (212,87008 ,'2018-08-03 20:06:00.000',4);
Query 1:
;with cte as(
SELECT EmployeeId,
MAX(CASE WHEN Status = 1 then ScanDateTime end) StartTime,
MIN(CASE WHEN Status = 4 then ScanDateTime end) EndTime,
CAST(ScanDateTime as date) dt
FROM t
GROUP BY EmployeeId,CAST(ScanDateTime as date)
)
,cte2 as(
SELECT t2.*,
Row_number() over(partition by t2.EmployeeId,t2.Status order by Id) rn,
t1.StartTime,
t1.EndTime,
t1.dt
FROM cte t1
INNER JOIN T t2 ON t1.EmployeeId = t2.EmployeeId and Status in (2,3) and t1.dt = CAST(t2.ScanDateTime as date)
)
select t1.EmployeeId,
t1.StartTime,
t1.EndTime,
SUM(datediff(minute,t1.ScanDateTime,t2.ScanDateTime)) BreakInMins
from cte2 t1
inner join cte2 t2 on
t1.rn = t2.rn
and
t1.EmployeeId = t2.EmployeeId
and t1.Status = 2 and t2.Status =3
group by t1.EmployeeId,
t1.StartTime,
t1.EndTime
Results:
| EmployeeId | StartTime | EndTime | BreakInMins |
|------------|--------------------------|----------------------|-------------|
| 87008 | 2018-08-02T16:03:00.227Z | 2018-08-02T20:06:00Z | 14 |
| 87008 | 2018-08-03T16:03:00.227Z | 2018-08-03T20:06:00Z | 15 |
Try below query: http://sqlfiddle.com/#!18/6fe11/3
select id,min(case when status=1 then stattime end) as starttime,
min(case when status=4 then stattime end) as endtime,
sum(case when status=2 then minute end) as breakinmin
from
(
select id,stattime,status,
DATEdiff(minute,stattime,lead(stattime,1,NULL)
over (partition by id ORDER BY stattime)) as minute
from ForgeRock)a
group by id
id starttime endtime breakinmin
87008 2018-08-02T16:03:00.227Z 2018-08-02T20:06:00Z 14

Rotate table in T-SQL

I have a table that contains sequential dates in the first column and the type of each date (CreatedOn or ClosedOn) in the second. I need to write a SELECT that returns two columns (CreatedOn, ClosedOn) from my table.
I have this:
| Date | ColumnName |
|------------|------------|
| 2017-01-01 | ClosedOn |
| 2017-01-02 | CreatedOn |
| 2017-01-03 | ClosedOn |
| 2017-01-04 | CreatedOn |
And I need to get this:
| CreatedOn | ClosedOn |
|------------|------------|
| NULL | 2017-01-01 |
| 2017-01-02 | 2017-01-03 |
| 2017-01-04 | NULL |
I've tried this:
SELECT
CASE [ColumnName]
WHEN 'CreatedOn' THEN [Date]
ELSE NULL
END,
CASE [ColumnName]
WHEN 'ClosedOn' THEN [Date]
ELSE NULL
END
FROM #Temp
but it doesn't work.
This is a typical case of using a PIVOT in SQL Server, to transpose rows into columns:
-- Pair every CreatedOn date with the ClosedOn date that follows it.
-- grp is the running count of CreatedOn rows in date order, so a CreatedOn row and
-- the ClosedOn row after it share one grp value; a leading ClosedOn row gets grp 0.
select p.CreatedOn, p.ClosedOn
from (
select [date]
, colname
, sum(case when colname = 'CreatedOn' then 1 else 0 end)
over (order by [date] rows unbounded preceding) as grp
from table1
) src
-- The aggregated value is the date; colname supplies the output column names and
-- grp (the only remaining column) becomes the implicit grouping key.
pivot (max([date]) for colname in (CreatedOn, ClosedOn)) p
order by p.grp
Try this and hope it helps. You may have to test it and modify as needed. But the logic if my understanding is correct should be sufficient to build on.
;WITH cte_TestData(Date,ColumnName) AS
(
SELECT '2017-01-01','ClosedOn ' UNION ALL
SELECT '2017-01-02','CreatedOn' UNION ALL
SELECT '2017-01-03','ClosedOn ' UNION ALL
SELECT '2017-01-04','CreatedOn'
)
,cte_PreserveSeq AS
(
SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS SeqID,Date,ColumnName
FROM cte_TestData
)
,cte_PreResult AS
(
SELECT *
,LEAD (ColumnName, 1,0) OVER (ORDER BY SeqID) AS NextColumnName
,LEAD (Date, 1,0) OVER (ORDER BY SeqID) AS NextDate
,LAG (ColumnName, 1,0) OVER (ORDER BY SeqID) AS PreviousColumnName
,LAG (Date, 1,0) OVER (ORDER BY SeqID) AS PreviousDate
FROM cte_PreserveSeq
)
SELECT DISTINCT
CASE
WHEN ColumnName = 'CreatedOn' AND NextColumnName = 'ClosedOn' THEN DATE
WHEN ColumnName = 'ClosedOn' AND PreviousColumnName = 'CreatedOn' THEN PreviousDate
WHEN ColumnName = 'CreatedOn' THEN DATE
ELSE NULL
END AS CreatedOn,
CASE
WHEN ColumnName = 'CreatedOn' AND NextColumnName = 'ClosedOn' THEN NextDate
WHEN ColumnName = 'ClosedOn' THEN DATE
ELSE NULL
END AS ClosedOn
FROM cte_PreResult
I'm assuming you have other columns, so let's do this
select A1.OtherColumn, A1.[Date], A2.Date
from #Temp A1
full outer join #Temp A2
on A1.OtherColumn = A2.OtherColumn
and A1.ColumnName = 'CreatedOn'
and A2.ColumnName = 'ClosedOn'
EDIT: If no other columns, try
-- MyData: rows numbered in date order; on equal dates CreatedOn sorts before ClosedOn.
with MyData as
(
select [Date], ColumnName , row_number() over (order by [Date], ColumnName desc) as rn
from #Temp
)
-- Match each CreatedOn row with the ClosedOn row that immediately follows it.
-- The full outer join keeps a ClosedOn with no preceding CreatedOn, and a CreatedOn
-- with no following ClosedOn, as rows with a NULL on the missing side.
select C.[Date] as CreatedOn, D.[Date] as ClosedOn
from (select [Date], rn from MyData where ColumnName = 'CreatedOn') C
full outer join (select [Date], rn from MyData where ColumnName = 'ClosedOn') D
on D.rn = C.rn + 1
order by coalesce(C.rn, D.rn)

Creating groups of consecutive days meeting a given criteria

I have a table with the following data structure in SQL Server:
ID Date Allocation
1, 2012-01-01, 0
2, 2012-01-02, 2
3, 2012-01-03, 0
4, 2012-01-04, 0
5, 2012-01-05, 0
6, 2012-01-06, 5
etc.
What I need to do is get all consecutive day periods where Allocation = 0, and in the following form:
Start Date End Date DayCount
2012-01-01 2012-01-01 1
2012-01-03 2012-01-05 3
etc.
Is it possible to do this in SQL, and if so how?
In this answer, I'll assume that the "id" field numbers the rows consecutively when sorted by increasing date, like it does in the example data. (Such a column can be created if it does not exist).
This is an example of a technique described here and here.
1) Join the table to itself on adjacent "id" values. This pairs adjacent rows. Select rows where the "allocation" field has changed. Store the result in a temporary table, also keeping a running index.
-- MySQL: @idx is a session user variable used as a running row counter.
SET @idx = 0;
-- boundaries: one row per place where allocation changes between two rows with
-- adjacent ids. prev_end/allocation come from the earlier row, next_start from the later one.
CREATE TEMPORARY TABLE boundaries
SELECT
(@idx := @idx + 1) AS idx,
a1.date AS prev_end,
a2.date AS next_start,
a1.allocation as allocation
FROM allocations a1
JOIN allocations a2
ON (a2.id = a1.id + 1)
WHERE a1.allocation != a2.allocation;
This gives you a table having "the end of the previous period", "the start of the next period", and "the value of 'allocation' in the previous period" in each row:
+------+------------+------------+------------+
| idx | prev_end | next_start | allocation |
+------+------------+------------+------------+
| 1 | 2012-01-01 | 2012-01-02 | 0 |
| 2 | 2012-01-02 | 2012-01-03 | 2 |
| 3 | 2012-01-05 | 2012-01-06 | 0 |
+------+------------+------------+------------+
2) We need the start and end of each period in the same row, so we need to combine adjacent rows again. Do this by creating a second temporary table like boundaries but having an idx field 1 greater:
+------+------------+------------+
| idx | prev_end | next_start |
+------+------------+------------+
| 2 | 2012-01-01 | 2012-01-02 |
| 3 | 2012-01-02 | 2012-01-03 |
| 4 | 2012-01-05 | 2012-01-06 |
+------+------------+------------+
Now join on the idx field and we get the answer:
SELECT
boundaries2.next_start AS start,
boundaries.prev_end AS end,
allocation
FROM boundaries
JOIN boundaries2
USING(idx);
+------------+------------+------------+
| start | end | allocation |
+------------+------------+------------+
| 2012-01-02 | 2012-01-02 | 2 |
| 2012-01-03 | 2012-01-05 | 0 |
+------------+------------+------------+
** Note that this answer gets the "internal" periods correctly but misses the two "edge" periods where allocation = 0 at the beginning and allocation = 5 at the end. Those can be pulled in using UNION clauses but I wanted to present the core idea without that complication.
Following would be one way to do it. The gist of this solution is
Use a CTE to get a list of all consecutive start and enddates with Allocation = 0
Use the ROW_NUMBER window function to assign rownumbers depending on both start- and enddates.
Select only those records where both ROW_NUMBERS equal 1.
Use DATEDIFF to calculate the DayCount
SQL Statement
;WITH r AS (
SELECT StartDate = Date, EndDate = Date
FROM YourTable
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN YourTable q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT [Start Date] = s.StartDate
, [End Date ] = s.EndDate
, [DayCount] = DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
WHERE s.rn1 = 1
AND s.rn2 = 1
OPTION (MAXRECURSION 0)
Test script
;WITH q (ID, Date, Allocation) AS (
SELECT * FROM (VALUES
(1, '2012-01-01', 0)
, (2, '2012-01-02', 2)
, (3, '2012-01-03', 0)
, (4, '2012-01-04', 0)
, (5, '2012-01-05', 0)
, (6, '2012-01-06', 5)
) a (a, b, c)
)
, r AS (
SELECT StartDate = Date, EndDate = Date
FROM q
WHERE Allocation = 0
UNION ALL
SELECT r.StartDate, q.Date
FROM r
INNER JOIN q ON DATEDIFF(dd, r.EndDate, q.Date) = 1
WHERE q.Allocation = 0
)
SELECT s.StartDate, s.EndDate, DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
SELECT *
, rn1 = ROW_NUMBER() OVER (PARTITION BY StartDate ORDER BY EndDate DESC)
, rn2 = ROW_NUMBER() OVER (PARTITION BY EndDate ORDER BY StartDate ASC)
FROM r
) s
WHERE s.rn1 = 1
AND s.rn2 = 1
OPTION (MAXRECURSION 0)
Alternative way with CTE but without ROW_NUMBER(),
Sample data:
if object_id('tempdb..#tab') is not null
drop table #tab
create table #tab (id int, date datetime, allocation int)
insert into #tab
select 1, '2012-01-01', 0 union
select 2, '2012-01-02', 2 union
select 3, '2012-01-03', 0 union
select 4, '2012-01-04', 0 union
select 5, '2012-01-05', 0 union
select 6, '2012-01-06', 5 union
select 7, '2012-01-07', 0 union
select 8, '2012-01-08', 5 union
select 9, '2012-01-09', 0 union
select 10, '2012-01-10', 0
Query:
-- Groups consecutive zero-allocation days from #tab into ranges and reports
-- each range's start date, end date and number of days.
;WITH day_links (s_id, e_id, b_id) AS (
    -- For every zero-allocation day: its own id, the id of the next day if
    -- that day is also zero-allocation (else NULL), and likewise for the
    -- previous day.
    SELECT cur.id, nxt.id, prv.id
    FROM #tab AS cur
    LEFT JOIN #tab AS nxt
        ON DATEADD(dd, 1, cur.date) = nxt.date
       AND nxt.allocation = 0
    LEFT JOIN #tab AS prv
        ON DATEADD(dd, -1, cur.date) = prv.date
       AND prv.allocation = 0
    WHERE cur.allocation = 0
),
run_bounds (s, e) AS (
    -- A day with no zero-allocation predecessor opens a range; the range is
    -- closed by the lowest id at or after it that has no zero-allocation
    -- successor.
    SELECT first_day.s_id,
           (
               SELECT MIN(last_day.s_id)
               FROM day_links AS last_day
               WHERE last_day.e_id IS NULL
                 AND last_day.s_id >= first_day.s_id
           )
    FROM day_links AS first_day
    WHERE first_day.b_id IS NULL
)
SELECT
    range_start.date AS [start date],
    range_end.date   AS [end date],
    COUNT(*)         AS [day count]
FROM run_bounds AS rb
-- One joined row per zero-allocation day inside the range; COUNT(*) counts them.
INNER JOIN #tab AS member
    ON member.id BETWEEN rb.s AND rb.e
   AND member.allocation = 0
INNER JOIN #tab AS range_start
    ON range_start.id = rb.s
INNER JOIN #tab AS range_end
    ON range_end.id = rb.e
GROUP BY rb.s, rb.e, range_start.date, range_end.date
Live example at data.SE.
Using this sample data:
-- Sample data: one row per day; Allocation = 0 marks the days to be grouped.
CREATE TABLE MyTable (ID INT, Date DATETIME, Allocation INT);

-- Explicit column lists keep the inserts valid if columns are later added
-- or reordered. The ODBC {d '...'} escape is a typed date literal.
INSERT INTO MyTable (ID, [Date], Allocation) VALUES (1, {d '2012-01-01'}, 0);
INSERT INTO MyTable (ID, [Date], Allocation) VALUES (2, {d '2012-01-02'}, 2);
INSERT INTO MyTable (ID, [Date], Allocation) VALUES (3, {d '2012-01-03'}, 0);
INSERT INTO MyTable (ID, [Date], Allocation) VALUES (4, {d '2012-01-04'}, 0);
INSERT INTO MyTable (ID, [Date], Allocation) VALUES (5, {d '2012-01-05'}, 0);
INSERT INTO MyTable (ID, [Date], Allocation) VALUES (6, {d '2012-01-06'}, 5);
GO
Try this:
-- Collapses consecutive days with Allocation = 0 in MyTable into one row per
-- run: StartDate, EndDate and DayCount.
WITH DateGroups (ID, Date, Allocation, SeedID) AS (
    -- Anchor: zero-allocation days that have no zero-allocation row on the
    -- day before, i.e. the first day of each run. The row's own ID becomes
    -- the SeedID that identifies the run.
    SELECT cur.ID, cur.Date, cur.Allocation, cur.ID
    FROM MyTable AS cur
    LEFT JOIN MyTable AS Prev
        ON Prev.Date = DATEADD(d, -1, cur.Date)
       AND Prev.Allocation = 0
    WHERE Prev.ID IS NULL
      AND cur.Allocation = 0
    UNION ALL
    -- Recursive step: add the following day while it also has zero
    -- allocation, carrying the SeedID of the run along.
    SELECT nxt.ID, nxt.Date, nxt.Allocation, DateGroups.SeedID
    FROM MyTable AS nxt
    INNER JOIN DateGroups
        ON nxt.Date = DATEADD(d, 1, DateGroups.Date)
    WHERE nxt.Allocation = 0
), StartDates (ID, StartDate, DayCount) AS (
    -- Earliest date and number of days per run.
    SELECT SeedID, MIN(Date), COUNT(ID)
    FROM DateGroups
    GROUP BY SeedID
), EndDates (ID, EndDate) AS (
    -- Latest date per run.
    SELECT SeedID, MAX(Date)
    FROM DateGroups
    GROUP BY SeedID
)
SELECT StartDates.StartDate, EndDates.EndDate, StartDates.DayCount
FROM StartDates
INNER JOIN EndDates ON StartDates.ID = EndDates.ID
-- Each recursion level adds one day to every run, so the default limit of
-- 100 levels would abort the query on a run longer than 101 days; 0 removes
-- the limit.
OPTION (MAXRECURSION 0);
The first section of the query is a recursive SELECT, anchored by all rows that have allocation = 0 and whose previous day either doesn't exist or has allocation != 0. This effectively returns IDs 1 and 3, which are the starting dates of the periods of time you want to return.
The recursive part of this same query starts from the anchor rows, and finds all subsequent dates that also have allocation = 0. The SeedID keeps track of the anchored ID through all the iterations.
The result so far is this:
ID Date Allocation SeedID
----------- ----------------------- ----------- -----------
1 2012-01-01 00:00:00.000 0 1
3 2012-01-03 00:00:00.000 0 3
4 2012-01-04 00:00:00.000 0 3
5 2012-01-05 00:00:00.000 0 3
The next sub query uses a simple GROUP BY to pick out the start date (the earliest date) for each SeedID, and also counts the days.
The last sub query does the same thing with the end dates, but this time the day count isn't needed as we already have this.
The final SELECT query joins these two together to combine the start and end dates, and returns them along with the day count.
Give this a try and see if it works for you.
Here SDATE stands in for your DATE column; everything else remains the same as in your table.
-- Returns one row per run of rows with ALLOCATION = 0 in TABLE1: the SDATE of
-- the run's first row, an end date (EDATE) and a row count (DAYCOUNT).
-- "Previous", "next" and "last" rows are all located by ID, not by SDATE.
-- NOTE(review): this presumably expects IDs to follow date order with one
-- row per day - confirm against the real table.
SELECT SDATE,
-- EDATE. The test below counts the rows from this row up to the next row
-- with ALLOCATION != 0. When no such later row exists the inner MIN(ID) is
-- NULL, the BETWEEN matches nothing, and COUNT(*)-1 evaluates to -1.
CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) >0 THEN(
-- A later non-zero row exists: take the SDATE of the highest ID strictly
-- between this row and that row, or this row's own SDATE when no row lies
-- in between.
CASE WHEN (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) IS NULL THEN SDATE
ELSE (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) END
-- No later non-zero row: take the SDATE of the highest ID after this row.
-- NOTE(review): if this row is itself the last row, no ID is greater than
-- TBL1.ID, so EDATE looks like it comes out NULL - confirm whether intended.
)ELSE (SELECT SDATE FROM TABLE1 WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID ))END AS EDATE
-- DAYCOUNT. Same "is there a later non-zero row" probe as above: a negative
-- result means there is none, so count from this row through the highest
-- later ID; otherwise count the rows from this row up to, but not including,
-- the next non-zero row.
-- NOTE(review): for a run start that is the last row, the first branch's
-- BETWEEN has a NULL upper bound, so DAYCOUNT appears to be 0 - confirm.
,CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) <0 THEN
(SELECT COUNT(*) FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID )) ELSE
(SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) END AS DAYCOUNT
-- Keep only run starts: zero-allocation rows whose immediately preceding row
-- (highest lower ID) has a non-zero allocation, or that have no preceding row.
FROM TABLE1 TBL1 WHERE ALLOCATION = 0
AND (((SELECT ALLOCATION FROM TABLE1 WHERE ID=(SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID))<> 0 ) OR (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID)IS NULL);
A solution without CTE:
-- Finds runs of consecutive rows (in aDate order) with allocation = 0 and
-- returns each run's first date, last date and length in days.
-- The same derived table is used four times: it gives every row of table1 a
-- position idn = number of rows whose aDate is on or before its own.
SELECT
    run_start.aDate                                           AS StartDate,
    MIN(run_end.aDate)                                        AS EndDate,
    (DATEDIFF(day, run_start.aDate, MIN(run_end.aDate)) + 1)  AS DayCount
FROM (
    SELECT cur.aDate, cur.allocation, COUNT(*) idn
    FROM table1 cur
    JOIN table1 earlier ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS run_start
-- Row one position earlier with the same allocation; absent for a run start.
LEFT JOIN (
    SELECT cur.aDate, cur.allocation, COUNT(*) idn
    FROM table1 cur
    JOIN table1 earlier ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS before_start
    ON run_start.idn = before_start.idn + 1
   AND before_start.allocation = run_start.allocation
-- Every row at or after the candidate start with the same allocation.
LEFT JOIN (
    SELECT cur.aDate, cur.allocation, COUNT(*) idn
    FROM table1 cur
    JOIN table1 earlier ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS run_end
    ON run_start.idn <= run_end.idn
   AND run_end.allocation = run_start.allocation
-- Row one position later with the same allocation; absent for a run end.
LEFT JOIN (
    SELECT cur.aDate, cur.allocation, COUNT(*) idn
    FROM table1 cur
    JOIN table1 earlier ON earlier.aDate <= cur.aDate
    GROUP BY cur.id, cur.aDate, cur.allocation
) AS after_end
    ON run_end.idn = after_end.idn - 1
   AND after_end.allocation = run_end.allocation
WHERE run_start.allocation = 0
  AND before_start.idn IS NULL
  AND run_end.idn IS NOT NULL
  AND after_end.idn IS NULL
-- MIN(run_end.aDate) above picks the nearest run end for each start.
GROUP BY run_start.aDate
Example