Sum the timegap if consecutive timestamps are same for 10 min - sql

I am using the query below to find the NULL values of a column and to get the start time and end time of each NULL value, for some 30,000 rows:
-- For each row whose TWSPD is NULL, report its timestamp, the earliest later
-- timestamp in the sheet, and the whole-minute difference between the two.
SELECT
    cur.[timestamp]                                         AS StartTime,
    MIN(nxt.[timestamp])                                    AS EndTime,
    DATEDIFF(MINUTE, cur.[timestamp], MIN(nxt.[timestamp])) AS DifferenceInMinutes
FROM Sheet1$ AS cur
LEFT JOIN Sheet1$ AS nxt
    ON nxt.[timestamp] > cur.[timestamp]
WHERE cur.TWSPD IS NULL
GROUP BY cur.[timestamp]
The output is
Start time Endtime DifferenceInMinutes
2012-05-18 20:47:03.000 2012-05-18 20:57:04.000 10
2012-05-18 20:57:04.000 2012-05-18 21:07:04.000 10
2012-05-21 18:25:26.000 2012-05-21 18:35:26.000 10
2012-06-07 17:36:28.000 2012-06-07 17:46:28.000 10
2012-06-07 17:46:28.000 2012-06-07 17:56:28.000 10
2012-06-07 17:56:28.000 2012-06-07 18:06:28.000 10
And for example now i need the output as (removed some row to display better)
Start time Endtime DifferenceInMinutes
2012-05-18 20:47:03.000 2012-05-18 21:07:04.000 20
2012-05-21 18:25:26.000 2012-05-21 18:35:26.000 10
2012-06-07 17:36:28.000 2012-06-07 18:06:28.000 30
The timestamps are 10 minutes apart. When null values occur at consecutive 10-minute timestamps, their time gaps should be added together, and the start time and end time should run from the first null to the last null of that consecutive run. I hope the question is clear; please let me know if it is not. Please help.

-- Step 1: for every row whose TWSPD is NULL, pair its timestamp with the next
-- timestamp in the sheet and store the gap in minutes.
SELECT
    yt1.[timestamp] AS StartTime,
    MIN(yt2.[timestamp]) AS EndTime,
    DATEDIFF(MINUTE, yt1.[timestamp], MIN(yt2.[timestamp])) AS DifferenceInMinutes
INTO #tmp1
FROM Sheet1$ yt1
LEFT JOIN Sheet1$ yt2 ON yt1.[timestamp] < yt2.[timestamp]
WHERE yt1.TWSPD IS NULL
GROUP BY yt1.[timestamp];

-- Step 2: seed #tmp2 with the intervals that open a run, i.e. those whose
-- StartTime is not the EndTime of another interval.
SELECT t1.*
INTO #tmp2
FROM #tmp1 t1
LEFT JOIN #tmp1 t2 ON t1.StartTime = t2.EndTime
WHERE t2.EndTime IS NULL;

-- Step 3: keep extending each seed with the interval that begins where it
-- currently ends, until an UPDATE touches no rows.
-- @rcn is a local variable and @@ROWCOUNT the system function; both need the
-- @ sigil (a leading # denotes a temp table and is invalid here).
DECLARE @rcn int;
SELECT @rcn = 1;
WHILE @rcn > 0
BEGIN
    UPDATE #tmp2
    SET #tmp2.EndTime = t.EndTime,
        #tmp2.DifferenceInMinutes = #tmp2.DifferenceInMinutes + t.DifferenceInMinutes
    FROM #tmp1 t
    WHERE t.StartTime = #tmp2.EndTime;

    SELECT @rcn = @@ROWCOUNT;
END

SELECT * FROM #tmp2;

DROP TABLE #tmp1;
DROP TABLE #tmp2;

If you want to query your original table to have grouped output - you can do this:
-- A timestamp opens a run when no other row lies in the 11 minutes before it,
-- and closes a run when no other row lies in the 11 minutes after it.
-- Numbering both sets in time order pairs the Nth start with the Nth end.
;with
run_starts as
(
    select cur.timestamp, row_number() over(order by cur.timestamp) as RowNum
    from temp1 as cur
    where not exists
    (
        select 1
        from temp1 as prev
        where prev.timestamp >= dateadd(minute, -11, cur.timestamp)
          and prev.timestamp < cur.timestamp
    )
),
run_ends as
(
    select cur.timestamp, row_number() over(order by cur.timestamp) as RowNum
    from temp1 as cur
    where not exists
    (
        select 1
        from temp1 as nxt
        where nxt.timestamp <= dateadd(minute, 11, cur.timestamp)
          and nxt.timestamp > cur.timestamp
    )
)
select
    st.timestamp as [Start time],
    en.timestamp as [End time],
    datediff(minute, st.timestamp, en.timestamp) as [DifferenceInMinutes]
from run_starts as st
inner join run_ends as en
    on en.RowNum = st.RowNum
SQL FIDDLE EXAMPLE
Another good one, but you have to copy your data into temporary (variable) table
-- Copy the timestamps into a table variable with a dense row number, so the
-- recursive CTE below can step from row N to row N + 1 through the primary key.
-- A table variable is declared with @ (DECLARE #name TABLE is not valid T-SQL).
declare @tmp table (timestamp datetime, RowNum int primary key);

insert into @tmp (timestamp, RowNum)
select T.timestamp, row_number() over(order by T.timestamp) as RowNum
from temp1 as T;

-- Walk the rows in time order; start a new group whenever the gap to the
-- previous row is 11 minutes or more.
with CTE
as
(
    select T.timestamp, T.RowNum, 1 as GroupNum
    from @tmp as T
    where RowNum = 1
    union all
    select
        T.timestamp, T.RowNum,
        C.GroupNum + case when datediff(mi, C.timestamp, T.timestamp) >= 11 then 1 else 0 end
    from @tmp as T
    inner join CTE as C on C.RowNum + 1 = T.RowNum
)
select
    min(C.timestamp) as [Start time],
    max(C.timestamp) as [End time],
    datediff(mi, min(C.timestamp), max(C.timestamp)) as [DifferenceInMinutes]
from CTE as C
group by C.GroupNum
-- One recursion level per input row: the default limit of 100 levels would
-- abort the query on anything longer, so lift the limit.
option (maxrecursion 0);
SQL FIDDLE EXAMPLE

Related

Find nearest date to start and end of the month

Table contains daily snapshots of specific parameter, but data can be missing for some days. Task is to calculate amount per month, for this sake we need values on start/end of the month, if data is missing, we need pairs of nearest dates i.e:
[Time] Value
2015-04-28 00:00:00.000 76127
2015-05-03 00:00:00.000 76879
2015-05-22 00:00:00.000 79314
2015-06-07 00:00:00.000 81443
Currently i use following code:
-- Rank rows by day distance to 2015-05-01 and to 2015-05-31. Each ranking is
-- partitioned by a 1/0 flag ("before the date" or "after the date"), and a row
-- is kept when it falls exactly on a boundary date or ranks first in both
-- rankings for that boundary.
select
    *
from (
    select
        [Time],
        Value,
        row_number() over (
            partition by case when [Time] < '2015-05-01' then 1 else 0 end
            order by abs(datediff(day, '2015-05-01', [Time]))
        ) as rn2,
        row_number() over (
            partition by case when [Time] > '2015-05-01' then 1 else 0 end
            order by abs(datediff(day, [Time], '2015-05-01'))
        ) as rn3,
        row_number() over (
            partition by case when [Time] < '2015-05-31' then 1 else 0 end
            order by abs(datediff(day, '2015-05-31', [Time]))
        ) as rn4,
        row_number() over (
            partition by case when [Time] > '2015-05-31' then 1 else 0 end
            order by abs(datediff(day, [Time], '2015-05-31'))
        ) as rn5,
        datediff(day, '2015-05-01', [Time]) as doff,
        datediff(day, '2015-05-31', [Time]) as doff2
    from ValueTable
    where [Time] between '2015-04-01' and '2015-06-30'
) r
where doff = 0
   or doff2 = 0
   or (doff <> 0 and rn2 = 1 and rn3 = 1)
   or (doff2 <> 0 and rn4 = 1 and rn5 = 1)
Is there any more efficient way to do it?
The following code is going to look more complicated because it is longer. However, it should be very fast, because it can make very good use of an index on ValueTable([Time]).
The idea is to look for exact matches. If there are no exact matches, then find the first and last records before and after the dates. This requires union all on six subqueries, but each should make optimal use of an index:
-- Return the rows for the start (2015-05-01) and end (2015-05-31) of the month.
-- If a boundary date has an exact row, return it; otherwise return the nearest
-- row before and the nearest row after that date.
with exact_first as (
    select t.*
    from ValueTable t
    where [Time] = '2015-05-01'
),
exact_last as (
    select t.*
    from ValueTable t
    where [Time] = '2015-05-31'   -- end of month (was a copy of the start date)
)
select ef.*
from exact_first ef
union all
-- Each TOP 1 ... ORDER BY sits in a derived table, a form that is valid as an
-- operand of UNION ALL.
select b.*
from (select top 1 t.*
      from ValueTable t
      where [Time] < '2015-05-01' and
            not exists (select 1 from exact_first ef2)
      order by [Time] desc        -- latest row before the date = nearest
     ) b
union all
select a.*
from (select top 1 t.*
      from ValueTable t
      where [Time] > '2015-05-01' and
            not exists (select 1 from exact_first ef2)
      order by [Time]             -- earliest row after the date = nearest
     ) a
union all
select el.*
from exact_last el
union all
select b.*
from (select top 1 t.*
      from ValueTable t
      where [Time] < '2015-05-31' and
            not exists (select 1 from exact_last el2)
      order by [Time] desc
     ) b
union all
select a.*
from (select top 1 t.*
      from ValueTable t
      where [Time] > '2015-05-31' and
            not exists (select 1 from exact_last el2)
      order by [Time]
     ) a;

How can I find a value that doesn't exist in a table?

I have a simplified table called Bookings that has two columns BookDate and BookSlot. The BookDate column will have dates only (no time) and the BookSlot column will contain the time of the day in intervals of 30 minutes from 0 to 1410 inclusive. (i.e. 600 = 10:00am)
How can I find the first slot available in the future (not booked) without running through a loop?
Here is the table definition and test data:
-- Bookings: one row per booked half-hour slot.
-- BookDate holds the calendar day; BookSlot is minutes after midnight.
CREATE TABLE Bookings (
    BookDate DATETIME NOT NULL,
    BookSlot INT      NOT NULL
)
GO
-- Test data: three early slots and 10:30 on 1 Jul, three slots on 2 Jul.
INSERT INTO Bookings (BookDate, BookSlot)
VALUES ('2014-07-01', 0),
       ('2014-07-01', 30),
       ('2014-07-01', 60),
       ('2014-07-01', 630),
       ('2014-07-02', 60),
       ('2014-07-02', 90),
       ('2014-07-02', 120);
I want a way to return the first available slot that is not in the table and that is in the future (based on server time).
Based on above test data:
If the current server time was 1st Jul, 00:10am, the result should be 1st Jul, 90min (01:30am).
If the current server time was 2nd Jul, 01:05am, the result should be 2nd Jul, 150min (02:30am).
If there are no bookings in the future, the function would simply return the closest half-hour in the future.
--
SQL Fiddle for this is here:
http://sqlfiddle.com/#!6/0e93d/1
Below is one method that will allow bookings up to 256 days in the future, and allow for an empty Booking table. I assume you are using SQL Server 2005 since your BookDate is dateTime instead of date.
In any case, you might consider storing the slots as a complete datetime instead of separate columns. That will facilitate queries and improve performance.
-- Find the first unbooked half-hour slot after @now by generating every
-- candidate (day offset x slot) and discarding the ones present in Bookings.
DECLARE @now DATETIME = '2014-07-01 00:10:00';

WITH T4
     -- 8 seed rows; cross joined three times below this gives 8*8*8 = 512 numbers.
     AS (SELECT N
         FROM   (VALUES (0), (0), (0), (0), (0), (0), (0), (0)) AS t(N)),
     T256
     -- Sequential numbers starting at 0; used for day offsets and for slot indexes.
     AS (SELECT Row_number() OVER (ORDER BY (SELECT 0)) - 1 AS n
         FROM   T4 AS a
                CROSS JOIN T4 AS b
                CROSS JOIN T4 AS c),
     START_DATE
     -- @now truncated to midnight.
     AS (SELECT Dateadd(DAY, Datediff(DAY, '', @now), '') AS start_date),
     START_TIME
     -- @now rounded down to the half hour.
     AS (SELECT Dateadd(MINUTE, Datediff(MINUTE, '', @now) / 30 * 30, '') AS start_time),
     DAILY_INTERVALS
     -- The 48 slots of a day: 0, 30, ..., 1410.
     AS (SELECT N * 30 AS interval
         FROM   T256
         WHERE  N < 48)
SELECT TOP (1) Dateadd(DAY, future_days.N, START_DATE.start_date) AS BookDate,
               DAILY_INTERVALS.interval                           AS BookSlot
FROM   START_DATE
       CROSS APPLY START_TIME
       CROSS APPLY DAILY_INTERVALS
       CROSS APPLY T256 AS future_days
WHERE  Dateadd(MINUTE, DAILY_INTERVALS.interval,
               Dateadd(DAY, future_days.N, START_DATE.start_date)) > START_TIME.start_time
       -- Compare against the candidate day, not only the start date; otherwise
       -- bookings on later days are ignored and the first day's bookings
       -- wrongly block the same slots on every later day.
       AND NOT EXISTS (SELECT *
                       FROM   dbo.Bookings AS b
                       WHERE  b.BookDate = Dateadd(DAY, future_days.N, START_DATE.start_date)
                              AND b.BookSlot = DAILY_INTERVALS.interval)
ORDER  BY BookDate,
          BookSlot;
See this SQL Fiddle
It's a bit complicated but try this:
-- Number the bookings in (date, slot) order, find the first booking that is
-- not immediately followed by the next half-hour slot, and return the slot
-- right after it (rolling over to slot 0 of the next day after 1410).
WITH DATA
     AS (SELECT *,
                Row_number() OVER (ORDER BY BOOKDATE, BOOKSLOT) RN
         FROM   BOOKINGS)
SELECT CASE
         WHEN T.BOOKSLOT = 1410 THEN Dateadd(DAY, 1, T.BOOKDATE)
         ELSE T.BOOKDATE
       END Book_Date,
       CASE
         WHEN T.BOOKSLOT = 1410 THEN 0
         ELSE T.BOOKSLOT + 30
       END Book_Slot
FROM   (SELECT TOP 1 T1.*
        FROM   DATA T1
               LEFT JOIN DATA T2
                      ON T1.RN = T2.RN - 1
        WHERE  T2.BOOKSLOT - T1.BOOKSLOT > 30
                -- A day change is gap-free only when it goes from the last slot
                -- of the day (1410) to slot 0; 630 was a leftover test value.
                OR ( T1.BOOKDATE != T2.BOOKDATE
                     AND ( T2.BOOKSLOT != 0
                            OR T1.BOOKSLOT != 1410 ) )
                OR T2.BOOKSLOT IS NULL
        -- Without ORDER BY, TOP 1 may return any qualifying gap, not the first.
        ORDER  BY T1.RN) T
Here is the SQL fiddle example.
Explanation
This solution contains 2 parts:
Comparing each line to the next and checking for a gap (can be done easier in SQL 2012)
Adding a half an hour to create the next slot, this includes moving to the next day if needed.
Edit
Added TOP 1 in the query so that only the first slot is returned as requested.
Update
Here is the updated version including 2 new elements (getting current date+ time and dealing with empty table):
-- First free slot at or after the next half hour from the current server time.
-- Also covers an empty Bookings table through the fallback row (ID = 2).
DECLARE @Date DATETIME = '2014-07-01',
        @Slot INT = 630
DECLARE @time AS TIME = Cast(Getdate() AS TIME)

-- Next half-hour slot after now, in minutes after midnight.
SELECT @Slot = Datepart(HOUR, @time) * 60
               + Round(Datepart(MINUTE, @time) / 30, 0) * 30
               + 30
SET @Date = Cast(Getdate() AS DATE)

-- From 23:30 onwards the computed slot is 1440, which is not a valid slot:
-- roll over to slot 0 of the following day.
IF @Slot >= 1440
BEGIN
    SET @Slot = @Slot - 1440
    SET @Date = Dateadd(DAY, 1, @Date)
END

;WITH DATA
     -- Bookings from the target slot onwards, numbered in (date, slot) order.
     AS (SELECT *,
                Row_number() OVER (ORDER BY BOOKDATE, BOOKSLOT) RN
         FROM   BOOKINGS
         WHERE  BOOKDATE > @Date
                 OR ( BOOKDATE = @Date
                      AND BOOKSLOT >= @Slot ))
SELECT TOP 1 BOOK_DATE,
             BOOK_SLOT
FROM   (SELECT CASE
                 -- The target slot itself is free when the first remaining
                 -- booking is not exactly on it.
                 WHEN RN = 1
                      AND NOT (@Slot = BOOKSLOT
                               AND @Date = BOOKDATE) THEN @Date
                 WHEN T.BOOKSLOT = 1410 THEN Dateadd(DAY, 1, BOOKDATE)
                 ELSE BOOKDATE
               END Book_Date,
               CASE
                 WHEN RN = 1
                      AND NOT (@Slot = BOOKSLOT
                               AND @Date = BOOKDATE) THEN @Slot
                 WHEN T.BOOKSLOT = 1410 THEN 0
                 ELSE BOOKSLOT + 30
               END Book_Slot,
               1   AS ID
        FROM   (SELECT TOP 1 T1.*
                FROM   DATA T1
                       LEFT JOIN DATA T2
                              ON T1.RN = T2.RN - 1
                WHERE  T2.BOOKSLOT - T1.BOOKSLOT > 30
                        OR ( T1.BOOKDATE != T2.BOOKDATE
                             AND ( T2.BOOKSLOT != 0
                                    OR T1.BOOKSLOT != 1410 ) )
                        OR T2.BOOKSLOT IS NULL
                -- Make TOP 1 pick the earliest gap rather than any gap.
                ORDER  BY T1.RN) T
        UNION
        -- Fallback when there are no bookings from the target slot onwards.
        SELECT @Date AS bookDate,
               @Slot AS BookSlot,
               2     ID) X
ORDER  BY X.ID
Play around with the SQL fiddle and let me know what you think.
In SQL Server 2012 and later, you can use the lead() function. The logic is a bit convoluted because of all the boundary conditions. I think this captures it:
-- For each booking, look at the next booking in (date, slot) order. A free slot
-- follows a booking when there is no next booking, when the next booking on the
-- same day is more than 30 minutes later, or when the next booking is on
-- another day and is not slot 0 of the very next day following a 1410 booking.
select top 1
       -- After the last slot of the day (1410) the free slot is on the next
       -- day; the two CASE branches were the wrong way round.
       (case when BookSlot = 1410 then dateadd(day, 1, BookDate) else BookDate end) as BookDate,
       (case when BookSlot = 1410 then 0 else BookSlot + 30 end) as BookSlot
from (select b.*,
             -- Both lead() calls use the full (date, slot) order so that they
             -- describe the same next booking, including across a day change.
             lead(BookDate) over (order by BookDate, BookSlot) as next_dt,
             lead(BookSlot) over (order by BookDate, BookSlot) as next_bs
      from bookings b
     ) b
where next_dt is null
   or (next_dt = BookDate and next_bs - BookSlot > 30)
   or (next_dt <> BookDate
       and not (BookSlot = 1410
                and next_dt = dateadd(day, 1, BookDate)
                and next_bs = 0))
order by BookDate, BookSlot;
Using a tally table to generate a list of originally available booking slots out 6 weeks (adjustable below):
-- Build every half-hour slot for the next six weeks and return the first one
-- that is not booked and lies after the current slot.
-- Local variables take the @ sigil (a leading # denotes a temp table).
declare @Date as date = getdate();
-- Current time rounded down to the half hour, in minutes after midnight.
declare @slot as int = 30 * (datediff(n, @Date, getdate()) / 30);
with
slots as (
    -- 8 x 6 = 48 rows: slots 0, 30, ..., 1410
    select (ROW_NUMBER() over (order by s) - 1) * 30 as BookSlot
    from (
        values (1),(1),(1),(1),(1),(1),(1),(1) -- 4 hour block
    ) slots(s)
    cross join (
        values (1),(1),(1),(1),(1),(1) -- 6 blocks of 4 hours each day
    ) QuadHours(t)
)
,days as (
    -- 7 x 6 = 42 consecutive days starting today
    select (ROW_NUMBER() over (order by s) - 1) + getdate() as BookDate
    from (
        values (1),(1),(1),(1),(1),(1),(1) -- 7 days in a week
    ) dayList(s)
    cross join (
        -- set this to number of weeks out to allow bookings to be made
        values (1),(1),(1),(1),(1),(1) -- allow 6 weeks of bookings at a time
    ) weeks(t)
)
,tally as (
    select
         cast(days.BookDate as date) as BookDate
        ,slots.BookSlot as BookSlot
    from slots
    cross join days
)
select top 1
     tally.BookDate
    ,tally.BookSlot
from tally
-- NOTE(review): #Bookings is kept as written (a temp table); if the bookings
-- live in a table variable or a permanent table, adjust the name.
left join #Bookings book
    on  tally.BookDate = book.BookDate
    and tally.BookSlot = book.BookSlot
where book.BookSlot is null
  and ( tally.BookDate > @Date or tally.BookSlot > @slot )
order by tally.BookDate, tally.BookSlot;
go
try this:
-- Anti-join on bookings: keep each booked slot `a` for which no booking `b`
-- exists on the same date half an hour (0.5 h) later. The second output column
-- is that following slot converted back to minutes.
-- There is no TOP or ORDER BY, so every such slot is returned, not only the first.
SELECT a.bookdate, ((a.bookslot/60.)+.5) * 60
FROM bookings a LEFT JOIN bookings b
ON a.bookdate=b.bookdate AND (a.bookslot/60.)+.50=b.bookslot/60.
WHERE b.bookslot IS null

Merge adjacent rows in SQL?

I'm doing some reporting based on the blocks of time employees work. In some cases, the data contains two separate records for what really is a single block of time.
Here's a basic version of the table and some sample records:
EmployeeID
StartTime
EndTime
Data:
EmpID Start End
----------------------------
#1001 10:00 AM 12:00 PM
#1001 4:00 PM 5:30 PM
#1001 5:30 PM 8:00 PM
In the example, the last two records are contiguous in time. I'd like to write a query that combines any adjacent records so the result set is this:
EmpID Start End
----------------------------
#1001 10:00 AM 12:00 PM
#1001 4:00 PM 8:00 PM
Ideally, it should also be able to handle more than 2 adjacent records, but that is not required.
This article provides quite a few possible solutions to your question
http://www.sqlmag.com/blog/puzzled-by-t-sql-blog-15/tsql/solutions-to-packing-date-and-time-intervals-puzzle-136851
This one seems like the most straight forward:
-- Packing intervals per user:
--   StartTimes = start times not covered by an earlier-starting session,
--   EndTimes   = end times not covered by a later-ending session.
-- Each packed interval runs from a start time to the nearest end time at or after it.
WITH StartTimes AS
(
    SELECT DISTINCT cur.username, cur.starttime
    FROM dbo.Sessions AS cur
    WHERE NOT EXISTS
    (
        SELECT *
        FROM dbo.Sessions AS other
        WHERE other.username  = cur.username
          AND other.starttime < cur.starttime
          AND other.endtime  >= cur.starttime
    )
),
EndTimes AS
(
    SELECT DISTINCT cur.username, cur.endtime
    FROM dbo.Sessions AS cur
    WHERE NOT EXISTS
    (
        SELECT *
        FROM dbo.Sessions AS other
        WHERE other.username   = cur.username
          AND other.endtime    > cur.endtime
          AND other.starttime <= cur.endtime
    )
)
SELECT
    st.username,
    st.starttime,
    (
        SELECT MIN(en.endtime)
        FROM EndTimes AS en
        WHERE en.username = st.username
          AND en.endtime >= st.starttime
    ) AS endtime
FROM StartTimes AS st;
If this is strictly about adjacent rows (not overlapping ones), you could try the following method:
Unpivot the timestamps.
Leave only those that have no duplicates.
Pivot the remaining ones back, coupling every Start with the directly following End.
Or, in Transact-SQL, something like this:
-- Merge adjacent (touching, not overlapping) rows per employee in three steps.
-- Step 1: turn each row into two (event, dtime) rows and count how many times
-- each dtime occurs for the employee.
WITH unpivoted AS (
SELECT
EmpID,
event,
dtime,
count = COUNT(*) OVER (PARTITION BY EmpID, dtime)
FROM atable
UNPIVOT (
dtime FOR event IN (StartTime, EndTime)
) u
)
-- Step 2: keep only timestamps that occur once, then number the remaining
-- starts and the remaining ends separately in time order.
, filtered AS (
SELECT
EmpID,
event,
dtime,
rowno = ROW_NUMBER() OVER (PARTITION BY EmpID, event ORDER BY dtime)
FROM unpivoted
WHERE count = 1
)
-- Step 3: pivot back. PIVOT groups by the columns it does not consume
-- (EmpID and rowno here), so the Nth start is paired with the Nth end.
, pivoted AS (
SELECT
EmpID,
StartTime,
EndTime
FROM filtered
PIVOT (
MAX(dtime) FOR event IN (StartTime, EndTime)
) p
)
SELECT *
FROM pivoted
;
There's a demo for this query at SQL Fiddle.
CTE with cumulative sum:
-- Merge rows whose Start equals the previous row's Finish, using a running
-- sum of "new group" flags as the group id.
-- A table variable is declared with @ (DECLARE #t TABLE is not valid T-SQL).
DECLARE @t TABLE (EmpId INT, Start TIME, Finish TIME)

INSERT INTO @t (EmpId, Start, Finish)
VALUES
    (1001, '10:00 AM', '12:00 PM'),
    (1001, '4:00 PM', '5:30 PM'),
    (1001, '5:30 PM', '8:00 PM')

;WITH rowind AS (
    SELECT EmpId, Start, Finish,
        -- IIF returns 1 for each row that should generate a new row in the final result
        IIF(Start = LAG(Finish, 1) OVER(PARTITION BY EmpId ORDER BY Start), 0, 1) newrow
    FROM @t),
groups AS (
    SELECT EmpId, Start, Finish,
        -- Cumulative sum
        SUM(newrow) OVER(PARTITION BY EmpId ORDER BY Start) csum
    FROM rowind)
SELECT
    EmpId,
    MIN(Start) Start,
    MAX(Finish) Finish
FROM groups
GROUP BY EmpId, csum
I have changed a lil' bit the names and types to make the example smaller but this works and should be very fast and it has no number of records limit:
-- cte keeps only the rows that sit at an edge of a chain of touching intervals:
--   bef = 1 -> no row for the same id ends where this one starts (chain start)
--   aft = 1 -> no row for the same id starts where this one ends (chain end)
-- Each chain start is then paired with the next edge row, which supplies the end.
with cte as (
    select
        cur.id,
        cur.t1,
        cur.t2,
        case when prv.t1 is null then 1 else 0 end as bef,
        case when nxt.t1 is null then 1 else 0 end as aft
    from x cur
    left join x prv
        on cur.id = prv.id and cur.t1 = prv.t2
    left join x nxt
        on cur.id = nxt.id and cur.t2 = nxt.t1
    where prv.id is null
       or nxt.id is null
)
select
    head.id,
    head.t1,
    isnull(tail.t2, head.t2) as t2
from cte head
outer apply (
    select top 1 *
    from cte cand
    where head.id = cand.id
      and head.aft = 0
      and cand.t1 > head.t1
    order by t1
) tail
where head.bef = 1
and the fiddle for it : http://sqlfiddle.com/#!3/ad737/12/0
Option with Inline User-Defined Function AND CTE
-- dbo.Overlap: given two intervals, return their union when they overlap or
-- touch, otherwise return the first interval unchanged.
--   @availStart / @availEnd   : the current interval
--   @availStart2 / @availEnd2 : the interval accumulated so far
-- Parameters take the @ sigil (a leading # is not valid for a parameter).
CREATE FUNCTION dbo.Overlap
(
    @availStart  datetime,
    @availEnd    datetime,
    @availStart2 datetime,
    @availEnd2   datetime
)
RETURNS TABLE
RETURN
    SELECT CASE WHEN @availStart > @availEnd2 OR @availEnd < @availStart2
                THEN @availStart
                ELSE CASE WHEN @availStart > @availStart2 THEN @availStart2 ELSE @availStart END
           END AS availStart,
           CASE WHEN @availStart > @availEnd2 OR @availEnd < @availStart2
                THEN @availEnd
                ELSE CASE WHEN @availEnd > @availEnd2 THEN @availEnd ELSE @availEnd2 END
           END AS availEnd
GO
-- CREATE FUNCTION must be alone in its batch, hence the GO above.

-- Walk each employee's rows in Start order, merging every row into the
-- interval accumulated from the previous row.
;WITH cte AS
(
    SELECT EmpID, Start, [End],
           ROW_NUMBER() OVER (PARTITION BY EmpID ORDER BY Start) AS Id
    FROM dbo.TableName
), cte2 AS
(
    SELECT Id, EmpID, Start, [End]
    FROM cte
    WHERE Id = 1
    UNION ALL
    SELECT c.Id, c.EmpID, o.availStart, o.availEnd
    FROM cte c
    -- Id restarts at 1 for every employee, so the step must stay within the
    -- same EmpID; otherwise rows of different employees get merged.
    JOIN cte2 ct ON c.Id = ct.Id + 1
                AND c.EmpID = ct.EmpID
    CROSS APPLY dbo.Overlap(c.Start, c.[End], ct.Start, ct.[End]) AS o
)
SELECT EmpID, Start, MAX([End]) AS [End]
FROM cte2
GROUP BY EmpID, Start
Demo on SQLFiddle

How to count open records, grouped by hour and day in SQL-server-2008-r2

I have hospital patient admission data in Microsoft SQL Server r2 that looks something like this:
PatientID, AdmitDate, DischargeDate
Jones. 1-jan-13 01:37. 1-jan-13 17:45
Smith 1-jan-13 02:12. 2-jan-13 02:14
Brooks. 4-jan-13 13:54. 5-jan-13 06:14
I would like count the number of patients in the hospital day by day and hour by hour (ie at
1-jan-13 00:00. 0
1-jan-13 01:00. 0
1-jan-13 02:00. 1
1-jan-13 03:00. 2
And I need to include the hours when there are no patients admitted in the result.
I can't create tables so making a reference table listing all the hours and days is out, though.
Any suggestions?
To solve this problem, you need a list of date-hours. The following gets this from the admit date cross joined to a table with 24 hours. The table of 24 hours is calculating from information_schema.columns -- a trick for getting small sequences of numbers in SQL Server.
The rest is just a join between this table and the hours. This version counts the patients at the hour, so someone admitted and discharged in the same hour, for instance is not counted. And in general someone is not counted until the next hour after they are admitted:
-- dh: one row per hour (00:00 .. 23:00) for every calendar day that has at
-- least one admission. The hour numbers come from row-numbering
-- INFORMATION_SCHEMA.COLUMNS.
-- NOTE(review): the original referenced both "Admission" and "Admissions";
-- "Admissions" is used throughout here - confirm the real table name.
with dh as (
    select dateadd(hour, hours.seqnum - 1, d.thedatehour) as DateHour
    from (select distinct cast(cast(AdmitDate as date) as datetime) as thedatehour
          from Admissions
         ) d cross join
         (select row_number() over (order by (select null)) as seqnum
          from INFORMATION_SCHEMA.COLUMNS
         ) hours
    -- filter on the generated number; "hours" alone is a table alias, not a column
    where hours.seqnum <= 24
)
-- LEFT JOIN plus COUNT(a.PatientID) keeps hours with no patients and reports 0
-- for them; an inner join with COUNT(*) would drop those hours.
select dh.DateHour, count(a.PatientID) as NumPatients
from dh left join
     Admissions a
     on dh.DateHour between a.AdmitDate and a.DischargeDate
group by dh.DateHour
order by dh.DateHour
This also assumes that there are admissions on every day. That seems like a reasonable assumption. If not, a calendar table would be a big help.
Here is one (ugly) way:
-- DayHours: the numbers 0..23 built recursively.
-- Result: for every distinct admission date and every hour of the day, the
-- number of distinct patients whose AdmitDate falls on that date and hour.
;WITH DayHours AS
(
    SELECT 0 DayHour
    UNION ALL
    SELECT DayHour + 1
    FROM DayHours
    WHERE DayHour + 1 <= 23
)
SELECT
    dts.AdmitDate,
    hrs.DayHour,
    COUNT(DISTINCT PatientID) Patients
FROM DayHours hrs
CROSS JOIN (
    SELECT DISTINCT CONVERT(DATE, AdmitDate) AdmitDate
    FROM YourTable
) dts
LEFT JOIN YourTable adm
    ON  dts.AdmitDate = CONVERT(DATE, adm.AdmitDate)
    AND hrs.DayHour   = DATEPART(HOUR, adm.AdmitDate)
GROUP BY dts.AdmitDate, hrs.DayHour
This is a bit messy and includes a temp table with the test data you provided but
-- Temp table loaded with the three sample admissions from the question.
CREATE TABLE #HospitalPatientData (PatientId NVARCHAR(MAX), AdmitDate DATETIME, DischargeDate DATETIME)
INSERT INTO #HospitalPatientData
SELECT 'Jones.', '1-jan-13 01:37:00.000', '1-jan-13 17:45:00.000' UNION
SELECT 'Smith', '1-jan-13 02:12:00.000', '2-jan-13 02:14:00.000' UNION
SELECT 'Brooks.', '4-jan-13 13:54:00.000', '5-jan-13 06:14:00.000'
-- DayHours: the numbers 0..23 built recursively.
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
),
-- Admissions counted per (date as dd/mm/yyyy text, hour of AdmitDate).
HospitalPatientData AS
(
SELECT CONVERT(nvarchar(max),AdmitDate,103) as AdmitDate ,DATEPART(hour,(AdmitDate)) as AdmitHour, COUNT(PatientID) as CountOfPatients
FROM #HospitalPatientData
GROUP BY CONVERT(nvarchar(max),AdmitDate,103), DATEPART(hour,(AdmitDate))
),
-- Every admission date combined with every hour. The join condition
-- d.DayHour=d.DayHour is always true, so this acts as a cross join.
Results AS
(
SELECT MAX(h.AdmitDate) as Date, d.DayHour
FROM HospitalPatientData h
INNER JOIN DayHours d ON d.DayHour=d.DayHour
GROUP BY AdmitDate, CountOfPatients, DayHour
)
-- Count the admissions whose AdmitDate falls on each (date, hour); hours with
-- no admission keep a count of 0 through the LEFT JOIN.
SELECT r.*, COUNT(h.PatientId) as CountOfPatients
FROM Results r
LEFT JOIN #HospitalPatientData h ON CONVERT(nvarchar(max),AdmitDate,103)=r.Date AND DATEPART(HOUR,h.AdmitDate)=r.DayHour
GROUP BY r.Date, r.DayHour
ORDER BY r.Date, r.DayHour
DROP TABLE #HospitalPatientData
This may get you started:
-- Count the patients present at every full hour of a fixed window of days.
-- Runs inside a transaction that is rolled back at the end.
-- Table variables and local variables take the @ sigil (# is not valid here).
BEGIN TRAN

DECLARE @pt TABLE
    (
      PatientID VARCHAR(10)
    , AdmitDate DATETIME
    , DischargeDate DATETIME
    )

INSERT INTO @pt
        ( PatientID, AdmitDate, DischargeDate )
VALUES  ( 'Jones', '1-jan-13 01:37', '1-jan-13 17:45' ),
        ( 'Smith', '1-jan-13 02:12', '2-jan-13 02:14' )
,       ( 'Brooks', '4-jan-13 13:54', '5-jan-13 06:14' )

DECLARE @StartDate DATETIME = '20130101'   -- first day of the window
    , @FutureDays INT = 7                  -- number of days in the window
;
WITH    dy
          -- day numbers 1..@FutureDays, taken from row-numbering sys.columns
          AS ( SELECT TOP (@FutureDays)
                        ROW_NUMBER() OVER ( ORDER BY name ) dy
               FROM     sys.columns c
             ) ,
        hr
          -- hour numbers 1..24
          AS ( SELECT TOP 24
                        ROW_NUMBER() OVER ( ORDER BY name ) hr
               FROM     sys.columns c
             )
    SELECT  refDate, COUNT(p.PatientID) AS PtCount
    FROM    ( SELECT    DATEADD(HOUR, hr.hr - 1,
                                DATEADD(DAY, dy.dy - 1, @StartDate)) AS refDate
              FROM      dy
                        CROSS JOIN hr
            ) ref
            -- LEFT JOIN keeps hours with no patients (PtCount = 0)
            LEFT JOIN @pt p ON ref.refDate BETWEEN p.AdmitDate AND p.DischargeDate
    GROUP BY refDate
    ORDER BY refDate

ROLLBACK

How to merge time intervals in SQL Server

Suppose I have the following an event table with personId, startDate and endDate.
I want to know how much time the person X spent doing an event (the events can override each other).
If the person just has 1 event, its easy: datediff(dd, startDate, endDate)
If the person has 2 events it gets tricky.
I'll set some scenarios for the expected results.
Scenario 1
startDate endDate
1 4
3 5
This means the result should be the datediff from 1 to 5.
Scenario 2
startDate endDate
1 3
6 9
This means the result should be the sum of datediff(dd,1,3) and datediff(dd,6,9).
How can I get this result on an sql query? I can only think of a bunch of if statements, but the same person can have n events so the query will be really confusing.
Shredder Edit: I'd like to add a 3rd scenario:
startDate endDate
1 5
4 8
11 15
Desired result to Shredder scenario:
(1,5) and (4,8) merge in (1,8) since they overlap then we need to datediff(1,8) + datediff(11,15) => 7 + 4 => 11
You can use a recursive CTE to build a list of dates and then count the distinct dates.
-- Expand every interval into one row per day (end date excluded) with a
-- recursive CTE, then count the distinct days.
-- A table variable is declared with @ (declare #T table is not valid T-SQL).
declare @T table
(
    startDate date,
    endDate date
);

insert into @T (startDate, endDate) values
    ('2011-01-01', '2011-01-05'),
    ('2011-01-04', '2011-01-08'),
    ('2011-01-11', '2011-01-15');

with C as
(
    select startDate,
           endDate
    from @T
    union all
    -- step one day forward while still before the end date
    select dateadd(day, 1, startDate),
           endDate
    from C
    where dateadd(day, 1, startDate) < endDate
)
select count(distinct startDate) as DayCount
from C
option (MAXRECURSION 0)
Result:
DayCount
-----------
11
Or you can use a numbers table. Here I use master..spt_values:
-- Same count using master..spt_values (type 'P') as a numbers table: count
-- the distinct day offsets from the earliest start date that fall inside any
-- interval (end date excluded).
-- Expects the table variable @T to be declared and filled earlier in the same batch.
declare @MinStartDate date

select @MinStartDate = min(startDate)
from @T

select count(distinct N.number)
from @T as T
  inner join master..spt_values as N
    on dateadd(day, N.Number, @MinStartDate) between T.startDate and dateadd(day, -1, T.endDate)
where N.type = 'P'
Here's a solution that uses the Tally table idea (which I first heard of in an article by Itzik Ben-Gan -- I still cut and paste his code whenever the subject comes up). The idea is to generate a list of ascending integers, join the source data by range against the numbers, and then count the number of distinct numbers, as follows. (This code uses syntax from SQL Server 2008, but with minor modifications it would work in SQL 2005.)
First set up some testing data:
-- Test data: person 1 has two overlapping events, person 2 two separate ones.
CREATE TABLE #EventTable
(
    PersonId  int      not null,
    startDate datetime not null,
    endDate   datetime not null
)

INSERT #EventTable (PersonId, startDate, endDate)
values
    (1, 'Jan 1, 2011', 'Jan 4, 2011'),
    (1, 'Jan 3, 2011', 'Jan 5, 2011'),
    (2, 'Jan 1, 2011', 'Jan 3, 2011'),
    (2, 'Jan 6, 2011', 'Jan 9, 2011')
Determine some initial values
-- Working variables for the tally query.
-- Local variables take the @ sigil (a leading # is not valid in DECLARE).
DECLARE
    @Interval bigint       -- number of days spanned by the person's events
   ,@FirstDay datetime     -- earliest start date of the person's events
   ,@PersonId int = 1      -- (or whatever)
Get the first day and the maximum possible number of dates (to keep the cte from generating extra values):
-- Span (in days, inclusive) and first day of the selected person's events.
-- Expects @Interval, @FirstDay and @PersonId to be declared earlier in the batch.
SELECT
    @Interval = datediff(dd, min(startDate), max(endDate)) + 1
   ,@FirstDay = min(startDate)
 from #EventTable
 where PersonId = @PersonId
Cut and paste over the one routine and modify and test it to only return as many integers as we'll need:
/*
Stand-alone tally generator, kept commented out: each PassN cross joins the
previous pass with itself, and the final select returns 1..@Interval.
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
select Number from Tally where Number <= @Interval
*/
And now revise it by first joining to the intervals defined in each source row, and then count each distinct value found:
-- Count the distinct days covered by the selected person's events: generate
-- the numbers 1..@Interval, map each to a day starting at @FirstDay, and keep
-- the days that fall inside at least one event.
-- Expects @Interval, @FirstDay and @PersonId to be set earlier in the batch.
;WITH
  Pass0 as (select 1 as C union all select 1), --2 rows
  Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
  Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
  Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
  Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
  Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
  Tally as (select row_number() over(order by C) as Number from Pass5)
 SELECT PersonId, count(distinct Number) EventDays
  from #EventTable et
   inner join Tally
    on dateadd(dd, Tally.Number - 1, @FirstDay) between et.startDate and et.endDate
  where et.PersonId = @PersonId
   and Number <= @Interval
  group by PersonId
Take out the @PersonId filter and you'd get it for all persons. And with minor modification you can do it for any time interval, not just days (which is why I set the Tally table to generate severely large numbers.)
The following SQL is for the three scenarios you've described
-- Time spent per person where an event may overlap the next one.
-- overlaps   : pairs of consecutive rows of a person where the first ends
--              after the second starts, merged into (first start, second end)
-- nooverlaps : rows that take part in no such pair
-- Only single, pairwise overlaps are handled; chains of three or more
-- overlapping events are not merged correctly.
with sampleData
AS (
    SELECT 1 personid, 1 startDate, 4 endDate
    UNION SELECT 1, 3, 5
    UNION SELECT 2, 1, 3
    UNION SELECT 2, 6, 9
    UNION SELECT 3, 1, 5
    UNION SELECT 3, 4, 8
    UNION SELECT 3, 11, 15
),
cte
AS (SELECT personid,
           startdate,
           enddate,
           Row_number() OVER (ORDER BY personid, startdate) AS rn
    FROM   sampledata),
overlaps
AS (SELECT a.personid,
           a.startdate,
           b.enddate,
           a.rn id1,
           b.rn id2
    FROM   cte a
           INNER JOIN cte b
             ON a.personid = b.personid
                AND a.enddate > b.startdate
                AND a.rn = b.rn - 1),
nooverlaps
AS (SELECT a.personid,
           a.startdate,
           a.enddate
    FROM   cte a
           LEFT JOIN overlaps b
             ON a.rn = b.id1
                 OR a.rn = b.id2
    WHERE  b.id1 IS NULL)
SELECT personid,
       SUM(timespent) timespent
FROM   (SELECT personid,
               enddate - startdate timespent
        FROM   nooverlaps
        -- UNION ALL, not UNION: UNION removes duplicate (personid, timespent)
        -- rows, so two separate events of equal length would count only once.
        UNION ALL
        SELECT personid,
               enddate - startdate
        FROM   overlaps) t
GROUP  BY personid
Produces this result
Personid timeSpent
----------- -----------
1 4
2 5
3 11
Notes: I used the simple integers but the DateDiffs should work too
Correctness issue There is a correctness issue if your data is allowed to have multiple overlaps as Cheran S noted, the results won't be correct and you should use one of the other answers instead. His example used [1,5],[4,8],[7,11] for the same person ID
Algebra. If B-n is the ending time of the nth event, and A-n is the starting time of the nth event, then the sum of the differences is the difference of the sums. So you can write
select everything else, sum(cast(endDate as int)) - sum(cast(startDate as int)) as daysSpent
If your dates have no time component, this works. Otherwise, you could use a real.
Try something like this
-- Total event length in days per person; each event is counted in full,
-- with no merging of overlapping events.
select
    personId,
    sum(datediff(dd, startDate, endDate)) as TotalDuration
from yourEventTable
group by personId
-- One row per uid: the sum of (b - a) over that uid's rows, returned as "gap".
;SELECT SUM(b - a) AS gap
FROM xxx
GROUP BY uid
Edit 1: I have modified both solutions to get correct results.
Edit 2: I have done comparative tests using the solutions proposed by Mikael Eriksson, Conrad Frix, Philip Kelley and me. All tests use an EventTable with the following structure:
-- Test table used for the comparison runs: one row per event, and the CHECK
-- constraint requires StartDate to be strictly before EndDate.
CREATE TABLE EventTable
(
    EventID   INT IDENTITY PRIMARY KEY,
    PersonId  INT NOT NULL,
    StartDate DATETIME NOT NULL,
    EndDate   DATETIME NOT NULL,
    CONSTRAINT CK_StartDate_Before_EndDate CHECK (StartDate < EndDate)
);
Also, all tests use warm buffer (no DBCC DROPCLEANBUFFERS) and cold [plan] cache (I have executed DBCC FREEPROCCACHE before every test). Because some solutions use a filter(PersonId = 1) and others not, I have inserted into EventTable rows for only one person (INSERT ...(PersonId,...) VALUES (1,...)).
These are the results:
My solutions use recursive CTEs.
Solution 1:
-- Solution 1: merge overlapping events per person with a recursive CTE and
-- sum the day length of the merged intervals.
-- BaseCTE numbers each person's events in (StartDate, EndDate) order.
WITH BaseCTE
AS
(
SELECT e.StartDate
,e.EndDate
,e.PersonId
,ROW_NUMBER() OVER(PARTITION BY e.PersonId ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e
), RecursiveCTE
AS
(
-- Anchor: each person's first event opens interval 1.
SELECT b.PersonId
,b.RowNumber
,b.StartDate
,b.EndDate
,b.EndDate AS MaxEndDate
,1 AS PseudoDenseRank
FROM BaseCTE b
WHERE b.RowNumber = 1
UNION ALL
-- Step: move to the person's next event. MaxEndDate carries the latest end
-- seen so far; the interval id stays the same when the event starts at or
-- before that end, otherwise it is incremented.
SELECT crt.PersonId
,crt.RowNumber
,crt.StartDate
,crt.EndDate
,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
FROM RecursiveCTE prev
INNER JOIN BaseCTE crt ON prev.PersonId = crt.PersonId
AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
-- Length in days of each merged interval: earliest start to latest end.
SELECT src.PersonId
,src.PseudoDenseRank --Interval ID
,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
FROM RecursiveCTE src
GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT x.PersonId, SUM( x.Days ) DaysPerPerson
FROM SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
-- One recursion level per event of a person; raise the limit from the default.
OPTION(MAXRECURSION 32767);
Solution 2:
-- Solution 2: same algorithm as Solution 1, but the numbered events are first
-- stored in a table variable with a primary key on (PersonID, RowNumber).
-- A table variable is declared with @ (DECLARE #Base TABLE is not valid T-SQL).
DECLARE @Base TABLE --or a temporary table: CREATE TABLE #Base (...)
(
     PersonID   INT NOT NULL
    ,StartDate  DATETIME NOT NULL
    ,EndDate    DATETIME NOT NULL
    ,RowNumber  INT NOT NULL
    ,PRIMARY KEY(PersonID, RowNumber)
);

INSERT @Base (PersonID, StartDate, EndDate, RowNumber)
SELECT   e.PersonId
        ,e.StartDate
        ,e.EndDate
        ,ROW_NUMBER() OVER(PARTITION BY e.PersonID ORDER BY e.StartDate, e.EndDate) RowNumber
FROM    EventTable e;

WITH RecursiveCTE
AS
(
    -- Anchor: each person's first event opens interval 1.
    SELECT   b.PersonId
            ,b.RowNumber
            ,b.StartDate
            ,b.EndDate
            ,b.EndDate AS MaxEndDate
            ,1 AS PseudoDenseRank
    FROM    @Base b
    WHERE   b.RowNumber = 1
    UNION ALL
    -- Step: move to the person's next event. MaxEndDate carries the latest
    -- end seen so far; the interval id is kept when the event starts at or
    -- before that end, otherwise it is incremented.
    SELECT   crt.PersonId
            ,crt.RowNumber
            ,crt.StartDate
            ,crt.EndDate
            ,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
            ,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
    FROM    RecursiveCTE prev
    INNER JOIN @Base crt ON prev.PersonId = crt.PersonId
                        AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
    -- Length in days of each merged interval: earliest start to latest end.
    SELECT   src.PersonId
            ,src.PseudoDenseRank --Interval ID
            ,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
    FROM    RecursiveCTE src
    GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT  x.PersonId, SUM( x.Days ) DaysPerPerson
FROM    SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
OPTION(MAXRECURSION 32767);