Joining massive CTE tables (13,000,000 rows+) performance problems - sql

We have a production database that manages personnel booking at 100s of branches for years in advance with minute level accuracy.
Part of this system are reports that highlight gaps, i.e. compare branch opening hours and staff bookings to see if any branches are open with nobody booked.
It also checks for overlaps, double bookings etc all at the same time, basically minute level accuracy is required.
The way we're doing this is to expand the start and end times of openings hours and bookings into minutes with an integer tally table:
-- Build dbo.Tally: consecutive integers starting at 1 (at most 16,777,216 rows),
-- produced by cross-joining a system catalog table with itself three times.
SELECT TOP (16777216)
    IDENTITY(INT, 1, 1) AS N
INTO dbo.Tally
FROM Master.dbo.SysColumns AS sc1
CROSS JOIN Master.dbo.SysColumns AS sc2
CROSS JOIN Master.dbo.SysColumns AS sc3
-- Cluster the table on N so range predicates against the tally can seek.
ALTER TABLE dbo.Tally
    ADD CONSTRAINT PK_Tally_N
    PRIMARY KEY CLUSTERED (N) WITH FILLFACTOR = 100
We utilise this static indexed tally table to expand opening hours and bookings as follows:
-- One row per open minute: tally row N maps to StartTime + (N - 1) minutes.
SELECT
    oh.[BranchID],
    oh.[DayOfWeek],
    DATEADD(MINUTE, t.N - 1, oh.StartTime)
FROM OpeningHours AS oh
LEFT OUTER JOIN tally AS t
    ON t.N BETWEEN 0 AND DATEDIFF(MINUTE, oh.StartTime, oh.EndTime) + 1
The problem is, once we have the 13,000,000 "open minutes" and the "booked minutes" we then need to join the results to see what's covered:
-- Every open minute, with the number of bookings covering it (0 when none match).
SELECT
    om.[Date],
    om.[Time],
    om.[BranchID],
    ISNULL(bm.BookingCount, 0) AS BookingCount
FROM OpenDatesAndMinutes AS om
LEFT OUTER JOIN BookedMinutes AS bm
    ON om.BranchID = bm.BranchID
    AND om.[Date] = bm.[Date]
    AND om.[Time] = bm.[Time]
As you can imagine, joining on branch, date and time across 13,000,000 rows that all live in CTEs takes ages. Running it for one week isn't too bad (about 10 seconds), but running it for 6 months (13,000,000 minutes) bloats the run time to 25+ minutes.
Once we have joined the open minutes to the booked minutes we then group the data on islands and present to the user:
-- Fragment of a larger WITH list (not runnable on its own).
-- CrossTabPrep tags each PreRender row with an island key [Grp]:
-- minute-of-day minus the row's position within (BranchID, Date, BookingCount)
-- ordered by Time. Assuming one row per minute, consecutive minutes with the
-- same booking count get the same [Grp].
CrossTabPrep ( [Date], [Time], [BranchID], [BookingCount], [Grp] )
AS ( SELECT [Date] ,
[Time] ,
[BranchID] ,
[BookingCount] ,
DATEPART(HOUR, Time) * 60 + DATEPART(MINUTE, Time) - ROW_NUMBER() OVER ( PARTITION BY [BranchID], Date, [BookingCount] ORDER BY Time ) AS [Grp]
FROM PreRender
),
-- FinalRender collapses each island to one row: first and last minute, the
-- span between them in minutes, the booking count, and a colour
-- (0 bookings = Red, exactly 1 = Green, anything else = Yellow).
FinalRender ( [BranchID], [Date], [Start Time], [End Time], [Duration], [EntryCount], [EntryColour] )
AS ( SELECT [BranchID] ,
[Date] ,
MIN([Time]) AS [Start Time] ,
MAX([Time]) AS [End Time] ,
ISNULL(DATEDIFF(MINUTE, MIN([Time]), MAX([Time])), 0) AS Duration ,
[BookingCount] AS EntryCount ,
CASE WHEN [BookingCount] = 0 THEN 'Red'
WHEN [BookingCount] = 1 THEN 'Green'
ELSE 'Yellow'
END AS EntryColour
FROM CrossTabPrep
GROUP BY [BranchID] ,
[Date] ,
[BookingCount] ,
[Grp]
)
Quite simply, is my method efficient? Is there any way I can improve on this method whilst retaining minute-level accuracy? When dealing with massive CTEs such as these, would there be any benefit in dumping the data to indexed temp tables and joining those instead?
Another thing I was considering is replacing the DATE and TIME(0) data types that the big join uses. Would it be more efficient if I cast these to integers?
Here is the Full CTE in case that helps:
-- Branch coverage report.
-- Expands branch opening hours and staff bookings to one row per minute, joins
-- them, then collapses runs of consecutive minutes that share a booking count
-- into periods.
-- Expects the enclosing batch/procedure to supply @StartDate, @EndDate and
-- @TimeSheetV2_SecurityContext, and the temp tables #tbl_Days (DayNumber) and
-- #ValidPosCodes (POSCODE) to exist.
WITH OpeningHours ( [BranchID], [DayOfWeek], [StartTime], [EndTime] )
-- AM, PM and EVE sessions unpivoted to one row per session, restricted to the
-- days listed in #tbl_Days. Sessions whose open time is 00:00:00 are skipped.
AS ( SELECT BranchID ,
DayOfWeek ,
CONVERT(TIME(0), AM_open) ,
CONVERT(TIME(0), AM_close)
FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
WHERE CONVERT(TIME(0), AM_open) <> CONVERT(TIME(0), '00:00:00')
UNION ALL
SELECT BranchID ,
DayOfWeek ,
CONVERT(TIME(0), PM_open) ,
CONVERT(TIME(0), PM_close)
FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
WHERE CONVERT(TIME(0), PM_open) <> CONVERT(TIME(0), '00:00:00')
UNION ALL
SELECT BranchID ,
DayOfWeek ,
CONVERT(TIME(0), EVE_open) ,
CONVERT(TIME(0), EVE_close)
FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
WHERE CONVERT(TIME(0), EVE_open) <> CONVERT(TIME(0), '00:00:00')
),
-- One row per calendar day from @StartDate to @EndDate inclusive, with its
-- DATEPART(WEEKDAY) number (which depends on the session's DATEFIRST setting).
DateRange ( [Date], [DayOfWeek] )
AS ( SELECT CONVERT(DATE, DATEADD(DAY, N - 1, @StartDate)) ,
DATEPART(WEEKDAY, DATEADD(DAY, N - 1, @StartDate))
FROM tally (NOLOCK)
WHERE N <= DATEDIFF(DAY, @StartDate, @EndDate) + 1
),
-- One row per open minute for each branch / day-of-week session:
-- tally row N maps to StartTime + (N - 1) minutes.
OpenMinutes ( [BranchID], [DayOfWeek], [Time] )
AS ( SELECT [BranchID] ,
[DayOfWeek] ,
DATEADD(MINUTE, N - 1, StartTime)
FROM OpeningHours
LEFT OUTER JOIN tally ON tally.N BETWEEN 0
AND DATEDIFF(MINUTE, OpeningHours.StartTime, OpeningHours.EndTime) + 1
),
-- Open minutes projected onto every date in the range that falls on the
-- matching day of week. The IS NOT NULL filter discards dates with no match.
OpenDatesAndMinutes ( [Date], [Time], [BranchID] )
AS ( SELECT DateRange.[Date] ,
OpenMinutes.[Time] ,
OpenMinutes.BranchID
FROM DateRange
LEFT OUTER JOIN OpenMinutes ON DateRange.DayOfWeek = OpenMinutes.DayOfWeek
WHERE OpenMinutes.BranchID IS NOT NULL
),
-- Employees white-listed for the caller's security context.
WhiteListEmployees ( [DET_NUMBERA] )
AS ( SELECT DET_NUMBERA
FROM [dbo].[tbl_ChrisCache_WhiteList]
WHERE [TimeSheetV2_SecurityContext] = @TimeSheetV2_SecurityContext
),
-- Bookings per branch/date/minute for employees whose position code is listed
-- in #ValidPosCodes.
BookedMinutesByRole ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [BookingDate] ,
DATEADD(MINUTE, N - 1, StartTime) ,
BranchID ,
COUNT(BookingID) AS Bookings
FROM tbl_Booking (NOLOCK)
INNER JOIN tbl_BookingReason (NOLOCK) ON dbo.tbl_BookingReason.ReasonID = dbo.tbl_Booking.ReasonID
INNER JOIN tbl_ChrisCache (NOLOCK) ON dbo.tbl_Booking.DET_NUMBERA = dbo.tbl_ChrisCache.DET_NUMBERA
INNER JOIN #ValidPosCodes AS Filter_PostCodes ON dbo.tbl_ChrisCache.POS_NUMBERA = Filter_PostCodes.POSCODE
LEFT OUTER JOIN tally (NOLOCK) ON tally.N BETWEEN 0
AND DATEDIFF(MINUTE, tbl_Booking.StartTime, tbl_Booking.EndTime) + 1
WHERE ( Void = 0 )
AND tbl_BookingReason.CoverRequired = 0 --#### Only use bookings that dont require cover
AND tbl_booking.BranchID <> '023' --#### Branch 23 will always have messy data
AND ( dbo.tbl_Booking.BookingDate BETWEEN @StartDate
AND @EndDate )
GROUP BY [BookingDate] ,
BranchID ,
DATEADD(MINUTE, N - 1, StartTime)
),
-- Same aggregation, restricted to white-listed employees instead of position codes.
BookedMinutesByWhiteList ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [BookingDate] ,
DATEADD(MINUTE, N - 1, StartTime) ,
BranchID ,
COUNT(BookingID) AS Bookings
FROM tbl_Booking(NOLOCK)
INNER JOIN tbl_BookingReason (NOLOCK) ON dbo.tbl_BookingReason.ReasonID = dbo.tbl_Booking.ReasonID
INNER JOIN tbl_ChrisCache (NOLOCK) ON dbo.tbl_Booking.DET_NUMBERA = dbo.tbl_ChrisCache.DET_NUMBERA
INNER JOIN WhiteListEmployees Filter_WhiteList ON dbo.tbl_Booking.DET_NUMBERA = Filter_WhiteList.DET_NUMBERA
LEFT OUTER JOIN tally (NOLOCK) ON tally.N BETWEEN 0
AND DATEDIFF(MINUTE, tbl_Booking.StartTime, tbl_Booking.EndTime) + 1
WHERE ( Void = 0 )
AND tbl_BookingReason.CoverRequired = 0 --#### Only use bookings that dont require cover
AND tbl_booking.BranchID <> '023' --#### Branch 23 will always have messy data
AND ( dbo.tbl_Booking.BookingDate BETWEEN @StartDate
AND @EndDate )
GROUP BY [BookingDate] ,
BranchID ,
DATEADD(MINUTE, N - 1, StartTime)
),
-- Union of both booking sources.
-- NOTE(review): UNION only removes rows that are identical in all four columns.
-- A minute present in both sources with different counts yields two rows and
-- therefore duplicates that minute in PreRender - confirm this is intended.
BookedMinutes ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [Date] ,
[Time] ,
[BranchID] ,
BookingCount
FROM BookedMinutesByRole
UNION
SELECT [Date] ,
[Time] ,
[BranchID] ,
BookingCount
FROM BookedMinutesByWhiteList
),
-- Every open minute with its booking count (0 when no booking matches).
PreRender ( [Date], [Time], [BranchID], [BookingCount] )
AS ( SELECT OpenDatesAndMinutes.[Date] ,
OpenDatesAndMinutes.[Time] ,
OpenDatesAndMinutes.[BranchID] ,
ISNULL(BookedMinutes.BookingCount, 0) AS BookingCount
FROM OpenDatesAndMinutes
LEFT OUTER JOIN BookedMinutes ON OpenDatesAndMinutes.BranchID = BookedMinutes.BranchID
AND OpenDatesAndMinutes.[Date] = BookedMinutes.[Date]
AND OpenDatesAndMinutes.[Time] = BookedMinutes.[Time]
),
-- Island key: minute-of-day minus row position within (BranchID, Date,
-- BookingCount). Consecutive minutes with the same count share one [Grp].
CrossTabPrep ( [Date], [Time], [BranchID], [BookingCount], [Grp] )
AS ( SELECT [Date] ,
[Time] ,
[BranchID] ,
[BookingCount] ,
DATEPART(HOUR, Time) * 60 + DATEPART(MINUTE, Time) - ROW_NUMBER() OVER ( PARTITION BY [BranchID], Date, [BookingCount] ORDER BY Time ) AS [Grp]
FROM PreRender
),
-- Branches flagged as deleted; excluded from the final output.
DeletedBranches ( [BranchID] )
AS ( SELECT [ShopNo]
FROM [dbo].[vw_BranchList]
WHERE [Branch_Deleted] = 1
),
-- One row per island: first/last minute, span in minutes, count and colour.
FinalRender ( [BranchID], [Date], [Start Time], [End Time], [Duration], [EntryCount], [EntryColour] )
AS ( SELECT [BranchID] ,
[Date] ,
MIN([Time]) AS [Start Time] ,
MAX([Time]) AS [End Time] ,
ISNULL(DATEDIFF(MINUTE, MIN([Time]), MAX([Time])), 0) AS Duration ,
--dbo.format_timeV2(ISNULL(DATEDIFF(SECOND, MIN([Time]), MAX([Time])), 0)) AS DurationF ,
[BookingCount] AS EntryCount ,
CASE WHEN [BookingCount] = 0 THEN 'Red'
WHEN [BookingCount] = 1 THEN 'Green'
ELSE 'Yellow'
END AS EntryColour
FROM CrossTabPrep
GROUP BY [BranchID] ,
[Date] ,
[BookingCount] ,
[Grp]
)
SELECT [BranchID] ,
-- WeekEnding is derived from [Date] and @@DATEFIRST and formatted dd/mm/yyyy (style 103).
CONVERT(VARCHAR(10), DATEADD(DAY, 7, CONVERT(DATETIME, CONVERT(VARCHAR(10), DATEADD(day, -1 - ( DATEPART(dw, [Date]) + @@DATEFIRST - 2 ) % 7, [Date]), 103) + ' 23:59:59', 103)), 103) AS WeekEnding ,
[Date] ,
[Start Time] ,
[End Time] ,
[Duration] ,
-- [Duration] is in minutes; rendered as "<hours>h <minutes>m".
CONVERT(VARCHAR, ( [Duration] * 60 ) / 3600) + 'h ' + CONVERT(VARCHAR, ROUND(( ( CONVERT(FLOAT, ( ( [Duration] * 60 ) % 3600 )) ) / 3600 ) * 60, 0)) + 'm' AS [DurationF] ,
[EntryCount] ,
[EntryColour] ,
CASE WHEN [EntryCount] = 0 THEN 'Red'
WHEN [EntryCount] >= 1 THEN 'Green'
END AS DurationColour ,
CASE WHEN [EntryCount] = 0 THEN 'This period of open-time isnt covered'
WHEN [EntryCount] >= 1 THEN 'This period of open-time is covered by ' + CONVERT(VARCHAR, [EntryCount]) + ' booking(s)'
END AS [DurationComment]
FROM FinalRender
-- NOT EXISTS rather than NOT IN: a single NULL ShopNo among the deleted
-- branches would make NOT IN reject every row.
WHERE NOT EXISTS ( SELECT 1
FROM DeletedBranches
WHERE DeletedBranches.[BranchID] = FinalRender.BranchID )

It's funny, because you have answered your own question with your questions at the end. You should just try them all but to summarize:
Materialize CTEs for better performance. You never know when SQL Server will evaluate a CTE more than once
You can build indexes on temporary tables.
I'm not sure how you jumped from [DayOfWeek],DATEADD(MINUTE, N - 1, StartTime) to the join on [Date],[Time] on the other, but having two columns here doesn't make sense. Use either a single datetime or a bigint representing the seconds from an epoch. UnixTimestamp works well here.

My proposal is not based on your data, but on generated test data, so it can be not fully applicable.
Proposal: In order to move from quadratic degradation of performance to at least linear, batch processing can be used, if data is distributed equally among batch periods.
In example below 2 years of bookings is being processed with 3 day batch interval and it takes it 2 minutes and 30 seconds to get back free periods per day per branch.
Test run results:
2 years - 2 minutes and 30 seconds
4 years - 4 minutes and 55 seconds.
6 years - 6 minutes and 41 seconds
It incorporates the same logic that is being used in question by using numbers to find non-matching minutes.
Schema and test data creation:
-- Reset step: drop every object this test script creates, if it already
-- exists, so the script can be re-run from scratch. Each drop is its own batch.
IF OBJECT_ID('vwRandomNumber') IS NOT NULL
DROP VIEW vwRandomNumber
GO
IF OBJECT_ID('dbo.fnRandNumber') IS NOT NULL
DROP FUNCTION dbo.fnRandNumber
GO
IF OBJECT_ID('dbo.fnRandomInt') IS NOT NULL
DROP FUNCTION dbo.fnRandomInt
GO
IF OBJECT_ID('tblNumbers') IS NOT NULL
DROP TABLE dbo.tblNumbers
GO
IF OBJECT_ID('Branches') IS NOT NULL
DROP TABLE Branches
GO
IF OBJECT_ID('OpeningHours') IS NOT NULL
DROP TABLE OpeningHours
GO
IF OBJECT_ID('Bookings') IS NOT NULL
DROP TABLE Bookings
GO
-- One-row view exposing a RAND() value as RandomNumber; read by dbo.fnRandNumber.
-- NOTE(review): presumably a workaround for RAND() not being callable directly
-- inside a user-defined function - confirm.
CREATE VIEW vwRandomNumber
AS
SELECT Rand() RandomNumber;
GO
-- Returns the RandomNumber value exposed by vwRandomNumber as a FLOAT.
CREATE FUNCTION dbo.fnRandNumber()
RETURNS FLOAT
AS
BEGIN
    RETURN
    (
        SELECT TOP (1) v.RandomNumber
        FROM vwRandomNumber AS v
    )
END;
GO
-- Returns an integer between @FromNumber and @ToNumber:
-- @FromNumber plus the rounded product of dbo.fnRandNumber() and the range width.
-- Parameters use the @ prefix; a # prefix is not valid for parameters in T-SQL.
CREATE FUNCTION dbo.fnRandomInt(@FromNumber INT, @ToNumber INT)
RETURNS INT
AS
BEGIN
RETURN (@FromNumber + ROUND(dbo.fnRandNumber()*(@ToNumber - @FromNumber),0))
END;
GO
-- Numbers (tally) table; populated later in the script.
CREATE TABLE tblNumbers
(
NumberID INT PRIMARY KEY
)
-- Test branches.
CREATE TABLE Branches
(
BranchID INT
,BranchName NVARCHAR(100)
);
GO
-- Seed 100 branches (IDs 1..100), each with a random GUID string as its name.
;WITH cteNumbers AS (
SELECT 1 N
UNION ALL
SELECT N+1 FROM cteNumbers WHERE N<100
)
INSERT INTO
Branches
(
BranchID
, BranchName
)
SELECT N, CAST(NEWID() AS NVARCHAR(100)) FROM cteNumbers
OPTION(MAXRECURSION 0);
-- Opening window of one branch on one day (Date, OpenFrom and OpenTo are all DATETIME).
CREATE TABLE OpeningHours
(
BranchID INT
, Date DATETIME
, OpenFrom DATETIME
, OpenTo DATETIME
);
GO
-- Clustered by date first: the batch script filters OpeningHours by date range.
CREATE CLUSTERED INDEX CIX_OpeningHours
ON OpeningHours ([Date], [BranchID])
GO
-- One booking interval for a branch on a given booking date.
CREATE TABLE Bookings
(
BranchID INT
, BookingDate DATETIME
, BookingFrom DATETIME
, BookingTo DATETIME
)
-- Same key order as OpeningHours; the two tables are joined on date + branch.
CREATE CLUSTERED INDEX CIX_Bookings
ON Bookings ([BookingDate],[BranchID])
-- Seed OpeningHours: every branch is open 07:00-19:00 on each of the 2000 days
-- following today.
-- @StartDate is today at midnight (variables take the @ prefix in T-SQL).
DECLARE @StartDate DATETIME = DATEADD(DAY, DATEDIFF(DAY, 0, GETDATE()), 0)
;WITH cteNumbers AS (
SELECT 1 N
UNION ALL
SELECT N+1 FROM cteNumbers WHERE N<2000
)
INSERT INTO
OpeningHours
(
BranchID
, Date
, OpenFrom
, OpenTo
)
SELECT
Branches.BranchID
, Dates.Day
, DATEADD(hour,7,Dates.Day)
, DATEADD(hour,19,Dates.Day)
FROM
(
SELECT
DATEADD(d,N,@StartDate) Day
FROM
cteNumbers
) Dates
CROSS JOIN
Branches
OPTION(MAXRECURSION 0);
-- Seed Bookings: twelve bookings per OpeningHours row. Each booking starts and
-- ends a random whole number of hours after OpenFrom (three hour-range
-- patterns, each repeated four times).
-- The target columns are listed explicitly so the insert does not depend on
-- the table's column order.
INSERT INTO Bookings
(
BranchID
, BookingDate
, BookingFrom
, BookingTo
)
SELECT
OpeningHours.BranchID
,OpeningHours.Date
,BookingHours.StartDate
,BookingHours.ToDate
FROM
OpeningHours
CROSS APPLY
(
SELECT DATEADD(hour, dbo.fnRandomInt(0,3), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(4,9), OpeningHours.OpenFrom) ToDate UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(1,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(6,9), OpeningHours.OpenFrom) UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(2,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(5,8), OpeningHours.OpenFrom) TODate UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(0,3), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(4,9), OpeningHours.OpenFrom) ToDate UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(1,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(6,9), OpeningHours.OpenFrom) UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(2,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(5,8), OpeningHours.OpenFrom) TODate UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(0,3), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(4,9), OpeningHours.OpenFrom) ToDate UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(1,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(6,9), OpeningHours.OpenFrom) UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(2,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(5,8), OpeningHours.OpenFrom) TODate UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(0,3), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(4,9), OpeningHours.OpenFrom) ToDate UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(1,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(6,9), OpeningHours.OpenFrom) UNION ALL
SELECT DATEADD(hour, dbo.fnRandomInt(2,5), OpeningHours.OpenFrom) StartDate
,DATEADD(hour, dbo.fnRandomInt(5,8), OpeningHours.OpenFrom) TODate
) BookingHours;
-- Fill tblNumbers with 1..5000. The free-minute script uses NumberID as a
-- minute-of-day offset, so the range must cover 0..1440 minutes.
;WITH cteNumbers AS (
SELECT 1 N
UNION ALL
SELECT N+1 FROM cteNumbers WHERE N<5000
)
INSERT INTO
tblNumbers
(
NumberID
)
SELECT N FROM cteNumbers
OPTION(MAXRECURSION 0)
--SELECT COUNT(*) FROM Bookings WHERE
Scripts to get periods with no bookings:
-- Finds, per branch and day, the periods inside opening hours that no booking
-- covers. Dates are processed in batches so each pass only handles a few days
-- of minute-level rows. Results accumulate in tblBranchFreePeriods.
SET NOCOUNT ON
IF OBJECT_ID('tblBranchFreePeriods') IS NOT NULL
DROP TABLE tblBranchFreePeriods
IF OBJECT_ID('tblFreeMinutes') IS NOT NULL
DROP TABLE tblFreeMinutes
-- Final output: one row per uncovered period.
CREATE TABLE tblBranchFreePeriods
(
BranchID INT
, Date DATETIME
, PeriodStartDate DATETIME
, PeriodEndDate DATETIME
)
-- Work table: open minutes (offset from midnight) not covered by any booking.
CREATE TABLE tblFreeMinutes
(
BranchID INT
,Date DATETIME
,FreeMinute INT
)
IF OBJECT_ID('dbo.tblStartDates') IS NOT NULL
DROP TABLE tblStartDates
-- Work table: free minutes whose preceding minute is not free (period starts).
CREATE TABLE tblStartDates
(
BranchID INT
, Date DATETIME
, PeriodStartDate DATETIME
)
CREATE CLUSTERED INDEX CIX_tblStartDates
ON tblStartDates([BranchID],[Date])
IF OBJECT_ID('dbo.tblEndDates') IS NOT NULL
DROP TABLE tblEndDates
-- Work table: free minutes whose following minute is not free (period ends).
CREATE TABLE tblEndDates
(
BranchID INT
, Date DATETIME
, PeriodEndDate DATETIME
)
CREATE CLUSTERED INDEX CIX_tblEndDate
ON tblEndDates ([BranchID],[Date])
CREATE CLUSTERED INDEX CIX_tblFreeMinutes
ON tblFreeMinutes ([BranchID],[Date],FreeMinute)
DECLARE @ProcessFromDate DATETIME, @ProcessTo DATETIME
-- Start from the earliest calendar date. The batch bounds below are compared
-- against OpeningHours.Date, so starting from MIN(OpenFrom) (which carries a
-- time of day) would exclude the first date of every batch.
SELECT @ProcessFromDate = MIN([Date]) FROM OpeningHours
-- Assigned in its own statement: when one SELECT contains several variable
-- assignments, their evaluation order is not guaranteed.
SET @ProcessTo = DATEADD(year,2,@ProcessFromDate)
-- Each batch covers the dates @DateFrom .. @DateFrom + @BatchSize inclusive.
DECLARE @BatchSize INT = 3
DECLARE @StartTime DATETIME = GETDATE()
DECLARE @DateFrom DATETIME, @DateTo DATETIME
WHILE (@ProcessFromDate <= @ProcessTo) BEGIN
TRUNCATE TABLE tblFreeMinutes
TRUNCATE TABLE tblStartDates
TRUNCATE TABLE tblEndDates
SET @StartTime = GETDATE()
SET @DateFrom = @ProcessFromDate
SET @DateTo = DATEADD(d,@BatchSize,@ProcessFromDate)
PRINT 'Date From ' + CAST(@DateFrom AS NVARCHAR(50))
PRINT 'Date To ' + CAST(@DateTo AS NVARCHAR(50))
-- Step 1: expand opening hours to minutes and keep only the minutes that no
-- booking for the same branch and date covers (anti-join via LEFT JOIN + IS NULL).
INSERT INTO
tblFreeMinutes
(
BranchID
, Date
, FreeMinute
)
SELECT
OpeningHours.BranchID
,OpeningHours.Date
,tblOpeningHourMinutes.NumberID Minute
FROM
OpeningHours
INNER JOIN
tblNumbers tblOpeningHourMinutes
ON
NumberID
BETWEEN DATEDIFF(minute,OpeningHours.Date,OpeningHours.OpenFrom)
AND
DATEDIFF(minute,OpeningHours.Date,OpeningHours.OpenTo)
LEFT OUTER JOIN
Bookings
ON
Bookings.BookingDate = OpeningHours.Date
AND
Bookings.BranchID = OpeningHours.BranchID
AND
tblOpeningHourMinutes.NumberID
BETWEEN
DATEDIFF(minute,Bookings.BookingDate,Bookings.BookingFrom)
AND
DATEDIFF(minute,Bookings.BookingDate,Bookings.BookingTo)
WHERE
OpeningHours.Date BETWEEN @DateFrom AND @DateTo
AND
Bookings.BookingDate IS NULL
OPTION ( FORCE ORDER )
PRINT 'Populate free minutes ' + CAST(DATEDIFF(millisecond,@StartTime,GETDATE()) AS NVARCHAR(50))
SET @StartTime = GETDATE()
-- Step 2: a free minute starts a period when the minute before it is not free.
INSERT INTO
tblStartDates
(
BranchID
, Date
, PeriodStartDate
)
SELECT
tblFreeMinutes.BranchID
, tblFreeMinutes.Date
, DATEADD(minute,tblFreeMinutes.FreeMinute,tblFreeMinutes.Date)
FROM
tblFreeMinutes
LEFT OUTER JOIN
tblFreeMinutes tblFreeMinutesIn
ON
tblFreeMinutesIn.Date = tblFreeMinutes.Date
AND
tblFreeMinutesIn.BranchID = tblFreeMinutes.BranchID
AND
tblFreeMinutesIn.FreeMinute = tblFreeMinutes.FreeMinute-1
WHERE
tblFreeMinutesIn.BranchID IS NULL
PRINT 'Populate start dates ' + CAST(DATEDIFF(millisecond,@StartTime,GETDATE()) AS NVARCHAR(50))
SET @StartTime = GETDATE()
-- Step 3: a free minute ends a period when the minute after it is not free.
INSERT INTO
tblEndDates
(
BranchID
, Date
, PeriodEndDate
)
SELECT
tblFreeMinutes.BranchID
, tblFreeMinutes.Date
, DATEADD(minute,tblFreeMinutes.FreeMinute,tblFreeMinutes.Date)
FROM
tblFreeMinutes
LEFT OUTER JOIN
tblFreeMinutes tblFreeMinutesIn
ON
tblFreeMinutesIn.Date = tblFreeMinutes.Date
AND
tblFreeMinutesIn.BranchID = tblFreeMinutes.BranchID
AND
tblFreeMinutesIn.FreeMinute = tblFreeMinutes.FreeMinute+1
WHERE
tblFreeMinutesIn.BranchID IS NULL
PRINT 'Populate end dates ' + CAST(DATEDIFF(millisecond,@StartTime,GETDATE()) AS NVARCHAR(50))
SET @StartTime = GETDATE()
-- Step 4: pair every period start with the nearest period end on the same
-- branch and date. ">=" (not ">") so that a free period lasting a single
-- minute, whose start and end are the same instant, pairs with itself instead
-- of being dropped or attached to a later period's end.
INSERT INTO
tblBranchFreePeriods
(
BranchID
, Date
, PeriodStartDate
, PeriodEndDate
)
SELECT
tblStartDates.BranchID
, tblStartDates.Date
, tblStartDates.PeriodStartDate
, tblEndDate.PeriodEndDate
FROM
tblStartDates
CROSS APPLY
(
SELECT TOP 1
tblEndDates.PeriodEndDate
FROM
tblEndDates
WHERE
tblEndDates.BranchID = tblStartDates.BranchID
AND
tblEndDates.Date = tblStartDates.Date
AND
tblEndDates.PeriodEndDate >= tblStartDates.PeriodStartDate
ORDER BY
PeriodEndDate ASC
) tblEndDate
PRINT 'Return intervals ' + CAST(DATEDIFF(millisecond,@StartTime,GETDATE()) AS NVARCHAR(50))
SET @StartTime = GETDATE()
-- The date filter is inclusive on both ends, so the next batch starts one day
-- after @DateTo.
SET @ProcessFromDate = DATEADD(d,@BatchSize+1,@ProcessFromDate)
PRINT ''
PRINT ''
-- Flush the PRINT output to the client after every batch.
RAISERROR ('',0,0) WITH NOWAIT
END
SELECT
BranchID
, Date
, PeriodStartDate
, PeriodEndDate
FROM
tblBranchFreePeriods
ORDER BY
BranchID, Date, PeriodStartDate

Related

Error message: Maximum Recursion exhausted even with OPTION( MAXRECURSION 0)

I'm creating a function that will have as input a start date and a number of minutes. The function will add the number of minutes to the start date and it will output an end date, but only considering work hours and excluding weekends and holidays.
You can see part of the function below.
-- Returns the date/time reached after adding @tempoPrevisto working minutes to
-- @DataIni, counting only the working windows defined below.
--   @tempoPrevisto  number of working minutes to add
--   @DataIni        starting date/time
-- Windows: 08:30-12:30 and 13:30-18:00 on days where DATEPART(dw) is not 6,
-- and 08:00-14:00 on days where it is 6. Days where DATEPART(dw) is 1 or 7 and
-- dates listed in BAS_PeriodosExcecoes with Trabalhavel = 0 are excluded.
-- DATEPART(dw) depends on the session's DATEFIRST setting.
-- Only the year following @DataIni is searched; if the minutes do not fit in
-- that year the result is NULL.
ALTER FUNCTION [dbo].[DataFimPrevisto] (@tempoPrevisto real, @DataIni datetime)
RETURNS datetime
WITH EXECUTE AS CALLER
AS
BEGIN
DECLARE @DataFim datetime;
DECLARE @calculo TABLE( xend datetime, [minutes] int);
WITH
-- Search window: the start date and the same date one year later.
drange (date_start, date_end) AS
(
SELECT
CAST(@DataIni AS DATE) AS date_start,
CAST(DATEADD( YEAR, 1, @DataIni) AS DATE) AS date_end
),
-- Recursive calendar: one row per day in the search window (365-366 levels deep).
dates0 (adate, date_end) AS
(
SELECT date_start, date_end FROM drange
UNION ALL
SELECT DATEADD(day, 1, adate), date_end FROM dates0 WHERE adate < date_end
),
-- Working days only.
dates (adate) AS
(
SELECT adate FROM dates0
WHERE DATEPART(dw , adate) NOT IN ('1', '7') AND NOT EXISTS( SELECT 1 FROM BAS_PeriodosExcecoes B WHERE B.Trabalhavel = 0 AND B.DataInicio = adate)
),
-- Working windows as minutes after midnight.
hours (hour_start, hour_end) AS
(
SELECT 8.5*60, 12.5*60
UNION
SELECT 13.5*60, 18*60
),
hours_friday (hour_start, hour_end) AS
(
SELECT 8*60, 14*60
),
-- Concrete working intervals that start at or after @DataIni. The T/T2
-- branches add a partial interval running from @DataIni to the end of the
-- window that is still open at that moment.
datehours (xstart, xend) AS
(
SELECT *
FROM
(
SELECT
DATEADD(minute, hour_start, CAST(adate AS datetime)) xstart,
DATEADD(minute, hour_end , CAST(adate AS datetime)) xend
FROM dates AS d, hours AS h
WHERE DATEPART(dw , adate) <> '6'
UNION
SELECT T2.xstart, T2.xend
FROM
(
SELECT *, ROW_NUMBER() OVER(PARTITION BY T.xstart ORDER BY T.xend ASC) AS rank
FROM
(
SELECT
@DataIni xstart,
DATEADD(minute, hour_end, CAST(adate AS datetime)) xend
FROM dates AS d, hours AS h
WHERE adate = CAST( @DataIni AS DATE) AND DATEADD(minute, hour_end, CAST(adate AS datetime)) > @DataIni AND DATEPART(dw , adate) <> '6'
) T
) T2
WHERE T2.rank = 1
UNION
SELECT
DATEADD(minute, hour_start, CAST(adate AS datetime)) xstart,
DATEADD(minute, hour_end , CAST(adate AS datetime)) xend
FROM dates AS d, hours_friday AS h
WHERE DATEPART(dw , adate) = '6'
UNION
SELECT T2.xstart, T2.xend
FROM
(
SELECT *, ROW_NUMBER() OVER(PARTITION BY T.xstart ORDER BY T.xend ASC) AS rank
FROM
(
SELECT
@DataIni xstart,
DATEADD(minute, hour_end, CAST(adate AS datetime)) xend
FROM dates AS d, hours_friday AS h
WHERE adate = CAST( @DataIni AS DATE) AND DATEADD(minute, hour_end, CAST(adate AS datetime)) > @DataIni AND DATEPART(dw , adate) = '6'
) T
) T2
WHERE T2.rank = 1
) T3 WHERE T3.xstart >= @DataIni
),
-- Running total of working minutes, ordered by interval start.
cumulative (xend, [minutes]) AS
(
SELECT t.xend, SUM(DATEDIFF(MINUTE, xstart, xend)) OVER (ORDER BY xstart) AS [minutes]
FROM datehours AS t
)
INSERT INTO @calculo
SELECT TOP 1 xend, [minutes]
FROM cumulative
WHERE [minutes] >= @tempoPrevisto
ORDER BY cumulative.xend ASC
-- The hint must sit on the statement that runs the recursive CTE. A
-- MAXRECURSION hint on the query that calls this function does not reach
-- statements inside the function body, so the default limit of 100 applied
-- here. The recursion is bounded by "adate < date_end", so 0 (no limit) is safe.
OPTION (MAXRECURSION 0);
-- Step back from the end of the interval in which the total was reached by
-- the number of minutes that overshoot the target.
SET @DataFim = (SELECT DATEADD( MINUTE, @tempoPrevisto - MAX([minutes]), MAX( [xend])) FROM @calculo);
RETURN(@DataFim);
END;
When I execute this function with
SELECT dbo.DataFimPrevisto( 21240, DATETIMEFROMPARTS( 2023, 1, 25, 6, 0, 0, 0)) OPTION(MAXRECURSION 0);
SSMS returns the error message
The maximum recursion 100 has been exhausted before statement completion
even though I'm calling the function with OPTION(MAXRECURSION 0).

distribute accumulated working hours through days

I have Date time when engine has started working and how long was it working. but sometimes it can work more than 24 Hours.
if it worked for 28 Hours on the starting date i will have record
Name started_working Finished working hours_worked
obj-00123 07/02/2018 13:30 08/02/2018 17:30 28
I need to to have record that will show that engine has worked for 10:30 in 07 and 17:30 in 08.
Name started_working Finished working hours_worked
obj-00123 07/02/2018 13:30 07/02/2018 00:00 10:30
obj-00123 07/02/2018 13:30 08/02/2018 17:30 17:30
or something like that. I don't have any idea how can i get this done. can you give me some clues. i dont ask for writing code if its not too easy.
thank you
This might do the trick for you
-- Sample data supplied through a CTE; replace cteX with the real table.
-- The date/time values are string literals, so how they are parsed
-- (day/month vs. month/day) depends on the session's date format settings.
-- The query emits at most two rows per source row: start -> following
-- midnight, and midnight of the finish day -> finish. Whole days lying
-- between the start day and the finish day are not produced.
;WITH cteX( Name,started_working,Finished_working)
AS
(
SELECT
'obj-00123','07/02/2018 13:30','08/02/2018 17:30' UNION ALL
SELECT 'obj-00155','07/02/2018 15:00','07/02/2018 22:30'
)
SELECT
X.Name
, X.started_working
, X.Finished_working
, HoursWorked = CONVERT(VARCHAR(12), DATEADD(minute, DATEDIFF(minute, X.started_working, X.Finished_working), 0), 114)
FROM
(
SELECT
T1.Name
,T1.started_working
,Finished_working = DATEADD(SECOND,0,DATEADD(DAY, DATEDIFF(DAY,-1,T1.started_working),0)) -- Dummy finish time: the midnight that follows the start
FROM
cteX T1
WHERE
DATEDIFF(DAY,T1.started_working,T1.Finished_working) <> 0 -- Only rows whose start and finish fall on different days get the dummy midnight finish
UNION ALL
SELECT
T2.Name
,started_working = CASE WHEN DATEDIFF(DAY,T2.started_working,T2.Finished_working) <> 0
THEN DATEADD(DAY, DATEDIFF(DAY, 0, T2.Finished_working), 0) -- Start at midnight of the finish day
ELSE T2.started_working
END
,T2.Finished_working
FROM
cteX T2
) X
ORDER BY
X.Name, X.started_working
OUTPUT
Name started_working Finished_working HoursWorked
obj-00123 2018-07-02 13:30:00.000 2018-07-03 00:00:00.000 10:30:00:000
obj-00123 2018-08-02 00:00:00.000 2018-08-02 17:30:00.000 17:30:00:000
obj-00155 2018-07-02 15:00:00.000 2018-07-02 22:30:00.000 07:30:00:000
According to your sample data, a working period may span several days. In that case you need a tally table or a recursive CTE. I have used a recursive CTE since it makes the result fields easier to handle. Note that the result has two columns, started_working and started_working2: started_working matches your expected output, but I believe started_working2 is the column you actually need.
-- Splits each working interval into one row per calendar day it touches.
-- @T holds sample data (table variables take the @ prefix in T-SQL).
declare @T as table (
Name varchar(100)
, started_working datetime
, finished_working datetime
--, hours_worked int
)
insert into @T
values
('obj-00123', '20180207 13:30', '20180208 17:30')
, ('obj-00123', '20180208 19:00', '20180209 05:00')
, ('obj-00123', '20180209 19:00', '20180209 22:00')
, ('obj-00123', '20180210 19:00', '20180213 22:00')
-- rcte: the anchor row covers the start day; each recursive step adds the next
-- day while that day's midnight is still before finished_working.
;with rcte as (
select
*, started_working2 = started_working
, next_date = cast(dateadd(dd, 1, started_working) as date), 1 step
from
@T
union all
select
Name, started_working, finished_working
, cast(next_date as datetime)
, dateadd(dd, 1, next_date), step + 1
from
rcte
where
next_date < finished_working
)
select
Name, started_working, started_working2, finished_working
-- diff is in minutes; rendered as zero-padded hh:mm
, right(replace(str(diff / 60), ' ', 0), 2) + ':' + right(replace(str(diff % 60), ' ', 0), 2) hours_worked
from (
select
Name, started_working
, case
when step = 1 then started_working
else started_working2
end started_working2
-- the last slice of an interval ends at the real finish time; earlier slices
-- end at the following midnight
, case
when step = max(step) over (partition by Name, started_working)
then finished_working else next_date
end finished_working
from
rcte
) t
cross apply (select datediff(mi, started_working2, finished_working) diff) ca
-- One recursion level per day: lift the default limit of 100 so intervals
-- longer than about 100 days do not fail.
option (maxrecursion 0)
I'd approach the solution something like this:
-- Hours run per engine per calendar day, at 5-minute resolution.
-- Each engine row is expanded into the 5-minute windows that start while it
-- was running; windows are then counted per day (12 windows = 1 hour).
WITH dynamic_twelths_of_hr_table(datetime2_value) AS
(
-- Anchor cast explicitly: a bare string literal would make the anchor varchar
-- while the recursive member is a date/time type, and a recursive CTE requires
-- both members to have exactly the same type.
SELECT CAST('2017-01-01' AS DATETIME2)
UNION ALL
SELECT DATEADD(MINUTE, 5, datetime2_value)
FROM dynamic_twelths_of_hr_table
WHERE DATEADD(MINUTE, 5, datetime2_value) <= '2019-01-01'
)
,twelths_hr_table AS
(
-- Each window together with the calendar day (midnight) it belongs to.
SELECT
DATEADD(DAY, DATEDIFF(DAY, 0, datetime2_value), 0) AS date_value
,datetime2_value
FROM dynamic_twelths_of_hr_table
)
,modified_source_table AS
(
-- Engines with no stop time are treated as running until now.
SELECT
name
,objectid
,engine_start
,ISNULL(engine_stop, GETDATE()) AS engine_stop
,IIF(engine_start IS NULL OR engine_stop IS NULL, 1, 0) AS is_still_running
FROM [YOUR_SOURCE_TABLE]
)
SELECT
name
,objectid
,is_still_running
,date_value
,(COUNT(datetime2_value)/12.0) AS hours_run_on_this_day
FROM
modified_source_table
LEFT JOIN
twelths_hr_table AS tht
-- Half-open interval: with BETWEEN, a run from 13:30 to 17:30 would count the
-- 17:30 window as well and report 49 windows instead of 48.
ON (tht.datetime2_value >= engine_start AND tht.datetime2_value < engine_stop)
GROUP BY
name, objectid, is_still_running, date_value
ORDER BY
name, objectid, is_still_running, date_value
-- Two years of 5-minute steps is about 210,000 recursion levels, far beyond
-- the default limit of 100.
OPTION (MAXRECURSION 0)
Note I haven't tested this code so please excuse any small syntax errors.
I've also baked in an assumption about the range of dates to be considered (these can be widened, or made dynamic based on when the query runs), and it has a 5 minute resolution (based on the fact that, at a glance, I could only see one value in the engine_stop column that didn't fall on a 5-minute threshold - so I assume sub-5-minute precision is not required).
Basically what it does is expand each engine row out into 5-minute windows (twelths of an hour), and then simply groups these by day and counts the number of windows per day during which the engine was running.
For currently-running engines, it will calculate how long it has run so far. I trust you can tweak the code to your exact requirements.
Thank you all, this worked perfectly. It needed slight polishing, and the recursion limit needed to be set to 0.
However, I am having trouble creating a view from the CTE version:
-- Attempt to wrap the recursive day-split query in a view.
-- NOTE(review): as written this cannot be created as a view. A view body must
-- be a single SELECT statement, so the DECLARE, the INSERT into a table
-- variable and the OPTION clause below are not permitted there. The logic
-- would need to live in a stored procedure or a multi-statement table-valued
-- function, or the view would have to read the source table directly and
-- avoid recursion deeper than the default limit.
create view mroobjectenginerowkinghoursdeclare as
-- Staging copy of the source rows with the columns renamed for the split below.
declare #T as table (
Name nvarchar(100)
, OBJECTID varchar(50)
, started_working datetime
,STOPFROM datetime
,STARTDATE datetime
,STOPDATE datetime
,MODIFIEDDATETIME datetime
,START_STOP int
,STARTDESCRIPTION nvarchar(300)
,STOPDESCRIPTION nvarchar(300)
,wattage nvarchar (50)
,purpose nvarchar(300)
,location nvarchar(300)
,finished_working datetime
,oldDiff int
)
-- Columns are mapped by position: STOPTO -> started_working,
-- next_stopfrom -> finished_working, diff -> oldDiff.
insert into #T
select
NAME
,OBJECTID
,STOPTO
,STOPFROM
,STARTDATE
,STOPDATE
,MODIFIEDDATETIME
,START_STOP
,STARTDESCRIPTION
,STOPDESCRIPTION
,wattage
,purpose
,location
,next_stopfrom
,diff
FROM [MicrosoftDynamicsAX].[dbo].[mroobjectengineworkinghours]
-- rcte: the anchor row covers the start day; each recursive step adds the next
-- day while that day's midnight is still before finished_working.
;with rcte as (
select
*, started_working2 = started_working
, next_date = cast(dateadd(dd, 1, started_working) as date), 1 step
from
#T
union all
select
Name,OBJECTID, started_working,STOPFROM,STARTDATE,STOPDATE,MODIFIEDDATETIME,START_STOP,STARTDESCRIPTION
,STOPDESCRIPTION,wattage
,purpose
,location, finished_working,oldDiff
, cast(next_date as datetime)
, dateadd(dd, 1, next_date), step + 1
from
rcte
where
next_date < finished_working
)
select
Name,OBJECTID, started_working,STOPFROM,STARTDATE,STOPDATE,MODIFIEDDATETIME,START_STOP,STARTDESCRIPTION
,STOPDESCRIPTION,wattage
,purpose
,location,oldDiff, started_working2, finished_working
-- diff is in minutes; rendered as zero-padded hh:mm
, right(replace(str(diff / 60), ' ', 0), 2) + ':' + right(replace(str(diff % 60), ' ', 0), 2) hours_worked
from (
select
Name,OBJECTID, started_working,STOPFROM,STARTDATE,STOPDATE,MODIFIEDDATETIME,START_STOP,STARTDESCRIPTION
,STOPDESCRIPTION,wattage
,purpose
,location,oldDiff
, case
when step = 1 then started_working
else started_working2
end started_working2
-- the last slice of an interval ends at the real finish time; earlier slices
-- end at the following midnight
, case
when step = max(step) over (partition by Name, started_working)
then finished_working else next_date
end finished_working
from
rcte
) t
cross apply (select datediff(mi, started_working2, finished_working) diff) ca
OPTION (MAXRECURSION 0);

Conditional Count On Row_Number

I have a query that calculates the number working days within a month based on a table which stores all our public holidays.
The current output would show all working days, excluding public holidays and Saturday and Sunday, I would like to show each day of the month, but don't increment on a public holiday or Saturday or Sunday.
Is there a way to conditionally increment the row number?
Query is below:
-- Numbers the working days of the current calendar month (year fixed to 2016):
-- every day that is neither a public holiday nor a Saturday/Sunday.
-- Variables take the @ prefix in T-SQL.
DECLARE @startnum INT=0
DECLARE @endnum INT=365;
-- gen: integers @startnum..@endnum, used as day offsets from 1 Jan 2016.
WITH gen AS
(
SELECT @startnum AS num
UNION ALL
SELECT num + 1
FROM gen
WHERE num + 1 <= @endnum
)
-- holidays: public holiday dates from the work calendar.
, holidays AS
(
SELECT CONVERT(DATE, transdate) AS HolidayDate
FROM WORKCALENDER w
WHERE w.CALENDARID = 'PubHoliday'
)
-- allDays: every date of 2016 with its weekday name.
, allDays AS
(
SELECT DATEADD( d, num, CONVERT( DATE, '1 Jan 2016' ) ) AS DateOfYear
, DATENAME( dw, DATEADD( d, num, CONVERT( DATE, '1 Jan 2016' ))) AS [dayOfWeek]
FROM gen
)
select number = ROW_NUMBER() OVER ( ORDER BY DateOfYear )
, *
from allDays
LEFT OUTER JOIN holidays
ON allDays.DateOfYear = holidays.HolidayDate
-- anti-join: keep only the dates with no matching holiday row
WHERE holidays.HolidayDate IS NULL
AND allDays.dayOfWeek NOT IN ( 'Saturday', 'Sunday')
AND DateOfYear >= CONVERT( DATE, '1 ' + DATENAME( MONTH, GETDATE() ) + ' 2016' )
AND DateOfYear < CONVERT( DATE, '1 ' + DATENAME( MONTH, DATEADD( month, 1, GETDATE()) ) + ' 2016' )
option (maxrecursion 10000)
Something like this (pseudo-code):
select date, row_number() over (order by date) as num
from ( select date
from allDates
where month = x and weekday
exept
select date
from holidays
where month is x
) as t
union all
select date, null
from holidays
where month is x
order by date
You could use a windowed sum, see how the output of WorkdaySequenceInMonth is composed.
-- Lists every day in the range and numbers only the working days within each
-- month. Non-working days keep the running count of the preceding working day.
-- Variables and table variables take the @ prefix in T-SQL.
DECLARE @startDate DATE = '20160101'
, @numDays INT = 365;
DECLARE @Holidays TABLE (Holiday DATE);
INSERT INTO @Holidays(Holiday)
VALUES ('20160101')
, ('20160115')
, ('20160714');
-- nums: 0-based row numbers drawn from a system catalog view. This assumes the
-- view holds at least @numDays rows.
WITH nums AS
(
SELECT row_number() OVER (ORDER BY object_id) - 1 as num
FROM sys.columns
),
dateRange as
(
SELECT
DATEADD(DAY, num, @startDate) AS Dt
, num
FROM nums
WHERE num < @numDays
),
-- Parts: date parts plus an IsWorkday flag (0 for holidays and weekends, else 1).
Parts AS
(
SELECT
R.Dt as [Date]
, Year(R.Dt) as [Year]
, Month(R.Dt) as [Month]
, Day(R.Dt) as [Day]
, Datename(weekday, R.Dt) as [Weekday]
, CASE WHEN H.Holiday IS NOT NULL
OR Datename(weekday, R.Dt) IN ('Saturday', 'Sunday')
THEN 0
ELSE 1
END AS IsWorkday
FROM dateRange R
LEFT JOIN @Holidays H ON R.Dt = H.Holiday
)
--
select
*
-- running total of IsWorkday within the month: increments on working days only
, sum(IsWorkday) over (PARTITION BY [Year],[month]
ORDER BY [Day]
ROWS UNBOUNDED PRECEDING) as WorkdaySequenceInMonth
from Parts
-- [Day] is included so the rows inside a month come back in a defined order
order by [Year], [Month], [Day]
Hi. You can try this query; the first part only generates sample data, so you may not need it.
Then I generate a temp table with all the dates for the time period defined by the start-year and end-year variables.
After that, simple queries return the data.
-- Build a calendar temp table for a range of years and report working days
-- (weekdays that are not listed in the holidays table).

-- Make the script re-runnable in the same session.
IF OBJECT_ID('tempdb..#tempHolidays') IS NOT NULL
    DROP TABLE #tempHolidays;
IF OBJECT_ID('tempdb..#tempCalendar') IS NOT NULL
    DROP TABLE #tempCalendar;

-- generate holidays table (sample data; stored as DATE, not as strings)
CREATE TABLE #tempHolidays (holiday DATE NOT NULL PRIMARY KEY);
INSERT INTO #tempHolidays (holiday)
VALUES ('20160101')
     , ('20160201')
     , ('20160205')
     , ('20160301')
     , ('20160309')
     , ('20160315');

CREATE TABLE #tempCalendar (Date_temp DATE NOT NULL PRIMARY KEY);

-- Show the holidays in use
SELECT holiday
FROM #tempHolidays;

DECLARE @startYear INT = 2016;
DECLARE @endYear   INT = 2016;
-- The period runs from 1 January of @startYear to 31 December of @endYear.
DECLARE @dateStart DATE = CAST(CAST(@startYear AS CHAR(4)) + '0101' AS DATE);
DECLARE @dateEnd   DATE = CAST(CAST(@endYear AS CHAR(4)) + '1231' AS DATE);
DECLARE @date      DATE = @dateStart;

-- Insert dates of the period of time (inclusive of the last day)
WHILE @date <= @dateEnd
BEGIN
    INSERT INTO #tempCalendar (Date_temp)
    VALUES (@date);

    SET @date = DATEADD(DAY, 1, @date);
END;

-- Retrieve date list: every working day in the period.
-- DATENAME(weekday, ...) depends on the session language (English assumed).
SELECT c.Date_temp
FROM #tempCalendar AS c
WHERE NOT EXISTS (SELECT 1 FROM #tempHolidays AS h WHERE h.holiday = c.Date_temp)
  AND DATENAME(weekday, c.Date_temp) NOT IN ('Saturday', 'Sunday')
ORDER BY c.Date_temp;

-- Retrieve number of working days per month
SELECT DATEPART(year, c.Date_temp)  AS [year]
     , DATEPART(month, c.Date_temp) AS [Month]
     , COUNT(*)                     AS CountOfWorkingDays
FROM #tempCalendar AS c
WHERE NOT EXISTS (SELECT 1 FROM #tempHolidays AS h WHERE h.holiday = c.Date_temp)
  AND DATENAME(weekday, c.Date_temp) NOT IN ('Saturday', 'Sunday')
GROUP BY DATEPART(year, c.Date_temp)
       , DATEPART(month, c.Date_temp)
ORDER BY [year], [Month];
You should replace #tempHolidays with your own Holidays table, and set the start-year and end-year variables to your time period.
Here's a simple demo that shows the use of the partition by clause to keep contiguity in your sequencing for non-holidays
-- Demo: number dates with ROW_NUMBER() partitioned by the holiday flag, so
-- non-holidays get one contiguous sequence and holidays get their own.
IF OBJECT_ID('tempdb.dbo.#dates') IS NOT NULL
    DROP TABLE #dates;
CREATE TABLE #dates (d DATE);

IF OBJECT_ID('tempdb.dbo.#holidays') IS NOT NULL
    DROP TABLE #holidays;
CREATE TABLE #holidays (d DATE);

INSERT INTO #holidays (d)
VALUES ('2016-12-25')
     , ('2017-12-25')
     , ('2018-12-25');

-- 1000 days following 2015-12-31.
-- NOTE(review): assumes [Util].dbo.[Numbers] has an integer column n
-- starting at 1 - confirm against your numbers table.
-- The ORDER BY makes TOP pick the 1000 smallest numbers; without it the
-- rows chosen (and therefore the dates) are arbitrary.
INSERT INTO #dates (d)
SELECT TOP (1000) DATEADD(DAY, num.n, '2015-12-31')
FROM [Util].dbo.[Numbers] AS num
ORDER BY num.n;

WITH flagged AS (
    -- Every date with a 0/1 flag telling whether it is a listed holiday.
    SELECT dt.d
         , CASE WHEN h.d IS NULL THEN 0 ELSE 1 END AS IsHoliday
    FROM #dates AS dt
    LEFT JOIN #holidays AS h
        ON dt.d = h.d
)
SELECT f.d
     , ROW_NUMBER() OVER (PARTITION BY f.IsHoliday ORDER BY f.d) AS SeqWithinGroup
FROM flagged AS f
ORDER BY f.d;
And please forgive my marking only Christmas as a holiday!

Eliminate and reduce overlapping data ranges using SQL

I have a dataset in SQL Server Management Studio. The data looks like the following: each record has an identifier for the person (UserID), the date of the record (date), a start time (startime) and a finish time (endtime).
UserID date startime endtime
1 20110203 09:30 09:35
1 20110203 09:31 09:38
1 20110203 10:03 10:05
1 20110203 10:04:00 10:35:00
2 20110203 11:02 11:05
For each person, I want to check whether there are any overlapping times. If there are, I want to keep the smallest startime and the largest endtime; if there is no overlap, I keep the original data. In addition, I want to calculate the duration between the smallest startime and the largest endtime.
The result I want should look like the following. Can anyone show me how to code this, please?
UserID date startime endtime diff
1 20110203 09:30 09:38 00:08
1 20110203 10:03 10:35 00:32
2 20110203 11:02 11:05 00:03
It seems that a SELECT with a CTE would need to recursively merge an undetermined number of rows. In that case I would prefer a safe CURSOR-based solution:
-- Merge overlapping time ranges per user and date with a single ordered
-- cursor pass: rows are read in (UserId, Date, StartTime) order and the
-- "current" range is extended while the next row starts at or before the
-- current end; otherwise the current range is written out and a new one
-- is started.

-- Sample input
DECLARE @t TABLE
(
    UserId    int,
    [Date]    date,
    StartTime time,
    EndTime   time
);
INSERT INTO @t (UserId, [Date], StartTime, EndTime) VALUES
(1, '2011-02-03', '09:30:00', '09:35:00'),
(1, '2011-02-03', '09:31:00', '09:38:00'),
(1, '2011-02-03', '09:36:00', '09:41:00'),
(1, '2011-02-03', '09:40:00', '09:45:00'),
(1, '2011-02-03', '09:42:00', '09:43:00'),
(1, '2011-02-03', '10:03:00', '10:05:00'),
(2, '2011-02-03', '11:02:00', '11:05:00'),
(1, '2011-02-03', '12:00:00', '12:05:00'),
(1, '2011-02-03', '12:04:00', '12:06:00');
------------------
-- Merged ranges
DECLARE @result TABLE
(
    UserId    int,
    [Date]    date,
    StartTime time,
    EndTime   time
);

-- LOCAL keeps the cursor name scoped to this batch; FAST_FORWARD because
-- the rows are only read once, front to back.
DECLARE cur CURSOR LOCAL FAST_FORWARD FOR
    SELECT UserId, [Date], StartTime, EndTime
    FROM @t
    ORDER BY UserId, [Date], StartTime;

-- Row just fetched
DECLARE @UserId    int;
DECLARE @Date      date;
DECLARE @StartTime time;
DECLARE @EndTime   time;
-- Range currently being accumulated
DECLARE @LastUserId    int;
DECLARE @LastDate      date;
DECLARE @LastStartTime time;
DECLARE @LastEndTime   time;

OPEN cur;
FETCH NEXT FROM cur INTO @UserId, @Date, @StartTime, @EndTime;

-- Only seed and flush a range when there is at least one input row;
-- otherwise an all-NULL row would be written to @result.
IF @@FETCH_STATUS = 0
BEGIN
    SET @LastUserId    = @UserId;
    SET @LastDate      = @Date;
    SET @LastStartTime = @StartTime;
    SET @LastEndTime   = @EndTime;

    FETCH NEXT FROM cur INTO @UserId, @Date, @StartTime, @EndTime;

    WHILE @@FETCH_STATUS = 0
    BEGIN
        IF @UserId = @LastUserId AND @Date = @LastDate AND @StartTime <= @LastEndTime
            -- Overlaps (or touches) the current range: keep the later end.
            SET @LastEndTime = CASE WHEN @LastEndTime > @EndTime THEN @LastEndTime ELSE @EndTime END;
        ELSE
        BEGIN
            -- Gap, new date or new user: emit the finished range, start a new one.
            INSERT @result (UserId, [Date], StartTime, EndTime)
            VALUES (@LastUserId, @LastDate, @LastStartTime, @LastEndTime);

            SET @LastUserId    = @UserId;
            SET @LastDate      = @Date;
            SET @LastStartTime = @StartTime;
            SET @LastEndTime   = @EndTime;
        END;

        FETCH NEXT FROM cur INTO @UserId, @Date, @StartTime, @EndTime;
    END;

    -- Emit the range that was still open when the input ran out.
    INSERT @result (UserId, [Date], StartTime, EndTime)
    VALUES (@LastUserId, @LastDate, @LastStartTime, @LastEndTime);
END;

CLOSE cur;
DEALLOCATE cur;

-- Diff is the range length expressed as a time value (seconds added to an
-- arbitrary midnight and cast back to time).
SELECT UserId,
       [Date],
       StartTime,
       EndTime,
       CAST(DATEADD(second, DATEDIFF(second, StartTime, EndTime), '2000-01-01') AS time) AS Diff
FROM @result
ORDER BY UserId, [Date], StartTime;
which returns
1 2011-02-03 09:30:00.0000000 09:45:00.0000000 00:15:00.0000000
1 2011-02-03 10:03:00.0000000 10:05:00.0000000 00:02:00.0000000
1 2011-02-03 12:00:00.0000000 12:06:00.0000000 00:06:00.0000000
2 2011-02-03 11:02:00.0000000 11:05:00.0000000 00:03:00.0000000
The following is a redesigned version of my previous CTE approach. However, it will still have problems if there are multiple records for the same user with an identical start time. I didn't have time to fix that, but as far as I understood this is not possible in the described process.
--
-- Merge overlapping time ranges per user with a recursive CTE.
--
-- This part is temporary sample data and has to be replaced by your tables.
-- Several more records are included now.
-- Known glitch: results are wrong if two records of a user have an identical starttime -
-- as far as I understood, this is not possible in the described case.
-- NOTE(review): T-SQL table variables are written with a leading @; the # prefix on t
-- looks like an extraction artifact - confirm before running.
--
declare #t table (userid int, date int, starttime time, endtime time);
insert into #t values (1, 20110203, '09:30:00', '09:35:00'), (1, 20110203, '09:31:00', '09:38:00'), (1, 20110203, '09:36:00', '09:41:00'), (1, 20110203, '10:03:00', '10:05:00'),(1, 20110203, '10:04:00', '10:35:00'),
(2, 20110203, '11:02:00', '11:05:00'), (2, 20110203, '11:03:00', '11:20:00'), (2, 20110203, '11:04:00', '11:35:00'), (2, 20110203, '13:02:00', '13:05:00'), (2, 20110203, '13:04:00', '13:15:00');
--
-- First cte (recursive).
-- Anchor: pairs every row a with each row b of the same user that starts before a
-- and ends after a's start (LEFT JOIN, so rows without such a b are kept) and outputs
-- the earlier of the two start times and the later of the two end times.
-- Recursive member: applies the same join to the rows produced so far, incrementing lvl,
-- so a range keeps growing while another overlapping earlier row exists.
-- Note: the join predicates compare only userid and the times; the date column is not used.
--
WITH cte AS(
SELECT 1 lvl, a.userid
,CASE WHEN a.starttime <= ISNULL(b.starttime, a.starttime) THEN a.starttime ELSE b.starttime END AS starttime
,CASE WHEN a.endtime >= ISNULL(b.endtime, a.endtime) THEN a.endtime ELSE b.endtime END AS endtime
FROM #t as a
LEFT OUTER JOIN #t AS b ON b.userid = a.userid
AND b.starttime < a.starttime
AND b.endtime > a.starttime
UNION ALL
select a.lvl+1, a.userid
,CASE WHEN a.starttime <= ISNULL(b.starttime, a.starttime) THEN a.starttime ELSE b.starttime END AS xStart
,CASE WHEN a.endtime >= ISNULL(b.endtime, a.endtime) THEN a.endtime ELSE b.endtime END AS xEnd
from cte as a
INNER JOIN #t AS b ON b.userid = a.userid
AND b.starttime < a.starttime
AND b.endtime > a.starttime
),
--
-- Second cte: the highest recursion level (lvl) reached per user
--
cteUserMaxLvl AS (
SELECT userid, max(lvl) AS MaxLvl
FROM cte
GROUP BY userid
),
--
-- Third cte: the cte rows at that highest level; their timespan should cover the desired min. start and max. end
--
cteNoMoreOverlap AS(
SELECT a.userid, starttime, endtime
FROM cte AS a
JOIN cteUserMaxLvl AS b ON a.userid = b.userid AND a.lvl = b.MaxLvl
)
--
-- Select the rows from the "No more overlap" cte
--
SELECT userid, starttime, endtime
FROM cteNoMoreOverlap
UNION ALL
--
-- And finally add, per user, one row with the min. start and max. end of all cte rows
-- whose starttime does not fall inside (NOT BETWEEN) a timespan selected above.
-- The ORDER BY at the end sorts the combined UNION ALL result.
--
SELECT a.userid, min(a.starttime) AS starttime, max(a.endtime) AS endtime
FROM cte AS a
JOIN cteNoMoreOverlap AS b ON a.userid = b.userid AND a.starttime NOT BETWEEN b.starttime AND b.endtime
GROUP BY a.userid
order by userid, starttime, endtime
I believe when you say overlapping time, you are saying within the same hour on the same day. If that is what you mean, following solution might work. Attached is the screenshot of my results.
-- Sample table: times are kept as 'HH:MM' strings.
CREATE TABLE #OverlappingDates
(
    UserID    INT,
    [date]    DATE,
    starttime VARCHAR(5),
    endtime   VARCHAR(5)
);

-- Columns: UserID, date, starttime, endtime
INSERT INTO #OverlappingDates (UserID, date, starttime, endtime)
VALUES (1, '20110203', '09:30', '09:35'),
       (1, '20110203', '09:31', '09:38'),
       (1, '20110203', '10:03', '10:05'),
       (2, '20110203', '11:02', '11:05'),
       (2, '20110203', '11:05', '11:15'),
       (2, '20110203', '11:05', '12:00');

-- Collapse rows that share user, date, start hour and end hour into one
-- row carrying the smallest start and the largest end.
WITH cte AS (
    SELECT UserID,
           date,
           MIN(starttime) AS StartTime,
           MAX(endtime)   AS EndTime
    FROM #OverlappingDates
    GROUP BY UserID,
             date,
             LEFT(starttime, 2),
             LEFT(endtime, 2)
)
SELECT merged.UserID,
       merged.date,
       merged.StartTime,
       merged.EndTime,
       -- 'HH:MM' built from the span in seconds, each part zero-padded
       RIGHT('0' + CAST(span.seconds / 3600 AS VARCHAR(2)), 2)
           + ':'
           + RIGHT('0' + CAST((span.seconds / 60) % 60 AS VARCHAR(2)), 2) AS Diff
FROM cte AS merged
CROSS APPLY (
    -- Seconds between start and end, with both strings glued to the row's date
    SELECT DATEDIFF(SECOND,
                    CAST(CONCAT(CAST(merged.[date] AS VARCHAR(10)), ' ', merged.StartTime) AS DATETIME),
                    CAST(CONCAT(CAST(merged.[date] AS VARCHAR(10)), ' ', merged.EndTime) AS DATETIME)) AS seconds
) AS span;

How to breakdown time policy life period by beginning and end of the months

Picture. Need to be like that
I need to break down each policy's life period by month: from the beginning of the policy to the end of that month, then from the beginning to the end of each following month. For each of these time periods I need to calculate the number of days, so that I can then calculate the earned premium for each policy.
Please see the picture for what I need to achieve.
-- Earned and unearned premium per policy transaction as of @EarnedToDate.
-- The premium is spread evenly over the days of the transaction period and
-- split at @EarnedToDate.
DECLARE @EarnedToDate datetime = '2016-06-30'
;WITH Cte_Policies AS
(
    SELECT
         PolicyNumber
        ,TransactionEffectiveDate
        ,TransactionExpirationDate
        ,WrittenPremium
        ,DATEDIFF(DAY, TransactionEffectiveDate, TransactionExpirationDate) AS TotalDays
        -- Days of the period on or before @EarnedToDate
        ,CASE
            WHEN TransactionEffectiveDate > @EarnedToDate THEN 0 -- Policy not yet in effect
            WHEN TransactionExpirationDate < @EarnedToDate THEN DATEDIFF(DAY, TransactionEffectiveDate, TransactionExpirationDate)
            ELSE DATEDIFF(DAY, TransactionEffectiveDate, @EarnedToDate)
         END AS EarnedDays
        -- Days of the period after @EarnedToDate
        ,CASE
            WHEN TransactionEffectiveDate > @EarnedToDate THEN DATEDIFF(DAY, TransactionEffectiveDate, TransactionExpirationDate)
            WHEN TransactionExpirationDate < @EarnedToDate THEN 0 -- Policy completed
            ELSE DATEDIFF(DAY, @EarnedToDate, TransactionExpirationDate)
         END AS UnearnedDays
    FROM ##TempTable1
)
SELECT
     PolicyNumber
    ,TransactionEffectiveDate
    ,TransactionExpirationDate
    -- The "= 0" guards also skip the division when TotalDays is 0
    ,SUM(CASE WHEN EarnedDays = 0 THEN 0 ELSE WrittenPremium / TotalDays * EarnedDays END) AS EarnedPremium
    ,SUM(CASE WHEN UnearnedDays = 0 THEN 0 ELSE WrittenPremium / TotalDays * UnearnedDays END) AS UnearnedPremium
FROM Cte_Policies
WHERE PolicyNumber = 'PACA1000238-02'
GROUP BY
     TransactionEffectiveDate
    ,TransactionExpirationDate
    ,PolicyNumber
My original snippet answered your question on Twitter, but the following code snippet takes the first one a bit further and provides exactly the result set specified on your question here on StackOverflow...
-- Break each policy period into calendar-month slices up to the earned-to
-- date and prorate the premium by the days in each slice.
; WITH Earned_to_date AS (
    -- Single-row parameter: premium is earned up to (not including) this date
    SELECT Cast('2016-07-01' AS DATE) AS Earned_to_date
), policy_data AS (
    -- Sample policies; replace with the real policy table
    SELECT
        policy_number
      , Cast(text_Effective AS DATE)  AS TransactionEffectiveDate
      , Cast(text_Expiration AS DATE) AS TransactionExpirationDate
      , policy_premium
    FROM (VALUES
        ('p1', '1993-01-01', '1994-12-31', 940.00)
      , ('p3', '2011-12-01', '2012-05-31', 485.00)
      , ('p5', '2011-12-16', '2012-05-15', 485.00)
      , ('p7', '2015-12-16', '2016-11-15', 485.00)
    ) AS z (policy_number, text_Effective
          , text_Expiration, policy_premium)
), digits AS (
    SELECT digit
    FROM (VALUES (0), (1), (2), (3), (4)
               , (5), (6), (7), (8), (9)) AS z2 (digit)
), numbers AS (
    -- 0 .. 9999
    SELECT 1000 * d4.digit + 100 * d3.digit + 10 * d2.digit + d1.digit AS number
    FROM digits AS d1
    CROSS JOIN digits AS d2
    CROSS JOIN digits AS d3
    CROSS JOIN digits AS d4
), calendar AS (
    -- One row per month: first day of the month and first day of the next
    SELECT
        DateAdd(month, number, '1753-01-01') AS month_of
      , DateAdd(month, number, '1753-02-01') AS month_after
    FROM numbers
), policy_dates AS (
    SELECT
        policy_data.policy_number
      , CASE
            WHEN calendar.month_of < policy_data.TransactionEffectiveDate
                THEN policy_data.TransactionEffectiveDate
            ELSE calendar.month_of
        END AS StartRiskMonth
        -- The slice ends at the earliest of: policy expiration, earned-to
        -- date, start of the next month. Taking the minimum of all three
        -- keeps a slice from running past the earned-to date when the
        -- policy expires later in that same month.
      , (SELECT Min(boundary.d)
         FROM (VALUES (policy_data.TransactionExpirationDate)
                    , (Earned_to_date.Earned_to_date)
                    , (calendar.month_after)) AS boundary (d)
        ) AS EndRiskMonth
      , DateDiff(day, policy_data.TransactionEffectiveDate
                    , policy_data.TransactionExpirationDate) AS policy_days
      , policy_data.policy_premium
    FROM policy_data
    INNER JOIN calendar
        ON policy_data.TransactionEffectiveDate < calendar.month_after
       AND calendar.month_of < policy_data.TransactionExpirationDate
    CROSS JOIN Earned_to_date
    WHERE calendar.month_of < Earned_to_date.Earned_to_date
      -- Policies that start on or after the earned-to date earn nothing yet
      AND policy_data.TransactionEffectiveDate < Earned_to_date.Earned_to_date
)
SELECT policy_number, StartRiskMonth, EndRiskMonth
     , DateDiff(day, StartRiskMonth, EndRiskMonth) AS DaysInMonth
       -- NULLIF avoids a divide-by-zero for a zero-length policy period
     , policy_premium * DateDiff(day, StartRiskMonth, EndRiskMonth)
           / NullIf(policy_days, 0) AS EarnedPremium
FROM policy_dates
ORDER BY policy_number, StartRiskMonth
As I thought from your tweet, this is pretty simple to code but it takes a bit of work to understand.
-- Expand each policy period to whole months and count the days covered.
WITH raw_data AS (
    -- Sample effective / expiration dates, parsed from text
    SELECT
        Cast(src.text_TransactionEffectiveDate AS DATE)  AS TransactionEffectiveDate,
        Cast(src.text_TransactionExpirationDate AS DATE) AS TransactionExpirationDate
    FROM (VALUES
        ('1953-01-15', '1992-02-23'),
        ('2012-08-12', '2012-08-26')
    ) AS src (text_TransactionEffectiveDate, text_TransactionExpirationDate)
),
policy_dates AS (
    SELECT
        rd.TransactionEffectiveDate,
        rd.TransactionExpirationDate,
        -- First day of the month the policy becomes effective
        DateAdd(month, DateDiff(month, '1753-01-01', rd.TransactionEffectiveDate), '1753-01-01') AS Policy_Start,
        -- First day of the month after the expiration month (note the 1753-02-01 base)
        DateAdd(month, DateDiff(month, '1753-01-01', rd.TransactionExpirationDate), '1753-02-01') AS Policy_Expired
    FROM raw_data AS rd
)
SELECT
    DateDiff(day, pd.Policy_Start, pd.Policy_Expired) AS Policy_days,
    pd.TransactionEffectiveDate,
    pd.TransactionExpirationDate,
    pd.Policy_Start,
    pd.Policy_Expired
FROM policy_dates AS pd
This is a computation that is common in the insurance field. The trick lies in computing the beginning of the Effective Month, and the beginning of the expiration month (after the policy has ended). The trick is that at one point a month later than the others (1753-02-01) is used.
-PatP
Add a CTE or two that breaks the policy effective and expiration dates into distinct months, then re-use your existing code/CTE
Be sure to review all boundary conditions for EarnedDays and UnearnedDays and make sure they line up with your business rules
-- Split every policy period into per-month slices (first and last covered
-- day of each calendar month), then compute total, earned and unearned
-- days per slice relative to @EarnedToDate.
DECLARE @EarnedToDate DATETIME
SET @EarnedToDate = '2016-06-30'

DECLARE @tblPolicies TABLE
(
    PolicyNumber VARCHAR(100)
  , PolicyEffectiveDate DATETIME
  , PolicyExpirationDate DATETIME
  , WP MONEY --WrittenPremium
)
DECLARE @tblPolicyMonths TABLE
(
    PolicyNumber VARCHAR(100),
    MonthStart DATETIME,
    MonthEnd DATETIME
)

-- Create a bunch of sample policies: policy Pn starts n days from now and
-- runs for one year.
DECLARE @CurPos INT
SET @CurPos = 1
WHILE @CurPos < 4000
BEGIN
    INSERT INTO @tblPolicies (PolicyNumber, PolicyEffectiveDate, PolicyExpirationDate, WP)
    SELECT 'P' + CONVERT(varchar(10), @CurPos)
         , DATEADD(d, @CurPos, GETDATE())
         , DATEADD(YY, 1, DATEADD(d, @CurPos, GETDATE()))
         , @CurPos
    SET @CurPos = @CurPos + 1
END

-- Last calendar day any policy is in force
DECLARE @LastPolicyDate DATE
SET @LastPolicyDate = (SELECT MAX(PolicyExpirationDate) FROM @tblPolicies)

;WITH Cte_All_Dates AS
(
    -- One row per calendar day from the earliest effective date to the
    -- latest expiration date. Values are truncated to DATE so that the
    -- time-of-day carried by the policy DATETIMEs cannot push a policy's
    -- first day outside its own range in the join below.
    SELECT CONVERT(DATE, MIN(PolicyEffectiveDate)) AS DateValue
    FROM @tblPolicies
    UNION ALL
    SELECT DATEADD(DAY, 1, DateValue)
    FROM Cte_All_Dates
    WHERE DateValue < @LastPolicyDate
)
INSERT INTO @tblPolicyMonths (PolicyNumber, MonthStart, MonthEnd)
SELECT P.PolicyNumber
     , MIN(PD.DateValue)
     , MAX(PD.DateValue)
FROM Cte_All_Dates PD
INNER JOIN @tblPolicies P
    ON PD.DateValue >= CONVERT(DATE, P.PolicyEffectiveDate)
   AND PD.DateValue <= CONVERT(DATE, P.PolicyExpirationDate)
GROUP BY P.PolicyNumber
       , DATEPART(MM, PD.DateValue)
       , DATEPART(YY, PD.DateValue)
-- One recursion level per generated day; 32767 is the largest explicit limit
OPTION (MAXRECURSION 32767);

SELECT
     P.PolicyNumber
    ,CONVERT(DATE, PM.MonthStart) AS StartRiskMonth
    ,CONVERT(DATE, PM.MonthEnd) AS EndRiskMonth
    ,P.WP AS WrittenPremium
    -- +1 because both the first and the last day of the slice count
    ,DATEDIFF(DAY, PM.MonthStart, PM.MonthEnd) + 1 AS TotalDays
    ,CASE
        WHEN PM.MonthStart > @EarnedToDate THEN 0 --Policy not yet in effect
        WHEN PM.MonthEnd < @EarnedToDate THEN DATEDIFF(DAY, PM.MonthStart, PM.MonthEnd) + 1
        ELSE DATEDIFF(DAY, PM.MonthStart, @EarnedToDate) + 1
     END AS EarnedDays
    ,CASE
        WHEN PM.MonthStart > @EarnedToDate THEN DATEDIFF(DAY, PM.MonthStart, PM.MonthEnd) + 1
        WHEN PM.MonthEnd < @EarnedToDate THEN 0 -- Policy completed
        -- no +1 here: the earned-to day itself is counted as earned above
        ELSE DATEDIFF(DAY, @EarnedToDate, PM.MonthEnd)
     END AS UnearnedDays
    ,@EarnedToDate AS 'EarnedToDate'
FROM @tblPolicyMonths PM
INNER JOIN @tblPolicies P
    ON PM.PolicyNumber = P.PolicyNumber
ORDER BY P.PolicyNumber, PM.MonthStart, PM.MonthEnd