SQL- Grouping and counting # of days w/o overlapping days - sql

I am trying to compute the total number of days within a month that a patient had a catheter line inserted. The data is broken down into stints, so it is not contiguous throughout the month. I also do not want to count overlapping days between the stints. See the screenshot and query below.
-- Reporting window (both bounds inclusive).
-- ISO 'yyyymmdd' literals are used so parsing does not depend on session language/DATEFORMAT.
DECLARE @start_date DATETIME;
DECLARE @end_date DATETIME;
SET @start_date = '20220201';
SET @end_date = '20220228';

-- Sample data: one row per stint. A patient has many stints, so Patient_ID
-- cannot be the primary key (the four rows below all share Patient_ID 10247).
CREATE TABLE mytable (
    Patient_ID     INTEGER    NOT NULL,
    startdate      DATE       NOT NULL,
    enddate        DATE       NOT NULL,
    Type_of_Line   VARCHAR(4) NOT NULL,
    Insertion_Date DATE       NOT NULL,
    Removal_Date   DATE,
    _of_Cath_Days  INTEGER    NOT NULL
);

INSERT INTO mytable (Patient_ID, startdate, enddate, Type_of_Line, Insertion_Date, Removal_Date, _of_Cath_Days)
VALUES
    (10247, '2022-01-16', '2022-02-11', 'Port', '2021-08-03 00:00:00.000', NULL, 11),
    (10247, '2022-02-11', '2022-02-15', 'Port', '2021-08-03 00:00:00.000', NULL, 5),
    (10247, '2022-02-15', '2022-02-24', 'Port', '2021-08-03 00:00:00.000', NULL, 10),
    (10247, '2022-02-24', '2022-03-23', 'Port', '2021-08-03 00:00:00.000', NULL, 5);

-- stat: patient infusion-status stints restricted to status keys 1 and 5.
-- NOTE(review): the WHERE filter on ptptinfusionstatus discards the NULL-extended
-- rows, so the first LEFT JOIN behaves as an inner join.
WITH stat AS (
    SELECT
        pt.ptkey,
        ptid,
        ptptinfusionstatus.startdate,
        ptptinfusionstatus.enddate
    FROM pt
    LEFT JOIN ptptinfusionstatus
        ON ptptinfusionstatus.ptkey = pt.ptkey
    LEFT JOIN ptinfusionstatus
        ON ptinfusionstatus.ptinfusionstatuskey = ptptinfusionstatus.ptinfusionstatuskey
    WHERE ptptinfusionstatus.ptinfusionstatuskey IN (1, 5)
),
-- access1: event/outcome type details that belong to list key 2.
access1 AS (
    SELECT
        d.NAME,
        d.pharmacyeventandoutcometypedetailkey,
        t.pharmacyeventandoutcometypekey
    FROM pharmacyeventandoutcometypedetail d WITH (NOLOCK)
    LEFT JOIN pharmacyeventandoutcometype t WITH (NOLOCK)
        ON t.pharmacyeventandoutcometypekey = d.pharmacyeventandoutcometypekey
    LEFT JOIN pharmacyeventandoutcomelist l WITH (NOLOCK)
        ON l.pharmacyeventandoutcomelistkey = t.pharmacyeventandoutcomelistkey
    WHERE l.pharmacyeventandoutcomelistkey = 2
),
-- access2: every stint paired with every access device of the same patient,
-- with the device type name resolved through access1 (type key 4 only).
access2 AS (
    SELECT
        stat.ptkey,
        stat.ptid,
        stat.startdate,
        stat.enddate,
        ISNULL(devicetype.NAME, '') AS [Access Device_Type],
        ppad.insertiondate          AS [Access Device_Insertion Date],
        ppad.removaldate            AS [Access Device_Removal Date]
    FROM stat WITH (NOLOCK)
    JOIN pharmacyptaccessdevice ppad WITH (NOLOCK)
        ON ppad.ptkey = stat.ptkey
    LEFT JOIN access1 devicetype WITH (NOLOCK)
        ON devicetype.pharmacyeventandoutcometypedetailkey = ppad.accessdevicetypekey
       AND devicetype.pharmacyeventandoutcometypekey = 4
)
--***MAIN QUERY***
SELECT
    access2.[ptid]                         AS [Patient ID],
    access2.startdate,
    access2.enddate,
    access2.[access device_type]           AS [Type of Line],
    access2.[access device_insertion date] AS [Insertion Date],
    access2.[access device_removal date]   AS [Removal Date],
    -- Inclusive day count between the latest of (insertion date, stint start, window start)
    -- and the earliest of (window end, stint end, removal date).
    DATEDIFF(
        d,
        CASE
            WHEN access2.[access device_insertion date] >= @start_date
             AND access2.[access device_insertion date] >= access2.startdate
                THEN access2.[access device_insertion date]
            WHEN access2.startdate >= access2.[access device_insertion date]
             AND access2.startdate >= @start_date
                THEN access2.startdate
            ELSE @start_date
        END,
        CASE
            WHEN @end_date <= ISNULL(access2.enddate, @end_date)
             AND @end_date <= ISNULL(access2.[access device_removal date], @end_date)
                THEN @end_date
            WHEN access2.enddate IS NOT NULL
             AND access2.enddate < @end_date
             AND access2.enddate <= ISNULL(access2.[access device_removal date], access2.enddate)
                THEN access2.enddate
            ELSE access2.[access device_removal date]
        END
    ) + 1 AS [# of Cath Days]
FROM access2
-- Keep only stints and devices that intersect the reporting window.
WHERE access2.startdate <= @end_date
  AND (access2.enddate >= @start_date OR access2.enddate IS NULL)
  AND access2.[access device_insertion date] <= @end_date
  AND (access2.[access device_removal date] >= @start_date
       OR access2.[access device_removal date] IS NULL)
  AND access2.ptid = '10247';
I tried grouping by patient and adding a sum of days at the patient group level in SQL Reporting Services, but could not get around the overlapping days so the counts are all wrong.

Based on the data provided, below is example code that groups by Patient_ID; when a stint's start date equals the previous stint's end date, it subtracts 1 from that stint's day count so the shared day is counted only once.
-- Reporting window from the question; not referenced by the queries below.
DECLARE @start_date DATETIME;
DECLARE @end_date DATETIME;
SET @start_date = '2/1/2022';
SET @end_date = '2/28/2022';

-- Sample stints held in a table variable (no primary key: a patient has many stints).
DECLARE @mytable TABLE (
    Patient_ID     INTEGER    NOT NULL,
    startdate      DATE       NOT NULL,
    enddate        DATE       NOT NULL,
    Type_of_Line   VARCHAR(4) NOT NULL,
    Insertion_Date DATE       NOT NULL,
    Removal_Date   DATE,
    _of_Cath_Days  INTEGER    NOT NULL
);

INSERT INTO @mytable (Patient_ID, startdate, enddate, Type_of_Line, Insertion_Date, Removal_Date, _of_Cath_Days)
VALUES
    (10247, '2022-01-16', '2022-02-11', 'Port', '2021-08-03 00:00:00.000', NULL, 11),
    (10247, '2022-02-11', '2022-02-15', 'Port', '2021-08-03 00:00:00.000', NULL, 5),
    (10247, '2022-02-15', '2022-02-24', 'Port', '2021-08-03 00:00:00.000', NULL, 10),
    (10247, '2022-02-24', '2022-03-23', 'Port', '2021-08-03 00:00:00.000', NULL, 5);

-- Diagnostic view: each stint next to the end date of the same patient's previous stint.
SELECT
    Patient_ID,
    startdate,
    enddate,
    Type_of_Line,
    Insertion_Date,
    Removal_Date,
    -- PARTITION BY keeps the comparison inside one patient; without it the
    -- "previous" stint could belong to a different patient.
    LAG(enddate) OVER (PARTITION BY Patient_ID ORDER BY startdate) AS compareenddate
FROM @mytable;

-- One row per patient. A stint that starts on the day the previous stint ended
-- shares that day with it, so one day is subtracted from its count.
-- For the sample rows: 11 + (5 - 1) + (10 - 1) + (5 - 1) = 28.
SELECT
    Patient_ID,
    MIN(startdate)      AS startdate,
    MAX(enddate)        AS enddate,
    MAX(Type_of_Line)   AS Type_of_Line,
    MIN(Insertion_Date) AS Insertion_Date,
    MAX(Removal_Date)   AS Removal_Date,
    SUM(IIF(startdate = ISNULL(compareenddate, '1900-01-01'),
            _of_Cath_Days - 1,
            _of_Cath_Days)) AS _of_Cath_Days
FROM (
    SELECT
        Patient_ID,
        startdate,
        enddate,
        Type_of_Line,
        Insertion_Date,
        Removal_Date,
        _of_Cath_Days,
        LAG(enddate) OVER (PARTITION BY Patient_ID ORDER BY startdate) AS compareenddate
    FROM @mytable
) x
GROUP BY Patient_ID;
Query result

Related

my end goal is to see end of month data for previous month

My end goal is to see end of month data for previous month.
Our processing is a day behind so if today is 7/28/2021 our Process date is 7/27/2021
So, I want my data to be grouped.
-- Date key of the previous month's end, looked up from the date dimension.
-- NOTE(review): @ProcessDate is assigned but not used by the query below.
DECLARE @ProcessDate INT;
SET @ProcessDate = (
    SELECT [PrevMonthEnddatekey]
    FROM dbo.dimdate
    WHERE datekey = (SELECT [datekey] FROM sometable [vwProcessDate])
);

-- Balance and loan count per month-end process date and original risk grade.
SELECT
    ProcessDate,
    LoanOrigRiskGrade,
    SUM(LoanOriginalBalance) AS LoanOrigBalance,
    COUNT(LoanID)            AS CountofLoanID
FROM SomeTable
WHERE ProcessDate IN (20210131, 20210228, 20210331, 20210430, 20210531, 20210630)
-- Required: every non-aggregated column in the SELECT list must be grouped.
GROUP BY ProcessDate, LoanOrigRiskGrade
ORDER BY ProcessDate, LoanOrigRiskGrade;
I do not want to hard code these dates into my WHERE statement. I have attached a sample of my results.
I am GROUPING BY ProcessDate, LoanOrigRiskGrade
Then ORDERING BY ProcessDate, LoanOrigRiskGrade
It looks like you want the last day of the month for months within a specified range. You can parameterize that.
For SQL Server:
-- Carried over from the question; not referenced by the query below.
DECLARE @ProcessDate INT;
SET @ProcessDate = (
    SELECT [PrevMonthEnddatekey]
    FROM dbo.dimdate
    WHERE datekey = (
        SELECT [datekey]
        FROM sometable [vwProcessDate]
    )
);

-- Range of months to report on.
DECLARE @startDate DATE;
DECLARE @endDate DATE;
SET @startDate = '2021-01-01';
SET @endDate = '2021-06-30';

-- d: one row per month in the range; eom is the month-end as an INT yyyymmdd key.
-- Both the anchor and the recursive member must produce the same type for eom,
-- so the conversion to INT is applied in both places.
WITH d (dt, eom) AS (
    SELECT
        @startDate,
        CONVERT(INT, CONVERT(CHAR(8), EOMONTH(@startDate), 112))
    UNION ALL
    SELECT
        DATEADD(MONTH, 1, dt),
        CONVERT(INT, CONVERT(CHAR(8), EOMONTH(DATEADD(MONTH, 1, dt)), 112))
    FROM d
    WHERE DATEADD(MONTH, 1, dt) < @endDate
)
SELECT
    ProcessDate,
    LoanOrigRiskGrade,
    SUM(LoanOriginalBalance) AS LoanOrigBalance,
    COUNT(LoanID)            AS CountofLoanID
FROM SomeTable
INNER JOIN d
    ON d.eom = SomeTable.ProcessDate
GROUP BY ProcessDate, LoanOrigRiskGrade
ORDER BY ProcessDate, LoanOrigRiskGrade;
Difficult to check without sample data.

Member Data between date ranges

I have a table in SQL Server 2014 named [Membership] containing personal member data and two date fields named [member from date] and [member to date].
I need to summarise the monthly membership. A member is counted in a given month only if they are a member for that whole month.
So for example, a person with [member from date] of '2014-02-01' and [member to date] of '2015-03-01' would be counted in the month of December 2014, but would not be if the [member to date] was, say, '2014-12-25'.
I need to summarise by every month going back to January 2010 and I have thousands of members in this table. The results need to look similar to this:
Month Count
Jan 2010 3230
Feb 2010 3235
Mar 2010 3232
..
Dec 2016 6279
I can't see how to work this because of the "only if they are a member for that whole month" rule.
Any help will be most appreciated!
Using spt_values and a cte to generate the calendar, here is an example that counts members.
-- Sample members held in a table variable.
declare @members table (member int, start_date date, end_date date);
insert @members (member, start_date, end_date)
values
    (1, '2015-12-15', '2017-01-15'),
    (2, '2016-01-15', '2016-12-15'),
    (3, '2016-03-01', '2016-10-31');

-- First and last day of the calendar to report on.
declare @cal_from datetime = '2016-01-01';
declare @cal_to datetime = '2016-12-31';

-- calendar_cte: one row per month between @cal_from and @cal_to,
-- using the type 'P' rows of spt_values as a number source.
with calendar_cte as (
    select top (datediff(month, @cal_from, @cal_to) + 1)
          [Month] = month(dateadd(month, number, @cal_from))
        , [Year]  = year(dateadd(month, number, @cal_from))
        , [Start] = dateadd(month, number, @cal_from)
        , [End]   = dateadd(day, -1, dateadd(month, number + 1, @cal_from))
    from [master].dbo.spt_values
    where [type] = N'P'
    order by number
)
select [Month]
    , [Year]
    -- A member counts only when the membership covers the whole month.
    , [Count] = (select count(*)
                 from @members
                 where start_date <= [Start]
                   and end_date >= [End])
from calendar_cte
order by [Year], [Month];
With the help of a Months table, this can be handled pretty easily:
/* creating a months table: one row per month with its first and last day */
create table dbo.Months([Month] date primary key, MonthEnd date);

declare @StartDate date = '20100101'
      , @NumberOfYears int = 30;

insert dbo.Months([Month], MonthEnd)
select top (12 * @NumberOfYears)
      [Month]  = dateadd(month, row_number() over (order by number) - 1, @StartDate)
    , MonthEnd = dateadd(day, -1,
                   dateadd(month, row_number() over (order by number), @StartDate)
                 )
from master.dbo.spt_values
/* TOP needs an ORDER BY to guarantee that the earliest months are the ones kept */
order by [Month];

/* the query: members per month, counting only members present for the whole month */
select [Month], [Count] = count(*)
from dbo.Months mo
inner join dbo.[Membership] me on
    /* Member since the start of the month (joined on or before the first day) */
        me.MemberFromDate <= mo.[Month]
    /* Member for the entire month being counted */
    /* NOTE(review): strict > treats MemberToDate as exclusive; use >= if it is the last day of membership */
    and me.MemberToDate > mo.[MonthEnd]
group by [Month]
order by [Month];
If you really don't want to have a Months table, you can use a cte like this:
declare @StartDate date = '20100101'
      , @NumberOfYears int = 30;

/* Months: one row per month with its first and last day, generated on the fly */
;with Months as (
    select top (12 * @NumberOfYears)
          [Month]  = dateadd(month, row_number() over (order by number) - 1, @StartDate)
        , MonthEnd = dateadd(day, -1,
                       dateadd(month, row_number() over (order by number), @StartDate)
                     )
    from master.dbo.spt_values
    /* TOP needs an ORDER BY to guarantee that the earliest months are the ones kept */
    order by [Month]
)
/* the query: members per month, counting only members present for the whole month */
select [Month], [Count] = count(*)
from Months mo
inner join dbo.[Membership] me on
    /* Member since the start of the month (joined on or before the first day) */
        me.MemberFromDate <= mo.[Month]
    /* Member for the entire month being counted */
    /* NOTE(review): strict > treats MemberToDate as exclusive; use >= if it is the last day of membership */
    and me.MemberToDate > mo.[MonthEnd]
group by [Month]
order by [Month];
-- Sample schema: members with a membership range, and a table of month start dates.
create table members(name varchar(50), fromdate datetime, todate datetime)
go
create table months(firstday datetime)
go
insert members (name, fromdate, todate)
values ('Joe', '2014-02-01', '2015-03-25'), ('Jon', '2014-03-12', '2015-01-12');

-- Fill months with the first day of every month from 2000-01 up to 2015-12.
declare @date datetime = '2000-01-01';
while (@date < '2016-01-01')
begin
    insert into months (firstday) values (@date);
    select @date = dateadd(month, 1, @date);
end;

-- MyCTE: one 'yyyy-mm' row per (member, month) where the member was present for the
-- whole month. A month qualifies when its first day is on or after the join date
-- (>= so that joining on the 1st counts) and before the first day of the leaving month.
-- NOTE(review): a member whose todate is the last day of a month is not counted for that month.
with MyCTE(date) as
(
    select convert(char(7), d.firstday, 120)
    from members m
    join months d
        on d.firstday >= m.fromdate
       and d.firstday < datefromparts(year(m.todate), month(m.todate), 1)
)
select date as 'month', count(*) as 'count'
from MyCTE
group by date
order by date;

Conditional Count On Row_Number

I have a query that calculates the number of working days within a month, based on a table that stores all our public holidays.
The current output shows only working days (excluding public holidays, Saturdays and Sundays). I would like to show every day of the month instead, but without incrementing the counter on a public holiday, Saturday or Sunday.
Is there a way to conditionally increment the row number?
Query is below:
-- Day offsets to generate: 0..365 relative to 1 Jan 2016.
DECLARE @startnum INT = 0;
DECLARE @endnum INT = 365;

-- gen: recursive number generator from @startnum to @endnum.
WITH gen AS
(
    SELECT @startnum AS num
    UNION ALL
    SELECT num + 1
    FROM gen
    WHERE num + 1 <= @endnum
)
-- holidays: public holiday dates from the work calendar.
, holidays AS
(
    SELECT CONVERT(DATE, transdate) AS HolidayDate
    FROM WORKCALENDER w
    WHERE w.CALENDARID = 'PubHoliday'
)
-- allDays: every generated date with its weekday name.
, allDays AS
(
    SELECT DATEADD(d, num, CONVERT(DATE, '1 Jan 2016')) AS DateOfYear
         , DATENAME(dw, DATEADD(d, num, CONVERT(DATE, '1 Jan 2016'))) AS [dayOfWeek]
    FROM gen
)
-- Numbers the working days of the current calendar month (year fixed to 2016):
-- holidays are removed by the anti-join, weekends by the weekday-name filter.
SELECT number = ROW_NUMBER() OVER (ORDER BY DateOfYear)
     , *
FROM allDays
LEFT OUTER JOIN holidays
    ON allDays.DateOfYear = holidays.HolidayDate
WHERE holidays.HolidayDate IS NULL
  AND allDays.dayOfWeek NOT IN ('Saturday', 'Sunday')
  AND DateOfYear >= CONVERT(DATE, '1 ' + DATENAME(MONTH, GETDATE()) + ' 2016')
  AND DateOfYear < CONVERT(DATE, '1 ' + DATENAME(MONTH, DATEADD(month, 1, GETDATE())) + ' 2016')
-- The default recursion limit of 100 is too low for 366 generated rows.
OPTION (MAXRECURSION 10000);
kind of pseudo code
-- Pseudo code (not runnable as written): number the working days of month x,
-- then append the holidays of that month with a NULL number.
select date, row_number() over (order by date) as num
from ( select date
from allDates
where month = x and weekday
except
select date
from holidays
where month is x
) as t
union all
select date, null
from holidays
where month is x
order by date
You could use a windowed sum, see how the output of WorkdaySequenceInMonth is composed.
-- First date and number of consecutive days to generate.
DECLARE @startDate DATE = '20160101'
      , @numDays INT = 365;

-- Sample public holidays.
DECLARE @Holidays TABLE (Holiday DATE);
INSERT INTO @Holidays (Holiday)
VALUES ('20160101')
     , ('20160115')
     , ('20160714');

-- nums: zero-based row numbers taken from sys.columns as a number source.
WITH nums AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY object_id) - 1 AS num
    FROM sys.columns
),
-- dateRange: the @numDays dates starting at @startDate.
dateRange AS
(
    SELECT
        DATEADD(DAY, num, @startDate) AS Dt
      , num
    FROM nums
    WHERE num < @numDays
),
-- Parts: date parts plus a 1/0 flag; holidays and weekends are flagged 0.
Parts AS
(
    SELECT
        R.Dt                       AS [Date]
      , YEAR(R.Dt)                 AS [Year]
      , MONTH(R.Dt)                AS [Month]
      , DAY(R.Dt)                  AS [Day]
      , DATENAME(weekday, R.Dt)    AS [Weekday]
      , CASE WHEN H.Holiday IS NOT NULL
               OR DATENAME(weekday, R.Dt) IN ('Saturday', 'Sunday')
             THEN 0
             ELSE 1
        END AS IsWorkday
    FROM dateRange R
    LEFT JOIN @Holidays H ON R.Dt = H.Holiday
)
-- The running sum of IsWorkday within each month gives the workday sequence:
-- it only increases on working days and repeats the last value on non-working days.
SELECT
    *
  , SUM(IsWorkday) OVER (PARTITION BY [Year], [Month]
                         ORDER BY [Day]
                         ROWS UNBOUNDED PRECEDING) AS WorkdaySequenceInMonth
FROM Parts
-- [Day] is included so rows inside a month come back in calendar order.
ORDER BY [Year], [Month], [Day];
Hi You can try this query, the initial part is the data generation, maybe you won't need it.
Then I generate a temp table with all the dates for the time period set in #StartYear, #EndYear
Then just simple queries to return the data
-- generate holidays table (stored as DATE so comparisons need no implicit conversion)
select cast(t.holiday as date) as holiday
into #tempHolidays
from
(
    values ('20160101'), ('20160201'), ('20160205'),
           ('20160301'), ('20160309'), ('20160315')
) as t (holiday);

create table #tempCalendar (Date_temp date);

select holiday from #tempHolidays;

declare @startYear int, @endYear int, @dateStart datetime, @dateEnd datetime, @date datetime;

-- Separate SET statements: each value is assigned before a later one reads it.
set @startYear = 2016;
set @endYear   = 2016;
set @dateStart = cast(cast(@startYear as varchar(4)) + '0101' as datetime);
-- The period ends on 31 December of the END year.
set @dateEnd   = cast(cast(@endYear as varchar(4)) + '1231' as datetime);
set @date      = @dateStart;

--Insert dates of the period of time (inclusive of the last day)
while (@date <= @dateEnd)
begin
    insert into #tempCalendar (Date_temp)
    select @date;
    set @date = dateadd(dd, 1, @date);
end;

-- Retrieve date list: calendar days that are neither holidays nor weekend days
select c.Date_temp
from #tempCalendar c
where not exists (select 1 from #tempHolidays h where h.holiday = c.Date_temp)
  and datename(weekday, c.Date_temp) not in ('Saturday', 'Sunday')
order by c.Date_temp;

--Retrieve sum of working days per month
select datepart(year, c.Date_temp)  as year
     , datepart(month, c.Date_temp) as Month
     , count(*)                     as CountOfWorkingDays
from #tempCalendar c
where not exists (select 1 from #tempHolidays h where h.holiday = c.Date_temp)
  and datename(weekday, c.Date_temp) not in ('Saturday', 'Sunday')
group by datepart(year, c.Date_temp)
       , datepart(month, c.Date_temp)
order by datepart(year, c.Date_temp)
       , datepart(month, c.Date_temp);
You should replace #tempHolidays with your own holidays table, and use the start-year and end-year variables to set your time period.
Here's a simple demo that shows the use of the partition by clause to keep contiguity in your sequencing for non-holidays
-- Recreate the two temp tables used by the demo.
IF OBJECT_ID('tempdb.dbo.#dates') IS NOT NULL
    DROP TABLE #dates;
CREATE TABLE #dates (d DATE);

IF OBJECT_ID('tempdb.dbo.#holidays') IS NOT NULL
    DROP TABLE #holidays;
CREATE TABLE #holidays (d DATE);

INSERT INTO [#holidays] ([d])
VALUES
    ('2016-12-25'),
    ('2017-12-25'),
    ('2018-12-25');

-- 1000 consecutive dates offset from 2015-12-31.
-- ORDER BY makes TOP pick the 1000 smallest numbers instead of an arbitrary 1000.
INSERT INTO [#dates] ([d])
SELECT TOP (1000) DATEADD(DAY, num.n, '2015-12-31')
FROM [Util].dbo.[Numbers] AS num
ORDER BY num.n;

-- holidays: every date flagged 1 when it appears in #holidays, else 0.
WITH holidays AS (
    SELECT d.*, CASE WHEN h.d IS NULL THEN 0 ELSE 1 END AS [IsHoliday]
    FROM [#dates] AS [d]
    LEFT JOIN [#holidays] AS [h]
        ON [d].[d] = [h].[d]
)
-- Partitioning by the flag numbers holidays and non-holidays independently,
-- so the non-holiday sequence stays contiguous across a holiday.
SELECT d, ROW_NUMBER() OVER (PARTITION BY [holidays].[IsHoliday] ORDER BY d) AS [seq]
FROM [holidays]
ORDER BY d;
And please forgive my marking only Christmas as a holiday!

SQL query to find available future dates except weekends

I have a table called "detail" where I store the start date and end date of jobs. I have another table called "leaves", which has leave start date and leave end date fields. I need to find a user's nearest available dates, excluding weekends and leave dates.
-- Latest job end date assigned to user 1.
DECLARE @PackagerLastAssignedDate DATETIME;
SELECT @PackagerLastAssignedDate = MAX(EndDate)
FROM detail
WHERE userId = 1;

-- Leave periods of user 1 that start after that date.
SELECT lveStartDate, lveEndDate
FROM Leaves
WHERE UserId = 1
  AND lveStartDate > @PackagerLastAssignedDate;
Thanks In advance
Berlin.M
Try this one -
-- Inclusive range of candidate dates.
DECLARE
      @DateFrom DATETIME
    , @DateTo DATETIME;

SELECT
      @DateFrom = '20130101'
    , @DateTo = '20130202';

-- Every date in the range that is not a weekend day and not inside one of user 1's leave periods.
SELECT [Date]
FROM (
    -- Expand the range into one row per day, using spt_values as a number source.
    SELECT [Date] = DATEADD(DAY, sv.number, t.DateFrom)
    FROM (
        SELECT
              DateFrom = @DateFrom
            , diff = DATEDIFF(DAY, @DateFrom, @DateTo)
    ) t
    JOIN [master].dbo.spt_values sv ON sv.number <= diff
    WHERE sv.[type] = 'p'
) t2
WHERE DATENAME(WEEKDAY, [Date]) NOT IN ('Saturday', 'Sunday')
    AND NOT EXISTS (
        SELECT 1
        FROM dbo.Leaves l
        WHERE l.UserId = 1
            AND t2.[Date] BETWEEN l.lveStartDate AND l.lveEndDate
    );

Joining massive CTE tables (13,000,000 rows+) performance problems

We have a production database that manages personnel booking at 100s of branches for years in advance with minute level accuracy.
Part of this system are reports that highlight gaps, i.e. compare branch opening hours and staff bookings to see if any branches are open with nobody booked.
It also checks for overlaps, double bookings etc all at the same time, basically minute level accuracy is required.
The way we're doing this is to expand the start and end times of openings hours and bookings into minutes with an integer tally table:
-- Build dbo.Tally on the fly: 16,777,216 sequential integers starting at 1.
-- The three-way cross join of SysColumns only serves as a large enough row source.
SELECT TOP (16777216)
    IDENTITY(INT, 1, 1) AS N
INTO dbo.Tally
FROM Master.dbo.SysColumns AS sc1
CROSS JOIN Master.dbo.SysColumns AS sc2
CROSS JOIN Master.dbo.SysColumns AS sc3

-- Cluster on N so range lookups against the tally are as cheap as possible.
ALTER TABLE dbo.Tally
    ADD CONSTRAINT PK_Tally_N PRIMARY KEY CLUSTERED (N)
    WITH FILLFACTOR = 100
We utilise this static indexed tally table to expand opening hours and bookings as follows:
-- One row per minute of every opening period: tally value N maps to minute N - 1 after StartTime.
SELECT
    [BranchID],
    [DayOfWeek],
    DATEADD(MINUTE, N - 1, StartTime)
FROM OpeningHours
LEFT OUTER JOIN tally
    ON tally.N >= 0
   AND tally.N <= DATEDIFF(MINUTE, OpeningHours.StartTime, OpeningHours.EndTime) + 1
The problem is, once we have the 13,000,000 "open minutes" and the "booked minutes" we then need to join the results to see what's covered:
-- Every open minute with the number of bookings covering it (0 when nothing matches).
SELECT
    om.[Date],
    om.[Time],
    om.[BranchID],
    ISNULL(bm.BookingCount, 0) AS BookingCount
FROM OpenDatesAndMinutes AS om
LEFT OUTER JOIN BookedMinutes AS bm
    ON  om.BranchID = bm.BranchID
    AND om.[Date]   = bm.[Date]
    AND om.[Time]   = bm.[Time]
As you can imagine, joining on branch, date and time with 13,000,000 rows all held in CTEs takes AGES. Running it for a week isn't too bad (about 10 seconds), but running it for 6 months (13,000,000 minutes) bloats to 25 minutes or more.
Once we have joined the open minutes to the booked minutes we then group the data on islands and present to the user:
-- Fragment of a longer CTE list (the leading WITH and the final SELECT are not part of this excerpt).
-- CrossTabPrep: tags each minute row with an island key [Grp].
CrossTabPrep ( [Date], [Time], [BranchID], [BookingCount], [Grp] )
AS ( SELECT [Date] ,
[Time] ,
[BranchID] ,
[BookingCount] ,
-- Minute-of-day minus the row's position within (branch, date, booking count):
-- the difference stays constant across a run of consecutive minutes, so it identifies the run.
DATEPART(HOUR, Time) * 60 + DATEPART(MINUTE, Time) - ROW_NUMBER() OVER ( PARTITION BY [BranchID], Date, [BookingCount] ORDER BY Time ) AS [Grp]
FROM PreRender
),
-- FinalRender: collapses each island into one row with its start, end, duration and colour.
FinalRender ( [BranchID], [Date], [Start Time], [End Time], [Duration], [EntryCount], [EntryColour] )
AS ( SELECT [BranchID] ,
[Date] ,
MIN([Time]) AS [Start Time] ,
MAX([Time]) AS [End Time] ,
ISNULL(DATEDIFF(MINUTE, MIN([Time]), MAX([Time])), 0) AS Duration ,
[BookingCount] AS EntryCount ,
-- Colour by booking count: 0 is Red, exactly 1 is Green, anything else is Yellow.
CASE WHEN [BookingCount] = 0 THEN 'Red'
WHEN [BookingCount] = 1 THEN 'Green'
ELSE 'Yellow'
END AS EntryColour
FROM CrossTabPrep
GROUP BY [BranchID] ,
[Date] ,
[BookingCount] ,
[Grp]
)
Quite simply, is my method efficient? Is there any way I can improve on it while retaining minute-level accuracy? When dealing with massive CTEs such as this, would there be any benefit in dumping the data into indexed temp tables and joining those instead?
Another thing I was considering is replacing the DATE and TIME(0) data types that the big join uses. Would it be more efficient if I cast these to integers?
Here is the Full CTE in case that helps:
-- Branch coverage report: expands opening hours and bookings to minute rows,
-- joins them, and collapses runs of equal booking count into periods.
-- Expects @StartDate, @EndDate and @TimeSheetV2_SecurityContext to be declared by the caller.
-- NOTE(review): #tbl_Days and #ValidPosCodes are kept as temp tables; confirm they are not table variables.

-- OpeningHours: AM / PM / EVE opening periods per branch and weekday, skipping periods that open at 00:00:00.
WITH OpeningHours ( [BranchID], [DayOfWeek], [StartTime], [EndTime] )
AS ( SELECT BranchID ,
            DayOfWeek ,
            CONVERT(TIME(0), AM_open) ,
            CONVERT(TIME(0), AM_close)
     FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
     INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
     WHERE CONVERT(TIME(0), AM_open) <> CONVERT(TIME(0), '00:00:00')
     UNION ALL
     SELECT BranchID ,
            DayOfWeek ,
            CONVERT(TIME(0), PM_open) ,
            CONVERT(TIME(0), PM_close)
     FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
     INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
     WHERE CONVERT(TIME(0), PM_open) <> CONVERT(TIME(0), '00:00:00')
     UNION ALL
     SELECT BranchID ,
            DayOfWeek ,
            CONVERT(TIME(0), EVE_open) ,
            CONVERT(TIME(0), EVE_close)
     FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
     INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
     WHERE CONVERT(TIME(0), EVE_open) <> CONVERT(TIME(0), '00:00:00')
   ),
-- DateRange: one row per calendar day in the requested range, with its weekday number.
DateRange ( [Date], [DayOfWeek] )
AS ( SELECT CONVERT(DATE, DATEADD(DAY, N - 1, @StartDate)) ,
            DATEPART(WEEKDAY, DATEADD(DAY, N - 1, @StartDate))
     FROM tally (NOLOCK)
     WHERE N <= DATEDIFF(DAY, @StartDate, @EndDate) + 1
   ),
-- OpenMinutes: one row per minute of every opening period.
OpenMinutes ( [BranchID], [DayOfWeek], [Time] )
AS ( SELECT [BranchID] ,
            [DayOfWeek] ,
            DATEADD(MINUTE, N - 1, StartTime)
     FROM OpeningHours
     LEFT OUTER JOIN tally ON tally.N BETWEEN 0
                                      AND DATEDIFF(MINUTE, OpeningHours.StartTime, OpeningHours.EndTime) + 1
   ),
-- OpenDatesAndMinutes: open minutes projected onto the actual dates by weekday.
-- The IS NOT NULL filter drops unmatched dates, so the LEFT JOIN acts as an inner join.
OpenDatesAndMinutes ( [Date], [Time], [BranchID] )
AS ( SELECT DateRange.[Date] ,
            OpenMinutes.[Time] ,
            OpenMinutes.BranchID
     FROM DateRange
     LEFT OUTER JOIN OpenMinutes ON DateRange.DayOfWeek = OpenMinutes.DayOfWeek
     WHERE OpenMinutes.BranchID IS NOT NULL
   ),
-- WhiteListEmployees: employees white-listed for the given security context.
WhiteListEmployees ( [DET_NUMBERA] )
AS ( SELECT DET_NUMBERA
     FROM [dbo].[tbl_ChrisCache_WhiteList]
     WHERE [TimeSheetV2_SecurityContext] = @TimeSheetV2_SecurityContext
   ),
-- BookedMinutesByRole: bookings per minute for employees whose position code is in #ValidPosCodes.
BookedMinutesByRole ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [BookingDate] ,
            DATEADD(MINUTE, N - 1, StartTime) ,
            BranchID ,
            COUNT(BookingID) AS Bookings
     FROM tbl_Booking (NOLOCK)
     INNER JOIN tbl_BookingReason (NOLOCK) ON dbo.tbl_BookingReason.ReasonID = dbo.tbl_Booking.ReasonID
     INNER JOIN tbl_ChrisCache (NOLOCK) ON dbo.tbl_Booking.DET_NUMBERA = dbo.tbl_ChrisCache.DET_NUMBERA
     INNER JOIN #ValidPosCodes AS Filter_PostCodes ON dbo.tbl_ChrisCache.POS_NUMBERA = Filter_PostCodes.POSCODE
     LEFT OUTER JOIN tally (NOLOCK) ON tally.N BETWEEN 0
                                               AND DATEDIFF(MINUTE, tbl_Booking.StartTime, tbl_Booking.EndTime) + 1
     WHERE ( Void = 0 )
       AND tbl_BookingReason.CoverRequired = 0 --#### Only use bookings that dont require cover
       AND tbl_booking.BranchID <> '023' --#### Branch 23 will always have messy data
       AND ( dbo.tbl_Booking.BookingDate BETWEEN @StartDate
                                         AND @EndDate )
     GROUP BY [BookingDate] ,
              BranchID ,
              DATEADD(MINUTE, N - 1, StartTime)
   ),
-- BookedMinutesByWhiteList: same expansion, for white-listed employees.
BookedMinutesByWhiteList ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [BookingDate] ,
            DATEADD(MINUTE, N - 1, StartTime) ,
            BranchID ,
            COUNT(BookingID) AS Bookings
     FROM tbl_Booking (NOLOCK)
     INNER JOIN tbl_BookingReason (NOLOCK) ON dbo.tbl_BookingReason.ReasonID = dbo.tbl_Booking.ReasonID
     INNER JOIN tbl_ChrisCache (NOLOCK) ON dbo.tbl_Booking.DET_NUMBERA = dbo.tbl_ChrisCache.DET_NUMBERA
     INNER JOIN WhiteListEmployees Filter_WhiteList ON dbo.tbl_Booking.DET_NUMBERA = Filter_WhiteList.DET_NUMBERA
     LEFT OUTER JOIN tally (NOLOCK) ON tally.N BETWEEN 0
                                               AND DATEDIFF(MINUTE, tbl_Booking.StartTime, tbl_Booking.EndTime) + 1
     WHERE ( Void = 0 )
       AND tbl_BookingReason.CoverRequired = 0 --#### Only use bookings that dont require cover
       AND tbl_booking.BranchID <> '023' --#### Branch 23 will always have messy data
       AND ( dbo.tbl_Booking.BookingDate BETWEEN @StartDate
                                         AND @EndDate )
     GROUP BY [BookingDate] ,
              BranchID ,
              DATEADD(MINUTE, N - 1, StartTime)
   ),
-- BookedMinutes: both booking sources combined; UNION removes rows that are identical in all four columns.
-- NOTE(review): a minute with different counts in the two sources yields two rows here — confirm this is intended.
BookedMinutes ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [Date] ,
            [Time] ,
            [BranchID] ,
            BookingCount
     FROM BookedMinutesByRole
     UNION
     SELECT [Date] ,
            [Time] ,
            [BranchID] ,
            BookingCount
     FROM BookedMinutesByWhiteList
   ),
-- PreRender: every open minute with its booking count (0 when uncovered).
PreRender ( [Date], [Time], [BranchID], [BookingCount] )
AS ( SELECT OpenDatesAndMinutes.[Date] ,
            OpenDatesAndMinutes.[Time] ,
            OpenDatesAndMinutes.[BranchID] ,
            ISNULL(BookedMinutes.BookingCount, 0) AS BookingCount
     FROM OpenDatesAndMinutes
     LEFT OUTER JOIN BookedMinutes ON OpenDatesAndMinutes.BranchID = BookedMinutes.BranchID
                                  AND OpenDatesAndMinutes.[Date] = BookedMinutes.[Date]
                                  AND OpenDatesAndMinutes.[Time] = BookedMinutes.[Time]
   ),
-- CrossTabPrep: island key = minute-of-day minus row position within (branch, date, booking count).
CrossTabPrep ( [Date], [Time], [BranchID], [BookingCount], [Grp] )
AS ( SELECT [Date] ,
            [Time] ,
            [BranchID] ,
            [BookingCount] ,
            DATEPART(HOUR, Time) * 60 + DATEPART(MINUTE, Time) - ROW_NUMBER() OVER ( PARTITION BY [BranchID], Date, [BookingCount] ORDER BY Time ) AS [Grp]
     FROM PreRender
   ),
-- DeletedBranches: branches flagged as deleted, excluded from the final output.
DeletedBranches ( [BranchID] )
AS ( SELECT [ShopNo]
     FROM [dbo].[vw_BranchList]
     WHERE [Branch_Deleted] = 1
   ),
-- FinalRender: one row per island with start, end, duration in minutes and a colour.
FinalRender ( [BranchID], [Date], [Start Time], [End Time], [Duration], [EntryCount], [EntryColour] )
AS ( SELECT [BranchID] ,
            [Date] ,
            MIN([Time]) AS [Start Time] ,
            MAX([Time]) AS [End Time] ,
            ISNULL(DATEDIFF(MINUTE, MIN([Time]), MAX([Time])), 0) AS Duration ,
            --dbo.format_timeV2(ISNULL(DATEDIFF(SECOND, MIN([Time]), MAX([Time])), 0)) AS DurationF ,
            [BookingCount] AS EntryCount ,
            CASE WHEN [BookingCount] = 0 THEN 'Red'
                 WHEN [BookingCount] = 1 THEN 'Green'
                 ELSE 'Yellow'
            END AS EntryColour
     FROM CrossTabPrep
     GROUP BY [BranchID] ,
              [Date] ,
              [BookingCount] ,
              [Grp]
   )
SELECT [BranchID] ,
       -- Week-ending date as dd/mm/yyyy text; @@DATEFIRST is included in the weekday arithmetic.
       CONVERT(VARCHAR(10), DATEADD(DAY, 7, CONVERT(DATETIME, CONVERT(VARCHAR(10), DATEADD(day, -1 - ( DATEPART(dw, [Date]) + @@DATEFIRST - 2 ) % 7, [Date]), 103) + ' 23:59:59', 103)), 103) AS WeekEnding ,
       [Date] ,
       [Start Time] ,
       [End Time] ,
       [Duration] ,
       -- Duration in minutes rendered as "<h>h <m>m".
       CONVERT(VARCHAR, ( [Duration] * 60 ) / 3600) + 'h ' + CONVERT(VARCHAR, ROUND(( ( CONVERT(FLOAT, ( ( [Duration] * 60 ) % 3600 )) ) / 3600 ) * 60, 0)) + 'm' AS [DurationF] ,
       [EntryCount] ,
       [EntryColour] ,
       CASE WHEN [EntryCount] = 0 THEN 'Red'
            WHEN [EntryCount] >= 1 THEN 'Green'
       END AS DurationColour ,
       CASE WHEN [EntryCount] = 0 THEN 'This period of open-time isnt covered'
            WHEN [EntryCount] >= 1 THEN 'This period of open-time is covered by ' + CONVERT(VARCHAR, [EntryCount]) + ' booking(s)'
       END AS [DurationComment]
FROM FinalRender
WHERE FinalRender.BranchID NOT IN ( SELECT [BranchID]
                                    FROM DeletedBranches )
It's funny, because you have answered your own question with your questions at the end. You should just try them all but to summarize:
Materialize CTEs for better performance. You never know when SQL Server will evaluate a CTE more than once
You can build indexes against temporary tables.
I'm not sure how you jumped from [DayOfWeek],DATEADD(MINUTE, N - 1, StartTime) to the join on [Date],[Time] on the other, but having two columns here doesn't make sense. Use either a single datetime or a bigint representing the seconds from an epoch. UnixTimestamp works well here.
My proposal is not based on your data, but on generated test data, so it can be not fully applicable.
Proposal: In order to move from quadratic degradation of performance to at least linear, batch processing can be used, if data is distributed equally among batch periods.
In example below 2 years of bookings is being processed with 3 day batch interval and it takes it 2 minutes and 30 seconds to get back free periods per day per branch.
Test run results:
2 years - 2 minutes and 30 seconds
4 years - 4 minutes and 55 seconds.
6 years - 6 minutes and 41 seconds
It incorporates the same logic that is being used in question by using numbers to find non-matching minutes.
Schema and test data creation:
-- Teardown: drop every object the test-data script creates, so the script can be re-run.
-- Each drop is guarded by OBJECT_ID and runs in its own batch (GO).
-- Random-number view
IF OBJECT_ID('vwRandomNumber') IS NOT NULL
DROP VIEW vwRandomNumber
GO
-- Scalar function that reads the view
IF OBJECT_ID('dbo.fnRandNumber') IS NOT NULL
DROP FUNCTION dbo.fnRandNumber
GO
-- Random integer function
IF OBJECT_ID('dbo.fnRandomInt') IS NOT NULL
DROP FUNCTION dbo.fnRandomInt
GO
-- Numbers table
IF OBJECT_ID('tblNumbers') IS NOT NULL
DROP TABLE dbo.tblNumbers
GO
-- Test data tables
IF OBJECT_ID('Branches') IS NOT NULL
DROP TABLE Branches
GO
IF OBJECT_ID('OpeningHours') IS NOT NULL
DROP TABLE OpeningHours
GO
IF OBJECT_ID('Bookings') IS NOT NULL
DROP TABLE Bookings
GO
-- vwRandomNumber: exposes RAND() through a view so that a scalar function can read it.
CREATE VIEW vwRandomNumber
AS
SELECT RAND() AS RandomNumber;
GO
-- fnRandNumber: returns the value of RAND() as read from vwRandomNumber.
CREATE FUNCTION dbo.fnRandNumber()
RETURNS FLOAT
AS
BEGIN
    RETURN (SELECT TOP 1 RandomNumber FROM vwRandomNumber)
END;
GO
-- fnRandomInt: returns @FromNumber plus a rounded random fraction of the span
-- (@ToNumber - @FromNumber), i.e. an integer between the two bounds inclusive.
CREATE FUNCTION dbo.fnRandomInt(@FromNumber INT, @ToNumber INT)
RETURNS INT
AS
BEGIN
    RETURN (@FromNumber + ROUND(dbo.fnRandNumber() * (@ToNumber - @FromNumber), 0))
END;
GO
-- Numbers table (populated later in the script).
CREATE TABLE tblNumbers
(
    NumberID INT PRIMARY KEY
)
-- Branch list used by the generated test data.
CREATE TABLE Branches
(
    BranchID   INT,
    BranchName NVARCHAR(100)
);
GO
-- Seed 100 branches, numbered 1..100, each with a random GUID as its name.
;WITH seq AS (
    SELECT 1 AS N
    UNION ALL
    SELECT N + 1
    FROM seq
    WHERE N < 100
)
INSERT INTO Branches (BranchID, BranchName)
SELECT
    N,
    CAST(NEWID() AS NVARCHAR(100))
FROM seq
OPTION (MAXRECURSION 0)
-- Opening period of a branch on a given date.
CREATE TABLE OpeningHours
(
    BranchID INT,
    Date     DATETIME,
    OpenFrom DATETIME,
    OpenTo   DATETIME
);
GO
-- Clustered by date, then branch.
CREATE CLUSTERED INDEX CIX_OpeningHours
    ON OpeningHours ([Date], [BranchID])
GO
-- Booking period of a branch on a given date.
CREATE TABLE Bookings
(
    BranchID    INT,
    BookingDate DATETIME,
    BookingFrom DATETIME,
    BookingTo   DATETIME
)
-- Clustered by booking date, then branch.
CREATE CLUSTERED INDEX CIX_Bookings
    ON Bookings ([BookingDate], [BranchID])
-- Today at midnight: DATEDIFF/DATEADD against day 0 strips the time portion.
DECLARE @StartDate DATETIME = DATEADD(month, 0, DATEADD(D, 0, DATEDIFF(d, 0, GETDATE())));

-- Generate 2000 consecutive days (offsets 1..2000 from @StartDate) for every branch,
-- each open from 07:00 to 19:00.
;WITH cteNumbers AS (
    SELECT 1 N
    UNION ALL
    SELECT N + 1 FROM cteNumbers WHERE N < 2000
)
INSERT INTO OpeningHours
(
      BranchID
    , Date
    , OpenFrom
    , OpenTo
)
SELECT
      Branches.BranchID
    , Dates.Day
    , DATEADD(hour, 7, Dates.Day)
    , DATEADD(hour, 19, Dates.Day)
FROM
(
    SELECT
        DATEADD(d, N, @StartDate) Day
    FROM
        cteNumbers
) Dates
CROSS JOIN
    Branches
-- Lift the default recursion limit of 100 for the 2000-row generator.
OPTION (MAXRECURSION 0);
-- Twelve random bookings per opening-hours row.
-- Each row of the VALUES list gives the hour-offset ranges (relative to OpenFrom)
-- from which the booking start and end are drawn; the same three patterns repeat four times.
INSERT INTO Bookings (BranchID, BookingDate, BookingFrom, BookingTo)
SELECT
    oh.BranchID,
    oh.Date,
    slot.StartDate,
    slot.ToDate
FROM OpeningHours AS oh
CROSS APPLY
(
    SELECT
        DATEADD(hour, dbo.fnRandomInt(r.StartLo, r.StartHi), oh.OpenFrom) AS StartDate,
        DATEADD(hour, dbo.fnRandomInt(r.EndLo, r.EndHi), oh.OpenFrom)     AS ToDate
    FROM (VALUES
            (0, 3, 4, 9), (1, 5, 6, 9), (2, 5, 5, 8),
            (0, 3, 4, 9), (1, 5, 6, 9), (2, 5, 5, 8),
            (0, 3, 4, 9), (1, 5, 6, 9), (2, 5, 5, 8),
            (0, 3, 4, 9), (1, 5, 6, 9), (2, 5, 5, 8)
         ) AS r (StartLo, StartHi, EndLo, EndHi)
) AS slot;
-- Fill tblNumbers with the integers 1 .. 5000 using a recursive counter.
-- NOTE(review): tblNumbers is defined elsewhere; the insert relies on its
-- column order, so no explicit column list is given here.
WITH cteNumbers AS (
    SELECT 1 AS N
    UNION ALL
    SELECT N + 1
    FROM cteNumbers
    WHERE N < 5000
)
INSERT INTO tblNumbers
SELECT N
FROM cteNumbers
-- 0 lifts the default recursion cap so all 5000 levels can be generated.
OPTION (MAXRECURSION 0);
--SELECT COUNT(*) FROM Bookings WHERE
Script to get periods with no bookings:
-- (Re)create the result table and the three work tables used to compute
-- free periods. Each table is dropped first if it already exists.

-- Suppress the "rows affected" messages.
SET NOCOUNT ON;

-- Result table: a free period (start/end) for a branch on a day.
IF OBJECT_ID('tblBranchFreePeriods') IS NOT NULL
BEGIN
    DROP TABLE tblBranchFreePeriods;
END;

CREATE TABLE tblBranchFreePeriods (
    BranchID        INT,
    [Date]          DATETIME,
    PeriodStartDate DATETIME,
    PeriodEndDate   DATETIME
);

-- Work table: one row per free minute; FreeMinute is a minute offset from [Date].
IF OBJECT_ID('tblFreeMinutes') IS NOT NULL
BEGIN
    DROP TABLE tblFreeMinutes;
END;

CREATE TABLE tblFreeMinutes (
    BranchID   INT,
    [Date]     DATETIME,
    FreeMinute INT
);

CREATE CLUSTERED INDEX CIX_tblFreeMinutes
    ON tblFreeMinutes (BranchID, [Date], FreeMinute);

-- Work table: first minute of each free period.
IF OBJECT_ID('dbo.tblStartDates') IS NOT NULL
BEGIN
    DROP TABLE tblStartDates;
END;

CREATE TABLE tblStartDates (
    BranchID        INT,
    [Date]          DATETIME,
    PeriodStartDate DATETIME
);

CREATE CLUSTERED INDEX CIX_tblStartDates
    ON tblStartDates (BranchID, [Date]);

-- Work table: last minute of each free period.
IF OBJECT_ID('dbo.tblEndDates') IS NOT NULL
BEGIN
    DROP TABLE tblEndDates;
END;

CREATE TABLE tblEndDates (
    BranchID      INT,
    [Date]        DATETIME,
    PeriodEndDate DATETIME
);

CREATE CLUSTERED INDEX CIX_tblEndDate
    ON tblEndDates (BranchID, [Date]);
-- Find every uninterrupted free (unbooked) period per branch and day, working
-- through OpeningHours in windows of @BatchSize + 1 days. Per window:
--   1. tblFreeMinutes       : opening-hours minutes not covered by any booking
--   2. tblStartDates        : free minutes whose preceding minute is not free
--   3. tblEndDates          : free minutes whose following minute is not free
--   4. tblBranchFreePeriods : each start paired with the nearest end at or after it
-- The elapsed milliseconds of each step are PRINTed as progress output.
-- T-SQL local variables must be prefixed with @.
DECLARE @ProcessFromDate DATETIME,
        @ProcessTo       DATETIME,
        @DateFrom        DATETIME,
        @DateTo          DATETIME;
DECLARE @BatchSize INT = 3;
DECLARE @StartTime DATETIME = GETDATE();

-- Anchor the windows on the earliest calendar day, not on the earliest
-- OpenFrom. The window filter below compares against OpeningHours.[Date]
-- (assumed to be midnight values, as the seeding script stores them); a
-- window starting at an opening time such as 07:00 would exclude the first
-- day of every batch.
SELECT @ProcessFromDate = MIN([Date])
FROM OpeningHours;

-- Assigned in its own statement: when one SELECT assigns several variables
-- the evaluation order is not guaranteed, so @ProcessFromDate could still be
-- NULL at the time this expression is evaluated.
SET @ProcessTo = DATEADD(YEAR, 2, @ProcessFromDate);

WHILE (@ProcessFromDate <= @ProcessTo)
BEGIN
    -- The work tables only ever hold the current window.
    TRUNCATE TABLE tblFreeMinutes;
    TRUNCATE TABLE tblStartDates;
    TRUNCATE TABLE tblEndDates;

    SET @StartTime = GETDATE();

    -- Inclusive window of @BatchSize + 1 days.
    SET @DateFrom = @ProcessFromDate;
    SET @DateTo   = DATEADD(DAY, @BatchSize, @ProcessFromDate);

    PRINT 'Date From ' + CAST(@DateFrom AS NVARCHAR(50));
    PRINT 'Date To ' + CAST(@DateTo AS NVARCHAR(50));

    -- Step 1: expand each opening window into one row per minute (via the
    -- tblNumbers tally table) and keep the minutes matched by no booking.
    INSERT INTO tblFreeMinutes (BranchID, [Date], FreeMinute)
    SELECT
        OpeningHours.BranchID,
        OpeningHours.[Date],
        tblOpeningHourMinutes.NumberID
    FROM OpeningHours
    INNER JOIN tblNumbers AS tblOpeningHourMinutes
        ON tblOpeningHourMinutes.NumberID
            BETWEEN DATEDIFF(MINUTE, OpeningHours.[Date], OpeningHours.OpenFrom)
                AND DATEDIFF(MINUTE, OpeningHours.[Date], OpeningHours.OpenTo)
    LEFT JOIN Bookings
        ON  Bookings.BookingDate = OpeningHours.[Date]
        AND Bookings.BranchID = OpeningHours.BranchID
        AND tblOpeningHourMinutes.NumberID
            BETWEEN DATEDIFF(MINUTE, Bookings.BookingDate, Bookings.BookingFrom)
                AND DATEDIFF(MINUTE, Bookings.BookingDate, Bookings.BookingTo)
    WHERE OpeningHours.[Date] BETWEEN @DateFrom AND @DateTo
      AND Bookings.BookingDate IS NULL   -- anti-join: no booking covers this minute
    OPTION (FORCE ORDER);                -- keep the join order exactly as written

    PRINT 'Populate free minutes ' + CAST(DATEDIFF(millisecond, @StartTime, GETDATE()) AS NVARCHAR(50));
    SET @StartTime = GETDATE();

    -- Step 2: a free minute starts a period when the minute before it is not free.
    INSERT INTO tblStartDates (BranchID, [Date], PeriodStartDate)
    SELECT
        fm.BranchID,
        fm.[Date],
        DATEADD(MINUTE, fm.FreeMinute, fm.[Date])
    FROM tblFreeMinutes AS fm
    LEFT JOIN tblFreeMinutes AS prevMinute
        ON  prevMinute.[Date] = fm.[Date]
        AND prevMinute.BranchID = fm.BranchID
        AND prevMinute.FreeMinute = fm.FreeMinute - 1
    WHERE prevMinute.BranchID IS NULL;

    PRINT 'Populate start dates ' + CAST(DATEDIFF(millisecond, @StartTime, GETDATE()) AS NVARCHAR(50));
    SET @StartTime = GETDATE();

    -- Step 3: a free minute ends a period when the minute after it is not free.
    INSERT INTO tblEndDates (BranchID, [Date], PeriodEndDate)
    SELECT
        fm.BranchID,
        fm.[Date],
        DATEADD(MINUTE, fm.FreeMinute, fm.[Date])
    FROM tblFreeMinutes AS fm
    LEFT JOIN tblFreeMinutes AS nextMinute
        ON  nextMinute.[Date] = fm.[Date]
        AND nextMinute.BranchID = fm.BranchID
        AND nextMinute.FreeMinute = fm.FreeMinute + 1
    WHERE nextMinute.BranchID IS NULL;

    PRINT 'Populate end dates ' + CAST(DATEDIFF(millisecond, @StartTime, GETDATE()) AS NVARCHAR(50));
    SET @StartTime = GETDATE();

    -- Step 4: pair each period start with the earliest period end at or after it.
    -- ">=" rather than ">": a free period lasting a single minute has
    -- start = end and must pair with itself, not with the end of a later period.
    INSERT INTO tblBranchFreePeriods (BranchID, [Date], PeriodStartDate, PeriodEndDate)
    SELECT
        tblStartDates.BranchID,
        tblStartDates.[Date],
        tblStartDates.PeriodStartDate,
        NearestEnd.PeriodEndDate
    FROM tblStartDates
    CROSS APPLY (
        SELECT TOP (1)
            tblEndDates.PeriodEndDate
        FROM tblEndDates
        WHERE tblEndDates.BranchID = tblStartDates.BranchID
          AND tblEndDates.[Date] = tblStartDates.[Date]
          AND tblEndDates.PeriodEndDate >= tblStartDates.PeriodStartDate
        ORDER BY tblEndDates.PeriodEndDate ASC
    ) AS NearestEnd;

    PRINT 'Return intervals ' + CAST(DATEDIFF(millisecond, @StartTime, GETDATE()) AS NVARCHAR(50));

    -- The window is inclusive of @DateTo, so the next one starts the day after.
    SET @ProcessFromDate = DATEADD(DAY, @BatchSize + 1, @ProcessFromDate);

    PRINT '';
    PRINT '';
    -- Severity-0 message WITH NOWAIT sends the buffered PRINT output to the client now.
    RAISERROR ('', 0, 0) WITH NOWAIT;

    -- Debug aid: uncomment to inspect the results after the first window only.
    --SELECT * FROM tblBranchFreePeriods
    --BREAK
END;
-- Return every free period found, ordered by branch, day and period start.
SELECT
    BranchID,
    [Date],
    PeriodStartDate,
    PeriodEndDate
FROM tblBranchFreePeriods
ORDER BY
    BranchID,
    [Date],
    PeriodStartDate;