SQL - possible pivot issue - sql

I have a table with the following structure
Item Id, Start Date, End Date
1 , 2015-01-01, 2015-06-01
2 , 2015-01-01, 2015-02-01
3 , 2015-03-01, 2015-08-01
4 , 2015-06-01, 2015-10-01
I would like to view the results so that each month is a column.
Each row will contain the id of the item that is within this month.
Example:
I am asking for all items that are within 2015-01-01 to 2015-03-01.
The results should display, in columns, all the months within that range. So in this case it's 3 columns, Jan Feb and March.
The number of rows will be the total number of items that are within that range BUT each cell should show value of item id only if that item is within range:
example:
2015-01-01, 2015-02-01, 2015-03-01
1 1 1
2 2 NULL
NULL NULL 3

In order to use pivot, you can create a recursive CTE to get each item id and the list of months it covers, then pivot the CTE.
-- Expand every item that overlaps the requested range into one row per month
-- it covers, then pivot those rows so that each month becomes a column.
;WITH cte AS
(
    -- Anchor: the first covered month of each overlapping item.
    -- Two closed intervals overlap when each starts on/before the other ends;
    -- testing only whether an endpoint falls inside the range would miss an
    -- item that starts before the range and ends after it.
    SELECT [Item Id],
           -- Snap to the 1st of the month so the value matches the pivot column names
           DATEADD(MONTH, DATEDIFF(MONTH, 0, [Start Date]), 0) AS [Month Start],
           [End Date]
    FROM Table1
    WHERE [Start Date] <= '2015-03-01' -- End of the date range you want
      AND [End Date] >= '2015-01-01' -- Start of the date range you want
    UNION ALL
    -- Recursive step: advance one month at a time while the item is still
    -- active; stop at the end of the requested range because later months
    -- are not pivoted anyway.
    SELECT [Item Id], DATEADD(MONTH, 1, [Month Start]), [End Date]
    FROM cte
    WHERE DATEADD(MONTH, 1, [Month Start]) <= [End Date]
      AND DATEADD(MONTH, 1, [Month Start]) <= '2015-03-01' -- End of the date range you want
)
SELECT [2015-01-01],[2015-02-01],[2015-03-01] --List of Dates you want
FROM (
    SELECT [Item Id] AS rn, -- non-pivoted copy of the id: PIVOT groups by it, giving one row per item
           [Item Id],
           CONVERT(VARCHAR(10), [Month Start], 120) AS [Month Start] -- Format date to yyyy-mm-dd
    FROM cte
) t
PIVOT
( MAX([Item Id])
  FOR [Month Start] IN ([2015-01-01],[2015-02-01],[2015-03-01])
) p

You most likely need to use dynamic SQL.
This is your data:
-- Sample data: the reporting window and the items from the question.
DECLARE #first DATE = '20150101',
        #last DATE = '20150301';

CREATE TABLE #items
(
    ItemId INT,
    StartDate DATE,
    EndDate DATE
);

INSERT INTO #items (ItemId, StartDate, EndDate)
VALUES
    (1, '2015-01-01', '2015-06-01'),
    (2, '2015-01-01', '2015-02-01'),
    (3, '2015-03-01', '2015-08-01'),
    (4, '2015-06-01', '2015-10-01');
You first need to get the range of values and columns:
-- Accumulators for the two text fragments spliced into the dynamic pivot:
-- #values becomes a VALUES row list, #cols becomes a bracketed column list.
declare #values varchar(max);
declare #cols varchar(max);
-- range(d): one row per month from #first to #last inclusive, as text.
-- The two cross-joined 10-row VALUES lists supply up to 100 rows to number,
-- so TOP(...) can return at most 100 months.
with range(d) as (
Select top(DATEDIFF(month, #first, #last)+1) cast(DATEADD(month, ROW_NUMBER() over(order by (select 0))-1, #first) as varchar(20))
From (
Select 1 From (values(1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) as x1(n)
Cross Join (values(1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) as x2(n)
) as x(n)
)
-- Row-by-row variable concatenation: on the first row the variable is NULL, so
-- coalesce() yields a single space; afterwards it yields "<previous>, ".
-- NOTE(review): no ORDER BY is specified, so the concatenation order relies on
-- the order in which the rows happen to be produced.
Select #values = coalesce(''+#values+ ', ', ' ') + '('''+d+''')'
, #cols = coalesce(''+#cols+ ', ', ' ') + '['+left(DATENAME(month, d), 3)+CAST(year(d) as char(4))+']'
From range
;
This basically creates a row for each month between #first and #last and concatenates them with parentheses and commas (#values) or brackets (#cols).
Content in #values and #cols look like this:
#values = ('2015-01-01'), ('2015-02-01'), ('2015-03-01')
#cols = [Jan2015], [Feb2015], [Mar2015]
You then create a SQL script using these two variables:
declare #sql nvarchar(max);
-- Build the pivot statement from #values (month rows) and #cols (column list).
-- The item filter is the standard interval-overlap test: an item is kept when it
-- starts on/before #last and ends on/after #first. Checking only whether #first
-- or #last falls inside the item would drop items lying entirely inside the window.
Set #sql = '
Select *
From (
Select i.ItemId, d = left(DATENAME(month, r.d), 3)+CAST(year(r.d) as char(4))
, id = case when r.d >= i.StartDate and r.d <= i.EndDate then i.ItemId end
From (values'+#values+') as r(d)
Cross Join (Select ItemId, StartDate, EndDate From #items
Where StartDate <= #last and EndDate >= #first
) i
) as dates
Pivot (
min(id)
For d in('+#cols+')
) as piv
';
This is the pivot query.
Created SQL will look like this in this example:
Select *
From (
Select i.ItemId, d = left(DATENAME(month, r.d), 3)+CAST(year(r.d) as char(4))
, id = case when r.d >= i.StartDate and r.d <= i.EndDate then i.ItemId end
From (values ('2015-01-01'), ('2015-02-01'), ('2015-03-01')) as r(d)
Cross Join (Select ItemId, StartDate, EndDate From #items
Where StartDate <= #last and EndDate >= #first
) i
) as dates
Pivot (
min(id)
For d in( [Jan2015], [Feb2015], [Mar2015])
) as piv
You can finally execute the script:
-- Run the generated statement, passing #first and #last as typed parameters
-- instead of concatenating their values into the SQL text.
exec sp_executesql #sql, N'#first date, #last date', #first, #last;
Output:
ItemId Jan2015 Feb2015 Mar2015
1 1 1 1
2 2 2 NULL
3 NULL NULL 3

Probably something like....
-- One column per month: show the item id when the item's Start..End interval
-- covers the first day of that month, otherwise NULL. A plain range comparison
-- replaces the EXISTS/Month(Start) test, which only matched the month in which
-- an item starts (in any year) instead of every month the item spans.
-- NOTE(review): [End] is assumed to be the end-date column paired with Start;
-- adjust both names to the real columns.
Select
CASE WHEN t.Start <= '20150101' AND t.[End] >= '20150101' THEN t.ItemId END AS [2015-01-01]
,CASE WHEN t.Start <= '20150201' AND t.[End] >= '20150201' THEN t.ItemId END AS [2015-02-01]
,CASE WHEN t.Start <= '20150301' AND t.[End] >= '20150301' THEN t.ItemId END AS [2015-03-01]
,CASE WHEN t.Start <= '20150401' AND t.[End] >= '20150401' THEN t.ItemId END AS [2015-04-01]
,CASE WHEN t.Start <= '20150501' AND t.[End] >= '20150501' THEN t.ItemId END AS [2015-05-01]
,CASE WHEN t.Start <= '20150601' AND t.[End] >= '20150601' THEN t.ItemId END AS [2015-06-01]
,CASE WHEN t.Start <= '20150701' AND t.[End] >= '20150701' THEN t.ItemId END AS [2015-07-01]
,CASE WHEN t.Start <= '20150801' AND t.[End] >= '20150801' THEN t.ItemId END AS [2015-08-01]
-- ..... and so on..... for all the other months...
from TableName t

Related

Multi Day Values

I am having some troubles with my query. I am trying to get the total import rate for the current day, while also matching up the previous day with the correlating hour.
Example: 1AM (Current Day) = 20
1AM(Yesterday) = 25.
As of right now the columns Current and Yesterday are showing identical values.
-- Asker's query: orders per import hour for today (z) next to yesterday (OD).
-- Because z and OD are joined on the hour alone, every today-row is paired with
-- every yesterday-row of the same hour; both COUNTs then count the same joined
-- rows (when orderno is never NULL), which is why the two columns are identical.
SELECT
z.[import Hour],
COUNT(z.orderno) as CurrentDate,
COUNT(od.orderno) as Yesterday
FROM (
-- z: today's orders from the live and the archive table; UNION removes duplicates
(select datepart(hh, recvtime) as [import Hour],
orderno
from mck_hvs.orderheader with (nolock)
where convert(date, recvtime) = convert(date, getdate())
and orderno not like '%ST'
UNION
select datepart(hh, recvtime) as [import Hour],
orderno from mck_hvs.oldorderheader with (nolock)
where convert(date, recvtime) = convert(date, getdate())
and orderno not like '%ST' ) as z
Join
(
-- OD: yesterday's orders, read from the archive table only
select datepart(hh, od.recvtime) as [import Hour],
od.orderno from mck_hvs.oldorderheader od with (nolock)
where convert(date, od.recvtime) = convert(date, getdate()-1)
and od.orderno not like '%ST' ) as OD
ON z.[import Hour] = od.[import Hour] )
group by z.[import Hour]
If you use following data:
-- Sample orders: four on 2017-07-24 and four on 2017-07-25, two per hour (12 and 13).
DECLARE #Orders AS TABLE(OrderNo INT,OrderTaken datetime)
INSERT INTO #Orders (OrderNo, OrderTaken)
VALUES
    (123,'2017-07-24 12:20:24'),
    (124,'2017-07-24 12:30:24'),
    (125,'2017-07-24 13:40:24'),
    (126,'2017-07-24 13:50:24'),
    (227,'2017-07-25 12:20:24'),
    (228,'2017-07-25 12:30:24'),
    (229,'2017-07-25 13:40:24'),
    (220,'2017-07-25 13:50:24')
with output:
The following SQL:
DECLARE #Date DATETIME='2017-07-25'
-- hourly: order counts per calendar day and hour (0-23) for #Date and the day
-- before it. The half-open range on OrderTaken avoids wrapping the column in CAST.
;WITH hourly AS(
    SELECT CAST(OrderTaken AS DATE) AS oDate,
           DATEPART(hour, OrderTaken) AS oHour24,
           COUNT(OrderNo) AS OrderCount
    FROM #Orders
    WHERE OrderTaken >= DATEADD(day, -1, CAST(CAST(#Date AS DATE) AS DATETIME))
      AND OrderTaken <  DATEADD(day, 1, CAST(CAST(#Date AS DATE) AS DATETIME))
    GROUP BY CAST(OrderTaken AS DATE), DATEPART(hour, OrderTaken)
)
SELECT t.oDate AS Today,
       -- 12-hour label: 0 -> 12 AM, 1..11 -> AM, 12 -> 12 PM, 13..23 -> 1..11 PM
       CONVERT(VARCHAR(2), (t.oHour24 + 11) % 12 + 1)
           + CASE WHEN t.oHour24 < 12 THEN ' AM' ELSE ' PM' END AS [Hour],
       t.OrderCount AS OrderCountToday,
       -- LEFT JOIN + COALESCE: an hour with orders today but none yesterday
       -- is reported with 0 instead of being dropped
       COALESCE(y.OrderCount, 0) AS OrderCountYesterday
FROM hourly t
LEFT JOIN hourly y
    ON y.oHour24 = t.oHour24
   AND y.oDate = DATEADD(day, -1, t.oDate)
WHERE t.oDate = CAST(#Date AS DATE)
ORDER BY t.oHour24
will result as:
p.s.: You can summarize this SQL a lot if you don't want to use AM/PM thing in the hour. Even the logic for AM/PM can be summarized further but that will show 12 PM as 0 PM.
Hope this helps.
I've included a bit of code up front just to generate some test data. You probably won't need it, but others might.
USE sandbox
GO
--------------------------------------------------------------------------------------------
--Rebuild the OP's two order tables in the sandbox; each is keyed on orderno
DROP TABLE IF EXISTS mck_hvs.orderheader; --2016 syntax
CREATE TABLE mck_hvs.orderheader
(
    recvtime datetime NOT NULL,
    orderno varchar(20) NOT NULL,
    CONSTRAINT PK_mck_hvs_orderheader_orderno PRIMARY KEY CLUSTERED (orderno)
);
GO
--------------------------------------------------------------------------------------------
DROP TABLE IF EXISTS mck_hvs.oldorderheader;
CREATE TABLE mck_hvs.oldorderheader
(
    recvtime datetime NOT NULL,
    orderno varchar(20) NOT NULL,
    CONSTRAINT PK_mck_hvs_oldorderheader_orderno PRIMARY KEY CLUSTERED (orderno)
);
GO
--------------------------------------------------------------------------------------------
--Generate some test data (about two years worth working back from yesterday)
--First the old data (not today's).
--Each row is anchored at midnight of a day 1..720 days ago and then gets a random
--offset of fewer than 1440 minutes, so no "old" timestamp can spill into today.
INSERT mck_hvs.oldorderheader (recvtime, orderno)
SELECT top 100000 dateadd(mi,abs(checksum(newid())) %1440,dateadd(dd,datediff(dd,0,getdate())-1-abs(checksum(newid())) %720,0)) as recvtime
,right('0000000' + cast(row_number() OVER(ORDER BY (SELECT NULL)) as varchar(20)),7)
+ char(82 + abs(checksum(newid()))%4) + char(82 + abs(checksum(newid()))%4) as orderno--add two random chars from R,S,T,U to the end
FROM sys.columns col1
cross join sys.columns col2
;
--Now today's (assume 500 orders came in)
INSERT mck_hvs.orderheader (recvtime, orderno)
SELECT top 500 dateadd(mi,abs(checksum(newid())) %1440,dateadd(dd,datediff(dd,0,getdate()),0)) --add a random number of minutes to midnight last night
,right('0000000' + cast(row_number() OVER(ORDER BY (SELECT NULL)) +100000 as varchar(20)),7)
+ char(82 + abs(checksum(newid()))%4) + char(82 + abs(checksum(newid()))%4) --add two random chars from R,S,T,U to the end
FROM sys.columns col1
cross join sys.columns col2
;
--------------------------------------------------------------------------------------------
-- Orders received per hour, yesterday vs. today, one row per hour that has
-- activity on either day.
WITH yesterday /*all my problems seemed so far away*/as
(
SELECT datepart(hour,old.recvtime) as received_hour
,count(*) as orders_received
FROM mck_hvs.oldorderheader as old
WHERE old.recvtime < dateadd(dd,datediff(dd,0,getdate()),0) --Yesterday
AND old.recvtime >= dateadd(dd,datediff(dd,0,getdate())-1,0)
AND old.orderno not like '%ST' --Note: This is not a SARGable search.
GROUP BY datepart(hour,recvtime)
)
, today as
(
SELECT datepart(hour,ordr.recvtime) as received_hour
,count(*) as orders_received
FROM mck_hvs.orderheader as ordr
--Restrict to today explicitly so rows from other days in orderheader are not counted as today's
WHERE ordr.recvtime >= dateadd(dd,datediff(dd,0,getdate()),0)
AND ordr.recvtime < dateadd(dd,datediff(dd,0,getdate())+1,0)
AND ordr.orderno not like '%ST' --Google "SARG". Trust me.
GROUP BY datepart(hour,ordr.recvtime)
)
SELECT isnull(yesterday.received_hour,today.received_hour) as received_hour
,isnull(yesterday.orders_received,0) as orders_received_yesterday
,isnull(today.orders_received,0) as orders_received_today
FROM yesterday
--FULL JOIN in case there are hours of activity in one table that don't exist in the other table.
FULL JOIN today ON yesterday.received_hour = today.received_hour
ORDER BY received_hour --deterministic output: hours in ascending order
;

Iterate value dynamically

I'm using the query below to calculate a budget value dynamically, i.e. iterating up to the selected date.
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinMonth) end) as [Budget]
Here variable #DaysPastinMonth should be dynamic. Means if I select a date as 03/31/2017. Then the query should run upto the previous month value. Another example is if I select August, then I need to run query from Jan-Aug.
For Jan
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinJanMonth) end) as [Budget]
For Feb
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinFebMonth) end) as [Budget]
For Mar
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinMarMonth) end) as [Budget]
Also I have created variables for all the 12 months which holds DaysPastinMonth.
Can anyone suggest how this can be achieved using case statement.
You are thinking about this as a loop when you could do it with set-based operations.
----------------------------------------------------------
--Create a table of dates for testing
----------------------------------------------------------
-- (Re)create #dates with one row per day from #sdate to #edate and a row number
-- that the test-data step joins on.
IF OBJECT_ID('tempdb..#dates') IS NOT NULL
BEGIN
    DROP TABLE #dates
END

CREATE TABLE #dates
(
    d DATE,
    RN BIGINT
)

DECLARE #sdate DATETIME = '2017-01-01 00:00',
        #edate DATETIME = '2017-7-31 00:00'

INSERT INTO #dates
SELECT DATEADD(d, v.number, #sdate),
       ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS RN
FROM master..spt_values AS v
WHERE v.type = 'P'
  AND v.number <= DATEDIFF(d, #sdate, #edate)

-- Number of generated days; used to size the random samples
DECLARE #numOfDays INT
SELECT #numOfDays = COUNT(*) FROM #dates
----------------------------------------------------------
--Populate Test Data
----------------------------------------------------------
-- (Re)create #testTable and fill it with one 'Budget' row and one
-- 'Not The Budget' row per date in #dates, each carrying a random value.
if object_id('tempdb..#testTable') is not null
drop table #testTable
create table #testTable([Name] varchar(64),
[Value] decimal (16,4),
DT datetime)
insert into #testTable ([Name],[Value],DT)
select
'Budget'
,r.randomNumber
,d.d
from
#dates d
inner join
-- r: up to #numOfDays distinct random values (one candidate per sysobjects row),
-- numbered so that each can be paired with a date through RN.
-- NOTE(review): if sysobjects yields fewer distinct values than #numOfDays, the
-- inner join leaves the remaining dates without a row.
(SELECT TOP (select #numOfDays)
randomNumber,
row_number() over (order by (select null)) as RN
FROM (
SELECT CAST(ABS(CAST(NEWID() AS binary(6)) %100000) + RAND() AS DECIMAL (16,4)) + 1 randomNumber
FROM sysobjects) sample
GROUP BY randomNumber
ORDER BY randomNumber DESC) r on r.RN = d.RN
union all
-- Second set of rows with a different Name, built the same way, so the query
-- under test has something to filter out.
select
'Not The Budget'
,r.randomNumber
,d.d
from
#dates d
inner join
(SELECT TOP (select #numOfDays)
randomNumber,
row_number() over (order by (select null)) as RN
FROM (
SELECT CAST(ABS(CAST(NEWID() AS binary(6)) %100000) + RAND() AS DECIMAL (16,4)) + 1 randomNumber
FROM sysobjects) sample
GROUP BY randomNumber
ORDER BY randomNumber DESC) r on r.RN = d.RN
----------------------------------------------------------
--Instead of making your variables "dynamic" which
--would likely consist of some loop, just pass in the
--month you care about and let SQL do the work
----------------------------------------------------------
-- Date the report runs up to; rows from 1 Jan of its year through the end of
-- its month are included.
declare #month datetime = '2017-03-31'
select
DT
,[Value]
,[Name]
,sum(case when [Name] = 'Budget'
then [Value] +
-- divisor: days elapsed between the 1st of #month's month and #month;
-- NULLIF turns the 0 obtained on the 1st of a month into NULL instead of a divide-by-zero error
(([Value] / nullif(DATEDIFF(day,DATEADD(month, DATEDIFF(month, 0, #month), 0),#month), 0))
*
-- multiplier: days between the first and the last day of the month before #month
-- NOTE(review): confirm these two terms are the intended TotalDaysInMonth / DaysPastInMonth
(DATEDIFF(DAY,DATEADD(MONTH, DATEDIFF(MONTH, 0, #month)-1, 0),DATEADD(MONTH, DATEDIFF(MONTH, -1, #month)-1, -1)))) end) as Budget
from
#testTable
where
DT >= DATEADD(yy, DATEDIFF(yy, 0, #month), 0) --this is Jan 1 of the year associated with your variable
and DT < DATEADD(month, DATEDIFF(month, 0, #month) + 1, 0) --first day of the month after #month, so later months are excluded
group by
DT
,[Name]
,[Value]

Conditional Count On Row_Number

I have a query that calculates the number working days within a month based on a table which stores all our public holidays.
The current output would show all working days, excluding public holidays and Saturday and Sunday, I would like to show each day of the month, but don't increment on a public holiday or Saturday or Sunday.
Is there a way to conditionally increment the row number?
Query is below:
-- Asker's query: numbers the working days of the current month (in 2016).
DECLARE #startnum INT=0
DECLARE #endnum INT=365;
-- gen: the integers #startnum..#endnum, produced by recursion
WITH gen AS
(
SELECT #startnum AS num
UNION ALL
SELECT num + 1
FROM gen
WHERE num + 1 <= #endnum
)
-- holidays: the public-holiday dates from the work calendar
, holidays AS
(
SELECT CONVERT(DATE, transdate) AS HolidayDate
FROM WORKCALENDER w
WHERE w.CALENDARID = 'PubHoliday'
)
-- allDays: 1 Jan 2016 plus each offset from gen, with its weekday name
, allDays AS
(
SELECT DATEADD( d, num, CONVERT( DATE, '1 Jan 2016' ) ) AS DateOfYear
, DATENAME( dw, DATEADD( d, num, CONVERT( DATE, '1 Jan 2016' ))) AS [dayOfWeek]
FROM gen
)
-- ROW_NUMBER only sees the rows that survive the WHERE clause, so holidays and
-- weekends are removed from the output rather than shown without a number.
select number = ROW_NUMBER() OVER ( ORDER BY DateOfYear )
, *
from allDays
LEFT OUTER JOIN holidays
ON allDays.DateOfYear = holidays.HolidayDate
WHERE holidays.HolidayDate IS NULL
AND allDays.dayOfWeek NOT IN ( 'Saturday', 'Sunday')
-- Keep only the current calendar month, with the year fixed to 2016.
-- NOTE(review): in December the upper bound becomes '1 January 2016', which
-- would exclude every row.
AND DateOfYear >= CONVERT( DATE, '1 ' + DATENAME( MONTH, GETDATE() ) + ' 2016' )
AND DateOfYear < CONVERT( DATE, '1 ' + DATENAME( MONTH, DATEADD( month, 1, GETDATE()) ) + ' 2016' )
-- gen recurses 365 levels, so the recursion limit is raised explicitly
option (maxrecursion 10000)
kind of pseudo code
select date, row_number() over (order by date) as num
from ( select date
from allDates
where month = x and weekday
except
select date
from holidays
where month is x
) as t
union all
select date, null
from holidays
where month is x
order by date
You could use a windowed sum, see how the output of WorkdaySequenceInMonth is composed.
-- Lists every day in the period and a running count of working days that
-- restarts each month and does not advance on weekends or holidays.
DECLARE #startDate DATE = '20160101'
, #numDays INT = 365;
DECLARE #Holidays TABLE (Holiday DATE);
INSERT INTO #Holidays(Holiday)
VALUES ('20160101')
, ('20160115')
, ('20160714');
-- nums: 0-based row numbers taken from sys.columns (used only as a row source)
WITH nums AS
(
SELECT row_number() OVER (ORDER BY object_id) - 1 as num
FROM sys.columns
),
-- dateRange: the first #numDays consecutive dates starting at #startDate
dateRange as
(
SELECT
DATEADD(DAY, num, #startDate) AS Dt
, num
FROM nums
WHERE num < #numDays
),
-- Parts: date parts plus a 1/0 flag marking working days
Parts AS
(
SELECT
R.Dt as [Date]
, Year(R.Dt) as [Year]
, Month(R.Dt) as [Month]
, Day(R.Dt) as [Day]
, Datename(weekday, R.Dt) as [Weekday]
, CASE WHEN H.Holiday IS NOT NULL
OR Datename(weekday, R.Dt) IN ('Saturday', 'Sunday')
THEN 0
ELSE 1
END AS IsWorkday
FROM dateRange R
LEFT JOIN #Holidays H ON R.Dt = H.Holiday
)
--
select
[Date]
, [Year]
, [Month]
, [Day]
, [Weekday]
, IsWorkday
-- running total of the flag within each month: stays flat on non-working days
, sum(IsWorkday) over (PARTITION BY [Year],[Month]
ORDER BY [Day]
ROWS UNBOUNDED PRECEDING) as WorkdaySequenceInMonth
from Parts
-- order by the full date so days within a month come out in sequence
order by [Date]
Hi You can try this query, the initial part is the data generation, maybe you won't need it.
Then I generate a temp table with all the dates for the time period set in #StartYear, #EndYear
Then just simple queries to return the data
-- generate holidays table (stored as DATE so comparisons need no implicit conversion)
select cast(t.holiday as date) as holiday
into #tempHolidays
from
(
select '20160101' as holiday
union all
select '20160201' as holiday
union all
select '20160205' as holiday
union all
select '20160301' as holiday
union all
select '20160309' as holiday
union all
select '20160315' as holiday
) as t
create table #tempCalendar (Date_temp date)
-- show the holidays being used
select * from
#tempHolidays
declare #startYear int , #endYear int, #dateStart datetime , #dateEnd datetime, #date datetime
-- Reporting period: 1 Jan of #startYear through 31 Dec of #endYear.
-- The years are assigned in their own statement so the derived dates below
-- never depend on the evaluation order within a single SELECT.
Select #startYear = 2016
,#endYear = 2016
Select #dateStart = cast(cast(#startYear as varchar(4)) + '0101' as datetime)
,#dateEnd = cast(cast(#endYear as varchar(4)) + '1231' as datetime)
set #date = #dateStart
--Insert dates of the period of time (<= so that the last day, 31 Dec, is included)
while (#date <= #dateEnd)
begin
insert into #tempCalendar (Date_temp)
Select #date
set #date = DATEADD(dd,1,#date)
end
-- Retrieve Date list: every weekday that is not a holiday
Select c.Date_temp
from #tempCalendar c
where not exists (Select 1 from #tempHolidays h where h.holiday = c.Date_temp)
and datename(weekday,c.Date_temp) not in ('Saturday','Sunday')
order by c.Date_temp
--Retrieve sum of working days per month
select DATEPART(year,c.Date_temp) as year
,DATEPART(month,c.Date_temp) as Month
,Count(*) as CountOfWorkingDays
from #tempCalendar c
where not exists (Select 1 from #tempHolidays h where h.holiday = c.Date_temp)
and datename(weekday,c.Date_temp) not in ('Saturday','Sunday')
Group by DATEPART(year,c.Date_temp)
,DATEPART(month,c.Date_temp)
Order by DATEPART(year,c.Date_temp)
,DATEPART(month,c.Date_temp)
You should replace #tempHolidays with your own holidays table, and set #startYear and #endYear to your time period.
Here's a simple demo that shows the use of the partition by clause to keep contiguity in your sequencing for non-holidays
-- Demo: number the non-holiday dates contiguously while still listing holidays.
IF OBJECT_ID('tempdb.dbo.#dates') IS NOT null
DROP TABLE #dates;
CREATE TABLE #dates (d DATE);
IF OBJECT_ID('tempdb.dbo.#holidays') IS NOT null
DROP TABLE #holidays;
CREATE TABLE #holidays (d DATE);
INSERT INTO [#holidays]
( [d] )
VALUES
('2016-12-25'),
('2017-12-25'),
('2018-12-25');
-- ORDER BY makes TOP pick the 1000 smallest numbers, i.e. a contiguous date run
INSERT INTO [#dates]
( [d] )
SELECT TOP 1000 DATEADD(DAY, [n].[n], '2015-12-31')
FROM [Util].dbo.[Numbers] AS [n]
ORDER BY [n].[n];
WITH holidays AS (
SELECT d.*, CASE WHEN h.d IS NULL THEN 0 ELSE 1 END AS [IsHoliday]
FROM [#dates] AS [d]
LEFT JOIN [#holidays] AS [h]
ON [d].[d] = [h].[d]
)
-- Partitioning by IsHoliday keeps the non-holiday sequence gap-free; holidays
-- are shown with NULL rather than with a separate sequence of their own.
SELECT d,
CASE WHEN [holidays].[IsHoliday] = 0
THEN ROW_NUMBER() OVER (PARTITION BY [holidays].[IsHoliday] ORDER BY d)
END AS [WorkdayNumber]
FROM [holidays]
ORDER BY d;
And please forgive my marking only Christmas as a holiday!

how can I get data from every month from past one year in t-sql and each month should be in different column

I want data from past 1 year, for example today is 02-05-2014, i need data from May 2013, June 2013..till April 2014 as separate columns. If anybody can help with this code in t-sql that will be very helpful for me. Thanks
ok here i want the data. below is my column
created date
------------
02-05-2013
16-05-2013
05-06-2013
22-07-2013
01-08-2013
09-08-2013
02-09-2013
03-10-2013
19-11-2013
11-12-2013
03-01-2014
29-02-2014
15-03-2014
19-04-2014
I want the result as
May 2013 June 2013 July 2013 August 2013 till April 2014
-------- --------- --------- ----------- ----------
02-05-2013 05-06-2013 22-07-2013 01-08-2013 19-04-2014
16-05-2013 09-08-2013
and also I want to make Columns dynamically which is very important for my query
Although everyone was quick to suggest using PIVOT, that really won't work here because PIVOT would need to aggregate each column into one (max, min, whatever) date for that month and year.
I gave this question some attention because it was actually an interesting challenge. The reality is, this is best done by some reporting tool, such as SSRS, because your output is really a timeline report.
Because of the requirement for dynamic columns and such, I really don't see how this can be done without variables and some dynamic sql, so assuming that is acceptable, the following is tested and will output exactly what you described. It essentially creates 12 CTE tables, each containing the dates for a month and year (counting backwards from whatever month and year you run the sql). It then creates the report simply by using a FULL OUTER JOIN of all the tables. However, if you full joined just the 12 tables, each column would have its values scattered randomly over several rows with many nulls in between. To line the dates in each column up at the top, it was necessary to add a base table with sequential numbers that each mo/yr table can join on. The sequential numbers are generated up to the max number of dates for any given mo/yr. (Note: with the base number table, a LEFT OUTER JOIN would have sufficed as well...)
This assumes the table name is dbo.MyTable and the date column is CreatedDate:
DECLARE #cteSql nvarchar(MAX) = '';
DECLARE #tblSql nvarchar(MAX) = '';
DECLARE #frmSql nvarchar(MAX) = '';
DECLARE #colNm varchar(10);
DECLARE #tblNm varchar(3);
DECLARE #i int = 0;
/* today's date */
DECLARE #td date = GETDATE();
/* reporting window: first day of the month 11 months back, up to (not including) the first day of next month */
DECLARE #winStart date = DATEADD(m, DATEDIFF(m, 0, #td) - 11, 0);
DECLARE #winEnd date = DATEADD(m, DATEDIFF(m, 0, #td) + 1, 0);
/* max number of dates per yr/mo inside the window only, so older busy months do not add all-NULL rows;
   falls back to 1 when the window is empty so the generated sql is never NULL */
DECLARE #maxItems int = ISNULL((SELECT MAX(CNT) FROM (SELECT COUNT(*) AS CNT FROM dbo.MyTable WHERE CreatedDate >= #winStart AND CreatedDate < #winEnd GROUP BY YEAR(CreatedDate), MONTH(CreatedDate)) T), 1)
/* a table of sequential numbers up to the max per yr/mo; this is so the full outer join is laid out neatly */
SET #cteSql = 'WITH T(id) AS( SELECT id = 1 UNION ALL SELECT id + 1 FROM T WHERE id + 1 <= ' + CAST(#maxItems AS varchar(16)) + ')';
/* count down from current date to past 12 months */
WHILE #i > -12
BEGIN
/* a simple name for each CTE: T0, T1, T2 etc */
SET #tblNm = 'T' + CAST((#i*-1) AS varchar(2));
/* rpt column names; [Jan 2014], [Feb 2014] etc */
SET #colNm = '[' + RIGHT(CONVERT(varchar(11), DATEADD(m, #i, #td), 106),8) + ']';
/* each CTE contains a sequential id and the dates belonging to that month and yr */
SET #cteSql += ', ' + #tblNm + '(id, ' + #colNm + ')'
+ ' AS (SELECT ROW_NUMBER() OVER(ORDER BY CreatedDate) AS id, CreatedDate FROM dbo.MyTable WHERE YEAR(CreatedDate) = ' + CAST(YEAR(DATEADD(m, #i, #td)) AS varchar(4))
+ ' AND MONTH(CreatedDate) = ' + CAST(MONTH(DATEADD(m, #i, #td)) AS varchar(2)) + ')';
/* this will eventually be the SELECT statement for the report...just the month columns, not the id */
SET #tblSql = ', ' + #colNm + #tblSql;
/* concatenate all the columns using FULL OUTER JOIN with the first table of simple sequential numbers as the driver */
SET #frmSql += ' FULL OUTER JOIN ' + #tblNm + ' ON T.id = ' + #tblNm + '.id ';
SET #i -= 1;
END
/* put all the sql together; ORDER BY keeps the rows in sequence and
   MAXRECURSION 0 lifts the 100-level limit in case a month holds more than 100 dates */
SET #tblSql = #cteSql + ' SELECT' + STUFF(#tblSql, 1, 1, '') + ' FROM T ' + #frmSql + ' ORDER BY T.id OPTION (MAXRECURSION 0)'
/* view the generated sql */
-- SELECT #tblSql AS X
/* this should generate the report you described above, showing the last 12 months from whatever date you run it */
EXECUTE (#tblSql)
Output:
Jun 2013 Jul 2013 Aug 2013 Sep 2013 Oct 2013 Nov 2013 Dec 2013 Jan 2014 Feb 2014 Mar 2014 Apr 2014 May 2014
---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
2013-06-05 2013-07-22 2013-08-01 2013-09-02 2013-10-03 2013-11-19 2013-12-11 2014-01-03 2014-02-28 2014-03-15 2014-04-19 NULL
2013-06-07 NULL 2013-08-09 NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL 2013-08-10 NULL NULL NULL NULL NULL NULL NULL NULL NULL
As it turns out, the sql generated is conceptually similar to what @Hogan suggested, although I did not realize it at first. It really just adds the dynamic naming plus the segregation by yr/mo and not just month.
Here is a way to do it without a dynamic pivot. I only did it for 2013, you can see what is needed to add more columns:
(working fiddle: http://sqlfiddle.com/#!6/d9797/1)
-- Lays the 2013 dates out in one column per month, packed to the top of each column.
-- nums: every 2013 date with its month number and its position (RN) within that
-- month. PARTITION BY replaces twelve identical per-month SELECTs, and the date
-- range keeps other years out of columns that are labelled 2013.
with nums as
(
    select [create date],
           MONTH([create date]) as M,
           ROW_NUMBER() OVER (PARTITION BY MONTH([create date]) ORDER BY [create date] ASC) as RN
    from table1
    where [create date] >= '20130101'
      and [create date] < '20140101'
)
-- One output row per position RN; each (RN, M) pair holds at most one date, so
-- MAX simply picks that date (or NULL when the month has fewer dates).
SELECT
    MAX(CASE WHEN M = 1 THEN [create date] END) as [Jan 2013],
    MAX(CASE WHEN M = 2 THEN [create date] END) as [Feb 2013],
    MAX(CASE WHEN M = 3 THEN [create date] END) as [Mar 2013],
    MAX(CASE WHEN M = 4 THEN [create date] END) as [Apr 2013],
    MAX(CASE WHEN M = 5 THEN [create date] END) as [May 2013],
    MAX(CASE WHEN M = 6 THEN [create date] END) as [Jun 2013],
    MAX(CASE WHEN M = 7 THEN [create date] END) as [Jul 2013],
    MAX(CASE WHEN M = 8 THEN [create date] END) as [Aug 2013],
    MAX(CASE WHEN M = 9 THEN [create date] END) as [Sep 2013],
    MAX(CASE WHEN M = 10 THEN [create date] END) as [Oct 2013],
    MAX(CASE WHEN M = 11 THEN [create date] END) as [Nov 2013],
    MAX(CASE WHEN M = 12 THEN [create date] END) as [Dec 2013]
FROM nums
GROUP BY RN
ORDER BY RN ASC
OOOPS this already have an answer T_T,
but try this if you have a free time. :)
use a lot of things in this case:
CTE
STUFF
CONVERT
SUBSTRING
declare #xCol nvarchar(max);
--Begin create column
-- cte: one row per distinct month present in tempData.
--   mm  = the "dd mon yyyy" text (style 106) from its 3rd character onwards
--   ord = yyyymm (first 6 characters of style 112), used to sort chronologically
with cte as
(
select aa.month as mm
, aa.ord
from
(
select
substring(convert(nvarchar(max),xdate,106),3,len(convert(nvarchar(max),xdate,106))) as month
,convert(nvarchar(6), xdate,112) as ord
from tempData vv
) aa
group by aa.ord, aa.month
)
-- Comma-separated list of the month labels in chronological order; without the
-- ORDER BY the column order of the final report would be undefined.
-- STUFF(...,1,1,'') drops the leading comma.
select #xCol = stuff(
(
select ',' + c2.mm
from cte c2
order by c2.ord
for xml path ('')
),1,1,''
)
;
-- Wrap every label in brackets: a,b,c -> [a],[b],[c]
select #xCol = '[' + replace(#xCol,',','],[') + ']'
--select #xCol as '#columns', len(#xCol)
--END CREATE COLUMNS
--CREATE INPUT STRING
-- One generated INSERT statement per output row of the report.
Declare #tbl_inputstr table
(
id int,
xstr nvarchar(max)
)
;
-- cte: every date with its month label and its position (xrow) within that month.
with cte as
(
select
a.xdate, a.month
,row_number() over(partition by a.month order by a.xdate) as xrow
from
(
select
xdate
, substring(convert(nvarchar(max),xdate,106),3,len(convert(nvarchar(max),xdate,106))) as month
from tempData
) a
)
-- For each position, build "Insert into ... (<month columns>) Values(<dates>)".
-- Both lists are ordered by xdate so the n-th column always lines up with the
-- n-th value; STUFF(...,1,1,'') removes the leading comma of each list.
insert into #tbl_inputstr(id,xstr)
select r.xrow as id,
' Insert into #tempData (' + stuff(
(
select ',[' + cast(c2.month as nvarchar(max)) + ']'
from cte c2
where c2.xrow = r.xrow
order by c2.xdate
for xml path ('')
),1,1,''
) + ')'
+' Values(' + stuff(
(
select ',''' + cast(c2.xdate as nvarchar(max)) + ''''
from cte c2
where c2.xrow = r.xrow
order by c2.xdate
for xml path ('')
),1,1,''
) + ')'
from (select distinct xrow from cte) r
order by r.xrow;
select * from #tbl_inputstr
-- Join all generated INSERT statements into one ';'-separated batch.
-- stuff(...,1,0,'') changes nothing; the surrounding substring(...,2,len(...))
-- is what removes the leading ';'.
Declare #inputStr nvarchar(max)
select #inputStr =
substring(stuff
(
(
select ';' + xstr
from #tbl_inputstr
for xml path('')
),1,0,''
),2, len(stuff
(
(
select ';' + xstr
from #tbl_inputstr
for xml path('')
),1,0,''
))
)
-- Final batch: declare a table variable with one nvarchar(max) column per month
-- label, run the INSERTs, then select the month columns back out.
select #inputStr= 'Declare #tempData Table (' +replace(#xCol,']', '] nvarchar(max)') + ');' + #inputStr
+ '; select ' + #xCol
+ ' from #tempData'
exec(#inputStr)
--END INPUT STRING

Joining massive CTE tables (13,000,000 rows+) performance problems

We have a production database that manages personnel booking at 100s of branches for years in advance with minute level accuracy.
Part of this system are reports that highlight gaps, i.e. compare branch opening hours and staff bookings to see if any branches are open with nobody booked.
It also checks for overlaps, double bookings etc all at the same time, basically minute level accuracy is required.
The way we're doing this is to expand the start and end times of openings hours and bookings into minutes with an integer tally table:
--===== Create and populate the Tally table on the fly
-- The three-way cross join of the system column catalogue is only a row
-- source; TOP keeps at most 16,777,216 rows and IDENTITY numbers them from 1.
SELECT TOP 16777216
    IDENTITY(INT, 1, 1) AS N
INTO dbo.Tally
FROM Master.dbo.SysColumns AS sc1
CROSS JOIN Master.dbo.SysColumns AS sc2
CROSS JOIN Master.dbo.SysColumns AS sc3
--===== Add a Primary Key to maximize performance
ALTER TABLE dbo.Tally
    ADD CONSTRAINT PK_Tally_N PRIMARY KEY CLUSTERED (N)
    WITH FILLFACTOR = 100
We utilise this static indexed tally table to expand opening hours and bookings as follows:
-- One row per minute of every opening-hours row: tally value N maps to
-- StartTime + (N - 1) minutes, for N up to the span in minutes + 1.
SELECT oh.[BranchID],
       oh.[DayOfWeek],
       DATEADD(MINUTE, t.N - 1, oh.StartTime)
FROM OpeningHours AS oh
LEFT JOIN tally AS t
    ON t.N BETWEEN 0 AND DATEDIFF(MINUTE, oh.StartTime, oh.EndTime) + 1
The problem is, once we have the 13,000,000 "open minutes" and the "booked minutes" we then need to join the results to see what's covered:
-- Every open minute with the number of bookings covering it (0 when none).
SELECT odm.[Date],
       odm.[Time],
       odm.[BranchID],
       ISNULL(bm.BookingCount, 0) AS BookingCount
FROM OpenDatesAndMinutes AS odm
LEFT JOIN BookedMinutes AS bm
    ON  bm.BranchID = odm.BranchID
    AND bm.[Date]   = odm.[Date]
    AND bm.[Time]   = odm.[Time]
As you can imagine, joining on the branch, date & time with 13,000,000 rows all stored in CTE tables takes AGES - running it for a week isn't too bad (about 10 seconds), but if we run it for 6 months (13,000,000 minutes) it bloats to 25 minutes+.
Once we have joined the open minutes to the booked minutes we then group the data on islands and present to the user:
-- CrossTabPrep: tags each open minute from PreRender with an island key [Grp].
-- [Grp] = minute-of-day minus the row's position among rows with the same
-- branch, date and booking count (ordered by time). Consecutive minutes with
-- an unchanged booking count therefore share one [Grp] value.
CrossTabPrep ( [Date], [Time], [BranchID], [BookingCount], [Grp] )
AS ( SELECT [Date] ,
[Time] ,
[BranchID] ,
[BookingCount] ,
DATEPART(HOUR, Time) * 60 + DATEPART(MINUTE, Time) - ROW_NUMBER() OVER ( PARTITION BY [BranchID], Date, [BookingCount] ORDER BY Time ) AS [Grp]
FROM PreRender
),
-- FinalRender: collapses each island into one row with its first and last
-- minute, the minutes between them, the booking count and a colour code
-- (0 bookings = Red, exactly 1 = Green, anything else = Yellow).
FinalRender ( [BranchID], [Date], [Start Time], [End Time], [Duration], [EntryCount], [EntryColour] )
AS ( SELECT [BranchID] ,
[Date] ,
MIN([Time]) AS [Start Time] ,
MAX([Time]) AS [End Time] ,
ISNULL(DATEDIFF(MINUTE, MIN([Time]), MAX([Time])), 0) AS Duration ,
[BookingCount] AS EntryCount ,
CASE WHEN [BookingCount] = 0 THEN 'Red'
WHEN [BookingCount] = 1 THEN 'Green'
ELSE 'Yellow'
END AS EntryColour
FROM CrossTabPrep
GROUP BY [BranchID] ,
[Date] ,
[BookingCount] ,
[Grp]
)
Quite simply, is my method efficient? Is there any way I can improve on this method whilst retaining minute-level accuracy? When dealing with massive CTE tables such as this, would there be any benefit in dumping this data to indexed temp tables and joining them instead?
Another thing I was considering is replacing the DATE & TIME(0) data types that the big join uses; would it be more efficient if I cast these to integers?
Here is the Full CTE in case that helps:
-- Coverage report: expands branch opening hours and bookings to one row per
-- minute, joins them, then collapses consecutive minutes with the same booking
-- count into periods.
-- Inputs read from outside this statement: #StartDate, #EndDate,
-- #TimeSheetV2_SecurityContext, #tbl_Days and #ValidPosCodes.

-- OpeningHours: one row per branch, weekday and trading session (AM, PM, EVE),
-- limited to the weekdays listed in #tbl_Days. Sessions whose open time is
-- 00:00:00 are skipped.
WITH OpeningHours ( [BranchID], [DayOfWeek], [StartTime], [EndTime] )
AS ( SELECT BranchID ,
DayOfWeek ,
CONVERT(TIME(0), AM_open) ,
CONVERT(TIME(0), AM_close)
FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
WHERE CONVERT(TIME(0), AM_open) <> CONVERT(TIME(0), '00:00:00')
UNION ALL
SELECT BranchID ,
DayOfWeek ,
CONVERT(TIME(0), PM_open) ,
CONVERT(TIME(0), PM_close)
FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
WHERE CONVERT(TIME(0), PM_open) <> CONVERT(TIME(0), '00:00:00')
UNION ALL
SELECT BranchID ,
DayOfWeek ,
CONVERT(TIME(0), EVE_open) ,
CONVERT(TIME(0), EVE_close)
FROM db_BranchDetails.dbo.tbl_ShopOpeningTimes (NOLOCK)
INNER JOIN #tbl_Days Filter_Days ON db_BranchDetails.dbo.tbl_ShopOpeningTimes.DayOfWeek = Filter_Days.DayNumber
WHERE CONVERT(TIME(0), EVE_open) <> CONVERT(TIME(0), '00:00:00')
),
-- DateRange: one row per calendar day from #StartDate to #EndDate with its
-- weekday number (assumes tally.N starts at 1 - TODO confirm).
DateRange ( [Date], [DayOfWeek] )
AS ( SELECT CONVERT(DATE, DATEADD(DAY, N - 1, #StartDate)) ,
DATEPART(WEEKDAY, DATEADD(DAY, N - 1, #StartDate))
FROM tally (NOLOCK)
WHERE N <= DATEDIFF(DAY, #StartDate, #EndDate) + 1
),
-- OpenMinutes: one row per open minute of each session; tally value N maps to
-- StartTime + (N - 1) minutes.
OpenMinutes ( [BranchID], [DayOfWeek], [Time] )
AS ( SELECT [BranchID] ,
[DayOfWeek] ,
DATEADD(MINUTE, N - 1, StartTime)
FROM OpeningHours
LEFT OUTER JOIN tally ON tally.N BETWEEN 0
AND DATEDIFF(MINUTE, OpeningHours.StartTime, OpeningHours.EndTime) + 1
),
-- OpenDatesAndMinutes: the open minutes repeated for every date in range that
-- falls on the matching weekday. The IS NOT NULL filter discards dates with no
-- opening hours, so BranchID is never NULL from here on.
OpenDatesAndMinutes ( [Date], [Time], [BranchID] )
AS ( SELECT DateRange.[Date] ,
OpenMinutes.[Time] ,
OpenMinutes.BranchID
FROM DateRange
LEFT OUTER JOIN OpenMinutes ON DateRange.DayOfWeek = OpenMinutes.DayOfWeek
WHERE OpenMinutes.BranchID IS NOT NULL
),
-- WhiteListEmployees: employee numbers white-listed for the security context.
WhiteListEmployees ( [DET_NUMBERA] )
AS ( SELECT DET_NUMBERA
FROM [dbo].[tbl_ChrisCache_WhiteList]
WHERE [TimeSheetV2_SecurityContext] = #TimeSheetV2_SecurityContext
),
-- BookedMinutesByRole: bookings per branch, date and minute for employees
-- whose position code is listed in #ValidPosCodes.
BookedMinutesByRole ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [BookingDate] ,
DATEADD(MINUTE, N - 1, StartTime) ,
BranchID ,
COUNT(BookingID) AS Bookings
FROM tbl_Booking (NOLOCK)
INNER JOIN tbl_BookingReason (NOLOCK) ON dbo.tbl_BookingReason.ReasonID = dbo.tbl_Booking.ReasonID
INNER JOIN tbl_ChrisCache (NOLOCK) ON dbo.tbl_Booking.DET_NUMBERA = dbo.tbl_ChrisCache.DET_NUMBERA
INNER JOIN #ValidPosCodes AS Filter_PostCodes ON dbo.tbl_ChrisCache.POS_NUMBERA = Filter_PostCodes.POSCODE
LEFT OUTER JOIN tally (NOLOCK) ON tally.N BETWEEN 0
AND DATEDIFF(MINUTE, tbl_Booking.StartTime, tbl_Booking.EndTime) + 1
WHERE ( Void = 0 )
AND tbl_BookingReason.CoverRequired = 0 --#### Only use bookings that dont require cover
AND tbl_booking.BranchID <> '023' --#### Branch 23 will always have messy data
AND ( dbo.tbl_Booking.BookingDate BETWEEN #StartDate
AND #EndDate )
GROUP BY [BookingDate] ,
BranchID ,
DATEADD(MINUTE, N - 1, StartTime)
),
-- BookedMinutesByWhiteList: the same count, for white-listed employees.
BookedMinutesByWhiteList ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [BookingDate] ,
DATEADD(MINUTE, N - 1, StartTime) ,
BranchID ,
COUNT(BookingID) AS Bookings
FROM tbl_Booking(NOLOCK)
INNER JOIN tbl_BookingReason (NOLOCK) ON dbo.tbl_BookingReason.ReasonID = dbo.tbl_Booking.ReasonID
INNER JOIN tbl_ChrisCache (NOLOCK) ON dbo.tbl_Booking.DET_NUMBERA = dbo.tbl_ChrisCache.DET_NUMBERA
INNER JOIN WhiteListEmployees Filter_WhiteList ON dbo.tbl_Booking.DET_NUMBERA = Filter_WhiteList.DET_NUMBERA
LEFT OUTER JOIN tally (NOLOCK) ON tally.N BETWEEN 0
AND DATEDIFF(MINUTE, tbl_Booking.StartTime, tbl_Booking.EndTime) + 1
WHERE ( Void = 0 )
AND tbl_BookingReason.CoverRequired = 0 --#### Only use bookings that dont require cover
AND tbl_booking.BranchID <> '023' --#### Branch 23 will always have messy data
AND ( dbo.tbl_Booking.BookingDate BETWEEN #StartDate
AND #EndDate )
GROUP BY [BookingDate] ,
BranchID ,
DATEADD(MINUTE, N - 1, StartTime)
),
-- BookedMinutes: both sources combined. UNION (not UNION ALL) removes rows
-- that are identical in all four columns.
-- NOTE(review): a minute whose two counts differ yields two rows here, and so
-- two rows in PreRender - confirm that is intended.
BookedMinutes ( [Date], [Time], [BranchID], BookingCount )
AS ( SELECT [Date] ,
[Time] ,
[BranchID] ,
BookingCount
FROM BookedMinutesByRole
UNION
SELECT [Date] ,
[Time] ,
[BranchID] ,
BookingCount
FROM BookedMinutesByWhiteList
),
-- PreRender: every open minute with its booking count (0 when unbooked).
PreRender ( [Date], [Time], [BranchID], [BookingCount] )
AS ( SELECT OpenDatesAndMinutes.[Date] ,
OpenDatesAndMinutes.[Time] ,
OpenDatesAndMinutes.[BranchID] ,
ISNULL(BookedMinutes.BookingCount, 0) AS BookingCount
FROM OpenDatesAndMinutes
LEFT OUTER JOIN BookedMinutes ON OpenDatesAndMinutes.BranchID = BookedMinutes.BranchID
AND OpenDatesAndMinutes.[Date] = BookedMinutes.[Date]
AND OpenDatesAndMinutes.[Time] = BookedMinutes.[Time]
),
-- CrossTabPrep: island key. Minute-of-day minus the row number within the same
-- branch, date and booking count is constant across consecutive minutes.
CrossTabPrep ( [Date], [Time], [BranchID], [BookingCount], [Grp] )
AS ( SELECT [Date] ,
[Time] ,
[BranchID] ,
[BookingCount] ,
DATEPART(HOUR, Time) * 60 + DATEPART(MINUTE, Time) - ROW_NUMBER() OVER ( PARTITION BY [BranchID], Date, [BookingCount] ORDER BY Time ) AS [Grp]
FROM PreRender
),
-- DeletedBranches: branches flagged as deleted, excluded from the output.
DeletedBranches ( [BranchID] )
AS ( SELECT [ShopNo]
FROM [dbo].[vw_BranchList]
WHERE [Branch_Deleted] = 1
),
-- FinalRender: one row per island with its first and last minute, the minutes
-- between them, and a colour for the booking count.
FinalRender ( [BranchID], [Date], [Start Time], [End Time], [Duration], [EntryCount], [EntryColour] )
AS ( SELECT [BranchID] ,
[Date] ,
MIN([Time]) AS [Start Time] ,
MAX([Time]) AS [End Time] ,
ISNULL(DATEDIFF(MINUTE, MIN([Time]), MAX([Time])), 0) AS Duration ,
--dbo.format_timeV2(ISNULL(DATEDIFF(SECOND, MIN([Time]), MAX([Time])), 0)) AS DurationF ,
[BookingCount] AS EntryCount ,
CASE WHEN [BookingCount] = 0 THEN 'Red'
WHEN [BookingCount] = 1 THEN 'Green'
ELSE 'Yellow'
END AS EntryColour
FROM CrossTabPrep
GROUP BY [BranchID] ,
[Date] ,
[BookingCount] ,
[Grp]
)
SELECT [BranchID] ,
-- WeekEnding: (dw + DATEFIRST - 2) % 7 is the weekday index counted from
-- Monday regardless of the DATEFIRST setting, so this steps back to the Sunday
-- before the row's week and adds 7 days. Output is dd/mm/yyyy (style 103).
CONVERT(VARCHAR(10), DATEADD(DAY, 7, CONVERT(DATETIME, CONVERT(VARCHAR(10), DATEADD(day, -1 - ( DATEPART(dw, [Date]) + ##DATEFIRST - 2 ) % 7, [Date]), 103) + ' 23:59:59', 103)), 103) AS WeekEnding ,
[Date] ,
[Start Time] ,
[End Time] ,
[Duration] ,
-- DurationF: [Duration] (minutes) rendered as '<hours>h <minutes>m'.
CONVERT(VARCHAR, ( [Duration] * 60 ) / 3600) + 'h ' + CONVERT(VARCHAR, ROUND(( ( CONVERT(FLOAT, ( ( [Duration] * 60 ) % 3600 )) ) / 3600 ) * 60, 0)) + 'm' AS [DurationF] ,
[EntryCount] ,
[EntryColour] ,
CASE WHEN [EntryCount] = 0 THEN 'Red'
WHEN [EntryCount] >= 1 THEN 'Green'
END AS DurationColour ,
CASE WHEN [EntryCount] = 0 THEN 'This period of open-time isnt covered'
WHEN [EntryCount] >= 1 THEN 'This period of open-time is covered by ' + CONVERT(VARCHAR, [EntryCount]) + ' booking(s)'
END AS [DurationComment]
FROM FinalRender
-- NOT EXISTS instead of NOT IN: with NOT IN, a single NULL ShopNo among the
-- deleted branches would make the predicate UNKNOWN for every row and the
-- report would come back empty.
WHERE NOT EXISTS ( SELECT 1
FROM DeletedBranches
WHERE DeletedBranches.BranchID = FinalRender.BranchID )
It's funny, because you have answered your own question with your questions at the end. You should just try them all but to summarize:
Materialize CTEs for better performance. You never know when SQL Server will evaluate a CTE more than once
You can build indexes against temporary tables.
I'm not sure how you jumped from [DayOfWeek],DATEADD(MINUTE, N - 1, StartTime) to the join on [Date],[Time] on the other, but having two columns here doesn't make sense. Use either a single datetime or a bigint representing the seconds from an epoch. UnixTimestamp works well here.
My proposal is not based on your data but on generated test data, so it may not be fully applicable.
Proposal: In order to move from quadratic degradation of performance to at least linear, batch processing can be used, if data is distributed equally among batch periods.
In the example below, 2 years of bookings are processed with a 3-day batch interval, and it takes 2 minutes and 30 seconds to return the free periods per day per branch.
Test run results:
2 years - 2 minutes and 30 seconds
4 years - 4 minutes and 55 seconds.
6 years - 6 minutes and 41 seconds
It incorporates the same logic that is being used in question by using numbers to find non-matching minutes.
Schema and test data creation:
-- Reset: drop every object the test-data script creates, each in its own
-- batch, so the script can be re-run.
IF OBJECT_ID('vwRandomNumber') IS NOT NULL
BEGIN
    DROP VIEW vwRandomNumber;
END
GO
IF OBJECT_ID('dbo.fnRandNumber') IS NOT NULL
BEGIN
    DROP FUNCTION dbo.fnRandNumber;
END
GO
IF OBJECT_ID('dbo.fnRandomInt') IS NOT NULL
BEGIN
    DROP FUNCTION dbo.fnRandomInt;
END
GO
IF OBJECT_ID('tblNumbers') IS NOT NULL
BEGIN
    DROP TABLE dbo.tblNumbers;
END
GO
IF OBJECT_ID('Branches') IS NOT NULL
BEGIN
    DROP TABLE Branches;
END
GO
IF OBJECT_ID('OpeningHours') IS NOT NULL
BEGIN
    DROP TABLE OpeningHours;
END
GO
IF OBJECT_ID('Bookings') IS NOT NULL
BEGIN
    DROP TABLE Bookings;
END
GO
-- Single-row view exposing RAND(); the scalar functions below read their
-- random value through it (presumably because RAND() cannot be called
-- directly inside a user-defined function - verify for your server version).
CREATE VIEW vwRandomNumber
AS
    SELECT RAND() AS RandomNumber;
GO
-- Returns the random float supplied by vwRandomNumber.
CREATE FUNCTION dbo.fnRandNumber()
RETURNS FLOAT
AS
BEGIN
    RETURN (
        SELECT TOP 1 rnd.RandomNumber
        FROM vwRandomNumber AS rnd
    )
END;
GO
-- Returns #FromNumber plus a random fraction of the distance to #ToNumber,
-- rounded to a whole number.
CREATE FUNCTION dbo.fnRandomInt(#FromNumber INT, #ToNumber INT)
RETURNS INT
AS
BEGIN
    DECLARE #Span INT = #ToNumber - #FromNumber
    RETURN (#FromNumber + ROUND(dbo.fnRandNumber() * #Span, 0))
END;
GO
-- Numbers table (filled later in the script).
CREATE TABLE tblNumbers
(
    NumberID INT PRIMARY KEY
)
-- Test branches.
CREATE TABLE Branches
(
    BranchID INT
    ,BranchName NVARCHAR(100)
);
GO
-- 100 branches numbered 1..100, each named with a freshly generated GUID.
;WITH BranchSeq AS (
    SELECT 1 N
    UNION ALL
    SELECT N+1 FROM BranchSeq WHERE N<100
)
INSERT INTO Branches (BranchID, BranchName)
SELECT
    seq.N
    ,CAST(NEWID() AS NVARCHAR(100))
FROM BranchSeq seq
OPTION(MAXRECURSION 0)
-- Opening window per branch and day.
CREATE TABLE OpeningHours
(
    BranchID  INT
    ,Date     DATETIME
    ,OpenFrom DATETIME
    ,OpenTo   DATETIME
);
GO
CREATE CLUSTERED INDEX CIX_OpeningHours
    ON OpeningHours ([Date], [BranchID])
GO
-- Booked interval per branch and day; several rows per branch/day are allowed.
CREATE TABLE Bookings
(
    BranchID     INT
    ,BookingDate DATETIME
    ,BookingFrom DATETIME
    ,BookingTo   DATETIME
)
CREATE CLUSTERED INDEX CIX_Bookings
    ON Bookings ([BookingDate], [BranchID])
-- Today at midnight.
DECLARE #StartDate DATETIME = DATEADD(month,0,DATEADD(D,0,DATEDIFF(d,0,GETDATE())))
-- Every branch is open 07:00-19:00 on each of the 2000 days after #StartDate.
;WITH DaySeq AS (
    SELECT 1 N
    UNION ALL
    SELECT N+1 FROM DaySeq WHERE N<2000
),
OpenDays AS (
    SELECT DATEADD(d,N,#StartDate) Day
    FROM DaySeq
)
INSERT INTO OpeningHours (BranchID, Date, OpenFrom, OpenTo)
SELECT
    b.BranchID
    ,od.Day
    ,DATEADD(hour,7,od.Day)
    ,DATEADD(hour,19,od.Day)
FROM OpenDays od
CROSS JOIN Branches b
OPTION(MAXRECURSION 0);
-- Twelve random bookings per opening-hours row: three start/end hour ranges
-- (offsets from OpenFrom), each repeated four times.
INSERT INTO Bookings (BranchID, BookingDate, BookingFrom, BookingTo)
SELECT
    oh.BranchID
    ,oh.Date
    ,bh.StartDate
    ,bh.ToDate
FROM OpeningHours oh
CROSS APPLY
(
    SELECT DATEADD(hour, dbo.fnRandomInt(0,3), oh.OpenFrom) StartDate
          ,DATEADD(hour, dbo.fnRandomInt(4,9), oh.OpenFrom) ToDate
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(1,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(6,9), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(2,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(5,8), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(0,3), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(4,9), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(1,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(6,9), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(2,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(5,8), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(0,3), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(4,9), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(1,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(6,9), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(2,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(5,8), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(0,3), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(4,9), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(1,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(6,9), oh.OpenFrom)
    UNION ALL
    SELECT DATEADD(hour, dbo.fnRandomInt(2,5), oh.OpenFrom)
          ,DATEADD(hour, dbo.fnRandomInt(5,8), oh.OpenFrom)
) bh;
-- Fill tblNumbers with 1..5000.
;WITH NumberSeq AS (
    SELECT 1 N
    UNION ALL
    SELECT N+1 FROM NumberSeq WHERE N<5000
)
INSERT INTO tblNumbers (NumberID)
SELECT seq.N
FROM NumberSeq seq
OPTION(MAXRECURSION 0)
Scripts to get periods with no bookings:
SET NOCOUNT ON
-- Result table: one row per uninterrupted unbooked period per branch and day.
IF OBJECT_ID('tblBranchFreePeriods') IS NOT NULL
DROP TABLE tblBranchFreePeriods
IF OBJECT_ID('tblFreeMinutes') IS NOT NULL
DROP TABLE tblFreeMinutes
CREATE TABLE tblBranchFreePeriods
(
BranchID INT
, Date DATETIME
, PeriodStartDate DATETIME
, PeriodEndDate DATETIME
)
-- Work table: open minutes with no booking. FreeMinute is the offset in
-- minutes from [Date].
CREATE TABLE tblFreeMinutes
(
BranchID INT
,Date DATETIME
,FreeMinute INT
)
CREATE CLUSTERED INDEX CIX_tblFreeMinutes
ON tblFreeMinutes ([BranchID],[Date],FreeMinute)
-- Work table: first minute of every free period.
IF OBJECT_ID('dbo.tblStartDates') IS NOT NULL
DROP TABLE tblStartDates
CREATE TABLE tblStartDates
(
BranchID INT
, Date DATETIME
, PeriodStartDate DATETIME
)
CREATE CLUSTERED INDEX CIX_tblStartDates
ON tblStartDates([BranchID],[Date])
-- Work table: last minute of every free period.
IF OBJECT_ID('dbo.tblEndDates') IS NOT NULL
DROP TABLE tblEndDates
CREATE TABLE tblEndDates
(
BranchID INT
, Date DATETIME
, PeriodEndDate DATETIME
)
CREATE CLUSTERED INDEX CIX_tblEndDate
ON tblEndDates ([BranchID],[Date])
DECLARE #ProcessFromDate DATETIME, #ProcessTo DATETIME
-- Start at the earliest opening [Date], not the earliest OpenFrom: the batch
-- loop filters on OpeningHours.[Date], and a start carrying OpenFrom's time of
-- day would leave the first [Date] of every batch outside the window.
SELECT #ProcessFromDate = MIN([Date]) FROM OpeningHours
-- Assigned separately: the order in which one SELECT evaluates several
-- variable assignments is not guaranteed, so #ProcessTo could otherwise be
-- derived from a still-NULL #ProcessFromDate.
SET #ProcessTo = DATEADD(year,2,#ProcessFromDate)
-- Each batch covers the start date plus #BatchSize further days.
DECLARE #BatchSize INT = 3
DECLARE #StartTime DATETIME = GETDATE()
-- Processes OpeningHours in date batches. For every batch:
--   1. tblFreeMinutes  <- open minutes not covered by any booking
--   2. tblStartDates   <- free minutes whose previous minute is not free
--   3. tblEndDates     <- free minutes whose next minute is not free
--   4. tblBranchFreePeriods <- each start paired with the nearest end at or
--      after it
-- Elapsed milliseconds per step are printed as progress output.
WHILE (#ProcessFromDate <= #ProcessTo) BEGIN
    TRUNCATE TABLE tblFreeMinutes
    TRUNCATE TABLE tblStartDates
    TRUNCATE TABLE tblEndDates
    SET #StartTime = GETDATE()
    -- Batch window, inclusive at both ends (see the BETWEEN filter below).
    DECLARE #DateFrom DATETIME = #ProcessFromDate, #DateTo DATETIME = DATEADD(d,#BatchSize,#ProcessFromDate)
    PRINT 'Date From ' + CAST(#DateFrom AS NVARCHAR(50))
    PRINT 'Date To ' + CAST(#DateTo AS NVARCHAR(50))

    -- Step 1: expand each opening window to minutes via tblNumbers, then keep
    -- only the minutes for which the anti-join against Bookings finds nothing.
    INSERT INTO tblFreeMinutes (BranchID, Date, FreeMinute)
    SELECT
        oh.BranchID
        ,oh.Date
        ,n.NumberID
    FROM
        OpeningHours oh
    INNER JOIN
        tblNumbers n
        ON n.NumberID
            BETWEEN DATEDIFF(minute,oh.Date,oh.OpenFrom)
            AND DATEDIFF(minute,oh.Date,oh.OpenTo)
    LEFT OUTER JOIN
        Bookings b
        ON b.BookingDate = oh.Date
        AND b.BranchID = oh.BranchID
        AND n.NumberID
            BETWEEN DATEDIFF(minute,b.BookingDate,b.BookingFrom)
            AND DATEDIFF(minute,b.BookingDate,b.BookingTo)
    WHERE
        oh.Date BETWEEN #DateFrom AND #DateTo
        AND b.BookingDate IS NULL
    OPTION ( FORCE ORDER )   -- keep the join order exactly as written
    PRINT 'Populate free minutes ' + CAST(DATEDIFF(millisecond,#StartTime,GETDATE()) AS NVARCHAR(50))
    SET #StartTime = GETDATE()

    -- Step 2: a free minute starts a period when minute - 1 is not free.
    INSERT INTO tblStartDates (BranchID, Date, PeriodStartDate)
    SELECT
        fm.BranchID
        , fm.Date
        , DATEADD(minute,fm.FreeMinute,fm.Date)
    FROM
        tblFreeMinutes fm
    LEFT OUTER JOIN
        tblFreeMinutes prev
        ON prev.Date = fm.Date
        AND prev.BranchID = fm.BranchID
        AND prev.FreeMinute = fm.FreeMinute-1
    WHERE
        prev.BranchID IS NULL
    PRINT 'Populate start dates ' + CAST(DATEDIFF(millisecond,#StartTime,GETDATE()) AS NVARCHAR(50))
    SET #StartTime = GETDATE()

    -- Step 3: a free minute ends a period when minute + 1 is not free.
    INSERT INTO tblEndDates (BranchID, Date, PeriodEndDate)
    SELECT
        fm.BranchID
        , fm.Date
        , DATEADD(minute,fm.FreeMinute,fm.Date)
    FROM
        tblFreeMinutes fm
    LEFT OUTER JOIN
        tblFreeMinutes nxt
        ON nxt.Date = fm.Date
        AND nxt.BranchID = fm.BranchID
        AND nxt.FreeMinute = fm.FreeMinute+1
    WHERE
        nxt.BranchID IS NULL
    PRINT 'Populate end dates ' + CAST(DATEDIFF(millisecond,#StartTime,GETDATE()) AS NVARCHAR(50))
    SET #StartTime = GETDATE()

    -- Step 4: pair every start with the earliest end that is not before it.
    -- The comparison is >= so that a one-minute period (start = end) pairs
    -- with its own end instead of the next period's end, or with nothing.
    INSERT INTO tblBranchFreePeriods (BranchID, Date, PeriodStartDate, PeriodEndDate)
    SELECT
        sd.BranchID
        , sd.Date
        , sd.PeriodStartDate
        , nearest.PeriodEndDate
    FROM
        tblStartDates sd
    CROSS APPLY
    (
        SELECT TOP 1
            ed.PeriodEndDate
        FROM
            tblEndDates ed
        WHERE
            ed.BranchID = sd.BranchID
            AND ed.Date = sd.Date
            AND ed.PeriodEndDate >= sd.PeriodStartDate
        ORDER BY
            ed.PeriodEndDate ASC
    ) nearest
    PRINT 'Return intervals ' + CAST(DATEDIFF(millisecond,#StartTime,GETDATE()) AS NVARCHAR(50))
    SET #StartTime = GETDATE()

    -- The window is inclusive of #DateTo, so the next batch starts one day later.
    SET #ProcessFromDate = DATEADD(d,#BatchSize+1,#ProcessFromDate)
    PRINT ''
    PRINT ''
    -- Empty informational message with NOWAIT, so the PRINT output above is
    -- sent to the client right away.
    RAISERROR ('',0,0) WITH NOWAIT
END
-- Final report: every free period, ordered by branch, day and period start.
SELECT
    fp.BranchID
    , fp.Date
    , fp.PeriodStartDate
    , fp.PeriodEndDate
FROM
    tblBranchFreePeriods fp
ORDER BY
    fp.BranchID
    , fp.Date
    , fp.PeriodStartDate