Function with recursive CTE seems to be very slow - sql

I am currently working on a function in which I use a recursive CTE, but it seems to have poor performance. I need this to be a function (so no temp tables) so that I can easily use it within stored procedures.
Here is the code:
-- Multi-statement table-valued function: for one customer, returns the load count
-- for each of the most recent #Days dates (counting back from #StartDate) that pass
-- the weekend and Holiday filters below, oldest date first.
-- NOTE(review): the `#` prefix on parameters/variables looks like a mangled T-SQL `@`
-- sigil (as written, `#name` denotes a temp object) - confirm before running.
CREATE FUNCTION [dbo].[Web_GetDailyLoadListUDF]
(
-- Customer whose loads are counted.
#CustomerID INT
-- Most recent date considered; its time-of-day part is stripped below.
, #StartDate DATETIME
-- Number of qualifying dates to return (used as the TOP row limit).
, #Days INT
-- When 1, loads of customers whose ParentCustomerID = #CustomerID are counted too.
, #IncludeChildren BIT
)
RETURNS #TableOfValues TABLE
(
RowID SMALLINT IDENTITY(1,1)
, DailyLoadCount INT
, DailyLoadDate VARCHAR(6)
, FullDate DATETIME
)
AS
BEGIN
DECLARE #MaxDate DATETIME;
-- Oldest calendar date the CTE may reach: 1.7 calendar days are allowed per requested day.
-- presumably headroom for weekends/holidays - TODO confirm the 1.7 factor is sufficient.
SET #MaxDate = DATEADD(dd, #Days * -1.7, DATEDIFF(dd, 0, #StartDate));
-- Recursive CTE: one row per calendar day, from #StartDate (at midnight) back to just after #MaxDate.
-- NOTE(review): no MAXRECURSION hint is set, so the server's recursion limit applies - confirm for large #Days.
WITH DateCTE AS
(
SELECT DATEADD(dd, 0, DATEDIFF(dd, 0, #StartDate)) AS DateValue
UNION ALL
SELECT DATEADD(DAY, -1, DateValue)
FROM DateCTE
WHERE DATEADD(DAY, -1, DateValue) > #MaxDate
)
-- No column list: the three selected columns fill the non-identity columns positionally.
INSERT INTO #TableOfValues
SELECT * FROM
(
SELECT TOP (#Days)
(
-- Correlated scalar subquery: evaluated against each candidate date row.
SELECT COUNT(*)
FROM dbo.[Load] l WITH (NOLOCK)
JOIN dbo.LoadCustomer lc WITH (NOLOCK)
ON lc.LoadID = l.ID
JOIN dbo.Customer c WITH (NOLOCK)
ON c.ID = lc.CustomerID
-- Strips the time from l.LoadDate before comparing. The column is wrapped in functions,
-- so NOTE(review): an index on LoadDate likely cannot be used for a range seek here.
WHERE DATEADD(dd, 0, DATEDIFF(dd, 0, l.LoadDate)) = dct.DateValue
AND l.StateType = 1
AND lc.Main = 1
AND (c.ID = #CustomerID OR (#IncludeChildren = 1 AND c.ParentCustomerID = #CustomerID))
) AS DailyLoadCount
-- Style 107 formatted date truncated to 6 characters.
, CONVERT(VARCHAR(6), dct.DateValue, 107) AS DailyLoadDate
, dct.DateValue
FROM DateCTE dct
WHERE
-- Drops weekday numbers 1 and 7; presumably Sunday/Saturday - depends on the session DATEFIRST setting, confirm.
DATEPART(DW, dct.DateValue) NOT IN (1, 7)
-- NOTE(review): if HolidayDate can be NULL, NOT IN would reject every date - confirm nullability.
AND dct.DateValue NOT IN
(
SELECT HolidayDate FROM Holiday
)
-- Keep the #Days most recent qualifying dates...
ORDER BY dct.DateValue DESC
) AS S
-- ...then insert them oldest first.
ORDER BY s.DateValue ASC
RETURN
END
What this SQL is supposed to retrieve is the number of loads per day, for the past #Days that are business days (no weekends/holidays).
I basically just need some help optimizing this so that it doesn't run so slow. (Takes up to 20 seconds per customer, and this will be called over thousands).

Your main problem is just here
WHERE DATEADD(dd, 0, DATEDIFF(dd, 0, l.LoadDate)) = dct.DateValue
It should be
WHERE l.LoadDate >= dct.DateValue
AND l.LoadDate < dct.DateValue +1
Create composite indexes on Load(LoadDate, ID) and Load(ID, LoadDate), then drop whichever one is not used in the query plan.
You should show the query plan whenever you are asking questions about performance. To view the query plan, run the query inside the function on its own using variables for the input parameters. From the menu in SSMS, enable the option "Query -> Include Actual Execution Plan"
Since you don't have enough rep to post images, you can reveal the text plan as follows. Provide some sensible parameters in the first SELECT statement.
set showplan_text on;
Then, run the below in TEXT mode, i.e. press Ctrl-T then Ctrl-E.
-- Ad-hoc batch for capturing the plan: the same statement as the function body,
-- with the function's parameters and return table declared as local variables.
-- NOTE(review): the `#` prefix on variables looks like a mangled T-SQL `@` sigil - confirm before running.
DECLARE
#CustomerID INT
, #StartDate DATETIME
, #Days INT
, #IncludeChildren BIT
-- Sample parameter values; replace with values that are realistic for your data.
SELECT
#CustomerID = 1
, #StartDate = '20110201'
, #Days = 10
, #IncludeChildren = 1
-- Stand-in for the function's RETURNS table.
DECLARE #TableOfValues TABLE
(
RowID SMALLINT IDENTITY(1,1)
, DailyLoadCount INT
, DailyLoadDate VARCHAR(6)
, FullDate DATETIME
)
DECLARE #MaxDate DATETIME;
-- Oldest calendar date the CTE may reach (1.7 calendar days per requested day).
SET #MaxDate = DATEADD(dd, #Days * -1.7, DATEDIFF(dd, 0, #StartDate));
-- One row per calendar day from #StartDate (at midnight) back to just after #MaxDate.
WITH DateCTE AS
(
SELECT DATEADD(dd, 0, DATEDIFF(dd, 0, #StartDate)) AS DateValue
UNION ALL
SELECT DATEADD(DAY, -1, DateValue)
FROM DateCTE
WHERE DATEADD(DAY, -1, DateValue) > #MaxDate
)
INSERT INTO #TableOfValues
SELECT * FROM
(
SELECT TOP (#Days)
(
-- Per-date load count for the customer (and optionally its child customers).
SELECT COUNT(*)
FROM dbo.[Load] l WITH (NOLOCK)
JOIN dbo.LoadCustomer lc WITH (NOLOCK)
ON lc.LoadID = l.ID
JOIN dbo.Customer c WITH (NOLOCK)
ON c.ID = lc.CustomerID
WHERE DATEADD(dd, 0, DATEDIFF(dd, 0, l.LoadDate)) = dct.DateValue
AND l.StateType = 1
AND lc.Main = 1
AND (c.ID = #CustomerID OR (#IncludeChildren = 1 AND c.ParentCustomerID = #CustomerID))
) AS DailyLoadCount
, CONVERT(VARCHAR(6), dct.DateValue, 107) AS DailyLoadDate
, dct.DateValue
FROM DateCTE dct
WHERE
-- Weekend and holiday filters, identical to the function.
DATEPART(DW, dct.DateValue) NOT IN (1, 7)
AND dct.DateValue NOT IN
(
SELECT HolidayDate FROM Holiday
)
ORDER BY dct.DateValue DESC
) AS S
ORDER BY s.DateValue ASC
-- Return the rows the function would have returned.
SELECT * FROM #TableOfValues
Edit the plan into your question

You should use an inline UDF instead (right now you are actually using a temp table)
See http://msdn.microsoft.com/en-us/library/ms189294.aspx
Or convert it into a view instead.

Correlated subqueries run row-by-row; do not use them. Use a join, or a join to a derived table, instead. You also need to make sure that any WHERE clauses can take advantage of indexing. Search for "SARGable queries" to see what kinds of predicates cannot use indexes and what can be done to make them use one.

Related

my end goal is to see end of month data for previous month

My end goal is to see end of month data for previous month.
Our processing is a day behind so if today is 7/28/2021 our Process date is 7/27/2021
So, I want my data to be grouped.
-- Looks up the previous month-end date key for the current process date.
-- NOTE(review): the `#` prefix looks like a mangled T-SQL `@` sigil - confirm.
-- NOTE(review): #ProcessDate is assigned here but not referenced by the SELECT below.
DECLARE
#ProcessDate INT
SET #ProcessDate = (SELECT [PrevMonthEnddatekey] FROM dbo.dimdate WHERE datekey = (SELECT [datekey] FROM sometable [vwProcessDate]))
-- Loan balance total and loan count for a hard-coded list of month-end date keys.
-- The GROUP BY / ORDER BY clauses are not shown in this snippet.
SELECT
ProcessDate
, LoanOrigRiskGrade
,SUM(LoanOriginalBalance) AS LoanOrigBalance
,Count(LoanID) as CountofLoanID
FROM SomeTable
WHERE
-- Month-end keys written as yyyymmdd integers.
ProcessDate in (20210131, 20210228,20210331, 20210430, 20210531, 20210630)
I do not want to hard code these dates into my WHERE statement. I have attached a sample of my results.
I am GROUPING BY ProcessDate, LoanOrigRiskGrade
Then ORDERING BY ProcessDate, LoanOrigRiskGrade
It looks like you want the last day of the month for months within a specified range. You can parameterize that.
For SQL Server:
-- Totals loan balances and counts per (ProcessDate, LoanOrigRiskGrade) for every
-- month-end date key between @startDate and @endDate, without hard-coding the keys.

-- Previous month-end key for the current process date (kept from the question;
-- it is not referenced by the query below).
DECLARE @ProcessDate INT
SET @ProcessDate = (
    SELECT [PrevMonthEnddatekey]
    FROM dbo.dimdate
    WHERE datekey = (
        SELECT [datekey]
        FROM sometable [vwProcessDate]
    )
)

-- Reporting window. @startDate should fall in the first month wanted,
-- @endDate in the last month wanted.
DECLARE @startDate DATE
DECLARE @endDate DATE
SET @startDate = '2021-01-01'
SET @endDate = '2021-06-30'
;
-- d: one row per month in the window.
--   dt  = a date inside that month
--   eom = that month's last day as a yyyymmdd INT, matching SomeTable.ProcessDate.
-- Both members build eom with the same expression: anchor and recursive columns
-- of a recursive CTE must have identical data types.
WITH d (dt, eom) AS (
    SELECT @startDate
         , CONVERT(INT, CONVERT(CHAR(8), EOMONTH(@startDate), 112))
    UNION ALL
    SELECT DATEADD(MONTH, 1, dt)
         , CONVERT(INT, CONVERT(CHAR(8), EOMONTH(DATEADD(MONTH, 1, dt)), 112))
    FROM d
    WHERE DATEADD(MONTH, 1, dt) < @endDate
)
SELECT st.ProcessDate
     , st.LoanOrigRiskGrade
     , SUM(st.LoanOriginalBalance) AS LoanOrigBalance
     , COUNT(st.LoanID) AS CountofLoanID
FROM SomeTable AS st
INNER JOIN d
    ON d.eom = st.ProcessDate
-- Every non-aggregated column in the SELECT list must be grouped.
GROUP BY st.ProcessDate
       , st.LoanOrigRiskGrade
ORDER BY st.ProcessDate
       , st.LoanOrigRiskGrade;
Difficult to check without sample data.

SQL Temp Table Cost 75% Insert

I have been looking for ways to reduce this Table Insert cost down from 75%. One idea I had was to create the temp table before the insert, but that didn't help. Is there any other way to improve the insert into the temp table?
-- Fragment of a larger routine: the two lines below read as its parameter list
-- (the enclosing CREATE statement is not shown).
-- NOTE(review): the `#` prefix on StartDate/EndDate looks like a mangled T-SQL `@` sigil;
-- #Temp1 and #Temp further down are used as temp tables (SELECT ... INTO) - confirm.
#StartDate date = null,
#EndDate date = null
-- Default window when neither bound is supplied: start is the first day of the month
-- 12 months back; end is 3 ms before the first day of the current month.
IF #StartDate IS NULL AND #EndDate IS NULL
BEGIN
SET #StartDate = DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()) + -12, 0)
SET #EndDate = DATEADD(MILLISECOND, -3, DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()), 0))
END
-- Stage the log rows in the window; the end bound is exclusive at #EndDate + 1 day.
SELECT CustomerId, LogDate, StartDate = #StartDate, EndDate = #EndDate
INTO #Temp1
FROM Log l
where l.LogDate >= #StartDate and l.LogDate < dateadd(day, 1, #EndDate)
-- Add StateId / CountryId from Address; rows of #Temp1 without a matching Address row are dropped (inner join).
select
l.*,
a.StateId,
a.CountryId
into #Temp
from #Temp1 l
JOIN [Address] a ON a.CustomerId = l.CustomerId

Converting SQL Server UDF to inline table-valued function

I am new here and new to SQL. I got this tip to create a scalar function that extends the functionality of the built-in DateAdd function (namely to exclude weekends and holidays). It is working fine for a single date but when I use it on a table, it is extremely slow.
I have seen some recommendation to use inline table-valued function instead. Would anyone be so kind to point me in the direction, how I would go about converting the below to inline table-valued function? I greatly appreciate it.
-- Scalar function: steps backwards from #StartDate one calendar day at a time and
-- returns the date on which #Days qualifying days have been counted. A day qualifies
-- when its weekday number is not 1 or 7 and it is not flagged as a holiday in RRCP_Calendar.
-- When #Days <= 0 the loop never runs and #StartDate is returned as a DATE.
-- NOTE(review): the `#` prefix on parameters/variables looks like a mangled T-SQL `@` sigil - confirm.
ALTER FUNCTION [dbo].[CalcWorkDaysAddDays]
(#StartDate AS DATETIME, #Days AS INT)
RETURNS DATE
AS
BEGIN
-- #Count: (negative) offset in days from #StartDate. #WorkDay: qualifying days found so far.
DECLARE #Count INT = 0
DECLARE #WorkDay INT = 0
DECLARE #Date DATE = #StartDate
WHILE #WorkDay < #Days
BEGIN
-- Move one more day into the past.
SET #Count = #Count - 1
SET #Date = DATEADD(DAY, #Count, #StartDate)
-- Weekday numbers 1 and 7 are presumably Sunday/Saturday - depends on the session DATEFIRST setting, confirm.
IF NOT (DATEPART(WEEKDAY, #Date) IN (1,7) OR
EXISTS (SELECT * FROM RRCP_Calendar WHERE Is_Holiday = 1 AND Calendar_Date = #Date))
BEGIN
SET #WorkDay = #WorkDay + 1
END
END
RETURN #Date
END
This should do the trick...
-- Inline table-valued function: returns a single row holding the date that lies
-- @Days working days before @StartDate, according to dbo.RRCP_Calendar.
--
-- Parameters:
--   @StartDate  reference point; only its calendar date is used.
--   @Days       number of working days to step back (must be >= 0).
-- Returns:
--   TheDate     the @Days-th most recent calendar row before @StartDate that is
--               neither a holiday nor a non-weekday. NULL when @Days = 0.
--               If the calendar holds fewer than @Days such rows, the earliest one.
CREATE FUNCTION dbo.tfn_CalcWorkDaysAddDays
(
    @StartDate DATETIME,
    @Days INT
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
    SELECT
        TheDate = MIN(x.Calendar_Date)
    FROM (
        SELECT TOP (@Days)
            c.Calendar_Date
        FROM
            dbo.RRCP_Calendar c
        WHERE
            -- Compare against the calendar DATE of the parameter. With the raw DATETIME,
            -- a start value carrying a time of day (e.g. GETDATE()) would let the start
            -- day itself count as the first working day.
            c.Calendar_Date < CAST(@StartDate AS DATE)
            AND c.Is_Holiday = 0
            AND c.is_Weekday = 1 -- this should be part of your calendar table. do not calculate on the fly.
        ORDER BY
            c.Calendar_Date DESC
    ) x;
GO
Note: for best performance, you'll want a unique, filtered, nonclustered index on your calendar table...
-- Unique filtered index restricted to calendar rows that are working days
-- (Is_Holiday = 0 and is_Weekday = 1), keyed by date first.
CREATE UNIQUE NONCLUSTERED INDEX uix_RRCPCalendar_CalendarDate_IsHoliday_isWeekday
    ON dbo.RRCP_Calendar
    (
        Calendar_Date,
        Is_Holiday,
        is_Weekday
    )
    WHERE Is_Holiday = 0
      AND is_Weekday = 1;
Try this and see if it returns the same values as your function, just without the loop:
-- Adds #Days to #StartDate, then subtracts the number of holiday rows that fall
-- between #StartDate and #StartDate + #Days calendar days (both ends inclusive).
-- NOTE(review): the `#` prefix on variables looks like a mangled T-SQL `@` sigil - confirm.
-- NOTE(review): per the DATEADD documentation the WEEKDAY datepart adds plain calendar
-- days, so weekends are not skipped here - confirm against the intended behaviour.
-- NOTE(review): the `- (subquery)` arithmetic assumes #StartDate is DATETIME; a DATE
-- operand would not accept integer subtraction - confirm the variable's type.
SELECT WorkDays =
DATEADD(WEEKDAY, #Days, #StartDate) -
(SELECT COUNT(*)
FROM RRCP_Calendar
WHERE Is_Holiday = 1
AND Calendar_Date >= #StartDate
AND Calendar_Date <= DATEADD(DAY, #Days, #StartDate)
)
And yes, you can sometimes get substantially better performance with a non-procedural table-valued-function, but you have to set it up right. Look up SARGability and non-procedural table-valued-functions for more info, but if the above query works, this should do the trick:
-- Inline table-valued function: returns one row with @StartDate shifted by @Days
-- calendar days, minus one day for every holiday row found in that window.
--
-- Parameters:
--   @StartDate  date to start from.
--   @Days       number of days to add.
-- Returns:
--   WorkDays    DATE = @StartDate + (@Days - holidays in [@StartDate, @StartDate + @Days]).
--
-- The original expression was DATEADD(WEEKDAY, ...) - (SELECT COUNT(*) ...). With
-- @StartDate declared as DATE that raises an operand type clash, because DATE does
-- not support integer subtraction. The holiday count is folded into the DATEADD
-- offset instead.
-- NOTE(review): DATEADD treats the WEEKDAY datepart the same as DAY, so weekends are
-- not skipped by this function (unchanged from the original) - confirm that is acceptable.
CREATE FUNCTION dbo.SelectWorkDaysAddDays(@StartDate DATE, @Days INT)
RETURNS TABLE
AS
RETURN
    SELECT WorkDays = DATEADD(DAY, @Days - h.HolidayCount, @StartDate)
    FROM (
        -- COUNT(*) without GROUP BY always yields exactly one row.
        SELECT COUNT(*) AS HolidayCount
        FROM RRCP_Calendar
        WHERE Is_Holiday = 1
          AND Calendar_Date >= @StartDate
          AND Calendar_Date <= DATEADD(DAY, @Days, @StartDate)
    ) AS h
GO
And then you call the function by using an OUTER APPLY join:
-- Example call: OUTER APPLY attaches the function's single-row result to each row of YourTable.
-- The arguments here are variables, so every row receives the same WorkDays value.
-- NOTE(review): pass columns of y instead if a per-row result is wanted; the `#` prefix
-- looks like a mangled T-SQL `@` sigil - confirm.
SELECT y.foo
, y.bar
, dt.WorkDays
FROM dbo.YourTable y
OUTER APPLY dbo.SelectWorkDaysAddDays(#StartDate, #Days) dt
Say [dbo].[CalcWorkDaysAddDays], getdate(), 2 would return Sept 8,
2017 since it is adding two days. This function is similar to DateAdd
but it is excluding weekends and holidays
The code you've posted doesn't do this.
But if you want the result described, the function can be something like this:
-- Inline table-valued function: returns the date that lies ABS(@Days) working days
-- after (@Days > 0) or before (@Days < 0) @StartDate, according to RRCP_Calendar.
--
-- Parameters:
--   @StartDate  reference point; only its calendar date is used.
--   @Days       signed number of working days to move.
-- Returns:
--   dt          one row with the resulting DATE. No row when @Days = 0 or when the
--               calendar does not reach that far.
ALTER FUNCTION [dbo].[CalcWorkDaysAddDays_inline](@StartDate AS DATETIME, @Days AS INT)
RETURNS TABLE
AS RETURN
WITH cte AS
(
    -- Forward branch: working days after the start date, numbered nearest first.
    SELECT Calendar_Date,
           ROW_NUMBER() OVER (ORDER BY Calendar_Date) AS rn
    FROM RRCP_Calendar
    WHERE Calendar_Date > CAST(@StartDate AS DATE) AND @Days > 0
      AND NOT (DATEPART(WEEKDAY, Calendar_Date) IN (1, 7) OR Is_Holiday = 1)
    UNION ALL
    -- Backward branch: working days before the start date, numbered nearest first.
    -- The comparison uses the calendar DATE so that a start value with a time of day
    -- does not count the start day itself.
    SELECT Calendar_Date,
           ROW_NUMBER() OVER (ORDER BY Calendar_Date DESC) AS rn
    FROM RRCP_Calendar
    WHERE Calendar_Date < CAST(@StartDate AS DATE) AND @Days < 0
      AND NOT (DATEPART(WEEKDAY, Calendar_Date) IN (1, 7) OR Is_Holiday = 1)
)
-- Only one branch can produce rows for a given sign of @Days.
SELECT CAST(Calendar_Date AS DATE) AS dt
FROM cte
WHERE rn = ABS(@Days);

Calculation of date in function

1 28/11/2011 ...
How do I write a function in SQL to implement the above pattern?
You could do something like this in SQL Server:
-- Generates a numbered series of dates, 7 days apart, starting at @BaseDate.
-- Recursion continues while the previous date is before @EndDate, so the final
-- row can land past @EndDate when the span is not a whole number of weeks.
DECLARE @BaseDate DATE = '20111107';
DECLARE @EndDate DATE = GETDATE(); --Or the "end of dates in the database"
WITH RecursiveCTE AS (
    -- Anchor: week 1 is the base date itself.
    SELECT
        1 AS [Counter],
        @BaseDate AS [MyDate]
    UNION ALL
    -- Step: next counter, one week later.
    SELECT
        [Counter] + 1,
        DATEADD(DAY, 7, MyDate)
    FROM
        RecursiveCTE
    WHERE
        MyDate < @EndDate)
SELECT
    [Counter],
    [MyDate]
FROM
    RecursiveCTE
-- MAXRECURSION 0 removes the recursion-depth limit for long date ranges.
OPTION (MAXRECURSION 0);
To handle dates that aren't exact and make this into a function you would do this:
--Function definition
-- Multi-statement table-valued function: returns a numbered series of dates, 7 days
-- apart, from @BaseDate up to @EndDate. When the last step would overshoot @EndDate
-- it is clamped to @EndDate, so the series always ends exactly on @EndDate
-- (provided @BaseDate <= @EndDate; otherwise only the @BaseDate row is returned).
--
-- Parameters:
--   @BaseDate  first date of the series.
--   @EndDate   last date of the series.
-- Returns:
--   [Counter]  1-based position in the series.
--   [Date]     date at that position.
CREATE FUNCTION SuperDuperDataCalculator (
    @BaseDate DATE = '20131016',
    @EndDate DATE = '20131020')
RETURNS @Results TABLE (
    [Counter] INT,
    [Date] DATE)
AS
BEGIN
    WITH RecursiveCTE AS (
        -- Anchor: position 1 is the base date.
        SELECT
            1 AS [Counter],
            @BaseDate AS [MyDate]
        UNION ALL
        -- Step: advance one week, never past @EndDate.
        SELECT
            [Counter] + 1,
            CASE WHEN DATEADD(DAY, 7, MyDate) > @EndDate THEN @EndDate ELSE DATEADD(DAY, 7, MyDate) END
        FROM
            RecursiveCTE
        WHERE
            MyDate < @EndDate)
    INSERT INTO
        @Results ([Counter], [Date])
    SELECT
        [Counter],
        [MyDate]
    FROM
        RecursiveCTE
    -- Lift the recursion-depth limit so long ranges do not fail.
    OPTION (MAXRECURSION 0);
    RETURN;
END;
GO
--Usage
SELECT * FROM SuperDuperDataCalculator('20131016', '20131020');
--Results
Counter Date
1 2013-10-16
2 2013-10-20
Note that we have to use a multi-statement table-valued function because SQL Server does not allow an OPTION clause (query hints) inside an inline table-valued function. The alternative would be to remove the OPTION (MAXRECURSION 0) from the function and remember to add it to every query that references the function (i.e. a pretty poor alternative).
...and finally, if you wanted to just return the maximum counter value you could rewrite this as a scalar-valued function, i.e.:
--Function definition
-- Scalar function: returns how many steps the 7-day series from @BaseDate to @EndDate
-- contains. The last step is clamped to @EndDate, so a partial final week counts as
-- one step.
--
-- Parameters:
--   @BaseDate  first date of the series.
--   @EndDate   last date of the series.
-- Returns:
--   INT        highest counter reached (1 when @BaseDate >= @EndDate).
CREATE FUNCTION SuperDuperDataCalculator (
    @BaseDate DATE = '20131016',
    @EndDate DATE = '20131020')
RETURNS INT
AS
BEGIN
    DECLARE @ReturnValue INT;
    WITH RecursiveCTE AS (
        -- Anchor: position 1 is the base date.
        SELECT
            1 AS [Counter],
            @BaseDate AS [MyDate]
        UNION ALL
        -- Step: advance one week, never past @EndDate.
        SELECT
            [Counter] + 1,
            CASE WHEN DATEADD(DAY, 7, MyDate) > @EndDate THEN @EndDate ELSE DATEADD(DAY, 7, MyDate) END
        FROM
            RecursiveCTE
        WHERE
            MyDate < @EndDate)
    -- Aggregate straight from the CTE; no intermediate table variable is needed.
    SELECT
        @ReturnValue = MAX([Counter])
    FROM
        RecursiveCTE
    -- Lift the recursion-depth limit so long ranges do not fail.
    OPTION (MAXRECURSION 0);
    RETURN @ReturnValue;
END;
GO
SELECT dbo.SuperDuperDataCalculator('20131016', '20131020');
Try this - It will get all the weeks and assign a rownumber in the subquery. Then only select the records where row number = 1 because there might be more results for that week. So hence RowNo = 1
-- One row per calendar week present in YourTable, numbered 1, 2, 3, ... in
-- chronological order, with the earliest createdDate of each week shown as yyyy/mm/dd.
SELECT
    -- Number by (year, week). Every surviving row has RowNo = 1, so ordering by RowNo
    -- would leave the numbering arbitrary.
    ROW_NUMBER() OVER (ORDER BY weeks.[year], weeks.[week]) AS IncrementalWeek,
    weeks.dte
FROM
(
    SELECT
        DATEPART(yy, createdDate) AS [year],
        -- Date parts are taken from the date itself, not from a string that would
        -- have to be parsed back under the session's date-format settings.
        DATEPART(ww, createdDate) AS [week],
        CONVERT(VARCHAR(20), createdDate, 111) AS dte,
        -- Rank rows inside each (year, week). Ordering by createdDate makes the
        -- RowNo = 1 pick deterministic; including the year keeps week N of
        -- different years apart.
        ROW_NUMBER() OVER (
            PARTITION BY DATEPART(yy, createdDate), DATEPART(ww, createdDate)
            ORDER BY createdDate
        ) AS RowNo
    FROM YourTable
) AS weeks
WHERE weeks.RowNo = 1
ORDER BY weeks.[year], weeks.[week]

SQL finding Missing Record

I have a scenario where I have to find out missing record.
-- Target table: one row per BillKey per calendar month covered by the source range.
CREATE TABLE [dbo].[NaTarget]
(
    [BillKey]   [int]  NULL,
    [StartDate] [date] NULL,
    [EndDate]   [date] NULL
)
GO
-- Source table: one row per BillKey holding the full start/end range.
CREATE TABLE [dbo].[NaSource]
(
    [BillKey]   [int]  NULL,
    [StartDate] [date] NULL,
    [EndDate]   [date] NULL
)
GO
-- Source rows: three bills, each spanning several months.
INSERT INTO [dbo].[NaSource] ([BillKey], [StartDate], [EndDate])
VALUES
    ('1', '2014-01-13', '2014-03-27'),
    ('2', '2014-02-14', '2014-04-20'),
    ('3', '2013-11-13', '2014-01-18')
GO
-- Target rows: each source range split at month boundaries.
INSERT INTO [dbo].[NaTarget] ([BillKey], [StartDate], [EndDate])
VALUES
    ('1', '2014-01-13', '2014-01-31'),
    ('1', '2014-02-01', '2014-02-28'),
    ('1', '2014-03-01', '2014-03-27'),
    ('2', '2014-02-14', '2014-02-28'),
    ('2', '2014-03-01', '2014-03-31'),
    ('2', '2014-04-01', '2014-04-20'),
    ('3', '2013-11-13', '2013-11-30'),
    ('3', '2013-12-01', '2013-12-31'),
    ('3', '2014-01-01', '2014-01-18')
Now for any BillKey, StartDate in target will be StartDate from Source and EndDate will be last date of month and now for same Billkey, next record will have 1st date of next month and EndDate will be last date, until last date of same BillKey is reached.
I have to find any record if it gets deleted.
Example if BillKey = 3
StartDate= 2013-12-01 EndDate = 2013-12-31 is
not present in target we need to find it
Example will explain it better
Here is an attempt at this: If I understand your question correctly, you're looking to check to see if any expected values in the Target table based on the Start and End Dates in the Source table aren't actually there.
You'll need to essentially recreate the results table with what you are expecting from the NaSource table's StartDate and EndDate, and check that against the NaTarget table.
I'm positive there's a more efficient way of doing this (preferably without using cursors and while loops), but this should give you the results you're looking for:
-- Rebuilds the rows NaTarget is expected to contain (each NaSource range split at
-- month boundaries) and lists the expected rows that are missing from NaTarget.

-- Expected target rows.
DECLARE @Results TABLE
(
    BillKey   INT,
    StartDate DATE,
    EndDate   DATE
)
DECLARE @BillKey  INT
DECLARE @EndDate  DATE   -- end of the source range being split
DECLARE @Cur      DATE   -- start of the monthly slice being produced
DECLARE @MonthEnd DATE   -- last day of @Cur's month

-- LOCAL keeps the cursor name scoped to this batch.
DECLARE cur CURSOR LOCAL FAST_FORWARD FOR
    SELECT BillKey, StartDate, EndDate
    FROM NaSource
OPEN cur
WHILE 1 = 1
BEGIN
    FETCH NEXT FROM cur INTO @BillKey, @Cur, @EndDate
    IF @@FETCH_STATUS <> 0 BREAK
    -- <= (not <): a slice that starts exactly on the range's end date, including a
    -- single-day range, is still an expected row.
    WHILE (@Cur <= @EndDate)
    BEGIN
        SET @MonthEnd = DATEADD(d, -1, DATEADD(m, DATEDIFF(m, 0, @Cur) + 1, 0))
        INSERT @Results (BillKey, StartDate, EndDate)
        SELECT @BillKey,
               @Cur,
               -- The last slice ends at the source end date, not at month end.
               CASE WHEN @MonthEnd > @EndDate THEN @EndDate ELSE @MonthEnd END
        -- Next slice starts on the first day of the following month.
        SET @Cur = DATEADD(d, 1, @MonthEnd)
    END
END
CLOSE cur
DEALLOCATE cur

-- Expected rows with no exact match in NaTarget.
SELECT R.BillKey, R.StartDate, R.EndDate
FROM @Results R
WHERE NOT EXISTS
(
    SELECT 1
    FROM NaTarget T
    WHERE R.BillKey = T.BillKey
      AND R.StartDate = T.StartDate
      AND R.EndDate = T.EndDate
)
Here's my solution using recursive CTE. Build what the natarget table should look like and compare it to the actual natarget. I started getting confused on the dates piece so it may be simplified but this does work.
-- Rebuilds the rows NaTarget is expected to contain (each NaSource range split at
-- month boundaries) with a recursive CTE and lists the expected rows that are
-- missing from NaTarget.
;WITH targetCte AS
(
    -- Anchor: first slice runs from the source start date to the end of that month,
    -- capped at the source end date so a range inside a single month stays intact.
    SELECT n.billkey,
           CAST(n.startdate AS DATE) AS startdate,
           CASE
               WHEN CAST(DATEADD(d, -1, DATEADD(m, DATEDIFF(m, 0, n.startdate) + 1, 0)) AS DATE) < n.enddate
                   THEN CAST(DATEADD(d, -1, DATEADD(m, DATEDIFF(m, 0, n.startdate) + 1, 0)) AS DATE)
               ELSE CAST(n.enddate AS DATE)
           END AS enddate
    FROM nasource n
    UNION ALL
    -- Step: the previous slice ended on a month end (it was shorter than the source
    -- range), so the next slice starts the following day and runs to the end of
    -- that month or the source end date, whichever comes first.
    SELECT t.billkey,
           CAST(DATEADD(d, 1, t.enddate) AS DATE),
           CASE
               WHEN CAST(DATEADD(d, -1, DATEADD(m, DATEDIFF(m, 0, DATEADD(d, 1, t.enddate)) + 1, 0)) AS DATE) < n.enddate
                   THEN CAST(DATEADD(d, -1, DATEADD(m, DATEDIFF(m, 0, DATEADD(d, 1, t.enddate)) + 1, 0)) AS DATE)
               ELSE CAST(n.enddate AS DATE)
           END AS enddate
    FROM targetCte t
    JOIN nasource n ON n.billkey = t.billkey
    WHERE t.enddate < n.enddate
)
-- Expected rows with no exact match in NaTarget.
SELECT t.billkey, t.startdate, t.enddate
FROM targetCte t
WHERE NOT EXISTS
    (SELECT 1
     FROM natarget nt
     WHERE t.billkey = nt.billkey
       AND t.startdate = nt.startdate
       AND t.enddate = nt.enddate)
Insert all records into one table with a unique ID (call this main table)
Take the table with deleted records then run a SELECT * on the Main table where ID NOT IN ID column of the deleted records table
It's filtering by reconciliation. LEFT Join both tables on StartDate AND EndDate pairs WHERE RIGHT keys are NULL. Google SQL Joins and you can find a very useful diagram on issues like that.