How to calculate average of variables in SQL Server - sql

I am trying to calculate date ranges between rows and then get an average.
-- Capture my_date from the four most recent rows of someTable, newest first.
-- #date0: newest row (TOP 1 when sorted descending).
declare #date0 date = (
select top 1 my_date
from someTable
order by my_date desc)
-- #date1: second-newest row (skip 1, take 1).
declare #date1 date = (
select my_date
from someTable
order by my_date desc
offset 1 rows
fetch next 1 row only)
-- #date2: third-newest row (skip 2, take 1).
declare #date2 date = (
select my_date
from someTable
order by my_date desc
offset 2 rows
fetch next 1 row only)
-- #date3: fourth-newest row (skip 3, take 1).
declare #date3 date = (
select my_date
from someTable
order by my_date desc
offset 3 rows
fetch next 1 row only)
-- Day gaps between neighbouring captured dates, followed by the attempted average.
select
[Range 1] = dateDiff(day, #date1, #date0),
[Range 2] = dateDiff(day, #date2, #date1),
[Range 3] = dateDiff(day, #date3, #date2),
-- NOTE(review): avg() receives four arguments here, and they are the dates
-- themselves rather than the three ranges; this is the part the question
-- reports as not working.
[Avg Range] = avg(
nullIf(#date0, 0),
nullIf(#date1, 0),
nullIf(#date2, 0),
nullIf(#date3, 0)
)
The range calculations work fine, but are a bit clumsy.
However, I'm not sure how to handle the average. It looks like the function is supposed to run against a table and not an array, but I was having issues inserting the variables into a temp table column.
How can I get the average of these ranges (not including range = 0)?

Use UNION ALL to return a row for each case:
-- Average the non-zero day ranges by turning each range into its own row.
-- Multiplying by 1.0 makes AVG operate on a decimal: AVG over an int
-- expression returns an int, which would silently drop the fractional part.
select avg(1.0 * t.[Range]) as [Avg Range]
from (
    select dateDiff(day, #date1, #date0) as [Range]
    union all
    select dateDiff(day, #date2, #date1)
    union all
    select dateDiff(day, #date3, #date2)
) as t
-- Zero ranges are excluded; a NULL range also fails this test (AVG would skip it anyway).
where t.[Range] <> 0

AVG is an aggregate function, designed to be used with GROUP BY or windows. You can simply do the math in your query:
-- Average the three day ranges arithmetically, without the AVG aggregate.
-- A range that is 0, or NULL because one of its dates is missing, adds
-- nothing to the sum and is left out of the count.
select
    [Range 1] = r.range_1,
    [Range 2] = r.range_2,
    [Range 3] = r.range_3,
    [Avg Range] =
        -- 1.0 * forces decimal division so the result is not truncated to an int
        1.0 * (
            coalesce(r.range_1, 0) +
            coalesce(r.range_2, 0) +
            coalesce(r.range_3, 0)
        ) /
        -- nullIf turns "no usable range" into NULL instead of a divide-by-zero error
        nullIf(
            case when r.range_1 <> 0 then 1 else 0 end +
            case when r.range_2 <> 0 then 1 else 0 end +
            case when r.range_3 <> 0 then 1 else 0 end,
            0
        )
from (
    -- compute each range once so the average works on ranges, not on the dates
    select
        dateDiff(day, #date1, #date0) as range_1,
        dateDiff(day, #date2, #date1) as range_2,
        dateDiff(day, #date3, #date2) as range_3
) as r

No reason to use four different queries:
-- One pass over T: for every row, the day gap to the next-newer row.
with dates as (
    select
        row_number() over (order by my_date desc) as rn,
        -- lag() over a descending order returns the next-newer date, so it goes
        -- second in datediff to keep the gap positive; the datepart is "day"
        datediff(day, my_date, lag(my_date) over (order by my_date desc)) as diff
    from T
)
-- Row 1 has no newer neighbour (diff is NULL), so rows 2-4 carry the three ranges.
-- nullif drops zero gaps; 1.0 * keeps AVG from truncating to an int.
select avg(1.0 * nullif(diff, 0)) from dates where rn <= 4;
or
-- Same calculation, limiting rows with OFFSET/FETCH instead of row_number().
with dates as (
    select
        -- gap in days to the next-newer row (positive); the datepart is "day"
        datediff(day, my_date, lag(my_date) over (order by my_date desc)) as diff
    from T
    order by my_date desc
    -- ORDER BY inside a CTE is only accepted together with TOP or OFFSET.
    -- Skipping 1 row drops the newest row, whose diff is NULL, and the next
    -- 3 rows carry the three ranges.
    offset 1 rows fetch next 3 rows only
)
-- nullif drops zero gaps; 1.0 * keeps AVG from truncating to an int.
select avg(1.0 * nullif(diff, 0)) from dates;
Using distinct would also let you easily get the top three dates and not have to mess around with nullif().
-- De-duplicate the dates first so no gap can be zero, then average the three newest gaps.
with distinct_dates as (
    -- DISTINCT must apply to the dates themselves, not to the computed gaps
    select distinct my_date
    from T
),
dates as (
    select
        -- gap in days to the next-newer distinct date (always positive)
        datediff(day, my_date, lag(my_date) over (order by my_date desc)) as diff
    from distinct_dates
    order by my_date desc
    -- skip the newest date (its diff is NULL) and keep the next three
    offset 1 rows fetch next 3 rows only
)
-- 1.0 * keeps AVG from truncating to an int
select avg(1.0 * diff) from dates;

Related

Selecting count of consecutives dates before and after a specified date based on start/end

I'm trying to determine the number of records with consecutive dates (previous record ends on the same date as the start date of the next record) before and after a specified date, and ignore any consecutive records as soon as there is a break in the chain.
If I have the following data:
-- declare vars
-- #dateToCheck is the reference date; #numRecsBefore / #numRecsAfter receive the two counts.
DECLARE #dateToCheck date = '2020-09-20'
DECLARE #numRecsBefore int = 0
DECLARE #numRecsAfter int = 0
-- #tempID is declared but never used in this snippet.
DECLARE #tempID int
-- temp table: one row per date range; prevEndDate is filled in by the loop below
CREATE TABLE #dates
(
[idx] INT IDENTITY(1,1),
[startDate] DATETIME ,
[endDate] DATETIME,
[prevEndDate] DATETIME
)
-- insert sample ranges (idx is assigned 1..10 in insert order)
INSERT INTO #dates
( [startDate], [endDate] )
VALUES ( '2020-09-01', '2020-09-04' ),
( '2020-09-04', '2020-09-10' ),
( '2020-09-10', '2020-09-16' ),
( '2020-09-17', '2020-09-19' ),
( '2020-09-19', '2020-09-20' ),
-- rows above end on or before #dateToCheck; rows below start on or after it
( '2020-09-20', '2020-09-23' ),
( '2020-09-25', '2020-09-26' ),
( '2020-09-27', '2020-09-28' ),
( '2020-09-28', '2020-09-30' ),
( '2020-10-01', '2020-09-05' )
-- update each row with the previous record's endDate, one idx at a time
DECLARE #maxRows int = (SELECT MAX(idx) FROM #dates)
DECLARE #intCount int = 0
-- The counter starts at 0 while idx starts at 1, so the first pass updates no row;
-- for idx = 1 the lookup of idx = 0 finds nothing and prevEndDate stays NULL.
WHILE #intCount <= #maxRows
BEGIN
UPDATE #dates SET prevEndDate = (SELECT endDate FROM #dates WHERE idx = (#intCount - 1) ) WHERE idx=#intCount
SET #intCount = #intCount + 1
END
-- clear any breaks in the chain? (nothing is done for this step yet)
-- number of consecutive records before this date
-- Counts every row whose startDate equals prevEndDate and whose endDate is on or
-- before #dateToCheck, across the whole table rather than only the unbroken chain.
SET #numRecsBefore = (SELECT COUNT(idx) FROM #dates WHERE startDate = prevEndDate AND endDate <= #dateToCheck)
-- number of consecutive records after this date
-- Same test with endDate on or after #dateToCheck, again across the whole table.
SET #numRecsAfter = (SELECT COUNT(idx) FROM #dates WHERE startDate = prevEndDate AND endDate >= #dateToCheck)
-- return & clean up
SELECT * FROM #dates
SELECT #numRecsBefore AS numBefore, #numRecsAfter AS numAfter
DROP TABLE #dates
With the specified date being '2020-09-20', I would expect #numRecsBefore = 2 and #numRecsAfter = 1. That is not what I am getting, as it's counting all the consecutive records.
There has to be a better way to do this. I know the loop isn't optimal, but I couldn't get LAG() or LEAD() to work. I've spent all morning trying different methods and searching, but nothing I find deals with two dates, or with breaks in the chain.
This reads like a gaps-and-islands problem. Islands represent rows whose date ranges are adjacent, and you want to count how many records precede or follow the current date in the same island.
You could do:
-- Gaps-and-islands: an island is a run of rows in which each startdate equals
-- the enddate of the row before it (rows ordered by startdate).
;with with_prev as (
    -- attach the previous row's enddate
    select src.*,
           lag(enddate) over(order by startdate) as lag_enddate
    from #dates src
),
with_island as (
    -- running count of chain breaks; rows sharing a value belong to one island
    select p.*,
           sum(case when startdate = lag_enddate then 0 else 1 end) over(order by startdate) as grp
    from with_prev p
),
with_counts as (
    -- position of each row counted from the start and from the end of its island
    select i.*,
           count(*) over(partition by grp order by startdate) as numRecsBefore,
           count(*) over(partition by grp order by startdate desc) as numRecsAfter
    from with_island i
)
-- pick the counts from the row(s) whose range touches #dateToCheck
select
    max(case when startdate < #dateToCheck and enddate >= #dateToCheck then numRecsBefore end) as numRecsBefore,
    max(case when startdate <= #dateToCheck and enddate > #dateToCheck then numRecsAfter end) as numRecsAfter
from with_counts
This uses lag() and a cumulative sum() to define the islands. Then a window count gives the number of preceding and following records on the same island. The final step is conditional aggregation; extra care needs to be taken with the inequalities to take into account the various possibilities (typically, the date you search for might not always match a range bound).
Demo on DB Fiddle
I think this is what you are after; however, this does not give the results in your query; I suspect that is because they aren't the expected results? One of the conditional aggregates may also need to be a >= or <=, but I don't know which:
-- Flag each row whose startDate equals the endDate of the row before it
-- (rows ordered by startDate), then count the flags on either side of #dateToCheck.
WITH Chained AS (
    SELECT
        d.startDate,
        d.endDate,
        CASE
            WHEN d.startDate = LAG(d.endDate) OVER (ORDER BY d.startDate ASC) THEN 1
        END AS IsSame
    FROM #dates AS d
)
SELECT
    -- COUNT skips NULLs, so only flagged rows on the matching side are counted
    COUNT(CASE WHEN c.startDate < #dateToCheck THEN c.IsSame END) AS numBefore,
    COUNT(CASE WHEN c.startDate > #dateToCheck THEN c.IsSame END) AS numAfter
FROM Chained AS c;

SQL case statement closest to current date

Need help creating a case statement to find the closest date from a date table. My data: https://imgur.com/hkBu4SA
I basically want to set:
Y flag if it's closest to today's date from a.FROM_EFFDT and is not null.
F if to_effdate is null
else N
WHEN a.FROM_EFFDT < GETDATE() AND (to_effdate) IS NOT NULL THEN 'Y'
WHEN to_effdate IS NULL THEN 'F'
ELSE 'N'
You can use window functions:
-- Flag expression: 'F' when there is no end date, 'Y' for the row whose
-- to_effdate is nearest to today, 'N' for everything else.
(case
    -- test for NULL first so a row without an end date is always 'F'
    when to_effdate is null then 'F'
    when row_number() over (
             order by
                 -- NULLs sort first in ascending order on SQL Server;
                 -- push them last so they can never take rank 1
                 case when to_effdate is null then 1 else 0 end,
                 abs(datediff(day, getdate(), to_effdate))
         ) = 1
    then 'Y'
    else 'N'
end)
You may be able to accomplish it with something like this. Though this isn't bulletproof, you could get duplicates if the closest date is tied.
-- Sample table: one row per effective-date range plus a one-character flag.
CREATE TABLE Dates (
    from_effdt datetime,
    to_effdt   datetime,
    flag       varchar(1)
);
INSERT INTO Dates (from_effdt, to_effdt, flag)
VALUES
    ('2019-03-16', NULL,         ''),
    ('2018-06-14', '2019-03-16', ''),
    ('2018-05-14', '2018-06-14', ''),
    ('2018-01-01', '2018-05-14', '');
-- Show the rows before flagging.
SELECT from_effdt, to_effdt, flag
FROM Dates;
-- Whole-table update on purpose: every row receives 'Y' or 'N'.
-- 'Y' goes to rows whose from_effdt equals the single from_effdt nearest to now.
UPDATE Dates
SET flag =
    CASE
        WHEN from_effdt = (
                 SELECT TOP 1 from_effdt
                 FROM Dates
                 ORDER BY ABS(DATEDIFF(day, from_effdt, GETDATE()))
             )
        THEN 'Y'
        ELSE 'N'
    END;
*Update: not sure why I created it as an update. This select should do.
-- Read-only version of the flagging: 'Y' for rows whose from_effdt equals the
-- from_effdt nearest to now, 'N' otherwise.
-- NOTE(review): the output column holds the Y/N flag despite being named numberOfDaysAway.
SELECT
    d.from_effdt,
    d.to_effdt,
    CASE WHEN d.from_effdt = nearest.from_effdt THEN 'Y' ELSE 'N' END AS [numberOfDaysAway]
FROM Dates AS d
CROSS JOIN (
    -- the single from_effdt with the smallest day distance from the current date
    SELECT TOP 1 from_effdt
    FROM Dates
    ORDER BY ABS(DATEDIFF(day, from_effdt, GETDATE()))
) AS nearest
You can simply do this:
-- 'Y' for every row whose from_effdt is at the minimum distance (in seconds) from now.
-- IN is used instead of = because the subquery returns more than one value when
-- rows tie for the minimum distance, and a scalar comparison then fails at run time.
CASE
WHEN from_effdt IN (
select from_effdt
from Dates
where abs(datediff(second, from_effdt, getdate()))
= (select min(
abs(datediff(second, from_effdt, getdate()))
)
from Dates)
)
THEN 'Y'
ELSE
'N'
END
-- Rank each id's rows by to_effdt, newest first, and derive the flag from that rank.
SELECT
ROW_NUMBER() Over (Partition by id order by to_effdt desc) as rn
,id
,from_effdt
,to_effdt
-- The NULL check comes first so a row without an end date is always 'F',
-- even when it is the only row for its id and therefore ranks first.
, CASE WHEN (to_effdt IS NULL) THEN ('F')
WHEN (ROW_NUMBER() Over (Partition by id order by to_effdt desc) = 1) THEN ('Y')
ELSE ('N') End as flag
from a

Generate Dates recursively in SQL Server

I have some dates I want to calculate which is currently done over several subqueries. Each subsequent subquery uses the result (a date) of the previous query in its calculation. E.g.
-- Starting point for the chain of look-ups.
DECLARE #Date DATE = '20170101'
-- Step 1: latest DateField on or before #Date, passed through dbo.DateFunction.
SELECT #foo1 = (SELECT TOP 1 dbo.DateFunction(DateField)
FROM [DateTable]
WHERE DateField <= #Date
ORDER BY DateField DESC)
-- Step 2: the same look-up, bounded by the result of step 1.
SELECT #foo2 = (SELECT TOP 1 dbo.DateFunction(DateField)
FROM [DateTable]
WHERE DateField <= #foo1
ORDER BY DateField DESC)
-- (further steps elided in the question)
....
-- Step N: the same look-up, bounded by the result of step N-1.
SELECT #fooN = (SELECT TOP 1 dbo.DateFunction(DateField)
FROM [DateTable]
WHERE DateField <= #fooNMinus1
ORDER BY DateField DESC)
Is it possible (perhaps using CTE) to make a recursive query to achieve this for a specified number of times?
Weeks are almost always 7 days, so you can get the first one and then just add seven days. If so:
-- Anchor: the largest dbo.DateFunction(DateField) among rows on or before #Date.
-- Recursive member: add 7 days per step until the counter reaches #n.
WITH dates (dte, counter) AS (
    SELECT MAX(dbo.DateFunction(DateField)), 1
    FROM [DateTable]
    WHERE DateField <= #Date
    UNION ALL
    SELECT DATEADD(DAY, 7, prev.dte), prev.counter + 1
    FROM dates AS prev
    WHERE prev.counter < #n
)
SELECT dte
FROM dates;
You can use small tally table as below
-- One row per calendar day from #d1 through #d2, both ends included.
Declare #d1 date = '2017-01-01',
        #d2 date = '2017-12-31'
-- The cross join of spt_values with itself only supplies enough rows to number;
-- TOP keeps exactly as many as there are days in the range.
select top (datediff(day, #d1, #d2) + 1)
    DateAdd(day, Row_Number() over (order by (Select NULL)) - 1, #d1) as dt
from master..spt_values as s1
cross join master..spt_values as s2
Or custom tally tables
-- Same day list as above, built from an inline row source instead of spt_values.
;with num (n) as (
    -- ten seed rows
    select v.n
    from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) as v(n)
)
, n1 as (
    -- numbers generation: 10 x 10 x 10 x 10 rows
    select a.n
    from num as a
    cross join num as b
    cross join num as c
    cross join num as d
)
select top (datediff(day, #d1, #d2) + 1)
    DateAdd(day, Row_Number() over (order by (Select NULL)) - 1, #d1) as dt
from n1
Yes, you can use a recursive query. Since top and aggregates are not allowed in the recursive part, you can use the row_number() function instead.
-- Inputs: the starting date (today) and the number of recursion steps to take.
Declare #date date = cast(getdate() as date), #n int = 10
-- Sample data: twenty consecutive days in May 2017.
declare #DateTable table (DateField date)
insert into #DateTable values ('2017-05-01'),('2017-05-02'),('2017-05-03'),('2017-05-04'),('2017-05-05'),('2017-05-06'),('2017-05-07'),('2017-05-08'),('2017-05-09'),('2017-05-10'),
('2017-05-11'),('2017-05-12'),('2017-05-13'),('2017-05-14'),('2017-05-15'),('2017-05-16'),('2017-05-17'),('2017-05-18'),('2017-05-19'),('2017-05-20')
-- Anchor member: the latest DateField on or before #date, passed through
-- dbo.DateFunction, tagged as recursion 0 and rank 1.
-- NOTE(review): ORDER BY directly before UNION ALL, and the unqualified DateField
-- in the recursive member (both d and r expose that name), look like they may be
-- rejected by SQL Server - confirm by running.
;with date_rte as (
select top 1 dbo.DateFunction(DateField) datefield, 0 recursions, cast(1 as bigint) rn
from #dateTable
where datefield <= #date
order by datefield desc
union all
-- Recursive member: join the table rows on or before the previous result and rank
-- them newest first; ROW_NUMBER stands in for TOP 1, which is not allowed here.
select dbo.DateFunction(DateField), recursions+1, ROW_NUMBER() over (order by d.datefield desc)
from #datetable d
join date_rte r on d.DateField <= r.datefield
-- only the rank-1 row of the previous level is expanded, and only #n times
where recursions < #n and rn = 1
)
-- Keep only the rank-1 row of the final recursion level.
select datefield
from date_rte
where rn=1 and recursions = #n

Iterate value dynamically

I'm using the below query to calculate a budget value dynamically, meaning it iterates up to the selected date value.
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinMonth) end) as [Budget]
Here the variable #DaysPastinMonth should be dynamic. That is, if I select a date of 03/31/2017, then the query should run up to the previous month's value. Another example: if I select August, then I need to run the query for Jan-Aug.
For Jan
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinJanMonth) end) as [Budget]
For Feb
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinFebMonth) end) as [Budget]
For Mar
SUM(case when Name = 'Budget' then Value + ((Value/#TotaldaysinMonth) *
#DaysPastinMarMonth) end) as [Budget]
Also I have created variables for all the 12 months which holds DaysPastinMonth.
Can anyone suggest how this can be achieved using case statement.
You are thinking about this as a loop when you could do it with set-based operations.
----------------------------------------------------------
-- Build #dates: one row per day between #sdate and #edate
-- (both inclusive), each with a row number RN
----------------------------------------------------------
if object_id('tempdb..#dates') is not null
begin
    drop table #dates
end
create table #dates (
    d  date,
    RN bigint
)
declare #sdate datetime = '2017-01-01 00:00'
declare #edate datetime = '2017-7-31 00:00'
-- spt_values rows of type 'P' supply the day offsets 0..N
insert into #dates (d, RN)
select
    dateadd(day, v.number, #sdate),
    row_number() over (order by (select null))
from master..spt_values as v
where v.type = 'P'
  and v.number <= datediff(day, #sdate, #edate)
-- number of generated days; used to size the random sample below
declare #numOfDays int = (select count(*) from #dates)
----------------------------------------------------------
--Populate Test Data
--Two rows per date in #dates: one named 'Budget' and one
--named 'Not The Budget', each with a random decimal value
----------------------------------------------------------
if object_id('tempdb..#testTable') is not null
drop table #testTable
create table #testTable([Name] varchar(64),
[Value] decimal (16,4),
DT datetime)
insert into #testTable ([Name],[Value],DT)
--first half: the 'Budget' rows
select
'Budget'
,r.randomNumber
,d.d
from
#dates d
inner join
--r: #numOfDays random values, numbered so they can be paired with #dates by RN
(SELECT TOP (select #numOfDays)
randomNumber,
row_number() over (order by (select null)) as RN
FROM (
--one random value per row of sysobjects; NEWID() varies per row
SELECT CAST(ABS(CAST(NEWID() AS binary(6)) %100000) + RAND() AS DECIMAL (16,4)) + 1 randomNumber
FROM sysobjects) sample
--GROUP BY collapses duplicate values before TOP keeps the largest ones
GROUP BY randomNumber
ORDER BY randomNumber DESC) r on r.RN = d.RN
union all
--second half: same construction, labelled 'Not The Budget'
select
'Not The Budget'
,r.randomNumber
,d.d
from
#dates d
inner join
(SELECT TOP (select #numOfDays)
randomNumber,
row_number() over (order by (select null)) as RN
FROM (
SELECT CAST(ABS(CAST(NEWID() AS binary(6)) %100000) + RAND() AS DECIMAL (16,4)) + 1 randomNumber
FROM sysobjects) sample
GROUP BY randomNumber
ORDER BY randomNumber DESC) r on r.RN = d.RN
----------------------------------------------------------
--Instead of making your variables "dynamic" which
--would likely consist of some loop, just pass in the
--month you care about and let SQL do the work
----------------------------------------------------------
declare #month datetime = '2016-03-31'
select
DT
,[Value]
,[Name]
--Budget = Value + (Value / days from the 1st of #month's month up to #month)
--                 * (days from the first to the last day of the previous month)
--nullif guards the division: when #month is the first day of a month the
--day count is 0 and the statement would otherwise fail with divide-by-zero
--NOTE(review): the divisor is one less than the day of the month (30 for the
--31st of March) - confirm whether the total days in the month was intended
,sum(case when [Name] = 'Budget'
then [Value] +
(([Value] / nullif(DATEDIFF(day, DATEADD(month, DATEDIFF(month, 0, #month), 0), #month), 0))
*
(DATEDIFF(DAY,DATEADD(MONTH, DATEDIFF(MONTH, 0, #month)-1, 0),DATEADD(MONTH, DATEDIFF(MONTH, -1, #month)-1, -1)))) end) as Budget
from
#testTable
where
DT >= DATEADD(yy, DATEDIFF(yy, 0, #month), 0) --this is Jan 1 of the year associated with your variable
group by
DT
,[Name]
,[Value]

7 day average in SQL Server 2014

I need to modify the following T-SQL statement to include a rolling 7 day average of the revenue.
What do I need to include in the following code to achieve that?
-- Revenue and cost totals over rows created in the last 90 days.
-- NOTE(review): with GROUP BY commented out below, the non-aggregated
-- CAST(create_dtg AS DATE) sits next to the SUM()s with no grouping.
SELECT
CAST(create_dtg AS DATE) DATE,
SUM([agent_rev] + [anchor_rev] + [corp_rev] + [offsite_rev]) AS RevenueTotals,
SUM([media_est_cost] + [other_cost]) AS COSTTOTALS
FROM
[dbo].[dw_rpt_traffic]
WHERE
[create_dtg] >= ( Getdate() - 90 )
--GROUP BY CREATE_DTG
--ORDER BY CREATE_DTG ASC
I also tried using PARTITION BY; however, this returns the same value as RevenueTotals.
-- Attempted rolling average over the per-date totals from the derived table A.
-- PARTITION BY a.dte starts a new window for every distinct date, so each window
-- only ever sees rows sharing that one date.
-- NOTE(review): ROWS 7 PRECEDING spans the current row plus the 7 rows before it,
-- i.e. up to 8 rows - confirm against the intended 7-day window.
Select a.dte, a.revenuetotals, a.COSTTOTALS, AVG(A.RevenueTotals) OVER (PARTITION BY a.dte ORDER BY a.dte ROWS 7 PRECEDING) as Day7Avg
from
(
-- totals per CREATE_DTG value; the grouping uses the full CREATE_DTG while the
-- output column is that value cast to DATE
select CAST(CREATE_DTG AS DATE) as dte,
SUM([AGENT_REV]+[ANCHOR_REV]+[CORP_REV]+[OFFSITE_REV]) as RevenueTotals,
SUM([MEDIA_EST_COST]+[OTHER_COST]) as COSTTOTALS
FROM [dbo].[dw_rpt_traffic]
where [CREATE_DTG] >= (GetDate() - 90)
GROUP BY CREATE_DTG
) as A
Group by a.dte, a.revenuetotals, a.COSTTOTALS
order by a.dte
Thanks, Karen
For rolling aggregates I typically use an OVER clause with ROWS [...] PRECEDING [...].
-- Moving average of daily revenue over the current row and the 6 daily rows before it.
WITH daily AS (
    -- one row per calendar day; reaches back 96 days so the earliest reported
    -- days still have preceding rows to average over
    SELECT CAST(t.Date AS DATE) AS [Date],
           SUM(t.Revenue)       AS [Revenue]
    FROM #tmp AS t
    WHERE CAST(t.Date AS DATE) > CAST(GETDATE() - 96 AS DATE)
    GROUP BY CAST(t.Date AS DATE)
),
rolling AS (
    SELECT d.[Date],
           d.Revenue,
           AVG(d.Revenue) OVER (
               ORDER BY d.[Date]
               ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
           ) AS [MA7]
    FROM daily AS d
)
-- report only the last 90 days
SELECT r.[Date],
       r.Revenue,
       r.MA7
FROM rolling AS r
WHERE r.[Date] > CAST(GETDATE() - 90 AS DATE)
ORDER BY r.[Date];
The table above was generated with the following:
-- Rebuild the #tmp sample table from scratch.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
BEGIN
    DROP TABLE #tmp;
END;
CREATE TABLE #tmp (
    [Date]    DATETIME,
    [Revenue] DECIMAL(18, 2)
);
-- 200 rows, one every half day going back from now, each with its own random revenue.
-- The loop is kept so that RAND() is evaluated once per inserted row.
DECLARE #first INT = 0;
DECLARE #last INT = 200;
WHILE #first < #last
BEGIN
    INSERT INTO #tmp ([Date], [Revenue])
    VALUES (GETDATE() - #first * 0.5, RAND() * 100000);
    SET #first += 1;
END;
Probably the easiest way uses outer apply:
-- Rolling average via OUTER APPLY: for each day, average the daily revenue
-- totals of that day and the six calendar days before it.
with rt as (
    -- daily totals for the last 90 days
    select CAST(CREATE_DTG AS DATE) as dte,
           SUM([AGENT_REV]+[ANCHOR_REV]+[CORP_REV]+[OFFSITE_REV]) as RevenueTotals,
           SUM([MEDIA_EST_COST]+[OTHER_COST]) as COSTTOTALS
    from [dbo].[dw_rpt_traffic]
    where [CREATE_DTG] >= (GetDate() - 90)
    -- required: the SUMs are aggregated per calendar day; without the GROUP BY
    -- the non-aggregated dte column makes the statement invalid
    group by CAST(CREATE_DTG AS DATE)
)
select rt.*, rolling.avgrt
from rt outer apply
     (select avg(rt2.RevenueTotals) as avgrt
      from rt rt2
      where rt2.dte >= dateadd(day, -6, rt.dte) and
            rt2.dte <= rt.dte
     ) rolling
order by dte;