Get available dates between range of dates [duplicate] - sql

This question already has answers here:
Left Outer Join Not Working?
(4 answers)
Closed 10 months ago.
I have a property rental site
So I want to know if property is available for a range of dates
First of all I defined the range of dates as:
DECLARE #AvailableRentalStartingDate DATETIME = '2022-04-11'
, #AvailableRentalEndingDate DATETIME = '2022-04-24'
Now the property rent like:
DECLARE #Rentals AS TABLE
(
[PropertyId] UNIQUEIDENTIFIER,
[StartingDate] DATE,
[EndingDate] DATE
)
INSERT INTO #Rentals ([PropertyId], [StartingDate], [EndingDate])
VALUES ('A5B2B505-EC6F-EC11-A004-00155E014807','2022-04-11 16:47:20.897', '2022-04-14 16:47:20.897'),
('A5B2B505-EC6F-EC11-A004-00155E014807','2022-04-16 16:47:20.897','2022-04-21 16:47:20.897')
As you can see we have available date 2022-04-15
Dates table
DECLARE #Dates AS TABLE
(
DateName DATE
)
DECLARE #TotalDays INT = 365;
WHILE #TotalDays > 0
BEGIN
INSERT INTO #Dates ([DateName])
SELECT DATEADD(DAY, #TotalDays, '2021-12-31');
SELECT #TotalDays = #TotalDays - 1;
END
Then the select
SELECT [R].[PropertyId], [D].[DateName], CASE WHEN [R].[StartingDate] IS NULL THEN 1 ELSE 0 END AS [IsAvailable]
FROM #Dates AS D
LEFT JOIN #Rentals R ON [D].[DateName] >= [R].[StartingDate] AND [D].[DateName] <= [R].[EndingDate]
WHERE [D].[DateName] BETWEEN #AvailableRentalStartingDate AND #AvailableRentalEndingDate
AND [R].[PropertyId] = 'A5B2B505-EC6F-EC11-A004-00155E014807'
ORDER BY [D].[DateName]
The problem is that it does not produce a NULL row for the available date 2022-04-15; it only returns the dates that are not available.
I just want to know whether that PropertyId is available. In this case it should be, because 2022-04-15 is free. How can I get a single row showing available = true? Regards

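Your problem is the WHERE clause on PropertyId;
you should have that as a JOIN condition instead. On the dates with no rental, the LEFT JOIN yields NULL for every R column, so the WHERE filter on [R].[PropertyId] discards exactly those rows.
WHERE and ON conditions can be used interchangeably in an INNER JOIN, but in an OUTER JOIN they change the meaning.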
demo link
SELECT [R].[PropertyId], [D].[DateName], CASE WHEN [R].[StartingDate] IS NULL THEN 1 ELSE 0 END AS [IsAvailable]
FROM #Dates AS D
LEFT JOIN #Rentals R ON [D].[DateName] >= [R].[StartingDate] AND [D].[DateName] <= [R].[EndingDate]
AND [R].[PropertyId] = 'A5B2B505-EC6F-EC11-A004-00155E014807'
WHERE [D].[DateName] BETWEEN #AvailableRentalStartingDate AND #AvailableRentalEndingDate
ORDER BY [D].[DateName]

Related

Return 0 values where corresponding type is missing

I need a select query where I get amount of visits, type of visit to sort it and in which dates this happend.
My problem is that I dont get 0s at times where there are no visits.
I am using this code:
SELECT COUNT(*) AS numberOfVisits,
CONVERT(DATE, CONVERT(VARCHAR(10), CAST(dateOfVisit AS DATE), 121)) AS dateOfVisit,
typeOfVisit
FROM WEB
WHERE dateOfVisit >= '1-1-2021'
AND dateOfVisit <= '1-31-2021'
GROUP BY CAST(dateOfVisit AS DATE), typeOfVisit
ORDER BY CAST(dateOfVisit AS DATE)
/*i am converting and casting the date for other reasons that i need in
my API/frontend and i also get the condition of date from api 1-1-2021
and 1-31-2021 are just for sample*/
and with this i get the following results:
https://imgur.com/a/o3c9A2p
The columns Follow as: numberOfVisits | dateOfVisit | typeOfVisit
As you can see I have 4 types.
The first 3 rows contain types 2,3,4 but not 1; I want to have an extra row here with value 0, same date and type 1.
The actual value 0 is also not written in the database and neither is null
The desired result would be like this:
0 2021-01-04 1
1 2021-01-04 2
10 2021-01-04 3
2 2021-01-04 4
and if the result would only show that there were visits of type 2 then it would be something like this:
0 2021-01-04 1
1 2021-01-04 2
0 2021-01-04 3
0 2021-01-04 4
UPDATE:
So I built what I think is a calendar table without actually creating one, and I do get the data in the correct format, but now my count is way off... let me show you
the query :
DECLARE #MinDate DATE = '1-1-2021',
#MaxDate DATE = '1-31-2021';
SELECT COUNT(*) AS numberOfVisits,
CONVERT(DATE, CONVERT(VARCHAR(10), CAST(b.VisitDate AS DATE), 121)) AS dateOfVisit,
a.typeOfVisit
FROM WEB a
CROSS JOIN WEB b
WHERE b.VisitDate >= #MinDate
AND b.VisitDate <= #MaxDate
GROUP BY CAST(b.VisitDate AS DATE), a.typeOfVisit
ORDER BY CAST(b.VisitDate AS DATE)
and the result of query : https://imgur.com/a/BtpoqD1
As you can see the dates and types can all be seen now but the count is huge and if dont do cross join then the dates with no count cant be seen again... I have no clue what goes wrong can you please help me ?
Maybe it wasnt clear enough what i wanted in my question, but i managed to come to a solution with case in my select.
If anyone is interested in the solution i came up with, here is how i wrote the select with everything else the same as in the original question.
SELECT SUM(CASE WHEN TypeOfVisit = 1 THEN 1 ELSE 0 END) VisitType1,
//...
You can't count a date that has no row in the table. You need a calendar table containing every date; select from it, LEFT JOIN your visit table to it, and then count.
It would be something like this:
-- Build a one-row-per-day calendar for January 2021, then count visits per
-- day and visit type. The calendar drives the LEFT JOIN, so a day without
-- any visit still appears (typeOfVisit NULL, VisitCount 0).
if OBJECT_ID('tempdb..#Calendar') is not null drop table #Calendar
create table #Calendar (adDate date)

declare @dDate date = '2021-01-01'
while @dDate <= '2021-01-31'
begin
    insert into #Calendar (adDate)
    select @dDate
    set @dDate = dateadd(DAY, 1, @dDate)
end

select
    c.adDate,
    w.typeOfVisit,
    -- dateOfVisit is non-NULL on every matched row (the join predicate
    -- requires it), so this counts matched visits and yields 0 for empty days.
    COUNT(w.dateOfVisit) as VisitCount
from #Calendar c
left join web w
    -- Half-open day range instead of equality: still correct when
    -- dateOfVisit is a plain date, and also matches rows that carry a
    -- time-of-day. NOTE(review): dateOfVisit presumably is a datetime
    -- (the question casts it to DATE) -- confirm.
    on  w.dateOfVisit >= c.adDate
    and w.dateOfVisit <  dateadd(DAY, 1, c.adDate)
group by
    c.adDate,
    w.typeOfVisit
order by
    c.adDate
If you want to have 0 visits by type also, not just by day then you can do it like this:
if OBJECT_ID('tempdb..#Calendar') is not null drop table #Calendar
create table #Calendar (adDate date, anTypeOfVisit int)
declare
#dDate date = '2021-01-01',
#nTypeOfVisit int
while #dDate <= '2021-01-31'
begin
set #nTypeOfVisit = 1
while #nTypeOfVisit <=4
begin
insert into #Calendar (adDate, anTypeOfVisit)
select #dDate, #nTypeOfVisit
set #nTypeOfVisit = #nTypeOfVisit + 1
end
set #dDate = dateadd(DAY, 1, #dDate)
end
select
c.adDate,
c.anTypeOfVisit,
COUNT(w.dateofvisit) as VisitCount
from #Calendar c
left join web w on w.dateOfVisit = c.adDate and w.typeofvisit = c.anTypeOfVisit
group by
c.adDate,
c.anTypeOfVisit
order by
c.adDate,
c.anTypeOfVisit
You need to create a table that contains a row for each visit type for each date. Then do a left join to bring in the counts from your actual data
DROP TABLE IF EXISTS #Visit
CREATE TABLE #Visit (ID INT IDENTITY(1,1) ,DateOfVisit Date,TypeOfVisit int)
INSERT INTO #Visit
VALUES ('2021-01-04',2),('2021-01-04',3),('2021-01-04',3),('2021-01-04',4);
WITH cte_DistinctVisitDate AS (
SELECT DateOfVisit
FROM #Visit
WHERE DateOfVisit BETWEEN '2020-01-04' AND '2021-01-04' /*Always use YYYY-MM-DD for params as it is culture-agnostic*/
GROUP BY DateOfVisit
),
cte_TypePerDate AS (
/*One row for each type on each visit date*/
SELECT DateOfVisit,TypeOfVisit
FROM cte_DistinctVisitDate
CROSS JOIN (VALUES (1),(2),(3),(4)) AS B(TypeOfVisit) /*I am hard coding raw data, but you should use lookup table that contains each visit type*/
),
cte_VisitCount AS (
/*Actual counts, but only lists visit types with visits*/
SELECT DateOfVisit,TypeOfVisit,VisitCnt = COUNT(*)
FROM #Visit
GROUP BY DateOfVisit,TypeOfVisit
)
SELECT A.DateOfVisit,A.TypeOfVisit,VisitCnt = ISNULL(B.VisitCnt,0)
FROM cte_TypePerDate AS A
LEFT JOIN cte_VisitCount AS B
ON A.DateOfVisit = B.DateOfVisit
AND A.TypeOfVisit = B.TypeOfVisit

Displaying student absent dates

Here is the table I want to display:
tblAttendance table
CustomerId
Id
Attendence
Date
and
tblStudent
CustomerId
Name
Now I want to search by from to date and want absent date.
How can I achieve this?
I tried below code:
ALTER PROCEDURE spExceptDate
AS
declare #StartDate DATE, #EndDate DATE
set #StartDate = '2020-02-15';
set #EndDate = '2020-02-25';
BEGIN
SELECT CustomerId,FirstName+' '+LastName,Date,Attendance
FROM
[dbo].[tblAttendance] att
LEFT JOIN
[dbo].[tblStudent] st
ON att.CustomerId = st.Code
EXCEPT
SELECT CustomerId,FirstName+' '+LastName,Date,Attendance
FROM
[dbo].[tblAttendance] att
LEFT JOIN
[dbo].[tblStudent] st
ON att.CustomerId = st.Code
where att.Date>='2020-02-15' and att.Date<='2020-02-25'
END
GO
I want the dates on which a student was absent.
Basically what you need is list of possible dates between From and To
-- Absence report: pair every student with every calendar day in the range,
-- then look up the attendance row; a day with no attendance row is shown as 'A'.
DECLARE #StartDate DATE = '2020-02-15',
        #EndDate DATE = '2020-02-25';

-- Recursive CTE producing one row per day from #StartDate through #EndDate
-- (holidays and weekends are not filtered here; you should filter them).
WITH SchoolCalendar (WorkingDay) AS
(
    SELECT #StartDate
    UNION ALL
    SELECT DATEADD(DAY, 1, WorkingDay)
    FROM SchoolCalendar
    WHERE WorkingDay < #EndDate
)
-- Every (day, student) pair is the left side; attendance is optional.
SELECT
    st.Code AS CustomerId,
    st.FirstName + ' ' + st.LastName AS Name,
    st.WorkingDay AS Date,
    COALESCE(Attendance, 'A') AS Attendance
FROM
(
    SELECT *
    FROM SchoolCalendar
    CROSS JOIN tblStudent
) AS st
LEFT JOIN [dbo].[tblAttendance] AS att
    ON  att.CustomerId = st.Code
    AND att.Date = st.WorkingDay
WHERE st.WorkingDay BETWEEN #StartDate AND #EndDate
ORDER BY st.Code, st.WorkingDay
You may need only one query if you want to fetch only absent student names from the given date range
SELECT CustomerId,FirstName+' '+LastName,Date,Attendance
FROM [dbo].[tblAttendance] att
LEFT JOIN [dbo].[tblStudent] st ON att.CustomerId = st.Code
WHERE att.Date>='2020-02-15' and att.Date<='2020-02-25' AND att.Attendance <> 'P'
^^^^^^^^^^^^^^^^^^^^

Aggregate for each day over time series, without using non-equijoin logic

Initial Question
Given the following dataset paired with a dates table:
MembershipId | ValidFromDate | ValidToDate
==========================================
0001 | 1997-01-01 | 2006-05-09
0002 | 1997-01-01 | 2017-05-12
0003 | 2005-06-02 | 2009-02-07
How many Memberships were open on any given day or timeseries of days?
Initial Answer
Following this question being asked here, this answer provided the necessary functionality:
select d.[Date]
,count(m.MembershipID) as MembershipCount
from DIM.[Date] as d
left join Memberships as m
on(d.[Date] between m.ValidFromDateKey and m.ValidToDateKey)
where d.CalendarYear = 2016
group by d.[Date]
order by d.[Date];
though a commenter remarked that There are other approaches when the non-equijoin takes too long.
Followup
As such, what would the equijoin only logic look like to replicate the output of the query above?
Progress So Far
From the answers provided so far I have come up with the below, which outperforms on the hardware I am working with across 3.2 million Membership records:
declare #s date = '20160101';
declare #e date = getdate();
with s as
(
select d.[Date] as d
,count(s.MembershipID) as s
from dbo.Dates as d
join dbo.Memberships as s
on d.[Date] = s.ValidFromDateKey
group by d.[Date]
)
,e as
(
select d.[Date] as d
,count(e.MembershipID) as e
from dbo.Dates as d
join dbo.Memberships as e
on d.[Date] = e.ValidToDateKey
group by d.[Date]
),c as
(
select isnull(s.d,e.d) as d
,sum(isnull(s.s,0) - isnull(e.e,0)) over (order by isnull(s.d,e.d)) as c
from s
full join e
on s.d = e.d
)
select d.[Date]
,c.c
from dbo.Dates as d
left join c
on d.[Date] = c.d
where d.[Date] between #s and #e
order by d.[Date]
;
Following on from that, to split this aggregate into constituent groups per day I have the following, which is also performing well:
-- Daily membership count per MembershipGrouping using equality joins only.
--   s: memberships that start, per (date, grouping)
--   e: memberships that end,   per (date, grouping)
--   c: running total of starts minus ends within each grouping
declare @s date = '20160101';
declare @e date = getdate();
with s as
(
    select d.[Date] as d
          ,s.MembershipGrouping as g
          ,count(s.MembershipID) as s
    from dbo.Dates as d
        join dbo.Memberships as s
            on d.[Date] = s.ValidFromDateKey
    group by d.[Date]
            ,s.MembershipGrouping
)
,e as
(
    -- NOTE(review): ends are booked on ValidToDateKey itself, so the running
    -- total in c already excludes a membership on its final valid day --
    -- confirm that this boundary is the intended one.
    select d.[Date] as d
          ,e.MembershipGrouping as g
          ,count(e.MembershipID) as e
    from dbo.Dates as d
        join dbo.Memberships as e
            on d.[Date] = e.ValidToDateKey
    group by d.[Date]
            ,e.MembershipGrouping
),c as
(
    -- The full join keeps (date, grouping) pairs that have only starts or only ends.
    select isnull(s.d,e.d) as d
          ,isnull(s.g,e.g) as g
          ,sum(isnull(s.s,0) - isnull(e.e,0)) over (partition by isnull(s.g,e.g) order by isnull(s.d,e.d)) as c
    from s
        full join e
            on s.d = e.d
            and s.g = e.g
)
select d.[Date]
      ,c.g
      ,c.c
from dbo.Dates as d
    left join c
        on d.[Date] = c.d
where d.[Date] between @s and @e
order by d.[Date]
        ,c.g
;
Can anyone improve on the above?
If most of your membership validity intervals are longer than few days, have a look at an answer by Martin Smith. That approach is likely to be faster.
When you take calendar table (DIM.[Date]) and left join it with Memberships, you may end up scanning the Memberships table for each date of the range. Even if there is an index on (ValidFromDate, ValidToDate), it may not be super useful.
It is easy to turn it around.
Scan the Memberships table only once and for each membership find those dates that are valid using CROSS APPLY.
Sample data
DECLARE #T TABLE (MembershipId int, ValidFromDate date, ValidToDate date);
INSERT INTO #T VALUES
(1, '1997-01-01', '2006-05-09'),
(2, '1997-01-01', '2017-05-12'),
(3, '2005-06-02', '2009-02-07');
DECLARE #RangeFrom date = '2006-01-01';
DECLARE #RangeTo date = '2006-12-31';
Query 1
SELECT
CA.dt
,COUNT(*) AS MembershipCount
FROM
#T AS Memberships
CROSS APPLY
(
SELECT dbo.Calendar.dt
FROM dbo.Calendar
WHERE
dbo.Calendar.dt >= Memberships.ValidFromDate
AND dbo.Calendar.dt <= Memberships.ValidToDate
AND dbo.Calendar.dt >= #RangeFrom
AND dbo.Calendar.dt <= #RangeTo
) AS CA
GROUP BY
CA.dt
ORDER BY
CA.dt
OPTION(RECOMPILE);
OPTION(RECOMPILE) is not really needed, I include it in all queries when I compare execution plans to be sure that I'm getting the latest plan when I play with the queries.
When I looked at the plan of this query I saw that the seek in the Calendar.dt table was using only ValidFromDate and ValidToDate, the #RangeFrom and #RangeTo were pushed to the residue predicate. It is not ideal. The optimiser is not smart enough to calculate maximum of two dates (ValidFromDate and #RangeFrom) and use that date as a starting point of the seek.
It is easy to help the optimiser:
Query 2
SELECT
CA.dt
,COUNT(*) AS MembershipCount
FROM
#T AS Memberships
CROSS APPLY
(
SELECT dbo.Calendar.dt
FROM dbo.Calendar
WHERE
dbo.Calendar.dt >=
CASE WHEN Memberships.ValidFromDate > #RangeFrom
THEN Memberships.ValidFromDate
ELSE #RangeFrom END
AND dbo.Calendar.dt <=
CASE WHEN Memberships.ValidToDate < #RangeTo
THEN Memberships.ValidToDate
ELSE #RangeTo END
) AS CA
GROUP BY
CA.dt
ORDER BY
CA.dt
OPTION(RECOMPILE)
;
In this query the seek is optimal and doesn't read dates that may be discarded later.
Finally, you may not need to scan the whole Memberships table.
We need only those rows where the given range of dates intersects with the valid range of the membership.
Query 3
SELECT
CA.dt
,COUNT(*) AS MembershipCount
FROM
#T AS Memberships
CROSS APPLY
(
SELECT dbo.Calendar.dt
FROM dbo.Calendar
WHERE
dbo.Calendar.dt >=
CASE WHEN Memberships.ValidFromDate > #RangeFrom
THEN Memberships.ValidFromDate
ELSE #RangeFrom END
AND dbo.Calendar.dt <=
CASE WHEN Memberships.ValidToDate < #RangeTo
THEN Memberships.ValidToDate
ELSE #RangeTo END
) AS CA
WHERE
Memberships.ValidToDate >= #RangeFrom
AND Memberships.ValidFromDate <= #RangeTo
GROUP BY
CA.dt
ORDER BY
CA.dt
OPTION(RECOMPILE)
;
Two intervals [a1;a2] and [b1;b2] intersect when
a2 >= b1 and a1 <= b2
These queries assume that Calendar table has an index on dt.
You should try and see what indexes are better for the Memberships table.
For the last query, if the table is rather large, most likely two separate indexes on ValidFromDate and on ValidToDate would be better than one index on (ValidFromDate, ValidToDate).
You should try different queries and measure their performance on the real hardware with real data. Performance may depend on the data distribution, how many memberships there are, what are their valid dates, how wide or narrow is the given range, etc.
I recommend to use a great tool called SQL Sentry Plan Explorer to analyse and compare execution plans. It is free. It shows a lot of useful stats, such as execution time and number of reads for each query. The screenshots above are from this tool.
On the assumption your date dimension contains all dates contained in all membership periods you can use something like the following.
The join is an equi join so can use hash join or merge join not just nested loops (which will execute the inside sub tree once for each outer row).
Assuming index on (ValidToDate) include(ValidFromDate) or reverse this can use a single seek against Memberships and a single scan of the date dimension. The below has an elapsed time of less than a second for me to return the results for a year against a table with 3.2 million members and general active membership of 1.4 million (script)
DECLARE #StartDate DATE = '2016-01-01',
#EndDate DATE = '2016-12-31';
WITH MD
AS (SELECT Date,
SUM(Adj) AS MemberDelta
FROM Memberships
CROSS APPLY (VALUES ( ValidFromDate, +1),
--Membership count decremented day after the ValidToDate
(DATEADD(DAY, 1, ValidToDate), -1) ) V(Date, Adj)
WHERE
--Members already expired before the time range of interest can be ignored
ValidToDate >= #StartDate
AND
--Members whose membership starts after the time range of interest can be ignored
ValidFromDate <= #EndDate
GROUP BY Date),
MC
AS (SELECT DD.DateKey,
SUM(MemberDelta) OVER (ORDER BY DD.DateKey ROWS UNBOUNDED PRECEDING) AS CountOfNonIgnoredMembers
FROM DIM_DATE DD
LEFT JOIN MD
ON MD.Date = DD.DateKey)
SELECT DateKey,
CountOfNonIgnoredMembers AS MembershipCount
FROM MC
WHERE DateKey BETWEEN #StartDate AND #EndDate
ORDER BY DateKey
Demo (uses extended period as the calendar year of 2016 isn't very interesting with the example data)
One approach is to first use an INNER JOIN to find the set of matches and COUNT() to project MemberCount GROUPed BY DateKey, then UNION ALL with the same set of dates, with a 0 on that projection for the count of members for each date. The last step is to SUM() the MemberCount of this union, and GROUP BY DateKey. As requested, this avoids LEFT JOIN and NOT EXISTS. As another member pointed out, this is not an equi-join, because we need to use a range, but I think it does what you intend.
This will serve up 1 year's worth of data with around 100k logical reads. On an ordinary laptop with a spinning disk, from cold cache, it serves 1 month in under a second (with correct counts).
Here is an example that creates 3.3 million rows of random duration. The query at the bottom returns one month's worth of data.
--Stay quiet for a moment
SET NOCOUNT ON
SET STATISTICS IO OFF
SET STATISTICS TIME OFF
--Clean up if re-running
DROP TABLE IF EXISTS DIM_DATE
DROP TABLE IF EXISTS FACT_MEMBER
--Date dimension
CREATE TABLE DIM_DATE
(
DateKey DATE NOT NULL
)
--Membership fact
CREATE TABLE FACT_MEMBER
(
MembershipId INT NOT NULL
, ValidFromDateKey DATE NOT NULL
, ValidToDateKey DATE NOT NULL
)
--Populate Date dimension from 2001 through end of 2018
DECLARE #startDate DATE = '2001-01-01'
DECLARE #endDate DATE = '2018-12-31'
;WITH CTE_DATE AS
(
SELECT #startDate AS DateKey
UNION ALL
SELECT
DATEADD(DAY, 1, DateKey)
FROM
CTE_DATE AS D
WHERE
D.DateKey < #endDate
)
INSERT INTO
DIM_DATE
(
DateKey
)
SELECT
D.DateKey
FROM
CTE_DATE AS D
OPTION (MAXRECURSION 32767)
--Populate Membership fact with members having a random membership length from 1 to 36 months
;WITH CTE_DATE AS
(
SELECT #startDate AS DateKey
UNION ALL
SELECT
DATEADD(DAY, 1, DateKey)
FROM
CTE_DATE AS D
WHERE
D.DateKey < #endDate
)
,CTE_MEMBER AS
(
SELECT 1 AS MembershipId
UNION ALL
SELECT MembershipId + 1 FROM CTE_MEMBER WHERE MembershipId < 500
)
,
CTE_MEMBERSHIP
AS
(
SELECT
ROW_NUMBER() OVER (ORDER BY NEWID()) AS MembershipId
, D.DateKey AS ValidFromDateKey
FROM
CTE_DATE AS D
CROSS JOIN CTE_MEMBER AS M
)
INSERT INTO
FACT_MEMBER
(
MembershipId
, ValidFromDateKey
, ValidToDateKey
)
SELECT
M.MembershipId
, M.ValidFromDateKey
, DATEADD(MONTH, FLOOR(RAND(CHECKSUM(NEWID())) * (36-1)+1), M.ValidFromDateKey) AS ValidToDateKey
FROM
CTE_MEMBERSHIP AS M
OPTION (MAXRECURSION 32767)
--Add clustered Primary Key to Date dimension
ALTER TABLE DIM_DATE ADD CONSTRAINT PK_DATE PRIMARY KEY CLUSTERED
(
DateKey ASC
)
--Index
--(Optimize in your spare time)
DROP INDEX IF EXISTS SK_FACT_MEMBER ON FACT_MEMBER
CREATE CLUSTERED INDEX SK_FACT_MEMBER ON FACT_MEMBER
(
ValidFromDateKey ASC
, ValidToDateKey ASC
, MembershipId ASC
)
RETURN
--Start test
--Emit stats
SET STATISTICS IO ON
SET STATISTICS TIME ON
--Establish range of dates
DECLARE
#rangeStartDate DATE = '2010-01-01'
, #rangeEndDate DATE = '2010-01-31'
--UNION the count of members for a specific date range with the "zero" set for the same range, and SUM() the counts
;WITH CTE_MEMBER
AS
(
SELECT
D.DateKey
, COUNT(*) AS MembershipCount
FROM
DIM_DATE AS D
INNER JOIN FACT_MEMBER AS M ON
M.ValidFromDateKey <= #rangeEndDate
AND M.ValidToDateKey >= #rangeStartDate
AND D.DateKey BETWEEN M.ValidFromDateKey AND M.ValidToDateKey
WHERE
D.DateKey BETWEEN #rangeStartDate AND #rangeEndDate
GROUP BY
D.DateKey
UNION ALL
SELECT
D.DateKey
, 0 AS MembershipCount
FROM
DIM_DATE AS D
WHERE
D.DateKey BETWEEN #rangeStartDate AND #rangeEndDate
)
SELECT
M.DateKey
, SUM(M.MembershipCount) AS MembershipCount
FROM
CTE_MEMBER AS M
GROUP BY
M.DateKey
ORDER BY
M.DateKey ASC
OPTION (RECOMPILE, MAXDOP 1)
Here's how I'd solve this problem with equijoin:
--data generation
declare #Membership table (MembershipId varchar(10), ValidFromDate date, ValidToDate date)
insert into #Membership values
('0001', '1997-01-01', '2006-05-09'),
('0002', '1997-01-01', '2017-05-12'),
('0003', '2005-06-02', '2009-02-07')
declare #startDate date, #endDate date
select #startDate = MIN(ValidFromDate), #endDate = max(ValidToDate) from #Membership
--in order to use equijoin I need all days between min date and max date from Membership table (both columns)
;with cte as (
select #startDate [date]
union all
select DATEADD(day, 1, [date]) from cte
where [date] < #endDate
)
--in this query, we will assign value to each day:
--one, if project started on that day
--minus one, if project ended on that day
--then, it's enough to (cumulative) sum all this values to get how many projects were ongoing on particular day
select [date],
sum(case when [DATE] = ValidFromDate then 1 else 0 end +
case when [DATE] = ValidToDate then -1 else 0 end)
over (order by [date] rows between unbounded preceding and current row)
from cte [c]
left join #Membership [m]
on [c].[date] = [m].ValidFromDate or [c].[date] = [m].ValidToDate
option (maxrecursion 0)
Here's another solution:
--data generation
declare #Membership table (MembershipId varchar(10), ValidFromDate date, ValidToDate date)
insert into #Membership values
('0001', '1997-01-01', '2006-05-09'),
('0002', '1997-01-01', '2017-05-12'),
('0003', '2005-06-02', '2009-02-07')
;with cte as (
select CAST('2016-01-01' as date) [date]
union all
select DATEADD(day, 1, [date]) from cte
where [date] < '2016-12-31'
)
select [date],
(select COUNT(*) from #Membership where ValidFromDate < [date]) -
(select COUNT(*) from #Membership where ValidToDate < [date]) [ongoing]
from cte
option (maxrecursion 0)
Pay attention: I think @PittsburghDBA is right in saying that the current query returns a wrong result.
The last day of a membership is not counted, so the final sum is lower than it should be.
I have corrected it in this version.
This should improve a bit your actual progress:
declare #s date = '20160101';
declare #e date = getdate();
with
x as (
select d, sum(c) c
from (
select ValidFromDateKey d, count(MembershipID) c
from Memberships
group by ValidFromDateKey
union all
-- dateadd needed to count last day of membership too!!
select dateadd(dd, 1, ValidToDateKey) d, -count(MembershipID) c
from Memberships
group by ValidToDateKey
)x
group by d
),
c as
(
select d, sum(x.c) over (order by d) as c
from x
)
select d.day, c cnt
from calendar d
left join c on d.day = c.d
where d.day between #s and #e
order by d.day;
First of all, your query yields '1' as MembershipCount even if no active membership exists for the given date.
You should return SUM(CASE WHEN m.MembershipID IS NOT NULL THEN 1 ELSE 0 END) AS MembershipCount.
For optimal performance create an index on Memberships(ValidFromDateKey, ValidToDateKey, MembershipId) and another on DIM.[Date](CalendarYear, DateKey).
With that done, the optimal query shall be:
DECLARE #CalendarYear INT = 2000
SELECT dim.DateKey, SUM(CASE WHEN con.MembershipID IS NOT NULL THEN 1 ELSE 0 END) AS MembershipCount
FROM
DIM.[Date] dim
LEFT OUTER JOIN (
SELECT ValidFromDateKey, ValidToDateKey, MembershipID
FROM Memberships
WHERE
ValidFromDateKey <= CONVERT(DATETIME, CONVERT(VARCHAR, #CalendarYear) + '1231')
AND ValidToDateKey >= CONVERT(DATETIME, CONVERT(VARCHAR, #CalendarYear) + '0101')
) con
ON dim.DateKey BETWEEN con.ValidFromDateKey AND con.ValidToDateKey
WHERE dim.CalendarYear = #CalendarYear
GROUP BY dim.DateKey
ORDER BY dim.DateKey
Now, for your last question: what would the equivalent equijoin query look like?
There is NO WAY you can rewrite this as an equijoin!
Equijoin doesn't imply using join syntax. Equijoin implies using an equals predicate, whatever the syntax.
Your query performs a range comparison, hence equals doesn't apply: a BETWEEN or similar is required.

Trying Exclude 2 days within the year

I added my dates to the query so you can see what dates I'm running it for. From this range I'm trying to exclude two days: 10/6/2016 and 10/7/2016.
DECLARE #Startdate AS DATETIME
DECLARE #EndDate AS DATETIME
SET #Startdate = '10/1/2015'
SET #EndDate = '9/30/2016
SELECT A.agent_name, COUNT(*) AS CH, (CAST(SUM(reporting_call_matrix.talk_time) / COUNT(reporting_call_matrix.answer_time) AS float)
+ CAST(SUM(reporting_call_matrix.hold_time) AS float) / COUNT(reporting_call_matrix.answer_time)) +
CAST(SUM(reporting_call_matrix.work_time) AS float) / COUNT(reporting_call_matrix.answer_time) AS AHT,
answer_agent_id
FROM reporting_call_matrix INNER JOIN
reporting_agents AS A ON reporting_call_matrix.answer_agent_id = A.agent_id INNER JOIN
reporting_split_info ON reporting_call_matrix.split = reporting_split_info.split
WHERE (reporting_call_matrix.answer_agent_id IS NOT NULL) AND (reporting_call_matrix.split IN (9,23)) AND
(reporting_call_matrix.queued_time >= #StartDate) AND (reporting_call_matrix.queued_time < DATEADD(d, 1, #EndDate)) AND
(reporting_call_matrix.answer_time IS NOT NULL) AND
GROUP BY A.agent_name,answer_agent_id
No idea what you are actually trying to do here, but I cleaned up this query so it is more legible. I also greatly simplified that nightmarish calculation. You don't have to cast everything to a float; simply multiply it by 1.0. Also, float is probably not a great choice if you want accuracy because it is an approximate datatype, whereas 1.0 will be numeric, which is an exact datatype.
With some aliases and formatting this is so much easier to read.
-- Calls handled (CH) and average handle time (AHT) per answering agent for
-- splits 9 and 23, for calls queued from #StartDate through the end of #EndDate.
SELECT A.agent_name
     , COUNT(*) AS CH
     -- AHT = (talk + hold + work) / answered calls. All three sums are divided
     -- by the call count, and the * 1.0 is applied BEFORE the division so the
     -- division is done in numeric rather than (possibly) integer arithmetic.
     -- COUNT(rcm.answer_time) is never 0: the WHERE clause keeps only rows
     -- with a non-NULL answer_time.
     , (SUM(rcm.talk_time) + SUM(rcm.hold_time) + SUM(rcm.work_time)) * 1.0
           / COUNT(rcm.answer_time) AS AHT
     , rcm.answer_agent_id
FROM reporting_call_matrix rcm
INNER JOIN reporting_agents AS A ON rcm.answer_agent_id = A.agent_id
INNER JOIN reporting_split_info rsi ON rcm.split = rsi.split
WHERE rcm.answer_agent_id IS NOT NULL
  AND rcm.split IN (9,23)
  AND rcm.queued_time >= #StartDate
  -- half-open upper bound: includes every time of day on #EndDate
  AND rcm.queued_time < DATEADD(day, 1, #EndDate)
  AND rcm.answer_time IS NOT NULL
GROUP BY A.agent_name
       , rcm.answer_agent_id
Now, what is the actual question here?
With your recent update I will hazard a guess that queued_time is a date datatype?
Why not simply add one more predicate to your where clause?
AND rcm.queued_time not in ('2016-10-06', '2016-10-07')

SQL grouping and running total of open items for a date range

I have a table of items that, for sake of simplicity, contains the ItemID, the StartDate, and the EndDate for a list of items.
ItemID StartDate EndDate
1 1/1/2011 1/15/2011
2 1/2/2011 1/14/2011
3 1/5/2011 1/17/2011
...
My goal is to be able to join this table to a table with a sequential list of dates,
and say both how many items are open on a particular date, and also how many items are cumulatively open.
Date ItemsOpened CumulativeItemsOpen
1/1/2011 1 1
1/2/2011 1 2
...
I can see how this would be done with a WHILE loop,
but that has performance implications. I'm wondering how
this could be done with a set-based approach?
SELECT COUNT(CASE WHEN d.CheckDate = i.StartDate THEN 1 ELSE NULL END)
AS ItemsOpened
, COUNT(i.StartDate)
AS ItemsOpenedCumulative
FROM Dates AS d
LEFT JOIN Items AS i
ON d.CheckDate BETWEEN i.StartDate AND i.EndDate
GROUP BY d.CheckDate
This may give you what you want
SELECT DATE,
SUM(ItemOpened) AS ItemsOpened,
COUNT(StartDate) AS ItemsOpenedCumulative
FROM
(
SELECT d.Date, i.startdate, i.enddate,
CASE WHEN i.StartDate = d.Date THEN 1 ELSE 0 END AS ItemOpened
FROM Dates d
LEFT OUTER JOIN Items i ON d.Date BETWEEN i.StartDate AND i.EndDate
) AS x
GROUP BY DATE
ORDER BY DATE
This assumes that your date values are DATE data type. Or, the dates are DATETIME with no time values.
You may find this useful. The recursive part can be replaced with a table. To demonstrate that it works I had to populate some sort of date table. As you can see, the actual SQL is short and simple.
DECLARE #i table (itemid INT, startdate DATE, enddate DATE)
INSERT #i VALUES (1,'1/1/2011', '1/15/2011')
INSERT #i VALUES (2,'1/2/2011', '1/14/2011')
INSERT #i VALUES (3,'1/5/2011', '1/17/2011')
DECLARE #from DATE
DECLARE #to DATE
SET #from = '1/1/2011'
SET #to = '1/18/2011'
-- the recusive sql is strictly to make a datelist between #from and #to
;WITH cte(Date)
AS (
SELECT #from DATE
UNION ALL
SELECT DATEADD(day, 1, DATE)
FROM cte ch
WHERE DATE < #to
)
SELECT cte.Date, sum(case when cte.Date=i.startdate then 1 else 0 end) ItemsOpened, count(i.itemid) ItemsOpenedCumulative
FROM cte
left join #i i on cte.Date between i.startdate and i.enddate
GROUP BY cte.Date
OPTION( MAXRECURSION 0)
If you are on SQL Server 2005+, you could use a recursive CTE to obtain running totals, with the additional help of the ranking function ROW_NUMBER(), like this:
-- Running total via a recursive CTE.
--   grouped:    one row per calendar day in the filter range with the number
--               of items whose [StartDate, EndDate] interval covers that day.
--   cumulative: walks the days in order, adding each day's count to the
--               total carried over from the previous day.
WITH grouped AS (
    SELECT
        d.Date,
        ItemsOpened = COUNT(i.ItemID),
        rn = ROW_NUMBER() OVER (ORDER BY d.Date)
    FROM Dates d
    LEFT JOIN Items i ON d.Date BETWEEN i.StartDate AND i.EndDate
    -- WHERE must precede GROUP BY
    WHERE d.Date BETWEEN @FilterStartDate AND @FilterEndDate
    GROUP BY d.Date
),
cumulative AS (
    -- anchor: the first day of the range starts the total
    SELECT
        Date,
        ItemsOpened,
        ItemsOpenedCumulative = ItemsOpened
    FROM grouped
    WHERE rn = 1
    UNION ALL
    -- recursive step: previous day's running total (c) plus this day's count (g);
    -- relies on Dates holding consecutive days with no gaps
    SELECT
        g.Date,
        g.ItemsOpened,
        ItemsOpenedCumulative = c.ItemsOpenedCumulative + g.ItemsOpened
    FROM grouped g
    INNER JOIN cumulative c ON g.Date = DATEADD(day, 1, c.Date)
)
SELECT
    Date,
    ItemsOpened,
    ItemsOpenedCumulative
FROM cumulative
-- one recursion level per day: lift the default limit of 100 levels
OPTION (MAXRECURSION 0)