Related
Trying to convert the SQL with while loop code into DAX. Trying to build this query without using temp tables as access is an issue on the database and only have views to work with. I believe best option for me is to code it in DAX. Could someone help with it.
DECLARE #sd DATETIME
DECLARE #ed DATETIME
SELECT #sd = CONVERT(DATETIME, '2021-01-31')
SELECT #ed = GETDATE()
DECLARE #date DATETIME = EOMONTH(#sd)
WHILE ( (#date) <= #ed )
BEGIN
SELECT MONTH(#date) as Month, YEAR(#date) as Year, DAY(#date) as Day, A.*
FROM [people] A
WHERE A.effective_date = (SELECT MAX(B.effective_date)
FROM [people] B
WHERE B.employee_id = A.employee_id
AND B.record_id = A.record_id
AND B.effective_date <= #date)
AND A.effective_sequence = (SELECT MAX(C.effective_sequence)
FROM [people] C
WHERE C.employee_id = A.employee_id
AND C.record_id = A.record_id
AND C.effective_date = A.effective_date)
ORDER BY A.employee_id;
SET #date = EOMONTH(DATEADD(MONTH,1,#date))
END
While you could do this as a view, you would either have to hard-code the start and end dates, or filter them afterwards (which is likely to be inefficient). Instead you can do this as an inline Table Valued Function.
We can use a virtual tally-table (generated with a couple cross-joins) to generate a row for each month
We can use row-numbering instead of the two subqueries
CREATE FUNCTION dbo.GetData (#sd DATETIME, #ed DATETIME)
RETURNS TABLE AS RETURN
WITH L0 AS (
SELECT *
FROM (VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) v(n)
),
L1 AS (
SELECT 1 n FROM L0 a CROSS JOIN L0 b
)
SELECT
MONTH(m.Month) as Month,
YEAR(m.Month) as Year,
DAY(m.Month) as Day,
p.* -- specify columns
FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY m.Month, p.employee_id, p.record_id ORDER BY p.effective_date, p.effective_sequence) AS rn
FROM [people] p
CROSS JOIN (
SELECT TOP (DATEDIFF(month, #sd, #ed) + 1)
DATEADD(month, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1, EOMONTH(#sd)) AS Month
FROM L1
) m
WHERE p.effective_date <= m.Month
) p
WHERE p.rn = 1
;
Then in PowerBI you can just do for example
SELECT *
FROM dbo.GetData ('2021-01-31', GETDATE()) d
ORDER BY
d.employee_id
Note that you cannot put the ORDER BY within the function, it doesn't work.
The data looks like
[Month] [Date]
---------------
201306 1
201306 2
201306 5
201306 6
201306 7
201307 1
201307 4
201307 6
201309 1
201309 2
How to find all missing Dates by Month?
Here is the expected results
[Month] [Date]
---------------
201306 3
201306 4
201307 2
201307 3
201307 5
I think does it and pretty efficiently
declare #T table (yy int, dd int);
insert into #T values
(201306, 1)
, (201306, 2)
, (201306, 5)
, (201306, 6)
, (201306, 7)
, (201307, 1)
, (201307, 4)
, (201307, 6)
, (201309, 1)
, (201309, 2);
with cte as
( select yy, min(dd) + 1 as mn, max(dd) as mx
from #T
group by yy
having min(dd) + 1 < max(dd)
union all
select c.yy, c.mn + 1, c.mx
from cte c
where c.mn + 1 < c.mx
)
select yy, mn as dd
from cte
except
select yy, dd
from #T t
order by yy, mn;
yy dd
----------- -----------
201306 3
201306 4
201307 2
201307 3
201307 5
You would need some kind of lookup tables which could has intermediate dates and use cross join with left join to find missing dates
First thought
;with cte as (
select min(date) mdate, max(date) mxdate from table
union all
select mdate+1 as mdate, mxdate
from cte c
where c.mdate < c.mxdate
)
select distinct t.Month, c.mdate
from table t cross join (select mdate from cte) c
left join table t1 on t1.month = t.Month and t1.date = c.mdate
where t1.date is null
Second thought
;with cte as (
select month, min(date) over (partition by month) mdate, max(date) over (partition by month) mxdate
from sample t union all
select month, mdate+1 as mdate, mxdate
from cte c
where c.month = month and c.mdate < c.mxdate
)
select c.month, c.mdate
from cte c left join sample t1
on t1.month = c.Month and t1.date = c.mdate
where t1.date is null
group by c.month, c.mdate
Demo
Consider using a recursive query
with rndata as
(
select row_number() over (partition by mon order by d) rn, * from data
), rcte as
(
select mon, d, (select max(d) from data where data.mon = rndata.mon) max_d
from rndata where rn = 1
union all
select rcte.mon, rcte.d + 1, rcte.max_d
from rcte
where rcte.d + 1 < max_d
)
select mon, d
from rcte
where not exists (
select 1
from data
where rcte.mon = data.mon and
rcte.d = data.d
)
dbfiddle demo
Consider using below approach.
CREATE TABLE #Date([Month] int, [Date] int)
INSERT INTO #Date
VALUES(201306, 1)
,(201306, 2)
,(201306, 5)
,(201306, 6)
,(201306, 7)
,(201307, 1)
,(201307, 4)
,(201307, 6)
,(201309, 1)
,(201309, 2)
;WITH CTE AS
(
SELECT
*,LEAD([Date]) OVER(ORDER BY [Month],[Date]) AS NextDate
FROM #Date d
)
SELECT
d.[Month], m.Dt AS [Date]
FROM CTE d
CROSS APPLY( SELECT v.Dt
FROM
(VALUES(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11)
,(11),(12),(13),(14),(15),(16),(17),(18),(19),(20)
,(21),(22),(23),(24),(25),(26),(27),(28),(29),(30),(31)
) AS v(Dt)
WHERE v.Dt > d.Date AND v.Dt < d.NextDate
) m
The only limitation this approach has is, it is not able to find missing days that falls before first date.
This is a fast but still simple solution:
1) use sys.sysobjects as tally table to gett all dates in each month.
2) calc min/max range for each month to keep only gaps inside the range.
3) join tally and range to get expected dates for each month, and left join your table to math existing dates.
4) filter in WHERE condition only missing dates
declare #T table ([month] int, [date] int);
insert into #T values
(201306, 1)
, (201306, 2)
, (201306, 5)
, (201306, 6)
, (201306, 7)
, (201307, 1)
, (201307, 4)
, (201307, 6)
, (201309, 1)
, (201309, 2);
with
n as (select top 31 ROW_NUMBER() over (order by id) n from sys.sysobjects),
r as (select [month], MIN([date]) dd1, MAX([date]) dd2 from #t group by [month])
select r.[month], n [date]
from r
join n on n between dd1 and dd2
left join #T t on n.N = t.[date] and r.[month] = t.[month]
where dd2<>dd1 and t.[date] is null
order by r.[month], n
You can use numbers/Tally table approach like below:
See live demo
create table sample ([Month] int, [Date] int)
insert into sample values
(201306, 1)
,(201306, 2)
,(201306, 5)
,(201306, 6)
,(201306, 7)
,(201307, 1)
,(201307, 4)
,(201307, 6)
,(201309, 1)
,(201309, 2);
; with daysinmonth as
(
select * from
(
values
(1,31),(2,28),(3,31),(4,30),(5,31),(6,30),
(7,31),(8,31),(9,30),(10,31),(11,30),(12,31)
) v(m,d)
)
select [month], dd
from sample
cross apply
(
select top
(
select d from
daysinmonth
where m=cast( right(cast([Month] as varchar(6)),2) as int)
)
row_number() over ( order by (select null)) dd
from
sys.tables t1 cross join
sys.tables t2
) c
where [date]<>dd
I currently have data like the following (but bigger!)
/*--:::::::::::
DROP TABLE #target
DROP TABLE #Fact
*/--:::::::::::
CREATE TABLE #target
(
PlayerKey INT,
Name VARCHAR(8),
LiveKey INT
);
INSERT INTO #target
values
(1,'michael',20130103),
(2,'jackson',20130107);
CREATE TABLE #Fact
(
DateKey INT,
PlayerKey INT,
Amount INT
);
INSERT INTO #Fact
values
(20130101,1,10),
(20130102,1,90),
(20130103,1,18),
(20130103,2,79),
(20130103,3,99),
(20130104,2,15),
(20130105,1,12),
(20130105,2,15),
(20130106,1,60),
(20130107,1,96),
(20130107,2,88),
(20130107,4,28),
(20130108,1,13),
(20130108,2,15),
(20130109,1,33),
(20130109,2,67),
(20130110,1,19),
(20130110,2,17)
;
The start of the query is as follows.
DECLARE #NumDays INT = 3;
WITH basic_cte AS
(
SELECT rn = ROW_NUMBER() OVER(PARTITION BY d.Name ORDER BY f.DateKey),
f.DateKey,
d.Name,
f.Amount
FROM #Fact f
INNER JOIN #target d ON
f.PlayerKey = d.PlayerKey AND
f.DateKey >= d.LiveKey AND
f.DateKey < CONVERT(CHAR(8),CONVERT(DATETIME,CONVERT(DATETIME,CONVERT(CHAR(8),d.LiveKey,112))+#NumDays),112)
)
SELECT x.*,
"RollingAmount" = SUM(Amount) OVER(PARTITION BY Name ORDER BY DateKey)
FROM basic_cte x;
This gives the following:
Assuming that we have a DimDate production view available how do I ensure that michael has a row for 20130104 with an amount of 0?
Also is it possible in the same script, to add new columns "AmountAll" and "AmountAllRolling" which would give numbers across all the players including PlayerKeys 3 and 4? I'm guessing this would involve changing the INNER JOIN to a LEFT OUTER JOIN?
So given the above the final result would be as follows:
EDIT
via all the excellent help from Bogdan I've got the following.
I've added an extra total AmountGroup that is the total across the specified players - this was just "nice-to-have" and not part of the original specification.
DECLARE #NumDays INT = 3;
WITH basic_cte AS
(
SELECT rn = ROW_NUMBER() OVER(PARTITION BY Name ORDER BY x.DateKey),
x.DateKey,
d.Name,
Amount = ISNULL(f.Amount,0),
AmountGroup = ISNULL(f.AmountGroup,0),
AmountAll = ISNULL(f.AmountAll,0)
FROM (
SELECT t.*,
EndLiveKey = CONVERT(INT,CONVERT(CHAR(8),CONVERT(DATETIME,CONVERT(DATETIME,CONVERT(CHAR(8),t.LiveKey,112))+#NumDays),112))
FROM #target t
) d
CROSS APPLY
(
SELECT dm.DateKey
FROM WHData.dbo.vw_DimDate dm
WHERE dm.DateKey >= d.LiveKey AND
dm.DateKey < d.EndLiveKey
) x
OUTER APPLY
(
SELECT Amount = SUM(CASE WHEN PlayerKey1 = PlayerKey2 THEN fbase.Amount END),
AmountGroup = SUM(CASE WHEN inGroup = 1 THEN fbase.Amount ELSE 0 END),
AmountAll = SUM(fbase.Amount)
FROM
(
SELECT fct.Amount,
fct.PlayerKey AS PlayerKey1,
d.PlayerKey AS PlayerKey2,
CASE WHEN tt.PlayerKey IS NULL THEN 0 ELSE 1 END AS inGroup
FROM #Fact fct
LEFT OUTER JOIN #target tt ON
fct.PlayerKey = tt.PlayerKey
WHERE fct.DateKey = x.DateKey
) fbase
) f
)
SELECT y.*,
"RollingAmount" = SUM(Amount) OVER(PARTITION BY Name ORDER BY DateKey),
"RollingAmountGroup" = SUM(AmountGroup) OVER(PARTITION BY Name ORDER BY DateKey),
"RollingAmountAll" = SUM(AmountAll) OVER(PARTITION BY Name ORDER BY DateKey)
FROM basic_cte y;
I assume that you have a DimDate table with the following structure:
CREATE TABLE DimDate
(
DateKey INT PRIMARY KEY
);
and DateKey column doesn't has gaps.
Solution:
DECLARE #NumDays INT = 3;
WITH basic_cte AS
(
SELECT x.DateKey,
d.Name,
Amount = ISNULL(f.Amount,0)
FROM
(
SELECT t.*, CONVERT(INT,CONVERT(CHAR(8),CONVERT(DATETIME,CONVERT(DATETIME,CONVERT(CHAR(8),t.LiveKey,112))+#NumDays),112)) AS EndLiveKey
FROM #target t
) d
CROSS APPLY
(
SELECT dm.DateKey
FROM DimDate dm
WHERE dm.DateKey >= d.LiveKey
AND dm.DateKey < d.EndLiveKey
) x
LEFT OUTER JOIN #Fact f
ON f.PlayerKey = d.PlayerKey
AND f.DateKey = x.DateKey
)
SELECT rn = ROW_NUMBER() OVER(PARTITION BY Name ORDER BY DateKey),
y.*,
"RollingAmount" = SUM(Amount) OVER(PARTITION BY Name ORDER BY DateKey)
FROM basic_cte y;
Edit #1:
DECLARE #NumDays INT = 3;
WITH basic_cte AS
(
SELECT rn = ROW_NUMBER() OVER(PARTITION BY Name ORDER BY x.DateKey),
x.DateKey,
d.Name,
Amount = ISNULL(f.Amount,0),
AmountAll = ISNULL(fall.AmountAll,0)
FROM
(
SELECT t.*, CONVERT(INT,CONVERT(CHAR(8),CONVERT(DATETIME,CONVERT(DATETIME,CONVERT(CHAR(8),t.LiveKey,112))+#NumDays),112)) AS EndLiveKey
FROM #target t
) d
CROSS APPLY
(
SELECT dm.DateKey
FROM DimDate dm
WHERE dm.DateKey >= d.LiveKey
AND dm.DateKey < d.EndLiveKey
) x
OUTER APPLY
(
SELECT SUM(fct.Amount) AS Amount
FROM #Fact fct
WHERE fct.DateKey = x.DateKey
AND fct.PlayerKey = d.PlayerKey
) f
OUTER APPLY
(
SELECT SUM(fct.Amount) AS AmountAll
FROM #Fact fct
WHERE fct.DateKey = x.DateKey
) fall
)
SELECT
y.*,
"RollingAmount" = SUM(Amount) OVER(PARTITION BY Name ORDER BY DateKey),
"RollingAmountAll" = SUM(AmountAll) OVER(PARTITION BY Name ORDER BY DateKey)
FROM basic_cte y;
Edit #2:
DECLARE #NumDays INT = 3;
WITH basic_cte AS
(
SELECT rn = ROW_NUMBER() OVER(PARTITION BY Name ORDER BY x.DateKey),
x.DateKey,
d.Name,
Amount = ISNULL(f.Amount,0),
AmountAll = ISNULL(f.AmountAll,0)
FROM
(
SELECT t.*, EndLiveKey = CONVERT(INT,CONVERT(CHAR(8),CONVERT(DATETIME,CONVERT(DATETIME,CONVERT(CHAR(8),t.LiveKey,112))+#NumDays),112))
FROM #target t
) d
CROSS APPLY
(
SELECT dm.DateKey
FROM DimDate dm
WHERE dm.DateKey >= d.LiveKey
AND dm.DateKey < d.EndLiveKey
) x
OUTER APPLY
(
SELECT AmountAll = SUM(fbase.Amount),
Amount = SUM(CASE WHEN PlayerKey1 = PlayerKey2 THEN fbase.Amount END)
FROM
(
SELECT fct.Amount, fct.PlayerKey AS PlayerKey1, d.PlayerKey AS PlayerKey2
FROM #Fact fct
WHERE fct.DateKey = x.DateKey
) fbase
) f
)
SELECT
y.*,
"RollingAmount" = SUM(Amount) OVER(PARTITION BY Name ORDER BY DateKey),
"RollingAmountAll" = SUM(AmountAll) OVER(PARTITION BY Name ORDER BY DateKey)
FROM basic_cte y;
Suppose I have the following an event table with personId, startDate and endDate.
I want to know how much time the person X spent doing an event (the events can override each other).
If the person just has 1 event, its easy: datediff(dd, startDate, endDate)
If the person has 2 events it gets tricky.
I'll set some scenarios for the expected results.
Scenario 1
startDate endDate
1 4
3 5
This means he the results should be the datediff from 1 to 5
Scenario 2
startDate endDate
1 3
6 9
this means he the results should be the some of datediff(dd,1,3) and datediff(dd,6,9)
How can I get this result on an sql query? I can only think of a bunch of if statements, but the same person can have n events so the query will be really confusing.
Shredder Edit: I'd like to add a 3rd scenario:
startDate endDate
1 5
4 8
11 15
Desired result to Shredder scenario:
(1,5) and (4,8) merge in (1,8) since they overlap then we need to datediff(1,8) + datediff(11,15) => 7 + 4 => 11
You can use a recursive CTE to build a list of dates and then count the distinct dates.
declare #T table
(
startDate date,
endDate date
);
insert into #T values
('2011-01-01', '2011-01-05'),
('2011-01-04', '2011-01-08'),
('2011-01-11', '2011-01-15');
with C as
(
select startDate,
endDate
from #T
union all
select dateadd(day, 1, startDate),
endDate
from C
where dateadd(day, 1, startDate) < endDate
)
select count(distinct startDate) as DayCount
from C
option (MAXRECURSION 0)
Result:
DayCount
-----------
11
Or you can use a numbers table. Here I use master..spt_values:
declare #MinStartDate date
select #MinStartDate = min(startDate)
from #T
select count(distinct N.number)
from #T as T
inner join master..spt_values as N
on dateadd(day, N.Number, #MinStartDate) between T.startDate and dateadd(day, -1, T.endDate)
where N.type = 'P'
Here's a solution that uses the Tally table idea (which I first heard of in an article by Itzk Ben-Gan -- I still cut and paste his code whenver the subject comes up). The idea is to generate a list of ascending integers, join the source data by range against the numbers, and then count the number of distinct numbers, as follows. (This code uses syntax from SQL Server 2008, but with minor modifications would work in SQL 2005.)
First set up some testing data:
CREATE TABLE #EventTable
(
PersonId int not null
,startDate datetime not null
,endDate datetime not null
)
INSERT #EventTable
values (1, 'Jan 1, 2011', 'Jan 4, 2011')
,(1, 'Jan 3, 2011', 'Jan 5, 2011')
,(2, 'Jan 1, 2011', 'Jan 3, 2011')
,(2, 'Jan 6, 2011', 'Jan 9, 2011')
Determine some initial values
DECLARE
#Interval bigint
,#FirstDay datetime
,#PersonId int = 1 -- (or whatever)
Get the first day and the maximum possible number of dates (to keep the cte from generating extra values):
SELECT
#Interval = datediff(dd, min(startDate), max(endDate)) + 1
,#FirstDay = min(startDate)
from #EventTable
where PersonId = #PersonId
Cut and paste over the one routine and modify and test it to only return as many integers as we'll need:
/*
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
select Number from Tally where Number <= #Interval
*/
And now revise it by first joining to the intervals defined in each source row, and then count each distinct value found:
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
SELECT PersonId, count(distinct Number) EventDays
from #EventTable et
inner join Tally
on dateadd(dd, Tally.Number - 1, #FirstDay) between et.startDate and et.endDate
where et.PersonId = #PersonId
and Number <= #Interval
group by PersonId
Take out the #PersonId filter and you'd get it for all persons. And with minor modification you can do it for any time interval, not just days (which is why I set the Tally table to generate severely large numbers.)
The following SQL is for the three scenarios you've described
with sampleData
AS (
SELECT 1 personid,1 startDate,4 endDate
UNION SELECT 1,3,5
UNION SELECT 2,1,3
UNION SELECT 2,6,9
UNION SELECT 3,1,5
UNION SELECT 3,4,8
UNION SELECT 3,11, 15
),
cte
AS (SELECT personid,
startdate,
enddate,
Row_number() OVER(ORDER BY personid, startdate) AS rn
FROM sampledata),
overlaps
AS (SELECT a.personid,
a.startdate,
b.enddate,
a.rn id1,
b.rn id2
FROM cte a
INNER JOIN cte b
ON a.personid = b.personid
AND a.enddate > b.startdate
AND a.rn = b.rn - 1),
nooverlaps
AS (SELECT a.personid,
a.startdate,
a.enddate
FROM cte a
LEFT JOIN overlaps b
ON a.rn = b.id1
OR a.rn = b.id2
WHERE b.id1 IS NULL)
SELECT personid,
SUM(timespent) timespent
FROM (SELECT personid,
enddate - startdate timespent
FROM nooverlaps
UNION
SELECT personid,
enddate - startdate
FROM overlaps) t
GROUP BY personid
Produces this result
Personid timeSpent
----------- -----------
1 4
2 5
3 11
Notes: I used the simple integers but the DateDiffs should work too
Correctness issue There is a correctness issue if your data is allowed to have multiple overlaps as Cheran S noted, the results won't be correct and you should use one of the other answers instead. His example used [1,5],[4,8],[7,11] for the same person ID
Algebra. If B-n is the ending time of the nth event, and A-n is the starting time of the nth event, then the sum of the differences is the difference of the sums. So you can write
select everything else, sum(cast(endDate as int)) - sum(cast(startDate as int)) as daysSpent
If your dates have no time component, this works. Otherwise, you could use a real.
Try something like this
select
personId,
sum(DateDuration) as TotalDuration
from
(
select personId, datediff(dd, startDate, endDate) as DateDuration
from yourEventTable
) a
group by personId
;WITH cte(gap)
AS
(
SELECT sum(b-a) from xxx GROUP BY uid
)
SELECT * FROM cte
Edit 1: I have modified both solutions to get correct results.
Edit 2: I have done comparative tests using the solutions proposed by Mikael Eriksson, Conrad Frix, Philip Kelley and me. All tests use an EventTable with the following structure:
CREATE TABLE EventTable
(
EventID INT IDENTITY PRIMARY KEY
,PersonId INT NOT NULL
,StartDate DATETIME NOT NULL
,EndDate DATETIME NOT NULL
,CONSTRAINT CK_StartDate_Before_EndDate CHECK(StartDate < EndDate)
);
Also, all tests use warm buffer (no DBCC DROPCLEANBUFFERS) and cold [plan] cache (I have executed DBCC FREEPROCCACHE before every test). Because some solutions use a filter(PersonId = 1) and others not, I have inserted into EventTable rows for only one person (INSERT ...(PersonId,...) VALUES (1,...)).
These are the results:
My solutions use recursive CTEs.
Solution 1:
WITH BaseCTE
AS
(
SELECT e.StartDate
,e.EndDate
,e.PersonId
,ROW_NUMBER() OVER(PARTITION BY e.PersonId ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e
), RecursiveCTE
AS
(
SELECT b.PersonId
,b.RowNumber
,b.StartDate
,b.EndDate
,b.EndDate AS MaxEndDate
,1 AS PseudoDenseRank
FROM BaseCTE b
WHERE b.RowNumber = 1
UNION ALL
SELECT crt.PersonId
,crt.RowNumber
,crt.StartDate
,crt.EndDate
,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
FROM RecursiveCTE prev
INNER JOIN BaseCTE crt ON prev.PersonId = crt.PersonId
AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
SELECT src.PersonId
,src.PseudoDenseRank --Interval ID
,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
FROM RecursiveCTE src
GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT x.PersonId, SUM( x.Days ) DaysPerPerson
FROM SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
OPTION(MAXRECURSION 32767);
Solution 2:
DECLARE #Base TABLE --or a temporary table: CREATE TABLE #Base (...)
(
PersonID INT NOT NULL
,StartDate DATETIME NOT NULL
,EndDate DATETIME NOT NULL
,RowNumber INT NOT NULL
,PRIMARY KEY(PersonID, RowNumber)
);
INSERT #Base (PersonID, StartDate, EndDate, RowNumber)
SELECT e.PersonId
,e.StartDate
,e.EndDate
,ROW_NUMBER() OVER(PARTITION BY e.PersonID ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e;
WITH RecursiveCTE
AS
(
SELECT b.PersonId
,b.RowNumber
,b.StartDate
,b.EndDate
,b.EndDate AS MaxEndDate
,1 AS PseudoDenseRank
FROM #Base b
WHERE b.RowNumber = 1
UNION ALL
SELECT crt.PersonId
,crt.RowNumber
,crt.StartDate
,crt.EndDate
,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
FROM RecursiveCTE prev
INNER JOIN #Base crt ON prev.PersonId = crt.PersonId
AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
SELECT src.PersonId
,src.PseudoDenseRank --Interval ID
,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
FROM RecursiveCTE src
GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT x.PersonId, SUM( x.Days ) DaysPerPerson
FROM SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
OPTION(MAXRECURSION 32767);
I have a table of items that, for sake of simplicity, contains the ItemID, the StartDate, and the EndDate for a list of items.
ItemID StartDate EndDate
1 1/1/2011 1/15/2011
2 1/2/2011 1/14/2011
3 1/5/2011 1/17/2011
...
My goal is to be able to join this table to a table with a sequential list of dates,
and say both how many items are open on a particular date, and also how many items are cumulatively open.
Date ItemsOpened CumulativeItemsOpen
1/1/2011 1 1
1/2/2011 1 2
...
I can see how this would be done with a WHILE loop,
but that has performance implications. I'm wondering how
this could be done with a set-based approach?
SELECT COUNT(CASE WHEN d.CheckDate = i.StartDate THEN 1 ELSE NULL END)
AS ItemsOpened
, COUNT(i.StartDate)
AS ItemsOpenedCumulative
FROM Dates AS d
LEFT JOIN Items AS i
ON d.CheckDate BETWEEN i.StartDate AND i.EndDate
GROUP BY d.CheckDate
This may give you what you want
SELECT DATE,
SUM(ItemOpened) AS ItemsOpened,
COUNT(StartDate) AS ItemsOpenedCumulative
FROM
(
SELECT d.Date, i.startdate, i.enddate,
CASE WHEN i.StartDate = d.Date THEN 1 ELSE 0 END AS ItemOpened
FROM Dates d
LEFT OUTER JOIN Items i ON d.Date BETWEEN i.StartDate AND i.EndDate
) AS x
GROUP BY DATE
ORDER BY DATE
This assumes that your date values are DATE data type. Or, the dates are DATETIME with no time values.
You may find this useful. The recusive part can be replaced with a table. To demonstrate it works I had to populate some sort of date table. As you can see, the actual sql is short and simple.
DECLARE #i table (itemid INT, startdate DATE, enddate DATE)
INSERT #i VALUES (1,'1/1/2011', '1/15/2011')
INSERT #i VALUES (2,'1/2/2011', '1/14/2011')
INSERT #i VALUES (3,'1/5/2011', '1/17/2011')
DECLARE #from DATE
DECLARE #to DATE
SET #from = '1/1/2011'
SET #to = '1/18/2011'
-- the recusive sql is strictly to make a datelist between #from and #to
;WITH cte(Date)
AS (
SELECT #from DATE
UNION ALL
SELECT DATEADD(day, 1, DATE)
FROM cte ch
WHERE DATE < #to
)
SELECT cte.Date, sum(case when cte.Date=i.startdate then 1 else 0 end) ItemsOpened, count(i.itemid) ItemsOpenedCumulative
FROM cte
left join #i i on cte.Date between i.startdate and i.enddate
GROUP BY cte.Date
OPTION( MAXRECURSION 0)
If you are on SQL Server 2005+, you could use a recursive CTE to obtain running totals, with the additional help of the ranking function ROW_NUMBER(), like this:
WITH grouped AS (
SELECT
d.Date,
ItemsOpened = COUNT(i.ItemID),
rn = ROW_NUMBER() OVER (ORDER BY d.Date)
FROM Dates d
LEFT JOIN Items i ON d.Date BETWEEN i.StartDate AND i.EndDate
GROUP BY d.Date
WHERE d.Date BETWEEN #FilterStartDate AND #FilterEndDate
),
cumulative AS (
SELECT
Date,
ItemsOpened,
ItemsOpenedCumulative = ItemsOpened
FROM grouped
WHERE rn = 1
UNION ALL
SELECT
g.Date,
g.ItemsOpened,
ItemsOpenedCumulative = g.ItemsOpenedCumulative + c.ItemsOpened
FROM grouped g
INNER JOIN cumulative c ON g.Date = DATEADD(day, 1, c.Date)
)
SELECT *
FROM cumulative