How to merge time intervals in SQL Server - sql

Suppose I have an event table with personId, startDate and endDate.
I want to know how much time person X spent doing events (the events can overlap each other).
If the person just has 1 event, its easy: datediff(dd, startDate, endDate)
If the person has 2 events it gets tricky.
I'll set some scenarios for the expected results.
Scenario 1
startDate endDate
1 4
3 5
This means the result should be the datediff from 1 to 5
Scenario 2
startDate endDate
1 3
6 9
This means the result should be the sum of datediff(dd,1,3) and datediff(dd,6,9)
How can I get this result on an sql query? I can only think of a bunch of if statements, but the same person can have n events so the query will be really confusing.
Shredder Edit: I'd like to add a 3rd scenario:
startDate endDate
1 5
4 8
11 15
Desired result to Shredder scenario:
(1,5) and (4,8) merge in (1,8) since they overlap then we need to datediff(1,8) + datediff(11,15) => 7 + 4 => 11

You can use a recursive CTE to build a list of dates and then count the distinct dates.
-- Sample intervals. endDate is treated as exclusive, which matches
-- datediff(dd, startDate, endDate) from the question.
declare @T table
(
    startDate date,
    endDate date
);
insert into @T (startDate, endDate) values
    ('2011-01-01', '2011-01-05'),
    ('2011-01-04', '2011-01-08'),
    ('2011-01-11', '2011-01-15');

-- Expand every interval into one row per covered day, then count each
-- calendar day once so overlapping intervals are not double counted.
with C as
(
    select startDate,
           endDate
    from @T
    where startDate < endDate  -- a zero-length interval covers no day
    union all
    select dateadd(day, 1, startDate),
           endDate
    from C
    where dateadd(day, 1, startDate) < endDate
)
select count(distinct startDate) as DayCount
from C
option (MAXRECURSION 0);  -- intervals may span more days than the default recursion limit
Result:
DayCount
-----------
11
Or you can use a numbers table. Here I use master..spt_values:
-- Offsets are measured from the earliest start date.
declare @MinStartDate date;
select @MinStartDate = min(startDate)
from @T;

-- Every offset N that falls inside at least one [startDate, endDate) interval
-- is one covered day; distinct removes days covered by several intervals.
-- NOTE(review): the type = 'P' rows are used as a run of consecutive integers;
-- confirm their range covers the full date span of your data.
select count(distinct N.number)
from @T as T
inner join master..spt_values as N
    on dateadd(day, N.number, @MinStartDate) between T.startDate and dateadd(day, -1, T.endDate)
where N.type = 'P'

Here's a solution that uses the Tally table idea (which I first heard of in an article by Itzik Ben-Gan -- I still cut and paste his code whenever the subject comes up). The idea is to generate a list of ascending integers, join the source data by range against the numbers, and then count the number of distinct numbers, as follows. (This code uses syntax from SQL Server 2008, but with minor modifications would work in SQL 2005.)
First set up some testing data:
CREATE TABLE #EventTable
(
PersonId int not null
,startDate datetime not null
,endDate datetime not null
)
INSERT #EventTable
values (1, 'Jan 1, 2011', 'Jan 4, 2011')
,(1, 'Jan 3, 2011', 'Jan 5, 2011')
,(2, 'Jan 1, 2011', 'Jan 3, 2011')
,(2, 'Jan 6, 2011', 'Jan 9, 2011')
Determine some initial values
-- Working variables:
--   @Interval  number of day offsets to generate (first start to last end)
--   @FirstDay  earliest start date of the person, offset 0 of the tally
--   @PersonId  person whose events are measured
DECLARE
    @Interval bigint
    ,@FirstDay datetime
    ,@PersonId int = 1 -- (or whatever)
Get the first day and the maximum possible number of dates (to keep the cte from generating extra values):
-- endDate is exclusive (datediff semantics), so exactly
-- datediff(min start, max end) day offsets are needed.
SELECT
    @Interval = datediff(dd, min(startDate), max(endDate))
    ,@FirstDay = min(startDate)
from #EventTable
where PersonId = @PersonId
Cut and paste over the one routine and modify and test it to only return as many integers as we'll need:
/*
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
select Number from Tally where Number <= @Interval
*/
And now revise it by first joining to the intervals defined in each source row, and then count each distinct value found:
-- Each Pass squares the row count of the previous one; Tally numbers the rows 1..N.
;WITH
Pass0 as (select 1 as C union all select 1), --2 rows
Pass1 as (select 1 as C from Pass0 as A, Pass0 as B),--4 rows
Pass2 as (select 1 as C from Pass1 as A, Pass1 as B),--16 rows
Pass3 as (select 1 as C from Pass2 as A, Pass2 as B),--256 rows
Pass4 as (select 1 as C from Pass3 as A, Pass3 as B),--65536 rows
Pass5 as (select 1 as C from Pass4 as A, Pass4 as B),--4,294,967,296 rows
Tally as (select row_number() over(order by C) as Number from Pass5)
SELECT et.PersonId, count(distinct Tally.Number) EventDays
from #EventTable et
inner join Tally
    -- half-open match [startDate, endDate): the end day itself is not counted,
    -- so a merged interval 1..5 yields 4 days, the same as datediff(dd, 1, 5)
    on dateadd(dd, Tally.Number - 1, @FirstDay) >= et.startDate
    and dateadd(dd, Tally.Number - 1, @FirstDay) < et.endDate
where et.PersonId = @PersonId
    and Tally.Number <= @Interval
group by et.PersonId
Take out the #PersonId filter and you'd get it for all persons. And with minor modification you can do it for any time interval, not just days (which is why I set the Tally table to generate severely large numbers.)

The following SQL is for the three scenarios you've described
-- Total time per person, merging an interval with the one that directly
-- follows it (by start) when the two overlap.
-- Limitation visible in the join below: only consecutive rows (rn, rn + 1)
-- are paired, so chains of three or more overlapping intervals are not merged.
with sampleData
AS (
    SELECT 1 personid, 1 startDate, 4 endDate
    UNION ALL SELECT 1, 3, 5
    UNION ALL SELECT 2, 1, 3
    UNION ALL SELECT 2, 6, 9
    UNION ALL SELECT 3, 1, 5
    UNION ALL SELECT 3, 4, 8
    UNION ALL SELECT 3, 11, 15
),
-- number the intervals so neighbours can be compared
cte
AS (SELECT personid,
           startdate,
           enddate,
           Row_number() OVER(ORDER BY personid, startdate) AS rn
    FROM sampledata),
-- pairs of neighbouring intervals that overlap, collapsed to (first start, second end)
overlaps
AS (SELECT a.personid,
           a.startdate,
           b.enddate,
           a.rn id1,
           b.rn id2
    FROM cte a
    INNER JOIN cte b
        ON a.personid = b.personid
        AND a.enddate > b.startdate
        AND a.rn = b.rn - 1),
-- intervals that take part in no overlapping pair
nooverlaps
AS (SELECT a.personid,
           a.startdate,
           a.enddate
    FROM cte a
    LEFT JOIN overlaps b
        ON a.rn = b.id1
        OR a.rn = b.id2
    WHERE b.id1 IS NULL)
SELECT personid,
       SUM(timespent) timespent
FROM (SELECT personid,
             enddate - startdate timespent
      FROM nooverlaps
      -- UNION ALL, not UNION: two intervals of the same length for the same
      -- person are distinct rows and must both be summed
      UNION ALL
      SELECT personid,
             enddate - startdate
      FROM overlaps) t
GROUP BY personid
Produces this result
Personid timeSpent
----------- -----------
1 4
2 5
3 11
Notes: I used the simple integers but the DateDiffs should work too
Correctness issue There is a correctness issue if your data is allowed to have multiple overlaps as Cheran S noted, the results won't be correct and you should use one of the other answers instead. His example used [1,5],[4,8],[7,11] for the same person ID

Algebra. If B-n is the ending time of the nth event, and A-n is the starting time of the nth event, then the sum of the differences is the difference of the sums. So you can write
select everything else, sum(cast(endDate as int)) - sum(cast(startDate as int)) as daysSpent
If your dates have no time component, this works. Otherwise, you could use a real.

Try something like this
select
personId,
sum(DateDuration) as TotalDuration
from
(
select personId, datediff(dd, startDate, endDate) as DateDuration
from yourEventTable
) a
group by personId

;WITH cte(gap)
AS
(
SELECT sum(b-a) from xxx GROUP BY uid
)
SELECT * FROM cte

Edit 1: I have modified both solutions to get correct results.
Edit 2: I have done comparative tests using the solutions proposed by Mikael Eriksson, Conrad Frix, Philip Kelley and me. All tests use an EventTable with the following structure:
CREATE TABLE EventTable
(
EventID INT IDENTITY PRIMARY KEY
,PersonId INT NOT NULL
,StartDate DATETIME NOT NULL
,EndDate DATETIME NOT NULL
,CONSTRAINT CK_StartDate_Before_EndDate CHECK(StartDate < EndDate)
);
Also, all tests use warm buffer (no DBCC DROPCLEANBUFFERS) and cold [plan] cache (I have executed DBCC FREEPROCCACHE before every test). Because some solutions use a filter(PersonId = 1) and others not, I have inserted into EventTable rows for only one person (INSERT ...(PersonId,...) VALUES (1,...)).
These are the results:
My solutions use recursive CTEs.
Solution 1:
-- Total merged days per person: walks each person's events in start order,
-- assigns overlapping events the same interval id, then sums the span of each interval.
WITH BaseCTE
AS
(
-- number each person's events in (StartDate, EndDate) order
SELECT e.StartDate
,e.EndDate
,e.PersonId
,ROW_NUMBER() OVER(PARTITION BY e.PersonId ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e
), RecursiveCTE
AS
(
-- anchor: the first event of every person opens interval 1
SELECT b.PersonId
,b.RowNumber
,b.StartDate
,b.EndDate
,b.EndDate AS MaxEndDate
,1 AS PseudoDenseRank
FROM BaseCTE b
WHERE b.RowNumber = 1
UNION ALL
-- step: visit the next event of the same person
SELECT crt.PersonId
,crt.RowNumber
,crt.StartDate
,crt.EndDate
-- running maximum of the end dates seen so far
,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
-- same interval id while the event starts on or before the running end; otherwise a new interval
,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
FROM RecursiveCTE prev
INNER JOIN BaseCTE crt ON prev.PersonId = crt.PersonId
AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
-- one row per merged interval: days from its earliest start to its latest end
SELECT src.PersonId
,src.PseudoDenseRank --Interval ID
,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
FROM RecursiveCTE src
GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT x.PersonId, SUM( x.Days ) DaysPerPerson
FROM SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
-- one recursion level per event of a person; raise the limit from the default
OPTION(MAXRECURSION 32767);
Solution 2:
-- Same algorithm as Solution 1, but the numbered events are materialised first
-- into a keyed table variable so the recursive step can seek on (PersonID, RowNumber).
DECLARE @Base TABLE --or a temporary table: CREATE TABLE #Base (...)
(
    PersonID INT NOT NULL
    ,StartDate DATETIME NOT NULL
    ,EndDate DATETIME NOT NULL
    ,RowNumber INT NOT NULL
    ,PRIMARY KEY(PersonID, RowNumber)
);
-- number each person's events in (StartDate, EndDate) order
INSERT @Base (PersonID, StartDate, EndDate, RowNumber)
SELECT e.PersonId
    ,e.StartDate
    ,e.EndDate
    ,ROW_NUMBER() OVER(PARTITION BY e.PersonID ORDER BY e.StartDate, e.EndDate) RowNumber
FROM EventTable e;
WITH RecursiveCTE
AS
(
    -- anchor: the first event of every person opens interval 1
    SELECT b.PersonId
        ,b.RowNumber
        ,b.StartDate
        ,b.EndDate
        ,b.EndDate AS MaxEndDate
        ,1 AS PseudoDenseRank
    FROM @Base b
    WHERE b.RowNumber = 1
    UNION ALL
    -- step: next event of the same person
    SELECT crt.PersonId
        ,crt.RowNumber
        ,crt.StartDate
        ,crt.EndDate
        -- running maximum of the end dates seen so far
        ,CASE WHEN crt.EndDate > prev.MaxEndDate THEN crt.EndDate ELSE prev.MaxEndDate END
        -- keep the interval id while the event starts on or before the running end
        ,CASE WHEN crt.StartDate <= prev.MaxEndDate THEN prev.PseudoDenseRank ELSE prev.PseudoDenseRank + 1 END
    FROM RecursiveCTE prev
    INNER JOIN @Base crt ON prev.PersonId = crt.PersonId
        AND prev.RowNumber + 1 = crt.RowNumber
), SumDaysPerPersonAndInterval
AS
(
    -- one row per merged interval: days from its earliest start to its latest end
    SELECT src.PersonId
        ,src.PseudoDenseRank --Interval ID
        ,DATEDIFF(DAY, MIN(src.StartDate), MAX(src.EndDate)) Days
    FROM RecursiveCTE src
    GROUP BY src.PersonId, src.PseudoDenseRank
)
SELECT x.PersonId, SUM( x.Days ) DaysPerPerson
FROM SumDaysPerPersonAndInterval x
GROUP BY x.PersonId
OPTION(MAXRECURSION 32767);

Related

Find all missing Numbers using one SQL query

The data looks like
[Month] [Date]
---------------
201306 1
201306 2
201306 5
201306 6
201306 7
201307 1
201307 4
201307 6
201309 1
201309 2
How to find all missing Dates by Month?
Here is the expected results
[Month] [Date]
---------------
201306 3
201306 4
201307 2
201307 3
201307 5
I think this does it, and pretty efficiently
-- yy holds the month key (e.g. 201306), dd the day number within that month.
declare @T table (yy int, dd int);
insert into @T (yy, dd) values
      (201306, 1)
    , (201306, 2)
    , (201306, 5)
    , (201306, 6)
    , (201306, 7)
    , (201307, 1)
    , (201307, 4)
    , (201307, 6)
    , (201309, 1)
    , (201309, 2);

-- For every month generate the days strictly between its first and last
-- recorded day, then remove the days that are actually present.
with cte as
(
    select yy, min(dd) + 1 as mn, max(dd) as mx
    from @T
    group by yy
    having min(dd) + 1 < max(dd)  -- skip months with no room for a gap
    union all
    select c.yy, c.mn + 1, c.mx
    from cte c
    where c.mn + 1 < c.mx
)
select yy, mn as dd
from cte
except
select yy, dd
from @T t
-- ORDER BY on a set operation must use the output column names (yy, dd)
order by yy, dd;
yy dd
----------- -----------
201306 3
201306 4
201307 2
201307 3
201307 5
You would need some kind of lookup tables which could has intermediate dates and use cross join with left join to find missing dates
First thought
;with cte as (
select min(date) mdate, max(date) mxdate from table
union all
select mdate+1 as mdate, mxdate
from cte c
where c.mdate < c.mxdate
)
select distinct t.Month, c.mdate
from table t cross join (select mdate from cte) c
left join table t1 on t1.month = t.Month and t1.date = c.mdate
where t1.date is null
Second thought
;with cte as (
select month, min(date) over (partition by month) mdate, max(date) over (partition by month) mxdate
from sample t union all
select month, mdate+1 as mdate, mxdate
from cte c
where c.month = month and c.mdate < c.mxdate
)
select c.month, c.mdate
from cte c left join sample t1
on t1.month = c.Month and t1.date = c.mdate
where t1.date is null
group by c.month, c.mdate
Demo
Consider using a recursive query
with rndata as
(
select row_number() over (partition by mon order by d) rn, * from data
), rcte as
(
select mon, d, (select max(d) from data where data.mon = rndata.mon) max_d
from rndata where rn = 1
union all
select rcte.mon, rcte.d + 1, rcte.max_d
from rcte
where rcte.d + 1 < max_d
)
select mon, d
from rcte
where not exists (
select 1
from data
where rcte.mon = data.mon and
rcte.d = data.d
)
dbfiddle demo
Consider using below approach.
CREATE TABLE #Date([Month] int, [Date] int)
INSERT INTO #Date ([Month], [Date])
VALUES(201306, 1)
    ,(201306, 2)
    ,(201306, 5)
    ,(201306, 6)
    ,(201306, 7)
    ,(201307, 1)
    ,(201307, 4)
    ,(201307, 6)
    ,(201309, 1)
    ,(201309, 2)
-- Pair every recorded day with the next recorded day of the SAME month;
-- the days strictly between the two are the missing ones.
;WITH CTE AS
(
    SELECT
        d.[Month]
        ,d.[Date]
        -- partitioned by month so a gap is never bridged across two months
        ,LEAD(d.[Date]) OVER(PARTITION BY d.[Month] ORDER BY d.[Date]) AS NextDate
    FROM #Date d
)
SELECT
    d.[Month], m.Dt AS [Date]
FROM CTE d
CROSS APPLY( SELECT v.Dt
             FROM
             -- each day number 1..31 exactly once; a repeated value would repeat output rows
             (VALUES(1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
                   ,(11),(12),(13),(14),(15),(16),(17),(18),(19),(20)
                   ,(21),(22),(23),(24),(25),(26),(27),(28),(29),(30),(31)
             ) AS v(Dt)
             WHERE v.Dt > d.[Date] AND v.Dt < d.NextDate
           ) m
The only limitation this approach has is, it is not able to find missing days that falls before first date.
This is a fast but still simple solution:
1) use sys.sysobjects as a tally table to get all day numbers in each month.
2) calculate the min/max range for each month to keep only gaps inside the range.
3) join tally and range to get the expected dates for each month, and left join your table to match existing dates.
4) filter in the WHERE condition to keep only the missing dates
declare @T table ([month] int, [date] int);
insert into @T ([month], [date]) values
      (201306, 1)
    , (201306, 2)
    , (201306, 5)
    , (201306, 6)
    , (201306, 7)
    , (201307, 1)
    , (201307, 4)
    , (201307, 6)
    , (201309, 1)
    , (201309, 2);
with
-- n: the integers 1..31, numbered off the rows of sys.sysobjects
n as (select top 31 ROW_NUMBER() over (order by id) n from sys.sysobjects),
-- r: first and last recorded day of every month
r as (select [month], MIN([date]) dd1, MAX([date]) dd2 from @T group by [month])
select r.[month], n.n [date]
from r
join n on n.n between r.dd1 and r.dd2
-- days inside the month's range that have no matching row are the missing ones
left join @T t on n.n = t.[date] and r.[month] = t.[month]
where r.dd2 <> r.dd1 and t.[date] is null
order by r.[month], n.n
You can use numbers/Tally table approach like below:
See live demo
create table sample ([Month] int, [Date] int)
insert into sample values
(201306, 1)
,(201306, 2)
,(201306, 5)
,(201306, 6)
,(201306, 7)
,(201307, 1)
,(201307, 4)
,(201307, 6)
,(201309, 1)
,(201309, 2);
; with daysinmonth as
(
select * from
(
values
(1,31),(2,28),(3,31),(4,30),(5,31),(6,30),
(7,31),(8,31),(9,30),(10,31),(11,30),(12,31)
) v(m,d)
)
select [month], dd
from sample
cross apply
(
select top
(
select d from
daysinmonth
where m=cast( right(cast([Month] as varchar(6)),2) as int)
)
row_number() over ( order by (select null)) dd
from
sys.tables t1 cross join
sys.tables t2
) c
where [date]<>dd

SQL - Select values from a table based on dates using incrementing dates

I have a SQL table of dates (MM/DD format), targets, and levels, as such:
Date Target Level
10/2 1000 1
10/4 2000 1
10/7 2000 2
I want to use those dates as tiers, or checkpoints, for when to use the respective targets and levels. So, anything on or after those dates (until the next date) would use that target/level. Anything before the first date just uses the values from the first date.
I want to select a range of dates (a 5 week range of dates, with the start date and end date of the range being determined by the current day: 3 weeks back from today, to 2 weeks forward from today) and fill in the targets and levels accordingly, as such:
Date Target Level
10/1 1000 1
10/2 1000 1
10/3 1000 1
10/4 2000 1
10/5 2000 1
10/6 2000 1
10/7 2000 2
10/8 2000 2
...
11/5 2000 2
How do I go about:
Selecting the range of dates (as efficiently as possible)
Filling in the range of dates with the respective target/level from the appropriate date in my table?
Thank you.
You can do this using outer apply. The following creates a list of dates using a recursive CTE:
-- d: one row per day, walking backwards from today.
with d as (
    select cast(getdate() as date) as dte
    union all
    select dateadd(day, -1, dte)
    from d
    where dte >= getdate() - 30
)  -- the CTE definition must be closed before the main query
select d.dte, t.target, t.level
from d
outer apply
    -- latest tier row on or before the day; a derived table in APPLY needs an alias
    (select top 1 src.*
     from t as src
     where d.dte >= src.dte
     order by src.dte desc
    ) t;
you can use a CTE to generate your 'missing' dates, then use a CROSS APPLY to obtain the target and level that was last active (by querying the TOP 1 DESC where the date is on or before current date) - finally I introduced 'maximum date' as a variable
-- Last day to generate.
DECLARE @MAXD as DATETIME = '20161105';
-- DATS: one row per day from the first date in the table through @MAXD.
WITH DATS AS (SELECT MIN([Date]) D FROM dbo.YourTab
              UNION ALL
              SELECT dateadd(day,1,D) FROM DATS WHERE D < @MAXD)
select DATS.D, CA.Target, CA.Level from DATS
CROSS APPLY
    -- tier in force on that day: the latest row dated on or before it
    (SELECT TOP 1 Y.Target, Y.Level FROM YourTab Y
     WHERE
        Y.[Date] <= DATS.D
     ORDER BY Y.Date DESC) CA
option (maxrecursion 0);
I made a bit of a change with dates to go back 3 and forward two weeks - also I switched to outer apply to handle no data in force
-- Window: starts 3 weeks before today and spans 5 weeks.
DECLARE @MIND as DATETIME = dateadd(week,-3,cast(getdate() as date));
DECLARE @MAXD as DATETIME = dateadd(week, 5,@MIND);
-- DATS: one row per day from @MIND through @MAXD.
WITH DATS AS (SELECT @MIND D
              UNION ALL
              SELECT dateadd(day,1,D) FROM DATS WHERE D < @MAXD)
select DATS.D, CA.Target, CA.Level from DATS
OUTER APPLY
    -- latest tier on or before the day; OUTER keeps days that precede every tier
    (SELECT TOP 1 Y.Target, Y.Level FROM YourTab Y WHERE Y.[Date] <= DATS.D ORDER BY Y.Date DESC) CA
ORDER BY DATS.D
option (maxrecursion 0);
Final change - if there is no earlier value for the date - take first future row
-- Window: starts 3 weeks before today and spans 5 weeks.
DECLARE @MIND as DATETIME = dateadd(week,-3,cast(getdate() as date));
DECLARE @MAXD as DATETIME = dateadd(week, 5,@MIND);
-- DATS: one row per day from @MIND through @MAXD.
WITH DATS AS (SELECT @MIND D
              UNION ALL
              SELECT dateadd(day,1,D) FROM DATS WHERE D < @MAXD)
select DATS.D, COALESCE(CA.Target, MQ.Target) Target , COALESCE(CA.Level, MQ.Level) Level from DATS
OUTER APPLY
    -- CA: latest tier dated on or before the day (NULLs when none exists yet)
    (SELECT TOP 1 Y.Target, Y.Level FROM YourTab Y WHERE Y.[Date] <= DATS.D ORDER BY Y.Date DESC) CA
OUTER APPLY
    (
    -- MQ: the earliest tier overall, used as fallback for days before it
    SELECT TOP 1 M.Target, M.Level FROM YourTab M ORDER BY M.[Date] ASC
    ) MQ
ORDER BY DATS.D
option (maxrecursion 0);
I don't know why you store dates as MM/DD but you need some conversion into right datatype. This could do a trick:
;WITH YourTable AS (
SELECT *
FROM (VALUES
('10/2', 1000, 1),
('10/4', 2000, 1),
('10/7', 2000, 2)
) as t([Date], [Target], [Level])
), dates_cte AS ( --this CTE is generating dates you need
SELECT DATEADD(week,-3,GETDATE()) as d --3 weeks back
UNION ALL
SELECT dateadd(day,1,d)
FROM dates_cte
WHERE d < DATEADD(week,2,GETDATE()) --2 weeks forward
)
SELECT REPLACE(CONVERT(nvarchar(5),d,101),'/0','/') as [Date],
COALESCE(t.[Target],t1.[Target]) [Target],
COALESCE(t.[Level],t1.[Level]) [Level]
FROM dates_cte dc
OUTER APPLY ( --Here we got PREVIOUS values
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) <= dc.d
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) DESC
) t
OUTER APPLY ( --Here we got NEXT values and use them if there is no PREV
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) >= dc.d
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) ASC
) t1
Output:
Date Target Level
10/5 2000 1
10/6 2000 1
10/7 2000 2
10/8 2000 2
10/9 2000 2
10/10 2000 2
10/11 2000 2
10/12 2000 2
...
11/9 2000 2
EDIT
With Categories:
;WITH YourTable AS (
SELECT *
FROM (VALUES
('10/2', 1000, 1, 'A'),
('10/4', 3000, 1, 'B'),
('10/7', 2000, 2, 'A')
) as t([Date], [Target], [Level], [Category])
), dates_cte AS (
SELECT DATEADD(week,-3,GETDATE()) as d
UNION ALL
SELECT dateadd(day,1,d)
FROM dates_cte
WHERE d < DATEADD(week,2,GETDATE())
)
SELECT REPLACE(CONVERT(nvarchar(5),d,101),'/0','/') as [Date],
COALESCE(t.[Target],t1.[Target]) [Target],
COALESCE(t.[Level],t1.[Level]) [Level],
c.Category
FROM dates_cte dc
CROSS JOIN (
SELECT DISTINCT Category
FROM YourTable
) c
OUTER APPLY (
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) <= dc.d
AND c.Category = Category
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) DESC
) t
OUTER APPLY (
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) >= dc.d
AND c.Category = Category
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) ASC
) t1
ORDER BY c.Category, d
Not sure if I'm over simplifying this, but:
select min(X.Date) Date_Range_Start, max(X.date) Date_Range_End
, V.<value_date>
, isnull(X.Target, 'Out of range') Target
, isnull(X.Level, 'Out of range') Level
from X --replace this with your table
left join <value_table> V --table with dates to be assessed
on V.<Date> between X.Date_Range_Start and X.Date_Range_End
group by Target, Level, V.<value_date>

Calculate Average Qty On Hand of Inventory

I'm trying to find the average qty on hand of my inventory over a date range from parameter #StartDate by averaging the ending qty from each day. I have three tables: a part table, a part transaction table, and a warehouse table, mocked up below.
PartNum | PartNum TranDate TranQty | PartNum OnHandQty
---------- | ------------------------------------ | --------------------
P1 | P1 6/28/2016 5 | P1 30
P2 | P1 6/26/2016 3 | P2 2
| P1 6/26/2016 -1 |
| P1 6/15/2016 2 |
| P2 6/15/2016 1 |
If today is 6/30/2016 and #StartDate = 6/1/2016, I expect a result like:
PartNum AverageOnHand
------------------------
P1 22.9
P2 1.5
However, I don't know what function would best allow me to get to an appropriate weighted sum which I could divide by the difference in dates. Is there a SumProduct function or similar that I can use here? My code, so far, is below:
select
[Part].[PartNum] as [Part_PartNum],
(max(PartWhse.OnHandQty)*datediff(day,max(PartTran.TranDate),Constants.Today)) as [Calculated_WeightedSum],
(WeightedSum/DATEDIFF(day, #StartDate, Constants.Today)) as [Calculated_AverageOnHand]
from Erp.Part as Part
right outer join Erp.PartTran as PartTran on
Part.PartNum = PartTran.PartNum
inner join Erp.PartWhse as PartWhse on
Part.PartNum = PartWhse.PartNum
group by [Part].[PartNum]
Here is a sql-server 2012 + method that is interesting.
-- Expects a variable @StartDate (start of the averaging window) to be declared
-- in the same batch, plus the temp tables #Parts, #Transact and #Inventory.
;WITH cte AS (
    -- one row per part and transaction day
    SELECT
        p.PartNum
        ,CAST(t.TranDate AS DATE) AS TranDate
        ,i.OnHandQty
        --,SUM(SUM(t.TranQty)) OVER (PARTITION BY p.PartNum ORDER BY CAST(t.TranDate AS DATE) DESC) AS InventoryChange
        -- current on-hand minus every transaction from this day forward
        ,i.OnHandQty - SUM(SUM(t.TranQty)) OVER (PARTITION BY p.PartNum ORDER BY CAST(t.TranDate AS DATE) DESC) AS InventoryOnDate
        -- days since the previous transaction day, or since @StartDate for the first one
        ,DATEDIFF(day,
            CAST(ISNULL(LAG(MAX(TranDate)) OVER (PARTITION BY p.PartNum ORDER BY CAST(t.TranDate AS DATE) ASC),@StartDate) AS DATE)
            ,CAST(t.TranDate AS DATE)
         ) AS DaysAtInventory
    FROM
        #Parts p
        LEFT JOIN #Transact t
            ON p.PartNum = t.PartNum
        LEFT JOIN #Inventory i
            ON p.PartNum = i.PartNum
    GROUP BY
        p.PartNum
        ,CAST(t.TranDate AS DATE)
        ,i.OnHandQty
)
SELECT
    PartNum
    -- weighted sum of (days x quantity) up to the last transaction, plus the days
    -- since then at the current quantity, divided by the days in the window
    ,(SUM(ISNULL(DaysAtInventory,0) * ISNULL(InventoryOnDate,0))
        + ((DATEDIFF(day,MAX(TranDate),CAST(GETDATE() AS DATE)) + 1) * ISNULL(MAX(OnHandQty),0)))
     /((DATEDIFF(day,CAST(@StartDate AS DATE),CAST(GETDATE() AS DATE)) + 1) * 1.00) AS AvgDailyInventory
FROM
    cte
GROUP BY
    PartNum
This one actually gave me the 22.9 but 1.53333 the 333 gets introduced because 1 day has to get put somewhere so I stuck it as the current inventory.
Here is a previous method I answered with and this one it is a little easier to conceptualize the data..... I would be curious about performance differences between the 2 methods.
Some of these steps can be combined to be a little more concise but this works (although I got 22.6 not .1 or .9....) I rounded everything to a whole date while doing this so that you don't have to worry about beginning and end of day.
-- Average daily on-hand quantity per part from @StartDate through today.
-- Reads the temp tables #Parts, #Transact and #Inventory.
DECLARE @StartDate DATETIME = '6/1/2016'
;WITH cteDates AS (
    -- one row per day from @StartDate to the beginning of today
    SELECT @StartDate AS d
    UNION ALL
    SELECT
        d + 1 AS d
    FROM
        cteDates c
    WHERE c.d + 1 <= CAST(CAST(GETDATE() AS DATE) AS DATETIME)
)
, ctePartsDaysCross AS (
    -- every part on every day, carrying the current on-hand quantity (0 when unknown)
    SELECT
        d.d
        ,p.PartNum
        ,ISNULL(i.OnHandQty,0) AS OnHandQty
    FROM
        cteDates d
        CROSS JOIN #Parts p
        LEFT JOIN #Inventory i
            ON p.PartNum = i.PartNum
)
, cteTransactsQuantityByDate AS (
    -- net quantity moved per part per day
    SELECT
        CAST(t.TranDate AS DATE) as d
        ,t.PartNum
        ,TranQty = SUM(t.TranQty)
    FROM
        #Transact t
    GROUP BY
        CAST(t.TranDate AS DATE)
        ,t.PartNum
)
,cteDailyInventory AS (
    -- current quantity minus all transactions from that day forward
    SELECT
        c.d
        ,c.PartNum
        ,c.OnHandQty - SUM(ISNULL(t.TranQty,0)) OVER (PARTITION BY c.PartNum ORDER BY c.d DESC) AS DailyOnHand
    FROM
        ctePartsDaysCross c
        LEFT JOIN cteTransactsQuantityByDate t
            ON c.d = t.d
            AND c.PartNum = t.PartNum
)
SELECT
    PartNum
    -- wide decimal so quantities of 1000 and above do not overflow the cast
    ,AVG(CAST(DailyOnHand AS DECIMAL(18,3))) AS AvgDailyOnHand
FROM
    cteDailyInventory
GROUP BY
    PartNum
-- cteDates recurses once per day; windows longer than 100 days would hit the default limit
OPTION (MAXRECURSION 0)
Here is the test data:
IF OBJECT_ID('tempdb..#Parts') IS NOT NULL
BEGIN
DROP TABLE #Parts
END
IF OBJECT_ID('tempdb..#Transact') IS NOT NULL
BEGIN
DROP TABLE #Transact
END
IF OBJECT_ID('tempdb..#Inventory') IS NOT NULL
BEGIN
DROP TABLE #Inventory
END
CREATE TABLE #Parts (
PartNum CHAR(2)
)
CREATE TABLE #Transact (
AutoId INT IDENTITY(1,1) NOT NULL
,PartNum CHAR(2)
,TranDate DATETIME
,TranQty INT
)
CREATE TABLE #Inventory (
PartNum CHAR(2)
,OnHandQty INT
)
INSERT INTO #Parts (PartNum) VALUES ('P1'),('P2'),('P3')
INSERT INTO #Transact (PartNum, TranDate, TranQty)
VALUES ('P1','6/28/2016',5),('P1','6/26/2016',3),('P1','6/26/2016',-1)
,('P1','6/15/2016',2) ,('P2','6/15/2016',1)
INSERT INTO #Inventory (PartNum, OnHandQty) VALUES ('P1',30),('P2',2)
I am thinking 1 recursive cte might be simpler might post that as an update.
Reverse the transactions to compute daily quantities. Add in the missing dates and look backward to the most recent date to fill in the daily quantities. I think I'm going to try for a better solution than this one.
http://rextester.com/JLD19862
with trn as (
select PartNum, TranDate, TranQty from PartTran
union all
select PartNum, cast('20160601' as date), 0 from PartWhse
union all
select PartNum, cast('20160630' as date), 0 from PartWhse
), qty as (
select
t.PartNum, t.TranDate,
-- assumes that end date corresponds with OnHandQty
min(w.OnHandQty) + sum(t.TranQty)
- sum(sum(t.TranQty))
over (partition by t.PartNum order by t.TranDate desc) as DailyOnHand,
coalesce(
lead(t.TranDate) over (partition by t.PartNum order by t.TranDate),
dateadd(day, 1, t.TranDate)
) as NextTranDate
-- if lead() isn't available...
-- coalesce(
-- (
-- select min(t2.TranDate) from trn as t2
-- where t2.PartNum = t.PartNum and t2.TranDate > t.TranDate
-- ),
-- dateadd(day, 1, t.TranDate)
-- ) as NextTranDate
from PartWhse as w inner join trn as t on t.PartNum = w.PartNum
where t.TranDate between '20160601' and '20160630'
group by t.PartNum, t.TranDate
)
select
PartNum,
sum(datediff(day, TranDate, NextTranDate) * DailyOnHand) * 1.00
/ sum(datediff(day, TranDate, NextTranDate)) as DailyAvg
from qty
group by PartNum;
I was able to solve this with a sum. First, I multiplied the final quantity on hand by the number of days in the range. Next, I multiplied each change in inventory by the time from #StartDate until the TransDate.
select
[Part].[PartNum] as [Part_PartNum],
(max(PartWhse.OnHandQty)*datediff(day,#StartDate,Constants.Today)-
sum(PartTran.TranQty*datediff(day,#StartDate,PartTran.TranDate))) as [Calculated_WeightedSum],
(WeightedSum/DATEDIFF(day, #StartDate, Constants.Today)) as [Calculated_AverageOnHand]
from Erp.Part as Part
right outer join Erp.PartTran as PartTran on
Part.PartNum = PartTran.PartNum
inner join Erp.PartWhse as PartWhse on
Part.PartNum = PartWhse.PartNum
group by [Part].[PartNum]
Thanks for your help everyone! You really helped me think it through.

Running totals with initial value then adding the totals as stated by the date

Imagine we have a table:
SELECT SUM(A) AS TOTALS,DATE,STUFF FROM TABLE WHERE DATE BETWEEN 'DATESTART' AND 'DATEEND'
GROUP BY DATE,STUFF
Normally this gets the totals as:
totals stuff date
23 x 01.01.1900
3 x 02.01.1900
44 x 06.01.1900
But what if we have the previous the data before the startdate,and i want to add those initial data to my startdate value; for example; from the begining of time i already have a sum value of x lets say 100
so i want my table to start from 123 and add the previous data such as:
123
126
126+44 and so on...
totals stuff date
123 x 01.01.1900
126 x 02.01.1900
170 x 06.01.1900
How can i achieve that?
Source data:
WITH Stocks
AS (
SELECT
Dep.Dept_No ,
SUM(DSL.Metre) AS Metre ,
CONVERT(VARCHAR(10), Date, 112) AS Date
FROM
DS (NOLOCK) DSL
JOIN TBL_Depts (NOLOCK) Dep ON Dep.Dept_No = DSL.Dept
WHERE
1 = 1 AND
DSL.Sil = 0 AND
DSL.Depo IN ( 5000, 5001, 5002, 5003, 5004, 5014, 5018, 5021, 5101, 5109, 5303 ) AND
Dep.Dept_No NOT IN ( 6002 ) AND
Dep.Dept_No IN ( 6000, 6001, 6003, 6004, 6005, 6011, 6024, 6030 ) AND
DSL.Date BETWEEN '2013-06-19' AND '2013-06-20'
GROUP BY
Dep.Dept_No ,
CONVERT(VARCHAR(10), Date, 112)
)
SELECT
Stocks.Metre ,
Dep.Dept AS Dept ,
Stocks.Date
FROM
Stocks
LEFT JOIN TBL_Depts (NOLOCK) Dep ON Stocks.Dept = Dep.Dept
ORDER BY
Stocks.Metre DESC
Any RDBMS with window and analytic functions (SQL Server 2012, PostgreSQL but not MySQL)
-- Running total over the date range, started from the sum of everything before the range.
SELECT
    -- COALESCE: with no rows before the range the opening sum is NULL,
    -- which would otherwise turn every running total NULL
    COALESCE(Y.SumA, 0)
        + SUM(X.SumARange) OVER (ORDER BY X.aDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TOTALS,
    X.other, X.aDate
FROM
    (
    -- X: per-day sums inside the range
    SELECT
        SUM(a) AS SumARange,
        other, aDate
    FROM
        SomeTable
    WHERE
        aDate BETWEEN '20130101' AND '20130106'
    GROUP BY
        other, aDate
    ) X
    CROSS JOIN
    (
    -- Y: single-row opening balance (all rows dated before the range)
    SELECT
        SUM(a) AS SumA
    FROM
        SomeTable
    WHERE
        aDate < '20130101'
    ) Y
ORDER BY
    X.aDate;
or
SELECT
SUM(SumA) OVER () + SUM(SumARange) OVER (ORDER BY aDate ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS TOTALS,
other, aDate
FROM
(
SELECT
SUM(CASE WHEN aDate < '20130101' THEN a ELSE 0 END) AS SumA,
SUM(CASE WHEN aDate BETWEEN '20130101' AND '20130106' THEN a ELSE 0 END) AS SumARange,
other, aDate
FROM
SomeTable
WHERE
aDate <= '20130106'
GROUP BY
other, aDate
) X
ORDER BY
aDate;
SQLFiddle example and another
Use option with APPLY operator to calculate the totals. You need also add additional CASE expression in the GROUP BY clause
;WITH cte AS
(
SELECT SUM(a) AS sumA, [stuff], MAX([Date]) AS [Date]
FROM SomeTable
WHERE [Date] <= '20130106'
GROUP BY [stuff], CASE WHEN [Date] <= '20130101' THEN 1 ELSE [Date] END
)
SELECT o.total, [stuff], [Date]
FROM cte c CROSS APPLY (
SELECT SUM(c2.sumA) AS total
FROM cte c2
WHERE c.[Date] >= c2.[Date]
) o
See example on SQLFiddle

How to count open records, grouped by hour and day in SQL-server-2008-r2

I have hospital patient admission data in Microsoft SQL Server r2 that looks something like this:
PatientID, AdmitDate, DischargeDate
Jones. 1-jan-13 01:37. 1-jan-13 17:45
Smith 1-jan-13 02:12. 2-jan-13 02:14
Brooks. 4-jan-13 13:54. 5-jan-13 06:14
I would like count the number of patients in the hospital day by day and hour by hour (ie at
1-jan-13 00:00. 0
1-jan-13 01:00. 0
1-jan-13 02:00. 1
1-jan-13 03:00. 2
And I need to include the hours when there are no patients admitted in the result.
I can't create tables so making a reference table listing all the hours and days is out, though.
Any suggestions?
To solve this problem, you need a list of date-hours. The following gets this from the admit date cross joined to a table with 24 hours. The table of 24 hours is calculating from information_schema.columns -- a trick for getting small sequences of numbers in SQL Server.
The rest is just a join between this table and the hours. This version counts the patients at the hour, so someone admitted and discharged in the same hour, for instance is not counted. And in general someone is not counted until the next hour after they are admitted:
-- dh: every hour (00:00 .. 23:00) of every day on which at least one admission occurred.
with dh as (
    select dateadd(hour, hours.seqnum - 1, a.thedatehour) as DateHour
    from (select distinct cast(cast(AdmitDate as date) as datetime) as thedatehour
          from Admissions
         ) a
    cross join
         -- small number sequence taken from the row numbers of a catalog view
         -- NOTE(review): assumes INFORMATION_SCHEMA.COLUMNS has at least 24 rows
         (select row_number() over (order by (select null)) as seqnum
          from INFORMATION_SCHEMA.COLUMNS
         ) hours
    where hours.seqnum <= 24  -- filter on the column; "hours" is only the table alias
)
-- LEFT JOIN + COUNT(PatientID) keeps hours with no patients and reports them as 0
select dh.DateHour, count(a.PatientID) as NumPatients
from dh
left join Admissions a
    on dh.DateHour between a.AdmitDate and a.DischargeDate
group by dh.DateHour
order by dh.DateHour
This also assumes that there are admissions on every day. That seems like a reasonable assumption. If not, a calendar table would be a big help.
Here is one (ugly) way:
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
)
SELECT B.AdmitDate, A.DayHour, COUNT(DISTINCT PatientID) Patients
FROM DayHours A
CROSS JOIN (SELECT DISTINCT CONVERT(DATE,AdmitDate) AdmitDate
FROM YourTable) B
LEFT JOIN YourTable C
ON B.AdmitDate = CONVERT(DATE,C.AdmitDate)
AND A.DayHour = DATEPART(HOUR,C.AdmitDate)
GROUP BY B.AdmitDate, A.DayHour
This is a bit messy and includes a temp table with the test data you provided but
CREATE TABLE #HospitalPatientData (PatientId NVARCHAR(MAX), AdmitDate DATETIME, DischargeDate DATETIME)
INSERT INTO #HospitalPatientData
SELECT 'Jones.', '1-jan-13 01:37:00.000', '1-jan-13 17:45:00.000' UNION
SELECT 'Smith', '1-jan-13 02:12:00.000', '2-jan-13 02:14:00.000' UNION
SELECT 'Brooks.', '4-jan-13 13:54:00.000', '5-jan-13 06:14:00.000'
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
),
HospitalPatientData AS
(
SELECT CONVERT(nvarchar(max),AdmitDate,103) as AdmitDate ,DATEPART(hour,(AdmitDate)) as AdmitHour, COUNT(PatientID) as CountOfPatients
FROM #HospitalPatientData
GROUP BY CONVERT(nvarchar(max),AdmitDate,103), DATEPART(hour,(AdmitDate))
),
Results AS
(
SELECT MAX(h.AdmitDate) as Date, d.DayHour
FROM HospitalPatientData h
INNER JOIN DayHours d ON d.DayHour=d.DayHour
GROUP BY AdmitDate, CountOfPatients, DayHour
)
SELECT r.*, COUNT(h.PatientId) as CountOfPatients
FROM Results r
LEFT JOIN #HospitalPatientData h ON CONVERT(nvarchar(max),AdmitDate,103)=r.Date AND DATEPART(HOUR,h.AdmitDate)=r.DayHour
GROUP BY r.Date, r.DayHour
ORDER BY r.Date, r.DayHour
DROP TABLE #HospitalPatientData
This may get you started:
BEGIN TRAN
-- Sample admissions held in a table variable (no permanent table needed).
DECLARE @pt TABLE
    (
      PatientID VARCHAR(10)
    , AdmitDate DATETIME
    , DischargeDate DATETIME
    )
INSERT INTO @pt
        ( PatientID, AdmitDate, DischargeDate )
VALUES  ( 'Jones', '1-jan-13 01:37', '1-jan-13 17:45' ),
        ( 'Smith', '1-jan-13 02:12', '2-jan-13 02:14' )
,       ( 'Brooks', '4-jan-13 13:54', '5-jan-13 06:14' )
-- @StartDate: first day of the report; @FutureDays: number of days to cover.
DECLARE @StartDate DATETIME = '20130101'
    , @FutureDays INT = 7
;
WITH    dy
          -- day numbers 1..@FutureDays, taken from row numbers over sys.columns
          AS ( SELECT TOP (@FutureDays)
                        ROW_NUMBER() OVER ( ORDER BY name ) dy
               FROM     sys.columns c
             ) ,
        hr
          -- hour numbers 1..24
          AS ( SELECT TOP 24
                        ROW_NUMBER() OVER ( ORDER BY name ) hr
               FROM     sys.columns c
             )
    SELECT  refDate, COUNT(p.PatientID) AS PtCount
    FROM    ( -- every hour of every day in the window
              SELECT  DATEADD(HOUR, hr.hr - 1,
                              DATEADD(DAY, dy.dy - 1, @StartDate)) AS refDate
              FROM    dy
                      CROSS JOIN hr
            ) ref
            -- LEFT JOIN keeps empty hours; COUNT(PatientID) reports them as 0
            LEFT JOIN @pt p ON ref.refDate BETWEEN p.AdmitDate AND p.DischargeDate
    GROUP BY refDate
    ORDER BY refDate
ROLLBACK