Searching SQL table for two consecutive missing dates - sql

I want to search through a SQL table and find two consecutive missing dates.
For example, person 1 inserts 'diary' entry on day 1 and day 2, misses day 3 and day 4, and enters an entry on day 5.
I am not posting code because I am not sure of how to do this at all.
Thanks!

This uses a LEVEL aggregate to build the list of calendar dates from the first entry to the last, then uses LAG() to check a given date with the previous date, and then checks that neither of those dates had an associated entry to find those two-day gaps:
With diary as (
select to_date('01/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('02/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('04/01/2016','dd/mm/yyyy') entry_dt from dual union all
--leave two day gap of 5th and 6th
select to_date('07/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('08/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('10/01/2016','dd/mm/yyyy') entry_dt from dual )
select calendar_dt -1, calendar_dt
FROM (
select calendar_dt, entry_dt, lag(entry_dt) over (order by calendar_dt) prev_entry_dt
from diary
RIGHT OUTER JOIN (select min(entry_dt) + lvl as calendar_dt
FROM diary
,(select level lvl
from dual connect by level < (select max(entry_dt) - min(entry_dt)+1 from diary))
group by lvl) ON calendar_dt = entry_dt
order by calendar_dt
)
where entry_dt is null and prev_entry_dt is null
returns:
CALENDAR_DT-1, CALENDAR_DT
05/01/2016, 06/01/2016
I am only doing the calendar building to simplify building all 2-day gaps, as if a person took three days off that would be two overlapping two-day gaps (day 1-2, and days 2-3). If you want a far simpler query that outputs the start and end point of any gap of two or more days, then the following works:
With diary as (
select to_date('01/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('02/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('04/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('07/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('08/01/2016','dd/mm/yyyy') entry_dt from dual union all
select to_date('10/01/2016','dd/mm/yyyy') entry_dt from dual )
select prev_entry_dt +1 gap_start, entry_dt -1 gap_end
FROM (
select entry_dt, lag(entry_dt) over (order by entry_dt) prev_entry_dt
from diary
order by entry_dt
) where entry_dt - prev_entry_dt > 2

My high level approach to this problem would be to select from a dynamic table of dates, using an integer counter to add or subtract from the current DateTime to get as many dates as you require into the future or past, then LEFT join your data table to this, order by date and select the first row, or N many rows which have a NULL join.
So your data ends up being
DATE ENTRY_ID
---- -----
2016-01-01 1
2016-01-02 2
2016-01-03 NULL
2016-01-04 3
2016-01-05 4
2016-01-06 NULL
2016-01-07 NULL
2016-01-08 NULL
And you can pick all of the values you need from this dataset

Try this your problem looks like similar to this :-
Declare #temp Table(id int identity(1,1) not null,CDate smalldatetime ,val int)
insert into #temp select '10/2/2012',1
insert into #temp select '10/3/2012',1
insert into #temp select '10/5/2012',1
insert into #temp select '10/7/2012',2
insert into #temp select '10/9/2012',2
insert into #temp select '10/10/2012',2
insert into #temp select '10/13/2012',2
insert into #temp select '10/15/2012',2
DECLARE #startDate DATE= '10/01/2012'
DECLARE #endDate DATE= '10/15/2012'
SELECT t.Id, X.[Date],Val = COALESCE(t.val,0)
FROM
(SELECT [Date] = DATEADD(Day,Number,#startDate)
FROM master..spt_values
WHERE Type='P'
AND DATEADD(day,Number,#startDate) <= #endDate)X
LEFT JOIN #temp t
ON X.[Date] = t.CDate
Alternative you can try this :-
WITH dates AS (
SELECT CAST('2009-01-01' AS DATETIME) 'date'
UNION ALL
SELECT DATEADD(dd, 1, t.date)
FROM dates t
WHERE DATEADD(dd, 1, t.date) <= '2009-02-01')
SELECT t.eventid, d.date
FROM dates d
JOIN TABLE t ON d.date BETWEEN t.startdate AND t.enddate

Related

Set based query to replace loop to populate all month end dates from given date for all records

I have a table that stores patient lab test results. There can be results from multiple tests like Albumin, Potassium, Phosphorus etc. First reading for each patient from each of these categories is stored in a table called #MetricFirstGroupReading.
CREATE TABLE #MetricFirstGroupReading (Patient_Key INT, Metric_Group VARCHAR(100),
Observation_Date DATE)
ALTER TABLE #MetricFirstGroupReading
ADD CONSTRAINT UQ_MetricFirst UNIQUE (Patient_Key, Metric_Group);
INSERT INTO #MetricFirstGroupReading
SELECT 1, 'Albumin', '2018-11-15' UNION
SELECT 1, 'Potassium', '2018-12-10' UNION
SELECT 2, 'Albumin', '2018-10-20' UNION
SELECT 2, 'Potassium', '2018-11-25'
Now, I need to populate all month end dates upto current month into a new table, for each record from the #MetricFirstGroupReading table. Following is the expected result when the query run on December 2018.
I know how to do it using WHILE loops. How to do this without loops, using set based SQL queries, in SQL Server 2016?
Following worked. This is an expansion of the idea present in tsql: How to retrieve the last date of each month between given date range
Query
CREATE TABLE #AllMonthEnds (MonthEndDate DATE)
DECLARE #Start datetime
DECLARE #End datetime
SELECT #Start = '2000-01-01'
SELECT #End = DATEADD(MONTH,1,GETDATE())
;With CTE as
(
SELECT #Start as Date,Case When DatePart(mm,#Start)<>DatePart(mm,#Start+1) then 1 else 0 end as [Last]
UNION ALL
SELECT Date+1,Case When DatePart(mm,Date+1)<>DatePart(mm,Date+2) then 1 else 0 end from CTE
WHERE Date<#End
)
INSERT INTO #AllMonthEnds
SELECT [Date]
FROM CTE
WHERE [Last]=1
OPTION ( MAXRECURSION 0 )
SELECT T.Patient_Key, T.Metric_Group, T.Observation_Date AS First_Observation_Date,
DATEDIFF(MONTh,Observation_Date, MonthEndDate) AS MonthDiff,
A.MonthEndDate AS IterationDate
FROM #AllMonthEnds A
INNER JOIN
(
SELECT *, ROW_NUMBER() OVER(PARTITION BY Patient_Key, Metric_Group ORDER BY Observation_Date) AS RowVal
FROM #MetricFirstGroupReading M
)T
ON A.MonthEndDate >= T.Observation_Date
WHERE RowVal = 1
ORDER BY Patient_Key, Metric_Group, T.Observation_Date, A.MonthEndDate
How about:
select MetricFirstGroupReading.*, datediff(month, MetricFirstGroupReading.Observation_Date, months.monthendval) monthdiff, months.*
into allmonths
from
(
SELECT 1 patientid, 'Albumin' test, '2018-11-15' Observation_Date UNION
SELECT 1 patientid, 'Potassium' test, '2018-12-10' Observation_Date UNION
SELECT 2 patientid, 'Albumin' test, '2018-10-20' Observation_Date UNION
SELECT 2 patientid, 'Potassium' test, '2018-11-25' Observation_Date) MetricFirstGroupReading
join
(
select '2018-10-31' monthendval union
select '2018-11-30' monthendval union
select '2018-12-31' monthendval
) months on MetricFirstGroupReading.Observation_Date< months.monthendval
Replace the first select union with your table, and add or remove month ends from the second inner select.
Consider building a temp table of all 12 month end dates, then join to main table by date range. Use DateDiff for month difference:
CREATE TABLE #MonthEndDates (Month_End_Value DATE)
INSERT INTO #MonthEndDates
VALUES ('2018-01-31'),
('2018-02-28'),
('2018-03-31'),
('2018-04-30'),
('2018-05-31'),
('2018-04-30'),
('2018-06-30'),
('2018-07-31'),
('2018-08-31'),
('2018-09-30'),
('2018-10-31'),
('2018-11-30'),
('2018-12-31')
SELECT m.Patient_Key, m.Metric_Group, m.Observation_Date,
DateDiff(month, m.Observation_Date, d.Month_End_Value) AS Month_Diff,
d.Month_End_Value
FROM #MetricFirstGroupReading m
INNER JOIN #MonthEndDates d
ON m.Observation_Date < d.Month_End_Value
GO
Rextester Demo

SQL counting days with gap / overlapping

I am working on a "counting days" problem almost identical to this one. I have a list of date(s), and need to count how many days used excluding duplicate, and handling the gaps. Same input and output.
From: Markus Jarderot
Input
ID d1 d2
1 2011-08-01 2011-08-08
1 2011-08-02 2011-08-06
1 2011-08-03 2011-08-10
1 2011-08-12 2011-08-14
2 2011-08-01 2011-08-03
2 2011-08-02 2011-08-06
2 2011-08-05 2011-08-09
Output
ID hold_days
1 11
2 8
SQL to find time elapsed from multiple overlapping intervals
But for the life of me I couldn't understand Markus Jarderot's solution.
SELECT DISTINCT
t1.ID,
t1.d1 AS date,
-DATEDIFF(DAY, (SELECT MIN(d1) FROM Orders), t1.d1) AS n
FROM Orders t1
LEFT JOIN Orders t2 -- Join for any events occurring while this
ON t2.ID = t1.ID -- is starting. If this is a start point,
AND t2.d1 <> t1.d1 -- it won't match anything, which is what
AND t1.d1 BETWEEN t2.d1 AND t2.d2 -- we want.
GROUP BY t1.ID, t1.d1, t1.d2
HAVING COUNT(t2.ID) = 0
Why is DATEDIFF(DAY, (SELECT MIN(d1) FROM Orders), t1.d1) picking from the min(d1) from the entire list? Is that regardless of ID.
And what does t1.d1 BETWEEN t2.d1 AND t2.d2 do? Is that to ensure only overlapped interval are calculated?
Same thing with group by, I think because if in the event the same identical period will be discarded? I tried to trace the solution by hand but getting more confused.
This is mostly a duplicate of my answer here (including explanation) but with the inclusion of grouping on an id column. It should use a single table scan and does not require a recursive sub-query factoring clause (CTE) or self joins.
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE your_table ( id, usr, start_date, end_date ) AS
SELECT 1, 'A', DATE '2017-06-01', DATE '2017-06-03' FROM DUAL UNION ALL
SELECT 1, 'B', DATE '2017-06-02', DATE '2017-06-04' FROM DUAL UNION ALL -- Overlaps previous
SELECT 1, 'C', DATE '2017-06-06', DATE '2017-06-06' FROM DUAL UNION ALL
SELECT 1, 'D', DATE '2017-06-07', DATE '2017-06-07' FROM DUAL UNION ALL -- Adjacent to previous
SELECT 1, 'E', DATE '2017-06-11', DATE '2017-06-20' FROM DUAL UNION ALL
SELECT 1, 'F', DATE '2017-06-14', DATE '2017-06-15' FROM DUAL UNION ALL -- Within previous
SELECT 1, 'G', DATE '2017-06-22', DATE '2017-06-25' FROM DUAL UNION ALL
SELECT 1, 'H', DATE '2017-06-24', DATE '2017-06-28' FROM DUAL UNION ALL -- Overlaps previous and next
SELECT 1, 'I', DATE '2017-06-27', DATE '2017-06-30' FROM DUAL UNION ALL
SELECT 1, 'J', DATE '2017-06-27', DATE '2017-06-28' FROM DUAL UNION ALL -- Within H and I
SELECT 2, 'K', DATE '2011-08-01', DATE '2011-08-08' FROM DUAL UNION ALL -- Your data below
SELECT 2, 'L', DATE '2011-08-02', DATE '2011-08-06' FROM DUAL UNION ALL
SELECT 2, 'M', DATE '2011-08-03', DATE '2011-08-10' FROM DUAL UNION ALL
SELECT 2, 'N', DATE '2011-08-12', DATE '2011-08-14' FROM DUAL UNION ALL
SELECT 3, 'O', DATE '2011-08-01', DATE '2011-08-03' FROM DUAL UNION ALL
SELECT 3, 'P', DATE '2011-08-02', DATE '2011-08-06' FROM DUAL UNION ALL
SELECT 3, 'Q', DATE '2011-08-05', DATE '2011-08-09' FROM DUAL;
Query 1:
SELECT id,
SUM( days ) AS total_days
FROM (
SELECT id,
dt - LAG( dt ) OVER ( PARTITION BY id
ORDER BY dt ) + 1 AS days,
start_end
FROM (
SELECT id,
dt,
CASE SUM( value ) OVER ( PARTITION BY id
ORDER BY dt ASC, value DESC, ROWNUM ) * value
WHEN 1 THEN 'start'
WHEN 0 THEN 'end'
END AS start_end
FROM your_table
UNPIVOT ( dt FOR value IN ( start_date AS 1, end_date AS -1 ) )
)
WHERE start_end IS NOT NULL
)
WHERE start_end = 'end'
GROUP BY id
Results:
| ID | TOTAL_DAYS |
|----|------------|
| 1 | 25 |
| 2 | 13 |
| 3 | 9 |
The brute force method is to create all days (in a recursive query) and then count:
with dates(id, day, d2) as
(
select id, d1 as day, d2 from mytable
union all
select id, day + 1, d2 from dates where day < d2
)
select id, count(distinct day)
from dates
group by id
order by id;
Unfortunately there is a bug in some Oracle versions and recursive queries with dates don't work there. So try this code and see whether it works in your system. (I have Oracle 11.2 and the bug still exists there; so I guess you need Oracle 12c.)
I guess Markus' idea is to find all starting points that are not within other ranges and all ending points that aren't. Then just take the first starting point till the first ending point, then the next starting point till the next ending point, etc. As Markus isn't using a window function to number starting and ending points, he must find a more complicated way to achieve this. Here is the query with ROW_NUMBER. Maybe this gives you a start what to look for in Markus' query.
select startpoint.id, sum(endpoint.day - startpoint.day)
from
(
select id, d1 as day, row_number() over (partition by id order by d1) as rn
from mytable m1
where not exists
(
select *
from mytable m2
where m1.id = m2.id
and m1.d1 > m2.d1 and m1.d1 <= m2.d2
)
) startpoint
join
(
select id, d2 as day, row_number() over (partition by id order by d1) as rn
from mytable m1
where not exists
(
select *
from mytable m2
where m1.id = m2.id
and m1.d2 >= m2.d1 and m1.d2 < m2.d2
)
) endpoint on endpoint.id = startpoint.id and endpoint.rn = startpoint.rn
group by startpoint.id
order by startpoint.id;
If all your intervals start at different dates, consider them in ascending order by d1 counting how many days are from d1 to the next interval.
You can discard an interval of it is contained in another one.
The last interval won't have a follower.
This query should give you how many days each interval give
select a.id, a.d1,nvl(min(b.d1), a.d2) - a.d1
from orders a
left join orders b
on a.id = b.id and a.d1 < b.d1 and a.d2 between b.d1 and b.d2
group by a.id, a.d1
Then group by id and sum days

Calculating per day in SQL

I have an sql table like that:
Id Date Price
1 21.09.09 25
2 31.08.09 16
1 23.09.09 21
2 03.09.09 12
So what I need is to get min and max date for each id and dif in days between them. It is kind of easy. Using SQLlite syntax:
SELECT id,
min(date),
max(date),
julianday(max(date)) - julianday(min(date)) as dif
from table group by id
Then the tricky one: how can I receive the price per day during this difference period. I mean something like this:
ID Date PricePerDay
1 21.09.09 25
1 22.09.09 0
1 23.09.09 21
2 31.08.09 16
2 01.09.09 0
2 02.09.09 0
2 03.09.09 12
I create a cte as you mentioned with calendar but dont know how to get the desired result:
WITH RECURSIVE
cnt(x) AS (
SELECT 0
UNION ALL
SELECT x+1 FROM cnt
LIMIT (SELECT ((julianday('2015-12-31') - julianday('2015-01-01')) + 1)))
SELECT date(julianday('2015-01-01'), '+' || x || ' days') as date FROM cnt
p.s. If it will be in sqllite syntax-would be awesome!
You can use a recursive CTE to calculate all the days between the min date and max date. The rest is just a left join and some logic:
with recursive cte as (
select t.id, min(date) as thedate, max(date) as maxdate
from t
group by id
union all
select cte.id, date(thedate, '+1 day') as thedate, cte.maxdate
from cte
where cte.thedate < cte.maxdate
)
select cte.id, cte.date,
coalesce(t.price, 0) as PricePerDay
from cte left join
t
on cte.id = t.id and cte.thedate = t.date;
One method is using a tally table.
To build a list of dates and join that with the table.
The date stamps in the DD.MM.YY format are first changed to the YYYY-MM-DD date format.
To make it possible to actually use them as a date in the SQL.
At the final select they are formatted back to the DD.MM.YY format.
First some test data:
create table testtable (Id int, [Date] varchar(8), Price int);
insert into testtable (Id,[Date],Price) values (1,'21.09.09',25);
insert into testtable (Id,[Date],Price) values (1,'23.09.09',21);
insert into testtable (Id,[Date],Price) values (2,'31.08.09',16);
insert into testtable (Id,[Date],Price) values (2,'03.09.09',12);
The SQL:
with Digits as (
select 0 as n
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9
),
t as (
select Id,
('20'||substr([Date],7,2)||'-'||substr([Date],4,2)||'-'||substr([Date],1,2)) as [Date],
Price
from testtable
),
Dates as (
select Id, date(MinDate,'+'||(d2.n*10+d1.n)||' days') as [Date]
from (
select Id, min([Date]) as MinDate, max([Date]) as MaxDate
from t
group by Id
) q
join Digits d1
join Digits d2
where date(MinDate,'+'||(d2.n*10+d1.n)||' days') <= MaxDate
)
select d.Id,
(substr(d.[Date],9,2)||'.'||substr(d.[Date],6,2)||'.'||substr(d.[Date],3,2)) as [Date],
coalesce(t.Price,0) as Price
from Dates d
left join t on (d.Id = t.Id and d.[Date] = t.[Date])
order by d.Id, d.[Date];
The recursive SQL below was totally inspired by the excellent answer from Gordon Linoff.
And a recursive SQL is probably more performant for this anyway.
(He should get the 15 points for the accepted answer).
The difference in this version is that the datestamps are first formatted to YYYY-MM-DD.
with t as (
select Id,
('20'||substr([Date],7,2)||'-'||substr([Date],4,2)||'-'||substr([Date],1,2)) as [Date],
Price
from testtable
),
cte as (
select Id, min([Date]) as [Date], max([Date]) as MaxDate from t
group by Id
union all
select Id, date([Date], '+1 day'), MaxDate from cte
where [Date] < MaxDate
)
select cte.Id,
(substr(cte.[Date],9,2)||'.'||substr(cte.[Date],6,2)||'.'||substr(cte.[Date],3,2)) as [Date],
coalesce(t.Price, 0) as PricePerDay
from cte
left join t
on (cte.Id = t.Id and cte.[Date] = t.[Date])
order by cte.Id, cte.[Date];

count rows by hour and include zeroes

I am trying to count (by hour) the number of entries into a database. I have successfully written a query that counts by hour, but it omits hours with zero entries. I need the results to include the zeroes. I have looked around the internet, and have found a lot of suggestions. I have created a View that has one column with datetime entries by minute. I've tried joining the main table to this view and I get the same results as without the join. Still no zeroes. Wondering how to get this query to return zeroes. I am using MS SQL 2008 R2. any suggestions?
declare #limit datetime;
use InputArchive
set #limit = current_timestamp;
set #limit = DATEADD(hour, -72, #limit);
SELECT DATEADD(hour, datediff(hour, 0, ArchivedItems.RecordCreated), 0) as TimeHour, COUNT(ISNULL((ArchivedItems.RecordCreated),' ')) as NumPerHour
FROM ArchivedItems
LEFT OUTER JOIN vw_hoursalot
ON vw_hoursalot.dtHr = ArchivedItems.RecordCreated
where InputTypeId = 5 or InputTypeId = 6 or InputTypeId = 8 and (ArchivedItems.RecordCreated >= #limit)
Group BY DATEADD(hour, Datediff(hour, 0, ArchivedItems.RecordCreated), 0)
order by DATEADD(hour, datediff(hour, 0, ArchivedItems.RecordCreated), 0) desc
option (MAXRECURSION 0)
Update: I changed the hoursalot view to be by the hour
I'm sorry but I'm not sure what you mean by full view SQL.
I can't put any strait info from the archivedItems table for legal reasons but the RecordCreated column is a strait timestamp ie '2013-04-05 14:09:59.167'
Try putting the vw_hoursalot as the leftmost table in the condition - this will mean that all rows from the view will be returned, whether a corresponding record in ArchivedHours is found or not.
I've edited again - this time I took the time to mock up some dummy data, and I realized that a problem in the code, both mine and yours, is that the ArchivedItems.RecordCreated column is being used as a filter in the WHERE condition. Therefore, only records with an existing RecordCreated value will be returned.
I've moved that condition to the JOIN, and run the query below against some very basic data I mocked up. Assuming that your vw_HoursALot view is returning twenty-four integers numbered 0-23, this ought to get you the data you are looking for.
PLEASE NOTE: I made the assumption that InputTypeID is from ArchivedItems.
WITH -- I used these two CTEs as my dummy data, based on the information in your post
vw_HoursALot AS
(
SELECT 1 dtHr UNION SELECT 2 UNION SELECT 3 UNION SELECT 4 UNION SELECT 5 UNION
SELECT 6 UNION SELECT 7 UNION SELECT 8 UNION SELECT 9 UNION SELECT 10 UNION
SELECT 11 UNION SELECT 12 UNION SELECT 13 UNION SELECT 14 UNION SELECT 15 UNION
SELECT 16 UNION SELECT 17 UNION SELECT 18 UNION SELECT 19 UNION SELECT 20 UNION
SELECT 21 UNION SELECT 22 UNION SELECT 23 UNION SELECT 0
),
ArchivedItems AS
(
SELECT GETDATE() AS RecordCreated, 5 AS InputTypeID
UNION SELECT DATEADD(HOUR, -3, GETDATE()), 6 AS InputTypeID
)
-- this part is actually doing the work
SELECT
vw_HoursALot.dtHr,
COUNT(ArchivedItems.RecordCreated) AS NumPerHour
FROM
vw_hoursalot
LEFT OUTER JOIN
ArchivedItems ON
vw_hoursalot.dtHr = DATEPART(hour, ArchivedItems.RecordCreated) AND
ArchivedItems.RecordCreated >= DATEADD(hour, -72, GETDATE()) AND
(
InputTypeId = 5 OR
InputTypeId = 6 OR
InputTypeId = 8
)
GROUP BY vw_HoursALot.dtHr
ORDER BY vw_HoursALot.dtHr DESC
OPTION (MAXRECURSION 0)
Here is what I came up with:
declare #limit datetime;
declare #BaseTime datetime
set #BaseTime = '20141020 15:00'; --must be an even hour
set #limit = DATEADD(hour, -72, #BaseTime);
print #Basetime
;WITH
D1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
D2(N) AS (SELECT 1 FROM D1 a, D1 b),
Numbers AS (SELECT TOP (100) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))-1 AS Number FROM D2),
AllHours AS (SELECT DATEADD(hour,numbers.number,#limit) AS hr FROM Numbers),
Raw_Data AS (
SELECT DATEADD(hour,DATEDIFF(hour,#BaseTime,RecordCreated),#BaseTime) AS HourRecorded FROM
ArchivedItems
WHERE RecordCreated BETWEEN #limit AND #BaseTime
AND InputTypeID IN (5,6,8)
)
SELECT count(Raw_Data.HourRecorded),AllHours.hr
FROM AllHours left outer join Raw_Data on AllHours.hr = Raw_Data.HourRecorded
GROUP BY AllHours.hr
order by AllHours.hr
Here is what I used to create the test data
;WITH
D1(N) AS (
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
),
D2(N) AS (SELECT 1 FROM D1 a, D1 b),
D4(N) AS (SELECT 1 FROM D2 a, D2 b),
Numbers AS (SELECT TOP (10000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))-1 AS Number FROM D4)
INSERT INTO ArchivedItems(InputTypeID, RecordCreated)
SELECT ABS(CAST(CAST(NEWID() AS VARBINARY) % 10 AS INT)), DATEADD(MINUTE, ABS(CAST(CAST(NEWID() AS VARBINARY) % 10000 AS INT)),'20141017')
FROM Numbers
--Make sure there is a gap
DELETE FROM ArchivedItems WHERE RecordCreated BETWEEN '2014-10-20 06:00:00.000' AND '2014-10-20 08:00:00.000'

sql server rolling 12 months sum with date gaps

Suppose I have a table that indicates the number of items sold in a particular month for each sales rep. However, there will not be a row for a particular person in months where there were no sales. Example
rep_id month_yr num_sales
1 01/01/2012 3
1 05/01/2012 1
1 11/01/2012 1
2 02/01/2012 2
2 05/01/2012 1
I want to be able to create a query that shows for each rep_id and all possible months (01/01/2012, 02/01/2012, etc. through current) a rolling 12 month sales sum, like this:
rep_id month_yr R12_Sum
1 11/01/2012 5
1 12/01/2012 5
1 01/01/2013 5
1 02/01/2013 2
I have found some examples online, but the problem I'm running into is I'm missing some dates for each rep_id. Do I need to cross join or something?
To solve this problem, you need a driver table that has all year/month combinations. Then, you need to create this for each rep.
The solution is then to left join the actual data to this driver and aggregate the period that you want. Here is the query:
with months as (
select 1 as mon union all select 2 union all select 3 union all select 4 union all
select 5 as mon union all select 6 union all select 7 union all select 8 union all
select 9 as mon union all select 10 union all select 11 union all select 12
),
years as (select 2010 as yr union all select 2011 union all select 2012 union all select 2013
),
monthyears as (
select yr, mon, yr*12+mon as yrmon
from months cross join years
),
rmy as (
select *
from monthyears my cross join
(select distinct rep_id from t
) r
)
select rmy.rep_id, rmy.yr, rmy.mon, SUM(t.num_sales) as r12_sum
from rmy join
t
on rmy.rep_id = t.rep_id and
t.year(month_yr)*12 + month(month_yr) between rmy.yrmon - 11 and rmy.yrmon
group by rmy.rep_id, rmy.yr, rmy.mon
order by 1, 2, 3
This hasn't been tested, so it may have syntactic errors. Also, it doesn't convert the year/month combination back to a date, leaving the values in separate columns.
Here is one solution:
SELECT
a.rep_id
,a.month_yr
,SUM(b.R12_Sum) AS R12_TTM
FROM YourTable a
LEFT OUTER JOIN YourTable b
ON a.rep_id = b.rep_id
AND a.month_yr <= b.month_yr
AND a.month_yr >= DATEADD(MONTH, -11, b.month_yr)
GROUP BY
a.rep_id
,a.month_yr
It's certainly not pretty but is more simple than a CTE, numbers table or self join:
DECLARE #startdt DATETIME
SET #startdt = '2012-01-01'
SELECT rep_id, YEAR(month_yr), MONTH(month_yr), SUM(num_sales)
FROM MyTable WHERE month_yr >= #startdt AND month_yr < DATEADD(MONTH,1,#startdt)
UNION ALL
SELECT rep_id, YEAR(month_yr), MONTH(month_yr), SUM(num_sales)
FROM MyTable WHERE month_yr >= DATEADD(MONTH,1,#startdt) AND month_yr < DATEADD(MONTH,2,#startdt)
UNION ALL
SELECT rep_id, YEAR(month_yr), MONTH(month_yr), SUM(num_sales)
FROM MyTable WHERE month_yr >= DATEADD(MONTH,2,#startdt) AND month_yr < DATEADD(MONTH,3,#startdt)
UNION ALL
SELECT rep_id, YEAR(month_yr), MONTH(month_yr), SUM(num_sales)
FROM MyTable WHERE month_yr >= DATEADD(MONTH,3,#startdt) AND month_yr < DATEADD(MONTH,4,#startdt)
UNION ALL
etc etc
The following demonstrates using a CTE to generate a table of dates and generating a summary report using the CTE. Sales representatives are omitted from the results when they have had no applicable sales.
Try jiggling the reporting parameters, e.g. setting #RollingMonths to 1, for more entertainment.
-- Sample data.
declare #Sales as Table ( rep_id Int, month_yr Date, num_sales Int );
insert into #Sales ( rep_id, month_yr, num_sales ) values
( 1, '01/01/2012', 3 ),
( 1, '05/01/2012', 1 ),
( 1, '11/01/2012', 1 ),
( 2, '02/01/2012', 1 ),
( 2, '05/01/2012', 2 );
select * from #Sales;
-- Reporting parameters.
declare #ReportEnd as Date = DateAdd( day, 1 - Day( GetDate() ), GetDate() ); -- The first of the current month.
declare #ReportMonths as Int = 6; -- Number of months to report.
declare #RollingMonths as Int = 12; -- Number of months in rolling sums.
-- Report.
-- A CTE generates a table of month/year combinations covering the desired reporting time period.
with ReportingIntervals as (
select DateAdd( month, 1 - #ReportMonths, #ReportEnd ) as ReportingInterval,
DateAdd( month, 1 - #RollingMonths, DateAdd( month, 1 - #ReportMonths, #ReportEnd ) ) as FirstRollingMonth
union all
select DateAdd( month, 1, ReportingInterval ), DateAdd( month, 1, FirstRollingMonth )
from ReportingIntervals
where ReportingInterval < #ReportEnd )
-- Join the CTE with the sample data and summarize.
select RI.ReportingInterval, S.rep_id, Sum( S.num_sales ) as R12_Sum
from ReportingIntervals as RI left outer join
#Sales as S on RI.FirstRollingMonth <= S.month_yr and S.month_yr <= RI.ReportingInterval
group by RI.ReportingInterval, S.rep_id
order by RI.ReportingInterval, S.rep_id