SQL for islands-and-gaps: islands can overlap - sql

I have robots with certificates. There are two kinds of certificates. For each kind of certificate (identified by Certif_ID), for each robot I need the most recent certified date-span.
Update for clarity: Date-spans that do not overlap but are contiguous are treated as a single span. See the first two records in the sample table that is at the top of the code.
Date-spans may overlap! These must be treated as a single span. This is where I'm having a problem.
in SQL Server 2012, run this code as-is to see what's happening.
-- Rebuild the #certif_span sample table: one row per certified date-span,
-- keyed by robot and certificate kind.
BEGIN -- #certif_span
    IF OBJECT_ID('TEMPDB..#certif_span') IS NOT NULL
        DROP TABLE #certif_span;

    CREATE TABLE #certif_span
    (
        Robot_ID  CHAR(3),
        Certif_ID SMALLINT,
        d_Start   SMALLDATETIME,
        d_End     SMALLDATETIME
    );

    -- Rows marked with * overlap the row directly above them.
    INSERT INTO #certif_span (Robot_ID, Certif_ID, d_Start, d_End)
    VALUES
        ('210', '1', '2000-01-01', '2001-02-02'),
        ('210', '1', '2001-02-03', '2001-12-31'),
        ('210', '1', '2000-01-01', '2000-12-31'),
        ('880', '1', '2001-01-01', '2001-12-31'),
        ('880', '1', '2002-02-02', '2003-02-01'),
        ('880', '1', '2003-01-01', '2004-12-31'), -- *
        ('880', '7', '2010-05-05', '2011-05-04'),
        ('880', '7', '2011-05-05', '2012-02-10'),
        ('880', '7', '2013-03-03', '2013-04-04'),
        ('880', '7', '2013-04-01', '2013-05-05'); -- *
END
-- List the sample rows as loaded, with both dates rendered as yyyy-MM-dd text.
SELECT
    cs.Robot_ID,
    cs.Certif_ID,
    FORMAT(cs.d_Start, 'yyyy-MM-dd') AS d_Start,
    FORMAT(cs.d_End, 'yyyy-MM-dd')   AS d_End,
    'Here is the raw data'           AS commentary
FROM #certif_span AS cs
ORDER BY
    Robot_ID,
    Certif_ID,
    d_End
-- Build #prac_date_span: the de-duplicated set of certified spans.
-- DISTINCT alone removes duplicate rows; the former GROUP BY on the same four
-- columns was redundant, and the positional ORDER BY did not guarantee any
-- storage order for a SELECT ... INTO target, so both were dropped.
IF OBJECT_ID('TEMPDB..#prac_date_span') IS NOT NULL
    DROP TABLE #prac_date_span;

SELECT DISTINCT
    cs.Robot_ID,
    cs.Certif_ID,
    cs.d_Start,
    cs.d_End
INTO #prac_date_span
FROM #certif_span AS cs;
-- Build #prac_date_span_grp by chaining spans that are exactly contiguous
-- (one span starts the day after another ends) for the same robot and certificate.
BEGIN
IF OBJECT_ID('TEMPDB..#prac_date_span_grp') IS NOT NULL
DROP TABLE #prac_date_span_grp;
WITH cte as (
-- Anchor: spans that have no predecessor ending exactly one day before they start.
SELECT
a.Robot_ID, a.Certif_ID
, a.d_Start, a.d_End
FROM
#prac_date_span a
LEFT JOIN #prac_date_span b
ON a.Robot_ID = b.Robot_ID
AND b.Certif_ID = a.Certif_ID
-- Equality test: only a span ending on the previous day counts as a predecessor.
AND a.d_Start - 1 = b.d_End
WHERE
b.Robot_ID IS NULL
UNION ALL -----------------------------
-- Recursive step: keep the chain's start and extend its end with the span
-- that begins exactly one day after the current end.
SELECT
a.Robot_ID, a.Certif_ID
, a.d_Start, b.d_End
FROM
cte a
JOIN
#prac_date_span b
ON a.Robot_ID = b.Robot_ID
AND b.Certif_ID = a.Certif_ID
-- Same equality test, so a span that starts on or before the current end
-- (an overlap) is never attached; it remains a separate anchor row instead.
AND b.d_Start - 1 = a.d_End
)
-- Collapse each chain to one row per start date, keeping the furthest end reached.
SELECT
Robot_ID
, Certif_ID
, d_Start
, d_End = MAX(d_End)
INTO
--drop table --select * from
#prac_date_span_grp
FROM cte
GROUP BY Robot_ID, Certif_ID, d_Start
ORDER BY Robot_ID, Certif_ID;
END
-- Show every grouped span produced by the recursive chaining step.
SELECT
    grp.Robot_ID,
    grp.Certif_ID,
    FORMAT(grp.d_Start, 'yyyy-MM-dd')   AS d_Start,
    FORMAT(grp.d_End, 'yyyy-MM-dd')     AS d_End,
    'Here is the grouped data (flawed)' AS commentary
FROM #prac_date_span_grp AS grp

-- Latest start and end per robot/certificate, annotated with the start date
-- that the two known-wrong rows ought to show.
SELECT
    grp.Robot_ID,
    grp.Certif_ID,
    FORMAT(MAX(grp.d_Start), 'yyyy-MM-dd') AS d_Start,
    FORMAT(MAX(grp.d_End), 'yyyy-MM-dd')   AS d_End,
    'Final result: Start date ' +
        CASE
            WHEN FORMAT(MAX(grp.d_Start), 'yyyy-MM-dd') = '2003-01-01' THEN 'should be 2002-02-02'
            WHEN FORMAT(MAX(grp.d_Start), 'yyyy-MM-dd') = '2013-04-01' THEN 'should be 2013-03-03'
            ELSE 'good'
        END AS commentary
FROM #prac_date_span_grp AS grp
GROUP BY
    grp.Robot_ID,
    grp.Certif_ID
The final result should be:
Robot_ID Certif_ID d_Start d_End
210 1 2000-01-01 2001-12-31
880 1 2002-02-02 2004-12-31
880 7 2013-03-03 2013-05-05
I've been fiddling with the date comparisons. In this bit from the cte, the -1 looks like it allows for a one-day stagger in date-spans:
AND b.Certif_ID = a.Certif_ID
AND a.d_Start - 1 = b.d_End
...
AND b.Certif_ID = a.Certif_ID
AND b.d_Start - 1 = a.d_End
I feel certain this is the point that needs fixing. I've tried changing the date compare to >=. (This requires me to deal with max recursion.) The grouping changes, but is not correct.

This is not a simple task. I hope this will answer the problem.
-- Most recent certified span per robot and certificate, where overlapping or
-- contiguous (next-day) spans count as one span.
-- The sample data lives in a table variable, which must be named with '@'.
DECLARE @certif_span TABLE
(
    Robot_ID  CHAR(3),
    Certif_ID SMALLINT,
    StartDate DATE,
    EndDate   DATE
);

INSERT INTO @certif_span (Robot_ID, Certif_ID, StartDate, EndDate)
VALUES
    ('210', '1', '2000-01-01', '2001-02-02'),
    ('210', '1', '2001-02-03', '2001-12-31'),
    ('210', '1', '2000-01-01', '2000-12-31'),
    ('880', '1', '2001-01-01', '2001-12-31'),
    ('880', '1', '2002-02-02', '2003-02-01'),
    ('880', '1', '2003-01-01', '2004-12-31'), -- overlaps the row above
    ('880', '7', '2010-05-05', '2011-05-04'),
    ('880', '7', '2011-05-05', '2012-02-10'),
    ('880', '7', '2013-03-03', '2013-04-04'),
    ('880', '7', '2013-04-01', '2013-05-05'); -- overlaps the row above

WITH Src AS
(
    -- Number the spans of each robot/certificate in start-date order.
    SELECT
        ROW_NUMBER() OVER (PARTITION BY a.Robot_ID, a.Certif_ID
                           ORDER BY a.StartDate, a.EndDate) AS RN,
        a.Robot_ID,
        a.Certif_ID,
        a.StartDate,
        a.EndDate
    FROM @certif_span AS a
),
Islands AS
(
    -- Anchor: the first span of each robot/certificate opens island 0.
    SELECT RN, Robot_ID, Certif_ID, StartDate, EndDate,
           0 AS islandNo,
           EndDate AS MovingEnd
    FROM Src AS a
    WHERE a.RN = 1

    UNION ALL

    -- Walk the spans in order, carrying the furthest end date seen so far.
    -- A span starting no later than the day after MovingEnd stays in the
    -- current island; otherwise a new island begins.
    SELECT a.RN, a.Robot_ID, a.Certif_ID, a.StartDate, a.EndDate,
           b.islandNo + CASE WHEN DATEDIFF(d, a.StartDate, b.MovingEnd) >= -1 THEN 0 ELSE 1 END AS islandNo,
           CASE WHEN a.EndDate > b.MovingEnd THEN a.EndDate ELSE b.MovingEnd END AS MovingEnd
    FROM Src AS a
    INNER JOIN Islands AS b
        ON  a.Robot_ID  = b.Robot_ID
        AND a.Certif_ID = b.Certif_ID
        AND a.RN        = b.RN + 1
),
LastIsland AS
(
    -- One row per island; RN = 1 marks the latest island of each robot/certificate.
    SELECT Robot_ID, Certif_ID, islandNo,
           MIN(StartDate) AS startDate,
           MAX(EndDate)   AS EndDate,
           ROW_NUMBER() OVER (PARTITION BY Robot_ID, Certif_ID ORDER BY islandNo DESC) AS RN
    FROM Islands
    GROUP BY Robot_ID, Certif_ID, islandNo
)
SELECT Robot_ID, Certif_ID, startDate, EndDate
FROM LastIsland
WHERE RN = 1
-- The recursion advances one span per level, so the default limit of 100
-- would fail once a robot/certificate has more than 101 spans.
OPTION (MAXRECURSION 0);

This was a head scratcher, because it is not your typical Gaps-and-Islands, so it dawned on me to create the gaps-and-islands off the date dimension first.
Now, I do have one more island than perhaps you were expecting. But no matter how I look at it, it seems to hold true.
I should also note that I use a TVF (table-valued user-defined function) to create dynamic date ranges. This logic could easily be ported into a preliminary cte. A tally/calendar table would do the trick as well.
The SQL
-- Islands built on the date dimension: expand every span into its individual
-- days, find runs of consecutive covered days, then map the runs back to rows.
;with cte0 as(
-- GrpSeq is constant within a run of consecutive RetSeq values, because both
-- RetSeq and the row number advance by one per covered day.
Select A.*,GrpSeq=RetSeq-Row_Number() over (Order by RetSeq)
From (
-- Every day between the overall earliest start and latest end that falls
-- inside at least one span; Distinct collapses days covered by several spans.
Select Distinct RetSeq,RetVal
From [dbo].[udf-Range-Date]((Select min(d_Start) from #certif_span),(Select max(d_End) from #certif_span),'DD',1) A
Join #certif_span B on A.RetVal between B.d_Start and B.d_End
) A
)
, cte1 as(
-- One row per run of consecutive covered days: its first and last day.
Select d_Start = min(A.RetVal)
,d_End = max(A.RetVal)
From cte0 A
Group By GrpSeq
)
-- Attach a robot and certificate to each island via the spans starting inside it.
-- NOTE(review): the islands above are formed over all rows with no partition on
-- Robot_ID or Certif_ID, and min() picks a single robot/certificate per island,
-- so spans of different robots that overlap in time share one island and only
-- the smallest ids are reported. Confirm this is acceptable for the real data.
Select Robot_ID = min(Robot_ID)
,Certif_ID = min(Certif_ID)
,A.d_Start
,A.d_End
from cte1 A
Join #certif_span B on B.d_Start Between A.d_Start and A.d_End
Group By A.d_Start,A.d_End
Returns
Robot_ID Certif_ID d_Start d_End
210 1 2000-01-01 2001-12-31
880 1 2002-02-02 2004-12-31
880 7 2010-05-05 2012-02-10 << Extra Mentioned
880 7 2013-03-03 2013-05-05
The UDF if Needed
-- Inline table-valued function returning a sequence of datetimes from @R1 up to
-- (and including, when it lands exactly) @R2, stepping @Incr units of @Part.
--   @R1, @R2 : range start and end
--   @Part    : date part code: YY QQ MM WK DD HH MI SS
--   @Incr    : number of @Part units between consecutive values
-- Returns (RetSeq, RetVal): a 1-based sequence number and the generated value.
-- Parameters must use the '@' prefix; '#' is not valid for parameter names.
CREATE FUNCTION [dbo].[udf-Range-Date] (@R1 datetime, @R2 datetime, @Part varchar(10), @Incr int)
Returns Table
Return (
    -- cte0: upper bound on the number of steps needed to reach @R2.
    with cte0(M) As (
        Select 1 + Case @Part
                       When 'YY' then DateDiff(YY, @R1, @R2) / @Incr
                       When 'QQ' then DateDiff(QQ, @R1, @R2) / @Incr
                       When 'MM' then DateDiff(MM, @R1, @R2) / @Incr
                       When 'WK' then DateDiff(WK, @R1, @R2) / @Incr
                       When 'DD' then DateDiff(DD, @R1, @R2) / @Incr
                       When 'HH' then DateDiff(HH, @R1, @R2) / @Incr
                       When 'MI' then DateDiff(MI, @R1, @R2) / @Incr
                       When 'SS' then DateDiff(SS, @R1, @R2) / @Incr
                   End
    ),
    -- cte1: ten rows; cross-joined eight times below for up to 10^8 rows.
    cte1(N) As (Select 1 From (Values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
    -- cte2: the numbers 1..M.
    cte2(N) As (
        Select Top (Select M From cte0) Row_Number() over (Order By (Select NULL))
        From cte1 a
        Cross Join cte1 b
        Cross Join cte1 c
        Cross Join cte1 d
        Cross Join cte1 e
        Cross Join cte1 f
        Cross Join cte1 g
        Cross Join cte1 h
    ),
    -- cte3: step 0 is @R1 itself; step N is @R1 plus N*@Incr units.
    cte3(N, D) As (
        Select 0, @R1
        Union All
        Select N,
               Case @Part
                   When 'YY' then DateAdd(YY, N * @Incr, @R1)
                   When 'QQ' then DateAdd(QQ, N * @Incr, @R1)
                   When 'MM' then DateAdd(MM, N * @Incr, @R1)
                   When 'WK' then DateAdd(WK, N * @Incr, @R1)
                   When 'DD' then DateAdd(DD, N * @Incr, @R1)
                   When 'HH' then DateAdd(HH, N * @Incr, @R1)
                   When 'MI' then DateAdd(MI, N * @Incr, @R1)
                   When 'SS' then DateAdd(SS, N * @Incr, @R1)
               End
        From cte2
    )
    Select RetSeq = N + 1
          ,RetVal = D
    From cte3
    Cross Join cte0
    -- Drop steps that overshoot the end of the range.
    Where D <= @R2
)
/*
Max 100 million observations -- Date Parts YY QQ MM WK DD HH MI SS
Syntax:
Select * from [dbo].[udf-Range-Date]('2016-10-01','2020-10-01','YY',1)
Select * from [dbo].[udf-Range-Date]('2016-01-01','2017-01-01','MM',1)
*/

Related

How to Get the Start Date and End Date according to a Date List with SQL?

I have a question and would be really appreciate if anyone could help.
I have a list of dates for many accounts, as shown in the attached picture, and I want to derive a start date and an end date from this date list. If the date list for an account is not continuous, the start date and end date should be recalculated from each break point.
Take the above picture as an example, the date list for account 2376 breaks for 2022-01-04, 2022-01-05, and 2022-01-06, so the result I want would be the following table, which shows the account has dates from 2022-01-01 to 2022-01-03, then has dates from 2022-01-07 to 2022-01-09:
Thanks a lot for any suggestions, I would really appreciate it!
So you can use a query like below
-- Start and end date of each run of consecutive days per account.
-- Within a run, "days since the account's first date" and the row number both
-- grow by one per row, so their difference is constant and identifies the run.
-- The offset must be measured in DAYS: measuring it in months gives the same
-- offset to every date of a month, which puts each row in its own group.
-- NOTE(review): assumes at most one row per account and date; duplicates would
-- split a run. Confirm against the real table.
WITH AugmentedData AS
(
    SELECT
        AccountID,
        [Date],
        DATEDIFF(DAY, MIN([Date]) OVER (PARTITION BY AccountID), [Date])
            - ROW_NUMBER() OVER (PARTITION BY AccountID ORDER BY [Date] ASC) AS GroupingDate
    FROM TB
)
SELECT
    AccountID,
    MIN([Date]) AS StartDate,
    MAX([Date]) AS EndDate
FROM AugmentedData
GROUP BY AccountID, GroupingDate
Also adding a demo link
Try it:
-- Demo: start and end of each run of consecutive dates per account.
-- The sample is a table variable, which must be named with '@'.
DECLARE @Test TABLE (
    [AccountID] [bigint] NOT NULL,
    [Date] [date] NOT NULL
);

-- Sample rows are deliberately unordered, span month and year boundaries and
-- mix two accounts.
INSERT INTO @Test ([AccountID], [Date])
VALUES
    (2376, '2022-01-01'),
    (2532, '2022-12-31'),
    (2532, '2022-01-31'),
    (2532, '2022-12-30'),
    (2376, '2022-01-08'),
    (2376, '2022-01-03'),
    (2532, '2022-02-02'),
    (2532, '2022-02-01'),
    (2532, '2023-01-01'),
    (2376, '2022-01-09'),
    (2376, '2022-01-07'),
    (2376, '2022-01-02');

WITH [CheckDiffs] AS (
    -- Flag each date as continuing a run (1) when the account's previous date
    -- is exactly one day earlier, or as starting a run (0) otherwise.
    SELECT
        t1.[AccountID],
        t1.[Date],
        CASE WHEN ISNULL(DATEDIFF(dd, x.[Date], t1.[Date]), 0) = 1 THEN 1 ELSE 0 END AS [IsContinious]
    FROM
        @Test t1
        OUTER APPLY (
            SELECT TOP 1
                t2.[Date]
            FROM @Test t2
            WHERE
                t2.[AccountID] = t1.[AccountID]
                AND t2.[Date] < t1.[Date]
            ORDER BY
                t2.[Date] DESC
        ) x
),
[DateSeparate] AS (
    -- x.[Date] is the start of the run each row belongs to: the latest
    -- run-starting date at or before it for the same account.
    -- The windows are partitioned by account AND run start; partitioning by the
    -- run start alone would merge runs of different accounts that begin on the
    -- same day.
    SELECT
        t1.[AccountID],
        MIN(t1.[Date]) OVER (PARTITION BY t1.[AccountID], x.[Date]) AS [Begin],
        MAX(t1.[Date]) OVER (PARTITION BY t1.[AccountID], x.[Date]) AS [End]
    FROM
        [CheckDiffs] t1
        OUTER APPLY (
            SELECT TOP 1
                t2.[Date]
            FROM [CheckDiffs] t2
            WHERE
                t2.[AccountID] = t1.[AccountID]
                AND t2.[Date] <= t1.[Date]
                AND t2.[IsContinious] = 0
            ORDER BY
                t2.[Date] DESC
        ) x
)
-- Collapse the per-row window results to one row per run.
SELECT
    t.[AccountID],
    t.[Begin],
    t.[End]
FROM
    [DateSeparate] t
GROUP BY
    t.[AccountID],
    t.[Begin],
    t.[End]
ORDER BY
    t.[AccountID],
    t.[Begin];

Error message: Maximum Recursion exhausted even with OPTION( MAXRECURSION 0)

I'm creating a function that will have as input a start date and a number of minutes. The function will add the number of minutes to the start date and it will output an end date, but only considering work hours and excluding weekends and holidays.
You can see part of the function below.
-- Returns the date/time at which @tempoPrevisto working minutes have elapsed,
-- counting from @DataIni and using only working hours on working days.
--   @tempoPrevisto : number of working minutes to add
--   @DataIni       : starting date/time
-- Working days come from a one-year calendar starting at @DataIni, minus the
-- weekdays numbered 1 and 7 and minus dates listed in BAS_PeriodosExcecoes with
-- Trabalhavel = 0.
-- NOTE(review): DATEPART(dw, ...) numbering depends on SET DATEFIRST; the
-- values '1'/'7' (weekend) and '6' (the hours_friday schedule) match
-- Sunday/Saturday/Friday only under DATEFIRST 7. Confirm the session setting.
ALTER FUNCTION [dbo].[DataFimPrevisto] (@tempoPrevisto real, @DataIni datetime)
RETURNS datetime
WITH EXECUTE AS CALLER
AS
BEGIN
    DECLARE @DataFim datetime;
    DECLARE @calculo TABLE (xend datetime, [minutes] int);

    WITH
    -- Calendar window: from the start date to one year later.
    drange (date_start, date_end) AS
    (
        SELECT
            CAST(@DataIni AS DATE) AS date_start,
            CAST(DATEADD(YEAR, 1, @DataIni) AS DATE) AS date_end
    ),
    -- One row per day in the window (recursive, one level per day).
    dates0 (adate, date_end) AS
    (
        SELECT date_start, date_end FROM drange
        UNION ALL
        SELECT DATEADD(day, 1, adate), date_end FROM dates0 WHERE adate < date_end
    ),
    -- Working days only.
    dates (adate) AS
    (
        SELECT adate FROM dates0
        WHERE DATEPART(dw, adate) NOT IN ('1', '7')
          AND NOT EXISTS (SELECT 1 FROM BAS_PeriodosExcecoes B WHERE B.Trabalhavel = 0 AND B.DataInicio = adate)
    ),
    -- Regular schedule, in minutes after midnight: 08:30-12:30 and 13:30-18:00.
    hours (hour_start, hour_end) AS
    (
        SELECT 8.5*60, 12.5*60
        UNION
        SELECT 13.5*60, 18*60
    ),
    -- Schedule for weekday 6: 08:00-14:00.
    hours_friday (hour_start, hour_end) AS
    (
        SELECT 8*60, 14*60
    ),
    -- Every working interval that starts at or after @DataIni, plus a partial
    -- interval from @DataIni to the end of the first shift still open that day.
    datehours (xstart, xend) AS
    (
        SELECT *
        FROM
        (
            SELECT
                DATEADD(minute, hour_start, CAST(adate AS datetime)) xstart,
                DATEADD(minute, hour_end  , CAST(adate AS datetime)) xend
            FROM dates AS d, hours AS h
            WHERE DATEPART(dw, adate) <> '6'
            UNION
            SELECT T2.xstart, T2.xend
            FROM
            (
                SELECT *, ROW_NUMBER() OVER (PARTITION BY T.xstart ORDER BY T.xend ASC) AS [rank]
                FROM
                (
                    SELECT
                        @DataIni xstart,
                        DATEADD(minute, hour_end, CAST(adate AS datetime)) xend
                    FROM dates AS d, hours AS h
                    WHERE adate = CAST(@DataIni AS DATE)
                      AND DATEADD(minute, hour_end, CAST(adate AS datetime)) > @DataIni
                      AND DATEPART(dw, adate) <> '6'
                ) T
            ) T2
            WHERE T2.[rank] = 1
            UNION
            SELECT
                DATEADD(minute, hour_start, CAST(adate AS datetime)) xstart,
                DATEADD(minute, hour_end  , CAST(adate AS datetime)) xend
            FROM dates AS d, hours_friday AS h
            WHERE DATEPART(dw, adate) = '6'
            UNION
            SELECT T2.xstart, T2.xend
            FROM
            (
                SELECT *, ROW_NUMBER() OVER (PARTITION BY T.xstart ORDER BY T.xend ASC) AS [rank]
                FROM
                (
                    SELECT
                        @DataIni xstart,
                        DATEADD(minute, hour_end, CAST(adate AS datetime)) xend
                    FROM dates AS d, hours_friday AS h
                    WHERE adate = CAST(@DataIni AS DATE)
                      AND DATEADD(minute, hour_end, CAST(adate AS datetime)) > @DataIni
                      AND DATEPART(dw, adate) = '6'
                ) T
            ) T2
            WHERE T2.[rank] = 1
        ) T3 WHERE T3.xstart >= @DataIni
    ),
    -- Running total of working minutes up to the end of each interval.
    cumulative (xend, [minutes]) AS
    (
        SELECT t.xend, SUM(DATEDIFF(MINUTE, xstart, xend)) OVER (ORDER BY xstart) AS [minutes]
        FROM datehours AS t
    )
    -- First interval whose running total reaches the requested minutes.
    INSERT INTO @calculo (xend, [minutes])
    SELECT TOP 1 xend, [minutes]
    FROM cumulative
    WHERE [minutes] >= @tempoPrevisto
    ORDER BY cumulative.xend ASC
    -- The hint has to be on this statement: an OPTION clause on the statement
    -- that calls the function does not apply to statements inside it. dates0
    -- recurses once per day of the one-year window (at most 366 levels).
    OPTION (MAXRECURSION 366);

    -- Step back from the end of that interval by the minutes overshot.
    SET @DataFim = (SELECT DATEADD(MINUTE, @tempoPrevisto - MAX([minutes]), MAX([xend])) FROM @calculo);
    RETURN(@DataFim);
END;
When I execute this function with
SELECT dbo.DataFimPrevisto( 21240, DATETIMEFROMPARTS( 2023, 1, 25, 6, 0, 0, 0)) OPTION(MAXRECURSION 0);
SSMS returns the error message
The maximum recursion 100 has been exhausted before statement completion
Even though I'm using OPTION(MAXRECURSION 0).

group by day with missing days

I have a sql server table with the following structure and data:
Created , keyword
'2017-10-03 19:18:00', 'test7'
'2017-10-07 01:06:00', 'test3'
'2017-10-07 15:19:00', 'test2'
'2017-10-07 21:39:00', 'test10'
'2017-10-08 00:36:00', 'test3'
'2017-10-08 01:26:00', 'test13'
'2017-10-08 01:33:00', 'test9'
'2017-10-08 08:23:00', 'test13'
'2017-10-08 09:35:00', 'test9'
'2017-10-08 12:38:00', 'test9'
'2017-10-08 15:07:00', 'test2'
'2017-10-10 05:09:00', 'test4'
I would like to run a query that counts activity and group it by day, also accounting for days when nothing was recorded and show the activity for those days as zero. As such I want a query that will return the result set below:
'2017-10-03', 1
'2017-10-04', 0
'2017-10-05', 0
'2017-10-06', 0
'2017-10-07', 3
'2017-10-08', 7
'2017-10-09', 0
'2017-10-10', 1
I know how to run a query and group it by count for days, but not how to account for days nothing was recorded. As I am new to Sql, I would really appreciate it if someone can provide a working example. Thanks in advance
Try this:
-- Count rows of MY_TABLE per calendar day between @startDate and @endDate,
-- reporting 0 for days without rows. Local variables must use the '@' prefix.
DECLARE @startDate date = '2017-10-01';
DECLARE @endDate   date = '2017-10-31';

WITH cte AS (
    -- One row per day in the requested range.
    SELECT @startDate AS [dayOfYear]
    UNION ALL
    SELECT DATEADD(DAY, 1, [dayOfYear])
    FROM cte
    WHERE [dayOfYear] < @endDate
)
SELECT
    cte.[dayOfYear],
    -- COUNT of a column skips the NULLs produced by unmatched days.
    COUNT(T.Created) AS activityCount
FROM cte
LEFT JOIN MY_TABLE AS T
    -- Half-open day range keeps the predicate on Created index-friendly.
    ON  T.Created >= cte.[dayOfYear]
    AND T.Created <  DATEADD(DAY, 1, cte.[dayOfYear])
GROUP BY cte.[dayOfYear]
ORDER BY cte.[dayOfYear]
-- One recursion level per day: lift the default limit of 100 so that ranges
-- longer than 101 days work.
OPTION (MAXRECURSION 0);
The logic is as follows:
get table with all days between #startDate and #endDate (the CTE - I specified first and last of October). Then we left join your table and when the days has no match, we define corresponding value to 0, 1 otherwise. Then it's enough to sum these values day-wise.
Here is a solution when you don't have calendar table:
-- Daily activity counts without a calendar table: generate numbers, turn them
-- into dates, and left join the data.
-- #nums is a real temp table; @t, @min_dt and @max_dt are variables ('@' prefix).
IF OBJECT_ID('tempdb..#nums') IS NOT NULL
    DROP TABLE #nums;

-- 0-based numbers. TOP keeps the cross join of sys.columns with itself from
-- generating far more rows than any realistic date range needs.
SELECT TOP (10000)
    ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 AS n
INTO #nums
FROM sys.columns AS c1
CROSS JOIN sys.columns AS c2;

DECLARE @t TABLE (Created datetime, keyword varchar(100));

INSERT INTO @t (Created, keyword)
VALUES
    ('2017-10-03 19:18:00', 'test7'),
    ('2017-10-07 01:06:00', 'test3'),
    ('2017-10-07 15:19:00', 'test2'),
    ('2017-10-07 21:39:00', 'test10'),
    ('2017-10-08 00:36:00', 'test3'),
    ('2017-10-08 01:26:00', 'test13'),
    ('2017-10-08 01:33:00', 'test9'),
    ('2017-10-08 08:23:00', 'test13'),
    ('2017-10-08 09:35:00', 'test9'),
    ('2017-10-08 12:38:00', 'test9'),
    ('2017-10-08 15:07:00', 'test2'),
    ('2017-10-10 05:09:00', 'test4');

DECLARE @min_dt date, @max_dt date;

SELECT @min_dt = MIN(Created), @max_dt = MAX(Created)
FROM @t;

WITH calendar AS
(
    -- One row per day from the first to the last recorded day.
    SELECT DATEADD(DAY, n, @min_dt) AS dt
    FROM #nums
    WHERE n <= DATEDIFF(DAY, @min_dt, @max_dt)
)
SELECT
    c.dt,
    -- COUNT never returns NULL, so no ISNULL wrapper is needed.
    COUNT(t.keyword) AS cnt
FROM calendar AS c
LEFT JOIN @t AS t
    ON  t.Created >= c.dt
    AND t.Created <  DATEADD(DAY, 1, c.dt)
GROUP BY c.dt
ORDER BY c.dt;
In my case I don't have table calendar but I have fixed table with the numbers (Nums), but if you don't have even table of numbers you can generate it as I did in #nums (you should limit the numbers generated to a reasonable number)
-- Create #DimDate, a calendar temp table with one row per day from 2017-10-03
-- to 2017-10-10, to left join against the data and fill the gaps.
-- The unused variable declaration that preceded WITH was removed: it had no
-- terminating semicolon, which a following WITH requires.
WITH CTE_DatesTable AS
(
    SELECT CAST('20171003' AS date) AS [date]
    UNION ALL
    SELECT DATEADD(dd, 1, [date])
    FROM CTE_DatesTable
    WHERE DATEADD(dd, 1, [date]) <= '20171010'
)
SELECT [date] AS [CalendarDate]
INTO #DimDate
FROM CTE_DatesTable
-- One recursion level per day; 0 removes the default limit of 100.
OPTION (MAXRECURSION 0);

SELECT [CalendarDate]
FROM #DimDate;
This will create a calendar table to join with your current table to fill the gaps

Conditional Count On Row_Number

I have a query that calculates the number of working days within a month based on a table which stores all our public holidays.
The current output would show all working days, excluding public holidays and Saturday and Sunday, I would like to show each day of the month, but don't increment on a public holiday or Saturday or Sunday.
Is there a way to conditionally increment the row number?
Query is below:
-- Number the working days of the current calendar month of 2016: weekdays that
-- are not listed as public holidays. Local variables must use the '@' prefix.
DECLARE @startnum INT = 0;
DECLARE @endnum   INT = 365;

WITH gen AS
(
    -- Day offsets @startnum..@endnum (recursive, one level per number).
    SELECT @startnum AS num
    UNION ALL
    SELECT num + 1
    FROM gen
    WHERE num + 1 <= @endnum
)
, holidays AS
(
    SELECT CONVERT(DATE, transdate) AS HolidayDate
    FROM WORKCALENDER w
    WHERE w.CALENDARID = 'PubHoliday'
)
, allDays AS
(
    -- Each offset turned into a date of 2016 together with its weekday name.
    SELECT DATEADD( d, num, CONVERT( DATE, '1 Jan 2016' ) ) AS DateOfYear
         , DATENAME( dw, DATEADD( d, num, CONVERT( DATE, '1 Jan 2016' ))) AS [dayOfWeek]
    FROM gen
)
SELECT number = ROW_NUMBER() OVER ( ORDER BY allDays.DateOfYear )
     , allDays.DateOfYear
     , allDays.[dayOfWeek]
     , holidays.HolidayDate
FROM allDays
LEFT OUTER JOIN holidays
    ON allDays.DateOfYear = holidays.HolidayDate
-- Anti-join: keep only days with no matching holiday row.
WHERE holidays.HolidayDate IS NULL
  AND allDays.[dayOfWeek] NOT IN ( 'Saturday', 'Sunday')
  -- From the first day of the current month up to, not including, the first day
  -- of the following month.
  AND allDays.DateOfYear >= CONVERT( DATE, '1 ' + DATENAME( MONTH, GETDATE() ) + ' 2016' )
  AND allDays.DateOfYear < CONVERT( DATE, '1 ' + DATENAME( MONTH, DATEADD( month, 1, GETDATE()) ) + ' 2016' )
OPTION (MAXRECURSION 10000)
kind of pseudo code
select date, row_number() over (order by date) as num
from ( select date
from allDates
where month = x and weekday
except
select date
from holidays
where month is x
) as t
union all
select date, null
from holidays
where month is x
order by date
You could use a windowed sum, see how the output of WorkdaySequenceInMonth is composed.
-- List every day of the range with a running count of working days within its
-- month; the count does not advance on weekends or holidays.
-- Local variables and the table variable must use the '@' prefix.
DECLARE @startDate DATE = '20160101'
      , @numDays   INT  = 365;

DECLARE @Holidays TABLE (Holiday DATE);

INSERT INTO @Holidays (Holiday)
VALUES ('20160101')
     , ('20160115')
     , ('20160714');

WITH nums AS
(
    -- 0-based row numbers taken from a catalog view.
    -- NOTE(review): yields at most as many numbers as sys.columns has rows;
    -- confirm that this covers @numDays.
    SELECT ROW_NUMBER() OVER (ORDER BY object_id) - 1 AS num
    FROM sys.columns
),
dateRange AS
(
    SELECT
        DATEADD(DAY, num, @startDate) AS Dt
      , num
    FROM nums
    WHERE num < @numDays
),
Parts AS
(
    -- Date parts plus a 0/1 workday flag per day.
    SELECT
        R.Dt AS [Date]
      , YEAR(R.Dt) AS [Year]
      , MONTH(R.Dt) AS [Month]
      , DAY(R.Dt) AS [Day]
      , DATENAME(weekday, R.Dt) AS [Weekday]
      , CASE WHEN H.Holiday IS NOT NULL
               OR DATENAME(weekday, R.Dt) IN ('Saturday', 'Sunday')
             THEN 0
             ELSE 1
        END AS IsWorkday
    FROM dateRange R
    LEFT JOIN @Holidays H ON R.Dt = H.Holiday
)
SELECT
    [Date]
  , [Year]
  , [Month]
  , [Day]
  , [Weekday]
  , IsWorkday
    -- Running sum of the flag within each month.
  , SUM(IsWorkday) OVER (PARTITION BY [Year], [Month]
                         ORDER BY [Day]
                         ROWS UNBOUNDED PRECEDING) AS WorkdaySequenceInMonth
FROM Parts
-- [Day] is included so the output order is deterministic within a month.
ORDER BY [Year], [Month], [Day];
Hi You can try this query, the initial part is the data generation, maybe you won't need it.
Then I generate a temp table with all the dates for the time period set in #StartYear, #EndYear
Then just simple queries to return the data
-- generate holidays table (stored as DATE so comparisons need no conversion)
SELECT CAST(t.holiday AS date) AS holiday
INTO #tempHolidays
FROM (VALUES ('20160101'),
             ('20160201'),
             ('20160205'),
             ('20160301'),
             ('20160309'),
             ('20160315')) AS t (holiday);

CREATE TABLE #tempCalendar (Date_temp date);

SELECT holiday
FROM #tempHolidays;

-- Reporting period. Local variables must use the '@' prefix, and each one
-- needs a data type in DECLARE.
DECLARE @startYear int
      , @endYear   int
      , @dateStart date
      , @dateEnd   date
      , @date      date;

SELECT @startYear = 2016
     , @endYear   = 2016;

-- First day of the start year to the last day of the END year.
SELECT @dateStart = CAST(CAST(@startYear AS varchar(4)) + '0101' AS date)
     , @dateEnd   = CAST(CAST(@endYear   AS varchar(4)) + '1231' AS date);

SET @date = @dateStart;

--Insert dates of the period of time
-- "<=" so that the final day (December 31) is inserted as well.
WHILE (@date <= @dateEnd)
BEGIN
    INSERT INTO #tempCalendar (Date_temp)
    VALUES (@date);

    SET @date = DATEADD(dd, 1, @date);
END

-- Retrieve date list: calendar days that are neither holidays nor weekend days
SELECT c.Date_temp
FROM #tempCalendar AS c
WHERE NOT EXISTS (SELECT 1 FROM #tempHolidays AS h WHERE h.holiday = c.Date_temp)
  AND DATENAME(weekday, c.Date_temp) NOT IN ('Saturday', 'Sunday')
ORDER BY c.Date_temp;

-- Retrieve count of working days per month
SELECT DATEPART(year, c.Date_temp)  AS year
     , DATEPART(month, c.Date_temp) AS Month
     , COUNT(*)                     AS CountOfWorkingDays
FROM #tempCalendar AS c
WHERE NOT EXISTS (SELECT 1 FROM #tempHolidays AS h WHERE h.holiday = c.Date_temp)
  AND DATENAME(weekday, c.Date_temp) NOT IN ('Saturday', 'Sunday')
GROUP BY DATEPART(year, c.Date_temp)
       , DATEPART(month, c.Date_temp)
ORDER BY DATEPART(year, c.Date_temp)
       , DATEPART(month, c.Date_temp);
You should change #tempHolidays for your Holidays table, and use #StarYear and #EndYear as your time period.
Here's a simple demo that shows the use of the partition by clause to keep contiguity in your sequencing for non-holidays
-- Demo: number the days separately for holidays and non-holidays, so that the
-- non-holiday sequence continues without a gap across a holiday.
IF OBJECT_ID('tempdb.dbo.#dates') IS NOT NULL
    DROP TABLE #dates;

CREATE TABLE #dates (d DATE);

IF OBJECT_ID('tempdb.dbo.#holidays') IS NOT NULL
    DROP TABLE #holidays;

CREATE TABLE #holidays (d DATE);

INSERT INTO #holidays (d)
VALUES ('2016-12-25'), ('2017-12-25'), ('2018-12-25');

-- 1000 calendar days, as offsets from 2015-12-31 taken from a numbers table.
INSERT INTO #dates (d)
SELECT TOP 1000 DATEADD(DAY, num.n, '2015-12-31')
FROM [Util].dbo.[Numbers] AS num;

WITH flagged AS (
    -- Every date plus a 0/1 flag telling whether it is listed as a holiday.
    SELECT
        cal.d,
        CASE WHEN hol.d IS NOT NULL THEN 1 ELSE 0 END AS IsHoliday
    FROM #dates AS cal
    LEFT JOIN #holidays AS hol
        ON cal.d = hol.d
)
SELECT
    flagged.d,
    ROW_NUMBER() OVER (PARTITION BY flagged.IsHoliday ORDER BY flagged.d)
FROM flagged
ORDER BY flagged.d;
And please forgive my marking only Christmas as a holiday!

How can I get aggregate values for all dates, even when missing data for some days?

I have the data with users tracking time. The data is in segments and each row represent one segment. Here is the sample data
http://sqlfiddle.com/#!6/2fa61
How can I get the data on daily basis i.e. if a complete day is of 1440 minutes then I want to know how many minutes the user was tracked in a day. I also want to show 0 on the day when there is no data.
I am expecting the following output
Use table of numbers. I personally have a permanent table Numbers with 100K numbers in it.
Once you have a set of numbers you can generate a set of dates for the range that you need. In this query I'll take MIN and MAX dates from your data, but since you may not have data for some dates, it is better to have explicit parameters defining the range.
For each date I have the beginning and ending of a day - our grouping interval.
For each date we are searching among track rows for those that intersect with this interval. Two intervals (DayStart, DayEnd) and (StartTime, EndTime) intersect if StartTime < DayEnd and EndTime > DayStart. This goes into WHERE.
For each intersecting intervals we are calculating the range that belongs to both intervals: from MAX(DayStart, StartTime) to MIN(DayEnd, EndTime).
Finally, we group by day and sum up durations of all ranges.
I added a row to your sample data to test the case when interval covers the whole day. From 2015-02-14 20:50:43 to 2015-02-16 19:49:59. I chose this interval to be well before intervals in your sample, so that results for the dates in your example are not affected. Here is SQL Fiddle.
-- Minutes tracked per user and calendar day, with 0 for days without data.
-- The sample data is a table variable, which must be named with '@'.
DECLARE @track TABLE
(
    Email varchar(20),
    StartTime datetime,
    EndTime datetime,
    DurationInSeconds int,
    FirstDate datetime,
    LastUpdate datetime
);

INSERT INTO @track (Email, StartTime, EndTime, DurationInSeconds, FirstDate, LastUpdate)
VALUES
    ('ABC', '2015-02-20 08:49:43.000', '2015-02-20 14:49:59.000', 21616,  '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-20 14:49:59.000', '2015-02-20 22:12:07.000', 26528,  '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-20 22:12:07.000', '2015-02-21 07:00:59.000', 31732,  '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-21 09:49:43.000', '2015-02-21 16:30:10.000', 24027,  '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-21 16:30:10.000', '2015-02-22 09:49:30.000', 62360,  '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-22 09:55:43.000', '2015-02-22 11:49:59.000', 5856,   '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-22 11:49:10.000', '2015-02-23 08:49:59.000', 75649,  '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-23 10:59:43.000', '2015-02-23 12:49:59.000', 6616,   '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-23 12:50:43.000', '2015-02-24 19:49:59.000', 111556, '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    ('ABC', '2015-02-28 08:49:43.000', '2015-02-28 14:49:59.000', 21616,  '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000'),
    -- Extra row: an interval that covers a whole day (2015-02-15).
    ('ABC', '2015-02-14 20:50:43.000', '2015-02-16 19:49:59.000', 0,      '2015-02-19 00:00:00.000', '2015-02-28 11:45:27.000');

WITH
CTE_Dates
AS
(
    -- First and last calendar day with data, per user.
    SELECT
        Email
        ,CAST(MIN(StartTime) AS date) AS StartDate
        ,CAST(MAX(EndTime) AS date) AS EndDate
    FROM @track
    GROUP BY Email
)
SELECT
    CTE_Dates.Email
    ,DayStart AS xDate
    -- Seconds summed per day and converted to whole minutes; 0 when no track
    -- intersects the day.
    ,ISNULL(SUM(DATEDIFF(second, RangeStart, RangeEnd)) / 60, 0) AS TrackMinutes
FROM
    Numbers
    CROSS JOIN CTE_Dates -- this generates list of dates without gaps
    CROSS APPLY
    (
        SELECT
            DATEADD(day, Numbers.Number-1, CTE_Dates.StartDate) AS DayStart
            ,DATEADD(day, Numbers.Number, CTE_Dates.StartDate) AS DayEnd
    ) AS A_Date -- this is midnight of each current and next day
    OUTER APPLY
    (
        SELECT
            -- MAX(DayStart, StartTime)
            CASE WHEN DayStart > StartTime THEN DayStart ELSE StartTime END AS RangeStart
            -- MIN(DayEnd, EndTime)
            ,CASE WHEN DayEnd < EndTime THEN DayEnd ELSE EndTime END AS RangeEnd
        FROM @track AS T
        WHERE
            T.Email = CTE_Dates.Email
            -- Two intervals intersect when each starts before the other ends.
            AND T.StartTime < DayEnd
            AND T.EndTime > DayStart
    ) AS A_Track -- this is all tracks that intersect with the current day
WHERE
    -- One number per day of the user's date range.
    Numbers.Number <= DATEDIFF(day, CTE_Dates.StartDate, CTE_Dates.EndDate)+1
GROUP BY DayStart, CTE_Dates.Email
ORDER BY DayStart;
Result
Email xDate TrackMinutes
ABC 2015-02-14 189
ABC 2015-02-15 1440
ABC 2015-02-16 1189
ABC 2015-02-17 0
ABC 2015-02-18 0
ABC 2015-02-19 0
ABC 2015-02-20 910
ABC 2015-02-21 1271
ABC 2015-02-22 1434
ABC 2015-02-23 1309
ABC 2015-02-24 1189
ABC 2015-02-25 0
ABC 2015-02-26 0
ABC 2015-02-27 0
ABC 2015-02-28 360
You can still get TrackMinutes more than 1440, if two or more intervals in your data overlap.
update
You said in the comments that you have few rows in your data, where intervals do overlap and result has values more than 1440. You can wrap SUM into CASE to hide these errors in the data, but ultimately it is better to find these rows with problems and fix the data. You saw only few rows with values more than 1440, but there could be many more other rows with the same problem, which is not so visible. So, it is better to write a query that finds such overlapping rows and check how many there are and then decide what to do with them. The danger here is that at the moment you think that there are only few, but there could be a lot. This is beyond the scope of this question.
To hide the problem replace this line in the query above:
,ISNULL(SUM(DATEDIFF(second, RangeStart, RangeEnd)) / 60, 0) AS TrackMinutes
with this:
,CASE
WHEN ISNULL(SUM(DATEDIFF(second, RangeStart, RangeEnd)) / 60, 0) > 1440
THEN 1440
ELSE ISNULL(SUM(DATEDIFF(second, RangeStart, RangeEnd)) / 60, 0)
END AS TrackMinutes
I am making some guesses on the date ranges but this should be pretty close.
On my system I keep a view named cteTally which is my version of a tally table. Here is the code to create it.
-- Tally view: the integers 1 through 10,000 in column N, produced without
-- reading any table by repeatedly cross joining a ten-row constant set.
create View [dbo].[cteTally] as
WITH
    -- 10 rows
    E1(N) AS (
        SELECT 1
        FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) AS dt(n)
    ),
    -- 10 x 10 = 100 rows
    E2(N) AS (
        SELECT 1
        FROM E1 AS a
        CROSS JOIN E1 AS b
    ),
    -- 100 x 100 = 10,000 rows max
    E4(N) AS (
        SELECT 1
        FROM E2 AS a
        CROSS JOIN E2 AS b
    ),
    -- Number the rows; the ordering expression is a constant.
    cteTally(N) AS (
        SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM E4
    )
SELECT N
FROM cteTally
Now we can utilize this to build your results. We just need to put in a couple other CTEs to get the date ranges established.
-- Tracked minutes per user and calendar day over the whole reporting range,
-- with 0 for days on which no segment starts.
-- Each segment's DurationInSeconds is attributed entirely to the day on which
-- the segment starts; segments are not split at midnight.
with DateRange as
(
    -- Reporting range as whole days.
    select CAST(MIN(FirstDate) AS date) as StartDate
         , CAST(MAX(LastUpdate) AS date) as EndDate
    from track
)
, AllDates as
(
    -- One row per day in the range, generated from the tally view.
    select DateAdd(DAY, t.N - 1, dr.StartDate) as BaseDate
    from DateRange dr
    cross join cteTally t
    where t.N <= DATEDIFF(day, dr.StartDate, dr.EndDate) + 1
)
, Users as
(
    -- Every user gets a row for every day, so empty days still carry an Email.
    select distinct Email
    from track
)
select u.Email
     , ad.BaseDate as xDate
     -- Sum the seconds first and convert once, so the column really holds minutes.
     , COALESCE(SUM(t.DurationInSeconds), 0) / 60 as TrackMinutes
from AllDates ad
cross join Users u
left join track t
    on  t.Email = u.Email
    and t.StartTime >= ad.BaseDate
    and t.StartTime <  DATEADD(DAY, 1, ad.BaseDate)
group by u.Email, ad.BaseDate
order by u.Email, ad.BaseDate;
Create a table variable for the dates
Populate table in a WHILE loop
Cross join to tracker data with the dates table variable
Convert values in column [DurationInSeconds] into minutes
Replace nulls with zero
Code:
-- Tracked minutes per user and day, using a table variable of report dates
-- filled by a WHILE loop. Variables must use the '@' prefix.
DECLARE @dates TABLE ( ReportDates DATE );

DECLARE @BeginDate AS DATE
      , @EndDate   AS DATE
      , @RunDate   AS DATE;

SELECT @BeginDate = MIN(StartTime)
     , @EndDate   = MAX(StartTime)
FROM dbo.track;

SET @RunDate = @BeginDate;

-- Insert the current date BEFORE advancing it; advancing first would skip the
-- first day and add one day past the end.
WHILE @RunDate <= @EndDate
BEGIN
    INSERT INTO @dates (ReportDates)
    VALUES ( @RunDate );

    SET @RunDate = DATEADD(DAY, 1, @RunDate);
END;

SELECT e.Email
     , e.ReportDates
     -- Sum the seconds first, then convert; dividing each row by 60 would drop
     -- the leftover seconds of every segment.
     , ISNULL(SUM(t.DurationInSeconds), 0) / 60 AS TotDurationInMinutes
FROM ( -- Every user paired with every report date.
       SELECT d.ReportDates
            , u.Email
       FROM @dates AS d
       CROSS JOIN ( SELECT DISTINCT Email FROM dbo.track ) AS u ) AS e
LEFT JOIN dbo.track AS t
    -- Match on the user as well as the day, so one user's time is not counted
    -- for another.
    ON  t.Email = e.Email
    AND t.StartTime >= e.ReportDates
    AND t.StartTime <  DATEADD(DAY, 1, e.ReportDates)
GROUP BY e.ReportDates, e.Email
ORDER BY e.Email, e.ReportDates;
Results:
Email ReportDates TotDurationInMinutes
----- ----------- ----------------------
ABC 2015-02-21 1439
ABC 2015-02-22 1357
ABC 2015-02-23 1969
ABC 2015-02-24 0
ABC 2015-02-25 0
ABC 2015-02-26 0
ABC 2015-02-27 0
ABC 2015-02-28 360
ABC 2015-03-01 0
you should group by the day value. you could get the day with the function DATEPART as in : DATEPART(d,[StartTime])
-- Total tracked minutes per calendar day, keyed by the day each segment starts.
SELECT
    CAST(trk.[StartTime] AS date) AS date,
    SUM(DATEDIFF(minute, trk.[StartTime], trk.[EndTime])) AS "min"
FROM [test].[dbo].[track] AS trk
GROUP BY
    DAY(trk.[StartTime]),
    CAST(trk.[StartTime] AS date)
hope it helps
-- Cursor-based daily totals: walks the track rows, accumulates
-- DurationInSeconds per day into #temp_table and adds a zero row when no track
-- row exists for the running @FirstDate.
-- Variables use '@' and the fetch status is the system function @@FETCH_STATUS.
-- NOTE(review): LEFT(StartTime, 11) relies on the implicit datetime-to-string
-- conversion to extract the date part, and the cursor has no ORDER BY, so the
-- gap filling depends on the order in which rows are returned. Confirm both
-- before relying on the output.
SET NOCOUNT ON;

-- Drop only when the table exists, so the first run does not fail.
IF OBJECT_ID('tempdb..#temp_table') IS NOT NULL
    DROP TABLE #temp_table;

CREATE TABLE #temp_table (
    Email VARCHAR(20)
    ,StartTime DATETIME
    ,DurationInSeconds INT
    );

DECLARE @Nextday DATETIME
    ,@Email VARCHAR(20)
    ,@StartTime DATETIME
    ,@DurationInSeconds INT
    ,@lastduration INT
    ,@currentduration INT
    ,@FirstDate DATETIME;

-- Running date used to detect days without data.
SET @FirstDate = (
        SELECT TOP 1 LEFT(StartTime, 11)
        FROM track
        );

DECLARE vendor_cursor CURSOR
FOR
SELECT Email
    ,StartTime
    ,DurationInSeconds
FROM track;

OPEN vendor_cursor;

FETCH NEXT
FROM vendor_cursor
INTO @Email
    ,@StartTime
    ,@DurationInSeconds;

WHILE @@FETCH_STATUS = 0
BEGIN
    IF EXISTS (
            SELECT 1
            FROM #temp_table
            WHERE LEFT(StartTime, 11) = LEFT(@StartTime, 11)
            )
    BEGIN
        -- The day already has a row: add this segment's seconds to it.
        SELECT @lastduration = DurationInSeconds
        FROM #temp_table
        WHERE LEFT(StartTime, 11) = LEFT(@StartTime, 11);

        SET @currentduration = @lastduration + @DurationInSeconds;

        UPDATE #temp_table
        SET DurationInSeconds = @currentduration
        WHERE LEFT(StartTime, 11) = LEFT(@StartTime, 11);
    END
    ELSE
    BEGIN
        -- First segment seen for this day: create its row.
        INSERT INTO #temp_table (Email, StartTime, DurationInSeconds)
        SELECT @Email
            ,@StartTime
            ,@DurationInSeconds;

        SET @FirstDate = DATEADD(day, 1, @FirstDate);
    END

    -- No track row for the running date: record the day with zero seconds.
    IF NOT EXISTS (
            SELECT 1
            FROM track
            WHERE LEFT(StartTime, 11) = @FirstDate
            )
    BEGIN
        INSERT INTO #temp_table (Email, StartTime, DurationInSeconds)
        SELECT @Email
            ,@FirstDate
            ,0;

        SET @FirstDate = DATEADD(day, 1, @FirstDate);
    END

    -- Get the next track row.
    FETCH NEXT
    FROM vendor_cursor
    INTO @Email
        ,@StartTime
        ,@DurationInSeconds;
END

CLOSE vendor_cursor;

DEALLOCATE vendor_cursor;

SELECT Email
    ,StartTime
    ,DurationInSeconds
FROM #temp_table
ORDER BY StartTime;