Finding missing dates compared to date range - sql

I have one table (A) with date ranges and another (B) with just a set date. There are missing months in B that are within the date range of A. I need to identify the missing months.
A
Person StartDate EndDate
123 1/1/2016 5/1/2016
B
Person EffectiveDate
123 1/1/2016
123 2/1/2016
123 4/1/2016
123 5/1/2016
Expected result would be
123 3/1/2016
I'm using SQL Server 2012. Any assistance would be appreciated. Thanks!

One approach is to generate all values between the two dates. Here is an approach using a numbers table:
with n as (
select row_number() over (order by (select null)) - 1 as n
from master.spt_values
)
select a.person, dateadd(day, n.n, a.startdate) as missingdate
from a join
n
on dateadd(day, n.n, a.startdate) <= day.enddate left join
b
on b.person = a.person and b.effectivedate = dateadd(day, n.n, a.startdate)
where b.person is null;

Try this:
CREATE TABLE #A (Person INT, StartDate DATE, EndDate DATE)
INSERT INTO #A
SELECT '123','1/1/2016', '5/1/2016'
CREATE TABLE #B(Person INT, EffectiveDate DATE)
INSERT INTO #B
SELECT 123 ,'1/1/2016' UNION ALL
SELECT 123 ,'2/1/2016' UNION ALL
SELECT 123 ,'4/1/2016' UNION ALL
SELECT 123 ,'5/1/2016'
;WITH A1
AS(
SELECT PERSON , StartDate, EndDate
FROM #A
UNION ALL
SELECT PERSON ,DATEADD(MM,1,STARTDATE), EndDate
FROM A1
WHERE DATEADD(MM,1,STARTDATE) <= EndDate
)
SELECT PERSON , StartDate
FROM A1
WHERE
NOT EXISTS
(
SELECT 1 FROM #B B1
WHERE B1.Person = A1.PERSON
AND YEAR(B1.EffectiveDate) = YEAR(A1.STARTDATE) AND MONTH(B1.EffectiveDate) = MONTH(A1.STARTDATE)
)

This should work if you are interested in getting missing months
;WITH n
AS (SELECT ROW_NUMBER() OVER(ORDER BY
(
SELECT NULL
)) - 1 AS n
FROM master.dbo.spt_values)
SELECT a.person,
DATEADD(MONTH, n.n, a.startdate) AS missingdate
FROM a a
INNER JOIN n ON DATEADD(MONTH, n.n, a.startdate) <= a.enddate
LEFT JOIN b b ON MONTH(DATEADD(MONTH, n.n, a.startdate)) = MONTH(b.effectivedate) AND YEAR(DATEADD(MONTH, n.n, a.startdate)) = YEAR(b.effectivedate)
WHERE b.person IS NULL;

Related

SQL Join two tables by unrelated date

I’m looking to join two tables that do not have a common data point, but common value (date). I want a table that lists the date and total number of hired/terminated employees on that day. Example is below:
Table 1
Hire Date Employee Number Employee Name
--------------------------------------------
5/5/2018 10078 Joe
5/5/2018 10077 Adam
5/5/2018 10078 Steve
5/8/2018 10079 Jane
5/8/2018 10080 Mary
Table 2
Termination Date Employee Number Employee Name
----------------------------------------------------
5/5/2018 10010 Tony
5/6/2018 10025 Jonathan
5/6/2018 10035 Mark
5/8/2018 10052 Chris
5/9/2018 10037 Sam
Desired result:
Date Total Hired Total Terminated
--------------------------------------
5/5/2018 3 1
5/6/2018 0 2
5/7/2018 0 0
5/8/2018 2 1
5/9/2018 0 1
Getting the total count is easy, just unsure as the best approach from the standpoint of "adding" a date column
If you need all dates within some window then you need to join the data to a calendar. You can then left join and sum flags for data points.
DECLARE #StartDate DATETIME = (SELECT MIN(ActionDate) FROM(SELECT ActionDate = MIN(HireDate) FROM Table1 UNION SELECT ActionDate = MIN(TerminationDate) FROM Table2)AS X)
DECLARE #EndDate DATETIME = (SELECT MAX(ActionDate) FROM(SELECT ActionDate = MAX(HireDate) FROM Table1 UNION SELECT ActionDate = MAX(TerminationDate) FROM Table2)AS X)
;WITH AllDates AS
(
SELECT CalendarDate=#StartDate
UNION ALL
SELECT DATEADD(DAY, 1, CalendarDate)
FROM AllDates
WHERE DATEADD(DAY, 1, CalendarDate) <= #EndDate
)
SELECT
CalendarDate,
TotalHired = SUM(CASE WHEN H.HireDate IS NULL THEN NULL ELSE 1 END),
TotalTerminated = SUM(CASE WHEN T.TerminationDate IS NULL THEN NULL ELSE 1 END)
FROM
AllDates D
LEFT OUTER JOIN Table1 H ON H.HireDate = D.CalendarDate
LEFT OUTER JOIN Table2 T ON T.TerminationDate = D.CalendarDate
/* If you only want dates with data points then uncomment out the where clause
WHERE
NOT (H.HireDate IS NULL AND T.TerminationDate IS NULL)
*/
GROUP BY
CalendarDate
I would do this with a union all and aggregations:
select dte, sum(is_hired) as num_hired, sum(is_termed) as num_termed
from (select hiredate as dte, 1 as is_hired, 0 as is_termed from table1
union all
select terminationdate, 0 as is_hired, 1 as is_termed from table2
) ht
group by dte
order by dte;
This does not include the "missing" dates. If you want those, a calendar or recursive CTE works. For instance:
with ht as (
select dte, sum(is_hired) as num_hired, sum(is_termed) as num_termed
from (select hiredate as dte, 1 as is_hired, 0 as is_termed from table1
union all
select terminationdate, 0 as is_hired, 1 as is_termed from table2
) ht
group by dte
),
d as (
select min(dte) as dte, max(dte) as max_dte)
from ht
union all
select dateadd(day, 1, dte), max_dte
from d
where dte < max_dte
)
select d.dte, coalesce(ht.num_hired, 0) as num_hired, coalesce(ht.num_termed) as num_termed
from d left join
ht
on d.dte = ht.dte
order by dte;
Try this one
SELECT ISNULL(a.THE_DATE, b.THE_DATE) as Date,
ISNULL(a.Total_Hire,0) as Total_Hire,
ISNULL (b.Total_Terminate,0) as Total_terminate
FROM (SELECT Hire_date as the_date, COUNT(1) as Total_Hire
FROM TABLE_HIRE GROUP BY HIRE_DATE) a
FULL OUTER JOIN (SELECT Termination_Date as the_date, COUNT(1) as Total_Terminate
FROM TABLE_TERMINATE GROUP BY HIRE_DATE) a
ON a.the_date = b.the_date

SQL Server Group by date and by time of day over a date range

I'm not even sure if this can/should be done is SQL but here goes.
I have a table that stores a start date and an end date like so
userPingId createdAt lastUpdatedAt
1 2017-10-17 11:31:52.160 2017-10-18 14:31:52.160
I want to return a result set that groups the results by date and if they were active between different points between the two date.
The different points are
Morning - Before 12pm
Afternoon - Between 12pm and 5pm
Evening - After 5pm
So for example I would get the following results
sessionDate morning afternoon evening
2017-10-17 1 1 1
2017-10-18 1 1 0
Here is what I have so far and I believe that it's quite close but the fact I can't get the results I need make me think that this might not be possible in SQL (btw i'm using a numbers lookup table in my query which I saw on another tutorial)
DECLARE #s DATE = '2017-01-01', #e DATE = '2018-01-01';
;WITH d(sessionDate) AS
(
SELECT TOP (DATEDIFF(DAY, #s, #e) + 1) DATEADD(DAY, n-1, #s)
FROM dbo.Numbers ORDER BY n
)
SELECT
d.sessionDate,
sum(case when
(CONVERT(DATE, createdAt) = d.sessionDate AND datepart(hour, createdAt) < 12)
OR (CONVERT(DATE, lastUpdatedAt) = d.sessionDate AND datepart(hour, lastUpdatedAt) < 12)
then 1 else 0 end) as Morning,
sum(case when
(datepart(hour, createdAt) >= 12 and datepart(hour, createdAt) < 17)
OR (datepart(hour, lastUpdatedAt) >= 12 and datepart(hour, lastUpdatedAt) < 17)
OR (datepart(hour, createdAt) < 12 and datepart(hour, lastUpdatedAt) >= 17)
then 1 else 0 end) as Afternoon,
sum(case when datepart(hour, createdAt) >= 17 OR datepart(hour, lastUpdatedAt) >= 17 then 1 else 0 end) as Evening
FROM d
LEFT OUTER JOIN MYTABLE AS s
ON s.createdAt >= #s AND s.lastUpdatedAt <= #e
AND (CONVERT(DATE, s.createdAt) = d.sessionDate OR CONVERT(DATE, s.lastUpdatedAt) = d.sessionDate)
WHERE d.sessionDate >= #s AND d.sessionDate <= #e
AND userPingId = 49
GROUP BY d.sessionDate
ORDER BY d.sessionDate;
Building on what you started with the numbers table, you can add the time ranges to your adhoc calendar table using another common table expression using cross apply()
and the table value constructor (values (...),(...)).
From there, you can use an inner join based on overlapping date ranges along with conditional aggregation to pivot the results:
declare #s datetime = '2017-01-01', #e datetime = '2018-01-01';
;with n as (select n from (values(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) t(n))
, d as ( /* adhoc date/numbers table */
select top (datediff(day, #s, #e)+1)
SessionDate=convert(datetime,dateadd(day,row_number() over(order by (select 1))-1,#s))
from n as deka cross join n as hecto cross join n as kilo
cross join n as tenK cross join n as hundredK
order by SessionDate
)
, h as ( /* add time ranges to date table */
select
SessionDate
, StartDateTime = dateadd(hour,v.s,SessionDate)
, EndDateTime = dateadd(hour,v.e,SessionDate)
, v.point
from d
cross apply (values
(0,12,'morning')
,(12,17,'afternoon')
,(17,24,'evening')
) v (s,e,point)
)
select
t.userPingId
, h.SessionDate
, morning = count(case when point = 'morning' then 1 end)
, afternoon = count(case when point = 'afternoon' then 1 end)
, evening = count(case when point = 'evening' then 1 end)
from t
inner join h
on t.lastupdatedat >= h.startdatetime
and h.enddatetime > t.createdat
group by t.userPingId, h.SessionDate
rextester demo: http://rextester.com/MVB77123
returns:
+------------+-------------+---------+-----------+---------+
| userPingId | SessionDate | morning | afternoon | evening |
+------------+-------------+---------+-----------+---------+
| 1 | 2017-10-17 | 1 | 1 | 1 |
| 1 | 2017-10-18 | 1 | 1 | 0 |
+------------+-------------+---------+-----------+---------+
Alternately, you could use pivot() instead of conditional aggregation in the final select:
select UserPingId, SessionDate, Morning, Afternoon, Evening
from (
select
t.userPingId
, h.SessionDate
, h.point
from t
inner join h
on t.lastupdatedat >= h.startdatetime
and h.enddatetime > t.createdat
) t
pivot (count(point) for point in ([Morning], [Afternoon], [Evening])) p
rextester demo: http://rextester.com/SKLRG63092
You can using PIVOT on CTE's to derive solution to this problem.
Below is the test table
select * from ping
Below is the sql query
;with details as
(
select userPingId, createdAt as presenceDate , convert(date, createdAt) as
onlyDate,
datepart(hour, createdAt) as onlyHour
from ping
union all
select userPingId, lastUpdatedAt as presenceDate , convert(date,
lastUpdatedAt) as onlyDate,
datepart(hour, lastUpdatedAt) as onlyHour
from ping
)
, cte as
(
select onlyDate,count(*) as count,
case
when onlyHour between 0 and 12 then 'morning'
when onlyHour between 12 and 17 then 'afternoon'
when onlyHour>17 then 'evening'
end as 'period'
from details
group by onlyDate,onlyHour
)
select onlyDate, coalesce(morning,0) as morning,
coalesce(afternoon,0) as afternoon , coalesce(evening,0) as evening from
(
select onlyDate, count,period
from cte ) src
pivot
(
sum(count)
for period in ([morning],[afternoon],[evening])
) p
Below is the final result
This is a fairly similar answer to the one already posted, I just wanted the practice with PIVOT :)
I use a separate table with the time sections in it. this is then cross joined with the number table to create a date and time range for bucketing. i join this to the data and then pivot it (example: https://data.stackexchange.com/stackoverflow/query/750496/bucketing-data-into-date-am-pm-evening-and-pivoting-results)
SELECT
*
FROM (
SELECT
[userPingId],
dt,
[desc]
FROM (
SELECT
DATEADD(D, number, #s) AS dt,
CAST(DATEADD(D, number, #s) AS datetime) + CAST(s AS datetime) AS s,
CAST(DATEADD(D, number, #s) AS datetime) + CAST(e AS datetime) AS e,
[desc]
FROM #numbers
CROSS JOIN #times
WHERE number < DATEDIFF(D, #s, #e)
) ts
INNER JOIN #mytable AS m
ON m.createdat < ts.e
AND m.[lastUpdatedAt] >= ts.s
) src
PIVOT
(
COUNT([userPingId])
FOR [desc] IN ([am], [pm], [ev])
) piv;
the #times table is just:
s e desc
00:00:00.0000000 12:00:00.0000000 am
12:00:00.0000000 17:00:00.0000000 pm
17:00:00.0000000 23:59:59.0000000 ev

Select Missing Date Ranges In table In Oracle

I have the following Data Table Data as:
EMPLOYEEID DAYDATE
-----------------------------------
101 01/08/2017
101 02/08/2017
101 04/08/2017
101 06/08/2017
101 07/08/2017
102 01/08/2017
102 03/08/2017
102 06/08/2017
I want to Write a Query to get the following missing Dates between
01/8/2017 to
07/08/2017 Data as :
EMPLOYEEID DAYDATE
-----------------------------------
101 03/08/2017
101 05/08/2017
101 07/08/2017
102 02/08/2017
102 04/08/2017
102 05/08/2017
102 07/08/2017
How to deal with That?!
Either use a Date table, which I think is better, or use a Derived table to populate the dates(many codes available in google under "Oracle How to generate dates") :
SELECT t.employeeid,s.date
FROM (SELECT distinct p.employeeid FROM YourTable p) t
CROSS JOIN ( DATE TABLE \ Derived Table) s
This query will generate the entire range of dates for each employee. If you want only those that are missing:
SELECT t.employeeid,s.date
FROM (SELECT distinct p.employeeid FROM YourTable p) t
CROSS JOIN ( DATE TABLE \ Derived Table) s
LEFT JOIN YourTable h
ON(h.employeeID = t.employeeId and h.date = s.date)
WHERE h.employeeID IS NULL
You can generate all the rows with a hierarchical query:
SELECT DATE '2017-08-01' + LEVEL - 1 AS dt
FROM DUAL
CONNECT BY DATE '2017-08-01' + LEVEL - 1 <= DATE '2017-08-07'
or a recursive sub-query factoring clause:
WITH alldates ( dt ) AS (
SELECT DATE '2017-08-01' FROM DUAL
UNION ALL
SELECT dt + 1 FROM alldates WHERE dt < DATE '2017-08-07'
)
SELECT * FROM alldates
Then you can use a partitioned outer join combine it with your existing data nad filter for those missing rows:
WITH alldates ( dt ) AS (
SELECT DATE '2017-08-01' FROM DUAL
UNION ALL
SELECT dt + 1 FROM alldates WHERE dt < DATE '2017-08-07'
)
SELECT employeeId,
dt
FROM alldates d
LEFT OUTER JOIN
your_table t
PARTITION BY ( t.employeeID )
ON ( t.daydate = d.dt )
WHERE daydate IS NULL;
Assuming you have a table EMPLOYEE in which EMPLOYEEID is primary key - and assuming that there may be employees who don't appear in your DATA_TABLE at all, and they must be then included in your output:
select e.employeeid, d.daydate
from employee e cross join ( select date '2017-08-01' + level - 1 as daydate
from dual
connect by level <= 7
) d
where (e.employeeid, d.daydate) not in (select employeeid, daydate from data_table)
;

How to group by ID as well as date with this query

This is a followup to a question I posted earlier. This code does exactly what I was asking about but I realized upon finally getting it working that what I was trying to do isn't actually quite what I need.
DECLARE #StartDateTime datetime = '2015-07-13 14:00:00',
#EndDateTime datetime = '2015-07-13 16:00:00';
WITH JumpsOf15 AS
(
SELECT ROW_NUMBER() OVER(ORDER BY object_id) * 15 AS Step
FROM sys.objects
),
Dates as
(
SELECT currentDate = steppedDate.steppedDate
FROM JumpsOf15
CROSS APPLY(SELECT DATEADD(MINUTE,Step,#StartDateTime) AS steppedDate ) AS steppedDate
WHERE #EndDateTime>steppedDate.steppedDate
)
SELECT d.currentDate, t.Value, t.FK_ConfigId
FROM Dates AS d
OUTER APPLY
( SELECT TOP 1 t.[Timestamp], t.Value, t.FK_ConfigId
FROM myTable AS t
WHERE t.[Timestamp] <= d.currentDate and t.FK_ConfigId in (208812, 208809, 208815)
ORDER BY t.[Timestamp] DESC, t.Value, t.FK_ConfigId
) AS t
This gives an output like so:
currentDate value FK_ConfigId
1/1/2015 12:15 2 208809
1/1/2015 12:30 5 208815
1/1/2015 12:45 1 208815
But actually I need to have one record per timestamp per unique FK_ConfigId that I specify in a list. Right now I am only getting one record per timestamp regardless of the config ID. The output I want:
currentDate value FK_ConfigId
1/1/2015 12:15 2 208809
1/1/2015 12:15 4 208815
1/1/2015 12:30 5 208809
1/1/2015 12:30 1 208815
How could I achieve this?
Try this and tell me what you think:
DECLARE #StartDateTime datetime = '2015-07-13 14:00:00',
#EndDateTime datetime = '2015-07-13 16:00:00';
WITH JumpsOf15 AS (
SELECT Step = Row_Number() OVER (ORDER BY (SELECT 1)) * 15
FROM sys.objects
),
Dates AS (
SELECT currentDate = DateAdd(minute, Step, #StartDateTime)
FROM JumpsOf15
WHERE Step < DateDiff(minute, 0, #EndDateTime - #StartDateTime)
)
SELECT
d.currentDate,
t.ConfigId,
t.Value
FROM
Dates d
CROSS JOIN (VALUES
(208812), (208809), (208815)
) c (ConfigId)
OUTER APPLY (
SELECT TOP 1
t.[Timestamp],
t.Value,
t.FK_ConfigId
FROM myTable t
WHERE
d.currentDate >= t.[Timestamp]
AND c.ConfigId = t.FK_ConfigId
ORDER BY
t.[Timestamp] DESC,
t.Value
) t
;

How to Select continuous date in sql

Is there any function to check for continuous date. I'm having problem on working with this issue below:
My table has a datetime column with the following data:
----------
2015-03-11
2015-03-12
2015-03-13
2015-03-16
Given start date as 2015-3-11 and end date as 2015-3-17. I want the result as:
----------
2015-03-11
2015-03-12
2015-03-13
Can anyone suggest anything ?
I'm thinking this is somewhat a variation of Grouping Islands of Contiguous Dates problem. This can be done using ROW_NUMBER():
SQL Fiddle
CREATE TABLE Test(
tDate DATETIME
)
INSERT INTO Test VALUES
('20150311'), ('20150312'), ('20150313'), ('20150316');
DECLARE #startDate DATE = '20150311'
DECLARE #endDate DATE = '20150317'
;WITH Cte AS(
SELECT
*,
RN = DATEADD(DD, - (ROW_NUMBER() OVER(ORDER BY tDATE) - 1), tDate)
FROM Test
WHERE
tDate >= #startDate
AND tDate < DATEADD(DAY, 1, #endDate)
)
SELECT CAST(tDate AS DATE)
FROM CTE
WHERE RN = #startDate
RESULT
|------------|
| 2015-03-11 |
| 2015-03-12 |
| 2015-03-13 |
Here is the SQL Server 2005 version:
SQL Fiddle
DECLARE #startDate DATETIME
DECLARE #endDate DATETIME
SET #startDate = '20150311'
SET #endDate = '20150317'
;WITH Cte AS(
SELECT
*,
RN = DATEADD(DD, -(ROW_NUMBER() OVER(ORDER BY tDATE)-1), tDate)
FROM Test
WHERE
tDate >= #startDate
AND tDate < DATEADD(DAY, 1, #endDate)
)
SELECT CONVERT(VARCHAR(10), tDate, 121)
FROM CTE
WHERE RN = #startDate
For MSSQL 2012. This will return MAX continuous groups:
DECLARE #t TABLE(d DATE)
INSERT INTO #t VALUES
('20150311'),
('20150312'),
('20150313'),
('20150316')
;WITH
c1 AS(SELECT d, IIF(DATEDIFF(dd,LAG(d, 1, DATEADD(dd, -1, d)) OVER(ORDER BY d), d) = 1, 0, 1) AS n FROM #t),
c2 AS(SELECT d, SUM(n) OVER(ORDER BY d) AS n FROM c1)
SELECT TOP 1 WITH TIES MIN(d) AS StartDate, MAX(d) AS EndDate, COUNT(*) AS DayCount
FROM c2
GROUP BY n
ORDER BY DayCount desc
Output:
StartDate EndDate DayCount
2015-03-11 2015-03-13 3
For
('20150311'),
('20150312'),
('20150313'),
('20150316'),
('20150317'),
('20150318'),
('20150319'),
('20150320')
Output:
StartDate EndDate DayCount
2015-03-16 2015-03-20 5
Apply filtering in c1 CTE:
c1 AS(SELECT d, IIF(DATEDIFF(dd,LAG(d, 1, DATEADD(dd, -1, d)) OVER(ORDER BY d), d) = 1, 0, 1) AS n FROM #t WHERE d BETWEEN '20150311' AND '20150320'),
For MSSQL 2008:
;WITH
c1 AS(SELECT d, (SELECT MAX(d) FROM #t it WHERE it.d < ot.d) AS pd FROM #t ot),
c2 AS(SELECT d, CASE WHEN DATEDIFF(dd,ISNULL(pd, DATEADD(dd, -1, d)), d) = 1 THEN 0 ELSE 1 END AS n FROM c1),
c3 AS(SELECT d, (SELECT SUM(n) FROM c2 ci WHERE ci.d <= co.d) AS n FROM c2 co)
SELECT TOP 1 WITH TIES MIN(d) AS StartDate, MAX(d) AS EndDate, COUNT(*) AS DayCount
FROM c3
GROUP BY n
ORDER BY DayCount desc
you don't need to declare any start date or end date as other answers says, you need a row_num with datediff function:
create table DateFragTest (cDate date);
insert into DateFragTest
values ('2015-3-11'),
('2015-3-12'),
('2015-3-13'),
('2015-3-16')
with cte as
(select
cDate,
row_number() over (order by cDate ) as rn
from
DateFragTest)
select cDate
from cte t1
where datediff(day,
(select cDate from cte t2 where t2.rn=t1.rn+1),
t1.cDate)<>1
Output:
cDate
2015-03-11
2015-03-12
2015-03-13
SQLFIDDLE DEMO
For sql server 2012-
WITH cte
AS
(
SELECT [datex]
, lead([datex]) OVER ( ORDER BY [datex]) lead_datex
, datediff(dd,[datex],lead([datex]) OVER ( ORDER BY [datex]) ) AS diff
FROM [dbo].[datex]
)
SELECT c.[datex]
FROM [cte] AS c
WHERE diff >=1
Use BETWEEN
The query will go like this:
SELECT *
FROM your_table_name
WHERE your_date_column_name BETWEEN '2015-3-11' AND '2015-3-13'
(dt between x and y) or just (dt >= x and dt <= y).