Find missing contact date details - sql

I have the following table with Contacts details:
Table: tblContacts
CREATE TABLE tblContacts
(
Series INT,
ContacNumber INT,
CDate DATETIME
);
Sample data:
INSERT INTO tblContacts VALUES(12,123456,'2019-01-01');
INSERT INTO tblContacts VALUES(3,3456,'2019-01-01');
INSERT INTO tblContacts VALUES(12,123560,'2019-01-02');
INSERT INTO tblContacts VALUES(12,123459,'2019-01-05');
INSERT INTO tblContacts VALUES(3,3446,'2019-01-02');
INSERT INTO tblContacts VALUES(3,3486,'2019-01-03');
INSERT INTO tblContacts VALUES(3,34861,'2019-01-05');
INSERT INTO tblContacts VALUES(3,34862,'2019-01-07');
INSERT INTO tblContacts VALUES(12,127456,'2019-01-21');
INSERT INTO tblContacts VALUES(12,129456,'2019-02-03');
INSERT INTO tblContacts VALUES(12,126456,'2019-02-06');
INSERT INTO tblContacts VALUES(94,941256,'2019-01-01');
INSERT INTO tblContacts VALUES(94,944356,'2019-01-03');
INSERT INTO tblContacts VALUES(94,941356,'2019-01-07');
INSERT INTO tblContacts VALUES(94,943356,'2019-01-09');
I want to find those contacts who never called on those dates and 1 or 2 days before and after call.
Note: I may crease before and after calls to any level like 1,2,3,4 so on.
Expected Output: The following output for just 1 day before and after calls.
ContacNumber CDate
----------------------------------------
3486 2019-01-03
NULL 2019-01-04
34861 2019-01-05
NULL 2019-01-06
34862 2019-01-07
123560 2019-01-02
NULL 2019-01-03 - 2019-01-04
123459 2019-01-05
NULL 2019-01-06 - 2019-01-20
127456 2019-01-21
NULL 2019-01-22 - 2019-02-02
129456 2019-02-03
NULL 2019-02-04 - 2019-02-05
126456 2019-02-06
941256 2019-01-01
NULL 2019-01-02
944356 2019-01-03
NULL 2019-01-04 - 2019-01-06
941356 2019-01-07
NULL 2019-01-08
943356 2019-01-09
My try: Following query works only for 1 day before and after calls but not for other than 1.
Query:
; WITH Stage_1_CTE AS
(
SELECT Series,
ContacNumber,
CAST(CDate AS DATE) CDate,
ROW_NUMBER() OVER (PARTITION BY Series ORDER BY CAST(CDate AS DATE)) rnk1,
(ROW_NUMBER() OVER (PARTITION BY Series ORDER BY CAST(CDate AS DATE)))/2 rnk2,
(ROW_NUMBER() OVER (PARTITION BY Series ORDER BY CAST(CDate AS DATE)) + 1)/2 rnk3
FROM tblContacts
GROUP BY ContacNumber,CDate,Series
)
,
Stage_2_CTE AS
(
SELECT *,
CASE WHEN rnk1%2=1 THEN MAX(CASE WHEN rnk1%2=0 THEN CDate END) OVER (PARTITION BY Series,rnk2)
ELSE MAX(CASE WHEN rnk1%2=1 THEN CDate END) OVER (PARTITION BY Series,rnk3)
END AS CDate_Prev,
CASE WHEN rnk1%2=1 THEN MAX(CASE WHEN rnk1%2=0 THEN CDate END) OVER (PARTITION BY Series,rnk3)
ELSE MAX(CASE WHEN rnk1%2=1 THEN CDate END) OVER (PARTITION BY Series,rnk2)
END AS CDate_Next
FROM Stage_1_CTE
)
,Stage_Final_CTE AS
(
SELECT c.Series,
c.ContacNumber,
c.CDate,
EndDate = ''
FROM Stage_2_CTE c
WHERE c.CDate <> DATEADD(DAY, +1, CDate_Prev) OR c.CDate <> DATEADD(DAY, -1, CDate_Next)
UNION ALL
SELECT Series,
ContacNumber = NULL,
CDate = DATEADD(DAY, 1, c.CDate),
EndDate = ' - '+CAST(DATEADD(DAY, -1, CDate_Next) AS VARCHAR(10))
FROM Stage_2_CTE c
WHERE c.CDate <> DATEADD(DAY, -1, CDate_Next)
)
SELECT ContacNumber,
CASE WHEN CAST(CDate AS VARCHAR(10)) = REPLACE(EndDate,' - ','')
THEN CAST(CDate AS VARCHAR(10))
ELSE
CAST(CDate AS VARCHAR(10)) + CAST(EndDate AS VARCHAR(13))
END CDate
FROM Stage_Final_CTE
GROUP BY ContacNumber,CDate,EndDate,Series
ORDER BY Series,CDate;

You can generate numbers and then add in rows. Something like this:
select contactnumber, cdate, 1 as isvalid
from tblcontacts
union all
select c.contactnumber, dateadd(day, v.n, c.contacctdate), 0
from tblcontacts c cross join
(values (-1), (1)) v(n)
where not exists (select 1
from tblcontacts c2
where c2.contactnumber = c.contactnumber and
c2.date = dateadd(day, v.n, c.contacctdate)
);
Note: I added a new column to determine if the row is valid. It doesn't make sense to repeat NULL values in the first column, because you don't know what contact number the row applies to.

You could get the next date in a Series.
Which can be used to find the gaps between the dates.
Then glue those gaps to the result.
For Sql Server 2008 the data is first loaded into a temporary table.
IF OBJECT_ID('tempdb..#tmpContacts', 'U') IS NOT NULL
DROP TABLE #tmpContacts;
CREATE TABLE #tmpContacts
(
Series INT NOT NULL,
Rn INT NOT NULL,
ContacNumber INT NOT NULL,
CDate DATE NOT NULL,
PRIMARY KEY (Series, Rn)
);
INSERT INTO #tmpContacts (Series, ContacNumber, CDate, Rn)
SELECT Series, ContacNumber
, CAST(CDate AS DATE)
, ROW_NUMBER() OVER (PARTITION BY Series ORDER BY CAST(CDate AS DATE)) Rn
FROM tblContacts
WHERE CDate >= CAST('2019-01-01' AS DATE)
AND CAST(CDate AS DATE) <= EOMONTH(EOMONTH(CAST('2019-01-01' AS DATE)))
GROUP BY Series, ContacNumber, CAST(CDate AS DATE);
WITH CTE_CONTACTS AS
(
SELECT Series, ContacNumber
, CDate
, CDate AS nextCDate
, CAST(0 AS BIT) AS IsGap
FROM #tmpContacts
UNION ALL
SELECT t1.Series, null
, DATEADD(day,1,t1.CDate)
, DATEADD(day,-1,t2.CDate)
, 1
FROM #tmpContacts t1
LEFT JOIN #tmpContacts t2
ON t2.Series = t1.Series
AND t2.Rn = t1.Rn + 1
AND t1.CDate < DATEADD(day,-1,t2.CDate)
)
SELECT c.Series, ContacNumber
, CONCAT(CONVERT(varchar,c.CDate,23),
CASE
WHEN c.IsGap=1
AND DATEDIFF(day,c.CDate,c.nextCDate) > 0
THEN ' - ' + CONVERT(varchar,c.nextCDate,23)
END) AS Cdates
FROM CTE_CONTACTS c
ORDER BY c.Series, c.CDate;
Test on rextester here
In Sql Server 2012+ the window function LEAD can be used instead.
WITH CTE_CONTACTS AS
(
SELECT Series, ContacNumber
, CAST(CDate AS DATE) AS CDate
, LEAD(CAST(CDate AS DATE)) OVER (PARTITION BY Series ORDER BY CAST(CDate AS DATE)) AS nextCDate
FROM tblContacts
WHERE CDate >= CAST('2019-01-01' AS DATE)
AND CAST(CDate AS DATE) <= EOMONTH(EOMONTH(CAST('2019-01-01' AS DATE)))
GROUP BY Series, ContacNumber, CAST(CDate AS DATE)
)
, CTE_CONTACTS2 AS
(
SELECT Series, ContacNumber
, CDate
, nextCDate
, CAST(0 AS BIT) AS IsGap
FROM CTE_CONTACTS
UNION ALL
SELECT Series, null
, DATEADD(day,1,CDate)
, DATEADD(day,-1,nextCDate)
, 1
FROM CTE_CONTACTS c
WHERE CDate < DATEADD(day,-1,nextCDate)
)
SELECT c.Series, ContacNumber
, CONCAT(CONVERT(varchar,c.CDate,23),
CASE
WHEN c.IsGap=1
AND DATEDIFF(day,c.CDate,c.nextCDate) > 0
THEN ' - ' + CONVERT(varchar,c.nextCDate,23)
END) AS Cdates
FROM CTE_CONTACTS2 c
ORDER BY c.Series, c.CDate;
A test on rextester here

Related

Loop within id and combine dates between rows in SQL [duplicate]

I have a table in the following format
Id StartDate EndDate Type
1 2012-02-18 2012-03-18 1
1 2012-03-17 2012-06-29 1
1 2012-06-27 2012-09-27 1
1 2014-08-23 2014-09-24 3
1 2014-09-23 2014-10-24 3
1 2014-10-23 2014-11-24 3
2 2015-07-04 2015-08-06 1
2 2015-08-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
I found similar questions here, but not something that could help me solve my problem. I want to merge rows that has the same Id, Type and overlapping date periods.
The result from the above table should be
Id StartDate EndDate Type
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
In another server, I was able to do it with the following restrictions and the query below:
Didn't care about the Type column, but just the Id
Had a newer version of SQL Server (2012), but now I have 2008 which the code is not compatible
SELECT Id
, MIN(StartDate) AS StartDate
, MAX(EndDate) AS EndDate
FROM (
SELECT *
, SUM(CASE WHEN a.EndDate = a.StartDate THEN 0
ELSE 1
END
) OVER (ORDER BY Id, StartDate) sm
FROM (
SELECT Id
, StartDate
, EndDate
, LAG(EndDate, 1, NULL) OVER (PARTITION BY Id ORDER BY Id, EndDate) EndDate
FROM #temptable
) a
) b
GROUP BY Id, sm
Any advice how I can
Include Type on the process
Make it work on SQL Server 2008
This approach uses an additional temp table to identify the groups of overlapping dates, and then performs a quick aggregate based on the groupings.
SELECT *, ROW_NUMBER() OVER (ORDER BY Id, Type) AS UID,
ROW_NUMBER() OVER (ORDER BY Id, Type) AS GroupId INTO #G FROM #TempTable
WHILE ##ROWCOUNT <> 0 BEGIN
UPDATE T1 SET
GroupId = T2.GroupId
FROM #G T1
INNER JOIN (
SELECT T1.UID, CASE WHEN T1.GroupId < T2.GroupId THEN T1.GroupId ELSE T2.GroupId END
FROM #G T1
LEFT OUTER JOIN #G T2
ON T1.Id = T2.Id AND T1.Type = T2.Type AND T1.GroupId <> T2.GroupId
AND T1.StartDate <= T2.EndDate AND T2.StartDate <= T1.EndDate
) T2 (UID, GroupId)
ON T1.UID = T2.UID
WHERE T1.GroupId <> T2.GroupId
END
SELECT Id, MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate, Type
FROM #G G GROUP BY GroupId, Id, Type
This returns the expected values
Id StartDate EndDate Type
----------- ---------- ---------- -----------
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
This is 2008 compatible. A CTE really is the best way to link up all overlapping records in my opinion. The date overlap logic came from this thread: SO Date Overlap
I added extra data that's more complex to make sure that it's working as expected.
DECLARE #Data table (Id INT, StartDate DATE, EndDate DATE, Type INT)
INSERT INTO #data
SELECT 1,'2/18/2012' ,'3/18/2012', 1 UNION ALL
select 1,'3/17/2012','6/29/2012',1 UNION ALL
select 1,'6/27/2012','9/27/2012',1 UNION ALL
select 1,'8/23/2014','9/24/2014',3 UNION ALL
select 1,'9/23/2014','10/24/2014',3 UNION ALL
select 1,'10/23/2014','11/24/2014',3 UNION ALL
select 2,'7/4/2015','8/6/2015',1 UNION ALL
select 2,'8/4/2015','9/6/2015',1 UNION ALL
select 3,'11/1/2013','12/1/2013',0 UNION ALL
select 3,'1/9/2018','2/9/2018',0 UNION ALL
select 4,'1/1/2018','1/2/2018',0 UNION ALL --many non overlapping dates
select 4,'1/4/2018','1/5/2018',0 UNION ALL
select 4,'1/7/2018','1/9/2018',0 UNION ALL
select 4,'1/11/2018','1/13/2018',0 UNION ALL
select 4,'2/7/2018','2/8/2018',0 UNION ALL --many overlapping dates
select 4,'2/8/2018','2/9/2018',0 UNION ALL
select 4,'2/9/2018','2/10/2018',0 UNION all
select 4,'2/10/2018','2/11/2018',0 UNION all
select 4,'2/11/2018','2/12/2018',0 UNION all
select 4,'2/12/2018','2/13/2018',0 UNION all
select 4,'3/7/2018','3/8/2018',0 UNION ALL --many overlapping dates, second instance of id 4, type 0
select 4,'3/8/2018','3/9/2018',0 UNION ALL
select 4,'3/9/2018','3/10/2018',0 UNION all
select 4,'3/10/2018','3/11/2018',0 UNION all
select 4,'3/11/2018','3/12/2018',0 UNION all
select 4,'3/12/2018','3/13/2018',0
;
WITH cdata
AS (SELECT Id,
d.Type,
d.StartDate,
d.EndDate,
CurrentStart = d.StartDate
FROM #Data d
WHERE
NOT EXISTS (
SELECT * FROM #Data x WHERE x.StartDate < d.StartDate AND d.StartDate <= x.EndDate AND d.EndDate >= x.StartDate AND d.Id = x.Id AND d.Type = x.Type --get first records for overlapping ranges
)
UNION ALL
SELECT d.Id,
d.Type,
StartDate = CASE WHEN d2.StartDate < d.StartDate THEN d2.StartDate ELSE d.StartDate END,
EndDate = CASE WHEN d2.EndDate > d.EndDate THEN d2.EndDate ELSE d.EndDate END,
CurrentStart = d2.StartDate
FROM cdata d
INNER JOIN #Data d2
ON (
d.StartDate <= d2.EndDate
AND d.EndDate >= d2.StartDate
)
AND d2.Id = d.Id
AND d2.Type = d.Type
AND d2.StartDate > d.CurrentStart)
SELECT cdata.Id, cdata.Type, cdata.StartDate, EndDate = MAX(cdata.EndDate)
FROM cdata
GROUP BY cdata.Id, cdata.Type, cdata.StartDate
This looks like a Packing Intervals problem. See the post by Itzik Ben-Gan for all the details and what indexes he recommends to make it work efficiently. He presents a solution without recursive CTE.
Two notes.
The query below assumes that intervals are [closed; open), i.e. StartDate is inclusive and EndDate is exclusive. This way to represent such data is often the most convenient. (in the same sense as having arrays as zero-based instead of 1-based is usually more convenient in programming languages).
I added a RowID column to have unambiguous sorting.
Sample data
DECLARE #T TABLE
(
RowID int IDENTITY,
id int,
StartDate date,
EndDate date,
tp int
);
INSERT INTO #T(Id, StartDate, EndDate, tp) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
-- Make EndDate an opened interval, make it exclusive
-- [Start; End)
UPDATE #T
SET EndDate = DATEADD(day, 1, EndDate)
;
Recommended indexes
-- indexes to support solutions
CREATE UNIQUE INDEX idx_start_id ON T(id, tp, StartDate, RowID);
CREATE UNIQUE INDEX idx_end_id ON T(id, tp, EndDate, RowID);
Query
Read the Itzik's post to understand what is going on. He has nice illustrations there. In short, each timestamp (start or end) is treated as an event. Each event has a + or - type. Each time we encounter a + event (some interval starts) we increase the running counter. Each time we encounter a - event (some interval ends) we decrease the running counter. When the running counter is 0 it means that the streak of overlapping intervals is over.
I took Itzik's query as is and simply changed the column names to match your names.
WITH C1 AS
-- let e = end ordinals, let s = start ordinals
(
SELECT
RowID, id, tp, StartDate AS ts, +1 AS EventType,
NULL AS e,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY StartDate, RowID) AS s
FROM #T
UNION ALL
SELECT
RowID, id, tp, EndDate AS ts, -1 AS EventType,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY EndDate, RowID) AS e,
NULL AS s
FROM #T
),
C2 AS
-- let se = start or end ordinal, namely, how many events (start or end) happened so far
(
SELECT C1.*,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts, EventType DESC, RowID) AS se
FROM C1
),
C3 AS
-- For start events, the expression s - (se - s) - 1 represents how many sessions were active
-- just before the current (hence - 1)
--
-- For end events, the expression (se - e) - e represents how many sessions are active
-- right after this one
--
-- The above two expressions are 0 exactly when a group of packed intervals
-- either starts or ends, respectively
--
-- After filtering only events when a group of packed intervals either starts or ends,
-- group each pair of adjacent start/end events
(
SELECT id, tp, ts,
((ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts) - 1) / 2 + 1)
AS grpnum
FROM C2
WHERE COALESCE(s - (se - s) - 1, (se - e) - e) = 0
)
SELECT id, tp, MIN(ts) AS StartDate, DATEADD(day, -1, MAX(ts)) AS EndDate
FROM C3
GROUP BY id, tp, grpnum
ORDER BY id, tp, StartDate;
Result
+----+----+------------+------------+
| id | tp | StartDate | EndDate |
+----+----+------------+------------+
| 1 | 1 | 2012-02-18 | 2012-09-27 |
| 1 | 3 | 2014-08-23 | 2014-11-24 |
| 2 | 1 | 2015-07-04 | 2015-09-06 |
| 3 | 0 | 2013-11-01 | 2013-12-01 |
| 3 | 0 | 2018-01-09 | 2018-02-09 |
+----+----+------------+------------+
create table #table
(Id int,StartDate date, EndDate date, Type int)
insert into #table
values
('1','2012-02-18','2012-03-18','1'),('1','2012-03-19','2012-06-19','1'),
('1','2012-06-27','2012-09-27','1'),('1','2014-08-23','2014-09-24','3'),
('1','2014-09-23','2014-10-24','3'),('1','2014-10-23','2014-11-24','3'),
('2','2015-07-04','2015-08-06','1'),('2','2015-08-04','2015-09-06','1'),
('3','2013-11-01','2013-12-01','0'),('3','2018-01-09','2018-02-09','0')
select ID,MIN(startdate)sd,MAX(EndDate)ed,type from #table
group by ID,TYPE,YEAR(startdate),YEAR(EndDate)
this can be easily achieved by using some window-functions and CTE's. Here is the solution
DECLARE #table TABLE
(id INT,
StartDate DATE,
EndDate DATE,
[Type] INT
);
INSERT INTO #table(Id, StartDate, EndDate, [Type]) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
WITH C1 AS
(
SELECT *,
MAX(EndDate) OVER(PARTITION BY Id, [Type]
ORDER BY StartDate, EndDate
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS PrevEnd
FROM #table
),
C2 AS
(
SELECT *,
SUM(StartFlag) OVER(PARTITION BY Id, [Type]
ORDER BY StartDate, EndDate
ROWS UNBOUNDED PRECEDING) AS GroupID
FROM C1
CROSS APPLY ( VALUES(CASE WHEN StartDate <= PrevEnd THEN NULL ELSE 1 END) ) AS A(StartFlag)
)
SELECT Id, [Type], MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate
FROM C2
GROUP BY Id, [Type], GroupID;

Merge rows if date columns are overlapping in TSQL

I have a table in the following format
Id StartDate EndDate Type
1 2012-02-18 2012-03-18 1
1 2012-03-17 2012-06-29 1
1 2012-06-27 2012-09-27 1
1 2014-08-23 2014-09-24 3
1 2014-09-23 2014-10-24 3
1 2014-10-23 2014-11-24 3
2 2015-07-04 2015-08-06 1
2 2015-08-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
I found similar questions here, but not something that could help me solve my problem. I want to merge rows that has the same Id, Type and overlapping date periods.
The result from the above table should be
Id StartDate EndDate Type
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
In another server, I was able to do it with the following restrictions and the query below:
Didn't care about the Type column, but just the Id
Had a newer version of SQL Server (2012), but now I have 2008 which the code is not compatible
SELECT Id
, MIN(StartDate) AS StartDate
, MAX(EndDate) AS EndDate
FROM (
SELECT *
, SUM(CASE WHEN a.EndDate = a.StartDate THEN 0
ELSE 1
END
) OVER (ORDER BY Id, StartDate) sm
FROM (
SELECT Id
, StartDate
, EndDate
, LAG(EndDate, 1, NULL) OVER (PARTITION BY Id ORDER BY Id, EndDate) EndDate
FROM #temptable
) a
) b
GROUP BY Id, sm
Any advice how I can
Include Type on the process
Make it work on SQL Server 2008
This approach uses an additional temp table to identify the groups of overlapping dates, and then performs a quick aggregate based on the groupings.
SELECT *, ROW_NUMBER() OVER (ORDER BY Id, Type) AS UID,
ROW_NUMBER() OVER (ORDER BY Id, Type) AS GroupId INTO #G FROM #TempTable
WHILE ##ROWCOUNT <> 0 BEGIN
UPDATE T1 SET
GroupId = T2.GroupId
FROM #G T1
INNER JOIN (
SELECT T1.UID, CASE WHEN T1.GroupId < T2.GroupId THEN T1.GroupId ELSE T2.GroupId END
FROM #G T1
LEFT OUTER JOIN #G T2
ON T1.Id = T2.Id AND T1.Type = T2.Type AND T1.GroupId <> T2.GroupId
AND T1.StartDate <= T2.EndDate AND T2.StartDate <= T1.EndDate
) T2 (UID, GroupId)
ON T1.UID = T2.UID
WHERE T1.GroupId <> T2.GroupId
END
SELECT Id, MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate, Type
FROM #G G GROUP BY GroupId, Id, Type
This returns the expected values
Id StartDate EndDate Type
----------- ---------- ---------- -----------
1 2012-02-18 2012-09-27 1
1 2014-08-23 2014-11-24 3
2 2015-07-04 2015-09-06 1
3 2013-11-01 2013-12-01 0
3 2018-01-09 2018-02-09 0
This is 2008 compatible. A CTE really is the best way to link up all overlapping records in my opinion. The date overlap logic came from this thread: SO Date Overlap
I added extra data that's more complex to make sure that it's working as expected.
DECLARE #Data table (Id INT, StartDate DATE, EndDate DATE, Type INT)
INSERT INTO #data
SELECT 1,'2/18/2012' ,'3/18/2012', 1 UNION ALL
select 1,'3/17/2012','6/29/2012',1 UNION ALL
select 1,'6/27/2012','9/27/2012',1 UNION ALL
select 1,'8/23/2014','9/24/2014',3 UNION ALL
select 1,'9/23/2014','10/24/2014',3 UNION ALL
select 1,'10/23/2014','11/24/2014',3 UNION ALL
select 2,'7/4/2015','8/6/2015',1 UNION ALL
select 2,'8/4/2015','9/6/2015',1 UNION ALL
select 3,'11/1/2013','12/1/2013',0 UNION ALL
select 3,'1/9/2018','2/9/2018',0 UNION ALL
select 4,'1/1/2018','1/2/2018',0 UNION ALL --many non overlapping dates
select 4,'1/4/2018','1/5/2018',0 UNION ALL
select 4,'1/7/2018','1/9/2018',0 UNION ALL
select 4,'1/11/2018','1/13/2018',0 UNION ALL
select 4,'2/7/2018','2/8/2018',0 UNION ALL --many overlapping dates
select 4,'2/8/2018','2/9/2018',0 UNION ALL
select 4,'2/9/2018','2/10/2018',0 UNION all
select 4,'2/10/2018','2/11/2018',0 UNION all
select 4,'2/11/2018','2/12/2018',0 UNION all
select 4,'2/12/2018','2/13/2018',0 UNION all
select 4,'3/7/2018','3/8/2018',0 UNION ALL --many overlapping dates, second instance of id 4, type 0
select 4,'3/8/2018','3/9/2018',0 UNION ALL
select 4,'3/9/2018','3/10/2018',0 UNION all
select 4,'3/10/2018','3/11/2018',0 UNION all
select 4,'3/11/2018','3/12/2018',0 UNION all
select 4,'3/12/2018','3/13/2018',0
;
WITH cdata
AS (SELECT Id,
d.Type,
d.StartDate,
d.EndDate,
CurrentStart = d.StartDate
FROM #Data d
WHERE
NOT EXISTS (
SELECT * FROM #Data x WHERE x.StartDate < d.StartDate AND d.StartDate <= x.EndDate AND d.EndDate >= x.StartDate AND d.Id = x.Id AND d.Type = x.Type --get first records for overlapping ranges
)
UNION ALL
SELECT d.Id,
d.Type,
StartDate = CASE WHEN d2.StartDate < d.StartDate THEN d2.StartDate ELSE d.StartDate END,
EndDate = CASE WHEN d2.EndDate > d.EndDate THEN d2.EndDate ELSE d.EndDate END,
CurrentStart = d2.StartDate
FROM cdata d
INNER JOIN #Data d2
ON (
d.StartDate <= d2.EndDate
AND d.EndDate >= d2.StartDate
)
AND d2.Id = d.Id
AND d2.Type = d.Type
AND d2.StartDate > d.CurrentStart)
SELECT cdata.Id, cdata.Type, cdata.StartDate, EndDate = MAX(cdata.EndDate)
FROM cdata
GROUP BY cdata.Id, cdata.Type, cdata.StartDate
This looks like a Packing Intervals problem. See the post by Itzik Ben-Gan for all the details and what indexes he recommends to make it work efficiently. He presents a solution without recursive CTE.
Two notes.
The query below assumes that intervals are [closed; open), i.e. StartDate is inclusive and EndDate is exclusive. This way to represent such data is often the most convenient. (in the same sense as having arrays as zero-based instead of 1-based is usually more convenient in programming languages).
I added a RowID column to have unambiguous sorting.
Sample data
DECLARE #T TABLE
(
RowID int IDENTITY,
id int,
StartDate date,
EndDate date,
tp int
);
INSERT INTO #T(Id, StartDate, EndDate, tp) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
-- Make EndDate an opened interval, make it exclusive
-- [Start; End)
UPDATE #T
SET EndDate = DATEADD(day, 1, EndDate)
;
Recommended indexes
-- indexes to support solutions
CREATE UNIQUE INDEX idx_start_id ON T(id, tp, StartDate, RowID);
CREATE UNIQUE INDEX idx_end_id ON T(id, tp, EndDate, RowID);
Query
Read the Itzik's post to understand what is going on. He has nice illustrations there. In short, each timestamp (start or end) is treated as an event. Each event has a + or - type. Each time we encounter a + event (some interval starts) we increase the running counter. Each time we encounter a - event (some interval ends) we decrease the running counter. When the running counter is 0 it means that the streak of overlapping intervals is over.
I took Itzik's query as is and simply changed the column names to match your names.
WITH C1 AS
-- let e = end ordinals, let s = start ordinals
(
SELECT
RowID, id, tp, StartDate AS ts, +1 AS EventType,
NULL AS e,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY StartDate, RowID) AS s
FROM #T
UNION ALL
SELECT
RowID, id, tp, EndDate AS ts, -1 AS EventType,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY EndDate, RowID) AS e,
NULL AS s
FROM #T
),
C2 AS
-- let se = start or end ordinal, namely, how many events (start or end) happened so far
(
SELECT C1.*,
ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts, EventType DESC, RowID) AS se
FROM C1
),
C3 AS
-- For start events, the expression s - (se - s) - 1 represents how many sessions were active
-- just before the current (hence - 1)
--
-- For end events, the expression (se - e) - e represents how many sessions are active
-- right after this one
--
-- The above two expressions are 0 exactly when a group of packed intervals
-- either starts or ends, respectively
--
-- After filtering only events when a group of packed intervals either starts or ends,
-- group each pair of adjacent start/end events
(
SELECT id, tp, ts,
((ROW_NUMBER() OVER(PARTITION BY id, tp ORDER BY ts) - 1) / 2 + 1)
AS grpnum
FROM C2
WHERE COALESCE(s - (se - s) - 1, (se - e) - e) = 0
)
SELECT id, tp, MIN(ts) AS StartDate, DATEADD(day, -1, MAX(ts)) AS EndDate
FROM C3
GROUP BY id, tp, grpnum
ORDER BY id, tp, StartDate;
Result
+----+----+------------+------------+
| id | tp | StartDate | EndDate |
+----+----+------------+------------+
| 1 | 1 | 2012-02-18 | 2012-09-27 |
| 1 | 3 | 2014-08-23 | 2014-11-24 |
| 2 | 1 | 2015-07-04 | 2015-09-06 |
| 3 | 0 | 2013-11-01 | 2013-12-01 |
| 3 | 0 | 2018-01-09 | 2018-02-09 |
+----+----+------------+------------+
create table #table
(Id int,StartDate date, EndDate date, Type int)
insert into #table
values
('1','2012-02-18','2012-03-18','1'),('1','2012-03-19','2012-06-19','1'),
('1','2012-06-27','2012-09-27','1'),('1','2014-08-23','2014-09-24','3'),
('1','2014-09-23','2014-10-24','3'),('1','2014-10-23','2014-11-24','3'),
('2','2015-07-04','2015-08-06','1'),('2','2015-08-04','2015-09-06','1'),
('3','2013-11-01','2013-12-01','0'),('3','2018-01-09','2018-02-09','0')
select ID,MIN(startdate)sd,MAX(EndDate)ed,type from #table
group by ID,TYPE,YEAR(startdate),YEAR(EndDate)
this can be easily achieved by using some window-functions and CTE's. Here is the solution
DECLARE #table TABLE
(id INT,
StartDate DATE,
EndDate DATE,
[Type] INT
);
INSERT INTO #table(Id, StartDate, EndDate, [Type]) VALUES
(1, '2012-02-18', '2012-03-18', 1),
(1, '2012-03-17', '2012-06-29', 1),
(1, '2012-06-27', '2012-09-27', 1),
(1, '2014-08-23', '2014-09-24', 3),
(1, '2014-09-23', '2014-10-24', 3),
(1, '2014-10-23', '2014-11-24', 3),
(2, '2015-07-04', '2015-08-06', 1),
(2, '2015-08-04', '2015-09-06', 1),
(3, '2013-11-01', '2013-12-01', 0),
(3, '2018-01-09', '2018-02-09', 0);
WITH C1 AS
(
SELECT *,
MAX(EndDate) OVER(PARTITION BY Id, [Type]
ORDER BY StartDate, EndDate
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS PrevEnd
FROM #table
),
C2 AS
(
SELECT *,
SUM(StartFlag) OVER(PARTITION BY Id, [Type]
ORDER BY StartDate, EndDate
ROWS UNBOUNDED PRECEDING) AS GroupID
FROM C1
CROSS APPLY ( VALUES(CASE WHEN StartDate <= PrevEnd THEN NULL ELSE 1 END) ) AS A(StartFlag)
)
SELECT Id, [Type], MIN(StartDate) AS StartDate, MAX(EndDate) AS EndDate
FROM C2
GROUP BY Id, [Type], GroupID;

Get all overlapping date ranges when all overlap at the same time

I'm struggling with this for a few days... trying to write an SQL query to get all date ranges when all units overlap at the same time. It's better to see it graphically.
Here is the simplified table with the image for reference:
UnitId Start End
====== ========== ==========
1 05/01/2018 09/01/2018
1 10/01/2018 13/01/2018
2 04/01/2018 15/01/2018
2 19/01/2018 23/01/2018
3 06/01/2018 12/01/2018
3 14/01/2018 22/01/2018
Expected result:
Start End
====== ==========
06/01/2018 09/01/2018
10/01/2018 12/01/2018
What I currently have:
DECLARE #sourceTable TABLE (UnitId int, StartDate datetime, EndDate datetime);
INSERT INTO #sourceTable VALUES
(1, '2018-01-05', '2018-01-09')
,(1, '2018-01-10', '2018-01-13')
,(2, '2018-01-04', '2018-01-15')
,(2, '2018-01-19', '2018-01-23')
,(3, '2018-01-06', '2018-01-12')
,(3, '2018-01-14', '2018-01-22');
SELECT DISTINCT
(SELECT max(v) FROM (values(A.StartDate), (B.StartDate)) as value(v)) StartDate
,(SELECT min(v) FROM (values(A.EndDate), (B.EndDate)) as value(v)) EndDate
FROM #sourceTable A
JOIN #sourceTable B
ON A.startDate <= B.endDate AND A.endDate >= B.startDate AND A.UnitId != B.UnitId
I believe it is "count number of overlapping intervals" problem (this picture should help). Here is one solution to it:
DECLARE #t TABLE (UnitId INT, [Start] DATE, [End] DATE);
INSERT INTO #t VALUES
(1, '2018-01-05', '2018-01-09'),
(1, '2018-01-10', '2018-01-13'),
(2, '2018-01-04', '2018-01-15'),
(2, '2018-01-19', '2018-01-23'),
(3, '2018-01-06', '2018-01-12'),
(3, '2018-01-14', '2018-01-22');
WITH cte1(date, val) AS (
SELECT [Start], 1 FROM #t AS t
UNION ALL
SELECT [End], 0 FROM #t AS t
UNION ALL
SELECT DATEADD(DAY, 1, [End]), -1 FROM #t AS t
), cte2 AS (
SELECT date, SUM(val) OVER (ORDER BY date, val) AS usage
FROM cte1
)
SELECT date, MAX(usage) AS usage
FROM cte2
GROUP BY date
It will give you a list of all dates at which the use count (possibly) changed:
date usage
2018-01-04 1
2018-01-05 2
2018-01-06 3
2018-01-09 3
2018-01-10 3
2018-01-12 3
2018-01-13 2
2018-01-14 2
2018-01-15 2
2018-01-16 1
2018-01-19 2
2018-01-22 2
2018-01-23 1
2018-01-24 0
With this approach you do not need a calendar table or rCTE to build missing dates. Converting the above to ranges (2018-01-05 ... 2018-01-15, 2018-01-19 ... 2018-01-22 etc) is not very difficult.
DECLARE #t TABLE (UnitId INT, [Start] DATE, [End] DATE);
INSERT INTO #t VALUES
(1, '2018-01-05', '2018-01-09'),
(1, '2018-01-10', '2018-01-13'),
(2, '2018-01-04', '2018-01-15'),
(2, '2018-01-19', '2018-01-23'),
(3, '2018-01-06', '2018-01-12'),
(3, '2018-01-14', '2018-01-22');
WITH cte1(date, val) AS (
SELECT [Start], 1 FROM #t AS t -- starting date increments counter
UNION ALL
SELECT [End], 0 FROM #t AS t -- we need all edges in the result
UNION ALL
SELECT DATEADD(DAY, 1, [End]), -1 FROM #t AS t -- end date + 1 decrements counter
), cte2 AS (
SELECT date, SUM(val) OVER (ORDER BY date, val) AS usage -- running sum for counter
FROM cte1
), cte3 AS (
SELECT date, MAX(usage) AS usage -- group multiple events on same date together
FROM cte2
GROUP BY date
), cte4 AS (
SELECT date, usage, CASE
WHEN usage > 1 AND LAG(usage) OVER (ORDER BY date) > 1 THEN 0
WHEN usage < 2 AND LAG(usage) OVER (ORDER BY date) < 2 THEN 0
ELSE 1
END AS chg -- start new group if prev and curr usage are on opposite side of 1
FROM cte3
), cte5 AS (
SELECT date, usage, SUM(chg) OVER (ORDER BY date) AS grp -- number groups for each change
FROM cte4
)
SELECT MIN(date) date1, MAX(date) date2
FROM cte5
GROUP BY grp
HAVING MIN(usage) > 1
Result:
date1 date2
2018-01-05 2018-01-15
2018-01-19 2018-01-22
You are looking for date ranges where all units overlap. So look for start dates where all units exist and end dates where all units exist and then join the two.
I'm using ROW_NUMBER to join the first start date with the first end date, the second start date with the second end date and so on.
select s.startdate, e.enddate
from
(
select startdate, row_number() over (order by startdate) as rn
from #sourceTable s1
where
(
select count(*)
from #sourceTable s2
where s1.startdate between s2.startdate and s2.enddate
) = (select count(distinct unitid) from #sourceTable)
) s
join
(
select enddate, row_number() over (order by startdate) as rn
from #sourceTable s1
where
(
select count(*)
from #sourceTable s2
where s1.enddate between s2.startdate and s2.enddate
) = (select count(distinct unitid) from #sourceTable)
) e on e.rn = s.rn
order by s.startdate;
There may be more elegant ways to solve this, but I guess this query is at least easy to understand :-)
Rextester demo: https://rextester.com/GRRSW89045

T-SQL Calculate duration in months between different years of ranges

I have a table in SQL Server which contains the duration of a user working for different jobs. I need to calculate the total number of experience for user.
Declare #temp table(Id int, FromDate DATETIME, ToDate DATETIME)
INSERT INTO #temp ( Id ,FromDate ,ToDate )
VALUES ( 1 , '2003-1-08 06:55:56' , '2005-5-08 06:55:56'),
( 2 , '2000-10-08 06:55:56' , '2008-7-08 06:55:56'),
( 3 , '2013-6-08 06:55:56' , '2015-1-08 06:55:56'),
( 4 , '2006-4-08 06:55:56' , '2011-3-08 06:55:56' )
SELECT * FROM #temp
I want to calculate the total number of experience;
Id FromDate ToDate Difference IN Months
===================================================
1 2003-01-08 2005-05-08 28
2 2000-10-08 2008-07-08 93
3 2013-06-08 2015-01-08 19
4 2006-04-08 2011-03-08 59
after removing the years overlapping like 2003-2005 covers up in 2000-2008;
I got something like this:
Id FromDate ToDate Difference IN Months
===================================================
1 2000-10-08 2011-03-08 125
2 2013-06-08 2015-01-08 19
So the answer would be 125+19 = 144 Months.
Please help me to find a solution.
The syntax here is finding all FromDate that doesn't have an overlapping FromDate and ToDate interval and all ToDates that doesn't have an overlapping FromDate and ToDate interval. Giving them a rownumber according to the date value and matching them on that rownumber:
;WITH CTE as
(
SELECT min(Id) Id ,FromDate, row_number() over (ORDER BY FromDate) rn
FROM #temp x
WHERE
not exists
(SELECT * FROM #temp WHERE x.FromDate > FromDate and x.FromDate <= Todate)
GROUP BY FromDate
), CTE2 as
(
SELECT Max(Id) Id ,ToDate, row_number() over (ORDER BY ToDate) rn
FROM #temp x
WHERE
not exists
(SELECT * FROM #temp WHERE x.ToDate >= FromDate and x.ToDate < Todate)
GROUP BY ToDate
)
SELECT SUM(DateDiff(month, CTE.FromDate, CTE2.ToDate))
FROM CTE
JOIN CTE2
ON CTE.rn = CTE2.rn
Result:
144
You can try this
SELECT Set1.FromDate,MIN(List1.ToDate) AS ToDate, DATEDIFF(MONTH,Set1.FromDate,MIN(List1.ToDate))
FROM #temp Set1
INNER JOIN #temp List1 ON Set1.FromDate <= List1.ToDate
AND NOT EXISTS(SELECT * FROM #temp List2
WHERE List1.ToDate >= List2.FromDate AND List1.ToDate < List2.ToDate)
WHERE NOT EXISTS(SELECT * FROM #temp Set2
WHERE Set1.FromDate > Set2.FromDate AND Set1.FromDate <= Set2.ToDate)
GROUP BY Set1.FromDate
ORDER BY Set1.FromDate
You can try this code:
DECLARE #temp TABLE (ID INT, FromDate DATETIME, ToDate DATETIME)
INSERT INTO #temp (ID, FromDate, ToDate)
VALUES ( 1 , '2003-1-08 06:55:56' , '2005-5-08 06:55:56'),
( 2 , '2000-10-08 06:55:56' , '2008-7-08 06:55:56'),
( 3 , '2013-6-08 06:55:56' , '2015-1-08 06:55:56'),
( 4 , '2006-4-08 06:55:56' , '2011-3-08 06:55:56' )
SELECT
ID,
CONVERT(DATE, FromDate) AS FromDate,
CONVERT(DATE, ToDate) AS ToDate,
DATEDIFF(MONTH, FromDate, ToDate) AS [Difference IN Months]
INTO #tmp
FROM #temp
SELECT T1.ID AS ID1, T2.ID AS ID2, T2.ToDate, T1.[Difference IN Months] + T2.[Difference IN Months] AS [Difference IN Months]
INTO #tmp2
FROM #tmp T1
INNER JOIN #tmp T2
ON CAST(T1.ToDate AS DATE) = CAST(T2.FromDate AS DATE)
OR (YEAR(T1.ToDate) = YEAR(T2.FromDate) AND CAST(T1.ToDate AS DATE) < CAST(T2.FromDate AS DATE))
OR YEAR(T1.ToDate) = YEAR(T2.FromDate) - 1
DELETE #tmp WHERE ID IN (SELECT ID2 FROM #tmp2)
UPDATE #tmp
SET ToDate = (SELECT ToDate FROM #tmp2 WHERE #tmp.ID = ID1),
[Difference IN Months] = (SELECT [Difference IN Months] FROM #tmp2 WHERE #tmp.ID = ID1)
WHERE ID IN (SELECT ID1 FROM #tmp2)
SELECT
*,
ROW_NUMBER() OVER(ORDER BY FromDate) AS RF
INTO #tmp3
FROM #tmp
SELECT T1.ID AS ID1, T2.ID AS ID2, T1.ToDate
INTO #tmp4
FROM #tmp3 T1
INNER JOIN #tmp3 T2 ON T1.RF = T2.RF + 1
WHERE CAST(T1.FromDate AS DATE) < CAST(T2.ToDate AS DATE)
UPDATE #tmp
SET ToDate = (SELECT ToDate FROM #tmp4 WHERE #tmp.ID = ID2)
WHERE ID IN (SELECT ID2 FROM #tmp4)
DELETE #tmp WHERE ID IN (SELECT ID1 FROM #tmp4)
UPDATE #tmp
SET[Difference IN Months] = DATEDIFF(MONTH, FromDate, ToDate)
SELECT
ROW_NUMBER() OVER(ORDER BY FromDate) AS ID,
FromDate, ToDate, [Difference IN Months]
FROM #tmp
DROP TABLE #tmp
DROP TABLE #tmp2
DROP TABLE #tmp3
DROP TABLE #tmp4
Result:
ID FromDate ToDate Difference IN Months
===================================================
1 2000-10-08 2011-03-08 125
2 2013-06-08 2015-01-08 19

SQL Stairstep Query

I need some help producing a MS SQL 2012 query that will match the desired stair-step output. The rows summarize data by one date range (account submission date month), and the columns summarize it by another date range (payment date month)
Table 1: Accounts tracks accounts placed for collections.
CREATE TABLE [dbo].[Accounts](
[AccountID] [nchar](10) NOT NULL,
[SubmissionDate] [date] NOT NULL,
[Amount] [money] NOT NULL,
CONSTRAINT [PK_Accounts] PRIMARY KEY CLUSTERED (AccountID ASC))
INSERT INTO [dbo].[Accounts] VALUES ('1000', '2012-01-01', 1999.00)
INSERT INTO [dbo].[Accounts] VALUES ('1001', '2012-01-02', 100.00)
INSERT INTO [dbo].[Accounts] VALUES ('1002', '2012-02-05', 350.00)
INSERT INTO [dbo].[Accounts] VALUES ('1003', '2012-03-01', 625.00)
INSERT INTO [dbo].[Accounts] VALUES ('1004', '2012-03-10', 50.00)
INSERT INTO [dbo].[Accounts] VALUES ('1005', '2012-03-10', 10.00)
Table 2: Trans tracks payments made
CREATE TABLE [dbo].[Trans](
[TranID] [int] IDENTITY(1,1) NOT NULL,
[AccountID] [nchar](10) NOT NULL,
[TranDate] [date] NOT NULL,
[TranAmount] [money] NOT NULL,
CONSTRAINT [PK_Trans] PRIMARY KEY CLUSTERED (TranID ASC))
INSERT INTO [dbo].[Trans] VALUES (1000, '2012-01-15', 300.00)
INSERT INTO [dbo].[Trans] VALUES (1000, '2012-02-15', 300.00)
INSERT INTO [dbo].[Trans] VALUES (1000, '2012-03-15', 300.00)
INSERT INTO [dbo].[Trans] VALUES (1002, '2012-02-20', 325.00)
INSERT INTO [dbo].[Trans] VALUES (1002, '2012-04-20', 25.00)
INSERT INTO [dbo].[Trans] VALUES (1003, '2012-03-24', 625.00)
INSERT INTO [dbo].[Trans] VALUES (1004, '2012-03-28', 31.00)
INSERT INTO [dbo].[Trans] VALUES (1004, '2012-04-12', 5.00)
INSERT INTO [dbo].[Trans] VALUES (1005, '2012-04-08', 7.00)
INSERT INTO [dbo].[Trans] VALUES (1005, '2012-04-28', 3.00)
Here's what the desired output should look like
*Total Payments in Each Month*
SubmissionYearMonth TotalAmount | 2012-01 2012-02 2012-03 2012-04
--------------------------------------------------------------------
2012-01 2099.00 | 300.00 300.00 300.00 0.00
2012-02 350.00 | 325.00 0.00 25.00
2012-03 685.00 | 656.00 15.00
The first two columns sum Account.Amount grouping by month.
The last 4 columns sum the Tran.TranAmount, by month, for Accounts placed in the given month of the current row.
The query I've been working with feel close. I just don't have the lag correct.
Here's the query I'm working with thus far:
Select SubmissionYearMonth,
TotalAmount,
pt.[0] AS MonthOld0,
pt.[1] AS MonthOld1,
pt.[2] AS MonthOld2,
pt.[3] AS MonthOld3,
pt.[4] AS MonthOld4,
pt.[5] AS MonthOld5,
pt.[6] AS MonthOld6,
pt.[7] AS MonthOld7,
pt.[8] AS MonthOld8,
pt.[9] AS MonthOld9,
pt.[10] AS MonthOld10,
pt.[11] AS MonthOld11,
pt.[12] AS MonthOld12,
pt.[13] AS MonthOld13
From (
SELECT Convert(Char(4),Year(SubmissionDate)) + '-' + Right('00' + Convert(VarChar(2), DatePart(Month, SubmissionDate)),2) AS SubmissionYearMonth,
SUM(Amount) AS TotalAmount
FROM Accounts
GROUP BY Convert(Char(4),Year(SubmissionDate)) + '-' + Right('00' + Convert(VarChar(2), DatePart(Month, SubmissionDate)),2)
)
AS AccountSummary
OUTER APPLY
(
SELECT *
FROM (
SELECT CASE WHEN DATEDIFF(Month, SubmissionDate, TranDate) < 13
THEN DATEDIFF(Month, SubmissionDate, TranDate)
ELSE 13
END AS PaymentMonthAge,
TranAmount
FROM Trans INNER JOIN Accounts ON Trans.AccountID = Accounts.AccountID
Where Convert(Char(4),Year(TranDate)) + '-' + Right('00' + Convert(VarChar(2), DatePart(Month, TranDate)),2)
= AccountSummary.SubmissionYearMonth
) as TransTemp
PIVOT (SUM(TranAmount)
FOR PaymentMonthAge IN ([0],
[1],
[2],
[3],
[4],
[5],
[6],
[7],
[8],
[9],
[10],
[11],
[12],
[13])) as TransPivot
) as pt
It's producing the following output:
SubmissionYearMonth TotalAmount MonthOld0 MonthOld1 MonthOld2 MonthOld3 ...
2012-01 2099.00 300.00 NULL NULL NULL ...
2012-02 350.00 325.00 300.00 NULL NULL ...
2012-03 685.00 656.00 NULL 300.00 NULL ...
As for the column date headers. I'm not sure what the best option is here. I could add an additional set of columns and create a calculated value that I could use in the resulting report.
SQL Fiddle: http://www.sqlfiddle.com/#!6/272e5/1/0
Since you are using SQL Server 2012, we can use the Format function to make the date pretty. There is no need to group by the strings. Instead, I find it useful to use the proper data type for as long as I can and only use Format or Convert on display (or not at all and let the middle tier handle the display).
In this solution, I arbitrarily assumed the earliest TransDate and extract from it, the first day of that month. However, one could easily replace that expression with a static value of the start date desired and this solution would take that and the next 12 months.
With SubmissionMonths As
(
Select DateAdd(d, -Day(A.SubmissionDate) + 1, A.SubmissionDate) As SubmissionMonth
, A.Amount
From dbo.Accounts As A
)
, TranMonths As
(
Select DateAdd(d, -Day(Min( T.TranDate )) + 1, Min( T.TranDate )) As TranMonth
, 1 As MonthNum
From dbo.Accounts As A
Join dbo.Trans As T
On T.AccountId = A.AccountId
Join SubmissionMonths As M
On A.SubmissionDate >= M.SubmissionMonth
And A.SubmissionDate < DateAdd(m,1,SubmissionMonth)
Union All
Select DateAdd(m, 1, TranMonth), MonthNum + 1
From TranMonths
Where MonthNum < 12
)
, TotalBySubmissionMonth As
(
Select M.SubmissionMonth, Sum( M.Amount ) As Total
From SubmissionMonths As M
Group By M.SubmissionMonth
)
Select Format(SMT.SubmissionMonth,'yyyy-MM') As SubmissionMonth, SMT.Total
, Sum( Case When TM.MonthNum = 1 Then T.TranAmount End ) As Month1
, Sum( Case When TM.MonthNum = 2 Then T.TranAmount End ) As Month2
, Sum( Case When TM.MonthNum = 3 Then T.TranAmount End ) As Month3
, Sum( Case When TM.MonthNum = 4 Then T.TranAmount End ) As Month4
, Sum( Case When TM.MonthNum = 5 Then T.TranAmount End ) As Month5
, Sum( Case When TM.MonthNum = 6 Then T.TranAmount End ) As Month6
, Sum( Case When TM.MonthNum = 7 Then T.TranAmount End ) As Month7
, Sum( Case When TM.MonthNum = 8 Then T.TranAmount End ) As Month8
, Sum( Case When TM.MonthNum = 9 Then T.TranAmount End ) As Month9
, Sum( Case When TM.MonthNum = 10 Then T.TranAmount End ) As Month10
, Sum( Case When TM.MonthNum = 11 Then T.TranAmount End ) As Month11
, Sum( Case When TM.MonthNum = 12 Then T.TranAmount End ) As Month12
From TotalBySubmissionMonth As SMT
Join dbo.Accounts As A
On A.SubmissionDate >= SMT.SubmissionMonth
And A.SubmissionDate < DateAdd(m,1,SMT.SubmissionMonth)
Join dbo.Trans As T
On T.AccountId = A.AccountId
Join TranMonths As TM
On T.TranDate >= TM.TranMonth
And T.TranDate < DateAdd(m,1,TM.TranMonth)
Group By SMT.SubmissionMonth, SMT.Total
SQL Fiddle version
The following query pretty much returns what you want. You need to do the to operations separately. I just join the results together:
select a.yyyymm, a.Amount,
t201201, t201202, t201203, t201204
from (select LEFT(convert(varchar(255), a.submissiondate, 121), 7) as yyyymm,
SUM(a.Amount) as amount
from Accounts a
group by LEFT(convert(varchar(255), a.submissiondate, 121), 7)
) a left outer join
(select LEFT(convert(varchar(255), a.submissiondate, 121), 7) as yyyymm,
sum(case when trans_yyyymm = '2012-01' then tranamount end) as t201201,
sum(case when trans_yyyymm = '2012-02' then tranamount end) as t201202,
sum(case when trans_yyyymm = '2012-03' then tranamount end) as t201203,
sum(case when trans_yyyymm = '2012-04' then tranamount end) as t201204
from Accounts a join
(select t.*, LEFT(convert(varchar(255), t.trandate, 121), 7) as trans_yyyymm
from trans t
) t
on a.accountid = t.accountid
group by LEFT(convert(varchar(255), a.submissiondate, 121), 7)
) t
on a.yyyymm = t.yyyymm
order by 1
I am getting a NULL where you have a 0.00 in two cells.
Thomas, I used your response as inspiration for the following solution I ended up using.
I first create a SubmissionDate, TranDate cross join skeleton date matrix, that I later use to join on the AccountSummary and TranSummary data.
The resulting query output isn't formatted in columns, per TranDate month. Rather I'm using output in a SQL Server Reporting Services matrix, and using a column grouping, based off the TranSummaryMonthNum column, to get the desired formatted output.
SQL Fiddle version
;
WITH
--Generate a list of Dates, from the first SubmissionDate, through today.
--Note: Requires the use of: 'OPTION (MAXRECURSION 0)' to generate a list with more than 100 dates.
CTE_AutoDates AS
( Select Min(SubmissionDate) as FiscalDate
From Accounts
UNION ALL
SELECT DATEADD(Day, 1, FiscalDate)
FROM CTE_AutoDates
WHERE DATEADD(Day, 1, FiscalDate) <= GetDate()
),
FiscalDates As
( SELECT FiscalDate,
DATEFROMPARTS(Year(FiscalDate), Month(FiscalDate), 1) as FiscalMonthStartDate
FROM CTE_AutoDates
--Optionaly filter Fiscal Dates by the last known Math.Max(SubmissionDate, TranDate)
Where FiscalDate <= (Select Max(MaxDate)
From (Select Max(SubmissionDate) as MaxDate From Accounts
Union All
Select Max(TranDate) as MaxDate From Trans
) as MaxDates
)
),
FiscalMonths as
( SELECT Distinct FiscalMonthStartDate
FROM FiscalDates
),
--Matrix to store the reporting date groupings for the Account submission and payment periods.
SubmissionAndTranMonths AS
( Select AM.FiscalMonthStartDate as SubmissionMonthStartDate,
TM.FiscalMonthStartDate as TransMonthStartDate,
DateDiff(Month, (Select Min(FiscalMonthStartDate) From FiscalMonths), TM.FiscalMonthStartDate) as TranSummaryMonthNum
From FiscalMonths AS AM
Join FiscalMonths AS TM
ON TM.FiscalMonthStartDate >= AM.FiscalMonthStartDate
),
AccountData as
( Select A.AccountID,
A.Amount,
FD.FiscalMonthStartDate as SubmissionMonthStartDate
From Accounts as A
Inner Join FiscalDates as FD
ON A.SubmissionDate = FD.FiscalDate
),
TranData as
( Select T.AccountID,
T.TranAmount,
AD.SubmissionMonthStartDate,
FD.FiscalMonthStartDate as TranMonthStartDate
From Trans as T
Inner Join AccountData as AD
ON T.AccountID = AD.AccountID
Inner Join FiscalDates AS FD
ON T.TranDate = FD.FiscalDate
),
AccountSummaryByMonth As
( Select ASM.FiscalMonthStartDate,
Sum(AD.Amount) as TotalSubmissionAmount
From FiscalMonths as ASM
Inner Join AccountData as AD
ON ASM.FiscalMonthStartDate = AD.SubmissionMonthStartDate
Group By
ASM.FiscalMonthStartDate
),
TranSummaryByMonth As
( Select STM.SubmissionMonthStartDate,
STM.TransMonthStartDate,
STM.TranSummaryMonthNum,
Sum(TD.TranAmount) as TotalTranAmount
From SubmissionAndTranMonths as STM
Inner Join TranData as TD
ON STM.SubmissionMonthStartDate = TD.SubmissionMonthStartDate
AND STM.TransMonthStartDate = TD.TranMonthStartDate
Group By
STM.SubmissionMonthStartDate,
STM.TransMonthStartDate,
STM.TranSummaryMonthNum
)
--#Inspect 1
--Select * From SubmissionAndTranMonths
--OPTION (MAXRECURSION 0)
--#Inspect 1 Results
--SubmissionMonthStartDate TransMonthStartDate TranSummaryMonthNum
--2012-01-01 2012-01-01 0
--2012-01-01 2012-02-01 1
--2012-01-01 2012-03-01 2
--2012-01-01 2012-04-01 3
--2012-02-01 2012-02-01 1
--2012-02-01 2012-03-01 2
--2012-02-01 2012-04-01 3
--2012-03-01 2012-03-01 2
--2012-03-01 2012-04-01 3
--2012-04-01 2012-04-01 3
--#Inspect 2
--Select * From AccountSummaryByMonth
--OPTION (MAXRECURSION 0)
--#Inspect 2 Results
--FiscalMonthStartDate TotalSubmissionAmount
--2012-01-01 2099.00
--2012-02-01 350.00
--2012-03-01 685.00
--#Inspect 3
--Select * From TranSummaryByMonth
--OPTION (MAXRECURSION 0)
--#Inspect 3 Results
--SubmissionMonthStartDate TransMonthStartDate TranSummaryMonthNum TotalTranAmount
--2012-01-01 2012-01-01 0 300.00
--2012-01-01 2012-02-01 1 300.00
--2012-01-01 2012-03-01 2 300.00
--2012-02-01 2012-02-01 1 325.00
--2012-02-01 2012-04-01 3 25.00
--2012-03-01 2012-03-01 2 656.00
--2012-03-01 2012-04-01 3 15.00
Select STM.SubmissionMonthStartDate,
ASM.TotalSubmissionAmount,
STM.TransMonthStartDate,
STM.TranSummaryMonthNum,
TSM.TotalTranAmount
From SubmissionAndTranMonths as STM
Inner Join AccountSummaryByMonth as ASM
ON STM.SubmissionMonthStartDate = ASM.FiscalMonthStartDate
Left Join TranSummaryByMonth AS TSM
ON STM.SubmissionMonthStartDate = TSM.SubmissionMonthStartDate
AND STM.TransMonthStartDate = TSM.TransMonthStartDate
Order By STM.SubmissionMonthStartDate, STM.TranSummaryMonthNum
OPTION (MAXRECURSION 0)
--#Results
--SubmissionMonthStartDate TotalSubmissionAmount TransMonthStartDate TranSummaryMonthNum TotalTranAmount
--2012-01-01 2099.00 2012-01-01 0 300.00
--2012-01-01 2099.00 2012-02-01 1 300.00
--2012-01-01 2099.00 2012-03-01 2 300.00
--2012-01-01 2099.00 2012-04-01 3 NULL
--2012-02-01 350.00 2012-02-01 1 325.00
--2012-02-01 350.00 2012-03-01 2 NULL
--2012-02-01 350.00 2012-04-01 3 25.00
--2012-03-01 685.00 2012-03-01 2 656.00
--2012-03-01 685.00 2012-04-01 3 15.00
The following query exactly duplicates the results of your final query in your own answer but takes no more than 1/30th the CPU (or better), plus is a whole lot simpler.
If I had the time & energy I am sure I could find even more improvements... my gut tells me I might not have to hit the Accounts table so many times. But in any case, it's a huge improvement and should perform very well even for very large result sets.
See the SqlFiddle for it.
WITH L0 AS (SELECT 1 N UNION ALL SELECT 1),
L1 AS (SELECT 1 N FROM L0, L0 B),
L2 AS (SELECT 1 N FROM L1, L1 B),
L3 AS (SELECT 1 N FROM L2, L2 B),
L4 AS (SELECT 1 N FROM L3, L2 B),
Nums AS (SELECT N = Row_Number() OVER (ORDER BY (SELECT 1)) FROM L4),
Anchor AS (
SELECT MinDate = DateAdd(month, DateDiff(month, '20000101', Min(SubmissionDate)), '20000101')
FROM dbo.Accounts
),
MNums AS (
SELECT N
FROM Nums
WHERE
N <= DateDiff(month,
(SELECT MinDate FROM Anchor),
(SELECT Max(TranDate) FROM dbo.Trans)
) + 1
),
A AS (
SELECT
AM.AccountMo,
Amount = Sum(A.Amount)
FROM
dbo.Accounts A
CROSS APPLY (
SELECT DateAdd(month, DateDiff(month, '20000101', A.SubmissionDate), '20000101')
) AM (AccountMo)
GROUP BY
AM.AccountMo
), T AS (
SELECT
AM.AccountMo,
TM.TranMo,
TotalTranAmount = Sum(T.TranAmount)
FROM
dbo.Accounts A
CROSS APPLY (
SELECT DateAdd(month, DateDiff(month, '20000101', A.SubmissionDate), '20000101')
) AM (AccountMo)
INNER JOIN dbo.Trans T
ON A.AccountID = T.AccountID
CROSS APPLY (
SELECT DateAdd(month, DateDiff(month, '20000101', T.TranDate), '20000101')
) TM (TranMo)
GROUP BY
AM.AccountMo,
TM.TranMo
)
SELECT
SubmissionStartMonth = A.AccountMo,
TotalSubmissionAmount = A.Amount,
M.TransMonth,
TransMonthNum = N.N - 1,
T.TotalTranAmount
FROM
A
INNER JOIN MNums N
ON N.N >= DateDiff(month, (SELECT MinDate FROM Anchor), A.AccountMo) + 1
CROSS APPLY (
SELECT TransMonth = DateAdd(month, N.N - 1, (SELECT MinDate FROM Anchor))
) M
LEFT JOIN T
ON A.AccountMo = T.AccountMo
AND M.TransMonth = T.TranMo
ORDER BY
A.AccountMo,
M.TransMonth;