SQL Select: Get the previous Date in column - sql

I have a table in my SQL server with some dates. Now I would like to create a Select which gives me a column with all dates then a second column with the previous dates of the first column and a third column with the previous dates of the previous date column(c2).
For Exempel:
c1(orginal) c2(prevoius of c1) c3(previous of c2)
2017-10-15 00:00:00 2017-04-15 00:00:00 2016-10-15 00:00:00
2017-04-15 00:00:00 2016-10-15 00:00:00 2016-04-15 00:00:00
2016-10-15 00:00:00 2016-04-15 00:00:00 2015-10-15 00:00:00
2016-04-15 00:00:00 2015-10-15 00:00:00 null
2015-10-15 00:00:00 null null
Example with colors:
Is it possible to make a SELECT where the first row would be the first date from column 1, the second from column 1 and the third from column 1. The second row would be the second date from column1, the third from column 1 and the forth from column 1.
My current query
SELECT DISTINCT(BFSSTudStichdatum) AS C1, BFSSTudStichdatum AS C2,
BFSSTudStichdatum AS C3 FROM BFSStudierende
ORDER BY C1 DESC
result:

Because you need to get a distinct list of your dates first, you will need to split your query into a common table expression and then use lag to get your c2 and c3 values:
declare #t table(c1 datetime);
insert into #t values ('2017-10-15 00:00:00'),('2017-04-15 00:00:00'),('2016-10-15 00:00:00'),('2016-04-15 00:00:00'),('2015-10-15 00:00:00')
,('2017-10-15 00:00:00'),('2017-04-15 00:00:00'),('2016-10-15 00:00:00'),('2016-04-15 00:00:00'),('2015-10-15 00:00:00');
with c as
(
select distinct c1
from #t
)
select c1
,lag(c1, 1) over (order by c1) as c2
,lag(c1, 2) over (order by c1) as c3
from c
order by c1 desc;
Output:
+-------------------------+-------------------------+-------------------------+
| c1 | c2 | c3 |
+-------------------------+-------------------------+-------------------------+
| 2017-10-15 00:00:00.000 | 2017-04-15 00:00:00.000 | 2016-10-15 00:00:00.000 |
| 2017-04-15 00:00:00.000 | 2016-10-15 00:00:00.000 | 2016-04-15 00:00:00.000 |
| 2016-10-15 00:00:00.000 | 2016-04-15 00:00:00.000 | 2015-10-15 00:00:00.000 |
| 2016-04-15 00:00:00.000 | 2015-10-15 00:00:00.000 | NULL |
| 2015-10-15 00:00:00.000 | NULL | NULL |
+-------------------------+-------------------------+-------------------------+

Are you looking for lag()?
select col1,
lag(col1, 1) over (order by col1) as col1_prev,
lag(col1, 2) over (order by col1) as col1_prev2
from t;

For SQL Server 2008 and later:
WITH DataSource AS
(
SELECT DISTINCT *
,DENSE_RANK() OVER (ORDER BY c1) rowID
FROM #t
)
SELECT DS1.[c1]
,DS2.[c1]
,DS3.[c1]
FROM DataSource DS1
LEFT JOIN DataSource DS2
ON DS1.[rowID] = DS2.[rowID] + 1
LEFT JOIN DataSource DS3
ON DS1.[rowID] = DS3.[rowID] + 2;

For SQL Server 2008 and later:
Hope you did want an auto column generation with lagging one value behind from the past columns first value. Try the following snippet.
Created a dynamic query with respect to the number of columns in the dataset.
create table BFSStudierende
(
BFSSTudStichdatum datetime
)
insert into BFSStudierende
Select getdate()
union
Select dateadd(day,1,getdate())
union
Select dateadd(day,2,getdate())
union
Select dateadd(day,3,getdate())
union
Select dateadd(day,4,getdate())
Declare #count int=(Select count(BFSSTudStichdatum ) from BFSStudierende)
Declare #query nvarchar(max)='with BFSStudierendeCte as (Select *,row_number() over(order by BFSSTudStichdatum)rn from BFSStudierende) Select *from BFSStudierendeCte as BFSStudierendeCte1'
Declare #i int=2 ;
Declare #j int ;
while(#i<=#count)
begin
Set #j=#i-1
Set #query=#query+' left outer join BFSStudierendeCte as BFSStudierendeCte'+cast(#i as varchar(5)) +' on BFSStudierendeCte1.rn+'+cast(#j as varchar(5))+'=BFSStudierendeCte'+cast(#i as varchar(5))+'.rn';
set #i+=1;
End
print #query
Execute(#query)
Note: Duplicate date will not be removed from the results. If you require duplicate to be removed. Please change the following line in the above snippet.
Declare #count int=(Select count(distinct BFSSTudStichdatum ) from BFSStudierende)
Declare #query nvarchar(max)='with BFSStudierendeCte as (Select *,row_number() over(order by BFSSTudStichdatum)rn from(Select distinct BFSSTudStichdatum from BFSStudierende)l ) Select *from BFSStudierendeCte as BFSStudierendeCte1'

Related

How to repeat previous row values until another field changes

I am trying to repeat a value from one row if there is no change between a column value in the row before. Thanks in advance!
create table #results (
[Floor] varchar(20)
,Room varchar(20)
,CheckInTime datetime
)
INSERT INTO #results
VALUES
('Floor1','Room1','2020-01-01 12:00:00'),
('Floor1','Room2','2020-01-05 19:00:00'),
('Floor1','Room3','2020-01-20 08:02:00'),
('Floor2','Room1','2020-01-23 19:32:00'),
('Floor1','Room1','2020-02-01 20:00:00')
And the expected result I am looking for is
in the "WantedValue" column. So if Floor is the same as the row before it, use the CheckInTime value from the row before. But this needs to carry down through multiple rows potentially.
I am interpreting the question as you want the minimum date/time every time the floor changes.
Use lag() and a cumulative sum to define the groups. Then use a window function to "spread" the minimum value:
select r.*,
min(checkintime) over (partition by grp order by checkintime) as wantedvalue
from (select r.*,
sum(case when prev_floor = floor then 0 else 1 end) over (order by checkintime) as grp
from (select r.*,
lag(floor) over (order by checkintime) as prev_floor
from results r
) r
) r
order by checkintime;
Here is a db<>fiddle.
This should wok
create table #results (
[Floor] varchar(20)
,Room varchar(20)
,CheckInTime datetime
)
INSERT INTO #results
VALUES
('Floor1','Room1','2020-01-01 12:00:00'),
('Floor1','Room2','2020-01-05 19:00:00'),
('Floor1','Room3','2020-01-20 08:02:00'),
('Floor2','Room1','2020-01-23 19:32:00'),
('Floor1','Room1','2020-02-01 20:00:00')
select
a.Floor,
a.Room,
a.CheckInTime,
b.CheckInTime
from #results a
left Join
(
select [Floor],MIN(CheckInTime) AS CheckInTime
from #results group by [Floor]
) b
ON a.[Floor]=b.[Floor]
drop table #results
Output
Floor Room CheckInTime CheckInTime
Floor1 Room1 2020-01-01 12:00:00.000 2020-01-01 12:00:00.000
Floor1 Room2 2020-01-05 19:00:00.000 2020-01-01 12:00:00.000
Floor1 Room3 2020-01-20 08:02:00.000 2020-01-01 12:00:00.000
Floor2 Room1 2020-01-23 19:32:00.000 2020-01-23 19:32:00.000
Floor1 Room1 2020-02-01 20:00:00.000 2020-01-01 12:00:00.000

Date range with minimum and maximum dates from dataset having records with continuous date range

I have a dataset with id ,Status and date range of employees.
The input dataset given below are the details of one employee.
The date ranges in the records are continuous(in exact order) such that startdate of second row will be the next date of enddate of first row.
If an employee takes leave continuously for different months, then the table is storing the info with date range as separated for different months.
For example: In the input set, the employee has taken Sick leave from '16-10-2016' to '31-12-2016' and joined back on '1-1-2017'.
So there are 3 records for this item but the dates are continuous.
In the output I need this as one record as shown in the expected output dataset.
INPUT
Id Status StartDate EndDate
1 Active 1-9-2007 15-10-2016
1 Sick 16-10-2016 31-10-2016
1 Sick 1-11-2016 30-11-2016
1 Sick 1-12-2016 31-12-2016
1 Active 1-1-2017 4-2-2017
1 Unpaid 5-2-2017 9-2-2017
1 Active 10-2-2017 11-2-2017
1 Unpaid 12-2-2017 28-2-2017
1 Unpaid 1-3-2017 31-3-2017
1 Unpaid 1-4-2017 30-4-2017
1 Active 1-5-2017 13-10-2017
1 Sick 14-10-2017 11-11-2017
1 Active 12-11-2017 NULL
EXPECTED OUTPUT
Id Status StartDate EndDate
1 Active 1-9-2007 15-10-2016
1 Sick 16-10-2016 31-12-2016
1 Active 1-1-2017 4-2-2017
1 Unpaid 5-2-2017 9-2-2017
1 Active 10-2-2017 11-2-2017
1 Unpaid 12-2-2017 30-4-2017
1 Active 1-5-2017 13-10-2017
1 Sick 14-10-2017 11-11-2017
1 Active 12-11-2017 NULL
I can't take min(startdate) and max(EndDate) group by id,status because if the same employee has taken another Sick leave then that end date ('11-11-2017' in the example) will come as the End date.
can anyone help me with the query in SQL server 2014?
It suddenly hit me that this is basically a gaps and islands problem - so I've completely changed my solution.
For this solution to work, the dates does not have to be consecutive.
First, create and populate sample table (Please save us this step in your future questions):
DECLARE #T AS TABLE
(
Id int,
Status varchar(10),
StartDate date,
EndDate date
);
SET DATEFORMAT DMY; -- This is needed because how you specified your dates.
INSERT INTO #T (Id, Status, StartDate, EndDate) VALUES
(1, 'Active', '1-9-2007', '15-10-2016'),
(1, 'Sick', '16-10-2016', '31-10-2016'),
(1, 'Sick', '1-11-2016', '30-11-2016'),
(1, 'Sick', '1-12-2016', '31-12-2016'),
(1, 'Active', '1-1-2017', '4-2-2017'),
(1, 'Unpaid', '5-2-2017', '9-2-2017'),
(1, 'Active', '10-2-2017', '11-2-2017'),
(1, 'Unpaid', '12-2-2017', '28-2-2017'),
(1, 'Unpaid', '1-3-2017', '31-3-2017'),
(1, 'Unpaid', '1-4-2017', '30-4-2017'),
(1, 'Active', '1-5-2017', '13-10-2017'),
(1, 'Sick', '14-10-2017', '11-11-2017'),
(1, 'Active', '12-11-2017', NULL);
The (new) common table expression:
;WITH CTE AS
(
SELECT Id,
Status,
StartDate,
EndDate,
ROW_NUMBER() OVER(PARTITION BY Id ORDER BY StartDate)
- ROW_NUMBER() OVER(PARTITION BY Id, Status ORDER BY StartDate) As IslandId,
ROW_NUMBER() OVER(PARTITION BY Id ORDER BY StartDate DESC)
- ROW_NUMBER() OVER(PARTITION BY Id, Status ORDER BY StartDate DESC) As ReverseIslandId
FROM #T
)
The (new) query:
SELECT DISTINCT Id,
Status,
MIN(StartDate) OVER(PARTITION BY IslandId, ReverseIslandId) As StartDate,
NULLIF(MAX(ISNULL(EndDate, '9999-12-31')) OVER(PARTITION BY IslandId, ReverseIslandId), '9999-12-31') As EndDate
FROM CTE
ORDER BY StartDate
(new) Results:
Id Status StartDate EndDate
1 Active 01.09.2007 15.10.2016
1 Sick 16.10.2016 31.12.2016
1 Active 01.01.2017 04.02.2017
1 Unpaid 05.02.2017 09.02.2017
1 Active 10.02.2017 11.02.2017
1 Unpaid 12.02.2017 30.04.2017
1 Active 01.05.2017 13.10.2017
1 Sick 14.10.2017 11.11.2017
1 Active 12.11.2017 NULL
You can see a live demo on rextester.
Please note that string representation of dates in SQL should be acccording to ISO 8601 - meaning either yyyy-MM-dd or yyyyMMdd as it's unambiguous and will always be interpreted correctly by SQL Server.
It's an example of GROUPING AND WINDOW.
First you set a reset point for each Status
Sum to set a group
Then get max/min dates of each group.
;with x as
(
select Id, Status, StartDate, EndDate,
iif (lag(Status) over (order by Id, StartDate) = Status, null, 1) rst
from emp
), y as
(
select Id, Status, StartDate, EndDate,
sum(rst) over (order by Id, StartDate) grp
from x
)
select Id,
MIN(Status) as Status,
MIN(StartDate) StartDate,
MAX(EndDate) EndDate
from y
group by Id, grp
order by Id, grp
GO
Id | Status | StartDate | EndDate
-: | :----- | :------------------ | :------------------
1 | Active | 01/09/2007 00:00:00 | 15/10/2016 00:00:00
1 | Sick | 16/10/2016 00:00:00 | 31/12/2016 00:00:00
1 | Active | 01/01/2017 00:00:00 | 04/02/2017 00:00:00
1 | Unpaid | 05/02/2017 00:00:00 | 09/02/2017 00:00:00
1 | Active | 10/02/2017 00:00:00 | 11/02/2017 00:00:00
1 | Unpaid | 12/02/2017 00:00:00 | 30/04/2017 00:00:00
1 | Active | 01/05/2017 00:00:00 | 13/10/2017 00:00:00
1 | Sick | 14/10/2017 00:00:00 | 11/11/2017 00:00:00
1 | Active | 12/11/2017 00:00:00 | null
dbfiddle here
Here's an alternative answer that doesn't use LAG.
First I need to take a copy of your test data:
DECLARE #table TABLE (Id INT, [Status] VARCHAR(50), StartDate DATE, EndDate DATE);
INSERT INTO #table SELECT 1, 'Active', '20070901', '20161015';
INSERT INTO #table SELECT 1, 'Sick', '20161016', '20161031';
INSERT INTO #table SELECT 1, 'Sick', '20161101', '20161130';
INSERT INTO #table SELECT 1, 'Sick', '20161201', '20161231';
INSERT INTO #table SELECT 1, 'Active', '20170101', '20170204';
INSERT INTO #table SELECT 1, 'Unpaid', '20170205', '20170209';
INSERT INTO #table SELECT 1, 'Active', '20170210', '20170211';
INSERT INTO #table SELECT 1, 'Unpaid', '20170212', '20170228';
INSERT INTO #table SELECT 1, 'Unpaid', '20170301', '20170331';
INSERT INTO #table SELECT 1, 'Unpaid', '20170401', '20170430';
INSERT INTO #table SELECT 1, 'Active', '20170501', '20171013';
INSERT INTO #table SELECT 1, 'Sick', '20171014', '20171111';
INSERT INTO #table SELECT 1, 'Active', '20171112', NULL;
Then the query is:
WITH add_order AS (
SELECT
*,
ROW_NUMBER() OVER (ORDER BY StartDate) AS order_id
FROM
#table),
links AS (
SELECT
a1.Id,
a1.[Status],
a1.order_id,
MIN(a1.order_id) AS start_order_id,
MAX(ISNULL(a2.order_id, a1.order_id)) AS end_order_id,
MIN(a1.StartDate) AS StartDate,
MAX(ISNULL(a2.EndDate, a1.EndDate)) AS EndDate
FROM
add_order a1
LEFT JOIN add_order a2 ON a2.Id = a1.Id AND a2.[Status] = a1.[Status] AND a2.order_id = a1.order_id + 1 AND a2.StartDate = DATEADD(DAY, 1, a1.EndDate)
GROUP BY
a1.Id,
a1.[Status],
a1.order_id),
merged AS (
SELECT
l1.Id,
l1.[Status],
l1.[StartDate],
ISNULL(l2.EndDate, l1.EndDate) AS EndDate,
ROW_NUMBER() OVER (PARTITION BY l1.Id, l1.[Status], ISNULL(l2.EndDate, l1.EndDate) ORDER BY l1.order_id) AS link_id
FROM
links l1
LEFT JOIN links l2 ON l2.order_id = l1.end_order_id)
SELECT
Id,
[Status],
StartDate,
EndDate
FROM
merged
WHERE
link_id = 1
ORDER BY
StartDate;
Results are:
Id Status StartDate EndDate
1 Active 2007-09-01 2016-10-15
1 Sick 2016-10-16 2016-12-31
1 Active 2017-01-01 2017-02-04
1 Unpaid 2017-02-05 2017-02-09
1 Active 2017-02-10 2017-02-11
1 Unpaid 2017-02-12 2017-04-30
1 Active 2017-05-01 2017-10-13
1 Sick 2017-10-14 2017-11-11
1 Active 2017-11-12 NULL
How does it work? First I add a sequence number, to assist with merging contiguous rows together. Then I determine the rows that can be merged together, add a number to identify the first row in each set that can be merged, and finally pick the first rows out of the final CTE. Note that I also have to handle rows that can't be merged, hence the LEFT JOINs and ISNULL statements.
Just for interest, this is what the output from the final CTE looks like, before I filter out all but the rows with a link_id of 1:
Id Status StartDate EndDate link_id
1 Active 2007-09-01 2016-10-15 1
1 Sick 2016-10-16 2016-12-31 1
1 Sick 2016-11-01 2016-12-31 2
1 Sick 2016-12-01 2016-12-31 3
1 Active 2017-01-01 2017-02-04 1
1 Unpaid 2017-02-05 2017-02-09 1
1 Active 2017-02-10 2017-02-11 1
1 Unpaid 2017-02-12 2017-04-30 1
1 Unpaid 2017-03-01 2017-04-30 2
1 Unpaid 2017-04-01 2017-04-30 3
1 Active 2017-05-01 2017-10-13 1
1 Sick 2017-10-14 2017-11-11 1
1 Active 2017-11-12 NULL 1
You could use lag() and lead() function together to check the previous and next status
WITH CTE AS
(
select *,
COALESCE(LEAD(status) OVER(ORDER BY (select 1)), '0') Nstatus,
COALESCE(LAG(status) OVER(ORDER BY (select 1)), '0') Pstatus
from table
)
SELECT * FROM CTE
WHERE (status <> Nstatus AND status <> Pstatus) OR
(status <> Pstatus)

SQL breakout date range to rows

I am trying to take given date ranges found in a data set and divide them into unique rows for each day in the range (example below). Doing the opposite in SQL is pretty straight forward, but I am struggling to achieve the desired query output.
Beginning data:
ITEM START_DATE END_DATE
A 1/1/2015 1/5/2015
B 2/5/2015 2/7/2015
Desired query output:
ITEM DATE_COVERED
A 1/1/2015
A 1/2/2015
A 1/3/2015
A 1/4/2015
A 1/5/2015
B 2/5/2015
B 2/6/2015
B 2/7/2015
The fastest way will be some tally table:
DECLARE #t TABLE
(
ITEM CHAR(1) ,
START_DATE DATE ,
END_DATE DATE
)
INSERT INTO #t
VALUES ( 'A', '1/1/2015', '1/5/2015' ),
( 'B', '2/5/2015', '2/7/2015' )
;WITH cte AS(SELECT -1 + ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) d FROM
(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t1(n) CROSS JOIN
(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t2(n) CROSS JOIN
(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t3(n) CROSS JOIN
(VALUES(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) t4(n))
SELECT t.ITEM, ca.DATE_COVERED FROM #t t
CROSS APPLY(SELECT DATEADD(dd, d, t.START_DATE) AS DATE_COVERED
FROM cte
WHERE DATEADD(dd, d, t.START_DATE) BETWEEN t.START_DATE AND t.END_DATE) ca
ORDER BY t.ITEM, ca.DATE_COVERED
Query:
SQLFiddleExample
SELECT t.ITEM,
DATEADD(day,n.number, t.START_DATE) AS DATE_COVERED
FROM Table1 t,
(SELECT number
FROM master..spt_values
WHERE [type] = 'P') n
WHERE START_DATE <= DATEADD(day,n.number, t.START_DATE)
AND END_DATE >= DATEADD(day,n.number, t.START_DATE)
Result:
| ITEM | DATE_COVERED |
|------|--------------|
| A | 2015-01-01 |
| A | 2015-01-02 |
| A | 2015-01-03 |
| A | 2015-01-04 |
| A | 2015-01-05 |
| B | 2015-02-05 |
| B | 2015-02-06 |
| B | 2015-02-07 |
NOTE: this only works if the difference between your startdate and enddate is a maximum of 2047 days (master..spt_values only allows 0..2047 range of values)
select item, dateadd(d,v.number,d.start_date) adate
from begindata d
join master..spt_values v on v.type='P'
and v.number between 0 and datediff(d, start_date, end_date)
order by adate;
I'd like to say I did this myself but I got the code from this
Here is a fiddle with your expected result
TRY THIS...
CREATE TABLE Table1
([ITEM] varchar(1), [START_DATE] date, [END_DATE] date)
;
INSERT INTO Table1
([ITEM], [START_DATE], [END_DATE])
VALUES ('A', '2015-01-01', '2015-01-05'), ('B', '2015-02-05', 2015-02-07');
WITH Days
AS ( SELECT ITEM, START_DATE AS [Date], 1 AS [level] from Table1
UNION ALL
SELECT TABLE1.ITEM, DATEADD(DAY, 1, [Date]), [level] + 1
FROM Days,Table1
WHERE DAYS.ITEM=TABLE1.ITEM AND [Date] < END_DATE )
SELECT distinct [Date]
FROM Days
DEMO

How can I solve this query?

i have one table contain 2 fields named as date,val
datas are
date val
2014-08-01 A
2014-08-02 B
2014-08-03 A
2014-08-04 A
2014-08-05 B
2014-08-06 B
2014-08-07 A
2014-08-08 A
2014-08-09 B
2014-08-10 A
2014-08-11 A
i want a table the output like this
MIN MAX A B
2014-08-01 2014-08-05 3 2
2014-08-06 2014-08-11 4 2
The result will be calculated based on number of days, like for 5 days. in this question I selected 5 days interval.From this table i want to find the count of value 'A' and the count of value 'B' in the selected interval.
Can any one find a solution for this. Any help will be appreciated.
i think this will help you :
DECLARE #MinDate DATETIME
DECLARE #MaxDate DATETIME
DECLARE #StartDate DATETIME
DECLARE #DayCount INT
DECLARE #A_Count INT
DECLARE #B_Count INT
SET #DayCount = 5
SELECT TOP 1 #MinDate=[date] FROM table_1 ORDER BY date
SELECT TOP 1 #MaxDate=[date] FROM table_1 ORDER BY date DESC
CREATE TABLE #temp_table (
_Min DATETIME,
_Max DATETIME,
_A INT,
_B INT
)
SET #StartDate=#MinDate
WHILE #StartDate < #MaxDate
BEGIN
SELECT #A_Count = COUNT(*) FROM table_1 WHERE date BETWEEN #StartDate AND DATEADD(dd,#DayCount-1,#StartDate) AND val = 'A'
SELECT #B_Count = COUNT(*) FROM table_1 WHERE date BETWEEN #StartDate AND DATEADD(dd,#DayCount-1,#StartDate) AND val = 'B'
INSERT INTO #temp_table VALUES (#StartDate,DATEADD(dd,#DayCount-1,#StartDate),#A_Count,#B_Count)
SET #StartDate = DATEADD(dd,#DayCount,#StartDate)
END
SELECT * FROM #temp_table
DROP TABLE #temp_table
and SqlFiddle demo: click here
You can execute a query like this:
SELECT MAX(date), MIN(date), val, COUNT(val) AS occurence
FROM table
WHERE date>minDate AND date<maxDate
GROUP BY val
where minDate and maxDate are your varables.
Result will be read in this way: Value "val" apper "occurence" time between minDate and maxDate
DROP TABLE IF EXISTS my_table;
CREATE TABLE my_table
(date DATE NOT NULL
,val CHAR(1) NOT NULL
,PRIMARY KEY (date)
);
INSERT INTO my_table VALUES
('2014-08-01','A'),
('2014-08-02','B'),
('2014-08-03','A'),
('2014-08-04','A'),
('2014-08-05','B'),
('2014-08-06','B'),
('2014-08-07','A'),
('2014-08-08','A'),
('2014-08-09','B'),
('2014-08-10','A'),
('2014-08-11','A');
SELECT MIN(date) min_date
, MAX(date) max_date
, SUM(val = 'A') A
, SUM(val = 'B') B
FROM my_table
GROUP
BY CEILING(TO_DAYS(date)/5)*5;
+------------+------------+------+------+
| min_date | max_date | A | B |
+------------+------------+------+------+
| 2014-08-01 | 2014-08-05 | 3 | 2 |
| 2014-08-06 | 2014-08-10 | 3 | 2 |
| 2014-08-11 | 2014-08-11 | 1 | 0 |
+------------+------------+------+------+

Combine consecutive date ranges

Using SQL Server 2008 R2,
I'm trying to combine date ranges into the maximum date range given that one end date is next to the following start date.
The data is about different employments. Some employees may have ended their employment and have rejoined at a later time. Those should count as two different employments (example ID 5). Some people have different types of employment, running after each other (enddate and startdate neck-to-neck), in this case it should be considered as one employment in total (example ID 30).
An employment period that has not ended has an enddate that is null.
Some examples is probably enlightening:
declare #t as table (employmentid int, startdate datetime, enddate datetime)
insert into #t values
(5, '2007-12-03', '2011-08-26'),
(5, '2013-05-02', null),
(30, '2006-10-02', '2011-01-16'),
(30, '2011-01-17', '2012-08-12'),
(30, '2012-08-13', null),
(66, '2007-09-24', null)
-- expected outcome
EmploymentId StartDate EndDate
5 2007-12-03 2011-08-26
5 2013-05-02 NULL
30 2006-10-02 NULL
66 2007-09-24 NULL
I've been trying different "islands-and-gaps" techniques but haven't been able to crack this one.
The strange bit you see with my use of the date '31211231' is just a very large date to handle your "no-end-date" scenario. I have assumed you won't really have many date ranges per employee, so I've used a simple Recursive Common Table Expression to combine the ranges.
To make it run faster, the starting anchor query keeps only those dates that will not link up to a prior range (per employee). The rest is just tree-walking the date ranges and growing the range. The final GROUP BY keeps only the largest date range built up per starting ANCHOR (employmentid, startdate) combination.
SQL Fiddle
MS SQL Server 2008 Schema Setup:
create table Tbl (
employmentid int,
startdate datetime,
enddate datetime);
insert Tbl values
(5, '2007-12-03', '2011-08-26'),
(5, '2013-05-02', null),
(30, '2006-10-02', '2011-01-16'),
(30, '2011-01-17', '2012-08-12'),
(30, '2012-08-13', null),
(66, '2007-09-24', null);
/*
-- expected outcome
EmploymentId StartDate EndDate
5 2007-12-03 2011-08-26
5 2013-05-02 NULL
30 2006-10-02 NULL
66 2007-09-24 NULL
*/
Query 1:
;with cte as (
select a.employmentid, a.startdate, a.enddate
from Tbl a
left join Tbl b on a.employmentid=b.employmentid and a.startdate-1=b.enddate
where b.employmentid is null
union all
select a.employmentid, a.startdate, b.enddate
from cte a
join Tbl b on a.employmentid=b.employmentid and b.startdate-1=a.enddate
)
select employmentid,
startdate,
nullif(max(isnull(enddate,'32121231')),'32121231') enddate
from cte
group by employmentid, startdate
order by employmentid
Results:
| EMPLOYMENTID | STARTDATE | ENDDATE |
-----------------------------------------------------------------------------------
| 5 | December, 03 2007 00:00:00+0000 | August, 26 2011 00:00:00+0000 |
| 5 | May, 02 2013 00:00:00+0000 | (null) |
| 30 | October, 02 2006 00:00:00+0000 | (null) |
| 66 | September, 24 2007 00:00:00+0000 | (null) |
SET NOCOUNT ON
DECLARE #T TABLE(ID INT,FromDate DATETIME, ToDate DATETIME)
INSERT INTO #T(ID,FromDate,ToDate)
SELECT 1,'20090801','20090803' UNION ALL
SELECT 2,'20090802','20090809' UNION ALL
SELECT 3,'20090805','20090806' UNION ALL
SELECT 4,'20090812','20090813' UNION ALL
SELECT 5,'20090811','20090812' UNION ALL
SELECT 6,'20090802','20090802'
SELECT ROW_NUMBER() OVER(ORDER BY s1.FromDate) AS ID,
s1.FromDate,
MIN(t1.ToDate) AS ToDate
FROM #T s1
INNER JOIN #T t1 ON s1.FromDate <= t1.ToDate
AND NOT EXISTS(SELECT * FROM #T t2
WHERE t1.ToDate >= t2.FromDate
AND t1.ToDate < t2.ToDate)
WHERE NOT EXISTS(SELECT * FROM #T s2
WHERE s1.FromDate > s2.FromDate
AND s1.FromDate <= s2.ToDate)
GROUP BY s1.FromDate
ORDER BY s1.FromDate
An alternative solution that uses window functions rather than recursive CTEs
SELECT
employmentid,
MIN(startdate) as startdate,
NULLIF(MAX(COALESCE(enddate,'9999-01-01')), '9999-01-01') as enddate
FROM (
SELECT
employmentid,
startdate,
enddate,
DATEADD(
DAY,
-COALESCE(
SUM(DATEDIFF(DAY, startdate, enddate)+1) OVER (PARTITION BY employmentid ORDER BY startdate ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING),
0
),
startdate
) as grp
FROM #t
) withGroup
GROUP BY employmentid, grp
ORDER BY employmentid, startdate
This works by calculating a grp value that will be the same for all consecutive rows. This is achieved by:
Determine totals days the span occupies (+1 as the dates are inclusive)
SELECT *, DATEDIFF(DAY, startdate, enddate)+1 as daysSpanned FROM #t
Cumulative sum the days spanned for each employment, ordered by startdate. This gives us the total days spanned by all the previous employment spans
We coalesce with 0 to ensure we dont have NULLs in our cumulative sum of days spanned
We do not include current row in our cumulative sum, this is because we will use the value against startdate rather than enddate (we cant use it against enddate because of the NULLs)
SELECT *, COALESCE(
SUM(daysSpanned) OVER (
PARTITION BY employmentid
ORDER BY startdate
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
)
,0
) as cumulativeDaysSpanned
FROM (
SELECT *, DATEDIFF(DAY, startdate, enddate)+1 as daysSpanned FROM #t
) inner1
Subtract the cumulative days from the startdate to get our grp. This is the crux of the solution.
If the start date increases at the same rate as the days spanned then the days are consecutive, and subtracting the two will give us the same value.
If the startdate increases faster than the days spanned then there is a gap and we will get a new grp value greater than the previous one.
Although grp is a date, the date itself is meaningless we are using just as a grouping value
SELECT *, DATEADD(DAY, -cumulativeDaysSpanned, startdate) as grp
FROM (
SELECT *, COALESCE(
SUM(daysSpanned) OVER (
PARTITION BY employmentid
ORDER BY startdate
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
)
,0
) as cumulativeDaysSpanned
FROM (
SELECT *, DATEDIFF(DAY, startdate, enddate)+1 as daysSpanned FROM #t
) inner1
) inner2
With the results
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
| employmentid | startdate | enddate | daysSpanned | cumulativeDaysSpanned | grp |
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
| 5 | 2007-12-03 00:00:00.000 | 2011-08-26 00:00:00.000 | 1363 | 0 | 2007-12-03 00:00:00.000 |
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
| 5 | 2013-05-02 00:00:00.000 | NULL | NULL | 1363 | 2009-08-08 00:00:00.000 |
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
| 30 | 2006-10-02 00:00:00.000 | 2011-01-16 00:00:00.000 | 1568 | 0 | 2006-10-02 00:00:00.000 |
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
| 30 | 2011-01-17 00:00:00.000 | 2012-08-12 00:00:00.000 | 574 | 1568 | 2006-10-02 00:00:00.000 |
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
| 30 | 2012-08-13 00:00:00.000 | NULL | NULL | 2142 | 2006-10-02 00:00:00.000 |
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
| 66 | 2007-09-24 00:00:00.000 | NULL | NULL | 0 | 2007-09-24 00:00:00.000 |
+--------------+-------------------------+-------------------------+-------------+-----------------------+-------------------------+
Finally we can GROUP BY grp to get the get rid of the consecutive days.
Use MIN and MAX to get the new startdate and endate
To handle the NULL enddate we give them a large value to get picked up by MAX then convert them back to NULL again
SELECT
employmentid,
MIN(startdate) as startdate,
NULLIF(MAX(COALESCE(enddate,'9999-01-01')), '9999-01-01') as enddate
FROM (
SELECT *, DATEADD(DAY, -cumulativeDaysSpanned, startdate) as grp
FROM (
SELECT *, COALESCE(
SUM(daysSpanned) OVER (
PARTITION BY employmentid
ORDER BY startdate
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
)
,0
) as cumulativeDaysSpanned
FROM (
SELECT *, DATEDIFF(DAY, startdate, enddate)+1 as daysSpanned FROM #t
) inner1
) inner2
) inner3
GROUP BY employmentid, grp
ORDER BY employmentid, startdate
To get the desired result
+--------------+-------------------------+-------------------------+
| employmentid | startdate | enddate |
+--------------+-------------------------+-------------------------+
| 5 | 2007-12-03 00:00:00.000 | 2011-08-26 00:00:00.000 |
+--------------+-------------------------+-------------------------+
| 5 | 2013-05-02 00:00:00.000 | NULL |
+--------------+-------------------------+-------------------------+
| 30 | 2006-10-02 00:00:00.000 | NULL |
+--------------+-------------------------+-------------------------+
| 66 | 2007-09-24 00:00:00.000 | NULL |
+--------------+-------------------------+-------------------------+
We can combine the inner queries to get the query at the start of this answer. Which is shorter, but less explainable
Limitations of all this required that
there are no overlaps of startdate and enddate for an employment. This could produce collisions in our grp.
startdate is not NULL. However this could be overcome by replacing NULL start dates with small date values
Future developers can decipher the window black magic you performed
A modified script for combining all overlapping periods. For example
01.01.2001-01.01.2010
05.05.2005-05.05.2015
will give one period:
01.01.2001-05.05.2015
tbl.enddate must be completed
;WITH cte
AS(
SELECT
a.employmentid
,a.startdate
,a.enddate
from tbl a
left join tbl c on a.employmentid=c.employmentid
and a.startdate > c.startdate
and a.startdate <= dateadd(day, 1, c.enddate)
WHERE c.employmentid IS NULL
UNION all
SELECT
a.employmentid
,a.startdate
,a.enddate
from cte a
inner join tbl c on a.startdate=c.startdate
and (c.startdate = dateadd(day, 1, a.enddate) or (c.enddate > a.enddate and c.startdate <= a.enddate))
)
select distinct employmentid,
startdate,
nullif(max(enddate),'31.12.2099') enddate
from cte
group by employmentid, startdate