Determine contiguous date intervals - sql

I have the following table structure:
id int -- more like a group id, not unique in the table
AddedOn datetime -- when the record was added
For a specific id there is at most one record each day. I have to write a query that returns contiguous (at day level) date intervals for each id.
The expected result structure is:
id int
StartDate datetime
EndDate datetime
Note that the time part of AddedOn is available but it is not important here.
To make it clearer, here is some input data:
-- Sample input for the question: (id, AddedOn) pairs, one row per id and day.
-- The GETDATE() row is a dummy record used to infer the column types; the
-- id > 0 filter removes it again.
WITH data AS
(
    SELECT src.id,
           src.AddedOn
    FROM (VALUES (0, GETDATE()),
                 (1, '20150101'), (1, '20150102'), (1, '20150104'),
                 (1, '20150105'), (1, '20150106'),
                 (2, '20150101'), (2, '20150102'), (2, '20150103'),
                 (2, '20150104'), (2, '20150106'), (2, '20150107'),
                 (3, '20150101'), (3, '20150103'), (3, '20150105'),
                 (3, '20150106'), (3, '20150108'), (3, '20150109'),
                 (3, '20150110')
         ) AS src (id, AddedOn)
    WHERE src.id > 0
)
SELECT data.id,
       data.AddedOn
FROM data
And the expected result:
id StartDate EndDate
1 2015-01-01 2015-01-02
1 2015-01-04 2015-01-06
2 2015-01-01 2015-01-04
2 2015-01-06 2015-01-07
3 2015-01-01 2015-01-01
3 2015-01-03 2015-01-03
3 2015-01-05 2015-01-06
3 2015-01-08 2015-01-10
Although it looks like a common problem I couldn't find a similar enough question. Also I'm getting closer to a solution and I will post it when (and if) it works but I feel that there should be a more elegant one.

Here's an answer without any fancy joins; it simply uses GROUP BY and ROW_NUMBER, which is not only simple but also more efficient.
-- Gaps-and-islands with GROUP BY only: dyID (the date as a day number) has
-- gaps, row_num does not, so dyID - row_num stays constant inside a run of
-- consecutive days and changes whenever a day is missing.
WITH CTE_dayOfYear AS
(
    SELECT src.id,
           src.AddedOn,
           DATEDIFF(DAY, '20000101', src.AddedOn)               AS dyID,
           ROW_NUMBER() OVER (ORDER BY src.id, src.AddedOn)     AS row_num
    FROM data AS src
)
SELECT islands.ID,
       MIN(islands.AddedOn)           AS StartDate,
       MAX(islands.AddedOn)           AS EndDate,
       islands.dyID - islands.row_num AS groupID
FROM CTE_dayOfYear AS islands
GROUP BY islands.ID,
         islands.dyID - islands.row_num
ORDER BY islands.ID,
         StartDate,
         EndDate
The logic is that the dyID is based on the date so there are gaps while row_num has no gaps. So every time there is a gap in dyID, then it changes the difference between row_num and dyID. Then I simply use that difference as my groupID.

In SQL Server 2008 this is a bit of a pain without the LEAD and LAG functions:
-- SQL Server 2008 approach (no LEAD/LAG): flag every row that does not
-- directly follow its predecessor, then use a running total of the flags
-- as the key that identifies each contiguous interval.
WITH data
AS (
    -- rn numbers the rows in (id, AddedOn) order, so the previous row of
    -- any row is the one with rn - 1.
    SELECT d.id,
           d.AddedOn,
           ROW_NUMBER() OVER (ORDER BY d.id, d.AddedOn) AS rn
    FROM (VALUES (0, GETDATE()), -- dummy record used to infer column types
                 (1, '20150101'), (1, '20150102'), (1, '20150104'),
                 (1, '20150105'), (1, '20150106'),
                 (2, '20150101'), (2, '20150102'), (2, '20150103'),
                 (2, '20150104'), (2, '20150106'), (2, '20150107'),
                 (3, '20150101'), (3, '20150103'), (3, '20150105'),
                 (3, '20150106'), (3, '20150108'), (3, '20150109'),
                 (3, '20150110')
         ) AS d (id, AddedOn)
    WHERE d.id > 0 -- exclude dummy record
),
diff
AS (
    -- diff is 0 when the previous row of the same id is exactly one day
    -- earlier, or when there is no previous row for that id; otherwise 1.
    SELECT cur.id,
           cur.AddedOn,
           cur.rn,
           CASE COALESCE(DATEDIFF(dd, prev.AddedOn, cur.AddedOn), 1)
               WHEN 1 THEN 0
               ELSE 1
           END AS diff
    FROM data AS cur
    LEFT JOIN data AS prev
        ON  prev.id = cur.id
        AND prev.rn = cur.rn - 1
),
parts
AS (
    -- p is the cumulative sum of diff over all rows up to the current rn.
    SELECT cur.id,
           cur.AddedOn,
           cur.rn,
           cur.diff,
           (SELECT SUM(upto.diff)
            FROM diff AS upto
            WHERE upto.rn <= cur.rn) AS p
    FROM diff AS cur
)
SELECT parts.id,
       MIN(parts.AddedOn) AS StartDate,
       MAX(parts.AddedOn) AS EndDate
FROM parts
GROUP BY parts.id,
         parts.p
Output:
id StartDate EndDate
1 2015-01-01 00:00:00.000 2015-01-02 00:00:00.000
1 2015-01-04 00:00:00.000 2015-01-06 00:00:00.000
2 2015-01-01 00:00:00.000 2015-01-04 00:00:00.000
2 2015-01-06 00:00:00.000 2015-01-07 00:00:00.000
3 2015-01-01 00:00:00.000 2015-01-01 00:00:00.000
3 2015-01-03 00:00:00.000 2015-01-03 00:00:00.000
3 2015-01-05 00:00:00.000 2015-01-06 00:00:00.000
3 2015-01-08 00:00:00.000 2015-01-10 00:00:00.000
Walkthrough:
diff
This CTE returns data:
1 2015-01-01 00:00:00.000 1 0
1 2015-01-02 00:00:00.000 2 0
1 2015-01-04 00:00:00.000 3 1
1 2015-01-05 00:00:00.000 4 0
1 2015-01-06 00:00:00.000 5 0
You are joining the table to itself to get the previous row. Then you calculate the difference in days between the current row and the previous row: if the result is 1 day, pick 0; otherwise pick 1.
parts
This CTE selects the result of the previous step and computes a cumulative sum of the new column (the sum of all its values from the first row up to the current row), so you get partitions to group by:
1 2015-01-01 00:00:00.000 1 0 0
1 2015-01-02 00:00:00.000 2 0 0
1 2015-01-04 00:00:00.000 3 1 1
1 2015-01-05 00:00:00.000 4 0 1
1 2015-01-06 00:00:00.000 5 0 1
2 2015-01-01 00:00:00.000 6 0 1
2 2015-01-02 00:00:00.000 7 0 1
2 2015-01-03 00:00:00.000 8 0 1
2 2015-01-04 00:00:00.000 9 0 1
2 2015-01-06 00:00:00.000 10 1 2
2 2015-01-07 00:00:00.000 11 0 2
3 2015-01-01 00:00:00.000 12 0 2
3 2015-01-03 00:00:00.000 13 1 3
The last step is just a grouping by ID and new column and picking min and max values for dates.

I took the "Islands Solution #3 from SQL MVP Deep Dives" solution from https://www.simple-talk.com/sql/t-sql-programming/the-sql-of-gaps-and-islands-in-sequences/ and applied to your test data:
-- "Islands Solution #3" applied to the sample data: turn each date into a
-- day number (SeqNo), subtract a per-ID ROW_NUMBER(), and group by the
-- difference, which is constant within each run of consecutive days.
WITH data AS
(
    SELECT src.id,
           src.AddedOn
    FROM (VALUES (0, GETDATE()), -- dummy record used to infer column types
                 (1, '20150101'), (1, '20150102'), (1, '20150104'),
                 (1, '20150105'), (1, '20150106'),
                 (2, '20150101'), (2, '20150102'), (2, '20150103'),
                 (2, '20150104'), (2, '20150106'), (2, '20150107'),
                 (3, '20150101'), (3, '20150103'), (3, '20150105'),
                 (3, '20150106'), (3, '20150108'), (3, '20150109'),
                 (3, '20150110')
         ) AS src (id, AddedOn)
    WHERE src.id > 0 -- exclude dummy record
),
CTE_Seq AS
(
    SELECT numbered.ID,
           numbered.SeqNo,
           numbered.SeqNo
             - ROW_NUMBER() OVER (PARTITION BY numbered.ID
                                  ORDER BY numbered.SeqNo) AS rn
    FROM (
            -- SeqNo: number of days between 2015-01-01 and AddedOn
            SELECT data.id                                   AS ID,
                   DATEDIFF(day, '20150101', data.AddedOn)   AS SeqNo
            FROM data
         ) AS numbered
)
SELECT ID,
       DATEADD(day, MIN(SeqNo), '20150101') AS StartDate,
       DATEADD(day, MAX(SeqNo), '20150101') AS EndDate
FROM CTE_Seq
GROUP BY ID,
         rn
ORDER BY ID,
         StartDate;
Result set
ID StartDate EndDate
1 2015-01-01 00:00:00.000 2015-01-02 00:00:00.000
1 2015-01-04 00:00:00.000 2015-01-06 00:00:00.000
2 2015-01-01 00:00:00.000 2015-01-04 00:00:00.000
2 2015-01-06 00:00:00.000 2015-01-07 00:00:00.000
3 2015-01-01 00:00:00.000 2015-01-01 00:00:00.000
3 2015-01-03 00:00:00.000 2015-01-03 00:00:00.000
3 2015-01-05 00:00:00.000 2015-01-06 00:00:00.000
3 2015-01-08 00:00:00.000 2015-01-10 00:00:00.000
I'd recommend you to examine the intermediate results of CTE_Seq to understand how it actually works. Just put
select * from CTE_Seq
instead of the final SELECT ... GROUP BY .... You'll get this result set:
ID SeqNo rn
1 0 -1
1 1 -1
1 3 0
1 4 0
1 5 0
2 0 -1
2 1 -1
2 2 -1
2 3 -1
2 5 0
2 6 0
3 0 -1
3 2 0
3 4 1
3 5 1
3 7 2
3 8 2
3 9 2
Each date is converted into a sequence number by DATEDIFF(day, '20150101', AddedOn). ROW_NUMBER() generates a set of sequential numbers without gaps, so when these numbers are subtracted from a sequence with gaps the difference jumps/changes. The difference stays the same until the next gap, so in the final SELECT GROUP BY ID, rn brings all rows from the same island together.

Here is a simple solution that does not use analytics. I tend not to use analytics because I work with many different DBMSs; many don't (yet) have them implemented, and even those that do have different syntaxes. I just have the habit of writing generic code whenever possible.
-- Pairs every row with the next AddedOn of the same ID using only a
-- self-join and a correlated MIN (no window functions).
-- NOTE(review): this yields one row per date (StartDate = the date,
-- EndDate = the following date or 9999-12-31 for the last one); it does
-- not merge consecutive days into a single interval.
WITH
Data (ID, AddedOn) AS (
    SELECT 1, CONVERT(date, '20150101')
    UNION ALL SELECT 1, '20150102'
    UNION ALL SELECT 1, '20150104'
    UNION ALL SELECT 1, '20150105'
    UNION ALL SELECT 1, '20150106'
    UNION ALL SELECT 2, '20150101'
    UNION ALL SELECT 2, '20150102'
    UNION ALL SELECT 2, '20150103'
    UNION ALL SELECT 2, '20150104'
    UNION ALL SELECT 2, '20150106'
    UNION ALL SELECT 2, '20150107'
    UNION ALL SELECT 3, '20150101'
    UNION ALL SELECT 3, '20150103'
    UNION ALL SELECT 3, '20150105'
    UNION ALL SELECT 3, '20150106'
    UNION ALL SELECT 3, '20150108'
    UNION ALL SELECT 3, '20150109'
    UNION ALL SELECT 3, '20150110'
)
SELECT cur.ID,
       cur.AddedOn                      AS StartDate,
       ISNULL(nxt.AddedOn, '99991231')  AS EndDate
FROM Data AS cur
LEFT JOIN Data AS nxt
    ON  nxt.ID = cur.ID
    -- the earliest later date of the same ID; no match for the last date
    AND nxt.AddedOn = (SELECT MIN(later.AddedOn)
                       FROM Data AS later
                       WHERE later.ID = cur.ID
                         AND later.AddedOn > cur.AddedOn);
In your situation I assume that ID and AddedOn form a composite PK and so are indexed. Thus, the query will run impressively fast even on very large tables.
Also, I used the outer join because it seemed like the last AddedOn date of each ID should be seen in the StartDate column. Instead of NULL I used a common MaxDate value. The NULL could work just as well as a "this is the latest StartDate row" flag.
Here is the output for ID=1:
ID StartDate EndDate
----------- ---------- ----------
1 2015-01-01 2015-01-02
1 2015-01-02 2015-01-04
1 2015-01-04 2015-01-05
1 2015-01-05 2015-01-06
1 2015-01-06 9999-12-31

I'd like to post my own solution too because it's yet another approach:
-- Contiguous day intervals per id, found by locating interval starts (rows
-- with no row on the previous day) and interval ends (rows with no row on
-- the next day) and pairing each start with the first end at or after it.
with data as
(
...
),
temp as
(
    -- For every row, look up the same id's row on the previous and on the
    -- next calendar day. DATEDIFF(day, ...) counts day boundaries, so the
    -- time part of AddedOn is ignored; comparing with
    -- dateadd(day, +/-1, AddedOn) would only match identical times of day.
    select cur.id
          ,cur.AddedOn
          ,dprev.AddedOn as PrevAddedOn
          ,dnext.AddedOn as NextAddedOn
    from data as cur
    left join data as dprev
        on  dprev.id = cur.id
        and datediff(day, dprev.AddedOn, cur.AddedOn) = 1
    left join data as dnext
        on  dnext.id = cur.id
        and datediff(day, cur.AddedOn, dnext.AddedOn) = 1
),
starts as
(
    -- rows that open an interval: nothing on the day before
    select id
          ,AddedOn
    from temp
    where PrevAddedOn is null
),
ends as
(
    -- rows that close an interval: nothing on the day after
    select id
          ,AddedOn
    from temp
    where NextAddedOn is null
)
select s.id      as id
      ,s.AddedOn as StartDate
       -- the nearest interval end of the same id that is not before the start
      ,(select min(e.AddedOn)
        from ends as e
        where e.id = s.id
          and e.AddedOn >= s.AddedOn) as EndDate
from starts as s
order by s.id
        ,s.AddedOn

Related

Function that returns MAX OR MIN dates based on ID count

I have a task in SQL Server where I need to return the RESULT_DATE column using ID, PRODUCT_ID and DATE columns. Task criteria:
If the DATE column is filled only once for a PRODUCT_ID, then I need to return that single date (as for PRODUCT_ID 1 and 3). Let's say it's the MIN date.
If DATE column is filled more than one time (like for PRODUCT_ID 2) then I need to return the next filled DATE row.
Data:
-- Sample data for the question: several rows per PRODUCT_ID, where [DATE]
-- is filled on some rows and NULL on the others.
CREATE TABLE #temp
(
    ID         INT,
    PRODUCT_ID INT,
    [DATE]     DATETIME
)
INSERT INTO #temp (ID, PRODUCT_ID, [DATE])
VALUES
    ( 1, 1, '2008-04-24 00:00:00.000'),
    ( 2, 1, NULL),
    ( 3, 2, '2015-12-09 00:00:00.000'),
    ( 4, 2, NULL),
    ( 5, 2, NULL),
    ( 6, 2, '2022-01-01 13:06:45.253'),
    ( 7, 2, NULL),
    ( 8, 2, '2022-01-19 13:06:45.253'),
    ( 9, 3, '2018-04-25 00:00:00.000'),
    (10, 3, NULL),
    (11, 3, NULL)
ID
PRODUCT_ID
DATE
RESULT_DATE
1
1
2008-04-24 00:00:00.000
2008-04-24 00:00:00.000
2
1
NULL
2008-04-24 00:00:00.000
3
2
2015-12-09 00:00:00.000
2022-01-01 13:06:45.253
4
2
NULL
2022-01-01 13:06:45.253
5
2
NULL
2022-01-01 13:06:45.253
6
2
2022-01-01 13:06:45.253
2022-01-19 13:06:45.253
7
2
NULL
2022-01-19 13:06:45.253
8
2
2022-01-19 13:06:45.253
2022-01-19 13:06:45.253
9
3
2018-04-25 00:00:00.000
2018-04-25 00:00:00.000
10
3
NULL
2018-04-25 00:00:00.000
11
3
NULL
2018-04-25 00:00:00.000
I have tried different techniques, for example using LEAD and LAG SQL function combinations. The latest script: (However, still not working)
-- Attempt from the question; the author reports that it does not yet
-- produce the required RESULT_DATE.
SELECT
-- Use the row's own DATE when present, otherwise the windowed value below.
COALESCE(DATE,
CAST(
SUBSTRING(
-- Running MAX per PRODUCT_ID, ordered by DATE, from the first row of the
-- partition up to the current row, over DATE cast to BINARY(4) and
-- concatenated with itself.
MAX(CAST(DATE AS BINARY(4)) + CAST(DATE AS BINARY(4))) OVER ( PARTITION BY PRODUCT_ID ORDER BY DATE ROWS UNBOUNDED PRECEDING)
-- Take bytes 5-8 of that 8-byte value.
,5,4)
AS INT)
) AS RESULT_DATE,
*
-- NOTE(review): TABLE is presumably a placeholder for the real table name.
FROM TABLE
You can use a CTE, Select all rows with a non-NULL Date giving each a row_number, then use a second CTE to fetch all rows from the first CTE equivalent to the date with the largest row number per product_id that is less than 3. Finally join this CTE to the original table to supply the 2nd Date to each row:
Set Up
-- Same sample data as in the question, with the date column named MyDATE.
CREATE TABLE #temp
(
    ID         INT,
    PRODUCT_ID INT,
    MyDATE     DATETIME
)
INSERT INTO #temp (ID, PRODUCT_ID, MyDate)
VALUES
    ( 1, 1, '2008-04-24 00:00:00.000'),
    ( 2, 1, NULL),
    ( 3, 2, '2015-12-09 00:00:00.000'),
    ( 4, 2, NULL),
    ( 5, 2, NULL),
    ( 6, 2, '2022-01-01 13:06:45.253'),
    ( 7, 2, NULL),
    ( 8, 2, '2022-01-19 13:06:45.253'),
    ( 9, 3, '2018-04-25 00:00:00.000'),
    (10, 3, NULL),
    (11, 3, NULL);
Query:
-- Gives every row of a product that product's second non-NULL MyDate
-- (by Id), or its only non-NULL MyDate when there is just one.
-- NOTE(review): for the sample data this returns 2022-01-01 for Ids 6 and
-- 8, whereas the question's expected table shows 2022-01-19 for those rows.
;WITH CTE
AS
(
    -- Number the rows that have a date, per product, in Id order.
    SELECT src.ID,
           src.Product_ID,
           src.MyDate,
           ROW_NUMBER() OVER (PARTITION BY src.Product_ID ORDER BY src.Id) AS rn
    FROM #temp AS src
    WHERE src.MyDate IS NOT NULL
),
CTE2
AS
(
    -- Keep only the first two dated rows per product and remember which
    -- of them is the last one (rn 2 if it exists, otherwise rn 1).
    SELECT firstTwo.Product_ID,
           firstTwo.MyDate,
           firstTwo.rn,
           MAX(firstTwo.rn) OVER (PARTITION BY firstTwo.Product_ID) AS lastRn
    FROM CTE AS firstTwo
    WHERE firstTwo.rn < 3
)
SELECT T.Id,
       T.Product_Id,
       T.MyDate,
       C.MyDate AS Result_date
FROM #temp AS T
INNER JOIN CTE2 AS C
    ON  T.Product_Id = C.Product_Id
    AND C.rn = C.lastRn
ORDER BY T.Id;
Results:
Id Product_Id MyDate Result_Date
1 1 2008-04-24 00:00:00.000 2008-04-24 00:00:00.000
2 1 NULL 2008-04-24 00:00:00.000
3 2 2015-12-09 00:00:00.000 2022-01-01 13:06:45.253
4 2 NULL 2022-01-01 13:06:45.253
5 2 NULL 2022-01-01 13:06:45.253
6 2 2022-01-01 13:06:45.253 2022-01-01 13:06:45.253
7 2 NULL 2022-01-01 13:06:45.253
8 2 2022-01-19 13:06:45.253 2022-01-01 13:06:45.253
9 3 2018-04-25 00:00:00.000 2018-04-25 00:00:00.000
10 3 NULL 2018-04-25 00:00:00.000
11 3 NULL 2018-04-25 00:00:00.000

Histogram of orders by range of dates

I'm trying to create a histogram based on intervals of dates and the total number of orders, but I'm having a hard time binning it in SQL.
A simplified table can be seen below
customer_id
Date
count_orders
1
01-01-2020
5
1
01-13-2020
26
1
02-06-2020
11
2
01-17-2020
9
3
02-04-2020
13
3
03-29-2020
24
4
04-05-2020
1
5
02-23-2020
10
6
03-15-2020
7
6
04-18-2020
32
...
...
...
and I'm thinking of binning it into 20-day intervals, but the only thing I can think of is to use a
SUM(CASE WHEN Date BETWEEN <interval1_startdate> AND <interval1_enddate> ...)
method per interval, which, if applied to the actual data (which contains millions of rows), is quite exhausting. So I need help automating the binning part.
Desired output would either be
1)
interval
total_count
01-01-2020 - 01-20-2020
31
01-21-2020 - 02-10-2020
24
02-10-2020 - 03-01-2020
10
...
...
or 2)
start
end
total_count
01-01-2020
01-20-2020
31
01-21-2020
02-10-2020
24
02-10-2020
03-01-2020
10
...
...
...
Do you have any ideas?
You can group by (current date - minimum date) / 20. For Presto, something like this:
-- Bins the orders into consecutive 20-day intervals counted from the
-- earliest date in the data and sums count_orders per interval.
-- grp is the zero-based interval number: whole days since the earliest
-- date, integer-divided by 20.
WITH dataset (customer_id, Date, count_orders) AS (
    VALUES (1, date_parse('01-01-2020', '%m-%d-%Y'), 5),
           (1, date_parse('01-13-2020', '%m-%d-%Y'), 26),
           (1, date_parse('02-06-2020', '%m-%d-%Y'), 11),
           (2, date_parse('01-17-2020', '%m-%d-%Y'), 9),
           (3, date_parse('02-04-2020', '%m-%d-%Y'), 13),
           (3, date_parse('03-29-2020', '%m-%d-%Y'), 24),
           (4, date_parse('04-05-2020', '%m-%d-%Y'), 1),
           (5, date_parse('02-23-2020', '%m-%d-%Y'), 10),
           (6, date_parse('03-15-2020', '%m-%d-%Y'), 7),
           (6, date_parse('04-18-2020', '%m-%d-%Y'), 32)
)
SELECT date_add('day', 20 * grp, min(min_date))           AS interval_start, -- first day of the bin
       date_add('day', 20 * (grp + 1) - 1, min(min_date)) AS interval_end,   -- last day of the bin (inclusive)
       sum(count_orders)                                  AS total_count
FROM (
    SELECT count_orders,
           date_diff('day', min(date) OVER (), date) / 20 AS grp,
           min(date) OVER ()                              AS min_date
    FROM dataset
) AS binned
GROUP BY grp
ORDER BY interval_start
Output:
interval_start
interval_end
total_count
2020-01-01 00:00:00.000
2020-01-20 00:00:00.000
40
2020-01-21 00:00:00.000
2020-02-09 00:00:00.000
24
2020-02-10 00:00:00.000
2020-02-29 00:00:00.000
10
2020-03-01 00:00:00.000
2020-03-20 00:00:00.000
7
2020-03-21 00:00:00.000
2020-04-09 00:00:00.000
25
2020-04-10 00:00:00.000
2020-04-29 00:00:00.000
32
You can get the intervals using CTE and then get the total using cross apply.
-- Sample data. The DROP is guarded so the script also runs when Tbl does
-- not exist yet (an unconditional DROP TABLE fails on the first run).
If Object_Id('Tbl', 'U') Is Not Null
    Drop Table Tbl
Create Table Tbl (customer_id Int, [date] Date, count_orders Int)
Insert Into Tbl (customer_id, [date], count_orders)
Values (1, '2020-01-01', 5),
       (1, '2020-01-13', 26),
       (1, '2020-02-06', 11),
       (2, '2020-01-17', 9),
       (3, '2020-02-04', 13),
       (3, '2020-03-29', 24),
       (4, '2020-04-05', 1),
       (5, '2020-02-23', 10),
       (6, '2020-03-15', 7),
       (6, '2020-04-18', 32)
-- Builds consecutive 20-day intervals from the earliest to the latest
-- [date] in Tbl with a recursive CTE, then totals count_orders for each.
;With A As (
    -- first interval: earliest date plus the following 19 days
    Select Min([date])                  As start,
           DateAdd(dd, 19, Min([date])) As [end],
           Max([date])                  As [max]
    From Tbl
    Union All
    -- next interval starts the day after the previous one ended
    Select DateAdd(dd, 1, [end])  As start,
           DateAdd(dd, 20, [end]) As [end],
           [max]
    From A
    Where [end] < [max]
)
Select A.[start],
       A.[end],
       T.total_count
From A
Cross Apply (
    -- SUM over zero rows is NULL; report an empty interval as 0 instead
    Select Coalesce(Sum(count_orders), 0) As total_count
    From Tbl
    Where [date] Between A.[start] And A.[end]
) As T
-- The default limit of 100 recursion levels would stop the query after
-- 101 intervals (about 5.5 years of data); the recursion always ends
-- because [end] grows by 20 days per step until it reaches [max].
Option (MaxRecursion 0)
Result:
start end total_count
---------- ---------- -----------
2020-01-01 2020-01-20 40
2020-01-21 2020-02-09 24
2020-02-10 2020-02-29 10
2020-03-01 2020-03-20 7
2020-03-21 2020-04-09 25
2020-04-10 2020-04-29 32

SQL - Setting Value From Hierarchical Children

I am writing an application which gets task data from a project planning MS SQL table (let's call the table tasks). For simplicity the table fields can be thought of as follows:
task_id, parent_id, name, start_date, end_date
All parent tasks have NULL as start and end dates. Only the children (with no children of their own) have a start and end date.
I want to get the tasks data and in the process set the start date of each parent based upon the earliest start date of all the parent's children and recursive grandchildren and set the end date to be the latest end date of all the children and recursive grandchildren. Is this possible please?
I assume from your question that you use SQL Server. I think this is what you want. It is done with a recursive common table expression: it begins with the leaf children and goes up to the topmost parents:
-- Rolls the start/end dates of leaf tasks up to all of their ancestors.
-- id = task, pid = parent task, sd/ed = start/end date (NULL on parents).
-- The table variable must be declared with @; "#t" is not valid after
-- DECLARE.
DECLARE @t TABLE (id INT, pid INT, sd DATE, ed DATE)
INSERT INTO @t (id, pid, sd, ed) VALUES
(1, NULL, NULL, NULL),
(2, 1, NULL, NULL),
(3, 2, '20150201', '20150215'),
(4, 2, '20150101', '20150201'),
(5, 1, NULL, NULL),
(6, 5, '20150301', '20150401'),
(7, 1, NULL, NULL),
(8, 7, NULL, NULL),
(9, 8, '20140101', '20141230'),
(10, 8, '20140102', '20141231')
;WITH cte AS (
    -- anchor: the rows that carry dates themselves
    SELECT leaf.id, leaf.pid, leaf.sd, leaf.ed
    FROM @t AS leaf
    WHERE leaf.sd IS NOT NULL
    UNION ALL
    -- step: pass each row's dates on to its parent
    SELECT parent.id, parent.pid, child.sd, child.ed
    FROM @t AS parent
    INNER JOIN cte AS child
        ON child.pid = parent.id
)
-- every task now has one row per dated descendant (or itself):
-- the earliest start and the latest end give its overall span
SELECT id, pid, MIN(sd) AS sd, MAX(ed) AS ed
FROM cte
GROUP BY id, pid
ORDER BY id
Output:
id pid sd ed
1 NULL 2014-01-01 2015-04-01
2 1 2015-01-01 2015-02-15
3 2 2015-02-01 2015-02-15
4 2 2015-01-01 2015-02-01
5 1 2015-03-01 2015-04-01
6 5 2015-03-01 2015-04-01
7 1 2014-01-01 2014-12-31
8 7 2014-01-01 2014-12-31
9 8 2014-01-01 2014-12-30
10 8 2014-01-02 2014-12-31

SQL query stuck - comparison on different lines

I'm working on a very weird problem with SQL where I have to compare a row with the previous row
Number start_date end_date
----- ------- ------------
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000
2 2011-10-11 00:00:00.000 2011-10-11 00:00:00.000
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000
4 2011-10-29 00:00:00.000 2011-11-15 00:00:00.000
Here , I have to compare the start_date and end_date on the two different line and create a view out of it.
(If the start_date is less than the previous end_date , then criteria is set to 1).
Well it should compare 2011-10-26 00:00:00.000 for 3 and 2011-10-27 00:00:00.000 on 2 for 30 days
Number start_date end_date Criteria
----- ----------- ---------------- ------------
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000 0
2 2011-10-11 00:00:00.000 2011-10-11 00:00:00.000 0
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000 1
4 2011-10-30 00:00:00.000 2011-11-15 00:00:00.000 1
I'm confused about how I should proceed with this.
Any help would be helpful !!!!
Thanks !!!
The most straightforward way to do this is to use a subquery:
-- Criteria is 1 when a row starts less than 30 days after the end_date of
-- the previous row (the row with the next lower number), otherwise 0.
-- The first row has no previous row and therefore gets 0.
SELECT cur.number,
       cur.start_date,
       cur.end_date,
       CASE
           WHEN cur.start_date < DATEADD(d, 30, prev.end_date) THEN 1
           ELSE 0
       END AS Criteria
FROM mytable AS cur
OUTER APPLY (
    -- end_date of the closest preceding row, if any
    SELECT TOP (1) b.end_date
    FROM mytable AS b
    WHERE b.number < cur.number
    ORDER BY b.number DESC
) AS prev
Note: If the start date is the 29th day following the previous row's end date, Criteria becomes 1. By the 30th day onwards, it is 0. Tweak the 30 in the query as required.
Sample:
-- Sample rows for the Criteria query.
CREATE TABLE mytable
(
    Number     INT PRIMARY KEY,
    start_date DATETIME,
    end_date   DATETIME
);
INSERT INTO mytable (Number, start_date, end_date)
VALUES (1, '2011-06-07', '2011-07-10'),
       (2, '2011-10-11', '2011-10-27'),
       (3, '2011-10-26', '2011-10-29'),
       (4, '2011-10-29', '2011-11-15')
Result:
number start_date end_date Criteria
1 2011-06-07 00:00:00.000 2011-07-10 00:00:00.000 0
2 2011-10-11 00:00:00.000 2011-10-27 00:00:00.000 0
3 2011-10-26 00:00:00.000 2011-10-29 00:00:00.000 1
4 2011-10-29 00:00:00.000 2011-11-15 00:00:00.000 1
Try using case like this:
-- Criteria is 1 when a row's start_date is earlier than the end_date of
-- the previous row (the row with the next lower Number), otherwise 0.
-- The comparison has to use the previous row: comparing start_date with
-- the end_date of the same row says nothing about consecutive rows.
create view vDates as
select cur.Number,
       cur.start_date,
       cur.end_date,
       case
           when cur.start_date < (select top (1) prev.end_date
                                  from tab as prev
                                  where prev.Number < cur.Number
                                  order by prev.Number desc)
               then 1
           else 0   -- also covers the first row, which has no previous row
       end as Criteria
from tab as cur
SQL Fiddle Demo
A more readable way is create a function and send the correct dates:
Function:
-- Returns 1 when @START_DATE is earlier than @PREVIOUS_END_DATE and 0
-- otherwise (including when either argument is NULL, because the
-- comparison is then not true).
-- Parameters must be prefixed with @; "#START_DATE" is not a valid name.
create function [dbo].[CompareDates] (
    @START_DATE datetime,
    @PREVIOUS_END_DATE datetime
)
RETURNS int
AS
BEGIN
    RETURN CASE
               WHEN @START_DATE < @PREVIOUS_END_DATE THEN 1
               ELSE 0
           END
END
Query (using subquery):
-- Looks up the previous row's end_date for every row and passes it to
-- dbo.CompareDates together with the row's own start date.
-- The table variable must be declared with @ ("#dates" is not valid after
-- DECLARE), and the function's first argument is the start date, not
-- end_date.
declare @dates table
(
    number int,
    start datetime,
    end_date datetime
)
insert into @dates (number, start, end_date) values
(1, '2011-06-07 00:00:00.000', '2011-07-10 00:00:00.000'),
(2, '2011-10-11 00:00:00.000', '2011-10-27 00:00:00.000'),
(3, '2011-10-26 00:00:00.000', '2011-10-29 00:00:00.000'),
(4, '2011-10-29 00:00:00.000', '2011-11-15 00:00:00.000')
select dates.number,
       dates.start,
       dates.end_date,
       dates.previous_end_date,
       dbo.CompareDates(dates.start, dates.previous_end_date) as Criteria
from
(
    select d1.number,
           d1.start,
           d1.end_date,
           -- end_date of the closest preceding row; NULL for the first row
           (select top 1 d2.end_date
            from @dates d2
            where d2.number < d1.number
            order by d2.number desc) as previous_end_date
    from @dates d1
) dates

SQL Query getting the date between expiration date and effective date from 2 different tables

I am having a problem with a query over these two tables, which have no link between them and which I am trying to combine. Table 1 has effective and expiration dates, which have to be tied to the PollDate of table 2. The PollDate must not lie between the effective and expiration dates.
Table 1
ClientID EffectiveDate ExpirationDate
1 2009-04-01 00:00:00.000 2009-12-18 00:00:00.000
1 2010-02-12 00:00:00.000 2010-03-05 00:00:00.000
1 2010-05-18 00:00:00.000 NULL
1 2009-12-21 00:00:00.000 2010-02-08 00:00:00.000
1 2010-12-19 00:00:00.000 2009-12-20 00:00:00.000
Table 2
ClientID PollDate
1 2009-12-20 00:00:00.000
1 2009-12-19 00:00:00.000
1 2010-02-12 00:00:00.000
1 2010-02-27 00:00:00.000
1 2010-05-19 00:00:00.000
1 2010-05-29 00:00:00.000
1 2010-05-30 00:00:00.000
1 2010-05-31 00:00:00.000
1 2010-06-05 00:00:00.000
1 2010-06-25 00:00:00.000
1 2010-06-27 00:00:00.000
1 2010-07-02 00:00:00.000
1 2010-08-04 00:00:00.000
1 2010-08-20 00:00:00.000
Result
ClientID inValidDate
1 2009-12-20 00:00:00.000
1 2009-12-19 00:00:00.000
The following is a slight variation of @AJP's result - just accounting for NULL values in ExpirationDate:
-- Sample data from the question: date ranges per client (#Table1) and the
-- poll dates to check against them (#Table2).
CREATE TABLE #Table1
(
    [ClientID]       INT,
    [EffectiveDate]  DATETIME,
    [ExpirationDate] DATETIME
)
INSERT INTO #Table1 ([ClientID], [EffectiveDate], [ExpirationDate])
VALUES (1, '2009-04-01', '2009-12-18'),
       (1, '2010-02-12', '2010-03-05'),
       (1, '2010-05-18', NULL),
       (1, '2009-12-21', '2010-02-08'),
       (1, '2010-12-19', '2009-12-20')
CREATE TABLE #Table2
(
    [ClientID] INT,
    [PollDate] DATETIME
)
INSERT INTO #Table2 ([ClientID], [PollDate])
VALUES (1, '2009-12-20'),
       (1, '2009-12-19'),
       (1, '2010-02-12'),
       (1, '2010-02-27'),
       (1, '2010-05-19'),
       (1, '2010-05-29'),
       (1, '2010-05-30'),
       (1, '2010-05-31'),
       (1, '2010-06-05'),
       (1, '2010-06-25'),
       (1, '2010-06-27'),
       (1, '2010-07-02'),
       (1, '2010-08-04'),
       (1, '2010-08-20')
-- Poll dates that are not covered by ANY date range of the same client.
-- NOT EXISTS is required here: joining on "outside this range" returns a
-- poll date once for every range it lies outside of, so almost every poll
-- date would be listed (several times). For the sample data this query
-- returns exactly 2009-12-19 and 2009-12-20.
SELECT
    t2.[ClientID],
    t2.[PollDate] AS [inValidDate]
FROM
    #Table2 AS t2
WHERE NOT EXISTS
(
    SELECT 1
    FROM #Table1 AS t1
    WHERE t1.[ClientID] = t2.[ClientID] -- Not clear from your question if this is necessary
      AND t2.[PollDate] >= t1.[EffectiveDate]
      -- a NULL ExpirationDate is treated as an open-ended range
      AND t2.[PollDate] <= ISNULL(t1.[ExpirationDate], '9999-12-31')
)
Can you try:
-- Lists the poll dates that fall inside a date range of the same client.
-- The column is PollDate (not PoolDate), and the tables are joined on
-- ClientID so that a poll date is only compared with its own client's
-- ranges. Ranges with a NULL ExpirationDate never match BETWEEN.
select t2.ClientID,
       t2.PollDate
from Table1 as t1
inner join Table2 as t2
    on t2.ClientID = t1.ClientID
where t2.PollDate between t1.EffectiveDate and t1.ExpirationDate
Because you didn't put in a join condition, each row of table1 will match each row of table2.
I'm not sure about the syntax, but you want to do something like this.
-- Poll dates that are not inside any date range of the same client.
-- "not in between" is not valid SQL and the column is PollDate; the check
-- is written with NOT EXISTS so that each poll date is tested against all
-- ranges at once instead of being returned once per non-matching range.
-- Ranges with a NULL ExpirationDate never match BETWEEN here.
select t2.ClientID,
       t2.PollDate as inValidDate
from Table2 as t2
where not exists (
    select 1
    from Table1 as t1
    where t1.ClientID = t2.ClientID
      and t2.PollDate between t1.EffectiveDate and t1.ExpirationDate
)
EDIT:
Assuming if expiration date is null means policy will never expire.
-- Poll dates that are not inside any date range of the same client, where
-- a NULL ExpirationDate means the range never expires.
-- "not in between" is not valid SQL and the column is PollDate; the check
-- is written with NOT EXISTS so that each poll date is tested against all
-- ranges at once instead of being returned once per non-matching range.
select t2.ClientID,
       t2.PollDate as inValidDate
from Table2 as t2
where not exists (
    select 1
    from Table1 as t1
    where t1.ClientID = t2.ClientID
      and t2.PollDate between t1.EffectiveDate
                          and ISNULL(t1.ExpirationDate, '2999-01-01')
)