SQL: Deduce Effective Pricing from overlapping Dates - sql

I have pricing record with overlapping dates. On few dates there are more than one overlapping prices. Please follow the example below:
Example on 2022/02/15 there are 2 prices 10 and 8 .
article
price
startdate
enddate
123
10
2022/02/02
2049/12/31
123
8
2022/02/14
2022/09/14
123
5
2022/03/14
2022/04/06
123
4
2022/04/11
2022/04/27
I want to apply the effective price for date ranges like below and avoid conflicting prices in the output.
article
price
startdate
enddate
123
10
2022/02/02
2022/02/13
123
8
2022/02/14
2022/03/13
123
5
2022/03/14
2022/04/06
123
8
2022/04/07
2022/04/10
123
4
2022/04/11
2022/04/27
123
8
2022/04/28
2022/09/14
123
10
2022/09/15
2049/12/31
I can think of window functions to adjust the end dates and prices, but I cannot wrap my head around the problem completely to get the complete solution. Any suggestion/solution is appreciated.
Database: Snowflake
Thank you

Using the logic of new starting price window wins for overlaps.
Discreate Date version:
with data(article,price,startdate,enddate) as (
select * FROM VALUES
(123, 10, '2022-02-02'::date, '2049-12-31'::date),
(123, 8, '2022-02-14'::date, '2022-09-14'::date),
(123, 5, '2022-03-14'::date, '2022-04-06'::date),
(123, 4, '2022-04-11'::date, '2022-04-27'::date)
), dis_times as (
select article,
date as startdate,
lead(date) over(partition by article order by date)-1 as enddate
from (
select distinct article, startdate as date from data
union
select distinct article, enddate+1 as date from data
)
qualify enddate is not null
)
select
d1.article,
d1.price,
d2.startdate,
d2.enddate
from data as d1
join dis_times as d2
on d1.article = d2.article
and d2.startdate between d1.startdate and d1.enddate qualify row_number() over (partition by d1.article, s_startdate order by d1.startdate desc) = 1
order by 1,3;
gives:
ARTICLE
PRICE
S_STARTDATE
S_ENDDATE
123
10
2022-02-02
2022-02-13
123
8
2022-02-14
2022-03-13
123
5
2022-03-14
2022-04-06
123
8
2022-04-07
2022-04-10
123
4
2022-04-11
2022-04-27
123
8
2022-04-28
2022-09-14
123
10
2022-09-15
2049-12-31
Continuous Timestamp version:
with data(article,price,startdate,enddate) as (
select * FROM VALUES
(123, 10, '2022-02-02'::date, '2049-12-31'::date),
(123, 8, '2022-02-14'::date, '2022-09-14'::date),
(123, 5, '2022-03-14'::date, '2022-04-06'::date),
(123, 4, '2022-04-11'::date, '2022-04-27'::date)
), dis_times as (
select article,
date as startdate,
lead(date) over(partition by article order by date) as enddate
from (
select distinct article, startdate as date from data
union
select distinct article, enddate as date from data
)
qualify enddate is not null
)
select
d1.article,
d1.price,
d2.startdate,
d2.enddate
from data as d1
join dis_times as d2
on d1.article = d2.article
and d2.startdate >= d1.startdate and d2.startdate < d1.enddate
qualify row_number() over (partition by d1.article, s_startdate order by d1.startdate desc) = 1
order by 1,3;
which gives:
ARTICLE
PRICE
S_STARTDATE
S_ENDDATE
123
10
2022-02-02
2022-02-14
123
8
2022-02-14
2022-03-14
123
5
2022-03-14
2022-04-06
123
8
2022-04-06
2022-04-11
123
4
2022-04-11
2022-04-27
123
8
2022-04-27
2022-09-14
123
10
2022-09-14
2049-12-31
Thanks to MatBailie for the tighter join suggestion.
join dis_times as d2
on d1.article = d2.article
and d2.startdate between d1.startdate and d1.enddate
the continuous range I would normally do in this for
and d2.startdate between d1.startdate and d1.enddate and d2.startdate < d1.enddate
instead of this form
and d2.startdate >= d1.startdate and d2.startdate < d1.enddate
because I in experience it performed better. always test your complexities.

First thing I did was --I turned your price-per-date range data into a price-per-date lookup table.
create or replace temporary table price_date_lookup as
select distinct
article,
dateadd('day',b.index-1,start_date) as dates,
first_value(price) over (partition by article, dates order by end_date) as price
from my_table,
lateral split_to_table(repeat('.',datediff(day,start_date,end_date)), '.') b;
Notes:
first_value handles overlaps by overriding prices based on their end dates.
lateral... basically helps create a date column with all the days in the range
As soon as I created that table, I figured the rest could be approached like a gaps and island problem.
with cte1 as
(select *, case when lag(price) over (partition by article order by dates)=price then 0 else 1 end as price_start --flag start of a new price island
from price_date_lookup),
cte2 as
(select *, sum(price_start) over (partition by article order by dates) as price_id --assign id to all the price islands
from cte1)
select article,
price,
min(dates) as start_date,
max(dates) as end_date
from cte2
group by article,price,price_id;

Related

How to create a start and end date with no gaps from one date column and to sum a value within the dates

I am new SQL coding using in SQL developer.
I have a table that has 4 columns: Patient ID (ptid), service date (dt), insurance payment amount (insr_amt), out of pocket payment amount (op_amt). (see table 1 below)
What I would like to do is (1) create two columns "start_dt" and "end_dt" using the "dt" column where if there are no gaps in the date by the patient ID then populate the start and end date with the first and last date by patient ID, however if there is a gap in service date within the patient ID then to create the separate start and end date rows per patient ID, along with (2) summing the two payment amounts by patient ID with in the one set of start and end date visits (see table 2 below).
What would be the way to run this using SQL code in SQL developer?
Thank you!
Table 1:
Ptid
dt
insr_amt
op_amt
A
1/1/2021
30
20
A
1/2/2021
30
10
A
1/3/2021
30
10
A
1/4/2021
30
30
B
1/6/2021
10
10
B
1/7/2021
20
10
C
2/1/2021
15
30
C
2/2/2021
15
30
C
2/6/2021
60
30
Table 2:
Ptid
start_dt
end_dt
total_insr_amt
total_op_amt
A
1/1/2021
1/4/2021
120
70
B
1/6/2021
1/7/2021
30
20
C
2/1/2021
2/2/2021
30
60
C
2/6/2021
2/6/2021
60
30
You didn't mention the specific database so this solution works in PostgreSQL. You can do:
select
ptid,
min(dt) as start_dt,
max(dt) as end_dt,
sum(insr_amt) as total_insr_amt,
sum(op_amt) as total_op_amt
from (
select *,
sum(inc) over(partition by ptid order by dt) as grp
from (
select *,
case when dt - interval '1 day' = lag(dt) over(partition by ptid order by dt)
then 0 else 1 end as inc
from t
) x
) y
group by ptid, grp
order by ptid, grp
Result:
ptid start_dt end_dt total_insr_amt total_op_amt
----- ---------- ---------- -------------- -----------
A 2021-01-01 2021-01-04 120 70
B 2021-01-06 2021-01-07 30 20
C 2021-02-01 2021-02-02 30 60
C 2021-02-06 2021-02-06 60 30
See running example at DB Fiddle 1.
EDIT for Oracle
As requested, the modified query that works in Oracle is:
select
ptid,
min(dt) as start_dt,
max(dt) as end_dt,
sum(insr_amt) as total_insr_amt,
sum(op_amt) as total_op_amt
from (
select x.*,
sum(inc) over(partition by ptid order by dt) as grp
from (
select t.*,
case when dt - 1 = lag(dt) over(partition by ptid order by dt)
then 0 else 1 end as inc
from t
) x
) y
group by ptid, grp
order by ptid, grp
See running example at db<>fiddle 2.

SQL - Find if column dates include at least partially a date range

I need to create a report and I am struggling with the SQL script.
The table I want to query is a company_status_history table which has entries like the following (the ones that I can't figure out)
Table company_status_history
Columns:
| id | company_id | status_id | effective_date |
Data:
| 1 | 10 | 1 | 2016-12-30 00:00:00.000 |
| 2 | 10 | 5 | 2017-02-04 00:00:00.000 |
| 3 | 11 | 5 | 2017-06-05 00:00:00.000 |
| 4 | 11 | 1 | 2018-04-30 00:00:00.000 |
I want to answer to the question "Get all companies that have been at least for some point in status 1 inside the time period 01/01/2017 - 31/12/2017"
Above are the cases that I don't know how to handle since I need to add some logic of type :
"If this row is status 1 and it's date is before the date range check the next row if it has a date inside the date range."
"If this row is status 1 and it's date is after the date range check the row before if it has a date inside the date range."
I think this can be handled as a gaps and islands problem. Consider the following input data: (same as sample data of OP plus two additional rows)
id company_id status_id effective_date
-------------------------------------------
1 10 1 2016-12-15
2 10 1 2016-12-30
3 10 5 2017-02-04
4 10 4 2017-02-08
5 11 5 2017-06-05
6 11 1 2018-04-30
You can use the following query:
SELECT t.id, t.company_id, t.status_id, t.effective_date, x.cnt
FROM company_status_history AS t
OUTER APPLY
(
SELECT COUNT(*) AS cnt
FROM company_status_history AS c
WHERE c.status_id = 1
AND c.company_id = t.company_id
AND c.effective_date < t.effective_date
) AS x
ORDER BY company_id, effective_date
to get:
id company_id status_id effective_date grp
-----------------------------------------------
1 10 1 2016-12-15 0
2 10 1 2016-12-30 1
3 10 5 2017-02-04 2
4 10 4 2017-02-08 2
5 11 5 2017-06-05 0
6 11 1 2018-04-30 0
Now you can identify status = 1 islands using:
;WITH CTE AS
(
SELECT t.id, t.company_id, t.status_id, t.effective_date, x.cnt
FROM company_status_history AS t
OUTER APPLY
(
SELECT COUNT(*) AS cnt
FROM company_status_history AS c
WHERE c.status_id = 1
AND c.company_id = t.company_id
AND c.effective_date < t.effective_date
) AS x
)
SELECT id, company_id, status_id, effective_date,
ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) -
cnt AS grp
FROM CTE
Output:
id company_id status_id effective_date grp
-----------------------------------------------
1 10 1 2016-12-15 1
2 10 1 2016-12-30 1
3 10 5 2017-02-04 1
4 10 4 2017-02-08 2
5 11 5 2017-06-05 1
6 11 1 2018-04-30 2
Calculated field grp will help us identify those islands:
;WITH CTE AS
(
SELECT t.id, t.company_id, t.status_id, t.effective_date, x.cnt
FROM company_status_history AS t
OUTER APPLY
(
SELECT COUNT(*) AS cnt
FROM company_status_history AS c
WHERE c.status_id = 1
AND c.company_id = t.company_id
AND c.effective_date < t.effective_date
) AS x
), CTE2 AS
(
SELECT id, company_id, status_id, effective_date,
ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) -
cnt AS grp
FROM CTE
)
SELECT company_id,
MIN(effective_date) AS start_date,
CASE
WHEN COUNT(*) > 1 THEN DATEADD(DAY, -1, MAX(effective_date))
ELSE MIN(effective_date)
END AS end_date
FROM CTE2
GROUP BY company_id, grp
HAVING COUNT(CASE WHEN status_id = 1 THEN 1 END) > 0
Output:
company_id start_date end_date
-----------------------------------
10 2016-12-15 2017-02-03
11 2018-04-30 2018-04-30
All you want know is those records from above that overlap with the specified interval.
Demo here with somewhat more complicated use case.
Maybe this is what you are looking for? For these kind of questions, you need to join two instance of your table, in this case I am just joining with next record by Id, which probably is not totally correct. To do it better, you can create a new Id using a windowed function like row_number, ordering the table by your requirement criteria
If this row is status 1 and it's date is before the date range check
the next row if it has a date inside the date range
declare #range_st date = '2017-01-01'
declare #range_en date = '2017-12-31'
select
case
when csh1.status_id=1 and csh1.effective_date<#range_st
then
case
when csh2.effective_date between #range_st and #range_en then true
else false
end
else NULL
end
from company_status_history csh1
left join company_status_history csh2
on csh1.id=csh2.id+1
Implementing second criteria:
"If this row is status 1 and it's date is after the date range check
the row before if it has a date inside the date range."
declare #range_st date = '2017-01-01'
declare #range_en date = '2017-12-31'
select
case
when csh1.status_id=1 and csh1.effective_date<#range_st
then
case
when csh2.effective_date between #range_st and #range_en then true
else false
end
when csh1.status_id=1 and csh1.effective_date>#range_en
then
case
when csh3.effective_date between #range_st and #range_en then true
else false
end
else null -- ¿?
end
from company_status_history csh1
left join company_status_history csh2
on csh1.id=csh2.id+1
left join company_status_history csh3
on csh1.id=csh3.id-1
I would suggest the use of a cte and the window functions ROW_NUMBER. With this you can find the desired records. An example:
DECLARE #t TABLE(
id INT
,company_id INT
,status_id INT
,effective_date DATETIME
)
INSERT INTO #t VALUES
(1, 10, 1, '2016-12-30 00:00:00.000')
,(2, 10, 5, '2017-02-04 00:00:00.000')
,(3, 11, 5, '2017-06-05 00:00:00.000')
,(4, 11, 1, '2018-04-30 00:00:00.000')
DECLARE #StartDate DATETIME = '2017-01-01';
DECLARE #EndDate DATETIME = '2017-12-31';
WITH cte AS(
SELECT *
,ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) AS rn
FROM #t
),
cteLeadLag AS(
SELECT c.*, ISNULL(c2.effective_date, c.effective_date) LagEffective, ISNULL(c3.effective_date, c.effective_date)LeadEffective
FROM cte c
LEFT JOIN cte c2 ON c2.company_id = c.company_id AND c2.rn = c.rn-1
LEFT JOIN cte c3 ON c3.company_id = c.company_id AND c3.rn = c.rn+1
)
SELECT 'Included' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date BETWEEN #StartDate AND #EndDate
UNION ALL
SELECT 'Following' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date > #EndDate
AND LagEffective BETWEEN #StartDate AND #EndDate
UNION ALL
SELECT 'Trailing' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date < #EndDate
AND LeadEffective BETWEEN #StartDate AND #EndDate
I first select all records with their leading and lagging Dates and then I perform your checks on the inclusion in the desired timespan.
Try with this, self-explanatory. Responds to this part of your question:
I want to answer to the question "Get all companies that have been at
least for some point in status 1 inside the time period 01/01/2017 -
31/12/2017"
Case that you want to find those id's that have been in any moment in status 1 and have records in the period requested:
SELECT *
FROM company_status_history
WHERE id IN
( SELECT Id
FROM company_status_history
WHERE status_id=1 )
AND effective_date BETWEEN '2017-01-01' AND '2017-12-31'
Case that you want to find id's in status 1 and inside the period:
SELECT *
FROM company_status_history
WHERE status_id=1
AND effective_date BETWEEN '2017-01-01' AND '2017-12-31'

How to duplicate data in sql with conditions

I havea table as table_A . table_A includes these columns
-CountryName
-Min_Date
-Max_Date
-Number
I want to duplicate data with seperating by months. For example
Argentina | 2015-01-04 | 2015-04-07 | 100
England | 2015-02-08 | 2015-03-11 | 90
I want to see a table as this (Monthly seperated)
Argentina | 01-2015 | 27 //(days to end of the min_date's month)
Argentina | 02-2015 | 29 //(days full month)
Argentina | 03-2015 | 31 //(days full month)
Argentina | 04-2015 | 7 //(days from start of the max_date's month)
England | 02-2015 | 21 //(days)
England | 03-2015 | 11 //(days)
I tried too much thing to made this for each records. But now my brain is so confusing and my project is delaying.
Does anybody know how can i solve this. I tried to duplicate each rows with datediff count but it is not working
WITH cte AS (
SELECT CountryName, ISNULL(DATEDIFF(M,Min_Date ,Max_Date )+1,1) as count FROM table_A
UNION ALL
SELECT CountryName, count-1 FROM cte WHERE count>1
)
SELECT CountryName,count FROM cte
-Generate all the dates between min and max dates for each country.
-Then get the month start and month end dates for each country,year,month.
-Finally get the date differences of the month start and month end.
WITH cte AS (
SELECT Country, min_date dt,min_date,max_date FROM t
UNION ALL
SELECT Country, dateadd(dd,1,dt),min_date,max_date FROM cte WHERE dt < max_date
)
,monthends as (
SELECT country,year(dt) yr,month(dt) mth,max(dt) monthend,min(dt) monthstart
FROM cte
GROUP BY country,year(dt),month(dt))
select country
,cast(mth as varchar(2))+'-'+cast(yr as varchar(4)) yr_month
,datediff(dd,monthstart,monthend)+1 days_diff
from monthends
Sample Demo
EDIT: Another option would be to generate all the dates once (the example shown here generates 51 years of dates from 2000 to 2050) and then joining it to the table to get the days by month.
WITH cte AS (
SELECT cast('2000-01-01' as date) dt,cast('2050-12-31' as date) maxdt
UNION ALL
SELECT dateadd(dd,1,dt),maxdt FROM cte WHERE dt < maxdt
)
SELECT country,year(dt) yr,month(dt) mth, datediff(dd,min(dt),max(dt))+1 days_diff
FROM cte c
JOIN t on c.dt BETWEEN t.min_date and t.max_date
GROUP BY country,year(dt),month(dt)
OPTION (MAXRECURSION 0)
I think you have the right idea. But you need to construct the months:
WITH cte AS (
SELECT CountryName, Min_Date as dte, Min_Date, Max_Date
FROM table_A
UNION ALL
SELECT CountryName, DATEADD(month, 1, dte), Min_Date, Max_Date
FROM cte
WHERE dte < Max_date
)
SELECT CountryName, dte
FROM cte;
Getting the number of days in the month is a bit more complicated. That requires some thought.
Oh, I forgot about EOMONTH():
select countryName, dte,
(case when dte = min_date
then datediff(day, min_date, eomonth(dte)) + 1
when dte = max_date
then day(dte)
else day(eomonth(dte))
end) as days
from cte;
Using a Calendar Table makes this stuff pretty easy. RexTester: http://rextester.com/EBTIMG23993
begin
create table #enderaric (
CountryName varchar(16)
, Min_Date date
, Max_Date date
, Number int
)
insert into #enderaric values
('Argentina' ,'2015-01-04' ,'2015-04-07' ,'100')
, ('England' ,'2015-02-08' ,'2015-03-11' ,'90')
end;
-- select * from #enderaric
--*/"
declare #FromDate date;
declare #ThruDate date;
set #FromDate = '2015-01-01';
set #ThruDate = '2015-12-31';
with x as (
select top (cast(sqrt(datediff(day, #FromDate, #ThruDate)) as int) + 1)
[number]
from [master]..spt_values v
)
/* Date Range CTE */
,cal as (
select top (1+datediff(day, #FromDate, #ThruDate))
DateValue = convert(date,dateadd(day,
row_number() over (order by x.number)-1,#FromDate)
)
from x cross join x as y
order by DateValue
)
select
e.CountryName
, YearMonth = convert(char(7),left(convert(varchar(10),DateValue),7))
, [Days]=count(c.DateValue)
from #enderaric as e
inner join cal c on c.DateValue >= e.min_date
and c.DateValue <= e.max_date
group by
e.CountryName
, e.Min_Date
, e.Max_Date
, e.Number
, convert(char(7),left(convert(varchar(10),DateValue),7))
results in:
CountryName YearMonth Days
---------------- --------- -----------
Argentina 2015-01 28
Argentina 2015-02 28
Argentina 2015-03 31
Argentina 2015-04 7
England 2015-02 21
England 2015-03 11
More about calendar tables:
Aaron Bertrand - Generate a set or sequence without loops
generate-a-set-1
generate-a-set-2
generate-a-set-3
David Stein - Creating a Date Table/Dimension on SQL 2008
Michael Valentine Jones - F_TABLE_DATE

start date end date combine rows

In Redshift, through SQL script want to consolidate monthly records as long as gap between the end date of first and the start date of the next record is 32 days or less (<=32) into single record with minimum startdate of continuous month as output startdate and maximum of end date of continuous month as output enddate.
The below input data refers to the table's data and also listed the expected output. The input data is listed ORDER BY ID,STARTDT,ENDDT in ASC.
For example, in below table, consider ID 100, the gab between the end of the first record and start of the next record <=32, however gap between the second record end date and third records start date falls more than 32 days, hence the first two records to be consolidate into one record i.e. (ID),MIN(STARTSDT),MAX(ENDDT) which corresponds to first record in the expected output. Similarly gab between 3 and 4 record in the input data falls within the 32 days and thus these 2 records to be consolidated into single records which corresponds to the second record in the expected output.
INPUT DATA:
ID STARTDT ENDDT
100 2000-01-01 2000-01-31
100 2000-02-01 2000-02-29
100 2000-05-01 2000-05-31
100 2000-06-01 2000-06-30
100 2000-09-01 2000-09-30
100 2000-10-01 2000-10-31
101 2012-06-01 2012-06-30
101 2012-07-01 2012-07-31
102 2000-01-01 2000-01-31
103 2013-03-01 2013-03-31
103 2013-05-01 2013-05-31
EXPECTED OUTPUT:
ID MIN_STARTDT MAX_END_DT
100 2000-01-01 2000-02-29
100 2000-05-01 2000-06-30
100 2000-09-01 2000-10-31
101 2012-06-01 2012-07-31
102 2000-01-01 2000-01-31
103 2013-03-01 2013-03-31
103 2013-05-01 2013-05-31
You can do this in steps:
Use a join to identify where two adjacent records should be combined.
Then do a cumulative sum to assign all such adjacent records a grouping identifier.
Aggregate.
It looks like:
select id, min(startdt), max(enddte)
from (select t.*,
count(case when tprev.id is null then 1 else 0 end) over
(partition by t.idid
order by t.startdt
rows between unbounded preceding and current row
) as grp
from t left join
t tprev
on t.id = tprev.id and
t.startdt = tprev.enddt + interval '1 day'
) t
group by id, grp;
The question is very similar to this one and my answer is also similar: Fetch rows based on condition
The gist of the idea is to use Window Functions to identify transitions between period (events which are less than 33 days apart), and then do some filtering to remove the rows within the period, and then Window Functions again.
Complete solution:
SELECT
id,
startdt AS period_start,
period_end
FROM (
SELECT
id,
startdt,
enddt,
lead(enddt, 1)
OVER (PARTITION BY id
ORDER BY enddt) AS period_end,
period_boundary
FROM (
SELECT
id,
startdt,
enddt,
CASE WHEN period_switch = 0 AND reverse_period_switch = 1
THEN 'start'
ELSE 'end' END AS period_boundary
FROM (
SELECT
id,
startdt,
enddt,
CASE WHEN datediff(days, enddt, lead(startdt, 1)
OVER (PARTITION BY id
ORDER BY enddt ASC)) > 32
THEN 1
ELSE 0 END AS period_switch,
CASE WHEN datediff(days, lead(enddt, 1)
OVER (PARTITION BY id
ORDER BY enddt DESC), startdt) > 32
THEN 1
ELSE 0 END AS reverse_period_switch
FROM date_test
)
AS sessioned
WHERE period_switch != 0 OR reverse_period_switch != 0
UNION
SELECT -- adding start rows without transition
id,
startdt,
enddt,
'start'
FROM (
SELECT
id,
startdt,
enddt,
row_number()
OVER (PARTITION BY id
ORDER BY enddt ASC) AS row_num
FROM date_test
) AS with_row_number
WHERE row_num = 1
UNION
SELECT -- adding end rows without transition
id,
startdt,
enddt,
'end'
FROM (
SELECT
id,
startdt,
enddt,
row_number()
OVER (PARTITION BY id
ORDER BY enddt desc) AS row_num
FROM date_test
) AS with_row_number
WHERE row_num = 1
) AS with_boundary -- data set containing start/end boundaries
) AS with_end -- data set where end date is propagated into the start row of the period
WHERE period_boundary = 'start'
ORDER BY id, startdt ASC;
Note that in your expected output, you had a row for 103 2013-05-01 2013-05-31, however its start date is 31 days apart from end date of the previous row, so this row should instead be merged with the previous row for id 103 according to your requirements.
So the output that I get looks like this:
id start end
100 2000-01-01 2000-02-29
100 2000-05-01 2000-06-30
100 2000-09-01 2000-10-31
101 2012-06-01 2012-07-31
102 2000-01-01 2000-01-31
103 2013-03-01 2013-05-31

Find From/To Dates across multiple rows - SQL Postgres

I want to be able to "book" within range of dates, but you can't book across gaps of days. So booking across multiple rates is fine as long as they are contiguous.
I am happy to change data structure/index, if there are better ways of storing start/end ranges.
So far I have a "rates" table which contains Start/End Periods of time with a daily rate.
e.g. Rates Table.
ID Price From To
1 75.00 2015-04-12 2016-04-15
2 100.00 2016-04-16 2016-04-17
3 50.00 2016-04-18 2016-04-30
For the above data I would want to return:
From To
2015-04-12 2016-4-30
For simplicity sake it is safe to assume that dates are safely consecutive. For contiguous dates To is always 1 day before from.
For the case there is only 1 row, I would want it to return the From/To of that single row.
Also to clarify if I had the following data:
ID Price From To
1 75.00 2015-04-12 2016-04-15
2 100.00 2016-04-17 2016-04-18
3 50.00 2016-04-19 2016-04-30
4 50.00 2016-05-01 2016-05-21
Meaning where there is a gap >= 1 day it would count as a separate range.
In which case I would expect the following:
From To
2015-04-12 2016-04-15
2015-04-17 2016-05-21
Edit 1
After playing around I have come up with the following SQL which seems to work. Although I'm not sure if there are better ways/issues with it?
WITH grouped_rates AS
(SELECT
from_date,
to_date,
SUM(grp_start) OVER (ORDER BY from_date, to_date) group
FROM (SELECT
gite_id,
from_date,
to_date,
CASE WHEN (from_date - INTERVAL '1 DAY') = lag(to_date)
OVER (ORDER BY from_date, to_date)
THEN 0
ELSE 1
END grp_start
FROM rates
GROUP BY from_date, to_date) AS start_groups)
SELECT
min(from_date) from_date,
max(to_date) to_date
FROM grouped_rates
GROUP BY grp;
This is identifying contiguous overlapping groups in the data. One approach is to find where each group begins and then do a cumulative sum. The following query adds a flag indicating if a row starts a group:
select r.*,
(case when not exists (select 1
from rates r2
where r2.from < r.from and r2.to >= r.to or
(r2.from = r.from and r2.id < r.id)
)
then 1 else 0 end) as StartFlag
from rate r;
The or in the correlation condition is to handle the situation where intervals that define a group overlap on the start date for the interval.
You can then do a cumulative sum on this flag and aggregate by that sum:
with r as (
select r.*,
(case when not exists (select 1
from rates r2
where (r2.from < r.from and r2.to >= r.to) or
(r2.from = r.from and r2.id < r.id)
)
then 1 else 0 end) as StartFlag
from rate r
)
select min(from), max(to)
from (select r.*,
sum(r.StartFlag) over (order by r.from) as grp
from r
) r
group by grp;
CREATE TABLE prices( id INTEGER NOT NULL PRIMARY KEY
, price MONEY
, date_from DATE NOT NULL
, date_upto DATE NOT NULL
);
-- some data (upper limit is EXCLUSIVE)
INSERT INTO prices(id, price, date_from, date_upto) VALUES
( 1, 75.00, '2015-04-12', '2016-04-16' )
,( 2, 100.00, '2016-04-17', '2016-04-19' )
,( 3, 50.00, '2016-04-19', '2016-05-01' )
,( 4, 50.00, '2016-05-01', '2016-05-22' )
;
-- SELECT * FROM prices;
-- Recursive query to "connect the dots"
WITH RECURSIVE rrr AS (
SELECT date_from, date_upto
, 1 AS nperiod
FROM prices p0
WHERE NOT EXISTS (SELECT * FROM prices nx WHERE nx.date_upto = p0.date_from) -- no preceding segment
UNION ALL
SELECT r.date_from, p1.date_upto
, 1+r.nperiod AS nperiod
FROM prices p1
JOIN rrr r ON p1.date_from = r.date_upto
)
SELECT * FROM rrr r
WHERE NOT EXISTS (SELECT * FROM prices nx WHERE nx.date_from = r.date_upto) -- no following segment
;
Result:
date_from | date_upto | nperiod
------------+------------+---------
2015-04-12 | 2016-04-16 | 1
2016-04-17 | 2016-05-22 | 3
(2 rows)