SQL Server - Get row based on a sum - sql

I need to calculate 5 working days from a given date, based on the table below.
5 working days from Jan 9 is Jan 16 because the sum of the working_days column below between those dates is 5.
Here is SQL that I used.
-- For every start_date, find the latest end_date such that working_days summed
-- over the inclusive range [start_date, end_date] equals 5.
WITH dates AS
(
-- Every ordered pair of calendar rows where the first date precedes the second.
SELECT t_from.start_date, t_to.start_date end_date
FROM #t t_from, #t t_to
WHERE t_from.start_date < t_to.start_date
),
sum_days AS
(
SELECT
start_date, end_date,
-- Correlated subquery: working_days is re-summed for each candidate pair.
(SELECT SUM(t_sum.working_days)
FROM #t t_sum
WHERE t_sum.start_date BETWEEN d.start_date AND d.end_date) tot_days
FROM
dates d
)
SELECT
-- Several end dates can share a total of 5 (rows with working_days = 0 add nothing); MAX keeps the latest.
start_date, MAX(end_date) end_date
FROM
sum_days
WHERE
tot_days = 5
GROUP BY
start_date
It works, but it is inefficient. The real table that I'm using has 1,000 rows, and it takes over 1 minute for the query to return.
My question: is there a better way?
Input:
start_date
working_days
2023-01-09
1
2023-01-10
1
2023-01-11
1
2023-01-12
1
2023-01-13
1
2023-01-14
0
2023-01-15
0
2023-01-16
0
2023-01-17
1
2023-01-18
1
2023-01-19
1
2023-01-20
1
2023-01-21
0
2023-01-22
0
2023-01-23
1
2023-01-24
1
Desired output:
start_date
end_date
2023-01-09
2023-01-16
2023-01-10
2023-01-17
2023-01-11
2023-01-18
2023-01-12
2023-01-19
2023-01-13
2023-01-22
2023-01-14
2023-01-23
2023-01-15
2023-01-23
2023-01-16
2023-01-23
2023-01-17
2023-01-23
2023-01-18
2023-01-24
SQL to create the table:
drop table if exists #t;
GO
-- Sample calendar: one row per day, working_days = 1 for a working day, 0 otherwise.
-- The columns are declared explicitly so start_date is a real date (SELECT ... INTO
-- from a string literal would create a varchar column). The primary key gives the
-- table a clustered index on start_date.
create table #t
(
    start_date   date not null primary key,
    working_days int  not null
);
GO
insert into #t (start_date, working_days)
values ('2023-01-09', 1),
       ('2023-01-10', 1),
       ('2023-01-11', 1),
       ('2023-01-12', 1),
       ('2023-01-13', 1),
       ('2023-01-14', 0),
       ('2023-01-15', 0),
       ('2023-01-16', 0),
       ('2023-01-17', 1),
       ('2023-01-18', 1),
       ('2023-01-19', 1),
       ('2023-01-20', 1),
       ('2023-01-21', 0),
       ('2023-01-22', 0),
       ('2023-01-23', 1),
       ('2023-01-24', 1);
GO

FROM #t t_from, #t t_to
where t_from.start_date < t_to.start_date
is a "triangular" join. It is not quite as bad as a cross join but getting that way (rows returned are N*(N-1)/2 rather than N*N).
This will not scale with large numbers of rows in #t.
One way of getting your desired results (db fiddle) is
-- For each start_date, return the latest date whose running working-day count
-- equals the count needed to have accumulated 5 working days from start_date.
WITH Dates AS (
    -- Running count of working days up to and including each date.
    -- ROWS is spelled out instead of relying on the default RANGE frame;
    -- the result is the same as long as there is one row per start_date.
    SELECT start_date,
           working_days,
           SUM(working_days) OVER (ORDER BY start_date
                                   ROWS UNBOUNDED PRECEDING) AS working_day_count
    FROM #t
)
SELECT D1.start_date,
       MAX(D2.start_date) AS end_date
FROM Dates AS D1
INNER JOIN Dates AS D2
    -- Subtracting D1.working_days makes the start date itself count as day 1
    -- when it is a working day.
    ON D2.working_day_count = D1.working_day_count + 5 - D1.working_days
GROUP BY D1.start_date;
This calculates the running total efficiently. Potentially a solution will be provided that does it all in one pass rather than requiring the self join above but this is at least an equi join and should be a lot faster in your 1,000 row case than your current method.

Out of curiosity, look at the solution without JOIN
-- Works on the working days only, using lead() to look ahead, then re-creates
-- the rows for non-working days from a small list of day offsets.
with cte as (
select start_date,working_days
,lead(start_date,1)over(order by start_date ) d1
,lead(start_date,4)over(order by start_date ) d2 -- target date
,lead(start_date,5)over(order by start_date ) d3 --target+1, if not_working_days between d2 and d3
from (select * from #t where working_days=1) t -- dates, only working_days
)
,t2 as(
select *
-- If there is a gap between the 5th working day (d2) and the next working day (d3),
-- the range is extended to the day before d3.
,case when datediff(d,d2,d3)>1 then dateadd(d,-1,d3)
else d2
end end_date
-- dn = number of calendar days from this working day to the next working day.
,datediff(d,start_date,d1) dn
from cte
)
select
-- n = 0 is the working day itself; n > 0 are the calendar days that follow it
-- before the next working day.
dateadd(d,isnull(n,0),start_date) start_date
,case when isnull(n,0)=0 then working_days else 0 end working_days
-- NOTE(review): for restored rows this evaluates to d3 in both branches of end_date above;
-- that looks right only when the day after d3 is itself a working day - verify on other calendars.
,case when isnull(n,0)=0 then end_date else dateadd(d,1,end_date) end end_date
-- The offset list stops at 5, so at most 5 consecutive non-working days are restored per working day.
from t2 left join (values(0),(1),(2),(3),(4),(5))nn(n) --to restore not_working_days
on nn.n<t2.dn
If there is an opportunity to compare the cost, it will be interesting.
Test example

-- Running working-day total per date, then for each date look up the latest
-- date whose total matches "5 working days from here".
with running as (
    select start_date as dt,
           working_days as adj,
           sum(cast(working_days as int)) over (order by start_date) as ofs
    from #t
)
select s.dt as start_date,
       e.dt as end_date
from running as s
cross apply (
    -- MAX picks the last date sharing the target running total.
    select max(c.dt)
    from running as c
    where c.ofs = s.ofs + 5 - s.adj
) as e(dt)
where e.dt is not null;
Basically the same as above, but cross apply might be an improvement. In my experiments this seems to favor a clustered index on start_date.
Or you could just search via lead():
-- Same running total as before, but the matching end date is searched for
-- among the rows 6 to 9 positions ahead instead of via a join.
with running as (
    select start_date as dt,
           working_days as adj,
           sum(cast(working_days as int)) over (order by start_date) as ofs
    from #t
),
candidates as (
    select dt as start_date,
           -- Farthest offset is tested first so the latest matching date wins.
           case
               when ofs + 5 - adj = lead(ofs, 9) over (order by dt) then lead(dt, 9) over (order by dt)
               when ofs + 5 - adj = lead(ofs, 8) over (order by dt) then lead(dt, 8) over (order by dt)
               when ofs + 5 - adj = lead(ofs, 7) over (order by dt) then lead(dt, 7) over (order by dt)
               when ofs + 5 - adj = lead(ofs, 6) over (order by dt) then lead(dt, 6) over (order by dt)
           end as end_date
    from running
)
select start_date,
       end_date
from candidates
where end_date is not null;
This query assumes at least two days off per week and a maximum of four-day weekends to limit the number of dates that need to be searched. Expand as necessary.
Check out the fiddle here with a demonstration that both of these approaches seem to generate cheaper plans: https://dbfiddle.uk/1SdBRmmg
There is a way to use a single pass over the data. You'll have to wrap it up in a table expression to filter the null values near the end of the calendar.
-- Single pass: test the working-day sum over the next 9, 8, 7 and 6 rows
-- (farthest first) and add the matching offset to start_date.
-- Each branch also requires that the row at that offset exists. Near the end of
-- the calendar the frame is cut short, and a shortened frame can still sum to 5,
-- which would otherwise produce an end date beyond the last row of #t.
select start_date,
       dateadd(day,
               case
                   when lead(start_date, 9) over (order by start_date) is not null
                    and sum(working_days) over (order by start_date rows between current row and 9 following) = 5 then 9
                   when lead(start_date, 8) over (order by start_date) is not null
                    and sum(working_days) over (order by start_date rows between current row and 8 following) = 5 then 8
                   when lead(start_date, 7) over (order by start_date) is not null
                    and sum(working_days) over (order by start_date rows between current row and 7 following) = 5 then 7
                   when lead(start_date, 6) over (order by start_date) is not null
                    and sum(working_days) over (order by start_date rows between current row and 6 following) = 5 then 6
               end,  -- no match: NULL offset, so end_date is NULL
               start_date) as end_date
from #t;
And just for fun you could do this with range over (https://dbfiddle.uk/cm3KJO-W) just not on SQL Server yet:
-- ofs is the running working-day total including the current row.
-- A working start date already counts as day 1, so its end date has ofs + 4;
-- a non-working start date contributes nothing, so its end date has ofs + 5.
-- Frame offsets must be constants, hence one window per case
-- (assumes working_days is only ever 0 or 1).
with data as (
    select start_date,
           working_days,
           sum(working_days) over (order by start_date) as ofs
    from t
)
select start_date,
       case
           when working_days = 0
               then max(start_date) over (order by ofs range between 5 following and 5 following)
           else max(start_date) over (order by ofs range between 4 following and 4 following)
       end as end_date
from data;

Related

SQL: How to create a daily view based on different time intervals using SQL logic?

Here is an example:
Id|price|Date
1|2|2022-05-21
1|3|2022-06-15
1|2.5|2022-06-19
Needs to look like this:
Id|Date|price
1|2022-05-21|2
1|2022-05-22|2
1|2022-05-23|2
...
1|2022-06-15|3
1|2022-06-16|3
1|2022-06-17|3
1|2022-06-18|3
1|2022-06-19|2.5
1|2022-06-20|2.5
...
Until today
1|2022-08-30|2.5
I tried using the lag(price) over (partition by id order by date)
But i can't get it right.
I'm not familiar with Azure, but it looks like you need to use a calendar table, or generate missing dates using a recursive CTE.
To get started with a recursive CTE, you can generate row numbers for each id (assuming multiple id values) in the source data, ordered by date. The rows with row number equal to 1 (those with the minimum date value for the corresponding id) will be used as the starting point for the recursion. Then you can use the DATEADD function to generate the row for the next day. To use the price values from the original data, you can use a subquery to get the price for this new date, and if there is no such value (no row for this date), use the previous price value from the CTE (use the COALESCE function for this).
For SQL Server query can look like this
-- Expand each id's price history into one row per day up to the current date.
WITH first_day AS (
    -- Number each id's rows by date so the earliest one can seed the recursion.
    SELECT
        id,
        date,
        price,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY date) AS rn
    FROM tbl
),
cte AS (
    SELECT
        id,
        date,
        price
    FROM first_day
    WHERE rn = 1
    UNION ALL
    -- Step one day forward; take that day's stored price if there is one,
    -- otherwise carry the previous day's price.
    SELECT
        prev.id,
        DATEADD(d, 1, prev.date),
        COALESCE(
            (SELECT src.price
             FROM tbl AS src
             WHERE src.id = prev.id AND src.date = DATEADD(d, 1, prev.date)),
            prev.price
        )
    FROM cte AS prev
    WHERE DATEADD(d, 1, prev.date) <= GETDATE()
)
SELECT
    id,
    date,
    price
FROM cte
ORDER BY id, date
OPTION (MAXRECURSION 0)
Note that I added OPTION (MAXRECURSION 0) to make the recursion run through all the steps, since the default value is 100, this is not enough to complete the recursion.
db<>fiddle here
The same approach for MySQL (you need MySQL of version 8.0 to use CTE)
-- Expand each id's price history into one row per day up to the current time.
WITH RECURSIVE first_day AS (
    -- Number each id's rows by date so the earliest one can seed the recursion.
    SELECT
        id,
        date,
        price,
        ROW_NUMBER() OVER (PARTITION BY id ORDER BY date) AS rn
    FROM tbl
),
cte AS (
    SELECT
        id,
        date,
        price
    FROM first_day
    WHERE rn = 1
    UNION ALL
    -- Step one day forward; take that day's stored price if there is one,
    -- otherwise carry the previous day's price.
    SELECT
        prev.id,
        prev.date + INTERVAL 1 DAY,
        COALESCE(
            (SELECT src.price
             FROM tbl AS src
             WHERE src.id = prev.id AND src.date = prev.date + INTERVAL 1 DAY),
            prev.price
        )
    FROM cte AS prev
    WHERE prev.date + INTERVAL 1 DAY <= NOW()
)
SELECT
    id,
    date,
    price
FROM cte
ORDER BY id, date
db<>fiddle here
Both queries produce the same results; the only difference is the use of each engine's specific date functions.
For MySQL versions below 8.0, you can use a calendar table since you don't have CTE support and can't generate the required date range.
Assuming there is a column in the calendar table to store date values (let's call it date for simplicity), you can use the CROSS JOIN operator to generate, for each id value in your table, the range of calendar dates starting at that id's earliest date. Then you can use a subquery to get the latest price value stored in the table for the corresponding date or before it.
So the query would be like this
-- One row per id per calendar day from the id's first date until now,
-- priced with the most recent price recorded on or before that day.
SELECT
    d.id,
    d.date,
    (SELECT src.price
     FROM tbl AS src
     WHERE src.id = d.id AND src.date <= d.date
     ORDER BY src.date DESC
     LIMIT 1
    ) AS price
FROM (
    SELECT
        f.id,
        c.date
    FROM (
        -- Earliest recorded date per id.
        SELECT id, MIN(date) AS min_date
        FROM tbl
        WHERE id IS NOT NULL
        GROUP BY id
    ) AS f
    INNER JOIN calendar AS c
        ON c.date BETWEEN f.min_date AND NOW()
) AS d
ORDER BY id, date
Using my pseudo-calendar table with date values ranging from 2022-05-20 to 2022-05-30 and source data in that range, like so
id
price
date
1
2
2022-05-21
1
3
2022-05-25
1
2.5
2022-05-28
2
10
2022-05-25
2
100
2022-05-30
the query produces following results
id
date
price
1
2022-05-21
2
1
2022-05-22
2
1
2022-05-23
2
1
2022-05-24
2
1
2022-05-25
3
1
2022-05-26
3
1
2022-05-27
3
1
2022-05-28
2.5
1
2022-05-29
2.5
1
2022-05-30
2.5
2
2022-05-25
10
2
2022-05-26
10
2
2022-05-27
10
2
2022-05-28
10
2
2022-05-29
10
2
2022-05-30
100
db<>fiddle here

How to cross join but using latest value in BIGQUERY

I have this table below
date
id
value
2021-01-01
1
3
2021-01-04
1
5
2021-01-05
1
10
And I expect output like this, where the date column is always increase daily and value column will generate the last value on an id
date
id
value
2021-01-01
1
3
2021-01-02
1
3
2021-01-03
1
3
2021-01-04
1
5
2021-01-05
1
10
2021-01-06
1
10
I think I can use cross join but I can't get my expected output and think that there are a special syntax/logic to solve this
Consider below approach
-- Existing rows, plus one generated row for every date missing between
-- consecutive rows of the same id, carrying the earlier row's id and value.
select date, id, value
from `project.dataset.table`
union all
select gap_day, paired.prev.id, paired.prev.value
from (
  -- prev holds the whole preceding row of the same id (NULL for the first row).
  select cur.date as date,
         lag(cur) over (partition by cur.id order by cur.date) as prev
  from `project.dataset.table` as cur
) as paired
cross join unnest(generate_date_array(
  date_add(paired.prev.date, interval 1 day),
  date_sub(paired.date, interval 1 day)
)) as gap_day
I would write this using:
-- Each source row is expanded into one row per day from its own date up to
-- the day before the next row of the same id.
select dte, t.id, t.value
from (select t.*,
             -- next_day is the date of the following row for the same id. Because lead()
             -- is given a default, it is never NULL, so the ifnull fallback below is not reached.
             lead(date, 1, date '2021-01-06') over (partition by id order by date) as next_day
      from `table` t
     ) t cross join
     unnest(generate_date_array(
              date,
              ifnull(
                date_add(next_day, interval -1 day), -- generate missing date rows
                (select max(date) from `table`)      -- add last row
              )
            )) dte;
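Note that this does not require union all; the only window function involved is lead(), which finds where each row's date range ends, so the values are filled in by the expansion itself.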
alternative solution using last_value. You may explore the following query and customize your logic to generate days (if needed)
-- Build every day between the first and last recorded date, left join the
-- recorded rows onto it, and carry the latest non-null id/value forward.
with source_rows as (
  select date, id, value
  from `mydataset.newtable`
),
bounds as (
  select min(date) as min_dt, max(date) as max_dt
  from source_rows
),
generated_days as (
  select day
  from bounds
  cross join unnest(generate_date_array(min_dt, max_dt)) as day
)
select
  g.day,
  -- The window is not partitioned, so values are carried across all rows in day order.
  last_value(q.id ignore nulls) over (order by g.day) as id,
  last_value(q.value ignore nulls) over (order by g.day) as value
from generated_days as g
left join source_rows as q
  on g.day = q.date
order by g.day

Window functions with missing data

Assume that I have a table (MyTable) as follows:
item_id | date
----------------
1 | 2016-06-08
1 | 2016-06-07
1 | 2016-06-05
1 | 2016-06-04
1 | 2016-05-31
...
2 | 2016-06-08
2 | 2016-06-06
2 | 2016-06-04
2 | 2016-05-31
...
3 | 2016-05-31
...
I would like to build a weekly summary table that reports on a running 7 day window. The window would basically say "How many unique item_ids were reported in the preceding 7 days"?
So, in this case, the output table would look something like:
date | weekly_ids
----------------------
2016-05-31| 3 # All 3 were present on the 31st
2016-06-01| 3 # All 3 were present on the 31st which is < 7 days before the 1st
2016-06-02| 3 # Same
2016-06-03| 3 # Same
2016-06-04| 3 # Same
2016-06-05| 3 # Same
2016-06-06| 3 # Same
2016-06-07| 3 # Same
2016-06-08| 2 # item 3 was not present for the entire last week so it does not add to the count.
I've tried:
# Attempt at a per-item "seen within the trailing window" flag.
# Kept as posted; the surrounding text explains it does not yet give the desired result.
SELECT
item_id,
date,
# The frame is 7 rows (6 preceding plus the current one), not 7 calendar days,
# so it spans a longer period whenever dates are missing for an item.
MAX(present) OVER (
PARTITION BY item_id
ORDER BY date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS is_present
FROM (
# Inner query
# Tags every existing (item_id, date) row with present = 1.
SELECT
item_id,
date,
1 AS present,
FROM MyTable
)
GROUP BY date
ORDER BY date DESC
This feels like it is going in the right direction. But as it is, the window runs over the wrong time-frame when dates aren't present (too many dates) and it also doesn't output records for dates when the item_id wasn't present (even if it was present on the previous date). Is there a simple resolution to this problem?
If it's helpful and necessary
I can hard-code an oldest date
I also can get a table of all of the item_ids in existence.
This query will only be run on BigQuery, so BQ specific functions/syntax are fair game and SQL functions/syntax that doesn't run on BigQuery unfortunately doesn't help me ...
I have created a temp table to hold dates, however, you probably would benefit from adding a permanent table to your database for these joins. Trust me it will cause less headaches.
-- Sample data held in a table variable (T-SQL variables use the @ prefix).
DECLARE @my_table TABLE
(
    item_id int,
    date    DATETIME
);
INSERT @my_table (item_id, date)
VALUES (1, '20160608'),
       (1, '20160607'),
       (1, '20160605'),
       (1, '20160604'),
       (1, '20160531'),
       (2, '20160608'),
       (2, '20160606'),
       (2, '20160604'),
       (2, '20160531'),
       (3, '20160531');

-- Window length and calendar bounds. Unseparated yyyymmdd literals are
-- interpreted the same way regardless of the session's language settings.
DECLARE @TrailingDays INT = 7;
DECLARE @LowDate DATETIME = '20160101';
DECLARE @HighDate DATETIME = '20161231';

-- Calendar with one row per day between @LowDate and @HighDate.
DECLARE @Calendar TABLE (CalendarDate DATETIME);
DECLARE @LoopDate DATETIME = @LowDate;
WHILE (@LoopDate <= @HighDate)
BEGIN
    INSERT @Calendar (CalendarDate) SELECT @LoopDate;
    SET @LoopDate = DATEADD(DAY, 1, @LoopDate);
END;

-- For every calendar day, count the distinct items seen between the day
-- @TrailingDays rows earlier (LowDate) and the day itself (HighDate), inclusive.
-- For the first rows LAG has no earlier row and falls back to 0, i.e. the datetime zero date.
SELECT
    date = X.HighDate,
    weekly_ids = COUNT(DISTINCT MT.item_id)
FROM
(
    SELECT
        HighDate = C.CalendarDate,
        LowDate = LAG(C.CalendarDate, @TrailingDays, 0) OVER (ORDER BY C.CalendarDate)
    FROM
        @Calendar C
    WHERE
        C.CalendarDate BETWEEN @LowDate AND @HighDate
) AS X
LEFT OUTER JOIN @my_table MT ON MT.date BETWEEN X.LowDate AND X.HighDate
GROUP BY
    X.LowDate,
    X.HighDate;
Try below example. It can give you direction to explore
Purely GBQ - Legacy SQL
# Distinct items seen in a trailing time window, one output row per calendar day.
SELECT date, items FROM (
SELECT
# The window is expressed in seconds over the sec column: 60*60*24*2 = 2 days back from the current row.
date, COUNT(DISTINCT item_id) OVER(ORDER BY sec RANGE BETWEEN 60*60*24*2 PRECEDING AND CURRENT ROW) AS items
FROM (
SELECT
# sec = the date converted to seconds, so the RANGE frame can measure elapsed time.
item_id, date, timestamp_to_sec(timestamp(date)) AS sec
FROM (
SELECT calendar.day AS date, MyTable.item_id AS item_id
FROM (
# Calendar: one row per day starting at 2016-05-28; pos - 1 is the day offset.
SELECT DATE(DATE_ADD(TIMESTAMP('2016-05-28'), pos - 1, "DAY")) AS day
FROM (
SELECT ROW_NUMBER() OVER() AS pos, *
FROM (FLATTEN((
# A string of dots, one per day through the current date, split into single characters to produce the rows.
SELECT SPLIT(RPAD('', 1 + DATEDIFF(TIMESTAMP(CURRENT_DATE()), TIMESTAMP('2016-05-28')), '.'),'') AS h
FROM (SELECT NULL)),h
)))
) AS calendar
# LEFT JOIN keeps calendar days that have no item rows.
LEFT JOIN (
SELECT date, item_id
FROM
# Inline sample data.
(SELECT 1 AS item_id, '2016-06-08' AS date),
(SELECT 1 AS item_id, '2016-06-07' AS date),
(SELECT 1 AS item_id, '2016-06-05' AS date),
(SELECT 1 AS item_id, '2016-06-04' AS date),
(SELECT 1 AS item_id, '2016-05-28' AS date),
(SELECT 2 AS item_id, '2016-06-08' AS date),
(SELECT 2 AS item_id, '2016-06-06' AS date),
(SELECT 2 AS item_id, '2016-06-04' AS date),
(SELECT 2 AS item_id, '2016-05-31' AS date),
(SELECT 3 AS item_id, '2016-05-31' AS date),
(SELECT 3 AS item_id, '2016-06-05' AS date)
) AS MyTable
ON calendar.day = MyTable.date
)
)
)
# Collapses repeated (date, items) pairs into a single row per pair.
GROUP BY date, items
ORDER BY date
Please note
oldest date - 2016-05-28 - is hardcoded in calendar subquery
window size is controlled in RANGE BETWEEN 60*60*24*2 PRECEDING AND CURRENT ROW; if you need 7 days, the expression should be 60*60*24*6
have in mind specifics of COUNT(DISTINCT) in BigQuery Legacy SQL

Number of unique dates

There is table:
-- One row per group and inclusive date range.
create table my_table (
    gr_id      number,
    start_date date,
    end_date   date
);
All dates always have zero time portion. I need to know a fastest way to compute number of unique dates inside gr_id.
For example, if there is rows (dd.mm.rrrr):
1 | 01.01.2000 | 07.01.2000
1 | 01.01.2000 | 07.01.2000
2 | 01.01.2000 | 03.01.2000
2 | 05.01.2000 | 07.01.2000
3 | 01.01.2000 | 04.01.2000
3 | 03.01.2000 | 05.01.2000
then right answer will be
1 | 7
2 | 6
3 | 5
At now I use additional table
-- Helper calendar: one row per date.
create table mfr_date_list (
    mfr_date date
);
with every date between 01.01.2000 and 31.12.2020 and query like this:
-- Counts, per gr_id, the distinct calendar dates covered by at least one of its ranges.
SELECT COUNT(DISTINCT mfr_date_list.mfr_date) cnt,
dt.gr_id
FROM dwh_mfr.mfr_date_list,
-- Inline view only renames the range columns to sd / ed.
(SELECT gr_id,
start_date AS sd,
end_date AS ed
FROM my_table
) dt
-- Join condition: the calendar date falls inside the inclusive range.
WHERE mfr_date_list.mfr_date BETWEEN dt.sd AND dt.ed
AND dt.ed IS NOT NULL
GROUP BY dt.gr_id
This query returns the correct result set, but I think it's not the fastest way. I think there is some way to build the query without the table mfr_date_list at all.
Oracle 11.2 64-bit.
I would expect what you're doing to be the fastest way (as always test). Your query can be simplified, though this only aids understanding and not necessarily speed:
-- Per gr_id, count the distinct calendar dates that fall inside any of its
-- inclusive [start_date, end_date] ranges.
select t.gr_id,
       count(distinct dl.mfr_date) as cnt
from my_table t
join mfr_date_list dl
  on dl.mfr_date between t.start_date and t.end_date
where t.end_date is not null
group by t.gr_id
Whatever you do you have to generate the data between the two dates somehow as you need to remove the overlap. One way would be to use CAST(MULTISET()), as Lalit Kumar explains:
-- Generates the days of each range inline instead of joining to a calendar table.
-- column_value runs from 1 to the number of days in the range, so
-- end_date - column_value + 1 walks back from end_date to start_date.
select gr_id, count(distinct end_date - column_value + 1)
from my_table m
-- The row generator is correlated to the current row of my_table via m.start_date / m.end_date.
cross join table(cast(multiset(select level
from dual
connect by level <= m.end_date - m.start_date + 1
) as sys.odcinumberlist))
group by gr_id;
GR_ID COUNT(DISTINCTEND_DATE-COLUMN_VALUE+1)
---------- --------------------------------------
1 7
2 6
3 5
This is very Oracle specific but should perform substantially better than most other row-generators as you're only accessing the table once and you're generating the minimal number of rows required due to the condition linking MY_TABLE and your generated rows.
What you really need to do is combine the ranges and then count the lengths. This can be quite challenging because of duplicate dates. The following is one way to approach this.
First, enumerate the dates and determine whether the date is "in" or "out". When the cumulative sum is 0 then it is "out":
-- Turn every range into two events: +1 on its start date and -1 on the day
-- after its end date, then keep a running total per group.
select ev.gr_id,
       ev.dt,
       sum(ev.inc) over (partition by ev.gr_id order by ev.dt) as cume_inc
from (
      select src.gr_id, src.start_date as dt, 1 as inc
      from my_table src
      union all
      select src.gr_id, src.end_date + 1, -1
      from my_table src
     ) ev
Then, use lead() to determine how long the period is:
-- inc: +1 / -1 events per range with a running total per group;
-- cume_inc > 0 means at least one range is open from this event onwards.
with inc as (
      select e.gr_id, e.dt,
             sum(e.inc) over (partition by e.gr_id order by e.dt) as cume_inc
      from (select t.gr_id, t.start_date as dt, 1 as inc
            from my_table t
            union all
            select t.gr_id, t.end_date + 1, -1 as inc
            from my_table t
           ) e
     )
select p.gr_id,
       sum(p.nextdt - p.dt) as daysInUse
from (select inc.gr_id, inc.dt, inc.cume_inc,
             -- Distance to the next event in the same group.
             lead(inc.dt) over (partition by inc.gr_id order by inc.dt) as nextdt
      from inc
     ) p
-- Only stretches that start while a range is open are counted; the gaps
-- between ranges (running total back at 0) are left out.
where p.cume_inc > 0
group by p.gr_id;
This is close to what you want. The following are two challenges: (1) putting in the limits and (2) handling ties. The following should work (although there might be off-by-one and boundary issues):
-- inc: one +count event per distinct start date and one -count event per
-- distinct day-after-end, with a running total per group.
-- priority orders an end event before a start event that falls on the same date.
with inc as (
      select e.gr_id, e.dt, e.priority,
             sum(e.inc) over (partition by e.gr_id order by e.dt) as cume_inc
      from (select t.gr_id, t.start_date as dt, count(*) as inc, 1 as priority
            from my_table t
            where t.end_date is not null
            group by t.gr_id, t.start_date
            union all
            select t.gr_id, t.end_date + 1, - count(*) as inc, -1
            from my_table t
            where t.end_date is not null
            group by t.gr_id, t.end_date
           ) e
     )
select p.gr_id,
       -- Each stretch is clipped to the calendar 2000-01-01 .. 2020-12-31. nextdt is
       -- the first day NOT covered, so the upper bound is the day after the last
       -- calendar day. A stretch entirely outside the limits contributes 0.
       sum(greatest(least(p.nextdt, date '2021-01-01')
                    - greatest(p.dt, date '2000-01-01'), 0)) as daysInUse
from (select inc.gr_id, inc.dt, inc.cume_inc,
             lead(inc.dt) over (partition by inc.gr_id order by inc.dt, inc.priority) as nextdt
      from inc
     ) p
-- Skip the gaps between ranges, where the running total is back at 0.
where p.cume_inc > 0
group by p.gr_id;

Find From/To Dates across multiple rows - SQL Postgres

I want to be able to "book" within range of dates, but you can't book across gaps of days. So booking across multiple rates is fine as long as they are contiguous.
I am happy to change data structure/index, if there are better ways of storing start/end ranges.
So far I have a "rates" table which contains Start/End Periods of time with a daily rate.
e.g. Rates Table.
ID Price From To
1 75.00 2015-04-12 2016-04-15
2 100.00 2016-04-16 2016-04-17
3 50.00 2016-04-18 2016-04-30
For the above data I would want to return:
From To
2015-04-12 2016-4-30
For simplicity's sake, it is safe to assume that the date ranges do not overlap. For contiguous rows, To is always 1 day before the next row's From.
For the case there is only 1 row, I would want it to return the From/To of that single row.
Also to clarify if I had the following data:
ID Price From To
1 75.00 2015-04-12 2016-04-15
2 100.00 2016-04-17 2016-04-18
3 50.00 2016-04-19 2016-04-30
4 50.00 2016-05-01 2016-05-21
Meaning where there is a gap >= 1 day it would count as a separate range.
In which case I would expect the following:
From To
2015-04-12 2016-04-15
2016-04-17 2016-05-21
Edit 1
After playing around I have come up with the following SQL which seems to work. Although I'm not sure if there are better ways/issues with it?
-- Collapse contiguous rate periods into one From/To range per run.
WITH grouped_rates AS (
    SELECT
        from_date,
        to_date,
        -- Running count of group starts = group number of the row.
        SUM(grp_start) OVER (ORDER BY from_date, to_date) AS grp
    FROM (
        SELECT
            from_date,
            to_date,
            -- 0 when this period begins the day after the previous period ends,
            -- 1 when it starts a new group (including the very first row).
            CASE WHEN (from_date - INTERVAL '1 DAY') = LAG(to_date)
                          OVER (ORDER BY from_date, to_date)
                 THEN 0
                 ELSE 1
            END AS grp_start
        FROM rates
        -- Identical periods are collapsed to a single row.
        GROUP BY from_date, to_date
    ) AS start_groups
)
SELECT
    MIN(from_date) AS from_date,
    MAX(to_date) AS to_date
FROM grouped_rates
GROUP BY grp;
This is identifying contiguous overlapping groups in the data. One approach is to find where each group begins and then do a cumulative sum. The following query adds a flag indicating if a row starts a group:
-- StartFlag = 1 when no other row reaches this row: no row that starts earlier
-- ends on or after the day before this row's from_date.
-- Rows sharing the same from_date are tie-broken by id so only one of them is flagged.
select r.*,
       (case when not exists (select 1
                              from rates r2
                              where (r2.from_date < r.from_date
                                     and r2.to_date >= r.from_date - interval '1 day')
                                 or (r2.from_date = r.from_date and r2.id < r.id)
                             )
             then 1 else 0 end) as StartFlag
from rates r;
The or in the correlation condition is to handle the situation where intervals that define a group overlap on the start date for the interval.
You can then do a cumulative sum on this flag and aggregate by that sum:
-- flagged: every rate row plus StartFlag = 1 when no row that starts earlier
-- ends on or after the day before this row's from_date (ties on from_date broken by id).
with flagged as (
      select r.*,
             (case when not exists (select 1
                                    from rates r2
                                    where (r2.from_date < r.from_date
                                           and r2.to_date >= r.from_date - interval '1 day')
                                       or (r2.from_date = r.from_date and r2.id < r.id)
                                   )
                   then 1 else 0 end) as StartFlag
      from rates r
     )
select min(g.from_date) as from_date,
       max(g.to_date) as to_date
from (select f.*,
             -- Running count of start flags = group number; id keeps the order
             -- deterministic for rows sharing a from_date.
             sum(f.StartFlag) over (order by f.from_date, f.id) as grp
      from flagged f
     ) g
group by g.grp;
-- Price periods; date_upto is the first day NOT covered by the period.
CREATE TABLE prices (
    id        INTEGER NOT NULL PRIMARY KEY,
    price     MONEY,
    date_from DATE NOT NULL,
    date_upto DATE NOT NULL
);

-- Sample data: the first period is followed by a gap, the other three are contiguous.
INSERT INTO prices (id, price, date_from, date_upto)
VALUES
    (1,  75.00, '2015-04-12', '2016-04-16'),
    (2, 100.00, '2016-04-17', '2016-04-19'),
    (3,  50.00, '2016-04-19', '2016-05-01'),
    (4,  50.00, '2016-05-01', '2016-05-22');
-- SELECT * FROM prices;
-- Recursive query to "connect the dots"
WITH RECURSIVE rrr AS (
    -- Seeds: periods that no other period leads into.
    SELECT head.date_from,
           head.date_upto,
           1 AS nperiod
    FROM prices AS head
    WHERE NOT EXISTS (
        SELECT 1
        FROM prices AS before
        WHERE before.date_upto = head.date_from
    )
    UNION ALL
    -- Extend a chain with the period that starts exactly where it currently ends.
    SELECT chain.date_from,
           nxt.date_upto,
           chain.nperiod + 1 AS nperiod
    FROM prices AS nxt
    INNER JOIN rrr AS chain
        ON nxt.date_from = chain.date_upto
)
-- Keep only complete chains: nothing starts where the chain ends.
SELECT done.date_from,
       done.date_upto,
       done.nperiod
FROM rrr AS done
WHERE NOT EXISTS (
    SELECT 1
    FROM prices AS after
    WHERE after.date_from = done.date_upto
)
;
Result:
date_from | date_upto | nperiod
------------+------------+---------
2015-04-12 | 2016-04-16 | 1
2016-04-17 | 2016-05-22 | 3
(2 rows)