How to transform following table to desired table - sql

I have a table with the following structure:
ID
StartDate (YY-MM-DD)
EndDate (YY-MM-DD)
1
20-07-13
21-05-12
2
15-04-12
27-01-01
3
14-01-30
18-12-30
4
18-02-21
5
20-12-12
20-12-15
6
20-11-11
7
19-10-22
20-08-10
8
17-09-01
19-04-15
9
18-06-27
21-08-19
9
19-08-17
I would like to write a query that creates an output with the following structure:
Count the IDs which are active during each period. For example ID: 1 was active between 13. July 2020 and 12. May 2021, so it should be added to the count during 202007 and 202105.
As long as there is no EndDate, the ID should be counted in every period up to and including the present one.
COUNT_ID
YYYYMM
150
201601
200
201602
180
201603
...
...
...
...
...
...
I am struggling to find a way to calculate the count of active IDs for each period. I believe there is a simple way to do it, but unfortunately I don't know it.
Any help/tip/hint is much appreciated!
Best regards,
Frederik

If I'm understanding correctly, you will want to:
Generate a table of month-start-and-end ranges, between a start date (January 2016) and the present
Then, for each month, examine how many of your records' start and end dates contain any days that fall within that month
-- Sample data: one row per ID with the period during which it is active.
-- A NULL id_end means the ID has no end date yet, i.e. it is still active.
CREATE TABLE #temp_ids (
    id       INT,
    id_start DATE,
    id_end   DATE
);

INSERT INTO #temp_ids (id, id_start, id_end)
VALUES (1,  '20200713', '20210512'),
       (2,  '20150412', '20270101'),
       (3,  '20140130', '20181230'),
       (4,  '20180221', NULL),
       (5,  '20201212', '20201215'),
       (6,  '20201111', NULL),
       (7,  '20191022', '20200810'),
       (8,  '20170901', '20190415'),
       (9,  '20180627', '20210809'),
       (10, '20190817', NULL);

-- Reporting window: from a fixed start date up to today.
DECLARE @windowStart DATE = '20160101',
        @windowEnd   DATE = GETDATE();

WITH report_dates AS (
    -- One row per calendar day between @windowStart and @windowEnd (inclusive).
    -- The cross join of sys.all_objects is only a row source for ROW_NUMBER().
    SELECT TOP (DATEDIFF(DAY, @windowStart, @windowEnd) + 1)
           DATEADD(DAY,
                   ROW_NUMBER() OVER (ORDER BY a.object_id) - 1,
                   @windowStart) AS report_date
    FROM sys.all_objects AS a
    CROSS JOIN sys.all_objects AS b
),
month_windows AS (
    -- Keep only the first day of each month and pair it with that month's last day.
    SELECT report_date          AS month_start,
           EOMONTH(report_date) AS month_end
    FROM report_dates
    WHERE DATEPART(DAY, report_date) = 1
)
-- Count the IDs whose active period overlaps each month.
-- Two closed intervals overlap when each one starts no later than the other ends;
-- a NULL id_end is treated as "no end", so open-ended IDs are counted in every
-- month from their start onwards.
SELECT mw.month_start,
       mw.month_end,
       COUNT(ti.id) AS count_id
FROM #temp_ids AS ti
INNER JOIN month_windows AS mw
    ON ti.id_start <= mw.month_end
   AND (ti.id_end IS NULL OR ti.id_end >= mw.month_start)
GROUP BY mw.month_start,
         mw.month_end
ORDER BY mw.month_start;

Related

Calculate monthly income from weekly

I need help. Trying to solve the following problem: A table stores information about the weekly sales of a product.
Need to set up automatic conversion of weekly values ​​to monthly values. Sales in transitional weeks (part of the week in one month, part in another) must be distributed on weekdays (excluding weekends - Sat, Sun).
For example, sales for the week ended 03/05/2013 should be distributed as follows: 2 days for February, 3 days for March.
The result of solving the problem is the SQL query that will automatically receive the converted data in the "Result Table" format (month; amount) according to the entered parameter MonthNumber (month number). If the parameter is not specified, the entire table is displayed.
-- Weekly sales fixture: sales_date is the day a week ends, payment_sum the total for that week.
DROP TABLE IF EXISTS #Test;

CREATE TABLE #Test (
    sales_date  DATE,
    payment_sum REAL
);

INSERT INTO #Test (sales_date, payment_sum) VALUES ('26.02.2013', 312.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('05.03.2013', 833.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('12.03.2013', 225.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('19.03.2013', 453.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('26.03.2013', 774.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('02.04.2013', 719.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('09.04.2013', 136.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('23.04.2013', 157.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('30.04.2013', 850.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('07.05.2013', 940.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('14.05.2013', 933.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('21.05.2013', 422.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('28.05.2013', 952.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('04.06.2013', 136.00);
INSERT INTO #Test (sales_date, payment_sum) VALUES ('11.06.2013', 701.00);
I started by trying to weed out weekends. But how to understand which of the weeks should be divided between adjacent months?
-- Keep only rows whose sales_date falls on a weekday.
-- Adding @@DATEFIRST and taking the result modulo 7 makes the test independent of
-- the session's first-day-of-week setting: Saturday always maps to 0 and Sunday to 1.
SELECT sales_date,
       payment_sum
FROM #Test
WHERE ((DATEPART(dw, sales_date) + @@DATEFIRST) % 7) NOT IN (0, 1)
I want to receive this:
MonthNumber = 0
|Month|Incom|
|:----|:----|
|01 |1100 |
|02 |1120 |
|03 |1488 |
|04 |6112 |
|05 |7300 |
|06 |1360 |
|07 |8800 |
|08 |1400 |
|09 |1300 |
|10 |5070 |
|11 |3020 |
|12 |7800 |
MonthNumber = 1
|01 |1100 |
Build a calendar table, including a column that maps each day to the appropriate week and reporting month. Join that to allocate the weekly sales to the weekdays, something like:
-- Rebuilds the sample data, builds a day-level calendar for 2013 that maps every day
-- to a week and a month, then spreads each weekly sum over the calendar days of its week.
drop table if exists #Test
drop table if exists #calendar
-- The date literals below are written day.month.year.
set dateformat dmy
create table #Test
(
sales_date date,
payment_sum real
)
insert into #Test values ('26.02.2013', 312.00)
insert into #Test values ('05.03.2013', 833.00)
insert into #Test values ('12.03.2013', 225.00)
insert into #Test values ('19.03.2013', 453.00)
insert into #Test values ('26.03.2013', 774.00)
insert into #Test values ('02.04.2013', 719.00)
insert into #Test values ('09.04.2013', 136.00)
insert into #Test values ('23.04.2013', 157.00)
insert into #Test values ('30.04.2013', 850.00)
insert into #Test values ('07.05.2013', 940.00)
insert into #Test values ('14.05.2013', 933.00)
insert into #Test values ('21.05.2013', 422.00)
insert into #Test values ('28.05.2013', 952.00)
insert into #Test values ('04.06.2013', 136.00)
insert into #Test values ('11.06.2013', 701.00);
-- q: the integers 0..364 (sys.messages is used only as a row source).
-- d: one date per integer, counted from 2013-01-01.
with q as
(
select top 365 row_number() over (order by (select null))-1 i
from sys.messages
), d as
(
select dateadd(day,i,'20130101') dt
from q
)
-- Calendar row per day: the date in the same week whose datepart(dw) is 3 (week_start),
-- a weekday flag, the day-of-week number and the month number.
-- NOTE(review): datepart(dw, ...) depends on the session's SET DATEFIRST value, so which
-- day week_start lands on and which days the (6,7) test treats as non-weekdays both vary
-- with that setting -- confirm it matches the intended Mon-Fri split before reuse.
select d.dt,
dateadd(day,3-datepart(dw,dt),dt) week_start,
case when datepart(dw,dt) in (6,7) then 0 else 1 end is_weekday,
datepart(dw,dt) day_of_week,
month(dt) month_num
into #calendar
from d
-- Each weekly sum is joined to the calendar days whose week_start equals its sales_date;
-- every flagged weekday receives one fifth of the sum, totalled per month.
select month_num, sum(payment_sum/5)
from #test s
join #calendar c
on s.sales_date = c.week_start
where c.is_weekday = 1
group by month_num
outputs
month_num
----------- ----------------------
2 312.000007629395
3 2428.80004119873
4 1378.20001411438
5 3587.00000762939
6 836.999988555908
The following will extend sales date 6 days and then aggregate n/5 by month excluding weekends.
Example
-- Expand every weekly row into the seven calendar days ending on sales_date,
-- drop Saturdays and Sundays, and credit one fifth of the weekly sum to the
-- month (identified by its last day) that each remaining day belongs to.
SELECT
    D     = EOMONTH(wk.day_in_week),
    Sales = SUM(s.Payment_sum / 5)
FROM #Test AS s
CROSS APPLY (
    VALUES (DATEADD(DAY,  0, s.sales_date)),
           (DATEADD(DAY, -1, s.sales_date)),
           (DATEADD(DAY, -2, s.sales_date)),
           (DATEADD(DAY, -3, s.sales_date)),
           (DATEADD(DAY, -4, s.sales_date)),
           (DATEADD(DAY, -5, s.sales_date)),
           (DATEADD(DAY, -6, s.sales_date))
) AS wk (day_in_week)
WHERE DATENAME(WEEKDAY, wk.day_in_week) NOT IN ('Saturday', 'Sunday')
GROUP BY EOMONTH(wk.day_in_week)
Results
D Sales
2013-02-28 645.20
2013-03-31 2383.20
2013-04-30 1430.60
2013-05-31 3328.60
2013-06-30 755.40

How to get one output table from select in while-loop

Edited to live up to the rules here. Sorry about my first attempt.
I got the following sample data:
-- Sample data: one TIME value per order (1 row at 01:00, 2 at 02:00, 3 at 03:00, 4 at 04:00).
CREATE TABLE SampleData (
    [Time] time(7) NOT NULL
) ON [PRIMARY];
GO

INSERT INTO SampleData ([Time])
VALUES ('01:00:00'),
       ('02:00:00'),
       ('02:00:00'),
       ('03:00:00'),
       ('03:00:00'),
       ('03:00:00'),
       ('04:00:00'),
       ('04:00:00'),
       ('04:00:00'),
       ('04:00:00');
GO
This is my query:
-- Counts the rows of SampleData for each hour from 1 to 4.
-- Every loop iteration issues its own SELECT, so the client receives
-- four separate one-row result sets rather than one four-row table.
DECLARE @Counter INT;
SET @Counter = 1;

WHILE (@Counter <= 4)
BEGIN
    SELECT COUNT([Time]) AS OrdersAmount
    FROM SampleData
    WHERE DATEPART(HOUR, [Time]) = @Counter;

    SET @Counter = @Counter + 1;
END;
This is the result of the query:
OrdersAmount
1
-----
OrdersAmount
2
-----
OrdersAmount
3
-----
OrdersAmount
4
So I get 4 separate tables. What I need is one table with all the values in it, each on its own row, like this:
OrdersAmount
1
2
3
4
I tried with cte and declaring a temp table, but I just can't make it work.
I don't have the data so can't test.
But if I get your problem right, this should work for you.
-- Total OrdersAmount per pickup date and hour, restricted to rows whose
-- PromisedPickupDate falls on today's date.
SELECT
    CAST(b.PromisedPickupDate AS date)     AS PromisedPickupDt,
    DATEPART(hour, b.PromisedPickupDate)   AS [Hour],
    SUM(b.OrdersAmount)                    AS HourlyAmount
FROM [FLX].[dbo].[NDA_SAP_Bestillinger] AS b
WHERE CAST(b.PromisedPickupDate AS date) = CAST(GETDATE() AS date)
GROUP BY
    CAST(b.PromisedPickupDate AS date),
    DATEPART(hour, b.PromisedPickupDate)
You need an Hours table to join and group against. You can create this using a VALUES constructor:
-- One row per hour of the day (0-23) with the number of today's orders in that hour.
-- The hour list sits on the preserved side of the LEFT JOIN so hours without
-- orders still produce a row.
SELECT
    -- Count a column from the joined table, not COUNT(*): an hour with no match
    -- still yields one all-NULL joined row, which COUNT(*) would report as 1.
    COUNT(o.PromisedPickupDate) AS OrdersAmount
FROM (VALUES
    (0),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),(11),(12),(13),(14),(15),(16),(17),(18),(19),(20),(21),(22),(23)
) AS v([Hour])
LEFT JOIN [FLX].[dbo].[NDA_SAP_Bestillinger] AS o
    -- NOTE(review): the hour is taken from PromisedPickupTime while the date filter
    -- uses PromisedPickupDate; both names are kept as given -- confirm against the schema.
    ON v.[Hour] = DATEPART(hour, o.PromisedPickupTime)
    -- Half-open range [today 00:00, tomorrow 00:00) keeps the predicate index-friendly.
   AND o.PromisedPickupDate >= CAST(CAST(GETDATE() AS date) AS datetime)
   AND o.PromisedPickupDate <  CAST(DATEADD(day, 1, CAST(GETDATE() AS date)) AS datetime)
GROUP BY v.[Hour]
-- The output carries no hour column, so a fixed order is what ties each row to its hour.
ORDER BY v.[Hour];
What is going on here is that we start with a filter by the current date.
Then we join a constructed Hours table against the hour-part of the date. Because it is on the left-side of a LEFT JOIN, we now have all the hours whether or not there is any value in that hour. We can now group by it.
Note the correct method to compare to a date range: a half-open interval >= AND <
Do not use YEAR\MONTH etc in join and filter predicates, as indexes cannot be used for this.

Get the information in terms of time about people who are free on a particular day

I have the following data:
-- Timetable fixture: one row per student, course and weekday with the class
-- start/end times and the bounds of that student's day.
CREATE TABLE Table1 (
    ID               varchar(10),
    StudentName      varchar(30),
    Course           varchar(15),
    SECTION          varchar(2),
    DAY              varchar(10),
    START_TIME       time,
    END_TIME         time,
    actual_starttime time,
    actual_endtime   time
);

INSERT INTO Table1
    (ID, StudentName, Course, SECTION, DAY, START_TIME, END_TIME, actual_starttime, actual_endtime)
VALUES
    (111, 'Mary',   'Science', 'A', 'Mon', '13:30:00.0000000', '16:20:00.0000000', '09:00:00.0000000', '21:20:00.0000000'),
    (111, 'Mary',   'Maths',   'A', 'Tue', '12:30:00.0000000', '13:20:00.0000000', '09:00:00.0000000', '21:20:00.0000000'),
    (111, 'Mary',   'Physics', 'C', 'Tue', '10:30:00.0000000', '11:10:00.0000000', '09:00:00.0000000', '21:20:00.0000000'),
    (112, 'Robert', 'Maths',   'A', 'Mon', '13:30:00.0000000', '16:20:00.0000000', '09:00:00.0000000', '21:20:00.0000000');
The scenario is as follows: the student can have class from morning 9 to night 9:30 from Monday to Friday. My requirement is I have to identify a timeslot where all the students in the same section are free so that a teacher can reschedule a class.
Example: both Mary and Robert are free in the morning from 9:00 to 1:30 in the afternoon on Monday. I would like to write query for this.
Please help.
Thanks in advance!
To return the full list of the timeslots available, you need to build a set of all the timeslots for each day of the week and then find if any of these slots have students being taught within it.
This is easily achieved with a recursive CTE to build your full timeslot set, from which you can JOIN into your Students data. The output of the query below is the day and time of each vacant session:
-- Build the dummy data set in a table variable:
declare @Data table
(
    ID               varchar(10),
    StudentName      varchar(30),
    Course           varchar(15),
    SECTION          varchar(2),
    DAY              varchar(10),
    START_TIME       time,
    END_TIME         time,
    actual_starttime time,
    actual_endtime   time
);

insert into @Data values
 (111, 'Mary',   'Science', 'A', 'Mon', '13:30:00.0000000', '16:20:00.0000000', '09:00:00.0000000', '21:20:00.0000000')
,(111, 'Mary',   'Maths',   'A', 'Tue', '12:30:00.0000000', '13:20:00.0000000', '09:00:00.0000000', '21:20:00.0000000')
,(111, 'Mary',   'Physics', 'C', 'Tue', '10:30:00.0000000', '11:10:00.0000000', '09:00:00.0000000', '21:20:00.0000000')
,(112, 'Robert', 'Maths',   'A', 'Mon', '13:30:00.0000000', '16:20:00.0000000', '09:00:00.0000000', '21:20:00.0000000');

-- Query the data: list every (day, half-hour slot) in which nobody is being taught.
with TimeSlots as
(
    -- Recursive CTE: half-hour slots from 09:00-09:30 up to 21:00-21:30.
    select cast('09:00:00' as time) as TimeSlotStart
          ,cast('09:30:00' as time) as TimeSlotEnd
    union all
    select dateadd(minute, 30, TimeSlotStart)
          ,dateadd(minute, 30, TimeSlotEnd)
    from TimeSlots
    where TimeSlotStart < cast('21:00:00' as time)
)
, TeachingDays as
(
    -- The five teaching days; DaySort keeps them in weekday order in the output.
    select v.DaySort, v.TeachingDay
    from (values (1, 'Mon'), (2, 'Tue'), (3, 'Wed'), (4, 'Thu'), (5, 'Fri')) as v (DaySort, TeachingDay)
)
select td.TeachingDay
      ,t.TimeSlotStart
      ,t.TimeSlotEnd
from TimeSlots as t                 -- every slot ...
cross join TeachingDays as td       -- ... on every teaching day
left join @Data as d                -- any class that overlaps the slot on that day
    on  td.TeachingDay  = d.DAY
    -- Half-open overlap test: a slot that begins exactly when a class ends is free.
    and t.TimeSlotStart < d.END_TIME
    and t.TimeSlotEnd   > d.START_TIME
where d.ID is null                  -- keep only the slots with no overlapping class
order by td.DaySort
        ,t.TimeSlotStart;
You could create a Stored Procedure with following steps.
Step 1: Predefine timeslots in a different table.(09:00-10:00, 10:00-11:00 etc)
Step 2: Select count of students
Step 3:
for all the slots
Begin
for all the students
Begin
if(students.actual_starttime =slots.actual_starttime and
students.actual_endtime =slots.actual_endtime
break;
else count=count+1;
End
End
Step 4: If the above count matches the total number of students, then the slot is free for all the students; otherwise the slot is not free for all the students.
Hope this helps. Let me know if you find difficulty with it.
You should have three more tables to make it more simple
i.e. Student, Section and slots
I tried to create 1 more table with half hour slots
-- Lookup table of half-hour slot start times from 09:00 to 21:30.
CREATE TABLE table2 (
    timeslot time
);

INSERT INTO table2 (timeslot)
VALUES ('9:00:00.0000000'),
       ('9:30:00.0000000'),
       ('10:00:00.0000000'),
       ('10:30:00.0000000'),
       ('11:00:00.0000000'),
       ('11:30:00.0000000'),
       ('12:00:00.0000000'),
       ('12:30:00.0000000'),
       ('13:00:00.0000000'),
       ('13:30:00.0000000'),
       ('14:00:00.0000000'),
       ('14:30:00.0000000'),
       ('15:00:00.0000000'),
       ('15:30:00.0000000'),
       ('16:00:00.0000000'),
       ('16:30:00.0000000'),
       ('17:00:00.0000000'),
       ('17:30:00.0000000'),
       ('18:00:00.0000000'),
       ('18:30:00.0000000'),
       ('19:00:00.0000000'),
       ('19:30:00.0000000'),
       ('20:00:00.0000000'),
       ('20:30:00.0000000'),
       ('21:00:00.0000000'),
       ('21:30:00.0000000');
Following SQL will give you free slot and name of student:
Query:
-- For section 'A': every (student, slot) pair where the slot time is earlier than
-- both the start and the end of at least one of that student's classes.
SELECT
    t1.StudentName,
    t2.timeslot
FROM Table2 AS t2
INNER JOIN Table1 AS t1
    ON  t2.timeslot < t1.start_time
    AND t2.timeslot < t1.end_time
WHERE t1.section = 'A'
GROUP BY
    t1.StudentName,
    t2.timeslot
ORDER BY t2.timeslot
Output:
StudentName timeslot
1 Mary 09:00:00
2 Robert 09:00:00
3 Mary 09:30:00
4 Robert 09:30:00
5 Mary 10:00:00
6 Robert 10:00:00
7 Mary 10:30:00
8 Robert 10:30:00
9 Mary 11:00:00
10 Robert 11:00:00
11 Mary 11:30:00
12 Robert 11:30:00
13 Mary 12:00:00
14 Robert 12:00:00
15 Mary 12:30:00
16 Robert 12:30:00
17 Mary 13:00:00
18 Robert 13:00:00
This is only half of the task; I have just shown you a way to approach it. Introduce two more joins, with the student and section tables, to complete it.
Shred the day (09:00 to 21:30 interval) into minutes, find free minutes with respect to students of the group and days of interest and group minutes found back as intervals.
-- Timetable fixture.
CREATE TABLE Table1 (ID varchar(10), StudentName varchar(30), Course varchar(15), SECTION varchar(2), DAY varchar(10),
                     START_TIME time, END_TIME time, actual_starttime time, actual_endtime time);
INSERT INTO Table1 VALUES (111, 'Mary','Science','A','Mon','13:30:00.0000000','16:20:00.0000000','09:00:00.0000000','21:20:00.0000000');
INSERT INTO Table1 VALUES (111, 'Mary','Maths','A','Tue','12:30:00.0000000','13:20:00.0000000','09:00:00.0000000','21:20:00.0000000');
INSERT INTO Table1 VALUES (111, 'Mary','Physics','C','Tue','10:30:00.0000000','11:10:00.0000000','09:00:00.0000000','21:20:00.0000000');
INSERT INTO Table1 VALUES (112, 'Robert','Maths','A','Mon','13:30:00.0000000','16:20:00.0000000','09:00:00.0000000','21:20:00.0000000');

-- Parameters: bounds of the teaching day, the section to check, the days to
-- examine and the students who must all be free.
declare @tds time = '09:00';
declare @tde time = '21:30';
declare @section varchar(2) = 'A';
create table #daysofinterest (DAY varchar(10) primary key);
insert #daysofinterest (DAY) values ('Mon'),('Tue'),('Fri');
create table #groupmembers (ID int primary key);
insert #groupmembers (ID) values (111),(112);

-- Query: shred the day into minutes, keep the minutes that are free for every
-- group member, then fold runs of consecutive free minutes back into intervals.
select g.DAY,
       startt = dateadd(minute, min(g.n), @tds),
       endt   = dateadd(minute, max(g.n), @tds)
from (
    -- n minus its row number within the day is constant across a run of
    -- consecutive minutes, so grp identifies each unbroken free interval.
    select dd.DAY,
           tally.n,
           grp = tally.n - row_number() over (partition by dd.DAY order by tally.n)
    from (
        -- Minute offsets 1..N from @tds, N being the number of minutes up to @tde.
        -- NOTE(review): relies on sys.all_objects holding at least N rows -- confirm.
        select top (datediff(minute, @tds, @tde))
               n = row_number() over (order by (select null))
        from sys.all_objects
    ) tally
    cross join #daysofinterest dd
    join #groupmembers gm
        -- The member has no class of the chosen section covering this minute on this day.
        on not exists (select 1
                       from Table1 t
                       where t.ID = gm.ID
                         and t.DAY = dd.DAY
                         and t.SECTION = @section
                         and dateadd(minute, tally.n, @tds) between t.START_TIME and t.END_TIME)
    group by dd.DAY, tally.n
    -- This minute is free for every group member.
    having count(*) = (select count(*) from #groupmembers)
) g
group by g.DAY, g.grp
order by g.DAY, min(g.n)

Returning a set of the most recent rows from a table

I'm trying to retrieve the latest set of rows from a source table containing a foreign key, a date and other fields present. A sample set of data could be:
-- Fixture: several dated rows per foreignKeyId.
CREATE TABLE #tmp (
    primaryId    int,
    foreignKeyId int,
    startDate    datetime,
    otherfield   varchar(50)
);

INSERT INTO #tmp (primaryId, foreignKeyId, startDate, otherfield)
VALUES (1, 1, '1 jan 2010', 'test 1'),
       (2, 1, '1 jan 2011', 'test 2'),
       (3, 2, '1 jan 2013', 'test 3'),
       (4, 2, '1 jan 2012', 'test 4');
The form of data that I'm hoping to retrieve is:
foreignKeyId maxStartDate otherfield
------------ ----------------------- -------------------------------------------
1 2011-01-01 00:00:00.000 test 2
2 2013-01-01 00:00:00.000 test 3
That is, just one row per foreignKeyId showing the latest start date and associated other fields - the primaryId is irrelevant.
I've managed to come up with:
-- Rows of #tmp whose startDate equals the latest startDate of their foreignKeyId.
-- Rows that tie on that latest date are all returned.
SELECT
    cur.foreignKeyId,
    cur.startDate,
    cur.otherField
FROM #tmp AS cur
INNER JOIN (
    SELECT
        foreignKeyId,
        MAX(startDate) AS maxStartDate
    FROM #tmp
    GROUP BY foreignKeyId
) AS latest
    ON  latest.foreignKeyId = cur.foreignKeyId
    AND latest.maxStartDate = cur.startDate
but (a) this uses inner queries, which I suspect may lead to performance issues, and (b) it gives repeated rows if two rows in the original table have the same foreignKeyId and startDate.
Is there a query that will return just the first match for each foreign key and start date?
Depending on your sql server version, try the following:
-- Number the rows of each foreignKeyId from newest to oldest startDate and keep
-- the first one, so exactly one row per foreignKeyId is returned.
SELECT
    ranked.primaryId,
    ranked.foreignKeyId,
    ranked.startDate,
    ranked.otherfield,
    ranked.rnum
FROM (
    SELECT
        src.primaryId,
        src.foreignKeyId,
        src.startDate,
        src.otherfield,
        ROW_NUMBER() OVER (
            PARTITION BY src.foreignKeyId
            ORDER BY src.startDate DESC
        ) AS rnum
    FROM #tmp AS src
) AS ranked
WHERE ranked.rnum = 1
If you wanted to fix your attempt as opposed to re-engineering it then
-- Per foreignKeyId, return the row carrying the highest primaryId.
-- The derived table also computes the latest startDate, but only the highest
-- primaryId takes part in the join.
SELECT
    cur.foreignKeyId,
    cur.startDate,
    cur.otherField
FROM #tmp AS cur
INNER JOIN (
    SELECT
        foreignKeyId,
        MAX(startDate) AS maxStartDate,
        MAX(PrimaryId) AS Latest
    FROM #tmp
    GROUP BY foreignKeyId
) AS newest
    ON cur.primaryId = newest.latest
would have done the job, assuming PrimaryID increases over time.
Qualms about inner query would have been laid to rest as well assuming some indexes.

Group close numbers

I have a table with 2 columns of integers. The first column represents start index and the second column represents end index.
START END
1 8
9 13
14 20
20 25
30 42
42 49
60 67
Simple So far. What I would like to do is group all the records that follow together:
START END
1 25
30 49
60 67
A record can follow by Starting on the same index as the previous end index or by a margin of 1:
START END
1 10
10 20
And
START END
1 10
11 20
will both result in
START END
1 20
I'm using SQL Server 2008 R2.
Any help would be Great
This works for your example, let me know if it doesn't work for other data
-- Merges ranges that touch (next Start = previous End) or are adjacent
-- (next Start = previous End + 1) into a single Start/End row per group.
create table #Range
(
[Start] INT,
[End] INT
)
insert into #Range ([Start], [End]) Values (1, 8)
insert into #Range ([Start], [End]) Values (9, 13)
insert into #Range ([Start], [End]) Values (14, 20)
insert into #Range ([Start], [End]) Values (20, 25)
insert into #Range ([Start], [End]) Values (30, 42)
insert into #Range ([Start], [End]) Values (42, 49)
insert into #Range ([Start], [End]) Values (60, 67)
-- RangeTable: the rows that open a new group, i.e. whose Start matches neither
-- the End nor End + 1 of any row, numbered in Start order.
;with RangeTable as
(select
t1.[Start],
t1.[End],
row_number() over (order by t1.[Start]) as [Index]
from
#Range t1
where t1.Start not in (select
[End]
from
#Range
Union
select
[End] + 1
from
#Range
)
)
-- Pair each group opener (t1) with the next opener (t2). The group's End is the
-- largest End below the next opener's Start; the last group has no next opener
-- and takes the overall largest End.
select
t1.[Start],
case
when t2.[Start] is null then
(select max([End])
from #Range)
else
(select max([End])
from #Range
where t2.[Start] > [End])
end as [End]
from
RangeTable t1
left join
RangeTable t2
on
t1.[Index] = t2.[Index]-1
drop table #Range;
Edited to include another version which i think is a bit more reliable, and also works with overlapping ranges
-- Merges ranges (including overlapping ones) by expanding every range into its
-- individual integers and then regrouping runs of consecutive integers.
CREATE TABLE #data (start_range INT, end_range INT)
INSERT INTO #data VALUES (1,8)
INSERT INTO #data VALUES (2,15)
INSERT INTO #data VALUES (9,13)
INSERT INTO #data VALUES (14,20)
INSERT INTO #data VALUES (13,26)
INSERT INTO #data VALUES (12,21)
INSERT INTO #data VALUES (9,25)
INSERT INTO #data VALUES (20,25)
INSERT INTO #data VALUES (30,42)
INSERT INTO #data VALUES (42,49)
INSERT INTO #data VALUES (60,67)
-- ranges: the anchor emits one row per source range with level = start_range; the
-- recursive member keeps adding 1 to level until it reaches end_range, so level
-- takes every integer covered by the range.
-- NOTE(review): no OPTION (MAXRECURSION ...) is given, so a range longer than the
-- server's default recursion limit would presumably fail -- confirm for real data.
;with ranges as
(
SELECT start_range as level
,end_range as end_range
,row_number() OVER (PARTITION BY (SELECT NULL) ORDER BY start_range) as row
FROM #data
UNION ALL
SELECT
level + 1 as level
,end_range as end_range
,row
From ranges
WHERE level < end_range
)
-- ranges2: each covered integer once, however many ranges cover it.
,ranges2 AS
(
SELECT DISTINCT
level
FROM ranges
)
-- ranges3: row number minus value stays constant along a run of consecutive
-- integers, so grouping_group labels each unbroken run.
,ranges3 AS
(
SELECT
level
,row_number() OVER (ORDER BY level) - level as grouping_group
from ranges2
)
-- One output row per run: its smallest and largest integer.
SELECT
MIN(level) as start_number
,MAX(level) as end_number
FROM ranges3
GROUP BY grouping_group
ORDER BY start_number ASC
I think this should work - might not be especially efficient on larger sets though...
-- Groups ranges by comparing each range with the next one in start order.
CREATE TABLE #data (start_range INT, end_range INT)
INSERT INTO #data VALUES (1,8)
INSERT INTO #data VALUES (2,15)
INSERT INTO #data VALUES (9,13)
INSERT INTO #data VALUES (14,20)
INSERT INTO #data VALUES (21,25)
INSERT INTO #data VALUES (30,42)
INSERT INTO #data VALUES (42,49)
INSERT INTO #data VALUES (60,67)
-- overlaps: every range with its length and its position when ordered by start_range.
;with overlaps as
(
select *
,end_range - start_range as range
,row_number() OVER (PARTITION BY (SELECT NULL) ORDER BY start_range ASC) as line_number
from #data
)
-- overlaps2: each range (O1) next to the following range (O2).
--   overlap       = 1 when the next range starts less than 2 after this one ends.
--   overlap_group = line_number minus its dense rank among rows sharing the same
--                   overlap flag.
-- NOTE(review): the flag describes the link to the NEXT row, so the last row of a
-- chain (and the final row, where O2 is NULL) carries flag 0 -- verify the resulting
-- groups against data containing several chains.
,overlaps2 AS
(
SELECT
O1.start_range
,O1.end_range
,O1.line_number
,O1.range
,O2.start_range as next_range
,CASE WHEN O2.start_range - O1.end_range < 2 THEN 1 ELSE 0 END as overlap
,O1.line_number - DENSE_RANK() OVER (PARTITION BY (CASE WHEN O2.start_range - O1.end_range < 2 THEN 1 ELSE 0 END) ORDER BY O1.line_number ASC) as overlap_group
FROM overlaps O1
LEFT OUTER JOIN overlaps O2 on O2.line_number = O1.line_number + 1
)
-- One output row per overlap_group: lowest start, highest end and the span between them.
SELECT
MIN(start_range) as range_start
,MAX(end_range) as range_end
,MAX(end_range) - MIN(start_range) as range_span
FROM overlaps2
GROUP BY overlap_group
You could use a number table to solve this problem. Basically, you first expand the ranges, then combine subsequent items in groups.
Here's one implementation:
WITH data (START, [END]) AS (
    -- Sample ranges.
    SELECT 1, 8
    UNION ALL SELECT 9, 13
    UNION ALL SELECT 14, 20
    UNION ALL SELECT 20, 25
    UNION ALL SELECT 30, 42
    UNION ALL SELECT 42, 49
    UNION ALL SELECT 60, 67
),
expanded AS (
    -- Every integer covered by at least one range, listed once.
    -- The type 'P' rows of master..spt_values serve as the number table.
    SELECT DISTINCT
        rng.START + nums.number AS N
    FROM data AS rng
    INNER JOIN master..spt_values AS nums
        ON nums.number BETWEEN 0 AND rng.[END] - rng.START
    WHERE nums.type = 'P'
),
marked AS (
    -- Value minus row number is constant along a run of consecutive integers.
    SELECT
        e.N,
        e.N - ROW_NUMBER() OVER (ORDER BY e.N) AS SeqID
    FROM expanded AS e
)
-- One row per run of consecutive integers: its first and last value.
SELECT
    MIN(m.N) AS START,
    MAX(m.N) AS [END]
FROM marked AS m
GROUP BY m.SeqID
This solution uses master..spt_values as a number table, for expanding the initial ranges. But if (all or some of) those ranges may span more than 2048 (subsequent) values, then you should define and use your own number table.