how to join two tables in hive with unique value in column - sql

I have two tables in hive, table1 and table2 with the following values:
TABLE1:........................................TABLE2:
date.................value.....................date.....................value
'2016-01-01'....one.......................'2016-01-01'..........two
'2016-01-01'....three.....................'2016-01-01'..........four
'2016-01-01'....five
and I need to join both tables to create a third table as (with three rows) :
date.......................value1...........................value2
'2016-01-01'..........one................................two
'2016-01-01'..........three..............................four
'2016-01-01'..........five
I have tried many options but none of them worked.
Any idea.

DECLARE #table1 TABLE ([Date] Date, [Value] VarChar(10))
DECLARE #table2 TABLE ([Date] Date, [Value] VarChar(10))
INSERT INTO #table1([Date], [Value])
SELECT '2016-01-01', 'one'
UNION ALL
SELECT '2016-01-01', 'three'
UNION ALL
SELECT '2016-01-01', 'five'
INSERT INTO #table2([Date], [Value])
SELECT '2016-01-01', 'two'
UNION ALL
SELECT '2016-01-01', 'four'
SELECT [Date] = t1.[Date],
[Value1] = t1.[Value],
[Value2] = t3.[Value]
FROM
(SELECT [Date] = t.[Date],
[Value] = t.[Value],
[ID] = ROW_NUMBER() OVER(ORDER BY t.[Date] DESC)
FROM #table1 t) t1
OUTER APPLY
(
SELECT TOP 1 t2.[Value]
FROM
(SELECT [Date] = t.[Date],
[Value] = t.[Value],
[ID] = ROW_NUMBER() OVER(ORDER BY t.[Date] DESC)
FROM #table2 t) t2
WHERE t2.[Date] = t1.[Date]
AND t2.[ID] = t1.[ID]
) t3

Related

SQL Cut Stretch Date into various stretchs

I have a large stretch of dates in one table and some dates to cut in other table.
With this I need to get another table with all the stretchs inside the large one cut by the others dates.
I have this for the moment:
DECLARE #UTCSTARTDATE DATETIME='2020-01-20 00:00:00.00'
DECLARE #UTCENDDATE DATETIME='2020-01-20 04:00:00.00'
DECLARE #T1 TABLE
(
StartTime DATETIME,
EndTime DATETIME,
table_id int
)
DECLARE #T2 TABLE
(
CutTime DATETIME,
table2_id int
)
INSERT INTO #T1 SELECT #UTCSTARTDATE,#UTCENDDATE,1
INSERT INTO #T2 SELECT myCut,myID FROM (VALUES('2020-01-20 01:00:00.00',1),('2020-01-20 02:30:00.00',1),('2020-01-20 03:00:00.00',2))t2(myCut,myID)
SELECT * FROM #T1
SELECT * FROM #T2 order by CutTime ASC
DECLARE #STRETCHHOURSTABLE TABLE
(
startDate DATETIME,
endDate DATETIME
);
;WITH DATESPLITTER AS
(
SELECT
StartTime AS stretchStartDate,
(SELECT TOP(1) CutTime FROM #T2 where CutTime < EndTime AND table_id = table2_id order by CutTime ASC) AS stretchEndDate,
EndTime AS RealEndTime,
table_id AS RealID
FROM #T1
UNION ALL
SELECT
stretchEndDate,
(SELECT TOP(1) CutTime FROM #T2 where CutTime > stretchEndDate AND CutTime < RealEndTime AND table2_id = RealID order by CutTime ASC) AS stretchEndDate,
RealEndTime,
RealID
FROM DATESPLITTER
WHERE stretchEndDate < #UTCENDDATE
)
INSERT INTO #STRETCHHOURSTABLE (startDate, endDate)
SELECT
stretchStartDate,
CASE
WHEN #UTCENDDATE < stretchEndDate THEN #UTCENDDATE
ELSE stretchEndDate
END
FROM DATESPLITTER
SELECT * FROM #STRETCHHOURSTABLE
This seems to basically be lead() after union all:
select dt, lead(dt) over (order by dt)
from ((select v.dt
from #t1 t1 cross apply
(values (t1.startTime), (t1.endTime)) v(dt)
) union all
(select t2.cuttime
from #t2 t2
)
) t
Here is a db<>fiddle.
Try this, I have made changes:
DECLARE #UTCSTARTDATE DATETIME='2020-01-20 00:00:00.00'
DECLARE #UTCENDDATE DATETIME='2020-01-20 04:00:00.00'
DECLARE #T1 TABLE( StartTime DATETIME, EndTime DATETIME, table_id int);
DECLARE #T2 TABLE ( CutTime DATETIME, table2_id int);
INSERT INTO #T1 SELECT #UTCSTARTDATE,#UTCENDDATE,1
INSERT INTO #T2 VALUES('2020-01-20 01:00:00.00',1),('2020-01-20 02:30:00.00',1),('2020-01-20 03:00:00.00',2);
SELECT * FROM #T1
SELECT * FROM #T2 order by CutTime ASC;
DECLARE #STRETCHHOURSTABLE TABLE (startDate DATETIME, endDate DATETIME);
WITH DATESPLITTER AS
(
SELECT
StartTime AS stretchStartDate,
(select cuttime from (SELECT row_number() over(partition by table2_id order by CutTime ASC) rn, CutTime FROM #T2 where CutTime < EndTime AND table_id = table2_id)a
where a.rn = 1) AS stretchEndDate,
EndTime AS RealEndTime,
table_id AS RealID
FROM #T1
UNION ALL
SELECT
stretchEndDate,
ISNULL((select cuttime from (SELECT row_number() over(partition by table2_id order by CutTime ASC) rn, CutTime FROM #T2 where CutTime > stretchEndDate AND CutTime < RealEndTime AND table2_id = RealID)a
where a.rn = 1), RealEndTime) AS stretchEndDate,
RealEndTime,
RealID
FROM DATESPLITTER
WHERE stretchEndDate < #UTCENDDATE
)
INSERT INTO #STRETCHHOURSTABLE (startDate, endDate)
SELECT stretchStartDate,
CASE
WHEN #UTCENDDATE < stretchEndDate THEN #UTCENDDATE
ELSE stretchEndDate
END as sommm
FROM DATESPLITTER
SELECT * FROM #STRETCHHOURSTABLE

Insert multiple row different column value

This is my existing table
In this table, each user has their own respective data according to their Status. Each of the user will surely have Status 1.
Now, there are 3 Status to be stored for every user.
Was trying to make every user to have 3 Status, by inserting new row of user copying their Status 1 data, such that:
User Ali currently only have Status 1 and its data, so need insert a new
row Ali with Status 2 and copy along the data from Status 1, again,
insert a new row Ali with Status 3 and copy along the data from
Status 1.
User John currently only have Status 1 and 2, so need insert a new
row John with Status 3 and copy along the data from Status 1.
continue same pattern with other user
Expected result:
I would use CROSS JOIN and NOT EXISTS
with data as
(
select name,
column1,
column2
from your_table
where status = 1
), cross_join_data as
(
select d1.name, t.status, d1.column1, d1.column2
from data d1
cross join
(
select 1 status
union
select 2 status
union
select 3 status
) t
where not exists (
select 1
from your_table d2
where d2.name = d1.name and
d2.status = t.status
)
)
select *
from your_table
union all
select *
from cross_join_data
dbfiddle demo
This should work
with cte as (
select
[Name], coalesce(max(iif([Status]=1, [Column1], null)), max(iif([Status]=2, [Column1], null)), max(iif([Status]=3, [Column1], null))) col1
, coalesce(max(iif([Status]=1, [Column2], null)), max(iif([Status]=2, [Column2], null)), max(iif([Status]=3, [Column2], null))) col2
from
MyTable
group by [Name]
)
--insert into MyTable
select
cte.[Name], nums.n, cte.col1, cte.col2
from
cte
cross join (values (1),(2),(3)) nums(n)
left join MyTable on cte.[Name]=MyTable.[Name] and n=MyTable.[Status]
where
MyTable.[Status] is null
This works if data is not nullable
declare #table table (name varchar(10), status int, data int);
insert into #table values
('a', 1, 2)
, ('a', 2, 5)
, ('a', 3, 7)
, ('b', 1, 5)
, ('b', 2, 6)
, ('c', 1, 3)
select stats.status as statusStats
, tn.name as nameTN
, t.status as statusData, t.name, t.data
, ISNULL(t.data, t1.data) as 'fillInData'
from (values (1),(2),(3)) as stats(status)
cross join (select distinct name from #table) tn
left join #table t
on t.status = stats.status
and t.name = tn.name
join #table t1
on t1.name = tn.name
and t1.status = 1
order by tn.name, stats.status
Here is what I would do:
CREATE TABLE #existingtable (Name VARCHAR(50), Status INT, Column1 VARCHAR (10), Column2 VARCHAR(10));
INSERT INTO #existingtable (Name,Status,Column1,Column2) Values('Ali',1,'100','90');
INSERT INTO #existingtable (Name,Status,Column1,Column2) Values('John',1,'20','200');
INSERT INTO #existingtable (Name,Status,Column1,Column2) Values('John',2,'80','90');
INSERT INTO #existingtable (Name,Status,Column1,Column2) Values('Ming',1,'54','345');
INSERT INTO #existingtable (Name,Status,Column1,Column2) Values('Mei',1,'421','123');
INSERT INTO #existingtable (Name,Status,Column1,Column2) Values('Mei',3,'24','344');
SELECT * FROM #existingtable;
WITH CTE (Name,Column1,Column2)
AS
(
SELECT DISTINCT NAME,COLUMN1,COLUMN2
FROM #existingtable
)
, CTE2 (NAME,Status,Column1,Column2)
AS
(
SELECT NAME,1 AS STATUS,COLUMN1,COLUMN2
FROM CTE
UNION
SELECT NAME,2 AS STATUS,COLUMN1,COLUMN2
FROM CTE
UNION
SELECT NAME,3 AS STATUS,COLUMN1,COLUMN2
FROM CTE
)
INSERT INTO #existingtable (Name,Status,Column1,Column2)
SELECT C.Name,C.Status,C.Column1,C.Column2
FROM CTE2 AS C
LEFT JOIN #existingtable AS E
ON C.NAME = E.Name
AND C.Status = E.Status
WHERE E.Status IS NULL
SELECT * FROM #existingtable
ORDER BY Name, status
This has 2 edits. Initial edit added a where clause to the CTE
Second edit added the values added by the OP

UNION CTE with ORDER BY on 3 Tables

This is not easy to explain, however, I do believe there is a much nicer way to do what I managed to do and hope to get some help.
I have 3 tables (T1, T2, and T3). I need to get the latest from T1 and T2, then with those results return the results from t3 or from the previous result if t3 is empty. So the LatestDate doesn't really matter if there is a record in t3. Also, if there is no data in t3 and the LatestDate is the same on t1 and t2 (rarely will happen, but want to plan accordingly), I want results from t1.
Here's a sample of what I got, mind you the actual query is has many more fields, but the concept is the same.
CREATE TABLE [dbo].[t1](
[Id] [INT] NOT NULL,
[LatestDate] [DATETIME] NOT NULL
);
CREATE TABLE [dbo].[t2](
[Id] [INT] NOT NULL,
[LatestDate] [DATETIME] NOT NULL
);
CREATE TABLE [dbo].[t3](
[Id] [INT] NOT NULL,
[LatestDate] [DATETIME] NOT NULL
);
INSERT t1 (Id, LatestDate) VALUES (1, CAST(N'2000-01-01T00:00:00.000' AS DateTime));
INSERT t1 (Id, LatestDate) VALUES (2, CAST(N'2001-01-01T00:00:00.000' AS DateTime));
INSERT t1 (Id, LatestDate) VALUES (3, CAST(N'2002-01-01T00:00:00.000' AS DateTime));
INSERT t2 (Id, LatestDate) VALUES (1, CAST(N'2001-01-01T00:00:00.000' AS DateTime));
INSERT t2 (Id, LatestDate) VALUES (2, CAST(N'2002-01-01T00:00:00.000' AS DateTime));
INSERT t2 (Id, LatestDate) VALUES (4, CAST(N'2003-01-01T00:00:00.000' AS DateTime));
INSERT t3 (Id, LatestDate) VALUES (1, CAST(N'2001-01-01T00:00:00.000' AS DateTime));
INSERT t3 (Id, LatestDate) VALUES (2, CAST(N'2000-01-01T00:00:00.000' AS DateTime));
INSERT t3 (Id, LatestDate) VALUES (5, CAST(N'2004-01-01T00:00:00.000' AS DateTime));
GO;
WITH CTE AS (
SELECT TOP 1 * FROM (
SELECT 2 AS Sort, * FROM t1 WHERE t1.id = #UserId
UNION
SELECT 3 AS Sort, * FROM t2 WHERE t2.id = #UserId
) AS t
ORDER BY
t.LatestDate DESC
)
SELECT TOP 1 * FROM (
SELECT TOP 1 * FROM CTE
UNION
SELECT 1 AS Sort, * FROM t3 WHERE t3.id = #UserId
) AS t
ORDER BY
t.Sort;
Expected results:
When #UserID = 1:
Sort Source Id LatestDate
1 t3 1 1/1/2001
When #UserID = 2:
Sort Source Id LatestDate
1 t3 2 1/1/2000
When #UserID = 3:
Sort Source Id LatestDate
2 t1 3 1/1/2002
When #UserID = 4:
Sort Source Id LatestDate
3 t2 4 1/1/2003
When #UserID = 5:
Sort Source Id LatestDate
1 t3 5 1/1/2004
Thanks!
If I understand you correctly you want something like:
DECLARE #UserID INT = 5;
WITH cte AS (
SELECT 1 AS src, 1 as [tbl], * FROM t1 WHERE id = #UserId
UNION ALL SELECT 1, 2, * FROM t2 WHERE id = #UserId
UNION ALL SELECT 3, 3, * FROM t3 WHERE id = #UserId
), cte2 AS (
SELECT * , ROW_NUMBER() OVER(ORDER BY src DESC, LatestDate DESC, tbl ASC) AS rn
FROM cte
)
SELECT Id, LatestDate
FROM cte2
WHERE rn = 1;
RextesterDemo
Using PARTITION BY you could move filtering #userId to final part:
DECLARE #UserID INT = 5;
WITH cte AS (
SELECT 1 AS src, 1 as [tbl], * FROM t1
UNION ALL SELECT 1, 2, * FROM t2
UNION ALL SELECT 3, 3, * FROM t3
), cte2 AS (
SELECT *
,ROW_NUMBER() OVER(PARTITION BY Id ORDER BY src DESC, LatestDate DESC, tbl ASC) AS rn
FROM cte
)
SELECT Id, LatestDate
FROM cte2
WHERE rn = 1
AND id = #UserID
Rextester Demo2
How's this?
This assumes you will take the t3 result if it exists
or the max result from T1 or T2 if not.
if exists(select * from t3 where ID= #ID)
(
select ID, Max(LatestDate), TN='T3'
from t3
where ID= #ID
)
else
(
select top 1 ID, Max(LatestDate) LD,TN
from (Select *,TN='t1' from T1
union all
Select *, TN='t2' from t2) as a
where id=#ID
group by ID,TN
order by LD desc
)

How to replace the 'Strings' with numerical values based on a group by clause

I have the below table (#temp1) where I need to replace the string in the column'Formula' with the matching input 'VALUE' column based on the group 'Yearmonth'.
The 'Formula' column may be of any mathematical expression for better understanding I have mentioned a simple example below.
IDNUM formula INPUTNAME VALUE YEARMONTH
---------------------------------------------------------------------
1 imports(398)+imports(399) imports(398) 17.000 2003:1
2 imports(398)+imports(399) imports(398) 56.000 2003:2
3 imports(398)+imports(399) imports(399) 15.000 2003:1
4 imports(398)+imports(399) imports(399) 126.000 2003:2
For eg :From the above table i need the output as
Idnum Formula Yearmonth
1. 17.00 +15.00 2003:1
2. 56.00 +126.00 2003:2
I tried with the below different query from various suggestions but coludnt achieve it. Could someone help me this out ?
Type1 :
SELECT
REPLACE(FORMULA, INPUTName, AttributeValue) AS realvalues,
yearmonth
FROM #temp1
GROUP BY yearmonth
TYPE2 :
USING XML PATH... In this case it got worked but I need to replace only the strings with the values and not to stuff the strings based on mathematcal operation.(Because the formula might be of any type).
SELECT
IDNUM = MIN(IDNUM),
FORMULA =
(SELECT STUFF(
(SELECT ' +' + CONVERT(VARCHAR(10), Value)
FROM #temp1
WHERE YEARMONTH = t1.YEARMONTH
FOR XML PATH(''))
,1, 2, '')),
YEARMONTH
FROM #TEMP1 t1
GROUP BY YEARMONTH
TYPE3:Using Recursions...This is returning only the null values...
;with t as (
select t.*,
row_number() over (partition by yearmonth order by idnum) as seqnum,
count(*) over (partition by yearmonth) as cnt
from #temp1 t
)
,cte as (
select t.seqnum, t.yearmonth, t.cnt,
replace(formula, inputname, AttributeValue) as formula1
from t
where seqnum = 1
union all
select cte.seqnum, cte.yearmonth, cte.cnt,
replace(CTE.formula1, T.inputname, T.AttributeValue) as formula2
from cte join
t
on cte.yearmonth = t.yearmonth
AND cte.seqnum = t.seqnum + 1
)
select row_number() over (order by (select null)) as id,formula1
from cte
where seqnum = cnt
This is full working example using recursive CTE:
DECLARE #DataSource TABLE
(
[IDNUM] TINYINT
,[formula] VARCHAR(MAX)
,[INPUTNAME] VARCHAR(128)
,[VALUE] DECIMAL(9,3)
,[YEARMONTH] VARCHAR(8)
);
INSERT INTO #DataSource ([IDNUM], [formula], [INPUTNAME], [VALUE], [YEARMONTH])
VALUES ('1', 'imports(398)+imports(399)', 'imports(398)', '17.000', '2003:1')
,('2', 'imports(398)+imports(399)', 'imports(398)', '56.000', '2003:2')
,('3', 'imports(398)+imports(399)', 'imports(399)', '15.000', '2003:1')
,('4', 'imports(398)+imports(399)', 'imports(399)', '126.000', '2003:2')
,('5', '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(391)', '5.000', '2003:3')
,('6', '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(392)', '10.000', '2003:3')
,('7', '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(393)', '3.000', '2003:3')
,('8', '(imports(391)+imports(392)-imports(393))/imports(394)', 'imports(394)', '-5.000', '2003:3');
WITH DataSource AS
(
SELECT ROW_NUMBER() OVER(PARTITION BY [YEARMONTH] ORDER BY [IDNUM]) AS [ReplacementOrderID]
,[YEARMONTH]
,[formula]
,[INPUTNAME] AS [ReplacementString]
,[VALUE] AS [ReplacementValue]
FROM #DataSource
),
RecursiveDataSource AS
(
SELECT [ReplacementOrderID]
,[YEARMONTH]
,REPLACE([formula], [ReplacementString], [ReplacementValue]) AS [formula]
FROM DataSource
WHERE [ReplacementOrderID] = 1
UNION ALL
SELECT DS.[ReplacementOrderID]
,DS.[YEARMONTH]
,REPLACE(RDS.[formula], DS.[ReplacementString], DS.[ReplacementValue]) AS [formula]
FROM RecursiveDataSource RDS
INNER JOIN DataSource DS
ON RDS.[ReplacementOrderID] + 1 = DS.[ReplacementOrderID]
AND RDS.[YEARMONTH] = DS.[YEARMONTH]
)
SELECT RDS.[YEARMONTH]
,RDS.[formula]
FROM RecursiveDataSource RDS
INNER JOIN
(
SELECT [YEARMONTH]
,MAX([ReplacementOrderID]) AS [ReplacementOrderID]
FROM DataSource
GROUP BY [YEARMONTH]
) DS
ON RDS.[YEARMONTH] = DS.[YEARMONTH]
AND RDS.[ReplacementOrderID] = DS.[ReplacementOrderID]
ORDER BY RDS.[YEARMONTH]
Generally, you simply want to perform multiple replacements over a string in one statement. You can have many replacement values, just play with the MAXRECURSION option.
--Create sample data
DROP TABLE #temp1
CREATE TABLE #temp1 (IDNUM int, formula varchar(max), INPUTNAME varchar(max), VALUE decimal, YEARMONTH varchar(max))
INSERT INTO #temp1 VALUES
(1, 'imports(398)+imports(399)', 'imports(398)', 17.000, '2003:1'),
(2, 'imports(398)+imports(399)', 'imports(398)', 56.000, '2003:2'),
(3, 'imports(398)+imports(399)', 'imports(399)', 15.000, '2003:1'),
(4, 'imports(398)+imports(399)', 'imports(399)', 126.000, '2003:2')
--Query
;WITH t as (
SELECT formula, YEARMONTH, IDNUM
FROM #temp1
UNION ALL
SELECT REPLACE(a.formula, b.INPUTNAME, CAST(b.VALUE AS varchar(100))) AS formula, a.YEARMONTH, a.IDNUM
FROM t a
JOIN #temp1 b ON a.YEARMONTH = b.YEARMONTH AND a.formula LIKE '%' + b.INPUTNAME + '%'
)
SELECT MIN(IDNUM) AS IDNUM, formula, YEARMONTH
FROM t
WHERE formula not LIKE '%imports(%'
GROUP BY formula, YEARMONTH

Query to merge continuous temporal records

I have a table like this:
id START_DATE end_date
1 01/01/2011 01/10/2011
2 01/11/2011 01/20/2011
3 01/25/2011 02/01/2011
4 02/10/2011 02/15/2011
5 02/16/2011 02/27/2011
I want to merge the records where the start_date is just next day of end_date of another record: So the end record should be something like this:
new_id START_DATE end_date
1 01/01/2011 01/20/2011
2 01/25/2011 02/01/2011
3 02/10/2011 02/27/2011
One way that I know to do this will be to create a row based temp table with various rows as dates (each record for one date, between the total range of days) and thus making the table flat.
But there has to be a cleaner way to do this in a single query... e.g. something using row_num?
Thanks guys.
declare #T table
(
id int,
start_date datetime,
end_date datetime
)
insert into #T values
(1, '01/01/2011', '01/10/2011'),
(2, '01/11/2011', '01/20/2011'),
(3, '01/25/2011', '02/01/2011'),
(4, '02/10/2011', '02/15/2011'),
(5, '02/16/2011', '02/27/2011')
select row_number() over(order by min(dt)) as new_id,
min(dt) as start_date,
max(dt) as end_date
from (
select dateadd(day, N.Number, start_date) as dt,
dateadd(day, N.Number - row_number() over(order by dateadd(day, N.Number, start_date)), start_date) as grp
from #T
inner join master..spt_values as N
on N.number between 0 and datediff(day, start_date, end_date) and
N.type = 'P'
) as T
group by grp
order by new_id
You can use a numbers table instead of using master..spt_values.
Try This
Declare #chgRecs Table
(updId int primary key not null,
delId int not null,
endt datetime not null)
While Exists (Select * from Table a
Where Exists
(Select * from table
Where start_date =
DateAdd(day, 1, a.End_Date)))
Begin
Insert #chgRecs (updId, delId , endt)
Select a.id, b.id, b.End_Date,
From table a
Where Exists
(Select * from table
Where start_date =
DateAdd(day, 1, a.End_Date)))
And Not Exists
(Select * from table
Where end_Date =
DateAdd(day, -1, a.Start_Date)))
Delete table Where id In (Select delId from #chgRecs )
Update table set
End_Date = u.endt
From table t join #chgRecs u
On u.updId = t.Id
Delete #delRecs
End
No, was not looking for a loop...
I guess this is a good solution:
taking all the data in a #temp table
SELECT * FROM #temp
SELECT t2.start_date , t1.end_date FROM #temp t1 JOIN #temp t2 ON t1.start_date = DATEADD(DAY,1,t2.end_date)
UNION
SELECT START_DATE,end_date FROM #temp WHERE start_date NOT IN (SELECT t2.START_DATE FROM #temp t1 JOIN #temp t2 ON t1.start_date = DATEADD(DAY,1,t2.end_date))
AND end_date NOT IN (SELECT t1.end_Date FROM #temp t1 JOIN #temp t2 ON t1.start_date = DATEADD(DAY,1,t2.end_date))
DROP TABLE #temp
Please let me know if there is anything better than this.
Thanks guys.
A recursive solution:
CREATE TABLE TestData
(
Id INT PRIMARY KEY,
StartDate DATETIME NOT NULL,
EndDate DATETIME NOT NULL
);
SET DATEFORMAT MDY;
INSERT TestData
SELECT 1, '01/01/2011', '01/10/2011'
UNION ALL
SELECT 2, '01/11/2011', '01/20/2011'
UNION ALL
SELECT 3, '01/25/2011', '02/01/2011'
UNION ALL
SELECT 4, '02/10/2011', '02/15/2011'
UNION ALL
SELECT 5, '02/16/2011', '02/27/2011'
UNION ALL
SELECT 6, '02/28/2011', '03/06/2011'
UNION ALL
SELECT 7, '02/28/2011', '03/03/2011'
UNION ALL
SELECT 8, '03/10/2011', '03/18/2011'
UNION ALL
SELECT 9, '03/19/2011', '03/25/2011';
WITH RecursiveCTE
AS
(
SELECT t.Id, t.StartDate, t.EndDate
,1 AS GroupID
FROM TestData t
WHERE t.Id=1
UNION ALL
SELECT crt.Id, crt.StartDate, crt.EndDate
,CASE WHEN DATEDIFF(DAY,prev.EndDate,crt.StartDate)=1 THEN prev.GroupID ELSE prev.GroupID+1 END
FROM TestData crt
JOIN RecursiveCTE prev ON crt.Id-1=prev.Id
--WHERE crt.Id > 1
)
SELECT cte.GroupID, MIN(cte.StartDate) AS StartDate, MAX(cte.EndDate) AS EndDate
FROM RecursiveCTE cte
GROUP BY cte.GroupID
ORDER BY cte.GroupID;
DROP TABLE TestData;