Query to merge continuous temporal records - sql-server-2005

I have a table like this:
id START_DATE end_date
1 01/01/2011 01/10/2011
2 01/11/2011 01/20/2011
3 01/25/2011 02/01/2011
4 02/10/2011 02/15/2011
5 02/16/2011 02/27/2011
I want to merge the records where the start_date is the day after the end_date of another record, so the final result should look something like this:
new_id START_DATE end_date
1 01/01/2011 01/20/2011
2 01/25/2011 02/01/2011
3 02/10/2011 02/27/2011
One way that I know to do this will be to create a row based temp table with various rows as dates (each record for one date, between the total range of days) and thus making the table flat.
But there has to be a cleaner way to do this in a single query... e.g. something using row_num?
Thanks guys.

-- Merge date ranges that touch (next start_date = previous end_date + 1 day)
-- into single "islands".
-- Sample data lives in a table variable; T-SQL table variables are named @name.
declare @T table
(
    id int,
    start_date datetime,
    end_date datetime
)
-- NOTE: the multi-row VALUES constructor needs SQL Server 2008 or later.
insert into @T (id, start_date, end_date) values
(1, '01/01/2011', '01/10/2011'),
(2, '01/11/2011', '01/20/2011'),
(3, '01/25/2011', '02/01/2011'),
(4, '02/10/2011', '02/15/2011'),
(5, '02/16/2011', '02/27/2011')
select row_number() over(order by min(dt)) as new_id,
       min(dt) as start_date,
       max(dt) as end_date
from (
    -- Expand every range into one row per calendar day (dt).
    -- grp is dt minus its row number among all days: consecutive days share
    -- the same grp value, and a gap in the calendar shifts it.
    -- This assumes the input ranges do not overlap (no day appears twice).
    select dateadd(day, N.Number, start_date) as dt,
           dateadd(day, N.Number - row_number() over(order by dateadd(day, N.Number, start_date)), start_date) as grp
    from @T
    inner join master..spt_values as N
        -- The type = 'P' rows are used here as a ready-made list of integers;
        -- a range longer than that list is silently cut short.
        on N.number between 0 and datediff(day, start_date, end_date) and
           N.type = 'P'
) as T
group by grp
order by new_id
You can use a numbers table instead of using master..spt_values.

Try This
-- Iteratively merge adjacent ranges in place: each pass finds every chain head
-- (a row with a successor but no predecessor), absorbs its immediate successor,
-- and repeats until no adjacent pair is left.
-- [table] is a placeholder for the real table name; it is bracketed because
-- TABLE is a reserved word.
Declare @chgRecs Table
    (updId int primary key not null,  -- row that survives and is extended
     delId int not null,              -- row that is absorbed and deleted
     endt datetime not null)          -- new end date for the surviving row
While Exists (Select * from [table] a
              Where Exists
                  (Select * from [table]
                   Where start_date =
                       DateAdd(day, 1, a.End_Date)))
Begin
    -- NOTE(review): the primary key on updId assumes at most one row starts on
    -- the day after any given end date.
    Insert @chgRecs (updId, delId, endt)
    Select a.id, b.id, b.End_Date
    From [table] a
        Join [table] b
            On b.start_date = DateAdd(day, 1, a.End_Date)
    Where Not Exists
        (Select * from [table]
         Where end_Date =
             DateAdd(day, -1, a.Start_Date))
    Delete [table] Where id In (Select delId from @chgRecs)
    Update t set
        End_Date = u.endt
    From [table] t join @chgRecs u
        On u.updId = t.Id
    -- Empty the work table so the next pass starts clean.
    Delete @chgRecs
End

No, was not looking for a loop...
I guess this is a good solution:
taking all the data in a #temp table
-- Show the raw rows first.
SELECT * FROM #temp;

-- adjacent: every pair of rows where "nxt" starts the day after "cur" ends.
-- Only direct pairs are merged; a chain of three or more touching rows is not
-- collapsed into one range by this query.
WITH adjacent AS
(
    SELECT cur.start_date AS cur_start,
           nxt.end_date   AS nxt_end
    FROM #temp AS cur
    JOIN #temp AS nxt
        ON nxt.start_date = DATEADD(DAY, 1, cur.end_date)
)
-- Merged pairs ...
SELECT adj.cur_start AS start_date,
       adj.nxt_end   AS end_date
FROM adjacent AS adj
UNION
-- ... plus the rows that take part in no pair.
SELECT solo.start_date,
       solo.end_date
FROM #temp AS solo
WHERE solo.start_date NOT IN (SELECT cur_start FROM adjacent)
  AND solo.end_date NOT IN (SELECT nxt_end FROM adjacent);

DROP TABLE #temp
Please let me know if there is anything better than this.
Thanks guys.

A recursive solution:
-- Recursive islands solution: walk the rows in Id order and start a new group
-- whenever a row does not begin the day after the previous row ended.
CREATE TABLE TestData
(
    Id INT PRIMARY KEY,
    StartDate DATETIME NOT NULL,
    EndDate DATETIME NOT NULL
);
-- The literals below are month/day/year.
SET DATEFORMAT MDY;
INSERT TestData (Id, StartDate, EndDate)
SELECT 1, '01/01/2011', '01/10/2011'
UNION ALL
SELECT 2, '01/11/2011', '01/20/2011'
UNION ALL
SELECT 3, '01/25/2011', '02/01/2011'
UNION ALL
SELECT 4, '02/10/2011', '02/15/2011'
UNION ALL
SELECT 5, '02/16/2011', '02/27/2011'
UNION ALL
SELECT 6, '02/28/2011', '03/06/2011'
UNION ALL
SELECT 7, '02/28/2011', '03/03/2011'
UNION ALL
SELECT 8, '03/10/2011', '03/18/2011'
UNION ALL
SELECT 9, '03/19/2011', '03/25/2011';
WITH RecursiveCTE
AS
(
    -- Anchor: the walk starts at Id = 1.
    SELECT t.Id, t.StartDate, t.EndDate
          ,1 AS GroupID
    FROM TestData t
    WHERE t.Id = 1
    UNION ALL
    -- Step: visit Id + 1. Ids must be gapless for the walk to reach every row.
    SELECT crt.Id, crt.StartDate, crt.EndDate
          ,CASE WHEN DATEDIFF(DAY, prev.EndDate, crt.StartDate) = 1 THEN prev.GroupID ELSE prev.GroupID + 1 END
    FROM TestData crt
    JOIN RecursiveCTE prev ON crt.Id - 1 = prev.Id
)
SELECT cte.GroupID, MIN(cte.StartDate) AS StartDate, MAX(cte.EndDate) AS EndDate
FROM RecursiveCTE cte
GROUP BY cte.GroupID
ORDER BY cte.GroupID
-- One recursion level is used per row, so lift the default limit of 100
-- levels; otherwise the query fails on longer tables.
OPTION (MAXRECURSION 0);
DROP TABLE TestData;

Related

SQL Server - Query With Multiple Date Ranges in subquery

I have used conditional aggregation in another query where I needed to use multiple date ranges. In this case the date ranges are needed in a sub-query.
I would like to know if I can get the desired results in one single query (without using UNION).
I need to check if a given record EXISTS in the subquery with date-range. Since I need to use EXISTS rather than a join - I am running into this issue.
Here is a sample script/data. The expected results table is for demonstration.
-- Repro script: builds sample entities and submission dates, queries each date
-- range separately with EXISTS, and lists the output expected from a single
-- combined query.
IF OBJECT_ID('tempdb..#Entity') IS NOT NULL DROP TABLE #Entity
IF OBJECT_ID('tempdb..#EntityDate') IS NOT NULL DROP TABLE #EntityDate
IF OBJECT_ID('tempdb..#ExpectedOutput') IS NOT NULL DROP TABLE #ExpectedOutput
-- The two ranges to test against (local variables take the @ prefix).
DECLARE @FortnightStart DATETIME = '2020/08/01', @FortnightEnd DATETIME = '2020/08/14 23:59:59'
-- DATETIME rather than DATE, so the 23:59:59 end-of-day bound is not truncated.
DECLARE @QuarterStart DATETIME = '2020/04/01', @QuarterEnd DATETIME = '2020/06/30 23:59:59'
SELECT 'Fortnight' DateRange, @FortnightStart 'Start', @FortnightEnd 'End'
UNION
SELECT 'Quarter', @QuarterStart, @QuarterEnd
CREATE TABLE #Entity (
    EntityId INT IDENTITY(1, 1),
    EntityName VARCHAR(50)
)
CREATE TABLE #EntityDate (
    EntityDateId INT IDENTITY(1, 1),
    EntityId INT,
    SubmittedDate DATETIME
)
-- Points at the temp table created above.
-- NOTE(review): SQL Server is documented not to enforce FOREIGN KEY constraints
-- on temporary tables, so this is informational only - confirm before relying on it.
ALTER TABLE #EntityDate ADD CONSTRAINT FK_EntityDate_Entity FOREIGN KEY (EntityId) REFERENCES #Entity(EntityId)
INSERT INTO #Entity (EntityName)
SELECT 'Alice'
UNION
SELECT 'Bob'
UNION
SELECT 'Cameron'
UNION
SELECT 'Diego'
UNION
SELECT 'Elliot'
SELECT * FROM #Entity
INSERT INTO #EntityDate(EntityId, SubmittedDate)
SELECT 1, '08/01/2020 11:00:00' -- only 1 record is expected in the output for this Entity
UNION
SELECT 1, '08/10/2020 10:00:00'
UNION
SELECT 1, '04/10/2020 10:00:00' -- this record should show up for the quarter date range
UNION
SELECT 2, '06/01/2020 11:00:00'
UNION
SELECT 3, '05/01/2020' -- only 1 record is expected in the output for this Entity
UNION
SELECT 3, '06/01/2020'
UNION
SELECT 4, '10/01/2021' -- does not fit in any date range
UNION
SELECT 5, '08/02/2020'
-- All submissions with their entity.
SELECT *
FROM #EntityDate d
INNER JOIN #Entity e ON d.EntityId = e.EntityId
-- Entities with at least one submission in the fortnight.
SELECT *
FROM #Entity E
WHERE EXISTS ( SELECT 1
               FROM #EntityDate d
               WHERE SubmittedDate BETWEEN @FortnightStart AND @FortnightEnd AND e.EntityId = D.EntityId
             )
-- Entities with at least one submission in the quarter.
SELECT *
FROM #Entity E
WHERE EXISTS ( SELECT 1
               FROM #EntityDate d
               WHERE SubmittedDate BETWEEN @QuarterStart AND @QuarterEnd AND e.EntityId = D.EntityId
             )
-- The result the single combined query should produce.
CREATE TABLE #ExpectedOutput
(
    EntityId INT,
    DateRange VARCHAR(50)
)
INSERT INTO #ExpectedOutput (EntityId, DateRange)
SELECT 1, 'Fortnight'
UNION
SELECT 5, 'Fortnight'
UNION
SELECT 1, 'Quarter'
UNION
SELECT 2, 'Quarter'
UNION
SELECT 3, 'Quarter'
SELECT o.*, e.EntityName
FROM #ExpectedOutput o
INNER JOIN #Entity e ON o.EntityId = e.EntityId
ORDER BY O.DateRange, o.EntityId
Using the virtual Dates table you created at the top of your script, you need to join that to Entity, using the EXISTS as the ON condition
-- Local variables take the @ prefix. Both ranges are DATETIME so the 23:59:59
-- end-of-day bound is kept (a DATE variable would drop the time part).
DECLARE @FortnightStart DATETIME = '2020/08/01', @FortnightEnd DATETIME = '2020/08/14 23:59:59';
DECLARE @QuarterStart DATETIME = '2020/04/01', @QuarterEnd DATETIME = '2020/06/30 23:59:59';
-- Dates: one row per named date range.
WITH Dates AS (
    SELECT 'Fortnight' DateRange, @FortnightStart Start, @FortnightEnd [End]
    UNION ALL
    SELECT 'Quarter', @QuarterStart, @QuarterEnd
)
-- Pair each range with every entity that has at least one submission inside it;
-- EXISTS as the join condition keeps it to one row per (entity, range).
SELECT
    e.EntityId,
    d.DateRange
FROM Dates d
JOIN #Entity E ON EXISTS (SELECT 1
                          FROM #EntityDate ed
                          WHERE ed.SubmittedDate BETWEEN d.Start AND d.[End]
                            AND ed.EntityId = e.EntityId
                         );
db<>fiddle
try something like this
-- Inline the sample submissions with a table value constructor instead of
-- loading them into a temp table first.
SELECT
    v.EntityId,
    v.SubmittedDate
FROM (
    VALUES
        (1, '08/01/2020 11:00:00'),
        (1, '08/10/2020 10:00:00'),
        (1, '04/10/2020 10:00:00'),
        (2, '06/01/2020 11:00:00'),
        (3, '05/01/2020'),
        (3, '06/01/2020'),
        (4, '10/01/2021'),
        (5, '08/02/2020')
) AS v (EntityId, SubmittedDate)
Documentation: https://learn.microsoft.com/en-us/u-sql/statements-and-expressions/select/from/select-selecting-from-the-values-table-value-constructor
Why do you have such a requirement? What is the harm in using multiple UNION ALLs? Performance-wise there is no harm.
I hope I understood your requirement correctly.
-- Local variables take the @ prefix. Both ranges are DATETIME so the 23:59:59
-- end-of-day bound is kept (a DATE variable would drop the time part).
DECLARE @FortnightStart DATETIME = '2020/08/01', @FortnightEnd DATETIME = '2020/08/14 23:59:59'
DECLARE @QuarterStart DATETIME = '2020/04/01', @QuarterEnd DATETIME = '2020/06/30 23:59:59'
-- CTE: the named date ranges, with Orderflg fixing their display order.
;WITH CTE
     AS (SELECT 1 AS Orderflg,
                'Fortnight' DateRange,
                @FortnightStart 'StartDate',
                @FortnightEnd 'EndDate'
         UNION ALL
         SELECT 2,
                'Quarter',
                @QuarterStart,
                @QuarterEnd),
     -- CTE1: each submission tagged with the range it falls in, numbered per
     -- (entity, range) so that only the earliest submission is kept below.
     CTE1
     AS (SELECT *,
                ROW_NUMBER() OVER(PARTITION BY EntityId,
                                               Orderflg
                                  ORDER BY SubmittedDate) rn
         FROM #EntityDate d
              CROSS APPLY
         (
             SELECT TOP 1 DateRange,
                          Orderflg
             FROM CTE C
             WHERE SubmittedDate >= StartDate
                   AND SubmittedDate < EndDate
             -- Makes TOP 1 deterministic if ranges ever overlap.
             ORDER BY C.Orderflg
         ) ca
     )
     SELECT e.EntityId,
            DateRange,
            EntityName
     FROM CTE1 C1
          INNER JOIN #Entity E ON c1.EntityId = e.EntityId
     WHERE rn = 1
     ORDER BY Orderflg;

How to Auto generate dates between date range using SQL Query?

I just want to generate the date between data range using SQL Query.
Source:
Result:
Thanks,
Lawrance A
Here is how to accomplish this by using a tally table to create a calendar table:
-- Expand each (from_date, to_date) range into one row per day by joining to a
-- calendar generated from a tally (numbers) table.
-- Table variables take the @ prefix.
declare @source table
(
    user_id int not null primary key clustered,
    from_date date not null,
    to_date date not null
);
insert into @source (user_id, from_date, to_date)
values
    (1, '02/20/2019', '02/23/2019'),
    (2, '02/22/2019', '02/28/2019'),
    (3, '03/01/2019', '03/05/2019');
with
    -- 1000 arbitrary rows; sys.messages is used only as a convenient row source.
    seed_rows as
    (
        select top 1000
            n = 1
        from sys.messages
    ),
    -- Sequential integers 0..999.
    tally as
    (
        select n = row_number() over(order by (select null)) - 1
        from seed_rows
    ),
    -- 1000 consecutive days starting at the earliest from_date; days later than
    -- that window are not generated, so longer spans need a larger TOP.
    calendar as
    (
        select
            date = dateadd(dd, n, (select min(from_date) from @source))
        from tally
    )
select
    s.user_id,
    c.date
from @source s
cross join calendar c
where c.date between s.from_date and s.to_date;
Result set:

How to use the minimum date from three available - SQL

I'm trying to plug a formula into a query to pull back how much should have run on a particular contract.
The formula itself is quite simple, but I can't find anywhere how to take the minimum date between 3, based on each record separately.
I need to calculate which is the earliest of Term_date, Suspend_date and today's date, some of which may be NULL, on each contract.
An interesting way to approach this is to use cross apply:
-- Per contract, take the earliest of term_date, suspend_date and now.
-- MIN ignores NULLs, so missing dates simply drop out of the comparison.
-- [table] is a placeholder for the real table name; it is bracketed because
-- TABLE is a reserved word.
select t.contractid, m.mindte
from [table] t cross apply
     (select min(d.dte) as mindte
      from (values (t.term_date), (t.suspend_date), (getdate())) d(dte)
     ) m;
-- Earliest of Term_date, Suspend_date and now, tolerating NULLs in either column.
-- A comparison with NULL is never true, so the "other date is missing" case is
-- tested explicitly instead of falling through to GETDATE().
CASE
    WHEN Term_date <= GETDATE()
         AND (Suspend_date IS NULL OR Term_date <= Suspend_date) THEN Term_date
    WHEN Suspend_date <= GETDATE() THEN Suspend_date
    ELSE GETDATE()
END AS MinimumDate
I know a CASE statement will be suggested, but I thought I'd try something different:
-- candidate_dates: one row per record per candidate date (term, suspend, now).
;WITH candidate_dates (RecordID, CheckDate) AS
(
    SELECT RecordID, Term_date
    FROM sourcetable
    UNION ALL
    SELECT RecordID, Suspend_date
    FROM sourcetable
    UNION ALL
    SELECT RecordID, GETDATE()
    FROM sourcetable
)
-- MIN skips NULL candidates, so the earliest non-NULL date wins.
SELECT
    src.RecordID,
    src.Field1,
    src.Field2,
    MIN(cd.CheckDate) AS MinDate
FROM sourcetable AS src
LEFT JOIN candidate_dates AS cd
    ON cd.RecordID = src.RecordID
GROUP BY
    src.RecordID,
    src.Field1,
    src.Field2
Here is a method using cross apply to generate a work table from which you can get the minimum date:
-- mock table with sample testing data
-- Mock table with sample test data (table variables take the @ prefix).
declare @MyTable table
(
    id int identity(1,1) primary key clustered,
    term_date datetime null,
    suspend_date datetime null
)
-- Covers: both NULL, only one set, equal dates, and each ordering.
insert into @MyTable (term_date, suspend_date)
select null, null
union all select '1/1/2015', null
union all select null, '1/2/2015'
union all select '1/3/2015', '1/3/2015'
union all select '1/4/2015', '1/5/2015'
union all select '1/6/2015', '1/5/2015'
select * from @MyTable
-- Unpivot the three candidate dates into rows per id, then take the minimum;
-- MIN ignores the NULL candidates.
select datevalues.id, min([date]) as min_date
from @MyTable
cross apply
(
    values (id, term_date), (id, suspend_date), (id, getdate())
) datevalues(id, [date])
group by datevalues.id

Can this while be converted to a set based query?

I have this query:
-- For every day in the period, record the tableA ids that have no tableB row
-- on that day.
if OBJECT_ID('tempdb..#tempA') is not null drop table #tempA
create table #tempA
(
    tempid varchar(5),
    tempdate smalldatetime
)
-- Loop cursor over the days (local variables take the @ prefix).
declare @loopdate smalldatetime
set @loopdate = '4/2/2013'
while (@loopdate <= '4/28/2013')
begin
    --Purpose is to get IDs not in TableB for each date period
    insert into #tempA (tempid, tempdate)
    -- The derived table exposes the column name of the first EXCEPT branch.
    select idget.tableAid, @loopdate
    from
    (
        select tableAid
        from tableA
        except
        select tableBid
        from tableB
        where tableBdate = @loopdate
    ) as idget
    set @loopdate = DATEADD(day, 1, @loopdate)
end
Is there a way to make the while loop set-based or is this best that could be done?
EDIT: made changes for correctness
EDIT: end result
ID1 4/2/2014
ID2 4/2/2014
ID4 4/2/2014
ID2 4/3/2014
ID1 4/4/2014
ID5 4/4/2014
ID3 4/5/2014
Still a loop but maybe a little more efficient
-- Same loop as in the question, with EXCEPT replaced by an anti-join.
-- Expects @loopdate (smalldatetime) to be declared and set to the first day
-- beforehand, and #tempA to exist.
while (@loopdate <= '4/28/2013')
begin
    --Purpose is to get IDs not in TableB for each date period
    insert into #tempA (tempid, tempdate)
    select idget.tableAid, @loopdate
    from
    (
        select tableAid
        from tableA
        left join tableB
            on tableB.tableBid = tableA.tableAid
            and tableB.tableBdate = @loopdate
        -- No matching tableB row for this id on this day.
        where tableB.tableBid is null
    ) as idget
    set @loopdate = DATEADD(day, 1, @loopdate)
end
This needs some work but may get you all the way with a set
-- Days: one row per day from 4/2/2013 through 4/28/2013.
;WITH Days
as
(
    SELECT cast('4/2/2013' AS datetime) as [Day]
    UNION ALL
    SELECT DATEADD(DAY, 1, [Day])
    FROM Days
    -- Strictly less than the last day, so the final row produced is 4/28.
    where [Day] < '4/28/2013'
)
-- Pair every id with every day, then keep the pairs with no tableB row.
SELECT tableA.tableAid, Days.[Day]
from Days
cross join tableA
left join tableB
    on tableB.tableBid = tableA.tableAid
    and tableB.tableBdate = Days.[Day]
where tableB.tableBid is null
It depends on whether or not tableA has a date on it. If not, then:
-- DayList walks backwards one day at a time from 2013-04-28; the last row it
-- produces is 2013-04-02. The DATEDIFF/DATEADD pair strips any time part.
WITH DayList (DateDay) AS
(
    SELECT CAST('2013-04-28' AS DATETIME)
    UNION ALL
    SELECT DATEADD(DAY, DATEDIFF(DAY, 0, DATEADD(DAY, -1, prev.DateDay)), 0)
    FROM DayList AS prev
    WHERE prev.DateDay >= '2013-04-03'
      AND prev.DateDay <= '2013-04-28'
)
-- Every (id, day) pair that has no matching row in #tableB.
SELECT DISTINCT
    a.tableAid,
    d.DateDay
FROM DayList AS d
CROSS JOIN #tableA AS a
LEFT JOIN #tableB AS b
    ON b.tableBid = a.tableAid
    AND b.tableBdate = d.DateDay
WHERE b.tableBid IS NULL
ORDER BY d.DateDay
-- Variant for when tableA carries its own date column: keep the (id, date)
-- pairs of tableA that do not appear in tableB within the period.
INSERT INTO #tempA (tempid, tempdate)
SELECT a.tableAid, a.tableAdate
FROM tableA AS a
EXCEPT
SELECT b.tableBid, b.tableBdate
FROM tableB AS b
WHERE b.tableBdate BETWEEN '4/2/2013' AND '4/28/2013';
You can include the range of dates in a conditional clause, as below:
-- Ids of tableA that have no tableB row anywhere in the period.
-- Only tempid is populated: the SELECT produces a single column, so the INSERT
-- column list must match it. This gives one row per missing id for the whole
-- range, not a per-day breakdown.
insert into #tempA (tempid)
select tableAid
from tableA
except
select tableBid
from tableB
where tableBdate >= '4/2/2013' and tableBdate <= '4/28/2013';

How to delete when the parameter varies by group without looping? (T-SQL)

Imagine I have these columns in a table:
id int NOT NULL IDENTITY PRIMARY KEY,
instant datetime NOT NULL,
foreignId bigint NOT NULL
For each group (grouped by foreignId) I want to delete all the rows which are 1 hour older than the max(instant). Thus, for each group the parameter is different.
Is it possible without looping?
Yep, it's pretty straightforward. Try this:
-- Remove every row that is at least one hour older than the newest row
-- sharing its ForeignID.
DELETE target
FROM MyTable AS target
WHERE target.instant <= (
          -- Cut-off for this row's group: newest instant minus one hour.
          SELECT DATEADD(hour, -1, MAX(peer.instant))
          FROM MyTable AS peer
          WHERE peer.ForeignID = target.ForeignID
      )
Or this:
-- MostRecentKeys: the newest instant per ForeignID.
;WITH MostRecentKeys
AS
(SELECT ForeignID, MAX(instant) AS LatestInstant
 FROM MyTable
 -- Required: without it ForeignID cannot sit next to the aggregate.
 GROUP BY ForeignID)
-- Delete rows at least one hour older than their group's newest instant.
DELETE mt
FROM MyTable AS mt
JOIN MostRecentKeys mrk ON mt.ForeignID = mrk.ForeignID
    AND mt.Instant <= DATEADD(hh, -1, mrk.LatestInstant)
-- Delete rows more than one hour older than the newest row of the same
-- foreignid. The second FROM introduces the alias used for correlation.
DELETE FROM mytable
FROM mytable AS outer_row
WHERE instant < DATEADD(hour, -1, (
                    SELECT MAX(inner_row.instant)
                    FROM mytable AS inner_row
                    WHERE inner_row.foreignid = outer_row.foreignid
                ))
Note double FROM clause, it's on purpose, otherwise you won't be able to alias the table you're deleting from.
The sample data to check:
-- Self-contained check of the double-FROM delete on sample data
-- (table variables take the @ prefix).
DECLARE @mytable TABLE
(
    id INT NOT NULL PRIMARY KEY,
    instant DATETIME NOT NULL,
    foreignID INT NOT NULL
)
-- ISO 8601 literals (yyyy-mm-ddThh:mm:ss) parse the same way under every
-- DATEFORMAT / language setting.
INSERT
INTO @mytable (id, instant, foreignID)
SELECT 1, '2009-07-22T10:00:00', 1
UNION ALL
SELECT 2, '2009-07-22T09:30:00', 1
UNION ALL
SELECT 3, '2009-07-22T08:00:00', 1
UNION ALL
SELECT 4, '2009-07-22T10:00:00', 2
UNION ALL
SELECT 5, '2009-07-22T08:00:00', 2
UNION ALL
SELECT 6, '2009-07-22T07:30:00', 2
-- Delete rows more than one hour older than the newest row of their group.
DELETE
FROM @mytable
FROM @mytable mto
WHERE instant <
      (
          SELECT DATEADD(hour, -1, MAX(instant))
          FROM @mytable mti
          WHERE mti.foreignid = mto.foreignid
      )
-- Rows 1, 2 and 4 should remain.
SELECT *
FROM @mytable
1 2009-07-22 10:00:00.000 1
2 2009-07-22 09:30:00.000 1
4 2009-07-22 10:00:00.000 2
I'm going to assume when you say '1 hour older than the max(instant)' you mean '1 hour older than the max(instant) for that foreignId'.
Given that, there's almost certainly a more succinct way than this, but it will work:
-- Delete each row whose instant, pushed forward one hour, is still earlier
-- than the newest instant recorded for the same foreignId.
DELETE FROM TableName
WHERE (
          SELECT MAX(newest.instant)
          FROM TableName AS newest
          WHERE newest.foreignId = TableName.foreignId
      ) > DATEADD(hour, 1, instant)
The inner subquery is called a 'correlated subquery', if you want to look for more info. The way it works is that for each row under consideration by the outer query, it is the foreignId of that row that gets referenced by the subquery.