This has been driving me crazy, any help is much appreciated. Code and results below - my question is simple. On the results line 4, the Activity does NOT match lag_Activity, why does the group number not increase?
-- Temp table holding the sample rows used by the queries that follow.
CREATE TABLE #exampleTable (
    name      VARCHAR(20),
    pnum      BIGINT,
    activity  VARCHAR(10),
    startTime DATETIME,
    endTime   DATETIME
)
-- Load the sample rows. The target columns are listed explicitly so the insert
-- does not silently depend on the column order of #exampleTable.
INSERT INTO #exampleTable (name, pnum, activity, startTime, endTime)
VALUES
    ('Harry Potter', 12345678, 'On Shift',  '2022-05-18 13:00:00', '2022-05-18 22:00:00'),
    ('Harry Potter', 12345678, 'Off Shift', '2022-05-18 16:30:00', '2022-05-18 17:30:00'),
    ('Jane Doe',     98765432, 'Off Shift', '2022-05-18 02:00:00', '2022-05-18 05:00:00'),
    ('Jane Doe',     98765432, 'On Shift',  '2022-05-18 02:00:00', '2022-05-18 16:00:00'),
    ('Jane Doe',     98765432, 'Off Shift', '2022-05-18 06:15:00', '2022-05-18 06:45:00'),
    ('Jane Doe',     98765432, 'Off Shift', '2022-05-18 11:30:00', '2022-05-18 12:00:00'),
    ('Jane Doe',     98765432, 'Off Shift', '2022-05-18 12:00:00', '2022-05-18 15:50:00')
-- Question's original attempt: number groups of rows per pnum, counting a break
-- (1) whenever the activity differs from the previous row's activity or the row
-- starts after the previous row's end time.
select
-- Running count of breaks. With ORDER BY and no explicit frame the window uses
-- the default RANGE frame, so rows that tie on (pnum, starttime) are summed
-- together and receive the same grp value.
sum(case when activity = lag_activity and starttime <= lag_endtime then 0 else 1 end) over(partition by pnum order by pnum, starttime) as grp
,*
from (
select
*
-- NOTE(review): these LAG windows have no PARTITION BY, so the first row of one
-- pnum is compared with the last row of the previous pnum. (pnum, starttime) is
-- also not unique in the sample data, so the order of tied rows is not fixed.
,lag(endtime) over(order by pnum, starttime) lag_endtime
,lag(activity) over(order by pnum, starttime) lag_activity
from #exampleTable
-- keep only rows whose end time is later than their start time
where endtime-Starttime>0
) a
order by pnum, starttime
Here are the results:
grp name pnum activity lag_activity startTime endTime lag_endtime
1 Harry Potter 12345678 On Shift NULL 2022-05-18 13:00:00.000 2022-05-18 22:00:00.000 NULL
2 Harry Potter 12345678 Off Shift On Shift 2022-05-18 16:30:00.000 2022-05-18 17:30:00.000 2022-05-18 22:00:00.000
1 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 02:00:00.000 2022-05-18 05:00:00.000 2022-05-18 17:30:00.000
1 Jane Doe 98765432 On Shift Off Shift 2022-05-18 02:00:00.000 2022-05-18 16:00:00.000 2022-05-18 05:00:00.000
2 Jane Doe 98765432 Off Shift On Shift 2022-05-18 06:15:00.000 2022-05-18 06:45:00.000 2022-05-18 16:00:00.000
3 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 11:30:00.000 2022-05-18 12:00:00.000 2022-05-18 06:45:00.000
3 Jane Doe 98765432 Off Shift Off Shift 2022-05-18 12:00:00.000 2022-05-18 15:50:00.000 2022-05-18 12:00:00.000
Thanks to @ConorCunninghamMSFT for the tip: the LAG functions and the SUM ... OVER function needed an additional ordering column. The particular order is less important than the fact that it is unambiguous (no ties) and stays consistent across all of the window functions.
-- Assign a group number per pnum: a new group starts whenever the activity
-- changes or the row begins after the previous row of the same pnum has ended.
-- Rows that tie on (pnum, starttime, endtime) are still ordered arbitrarily.
SELECT
    -- An explicit ROWS frame makes this a row-by-row running total; the default
    -- RANGE frame would give tied rows the same total.
    SUM(CASE WHEN activity = lag_activity AND starttime <= lag_endtime THEN 0 ELSE 1 END)
        OVER (PARTITION BY pnum
              ORDER BY starttime, endtime
              ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS grp
    ,*
FROM (
    SELECT
        *
        -- PARTITION BY pnum keeps the first row of each pnum from being compared
        -- with the last row of the previous pnum: its LAG values are NULL, so it
        -- always opens group 1.
        ,LAG(endtime)  OVER (PARTITION BY pnum ORDER BY starttime, endtime) AS lag_endtime
        ,LAG(activity) OVER (PARTITION BY pnum ORDER BY starttime, endtime) AS lag_activity
    FROM #exampleTable
    -- skip zero-length and inverted intervals (direct comparison instead of
    -- subtracting one DATETIME from another)
    WHERE endtime > starttime
) a
ORDER BY pnum, starttime, endtime
Related
I have a table of events with a start time and an end time, with some events that have a start time before midnight and an end time after midnight. I'd like to produce output that splits up these events at the midnight barrier so they can be counted toward their respective date.
| EVENT_ID | START_TIME | END_TIME |
|----------|-------------------------|-------------------------|
| 1001 | 2021-02-21 14:00:00.000 | 2021-02-21 18:00:00.000 |
| 1002 | 2021-02-21 17:00:00.000 | 2021-02-22 03:00:00.000 |
| 1003 | 2021-02-21 18:00:00.000 | 2021-02-21 22:00:00.000 |
| 1004 | 2021-02-21 22:00:00.000 | 2021-02-22 07:00:00.000 |
The above table could be produced by the query:
-- Events that start on 2021-02-21.
-- A half-open range replaces BETWEEN ... '23:59:59.999': for a DATETIME column
-- that upper literal rounds up to 2021-02-22 00:00:00.000, so BETWEEN would also
-- return events that start exactly at midnight of the following day.
SELECT EVENT_ID,
       START_TIME,
       END_TIME
FROM EVENTS
WHERE START_TIME >= '2021-02-21T00:00:00'
  AND START_TIME <  '2021-02-22T00:00:00'
;
My desired output will split up the events that span multiple days at midnight:
| EVENT_ID | START_TIME | END_TIME |
|----------|-------------------------|-------------------------|
| 1001 | 2021-02-21 14:00:00.000 | 2021-02-21 18:00:00.000 |
| 1002 | 2021-02-21 17:00:00.000 | 2021-02-21 23:59:59.999 |
| 1002 | 2021-02-22 00:00:00.000 | 2021-02-22 03:00:00.000 |
| 1003 | 2021-02-21 18:00:00.000 | 2021-02-21 22:00:00.000 |
| 1004 | 2021-02-21 22:00:00.000 | 2021-02-21 23:59:59.999 |
| 1004 | 2021-02-22 00:00:00.000 | 2021-02-22 07:00:00.000 |
Any help would be greatly appreciated. Ideally I'd like to produce this without functions or the creation of new tables.
Note that I'm using SQL Server 2016
Using table of numbers
-- Split each event into one row per calendar day it touches, using an inline
-- numbers table instead of recursion.
-- Limitation: nmbs holds 0..999, so an event spanning more than 1000 calendar
-- days is truncated.
with t0(n) as (
    select n
    from (
        values (1),(2),(3),(4),(5),(6),(7),(8),(9),(10)
    ) t(n)
), nmbs as (
    -- 10 x 10 x 10 rows numbered 0..999
    select row_number() over (order by t1.n) - 1 as n
    from t0 t1 cross join t0 t2 cross join t0 t3
)
select e.event_id,
       -- day 0 keeps the real start; later days start at midnight
       case when ns.n = 0
            then e.start_time
            else dateadd(day, ns.n, convert(date, e.start_time))
       end as start_time,
       -- the last day keeps the real end; earlier days end at 23:59:59
       case when datediff(day, e.start_time, e.end_time) = ns.n
            then e.end_time
            else dateadd(second, -1, dateadd(day, ns.n + 1, convert(datetime, convert(date, e.start_time))))
       end as end_time
from Events e
cross apply (
    -- Filter on n rather than TOP (...) without ORDER BY: TOP alone does not
    -- guarantee that the rows returned are 0..datediff.
    select nmbs.n
    from nmbs
    where nmbs.n <= datediff(day, e.start_time, e.end_time)
) ns
order by e.event_id, ns.n
You can use a recursive CTE for this:
-- Recursively cut each event at midnight: every row is one piece of an event,
-- and real_end_time carries the event's true end through the recursion.
WITH segments AS (
    -- First piece: ends at the event's end when it stays within one day,
    -- otherwise at the midnight following start_time.
    SELECT
        src.event_id,
        src.start_time,
        (CASE
             WHEN DATEDIFF(DAY, src.start_time, src.end_time) = 0 THEN src.end_time
             ELSE DATEADD(DAY, 1, CAST(src.start_time AS DATE))
         END) AS end_time,
        src.end_time AS real_end_time
    FROM t AS src

    UNION ALL

    -- Next piece: starts where the previous one ended and runs to the following
    -- midnight or the real end, whichever comes first.
    SELECT
        prev.event_id,
        prev.end_time,
        (CASE
             WHEN DATEADD(DAY, 1, CAST(prev.end_time AS DATE)) > prev.real_end_time
                 THEN prev.real_end_time
             ELSE DATEADD(DAY, 1, CAST(prev.end_time AS DATE))
         END),
        prev.real_end_time
    FROM segments AS prev
    WHERE prev.end_time < prev.real_end_time
)
SELECT
    event_id,
    start_time,
    end_time,
    real_end_time
FROM segments;
Here is a db<>fiddle.
The following method solves for the case of midnight between START_TIME and END_TIME. The "desired output" above indicates only a single midnight occurs between START_TIME and END_TIME.
-- Rebuild the sample events table from scratch so the script can be re-run.
if object_id('tempdb..#t') is not null
    drop table #t

create table #t (
    Event_ID   int,
    START_TIME datetime2,
    END_TIME   datetime2
)

insert into #t (Event_ID, START_TIME, END_TIME)
values
    (1001, '2021-02-21 14:00:00.000', '2021-02-21 18:00:00.000'),
    (1002, '2021-02-21 17:00:00.000', '2021-02-22 03:00:00.000'),
    (1003, '2021-02-21 18:00:00.000', '2021-02-21 22:00:00.000'),
    (1004, '2021-02-21 22:00:00.000', '2021-02-22 07:00:00.000')
-- Stage every event together with d, the first midnight after its START_TIME.
if object_id('tempdb..#stage') is not null
    drop table #stage

select
    src.Event_ID,
    src.START_TIME,
    src.END_TIME,
    cast(cast(dateadd(day, 1, src.START_TIME) as date) as datetime2) as d
into #stage
from #t as src
-- First piece of every event: from START_TIME to END_TIME or to the first
-- midnight after the start (d), whichever comes first.
SELECT Event_ID, START_TIME
, CASE WHEN d > END_TIME THEN END_TIME ELSE d END END_TIME
FROM #stage
UNION ALL
-- Second piece, only for events that cross that midnight: from d to END_TIME.
-- The WHERE clause already guarantees d < END_TIME, so the start is simply d.
SELECT Event_ID
, d START_TIME
, END_TIME
FROM #stage
WHERE d < END_TIME
-- START_TIME is included so the two pieces of an event come out in time order.
ORDER BY Event_ID, START_TIME
How can I select the last entry recorded for each day? In this example, I need the last item number ordered and the last DateOrdered entry for each day over the last 5 days. Here's my table:
ItemNumber | DateOrdered
1 2020-04-01 08:00:00.000
3 2020-04-01 09:00:00.000
5 2020-04-01 10:00:00.000
4 2020-04-02 09:00:00.000
6 2020-04-02 10:00:00.000
7 2020-04-03 08:00:00.000
3 2020-04-03 09:00:00.000
2 2020-04-03 10:00:00.000
5 2020-04-04 10:00:00.000
8 2020-04-05 08:00:00.000
2 2020-04-05 09:00:00.000
8 2020-04-05 10:00:00.000
Here's the results I need:
ItemNumber | DateOrdered
5 2020-04-01 10:00:00.000
6 2020-04-02 10:00:00.000
2 2020-04-03 10:00:00.000
5 2020-04-04 10:00:00.000
8 2020-04-05 10:00:00.000
This is as close as I can get with it:
-- Number the rows of each day from latest to earliest and keep the latest one.
-- The day bucket is the count of day boundaries between date 0 and DateOrdered.
SELECT ranked.*
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (
            PARTITION BY DATEDIFF(DAY, 0, src.DateOrdered)
            ORDER BY src.DateOrdered DESC
        ) AS rn
    FROM myTable AS src
) AS ranked
WHERE ranked.rn = 1
You are almost there. You just need to fix the definition of your partition so it puts together all rows that belong to the same day.
This should do it:
-- Latest order of each calendar day: rank the rows of every day from newest to
-- oldest, then keep rank 1.
WITH orders_ranked AS (
    SELECT
        o.*,
        ROW_NUMBER() OVER (
            PARTITION BY CONVERT(date, o.DateOrdered)
            ORDER BY o.DateOrdered DESC
        ) AS rn
    FROM myTable AS o
)
SELECT *
FROM orders_ranked
WHERE rn = 1
I currently have an issue whereby I am doing a DATEDIFF in minutes between a start date and an end date; however, when this range goes over into a new month, I need the figures to be separate for each month.
Please see example data (both Text and Image view);
-- Duration of each booking in minutes, labelled with the month number of its
-- start date.
SELECT
    b.[BookingNum],
    b.[StartDate],
    b.[EndDate],
    b.[Location],
    MONTH(b.[StartDate]) AS [Month],
    DATEDIFF(MINUTE, b.[StartDate], b.[EndDate]) AS [Minutes]
FROM [Test].[dbo].[Booking] AS b
BookingNum StartDate EndDate Location Month Minutes
1 2019-02-05 12:54:00.000 2019-02-08 15:00:00.000 Area 1 2 4446
2 2019-05-02 10:41:00.000 2019-05-10 12:39:00.000 Area 2 5 11638
3 2019-06-01 10:30:00.000 2019-06-04 09:25:00.000 Area 3 6 4255
4 2019-02-02 09:41:00.000 2019-04-20 11:54:00.000 Area 1 2 111013
5 2019-03-29 19:09:00.000 2019-04-02 10:41:00.000 Area 3 3 5252
For rows 4 & 5 there would need to be additional rows as they go across multiple months.
Example for the data in row 4, I would want to see;
StartDate EndDate Location Month Minutes
2019-02-02 09:41:00.000 2019-02-28 23:59:00.000 Area 1 2 38298
2019-03-01 00:00:00.000 2019-03-31 23:59:00.000 Area 1 3 44639
2019-04-01 00:00:00.000 2019-04-20 23:59:00.000 Area 1 4 28074
This would then give me the total minutes for that month only between the start and end date.
Any help much appreciated.
Edit: Recursive CTE should do the trick! Basically, use recursion to keep getting the start date through the lesser of the EOM and the end date, until ultimately you reach the end date.
Fiddle
-- Sample bookings held in a table variable. Table variables are named with @
-- (DECLARE #tbl TABLE is not valid T-SQL). ISO 8601 literals are used so the
-- values parse the same under any language / DATEFORMAT setting.
DECLARE @tbl TABLE (bookingnum INT, sd DATETIME, ed DATETIME)
INSERT INTO @tbl (bookingnum, sd, ed) VALUES
(1, '2019-02-05T12:54:00', '2019-02-08T15:00:00'),
(2, '2019-05-02T10:41:00', '2019-05-10T12:39:00'),
(3, '2019-06-01T10:30:00', '2019-06-04T09:25:00'),
(4, '2019-02-02T09:41:00', '2019-05-20T11:54:00'),
(5, '2019-03-29T19:09:00', '2019-04-02T10:41:00')
-- Walk each booking month by month. Every row covers sd .. applied_ed, where
-- applied_ed is the earlier of the first day of the next month (eom) and the
-- booking's real end (ed). Recursion continues from applied_ed until ed is reached.
;WITH cte AS (
SELECT bookingnum, sd, DATEADD(DAY, 1, EOMONTH(sd)) eom, ed,
CASE WHEN DATEADD(DAY, 1, EOMONTH(sd)) < ed THEN DATEADD(DAY, 1, EOMONTH(sd)) else ed END AS applied_ed
FROM @tbl
UNION ALL
SELECT bookingnum, applied_ed, DATEADD(DAY, 1, EOMONTH(applied_ed)) eom, ed,
CASE WHEN DATEADD(DAY, 1, EOMONTH(applied_ed)) < ed THEN DATEADD(DAY, 1, EOMONTH(applied_ed)) else ed END AS applied_ed
FROM cte
WHERE applied_ed < ed
)
-- Slices are half-open (the end of one slice is the start of the next), so the
-- minutes of all slices of a booking add up to its total duration.
SELECT bookingnum, sd, applied_ed AS ed, DATEDIFF(MINUTE, sd, applied_ed) minutes
FROM cte
ORDER BY bookingnum, sd
Returns:
bookingnum sd ed minutes
1 2019-02-05 12:54:00.000 2019-02-08 15:00:00.000 4446
2 2019-05-02 10:41:00.000 2019-05-10 12:39:00.000 11638
3 2019-06-01 10:30:00.000 2019-06-04 09:25:00.000 4255
4 2019-02-02 09:41:00.000 2019-03-01 00:00:00.000 38299
4 2019-03-01 00:00:00.000 2019-04-01 00:00:00.000 44640
4 2019-04-01 00:00:00.000 2019-05-01 00:00:00.000 43200
4 2019-05-01 00:00:00.000 2019-05-20 11:54:00.000 28074
5 2019-03-29 19:09:00.000 2019-04-01 00:00:00.000 3171
5 2019-04-01 00:00:00.000 2019-04-02 10:41:00.000 2081
This can be achieved using recursive CTE as follows. This calculates multiple months between startdate and enddate.
Fiddle: http://sqlfiddle.com/#!18/26568/4
-- Scratch table holding the sample bookings.
CREATE TABLE #temp (
    BookingNum INT,
    StartDate  DATETIME,
    EndDate    DATETIME,
    Location   VARCHAR(25)
)
-- Load the sample bookings. The target columns are listed explicitly so the
-- insert does not depend on the column order of #temp.
insert into #temp (BookingNum, StartDate, EndDate, Location)
values
    (1, '2019-02-05 12:54:00', '2019-02-08 15:00:00', 'Area 1'),
    (2, '2019-05-02 10:41:00', '2019-05-10 12:39:00', 'Area 2'),
    (3, '2019-06-01 10:30:00', '2019-06-04 09:25:00', 'Area 3'),
    (4, '2019-02-02 09:41:00', '2019-05-20 11:54:00', 'Area 1'),
    (5, '2019-03-29 19:09:00', '2019-04-02 10:41:00', 'Area 3')
-- Split every booking into one row per calendar month it spans and report the
-- minutes that fall inside each month. Slices that do not contain the booking's
-- real end stop at 23:59:59 on the last day of their month.
;WITH cte AS
(
    -- Anchor: the slice in the booking's first month.
    SELECT BookingNum,
           StartDate,
           CASE
               -- booking continues past this month: stop at the month's last second
               WHEN DATEDIFF(month, StartDate, EndDate) > 0
                   THEN DATEADD(second, -1, DATEADD(month, DATEDIFF(month, 0, StartDate) + 1, 0))
               ELSE EndDate
           END AS EndDate,
           Location,
           -- Number of month boundaries still to cross. DATEDIFF(month, ...) is used
           -- instead of subtracting DATEPART(m, ...) values so that bookings which
           -- cross a year boundary (e.g. December to January) are split as well.
           DATEDIFF(month, StartDate, EndDate) AS MonthDiff
    FROM #temp
    UNION ALL
    -- Recursive step: the slice in the month after the previous slice.
    SELECT cte.BookingNum,
           -- first day of the following month
           DATEADD(month, DATEDIFF(month, 0, cte.StartDate) + 1, 0) AS StartDate,
           CASE
               -- more months follow: stop at the last second of this slice's month
               -- (the same convention as the anchor, so no day is dropped)
               WHEN cte.MonthDiff > 1
                   THEN DATEADD(second, -1, DATEADD(month, DATEDIFF(month, 0, cte.StartDate) + 2, 0))
               -- this is the booking's final month: stop at its real end
               ELSE t.EndDate
           END AS EndDate,
           cte.Location,
           cte.MonthDiff - 1 AS MonthDiff
    FROM cte
    INNER JOIN #temp t ON cte.BookingNum = t.BookingNum
    WHERE cte.MonthDiff > 0
)
SELECT BookingNum,
       StartDate,
       EndDate,
       Location,
       DATEPART(m, StartDate) AS [month],
       DATEDIFF(minute, StartDate, EndDate) AS [minutes]
FROM cte
-- order by name, and by StartDate within a booking, for a deterministic listing
ORDER BY BookingNum, StartDate;
drop table #temp
Result:
BookingNum StartDate EndDate Location month minutes
----------- ----------------------- ----------------------- ------------------------- ----------- -----------
1 2019-02-05 12:54:00.000 2019-02-08 15:00:00.000 Area 1 2 4446
2 2019-05-02 10:41:00.000 2019-05-10 12:39:00.000 Area 2 5 11638
3 2019-06-01 10:30:00.000 2019-06-04 09:25:00.000 Area 3 6 4255
4 2019-02-02 09:41:00.000 2019-02-28 23:59:59.000 Area 1 2 38298
4 2019-03-01 00:00:00.000 2019-03-31 00:00:00.000 Area 1 3 43200
4 2019-04-01 00:00:00.000 2019-04-30 00:00:00.000 Area 1 4 41760
4 2019-05-01 00:00:00.000 2019-05-20 11:54:00.000 Area 1 5 28074
5 2019-03-29 19:09:00.000 2019-03-31 23:59:59.000 Area 3 3 3170
5 2019-04-01 00:00:00.000 2019-04-02 10:41:00.000 Area 3 4 2081
To achieve this you will need to create an additional table to join to that contains the months. You would then join to that table where the calendar table's month lies between the month of the start date and the month of the end date. To do this you need to use a DATEADD/DATEDIFF expression to round your date down to the first of the month, e.g. DATEADD(month, DATEDIFF(month, 0, StartDate), 0). This works by calculating the number of month boundaries between a fixed base date (in this case 0, i.e. 1900-01-01) and your date, and then adding that many months back onto the base date.
Then you will need to round your start or end date up or down to the end of the month if they are not in the same month as the calendar table record, which will allow you to do a new calculation for the time.
The whole code would look something like this:
-- Calendar helper: one row per month, keyed by the first day of that month.
create table #MonthDate (
    MonthDate date primary key
);

insert into #MonthDate (MonthDate)
values
    ('20190101'),
    ('20190201'),
    ('20190301'),
    ('20190401'),
    ('20190501'),
    ('20190601');
-- Minutes of each booking that fall inside each calendar month it touches.
-- r holds the booking's start and end rounded down to the first of their month;
-- a booking is matched to every #MonthDate row between those two months.
SELECT
    b.StartDate,
    b.EndDate,
    DATEDIFF(
        minute,
        -- in the booking's first month count from the real start, otherwise
        -- from the first of the month
        CASE WHEN r.RoundedStartDate = md.MonthDate THEN b.StartDate ELSE md.MonthDate END,
        -- in the booking's last month count to the real end, otherwise to the
        -- first of the next month
        CASE WHEN r.RoundedEndDate = md.MonthDate THEN b.EndDate ELSE DATEADD(month, 1, md.MonthDate) END
    ) AS Minutes
FROM Test.dbo.Booking AS b
CROSS APPLY (
    SELECT
        DATEADD(month, DATEDIFF(month, 0, b.StartDate), 0) AS RoundedStartDate,
        DATEADD(month, DATEDIFF(month, 0, b.EndDate), 0)   AS RoundedEndDate
) AS r
INNER JOIN #MonthDate AS md
    ON md.MonthDate >= r.RoundedStartDate
   AND md.MonthDate <= r.RoundedEndDate
http://sqlfiddle.com/#!18/70730/2
With the below sample data, I am trying to group record with same rate.
id start_date end_date rate
-----------------------------------------------------------------
1 01/01/2017 12:00:00 am 01/01/2017 12:00:00 am 300
1 02/01/2017 12:00:00 am 02/01/2017 12:00:00 am 300
1 03/01/2017 12:00:00 am 03/01/2017 12:00:00 am 300
1 04/01/2017 12:00:00 am 04/01/2017 12:00:00 am 1000
1 05/01/2017 12:00:00 am 05/01/2017 12:00:00 am 500
1 06/01/2017 12:00:00 am 06/01/2017 12:00:00 am 500
1 07/01/2017 12:00:00 am 07/01/2017 12:00:00 am 1000
1 08/01/2017 12:00:00 am 08/01/2017 12:00:00 am 1000
1 09/01/2017 12:00:00 am 09/01/2017 12:00:00 am 300
What I've tried :
-- Attempt at grouping rows by rate: grp is the difference between the row number
-- within id and the row number within rate; running min/max dates are then
-- taken per grp and de-duplicated.
with numbered as (
    select t.*,
           row_number() over (partition by id order by start_date)
         - row_number() over (partition by rate order by start_date) as grp
    from t
),
spans as (
    select id,
           min(start_date) over (partition by grp order by start_date) as mn_date,
           max(end_date) over (partition by grp order by start_date desc) as mx_date,
           rate
    from numbered
)
select distinct
       id,
       mn_date,
       mx_date,
       rate
from spans
order by mn_date;
Output :
id mn_date mx_date rate
--------------------------------------------------------
1 01/01/2017 12:00:00 am 03/01/2017 12:00:00 am 300
1 04/01/2017 12:00:00 am 04/01/2017 12:00:00 am 1000
1 05/01/2017 12:00:00 am 06/01/2017 12:00:00 am 500
1 07/01/2017 12:00:00 am 09/01/2017 12:00:00 am 300
1 07/01/2017 12:00:00 am 09/01/2017 12:00:00 am 1000
Desired Output:
id mn_date mx_date rate
--------------------------------------------------------
1 01/01/2017 12:00:00 am 03/01/2017 12:00:00 am 300
1 04/01/2017 12:00:00 am 04/01/2017 12:00:00 am 1000
1 05/01/2017 12:00:00 am 06/01/2017 12:00:00 am 500
1 07/01/2017 12:00:00 am 08/01/2017 12:00:00 am 1000
1 09/01/2017 12:00:00 am 09/01/2017 12:00:00 am 300
Final result to group by consecutive dates: (Thanks to Gordon )
-- Collapse runs of consecutive days that share a rate into one row per run.
select id, min(start_date), max(end_date), rate
from (
    select id, start_date, end_date, rate,
           -- difference of row numbers: constant within a run of equal rates
           seqnum_i - seqnum_ir as grp,
           -- running count of date gaps: changes at the first row after a gap
           sum(x) over (partition by id order by start_date) as grp1
    from (
        select t.*,
               row_number() over (partition by id order by start_date) as seqnum_i,
               row_number() over (partition by id, rate order by start_date) as seqnum_ir,
               -- x = 0 when this row starts the day after the previous row of the
               -- same id ended, 1 otherwise. LAG is used rather than LEAD so the
               -- flag sits on the first row AFTER a gap: flagging the row before
               -- the gap (and the last row, whose LEAD is NULL) moved those rows
               -- into the wrong group. The first row has no predecessor, so it
               -- gets 1 and opens the first group.
               case when lag(end_date) over (partition by id order by start_date) = start_date - 1
                    then 0
                    else 1
               end x
        from t
    )
)
group by id, grp, grp1, rate
order by min(start_date);
Assuming we can just use start_date to identify the adjacent records (i.e., there are no gaps), then you can use the difference of row numbers approach:
-- Difference-of-row-numbers grouping: seqnum_i counts rows per id, seqnum_ir
-- counts rows per (id, rate). Their difference stays constant across adjacent
-- rows with the same rate, so it identifies each run.
select id, min(start_date) as mn_date, max(end_date) as mx_date, rate
from (select t.*,
             row_number() over (partition by id order by start_date) as seqnum_i,
             row_number() over (partition by id, rate order by start_date) as seqnum_ir
      from t
     ) t
-- the comma after id was missing, which made the statement a syntax error
group by id, (seqnum_i - seqnum_ir), rate
order by mn_date;
To see how this works, look at the results of the subquery. You should be able to "see" how the difference of the two row numbers defines the groups of adjacent records with the same rate.
I found that the last value wasn't being grouped correctly as the calculation of X wasn't handling the NULL return, so I changed it to this:
-- Gap flag x: 0 when this row starts the day after the previous row of the same
-- id ended, 1 otherwise. Using LAG puts the flag on the first row after a gap,
-- which is where a running sum of x has to change. LAG is NULL only on the first
-- row, where falling through to 1 is the wanted result (it opens the first
-- group), so no separate NULL branch is needed.
,CASE
WHEN LAG (end_date)
OVER (PARTITION BY id ORDER BY start_date) =
start_date - 1
THEN
0
ELSE
1
END
x
I have the below input
ID Activity Date
1 gardening 2011-01-01 00:00:00.000
1 gardening 2011-02-01 00:00:00.000
2 cooking 2011-03-01 00:00:00.000
2 cooking 2011-04-01 00:00:00.000
2 cooking 2011-05-01 00:00:00.000
1 gardening 2011-06-01 00:00:00.000
1 gardening 2011-07-01 00:00:00.000
The ddl is as under
-- Sample data. "Declare #t table" is not valid T-SQL (table variables are named
-- with @); a temp table is created instead so that the later queries reading
-- from #t work. Dates use the unambiguous YYYYMMDD form so they do not depend on
-- the session's language / DATEFORMAT setting.
Create table #t (ID int, Activity Varchar(50), [Date] DATETIME)
Insert into #t (ID, Activity, [Date])
Select 1,'gardening','20110101' union all Select 1,'gardening','20110201'
union all Select 2,'cooking','20110301' union all Select 2,'cooking','20110401'
union all Select 2,'cooking','20110501' union all Select 1,'gardening','20110601'
union all Select 1,'gardening','20110701'
select ID, Activity, [Date] from #t
Expected output
ID ACTIVITY INITIAL_DATE END_DATE
1 gardening 01/01/2011 02/01/2011
1 gardening 02/01/2011 06/01/2011
1 gardening 06/01/2011 07/01/2011
2 cooking 03/01/2011 04/01/2011
2 cooking 04/01/2011 05/01/2011
So far I have done
-- Question's attempt: walk the rows one at a time with a recursive CTE.
-- cte numbers all rows in (ID, [Date]) order.
;with cte as(Select Rn= ROW_NUMBER() Over(order by ID,[Date]),* from #t)
-- cte2 starts at row 1 and repeatedly steps to the row numbered Rn + 1.
,cte2 as(
Select Rn
-- both output dates are taken from the same row's [Date]
,ID,Activity,InitialDate =[Date],EndDate = [Date]
from cte where Rn =1
union all
Select c1.Rn
-- again both dates come from the same row (c1), so every output row has
-- InitialDate equal to EndDate
,c1.ID,c1.Activity,c1.Date,c1.Date
from cte2 c2
join cte c1
on c1.rn = c2.Rn+1
)
select ID,Activity,InitialDate,EndDate from cte2
but the output is not correct
ID Activity InitialDate EndDate
1 gardening 2011-01-01 00:00:00.000 2011-01-01 00:00:00.000
1 gardening 2011-02-01 00:00:00.000 2011-02-01 00:00:00.000
1 gardening 2011-06-01 00:00:00.000 2011-06-01 00:00:00.000
1 gardening 2011-07-01 00:00:00.000 2011-07-01 00:00:00.000
2 cooking 2011-03-01 00:00:00.000 2011-03-01 00:00:00.000
2 cooking 2011-04-01 00:00:00.000 2011-04-01 00:00:00.000
2 cooking 2011-05-01 00:00:00.000 2011-05-01 00:00:00.000
Help needed
-- Pair every row with the next row (by date) of the same ID: the current row's
-- date becomes INITIAL_DATE and the next row's date becomes END_DATE. The last
-- row of each ID has no successor and therefore produces no output row.
;WITH numbered AS
(
    SELECT src.ID,
           src.Activity,
           src.[Date],
           ROW_NUMBER() OVER (PARTITION BY src.ID ORDER BY src.[Date]) AS rn
    FROM #t AS src
)
SELECT cur.ID,
       cur.Activity,
       cur.[Date] AS INITIAL_DATE,
       nxt.[Date] AS END_DATE
FROM numbered AS cur
INNER JOIN numbered AS nxt
    ON nxt.ID = cur.ID
   AND nxt.rn = cur.rn + 1
ORDER BY cur.ID, cur.[Date]
try this -
in oracle it is giving desired output..
please check for respective sql server function for lead () in oracle database
-- Pair each row with the previous date of the same (ID, Activity): ordering by
-- [Date] descending makes LEAD return the next-older date, which becomes
-- INITIAL_DATE, while the row's own date is END_DATE. The oldest row of each
-- partition has no older date and is filtered out.
-- Written for SQL Server, where LEAD is available from SQL Server 2012. Fixes
-- the "partition be" typo, brackets the [Date] column name, aliases the derived
-- table, and moves ORDER BY to the outer query, since it is not allowed inside a
-- derived table.
with cte as (select ID, Activity, [Date] from #t)
select paired.ID,
       paired.Activity,
       paired.INITIAL_DATE,
       paired.END_DATE
from (
    select ID,
           Activity,
           lead([Date]) over (partition by ID, Activity order by [Date] desc) as INITIAL_DATE,
           [Date] as END_DATE
    from cte
) paired
where paired.INITIAL_DATE is not null
order by paired.ID, paired.Activity, paired.END_DATE