Create SQL key based on datetime that is persistent overnight - sql

I have a time series with a table like this:

CarId | EventDateTime    | Event | SessionFlag | ExpectedKey
1     | 2022-01-01 7:00  | Start | 1           | 1-20220101-7
1     | 2022-01-01 7:05  | Drive | 1           | 1-20220101-7
1     | 2022-01-01 8:00  | Park  | 1           | 1-20220101-7
1     | 2022-01-01 10:00 | Drive | 1           | 1-20220101-7
1     | 2022-01-01 18:05 | End   | 0           | 1-20220101-7
1     | 2022-01-01 23:00 | Start | 1           | 1-20220101-23
1     | 2022-01-01 23:05 | Drive | 1           | 1-20220101-23
1     | 2022-01-02 2:00  | Park  | 1           | 1-20220101-23
1     | 2022-01-02 3:00  | Drive | 1           | 1-20220101-23
1     | 2022-01-02 15:00 | End   | 0           | 1-20220101-23
1     | 2022-01-02 16:00 | Start | 1           | 1-20220102-16
Other CarIds do exist.
What I am attempting to do is create the last column, ExpectedKey.
The problem I face though is midnight, as the same session can exist over two days.
The record above with ExpectedKey 1-20220101-23 is the prime example of what I'm trying to achieve.
I've played with using:
CASE
    WHEN SessionFlag <> 0
     AND SessionFlag = LAG(SessionFlag) OVER (PARTITION BY CarId ORDER BY EventDateTime)
    THEN FIRST_VALUE(CarId + '-' + CONVERT(CHAR(8), EventDateTime, 112) + '-' + CAST(DATEPART(HOUR, EventDateTime) AS VARCHAR))
         OVER (PARTITION BY CarId ORDER BY EventDateTime)
    ELSE CarId + '-' + CONVERT(CHAR(8), EventDateTime, 112) + '-' + CAST(DATEPART(HOUR, EventDateTime) AS VARCHAR)
END AS SessionId
But I can't seem to make it partition correctly overnight.
Can anyone offer advice?
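For anyone who wants to try this locally, here is a minimal setup sketch. The table name time_series and the column types are assumptions on my part (the answers below refer to the table as YourTable or time_series):

CREATE TABLE time_series (
    CarId         int,
    EventDateTime datetime,
    Event         varchar(10),
    SessionFlag   bit
);

-- A subset of the rows from the question, enough to cover the overnight session
INSERT INTO time_series (CarId, EventDateTime, Event, SessionFlag) VALUES
(1, '2022-01-01 07:00', 'Start', 1),
(1, '2022-01-01 07:05', 'Drive', 1),
(1, '2022-01-01 18:05', 'End',   0),
(1, '2022-01-01 23:00', 'Start', 1),
(1, '2022-01-02 02:00', 'Park',  1),
(1, '2022-01-02 15:00', 'End',   0),
(1, '2022-01-02 16:00', 'Start', 1);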

This is a classic gaps-and-islands problem. There are a number of solutions.
The simplest (if not the most efficient) is partitioning over a windowed conditional count:
WITH Groups AS (
    SELECT *,
        GroupId = COUNT(CASE WHEN t.Event = 'Start' THEN 1 END)
                      OVER (PARTITION BY t.CarId ORDER BY t.EventDateTime)
    FROM YourTable t
)
SELECT *,
    NewKey = CONCAT_WS('-',
        t.CarId,
        -- take both the date and the hour from the first row of the group,
        -- so the key stays the same for rows that fall after midnight
        FIRST_VALUE(CONVERT(varchar(8), t.EventDateTime, 112))
            OVER (PARTITION BY t.CarId, t.GroupId ORDER BY t.EventDateTime
                  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING),
        FIRST_VALUE(DATEPART(hour, t.EventDateTime))
            OVER (PARTITION BY t.CarId, t.GroupId ORDER BY t.EventDateTime
                  ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
    )
FROM Groups t;
db<>fiddle

Another option is to use APPLY to get the most recent Start event's datetime and form the key with CONCAT_WS:
select *
from time_series t
cross apply
(
    select top 1
        ExpectedKey = concat_ws('-',
            x.CarId,
            convert(varchar(10), x.EventDateTime, 112),
            datepart(hour, x.EventDateTime))
    from time_series x
    where x.Event = 'Start'
      and x.CarId = t.CarId          -- correlate on CarId, since other CarIds exist
      and x.EventDateTime <= t.EventDateTime
    order by x.EventDateTime desc
) k

Related

How can I create a "start" "end" time table from a timestamp list

I am trying to create a view that displays the time of employee stamps.
This is what the table looks like now:

Person | Person_Number | Date     | Stamp_number | Time_Stamp
Paul   | 1             | 22-10-24 | 1            | 8:00
Paul   | 1             | 22-10-24 | 2            | 10:00
Paul   | 1             | 22-10-24 | 3            | 10:30
Paul   | 1             | 22-10-24 | 4            | 12:00
Jimmy  | 2             | 22-10-23 | 1            | 9:00
Jimmy  | 2             | 22-10-23 | 2            | 11:00
Jimmy  | 2             | 22-10-23 | 3            | 12:00
And I would like it to look like this, using only a select query:

Person | Person_Number | Date     | Start | End   | Duration
Paul   | 1             | 22-10-24 | 8:00  | 10:00 | 2:00
Paul   | 1             | 22-10-24 | 10:30 | 12:00 | 1:30
Jimmy  | 2             | 22-10-23 | 9:00  | 11:00 | 2:00
Jimmy  | 2             | 22-10-23 | 12:00 | null  | null

Is it possible?
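A minimal setup sketch for trying the answers below; the table name yourTable (also referred to as table_name further down), the column types, and the reading of 22-10-24 as 2022-10-24 are all assumptions:

CREATE TABLE yourTable (
    Person        varchar(50),
    Person_Number int,
    [Date]        date,
    Stamp_number  int,
    Time_Stamp    time(0)
);

INSERT INTO yourTable (Person, Person_Number, [Date], Stamp_number, Time_Stamp) VALUES
('Paul',  1, '2022-10-24', 1, '08:00'),
('Paul',  1, '2022-10-24', 2, '10:00'),
('Paul',  1, '2022-10-24', 3, '10:30'),
('Paul',  1, '2022-10-24', 4, '12:00'),
('Jimmy', 2, '2022-10-23', 1, '09:00'),
('Jimmy', 2, '2022-10-23', 2, '11:00'),
('Jimmy', 2, '2022-10-23', 3, '12:00');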
We can use conditional aggregation along with a ROW_NUMBER trick:
WITH cte AS (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY Person_Number, Date
                              ORDER BY Stamp_number) - 1 AS rn
    FROM yourTable
)
SELECT Person, Person_Number, Date,
       MAX(CASE WHEN rn % 2 = 0 THEN Time_Stamp END) AS [Start],
       MAX(CASE WHEN rn % 2 = 1 THEN Time_Stamp END) AS [End],
       DATEDIFF(MINUTE,
                MAX(CASE WHEN rn % 2 = 0 THEN Time_Stamp END),
                MAX(CASE WHEN rn % 2 = 1 THEN Time_Stamp END)) AS Duration
FROM cte
GROUP BY Person, Person_Number, Date, rn / 2
ORDER BY 2, 4;
Here is a working demo.
Try the following:
SELECT Person, Person_Number, Date, [Start], [End],
       CONVERT(TIME(0), CONVERT(DATETIME, [End]) - CONVERT(DATETIME, [Start])) AS Duration
FROM
(
    SELECT Person, Person_Number, Date, MIN(Time_Stamp) AS [Start],
           CASE
               WHEN MAX(Time_Stamp) <> MIN(Time_Stamp)
               THEN MAX(Time_Stamp)
           END AS [End] /* select End as null when there is no End for a Start */
    FROM table_name
    GROUP BY Person, Person_Number, Date, (Stamp_number + 1) / 2
) T
ORDER BY Person_Number, Date, [Start]
See a demo.

Generate multiples rows of new column based on one value of another column

I have a table like below:

ID | Date
1  | 2022-01-01
2  | 2022-03-21

I want to add a new column based on the date, and it should look like this:

ID | Date       | NewCol
1  | 2022-01-01 | 2022-02-01
1  | 2022-01-01 | 2022-03-01
1  | 2022-01-01 | 2022-04-01
1  | 2022-01-01 | 2022-05-01
2  | 2022-03-21 | 2022-04-21
2  | 2022-03-21 | 2022-05-21
Let's say that there is an @EndDate = 2022-05-31 (that's where it should stop).
I'm having a hard time trying to figure out how to do it in SSMS. Would appreciate any insights! Thanks :)
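A minimal setup sketch for the answers below; the table name t, the temp table #d, and the column types are assumptions (the answers hard-code the end date '2022-05-31' rather than reading @EndDate):

CREATE TABLE t (ID int, [Date] date);

INSERT INTO t (ID, [Date]) VALUES
(1, '2022-01-01'),
(2, '2022-03-21');

-- Copy for the answer further down that reads from a temp table #d
SELECT * INTO #d FROM t;

DECLARE @EndDate date = '2022-05-31';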
In the following solutions we leverage STRING_SPLIT in combination with REPLICATE to generate the new rows:
select ID
      ,Date
      ,dateadd(month, row_number() over(partition by ID order by (select null)), Date) as NewCol
from (
    select *
    from t
    outer apply string_split(replicate(',', datediff(month, Date, '2022-05-31') - 1), ',')
) t
ID | Date       | NewCol
1  | 2022-01-01 | 2022-02-01
1  | 2022-01-01 | 2022-03-01
1  | 2022-01-01 | 2022-04-01
1  | 2022-01-01 | 2022-05-01
2  | 2022-03-21 | 2022-04-21
2  | 2022-03-21 | 2022-05-21
Fiddle
For Azure SQL and SQL Server 2022 there is a cleaner solution based on the ordinal output column:
"The enable_ordinal argument and ordinal output column are currently supported in Azure SQL Database, Azure SQL Managed Instance, and Azure Synapse Analytics (serverless SQL pool only). Beginning with SQL Server 2022 (16.x) Preview, the argument and output column are available in SQL Server."
select ID
      ,Date
      ,dateadd(month, ordinal, Date) as NewCol
from (
    select *
    from t
    outer apply string_split(replicate(',', datediff(month, Date, '2022-05-31') - 1), ',', 1)
) t
An alternative is a recursive CTE that generates the extra months per ID:
with cal (id, dt) as
(
    select id, date as dt from t
    union all
    select id, dateadd(month, 1, dt) from cal where month(dt) < month('2022-05-31')
)
select t.id
      ,t.date
      ,cal.dt as new_col
from cal
join t on t.id = cal.id and t.date != cal.dt
order by id, new_col
id | date       | new_col
1  | 2022-01-01 | 2022-02-01
1  | 2022-01-01 | 2022-03-01
1  | 2022-01-01 | 2022-04-01
1  | 2022-01-01 | 2022-05-01
2  | 2022-03-21 | 2022-04-21
2  | 2022-03-21 | 2022-05-21
Fiddle
There are many ways to "explode" a row into a set; the simplest in my opinion is a recursive CTE:
DECLARE @endpoint date = '20220531';
DECLARE @prev date = DATEADD(MONTH, -1, @endpoint);

WITH x AS
(
    SELECT ID, date, NewCol = DATEADD(MONTH, 1, date) FROM #d
    UNION ALL
    SELECT ID, date, DATEADD(MONTH, 1, NewCol) FROM x
    WHERE NewCol < @prev
)
SELECT * FROM x
ORDER BY ID, NewCol;
Working example in this fiddle.
Keep in mind that if you could have more than 100 months you'll need to add an OPTION (MAXRECURSION n) hint (or just consider using a different solution at scale).
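As an illustration, the hint is appended to the outermost statement that references the recursive CTE; MAXRECURSION 0 removes the default 100-level cap (a sketch, reusing the query above):

SELECT * FROM x
ORDER BY ID, NewCol
OPTION (MAXRECURSION 0);  -- 0 = no recursion limit; alternatively pick an explicit cap such as 1000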

Group by date and find median of processing time

I select an input date and an output date from a database and use a formula to compute the processing time. Now I would like the values to be grouped according to the date of receipt, and the median of the processing time to be output for each grouped date of receipt. Something like this:
The data I select:
input date | output date | processing time
2022-01-03 | 2022-01-03 | 0
2022-01-03 | 2022-01-06 | 3
2022-01-03 | 2022-01-11 | 8
2022-01-05 | 2022-01-10 | 5
2022-01-05 | 2022-01-15 | 10
The output I want:
input date | processing time
2022-01-03 | 3
2022-01-05 | 7.5
My SQL Code:
SELECT [received_date]
      ,CONVERT(date, [exported_on])
      ,DATEDIFF(day, [received_date], [exported_on]) AS processing_time
FROM [request]
WHERE YEAR(received_date) = 2022
GROUP BY received_date, [exported_on]
ORDER BY received_date
How can I do this? Do I need a temp table to do this, or can I modify my query?
You could try using PERCENTILE_CONT:
with cte as (
    select input_date,
           PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY processing_time)
               OVER (PARTITION BY input_date) as Median_Process_Time
    FROM tableA
)
SELECT *
FROM cte
GROUP BY input_date, Median_Process_Time
db fiddle
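Because PERCENTILE_CONT here is a window function rather than an aggregate, the GROUP BY above only collapses the duplicate rows; a minimal equivalent sketch (same assumed table and column names) uses DISTINCT instead:

select distinct
       input_date,
       PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY processing_time)
           OVER (PARTITION BY input_date) as Median_Process_Time
from tableA;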
Also, you can check out the discussion here: How to find the SQL medians for a grouping
Here is my solution. Thank you for your help.
SET NOCOUNT ON;

DECLARE @working TABLE (entry_date date, exit_date date, work_time int);

INSERT INTO @working
SELECT [received] AS date_of_entry
      ,CONVERT(date, [exported]) AS date_of_exit
      ,DATEDIFF(day, [received], [exported]) AS processing_time
FROM [zsdt].[dbo].[antrag]
WHERE YEAR([received]) = 2022
  AND scanner_name IS NOT NULL
  AND exportiert_am IS NOT NULL
  AND NOT scanner_name = 'AP99'
GROUP BY [received], [exported]
ORDER BY [received] ASC;

WITH CTE AS
(   SELECT entry_date,
           work_time,
           [half1] = NTILE(2) OVER (PARTITION BY entry_date ORDER BY work_time),
           [half2] = NTILE(2) OVER (PARTITION BY entry_date ORDER BY work_time DESC)
    FROM @working
    WHERE work_time IS NOT NULL
)
SELECT entry_date,
       (MAX(CASE WHEN Half1 = 1 THEN work_time END) +
        MIN(CASE WHEN Half2 = 1 THEN work_time END)) / 2.0
FROM CTE
GROUP BY entry_date;

Select start and end dates for changing values in SQL

I have a database with accounts and historical status changes
select Date, Account, OldStatus, NewStatus from HistoricalCodes
order by Account, Date
Date       | Account | OldStatus | NewStatus
2020-01-01 | 12345   | 1         | 2
2020-10-01 | 12345   | 2         | 3
2020-11-01 | 12345   | 3         | 2
2020-12-01 | 12345   | 2         | 1
2020-01-01 | 54321   | 2         | 3
2020-09-01 | 54321   | 3         | 2
2020-12-01 | 54321   | 2         | 3
For every account I need to determine the Start Date and End Date when Status = 2. An additional challenge is that the status can change back and forth multiple times. Is there a way in SQL to create something like this, at least for the first two timeframes when the account was in status 2? Any ideas?
Account | StartDt_1  | EndDt_1    | StartDt_2  | EndDt_2
12345   | 2020-01-01 | 2020-10-01 | 2020-11-01 | 2020-12-01
54321   | 2020-09-01 | 2020-12-01 |            |
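A minimal setup sketch for the answers below; the answers refer to the table as t or mytable while the question names it HistoricalCodes, and the column types are assumptions:

CREATE TABLE HistoricalCodes (
    [Date]    date,
    Account   int,
    OldStatus int,
    NewStatus int
);

INSERT INTO HistoricalCodes ([Date], Account, OldStatus, NewStatus) VALUES
('2020-01-01', 12345, 1, 2),
('2020-10-01', 12345, 2, 3),
('2020-11-01', 12345, 3, 2),
('2020-12-01', 12345, 2, 1),
('2020-01-01', 54321, 2, 3),
('2020-09-01', 54321, 3, 2),
('2020-12-01', 54321, 2, 3);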
I would suggest putting this information in separate rows:
select t.*
from (select account, newstatus, date as startdate,
             lead(date) over (partition by account order by date) as enddate
      from t
     ) t
where newstatus = 2;
This produces a separate row for each period when an account has a status of 2. This is better than putting the dates in separate pairs of columns, because you do not need to know the maximum number of periods of status = 2 when you write the query.
For a fixed maximum of status changes per account, you can use window functions and conditional aggregation:
select account,
       max(case when rn = 1 then date end) as start_dt1,
       max(case when rn = 1 then lead_date end) as end_dt1,
       max(case when rn = 2 then date end) as start_dt2,
       max(case when rn = 2 then lead_date end) as end_dt2
from (
    select t.*,
           row_number() over(partition by account, newstatus order by date) as rn,
           lead(date) over(partition by account order by date) as lead_date
    from mytable t
) t
where newstatus = 2
group by account
You can extend the select clause with more conditional expressions to handle more possible ranges per account.
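For instance, a third window of status 2 would only need two more conditional expressions in the outer SELECT list (the aliases here are my own):

-- extra expressions to add to the SELECT list of the query above, before the FROM clause
max(case when rn = 3 then date end)      as start_dt3,
max(case when rn = 3 then lead_date end) as end_dt3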

SQL Server - MyCTE query based on 24 hour period (next day)

I have this bit of code:
;WITH MyCTE AS
(
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY CardUser ORDER BY CardTableID) AS NewVariation
    FROM CardChecker
)
UPDATE MyCTE
SET Status = NewVariation
which currently updates the Status column. However, what I want to happen is that over a 24-hour period the status starts again at 1 the next day, and counts again per CardUser as specified above:
Current data and what happens:
2 aaa 1 2015-06-25 08:00:00.000 123 1 NULL
3 ccc 1 2015-06-25 00:00:00.000 124 1 NULL
4 aaa 1 2015-06-25 17:30:00.000 125 2 NULL
5 aaa 1 2015-06-26 17:30:00.000 125 *3* NULL
what I want to happen:
2 aaa 1 2015-06-25 08:00:00.000 123 1 NULL
3 ccc 1 2015-06-25 00:00:00.000 124 1 NULL
4 aaa 1 2015-06-25 17:30:00.000 125 2 NULL
5 aaa 1 2015-06-26 17:30:00.000 125 *1* NULL
I'm not quite sure how I could add this to the above query, so would it be possible for someone to point me in the right direction?
The main problem is that the EventTime field contains both the date and the time, so adding it as a PARTITION means the status would always be 1 based on the time component of the field.
Thanks for the help.
Current CardTable structure:
CREATE TABLE CardTable (
    CardTableID INT IDENTITY(1,1) NOT NULL,
    CardUser    VARCHAR(50),
    CardNumber  VARCHAR(50),
    EventTime   DATETIME,
    Status      INT)
You can CONVERT() the EventTime to DATE type and then PARTITION:
;WITH MyCTE AS
(
    SELECT Status,
           ROW_NUMBER() OVER (PARTITION BY CardUser, CONVERT(DATE, EventTime)
                              ORDER BY CardTableID) AS NewVariation
    FROM CardChecker
)
UPDATE MyCTE
SET Status = NewVariation
Your query basically updates the entire table unnecessarily every time. If EventTime is the current date/time of the system, having a flag to mark already-updated statuses would improve performance:
;WITH MyCTE AS
(
    SELECT Status,
           ROW_NUMBER() OVER (PARTITION BY CardUser, CONVERT(DATE, EventTime)
                              ORDER BY CardTableID) AS NewVariation
    FROM CardChecker
    WHERE Status IS NULL
       OR CONVERT(DATE, EventTime) = CONVERT(DATE, GETDATE())
)
UPDATE MyCTE
SET Status = NewVariation