SQL - Start and End date based on another column - sql

Simplified structure.
I need the two dates between a record that has an action type of 4 and an action type of 1.
The record could be in that state multiple times and I would need separate rows for their times
For example for IncidentId = 1
Row 1 - StartTime = 2017-01-01 14:00 (id:3) - End Time = 2017-01-01 20:00 (id: 5)
Row 2 - StartTime = 2017-01-01 21:00 (id:6) - End Time = 2017-01-02 11:00 (id: 9)
-- Desired output shape: one row per period an incident spent in action type 4.
CREATE TABLE #returntable (
    [incidentid] INT,
    [starttime]  DATETIME,
    [endtime]    DATETIME
);

-- Incident log: every row records the action type an incident moved to.
CREATE TABLE #testtableofdoom (
    [incidentlogid] INT,
    [incidentid]    INT,
    [timestamp]     DATETIME,
    [actiontypeid]  INT
);

INSERT INTO #testtableofdoom
    ([incidentlogid], [incidentid], [timestamp], [actiontypeid])
VALUES
    (1,  1, '2017-01-01 09:00', 1),
    (2,  1, '2017-01-01 11:00', 1),
    (3,  1, '2017-01-01 14:00', 4),
    (4,  1, '2017-01-01 16:00', 4),
    (5,  1, '2017-01-01 20:00', 1),
    (6,  1, '2017-01-01 21:00', 4),
    (7,  1, '2017-01-02 09:00', 4),
    (8,  2, '2017-01-02 10:00', 1),
    (9,  1, '2017-01-02 11:00', 1),
    (10, 1, '2017-01-02 14:00', 1),
    (11, 2, '2017-01-02 15:00', 4),
    (12, 1, '2017-01-02 16:00', 1),
    (13, 1, '2017-01-02 17:00', 1),
    (14, 1, '2017-01-02 18:00', 1),
    (15, 2, '2017-01-02 15:00', 1);

-- Tear down the scratch tables.
DROP TABLE #testtableofdoom;
DROP TABLE #returntable;

I used table variables instead of temp tables, and shorter column names than you, but this works:
-- Sample incident log held in a table variable.
-- (Table variables take the @ prefix, and the column list must be closed
-- before the INSERT.)
DECLARE @tt TABLE (
    logId INT,       -- increasing log sequence number
    iId   INT,       -- incident id
    dt    DATETIME,  -- time of the log entry
    atId  INT        -- action type id
);

INSERT @tt (logId, iId, dt, atId) VALUES
    (1,  1, '2017-01-01 09:00', 1),
    (2,  1, '2017-01-01 11:00', 1),
    (3,  1, '2017-01-01 14:00', 4),
    (4,  1, '2017-01-01 16:00', 4),
    (5,  1, '2017-01-01 20:00', 1),
    (6,  1, '2017-01-01 21:00', 4),
    (7,  1, '2017-01-02 09:00', 4),
    (8,  2, '2017-01-02 10:00', 1),
    (9,  1, '2017-01-02 11:00', 1),
    (10, 1, '2017-01-02 14:00', 1),
    (11, 2, '2017-01-02 15:00', 4),
    (12, 1, '2017-01-02 16:00', 1),
    (13, 1, '2017-01-02 17:00', 1),
    (14, 1, '2017-01-02 18:00', 1),
    (15, 2, '2017-01-02 15:00', 1);

-- Pair every "start" row (first atId = 4 of a run) with its "end" row
-- (next atId = 1 row of the same incident).
SELECT s.logId AS startLogid,
       e.logId AS endLogId,
       s.iID,
       s.dt    AS startTime,
       e.dt    AS endTime
FROM @tt AS s
JOIN @tt AS e
    ON e.logId = (SELECT MIN(n.logId)           -- lowest later logId ...
                  FROM @tt AS n
                  WHERE n.iId = s.iId           -- ... of the same incident
                    AND n.atId = 1              -- ... with action type 1
                    AND n.logId > s.logId)
WHERE s.atId = 4
  AND (
        -- the immediately preceding row of this incident has atId = 1 ...
        (SELECT p.atId
         FROM @tt AS p
         WHERE p.logId = (SELECT MAX(q.logId)
                          FROM @tt AS q
                          WHERE q.logId < s.logId
                            AND q.iId = s.iId)) = 1
        -- ... or the incident has no earlier row at all
        OR NOT EXISTS (SELECT *
                       FROM @tt AS f
                       WHERE f.logId < s.logId
                         AND f.iId = s.iId)
      );
This produces the following:
startLogid endLogId iID startTime endTime
----------- ----------- ---- ---------------- ----------------
3 5 1 2017-01-01 14:00 2017-01-01 20:00
6 9 1 2017-01-01 21:00 2017-01-02 11:00
11 15 2 2017-01-02 15:00 2017-01-02 15:00
It uses a self-join: s represents the first (start) record with action type 4, and e represents the end record with action type 1. Since logId increments, the end record must have a higher logId than the start record; more precisely, it must be the lowest logId above the start record's logId that has the same iId and atId = 1.
-- Join part only: match each start row s to its end row e.
-- (@tt is a table variable, hence the @ prefix.)
Select s.iID, s.dt startTime, e.dt endTime
from @tt s join @tt e
on e.logId =
(Select min(logId) from @tt -- lowest log greater than start logId
where iId = s.iID -- same iId
and atId = 1 -- with atId = 1
and logId > s.logId) -- greater than start logId
Finally, the start record must be restricted to those "4" records which either have no earlier record for the same incident or have a "1" record immediately before them.
-- Filter part only: keep a "4" row when it opens a new run.
-- (@tt is a table variable, hence the @ prefix.)
where s.aTid = 4
and ((Select atId from @tt -- atId of immed prior = 1
Where logId =
(Select Max(logId) from @tt
where logId < s.LogId
and iId = s.iId)) = 1
or Not Exists -- or there is no prior record
(Select * from @tt
Where logId < s.LogId
and iId = s.iID))

something like this?
-- Start and end timestamps of every period incident 1 spent in action type 4.
-- A period starts at a "4" row whose previous row (by incidentlogid) is not a
-- "4", and ends at the next later "1" row of the same incident.
SELECT
    d.[timestamp] AS StartDate,
    (
        SELECT TOP (1) d2.[timestamp]
        FROM #testTableOfDoom AS d2
        WHERE d2.incidentid = 1
          AND d2.[timestamp] > d.[timestamp]
          AND d2.actiontypeid = 1
        ORDER BY d2.[timestamp] ASC
    ) AS EndDate
FROM (
    SELECT
        p.[timestamp],
        LAG(p.actiontypeid) OVER (ORDER BY p.incidentlogid ASC) AS PrevValue,
        p.actiontypeid
    FROM #testTableOfDoom AS p
    WHERE p.incidentid = 1
) AS d
WHERE d.actiontypeid = 4
  -- LAG() yields NULL for the first row; NULL <> 4 is UNKNOWN, so without the
  -- explicit IS NULL test a period that opens the log would be dropped.
  AND (d.PrevValue IS NULL OR d.PrevValue <> 4);

Related

How to replace CURSOR when it has effect on functions in SELECT statement

Please consider this code:
-- Age brackets: inclusive [Min, Max] range and its label.
DECLARE @MyMinMaxTable TABLE
(
    [Min]  int,
    [Max]  int,
    [Desc] NVARCHAR(50)
)
INSERT INTO @MyMinMaxTable
VALUES (0, 12, N'Child'),
       (13, 19, N'Teenager'),
       (20, 25, N'Youth'),
       (25, 40, N'Middle-aged'),
       (40, 99, N'Old')

-- Sample facts to be bucketed by Age.
DECLARE @MyTable TABLE
(
    Id     int identity(1,1),
    [Year] int,
    Age    int,
    MyCol2 int,
    MyCol3 int null
)
INSERT INTO @MyTable
    ([Year], Age, MyCol2, MyCol3)
VALUES
    (2012, 10, 1, 1),
    (2012, 28, 2, 3),
    (2012, 14, 1, 7),
    (2012, 24, 3, 3),
    (2012, 80, 1, 6),
    (2012, 39, 1, 3),
    (2012, 45, 1, 5),
    (2012, 23, 2, 6),
    (2012, 72, 3, 8),
    (2012, 17, 1, null),
    (2012, 62, 4, 9),
    (2012, 20, 1, null),
    (2012, 5, 1, 9),
    (2012, 8, 1, 9),
    (2012, 25, 1, null),
    (2012, 41, 2, 2),
    (2012, 26, 1, 2),
    (2012, 33, 4, 2),
    (2012, 40, 1, 2),
    (2012, 33, 2, 3),
    (2012, 41, 1, 5),
    (2012, 53, 1, null),
    (2012, 37, 1, 3)

-- Output: one row per bracket with three ratios.
DECLARE @Result TABLE
(
    C0 NVARCHAR(50),
    c1 decimal(5,2),
    C2 decimal(5,2),
    C3 decimal(5,2)
)

-- The CURSOR part:
-- Walks the brackets one at a time and appends one aggregated row per bracket.
DECLARE @Min int;
DECLARE @Max int;
DECLARE @Desc nvarchar(50);

DECLARE mycur CURSOR
FOR
    SELECT [min],
           [max],
           [Desc]
    FROM @MyMinMaxTable

OPEN mycur
FETCH NEXT FROM mycur INTO @Min, @Max, @Desc

-- @@FETCH_STATUS is 0 while the last FETCH returned a row.
WHILE (@@fetch_status = 0)
BEGIN
    INSERT INTO @Result
    SELECT @Desc AS c0,
           -- c1: rows inside the bracket / all rows
           (CAST(COUNT(CASE WHEN Age >= @Min AND Age <= @Max THEN 1 END) AS decimal(5,2))
               / CAST(COUNT(Id) AS decimal(5,2))) AS c1,
           -- c2: rows with MyCol2 = 1 and MyCol3 present / rows inside the bracket
           (CAST(COUNT(CASE WHEN MyCol2 = 1 AND MyCol3 IS NOT NULL THEN 1 END) AS decimal(5,2))
               / CAST(COUNT(CASE WHEN Age >= @Min AND Age <= @Max THEN 1 END) AS decimal(5,2))) AS c2,
           -- c3: rows inside the bracket with MyCol2 = 1 / rows with MyCol2 = 1
           (CAST(COUNT(CASE WHEN Age >= @Min AND Age <= @Max AND MyCol2 = 1 THEN 1 END) AS decimal(5,2))
               / CAST(COUNT(CASE WHEN MyCol2 = 1 THEN 1 END) AS decimal(5,2))) AS c3
    FROM @MyTable AS td

    FETCH NEXT FROM mycur INTO @Min, @Max, @Desc
END

CLOSE mycur
DEALLOCATE mycur

SELECT * FROM @Result
The problem is that I want to remove the CURSOR and write a query without it. How is that possible in this scenario?
Try maybe a CROSS JOIN and filtering in your where clause or conditional CASE statements. It's still unclear what your end goal is so the aggregates below will have to be adjusted, but this may be a good start:
-- One row per bracket: every fact row is paired with every bracket, and the
-- CASE expressions decide which rows count towards each ratio.
SELECT
    mm.[Desc],
    -- C1: rows inside the bracket / all rows
    (CAST(SUM(CASE WHEN MyCol1 >= mm.[Min] AND MyCol1 <= mm.[Max] THEN 1 ELSE 0 END) AS DECIMAL(5,2))
        / CAST(COUNT(Id) AS DECIMAL(5,2))) AS C1,
    -- C2: rows with MyCol2 = 1 and MyCol3 present / rows inside the bracket.
    -- NULLIF turns an empty bracket into NULL instead of a divide-by-zero error.
    (CAST(SUM(CASE WHEN MyCol2 = 1 AND MyCol3 IS NOT NULL THEN 1 ELSE 0 END) AS DECIMAL(5,2))
        / NULLIF(CAST(SUM(CASE WHEN MyCol1 >= mm.[Min] AND MyCol1 <= mm.[Max] THEN 1 ELSE 0 END) AS DECIMAL(5,2)), 0)) AS C2,
    -- C3: rows inside the bracket with MyCol2 = 1 / rows with MyCol2 = 1
    (CAST(SUM(CASE WHEN MyCol1 >= mm.[Min] AND MyCol1 <= mm.[Max] AND MyCol2 = 1 THEN 1 ELSE 0 END) AS DECIMAL(5,2))
        / NULLIF(CAST(SUM(CASE WHEN MyCol2 = 1 THEN 1 ELSE 0 END) AS DECIMAL(5,2)), 0)) AS C3
FROM MyTable td
CROSS JOIN MyMinMaxTable mm
GROUP BY mm.[Desc]
Here is a solution. I unwound your query and rewrote it.
This one continues to work if you add additional ranges, and does not require CROSS APPLY, hard-coded subquery ranges or anything else.
Sorting is an exercise for you :-)
-- Age brackets: inclusive [Min, Max] range and its label.
DECLARE @MyMinMaxTable TABLE
(
    [Min]  int,
    [Max]  int,
    [Desc] NVARCHAR(50)
)
INSERT INTO @MyMinMaxTable
VALUES (0, 12, N'Child'),
       (13, 19, N'Teenager'),
       (20, 25, N'Youth'),
       (25, 40, N'Middle-aged'),
       (40, 99, N'Old')

-- Sample facts; MyCol1 is the value matched against the brackets.
DECLARE @MyTable TABLE
(
    Id     int identity(1,1),
    [Year] int,
    MyCol1 int,
    MyCol2 int,
    MyCol3 int null
)
INSERT INTO @MyTable
    ([Year], MyCol1, MyCol2, MyCol3)
VALUES
    (2012, 10, 1, 1),
    (2012, 28, 2, 3),
    (2012, 14, 1, 7),
    (2012, 24, 3, 3),
    (2012, 80, 1, 6),
    (2012, 39, 1, 3),
    (2012, 45, 1, 5),
    (2012, 23, 2, 6),
    (2012, 72, 3, 8),
    (2012, 17, 1, null),
    (2012, 62, 4, 9),
    (2012, 20, 1, null),
    (2012, 5, 1, 9),
    (2012, 8, 1, 9),
    (2012, 25, 1, null),
    (2012, 41, 2, 2),
    (2012, 26, 1, 2),
    (2012, 33, 4, 2),
    (2012, 40, 1, 2),
    (2012, 33, 2, 3),
    (2012, 41, 1, 5),
    (2012, 53, 1, null),
    (2012, 37, 1, 3)

-- The join keeps only the rows that fall inside each bracket, so COUNT(1)
-- per group is the number of bracket matches.
SELECT
    MMT.[Desc]
    --Ratio of (Age Bracket Matches) / (ALL Rows)
    , CAST(COUNT(1) / (SELECT CAST(COUNT(1) AS DECIMAL(15,2)) AS TotalRows FROM @MyTable) AS DECIMAL(15,2)) AS C1
    --Ratio of (Rows WHERE MyCol2 = 1 AND MyCol3 IS NOT NULL) / (Age Bracket Matches)
    , CAST((SELECT COUNT(1) FROM @MyTable WHERE MyCol2 = 1 AND MyCol3 IS NOT NULL) / CAST(COUNT(1) AS DECIMAL(15,2)) AS DECIMAL(15,2)) AS C2
    --Ratio of (Age Bracket Matches WHERE MyCol2 = 1) / (Rows WHERE MyCol2 = 1)
    , CAST(SUM(CASE WHEN T.MyCol2 = 1 THEN 1 ELSE 0 END) / (SELECT CAST(COUNT(1) AS DECIMAL(15,2)) FROM @MyTable WHERE MyCol2 = 1) AS DECIMAL(15,2)) AS C3
FROM
    @MyMinMaxTable AS MMT
    INNER JOIN @MyTable AS T ON
        T.MyCol1 BETWEEN MMT.[Min] AND MMT.[Max]
GROUP BY
    MMT.[Desc]
Seems like you are trying to divide your data into groups. Try this query. But I'm not sure about third column, especially without sample data
-- One row per bracket for the requested year, active rows only.
INSERT INTO #outTable
SELECT
    mt.[Desc],
    -- Share of this bracket among all joined rows. The window runs over the
    -- grouped result, so it has to aggregate the per-group COUNT(*); a bare
    -- column such as [Id] is not allowed there. The cast avoids integer division.
    CAST(COUNT(*) AS DECIMAL(15, 2)) / SUM(COUNT(*)) OVER (),
    -- Share of rows in this bracket with MyCol2 = '1' and MyCol3 present.
    CAST(COUNT(CASE WHEN MyCol2 = '1' AND MyCol3 IS NOT NULL THEN 1 END) AS DECIMAL(15, 2)) / COUNT(*)
FROM
    MyTable td
    JOIN MyMinMaxTable mt ON td.MyCol1 BETWEEN mt.[min] AND mt.[max]
WHERE
    td.[YEAR] = @Year
    AND td.[Status] = 1
GROUP BY mt.[Desc]
Another approach can be by using CROSS APPLY.
-- One row per bracket; each CROSS APPLY computes a single count that the
-- select list combines into the three ratios. NULLIF turns a zero divisor
-- into NULL instead of a divide-by-zero error.
SELECT MM.[Desc],
       CAST(C1.C1Count AS DECIMAL(15, 2)) / NULLIF(CAST(Tot.TotCount AS DECIMAL(15, 2)), 0) AS C1,
       CAST(C2.C2Count AS DECIMAL(15, 2)) / NULLIF(CAST(C1.C1Count AS DECIMAL(15, 2)), 0) AS C2,
       -- third ratio gets its own alias (it was a second "C2")
       CAST(C3.C3Count AS DECIMAL(15, 2)) / NULLIF(CAST(C4.C4Count AS DECIMAL(15, 2)), 0) AS C3
FROM @MyMinMaxTable MM
     -- rows inside the bracket
     CROSS APPLY (SELECT COUNT(MyCol1) AS C1Count
                  FROM @MyTable
                  WHERE ( MyCol1 BETWEEN MM.Min AND MM.Max )) C1
     -- rows with MyCol2 = 1 and MyCol3 present (independent of the bracket)
     CROSS APPLY (SELECT COUNT(*) C2Count
                  FROM @MyTable
                  WHERE MyCol2 = 1
                    AND MyCol3 IS NOT NULL) C2
     -- rows inside the bracket with MyCol2 = 1
     CROSS APPLY (SELECT COUNT(*) C3Count
                  FROM @MyTable
                  WHERE MyCol1 >= MM.Min
                    AND MyCol1 <= MM.Max
                    AND MyCol2 = 1) C3
     -- rows with MyCol2 = 1
     CROSS APPLY (SELECT COUNT(*) C4Count
                  FROM @MyTable
                  WHERE MyCol2 = 1) C4
     -- all rows
     CROSS APPLY (SELECT COUNT(*) TotCount
                  FROM @MyTable) Tot
Following is the output for the above query.
Desc C1 C2 C2
---------- -------------------- --------------------- ---------------------
Child 0.130434782608695652 3.666666666666666666 0.200000000000000000
Teenager 0.086956521739130434 5.500000000000000000 0.133333333333333333
Youth 0.173913043478260869 2.750000000000000000 0.133333333333333333
Middle-aged 0.347826086956521739 1.375000000000000000 0.333333333333333333
Old 0.347826086956521739 1.375000000000000000 0.333333333333333333
I think in this situation it is OK to pre-calculate "the count of all records in #MyTable", “the count of records in #MyTable where MyCol2 = 1 and MyCol3 is not null” and “the count of records in #MyTable where MyCol2 = 1”, and store them in variables.
It would make the script more readable and easier to understand.
Here it is. Hope this helps.
-- Age brackets: inclusive [Min, Max] range and its label.
DECLARE @MyMinMaxTable TABLE
(
    [Min]  int,
    [Max]  int,
    [Desc] NVARCHAR(50)
)
INSERT INTO @MyMinMaxTable
VALUES (0, 12, N'Child'),
       (13, 19, N'Teenager'),
       (20, 25, N'Youth'),
       (25, 40, N'Middle-aged'),
       (40, 99, N'Old')

-- Sample facts to be bucketed by Age.
DECLARE @MyTable TABLE
(
    Id     int identity(1,1),
    [Year] int,
    Age    int,
    MyCol2 int,
    MyCol3 int null
)
INSERT INTO @MyTable
    ([Year], Age, MyCol2, MyCol3)
VALUES
    (2012, 10, 1, 1),
    (2012, 28, 2, 3),
    (2012, 14, 1, 7),
    (2012, 24, 3, 3),
    (2012, 80, 1, 6),
    (2012, 39, 1, 3),
    (2012, 45, 1, 5),
    (2012, 23, 2, 6),
    (2012, 72, 3, 8),
    (2012, 17, 1, null),
    (2012, 62, 4, 9),
    (2012, 20, 1, null),
    (2012, 5, 1, 9),
    (2012, 8, 1, 9),
    (2012, 25, 1, null),
    (2012, 41, 2, 2),
    (2012, 26, 1, 2),
    (2012, 33, 4, 2),
    (2012, 40, 1, 2),
    (2012, 33, 2, 3),
    (2012, 41, 1, 5),
    (2012, 53, 1, null),
    (2012, 37, 1, 3)

-- Output: one row per bracket with three ratios.
DECLARE @Result TABLE
(
    C0 NVARCHAR(50),
    c1 decimal(5,2),
    C2 decimal(5,2),
    C3 decimal(5,2)
)

-- ANSWER BEGINS HERE
-- The three bracket-independent counts are computed once and stored as
-- DECIMAL so the divisions below are not integer divisions.

-- Count of all records in @MyTable
DECLARE @CountAll DECIMAL(5,2)
SET @CountAll = (SELECT COUNT(1) FROM @MyTable)

-- Count of records in @MyTable where MyCol2 = 1 and MyCol3 is not null
DECLARE @CountMyCol2Eq1AndMyCol3NotNull DECIMAL(5,2)
SET @CountMyCol2Eq1AndMyCol3NotNull = (SELECT COUNT(1) FROM @MyTable WHERE MyCol2 = 1 AND MyCol3 IS NOT NULL)

-- Count of records in @MyTable where MyCol2 = 1
DECLARE @CountMyCol2Eq1 DECIMAL(5,2)
SET @CountMyCol2Eq1 = (SELECT COUNT(1) FROM @MyTable WHERE MyCol2 = 1)

-- The join keeps only rows inside each bracket, so COUNT(1) per group is the
-- number of bracket matches.
INSERT INTO @Result (C0, c1, C2, C3)
SELECT mm.[Desc],
       COUNT(1) / @CountAll,
       @CountMyCol2Eq1AndMyCol3NotNull / COUNT(1),
       SUM(CASE WHEN tt.MyCol2 = 1 THEN 1 ELSE 0 END) / @CountMyCol2Eq1
FROM @MyMinMaxTable mm
JOIN @MyTable tt
    ON mm.[Min] <= tt.Age
   AND mm.[Max] >= tt.Age
GROUP BY mm.[Desc]

SELECT * FROM @Result

SQL select max of a consecutive run of data

Given a table with a consecutive run of data: a number that always increases while a task is in progress and resets back to zero when the next task starts, how do you select the maximum of each run of data?
Each consecutive run can have any number of rows, and the runs of data are marked by a "start" and an "end" row, e.g. the data might look like:
user_id, action, qty, datetime
1, start, 0, 2017-01-01 00:00:01
1, record, 0, 2017-01-01 00:00:01
1, record, 4, 2017-01-01 00:00:02
1, record, 5, 2017-01-01 00:00:03
1, record, 6, 2017-01-01 00:00:04
1, end, 0, 2017-01-01 00:00:04
1, start, 0, 2017-01-01 00:00:05
1, record, 0, 2017-01-01 00:00:05
1, record, 2, 2017-01-01 00:00:06
1, record, 3, 2017-01-01 00:00:07
1, end, 0, 2017-01-01 00:00:07
2, start, 0, 2017-01-01 00:00:08
2, record, 0, 2017-01-01 00:00:08
2, record, 3, 2017-01-01 00:00:09
2, record, 8, 2017-01-01 00:00:10
2, end, 0, 2017-01-01 00:00:10
And the results would be the maximum value of each run:
user_id, action, qty, datetime
1, record, 6, 2017-01-01 00:00:04
1, record, 3, 2017-01-01 00:00:07
2, record, 8, 2017-01-01 00:00:10
Using any postgres sql syntax (9.3)? Its some kind of grouping then selecting max from each group, but I don't see how to do the grouping part.
If there is no overlap between runs for a single user and the next run always starts at a later time, then you can use the LAG() window function.
-- Inline sample data, then: for every 'end' row return the row that directly
-- precedes it within the same user (the last 'record' row of the run).
with the_table(user_id, action, qty, datetime) as (
    values
        (1, 'start',  0, '2017-01-01 00:00:01'::timestamp),
        (1, 'record', 0, '2017-01-01 00:00:01'::timestamp),
        (1, 'record', 4, '2017-01-01 00:00:02'::timestamp),
        (1, 'record', 5, '2017-01-01 00:00:03'::timestamp),
        (1, 'record', 6, '2017-01-01 00:00:04'::timestamp),
        (1, 'end',    0, '2017-01-01 00:00:04'::timestamp),
        (1, 'start',  0, '2017-01-01 00:00:05'::timestamp),
        (1, 'record', 0, '2017-01-01 00:00:05'::timestamp),
        (1, 'record', 2, '2017-01-01 00:00:06'::timestamp),
        (1, 'record', 3, '2017-01-01 00:00:07'::timestamp),
        (1, 'end',    0, '2017-01-01 00:00:07'::timestamp),
        (2, 'start',  0, '2017-01-01 00:00:08'::timestamp),
        (2, 'record', 0, '2017-01-01 00:00:08'::timestamp),
        (2, 'record', 3, '2017-01-01 00:00:09'::timestamp),
        (2, 'record', 8, '2017-01-01 00:00:10'::timestamp),
        (2, 'end',    0, '2017-01-01 00:00:10'::timestamp)
)
select n_user_id, n_action, n_qty, n_datetime
from (
    select
        action,
        lag(user_id)  over run_order as n_user_id,
        lag(action)   over run_order as n_action,
        lag(qty)      over run_order as n_qty,
        lag(datetime) over run_order as n_datetime
    from the_table
    -- Rows sharing a datetime are ordered start -> record -> end, then by qty.
    window run_order as (
        partition by user_id
        order by
            datetime,
            case action when 'start' then 0 when 'record' then 1 else 2 end,
            qty
    )
) t
where action = 'end'
Because some action = record rows have same datetime as start and end rows, I use CASE in ORDER BY, to be clear that start is first, then is record and then end.
Quick and dirty, assuming runs do not overlap
-- Highest-qty row(s) of every run, assuming runs do not overlap in time.
with bounds as (
    -- Pair the n-th 'start' with the n-th 'end' in time order. The ordering is
    -- given to ROW_NUMBER() itself: an ORDER BY on the subquery does not
    -- guarantee the order in which an empty OVER () numbers the rows.
    select starts.rn, starts.datetime as s, ends.datetime as e
    from (
        select datetime, row_number() over (order by datetime) as rn
        from runs
        where action = 'start'
    ) as starts
    join (
        select datetime, row_number() over (order by datetime) as rn
        from runs
        where action = 'end'
    ) as ends
        on starts.rn = ends.rn
),
with_run as (
    -- Tag every row with the run whose [s, e] interval contains it.
    select r.*,
           (select b.rn from bounds as b where b.s <= r.datetime and b.e >= r.datetime) as run
    from runs as r
),
max_qty as (
    -- Largest qty seen in each run.
    select run, max(qty) as qty
    from with_run
    group by run
)
select s.user_id, s.action, s.qty, s.datetime
from with_run as s
join max_qty as f
    on s.run = f.run
   and s.qty = f.qty;
-- TEST DATA --
-- Fixture: three runs (two for user 1, one for user 2), each delimited by a
-- 'start' and an 'end' row.
CREATE TABLE runs (
    user_id  int,
    action   text,
    qty      int,
    datetime timestamp
);

INSERT INTO runs (user_id, action, qty, datetime)
VALUES
    (1, 'start',  0, '2017-01-01 00:00:01'),
    (1, 'record', 0, '2017-01-01 00:00:01'),
    (1, 'record', 4, '2017-01-01 00:00:02'),
    (1, 'record', 5, '2017-01-01 00:00:03'),
    (1, 'record', 6, '2017-01-01 00:00:04'),
    (1, 'end',    0, '2017-01-01 00:00:04'),
    (1, 'start',  0, '2017-01-01 00:00:05'),
    (1, 'record', 0, '2017-01-01 00:00:05'),
    (1, 'record', 2, '2017-01-01 00:00:06'),
    (1, 'record', 3, '2017-01-01 00:00:07'),
    (1, 'end',    0, '2017-01-01 00:00:07'),
    (2, 'start',  0, '2017-01-01 00:00:08'),
    (2, 'record', 0, '2017-01-01 00:00:08'),
    (2, 'record', 3, '2017-01-01 00:00:09'),
    (2, 'record', 8, '2017-01-01 00:00:10'),
    (2, 'end',    0, '2017-01-01 00:00:10');
UPDATE
@Oto Shavadze's answer can be shortened:
-- For every 'end' row, fetch the whole preceding row with a single lag() over
-- the row value instead of one lag() per column.
with lookup as (
    select
        action,
        lag(t.*) over (
            -- partition per user, as in the answer this shortens, so another
            -- user's interleaved rows can never be picked as the predecessor
            partition by user_id
            -- rows sharing a datetime are ordered start -> record -> end
            order by
                datetime,
                case when action = 'start' then 0 when action = 'record' then 1 else 2 end
        ) as r
    from runs t
)
select (r::runs).user_id
     , (r::runs).action
     , (r::runs).qty
     , (r::runs).datetime
from lookup
where action = 'end';
I think the OP is unclear about what counts as the maximum: the last record before the end row, or the highest qty in the run.

Finding duplicate records in a specific date range

I have a table where I have 4 columns
Serial(nvarchar), SID(nvarchar), DateCreated(Date), CID(unique and int)
I want to find the records where there is duplicate serial and SID and where the 2 duplicate serial fall between date range of 180 days.
please help
Sample Data
Serial SID DateCreated CID
02302-25-0036 HONMD01 2017-05-01 00:00:00.000 1
02302-25-0036 HONMD01 2017-05-01 00:00:00.000 3
0264607 HONMD01 2017-05-01 00:00:00.000 65
0264607 HONMD01 2016-05-01 00:00:00.000 45
03118-09-0366 PRIVA00 2016-05-20 00:00:00.000 34
03118-09-0366 PRIVA00 2016-05-20 00:00:00.000 87
0969130 140439 2017-05-09 00:00:00.000 32
0969130 140439 2017-05-09 00:00:00.000 23
1049567 INIIL00 2017-04-12 00:00:00.000 76
-- Sample data: CID is unique; (Serial, SID) may repeat.
CREATE TABLE #Test (Serial nvarchar(20), [SID] nvarchar(10), DateCreated datetime, CID int);

INSERT INTO #Test (Serial, [SID], DateCreated, CID)
VALUES
    ('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 1),
    ('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 3),
    ('0264607',       'HONMD01', '2017-05-01 00:00:00.000', 65),
    ('0264607',       'HONMD01', '2016-05-01 00:00:00.000', 45),
    ('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 34),
    ('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 87),
    ('0969130',       '140439',  '2017-05-09 00:00:00.000', 32),
    ('0969130',       '140439',  '2017-05-09 00:00:00.000', 23),
    ('1049567',       'INIIL00', '2017-04-12 00:00:00.000', 76);

-- Rows that have at least one OTHER row with the same Serial and SID created
-- less than 180 days before or after them.
--   * The pair is matched on both Serial and SID (the previous join compared
--     t.SID with itself).
--   * b.CID <> a.CID stops a row from matching itself (difference 0 days).
--   * ABS() makes the distance symmetric; a signed DATEDIFF would let any pair
--     through in one direction, however far apart.
SELECT DISTINCT a.Serial, a.[SID], a.DateCreated, a.CID
FROM #Test AS a
INNER JOIN #Test AS b
    ON  b.Serial = a.Serial
    AND b.[SID]  = a.[SID]
    AND b.CID   <> a.CID
WHERE ABS(DATEDIFF(DAY, a.DateCreated, b.DateCreated)) < 180;
Try to do this:
-- For each row, look at the other rows with the same serial and sid, taken in
-- (cid, dateCreated) order:
--   last = greatest dateCreated among the rows before it ('1900-01-01' if none)
--   next = smallest dateCreated among the rows after it  ('5999-01-01' if none)
with cte as (
select
serial,
sid,
dateCreated,
cid,
coalesce(max(dateCreated) over(partition by serial, sid order by cid, dateCreated asc rows between unbounded preceding and 1 preceding), '1900-01-01') as last,
coalesce(min(dateCreated) over(partition by serial, sid order by cid, dateCreated asc rows between 1 following and unbounded following), '5999-01-01') as next
from table_name
)
-- Keeps rows that are at least 180 days after `last` AND at least 180 days
-- before `next`.
-- NOTE(review): as written this appears to keep rows WITHOUT a same-key row
-- inside 180 days (including keys that occur only once), which looks like the
-- opposite of "duplicates within 180 days" - confirm the intended direction.
select *
from cte
where
datediff(day, last, dateCreated) >= 180
and datediff(day, dateCreated, next) >= 180
This was a challenging question ! I have left final output with *(PreviousDate, rno) for easy understanding. Here is my way to solve :
-- Sample data: CID is unique; (Serial, SID) may repeat.
Create table #t(Serial nvarchar(100),SID nvarchar(100),DateCreated date,CID int)
Insert into #t values
('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 1),
('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 3),
('0264607', 'HONMD01', '2017-05-01 00:00:00.000', 65),
('0264607', 'HONMD01', '2016-05-01 00:00:00.000', 45),
('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 34),
('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 87),
('0969130', '140439', '2017-05-09 00:00:00.000', 32),
('0969130', '140439', '2017-05-09 00:00:00.000', 23),
('1049567', 'INIIL00', '2017-04-12 00:00:00.000', 76)
-- Outer query: return every row except the one numbered 1 in its group.
Select iq2.*
FROM
-- iq2: number the rows inside each (Serial, SID, flag) group, where flag is 1
-- when DateCreated is no more than 180 days before PreviousDate, else 0.
-- NOTE(review): the ORDER BY repeats the partition columns, so which row of a
-- group receives rno = 1 is not determined by the query - confirm that any row
-- of the group is acceptable as the "first".
(Select iq.Serial, iq.SID, iq.DateCreated, iq.CID, iq.PreviousDate,
ROW_NUMBER() OVER (PARTITION BY iq.Serial,iq.SID, CASE WHEN DATEDIFF(day, iq.DateCreated, iq.PreviousDate) <= 180 THEN 1 ELSE 0 END
ORDER BY Serial,SID) rno
FROM
-- iq: latest DateCreated per (Serial, SID) and the date 180 days before it.
(select Serial,SID,DateCreated,CID,
MAX(DateCreated) OVER (PARTITION BY Serial,SID ORDER BY Serial,SID) maxDate,
DATEADD(day,-180,MAX(DateCreated) OVER (PARTITION BY Serial,SID ORDER BY Serial,SID)) PreviousDate
from #t
)iq
)iq2
where iq2.rno <> 1
output :
Serial SID DateCreated CID PreviousDate rno
---------- ------- ---------- ---- ----------- ----
02302-25-0036 HONMD01 2017-05-01 3 2016-11-02 2
03118-09-0366 PRIVA00 2016-05-20 87 2015-11-22 2
0969130 140439 2017-05-09 23 2016-11-10 2
PS : PreviousDate is MAX PreviousDate

SQL Server episode identification

I am working with a blood pressure database in SQL Server which contains patient_id, timestamp (per minute) and systolicBloodPressure.
My goals are to find:
the number of episodes in which a patient is under a certain blood pressure threshold
An episode runs from the timestamp where the patient drops below a certain threshold until the timestamp where the patient comes back above the threshold.
the mean blood pressure per episode per patient
the duration of the episode per episode per patient
What I have tried so far:
I am able to identify episodes by just making a new column which sets to 1 if threshold is reached.
-- Flag each reading that is under the threshold ('1'), NULL otherwise.
-- A CASE expression must be closed with END; the alias names the new column.
select *
, CASE
when sys < threshold THEN '1'
END AS below_threshold
from BPDATA
However , I am not able to 'identify' different episodes within the patient; episode1 episode 2 with their relative timestamps.
Could someone help me with this? Or is there someone with a better different solution?
EDIT: Sample data with example threshold 100
ID Timestamp SysBP below Threshold
----------------------------------------------------
1 9:38 110 Null
1 9:39 105 Null
1 9:40 96 1
1 9:41 92 1
1 9:42 102 Null
2 12:23 95 1
2 12:24 98 1
2 12:25 102 Null
2 12:26 104 Null
2 12:27 94 1
2 12:28 88 1
2 12:29 104 Null
Thanks for the sample data.
This should work:
-- Sample readings; belowThreshold is 1 for a reading under 100, else NULL.
DECLARE @t TABLE (ID int, Timestamp time, SysBP int, belowThreshold bit)

INSERT @t
VALUES
    (1, '9:38', 110, null),
    (1, '9:39', 105, null),
    (1, '9:40', 96, 1),
    (1, '9:41', 92, 1),
    (1, '9:42', 102, null),
    (2, '12:23', 95, 1),
    (2, '12:24', 98, 1),
    (2, '12:25', 102, null),
    (2, '12:26', 104, null),
    (2, '12:27', 94, 1),
    (2, '12:28', 88, 1),
    (2, '12:29', 104, null)

DECLARE @threshold int = 100

;WITH y AS (
    -- epg = 0 only when the row and its predecessor are both flagged, i.e. the
    -- row continues an episode; every other row gets 1. (A comparison with
    -- NULL is never true, so unflagged rows always fall to the ELSE branch.)
    SELECT *,
           CASE
               WHEN LAG(belowThreshold, 1, 0) OVER (PARTITION BY id ORDER BY timestamp) = belowThreshold THEN 0
               ELSE 1
           END AS epg
    FROM @t
),
z AS (
    -- Only low readings remain; the running sum of epg therefore increases by
    -- one at the first row of each episode and becomes the episode number.
    SELECT *,
           SUM(epg) OVER (PARTITION BY id ORDER BY timestamp) AS episode
    FROM y
    WHERE sysbp < @threshold
)
SELECT id,
       episode,
       -- counts the grouped rows, i.e. the episodes, of this patient
       COUNT(episode) OVER (PARTITION BY id) AS number_of_episodes_per_id,
       AVG(sysbp) AS avg_sysbp,  -- integer average, since SysBP is int
       -- +1 so that an episode of a single reading lasts one minute
       DATEDIFF(minute, MIN(timestamp), MAX(timestamp)) + 1 AS episode_duration
FROM z
GROUP BY id, episode
This answer relies on LEAD() and LAG() functions so only works on 2012 or later:
Setup:
-- Per-minute systolic readings for three patients.
CREATE TABLE #bloodpressure (
    Patient_id            INT,
    [TimeStamp]           SMALLDATETIME,
    SystolicBloodPressure INT
);

INSERT INTO #bloodpressure (Patient_id, [TimeStamp], SystolicBloodPressure)
VALUES
    -- patient 1
    (1, '2017-01-01 09:01', 60),
    (1, '2017-01-01 09:02', 55),
    (1, '2017-01-01 09:03', 60),
    (1, '2017-01-01 09:04', 70),
    (1, '2017-01-01 09:05', 72),
    (1, '2017-01-01 09:06', 75),
    (1, '2017-01-01 09:07', 60),
    (1, '2017-01-01 09:08', 50),
    (1, '2017-01-01 09:09', 52),
    (1, '2017-01-01 09:10', 53),
    (1, '2017-01-01 09:11', 65),
    (1, '2017-01-01 09:12', 71),
    (1, '2017-01-01 09:13', 73),
    (1, '2017-01-01 09:14', 74),
    -- patient 2
    (2, '2017-01-01 09:01', 70),
    (2, '2017-01-01 09:02', 75),
    (2, '2017-01-01 09:03', 80),
    (2, '2017-01-01 09:04', 70),
    (2, '2017-01-01 09:05', 72),
    (2, '2017-01-01 09:06', 75),
    (2, '2017-01-01 09:07', 60),
    (2, '2017-01-01 09:08', 50),
    (2, '2017-01-01 09:09', 52),
    (2, '2017-01-01 09:10', 53),
    (2, '2017-01-01 09:11', 65),
    (2, '2017-01-01 09:12', 71),
    (2, '2017-01-01 09:13', 73),
    (2, '2017-01-01 09:14', 74),
    -- patient 3
    (3, '2017-01-01 09:12', 71),
    (3, '2017-01-01 09:13', 60),
    (3, '2017-01-01 09:14', 74);
Now using Lead And Lag to find the previous rows values, to find whether this is the beginning or end of a sequence of low blood pressures, in combination with a common table expression. Using a UNION of start and end events ensures that an event which covers just one minute is recorded as both a start and an end event.
-- Readings below this value count as "low"; change it here only.
DECLARE @Threshold INT = 70;

WITH CTE
AS
(
    -- Each reading together with the previous and next reading of the patient.
    SELECT *,
           LAG(SystolicBloodPressure, 1)
               OVER (PARTITION BY Patient_Id ORDER BY TimeStamp) AS PrevValue,
           LEAD(SystolicBloodPressure, 1)
               OVER (PARTITION BY Patient_Id ORDER BY TimeStamp) AS NextValue
    FROM #bloodpressure
),
CTE2
AS
(
    -- Get Start Events (EventType 1): a low reading with no predecessor or
    -- with a predecessor at/above the threshold.
    SELECT 1 AS [EventType], Patient_id, TimeStamp,
           ROW_NUMBER() OVER (ORDER BY Patient_id, TimeStamp) AS RN
    FROM CTE
    WHERE SystolicBloodPressure < @Threshold
      AND (PrevValue IS NULL OR PrevValue >= @Threshold)

    -- The branches differ in EventType, so they can never produce the same
    -- row; UNION ALL skips the pointless de-duplication.
    UNION ALL

    -- Get End Events (EventType 2): a low reading with no successor or with a
    -- successor at/above the threshold.
    SELECT 2 AS [EventType], Patient_id, TimeStamp,
           ROW_NUMBER() OVER (ORDER BY Patient_id, TimeStamp) AS RN
    FROM CTE
    WHERE SystolicBloodPressure < @Threshold
      AND (NextValue IS NULL OR NextValue >= @Threshold)
)
-- The n-th start event and the n-th end event belong to the same episode.
SELECT C1.Patient_id, C1.TimeStamp AS EventStart, C2.TimeStamp AS EventEnd
FROM CTE2 C1
INNER JOIN CTE2 C2
    ON C1.Patient_id = C2.Patient_id AND C1.RN = C2.RN
WHERE C1.EventType = 1 AND C2.EventType = 2
ORDER BY C1.Patient_id, C1.TimeStamp

T-SQL: Conditional NULL removal

I need to select only the Room_IDs that have no instances where the Status is NULL.
For example here :
TABLE_A
Room_Id Status Inspection_Date
-----------------------------------
1 NULL 5/15/2015
2 occupied 5/21/2015
2 NULL 1/19/2016
1 occupied 12/16/2015
4 NULL 3/25/2016
3 vacant 8/27/2015
1 vacant 4/17/2016
3 vacant 12/12/2015
3 vacant 3/22/2016
4 vacant 2/2/2015
4 vacant 3/24/2015
My result should look like this:
Room_Id Status Inspection_Date
-----------------------------------
3 vacant 8/27/2015
3 vacant 12/12/2015
3 vacant 3/22/2016
Because Room_ID '3' has no instances where the Status is NULL
Quick example of how to do it:
-- Inspection log held in a table variable.
DECLARE @tTable TABLE (
    Room_Id         INT,
    Status          VARCHAR(20),
    Inspection_Date DATETIME
)

INSERT INTO @tTable (Room_Id, Status, Inspection_Date) VALUES
    (1, NULL,       '5/15/2015'),
    (1, NULL,       '5/15/2015'),
    (2, 'occupied', '5/21/2015'),
    (2, NULL,       '1/19/2016'),
    (1, 'occupied', '12/16/2015'),
    (4, NULL,       '3/25/2016'),
    (3, 'vacant',   '8/27/2015'),
    (1, 'vacant',   '4/17/2016'),
    (3, 'vacant',   '12/12/2015'),
    (3, 'vacant',   '3/22/2016'),
    (4, 'vacant',   '2/2/2015'),
    (4, 'vacant',   '3/24/2015')

-- All rows of the rooms that never had a NULL status.
SELECT T1.Room_Id, T1.Status, T1.Inspection_Date
FROM @tTable T1
WHERE T1.Room_Id NOT IN (
    SELECT T2.Room_Id
    FROM @tTable T2
    WHERE T2.Status IS NULL
      -- Room_Id is nullable: one NULL in this list would make NOT IN reject
      -- every row, so NULL ids are kept out of it.
      AND T2.Room_Id IS NOT NULL
)
Gives :
Room_Id | Status | Inspection_Date |
-------------------------------------------------
3 | vacant | 2015-08-27 00:00:00.000
3 | vacant | 2015-12-12 00:00:00.000
3 | vacant | 2016-03-22 00:00:00.000
Try this out:
-- All rows of the rooms that never appear with a NULL status.
SELECT *
FROM Table1 AS r
WHERE r.Room_ID NOT IN (
    -- ids of rooms that had a NULL status at least once
    SELECT DISTINCT n.Room_ID
    FROM Table1 AS n
    WHERE n.Status IS NULL
)
The subquery returns a list of unique room ids that, at one time or another, had a NULL status. The outer query looks at that list and says "return * where the Room_ID is NOT one of those in the subquery".
If you want to try it in SQL Fiddle, here is the Schema:
-- SQL Fiddle schema: room inspection log.
CREATE TABLE Table1 (
    Room_ID         int,
    Status          varchar(8),
    Inspection_Date datetime
);

INSERT INTO Table1 (Room_ID, Status, Inspection_Date)
VALUES
    (1, NULL,       '2015-05-15 00:00:00'),
    (2, 'occupied', '2015-05-21 00:00:00'),
    (2, NULL,       '2016-01-19 00:00:00'),
    (1, 'occupied', '2015-12-16 00:00:00'),
    (4, NULL,       '2016-03-25 00:00:00'),
    (4, 'vacant',   '2015-08-27 00:00:00'),
    (1, 'vacant',   '2016-04-17 00:00:00'),
    (3, 'vacant',   '2015-12-12 00:00:00'),
    (3, 'vacant',   '2016-03-22 00:00:00'),
    (4, 'vacant',   '2015-02-02 00:00:00'),
    (4, 'vacant',   '2015-03-24 00:00:00'),
    (2, NULL,       '2015-05-22 00:00:00');
As alternative to Hashman, I just prefer to use not exists over not in for these types of queries.
Creating some test data
Note that I just kept the same date for everything since it's not imperative to the question.
-- Test data; every row gets the current date because the date is irrelevant
-- to the question.
CREATE TABLE #table_a (
    Room_Id         int,
    Status          varchar(32),
    Inspection_Date date
);

INSERT INTO #table_a (Room_Id, Status, Inspection_Date)
VALUES
    (1, NULL,       GETDATE()),
    (2, 'occupied', GETDATE()),
    (2, NULL,       GETDATE()),
    (1, 'occupied', GETDATE()),
    (4, NULL,       GETDATE()),
    (3, 'vacant',   GETDATE()),
    (1, 'vacant',   GETDATE()),
    (3, 'vacant',   GETDATE()),
    (3, 'vacant',   GETDATE()),
    (4, 'vacant',   GETDATE()),
    (4, 'vacant',   GETDATE());
The query
-- All rows of the rooms for which no row with a NULL status exists.
SELECT *
FROM #table_a AS t1
WHERE NOT EXISTS (
    SELECT 1
    FROM #table_a AS t2
    WHERE t2.Room_Id = t1.Room_Id
      AND t2.Status IS NULL
);
The results
Room_Id Status Inspection_Date
----------- -------------------------------- ---------------
3 vacant 2016-06-17
3 vacant 2016-06-17
3 vacant 2016-06-17
You can use a CTE and NOT EXISTS, as in the code below:
-- bt exposes the three inspection columns; the main query keeps the rows of
-- every room that has no row with a NULL status.
WITH bt AS (
    SELECT RoomId, Status, Inspection_Date
    FROM dbo.Table_1
)
SELECT *
FROM bt AS a
WHERE NOT EXISTS (
    SELECT 1
    FROM bt AS n
    WHERE n.RoomId = a.RoomId
      AND n.Status IS NULL
);