SQL count consecutive rows - sql

I have the following data in a table:
|event_id |starttime |person_id|attended|
|------------|-----------------|---------|--------|
| 11512997-1 | 01-SEP-16 08:00 | 10001 | N |
| 11512997-2 | 01-SEP-16 10:00 | 10001 | N |
| 11512997-3 | 01-SEP-16 12:00 | 10001 | N |
| 11512997-4 | 01-SEP-16 14:00 | 10001 | N |
| 11512997-5 | 01-SEP-16 16:00 | 10001 | N |
| 11512997-6 | 01-SEP-16 18:00 | 10001 | Y |
| 11512997-7 | 02-SEP-16 08:00 | 10001 | N |
| 11512997-1 | 01-SEP-16 08:00 | 10002 | N |
| 11512997-2 | 01-SEP-16 10:00 | 10002 | N |
| 11512997-3 | 01-SEP-16 12:00 | 10002 | N |
| 11512997-4 | 01-SEP-16 14:00 | 10002 | Y |
| 11512997-5 | 01-SEP-16 16:00 | 10002 | N |
| 11512997-6 | 01-SEP-16 18:00 | 10002 | Y |
| 11512997-7 | 02-SEP-16 08:00 | 10002 | Y |
I want to produce the following results, where the maximum number of consecutive occurrences where attended = 'N' is returned:
|person_id|consec_missed_max|
| 10001 | 5 |
| 10002 | 3 |
How could this be done in Oracle (or ANSI) SQL? Thanks!
Edit:
So far I have tried:
-- OP's attempt. NOTE(review): this counts ALL 'N' rows per person, not the
-- longest consecutive run. Filtering attended = 'N' inside t1 before
-- numbering removes the 'Y' rows that separate the runs, so the gap
-- information is lost; also t2.max_idx is never used in the final SELECT,
-- so the join to t2 has no effect on the result.
WITH t1 AS
(SELECT t.person_id,
row_number() over(PARTITION BY t.person_id ORDER BY t.starttime) AS idx
FROM the_table t
WHERE t.attended = 'N'),
t2 AS
(SELECT person_id, MAX(idx) max_idx FROM t1 GROUP BY person_id)
SELECT t1.person_id, COUNT(1) ct
FROM t1
JOIN t2
ON t1.person_id = t2.person_id
GROUP BY t1.person_id;

The main work is in the factored subquery "prep". You seem to be somewhat familiar with analytic functions, but that is not enough. This solution uses the so-called "tabibitosan" method to create groups of consecutive rows with the same characteristic in one or more dimensions; in this case, you want to group consecutive N rows with a different group for each sequence. This is done with a difference of two ROW_NUMBER() calls - one partitioned by person only, and the other by person and attended. Google "tabibitosan" to read more about the idea if needed.
-- Tabibitosan solution: group consecutive rows with the same ATTENDED value
-- per person, then take the longest 'N' group for each person.
with
inputs ( event_id, starttime, person_id, attended ) as (
select '11512997-1', to_date('01-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-2', to_date('01-SEP-16 10:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-3', to_date('01-SEP-16 12:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-4', to_date('01-SEP-16 14:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-5', to_date('01-SEP-16 16:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-6', to_date('01-SEP-16 18:00', 'dd-MON-yy hh24:mi'), 10001, 'Y' from dual union all
select '11512997-7', to_date('02-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-1', to_date('01-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-2', to_date('01-SEP-16 10:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-3', to_date('01-SEP-16 12:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-4', to_date('01-SEP-16 14:00', 'dd-MON-yy hh24:mi'), 10002, 'Y' from dual union all
select '11512997-5', to_date('01-SEP-16 16:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-6', to_date('01-SEP-16 18:00', 'dd-MON-yy hh24:mi'), 10002, 'Y' from dual union all
select '11512997-7', to_date('02-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10002, 'Y' from dual
),
-- gp = (row position within person) - (row position within person+attended):
-- this difference is constant across a run of identical ATTENDED values, so
-- it identifies each run ("tabibitosan" grouping).
prep ( starttime, person_id, attended, gp ) as (
select starttime, person_id, attended,
row_number() over (partition by person_id order by starttime) -
row_number() over (partition by person_id, attended
order by starttime)
from inputs
),
-- one row per (person, run of 'N' rows): the length of that run
counts ( person_id, consecutive_absences ) as (
select person_id, count(*)
from prep
where attended = 'N'
group by person_id, gp
)
-- longest run of absences per person
select person_id, max(consecutive_absences) as max_consecutive_absences
from counts
group by person_id
order by person_id;
OUTPUT:
PERSON_ID MAX_CONSECUTIVE_ABSENCES
---------- ---------------------------------------
10001 5
10002 3

If you are using Oracle 12c you could use MATCH_RECOGNIZE:
Data:
-- Materialise the question's sample rows into a real table for the
-- MATCH_RECOGNIZE demos below.
CREATE TABLE data AS
SELECT *
FROM (
with inputs ( event_id, starttime, person_id, attended ) as (
select '11512997-1', to_date('01-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-2', to_date('01-SEP-16 10:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-3', to_date('01-SEP-16 12:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-4', to_date('01-SEP-16 14:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-5', to_date('01-SEP-16 16:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-6', to_date('01-SEP-16 18:00', 'dd-MON-yy hh24:mi'), 10001, 'Y' from dual union all
select '11512997-7', to_date('02-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10001, 'N' from dual union all
select '11512997-1', to_date('01-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-2', to_date('01-SEP-16 10:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-3', to_date('01-SEP-16 12:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-4', to_date('01-SEP-16 14:00', 'dd-MON-yy hh24:mi'), 10002, 'Y' from dual union all
select '11512997-5', to_date('01-SEP-16 16:00', 'dd-MON-yy hh24:mi'), 10002, 'N' from dual union all
select '11512997-6', to_date('01-SEP-16 18:00', 'dd-MON-yy hh24:mi'), 10002, 'Y' from dual union all
select '11512997-7', to_date('02-SEP-16 08:00', 'dd-MON-yy hh24:mi'), 10002, 'Y' from dual
)
SELECT * FROM inputs
);
And query:
-- PATTERN(a b*) matches each maximal run of rows whose ATTENDED equals the
-- run's first row; FINAL COUNT(*) reports the full run length on every row of
-- the match. With ALL ROWS PER MATCH the outer WHERE keeps only 'N' rows, so
-- MAX(LEN) is the longest streak of absences per person.
SELECT PERSON_ID, MAX(LEN) AS MAX_ABSENCES_IN_ROW
FROM data
MATCH_RECOGNIZE (
PARTITION BY PERSON_ID
ORDER BY STARTTIME
MEASURES FINAL COUNT(*) AS len
ALL ROWS PER MATCH
PATTERN(a b*)
DEFINE b AS attended = a.attended
)
WHERE attended = 'N'
GROUP BY PERSON_ID;
Output:
"PERSON_ID","MAX_ABSENCES_IN_ROW"
10001,5
10002,3
EDIT:
As @mathguy pointed out, it could be rewritten as:
-- Simpler variant: match only maximal runs of 'N' directly (PATTERN(a+)),
-- producing one summary row per run (ONE ROW PER MATCH is the default), so no
-- outer filter on ATTENDED is needed.
SELECT PERSON_ID, MAX(LEN) AS MAX_ABSENCES_IN_ROW
FROM data
MATCH_RECOGNIZE (
PARTITION BY PERSON_ID
ORDER BY STARTTIME
MEASURES COUNT(*) AS len
PATTERN(a+)
DEFINE a AS attended = 'N'
)
GROUP BY PERSON_ID;
db<>fiddle demo

Related

Generate date range data group based on data by dates

I have a list of date's data in daily basis below:
| Daytime | Item | Category| Value |
| -------- |------|------- |-------|
| 01.01.2022|A |1 |500 |
| 02.01.2022|A |1 |500 |
| 03.01.2022|A |1 |80000 |
| 04.01.2022|A |1 |500 |
| 05.01.2022|A |1 |500 |
| 01.01.2022|A |2 |600 |
| 02.01.2022|A |2 |600 |
| 03.01.2022|A |2 |600 |
| 04.01.2022|A |2 |600 |
| 05.01.2022|A |2 |600 |
| 01.01.2022|C |1 |600 |
| 02.01.2022|C |1 |600 |
| 03.01.2022|C |1 |600 |
| 04.01.2022|C |1 |600 |
| 05.01.2022|C |1 |600 |
How can i transform the data into this form?
| FromDate | ToDate | Item |Category| Value |
| --------- |--------- |------|------ |-------|
| 01.01.2022| 02.01.2022|A |1 |500 |
| 03.01.2022| 03.01.2022|A |1 |80000 |
| 04.01.2022| 05.01.2022|A |1 |500 |
| 01.01.2022| 05.01.2022|A |2 |600 |
| 01.01.2022| 05.01.2022|C |1 |600 |
I want to group the value (by item and category too) only if they are same for consecutive dates, please help, thank you!
Date format in DD.MM.YYYY and daytime's datatype is Date.
Following script for questions:
-- Sample data for the question, written as a parenthesized inline view
-- (use it directly in a FROM clause, or name it in a WITH clause).
(SELECT to_date('01/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 1 Category, 500 Value FROM dual UNION ALL
SELECT to_date('02/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 1 Category, 500 Value FROM dual UNION ALL
SELECT to_date('03/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 1 Category, 80000 Value FROM dual UNION ALL
SELECT to_date('04/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 1 Category, 500 Value FROM dual UNION ALL
SELECT to_date('05/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 1 Category, 500 Value FROM dual UNION ALL
SELECT to_date('01/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 2 Category, 600 Value FROM dual UNION ALL
SELECT to_date('02/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 2 Category, 600 Value FROM dual UNION ALL
SELECT to_date('03/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 2 Category, 600 Value FROM dual UNION ALL
SELECT to_date('04/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 2 Category, 600 Value FROM dual UNION ALL
SELECT to_date('05/01/2022', 'dd/mm/yyyy') daytime, 'A' Item, 2 Category, 600 Value FROM dual UNION ALL
SELECT to_date('01/01/2022', 'dd/mm/yyyy') daytime, 'C' Item, 1 Category, 600 Value FROM dual UNION ALL
SELECT to_date('02/01/2022', 'dd/mm/yyyy') daytime, 'C' Item, 1 Category, 600 Value FROM dual UNION ALL
SELECT to_date('03/01/2022', 'dd/mm/yyyy') daytime, 'C' Item, 1 Category, 600 Value FROM dual UNION ALL
SELECT to_date('04/01/2022', 'dd/mm/yyyy') daytime, 'C' Item, 1 Category, 600 Value FROM dual UNION ALL
SELECT to_date('05/01/2022', 'dd/mm/yyyy') daytime, 'C' Item, 1 Category, 600 Value FROM dual)
You can use common table expression (cte) technique for that purpose.
-- Recursive-CTE solution: number the rows per (item, category), then walk
-- them in order, starting a new group (grp) whenever the value changes.
with YourSample ( Daytime, Item, Category, Value) as (
select to_date('01.01.2022', 'DD.MM.YYYY'), 'A', 1, 500 from dual union all
select to_date('02.01.2022', 'DD.MM.YYYY'), 'A', 1, 500 from dual union all
select to_date('03.01.2022', 'DD.MM.YYYY'), 'A', 1, 80000 from dual union all
select to_date('04.01.2022', 'DD.MM.YYYY'), 'A', 1, 500 from dual union all
select to_date('05.01.2022', 'DD.MM.YYYY'), 'A', 1, 500 from dual union all
select to_date('01.01.2022', 'DD.MM.YYYY'), 'A', 2, 600 from dual union all
select to_date('02.01.2022', 'DD.MM.YYYY'), 'A', 2, 600 from dual union all
select to_date('03.01.2022', 'DD.MM.YYYY'), 'A', 2, 600 from dual union all
select to_date('04.01.2022', 'DD.MM.YYYY'), 'A', 2, 600 from dual union all
select to_date('05.01.2022', 'DD.MM.YYYY'), 'A', 2, 600 from dual union all
select to_date('01.01.2022', 'DD.MM.YYYY'), 'C', 1, 600 from dual union all
select to_date('02.01.2022', 'DD.MM.YYYY'), 'C', 1, 600 from dual union all
select to_date('03.01.2022', 'DD.MM.YYYY'), 'C', 1, 600 from dual union all
select to_date('04.01.2022', 'DD.MM.YYYY'), 'C', 1, 600 from dual union all
select to_date('05.01.2022', 'DD.MM.YYYY'), 'C', 1, 600 from dual
)
-- rnb: position of each row within its (item, category) partition, by date
, YourSampleRanked (Daytime, Item, Category, Value, rnb) as (
select Daytime, Item, Category, Value
, row_number()over(PARTITION BY ITEM, CATEGORY ORDER BY DAYTIME) rnb
from YourSample
)
-- walk rows in rnb order; DECODE keeps the same grp while the value repeats
-- and increments it when the value changes
, cte (Daytime, Item, Category, Value, rnb, grp) as (
select Daytime, Item, Category, Value, rnb, 1 grp
from YourSampleRanked
where rnb = 1
union all
select t.Daytime, t.Item, t.Category, t.Value, t.rnb
, decode( t.Value, c.Value, c.grp, c.grp + 1 ) grp
from YourSampleRanked t
join cte c
on ( c.Category = t.Category and c.Item = t.Item and t.rnb = c.rnb + 1 )
)
-- collapse each group to its date range; Value is constant within a group
select min(DAYTIME) FromDate, max(DAYTIME) ToDate, ITEM, CATEGORY, min(Value) Value
from cte
GROUP BY GRP, ITEM, CATEGORY
order by ITEM, CATEGORY, FromDate
;
demo on db<>fiddle
You can also use the MATCH_RECOGNIZE clause for the same purpose if you are running Oracle 12c and later.
-- MATCH_RECOGNIZE alternative (Oracle 12c+). NOTE(review): YourSample must be
-- in scope when this runs -- the WITH clause above, or a real table/view of
-- the same shape.
select FromDate, toDate, ITEM, CATEGORY, VALUE
from YourSample
MATCH_RECOGNIZE (
PARTITION BY ITEM, CATEGORY
ORDER BY DAYTIME
-- STRT is the first row of each match; SAME are subsequent rows with the same
-- value, so the range runs from STRT's date to the last SAME date (or STRT's
-- own date when the run has a single row -- hence the NVL).
MEASURES first(STRT.VALUE) as VALUE,
first(STRT.DAYTIME) as FromDate,
nvl(last(SAME.DAYTIME), first(STRT.DAYTIME)) as toDate
ONE ROW PER MATCH
PATTERN (STRT Same*)
DEFINE
Same AS VALUE = PREV(VALUE)
) MR
ORDER BY ITEM, CATEGORY, FromDate, toDate
;
demo2 on fiddle
From Oracle 12, you can use MATCH_RECOGNIZE to perform row-by-row processing:
-- Each match is a maximal run of rows whose value equals the first row's
-- value (FIRST(value) = value); ONE ROW PER MATCH emits one summary row per
-- run with its date range.
SELECT *
FROM table_name
MATCH_RECOGNIZE (
PARTITION BY item, category
ORDER BY daytime
MEASURES
FIRST(daytime) AS from_date,
LAST(daytime) AS to_date,
FIRST(value) AS value
ONE ROW PER MATCH
PATTERN (same_value+)
DEFINE
same_value AS FIRST(value) = value
)
Which, for the sample data:
-- Sample data: one row per (item, category) per day, 1-5 Jan 2022, using
-- ANSI DATE literals.
CREATE TABLE table_name (daytime, item, category, value) AS
SELECT DATE '2022-01-01', 'A', 1, 500 FROM DUAL UNION ALL
SELECT DATE '2022-01-02', 'A', 1, 500 FROM DUAL UNION ALL
SELECT DATE '2022-01-03', 'A', 1, 80000 FROM DUAL UNION ALL
SELECT DATE '2022-01-04', 'A', 1, 500 FROM DUAL UNION ALL
SELECT DATE '2022-01-05', 'A', 1, 500 FROM DUAL UNION ALL
SELECT DATE '2022-01-01', 'A', 2, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-02', 'A', 2, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-03', 'A', 2, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-04', 'A', 2, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-05', 'A', 2, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-01', 'C', 1, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-02', 'C', 1, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-03', 'C', 1, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-04', 'C', 1, 600 FROM DUAL UNION ALL
SELECT DATE '2022-01-05', 'C', 1, 600 FROM DUAL
Outputs:
ITEM
CATEGORY
FROM_DATE
TO_DATE
VALUE
A
1
2022-01-01 00:00:00
2022-01-02 00:00:00
500
A
1
2022-01-03 00:00:00
2022-01-03 00:00:00
80000
A
1
2022-01-04 00:00:00
2022-01-05 00:00:00
500
A
2
2022-01-01 00:00:00
2022-01-05 00:00:00
600
C
1
2022-01-01 00:00:00
2022-01-05 00:00:00
600
db<>fiddle here
This is a job for a GROUP BY using TRUNC(daytime, 'MM'). TRUNC(), when used with dates, truncates them to the beginning of a calendar / clock period.
-- NOTE(review): this groups by calendar month (TRUNC(daytime, 'MM')) and
-- SUMs all values within it. That is not the same as collapsing runs of
-- consecutive equal values, so it does not reproduce the output the question
-- asked for.
SELECT TRUNC(Daytime, 'MM') FromDate,
ADD_MONTHS(TRUNC(Daytime, 'MM'), 1) ToDate,
Item, Category,
SUM(Value) Value
FROM my_table
GROUP BY TRUNC(Daytime, 'MM'), Item, Category
Or alternatively you can avoid those arcane Oracle date format specifiers like 'MM' and go with LAST_DAY().
-- Same monthly grouping expressed with LAST_DAY() instead of TRUNC(..., 'MM');
-- the same caveat as above applies.
SELECT ADD_MONTHS(LAST_DAY(Daytime) + 1, -1) FromDate,
LAST_DAY(Daytime) + 1 ToDate,
Item, Category,
SUM(Value) Value
FROM my_table
GROUP BY LAST_DAY(Daytime), Item, Category

Group historical data

I'm stuck with the following problem and need help:
An object has properties that are calculated every day.
They are stored in a key-value historical table.
Property is mistakenly stored even if it was not changed.
I need a query that will group this data set by "actual values":
If a value was not changed during several days it is output as one row.
If value A was changed to B then back to A, then A, B, A should be output by the query (first A and second A are different date intervals).
Here is a dataset example.
-- Dataset example: daily snapshots of a key/value property. Value A appears
-- for 3 days, then B for 3 days, then A again -- the two A intervals must
-- remain separate rows in the expected output.
with obj_val_hist as
(
select 123 obj_id, 'k_1' key, 'A' value_, to_date('01.01.2021', 'DD.MM.YYYY') start_dt, to_date('01.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'A' value_, to_date('02.01.2021', 'DD.MM.YYYY') start_dt, to_date('02.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'A' value_, to_date('03.01.2021', 'DD.MM.YYYY') start_dt, to_date('03.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'B' value_, to_date('04.01.2021', 'DD.MM.YYYY') start_dt, to_date('04.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'B' value_, to_date('05.01.2021', 'DD.MM.YYYY') start_dt, to_date('05.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'B' value_, to_date('06.01.2021', 'DD.MM.YYYY') start_dt, to_date('06.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'A' value_, to_date('07.01.2021', 'DD.MM.YYYY') start_dt, to_date('07.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'A' value_, to_date('08.01.2021', 'DD.MM.YYYY') start_dt, to_date('08.01.2021', 'DD.MM.YYYY') end_dt from dual union all
select 123 obj_id, 'k_1' key, 'A' value_, to_date('09.01.2021', 'DD.MM.YYYY') start_dt, to_date('09.01.2021', 'DD.MM.YYYY') end_dt from dual
)
select * from obj_val_hist where obj_id = 123;
Data set:
obj_id
key
value
start_date
end_date
123
k_1
A
01.01.2021
01.01.2021
123
k_1
A
02.01.2021
02.01.2021
123
k_1
A
03.01.2021
03.01.2021
123
k_1
B
04.01.2021
04.01.2021
123
k_1
B
05.01.2021
05.01.2021
123
k_1
B
06.01.2021
06.01.2021
123
k_1
A
07.01.2021
07.01.2021
123
k_1
A
08.01.2021
08.01.2021
123
k_1
A
09.01.2021
09.01.2021
Expected result:
obj_id
key
value
start_date
end_date
123
k_1
A
01.01.2021
03.01.2021
123
k_1
B
04.01.2021
06.01.2021
123
k_1
A
07.01.2021
09.01.2021
This table contains values for million objects.
It is queried by obj_id and has an index on it.
Performance is a key point so using stored functions is most probably not an option.
This query will be a small part of a big view that is used by an external system.
I expected that there should be an analytic function suited for such a problem.
Something like dense_rank but with the possibility to order by one column (start_dt) but increase value when another column (value_) gets a different value.
But I didn't find one.
You may use match_recognize for this, which can also handle gaps in dates and is quite efficient and natural to read:
-- Fixture table: key/value history including a gap (10-Jan missing) and one
-- record whose validity spans two days (12-13 Jan), to exercise the
-- gap-handling condition in the MATCH_RECOGNIZE below.
create table t (
obj_id
, key_
, value_
, start_date
, end_date
)
as
select 123, 'k_1', 'A', to_date('01.01.2021', 'dd.mm.yyyy'), to_date('01.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('02.01.2021', 'dd.mm.yyyy'), to_date('02.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('03.01.2021', 'dd.mm.yyyy'), to_date('03.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'B', to_date('04.01.2021', 'dd.mm.yyyy'), to_date('04.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'B', to_date('05.01.2021', 'dd.mm.yyyy'), to_date('05.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'B', to_date('06.01.2021', 'dd.mm.yyyy'), to_date('06.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('07.01.2021', 'dd.mm.yyyy'), to_date('07.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('08.01.2021', 'dd.mm.yyyy'), to_date('08.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('09.01.2021', 'dd.mm.yyyy'), to_date('09.01.2021', 'dd.mm.yyyy') from dual union all
/*Let's skip 10.01*/
select 123, 'k_1', 'A', to_date('11.01.2021', 'dd.mm.yyyy'), to_date('11.01.2021', 'dd.mm.yyyy') from dual union all
/*And extend validity period for some record*/
select 123, 'k_1', 'A', to_date('12.01.2021', 'dd.mm.yyyy'), to_date('13.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('14.01.2021', 'dd.mm.yyyy'), to_date('14.01.2021', 'dd.mm.yyyy') from dual;
-- ^ terminator added: as posted, the CREATE TABLE ran straight into the
-- query below, forming one invalid statement.

select *
from t
match_recognize (
/*For each ID and KEY*/
partition by obj_id, key_
order by start_date asc
/*Output attributes*/
measures
/*start_date of the first row in match group*/
final first(start_date) as min_start_date,
/*end_date of the last row in match group*/
final last(end_date) as max_end_date,
/*value itself as it is constant for match group*/
value_ as val
/*First row and any consecutive matches*/
pattern (init A*)
define
/*Consecutive rows are those which have the same value in the value_ field
and whose start_date is not farther than
1 day from the end_date of the previous row
*/
A as prev(value_) = value_
and prev(end_date) + 1 = start_date
);
OBJ_ID | KEY_ | MIN_START_DATE | MAX_END_DATE | VAL
-----: | :--- | :------------- | :----------- | :--
123 | k_1 | 01-JAN-21 | 03-JAN-21 | A
123 | k_1 | 04-JAN-21 | 06-JAN-21 | B
123 | k_1 | 07-JAN-21 | 09-JAN-21 | A
123 | k_1 | 11-JAN-21 | 14-JAN-21 | A
db<>fiddle here
If you indeed have data every day, then you can use the following relatively simple logic. The subquery calculate when the value changes. The outer query then calculates the end date by looking at the date in the next row:
-- Works when there is a row for every day: the subquery keeps each row's
-- previous value (LAG) and the overall latest end date; the outer query keeps
-- only change points (first row, or value differs from previous) and derives
-- each segment's end date from the next segment's start minus one day,
-- falling back to max_end_dt for the last segment.
select obj_id, key, value_, start_dt,
coalesce(lead(start_dt) over (partition by obj_id, key order by start_dt) - interval '1' day, max_end_dt)
from (select ovh.*,
lag(value_) over (partition by obj_id, key order by start_dt) as prev_value_,
max(end_dt) over (partition by obj_id, key) as max_end_dt
from obj_val_hist ovh
where obj_id = 123
) ovh
where prev_value_ is null or prev_value_ <> value_;
However, your data suggests that you could have a much more complicated problem. You have two dates in the row, a start date and end date. These could, in theory, overlap or have gaps. You can handle that by assigning groups when a new key/value pair starts and then aggregating:
-- Gaps-and-islands variant that tolerates gaps/overlaps between start_dt and
-- end_dt: prev_end_dt is the latest earlier end date for the same
-- (obj_id, key, value_); a new group (grp) starts whenever the previous range
-- does not reach this row's start minus one day, and each group is then
-- collapsed by aggregation.
select obj_id, key, value_, min(start_dt), max(end_dt)
from (select ovh.*,
sum(case when prev_end_dt >= start_dt - interval '1' day then 0 else 1 end) over (partition by obj_id, key order by start_dt) as grp
from (select ovh.*,
max(end_dt) over (partition by obj_id, key, value_
order by start_dt
range between unbounded preceding and interval '1' day preceding
) as prev_end_dt
from obj_val_hist ovh
) ovh
) ovh
group by obj_id, key, value_, grp;
Here is a db<>fiddle.

Query to compare dates patients are missing from hospital census

I have a hospital bed census that is triggered and creates a date/time stamped row in a table. when the bed check portion is done it labels the event census. i have found that some patients on days they were in the hospital have not been timestamped with the event census. I am trying to write a query to capture all patients that may have had this issue.
i need to capture the patients between their admit and discharge dates, and then any day they do not have a time stamp event of census. for example, this patient does not have a census on the 12th or 13th but does on the 14th. i want to be able to pull this pat_id and dates they are not stamped with census.
11-APR-2019 11:59:00 PM CENSUS
12-APR-2019 03:12:00 PM TRANSFER OUT
12-APR-2019 03:12:00 PM TRANSFER IN
14-APR-2019 07:06:00 AM PATIENT UPDATE
14-APR-2019 11:40:00 AM TRANSFER OUT
14-APR-2019 11:40:00 AM TRANSFER IN
14-APR-2019 11:59:00 PM CENSUS
I created a calendar portion to my query. then i created a query to capture patients in a time frame. from there i am a bit stuck.
-- Fragment of a larger WITH clause: DATE1 is a generated calendar of the days
-- of 2019. NOTE(review): CONNECT BY LEVEL < 366 yields 365 rows, so Dec 31
-- would be missed in a leap year; SUBSTR(dates,1,2) relies on an implicit
-- date-to-string conversion and therefore on the session's NLS_DATE_FORMAT.
DATE1
AS
(select
to_char(dates,'MM/DD/YYYY') AS WEEK_DATE,
dates,
to_char(dates,'D') weekday,
to_char(dates,'mm') m_onth,
to_char(dates,'ww') week_of_year,
to_char(dates,'dd') month_day,
to_char(dates,'ddd') Year_day,
SUBSTR(dates,1,2) AS WEEKDATE
from (SELECT TRUNC(to_date(v.yyyy,'YYYY'),'YY') +LEVEL - 1 DATES
FROM ( SELECT 2019 yyyy FROM dual ) v
CONNECT BY LEVEL < 366
)
)
,
-- Fragment: ADT collects one row per ADT event in the window, with the
-- admit/discharge and event dates also formatted as MM/DD/YYYY strings for
-- joining to the calendar.
-- NOTE(review): comparing effective_time to the strings '08-apr-2019' and
-- '15-apr-2019' relies on implicit conversion (NLS settings) -- explicit
-- TO_DATE or DATE literals would be safer. The ORDER BY inside a CTE has no
-- guaranteed effect on the final result.
ADT
AS (select distinct
adt.pat_id,
peh.y_mrn,
adt.DEPARTMENT_ID,
adp.department_name,
--peh.HOSP_ADMSN_TIME,
to_char(peh.HOSP_ADMSN_TIME,'MM/DD/YYYY') AS HOSP_ADMSN_TIME2,
--peh.HOSP_DISCH_TIME,
to_char(peh.HOSP_DISCH_TIME,'MM/DD/YYYY') AS HOSP_DISCH_TIME2,
adt.effective_time,
to_char(aDT.effective_time,'MM/DD/YYYY') AS EFFECT_DATE,
--LEAD(adt.effective_time) over (partition by ADT.pat_id order by ADT.pat_id, adt.effective_time) AS NEXT_EFF_DATE,
--CASE WHEN adt.event_type_c =6 THEN adt.effective_time END AS CENSUS_DATE,
et.title as event_type,
adt.event_type_c,
peh.ADT_PAT_CLASS_C,
Adt.event_subtype_c--,
--LAG(adt.effective_time) over (partition by ADT.pat_id order by ADT.pat_id, adt.effective_time) AS PREV_EFF_DATE
from
clarity_adt adt
left OUTER join
pat_enc_hsp peh
on
peh.pat_enc_csn_id = adt.pat_enc_csn_id
left outer join
clarity_dep adp
on adt.department_id = adp.department_id
left OUTER join
zc_event_type et
on adt.event_type_c = et.event_type_c
where
adt.effective_time between '08-apr-2019' and '15-apr-2019'
order by adt.effective_time
)
,
-- ADT2: left-join every calendar day to the events that occurred on it and
-- flag days with no usable census information.
ADT2
AS
(
SELECT-- DISTINCT
D.WEEK_DATE,
A.HOSP_ADMSN_TIME2,
A.EFFECT_DATE,
A.PAT_ID,
-- 1 = day has no CENSUS-stamped event (no event at all, or only non-CENSUS)
CASE WHEN D.WEEK_DATE IS NOT NULL AND A.EFFECT_DATE IS NULL AND A.event_type <> 'CENSUS' THEN 1
WHEN D.WEEK_DATE IS NOT NULL AND A.EFFECT_DATE IS NULL AND A.event_type IS NULL THEN 1
WHEN D.WEEK_DATE IS NOT NULL AND A.EFFECT_DATE IS NOT NULL AND A.event_type <> 'CENSUS' THEN 1 ELSE 0
END AS NO_ADT_INFO,
A.event_type,
A.HOSP_DISCH_TIME2
FROM
DATE1 D -- fixed: was DATE2, but the calendar CTE defined above is named DATE1
LEFT OUTER JOIN
ADT A
ON
D.WEEK_DATE = A.EFFECT_DATE
ORDER BY
D.WEEK_DATE)
i would like to end up with the patient id, the day of the week they have no census, the hosp admission & discharge dates
PAT_ID WEEK_DATE EVENT_TYPE HOSP_ADMSN_TIME HOSP_DISCH_TIME
ABCDEF 4/12/2019 NO CENSUS 4/10/2019 4/19/2019
ABCDEF 4/13/2019 NO CENSUS 4/10/2019 4/19/2019
GHIJK 4/8/2019 NO CENSUS 4/2/2019 4/12/2019
GHIJK 4/11/2019 NO CENSUS 4/2/2019 4/12/2019
Here is sample data for two patients:
-- Fixture CTEs (fragment -- note the trailing comma: the query that follows
-- continues the same WITH clause). events holds timestamped ADT events;
-- peh holds the admission/discharge window per patient.
events(pat_id, event_date, event_type) as (
select 'ABCD', to_date('2019-04-11 23:59', 'yyyy-mm-dd hh24:mi'), 'CENSUS' from dual union all
select 'ABCD', to_date('2019-04-12 15:12', 'yyyy-mm-dd hh24:mi'), 'TRANSFER OUT' from dual union all
select 'ABCD', to_date('2019-04-12 15:12', 'yyyy-mm-dd hh24:mi'), 'TRANSFER IN' from dual union all
select 'ABCD', to_date('2019-04-14 07:06', 'yyyy-mm-dd hh24:mi'), 'PATIENT UPDATE' from dual union all
select 'ABCD', to_date('2019-04-14 11:40', 'yyyy-mm-dd hh24:mi'), 'TRANSFER OUT' from dual union all
select 'ABCD', to_date('2019-04-14 11:40', 'yyyy-mm-dd hh24:mi'), 'TRANSFER IN' from dual union all
select 'ABCD', to_date('2019-04-14 23:59', 'yyyy-mm-dd hh24:mi'), 'CENSUS' from dual union all
select 'GHIJ', to_date('2019-05-17 23:59', 'yyyy-mm-dd hh24:mi'), 'CENSUS' from dual union all
select 'GHIJ', to_date('2019-05-19 23:59', 'yyyy-mm-dd hh24:mi'), 'CENSUS' from dual ),
peh(pat_id, hosp_admsn_time, hosp_disch_time) as (
select 'ABCD', date '2019-04-11', date '2019-04-14' from dual union all
select 'GHIJ', date '2019-05-17', date '2019-05-20' from dual ),
You can create recursive query generating days for each patient and check if there is CENSUS event for each of these days:
-- Recursive CTE: generates one row per patient per day of stay (num = day
-- offset from admission), then anti-joins (NOT EXISTS) to keep only days
-- with no CENSUS event. NOTE(review): requires the events/peh CTEs above to
-- be in the same WITH clause.
with cte(pat_id, num, adm, dis) as (
select pat_id, 0, hosp_admsn_time, hosp_disch_time from peh
union all
select pat_id, num + 1, adm, dis from cte where num < dis - adm)
select pat_id, day, 'NO CENSUS' info, adm, dis
from (select pat_id, adm + num day, adm, dis from cte) d
where not exists (
select 1
from events
where pat_id = d.pat_id and trunc(event_date) = d.day and event_type = 'CENSUS')
order by pat_id, day;
Result:
PAT_ID DAY INFO ADM DIS
------ ----------- --------- ----------- -----------
ABCD 2019-04-12 NO CENSUS 2019-04-11 2019-04-14
ABCD 2019-04-13 NO CENSUS 2019-04-11 2019-04-14
GHIJ 2019-05-18 NO CENSUS 2019-05-17 2019-05-20
GHIJ 2019-05-20 NO CENSUS 2019-05-17 2019-05-20
dbfiddle demo

Dense_rank query in sql(4 different columns) in

I have a table as follows:
Sn no. t_time Value rate
ABC 17-MAY-18 08:00:00 100.00 3
ABC 17-MAY-18 22:00:00 200.00 1
ABC 16-MAY-18 08:00:00 100.00 1
XYZ 14-MAY-18 01:00:00 700.00 1
XYZ 15-MAY-18 10:00:00 500.00 2
XYZ 15-MAY-18 13:00:00 100.00 2
And I want to generate the output as follows:
Sn no. New_value
ABC 150
XYZ 450
It is grouped by the Sn no. The New_value is the latest time of each date value multiplied by rate, and then averaged together.
For example ABC new_value is
Average of:[(100*1) and (200*1)]
It's a large dataset. How do I write a query for the above in the most efficient way? Please help.
You can use analytical function(row_number()) to achieve the result
-- Latest event of each calendar day per serial number, then AVG(value*rate).
-- Fixed: the posted query partitioned only by TRUNC(t_time); if two serial
-- numbers had events on the same date, only one row per date would survive
-- rn = 1 across ALL serial numbers. Partitioning by (Snno, TRUNC(t_time))
-- keeps one "latest" row per serial number per day. (The sample data happens
-- to have disjoint dates per serial number, so the shown output is the same.)
-- SQL*Plus prompt artifacts removed for a runnable script.
WITH cte_table(Snno, t_time, Value, rate) AS (
    SELECT 'ABC', to_date('2018-05-17 08:00:00', 'YYYY-MM-DD HH24:MI:SS'), 100.00, 3 FROM DUAL UNION ALL
    SELECT 'ABC', to_date('2018-05-17 22:00:00', 'YYYY-MM-DD HH24:MI:SS'), 200.00, 1 FROM DUAL UNION ALL
    SELECT 'ABC', to_date('2018-05-16 08:00:00', 'YYYY-MM-DD HH24:MI:SS'), 100.00, 1 FROM DUAL UNION ALL
    SELECT 'XYZ', to_date('2018-05-14 01:00:00', 'YYYY-MM-DD HH24:MI:SS'), 700.00, 1 FROM DUAL UNION ALL
    SELECT 'XYZ', to_date('2018-05-15 10:00:00', 'YYYY-MM-DD HH24:MI:SS'), 500.00, 2 FROM DUAL UNION ALL
    SELECT 'XYZ', to_date('2018-05-15 13:00:00', 'YYYY-MM-DD HH24:MI:SS'), 100.00, 2 FROM DUAL),
--------------------------------
-- End of data preparation
--------------------------------
rn_table AS (
    -- rn = 1 marks the latest event within each (serial number, day)
    SELECT t.*, row_number() OVER (PARTITION BY Snno, TRUNC(t_time) ORDER BY t_time DESC) AS rn
    FROM cte_table t)
SELECT snno,
       AVG(VALUE * rate) new_value
FROM rn_table
WHERE rn = 1
GROUP BY snno;
Output:
SNNO NEW_VALUE
---- ----------
ABC 150
XYZ 450
Use the ROW_NUMBER (or RANK/DENSE_RANK if it is more appropriate) analytic function in a sub-query and then aggregate in the outer query:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data using ANSI TIMESTAMP literals.
CREATE TABLE table_name ( Snno, t_time, Value, rate ) AS
SELECT 'ABC', TIMESTAMP '2018-05-17 08:00:00', 100.00, 3 FROM DUAL UNION ALL
SELECT 'ABC', TIMESTAMP '2018-05-17 22:00:00', 200.00, 1 FROM DUAL UNION ALL
SELECT 'ABC', TIMESTAMP '2018-05-16 08:00:00', 100.00, 1 FROM DUAL UNION ALL
SELECT 'XYZ', TIMESTAMP '2018-05-14 01:00:00', 700.00, 1 FROM DUAL UNION ALL
SELECT 'XYZ', TIMESTAMP '2018-05-15 10:00:00', 500.00, 2 FROM DUAL UNION ALL
SELECT 'XYZ', TIMESTAMP '2018-05-15 13:00:00', 100.00, 2 FROM DUAL;
Query 1:
-- NOTE(review): PARTITION BY snno, value keeps the latest row per distinct
-- VALUE (not per day), which is why the results below are 250 / 633.33 and
-- not the 150 / 450 the question asked for. Partitioning by
-- snno, TRUNC(t_time) would keep the latest row of each day instead.
SELECT snno,
AVG( value * rate ) As new_value
FROM (
SELECT t.*,
ROW_NUMBER() OVER (
PARTITION BY snno, value
ORDER BY t_time DESC
) AS rn
FROM table_name t
)
WHERE rn = 1
GROUP BY snno
Results:
| SNNO | NEW_VALUE |
|------|-------------------|
| ABC | 250 |
| XYZ | 633.3333333333334 |

oracle sum limited value, extra to other variable

Query:
select sum((out_time+0) - (in_time+0))*24 man_hours
from emp a,time_sheet b
where a.SUPERVISOR='43561'
and a.EMP_ID=b.EMP_ID;
Sample data in table
emp_id in_time out_time
40716 08-07-2016 09:00 08-07-2016 18:00
40716 07-07-2016 09:00 07-07-2016 18:00
40716 06-07-2016 09:00 06-07-2016 18:00
60383 06-07-2016 09:00 06-07-2016 18:00
60383 07-07-2016 09:00 07-07-2016 18:00
41223 07-07-2016 09:00 07-07-2016 18:00
41223 08-07-2016 09:00 08-07-2016 18:00
Result: Sum of differences from above query is 45
difference between time in each row is 9 hours.
Requirement : I want only <=8 hours to sum up. >8 hours should be as other value.
Current 9*5= 45, required 8*5 = 40 and extra 5
I tried with decode, I am getting some weird results, actually I am not getting any idea in mind. Pointing out in right way would be helpful.
Thanks
This should get you started:
-- Per-shift split into regular (capped at 8h) and overtime hours.
WITH
-- raw punch data, times still as strings
Timesheet_raw (emp_id, in_time, out_time) AS (
SELECT 40716, '08-07-2016 09:00', '08-07-2016 18:00' FROM DUAL UNION ALL
SELECT 40716, '07-07-2016 09:00', '07-07-2016 18:00' FROM DUAL UNION ALL
SELECT 40716, '06-07-2016 09:00', '06-07-2016 18:00' FROM DUAL UNION ALL
SELECT 60383, '06-07-2016 09:00', '06-07-2016 18:00' FROM DUAL UNION ALL
SELECT 60383, '07-07-2016 09:00', '07-07-2016 18:00' FROM DUAL UNION ALL
SELECT 41223, '07-07-2016 09:00', '07-07-2016 18:00' FROM DUAL UNION ALL
SELECT 41223, '08-07-2016 09:00', '08-07-2016 18:00' FROM DUAL
),
-- convert to DATEs; date subtraction yields days, *24 gives hours
Timesheet (emp_id, in_time, out_time, length_of_shift) AS (
SELECT
emp_id
, TO_DATE(in_time, 'DD-MM-YYYY HH24:MI')
, TO_DATE(out_time, 'DD-MM-YYYY HH24:MI')
, (TO_DATE(out_time, 'DD-MM-YYYY HH24:MI') - TO_DATE(in_time, 'DD-MM-YYYY HH24:MI')) * 24
FROM Timesheet_raw
)
-- LEAST caps regular time at 8h; GREATEST floors overtime at 0
SELECT
emp_id, LEAST(length_of_shift, 8) regular, GREATEST(length_of_shift - 8, 0) overtime FROM Timesheet
;
Please comment, if and as this requires adjustment / further detail.
This is your basic query.
There is one row for every employee for every day, time spent regular and time spent overtime (if there was any).
-- Same idea with CASE instead of GREATEST: one row per employee per shift,
-- split into regular (<= 8h) and overtime hours.
WITH MY_TABLE AS -- Dummy data, leave this out in your environment
(
SELECT 40716 AS ID, TO_DATE('08-07-2016 09:00', 'DD-MM-YYYY HH24:MI') AS TIME_START, TO_DATE('08-07-2016 18:00', 'DD-MM-YYYY HH24:MI') AS TIME_END FROM DUAL UNION
SELECT 40716, TO_DATE('07-07-2016 09:00', 'DD-MM-YYYY HH24:MI'), TO_DATE('07-07-2016 18:00', 'DD-MM-YYYY HH24:MI') FROM DUAL UNION
SELECT 40716, TO_DATE('06-07-2016 09:00', 'DD-MM-YYYY HH24:MI'), TO_DATE('06-07-2016 18:00', 'DD-MM-YYYY HH24:MI') FROM DUAL UNION
SELECT 60383, TO_DATE('06-07-2016 09:00', 'DD-MM-YYYY HH24:MI'), TO_DATE('06-07-2016 18:00', 'DD-MM-YYYY HH24:MI') FROM DUAL UNION
SELECT 60383, TO_DATE('07-07-2016 09:00', 'DD-MM-YYYY HH24:MI'), TO_DATE('07-07-2016 18:00', 'DD-MM-YYYY HH24:MI') FROM DUAL UNION
SELECT 41223, TO_DATE('07-07-2016 09:00', 'DD-MM-YYYY HH24:MI'), TO_DATE('07-07-2016 18:00', 'DD-MM-YYYY HH24:MI') FROM DUAL UNION
SELECT 41223, TO_DATE('08-07-2016 09:00', 'DD-MM-YYYY HH24:MI'), TO_DATE('08-07-2016 18:00', 'DD-MM-YYYY HH24:MI') FROM DUAL)
SELECT -- Actual query
ID,
LEAST (8, TOTAL_TIME) AS REGULAR_TIME, -- MIN of actual time and 8 hours
CASE -- If he worked less than 8 hours,
-- OT is 0, otherwise actual-8
WHEN TOTAL_TIME > 8
THEN TOTAL_TIME - 8
ELSE 0
END AS OVER_TIME
FROM
(
SELECT
ID,
(TIME_END - TIME_START)*24 AS TOTAL_TIME -- Oracle date subtraction returns days,
-- multiply by 24 to have hours
FROM
MY_TABLE
);
Result looks following
ID REGULAR_TIME OVER_TIME
40716 8 1
40716 8 1
40716 8 1
41223 8 1
41223 8 1
60383 8 1
60383 8 1
You can nest this query under another one and perform whatever you like, for example, group by id so you get total regular and over time in given time frame per empoyee.
Or sum it up, without any grouping to fulfil your original requirement.
-- Wrapper skeleton: sum the per-shift regular/overtime split into two totals.
SELECT
SUM(REGULAR_TIME),
SUM(OVER_TIME)
FROM(
-- nest previous select
);