I am trying to find the total duration consumed by each group (ID) by calculating the date differences in the following query:
-- Sample Start/END events per id. The final query reports, for each id, the
-- earliest and latest event time and the span between those two instants.
WITH event AS (
    SELECT 9000 AS id, TO_DATE('2018-03-01 09:00:00','RRRR-MM-DD HH24:MI:SS') AS timestamp, 'Start' AS event FROM DUAL UNION ALL
    SELECT 9000, TO_DATE('2018-03/10 10:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL UNION ALL
    SELECT 9001, TO_DATE('2018-03-10 11:00:00','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL UNION ALL
    SELECT 9001, TO_DATE('2018-03/20 10:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL UNION ALL
    SELECT 9000, TO_DATE('2018-03-20 10:05:00','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL UNION ALL
    SELECT 9000, TO_DATE('2018-03/25 09:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL UNION ALL
    SELECT 9001, TO_DATE('2018-03-25 10:15:00','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL UNION ALL
    SELECT 9001, TO_DATE('2018-03/26 12:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL UNION ALL
    SELECT 9002, TO_DATE('2017-03-26 14:30:27','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL UNION ALL
    SELECT 9002, TO_DATE('2017-04-05 15:02:56','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL
)
SELECT   t.id,
         MIN(t.timestamp)                    AS call_start_ts,
         MAX(t.timestamp)                    AS call_end_ts,
         -- DATE minus DATE yields a number of days
         MAX(t.timestamp) - MIN(t.timestamp) AS duration
FROM     event t
GROUP BY t.id
ORDER BY t.id;
I have also set up a SQLFiddle.
Please help me
EDIT
Expected Result will be like below
Use the LAG or LEAD analytic functions to get the next END event's time:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Fixture table: one row per Start/END event, keyed by id.
-- Column names come from the aliases on the first SELECT branch.
CREATE TABLE event AS
    SELECT 9000 AS id, TO_DATE('2018-03-01 09:00:00','RRRR-MM-DD HH24:MI:SS') AS timestamp, 'Start' AS event FROM DUAL
    UNION ALL SELECT 9000, TO_DATE('2018-03/10 10:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL
    UNION ALL SELECT 9001, TO_DATE('2018-03-10 11:00:00','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL
    UNION ALL SELECT 9001, TO_DATE('2018-03/20 10:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL
    UNION ALL SELECT 9000, TO_DATE('2018-03-20 10:05:00','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL
    UNION ALL SELECT 9000, TO_DATE('2018-03/25 09:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL
    UNION ALL SELECT 9001, TO_DATE('2018-03-25 10:15:00','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL
    UNION ALL SELECT 9001, TO_DATE('2018-03/26 12:00:00','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL
    UNION ALL SELECT 9002, TO_DATE('2017-03-26 14:30:27','RRRR-MM-DD HH24:MI:SS'), 'Start' FROM DUAL
    UNION ALL SELECT 9002, TO_DATE('2017-04-05 15:02:56','RRRR-MM-DD HH24:MI:SS'), 'END'   FROM DUAL;
Query 1:
-- Pair every 'Start' row with the timestamp of the row that follows it (per id,
-- in time order) when that row is an END event, then total the paired
-- intervals for each id. Durations are in days (DATE arithmetic).
WITH paired AS (
    SELECT id,
           timestamp AS start_time,
           event,
           LEAD( CASE WHEN event = 'END' THEN timestamp END )
               OVER ( PARTITION BY id ORDER BY timestamp ) AS end_time
    FROM   event
)
SELECT   id,
         MIN( start_time )            AS start_ts,
         MAX( end_time )              AS end_ts,
         SUM( end_time - start_time ) AS duration
FROM     paired
WHERE    event = 'Start'
GROUP BY id
ORDER BY id
Results:
| ID | START_TS | END_TS | DURATION |
|------|----------------------|----------------------|--------------------|
| 9000 | 2018-03-01T09:00:00Z | 2018-03-25T09:00:00Z | 13.996527777777779 |
| 9001 | 2018-03-10T11:00:00Z | 2018-03-26T12:00:00Z | 11.03125 |
| 9002 | 2017-03-26T14:30:27Z | 2017-04-05T15:02:56Z | 10.02255787037037 |
I solved the problem in two steps: first I match the Start and END records that belong to the same interval, then I sum up the durations of those intervals.
http://sqlfiddle.com/#!4/73f48/83
-- Per id, total the time from each 'Start' event to the earliest later 'END'
-- event of the same id, rounded to whole days.
SELECT
    Id,
    round(SUM(duration))
FROM
    (
        -- One row per Start event: elapsed days until the first END after it.
        SELECT
            t.Id,
            MIN(t2.TIMESTAMP) - t.TIMESTAMP AS duration
        FROM
            event t
            -- explicit join instead of the comma join, so the pairing
            -- condition cannot be dropped by accident
            INNER JOIN event t2
                ON  t2.Id = t.Id
                AND t2.TIMESTAMP > t.TIMESTAMP
        WHERE
            t.Event = 'Start'
            AND t2.Event = 'END'
        GROUP BY
            t.TIMESTAMP,
            t.Id
    )
GROUP BY
    Id
-- Per id, sum the time between each 'Start' event and the END event that
-- directly follows it, rounded to 3 decimals (days).
select
    id, round(sum(end_timestamp - start_timestamp),3) DURATION
from (
    select
        t.id,
        t.event,
        t.timestamp START_TIMESTAMP,
        -- "event desc" breaks timestamp ties so that 'Start' sorts before 'END'
        case when LEAD(t.event,1) OVER (partition by id order by timestamp, event desc) = 'END'
             then LEAD(t.timestamp,1) OVER (partition by id order by timestamp, event desc)
             else null end as END_TIMESTAMP
    from event t
) tt
-- Only Start rows may open an interval; without this filter an END row that
-- is followed by another END row would be counted as an interval too.
where event = 'Start'
  and end_timestamp is not null
group by id
Solution to your problem:
-- Total time between each 'Start' event and the 'END' event that directly
-- follows it (per ID, in time order), summed per ID and rounded to whole days.
WITH event AS (
SELECT 9000 AS ID, TO_DATE('2018-03-01 09:00:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'Start' AS EVENT FROM DUAL UNION ALL
SELECT 9000 AS ID, TO_DATE('2018-03/10 10:00:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'END' AS EVENT FROM DUAL UNION ALL
SELECT 9001 AS ID, TO_DATE('2018-03-10 11:00:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'Start' AS EVENT FROM DUAL UNION ALL
SELECT 9001 AS ID, TO_DATE('2018-03/20 10:00:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'END' AS EVENT FROM DUAL UNION ALL
SELECT 9000 AS ID, TO_DATE('2018-03-20 10:05:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'Start' AS EVENT FROM DUAL UNION ALL
SELECT 9000 AS ID, TO_DATE('2018-03/25 09:00:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'END' AS EVENT FROM DUAL UNION ALL
SELECT 9001 AS ID, TO_DATE('2018-03-25 10:15:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'Start' AS EVENT FROM DUAL UNION ALL
SELECT 9001 AS ID, TO_DATE('2018-03/26 12:00:00','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'END' AS EVENT FROM DUAL UNION ALL
SELECT 9002 AS ID, TO_DATE('2017-03-26 14:30:27','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'Start' AS EVENT FROM DUAL UNION ALL
SELECT 9002 AS ID, TO_DATE('2017-04-05 15:02:56','RRRR-MM-DD HH24:MI:SS') AS TIMESTAMP, 'END' AS EVENT FROM DUAL
)
-- Number the events of each ID in chronological order.
, rn_event AS
(
    SELECT ID,
           TIMESTAMP,
           EVENT,
           ROW_NUMBER() OVER (PARTITION BY ID ORDER BY TIMESTAMP) AS rn
    FROM event
)
-- Pair each END row (e) with the Start row (f) immediately before it.
, diff_event AS
(
    SELECT e.ID,
           f.TIMESTAMP AS Start_time,
           e.TIMESTAMP AS End_Time,
           e.TIMESTAMP - f.TIMESTAMP AS duration
    FROM rn_event e
    INNER JOIN rn_event f
        ON f.ID = e.ID AND f.rn = e.rn - 1
    WHERE f.EVENT = 'Start'
      AND e.EVENT = 'END'   -- a Start followed by another Start is not an interval
)
SELECT ID,
       MIN(Start_time) AS START_TS,
       MAX(End_Time)   AS END_TS,
       ROUND(SUM(duration)) AS Duration
FROM diff_event
GROUP BY ID
ORDER BY ID;   -- make the row order deterministic
OUTPUT:
ID START_TS END_TS DURATION
9000 2018-03-01T09:00:00Z 2018-03-25T09:00:00Z 14
9001 2018-03-10T11:00:00Z 2018-03-26T12:00:00Z 11
9002 2017-03-26T14:30:27Z 2017-04-05T15:02:56Z 10
A demo for the above query:
http://sqlfiddle.com/#!4/73f48/87
Related
I'm stuck with the following problem and need help:
An object has properties that are calculated every day.
They are stored in a key-value historical table.
A property value is mistakenly stored every day, even when it has not changed.
I need a query that will group this data set by "actual values":
If a value was not changed during several days it is output as one row.
If value A was changed to B then back to A, then A, B, A should be output by the query (first A and second A are different date intervals).
Here is a dataset example.
-- Daily key/value history for one object: value A for three days, then B for
-- three days, then A again for three days. Each row covers a single day.
with obj_val_hist as
(
    select 123 as obj_id, 'k_1' as key, 'A' as value_, to_date('01.01.2021', 'DD.MM.YYYY') as start_dt, to_date('01.01.2021', 'DD.MM.YYYY') as end_dt from dual
    union all select 123, 'k_1', 'A', to_date('02.01.2021', 'DD.MM.YYYY'), to_date('02.01.2021', 'DD.MM.YYYY') from dual
    union all select 123, 'k_1', 'A', to_date('03.01.2021', 'DD.MM.YYYY'), to_date('03.01.2021', 'DD.MM.YYYY') from dual
    union all select 123, 'k_1', 'B', to_date('04.01.2021', 'DD.MM.YYYY'), to_date('04.01.2021', 'DD.MM.YYYY') from dual
    union all select 123, 'k_1', 'B', to_date('05.01.2021', 'DD.MM.YYYY'), to_date('05.01.2021', 'DD.MM.YYYY') from dual
    union all select 123, 'k_1', 'B', to_date('06.01.2021', 'DD.MM.YYYY'), to_date('06.01.2021', 'DD.MM.YYYY') from dual
    union all select 123, 'k_1', 'A', to_date('07.01.2021', 'DD.MM.YYYY'), to_date('07.01.2021', 'DD.MM.YYYY') from dual
    union all select 123, 'k_1', 'A', to_date('08.01.2021', 'DD.MM.YYYY'), to_date('08.01.2021', 'DD.MM.YYYY') from dual
    union all select 123, 'k_1', 'A', to_date('09.01.2021', 'DD.MM.YYYY'), to_date('09.01.2021', 'DD.MM.YYYY') from dual
)
select obj_id,
       key,
       value_,
       start_dt,
       end_dt
from   obj_val_hist
where  obj_id = 123;
Data set:
obj_id
key
value
start_date
end_date
123
k_1
A
01.01.2021
01.01.2021
123
k_1
A
02.01.2021
02.01.2021
123
k_1
A
03.01.2021
03.01.2021
123
k_1
B
04.01.2021
04.01.2021
123
k_1
B
05.01.2021
05.01.2021
123
k_1
B
06.01.2021
06.01.2021
123
k_1
A
07.01.2021
07.01.2021
123
k_1
A
08.01.2021
08.01.2021
123
k_1
A
09.01.2021
09.01.2021
Expected result:
obj_id
key
value
start_date
end_date
123
k_1
A
01.01.2021
03.01.2021
123
k_1
B
04.01.2021
06.01.2021
123
k_1
A
07.01.2021
09.01.2021
This table contains values for million objects.
It is queried by obj_id and has an index on it.
Performance is a key point so using stored functions is most probably not an option.
This query will be a small part of a big view that is used by an external system.
I expected that there should be an analytic function suited for such a problem.
Something like dense_rank but with the possibility to order by one column (start_dt) but increase value when another column (value_) gets a different value.
But I didn't find one.
You may use match_recognize for this, which can also handle gaps in dates and is quite efficient and natural to read:
/* Fixture for the match_recognize example: the sample data from the question
   plus a one-day gap (10.01 is missing) and one row spanning two days. */
create table t (
obj_id
, key_
, value_
, start_date
, end_date
)
as
select 123, 'k_1', 'A', to_date('01.01.2021', 'dd.mm.yyyy'), to_date('01.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('02.01.2021', 'dd.mm.yyyy'), to_date('02.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('03.01.2021', 'dd.mm.yyyy'), to_date('03.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'B', to_date('04.01.2021', 'dd.mm.yyyy'), to_date('04.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'B', to_date('05.01.2021', 'dd.mm.yyyy'), to_date('05.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'B', to_date('06.01.2021', 'dd.mm.yyyy'), to_date('06.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('07.01.2021', 'dd.mm.yyyy'), to_date('07.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('08.01.2021', 'dd.mm.yyyy'), to_date('08.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('09.01.2021', 'dd.mm.yyyy'), to_date('09.01.2021', 'dd.mm.yyyy') from dual union all
/*Let's skip 10.01*/
select 123, 'k_1', 'A', to_date('11.01.2021', 'dd.mm.yyyy'), to_date('11.01.2021', 'dd.mm.yyyy') from dual union all
/*And extend the validity period for one record*/
select 123, 'k_1', 'A', to_date('12.01.2021', 'dd.mm.yyyy'), to_date('13.01.2021', 'dd.mm.yyyy') from dual union all
select 123, 'k_1', 'A', to_date('14.01.2021', 'dd.mm.yyyy'), to_date('14.01.2021', 'dd.mm.yyyy') from dual;
/*Merge consecutive rows of the same obj_id/key_ that carry the same value_
and abut in time into a single validity interval.*/
select *
from t
match_recognize (
/*Each (obj_id, key_) pair is matched independently*/
partition by obj_id, key_
order by start_date asc
/*Output attributes (one row per match, which is the default)*/
measures
/*start_date of the first row in the match group*/
final first(start_date) as min_start_date,
/*end_date of the last row in the match group*/
final last(end_date) as max_end_date,
/*the value itself, as it is constant within a match group*/
value_ as val
/*init has no DEFINE entry, so it matches any row and opens a group;
A* then absorbs every consecutive row that continues that group*/
pattern (init A*)
define
/*A row continues the current group when it has the same value_ as the
previous row and its start_date is exactly one day after the previous
row's end_date; a larger gap (or an overlap) starts a new group
*/
A as prev(value_) = value_
and prev(end_date) + 1 = start_date
)
OBJ_ID | KEY_ | MIN_START_DATE | MAX_END_DATE | VAL
-----: | :--- | :------------- | :----------- | :--
123 | k_1 | 01-JAN-21 | 03-JAN-21 | A
123 | k_1 | 04-JAN-21 | 06-JAN-21 | B
123 | k_1 | 07-JAN-21 | 09-JAN-21 | A
123 | k_1 | 11-JAN-21 | 14-JAN-21 | A
db<>fiddle here
If you indeed have data for every day, then you can use the following relatively simple logic. The subquery calculates when the value changes. The outer query then calculates the end date by looking at the date in the next row:
-- One row per run of identical values for object 123.
-- Inner query: tag each row with the previous row's value and the overall
-- last end_dt of its (obj_id, key).
-- Outer query: keep only the rows where the value changes; a run ends the day
-- before the next run starts, and the last run ends at max_end_dt.
-- Relies on there being a row for every day (no gaps between rows).
select obj_id,
       key,
       value_,
       start_dt,
       -- the alias is required if this query is embedded in a view (ORA-00998 otherwise)
       coalesce(lead(start_dt) over (partition by obj_id, key order by start_dt) - interval '1' day,
                max_end_dt) as end_dt
from (select ovh.*,
             lag(value_) over (partition by obj_id, key order by start_dt) as prev_value_,
             max(end_dt) over (partition by obj_id, key) as max_end_dt
      from obj_val_hist ovh
      where obj_id = 123
     ) ovh
where prev_value_ is null or prev_value_ <> value_;
However, your data suggests that you could have a much more complicated problem. You have two dates in the row, a start date and end date. These could, in theory, overlap or have gaps. You can handle that by assigning groups when a new key/value pair starts and then aggregating:
-- Gaps-and-islands variant that tolerates gaps and overlaps between rows.
-- 1. prev_end_dt: latest end_dt among earlier rows (start_dt at least one day
--    before this row's) with the same obj_id/key/value_.
-- 2. grp: running count of rows that do NOT continue such an earlier row,
--    i.e. an island number within (obj_id, key).
-- 3. Aggregate each island into a single [start_dt, end_dt] interval.
select obj_id,
       key,
       value_,
       -- aliases are required if this query is embedded in a view (ORA-00998 otherwise)
       min(start_dt) as start_dt,
       max(end_dt)   as end_dt
from (select ovh.*,
             sum(case when prev_end_dt >= start_dt - interval '1' day then 0 else 1 end)
                 over (partition by obj_id, key order by start_dt) as grp
      from (select ovh.*,
                   max(end_dt) over (partition by obj_id, key, value_
                                     order by start_dt
                                     range between unbounded preceding and interval '1' day preceding
                                    ) as prev_end_dt
            from obj_val_hist ovh
           ) ovh
     ) ovh
group by obj_id, key, value_, grp;
Here is a db<>fiddle.
I have an SQL problem that I am not able to solve.
First of all, an SQL fiddle with it: http://sqlfiddle.com/#!4/fe7b07/2
As you see, I fill the table with some dates, which are bound to some ID. Those dates are day by day. So for this example, we'd have something like this, if we only look at January:
The timelines spanning from 2020-01-01 to 2020-01-31, the blocks are the dates in the database. So this would be the simple SELECT * FROM days output.
What I now want is to fill in some days to this output. These would span from timeline_begin to MIN(date_from); and from MAX(date_from) to timeline_end.
I'll mark these red in the following picture:
The orange span is not necessary to be added, too, but if your solution would do that too, that would be also ok.
Ok, so far so good.
For this I created the SELECT * FROM minmax, which will select the MIN(date_from) and MAX(date_from) for every id_othertable. Still no magic involved.
What I struggle is now creating those days for every id_othertable, while also joining the data they have on them (in this fiddle, it's just the some_info field).
I tried to write this in the SELECT * FROM days_before query, but I just can't get it to work. I read about the magical function CONNECT BY, which will on its own create dates line by line, but I can't get to join my data from the former table. Every time I join the info, I only get one line per id_othertable, not all those dates I need.
So the ideal solution I'm looking for would be to have three select queries:
SELECT * FROM days which select dates out of the database
SELECT * FROM days_before which will show the dates before MIN(date_from) of query 1
SELECT * FROM days_after for dates after MAX(date_from) of query 1
And in the end I'd UNION those three queries to have them all combined.
I hope I could explain my problem good enough. If you need any information or further explaining, please don't hesitate to ask.
EDIT 1: I created a pastebin with some example data: https://pastebin.com/jskrStpZ
Bear in mind that only the first query has actual information from the database, the other two have created data. Also, this example output only has data for id_othertable = 1, so the actual query should also have the information for id_othertable = 2, 3.
EDIT 2: just for clarification, the field date_to is just a simple date_from + 1 day.
If you have denormalised data, it's quite simple:
-- Sample rows: one row per stored day for each id_other_table.
with bas as (
select 1 id_other_table, to_date('2020-01-05', 'YYYY-MM-DD') date_from, to_date('2020-01-06', 'YYYY-MM-DD') date_to, 'hello' some_info from dual
union all select 1 id_other_table, to_date('2020-01-06', 'YYYY-MM-DD') date_from, to_date('2020-01-07', 'YYYY-MM-DD') date_to, 'hello' some_info from dual
union all select 1 id_other_table, to_date('2020-01-07', 'YYYY-MM-DD') date_from, to_date('2020-01-08', 'YYYY-MM-DD') date_to, 'hello' some_info from dual
union all select 1 id_other_table, to_date('2020-01-10', 'YYYY-MM-DD') date_from, to_date('2020-01-11', 'YYYY-MM-DD') date_to, 'hello' some_info from dual
union all select 1 id_other_table, to_date('2020-01-11', 'YYYY-MM-DD') date_from, to_date('2020-01-12', 'YYYY-MM-DD') date_to, 'hello' some_info from dual
union all select 1 id_other_table, to_date('2020-01-12', 'YYYY-MM-DD') date_from, to_date('2020-01-13', 'YYYY-MM-DD') date_to, 'hello' some_info from dual
union all select 2 id_other_table, to_date('2020-01-10', 'YYYY-MM-DD') date_from, to_date('2020-01-11', 'YYYY-MM-DD') date_to, 'my' some_info from dual
union all select 2 id_other_table, to_date('2020-01-11', 'YYYY-MM-DD') date_from, to_date('2020-01-12', 'YYYY-MM-DD') date_to, 'my' some_info from dual
union all select 2 id_other_table, to_date('2020-01-12', 'YYYY-MM-DD') date_from, to_date('2020-01-13', 'YYYY-MM-DD') date_to, 'my' some_info from dual
union all select 3 id_other_table, to_date('2020-01-20', 'YYYY-MM-DD') date_from, to_date('2020-01-21', 'YYYY-MM-DD') date_to, 'friend' some_info from dual
union all select 3 id_other_table, to_date('2020-01-21', 'YYYY-MM-DD') date_from, to_date('2020-01-22', 'YYYY-MM-DD') date_to, 'friend' some_info from dual
union all select 3 id_other_table, to_date('2020-01-22', 'YYYY-MM-DD') date_from, to_date('2020-01-23', 'YYYY-MM-DD') date_to, 'friend' some_info from dual)
-- Calendar of the 31 days of January 2020. Anchored to the month of the sample
-- data rather than to the current year, so the result does not depend on when
-- the query is run.
, ad as (
    select date '2020-01-01' + level - 1 as all_dates
    from dual
    connect by level <= 31
)
-- Every (some_info, day) combination of the calendar that has no row in bas.
-- DISTINCT is needed because bas holds several rows per some_info.
select distinct
       b.some_info,
       ad.all_dates
from bas b
cross join ad
-- NOT EXISTS instead of NOT IN: stays correct if a compared column is NULL
where not exists (
          select 1
          from bas x
          where x.some_info = b.some_info
            and x.date_from = ad.all_dates
      )
order by b.some_info, ad.all_dates
If you have longer date ranges, or are concerned about how long the query takes, a different solution would be more suitable. However, that is harder to debug, because it is quite hard to get the orange time slot.
If you want the dates per id that are not in the database then you can use the LEAD analytic function:
-- Gaps (periods with no stored row) per id inside January 2020.
-- in_window: stored rows that overlap the month.
-- dates:     the gap before each id's first row, plus the gap after every row
--            (up to the next row's start, or to the end of the month).
-- Zero-length and negative "gaps" are dropped by the final filter.
WITH in_window AS (
    SELECT id_othertable,
           date_from,
           date_to
    FROM   some_dates
    WHERE  date_to   > DATE '2020-01-01'
    AND    date_from < ADD_MONTHS( DATE '2020-01-01', 1 )
),
dates ( id, date_from, date_to ) AS (
    SELECT   id_othertable,
             DATE '2020-01-01',
             MIN( date_from )
    FROM     in_window
    GROUP BY id_othertable
    UNION ALL
    SELECT   id_othertable,
             date_to,
             LEAD( date_from, 1, ADD_MONTHS( DATE '2020-01-01', 1 ) )
                 OVER ( PARTITION BY id_othertable ORDER BY date_from )
    FROM     in_window
)
SELECT   id,
         date_from,
         date_to
FROM     dates
WHERE    date_from < date_to
ORDER BY id, date_from;
so for the test data:
-- Fixture: stored date ranges per id_othertable, including ranges that start
-- before, end after, or completely cover January 2020.
-- Column names come from the aliases on the first SELECT branch.
CREATE TABLE some_dates AS
    SELECT 1 AS id_othertable, DATE '2020-01-05' AS date_from, DATE '2020-01-06' AS date_to, 'hello1' AS some_info FROM DUAL
    UNION ALL SELECT 1, DATE '2020-01-06', DATE '2020-01-07', 'hello2'      FROM DUAL
    UNION ALL SELECT 1, DATE '2020-01-07', DATE '2020-01-08', 'hello3'      FROM DUAL
    UNION ALL SELECT 1, DATE '2020-01-10', DATE '2020-01-13', 'hello4'      FROM DUAL
    UNION ALL SELECT 2, DATE '2020-01-10', DATE '2020-01-13', 'my'          FROM DUAL
    UNION ALL SELECT 3, DATE '2020-01-20', DATE '2020-01-23', 'friend'      FROM DUAL
    UNION ALL SELECT 4, DATE '2019-12-31', DATE '2020-01-05', 'before'      FROM DUAL
    UNION ALL SELECT 4, DATE '2020-01-30', DATE '2020-02-02', 'after'       FROM DUAL
    UNION ALL SELECT 5, DATE '2019-12-31', DATE '2020-01-10', 'only_before' FROM DUAL
    UNION ALL SELECT 6, DATE '2020-01-15', DATE '2020-02-01', 'only_after'  FROM DUAL
    UNION ALL SELECT 7, DATE '2019-12-31', DATE '2020-02-01', 'exlude_all'  FROM DUAL;
this outputs:
ID | DATE_FROM | DATE_TO
-: | :--------- | :---------
1 | 2020-01-01 | 2020-01-05
1 | 2020-01-08 | 2020-01-10
1 | 2020-01-13 | 2020-02-01
2 | 2020-01-01 | 2020-01-10
2 | 2020-01-13 | 2020-02-01
3 | 2020-01-01 | 2020-01-20
3 | 2020-01-23 | 2020-02-01
4 | 2020-01-05 | 2020-01-30
5 | 2020-01-10 | 2020-02-01
6 | 2020-01-01 | 2020-01-15
db<>fiddle here
If you want the days before then filter on:
-- the query's output column is date_from (there is no day_from)
WHERE date_from = DATE '2020-01-01'
and, similarly, if you want the days after then filter on:
-- the query's output column is date_to (there is no day_to)
WHERE date_to = ADD_MONTHS( DATE '2020-01-01', 1 )
If you want to specify the start date and number of months duration then use named bind parameters:
-- Same gap search, parameterised with bind variables:
--   :start_date    first day of the window
--   :number_months length of the window in months
WITH in_window AS (
    SELECT id_othertable,
           date_from,
           date_to
    FROM   some_dates
    WHERE  date_to   > :start_date
    AND    date_from < ADD_MONTHS( :start_date, :number_months )
),
dates ( id, date_from, date_to ) AS (
    -- gap from the window start to each id's first stored row
    SELECT   id_othertable,
             :start_date,
             MIN( date_from )
    FROM     in_window
    GROUP BY id_othertable
    UNION ALL
    -- gap from each stored row to the next one, or to the window end
    SELECT   id_othertable,
             date_to,
             LEAD( date_from, 1, ADD_MONTHS( :start_date, :number_months ) )
                 OVER ( PARTITION BY id_othertable ORDER BY date_from )
    FROM     in_window
)
SELECT   id,
         date_from,
         date_to
FROM     dates
WHERE    date_from < date_to
ORDER BY id, date_from;
Select whole range using connect by generator. Join your table partitioned by id.
-- Densify the data: generate one row per calendar day of January 2020 and
-- outer-join the stored rows to it. PARTITION BY repeats the whole calendar
-- for every id_othertable, so days without a stored row still appear (with
-- NULL some_info and a computed one-day date_to).
select gen.date_from,
       coalesce(sd.date_to, gen.date_from + 1) as date_to,
       sd.id_othertable,
       sd.some_info
from (
    select date '2020-01-01' + level - 1 as date_from
    from dual
    -- "+ 1" makes the range inclusive, so 31 January is generated as well
    connect by level <= date '2020-01-31' - date '2020-01-01' + 1
) gen
-- explicit join condition instead of NATURAL JOIN, which would silently
-- change meaning if the two sides ever shared another column name
left join some_dates sd partition by (sd.id_othertable)
    on sd.date_from = gen.date_from
sqlfiddle
I have a following data which shows the status of a support ticket:
Edit:
More concise and generic example:
STATUS SEQ_NO
New 1
Open 2
Open 3
Open 4
Queued 5
Open 6
Open 7
Open 8
Completed 9
Completed 10
Completed 11
Closed 12
From this, I would like to extract the records,
STATUS SEQ_NO
New 1
Open 2
Queued 5
Open 6
Completed 9
Closed 12
Original question:
-- SELECT status, start_time FROM events_tab ORDER BY start_time;
STATUS START_TIME
New 30/09/2014 3:48:10 PM -- I want this record,
Open 30/09/2014 3:48:10 PM -- and this,
Open 1/10/2014 10:41:57 AM
Open 4/03/2015 9:59:04 AM
Queued 18/06/2015 1:31:30 PM -- and this,
Open 20/06/2015 10:10:47 PM -- and this,
Open 20/06/2015 11:20:11 PM
Open 27/06/2015 1:18:50 PM
Completed 27/06/2015 1:22:08 PM -- and this,
Completed 28/09/2015 9:31:55 AM
Completed 5/10/2015 11:57:38 AM
Closed 11/01/2016 9:31:26 AM -- and this.
These are events that happened in each state. I want to make a timeline of state changes from it.
I want to squash these records such that only the very first row of each group is shown. However, notice that there are actually two groups with Open status, so I should get two records with Open status.
Basically I want the following result:
STATUS START_TIME
New 30/09/2014 3:48:10 PM
Open 30/09/2014 3:48:10 PM
Queued 18/06/2015 1:31:30 PM
Open 20/06/2015 10:10:47 PM
Completed 27/06/2015 1:22:08 PM
Closed 11/01/2016 9:31:26 AM
How can I achieve this with an SQL statement?
I have tried,
-- One row per distinct status with its earliest start_time. Separate runs of
-- the same status are merged, because GROUP BY status sees only the value.
SELECT status, MIN(start_time)
FROM events_tab
GROUP BY status;
But this does not include multiple records in Open status, as my intention above.
You can use the Tabibitosan technique to achieve this goal:
-- Tabibitosan: within a run of identical statuses, the overall row number and
-- the per-status row number increase together, so their difference is constant
-- for the run and identifies it (together with the status).
WITH your_table AS (
    SELECT 'New' AS status, to_date('30/09/2014 03:48:10 PM', 'dd/mm/yyyy hh:mi:ss AM') AS start_time FROM dual
    UNION ALL SELECT 'Open',      to_date('30/09/2014 03:48:10 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Open',      to_date('1/10/2014 10:41:57 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Open',      to_date('4/03/2015 09:59:04 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Queued',    to_date('18/06/2015 01:31:30 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Open',      to_date('20/06/2015 10:10:47 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Open',      to_date('20/06/2015 11:20:11 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Open',      to_date('27/06/2015 01:18:50 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Completed', to_date('27/06/2015 01:22:08 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Completed', to_date('28/09/2015 09:31:55 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Completed', to_date('5/10/2015 11:57:38 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
    UNION ALL SELECT 'Closed',    to_date('11/01/2016 09:31:26 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual
),
numbered AS (
    SELECT status,
           start_time,
           row_number() OVER (ORDER BY start_time, status)                     AS overall_rn,
           row_number() OVER (PARTITION BY status ORDER BY start_time, status) AS status_rn
    FROM   your_table
)
SELECT   status,
         MIN(start_time) start_time
FROM     numbered
GROUP BY status, overall_rn - status_rn
ORDER BY start_time, status;
STATUS START_TIME
--------- -------------------
New 30/09/2014 15:48:10
Open 30/09/2014 15:48:10
Queued 18/06/2015 13:31:30
Open 20/06/2015 22:10:47
Completed 27/06/2015 13:22:08
Closed 11/01/2016 09:31:26
N.B. Since you have rows with different statuses having the same start_time, I have added status into the order by, in order to get the results you were after. I don't know if that was a typo, or whether multiple rows really can have the same date.
Also, I assume that the data in your example refers to one "thing", but in your real table, you can have multiple "things" each with their own set of statuses etc.
In that case, you would need to add the column(s) that differentiate the "things" (e.g. id or event_name or etc) into both row_number() analytic functions. (e.g. row_number() over (partition by <thing column(s)> order by start_time, status))
You can also try the SQL for Pattern Matching
-- Sample ticket history: one row per event with the status at that time.
WITH tickets(STATUS, START_TIME) AS (
SELECT 'New', TO_DATE('30/09/2014 3:48:10 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Open', TO_DATE('30/09/2014 3:48:10 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Open', TO_DATE('1/10/2014 10:41:57 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Open', TO_DATE('4/03/2015 9:59:04 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Queued', TO_DATE('18/06/2015 1:31:30 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Open', TO_DATE('20/06/2015 10:10:47 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Open', TO_DATE('20/06/2015 11:20:11 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Open', TO_DATE('27/06/2015 1:18:50 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Completed', TO_DATE('27/06/2015 1:22:08 PM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Completed', TO_DATE('28/09/2015 9:31:55 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Completed', TO_DATE('5/10/2015 11:57:38 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual UNION ALL
SELECT 'Closed', TO_DATE('11/01/2016 9:31:26 AM', 'dd/mm/yyyy hh:mi:ss AM') FROM dual)
-- Return every row whose status differs from the row before it, plus the first row.
SELECT STATUS, START_TIME
FROM tickets
MATCH_RECOGNIZE (
-- NOTE(review): 'New' and the first 'Open' share the same START_TIME, so their
-- relative order is not fixed by this ORDER BY alone; consider a tie-breaker.
ORDER BY START_TIME
MEASURES
START_TIME AS START_TIME,
STATUS as STATUS
-- each match is a single row that satisfies CHNG
PATTERN ( CHNG )
DEFINE
-- a row qualifies when its status changed, or when there is no previous row
CHNG AS CHNG.STATUS <> PREV(CHNG.STATUS) OR PREV(CHNG.STATUS) IS NULL
)
STATUS START_TIME
========== ====================
New 30.09.2014 15:48:10
Open 30.09.2014 15:48:10
Queued 18.06.2015 13:31:30
Open 20.06.2015 22:10:47
Completed 27.06.2015 13:22:08
Closed 11.01.2016 09:31:26
CHNG.STATUS <> PREV(CHNG.STATUS) matches each row where STATUS is different to previous row. PREV(CHNG.STATUS) IS NULL is used to get also the very first row.
use row_number window function
-- First row of every run of consecutive identical statuses.
-- The difference between the overall row number and the per-status row number
-- is constant within a run, so (STATUS, grp) identifies each run. Partitioning
-- by status and calendar year instead would return the first row per status
-- per year, not per run.
select STATUS, min(START_TIME) as START_TIME from
(
select STATUS, START_TIME,
row_number() over (order by START_TIME, STATUS)
- row_number() over (partition by STATUS order by START_TIME, STATUS) as grp
from events_tab
) t
group by STATUS, grp
order by START_TIME, STATUS
Use LAG Function as you need to track the change in status:
https://dbfiddle.uk/?rdbms=oracle_11.2&fiddle=38a991b698c858f6f0417c7d4c0dc9d3
-- Rows at which the status changes, in chronological order.
-- dt is parsed into a DATE so that ordering is chronological rather than
-- alphabetical; the last sample value gained the "PM" marker that the format
-- mask requires.
with cte1 (st,dt) as
(
select 'New' as st, to_date('30/09/2014 3:48:10 PM', 'dd/mm/yyyy hh:mi:ss AM') as dt from dual
union all
select 'Open' as st, to_date('30/09/2014 3:48:10 PM', 'dd/mm/yyyy hh:mi:ss AM') as dt from dual
union all
select 'Open' as st, to_date('20/09/2014 3:48:10 PM', 'dd/mm/yyyy hh:mi:ss AM') as dt from dual
union all
select 'Qued' as st, to_date('18/06/2015 1:31:30 PM', 'dd/mm/yyyy hh:mi:ss AM') as dt from dual
)
select st, dt as d
from
(
SELECT st, dt,
-- previous status in time order (st breaks ties between equal timestamps)
LAG (st,1) OVER (ORDER BY dt, st) AS prev_st
FROM cte1
)a
-- keep the first row and every row whose status differs from the one before it
where prev_st is null or prev_st <> st
order by dt, st
I am trying to perform aggregation on a table. But it is not aggregating properly for some cases. Please find the below input.
Table t1.
CHANNEL;VALUE;STATUS;ERROR_CODE;RND_TIMESTAMP;SESSION_CD;NAR;
-------------------------------------------------------------
USD;4;12;;2-NOV-2015 11:00:00;;
USD;4;12;;2-NOV-2015 11:00:00;;
USD;2;12;;2-NOV-2015 11:00:00;;
USD;3;12;;2-NOV-2015 11:00:00;;
Output table t2
CHANNEL;VALUE;STATUS;ERROR_CODE;HOUR_TIMESTAMP;SESSION_CD;NAR;
--------------------------------------------------------------
USD;5;12;;2-NOV-2015 11:00:00;;
Query:
-- Sum VALUE over the t1 rows that share every other attribute, considering
-- only rows newer than the latest hour already present in t2.
with last_loaded as (
    select max(hour_timestamp) as latest_hour
    from   t2
)
select   channel,
         sum(value),
         status,
         error_code,
         rnd_timestamp,
         session_cd,
         nar
from     t1
where    rnd_timestamp > (select latest_hour from last_loaded)
group by channel,
         status,
         error_code,
         rnd_timestamp,
         session_cd,
         nar
Why is it not considering the other two rows for aggregation? Is it because some of the columns in the GROUP BY are null? How can I solve this issue?
Output must be :
USD;13;12;;2-NOV-2015 11:00:00;;
Why do you think your query has an issue?
By switching the hour_timestamp in t2 to be 10am not 11am, your query works as expected for me:
-- Stand-ins for tables t1 and t2. t2's hour_timestamp is 10:00, so the 11:00
-- rows of t1 pass the "newer than the last loaded hour" filter.
with t1 as (
    select 'USD' channel, 4 value, 12 status, null error_code, to_date('02/11/2015 11:00:00', 'dd/mm/yyyy hh24:mi:ss') rnd_timestamp, null session_cd, null nar from dual
    union all select 'USD', 4, 12, null, to_date('02/11/2015 11:00:00', 'dd/mm/yyyy hh24:mi:ss'), null, null from dual
    union all select 'USD', 2, 12, null, to_date('02/11/2015 11:00:00', 'dd/mm/yyyy hh24:mi:ss'), null, null from dual
    union all select 'USD', 3, 12, null, to_date('02/11/2015 11:00:00', 'dd/mm/yyyy hh24:mi:ss'), null, null from dual
),
t2 as (
    select 'USD' channel, 5 value, 12 status, null error_code, to_date('02/11/2015 10:00:00', 'dd/mm/yyyy hh24:mi:ss') hour_timestamp, null session_cd, null nar from dual
)
-- end of the mock tables; the query being discussed follows
select   channel,
         sum(value),
         status,
         error_code,
         rnd_timestamp,
         session_cd,
         nar
from     t1
where    rnd_timestamp > (select max(hour_timestamp) from t2)
group by channel, status, error_code, rnd_timestamp, session_cd, nar;
CHANNEL SUM(VALUE) STATUS ERROR_CODE RND_TIMESTAMP SESSION_CD NAR
------- ---------- ---------- ---------- --------------------- ---------- ---
USD 13 12 02/11/2015 11:00:00
I'm trying to figure out a way of identifying a "run" of results (successive rows, in order) that meet some condition. Currently, I'm ordering a result set, and scanning by eye for particular patterns. Here's an example:
-- Orders in the date window, in chronological order.
-- NOTE(review): ".." looks like a placeholder for an elided format mask; the
-- statement is not runnable as written.
SELECT the_date, name
FROM orders
WHERE
the_date BETWEEN
to_date('2013-09-18',..) AND
to_date('2013-09-22', ..)
ORDER BY the_date
--------------------------------------
the_date | name
--------------------------------------
2013-09-18 00:00:01 | John
--------------------------------------
2013-09-19 00:00:01 | James
--------------------------------------
2013-09-20 00:00:01 | John
--------------------------------------
2013-09-20 00:00:02 | John
--------------------------------------
2013-09-20 00:00:03 | John
--------------------------------------
2013-09-20 00:00:04 | John
--------------------------------------
2013-09-21 16:00:01 | Jennifer
--------------------------------------
What I want to extract from this result set is all the rows attributed to John on 2013-09-20. Generally what I'm looking for is a run of results from the same name, in a row, >= 3. I'm using Oracle 11, but I'm interested to know if this can be achieved with strict SQL, or if some kind of analytical function must be used.
You need multiple nested window functions:
-- Rows that belong to a run of at least three consecutive orders (in date
-- order) by the same name.
WITH flagged AS (
    -- flag = 1 on the first row of every run, 0 on rows that continue a run
    SELECT the_date, name,
           CASE WHEN LAG(name) OVER (ORDER BY the_date) = name THEN 0 ELSE 1 END AS flag
    FROM orders
    WHERE
        the_date BETWEEN
            TO_DATE('2013-09-18',..) AND
            TO_DATE('2013-09-22', ..)
),
grouped AS (
    -- the running total of the flags gives every run its own number
    SELECT the_date, name,
           SUM(flag) OVER (ORDER BY the_date) AS grp
    FROM flagged
),
counted AS (
    -- size of the run each row belongs to
    SELECT the_date, name, grp,
           COUNT(*) OVER (PARTITION BY grp) AS cnt
    FROM grouped
)
SELECT the_date, name, grp, cnt
FROM counted
WHERE cnt >= 3
ORDER BY the_date
Try this
-- Sample orders from the question.
WITH ORDERS AS (
    SELECT TO_DATE ( '2013-09-18 00:00:01', 'YYYY-MM-DD HH24:MI:SS' ) AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE ( '2013-09-19 00:00:01', 'YYYY-MM-DD HH24:MI:SS' ) AS THE_DATE, 'James' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE ( '2013-09-20 00:00:01', 'YYYY-MM-DD HH24:MI:SS' ) AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE ( '2013-09-20 00:00:02', 'YYYY-MM-DD HH24:MI:SS' ) AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE ( '2013-09-20 00:00:03', 'YYYY-MM-DD HH24:MI:SS' ) AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE ( '2013-09-20 00:00:04', 'YYYY-MM-DD HH24:MI:SS' ) AS THE_DATE, 'John' AS NAME FROM DUAL
    UNION ALL
    SELECT TO_DATE ( '2013-09-21 16:00:01', 'YYYY-MM-DD HH24:MI:SS' ) AS THE_DATE, 'Jennifer' AS NAME FROM DUAL
),
-- (calendar day, name) combinations with at least three orders in the window.
-- NOTE(review): this counts orders per name and day; it does not check that
-- the rows are consecutive, so interleaved orders by other names on the same
-- day would not break a "run" here.
BUSY_DAYS AS (
    SELECT
        TRUNC ( THE_DATE ) AS THE_DAY,
        NAME,
        COUNT ( * ) AS ORDER_CNT
    FROM
        ORDERS
    WHERE
        -- TO_DATE with a date-only mask already yields midnight, so no TRUNC is needed
        THE_DATE BETWEEN TO_DATE ( '2013-09-18', 'YYYY-MM-DD' )
                     AND TO_DATE ( '2013-09-22', 'YYYY-MM-DD' )
    GROUP BY
        TRUNC ( THE_DATE ),
        NAME
    HAVING
        COUNT ( * ) >= 3
)
-- All orders that fall on one of those busy (day, name) combinations.
SELECT
    B.THE_DATE,
    B.NAME
FROM
    ORDERS B
    INNER JOIN BUSY_DAYS A
        ON  A.NAME = B.NAME
        AND A.THE_DAY = TRUNC ( B.THE_DATE )
ORDER BY
    B.THE_DATE;
OUTPUT
9/20/2013 12:00:01 AM John
9/20/2013 12:00:02 AM John
9/20/2013 12:00:03 AM John
9/20/2013 12:00:04 AM John