I am trying to use the ROLLUP command to group my data by year/week, customer_id but I can't seem to get it to work.
Below is my sample data and my attempt. Can someone show me how to make this work
-- Session-level display formats: these control how TIMESTAMP and DATE values
-- are converted to and from text when no explicit format model is given.
ALTER SESSION SET NLS_TIMESTAMP_FORMAT = 'DD-MON-YYYY HH24:MI:SS.FF';
ALTER SESSION SET NLS_DATE_FORMAT = 'DD-MON-YYYY HH24:MI:SS';
-- Sample customers; the column types are derived from the literals below.
CREATE TABLE customers (customer_id, first_name, last_name) AS
    SELECT 1, 'Faith',   'Mazzarone' FROM dual
    UNION ALL
    SELECT 2, 'Lisa',    'Saladino'  FROM dual
    UNION ALL
    SELECT 3, 'Micheal', 'Palmice'   FROM dual
    UNION ALL
    SELECT 4, 'Jerry',   'Torchiano' FROM dual;
-- Sample products with their unit price.
CREATE TABLE items (product_id, product_name, price) AS
    SELECT 100, 'Black Shoes', 79.99  FROM dual
    UNION ALL
    SELECT 101, 'Brown Pants', 111.99 FROM dual
    UNION ALL
    SELECT 102, 'White Shirt', 10.99  FROM dual;
-- Sample purchases. The two CONNECT BY branches each generate a series of rows
-- spaced two days apart (6 rows for customer 2, 5 rows for customer 3).
CREATE TABLE purchases (customer_id, product_id, quantity, purchase_date) AS
    SELECT 1, 101, 3, TIMESTAMP '2022-10-11 09:54:48' FROM dual
    UNION ALL
    SELECT 1, 100, 1, TIMESTAMP '2022-10-12 19:04:18' FROM dual
    UNION ALL
    SELECT 2, 101, 1, TIMESTAMP '2022-10-11 09:54:48' FROM dual
    UNION ALL
    SELECT 2, 101, 3, TIMESTAMP '2022-10-17 19:34:58' FROM dual
    UNION ALL
    SELECT 2, 102, 3, TIMESTAMP '2022-12-06 11:41:25' + NUMTODSINTERVAL(LEVEL * 2, 'DAY')
    FROM dual
    CONNECT BY LEVEL <= 6
    UNION ALL
    SELECT 3, 101, 1, TIMESTAMP '2022-12-11 09:54:48' FROM dual
    UNION ALL
    SELECT 3, 102, 1, TIMESTAMP '2022-12-17 19:04:18' FROM dual
    UNION ALL
    SELECT 3, 102, 4, TIMESTAMP '2022-12-12 21:44:35' + NUMTODSINTERVAL(LEVEL * 2, 'DAY')
    FROM dual
    CONNECT BY LEVEL <= 5;
-- NOTE(review): this is the attempt that does not run. Issues visible in it:
--   * c.first_name and c.last_name are selected but are neither aggregated
--     nor listed in the GROUP BY
--   * items has no join predicate, so every purchase row is paired with
--     every item row
--   * the last line closes one more parenthesis than it opens
SELECT
p.customer_id,
c.first_name,
c.last_name,
sum(p.quantity * i.price) total_amt
FROM purchases p,
items i,
customers c
WHERE p.customer_id = c.customer_id
GROUP BY ROLLUP (to_char(p.purchase_date, 'YYYY/IW'),(p.customer_id)));
To me, it looks like this:
SQL> SELECT p.customer_id,
2 c.first_name,
3 c.last_name,
4 TO_CHAR (p.purchase_date, 'YYYY/IW') year_week,
5 SUM (p.quantity * i.price) total_amt
6 FROM purchases p, items i, customers c
7 WHERE p.customer_id = c.customer_id
8 GROUP BY c.first_name,
9 c.last_name,
10 p.customer_id,
11 ROLLUP (TO_CHAR (p.purchase_date, 'YYYY/IW'))
12 ORDER BY customer_id, year_week;
CUSTOMER_ID FIRST_N LAST_NAME YEAR_WE TOTAL_AMT
----------- ------- --------- ------- ----------
1 Faith Mazzarone 2022/41 811,88
1 Faith Mazzarone 811,88
2 Lisa Saladino 2022/41 202,97
2 Lisa Saladino 2022/42 608,91
2 Lisa Saladino 2022/49 1217,82
2 Lisa Saladino 2022/50 2435,64
2 Lisa Saladino 4465,34
3 Micheal Palmice 2022/49 202,97
3 Micheal Palmice 2022/50 2638,61
3 Micheal Palmice 2022/51 1623,76
3 Micheal Palmice 4465,34
11 rows selected.
SQL>
You have not included a JOIN condition for the items table and, since the first_name and last_name are dependent on the customer_id then, you can aggregate the name components:
-- Weekly totals per customer, a subtotal per customer and a grand total.
SELECT p.customer_id,
       -- The names depend only on customer_id, so MAX() just picks the single
       -- value of the group; they are blanked on the grand-total row.
       CASE WHEN p.customer_id IS NOT NULL THEN MAX(c.first_name) END AS first_name,
       CASE WHEN p.customer_id IS NOT NULL THEN MAX(c.last_name)  END AS last_name,
       TO_CHAR(p.purchase_date, 'YYYY/IW') AS week,
       SUM(p.quantity * i.price)           AS total_amt
FROM   purchases p
       JOIN customers c ON c.customer_id = p.customer_id
       JOIN items i     ON i.product_id  = p.product_id
GROUP BY ROLLUP (p.customer_id, TO_CHAR(p.purchase_date, 'YYYY/IW'));
Outputs:
CUSTOMER_ID
FIRST_NAME
LAST_NAME
WEEK
TOTAL_AMT
1
Faith
Mazzarone
2022/41
415.96
2
Lisa
Saladino
2022/41
111.99
2
Lisa
Saladino
2022/42
335.97
2
Lisa
Saladino
2022/49
65.94
2
Lisa
Saladino
2022/50
131.88
3
Micheal
Palmice
2022/49
111.99
3
Micheal
Palmice
2022/50
142.87
3
Micheal
Palmice
2022/51
87.92
1
Faith
Mazzarone
null
415.96
2
Lisa
Saladino
null
645.78
3
Micheal
Palmice
null
342.78
null
null
null
null
1404.52
and reversing the ROLLUP:
-- Totals per week and customer, a subtotal per week and a grand total.
SELECT p.customer_id,
       -- Subtotal and grand-total rows have no customer, so no name is shown;
       -- elsewhere MAX() returns the one name that belongs to the customer.
       CASE WHEN p.customer_id IS NOT NULL THEN MAX(c.first_name) END AS first_name,
       CASE WHEN p.customer_id IS NOT NULL THEN MAX(c.last_name)  END AS last_name,
       TO_CHAR(p.purchase_date, 'YYYY/IW') AS week,
       SUM(p.quantity * i.price)           AS total_amt
FROM   purchases p
       JOIN customers c ON c.customer_id = p.customer_id
       JOIN items i     ON i.product_id  = p.product_id
GROUP BY ROLLUP (TO_CHAR(p.purchase_date, 'YYYY/IW'), p.customer_id);
Outputs:
CUSTOMER_ID
FIRST_NAME
LAST_NAME
WEEK
TOTAL_AMT
1
Faith
Mazzarone
2022/41
415.96
2
Lisa
Saladino
2022/41
111.99
2
Lisa
Saladino
2022/42
335.97
2
Lisa
Saladino
2022/49
65.94
2
Lisa
Saladino
2022/50
131.88
3
Micheal
Palmice
2022/49
111.99
3
Micheal
Palmice
2022/50
142.87
3
Micheal
Palmice
2022/51
87.92
null
null
null
2022/41
527.95
null
null
null
2022/42
335.97
null
null
null
2022/49
177.93
null
null
null
2022/50
274.75
null
null
null
2022/51
87.92
null
null
null
null
1404.52
fiddle
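In general the two other answers are OK; however, the GROUP BY is wrong: the format 'YYYY/IW' combines the calendar year with the ISO week number, and the two disagree for dates around New Year.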
Consider this sample data:
-- Extra purchases that straddle the 2022/2023 year boundary.
INSERT INTO purchases (customer_id, product_id, quantity, purchase_date)
    SELECT 2, 102, 3, TIMESTAMP '2022-12-26 11:41:25' + NUMTODSINTERVAL(LEVEL * 2, 'DAY')
    FROM dual
    CONNECT BY LEVEL <= 6
    UNION ALL
    SELECT 3, 101, 1, TIMESTAMP '2022-12-21 09:54:48' FROM dual
    UNION ALL
    SELECT 3, 102, 1, TIMESTAMP '2022-12-27 19:04:18' FROM dual
    UNION ALL
    SELECT 3, 102, 4, TIMESTAMP '2022-12-22 21:44:35' + NUMTODSINTERVAL(LEVEL * 2, 'DAY')
    FROM dual
    CONNECT BY LEVEL <= 15;
-- Totals per customer and 'YYYY/IW' label (calendar year + ISO week number),
-- with per-customer subtotals and a grand total, sorted by the week label.
SELECT p.customer_id,
       TO_CHAR(p.purchase_date, 'YYYY/IW') AS week,
       SUM(p.quantity * i.price)           AS total_amt
FROM   purchases p
       INNER JOIN customers c ON c.customer_id = p.customer_id
       INNER JOIN items i     ON i.product_id  = p.product_id
GROUP BY ROLLUP (p.customer_id, TO_CHAR(p.purchase_date, 'YYYY/IW'))
ORDER BY week;
returns
+-----------------------------+
|CUSTOMER_ID|WEEK |TOTAL_AMT|
+-----------------------------+
|3 |2022/51|155.95 |
|2 |2022/52|65.94 |
|3 |2022/52|142.87 |
|3 |2023/01|131.88 |
|2 |2023/01|98.91 |
|3 |2023/02|175.84 |
|3 |2023/03|131.88 |
|2 |2023/52|32.97 |
|3 |2023/52|43.96 |
|2 | |197.82 |
|3 | |782.38 |
| | |980.2 |
+-----------------------------+
But I guess you are rather looking for this:
-- Same rollup, but grouped on the first day of the ISO week and labelled with
-- the ISO year (IYYY), so the year and the week number always belong together.
SELECT p.customer_id,
       TO_CHAR(TRUNC(p.purchase_date, 'IW'), 'IYYY/IW') AS week,
       SUM(p.quantity * i.price)                        AS total_amt
FROM   purchases p
       INNER JOIN customers c ON c.customer_id = p.customer_id
       INNER JOIN items i     ON i.product_id  = p.product_id
GROUP BY ROLLUP (p.customer_id, TRUNC(p.purchase_date, 'IW'))
ORDER BY week;
+-----------------------------+
|CUSTOMER_ID|WEEK |TOTAL_AMT|
+-----------------------------+
|3 |2022/51|155.95 |
|2 |2022/52|98.91 |
|3 |2022/52|186.83 |
|2 |2023/01|98.91 |
|3 |2023/01|131.88 |
|3 |2023/02|175.84 |
|3 |2023/03|131.88 |
| | |980.2 |
|3 | |782.38 |
|2 | |197.82 |
+-----------------------------+
See difference with this query:
-- Side-by-side comparison of the two week labels for every purchase day.
SELECT DISTINCT
       TO_CHAR(purchase_date, 'YYYY-MM-DD')                   AS purchase_date,
       TO_CHAR(TRUNC(purchase_date, 'IW'), 'IYYY-"W"IW')      AS iso_week,
       TO_CHAR(purchase_date, 'YYYY/IW')                      AS week
FROM   purchases
ORDER BY purchase_date
+------------------------------+
|PURCHASE_DATE|ISO_WEEK|WEEK |
+------------------------------+
|2022-12-21 |2022-W51|2022/51|
|2022-12-24 |2022-W51|2022/51|
|2022-12-26 |2022-W52|2022/52|
|2022-12-27 |2022-W52|2022/52|
|2022-12-28 |2022-W52|2022/52|
|2022-12-30 |2022-W52|2022/52|
|2023-01-01 |2022-W52|2023/52| <- Note the difference
|2023-01-03 |2023-W01|2023/01|
|2023-01-05 |2023-W01|2023/01|
|2023-01-07 |2023-W01|2023/01|
|2023-01-09 |2023-W02|2023/02|
|2023-01-11 |2023-W02|2023/02|
|2023-01-13 |2023-W02|2023/02|
|2023-01-15 |2023-W02|2023/02|
|2023-01-17 |2023-W03|2023/03|
|2023-01-19 |2023-W03|2023/03|
|2023-01-21 |2023-W03|2023/03|
+------------------------------+
Related
I have created a query to get different time types and hours
-- Lists hours per time entry for the selected time types between two bind dates.
-- NOTE(review): grp_type_id and payroll_time_type are not qualified with a
-- table alias, so which of the two tables supplies them cannot be told from here.
SELECT calc_time.hours measure,
calc_time.payroll_time_type elements,
calc_time.person_id,
calc_time.start_time
FROM hwm_tm_rep_work_hours_sum_v calc_time,
per_all_people_f papf
WHERE grp_type_id = 200
AND payroll_time_type IN ( 'Afternoon shift',
'TL',
'Evening shift',
-- NOTE(review): the next literal ends with a space - confirm that is intended
'Regular Pay ',
'OT' )
-- The to_char/to_date round trip drops the time of day, so the comparison is
-- made on whole days, with both bind dates included.
AND (To_date(To_char(calc_time.start_time, 'YYYY-MM-DD') , 'YYYY-MM-DD') BETWEEN To_date(To_char(:From_Date, 'YYYY-MM-DD'), 'YYYY-MM-DD')
AND To_date( To_char(:To_Date, 'YYYY-MM-DD'), 'YYYY-MM-DD' ))
-- Join predicate between the two comma-joined tables.
AND papf.person_id = calc_time.person_id
I get the output like -
Start_time person_id elements measure
01-Jan-2021 198 Regular Pay 10
01-Jan-2021 198 OT 2
01-jAN-2021 198 Afternoon shift 2
16-JAN-2021 198 Regular Pay 10
17-JAN-2021 198 OT 3
20-JAN-2021 198 EVENING SHIFT 8
08-JAN-2021 11 Regular Pay 8
09-JAN-2021 11 OT 1
08-JAN-2021 11 tl 2
10-JAN-2021 12 Evening shift 9
11-JAN-2021 12 Evening shift 9
I want this output to be displayed as follows, within the two dates that I pass as parameters - for example, a from date of 01-JAN-2021 and a to date of 31-JAN-2021:
person_id Regular_pay OT OTHER_MEASURE OTHER_CODE
198 20 5 2 Afternoon shift
198 20 5 8 EVENING SHIFT
11 8 1 2 TL
12 18 Evening shift
So sum of Regular pay and OT IN separate columns and all others in other_measure and other_code
How can I tweak the main query to achieve this?
You can use:
-- Regular Pay and OT totals per person (repeated on every row of that person)
-- next to one row per remaining time type with that type's summed hours.
WITH hours_by_type AS (
    -- One row per person and time type inside the requested date window.
    -- The analytic SUMs run over these grouped rows, so each spreads the
    -- person's Regular Pay / OT total across all of that person's rows.
    SELECT c.person_id,
           SUM(CASE WHEN c.payroll_time_type = 'Regular Pay' THEN SUM(c.hours) END)
               OVER (PARTITION BY c.person_id) AS regular_pay,
           SUM(CASE WHEN c.payroll_time_type = 'OT' THEN SUM(c.hours) END)
               OVER (PARTITION BY c.person_id) AS ot,
           SUM(c.hours)        AS other_measure,
           c.payroll_time_type AS other_code
    FROM   hwm_tm_rep_work_hours_sum_v c
           JOIN per_all_people_f p ON c.person_id = p.person_id
    WHERE  grp_type_id = 200
    AND    payroll_time_type IN ('Afternoon shift', 'TL', 'Evening shift', 'Regular Pay', 'OT')
    -- Half-open range: start of :from_date up to the end of :to_date.
    AND    c.start_time >= TRUNC(:from_date)
    AND    c.start_time <  TRUNC(:to_date) + INTERVAL '1' DAY
    GROUP BY c.person_id, c.payroll_time_type
)
SELECT person_id,
       regular_pay,
       ot,
       other_measure,
       other_code
FROM   hours_by_type
-- Regular Pay and OT already have their own columns; keep only the other types.
WHERE  other_code NOT IN ('Regular Pay', 'OT');
Which, for the sample data:
-- Sample time entries standing in for the real view of the same name.
CREATE TABLE hwm_tm_rep_work_hours_sum_v (start_time, person_id, payroll_time_type, hours) AS
    SELECT DATE '2021-01-01', 198, 'Regular Pay',     10 FROM dual
    UNION ALL
    SELECT DATE '2021-01-01', 198, 'OT',              2  FROM dual
    UNION ALL
    SELECT DATE '2021-01-01', 198, 'Afternoon shift', 2  FROM dual
    UNION ALL
    SELECT DATE '2021-01-16', 198, 'Regular Pay',     10 FROM dual
    UNION ALL
    SELECT DATE '2021-01-17', 198, 'OT',              3  FROM dual
    UNION ALL
    SELECT DATE '2021-01-20', 198, 'Evening shift',   8  FROM dual
    UNION ALL
    SELECT DATE '2021-01-08', 11,  'Regular Pay',     8  FROM dual
    UNION ALL
    SELECT DATE '2021-01-09', 11,  'OT',              1  FROM dual
    UNION ALL
    SELECT DATE '2021-01-08', 11,  'TL',              2  FROM dual
    UNION ALL
    SELECT DATE '2021-01-10', 12,  'Evening shift',   9  FROM dual
    UNION ALL
    SELECT DATE '2021-01-11', 12,  'Evening shift',   9  FROM dual;
-- Sample people, all in group type 200.
CREATE TABLE per_all_people_f (person_id, grp_type_id) AS
    SELECT 198, 200 FROM dual
    UNION ALL
    SELECT 11,  200 FROM dual
    UNION ALL
    SELECT 12,  200 FROM dual;
Outputs:
PERSON_ID
REGULAR_PAY
OT
OTHER_MEASURE
OTHER_CODE
11
8
1
2
TL
12
18
Evening shift
198
20
5
2
Afternoon shift
198
20
5
8
Evening shift
db<>fiddle here
You could try something like this - In your question, unfortunately, it is not clear in which table which columns/values are available.
-- One row per person with the summed hours of selected time types.
-- Conditional aggregation replaces the correlated subquery of the first draft,
-- which summed start_time (a date) instead of hours, ignored the date filter
-- and referenced papf.person_id without grouping by it.
SELECT
    calc_time.person_id,
    SUM(CASE WHEN calc_time.payroll_time_type = 'Regular Pay' THEN calc_time.hours END) AS regular_pay,
    SUM(CASE WHEN calc_time.payroll_time_type = 'OT' THEN calc_time.hours END) AS ot
    -- add further time types in the same way
FROM hwm_tm_rep_work_hours_sum_v calc_time
INNER JOIN per_all_people_f papf
    ON papf.person_id = calc_time.person_id
WHERE grp_type_id = 200
  -- 'Regular Pay' is written without a trailing space so that it matches the
  -- literal used in the CASE above - TODO confirm against the stored values.
  AND payroll_time_type IN ( 'Afternoon shift',
                             'TL',
                             'Evening shift',
                             'Regular Pay',
                             'OT' )
  -- Whole-day window including both bind dates, written as a half-open range
  -- so that start_time is not wrapped in a conversion function.
  AND calc_time.start_time >= TRUNC(:From_Date)
  AND calc_time.start_time <  TRUNC(:To_Date) + 1
-- use a group by
GROUP BY
    calc_time.person_id
You may use aggregation and then apply model clause to calculate the required columns. Below is the code with comments, assuming you can manage filter by dates.
select *
from t
PERSON_ID | ELEMENTS | MEASURE
--------: | :-------------- | ------:
198 | Regular Pay | 1
198 | Regular Pay | 2
198 | Afternoon shift | 3
198 | Afternoon shift | 4
198 | OT | 5
198 | OT | 6
198 | EVENING SHIFT | 7
198 | EVENING SHIFT | 8
11 | Regular Pay | 11
11 | Regular Pay | 12
11 | TL | 13
11 | TL | 14
11 | EVENING SHIFT | 15
11 | EVENING SHIFT | 16
12 | TL | 21
12 | TL | 22
12 | EVENING SHIFT | 23
12 | EVENING SHIFT | 24
/*For every person: one row per "other" code with its total, plus the person's
OT and Regular Pay totals repeated on each of those rows*/
select
person_id,
ot,
regular_pay,
elements as other_code,
mes as other_measure
from (
/*First aggregate all the measures by person_id and code*/
select
person_id,
elements,
sum(measure) as mes
from t
/*Date filter goes here*/
group by
person_id,
elements
)
model
/*RETURN UPDATED ROWS does the trick:
the rules below assign only to the cells
of the "other" codes, so the OT and
Regular Pay rows will not go to the output*/
return updated rows
/*The rules are evaluated separately for each person*/
partition by (person_id)
/*Makes each cell addressable by its code*/
dimension by (elements)
/*mes comes from the subquery; ot and regular_pay are new measures starting at 0*/
measures (
mes,
0 as ot,
0 as regular_pay
)
rules upsert (
/*For every code other than OT / Regular Pay, copy the person's OT total*/
ot[
elements not in ('OT', 'Regular Pay')
] = sum(mes)['OT'],
/*...and likewise the person's Regular Pay total*/
regular_pay[
elements not in ('OT', 'Regular Pay')
] = sum(mes)['Regular Pay']
)
PERSON_ID | OT | REGULAR_PAY | OTHER_CODE | OTHER_MEASURE
--------: | ---: | ----------: | :-------------- | ------------:
198 | 11 | 3 | EVENING SHIFT | 15
198 | 11 | 3 | Afternoon shift | 7
11 | null | 23 | TL | 27
11 | null | 23 | EVENING SHIFT | 31
12 | null | null | TL | 43
12 | null | null | EVENING SHIFT | 47
db<>fiddle here
The goal is to select the count of distinct customer_id's who have not made a purchase in the rolling 30 day period prior to every day in the calendar year 2016. I have created a calendar table in my database to join to.
Here is an example table for reference, let's say you have customers orders normalized as follows:
+-------------+------------+----------+
| customer_id | date | order_id |
+-------------+------------+----------+
| 123 | 01/25/2016 | 1000 |
+-------------+------------+----------+
| 123 | 04/27/2016 | 1025 |
+-------------+------------+----------+
| 444 | 02/02/2016 | 1010 |
+-------------+------------+----------+
| 521 | 01/23/2016 | 998 |
+-------------+------------+----------+
| 521 | 01/24/2016 | 999 |
+-------------+------------+----------+
The goal output is effectively a calendar with 1 row for every single day of 2016 with a count on each day of how many customers "lapsed" on that day, meaning their last purchase was 30 days or more prior from that day of the year. The final output will look like this:
+------------+--------------+
| date | lapsed_count |
+------------+--------------+
| 01/01/2016 | 0 |
+------------+--------------+
| 01/02/2016 | 0 |
+------------+--------------+
| ... | ... |
+------------+--------------+
| 03/01/2016 | 12 |
+------------+--------------+
| 03/02/2016 | 9 |
+------------+--------------+
| 03/03/2016 | 7 |
+------------+--------------+
This data does not exist in 2015, therefore it's not possible for Jan-01-2016 to have a count of lapsed customers because that is the first possible day to ever make a purchase.
So for customer_id #123, they purchased on 01/25/2016 and 04/27/2016. They should have 2 lapse counts because their purchases are more than 30 days apart. One lapse occurring on 2/24/2016 and another lapse on 05/27/2016.
Customer_id#444 only purchased once, so they should have one lapse count 30 days after 02/02/2016, on 03/03/2016 (2016 is a leap year).
Customer_id#521 is tricky: since they purchased with a frequency of 1 day, we will not count the first purchase on 01/23/2016, so there is only one lapse, starting from their last purchase on 01/24/2016. The count for the lapse will occur on 02/23/2016 (+30 days).
If you have a table of dates, here is one expensive method:
-- For every calendar date, count the customers whose most recent order on or
-- before that date is more than 30 days old.
select date,
       sum(case when prev_date < date - 30 then 1 else 0 end) as lapsed
from (-- One row per (date, customer) with the customer's latest order up to
      -- that date; prev_date is NULL when the customer has not ordered yet.
      -- The calendar and the customer list get distinct aliases (cal / cu);
      -- the grouping uses cu.customer_id, which is never NULL, rather than
      -- the outer-joined o.customer_id.
      select cal.date, cu.customer_id, max(o.date) as prev_date
      from calendar cal cross join
           (select distinct customer_id from orders) cu left join
           orders o
           on o.date <= cal.date and o.customer_id = cu.customer_id
      group by cal.date, cu.customer_id
     ) oc
group by date;
For each date/customer pair, it determines the latest purchase the customer made before the date. It then uses this information to count the lapsed.
To be honest, this will probably work well on a handful of dates, but not for a full year's worth.
Apologies, I didn't read your question properly the first time around. This query will give you all the lapses you have. It takes each order and uses an analytic function to work out the next order date - if the gap is greater than 30 days then a lapse is recorded
-- Lists every order that is followed by a gap of more than 30 days (or, for a
-- customer's latest order, that is more than 30 days old as of now).
WITH cust_orders (customer_id, order_date, order_id) AS (
    -- sample orders
    SELECT 1, DATE '2016-01-01', 1001 FROM dual UNION ALL
    SELECT 1, DATE '2016-01-29', 1002 FROM dual UNION ALL
    SELECT 1, DATE '2016-03-01', 1003 FROM dual UNION ALL
    SELECT 2, DATE '2016-01-01', 1004 FROM dual UNION ALL
    SELECT 2, DATE '2016-01-29', 1005 FROM dual UNION ALL
    SELECT 2, DATE '2016-04-01', 1006 FROM dual UNION ALL
    SELECT 2, DATE '2016-06-01', 1007 FROM dual UNION ALL
    SELECT 2, DATE '2016-08-01', 1008 FROM dual UNION ALL
    SELECT 3, DATE '2016-09-01', 1009 FROM dual UNION ALL
    SELECT 3, DATE '2016-12-01', 1010 FROM dual UNION ALL
    SELECT 3, DATE '2016-12-02', 1011 FROM dual UNION ALL
    SELECT 3, DATE '2016-12-03', 1012 FROM dual UNION ALL
    SELECT 3, DATE '2016-12-04', 1013 FROM dual UNION ALL
    SELECT 3, DATE '2016-12-05', 1014 FROM dual UNION ALL
    SELECT 3, DATE '2016-12-06', 1015 FROM dual UNION ALL
    SELECT 3, DATE '2016-12-07', 1016 FROM dual
),
orders_with_next AS (
    -- Pair every order with the same customer's following order date.
    SELECT customer_id,
           order_date,
           order_id,
           LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) AS next_order_date
    FROM   cust_orders
)
SELECT customer_id,
       order_date,
       order_id,
       next_order_date,
       order_date + 30 AS lapse_date
FROM   orders_with_next
-- With no following order, the gap is measured up to the current date.
WHERE  NVL(next_order_date, sysdate) - order_date > 30
;
Now join that to a set of dates and run a COUNT function (enter the year parameter as YYYY) :
-- Counts the lapses that fall on each day of :P_year (only days with at least
-- one lapse are returned).
WITH
 cust_orders (customer_id , order_date , order_id )
 AS
 (SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
  SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
  SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
  SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
  SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
  SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
  SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
  SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
  SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
  SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
  SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
  SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
 )
,calendar (date_value)
 AS
 -- One row per day of :P_year. The rows are generated with CONNECT BY LEVEL,
 -- so the calendar no longer depends on ALL_TABLES exposing at least 366 rows.
 (SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (LEVEL - 1)
  FROM   dual
  CONNECT BY LEVEL <= TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + 1
 )
SELECT
  calendar.date_value
 ,COUNT(*)
FROM
  (
   -- An order starts a lapse when the next order (or, failing that, the
   -- current date) is more than 30 days later; the lapse is dated 30 days
   -- after the order.
   SELECT
     customer_id
    ,order_date
    ,order_id
    ,next_order_date
    ,order_date + 30 lapse_date
   FROM
     (SELECT
        customer_id
       ,order_date
       ,order_id
       ,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
      FROM
        cust_orders
     )
   WHERE NVL(next_order_date,sysdate) - order_date > 30
  ) lapses
  INNER JOIN calendar
    ON calendar.date_value = TRUNC(lapses.lapse_date)
GROUP BY
  calendar.date_value
;
Or if you really want every date printed out then use this :
-- Returns every day of :P_year with the number of lapses falling on it
-- (0 for days without a lapse).
WITH
 cust_orders (customer_id , order_date , order_id )
 AS
 (SELECT 1, TO_DATE('01/01/2016','DD/MM/YYYY'), 1001 FROM dual UNION ALL
  SELECT 1, TO_DATE('29/01/2016','DD/MM/YYYY'), 1002 FROM dual UNION ALL
  SELECT 1, TO_DATE('01/03/2016','DD/MM/YYYY'), 1003 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/01/2016','DD/MM/YYYY'), 1004 FROM dual UNION ALL
  SELECT 2, TO_DATE('29/01/2016','DD/MM/YYYY'), 1005 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/04/2016','DD/MM/YYYY'), 1006 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/06/2016','DD/MM/YYYY'), 1007 FROM dual UNION ALL
  SELECT 2, TO_DATE('01/08/2016','DD/MM/YYYY'), 1008 FROM dual UNION ALL
  SELECT 3, TO_DATE('01/09/2016','DD/MM/YYYY'), 1009 FROM dual UNION ALL
  SELECT 3, TO_DATE('01/12/2016','DD/MM/YYYY'), 1010 FROM dual UNION ALL
  SELECT 3, TO_DATE('02/12/2016','DD/MM/YYYY'), 1011 FROM dual UNION ALL
  SELECT 3, TO_DATE('03/12/2016','DD/MM/YYYY'), 1012 FROM dual UNION ALL
  SELECT 3, TO_DATE('04/12/2016','DD/MM/YYYY'), 1013 FROM dual UNION ALL
  SELECT 3, TO_DATE('05/12/2016','DD/MM/YYYY'), 1014 FROM dual UNION ALL
  SELECT 3, TO_DATE('06/12/2016','DD/MM/YYYY'), 1015 FROM dual UNION ALL
  SELECT 3, TO_DATE('07/12/2016','DD/MM/YYYY'), 1016 FROM dual
 )
,lapses
 AS
 -- An order starts a lapse when the next order (or, failing that, the current
 -- date) is more than 30 days later; the lapse is dated 30 days after the order.
 (SELECT
    customer_id
   ,order_date
   ,order_id
   ,next_order_date
   ,order_date + 30 lapse_date
  FROM
    (SELECT
       customer_id
      ,order_date
      ,order_id
      ,LEAD(order_date) OVER (PARTITION BY customer_id ORDER BY order_date) next_order_date
     FROM
       cust_orders
    )
  WHERE NVL(next_order_date,sysdate) - order_date > 30
 )
,calendar (date_value)
 AS
 -- One row per day of :P_year. The rows are generated with CONNECT BY LEVEL,
 -- so the calendar no longer depends on ALL_TABLES exposing at least 366 rows.
 (SELECT TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + (LEVEL - 1)
  FROM   dual
  CONNECT BY LEVEL <= TO_DATE('31/12/'||:P_year,'DD/MM/YYYY') - TO_DATE('01/01/'||:P_year,'DD/MM/YYYY') + 1
 )
SELECT
  calendar.date_value
  -- COUNT of a lapses column gives 0 for days the outer join leaves unmatched.
 ,COUNT(lapses.lapse_date) AS lapsed_count
FROM
  calendar
  -- TRUNC keeps the comparison on whole days even if order_date carries a
  -- time of day.
  LEFT JOIN lapses
    ON TRUNC(lapses.lapse_date) = calendar.date_value
GROUP BY
  calendar.date_value
ORDER BY
  calendar.date_value
;
Here's how I'd do it:
-- Daily count of lapses from 01/01/2016 up to today.
WITH your_table (customer_id, order_date, order_id) AS (
    SELECT 123, DATE '2016-01-24', 12345 FROM dual UNION ALL
    SELECT 123, DATE '2016-01-24', 12346 FROM dual UNION ALL
    SELECT 123, DATE '2016-01-25', 12347 FROM dual UNION ALL
    SELECT 123, DATE '2016-02-24', 12347 FROM dual UNION ALL
    SELECT 123, DATE '2016-03-16', 12348 FROM dual UNION ALL
    SELECT 123, DATE '2016-04-18', 12349 FROM dual UNION ALL
    SELECT 456, DATE '2016-02-20', 12350 FROM dual UNION ALL
    SELECT 456, DATE '2016-03-01', 12351 FROM dual UNION ALL
    SELECT 456, DATE '2016-03-03', 12352 FROM dual UNION ALL
    SELECT 456, DATE '2016-04-18', 12353 FROM dual UNION ALL
    SELECT 456, DATE '2016-05-20', 12354 FROM dual UNION ALL
    SELECT 456, DATE '2016-06-23', 12355 FROM dual UNION ALL
    SELECT 456, DATE '2017-01-19', 12356 FROM dual
),
-- end of mimicking your_table with data in it
lapsed_info AS (
    -- An order lapses 30 days later when it is more than 30 days old and the
    -- same customer has no order in the 1 to 30 days that follow it.
    SELECT customer_id,
           order_date,
           CASE
               WHEN TRUNC(SYSDATE) - order_date > 30
                    AND COUNT(*) OVER (PARTITION BY customer_id
                                       ORDER BY order_date
                                       RANGE BETWEEN 1 FOLLOWING AND 30 FOLLOWING) = 0
               THEN order_date + 30
           END AS lapsed_date
    FROM   your_table
),
dates AS (
    -- One row per day from 01/01/2016 through today.
    SELECT DATE '2016-01-01' + LEVEL - 1 AS dt
    FROM   dual
    CONNECT BY LEVEL <= TRUNC(SYSDATE) - DATE '2016-01-01' + 1
)
SELECT dates.dt,
       COUNT(li.lapsed_date) AS lapsed_count
FROM   dates
       LEFT JOIN lapsed_info li ON li.lapsed_date = dates.dt
GROUP BY dates.dt
ORDER BY dates.dt;
Results:
DT LAPSED_COUNT
---------- ------------
01/01/2016 0
<snip>
23/01/2016 0
24/01/2016 0
25/01/2016 0
26/01/2016 0
<snip>
19/02/2016 0
20/02/2016 0
21/02/2016 0
22/02/2016 0
23/02/2016 0
24/02/2016 1
25/02/2016 0
<snip>
29/02/2016 0
01/03/2016 0
02/03/2016 0
03/03/2016 0
04/03/2016 0
<snip>
15/03/2016 0
16/03/2016 0
17/03/2016 0
<snip>
20/03/2016 0
21/03/2016 0
22/03/2016 0
<snip>
30/03/2016 0
31/03/2016 0
01/04/2016 0
02/04/2016 1
03/04/2016 0
<snip>
14/04/2016 0
15/04/2016 1
16/04/2016 0
17/04/2016 0
18/04/2016 0
19/04/2016 0
<snip>
17/05/2016 0
18/05/2016 2
19/05/2016 0
20/05/2016 0
21/05/2016 0
<snip>
18/06/2016 0
19/06/2016 1
20/06/2016 0
21/06/2016 0
22/06/2016 0
23/06/2016 0
24/06/2016 0
<snip>
22/07/2016 0
23/07/2016 1
24/07/2016 0
<snip>
18/01/2017 0
19/01/2017 0
20/01/2017 0
<snip>
08/02/2017 0
This takes your data and uses the analytic count function to work out the number of rows that have a value within 30 days of (but excluding) the current row's date.
Then we apply a case expression to determine that if the row has a date within 30 days of today's date, we'll count those as not lapsed. If a count of 0 was returned, then the row is considered lapsed and we'll output the lapsed date as the order_date plus 30 days. Any other count result means the row has not lapsed.
The above is all worked out in the lapsed_info subquery.
Then all we need to do is list the dates (see the dates subquery) and outer join the lapsed_info subquery to it based on the lapsed_date and then do a count of the lapsed dates for each day.
I have a table A like below
REGID | PKG_DESC | EVENT_DATE | IS_CON | IS_REN
-----------------------------------------------------
1234 | cc | 27-MAR-14 | 0 | 0
1234 | cc | 27-JUN-14 | 1 | 0
1234 | GUI | 27-MAR-14 | 0 | 0
1234 | GUI | 27-JUN-14 | 1 | 0
1234 | GUI | 27-SEPT-14 | 0 | 1
1234 | GUI | 27-SEPT-15 | 0 | 1
1234 | REMOTE | 27-MAR-14 | 0 | 0
1234 | REMOTE | 27-JUN-14 | 1 | 0
1234 | REMOTE | 27-SEPT-14 | 0 | 1
2431 | cc | 27-MAR-14 | 0 | 0
2431 | cc | 27-JUN-14 | 1 | 0
I have a query like below
-- NOTE(review): the query as posted does not parse. Issues visible in it:
--   * "BETWEEN 0-30" / "BETWEEN 31-60" are arithmetic (0 minus 30, 31 minus 60)
--     and lack the "AND <upper bound>" that BETWEEN requires
--   * neither CASE expression is closed with END
--   * both branches join "tab" to itself (tab a / tab b), and the column is
--     spelled reg_id here but REGID in the table listings
-- Branch 1: rows flagged as renewals (is_ren = 1).
select a.reg_id, b.sess_start_dt,
case when TRUNC(A.EVENT_DATE) - B.SESS_START_DT BETWEEN 0-30 THEN 'DAYS 0_30'
WHEN TRUNC(A.EVENT_DATE) - B.SESS_START_DT BETWEEN 31-60 THEN 'DAYS 31-60'
from tab a inner join tab b on a.reg_id = b.reg_id and a.is_ren = 1
union
-- Branch 2: rows flagged as conversions (is_con = 1).
select a.reg_id, b.sess_start_dt,
case when TRUNC(A.EVENT_DATE) - B.SESS_START_DT BETWEEN 0-30 THEN 'DAYS 0_30'
WHEN TRUNC(A.EVENT_DATE) - B.SESS_START_DT BETWEEN 31-60 THEN 'DAYS 31-60'
from tab a inner join tab b on a.reg_id = b.reg_id and a.is_con = 1
Tab B contains all the usage for each reg_id there will be 100's of records.. Sample of few are
REGID | SESS_START_DT
1234 | 27-Jan-14
1234 | 20-MAR-12
1234 | 27-MAR-12
1234 | 01-sept-14
1234 | 07-sept-14
1234 | 29-JUL-14
1234 | 03-AUG-14
1234 | 27-MAR-13
1234 | 27-MAR-12
1234 | 27-MAR-12
1234 | 27-MAR-12
1234 | 27-MAR-12
1234 | 27-MAR-12
1234 | 27-MAR-12
2431 | 20-JUN-14
The Above query needs to be corrected in a way like,
1) If the REG_ID is having at least one is_ren = 1 then that subscription should be considered as renewal subscription and needs to get the 30 days and 60 days usage from table B from his is_ren = 1 event_date. (for REGID 1234 only is_ren query should execute)
2) If multiple IS_REN = 1 are existing for each REGID then the usage needs to be taken 30 days and 60 days from table B with the MIN(event_date). in this case the usage should be taken from 27-SEPT-14 instead of 27-SEPT-15
3) If there is no IS_REN = 1 and there is IS_CON = 1 then it's considered as conversion and usage should be taken before 60 days from the converted date (for REGID 2431, usage needs to get 60 days back from 27-JUN-14{this is my event_date in the query})
The O/P should be like
REGID | EVENT_DATE | DAYS 0_30 | DAYS 31-60 | CODE
1234 | 27-SEPT-14 | 2 | 2 | REN
2431 | 27-JUL-14 | 1 | 0 | CON
If my assumptions in my Comment are correct, this may be what you need. Notice the order by clause in row_number() - first the rows with is_ren = 1, then the rows with is_ren = 0 and is_con = 1, then all the other rows, and within each group order by event_date ascending. This way, the top row (rn = 1), which is the only one I use in the outer query, will have is_ren = 1 with the earliest possible date, or if no is_ren = 1 then the row with is_con = 1 and the earliest date, or else just the earliest date. (In the last case, the CODE will be null: this means there were no is_ren = 1 and no is_con = 1 for that regid.)
Not sure why you have 27-JUL-14 in the output for regid = 2431, that should be 27-JUN-14. Also, there are no four-letter months in Oracle ("SEPT"). The output shows dates using my session parameters; if you need to format the dates, use to_char(event_date, .....) with the desired date format model. Also, since the data you provided is just dates (with no time-of-day component), I didn't truncate anything; you may need to, if your real data has time-of-day components.
with
     -- Sample rows for table A. ANSI DATE literals replace to_date(..., 'dd-MON-rr'),
     -- so the rows load the same way whatever the session's NLS_DATE_LANGUAGE is
     -- and without relying on a two-digit-year pivot.
     table_a ( regid, pkg_desc, event_date, is_con, is_ren ) as (
       select 1234, 'cc'    , date '2014-03-27', 0, 0 from dual union all
       select 1234, 'cc'    , date '2014-06-27', 1, 0 from dual union all
       select 1234, 'GUI'   , date '2014-03-27', 0, 0 from dual union all
       select 1234, 'GUI'   , date '2014-06-27', 1, 0 from dual union all
       select 1234, 'GUI'   , date '2014-09-27', 0, 1 from dual union all
       select 1234, 'GUI'   , date '2015-09-27', 0, 1 from dual union all
       select 1234, 'REMOTE', date '2014-03-27', 0, 0 from dual union all
       select 1234, 'REMOTE', date '2014-06-27', 1, 0 from dual union all
       select 1234, 'REMOTE', date '2014-09-27', 0, 1 from dual union all
       select 2431, 'cc'    , date '2014-03-27', 0, 0 from dual union all
       select 2431, 'cc'    , date '2014-06-27', 1, 0 from dual
     ),
-- Sample usage rows for table B. ANSI DATE literals replace
-- to_date(..., 'dd-MON-rr'), so the rows load the same way whatever the
-- session's NLS_DATE_LANGUAGE is.
table_b ( regid, sess_start_dt ) as (
  select 1234, date '2014-01-27' from dual union all
  select 1234, date '2012-03-20' from dual union all
  select 1234, date '2012-03-27' from dual union all
  select 1234, date '2014-09-01' from dual union all
  select 1234, date '2014-09-07' from dual union all
  select 1234, date '2014-07-29' from dual union all
  select 1234, date '2014-08-03' from dual union all
  select 1234, date '2013-03-27' from dual union all
  select 1234, date '2012-03-27' from dual union all
  select 1234, date '2012-03-27' from dual union all
  select 1234, date '2012-03-27' from dual union all
  select 1234, date '2012-03-27' from dual union all
  select 1234, date '2012-03-27' from dual union all
  select 1234, date '2012-03-27' from dual union all
  select 2431, date '2014-06-20' from dual
),
-- Ranks each regid's events: renewals first, then conversions, then the rest,
-- earliest event first within each class. Row rn = 1 is the one reported.
prep as (
  select regid,
         event_date,
         case
           when is_ren = 1 then 'REN'
           when is_con = 1 then 'CON'
         end as code,
         row_number() over (
           partition by regid
           order by case when is_ren = 1 then 0 when is_con = 1 then 1 else 2 end,
                    event_date
         ) as rn
  from table_a
)
-- Per regid: sessions in the 30 days up to the chosen event date and in the
-- 31 to 60 days before it.
select p.regid,
       p.event_date,
       count(case when b.sess_start_dt >= p.event_date - 30
                   and b.sess_start_dt <= p.event_date
                  then 1 end) as days_0_30,
       count(case when b.sess_start_dt >= p.event_date - 60
                   and b.sess_start_dt <= p.event_date - 31
                  then 1 end) as days_31_60,
       p.code
from   prep p
       join table_b b on b.regid = p.regid
where  p.rn = 1
group by p.regid, p.event_date, p.code
;
Output:
REGID EVENT_DATE DAYS_0_30 DAYS_31_60 COD
---------- ------------------- ---------- ---------- ---
1234 2014-09-27 00:00:00 2 2 REN
2431 2014-06-27 00:00:00 1 0 CON
I have table like this:
IST | FILEDATE | DATE | ...
1 | 2013-2014 | 27.03.2015 10:20:47 | ...
2 | 2013-2014 | 27.03.2015 10:20:47 | ...
3 | 2013-2014 | 27.03.2015 10:20:47 | ...
1 | 2013-2014 | 28.03.2015 11:20:47 | ...
2 | 2013-2014 | 28.03.2015 11:20:47 | ...
3 | 2013-2014 | 28.03.2015 11:20:47 | ...
1 | 2014-2015 | 29.03.2015 12:20:47 | ...
2 | 2014-2015 | 29.03.2015 12:20:47 | ...
3 | 2014-2015 | 29.03.2015 12:20:47 | ...
...
I need to select newest(with date value) entry of all IST, like this:
IST | FILEDATE | DATE | ...
1 | 2014-2015 | 29.03.2015 11:20:47 | ...
2 | 2014-2015 | 29.03.2015 11:20:47 | ...
3 | 2014-2015 | 29.03.2015 11:20:47 | ...
I tried order by and rownum=1, but its working for just single IST.
How can I do that? Thank you.
That's a typical scenario where analytical functions (aka windowing functions) are really helpful:
-- Newest entry per ist, found by numbering each ist's rows from newest to oldest.
with v_data (ist, filedate, entry_date) as (
    select 1, '2013-2014', to_date('27.03.2015 10:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 2, '2013-2014', to_date('27.03.2015 10:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 3, '2013-2014', to_date('27.03.2015 10:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 1, '2013-2014', to_date('28.03.2015 11:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 2, '2013-2014', to_date('28.03.2015 11:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 3, '2013-2014', to_date('28.03.2015 11:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 1, '2014-2015', to_date('29.03.2015 12:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 2, '2014-2015', to_date('29.03.2015 12:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual union all
    select 3, '2014-2015', to_date('29.03.2015 12:20:47', 'DD.MM.YYYY hh24:mi:ss') from dual
),
ranked as (
    -- rn = 1 marks the newest row within each ist
    select v1.*,
           row_number() over (partition by ist order by entry_date desc) as rn
    from   v_data v1
)
select *
from   ranked
where  rn = 1
This solution:
- computes an ordering per group using the ROW_NUMBER analytical function
- removes everything but the newest entry per group with WHERE rn = 1
You can first group the result:
-- Latest date per ist.
-- `tablename` is a placeholder for the real table. DATE and TABLE are
-- reserved words in Oracle and cannot be used as bare identifiers, so the
-- column is quoted and the aggregate gets a non-reserved alias.
-- NOTE(review): assumes the column was created as "DATE" (upper case);
-- substitute the real column name if it differs.
select ist, max("DATE") as max_date
from tablename
group by ist
Then you can combine that result with a select to get all matching lines:
-- All columns of the newest row per ist: join the table back to the
-- per-ist maximum date. If several rows of one ist share that maximum
-- date, all of them are returned.
-- `tablename` is a placeholder for the real table. DATE and TABLE are
-- reserved words in Oracle, so the column is quoted and the aggregate gets
-- a non-reserved alias.
-- NOTE(review): assumes the column was created as "DATE" (upper case);
-- substitute the real column name if it differs.
select master.*
from tablename master
inner join (
    select ist, max("DATE") as max_date
    from tablename
    group by ist
) filter
    on master.ist = filter.ist
   and master."DATE" = filter.max_date
Use NOT EXISTS to find the rows for which no newer row with the same ist exists in the table:
-- Keep every row t1 for which no row of the same ist has a later date,
-- i.e. the newest row(s) per ist.
-- DATE is a reserved word in Oracle, so the column reference is quoted.
-- NOTE(review): assumes the column was created as "DATE" (upper case);
-- substitute the real column name if it differs.
select *
from tablename t1
where not exists (select 1
                  from tablename t2
                  where t2.ist = t1.ist
                    and t2."DATE" > t1."DATE")
I have a query, which returns the following, EXCEPT for the last column, which is what I need to figure out how to create. For each given ObservationID I need to return the date on which the status changes; something like a LEAD() function that would take conditions and not just offsets. Can it be done?
I need to calculate the column Change Date; it should be the next date (within the same region) on which the status differs from the current row's status.
+---------------+--------+-----------+--------+-------------+
| ObservationID | Region | Date | Status | Change Date | <-This field
+---------------+--------+-----------+--------+-------------+
| 1 | 10 | 1/3/2012 | Ice | 1/4/2012 |
| 2 | 10 | 1/4/2012 | Water | 1/6/2012 |
| 3 | 10 | 1/5/2012 | Water | 1/6/2012 |
| 4 | 10 | 1/6/2012 | Gas | 1/7/2012 |
| 5 | 10 | 1/7/2012 | Ice | |
| 6 | 20 | 2/6/2012 | Water | 2/10/2012 |
| 7 | 20 | 2/7/2012 | Water | 2/10/2012 |
| 8 | 20 | 2/8/2012 | Water | 2/10/2012 |
| 9 | 20 | 2/9/2012 | Water | 2/10/2012 |
| 10 | 20 | 2/10/2012 | Ice | |
+---------------+--------+-----------+--------+-------------+
A MODEL clause (10g+) can do this in a compact way:
SQL> create table observation(ObservationID , Region ,obs_date, Status)
2 as
3 select 1, 10, date '2012-03-01', 'Ice' from dual union all
4 select 2, 10, date '2012-04-01', 'Water' from dual union all
5 select 3, 10, date '2012-05-01', 'Water' from dual union all
6 select 4, 10, date '2012-06-01', 'Gas' from dual union all
7 select 5, 10, date '2012-07-01', 'Ice' from dual union all
8 select 6, 20, date '2012-06-02', 'Water' from dual union all
9 select 7, 20, date '2012-07-02', 'Water' from dual union all
10 select 8, 20, date '2012-08-02', 'Water' from dual union all
11 select 9, 20, date '2012-09-02', 'Water' from dual union all
12 select 10, 20, date '2012-10-02', 'Ice' from dual ;
Table created.
SQL> select ObservationID, obs_date, Status, status_change
2 from observation
3 model
4 dimension by (Region, obs_date, Status)
5 measures ( ObservationID, obs_date obs_date2, cast(null as date) status_change)
6 rules (
7 status_change[any,any,any] = min(obs_date2)[cv(Region), obs_date > cv(obs_date), status != cv(status)]
8 )
9 order by 1;
OBSERVATIONID OBS_DATE STATU STATUS_CH
------------- --------- ----- ---------
1 01-MAR-12 Ice 01-APR-12
2 01-APR-12 Water 01-JUN-12
3 01-MAY-12 Water 01-JUN-12
4 01-JUN-12 Gas 01-JUL-12
5 01-JUL-12 Ice
6 02-JUN-12 Water 02-OCT-12
7 02-JUL-12 Water 02-OCT-12
8 02-AUG-12 Water 02-OCT-12
9 02-SEP-12 Water 02-OCT-12
10 02-OCT-12 Ice
fiddle: http://sqlfiddle.com/#!4/f6687/1
That is, we dimension on region, date and status because we want to look at cells with the same region but find the first date on which the status differs.
We also have to measure the date, so I created an alias obs_date2 for it, and we want a new column status_change to hold the date the status changed.
This is the line that does all the work for us:
status_change[any,any,any] = min(obs_date2)[cv(Region), obs_date > cv(obs_date), status != cv(status)]
It says: for our three dimensions, only look at the rows with the same region (cv(Region)), where the date follows the date of the current row (obs_date > cv(obs_date)) and the status is different from the current row's status (status != cv(status)); then take the minimum date that satisfies these conditions (min(obs_date2)) and assign it to status_change. The any,any,any part on the left means that this calculation applies to all rows.
I've tried many times to understand the MODEL clause and never really quite managed it, so thought I would add another solution
This solution takes some of what Ronnis has done but instead uses the IGNORE NULLS clause of the LEAD function. I think that this is only new with Oracle 11 but you could probably replace it with the FIRST_VALUE function for Oracle 10 if necessary.
-- For every observation, report the date of the next status change within
-- its region.
with flagged as (
    select
        o.observation_id,
        o.region,
        o.observation_date,
        o.status,
        -- 'Y' when the status differs from the previous observation of the
        -- region; the first observation of a region has no predecessor, so
        -- the comparison is not true and it is flagged as well
        case
            when o.status = lag(o.status) over (partition by o.region order by o.observation_date)
            then null
            else 'Y'
        end as is_change
    from observations o
)
select
    f.observation_id,
    f.region,
    f.observation_date,
    f.status,
    -- only flagged rows contribute a date; IGNORE NULLS makes LEAD skip
    -- ahead to the next flagged row
    lead(case when f.is_change = 'Y' then f.observation_date end) ignore nulls
        over (partition by f.region order by f.observation_date) as change_observation_date
from flagged f
order by f.observation_id
I frequently do this when cleaning up overlapping from/to-dates and duplicate rows.
Your case is much simpler though, since you only have the "from-date" :)
Setting up the test data
-- Test fixture: one row per observation of a region's status on a date.
create table observations (
    observation_id   number       not null,
    region           number       not null,
    observation_date date         not null,
    status           varchar2(10) not null
);
-- Sample data: two regions with five observations each.
insert into observations (observation_id, region, observation_date, status)
    select  1, 10, date '2012-03-01', 'Ice'   from dual union all
    select  2, 10, date '2012-04-01', 'Water' from dual union all
    select  3, 10, date '2012-05-01', 'Water' from dual union all
    select  4, 10, date '2012-06-01', 'Gas'   from dual union all
    select  5, 10, date '2012-07-01', 'Ice'   from dual union all
    select  6, 20, date '2012-06-02', 'Water' from dual union all
    select  7, 20, date '2012-07-02', 'Water' from dual union all
    select  8, 20, date '2012-08-02', 'Water' from dual union all
    select  9, 20, date '2012-09-02', 'Water' from dual union all
    select 10, 20, date '2012-10-02', 'Ice'   from dual;
commit;
The query below has three points of interest (marked 1-3 in its comments):
1. Identifying repeated information (the recording shows the same status as the previous recording)
2. Ignoring the repeated recordings
3. Determining the date of the "next" change
.
-- Collapse consecutive recordings with an unchanged status into one row per
-- status period and report when each period ends and how long it lasted.
with lagged as (
    select
        o.*,
        -- (1) null when the status repeats the previous recording of the
        --     region, otherwise a non-null marker
        case
            when o.status = lag(o.status) over (partition by o.region order by o.observation_date)
            then null
            else rownum
        end as change_flag
    from observations o
)
select
    l.observation_id,
    l.region,
    l.observation_date,
    l.status,
    -- (3) the analytic functions run after the WHERE filter, so the "next"
    --     row is the next status change of the region
    lead(l.observation_date) over (
        partition by l.region
        order by l.observation_date
    ) as change_date,
    -- days until the next change; the last period of a region is measured
    -- up to sysdate
    lead(l.observation_date, 1, sysdate) over (
        partition by l.region
        order by l.observation_date
    ) - l.observation_date as duration
from lagged l
where l.change_flag is not null -- (2) drop the repeated recordings
;