Oracle SQL loop LEAD() through partition - sql

I have a set that looks something like this
ID date_IN date_out
1 1/1/18 1/2/18
1 1/3/18 1/4/18
1 1/5/18 1/8/18
2 1/1/18 1/5/18
2 1/7/18 1/9/18
I began by
SELECT ID, date_IN, Date_out, lead(date_out) over ( partition by (ID)
order by ID) as next_out
From table
And get something like this...
ID date_IN date_out next_out
1 1/1/18 1/2/18 1/4/18
1 1/3/18 1/4/18 1/8/18
1 1/5/18 1/8/18 Null
2 1/1/18 1/5/18 1/9/18
2 1/7/18 1/9/18 Null
The problem I’m going to have is that in my actual data many of the IDs have A LOT of entries. The goal is to have all of the date_out values appear on one row per ID....
ID date_IN date_out next_out next_out1 etc. etc.
1 1/1/18 1/2/18 1/4/18 1/8/18 X X
2 1/1/18 1/5/18 1/7/18 X Null Null
Is there a way to loop the lead() through the entire partition, order by ID drop everything but the first row then move on to the next ID?

Here is one approach, which assumes that you only expect to have a maximum of three date pairs per ID. You may assign a row number and then aggregate by ID:
-- Pivot up to three (date_IN, date_out) pairs per ID onto one row.
-- rn numbers each ID's rows by date_IN; rn = 1 is the earliest pair.
-- Pairs beyond the third are not projected.
WITH cte AS (
SELECT ID, date_IN, date_out,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY date_IN) rn
FROM yourTable
)
SELECT
ID,
MAX(CASE WHEN rn = 1 THEN date_IN END) AS date_IN,
MAX(CASE WHEN rn = 1 THEN date_out END) AS date_out,
-- second pair: read date_IN (the CTE exposes no next_IN column)
MAX(CASE WHEN rn = 2 THEN date_IN END) AS next_in_1,
MAX(CASE WHEN rn = 2 THEN date_out END) AS next_out_1,
-- third pair
MAX(CASE WHEN rn = 3 THEN date_IN END) AS next_in_2,
MAX(CASE WHEN rn = 3 THEN date_out END) AS next_out_2
FROM cte
GROUP BY ID

No need to do a loop but use the offset option. Below is lifted from the documentation.
offset
Optional. It is the physical offset from the current row in the table.
If this parameter is omitted, the default is 1.
example; lead(date_out) means next value
lead(date_out, 2) means 2nd row after current row
lead(date_out, 3) 3rd row after current row and so on.
in your code; use below snippet;
-- Order each partition by date_IN: ordering by ID inside a partition on ID leaves the
-- row order (and therefore which row is "next") undefined.
lead(date_out) over ( partition by (ID) order by date_IN) as next_out,
lead(date_out, 2) over ( partition by (ID) order by date_IN) as next_out2,
lead(date_out, 3) over ( partition by (ID) order by date_IN) as next_out3

-- Builds a '$'-separated path of DATE_OUT values with a hierarchical query, keeps the
-- deepest path per ID, then splits the path back into columns with REGEXP_SUBSTR.
-- TAB: inline sample data (five rows for ID 1, two rows for ID 2).
-- NOTE(review): CAST('yyyy/mm/dd' AS DATE) presumably relies on the session NLS_DATE_FORMAT
-- matching the literal; confirm, or use DATE literals / TO_DATE with an explicit mask.
WITH TAB AS(
SELECT 1 ID, CAST('2018/01/01' AS DATE) DATE_IN, CAST('2018/01/02' AS DATE) DATE_OUT FROM DUAL
UNION
SELECT 1, CAST('2018/01/03' AS DATE) , CAST('2018/01/04' AS DATE) FROM DUAL
UNION
SELECT 1, CAST('2018/01/05' AS DATE) , CAST('2018/01/08' AS DATE) FROM DUAL
UNION
SELECT 1, CAST('2018/01/09' AS DATE) , CAST('2018/01/10' AS DATE) FROM DUAL
UNION
SELECT 1, CAST('2018/01/11' AS DATE) , CAST('2018/01/12' AS DATE) FROM DUAL
UNION
SELECT 2, CAST('2018/01/01' AS DATE) , CAST('2018/01/05' AS DATE) FROM DUAL
UNION
SELECT 2, CAST('2018/01/07' AS DATE) , CAST('2018/01/09' AS DATE) FROM DUAL
) --select * from tab;
-- LEAF_CALC: walks from each root row to any row whose DATE_IN is later than the parent's
-- DATE_OUT, accumulating DATE_OUT values into HRCHY; LVL is the depth of the row in its chain.
-- ID here is the ID of the row at the end of the chain, not of the root.
, LEAF_CALC AS( --CONNECTING THE DATE_OUTS
SELECT
ID
,SYS_CONNECT_BY_PATH(DATE_OUT, '$') HRCHY
, LEVEL LVL
, CONNECT_BY_ISLEAF ISLEAF
FROM TAB
-- NOTE(review): the condition compares dates only, with no PRIOR ID = ID term, so a chain
-- can step from one ID's row to another ID's row - confirm this is intended.
CONNECT BY PRIOR DATE_OUT < DATE_IN
-- NOTE(review): only rows with ID = 1 are used as roots; rows of other IDs never start a chain.
START WITH ID = 1
) --SELECT * FROM LEAF_CALC;
-- DATA_SORT: for each ID keeps the LEAF_CALC rows whose LVL equals that ID's maximum LVL.
, DATA_SORT AS( --ADDING ALL DATE_OUTS IN 1 ROW
SELECT
P.ID, P.HRCHY
FROM LEAF_CALC P,
(SELECT ID, MAX(LVL) MAXLVL FROM
LEAF_CALC
GROUP BY ID) C
WHERE P.ID = C.ID
AND P.LVL = C.MAXLVL
)--SELECT * FROM DATA_SORT
--SEGREGATING ALL DATES USING REGEXP_SUBSTR
-- Each REGEXP_SUBSTR call returns the n-th '$'-delimited token of HRCHY as a string.
-- The first token is aliased DATE_IN although HRCHY is built from DATE_OUT values.
-- Only the 4th and 5th tokens fall back to 'NA' when missing; the first three may be NULL.
SELECT
ID
, REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 1) DATE_IN
, REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 2) NEXT_OUT
, REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 3) NEXT_OUT2
, COALESCE(REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 4), 'NA') NEXT_OUT3
, COALESCE(REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 5), 'NA') NEXT_OUT4
FROM DATA_SORT;

Related

Flag rows that appear between rows with specific strings

Let's say I have a table like this:
user_id
order
action
1
1
start
1
2
other
1
3
other
1
4
end
1
5
other
2
1
start
2
2
other
2
3
end
2
4
other
2
5
start
2
6
other
2
7
end
And I want to create a new column that flags the rows that appear between "start" and "end" events for each user (ordering by "order"):
user_id
order
action
is_between_start_and_end
1
1
start
NULL
1
2
other
1
1
3
other
1
1
4
end
NULL
1
5
other
NULL
2
1
start
NULL
2
2
other
1
2
3
end
NULL
2
4
other
NULL
2
5
start
NULL
2
6
other
1
2
7
end
NULL
How can I achieve this?
Consider below approach
-- Flags rows that lie strictly between a 'start' and the next 'end' for each user.
-- grp (inner query) is the running count of 'start' rows per user in `order` sequence,
-- so every row belongs to the group opened by the most recent 'start'.
select * except(grp),
if(
-- grp = 0 means no 'start' has occurred yet for this user; such rows must not be flagged
grp > 0
-- no 'end' seen so far within the current group (up to and including this row)
and countif(action = 'end') over (partition by user_id, grp order by `order`) = 0
and action != 'start', 1, null
) as is_between_start_and_end
from (
select *,
countif(action = 'start') over (partition by user_id order by `order`) as grp
from your_table
)
If applied to the sample data in your question, the output is:
This can be solved with windows functions.
with tbl as (
Select 1 as user_id, 1 as order_it,"start" as action
Union all select 1 , 2 ,"other"
Union all select 1 , 3 ,"other"
Union all select 1 , 4 ,"end"
Union all select 1 , 5 ,"other"
Union all select 2 , 1 ,"start"
Union all select 2 , 2 ,"other"
Union all select 2 , 3 ,"end"
Union all select 2 , 4 ,"other"
Union all select 2 , 5 ,"start"
Union all select 2 , 6 ,"other"
Union all select 2 , 7 ,"end"
),
helper as (
Select *,
countif(action="end") over win_before as ends,
countif(action="start") over win_before as starts,
first_value(if(action="end" or action="start",action,null) ignore nulls) over (partition by user_id order by order_it rows between current row and unbounded following) as end_to_come
from tbl
window win_before as (partition by user_id order by order_it rows between unbounded preceding and current row)
order by user_id,order_it
)
select *,
if(end_to_come="end" and starts-ends=1,1,null) as is_between_start_and_end
from helper
order by user_id,order_it
This should work but could surely be more optimized
with input as (
select 1 user_id, 1 as order_, 'start' action union all
select 1, 2, 'other' union all
select 1, 3, 'other' union all
select 1 , 4 , 'end' union all
select 1 , 5 , 'other' union all
select 2 , 1 , 'start' union all
select 2 , 2 , 'other' union all
select 2 , 3 , 'end' union all
select 2 , 4 , 'other' union all
select 2 , 5 , 'start' union all
select 2 , 6 , 'other' union all
select 2 , 7 , 'end'
)
select
*,
if (
order_ > max(if(action = 'start', order_, null))
over(partition by user_id order by order_ range between unbounded preceding and current row) and
order_ < min(if(action = 'end', order_, null))
over(partition by user_id order by order_ range between current row and unbounded following) and
coalesce(order_ not between
max(if(action = 'end', order_, null))
over(partition by user_id order by order_ range between unbounded preceding and 1 preceding)
and min(if(action = 'start', order_, null))
over(partition by user_id order by order_ range between 1 following and unbounded following), true)
, 1, null) as flag
from input
order by 1,2
Edit: It should also take into account weird cases where for instance a 3rd user has end > other > start > other > end > other in that order. The flag should only apply to the 4th item. If you have start > other > start > other > end however, it's unclear if items 2,3,4 or 4 or 2,4 should be flagged. I think it would only flag 4 here
Edit2: This version should flag 2,3,4
if (
order_ > max(if(action = 'start', order_, null))
over(partition by user_id order by order_ range between unbounded preceding and 1 preceding) and
order_ < min(if(action = 'end', order_, null))
over(partition by user_id order by order_ range between current row and unbounded following) and
coalesce(max(if(action = 'start', order_, null))
over(partition by user_id order by order_ range between unbounded preceding and 1 preceding) >
max(if(action = 'end', order_, null))
over(partition by user_id order by order_ range between unbounded preceding and current row),true)
, 1, null) as flag

SQL Query to find the Row with first change of data

UniqueId
ITEM
DATE
1
A
2022-01-01
2
A
2022-01-02
3
B
2022-01-03
4
B
2022-01-04
5
A
2022-01-05
6
A
2022-01-06
7
B
2022-01-07
8
B
2022-01-08
9
A
2022-01-09
10
A
2022-01-10
11
A
2022-01-11
I have above table where the item is changing from A to B and then B to A (etc).
The most recent item in the table based on the date is A (the last row).
I need to find the date on which this last item (A) was started to be in effect.
So in this case the item A was in effect from 2022-01-09 onwards (UniqueId 9).
How can I find the UniqueId or the date of item A, where it got changed to be in effect (Row 9)?
Thank you.
-- Date on which the most recent item (by "date") last came into effect.
with data as (
select *,
-- Explicit full frame: with only ORDER BY the default frame ends at the current row,
-- so last_value would return each row's own item instead of the final item.
last_value(item) over (order by "date" rows between unbounded preceding and unbounded following) as last_item,
lag(item) over (order by "date") as prev_item
from T
)
select
-- latest row that carries the final item and differs from the row before it
max(case when item = last_item and item <> prev_item then "date" end) as max_date
from data;
or
-- Flags every row where the item changes to the final item, then returns the latest such date.
with data as (
select *,
case when item <> lag(item) over (order by "date")
-- full frame so last_value yields the final item rather than the current row's item
and item = last_value(item) over (order by "date" rows between unbounded preceding and unbounded following)
then 1 end as flag
from T
)
select max("date") as last_transition_date
from data
where flag = 1;
https://dbfiddle.uk/?rdbms=sqlserver_2019&fiddle=bd5f6398c0167d74c26a67fafac5225e
Supposing you need all the data:
-- Returns every row of T together with the date on which the final item last came into effect.
with data as (
select *,
case when item <> lag(item) over (order by "date")
-- full frame so last_value yields the final item rather than the current row's item
and item = last_value(item) over (order by "date" rows between unbounded preceding and unbounded following)
then 1 end as flag
from T
)
select *,
-- empty OVER (): the same value is repeated on every row
max(case when flag = 1 then "date" end) over () as last_transition_date
from data;
Getting a flag using a comparison of current item with previous item in time, using LAG() is indeed the way.
But it's absolutely sufficient to get the highest date and highest unique (as both are sorted ascending together) where the obtained flag is 1:
WITH
-- your input
indata(UniqueId,ITEM,DATE) AS (
SELECT 1,'A',DATE '2022-01-01'
UNION ALL SELECT 2,'A',DATE '2022-01-02'
UNION ALL SELECT 3,'B',DATE '2022-01-03'
UNION ALL SELECT 4,'B',DATE '2022-01-04'
UNION ALL SELECT 5,'A',DATE '2022-01-05'
UNION ALL SELECT 6,'A',DATE '2022-01-06'
UNION ALL SELECT 7,'B',DATE '2022-01-07'
UNION ALL SELECT 8,'B',DATE '2022-01-08'
UNION ALL SELECT 9,'A',DATE '2022-01-09'
UNION ALL SELECT 10,'A',DATE '2022-01-10'
UNION ALL SELECT 11,'A',DATE '2022-01-11'
)
-- real query starts here; replace following comma with "WITH"
,
w_change_ind AS (
SELECT
*
, CASE WHEN LAG(item) OVER(ORDER BY date) <> item
THEN 1
ELSE 0
END AS chg_ind
FROM indata
)
SELECT
MAX(uniqueid) AS uqid
, MAX(date) AS dt
FROM w_change_ind
WHERE chg_ind=1
;
-- out uqid | dt
-- out ------+------------
-- out 9 | 2022-01-09
Based on your description, this is one way to do what you want.
-- First row of the most recent run of identical items.
-- A row belongs to the final run when no later row holds a different item;
-- the earliest such row is where the current item came into effect.
-- (Ordering by uniqueid desc alone would return the newest row, not the start of its run.)
select top 1 t.*
from table1 t
where not exists (
select 1
from table1 later
where later.uniqueid > t.uniqueid
and later.item <> t.item
)
order by t.uniqueid
If this is not what you want, then you will have to provide additional information.

Oracle SQL query to get the difference value with two where clause

I'm trying to write a query that gets the difference of avg(score1+score2/2) between the 'current' row and the most recent 'archived' row, to build a chart in Oracle Apex.
Table name: myTable
id | score1 | score2 | status | date
------------------------------------------
1 | 10 | 20 | current| 07/09/19
2 | 20 | 30 |archived| 04/09/19
3 | 15 | 35 |archived| 02/09/19
wanted the result: (avg(score1 + score2/2) where status = 'current') - (avg(score1 + score2/2) where status = 'archived' only the most recent)
Im tried
Hmmm . . . one method is conditional aggregation:
-- Difference between the 'current' score average and the most recent 'archived' one.
-- The inner query ranks rows per status by date, newest first; seqnum = 1 keeps the
-- latest row of each status, so each MAX() sees at most one value.
-- NOTE: DATE is a reserved word in Oracle; quote or rename the column in a real table.
select max(case when status = 'current' then score_avg end) as current_score,
max(case when status = 'archived' then score_avg end) as last_archive_score,
(max(case when status = 'current' then score_avg end) -
max(case when status = 'archived' then score_avg end)
) as diff
from (select t.*,
row_number() over (partition by status order by date desc) as seqnum,
(score1 + score2) / 2 as score_avg
from t
) t
where seqnum = 1;
I am guessing that you really want (score1 + score2) / 2. However, if you want score1 + score2 / 2, then use that expression instead.
Do you want this?
select status , avg(score1 + score2/2) from you_table
group by status
or
select (select avg(score1 + score2/2) from you_table
where status='current')-(select avg(score1 + score2/2) from you_table
where status='archived') diff from dual
One option would be using
min/max(score1) keep (dense_rank first order by "date" desc) over (partition by status)
to compute the archived case, and an ordinary arithmetic average computation for current case (depending on the sample data, there exists only one row for current case )
with myTable( id, score1, score2, status, "date" )as
(
select 1, 10, 20, 'current' , date'2019-09-07' from dual union all
select 2, 20, 30, 'archived', date'2019-09-04' from dual union all
select 3, 15, 35, 'archived', date'2019-09-02' from dual
), t as
(
select
case when status = 'current' then ( score1 + score2 ) / 2 end as curr,
case when status = 'archived' then
(
(
min(score1) keep (dense_rank first order by "date" desc) over (partition by status)+
min(score2) keep (dense_rank first order by "date" desc) over (partition by status)
)/2
)
end as arch
from myTable
)
select max(curr)-max(arch) as "Avg.Result"
from t;
Demo

Oracle SQL - Return rows where there is at least one row in a group for the current month and there has been a change in class in a previous month

I am trying to output rows that meet the following conditions:
At least one row for the ClientID must be in the current month (only interested in the most recent row for the Client ID in that month)
The class in current month for the ClientID is different to the immediately previous row from an earlier month for the ClientID
My data can have multiple rows per client per month and I am only interested in the latest row per month per client.
Here is a sample of my data:
ID Client ID Class Date
14609 87415 C 04/DEC/18
13859 87415 X 16/AUG/18
11906 87415 C 27/FEB/17
10667 87415 B 23/JAN/17
14538 132595 D 03/DEC/18
14567 141805 C 04/DEC/18
14411 141805 A 27/NOV/18
Desired Output based on the above is:
ID Client ID Class Date
14609 87415 C 04/DEC/18
13859 87415 X 16/AUG/18
14567 141805 C 04/DEC/18
14411 141805 A 27/NOV/18
I have had multiple attempts at this with zero success. Any help would be greatly appreciated. My attempts have not been able to find the immediately previous row. :/
select * from
(
select drh.defaultriskhistid, drh.clientid, cv.description,
drh.updatetimestamp
from default_risk_history drh
inner join code_values cv on drh.defaultriskcodeid = cv.codevalueid
where
defaultriskhistid in
(select max(defaultriskhistid) from default_risk_history
group by clientid, ltrim(TO_CHAR(updatetimestamp,'mm-yyyy'),'0'))
) t
where
(
Select count(*) from default_risk_history drh1 where drh1.clientid =
t.clientid and ltrim(TO_CHAR(drh1.updatetimestamp,'mm-yyyy'),'0') =
ltrim(TO_CHAR(current_date,'mm-yyyy'),'0')
) >=1
order by clientid, updatetimestamp desc
You seem to want the two most recent rows, if they have different classes and the most recent one is in the current month. If so:
-- Returns a client's two most recent rows when their classes differ and the newest row
-- falls in the current month. The client column is spelled clientid throughout
-- (the original mixed clientid and client_id within one query).
select t.*
from (select t.*,
max(date) over (partition by clientid) as max_date,
lag(class) over (partition by clientid order by date) as prev_class,
lead(class) over (partition by clientid order by date) as next_class,
-- seqnum 1 = newest row of the client, 2 = the row immediately before it
row_number() over (partition by clientid order by date desc) as seqnum
from t
) t
where max_date >= trunc(sysdate, 'MON') and
( (seqnum = 1 and prev_class <> class) or
(seqnum = 2 and next_class <> class)
);
Here's one option:
SQL> alter session set nls_date_format = 'dd.mm.yyyy';
Session altered.
SQL> with test (client_id, class, datum) as
2 (select 87415, 'c', date '2018-12-04' from dual union all
3 select 87415, 'x', date '2018-08-16' from dual union all
4 select 87415, 'c', date '2017-02-27' from dual union all
5 select 87415, 'b', date '2017-01-23' from dual union all
6 --
7 select 132595, 'd', date '2018-12-03' from dual union all
8 select 141805, 'c', date '2018-12-04' from dual union all
9 select 141805, 'a', date '2018-11-27' from dual
10 ),
11 inter as
12 (select client_id,
13 class,
14 datum,
15 lag(class) over (partition by client_id order by datum desc) prev_class,
16 row_number() over (partition by client_id order by datum desc) rn
17 from test
18 )
19 select client_id, class, datum
20 from inter
21 where (class <> prev_class or prev_class is null)
22 and client_id in (select client_id from inter
23 group by client_id
24 having max(rn) >= 2
25 )
26 and rn <= 2
27 order by client_id, datum desc;
CLIENT_ID C DATUM
---------- - ----------
87415 c 04.12.2018
87415 x 16.08.2018
141805 c 04.12.2018
141805 a 27.11.2018
SQL>

get the most two recent dates for each customer

basically, I need to retrieve the last two dates for customers who purchased in at least two different dates, implying there are some customer who had purchased only in one date, the data has the following form
client_id date
1 2016-07-02
1 2016-07-02
1 2016-06-01
2 2015-06-01
and I would like to get it in the following form
client_id previous_date last_date
1 2016-06-01 2016-07-02
Remarks:
a client can have multiple entries for the same date
a client can have entries only for one date, such customer should be discarded
Rank your dates with DENSE_RANK. Then group by client_id and show the last dates (ranked #1 and #2).
select
client_id,
max(case when rn = 2 then date end) as previous_date,
max(case when rn = 1 then date end) as last_date
from
(
select
client_id,
date,
dense_rank() over (partition by client_id order by date desc) as rn
from mytable
)
group by client_id
having max(rn) > 1;
build up:
t=# create table s153 (c int, d date);
CREATE TABLE
t=# insert into s153 values (1,'2016-07-02'), (1,'2016-07-02'),(1,'2016-06-01'),(2,'2016-06-01');
INSERT 0 4
query:
t=# with a as (
-- one row per client and date
select distinct c,d from s153
)
, b as (
-- Newest date first, with the frame spanning the whole partition so that nth_value(d,2)
-- is visible from every row; DISTINCT then collapses each client to a single row.
select distinct c,nth_value(d,1) over w last_date, nth_value(d,2) over w prev_date
from a
window w as (partition by c order by d desc rows between unbounded preceding and unbounded following)
)
-- clients with only one distinct date have no second value and are dropped
select * from b where prev_date is not null
;
 c | last_date  | prev_date
---+------------+------------
 1 | 2016-07-02 | 2016-06-01
(1 row)
UNTESTED:
We use a common table expression to assign a row number based on the date in descending order and then only include those records having a row number <=2 and then ensure that those having 1 row are excluded by the having.
-- Last two distinct dates per client; clients with a single date are excluded.
WITH CTE AS (
SELECT Client_ID
, Date
, row_number() over (partition by Client_ID order by Date desc) rn
-- De-duplicate before numbering: DISTINCT in the same SELECT as row_number() removes
-- nothing, because the row number makes every row unique.
FROM (SELECT DISTINCT Client_ID, Date FROM Table) d)
SELECT Client_ID, min(Date) previous_date, max(Date) last_date
FROM CTE
WHERE rn <= 2
GROUP BY Client_ID
HAVING max(rn) > 1
All you need is a group by...
--test date
-- Table variables are declared with @ (the # prefix is for temp tables made with CREATE TABLE).
declare @tablename TABLE
(
client_id int,
[date] datetime
);
insert into @tablename
values( 1 , '2016-07-02'),
(1 , '2016-07-02'),
(1 , '2016-06-01'),
(2 , '2015-06-01');
--query
-- Rank each client's distinct dates, newest first, then pick ranks 1 and 2.
-- A plain MIN/MAX would return the earliest date rather than the previous one
-- and would keep clients that have only a single date.
SELECT client_id,
MAX(CASE WHEN rnk = 2 THEN [date] END) AS [PREVIOUS_DATE],
MAX(CASE WHEN rnk = 1 THEN [date] END) AS [LAST_DATE]
FROM (
SELECT client_id, [date],
DENSE_RANK() OVER (PARTITION BY client_id ORDER BY [date] DESC) AS rnk
FROM @tablename
) AS ranked
GROUP BY client_id
HAVING MAX(rnk) > 1
Updated
-- create data
create table myTable
(
client_id integer,
given_date date
);
insert into myTable
values( 1 , '2016-07-02'),
(1 , '2016-07-02'),
(1 , '2016-06-01'),
(1 , '2016-06-03'),
(1 , '2016-06-09'),
(2 , '2015-06-01'),
(3 , '2016-06-03'),
(3 , '2016-06-09');
-- query
SELECT sub.client_id, sub.PREVIOUS_DATE, sub.LAST_DATE
FROM
(select
ROW_NUMBER() OVER (PARTITION BY a.client_id order by b.given_date desc,(MAX(b.given_date) - a.given_date)) AS ROW_NUMBER,
a.client_id,a.given_date AS PREVIOUS_DATE, MAX(b.given_date) - a.given_date AS diff, (b.given_date) AS LAST_DATE
FROM myTable AS a
JOIN myTable AS b
ON b.client_id = a.client_id
WHERE a.given_date <> b.given_date
group by a.client_id, a.given_date, b.given_date) AS sub
WHERE sub.ROW_NUMBER = 1