SQL Query to find the Row with first change of data - sql

UniqueId
ITEM
DATE
1
A
2022-01-01
2
A
2022-01-02
3
B
2022-01-03
4
B
2022-01-04
5
A
2022-01-05
6
A
2022-01-06
7
B
2022-01-07
8
B
2022-01-08
9
A
2022-01-09
10
A
2022-01-10
11
A
2022-01-11
I have above table where the item is changing from A to B and then B to A (etc).
The the most recent item in the table based on the date is A (the last row).
I need to find the date on which this last item (A) was started to be in effect.
So in this case the item A was in effect from 2022-01-09 onwards (UniqueId 9).
How can I find the UniqueId or the date of item A, where it got changed to be in effect (Row 9)?
Thank you.

with data as (
select *,
last_value(item) over (order by "date") as last_item,
lag(item) over (order by "date") as prev_item
from T
)
select
max(case when item = last_item and item <> prev_item then "date" end) as max_date
from data;
or
with data as (
select *,
case when item <> lag(item) over (order by "date")
and item = last_value(item) over (order by "date")
then 1 end as flag
from T
)
select max("date") as last_transition_date
from data
where flag = 1;
https://dbfiddle.uk/?rdbms=sqlserver_2019&fiddle=bd5f6398c0167d74c26a67fafac5225e
Supposing you need all the data:
with data as (
select *,
case when item <> lag(item) over (order by "date")
and item = last_value(item) over (order by "date")
then 1 end as flag
from T
)
select *,
max(case when flag = 1 then "date" end) over () as last_transition_date
from data;

Getting a flag using a comparison of current item with previous item in time, using LAG() is indeed the way.
But it's absolutely sufficient to get the highest date and highest unique (as both are sorted ascending together) where the obtained flag is 1:
WITH
-- your input
indata(UniqueId,ITEM,DATE) AS (
SELECT 1,'A',DATE '2022-01-01'
UNION ALL SELECT 2,'A',DATE '2022-01-02'
UNION ALL SELECT 3,'B',DATE '2022-01-03'
UNION ALL SELECT 4,'B',DATE '2022-01-04'
UNION ALL SELECT 5,'A',DATE '2022-01-05'
UNION ALL SELECT 6,'A',DATE '2022-01-06'
UNION ALL SELECT 7,'B',DATE '2022-01-07'
UNION ALL SELECT 8,'B',DATE '2022-01-08'
UNION ALL SELECT 9,'A',DATE '2022-01-09'
UNION ALL SELECT 10,'A',DATE '2022-01-10'
UNION ALL SELECT 11,'A',DATE '2022-01-11'
)
-- real query starts here; replace following comma with "WITH"
,
w_change_ind AS (
SELECT
*
, CASE WHEN LAG(item) OVER(ORDER BY date) <> item
THEN 1
ELSE 0
END AS chg_ind
FROM indata
)
SELECT
MAX(uniqueid) AS uqid
, MAX(date) AS dt
FROM w_change_ind
WHERE chg_ind=1
;
-- out uqid | dt
-- out ------+------------
-- out 9 | 2022-01-09

Based on your description, this is one way to do what you want.
select top 1 * from table1
where item ='A'
order by uniqueid desc
If this is not what you want, then you will have to provide additional information.

Related

SQL: Running total count of distinct values

I'm trying to obtain rolling number of unique values in a window.
Here's how my table looks like:
SELECT
user_id
, order_date
, product
FROM example_table
WHERE user_id = 1
ORDER BY order_date ASC
user_id
order_date
product
1
2021-01-01
A
1
2021-01-01
B
1
2021-01-04
A
1
2021-01-07
C
1
2021-01-09
C
1
2021-01-20
A
Here's what I'm trying to achieve:
user_id
order_date
product
cum_dist_count
1
2021-01-01
A
1
1
2021-01-02
B
2
1
2021-01-04
A
2
1
2021-01-07
C
3
1
2021-01-09
C
3
1
2021-01-20
A
3
In other words, I want to be able to see how many unique items a customer has bough so far, and be able to see that for particular date (so for the example above: on 2021-01-04 they have bought 2 unique items and for 2021-01-07 that number was 3).
I've tried grouping by selecting user_id and product, and min(order_date) in a CTE, then doing ROW_NUMBER over user_id and product in that CTE and that worked partially - I'm able to seethe dates the countof unique products has changed (so for this example: 2021-01-01, 2021-01-02 and 2021-01-07, but then I loose the rows "between" which I still want to be able to access.
with cte as (
SELECT
user_id
, product
, min(order_date) as first_order
FROM example_table
GROUP BY 1,2
ORDER BY order_date ASC
)
SELECT
user_id
, first_order
, product
, ROW_NUMBER() OVER (PARTITION BY user_id, product ORDER BY first_order) AS number_of_unique_products
WHERE user_id = 1
With the above, I would get:
user_id
order_date
product
cum_dist_count
1
2021-01-01
A
1
1
2021-01-02
B
2
1
2021-01-07
C
3
The DB is in BigQuery StandardSQL.
Any help is much appreciated!
For each item, you can record the earliest date it appears. Then add those up:
select et.* except (seqnum),
countif(seqnum = 1) over (partition by user_id order by order_date) as running_distinct_count
from (select et.*,
row_number() over (partition by user_id, product order by order_date) as seqnum
from example_table et
) et
Below is for BigQuery
select * except(cum_products),
(select count(distinct product) from t.cum_products product) as cum_dist_count
from (
select *,
array_agg(product) over prev_rows as cum_products
from example_table
window prev_rows as (partition by user_id order by order_date)
) t
if applied to sample data in your question
with example_table as (
select 1 user_id, '2021-01-01' order_date, 'A' product union all
select 1, '2021-01-02', 'B' union all
select 1, '2021-01-04', 'A' union all
select 1, '2021-01-07', 'C' union all
select 1, '2021-01-09', 'C' union all
select 1, '2021-01-20', 'A'
)
output is

PostgreSQL Pivot by Last Date

I need to make a PIVOT table from Source like this table
FactID UserID Date Product QTY
1 11 01/01/2020 A 600
2 11 02/01/2020 A 400
3 11 03/01/2020 B 500
4 11 04/01/2020 B 200
6 22 06/01/2020 A 1000
7 22 07/01/2020 A 200
8 22 08/01/2020 B 300
9 22 09/01/2020 B 100
Need Pivot Like this where Product QTY is QTY by Last Date
UserID A B
11 400 200
22 200 100
My try PostgreSQL
Select
UserID,
MAX(CASE WHEN Product='A' THEN 'QTY' END) AS 'A',
MAX(CASE WHEN Product='B' THEN 'QTY' END) AS 'B'
FROM table
GROUP BY UserID
And Result
UserID A B
11 600 500
22 1000 300
I mean I get a result by the maximum QTY and not by the maximum date!
What do I need to add to get results by the maximum (last) date ??
Postgres doesn't have "first" and "last" aggregation functions. One method for doing this (without a subquery) uses arrays:
select userid,
(array_agg(qty order by date desc) filter (where product = 'A'))[1] as a,
(array_agg(qty order by date desc) filter (where product = 'B'))[1] as b
from tab
group by userid;
Another method uses select distinct with first_value():
select distinct userid,
first_value(qty) over (partition by userid order by product = 'A' desc, date desc) as a,
first_value(qty) over (partition by userid order by product = 'B' desc, date desc) as b
from tab;
With the appropriate indexes, though, distinct on might be the fastest approach:
select userid,
max(qty) filter (where product = 'A') as a,
max(qty) filter (where product = 'B') as b
from (select distinct on (userid, product) t.*
from tab t
order by userid, product, date desc
) t
group by userid;
In particular, this can use an index on userid, product, date desc). The improvement in performance will be most notable if there are many dates for a given user.
You can use DENSE_RANK() window function in order to filter by the last date per each product and UserID before applying conditional aggregation such as
SELECT UserID,
MAX(CASE WHEN Product='A' THEN QTY END) AS "A",
MAX(CASE WHEN Product='B' THEN QTY END) AS "B"
FROM
(
SELECT t.*, DENSE_RANK() OVER (PARTITION BY Product,UserID ORDER BY Date DESC) AS rn
FROM tab t
) q
WHERE rn = 1
GROUP BY UserID
Demo
presuming all date values are distinct(no ties occur for dates)

Picking up latest 2 records from table in hive

Team, I have a scenario here.
I need to pick 2 latest record through Hql.
I have tried rownumber but does not seems to be getting expected out put
Select
A.emp_ref_i,
A.last_updt_d,
A.start_date,
case when A.Last_updt_d=max(A.Last_updt_d) over (partition by A.emp_ref_i)
and A.start_date=max(a.start_date) over (partition by A.emp_ref_i)
then 'Y' else 'N' end as Valid_f,
a.CHANGE
from
(
select
distinct(emp_ref_i),
last_updt_d,
start_date,
CHANGE
from
PR) A
Currently getting output as
EMP_REF_I LAST_UPDT_D start_date Valid_f CHANGE
1 123 3/29/2020 2/3/2019 Y CHG3
2 123 3/30/2019 2/4/2018 N CHG2
3 123 3/29/2019 2/4/2018 N CHG1
but required:
EMP_REF_I LAST_UPDT_D start_date Valid_f CHANGE
1 123 3/29/2020 2/3/2019 Y CHG3
2 123 3/30/2019 2/4/2018 N CHG2
Use row_number and filter:
select s.emp_ref_i,
s.last_updt_d,
s.start_date,
case when rn=1 then 'Y' else 'N' end Valid_f,
s.change
from
(
Select
A.*,
row_number() over(partition by A.emp_ref_i order by a.Last_updt_d desc, a.start_date desc) rn
from (...) A
)s
where rn<=2;

Oracle SQL - Return rows where there is at least one row in a group for the current month and there has been a change in class in a previous month

I am trying to output rows that meet the following conditions:
At least one row for the ClientID must be in the current month (only interested in the most recent row for the Client ID in that month)
The class in current month for the ClientID is different to the immediately previous row from an earlier month for the ClientID
My data can have multiple rows per client per month and I am only interested in the latest row per month per client.
Here is a sample of my data:
ID Client ID Class Date
14609 87415 C 04/DEC/18
13859 87415 X 16/AUG/18
11906 87415 C 27/FEB/17
10667 87415 B 23/JAN/17
14538 132595 D 03/DEC/18
14567 141805 C 04/DEC/18
14411 141805 A 27/NOV/18
Desired Output based on the above is:
ID Client ID Class Date
14609 87415 C 04/DEC/18
13859 87415 X 16/AUG/18
14567 141805 C 04/DEC/18
14411 141805 A 27/NOV/18
I have had multiple attempts at this with zero success. Any help would be greatly appreciated. My attempts have not been able to find the immediately previous row. :/
select * from
(
select drh.defaultriskhistid, drh.clientid, cv.description,
drh.updatetimestamp
from default_risk_history drh
inner join code_values cv on drh.defaultriskcodeid = cv.codevalueid
where
defaultriskhistid in
(select max(defaultriskhistid) from default_risk_history
group by clientid, ltrim(TO_CHAR(updatetimestamp,'mm-yyyy'),'0'))
) t
where
(
Select count(*) from default_risk_history drh1 where drh1.clientid =
t.clientid and ltrim(TO_CHAR(drh1.updatetimestamp,'mm-yyyy'),'0') =
ltrim(TO_CHAR(current_date,'mm-yyyy'),'0')
) >=1
order by clientid, updatetimestamp desc
You seem to want the two most recent rows, if they have different classes and the most recent one is in the current month. If so:
select t.*
from (select t.*,
max(date) over (partition by clientid) as max_date,
lag(class) over (partition by client_id order by date) as prev_class,
lead(class) over (partition by client_id order by date) as next_class,
row_number() over (partition by clientid order by date desc) as seqnum
from t
) t
where max_date >= trunc(sysdate, 'MON') and
( (seqnum = 1 and prev_class <> class) or
(seqnum = 2 and next_class <> class)
);
Here's one option:
SQL> alter session set nls_date_format = 'dd.mm.yyyy';
Session altered.
SQL> with test (client_id, class, datum) as
2 (select 87415, 'c', date '2018-12-04' from dual union all
3 select 87415, 'x', date '2018-08-16' from dual union all
4 select 87415, 'c', date '2017-02-27' from dual union all
5 select 87415, 'b', date '2017-01-23' from dual union all
6 --
7 select 132595, 'd', date '2018-12-03' from dual union all
8 select 141805, 'c', date '2018-12-04' from dual union all
9 select 141805, 'a', date '2018-11-27' from dual
10 ),
11 inter as
12 (select client_id,
13 class,
14 datum,
15 lag(class) over (partition by client_id order by datum desc) prev_class,
16 row_number() over (partition by client_id order by datum desc) rn
17 from test
18 )
19 select client_id, class, datum
20 from inter
21 where (class <> prev_class or prev_class is null)
22 and client_id in (select client_id from inter
23 group by client_id
24 having max(rn) >= 2
25 )
26 and rn <= 2
27 order by client_id, datum desc;
CLIENT_ID C DATUM
---------- - ----------
87415 c 04.12.2018
87415 x 16.08.2018
141805 c 04.12.2018
141805 a 27.11.2018
SQL>

Oracle SQL loop LEAD() through partition

I have a set that looks something like this
ID date_IN date_out
1 1/1/18 1/2/18
1 1/3/18 1/4/18
1 1/5/18 1/8/18
2 1/1/18 1/5/18
2 1/7/18 1/9/18
I began by
SELECT ID, date_IN, Date_out, lead(date_out) over ( partition by (ID)
order by ID) as next_out
From table
And get something like this...
ID date_IN date_out next_out
1 1/1/18 1/2/18 1/4/18
1 1/3/18 1/4/18 1/8/18
1 1/5/18 1/8/18 Null
2 1/1/18 1/5/18 1/9/18
2 1/7/18 1/9/18 Null
The problem I’m going to to have is that in my actual data many of the ID’s have A LOT of entries. The goal is to have all of the date_out’s appear on one row per ID....
ID date_IN date_out next_out next_out1 etc. etc.
1 1/1/18 1/2/18 1/4/18 1/8/18 X X
2 1/1/18 1/5/18 1/7/18 X Null Null
Is there a way to loop the lead() through the entire partition, order by ID drop everything but the first row then move on to the next ID?
Here is one approach, which assumes that you only expect to have a maximum of three date pairs per ID. You may assign a row number and then aggregate by ID:
WITH cte AS (
SELECT ID, date_IN, date_out,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY date_IN) rn
FROM yourTable
)
SELECT
ID,
MAX(CASE WHEN rn = 1 THEN date_IN END) AS date_IN,
MAX(CASE WHEN rn = 1 THEN date_out END) AS date_out,
MAX(CASE WHEN rn = 2 THEN next_IN END) AS next_in_1,
MAX(CASE WHEN rn = 2 THEN date_out END) AS next_out_2,
MAX(CASE WHEN rn = 3 THEN date_IN END) AS next_in_2,
MAX(CASE WHEN rn = 3 THEN date_out END) AS next_out_2
FROM cte
GROUP BY ID
No need to do a loop but use the offset option. Below is lifted from the documentation.
offset
Optional. It is the physical offset from the current row in the table.
If this parameter is omitted, the default is 1.
example; lead(date_out) means next value
lead(date_out, 2) means 2nd row after current row
lead(date_out, 3) 3rd row after current row and so on.
in your code; use below snippet;
lead(date_out) over ( partition by (ID) order by ID) as next_out,
lead(date_out, 2) over ( partition by (ID) order by ID) as next_out2,
lead(date_out, 3) over ( partition by (ID) order by ID) as next_out3
WITH TAB AS(
SELECT 1 ID, CAST('2018/01/01' AS DATE) DATE_IN, CAST('2018/01/02' AS DATE) DATE_OUT FROM DUAL
UNION
SELECT 1, CAST('2018/01/03' AS DATE) , CAST('2018/01/04' AS DATE) FROM DUAL
UNION
SELECT 1, CAST('2018/01/05' AS DATE) , CAST('2018/01/08' AS DATE) FROM DUAL
UNION
SELECT 1, CAST('2018/01/09' AS DATE) , CAST('2018/01/10' AS DATE) FROM DUAL
UNION
SELECT 1, CAST('2018/01/11' AS DATE) , CAST('2018/01/12' AS DATE) FROM DUAL
UNION
SELECT 2, CAST('2018/01/01' AS DATE) , CAST('2018/01/05' AS DATE) FROM DUAL
UNION
SELECT 2, CAST('2018/01/07' AS DATE) , CAST('2018/01/09' AS DATE) FROM DUAL
) --select * from tab;
, LEAF_CALC AS( --CONNECTING THE DATE_OUTS
SELECT
ID
,SYS_CONNECT_BY_PATH(DATE_OUT, '$') HRCHY
, LEVEL LVL
, CONNECT_BY_ISLEAF ISLEAF
FROM TAB
CONNECT BY PRIOR DATE_OUT < DATE_IN
START WITH ID = 1
) --SELECT * FROM LEAF_CALC;
, DATA_SORT AS( --ADDING ALL DATE_OUTS IN 1 ROW
SELECT
P.ID, P.HRCHY
FROM LEAF_CALC P,
(SELECT ID, MAX(LVL) MAXLVL FROM
LEAF_CALC
GROUP BY ID) C
WHERE P.ID = C.ID
AND P.LVL = C.MAXLVL
)--SELECT * FROM DATA_SORT
--SEGREGATING ALL DATES USING REGEXP_SUBSTR
SELECT
ID
, REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 1) DATE_IN
, REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 2) NEXT_OUT
, REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 3) NEXT_OUT2
, COALESCE(REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 4), 'NA') NEXT_OUT3
, COALESCE(REGEXP_SUBSTR(HRCHY, '[^$]+', 1, 5), 'NA') NEXT_OUT4
FROM DATA_SORT;