Oracle - Join most recent status at the time of activity - sql

I have two tables - one with employee activity and one with employee_status. The issue is the employee status changes over time, so I need to join the status as it was at the time of the session.
>>> employee_activity
id session_start
emp1 1/1/2019
emp1 2/22/2019
emp1 3/1/2019
emp2 1/4/2019
emp2 2/23/2019
>>> employee_status
id status effective date
emp1 a 1/1/2018
emp1 b 2/1/2019
emp1 c 3/5/2019
emp2 a 6/1/2018
emp2 b 1/1/2019
So I started writing something that makes sure statuses dated after the activity are ignored, but I'm struggling a bit with figuring out how to select only the most recent status. The query needs to join only the status with the max
effective date that is no later than the session start
SELECT * FROM employee_activity a
LEFT join employee_status s on a.id = s.id WHERE s.effective_date <= a.session_start
-- how do I join only the most recent status?
The desired output from the two tables above would be
>>> my_output
id session_start status
emp1 1/1/2019 a
emp1 2/22/2019 b
emp1 3/1/2019 b
emp2 1/4/2019 b
emp2 2/23/2019 b
Thanks!!

First calculate the validity interval of each STATUS row, i.e. instead of a single EFFECTIVE_DATE you get a starting and an ending timestamp.
Note that I use a default open end date, and that I subtract one second from the end date to get a closed interval which can be queried using BETWEEN.
Then simply join on the key and add the BETWEEN constraint for the time:
-- Turn each status row into a closed validity interval
-- [status_valid_from, status_valid_to], then attach to every session the
-- status whose interval contains the session start.
WITH status_interval AS (
    SELECT
        es.id,
        es.status,
        es.effective_date AS status_valid_from,
        -- End of validity = one second before the next status of the same id;
        -- the latest status has no successor and gets the open-end default.
        LEAD(es.effective_date - INTERVAL '1' SECOND, 1, DATE '2500-01-01')
            OVER (PARTITION BY es.id ORDER BY es.effective_date) AS status_valid_to
    FROM employee_status es
)
SELECT
    act.id,
    act.session_start,
    si.status,
    si.status_valid_from
FROM employee_activity act
LEFT JOIN status_interval si
    ON si.id = act.id
   AND act.session_start BETWEEN si.status_valid_from AND si.status_valid_to
ORDER BY act.id, act.session_start;
ID SESSION_START S STATUS_VALID_FROM
---- ------------------- - -------------------
emp1 01.01.2019 00:00:00 a 01.01.2018 00:00:00
emp1 22.02.2019 00:00:00 b 01.02.2019 00:00:00
emp1 01.03.2019 00:00:00 b 01.02.2019 00:00:00
emp2 04.01.2019 00:00:00 b 01.01.2019 00:00:00
emp2 23.02.2019 00:00:00 b 01.01.2019 00:00:00
Sample Data
-- Sample sessions: one row per employee session start.
-- Column names are taken from the first SELECT of each UNION ALL chain.
CREATE TABLE employee_activity AS
SELECT 'emp1' id, DATE '2019-01-01' session_start FROM dual UNION ALL
SELECT 'emp1',    DATE '2019-02-22'               FROM dual UNION ALL
SELECT 'emp1',    DATE '2019-03-01'               FROM dual UNION ALL
SELECT 'emp2',    DATE '2019-01-04'               FROM dual UNION ALL
SELECT 'emp2',    DATE '2019-02-23'               FROM dual;
-- Sample status history: each row is a status and the date it takes effect.
CREATE TABLE employee_status AS
SELECT 'emp1' id, 'a' status, DATE '2018-01-01' effective_date FROM dual UNION ALL
SELECT 'emp1',    'b',        DATE '2019-02-01'                FROM dual UNION ALL
SELECT 'emp1',    'c',        DATE '2019-03-05'                FROM dual UNION ALL
SELECT 'emp2',    'a',        DATE '2018-06-01'                FROM dual UNION ALL
SELECT 'emp2',    'b',        DATE '2019-01-01'                FROM dual;

You can do this using a correlated subquery:
-- For each activity row, look up the status in force at session_start:
-- among the statuses effective on or before the session, keep the one
-- with the latest effective_date.
SELECT
    ea.*,
    (
        SELECT MAX(es.status) KEEP (DENSE_RANK LAST ORDER BY es.effective_date)
        FROM employee_status es
        WHERE es.effective_date <= ea.session_start
          AND es.id = ea.id
    ) AS status
FROM employee_activity ea;
In Oracle 12C+, there is the more intuitive:
-- Same lookup using the row-limiting clause: sort the candidate statuses
-- newest first and take the top row.
SELECT
    ea.*,
    (
        SELECT es.status
        FROM employee_status es
        WHERE es.effective_date <= ea.session_start
          AND es.id = ea.id
        ORDER BY es.effective_date DESC
        FETCH FIRST 1 ROW ONLY
    ) AS status
FROM employee_activity ea;

Related

How do I find the next status and next date for a subscriber and offer?

How do I find the next status and next date for a subscriber and offer?
my table:
-- Sample data. String values must be quoted and dates written as DATE literals,
-- otherwise offer_1 / Active are parsed as identifiers and 01/01/2021 as a division.
-- Dates are read here as mm/dd/yyyy (02/01/2022 = 1 February 2022);
-- adjust the literals if dd/mm/yyyy was meant.
create table myTable (user_id, offer_id, status, status_date) as
select 1, 'offer_1', 'Active',   DATE '2021-01-01' from dual union all
select 1, 'offer_1', 'Deactive', DATE '2022-01-01' from dual union all
select 1, 'offer_2', 'Active',   DATE '2022-02-01' from dual;
expected table:
u_id
offer_id
status
status_date
next_status
next_status_date
1
offer_1
Active
01/01/2021
Deactive
01/01/2022
1
offer_2
Active
02/01/2022
null
null
What do these dates represent? What is 02/01/2022? 2nd of January, or 1st of February?
Anyway, outer join might help.
Setting date format (so that you'd know what is what):
SQL> alter session set nls_date_format = 'mm/dd/yyyy';
Session altered.
Sample data:
SQL> select * from mytable;
USER_ID OFFER_I STATUS STATUS_DAT
---------- ------- -------- ----------
1 offer_1 Active 01/01/2021
1 offer_1 Deactive 01/01/2022
1 offer_2 Active 02/01/2022
Query:
SQL> select a.user_id,
2 a.offer_id,
3 a.status,
4 a.status_date,
5 b.status next_status,
6 b.status_date next_status_date
7 from mytable a
8 left join mytable b
9 on a.user_id = b.user_id
10 and a.offer_id = b.offer_id
11 and a.status_date < b.status_date
12 where a.status = 'Active';
USER_ID OFFER_ID STATUS STATUS_DATE NEXT_STATUS NEXT_STATUS_DATE
---------- ---------- -------- --------------- --------------- ----------------
1 offer_1 Active 01/01/2021 Deactive 01/01/2022
1 offer_2 Active 02/01/2022
SQL>
Assuming that it is all about activation date and deactivation date (could it be the same date?) - maybe this could help:
-- Pair every 'Active' row with the 'Deactive' row (if any) of the same
-- user and offer; offers without a deactivation keep NULL next_* columns.
SELECT
    act.USER_ID,
    act.OFFER_ID,
    act.STATUS,
    act.STATUS_DATE,
    deact.STATUS      AS "NEXT_STATUS",
    deact.STATUS_DATE AS "NEXT_STATUS_DATE"
FROM tbl act
LEFT JOIN tbl deact
    ON deact.USER_ID = act.USER_ID
   AND deact.OFFER_ID = act.OFFER_ID
   AND deact.STATUS = 'Deactive'
WHERE act.STATUS = 'Active'
With your sample data:
WITH
tbl (USER_ID, OFFER_ID, STATUS, STATUS_DATE) AS
(
select 1, 'offer_1', 'Active', DATE '2021-01-01' from dual union all
select 1, 'offer_1', 'Deactive', DATE '2022-01-01' from dual union all
select 1, 'offer_2', 'Active', DATE '2022-02-01' from dual
)
The result is:
USER_ID OFFER_ID STATUS STATUS_DATE NEXT_STATUS NEXT_STATUS_DATE
---------- -------- -------- ----------- ----------- ----------------
1 offer_1 Active 01-JAN-21 Deactive 01-JAN-22
1 offer_2 Active 01-FEB-22 Null Null
Or you could use PIVOT
-- PIVOT alternative: the 'Deactive' row's status and date are pivoted into the
-- NEXT_STATUS / NEXT_STATUS_DATE columns (pivot alias "NEXT" + aggregate alias),
-- and the outer GROUP BY then collapses the result to one row per user and offer.
SELECT USER_ID, OFFER_ID, MIN(STAT) "STATUS", MIN(STAT_DAT) "STATUS_DATE", MAX(NEXT_STATUS) "NEXT_STATUS", MAX(NEXT_STATUS_DATE) "NEXT_STATUS_DATE"
-- STATUS and STATUS_DATE are selected twice: once to be used by the PIVOT clause,
-- once under the names STAT / STAT_DAT, which the outer SELECT reads.
FROM ( Select t1.USER_ID, t1.OFFER_ID, t1.STATUS, t1.STATUS "STAT", t1.STATUS_DATE, t1.STATUS_DATE "STAT_DAT"
From tbl t1 )
PIVOT ( MAX(STATUS_DATE) "STATUS_DATE", MAX(STATUS) "STATUS" FOR STATUS IN('Deactive' "NEXT") )
GROUP BY USER_ID, OFFER_ID
ORDER BY USER_ID, OFFER_ID
USER_ID OFFER_ID STATUS STATUS_DATE NEXT_STATUS NEXT_STATUS_DATE
---------- -------- -------- ----------- ----------- ----------------
1 offer_1 Active 01-JAN-21 Deactive 01-JAN-22
1 offer_2 Active 01-FEB-22 Null Null

Oracle SQL - Find origin ID of autoincrement column

There's a table on my ERP database that has data about certain events. It has the start date, end date and a column shows if the event is a continuation of a previous one (sequential_id references unique_id). Here's an example:
unique_id
start_date
end_date
sequential_id
001
2021-01-01
2021-01-15
002
2021-02-01
2021-02-16
001
003
2021-03-01
2021-03-17
002
004
2021-03-10
2021-03-11
005
2021-03-19
In the example above, rows 001, 002 and 003 are all part of the same event, and 004/005 are unique events, with no sequences. How can I group the data in a way that the output is like this:
origin_id
start_date
end_date
001
2021-01-01
2021-03-17
004
2021-03-10
2021-03-11
005
2021-03-19
I've tried using group by, but due to sequential_id being auto incremental, it didn't work.
Thanks in advance.
You can use modern match_recognize which is an optimal solution for such tasks:
Pattern Recognition With MATCH_RECOGNIZE
DBFiddle
-- Collapse each chain of continuation rows into a single row.
-- ORDER BY is needed for a deterministic result: PREV() refers to the row that
-- precedes the current one in the ordered row stream, so without an ordering the
-- chains that get detected depend on an arbitrary row order.
-- Assumes a continuation row directly follows its predecessor when sorted by
-- unique_id (true for the sample data) -- TODO confirm for real data.
select *
from t
match_recognize(
    order by unique_id
    measures
        first(unique_id)  as start_unique_id,
        first(start_date) as start_date,
        last(end_date)    as end_date
    one row per match
    -- strt is left undefined, so it matches any row and opens a new chain.
    pattern (strt nxt*)
    define nxt as sequential_id = prev(unique_id)
);
You can use hierarchical query for this:
-- Walk each event chain from its root (a row with no sequential_id) down through
-- its continuations, carrying the root's id and start date along; keep only the
-- leaf row, whose end_date is the end of the whole chain.
with events (unique_id, start_date, end_date, sequential_id) as (
    select '001', date '2021-01-01', date '2021-01-15', null  from dual union all
    select '002', date '2021-02-01', date '2021-02-16', '001' from dual union all
    select '003', date '2021-03-01', date '2021-03-17', '002' from dual union all
    select '004', date '2021-03-10', date '2021-03-11', null  from dual union all
    select '005', date '2021-03-19', null,              null  from dual
),
chains as (
    select
        connect_by_root(unique_id)  as unique_id,
        connect_by_root(start_date) as start_date,
        end_date,
        connect_by_isleaf           as is_chain_end
    from events
    start with sequential_id is null
    connect by sequential_id = prior unique_id
)
select
    unique_id,
    start_date,
    end_date
from chains
where is_chain_end = 1
order by unique_id
UNIQUE_ID | START_DATE | END_DATE
:-------- | :--------- | :--------
001 | 01-JAN-21 | 17-MAR-21
004 | 10-MAR-21 | 11-MAR-21
005 | 19-MAR-21 | null
db<>fiddle here
This is a graph-walking problem, so you can use a recursive CTE:
-- Recursive walk: start from rows whose sequential_id points at no existing row,
-- then repeatedly add the rows that continue an already collected row, tagging
-- every row with the unique_id of the chain's first row.
with chain (unique_id, start_date, end_date, start_unique_id) as (
    select root.unique_id, root.start_date, root.end_date, root.unique_id
    from t root
    where not exists (
        select 1
        from t parent
        where parent.unique_id = root.sequential_id
    )
    union all
    select nxt.unique_id, nxt.start_date, nxt.end_date, chain.start_unique_id
    from chain
    join t nxt
        on nxt.sequential_id = chain.unique_id
)
-- One row per chain: earliest start and latest end of its members.
select start_unique_id, min(start_date), max(end_date)
from chain
group by start_unique_id;
Here is a db<>fiddle.

Oracle SQL - Scanning attribute Changes

I have the following employee table
EMPID RECORD_DATE DEPARTMENT
123456 2020-01-01 HR
123456 2020-02-01 HR
123456 2020-03-01 FINANCE
123456 2020-04-01 FINANCE
987654 2020-01-01 HR
987654 2020-02-01 HR
987654 2020-03-01 HR
987654 2020-04-01 LEGAL
Using Oracle SQL, I need to build a query that lists employee movements, specifically employees who have moved from HR to any other (non-HR) department.
Expected result:
EMPID MOVEMENT_DATE DEPT_BEFORE DEPT_AFTER
123456 2020-03-01 HR FINANCE
987654 2020-04-01 HR LEGAL
I know you can use the Lead or Lag function, but it's a little off for me:
SELECT
,EMP
,RECORD_DATE
,LAG(DEPARTMENT, 1, 0) OVER (PARTITION BY EMP ORDER BY RECORD_DATE) PREV
FROM EMP
Here are some values to work with:
-- Sample data: one row per employee per record date with the department held.
-- NOTE(review): the # prefix and the multi-row VALUES list look like SQL Server
-- syntax rather than Oracle -- confirm the target engine.
CREATE TABLE #EMP
(
    EMP VARCHAR(30) NOT NULL,
    RECORD_DATE DATE NOT NULL,
    DEPARTMENT VARCHAR(30) NOT NULL
);
-- The column list must name the columns declared above (RECORD_DATE, DEPARTMENT).
INSERT INTO #EMP (EMP, RECORD_DATE, DEPARTMENT)
VALUES
('123456','2020-01-01','HR'),
('123456','2020-02-01','HR'),
('123456','2020-03-01','FINANCE'),
('123456','2020-04-01','FINANCE'),
('987654','2020-01-01','HR'),
('987654','2020-02-01','HR'),
('987654','2020-03-01','HR'),
('987654','2020-04-01','LEGAL');
You could do it using LAG function:
-- List employees who moved out of HR: compare each record's department with the
-- department on the same employee's previous record (ordered by RECORD_DATE).
WITH data AS (
    SELECT 123456 EMPID, DATE '2020-01-01' RECORD_DATE, 'HR' DEPARTMENT FROM dual UNION ALL
    SELECT 123456 EMPID, DATE '2020-02-01' RECORD_DATE, 'HR' DEPARTMENT FROM dual UNION ALL
    SELECT 123456 EMPID, DATE '2020-03-01' RECORD_DATE, 'FINANCE' DEPARTMENT FROM dual UNION ALL
    SELECT 123456 EMPID, DATE '2020-04-01' RECORD_DATE, 'FINANCE' DEPARTMENT FROM dual UNION ALL
    SELECT 987654 EMPID, DATE '2020-01-01' RECORD_DATE, 'HR' DEPARTMENT FROM dual UNION ALL
    SELECT 987654 EMPID, DATE '2020-02-01' RECORD_DATE, 'HR' DEPARTMENT FROM dual UNION ALL
    SELECT 987654 EMPID, DATE '2020-03-01' RECORD_DATE, 'HR' DEPARTMENT FROM dual UNION ALL
    SELECT 987654 EMPID, DATE '2020-04-01' RECORD_DATE, 'LEGAL' DEPARTMENT FROM dual
),
-- Every record paired with the department of the employee's previous record.
dept_change AS (
    SELECT
        EMPID,
        RECORD_DATE AS MOVEMENT_DATE,
        LAG(DEPARTMENT) OVER (PARTITION BY EMPID ORDER BY RECORD_DATE) AS DEPARTMENT_BEFORE,
        DEPARTMENT AS DEPARTMENT_AFTER
    FROM data
)
SELECT
    EMPID,
    MOVEMENT_DATE,
    DEPARTMENT_BEFORE,
    DEPARTMENT_AFTER
FROM dept_change
-- Only moves that start in HR and end elsewhere. A plain "before <> after" test
-- would also report moves such as FINANCE -> LEGAL. An employee's first record has
-- a NULL DEPARTMENT_BEFORE and is excluded by the equality comparison.
WHERE DEPARTMENT_BEFORE = 'HR'
  AND DEPARTMENT_AFTER <> 'HR'
ORDER BY EMPID, MOVEMENT_DATE;
EMPID MOVEMENT_DATE DEPARTMENT_BEFORE DEPARTMENT_AFTER
---------- --------------- ----------------- -----------------
123456 2020-03-01 HR FINANCE
987654 2020-04-01 HR LEGAL

How to join two tables to determine date ranges when one table contains (id, start_date) and another contains (id, end_date)

I'm new to SQL, so I hope you don't find this silly. I'm working with two tables here: one contains start dates and the other contains end dates. The entries do not follow any sequence, and there may be duplicates.
**TABLE 1**
id start_date
1 2019-04-23
1 2019-06-05
1 2019-06-05
1 2019-10-29
1 2019-12-16
2 2019-01-05
3 2020-02-01
**TABLE 2**
id end_date
1 2019-04-23
1 2019-06-05
1 2019-06-06
1 2019-06-06
1 2019-07-24
1 2019-10-16
2 2020-01-04
**EXPECTED OUTPUT**
id start_date end_date
1 2019-04-23 2019-06-05
1 2019-10-29 null
2 2019-01-05 2020-01-04
3 2020-02-01 null
You can use union all and aggregation with some window functions:
-- Inline sample data for the two input tables.
with table1 as (
select 1 as id, date('2019-04-23') as start_date union all
select 1, '2019-06-05' union all
select 1, '2019-06-05' union all
select 1, '2019-10-29' union all
select 1, '2019-12-16' union all
select 2, '2019-01-05' union all
select 3, '2020-02-01'
),
table2 as (
SELECT 1 as id, DATE('2019-04-23') as end_date union all
SELECT 1, '2019-06-05' union all
select 1, '2019-06-06' union all
select 1, '2019-06-06' union all
select 1, '2019-07-24' union all
select 1, '2019-10-16' union all
select 2, '2020-01-04'
)
-- One output row per (id, end_date): the earliest start_date that shares that end.
select id, min(start_date), end_date
from (select id, start_date,
-- For each row, take the first non-null end_date among rows of the same id whose
-- day number (days between the row's date and CURRENT_DATE) is at least 1 greater,
-- i.e. the nearest end date strictly after the row's own date.
first_value(end_date ignore nulls) over (partition by id order by DATE_DIFF(coalesce(start_date, end_date), CURRENT_DATE, day) RANGE between 1 following and unbounded following) as end_date
-- Starts and ends are stacked into one stream; each row carries only one of the two dates.
from ((select id, start_date, null as end_date
from table1
) union all
(select id, null as start_date, end_date
from table2
)
) se
)
group by id, end_date
-- Groups built only from end-date rows have no start_date and are dropped.
having min(start_date) is not null;
Why do you have multiple records with the same id (I am assuming id is a primary key)? My suggestion would be to make the ids unique, create a foreign key constraint in the end dates table (since there can't be an end date without a start date), and use the foreign key relationship to retrieve the desired results. E.g. SELECT S.start_date,E.end_date FROM table1 S JOIN table2 E where S.id=E.table1_fk
Below is for BigQuery Standard SQL
#standardSQL
-- Outer level: turn the '9999-01-01' placeholder back into NULL (no end date found).
SELECT id, start_date, IF(end_date = '9999-01-01', NULL, end_date) end_date
FROM (
-- Middle level: per (id, start_date), keep the smallest candidate end_date.
SELECT id, start_date, ARRAY_AGG(end_date ORDER BY end_date LIMIT 1)[OFFSET(0)] end_date
FROM (
-- Inner level: pair every start with every end of the same id; an end_date is kept
-- only when it is later than the start, otherwise it is replaced by the
-- placeholder '9999-01-01' so that it sorts last.
SELECT id, start_date, IF(start_date < end_date, end_date, '9999-01-01') end_date
FROM `project.dataset.table1`
LEFT JOIN `project.dataset.table2`
USING (id)
)
GROUP BY id, start_date
)
When applied to the sample data from your question, the result is
Row id start_date end_date
1 1 2019-04-23 2019-06-05
2 1 2019-06-05 2019-06-06
3 1 2019-10-29 null
4 1 2019-12-16 null
5 2 2019-01-05 2020-01-04
6 3 2020-02-01 null
Note: this is quick and not optimized, but it looks like it produces the desired result

Month counts between dates

I have the below table. I need to count how many ids were active in a given month. So thinking I'll need to create a row for each id that was active during that month so that id can be counted each month. A row should be generated for a term_dt during that month.
active_dt term_dt id
1/1/2018 101
1/1/2018 5/15/2018 102
3/1/2018 6/1/2018 103
1/1/2018 4/25/18 104
Apparently this is a "count number of overlapping intervals" problem. The algorithm goes like this:
Create a sorted list of all start and end points
Calculate a running sum over this list, add one when you encounter a start and subtract one when you encounter an end
If two points fall on the same date, perform the subtractions first
You will end up with list of all points where the sum changed
Here is a rough outline of the query. It is for SQL Server but could be ported to any RDBMS that supports window functions:
-- cte1: one +1 event per activation date and one -1 event per termination date.
WITH cte1(date, val) AS (
SELECT active_dt, 1 FROM #t AS t
UNION ALL
SELECT COALESCE(term_dt, '2099-01-01'), -1 FROM #t AS t
-- if end date is null then assume the row is valid indefinitely
), cte2 AS (
-- Running total of the events; ordering by val after date places the -1 events
-- of a date before its +1 events.
SELECT date, SUM(val) OVER(ORDER BY date, val) AS rs
FROM cte1
)
-- Highest running total seen among the events of each year/month.
SELECT YEAR(date) AS YY, MONTH(date) AS MM, MAX(rs) AS MaxActiveThisYearMonth
FROM cte2
GROUP BY YEAR(date), MONTH(date)
DB Fiddle
I was toying with a simpler query, that seemed to do the trick, for Oracle:
-- Count the ids that were active at any point during each month of 2018.
-- candidates: the first day of every month of 2018.
with candidates (month_start) as (
    select to_date('2018-' || column_value || '-01', 'YYYY-MM-DD')
    from table(sys.odcivarchar2list('01','02','03','04','05','06',
                                    '07','08','09','10','11','12'))
),
-- sample_data: the rows from the question; UNION ALL keeps every row as-is
-- (plain UNION would silently merge identical rows).
sample_data (active_dt, term_dt, id) as (
    select to_date('01/01/2018', 'MM/DD/YYYY'), null, 101 from dual
    union all
    select to_date('01/01/2018', 'MM/DD/YYYY'),
           to_date('05/15/2018', 'MM/DD/YYYY'), 102 from dual
    union all
    select to_date('03/01/2018', 'MM/DD/YYYY'),
           to_date('06/01/2018', 'MM/DD/YYYY'), 103 from dual
    union all
    select to_date('01/01/2018', 'MM/DD/YYYY'),
           to_date('04/25/2018', 'MM/DD/YYYY'), 104 from dual
)
select c.month_start, count(1)
from candidates c
join sample_data d
    -- Interval-overlap test: the id became active before the month ended and was
    -- not terminated before the month began. Testing only whether month_start lies
    -- between the two dates would miss the month of a mid-month activation.
    -- A missing term_dt means the id is still active today.
    on d.active_dt < add_months(c.month_start, 1)
   and nvl(d.term_dt, current_date) >= c.month_start
group by c.month_start
order by c.month_start
An alternative solution would be to use a hierarchical query, e.g.:
-- Expand every id into one row per calendar month it was active in, then count
-- the ids per month.
WITH your_table AS (
    SELECT to_date('01/01/2018', 'dd/mm/yyyy') active_dt, NULL term_dt, 101 ID FROM dual UNION ALL
    SELECT to_date('01/01/2018', 'dd/mm/yyyy') active_dt, to_date('15/05/2018', 'dd/mm/yyyy') term_dt, 102 ID FROM dual UNION ALL
    SELECT to_date('01/03/2018', 'dd/mm/yyyy') active_dt, to_date('01/06/2018', 'dd/mm/yyyy') term_dt, 103 ID FROM dual UNION ALL
    SELECT to_date('01/01/2018', 'dd/mm/yyyy') active_dt, to_date('25/04/2018', 'dd/mm/yyyy') term_dt, 104 ID FROM dual
)
SELECT active_month,
       COUNT(*) num_active_ids
FROM (
    -- LEVEL 1 is the activation month; each further level is one month later.
    SELECT add_months(TRUNC(active_dt, 'mm'), -1 + LEVEL) active_month,
           ID
    FROM your_table
    -- Row generator: each row connects only to itself; the sys_guid() test makes
    -- every generated row unique so Oracle does not report a CONNECT BY loop.
    CONNECT BY PRIOR ID = ID
           AND PRIOR sys_guid() IS NOT NULL
           -- Number of calendar months touched, computed on month-truncated dates.
           -- Using the raw dates with FLOOR() would drop the last month whenever
           -- the end day-of-month is earlier than the start day-of-month
           -- (e.g. active 20 Jan, terminated 10 Feb).
           -- A missing term_dt means the id is still active today.
           AND LEVEL <= months_between(TRUNC(coalesce(term_dt, SYSDATE), 'mm'),
                                       TRUNC(active_dt, 'mm')) + 1
)
GROUP BY active_month
ORDER BY active_month;
ACTIVE_MONTH NUM_ACTIVE_IDS
------------ --------------
01/01/2018 3
01/02/2018 3
01/03/2018 4
01/04/2018 4
01/05/2018 3
01/06/2018 2
01/07/2018 1
01/08/2018 1
01/09/2018 1
01/10/2018 1
Whether this is more or less performant than the other answers is up to you to test.