Fetch next record within a table - sql

I have the following table
EMP_ID ,DATETIME_OF_MOVEMENT,CITY ,RANK
2258325 ,1/18/2020 5:37 ,London ,1
2258325 ,1/19/2020 11:01 ,Manchester ,2
2258325 ,1/20/2020 15:06 ,London ,3
2656700 ,1/20/2020 23:59 ,London ,1
2656700 ,1/21/2020 6:48 ,Manchester ,2
2656700 ,1/21/2020 6:48 ,Liverpool ,3
2656700 ,1/26/2020 10:47 ,London ,4
6631583 ,1/18/2020 18:00 ,London ,1
6631583 ,1/19/2020 14:25 ,Manchester ,2
6631583 ,1/20/2020 8:53 ,Liverpool ,3
6631583 ,1/20/2020 14:48 ,Manchester ,4
6631583 ,1/21/2020 11:34 ,London ,5
I want a query that finds the employees who were in London and later came back to London.
The first location should be London, the second is the first location after London, and the third location should be London again.
I used the following query, but it misses some employees:
-- Asker's attempt: show each movement together with the next two movements
-- of the same employee, using fixed LEAD offsets of 1 and 2.
-- Known limitation (the subject of the question): the "third" slot is always
-- the row two positions ahead, so London -> X -> Y -> London is not detected.
select emp_id,
       datetime_of_movement as first_movement,
       city as first_city,
       lead(datetime_of_movement, 1) over (partition by emp_id order by datetime_of_movement) as second_movement,
       lead(city, 1) over (partition by emp_id order by datetime_of_movement) as second_city,
       lead(datetime_of_movement, 2) over (partition by emp_id order by datetime_of_movement) as third_movement,
       -- offset 2 so the city comes from the same row as third_movement
       lead(city, 2) over (partition by emp_id order by datetime_of_movement) as third_city
from table -- placeholder: substitute the real table name
result of the code
CUSTOMER_ID,first_movement ,first_city,second_movement,second_city,third_movement ,third_city
2258325 ,1/18/2020 5:37 ,London ,1/19/2020 11:01,Manchester ,1/20/2020 15:06,London
2656700 ,1/20/2020 23:59,London ,1/21/2020 6:48 ,Manchester ,1/21/2020 6:48 ,Liverpool
6631583 ,1/18/2020 18:00,London ,1/19/2020 14:25,Manchester ,1/20/2020 8:53 ,Liverpool
This code catches an employee if their track of movement is as follows (e.g. emp_id: 2258325):
1 - London
2 - any other city
3 - London
It does not work if the employee's movements look like the following:
1 - london
2 - any other city
3 - any other city
4 - London
i want the result to be like
CUSTOMER_ID,first_movement ,first_city,second_movement,second_city,third_movement ,third_city
2258325 ,1/18/2020 5:37 ,London ,1/19/2020 11:01,Manchester ,1/20/2020 15:06,London
2656700 ,1/20/2020 23:59,London ,1/21/2020 6:48 ,Manchester ,1/26/2020 10:47,London
6631583 ,1/18/2020 18:00,London ,1/19/2020 14:25,Manchester ,1/21/2020 11:34,London
Any suggestion please?
-- Repro posted in reply to the conditional-aggregation answer: the question's
-- sample data plus two later movements (ranks 6 and 7) for employee 6631583.
WITH YOUR_TABLE (EMP_ID, DATETIME_OF_MOVEMENT, CITY, RANK_) AS (
    SELECT 2258325, '1/18/2020 5:37',  'London',     1 FROM DUAL UNION ALL
    SELECT 2258325, '1/19/2020 11:01', 'Manchester', 2 FROM DUAL UNION ALL
    SELECT 2258325, '1/20/2020 15:06', 'London',     3 FROM DUAL UNION ALL
    SELECT 2656700, '1/20/2020 23:59', 'London',     1 FROM DUAL UNION ALL
    SELECT 2656700, '1/21/2020 6:48',  'Manchester', 2 FROM DUAL UNION ALL
    SELECT 2656700, '1/21/2020 6:48',  'Liverpool',  3 FROM DUAL UNION ALL
    SELECT 2656700, '1/26/2020 10:47', 'London',     4 FROM DUAL UNION ALL
    SELECT 6631583, '1/18/2020 18:00', 'London',     1 FROM DUAL UNION ALL
    SELECT 6631583, '1/19/2020 14:25', 'Manchester', 2 FROM DUAL UNION ALL
    SELECT 6631583, '1/20/2020 8:53',  'Liverpool',  3 FROM DUAL UNION ALL
    SELECT 6631583, '1/20/2020 14:48', 'Manchester', 4 FROM DUAL UNION ALL
    SELECT 6631583, '1/21/2020 11:34', 'London',     5 FROM DUAL UNION ALL
    SELECT 6631583, '1/22/2020 14:48', 'Manchester', 6 FROM DUAL UNION ALL
    SELECT 6631583, '1/24/2020 11:34', 'London',     7 FROM DUAL
)
-- YOUR QUERY STARTS FROM HERE
-- (the marker comment must sit on its own line; sharing a line with
--  "SELECT EMP_ID," comments the SELECT out and the statement no longer parses)
--
-- MINRN = lowest London rank, MAXRN = highest London rank per employee.
-- Because MAXRN is the LAST London row, employee 6631583 (London at ranks
-- 1, 5 and 7) gets THIRD_* from rank 7 instead of the first return at rank 5;
-- that is the failure this repro demonstrates.
SELECT EMP_ID,
       MAX (CASE WHEN MINRN = RANK_ THEN DATETIME_OF_MOVEMENT END) AS first_movement,
       MAX (CASE WHEN MINRN = RANK_ THEN CITY END) AS first_CITY,
       MAX (CASE WHEN MINRN + 1 = RANK_ THEN DATETIME_OF_MOVEMENT END) AS SECOND_movement,
       MAX (CASE WHEN MINRN + 1 = RANK_ THEN CITY END) AS SECOND_CITY,
       MAX (CASE WHEN MAXRN = RANK_ THEN DATETIME_OF_MOVEMENT END) AS THIRD_movement,
       MAX (CASE WHEN MAXRN = RANK_ THEN CITY END) AS THIRD_CITY
FROM (SELECT T.*,
             MAX (CASE WHEN CITY = 'London' THEN RANK_ END) OVER (PARTITION BY EMP_ID) AS MAXRN,
             MIN (CASE WHEN CITY = 'London' THEN RANK_ END) OVER (PARTITION BY EMP_ID) AS MINRN
      FROM YOUR_TABLE T)
-- require at least one row between the first and the last London row
WHERE MAXRN - MINRN > 1
GROUP BY EMP_ID;
Check this, @Tejash: it does not work.
result should be like this:

-- For every London row, pair it with the employee's next movement and with
-- the next time the employee is back in London (however many stops later).
-- Assumes datetime_of_movement is a DATE/TIMESTAMP column so that ORDER BY and
-- MIN compare chronologically - TODO confirm. Rows of one employee with an
-- identical timestamp are ordered arbitrarily.
SELECT emp_id,
       first_movement,
       first_city,
       second_movement,
       second_city,
       third_movement,
       'London' AS third_city
FROM (
    SELECT emp_id,
           datetime_of_movement AS first_movement,
           city AS first_city,
           LEAD(datetime_of_movement) OVER (PARTITION BY emp_id ORDER BY datetime_of_movement) AS second_movement,
           LEAD(city) OVER (PARTITION BY emp_id ORDER BY datetime_of_movement) AS second_city,
           -- earliest London movement among the rows that follow this one
           MIN(CASE WHEN city = 'London' THEN datetime_of_movement END)
               OVER (PARTITION BY emp_id
                     ORDER BY datetime_of_movement
                     ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS third_movement
    FROM table -- placeholder: substitute the real table name
) trips
-- select-list aliases are not visible in the WHERE of the same query level,
-- so the filter is applied one level up; string literals use single quotes
WHERE first_city = 'London'
  AND second_city <> 'London'
  AND third_movement IS NOT NULL
I couldn't test this SQL but maybe helpful. please let me know if it works for you.

I think you need a record starting from London then next record and the last London record. You can use the combination of the conditional aggregate and analytical function as following:
SQL> WITH YOUR_TABLE (
2 EMP_ID,
3 DATETIME_OF_MOVEMENT,
4 CITY,
5 RANK
6 ) AS
7 (
8 SELECT 2258325 ,'1/18/2020 5:37' ,'London' ,1 FROM DUAL UNION ALL
9 SELECT 2258325 ,'1/19/2020 11:01' ,'Manchester' ,2 FROM DUAL UNION ALL
10 SELECT 2258325 ,'1/20/2020 15:06' ,'London' ,3 FROM DUAL UNION ALL
11 SELECT 2656700 ,'1/20/2020 23:59' ,'London' ,1 FROM DUAL UNION ALL
12 SELECT 2656700 ,'1/21/2020 6:48' ,'Manchester' ,2 FROM DUAL UNION ALL
13 SELECT 2656700 ,'1/21/2020 6:48' ,'Liverpool' ,3 FROM DUAL UNION ALL
14 SELECT 2656700 ,'1/26/2020 10:47' ,'London' ,4 FROM DUAL UNION ALL
15 SELECT 6631583 ,'1/18/2020 18:00' ,'London' ,1 FROM DUAL UNION ALL
16 SELECT 6631583 ,'1/19/2020 14:25' ,'Manchester' ,2 FROM DUAL UNION ALL
17 SELECT 6631583 ,'1/20/2020 8:53' ,'Liverpool' ,3 FROM DUAL UNION ALL
18 SELECT 6631583 ,'1/20/2020 14:48' ,'Manchester' ,4 FROM DUAL UNION ALL
19 SELECT 6631583 ,'1/21/2020 11:34' ,'London' ,5 FROM DUAL
20 )
21 -- YOUR QUERY STARTS FROM HERE
22 -- MINRN = rank of the employee's first London row
23 -- NEXTRN = rank of the first London row after the stop that follows MINRN,
24 -- so London visits made after the first return do not replace it
25 SELECT
26 EMP_ID,
27 MAX(CASE WHEN MINRN = RANK THEN DATETIME_OF_MOVEMENT END) AS first_movement,
28 MAX(CASE WHEN MINRN = RANK THEN CITY END) AS first_CITY,
29 MAX(CASE WHEN MINRN + 1 = RANK THEN DATETIME_OF_MOVEMENT END) AS SECOND_movement,
30 MAX(CASE WHEN MINRN + 1 = RANK THEN CITY END) AS SECOND_CITY,
31 MAX(CASE WHEN NEXTRN = RANK THEN DATETIME_OF_MOVEMENT END) AS THIRD_movement,
32 MAX(CASE WHEN NEXTRN = RANK THEN CITY END) AS THIRD_CITY
33 FROM
34 (
35 SELECT M.*,
36 MIN(CASE WHEN CITY = 'London' AND RANK > MINRN + 1 THEN RANK END) OVER(PARTITION BY EMP_ID) AS NEXTRN
37 FROM
38 (
39 SELECT T.*,
40 MIN(CASE WHEN CITY = 'London' THEN RANK END) OVER(PARTITION BY EMP_ID) AS MINRN
41 FROM YOUR_TABLE T
42 ) M
43 )
44 -- NEXTRN is NULL when the employee never returns to London
45 WHERE NEXTRN IS NOT NULL
46 GROUP BY EMP_ID;
EMP_ID FIRST_MOVEMENT FIRST_CITY SECOND_MOVEMENT SECOND_CIT THIRD_MOVEMENT THIRD_CITY
---------- --------------- ---------- --------------- ---------- --------------- ----------
2258325 1/18/2020 5:37 London 1/19/2020 11:01 Manchester 1/20/2020 15:06 London
2656700 1/20/2020 23:59 London 1/21/2020 6:48 Manchester 1/26/2020 10:47 London
6631583 1/18/2020 18:00 London 1/19/2020 14:25 Manchester 1/21/2020 11:34 London
SQL>
Cheers!!

You can try the script below. But it will return only one row, as only one EMP has city London where Rank = 3 (and, as per your explanation, your concern is location 3 as well).
DEMO HERE
-- Literal reading of the question: ranks 1, 2 and 3 of each employee, kept
-- only when the trip both starts and ends in London.
SELECT DISTINCT
    A.EMP_ID AS CUSTOMER_ID,
    A.DATETIME_OF_MOVEMENT AS first_movement,
    A.city AS first_city,
    B.DATETIME_OF_MOVEMENT AS second_movement,
    B.city AS second_city,
    C.DATETIME_OF_MOVEMENT AS third_movement,
    C.city AS third_city
FROM your_table A
INNER JOIN your_table B ON B.EMP_ID = A.EMP_ID AND B.Rank = A.Rank + 1
INNER JOIN your_table C ON C.EMP_ID = A.EMP_ID AND C.Rank = A.Rank + 2
WHERE A.RANK = 1
  -- the requirement says the first location must be London as well
  AND A.City = 'London'
  AND C.City = 'London'
New logic if the last city is not the third city-
DEMO HERE
WITH CTE AS
(
    -- Keep rank 1, rank 2 and the first London row after rank 2 for each
    -- employee; that return row is renumbered to 3 so the joins below can
    -- use fixed offsets. Taking the first London row (instead of the
    -- employee's last row) keeps later trips from hiding the first return.
    SELECT A.EMP_ID, A.DATETIME_OF_MOVEMENT, A.CITY, A.RANK,
           CASE WHEN A.RANK > 2 THEN 3 ELSE A.RANK END AS NEW_RANK
    FROM your_table A
    WHERE A.RANK IN (1, 2)
       OR A.RANK = (SELECT MIN(B.RANK)
                    FROM your_table B
                    WHERE B.EMP_ID = A.EMP_ID
                      AND B.RANK > 2
                      AND B.CITY = 'London')
)
SELECT
    A.EMP_ID AS CUSTOMER_ID,
    A.DATETIME_OF_MOVEMENT AS first_movement,
    A.city AS first_city,
    B.DATETIME_OF_MOVEMENT AS second_movement,
    B.city AS second_city,
    C.DATETIME_OF_MOVEMENT AS third_movement,
    C.city AS third_city
FROM CTE A
INNER JOIN CTE B ON B.EMP_ID = A.EMP_ID AND B.NEW_RANK = A.NEW_RANK + 1
INNER JOIN CTE C ON C.EMP_ID = A.EMP_ID AND C.NEW_RANK = A.NEW_RANK + 2
WHERE A.NEW_RANK = 1
  -- the requirement says the first location must be London as well
  AND A.City = 'London'
  AND C.City = 'London'

Related

SQL Query to fetch the most old start_date if there is no change in one column value

I have a table with the following columns per_all_assignments_m-
per_assignments
Person_id position_id system_person_type start_date END_DT
1 1 EMP 01-JAN-2019 20-JAN-2019
1 1 EMP 21-JAN-2019 31-DEC-4712
2 1 EMP 01-JAN-2019 03-JUL-2019
2 1 EMP 04-JUL-2019 08-SEP-2019
2 2 EMP 09-SEP-2019 31-DEC-2019
2 2 EMP 01-JAN-2020 31-DEC-4712
3 10 EMP 01-JAN-2019 20-JAN-2019
3 10 EMP 21-JAN-2019 08-SEP-2019
3 10 EMP 09-SEP-2019 20-JAN-2020
3 10 EMP 21-JAN-2020 31-DEC-4712
I have created the below query to fetch, if there is any value change in the column position_id. This query will fetch the date when the date was changed and the previous start date.
-- Lists, for one person, each assignment row whose position differs from the
-- previous row's position and whose start date directly follows the previous
-- row's end date.
with assignment_history as (
    select person_id,
           position_id as pos_new,
           effective_start_date,
           effective_end_date,
           -- position and start date of the preceding assignment row
           lag(position_id) over (partition by person_id order by effective_start_date) as pos_old,
           lag(effective_start_date) over (partition by person_id order by effective_start_date) as prev_start_dt,
           -- 1 when this row starts the day after the previous row ended
           case
               when effective_start_date
                    = 1 + lag(effective_end_date) over (partition by person_id order by effective_start_date)
               then 1
           end as flag
    from per_all_assignments_m
    where assignment_type = 'E'
      and person_id = 1
)
select person_id,
       prev_start_dt,
       effective_start_date as current_start_dt,
       case when pos_new = pos_old then 1 else pos_old end as pos_old
from assignment_history
where pos_new <> pos_old
  and flag = 1
For the above table, this query will fetch 09-SEP-2019 for employee #2 as the current_start_dt and 04-JUL-2019 as prev_start_dt
Question-
Now, I want to add the condition where, if there is no change in the position_id values then the most old effective_start_date should be retrieved.
Example-
For EE#1, current_start_dt should be 01-JAN-2019 and prev_start_dt should be the same; for EE#3, current_start_dt and prev_start_dt should both be
01-JAN-2019.
Any suggestions would be of great help!
Required output -
Person_id prev_start_dt current_start_dt pos_old
1 01-JAN-2019 01-JAN-2019 1
2 04-JUL-2019 09-SEP-2019 1
3 01-JAN-2019 01-JAN-2019 10
Here is one way to do it:
cte2 gets the person_id rows where the position changed; the final result is those rows plus, for every person_id without any change, that person's first (earliest) row.
Fiddle
-- MySQL 8+ version. cte holds the question's sample rows.
with cte as
(
    -- '%b' is the format specifier for abbreviated month names (JAN, FEB, ...)
    select 1 as Person_id, 1 as position_id, 'EMP' as system_person_type, STR_TO_DATE('01-JAN-2019', '%d-%b-%Y') as start_date, STR_TO_DATE('20-JAN-2019', '%d-%b-%Y') as END_DT
    union all select 1, 1, 'EMP', STR_TO_DATE('21-JAN-2019', '%d-%b-%Y'), STR_TO_DATE('31-DEC-4712', '%d-%b-%Y')
    union all select 2, 1, 'EMP', STR_TO_DATE('01-JAN-2019', '%d-%b-%Y'), STR_TO_DATE('03-JUL-2019', '%d-%b-%Y')
    union all select 2, 1, 'EMP', STR_TO_DATE('04-JUL-2019', '%d-%b-%Y'), STR_TO_DATE('08-SEP-2019', '%d-%b-%Y')
    union all select 2, 2, 'EMP', STR_TO_DATE('09-SEP-2019', '%d-%b-%Y'), STR_TO_DATE('31-DEC-2019', '%d-%b-%Y')
    union all select 2, 2, 'EMP', STR_TO_DATE('01-JAN-2020', '%d-%b-%Y'), STR_TO_DATE('31-DEC-4712', '%d-%b-%Y')
    union all select 3, 10, 'EMP', STR_TO_DATE('01-JAN-2019', '%d-%b-%Y'), STR_TO_DATE('20-JAN-2019', '%d-%b-%Y')
    union all select 3, 10, 'EMP', STR_TO_DATE('21-JAN-2019', '%d-%b-%Y'), STR_TO_DATE('08-SEP-2019', '%d-%b-%Y')
    union all select 3, 10, 'EMP', STR_TO_DATE('09-SEP-2019', '%d-%b-%Y'), STR_TO_DATE('20-JAN-2020', '%d-%b-%Y')
    union all select 3, 10, 'EMP', STR_TO_DATE('21-JAN-2020', '%d-%b-%Y'), STR_TO_DATE('31-DEC-4712', '%d-%b-%Y')
),
cte2 as
(
    -- For every row a, b is the immediately preceding row of the same person
    -- (no row c lies between them) when it has a different position; rows
    -- without such a predecessor keep NULLs from the left join.
    select a.Person_id, b.start_date as prev_start_dt, a.start_date as current_start_dt, b.position_id
    from cte a
    left join cte b
        on a.Person_id = b.Person_id
       and a.start_date > b.start_date
       and a.position_id <> b.position_id
       and not exists (select 1
                       from cte c
                       where a.Person_id = c.Person_id
                         and a.start_date > c.start_date
                         and c.start_date > b.start_date)
)
-- persons with a position change: one row per change
select Person_id, prev_start_dt, current_start_dt, position_id
from cte2
where position_id is not null
union
-- persons without any change: their earliest row, used for both dates
select a.Person_id, a.start_date, a.start_date, a.position_id
from cte a
where not exists (select 1 from cte b where a.Person_id = b.Person_id and a.start_date > b.start_date)
  and not exists (select 1 from cte2 ch where ch.Person_id = a.Person_id and ch.position_id is not null)
Output:
Person_id prev_start_dt current_start_dt position_id
1 2019-01-01 2019-01-01 1
2 2019-07-04 2019-09-09 1
3 2019-01-01 2019-01-01 10
If you want to track all the cases where there is a change in Position ID, something like below should work:
-- Oracle version. Sample rows use ANSI DATE literals so that parsing does not
-- depend on the session's NLS_DATE_FORMAT / NLS_DATE_LANGUAGE.
with cte as
(
    select 1 as Person_id, 1 as position_id, 'EMP' as system_person_type, DATE '2019-01-01' as Start_date, DATE '2019-01-20' as end_dt from dual union all
    select 1, 1, 'EMP', DATE '2019-01-21', DATE '4712-12-31' from dual union all
    select 2, 1, 'EMP', DATE '2019-01-01', DATE '2019-07-03' from dual union all
    select 2, 1, 'EMP', DATE '2019-07-04', DATE '2019-09-08' from dual union all
    select 2, 2, 'EMP', DATE '2019-09-09', DATE '2019-12-31' from dual union all
    select 2, 2, 'EMP', DATE '2020-01-01', DATE '4712-12-31' from dual union all
    select 3, 10, 'EMP', DATE '2019-01-01', DATE '2019-01-20' from dual union all
    select 3, 10, 'EMP', DATE '2019-01-21', DATE '2019-09-08' from dual union all
    select 3, 10, 'EMP', DATE '2019-09-09', DATE '2020-01-20' from dual union all
    select 3, 10, 'EMP', DATE '2020-01-21', DATE '4712-12-31' from dual
)
select x.Person_Id,
       -- no earlier position: fall back to this position's own first start date
       COALESCE(x.lag_start, x.min_date) as Prev_start_dt,
       x.min_date as Current_Start_Dt,
       COALESCE(x.lag_pos, x.position_id) as Pos_old
FROM
    -- one row per (person, position) with its first/last start date and the
    -- last start date / id of the position that precedes it
    (select person_id,
            position_id,
            min(start_date) as min_date,
            max(start_date) as max_date,
            lag(max(start_date)) Over(Partition by Person_Id Order by max(start_date)) as lag_start,
            lag(position_id) Over(Partition by Person_Id Order by min(start_date)) as lag_pos
     from cte
     group by person_id, position_id) x
inner join
    -- number of distinct positions each person has held
    (select person_id,
            count(distinct position_id) as cnt
     from cte
     group by person_id) y
    ON x.person_id = y.person_id
-- keep position changes, plus the single row of persons who never changed
Where x.Position_id <> x.lag_pos or y.cnt = 1
The idea is to group the information at a person and position level and then use the lag function to determine if there is a change. Cnt=1 filter is applied to get the cases where there is no position change. Hope this helps.

Repeat a value over some rows

I have a list of values and I have to display the mother value of the CHILDVALUE in the following way. The MOTHERVALUE should be same as CHILDVALUE at RNO = 1 , Otherwise the MOTHERVALUE is repeated for subsequent rows until a new sequence beginning from RNO = 1 is encountered. Below is what I wish to achieve
RNO ChildValue MotherValue SKIPNO CREATEDDATE
1 345dg 345dg 4 19/9/2018 2:49
2 342sds 345dg 4 19/9/2018 11:53
.
.
.
19 343dfd 345dg 4 6/11/2018 12:40
20 234dfs 345dg 4 6/11/2018 14:56
1 545ert 545ert 4 6/11/2018 15:17
2 543tye 545ert 4 7/11/2018 11:29
.
.
.
9 345cxv 545ert 4 16/11/2018 14:16
1 563mnj 563mnj 5 19/11/2018 2:12
The row numbers are dynamically produced according to the SKIPNO (not distinct) and CREATEDDATE. But I have a problem in displaying the correct MOTHERVALUE for some of the rows
I have tried the below query which best captures at least 50% of the requirement.
-- Asker's attempt. The inner query numbers the rows per SKIPNO and seeds
-- MOTHERVALUE with each row's own CHILDVALUE; the outer query then takes the
-- previous row's MOTHERVALUE for every row with RNO <> 1.
-- Because LAG looks exactly one row back, only RNO 1 and RNO 2 get the
-- group's first CHILDVALUE - the behaviour the question describes.
select RNO
     , CHILDVALUE
     , case RNO when 1 then CHILDVALUE
       else lag(MOTHERVALUE) over (order by SKIPNO, CreatedDate ASC) end as MOTHERVALUE
     , SKIPNO, CreatedDate
from ( SELECT ROW_NUMBER() OVER (PARTITION BY A.SKIPNO
                                 ORDER BY A.SKIPNO, A.CreatedDate ASC) RNO
            , A.*
       from (select distinct CHILDVALUE
                  , CHILDVALUE as MOTHERVALUE
                  , SKIPNO
                  , CreatedDate
             from values ) A
     )
This query partially gives the expected output but is still far from the actual result, as it shows the correct MOTHERVALUE only for the first two rows. The query does not help me show the correct `MOTHERVALUE` beyond the second row.
Here is my table:
-- Asker's source table: one row per child value with its SKIPNO and creation date.
-- NOTE(review): VALUES is a reserved word in Oracle, so this name is
-- presumably a placeholder for the real table name - confirm.
-- NOTE(review): SKIPNO is declared as VARCHAR2 although the sample shows
-- numeric values - confirm the intended type.
CREATE TABLE VALUES (
CHILDVALUE VARCHAR2(36),
SKIPNO VARCHAR2(36),
CREATEDDATE DATE)
Pls help.
N.B. I am assuming that your sample data had incorrect skipnos for the second and third groups. My answer uses updated values in its sample data.
You can use the first_value analytic function, rather than lag to achieve your aim, e.g.:
-- Sample rows (skipno adjusted so that every group has its own value), then
-- one output row per input row: its position inside the skipno group and the
-- childvalue of the group's earliest row.
with vals as (
    select '345dg' as childvalue, 4 as skipno, to_date('19/09/2018 02:49', 'dd/mm/yyyy hh24:mi') as createddate from dual
    union all
    select '342sds', 4, to_date('19/09/2018 11:53', 'dd/mm/yyyy hh24:mi') from dual
    union all
    select '343dfd', 4, to_date('06/11/2018 12:40', 'dd/mm/yyyy hh24:mi') from dual
    union all
    select '234dfs', 4, to_date('06/11/2018 14:56', 'dd/mm/yyyy hh24:mi') from dual
    union all
    select '545ert', 5, to_date('06/11/2018 15:17', 'dd/mm/yyyy hh24:mi') from dual
    union all
    select '543tye', 5, to_date('07/11/2018 11:29', 'dd/mm/yyyy hh24:mi') from dual
    union all
    select '345cxv', 5, to_date('16/11/2018 14:16', 'dd/mm/yyyy hh24:mi') from dual
    union all
    select '563mnj', 6, to_date('19/11/2018 02:12', 'dd/mm/yyyy hh24:mi') from dual
)
select
    row_number() over (partition by skipno order by createddate) as rno,
    childvalue,
    -- first childvalue of the skipno group in creation order
    first_value(childvalue) over (partition by skipno order by createddate) as mothervlue,
    skipno,
    createddate
from vals;
RNO CHILDVALUE MOTHERVLUE SKIPNO CREATEDDATE
---------- ---------- ---------- ---------- -----------
1 345dg 345dg 4 19/09/2018
2 342sds 345dg 4 19/09/2018
3 343dfd 345dg 4 06/11/2018
4 234dfs 345dg 4 06/11/2018
1 545ert 545ert 5 06/11/2018
2 543tye 545ert 5 07/11/2018
3 345cxv 545ert 5 16/11/2018
1 563mnj 563mnj 6 19/11/2018

Oracle - filtering Cartesian coordinate

I have a mating_history table:
id cage_id code event_date animal_id
---------------------------------------------------------------
100 4163 FA 03-Aug-2016 10.51.55.000 AM 3570
101 4163 MA 03-Aug-2016 10.52.13.000 AM 2053
102 4163 MR 29-Aug-2016 10.23.24.000 AM 2053
103 4163 MA 11-Oct-2016 12.50.02.000 PM 5882
104 4163 MR 31-Oct-2016 01.37.28.000 PM 5882
105 4163 MA 07-Nov-2016 01.27.58.000 PM 5882
106 4163 FA 19-Apr-2017 11.46.50.000 AM 6011
107 4163 FA 19-Apr-2017 11.48.31.000 AM 6010
Legend:
MA = Male added to cage
MR = Male removed from cage
FA = Female added to cage
FR = Female removed from cage
In the table above, the first row says that on the event_date, a female animal (with an id of 3570) was added to the cage for the purpose of breeding.
If you follow the history logs you'll get these points as "actual mating":
female_id male_id event_date
-----------------------------------------------------------------
3570 2053 03-Aug-2016 10.52.13.000 AM
3570 5882 11-Oct-2016 12.50.02.000 PM
3570 5882 07-Nov-2016 01.27.58.000 PM
6011 5882 19-Apr-2017 11.46.50.000 AM
6010 5882 19-Apr-2017 11.48.31.000 AM
However when I tried to convert my thoughts into SQL I didn't get what I wanted above.
SQL
-- Pairs every "added" event (MA/FA) in cage 4163 with every later "added"
-- event of the opposite code in the same cage.
-- Written as an inner join: the filters on se already discard the
-- NULL-extended rows an outer join would produce.
SELECT
    be.cage_id,
    be.code       AS base_code,
    be.animal_id  AS base_animal,
    be.event_date AS base_date,
    se.code       AS sub_code,
    se.animal_id  AS sub_animal,
    se.event_date AS sub_date
FROM mating_history be
INNER JOIN mating_history se
    ON se.cage_id = be.cage_id
   AND se.code != be.code
   AND se.event_date > be.event_date
WHERE be.cage_id = 4163
  AND be.code IN ('MA', 'FA')
  AND se.code IN ('MA', 'FA')
ORDER BY be.event_date ASC, se.event_date ASC
Result
cage_id base_code base_animal base_date sub_code sub_animal sub_date
--------------------------------------------------------------------------------------------------------------------
4163 FA 3570 03-Aug-2016 10.51.55.000 AM MA 2053 03-Aug-2016 10.52.13.000 AM
4163 FA 3570 03-Aug-2016 10.51.55.000 AM MA 5882 11-Oct-2016 12.50.02.000 PM
4163 FA 3570 03-Aug-2016 10.51.55.000 AM MA 5882 07-Nov-2016 01.27.58.000 PM
4163 MA 2053 03-Aug-2016 10.52.13.000 AM FA 6011 19-Apr-2017 11.46.50.000 AM --------> WRONG
4163 MA 2053 03-Aug-2016 10.52.13.000 AM FA 6010 19-Apr-2017 11.48.31.000 AM --------> WRONG
4163 MA 5882 11-Oct-2016 12.50.02.000 PM FA 6011 19-Apr-2017 11.46.50.000 AM --------> WRONG
4163 MA 5882 11-Oct-2016 12.50.02.000 PM FA 6010 19-Apr-2017 11.48.31.000 AM --------> WRONG
4163 MA 5882 07-Nov-2016 01.27.58.000 PM FA 6011 19-Apr-2017 11.46.50.000 AM
4163 MA 5882 07-Nov-2016 01.27.58.000 PM FA 6010 19-Apr-2017 11.48.31.000 AM
I'm out of ideas on how to get the 5 rows that I need. How do I further filter the result out so I get just the 5 rows that I need in this case?
Optional: Is creating a Cartesian product even the best solution for what I'm trying to accomplish? Is there a better way to do this?
This may work:
Setup:
-- Test fixture for the mating_history question: table definition plus the
-- eight sample events of cage 4163.
create table mating_history (
    id number primary key
  , cage_id number not null
    -- MA/MR = male added/removed, FA/FR = female added/removed
  , code char(2) check (code in ('FA', 'FR', 'MA', 'MR'))
  , event_date timestamp not null
  , animal_id number not null
);
-- explicit column list so the insert does not depend on column order
insert into mating_history (id, cage_id, code, event_date, animal_id)
    select 100, 4163, 'FA', timestamp '2016-08-03 10:51:55', 3570 from dual union all
    select 101, 4163, 'MA', timestamp '2016-08-03 10:52:13', 2053 from dual union all
    select 102, 4163, 'MR', timestamp '2016-08-29 10:23:24', 2053 from dual union all
    select 103, 4163, 'MA', timestamp '2016-10-11 12:50:02', 5882 from dual union all
    select 104, 4163, 'MR', timestamp '2016-10-31 13:37:28', 5882 from dual union all
    select 105, 4163, 'MA', timestamp '2016-11-07 13:27:58', 5882 from dual union all
    select 106, 4163, 'FA', timestamp '2017-04-19 11:46:50', 6011 from dual union all
    select 107, 4163, 'FA', timestamp '2017-04-19 11:48:31', 6010 from dual
;
commit;
This is bad in several ways. There should be small "dimension" tables for cages and for animals. The animals table should show the sex (not the "code" in your current table). For now I assumed the data is as you presented it and you are not inclined to fix the data model.
Query:
-- Lists female/male pairs whose stays in the same cage overlap, with the
-- moment the pair was first together.
with
-- Split code into sex (first character) and event type (remaining
-- character), and number each animal's events per code in time order, so the
-- n-th "added" and the n-th "removed" event of an animal share the same grp.
grouped ( cage_id, sex, event_code, event_date, animal_id, grp ) as (
select cage_id, substr(code, 1, 1), substr(code, 2),
event_date, animal_id,
row_number() over (partition by animal_id, code order by event_date)
from mating_history
),
-- One row per (cage_id, sex, animal_id, grp): column a holds the "added"
-- timestamp and column r the "removed" timestamp (NULL if not removed).
pivoted as (
select *
from grouped
pivot ( max(event_date) for event_code in ('A' as a, 'R' as r) )
)
-- The later of the two "added" timestamps is when both animals are present.
select f.animal_id as female_id,
m.animal_id as male_id,
greatest(f.a, m.a) as event_date
from ( select * from pivoted where sex = 'F' ) f
join
( select * from pivoted where sex = 'M' ) m
on f.cage_id = m.cage_id
-- stays overlap: each animal left (if it left at all) no earlier than the
-- other one arrived
and ( f.r >= m.a or f.r is null )
and ( m.r >= f.a or m.r is null )
order by event_date, female_id, male_id
;
Output: (the event_date column uses my current NLS_TIMESTAMP_FORMAT)
FEMALE_ID MALE_ID EVENT_DATE
--------- ------- ------------------------------
3570 2053 03-AUG-2016 10.52.13.000000000
3570 5882 11-OCT-2016 12.50.02.000000000
3570 5882 07-NOV-2016 13.27.58.000000000
6011 5882 19-APR-2017 11.46.50.000000000
6010 5882 19-APR-2017 11.48.31.000000000
Let's keep track of who is in the cage . . . and assume that there is only one male and one female. The following gets the animals in the cage for each change:
-- For every event, report which male and which female were in the cage just
-- before it (LAG only looks at earlier rows of the same cage), plus the
-- timestamp of the next event. Assumes at most one male and one female per
-- cage at a time.
-- mating_history exposes the event type as "code"; "base_code" was only an
-- alias in the question's query.
select mh.*,
       -- the latest earlier male event is an "added" -> that male is present
       (case when 'MA' = lag(case when code in ('MA', 'MR') then code end ignore nulls) over (partition by cage_id order by event_date)
             then lag(case when code in ('MA') then animal_id end ignore nulls) over (partition by cage_id order by event_date)
        end) as male_animal,
       -- same for the female
       (case when 'FA' = lag(case when code in ('FA', 'FR') then code end ignore nulls) over (partition by cage_id order by event_date)
             then lag(case when code in ('FA') then animal_id end ignore nulls) over (partition by cage_id order by event_date)
        end) as female_animal,
       lead(event_date) over (partition by cage_id order by event_date) as next_event_date
from mating_history mh;
You want the ones where both animals are present:
-- Events that happen while both a male and a female are in the cage.
-- The inner query derives, per event, the male and female present just before
-- it (LAG only looks at earlier rows of the same cage); the outer query keeps
-- the rows where both are known.
-- mating_history exposes the event type as "code"; each CASE compares the
-- latest earlier male/female event code exactly once.
select mh.*
from (select mh.*,
             (case when 'MA' = lag(case when code in ('MA', 'MR') then code end ignore nulls) over (partition by cage_id order by event_date)
                   then lag(case when code in ('MA') then animal_id end ignore nulls) over (partition by cage_id order by event_date)
              end) as male_animal,
             (case when 'FA' = lag(case when code in ('FA', 'FR') then code end ignore nulls) over (partition by cage_id order by event_date)
                   then lag(case when code in ('FA') then animal_id end ignore nulls) over (partition by cage_id order by event_date)
              end) as female_animal,
             lead(event_date) over (partition by cage_id order by event_date) as next_event_date
      from mating_history mh
     ) mh
where male_animal is not null and female_animal is not null;

Event grouping in time series

I'm trying to build groups of precipitation events in my measurement data. I have a time, a measurement value and a flag noting whether it was raining:
00:00, 32.4, 0
00:10, 32.4, 0
00:20, 32.6, 1
00:30, 32.7, 1
00:40, 32.9, 1
00:50, 33.2, 1
01:00, 33.2, 0
01:10, 33.2, 0
01:20, 33.2, 0
01:30, 33.5, 1
01:40, 33.6, 1
01:50, 33.6, 0
02:00, 33.6, 0
...
Now I'd like to generate an event id for the precipitation events:
00:00, 32.4, 0, NULL
00:10, 32.4, 0, NULL
00:20, 32.6, 1, 1
00:30, 32.7, 1, 1
00:40, 32.9, 1, 1
00:50, 33.2, 1, 1
01:00, 33.2, 0, NULL
01:10, 33.2, 0, NULL
01:20, 33.2, 0, NULL
01:30, 33.5, 1, 2
01:40, 33.6, 1, 2
01:50, 33.6, 0, NULL
02:00, 33.6, 0, NULL
...
Then I'll be able to use grouping to summarize the events. Any hint how to do this in Oracle is much appreciated.
So far I was able to calculate the mentioned flag and the diff to the last row:
-- Per measurement: the cumulative precipitation, its change since the
-- previous measurement, and a 0/1 flag telling whether it increased.
SELECT
    measured_at,
    station_id, -- the comma keeps ps a column of its own, not an alias of station_id
    ps, -- precipitation sum
    ps - lag(ps, 1, NULL) OVER (ORDER BY measured_at ASC) AS p, -- precipitation delta
    CASE
        WHEN ps - lag(ps, 1, NULL) OVER (ORDER BY measured_at ASC) > 0 THEN 1
        ELSE 0 -- also covers the first row, where the delta is NULL
    END AS rainflag
FROM measurements;
I think it must be possible to generate the required event id somehow, but can't figure it out. Thanks for your time!
Final solution using mt0 answer:
-- Test fixture for the final solution: rebuilds events with cumulative
-- precipitation readings (ps) for station 'XYZ'. Readings are 10 minutes
-- apart except for the jump from 13:50 to 17:00, and ps drops once
-- (33.6 -> 33.5 at 13:50).
DROP TABLE events;
CREATE TABLE events (measured_at, station_id, ps) AS
SELECT TO_DATE('2016-05-01 12:00', 'YYYY-MM-DD HH24:MI'), 'XYZ', 32.4 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 12:10', 'YYYY-MM-DD HH24:MI'), 'XYZ', 32.6 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 12:20', 'YYYY-MM-DD HH24:MI'), 'XYZ', 32.7 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 12:30', 'YYYY-MM-DD HH24:MI'), 'XYZ', 32.9 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 12:40', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 12:50', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 13:00', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 13:10', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.2 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 13:20', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.5 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 13:30', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.6 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 13:40', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.6 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 13:50', 'YYYY-MM-DD HH24:MI'), 'XYZ', 33.5 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 17:00', 'YYYY-MM-DD HH24:MI'), 'XYZ', 39.1 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 17:10', 'YYYY-MM-DD HH24:MI'), 'XYZ', 39.2 FROM DUAL UNION ALL
SELECT TO_DATE('2016-05-01 17:20', 'YYYY-MM-DD HH24:MI'), 'XYZ', 39.2 FROM DUAL;
-- Marks rain events in the events table and accumulates the precipitation
-- inside each event. All windows run over the whole table ordered by
-- measured_at; there is no partitioning by station_id.
WITH
-- delta_p: change of ps against the previous row, but only when the previous
--          row is exactly 1/144 day (10 minutes) older; NULL otherwise.
-- rain:    1 when ps increased against the previous row (whatever the time
--          gap), else 0.
flagged AS (
SELECT
measured_at,
station_id,
ps,
CASE
WHEN measured_at - lag(measured_at, 1, NULL) OVER (ORDER BY measured_at) = (1/144) THEN ps - lag(ps, 1, NULL) OVER (ORDER BY measured_at)
ELSE NULL
END as delta_p,
CASE
WHEN ps - lag(ps, 1, NULL) OVER (ORDER BY measured_at) > 0 THEN 1
ELSE 0
END AS rain
FROM events
),
-- p:     delta_p with negative values replaced by NULL.
-- event: for rain rows, running row count minus running count of rain rows;
--        the difference stays constant across consecutive rain rows, so it
--        identifies the event. NULL for rows without rain.
eventmarked AS (
SELECT
f.*,
CASE
WHEN f.delta_p >= 0 THEN f.delta_p
ELSE NULL
END AS p,
CASE rain
WHEN 1 THEN COUNT(1) OVER (ORDER BY measured_at) - SUM(rain) OVER (ORDER BY measured_at)
END as event
FROM flagged f
),
-- e_ps: running total of the non-zero p values within each event.
summarized AS (
SELECT
em.*,
sum(CASE p WHEN 0 THEN NULL ELSE p END) OVER (PARTITION BY event ORDER BY measured_at) as e_ps
FROM eventmarked em
)
SELECT measured_at, station_id, ps, p, e_ps FROM summarized
ORDER BY measured_at;
Oracle Setup:
-- Fixture for the queries below, built from the question's three-column
-- sample (time, measurement, rain flag). With this column list the
-- measurement ends up in station_id and the 0/1 rain flag in ps.
CREATE TABLE events ( measured_at, station_id, ps ) AS
    SELECT '00:00', 32.4, 0 FROM DUAL
    UNION ALL SELECT '00:10', 32.4, 0 FROM DUAL
    UNION ALL SELECT '00:20', 32.6, 1 FROM DUAL
    UNION ALL SELECT '00:30', 32.7, 1 FROM DUAL
    UNION ALL SELECT '00:40', 32.9, 1 FROM DUAL
    UNION ALL SELECT '00:50', 33.2, 1 FROM DUAL
    UNION ALL SELECT '01:00', 33.2, 0 FROM DUAL
    UNION ALL SELECT '01:10', 33.2, 0 FROM DUAL
    UNION ALL SELECT '01:20', 33.2, 0 FROM DUAL
    UNION ALL SELECT '01:30', 33.5, 1 FROM DUAL
    UNION ALL SELECT '01:40', 33.6, 1 FROM DUAL
    UNION ALL SELECT '01:50', 33.6, 0 FROM DUAL
    UNION ALL SELECT '02:00', 33.6, 0 FROM DUAL;
Query:
-- Numbers the rain events 1, 2, ... in time order; rows without rain get NULL.
WITH islands AS (
    -- For rain rows (ps = 1): running row count minus running count of rain
    -- rows. The difference is the same for every row of one uninterrupted
    -- rain period, so it identifies the period. Other rows get NULL.
    SELECT e.*,
           CASE
               WHEN ps = 1
               THEN COUNT( 1 ) OVER ( ORDER BY measured_at )
                    - SUM( ps ) OVER ( ORDER BY measured_at )
           END AS island_key
    FROM events e
)
SELECT measured_at,
       station_id,
       ps,
       -- turn the period keys into consecutive event numbers
       CASE
           WHEN island_key IS NOT NULL
           THEN DENSE_RANK() OVER ( ORDER BY island_key )
       END AS rainflag
FROM islands
ORDER BY measured_at;
Query 2
-- Numbers the rain events by counting, up to each rain row, how many rain
-- periods have started so far; rows without rain get NULL.
WITH starts AS (
    -- 1 on rows where ps is greater than on the previous row (0 is assumed
    -- before the first row), NULL everywhere else
    SELECT e.*,
           CASE
               WHEN LAG( ps, 1, 0 ) OVER ( ORDER BY measured_at ) < ps
               THEN 1
           END AS is_start
    FROM events e
)
SELECT measured_at,
       station_id,
       ps,
       CASE
           WHEN ps = 1
           THEN SUM( is_start ) OVER ( ORDER BY measured_at )
       END AS rainflag
FROM starts;
Output:
MEASURED_AT STATION_ID PS RAINFLAG
----------- ---------- ---------- ----------
00:00 32.4 0
00:10 32.4 0
00:20 32.6 1 1
00:30 32.7 1 1
00:40 32.9 1 1
00:50 33.2 1 1
01:00 33.2 0
01:10 33.2 0
01:20 33.2 0
01:30 33.5 1 2
01:40 33.6 1 2
01:50 33.6 0
02:00 33.6 0
Alternative solution using only LAG function.
In the subquery the column PS2 marks the rain started events. The main query simple sums this flag while ignoring the time that is not raining.
-- ps2 flags the rows where rain starts (ps = 1 and the previous row had
-- ps = 0; 0 is assumed before the first row). rf is the running count of
-- those starts, shown on rain rows only.
select measured_at,
       station_id,
       ps,
       ps2,
       case ps
           when 1 then sum(ps2) over (order by measured_at)
       end as rf
from (
    select measured_at,
           station_id,
           ps,
           case
               when lag(ps, 1, 0) over (order by measured_at) = 0 and ps = 1 then 1
               else 0
           end as ps2
    from events
) ev
;
MEASURED_AT STATION_ID PS PS2 RF
----------- ---------- ---------- ---------- ----------
00:00 32,4 0 0
00:10 32,4 0 0
00:20 32,6 1 1 1
00:30 32,7 1 0 1
00:40 32,9 1 0 1
00:50 33,2 1 0 1
01:00 33,2 0 0
01:10 33,2 0 0
01:20 33,2 0 0
01:30 33,5 1 1 2
01:40 33,6 1 0 2
01:50 33,6 0 0
02:00 33,6 0 0

How to calculate price change over 3 years in SQL query

I need to calculate the price change of an item (both in cost and % change) over the last three years.
The table has four fields:
SKU_no, Date_updated, Price, Active_flag
When the Active_flag field is A, the item is active; when it is I, the item is inactive. Some items haven't changed prices in years, so they won't have three years of entries with an inactive flag.
Sample table
SKU_NO Update_date Price Active_flag
30 1/1/1999 40.8 I
33 1/1/2014 70.59 A
33 1/1/2013 67.23 I
33 1/1/2012 60.03 I
33 1/1/2011 55.08 I
33 1/1/2010 55.08 I
34 1/1/2009 51 A
36 1/1/2014 70.59 A
36 1/1/2013 67.23 I
36 1/1/2012 60.03 I
38 1/1/2002 43.32 A
38 1/1/2001 43.32 I
38 4/8/2000 43.32 I
38 1/1/1999 43.32 I
39 1/1/2014 73.08 A
39 1/1/2013 69.6 I
39 1/1/2012 62.13 I
39 1/1/2011 57 I
39 1/1/2010 57 I
39 1/1/2009 52.8 I
This is the first query I wrote. I'm not too familiar with complex calculations
-- Monthly sales, COGS and margin per item for vendor 'BD', with each month
-- assigned to a 12-month CAGR period (CP_1 .. CP_3).
-- NOTE(review): the WHERE clause only admits fiscal years 2013, 2012 and
-- 2011, so the "FISCAL_YEAR = 2014" half of CP_1 can never match.
-- NOTE(review): FISCAL_YEAR is compared with numbers in the CASE expressions
-- and with strings in the WHERE clause - confirm the column's data type.
select
    s.VENDOR,
    s.FISCAL_YEAR,
    s.FISCAL_MONTH_NO,
    s.FISCAL_YEAR||'_'||FISCAL_MONTH_NO as PERIOD,
    CASE WHEN S.COST_USED_FLAG IN ('CONTRACT') THEN 'CONTRACT' ELSE 'NON-CONTRACT' END AS CONTRACT_TYPE,
    -- each period runs from fiscal month 10 of one year to month 9 of the next;
    -- unmatched rows get the text 'NULL', not a SQL NULL
    CASE WHEN ((s.FISCAL_YEAR = 2014 AND FISCAL_MONTH_NO <=9) OR (FISCAL_YEAR = 2013 AND FISCAL_MONTH_NO >=10)) THEN 'CP_1'
         WHEN ((s.FISCAL_YEAR = 2013 AND FISCAL_MONTH_NO <= 9) OR (FISCAL_YEAR = 2012 AND FISCAL_MONTH_NO >=10)) THEN 'CP_2'
         WHEN ((s.FISCAL_YEAR = 2012 AND FISCAL_MONTH_NO <= 9) OR (FISCAL_YEAR = 2011 AND FISCAL_MONTH_NO >=10)) THEN 'CP_3'
         ELSE 'NULL' END CAGR_PERIODS,
    CASE WHEN s.MARKET IN ('PO', 'SC', 'OC') THEN 'PC' ELSE 'EC' END AS MARKET_TYPE,
    s.MARKET,
    s.COST_PLUS_FLAG,
    s.COST_USED_FLAG,
    LPAD(S.PC_ITEM_NO,6,'0') AS NEW_ITEM_NO,
    s.PC_ITEM_NO,
    i.ITEM_NO,
    i.VEND_CAT_NUM,
    i.DESCRIPTION,
    s.PC_PROD_CAT,
    s.PC_PROD_SUBCAT,
    i.SELL_UOM,
    i.QTY_PER_SELL_UOM,
    i.PRIMARY_UOM,
    i.HEAD_CONV_FACT,
    SUM(s.QTY_EACH) AS QUANTITY_SOLD,
    SUM(s.EXT_GROSS_COGS) AS TOTAL_COGS,
    -- NULLIF: a group whose quantities sum to 0 yields NULL instead of a
    -- divide-by-zero error
    -- NOTE(review): NET_SALES is derived from COGS, not from sales - confirm
    SUM(s.EXT_GROSS_COGS)/ NULLIF(SUM(s.QTY_EACH), 0) as NET_SALES,
    SUM(s.EXT_SALES)/ NULLIF(SUM(s.QTY_EACH), 0) as ASP,
    SUM(s.EXT_SALES) AS TOTAL_SALES,
    SUM(S.EXT_SALES) - SUM(S.EXT_GROSS_COGS) as GROSS_PROFIT
from SIXSIGMA.CIA_ALL_SALES_TREND_DATA s
INNER JOIN MGMSH.ITEM i
    ON S.PC_ITEM_NO = I.ITEM_NO
WHERE S.VENDOR = 'BD' AND
    (S.EXT_SALES IS NOT NULL AND S.FISCAL_YEAR IN ('2013','2012','2011'))
GROUP BY
    s.VENDOR,
    s.FISCAL_YEAR,
    s.FISCAL_MONTH_NO,
    s.FISCAL_YEAR||'_'||FISCAL_MONTH_NO,
    CASE WHEN s.MARKET IN ('PO', 'SC', 'OC') THEN 'PC' ELSE 'EC' END,
    CASE WHEN S.COST_USED_FLAG IN ('CONTRACT') THEN 'CONTRACT' ELSE 'NON-CONTRACT' END,
    CASE WHEN ((s.FISCAL_YEAR = 2014 AND FISCAL_MONTH_NO <=9) OR (FISCAL_YEAR = 2013 AND FISCAL_MONTH_NO >=10)) THEN 'CP_1'
         WHEN ((s.FISCAL_YEAR = 2013 AND FISCAL_MONTH_NO <= 9) OR (FISCAL_YEAR = 2012 AND FISCAL_MONTH_NO >=10)) THEN 'CP_2'
         WHEN ((s.FISCAL_YEAR = 2012 AND FISCAL_MONTH_NO <= 9) OR (FISCAL_YEAR = 2011 AND FISCAL_MONTH_NO >=10)) THEN 'CP_3'
         ELSE 'NULL' END,
    s.MARKET,
    s.COST_USED_FLAG,
    s.COST_PLUS_FLAG,
    s.PC_ITEM_NO,
    s.PC_PROD_CAT,
    i.SELL_UOM,
    i.QTY_PER_SELL_UOM,
    i.PRIMARY_UOM,
    i.HEAD_CONV_FACT,
    i.DESCRIPTION,
    i.VEND_CAT_NUM,
    s.PC_PROD_SUBCAT,
    i.ITEM_NO
ORDER BY s.PC_ITEM_NO,s.FISCAL_YEAR, s.FISCAL_MONTH_NO
There are several ways to approach this, but I would recommend a windowing function such as LAG or LEAD. With these functions, you can reference neighboring rows. For example:
lead(column, offset, default) over (partition by some_column order by column)
And in the example below:
lead(price, 1, price) over (partition by sku_no order by update_date desc)
Here is a working example with sample data:
-- Price change per SKU relative to the immediately preceding price record,
-- reported for records updated within the last three calendar years.
with sample_data as (
  -- Inline copy of the sample table from the question.
  -- NOTE(review): the DD/MM/YYYY mask reads '4/8/2000' as 4 August 2000; switch to
  -- MM/DD/YYYY if the source dates are month-first -- confirm.
  select '30' sku_no, to_date('1/1/1999','DD/MM/YYYY') update_date, 40.8 price, 'I' active_flag from dual union all
  select '33', to_date('1/1/2014','DD/MM/YYYY'), 70.59, 'A' from dual union all
  select '33', to_date('1/1/2013','DD/MM/YYYY'), 67.23, 'I' from dual union all
  select '33', to_date('1/1/2012','DD/MM/YYYY'), 60.03, 'I' from dual union all
  select '33', to_date('1/1/2011','DD/MM/YYYY'), 55.08, 'I' from dual union all
  select '33', to_date('1/1/2010','DD/MM/YYYY'), 55.08, 'I' from dual union all
  select '34', to_date('1/1/2009','DD/MM/YYYY'), 51   , 'A' from dual union all
  select '36', to_date('1/1/2014','DD/MM/YYYY'), 70.59, 'A' from dual union all
  select '36', to_date('1/1/2013','DD/MM/YYYY'), 67.23, 'I' from dual union all
  select '36', to_date('1/1/2012','DD/MM/YYYY'), 60.03, 'I' from dual union all
  select '38', to_date('1/1/2002','DD/MM/YYYY'), 43.32, 'A' from dual union all
  select '38', to_date('1/1/2001','DD/MM/YYYY'), 43.32, 'I' from dual union all
  select '38', to_date('4/8/2000','DD/MM/YYYY'), 43.32, 'I' from dual union all
  select '38', to_date('1/1/1999','DD/MM/YYYY'), 43.32, 'I' from dual union all
  select '39', to_date('1/1/2014','DD/MM/YYYY'), 73.08, 'A' from dual union all
  select '39', to_date('1/1/2013','DD/MM/YYYY'), 69.6 , 'I' from dual union all
  select '39', to_date('1/1/2012','DD/MM/YYYY'), 62.13, 'I' from dual union all
  select '39', to_date('1/1/2011','DD/MM/YYYY'), 57   , 'I' from dual union all
  select '39', to_date('1/1/2010','DD/MM/YYYY'), 57   , 'I' from dual union all
  select '39', to_date('1/1/2009','DD/MM/YYYY'), 52.8 , 'I' from dual),
price_history as (
  -- Pair every record with the previous price of the same SKU. This runs before the
  -- date filter so that the oldest record inside the reporting window is still
  -- compared with the record just outside it, rather than with itself.
  select
    sku_no,
    update_date,
    price,
    -- Sorted newest first, LEAD looks one row "down", i.e. at the next-older record;
    -- the third argument makes a SKU's oldest record fall back to its own price.
    lead(price, 1, price) over (partition by sku_no order by update_date desc) prior_price
  from sample_data)
select
  sku_no,
  update_date,
  price,
  prior_price,                                                                     -- Showing the offset
  price - prior_price price_difference,                                            -- Calculate the difference
  -- Percent change is measured against the prior (old) price; NULLIF avoids a
  -- divide-by-zero error when the prior price is 0.
  round((price - prior_price) * 100 / nullif(prior_price, 0), 2) percent_change    -- Calculate the percentage
from price_history
where update_date >= add_months(trunc(sysdate,'YYYY'),-36) -- You said in the last three years
order by sku_no, update_date desc;
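You can also use LAG with the opposite sort order (order by update_date ascending instead of descending) to get the same result. If you want to calculate the difference from the price three years prior, rather than from the immediately preceding record, I would suggest an aggregate with the KEEP (DENSE_RANK FIRST/LAST ORDER BY ...) clause.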