I have a table whose sample data is shown below:
Date
name
IP
In IP
Out IP
session
activity
2/9/2023
X
123
123
Null
1
in
2/9/2023
X
123
Null
123
1
out
2/8/2023
Y
143
null
143
2
out
2/8/2023
Y
153
153
null
2
in
2/8/2023
X
163
163
null
3
in
2/8/2023
X
163
null
163
3
out
By grouping by name, ip and session, and using date and activity, I am able to get the records needed.
-- One row per (name, ip, session): the latest 'in' date and the latest 'out' date.
-- The two aggregates must be separated by a comma, and IN is a reserved word,
-- so the result columns are aliased in_date / out_date instead of in / out.
select
    name,
    ip,
    max(case when activity = 'in' then date end) as in_date,
    max(case when activity = 'out' then date end) as out_date
from view
group by name, ip, session
Generally, In IP and Out IP are the same, but at times (as in the case of name = Y) In IP and Out IP are different, so I need to get In IP and Out IP in the result as well.
How can I use this GROUP BY and get the other two columns as well?
https://dbfiddle.uk/zzNpjP3f
Not quite sure what exactly you want. With your sample data...
WITH
tbl (A_DATE, A_NAME, IP, IP_IN, IP_OUT, A_SESSION, ACTIVITY) AS
(
Select To_Date('02/09/2023', 'mm/dd/yyyy'), 'X', 123, 123, Null, 1, 'in' From Dual Union All
Select To_Date('02/09/2023', 'mm/dd/yyyy'), 'X', 123, Null, 123, 1, 'out' From Dual Union All
Select To_Date('02/08/2023', 'mm/dd/yyyy'), 'Y', 143, Null, 143, 2, 'out' From Dual Union All
Select To_Date('02/08/2023', 'mm/dd/yyyy'), 'Y', 153, 153, Null, 2, 'in' From Dual Union All
Select To_Date('02/08/2023', 'mm/dd/yyyy'), 'X', 163, 163, Null, 3, 'in' From Dual Union All
Select To_Date('02/08/2023', 'mm/dd/yyyy'), 'X', 163, Null, 163, 3, 'out' From Dual
)
... you could do a small transformation to get all the rows with some transformed columns like here..
-- Row-level view: every source row is kept, and the in/out IP and date are
-- spread into separate columns depending on the row's activity.
SELECT
    a_name,
    ip,
    ip_in,
    ip_out,
    a_session,
    activity,
    CASE activity WHEN 'in'  THEN ip_in  END AS ip_in,
    CASE activity WHEN 'in'  THEN a_date END AS in_date,
    CASE activity WHEN 'out' THEN ip_out END AS ip_out,
    CASE activity WHEN 'out' THEN a_date END AS out_date
FROM tbl
R e s u l t :
A_NAME IP IP_IN IP_OUT A_SESSION ACTIVITY IP_IN IN_DATE IP_OUT OUT_DATE
------ ---------- ---------- ---------- ---------- -------- ---------- --------- ---------- ---------
X 123 123 1 in 123 09-FEB-23
X 123 123 1 out 123 09-FEB-23
Y 143 143 2 out 143 08-FEB-23
Y 153 153 2 in 153 08-FEB-23
X 163 163 3 in 163 08-FEB-23
X 163 163 3 out 163 08-FEB-23
... with the resulting dataset you can do either aggregation with group by or you can use analytic functions to get what you want...
Option 1 - aggregate
-- Option 1: collapse to one row per (a_name, ip, a_session), taking the
-- maximum in/out IP and date found among the rows of each group.
SELECT
    a_name,
    ip,
    a_session,
    MAX(CASE activity WHEN 'in'  THEN ip_in  END) AS ip_in,
    MAX(CASE activity WHEN 'in'  THEN a_date END) AS in_date,
    MAX(CASE activity WHEN 'out' THEN ip_out END) AS ip_out,
    MAX(CASE activity WHEN 'out' THEN a_date END) AS out_date
FROM tbl
GROUP BY a_name, ip, a_session
A_NAME IP A_SESSION IP_IN IN_DATE IP_OUT OUT_DATE
------ ---------- ---------- ---------- --------- ---------- ---------
Y 153 2 153 08-FEB-23
X 163 3 163 08-FEB-23 163 08-FEB-23
Y 143 2 143 08-FEB-23
X 123 1 123 09-FEB-23 123 09-FEB-23
Option 2 - aggregate
-- Option 2: same conditional aggregation, but grouped by (a_name, ip) only,
-- so the session is not part of the result.
SELECT
    a_name,
    ip,
    MAX(CASE activity WHEN 'in'  THEN ip_in  END) AS ip_in,
    MAX(CASE activity WHEN 'in'  THEN a_date END) AS in_date,
    MAX(CASE activity WHEN 'out' THEN ip_out END) AS ip_out,
    MAX(CASE activity WHEN 'out' THEN a_date END) AS out_date
FROM tbl
GROUP BY a_name, ip
A_NAME IP IP_IN IN_DATE IP_OUT OUT_DATE
------ ---------- ---------- --------- ---------- ---------
X 163 163 08-FEB-23 163 08-FEB-23
X 123 123 09-FEB-23 123 09-FEB-23
Y 153 153 08-FEB-23
Y 143 143 08-FEB-23
Option 2A - IN_DATE as VARCHAR2 (from comments)
-- Option 2A: the Option 2 aggregation wrapped in an inline view so that a
-- missing IN_DATE can be shown as the text 'SESSION CLOSED'; present dates
-- are rendered as dd.mm.yyyy strings.
SELECT
    a_name,
    ip,
    ip_in,
    CASE
        WHEN in_date IS NULL THEN 'SESSION CLOSED'
        ELSE TO_CHAR(in_date, 'dd.mm.yyyy')
    END AS in_date,
    ip_out,
    out_date
FROM (
    SELECT
        a_name,
        ip,
        MAX(CASE activity WHEN 'in'  THEN ip_in  END) AS ip_in,
        MAX(CASE activity WHEN 'in'  THEN a_date END) AS in_date,
        MAX(CASE activity WHEN 'out' THEN ip_out END) AS ip_out,
        MAX(CASE activity WHEN 'out' THEN a_date END) AS out_date
    FROM tbl
    GROUP BY a_name, ip
)
A_NAME IP IP_IN IN_DATE IP_OUT OUT_DATE
------ ---------- ---------- -------------- ---------- ---------
X 163 163 08.02.2023 163 08-FEB-23
X 123 123 09.02.2023 123 09-FEB-23
Y 153 153 08.02.2023
Y 143 SESSION CLOSED 143 08-FEB-23
Option 3 - analytic
-- Option 3: keep every source row; the analytic MAX copies the latest in/out
-- date onto all rows that share the same A_NAME and IP.
Select
A_NAME, IP, A_SESSION,
-- Row-level value: no aggregation is applied here despite the MAX_ prefix in the alias.
CASE WHEN ACTIVITY = 'in' THEN IP_IN END "MAX_IP_IN",
MAX(CASE WHEN ACTIVITY = 'in' THEN A_DATE END) OVER(Partition By A_NAME, IP) "IN_DATE",
-- NOTE(review): "MAAX_IP_OUT" looks like a typo for "MAX_IP_OUT"; it is left
-- unchanged because the printed result header uses the same spelling.
CASE WHEN ACTIVITY = 'out' THEN IP_OUT END "MAAX_IP_OUT",
MAX(CASE WHEN ACTIVITY = 'out' THEN A_DATE END) OVER(Partition By A_NAME, IP) "OUT_DATE"
From
tbl
A_NAME IP A_SESSION MAX_IP_IN IN_DATE MAAX_IP_OUT OUT_DATE
------ ---------- ---------- ---------- --------- ----------- ---------
X 123 1 123 09-FEB-23 09-FEB-23
X 123 1 09-FEB-23 123 09-FEB-23
X 163 3 08-FEB-23 163 08-FEB-23
X 163 3 163 08-FEB-23 08-FEB-23
Y 143 2 143 08-FEB-23
Y 153 2 153 08-FEB-23
Option 4 - analytic
-- Option 4: like Option 3, but ACTIVITY is added to the partition, so the
-- in date is only filled on 'in' rows and the out date only on 'out' rows.
Select
A_NAME, IP, A_SESSION,
-- Row-level value: no aggregation is applied here despite the MAX_ prefix in the alias.
CASE WHEN ACTIVITY = 'in' THEN IP_IN END "MAX_IP_IN",
MAX(CASE WHEN ACTIVITY = 'in' THEN A_DATE END) OVER(Partition By A_NAME, IP, ACTIVITY) "IN_DATE",
-- NOTE(review): "MAAX_IP_OUT" looks like a typo for "MAX_IP_OUT"; it is left
-- unchanged because the printed result header uses the same spelling.
CASE WHEN ACTIVITY = 'out' THEN IP_OUT END "MAAX_IP_OUT",
MAX(CASE WHEN ACTIVITY = 'out' THEN A_DATE END) OVER(Partition By A_NAME, IP, ACTIVITY) "OUT_DATE"
From
tbl
A_NAME IP A_SESSION MAX_IP_IN IN_DATE MAAX_IP_OUT OUT_DATE
------ ---------- ---------- ---------- --------- ----------- ---------
X 123 1 123 09-FEB-23
X 123 1 123 09-FEB-23
X 163 3 163 08-FEB-23
X 163 3 163 08-FEB-23
Y 143 2 143 08-FEB-23
Y 153 2 153 08-FEB-23
... and so on... using (maybe) different functions not just MAX() (aggregate or analytic) with different Group By / Partition By
Right now you have a result row per name, ip, and session (GROUP BY name, ip, session). From your explanation I gather that you rather want a result row per name, and session only (GROUP BY name, session).
As to getting the IN/OUT IPs, use the same method as for the IN/OUT dates:
-- One row per (name, session): the in/out dates and the in/out IPs are each
-- picked with the same conditional MAX, keyed on the row's activity.
SELECT
    name,
    MAX(CASE activity WHEN 'in'  THEN date   END) AS in_date,
    MAX(CASE activity WHEN 'out' THEN date   END) AS out_date,
    MAX(CASE activity WHEN 'in'  THEN in_ip  END) AS in_ip,
    MAX(CASE activity WHEN 'out' THEN out_ip END) AS out_ip
FROM view
GROUP BY name, session
ORDER BY name, session;
Well, you may use max, min, sum, avg and other group functions with the clause over (partition by ...).
See: MAX() OVER PARTITION BY in Oracle SQL
https://forums.oracle.com/ords/apexds/post/max-from-multiple-column-and-group-by-8269
Assuming that you are looking for in/out pairs of entries on a day then, from Oracle 12, you can use MATCH_RECOGNIZE for row-by-row pattern matching:
-- Pair up each user's IN and OUT events within one calendar day using
-- row pattern matching (MATCH_RECOGNIZE).
SELECT user_1, in_ip, in_time, out_ip, out_time
FROM (
-- Add the calendar day (time portion truncated) so rows can be partitioned per user per day.
SELECT v.*,
TRUNC(date_column) AS day
FROM view_table v
)
MATCH_RECOGNIZE(
-- Matching is done independently for each user and day, in chronological order.
PARTITION BY user_1, day
ORDER BY date_column
MEASURES
-- The IP and timestamp are read from the generic ip column of whichever row
-- matched the IN / OUT pattern variable.
user_in.ip AS in_ip,
user_in.date_column AS in_time,
user_out.ip AS out_ip,
user_out.date_column AS out_time
-- Either an IN optionally followed by an OUT, or an OUT optionally followed by an IN.
PATTERN ( user_in user_out? | user_out user_in? )
DEFINE
-- The comparison is against upper-case 'IN' / 'OUT' literals.
user_in AS activity = 'IN',
user_out AS activity = 'OUT'
)
Which, for the sample data:
CREATE TABLE view_TABLE (date_column, user_1,ip, in_ip,out_ip,session_1,activity) AS
SELECT DATE '2023-02-09' + INTERVAL '15:06' HOUR TO MINUTE, 'X', 123, 123, NULL, 1, 'IN' FROM DUAL UNION ALL
SELECT DATE '2023-02-09' + INTERVAL '15:08' HOUR TO MINUTE, 'X', 123, NULL, 123, 1, 'OUT' FROM DUAL UNION ALL
SELECT DATE '2023-02-08' + INTERVAL '16:08' HOUR TO MINUTE, 'Y', 143, NULL, 143, 2, 'OUT' FROM DUAL UNION ALL
SELECT DATE '2023-02-08' + INTERVAL '16:04' HOUR TO MINUTE, 'Y', 153, 153, NULL, 2, 'IN' FROM DUAL UNION ALL
SELECT DATE '2023-02-08' + INTERVAL '12:45' HOUR TO MINUTE, 'X', 163, 163, NULL, 3, 'IN' FROM DUAL UNION ALL
SELECT DATE '2023-02-08' + INTERVAL '12:48' HOUR TO MINUTE, 'X', 163, NULL, 163, 3, 'OUT' FROM DUAL;
Outputs:
USER_1
IN_IP
IN_TIME
OUT_IP
OUT_TIME
X
163
2023-02-08 12:45:00
163
2023-02-08 12:48:00
X
123
2023-02-09 15:06:00
123
2023-02-09 15:08:00
Y
153
2023-02-08 16:04:00
143
2023-02-08 16:08:00
fiddle
Related
I have a table that looks like this:
ID TARGET_ACTION TARGET_DATE
366 0 21.04.2021
186 1 03.04.2021
929 0 14.04.2021
366 1 17.04.2021
Each ID in this table can be repeated and have a different TARGET_ACTION value for a different date. I want to form a sequence of actions for each id, dividing it into weeks so that it looks like this:
ID 01.04.2021-07.04.2021 08.04.2021-14.04.2021 15.04.2021-21.04.2021
366 0 0 1
186 1 1 0
929 0 1 0
How can I do that?
You could use pivoting in this case, though for the whole year there would be 52 week columns.
First, your sample data:
WITH
a_table AS
(
Select 366 "ID", 0 "TARGET_ACTION", To_Date('21.04.2021', 'dd.mm.yyyy') "TARGET_DATE" From Dual Union All
Select 186 "ID", 1 "TARGET_ACTION", To_Date('03.04.2021', 'dd.mm.yyyy') "TARGET_DATE" From Dual Union All
Select 929 "ID", 0 "TARGET_ACTION", To_Date('14.04.2021', 'dd.mm.yyyy') "TARGET_DATE" From Dual Union All
Select 366 "ID", 1 "TARGET_ACTION", To_Date('17.04.2021', 'dd.mm.yyyy') "TARGET_DATE" From DUAL
),
Next thing to do is to join your dataset with some kind of time dimension table. It is a very good thing to have for different uses. The link to how to create one is here: http://oracleolap.blogspot.com/2011/01/script-for-time-dimension-table.html
Instead, I've created a small cte just with weeks from the question:
weeks AS
(
SELECT 14 "WEEK_NUM", To_Date('01.04.2021', 'dd.mm.yyyy') "WEEK_STARTS", To_Date('07.04.2021', 'dd.mm.yyyy') "WEEK_ENDS" From Dual Union All
SELECT 15 "WEEK_NUM", To_Date('08.04.2021', 'dd.mm.yyyy') "WEEK_STARTS", To_Date('14.04.2021', 'dd.mm.yyyy') "WEEK_ENDS" From Dual Union All
SELECT 16 "WEEK_NUM", To_Date('15.04.2021', 'dd.mm.yyyy') "WEEK_STARTS", To_Date('21.04.2021', 'dd.mm.yyyy') "WEEK_ENDS" From Dual
)
Now, if you get the weeks from your data and join them with the time dimension, you can pivot the resulting dataset. Here is the main SQL with the result.
-- Pivot the joined rows into one column per week number (W14, W15, W16),
-- counting the rows that fall into each week.
-- PIVOT implicitly groups by every column of the inline view that is not used
-- in the pivot clause (ID, TARGET_ACTION, WEEK_STARTS, WEEK_ENDS), so one ID can
-- produce several identical rows; DISTINCT collapses them.
SELECT DISTINCT
    ID, W14, W15, W16
FROM
    (
        SELECT
            t.ID "ID",
            t.TARGET_ACTION "TARGET_ACTION",
            -- 'WW' returns a zero-padded string ('01'..'53'). Convert it to a number so
            -- that it compares correctly with the numeric WEEK_NUM values used in the
            -- join below and in the PIVOT IN list (no implicit conversion needed).
            To_Number(To_Char(t.TARGET_DATE, 'WW')) "WEEK_NUM",
            w.WEEK_STARTS "WEEK_STARTS",
            w.WEEK_ENDS "WEEK_ENDS"
        FROM
            a_table t
        INNER JOIN
            -- Numeric comparison: comparing To_Char(WEEK_NUM) with the 'WW' string
            -- would never match weeks 1-9 ('1' vs '01').
            weeks w ON(w.WEEK_NUM = To_Number(To_Char(t.TARGET_DATE, 'WW')))
    )
PIVOT
    (
        Count(WEEK_NUM)
        FOR WEEK_NUM IN(14 "W14", 15 "W15", 16 "W16")
    )
--
-- R e s u l t
--
-- ID W14 W15 W16
-- ---------- ---------- ---------- ----------
-- 929 0 1 0
-- 366 0 0 1
-- 186 1 0 0
Regards...
I have a table like this called DWH_DATA:
DATETIME FK_FEDERAL_STATE FK_ACCOMODATION ARRIVALS NIGHTS
--------------- -------------------- --------------- ---------- ----------
200010 W96-7 61 2433 16111
200011 W96-9 86 3718 30319
200012 W96-3 87 1100 8487
200101 W96-2 998 239 1038
Additional info Datetime = first four number = year + last two numbers = month
Then I have a table DWH_FEDERAL_STATES:
CODE(PK) NAME
---------- -------------------
W96-2 Country 2
W96-3 Country 3
W96-9 Country 9
W96-7 Country 7
Now I would like to get all nights for each federal state. Result should look like this:
Country 2 Country 3 Country 9 Country 7
200010 6979 16111
200011 ..
200012 ..
200101 ..
What I've already tried is a PIVOT Statement:
SELECT * FROM
(
SELECT
DATETIME,
NIGHTS,
dwh_data.fk_federal_state As federalState
FROM
dwh_data JOIN DWH_FEDERAL_STATES
ON dwh_data.fk_federal_state = dwh_federal_states.code
) t
PIVOT(
COUNT(federalState)
FOR federalState IN ('Country 2','Country 3','Country 9')
) ORDER BY 1;
The result I get with mine is:
DATETIME NIGHTS 'Country 2' 'Country 3' 'Country 9'
-------------------- ---------- ----------- ----------- -----------
197707 83648 0 0 0
197707 87301 0 0 0
197707 97350 0 0 0
You don't want to COUNT, you want to SUM the nights and use the name column:
-- Total nights per period, one column per federal state name.
-- The inline view resolves each state code to its name; PIVOT then sums the
-- nights per name, and COALESCE turns "no rows for this state" into 0.
SELECT
    datetime,
    COALESCE(country_2, 0) AS country_2,
    COALESCE(country_3, 0) AS country_3,
    COALESCE(country_9, 0) AS country_9
FROM (
    SELECT
        d.datetime,
        d.nights,
        f.name
    FROM dwh_data d
    INNER JOIN dwh_federal_states f
        ON f.code = d.fk_federal_state
)
PIVOT (
    SUM(nights)
    FOR name IN (
        'Country 2' AS country_2,
        'Country 3' AS country_3,
        'Country 9' AS country_9
    )
)
ORDER BY datetime;
Which, for the sample data:
CREATE TABLE dwh_data (DATETIME, FK_FEDERAL_STATE, FK_ACCOMODATION, ARRIVALS, NIGHTS) AS
SELECT 200010, 'W96-7', 61, 2433, 16111 FROM DUAL UNION ALL
SELECT 200011, 'W96-9', 86, 3718, 30319 FROM DUAL UNION ALL
SELECT 200012, 'W96-3', 87, 1100, 8487 FROM DUAL UNION ALL
SELECT 200101, 'W96-2', 998, 239, 1038 FROM DUAL;
CREATE TABLE DWH_FEDERAL_STATES (CODE, NAME) AS
SELECT 'W96-2', 'Country 2' FROM DUAL UNION ALL
SELECT 'W96-3', 'Country 3' FROM DUAL UNION ALL
SELECT 'W96-9', 'Country 9' FROM DUAL UNION ALL
SELECT 'W96-7', 'Country 7' FROM DUAL;
Outputs:
DATETIME
COUNTRY_2
COUNTRY_3
COUNTRY_9
200010
0
0
0
200011
0
0
30319
200012
0
8487
0
200101
1038
0
0
db<>fiddle here
Date ID X or Y
-------------------------------
01.01.2016 1234 Y
01.01.2017 1234 X
01.01.2018 1234 Y
01.01.2019 1234 Y
01.01.2020 1234 Y
01.01.2021 1234 X
01.01.2016 4321 X
01.01.2017 4321 X
01.01.2018 4321 X
01.01.2019 4321 Y
01.01.2020 4321 Y
The above table shows the structure of the data I'm using. What I want to do is reducing it to another table where I only have the rows associated with the change in X/Y status; however, I do not only need the first observation after X becomes Y (or vice versa), but also the last observation before the change. How can I achieve the output that looks exactly like the below table with SQL running on Oracle database?
Date ID X or Y
-------------------------------
01.01.2016 1234 Y
01.01.2017 1234 X
01.01.2018 1234 Y
01.01.2020 1234 Y
01.01.2021 1234 X
01.01.2018 4321 X
01.01.2019 4321 Y
Here's one option:
sample data from line #1 - 14
TEMP CTE: previous (LAG) and next (LEAD) x/y values per ID, sorted by date value
final select retrieves the result
SQL> with test (datum, id, xy) as
2 (select date '2016-01-01', 1234, 'y' from dual union all
3 select date '2017-01-01', 1234, 'x' from dual union all
4 select date '2018-01-01', 1234, 'y' from dual union all
5 select date '2019-01-01', 1234, 'y' from dual union all
6 select date '2020-01-01', 1234, 'y' from dual union all
7 select date '2021-01-01', 1234, 'x' from dual union all
8 --
9 select date '2016-01-01', 4321, 'x' from dual union all
10 select date '2017-01-01', 4321, 'x' from dual union all
11 select date '2018-01-01', 4321, 'x' from dual union all
12 select date '2019-01-01', 4321, 'y' from dual union all
13 select date '2020-01-01', 4321, 'y' from dual
14 ),
15 temp as
16 (select datum, id, xy,
17 lag(xy) over (partition by id order by datum) laxy,
18 lead(xy) over (partition by id order by datum) lexy
19 from test
20 )
21 --
22 select datum, id, xy
23 from temp
24 where xy <> laxy or xy <> lexy
25 order by id, datum;
DATUM ID X
---------- ---------- -
01.01.2016 1234 y
01.01.2017 1234 x
01.01.2018 1234 y
01.01.2020 1234 y
01.01.2021 1234 x
01.01.2018 4321 x
01.01.2019 4321 y
7 rows selected.
SQL>
Seems you need to use LEAD() and LAG() function together to filter them out :
-- Keep only the rows whose x/y status differs from the previous or the next
-- row of the same id (i.e. the rows on either side of a status change).
WITH t2 AS
(
    SELECT t.*,
           -- The third argument (x_y) is the default used when there is no previous /
           -- next row, so the first and last row of an id are not flagged on that side.
           -- Ordering by dt alone is enough: id is already the partition key.
           LAG(x_y, 1, x_y)  OVER (PARTITION BY id ORDER BY dt) AS lg_xy,
           LEAD(x_y, 1, x_y) OVER (PARTITION BY id ORDER BY dt) AS ld_xy
    FROM t
)
SELECT dt, id, x_y
FROM t2
WHERE NOT ( x_y = lg_xy AND x_y = ld_xy )
-- Sort the final result: an ORDER BY inside the CTE does not guarantee the
-- order of the outer query's output.
ORDER BY id, dt
Demo
I have a table EMPLOYEE as under:
Enroll Date STS EMP_ID EMP_Name DEPT Rank OST BLOCK
12-Jan-17 Q 123 ABC ABC123 12 Y 1000
14-Jan-17 Q 123 ABC DEF123 12 Y 1000
15-Jan-17 R 123 ABC DEF123 12 Y 100
15-Jan-17 R 123 ABC DEF123 12 Y 200
15-Jan-17 R 123 ABC DEF123 12 Y 300
20-Jan-17 R 123 ABC DEF123 10 Y 300
26-Jan-17 R 456 RST DEF456 8 N 200
26-Jan-17 R 456 RST DEF456 8 N 300
2-Feb-17 Q 123 ABC ABC123 12 Y 300
Now I need to remove the duplicate rows for each emp_id (rows are duplicates if EMP_Name, DEPT, OST and Rank are the same). If two rows have these four values the same but enroll_date is different, then I do not need to delete either row. If two rows have the same enroll date and the four fields (OST, EMP_Name, DEPT and Rank) are the same, then I need to keep the row with the highest block (1000, followed by 300, followed by 200, and so on).
So after deleting such data my table should have these rows:
Enroll Date STS EMP_ID EMP_Name DEPT Rank OST BLOCK
12-Jan-17 Q 123 ABC ABC123 12 Y 1000
14-Jan-17 Q 123 ABC DEF123 12 Y 1000
15-Jan-17 R 123 ABC DEF123 12 Y 100
2-Feb-17 Q 123 ABC ABC123 12 Y 300
20-Jan-17 R 123 ABC DEF123 10 Y 300
26-Jan-17 R 456 RST DEF456 8 N 200
26-Jan-17 R 456 RST DEF456 8 N 300
I tried using below query and will delete rows which have rn >1
SELECT enroll_date, STS, BLOCK, EMP_ID, EMP_NAME, DEPT,RANK, OST, row_number() over ( partition BY emp_id, enroll_date,emp_name, dept, ost, rank ORDER BY enroll_date ASC, block DESC)rn
FROM employee
But i am getting rn as 1 only everytime.
can someone check the issue here or suggest some other way to do so?
I am creating a temporary table which will have all non duplicate values:
-- Build employee_temp from the first-ranked row of each
-- (emp_id, calendar day of enroll_date, emp_name, dept, ost, rank) group,
-- ranking by enroll_date ascending and then block descending.
CREATE TABLE employee_temp AS
WITH duplicates AS (
    SELECT
        enroll_date, sts, block, emp_id, emp_name, dept, rank, ost,
        ROW_NUMBER() OVER (
            PARTITION BY emp_id, TRUNC(enroll_date), emp_name, dept, ost, rank
            ORDER BY enroll_date ASC, block DESC
        ) rn
    FROM employee
)
SELECT enroll_date, sts, block, emp_id, emp_name, dept, rank, ost
FROM duplicates
WHERE rn = 1;
It looks like your enroll_date values have non-midnight times, so partitioning by those also made those combinations unique (even though they don't look it when you only show the date part).
My initial thought was that your analytic row_number() was partitioned by too many columns, and that you shouldn't be including the date value you want to order by - it doesn't really make sense to partition by and order by the same thing, as it will be unique. Reducing the columns you actually want to check against, perhaps to:
row_number() over (partition BY emp_id, emp_name, dept, ost, rank
ORDER BY enroll_date ASC, block DESC)
would produce different ranks rather than all being 1. But I don't think that's right; that would probably make your secondary block ordering somewhat redundant, as you'll maybe be unlikely to have two rows with exactly the same time for one ID. Unlikely but not impossible, perhaps.
Re-reading your wording again I don't think you want to be ordering by the enroll_date at all, and you do want to be partitioning by the date instead; but, given that it contains non-midnight times that you apparently want to ignore for this exercise, the partitioning would have to be on the truncated date (which strips the time back to midnight, by default):
row_number() over (partition BY trunc(enroll_date), emp_id, emp_name, dept, ost, rank
ORDER BY block DESC)
With your sample data as a CTE, including slightly different times within each day, and one extra row to get everything the same but the date, this shows your original rn and my two calculated values:
with employee (enroll_date, sts, emp_id, emp_name, dept, rank, ost, block) as (
select to_date('12-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'ABC123', 12, 'Y', 1000 from dual
union all select to_date('14-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'DEF123', 12, 'Y', 1000 from dual
union all select to_date('15-Jan-17 00:00:01', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 12, 'Y', 100 from dual
union all select to_date('15-Jan-17 00:00:02', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 12, 'Y', 200 from dual
union all select to_date('15-Jan-17 00:00:03', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 12, 'Y', 300 from dual
union all select to_date('20-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 10, 'Y', 300 from dual
union all select to_date('26-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'R', 456, 'RST', 'DEF456', 8, 'N', 200 from dual
union all select to_date('26-Jan-17 00:00:01', 'DD-Mon-RR HH24:MI:SS'), 'R', 456, 'RST', 'DEF456', 8, 'N', 300 from dual
union all select to_date('2-Feb-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'ABC123', 12, 'Y', 300 from dual
union all select to_date('3-Feb-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'ABC123', 12, 'Y', 300 from dual
)
SELECT to_char(enroll_date, 'DD-Mon-RR') as date_only,
enroll_date, sts, block, emp_id, emp_name, dept, rank, ost,
row_number() over ( partition BY emp_id, enroll_date, emp_name, dept, ost, rank
ORDER BY enroll_date ASC, block DESC) your_rn,
row_number() over (partition BY emp_id, emp_name, dept, ost, rank
ORDER BY enroll_date ASC, block DESC) my_rn_1,
row_number() over (partition BY trunc(enroll_date), emp_id, emp_name, dept, ost, rank
ORDER BY block DESC) as my_rn_2
FROM employee
ORDER BY enroll_date;
DATE_ONLY ENROLL_DATE S BLOCK EMP_ID EMP DEPT RANK O YOUR_RN MY_RN_1 MY_RN_2
--------- ------------------- - ----- ------ --- ------ ---- - ------- ------- -------
12-Jan-17 2017-01-12 00:00:00 Q 1000 123 ABC ABC123 12 Y 1 1 1
14-Jan-17 2017-01-14 00:00:00 Q 1000 123 ABC DEF123 12 Y 1 1 1
15-Jan-17 2017-01-15 00:00:01 R 100 123 ABC DEF123 12 Y 1 2 3
15-Jan-17 2017-01-15 00:00:02 R 200 123 ABC DEF123 12 Y 1 3 2
15-Jan-17 2017-01-15 00:00:03 R 300 123 ABC DEF123 12 Y 1 4 1
20-Jan-17 2017-01-20 00:00:00 R 300 123 ABC DEF123 10 Y 1 1 1
26-Jan-17 2017-01-26 00:00:00 R 200 456 RST DEF456 8 N 1 1 2
26-Jan-17 2017-01-26 00:00:01 R 300 456 RST DEF456 8 N 1 2 1
02-Feb-17 2017-02-02 00:00:00 Q 300 123 ABC ABC123 12 Y 1 2 1
03-Feb-17 2017-02-03 00:00:00 Q 300 123 ABC ABC123 12 Y 1 3 1
To identify the rows to delete you can use a subquery:
-- List the rows that are NOT the highest-block row of their
-- (calendar day, emp_id, emp_name, dept, ost, rank) group, i.e. the delete candidates.
SELECT
    enroll_date, sts, block, emp_id, emp_name, dept, rank, ost
FROM (
    SELECT
        enroll_date, sts, block, emp_id, emp_name, dept, rank, ost,
        ROW_NUMBER() OVER (
            PARTITION BY TRUNC(enroll_date), emp_id, emp_name, dept, ost, rank
            ORDER BY block DESC
        ) AS my_rn_2
    FROM employee
)
WHERE my_rn_2 > 1
ORDER BY enroll_date;
ENROLL_DATE S BLOCK EMP_ID EMP DEPT RANK O
------------------- - ----- ------ --- ------ ---- -
2017-01-15 00:00:01 R 100 123 ABC DEF123 12 Y
2017-01-15 00:00:02 R 200 123 ABC DEF123 12 Y
2017-01-26 00:00:00 R 200 456 RST DEF456 8 N
You'll need to decide what actually makes sense for your data and requirements though.
I am trying to use SQL to select distinct data entries based on the time difference between one entry and the next. It's easier to explain with an example:
My data table has
Part DateTime
123 12:00:00
123 12:00:05
123 12:00:06
456 12:10:23
789 12:12:13
123 12:14:32
I would like to return all rows, with the limitation that if there are multiple entries with the same "Part" number, I would like to retrieve only those that are at least 5 minutes apart.
The query should return:
Part DateTime
123 12:00:00
456 12:10:23
789 12:12:13
123 12:14:32
The code I'm using is the following:
SELECT data1.*, to_char(data1.scan_time, 'yyyymmdd hh24:mi:ss')
FROM data data1
where exists
(
select *
from data data2
where data1.part_serial_number = data2.part_serial_number AND
data2.scan_time + 5/1440 >= data1.scan_time
and data2.info is null
)
order by to_char(data1.scan_time, 'yyyymmdd hh24:mi:ss'), data1.part_serial_number
This is not working unfortunately. Does anyone know what i'm doing wrong or can suggest an alternate approach??
Thanks
Analytic functions to the rescue.
You can use the analytic function LEAD to get the data for the next row for the part.
SQL> ed
Wrote file afiedt.buf
1 with x as (
2 select 123 part, timestamp '2011-12-08 00:00:00' ts
3 from dual
4 union all
5 select 123, timestamp '2011-12-08 00:00:05'
6 from dual
7 union all
8 select 123, timestamp '2011-12-08 00:00:06'
9 from dual
10 union all
11 select 456, timestamp '2011-12-08 00:10:23'
12 from dual
13 union all
14 select 789, timestamp '2011-12-08 00:12:13'
15 from dual
16 union all
17 select 123, timestamp '2011-12-08 00:14:32'
18 from dual
19 )
20 select part,
21 ts,
22 lead(ts) over (partition by part order by ts) next_ts
23* from x
SQL> /
PART TS NEXT_TS
---------- ------------------------------- -------------------------------
123 08-DEC-11 12.00.00.000000000 AM 08-DEC-11 12.00.05.000000000 AM
123 08-DEC-11 12.00.05.000000000 AM 08-DEC-11 12.00.06.000000000 AM
123 08-DEC-11 12.00.06.000000000 AM 08-DEC-11 12.14.32.000000000 AM
123 08-DEC-11 12.14.32.000000000 AM
456 08-DEC-11 12.10.23.000000000 AM
789 08-DEC-11 12.12.13.000000000 AM
6 rows selected.
Once you've done that, then you can create an inline view and simply select those rows where the next date is more than 5 minutes after the current date.
SQL> ed
Wrote file afiedt.buf
1 with x as (
2 select 123 part, timestamp '2011-12-08 00:00:00' ts
3 from dual
4 union all
5 select 123, timestamp '2011-12-08 00:00:05'
6 from dual
7 union all
8 select 123, timestamp '2011-12-08 00:00:06'
9 from dual
10 union all
11 select 456, timestamp '2011-12-08 00:10:23'
12 from dual
13 union all
14 select 789, timestamp '2011-12-08 00:12:13'
15 from dual
16 union all
17 select 123, timestamp '2011-12-08 00:14:32'
18 from dual
19 )
20 select part,
21 ts
22 from (
23 select part,
24 ts,
25 lead(ts) over (partition by part order by ts) next_ts
26 from x )
27 where next_ts is null
28* or next_ts > ts + interval '5' minute
SQL> /
PART TS
---------- -------------------------------
123 08-DEC-11 12.00.06.000000000 AM
123 08-DEC-11 12.14.32.000000000 AM
456 08-DEC-11 12.10.23.000000000 AM
789 08-DEC-11 12.12.13.000000000 AM
AFJ,
let's suppose that we have a new field that tells us whether a previous entry exists for this Part in the preceding 5 minutes; then, taking the rows where this field is false, we have the result.
-- Flag each row with 1 when the same Part has an earlier entry within the
-- preceding 5 minutes, otherwise 0.
select
    d.Part,
    d.DateTime,
    case
        when exists (
            select 1
            from data ds
            where ds.Part = d.Part
              -- Half-open window [DateTime - 5 min, DateTime): the lower bound has to
              -- come first (BETWEEN with the bounds reversed never matches), and the
              -- row itself is excluded so it cannot count as its own predecessor.
              and ds.DateTime >= d.DateTime - 5/1440
              and ds.DateTime <  d.DateTime
        ) then 1
        else 0
    end as exists_previous
from data d
The subquery checks whether there is a row with the same Part in the previous 5-minute interval.
The result must be:
Part DateTime exists_previous
123 12:00:00 0
123 12:00:05 1
123 12:00:06 1
456 12:10:23 0
789 12:12:13 0
123 12:14:32 0
now, filter to get only rows with 0:
-- Return only the rows that have no earlier entry for the same Part within
-- the preceding 5 minutes.
select Part, DateTime from
    (select
        d.Part,
        d.DateTime,
        case
            when exists (
                select 1
                from data ds
                where ds.Part = d.Part
                  -- Half-open window [DateTime - 5 min, DateTime): the lower bound has
                  -- to come first (BETWEEN with the bounds reversed never matches), and
                  -- the row itself is excluded so it cannot count as its own predecessor.
                  and ds.DateTime >= d.DateTime - 5/1440
                  and ds.DateTime <  d.DateTime
            ) then 1
            else 0
        end as exists_previous
    from data d
    ) T where T.exists_previous = 0
Disclaimer: not tested.
This has not been verified, but essentially, the trick is to group by part AND time divided by 5 minutes (floored).
-- Earliest scan per part within each fixed 5-minute bucket.
-- NOTE(review): buckets are fixed clock intervals, so two scans less than
-- 5 minutes apart that straddle a bucket boundary are both returned.
select part, min(scan_time)
from data
-- A DATE cannot be divided by a number, so convert it to a day count first
-- (date - date yields days) and scale to 5-minute buckets: 1440 / 5 = 288 per day.
-- Assumes scan_time is a DATE; for a TIMESTAMP use cast(scan_time as date) - TODO confirm.
group by part, floor((scan_time - date '1970-01-01') * 288)
-- scan_time itself is not a GROUP BY expression, so sort by the aggregate.
order by min(scan_time);