I'm trying to figure out a way of identifying a "run" of results (successive rows, in order) that meet some condition. Currently, I'm ordering a result set, and scanning by eye for particular patterns. Here's an example:
-- List every order in the (inclusive) date window, oldest first, so that
-- runs of the same name can be spotted by reading down the result.
SELECT the_date, name
FROM orders
WHERE
the_date BETWEEN
-- NOTE: ".." stands in for the elided TO_DATE format mask; supply one before running.
to_date('2013-09-18',..) AND
to_date('2013-09-22', ..)
ORDER BY the_date
--------------------------------------
the_date | name
--------------------------------------
2013-09-18 00:00:01 | John
--------------------------------------
2013-09-19 00:00:01 | James
--------------------------------------
2013-09-20 00:00:01 | John
--------------------------------------
2013-09-20 00:00:02 | John
--------------------------------------
2013-09-20 00:00:03 | John
--------------------------------------
2013-09-20 00:00:04 | John
--------------------------------------
2013-09-21 16:00:01 | Jennifer
--------------------------------------
What I want to extract from this result set is all the rows attributed to John on 2013-09-20. Generally what I'm looking for is a run of results from the same name, in a row, >= 3. I'm using Oracle 11, but I'm interested to know if this can be achieved with strict SQL, or if some kind of analytical function must be used.
You need multiple nested window functions:
-- Return every row that belongs to a run of three or more consecutive rows
-- (in the_date order) sharing the same name.
-- NOTE(review): the_date is the only sort key; if two rows can share the same
-- the_date the run boundaries are not deterministic - confirm it is unique.
SELECT *
FROM
(
-- Level 3: count the rows in each run so that short runs can be filtered out.
SELECT the_date, name, grp,
COUNT(*) OVER (PARTITION BY grp) AS cnt
FROM
(
-- Level 2: a running total of the flags gives each run its own number.
SELECT the_date, name,
SUM(flag) OVER (ORDER BY the_date) AS grp
FROM
(
-- Level 1: flag is 0 when the previous row has the same name, otherwise 1
-- (this includes the first row, where LAG returns NULL).
SELECT the_date, name,
CASE WHEN LAG(name) OVER (ORDER BY the_date) = name THEN 0 ELSE 1 END AS flag
FROM orders
WHERE
the_date BETWEEN
-- NOTE: ".." stands in for the elided TO_DATE format mask; supply one before running.
TO_DATE('2013-09-18',..) AND
TO_DATE('2013-09-22', ..)
) dt
) dt
) dt
WHERE cnt >= 3
ORDER BY the_date
Try this
-- Inline sample rows standing in for the real ORDERS table.
WITH orders AS (
    SELECT TO_DATE('2013-09-18 00:00:01', 'YYYY-MM-DD HH24:MI:SS') AS the_date, 'John' AS name FROM dual
    UNION ALL
    SELECT TO_DATE('2013-09-19 00:00:01', 'YYYY-MM-DD HH24:MI:SS') AS the_date, 'James' AS name FROM dual
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:01', 'YYYY-MM-DD HH24:MI:SS') AS the_date, 'John' AS name FROM dual
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:02', 'YYYY-MM-DD HH24:MI:SS') AS the_date, 'John' AS name FROM dual
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:03', 'YYYY-MM-DD HH24:MI:SS') AS the_date, 'John' AS name FROM dual
    UNION ALL
    SELECT TO_DATE('2013-09-20 00:00:04', 'YYYY-MM-DD HH24:MI:SS') AS the_date, 'John' AS name FROM dual
    UNION ALL
    SELECT TO_DATE('2013-09-21 16:00:01', 'YYYY-MM-DD HH24:MI:SS') AS the_date, 'Jennifer' AS name FROM dual
),
-- (calendar day, name) pairs with at least three orders inside the date window.
-- NOTE: this counts orders per name per day; it does not check that the rows
-- are adjacent in the overall ordering.
busy_days AS (
    SELECT
        TRUNC(o.the_date) AS order_day,
        o.name,
        COUNT(*) AS order_cnt
    FROM orders o
    WHERE o.the_date BETWEEN TRUNC(TO_DATE('2013-09-18', 'YYYY-MM-DD'))
                         AND TRUNC(TO_DATE('2013-09-22', 'YYYY-MM-DD'))
    GROUP BY
        TRUNC(o.the_date),
        o.name
    HAVING COUNT(*) >= 3
)
-- Return the full order rows for each qualifying (day, name) pair.
SELECT o.*
FROM busy_days d
INNER JOIN orders o
    ON o.name = d.name
   AND TRUNC(o.the_date) = d.order_day;
OUTPUT
9/20/2013 12:00:01 AM John
9/20/2013 12:00:02 AM John
9/20/2013 12:00:03 AM John
9/20/2013 12:00:04 AM John
Related
The problem I am facing is how to find distinct time periods from multiple time periods with overlap in Teradata ANSI SQL.
For example, the attached tables contain multiple overlapping time periods, how can I combine those time periods into 3 unique time periods in Teradata SQL???
I think I could do it in Python with a loop, but I am not sure how to do it in SQL.
ID
Start Date
End Date
001
2005-01-01
2006-01-01
001
2005-01-01
2007-01-01
001
2008-01-01
2008-06-01
001
2008-04-01
2008-12-01
001
2010-01-01
2010-05-01
001
2010-04-01
2010-12-01
001
2010-11-01
2012-01-01
My expected result is:
ID
start_Date
end_date
001
2005-01-01
2007-01-01
001
2008-01-01
2008-12-01
001
2010-01-01
2012-01-01
From Oracle 12, you can use MATCH_RECOGNIZE to perform a row-by-row comparison:
-- Merge overlapping date ranges per id into one row per merged range,
-- using row pattern matching.
SELECT id,
       start_date,
       end_date
FROM   table_name
MATCH_RECOGNIZE (
    PARTITION BY id
    ORDER BY start_date
    MEASURES
        FIRST(start_date) AS start_date,  -- start of the first range in the match
        MAX(end_date)     AS end_date     -- latest end among the matched ranges
    ONE ROW PER MATCH
    -- Zero or more ranges that reach the next range, followed by one closing range.
    PATTERN (reaches_next* closing_range)
    DEFINE
        -- The next row starts no later than the latest end seen so far in this match.
        reaches_next AS NEXT(start_date) <= MAX(end_date)
)
Which, for the sample data:
-- Sample data: seven date ranges, all for id '001', several of which overlap.
CREATE TABLE table_name (ID, Start_Date, End_Date) AS
SELECT '001', DATE '2005-01-01', DATE '2006-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2005-01-01', DATE '2007-01-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-01-01', DATE '2008-06-01' FROM DUAL UNION ALL
SELECT '001', DATE '2008-04-01', DATE '2008-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-01-01', DATE '2010-05-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-04-01', DATE '2010-12-01' FROM DUAL UNION ALL
SELECT '001', DATE '2010-11-01', DATE '2012-01-01' FROM DUAL;
Outputs:
ID
START_DATE
END_DATE
001
2005-01-01 00:00:00
2007-01-01 00:00:00
001
2008-01-01 00:00:00
2008-12-01 00:00:00
001
2010-01-01 00:00:00
2012-01-01 00:00:00
db<>fiddle here
Update: Alternative query
-- Merge overlapping ranges per id by treating every start and end date as an
-- event and tracking a running count of the events.
SELECT id,
start_date,
end_date
FROM (
-- Only boundary events remain here; the running total of cnt rises by one at
-- each cnt = 1 row, so a cnt = 1 row and the cnt = 0 rows after it share a grp.
SELECT id,
dt,
SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
cnt
FROM (
-- type is +1 for a start date and -1 for an end date. The running sum of type,
-- multiplied by the row's own type, is 1 on a start row that brings the running
-- sum to 1 and 0 on a row that brings it back to 0.
-- NOTE(review): ROWNUM is the tiebreaker for equal dt values; confirm that
-- the resulting order of same-date events is acceptable.
SELECT ID,
dt,
SUM(type) OVER (PARTITION BY id ORDER BY dt, ROWNUM) * type AS cnt
FROM table_name
-- One output row per (row, date column): dt holds the date, type the +1/-1 tag.
UNPIVOT (dt FOR type IN (start_date AS 1, end_date AS -1))
)
WHERE cnt IN (1,0)
)
-- Turn the cnt = 1 and cnt = 0 rows of each (id, grp) into start_date / end_date columns.
PIVOT (MAX(dt) FOR cnt IN (1 AS start_date, 0 AS end_date))
Or, an equivalent that does not use UNPIVOT, PIVOT or ROWNUM and works in both Oracle and PostgreSQL:
-- Merge overlapping ranges per id with an event sweep, written as a chain of
-- CTEs (no UNPIVOT, PIVOT or ROWNUM).
WITH boundary_events AS (
    -- One row per range boundary: +1 for a start date, -1 for an end date.
    SELECT id, 1 AS type, start_date AS dt FROM table_name
    UNION ALL
    SELECT id, -1 AS type, end_date AS dt FROM table_name
),
ordered_events AS (
    -- Fix a total order per id; on equal dates, starts (+1) sort before ends (-1).
    SELECT e.*,
           ROW_NUMBER() OVER (PARTITION BY id ORDER BY dt ASC, type DESC) AS rn
    FROM boundary_events e
),
depth_events AS (
    -- Running sum of type times the row's own type: 1 on a start row that
    -- brings the sum to 1, 0 on a row that brings it back to 0.
    SELECT id,
           dt,
           SUM(type) OVER (PARTITION BY id ORDER BY dt, rn) * type AS cnt
    FROM ordered_events
),
island_edges AS (
    -- Keep only the boundary events and number the merged ranges.
    SELECT id,
           dt,
           SUM(cnt) OVER (PARTITION BY id ORDER BY dt) AS grp,
           cnt
    FROM depth_events
    WHERE cnt IN (1, 0)
)
SELECT id,
       MAX(CASE WHEN cnt = 1 THEN dt END) AS start_date,
       MAX(CASE WHEN cnt = 0 THEN dt END) AS end_date
FROM island_edges
GROUP BY id, grp
Update 2: Another Alternative
-- Merge overlapping ranges per id by comparing each start date with the
-- latest end date of all earlier rows.
WITH with_prev_max AS (
    -- prev_max: greatest end_date among the rows that sort before this one
    -- (NULL for the first row of each id).
    SELECT r.*,
           MAX(end_date) OVER (
               PARTITION BY id
               ORDER BY start_date
               ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
           ) AS prev_max
    FROM table_name r
),
numbered AS (
    -- A row opens a new group unless it starts on or before prev_max;
    -- the running total of those openings is the group number.
    SELECT w.*,
           SUM(CASE WHEN start_date <= prev_max THEN 0 ELSE 1 END)
               OVER (PARTITION BY id ORDER BY start_date) AS grp
    FROM with_prev_max w
)
SELECT id,
       MIN(start_date) AS start_date,
       MAX(end_date)   AS end_date
FROM numbered
GROUP BY id, grp
db<>fiddle Oracle PostgreSQL
This is a gaps and islands problem. Try this:
-- Merge overlapping date ranges per ID (gaps-and-islands).
-- A row continues the current island when it starts on or before the latest
-- end_date of ALL earlier rows. Comparing only with the immediately preceding
-- row is not enough: with (Jan 1 - Dec 31), (Feb 1 - Feb 10), (Mar 1 - Mar 10)
-- the third range does not overlap the second but is still inside the first.
with u as
 (select ID, start_date, end_date,
         -- latest end_date among the rows that sort before this one
         -- (NULL for the first row of each ID)
         max(end_date) over(partition by ID
                            order by start_date, end_date
                            rows between unbounded preceding and 1 preceding) as prev_max_end
  from table_name),
v as
 (select ID, start_date, end_date,
         case
           when start_date <= prev_max_end then 0
           else 1 end as grp
  from u),
w as
 (select ID, start_date, end_date,
         -- running total of the island openings = island number; the default
         -- frame is kept on purpose so that exact duplicate rows (peers in the
         -- ordering) always receive the same island number
         sum(grp) over(partition by ID order by start_date, end_date) as island
  from v)
select ID, min(start_date) as start_Date, max(end_date) as end_date
from w
group by ID, island;
Fiddle
Basically you can identify "islands" by comparing start_date of current row to end_date of previous row (ordered by start_date, end_date), if it precedes it then it's the same island. Then you can do a rolling sum() to get the island numbers. Finally select min(start_date) and max(end_date) from each island to get the desired output.
This may work, with a small change to the function; I tried it in DBeaver:
-- Merge overlapping date ranges per ID.
-- Ranking by End_Date within the calendar year of Start_Date returns the
-- Start_Date of the latest-ending row (e.g. 2008-04-01 instead of 2008-01-01
-- for the sample data), mixes different IDs, and cannot handle overlaps that
-- cross a year boundary. The ranges are therefore grouped into islands:
-- a row opens a new island unless it starts on or before the latest End_Date
-- of all earlier rows of the same ID.
select ID,
       min(Start_Date) as Start_Date,
       max(End_Date) as End_Date
from
(
select flagged.*,
       -- running total of the island openings = island number
       sum(is_new_island) over(partition by ID order by Start_Date, End_Date) as island
from
(
select t.*,
       case
         when Start_Date <= max(End_Date) over(partition by ID
                                               order by Start_Date, End_Date
                                               rows between unbounded preceding and 1 preceding)
         then 0
         else 1
       end as is_new_island
from testing_123 t
) flagged
) temp
group by ID, island
ORDER BY Start_Date;
Try this
-- Collapse the ranges of each ID into one row per calendar year of Start_Date:
-- earliest Start_Date in that year and latest End_Date among its ranges.
-- NOTE(review): grouping by year only merges correctly when overlapping ranges
-- always start in the same year and ranges starting in the same year always
-- overlap; confirm that this holds for the real data.
WITH a AS (
    -- Earliest start date per ID and year of Start_Date.
    SELECT
        ID,
        LEFT(Start_Date, 4) AS Year,
        MIN(Start_Date) AS New_Start_Date
    FROM
        TAB1
    GROUP BY
        ID,
        LEFT(Start_Date, 4)
), b AS (
    -- Attach every End_Date of the same ID and year. The ID condition is
    -- required: joining on the year alone pulls in End_Date values that
    -- belong to other IDs.
    SELECT
        a.ID,
        a.Year,
        a.New_Start_Date,
        TAB1.End_Date
    FROM
        a
    LEFT JOIN
        TAB1
        ON TAB1.ID = a.ID
        AND LEFT(TAB1.Start_Date, 4) = a.Year
)
SELECT
    ID,
    New_Start_Date AS Start_Date,
    MAX(End_Date) AS End_Date
FROM
    b
GROUP BY
    ID,
    New_Start_Date;
Example: https://dbfiddle.uk/?rdbms=mysql_8.0&fiddle=97f91b68c635aebfb752538cdd752ace
I am trying to limit the amount of data I pull in prior to processing/analysing it in python.
Mainly due to memory constraints.
Each transaction results in ~3-4 different transaction_events.
-----------
trx_id timestamp
trx_1 | 2021.01.01 15:45:40
trx_1_2 | 2021.01.01 15:45:40
trx_1_3 | 2021.01.01 15:45:40
trx_2 | 2021.02.01 14:15:40
trx_2_2 | 2021.02.01 14:15:40
trx_2_3 | 2021.02.01 14:15:40
All I need is 1 record per timestamp.
-----------
trx_id timestamp
trx_1 | 2021.01.01 15:45:40
trx_2 | 2021.02.01 14:15:40
I've already tried the following suggestions:
On the ORACLE community forum
and
select distinct(date) return the same date several time
I've tried various variations too
SELECT DISTINCT TRUNC(timestamp, 'DD')
SELECT DISTINCT TRUNC(timestamp)
SELECT DISTINCT to_char(timestamp, 'yyyy-mm-dd')
However with no results.
You can use the ROW_NUMBER analytic function and partition by the first 5 characters of the TRX_ID and the timestamp:
-- Keep one row per (first five characters of trx_id, ts): the row whose
-- trx_id sorts first.
WITH ranked AS (
    SELECT t.*,
           ROW_NUMBER() OVER (
               PARTITION BY SUBSTR(trx_id, 1, 5), ts
               ORDER BY trx_id
           ) AS rn
    FROM table_name t
)
SELECT trx_id, ts
FROM ranked
WHERE rn = 1;
Which, for your sample data:
-- Sample data: six event rows covering two distinct timestamps.
CREATE TABLE table_name ( trx_id, ts ) AS
SELECT 'trx_1', TIMESTAMP '2021-01-01 15:45:40' FROM DUAL UNION ALL
SELECT 'trx_1_2', TIMESTAMP '2021-01-01 15:45:40' FROM DUAL UNION ALL
SELECT 'trx_1_3', TIMESTAMP '2021-01-01 15:45:40' FROM DUAL UNION ALL
SELECT 'trx_2', TIMESTAMP '2021-02-01 14:15:40' FROM DUAL UNION ALL
SELECT 'trx_2_2', TIMESTAMP '2021-02-01 14:15:40' FROM DUAL UNION ALL
SELECT 'trx_2_3', TIMESTAMP '2021-02-01 14:15:40' FROM DUAL;
Outputs:
TRX_ID
TS
trx_1
2021-01-01 15:45:40.000000000
trx_2
2021-02-01 14:15:40.000000000
If you can have other TRX_ID with different length patterns then you can look for the second underscore character and get the substring before that:
-- Keep one row per (transaction key, ts), where the key is the part of trx_id
-- before its second underscore, or the whole trx_id when there is no second
-- underscore. The row whose trx_id sorts first is kept.
WITH keyed AS (
    SELECT t.*,
           INSTR(trx_id, '_', 1, 2) AS second_underscore_pos
    FROM table_name t
),
ranked AS (
    SELECT k.*,
           ROW_NUMBER() OVER (
               PARTITION BY
                   CASE
                       WHEN second_underscore_pos = 0 THEN trx_id
                       ELSE SUBSTR(trx_id, 1, second_underscore_pos - 1)
                   END,
                   ts
               ORDER BY trx_id
           ) AS rn
    FROM keyed k
)
SELECT trx_id, ts
FROM ranked
WHERE rn = 1;
db<>fiddle here
You can use aggregation:
-- One row per timestamp, keeping the smallest trx_id for each.
SELECT
    MIN(trx_id),
    timestamp
FROM t
GROUP BY
    timestamp;
Here is a db<>fiddle.
I have a problem with choosing from the list of absences, those that follow one another and grouping them into periods.
date_from (data_od) date_to(data_do)
--------------------------
18/08/01 - 18/08/15
18/08/16 - 18/08/20
18/08/21 - 18/08/31
18/09/01 - 18/09/08
18/05/01 - 18/05/31
18/06/01 - 18/06/30
18/03/01 - 18/03/18
18/02/14 - 18/02/28
Above is a list of absences; the result should be the following table:
date_from (data_od) date_to(data_do)
--------------------------
18/08/01 18/09/08
18/05/01 18/06/30
18/02/14 18/03/18
So far I have written something like this, but it only links absences in pairs :(
-- For employee 1067, pair each absence (u1) with one absence (u2) of the same
-- employee that starts on the day after u1 ends, and report u1's start with
-- u2's end. Only two absences are linked at a time, so a chain of three or
-- more consecutive absences comes back as several partial pairs.
SELECT u1.data_od,u2.data_do
FROM l_absencje u1 CROSS APPLY
(SELECT * FROM l_absencje labs
WHERE labs.prac_id=u1.prac_id AND
-- the candidate must start exactly one day after u1 ends (time of day ignored)
TRUNC(labs.data_od) = TRUNC(u1.data_do)+1
-- when several candidates match, keep the one with the highest id
ORDER BY id DESC FETCH FIRST 1 ROWS ONLY
) u2 where u1.prac_id=1067 ;
And give me that:
18/08/01 18/08/20 bad
18/08/16 18/08/31 bad
18/08/21 18/09/08 bad
18/05/01 18/06/30 good
18/02/14 18/03/18 good
You can use a combination of the LAG(), LEAD() and LAST_VALUE() analytic functions:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data: the eight absences from the question, in the question's order.
CREATE TABLE absences ( date_from, date_to ) AS
SELECT DATE '2018-08-01', DATE '2018-08-15' FROM DUAL UNION ALL
SELECT DATE '2018-08-16', DATE '2018-08-20' FROM DUAL UNION ALL
SELECT DATE '2018-08-21', DATE '2018-08-31' FROM DUAL UNION ALL
SELECT DATE '2018-09-01', DATE '2018-09-08' FROM DUAL UNION ALL
SELECT DATE '2018-05-01', DATE '2018-05-31' FROM DUAL UNION ALL
SELECT DATE '2018-06-01', DATE '2018-06-30' FROM DUAL UNION ALL
SELECT DATE '2018-03-01', DATE '2018-03-18' FROM DUAL UNION ALL
SELECT DATE '2018-02-14', DATE '2018-02-28' FROM DUAL;
Query 1:
-- Merge absences that follow one another day-to-day into single periods.
SELECT *
FROM (
-- On rows that end a period (date_to is not NULL), fill date_from with the
-- most recent non-NULL date_from seen so far in ROWNUM order.
-- NOTE(review): this relies on ROWNUM following the date order produced by
-- the inner query, which the query itself does not enforce - confirm.
SELECT CASE
WHEN date_to IS NOT NULL
THEN LAST_VALUE( date_from ) IGNORE NULLS
OVER( ORDER BY ROWNUM )
END AS date_from,
date_to
FROM (
-- date_from becomes NULL when the row starts the day after the previous
-- absence (by date_to) ends, i.e. it continues a period.
SELECT CASE date_from
WHEN LAG( date_to ) OVER ( ORDER BY date_to )
+ INTERVAL '1' DAY
THEN NULL
ELSE date_from
END AS date_from,
-- date_to becomes NULL when the next absence (by date_from) starts the
-- day after this row ends, i.e. the period continues.
CASE date_to
WHEN LEAD( date_from ) OVER ( ORDER BY date_from )
- INTERVAL '1' DAY
THEN NULL
ELSE date_to
END AS date_to
FROM absences
)
)
-- Keep only the rows that carry both a period start and a period end.
WHERE date_from IS NOT NULL
AND date_to IS NOT NULL
Results:
| DATE_FROM | DATE_TO |
|----------------------|----------------------|
| 2018-02-14T00:00:00Z | 2018-03-18T00:00:00Z |
| 2018-05-01T00:00:00Z | 2018-06-30T00:00:00Z |
| 2018-08-01T00:00:00Z | 2018-09-08T00:00:00Z |
I am trying to select a record from a row by looking at both the start date and the end date. What I need to do is pick the max start date, then only return a result from that max date if the end date has a value.
I hope the images below help clarify this a bit more. This is in Oracle based SQL.
Example #2
I can, so far, either return all the records or incorrectly return a record in scenario #2 but I've yet to figure out the best way to make this work. I would greatly appreciate any assistance.
Thank you!
I would use an analytic function:
-- For each grp_id, return the row with the latest st_dt, but only when that
-- row has an ed_dt.
-- sample_data: inline test rows (grp_id 2's latest row has a NULL ed_dt).
with sample_data as (select 1 id, 1 grp_id, to_date('01/01/2015', 'dd/mm/yyyy') st_dt, to_date('23/01/2015', 'dd/mm/yyyy') ed_dt from dual union all
select 2 id, 1 grp_id, to_date('24/02/2015', 'dd/mm/yyyy') st_dt, to_date('15/02/2015', 'dd/mm/yyyy') ed_dt from dual union all
select 3 id, 1 grp_id, to_date('17/03/2015', 'dd/mm/yyyy') st_dt, to_date('30/03/2015', 'dd/mm/yyyy') ed_dt from dual union all
select 4 id, 2 grp_id, to_date('01/01/2015', 'dd/mm/yyyy') st_dt, to_date('17/01/2015', 'dd/mm/yyyy') ed_dt from dual union all
select 5 id, 2 grp_id, to_date('21/01/2015', 'dd/mm/yyyy') st_dt, to_date('23/03/2015', 'dd/mm/yyyy') ed_dt from dual union all
select 6 id, 2 grp_id, to_date('14/04/2015', 'dd/mm/yyyy') st_dt, to_date('16/05/2015', 'dd/mm/yyyy') ed_dt from dual union all
select 7 id, 2 grp_id, to_date('28/05/2015', 'dd/mm/yyyy') st_dt, null ed_dt from dual),
-- res: every row annotated with the latest st_dt of its grp_id.
res as (select id,
grp_id,
st_dt,
ed_dt,
max(st_dt) over (partition by grp_id) max_st_dt
from sample_data)
select id,
grp_id,
st_dt,
ed_dt
from res
-- keep the latest row of each group ...
where st_dt = max_st_dt
-- ... and drop it again when it has no end date
and ed_dt is not null;
ID GRP_ID ST_DT ED_DT
---------- ---------- ---------- ----------
3 1 17/03/2015 30/03/2015
This would be one of the simplest ways.
-- Return the row(s) holding the overall latest start_dt, but only when
-- end_dt is filled in.
WITH with_latest AS (
    SELECT apay_id,
           MAX(start_dt) OVER () AS max_start_dt,  -- latest start_dt in the whole table
           start_dt,
           end_dt
    FROM sample
)
SELECT *
FROM with_latest
WHERE end_dt IS NOT NULL
  AND start_dt = max_start_dt
The idea is to find the row with the maximum start_dt together with its end_dt,
and then discard that row if its end_dt is null.
SQL Fiddle
Database Schema
-- Table holding the example rows: one row per apay_id with its account
-- number and a start/end date pair (end_dt may be NULL).
create table sample
(apay_id number(7),
account_number number(7),
start_dt date,
end_dt date);
Sample1
-- Sample1: the row with the latest start_dt has no end_dt.
-- Columns are listed explicitly and dates are ANSI DATE literals, so the
-- inserts do not depend on column order or on the session's NLS_DATE_FORMAT.
insert into sample (apay_id, account_number, start_dt, end_dt) values (554433, 123456, DATE '2015-08-15', null);
insert into sample (apay_id, account_number, start_dt, end_dt) values (112266, 123456, DATE '2015-07-21', DATE '2015-08-31');
insert into sample (apay_id, account_number, start_dt, end_dt) values (733221, 123456, DATE '2015-06-29', DATE '2015-07-31');
Output for Sample1
No rows
Sample2
-- Sample2: the row with the latest start_dt has an end_dt.
-- Columns are listed explicitly and dates are ANSI DATE literals, so the
-- inserts do not depend on column order or on the session's NLS_DATE_FORMAT.
insert into sample (apay_id, account_number, start_dt, end_dt) values (554433, 123456, DATE '2015-08-15', DATE '2015-11-11');
insert into sample (apay_id, account_number, start_dt, end_dt) values (112266, 123456, DATE '2015-07-21', DATE '2015-08-31');
insert into sample (apay_id, account_number, start_dt, end_dt) values (733221, 123456, DATE '2015-06-29', DATE '2015-07-31');
Output for Sample2
| APAY_ID | MAX_START_DT | END_DT |
|---------|--------------------------|----------------------------|
| 554433 | August, 15 2015 00:00:00 | November, 11 2015 00:00:00 |
-- Return the apay_id of the row with the latest start_dt, but only when that
-- row has an end_dt.
-- The NULL check must run AFTER the latest row has been picked: filtering
-- end_dt first would fall back to an older row (Sample1 would return 112266
-- instead of no rows). The ROWNUM filter sits in its own query level because
-- ROWNUM is assigned only to rows that pass the WHERE clause it appears in.
-- NOTE: if several rows share the latest start_dt, one of them is picked arbitrarily.
select apay_id
from (
    select apay_id, end_dt
    from (
        select apay_id, end_dt
        from sample
        order by start_dt desc
    )
    where rownum = 1
)
where end_dt is not null
I think this can also work.
Say we have the table below
Name Date reg_no
aa 12/12/2013 10:13:15 rty003
aa 13/12/2013 11:14:16 jyu887
bb 13/14/2013 09:45:10 rty003
bb 12/12/2013 10:13:27 rty003
I want to display only the names that have matching reg_no and date values (ignoring seconds).
The result should be as shown below.
Name1 Name2 Date reg_no
aa bb 12/12/2013 10:13 rty003
How can I achieve this in Oracle?
Try with listagg function:
-- For every (minute, reg_no) that occurs on more than one row, list the
-- names on those rows as a comma-separated string in alphabetical order.
SELECT
    LISTAGG(name, ',') WITHIN GROUP (ORDER BY name),
    TO_CHAR(date_time, 'mm/dd/yyyy hh24:mi'),
    reg_no
FROM t
GROUP BY
    TO_CHAR(date_time, 'mm/dd/yyyy hh24:mi'),
    reg_no
HAVING COUNT(*) > 1;
This could be improved, but it is worth a try:
-- Sample rows standing in for the real table.
WITH tab (name, ddate, reg_no) AS (
    SELECT 'aa', TO_DATE('12/12/2013 10:13:15', 'MM/DD/YYYY HH24:MI:SS'), 'rty003' FROM dual UNION ALL
    SELECT 'aa', TO_DATE('03/12/2013 11:14:16', 'MM/DD/YYYY HH24:MI:SS'), 'jyu887' FROM dual UNION ALL
    SELECT 'bb', TO_DATE('03/14/2013 09:45:10', 'MM/DD/YYYY HH24:MI:SS'), 'rty003' FROM dual UNION ALL
    SELECT 'bb', TO_DATE('12/12/2013 10:13:27', 'MM/DD/YYYY HH24:MI:SS'), 'rty003' FROM dual
),
-- End of data preparation.
-- Each row with its date truncated to the minute, its position (by name) and
-- the row count within its (minute, reg_no) group.
per_minute AS (
    SELECT
        name,
        TRUNC(ddate, 'MI') AS ddate,
        reg_no,
        ROW_NUMBER() OVER (PARTITION BY TRUNC(ddate, 'MI'), reg_no ORDER BY name) AS rn,
        COUNT(*) OVER (PARTITION BY TRUNC(ddate, 'MI'), reg_no) AS cnt
    FROM tab
)
-- Groups of exactly two rows: first name in name1, second in name2.
SELECT
    MIN(CASE WHEN rn = 1 THEN name END) AS name1,
    MIN(CASE WHEN rn = 2 THEN name END) AS name2,
    ddate,
    reg_no
FROM per_minute
WHERE cnt = 2
GROUP BY
    ddate,
    reg_no;
Output:
| NAME1 | NAME2 | DDATE | REG_NO |
|-------|-------|---------------------------------|--------|
| aa | bb | December, 12 2013 10:13:00+0000 | rty003 |
Your final query will be
-- Each row with its date truncated to the minute, its position (by name) and
-- the row count within its (minute, reg_no) group.
-- Replace <your_table_name> with the real table before running.
WITH per_minute AS (
    SELECT
        name,
        TRUNC(ddate, 'MI') AS ddate,
        reg_no,
        ROW_NUMBER() OVER (PARTITION BY TRUNC(ddate, 'MI'), reg_no ORDER BY name) AS rn,
        COUNT(*) OVER (PARTITION BY TRUNC(ddate, 'MI'), reg_no) AS cnt
    FROM <your_table_name>
)
-- Groups of exactly two rows: first name in name1, second in name2.
SELECT
    MIN(CASE WHEN rn = 1 THEN name END) AS name1,
    MIN(CASE WHEN rn = 2 THEN name END) AS name2,
    ddate,
    reg_no
FROM per_minute
WHERE cnt = 2
GROUP BY
    ddate,
    reg_no;
-- For every (minute, reg_no) that occurs on at least two rows, return the
-- alphabetically first and last name in that group.
select
    min(name),
    max(name),
    trunc(ddate, 'MI'),
    reg_no
from tab
group by
    trunc(ddate, 'MI'),
    reg_no
having count(*) >= 2