Split string in Oracle with regexp_substr in order - sql

I have a string in Oracle database, my string is: 'bbb;aaa;qqq;ccc'
I used a regular expression to split my string:
select distinct trim(regexp_substr('bbb;aaa;qqq;ccc','[^;]+', 1,level) ) as q
from dual
connect by regexp_substr('bbb;aaa;qqq;ccc', '[^;]+', 1, level) is not null ;
I want the substrings returned in their original order; I expect the output below every time:
bbb
aaa
qqq
ccc
because the order of the substrings is very important to me. But the result of this query is not in order:
qqq
aaa
bbb
ccc

You don't need a DISTINCT to get your result; besides, to get the result in a given order, all you need is an ORDER BY clause:
-- Emit one row per ';'-separated token. LEVEL is the 1-based index of the
-- token being extracted, so sorting by it returns the tokens in the order
-- in which they appear in the string.
SELECT TRIM(REGEXP_SUBSTR('bbb;aaa;qqq;ccc', '[^;]+', 1, LEVEL)) AS q
FROM   DUAL
CONNECT BY REGEXP_SUBSTR('bbb;aaa;qqq;ccc', '[^;]+', 1, LEVEL) IS NOT NULL
ORDER BY LEVEL

If you do need DISTINCT:
-- Split a ';'-delimited list into rows, keep only the first occurrence of
-- each token, and return the survivors in their original left-to-right order.
WITH your_data ( value ) AS (
  SELECT 'bbb;aaa;qqq;ccc;aaa;eee' FROM DUAL
),
-- One row per token: start_pos is the token's first character and end_pos is
-- the position of the delimiter that terminates it (0 for the last token).
positions ( string, lvl, start_pos, end_pos ) AS (
  SELECT value, 1, 1, INSTR( value, ';', 1, 1 ) FROM your_data
  UNION ALL
  SELECT string, lvl + 1, end_pos + 1, INSTR( string, ';', 1, lvl + 1 )
  FROM   positions
  WHERE  end_pos > 0
),
-- Cut each token out of the list using the boundaries computed above.
substrings ( string, substring, lvl, start_pos ) AS (
  SELECT string,
         CASE end_pos
           WHEN 0 THEN SUBSTR( string, start_pos )
           ELSE SUBSTR( string, start_pos, end_pos - start_pos )
         END,
         lvl,
         start_pos
  FROM   positions
)
SELECT string,
       substring,
       lvl
FROM   substrings
-- Both sides are wrapped in delimiters so only whole tokens match. Because one
-- ';' is prepended, the first whole-token match is found at the token's own
-- start_pos exactly when no identical token occurs earlier in the list.
WHERE  INSTR( ';' || string || ';', ';' || substring || ';' ) = start_pos
-- Without an explicit ORDER BY the row order is unspecified; lvl is the
-- token's ordinal position in the list.
ORDER BY lvl;
Output:
STRING SUBSTRING LVL
----------------------- ----------------------- ----------
bbb;aaa;qqq;ccc;aaa;eee bbb 1
bbb;aaa;qqq;ccc;aaa;eee aaa 2
bbb;aaa;qqq;ccc;aaa;eee qqq 3
bbb;aaa;qqq;ccc;aaa;eee ccc 4
bbb;aaa;qqq;ccc;aaa;eee eee 6

Related

Duplicate values when splitting a string

I'm trying to create a row for each person, str but I am getting extra output.
Can someone please explain what I did wrong and show me how to fix it.
Below is my test CASE and expected results. Thanks to all who answer and your expertise.
with rws as (
select 'Bob' person, 'AB,CR,DE' str from dual UNION ALL
select 'Jane' person, 'AB' str from dual
)
select person,
regexp_substr (
str,
'[^,]+',
1,
level
) value
from rws
connect by level <=
length ( str ) - length ( replace ( str, ',' ) ) + 1
ORDER BY person, str;
PERSON VALUE
Bob AB
Bob CR
Bob DE
Bob DE
Bob CR
Jane AB
Expected results
PERSON VALUE
Bob AB
Bob CR
Bob DE
Jane AB
The problem with your original query is that connect-by is looking at previous rows more than once - essentially, the second level of rows for Bob is also picking up the first row for Jane. This is a fairly well-known issue. You can avoid that by including a unique ID (with this example you'd have to rely on the name, and hope it's unique); but that then will loop, which you can avoid by adding a non-deterministic function call:
...
connect by level <=
length ( str ) - length ( replace ( str, ',' ) ) + 1
and prior person = person
and prior dbms_random.value is not null
ORDER BY person, str;
You could also use recursive subquery factoring instead of a hierarchical query:
-- Recursive subquery factoring: produce one row per comma-separated element
-- of str for every person.
WITH rws AS (
  SELECT 'Bob' person, 'AB,CR,DE' str FROM DUAL UNION ALL
  SELECT 'Jane' person, 'AB' str FROM DUAL
),
-- comma_count is the number of delimiters in str, so str holds
-- comma_count + 1 elements; idx is the 1-based element being extracted.
split_rows (person, str, comma_count, idx, value) AS (
  -- anchor member: first element of every input row
  SELECT person,
         str,
         LENGTH(str) - LENGTH(REPLACE(str, ',')),
         1,
         REGEXP_SUBSTR(str, '[^,]+', 1, 1)
  FROM   rws
  UNION ALL
  -- recursive member: next element, for as long as a delimiter remains
  SELECT person,
         str,
         comma_count,
         idx + 1,
         REGEXP_SUBSTR(str, '[^,]+', 1, idx + 1)
  FROM   split_rows
  WHERE  idx <= comma_count
)
SELECT person, value
FROM   split_rows
ORDER BY person, value;
fiddle
but you might find that one of the other answers performs better, or at least is easier to understand and maintain.
Incidentally, your regular expression pattern might cause issues if you ever have a null element (i.e. two adjacent commas); see this answer for an explanation.
Here's one option:
SQL> WITH
2 rws
3 AS
4 (SELECT 'Bob' person, 'AB,CR,DE' str FROM DUAL
5 UNION ALL
6 SELECT 'Jane' person, 'AB' str FROM DUAL)
7 SELECT person,
8 REGEXP_SUBSTR (str,
9 '[^,]+',
10 1,
11 COLUMN_VALUE) VALUE
12 FROM rws
13 CROSS JOIN
14 TABLE (
15 CAST (
16 MULTISET ( SELECT LEVEL
17 FROM DUAL
18 CONNECT BY LEVEL <= REGEXP_COUNT (str, ',') + 1)
19 AS SYS.odcinumberlist))
20 ORDER BY person, str;
PERS VALUE
---- --------
Bob AB
Bob CR
Bob DE
Jane AB
SQL>
Your solution would return desired result if you applied SELECT DISTINCT (and fixed order by clause, but that's irrelevant), but that would also behave badly as number of rows you're working with grows.
SQL> with rws as (
2 select 'Bob' person, 'AB,CR,DE' str from dual UNION ALL
3 select 'Jane' person, 'AB' str from dual
4 )
5 select distinct person,
6 regexp_substr (
7 str,
8 '[^,]+',
9 1,
10 level
11 ) value
12 from rws
13 connect by level <=
14 length ( str ) - length ( replace ( str, ',' ) ) + 1;
PERS VALUE
---- --------
Jane AB
Bob CR
Bob AB
Bob DE
SQL>
You can use a recursive query and simple string functions (which is slightly more to type but is faster than regular expressions):
-- Split str on commas using only INSTR/SUBSTR (no regular expressions).
WITH rws (person, str) AS (
  SELECT 'Bob', 'AB,CR,DE' FROM DUAL UNION ALL
  SELECT 'Jane', 'AB' FROM DUAL
),
-- One row per element: first_char is where the element starts, delim_pos is
-- the position of the comma that ends it (0 when it is the last element).
token_bounds (person, str, first_char, delim_pos) AS (
  SELECT person, str, 1, INSTR(str, ',', 1)
  FROM   rws
  UNION ALL
  SELECT person, str, delim_pos + 1, INSTR(str, ',', delim_pos + 1)
  FROM   token_bounds
  WHERE  delim_pos > 0
)
SELECT person,
       CASE
         WHEN delim_pos = 0 THEN SUBSTR(str, first_char)
         ELSE SUBSTR(str, first_char, delim_pos - first_char)
       END AS value
FROM   token_bounds
ORDER BY person, value;
Which outputs:
PERSON
VALUE
Bob
AB
Bob
CR
Bob
DE
Jane
AB
fiddle
If you don't have quotes in the data, for 12c+ you may use JSON_TABLE and lateral join instead of recursion.
-- Split rws.str on commas by rewriting it as a JSON array of strings and
-- unpacking that array with JSON_TABLE (Oracle 12c+).
-- Caveat: the elements are not JSON-escaped, so this is only suitable for
-- values that contain no double quotes.
WITH rws AS (
  SELECT 'Bob' person, 'AB,CR,DE' str FROM DUAL UNION ALL
  SELECT 'Jane' person, 'AB' str FROM DUAL UNION ALL
  SELECT 'Mark', NULL FROM DUAL
)
SELECT
  rws.person,
  l.val_splitted,
  l.rn
FROM rws
-- LEFT JOIN keeps persons whose list yields no rows.
LEFT JOIN LATERAL (
  SELECT jt.val_splitted,
         jt.rn
  FROM JSON_TABLE(
    '["' || REPLACE(rws.str, ',', '","') || '"]',
    '$[*]'
    COLUMNS (
      val_splitted VARCHAR2(10) PATH '$',
      -- rn is the 1-based position of the element inside the array
      rn FOR ORDINALITY
    )
  ) jt
) l
ON 1 = 1
-- Sort by name and element position so that the order of the elements within
-- each person is deterministic.
ORDER BY rws.person, l.rn;
PERSON
VAL_SPLITTED
RN
Bob
AB
1
Bob
CR
2
Bob
DE
3
Jane
AB
1
Mark
1

Oracle SQL regexp_substr

I have a query which fetches value matching the pattern, I want it to fetch all prefvalue instead of only CEROTG
SELECT regexp_substr('prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1', 'prefvalue:([[:alnum:]_]+)') as Result from dual
Current Output
prefvalue:CEROTG
Expected Output
prefvalue:CEROTG
prefvalue:CEROTG-2
prefvalue:CEROTG_1
One option uses a recursive query:
-- Return every 'prefvalue:...' token of str, one per row: the recursion asks
-- REGEXP_SUBSTR for occurrence 1, 2, ... up to the number of matches.
WITH
  data AS (
    SELECT 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1' str
    FROM   DUAL
  ),
  matches (str, res, lvl) AS (
    -- anchor member: first occurrence
    SELECT str,
           REGEXP_SUBSTR(str, 'prefvalue:([[:alnum:]_-]+)'),
           1
    FROM   data
    UNION ALL
    -- recursive member: occurrence lvl + 1, until all matches are consumed
    SELECT str,
           REGEXP_SUBSTR(str, 'prefvalue:([[:alnum:]_-]+)', 1, lvl + 1),
           lvl + 1
    FROM   matches
    WHERE  lvl < REGEXP_COUNT(str, 'prefvalue:([[:alnum:]_-]+)')
  )
SELECT res
FROM   matches
Demo on DB Fiddle:
| RES |
| :----------------- |
| prefvalue:CEROTG-2 |
| prefvalue:CEROTG |
| prefvalue:CEROTG_1 |
You can use simple string functions in a recursive subquery-factoring clause:
WITH rsqfc ( entry_data, start_pos, end_pos ) AS (
SELECT entry_data, 1, INSTR( entry_data, ' ', 1 )
FROM prefdir_entrydata
UNION ALL
SELECT entry_data, end_pos + 1, INSTR( entry_data, ' ', end_pos + 1 )
FROM rsqfc
WHERE end_pos > 0
)
SELECT CASE end_pos
WHEN 0
THEN SUBSTR( entry_data, start_pos )
ELSE SUBSTR( entry_data, start_pos, end_pos - start_pos )
END AS value
FROM rsqfc
Which, for your sample data:
CREATE TABLE prefdir_entrydata ( entry_data ) AS
SELECT 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1' FROM DUAL;
Outputs:
| VALUE |
| :----------------- |
| prefvalue:CEROTG-2 |
| prefvalue:CEROTG |
| prefvalue:CEROTG_1 |
db<>fiddle here
An example that handles multiple input rows is:
WITH rsqfc ( id, entry_data, start_pos, end_pos ) AS (
SELECT id, entry_data, 1, INSTR( entry_data, ' ', 1 )
FROM prefdir_entrydata
WHERE dist_name_short = 'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
UNION ALL
SELECT id, entry_data, end_pos + 1, INSTR( entry_data, ' ', end_pos + 1 )
FROM rsqfc
WHERE end_pos > 0
)
SELECT id,
CASE end_pos
WHEN 0
THEN SUBSTR( entry_data, start_pos )
ELSE SUBSTR( entry_data, start_pos, end_pos - start_pos )
END AS value
FROM rsqfc
ORDER BY id, start_pos
Which, for the test data:
CREATE TABLE prefdir_entrydata ( id, entry_data, dist_name_short ) AS
SELECT 1,
'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1',
'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
FROM DUAL UNION ALL
SELECT 2,
'prefvalue:CEROTG-2a prefvalue:CEROTG_1v2',
'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
FROM DUAL;
Outputs:
ID | VALUE
-: | :-------------------
1 | prefvalue:CEROTG-2
1 | prefvalue:CEROTG
1 | prefvalue:CEROTG_1
2 | prefvalue:CEROTG-2a
2 | prefvalue:CEROTG_1v2
db<>fiddle here
WITH tbl(DATA) AS (
SELECT 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1'
FROM dual
)
SELECT REGEXP_SUBSTR(DATA, '(.*?)( |$)', 1, LEVEL, NULL, 1) ELEMENT
FROM tbl
CONNECT BY LEVEL <= REGEXP_COUNT(DATA, ' ')+1;
ELEMENT
------------------------------------------------------
prefvalue:CEROTG-2
prefvalue:CEROTG
prefvalue:CEROTG_1
3 rows selected.

Sorting comma delimited datasets in row

This is what is given
Numbers Powers
4,5,1 WATER,FIRE
6,3,9 ICE,WATER,FIRE
My requirement is (sorted order)
Numbers Powers
1,4,5 FIRE,WATER
3,6,9 FIRE,ICE,WATER .
I want it in sorted order! How to do it in database?
Split column to rows, then aggregate them back, sorted.
SQL> with test (id, num, pow) as
2 (select 1, '4,5,1', 'water,fire' from dual union all
3 select 2, '6,3,9', 'ice,water,fire' from dual
4 ),
5 temp as
6 -- split columns to rows; generate as many rows as the longer of the two
7 -- lists needs, so that no element of either column is dropped
8 (select id,
9 regexp_substr(num, '[^,]+', 1, column_value) num1,
10 regexp_substr(pow, '[^,]+', 1, column_value) pow1
11 from test join table(cast(multiset(select level from dual
12 connect by level <= greatest(nvl(regexp_count(num, ','), 0),
13 nvl(regexp_count(pow, ','), 0)) + 1
14 ) as sys.odcinumberlist)) on 1 = 1
15 )
16 -- aggregate them back, sorted (LISTAGG ignores the NULLs that the shorter
17 -- list produces for the surplus positions)
18 select id,
19 listagg(num1, ',') within group (order by to_number(num1)) num_result,
20 listagg(pow1, ',') within group (order by pow1) pow_result
21 from temp
22 group by id;
ID NUM_RESULT POW_RESULT
---------- ------------------------------ ------------------------------
1 1,4,5 fire,water
2 3,6,9 fire,ice,water
SQL>
Oracle Setup:
CREATE TABLE test_data ( Numbers, Powers ) AS
SELECT '4,5,1', 'WATER,FIRE' FROM DUAL UNION ALL
SELECT '6,3,9', 'ICE,WATER,FIRE' FROM DUAL UNION ALL
SELECT '7', 'D,B,E,C,A' FROM DUAL
Query:
-- For every row, split each comma-separated column, sort its elements and
-- aggregate them back into a single string.
SELECT (
         -- numbers are converted with TO_NUMBER so they sort numerically
         SELECT LISTAGG( TO_NUMBER( REGEXP_SUBSTR( t.numbers, '\d+', 1, LEVEL ) ), ',' )
                  WITHIN GROUP ( ORDER BY TO_NUMBER( REGEXP_SUBSTR( t.numbers, '\d+', 1, LEVEL ) ) )
         FROM   DUAL
         CONNECT BY LEVEL <= REGEXP_COUNT( t.numbers, ',' ) + 1
       ) AS numbers,
       (
         SELECT LISTAGG( REGEXP_SUBSTR( t.powers, '[^,]+', 1, LEVEL ), ',' )
                  WITHIN GROUP ( ORDER BY REGEXP_SUBSTR( t.powers, '[^,]+', 1, LEVEL ) )
         FROM   DUAL
         CONNECT BY LEVEL <= REGEXP_COUNT( t.powers, ',' ) + 1
       ) AS powers
FROM   test_data t
Output:
NUMBERS | POWERS
:------ | :-------------
1,4,5 | FIRE,WATER
3,6,9 | FIRE,ICE,WATER
7 | A,B,C,D,E
db<>fiddle here
You can try the following:
I query the table directly because DISTINCT needs a value that uniquely identifies each source row; here I have used ROWID.
SELECT
ID,
LISTAGG(NUM, ',') WITHIN GROUP(
ORDER BY
NUM
) AS NUM,
LISTAGG(POW, ',') WITHIN GROUP(
ORDER BY
POW
) AS POW
FROM
(
SELECT
DISTINCT ROWID,
ID,
REGEXP_SUBSTR(NUM, '[^,]+', 1, LEVEL) NUM,
REGEXP_SUBSTR(POW, '[^,]+', 1, LEVEL) POW
FROM
TEST
CONNECT BY REGEXP_SUBSTR(NUM, '[^,]+', 1, LEVEL) IS NOT NULL
OR REGEXP_SUBSTR(POW, '[^,]+', 1, LEVEL) IS NOT NULL
)
GROUP BY ID
ORDER BY ID;
db<>fiddle demo
Cheers!!
----
UPDATE
----
As a comment pointed out, the query above generates duplicates, so I have rewritten the whole query as follows:
-- Split NUM and POW into one row per list position, then aggregate each
-- column back into a sorted, comma-separated string per ID.
SELECT
    c_s.id,
    -- TO_NUMBER makes the sort numeric ('9' before '10'); it assumes every
    -- element of NUM is a valid number and raises an error otherwise.
    LISTAGG(c_s.num, ',') WITHIN GROUP (
        ORDER BY TO_NUMBER(c_s.num)
    ) AS num,
    LISTAGG(c_s.pow, ',') WITHIN GROUP (
        ORDER BY c_s.pow
    ) AS pow
FROM (
    SELECT
        t.id,
        REGEXP_SUBSTR(t.num, '[^,]+', 1, nums_comma.column_value) AS num,
        REGEXP_SUBSTR(t.pow, '[^,]+', 1, nums_comma.column_value) AS pow
    FROM test t
    CROSS JOIN TABLE(CAST(MULTISET(
        SELECT LEVEL
        FROM DUAL
        -- Generate as many positions as the longer list has elements.
        -- REGEXP_COUNT returns 0 for a list without commas; NVL covers a NULL
        -- list, because GREATEST returns NULL as soon as one argument is NULL
        -- and that would cut the other list down to a single element.
        CONNECT BY LEVEL <= GREATEST(NVL(REGEXP_COUNT(t.num, ','), 0),
                                     NVL(REGEXP_COUNT(t.pow, ','), 0)) + 1
    ) AS SYS.ODCINUMBERLIST)) nums_comma
) c_s
GROUP BY c_s.id
ORDER BY c_s.id;
db<>fiddle demo updated
Cheers!!

ask for a split function in oracle

right now there is a string like this:
789+456-239
I want to get a list like this:
sign | num
+ 789
+ 456
- 239
This could be a way, by using a commonly used split string method, just adapted to your need to handle the sign.
-- test case
with yourString(str) as
(
select '+789+456-239' str
from dual
)
-- query: the n-th sign and the n-th run of non-sign characters belong to the
-- same term, so both are extracted with the same occurrence number (level)
SELECT regexp_substr(str, '[+-]', 1, level) sign,
regexp_substr(str, '[^+-]+', 1, level) num
FROM ( select case
when substr(str, 1, 1) in ('+','-') then str
-- I add a plus sign if the first char of the string is not a sign
else '+' || str
end as str
from yourString
)
-- one row per sign found in the (normalized) string
CONNECT BY regexp_instr(str, '[+-]', 1, level ) > 0
-- return the terms in the order in which they appear in the string
ORDER BY level;
This gives:
SIGN NUM
-------------- --------------
+ 789
+ 456
- 239
Here I assume that if no sign is given in the beginning of this string, the first sign will be +.
If you need to handle more than one string, you need to identify an ID in your table and the CONNECT BY becomes a bit more complex:
-- test case
with yourString(id, str) as
(
select 1, '+789+456-239' from dual union all
select 2, '789+456-239' from dual union all
select 3, '-789+456-239' from dual
)
-- query
SELECT id,
regexp_substr(str, '[+-]', 1, level) sign,
regexp_substr(str, '[^+-]+', 1, level) num
FROM ( select id,
case
when substr(str, 1, 1) in ('+','-') then str
-- I add a plus sign if the first char of the string is not a sign
else '+' || str
end as str
from yourString
)
CONNECT BY regexp_instr(str, '[+-]', 1, level ) > 0
and prior id = id
and prior sys_guid() is not null
The result:
ID SIGN NUM
---------- ----- --------------------------------------------
1 + 789
1 + 456
1 - 239
2 + 789
2 + 456
2 - 239
3 - 789
3 + 456
3 - 239
Here is a method that does not use regular expressions and can handle multiple input rows:
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE table_name ( id, list ) AS
SELECT 1, '789+456-239' FROM DUAL UNION ALL
SELECT 2, '-123' FROM DUAL UNION ALL
SELECT 3, '+456' FROM DUAL UNION ALL
SELECT 4, '789' FROM DUAL;
Query 1:
-- Split each list into signed terms without regular expressions.
-- bounds holds one row per term: start_pos is where the term (including its
-- sign character, if any) begins, end_pos is the position of the next '+' or
-- '-' (0 when the term is the last one), lvl is the term's ordinal position.
WITH bounds ( id, list, start_pos, end_pos, lvl ) AS (
-- anchor member: the first term starts at position 1; the search for the next
-- sign starts at position 2 so that a leading sign is not taken as a terminator
SELECT id,
list,
1,
-- INSTR returns 0 when the character is not found, so LEAST may only be used
-- when both signs occur; otherwise take the position of whichever one exists
CASE
WHEN INSTR( list, '+', 2 ) = 0 THEN INSTR( list, '-', 2 )
WHEN INSTR( list, '-', 2 ) = 0 THEN INSTR( list, '+', 2 )
ELSE LEAST( INSTR( list, '+', 2 ), INSTR( list, '-', 2 ) )
END,
1
FROM table_name
UNION ALL
-- recursive member: the next term starts at the sign that ended the previous
-- one (start_pos = end_pos), so the sign stays with the term it precedes
SELECT id,
list,
end_pos,
CASE
WHEN INSTR( list, '+', end_pos + 1 ) = 0 THEN INSTR( list, '-', end_pos + 1 )
WHEN INSTR( list, '-', end_pos + 1 ) = 0 THEN INSTR( list, '+', end_pos + 1 )
ELSE LEAST( INSTR( list, '+', end_pos + 1 ), INSTR( list, '-', end_pos + 1 ) )
END,
lvl + 1
FROM bounds
WHERE end_pos > 0
)
SELECT id,
-- any first character other than '-' (an explicit '+' or no sign at all) is
-- reported as '+'
DECODE( SUBSTR( list, start_pos, 1 ), '-', '-', '+' ) AS sign,
-- item is the term without its sign: skip one character when the term starts
-- with '+' or '-', and shorten the length by the same amount; for the last
-- term (end_pos = 0) the end is taken to be LENGTH( list ) + 1
SUBSTR(
list,
start_pos + DECODE( SUBSTR( list, start_pos, 1 ), '-', 1, '+', 1, 0 ),
DECODE( end_pos, 0, LENGTH( list ) + 1, end_pos ) - start_pos - DECODE( SUBSTR( list, start_pos, 1 ), '-', 1, '+', 1, 0 )
) AS item,
lvl
FROM bounds
ORDER BY id, lvl
Results:
| ID | SIGN | ITEM | LVL |
|----|------|------|-----|
| 1 | + | 789 | 1 |
| 1 | + | 456 | 2 |
| 1 | - | 239 | 3 |
| 2 | - | 123 | 1 |
| 3 | + | 456 | 1 |
| 4 | + | 789 | 1 |

regexp_substr to bring back data before a forward slash

I have the following pattern of characters in a dataset. I need to manipulate the data and cross-reference it to another table. I'm trying to write a regexp_substr to bring back the data before each forward slash, starting from the left. For example:
abc/ab/123/zzz
so I need to get the following results back to then compare to another table
abc
abc/ab
abc/ab/123
I have worked out the other logic but am struggling with the regular expressions.
Here is the recursive query with SUBSTR and INSTR:
-- List every leading path prefix of col by repeatedly cutting off the last
-- '/' and everything after it ( INSTR( col, '/', -1 ) searches from the end ).
with cte(col) as
(
-- anchor member: only values that contain a '/' have a prefix; without this
-- filter INSTR returns 0 and SUBSTR( col, 1, -1 ) adds a NULL row
select substr(col, 1, instr(col, '/', -1) - 1) from mytable where instr(col, '/') > 0
union all
-- recursive member: keep shortening while the current prefix still has a '/'
select substr(col, 1, instr(col, '/', -1) - 1) from cte where instr(col, '/') > 0
)
select col from cte;
And here is the query with REGEXP_REPLACE:
-- List every leading path prefix of col by repeatedly removing the last
-- '/segment' ( the pattern '/[^/]*$' matches the final '/' and what follows ).
with cte(col) as
(
-- anchor member: only values that contain a '/' have a prefix; without this
-- filter a value with no '/' is left unchanged by REGEXP_REPLACE and would be
-- returned as if it were a prefix
select regexp_replace(col, '/[^/]*$', '') from mytable where instr(col, '/') > 0
union all
-- recursive member: keep shortening while the current prefix still has a '/'
select regexp_replace(col, '/[^/]*$', '') from cte where instr(col, '/') > 0
)
select col from cte;
You don't need a regular expression. You can do it with (faster) string functions:
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE test_data ( id, value ) AS
SELECT 1, 'abc/ab/123/zzz' FROM DUAL;
Query 1:
-- List every leading prefix of value that ends at a '/'; end_pos walks through
-- the positions of the successive '/' characters from left to right.
WITH bounds ( id, value, end_pos ) AS (
-- anchor member: position of the first '/'; values without any '/' are
-- filtered out and produce no rows
SELECT id,
value,
INSTR( value, '/', 1 )
FROM test_data
WHERE INSTR( value, '/', 1 ) > 0
UNION ALL
-- recursive member: position of the next '/' after the current one; stops
-- when no further '/' exists
SELECT id,
value,
INSTR( value, '/', end_pos + 1 )
FROM bounds
WHERE INSTR( value, '/', end_pos + 1 ) > 0
)
SELECT id,
-- SUBSTR( value, 1, end_pos ) includes the '/' found at end_pos, so every item
-- ends with a slash; SUBSTR( value, 1, end_pos - 1 ) would return the prefix
-- without it
SUBSTR( value, 1, end_pos ) AS item
FROM bounds
ORDER BY id, end_pos
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |
However, if you did want to use regular expressions then you could do:
Query 2:
-- Regular-expression variant: build each prefix by appending the next
-- '/'-terminated segment ( non-greedy pattern '.*?/' ) to the prefix so far.
WITH bounds ( id, value, lvl, item ) AS (
-- anchor member: first segment up to and including the first '/'; values
-- without any '/' have no match and are filtered out
SELECT id,
value,
1,
REGEXP_SUBSTR( value, '.*?/', 1, 1 )
FROM test_data
WHERE REGEXP_SUBSTR( value, '.*?/', 1, 1 ) IS NOT NULL
UNION ALL
-- recursive member: append occurrence lvl + 1 of the pattern; stops when
-- there is no further '/'-terminated segment
SELECT id,
value,
lvl + 1,
item || REGEXP_SUBSTR( value, '.*?/', 1, lvl + 1 )
FROM bounds
WHERE REGEXP_SUBSTR( value, '.*?/', 1, lvl + 1 ) IS NOT NULL
)
-- each item ends with the '/' matched by the pattern; the query has no
-- ORDER BY, so the order of the returned rows is not specified
SELECT id,
item
FROM bounds
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |