I have the following pattern of characters in a dataset. I need to manipulate the data and cross-reference it with another table. I'm trying to write a REGEXP_SUBSTR that returns the text before each forward slash, working from the left. For example:
abc/ab/123/zzz
so I need to get the following results back to then compare to another table
abc
abc/ab
abc/ab/123
I have worked out the other logic but am struggling with the various regular expressions.
Here is the recursive query with SUBSTR and INSTR:
-- Emits every prefix of col that ends just before a '/', longest first,
-- e.g. 'abc/ab/123/zzz' -> 'abc/ab/123', 'abc/ab', 'abc'.
with cte(col) as
(
  -- Anchor: cut each source value at its last '/'.
  -- Values without any '/' are skipped: instr() would return 0 and
  -- substr(col, 1, -1) would otherwise contribute a NULL row.
  select substr(col, 1, instr(col, '/', -1) - 1)
  from mytable
  where instr(col, '/') > 0
  union all
  -- Recursive step: keep dropping the last segment while a '/' remains.
  select substr(col, 1, instr(col, '/', -1) - 1)
  from cte
  where instr(col, '/') > 0
)
select col from cte;
And here is the query with REGEXP_REPLACE:
-- Emits every prefix of col that ends just before a '/', longest first,
-- by repeatedly stripping the trailing '/segment' with a regular expression.
with cte(col) as
(
  -- Anchor: remove the last '/...' segment.
  -- Values without any '/' are skipped: the pattern would not match and the
  -- whole value would be returned unchanged as if it were a prefix.
  select regexp_replace(col, '/[^/]*$', '')
  from mytable
  where instr(col, '/') > 0
  union all
  -- Recursive step: strip one more segment while a '/' remains.
  select regexp_replace(col, '/[^/]*$', '')
  from cte
  where instr(col, '/') > 0
)
select col from cte;
You don't need a regular expression. You can do it with (faster) string functions:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Fixture: a single slash-delimited sample value.
CREATE TABLE test_data ( id, value )
AS
  SELECT 1                AS id,
         'abc/ab/123/zzz' AS value
  FROM   DUAL;
Query 1:
-- Walks each value from left to right, one '/' at a time, and returns the
-- text from the start of the value up to and including that '/'.
WITH slash_hits ( id, value, end_pos ) AS (
    -- First '/' of every value that has one.
    SELECT t.id,
           t.value,
           INSTR( t.value, '/' )
    FROM   test_data t
    WHERE  INSTR( t.value, '/' ) > 0
  UNION ALL
    -- Next '/' after the one found in the previous step, if any.
    SELECT h.id,
           h.value,
           INSTR( h.value, '/', h.end_pos + 1 )
    FROM   slash_hits h
    WHERE  INSTR( h.value, '/', h.end_pos + 1 ) > 0
)
SELECT   h.id,
         SUBSTR( h.value, 1, h.end_pos ) AS item
FROM     slash_hits h
ORDER BY h.id, h.end_pos
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |
However, if you did want to use regular expressions then you could do:
Query 2:
-- Regular-expression variant: the lvl-th non-greedy '.*?/' match is the
-- lvl-th segment (with its trailing slash); item accumulates the segments.
WITH prefixes ( id, value, lvl, item ) AS (
    -- First segment of every value that contains a '/'.
    SELECT t.id,
           t.value,
           1,
           REGEXP_SUBSTR( t.value, '.*?/', 1, 1 )
    FROM   test_data t
    WHERE  REGEXP_SUBSTR( t.value, '.*?/', 1, 1 ) IS NOT NULL
  UNION ALL
    -- Append the next segment for as long as one exists.
    SELECT p.id,
           p.value,
           p.lvl + 1,
           p.item || REGEXP_SUBSTR( p.value, '.*?/', 1, p.lvl + 1 )
    FROM   prefixes p
    WHERE  REGEXP_SUBSTR( p.value, '.*?/', 1, p.lvl + 1 ) IS NOT NULL
)
SELECT p.id,
       p.item
FROM   prefixes p
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |
Related
I have the values below in a table, and need to replace every number greater than 6 in VALID_VALUES with 6.
ID VALUE VALID_VALUES
---------- --------------- ---------------------------------------------
555 OFF OFF,1,2,3,4,5,6,7,8,9,10
So after change desired output would be as below,
SQL> /
FIS_ID VALUE VALID_VALUES
---------- --------------- ---------------------------------------------
417 OFF OFF,1,2,3,4,5,6,6,6,6,6
You do not need to split and aggregate; instead you can use a regular expression to find either 2-or-more-digit numbers (i.e. [1-9]\d+) or 1-digit values higher than 6 (i.e. [789]) and could include leading zeroes if these may appear in your data set (since you are storing numbers as text):
-- Rewrites, in place, every list entry that is a multi-digit number or a
-- single digit 7-9 (optionally preceded by zeroes) to '6'.
SELECT t.id,
       t.value,
       REGEXP_REPLACE( t.valid_values, '0*[1-9]\d+|0*[789]', '6' ) AS valid_values
FROM   table_name t
Which, for the sample data:
-- Fixture: the asker's row plus one with large and zero-padded numbers.
CREATE TABLE table_name ( ID, VALUE, VALID_VALUES )
AS
  SELECT 555, 'OFF', 'OFF,1,2,3,4,5,6,7,8,9,10'
  FROM   DUAL
  UNION ALL
  SELECT 666, 'OFF', 'OFF,1,2,3,4,5,6,42,05,0123'
  FROM   DUAL;
Outputs:
ID | VALUE | VALID_VALUES
--: | :---- | :----------------------
555 | OFF | OFF,1,2,3,4,5,6,6,6,6,6
666 | OFF | OFF,1,2,3,4,5,6,6,05,6
db<>fiddle here
You need to split, replace and aggregate as follows:
-- Splits valid_values on ',', caps every numeric element at 6 and
-- re-aggregates the elements in their original order.
-- Requires Oracle 12.2+ (TO_NUMBER ... DEFAULT NULL ON CONVERSION ERROR).
Select id, value,
       Listagg(
         -- Non-numeric elements (e.g. 'OFF') convert to NULL and pass through.
         case when to_number(vals default null on conversion error) is not null
              -- '6' is a string so that both CASE branches are VARCHAR2.
              then case when to_number(vals) > 6 then '6' else vals end
              else vals
         end,
         ','
       ) Within group (order by lvl) as valid_values
From
  (Select t.id, t.value,
          REGEXP_SUBSTR( t.valid_values, '[^,]+', 1, v.column_value ) as vals,
          v.column_value as lvl
     from your_table t
          -- One generated row per element of this row's list; a numeric
          -- collection keeps lvl numeric so element 10 sorts after element 9.
          CROSS JOIN TABLE(CAST(MULTISET(
            SELECT level
              FROM DUAL
           CONNECT BY LEVEL <= REGEXP_COUNT( t.valid_values, '[^,]+' )
          ) AS SYS.ODCINUMBERLIST )) v
  )
group by id, value;
For this solution, you split the list using the LAG analytic function, then replace and aggregate as shown below:
-- Splits VALID_VALUES on ',' using comma positions (INSTR + LAG), caps every
-- all-digit element at 6 and re-aggregates in the original order.
-- Assumes ID identifies a row uniquely (used to keep rows apart) -- TODO confirm.
select ID, VALUE
     , listagg(
         case when regexp_like(separate_value, '^\d+$')
              then case when to_number(separate_value) > 6
                        then '6'
                        else separate_value
                   end
              else separate_value
         end
       , ',') within group (order by lvl) VALID_VALUES
from (
  select ID, VALUE
       , lvl
         -- Element = text between the previous comma (0 for the first one)
         -- and the current comma. PARTITION BY ID keeps LAG within one row's list.
       , substr(VALID_VALUES
               , lag(pos, 1, 0) over (partition by ID order by lvl) + 1
               , pos - lag(pos, 1, 0) over (partition by ID order by lvl) - 1) separate_value
  from (
    -- A trailing ',' is appended so that every element ends with a delimiter.
    select ID, VALUE, VALID_VALUES || ',' VALID_VALUES, level lvl
         , instr(VALID_VALUES || ',', ',', 1, level) pos
    from your_table
    connect by level <= length(VALID_VALUES || ',') - length(replace(VALID_VALUES || ',', ','))
           -- Without these two conditions every row would be connected to
           -- every other row and the output would multiply.
           and prior ID = ID
           and prior sys_guid() is not null
  )
)
group by ID, VALUE
;
I have a query which fetches value matching the pattern, I want it to fetch all prefvalue instead of only CEROTG
-- Returns only the first 'prefvalue:' token whose name is alphanumeric or '_'.
SELECT regexp_substr(
         'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1',
         'prefvalue:([[:alnum:]_]+)'
       ) as Result
from   dual
Current Output
prefvalue:CEROTG
Expected Output
prefvalue:CEROTG
prefvalue:CEROTG-2
prefvalue:CEROTG_1
One option uses a recursive query:
-- Returns one row per 'prefvalue:...' token: the n-th row holds the n-th match.
with
  src as (
    select 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1' str
    from   dual
  ),
  matches (str, res, lvl) as (
    -- First match.
    select s.str,
           regexp_substr(s.str, 'prefvalue:([[:alnum:]_-]+)'),
           1 lvl
    from   src s
    union all
    -- Next match, until the number of rows equals the number of matches.
    select m.str,
           regexp_substr(m.str, 'prefvalue:([[:alnum:]_-]+)', 1, m.lvl + 1),
           m.lvl + 1
    from   matches m
    where  m.lvl < regexp_count(m.str, 'prefvalue:([[:alnum:]_-]+)')
  )
select res
from   matches
Demo on DB Fiddle:
| RES |
| :----------------- |
| prefvalue:CEROTG-2 |
| prefvalue:CEROTG |
| prefvalue:CEROTG_1 |
You can use simple string functions in a recursive subquery-factoring clause:
-- Splits entry_data on single spaces using only INSTR/SUBSTR.
-- Each step records where a token starts and where the next space is
-- (end_pos = 0 means no further space, i.e. the last token).
WITH tokens ( entry_data, start_pos, end_pos ) AS (
    SELECT p.entry_data,
           1,
           INSTR( p.entry_data, ' ', 1 )
    FROM   prefdir_entrydata p
  UNION ALL
    SELECT t.entry_data,
           t.end_pos + 1,
           INSTR( t.entry_data, ' ', t.end_pos + 1 )
    FROM   tokens t
    WHERE  t.end_pos > 0
)
SELECT CASE
         WHEN t.end_pos = 0
         THEN SUBSTR( t.entry_data, t.start_pos )
         ELSE SUBSTR( t.entry_data, t.start_pos, t.end_pos - t.start_pos )
       END AS value
FROM   tokens t
Which, for your sample data:
-- Fixture: one space-separated list of prefvalue tokens.
CREATE TABLE prefdir_entrydata ( entry_data )
AS
  SELECT 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1'
  FROM   DUAL;
Outputs:
| VALUE |
| :----------------- |
| prefvalue:CEROTG-2 |
| prefvalue:CEROTG |
| prefvalue:CEROTG_1 |
db<>fiddle here
An example that handles multiple input rows is:
-- Multi-row variant: splits entry_data on spaces for every row with the
-- given dist_name_short, carrying id along so tokens stay tied to their row.
WITH tokens ( id, entry_data, start_pos, end_pos ) AS (
    SELECT p.id,
           p.entry_data,
           1,
           INSTR( p.entry_data, ' ', 1 )
    FROM   prefdir_entrydata p
    WHERE  p.dist_name_short = 'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
  UNION ALL
    SELECT t.id,
           t.entry_data,
           t.end_pos + 1,
           INSTR( t.entry_data, ' ', t.end_pos + 1 )
    FROM   tokens t
    WHERE  t.end_pos > 0
)
SELECT   t.id,
         CASE
           WHEN t.end_pos = 0
           THEN SUBSTR( t.entry_data, t.start_pos )
           ELSE SUBSTR( t.entry_data, t.start_pos, t.end_pos - t.start_pos )
         END AS value
FROM     tokens t
ORDER BY t.id, t.start_pos
Which, for the test data:
-- Fixture: two rows sharing the same dist_name_short, each with its own token list.
CREATE TABLE prefdir_entrydata ( id, entry_data, dist_name_short )
AS
  SELECT 1,
         'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1',
         'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
  FROM   DUAL
  UNION ALL
  SELECT 2,
         'prefvalue:CEROTG-2a prefvalue:CEROTG_1v2',
         'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
  FROM   DUAL;
Outputs:
ID | VALUE
-: | :-------------------
1 | prefvalue:CEROTG-2
1 | prefvalue:CEROTG
1 | prefvalue:CEROTG_1
2 | prefvalue:CEROTG-2a
2 | prefvalue:CEROTG_1v2
db<>fiddle here
-- Splits the string on spaces: one row per element, where the number of
-- elements is the number of spaces plus one. Group 1 of the pattern is the
-- element text without its delimiter.
WITH src (txt) AS (
  SELECT 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1'
  FROM   dual
)
SELECT REGEXP_SUBSTR(s.txt, '(.*?)( |$)', 1, LEVEL, NULL, 1) ELEMENT
FROM   src s
CONNECT BY LEVEL <= 1 + REGEXP_COUNT(s.txt, ' ');
ELEMENT
------------------------------------------------------
prefvalue:CEROTG-2
prefvalue:CEROTG
prefvalue:CEROTG_1
3 rows selected.
I want to check if there is a row in my table that contains the same letters but in different order, but it must have the exact same letters, no more and no less.
For example, I have the letters "abc":
bca -> true
acb -> true
abcd -> **false**
ab -> **false**
Thanks!
You can use recursive CTEs to split the parameter 'abc' and each column value to letters and compare them:
-- Reports, for every tablename.col, whether its letters sorted alphabetically
-- equal the sorted letters of the parameter 'abc' (result is shown as 1/0 in the demo output).
with
-- paramletters: one row per character position of the literal 'abc'.
recursive paramletters as (
select 'abc' col, 1 pos, substr('abc', 1, 1) letter
union all
select col, pos + 1, substr(col, pos + 1, 1)
from paramletters
where pos < length(col)
),
-- param: running concatenation of the letters in alphabetical order. Each
-- partial string is a prefix of the next one, so sorting descending and
-- keeping one row leaves the complete sorted string.
param as (
select group_concat(letter, '') over (order by letter) paramvalue
from paramletters
order by paramvalue desc limit 1
),
-- cteletters: same per-character expansion, applied to every col of tablename.
cteletters as (
select col, 1 pos, substr(col, 1, 1) letter
from tablename
union all
select col, pos + 1, substr(col, pos + 1, 1)
from cteletters
where pos < length(col)
),
-- cte: running sorted concatenation per col; only the row whose concatenation
-- is as long as col itself (i.e. the complete one) is kept.
cte as (
select * from (
select col, group_concat(letter, '') over (partition by col order by letter) colvalue
from cteletters
)
where length(colvalue) = length(col)
)
-- Compare every column's sorted letters with the parameter's sorted letters.
select c.col, c.colvalue = p.paramvalue result
from cte c cross join param p
See the demo.
Results:
| col | result |
| ---- | ------ |
| ab | 0 |
| abcd | 0 |
| acb | 1 |
| bca | 1 |
If the letters of the parameter are already sorted (like 'abc') then this code can be simplified to use only the last 2 CTEs.
I'm trying to extract the different sub-strings within one string. I want a separate string for every part delimited by the dash (-) symbol.
I have tried using the SUBSTR position function. It does not work since sometimes there are 4 chars in the second sub string, therefore, the 3rd sub string is not correct.
SELECT SUBSTR(STR, INSTR (STR, '-', -1)+ 1)
STR = F-123-A123-B12 or F-1234-A123-B12
I am trying to get a query that will give me F.
I need another query that will give me 123 or 1234 if there are 4 chars
I need another query to get me A123
I need another query to get B12
I was thinking there would be a regex function that I could use. I could not find one.
For example:
SQL> with test (col) as
2 (select 'F-123-A123-B12' from dual)
3 select regexp_substr(col, '\w+', 1, level) result
4 from test
5 connect by level <= regexp_count(col, '-') + 1;
RESULT
--------------
F
123
A123
B12
SQL>
You don't need regular expressions. INSTR and SUBSTR will work (and are faster):
Oracle Setup:
-- Fixture: one value with a 3-character and one with a 4-character second part.
CREATE TABLE test_data ( str )
AS
  SELECT 'F-123-A123-B12'
  FROM   DUAL
  UNION ALL
  SELECT 'F-1234-A123-B12'
  FROM   DUAL
Query 1:
-- Splits str into its four '-'-delimited parts: first locate the three
-- delimiters, then slice between them.
WITH delimiters AS (
  SELECT str,
         INSTR( str, '-', 1, 1 ) AS delimiter1,
         INSTR( str, '-', 1, 2 ) AS delimiter2,
         INSTR( str, '-', 1, 3 ) AS delimiter3
  FROM   test_data
)
SELECT SUBSTR( d.str, 1, d.delimiter1 - 1 )                               AS substr1,
       SUBSTR( d.str, d.delimiter1 + 1, d.delimiter2 - d.delimiter1 - 1 ) AS substr2,
       SUBSTR( d.str, d.delimiter2 + 1, d.delimiter3 - d.delimiter2 - 1 ) AS substr3,
       SUBSTR( d.str, d.delimiter3 + 1 )                                  AS substr4
FROM   delimiters d
Output:
SUBSTR1 | SUBSTR2 | SUBSTR3 | SUBSTR4
:------ | :------ | :------ | :------
F | 123 | A123 | B12
F | 1234 | A123 | B12
If you do want to use regular expressions, then there is no need for a hierarchical query:
Query 2:
-- Regular-expression variant: the n-th run of non-dash characters is the n-th part.
SELECT REGEXP_SUBSTR( t.str, '[^-]+', 1, 1 ) AS substr1,
       REGEXP_SUBSTR( t.str, '[^-]+', 1, 2 ) AS substr2,
       REGEXP_SUBSTR( t.str, '[^-]+', 1, 3 ) AS substr3,
       REGEXP_SUBSTR( t.str, '[^-]+', 1, 4 ) AS substr4
FROM   test_data t
(Output as Query 1 above.)
Query 3
If you don't know how many delimited values there will be and want to parse them all to rows then you still don't need to use (slow) regular expressions or hierarchical queries and can just use a recursive sub-query factoring clause with simple string functions (and it works with zero-width/NULL sub-strings between delimiters):
-- Splits str on '-' into rows for any number of parts. Each step stores the
-- part's ordinal, where it starts, and where the next '-' is (0 = none left).
WITH parts ( str, idx, startidx, endidx ) AS (
    SELECT t.str,
           1,
           1,
           INSTR( t.str, '-', 1 )
    FROM   test_data t
  UNION ALL
    SELECT p.str,
           p.idx + 1,
           p.endidx + 1,
           INSTR( p.str, '-', p.endidx + 1 )
    FROM   parts p
    WHERE  p.endidx > 0
)
SELECT   p.str,
         p.idx,
         CASE p.endidx
           WHEN 0
           THEN SUBSTR( p.str, p.startidx )
           ELSE SUBSTR( p.str, p.startidx, p.endidx - p.startidx )
         END AS substr
FROM     parts p
ORDER BY p.str, p.idx
Output:
STR | IDX | SUBSTR
:-------------- | --: | :-----
F-123-A123-B12 | 1 | F
F-123-A123-B12 | 2 | 123
F-123-A123-B12 | 3 | A123
F-123-A123-B12 | 4 | B12
F-1234-A123-B12 | 1 | F
F-1234-A123-B12 | 2 | 1234
F-1234-A123-B12 | 3 | A123
F-1234-A123-B12 | 4 | B12
db<>fiddle here
If your string could have a NULL element, use this format to handle it (Note list element 2 is NULL), else you risk the following elements being returned in the wrong positions:
-- Splits on '-' while keeping empty elements: the pattern matches
-- "anything, then a dash or end of string" and returns group 1 (the element).
with sample (col) as (
  select 'F--A123-B12'
  from   dual
)
select regexp_substr(s.col, '(.*?)(-|$)', 1, level, null, 1) result
from   sample s
connect by level <= 1 + regexp_count(s.col, '-');
RESULT
-----------
F
(null)
A123
B12
4 rows selected.
I am working with newest Node 17.3.1 where "substr" function is obsolete and I replace it with "substring" instead and it is the nearest and simplest alternative.
right now there is a string like this:
789+456-239
I want to get a list like this:
sign | num
+ 789
+ 456
- 239
This could be a way, by using a commonly used split string method, just adapted to your need to handle the sign.
-- test case
with yourString(str) as
(
  select '+789+456-239 ' str
  from dual
),
-- normalised input: a plus sign is prepended when the string does not
-- already start with a sign, so that every number has a sign in front of it
signedString(str) as
(
  select case
           when substr(str, 1, 1) in ('+','-') then str
           else '+' || str
         end
  from yourString
)
-- query: the n-th sign character pairs with the n-th run of non-sign characters
select regexp_substr(s.str, '[+-]', 1, level) sign,
       regexp_substr(s.str, '[^+-]+', 1, level) num
from signedString s
connect by regexp_instr(s.str, '[+-]', 1, level ) > 0
This gives:
SIGN NUM
-------------- --------------
+ 789
+ 456
- 239
Here I assume that if no sign is given in the beginning of this string, the first sign will be +.
If you need to handle more than one string, you need to identify an ID in your table and the CONNECT BY becomes a bit more complex:
-- test case
with yourString(id, str) as
(
  select 1, '+789+456-239' from dual union all
  select 2, '789+456-239' from dual union all
  select 3, '-789+456-239' from dual
),
-- normalised input: a plus sign is prepended when the string does not
-- already start with a sign
signedString(id, str) as
(
  select id,
         case
           when substr(str, 1, 1) in ('+','-') then str
           else '+' || str
         end
  from yourString
)
-- query: one output row per sign found; the two PRIOR conditions keep each
-- input row's hierarchy separate from the others
select s.id,
       regexp_substr(s.str, '[+-]', 1, level) sign,
       regexp_substr(s.str, '[^+-]+', 1, level) num
from signedString s
connect by regexp_instr(s.str, '[+-]', 1, level ) > 0
       and prior s.id = s.id
       and prior sys_guid() is not null
The result:
ID SIGN NUM
---------- ----- --------------------------------------------
1 + 789
1 + 456
1 - 239
2 + 789
2 + 456
2 - 239
3 - 789
3 + 456
3 - 239
Here is a method that does not use regular expressions and can handle multiple input rows:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Fixture: lists with no leading sign, a leading '-', a leading '+', and a bare number.
CREATE TABLE table_name ( id, list )
AS
  SELECT 1, '789+456-239' FROM DUAL
  UNION ALL
  SELECT 2, '-123'        FROM DUAL
  UNION ALL
  SELECT 3, '+456'        FROM DUAL
  UNION ALL
  SELECT 4, '789'         FROM DUAL;
Query 1:
-- Splits list into signed items without regular expressions.
-- bounds holds, per item: where it starts (start_pos), the position of the
-- next sign character (end_pos, 0 when there is none) and its ordinal (lvl).
WITH bounds ( id, list, start_pos, end_pos, lvl ) AS (
SELECT id,
list,
1,
-- Next '+' or '-' searched from position 2, so a sign in position 1 is not
-- taken as a delimiter. If only one of the two signs occurs, use it
-- (0 when neither occurs); if both occur, use the nearer one.
CASE
WHEN INSTR( list, '+', 2 ) = 0 THEN INSTR( list, '-', 2 )
WHEN INSTR( list, '-', 2 ) = 0 THEN INSTR( list, '+', 2 )
ELSE LEAST( INSTR( list, '+', 2 ), INSTR( list, '-', 2 ) )
END,
1
FROM table_name
UNION ALL
SELECT id,
list,
-- The next item starts on the sign character found by the previous step.
end_pos,
-- Same nearest-sign search, starting just after that sign.
CASE
WHEN INSTR( list, '+', end_pos + 1 ) = 0 THEN INSTR( list, '-', end_pos + 1 )
WHEN INSTR( list, '-', end_pos + 1 ) = 0 THEN INSTR( list, '+', end_pos + 1 )
ELSE LEAST( INSTR( list, '+', end_pos + 1 ), INSTR( list, '-', end_pos + 1 ) )
END,
lvl + 1
FROM bounds
-- Stop once the previous step found no further sign.
WHERE end_pos > 0
)
SELECT id,
-- '-' only when the item starts with '-'; anything else is reported as '+'.
DECODE( SUBSTR( list, start_pos, 1 ), '-', '-', '+' ) AS sign,
SUBSTR(
list,
-- Skip the leading sign character when the item has one.
start_pos + DECODE( SUBSTR( list, start_pos, 1 ), '-', 1, '+', 1, 0 ),
-- Length: up to the next sign, or to the end of the string when end_pos is 0,
-- minus the skipped sign character.
DECODE( end_pos, 0, LENGTH( list ) + 1, end_pos ) - start_pos - DECODE( SUBSTR( list, start_pos, 1 ), '-', 1, '+', 1, 0 )
) AS item,
lvl
FROM bounds
ORDER BY id, lvl
Results:
| ID | SIGN | ITEM | LVL |
|----|------|------|-----|
| 1 | + | 789 | 1 |
| 1 | + | 456 | 2 |
| 1 | - | 239 | 3 |
| 2 | - | 123 | 1 |
| 3 | + | 456 | 1 |
| 4 | + | 789 | 1 |