How to select the list of words containing a particular substring as part of a SQL query (oracle)? - sql

I'm trying to return the list of "words" (separated by spaces) containing a certain substring within a string as part of an Oracle Sql query. Would like to return the result as a comma separated list. Separate rows for each match would also work.
Example String in [text_col] field:
some words 123-asdf-789A and also this one 456-asdf-555A more words etc.
Desired result: 123-asdf-789A, 456-asdf-555A
This is what I have so far but it only returns the first result and the fact that it's two separate regular expressions makes it difficult to concatenate all matches as I would like to do.
CONCAT(REGEXP_SUBSTR(text_col, ''(([^[:space:]]+)\asdf)'', 1, 1, ''i'', 1),
REGEXP_SUBSTR(text_col, ''\asdf([^[:space:]]+)'', 1, 1, ''i'', 1))

You can use some regexp functions together as :
-- Split the text on whitespace, keep the tokens that contain a dash, and
-- join them back into a comma-separated list in their original order.
with tab(str) as
(
 select 'some words 123-asdf-789A and also this one 456-asdf-555A more words etc' from dual
), t as
(
 -- One row per whitespace-delimited token; lvl is the token's position.
 -- The row generator is bounded by the number of tokens. The POSIX class has
 -- to sit inside a bracket expression: a bare '[:space:]' is read as the
 -- character list ':', 's', 'p', 'a', 'c', 'e' rather than as whitespace.
 select regexp_substr(str,'[^[:space:]]+',1,level) as str, level as lvl
   from tab
connect by level <= regexp_count(str,'[^[:space:]]+')
)
select listagg(str,',') within group (order by lvl) as "Result"
  from t
 where regexp_like(str,'-');
Result
---------------------------------
123-asdf-789A,456-asdf-555A
Demo
first split by spaces (through [:space:] posix) and take the ones containing dash characters, and finally concatenate by listagg() function

Use a recursive sub-query factoring clause and iterate through all the matches concatenating the string as you go:
Oracle Setup:
-- Sample rows: one with two matching words, one with none, and one where
-- every word contains the sub-string.
CREATE TABLE test_data (value) AS
  SELECT 'some words 123-asdf-789A and also this one 456-asdf-555A more words etc.'
  FROM   DUAL
  UNION ALL
  SELECT 'some words without the expected sub-string'
  FROM   DUAL
  UNION ALL
  SELECT 'asdf asdf-123 456-asdf 78-asdf-90'
  FROM   DUAL
Query:
-- Walk the matches of '\S*asdf\S*' one at a time, appending each to a
-- space-separated list, and return the completed list for every input row.
WITH matches ( value, taken, total, match ) AS (
  -- Anchor: nothing collected yet; total is the number of matching words.
  SELECT value,
         0,
         REGEXP_COUNT( value, '\S*asdf\S*' ),
         CAST( NULL AS VARCHAR2(4000) )
  FROM   test_data
  UNION ALL
  -- Step: append match number taken + 1, preceded by a space after the first.
  SELECT value,
         taken + 1,
         total,
         CASE WHEN taken = 0 THEN '' ELSE match || ' ' END
           || REGEXP_SUBSTR( value, '\S*asdf\S*', 1, taken + 1 )
  FROM   matches
  WHERE  total > taken
)
SELECT value, match
FROM   matches
WHERE  taken = total;
Output:
VALUE | MATCH
:----------------------------------------------------------------------- | :--------------------------------
some words without the expected sub-string | null
some words 123-asdf-789A and also this one 456-asdf-555A more words etc. | 123-asdf-789A 456-asdf-555A
asdf asdf-123 456-asdf 78-asdf-90 | asdf asdf-123 456-asdf 78-asdf-90
db<>fiddle here

Related

SQL: using regexp_substr or regexp_extract, looking for the regex pattern that will only return the string between one character and a space

The row I am trying to parse from is a series of string values separated only by spaces. Sample below:
TX:123 SP:XapZNsyeS INST:456123
I need to use either regexp_substr or regexp_extract to return only values for the string that appears after "TX:" or "SP:", etc. So essentially an expression that only captures the string after a string (e.g. "TX:") and before a space (" ").
Here's one way to split on 2 delimiters. This works on Oracle 12c as you included the Oracle regexp-substr tag. Using a with statement, first set up the original data, then split on a space or the end of the line, then break into name-value pairs.
-- Break each row's space-separated "NAME:VALUE" tokens into one row per
-- token, then split every token at the colon.
WITH source_rows (ID, str) AS (
  SELECT 1, 'TX:123 SP:XapZNsyeS INST:456123' FROM dual
  UNION ALL
  SELECT 2, 'MI:321 SP:MfeKLgkrJ INST:654321' FROM dual
),
tokens (ID, ELEMENT) AS (
  -- LEVEL selects the n-th run of characters that ends at a space or at the
  -- end of the line; subexpression 1 returns the run without its terminator.
  SELECT ID,
         REGEXP_SUBSTR(str, '(.*?)( |$)', 1, LEVEL, NULL, 1)
  FROM   source_rows
  -- PRIOR ID = ID keeps the row generation within one source row; the
  -- SYS_GUID() term makes every generated row distinct so that the
  -- hierarchy is not rejected as a loop.
  CONNECT BY PRIOR ID = ID
         AND PRIOR SYS_GUID() IS NOT NULL
         AND REGEXP_SUBSTR(str, '(.*?)( |$)', 1, LEVEL) IS NOT NULL
)
SELECT ID,
       REGEXP_REPLACE(ELEMENT, '^(.*):.*', '\1') NAME,
       REGEXP_REPLACE(ELEMENT, '.*:(.*)$', '\1') VALUE
FROM   tokens;
ID NAME VALUE
---------- ---------- ----------
1 TX 123
1 SP XapZNsyeS
1 INST 456123
2 MI 321
2 SP MfeKLgkrJ
2 INST 654321
6 rows selected.
EDIT: Realizing this answer is a little more than was asked for, here's a simplified answer to return one element. Don't forget to allow for the value ending at a space or at the end of the line, in case your element is at the end of the line.
-- Return the value of the TX element only.
-- The key has to start the string or follow a space, so a longer key that
-- merely ends in "TX" is not picked up. The value is every character up to
-- the next space, or up to the end of the line when TX is the last element.
-- Using a negated class instead of '.*' keeps the result independent of how
-- the engine mixes greedy and non-greedy quantifiers.
WITH tbl_original_data(ID, str) AS (
SELECT 1, 'TX:123 SP:XapZNsyeS INST:456123' FROM dual
)
SELECT REGEXP_SUBSTR(str, '(^| )TX:([^ ]*)', 1, 1, NULL, 2) TX_VALUE
FROM tbl_original_data;
TX_VALUE
--------
123
1 row selected.

REGEXP to validate a specific number

How can I search for a specific number in an array using REGEXP?
I have an array and need to verify if it has a specific number.
Ex: [5,2,1,4,6,19] and I am looking for number 1, but just the number 1 and not any number that contain the digit 1.
I had to do this:
case when REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[[]{1}[1][,]')<>0
or REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[,]{1}[1][,]{1}')<>0
or REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[,]{1}[1][]]')<>0
or REGEXP_INSTR(JSON_QUERY(MY_JSON_COLUMN,'$.path') , '[[]{1}[1][]]') <>0
then 'DIGIT_ONE' else 'NO_DIGIT_ONE'
end
Is there anything simpler?
You can use
(^|\D)1(\D|$)
This will search for a 1 that is not adjacent to any other digit.
See this regex demo.
Details
(^|\D) - start of string or non-digit
1 - a 1 char
(\D|$) - non-digit or end of string.
Do NOT use regular expressions, use a proper JSON parser and then filter for the number you want:
-- Flag each document by whether its "path" array contains the number 1.
-- Inside the filter, @ stands for the item being tested (here the document
-- root); the comparison is made on JSON numbers, so 11 or 19 do not match.
SELECT my_json_column,
       CASE
       WHEN JSON_EXISTS( my_json_column, '$?(@.path[*] == 1)' )
       THEN 'DIGIT ONE'
       ELSE 'NO DIGIT ONE'
       END AS has_one
FROM   table_name;
or (if you are using Oracle 12.1 and cannot use path filter expressions with JSON_EXISTS, which is only available from Oracle 12.2):
-- Same check without a path filter expression: unnest the elements of the
-- "path" array into rows and test whether any of them equals 1.
SELECT my_json_column,
       CASE
       WHEN EXISTS(
              SELECT 'X'
              FROM   JSON_TABLE(
                       t.my_json_column,
                       '$.path[*]'
                       COLUMNS (
                         value NUMBER PATH '$'
                       )
                     )
              WHERE  value = 1
            )
       THEN 'DIGIT ONE'
       ELSE 'NO DIGIT ONE'
       -- aliased so the result column is HAS_ONE, matching the output shown
       END AS has_one
FROM   table_name t;
Which, for the sample data:
-- Sample documents: "path" with and without a 1, with only 11, and a
-- decoy array "not_this_path" that always contains a 1.
CREATE TABLE table_name (
  my_json_column CHECK ( my_json_column IS JSON )
) AS
  SELECT '{"path":[5,2,1,4,6,19],"not_this_path":[1,2,3,4,5]}' FROM DUAL
  UNION ALL
  SELECT '{"path":[5,2,4,6,19],"not_this_path":[1,2,3,4,5]}' FROM DUAL
  UNION ALL
  SELECT '{"path":[11],"not_this_path":[1]}' FROM DUAL
  UNION ALL
  SELECT '{"path":[2],"not_this_path":[1]}' FROM DUAL
  UNION ALL
  SELECT '{"path":[1,11]}' FROM DUAL;
Both output:
MY_JSON_COLUMN | HAS_ONE
:-------------------------------------------------- | :-----------
{"path":[5,2,1,4,6,19],"not_this_path":[1,2,3,4,5]} | DIGIT ONE
{"path":[5,2,4,6,19],"not_this_path":[1,2,3,4,5]} | NO DIGIT ONE
{"path":[11],"not_this_path":[1]} | NO DIGIT ONE
{"path":[2],"not_this_path":[1]} | NO DIGIT ONE
{"path":[1,11]} | DIGIT ONE
db<>fiddle here
Alternatively, with a little bit more typing (a little bit? Am I kidding?!), splitting the string into rows and comparing values to the search string:
SQL> with test (col) as
2 (select '[5,2,1,4,6,19]' from dual)
3 select t.col,
4 case when '&par_search_string' in
5 (select regexp_substr(substr(col, 2, length(col) - 2), '[^,]+', 1, level) val -- length - 2 drops both '[' and ']'
6 from test
7 connect by level <= regexp_count(col, ',') + 1 -- one row per comma-separated element
8 )
9 then 'Search string exists'
10 else 'Search string does not exist'
11 end result
12 from test t;
Enter value for par_search_string: 1
COL RESULT
-------------- ----------------------------
[5,2,1,4,6,19] Search string exists
SQL> /
Enter value for par_search_string: 24
COL RESULT
-------------- ----------------------------
[5,2,1,4,6,19] Search string does not exist
SQL>

Using Oracle REGEXP_SUBSTR to extract uppercase data separated by underscores

sample column data:
Failure on table TOLL_USR_TRXN_HISTORY:
Failure on table DOCUMENT_IMAGES:
Error in CREATE_ACC_STATEMENT() [line 16]
I am looking for a way to extract only the uppercase words (table names) separated by underscores. I want the whole table name, the maximum is 3 underscores and the minimum is 1 underscore. I would like to ignore any capital letters that are initcap.
You can just use regexp_substr():
-- First run of three or more uppercase letters / underscores in the message;
-- the 'c' flag keeps the match case-sensitive.
select regexp_substr(str, '[A-Z_]{3,}', 1, 1, 'c')
  from (
        select 'Failure on table TOLL_USR_TRXN_HISTORY' as str
          from dual
       ) msg;
The pattern says to find substrings with capital letters or underscores, at least 3 characters long. The 1, 1 means start from the first position and return the first match. The 'c' makes the search case-sensitive.
You may use such a SQL Select statement for each substituted individual line
( Failure on table TOLL_USR_TRXN_HISTORY in the below case )
from your text :
-- Split the message into space-separated words using instr/lag arithmetic and
-- return the words that are all upper case and contain 1 to 3 underscores.
-- The returned word has every character other than letters, digits and
-- underscores removed.
select regexp_replace(q.word, '[^a-zA-Z0-9_]+', '') as word
from
(
-- word: the text between the previous space and the current one.
--   start  = previous space position + 1 (position 1 for the first row)
--   length = current space position (or the string length once no further
--            space exists) minus the previous space position
-- lg: the previous space position, defaulting to 1 on the first row.
select substr(str,nvl(lag(spc) over (order by lvl),1)+1*sign(lvl-1),
abs(decode(spc,0,length(str),spc)-nvl(lag(spc) over (order by lvl),1))) word,
nvl(lag(spc) over (order by lvl),1) lg
from
(
-- One row per level 1..10; spc is the position of the level-th space in str,
-- or 0 when str has fewer spaces than that.
with tab as
( select 'Failure on table TOLL_USR_TRXN_HISTORY' str from dual )
select instr(str,' ',1,level) spc, str, level lvl
from tab
connect by level <= 10
)
) q
-- lg = 0 means the previous level already found no space, so the row lies
-- beyond the last word.
where lg > 0
-- the cleaned word is unchanged by upper(), i.e. it has no lower-case letters
and upper(regexp_replace(q.word, '[^a-zA-Z0-9_]+', ''))
= regexp_replace(q.word, '[^a-zA-Z0-9_]+', '')
-- at least one underscore among the first three occurrences ...
and ( nvl(length(regexp_substr(q.word,'_',1,1)),0)
+ nvl(length(regexp_substr(q.word,'_',1,2)),0)
+ nvl(length(regexp_substr(q.word,'_',1,3)),0)) > 0
-- ... and no fourth underscore
and nvl(length(regexp_substr(q.word,'_',1,4)),0) = 0;
An alternate way to get only the table name from the error message below; this query works only if the table name is at the end of the message, followed by a colon, as shown:
-- Take everything after the last space, then drop the trailing colon.
with t as (
  select 'Failure on table TOLL_USR_TRXN_HISTORY:' as data
    from dual
)
select rtrim(substr(data, instr(data, ' ', -1) + 1), ':')
  from t
New Query for all messages :
-- Blank out the fixed "Failure on table" prefix and the colons in a
-- two-line message, leaving the table names surrounded by spaces.
-- The literal spans two lines on purpose: the line break is part of the text.
select replace(
         replace(
           'Failure on table TOLL_USR_TRXN_HISTORY:
Failure on table DOCUMENT_IMAGES:',
           'Failure on table',
           ' '
         ),
         ':',
         ' '
       )
  from dual

remove characters between specific characters in pl/sql

I need get a substring from the below example
luvi.luci#gma
and i want to return luci. So basically i need to remove all the information before '.' and after '#'
more examples:
pd.prd#gded
You can do this with regexp_substr(). Here is an example:
-- Grab the span from the first dot through the last '#', then strip the dot
-- and '#' characters themselves. The 'x' in both TRANSLATE lists maps to
-- itself so that the "to" list is not empty.
with addr as (
  select 'luvi.luci#gma' as email
    from dual
)
select translate(regexp_substr(email, '[.].*#', 1, 1), 'x.#', 'x')
  from addr
-- For every input, keep only the word that sits between a dot and a '#'.
-- The pattern has two alternatives:
--   (\.(\w+)#)  a dot, a run of word characters (group 2), then '#'
--   .           any other single character
-- Every match is replaced by group 2, which is empty whenever the second
-- alternative matched, so everything outside the dot...# span is dropped.
with data (val) as (
  select null            from dual union all
  select 'luvi.luci'     from dual union all
  select 'luvi.luci#gma' from dual union all
  select 'pd.prd#gded'   from dual
)
select val,
       regexp_replace(val, '(\.(\w+)#)|.', '\2') ss
  from data;

Regexp_replace processing result

I have a string with groups of numbers, and I'd like every group to have a constant length. At the moment I use two regexp_replace calls: the first prepends 10 zeros to each number, and the second cuts each number down to its last 10 digits:
-- Prefix every number with ten zeros, then keep only its last ten digits.
with s(txt) as (
  select '1030123:12031:1341'
    from dual
)
select regexp_replace(
         regexp_replace(txt, '(\d+)', '0000000000\1'),
         '\d+(\d{10})',
         '\1'
       )
  from s;
But Id like to use only one regex something like
regexp_replace(txt, '(\d+)',lpad('\1',10,'0'))
But it doesn't work: lpad is executed before the regexp. Do you have any ideas?
With a slightly different approach, you can try the following:
-- Left-pad every ':'-separated piece to ten characters with zeros and
-- rebuild one ':'-joined string per input row.
with s(id, txt) as
(
  -- rownum gives each input string an id to regroup its pieces by
  select rownum, txt
    from (
          select '1030123:12031:1341' as txt from dual union all
          select '1234:0123456789:1341' from dual
         )
)
select listagg(lpad(regexp_substr(s.txt, '[^:]+', 1, pos.column_value), 10, '0'), ':')
         within group (order by column_value) txt
  from s,
       -- per-row list of piece positions 1..n: another level is generated
       -- while the string still has a (LEVEL - 1)-th ':' separator
       table (cast (multiset
                (select level
                   from dual
                connect by instr(s.txt, ':', 1, level - 1) > 0
                ) as sys.odciNumberList )) pos
 group by id
TXT
-----------------------------------
0001030123:0000012031:0000001341
0000001234:0123456789:0000001341
This uses the CONNECT BY to split every string based on the separator ':', then uses LPAD to pad to 10 and then aggregates the strings to build rows containing the concatenation of padded values
This works for non-empty sequences (e.g. 123::456)
-- Prefix every number with a ten-zero string built by lpad, then drop the
-- surplus leading zeros so that ten digits remain.
with s(txt) as (
  select '1030123:12031:1341'
    from dual
)
select regexp_replace(
         regexp_replace(txt, '(\d+)', lpad('0',10,'0') || '\1'),
         '0*(\d{10})',
         '\1'
       )
  from s
;