Oracle Regexp_ help to do a replace on string - sql

I have a field that has several name values seperated by $;, like below:
$;James$;Paul$;
I have a function available on the names that allows me to identify data from a different table i.e. email address.
However the function only works on a single name value.
So I need to do a substring to get each name value and then run a replace command that runs the function to pull out their email address.
I want to get the following responses James#emailserver.com, Paul#emailserver.com.
So my question is: how can I run a regexp_replace command to identify the names and then split them out to run the replace?

Something like this:
with
name_list ( id, str ) as (
select 1, '$;James$;Paul$;' from dual union all
select 2, '$;Jane$;Emily$;Ann$;' from dual
)
select id, substr(str, instr(str, '$;', 1, level) + 2,
instr(str, '$;', 1, level+1) - instr(str, '$;', 1, level) - 2) as name
from name_list
connect by level <= regexp_count(str, '\$;') - 1
and prior id = id
and prior sys_guid() is not null
;
ID NAME
-- -----
1 James
1 Paul
2 Jane
2 Emily
2 Ann
And then you can use this for your comparisons.
Alternatively, if you need to see if the name James is in your input string (and assuming James is the value in a column name_col):
... where name_list.str like '$;' || name_col || '$;'
and you don't need to split the string anymore. The concatenations are necessary, though, because you don't want Anne in the name list to match the name Ann in the column.

This function helps me to split a varchar:
function f_split_string( p_string varchar2, p_separator varchar2, p_pos number) return varchar2
is
v_string varchar2(100);
begin
select partition into v_string from
(select rownum as n , partition from
(select regexp_substr(p_string, '[^'||p_separator||']+', 1, level) partition from dual
connect by regexp_substr(p_string, '[^'||p_separator||']+', 1, level) is not null)
) t1
where t1.n = p_pos;
return trim(v_string);
exception
when others then return 'nfound';
end;
Example call:
/*using the params*/
p_string := '$;James$;Paul$;'; --string to parse
p_separator := '$;'; --separator
p_pos := 1; --position to get
f_split_string(p_string,p_separator,p_pos) /*return 'James'*/
/*if p_pos :=2*/
f_split_string(p_string,p_separator,p_pos) /* return 'Paul' */
/*if p_pos :=3*/
f_split_string(p_string,p_separator,p_pos) /*return 'nfound' */
/*if p_pos :=0*/
f_split_string(p_string,p_separator,p_pos) /* return 'nfound' */

Related

How to count the number of multiple repeating characters in a column and get a list of it in Oracle? [duplicate]

How can I count number of occurrences of the character - in a varchar2 string?
Example:
select XXX('123-345-566', '-') from dual;
----------------------------------------
2
Here you go:
select length('123-345-566') - length(replace('123-345-566','-',null))
from dual;
Technically, if the string you want to check contains only the character you want to count, the above query will return NULL; the following query will give the correct answer in all cases:
select coalesce(length('123-345-566') - length(replace('123-345-566','-',null)), length('123-345-566'), 0)
from dual;
The final 0 in coalesce catches the case where you're counting in an empty string (i.e. NULL, because length(NULL) = NULL in ORACLE).
REGEXP_COUNT should do the trick:
select REGEXP_COUNT('123-345-566', '-') from dual;
Here's an idea: try replacing everything that is not a dash char with empty string. Then count how many dashes remained.
select length(regexp_replace('123-345-566', '[^-]', '')) from dual
I justed faced very similar problem... BUT RegExp_Count couldn't resolved it.
How many times string '16,124,3,3,1,0,' contains ',3,'? As we see 2 times, but RegExp_Count returns just 1. Same thing is with ''bbaaaacc' and when looking in it 'aa' - should be 3 times and RegExp_Count returns just 2.
select REGEXP_COUNT('336,14,3,3,11,0,' , ',3,') from dual;
select REGEXP_COUNT('bbaaaacc' , 'aa') from dual;
I lost some time to research solution on web. Couldn't' find... so i wrote my own function that returns TRUE number of occurance. Hope it will be usefull.
CREATE OR REPLACE FUNCTION EXPRESSION_COUNT( pEXPRESSION VARCHAR2, pPHRASE VARCHAR2 ) RETURN NUMBER AS
vRET NUMBER := 0;
vPHRASE_LENGTH NUMBER := 0;
vCOUNTER NUMBER := 0;
vEXPRESSION VARCHAR2(4000);
vTEMP VARCHAR2(4000);
BEGIN
vEXPRESSION := pEXPRESSION;
vPHRASE_LENGTH := LENGTH( pPHRASE );
LOOP
vCOUNTER := vCOUNTER + 1;
vTEMP := SUBSTR( vEXPRESSION, 1, vPHRASE_LENGTH);
IF (vTEMP = pPHRASE) THEN
vRET := vRET + 1;
END IF;
vEXPRESSION := SUBSTR( vEXPRESSION, 2, LENGTH( vEXPRESSION ) - 1);
EXIT WHEN ( LENGTH( vEXPRESSION ) = 0 ) OR (vEXPRESSION IS NULL);
END LOOP;
RETURN vRET;
END;
I thought of
SELECT LENGTH('123-345-566') - LENGTH(REPLACE('123-345-566', '-', '')) FROM DUAL;
You can try this
select count( distinct pos) from
(select instr('123-456-789', '-', level) as pos from dual
connect by level <=length('123-456-789'))
where nvl(pos, 0) !=0
it counts "properly" olso for how many 'aa' in 'bbaaaacc'
select count( distinct pos) from
(select instr('bbaaaacc', 'aa', level) as pos from dual
connect by level <=length('bbaaaacc'))
where nvl(pos, 0) !=0
here is a solution that will function for both characters and substrings:
select (length('a') - nvl(length(replace('a','b')),0)) / length('b')
from dual
where a is the string in which you search the occurrence of b
have a nice day!
SELECT {FN LENGTH('123-345-566')} - {FN LENGTH({FN REPLACE('123-345-566', '#', '')})} FROM DUAL
select count(*)
from (
select substr('K_u_n_a_l',level,1) str
from dual
connect by level <=length('K_u_n_a_l')
)
where str ='_';

Capitalize words in a string

I've used INITCAP to capitalize words in a string, but I've run into a small issue:
select initcap(q'[JOE'S CARD 'N' CANDY]') from dual;
It returns "Joe'S Card 'N' Candy", but I wonder if there is another way to capitalize the words so it will look like this "Joe's Card 'N' Candy" (notice the s is in lowercase)
In your place I would create a custom PL/SQL procedure of the kind:
create or replace function initcap_cust(p_input varchar2)
return varchar2
as
l_input varchar2(4000) := lower(p_input);
l_capitalize_first_letter boolean := true;
l_output varchar2(4000) := null;
l_curr_char char(1);
begin
-- here we iterate over the lowercased string characters
for i in 1..length(l_input) loop
l_curr_char := substr(l_input, i, 1);
-- if we find a space - OK, next alphabet letter should be capitalized
-- you can add here more delimiters, e.g.: l_curr_char in (' ', ',', etc)
if l_curr_char = ' ' then
l_capitalize_first_letter := true;
end if;
-- makes O'Sullivan look this way
if regexp_like(l_output, '(^| )O''$') then
l_capitalize_first_letter := true;
end if;
-- found the first letter after delimiter - OK, capitalize
if l_capitalize_first_letter and (l_curr_char between 'a' and 'z') then
l_curr_char := upper(l_curr_char);
l_capitalize_first_letter := false;
end if;
-- build the output string
l_output := l_output || l_curr_char;
end loop;
return l_output;
end;
It works in your case and similar ones. Also it can be customized depending on your needs without dealing with complex queries built using the only functions provided by Oracle out of the box.
N.B. Also there is an option to create equivalent java stored procedure, on the link provided by Edgars T. there is an example.
Adapted from this answer to use a single simpler regular expression to parse each word:
WITH names ( name ) AS (
SELECT 'FIRSTNAME O''MALLEY' FROM DUAL UNION
SELECT 'FIRST''NAME TEH''TE' FROM DUAL UNION
SELECT 'FORMAT ME BYGGER''N' FROM DUAL UNION
SELECT 'OLD MCDONALD' FROM DUAL UNION
SELECT 'EVEN OL''DER MACDONALD' FROM DUAL UNION
SELECT q'[JOE'S CARD 'N' CANDY]' FROM DUAL
)
SELECT name,
formatted_name
FROM names
MODEL
PARTITION BY (ROWNUM rn)
DIMENSION BY (0 dim)
MEASURES(name, CAST('' AS VARCHAR2(255)) word, CAST('' AS VARCHAR(255)) formatted_name)
RULES ITERATE(99) UNTIL (word[0] IS NULL)
(
word[0] = REGEXP_SUBSTR(name[0], '[^ ]+( *|$)', 1, ITERATION_NUMBER + 1),
formatted_name[0] = formatted_name[0]
-- Capitalise names starting with ', *', MC and MAC:
|| INITCAP(REGEXP_SUBSTR( word[0], '^([^'']?''|ma?c)?(.)(.*)$', 1, 1, 'i', 1 ) )
-- Capitalise the next letter of the word
|| UPPER( REGEXP_SUBSTR( word[0], '^([^'']?''|ma?c)?(.)(.*)$', 1, 1, 'i', 2 ) )
-- Lower case the rest of the word
|| LOWER( REGEXP_SUBSTR( word[0], '^([^'']?''|ma?c)?(.)(.*)$', 1, 1, 'i', 3 ) )
);
Output:
NAME FORMATTED_NAME
----------------------- ----------------------
EVEN OL'DER MACDONALD Even Ol'der MacDonald
OLD MCDONALD Old McDonald
FIRST'NAME TEH'TE First'name Teh'te
FORMAT ME BYGGER'N Format Me Bygger'n
JOE'S CARD 'N' CANDY Joe's Card 'N' Candy
FIRSTNAME O'MALLEY Firstname O'Malley

Comparing two comma delimited strings

I have 2 strings str1: 'abc,def,ghi' and str2: 'tyu,abc,fgh'.
I want to compare these two strings using the delimiter ,. Now since the 2 strings have abc it should return true. I want a function in Oracle SQL which can perform this operation.
Looks complicated but it is just a couple of helper functions to split a list into separate values (to be contained into a table type) and then a very simple function to test the intersection of two collections.
Oracle Setup:
CREATE TYPE VARCHAR2s_Table AS TABLE OF VARCHAR2(4000);
CREATE FUNCTION regexp_escape(
expression VARCHAR2
) RETURN VARCHAR2 DETERMINISTIC
AS
BEGIN
RETURN REGEXP_REPLACE( expression, '([$^[()+*?{\|])', '\\\1', 1, 0, 'c' );
END;
/
CREATE FUNCTION splitList(
list VARCHAR2,
delim VARCHAR2 := ','
) RETURN VARCHAR2s_Table DETERMINISTIC
AS
pattern VARCHAR2(256);
len BINARY_INTEGER;
t_items VARCHAR2s_Table := VARCHAR2s_Table();
BEGIN
IF list IS NULL THEN
NULL;
ELSIF delim IS NULL THEN
t_items.EXTEND( LENGTH( list ) );
FOR i IN 1 .. LENGTH( list ) LOOP
t_items(i) := SUBSTR( list, i, 1 );
END LOOP;
ELSE
pattern := '(.*?)($|' || REGEXP_ESCAPE( delim ) || ')';
len := REGEXP_COUNT( list, pattern ) - 1;
t_items.EXTEND( len );
IF len = 1 THEN
t_items(1) := list;
ELSE
FOR i IN 1 .. len LOOP
t_items(i) := REGEXP_SUBSTR( list, pattern, 1, i, NULL, 1 );
END LOOP;
END IF;
END IF;
RETURN t_items;
END;
/
CREATE FUNCTION check_list_intersect(
list1 VARCHAR2,
list2 VARCHAR2
) RETURN NUMBER DETERMINISTIC
AS
BEGIN
IF splitList( list1 ) MULTISET INTERSECT splitList( list2 ) IS EMPTY THEN
RETURN 0;
ELSE
RETURN 1;
END IF;
END;
/
Query 1:
SELECT check_list_intersect( 'abc,def,ghi', 'abc' ) AS matches
FROM DUAL;
Results:
MATCHES
---------
1
Query 2:
SELECT check_list_intersect( 'abc,def,ghi', 'abcd' ) AS matches
FROM DUAL;
Results:
MATCHES
---------
0
The below will make the trick.
with temp as (
select 1 strid, 'abc,def,ghi' Error from dual
union all
select 2, 'tyu,abc,fgh' from dual
)
select str
from (
SELECT strid, trim(regexp_substr(str, '[^,]+', 1, level)) str
FROM (SELECT strid, Error str FROM temp) t
CONNECT BY instr(str, ',', 1, level - 1) > 0
)
group by str
having count(distinct strid) > 1;
Warning: This answer is not fully correct. (See the comments.)
Starting from dcieslak answer(csv split with regexp), a variation of the subject would be:
create or replace function check_string_intersec(str1 varchar2, str2 varchar2) return number
is
begin
for k in (SELECT trim(regexp_substr(str1, '[^,]+', 1, level)) item
FROM dual
CONNECT BY instr(str1, ',', 1, level - 1) > 0
)
loop
if instr(str2, k.item,1) > 0 then return 1; end if;
end loop;
return 0;
end;
This splits first string and search for every item in the second string.

Split comma separated values to columns in Oracle

I have values being returned with 255 comma separated values. Is there an easy way to split those into columns without having 255 substr?
ROW | VAL
-----------
1 | 1.25, 3.87, 2, ...
2 | 5, 4, 3.3, ....
to
ROW | VAL | VAL | VAL ...
---------------------
1 |1.25 |3.87 | 2 ...
2 | 5 | 4 | 3.3 ...
Beware! The regexp_substr expression of the format '[^,]+' will not return the expected value if there is a null element in the list and you want that item or one after it. Consider this example where the 4th element is NULL and I want the 5th element and thus expect the '5' to be returned:
SQL> select regexp_substr('1,2,3,,5,6', '[^,]+', 1, 5) from dual;
R
-
6
Surprise! It returns the 5th NON-NULL element, not the actual 5th element! Incorrect data returned and you may not even catch it. Try this instead:
SQL> select regexp_substr('1,2,3,,5,6', '(.*?)(,|$)', 1, 5, NULL, 1) from dual;
R
-
5
So, the above corrected REGEXP_SUBSTR says to look for the 5th occurrence of 0 or more comma-delimited characters followed by a comma or the end of the line (allows for the next separator, be it a comma or the end of the line) and when found return the 1st subgroup (the data NOT including the comma or end of the line).
The search match pattern '(.*?)(,|$)' explained:
( = Start a group
. = match any character
* = 0 or more matches of the preceding character
? = Match 0 or 1 occurrences of the preceding pattern
) = End the 1st group
( = Start a new group (also used for logical OR)
, = comma
| = OR
$ = End of the line
) = End the 2nd group
EDIT: More info added and simplified the regex.
See this post for more info and a suggestion to encapsulate this in a function for easy reuse: REGEX to select nth value from a list, allowing for nulls
It's the post where I discovered the format '[^,]+' has the problem. Unfortunately it's the regex format you will most commonly see as the answer for questions regarding how to parse a list. I shudder to think of all the incorrect data being returned by '[^,]+'!
You can use regexp_substr():
select regexp_substr(val, '[^,]+', 1, 1) as val1,
regexp_substr(val, '[^,]+', 1, 2) as val2,
regexp_substr(val, '[^,]+', 1, 3) as val3,
. . .
I would suggest that you generate a column of 255 numbers in Excel (or another spreadsheet), and use the spreadsheet to generate the SQL code.
If you only have one row, and time to create your
create your own built-in cto_table function to split a string on any separator, then you can use PIVOT + LISTAGG to do it like follows:
select * from (
select rownum r , collection.*
from TABLE(cto_table(',','1.25, 3.87, 2, 19,, 1, 9, ')) collection
)
PIVOT (
LISTAGG(column_value) within group (order by 1) as val
for r in (1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
)
FYI: here is how to create the cto_table function:
CREATE OR REPLACE TYPE t_my_list AS TABLE OF VARCHAR2(100);
CREATE OR REPLACE
FUNCTION cto_table(p_sep in Varchar2, p_list IN VARCHAR2)
RETURN t_my_list
AS
l_string VARCHAR2(32767) := p_list || p_sep;
l_sep_index PLS_INTEGER;
l_index PLS_INTEGER := 1;
l_tab t_my_list := t_my_list();
BEGIN
LOOP
l_sep_index := INSTR(l_string, p_sep, l_index);
EXIT
WHEN l_sep_index = 0;
l_tab.EXTEND;
l_tab(l_tab.COUNT) := TRIM(SUBSTR(l_string,l_index,l_sep_index - l_index));
l_index := l_sep_index + 1;
END LOOP;
RETURN l_tab;
END cto_table;
/
hierarchical query could be used. pivoting can be done with case and group by.
with value_t as
(
select row_t,row_number() OVER (partition by row_t order by rownum )rn,
regexp_substr(val, '[^,]+', 1, LEVEL) val from Table1
CONNECT BY LEVEL <= regexp_count(val, '[^,]+')
AND prior row_t = row_t
AND prior sys_guid() is not null
) select row_t, max( case when rn = 1 THEN val end ) val_1,
max( case when rn = 2 THEN val end ) val_2,
max( case when rn = 3 THEN val end ) val_3
from value_t
group by row_t;

using Oracle SQL - regexp_substr to split a record

I need to split the record for column CMD.NUM_MAI which may contain ',' or ';'.
I did this but it gave me an error:
SELECT REGEXP_SUBSTR (expression.num_mai,
'[^;|,]+',
1,
LEVEL)
FROM (SELECT CMD.num_cmd,
(SELECT COMM.com
FROM COMM
WHERE COMM.cod_soc = CMD.cod_soc AND COMM.cod_com = 'URL_DSD')
AS cod_url,
NVL (CONTACT.nom_cta, TIERS.nom_ct1) AS nom_cta,
NVL (CONTACT.num_mai, TIERS.num_mai) AS num_mai,
NVL (CONTACT.num_tel, TIERS.num_tel) AS num_tel,
TO_CHAR (SYSDATE, 'hh24:MI') AS heur_today
FROM CMD, TIERS, CONTACT
WHERE ( (CMD.cod_soc = :CMD_cod_soc)
AND (CMD.cod_eta = :CMD.cod_eta)
AND (CMD.typ_cmd = :CMD.typ_cmd)
AND (CMD.num_cmd = :CMD.num_cmd))
AND (TIERS.cod_soc(+) = CMD.cod_soc)
AND (TIERS.cod_trs(+) = CMD.cod_trs_tra)
AND (TIERS.cod_soc = CONTACT.cod_soc(+))
AND (TIERS.cod_trs = CONTACT.cod_trs(+))
AND (CONTACT.lib_cta(+) = 'EDITION')) experssion
CONNECT BY REGEXP_SUBSTR (expression.num_mai,'[^;|,]+',1,LEVEL)
Error 1:
The expression in CONNECT BY clause is unary. You have to specify both left and right hand side operands.
Try something like,
CONNECT BY REGEXP_SUBSTR (expression.num_mai,'[^;|,]+',1,LEVEL) IS NOT NULL
Error 2:
Your bind variable name is wrong. Ex: :CMD_cod_eta
Perhaps you wanted this way!
( (CMD.cod_soc = :CMD_cod_soc)
AND (CMD.cod_eta = :CMD_cod_eta)
AND (CMD.typ_cmd = :CMD_typ_cmd)
AND (CMD.num_cmd = :CMD_num_cmd))
This is a common question, I'd put into a function, then call it as needed:
CREATE OR REPLACE function fn_split(i_string in varchar2, i_delimiter in varchar2 default ',', b_dedup_tokens in number default 0)
return sys.dbms_debug_vc2coll
as
l_tab sys.dbms_debug_vc2coll;
begin
select regexp_substr(i_string,'[^' || i_delimiter || ']+', 1, level)
bulk collect into l_tab
from dual
connect by regexp_substr(i_string, '[^' || i_delimiter || ']+', 1, level) is not null
order by level;
if (b_dedup_tokens > 0) then
return l_tab multiset union distinct l_tab;
end if;
return l_tab;
end;
/
This will return a table of varchar2(1000), dbms_debug_vc2coll, which is a preloaded type owned by SYS (or you could create your own type using 4000 perhaps). Anyway, an example using it (with space, comma, or semi-colon used as delimiters):
with test_data as (
select 1 as id, 'A;test;test;string' as test_string from dual
union
select 2 as id, 'Another string' as test_string from dual
union
select 3 as id,'A,CSV,string' as test_string from dual
)
select d.*, column_value as token
from test_data d, table(fn_split(test_string, ' ,;', 0));
Output:
ID TEST_STRING TOKEN
1 A;test;test;string A
1 A;test;test;string test
1 A;test;test;string test
1 A;test;test;string string
2 Another string Another
2 Another string string
3 A,CSV,string A
3 A,CSV,string CSV
3 A,CSV,string string
You can pass 1 instead of 0 to fn_split to dedup the tokens (like the repeated "test" token above)