I would like to replace commas in my strings, between dynamic positions (ie. between double quotes). Note that I will not have more than 2 occurrences of double quotes in my strings if that matters.
My example:
'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun'
Desired output:
'randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun'
So far I've tried things with REGEXP_REPLACE() mixed with INSTR() but could not get anything done.
Cheers
Short & clean.
-- Remove the commas that sit between the (at most two) double quotes.
-- The first alternative captures the text before the first quote or after the
-- last quote and writes it back unchanged through \1; any other comma matches
-- the second alternative, where \1 is empty, so that comma is dropped.
WITH sample_input (str) AS (
    SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun'
    FROM dual
)
SELECT
    REGEXP_REPLACE(
        si.str,
        '(^[^"]*|[^"]*$)|,',
        '\1'
    ) AS result
FROM sample_input si
-
+------------------------------------------------+
| RESULT |
+------------------------------------------------+
| randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun |
+------------------------------------------------+
SQL Fiddle
In addition -
Short and clean generic version
-- Generic version for any number of quoted terms.
-- The first alternative matches every stretch that lies outside the quotes
-- (from the string start or a closing quote, lazily up to the next opening
-- quote or the string end) and keeps it through \1. The commas that remain
-- are the ones inside quotes; they match the second alternative and vanish.
WITH lyrics (str) AS (
    SELECT 'Well,you,went,uptown,riding,in,your,limousine' FROM dual
    UNION ALL
    SELECT 'With,your,fine,"Park, Avenue, clothes"' FROM dual
    UNION ALL
    SELECT 'You,had,the,"Dom, Perignon",in,your,hand,"And, the, spoon",up,your,nose' FROM dual
    UNION ALL
    SELECT '"And, when, you",wake,"up, in, the, morning"' FROM dual
    UNION ALL
    SELECT '"With, your, head, on, fire"' FROM dual
    UNION ALL
    SELECT '"And",your,"eyes, too, bloody","to, see",Go,"on, and, cry, in",your,coffee,"But","don''t","come, bitchin''","to, me"' FROM dual
)
SELECT
    REGEXP_REPLACE(
        ly.str,
        '((^|").*?("|$))|,',
        '\1'
    ) AS result
FROM lyrics ly
--
+------------------------------------------------------------------------------------------------------------+
| RESULT |
+------------------------------------------------------------------------------------------------------------+
| Well,you,went,uptown,riding,in,your,limousine |
| With,your,fine,"Park Avenue clothes" |
| You,had,the,"Dom Perignon",in,your,hand,"And the spoon",up,your,nose |
| "And when you",wake,"up in the morning" |
| "With your head on fire" |
| "And",your,"eyes too bloody","to see",Go,"on and cry in",your,coffee,"But","don't","come bitchin'","to me" |
+------------------------------------------------------------------------------------------------------------+
SQL Fiddle
Assuming you are working on CSV, then it is possible that you will also have nested double quotes as per this sample data:
-- Sample rows: one plain quoted term and one quoted term that itself
-- contains doubled ("") quotes.
CREATE TABLE test_data (value) AS
    SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun'
    FROM dual
    UNION ALL
    SELECT 'randomtext,123,"A, ""BC"", D",sun'
    FROM dual;
You can use the regular expression ^(.*?)("([^\"]|"")+")(.*)$ to match the text before the quoted term, the quoted term itself (where a doubled "" stands for an escaped quote) and the text after it, and then remove the commas from only the middle part:
-- Split each value into the text before the first quoted term (group 1), the
-- quoted term itself (group 2, in which "" counts as an escaped quote) and the
-- text after it (group 4); commas are removed from the quoted term only.
-- NOTE(review): the pattern requires a quoted term, so a value without one
-- presumably yields NULL for all three parts - confirm such rows cannot occur.
-- NOTE(review): [^\"] presumably also excludes a literal backslash, because a
-- backslash is not an escape inside a bracket expression - confirm that quoted
-- terms never contain backslashes.
SELECT
    parts.value,
    parts.before_term
        || REPLACE(parts.quoted_term, ',')
        || parts.after_term AS replaced_value
FROM (
    SELECT
        td.value,
        REGEXP_SUBSTR(td.value, '^(.*?)("([^\"]|"")+")(.*)$', 1, 1, NULL, 1) AS before_term,
        REGEXP_SUBSTR(td.value, '^(.*?)("([^\"]|"")+")(.*)$', 1, 1, NULL, 2) AS quoted_term,
        REGEXP_SUBSTR(td.value, '^(.*?)("([^\"]|"")+")(.*)$', 1, 1, NULL, 4) AS after_term
    FROM test_data td
) parts
Which outputs:
VALUE | REPLACED_VALUE
:----------------------------------------------- | :---------------------------------------------
randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun | randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun
randomtext,123,"A, ""BC"", D",sun | randomtext,123,"A ""BC"" D",sun
db<>fiddle here
Update
If you need to handle multiple quoted terms in a string (with nested quotes):
-- Sample rows: a single quoted term, a quoted term with doubled quotes,
-- several quoted terms in one value, and a value without any quotes.
CREATE TABLE test_data (value) AS
    SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun'
    FROM dual
    UNION ALL
    SELECT 'randomtext,123,"A, ""BC"", D",sun'
    FROM dual
    UNION ALL
    SELECT 'E,"F, G",H,"I, ""J""", K'
    FROM dual
    UNION ALL
    SELECT 'L,M,N'
    FROM dual;
Then you can use a recursive sub-query factoring clause:
-- Process one quoted term per recursion step.
--   prefix: the part of the value handled so far, with the commas already
--           removed from its quoted terms
--   suffix: the part still to be processed (NULL once nothing is left)
-- The "|$" alternative lets the pattern also match when no quoted term
-- remains, so the remaining text lands in group 1 and the suffix becomes NULL.
-- NOTE(review): [^\"] presumably also excludes a literal backslash, because a
-- backslash is not an escape inside a bracket expression - confirm that quoted
-- terms never contain backslashes.
WITH replacements (value, prefix, suffix) AS (
    -- Anchor member: handle the first quoted term of every row.
    SELECT
        value,
        REGEXP_SUBSTR(value, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 1)
            || REPLACE(
                   REGEXP_SUBSTR(value, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 2),
                   ','
               ),
        REGEXP_SUBSTR(value, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 4)
    FROM test_data

    UNION ALL

    -- Recursive member: apply the same split to whatever is left in suffix
    -- and append the cleaned piece to prefix.
    SELECT
        value,
        prefix
            || REGEXP_SUBSTR(suffix, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 1)
            || REPLACE(
                   REGEXP_SUBSTR(suffix, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 2),
                   ','
               ),
        REGEXP_SUBSTR(suffix, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 4)
    FROM replacements
    WHERE suffix IS NOT NULL
)
-- Keep only the final step of each row, where nothing is left to process.
SELECT
    value,
    prefix AS replaced_value
FROM replacements
WHERE suffix IS NULL;
Which outputs:
VALUE | REPLACED_VALUE
:----------------------------------------------- | :---------------------------------------------
L,M,N | L,M,N
randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun | randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun
randomtext,123,"A, ""BC"", D",sun | randomtext,123,"A ""BC"" D",sun
E,"F, G",H,"I, ""J""", K | E,"F G",H,"I ""J""", K
db<>fiddle here
O
N
K
A
R
How can I convert these rows into the single string ONKAR? I know how to do the reverse, but I am not able to solve this.
You can't do what you want generally without also having a second column which provides the ordering for each letter. Assuming you do have a column for the position, we can try:
-- Concatenate the letters into one word, ordered by the position column.
SELECT
    LISTAGG(yt.letter, '') WITHIN GROUP (ORDER BY yt.position) AS word
FROM yourTable yt;
Demo
Data:
letter | position
O | 1
N | 2
K | 3
A | 4
R | 5
LISTAGG is the right solution for strings of up to 4000 bytes, because it returns a VARCHAR2 value. For longer strings you can aggregate into a CLOB instead:
-- Aggregate the letters in position order into an XML fragment and cast the
-- result to a CLOB.
WITH letters (letter, position) AS (
    SELECT 'O', 1 FROM dual
    UNION ALL
    SELECT 'N', 2 FROM dual
    UNION ALL
    SELECT 'K', 3 FROM dual
    UNION ALL
    SELECT 'A', 4 FROM dual
    UNION ALL
    SELECT 'R', 5 FROM dual
)
SELECT
    XMLCAST(
        XMLAGG(XMLELEMENT(x, lt.letter) ORDER BY lt.position)
        AS CLOB
    ) AS c
FROM letters lt;
C
---------------
ONKAR
You can use this as long as the result of the data query contains all the rows that you want to concatenate.
-- Concatenate the letters in the order in which the rows are returned.
-- NOTE(review): ORDER BY rownum presumably relies on the rows arriving in the
-- order of the UNION ALL branches - confirm, or add an explicit position column.
WITH data AS (
    SELECT 'O' AS letter FROM dual
    UNION ALL
    SELECT 'N' FROM dual
    UNION ALL
    SELECT 'K' FROM dual
    UNION ALL
    SELECT 'A' FROM dual
    UNION ALL
    SELECT 'R' FROM dual
)
SELECT
    LISTAGG(letter, '') WITHIN GROUP (ORDER BY rownum)
FROM data;
If your data is in a single row separated by carriage-return characters (ASCII 13; a line feed would be CHR(10) instead) then you can just use REPLACE( value, CHR(13) ):
Oracle Setup:
-- One row holding the five letters separated by CHR(13) characters.
CREATE TABLE test_data (value) AS
    SELECT 'O' || CHR(13) || 'N' || CHR(13) || 'K' || CHR(13) || 'A' || CHR(13) || 'R'
    FROM dual
Query:
-- Show the stored value next to the same value with every CHR(13) removed.
SELECT
    value,
    REPLACE( value, CHR(13) )
FROM
    test_data
Output:
VALUE | REPLACE(VALUE,CHR(13))
:-------- | :---------------------
O | ONKAR
N |
K |
A |
R |
db<>fiddle here
I have values like "ABC1234", "ABC", "DEF456", "GHI" etc. in a specific column which I need.
Now I need to split this string but only if the character (e.g. "ABC") are followed by digits.
So if the value is "ABC1234" then I need to extract ABC and 1234 separately. But if the value is only "ABC", I just need the "ABC". I can't find a solution using SUBSTR. Do you have any ideas?
Note: The number of letters can vary from 1 to 10, and the number of digits varies as well (sometimes there are none, as shown above).
So if the value is "ABC1234" then I need to extract ABC and 1234
separately. But if the value is only "ABC", I just need the
"ABC".
Among the other solutions, I propose the one shown below:
Logic:
1) Replace every digit with 1 and find the position of the first 1 in the string. If
there is no digit in the string then use the whole string.
2) Extract the letters from the 1st position up to the position where
the digits start.
3) Extract the digits from the position where they start to the end. If no digit exists then set it to NULL.
--Dataset Preparation
WITH test (col) AS (
    SELECT 'ABC1234' FROM dual
    UNION ALL
    SELECT 'ABC' FROM dual
    UNION ALL
    SELECT 'dEfH456' FROM dual
    UNION ALL
    SELECT '123GHI' FROM dual
    UNION ALL
    SELECT '456' FROM dual
),
-- Position of the first digit of each purely alphanumeric value: every digit
-- is turned into '1' and the first '1' is located (0 when there is no digit).
located AS (
    SELECT
        col,
        INSTR(REGEXP_REPLACE(col, '[0-9]', '1'), '1', 1) AS first_digit
    FROM test
    WHERE REGEXP_LIKE(col, '^[a-zA-Z0-9]+$')
)
--Query
SELECT
    col AS Original_Column,
    -- everything before the first digit, or the whole value without digits
    CASE first_digit
        WHEN 0 THEN col
        ELSE SUBSTR(col, 1, first_digit - 1)
    END AS Col_Alp,
    -- everything from the first digit onwards, or NULL without digits
    CASE first_digit
        WHEN 0 THEN NULL
        ELSE SUBSTR(col, first_digit)
    END AS col_digit
FROM located;
Result:
SQL> /
Original_Column Col_Alp col_digit
---------- ----- -----
ABC1234 ABC 1234
ABC ABC NULL
dEfH456 dEfH 456
123GHI NULL 123GHI
456 NULL 456
Using SUBSTR (and INSTR and TRANSLATE):
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample values: letters followed by digits, letters only, a trailing letter
-- after the digits, and a value with a trailing space.
CREATE TABLE data (value) AS
    SELECT 'ABC1234' FROM dual
    UNION ALL
    SELECT 'ABC123D' FROM dual
    UNION ALL
    SELECT 'ABC ' FROM dual
    UNION ALL
    SELECT 'ABC' FROM dual
    UNION ALL
    SELECT 'DEFG456' FROM dual
    UNION ALL
    SELECT 'GHI' FROM dual
    UNION ALL
    SELECT 'JKLMNOPQRS9' FROM dual;
Query 1:
-- Split values of the form <prefix><digits> using only SUBSTR, INSTR and
-- TRANSLATE.
WITH located AS (
    -- TRANSLATE turns any real '-' into a space and every digit into '-',
    -- so the first '-' found by INSTR marks the first digit of the value.
    SELECT
        value,
        INSTR(
            TRANSLATE(value, '-1234567890', ' ----------'),
            '-',
            1
        ) AS first_digit
    FROM data
)
SELECT
    value,
    SUBSTR(value, 1, first_digit - 1) AS prefix,
    TO_NUMBER(SUBSTR(value, first_digit)) AS suffix
FROM located
WHERE SUBSTR(value, first_digit) IS NOT NULL
  -- The tail must consist of digits only: the digits are deleted (and a '-'
  -- becomes a space), so anything left over means a non-digit was present.
  AND TRANSLATE(SUBSTR(value, first_digit), '-1234567890', ' ') IS NULL
Results:
| VALUE | PREFIX | SUFFIX |
|-------------|------------|--------|
| ABC1234 | ABC | 1234 |
| DEFG456 | DEFG | 456 |
| JKLMNOPQRS9 | JKLMNOPQRS | 9 |
Try the query below for the scenarios mentioned; it does not split a value in which the letters are not followed by digits:
-- Split each value into its leading part ("string") and its trailing part
-- ("numbers").
WITH test (col) AS (
    SELECT 'ABC1234' FROM dual
    UNION ALL
    SELECT 'ABC' FROM dual
    UNION ALL
    SELECT 'dEfH456' FROM dual
    UNION ALL
    SELECT '123GHI' FROM dual
    UNION ALL
    SELECT '456' FROM dual
)
SELECT
    col,
    -- Blank out a trailing run of digits and trim it away. Anchoring the
    -- pattern with $ gives the same result as reversing the value, stripping
    -- leading digits and reversing back, without relying on REVERSE, which
    -- does not appear in the Oracle SQL function reference.
    TRIM(REGEXP_REPLACE(col, '[0-9]+$', ' ')) AS string,
    -- Blank out a leading run of letters and trim it away.
    TRIM(REGEXP_REPLACE(col, '^[a-zA-Z]+', ' ')) AS numbers
FROM test
If you would rather get NULL in the string column when a value could not be split, wrap the query and add a CASE expression:
-- Same split as a plain letters/digits query, but the "string" column is set
-- to NULL when it equals the "numbers" column, i.e. when nothing was split off.
WITH test (col) AS (
    SELECT 'ABC1234' FROM dual
    UNION ALL
    SELECT 'ABC' FROM dual
    UNION ALL
    SELECT 'dEfH456' FROM dual
    UNION ALL
    SELECT '123GHI' FROM dual
    UNION ALL
    SELECT '456' FROM dual
),
split_parts AS (
    SELECT
        col,
        -- Blank out a trailing run of digits and trim it away. Anchoring the
        -- pattern with $ gives the same result as reversing the value,
        -- stripping leading digits and reversing back, without relying on
        -- REVERSE, which does not appear in the Oracle SQL function reference.
        TRIM(REGEXP_REPLACE(col, '[0-9]+$', ' ')) AS string,
        -- Blank out a leading run of letters and trim it away.
        TRIM(REGEXP_REPLACE(col, '^[a-zA-Z]+', ' ')) AS numbers
    FROM test
)
SELECT
    v.col,
    CASE
        WHEN v.string = v.numbers THEN NULL
        ELSE v.string
    END AS string,
    v.numbers
FROM split_parts v
Would something like this do?
SQL> with test (col) as
2 (select '"ABC1234", "ABC", "dEf456", "123GHI", "456"' from dual),
3 inter as
4 (select trim(regexp_substr(replace(col, '"', ''), '[^,]+', 1, level)) token
5 from test
6 connect by level <= regexp_count(col, ',') + 1
7 )
8 select regexp_substr(token, '^[a-zA-Z]+') letters,
9 regexp_substr(token, '[0-9]+$') digits
10 from inter
11 where regexp_like(token, '^[a-zA-Z]+[0-9]+$');
LETTERS DIGITS
---------- ----------
ABC 1234
dEf 456
SQL>