Replacing characters in a string when between double quotes - sql

I would like to remove the commas in my strings that lie between dynamic positions (i.e. between double quotes). Note that I will not have more than 2 occurrences of double quotes in my strings, if that matters.
My example:
'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun'
Desired output:
'randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun'
So far I've tried things with REGEXP_REPLACE() mixed with INSTR() but could not get anything done.
Cheers

Short & clean.
-- Remove the commas that sit between the double quotes.
-- The first alternative captures the unquoted head (everything before the
-- first quote) or the unquoted tail (everything after the last quote) and puts
-- it back through \1. A comma matched by the second alternative leaves \1
-- empty, so it is simply dropped.
WITH t (str) AS (
    SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun'
    FROM dual
)
SELECT
    REGEXP_REPLACE(str, '(^[^"]*|[^"]*$)|,', '\1') AS result
FROM t
-
+------------------------------------------------+
| RESULT |
+------------------------------------------------+
| randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun |
+------------------------------------------------+
SQL Fiddle
In addition -
Short and clean generic version
-- Generic form for any number of quoted sections.
-- Group 1 lazily matches an unquoted run: it starts at the beginning of the
-- string or at a closing quote and stops at the next quote or at the end of
-- the string. Such runs are written back unchanged via \1. A comma that is
-- matched on its own therefore lies inside quotes and is removed.
WITH t (str) AS (
    SELECT 'Well,you,went,uptown,riding,in,your,limousine' FROM dual
    UNION ALL
    SELECT 'With,your,fine,"Park, Avenue, clothes"' FROM dual
    UNION ALL
    SELECT 'You,had,the,"Dom, Perignon",in,your,hand,"And, the, spoon",up,your,nose' FROM dual
    UNION ALL
    SELECT '"And, when, you",wake,"up, in, the, morning"' FROM dual
    UNION ALL
    SELECT '"With, your, head, on, fire"' FROM dual
    UNION ALL
    SELECT '"And",your,"eyes, too, bloody","to, see",Go,"on, and, cry, in",your,coffee,"But","don''t","come, bitchin''","to, me"' FROM dual
)
SELECT
    REGEXP_REPLACE(str, '((^|").*?("|$))|,', '\1') AS result
FROM t
--
+------------------------------------------------------------------------------------------------------------+
| RESULT |
+------------------------------------------------------------------------------------------------------------+
| Well,you,went,uptown,riding,in,your,limousine |
| With,your,fine,"Park Avenue clothes" |
| You,had,the,"Dom Perignon",in,your,hand,"And the spoon",up,your,nose |
| "And when you",wake,"up in the morning" |
| "With your head on fire" |
| "And",your,"eyes too bloody","to see",Go,"on and cry in",your,coffee,"But","don't","come bitchin'","to me" |
+------------------------------------------------------------------------------------------------------------+
SQL Fiddle

Assuming you are working on CSV, then it is possible that you will also have nested double quotes as per this sample data:
-- Sample data: the second row has escaped (doubled) double quotes inside the quoted term.
CREATE TABLE test_data ( value ) AS
SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun' FROM DUAL UNION ALL
SELECT 'randomtext,123,"A, ""BC"", D",sun' FROM DUAL;
You can use the regular expression ^(.*?)("([^"]|"")+")(.*)$ to match the text before the quoted term, the quoted term itself (inside which a doubled "" is an escaped quote) and the text after it, and then remove the commas from the middle part only:
-- Split each value into: text before the first quoted term (group 1), the
-- quoted term itself (group 2) and the text after it (group 4); strip the
-- commas from group 2 only and glue the three parts back together.
-- Inside the quoted term a doubled "" is an escaped quote.
-- The bracket expression is written [^"] rather than [^\"]: in an Oracle
-- bracket expression a backslash is a literal character, so [^\"] would also
-- refuse to match a backslash and a quoted term containing one would make the
-- whole expression fail (yielding NULL).
-- Rows without any quoted term do not match and yield NULL.
SELECT value,
       REGEXP_SUBSTR( value, '^(.*?)("([^"]|"")+")(.*)$', 1, 1, NULL, 1 )
       || REPLACE(
            REGEXP_SUBSTR( value, '^(.*?)("([^"]|"")+")(.*)$', 1, 1, NULL, 2 ),
            ','
          )
       || REGEXP_SUBSTR( value, '^(.*?)("([^"]|"")+")(.*)$', 1, 1, NULL, 4 ) AS replaced_value
FROM   test_data
Which outputs:
VALUE | REPLACED_VALUE
:----------------------------------------------- | :---------------------------------------------
randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun | randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun
randomtext,123,"A, ""BC"", D",sun | randomtext,123,"A ""BC"" D",sun
db<>fiddle here
Update
If you need to handle multiple quoted terms in a string (with nested quotes):
-- Sample data: rows with one quoted term, with escaped (doubled) quotes,
-- with two quoted terms, and with no quoted term at all.
CREATE TABLE test_data ( value ) AS
SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun' FROM DUAL UNION ALL
SELECT 'randomtext,123,"A, ""BC"", D",sun' FROM DUAL UNION ALL
SELECT 'E,"F, G",H,"I, ""J""", K' FROM DUAL UNION ALL
SELECT 'L,M,N' FROM DUAL;
Then you can use a recursive sub-query factoring clause:
-- Recursive sub-query factoring: every step consumes the text up to and
-- including the next quoted term, appends it (with the commas of the quoted
-- term removed) to PREFIX and keeps the unprocessed remainder in SUFFIX.
-- A row is finished once its SUFFIX is NULL.
--   group 1 = text before the next quoted term
--   group 2 = the quoted term ("" inside it is an escaped quote), or the empty
--             match at the end of the string when no quoted term is left
--   group 4 = everything after group 2
-- The bracket expression is written [^"] rather than [^\"]: in an Oracle
-- bracket expression a backslash is a literal character, so [^\"] would not
-- match a backslash and a quoted term containing one would not be recognised.
WITH replacements( value, prefix, suffix ) AS (
  -- Anchor member: process the first quoted term of the original value.
  SELECT value,
         REGEXP_SUBSTR( value, '^(.*?)("([^"]|"")+"|$)(.*)$', 1, 1, NULL, 1 )
         || REPLACE(
              REGEXP_SUBSTR( value, '^(.*?)("([^"]|"")+"|$)(.*)$', 1, 1, NULL, 2 ),
              ','
            ),
         REGEXP_SUBSTR( value, '^(.*?)("([^"]|"")+"|$)(.*)$', 1, 1, NULL, 4 )
  FROM   test_data
UNION ALL
  -- Recursive member: process the next quoted term of the remaining suffix.
  SELECT value,
         prefix
         || REGEXP_SUBSTR( suffix, '^(.*?)("([^"]|"")+"|$)(.*)$', 1, 1, NULL, 1 )
         || REPLACE(
              REGEXP_SUBSTR( suffix, '^(.*?)("([^"]|"")+"|$)(.*)$', 1, 1, NULL, 2 ),
              ','
            ),
         REGEXP_SUBSTR( suffix, '^(.*?)("([^"]|"")+"|$)(.*)$', 1, 1, NULL, 4 )
  FROM   replacements
  WHERE  suffix IS NOT NULL
)
-- Only the fully processed rows (nothing left in SUFFIX) carry the final text.
SELECT value,
       prefix AS replaced_value
FROM   replacements
WHERE  suffix IS NULL;
Which outputs:
VALUE | REPLACED_VALUE
:----------------------------------------------- | :---------------------------------------------
L,M,N | L,M,N
randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun | randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun
randomtext,123,"A, ""BC"", D",sun | randomtext,123,"A ""BC"" D",sun
E,"F, G",H,"I, ""J""", K | E,"F G",H,"I ""J""", K
db<>fiddle here

Related

How to construct a specific regular expression

I want to create a regular expression that replaces every character in a string except the last 2 with a '*'. For example:
'abcdefgh' --> '******gh'
I am using oracle's regexp_replace, I have written something like:
regexp_replace('dfdfdfdfsdf','(.*)(..)','*\2',1,0)
but it ends up with one "*"
dfdfdfdfsdf --> *df
I would appreciate your kind assistance
You can use LPAD.
-- Take the last two characters and left-pad them with '*' up to the length
-- of the original string.
SELECT
    LPAD(
        SUBSTR('dfdfdfdfsdf', -2),
        LENGTH('dfdfdfdfsdf'),
        '*'
    )
FROM dual;
OUTPUT
*********df
CHECK LIVE DEMO HERE
So long as you are not worried about 1 or 2 character strings then you can use the regular expression .(..$)?:
Query
-- Every character is replaced by '*'. When the character is immediately
-- followed by the final two characters of the string, those two are captured
-- in group 1 and written back after the '*'.
with test_data (value) as (
    select null     from dual union all
    select 'A'      from dual union all
    select 'AB'     from dual union all
    select 'ABC'    from dual union all
    select 'ABCD'   from dual union all
    select 'ABCDE'  from dual union all
    select 'ABCDEF' from dual
)
select
    value,
    regexp_replace(value, '.(..$)?', '*\1')
from test_data
Outputs:
VALUE | REGEXP_REPLACE(VALUE,'.(..$)?','*\1')
:----- | :------------------------------------
null | null
A | *
AB | **
ABC | *BC
ABCD | **CD
ABCDE | ***DE
ABCDEF | ****EF
db<>fiddle here
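You can try replacing this pattern by * (note that it relies on a look-ahead, which Oracle's regular expression functions do not support, so it only works in engines such as PCRE):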
.(?=.{2})
Live example: https://regex101.com/r/uueD6B/1

Transform some substrings from many rows to a new table

I want to transform this output from the row "topic"...
SMARTBASE/N0184/1/MOISTURE/value
SMARTBASE/N0184/1/MOISTURE/unit
SMARTBASE/N0184/1/MOISTURE/timestamp
SMARTBASE/N0184/1/CONDUCTIVITY/value
SMARTBASE/N0184/1/CONDUCTIVITY/unit
SMARTBASE/N0184/1/CONDUCTIVITY/timestamp
to a new table like:
SENSORS|MOISTURE(value)|MOISTURE(unit)|CONDUCTIVITY(value)|CONDUCTIVITY(unit)
N0184|0.41437244624|Raw VWC|0.5297062938712509|mS/cm
first line: values of topic(row), second line: values of value(row)(values of mqtt-topics)
but that's a sensor of 500++... SMARTBASE is not always SMARTBASE, so regexp _... is not a good idea ... At the end this should be saved as a view.
Is that even possible? I don't know how to implement it... or how to start with it. to transform a row in a table, I can use the pivot-function, but the rest, I don't know.
my main problem: How can I access the individual values of the topic?
Use REGEXP_SUBSTR to get the substring components of your topic column and then use PIVOT:
Oracle Setup:
-- Sample data: one row per topic path together with its value; every value is
-- held as a string, whatever it represents.
CREATE TABLE table_name ( topic, value ) AS
SELECT 'SMARTBASE/N0184/1/MOISTURE/value', '0.414' FROM DUAL UNION ALL
SELECT 'SMARTBASE/N0184/1/MOISTURE/unit', 'Raw VWC' FROM DUAL UNION ALL
SELECT 'SMARTBASE/N0184/1/MOISTURE/timestamp', '2019-01-01T00:00:00.000' FROM DUAL UNION ALL
SELECT 'SMARTBASE/N0184/1/CONDUCTIVITY/value', '0.529' FROM DUAL UNION ALL
SELECT 'SMARTBASE/N0184/1/CONDUCTIVITY/unit', 'mS/cm' FROM DUAL UNION ALL
SELECT 'SMARTBASE/N0184/1/CONDUCTIVITY/timestamp', '2019-01-01T00:00:00.000' FROM DUAL;
Query:
-- Step 1 (topic_parts): cut each topic path into its slash-separated parts;
--   part 1 = sensor type, part 2 = sensor, part 4 = measurement name,
--   part 5 = kind of metadata (value / unit / timestamp).
-- Step 2: pivot the (measurement name, metadata kind) pairs into columns, one
--   output row per sensor type and sensor, and convert the string values to
--   numbers and timestamps.
WITH topic_parts AS (
    SELECT
        REGEXP_SUBSTR(topic, '[^/]+', 1, 1) AS sensor_type,
        REGEXP_SUBSTR(topic, '[^/]+', 1, 2) AS sensor,
        REGEXP_SUBSTR(topic, '[^/]+', 1, 4) AS measurement_name,
        REGEXP_SUBSTR(topic, '[^/]+', 1, 5) AS measurement_metadata_type,
        value
    FROM table_name
)
SELECT
    sensor_type,
    sensor,
    TO_NUMBER(moisture_value) AS moisture_value,
    moisture_unit,
    TO_TIMESTAMP(moisture_timestamp, 'YYYY-MM-DD"T"HH24:MI:SS.FF3') AS moisture_timestamp,
    TO_NUMBER(conductivity_value) AS conductivity_value,
    conductivity_unit,
    TO_TIMESTAMP(conductivity_timestamp, 'YYYY-MM-DD"T"HH24:MI:SS.FF3') AS conductivity_timestamp
FROM topic_parts
PIVOT (
    MAX(value)
    FOR (measurement_name, measurement_metadata_type)
    IN (
        ('MOISTURE', 'value')         AS moisture_value,
        ('MOISTURE', 'unit')          AS moisture_unit,
        ('MOISTURE', 'timestamp')     AS moisture_timestamp,
        ('CONDUCTIVITY', 'value')     AS conductivity_value,
        ('CONDUCTIVITY', 'unit')      AS conductivity_unit,
        ('CONDUCTIVITY', 'timestamp') AS conductivity_timestamp
    )
)
Output:
SENSOR_TYPE | SENSOR | MOISTURE_VALUE | MOISTURE_UNIT | MOISTURE_TIMESTAMP | CONDUCTIVITY_VALUE | CONDUCTIVITY_UNIT | CONDUCTIVITY_TIMESTAMP
:---------- | :----- | -------------: | :------------ | :------------------------------ | -----------------: | :---------------- | :------------------------------
SMARTBASE | N0184 | .414 | Raw VWC | 01-JAN-19 12.00.00.000000000 AM | .529 | mS/cm | 01-JAN-19 12.00.00.000000000 AM
db<>fiddle here

Oracle SQL - How to Cut out characters from a string with SUBSTR?

I have values like "ABC1234", "ABC", "DEF456", "GHI" etc. in a specific column which I need.
Now I need to split this string but only if the character (e.g. "ABC") are followed by digits.
So if the value is "ABC1234" then I need to cut out ABC and 1234 separately. But if there is only "ABC" as a value, I just need the "ABC". I can't find any solution with SUBSTR. Do you have any idea?
Note: The length of the characters can differ from 1 to 10 and also the length from the digits (sometimes there isn't any like I showed you).
So if the value is "ABC1234" then I need to cut out ABC and 1234
seperated. But if there is only "ABC" as a value, I just need the
"ABC".
Among other possible solutions, I propose the one shown below:
Logic:
1) Replace every digit with 1, then find the position of the first 1 (i.e. the first digit) in the string. If
there is no digit in the string, then use the whole string.
2) Extract the alphabets from 1st position to the position where
digit starts.
3) Extract the digits from the position where they start till the end. If no digit exists, then set it to NULL.
-- Dataset preparation
WITH test (col) AS (
    SELECT 'ABC1234' FROM dual UNION ALL
    SELECT 'ABC'     FROM dual UNION ALL
    SELECT 'dEfH456' FROM dual UNION ALL
    SELECT '123GHI'  FROM dual UNION ALL
    SELECT '456'     FROM dual
),
-- Position of the first digit of every purely alphanumeric value, or 0 when
-- the value has no digit. It is found by turning every digit into '1' and
-- locating the first '1'.
located AS (
    SELECT col,
           INSTR(REGEXP_REPLACE(col, '[0-9]', '1'), '1', 1) AS first_digit_pos
    FROM   test
    WHERE  REGEXP_LIKE(col, '^[a-zA-Z0-9]+$')
)
-- Query: everything before the first digit goes to Col_Alp (the whole value
-- when there is no digit); everything from the first digit onwards goes to
-- col_digit (NULL when there is no digit).
SELECT col AS Original_Column,
       CASE first_digit_pos
           WHEN 0 THEN col
           ELSE SUBSTR(col, 1, first_digit_pos - 1)
       END AS Col_Alp,
       CASE first_digit_pos
           WHEN 0 THEN NULL
           ELSE SUBSTR(col, first_digit_pos)
       END AS col_digit
FROM   located;
Result:
SQL> /
Original_Column Col_Alp col_digit
---------- ----- -----
ABC1234 ABC 1234
ABC ABC NULL
dEfH456 dEfH 456
123GHI NULL 123GHI
456 NULL 456
Using SUBSTR (and INSTR and TRANSLATE):
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data, including a value with a letter after the digits ('ABC123D')
-- and a value with a trailing space ('ABC ').
CREATE TABLE data ( value ) AS
SELECT 'ABC1234' FROM DUAL UNION ALL
SELECT 'ABC123D' FROM DUAL UNION ALL
SELECT 'ABC ' FROM DUAL UNION ALL
SELECT 'ABC' FROM DUAL UNION ALL
SELECT 'DEFG456' FROM DUAL UNION ALL
SELECT 'GHI' FROM DUAL UNION ALL
SELECT 'JKLMNOPQRS9' FROM DUAL;
Query 1:
-- located: TRANSLATE turns a literal '-' into a space and every digit into
-- '-', so the first '-' of the translated text marks the first digit of the
-- value (0 when the value has no digit).
WITH located AS (
    SELECT value,
           INSTR(TRANSLATE(value, '-1234567890', ' ----------'), '-', 1) AS first_digit
    FROM   data
)
SELECT value,
       SUBSTR(value, 1, first_digit - 1)     AS prefix,
       TO_NUMBER(SUBSTR(value, first_digit)) AS suffix
FROM   located
WHERE  SUBSTR(value, first_digit) IS NOT NULL
-- Keep only rows whose tail is made of digits alone: the second TRANSLATE
-- deletes every digit (and turns '-' into a space), so a NULL result means
-- that nothing but digits was there.
AND    TRANSLATE(SUBSTR(value, first_digit), '-1234567890', ' ') IS NULL
Results:
| VALUE | PREFIX | SUFFIX |
|-------------|------------|--------|
| ABC1234 | ABC | 1234 |
| DEFG456 | DEFG | 456 |
| JKLMNOPQRS9 | JKLMNOPQRS | 9 |
Try the query below for the scenarios mentioned; note that it does not split values in which the digits are followed by letters (e.g. '123GHI'):
-- string : the value with its trailing digits removed (the value is reversed
--          so that the trailing digits become a leading run, which is replaced
--          by a blank, trimmed away, and the rest is reversed back).
-- numbers: the value with its leading letters removed in the same way.
WITH test (col) AS (
    SELECT 'ABC1234' FROM dual UNION ALL
    SELECT 'ABC'     FROM dual UNION ALL
    SELECT 'dEfH456' FROM dual UNION ALL
    SELECT '123GHI'  FROM dual UNION ALL
    SELECT '456'     FROM dual
)
SELECT col,
       REVERSE(TRIM(REGEXP_REPLACE(REVERSE(col), '^[0-9]+', ' '))) string,
       TRIM(REGEXP_REPLACE(col, '^[a-zA-Z]+', ' '))                numbers
FROM   test
If you would rather get NULL for the string part when the value could not be split (i.e. string and numbers come out identical), wrap it in a CASE expression:
-- Same split as before, done in the PARTS step; the final SELECT blanks out
-- STRING whenever it is identical to NUMBERS, i.e. when nothing was split off.
WITH test (col) AS (
    SELECT 'ABC1234' FROM dual UNION ALL
    SELECT 'ABC'     FROM dual UNION ALL
    SELECT 'dEfH456' FROM dual UNION ALL
    SELECT '123GHI'  FROM dual UNION ALL
    SELECT '456'     FROM dual
),
parts AS (
    SELECT col,
           REVERSE(TRIM(REGEXP_REPLACE(REVERSE(col), '^[0-9]+', ' '))) string,
           TRIM(REGEXP_REPLACE(col, '^[a-zA-Z]+', ' '))                numbers
    FROM   test
)
SELECT v.col,
       CASE
           WHEN v.string = v.numbers THEN NULL
           ELSE v.string
       END string,
       v.numbers
FROM   parts v
Would something like this do?
SQL> with test (col) as
2 (select '"ABC1234", "ABC", "dEf456", "123GHI", "456"' from dual),
3 inter as
4 (select trim(regexp_substr(replace(col, '"', ''), '[^,]+', 1, level)) token
5 from test
6 connect by level <= regexp_count(col, ',') + 1
7 )
8 select regexp_substr(token, '^[a-zA-Z]+') letters,
9 regexp_substr(token, '[0-9]+$') digits
10 from inter
11 where regexp_like(token, '^[a-zA-Z]+[0-9]+$');
LETTERS DIGITS
---------- ----------
ABC 1234
dEf 456
SQL>

regexp_substr to bring back data before a foward slash

I have the following pattern of characters in a dataset. I need to manipulate the data and cross-reference it to another table. I'm trying to write a regexp_substr to bring back the data before each forward slash, starting from the left. For example:
abc/ab/123/zzz
so I need to get the following results back to then compare to another table
abc
abc/ab
abc/ab/123
I have worked out the other logic but am struggling with the various regular expressions.
Here is the recursive query with SUBSTR and INSTR:
-- Emit every leading part of the path that ends just before a slash, longest
-- first: each step cuts off the last '/segment' of the previous result.
with cte(col) as
(
  -- Anchor member. Values without any slash are skipped: INSTR would return 0
  -- and SUBSTR(col, 1, -1) would add a NULL row to the result.
  select substr(col, 1, instr(col, '/', -1) - 1) from mytable where instr(col, '/') > 0
  union all
  -- Recursive member: keep cutting while a slash is left.
  select substr(col, 1, instr(col, '/', -1) - 1) from cte where instr(col, '/') > 0
)
select col from cte;
And here is the query with REGEXP_REPLACE:
-- Emit every leading part of the path that ends just before a slash, longest
-- first: each step removes the last '/segment' of the previous result.
with cte(col) as
(
  -- Anchor member. Values without any slash are skipped: REGEXP_REPLACE would
  -- return them unchanged and they would be reported as if they were a prefix.
  select regexp_replace(col, '/[^/]*$', '') from mytable where instr(col, '/') > 0
  union all
  -- Recursive member: keep removing while a slash is left.
  select regexp_replace(col, '/[^/]*$', '') from cte where instr(col, '/') > 0
)
select col from cte;
You don't need a regular expression. You can do it with (faster) string functions:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample data: a single slash-delimited path.
CREATE TABLE test_data ( id, value ) AS
SELECT 1, 'abc/ab/123/zzz' FROM DUAL;
Query 1:
-- Walk through the slashes of every value from left to right with INSTR.
-- slash_pos holds the position of the current slash; each row of the result
-- is the leading part of the value up to and including that slash.
with slash_positions (id, value, slash_pos) as (
    -- first slash of each value (values without a slash are left out)
    select id,
           value,
           instr(value, '/')
    from   test_data
    where  instr(value, '/') > 0
  union all
    -- next slash after the current one, as long as there is one
    select id,
           value,
           instr(value, '/', slash_pos + 1)
    from   slash_positions
    where  instr(value, '/', slash_pos + 1) > 0
)
select id,
       substr(value, 1, slash_pos) as item
from   slash_positions
order  by id, slash_pos
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |
However, if you did want to use regular expressions then you could do:
Query 2:
-- Same result with regular expressions: '.*?/' lazily matches one segment
-- including its trailing slash. The n-th step appends the n-th such segment
-- to the text accumulated so far and stops when there is no further segment.
with prefixes (id, value, n, item) as (
    -- first segment (values without a slash are left out)
    select id,
           value,
           1,
           regexp_substr(value, '.*?/', 1, 1)
    from   test_data
    where  regexp_substr(value, '.*?/', 1, 1) is not null
  union all
    -- append the next segment, if any
    select id,
           value,
           n + 1,
           item || regexp_substr(value, '.*?/', 1, n + 1)
    from   prefixes
    where  regexp_substr(value, '.*?/', 1, n + 1) is not null
)
select id,
       item
from   prefixes
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |

Oracle SQL REGEX_LIKE

-- Employees whose first name is exactly "Steven" or "Stephen":
-- "Ste", then either "v" or "ph", then "en", anchored at both ends.
SELECT first_name, last_name
FROM employees
WHERE REGEXP_LIKE (first_name, '^Ste(v|ph)en$');
The following query returns the first and last names for those employees with a first name of Steven or Stephen (where first_name begins with Ste and ends with en and in between is either v or ph)
is there a call that is opposite where the query will return everything that would not have (v or ph) between Ste and en?
so that it would return things like:
Stezen
Stellen
is it as simple as putting NOT in front of REGEXP_LIKE?
How about MINUS
-- All employees whose first name is "Ste", one or more letters, then "en" ...
SELECT *
FROM employees
WHERE REGEXP_LIKE( first_name , '^Ste([[:alpha:]])+en$')
-- ... except those whose middle part is exactly "v" or "ph".
-- Note that MINUS also removes duplicate rows from the result.
MINUS
SELECT *
FROM employees
WHERE REGEXP_LIKE( first_name , '^Ste(v|ph)en$');
and this too:
-- Keep the names of the shape "Ste" + letters + "en" whose middle part is
-- neither "v" nor "ph": a generic match combined with an explicit exclusion.
with t as (
    select 'Stezen' first_name from dual
    union all
    select 'Steven' from dual
    union all
    select 'Stephen' from dual
)
select *
from   t
where  regexp_like(first_name, '^Ste([[:alpha:]])+en$')
and    not regexp_like(first_name, '^Ste(v|ph)en$');
You need something like this:
-- A bracket expression such as [^(v|ph)] is a character list, not a negated
-- alternation: it matches exactly ONE character that is none of ( v | p h ).
-- It would therefore reject names such as 'Stepen' and could never match a
-- longer middle part such as 'Stellen'.
-- To accept any run of letters between "Ste" and "en" except "v" and "ph",
-- combine a generic match with an explicit exclusion.
SELECT 'Match'
FROM dual
WHERE REGEXP_LIKE ('Steden', '^Ste[[:alpha:]]+en$')
AND NOT REGEXP_LIKE ('Steden', '^Ste(v|ph)en$');
EDIT
This will exclude any two (or more) letter combinations but still allow "v" :
-- [[:alpha:]] matches exactly one letter between "Ste" and "en", so 'Stephen'
-- (two letters in between) does not match, whereas 'Steven' would.
SELECT 'Match'
FROM dual
WHERE REGEXP_LIKE ('Stephen', '^Ste[[:alpha:]]en$');
Since Oracle does not support look-ahead functionality, I will have to agree with others that we will have to deal with "v" explicitly, either by excluding the entire name(word) or at least specifying its exact position.
-- Names with exactly one letter between "Ste" and "en"; that letter is the
-- 4th character of the name and is checked explicitly so that "v" is excluded.
SELECT name
FROM WhateverTable
WHERE REGEXP_LIKE (name, '^Ste[[:alpha:]]en$') AND SUBSTR(name, 4, 1) <> 'v';
Two options:
The first query uses two REGEXP_LIKE tests: one regular expression to generically match; and one for excluding the invalid matches.
The second query uses REGEXP_SUBSTR to test for a generic match and extract the sub-group of the match, and then tests whether it should be excluded.
The third query then looks at how you can extend the query by having another table containing the match criteria and allows you to build and test multiple name variants.
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Names to test.
CREATE TABLE tbl ( str ) AS
SELECT 'Stephen' FROM DUAL
UNION ALL SELECT 'Steven' FROM DUAL
UNION ALL SELECT 'Stepen' FROM DUAL
UNION ALL SELECT 'Steephen' FROM DUAL
UNION ALL SELECT 'Steeven' FROM DUAL
UNION ALL SELECT 'Steeven' FROM DUAL
UNION ALL SELECT 'Smith' FROM DUAL
UNION ALL SELECT 'Smithe' FROM DUAL
UNION ALL SELECT 'Smythe' FROM DUAL
UNION ALL SELECT 'Smythee' FROM DUAL;
-- Match criteria used by Query 3: a name prefix, a regular-expression
-- alternation of the middle parts to exclude, and a name suffix.
CREATE TABLE exclusions ( prefix, exclusion, suffix ) AS
SELECT 'Ste', 'v|ph', 'en' FROM DUAL
UNION ALL SELECT 'Sm', 'ithe?|ythe', '' FROM DUAL;
Query 1:
-- Generic shape first ("Ste", one or more word characters, "en"), then drop
-- the names whose middle part is exactly "v" or "ph".
select str
from   tbl
where  regexp_like(str, '^Ste(\w+)en$')
and    not regexp_like(str, '^Ste(v|ph)en$')
Results:
| STR |
|----------|
| Stepen |
| Steephen |
| Steeven |
| Steeven |
Query 2:
-- Extract the part between "Ste" and "en" once (NULL when the name does not
-- have that shape), then reject the names whose middle part is "v" or "ph".
with matched as (
    select str,
           regexp_substr(str, '^Ste(\w+)en$', 1, 1, null, 1) as middle_part
    from   tbl
)
select str
from   matched
where  middle_part is not null
and    not regexp_like(middle_part, '^(v|ph)$')
Results:
| STR |
|----------|
| Stepen |
| Steephen |
| Steeven |
| Steeven |
Query 3:
-- Table-driven variant: a name is returned when at least one row of
-- EXCLUSIONS matches its generic shape (prefix + word characters + suffix)
-- without the name being one of that row's excluded variants.
select t.str
from   tbl t
where  exists (
           select 1
           from   exclusions e
           where  regexp_like(t.str, '^' || e.prefix || '(\w+)' || e.suffix || '$')
           and    not regexp_like(t.str, '^' || e.prefix || '(' || e.exclusion || ')' || e.suffix || '$')
       )
Results:
| STR |
|----------|
| Stepen |
| Steephen |
| Steeven |
| Steeven |
| Smythee |