Oracle SQL REGEX_LIKE - sql

SELECT first_name, last_name
FROM employees
WHERE REGEXP_LIKE (first_name, '^Ste(v|ph)en$');
The following query returns the first and last names for those employees with a first name of Steven or Stephen (where first_name begins with Ste and ends with en and in between is either v or ph)
is there a call that is opposite where the query will return everything that would not have (v or ph) between Ste and en?
so that it would return things like:
Stezen
Stellen
is it as simple as putting NOT in front of REGEXP_LIKE?

How about MINUS
SELECT *
FROM employees
WHERE REGEXP_LIKE( first_name , '^Ste([[:alpha:]])+en$')
MINUS
SELECT *
FROM employees
WHERE REGEXP_LIKE( first_name , '^Ste(v|ph)en$');
and this too:
WITH t AS
( SELECT 'Stezen' first_name FROM dual
UNION ALL
SELECT 'Steven' FROM dual
UNION ALL
SELECT 'Stephen' FROM dual
)
SELECT *
FROM t
WHERE REGEXP_LIKE( first_name , '^Ste([[:alpha:]])+en$')
AND NOT REGEXP_LIKE( first_name , '^Ste(v|ph)en$');

You need something like this:
SELECT 'Match'
FROM dual
WHERE REGEXP_LIKE ('Steden', '^Ste[^(v|ph)]en$');
EDIT
This will exclude any two (or more) letter combinations but still allow "v" :
SELECT 'Match'
FROM dual
WHERE REGEXP_LIKE ('Stephen', '^Ste[[:alpha:]]en$');
Since Oracle does not support look-ahead functionality, I will have to agree with others that we will have to deal with "v" explicitly, either by excluding the entire name(word) or at least specifying its exact position.
SELECT name
FROM WhateverTable
WHERE REGEXP_LIKE (name, '^Ste[[:alpha:]]en$') AND SUBSTR(name, 4, 1) <> 'v';

Two options:
The first query uses two REGEXP_LIKE tests: one regular expression to generically match; and one for excluding the invalid matches.
The second query uses REGEXP_SUBSTR to testfor a generic match and extract the sub-group of the match and then tests to see whether it should be exluded.
The third query then looks at how you can extend the query by having another table containing the match criteria and allows you to build and test multiple name variants.
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE tbl ( str ) AS
SELECT 'Stephen' FROM DUAL
UNION ALL SELECT 'Steven' FROM DUAL
UNION ALL SELECT 'Stepen' FROM DUAL
UNION ALL SELECT 'Steephen' FROM DUAL
UNION ALL SELECT 'Steeven' FROM DUAL
UNION ALL SELECT 'Steeven' FROM DUAL
UNION ALL SELECT 'Smith' FROM DUAL
UNION ALL SELECT 'Smithe' FROM DUAL
UNION ALL SELECT 'Smythe' FROM DUAL
UNION ALL SELECT 'Smythee' FROM DUAL;
CREATE TABLE exclusions ( prefix, exclusion, suffix ) AS
SELECT 'Ste', 'v|ph', 'en' FROM DUAL
UNION ALL SELECT 'Sm', 'ithe?|ythe', '' FROM DUAL;
Query 1:
SELECT str
FROM tbl
WHERE REGEXP_LIKE( str, '^Ste(\w+)en$' )
AND NOT REGEXP_LIKE( str, '^Ste(v|ph)en$' )
Results:
| STR |
|----------|
| Stepen |
| Steephen |
| Steeven |
| Steeven |
Query 2:
SELECT str
FROM (SELECT str,
REGEXP_SUBSTR( str, '^Ste(\w+)en$', 1, 1, NULL, 1 ) AS match
FROM tbl)
WHERE match IS NOT NULL
AND NOT REGEXP_LIKE( match, '^(v|ph)$' )
Results:
| STR |
|----------|
| Stepen |
| Steephen |
| Steeven |
| Steeven |
Query 3:
SELECT str
FROM tbl t
WHERE EXISTS ( SELECT 1
FROM exclusions e
WHERE REGEXP_LIKE( t.str, '^' || e.prefix || '(\w+)' || e.suffix || '$' )
AND NOT REGEXP_LIKE( t.str, '^' || e.prefix || '(' || e.exclusion || ')' || e.suffix || '$' )
)
Results:
| STR |
|----------|
| Stepen |
| Steephen |
| Steeven |
| Steeven |
| Smythee |

Related

Sort string and number combination in descending in oralce

I am trying to sort a combination of string and number in descending order .
Input :
P9S1
P7S1
P13S1
P12S2
P10S1
Expected output:
P13S1
P12S2
P10S1
P9S1
P7S1
Here is what I tried
Sample code:
with
inputs (firmware) as (
select 'P9S1' from dual union all
select 'P7S1' from dual union all
select 'P13S1' from dual union all
select 'P12S2' from dual union all
select 'P10S1' from dual
)
select firmware
from inputs
order by
regexp_replace(firmware, '\d+\.\d+') desc ;
But this doesn't work as expected. Any help would be appreciated.
Thanks
You did not actually explain how the strings should turned to numbers.
This would work for your dataset:
order by to_number(regexp_replace(firmware, '\D', '')) desc
The idea is to remove all non-digits characters from the string, turn the resulting string to a number, and use it for sorting.
with inputs (firmware) as (
select 'P9S1' from dual union all
select 'P7S1' from dual union all
select 'P13S1' from dual union all
select 'P12S2' from dual union all
select 'P10S1' from dual
)
select firmware
from inputs
order by to_number(regexp_replace(firmware, '\D', '')) desc ;
| FIRMWARE |
| :------- |
| P13S1 |
| P12S2 |
| P10S1 |
| P9S1 |
| P7S1 |

Replacing characters in a string when between double quotes

I would like to replace commas in my strings, between dynamic positions (ie. between double quotes). Note that I will not have more than 2 occurrences of double quotes in my strings if that matters.
My example:
'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun'
Desired output:
'randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun'
So far I've tried things with REGEXP_REPLACE() mixed with INSTR() but could not get anything done.
Cheers
Short & clean.
with t(str) as (select 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun' from dual)
select regexp_replace(str,'(^[^"]*|[^"]*$)|,','\1') as result
from t
-
+------------------------------------------------+
| RESULT |
+------------------------------------------------+
| randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun |
+------------------------------------------------+
SQL Fiddle
In addition -
Short and clean generic version
with t(str) as
(
select 'Well,you,went,uptown,riding,in,your,limousine' from dual
union all select 'With,your,fine,"Park, Avenue, clothes"' from dual
union all select 'You,had,the,"Dom, Perignon",in,your,hand,"And, the, spoon",up,your,nose' from dual
union all select '"And, when, you",wake,"up, in, the, morning"' from dual
union all select '"With, your, head, on, fire"' from dual
union all select '"And",your,"eyes, too, bloody","to, see",Go,"on, and, cry, in",your,coffee,"But","don''t","come, bitchin''","to, me"' from dual
)
select regexp_replace(str, '((^|").*?("|$))|,', '\1') as result
from t
--
+------------------------------------------------------------------------------------------------------------+
| RESULT |
+------------------------------------------------------------------------------------------------------------+
| Well,you,went,uptown,riding,in,your,limousine |
| With,your,fine,"Park Avenue clothes" |
| You,had,the,"Dom Perignon",in,your,hand,"And the spoon",up,your,nose |
| "And when you",wake,"up in the morning" |
| "With your head on fire" |
| "And",your,"eyes too bloody","to see",Go,"on and cry in",your,coffee,"But","don't","come bitchin'","to me" |
+------------------------------------------------------------------------------------------------------------+
SQL Fiddle
Assuming you are working on CSV, then it is possible that you will also have nested double quotes as per this sample data:
CREATE TABLE test_data ( value ) AS
SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun' FROM DUAL UNION ALL
SELECT 'randomtext,123,"A, ""BC"", D",sun' FROM DUAL;
You can use the regular expression ^(.*?)("([^\"]|\\")+")(.*)$ to match the terms before, inside the quotes and after and then replace commas in only the middle parts:
SELECT value,
REGEXP_SUBSTR( value, '^(.*?)("([^\"]|"")+")(.*)$', 1, 1, NULL, 1 )
|| REPLACE(
REGEXP_SUBSTR( value, '^(.*?)("([^\"]|"")+")(.*)$', 1, 1, NULL, 2 ),
','
)
|| REGEXP_SUBSTR( value, '^(.*?)("([^\"]|"")+")(.*)$', 1, 1, NULL, 4 ) replaced_value
FROM test_data
Which outputs:
VALUE | REPLACED_VALUE
:----------------------------------------------- | :---------------------------------------------
randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun | randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun
randomtext,123,"A, ""BC"", D",sun | randomtext,123,"A ""BC"" D",sun
db<>fiddle here
Update
If you need to handle multiple quoted terms in a string (with nested quotes):
CREATE TABLE test_data ( value ) AS
SELECT 'randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun' FROM DUAL UNION ALL
SELECT 'randomtext,123,"A, ""BC"", D",sun' FROM DUAL UNION ALL
SELECT 'E,"F, G",H,"I, ""J""", K' FROM DUAL UNION ALL
SELECT 'L,M,N' FROM DUAL;
Then you can use a recursive sub-query factoring clause:
WITH replacements( value, prefix, suffix ) AS (
SELECT value,
REGEXP_SUBSTR( value, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 1 )
|| REPLACE(
REGEXP_SUBSTR( value, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 2 ),
','
),
REGEXP_SUBSTR( value, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 4 )
FROM test_data
UNION ALL
SELECT value,
prefix
|| REGEXP_SUBSTR( suffix, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 1 )
|| REPLACE(
REGEXP_SUBSTR( suffix, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 2 ),
','
),
REGEXP_SUBSTR( suffix, '^(.*?)("([^\"]|"")+"|$)(.*)$', 1, 1, NULL, 4 )
FROM replacements
WHERE suffix IS NOT NULL
)
SELECT value,
prefix AS replaced_value
FROM replacements
WHERE suffix IS NULL;
Which outputs:
VALUE | REPLACED_VALUE
:----------------------------------------------- | :---------------------------------------------
L,M,N | L,M,N
randomtext,123,"JEAN SEBASTIEN, GUY, DANIEL",sun | randomtext,123,"JEAN SEBASTIEN GUY DANIEL",sun
randomtext,123,"A, ""BC"", D",sun | randomtext,123,"A ""BC"" D",sun
E,"F, G",H,"I, ""J""", K | E,"F G",H,"I ""J""", K
db<>fiddle here

How to construct a specific regular expression

I want to create a regular expression that replaces every character in a string except the last 2 with a '*'. For example:
'abcdefgh' --> '******gh'
I am using oracle's regexp_replace, I have written something like:
regexp_replace('dfdfdfdfsdf','(.*)(..)','*\2',1,0)
but it ends up with one "*"
dfdfdfdfsdf --> *df
I would appreciate your kind assistance
You can use LPAD.
select LPAD(SUBSTR('dfdfdfdfsdf',-2),LENGTH('dfdfdfdfsdf'),'*') from dual;
OUTPUT
*********df
CHECK LIVE DEMO HERE
So long as you are not worried about 1 or 2 character strings then you can use the regular expression .(..$)?:
Query
WITH test_data ( value ) AS (
SELECT NULL FROM DUAL UNION ALL
SELECT 'A' FROM DUAL UNION ALL
SELECT 'AB' FROM DUAL UNION ALL
SELECT 'ABC' FROM DUAL UNION ALL
SELECT 'ABCD' FROM DUAL UNION ALL
SELECT 'ABCDE' FROM DUAL UNION ALL
SELECT 'ABCDEF' FROM DUAL
)
SELECT value,
REGEXP_REPLACE(
value,
'.(..$)?',
'*\1'
)
FROM test_data
Outputs:
VALUE | REGEXP_REPLACE(VALUE,'.(..$)?','*\1')
:----- | :------------------------------------
null | null
A | *
AB | **
ABC | *BC
ABCD | **CD
ABCDE | ***DE
ABCDEF | ****EF
db<>fiddle here
You can try replacing this pattern by *:
.(?=.{2})
Live example: https://regex101.com/r/uueD6B/1

How to convert vertical string into horizontal in Oracle

O
N
K
A
R
how to convert it into ONKAR. reverse of it I know. But this I am not able to solve.
You can't do what you want generally without also having a second column which provides the ordering for each letter. Assuming you do have a column for the position, we can try:
SELECT LISTAGG(letter, '') WITHIN GROUP (ORDER BY position) word
FROM yourTable;
Demo
Data:
letter | position
O | 1
N | 2
K | 3
A | 4
R | 5
Listagg is right solution for strings up to 4000 bytes cuz it returns varchar2 data type. But for longer strings you may get clob data type.
with s (letter, position) as (
select 'O', 1 from dual union all
select 'N', 2 from dual union all
select 'K', 3 from dual union all
select 'A', 4 from dual union all
select 'R', 5 from dual)
select xmlcast(xmlagg(xmlelement(x, letter) order by position) as clob) c
from s;
C
---------------
ONKAR
You can use this as long as data result has all rows that you want to stick together.
with data as (select 'O' as letter from dual
union all
select 'N' from dual
union all
select 'K' from dual
union all
select 'A' from dual
union all
select 'R' from dual)
SELECT LISTAGG(letter, '') WITHIN GROUP (ORDER BY rownum)
FROM data;
If your data is in a single row separated by newline (ASCII 13) characters then you can just use REPLACE( value, CHR(13) ):
Oracle Setup:
CREATE TABLE test_data ( value ) AS
SELECT 'O' || CHR(13) || 'N' || CHR(13) || 'K' || CHR(13) || 'A' || CHR(13) || 'R' FROM DUAL
Query:
SELECT value, REPLACE( value, CHR(13) ) FROM test_data
Output:
VALUE | REPLACE(VALUE,CHR(13))
:-------- | :---------------------
O | ONKAR
N |
K |
A |
R |
db<>fiddle here

Oracle SQL - How to Cut out characters from a string with SUBSTR?

I have values like "ABC1234", "ABC", "DEF456", "GHI" etc. in a specific column which I need.
Now I need to split this string but only if the character (e.g. "ABC") are followed by digits.
So if the value is "ABC1234" then I need to cut out ABC and 1234 seperated. But if there is only "ABC" as a value, I just need the "ABC". I can't find any solution with SUBSTR. Do you have any idea?
Note: The length of the characters can differ from 1 to 10 and also the length from the digits (sometimes there isn't any like I showed you).
So if the value is "ABC1234" then I need to cut out ABC and 1234
seperated. But if there is only "ABC" as a value, I just need the
"ABC".
Amidst of other solutions, I propose one solution as shown below:
Logic:
1) Replace all the digits to 1. Check the position of the digit occurring in the string. If
there is no digit in the string then use the String.
2) Extract the alphabets from 1st position to the position where
digit starts.
3) Extract the digit from the position it starts till end. If digit doesnot exists the set it NULL
--Dataset Preparation
with test (col) as
(select 'ABC1234' from dual union all
select 'ABC' from dual union all
select 'dEfH456' from dual union all
select '123GHI' from dual union all
select '456' from dual
)
--Query
select col Original_Column,
CASE
WHEN (instr(regexp_replace(col,'[0-9]','1'),'1',1)) = 0
then col
else
substr( col,1,instr(regexp_replace(col,'[0-9]','1'),'1',1)-1)
end Col_Alp,
CASE
WHEN (instr(regexp_replace(col,'[0-9]','1'),'1',1)) = 0
then NULL
Else
substr( col,instr(regexp_replace(col,'[0-9]','1'),'1',1))
END col_digit
from test
where regexp_like(col, '^[a-zA-Z0-9]+$');
Result:
SQL> /
Original_Column Col_Alp col_digit
---------- ----- -----
ABC1234 ABC 1234
ABC ABC NULL
dEfH456 dEfH 456
123GHI NULL 123GHI
456 NULL 456
Using SUBSTR (and INSTR and TRANSLATE):
SQL Fiddle
Oracle 11g R2 Schema Setup:
CREATE TABLE data ( value ) AS
SELECT 'ABC1234' FROM DUAL UNION ALL
SELECT 'ABC123D' FROM DUAL UNION ALL
SELECT 'ABC ' FROM DUAL UNION ALL
SELECT 'ABC' FROM DUAL UNION ALL
SELECT 'DEFG456' FROM DUAL UNION ALL
SELECT 'GHI' FROM DUAL UNION ALL
SELECT 'JKLMNOPQRS9' FROM DUAL;
Query 1:
SELECT value,
SUBSTR( value, 1, first_digit - 1 ) AS prefix,
TO_NUMBER( SUBSTR( value, first_digit ) ) AS suffix
FROM (
SELECT value,
INSTR(
TRANSLATE( value, '-1234567890', ' ----------' ),
'-',
1
) AS first_digit
FROM data
)
WHERE SUBSTR( value, first_digit ) IS NOT NULL
AND TRANSLATE( SUBSTR( value, first_digit ), '-1234567890', ' ' ) IS NULL
Results:
| VALUE | PREFIX | SUFFIX |
|-------------|------------|--------|
| ABC1234 | ABC | 1234 |
| DEFG456 | DEFG | 456 |
| JKLMNOPQRS9 | JKLMNOPQRS | 9 |
Try this below query for scenarios mentioned , I didn't split if characters followed by numbers:
with test (col) as
(select 'ABC1234' from dual union all
select 'ABC' from dual union all
select 'dEfH456' from dual union all
select '123GHI' from dual union all
select '456' from dual
)
select col,reverse(trim(regexp_replace(reverse(col),'^[0-9]+',' '))) string ,trim(regexp_replace(col,'^[a-zA-Z]+',' ')) numbers from test
if like to move that characters&string to any place my case statement
with test (col) as
(select 'ABC1234' from dual union all
select 'ABC' from dual union all
select 'dEfH456' from dual union all
select '123GHI' from dual union all
select '456' from dual
)
select v.col,case when v.string=v.numbers THEN NULL ELSE string end string , v.numbers
from (select col,reverse(trim(regexp_replace(reverse(col),'^[0-9]+',' '))) string ,trim(regexp_replace(col,'^[a-zA-Z]+',' ')) numbers from test) v
Would something like this do?
SQL> with test (col) as
2 (select '"ABC1234", "ABC", "dEf456", "123GHI", "456"' from dual),
3 inter as
4 (select trim(regexp_substr(replace(col, '"', ''), '[^,]+', 1, level)) token
5 from test
6 connect by level <= regexp_count(col, ',') + 1
7 )
8 select regexp_substr(token, '^[a-zA-Z]+') letters,
9 regexp_substr(token, '[0-9]+$') digits
10 from inter
11 where regexp_like(token, '^[a-zA-Z]+[0-9]+$');
LETTERS DIGITS
---------- ----------
ABC 1234
dEf 456
SQL>