Reg Expression in oracle? - sql

I have a string like this '102/103/104/106'
Now if i pass 102 as input then output should be the next field that is 103. if 103 then output should be 104 and if 106 then output should be null(as for last field I don't have any further expression). I can do this using procedure by splitting the string into arrays and comparing. But can I do this through sql statement something like this
select '102/103/104/106' from dual where [expression 102 or 103].
Thanks!!

You can do it in pure SQL with something like this:
-- Convert the '/'-delimited string into one row per element, then pair each
-- element with its successor. Elements may be of any length (the previous
-- SUBSTR/INSTR arithmetic only worked for three-character elements).
with vals as (
select
-- level-th run of characters that are not '/'
regexp_substr('102/103/104/106', '[^/]+', 1, level) col,
level lvl
from dual
-- number of elements = number of '/' separators + 1
connect by level <= length('102/103/104/106')-length(replace('102/103/104/106', '/'))+1
)
select *
from (
select col,
lead(col) over (order by lvl) next_val -- find the next value in the list
from vals
)
-- next_val is NULL for the last element; no row is returned for an unknown value
where col = :val;
Basically, convert your string into rows by parsing it. Then use the analytic lead to find the "next" value.

-- p_whole_string = '102/103/104/106'
-- p_prev = '102'
-- The pattern matches p_prev as a complete element (preceded by the start of
-- the string or a '/'), then a '/', then the following element.
-- The last argument (2) makes REGEXP_SUBSTR return only the second
-- parenthesised group, i.e. that following element.
-- The result is NULL when p_prev is the last element or does not occur.
select
regexp_substr(p_whole_string, '(^|/)' || p_prev || '/([^/]+)', 1, 1, null, 2)
as next
from dual;
Added NVL to return last value if 106 is entered:
-- Successor of '106' in the list; when there is no successor, fall back to
-- the final element of the list instead of returning NULL.
SELECT NVL(
           REGEXP_SUBSTR('102/103/104/106', '(^|/)' || '106' || '/([^/]+)', 1, 1, NULL, 2),
           REGEXP_SUBSTR('102/103/104/106', '[^/]+$')
       ) AS nxt
FROM dual
/

Works for Oracle from 10g up.
-- Inner call: pick the element 102 together with its successor ('102/103').
-- Outer call: keep the second '/'-free token of that fragment.
SELECT REGEXP_SUBSTR(
           REGEXP_SUBSTR('102/103/104/106', '(^|/)102/[^/]+'),
           '[^/]+', 1, 2
       ) val
FROM dual;
with parameters looks like this:
-- p_string_to_search = '102/103/104/106'
-- p_string_to_match = '102'
-- Inner call: the matched element plus its successor (e.g. '102/103').
-- Outer call: the second '/'-free token of that fragment.
SELECT REGEXP_SUBSTR(
           REGEXP_SUBSTR(p_string_to_search, '(^|/)' || p_string_to_match ||'/[^/]+'),
           '[^/]+', 1, 2
       ) val
FROM dual;

Related

Substring from underscore and onwards in Oracle

I have a string with under score and some characters. I need to apply substring and get values to the left excluding underscore. So I applied below formula and its working correctly for those strings which have underscore (_). But for strings without (_) it is bringing NULL. Any suggestions how this can be handled in the substring itself.
Ex: ABC_BASL ---> Works correctly; ABC ---> gives null
My formula as below -
select SUBSTR('ABC_BAS',1,INSTR('ABC_BAS','_')-1) from dual;
ABC
select SUBSTR('ABC',1,INSTR('ABC','_')-1) from dual;
(NULL)
You could use a CASE expression to first check for an underscore:
-- Keep the text before the first underscore; values that contain no
-- underscore are passed through unchanged.
WITH yourTable AS (
    SELECT 'ABC_BAS' AS col FROM dual
    UNION ALL
    SELECT 'ABC' FROM dual
)
SELECT
    CASE
        WHEN INSTR(col, '_') > 0 THEN SUBSTR(col, 1, INSTR(col, '_') - 1)
        ELSE col
    END AS col_out
FROM yourTable;
Use regular expression matching:
-- Group 1 captures everything before the first '_' (or the whole string when
-- there is none). [^_]* is used because a greedy .* would also consume the
-- underscore and everything after it.
SELECT REGEXP_SUBSTR('ABC_BAS', '^([^_]*)(_|$)', 1, 1, NULL, 1) FROM DUAL;
returns 'ABC', and
-- Same pattern on a value without '_': group 1 is then the whole string,
-- because the second group matches the end of the string.
SELECT REGEXP_SUBSTR('ABC', '^([^_]*)(_|$)', 1, 1, NULL, 1) FROM DUAL;
also returns 'ABC'.
db<>fiddle here
EDIT
The above gives correct results, but I missed the easiest possible regular expression to do the job:
SELECT REGEXP_SUBSTR('ABC_BAS', '[^_]*') FROM DUAL;
returns 'ABC', as does
SELECT REGEXP_SUBSTR('ABC', '[^_]*') FROM DUAL;
db<>fiddle here
Yet another approach is to use the DECODE in the length parameter of the substr as follows:
-- Length argument: the full length of str when it contains no '_',
-- otherwise the number of characters before the first '_'.
substr(str,
1,
decode(instr(str,'_'), 0, length(str), instr(str,'_') - 1)
)
You seem to want everything up to the first '_'. If so, one method uses regexp_replace():
-- Replace the whole value with the text that precedes its first '_'.
-- [^_]* (rather than [^_]+) also handles a value that starts with '_',
-- which yields an empty (NULL) result instead of being left unchanged.
-- Values without any '_' do not match and are returned as they are.
select regexp_replace(str, '^([^_]*)_.*$', '\1')
from (select 'ABC' as str from dual union all
select 'ABC_BAS' from dual
) s;
A simpler method is:
-- Leading run of non-underscore characters of each sample value.
WITH s AS (
    SELECT 'ABC' AS str FROM dual
    UNION ALL
    SELECT 'ABC_BAS' FROM dual
)
SELECT REGEXP_SUBSTR(str, '^[^_]+')
FROM s
Here is a db<>fiddle.
I'd use
regexp_replace(text,'_.*')
or if performance was a concern,
substr(text, 1, instr(text||'_', '_') -1)
For example,
-- Side-by-side comparison of the two expressions on three sample values
-- (no underscore, one underscore, two underscores).
with demo(text) as
( select column_value
from table(sys.dbms_debug_vc2coll('ABC', 'ABC_DEF', 'ABC_DEF_GHI')) )
select text
, regexp_replace(text,'_.*') -- delete everything from the first '_' onwards
, substr(text, 1, instr(text||'_', '_') -1) -- the appended '_' guarantees instr finds one
from demo;
TEXT REGEXP_REPLACE(TEXT,'_.*') SUBSTR(TEXT,1,INSTR(TEXT||'_','_')-1)
------------ --------------------------- -------------------------------------
ABC ABC ABC
ABC_DEF ABC ABC
ABC_DEF_GHI ABC ABC
Ok i think i got it. Add nvl to the substring and insert the condition as below -
-- SUBSTR returns NULL when the string has no '_' (INSTR is 0, so the length
-- is -1); NVL then falls back to the original string.
select nvl(substr('ABC',1,instr('ABC','_')-1),'ABC') from dual;

SQL to find upper case words from a column

I have a description column in my table and its values are:
This is a EXAMPLE
This is a TEST
This is a VALUE
I want to display only EXAMPLE, TEST, and VALUE from the description column.
How do I achieve this?
This could be a way:
-- a test case
with test(id, str) as (
select 1, 'This is a EXAMPLE' from dual union all
select 2, 'This is a TEST' from dual union all
select 3, 'This is a VALUE' from dual union all
select 4, 'This IS aN EXAMPLE' from dual
)
-- concatenate the resulting words, per id, in their original order
select id, listagg(str, ' ') within group (order by pos)
from (
-- tokenize the strings by using the space as a word separator:
-- row number `level` of each id carries the level-th word
SELECT id,
trim(regexp_substr(str, '[^ ]+', 1, level)) str,
level as pos
FROM test t
-- generate another row as long as a (level - 1)-th space exists,
-- i.e. as long as there is one more word to extract
CONNECT BY instr(str, ' ', 1, level - 1) > 0
-- stay within the same source row
and prior id = id
-- common idiom to keep Oracle from reporting a CONNECT BY loop when a row
-- is connected to itself
and prior sys_guid() is not null
)
-- only get the uppercase words
where regexp_like(str, '^[A-Z]+$')
group by id
The idea is to tokenize every string, then cut off the words that are not made by upper case characters and then concatenate the remaining words.
The result:
1 EXAMPLE
2 TEST
3 VALUE
4 IS EXAMPLE
If you need to handle some other character as an upper case letter, you may edit the where condition to filter for the matching words; for example, with '_':
-- test data, including a word that contains an underscore (id 5)
with test(id, str) as (
select 1, 'This is a EXAMPLE' from dual union all
select 2, 'This is a TEST' from dual union all
select 3, 'This is a VALUE' from dual union all
select 4, 'This IS aN EXAMPLE' from dual union all
select 5, 'This IS AN_EXAMPLE' from dual
)
-- concatenate the surviving words, per id, in their original order
select id, listagg(str, ' ') within group (order by pos)
from (
-- one row per space-separated word of each string
SELECT id,
trim(regexp_substr(str, '[^ ]+', 1, level)) str,
level as pos
FROM test t
-- generate another row as long as a (level - 1)-th space exists
CONNECT BY instr(str, ' ', 1, level - 1) > 0
-- stay within the same source row
and prior id = id
-- common idiom to keep Oracle from reporting a CONNECT BY loop
and prior sys_guid() is not null
)
-- keep words made only of upper-case letters and underscores
where regexp_like(str, '^[A-Z_]+$')
group by id
gives:
1 EXAMPLE
2 TEST
3 VALUE
4 IS EXAMPLE
5 IS AN_EXAMPLE
Here's another solution. It was inspired by Aleksej's answer.
The idea? Get all the words. Then aggregate only fully uppercased to a list.
Sample data:
create table descriptions (ID int, Description varchar2(100));
insert into descriptions (ID, Description)
select 1 as ID, 'foo Foo FOO bar Bar BAR' as Description from dual
union all select 2, 'This is an EXAMPLE TEST Description VALUE' from dual
;
Query:
-- One output row per description: its fully upper-case words,
-- comma-separated in their original order.
select id, Description, listagg(word, ',') within group (order by pos) as UpperCaseWords
from (
-- one row per word: the level-th run of word characters in the description
select
id, Description,
trim(regexp_substr(Description, '\w+', 1, level)) as word,
level as pos
from descriptions t
-- continue while a (level - 1)-th run of whitespace exists,
-- i.e. while another word may follow
connect by regexp_instr(Description, '\s+', 1, level - 1) > 0
-- stay within the same source row
and prior id = id
-- common idiom to keep Oracle from reporting a CONNECT BY loop
and prior sys_guid() is not null
)
-- a word counts as upper case when upper-casing leaves it unchanged
-- (a purely numeric token would pass this filter as well)
where word = upper(word)
group by id, Description
Result:
ID | DESCRIPTION | UPPERCASEWORDS
-- | ----------------------------------------- | ------------------
1 | foo Foo FOO bar Bar BAR | FOO,BAR
2 | This is an EXAMPLE TEST Description VALUE | EXAMPLE,TEST,VALUE
It is possible to achieve this thanks to the REGEXP_REPLACE function:
SELECT REGEXP_REPLACE(my_column, '(^[A-Z]| |[a-z][A-Z]*|[A-Z]*[a-z])', '') AS Result FROM my_table
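It uses a regex that removes the first upper-case character of the line, every space, and every lower-case character (together with any upper-case letters directly attached to it), replacing each match with the empty string.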
Try this:
SELECT SUBSTR(column_name, INSTR(column_name,' ',-1) + 1)
FROM your_table;
This should do the trick:
-- Inner REGEXP_REPLACE deletes: an upper-case letter at the very start of the
-- text, every lower-case letter, any upper-case letter that is followed by
-- lower-case letters, and commas.
-- Outer REGEXP_REPLACE squeezes each run of spaces into a single space.
-- A space is prepended first so the result always starts with exactly one
-- space, which SUBSTR(..., 2, 9999) then cuts off.
SELECT SUBSTR(REGEXP_REPLACE(' ' || REGEXP_REPLACE(description, '(^[A-Z]|[a-z]|[A-Z][a-z]+|[,])', ''), ' +', ' '), 2, 9999) AS only_upper
FROM (
select 'Hey IF you do not know IT, This IS a test of UPPERCASE and IT, with good WILL and faith, Should BE fine to be SHOWN' description
from dual
)
I have added a condition to strip commas; you can add other special characters to remove inside those brackets.
ONLY_UPPER
-----------------------------------
IF IT IS UPPERCASE IT WILL BE SHOWN
This is a function based on some of the regular expression answers.
-- capwords: returns the words of orig_string that consist only of upper-case
-- letters and underscores, separated by single spaces.
--   orig_string  text to filter
--   returns      the remaining words; raises ORA-06502 if an intermediate
--                result is longer than the 80-character work buffer
create or replace function capwords(orig_string varchar2)
return varchar2
as
out_string varchar2(80);
begin
-- remove every run of upper-case letters/underscores that touches a
-- lower-case letter on either side, together with that lower-case letter
out_string := REGEXP_REPLACE(orig_string, '([a-z][A-Z_]*|[A-Z_]*[a-z])', '');
-- collapse runs of spaces into one; ' +' is used because a pattern that can
-- match the empty string (such as '( *)') is not a safe way to do this
out_string := REGEXP_REPLACE(trim(out_string), ' +', ' ');
return out_string;
end;
/
Removes strings of upper case letters and underscores that have lower case letters
on either end. Replaces multiple adjacent spaces with one space.
Trims extra spaces off of the ends. Assumes max size of 80 characters.
Slightly edited output:
>select id,str,capwords(str) from test;
ID STR CAPWORDS(STR)
---------- ------------------------------ ------------------
1 This is a EXAMPLE EXAMPLE
2 This is a TEST TEST
3 This is a VALUE VALUE
4 This IS aN EXAMPLE IS EXAMPLE
5 This is WITH_UNDERSCORE WITH_UNDERSCORE
6 ThiS IS aN EXAMPLE IS EXAMPLE
7 thiS IS aN EXAMPLE IS EXAMPLE
8 This IS wiTH_UNDERSCORE IS
If you only need to "display" the result without changing the values in the column then you can use CASE WHEN (in the example Description is the column name):
Select CASE WHEN Description like '%EXAMPLE%' then 'EXAMPLE' WHEN Description like '%TEST%' then 'TEST' WHEN Description like '%VALUE%' then 'VALUE' END From [yourTable]
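Whether these conditions are case sensitive depends on the database and its collation: with a case-insensitive collation (common in SQL Server) they match regardless of case, whereas LIKE in Oracle is case sensitive by default.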
You can add Else '<Value if all conditions are wrong>' before the END in case there are descriptions that don't contain any of the values. The example will return NULL for those cases, and writing ELSE Description will return the original value of that row.
It also works if you need to update. It is simple and practical, easy way out, haha.

remove extra + from text SQL

this refers to a question asked by someone else previously
previous question
my question is how do I adapt this solution so that before any function/script is ran the name and value fields are stripped of any additional + and updated so no additional + remain.
For e.g.
Name Value
A+B+C+ 1+2+3+
A++B 1++2
this should be updated to
Name Value
A+B+C 1+2+3
A+B 1+2
once this update has taken place, I can run the solution provided in the previous question.
Thanks
You need to replace ++ with + and to remove the + at the end of the string.
/* sample data */
WITH input(Name, Value) AS (
    SELECT 'A+B+C+', '1+2+3+' FROM dual
    UNION ALL
    SELECT 'A++B', '1++2' FROM dual
)
/* query: collapse runs of '+' into one, then strip '+' from both ends */
SELECT
    TRIM('+' FROM REGEXP_REPLACE(name, '\+{2,}', '+')) AS name,
    TRIM('+' FROM REGEXP_REPLACE(value, '\+{2,}', '+')) AS value
FROM input
If you need to update a table, you may need:
-- Collapse runs of '+' into one and strip '+' from both ends of name and value.
-- The WHERE clause limits the update to rows where at least one of the two
-- columns actually changes (contains '++' or starts/ends with '+').
update yourTable
set name = trim('+' from regexp_replace(name, '\+{2,}', '+') ),
value= trim('+' from regexp_replace(value, '\+{2,}', '+') )
where name like '%++%' or name like '+%' or name like '%+'
or value like '%++%' or value like '+%' or value like '%+';
In a more compact way, without the external trim ( assuming you have no leading +):
/* sample data */
WITH input(Name, Value) AS (
    SELECT 'A+B+C+', '1+2+3+' FROM dual
    UNION ALL
    SELECT 'A++B+++C+', '1++2+++3+' FROM dual
    UNION ALL
    SELECT 'A+B', '1+2' FROM dual
)
/* query: a run of '+' followed by another '+' or by the end of the string
   is replaced by whatever followed it (one '+' or nothing) */
SELECT
    REGEXP_REPLACE(name, '(\+)+(\+|$)', '\2') AS name,
    REGEXP_REPLACE(value, '(\+)+(\+|$)', '\2') AS value
FROM input
You could use something on the lines of:
Select substr('1+2+3+', 0, length('1+2+3+')-1) from dual ;
Select replace('1++2', '++', '+') from dual;
I'm assuming you have the output already present in a variable you can play with.
EDIT:
Here's a function that can solve the problem (You can call this function in your select clauses thereby solving the problem):
-- ReplaceChars: collapses every run of '+' in name_in into a single '+' and
-- removes a trailing '+'.
--   name_in  text to clean
--   returns  the cleaned text (NULL when nothing is left)
-- A regular expression is used so that runs of three or more '+' collapse in
-- one pass, and no fixed-size work variable limits the input length.
CREATE OR REPLACE Function ReplaceChars
( name_in IN varchar2 )
RETURN varchar2
IS
BEGIN
RETURN RTRIM(REGEXP_REPLACE(name_in, '\+{2,}', '+'), '+');
END;
/
You can use the below:
LTRIM(RTRIM (REGEXP_REPLACE (column_name, '\+{2,}', '+'), '+'),'+')
Eg:
SELECT LTRIM(RTRIM (REGEXP_REPLACE ('+A+++B+C+++D++', '\+{2,}', '+'), '+'),'+') VALUE
FROM DUAL;
returns output: A+B+C+D
If you're working in SSMS (SQL Server), give this a go:
-- T-SQL; TRIM(... FROM ...) requires SQL Server 2017 or later.
-- Step 1: collapse runs of '+' with nested REPLACE calls
--         ('+' -> '<>', drop every '><', '<>' -> '+').
-- Step 2: strip '+' from both ends.
-- NOTE(review): assumes colname never contains '<' or '>' - choose other
-- marker characters if it can.
UPDATE tablename
SET colname = TRIM('+' FROM REPLACE(REPLACE(REPLACE(colname, '+', '<>'), '><', ''), '<>', '+'))
WHERE colname LIKE '%++%'
   OR colname LIKE '+%'
   OR colname LIKE '%+';

Select query where a column contains set of numbers

I have to write a query on a table which has a varchar column. Value in this column may have a numbers as substring
Let's say the possible column values are
Data
-----------------------
abc=123/efg=143/ijk=163
abc=123/efg=153/ijk=173
Now I have to query the table for rows where data contains the numbers [123,143,163] but doesn't contain any other number.
How can I write this select query ?
This looks like a very bad database design. If you are interested in separate information stored in a string, then don't store the string but the separate information in separate columns. Change this if possible and such queries will become super simple.
However, for the time being it's easy to find the records as described, provided there are always three numbers in the string as in your sample data. Add a slash at the end of the string, so every number has a leading = and a trailing /. Then look up the numbers in the string with LIKE.
-- A '/' is appended so that every number, including the last one, is
-- enclosed by '=' and '/'; each LIKE then matches one complete number.
select *
from mytable
where data || '/' like '%=123/%'
and data || '/' like '%=143/%'
and data || '/' like '%=163/%';
If these three numbers are in the string, then all numbers match. Hence there is no other number not matching.
If there can be more numbers in the string but no duplicates, then count equal signs to determine how many numbers are in the string:
-- All three numbers must be present, and the string must hold exactly three
-- key=value pairs (one '=' per pair).
SELECT *
FROM mytable
WHERE regexp_count(data, '=') = 3
  AND data || '/' LIKE '%=123/%'
  AND data || '/' LIKE '%=143/%'
  AND data || '/' LIKE '%=163/%';
And here is a query accepting even duplicate numbers in the string:
-- Every key=value pair must carry one of the three allowed numbers: the total
-- number of '=' has to equal the number of pairs whose value is 123, 143 or 163.
SELECT *
FROM mytable
WHERE regexp_count(data, '=') >= 3
  AND regexp_count(data, '=') = regexp_count(data || '/', '=123/')
                              + regexp_count(data || '/', '=143/')
                              + regexp_count(data || '/', '=163/');
Oracle Setup:
-- Sample table with one matching and one non-matching row.
CREATE TABLE table_name ( data ) AS
SELECT 'abc=123/efg=143/ijk=163' FROM DUAL UNION ALL
SELECT 'abc=123/efg=153/ijk=173' FROM DUAL;
Then you can create some virtual columns to represent the data:
-- One virtual column per key: the digits that follow 'key=' when that pair is
-- delimited by the start/end of the string or by '/'.
ALTER TABLE table_name
    ADD abc GENERATED ALWAYS AS (
        TO_NUMBER(REGEXP_SUBSTR(data, '(^|/)abc=(\d+)(/|$)', 1, 1, NULL, 2))
    ) VIRTUAL;
ALTER TABLE table_name
    ADD efg GENERATED ALWAYS AS (
        TO_NUMBER(REGEXP_SUBSTR(data, '(^|/)efg=(\d+)(/|$)', 1, 1, NULL, 2))
    ) VIRTUAL;
ALTER TABLE table_name
    ADD ijk GENERATED ALWAYS AS (
        TO_NUMBER(REGEXP_SUBSTR(data, '(^|/)ijk=(\d+)(/|$)', 1, 1, NULL, 2))
    ) VIRTUAL;
And can add appropriate indexes if you want:
CREATE INDEX table_name__abc_efg_ijk__idx ON table_name( abc, efg, ijk );
Query:
Then if you are only going to have those three keys you can do:
SELECT abc, efg, ijk
FROM table_name
WHERE abc = 123
AND efg = 143
AND ijk = 163;
However, if you could get more than three keys and want ignore additional values then you could do:
-- Collection type used to compare the wanted numbers with those found in a row.
CREATE TYPE intlist AS TABLE OF INT;
/
-- Rows whose numbers are exactly the wanted collection.
-- The outer table needs the alias t because the subquery refers to t.data.
SELECT *
FROM table_name t
WHERE INTLIST( 143, 123, 163 )
=
CAST(
MULTISET(
-- one row per key=value pair of the current row: the LEVEL-th pair's digits
SELECT TO_NUMBER(
REGEXP_SUBSTR(
t.data,
'[^/=]+=(\d+)(/|$)',
1,
LEVEL,
NULL,
1
)
)
FROM DUAL
-- as many rows as there are key=value pairs in the string
CONNECT BY LEVEL <= REGEXP_COUNT( t.data, '[^/=]+=(\d+)(/|$)' )
)
AS INTLIST
);
This has the added bonus that INTLIST(123, 143, 163) can be passed as a bind parameter (depending on the client program you are using and the Oracle driver) so that you can simply change how many and what numbers you want to filter for (and that the order of the values does not matter).
Also, if you want it to contain at least those values then you can change INTLIST( ... ) = to INTLIST( ... ) SUBMULTISET OF.

Substring from string oracle

I have strings : 'A-20-1-1', 'A-10-10', 'A-10-11-1'
And result from substringing:
'A-20-1-1', 'A-10-10', 'A-10-11-1'
1 10 11
This code doesn't work correctly:
Select Substr(string, instr(string,'-',1,2)+1, instr(string, '-',1,2)-1)
From dual;
First I find the second '-', then the next one if it exists. If it doesn't, I use the string length.
-- Sample data
CREATE TABLE a (b VARCHAR2(20));
INSERT INTO a VALUES ('A-20-1-1');
INSERT INTO a VALUES ('A-10-10');
INSERT INTO a VALUES ('A-10-11-1');
-- z = the text between the second '-' and the third '-', or up to the end of
-- the string when there is no third '-'.
SELECT
    b,
    SUBSTR(
        b,
        INSTR(b, '-', 1, 2) + 1,
        CASE INSTR(b, '-', 1, 3)
            WHEN 0 THEN LENGTH(b) - INSTR(b, '-', 1, 2)
            ELSE INSTR(b, '-', 1, 3) - INSTR(b, '-', 1, 2) - 1
        END
    ) z
FROM a;
gives us what you need:
A-20-1-1 1
A-10-10 10
A-10-11-1 11
At first I find the second - sign position, and then get a substring of the value from this position to the rest of the value. Then I exclude the part of the string from previous step if the string has the - sign. Like this:
-- Take everything after the second '-', then remove from it the tail that
-- starts at the third '-'. When there is no third '-', INSTR returns 0, the
-- second SUBSTR yields the whole string, and REPLACE removes nothing.
with t(d) as (
select 'A-20-1-1' from dual union all
select 'A-10-10-4' from dual union all
select 'A-10-11-1' from dual
)
select REPLACE(SUBSTR(d, INSTR(d, '-', 1, 2) + 1), SUBSTR(d, INSTR(d, '-', 1, 3))) as res from t;
RES
---
1
10
11