Oracle REGEXP_REPLACE function to find decimal and special characters - sql

I am working with table data that contains strings with decimal points and a forward slash, like below:
info
1/2.2.2
2/1.1.1
3/1.1.11
I need to use a regular expression to replace the data like below:
info
1/2.2
2/1.1
3/1.1

Don't use a (slow) regular expression, use simple (faster) string functions instead:
-- Keep everything before the second '.'; values with fewer than two dots pass through unchanged.
SELECT src.info,
       CASE src.second_dot
         WHEN 0 THEN src.info
         ELSE SUBSTR(src.info, 1, src.second_dot - 1)
       END AS part
FROM (
       -- INSTR(info, '.', 1, 2) is the position of the 2nd '.', or 0 when there is none.
       SELECT info,
              INSTR(info, '.', 1, 2) AS second_dot
       FROM   table_name
     ) src;
Which, for the sample data:
-- Sample data: three values with a second '.' to cut at, plus one ('3/1.1') that has only one '.'.
CREATE TABLE table_name (info) AS
SELECT '1/2.2.2' FROM DUAL UNION ALL
SELECT '2/1.1.1' FROM DUAL UNION ALL
SELECT '3/1.1.11' FROM DUAL UNION ALL
SELECT '3/1.1' FROM DUAL;
Outputs:
INFO
PART
1/2.2.2
1/2.2
2/1.1.1
2/1.1
3/1.1.11
3/1.1
3/1.1
3/1.1
If you want to update the table then:
-- Truncate in place at the second '.'; only values containing at least two dots are touched.
UPDATE table_name
   SET info = SUBSTR(info, 1, INSTR(info, '.', 1, 2) - 1)
 WHERE info LIKE '%.%.%'
fiddle

For the sake of argument, here's a solution using REGEXP_SUBSTR(). REGEXP_SUBSTR() returns NULL if the pattern is not found. Thanks to MT0 for the CTE so I didn't have to type it up :-)
-- Inline sample rows; row 5 ('4/4') contains no '.' and therefore does not match the pattern.
WITH table_name (id, info) AS (
  SELECT 1, '1/2.2.2'  FROM dual UNION ALL
  SELECT 2, '2/1.1.1'  FROM dual UNION ALL
  SELECT 3, '3/1.1.11' FROM dual UNION ALL
  SELECT 4, '3/1.1'    FROM dual UNION ALL
  SELECT 5, '4/4'      FROM dual
)
-- digits '/' digits '.' digits: \d+ (rather than a single \d) keeps multi-digit parts whole,
-- e.g. '12/3.45.6' yields '12/3.45' instead of '2/3.4'. REGEXP_SUBSTR returns NULL on no match.
SELECT id,
       REGEXP_SUBSTR(info, '\d+/\d+\.\d+') AS data
FROM   table_name;
ID DATA
---------- --------
1 1/2.2
2 2/1.1
3 3/1.1
4 3/1.1
5
5 rows selected.

Related

How to query data which is not unique up to a certain point?

Basically the current conditions of the query are
WHERE data_payload_uri BETWEEN
'/organization/team/folder/2021'
AND
'/organization/team/folder/2022'
And this gets all data for the year of 2021.
A sample of the data_payload_uri data looks like this:
/organization/team/folder/20210101/orig
/organization/team/folder/20210102/orig
/organization/team/folder/20210102/orig_v1
/organization/team/folder/20210103/orig
/organization/team/folder/20210104/orig
/organization/team/folder/20210105/orig
/organization/team/folder/20210105/orig_v1
/organization/team/folder/20210105/orig_v2
What I would like to do is only query the rows where up until the last forward-slash, the row is NOT unique.
What this means, is I want to NOT query the rows which ONLY have one orig
/organization/team/folder/20210101/orig
/organization/team/folder/20210103/orig
/organization/team/folder/20210104/orig
but I DO want to query all the other rows
/organization/team/folder/20210105/orig
/organization/team/folder/20210105/orig_v1
/organization/team/folder/20210105/orig_v2
/organization/team/folder/20210102/orig
/organization/team/folder/20210102/orig_v1
What is the best way to do this? Pls let me know if anything is unclear and thank you for any help
You can use the analytic COUNT function:
-- Return the rows whose folder (the URI up to and including its last '/') holds more than
-- one distinct URI within the requested range.
WITH uri_counts AS (
  SELECT t.*,
         COUNT(DISTINCT data_payload_uri) OVER (
           PARTITION BY SUBSTR(data_payload_uri, 1, INSTR(data_payload_uri, '/', -1))
         ) AS cnt
  FROM   table_name t
  -- half-open range: lower bound included, upper bound excluded
  WHERE  data_payload_uri >= '/organization/team/folder/2021'
  AND    data_payload_uri <  '/organization/team/folder/2022'
)
SELECT *
FROM   uri_counts
WHERE  cnt > 1
Which, for the sample data:
-- Sample data: folders 20210102 and 20210105 hold several URIs; 20210101, 20210103 and 20210104 hold one each.
CREATE TABLE table_name (id, data_payload_uri) AS
SELECT 1, '/organization/team/folder/20210101/orig' FROM DUAL UNION ALL
SELECT 2, '/organization/team/folder/20210102/orig' FROM DUAL UNION ALL
SELECT 3, '/organization/team/folder/20210102/orig_v1' FROM DUAL UNION ALL
SELECT 4, '/organization/team/folder/20210103/orig' FROM DUAL UNION ALL
SELECT 5, '/organization/team/folder/20210104/orig' FROM DUAL UNION ALL
SELECT 6, '/organization/team/folder/20210105/orig' FROM DUAL UNION ALL
SELECT 7, '/organization/team/folder/20210105/orig_v1' FROM DUAL UNION ALL
SELECT 8, '/organization/team/folder/20210105/orig_v2' FROM DUAL;
Outputs:
ID
DATA_PAYLOAD_URI
CNT
2
/organization/team/folder/20210102/orig
2
3
/organization/team/folder/20210102/orig_v1
2
6
/organization/team/folder/20210105/orig
3
7
/organization/team/folder/20210105/orig_v1
3
8
/organization/team/folder/20210105/orig_v2
3
db<>fiddle here

Convert a series of Number values in Text in Oracle SQL Query

In the Oracle database, I have string values (VARCHAR2) like 1,4,7,8. The number represents as 1=car, 2= bus, 3=BB, 4=SB, 5=Ba, 6=PA, 7=HB, and 8 =G
and want to convert the above-said example to "car,SB,HB,G" in my query results
I tried to use "Decode" but it does not work. Please advise how to make it work; I would appreciate it.
Thanks
Initially, I have used the following query:
-- Exclusion list per client, returned exactly as stored.
SELECT clientid              AS c#,
       vehicletypeexclusions AS vehicle
FROM   clients
The sample of outcomes are:
C# Vehicle
20 1,19,20,23,24,7,5
22 1,19,20,23,24,7,5
I also tried the following that gives me the null value of vehicles:
-- Failing attempt: DECODE compares the whole VEHICLETYPEEXCLUSIONS value with each search
-- literal, so a list such as '1,19,20,23,24,7,5' equals none of them; as no default
-- argument is supplied, the result is NULL.
Select Clientid as C#, Decode (VEHICLETYPEEXCLUSIONS, '1', 'car',
'3','bus', '5','ba' ,'7','HB', '8','G'
, '9','LED1102', '10','LED1104', '13','LED8-2',
'14','Flip4-12', '17','StAT1003', '19','Taxi-Min', '20','Tax_Sed',
'21','Sup-veh' , '22','T-DATS', '23','T-Mini',
'24','T-WAM') as vehicle_Ex from clients >
Here's one option. Read comments within code. Sample data in lines #1 - 13; query begins at line #14.
SQL> with
2 expl (id, name) as  -- lookup: numeric code -> name
3 (select 1, 'car' from dual union all
4 select 2, 'bus' from dual union all
5 select 3, 'BB' from dual union all
6 select 4, 'SB' from dual union all
7 select 5, 'Ba' from dual union all
8 select 6, 'PA' from dual union all
9 select 7, 'HB' from dual union all
10 select 8, 'G' from dual
11 ),
12 temp (col) as  -- the comma-separated input value
13 (select '1,4,7,8' from dual),
14 -- split COL to rows
15 spl as
16 (select regexp_substr(col, '[^,]+', 1, level) val,  -- the LEVEL-th run of non-comma characters
17 level lvl
18 from temp
19 connect by level <= regexp_count(col, ',') + 1  -- one row per item: number of commas + 1
20 )
21 -- join SPL with EXPL; aggregate the result
22 select listagg(e.name, ',') within group (order by s.lvl) result  -- ordering by LVL keeps the input order
23 from expl e join spl s on s.val = e.id;
RESULT
--------------------------------------------------------------------------------
car,SB,HB,G
SQL>
Using the function f_subst from https://stackoverflow.com/a/68537479/429100 :
create or replace
function f_subst(str varchar2, template varchar2, subst sys.odcivarchar2list) return varchar2
as
-- Replaces numbered placeholders in STR with the corresponding elements of SUBST.
--   str      : text containing the placeholders
--   template : placeholder pattern; each '%d' in it is replaced by the element index,
--              so with template '%d' the placeholder for element 3 is the text '3'
--   subst    : replacement values; element i replaces placeholder i
-- Returns STR with every placeholder occurrence replaced.
res varchar2(32767):=str;
begin
-- Go from the highest index down: a placeholder that contains another one as a
-- substring (e.g. '10' contains '1') always has the larger index, so it is consumed
-- before the shorter one can match inside it. Ascending order turned '10' into
-- <element 1> || '0'.
-- Caveat: a replacement value that itself contains a lower-numbered placeholder is
-- still rewritten by a later iteration; use a delimited template such as '${%d}'
-- when the values may contain digits.
for i in reverse 1..subst.count loop
res:=replace(res, replace(template,'%d',i), subst(i));
end loop;
return res;
end;
/
I've replaced ora_name_list_t (nested table) with sys.odcivarchar2list (varray) to make this example easier, but I would suggest to create your own collection for example create type varchar2_table as table of varchar2(4000);
Example:
-- Demo call: the i-th list element is the name substituted for the number i.
SELECT f_subst(
         str      => '1,4,7,8',
         template => '%d',
         subst    => sys.odcivarchar2list('car', 'bus', 'BB', 'SB', 'Ba', 'PA', 'HB', 'G')
       ) AS s
FROM   dual;
S
----------------------------------------
car,SB,HB,G
Assume you have a lookup table (associating the numeric codes with descriptions) and a table of input strings, which I called sample_inputs in my tests, as shown below:
-- Lookup table: numeric code -> description.
create table lookup (code, descr) as
select 1, 'car' from dual union all
select 2, 'bus' from dual union all
select 3, 'BB' from dual union all
select 4, 'SB' from dual union all
select 5, 'Ba' from dual union all
select 6, 'PA' from dual union all
select 7, 'HB' from dual union all
select 8, 'G' from dual
;
-- Test inputs: a multi-code list, a NULL, a single code, a repeated code and an unsorted list.
create table sample_inputs (str) as
select '1,4,7,8' from dual union all
select null from dual union all
select '3' from dual union all
select '5,5,5' from dual union all
select '6,2,8' from dual
;
One strategy for solving your problem is to split the input - slightly modified to make it a JSON array, so that we can use json_table to split it - then join to the lookup table and re-aggregate.
-- For every input string: split it into codes, translate each code, and re-join the names.
SELECT s.str,
       agg.descr_list
FROM   sample_inputs s
       CROSS JOIN LATERAL (
         -- Wrapping STR in brackets makes it a JSON array, so JSON_TABLE returns one row
         -- per code; ORD records each code's position for the ordered re-aggregation.
         SELECT LISTAGG(descr, ',') WITHIN GROUP (ORDER BY ord) AS descr_list
         FROM   JSON_TABLE('[' || str || ']', '$[*]'
                  COLUMNS code NUMBER PATH '$',
                          ord  FOR ORDINALITY)
                JOIN lookup USING (code)
       ) agg
;
STR DESCR_LIST
------- ------------------------------
1,4,7,8 car,SB,HB,G
3 BB
5,5,5 Ba,Ba,Ba
6,2,8 PA,bus,G

Extracting substring in Oracle

Let's say I have three rows with value as
1 121/2808B|:6081
2 OD308B|:6081_1:
3 008312100001200|:6081_1
I want to display value only until B but want to exclude everything after B. So as you can see in above data:
from 121/2808B|:6081 I want only 121/2808B
from OD308B|:6081_1: only OD308B
from 008312100001200|:6081_1 only 008312100001200.
Thanks for the Help.
Try this: regexp_substr('<Your_string>','[^B]+')
-- The expected results keep the 'B' and end just before the '|' separator, so take the
-- leading run of characters that are not '|'. A '[^B]+' pattern would drop the 'B' and
-- would not stop at '|' when the value contains no 'B'.
SELECT
    REGEXP_SUBSTR('121/2808B|:6081', '[^|]+') AS val
FROM
    DUAL;
-- Result: 121/2808B

SELECT
    REGEXP_SUBSTR('OD308B|:6081_1:', '[^|]+') AS val
FROM
    DUAL;
-- Result: OD308B

SELECT
    REGEXP_SUBSTR('008312100001200|:6081_1', '[^|]+') AS val
FROM
    DUAL;
-- Result: 008312100001200
db<>fiddle demo
Cheers!!
You could try using SUBSTR() and INSTR()
-- INSTR returns the position of the first 'B'; using that position as the length keeps
-- the 'B' itself, as the expected output '121/2808B' requires (position - 1 would drop it).
-- When the value contains no 'B', INSTR returns 0 and the result is NULL.
SELECT SUBSTR('121/2808B|:6081', 1, INSTR('121/2808B|:6081', 'B', 1, 1)) AS up_to_b
FROM   DUAL
I think you forgot to mention that you wanted to use | as a field separator, but I deduced this from the expected result from the third string. As such the following should give you what you want:
-- The three sample values from the question.
WITH cteData AS (
  SELECT 1 AS ID, '121/2808B|:6081' AS STRING FROM DUAL UNION ALL
  SELECT 2, 'OD308B|:6081_1:' FROM DUAL UNION ALL
  SELECT 3, '008312100001200|:6081_1' FROM DUAL
)
SELECT ID,
       STRING,
       SUBSTR(STRING, 1,
              CASE
                -- a 'B' is present: keep everything up to and including it
                WHEN INSTR(STRING, 'B') > 0 THEN INSTR(STRING, 'B')
                -- no 'B': stop just before the '|' separator
                WHEN INSTR(STRING, '|') > 0 THEN INSTR(STRING, '|') - 1
                -- neither delimiter: return the whole value instead of NULL
                ELSE LENGTH(STRING)
              END) AS UP_TO_B
FROM cteData;
dbfiddle here
Assuming Bob Jarvis is correct in the assumption that "|" is also a delimiter (as seems likely) try:
-- define test data
with test as
( select '121/2808B|:6081' stg from dual union all
  select 'OD308B|:6081_1:' from dual union all
  select '008312100001200|:6081_1' from dual
)
-- execute extract: take the leading run of characters that are not '|'.
-- 'B' is deliberately not excluded, because the expected results ('121/2808B', 'OD308B')
-- keep the 'B'; '[^B|]+' would cut it off.
select regexp_substr(stg , '[^|]+') val
from test ;

Get substring with REGEXP_SUBSTR

I need to use regexp_substr, but I can't use it properly
I have column (l.id) with numbers, for example:
1234567891123!123 EXPECTED OUTPUT: 1234567891123
123456789112!123 EXPECTED OUTPUT: 123456789112
12345678911!123 EXPECTED OUTPUT: 12345678911
1234567891123!123 EXPECTED OUTPUT: 1234567891123
I want to use regexp_substr to get the part before the exclamation mark (!)
-- Asker's attempt: '[%!]' matches a single '%' or '!' character, and the last argument (13)
-- asks for the 13th such match rather than for the text in front of the '!'.
SELECT REGEXP_SUBSTR(l.id,'[%!]',1,13) from l.table
is it ok ?
You can try using INSTR() and substr()
DEMO
-- Text before the first '!'. The inline view supplies the alias L that the expression
-- refers to (DUAL alone has no L.ID column); swap it for the real table.
-- A value without any '!' is returned unchanged instead of becoming NULL.
SELECT CASE
         WHEN INSTR(l.id, '!', 1, 1) > 0
         THEN SUBSTR(l.id, 1, INSTR(l.id, '!', 1, 1) - 1)
         ELSE l.id
       END AS id_prefix
FROM   (SELECT '1234567891123!123' AS id FROM dual) l
You want to remove the exclamation mark and all following characters it seems. That is simply:
-- Remove the '!' and the characters that follow it.
SELECT regexp_replace(id, '!.*', '')
FROM   mytable;
Look at it like a delimited string where the bang is the delimiter and you want the first element, even if it is NULL. Make sure to test all possibilities, even the unexpected ones (ALWAYS expect the unexpected)! Here the assumption is if there is no delimiter you'll want what's there.
The regex returns the first element followed by a bang or the end of the line. Note this form of the regex handles a NULL first element.
SQL> with tbl(id, str) as (
select 1, '1234567891123!123' from dual union all
select 2, '123456789112!123' from dual union all
select 3, '12345678911!123' from dual union all
select 4, '1234567891123!123' from dual union all
select 5, '!123' from dual union all  -- nothing in front of the '!'
select 6, '123!' from dual union all  -- nothing after the '!'
select 7, '' from dual union all  -- empty string
select 8, '12345' from dual  -- no '!' at all
)
-- '(.*?)(!|$)': shortest run of characters followed by '!' or the end of the string;
-- the 6th argument (1) returns only the first group, i.e. the text in front of the '!'.
select id, regexp_substr(str, '(.*?)(!|$)', 1, 1, NULL, 1)
from tbl
order by id;
ID REGEXP_SUBSTR(STR
---------- -----------------
1 1234567891123
2 123456789112
3 12345678911
4 1234567891123
5
6 123
7
8 12345
8 rows selected.
SQL>
If you like to use REGEXP_SUBSTR rather than regexp_replace then you can use
SELECT REGEXP_SUBSTR(l.id,'^\d+')
assuming you have only numbers before !
If I understand correctly, this is the pattern that you want:
-- Leading run of characters that are not '!', taken from a one-row sample named L.
WITH l AS (
  SELECT '1234567891123!123' AS id
  FROM   dual
)
SELECT REGEXP_SUBSTR(l.id,'^[^!]+', 1)
FROM   l

Retrieve certain number from data set in Oracle 10g

1. <0,0><120.96,2000><241.92,4000><362.88,INF>
2. <0,0><143.64,2000><241.92,4000><362.88,INF>
3. <0,0><125.5,2000><241.92,4000><362.88,INF>
4. <0,0><127.5,2000><241.92,4000><362.88,INF>
Above is the data set I have in Oracle 10g. I need output as below
1. 120.96
2. 143.64
3. 125.5
4. 127.5
the output I want is only before "comma" (120.96). I tried using REGEXP_SUBSTR but I could not get any output. It will be really helpful if someone could provide effective way to solve this
Here is one method that first parses out the second element and then gets the first number in it:
select regexp_substr(regexp_substr(x, '<[^>]*>', 1, 2), '[0-9.]+', 1, 1)
Another method just gets the third number in the string:
select regexp_substr(x, '[0-9.]+', 1, 3)
Here is an approach without using Regexp.
Find the index of second occurrence of '<'. Then find the second occurrence of ',' use those values in substring.
-- Sample rows; X is the raw '<a,b><c,d>...' string.
WITH data AS (
  SELECT '<0,0><120.96,2000><241.92,4000><362.88,INF>' x FROM dual
  UNION ALL
  SELECT '<0,0><143.64,2000><241.92,4000><362.88,INF>' FROM dual
  UNION ALL
  SELECT '<0,0><125.5,2000><241.92,4000><362.88,INF>' FROM dual
)
-- Start one character after the 2nd '<'; the length is the gap between that '<' and the 2nd ','.
SELECT SUBSTR(x, INSTR(x,'<',1,2)+1, INSTR(x,',',1,2)- INSTR(x,'<',1,2)-1)
FROM   data
Approach Using Regexp:
Identify the 2nd occurrence of a numerical value followed by a comma
Then remove the trailing comma.
-- Sample rows; X is the raw '<a,b><c,d>...' string.
WITH data AS (
  SELECT '<0,0><120.96,2000><241.92,4000><362.88,INF>' x FROM dual
  UNION ALL
  SELECT '<0,0><143.64,2000><241.92,4000><362.88,INF>' FROM dual
  UNION ALL
  SELECT '<0,0><125.5,2000><241.92,4000><362.88,INF>' FROM dual
)
-- The 2nd match of "digits/dots followed by a comma" is e.g. '120.96,'; TRIM strips that comma.
SELECT
  TRIM(TRAILING ',' FROM REGEXP_SUBSTR(x,'[0-9.]+,',1,2))
FROM data
This example uses regexp_substr to get the string between the 2nd occurrence of a less-than sign and the comma that follows it:
SQL> with tbl(id, str) as (
-- UNION ALL: the sample rows are distinct, so the de-duplicating sort of UNION is not needed
select 1, '<0,0><120.96,2000><241.92,4000><362.88,INF>' from dual union all
select 2, '<0,0><143.64,2000><241.92,4000><362.88,INF>' from dual union all
select 3, '<0,0><125.5,2000><241.92,4000><362.88,INF>' from dual union all
select 4, '<0,0><127.5,2000><241.92,4000><362.88,INF>' from dual
)
-- 2nd occurrence of '<' ... ','; the 6th argument (1) returns only the captured group,
-- i.e. the text between that '<' and the comma.
select id,
regexp_substr(str, '<(.*?),', 1, 2, null, 1) value
from tbl
-- explicit ordering instead of relying on the sort UNION happened to perform
order by id;
ID VALUE
---------- -------------------------------------------
1 120.96
2 143.64
3 125.5
4 127.5
EDIT: I realized the OP specified 10g and the regexp_substr example I gave used the 6th argument (subgroup) which was added in 11g. Here is an example using regexp_replace instead which should work with 10g:
SQL> with tbl as (
-- column names are given in the first branch: a column list after the CTE name is not
-- available before 11gR2, and this variant is meant to run on 10g
select 1 id, '<0,0><120.96,2000><241.92,4000><362.88,INF>' str from dual union all
select 2, '<0,0><143.64,2000><241.92,4000><362.88,INF>' from dual union all
select 3, '<0,0><125.5,2000><241.92,4000><362.88,INF>' from dual union all
select 4, '<0,0><127.5,2000><241.92,4000><362.88,INF>' from dual
)
-- group 1 = shortest prefix before the first '><', group 2 = text up to the next comma;
-- the whole string is replaced by group 2
select id,
regexp_replace(str, '^(.*?)><(.*?),.*$', '\2') value
from tbl
-- explicit ordering instead of relying on the sort UNION happened to perform
order by id;
ID VALUE
---------- ----------
1 120.96
2 143.64
3 125.5
4 127.5
SQL>