Oracle split by regex and aggregate again - sql

I have a table from where I need to get only some part of record with comma after one part of record.
for example I have
ABCD [1000-1987] BCD[101928-876] adgs[10987-786]
I want to get the record like :
1000-1987,101928-876,10987-786
Can you please help me out to get the record as mentioned.

If you don't use 11g and do not want to use wm_concat:
WITH
my_data AS (
SELECT 'ABCD [1000-1987] BCD[101928-876] adgs[10987-786]' AS val FROM dual
)
SELECT
ltrim(
MAX(
sys_connect_by_path(
rtrim(ltrim(regexp_substr(val, '\[[0-9-]*\]', 1, level, NULL), '['), ']'),
',')
),
',') AS val_part
FROM my_data
CONNECT BY regexp_substr(val, '\[[0-9-]*\]', 1, level, NULL) IS NOT NULL
;
If using wm_concat is ok for you:
WITH
my_data AS (
SELECT 'ABCD [1000-1987] BCD[101928-876] adgs[10987-786]' AS val FROM dual
)
SELECT
wm_concat(rtrim(ltrim(regexp_substr(val, '\[[0-9-]*\]', 1, level, NULL), '['), ']')) AS val_part
FROM my_data
CONNECT BY regexp_substr(val, '\[[0-9-]*\]', 1, level, NULL) IS NOT NULL
;
If you use 11g:
WITH
my_data AS (
SELECT 'ABCD [1000-1987] BCD[101928-876] adgs[10987-786]' AS val FROM dual
)
SELECT
listagg(regexp_substr(val, '[a-b ]*\[([0-9-]*)\] ?', 1, level, 'i', 1), ',') WITHIN GROUP (ORDER BY 1) AS val_part
FROM my_data
CONNECT BY regexp_substr(val, '[a-b ]*\[([0-9-]*)\] ?', 1, level, 'i', 1) IS NOT NULL
;
Read more about string aggregation techniques: Tim Hall about aggregation techniques
Read more about regexp_substr: regexp_substr - Oracle Documentation - 10g
Read more about regexp_substr: regexp_substr - Oracle Documentation - 11g

You don't have to split and then aggregate it. You can use regexp_replace to keep only those characters within square brackets, then replace the square brackets by comma.
WITH my_data
AS (SELECT 'ABCD [1000-1987] BCD[101928-876] adgs[10987-786]' AS val
FROM DUAL)
SELECT RTRIM (
REPLACE (
REGEXP_REPLACE (val, '(\[)(.*?\])|(.)', '\2'),
']', ','),
',')
FROM my_data;

Related

Regex values after special character with empty values

I am struggle with regex to split spring into columns in Oracle database.
select (REGEXP_SUBSTR(replace('1:::9999', ' ',''), '[^: ]+', 1, 4)) from dual;
I need to obtain 4th value from that string as a column value, sometimes values at position 2,3 are empty and my query doesn't work. I am trying to figure out what regex will work
You can use
select (REGEXP_SUBSTR(replace('1:::9999', ' ',''), '([^: ]*)(:|$)', 1, 4, 'i', 1)) from dual;
Here, the ([^: ]*)(:|$) matches
([^: ]*) - Group 1: any zero or more chars other than : and space
(:|$) - Group 2, either : or end of string.
You do not need a (slower) regex for this task, use simple substr/instr functions:
with input_(val) as (
select '1:::9999' from dual
union all
select '1:2::' from dual
union all
select '1:2::3:5' from dual
)
, replaced as (
select input_.*, replace(val, ' ', '') as val_replaced
from input_
)
select
val,
substr(
val_replaced,
/*Locate the first occurrence of a colon and get a substring ...*/
instr(val_replaced, ':', 1, 3) + 1,
/*.. until the end, if the next colon is absent, or until the next colon*/
nvl(nullif(instr(val_replaced, ':', 1, 4), 0), length(val_replaced) + 1) - instr(val_replaced, ':', 1, 3) - 1
) as col
from replaced
VAL
COL
1:::9999
9999
1:2::
null
1:2::3:5
3
fiddle with performance difference.

How to include the code into a REPLACE function in oracle?

User #psaraj12 helped me with a ticket here about finding ascii character in a string in my DB with the following code:
with test (col) as (
select
'L.A.D'
from
dual
union all
select
'L.􀈉.D.'
from
dual
)
select
col,
case when max(ascii_of_one_character) >= 65535 then 'NOT OK' else 'OK' end result
from
(
select
col,
substr(col, column_value, 1) one_character,
ascii(
substr(col, column_value, 1)
) ascii_of_one_character
from
test cross
join table(
cast(
multiset(
select
level
from
dual connect by level <= length(col)
) as sys.odcinumberlist
)
)
)
group by
col
having max(ascii_of_one_character) >= 4000000000;
The script looks for characters of a certain range GROUPs them and marks displays them.
Is it possible to include this in a REPLACE statement of a similar sort:
REPLACE(table.column, max(ascii_of_one_character) >= 4000000000, '')
EDIT: As per #flyaround answer this is the code I use changed a little bit:
with test (col) as (
select skunden.name1
from skunden
)
select col
, REGEXP_REPLACE(col, 'max(ascii_of_one_character)>=4000000000', '') as cleaned
, CASE WHEN REGEXP_COUNT(col, 'max(ascii_of_one_character)>=4000000000') > 0 THEN 0 ELSE 1 END as isOk
from test;
Coming back to your original code, because my suggested REGEX_REPLACE is not working sufficient with high surrogates. Your approach is already very effective, so I jumped into it to have a solution here.
MERGE
INTO skunden
USING (
select
id as innerId,
name as innerName,
case when max(ascii_of_one_character) >= 65535 then 0 else 1 end isOk,
listagg(case when ascii_of_one_character <65535 then one_character end , '') within group (order by rn) as cleaned
from
(
select
id,
name,
substr(name, column_value, 1) one_character,
ascii(
substr(name, column_value, 1)
) ascii_of_one_character
, rownum as rn
from
skunden cross
join table(
cast(
multiset(
select
level
from
dual connect by level <= length(name)
) as sys.odcinumberlist
)
)
)
group by
id, name
having max(ascii_of_one_character) >= 4000000000
)
ON (skunden.id = innerId)
WHEN MATCHED THEN
UPDATE
SET name = cleaned
;
On MERGE you can't use the referencing column for an update. Therefore you should use the unique key (I used 'id' in my example) of your table.
The resulting value will be 'L..D' for your example value of 'L.􀈉.D.'
If I got your question correctly you would like to remove characters with a higher decimal representation of characters than specified.
You could check to use REGEXP_REPLACE for this, like:
with test (col) as (
select
'L.A.D'
from
dual
union all
select
'L.􀈉.D.'
from
dual
)
select col
, REGEXP_REPLACE(col, '[^\u00010000-\u0010FFFF]+$', '') as cleaned
, CASE WHEN REGEXP_COUNT(col, '[^\u00010000-\u0010FFFF]+$') > 0 THEN 0 ELSE 1 END as isOk
from test;

Substring from underscore and onwards in Oracle

I have a string with under score and some characters. I need to apply substring and get values to the left excluding underscore. So I applied below formula and its working correctly for those strings which have underscore (_). But for strings without (_) it is bringing NULL. Any suggestions how this can be handled in the substring itself.
Ex: ABC_BASL ---> Works correctly; ABC ---> gives null
My formula as below -
select SUBSTR('ABC_BAS',1,INSTR('ABC_BAS','_')-1) from dual;
ABC
select SUBSTR('ABC',1,INSTR('ABC','_')-1) from dual;
(NULL)
You could use a CASE expression to first check for an underscore:
WITH yourTable AS (
SELECT 'ABC_BAS' AS col FROM dual UNION ALL
SELECT 'ABC' FROM dual
)
SELECT
CASE WHEN col LIKE '%\_%' ESCAPE '\'
THEN SUBSTR(col, 1, INSTR(col, '_') - 1)
ELSE col END AS col_out
FROM yourTable;
Use regular expression matching:
SELECT REGEXP_SUBSTR('ABC_BAS', '(.*)([_]|$)?', 1, 1, NULL, 1) FROM DUAL;
returns 'ABC', and
SELECT REGEXP_SUBSTR('ABC', '(.*)([_]|$)?', 1, 1, NULL, 1) FROM DUAL;
also returns 'ABC'.
db<>fiddle here
EDIT
The above gives correct results, but I missed the easiest possible regular expression to do the job:
SELECT REGEXP_SUBSTR('ABC_BAS', '[^_]*') FROM DUAL;
returns 'ABC', as does
SELECT REGEXP_SUBSTR('ABC', '[^_]*') FROM DUAL;
db<>fiddle here
Yet another approach is to use the DECODE in the length parameter of the substr as follows:
substr(str,
1,
decode(instr(str,'_'), 0, lenght(str), instr(str,'_') - 1)
)
You seem to want everything up to the first '_'. If so, one method usesregexp_replace():
select regexp_replace(str, '(^[^_]+)_.*$', '\1')
from (select 'ABC' as str from dual union all
select 'ABC_BAS' from dual
) s
A simpler method is:
select regexp_substr(str, '^[^_]+')
from (select 'ABC' as str from dual union all
select 'ABC_BAS' from dual
) s
Here is a db<>fiddle.
I'd use
regexp_replace(text,'_.*')
or if performance was a concern,
substr(text, 1, instr(text||'_', '_') -1)
For example,
with demo(text) as
( select column_value
from table(sys.dbms_debug_vc2coll('ABC', 'ABC_DEF', 'ABC_DEF_GHI')) )
select text
, regexp_replace(text,'_.*')
, substr(text, 1, instr(text||'_', '_') -1)
from demo;
TEXT REGEXP_REPLACE(TEXT,'_.*') SUBSTR(TEXT,1,INSTR(TEXT||'_','_')-1)
------------ --------------------------- -------------------------------------
ABC ABC ABC
ABC_DEF ABC ABC
ABC_DEF_GHI ABC ABC
Ok i think i got it. Add nvl to the substring and insert the condition as below -
select nvl(substr('ABC',1,instr('F4001Z','_')-1),'ABC') from dual;

regex oracle sql return all capturing groups

I have an regex like
select regexp_substr('some stuff TOTAL_SCORE<518>some stuff OTHER_VALUE<456> foo <after>', 'TOTAL_SCORE<(\d{3})>', 1, 1, NULL, 1) from dual which can return a value for a single capturing group.
How can I instead return all the capturing groups as an additional column? (string concat of results is fine)
select regexp_substr('some stuff TOTAL_SCORE<518> TOTAL_SCORE<123>some stuff OTHER_VALUE<456> foo <after>', 'TOTAL_SCORE<(\d{3})>') from dual
Query 1:
-- Sample data
WITH your_table ( value ) AS (
SELECT 'some stuff TOTAL_SCORE<518>some stuff OTHER_VALUE<456> foo <after>' FROM DUAL
)
-- Query
SELECT REGEXP_REPLACE(
value,
'.*TOTAL_SCORE<(\d{3})>.*OTHER_VALUE<(\d{3})>.*',
'\1,\2'
) As scores
FROM your_table
Output:
SCORES
-------
518,456
Query 2:
-- Sample data
WITH your_table ( value ) AS (
SELECT 'some stuff TOTAL_SCORE<518> TOTAL_SCORE<123> some stuff OTHER_VALUE<456> foo <after>' FROM DUAL
)
-- Query
SELECT l.column_value As scores
FROM your_table t,
TABLE(
CAST(
MULTISET(
SELECT TO_NUMBER(
REGEXP_SUBSTR(
t.value,
'TOTAL_SCORE<(\d{3})>',
1,
LEVEL,
NULL,
1
)
)
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, 'TOTAL_SCORE<(\d{3})>' )
) AS SYS.ODCINUMBERLIST
)
) l;
Output:
SCORES
-------
518
123

SQL - Divide single column in multiple columns

I have the following SQL question:
How to divide a column (text inside) using the SELECT command into two separate columns with split text?
I need to separate the text-data, using the space character.
I know it is better to put an example to make it easy. So:
SELECT COLUMN_A FROM TABLE1
output:
COLUMN_A
-----------
LORE IPSUM
desired output:
COLUMN_A COLUMN_B
--------- ----------
LORE IPSUM
Thank you all for the help.
Depends on the consistency of the data - assuming a single space is the separator between what you want to appear in column one vs two:
WITH TEST_DATA AS
(SELECT 'LOREM IPSUM' COLUMN_A FROM DUAL)
SELECT SUBSTR(t.COLUMN_A, 1, INSTR(t.COLUMN_A, ' ')-1) AS COLUMN_A,
SUBSTR(t.COLUMN_A, INSTR(t.COLUMN_A, ' ')+1) AS COLUMN_B
FROM test_data T;
You can also use below query with REGEX:
WITH TEST_DATA AS
(SELECT 'LOREM IPSUM' COLUMN_A FROM DUAL)
SELECT REGEXP_SUBSTR(t.COLUMN_A, '[^ ]+', 1, 1) COLUMN_A,
REGEXP_SUBSTR(t.COLUMN_A, '[^ ]+', 1, 2) COLUMN_B
FROM test_data T;
Oracle 10g+ has regex support, allowing more flexibility depending on the situation you need to solve. It also has a regex substring method...
EDIT:
3 WORDS SPLIT:
WITH TEST_DATA AS
(SELECT 'LOREM IPSUM DIMSUM' COLUMN_A FROM DUAL)
SELECT REGEXP_SUBSTR(t.COLUMN_A, '[^ ]+', 1, 1) COLUMN_A,
REGEXP_SUBSTR(t.COLUMN_A, '[^ ]+', 1, 2) COLUMN_B,
REGEXP_SUBSTR(t.COLUMN_A, '[^ ]+', 2, 3) COLUMN_C
FROM test_data T;
Reference:
SUBSTR
INSTR
The solution can be generalized using a counter and the PIVOT operator, the counter to get the word number and the PIVOT to change rows to columns
WITH Counter (N) AS (
SELECT LEVEL FROM DUAL
CONNECT BY LEVEL <= (SELECT MAX(regexp_count( COLUMN_A, ' ')) + 1
FROM Table1)
)
SELECT Word_1, Word_2, Word_3, Word_4
FROM (SELECT t.COLUMN_A
, c.N N
, REGEXP_SUBSTR(t.COLUMN_A, '[^ ]+', 1, c.N) Word
FROM Table1 t
LEFT JOIN Counter c ON c.N <= regexp_count( COLUMN_A, ' ') + 1) b
PIVOT
(MAX(Word) FOR N IN (1 Word_1, 2 Word_2, 3 Word_3, 4 Word_4)) pvt
SQLFiddle demo
But that have a fixed columns list in the PIVOT definition, to really have a general query a dynamic pivot or a PIVOT XML is needed
INSERT INTO Rough (Tag_Id,Status_ , new_)
WITH TEST_DATA AS
(SELECT regexp_replace('&data' ,'\s+',' ') COLUMN_A FROM DUAL)
SELECT REGEXP_SUBSTR (REGEXP_SUBSTR (t.COLUMN_A, '[^-]+', 1, LEVEL), '[^ ]+', 1, 1) AS Col1,
REGEXP_SUBSTR (REGEXP_SUBSTR (t.COLUMN_A, '[^-]+', 1, LEVEL), '[^ ]+', 1, 2) AS Col2,
REGEXP_SUBSTR (REGEXP_SUBSTR (t.COLUMN_A, '[^-]+', 1, LEVEL), '[^ ]+', 1, 3) AS Col3
FROM test_data T
CONNECT BY LEVEL <= LENGTH (REGEXP_REPLACE (t.COLUMN_A, '[^-]+')) + 1;