Most effective way to aggregate clob value in Oracle - sql

I have a table like:
SOME_ID FIRST_CLOB ANOTHER_CLOB
0 1.5|3.6|0.3 5.5|9.6
1 0.2 4.0|7.2
2 3.1|0.7|1.2 9.2|8.8|6.3
The length of each CLOB is not fixed. It can be (and in reality will be) much longer than in this example.
I need to get something like that:
SOME_ID FIRST_CLOB_MEAN ANOTHER_CLOB_MEAN
0 1.8 7.55
1 0.2 5.6
2 1.66 8.1
It need not be only a mean; it could be a count, for example. Currently we compute it while processing the data in pandas, but it takes a very long time to load a table with CLOB values.
I see only one way: create a trigger on input which would:
1.Split every CLOB field like that:
CLOB
1.5
3.6
0.3
2.Find mean value from it
I'm not sure whether that is the best way to do it, or whether it will work if I'm importing the table from CSV. I would be grateful for any advice.
P.S. I have another solution: just split CLOB without aggregation like that:
NEW_ID SOME_ID FIRST_CLOB ANOTHER_CLOB
0 0 1.5 5.5
1 0 3.6 9.6
2 0 0.3 NULL
3 1 0.2 4.0
4 1 NULL 7.2
5 2 3.1 9.2
6 2 0.7 8.8
7 2 1.2 6.3
It will load into pandas faster, but the table will grow enormously (one CLOB can contain 10, 25, 50 or 100 items), which is also undesirable.

You can use:
-- Walk both pipe-delimited CLOBs in lock-step with a recursive sub-query.
-- Every iteration records the start/end positions of the next element of
-- FIRST_CLOB and ANOTHER_CLOB; a start position of 0 marks a list that has
-- already been fully consumed.
WITH bounds ( some_id, first_clob, start1, end1, another_clob, start2, end2 ) AS (
  -- Anchor: the first element starts at position 1 and ends at the first '|'
  -- (INSTR yields 0 when no delimiter is found).
  SELECT some_id,
         first_clob,
         1,
         INSTR( first_clob, '|', 1 ),
         another_clob,
         1,
         INSTR( another_clob, '|', 1 )
  FROM   table_name
  UNION ALL
  -- Step: move each list past its previous delimiter; once a list has hit
  -- end = 0 both of its positions stay at 0.
  SELECT some_id,
         first_clob,
         CASE end1 WHEN 0 THEN 0 ELSE end1 + 1 END,
         CASE end1 WHEN 0 THEN 0 ELSE INSTR( first_clob, '|', end1 + 1 ) END,
         another_clob,
         CASE end2 WHEN 0 THEN 0 ELSE end2 + 1 END,
         CASE end2 WHEN 0 THEN 0 ELSE INSTR( another_clob, '|', end2 + 1 ) END
  FROM   bounds
  WHERE  end1 > 0
  OR     end2 > 0
),
-- Cut the element text out of each CLOB for every recorded pair of bounds.
elements AS (
  SELECT some_id,
         CASE
           WHEN start1 = 0 THEN NULL
           WHEN end1 = 0 THEN SUBSTR( first_clob, start1 )
           ELSE SUBSTR( first_clob, start1, end1 - start1 )
         END AS first_item,
         CASE
           WHEN start2 = 0 THEN NULL
           WHEN end2 = 0 THEN SUBSTR( another_clob, start2 )
           ELSE SUBSTR( another_clob, start2, end2 - start2 )
         END AS another_item
  FROM   bounds
)
SELECT some_id,
       AVG( TO_NUMBER( first_item ) )   AS FIRST_CLOB_MEAN,
       AVG( TO_NUMBER( another_item ) ) AS ANOTHER_CLOB_MEAN
FROM   elements
GROUP BY some_id
ORDER BY some_id
or
-- Turn each pipe-delimited list into a quoted, comma-separated XQuery
-- sequence ("a","b",...) and let XMLTABLE expand it into rows, then average
-- the rows per source row with a lateral CROSS APPLY.
SELECT t.some_id,
       f.first_clob_mean,
       a.another_clob_mean
FROM   table_name t
       CROSS APPLY (
         SELECT AVG( TO_NUMBER( x.column_value ) ) AS first_clob_mean
         FROM   XMLTABLE( ('"' || REPLACE( t.first_clob, '|', '","' ) || '"') ) x
       ) f
       CROSS APPLY (
         SELECT AVG( TO_NUMBER( x.column_value ) ) AS another_clob_mean
         FROM   XMLTABLE( ('"' || REPLACE( t.another_clob, '|', '","' ) || '"') ) x
       ) a
Which, for the sample data:
-- Three short rows matching the data shown in the question.
INSERT INTO table_name ( some_id, first_clob, another_clob )
SELECT 0, '1.5|3.6|0.3', '5.5|9.6' FROM DUAL UNION ALL
SELECT 1, '0.2', '4.0|7.2' FROM DUAL UNION ALL
SELECT 2, '3.1|0.7|1.2', '9.2|8.8|6.3' FROM DUAL;

-- One long row: FIRST_CLOB holds the integers 1..4000 joined by '|',
-- ANOTHER_CLOB is NULL.
DECLARE
  v_clob CLOB;
BEGIN
  FOR i IN 1 .. 4000 LOOP
    -- Put a delimiter in front of every element except the first.
    IF v_clob IS NOT NULL THEN
      v_clob := v_clob || '|';
    END IF;
    v_clob := v_clob || i;
  END LOOP;
  -- Name the target columns so the insert does not depend on the table's
  -- physical column order.
  INSERT INTO table_name ( some_id, first_clob, another_clob )
  VALUES ( 4, v_clob, NULL );
END;
/
Both output:
SOME_ID | FIRST_CLOB_MEAN | ANOTHER_CLOB_MEAN
------: | ---------------------------------------: | ----------------:
0 | 1.8 | 7.55
1 | .2 | 5.6
2 | 1.66666666666666666666666666666666666667 | 8.1
4 | 2000.5 | null
db<>fiddle here

A query like this one can be used to split the CLOBs by the pipe delimiter, then take the averages. Each CLOB needs to be split in its own sub-query so the count of numbers in the CLOB does not affect the count of numbers in another CLOB in the same row.
-- Average the pipe-delimited numbers of each CLOB column.
-- Each column is split in its own inline view so that the number of elements
-- in one CLOB cannot influence the row count used for the other; the two
-- per-id results are then joined on SOME_ID.
-- Inside each view:
--   * LEVEL <= (number of '|') + 1 generates one row per element;
--   * PRIOR some_id = some_id keeps the generated rows tied to their own
--     source row;
--   * PRIOR SYS_GUID() IS NOT NULL is the usual device that stops Oracle
--     from reporting a CONNECT BY loop when a row only connects to itself.
SELECT c1.some_id,
       c1.first_clob_mean,
       c2.another_clob_mean
FROM   ( SELECT some_id,
                AVG( TO_NUMBER( REGEXP_SUBSTR( first_clob,
                                               '[^|]+',
                                               1,
                                               LEVEL ) ) ) AS first_clob_mean
         FROM   clobs
         CONNECT BY LEVEL <= REGEXP_COUNT( first_clob, '\|' ) + 1
                AND PRIOR SYS_GUID() IS NOT NULL
                AND PRIOR some_id = some_id
         GROUP BY some_id ) c1
       INNER JOIN
       ( SELECT some_id,
                AVG( TO_NUMBER( REGEXP_SUBSTR( another_clob,
                                               '[^|]+',
                                               1,
                                               LEVEL ) ) ) AS another_clob_mean
         FROM   clobs
         CONNECT BY LEVEL <= REGEXP_COUNT( another_clob, '\|' ) + 1
                AND PRIOR SYS_GUID() IS NOT NULL
                AND PRIOR some_id = some_id
         GROUP BY some_id ) c2
       ON c1.some_id = c2.some_id
ORDER BY c1.some_id;

Related

Oracle SQL regexp_substr

I have a query which fetches the value matching a pattern. I want it to fetch all the prefvalue entries instead of only prefvalue:CEROTG.
-- Returns only the first occurrence of the pattern in the input string.
SELECT REGEXP_SUBSTR(
         'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1',
         'prefvalue:([[:alnum:]_]+)',
         1,
         1
       ) AS Result
FROM   dual
Current Output
prefvalue:CEROTG
Expected Output
prefvalue:CEROTG
prefvalue:CEROTG-2
prefvalue:CEROTG_1
One option uses a recursive query:
-- Enumerate every match of the pattern: the recursive member asks
-- REGEXP_SUBSTR for occurrence n + 1 until n reaches the total match count.
with
  src (str, pat) as (
    select 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1',
           'prefvalue:([[:alnum:]_-]+)'
    from dual
  ),
  hits (str, pat, res, n) as (
    -- first occurrence
    select str, pat, regexp_substr(str, pat, 1, 1), 1
    from src
    union all
    -- next occurrence, while more matches remain
    select str, pat, regexp_substr(str, pat, 1, n + 1), n + 1
    from hits
    where n < regexp_count(str, pat)
  )
select res
from hits
Demo on DB Fiddle:
| RES |
| :----------------- |
| prefvalue:CEROTG-2 |
| prefvalue:CEROTG |
| prefvalue:CEROTG_1 |
You can use simple string functions in a recursive subquery-factoring clause:
-- Split ENTRY_DATA on single spaces using only INSTR/SUBSTR.
-- Each row of the recursive sub-query carries the start of one token and the
-- position of the space that ends it (0 when the token runs to end of string).
WITH tokens ( entry_data, start_pos, end_pos ) AS (
  SELECT entry_data,
         1,
         INSTR( entry_data, ' ', 1 )
  FROM   prefdir_entrydata
  UNION ALL
  SELECT entry_data,
         end_pos + 1,
         INSTR( entry_data, ' ', end_pos + 1 )
  FROM   tokens
  WHERE  end_pos > 0
)
SELECT CASE
         WHEN end_pos = 0
         THEN SUBSTR( entry_data, start_pos )                      -- last token
         ELSE SUBSTR( entry_data, start_pos, end_pos - start_pos ) -- token before a space
       END AS value
FROM   tokens
Which, for your sample data:
-- Sample table: a single row whose ENTRY_DATA holds three space-separated values.
CREATE TABLE prefdir_entrydata ( entry_data ) AS
SELECT 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1' FROM DUAL;
Outputs:
| VALUE |
| :----------------- |
| prefvalue:CEROTG-2 |
| prefvalue:CEROTG |
| prefvalue:CEROTG_1 |
db<>fiddle here
An example that handles multiple input rows is:
-- Same INSTR/SUBSTR splitter, carrying ID along so that several source rows
-- can be split at once; only rows with the given DIST_NAME_SHORT are used.
WITH tokens ( id, entry_data, start_pos, end_pos ) AS (
  SELECT id,
         entry_data,
         1,
         INSTR( entry_data, ' ', 1 )
  FROM   prefdir_entrydata
  WHERE  dist_name_short = 'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
  UNION ALL
  SELECT id,
         entry_data,
         end_pos + 1,
         INSTR( entry_data, ' ', end_pos + 1 )
  FROM   tokens
  WHERE  end_pos > 0
)
SELECT id,
       CASE
         WHEN end_pos = 0
         THEN SUBSTR( entry_data, start_pos )                      -- last token of the row
         ELSE SUBSTR( entry_data, start_pos, end_pos - start_pos ) -- token before a space
       END AS value
FROM   tokens
ORDER BY id, start_pos
Which, for the test data:
-- Test table: two rows (ids 1 and 2) that share the same DIST_NAME_SHORT and
-- hold three and two space-separated values respectively.
CREATE TABLE prefdir_entrydata ( id, entry_data, dist_name_short ) AS
SELECT 1,
'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1',
'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
FROM DUAL UNION ALL
SELECT 2,
'prefvalue:CEROTG-2a prefvalue:CEROTG_1v2',
'prefentry=imagerepository,prefgroup=cdi_globals,prefgroup=component,prefgroup=system,prefcontext=default,prefroot=prefroot'
FROM DUAL;
Outputs:
ID | VALUE
-: | :-------------------
1 | prefvalue:CEROTG-2
1 | prefvalue:CEROTG
1 | prefvalue:CEROTG_1
2 | prefvalue:CEROTG-2a
2 | prefvalue:CEROTG_1v2
db<>fiddle here
-- Split a single string on spaces: generate one row per element with
-- CONNECT BY LEVEL and take the first capture group (the text in front of a
-- space or the end of the string) of the LEVEL-th match.
WITH src ( txt ) AS (
  SELECT 'prefvalue:CEROTG-2 prefvalue:CEROTG prefvalue:CEROTG_1'
  FROM   dual
)
SELECT REGEXP_SUBSTR( txt, '(.*?)( |$)', 1, LEVEL, NULL, 1 ) AS element
FROM   src
CONNECT BY LEVEL <= REGEXP_COUNT( txt, ' ' ) + 1;
ELEMENT
------------------------------------------------------
prefvalue:CEROTG-2
prefvalue:CEROTG
prefvalue:CEROTG_1
3 rows selected.

ask for a split function in oracle

right now there is a string like this:
789+456-239
I want to get a list like this:
sign | num
+ 789
+ 456
- 239
This could be a way, by using a commonly used split string method, just adapted to your need to handle the sign.
-- test case
with yourString(str) as
(
  select '+789+456-239 ' str
  from dual
),
-- normalised input: a '+' is prepended when the string does not already
-- start with a sign, so every number has a sign in front of it
signed(str) as
(
  select case
           when substr(str, 1, 1) = '+' or substr(str, 1, 1) = '-' then str
           else '+' || str
         end
  from yourString
)
-- query: the n-th sign and the n-th run of non-sign characters form one row;
-- rows are generated for as long as an n-th sign exists
select regexp_substr(str, '[+-]', 1, level) sign,
       regexp_substr(str, '[^+-]+', 1, level) num
from signed
connect by regexp_instr(str, '[+-]', 1, level) > 0
This gives:
SIGN NUM
-------------- --------------
+ 789
+ 456
- 239
Here I assume that if no sign is given in the beginning of this string, the first sign will be +.
If you need to handle more than one string, you need to identify an ID in your table and the CONNECT BY becomes a bit more complex:
-- test case
with yourString(id, str) as
(
  select 1, '+789+456-239' from dual union all
  select 2, '789+456-239' from dual union all
  select 3, '-789+456-239' from dual
),
-- normalised input: a '+' is prepended when the string does not already
-- start with a sign
signed(id, str) as
(
  select id,
         case
           when substr(str, 1, 1) = '+' or substr(str, 1, 1) = '-' then str
           else '+' || str
         end
  from yourString
)
-- query: one output row per sign found in each string; the PRIOR conditions
-- keep the generated rows attached to their own id
select id,
       regexp_substr(str, '[+-]', 1, level) sign,
       regexp_substr(str, '[^+-]+', 1, level) num
from signed
connect by regexp_instr(str, '[+-]', 1, level) > 0
       and prior id = id
       and prior sys_guid() is not null
The result:
ID SIGN NUM
---------- ----- --------------------------------------------
1 + 789
1 + 456
1 - 239
2 + 789
2 + 456
2 - 239
3 - 789
3 + 456
3 - 239
Here is a method that does not use regular expressions and can handle multiple input rows:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample lists: a multi-term expression, a single negative term, a single
-- explicitly positive term, and a single unsigned term.
CREATE TABLE table_name ( id, list ) AS
SELECT 1, '789+456-239' FROM DUAL UNION ALL
SELECT 2, '-123' FROM DUAL UNION ALL
SELECT 3, '+456' FROM DUAL UNION ALL
SELECT 4, '789' FROM DUAL;
Query 1:
-- Split LIST into signed terms without regular expressions.
-- Each row of BOUNDS describes one term: START_POS is where the term begins
-- (including its sign character, if any), END_POS is the position of the next
-- '+' or '-' (0 when there is none), and LVL is the term's ordinal.
WITH bounds ( id, list, start_pos, end_pos, lvl ) AS (
-- Anchor: the first term starts at position 1. The search for the next sign
-- starts at position 2 so that a sign at position 1 is not taken as a
-- separator.
SELECT id,
list,
1,
-- Nearest following sign: if one of the two signs is absent (INSTR = 0) use
-- the position of the other one, otherwise the smaller of the two positions.
CASE
WHEN INSTR( list, '+', 2 ) = 0 THEN INSTR( list, '-', 2 )
WHEN INSTR( list, '-', 2 ) = 0 THEN INSTR( list, '+', 2 )
ELSE LEAST( INSTR( list, '+', 2 ), INSTR( list, '-', 2 ) )
END,
1
FROM table_name
UNION ALL
-- Step: the next term starts AT the sign that ended the previous one, and the
-- search for its end resumes one character after that sign.
SELECT id,
list,
end_pos,
CASE
WHEN INSTR( list, '+', end_pos + 1 ) = 0 THEN INSTR( list, '-', end_pos + 1 )
WHEN INSTR( list, '-', end_pos + 1 ) = 0 THEN INSTR( list, '+', end_pos + 1 )
ELSE LEAST( INSTR( list, '+', end_pos + 1 ), INSTR( list, '-', end_pos + 1 ) )
END,
lvl + 1
FROM bounds
WHERE end_pos > 0
)
SELECT id,
-- The sign is '-' only when the term starts with '-'; anything else counts as '+'.
DECODE( SUBSTR( list, start_pos, 1 ), '-', '-', '+' ) AS sign,
-- The item is the term without its leading sign character: skip one character
-- when the term starts with '+' or '-', and stop before END_POS (or at the end
-- of the string when END_POS is 0).
SUBSTR(
list,
start_pos + DECODE( SUBSTR( list, start_pos, 1 ), '-', 1, '+', 1, 0 ),
DECODE( end_pos, 0, LENGTH( list ) + 1, end_pos ) - start_pos - DECODE( SUBSTR( list, start_pos, 1 ), '-', 1, '+', 1, 0 )
) AS item,
lvl
FROM bounds
ORDER BY id, lvl
Results:
| ID | SIGN | ITEM | LVL |
|----|------|------|-----|
| 1 | + | 789 | 1 |
| 1 | + | 456 | 2 |
| 1 | - | 239 | 3 |
| 2 | - | 123 | 1 |
| 3 | + | 456 | 1 |
| 4 | + | 789 | 1 |

regexp_substr to bring back data before a forward slash

I have the following pattern of characters in a dataset. I need to manipulate the data and cross-reference it with another table. I'm trying to write a regexp_substr to bring back the data before each forward slash, starting from the left. For example:
abc/ab/123/zzz
so I need to get the following results back to then compare to another table
abc
abc/ab
abc/ab/123
I have worked out the other logic but am struggling with the various regular expressions.
Here is the recursive query with SUBSTR and INSTR:
-- Produce every prefix of COL that ends just before a '/', longest first:
-- each step cuts off the last '/' and everything after it.
with cte(col) as
(
  -- Anchor: only values that contain a '/' have a prefix to return. Without
  -- this filter a value with no '/' gives INSTR = 0, i.e. SUBSTR(col, 1, -1),
  -- which adds a spurious NULL row to the result.
  select substr(col, 1, instr(col, '/', -1) - 1)
  from mytable
  where instr(col, '/') > 0
  union all
  -- Step: keep shortening while the current prefix still contains a '/'.
  select substr(col, 1, instr(col, '/', -1) - 1)
  from cte
  where instr(col, '/') > 0
)
select col from cte;
And here is the query with REGEXP_REPLACE:
-- Same idea with a regular expression: '/[^/]*$' matches the last '/' and
-- everything after it, and each step removes that match.
with prefixes(col) as
(
  select regexp_replace(col, '/[^/]*$', '')
  from mytable
  union all
  select regexp_replace(col, '/[^/]*$', '')
  from prefixes
  where instr(col, '/') >= 1
)
select col
from prefixes;
You don't need a regular expression. You can do it with (faster) string functions:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample table: one row holding a four-segment, slash-separated value.
CREATE TABLE test_data ( id, value ) AS
SELECT 1, 'abc/ab/123/zzz' FROM DUAL;
Query 1:
-- Record the position of every '/' in VALUE, one row per slash, then return
-- the text from the start of the string up to and including that slash.
WITH slash_positions ( id, value, end_pos ) AS (
  -- first slash (rows without any slash are left out)
  SELECT id,
         value,
         INSTR( value, '/', 1 )
  FROM   test_data
  WHERE  INSTR( value, '/', 1 ) > 0
  UNION ALL
  -- next slash after the previous one, while one exists
  SELECT id,
         value,
         INSTR( value, '/', end_pos + 1 )
  FROM   slash_positions
  WHERE  INSTR( value, '/', end_pos + 1 ) > 0
)
SELECT id,
       SUBSTR( value, 1, end_pos ) AS item
FROM   slash_positions
ORDER BY id, end_pos
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |
However, if you did want to use regular expressions then you could do:
Query 2:
-- Regular-expression variant: '.*?/' matches one segment together with its
-- trailing slash; ITEM accumulates the segments matched so far.
WITH prefixes ( id, value, n, item ) AS (
  -- first segment
  SELECT id,
         value,
         1,
         REGEXP_SUBSTR( value, '.*?/', 1, 1 )
  FROM   test_data
  WHERE  REGEXP_SUBSTR( value, '.*?/', 1, 1 ) IS NOT NULL
  UNION ALL
  -- append the following segment while one exists
  SELECT id,
         value,
         n + 1,
         item || REGEXP_SUBSTR( value, '.*?/', 1, n + 1 )
  FROM   prefixes
  WHERE  REGEXP_SUBSTR( value, '.*?/', 1, n + 1 ) IS NOT NULL
)
SELECT id,
       item
FROM   prefixes
Results:
| ID | ITEM |
|----|-------------|
| 1 | abc/ |
| 1 | abc/ab/ |
| 1 | abc/ab/123/ |

Split IPv4 address into 4 numbers in Oracle sql

I'm trying to split a given IPv4 address into four numbers.
In SQL Server this query works well for me:
-- SQL Server: part 4 of the dotted string, cast to an integer
SELECT CAST(
         PARSENAME('10.20.30.40', 4)
         AS INT
       )
result: 10
-- SQL Server: part 3 of the dotted string, cast to an integer
SELECT CAST(
         PARSENAME('10.20.30.40', 3)
         AS INT
       )
result: 20
and so on.
I need the equivalent syntax in Oracle SQL, but can't find it. Any idea?
You could use regexp_substr:
-- Pick the 1st..4th run of digits out of the address.
WITH ips AS (
  SELECT '10.20.30.40' AS ip
  FROM   dual
)
SELECT ip,
       REGEXP_SUBSTR( ip, '\d+', 1, 1 ) AS first_octet,
       REGEXP_SUBSTR( ip, '\d+', 1, 2 ) AS second_octet,
       REGEXP_SUBSTR( ip, '\d+', 1, 3 ) AS third_octet,
       REGEXP_SUBSTR( ip, '\d+', 1, 4 ) AS fourth_octet
FROM   ips;
Rextester Demo
You can use simple string functions (INSTR and SUBSTR) that are much faster than regular expressions:
SQL Fiddle
Oracle 11g R2 Schema Setup:
-- Sample table: one row holding a dotted IPv4 address as text.
CREATE TABLE sample_data ( ip_address ) AS
SELECT '10.20.30.40' FROM DUAL
Query 1:
-- Split a dotted IPv4 address into its four numbers with INSTR/SUBSTR.
-- The inline view locates the three '.' separators; the outer query cuts the
-- text between them.
-- The middle pieces have length (next_sep - prev_sep - 1) so that the
-- trailing '.' is NOT included. Including it only converts when TO_NUMBER
-- accepts a trailing decimal separator, which depends on the session's
-- NLS_NUMERIC_CHARACTERS setting.
SELECT TO_NUMBER(
         SUBSTR( ip_address, 1, first_sep - 1 )
       ) AS ClassA,
       TO_NUMBER(
         SUBSTR( ip_address, first_sep + 1, second_sep - first_sep - 1 )
       ) AS ClassB,
       TO_NUMBER(
         SUBSTR( ip_address, second_sep + 1, third_sep - second_sep - 1 )
       ) AS ClassC,
       TO_NUMBER(
         SUBSTR( ip_address, third_sep + 1 )
       ) AS ClassD
FROM   (
         SELECT ip_address,
                INSTR( ip_address, '.', 1, 1 ) AS first_sep,
                INSTR( ip_address, '.', 1, 2 ) AS second_sep,
                INSTR( ip_address, '.', 1, 3 ) AS third_sep
         FROM   sample_data
       )
Results:
| CLASSA | CLASSB | CLASSC | CLASSD |
|--------|--------|--------|--------|
| 10 | 20 | 30 | 40 |
In case you need all in one function, this is another solution:
-- One row per octet: LEVEL runs from 1 to 4 and selects the LEVEL-th run of digits.
SELECT REGEXP_SUBSTR( '10.20.30.40', '\d+', 1, LEVEL ) AS octet,
       LEVEL
FROM   dual
CONNECT BY LEVEL < 5;
OCTET LEVEL
10 1
20 2
30 3
40 4

How to replace comma separated text values in a column in Oracle?

I have a table t1 with a varchar col V_RELNIST_SKEY which contains comma separated numbers between 1 and 12 as shown. I want to write a select statement to replace numbers by string. For e.g., value 5,6 should be replaced by five,six and so on.
|V_RELNIST_SKEY|
|6 |
|5,6 |
|1,12 |
|1,2,3,12 |
Oracle Setup:
-- Sample table: comma-separated lists of numbers between 1 and 12,
-- from single values up to the full 1..12 list.
CREATE TABLE test_data ( value ) as
SELECT '9' FROM DUAL UNION ALL
SELECT '6' FROM DUAL UNION ALL
SELECT '1' FROM DUAL UNION ALL
SELECT '2,3' FROM DUAL UNION ALL
SELECT '5,6,7' FROM DUAL UNION ALL
SELECT '8,4' FROM DUAL UNION ALL
SELECT '1,2,3,4,5,6,7,8,9,10,11,12' FROM DUAL;
Query:
-- Replace every number in the comma-separated VALUE with its English word.
-- For each source row a correlated sub-query generates one row per number
-- (CONNECT BY LEVEL up to the count of digit runs), converts it, and
-- re-joins the words with LISTAGG in their original order. The single
-- aggregated string is wrapped in a collection so it can be joined back to
-- the source row through TABLE().
SELECT value,
column_value AS words
FROM test_data t
CROSS JOIN
TABLE(
CAST(
MULTISET(
SELECT LISTAGG(
-- TO_DATE(..., 'J') reads the number as a Julian day number and
-- TO_CHAR(..., 'JSP') spells that day number out in words.
TO_CHAR(
TO_DATE(
REGEXP_SUBSTR( t.value, '\d+', 1, LEVEL ),
'J'
),
'JSP'
),
','
) WITHIN GROUP ( ORDER BY LEVEL )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '\d+' )
) AS SYS.ODCIVARCHAR2LIST
)
) w;
Output:
VALUE WORDS
-------------------------- ----------------------------------------
9 NINE
6 SIX
1 ONE
2,3 TWO,THREE
5,6,7 FIVE,SIX,SEVEN
8,4 EIGHT,FOUR
1,2,3,4,5,6,7,8,9,10,11,12 ONE,TWO,THREE,FOUR,FIVE,SIX,SEVEN,EIGHT,
NINE,TEN,ELEVEN,TWELVE
Update
What if I have to replace 1 with A, 2 with B, 3 with C and so on?
-- Replace every number n in the comma-separated VALUE with the n-th capital
-- letter (CHR(64 + n): 1 -> 'A', 2 -> 'B', ...). The correlated sub-query
-- yields one aggregated string per source row, exposed through TABLE().
SELECT t.value,
       w.COLUMN_VALUE AS words
FROM   test_data t
       CROSS JOIN
       TABLE(
         CAST(
           MULTISET(
             SELECT LISTAGG(
                      CHR( 64 + TO_NUMBER( REGEXP_SUBSTR( t.value, '\d+', 1, LEVEL ) ) ),
                      ','
                    ) WITHIN GROUP ( ORDER BY LEVEL )
             FROM   DUAL
             CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '\d+' )
           ) AS SYS.ODCIVARCHAR2LIST
         )
       ) w;
Output:
VALUE WORDS
-------------------------- ----------------------------------------
9 I
6 F
1 A
2,3 B,C
5,6,7 E,F,G
8,4 H,D
1,2,3,4,5,6,7,8,9,10,11,12 A,B,C,D,E,F,G,H,I,J,K,L