Concatenating clob column values in sql query - sql

I am using this statement in my SQL query to concatenate large CLOB column values, but the output contains extra "," (commas) and I am not able to figure out what is going wrong.
SELECT RTRIM(
XMLAGG(
XMLELEMENT(
E,
CASE WHEN UNIQ_ID IN ( SELECT VAL
FROM SOME_TABLE
WHERE VAL_NM = 'SOME_TEXT' )
THEN TABLE1.COL_NAME
ELSE NULL
END,
', '
).EXTRACT('//text()')
ORDER BY TABLE1.UNIQ_ID
).GETCLOBVAL(),
','
) COMBINED_VAL

If you are asking about the trailing commas, then you are concatenating using comma then space so the trailing character is a space and not a comma.
If you are asking about adjacent separators with no value in between then when the WHEN UNIQ_ID IN ( ... ) part of your CASE statement is not matched you will have a NULL value; this is concatenated into the aggregated output and then you will find that you have two adjacent comma-space separators with no text in between.
For example:
WITH test_data ( id, value ) AS (
SELECT 1, 'a' FROM DUAL UNION ALL
SELECT 2, NULL FROM DUAL UNION ALL
SELECT 3, 'b' FROM DUAL
)
SELECT RTRIM(
XMLAGG(
XMLELEMENT(
E,
value,
', '
).EXTRACT('//text()')
ORDER BY id
).GETCLOBVAL(),
','
) AS COMBINED_VAL
FROM test_data;
Outputs:
| COMBINED_VAL |
| :----------- |
| a, , b, |
The trailing comma-space isn't trimmed as the last character is a space and the values are a then NULL then b and the NULL is represented as a zero-width substring.
db<>fiddle here

That's pretty easy:
do not aggregate rows which you don't want to get. To do that you just need to generate xmlelement only for required rows, and just return null for others.
Just put all characters you want to trim from your result into second parameter of rtrim:
-- Concatenate COL_NAME for the TABLE1 rows whose UNIQ_ID is listed in SOME_TABLE
-- (VAL_NM = 'SOME_TEXT'), ordered by UNIQ_ID and separated by ', '.
SELECT RTRIM(
           -- XMLCAST ... AS CLOB yields the text content with XML entities decoded,
           -- so values containing &, < or > are not returned as &amp;, &lt;, &gt;.
           XMLCAST(
               XMLAGG(
                   -- Non-qualifying rows produce NULL instead of an element, so they
                   -- contribute nothing (and no stray separator) to the aggregate.
                   CASE
                       WHEN TABLE1.UNIQ_ID IN (SELECT VAL
                                               FROM SOME_TABLE
                                               WHERE VAL_NM = 'SOME_TEXT')
                            AND TABLE1.COL_NAME IS NOT NULL
                       THEN XMLELEMENT(E, TABLE1.COL_NAME || ', ')
                   END
                   ORDER BY TABLE1.UNIQ_ID
               ) AS CLOB
           ),
           -- Trim set: removes trailing commas and spaces, i.e. the final ', '.
           ', '
       ) AS COMBINED_VAL
FROM TABLE1;
Full test case with sample data and results: https://dbfiddle.uk/?rdbms=oracle_11.2&fiddle=452c715247e8edda8735014ff2fb34f4
-- Self-contained demo of the aggregation:
--   SOME_TABLE: the even numbers 2..20, all tagged 'SOME_TEXT'
--   TABLE1:     ids 1..20, each carrying its own id as a CLOB value
WITH SOME_TABLE (VAL, VAL_NM) AS (
    SELECT LEVEL * 2, 'SOME_TEXT'
    FROM DUAL
    CONNECT BY LEVEL <= 10
),
TABLE1 (UNIQ_ID, COL_NAME) AS (
    SELECT LEVEL          AS UNIQ_ID,
           TO_CLOB(LEVEL) AS COL_NAME
    FROM DUAL
    CONNECT BY LEVEL <= 20
)
SELECT RTRIM(
           XMLAGG(
               -- Only rows listed in SOME_TABLE with a non-null value become an
               -- element; every other row yields NULL and adds nothing.
               CASE
                   WHEN UNIQ_ID IN (SELECT VAL
                                    FROM SOME_TABLE
                                    WHERE VAL_NM = 'SOME_TEXT')
                        AND COL_NAME IS NOT NULL
                   THEN XMLELEMENT(E, TABLE1.COL_NAME || ', ')
               END
               ORDER BY TABLE1.UNIQ_ID
           ).EXTRACT('//text()').GETCLOBVAL(),
           -- Trim set: strips the trailing ', ' left after the last value.
           ', '
       ) COMBINED_VAL
FROM TABLE1;
Results:
COMBINED_VAL
----------------------------------------
2, 4, 6, 8, 10, 12, 14, 16, 18, 20

Related

How to include the code into a REPLACE function in oracle?

User #psaraj12 helped me with a ticket here about finding ascii character in a string in my DB with the following code:
with test (col) as (
select
'L.A.D'
from
dual
union all
select
'L.􀈉.D.'
from
dual
)
select
col,
case when max(ascii_of_one_character) >= 65535 then 'NOT OK' else 'OK' end result
from
(
select
col,
substr(col, column_value, 1) one_character,
ascii(
substr(col, column_value, 1)
) ascii_of_one_character
from
test cross
join table(
cast(
multiset(
select
level
from
dual connect by level <= length(col)
) as sys.odcinumberlist
)
)
)
group by
col
having max(ascii_of_one_character) >= 4000000000;
The script looks for characters in a certain range, GROUPs the results, and flags the strings that contain them.
Is it possible to include this in a REPLACE statement of a similar sort:
REPLACE(table.column, max(ascii_of_one_character) >= 4000000000, '')
EDIT: As per #flyaround answer this is the code I use changed a little bit:
with test (col) as (
select skunden.name1
from skunden
)
select col
, REGEXP_REPLACE(col, 'max(ascii_of_one_character)>=4000000000', '') as cleaned
, CASE WHEN REGEXP_COUNT(col, 'max(ascii_of_one_character)>=4000000000') > 0 THEN 0 ELSE 1 END as isOk
from test;
Coming back to your original code, because my suggested REGEXP_REPLACE does not work sufficiently well with high surrogates. Your approach is already very effective, so I built on it to provide a solution here.
-- Remove every character whose ASCII() value is >= 4000000000 from skunden.name.
-- Only rows that actually contain such a character are updated.
MERGE INTO skunden
USING (
    SELECT
        id AS innerId,
        -- Rebuild the name from the characters that are kept. The threshold is the
        -- same one used in HAVING below, so characters under it are never dropped.
        LISTAGG(
            CASE WHEN ascii_of_one_character < 4000000000 THEN one_character END,
            ''
        ) WITHIN GROUP (ORDER BY char_pos) AS cleaned
    FROM (
        -- One row per character position of each name.
        SELECT
            id,
            name,
            SUBSTR(name, column_value, 1)        AS one_character,
            ASCII(SUBSTR(name, column_value, 1)) AS ascii_of_one_character,
            -- column_value is the position passed to SUBSTR, so ordering by it
            -- restores the original character order deterministically.
            column_value                         AS char_pos
        FROM skunden
        CROSS JOIN TABLE(
            CAST(
                MULTISET(
                    SELECT LEVEL
                    FROM dual
                    CONNECT BY LEVEL <= LENGTH(name)
                ) AS sys.odcinumberlist
            )
        )
    )
    GROUP BY id, name
    HAVING MAX(ascii_of_one_character) >= 4000000000
)
ON (skunden.id = innerId)
WHEN MATCHED THEN
    UPDATE SET name = cleaned
;
In a MERGE you cannot update a column that is referenced in the ON clause. Therefore you should join on the unique key of your table (I used 'id' in my example) rather than on the column being updated.
The resulting value will be 'L..D.' for your example value of 'L.􀈉.D.'
If I got your question correctly you would like to remove characters with a higher decimal representation of characters than specified.
You could check to use REGEXP_REPLACE for this, like:
with test (col) as (
select
'L.A.D'
from
dual
union all
select
'L.􀈉.D.'
from
dual
)
select col
, REGEXP_REPLACE(col, '[^\u00010000-\u0010FFFF]+$', '') as cleaned
, CASE WHEN REGEXP_COUNT(col, '[^\u00010000-\u0010FFFF]+$') > 0 THEN 0 ELSE 1 END as isOk
from test;

How to match a string after a token (using regex)?

I'm trying to extract mail addresses after a token 'eaddr:'. So it would match the all occurrences in line entries, first consecutive string without spaces after that token: I tried:
SELECT regexp_substr(tab.entry, 'eaddr:\(.*?\)',1,1,'e',1)
from (
select 'String, email#domain.com' as entry
union
select 'eaddr:mail1#domain.com eaddr:mail2#domain.com sometext eaddr: mail3#domain.com some4354% text' as entry
union
select 'eaddr:mail5#domain.org' as entry
union
select 'Just a string' as entry
) tab
;
but it does not work. The correct result set is:
null
mail1#domain.com mail2#domain.com mail3#domain.com
mail5#domain.org
null
First of all, I suggest using a better regex to verify the email format. I am inspired by Gordon's SPLIT_TO_TABLE + LATERAL approach, and wrote some sample queries to fetch those emails from the entries.
If you want to get all the emails together, you can use this one:
with t as (
select 'String, email#domain.com' as entry
union
select 'eaddr:mail1#domain.com eaddr:mail2#domain.com sometext eaddr: mail3#domain.com some4354% text' as entry
union
select 'eaddr:mail5#domain.org' as entry
union
select 'Just a string' as entry
)
Select LISTAGG( regexp_substr( s.value, '[A-Z0-9a-z._%+-]+#[A-Za-z0-9.-]+\\.[A-Za-z]{2,64}' ) ,' ' ) emails from t,
lateral SPLIT_TO_TABLE(t.entry, 'eaddr:') s
where s.seq > 1;
+---------------------------------------------------------------------+
| EMAILS |
+---------------------------------------------------------------------+
| mail1#domain.com mail2#domain.com mail3#domain.com mail5#domain.org |
+---------------------------------------------------------------------+
To get the exact result in your question, you can use the following query:
with t as (
select 'String, email#domain.com' as entry
union
select 'eaddr:mail1#domain.com eaddr:mail2#domain.com sometext eaddr: mail3#domain.com some4354% text' as entry
union
select 'eaddr:mail5#domain.org' as entry
union
select 'Just a string' as entry
)
select emails from
(
Select t.entry, s.*,
LISTAGG( regexp_substr( IFF(s.seq = 1, '', s.value ), '[A-Z0-9a-z._%+-]+#[A-Za-z0-9.-]+\\.[A-Za-z]{2,64}' ) ,' ' )
OVER ( PARTITION BY s.seq ) emails
from t,
lateral SPLIT_TO_TABLE(t.entry, ' ') s )
where index = 1;
+----------------------------------------------------+
| EMAILS |
+----------------------------------------------------+
| NULL |
| mail1#domain.com mail2#domain.com mail3#domain.com |
| NULL |
| mail5#domain.org |
+----------------------------------------------------+
As far as I know, you can return only one match at a time from REGEXP_SUBSTR. The code below:
-- Collect up to three addresses that follow an 'eaddr:' token in each entry,
-- plus the number of 'eaddr:' tokens found.
WITH tab (entry) AS (
    -- The sample rows are distinct literals, so UNION ALL avoids a needless dedup.
    SELECT 'String, email#domain.com' FROM dual
    UNION ALL
    SELECT 'eaddr:mail1#domain.com eaddr:mail2#domain.com sometext eaddr: mail3#domain.com some4354% text' FROM dual
    UNION ALL
    SELECT 'eaddr:mail5#domain.org' FROM dual
    UNION ALL
    SELECT 'Just a string' FROM dual
)
SELECT
    -- In Oracle ' ' || NULL evaluates to ' ' rather than NULL, so a COALESCE around
    -- the separator cannot suppress it. RTRIM removes the blanks left over when an
    -- entry has fewer than three addresses; an entry with none trims down to NULL.
    RTRIM(
           REGEXP_SUBSTR(entry, 'eaddr:\s*(\S*)\s*', 1, 1, 'i', 1)
        || ' '
        || REGEXP_SUBSTR(entry, 'eaddr:\s*(\S*)\s*', 1, 2, 'i', 1)
        || ' '
        || REGEXP_SUBSTR(entry, 'eaddr:\s*(\S*)\s*', 1, 3, 'i', 1)
    ) AS match,
    -- Same case-insensitive flag as the REGEXP_SUBSTR calls, so the count always
    -- agrees with what those calls can extract.
    REGEXP_COUNT(entry, 'eaddr:\s*(\S*)\s*', 1, 'i') AS nmatches
FROM tab;
gives the result below (using Oracle). You can use REGEXP_COUNT as shown to get the number of matches. If there are more than 3 email addresses, you can add more || coalesce( lines as needed.
P.S. I'm not sure what the 'e' flag does in your example. I'm guessing that is a Snowflake-specific value.
You need to split the strings, extract the emails, and then reaggregate. I don't have Snowflake on hand, but this or something similar should do:
select t.*, s.emails
from t left join lateral
(select list_agg(split(s.value, ' ')), ' ') as emails
from table(string_split_to_table(t.entry, 'eaddr:')) as s
) s;
I'm not 100% sure that Snowflake supports multiple-character delimiters, for instance. If that is the case, you can use:
select t.*, s.emails
from t left join lateral
(select list_agg(substr(s.value, 7), ' ') as emails
from table(string_split_to_table(t.entry, ' ')) as s
where value like 'eaddr:%'
) s;
Using Javascript UDF
create or replace function ext_mail(col VARCHAR)
returns varchar
language javascript
as
$$
  // Returns every whitespace-free token that directly follows an 'eaddr:' marker
  // (whitespace between the colon and the token is allowed), joined by single
  // spaces. Returns the literal string 'NULL' when no such token exists.
  // NOTE(review): 'NULL' is a string, not SQL NULL - kept for compatibility with
  // existing callers; confirm whether a real NULL is wanted.

  // A SQL NULL argument reaches JavaScript as undefined; treat it as "nothing found".
  if (COL === undefined || COL === null) {
    return 'NULL';
  }

  // \S+ captures the whole address, including the dots in the domain part.
  var pattern = /eaddr:\s*(\S+)/g;
  var found = [];
  var hit;
  while ((hit = pattern.exec(COL)) !== null) {
    found.push(hit[1]);
  }

  return found.length > 0 ? found.join(' ') : 'NULL';
$$
;
with t as (
select 'String, email#domain.com' as entry
union
select 'eaddr:mail1#domain.com eaddr:mail2#domain.com sometext eaddr: mail3#domain.com some4354% text' as entry
union
select 'eaddr:mail5#domain.org' as entry
union
select 'Just a string' as entry
) select ext_mail(ENTRY) from t;

SQL to find upper case words from a column

I have a description column in my table and its values are:
This is a EXAMPLE
This is a TEST
This is a VALUE
I want to display only EXAMPLE, TEST, and VALUE from the description column.
How do I achieve this?
This could be a way:
-- Sample rows to run the query against.
WITH test (id, str) AS (
    SELECT 1, 'This is a EXAMPLE' FROM dual UNION ALL
    SELECT 2, 'This is a TEST' FROM dual UNION ALL
    SELECT 3, 'This is a VALUE' FROM dual UNION ALL
    SELECT 4, 'This IS aN EXAMPLE' FROM dual
),
-- Split each string on spaces: one row per word, pos = word index within the row.
words AS (
    SELECT
        id,
        TRIM(REGEXP_SUBSTR(str, '[^ ]+', 1, LEVEL)) str,
        LEVEL AS pos
    FROM test t
    -- Keep generating levels while another space separator exists; the PRIOR
    -- conditions keep every hierarchy confined to its own source row.
    CONNECT BY INSTR(str, ' ', 1, LEVEL - 1) > 0
           AND PRIOR id = id
           AND PRIOR SYS_GUID() IS NOT NULL
)
-- Keep the words made only of A-Z and glue them back together in word order.
SELECT id, listagg(str, ' ') within group (order by pos)
FROM words
WHERE REGEXP_LIKE(str, '^[A-Z]+$')
GROUP BY id
The idea is to tokenize every string, then cut off the words that are not made by upper case characters and then concatenate the remaining words.
The result:
1 EXAMPLE
2 TEST
3 VALUE
4 IS EXAMPLE
If you need to handle some other character as an upper case letter, you may edit the where condition to filter for the matching words; for example, with '_':
with test(id, str) as (
select 1, 'This is a EXAMPLE' from dual union all
select 2, 'This is a TEST' from dual union all
select 3, 'This is a VALUE' from dual union all
select 4, 'This IS aN EXAMPLE' from dual union all
select 5, 'This IS AN_EXAMPLE' from dual
)
select id, listagg(str, ' ') within group (order by pos)
from (
SELECT id,
trim(regexp_substr(str, '[^ ]+', 1, level)) str,
level as pos
FROM test t
CONNECT BY instr(str, ' ', 1, level - 1) > 0
and prior id = id
and prior sys_guid() is not null
)
where regexp_like(str, '^[A-Z_]+$')
group by id
gives:
1 EXAMPLE
2 TEST
3 VALUE
4 IS EXAMPLE
5 IS AN_EXAMPLE
Here's another solution. It was inspired by Aleksej's answer.
The idea? Get all the words. Then aggregate only fully uppercased to a list.
Sample data:
create table descriptions (ID int, Description varchar2(100));
insert into descriptions (ID, Description)
select 1 as ID, 'foo Foo FOO bar Bar BAR' as Description from dual
union all select 2, 'This is an EXAMPLE TEST Description VALUE' from dual
;
Query:
select id, Description, listagg(word, ',') within group (order by pos) as UpperCaseWords
from (
select
id, Description,
trim(regexp_substr(Description, '\w+', 1, level)) as word,
level as pos
from descriptions t
connect by regexp_instr(Description, '\s+', 1, level - 1) > 0
and prior id = id
and prior sys_guid() is not null
)
where word = upper(word)
group by id, Description
Result:
ID | DESCRIPTION | UPPERCASEWORDS
-- | ----------------------------------------- | ------------------
1 | foo Foo FOO bar Bar BAR | FOO,BAR
2 | This is an EXAMPLE TEST Description VALUE | EXAMPLE,TEST,VALUE
It is possible to achieve this thanks to the REGEXP_REPLACE function:
SELECT REGEXP_REPLACE(my_column, '(^[A-Z]| |[a-z][A-Z]*|[A-Z]*[a-z])', '') AS Result FROM my_table
It uses a regex which replaces first upper case char of the line and converts every lower case char and space with blanks.
Try this:
SELECT SUBSTR(column_name, INSTR(column_name,' ',-1) + 1)
FROM your_table;
This should do the trick:
SELECT SUBSTR(REGEXP_REPLACE(' ' || REGEXP_REPLACE(description, '(^[A-Z]|[a-z]|[A-Z][a-z]+|[,])', ''), ' +', ' '), 2, 9999) AS only_upper
FROM (
select 'Hey IF you do not know IT, This IS a test of UPPERCASE and IT, with good WILL and faith, Should BE fine to be SHOWN' description
from dual
)
I have added a condition to strip commas; you can add other special characters to remove inside those brackets.
ONLY_UPPER
-----------------------------------
IF IT IS UPPERCASE IT WILL BE SHOWN
This is a function based on some of the regular expression answers.
-- capwords: returns the words of orig_string that consist only of upper-case
-- letters and underscores, separated by single spaces.
--   orig_string  text to scan
--   returns      the remaining words, trimmed; NULL when nothing remains
create or replace function capwords(orig_string varchar2)
return varchar2
as
  -- Sized at the PL/SQL VARCHAR2 maximum so long inputs do not overflow the buffer.
  out_string varchar2(32767);
begin
  -- Delete each lower-case letter together with any upper-case letters or
  -- underscores attached to it on either side, i.e. every mixed-case word.
  out_string := REGEXP_REPLACE(orig_string, '([a-z][A-Z_]*|[A-Z_]*[a-z])', '');
  -- Collapse each run of spaces to one. ' +' needs at least one space, so the
  -- pattern can never match the empty string between two characters.
  out_string := REGEXP_REPLACE(trim(out_string), ' +', ' ');
  return out_string;
end;
/
Removes strings of upper case letters and underscores that have lower case letters
on either end. Replaces multiple adjacent spaces with one space.
Trims extra spaces off of the ends. Assumes max size of 80 characters.
Slightly edited output:
>select id,str,capwords(str) from test;
ID STR CAPWORDS(STR)
---------- ------------------------------ ------------------
1 This is a EXAMPLE EXAMPLE
2 This is a TEST TEST
3 This is a VALUE VALUE
4 This IS aN EXAMPLE IS EXAMPLE
5 This is WITH_UNDERSCORE WITH_UNDERSCORE
6 ThiS IS aN EXAMPLE IS EXAMPLE
7 thiS IS aN EXAMPLE IS EXAMPLE
8 This IS wiTH_UNDERSCORE IS
If you only need to "display" the result without changing the values in the column then you can use CASE WHEN (in the example Description is the column name):
Select CASE WHEN Description like '%EXAMPLE%' then 'EXAMPLE' WHEN Description like '%TEST%' then 'TEST' WHEN Description like '%VALUE%' then 'VALUE' END From [yourTable]
The conditions are not case sensitive even if you write it all in uppercase.
You can add Else '<Value if all conditions are wrong>' before the END in case there are descriptions that don't contain any of the values. The example will return NULL for those cases, and writing ELSE Description will return the original value of that row.
It also works if you need to update. It is simple and practical, easy way out, haha.

Need to replace alpha numeric characters by space and create new column

I need to convert the following
Column 1 Column 2
ABC, Company ABC Company
TA. Comp TA Comp
How can I get Column2 in sql where I am removing all ',' '.' to space.
How about:
with testdata as (
select 'ABC, Company Inc.' as col1 from dual
union all
select 'TA. Comp' as col1 from dual
)
select trim(regexp_replace(regexp_replace(col1, '[[:punct:]]',' '), ' {2,}', ' ')) as col2
from testdata;
Output:
ABC Company Inc
TA Comp
Assuming punctuation is what you're trying to blank out.
You can try to use:
-- Remove every character that is not a letter, digit or space from column1.
-- your_table is a placeholder for the table that holds column1 (DUAL has no such column).
SELECT REGEXP_REPLACE(column1, '[^a-zA-Z0-9 ]+', '') AS column2
FROM your_table;
with t (val) as
(
select 'ABC,. Cmpany' from dual union all
select 'A, VC' from dual union all
select 'A,, BC...com' from dual
)
select
val,
replace(replace(val, ',', ''), '.', '') x , -- one way
regexp_replace(val, '[,.]', '') y -- another way
from t
;
VAL X Y
--------------- ---------- ----------
ABC,. Cmpany ABC Cmpany ABC Cmpany
A, VC A VC A VC
A,, BC...com A BCcom A BCcom

regex oracle sql return all capturing groups

I have a regex like
select regexp_substr('some stuff TOTAL_SCORE<518>some stuff OTHER_VALUE<456> foo <after>', 'TOTAL_SCORE<(\d{3})>', 1, 1, NULL, 1) from dual which can return a value for a single capturing group.
How can I instead return all the capturing groups as an additional column? (string concat of results is fine)
select regexp_substr('some stuff TOTAL_SCORE<518> TOTAL_SCORE<123>some stuff OTHER_VALUE<456> foo <after>', 'TOTAL_SCORE<(\d{3})>') from dual
Query 1:
-- Sample data
WITH your_table ( value ) AS (
SELECT 'some stuff TOTAL_SCORE<518>some stuff OTHER_VALUE<456> foo <after>' FROM DUAL
)
-- Query
SELECT REGEXP_REPLACE(
value,
'.*TOTAL_SCORE<(\d{3})>.*OTHER_VALUE<(\d{3})>.*',
'\1,\2'
) As scores
FROM your_table
Output:
SCORES
-------
518,456
Query 2:
-- Sample data
WITH your_table ( value ) AS (
SELECT 'some stuff TOTAL_SCORE<518> TOTAL_SCORE<123> some stuff OTHER_VALUE<456> foo <after>' FROM DUAL
)
-- Query
SELECT l.column_value As scores
FROM your_table t,
TABLE(
CAST(
MULTISET(
SELECT TO_NUMBER(
REGEXP_SUBSTR(
t.value,
'TOTAL_SCORE<(\d{3})>',
1,
LEVEL,
NULL,
1
)
)
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, 'TOTAL_SCORE<(\d{3})>' )
) AS SYS.ODCINUMBERLIST
)
) l;
Output:
SCORES
-------
518
123