Regular Expression - sql

I have data like this:
A:123, A:983, A:122, B:232, B:392, C:921, D:221, D:121, D:838
And I want to have my result like
A:123, 983, 122, B:232, 392, C:921, D:221, 121, 838
Can anyone please suggest?

You can use the regexp_substr() and listagg() functions,
together with connect by level <= regexp_count(str,':') to produce one row per item, as follows:
-- Break the list into (letter, number) pairs, collect the numbers per letter,
-- then join the per-letter groups back into one string.
WITH src (str) AS (
    SELECT 'A:123, A:983, A:122, B:232, B:392, C:921, D:221, D:121, D:838'
    FROM dual
),
pairs AS (
    -- One row per ':' in the string; the n-th letter run is paired with the n-th digit run.
    SELECT
        LEVEL AS rn,
        REGEXP_SUBSTR(str, '([[:alpha:]]+)', 1, LEVEL) AS letter,
        REGEXP_SUBSTR(str, '(\d)+', 1, LEVEL) AS num
    FROM src
    CONNECT BY LEVEL <= REGEXP_COUNT(str, ':')
),
per_letter AS (
    -- "A:123,983,122" style group, numbers kept in their original position order.
    SELECT letter || ':' || LISTAGG(num, ',') WITHIN GROUP (ORDER BY rn) AS str
    FROM pairs
    GROUP BY letter
)
SELECT LISTAGG(str, ',') WITHIN GROUP (ORDER BY SUBSTR(str, 1, 1)) AS str
FROM per_letter;
STR
-------------------------------------------
A:123,983,122,B:232,392,C:921,D:221,121,838
Demo

You do not need regular expressions as it can be done with standard string functions:
Oracle Setup:
-- Sample data: a single row holding the delimited "prefix:value" list.
CREATE TABLE test_data (
    value
) AS
    SELECT 'A:123, A:983, A:122, B:232, B:392, C:921, D:221, D:121, D:838'
    FROM DUAL
Query:
-- Regroup a "prefix:value, prefix:value, ..." list so that each prefix appears once,
-- followed by its values in their original left-to-right order.
WITH rsqfc ( id, value, spos, sep, epos ) AS (
  -- Anchor: the first item. spos = item start, sep = position of its ':',
  -- epos = position of the ', ' that ends it (0 when there is none).
  SELECT ROWNUM,
         value,
         1,
         INSTR( value, ':', 1 ),
         INSTR( value, ', ', 1 )
  FROM   test_data
UNION ALL
  -- Step: the next item starts just past the previous ', ' delimiter.
  SELECT id,
         value,
         epos + 2,
         INSTR( value, ':', epos + 2 ),
         INSTR( value, ', ', epos + 2 )
  FROM   rsqfc
  WHERE  epos > 0
),
items ( id, prefix, value, spos ) AS (
  -- Cut each item into prefix (before ':') and value (after ':'); keep spos as the
  -- item's position so later ordering does not depend on row-fetch order.
  SELECT id,
         SUBSTR( value, spos, sep - spos ),
         CASE
           WHEN epos > 0
           THEN SUBSTR( value, sep + 1, epos - sep - 1 )
           ELSE SUBSTR( value, sep + 1 )
         END,
         spos
  FROM   rsqfc
),
item_groups ( id, prefix, grouped_value ) AS (
  SELECT id,
         prefix,
         -- Order by the item's start position rather than ROWNUM, which is only
         -- incidentally aligned with the position in the source string.
         LISTAGG( value, ',' ) WITHIN GROUP ( ORDER BY spos )
  FROM   items
  GROUP BY id, prefix
)
SELECT LISTAGG( prefix || ':' || grouped_value, ', ' )
         WITHIN GROUP ( ORDER BY prefix ) AS value
FROM   item_groups
GROUP BY id
Output:
| VALUE |
| :--------------------------------------------- |
| A:123,983,122, B:232,392, C:921, D:221,121,838 |
db<>fiddle here

Related

How to count data based on specific condition in Oracle

Let's say I have a table of persons with ids and names as follows
[Person]
ID NAME
================
1 Michael
2 Michelle
3 Emma
4 Evan
5 Ellen
6 Gary
I want to count the number of persons based on the first characters of their names.
Here's the output I expect
NUMBER_OF_PERSONS
=================
2 //M = Michael and Michelle
3 //E = Emma, Evan and Ellen
1 //G = Gary
How do I achieve this in Oracle?
And here's my query
-- Asker's attempt (does not run as written): substr(name) in the WHERE clause is called
-- without the position/length arguments that the subquery's substr(name,1,1) supplies.
select count(id) as number_of_person
from person
where substr(name) in (select distinct substr(name,1,1) from person);
You can achieve that using the solution below.
-- For each initial letter, build "<count> //<letter> = <names>", where the names are
-- listed in ID order as "A", "A and B", or "A, B and C".
with Person (ID, NAME ) as (
  select 1, 'Michael'  from dual union all
  select 2, 'Michelle' from dual union all
  select 3, 'Emma'     from dual union all
  select 4, 'Evan'     from dual union all
  select 5, 'Ellen'    from dual union all
  select 6, 'Gary'     from dual
)
select count(*) || ' //' || substr(NAME, 1, 1) || ' = ' ||
       case
         -- Three or more names: comma-separate them, then turn the last ", " into " and ".
         -- Deciding on the number of names (not on occurrences of ' and ' in the text)
         -- keeps this correct for names that themselves contain " and ".
         when count(NAME) > 2
         -- No trailing space after \1, so the result does not end in a blank.
         then regexp_replace( listagg(NAME, ', ') within group ( order by ID ), ', ([^,]+)$', ' and \1', 1, 1 )
         else listagg(NAME, ' and ') within group ( order by ID )
       end NUMBER_OF_PERSONS
from Person
group by substr(NAME, 1, 1)
order by substr(NAME, 1, 1)
;
db<>fiddle
If you just want the count, you would use group by:
-- Number of persons per initial letter of the name.
SELECT
    SUBSTR(name, 1, 1) AS first_letter,
    COUNT(*)           AS number_of_person
FROM person
GROUP BY SUBSTR(name, 1, 1);
If, in addition, you actually wanted the list of names, you could put that in another column, assuming there are not too many:
-- Per initial letter: how many persons, plus their names as a comma-separated,
-- alphabetically ordered list.
SELECT
    SUBSTR(name, 1, 1)                            AS first_letter,
    COUNT(*)                                      AS number_of_person,
    LISTAGG(name, ', ') WITHIN GROUP (ORDER BY name) AS names
FROM person
GROUP BY SUBSTR(name, 1, 1);
This is my solution to it:
-- Per initial letter: the count, the letter, and the names listed alphabetically
-- as "A", "A and B" or "A, B and C".
WITH tbl AS (
    -- UNION ALL: the sample rows are distinct, so no de-duplication pass is needed.
    SELECT 1 AS ID, 'Michael' AS NAME FROM dual UNION ALL
    SELECT 2, 'Michelle' FROM dual UNION ALL
    SELECT 3, 'Emma' FROM dual UNION ALL
    SELECT 4, 'Evan' FROM dual UNION ALL
    SELECT 5, 'Ellen' FROM dual UNION ALL
    SELECT 6, 'Gary' FROM dual
)
SELECT COUNT(1)
     , SUBSTR(names.name,1,1)
       -- Match the whole ", " separator before the last name so that the captured
       -- name has no leading space (otherwise the result reads "and  Evan").
     , REGEXP_REPLACE((LISTAGG(names.name,', ') WITHIN GROUP (ORDER BY names.name)), ', ([^,]*)$', ' and \1')
FROM tbl names
GROUP BY SUBSTR(names.name,1,1);

Concatenating clob column values in sql query

I am using this statement in my SQL query to concatenate large CLOB column values, but the output contains extra "," (commas) and I am not able to figure out what is going wrong.
-- Asker's expression (the FROM clause is not shown): aggregates the per-row text into one CLOB.
-- Every row produces an XMLELEMENT holding the CASE result followed by the ', ' separator,
-- so rows where the CASE yields NULL still contribute a bare ', '.
-- RTRIM is given only ',' as its trim set, while the separator ends in a space.
SELECT RTRIM(
XMLAGG(
XMLELEMENT(
E,
CASE WHEN UNIQ_ID IN ( SELECT VAL
FROM SOME_TABLE
WHERE VAL_NM = 'SOME_TEXT' )
THEN TABLE1.COL_NAME
ELSE NULL
END,
', '
).EXTRACT('//text()')
ORDER BY TABLE1.UNIQ_ID
).GETCLOBVAL(),
','
) COMBINED_VAL
If you are asking about the trailing commas, then you are concatenating using comma then space so the trailing character is a space and not a comma.
If you are asking about adjacent separators with no value in between then when the WHEN UNIQ_ID IN ( ... ) part of your CASE statement is not matched you will have a NULL value; this is concatenated into the aggregated output and then you will find that you have two adjacent comma-space separators with no text in between.
For example:
-- Minimal reproduction: three rows, the middle one NULL, aggregated with a ', '
-- separator appended inside each element and only ',' passed to RTRIM.
WITH sample_rows ( id, value ) AS (
    SELECT 1, 'a'  FROM DUAL UNION ALL
    SELECT 2, NULL FROM DUAL UNION ALL
    SELECT 3, 'b'  FROM DUAL
)
SELECT RTRIM(
           XMLAGG(
               XMLELEMENT( E, value, ', ' ).EXTRACT('//text()')
               ORDER BY id
           ).GETCLOBVAL(),
           ','
       ) AS COMBINED_VAL
FROM sample_rows;
Outputs:
| COMBINED_VAL |
| :----------- |
| a, , b, |
The trailing comma-space isn't trimmed as the last character is a space and the values are a then NULL then b and the NULL is represented as a zero-width substring.
db<>fiddle here
That's pretty easy:
do not aggregate rows which you don't want to get. To do that you just need to generate xmlelement only for required rows, and just return null for others.
Just put all characters you want to trim from your result into second parameter of rtrim:
-- Build an XML element only for rows that should appear in the list (matching UNIQ_ID
-- and a non-null COL_NAME); other rows yield NULL from the CASE.
-- The trim set ', ' covers both characters of the separator.
SELECT
    RTRIM(
        XMLAGG(
            CASE
                WHEN UNIQ_ID IN (
                         SELECT VAL
                         FROM SOME_TABLE
                         WHERE VAL_NM = 'SOME_TEXT'
                     )
                     AND COL_NAME IS NOT NULL
                THEN XMLELEMENT( E, TABLE1.COL_NAME || ', ' )
            END
            ORDER BY TABLE1.UNIQ_ID
        ).EXTRACT('//text()').GETCLOBVAL(),
        ', '
    ) COMBINED_VAL
FROM table1;
Full test case with sample data and results: https://dbfiddle.uk/?rdbms=oracle_11.2&fiddle=452c715247e8edda8735014ff2fb34f4
-- Self-contained test case: SOME_TABLE holds the even numbers 2..20 tagged 'SOME_TEXT',
-- TABLE1 holds ids 1..20 with the id as CLOB text; only matching ids are aggregated.
WITH
SOME_TABLE ( VAL, VAL_NM ) AS (
    SELECT LEVEL * 2, 'SOME_TEXT'
    FROM dual
    CONNECT BY LEVEL <= 10
),
TABLE1 ( UNIQ_ID, COL_NAME ) AS (
    SELECT
        LEVEL          UNIQ_ID,
        TO_CLOB(LEVEL) COL_NAME
    FROM dual
    CONNECT BY LEVEL <= 20
)
SELECT
    RTRIM(
        XMLAGG(
            CASE
                WHEN UNIQ_ID IN (
                         SELECT VAL
                         FROM SOME_TABLE
                         WHERE VAL_NM = 'SOME_TEXT'
                     )
                     AND COL_NAME IS NOT NULL
                THEN XMLELEMENT( E, TABLE1.COL_NAME || ', ' )
            END
            ORDER BY TABLE1.UNIQ_ID
        ).EXTRACT('//text()').GETCLOBVAL(),
        ', '
    ) COMBINED_VAL
FROM TABLE1;
Results:
COMBINED_VAL
----------------------------------------
2, 4, 6, 8, 10, 12, 14, 16, 18, 20

Oracle joining tables using WITH clause with SPLIT [duplicate]

This question already has answers here:
How to split a varchar column as multiple values in SQL?
(2 answers)
Closed 1 year ago.
I have this table that is not linked to another table, because they don't have any same column. Now they want a report that will link both tables.
But the problem is the only common column to them is the WAFER_INFO column which has multiple values separated by a comma that is why I need to split them to make multiple records but different WAFER_INFO.
First Table
-- First table: the comma-separated wafer list recorded for one reel.
SELECT wafer_info
FROM bondertab_g3
WHERE tha_reel_id = 'TGDT349028H'
ORDER BY
    insert_dm,
    tha_reel_id,
    processlk_ky
Results for the query above
TGK343067-22,TGK343067-25,TGK343067-24,TGK343067-23
Second Table
-- Second table: one lookup per wafer id listed in the first query's result.
SELECT hp_part_nr, wafer_id, good_cnt, total_rej_cnt,
       processlk_ky, toollk_ky, toolnrlk_ky, materiallk_ky
FROM sawinsptab
WHERE wafer_id = 'TGK343067-22';

SELECT hp_part_nr, wafer_id, good_cnt, total_rej_cnt,
       processlk_ky, toollk_ky, toolnrlk_ky, materiallk_ky
FROM sawinsptab
WHERE wafer_id = 'TGK343067-25';

SELECT hp_part_nr, wafer_id, good_cnt, total_rej_cnt,
       processlk_ky, toollk_ky, toolnrlk_ky, materiallk_ky
FROM sawinsptab
WHERE wafer_id = 'TGK343067-24';

SELECT hp_part_nr, wafer_id, good_cnt, total_rej_cnt,
       processlk_ky, toollk_ky, toolnrlk_ky, materiallk_ky
FROM sawinsptab
WHERE wafer_id = 'TGK343067-23';
Basically just all of them in the first table
I have already worked out how to split all those records using this code
-- Split the reel's wafer_info into one row per comma-separated wafer id.
WITH data AS (
    SELECT
        tha_reel_id,
        wafer_info str
    FROM bondertab_g3
    WHERE tha_reel_id = 'TGDT349028H'
)
SELECT
    a.tha_reel_id,
    TRIM(REGEXP_SUBSTR(a.str, '[^,]+', 1, LEVEL)) waferid
FROM data a
-- Level n is generated only while an (n-1)-th comma exists in the string.
CONNECT BY INSTR(str, ',', 1, LEVEL - 1) > 0
Now my question is: how can I connect my query above to the Second Table when the only column they have in common is the WAFERID?
Find where the sawinsptab.wafer_id (wrapped in your comma delimiters) is a sub-string of bondertab_g3.wafer_info (wrapped in your comma delimiters):
-- Keep the sawinsptab rows whose wafer_id occurs as a complete item in the reel's
-- comma-separated wafer_info. Both sides are wrapped in commas so that an id can
-- only match a whole list item, not part of a longer id.
SELECT
    hp_part_nr,
    wafer_id,
    good_cnt,
    total_rej_cnt,
    processlk_ky,
    toollk_ky,
    toolnrlk_ky,
    materiallk_ky
FROM sawinsptab s
WHERE EXISTS (
    SELECT 1
    FROM bondertab_g3 b
    WHERE b.tha_reel_id = 'TGDT349028H'
      AND ',' || b.wafer_info || ',' LIKE '%,' || s.wafer_id || ',%'
);
or
-- Join variant: pairs each sawinsptab row with the reel row whose comma-wrapped
-- wafer_info contains the comma-wrapped wafer_id, so bondertab_g3 columns can be selected too.
SELECT
    s.hp_part_nr,
    s.wafer_id,
    s.good_cnt,
    s.total_rej_cnt,
    s.processlk_ky,
    s.toollk_ky,
    s.toolnrlk_ky,
    s.materiallk_ky,
    b.other_column
FROM sawinsptab s
INNER JOIN bondertab_g3 b
    ON ',' || b.wafer_info || ',' LIKE '%,' || s.wafer_id || ',%'
WHERE b.tha_reel_id = 'TGDT349028H';
or, if you need to use an index on wafer_id and want to split the delimited string then, you can do it with a recursive sub-query factoring clause and simple string functions (rather than slow regular expressions):
-- Split the reel's wafer_info with a recursive sub-query and plain string functions,
-- then filter sawinsptab on wafer_id with an equality-based IN.
SELECT
    hp_part_nr,
    wafer_id,
    good_cnt,
    total_rej_cnt,
    processlk_ky,
    toollk_ky,
    toolnrlk_ky,
    materiallk_ky
FROM sawinsptab s
WHERE wafer_id IN (
    -- item_start = first character of the current item;
    -- comma_pos  = position of the comma that ends it (0 for the last item).
    WITH item_bounds ( wafer_info, item_start, comma_pos ) AS (
        SELECT wafer_info, 1, INSTR( wafer_info, ',', 1 )
        FROM bondertab_g3
        WHERE tha_reel_id = 'TGDT349028H'
        UNION ALL
        SELECT wafer_info, comma_pos + 1, INSTR( wafer_info, ',', comma_pos + 1 )
        FROM item_bounds
        WHERE comma_pos > 0
    )
    SELECT CASE
               WHEN comma_pos = 0
               THEN SUBSTR( wafer_info, item_start )
               ELSE SUBSTR( wafer_info, item_start, comma_pos - item_start )
           END
    FROM item_bounds
);
or
-- Same recursive split as a CTE, carrying other_column along so the split wafer ids
-- can be joined to sawinsptab by equality.
-- item_start = first character of the current item;
-- comma_pos  = position of the comma that ends it (0 for the last item).
WITH item_bounds ( wafer_info, other_column, item_start, comma_pos ) AS (
    SELECT wafer_info, other_column, 1, INSTR( wafer_info, ',', 1 )
    FROM bondertab_g3
    WHERE tha_reel_id = 'TGDT349028H'
    UNION ALL
    SELECT wafer_info, other_column, comma_pos + 1, INSTR( wafer_info, ',', comma_pos + 1 )
    FROM item_bounds
    WHERE comma_pos > 0
)
SELECT
    s.hp_part_nr,
    s.wafer_id,
    s.good_cnt,
    s.total_rej_cnt,
    s.processlk_ky,
    s.toollk_ky,
    s.toolnrlk_ky,
    s.materiallk_ky,
    b.other_column
FROM sawinsptab s
INNER JOIN (
    SELECT
        CASE
            WHEN comma_pos = 0
            THEN SUBSTR( wafer_info, item_start )
            ELSE SUBSTR( wafer_info, item_start, comma_pos - item_start )
        END AS wafer_id,
        other_column
    FROM item_bounds
) b
    ON s.wafer_id = b.wafer_id
You can change the first query to a CTE and then use that directly in your query:
-- Split the reel's wafer_info into individual wafer ids, then return the sawinsptab
-- rows whose wafer_id is one of them.
with data as (
  select tha_reel_id, wafer_info str
  from bondertab_g3
  where tha_reel_id = 'TGDT349028H'
),
wafers as (
  -- One row per comma-separated item; level n exists only while an (n-1)-th comma exists.
  select d.tha_reel_id, trim(regexp_substr(d.str, '[^,]+', 1, LEVEL)) as waferid
  from data d
  connect by instr(str, ',', 1, LEVEL - 1) > 0
)
select s.*
from sawinsptab s
-- "wafers" must be aliased as w for w.waferid to resolve.
where s.wafer_id in (select w.waferid from wafers w);

regex oracle sql return all capturing groups

I have a regex like
select regexp_substr('some stuff TOTAL_SCORE<518>some stuff OTHER_VALUE<456> foo <after>', 'TOTAL_SCORE<(\d{3})>', 1, 1, NULL, 1) from dual which can return a value for a single capturing group.
How can I instead return all the capturing groups as an additional column? (string concat of results is fine)
select regexp_substr('some stuff TOTAL_SCORE<518> TOTAL_SCORE<123>some stuff OTHER_VALUE<456> foo <after>', 'TOTAL_SCORE<(\d{3})>') from dual
Query 1:
-- Sample data
WITH your_table ( value ) AS (
    SELECT 'some stuff TOTAL_SCORE<518>some stuff OTHER_VALUE<456> foo <after>'
    FROM DUAL
)
-- Query: replace the whole string with the two captured 3-digit groups, comma-separated.
SELECT
    REGEXP_REPLACE(
        value,
        '.*TOTAL_SCORE<(\d{3})>.*OTHER_VALUE<(\d{3})>.*',
        '\1,\2'
    ) As scores
FROM your_table
Output:
SCORES
-------
518,456
Query 2:
-- Sample data
WITH your_table ( value ) AS (
    SELECT 'some stuff TOTAL_SCORE<518> TOTAL_SCORE<123> some stuff OTHER_VALUE<456> foo <after>'
    FROM DUAL
)
-- Query: one output row per TOTAL_SCORE<nnn> occurrence. For each source row the
-- correlated sub-query generates as many levels as there are matches and extracts
-- capture group 1 of the LEVEL-th match as a number.
SELECT l.column_value As scores
FROM your_table t,
     TABLE(
         CAST(
             MULTISET(
                 SELECT TO_NUMBER(
                            REGEXP_SUBSTR( t.value, 'TOTAL_SCORE<(\d{3})>', 1, LEVEL, NULL, 1 )
                        )
                 FROM DUAL
                 CONNECT BY LEVEL <= REGEXP_COUNT( t.value, 'TOTAL_SCORE<(\d{3})>' )
             ) AS SYS.ODCINUMBERLIST
         )
     ) l;
Output:
SCORES
-------
518
123

Split comma separated values in Oracle 9i

In Oracle, I have columns called orderids
orderids
111,222,333
444,55,66
77,77
How can I get the output as
Orderid
111
222
333
444
55
66
77
77
Try this:
-- Splits each comma-separated COL1 value into one output row per item.
WITH TT AS
(SELECT orderid COL1 FROM orders)
-- Item number LEVEL is the text between the LEVEL-th and (LEVEL+1)-th comma of the
-- comma-wrapped string STR built in the inline view below.
SELECT substr(str,
instr(str, ',', 1, LEVEL) + 1,
instr(str, ',', 1, LEVEL + 1) -
instr(str, ',', 1, LEVEL) - 1) COL1
FROM (SELECT rownum AS r,
','|| COL1||',' AS STR -- leading and trailing comma give every item a delimiter on both sides
FROM TT )
CONNECT BY PRIOR r = r -- only connect a row to itself, so items of different rows are not mixed
AND instr(str, ',', 1, LEVEL + 1) > 0 -- continue only while a closing comma exists for this LEVEL
AND PRIOR dbms_random.STRING('p', 10) IS NOT NULL -- NOTE(review): presumably here so Oracle does not report the row-to-itself link as a CONNECT BY loop; confirm
;
See this SQLFiddle
This is one approach:
-- Pair every comma-wrapped orderids string with the numbers 1..100 and keep as many
-- numbers as the string has items; each kept number picks the item between the
-- lvl-th and (lvl+1)-th comma.
WITH order_table AS (
    SELECT '111,222,333' AS orderids FROM dual
    UNION ALL
    SELECT '444,55,66' FROM dual
    UNION ALL
    SELECT '77,77' FROM dual
)
SELECT
    SUBSTR(
        orderids,
        INSTR(orderids, ',', 1, lvl) + 1,
        INSTR(orderids, ',', 1, lvl + 1) - INSTR(orderids, ',', 1, lvl) - 1
    ) orderid
FROM
    ( SELECT ',' || orderids || ',' AS orderids FROM order_table ) wrapped
    CROSS JOIN
    ( SELECT LEVEL AS lvl FROM dual CONNECT BY LEVEL <= 100 ) positions
-- item count = number of commas in the wrapped string minus one
WHERE lvl <= LENGTH(orderids) - LENGTH(REPLACE(orderids, ',')) - 1;
Just remove the WITH clause and replace the order_table with your real table.
This too might help you,
-- Split each comma-separated orderid string into rows by rewriting it as
-- <e><e>item</e><e>item</e>...</e> and reading back the text of every inner <e>.
with t(orderid) as
(
-- Sample rows as given in the question. UNION ALL keeps them as-is; plain UNION
-- would de-duplicate the rows, which this sample data does not need.
SELECT '111,222,333' FROM dual
UNION ALL
SELECT '444,55,66' FROM dual
UNION ALL
SELECT '77,77' FROM dual
)
SELECT trim(x.COLUMN_VALUE.EXTRACT('e/text()')) cols
FROM t t, TABLE (xmlsequence(XMLTYPE('<e><e>' || REPLACE(t.orderid,',','</e><e>')|| '</e></e>').EXTRACT('e/e'))) x;
instr(','||NVL('972414AQ,972414AQ',I.CUSIP)||',', ','||I.CUSIP||',') > 0
This is the actual query I was looking for.