Oracle REGEXP_SUBSTR returning NULL as value - sql

I am trying to split data from a column into rows but I am facing this issue here.
When i run this query it splits the data fine but it is also returning NULL as an extra row too.
Here is the value I am trying to split ,162662163,90133140,163268955,169223426,169222899,
WITH CTE AS(
SELECT
RTRIM(LTRIM(PG2.MULTILIST11, ','), ',') ACCESS_BY_GROUPS
FROM AGILE.ITEM I
INNER JOIN AGILE.PAGE_TWO PG2 ON PG2.ID = I.ID
WHERE
ITEM_NUMBER IN --('313-000074',
('313-000090')
)
SELECT DISTINCT
REGEXP_SUBSTR(ACCESS_BY_GROUPS, '[^,]+', 1, column_value) ACCESS_BY_GROUPS
FROM CTE
CROSS JOIN TABLE(CAST(MULTISET(SELECT LEVEL FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(CTE.ACCESS_BY_GROUPS, ',') + 10
) AS sys.odcinumberlist))
I don't want to get that null value. I cannot apply that check out of the query because it will affect other column values too so I want it to be handled somewhere in the function. I hope someone can help.

It is because of leading and trailing commas.
One option is to begin from position 2 (see line #3) and limit number of values (line #5)
SQL> with test (col) as
2 (select ',162662163,90133140,163268955,169223426,169222899,' from dual)
3 select regexp_substr(col, '[^,]+', 2, level) res
4 from test
5 connect by level <= regexp_count(col, ',') - 1
6 /
RES
-------------------------------------------------
162662163
90133140
163268955
169223426
169222899
SQL>
Another is to remove leading and trailing comma first, and then split the rest into rows:
SQL> with test (col) as
2 (select ',162662163,90133140,163268955,169223426,169222899,' from dual),
3 temp as
4 -- remove leading and trailing comma first
5 (select ltrim(rtrim(col, ','), ',') col
6 from test
7 )
8 select regexp_substr(col, '[^,]+', 1, level) res
9 from temp
10 connect by level <= regexp_count(col, ',') + 1;
RES
------------------------------------------------
162662163
90133140
163268955
169223426
169222899
SQL>
[EDIT - for more than a single row]
If there are more rows involved, code has to be changed. Note that there must be some kind of a unique identifier for every row (ID in my example).
SQL> with test (id, col) as
2 (select 1, ',162662163,90133140,163268955,169223426,169222899,' from dual union all
3 select 2, ',1452761,1452762,' from dual
4 )
5 select id,
6 regexp_substr(col, '[^,]+', 2, column_value) res
7 from test cross join table(cast(multiset(select level from dual
8 connect by level <= regexp_count(col, ',') - 1
9 ) as sys.odcinumberlist))
10 order by id, column_value
11 /
ID RES
---------- -------------------------------------------------
1 162662163
1 90133140
1 163268955
1 169223426
1 169222899
2 1452761
2 1452762
7 rows selected.
SQL>

Related

SUBSTR to ADD value in oracle

I have table with column having data in below format in Oracle DB.
COL 1
abc,mno:EMP
xyz:EMP;tyu,opr:PROF
abc,mno:EMP;tyu,opr:PROF
I am trying to convert the data in below format
COL 1
abc:EMP;mno:EMP
xyz:EMP;tyu:PROF;opr:PROF
abc:EMP;mno:EMP;tyu:PROF;opr:PROF
Basically trying to get everything after : and before ; to move it substitute comma with it.
I tried some SUBSTR and LISTAGG but couldn't get anything worth sharing.
Regards.
Here's one option; read comments within code.
SQL> with test (id, col) as
2 -- sample data
3 (select 1, 'abc,mno:EMP' from dual union all
4 select 2, 'xyz:EMP;tyu,opr:PROF' from dual union all
5 select 3, 'abc,mno:EMP;tyu,opr:PROF' from dual
6 ),
7 temp as
8 -- split sample data to rows
9 (select id,
10 column_value cv,
11 regexp_substr(col, '[^;]+', 1, column_value) val
12 from test cross join
13 table(cast(multiset(select level from dual
14 connect by level <= regexp_count(col, ';') + 1
15 ) as sys.odcinumberlist))
16 )
17 -- finally, replace comma with a string that follows a colon sign
18 select id,
19 listagg(replace(val, ',', substr(val, instr(val, ':')) ||';'), ';') within group (order by cv) new_val
20 from temp
21 group by id
22 order by id;
ID NEW_VAL
---------- ----------------------------------------
1 abc:EMP;mno:EMP
2 xyz:EMP;tyu:PROF;opr:PROF
3 abc:EMP;mno:EMP;tyu:PROF;opr:PROF
SQL>
Using the answer of littlefoot, if i were to use cross apply i wouldnt need to cast as multiset...
with test (id, col) as
-- sample data
(select 1, 'abc,mno:EMP' from dual union all
select 2, 'xyz:EMP;tyu,opr:PROF' from dual union all
select 3, 'abc,mno:EMP;tyu,opr:PROF' from dual
),
temp as
-- split sample data to rows
(select id,
column_value cv,
regexp_substr(col, '[^;]+', 1, column_value) val
from test
cross apply (select level as column_value
from dual
connect by level<= regexp_count(col, ';') + 1)
)
-- finally, replace comma with a string that follows a colon sign
select id,
listagg(replace(val, ',', substr(val, instr(val, ':')) ||';'), ';') within group (order by cv) new_val
from temp
group by id
order by id;
You do not need recursive anything, just basic regex: if the pattern is always something,something2:someCode (e.g. you have no colon before the comma), then it would be sufficient.
with test (id, col) as (
select 1, 'abc,mno:EMP' from dual union all
select 2, 'xyz:EMP;tyu,opr:PROF' from dual union all
select 3, 'abc,mno:EMP;tyu,opr:PROF' from dual union all
select 3, 'abc,mno:EMP;tyu,opr:PROF;something:QWE;something2:QWE' from dual
)
select
/*
Grab this groups:
1) Everything before the comma
2) Then everything before the colon
3) And then everything between the colon and a semicolon
Then place group 3 between 1 and 2
*/
trim(trailing ';' from regexp_replace(col || ';', '([^,]+),([^:]+):([^;]+)', '\1:\3;\2:\3')) as res
from test
| RES |
| :------------------------------------------------------------- |
| abc:EMP;mno:EMP |
| xyz:EMP;tyu:PROF;opr:PROF |
| abc:EMP;mno:EMP;tyu:PROF;opr:PROF |
| abc:EMP;mno:EMP;tyu:PROF;opr:PROF;something:QWE;something2:QWE |
db<>fiddle here

Monitoring data with delimiter to create a new rows

I want to create a rows from one row on table that contains a delimiter on some fields as mentioned below on screen shoot
I want a result A separalte rows for the rows that already contains a deleimiter ;
the inputs are data from table 1 and the output is data as mentionned below on table 2 using oracle sql :insert and select query
you can see below the output recommanded:
One method is a recursive CTE:
with cte(id, description, val, before, after, n, cnt) as (
select id, description, val, before, after, 1 as n, regexp_count(description, ';')
from t
union all
select id, description, val, before, after, n + 1, cnt
from cte
where n <= cnt
)
select id,
regexp_substr(description, '[^;]+', 1, n) as description,
regexp_substr(val, '[^;]+', 1, n) as val,
regexp_substr(before, '[^;]+', 1, n) as before,
regexp_substr(after, '[^;]+', 1, n) as after
from cte;
Here is a db<>fiddle.
Alternatively:
SQL> with test (id, description, val, after, before) as
2 -- you already have sample data and don't type this
3 (select 1, 'ARTIC1;ARTIC2;ART11', '15;2;3', '12;6;8', '13;7;12' from dual union all
4 select 2, 'ARTICLE3;ARTICLE4' , '3;5' , '10;23' , '12;25' from dual union all
5 select 3, 'ARTICLE 5' , '6' , '2' , '1.9' from dual
6 )
7 -- query that does the job begins here
8 select id,
9 regexp_substr(description, '[^;]+', 1, column_value) descr,
10 regexp_substr(val , '[^;]+', 1, column_value) val,
11 regexp_substr(after , '[^;]+', 1, column_value) after,
12 regexp_substr(before , '[^;]+', 1, column_value) before
13 from test cross join
14 table(cast(multiset(select level from dual
15 connect by level <= regexp_count(description, ';') + 1
16 ) as sys.odcinumberlist))
17 order by id, descr, val;
ID DESCR VAL AFTER BEFORE
---------- ---------- ---------- ---------- ----------
1 ARTIC1 15 12 13
1 ARTIC2 2 6 7
1 ART11 3 8 12
2 ARTICLE3 3 10 12
2 ARTICLE4 5 23 25
3 ARTICLE 5 6 2 1.9
6 rows selected.
SQL>

How to count number of words in delimited string in Oracle SQL

For example i have the table called 'Table1'. and column called 'country'.
I want to count the value of word in string.below is my data for column 'country':
country:
"japan singapore japan chinese chinese chinese"
expected output: in above data we can see the japan appear two time, singapore once and chinese 3 times.i want to count value of word where japan is count as one, singapore as one and chinese as one. hence the ouput will be 3. please help me
ValueOfWord: 3
Firstly, it is a bad design to store multiple values in a single column as delimited string. You should consider normalizing the data as a permanent solution.
With the denormalized data, you could do it in a single SQL using REGEXP_SUBSTR:
SELECT COUNT(DISTINCT(regexp_substr(country, '[^ ]+', 1, LEVEL))) as "COUNT"
FROM table_name
CONNECT BY LEVEL <= regexp_count(country, ' ')+1
/
Demo:
SQL> WITH sample_data AS
2 ( SELECT 'japan singapore japan chinese chinese chinese' str FROM dual
3 )
4 -- end of sample_data mocking real table
5 SELECT COUNT(DISTINCT(regexp_substr(str, '[^ ]+', 1, LEVEL))) as "COUNT"
6 FROM sample_data
7 CONNECT BY LEVEL <= regexp_count(str, ' ')+1
8 /
COUNT
----------
3
See Split single comma delimited string into rows in Oracle to understand how the query works.
UPDATE
For multiple delimited string rows you need to take care of the number of rows formed by the CONNECT BY clause.
See Split comma delimited strings in a table in Oracle for more ways of doing the same task.
Setup
Let's say you have a table with 3 rows like this:
SQL> CREATE TABLE t(country VARCHAR2(200));
Table created.
SQL> INSERT INTO t VALUES('japan singapore japan chinese chinese chinese');
1 row created.
SQL> INSERT INTO t VALUES('singapore indian malaysia');
1 row created.
SQL> INSERT INTO t VALUES('french french french');
1 row created.
SQL> COMMIT;
Commit complete.
SQL> SELECT * FROM t;
COUNTRY
---------------------------------------------------------------------------
japan singapore japan chinese chinese chinese
singapore indian malaysia
french french french
Using REGEXP_SUBSTR and REGEXP_COUNT:
We expect the output as 6 since there are 6 unique strings.
SQL> SELECT COUNT(DISTINCT(regexp_substr(t.country, '[^ ]+', 1, lines.column_value))) count
2 FROM t,
3 TABLE (CAST (MULTISET
4 (SELECT LEVEL FROM dual
5 CONNECT BY LEVEL <= regexp_count(t.country, ' ')+1
6 ) AS sys.odciNumberList ) ) lines
7 ORDER BY lines.column_value
8 /
COUNT
----------
6
There are many other methods to achieve the desired output. Let's see how:
Using XMLTABLE
SQL> SELECT COUNT(DISTINCT(country)) COUNT
2 FROM
3 (SELECT trim(COLUMN_VALUE) country
4 FROM t,
5 xmltable(('"'
6 || REPLACE(country, ' ', '","')
7 || '"'))
8 )
9 /
COUNT
----------
6
Using MODEL clause
SQL> WITH
2 model_param AS
3 (
4 SELECT country AS orig_str ,
5 ' '
6 || country
7 || ' ' AS mod_str ,
8 1 AS start_pos ,
9 Length(country) AS end_pos ,
10 (LENGTH(country) -
11 LENGTH(REPLACE(country, ' '))) + 1 AS element_count ,
12 0 AS element_no ,
13 ROWNUM AS rn
14 FROM t )
15 SELECT COUNT(DISTINCT(Substr(mod_str, start_pos, end_pos-start_pos))) count
16 FROM (
17 SELECT *
18 FROM model_param
19 MODEL PARTITION BY (rn, orig_str, mod_str)
20 DIMENSION BY (element_no)
21 MEASURES (start_pos, end_pos, element_count)
22 RULES ITERATE (2000)
23 UNTIL (ITERATION_NUMBER+1 = element_count[0])
24 ( start_pos[ITERATION_NUMBER+1] =
25 instr(cv(mod_str), ' ', 1, cv(element_no)) + 1,
26 end_pos[ITERATION_NUMBER+1] =
27 instr(cv(mod_str), ' ', 1, cv(element_no) + 1) )
28 )
29 WHERE element_no != 0
30 ORDER BY mod_str , element_no
31 /
COUNT
----------
6
Did you store that kind of string in a single entry?
If not, try
SELECT COUNT(*)
FROM (SELECT DISTINCT T.country FROM Table1 T)
If yes, I would write an external program to parse the string and return the result you want.
Like using java.
Create a String set.
I would use JDBC to retrieve the record, and use split to split strings in tokens using ' 'delimiter. For every token, if it is not in the set, add it to the set.
When parse finishes, get the length of the set, which is the value you want.
Break the string based on the space delimiter
SELECT COUNT(DISTINCT regexp_substr(col, '[^ ]+', 1, LEVEL))
FROM T
CONNECT BY LEVEL <= regexp_count(col, ' ')+1
For counting DISTINCT words
SELECT col,
COUNT(DISTINCT regexp_substr(col, '[^ ]+', 1, LEVEL))
FROM T
CONNECT BY LEVEL <= regexp_count(col, ' ')+1
GROUP BY col
FIDDLE

How to reverse the string 'ab,cd,ef' to 'ef->cd->ab'

when I select the table from Oracle, I want to handle one col'val :
eg:
'ab,cd,ef' to 'ef->cd->ab';
'AB,BC' to 'BC->AB';
'ACNN,BBCCAC' to 'BBCCAC->ACNN';
'BBBDC,DCCX,FFF' to 'FFF->DCCX->BBBDC'
We have two tasks. The first is to tokenize the original strings. This is quite easy with regular expressions (although there are more performant approaches if you are dealing with large volumes). The second task is to re-assemble the tokens in reverse order; we can use the 11gR2 LISTAGG() function for this:
with tokens as (
select distinct col1, regexp_substr(col1, '[^,]+', 1, level) as tkn, level as rn
from t23
connect by level <= regexp_count (col1, '[,]') +1
)
select col1
, listagg(tkn, '->')
within group (order by rn desc) as rev_col1
from tokens
group by col1
/
Here is a SQL Fiddle.
You can do it with a mix of string split and string aggregation.
Using:
REGEXP_SUBSTR : To split the comma delimited string into rows
LISTAGG : To aggregate the values
You can have a look at this article to understand how string split works http://lalitkumarb.wordpress.com/2015/03/04/split-comma-delimited-strings-in-a-table-using-oracle-sql/
SQL> WITH DATA AS(
2 SELECT 1 ID, 'ab,cd,ef' text FROM dual UNION ALL
3 SELECT 2 ID, 'AB,BC' text FROM dual UNION ALL
4 SELECT 3 ID, 'ACNN,BBCCAC' text FROM dual
5 )
6 SELECT ID,
7 listagg(text, ',') WITHIN GROUP (
8 ORDER BY rn DESC) reversed_indices
9 FROM
10 (SELECT t.id,
11 rownum rn,
12 trim(regexp_substr(t.text, '[^,]+', 1, lines.COLUMN_VALUE)) text
13 FROM data t,
14 TABLE (CAST (MULTISET
15 (SELECT LEVEL FROM dual CONNECT BY LEVEL <= regexp_count(t.text, ',')+1
16 ) AS sys.odciNumberList ) ) lines
17 ORDER BY ID
18 )
19 GROUP BY ID
20 /
ID REVERSED_INDICES
---------- ------------------------------
1 ef,cd,ab
2 BC,AB
3 BBCCAC,ACNN
SQL>
Let's say your table looks like:
SQL> SELECT * FROM t;
ID TEXT
---------- ------------------------------
1 ab,cd,ef
2 AB,BC
3 ACNN,BBCCAC
4 word1,word2,word3
5 1,2,3
SQL>
Using the above query:
SQL> SELECT ID,
2 listagg(text, '-->') WITHIN GROUP (
3 ORDER BY rn DESC) reversed_indices
4 FROM
5 (SELECT t.id,
6 rownum rn,
7 trim(regexp_substr(t.text, '[^,]+', 1, lines.COLUMN_VALUE)) text
8 FROM t,
9 TABLE (CAST (MULTISET
10 (SELECT LEVEL FROM dual CONNECT BY LEVEL <= regexp_count(t.text, ',')+1
11 ) AS sys.odciNumberList ) ) lines
12 ORDER BY ID
13 )
14 GROUP BY ID
15 /
ID REVERSED_INDICES
---------- ------------------------------
1 ef-->cd-->ab
2 BC-->AB
3 BBCCAC-->ACNN
4 word3-->word2-->word1
5 3-->2-->1
SQL>

Preserve order when converting a delimited string to a column

I want to preserve the record order, which is provided as comma delimited string. The 5th item in by delimited string is a null. I need the 5th row to be null as well.
with test as
(select 'ABC,DEF,GHI,JKL,,MNO' str from dual
)
select rownum, regexp_substr (str, '[^,]+', 1, rownum) split
from test
connect by level <= length (regexp_replace (str, '[^,]+' )) + 1
The current result I'm getting puts this in the 6th position:
1 ABC
2 DEF
3 GHI
4 JKL
5 MNO
6
Order is preserved by your expression, but your regular expression doesn't match nulls correctly, so the 5th item disappears. The 6th row is a NULL because there are no more match after the 5th match.
You could do this instead:
SQL> with test as
2 (select 'ABC,DEF,GHI,JKL,,MNO' str from dual
3 )
4 SELECT rownum,
5 rtrim(regexp_substr(str || ',', '[^,]*,', 1, rownum), ',') split
6 FROM test
7 CONNECT BY LEVEL <= length(regexp_replace(str, '[^,]+')) + 1;
ROWNUM SPLIT
---------- ---------------------------------------------------------------
1 ABC
2 DEF
3 GHI
4 JKL
5
6 MNO
6 rows selected
Or this:
SQL> with test as
2 (select 'ABC,DEF,GHI,JKL,,MNO' str from dual
3 )
4 SELECT rownum,
5 regexp_substr(str, '([^,]*)(,|$)', 1, rownum, 'i', 1) split
6 FROM test
7 CONNECT BY LEVEL <= length(regexp_replace(str, '[^,]+')) + 1;
ROWNUM SPLIT
---------- ------------------------------------------------------------
1 ABC
2 DEF
3 GHI
4 JKL
5
6 MNO
6 rows selected
Try Something like this:
SELECT
STR,
REPLACE ( SUBSTR ( STR,
CASE LEVEL
WHEN 1
THEN
0
ELSE
INSTR ( STR,
'~',
1,
LEVEL
- 1 )
END
+ 1,
1 ),
'~' )
FROM
(SELECT 'A~~C~~E' AS STR FROM DUAL)
CONNECT BY
LEVEL <= LENGTH ( REGEXP_REPLACE ( STR,
'[^~]+' ) )
+ 1;
This one works..
SELECT
ROWNUM,
CAST ( REGEXP_SUBSTR ( STR,
'(.*?)(,|$)',
1,
LEVEL,
NULL,
1 ) AS CHAR ( 12 ) )
OUTPUT
FROM
(SELECT 'ABC,DEF,GHI,JKL,,MNO' AS STR FROM DUAL)
CONNECT BY
LEVEL <= REGEXP_COUNT ( STR,
',' )
+ 1;