transpose multiple strings Oracle [duplicate] - sql

Here is the DDL --
create table tbl1 (
id number,
value varchar2(50)
);
insert into tbl1 values (1, 'AA, UT, BT, SK, SX');
insert into tbl1 values (2, 'AA, UT, SX');
insert into tbl1 values (3, 'UT, SK, SX, ZF');
Notice, here value is comma separated string.
But, we need result like following-
ID VALUE
-------------
1 AA
1 UT
1 BT
1 SK
1 SX
2 AA
2 UT
2 SX
3 UT
3 SK
3 SX
3 ZF
How do we write SQL for this?

I agree that this is a really bad design.
Try this if you can't change that design:
select distinct id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
order by id, level;
OUPUT
id value level
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Credits to this
To remove duplicates in a more elegant and efficient way (credits to #mathguy)
select id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
and PRIOR id = id
and PRIOR SYS_GUID() is not null
order by id, level;
If you want an "ANSIer" approach go with a CTE:
with t (id,res,val,lev) as (
select id, trim(regexp_substr(value,'[^,]+', 1, 1 )) res, value as val, 1 as lev
from tbl1
where regexp_substr(value, '[^,]+', 1, 1) is not null
union all
select id, trim(regexp_substr(val,'[^,]+', 1, lev+1) ) res, val, lev+1 as lev
from t
where regexp_substr(val, '[^,]+', 1, lev+1) is not null
)
select id, res,lev
from t
order by id, lev;
OUTPUT
id val lev
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Another recursive approach by MT0 but without regex:
WITH t ( id, value, start_pos, end_pos ) AS
( SELECT id, value, 1, INSTR( value, ',' ) FROM tbl1
UNION ALL
SELECT id,
value,
end_pos + 1,
INSTR( value, ',', end_pos + 1 )
FROM t
WHERE end_pos > 0
)
SELECT id,
SUBSTR( value, start_pos, DECODE( end_pos, 0, LENGTH( value ) + 1, end_pos ) - start_pos ) AS value
FROM t
ORDER BY id,
start_pos;
I've tried 3 approaches with a 30000 rows dataset and 118104 rows returned and got the following average results:
My recursive approach: 5 seconds
MT0 approach: 4 seconds
Mathguy approach: 16 seconds
MT0 recursive approach no-regex: 3.45 seconds
#Mathguy has also tested with a bigger dataset:
In all cases the recursive query (I only tested the one with regular
substr and instr) does better, by a factor of 2 to 5. Here are the
combinations of # of strings / tokens per string and CTAS execution
times for hierarchical vs. recursive, hierarchical first. All times in
seconds
30,000 x 4: 5 / 1.
30,000 x 10: 15 / 3.
30,000 x 25: 56 / 37.
5,000 x 50: 33 / 14.
5,000 x 100: 160 / 81.
10,000 x 200: 1,924 / 772

This will get the values without requiring you to remove duplicates or having to use a hack of including SYS_GUID() or DBMS_RANDOM.VALUE() in the CONNECT BY:
SELECT t.id,
v.COLUMN_VALUE AS value
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v
Update:
Returning the index of the element in the list:
Option 1 - Return a UDT:
CREATE TYPE string_pair IS OBJECT( lvl INT, value VARCHAR2(4000) );
/
CREATE TYPE string_pair_table IS TABLE OF string_pair;
/
SELECT t.id,
v.*
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT string_pair( level, TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS string_pair_table
)
) v;
Option 2 - Use ROW_NUMBER():
SELECT t.id,
v.COLUMN_VALUE AS value,
ROW_NUMBER() OVER ( PARTITION BY id ORDER BY ROWNUM ) AS lvl
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v;

Vercelli posted a correct answer. However, with more than one string to split, connect by will generate an exponentially-growing number of rows, with many, many duplicates. (Just try the query without distinct.) This will destroy performance on data of non-trivial size.
One common way to overcome this problem is to use a prior condition and an additional check to avoid cycles in the hierarchy. Like so:
select id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
and prior id = id
and prior sys_guid() is not null
order by id, level;
See, for example, this discussion on OTN: https://community.oracle.com/thread/2526535

An alternate method is to define a simple PL/SQL function:
CREATE OR REPLACE FUNCTION split_String(
i_str IN VARCHAR2,
i_delim IN VARCHAR2 DEFAULT ','
) RETURN SYS.ODCIVARCHAR2LIST DETERMINISTIC
AS
p_result SYS.ODCIVARCHAR2LIST := SYS.ODCIVARCHAR2LIST();
p_start NUMBER(5) := 1;
p_end NUMBER(5);
c_len CONSTANT NUMBER(5) := LENGTH( i_str );
c_ld CONSTANT NUMBER(5) := LENGTH( i_delim );
BEGIN
IF c_len > 0 THEN
p_end := INSTR( i_str, i_delim, p_start );
WHILE p_end > 0 LOOP
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, p_end - p_start );
p_start := p_end + c_ld;
p_end := INSTR( i_str, i_delim, p_start );
END LOOP;
IF p_start <= c_len + 1 THEN
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, c_len - p_start + 1 );
END IF;
END IF;
RETURN p_result;
END;
/
Then the SQL becomes very simple:
SELECT t.id,
v.column_value AS value
FROM TBL1 t,
TABLE( split_String( t.value ) ) v

--converting row of data into comma sepaerated string
SELECT
department_id,
LISTAGG(first_name, ',') WITHIN GROUP(
ORDER BY
first_name
) comma_separted_data
FROM
hr.employees
GROUP BY
department_id;
--comma-separated string into row of data
CREATE TABLE t (
deptno NUMBER,
employee_name VARCHAR2(255)
);
INSERT INTO t VALUES (
10,
'mohan,sam,john'
);
INSERT INTO t VALUES (
20,
'manideeep,ashok,uma'
);
INSERT INTO t VALUES (
30,
'gopal,gopi,manoj'
);
SELECT
deptno,
employee_name,
regexp_count(employee_name, ',') + 1,
regexp_substr(employee_name, '\w+', 1, 1)
FROM
t,
LATERAL (
SELECT
level l
FROM
dual
CONNECT BY
level < regexp_count(employee_name, ',') + 1
);
DROP TABLE t;

SELECT COL1, COL2
FROM ( SELECT INDX, MY_STR1, MY_STR2, COL1_ELEMENTS, COL1, COL2_ELEMENTS, COL2
FROM ( SELECT 0 "INDX", COL1 "MY_STR1", COL1_ELEMENTS, COL1, '' "MY_STR2", COL2_ELEMENTS, COL2
FROM(
SELECT
REPLACE(COL1, ', ', ',') "COL1", -- In case there is a space after comma
Trim(Length(Replace(COL1, ' ', ''))) - Trim(Length(Translate(REPLACE(COL1, ', ', ','), 'A,', 'A'))) + 1 "COL1_ELEMENTS", -- Number of elements
Replace(COL2, ', ', ',') "COL2", -- In case there is a space after comma
Trim(Length(Replace(COL2, ' ', ''))) - Trim(Length(Translate(REPLACE(COL2, ', ', ','), 'A,', 'A'))) + 1 "COL2_ELEMENTS" -- Number of elements
FROM
(SELECT 'aaa,bbb,ccc' "COL1", 'qq, ww, ee' "COL2" FROM DUAL) -- Your example data
)
)
MODEL -- Modeling --> INDX = 0 COL1='aaa,bbb,ccc' COL2='qq,ww,ee'
DIMENSION BY(0 as INDX)
MEASURES(COL1, COL1_ELEMENTS, COL2, CAST('a' as VarChar2(4000)) as MY_STR1, CAST('a' as VarChar2(4000)) as MY_STR2)
RULES ITERATE (10) --UNTIL (ITERATION_NUMBER <= COL1_ELEMENTS[ITERATION_NUMBER + 1]) -- If you don't know the number of elements this should be bigger then you aproximation. Othewrwise it will split given number of elements
(
COL1_ELEMENTS[ITERATION_NUMBER + 1] = COL1_ELEMENTS[0],
MY_STR1[0] = COL1[CV()],
MY_STR1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], InStr(MY_STR1[ITERATION_NUMBER], ',', 1) + 1),
COL1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR1[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR1[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR1[ITERATION_NUMBER]) END),
MY_STR2[0] = COL2[CV()],
MY_STR2[ITERATION_NUMBER + 1] = SubStr(MY_STR2[ITERATION_NUMBER], InStr(MY_STR2[ITERATION_NUMBER], ',', 1) + 1),
COL2[ITERATION_NUMBER + 1] = SubStr(MY_STR2[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR2[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR2[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR2[ITERATION_NUMBER]) END)
)
)
WHERE INDX > 0 And INDX <= COL1_ELEMENTS -- INDX 0 contains starting strings
--
-- COL1 COL2
-- ---- ----
-- aaa qq
-- bbb ww
-- ccc ee

Related

How to rearrange the letter in string in alphabetical order in SQL

How to rearrange the letter in string in alphabetical order in SQL
For example
cbaz to abcz
You can split the string up into characters and then aggregate:
WITH characters ( rid, value, ch, i, l ) AS (
SELECT ROWID,
value,
SUBSTR(value, 1, 1),
1,
LENGTH(value)
FROM table_name
UNION ALL
SELECT rid,
value,
SUBSTR(value, i + 1, 1),
i + 1,
l
FROM characters
WHERE i < l
)
SELECT MAX( value ) AS original,
LISTAGG(ch) WITHIN GROUP ( ORDER BY ch ) AS ordered
FROM characters
GROUP BY rid
or:
SELECT value As original,
ordered
FROM table_name t
CROSS APPLY (
SELECT LISTAGG(SUBSTR(t.value, LEVEL, 1))
WITHIN GROUP (ORDER BY SUBSTR(t.value, LEVEL, 1)) AS ordered
FROM DUAL
CONNECT BY LEVEL <= LENGTH(t.value)
)
Which, for the sample data:
CREATE TABLE table_name ( value ) AS
SELECT 'cbaz' FROM DUAL UNION ALL
SELECT 'zyx' FROM DUAL UNION ALL
SELECT 'zyx' FROM DUAL;
Outputs:
ORIGINAL
ORDERED
cbaz
abcz
zyx
xyz
zyx
xyz
db<>fiddle here
Just for fun, you could do this programmatically:
with function sort_letters
( p_str varchar2 )
return varchar2
as
type charList is table of simple_integer index by varchar2(1);
letters charList;
letter varchar2(1);
sorted_letters long;
begin
if p_str is not null then
for i in 1..length(p_str) loop
letter := substr(p_str,i,1);
letters(letter) :=
case
when letters.exists(letter) then letters(letter) +1
else 1
end;
end loop;
letter := letters.first;
loop
sorted_letters := sorted_letters || rpad(letter, letters(letter), letter);
letter := letters.next(letter);
exit when letter is null;
end loop;
end if;
return sorted_letters;
end;
select sort_letters('abracadabra')
from dual
/
SORT_LETTERS('ABRACADABRA')
---------------------------
aaaaabbcdrr

How to get the value for column reading as '=4*10*2' as 80 in sql - implement the same using select query without creating the function

I have read only access to this particular database, hence I am not allowed to create any functions.
I am trying to achieve the below one using select statement.
How to get the value for column reading as '=4*10*2' as 80 in sql - implement the same using select query without creating the function.
I used the below query:
qty
----
10*4*2
4*3*1
5*1*1
select case when length=1 then substr(qty,1,1)
when length=2 then substr(qty,1,1)*substr(qty,2,1)
when length=3 then substr(qty,1,1)*substr(qty,2,1)*substr(qty,3,1)
else qty
end
from (select replace(qty,'*','') as qty from table_quants);
The above query works fine until and unless the value does not contain 10s or zeroes.
i.e,
qty
10*4*2 0 ------> which is not correct, I should get 80 instead of zero
4*3*1 12
5*1*1 5
Can someone pls help me out.
If it were Oracle, then
SQL> with table_quants (id, qty) as
2 -- sample data
3 (select 1, '10*4*2' from dual union all
4 select 2, '4*3*1' from dual union all
5 select 3, '5*1*1' from dual
6 ),
7 split_qty as
8 -- split QTY column to rows
9 (select id,
10 qty,
11 regexp_substr(qty, '[^*]+', 1, column_value) val
12 from table_quants cross join
13 table(cast(multiset(select level from dual
14 connect by level <= regexp_count(qty, '\*') + 1
15 ) as sys.odcinumberlist))
16 )
17 -- compute the result
18 select id,
19 qty,
20 round(exp(sum(ln(val)))) result
21 from split_qty
22 group by id, qty
23 order by id;
ID QTY RESULT
---------- ------ ----------
1 10*4*2 80
2 4*3*1 12
3 5*1*1 5
SQL>
XMLTABLE is often a shortcut for simple expressions, eg
SQL> create table t ( expr varchar2(20));
Table created.
SQL> insert into t values ('1+2');
1 row created.
SQL> insert into t values ('1+2*7-3+11');
1 row created.
SQL> select * from t, xmltable(t.expr);
EXPR COLUMN_VALUE
-------------------- ------------------------------
1+2 3
1+2*7-3+11 23
Same idea with SQL Server, just a bit shorter:
with table_quants (id, qty) as
-- sample data
(select 1, '10*4*2' union all
select 2, '4*3*1' union all
select 3, '5*1*1'
)
select id, exp( (select sum(log(value)) from string_split(qty,'*')) ) result
from table_quants
outputs
id result
----------- ----------------------
1 80
2 12
3 5
(3 rows affected)
In Oracle, you can use a recursive sub-query factoring clause and simple string functions (which, in this testing, was faster than CROSS JOINing with a correlated TABLE collection expression generated by CAST and MULTISET):
WITH multiplied_values ( qty, value, start_pos, end_pos ) AS (
SELECT qty,
1,
1,
INSTR( qty, '*', 1 )
FROM table_name
UNION ALL
SELECT qty,
value * SUBSTR( qty, start_pos, end_pos - start_pos ),
end_pos + 1,
INSTR( qty, '*', end_pos + 1 )
FROM multiplied_values
WHERE end_pos > 0
)
SELECT qty,
value * SUBSTR( qty, start_pos ) AS value
FROM multiplied_values
WHERE end_pos = 0;
Which, for your sample data:
CREATE TABLE table_name ( qty ) AS
SELECT '10*4*2' FROM DUAL UNION ALL
SELECT '4*3*1' FROM DUAL UNION ALL
SELECT '5*1*1' FROM DUAL;
Outputs:
QTY | VALUE
:----- | ----:
10*4*2 | 80
4*3*1 | 12
5*1*1 | 5
db<>fiddle here
The equivalent in SQL Server is:
WITH multiplied_values ( qty, value, start_pos, end_pos ) AS (
SELECT qty,
1,
1,
CHARINDEX( '*', qty, 1 )
FROM table_name
UNION ALL
SELECT qty,
value * CAST( SUBSTRING( qty, start_pos, end_pos - start_pos ) AS INT ),
end_pos + 1,
CHARINDEX( '*', qty, end_pos + 1 )
FROM multiplied_values
WHERE end_pos > 0
)
SELECT qty,
value * CAST( SUBSTRING( qty, start_pos, LEN( qty ) - start_pos + 1 ) AS INT )
AS value
FROM multiplied_values
WHERE end_pos = 0;
db<>fiddle here
In Oracle you can do this:
with function evaluate_expression(p_expression in varchar2)
return number
is
l_cursor integer default dbms_sql.open_cursor;
l_feedback integer default 0;
l_retval number; /* with divisions we might get a NUMBER */
begin
dbms_sql.parse(l_cursor,'begin :ret_val := ' || p_expression ||'; end;', dbms_sql.native );
dbms_sql.bind_variable(l_cursor,':ret_val',l_retval);
l_feedback := dbms_sql.execute(l_cursor);
dbms_sql.variable_value(l_cursor, ':ret_val', l_retval);
dbms_sql.close_cursor(l_cursor);
return l_retval;
exception
when others then
dbms_sql.close_cursor(l_cursor);
if (sqlcode=-1476) then
return 0;
else
raise;
end if;
end;
select evaluate_expression('(3*(2+3)+10-1)/2') from dual;
EVALUATE_EXPRESSION('(3*(2+3)+10-1)/2')
---------------------------------------
12
Or if you have many expressions to evaluate you can create a view:
create view exprs as
select '(3*(2+3)+10-1)/2' expr from dual union all
select '1+2+3+4+5+6' from dual union all
select '1*2*3*4*5*6' from dual
;
and use the above to resolve the expressions:
with function evaluate_expression(p_expression in varchar2)
return number
is
l_cursor integer default dbms_sql.open_cursor;
l_feedback integer default 0;
l_retval number; /* with divisions we might get a NUMBER */
begin
dbms_sql.parse(l_cursor,'begin :ret_val := ' || p_expression ||'; end;', dbms_sql.native );
dbms_sql.bind_variable(l_cursor,':ret_val',l_retval);
l_feedback := dbms_sql.execute(l_cursor);
dbms_sql.variable_value(l_cursor, ':ret_val', l_retval);
dbms_sql.close_cursor(l_cursor);
return l_retval;
exception
when others then
dbms_sql.close_cursor(l_cursor);
if (sqlcode=-1476) then
return 0;
else
raise;
end if;
end;
select expr||'='||evaluate_expression(expr) expr
from (select expr from exprs)
;
EXPR
---------------------------------------------------------
(3*(2+3)+10-1)/2=12
1+2+3+4+5+6=21
1*2*3*4*5*6=720

pl sql query to compare 2 columns comma separated values and find differences

COL 1 Values: QQQ,QQ,123,VVVV
COL 2 VALUES: WWWW,VVV,QQQ
Compare COL1 values vs COL 2 values:
For e.g.
1) If a value exist in COL1 but NOT in COL2 then display that COL1 value under COL VALUE ADDED
Output Expected: COL VALUE ADDED = QQ,123,VVVV
2) If a value exist in COL2 but NOT in COL1 then display that COL2 values under COL VALUE Removed.
Output Expected: COL VALUE REMOVED = WWWW,VVV
3) If the set of values are same within COL1 and COL2 then display it as NULL
Can these be handled and compared as expected dynamically via PLSQL command? I expect to have dynamic comma separated values that needs to be compared between Current and Before set of values.
Here's one option:
SQL> with
2 test (col1, col2) as
3 (select 'QQQ,QQ,123,VVVV', 'WWWW,VVV,QQQ' from dual
4 ),
5 t1 (col) as
6 (select regexp_substr(col1, '[^,]+', 1, level)
7 from test
8 connect by level <= regexp_count(col1, ',') + 1
9 ),
10 t2 (col) as
11 (select regexp_substr(col2, '[^,]+', 1, level)
12 from test
13 connect by level <= regexp_count(col2, ',') + 1
14 ),
15 one_minus_two as
16 (select col from t1
17 minus
18 select col from t2
19 ),
20 two_minus_one as
21 (select col from t2
22 minus
23 select col from t1
24 )
25 select 'Col value added: ' ||
26 listagg(col, ',') within group (order by null) as result
27 from one_minus_two
28 union all
29 select 'Col value removed: ' ||
30 listagg(col, ',') within group (order by null)
31 from two_minus_one
32 union all
33 select 'NULL'
34 from dual
35 where (select listagg(col, ',') within group (order by col) from t1) =
36 (select listagg(col, ',') within group (order by col) from t2);
RESULT
--------------------------------------------------------------------------------
Col value added: 123,QQ,VVVV
Col value removed: VVV,WWWW
SQL>
If both values are equal:
SQL> with
2 test (col1, col2) as
3 (select 'AAA,BBB,CCC', 'CCC,AAA,BBB' from dual
4 ),
<SNIP>
35 where (select listagg(col, ',') within group (order by col) from t1) =
36 (select listagg(col, ',') within group (order by col) from t2);
RESULT
--------------------------------------------------------------------------------
Col value added:
Col value removed:
NULL
SQL>
Here's one way to solve it:
WITH cteData AS (SELECT 'QQQ,QQ,123,VVVV' AS COL1,
'WWWW,VVV,QQQ' AS COL2
FROM DUAL),
cteCol1 AS (SELECT REGEXP_SUBSTR(COL1, '[^,]+', 1, LEVEL) AS COL1
FROM cteData
CONNECT BY LEVEL < REGEXP_COUNT(COL1, ',')+2),
cteCol2 AS (SELECT REGEXP_SUBSTR(COL2, '[^,]+', 1, LEVEL) AS COL2
FROM cteData
CONNECT BY LEVEL < REGEXP_COUNT(COL2, ',')+2),
cteAdded AS (SELECT c1.COL1 -- COL1 not found in COL2
FROM cteCol1 c1
WHERE c1.COL1 NOT IN (SELECT COL2
FROM cteCol2)),
cteRemoved AS (SELECT c2.COL2 -- in COL2 but NOT in COL1
FROM cteCol2 c2
WHERE c2.COL2 NOT IN (SELECT COL1
FROM cteCol1)),
cteAdded_list AS (SELECT LISTAGG(a.COL1, ',') WITHIN GROUP (ORDER BY 1) AS ADDED
FROM cteAdded a),
cteRemoved_list AS (SELECT LISTAGG(r.COL2, ',') WITHIN GROUP (ORDER BY 1) AS REMOVED
FROM cteRemoved r),
cteResults AS (SELECT CASE
WHEN a.ADDED IS NULL THEN NULL
ELSE 'COL VALUE ADDED = ' || a.ADDED
END AS RESULT
FROM cteAdded_list a
UNION ALL
SELECT CASE
WHEN r.REMOVED IS NULL THEN NULL
ELSE 'COL VALUE REMOVED = ' || r.REMOVED
END AS RESULT
FROM cteRemoved_list r)
SELECT RESULT
FROM cteResults
WHERE RESULT IS NOT NULL
With the data you supplied this returns:
COL VALUE ADDED = 123,QQ,VVVV
COL VALUE REMOVED = VVV,WWWW
If you replace the strings in cteData with ones which are identical, e.g. set both to '123,456,789' then it returns an empty result set.
Since you wanted a PL/SQL solution, you can create a function to split the string to an array and then use MULTISET operators:
Split Function:
From my previous answer
CREATE OR REPLACE FUNCTION split_String(
i_str IN VARCHAR2,
i_delim IN VARCHAR2 DEFAULT ','
) RETURN stringlist DETERMINISTIC
AS
p_result stringlist := stringlist();
p_start NUMBER(5) := 1;
p_end NUMBER(5);
c_len CONSTANT NUMBER(5) := LENGTH( i_str );
c_ld CONSTANT NUMBER(5) := LENGTH( i_delim );
BEGIN
IF c_len > 0 THEN
p_end := INSTR( i_str, i_delim, p_start );
WHILE p_end > 0 LOOP
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, p_end - p_start );
p_start := p_end + c_ld;
p_end := INSTR( i_str, i_delim, p_start );
END LOOP;
IF p_start <= c_len + 1 THEN
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, c_len - p_start + 1 );
END IF;
END IF;
RETURN p_result;
END;
/
PL/SQL:
Then you can use it in a PL/SQL block:
DECLARE
col1 VARCHAR2(4000) := 'QQQ,QQ,123,VVVV';
col2 VARCHAR2(4000) := 'WWWW,VVV,QQQ';
arr1 stringlist := SPLIT_STRING( col1 );
arr2 stringlist := SPLIT_STRING( col2 );
added stringlist := arr1 MULTISET EXCEPT arr2;
removed stringlist := arr2 MULTISET EXCEPT arr1;
BEGIN
FOR i IN 1 .. added.COUNT LOOP
DBMS_OUTPUT.PUT( added(i) || ',' );
END LOOP;
DBMS_OUTPUT.NEW_LINE();
FOR i IN 1 .. removed.COUNT LOOP
DBMS_OUTPUT.PUT( removed(i) || ',' );
END LOOP;
DBMS_OUTPUT.NEW_LINE();
END;
/
Which outputs:
dbms_output:
QQ,123,VVVV,
WWWW,VVV,
SQL:
If you want to implement it in SQL then with the test data:
CREATE TABLE test_data ( col1, col2 ) AS
SELECT 'QQQ,QQ,123,VVVV', 'WWWW,VVV,QQQ' FROM DUAL;
You can query it using:
SELECT ( SELECT LISTAGG( column_value, ',' ) WITHIN GROUP ( ORDER BY ROWNUM )
FROM TABLE( a.arr1 MULTISET EXCEPT a.arr2 ) ) AS added,
( SELECT LISTAGG( column_value, ',' ) WITHIN GROUP ( ORDER BY ROWNUM )
FROM TABLE( a.arr2 MULTISET EXCEPT a.arr1 ) ) AS removed
FROM (
SELECT SPLIT_STRING( col1 ) AS arr1,
SPLIT_STRING( col2 ) AS arr2
FROM test_data
) a;
Which outputs:
ADDED | REMOVED
:---------- | :-------
QQ,123,VVVV | WWWW,VVV
db<>fiddle here
db<>fiddle here

How to convert comma separated values to rows in oracle?

Here is the DDL --
create table tbl1 (
id number,
value varchar2(50)
);
insert into tbl1 values (1, 'AA, UT, BT, SK, SX');
insert into tbl1 values (2, 'AA, UT, SX');
insert into tbl1 values (3, 'UT, SK, SX, ZF');
Notice, here value is comma separated string.
But, we need result like following-
ID VALUE
-------------
1 AA
1 UT
1 BT
1 SK
1 SX
2 AA
2 UT
2 SX
3 UT
3 SK
3 SX
3 ZF
How do we write SQL for this?
I agree that this is a really bad design.
Try this if you can't change that design:
select distinct id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
order by id, level;
OUPUT
id value level
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Credits to this
To remove duplicates in a more elegant and efficient way (credits to #mathguy)
select id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
and PRIOR id = id
and PRIOR SYS_GUID() is not null
order by id, level;
If you want an "ANSIer" approach go with a CTE:
with t (id,res,val,lev) as (
select id, trim(regexp_substr(value,'[^,]+', 1, 1 )) res, value as val, 1 as lev
from tbl1
where regexp_substr(value, '[^,]+', 1, 1) is not null
union all
select id, trim(regexp_substr(val,'[^,]+', 1, lev+1) ) res, val, lev+1 as lev
from t
where regexp_substr(val, '[^,]+', 1, lev+1) is not null
)
select id, res,lev
from t
order by id, lev;
OUTPUT
id val lev
1 AA 1
1 UT 2
1 BT 3
1 SK 4
1 SX 5
2 AA 1
2 UT 2
2 SX 3
3 UT 1
3 SK 2
3 SX 3
3 ZF 4
Another recursive approach by MT0 but without regex:
WITH t ( id, value, start_pos, end_pos ) AS
( SELECT id, value, 1, INSTR( value, ',' ) FROM tbl1
UNION ALL
SELECT id,
value,
end_pos + 1,
INSTR( value, ',', end_pos + 1 )
FROM t
WHERE end_pos > 0
)
SELECT id,
SUBSTR( value, start_pos, DECODE( end_pos, 0, LENGTH( value ) + 1, end_pos ) - start_pos ) AS value
FROM t
ORDER BY id,
start_pos;
I've tried 3 approaches with a 30000 rows dataset and 118104 rows returned and got the following average results:
My recursive approach: 5 seconds
MT0 approach: 4 seconds
Mathguy approach: 16 seconds
MT0 recursive approach no-regex: 3.45 seconds
#Mathguy has also tested with a bigger dataset:
In all cases the recursive query (I only tested the one with regular
substr and instr) does better, by a factor of 2 to 5. Here are the
combinations of # of strings / tokens per string and CTAS execution
times for hierarchical vs. recursive, hierarchical first. All times in
seconds
30,000 x 4: 5 / 1.
30,000 x 10: 15 / 3.
30,000 x 25: 56 / 37.
5,000 x 50: 33 / 14.
5,000 x 100: 160 / 81.
10,000 x 200: 1,924 / 772
This will get the values without requiring you to remove duplicates or having to use a hack of including SYS_GUID() or DBMS_RANDOM.VALUE() in the CONNECT BY:
SELECT t.id,
v.COLUMN_VALUE AS value
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v
Update:
Returning the index of the element in the list:
Option 1 - Return a UDT:
CREATE TYPE string_pair IS OBJECT( lvl INT, value VARCHAR2(4000) );
/
CREATE TYPE string_pair_table IS TABLE OF string_pair;
/
SELECT t.id,
v.*
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT string_pair( level, TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS string_pair_table
)
) v;
Option 2 - Use ROW_NUMBER():
SELECT t.id,
v.COLUMN_VALUE AS value,
ROW_NUMBER() OVER ( PARTITION BY id ORDER BY ROWNUM ) AS lvl
FROM TBL1 t,
TABLE(
CAST(
MULTISET(
SELECT TRIM( REGEXP_SUBSTR( t.value, '[^,]+', 1, LEVEL ) )
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT( t.value, '[^,]+' )
)
AS SYS.ODCIVARCHAR2LIST
)
) v;
Vercelli posted a correct answer. However, with more than one string to split, connect by will generate an exponentially-growing number of rows, with many, many duplicates. (Just try the query without distinct.) This will destroy performance on data of non-trivial size.
One common way to overcome this problem is to use a prior condition and an additional check to avoid cycles in the hierarchy. Like so:
select id, trim(regexp_substr(value,'[^,]+', 1, level) ) value, level
from tbl1
connect by regexp_substr(value, '[^,]+', 1, level) is not null
and prior id = id
and prior sys_guid() is not null
order by id, level;
See, for example, this discussion on OTN: https://community.oracle.com/thread/2526535
An alternate method is to define a simple PL/SQL function:
CREATE OR REPLACE FUNCTION split_String(
i_str IN VARCHAR2,
i_delim IN VARCHAR2 DEFAULT ','
) RETURN SYS.ODCIVARCHAR2LIST DETERMINISTIC
AS
p_result SYS.ODCIVARCHAR2LIST := SYS.ODCIVARCHAR2LIST();
p_start NUMBER(5) := 1;
p_end NUMBER(5);
c_len CONSTANT NUMBER(5) := LENGTH( i_str );
c_ld CONSTANT NUMBER(5) := LENGTH( i_delim );
BEGIN
IF c_len > 0 THEN
p_end := INSTR( i_str, i_delim, p_start );
WHILE p_end > 0 LOOP
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, p_end - p_start );
p_start := p_end + c_ld;
p_end := INSTR( i_str, i_delim, p_start );
END LOOP;
IF p_start <= c_len + 1 THEN
p_result.EXTEND;
p_result( p_result.COUNT ) := SUBSTR( i_str, p_start, c_len - p_start + 1 );
END IF;
END IF;
RETURN p_result;
END;
/
Then the SQL becomes very simple:
SELECT t.id,
v.column_value AS value
FROM TBL1 t,
TABLE( split_String( t.value ) ) v
--converting row of data into comma sepaerated string
SELECT
department_id,
LISTAGG(first_name, ',') WITHIN GROUP(
ORDER BY
first_name
) comma_separted_data
FROM
hr.employees
GROUP BY
department_id;
--comma-separated string into row of data
CREATE TABLE t (
deptno NUMBER,
employee_name VARCHAR2(255)
);
INSERT INTO t VALUES (
10,
'mohan,sam,john'
);
INSERT INTO t VALUES (
20,
'manideeep,ashok,uma'
);
INSERT INTO t VALUES (
30,
'gopal,gopi,manoj'
);
SELECT
deptno,
employee_name,
regexp_count(employee_name, ',') + 1,
regexp_substr(employee_name, '\w+', 1, 1)
FROM
t,
LATERAL (
SELECT
level l
FROM
dual
CONNECT BY
level < regexp_count(employee_name, ',') + 1
);
DROP TABLE t;
SELECT COL1, COL2
FROM ( SELECT INDX, MY_STR1, MY_STR2, COL1_ELEMENTS, COL1, COL2_ELEMENTS, COL2
FROM ( SELECT 0 "INDX", COL1 "MY_STR1", COL1_ELEMENTS, COL1, '' "MY_STR2", COL2_ELEMENTS, COL2
FROM(
SELECT
REPLACE(COL1, ', ', ',') "COL1", -- In case there is a space after comma
Trim(Length(Replace(COL1, ' ', ''))) - Trim(Length(Translate(REPLACE(COL1, ', ', ','), 'A,', 'A'))) + 1 "COL1_ELEMENTS", -- Number of elements
Replace(COL2, ', ', ',') "COL2", -- In case there is a space after comma
Trim(Length(Replace(COL2, ' ', ''))) - Trim(Length(Translate(REPLACE(COL2, ', ', ','), 'A,', 'A'))) + 1 "COL2_ELEMENTS" -- Number of elements
FROM
(SELECT 'aaa,bbb,ccc' "COL1", 'qq, ww, ee' "COL2" FROM DUAL) -- Your example data
)
)
MODEL -- Modeling --> INDX = 0 COL1='aaa,bbb,ccc' COL2='qq,ww,ee'
DIMENSION BY(0 as INDX)
MEASURES(COL1, COL1_ELEMENTS, COL2, CAST('a' as VarChar2(4000)) as MY_STR1, CAST('a' as VarChar2(4000)) as MY_STR2)
RULES ITERATE (10) --UNTIL (ITERATION_NUMBER <= COL1_ELEMENTS[ITERATION_NUMBER + 1]) -- If you don't know the number of elements this should be bigger then you aproximation. Othewrwise it will split given number of elements
(
COL1_ELEMENTS[ITERATION_NUMBER + 1] = COL1_ELEMENTS[0],
MY_STR1[0] = COL1[CV()],
MY_STR1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], InStr(MY_STR1[ITERATION_NUMBER], ',', 1) + 1),
COL1[ITERATION_NUMBER + 1] = SubStr(MY_STR1[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR1[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR1[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR1[ITERATION_NUMBER]) END),
MY_STR2[0] = COL2[CV()],
MY_STR2[ITERATION_NUMBER + 1] = SubStr(MY_STR2[ITERATION_NUMBER], InStr(MY_STR2[ITERATION_NUMBER], ',', 1) + 1),
COL2[ITERATION_NUMBER + 1] = SubStr(MY_STR2[ITERATION_NUMBER], 1, CASE WHEN InStr(MY_STR2[ITERATION_NUMBER], ',') <> 0 THEN InStr(MY_STR2[ITERATION_NUMBER], ',')-1 ELSE Length(MY_STR2[ITERATION_NUMBER]) END)
)
)
WHERE INDX > 0 And INDX <= COL1_ELEMENTS -- INDX 0 contains starting strings
--
-- COL1 COL2
-- ---- ----
-- aaa qq
-- bbb ww
-- ccc ee

Oracle- Split string comma delimited (string contains spaces and consecutive commas)

I can't find a solution about how to split a comma-delimited string in ORACLE. Searched a lot, nothing works for my case
Code
DECLARE
TYPE T_ARRAY_OF_VARCHAR IS TABLE OF VARCHAR2(2000) INDEX BY BINARY_INTEGER;
MY_ARRAY T_ARRAY_OF_VARCHAR;
MY_STRING VARCHAR2(2000) := '12 3,456,,abc,def';
BEGIN
FOR CURRENT_ROW IN (
with test as
(select MY_STRING from dual)
select regexp_substr(MY_STRING, '[^,]+', 1, rownum) SPLIT
from test
connect by level <= length (regexp_replace(MY_STRING, '[^,]+')) + 1)
LOOP
DBMS_OUTPUT.PUT_LINE('>' || CURRENT_ROW.SPLIT || '<');
--DBMS_OUTPUT.PUT_LINE(CURRENT_ROW.SPLIT);
MY_ARRAY(MY_ARRAY.COUNT) := CURRENT_ROW.SPLIT;
END LOOP;
DBMS_OUTPUT.PUT_LINE('Array Size:' || MY_ARRAY.COUNT);
END;
/
The output is:
>12 3<
>456<
>abc<
>def<
><
Array Size:5
The empty value is out of order!!!!
Try this for the parsing the list part. It handles NULLS:
SQL> select regexp_substr('12 3,456,,abc,def', '(.*?)(,|$)', 1, level, null, 1) SPLIT, level
from dual
connect by level <= regexp_count('12 3,456,,abc,def',',') + 1
ORDER BY level;
SPLIT LEVEL
----------------- ----------
12 3 1
456 2
3
abc 4
def 5
SQL>
Unfortunately when you search for regex's for parsing lists, you will always find this form which does NOT handle nulls and should be avoided: '[^,]+'. See here for more info: Split comma separated values to columns in Oracle.
Try xmltable and flwor expresion.
The following example is not secure and throw error if you put string without comma. But is simpler to understand.
select xmlcast(column_value as varchar2(2000)) value_list
from xmltable('for $val in ora:tokenize($strList,",")
return $val'
passing '12 3,456,,abc,def' as "strList"
);
And secured version.
select xmlcast(column_value as varchar2(2000)) value_list
from xmltable('for $val at $index in ora:tokenize(concat(",",$strList),",")
where $index > 1
return $val' passing '12 3,456,,abc,def' as "strList"
);
Little modification to your query, assuming you can pick one char which will not be present in MY_STRING, e.g. pipe |
with test as
(select '12 3,456,,,,abc,def' MY_STRING from dual)
select trim('|' from regexp_substr(regexp_replace(MY_STRING,',,',',|,|'),'[^,]+',1,level)) SPLIT
from test
connect by level <= length (regexp_replace(MY_STRING, '[^,]+')) + 1;
Output:
SPLIT
-----------------------
12 3
456
(null)
(null)
(null)
abc
def
No need of PL/SQL, you could do it in plain SQL. See Split comma delimited strings in a table in Oracle.
Using MODEL clause:
WITH sample_data AS (
SELECT '12 3,456,,,,,abc,def' str FROM dual
)
-- end of sample_data mimicking real table
,
model_param AS (
SELECT str AS orig_str ,
','
|| str
|| ',' AS mod_str ,
1 AS start_pos ,
Length(str) AS end_pos ,
(LENGTH(str) -
LENGTH(REPLACE(str, ','))) + 1 AS element_count ,
0 AS element_no ,
ROWNUM AS rn
FROM sample_data )
SELECT trim(Substr(mod_str, start_pos, end_pos-start_pos)) str
FROM (
SELECT *
FROM model_param
MODEL PARTITION BY ( rn, orig_str, mod_str)
DIMENSION BY (element_no)
MEASURES (start_pos, end_pos, element_count)
RULES ITERATE (2000)
UNTIL (ITERATION_NUMBER+1 = element_count[0])
( start_pos[ITERATION_NUMBER+1] =
instr(cv(mod_str), ',', 1, cv(element_no)) + 1,
end_pos[ITERATION_NUMBER+1] =
instr(cv(mod_str), ',', 1, cv(element_no) + 1) )
)
WHERE element_no != 0
ORDER BY mod_str ,
element_no
/
Output
STR
----------------------
12 3
456
abc
def
8 rows selected.
If you want to do it in PL/SQL, then you could use a pipelined table function:
SQL> CREATE OR REPLACE TYPE test_type
2 AS
3 TABLE OF VARCHAR2(100)
4 /
Type created.
SQL> CREATE OR REPLACE FUNCTION comma_to_table(
2 p_list IN VARCHAR2)
3 RETURN test_type PIPELINED
4 AS
5 l_string LONG := p_list || ',';
6 l_comma_index PLS_INTEGER;
7 l_index PLS_INTEGER := 1;
8 BEGIN
9 LOOP
10 l_comma_index := INSTR(l_string, ',', l_index);
11 EXIT
12 WHEN l_comma_index = 0;
13 PIPE ROW ( TRIM(SUBSTR(l_string, l_index, l_comma_index - l_index)));
14 l_index := l_comma_index + 1;
15 END LOOP;
16 RETURN;
17 END comma_to_table;
18 /
Function created.
Let's see the output:
SQL> SELECT *
2 FROM TABLE(comma_to_table('12 3,456,,,,,abc,def'))
3 /
COLUMN_VALUE
------------------------------------------------------------------------------
12 3
456
abc
def
8 rows selected.
SQL>