Split comma separated string with Oracle - sql

I have a query that pulls the content of a text separated by comma exactly as this:
INSERVICE JOB #: N19020200001
SERVICE_CENTER:SBY,OH_CIRCUIT:MALTA8501,CREW:3675,URD_PRINT:STG-123/S1,FEEDER:PFB969,ISOLATED_1:SCC-1-B969,ISOLATED_2:UDTB969-5,RECONDUCTOR:Y,JACKETED_CABLE:N,CABLE_CART:N,LIVE_FRONT:N,BOOM:null,BACK_HOE:null,EASY_HAULING:null
Is there a way in SQL I can select/partition it to be in separate fields as below (it is always consistent as above):

I have a custom string parser function you can use ...
NOTE: For headers, you can replace them like REPLACE('HEADER','') and get values with commas...
You can check below code and use it in your db:
/*
 * STRING_PARSER
 *   Returns the token that follows the POSITION-th occurrence of DELIMITER
 *   in VAL, i.e. the text between delimiter number POSITION and delimiter
 *   number POSITION + 1.
 *
 *   VAL        delimited text; every token must be preceded by a delimiter
 *              (for example ',1,2,3').
 *   POSITION   1-based number of the delimiter that precedes the wanted token
 *              (passed as text, converted to a number).
 *   DELIMITER  separator text; may be longer than one character.
 *
 *   Returns NULL when VAL contains fewer than POSITION delimiters.
 *   When the wanted token is the last one (no closing delimiter follows it),
 *   the remainder of VAL is returned.
 */
FUNCTION STRING_PARSER(VAL VARCHAR2, POSITION VARCHAR2, DELIMITER VARCHAR2) RETURN VARCHAR2 IS
    v_occurrence NUMBER := TO_NUMBER(POSITION);
    v_delim_pos  PLS_INTEGER;
    v_start      PLS_INTEGER;
    v_end        PLS_INTEGER;
BEGIN
    /* Locate the delimiter that opens the requested token. */
    v_delim_pos := INSTR(VAL, DELIMITER, 1, v_occurrence);
    IF NVL(v_delim_pos, 0) = 0 THEN
        RETURN NULL;
    END IF;

    /* The token starts right after that delimiter, whatever its length. */
    v_start := v_delim_pos + LENGTH(DELIMITER);

    /* Locate the delimiter that closes the token, if there is one. */
    v_end := INSTR(VAL, DELIMITER, 1, v_occurrence + 1);
    IF v_end = 0 THEN
        /* Last token: nothing closes it, so take everything that is left. */
        RETURN SUBSTR(VAL, v_start);
    END IF;

    RETURN SUBSTR(VAL, v_start, v_end - v_start);
END;
Usage :
-- Fetch the token that follows the 2nd comma of ',1,2,3'.
SELECT
    report_tools_pkg.string_parser(',1,2,3',2,',')
FROM
    dual
Note: The function expects every token to be preceded by a delimiter, so prepend one to your column (',' || column_name) in your SQL if you want to use the function as it is.

Related

Oracle remove html from clob fields

I have a simple function to convert html blob to plain text
-- Converts an HTML document to plain text: every tag is replaced by a single
-- space and the character references that remain are unescaped.
FUNCTION HTML_TO_TEXT(html IN CLOB) RETURN CLOB
IS
    plain_text CLOB;
BEGIN
    -- Both steps run inside one SQL expression evaluated against DUAL.
    SELECT UTL_I18N.UNESCAPE_REFERENCE(
               REGEXP_REPLACE(html, '<.+?>', ' ')
           )
      INTO plain_text
      FROM dual;

    RETURN plain_text;
END;
called in that way:
-- Apply the conversion to every row of t1.
SELECT
    A,
    B,
    C,
    HTML_TO_TEXT(BLobField)
FROM
    t1
Everything works fine until BlobField contains more than 4000 characters; then I get:
ORA-01704: string literal too long
01704. 00000 - "string literal too long"
*Cause: The string literal is longer than 4000 characters.
*Action: Use a string literal of at most 4000 characters.
Longer values may only be entered using bind variables.
I tried to avoid the string literal inside the function by using variables, but nothing changed:
-- Same conversion as a two-step pipeline: first strip the tags into an
-- intermediate CLOB, then unescape the character references.
FUNCTION HTML_TO_TEXT(html IN CLOB) RETURN CLOB
IS
    stripped_html CLOB;
    plain_text    CLOB;
BEGIN
    -- Step 1: replace every tag with a single space.
    SELECT REGEXP_REPLACE(html, '<.+?>', ' ')
      INTO stripped_html
      FROM dual;

    -- Step 2: turn character references back into characters.
    SELECT UTL_I18N.UNESCAPE_REFERENCE(stripped_html)
      INTO plain_text
      FROM dual;

    RETURN plain_text;
END;
Do not use regular expressions to parse HTML. If you want to extract the text then use an XML parser:
-- Extract all text nodes of the stored document with an XML parser and
-- unescape the character references in the resulting string.
SELECT
    a,
    b,
    c,
    UTL_I18N.UNESCAPE_REFERENCE(
        XMLQUERY('//text()' PASSING XMLTYPE(blobfield, 1) RETURNING CONTENT).getStringVal()
    ) AS text
FROM
    t1
Which will work where the extracted text is 4000 characters or less (since XMLTYPE.getStringVal() will return a VARCHAR2 data type and UTL_I18N.UNESCAPE_REFERENCE accepts a VARCHAR2 argument).
If you want to get it to work on CLOB values then you can still use XMLQUERY and getClobVal() but UTL_I18N.UNESCAPE_REFERENCE still only works on VARCHAR2 input (and not CLOBs) so you will need to split the CLOB into segments and parse those and concatenate them once you are done.
Something like:
/*
 * html_to_text
 *   Extracts all text nodes from an XML/XHTML document and unescapes the
 *   character references they contain.
 *
 *   i_xml    parsed document to extract the text from.
 *   Returns  the unescaped text as a CLOB, or NULL when there is no text.
 *
 *   The text is fetched as a CLOB (getClobVal) so it is not limited to the
 *   size of a VARCHAR2; it is then unescaped in slices because
 *   UTL_I18N.UNESCAPE_REFERENCE only accepts VARCHAR2 input.
 *
 *   Known limitation: a slice boundary can fall in the middle of an escaped
 *   character reference, in which case that reference is not unescaped.
 */
CREATE FUNCTION html_to_text(
    i_xml IN XMLTYPE
) RETURN CLOB
IS
    v_text   CLOB;
    v_output CLOB;
    -- Sized in bytes for the PL/SQL maximum so that a slice of "lim"
    -- multi-byte characters still fits.
    str      VARCHAR2(32767);
    len      PLS_INTEGER;
    pos      PLS_INTEGER := 1;
    lim      CONSTANT PLS_INTEGER := 4000;
BEGIN
    SELECT XMLQUERY(
               '//text()'
               PASSING i_xml
               RETURNING CONTENT
           ).getClobVal()
      INTO v_text
      FROM DUAL;

    -- LENGTH(NULL) is NULL, so the loop is skipped and NULL is returned
    -- when the document holds no text.
    len := LENGTH(v_text);

    WHILE pos <= len LOOP
        str      := DBMS_LOB.SUBSTR(v_text, lim, pos);
        v_output := v_output || UTL_I18N.UNESCAPE_REFERENCE(str);
        pos      := pos + lim;
    END LOOP;

    RETURN v_output;
END;
/
However, you probably want to make it more robust and check if you are going to split the string in the middle of an escaped XML character.
db<>fiddle here

How to put comma separated values to a column in oracle

I have a JSON response and after processing the response my output looks like this :
column_variable := 'col1,col2,col3';
data_clob :=
"2017-10-14,abc,1,
2019-10-13,abc,12,
2019-10-12,abc,,
"
;
Because the original response contained escape characters for new lines, data_clob has been converted accordingly (each record is on its own line).
How do I convert this comma separated values in oracle table :
My output should look like this :
col1 col2 col3
2017-10-14 abc 1
2019-10-13 abc 12
2019-10-12 abc null
I looked through similar questions, but I don't want to use REGEXP_SUBSTR with a fixed list of columns because I don't know how many columns I will get in the response.
for e.g : column_variable might have 'col1,col2,col3,col4,col5,col6';
I am using oracle 12.1.0.2.0
Please help !
There is a very easy way to achieve this using Polymorphic Table Functions (available from Oracle 18c onwards, so not on the 12.1.0.2 release mentioned in the question):
Dynamic CSV to Columns Converter: Polymorphic Table Function Example:
create or replace package csv_pkg as

  /*
   * Declares the columns the table function adds: one per name listed
   * in the comma separated col_names string.
   */
  function describe (
    tab       in out dbms_tf.table_t,
    col_names varchar2
  ) return dbms_tf.describe_t;

  /*
   * Fills the added columns for the rows being processed.
   */
  procedure fetch_rows (
    col_names varchar2
  );

end csv_pkg;
and body:
create or replace package body csv_pkg as

  /*
   * Number of columns named in a comma separated header string
   * (number of commas plus one).
   */
  function header_column_count (col_names varchar2) return pls_integer is
  begin
    return regexp_count(col_names, ',') + 1;
  end;

  /*
   * Describe phase of the polymorphic table function.
   *
   * tab        description of the input table; its first column holds the
   *            delimited text and is marked for reading.
   * col_names  comma separated list of the column names to create.
   *
   * Returns the new-column list: position 1 repeats the source column,
   * positions 2 .. n + 1 are VARCHAR2 columns named after col_names.
   */
  function describe(
    tab       in out dbms_tf.table_t,
    col_names varchar2
  )
  return dbms_tf.describe_t as
    new_cols dbms_tf.columns_new_t;
    col_id   pls_integer := 2;
  begin
    /* Enable the source column for reading */
    tab.column(1).pass_through := FALSE;
    tab.column(1).for_read     := TRUE;
    new_cols(1) := tab.column(1).description;

    /* Extract the column names from the header string,
       creating a new column for each
    */
    for j in 1 .. header_column_count(col_names) loop
      new_cols(col_id) := dbms_tf.column_metadata_t(
        name => regexp_substr(col_names, '[^,]+', 1, j),
        type => dbms_tf.type_varchar2
      );
      col_id := col_id + 1;
    end loop;

    return dbms_tf.describe_t( new_columns => new_cols );
  end;

  /*
   * Execution phase: splits the text of the source column of every input
   * row on commas and stores the j-th field in the j-th new column.
   *
   * col_names  the same header string that was passed to describe; only
   *            the number of names is used here.
   */
  procedure fetch_rows (col_names varchar2) as
    rowset      dbms_tf.row_set_t;
    row_count   pls_integer;
    /* Computed once instead of once per row. */
    field_count constant pls_integer := header_column_count(col_names);
  begin
    /* read the input data set */
    dbms_tf.get_row_set(rowset, row_count => row_count);

    /* Loop through the input rows... */
    for i in 1 .. row_count loop
      /* ...and the defined columns; new column j + 1 receives field j
         because position 1 holds the input string itself.
         The pattern captures a possibly empty run of non-comma characters
         followed by a comma or the end of the text, so an empty field
         yields NULL in its own column instead of being skipped, which
         would shift every later value one column to the left.
      */
      for j in 1 .. field_count loop
        rowset(j + 1).tab_varchar2(i) :=
          regexp_substr(rowset(1).tab_varchar2(i), '([^,]*)(,|$)', 1, j, null, 1);
      end loop;
    end loop;

    /* Output the new columns and their values */
    dbms_tf.put_row_set(rowset);
  end;

end csv_pkg;
--function
/* Row-semantics polymorphic table function implemented by csv_pkg. */
create or replace function csv_to_columns(
  tab       table,
  col_names varchar2
)
return table
pipelined row polymorphic using csv_pkg;
Then you simply pass:
-- The column list is produced by the function itself, hence the "*".
select
  *
from
  csv_to_columns(data_clob, column_variable);
Here's one possible solution for Oracle versions below 18 and maybe 12, not sure... This is not perfect and will create an empty column at the end based on data you provided - extra spaces, commas, etc... This may also create a blank space between the 'SELECT' and the first column in output. All that can be removed later manually or with more coding. I hope this helps, at least in some ways:
-- Builds the text of a "SELECT ... FROM dual UNION ALL ..." statement from
-- the delimited sample data: one SELECT per line, one quoted literal per field.
WITH raw_data AS (
    -- Strip the surrounding double quotes from the sample text.
    SELECT TRIM(BOTH '"' FROM
'"2017-10-14,abc,1,
2019-10-13,abc,12,
2019-10-12,abc,,"') AS str
    FROM dual
),
quoted_fields AS (
    -- Close and reopen a string literal at every comma.
    SELECT TRIM(REPLACE(str, ',', ''''||', ''')) AS str
    FROM raw_data
)
-- Turn every line break into the end of one SELECT and the start of the next.
SELECT 'SELECT '''||REPLACE(str, chr(10), ''' FROM dual'||chr(10)||'UNION ALL'||chr(10)||'SELECT ''')||''' FROM dual' AS str
FROM quoted_fields
/
This will build the select statement that can be cleaned up and executed manually or with dynamic SQL:
SELECT '2017-10-14' col, 'abc' col, '1' col, '' FROM dual
UNION ALL
SELECT '2019-10-13' col, 'abc' col, '12' col, '' FROM dual
UNION ALL
SELECT '2019-10-12' col, 'abc' col, '' col, '' FROM dual
The output of the above select statement:
COL COL_1 COL_2
2017-10-14 abc 1
2019-10-13 abc 12
2019-10-12 abc null

Oracle. Not valid ascii value of regex result

I'd like to edit a string: in every pair of adjacent digits, keep the first digit and turn the second one into a letter (00 -> 0a, 01 -> 0b, 23 -> 2d, etc.).
111324 -> 1b1d2e.
Then my code:
set serveroutput on size unlimited
DECLARE
    input_str VARCHAR2(128);

    -- Shifts the character code of the first character of num by 49.
    FUNCTION convr(num VARCHAR2) RETURN VARCHAR2 IS
        shifted_code PLS_INTEGER;
    BEGIN
        shifted_code := ASCII(num) + 49;
        RETURN CHR(shifted_code);
        -- return chr(ascii(num)+49)||'<-'||(ascii(num)+49)||','||ascii(num)||','||num||'|';
    END;

    -- Rewrites every pair of digits using a replacement string that is
    -- assembled before REGEXP_REPLACE is called.
    FUNCTION replace_dd(str VARCHAR2) RETURN VARCHAR2 IS
        replacement VARCHAR2(32);
    BEGIN
        replacement := '\2' || convr('\3');
        RETURN REGEXP_REPLACE(str, '((\d)(\d))', replacement);
    END;
BEGIN
    input_str := '111324';
    DBMS_OUTPUT.PUT_LINE(input_str);
    DBMS_OUTPUT.PUT_LINE(replace_dd(input_str));
END;
But I get the next string: '112'.
When I checked the result using the commented-out return statement, I got:
'1<-141,92,1|1<-141,92,3|2<-141,92,4|'.
ascii(num) does not depend on num. It always works like ascii('\'). It is 92, plus 49 we got 141 and it is out of ascii table. But num by itself is printed correctly.
How can I get correct values? Or maybe another way to resolve this issue?
What is happening is that the replacement string is expanded first, and only after it is fully processed, any remaining backreferences like \2 are replaced by string fragments. So convr('\3') is processed first, and at this stage '\3' is a literal. ascii() returns the ascii code of the FIRST character of whatever string it receives as argument. So the 3 plays no role, you only get ascii('\') as you noticed. Then your user-defined function is evaluated and plugged back into the concatenation... by now there is no \3 left in the replacement string.
Exercise: Try to explain/understand why
regexp_replace('abcdef', '(b).*(e)', '\2' || upper('\1'))
is aebf and not aeBf. (Hint: what is the return from upper('\1') by itself, unrelated to anything else?)
You could split the input string into component characters, apply your transformation on those with even index and combine the string back (all in SQL, no need for loops and such). Something like this (done in plain SQL, you can rewrite it into your function if you like):
-- Splits each input string into single characters, shifts the character
-- code of every even-positioned character by 49 and glues the characters
-- back together in their original order.
with
  samples ( str ) as (
    select '111324' from dual
    union all
    select '372' from dual
  ),
  chars ( str, pos, c ) as (
    -- one row per character; the PRIOR conditions keep the row generator
    -- inside each input string
    select str,
           level,
           substr(str, level, 1)
    from   samples
    connect by prior str = str
           and prior sys_guid() is not null
           and level <= length(str)
  )
select   str,
         listagg(
           case
             when mod(pos, 2) = 1 then c
             else chr(ascii(c) + 49)
           end,
           ''
         ) within group (order by pos) as modified_str
from     chars
group by str
;
STR MODIFIED_STR
------ --------------
111324 1b1d2e
372 3h2
Here is code that inserts a 5 in front of every non-digit character and resolves the issue:
-- Anonymous block that converts the second digit of every digit pair into a
-- letter in two passes: REGEXP_REPLACE first wraps that digit in '#' markers,
-- then convr replaces each '#digit#' marker with the shifted character.
-- Intermediate values are printed with DBMS_OUTPUT along the way.
set serveroutput on size unlimited
declare
str varchar2(128);
-- NOTE(review): str1 is declared but never used in this block.
str1 varchar2(128);
-- Inserts the character '5' in front of every non-digit character.
-- NOTE(review): the reason for this extra '5' is not evident from the code - confirm intent.
function replace_a(str varchar2) return varchar2 is
begin
return regexp_replace(str,'(\D)','5\1');
end;
-- Replaces every '#d#' marker (d being one digit) in str with chr(ascii(d)+49)
-- and returns the result; prints the input, each match position and each
-- intermediate string.
function convr(str varchar2) return varchar2 is
ind number;
ret varchar2(128);
begin
Dbms_Output.Put_Line(str);
--return chr(ascii(num)+49)||'<-'||(ascii(num)+49)||','||ascii(num)||','||num||'|';
ind := 1 ;
ret :=str;
loop
-- The search runs on ':'||ret, so a match reported at position ind starts at
-- position ind-1 of ret; ind itself is therefore the position of the digit in ret.
ind := regexp_instr(':'||ret,'(#\d#)',ind) ;
exit when ind=0;
Dbms_Output.Put_Line(ind);
-- Keep the text before the opening '#', append the shifted digit, then append
-- the text after the closing '#'.
ret := substr(ret,1,ind-2)||chr(ascii(substr(ret,ind,1))+49)||substr(ret,ind+2);
SYS.Dbms_Output.Put_Line(ret);
end loop;
return ret;
end;
-- Wraps the second digit of every digit pair in '#' markers and hands the
-- marked string to convr.
function replace_dd(str varchar2) return varchar2 is
begin
return convr(regexp_replace(str,'((\d)(\d))','\2#\3#'));
end;
begin
str := '11a34';
Dbms_Output.Put_Line(str);
Dbms_Output.Put_Line(replace_a(str));
Dbms_Output.Put_Line(replace_dd(replace_a(str)));
end;
result:
11a34
115a34
1#1#5a3#4#
3
1b5a3#4#
7
1b5a3e
1b5a3e

Formatting Oracle SQL column with non-standard format

I have an Oracle database where a file location is stored. Unfortunately, it isn't properly formatted.
For example, the file location is C:\images\00\45\34\34.IMG and is stored in the database as: 00453434.
I am able to use CONCAT to put C:\images and .IMG around the column, but I can't format the actual location to put \s in.
I've tried to_char, and to_number but it requires a specified format.
(My crappy attempt: to_char(filename, '09"\"09"\"09"\"09'))
Is there any way in SQL to format freely?
One method... assuming fixed length of each segment meaning each path is 2 digits including file name.
-- Cuts the stored 8-character code into four 2-character segments and
-- joins them with backslashes between the folder prefix and the extension.
WITH stored AS (
    SELECT '00453434' AS file_code FROM dual
)
SELECT 'C:\images\'
       || SUBSTR(file_code, 1, 2) || '\'
       || SUBSTR(file_code, 3, 2) || '\'
       || SUBSTR(file_code, 5, 2) || '\'
       || SUBSTR(file_code, 7, 2)
       || '.IMG' AS fullPath
FROM stored
If needed at multiple queries, creating a PL/SQL function can also solve your problem. This example also assumes that each path has 2 digits, but supports paths of different lengths:
/*
 * GET_FILENAME
 *   Builds a file path from a code made of 2-character segments.
 *
 *   ID      code to split, e.g. '00453434'; an odd length leaves a final
 *           1-character segment.
 *   PREFIX  text placed in front of the first segment (no trailing '\').
 *   SUFFIX  text appended after the last segment, e.g. '.IMG'.
 *
 *   Returns PREFIX, then '\' plus each segment in order, then SUFFIX.
 *   A NULL or empty ID yields PREFIX || SUFFIX.
 */
CREATE OR REPLACE FUNCTION GET_FILENAME(ID IN VARCHAR2, PREFIX IN VARCHAR2, SUFFIX IN VARCHAR2) RETURN VARCHAR2 IS
    r             VARCHAR2(4000);
    -- NVL keeps the loop bound non-NULL (a NULL bound raises ORA-06502);
    -- CEIL makes the rounding up of odd lengths explicit.
    segment_count PLS_INTEGER := CEIL(NVL(LENGTH(ID), 0) / 2);
BEGIN
    r := PREFIX;
    FOR i IN 1 .. segment_count LOOP
        r := r || '\' || SUBSTR(ID, 2 * i - 1, 2);
    END LOOP;
    RETURN r || SUFFIX;
END;
/
The function can then be used within your standard SQL queries (or view definitions) as follows:
-- Example: resolve the path for one sample id.
WITH TA_FILES AS (
    SELECT
        '12345678' AS ID
    FROM
        DUAL
)
SELECT
    GET_FILENAME(ID, 'C:\images', '.IMG')
FROM
    TA_FILES

How to cut varchar/text before n'th occurence of delimiter? PostgreSQL

I have strings (saved in database as varchar) and I have to cut them just before n'th occurence of delimiter.
Example input:
String: 'My-Example-Awesome-String'
Delimiter: '-'
Occurence: 2
Output:
My-Example
I implemented this function for fast prototype:
-- find_position_delimiter
--   Returns the part of fulltext that precedes the occurence-th delimiter.
--
--   fulltext   text to cut
--   delimiter  literal separator text (not a regular expression)
--   occurence  number of the delimiter to cut before; 0 yields ''
--
--   When fulltext contains fewer than "occurence" delimiters the whole text
--   is returned. A NULL fulltext yields NULL.
CREATE OR REPLACE FUNCTION find_position_delimiter(fulltext varchar, delimiter varchar, occurence integer)
RETURNS varchar AS
$BODY$
DECLARE
    result  varchar := '';
    -- string_to_array splits on the literal delimiter, so characters such
    -- as '.' or '|' are not interpreted as regular-expression syntax.
    arr     text[]  := string_to_array(fulltext, delimiter);
    word    text;
    counter integer := 0;
BEGIN
    -- FOREACH raises an error on a NULL array, so handle NULL input first.
    IF arr IS NULL THEN
        RETURN NULL;
    END IF;

    FOREACH word IN ARRAY arr LOOP
        EXIT WHEN ( counter = occurence );
        -- Put the delimiter back between the words that are kept.
        IF (counter > 0) THEN
            result := result || delimiter;
        END IF;
        result := result || word;
        counter := counter + 1;
    END LOOP;
    RETURN result;
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE;
SELECT find_position_delimiter('My-Example-Awesome-String', '-', 2);
For now it assumes that string is not empty (provided by query where I will call function) and delimiter string contains at least one delimiter of provided pattern.
But now I need something better for a performance test. If possible, I would love to see the most universal solution, because not every user of my system works on a PostgreSQL database (a few of them prefer Oracle, MySQL or SQLite), but that is not the most important thing. Performance is, because for a specific search that function can be called a few hundred times.
I didn't find anything about a fast and easy way to treat a varchar as an array of characters and check it for occurrences of the delimiter (I could remember the positions of the occurrences and then take the substring from the first character to the position of the n'th delimiter minus 1). Any ideas? Are there smarter solutions?
# EDIT: Yes, I know that the function will be a bit different in every database, but the body of the function can be very similar or the same. Generality is not the main goal :) And sorry for the bad working name of the function; I just noticed it does not have the right meaning.
you can try doing something based on this:
-- For every row: the value, the position of the 2nd '-', and the text
-- in front of that '-' (or '...' when there is no 2nd '-').
SELECT
    varcharColumnName,
    INSTR(varcharColumnName,'-',1,2),
    CASE
        WHEN INSTR(varcharColumnName,'-',1,2) <> 0
            THEN SUBSTR(varcharColumnName, 1, INSTR(varcharColumnName,'-',1,2) - 1)
        ELSE '...'
    END
FROM
    tableName;
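Of course, you have to handle the "else" branch the way you want. It works on Postgres and Oracle (tested) and it should work on other DBMSs as well, because these are widely available SQL functions (note: INSTR is not part of core PostgreSQL; there it requires an Oracle-compatibility extension such as orafce).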
//edit - as a function, however this way it's rather hard to make it cross-dbms
-- find_position_delimiter
--   Returns the part of fulltext that precedes the occurence-th delimiter.
--
--   fulltext   text to cut
--   delimiter  literal separator text
--   occurence  number of the delimiter to cut before
--
--   Implemented with built-in PostgreSQL array functions only, so it does
--   not depend on an INSTR function being installed. When fulltext holds
--   fewer than "occurence" delimiters the whole text is returned; an
--   occurence below 1 yields ''; NULL arguments yield NULL.
CREATE OR REPLACE FUNCTION find_position_delimiter(fulltext varchar, delimiter varchar, occurence integer)
RETURNS varchar AS
$BODY$
BEGIN
    -- Split on the delimiter, keep the first "occurence" parts and join
    -- them again with the same delimiter.
    RETURN array_to_string(
        (string_to_array(fulltext, delimiter))[1:occurence],
        delimiter
    );
END;
$BODY$
LANGUAGE plpgsql IMMUTABLE;
SELECT find_position_delimiter('My-Example-Awesome-String', '-', 2);
-- trunc(string, delimiter, occurence)
--   Returns the part of "string" that precedes the occurence-th delimiter,
--   i.e. the first "occurence" delimiter-separated parts joined again.
--   Returns NULL when any argument is NULL.
create or replace function trunc(string text, delimiter char, occurence int) returns text as $$
# SQL NULL arrives as Python None; calling split()/join() on it would raise.
if string is None or delimiter is None or occurence is None:
    return None
return delimiter.join(string.split(delimiter)[:occurence])
$$ language plpythonu;
# select trunc('My-Example-Awesome-String', '-', 2);
trunc
------------
My-Example
(1 row)