Using Oracle REGEXP_SUBSTR to extract uppercase data separated by underscores

Using Oracle REGEXP_SUBSTR to extract uppercase data separated by underscores - sql

sample column data:
Failure on table TOLL_USR_TRXN_HISTORY:
Failure on table DOCUMENT_IMAGES:
Error in CREATE_ACC_STATEMENT() [line 16]
I am looking for a way to extract only the uppercase words (table names) separated by underscores. I want the whole table name, the maximum is 3 underscores and the minimum is 1 underscore. I would like to ignore any capital letters that are initcap.

You can just use regexp_substr():
-- Return the first run of three or more capital letters / underscores found in str.
with x as (
  select 'Failure on table TOLL_USR_TRXN_HISTORY' as str
  from dual
)
select regexp_substr(str, '[A-Z_]{3,}', 1, 1, 'c')
from x;
The pattern says to find substrings with capital letters or underscores, at least 3 characters long. The 1, 1 means start from the first position and return the first match. The 'c' makes the search case-sensitive.

You can run a SELECT statement like the one below against each individual line of your text
(the line "Failure on table TOLL_USR_TRXN_HISTORY" is used in this example):
-- Split str into space-delimited words and keep only the words that contain
-- no lower-case letters and have between one and three underscores.
select regexp_replace(q.word, '[^a-zA-Z0-9_]+', '') as word
from
(
-- word: the text between the previous space position and the current one (spc);
-- when spc is 0 (no further space) the length of str is used as the end position.
-- lg: the previous row's space position, or 1 on the first row.
select substr(str,nvl(lag(spc) over (order by lvl),1)+1*sign(lvl-1),
abs(decode(spc,0,length(str),spc)-nvl(lag(spc) over (order by lvl),1))) word,
nvl(lag(spc) over (order by lvl),1) lg
from
(
with tab as
( select 'Failure on table TOLL_USR_TRXN_HISTORY' str from dual )
-- one row per level 1..10; spc is the position of the level-th space, 0 if there is none
select instr(str,' ',1,level) spc, str, level lvl
from tab
connect by level <= 10
)
) q
-- drop the rows whose previous level found no space (previous spc = 0)
where lg > 0
-- the cleaned word must be unchanged by upper(), i.e. contain no lower-case letters
and upper(regexp_replace(q.word, '[^a-zA-Z0-9_]+', ''))
= regexp_replace(q.word, '[^a-zA-Z0-9_]+', '')
-- at least one underscore is present (1st, 2nd or 3rd occurrence found) ...
and ( nvl(length(regexp_substr(q.word,'_',1,1)),0)
+ nvl(length(regexp_substr(q.word,'_',1,2)),0)
+ nvl(length(regexp_substr(q.word,'_',1,3)),0)) > 0
-- ... and there is no fourth underscore
and nvl(length(regexp_substr(q.word,'_',1,4)),0) = 0;

An alternative way to get only the table name from the error message below. This query works only if the table name is the last word of the message, as in this example:
-- Take everything after the last space, then strip the trailing colon.
select rtrim(substr(data, instr(data, ' ', -1) + 1), ':')
from (
  select 'Failure on table TOLL_USR_TRXN_HISTORY:' as data
  from dual
) t
New Query for all messages :
-- Replace the fixed 'Failure on table' prefix and every colon with a space,
-- leaving only the table names from the multi-line message.
select replace (replace ( 'Failure on table TOLL_USR_TRXN_HISTORY:
Failure on table DOCUMENT_IMAGES:' , 'Failure on table', ' ' ),':',' ') from dual

Related

How to select the list of words containing a particular substring as part of a SQL query (oracle)?

I'm trying to return the list of "words" (separated by spaces) containing a certain substring within a string as part of an Oracle Sql query. Would like to return the result as a comma separated list. Separate rows for each match would also work.
Example String in [text_col] field:
some words 123-asdf-789A and also this one 456-asdf-555A more words etc.
Desired result: 123-asdf-789A, 456-asdf-555A
This is what I have so far but it only returns the first result and the fact that it's two separate regular expressions makes it difficult to concatenate all matches as I would like to do.
CONCAT(REGEXP_SUBSTR(text_col, ''(([^[:space:]]+)\asdf)'', 1, 1, ''i'', 1),
REGEXP_SUBSTR(text_col, ''\asdf([^[:space:]]+)'', 1, 1, ''i'', 1))

You can use some regexp functions together as :
-- Split the text on whitespace, keep the tokens that contain a dash, and
-- join them back together (comma-separated) in their original order.
with tab(str) as
(
 select 'some words 123-asdf-789A and also this one 456-asdf-555A more words etc' from dual
), t as
(
 -- one row per whitespace-delimited token
 select regexp_substr(str,'[^[:space:]]+',1,level) as str, level as lvl
   from tab
 -- A POSIX class is only recognised inside a bracket expression: a bare
 -- '[:space:]' is the set of literal characters : s p a c e. Count the tokens
 -- themselves so that exactly one row is generated per word.
 connect by level <= regexp_count(str,'[^[:space:]]+')
)
select listagg(str,',') within group (order by lvl) as "Result"
  from t
 where regexp_like(str,'-');
Result
---------------------------------
123-asdf-789A,456-asdf-555A
Demo
first split by spaces (through [:space:] posix) and take the ones containing dash characters, and finally concatenate by listagg() function

Use a recursive sub-query factoring clause and iterate through all the matches concatenating the string as you go:
Oracle Setup:
-- Sample data: one row with two matching tokens, one row with none, and one
-- row in which every token contains "asdf".
CREATE TABLE test_data ( value ) AS
SELECT 'some words 123-asdf-789A and also this one 456-asdf-555A more words etc.' FROM DUAL UNION ALL
SELECT 'some words without the expected sub-string' FROM DUAL UNION ALL
SELECT 'asdf asdf-123 456-asdf 78-asdf-90' FROM DUAL
Query:
-- Walk through each row's matches one at a time, building a space-separated
-- list of the tokens that contain "asdf".
WITH matches ( value, idx, cnt, match ) AS (
  -- anchor: nothing consumed yet; cnt holds the total number of matches in the row
  SELECT t.value,
         0,
         REGEXP_COUNT( t.value, '\S*asdf\S*' ),
         CAST( NULL AS VARCHAR2(4000) )
  FROM   test_data t
  UNION ALL
  -- step: append match number idx + 1, separated by a space after the first one
  SELECT m.value,
         m.idx + 1,
         m.cnt,
         CASE WHEN m.idx = 0 THEN '' ELSE m.match || ' ' END
           || REGEXP_SUBSTR( m.value, '\S*asdf\S*', 1, m.idx + 1 )
  FROM   matches m
  WHERE  m.cnt > m.idx
)
-- keep only the final row of each chain, where every match has been appended
SELECT value, match
FROM   matches
WHERE  cnt = idx;
Output:
VALUE | MATCH
:----------------------------------------------------------------------- | :--------------------------------
some words without the expected sub-string | null
some words 123-asdf-789A and also this one 456-asdf-555A more words etc. | 123-asdf-789A 456-asdf-555A
asdf asdf-123 456-asdf 78-asdf-90 | asdf asdf-123 456-asdf 78-asdf-90
db<>fiddle here

Escape single quotes from comma separated string in Oracle

I am trying to escape single quotes from the comma separated string in Oracle SQL Developer, below is my Select query:
-- Flag each supplier of the given branches with whether it is already linked
-- to the project (TRUE) or not (FALSE).
SELECT (CASE WHEN PS.SUPPLIER_NO IS NULL THEN 'FALSE' ELSE 'TRUE' END) AS "Check"
,dm."branch_id", dm."SUPPLIER_NO", dm."supplier_name", dm."date_created"
,dm."vendor_no", dm."ORDERING_ADDRESS_1", dm."ORDERING_ADDRESS_2"
,dm."ORDERING_CITY", dm."ORDERING_STATE_COD", dm."ORDERING_ZIP"
,dm."country_code", dm."fax_area_code", dm."fax_no", dm."fax_extension"
-- remote table reached through a database link (schema.table@dblink)
FROM datamart.udm_supplier_dim@teradata.wesco.com dm
LEFT JOIN PROJECT_SUPPLIERS PS ON PS.SUPPLIER_NO = dm."SUPPLIER_NO"
AND PS.BRANCH_ID = dm."branch_id"
AND PS.PROJECT_ID = 'e82a654af6c64e8297576b88b5eff138'
-- This predicate is the one the question is about: REPLACE returns the single
-- string '6218, 5577', not a list of two values, and the query is reported to
-- fail with ORA-01722.
WHERE dm."branch_id" IN (REPLACE('6218, 5577','''',''''''));
I tried to replace/escape the single quotes from Where IN clause but it gives error of invalid number.
ORA-01722: invalid number
When I am trying to select same string using replace from other select statement it is working.
select REPLACE('6218, 5577','''','''''') from dual;
above query works as expected and gives o/p as '6218, 5577'.
Can anyone please advise, Why it is not working in my main Select statement?

'6218, 5577' is a single string, not a list of values. So when you run select REPLACE('6218, 5577','''','''''') from dual; you are trying to replace single quotes in that string; since it does not contain any, nothing is replaced.
The result of your select is still the same string, not a list as you expect.
You should split the comma-delimited string into rows.
Here is one way to do that:
with tab as (
SELECT trim(regexp_substr('6218, 5577', '[^,]+', 1, LEVEL)) str
FROM dual
CONNECT BY instr('6218, 5577', ',', 1, LEVEL - 1) > 0
)
Then you can use it in your select:
-- Turn the comma-separated literal into one trimmed value per row and use
-- those rows as the IN list.
with tab as (
SELECT trim(regexp_substr('6218, 5577', '[^,]+', 1, LEVEL)) str
FROM dual
-- keep generating levels for as long as the (LEVEL - 1)-th comma exists
CONNECT BY instr('6218, 5577', ',', 1, LEVEL - 1) > 0
)
-- placeholders: the select list and from clause of the main query go here
select ...
from ...
WHERE dm."branch_id" IN (select str from tab );

REGEXP_REPLACE to replace emails in a list except a specific domain

I am novice to regular expressions. I am trying to remove emails from a list which do not belong to a specific domain.
for e.g. I have a below list of emails:
John@yahoo.co.in , Jacob@gmail.com, Bob@rediff.com,
Lisa@abc.com, sam@gmail.com , rita@yahoo.com
I need to get only the gmail ids:
Jacob@gmail.com, sam@gmail.com
Please note we may have spaces before the comma delimiters.
Appreciate any help!

This could be a start for you.
-- Extract every gmail.com address from a comma-separated list, one row each.
SELECT *
  FROM (    -- Local part: letters, digits, '.' and '+'. Inside a bracket
            -- expression '.' and '+' are already literal and a backslash would
            -- itself be matched, so no escapes are used there. The dot in the
            -- domain is escaped so it only matches a real '.'.
            SELECT REGEXP_SUBSTR (str,
                                  '[[:alnum:].+]+@gmail\.com',
                                  1,
                                  LEVEL)
                      AS SUBSTR
              FROM (SELECT ' John@yahoo.co.in , Jacob.foo@gmail.com, Bob@rediff.com,Lisa@abc.com, sam@gmail.com , sam.bar+stackoverflow@gmail.com, rita@yahoo.com, foobar '
                              AS str
                      FROM DUAL)
        -- upper bound on the number of matches: number of commas + 1
        CONNECT BY LEVEL <= LENGTH (REGEXP_REPLACE (str, '[^,]+')) + 1)
 -- levels beyond the last match return NULL
 WHERE SUBSTR IS NOT NULL ;
Put in a few more examples, but an email checker should comply to the respective RFCs, look at wikipedia for further knowledge about them https://en.wikipedia.org/wiki/Email_address
Inspiration from https://stackoverflow.com/a/17597049/869069

Rather than suppress the emails not matching a particular domain (in your example, gmail.com), you might try getting only those emails that match the domain:
-- Collect only the gmail.com addresses from the list and re-join them with
-- commas in their original order.
WITH a1 AS (
    SELECT 'John@yahoo.co.in , Jacob@gmail.com, Bob@rediff.com,Lisa@abc.com, sam@gmail.com , rita@yahoo.com' AS email_list FROM dual
)
SELECT LISTAGG(TRIM(email), ',') WITHIN GROUP ( ORDER BY priority )
  FROM (
    -- '[^,]+' may pick up the blanks that follow a comma; TRIM removes them above.
    -- The dot is escaped so that it only matches a literal '.'.
    SELECT REGEXP_SUBSTR(email_list, '[^,]+@gmail\.com', 1, LEVEL, 'i') AS email
         , LEVEL AS priority
      FROM a1
   CONNECT BY LEVEL <= REGEXP_COUNT(email_list, '[^,]+@gmail\.com', 1, 'i')
);
That said, Oracle is probably not the best tool for this (do you have these email addresses stored as a list in a table somewhere? If so, then @GordonLinoff's comment is apt - fix your data model if you can).

Here's a method using a CTE just for a different take on the problem. First step is to make a CTE "table" that contains the parsed list elements. Then select from that. The CTE regex handles NULL list elements.
-- Parse the comma-separated list into one row per element, then keep and
-- re-join the addresses whose domain is gmail.com.
with main_tbl(email) as (
  select ' John@yahoo.co.in , Jacob.foo@gmail.com, Bob@rediff.com,Lisa@abc.com, sam@gmail.com , sam.bar+stackoverflow@gmail.com, rita@yahoo.com, foobar '
  from dual
),
-- one trimmed element per row; the lazy '(.*?)(,|$)' pattern also returns
-- empty (NULL) elements instead of skipping them
email_list(email_addr) as (
  select trim(regexp_substr(email, '(.*?)(,|$)', 1, level, NULL, 1))
  from main_tbl
  connect by level <= regexp_count(email, ',')+1
)
select LISTAGG(TRIM(email_addr), ', ') WITHIN GROUP ( ORDER BY email_addr )
from email_list
-- anchor on '@' so that domains merely ending in "gmail.com" (e.g. notgmail.com) are rejected
where lower(email_addr) like '%@gmail.com';

Regexp_replace processing result

I have a string with groups of numbers, and I'd like to turn it into a string whose groups all have a constant length. Currently I use two regexp_replace calls: the first prepends 10 zeros to each group, and the second cuts each group down to its last 10 digits:
with s(txt) as ( select '1030123:12031:1341' from dual)
select regexp_replace(
regexp_replace(txt, '(\d+)','0000000000\1')
,'\d+(\d{10})','\1') from s ;
But Id like to use only one regex something like
regexp_replace(txt, '(\d+)',lpad('\1',10,'0'))
But it doesn't work: lpad is executed before the regexp. Do you have any ideas?

With a slightly different approach, you can try the following:
-- Pad every ':'-separated piece of txt to 10 characters with leading zeros
-- and re-join the pieces, producing one output row per input string.
with s(id, txt) as
(
-- rownum gives each sample string an id to group by
select rownum, txt
from (
select '1030123:12031:1341' as txt from dual union all
select '1234:0123456789:1341' from dual
)
)
-- column_value is the piece number; it selects the piece and orders the result
SELECT listagg(lpad(regexp_substr(s.txt, '[^:]+', 1, lines.column_value), 10, '0'), ':') within group (order by column_value) txt
FROM s,
-- for each row of s, generate the numbers 1..(number of ':' separators + 1)
TABLE (CAST (MULTISET
(SELECT LEVEL FROM dual CONNECT BY instr(s.txt, ':', 1, LEVEL - 1) > 0
) AS sys.odciNumberList )) lines
group by id
TXT
-----------------------------------
0001030123:0000012031:0000001341
0000001234:0123456789:0000001341
This uses the CONNECT BY to split every string based on the separator ':', then uses LPAD to pad to 10 and then aggregates the strings to build rows containing the concatenation of padded values

This works for non-empty sequences (e.g. 123::456)
-- Inner call: prefix every digit group with ten zeros (built with lpad).
-- Outer call: strip the surplus leading zeros so that ten digits remain.
with s(txt) as ( select '1030123:12031:1341' from dual)
select regexp_replace (regexp_replace (txt,'(\d+)',lpad('0',10,'0') || '\1'),'0*(\d{10})','\1')
from s
;

Check palindrome without using string functions with condition

I have a table EmployeeTable.
If I want only that records where employeename have character of 1 to 5
will be palindrome and there also condition like total character is more then 10 then 4 to 8 if character less then 7 then 2 to 5 and if character less then 5 then all char will be checked and there that are palindrome then only display.
Examples :- neen will be display
neetan not selected
kiratitamara will be selected
I try this something on string function like FOR first case like name less then 5 character long
SELECT SUBSTRING(EmployeeName,1,5),* from EmaployeeTable where
REVERSE (SUBSTRING(EmployeeName,1,5))=SUBSTRING(EmployeeName,1,5)
I want to do that without string functions,
Can anyone help me on this?

You need at least SUBSTRING(), I have a solution like this:
(In SQL Server)
-- Count the positions in the first half of @txt whose character differs from
-- the character at the mirrored position; a result of 0 means @txt is a palindrome.
-- (T-SQL local variables must start with '@'.)
DECLARE @txt varchar(max) = 'abcba'
;WITH CTE (cNo, cChar) AS (
    -- one row per character: its position and the character itself
    SELECT 1, SUBSTRING(@txt, 1, 1)
    UNION ALL
    SELECT cNo + 1, SUBSTRING(@txt, cNo + 1, 1)
    FROM CTE
    -- recursion ends when the next SUBSTRING compares equal to ''
    -- NOTE(review): a blank character may also compare equal to '' here and
    -- end the walk early for strings that contain spaces - confirm.
    WHERE SUBSTRING(@txt, cNo + 1, 1) <> ''
)
SELECT COUNT(*)
FROM (
    -- cRevNo numbers the rows from the end, i.e. the mirrored position of cNo
    SELECT *, ROW_NUMBER() OVER (ORDER BY cNo DESC) as cRevNo
    FROM CTE t1 CROSS JOIN
        (SELECT Max(cNo) AS strLength FROM CTE) t2) dt
WHERE
    -- only the first half needs to be compared with its mirror
    dt.cNo <= dt.strLength / 2
    AND
    dt.cChar <> (SELECT dti.cChar FROM CTE dti WHERE dti.cNo = cRevNo)
The result shows the count of differences; 0 means there are no differences.
Note:
The current solution is case-insensitive; to make it case-sensitive, compare the strings using a case-sensitive collation such as Latin1_General_BIN.
You can use this solution as an SVF or something similar.

I don't really understand why you don't want to use string functions in your query, but here is one solution. Compute everything beforehand:
Add Column:
-- Persisted computed column holding the slice of EmployeeName to be checked.
-- The start position and the third SUBSTRING argument depend on the name length.
-- NOTE(review): SUBSTRING's third argument is a length, not an end position;
-- the values 8 / 5 are kept as in the original answer - confirm they match the
-- intended "4 to 8" / "2 to 5" ranges.
ALTER TABLE EmployeeTable
ADD SubString AS
SUBSTRING(EmployeeName,
    (
        CASE WHEN LEN(EmployeeName)>10
            THEN 4
        WHEN LEN(EmployeeName)>7
            THEN 2
        ELSE 1 END
    )
    ,
    (
        CASE WHEN LEN(EmployeeName)>10
            THEN 8
        WHEN LEN(EmployeeName)>7
            THEN 5
        ELSE 5 END
    )
) PERSISTED  -- closing parenthesis of SUBSTRING( was missing
GO
-- Persisted computed column holding the same slice reversed, so a palindrome
-- check becomes a plain column comparison.
ALTER TABLE EmployeeTable
ADD Palindrome AS
REVERSE(SUBSTRING(EmployeeName,
    (
        CASE WHEN LEN(EmployeeName)>10
            THEN 4
        WHEN LEN(EmployeeName)>7
            THEN 2
        ELSE 1 END
    )
    ,
    (
        CASE WHEN LEN(EmployeeName)>10
            THEN 8
        WHEN LEN(EmployeeName)>7
            THEN 5
        ELSE 5 END
    )
)) PERSISTED  -- closes SUBSTRING( and REVERSE(; one parenthesis was missing
GO
Then your query will looks like:
-- Rows whose stored slice equals its reverse. The table name matches the
-- EmployeeTable altered in the preceding statements.
SELECT * from EmployeeTable
where Palindrome = SubString
BUT!
This is not a good idea. Please tell us why you don't want to use string functions.

You could do it by building a list of palindrome words with a recursive query that generates palindromes up to a length of n characters, and then selecting the employees whose name matches one of those words. This may be a really inefficient way, but it does the trick.
This is a sample query for Oracle, PostgreSQL should support this feature as well with little differences on syntax. I don't know about other RDBMS.
-- Select the employees whose name appears in a generated list of palindromes
-- built from the letters A-Z.
-- Sample employee data.
with EmployeeTable AS (
SELECT 'ADA' AS employeename
FROM DUAL
UNION ALL
SELECT 'IDA' AS employeename
FROM DUAL
UNION ALL
SELECT 'JACK' AS employeename
FROM DUAL
-- letters: one row per character from 'A' to 'Z'
), letters as (
select chr(ascii('A') + rownum - 1) as letter
from dual
connect by ascii('A') + rownum - 1 <= ascii('Z')
-- palindromes: seeded with the empty (NULL) word and every single letter
), palindromes(word, len ) as (
SELECT WORD, LEN
FROM (
select CAST(NULL AS VARCHAR2(100)) as word, 0 as len
from DUAL
union all
select letter as word, 1 as len
from letters
)
union all
-- wrap each word in the same letter on both sides; len goes up by 1 per step
-- while the word grows by two characters, so len counts steps, not characters
select l.letter||p.word||l.letter AS WORD, len + 1 AS LEN
from palindromes p
cross join letters l
-- words are no longer extended once len exceeds 4
where len <= 4
)
SEARCH BREADTH FIRST BY word SET order1
CYCLE word SET is_cycle TO 'Y' DEFAULT 'N'
select *
from EmployeeTable
WHERE employeename IN (
SELECT WORD
FROM palindromes
)

-- Report whether @cPalindrome (spaces removed) reads the same in both directions.
-- (T-SQL local variables must start with '@'.)
DECLARE @cPalindrome VARCHAR(100) = 'SUBI NO ONIBUS'
SET @cPalindrome = REPLACE(@cPalindrome, ' ', '')
;WITH tPalindromo (iNo) AS (
    -- anchor: position 1 matches the last character
    SELECT 1
    WHERE SUBSTRING(@cPalindrome, 1, 1) = SUBSTRING(@cPalindrome, LEN(@cPalindrome), 1)
    UNION ALL
    -- step: advance while position iNo + 1 matches its mirror position from the end
    SELECT iNo + 1
    FROM tPalindromo
    WHERE SUBSTRING(@cPalindrome, iNo + 1, 1) = SUBSTRING(@cPalindrome, LEN(@cPalindrome) - iNo, 1)
    AND LEN(@cPalindrome) > iNo
)
-- every position matched its mirror only if the walk reached the full length
SELECT IIF(MAX(iNo) = LEN(@cPalindrome), 'PALINDROME', 'NOT PALINDROME')
FROM tPalindromo

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Using Oracle REGEXP_SUBSTR to extract uppercase data separated by underscores - sql

Related

How to select the list of words containing a particular substring as part of a SQL query (oracle)?

Escape single quotes from comma separated string in Oracle

REGEXP_REPLACE to replace emails in a list except a specific domain

Regexp_replace processing result

Check palindrome without using string functions with condition

Categories

Resources