I am struggling with a regex to split a string into columns in an Oracle database.
-- Returns NULL for '1:::9999': '[^: ]+' needs at least one character per match,
-- so the empty 2nd and 3rd fields are skipped and a 4th occurrence never exists.
select (REGEXP_SUBSTR(replace('1:::9999', ' ',''), '[^: ]+', 1, 4)) from dual;
I need to obtain 4th value from that string as a column value, sometimes values at position 2,3 are empty and my query doesn't work. I am trying to figure out what regex will work
You can use
-- 4th colon-separated field; empty fields still count because group 1 may match zero characters.
select
  regexp_substr(
    replace('1:::9999', ' ', ''),  -- input with spaces removed
    '([^: ]*)(:|$)',               -- group 1: field text, group 2: ':' or end of string
    1,                             -- start searching at the first character
    4,                             -- take the 4th occurrence of the pattern
    'i',                           -- match parameter: case-insensitive
    1                              -- return only capture group 1
  )
from dual;
Here, the ([^: ]*)(:|$) matches
([^: ]*) - Group 1: any zero or more chars other than : and space
(:|$) - Group 2, either : or end of string.
You do not need a (slower) regex for this task, use simple substr/instr functions:
-- Extracts the 4th colon-separated field with plain instr/substr (no regex).
-- Returns NULL when the field is empty or when the value has fewer than three colons.
with input_(val) as (
  select '1:::9999' from dual
  union all
  select '1:2::' from dual
  union all
  select '1:2::3:5' from dual
)
, replaced as (
  /* Remove spaces so they never end up in the extracted field */
  select input_.*, replace(val, ' ', '') as val_replaced
  from input_
)
, located as (
  /* Positions of the 3rd and 4th colon; instr yields 0 when the occurrence is absent */
  select
    val,
    val_replaced,
    instr(val_replaced, ':', 1, 3) as third_colon,
    instr(val_replaced, ':', 1, 4) as fourth_colon
  from replaced
)
select
  val,
  case
    /* Without a 3rd colon there is no 4th field; the unguarded substr would
       start at position 1 and return the whole string */
    when third_colon > 0 then
      substr(
        val_replaced,
        /* Start right after the third colon ... */
        third_colon + 1,
        /* ... and read until the fourth colon, or until the end if there is none */
        nvl(nullif(fourth_colon, 0), length(val_replaced) + 1) - third_colon - 1
      )
  end as col
from located
VAL
COL
1:::9999
9999
1:2::
null
1:2::3:5
3
fiddle with performance difference.
This is my current code. Rather than hard-coding these replacements, I want to put the values in a table and use that table to drive the replacements, without a WHILE loop or a cursor. Keep in mind that several replacements may apply to the same field: for instance, "Mr. Guy" needs the "." removed and then "Mr " removed as well.
-- Strip punctuation and titles from the trimmed first name.
-- Replacements are applied innermost first: '.', ',', 'Mr ', 'Dr ', 'Mrs ', 'Ms'.
SELECT
    REPLACE(
      REPLACE(
        REPLACE(
          REPLACE(
            REPLACE(
              REPLACE(TRIM(di.FirstName), '.', ''),
              ',', ''),
            'Mr ', ''),
          'Dr ', ''),
        'Mrs ', ''),
      'Ms', '')
FROM core..asdf di
If your DBMS supports both GROUP_CONCAT() (or an equivalent, like LISTAGG() in Vertica) and REGEXP_REPLACE(),
You can
create an in-line table with the titles you want to remove,
group-concatenate that in-line table into a single string, bar separated
surround that bar-separated list with round parentheses, and add '\b' for a "word" boundary, '\.?' meaning zero or one occurrence of the literal dot character (not "any character"), and '\s*' for zero or more whitespace characters
and finally use that regular expression you just created on a REGEXP_REPLACE() call.
-- Removes a leading title (optionally followed by a dot) from each first name,
-- building the regular expression from a list of titles instead of hard-coding it.
WITH
-- sample names, with and without a dot after the title
indata(fname) AS (
  SELECT 'Mr Arthur'
  UNION ALL SELECT 'Mrs Tricia'
  UNION ALL SELECT 'Ms Eccentrica'
  UNION ALL SELECT 'Dr Gag'
  UNION ALL SELECT 'Mr. Arthur'
  UNION ALL SELECT 'Mrs. Tricia'
  UNION ALL SELECT 'Ms. Eccentrica'
  UNION ALL SELECT 'Dr. Gag'
)
,
-- in-line stand-in for the table of values to remove
titles(title) AS (
  SELECT 'Mr'
  UNION ALL SELECT 'Mrs'
  UNION ALL SELECT 'Ms'
  UNION ALL SELECT 'Dr'
)
,
-- single-row CTE holding the assembled pattern
regx(regx) AS (
  SELECT
    -- word boundaries on both sides keep a title from matching inside a longer word
    '\b('||LISTAGG(title USING PARAMETERS separator='|')||')\b\.?\s*'
    -- OR GROUP_CONCAT(title,'|') in other DBMSs ...
  FROM titles
)
-- control query ...
-- SELECT * FROM regx;
-- out            regx
-- out ----------------------
-- out \b(Mr|Mrs|Ms|Dr)\b\.?\s*
SELECT
  REGEXP_REPLACE(fname,regx) AS fname
FROM indata CROSS JOIN regx;
-- out fname
-- out ------------
-- out Arthur
-- out Tricia
-- out Eccentrica
-- out Gag
-- out Arthur
-- out Tricia
-- out Eccentrica
-- out Gag
If a column has bad data such as:
45612345698
(456)123-7452
125-145-9856
Without fixing the data, is it possible to write a SQL query that searches for 1251459856 and returns the 3rd item in the column?
Hmmm . . . you could use replace():
-- Compare against the stored value with '-', '(' and ')' removed.
where replace(
        replace(
          replace(col, '-', ''),
          '(', ''),
        ')', '') = '1251459856'
If your data is worse than just "-",")" and "(" you could go for a more generic solution and strip on any non-numeric character with the following
-- Strips every non-digit character so differently formatted phone numbers
-- reduce to the same digit string.
WITH sample_data_tab (str) AS
(
    -- UNION ALL keeps every sample row as written; plain UNION would
    -- de-duplicate the rows, which is not wanted for fixture data
    SELECT '45612345698' FROM DUAL UNION ALL
    SELECT '(456)123-7452' FROM DUAL UNION ALL
    SELECT '125-145-9856' FROM DUAL UNION ALL
    SELECT '989 145 9856' FROM DUAL
)
SELECT regexp_replace(str, '[^0-9]', '') AS digits_only
FROM sample_data_tab
I have the below structure(' ' refers to empty spaces):
name description
---------------------
a yes
b ' '
c ' '
d null
I am looking for a query that returns the rows whose description consists only of spaces, i.e. the result below.
name description
---------------------
b ' '
c ' '
The query select * from tab1 where description =' '; returns only row c; my data has many values that consist of long runs of spaces.
You can use REGEXP_LIKE:
-- Sample rows from the question, filtered to descriptions made up entirely of spaces.
with src as (
  select 'a' as name, 'yes' as description from dual
  union all
  select 'b', ' ' from dual
  union all
  select 'c', ' ' from dual
  union all
  select 'd', null from dual
)
-- '^[ ]+$' = one or more spaces and nothing else; a NULL description never matches
select * from src where regexp_like(description, '^[ ]+$')
Edited: added regexp_like(description,'^[ ]+$') to take into account only descriptions with spaces. If there is a description in the format ' s ', ' s' or 's ' it will not be selected.
Use TRIM function to trim the spaces.
-- In Oracle, TRIM of an all-space string yields NULL, but so does TRIM(NULL);
-- exclude NULL descriptions explicitly so only space-only rows are returned.
select * from tab1 where description IS NOT NULL and TRIM(description) IS NULL;
I have not tested it but it should work.
with this basic query:
-- Base query over the question's four sample rows; append one of the WHERE clauses below.
with sample_data as (
  select 'a' as name, 'yes' as description from dual
  union all
  select 'b', ' ' from dual
  union all
  select 'c', ' ' from dual
  union all
  select 'd', null from dual
)
select * from sample_data
you can pick and choose among the following where clauses to get your desired results:
where regexp_like(description, '[ ]');        -- at least one space in the string
where regexp_like(description, '[ ]{2,}');    -- two or more consecutive spaces
where regexp_like(description, '^[ ]+$');     -- just spaces of any length
where regexp_like(description, '^[ ]{2,}$');  -- just spaces of length 2 or more
if you want any white space character (e.g. tabs, vertical tabs, non blanking spaces, etc.) you can replace the single space character class [ ] with this [[:space:]] named character class in any of the above where clauses.
Use LIKE operator
-- LIKE ' %' alone also matches values such as ' s' (a space followed by text).
-- In Oracle, TRIM of an all-space string is NULL, so the second predicate
-- narrows the result to descriptions that contain nothing but spaces.
SELECT *
FROM tab1
WHERE description LIKE ' %'
  AND TRIM(description) IS NULL
Here is the table data with the column name as Ships.
+--------------+
Ships |
+--------------+
Duke of north |
---------------+
Prince of Wales|
---------------+
Baltic |
---------------+
Replace all characters between the first and the last spaces (excluding these spaces) by symbols
of an asterisk (*). The number of asterisks must be equal to number of replaced characters.
Regular expressions are your friend :)
First match the space, followed by any other characters, ending in a space.
Then replace that with a string that consists of the starting and trailing space and, in between, a string of asterisks.
The string of asterisks is made by right padding a single asterisk with further asterisks to the appropriate length. That length is the length of the regular expression matched minus two characters for the leading/trailing space.
-- Masks everything between the first and the last space with asterisks.
select
  regexp_replace(
    column_value,
    ' .* ',  -- greedy: first space through last space
    -- rebuild the span: a space, one '*' per inner character (match length minus the two spaces), a space
    ' ' || rpad('*', length(regexp_substr(column_value, ' .* ')) - 2, '*') || ' '
  )
from table(
  sys.dbms_debug_vc2coll(
    'Duke of north',
    'Prince of Wales',
    'Baltic',
    'what if two spaces'
  )
);
Duke ** north
Prince ** Wales
Baltic
what ****** spaces
This really smells like homework. So I won't provide you with the full deal, but point you in the right direction instead:
Check out the function InStr, especially its 3rd and 4th parameters, which let you start the search at the Xth character and/or search for the Yth occurrence.
Edit: If someone finds this thread in a search and hopes for a solution that works in older versions of Oracle, this is how I'd have done it.
(I posted it as a comment to another post, but the author deleted his answer for some inexplicable reason o_O )
-- Masks the characters strictly between the first and the last space with '*',
-- using only InStr/SubStr/lPad (no regular expressions).
SELECT case
         -- only applies when the name has at least two spaces
         when InStr(Name, ' ', 1) > 0 and
              InStr(Name, ' ', 1) <> InStr(Name, ' ', -1) then
           -- head: everything up to and including the first space
           SubStr(Name, 1, InStr(Name, ' ', 1)) ||
           -- one '*' per character between the two spaces (both spaces excluded);
           -- lPad to length 0 yields NULL, which concatenates as nothing
           lPad('*', InStr(Name, ' ', -1) - InStr(Name, ' ', 1) - 1, '*') ||
           -- tail: the last space and everything after it
           SubStr(Name, InStr(Name, ' ', -1))
         else
           Trim(Name)
       end
  FROM SomeTable
Although the data in the original question only had one word in between, there can be more than one word between the first and the last word. For example: "This is an example with more than one word"
I suppose the solution should be such that it handles all these as well....
Anyway, here is another solution:
-- Masks every word except the first and the last: each character of a middle
-- word becomes '*', and the words are re-joined with single spaces.
With
I As(
/*Serves as an input parameter*/
Select 'This is an example with more than one word' Str From Dual
)
,D As(
/*Split words into rows*/
-- Word = the Level-th run of non-space characters; Seq numbers the words in order;
-- L = the highest Seq (first value when ordered descending), i.e. the last word's position
Select RegExp_SubStr(Str,'[^ ]+',1,Level) Word,RowNum Seq,First_value(RowNum) Over(Order By RowNum Desc) L
From I
-- keep producing rows while a Level-th word still exists
Connect By RegExp_SubStr(Str,'[^ ]+',1,Level) Is Not NULL
)
Select
/*Assemble all together - other than the first and the last word, replace all the rest into "*"*/
--uncomment the ListAgg statement if using 11g--
--ListAgg(Decode(Seq,1,Word,L,Word,RegExp_Replace(Word,'.','*')),' ') Within Group(Order By Seq) Statement
--If using earlier version of Oracle then use the following--
-- Decode keeps Word when Seq is 1 or L and masks it otherwise; each word plus a trailing space
-- is wrapped in an <R> element, aggregated in Seq order, then the <R>/</R> tags are stripped
-- and the result is trimmed
Trim(RegExp_Replace(XMLAgg(XMLElement(R,Decode(Seq,1,Word,L,Word,RegExp_Replace(Word,'.','*'))||' ') Order By Seq),'</?R>')) Statement
From D
/
OUTPUT:
This ** ** ******* **** **** **** *** word
-- Splits each sample string into a first word, a middle part and a last word,
-- then rebuilds it with the letters of the middle part replaced by asterisks.
SELECT a actual_string,
first_word,
-- middle part: output1 with everything after its last space cut off
-- (when output1 has no space this leaves nothing)
SUBSTR(output1,1,LENGTH(output1)-LENGTH(SUBSTR(output1,(
CASE
WHEN regexp_count(output1,' ')=0
THEN 0
ELSE regexp_instr(output1,' ',1,regexp_count(output1,' '))
END)+1))) middle_words,
last_word,
CASE
-- when a has no space, first_word and last_word are both the whole string
WHEN first_word=last_word
THEN first_word
ELSE first_word
-- same middle-part expression as above, upper-cased so TRANSLATE can map each
-- letter A-Z to '*'; any other character, spaces included, passes through unchanged
||TRANSLATE(upper(SUBSTR(output1,1,LENGTH(output1)-LENGTH(SUBSTR(output1,(
CASE
WHEN regexp_count(output1,' ')=0
THEN 0
ELSE regexp_instr(output1,' ',1,regexp_count(output1,' '))
END)+1)))),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','**************************')
||last_word
END final_result
FROM
(SELECT a,
-- first_word: text up to and including the first space, or all of a when there is no space
CASE
WHEN SUBSTR(a,1,regexp_instr(a,' ',1)) IS NULL
THEN a
ELSE SUBSTR(a,1,regexp_instr(a,' ',1))
END first_word,
-- last_word: text after the last space (the space count is used as the occurrence
-- number to find it), or all of a when there is no space
SUBSTR(a,(
CASE
WHEN regexp_count(a,' ')=0
THEN 0
ELSE regexp_instr(a,' ',1,regexp_count(a,' '))
END)+1) last_word,
-- NOTE(review): this inner middle_words is not referenced by the outer query,
-- which derives its own middle_words from output1
SUBSTR(a, LENGTH(
CASE
WHEN SUBSTR(a,1,regexp_instr(a,' ',1)) IS NULL
THEN a
ELSE SUBSTR(a,1,regexp_instr(a,' ',1))
END)+1, LENGTH(SUBSTR(a,(
CASE
WHEN regexp_count(a,' ')=0
THEN 0
ELSE regexp_instr(a,' ',1,regexp_count(a,' '))
END)+1))-2) middle_words,
-- output1: when a contains a space, the substring starting just after the first space,
-- with the position of the last space passed as the length; otherwise a itself
CASE
WHEN regexp_instr(a,' ',1) +1>1
THEN SUBSTR(a,regexp_instr(a,' ',1)+1,
CASE
WHEN regexp_count(a,' ')=0
THEN 0
ELSE regexp_instr(a,' ',1,regexp_count(a,' '))
END )
ELSE a
END output1--,
FROM
-- sample inputs; UNION also removes duplicate rows
( SELECT 'Duke of north' a FROM dual
UNION
SELECT 'Prince of Wales' a FROM dual
UNION
SELECT 'Baltic' a FROM dual
UNION
SELECT 'what if two spaces' a FROM dual
UNION
SELECT 'what if two or spaces' a FROM dual
)
)