Oracle replacing text between first and last spaces - sql

Here is the table data with the column name as Ships.
+-----------------+
| Ships           |
+-----------------+
| Duke of north   |
| Prince of Wales |
| Baltic          |
+-----------------+
Replace all characters between the first and the last spaces (excluding these spaces) by symbols
of an asterisk (*). The number of asterisks must be equal to number of replaced characters.

Regular expressions are your friend :)
First match the space, followed by any other characters, ending in a space.
Then replace that with a string that consists of the starting and trailing space and, in between, a string of asterisks.
The string of asterisks is made by right padding a single asterisk with further asterisks to the appropriate length. That length is the length of the regular expression matched minus two characters for the leading/trailing space.
-- Mask the text between the first and the last space of each sample string.
-- The greedy pattern ' .* ' matches from the first space through the last
-- space; the replacement keeps both spaces and puts one '*' per inner
-- character between them (matched length minus the two boundary spaces).
-- Strings without two spaces do not match and come back unchanged.
SELECT
    REGEXP_REPLACE(
        src.column_value,
        ' .* ',
        ' ' || RPAD('*', LENGTH(src.spaced_span) - 2, '*') || ' '
    )
FROM (
    SELECT
        t.column_value,
        -- The span that the outer REGEXP_REPLACE will replace.
        REGEXP_SUBSTR(t.column_value, ' .* ') AS spaced_span
    FROM TABLE(sys.dbms_debug_vc2coll(
        'Duke of north', 'Prince of Wales', 'Baltic', 'what if two spaces'
    )) t
) src;
Duke ** north
Prince ** Wales
Baltic
what ****** spaces

This really smells like homework. So I won't provide you with the full deal, but point you in the right direction instead:
Check out the function InStr, especially its 3rd and 4th parameters, which let you start searching at the Xth character and/or search for the Yth occurrence.
Edit: If someone finds this thread in a search and hopes for a solution that works in older versions of Oracle, this is how I'd have done it.
(I posted it as a comment to another post, but the author deleted his answer for some inexplicable reason o_O )
-- Replace every character strictly between the first and the last space of
-- Name with '*', keeping both boundary spaces in place
-- ('Duke of north' -> 'Duke ** north').
-- Names with no space, or with a single space, are returned trimmed.
SELECT
    CASE
        -- InStr(.., ' ', -1) searches backwards, i.e. it finds the last space.
        WHEN InStr(Name, ' ', 1) > 0
             AND InStr(Name, ' ', 1) <> InStr(Name, ' ', -1)
        THEN
            -- Head: everything up to and including the first space.
            SubStr(Name, 1, InStr(Name, ' ', 1))
            -- One asterisk per character between the two spaces. LPad returns
            -- NULL (an empty string) when the spaces are adjacent.
            || LPad('*', InStr(Name, ' ', -1) - InStr(Name, ' ', 1) - 1, '*')
            -- Tail: from the last space (inclusive) to the end.
            || SubStr(Name, InStr(Name, ' ', -1))
        ELSE
            Trim(Name)
    END
FROM SomeTable

Although the data in the original question only had one word in between, it is possible to have more than one word between the first and the last word. For example: "This is an example with more than one word"
I suppose the solution should be such that it handles all these as well....
Anyway, here is another solution:
-- Masks every word except the first and the last one with asterisks.
-- The input string is split into words (runs of non-space characters), each
-- middle word is replaced character-by-character with '*', and the words are
-- re-joined with single spaces. Because the split is on '[^ ]+' and the join
-- uses one space, runs of several spaces in the input are not preserved.
With
I As(
/*Serves as an input parameter*/
Select 'This is an example with more than one word' Str From Dual
)
,D As(
/*Split words into rows*/
-- Word: the Level-th word; Seq: its row number;
-- L: the highest row number (First_value over RowNum descending), used below
-- to recognise the last word.
Select RegExp_SubStr(Str,'[^ ]+',1,Level) Word,RowNum Seq,First_value(RowNum) Over(Order By RowNum Desc) L
From I
-- Keep generating rows while a Level-th word still exists.
Connect By RegExp_SubStr(Str,'[^ ]+',1,Level) Is Not NULL
)
Select
/*Assemble all together - other than the first and the last word, replace all the rest into "*"*/
--uncomment the ListAgg statement if using 11g--
--ListAgg(Decode(Seq,1,Word,L,Word,RegExp_Replace(Word,'.','*')),' ') Within Group(Order By Seq) Statement
--If using earlier version of Oracle then use the following--
-- Decode keeps Word when Seq is 1 or L and masks it otherwise; each piece is
-- wrapped in an <R> element with a trailing space, aggregated in Seq order,
-- then the <R>/</R> tags are stripped and the outer spaces trimmed.
Trim(RegExp_Replace(XMLAgg(XMLElement(R,Decode(Seq,1,Word,L,Word,RegExp_Replace(Word,'.','*'))||' ') Order By Seq),'</?R>')) Statement
From D
/
OUTPUT:
This ** ** ******* **** **** **** *** word

-- Masks the words between the first and the last word of each sample string.
-- Layout: the innermost query supplies the sample strings, the middle query
-- derives first_word / last_word / output1 from them, and this outer query
-- builds middle_words and final_result from those derived columns.
SELECT a actual_string,
first_word,
-- middle_words: output1 cut off after its last space (everything in output1
-- minus the text that follows that last space).
SUBSTR(output1,1,LENGTH(output1)-LENGTH(SUBSTR(output1,(
CASE
WHEN regexp_count(output1,' ')=0
THEN 0
ELSE regexp_instr(output1,' ',1,regexp_count(output1,' '))
END)+1))) middle_words,
last_word,
-- final_result: first_word + masked middle + last_word. When first_word and
-- last_word are equal (e.g. a string without spaces) first_word is returned.
CASE
WHEN first_word=last_word
THEN first_word
ELSE first_word
-- The middle part is upper-cased and only the letters A-Z are translated to
-- '*'; any other character (spaces, digits, punctuation) is left as it is.
||TRANSLATE(upper(SUBSTR(output1,1,LENGTH(output1)-LENGTH(SUBSTR(output1,(
CASE
WHEN regexp_count(output1,' ')=0
THEN 0
ELSE regexp_instr(output1,' ',1,regexp_count(output1,' '))
END)+1)))),'ABCDEFGHIJKLMNOPQRSTUVWXYZ','**************************')
||last_word
END final_result
FROM
(SELECT a,
-- first_word: text up to and including the first space; the whole string when
-- there is no space (SUBSTR with length 0 yields NULL).
CASE
WHEN SUBSTR(a,1,regexp_instr(a,' ',1)) IS NULL
THEN a
ELSE SUBSTR(a,1,regexp_instr(a,' ',1))
END first_word,
-- last_word: text after the last space; the whole string when there is no
-- space (start position 0 + 1).
SUBSTR(a,(
CASE
WHEN regexp_count(a,' ')=0
THEN 0
ELSE regexp_instr(a,' ',1,regexp_count(a,' '))
END)+1) last_word,
-- NOTE(review): this inner middle_words column is not referenced by the outer
-- query, which computes its own middle_words from output1.
SUBSTR(a, LENGTH(
CASE
WHEN SUBSTR(a,1,regexp_instr(a,' ',1)) IS NULL
THEN a
ELSE SUBSTR(a,1,regexp_instr(a,' ',1))
END)+1, LENGTH(SUBSTR(a,(
CASE
WHEN regexp_count(a,' ')=0
THEN 0
ELSE regexp_instr(a,' ',1,regexp_count(a,' '))
END)+1))-2) middle_words,
-- output1: when the string contains a space, the text starting right after the
-- first space, with the position of the last space used as the SUBSTR length;
-- otherwise the string itself.
CASE
WHEN regexp_instr(a,' ',1) +1>1
THEN SUBSTR(a,regexp_instr(a,' ',1)+1,
CASE
WHEN regexp_count(a,' ')=0
THEN 0
ELSE regexp_instr(a,' ',1,regexp_count(a,' '))
END )
ELSE a
END output1--,
FROM
-- Sample input rows; UNION (rather than UNION ALL) also removes duplicates.
( SELECT 'Duke of north' a FROM dual
UNION
SELECT 'Prince of Wales' a FROM dual
UNION
SELECT 'Baltic' a FROM dual
UNION
SELECT 'what if two spaces' a FROM dual
UNION
SELECT 'what if two or spaces' a FROM dual
)
)

Related

how to replace a value for string if letters are missing?

I ran into a problem where I have to create a 'LettersOfName' column. As the name suggests, I have to take letters 2, 3 and 5 from the ORGANISATIONNAME column and letters 2 and 3 from the CLIENTLASTNAME column, then concatenate them to form the letters-of-name column. The condition is: if the letters of name do not have length 5, replace the value with '22222'; likewise, if any of the letters is missing from the first name or last name, replace it with '22222'. I am using this query.
-- Question's original attempt: builds LETTERSOFNAME from characters 2, 3 and 5
-- of ORGANISATIONNAME plus characters 2 and 3 of CLIENTLASTNAME (upper-cased),
-- falling back to '22222' when the combined length differs from 5.
-- The length is compared with the string literal '5', so the comparison relies
-- on implicit conversion.
-- Spaces are ordinary characters to SUBSTR and LENGTH, so a name such as
-- 'J Shark' still contributes two characters (' S') and is not replaced.
select
( CASE WHEN LENGTH (UPPER( SUBSTR(ORGANISATIONNAME, 2,2) || SUBSTR(ORGANISATIONNAME,5,1)) || UPPER(SUBSTR(CLIENTLASTNAME,2,2))) != '5' THEN '22222'
ELSE UPPER( SUBSTR(ORGANISATIONNAME, 2,2) || SUBSTR(ORGANISATIONNAME,5,1)) || UPPER(SUBSTR(CLIENTLASTNAME,2,2)) END)
AS LETTERSOFNAME
from client;
So, far this query runs fine, but when we have name like 'Jo Anne' or 'J Shark' it is missing letter '2' and '3' but does not replace the string with '22222'. When length is not equal to 5 it replaces with '22222'. I am using Oracle 12c.
If after the concatenations of the letters you remove all the spaces and the length of the remaining string is less than 5 then replace with '22222':
-- Builds LETTERSOFNAME from characters 2, 3 and 5 of ORGANISATIONNAME and
-- characters 2 and 3 of CLIENTLASTNAME, upper-cased.
-- When fewer than 5 non-space characters are available the result is '22222'.
SELECT
    CASE
        -- In Oracle an empty string is NULL and LENGTH(NULL) is NULL, so
        -- without COALESCE a row with no usable letters at all would fall
        -- through to ELSE and return NULL instead of '22222'.
        WHEN COALESCE(
                 LENGTH(REPLACE(
                     SUBSTR(ORGANISATIONNAME, 2, 2)
                     || SUBSTR(ORGANISATIONNAME, 5, 1)
                     || SUBSTR(CLIENTLASTNAME, 2, 2),
                     ' ',
                     ''
                 )),
                 0
             ) < 5
        THEN '22222'
        -- Reaching here means all 5 characters are present and none is a space.
        ELSE UPPER(
                 SUBSTR(ORGANISATIONNAME, 2, 2)
                 || SUBSTR(ORGANISATIONNAME, 5, 1)
                 || SUBSTR(CLIENTLASTNAME, 2, 2)
             )
    END LETTERSOFNAME
FROM client
Or with a CTE:
-- Same rule as a two-step query: the CTE extracts the five candidate
-- characters (2, 3, 5 of ORGANISATIONNAME; 2, 3 of CLIENTLASTNAME), removes
-- spaces and upper-cases them; the outer query substitutes '22222' when fewer
-- than 5 characters remain.
WITH cte AS (
    SELECT
        UPPER(REPLACE(
            SUBSTR(ORGANISATIONNAME, 2, 2) ||
            SUBSTR(ORGANISATIONNAME, 5, 1) ||
            SUBSTR(CLIENTLASTNAME, 2, 2),
            ' ',
            ''
        )) LETTERSOFNAME
    FROM client
)
SELECT
    CASE
        -- LENGTH of an empty (NULL) string is NULL in Oracle; COALESCE maps it
        -- to 0 so that rows with no usable letters also become '22222'
        -- instead of NULL.
        WHEN COALESCE(LENGTH(LETTERSOFNAME), 0) < 5 THEN '22222'
        ELSE LETTERSOFNAME
    END LETTERSOFNAME
FROM cte
See the demo.
You should first remove the white space from the string and then apply your CASE expression to it
replace ('J Shark', ' ', '')
Reason is white space is being counted as a character in J Shark and that is why second and third characters are missing.
Here is an example demo.
Here is my approach:
Put both columns ORGANISATIONNAME and CLIENTLASTNAME to another table with identity column (to identify each row)
Write a function to split text by a string (in this case pass a space)
Get the identity and the splitted data to 2 tables each for column 1 and 2
Consider each table and apply your logic
Concatenate the row values separated by space, with the ID (1 record per ID)
Join the 2 tables (by IDs)
Join the 2 tables for matches in Col-Split data, and get the IDs
Now Query for the data in table in above 1

Separate fullname into first and last, and remove 'junk'

Wasn't sure of the best way to word this. So I have a column with names, as below:
SalesPerson_Name
----------------
Undefined - 0
Sam Brett-sbrett
Kelly Roberts-kroberts
Michael Paramore-mparamore
Alivia Lawler-alawler
Ryan Hooker-rhooker
Heather Alford-halford
Cassandra Blegen-cblegen
JD Holland-jholland
Vendor Accounts-VENDOR
Other Accounts-OTHER
Getting the names separated is easy enough with PARSENAME and REPLACE functions, but where I'm running into a pickle is with getting rid of the 'junk' at the end:
-- Splits SalesPerson_Name on spaces by turning them into dots and reading the
-- dot-separated parts with PARSENAME (part 1 = last piece, part 2 = the piece
-- before it).
-- FirstName: part 2 when it exists, otherwise part 1.
-- LastName:  part 1 when a part 2 exists, otherwise NULL.
SELECT
    sp.SalesPerson_Key,
    sp.SalesPerson_Name,
    COALESCE(parts.second_last_part, parts.last_part) AS FirstName,
    CASE
        WHEN parts.second_last_part IS NOT NULL THEN parts.last_part
    END AS LastName
FROM Salesperson AS sp
CROSS APPLY (
    SELECT
        PARSENAME(REPLACE(sp.SalesPerson_Name, ' ', '.'), 2) AS second_last_part,
        PARSENAME(REPLACE(sp.SalesPerson_Name, ' ', '.'), 1) AS last_part
) AS parts
RESULTS FOR LASTNAME COLUMN:
LastName
--------
0
Brett-sbrett
Roberts-kroberts
Paramore-mparamore
Lawler-alawler
Hooker-rhooker
Alford-halford
Blegen-cblegen
Holland-jholland
Accounts-VENDOR
Accounts-OTHER
Specifically, I want to get rid of the text (userid) at the end of the last name. If the names were the same length, I could just use a RIGHT function, but they vary in length. Ideas?
-- Last name without the trailing "-userid".
-- last_part is the final space-separated piece of the name
-- (e.g. 'Brett-sbrett'); everything before its first hyphen is kept.
-- The hyphen position is measured inside last_part itself, so the result does
-- not depend on how long the first name or the user id is. Appending '-'
-- before searching guarantees a match, so a piece without a hyphen is
-- returned whole instead of raising an invalid-length error.
SELECT
    LEFT(lp.last_part, CHARINDEX('-', lp.last_part + '-') - 1) AS LastName
FROM Salesperson AS sp
CROSS APPLY (
    SELECT PARSENAME(REPLACE(sp.SalesPerson_Name, ' ', '.'), 1) AS last_part
) AS lp
You are getting charindex of - and taking the left string of it.
If you just want to remove the last word (username) you can use a query like this
-- Returns the part of SalesPerson_Name that precedes the first hyphen, with
-- trailing blanks removed ('Undefined - 0' -> 'Undefined').
select
    rtrim(
        substring(
            SalesPerson_Name,
            1,
            -- Search in the name with a '-' appended: when the name has no
            -- hyphen, charindex would otherwise return 0 and the length would
            -- be -1, which makes substring fail. With the sentinel the whole
            -- name is returned instead.
            charindex('-', SalesPerson_Name + '-', 1) - 1
        )
    )
from Salesperson
The charindex function locates the occurrence of the character/s you are looking for.
Consider whether hyphen is followed by a space or not, and split depending on these two cases
-- Sample rows standing in for the Salesperson table.
with Salesperson (SalesPerson_Name) as
(
    select 'Undefined - 0'              union all
    select 'Sam Brett-sbrett'           union all
    select 'Kelly Roberts-kroberts'     union all
    select 'Michael Paramore-mparamore' union all
    select 'Alivia Lawler-alawler'
)
-- Everything after the first space is returned as last_name. When the
-- character right after that space is a hyphen (the 'Undefined - 0' shape),
-- the text starts three positions after the space instead of one, skipping
-- the "- " separator.
select
    substring(
        s.SalesPerson_Name,
        pos.first_space
            + case
                  when substring(s.SalesPerson_Name, pos.first_space + 1, 1) = '-' then 3
                  else 1
              end,
        len(s.SalesPerson_Name)
    ) as last_name
from Salesperson s
cross apply (
    select charindex(' ', s.SalesPerson_Name) as first_space
) pos;
last_name
------------------
0
Brett-sbrett
Roberts-kroberts
Paramore-mparamore
Lawler-alawler

Parsing Name Field in SQL

I am trying to separate a name field into the appropriate fields. The name field is not consistently the same. It can show up as Doe III,John w or Doe,John, or Doe III,John, or Doe,John W or it may be lacking the suffix and or middle initial. Any ideas would be greatly appreciated.
-- Question's attempt at splitting the combined name column (FirstName) into
-- LastName / Suffix / FirstName. Spaces are replaced by dots so PARSENAME can
-- pick parts counted from the right (1 = right-most part).
SELECT
    -- LastName: part 2 when the space-stripped length equals
    -- LEN(FirstName + ' ') - 1, otherwise part 3.
    CASE
        WHEN LEN(REPLACE(FirstName, ' ', '')) = LEN(FirstName + ' ') - 1
            THEN PARSENAME(REPLACE(FirstName, ' ', '.'), 2)
        ELSE PARSENAME(REPLACE(FirstName, ' ', '.'), 3)
    END AS LastName,
    -- Suffix: NULL when the space-stripped length equals
    -- LEN(FirstName + ',') - 1 (i.e. the value contains no spaces),
    -- otherwise part 2.
    CASE
        WHEN LEN(REPLACE(FirstName, ' ', '')) = LEN(FirstName + ',') - 1
            THEN NULL
        ELSE PARSENAME(REPLACE(FirstName, ' ', '.'), 2)
    END AS Suffix,
    PARSENAME(REPLACE(FirstName, ' ', '.'), 1) AS FirstName
FROM Trusts.dbo.tblMember
I need the name regardless of the format, as stated above, to parse into the appropriate fields of LastName,Suffix,FirstName,MiddleInitial, regardless of whether it has a suffix or a middle initial
If the given 4 names are the only type of cases, then you can use something like below.
Note: I used a CTE, tbl2, to compute comma_pos, first_space and second_space separately so that the main query is easier to understand. You can replace these values in the main query with their corresponding expressions from the CTE, i.e. replace comma_pos in the main query with charindex(',',name), and so on.
Also I am assuming that there are no leading/trailing or extra whitespaces or any junk character in name column. If you have, then sanitize your data first before proceeding.
Rexter Sample
-- Parses names shaped like 'Doe III,John w', 'Doe,John', 'Doe III,John' and
-- 'Doe,John W' into first_name / middle_name / last_name / suffix.
-- tbl2 adds the positions of the comma and of the first two spaces
-- (0 when absent) so the main query can branch on them.
with tbl2 as (
    select
        tbl.*,
        charindex(',', name) as comma_pos,
        charindex(' ', name, 1) as first_space,
        charindex(' ', name, charindex(' ', name, 1) + 1) as second_space
    from tbl
)
select
    tbl2.name,
    -- first_name: text after the comma, up to the next space if there is one.
    case
        when second_space <> 0
            then substring(name, comma_pos + 1, second_space - comma_pos - 1)
        when first_space > comma_pos
            then substring(name, comma_pos + 1, first_space - comma_pos - 1)
        else substring(name, comma_pos + 1, len(name) - comma_pos)
    end as first_name,
    -- middle_name: text after the space that follows the first name, if any.
    case
        when second_space <> 0
            then substring(name, second_space + 1, len(name) - second_space)
        when first_space > comma_pos
            then substring(name, first_space + 1, len(name) - first_space)
    end as middle_name,
    -- last_name: text before the comma, or before the suffix when a space
    -- precedes the comma.
    case
        when first_space = 0 or first_space > comma_pos
            then substring(name, 1, comma_pos - 1)
        else substring(name, 1, first_space - 1)
    end as last_name,
    -- suffix: text between the first space and the comma. Start one past the
    -- space so the suffix does not carry a leading blank ('III', not ' III').
    case
        when first_space = 0 or first_space > comma_pos
            then null
        else substring(name, first_space + 1, comma_pos - first_space - 1)
    end as suffix
from tbl2;

Convert varchar to 3 (sometimes 4) chars in T-SQL

I select data from a database. The values are (field name is ADR_KOMP_VL) :
4 , 61A, 100, 12, 58, 123C, 6 A, 5
I need to convert these values to 3 digits (except when there is a letter then it is 4)
So the converted values should be:
004, 061A, 100, 012, 058, 123C, 006A, 005
The rules are:
Always 3 digits
No spaces
If the original value is less than three digits, put 0's in front of it.(The length is 3)
If the original value contains a letter, put 0's in front of it (but the length is 4)
For the "no space" part I have this:
select REPLACE(ADR_KOMP_VL, ' ','')
The solution I have so far is:
SELECT RIGHT('000' + CONVERT(VARCHAR(4),REPLACE(ADR_KOMP_VL, ' ','')), 3)
But this only gives me the right length, when there is no letter in the value. My problem is how to handle the values with a letter in them??
This only checks whether the last character is a letter. Additional logic will be required if the letter can appear elsewhere.
-- Left-pads the space-stripped value with zeros: to 4 characters when the last
-- character of the raw value is not numeric, otherwise to 3 characters.
SELECT
    REPLICATE(
        '0',
        CASE
            WHEN ISNUMERIC(RIGHT(t.ADR_KOMP_VL, 1)) = 0 THEN 4
            ELSE 3
        END - LEN(c.compact_value)
    ) + c.compact_value
FROM TX AS t
CROSS APPLY (
    -- The value with every space removed.
    SELECT REPLACE(t.ADR_KOMP_VL, ' ', '') AS compact_value
) AS c
EDIT - actually this might work better, checks for whole ADR_KOMP_VL if it's numeric:
-- Left-pads the space-stripped value with zeros: to 4 characters when the
-- whole stripped value is not numeric, otherwise to 3 characters.
SELECT
    REPLICATE(
        '0',
        CASE
            WHEN ISNUMERIC(c.compact_value) = 0 THEN 4
            ELSE 3
        END - LEN(c.compact_value)
    ) + c.compact_value
FROM TX AS t
CROSS APPLY (
    -- The value with every space removed.
    SELECT REPLACE(t.ADR_KOMP_VL, ' ', '') AS compact_value
) AS c
SQLFiddle DEMO
You can use a case statement:
-- Zero-pads the space-stripped value: 3 characters wide when the value
-- contains no character matching [A-Z], 4 characters wide when it does.
SELECT
    CASE
        WHEN ADR_KOMP_VL NOT LIKE '%[A-Z]%'
            THEN RIGHT('000' + CONVERT(VARCHAR(4), REPLACE(ADR_KOMP_VL, ' ', '')), 3)
        ELSE RIGHT('0000' + CONVERT(VARCHAR(4), REPLACE(ADR_KOMP_VL, ' ', '')), 4)
    END

Can the Select list in a SQL Statement use Regular Expressions

I have a SQL statement,
select ColumnName from Table
And I get this result,
Error 192.168.1.67 UserName 0bce6c62-1efb-416d-bce5-71c3c8247b75 An existing ....
So anyway the field has a lot of stuff in it, I just want to get out the 'UserName'.
Can I use a regex for that?
I mean it would be kind of like this,
select SUBSTRING(ColumnName, 0, 5) from Table
Except the SUBSTRING would be replaced with a regex of some kind. I am comfortable with regex, but I am not sure how to apply it in this case, or even if you can.
If I could get this working it would be great because I plan to pull the data into a temporary table, and do some quite complicated things matching it with other tables etc. If I can get this all working it would save me writing a C# app to do it with.
Thanks.
No, out of the box, SQL Server doesn't support regexs.
You could retrofit those by means of a SQL-CLR assembly that you deploy into SQL Server.
I think you should use SUBSTRING anyway. Using regular expressions is more flexible but also leads to a large processing overhead. This becomes even worse if you have to process large record sets.
You have to justify if there's the need for flexibility in first place.
If so you should read about it here:
http://msdn.microsoft.com/en-us/magazine/cc163473.aspx
Using T-SQL only can look like that:
-- Demo data: one log line stored in log_table.
SELECT 'Error 192.168.1.67 XUserNameX 0bce6c62-1efb-416d-bce5-71c3c8247b75 An existing' expr
INTO log_table
GO
-- Splits expr on single spaces with a recursive CTE and returns the third
-- token.
--   split1     seeds the walk (cend = 0 means "nothing consumed yet").
--   split2     emits one row per token: cstart is the token's first position,
--              cend the position of the space that ends it (0 for the last
--              token), div the 1-based token number.
--   substrings cuts each token out of expr.
WITH
split1 (expr, cstart, cend) AS (
    SELECT expr, 1, 0
    FROM log_table a
),
split2 (expr, cstart, cend, div) AS (
    SELECT a.expr, a.cend + 1, CHARINDEX(' ', a.expr, a.cend + 1), 1
    FROM split1 a
    UNION ALL
    SELECT a.expr, a.cend + 1, CHARINDEX(' ', a.expr, a.cend + 1), div + 1
    FROM split2 a
    -- Continue while the previous token ended at a space. Testing "> 0"
    -- (rather than "> 1") keeps the walk going when the string starts with a
    -- space, i.e. when the first space is at position 1.
    WHERE a.cend > 0
),
substrings (expr, div) AS (
    SELECT
        SUBSTRING(
            expr,
            cstart,
            -- The last token has no terminating space (cend = 0); take the
            -- rest of the string instead of computing a negative length.
            CASE WHEN cend = 0 THEN LEN(expr) ELSE cend - cstart END
        ),
        div
    FROM split2
)
SELECT expr
FROM substrings a
WHERE a.div = 3
UPDATE
we cannot tell where the start of the
username is. Unless we can say 'find
me the start character after the
second space'
That is fairly straightforward:
Filter out strings that have fewer than
two spaces (alternatively, have three
or more words);
Find the position after the first
space (alternatively, the beginning
of the second word);
Find the position after the second
space, i.e. the first space that follows
the first space (alternatively, the
beginning of the third word);
Determine the length of the third
word using the position of the next
space (or the end of the string if
there are only three words);
Use the above values with the
SUBSTRING() function to return the
third word.
Example:
-- Returns the third space-separated word of every sample sentence that
-- contains at least two spaces.
WITH MyTable (ColumnName) AS
(
    -- Sample data, including NULL, empty and short strings.
    SELECT NULL
    UNION ALL
    SELECT ''
    UNION ALL
    SELECT 'One.'
    UNION ALL
    SELECT 'Two words.'
    UNION ALL
    SELECT 'Three word sentence.'
    UNION ALL
    SELECT 'Sentence containing four words.'
    UNION ALL
    SELECT 'Five words in this sentence.'
    UNION ALL
    SELECT 'Sentence containing more than five words.'
),
-- Rows with two or more spaces, plus the position right after the first space.
three_plus_words (ColumnName, pos_word_2_start) AS
(
    SELECT src.ColumnName,
           CHARINDEX(' ', src.ColumnName) + 1
    FROM MyTable AS src
    WHERE LEN(src.ColumnName) - LEN(REPLACE(src.ColumnName, ' ', '')) >= 2
),
-- Position right after the second space, i.e. where the third word starts.
word_3_start (ColumnName, pos_word_3_start) AS
(
    SELECT w2.ColumnName,
           CHARINDEX(' ', w2.ColumnName, w2.pos_word_2_start) + 1
    FROM three_plus_words AS w2
),
-- Position of the space that ends the third word (0 when there is none).
word_3_bounds (ColumnName, pos_word_3_start, pos_word_3_end) AS
(
    SELECT w3.ColumnName,
           w3.pos_word_3_start,
           CHARINDEX(' ', w3.ColumnName, w3.pos_word_3_start)
    FROM word_3_start AS w3
),
-- Length of the third word: up to the next space, or to the end of the string
-- when no further space follows.
word_3_extent (ColumnName, pos_word_3_start, word_3_length) AS
(
    SELECT b.ColumnName,
           b.pos_word_3_start,
           CASE
               WHEN b.pos_word_3_start < b.pos_word_3_end
                   THEN b.pos_word_3_end - b.pos_word_3_start
               ELSE LEN(b.ColumnName) - b.pos_word_3_start + 1
           END
    FROM word_3_bounds AS b
)
SELECT e.ColumnName,
       SUBSTRING(e.ColumnName, e.pos_word_3_start, e.word_3_length) AS word_3
FROM word_3_extent AS e;
ORIGINAL ANSWER:
Is the problem that the position and/or length of the username value may not be constant in the data but always follows the string 'username '? If so, you can use CHARINDEX with SUBSTRING e.g.
-- Extracts the value that follows the marker 'UserName ' in each row.
-- Rows that do not contain the marker yield NULL.
WITH MyTable (ColumnName) AS
(
    SELECT 'Error 192.168.1.67 UserName 0bce6c62-1efb-416d-bce5-71c3c8247b75 An existing ....'
    UNION ALL
    SELECT 'Username onedaywhen is invalid'
),
-- pos1: position of the first character after 'UserName '.
MyTable1 (ColumnName, pos1) AS
(
    SELECT
        M1.ColumnName,
        -- NULLIF turns "marker not found" (0) into NULL so that such rows
        -- return NULL instead of text from an arbitrary position.
        -- LEN('UserName') + 1 is the marker length including its trailing
        -- space (LEN itself does not count trailing spaces).
        NULLIF(CHARINDEX('UserName ', M1.ColumnName), 0) + LEN('UserName') + 1
    FROM MyTable AS M1
),
-- pos2: length of the value, i.e. the distance from pos1 to the next space.
MyTable2 (ColumnName, pos1, pos2) AS
(
    SELECT
        M1.ColumnName,
        M1.pos1,
        -- A space is appended before searching so that a value at the very
        -- end of the string (no trailing space) still gets a positive length
        -- instead of a negative one.
        CHARINDEX(' ', M1.ColumnName + ' ', M1.pos1) - M1.pos1
    FROM MyTable1 AS M1
)
SELECT SUBSTRING(M1.ColumnName, M1.pos1, M1.pos2)
FROM MyTable2 AS M1;
...though you'd need to make it more robust e.g. when there is no trailing space after the username value etc.