Recursive SQL UDF for removing non-alpha-numeric characters - sql

So I'm trying to create my first recursive udf (using MS SQL) to strip anything that's not letters and numbers from a string.
This was inspired by this post (Replace with wildcard, in SQL)
-- Recursive scalar UDF intended to strip every character that is not a digit or a letter.
-- #p_CharIndex: scratch parameter; it is overwritten on entry with the PATINDEX result.
-- #p_Value: the text to clean.
-- Returns the processed text.
CREATE FUNCTION uf_RemoveNonAlphaNumericChar(
#p_CharIndex int,
#p_Value Varchar(max) )
RETURNS varchar(max)
AS
BEGIN
-- Position of the first character outside the class; 0 when no such character exists.
-- The comma inside the brackets is a literal member of the class, so commas count as allowed.
SET #p_CharIndex = PATINDEX('%[^0-9,a-z]%', #p_Value)
-- NOTE(review): this runs before the "> 0" check below. STUFF is documented to return NULL
-- for a start position of 0, so the value turns into NULL once nothing is left to remove.
SET #p_Value = STUFF(#p_Value,#p_CharIndex , 1, SPace(0) )
IF #p_CharIndex > 0
BEGIN
-- Recurse on the shortened string until PATINDEX reports no further match.
EXEC #p_Value = uf_RemoveNonAlphaNumericChar #p_CharIndex = #p_CharIndex,
#p_Value = #p_Value
END
RETURN #p_Value
END
This is one step in a bigger problem where I'm trying to split a string that could be XXX###YYYY into three parts when some of the parts may be missing.
And I'm trying to do it without a while loop (that solution already exists but runs slow).
if Patindex had a start position (in MS SQL), I would already be done. Of course, it would also not be as much fun. Or as cuss-filled...

I found your problem: you remove a character even when PATINDEX has not found one to remove ;)
Look at the updated version:
-- Recursively removes every character that is not a digit or a letter from #p_Value.
-- #p_CharIndex: scratch parameter; callers may pass any value, it is overwritten on entry.
-- #p_Value: the text to clean.
-- Returns #p_Value without the offending characters.
-- NOTE(review): "a-z" also covers upper-case letters only under a case-insensitive
-- collation, and each removed character costs one nesting level, so inputs with many
-- offending characters may hit SQL Server's nesting limit - confirm for your data.
CREATE FUNCTION uf_RemoveNonAlphaNumericChar(
#p_CharIndex int,
#p_Value Varchar(max) )
RETURNS varchar(max)
AS
BEGIN
    -- No comma inside the class: a comma there is a literal and would let commas survive.
    SET #p_CharIndex = PATINDEX('%[^0-9a-z]%', #p_Value)
    -- Only touch the string when something was found; STUFF at position 0 yields NULL.
    IF #p_CharIndex > 0
    BEGIN
        SET #p_Value = STUFF(#p_Value, #p_CharIndex, 1, '')
        EXEC #p_Value = uf_RemoveNonAlphaNumericChar #p_CharIndex = #p_CharIndex,
            #p_Value = #p_Value
    END
    RETURN #p_Value
END

Does it have to be recursion?
-- Iteratively removes every character that is not a letter or a digit.
-- #val: the text to clean.
-- Returns the cleaned text. The return type matches the input type so long results
-- are not silently cut off at 1000 characters.
CREATE FUNCTION [dbo].[uf_RemoveNonAlphaNumericChar]
(
#val varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
    DECLARE #s VARCHAR(max), #i INT
    SET #s = #val
    SET #i = PATINDEX('%[^a-z0-9]%', #s)
    WHILE #i > 0
    BEGIN
        -- REPLACE drops all occurrences of the offending character in one pass,
        -- so the loop runs once per distinct character rather than once per position.
        SET #s = REPLACE(#s, SUBSTRING(#s, #i, 1), '')
        SET #i = PATINDEX('%[^a-z0-9]%', #s)
    END
    RETURN #s
END

Related

Proper/Title Case a Column with Exceptions table in SQL Server

I am trying to convert a column which is in upper case to proper case but with exceptions like certain acronyms, abbreviations. I am following the below code to implement that. But looks like this will be an ongoing process and so, I want to create a table with the exceptions in order to make it easy to clean the data and I want to be able to call the exceptions table from the function. It would be great if anyone can help me with any codes they have which is similar to this or any ideas on how to implement it.
-- Converts an all-uppercase string to proper case, then restores the casing of the
-- words listed in #CaseExceptions (acronyms, abbreviations).
-- #Text: text to convert; it is returned unchanged when it is not entirely uppercase.
-- Returns the converted text without leading/trailing spaces.
ALTER FUNCTION [dbo].[Business_ProperCase]
(#Text AS VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN
    -- declare some variables
    DECLARE #Reset BIT; DECLARE #Ret VARCHAR(8000); DECLARE #i INT;
    DECLARE #c0 CHAR(1); DECLARE #c1 CHAR(1); DECLARE #c2 CHAR(1);
    DECLARE #CaseLen INT;
    DECLARE #CaseExceptions VARCHAR(8000);
    DECLARE #CaseValue VARCHAR(8000);
    -- Set some default values
    SELECT #Reset = 1, #i=1, #Ret = '';
    -- only apply if all characters are already in uppercase
    IF (UPPER(#Text)=#Text COLLATE Latin1_General_CS_AI)
    BEGIN
        -- add a leading and trailing space to indicate word delimiters (bol & eol)
        SET #Text = ' ' + #Text + ' ';
        -- cycle through each character,
        -- if non-alpha, uppercase next alpha character.
        -- if alpha then lowercase subsequent alphas.
        -- (#c0/#c1 are the two preceding characters, used for the Mc / D' / I' / O' rules)
        WHILE (#i <= LEN(#Text))
            SELECT
                #c0=SUBSTRING(#Text,#i-2,1), #c1=SUBSTRING(#Text,#i-1,1), #c2=SUBSTRING(#Text,#i,1),
                #Ret = #Ret + CASE WHEN #Reset=1 THEN UPPER(#c2) ELSE LOWER(#c2) END,
                #Reset = CASE
                    WHEN #c0 = ' ' AND #c1 = 'M' AND #c2 = 'c' THEN 1
                    WHEN #c0 = ' ' AND #c1 IN ('D', 'I', 'O') AND #c2 = '''' THEN 1
                    WHEN #c2 LIKE '[a-zA-Z'']' THEN 0 -- Apply LOWER to any character after alphas or apostrophes
                    ELSE 1 -- Apply UPPER to any character after symbols/punctuation
                END,
                #i = #i +1
        -- add a trailing space in case the previous rule changed this.
        SET #Ret = #Ret + ' ';
        -- custom exceptions: this search is case-insensitive and will
        -- replace the word to the case as it is written in the list.
        -- NOTE: this list has to end with a comma!
        -- (the literal is now closed; it was left unterminated before)
        SELECT #i=0, #CaseLen=0,
            #CaseExceptions = 'ABS,LLC,MD,MBA,MA,';
        --Want to create a table for these exceptions and call them from this function
        -- Loop through exception cases
        WHILE CHARINDEX(',', #CaseExceptions, #i+1)>0
        BEGIN
            -- get the delimited word
            SET #CaseLen = CHARINDEX(',', #CaseExceptions, #i+1) - #i
            SET #CaseValue = SUBSTRING(#CaseExceptions, #i, #CaseLen)
            -- replace it in the original text
            SET #Ret = REPLACE(#Ret, ' '+#CaseValue+' ', ' '+#CaseValue+' ')
            -- get position of next word
            SET #i = CHARINDEX(',', #CaseExceptions, #i+#CaseLen) +1
        END
        -- remove any leading and trailing spaces
        SET #Ret = LTRIM(RTRIM(#Ret));
        -- capitalize first character of data irrespective of previous rules
        SET #Ret = UPPER(SUBSTRING(#Ret,1,1)) + SUBSTRING(#Ret,2,LEN(#Ret));
    END
    ELSE
    BEGIN
        -- return the string unaffected if it is not in uppercase
        SET #Ret=#Text
    END
    RETURN #Ret
END
Create a table (I use TITLE_CASE_EXCEPTION as my example) with a column EXCEPTION
Then it is data driven from there.
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[GUI].[fn_TITLE_CASE]') AND type in (N'FN', N'IF', N'TF', N'FS', N'FT'))
DROP FUNCTION [GUI].[fn_TITLE_CASE]
GO
-- Title-cases #STRING and then restores the casing of every word stored in
-- [LOOKUP].TITLE_CASE_EXCEPTION (column EXCEPTION).
-- #STRING: text to convert. NULL returns NULL; an empty string is returned as is.
-- Returns the converted text with trailing spaces removed.
CREATE FUNCTION [GUI].[fn_TITLE_CASE]
(
#STRING VARCHAR(MAX)
)
RETURNS VARCHAR(MAX)
AS
BEGIN
    -- (No SET QUOTED_IDENTIFIER here: SET option statements are not allowed inside a function.)
    DECLARE #RESET BIT
    DECLARE #_OUT_STRING VARCHAR(MAX)
    DECLARE #I INT
    DECLARE #C CHAR(1)
    DECLARE #CASE_LEN INT = 0
    DECLARE #CASE_EXCEPTIONS VARCHAR(MAX) = ''
    DECLARE #CASE_VALUE VARCHAR(MAX) = ''
    IF #STRING IS NULL
        RETURN NULL
    IF #STRING = ''
        RETURN #STRING
    SELECT #STRING = LOWER(RTRIM(#STRING)), #RESET = 1, #I = 1, #_OUT_STRING = ''
    -- upper-case the first letter after any non-letter, leave the rest lower-case
    WHILE (#I <= LEN(#STRING))
        SELECT
            #C = SUBSTRING(#STRING, #I, 1),
            #_OUT_STRING = #_OUT_STRING + CASE WHEN #RESET = 1 THEN UPPER(#C) ELSE #C END,
            #RESET = CASE WHEN #C LIKE '[a-zA-Z'']' THEN 0 ELSE 1 END,
            #I = #I + 1
    -- Sentinel spaces on BOTH sides so an exception at the very start or end of the
    -- text is also surrounded by delimiters and can match the ' word ' pattern below.
    SELECT #I = 0, #_OUT_STRING = ' ' + #_OUT_STRING + ' '
    -- build a comma-terminated list of all exceptions
    SELECT #CASE_EXCEPTIONS = #CASE_EXCEPTIONS + RTRIM(EXCEPTION) + ',' FROM [LOOKUP].TITLE_CASE_EXCEPTION
    WHILE CHARINDEX(',', #CASE_EXCEPTIONS, #I + 1) > 0
    BEGIN
        -- get the delimited word
        SET #CASE_LEN = CHARINDEX(',', #CASE_EXCEPTIONS, #I + 1) - #I
        SET #CASE_VALUE = SUBSTRING(#CASE_EXCEPTIONS, #I, #CASE_LEN)
        -- replace it in the original text
        SET #_OUT_STRING = REPLACE(#_OUT_STRING, ' ' + #CASE_VALUE + ' ', ' ' + #CASE_VALUE + ' ')
        -- get position of next word
        SET #I = CHARINDEX(',', #CASE_EXCEPTIONS, #I + #CASE_LEN) + 1
    END
    -- strip the trailing sentinel (RTRIM) and exactly one leading sentinel (STUFF),
    -- so leading spaces that were part of the input are preserved as before
    RETURN STUFF(RTRIM(#_OUT_STRING), 1, 1, '')
END
GO
Here's an example for you to reference:
-- Walks the sentence word by word (space-delimited) and capitalises each word,
-- except words found in the ignore table, which are left exactly as written.
-- The sentence is selected after every word so the progress is visible.
declare #sentence varchar(256) = 'This is a SQL test';
declare #keep_as_is table (ignore varchar(256) not null);
declare #word_start int = 1;
declare #word_end int;
declare #word_len int;
declare #word varchar(256);

insert into #keep_as_is (ignore) values ('SQL');

while #word_start <= len(#sentence)
begin
    -- the appended space guarantees a delimiter after the last word
    set #word_end = charindex(' ', #sentence + ' ', #word_start);
    set #word_len = #word_end - #word_start;
    set #word = substring(#sentence, #word_start, #word_len);

    if not exists (select 1 from #keep_as_is where ignore = #word)
    begin
        -- lower-case the word, then overwrite its first character with the upper-case form
        set #sentence = stuff(
            #sentence, #word_start, #word_len,
            stuff(lower(#word), 1, 1, upper(left(#word, 1)))
        );
    end

    set #word_start = #word_end + 1;
    select #sentence;
end
To answer the original request: set up a table "exceptions" with a single column ConcatList of type nvarchar(100) and add the exceptions to this table. Then create a view to concatenate them together:
-- One row per exception word.
create table exceptions (ConcatList nvarchar(100))
GO
-- CREATE VIEW has to be the first statement of its batch, hence the GO above.
-- Returns a single row with all exception words in alphabetical order, each one
-- FOLLOWED by a comma ('ABS,LLC,MD,'). A list parser that requires a trailing comma
-- would otherwise skip the last entry. An empty table yields an empty string.
-- NOTE(review): FOR XML PATH entitizes characters such as & and < - confirm that the
-- exception words never contain them.
create view [dbo].vExceptions
as
Select coalesce(
(
Select up.ConcatList + ',' AS [text()]
From exceptions up
ORDER BY up.ConcatList
For XML PATH ('')
), '') [exceptions]
GO
Here is a slightly enhanced version of the function from the question
(although an admittedly inelegant solution) to account for:
Lower-case words (of, the, an, etc.)
Hyphenated acronyms
Exceptions that are immediately preceded or followed by a dash or comma.
-- Converts an all-uppercase string to proper case, then applies the exception words
-- read from the vExceptions view (column exceptions, comma-separated list).
-- OF / AND / THE / FOR are forced to lower case when they appear in that list; every
-- other exception is restored to the casing stored in the list, also when it is
-- directly preceded or followed by a dash or comma.
-- #Text: text to convert; it is returned unchanged when it is not entirely uppercase.
-- Returns the converted text without leading/trailing spaces.
alter FUNCTION [dbo].[Business_ProperCase]
(#Text AS VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN
    -- declare some variables
    DECLARE #Reset BIT; DECLARE #Ret VARCHAR(8000); DECLARE #i INT;
    DECLARE #c0 CHAR(1); DECLARE #c1 CHAR(1); DECLARE #c2 CHAR(1);
    DECLARE #CaseLen INT;
    DECLARE #CaseExceptions VARCHAR(8000);
    DECLARE #CaseValue VARCHAR(8000);
    -- Set some default values
    SELECT #Reset = 1, #i=1, #Ret = '';
    -- only apply if all characters are already in uppercase
    IF (UPPER(#Text)=#Text COLLATE Latin1_General_CS_AI)
    BEGIN
        -- add a leading and trailing space to indicate word delimiters (bol & eol)
        SET #Text = ' ' + #Text + ' ';
        -- cycle through each character,
        -- if non-alpha, uppercase next alpha character.
        -- if alpha then lowercase subsequent alphas.
        WHILE (#i <= LEN(#Text))
            SELECT
                #c0=SUBSTRING(#Text,#i-2,1), #c1=SUBSTRING(#Text,#i-1,1), #c2=SUBSTRING(#Text,#i,1),
                #Ret = #Ret + CASE WHEN #Reset=1 THEN UPPER(#c2) ELSE LOWER(#c2) END,
                #Reset = CASE WHEN #c0 = ' ' AND #c1 = 'M' AND #c2 = 'c' THEN 1
                    WHEN #c0 = ' ' AND #c1 IN ('D', 'I', 'O') AND #c2 = '''' THEN 1
                    WHEN #c2 LIKE '[a-zA-Z'']' THEN 0 -- Apply LOWER to any character after alphas or apostrophes
                    ELSE 1 -- Apply UPPER to any character after symbols/punctuation
                END,
                #i = #i +1
        -- add a trailing space in case the previous rule changed this.
        SET #Ret = #Ret + ' ';
        -- custom exceptions: this search is case-insensitive and will
        -- replace the word to the case as it is written in the list.
        -- NOTE: this list has to end with a comma!
        SELECT #i=0, #CaseLen=0,
            #CaseExceptions = exceptions from vExceptions
        -- Loop through exception cases
        WHILE CHARINDEX(',', #CaseExceptions, #i+1)>0
        BEGIN
            -- get the delimited word
            SET #CaseLen = CHARINDEX(',', #CaseExceptions, #i+1) - #i
            SET #CaseValue = SUBSTRING(#CaseExceptions, #i, #CaseLen)
            if (#CaseValue = 'OF' or #CaseValue = 'AND' or #CaseValue ='THE' or #CaseValue='FOR')
            begin
                --replace with lower case 'of', 'and', 'the', 'for'
                SET #Ret = REPLACE(#Ret, ' '+#CaseValue+' ', ' '+lower(#CaseValue)+' ')
            end
            else
            begin
                -- Apply every delimiter variant unconditionally: REPLACE is a no-op when the
                -- pattern is absent, and a text may contain the same exception in several
                -- contexts (e.g. once between spaces and once before a comma).
                SET #Ret = REPLACE(#Ret, ' '+#CaseValue+' ', ' '+#CaseValue+' ')
                SET #Ret = REPLACE(#Ret, ' '+#CaseValue+',', ' '+#CaseValue+',')
                SET #Ret = REPLACE(#Ret, ' '+#CaseValue+'-', ' '+#CaseValue+'-')
                SET #Ret = REPLACE(#Ret, '-'+#CaseValue+' ', '-'+#CaseValue+' ')
                -- keep the leading comma (it used to be rewritten as a dash)
                SET #Ret = REPLACE(#Ret, ','+#CaseValue+' ', ','+#CaseValue+' ')
            end
            -- get position of next word
            SET #i = CHARINDEX(',', #CaseExceptions, #i+#CaseLen) +1
        END
        -- remove any leading and trailing spaces
        SET #Ret = LTRIM(RTRIM(#Ret));
        -- capitalize first character of data irrespective of previous rules
        SET #Ret = UPPER(SUBSTRING(#Ret,1,1)) + SUBSTRING(#Ret,2,LEN(#Ret));
    END
    ELSE
    BEGIN
        -- return the string unaffected if it is not in uppercase
        SET #Ret=#Text
    END
    RETURN #Ret
END
Create a table (I use ExceptionsTable as my example) with a column WordExcepts. Then add the following after your last DECLARE at the top of the page:
-- Build the comma-terminated exception list from the table itself. Assigning the
-- query text to the variable would only store the text of the SELECT, not its rows.
DECLARE #sql nvarchar(2000);
SET #sql = N'';
SELECT #sql = #sql + WordExcepts + N',' FROM ExceptionsTable;
Then down below adjust your exceptions to be:
#CaseExceptions = #sql
Just add to your table as needed and they get filtered out of the function.

How to toggle case of Entire string in sql

I want to toggle case of entire string.
I am able to do for characters, not for string.
-- Prints the opposite-case form of one character, decided by its ASCII code:
-- 97-122 are lower-case letters, 65-90 upper-case letters; anything else prints nothing.
DECLARE #Char AS VARCHAR(1)
DECLARE #Code INT
SET #Char = 'a'
SET #Code = ASCII(#Char)
IF #Code BETWEEN 97 AND 122
    PRINT UPPER(#Char)
ELSE IF #Code BETWEEN 65 AND 90
    PRINT LOWER(#Char)
How, I can change case for entire string?
For Ex. "AbCdE", I want to change it to "aBcDe".
You can do it by creating functions:
First make function for one character:
-- Returns #Char with its case flipped.
-- For a letter, exactly one of the upper/lower codes equals the input code, so
-- upper + lower - input leaves the code of the other-case form. For any other
-- character all three codes are equal and the character comes back unchanged.
CREATE FUNCTION ToggleChar
(
#Char VARCHAR(1)
)
RETURNS VARCHAR(1)
AS
BEGIN
    DECLARE #InputCode INT, #UpperCode INT, #LowerCode INT
    SELECT
        #InputCode = ASCII(#Char),
        #UpperCode = ASCII(UPPER(#Char)),
        #LowerCode = ASCII(LOWER(#Char))
    RETURN CHAR(#UpperCode + #LowerCode - #InputCode)
END
Then, create function for string:
-- Flips the case of every letter in #Str by calling dbo.ToggleChar per character.
-- #Str: text to convert.
-- Returns the converted text; a NULL input yields an empty string (loop never runs).
CREATE FUNCTION ToggleCase
(
#Str VARCHAR(MAX)
)
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE #ResultStr VARCHAR(MAX)
    DECLARE #Pos BIGINT
    DECLARE #Total BIGINT
    SET #ResultStr = ''
    SET #Pos = 1
    -- LEN ignores trailing spaces; the appended sentinel makes them count so they are
    -- kept in the result. (Comparing the remainder with '' treated an all-space
    -- remainder as empty and dropped it.)
    SET #Total = LEN(#Str + 'x') - 1
    -- Index-based walk: no re-copying of the remaining string on every step.
    WHILE (#Pos <= #Total)
    BEGIN
        SET #ResultStr = #ResultStr + [dbo].[ToggleChar](SUBSTRING(#Str, #Pos, 1))
        SET #Pos = #Pos + 1
    END
    RETURN #ResultStr
END
Now, use this function to toggle string.
SELECT dbo.ToggleCase('AbCdE') AS ToggleString
Try this:
-- Flips the case of every letter of #Name in place, one position at a time.
DECLARE #Name VARCHAR(10) = 'SaMplE'
DECLARE #Count INT = 1
DECLARE #Current CHAR(1)
WHILE #Count <= LEN(#Name)
BEGIN
    SET #Current = SUBSTRING(#Name, #Count, 1)
    SET #Name = STUFF(#Name, #Count, 1,
        CASE
            WHEN ASCII(#Current) BETWEEN 97 AND 122 THEN UPPER(#Current)
            WHEN ASCII(#Current) BETWEEN 65 AND 90 THEN LOWER(#Current)
            -- Without this branch CASE yields NULL for digits, spaces, etc., and
            -- STUFF then deletes the character instead of keeping it.
            ELSE #Current
        END)
    SET #Count = #Count + 1
END
SELECT #Name

How to change case in string

My table has one column that contains strings like "HRM_APPLICATION_DELAY_IN".
I want to perform the operations below on each row of this column:
convert to lower case
remove the underscore "_"
change the case (convert to upper case) of the character after the underscore, like "hrm_Application_Delay_In"
I need help with this conversion. Thanks in advance.
Here is a function to achieve it:
-- Lower-cases #a, then removes each underscore and upper-cases the character that
-- followed it ('HRM_APPLICATION' becomes 'hrmApplication').
-- #a: text to convert. Returns the converted text.
create function f_test
(
#a varchar(max)
)
returns varchar(max)
as
begin
    declare #underscore bigint
    set #a = lower(#a)
    set #underscore = charindex('_', #a)
    while #underscore > 0
    begin
        -- replace the two characters "_x" with the single character "X"
        set #a = stuff(#a, #underscore, 2, upper(substring(#a, #underscore + 1, 1)))
        set #underscore = charindex('_', #a)
    end
    return #a
end
Example:
select dbo.f_test('HRM_APPLICATION_DELAY_IN')
Result:
hrmApplicationDelayIn
To update your table here is an example how to write the syntax with the function:
-- Only rows that still contain an underscore are rewritten.
UPDATE <yourtable>
SET <yourcolumn> = dbo.f_test(<yourcolumn>)
WHERE <yourcolumn> LIKE '%\_%' ESCAPE '\'
For a variable this is overkill, but I'm using this to demonstrate a pattern
-- Builds the converted string one character per recursion level.
--   one  : result built so far
--   last : the character consumed in the previous step
--   rest : the lower-cased remainder still to be consumed
declare #str varchar(100) = 'HRM_APPLICATION_DELAY_IN';
;with c(one,last,rest) as (
-- anchor: start with the lower-cased first character
select cast(lower(left(#str,1)) as varchar(max)),
left(#str,1), stuff(lower(#str),1,1,'')
union all
-- step: append the next character, upper-cased when the previous one was '_'
select one+case when last='_'
then upper(left(rest,1))
else left(rest,1) end,
left(rest,1), stuff(rest,1,1,'')
from c
where rest > ''
)
-- every level holds a prefix of the final string; max(one) is used to pick the complete one
-- NOTE(review): no MAXRECURSION hint here, so inputs longer than the default
-- recursion limit would presumably fail - confirm before using on long strings.
select max(one)
from c;
That can be extended to a column in a table
-- Sample table
-- Sample data, including edge cases: leading underscore, empty string, NULL and
-- underscores next to non-letters.
declare #tbl table (
id int identity not null primary key clustered,
str varchar(100)
);
insert #tbl values
('HRM_APPLICATION_DELAY_IN'),
('HRM_APPLICATION_DELAY_OUT'),
('_HRM_APPLICATION_DELAY_OUT'),
(''),
(null),
('abc<de_fg>hi');
-- the query
-- Same character-by-character recursion as for a single variable, carried out per row:
--   one = result so far, last = previously consumed character, rest = lower-cased remainder.
;with c(id,one,last,rest) as (
-- anchor: one row per table row, seeded with the lower-cased first character
select id,cast(lower(left(str,1)) as varchar(max)),
left(str,1), stuff(lower(str),1,1,'')
from #tbl
union all
-- step: append the next character, upper-cased when the previous one was '_'
select id,one+case when last='_'
then upper(left(rest,1))
else left(rest,1) end,
left(rest,1), stuff(rest,1,1,'')
from c
where rest > ''
)
-- every level holds a prefix of the row's final string; max(one) is used to pick the complete one
select id,max(one)
from c
group by id
-- lifts the recursion limit so the string length does not cap the recursion depth
option (maxrecursion 0);
-- result
ID COLUMN_1
1 hrm_Application_Delay_In
2 hrm_Application_Delay_Out
3 _Hrm_Application_Delay_Out
4
5 (null)
6 abc<de_Fg>hi
-- Loop-free alternative: lower-case the literal, then replace each "_<letter>" pair
-- with the upper-case letter (26 nested REPLACE calls, innermost '_a' first) and
-- finally remove any underscore that is still left (outermost REPLACE).
select
replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(lower('HRM_APPLICATION_DELAY_IN'),'_a','A'),'_b','B'),'_c','C'),'_d','D'),'_e','E'),'_f','F'),
'_g','G'),'_h','H'),'_i','I'),'_j','J'),'_k','K'),'_l','L'),
'_m','M'),'_n','N'),'_o','O'),'_p','P'),'_q','Q'),'_r','R'),
'_s','S'),'_t','T'),'_u','U'),'_v','V'),'_w','W'),'_x','X'),
'_y','Y'),'_z','Z'),'_','')
The two steps below can solve the problem. As an example I use sys.tables; you can use any other source of names.
-- Renames tables to a prefixed, underscore-free PascalCase form.
-- WARNING: this script calls sp_RENAME for every name it collected; run it only
-- against a database where renaming those tables is intended.
declare #Ret varchar(8000), #RetVal varchar(8000), #i int, #count int = 1;
declare #c varchar(10), #Text varchar(8000), #PrevCase varchar, #ModPrefix varchar(10);
-- step 1: collect the table names to process
DECLARE #FileDataTable TABLE(TableName varchar(200))
INSERT INTO #FileDataTable
select name FROM sys.tables where object_name(object_id) not like 'sys%' order by name
-- prefix that is put in front of every new name
SET #ModPrefix = 'Pur'
-- step 2: walk the collected names one by one
DECLARE crsTablesTruncIns CURSOR
FOR select TableName FROM #FileDataTable
OPEN crsTablesTruncIns
FETCH NEXT FROM crsTablesTruncIns INTO #Text
WHILE ##FETCH_STATUS = 0
BEGIN
SET #RetVal = '';
select #i=1, #Ret = '';
-- build the new name character by character
while (#i <= len(#Text))
begin
SET #c = substring(#Text,#i,1)
--SET #Ret = #Ret + case when #Reset=1 then UPPER(#c) else LOWER(#c)
-- upper-case the first character and any character that follows an underscore
IF(#PrevCase = '_' OR #i = 1)
SET #Ret = UPPER(#c)
ELSE
SET #Ret = LOWER(#c)
--#Reset = case when #c like '[a-zA-Z]' then 0 else 1 end,
-- only letters are appended, so underscores, digits and other characters are dropped
if(#c like '[a-zA-Z]')
SET #RetVal = #RetVal + #Ret
-- remember whether the current character was an underscore for the next iteration
if(#c = '_')
SET #PrevCase = '_'
else
SET #PrevCase = ''
SET #i = #i +1
end
SET #RetVal = #ModPrefix + #RetVal
-- log a running number and the new name
print cast(#count as varchar) + ' ' + #RetVal
SET #count = #count + 1
-- rename the table from the old name to the new one
EXEC sp_RENAME #Text , #RetVal
SET #RetVal = ''
FETCH NEXT FROM crsTablesTruncIns INTO #Text
END
CLOSE crsTablesTruncIns
DEALLOCATE crsTablesTruncIns
I'd like to show you my nice and simple solution. It uses a tally function to split the string by a delimiter, in our case by an underscore. To understand tally functions, read this article.
So, this is how my tally function looks like:
-- Inline table-valued function that splits #String at every #Delim.
-- #String: text to split.
-- #Delim: single-character delimiter.
-- Returns one row per token: Value (the token with surrounding spaces trimmed) and
-- Ix (the position, in the delimiter-padded string, of the delimiter that starts the token).
CREATE FUNCTION [dbo].[tvf_xt_tally_split](
#String NVARCHAR(max)
,#Delim CHAR(1))
RETURNS TABLE
as
return
(
-- Tally: the numbers 1..LEN(#String) (100 when #String is NULL), generated by numbering
-- rows of master.dbo.syscolumns.
-- NOTE(review): the amount of numbers is presumably capped by that table's row count - confirm for long inputs.
WITH Tally AS (SELECT top (select isnull(LEN(#String),100)) n = ROW_NUMBER() OVER(ORDER BY [name]) from master.dbo.syscolumns)
(
-- The string is wrapped in delimiters; every position N holding a delimiter starts a
-- token that runs up to (not including) the next delimiter.
SELECT LTRIM(RTRIM(SUBSTRING(#Delim + #String + #Delim,N+1,CHARINDEX(#Delim,#Delim + #String + #Delim,N+1)-N-1))) Value, N as Ix
FROM Tally
WHERE N < LEN(#Delim + #String + #Delim)
AND SUBSTRING(#Delim + #String + #Delim,N,1) = #Delim
)
)
This function returns a table in which each row represents the part of the string between two #Delim characters (in our case between underscores). The rest of the work is simple, just a combination of the LEFT, RIGHT, LEN, UPPER and LOWER functions.
declare #string varchar(max)
set #string = ' HRM_APPLICATION_DELAY_IN'
-- convert to lower case
set #string = LOWER(#string)
declare #output varchar(max)
-- build string: upper-case the first character of every token and re-join with '_'
-- SUBSTRING(x, 2, LEN(x)) is used for "everything after the first character" because
-- RIGHT(x, LEN(x) - 1) raises an error for an empty token (e.g. '__' or a leading '_').
-- NOTE(review): the concatenation relies on the rows arriving in string order - confirm.
select #output = coalesce(#output + '_','') +
    UPPER(left(Value,1)) + SUBSTRING(Value, 2, LEN(Value))
from dbo.tvf_xt_tally_split(#string, '_')
-- lower first char
select lower(left(#output,1)) + SUBSTRING(#output, 2, LEN(#output))

Storing phone nos with only numbers and with "x" for extension?

I have a test function which would sanitize phone nos and allow only nos and characters "x" or "X" to be stored. I have it to where it does most of it other than it allows multiple x's which I don't want. Can anybody help me add it to the regular expression also let me know if you spot potential issues ?
-- Removes every character that is not a digit or the letter x/X.
-- #Temp: raw phone number text.
-- Returns the text reduced to digits and x/X (repeated x characters are all kept).
CREATE Function [dbo].[RemoveAlphaCharacters](#Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
    Declare #Pos Int
    -- No commas inside the class: there a comma is a literal member, which would let
    -- commas in the input pass through unfiltered.
    Set #Pos = PatIndex('%[^0-9xX]%', #Temp)
    While #Pos > 0
    Begin
        Set #Temp = Stuff(#Temp, #Pos, 1, '')
        Set #Pos = PatIndex('%[^0-9xX]%', #Temp)
    End
    Return #Temp
End
The problem with PATINDEX here is that it can't really determine that the pattern should change after it hits a string for the first time. So maybe this approach will be simpler:
-- Keeps the digits of #Temp plus the first x/X encountered; everything else,
-- including any later x/X, is dropped.
-- #Temp: raw phone number text.
-- Returns the sanitized text.
CREATE FUNCTION [dbo].[RemoveAlphaCharacters]
(
#Temp VARCHAR(1000)
)
RETURNS VARCHAR(1000)
AS
BEGIN
    DECLARE #pos INT, #seenX BIT, #out VARCHAR(1000), #ch CHAR(1);
    SET #pos = 1;
    SET #seenX = 0;
    SET #out = '';
    WHILE #pos <= LEN(#Temp)
    BEGIN
        SET #ch = SUBSTRING(#Temp, #pos, 1);
        IF #ch LIKE '[0-9]'
            SET #out = #out + #ch;
        ELSE IF #seenX = 0 AND LOWER(#ch) = 'x'
            -- the first extension marker is kept and remembered
            SELECT #out = #out + #ch, #seenX = 1;
        SET #pos = #pos + 1;
    END
    RETURN(#out);
END
GO
SELECT dbo.RemoveAlphaCharacters('401-867-9092');
SELECT dbo.RemoveAlphaCharacters('401-867-9092x32');
SELECT dbo.RemoveAlphaCharacters('401-867-9092x32x54');
Results:
4018679092
4018679092x32
4018679092x3254

SQL Server Equivalent to ORACLE INSTR

I wanted to know if in SQL Server there is an equivalent to the Oracle INSTR function?
I know that there is CHARINDEX and PATINDEX, but with the Oracle version I can also specify the Nth appearance of the character(s) I am looking for.
Oracle INSTR:
instr( string1, string2 [, start_position [, **nth_appearance** ] ] )
The CHARINDEX almost gets me there, but I wanted to have it start at the nth_appearance of the character in the string.
You were spot on that nth_appearance does not exist in SQL Server.
Shamelessly copying a function (Equivalent of Oracle's INSTR with 4 parameters in SQL Server) created for your problem (please note that #Occurs is not used the same way as in Oracle - you can't specify "3rd appearance", but "occurs 3 times"):
-- Finds the position of the #Occurs-th occurrence of #str1 inside #str2.
-- #str1: the text to look for.
-- #str2: the text that is searched (widened to varchar(8000) so long inputs are
--        not truncated to 1000 characters).
-- #start: 1-based position at which the search begins.
-- #Occurs: number of occurrences to step through.
-- Returns the position of the last occurrence reached. When fewer than #Occurs
-- occurrences exist, this is the last one found (or #start - 1 when there is none).
CREATE FUNCTION udf_Instr
(#str1 varchar(8000), #str2 varchar(8000), #start int, #Occurs int)
RETURNS int
AS
BEGIN
    DECLARE #Found int, #LastPosition int, #NextPosition int
    SET #Found = 0
    SET #LastPosition = #start - 1
    WHILE (#Found < #Occurs)
    BEGIN
        -- look up the next occurrence once per iteration
        SET #NextPosition = CHARINDEX(#str1, #str2, #LastPosition + 1)
        IF (#NextPosition = 0)
            BREAK
        SET #LastPosition = #NextPosition
        SET #Found = #Found + 1
    END
    RETURN #LastPosition
END
GO
SELECT dbo.udf_Instr('x','axbxcxdx',1,4)
GO
DROP FUNCTION udf_Instr
GO
Here is a version of Oracle's INSTR function which also works with a negative position for a reverse lookup as per Oracle's Doc here :- https://docs.oracle.com/cd/B28359_01/olap.111/b28126/dml_functions_1103.htm#OLADM564
-- Oracle-style INSTR: position of the #occurance-th occurrence of #substr in #str.
-- #str: the text that is searched.
-- #substr: the text to look for.
-- #position: 1-based start; a negative value searches backwards from the end.
-- #occurance: which occurrence to return.
-- Returns the 1-based position (counted from the left, also for backward searches)
-- or 0 when the requested occurrence does not exist.
CREATE FUNCTION dbo.INSTR(#str NVARCHAR(MAX), #substr NVARCHAR(MAX), #position INT = 1, #occurance INT = 1)
RETURNS INT
AS
BEGIN
    DECLARE #loc INT = #position;
    DECLARE #next INT;
    -- a backward search is a forward search on the reversed strings
    IF #loc < 0
    BEGIN
        SET #str = REVERSE(#str);
        SET #substr = REVERSE(#substr);
        SET #loc = #loc * -1;
    END
    IF #loc > 0
    BEGIN
        SET #loc = #loc - 1;
    END
    SET #next = CHARINDEX(#substr, #str, #loc + 1);
    WHILE (#occurance > 0 AND #next > 0)
    BEGIN
        SET #loc = #next;
        SET #occurance = #occurance - 1;
        IF #occurance > 0
            SET #next = CHARINDEX(#substr, #str, #loc + 1);
    END
    -- requested occurrence not reached
    IF #occurance > 0
    BEGIN
        SET #loc = 0;
    END
    -- Map a hit in the reversed string back to a left-based start position. A match
    -- starting at r in the reversed string covers r..r+m-1, so it starts at
    -- n - r - m + 2 in the original (n, m = lengths of string and substring).
    -- "Not found" (0) is left untouched. The sentinel keeps LEN from ignoring
    -- trailing spaces.
    IF #position < 0 AND #loc > 0
    BEGIN
        SET #loc = (LEN(#str + N'x') - 1) - #loc - (LEN(#substr + N'x') - 1) + 2;
    END
    RETURN #loc
END
Change #str1 varchar(8000), #str2 varchar(1000) to #str1 varchar(1000), #str2 varchar(8000)
or
change CHARINDEX(#str1, #str2, #LastPosition + 1) to CHARINDEX(#str2, #str1, #LastPosition + 1)
You can use the following UDF (inline function rather than scalar)
-- Inline table-valued variant: returns one row with column Loc for the
-- #Occurance-th occurrence of #Substr in #str.
-- #str: the text that is searched.
-- #Substr: the text to look for.
-- #start: offset that is subtracted from the reported position (Loc = position - #start + 1).
-- #Occurance: which occurrence to report.
-- NOTE(review): #start is only used to shift the result; it does not appear to
-- restrict where the search begins - confirm that this is intended.
CREATE FUNCTION dbo.INSTR
(
#str VARCHAR(8000),
#Substr VARCHAR(1000),
#start INT ,
#Occurance INT
)
RETURNS TABLE
AS
RETURN
-- Tally: the numbers 1..LEN(#str), taken from a cross join of 8 x 10 x 10 x 10 = 8000 rows.
WITH Tally (n) AS
(
SELECT TOP (LEN(#str)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0)) a(n)
CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) b(n)
CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) c(n)
CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d(n)
)
-- For every start N, CHARINDEX gives the position of the next match at or after N.
-- DENSE_RANK numbers the distinct match positions in ascending order; for the rank that
-- equals #Occurance, the largest N mapping to that match is the match position itself.
, Find_N_STR as
(
SELECT
CASE WHEN DENSE_RANK() OVER(PARTITION BY #Substr ORDER BY (CHARINDEX(#Substr ,#STR ,N))) = #Occurance
THEN MAX(N-#start +1) OVER (PARTITION BY CHARINDEX(#Substr ,#STR ,N) )
ELSE 0
END [Loc]
FROM Tally
WHERE CHARINDEX(#Substr ,#STR ,N) > 0
)
-- Rows belonging to other occurrences carry 0 and are filtered out; Loc is NULL when
-- the requested occurrence does not exist.
SELECT Loc= MAX(Loc)
FROM Find_N_STR
WHERE Loc > 0
How to use:
-- Sample data: underscore-separated values.
declare #T table
(
Name_Level_Class_Section varchar(25)
)
insert into #T values
('Jacky_1_B2_23'),
('Johnhy_1_B2_24'),
('Peter_2_A5_3')
-- An inline function is applied per row with CROSS APPLY; here it looks up the
-- second underscore of every value.
select t.Name_Level_Class_Section , l.Loc
from #t t
cross apply dbo.INSTR (t.Name_Level_Class_Section, '_',1,2) l
Try this !!
-- Returns the position of the #occurrence-th occurrence of #substr in #str,
-- searching from #start; 0 when that occurrence does not exist.
-- #str: the text that is searched.
-- #substr: the text to look for.
-- #start: 1-based position at which the search begins.
-- #occurrence: which occurrence to return (1 = first).
CREATE FUNCTION dbo.INSTR (#str VARCHAR(8000), #substr VARCHAR(255), #start INT, #occurrence INT)
RETURNS INT
AS
BEGIN
    DECLARE #remaining INT = #occurrence,
            #at INT = CHARINDEX(#substr, #str, #start);
    -- Hop from match to match until the wanted one is reached or the matches run out.
    WHILE #at > 0 AND (#remaining IS NULL OR #remaining <> 1)
    BEGIN
        SET #remaining = #remaining - 1;
        -- continue right behind the match that was just counted
        SET #at = CHARINDEX(#substr, #str, #at + 1);
    END
    RETURN #at;
END
GO
GO
Usage :
-- Find the second occurrence of letter 'o'
SELECT dbo.INSTR('Moscow', 'o', 1, 2);
-- Result: 5