Searching for non-visible characters - sql

I'm troubleshooting some strangeness in output of my SQL Server when I want to get records that are not null or empty string:
-- Return only rows whose value is neither NULL nor the empty string.
-- Both tests must hold, so they are combined with AND; with OR the second
-- test is redundant and empty strings are returned as well.
SELECT myString
FROM myTable
WHERE myString IS NOT NULL AND myString != ''
In addition to records that obviously fit (string values returned to SSMS's grid) I'm seeing records where the grid cell is blank. When I select the cell and try to copy it, my clipboard manager (clipmate.com) complains that the data is invalid.
If I output the query to file instead of to grid and then inspect via Hex char mode, sure enough - there are characters I would not expect (much less want) to be in there.
My database's collation is: SQL_Latin1_General_CP1_CI_AS
How would I go about eliminating any/all non-visible characters?

You can try this function; it removes any characters that are not printable.
-- Returns @p_string with every character whose ASCII code is below 32
-- (the control characters) removed. Characters with code 32 and above are
-- copied unchanged, in their original order.
-- A NULL input yields an empty string, because the loop never runs.
CREATE FUNCTION [dbo].[RemoveNonPrintableChars]
(
    @p_string varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
    -- LEN() does not count trailing spaces; appending a sentinel and
    -- subtracting 1 gives the real character count, so trailing spaces
    -- in the input are kept instead of being silently dropped.
    declare @l_len int = len(@p_string + '.') - 1
    declare @l_pos int = 1
    declare @l_str varchar(max) = ''
    declare @l_chr varchar(1)

    while (@l_pos <= @l_len)
    begin
        set @l_chr = substring(@p_string, @l_pos, 1)
        if (ascii(@l_chr) >= 32)
        begin
            set @l_str = @l_str + @l_chr
        end
        set @l_pos = @l_pos + 1
    end

    return @l_str
END
-- Demo: the first call passes a clean string, the second one a string that
-- ends with a non-printable BEL character (char(7)); both return 'andrew'.
declare
@l_str varchar(max) = 'andrew'--select char(7)
select dbo.[RemoveNonPrintableChars] (@l_str)
set @l_str = 'andrew'+char(7)-- add NP character
select dbo.[RemoveNonPrintableChars] (@l_str)

If by chance you can't use a UDF.
You may notice that I don't just strip the control characters here, I replace them with a space so not to concatenate strings.
-- Inline (no UDF) replacement of control characters with a single space.
Declare @YourTable table (SomeField varchar(50))
Insert Into @YourTable (SomeField) values
('Michael'+char(13)+char(10)+'LastName')

Select A.*
      ,B.Value
 From  @YourTable A
 Cross Apply (
        -- 1) Split the value into one row per character (N = position).
        -- 2) Swap every character with code <= 31 for a placeholder token.
        -- 3) Re-assemble in position order. TYPE + .value() returns the text
        --    un-entitized, so '&', '<' and '>' in the data are not turned
        --    into '&amp;', '&lt;' and '&gt;'.
        -- 4) Turn the placeholder into a space, then collapse the double
        --    space that a CR+LF pair produces into a single one.
        Select Value = replace(replace((
                            Select ''+C
                             From (
                                    Select N,C=case when ASCII(Substring(A.SomeField,N ,1))>31 then Substring(A.SomeField,N ,1) else '{--space--}' end
                                     From ( Select Top (Len(A.SomeField)) N=Row_Number() Over (Order By Number) From master..spt_values ) N
                                  ) C Order by N
                              For XML Path(''), Type).value('.','varchar(max)') ,'{--space--}',' '),'  ',' ')
 ) B
Returns
SomeField Value
Michael Michael LastName
LastName
EDIT
However, If you do want a UDF consider the following non-linear approach
-- Replaces each of the 32 control characters (codes 0-31) in @S with a space,
-- collapses double spaces twice, and trims the result on both sides.
-- Two collapse passes reduce runs of up to four spaces to one; longer runs
-- are shortened but not fully collapsed.
CREATE FUNCTION [dbo].[udf-Str-Strip-Control](@S varchar(max))
Returns varchar(max)
Begin
    -- cte1: 10 rows; cte2: the first 32 rows of cte1 x cte1 turned into
    -- Char(0) .. Char(31).
    ;with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
          cte2(C) As (Select Top (32) Char(Row_Number() over (Order By (Select NULL))-1) From cte1 a Cross Join cte1 b)
    -- One Replace per control character; the order does not matter because
    -- every replacement is independent of the others.
    Select @S = Replace(@S,C,' ')
     From  cte2

    Return LTrim(RTrim(Replace(Replace(@S,'  ',' '),'  ',' ')))
End
--Select [dbo].[udf-Str-Strip-Control]('Michael'+char(13)+char(10)+'LastName') --Returns: Michael LastName

I like John's answers better, and would probably modify them if you were looking to control whether or not to include certain control characters. This is the function I used in the past to clean up some strings.
-- Removes unprintable characters (codes 0-31) from @StringParameter.
--
-- @CleanStyle selects which of them are kept:
--   1 (default) keep Tab (9), New Line (10) and Carriage Return (13)
--   2           keep Tab (9) only
--   3           keep none
--   > 3         treated as 1
--
-- Returns NULL for a NULL input and also when nothing but spaces (or nothing
-- at all) is left after cleaning.
create function dbo.fnCleanVarchar (
    @StringParameter varchar(max)
  , @CleanStyle tinyint = 1
) returns varchar(max) as
begin
    if @StringParameter is null
        return null;
    if @CleanStyle > 3 set @CleanStyle = 1;

    declare @StringReturn varchar(max);
    declare @StringLength int;
    declare @CharacterCode int;
    declare @CharacterCodePosition int;

    set @StringReturn = '';
    -- len() ignores trailing spaces; the sentinel keeps them in the count so
    -- they are copied to the result like any other printable character.
    set @StringLength = len(@StringParameter + '.') - 1;
    set @CharacterCodePosition = 1;

    while @CharacterCodePosition <= @StringLength
    begin
        set @CharacterCode = ascii(substring(@StringParameter, @CharacterCodePosition, 1));

        -- The first matching branch wins: printable characters are always
        -- kept, after that the style decides about 9 / 10 / 13.
        set @StringReturn = @StringReturn + case
            when @CharacterCode > 31
                then char(@CharacterCode)
            when @CleanStyle = 3
                then ''
            when @CleanStyle = 2 and @CharacterCode = 9
                then char(9)
            when @CleanStyle = 1 and @CharacterCode in (9, 10, 13)
                then char(@CharacterCode)
            else ''
        end;

        set @CharacterCodePosition = @CharacterCodePosition + 1;
    end;

    if len(@StringReturn) = 0
        return null;
    return @StringReturn;
end;

Checking for non-visible fields is directly related to find non-visible characters, so consider these two notes:
Note 1: SQL Server ignores trailing spaces when it compares strings, so N' ' = N'' is true, and the same holds for any continuous run of empty characters.
An empty character is a character that compares equal to N''.
Note 2: There are 65536 Unicode characters, you can view them with a query like this:
-- Lists the codes 0 .. 65535 (column i) next to the character NCHAR() returns
-- for each of them (column c).
-- Anchor row: code 0. Each recursive step adds the next code until 65535.
WITH CTE(i, c) AS (
SELECT 0, NCHAR(0) COLLATE SQL_Latin1_General_CP1_CI_AS --I add COLLATE to express your collation but I think it is optional
UNION ALL
SELECT i+1, NCHAR(i+1) COLLATE SQL_Latin1_General_CP1_CI_AS
FROM CTE
WHERE i < 65535
)
SELECT *
FROM CTE
-- 65535 recursion steps are needed, far more than the default limit of 100;
-- MaxRecursion 0 removes the limit.
OPTION ( MaxRecursion 0 );
Some of those are not visible and empty like NCHAR(0), NCHAR(12288), ...,
Some of those are not visible and not empty like NCHAR(1), ...,
Some of those are visible and empty like NCHAR(502), ... !!!!.
So if your field is an nvarchar string, filtering out non-visible characters is a big problem; for varchar strings the problem still exists, but it is much smaller.
Side note: You can use COALESCE(myString, '') != '' instead of yours ;).
SUMMARY :
For a little pin don't create a hammer machine!.
When these behaviors are not that important for you and your project, don't try to change, handle, or create your own string-equality function ;).
A sample function to check if a string is visible(returns 1) or not(returns 0) can be like this:
-- Returns 1 when @string contains at least one character that is not in the
-- list of non-visible characters below, otherwise 0.
-- NULL and the empty string both count as "not visible" and return 0.
CREATE FUNCTION IsVisible ( @string varchar(max) )
RETURNS bit
AS
BEGIN
    DECLARE @pString varchar(max) = @string;

    -- Strip every listed non-visible character from the working copy;
    -- whatever is left afterwards is visible content.
    WITH InvisibleChars AS (
        SELECT c COLLATE SQL_Latin1_General_CP1_CI_AS AS c
        FROM (VALUES (CHAR(0)), (CHAR(1)), (CHAR(9)), (CHAR(10)),
                     (CHAR(11)), (CHAR(12)), (CHAR(13)), (CHAR(28)),
                     (CHAR(29)), (CHAR(30)), (CHAR(31)), (CHAR(32)),
                     (CHAR(160)) -- Above characters are non-visibles
             ) t(c)
    )
    SELECT @pString = REPLACE(@pString, c, '')
    FROM InvisibleChars;

    -- The explicit NULL test is required: NULL = '' is not true, so without
    -- it a NULL input would fall through to the ELSE branch and return 1.
    RETURN CASE WHEN @pString IS NULL OR @pString = '' THEN 0 ELSE 1 END;
END
GO

Related

Change characters but keep length

I am migrating sensitive data to a database, and I need to hide details of the text. We would like to keep the volume and length of the text, but change the meaning.
For example:
"James has been well received, and should be helped when ever he finds it hard to speak"
should change to:
"jhdfy dfw aslk dfe kjdfkjd, kjf kjdsf df iotryy erhr lsdj jf ytwe it kjdf tr kjsdd"
Is there a way to update all rows, setting the text column to this kind of random text? I really only want to change the letters (a-z, A-Z) and keep everything else.
One option is to use a bunch of nested replaces . . . but that would probably hit on the maximum number of nested functions.
You could write a painful query using outer apply:
select
from t outer apply
(select replace(t.col, 'a', 'z') as col1) outer apply
(select replace(col1, 'b', 'y') ) outer apply
. . .
However, you might want to write your own function. In other databases, this is called translate() (after the Unix command). If you Google SQL Server translate, I think you'll find examples on the web.
One way is to split the string character by character and replace each row with a random string. And then concatenate them back to get the desired output
-- Replaces every letter of @str with a random lowercase letter and keeps all
-- other characters, so the length and word shapes of the text are preserved.
-- master..spt_values (type 'P') serves as the numbers table, which limits the
-- input to the amount of numbers that table holds.
DECLARE @str VARCHAR(MAX) = 'James has been well received, and should be helped when ever he finds it hard to speak'
;WITH Cte(pos, orig, random) AS(
    SELECT
        v.number,
        SUBSTRING(t.a, v.number + 1, 1),
        CASE
            WHEN SUBSTRING(t.a, v.number + 1, 1) LIKE '[a-z]'
                -- 26 letters: "% 26" covers a..z ("% 25" could never yield 'z').
                -- The modulo is taken before ABS() so the smallest CHECKSUM
                -- value cannot overflow ABS().
                THEN CHAR(ABS(CHECKSUM(NEWID()) % 26) + 97)
            ELSE SUBSTRING(t.a, v.number + 1, 1)
        END
    FROM (SELECT @str) t(a)
    CROSS JOIN master..spt_values v
    WHERE
        v.number < LEN(t.a)
        AND v.type = 'P'
)
SELECT
    OrignalString = @str,
    RandomString = (
        SELECT '' + random
        FROM Cte
        -- Without ORDER BY the characters may be re-assembled in any order.
        ORDER BY pos
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'
    )
TRY IT HERE
OK this is possible using a user defined function (UDF) and a view.
SQL Server does not allow random number generation in a UDF but does allow it in a view. Ref: http://blog.sqlauthority.com/2012/11/20/sql-server-using-rand-in-user-defined-functions-udf/
So here is the solution
-- Exposes one RAND() value as column rndResult. The view exists so that a
-- user defined function can obtain a random number by selecting from it.
CREATE VIEW [dbo].[rndView]
AS
SELECT RAND() rndResult
GO
-- Returns the random float that dbo.rndView produces. Reading it through the
-- view is what makes a random number available inside a function.
CREATE FUNCTION [dbo].[RandFn]()
RETURNS float
AS
BEGIN
    DECLARE @rndValue float
    SELECT @rndValue = rndResult
    FROM dbo.rndView
    RETURN @rndValue
END
GO
CREATE FUNCTION [dbo].[randomstring] ( @stringToParse VARCHAR(MAX))
RETURNS
varchar(max)
AS
BEGIN
/*
Returns @stringToParse with every ASCII letter replaced by a random letter of
the same case. All other characters, and the length of the text, are kept.
A NULL input returns an empty string.

ASCII ranges used:
A = 65
Z = 90
a = 97
z = 122

Usage:
declare @stringToParse VARCHAR(MAX) = 'James has been well received, and should be helped when ever he finds it hard to speak'
Select [dbo].[randomstring] ( @stringToParse );

Update SpecialTable
Set SpecialString = [dbo].[randomstring] (SpecialString);
*/
    declare @StringToreturn varchar(max) = ''
    declare @charCounter int = 1
    -- len() ignores trailing spaces; the sentinel keeps them in the count so
    -- the result has exactly the same length as the input.
    declare @len int = len(@stringToParse + '.') - 1
    declare @UpperA int = 65
    declare @UpperZ int = 90
    declare @LowerA int = 97
    declare @LowerZ int = 122
    declare @thisChar char(1)
    declare @thisCode int
    declare @randomChar char(1)
    declare @letterOffset int

    -- "<=" so that the last character is processed as well.
    WHILE @charCounter <= @len
    BEGIN
        SET @thisChar = SUBSTRING(@stringToParse, @charCounter, 1)
        SET @thisCode = ASCII(@thisChar)
        SET @randomChar = @thisChar

        -- Offset 0..25 into the alphabet. FLOOR(x * 26) gives every letter
        -- the same chance; the modulo only guards against a value of exactly 1.
        SET @letterOffset = CAST(FLOOR(dbo.RandFn() * 26) AS int) % 26

        --only swap if a-z or A-Z
        IF @thisCode BETWEEN @UpperA AND @UpperZ
            SET @randomChar = CHAR(@UpperA + @letterOffset)   --upper case
        ELSE IF @thisCode BETWEEN @LowerA AND @LowerZ
            SET @randomChar = CHAR(@LowerA + @letterOffset)   --lower case

        SET @StringToreturn = @StringToreturn + @randomChar
        SET @charCounter = @charCounter + 1
    END

    return @StringToreturn
END
GO

Split words with a capital letter in sql

Does anyone know how to split words starting with capital letters from a string?
Example:
DECLARE #var1 varchar(100) = 'OneTwoThreeFour'
DECLARE #var2 varchar(100) = 'OneTwoThreeFourFive'
DECLARE #var3 varchar(100) = 'One'
SELECT #var1 as Col1, <?> as Col2
SELECT #var2 as Col1, <?> as Col2
SELECT #var3 as Col1, <?> as Col2
expected result:
Col1 Col2
OneTwoThreeFour One Two three Four
OneTwoThreeFourFive One Two Three Four Five
One One
If this is not possible (or if the query would be too long), a scalar function would be okay as well.
Here is a function I created that is similar to the "removing non-alphabetic characters". How to strip all non-alphabetic characters from string in SQL Server?
This one uses a case sensitive collation which actively seeks out a non-space/capital letter combination and then uses the STUFF function to insert the space. This IS a scalar UDF, so some folks will immediately say that it will be slower than other solutions. To that notion, I say, please test it. This function does not use any table data and only loops as many times as necessary, so it will likely give you very good performance.
-- Inserts a space in front of every capital letter A-Z that directly follows
-- a non-space character, e.g. 'OneTwoThree' -> 'One Two Three'.
-- The pattern is matched under a binary collation so that [A-Z] only matches
-- upper case letters. The result is limited to 1000 characters.
Create Function dbo.Split_On_Upper_Case(@Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
    Declare @KeepValues as varchar(50)
    Declare @SplitAt as int

    -- One non-space character immediately followed by a capital letter.
    Set @KeepValues = '%[^ ][A-Z]%'

    Set @SplitAt = PatIndex(@KeepValues collate Latin1_General_Bin, @Temp)
    While @SplitAt > 0
    Begin
        -- @SplitAt is the position of the non-space character; the space
        -- goes between it and the capital letter that follows.
        Set @Temp = Stuff(@Temp, @SplitAt + 1, 0, ' ')
        Set @SplitAt = PatIndex(@KeepValues collate Latin1_General_Bin, @Temp)
    End

    Return @Temp
End
Call it like this:
Select dbo.Split_On_Upper_Case('OneTwoThreeFour')
Select dbo.Split_On_Upper_Case('OneTwoThreeFour')
Select dbo.Split_On_Upper_Case('One')
Select dbo.Split_On_Upper_Case('OneTwoThree')
Select dbo.Split_On_Upper_Case('stackOverFlow')
Select dbo.Split_On_Upper_Case('StackOverFlow')
Here is a function I have just created.
FUNCTION
-- Returns @String with a space inserted in front of every upper case letter,
-- e.g. 'OneTwoThree' -> 'One Two Three'.
-- No space is added in front of the first character or where the previous
-- character already is a space. Digits, spaces and punctuation are copied
-- as they are. Leading spaces of the result are trimmed.
CREATE FUNCTION dbo.Split_On_Upper_Case
(
    @String VARCHAR(4000)
)
RETURNS VARCHAR(4000)
AS
BEGIN
    DECLARE @Char CHAR(1);
    DECLARE @i INT = 1;               -- SUBSTRING positions are 1-based
    DECLARE @OutString VARCHAR(4000) = '';

    WHILE (@i <= LEN(@String))
    BEGIN
        SET @Char = SUBSTRING(@String, @i, 1);

        -- A character is an upper case letter when, compared case
        -- sensitively, it equals its upper case form but differs from its
        -- lower case form. The second test is what excludes digits, spaces
        -- and punctuation, which have no case at all.
        IF (@i > 1
            AND SUBSTRING(@String, @i - 1, 1) <> ' '
            AND @Char = UPPER(@Char) COLLATE Latin1_General_CS_AI
            AND @Char <> LOWER(@Char) COLLATE Latin1_General_CS_AI)
            SET @OutString = @OutString + ' ' + @Char;
        ELSE
            SET @OutString = @OutString + @Char;

        SET @i += 1;
    END

    SET @OutString = LTRIM(@OutString);
    RETURN @OutString;
END
Test Data
DECLARE #TABLE TABLE (Strings VARCHAR(1000))
INSERT INTO #TABLE
VALUES ('OneTwoThree') ,
('FourFiveSix') ,
('SevenEightNine')
Query
SELECT dbo.Split_On_Upper_Case(Strings) AS Vals
FROM #TABLE
Result Set
╔══════════════════╗
║ Vals ║
╠══════════════════╣
║ One Two Three ║
║ Four Five Six ║
║ Seven Eight Nine ║
╚══════════════════╝
If a single query is needed 26 REPLACE can be used to check every upper case letter like
SELECT #var1 col1, REPLACE(
REPLACE(
REPLACE(
...
REPLACE(#var1, 'A', ' A')
, ...
, 'X', ' X')
, 'Y', ' Y')
, 'Z', ' Z') col2
Not the most beautiful thing but it'll work.
EDIT
Just to add another function to do the same thing in a different way of the other answers
-- Returns @param with a space inserted in front of every character that
-- equals its own upper case form under a case sensitive collation. That
-- covers capital letters, but also digits, spaces and punctuation.
-- A capital first letter therefore produces a leading space.
-- Only the first 10000 characters are processed (four base-10 digits of
-- positions). NULL or empty input returns an empty string.
CREATE FUNCTION splitCapital (@param Varchar(MAX))
RETURNS Varchar(MAX)
BEGIN
    Declare @ret Varchar(MAX);
    declare @len int = len(@param);

    -- Base10 x 4 generates the positions 0 .. 9999; Chars keeps one row per
    -- character of @param together with its position.
    WITH Base10(N) AS (
        SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
        UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7
        UNION ALL SELECT 8 UNION ALL SELECT 9
    ), Chars(Pos, nthChar) As (
        Select p.Pos
             , substring(@param, p.Pos + 1, 1) Collate Latin1_General_CS_AI
        FROM Base10 u
        CROSS JOIN Base10 t
        CROSS JOIN Base10 h
        CROSS JOIN Base10 th
        CROSS APPLY (Select u.N + t.N*10 + h.N*100 + th.N*1000) p(Pos)
        WHERE p.Pos < @len
    )
    -- STRING_AGG ... WITHIN GROUP gives a guaranteed character order, which
    -- concatenating into a variable row by row does not. The cast to
    -- Varchar(MAX) lifts the 8000 byte limit of the aggregate.
    SELECT @ret = STRING_AGG(
                      Cast(Case nthChar
                               When UPPER(nthChar) Then ' '
                               Else ''
                           End + nthChar As Varchar(MAX))
                    , '') WITHIN GROUP (ORDER BY Pos)
    FROM Chars;

    RETURN COALESCE(@ret, '');
END
This one uses the possibility of TSQL to concatenate string variable, I had to use the TOP N trick to force the Chars CTE rows in the right order
Build a Numbers table. There are some excellent posts on SO to show you how to do this. Populate it with values up the maximum length of your input string. Select the values from 1 through the actual length of the current input string. Cross join this list of numbers to the input string. Use the result to SUBSTRING() each character. Then you can either compare the resulting list of one-charachter values to a pre-populated table-valued variable or convert each character to an integer using ASCII() and choose only those between 65 ('A') and 90 ('Z'). At this point you have a list which is the position of each upper-case character in your input string. UNION the maximum length of your input string onto the end of this list. You'll see why in just a second. Now you can SUBSTRING() your input variable, starting at the Number given by row N and taking a length of (the Number given by row N+1) - (The number given by row N). This is why you have to UNION the extra Number on the end. Finally concatenate all these substring together, space-separated, using the algorithm of your choice.
Sorry, don't have an instance in front of me to try out code. Sounds like a fun task. I think doing it with nested SELECT statements will get convoluted and un-maintainable; better to lay it out as CTEs, IMHO.
I know that there are already some good answers out there, but if you wanted to avoid creating a function, you could also use a recursive CTE to accomplish this. It's certainly not a clean way of doing this, but it works.
-- Splits a camel-cased string without a function: every recursion step
-- inserts one space in front of the first capital letter that directly
-- follows a non-space character; the last step holds the final text.
DECLARE
    @camelcase nvarchar(4000) = 'ThisIsCamelCased'
;
WITH
    split
AS
    (
        -- Anchor: the unchanged input.
        SELECT
            [iteration] = 0
            ,[string] = @camelcase
        UNION ALL
        -- Recursive step: insert one space, stop when no pair is left.
        SELECT
            [iteration] = split.[iteration] + 1
            ,[string] = STUFF(split.[string], pattern.[index] + 1, 0, ' ')
        FROM
            split
        CROSS APPLY
            ( SELECT [index] = PATINDEX(N'%[^ ][A-Z]%' COLLATE Latin1_General_Bin, split.[string]) )
            pattern
        WHERE
            pattern.[index] > 0
    )
SELECT TOP (1)
    [spaced] = split.[string]
FROM
    split
ORDER BY
    split.[iteration] DESC
-- One recursion per inserted space: the default limit of 100 would abort on
-- inputs with more than 100 capitals. 4000 covers the whole nvarchar(4000).
OPTION (MAXRECURSION 4000)
;
As I said, this isn't a pretty way to write a query, but I use things like this when I'm just writing up some ad-hoc queries where I would not want to add new artifacts to the database. You could also use this to create your function as an inline table valued function, which is always a tad nicer.
Please Try This:
-- Prints @t with a space inserted in front of every capital letter A-Z.
declare @t nvarchar (100) ='IamTheTestString'
declare @len int
declare @Counter int = 1              -- substring positions are 1-based
-- Twice the input size: in the worst case every character gets a space.
declare @Final nvarchar (200) =''
declare @Current nvarchar (1)
set @len = len(@t)
while (@Counter <= @len)
begin
    set @Current = substring(@t, @Counter, 1)
    -- Codes 65..90 are 'A'..'Z'.
    set @Final = @Final + Case when unicode(@Current) between 65 and 90
                               then ' ' + @Current
                               else @Current
                          end
    set @Counter = @Counter + 1
end
-- A capital first letter produced a leading space; remove it.
print ltrim(@Final)

Query to get only numbers from a string

I have data like this:
string 1: 003Preliminary Examination Plan
string 2: Coordination005
string 3: Balance1000sheet
The output I expect is
string 1: 003
string 2: 005
string 3: 1000
And I want to implement it in SQL.
First create this UDF
-- Returns the digits 0-9 of @strAlphaNumeric in their original order, with
-- every other character removed. A NULL input returns '0'.
CREATE FUNCTION dbo.udf_GetNumeric
(
    @strAlphaNumeric VARCHAR(256)
)
RETURNS VARCHAR(256)
AS
BEGIN
    -- Position of the first character that is not a digit (0 = none left).
    DECLARE @intAlpha INT
    SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)

    -- Delete one non-digit per pass until none remains.
    WHILE @intAlpha > 0
    BEGIN
        SET @strAlphaNumeric = STUFF(@strAlphaNumeric, @intAlpha, 1, '')
        SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
    END

    RETURN ISNULL(@strAlphaNumeric, '0')
END
GO
Now use the function as
SELECT dbo.udf_GetNumeric(column_name)
from table_name
SQL FIDDLE
I hope this solved your problem.
Reference
Try this one -
Query:
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
SELECT LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(string, pos, LEN(string))
FROM (
SELECT string, pos = PATINDEX('%[0-9]%', string)
FROM #temp
) d
) t
Output:
----------
003
005
1000
Query:
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM #temp
Please try:
declare #var nvarchar(max)='Balance1000sheet'
SELECT LEFT(Val,PATINDEX('%[^0-9]%', Val+'a')-1) from(
SELECT SUBSTRING(#var, PATINDEX('%[0-9]%', #var), LEN(#var)) Val
)x
Getting only numbers from a string can be done in a one-liner.
Try this :
SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here'))
NB: Only works for the first int in the string, ex: abc123vfg34 returns 123.
I found this approach works about 3x faster than the top voted answer. Create the following function, dbo.GetNumbers:
-- Returns the digits 0-9 contained in @String, in their original order.
-- Returns NULL when @String contains no digit (or is NULL).
CREATE FUNCTION dbo.GetNumbers(@String VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN
    DECLARE @Digits VARCHAR(8000);

    WITH
    Numbers
    AS (
        --Step 1.
        --Get a column of numbers to represent
        --every character position in the @String.
        SELECT 1 AS Number
        UNION ALL
        SELECT Number + 1
        FROM Numbers
        WHERE Number < LEN(@String)
        )
    ,Characters
    AS (
        --Step 2.
        --Use the column of numbers generated above
        --to tell substring which character to extract.
        --The position is kept so the digits can be re-assembled in order.
        SELECT Number
             , SUBSTRING(@String, Number, 1) AS Character
        FROM Numbers
        )
    --Step 3.
    --Pattern match to return only numbers from the CTE
    --and use STRING_AGG to rebuild it into a single string.
    --WITHIN GROUP is required: without it STRING_AGG may concatenate the
    --characters in any order.
    SELECT @Digits = STRING_AGG(Character, '') WITHIN GROUP (ORDER BY Number)
    FROM Characters
    WHERE Character LIKE '[0-9]'
    --allows going past the default maximum of 100 loops in the CTE
    OPTION (MAXRECURSION 8000)

    RETURN @Digits
END
GO
Testing
Testing for purpose:
SELECT dbo.GetNumbers(InputString) AS Numbers
FROM ( VALUES
('003Preliminary Examination Plan') --output: 003
,('Coordination005') --output: 005
,('Balance1000sheet') --output: 1000
,('(111) 222-3333') --output: 1112223333
,('1.38hello#f00.b4r#\-6') --output: 1380046
) testData(InputString)
Testing for performance:
Start off setting up the test data...
--Add table to hold test data
CREATE TABLE dbo.NumTest (String VARCHAR(8000))
--Make an 8000 character string with mix of numbers and letters
DECLARE #Num VARCHAR(8000) = REPLICATE('12tf56se',800)
--Add this to the test table 500 times
DECLARE #n INT = 0
WHILE #n < 500
BEGIN
INSERT INTO dbo.NumTest VALUES (#Num)
SET #n = #n +1
END
Now testing the dbo.GetNumbers function:
SELECT dbo.GetNumbers(NumTest.String) AS Numbers
FROM dbo.NumTest -- Time to complete: 1 min 7s
Then testing the UDF from the top voted answer on the same data.
SELECT dbo.udf_GetNumeric(NumTest.String)
FROM dbo.NumTest -- Time to complete: 3 mins 12s
Inspiration for dbo.GetNumbers
Decimals
If you need it to handle decimals, you can use either of the following approaches, I found no noticeable performance differences between them.
change '[0-9]' to '[0-9.]'
change Character LIKE '[0-9]' to ISNUMERIC(Character) = 1 (SQL treats a single decimal point as "numeric")
Bonus
You can easily adapt this to differing requirements by swapping out WHERE Character LIKE '[0-9]' with the following options:
WHERE Letter LIKE '[a-zA-Z]' --Get only letters
WHERE Letter LIKE '[0-9a-zA-Z]' --Remove non-alphanumeric
WHERE Letter LIKE '[^0-9a-zA-Z]' --Get only non-alphanumeric
With the previous queries I get these results:
'AAAA1234BBBB3333' >>>> Output: 1234
'-çã+0!\aº1234' >>>> Output: 0
The code below returns All numeric chars:
1st output: 12343333
2nd output: 01234
-- Removes every character that is not a digit 0-9 from @StringAlphaNum.
declare @StringAlphaNum varchar(255)
declare @NonDigitPosition int
set @StringAlphaNum = 'AAAA1234BBBB3333'

-- Position of the first non-digit, 0 when only digits (or nothing) are left.
-- Testing for a remaining non-digit always terminates, whereas looping until
-- isnumeric() = 1 never ends once the string is empty, and isnumeric() also
-- accepts characters such as '.', '-' or 'e'.
set @NonDigitPosition = patindex('%[^0-9]%', @StringAlphaNum)
while @NonDigitPosition > 0
begin
    set @StringAlphaNum = stuff(@StringAlphaNum, @NonDigitPosition, 1, '')
    set @NonDigitPosition = patindex('%[^0-9]%', @StringAlphaNum)
end

select @StringAlphaNum
declare #puvodni nvarchar(20)
set #puvodni = N'abc1d8e8ttr987avc'
WHILE PATINDEX('%[^0-9]%', #puvodni) > 0 SET #puvodni = REPLACE(#puvodni, SUBSTRING(#puvodni, PATINDEX('%[^0-9]%', #puvodni), 1), '' )
SELECT #puvodni
A solution for SQL Server 2017 and later, using TRANSLATE:
DECLARE #T table (string varchar(50) NOT NULL);
INSERT #T
(string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet');
SELECT
result =
REPLACE(
TRANSLATE(
T.string COLLATE Latin1_General_CI_AI,
'abcdefghijklmnopqrstuvwxyz',
SPACE(26)),
SPACE(1),
SPACE(0))
FROM #T AS T;
Output:
result
003
005
1000
The code works by:
Replacing characters a-z (ignoring case & accents) with a space
Replacing spaces with an empty string.
The string supplied to TRANSLATE can be expanded to include additional characters.
I did not have rights to create functions but had text like
["blahblah012345679"]
And needed to extract the numbers out of the middle
Note this assumes the numbers are grouped together and not at the start and end of the string.
-- Extracts the first run of digits from column_name.
-- Start  = position of the first digit.
-- Length = position of the last digit of that run - start + 1. The last
--          digit is found as "digit followed by a non-digit"; the appended
--          'x' guarantees such a follower even when the digits end the value.
select substring(column_name,
                 patindex('%[0-9]%', column_name),
                 patindex('%[0-9][^0-9]%', column_name + 'x') - patindex('%[0-9]%', column_name) + 1)
from table_name
Although this is an old thread its the first in google search, I came up with a different answer than what came before. This will allow you to pass your criteria for what to keep within a string, whatever that criteria might be. You can put it in a function to call over and over again if you want.
declare #String VARCHAR(MAX) = '-123. a 456-78(90)'
declare #MatchExpression VARCHAR(255) = '%[0-9]%'
declare #return varchar(max)
WHILE PatIndex(#MatchExpression, #String) > 0
begin
set #return = CONCAT(#return, SUBSTRING(#string,patindex(#matchexpression, #string),1))
SET #String = Stuff(#String, PatIndex(#MatchExpression, #String), 1, '')
end
select (#return)
This UDF will work for all types of strings:
-- Returns @string with every character that is not a digit 0-9 removed.
-- A NULL input returns NULL.
CREATE FUNCTION udf_getNumbersFromString (@string varchar(max))
RETURNS varchar(max)
AS
BEGIN
    -- Each pass takes the first non-digit and removes all of its occurrences,
    -- so the loop runs once per distinct unwanted character.
    -- The variable is written as @string throughout so the function also
    -- compiles on a case sensitive server.
    WHILE @string LIKE '%[^0-9]%'
        SET @string = REPLACE(@string, SUBSTRING(@string, PATINDEX('%[^0-9]%', @string), 1), '')
    RETURN @string
END
Just a little modification to @Epsicron's answer
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM (values ('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')) as a(string)
no need for a temporary variable
Firstly find out the number's starting length then reverse the string to find out the first position again(which will give you end position of number from the end). Now if you deduct 1 from both number and deduct it from string whole length you'll get only number length. Now get the number using SUBSTRING
declare #fieldName nvarchar(100)='AAAA1221.121BBBB'
declare #lenSt int=(select PATINDEX('%[0-9]%', #fieldName)-1)
declare #lenEnd int=(select PATINDEX('%[0-9]%', REVERSE(#fieldName))-1)
select SUBSTRING(#fieldName, PATINDEX('%[0-9]%', #fieldName), (LEN(#fieldName) - #lenSt -#lenEnd))
T-SQL function to read all the integers from text and return the one at the indicated index, starting from left or right, also using a starting search term (optional):
-- Reads all unsigned integers (runs of digits) from @text and returns the one
-- at position @number_position.
--
-- @text            text to scan
-- @search_term     optional; when given, only the text after the first
--                  occurrence of the term is scanned (@rtl = 0) or only the
--                  text before it (@rtl = 1)
-- @number_position 1-based index of the number to return
-- @rtl             0 = count the numbers from the left, 1 = from the right
--
-- Returns 0 when the text is NULL or empty, the search term is not found, the
-- requested position does not exist, or the number does not fit into an int.
create or alter function dbo.udf_number_from_text(
    @text nvarchar(max),
    @search_term nvarchar(1000) = N'',
    @number_position tinyint = 1,
    @rtl bit = 0
) returns int
as
begin
    declare @result int = 0;
    declare @search_term_index int = 0;

    if @text is null or len(@text) = 0 goto exit_label;
    set @text = trim(@text);
    if len(@text) = len(@search_term) goto exit_label;

    if len(@search_term) > 0
    begin
        set @search_term_index = charindex(@search_term, @text);
        if @search_term_index = 0 goto exit_label;
    end;

    -- Keep only the part of the text on the requested side of the term.
    if @search_term_index > 0
    begin
        if @rtl = 0
            set @text = trim(right(@text, len(@text) - @search_term_index - len(@search_term) + 1));
        else
            set @text = trim(left(@text, @search_term_index - 1));
    end;
    if len(@text) = 0 goto exit_label;

    declare @patt_number nvarchar(10) = '%[0-9]%';
    declare @patt_not_number nvarchar(10) = '%[^0-9]%';
    declare @number_start int = 1;
    declare @number_end int;
    -- id records the order in which the numbers were found (left to right).
    declare @found_numbers table (id int identity(1,1), val int);

    -- Repeatedly cut the text down to the next digit, store the run of digits
    -- that starts there and continue behind it.
    -- try_cast stores NULL for a run that does not fit into an int instead of
    -- aborting the whole call with a conversion error.
    while @number_start > 0
    begin
        set @number_start = patindex(@patt_number, @text);
        if @number_start > 0
        begin
            if @number_start = len(@text)
            begin
                -- The only digit left is the last character.
                insert into @found_numbers(val)
                select try_cast(substring(@text, @number_start, 1) as int);
                break;
            end;
            else
            begin
                set @text = right(@text, len(@text) - @number_start + 1);
                set @number_end = patindex(@patt_not_number, @text);
                if @number_end = 0
                begin
                    -- The rest of the text consists of digits only.
                    insert into @found_numbers(val)
                    select try_cast(@text as int);
                    break;
                end;
                else
                begin
                    insert into @found_numbers(val)
                    select try_cast(left(@text, @number_end - 1) as int);
                    if @number_end = len(@text)
                        break;
                    else
                    begin
                        set @text = trim(right(@text, len(@text) - @number_end));
                        if len(@text) = 0 break;
                    end;
                end;
            end;
        end;
    end;

    -- Number the findings from the requested side and pick the wanted one.
    if @rtl = 0
        select @result = coalesce(a.val, 0)
        from (select row_number() over (order by m.id asc) as c_row, m.val
              from @found_numbers as m) as a
        where a.c_row = @number_position;
    else
        select @result = coalesce(a.val, 0)
        from (select row_number() over (order by m.id desc) as c_row, m.val
              from @found_numbers as m) as a
        where a.c_row = @number_position;

    exit_label:
    return @result;
end;
Example:
-- Second number counted from the right, looking only at the text before 'term'.
select dbo.udf_number_from_text(N'Text text 10 text, 25 term', N'term',2,1);
returns 10;
This is one of the simplest and easiest one. This will work on the entire String for multiple occurences as well.
-- Returns all digits 0-9 of @strInput in their original order; every other
-- character is dropped. A NULL input returns an empty string.
CREATE FUNCTION dbo.fn_GetNumbers(@strInput NVARCHAR(500))
RETURNS NVARCHAR(500)
AS
BEGIN
    DECLARE @strOut NVARCHAR(500) = N'', @intCounter INT = 1, @chrCurrent NVARCHAR(1)

    WHILE @intCounter <= LEN(@strInput)
    BEGIN
        -- Read the character once and reuse it for the test and the append.
        SET @chrCurrent = SUBSTRING(@strInput, @intCounter, 1)
        IF @chrCurrent LIKE N'[0-9]'
            SET @strOut = @strOut + @chrCurrent
        SET @intCounter = @intCounter + 1
    END

    RETURN @strOut
END
Following a solution using a single common table expression (CTE).
-- Extracts the digits of every value with one recursive CTE.
-- Values that contain no digit at all do not appear in the result.
DECLARE @s AS TABLE (id int PRIMARY KEY, value nvarchar(max));
INSERT INTO @s (id, value)
VALUES
    (1, N'003Preliminary Examination Plan'),
    (2, N'Coordination005'),
    (3, N'Balance1000sheet');

SELECT id, value FROM @s ORDER BY id;

-- t: one row per character; i = position, c = character at that position.
WITH t AS (
    -- Anchor: first character of every non-empty value.
    SELECT
        id,
        1 AS i,
        SUBSTRING(value, 1, 1) AS c
    FROM
        @s
    WHERE
        LEN(value) > 0
    UNION ALL
    -- Recursive step: the next character of the same value.
    SELECT
        t.id,
        t.i + 1 AS i,
        SUBSTRING(s.value, t.i + 1, 1) AS c
    FROM
        t
        JOIN @s AS s ON t.id = s.id
    WHERE
        t.i < LEN(s.value)
)
SELECT
    id,
    STRING_AGG(c, N'') WITHIN GROUP (ORDER BY i ASC) AS value
FROM
    t
WHERE
    c LIKE '[0-9]'
GROUP BY
    id
ORDER BY
    id
-- One recursion level per character: the default limit of 100 would fail for
-- values longer than 101 characters. The recursion still ends at LEN(value).
OPTION (MAXRECURSION 0);
DECLARE #index NVARCHAR(20);
SET #index = 'abd565klaf12';
WHILE PATINDEX('%[0-9]%', #index) != 0
BEGIN
SET #index = REPLACE(#index, SUBSTRING(#index, PATINDEX('%[0-9]%', #index), 1), '');
END
SELECT #index;
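As written, this loop removes the digits. Replace [0-9] with [a-z] to remove the letters instead, so that only the numbers remain; the result can then be converted to the desired type with the CAST function.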
If we use the User Define Function, the query speed will be greatly reduced. This code extracts the number from the string....
SELECT
Reverse(substring(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) , patindex('%[0-9]%', Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) ), len(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) ))))) )) NumberValue
FROM dbo.TableName
-- PostgreSQL: returns one row with the number of ASCII letters (A-Z, a-z) and
-- the number of digits (0-9) contained in input_string.
-- A NULL or empty input yields 0 / 0.
CREATE OR REPLACE FUNCTION count_letters_and_numbers(input_string TEXT)
RETURNS TABLE (letters INT, numbers INT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        -- count() returns bigint; the cast makes the row match the declared
        -- INT columns, otherwise RETURN QUERY fails at run time.
        CAST(count(*) FILTER (WHERE chars.ch ~ '[A-Za-z]') AS INT),
        CAST(count(*) FILTER (WHERE chars.ch ~ '[0-9]') AS INT)
    -- A NULL delimiter splits the string into single characters; an empty
    -- string delimiter would return the whole string as one element.
    FROM unnest(string_to_array(input_string, NULL)) AS chars(ch);
END;
$$ LANGUAGE plpgsql;
For the hell of it...
This solution is different to all earlier solutions, viz:
There is no need to create a function
There is no need to use pattern matching
There is no need for a temporary table
This solution uses a recursive common table expression (CTE)
But first - note the question does not specify where such strings are stored. In my solution below, I create a CTE as a quick and dirty way to put these strings into some kind of "source table".
Note also - this solution uses a recursive common table expression (CTE) - so don't get confused by the usage of two CTEs here. The first is simply there to make the data available to the solution - only the second CTE is required in order to solve this problem. You can adapt the code to make this second CTE query your existing table, view, etc.
Lastly - my coding is verbose, trying to use column and CTE names that explain what is going on and you might be able to simplify this solution a little. I've added in a few pseudo phone numbers with some (expected and atypical, as the case may be) formatting for the fun of it.
-- Extracts the digits of each sample string one character per recursion step,
-- using only expressions (no function, no pattern matching, no temp table).
-- SOURCE_TABLE merely supplies the sample strings.
with SOURCE_TABLE as (
select '003Preliminary Examination Plan' as numberString
union all select 'Coordination005' as numberString
union all select 'Balance1000sheet' as numberString
union all select '1300 456 678' as numberString
union all select '(012) 995 8322 ' as numberString
union all select '073263 6122,' as numberString
),
-- FIRST_CHAR_PROCESSED: every row handles the first character of the text
-- that is still to be processed.
--   currentStringLength  len() of the text this row started with
--   firstCharAsNumeric   the first character if it casts to tinyint
--   newString            the digits collected so far
--   remainingString      the text after the first character
FIRST_CHAR_PROCESSED as (
-- Anchor member: first character of the original string.
select
len(numberString) as currentStringLength,
-- A space is replaced by 'z' first so that it fails the cast.
isNull(cast(try_cast(replace(left(numberString, 1),' ','z') as tinyint) as nvarchar),'') as firstCharAsNumeric,
-- nullIf(..., '') turns an empty (or blank) character into NULL before the cast.
cast(isNull(cast(try_cast(nullIf(left(numberString, 1),'') as tinyint) as nvarchar),'') as nvarchar(4000)) as newString,
-- NOTE(review): "nvarchar" without a length in CAST presumably defaults to 30
-- characters, which would cut off longer inputs - confirm.
cast(substring(numberString,2,len(numberString)) as nvarchar) as remainingString
from SOURCE_TABLE
union all
-- Recursive member: same processing applied to the previous row's remainder.
select
len(remainingString) as currentStringLength,
cast(try_cast(replace(left(remainingString, 1),' ','z') as tinyint) as nvarchar) as firstCharAsNumeric,
-- nvarchar(3999) + nvarchar(1) is written out so that the column type is
-- nvarchar(4000), the same as in the anchor member.
cast(isNull(newString,'') as nvarchar(3999)) + isNull(cast(try_cast(nullIf(left(remainingString, 1),'') as tinyint) as nvarchar(1)),'') as newString,
substring(remainingString,2,len(remainingString)) as remainingString
from FIRST_CHAR_PROCESSED fcp2
-- Recursion stops once the previous row started with a single character.
where fcp2.currentStringLength > 1
)
select
newString
,* -- comment this out when required
from FIRST_CHAR_PROCESSED
-- Rows that started with exactly one character are the last step of each input.
where currentStringLength = 1
So what's going on here?
Basically in our CTE we are selecting the first character and using try_cast (see docs) to cast it to a tinyint (which is a large enough data type for a single-digit numeral). Note that the type-casting rules in SQL Server say that an empty string (or a space, for that matter) will resolve to zero, so the nullif is added to force spaces and empty strings to resolve to null (see discussion) (otherwise our result would include a zero character any time a space is encountered in the source data).
The CTE also returns everything after the first character - and that becomes the input to our recursive call on the CTE; in other words: now let's process the next character.
Lastly, the field newString in the CTE is generated (in the second SELECT) via concatenation. With recursive CTEs the data type must match between the two SELECT statements for any given column - including the column size. Because we know we are adding (at most) a single character, we are casting that character to nvarchar(1) and we are casting the newString (so far) as nvarchar(3999). Concatenated, the result will be nvarchar(4000) - which matches the type casting we carry out in the first SELECT.
If you run this query and exclude the WHERE clause, you'll get a sense of what's going on - but the rows may be in a strange order. (You won't necessarily see all rows relating to a single input value grouped together - but you should still be able to follow).
Hope it's an interesting option that may help a few people wanting a strictly expression-based solution.
In Oracle
You can get what you want using this:
SUBSTR('ABCD1234EFGH',REGEXP_INSTR ('ABCD1234EFGH', '[[:digit:]]'),REGEXP_COUNT ('ABCD1234EFGH', '[[:digit:]]'))
Sample Query:
-- Oracle: for each literal, take the substring that starts at the first digit
-- (REGEXP_INSTR) and is as long as the total number of digits (REGEXP_COUNT).
-- This yields the number only when all digits form one contiguous run.
SELECT
    SUBSTR(
        '003Preliminary Examination Plan ',
        REGEXP_INSTR('003Preliminary Examination Plan ', '[[:digit:]]'),
        REGEXP_COUNT('003Preliminary Examination Plan ', '[[:digit:]]')
    ) AS SAMPLE1,
    SUBSTR(
        'Coordination005',
        REGEXP_INSTR('Coordination005', '[[:digit:]]'),
        REGEXP_COUNT('Coordination005', '[[:digit:]]')
    ) AS SAMPLE2,
    SUBSTR(
        'Balance1000sheet',
        REGEXP_INSTR('Balance1000sheet', '[[:digit:]]'),
        REGEXP_COUNT('Balance1000sheet', '[[:digit:]]')
    ) AS SAMPLE3
FROM DUAL
If you are using Postgres and you have data like '2000 - some sample text' then try substring and position combination, otherwise if in your scenario there is no delimiter, you need to write regex:
SUBSTRING(Column_name from 0 for POSITION('-' in column_name) - 1) as
number_column_name

Get Word Count of a Column using SQL

I have a table with a column called Description. The column is populated with text data. I want to create a query that returns the amount of words in each description.
My thought was to create a function that takes in a value, and returns the amount of words found in the inputted text.
SELECT dbo.GetWordCount(Description) FROM TABLE
For example, if the description is "Hello World! Have a nice day.", the query should return 6.
How can I get the word count of the description column?
See this proposed solution: http://www.sql-server-helper.com/functions/count-words.aspx
-- Returns the number of words in @InputString, where a word is a run of
-- non-space characters. Only the space character is treated as a separator.
-- A NULL or empty input returns 0.
CREATE FUNCTION [dbo].[WordCount] ( @InputString VARCHAR(4000) )
RETURNS INT
AS
BEGIN
    DECLARE @Index INT
    DECLARE @Length INT
    DECLARE @Char CHAR(1)
    DECLARE @PrevChar CHAR(1)
    DECLARE @WordCount INT

    SET @Index = 1
    SET @WordCount = 0
    -- LEN is loop-invariant, so evaluate it once. It ignores trailing spaces,
    -- and for a NULL input it is NULL, so the loop below never runs.
    SET @Length = LEN(@InputString)

    WHILE @Index <= @Length
    BEGIN
        SET @Char = SUBSTRING(@InputString, @Index, 1)
        -- Treat the position before the first character as a space so that a
        -- word starting at index 1 is counted.
        SET @PrevChar = CASE WHEN @Index = 1 THEN ' '
                             ELSE SUBSTRING(@InputString, @Index - 1, 1)
                        END

        -- A word starts wherever a non-space follows a space.
        IF @PrevChar = ' ' AND @Char != ' '
            SET @WordCount = @WordCount + 1

        SET @Index = @Index + 1
    END

    RETURN @WordCount
END
GO
Usage Example:
-- Example call: counts the words of a sample sentence with dbo.WordCount.
DECLARE @String VARCHAR(4000)
SET @String = 'Health Insurance is an insurance against expenses incurred through illness of the insured.'
SELECT [dbo].[WordCount] ( @String )
This is a little cumbersome, but it handles the whitespace issue nicely; it's fast and inline, with no UDF.
-- Inline word count without a UDF.
DECLARE @Term VARCHAR(100) = ' this is pretty fast '

SELECT
    @Term,
    -- Words = spaces left in the collapsed string; the leading space that was
    -- prepended pairs up with the first word.
    LEN(c.Collapsed) - LEN(REPLACE(c.Collapsed, ' ', '')) [Word Count]
FROM (
    -- Collapse every run of spaces to a single space: tag each space with
    -- CHAR(1), drop every "CHAR(1) + space" pair, then drop the remaining tags.
    SELECT REPLACE(REPLACE(REPLACE(' ' + @Term, ' ', ' ' + CHAR(1)), CHAR(1) + ' ', ''), CHAR(1), '') AS Collapsed
) c
Generalized Syntax:
-- Word count = number of separating spaces + 1. TRIM keeps leading/trailing
-- padding from being counted; an empty or all-space value counts as 0 words.
-- Assumes words are separated by single spaces.
SELECT
    CASE
        WHEN TRIM(column_name) = '' THEN 0
        ELSE LENGTH(TRIM(column_name)) - LENGTH(REPLACE(TRIM(column_name), ' ', '')) + 1
    END AS word_count,
    column_name1,
    column_name2
FROM table_name;
In case, if you want to calculate how many words are there in single 'address' column of a table named 'employeeDetails' then:
-- Word count = number of separating spaces + 1. TRIM keeps leading/trailing
-- padding from being counted; an empty or all-space address counts as 0 words.
-- Assumes words are separated by single spaces.
SELECT
    CASE
        WHEN TRIM(address) = '' THEN 0
        ELSE LENGTH(TRIM(address)) - LENGTH(REPLACE(TRIM(address), ' ', '')) + 1
    END AS word_count,
    address,
    employee_name
FROM employeeDetails;
In addition to Mortalus's answer I'd use an inline function rather than scalar (*Note - this function will work from SQL Server 2012 and up)
for Previous versions of SQL Server see below:
/*SQL Server 2012 and up*/
-- Inline table-valued function returning one row with a single column,
-- WordCount: the number of space-separated words in @str.
-- A word start is any non-space character preceded by a space; the position
-- before the first character is treated as a space, so leading spaces and
-- empty input are counted correctly (0 words for '' or NULL).
CREATE FUNCTION dbo.udf_WordCount
(
    @str VARCHAR(8000)
)
RETURNS TABLE AS RETURN
WITH Tally (n) AS
(
    -- 8 * 10 * 10 * 10 = 8000 candidate rows, enough for VARCHAR(8000).
    -- ISNULL keeps a NULL input from reaching TOP.
    SELECT TOP (ISNULL(LEN(@str), 0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0)) a(n)
    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) b(n)
    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) c(n)
    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d(n)
)
, BreakChar AS
(
    -- One row per character position.
    SELECT SUBSTRING(@str , n , 1) [Char] , n
    FROM Tally
)
, Analize AS
(
    -- Default of ' ' for the first row, so a word starting at position 1 is
    -- detected like any other word start.
    SELECT * , LAG([Char], 1, ' ') OVER (ORDER BY n) PrevChar
    FROM BreakChar
)
SELECT WordCount = COUNT(1)
FROM Analize
WHERE [Char] != PrevChar
AND PrevChar = ' '
How to Use:
-- Example call: the function returns a one-row table with column WordCount.
DECLARE @str varchar(1000) = 'It''s now or never I ain''t gonna live forever'
SELECT WordCount FROM dbo.udf_WordCount(@str) --> 9
For SQL Server 2008 and 2008 R2, where LAG is not available:
/*SQL Server 2008 and 2008 R2 (no LAG)*/
-- Inline table-valued function returning one row with a single column,
-- WordCount: the number of space-separated words in @str.
-- Same logic as the LAG-based version, with the previous character fetched
-- through a self-join on position instead.
CREATE FUNCTION dbo.udf_WordCount_2008
(
    @str VARCHAR(8000)
)
RETURNS TABLE AS RETURN
WITH Tally (n) AS
(
    -- 8 * 10 * 10 * 10 = 8000 candidate rows, enough for VARCHAR(8000).
    -- ISNULL keeps a NULL input from reaching TOP.
    SELECT TOP (ISNULL(LEN(@str), 0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM (VALUES (0),(0),(0),(0),(0),(0),(0),(0)) a(n)
    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) b(n)
    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) c(n)
    CROSS JOIN (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) d(n)
)
, BreakChar AS
(
    -- One row per character position.
    SELECT SUBSTRING(@str , n , 1) [Char] , n
    FROM Tally
)
, Analize AS
(
    -- LEFT JOIN keeps the first character (which has no predecessor) and
    -- treats the missing predecessor as a space, so a word starting at
    -- position 1 is counted without a blanket "+ 1".
    SELECT a.* , ISNULL(b.[Char], ' ') PrevChar
    FROM BreakChar a
    LEFT JOIN BreakChar b
    ON a.n = b.n + 1
)
SELECT WordCount = COUNT(1)
FROM Analize
WHERE [Char] != PrevChar
AND PrevChar = ' '
This answer is based on the same code used in Mortalus's answer, which I originally found here.
This solution is a more efficient and more concise version of that code. I've also added some explanation of the code that will hopefully make this answer clearer for future readers.
The following user-defined function takes in a string of text and then loops through each character of the inputted text. If the previous character was a space and the current character is not, the word count is increased by one.
Since the word count is calculated by counting the spaces between the words, there will always be 1 less space than actual words. To counteract this, start @PrevChar with the value of ' '. Then, when the loop is run for the first time and the code reaches IF @PrevChar = ' ', it will return true, and the word count will be increased by one. This works even if the text has a length of 0, since in that case it just won't get past the @Index <= LEN(@InputString) check, and the word count will never be increased. (This replaces the CASE statement used in the linked answer.)
AND @CurrentChar != ' ' is used to solve the problem of double spacing being counted as multiple words. If the previous character is a space, but the current character is also a space, move on to the next index without increasing the word count. The next iteration will then only have @PrevChar set to ' ', and so the word count will only be increased once for the double space.
-- Returns the number of words in @InputString, where a word is a run of
-- non-space characters. Only the space character is treated as a separator.
-- A NULL or empty input returns 0.
CREATE FUNCTION [dbo].[WordCount] (@InputString VARCHAR(MAX))
RETURNS INT
AS
BEGIN
    DECLARE @Index INT = 1
    -- LEN is loop-invariant, so evaluate it once. It ignores trailing spaces,
    -- and for a NULL input it is NULL, so the loop below never runs.
    DECLARE @Length BIGINT = LEN(@InputString)
    DECLARE @CurrentChar CHAR(1)
    --Initialize the previous character as a space so that a word starting at
    --the first position is counted.
    DECLARE @PrevChar CHAR(1) = ' '
    DECLARE @WordCount INT = 0

    WHILE @Index <= @Length
    BEGIN
        --Set the current character to equal the character in the index
        --position of the inputted text.
        SET @CurrentChar = SUBSTRING(@InputString, @Index, 1)

        --If the previous character was a space and the current character
        --is not a space, increase the wordcount by 1.
        IF @PrevChar = ' ' AND @CurrentChar != ' '
            SET @WordCount = @WordCount + 1

        --Increase the index counter by 1.
        SET @Index = @Index + 1

        --Now that we are done with the current character, set the previous
        --character to equal the current character.
        SET @PrevChar = @CurrentChar
    END

    RETURN @WordCount
END
Requirements: SQL Server 2016 or later.
I use this in my stored procedure. It receives a sentence, and this approach lets me handle repeated inner spaces:
-- One row per space-separated piece; consecutive spaces produce empty pieces.
SELECT value FROM STRING_SPLIT(@oracion1, ' ')
Now I filter for the values that contain text and count them:
-- Word count: drop the empty pieces produced by repeated, leading or trailing
-- spaces, then count what is left.
SELECT COUNT(value) FROM STRING_SPLIT(@oracion1, ' ') WHERE LEN(value) > 0
@oracion1 could be N'JUAN ES CARPINTERO ' or the same sentence with extra spaces between or after the words.

T-SQL trim &nbsp (and other non-alphanumeric characters)

We have some input data that sometimes appears with &nbsp characters on the end.
The data comes in from the source system as varchar() and our attempts to cast as decimal fail b/c of these characters.
Ltrim and Rtrim don't remove the characters, so we're forced to do something like:
-- Remove every non-breaking space (character code 160) from myColumn,
-- touching only the rows that actually contain one.
UPDATE myTable
SET myColumn = REPLACE(myColumn, CHAR(160), '')
WHERE myColumn LIKE '%' + CHAR(160) + '%'
This works for the &nbsp, but is there a good way to do this for any non-alphanumeric (or in this case numeric) characters?
This will remove every character that is not a letter, a digit, or an underscore:
-- Returns @BadString with every character removed that is not a letter, a
-- digit, or an underscore. Input and result are limited to 20 characters by
-- the declared nvarchar(20) types.
CREATE FUNCTION [dbo].[fnRemoveBadCharacter]
(
    @BadString nvarchar(20)
)
RETURNS nvarchar(20)
AS
BEGIN
    -- Single definition of the "unwanted character" pattern.
    DECLARE @Pattern nvarchar(20) = N'%[^a-zA-Z0-9_]%'
    DECLARE @nPos INTEGER

    SELECT @nPos = PATINDEX(@Pattern, @BadString)
    WHILE @nPos > 0
    BEGIN
        -- Delete the offending character, then look for the next one.
        SELECT @BadString = STUFF(@BadString, @nPos, 1, '')
        SELECT @nPos = PATINDEX(@Pattern, @BadString)
    END

    RETURN @BadString
END
Use the function like:
-- Example call: rewrites ColumnToUpdate in place with the function's result.
-- "whatever" is a placeholder; substitute a real row filter before running.
UPDATE TableToUpdate
SET ColumnToUpdate = dbo.fnRemoveBadCharacter(ColumnToUpdate)
WHERE whatever
This page has a sample of how you can remove non-alphanumeric chars:
-- Put something like this into a user function:
-- Repeatedly deletes the first non-digit character until none is left.
DECLARE @cString VARCHAR(32)
DECLARE @nPos INTEGER

SELECT @cString = '90$%45623 *6%}~:#'
SELECT @nPos = PATINDEX('%[^0-9]%', @cString)

WHILE @nPos > 0
BEGIN
    SELECT @cString = STUFF(@cString, @nPos, 1, '')
    SELECT @nPos = PATINDEX('%[^0-9]%', @cString)
END

-- Only the digits remain.
SELECT @cString
How is the table being populated? While it is possible to scrub this in sql a better approach would be to change the column type to int and scrub the data before it's loaded into the database (SSIS). Is this an option?
For large datasets I have had better luck with this function that checks the ASCII value. I have added options to keep only alpha, numeric or alphanumeric based on the parameters.
--CleanType 1 - Keep letters and digits (remove everything non-alphanumeric)
--          2 - Keep letters only
--          3 - Keep digits only
--LeaveSpaces 1 - additionally keep space characters, whatever the CleanType
-- Returns @InputString reduced to the characters selected above. Characters
-- are classified by ASCII code: 48-57 digits, 65-90 and 97-122 letters,
-- 32 space.
CREATE FUNCTION [dbo].[fnCleanString] (
      @InputString varchar(8000)
    , @CleanType int
    , @LeaveSpaces bit
) RETURNS varchar(8000)
AS
BEGIN
    -- // Declare variables
    -- ===========================================================
    DECLARE @Length int
          , @CurLength int = 1
          , @ReturnString varchar(8000) = ''
          -- varchar(1), not char(1): past the end of the string SUBSTRING
          -- returns '', which char(1) would pad into a real space.
          , @CurChar varchar(1)
          , @CurCode int

    SELECT @Length = len(@InputString)

    -- // Begin looping through each char checking ASCII value
    -- ===========================================================
    -- The bound is @Length + 1 because len() ignores trailing spaces; the
    -- extra position lets one trailing space through when @LeaveSpaces = 1.
    WHILE (@CurLength <= (@Length + 1))
    BEGIN
        -- Read the character and its code once per position.
        SELECT @CurChar = SUBSTRING(@InputString, @CurLength, 1)
        SELECT @CurCode = ASCII(@CurChar)

        IF (@CurCode between 48 and 57 AND @CleanType in (1,3) )
        or (@CurCode between 65 and 90 AND @CleanType in (1,2) )
        or (@CurCode between 97 and 122 AND @CleanType in (1,2) )
        or (@CurCode = 32 AND @LeaveSpaces = 1 )
        BEGIN
            SET @ReturnString = @ReturnString + @CurChar
        END

        SET @CurLength = @CurLength + 1
    END

    RETURN @ReturnString
END
If the mobile could start with a Plus(+) I will use the function like this
-- Returns @Mobile reduced to its digits, keeping a single leading '+' when the
-- input starts with one.
CREATE FUNCTION [dbo].[Mobile_NoAlpha](@Mobile VARCHAR(1000))
RETURNS VARCHAR(1000)
AS
BEGIN
    DECLARE @StartsWithPlus BIT = 0

    --check if the mobile starts with a plus(+)
    IF LEFT(@Mobile, 1) = '+'
    BEGIN
        SET @StartsWithPlus = 1
        --Take out the plus before stripping the invalid characters.
        --SUBSTRING from position 2 is used instead of RIGHT(x, LEN(x) - 1):
        --LEN ignores trailing spaces while RIGHT counts them, so input such
        --as '+123 ' used to lose its first digit.
        SET @Mobile = SUBSTRING(@Mobile, 2, LEN(@Mobile))
    END

    --Delete non-digit characters one at a time until none is left.
    WHILE PatIndex('%[^0-9]%', @Mobile) > 0
        SET @Mobile = Stuff(@Mobile, PatIndex('%[^0-9]%', @Mobile), 1, '')

    IF @StartsWithPlus = 1
        SET @Mobile = '+' + @Mobile

    RETURN @Mobile
END