Get specific Count for Character from a column without using a function or Stored Proc - sql-server-2005

I have a column on a table [SampleTable] called [MyColumn], and I would like to get the number of times a particular character appears in that column. The character is ;
Excel has a simple solution for this
=LEN()-LEN(SUBSTITUTE(,";",""))

-- Count the ';' characters in MyColumn.
-- Every ';' is doubled, so the string grows by exactly one character per
-- occurrence. Both operands end in the same characters, so LEN's habit of
-- ignoring trailing spaces cancels out. The LEN(col) - LEN(REPLACE(col, ';', ''))
-- form over-counts when removing a final ';' exposes trailing spaces.
-- NOTE(review): for non-MAX types REPLACE truncates its result at 8,000 bytes;
-- confirm MyColumn values stay well below that.
SELECT LEN(REPLACE(MyColumn, ';', ';;')) - LEN(MyColumn)
FROM SampleTable
WHERE ...

For best readability in code this is best done with a UDF. For example, the one from here:
-- Counts how many times @pSearchChar occurs in @pInput.
--   @pInput       string to scan
--   @pSearchChar  single character to look for
--   returns       number of matching characters; 0 when @pInput is NULL
CREATE FUNCTION [dbo].[ufn_CountChar] ( @pInput VARCHAR(1000), @pSearchChar CHAR(1) )
RETURNS INT
AS
BEGIN
    DECLARE @vInputLength INT
    DECLARE @vIndex INT
    DECLARE @vCount INT

    SET @vCount = 0
    SET @vIndex = 1
    -- LEN ignores trailing spaces; the sentinel makes the loop cover them too,
    -- so searching for ' ' also counts spaces at the end of the input.
    SET @vInputLength = LEN(@pInput + 'x') - 1

    WHILE @vIndex <= @vInputLength
    BEGIN
        IF SUBSTRING(@pInput, @vIndex, 1) = @pSearchChar
            SET @vCount = @vCount + 1
        SET @vIndex = @vIndex + 1
    END

    RETURN @vCount
END
GO

Related

Return all words starting with a character in a column

I have a VARCHAR column with data like this:
abc = :abc and this = :that
I need a query to find all of the special "words" that start with a colon in this column of data. I don't really need any other data (IDs or otherwise) and duplicates would be OK. I can remove duplicates in Excel later if need be. So if this was the only row, I'd like something like this as the output:
SpecialWords
:abc
:that
I'm thinking it'll require a CHARINDEX or something like that. But since there could be more than one special word in the column, I can't just find the first : and strip out the rest.
Any help is greatly appreciated! Thanks in advance!
You have to split this value on spaces and return only the fields that start with a colon (:). I have provided two solutions, depending on the result type you need (table or single value).
Table-Valued Function
You can create a TV function to split this column into a table:
-- Splits @DelimitedString on single spaces and returns only the tokens that
-- start with a colon (for example ':abc').
--   @DelimitedString  text to scan
--   returns           one row per matching token; ElementID is assigned in
--                     the order the tokens are found
-- NOTE(review): Element is varchar(1000) while the input is varchar(8000);
-- confirm that no colon-word can be longer than 1000 characters.
CREATE FUNCTION [dbo].[GETVALUES]
(
    @DelimitedString varchar(8000)
)
RETURNS @tblArray TABLE
(
    ElementID int IDENTITY(1,1), -- Array index
    Element varchar(1000)        -- Array element contents
)
AS
BEGIN
    DECLARE @Index smallint,
            @Start smallint,
            @DelSize smallint,
            @Token varchar(8000)

    SET @DelSize = 1

    -- Consume the string one space-delimited token at a time.
    WHILE LEN(@DelimitedString) > 0
    BEGIN
        SET @Index = CHARINDEX(' ', @DelimitedString)

        IF @Index = 0
        BEGIN
            -- No more delimiters: whatever is left is the last token.
            SET @Token = LTRIM(RTRIM(@DelimitedString))
            IF @Token LIKE ':%'
                INSERT INTO @tblArray (Element) VALUES (@Token)
            BREAK
        END
        ELSE
        BEGIN
            SET @Token = LTRIM(RTRIM(SUBSTRING(@DelimitedString, 1, @Index - 1)))
            IF @Token LIKE ':%'
                INSERT INTO @tblArray (Element) VALUES (@Token)

            SET @Start = @Index + @DelSize
            -- Take "everything after the delimiter". A length computed from
            -- LEN() goes negative when the input ends in a space, because LEN
            -- ignores trailing spaces, and SUBSTRING then raises an error.
            SET @DelimitedString = SUBSTRING(@DelimitedString, @Start, 8000)
        END
    END
    RETURN
END
And you can use it like the following:
-- Example: list the colon-prefixed words of a sample string.
DECLARE @SQLStr varchar(100)
SELECT @SQLStr = 'abc = :abc and this = :that and xyz = :asd'

SELECT
    ElementID,
    Element
FROM
    dbo.GETVALUES(@SQLStr)
ORDER BY
    ElementID
Result:
Scalar-Valued Function
If you need to return a single value (not a table), you can use this function, which returns all values separated by a carriage return + line feed (CHAR(13) + CHAR(10)).
-- Splits @DelimitedString on single spaces and concatenates the tokens that
-- start with a colon into one string.
--   @DelimitedString  text to scan
--   returns           every matching token, each preceded by CHAR(13) + CHAR(10)
--                     (so a non-empty result starts with a line break);
--                     '' when nothing matches
-- NOTE(review): the result is varchar(8000); inputs packed with very short
-- colon-words could exceed that once line breaks are added. Confirm this
-- cannot happen for your data.
CREATE FUNCTION dbo.GetValues2
(
    @DelimitedString varchar(8000)
)
RETURNS varchar(8000)
AS
BEGIN
    DECLARE @Index smallint,
            @Start smallint,
            @DelSize smallint,
            @Token varchar(8000),
            @Result varchar(8000)

    SET @DelSize = 1
    SET @Result = ''

    WHILE LEN(@DelimitedString) > 0
    BEGIN
        SET @Index = CHARINDEX(' ', @DelimitedString)

        IF @Index = 0
        BEGIN
            -- No more delimiters: whatever is left is the last token.
            SET @Token = LTRIM(RTRIM(@DelimitedString))
            IF @Token LIKE ':%'
                SET @Result = @Result + CHAR(13) + CHAR(10) + @Token
            BREAK
        END
        ELSE
        BEGIN
            SET @Token = LTRIM(RTRIM(SUBSTRING(@DelimitedString, 1, @Index - 1)))
            IF @Token LIKE ':%'
                SET @Result = @Result + CHAR(13) + CHAR(10) + @Token

            SET @Start = @Index + @DelSize
            -- Fixed upper bound instead of a LEN()-based length: LEN ignores
            -- trailing spaces, which made the length negative (and SUBSTRING
            -- fail) for inputs ending in a space.
            SET @DelimitedString = SUBSTRING(@DelimitedString, @Start, 8000)
        END
    END

    RETURN @Result
END
GO
you can use it as the following
-- Example: return the colon-prefixed words of a sample string as one value.
DECLARE @SQLStr varchar(100)
SELECT @SQLStr = 'abc = :abc and this = :that and xyz = :asd'

SELECT dbo.GetValues2(@SQLStr)
Result
In the grid result the line feeds are not visible; copy the data into an editor and it will appear as shown in the image.
References
Splitting the string in sql server
One way is to write a specialized SPLIT function. I would suggest getting a TSQL Split function off the internet and see if you can adapt the code to your needs.
Working from scratch, you could write a function that loops over the column value using CHARINDEX until it doesn't find any more : characters.
How about using a charindex?
rextester sample:
-- Sample table with one test value per row.
CREATE TABLE mytable (testcolumn VARCHAR(20))

INSERT INTO mytable
VALUES ('this = :that'), ('yes'), (':no'), ('abc = :abc')

-- For rows containing a colon, return the text after the last ':'.
-- The first ':' of the reversed string is the last ':' of the original.
SELECT RIGHT(testcolumn, CHARINDEX(':', REVERSE(testcolumn)) - 1)
FROM mytable
WHERE testcolumn LIKE '%:%'
reference:
SQL Select everything after character
Update
Addressing Sami's:
Didn't see that two words could be in one colon, how about this?
select replace(substring(testcolumn, charindex(':', testcolumn), len(testcolumn)), ':', '')
Update again
I see, the actual statement is this = :that and that = :this
If performance is important then you want to use an inline table valued function to split the string and extract what you need. You could use delimitedSplit8K or delimitedSplit8K_lead for this.
-- Split on spaces and keep only the items that start with a colon.
-- dbo.DelimitedSplit8K is an external splitter function; it is not defined here.
DECLARE @string VARCHAR(8000) = 'abc = :abc and this = :that';

SELECT item
FROM dbo.DelimitedSplit8K(@string, ' ')
WHERE item LIKE ':%';
returns:
item
------
:abc
:that
And for even better performance than what I posted above you could use ngrams8k like so:
-- For every ':' found by dbo.ngrams8k (external function, not defined here),
-- return its position and the text from there up to the next space.
DECLARE @string VARCHAR(8000) = 'abc = :abc and this = :that';

SELECT
    position,
    -- When no further space exists CHARINDEX returns 0; NULLIF/ISNULL turn
    -- that into 8000 so the item runs to the end of the string.
    item = SUBSTRING(
        @string,
        position,
        ISNULL(NULLIF(CHARINDEX(' ', @string, position + 1), 0), 8000) - position
    )
FROM dbo.ngrams8k(@string, 1)
WHERE token = ':';
This even gives you the location of the item you are searching for:
position item
---------- -------
7 :abc
23 :that

Is there a LastIndexOf in SQL Server?

I am trying to parse out a value from a string that involves getting the last index of a string. Currently, I am doing a horrible hack that involves reversing a string:
SELECT REVERSE(SUBSTRING(REVERSE(DB_NAME()), 1,
CHARINDEX('_', REVERSE(DB_NAME()), 1) - 1))
To me this code is nearly unreadable. I just upgraded to SQL Server 2016 and I hoping there is a better way.
Is there?
If you want everything after the last _, then use:
-- Text after the last '_' of the current database name. The appended '_'
-- guarantees a match, so a name without '_' is returned whole.
SELECT RIGHT(
    DB_NAME(),
    CHARINDEX('_', REVERSE(DB_NAME()) + '_') - 1
)
If you want everything before, then use left():
-- Text before the last '_' of the current database name.
-- CHARINDEX returns 0 when there is no '_', so the whole name is returned.
-- With a '_' sentinel appended, the computed length would be -1 in that case
-- and LEFT would raise an error.
SELECT LEFT(
    DB_NAME(),
    LEN(DB_NAME()) - CHARINDEX('_', REVERSE(DB_NAME()))
)
Wrote 2 functions, 1 to return LastIndexOf for the selected character.
-- Returns the zero-based index of the last occurrence of @pattern in @source.
--   @source   string to search
--   @pattern  single character to look for
--   returns   zero-based index, -1 when @pattern does not occur,
--             NULL when @source is NULL
CREATE FUNCTION dbo.LastIndexOf(@source nvarchar(80), @pattern char)
RETURNS int
AS
BEGIN
    DECLARE @length int,
            @reversePos int

    -- LEN ignores trailing spaces but REVERSE moves them to the front, so the
    -- true length (sentinel trick) is needed to keep both in step.
    SET @length = LEN(@source + N'x') - 1
    -- 1-based position of the match counted from the end of @source.
    SET @reversePos = CHARINDEX(@pattern, REVERSE(@source))

    IF @reversePos = 0
        RETURN -1

    RETURN @length - @reversePos
END;
GO
and 1 to return a string before this LastIndexOf. Maybe it will be useful to someone.
-- Returns the part of @source that precedes the last occurrence of @pattern.
--   @source   string to search
--   @pattern  single character to look for
--   returns   text before the last match; the whole string when @pattern
--             does not occur; NULL when @source is NULL
CREATE FUNCTION dbo.StringBeforeLastIndex(@source nvarchar(80), @pattern char)
RETURNS nvarchar(80)
AS
BEGIN
    DECLARE @length int,
            @reversePos int

    -- True length including trailing spaces (LEN alone ignores them, which
    -- would cut the result short for inputs that end in spaces).
    SET @length = LEN(@source + N'x') - 1
    -- 1-based position of the match counted from the end; 0 when not found.
    SET @reversePos = CHARINDEX(@pattern, REVERSE(@source))

    -- Number of characters in front of the last match.
    RETURN LEFT(@source, @length - @reversePos)
END;
GO
No, SQL Server doesn't have LastIndexOf.
These are the available string functions.
But you can always create your own function:
-- Returns the part of @source that follows the last occurrence of @pattern
-- (despite the name, this is a substring, not an index).
--   @source   string to search
--   @pattern  single character to look for
--   returns   text after the last match; the whole string when @pattern
--             does not occur
CREATE FUNCTION dbo.LastIndexOf(@source text, @pattern char)
RETURNS varchar(max)
AS
BEGIN
    -- Work on a varchar(max) copy: text is not a valid type for local
    -- variables, and the string functions used below do not accept it.
    DECLARE @src varchar(max);
    SET @src = @source;

    -- Appending @pattern to the reversed string guarantees a match, so the
    -- length passed to RIGHT is never negative.
    RETURN RIGHT(@src, CHARINDEX(@pattern, REVERSE(@src) + @pattern) - 1);
END;
GO
Once you have one of the split strings from here,you can do it in a set based way like this..
-- Return the last '\'-separated item of @string.
-- dbo.SplitStrings_Numbers is an external splitter function; it is not defined here.
DECLARE @string VARCHAR(MAX)
SET @string = 'C:\Program Files\Microsoft SQL Server\MSSQL\DATA\AdventureWorks_Data.mdf'

;WITH cte AS
(
    -- NOTE(review): ORDER BY (SELECT NULL) numbers the rows in whatever order
    -- the splitter happens to return them; confirm that this is the order of
    -- appearance before relying on the result.
    SELECT
        item,
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rownum
    FROM [dbo].[SplitStrings_Numbers](@string, '\')
)
SELECT TOP 1 item
FROM cte
ORDER BY rownum DESC
**Output:**
AdventureWorks_Data.mdf
-- Returns the 1-based position of the last occurrence of @delimiter in @text.
--   @text       string to search
--   @delimiter  string to look for (may be longer than one character)
--   returns     1-based start position of the last match, 0 when there is
--               no match, NULL when either argument is NULL
CREATE FUNCTION dbo.LastIndexOf(@text NTEXT, @delimiter NTEXT)
RETURNS INT
AS
BEGIN
    IF (@text IS NULL) RETURN NULL;
    IF (@delimiter IS NULL) RETURN NULL;

    DECLARE @Text2 AS NVARCHAR(MAX) = @text;
    DECLARE @Delimiter2 AS NVARCHAR(MAX) = @delimiter;

    -- Searching the reversed delimiter in the reversed text finds the last
    -- occurrence of the original.
    DECLARE @Index AS INT = CHARINDEX(REVERSE(@Delimiter2), REVERSE(@Text2));
    IF (@Index < 1) RETURN 0;

    -- Wrapping in '|' makes LEN count leading and trailing spaces as well.
    DECLARE @ContentLength AS INT = (LEN('|' + @Text2 + '|') - 2);
    DECLARE @DelimiterLength AS INT = (LEN('|' + @Delimiter2 + '|') - 2);

    -- Convert the position in the reversed text back to a 1-based position
    -- of the delimiter's first character in the original text.
    DECLARE @Result AS INT = (@ContentLength - @Index - @DelimiterLength + 2);
    RETURN @Result;
END
Allows for multi-character delimiters like ", " (comma space).
Returns 0 if the delimiter is not found.
Takes NTEXT for convenience, as NVARCHAR(MAX) values are implicitly cast to NTEXT but not vice versa.
Handles delimiters with leading or tailing space correctly!
Try:
select LEN('tran van abc') + 1 - CHARINDEX(' ', REVERSE('tran van abc'))
So, the last index of ' ' is : 9
I came across this thread while searching for a solution to my similar problem which had the exact same requirement but was for a different kind of database that was lacking the REVERSE function.
In my case this was for a OpenEdge (Progress) database, which has a slightly different syntax. This made the INSTR function available to me that most Oracle typed databases offer.
So I came up with the following code:
SELECT
INSTR(foo.filepath, '/',1, LENGTH(foo.filepath) - LENGTH( REPLACE( foo.filepath, '/', ''))) AS IndexOfLastSlash
FROM foo
However, for my specific situation (being the OpenEdge (Progress) database) this did not result into the desired behaviour because replacing the character with an empty char gave the same length as the original string. This doesn't make much sense to me but I was able to bypass the problem with the code below:
SELECT
INSTR(foo.filepath, '/',1, LENGTH( REPLACE( foo.filepath, '/', 'XX')) - LENGTH(foo.filepath)) AS IndexOfLastSlash
FROM foo
Now I understand that this code won't solve the problem for T-SQL, because there is no alternative to the INSTR function that offers the Occurrence parameter.
Just to be thorough I'll add the code needed to create this scalar function so it can be used the same way like I did in the above examples. And will do exactly what the OP wanted, serve as a LastIndexOf method for SQL Server.
-- Drop the function if it already exists
IF OBJECT_ID('INSTR', 'FN') IS NOT NULL
    DROP FUNCTION INSTR
GO

-- User-defined function to implement Oracle INSTR in SQL Server.
--   @str         string to search
--   @substr      substring to look for
--   @start       1-based position at which the search begins
--   @occurrence  which match to report (1 = first match at or after @start)
--   returns      1-based position of the requested match; 0 (or NULL, when
--                CHARINDEX returns NULL) if there are not that many matches
CREATE FUNCTION INSTR (@str VARCHAR(8000), @substr VARCHAR(255), @start INT, @occurrence INT)
RETURNS INT
AS
BEGIN
    DECLARE @found INT = @occurrence,
            @pos INT = @start;

    WHILE 1 = 1
    BEGIN
        -- Find the next occurrence
        SET @pos = CHARINDEX(@substr, @str, @pos);

        -- Nothing found
        IF @pos IS NULL OR @pos = 0
            RETURN @pos;

        -- The required occurrence found
        IF @found = 1
            BREAK;

        -- Prepare to find another occurrence, starting just past this match
        SET @found = @found - 1;
        SET @pos = @pos + 1;
    END

    RETURN @pos;
END
GO
To avoid the obvious, when the REVERSE function is available you do not need to create this scalar function and you can just get the required result like this:
SELECT
LEN(foo.filepath) - CHARINDEX('\', REVERSE(foo.filepath))+1 AS LastIndexOfSlash
FROM foo
Try this.
-- Rebuild a comma-separated list without its last element.
-- Drop the work table only if a previous run left it behind; an unconditional
-- DROP fails on the first run.
DROP TABLE IF EXISTS #temp;

DECLARE @brokername1 NVARCHAR(MAX) = 'indiabullssecurities,canmoney,indianivesh,acumencapitalmarket,sharekhan,edelweisscapital';

CREATE TABLE #temp
(
    ID INT IDENTITY(1,1) NOT NULL,
    value VARCHAR(100) NOT NULL
);

-- NOTE(review): STRING_SPLIT does not document an output order, so the ID
-- values may not follow the order of the items in the list; confirm this is
-- acceptable or use an order-preserving splitter.
INSERT INTO #temp (value)
SELECT value
FROM STRING_SPLIT(@brokername1, ',');

-- The row with the highest ID is treated as the last item and left out.
DECLARE @id INT;
SET @id = (SELECT MAX(ID) FROM #temp);

DECLARE @results VARCHAR(500);

-- Concatenate the full values. Converting each one to varchar(12) cut names
-- such as 'indiabullssecurities' short.
SELECT @results = COALESCE(@results + ',', '') + value
FROM #temp
WHERE ID < @id
ORDER BY ID;

PRINT @results;

SQL query using substring to separate data

I have data like this and I need to separate them based on
account=10825 and instance id =0
I have tried this using substring with charindex, but need to improve for further queries as it was not in the same format every time.
data:
1:
Month=12&Year=2015&Accounts=[10825].[44].[1]&Users=[RL665480003].[44]&Culture=en-US&DMSWebService=http%3A%2F%2Fausydapi01.recall.com%2Fdmswebservice%2Fdmswebservice.svc&OLTAccountID=0&OLTInstanceID=0&DaystoDestroy=90&LastLoadDate=12%2F30%2F2015 00%3A00%3A00&connectionString=Data Source%3Damatldb09%3BInitial Catalog%3DLocalizationDB%3BUser ID%3Dlocalization%3BPassword%3Dr3call%3B&ResourceType=BICustomerPortal&LastLoadDateDW=12%2F31%2F2015 12%3A00%3A00 AM&Period=12%2F30%2F2015 00%3A00%3A00
2:
Culture=en-US&Month=12&Year=2015&Accounts=[2784].[6].[1]&Users=[RL042671018].[6]&DMSWebService=http%3A%2F%2Fruss-app.recall.com%2Fdmswebservice%2Fdmswebservice.svc&OLTAccountID=0&OLTInstanceID=0&DaystoDestroy=90&ResourceType=BICustomerPortal&connectionString=Data Source%3Damatldb09%3BInitial Catalog%3DLocalizationDB%3BUser ID%3Dlocalization%3BPassword%3Dr3call%3B
What you've got there looks like a querystring from a browser (or a set of posted variables). So it will vary in length as you say, and also maybe the ordering and quantity of the variables could potentially change too.
So the only reliable way I can think of to do this is to effectively de-serialize the querystring. Since you're using SQL, we'll do it into a temp table. You can use the special characters used in querystrings: "&" (to separate parameters) and "=" (to separate the parameter name from the value) as markers to do this.
-- De-serialize a querystring into a working table of (name, value) pairs.
-- Items are separated by '&'; inside an item the first '=' separates the
-- parameter name from its value.
DECLARE @data NVARCHAR(MAX)
-- using your first data sample as an example:
SET @data = 'Month=12&Year=2015&Accounts=[10825].[44].[1]&Users=[RL665480003].[44]&Culture=en-US&DMSWebService=http%3A%2F%2Fausydapi01.recall.com%2Fdmswebservice%2Fdmswebservice.svc&OLTAccountID=0&OLTInstanceID=0&DaystoDestroy=90&LastLoadDate=12%2F30%2F2015 00%3A00%3A00&connectionString=Data Source%3Damatldb09%3BInitial Catalog%3DLocalizationDB%3BUser ID%3Dlocalization%3BPassword%3Dr3call%3B&ResourceType=BICustomerPortal&LastLoadDateDW=12%2F31%2F2015 12%3A00%3A00 AM&Period=12%2F30%2F2015 00%3A00%3A00'

DECLARE @workingtable TABLE
(
    ID [int] IDENTITY(1, 1),
    ItemID [nvarchar](2000),    -- parameter name
    ItemValue [nvarchar](2000)  -- parameter value
)

DECLARE @Item NVARCHAR(4000)
    ,@ItemID [nvarchar](2000)
    ,@ItemValue [nvarchar](2000)
    ,@pos INT
    ,@eqPos INT
    ,@delimiter1 VARCHAR(1)
    ,@delimiter2 VARCHAR(1);

SET @delimiter1 = '&';
SET @delimiter2 = '=';

-- A trailing delimiter lets the loop treat the last item like all the others.
SET @data = LTRIM(RTRIM(@data)) + @delimiter1
SET @pos = CHARINDEX(@delimiter1, @data, 1)

IF REPLACE(@data, @delimiter1, '') <> '' -- make sure there are actually any delimited items in the list
BEGIN
    WHILE @pos > 0
    BEGIN
        SET @Item = LTRIM(RTRIM(LEFT(@data, @pos - 1))) -- get the querystring parameter and its value
        SET @eqPos = CHARINDEX(@delimiter2, @Item)

        IF @eqPos > 0
        BEGIN
            SET @ItemID = LTRIM(RTRIM(LEFT(@Item, @eqPos - 1)))                   -- parameter name
            SET @ItemValue = LTRIM(RTRIM(RIGHT(@Item, LEN(@Item) - @eqPos)))      -- parameter value
            INSERT INTO @workingtable ([ItemID], [ItemValue]) VALUES (@ItemID, @ItemValue)
        END
        ELSE IF @Item <> ''
        BEGIN
            -- Item without '=': keep the name and store no value. Calling
            -- LEFT(@Item, -1) here would raise an error instead.
            INSERT INTO @workingtable ([ItemID], [ItemValue]) VALUES (@Item, NULL)
        END
        -- Empty items (for example from '&&') are skipped.

        SET @data = RIGHT(@data, LEN(@data) - @pos) -- remove the item we just extracted from the list
        SET @pos = CHARINDEX(@delimiter1, @data, 1) -- reset the position to point to the next delimiter
    END
END

SELECT ItemID, ItemValue
FROM @workingtable
ORDER BY ID
Once you've got that, you can find the specific value you want very easily. You mentioned getting the first part of the "Accounts" field. So you could get the whole Accounts field like this:
-- Whole value of the 'Accounts' parameter.
-- Must run in the same batch that declares and fills @workingtable.
SELECT ItemValue FROM @workingtable WHERE [ItemID] = 'Accounts'
or find the specific part like this:
-- First bracketed part of the 'Accounts' value, e.g. '10825' from '[10825].[44].[1]'.
-- Must run in the same batch that declares and fills @workingtable.
-- The length is the distance between the first '[' and the first ']' after it,
-- so it stays correct when the '[' is not the first character.
SELECT SUBSTRING(
           [ItemValue],
           CHARINDEX('[', [ItemValue]) + 1,
           CHARINDEX(']', [ItemValue], CHARINDEX('[', [ItemValue])) - CHARINDEX('[', [ItemValue]) - 1
       )
FROM @workingtable
WHERE [ItemID] = 'Accounts'
Obviously if you want this functionality to be easily re-usable I suggest you encapsulate it in a function or procedure.
Hope that helps.
Or you could use the following that has the limitation that the len of the accountid is not more than 100 chars.
-- Extract the first bracketed part that follows 'Accounts=' (here '10825').
DECLARE @str VARCHAR(1000)
SET @str='Month=12&Year=2015&Users=[RL665480003].[44]&Culture=en-US&DMSWebService=http%3A%2F%2Fausydapi01.recall.com%2Fdmswebservice%2Fdmswebservice.svc&OLTAccountID=0&OLTInstanceID=0&DaystoDestroy=90&LastLoadDate=12%2F30%2F2015 00%3A00%3A00&connectionString=Data Source%3Damatldb09%3BInitial Catalog%3DLocalizationDB%3BUser ID%3Dlocalization%3BPassword%3Dr3call%3B&ResourceType=BICustomerPortal&LastLoadDateDW=12%2F31%2F2015 12%3A00%3A00 AM&Period=12%2F30%2F2015 00%3A00%3A00&Accounts=[10825].[44].[1]'

-- Up to 100 characters that follow 'Accounts=['. In the PATINDEX pattern,
-- '[[]' matches a literal '['.
DECLARE @tail VARCHAR(100)
SET @tail = SUBSTRING(@str, PATINDEX('%Accounts=[[]%', @str) + LEN('Accounts=['), 100)

-- SUBSTRING with start 0 and length n yields the first n - 1 characters,
-- i.e. everything in front of the first ']'.
SELECT SUBSTRING(@tail, 0, PATINDEX('%]%', @tail))

How do I convert an int to a zero padded string in T-SQL?

Let's say I have an int with the value of 1. How can I convert that int to a zero padded string, such as 00000001?
-- STR right-aligns the number in a field of @StrLen characters, padded with
-- spaces; the spaces are then replaced by zeros.
DECLARE @MyInt INTEGER
SET @MyInt = 123
DECLARE @StrLen TINYINT
SET @StrLen = 8
SELECT REPLACE(STR(@MyInt, @StrLen), ' ', '0')
Another way is:
-- Same idea with a fixed width of 8 and no decimal places.
DECLARE @iVal INT = 1
SELECT REPLACE(STR(@iVal, 8, 0), ' ', '0')
as of SQL Server 2012 you can now do this:
-- @int is the integer to format; the second argument is a custom numeric format string.
FORMAT(@int, '0000#')
This work for me:
SELECT RIGHT('000' + CAST(Table.Field AS VARCHAR(3)),3) FROM Table
...
I created this user function
T-SQL Code :
-- Converts @intVal to a string left-padded with zeros to @intLen characters.
--   @intVal  number to format
--   @intLen  requested total length (values above 24 are treated as 24)
--   returns  zero-padded text; a '-' sign, if any, comes first and counts
--            towards the length; numbers that do not fit are returned
--            unpadded; NULL when either argument is NULL
CREATE FUNCTION CIntToChar(@intVal Int, @intLen Int) RETURNS nvarchar(24) AS BEGIN
    DECLARE @sign nvarchar(1),
            @digits nvarchar(24),
            @pad int

    IF @intLen > 24
        SET @intLen = 24

    SET @sign = CASE WHEN @intVal < 0 THEN N'-' ELSE N'' END
    -- bigint keeps ABS from overflowing on the smallest int value.
    SET @digits = CONVERT(nvarchar(24), ABS(CONVERT(bigint, @intVal)))

    -- REPLICATE returns NULL for a negative count, which would turn the whole
    -- result into NULL for numbers wider than @intLen.
    SET @pad = @intLen - LEN(@sign) - LEN(@digits)
    IF @pad < 0
        SET @pad = 0

    RETURN @sign + REPLICATE(N'0', @pad) + @digits
END
Example :
SELECT dbo.CIntToChar( 867, 6 ) AS COD_ID
OUTPUT
000867
Use FORMAT(<your number>,'00000000') use as many zeroes as you need to have digits in your final outcome.
Here is official documentation of the FORMAT function
If I'm trying to pad to a specific total length, I use the REPLICATE and DATALENGTH functions, like so:
-- Pad @INT to a total length of 3 with leading zeros.
DECLARE @INT INT
DECLARE @UNPADDED VARCHAR(3)
DECLARE @PADDED VARCHAR(3)

SET @INT = 2
SET @UNPADDED = CONVERT(VARCHAR(3), @INT)
-- Number of zeros = target length minus the length of the unpadded text.
SET @PADDED = REPLICATE('0', 3 - DATALENGTH(@UNPADDED)) + @UNPADDED

SELECT @INT, @UNPADDED, @PADDED
I used variables here for simplicity, but you see, you can specify the final length of the total string and not worry about the size of the INT that you start with as long as it's <= the final string length.
I always use:
-- Prefix a run of zeros, then keep the rightmost 8 characters.
-- @padded and @myInt are assumed to be declared by the surrounding code.
SET @padded = RIGHT('z0000000000000'
    + CONVERT(VARCHAR(30), @myInt), 8)
The z stops SQL from implicitly converting the string into an int for the addition/concatenation.
If the int can go negative you have a problem, so to get around this I sometimes do this:
-- Zero-pad to 8 digits, placing the minus sign of a negative value in front
-- of the padding.
DECLARE @iVal INT
SET @iVal = -1

SELECT
    CASE
        WHEN @iVal >= 0 THEN RIGHT(REPLICATE('0', 8) + CAST(@iVal AS NVARCHAR(8)), 8)
        ELSE '-' + RIGHT(REPLICATE('0', 8) + CAST(@iVal * -1 AS NVARCHAR(8)), 8)
    END
Very straight forward way to think about padding with '0's is, if you fixed your #_int's to have 4 decimals, you inject 4 '0's:
-- Pad @_int to 4 characters with leading zeros.
SELECT RIGHT('0000' + CONVERT(VARCHAR, @_int), 4) AS txtnum
; if your fixed space is 3, you inject 3'0's
-- Pad @_int to 3 characters with leading zeros.
SELECT RIGHT('000' + CONVERT(VARCHAR, @_int), 3) AS txtnum
; below I inject '00' to generate 99 labels for each bldg
-- For each floor number 1..99, return one label per building with the floor
-- number zero-padded to 2 digits.
DECLARE @_int INT
SET @_int = 1

WHILE @_int < 100
BEGIN
    SELECT BldgName + '.Floor_' + RIGHT('00' + CONVERT(VARCHAR, @_int), 2)
        + '.balcony'
    FROM dbo.tbl_FloorInfo
    GROUP BY BldgName

    SET @_int = @_int + 1
END
Result is:
'BldgA.Floor_01.balcony'
'BldgB.Floor_01.balcony'
'BldgC.Floor_01.balcony'
..
..
'BldgA.Floor_10.balcony'
'BldgB.Floor_10.balcony'
'BldgC.Floor_10.balcony'
..
..
..
'BldgA.Floor_99.balcony'
'BldgB.Floor_99.balcony'
'BldgC.Floor_99.balcony'
Or if you really want to go hard-core... ;-)
-- Pad @int to @MAX characters by prepending one '0' at a time.
DECLARE @int INT
SET @int = 1

DECLARE @string VARCHAR(MAX)
SET @string = CAST(@int AS VARCHAR(MAX))

DECLARE @length INT
SET @length = LEN(@string)

DECLARE @MAX INT
SET @MAX = 8

-- Only pad when the number is shorter than the target width.
IF @length < @MAX
BEGIN
    DECLARE @zeros VARCHAR(8)
    SET @zeros = ''

    DECLARE @counter INT
    SET @counter = 0

    -- Build a run of (@MAX - @length) zeros.
    WHILE (@counter < (@MAX - @length))
    BEGIN
        SET @zeros = @zeros + '0'
        SET @counter = @counter + 1
    END

    SET @string = @zeros + @string
END

PRINT @string
And then there's this one, using REPLICATE:
SELECT REPLICATE('0', 7) + '1'
Of course, you can replace the literals 7 and '1' with appropriate functions as needed; the above gives you your example. For example:
-- Prepend as many zeros as are missing to reach 8 characters.
SELECT REPLICATE('0', 8 - LEN(CONVERT(NVARCHAR, @myInt))) + CONVERT(NVARCHAR, @myInt)
will pad an integer of less than 8 places with zeros up to 8 characters.
Now, a negative number in the second argument of REPLICATE will return NULL. So, if that's a possibility (say, #myInt could be over 100 million in the above example), then you can use COALESCE to return the number without leading zeros if there are more than 8 characters:
-- As above, but fall back to the unpadded number when the padded expression
-- is NULL (REPLICATE was given a negative count).
SELECT COALESCE(
    REPLICATE('0', 8 - LEN(CONVERT(NVARCHAR, @myInt))) + CONVERT(NVARCHAR, @myInt),
    CONVERT(NVARCHAR, @myInt)
)
I think Charles Bretana's answer is the simplest and fastest. A similar solution without using STR is:
-- Reverse the digits, convert to a fixed-width CHAR (which pads with spaces
-- on the right), reverse back so the padding is on the left, then turn the
-- spaces into zeros.
SELECT REPLACE(REVERSE(
        CONVERT(CHAR(5 /*<= Target length*/)
              , REVERSE(CONVERT(VARCHAR(100), @MyInt)))
     ), ' ', '0')

T-SQL trim &nbsp (and other non-alphanumeric characters)

We have some input data that sometimes appears with &nbsp characters on the end.
The data comes in from the source system as varchar() and our attempts to cast as decimal fail b/c of these characters.
Ltrim and Rtrim don't remove the characters, so we're forced to do something like:
UPDATE myTable
SET myColumn = replace(myColumn,char(160),'')
WHERE charindex(char(160),myColumn) > 0
This works for the &nbsp, but is there a good way to do this for any non-alphanumeric (or in this case numeric) characters?
This will remove all non-alphanumeric characters:
-- Removes every character that is not matched by the class [a-zA-Z0-9_].
--   @BadString  text to clean (at most 20 characters)
--   returns     @BadString without the unmatched characters
-- NOTE(review): which characters fall inside the a-z / A-Z ranges depends on
-- the collation in effect; confirm for accented letters.
CREATE FUNCTION [dbo].[fnRemoveBadCharacter]
(
    @BadString nvarchar(20)
)
RETURNS nvarchar(20)
AS
BEGIN
    DECLARE @nPos INTEGER

    -- Position of the first unwanted character, 0 when none is left.
    SELECT @nPos = PATINDEX('%[^a-zA-Z0-9_]%', @BadString)

    WHILE @nPos > 0
    BEGIN
        -- Delete that one character, then look for the next.
        SELECT @BadString = STUFF(@BadString, @nPos, 1, '')
        SELECT @nPos = PATINDEX('%[^a-zA-Z0-9_]%', @BadString)
    END

    RETURN @BadString
END
Use the function like:
UPDATE TableToUpdate
SET ColumnToUpdate = dbo.fnRemoveBadCharacter(ColumnToUpdate)
WHERE whatever
This page has a sample of how you can remove non-alphanumeric chars:
-- Put something like this into a user function:
-- Strips every non-digit character from @cString, one character at a time.
DECLARE @cString VARCHAR(32)
DECLARE @nPos INTEGER

SELECT @cString = '90$%45623 *6%}~:#'

-- Position of the first non-digit, 0 when none is left.
SELECT @nPos = PATINDEX('%[^0-9]%', @cString)

WHILE @nPos > 0
BEGIN
    SELECT @cString = STUFF(@cString, @nPos, 1, '')
    SELECT @nPos = PATINDEX('%[^0-9]%', @cString)
END

SELECT @cString
How is the table being populated? While it is possible to scrub this in sql a better approach would be to change the column type to int and scrub the data before it's loaded into the database (SSIS). Is this an option?
For large datasets I have had better luck with this function that checks the ASCII value. I have added options to keep only alpha, numeric or alphanumeric based on the parameters.
-- Keeps only selected character classes of @InputString, chosen by ASCII code.
--   @InputString  text to clean
--   @CleanType    1 - keep letters and digits (remove everything else)
--                 2 - keep letters only
--                 3 - keep digits only
--   @LeaveSpaces  1 - also keep space characters (ASCII 32)
--   returns       the kept characters in their original order; '' when
--                 @InputString is NULL
CREATE FUNCTION [dbo].[fnCleanString] (
    @InputString varchar(8000)
  , @CleanType int
  , @LeaveSpaces bit
) RETURNS varchar(8000)
AS
BEGIN
    -- // Declare variables
    -- ===========================================================
    DECLARE @Length int
          , @CurLength int = 1
          , @Code int
          , @ReturnString varchar(8000) = ''

    -- LEN ignores trailing spaces. The sentinel gives the true length, so
    -- trailing spaces are treated like any other space. The cast to
    -- varchar(max) keeps the sentinel from being truncated on 8000-character
    -- input.
    SELECT @Length = LEN(CAST(@InputString AS varchar(max)) + 'x') - 1

    -- // Begin looping through each char checking ASCII value
    -- ===========================================================
    WHILE (@CurLength <= @Length)
    BEGIN
        SET @Code = ASCII(SUBSTRING(@InputString, @CurLength, 1))

        IF (@Code BETWEEN 48 AND 57 AND @CleanType IN (1,3))       -- 0-9
            OR (@Code BETWEEN 65 AND 90 AND @CleanType IN (1,2))   -- A-Z
            OR (@Code BETWEEN 97 AND 122 AND @CleanType IN (1,2))  -- a-z
            OR (@Code = 32 AND @LeaveSpaces = 1)                   -- space
        BEGIN
            SET @ReturnString = @ReturnString + SUBSTRING(@InputString, @CurLength, 1)
        END

        SET @CurLength = @CurLength + 1
    END

    RETURN @ReturnString
END
If the mobile could start with a Plus(+) I will use the function like this
-- Removes every non-digit character from a mobile number, keeping a single
-- leading '+' if the input starts with one.
--   @Mobile  raw mobile number
--   returns  digits only, prefixed with '+' when the input started with '+'
CREATE FUNCTION [dbo].[Mobile_NoAlpha](@Mobile VARCHAR(1000))
RETURNS VARCHAR(1000)
AS
BEGIN
    DECLARE @StartsWithPlus BIT = 0

    -- Check if the mobile starts with a plus (+)
    IF LEFT(@Mobile, 1) = '+'
    BEGIN
        SET @StartsWithPlus = 1
        -- Take out the plus before stripping the invalid characters,
        -- otherwise it would be removed together with them.
        SET @Mobile = RIGHT(@Mobile, LEN(@Mobile) - 1)
    END

    -- Delete non-digits one at a time until none is left.
    WHILE PATINDEX('%[^0-9]%', @Mobile) > 0
        SET @Mobile = STUFF(@Mobile, PATINDEX('%[^0-9]%', @Mobile), 1, '')

    IF @StartsWithPlus = 1
        SET @Mobile = '+' + @Mobile

    RETURN @Mobile
END