SQL convert string data in hexadecimal format into string text - sql

I have a table with a column X that stores large text values in hex format. I want to convert the hex to raw and validate the data, but when I use the query below
I get only part of the text, not the complete text. The original text is very large.
select UTL_RAW.CAST_TO_VARCHAR2(HEXTORAW(X)) as a from table name
I also tried the query below, but it is no use; it extracts the same partial text:
decalre #a varchar(max)
select UTL_RAW.CAST_TO_VARCHAR2(HEXTORAW(X)) as new from table name.
Kindly let me know how can i extract or see large text from sql. Sample query may be helpful.

For MS-SQL 2008 the following scalar function will convert a hex string into a varchar(max):
if exists (select * from dbo.sysobjects where name = 'f_hextostr' and xtype = 'FN')
drop function [dbo].[f_hextostr]
GO
-- Decodes a string of hexadecimal digit pairs into the characters they encode.
--   #hexstring : hex text such as '48656C70' or '0x48656c70'; upper- and
--                lower-case digits are both accepted.
-- Returns the decoded text. Pairs that are not two valid hex digits are skipped,
-- a trailing unpaired digit is ignored, and NULL input yields ''.
CREATE FUNCTION [dbo].[f_hextostr] (#hexstring VARCHAR(max))
RETURNS VARCHAR(max)
AS
begin
    declare #char1 char(1), #char2 char(1), #strlen int, #currpos int, #result varchar(max)

    -- Normalise case first: under a case-insensitive collation 'a'..'f' pass the
    -- BETWEEN 'A' AND 'F' test below, but their ASCII codes are 32 higher than
    -- the upper-case ones, so the "- 55" offset would decode them wrongly.
    set #hexstring = upper(#hexstring)
    set #strlen = len(#hexstring)
    set #currpos = 1
    set #result = ''

    -- An optional 0x prefix carries no data; start decoding after it.
    if left(#hexstring, 2) = '0X'
        set #currpos = 3

    -- Strictly "<" because every step consumes two characters.
    while #currpos < #strlen
    begin
        set #char1 = substring(#hexstring, #currpos, 1)
        set #char2 = substring(#hexstring, #currpos + 1, 1)
        if (#char1 between '0' and '9' or #char1 between 'A' and 'F')
            and (#char2 between '0' and '9' or #char2 between 'A' and 'F')
            -- '0'..'9' are ASCII 48..57, 'A'..'F' are ASCII 65..70 (65 - 10 = 55).
            set #result = #result +
                char((ascii(#char1) - case when #char1 between '0' and '9' then 48 else 55 end) * 16 +
                      ascii(#char2) - case when #char2 between '0' and '9' then 48 else 55 end)
        set #currpos = #currpos + 2
    end
    return #result
end
GO
To use just do something like:
select dbo.f_hextostr('0x3031323')
or
select dbo.f_hextostr(X) from MyTable

This can be done in SQL server using built-in conversions via varbinary data type.
-- VARCHAR TO (HEX) VARCHAR
SELECT CONVERT(VARCHAR(MAX), CONVERT(VARBINARY(MAX), 'Help'), 1) -- '0x48656C70'
-- (HEX) VARCHAR TO VARCHAR
SELECT CONVERT(VARCHAR(MAX), CONVERT(VARBINARY(MAX), '0x48656c70', 1)) -- 'Help' (requires 0x)
SELECT CONVERT(VARCHAR(MAX), CONVERT(VARBINARY(MAX), '48656c70', 2)) -- 'Help' (assumes 0x)

It seems that this works better for me.
if exists (select * from dbo.sysobjects where name = 'HexToStr' and xtype = 'FN')
drop function [dbo].[HexToStr]
GO
-- Decodes a hexadecimal string into the text it encodes.
--   #hexstring : hex digits, with or without a leading 0x (e.g. '0x48656c70'
--                or '48656c70').
-- Returns the decoded varchar(max); NULL input yields NULL. CONVERT raises an
-- error if the digits are invalid or their count is odd.
CREATE FUNCTION [dbo].[HexToStr] (#hexstring VARCHAR(max))
RETURNS VARCHAR(max)
AS
begin
    -- Drop an optional 0x prefix so a single binary style (2 = bare hex digits)
    -- covers both spellings.
    if upper(left(#hexstring, 2)) = '0X'
        set #hexstring = substring(#hexstring, 3, len(#hexstring))

    -- hex text -> varbinary -> text, using the built-in conversion.
    return convert(varchar(max), convert(varbinary(max), #hexstring, 2))
end
GO

Related

Searching for non-visible characters

I'm troubleshooting some strangeness in output of my SQL Server when I want to get records that are not null or empty string:
-- Asker's query, meant to return rows whose myString is neither NULL nor empty.
-- NOTE(review): the two predicates are joined with OR, so every non-NULL value
-- (including '') already satisfies the first one and the second adds nothing.
SELECT myString
FROM myTable
WHERE myString IS NOT NULL OR myString != ''
In addition to records that obviously fit (string values returned to SSMS's grid) I'm seeing records where the grid cell is blank. When I select the cell and try to copy it, my clipboard manager (clipmate.com) complains that the data is invalid.
If I output the query to file instead of to grid and then inspect via Hex char mode, sure enough - there are characters I would not expect (much less want) to be in there.
My database collation is: SQL_Latin1_General_CP1_CI_AS
How would I go about eliminating any/all non-visible characters?
You can try this; it will remove any characters that are not printable.
-- Returns #p_string with non-printable characters removed.
--   #p_string : text to clean.
-- Characters with codes 0-31 and DEL (127) are dropped; everything else,
-- including leading and trailing spaces, is kept. NULL input yields ''.
CREATE FUNCTION [dbo].[RemoveNonPrintableChars]
(
    #p_string varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
    declare #l_pos int = 1
    declare #l_str varchar(max) = ''
    declare #l_code int
    -- LEN() ignores trailing spaces; appending a sentinel and subtracting one
    -- gives the true character count so trailing blanks are not lost.
    declare #l_len int = len(#p_string + 'x') - 1

    while (#l_pos <= #l_len)
    begin
        set #l_code = ascii(substring(#p_string, #l_pos, 1))
        -- 32 is the first printable code (space); 127 is the DEL control character.
        if (#l_code >= 32 and #l_code <> 127)
        begin
            set #l_str = #l_str + substring(#p_string, #l_pos, 1)
        end
        set #l_pos = #l_pos + 1
    end
    return #l_str
END
declare
#l_str varchar(max) = 'andrew'--select char(7)
select dbo.[RemoveNonPrintableChars] (#l_str)
set #l_str = 'andrew'+char(7)-- add NP character
select dbo.[RemoveNonPrintableChars] (#l_str)
If by chance you can't use a UDF.
You may notice that I don't just strip the control characters here, I replace them with a space so not to concatenate strings.
-- Demo: replace every control character (code below 32) in SomeField with a
-- space, without a UDF, by rebuilding the string one character at a time.
Declare #YourTable table (SomeField varchar(50))
Insert Into #YourTable values
('Michael'+char(13)+char(10)+'LastName')

Select A.*
      ,B.Value
From  #YourTable A
Cross Apply (
    -- Outer REPLACE: collapse a pair of adjacent spaces (e.g. from CR+LF) into one.
    -- Inner REPLACE: turn each placeholder into a space.
    Select Value = replace(replace((
                Select '' + C
                From (
                    -- One row per character position; control characters are
                    -- swapped for a placeholder.
                    Select N
                          ,C = case when ASCII(Substring(A.SomeField, N, 1)) > 31
                                    then Substring(A.SomeField, N, 1)
                                    else '{--space--}' end
                    -- master..spt_values is used only as a row source for numbering.
                    From ( Select Top (Len(A.SomeField)) N = Row_Number() Over (Order By Number)
                           From master..spt_values ) N
                ) C
                Order by N
                -- TYPE + .value() returns the concatenation as plain text, so
                -- characters such as &, < and > are not turned into XML entities.
                For XML Path(''), TYPE ).value('.', 'varchar(max)')
            ,'{--space--}', ' ')
            ,'  ', ' ')
) B
Returns
SomeField Value
Michael Michael LastName
LastName
EDIT
However, If you do want a UDF consider the following non-linear approach
-- Replaces every control character (codes 0-31) in #S with a space, collapses
-- runs of spaces into a single space and trims both ends.
--   #S : text to clean.
-- Returns the cleaned text; NULL input yields NULL.
CREATE FUNCTION [dbo].[udf-Str-Strip-Control](#S varchar(max))
Returns varchar(max)
Begin
    -- cte1 is a 10-row seed; cte1 x cte1 supplies enough rows for cte2 to
    -- produce one row per control character, Char(0) .. Char(31).
    ;with cte1(N) As (Select 1 From (Values(1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)),
          cte2(C) As (Select Top (32) Char(Row_Number() over (Order By (Select NULL))-1) From cte1 a,cte1 b)
    -- The assignment is applied once per cte2 row, replacing each control
    -- character in turn.
    Select #S = Replace(#S,C,' ')
    From   cte2

    -- Each pass halves runs of spaces; repeat until no double space remains.
    While CharIndex('  ', #S) > 0
        Set #S = Replace(#S, '  ', ' ')

    Return LTrim(RTrim(#S))
End
--Select [dbo].[udf-Str-Strip-Control]('Michael'+char(13)+char(10)+'LastName') --Returns: Michael LastName
I like John's answers better, and would probably modify them if you were looking to control whether or not to include certain control characters. This is the function I used in the past to clean up some strings.
-- Removes unprintable characters (codes 0-31) from a string.
--   #StringParameter : text to clean.
--   #CleanStyle      : which control characters survive (default 1):
--                        1 = keep Tab (9), New Line (10), Carriage Return (13)
--                        2 = keep Tab (9) only
--                        3 = keep none
--                      Values above 3 are treated as 1.
-- Returns the cleaned text, or NULL when the input is NULL or when the cleaned
-- text has length 0 as measured by LEN (which ignores trailing spaces).
create function dbo.fnCleanVarchar (
    #StringParameter varchar(max)
  , #CleanStyle tinyint = 1
) returns varchar(max) as
begin;
    if #StringParameter is null
        return null;
    if #CleanStyle > 3 set #CleanStyle = 1;

    declare #StringReturn varchar(max);
    declare #StringLength int;
    declare #CharacterCode int;
    declare #CharacterCodePosition int;

    set #StringReturn = '';
    set #StringLength = len(#StringParameter);
    set #CharacterCodePosition = 1;

    while #CharacterCodePosition <= #StringLength
    begin
        set #CharacterCode = ascii(substring(#StringParameter , #CharacterCodePosition , 1))
        -- Codes above 31 are always kept; for the rest the style decides.
        -- (The original body tested an undeclared #Style variable here.)
        set #StringReturn = #StringReturn + case
                when #CharacterCode > 31
                    then char(#CharacterCode)
                when #CleanStyle = 3
                    then ''
                when #CleanStyle = 2 and #CharacterCode = 9
                    then char(9)
                when #CleanStyle = 1 and #CharacterCode in ( 9 , 10 , 13 )
                    then char(#CharacterCode)
                else ''
            end;
        set #CharacterCodePosition = #CharacterCodePosition + 1
    end;

    if len(#StringReturn) = 0
        return null;
    return #StringReturn
end;
Checking for non-visible fields is directly related to find non-visible characters, so consider these two notes:
Note 1: SQL Server ignores trailing spaces when comparing strings, so N' ' = N'' is true, and the same holds for any continuous run of empty characters;
An empty character is a character that compares equal to N''.
Note 2: There are 65536 Unicode characters, you can view them with a query like this:
-- Lists every code from 0 to 65535 next to the character NCHAR() returns for it.
WITH CTE(i, c) AS (
-- Anchor row: code 0.
SELECT 0, NCHAR(0) COLLATE SQL_Latin1_General_CP1_CI_AS -- COLLATE mirrors the asker's database collation; the author believes it is optional
UNION ALL
-- Recursive step: emit the next code until 65535 has been produced.
SELECT i+1, NCHAR(i+1) COLLATE SQL_Latin1_General_CP1_CI_AS
FROM CTE
WHERE i < 65535
)
SELECT *
FROM CTE
-- The CTE recurses 65535 times, far past the default limit of 100; 0 lifts the limit.
OPTION ( MaxRecursion 0 );
Some of those are not visible and empty like NCHAR(0), NCHAR(12288), ...,
Some of those are not visible and not empty like NCHAR(1), ...,
Some of those are visible and empty like NCHAR(502), ... !!!!.
So if your field is an nvarchar string, you will have a big problem filtering out non-visible characters; for varchar strings you have the same problem, but a smaller one.
Side note: You can use COALESCE(myString, '') != '' instead of yours ;).
SUMMARY :
Don't build a hammer machine to drive in a little pin!
When these behaviors are not important for you and your project, don't try to change, handle or create your own string-equality function ;).
A sample function to check if a string is visible(returns 1) or not(returns 0) can be like this:
-- Reports whether a string contains at least one visible character.
--   #string : text to inspect.
-- Returns 1 when something remains after removing the characters listed in
-- InvisibleChars; returns 0 when nothing remains or when the input is NULL.
CREATE FUNCTION IsVisible ( #string varchar(max) )
RETURNS bit
AS
BEGIN
    DECLARE #pString varchar(max) = #string;

    -- Characters treated as non-visible: NUL, SOH, tab, LF, VT, FF, CR, the
    -- separators 28-31, space and the non-breaking space (160).
    WITH InvisibleChars AS (
        SELECT c COLLATE SQL_Latin1_General_CP1_CI_AS AS c
        FROM (VALUES (CHAR(0)), (CHAR(1)), (CHAR(9)), (CHAR(10)),
                     (CHAR(11)), (CHAR(12)), (CHAR(13)), (CHAR(28)),
                     (CHAR(29)), (CHAR(30)), (CHAR(31)), (CHAR(32)),
                     (CHAR(160))
             ) t(c)
    )
    -- The assignment is applied once per row, stripping each character in turn.
    SELECT #pString = REPLACE(#pString, c, '')
    FROM InvisibleChars;

    -- NULL = '' is not true, so without the explicit IS NULL test a NULL input
    -- would fall through to ELSE and be reported as visible.
    RETURN CASE WHEN #pString IS NULL OR #pString = '' THEN 0 ELSE 1 END;
END
GO

Split words with a capital letter in sql

Does anyone know how to split words starting with capital letters from a string?
Example:
DECLARE #var1 varchar(100) = 'OneTwoThreeFour'
DECLARE #var2 varchar(100) = 'OneTwoThreeFourFive'
DECLARE #var3 varchar(100) = 'One'
SELECT #var1 as Col1, <?> as Col2
SELECT #var2 as Col1, <?> as Col2
SELECT #var3 as Col1, <?> as Col2
expected result:
Col1 Col2
OneTwoThreeFour One Two Three Four
OneTwoThreeFourFive One Two Three Four Five
One One
If this is not possible (or if too long) an scalar function would be okay as well.
Here is a function I created that is similar to the "removing non-alphabetic characters". How to strip all non-alphabetic characters from string in SQL Server?
This one uses a case sensitive collation which actively seeks out a non-space/capital letter combination and then uses the STUFF function to insert the space. This IS a scalar UDF, so some folks will immediately say that it will be slower than other solutions. To that notion, I say, please test it. This function does not use any table data and only loops as many times as necessary, so it will likely give you very good performance.
-- Inserts a space in front of every capital letter A-Z that directly follows a
-- non-space character, e.g. 'OneTwoThree' -> 'One Two Three'.
--   #Temp : text to split (at most 1000 characters).
-- Returns the spaced text; NULL input yields NULL.
Create Function dbo.Split_On_Upper_Case(#Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
    -- "any non-space character immediately followed by A-Z"; the binary
    -- collation makes the A-Z range case-sensitive.
    Declare #BoundaryPattern varchar(50) = '%[^ ][A-Z]%'
    Declare #BoundaryAt int = PatIndex(#BoundaryPattern collate Latin1_General_Bin, #Temp)

    While #BoundaryAt > 0
    Begin
        -- PatIndex points at the non-space character; the space goes after it.
        Set #Temp = Stuff(#Temp, #BoundaryAt + 1, 0, ' ')
        Set #BoundaryAt = PatIndex(#BoundaryPattern collate Latin1_General_Bin, #Temp)
    End

    Return #Temp
End
Call it like this:
Select dbo.Split_On_Upper_Case('OneTwoThreeFour')
Select dbo.Split_On_Upper_Case('OneTwoThreeFour')
Select dbo.Split_On_Upper_Case('One')
Select dbo.Split_On_Upper_Case('OneTwoThree')
Select dbo.Split_On_Upper_Case('stackOverFlow')
Select dbo.Split_On_Upper_Case('StackOverFlow')
Here is a function I have just created.
FUNCTION
-- Inserts a space in front of every capital letter A-Z that is not already
-- preceded by a space, e.g. 'OneTwoThree' -> 'One Two Three'.
--   #String : text to split.
-- Returns the spaced text with leading spaces trimmed. Digits, punctuation and
-- lower-case letters never start a new word. NULL input yields ''.
CREATE FUNCTION dbo.Split_On_Upper_Case
(
    #String VARCHAR(4000)
)
RETURNS VARCHAR(4000)
AS
BEGIN
    DECLARE #Char CHAR(1);
    -- Seeded with a space so the first character never gets a separator.
    DECLARE #Prev CHAR(1) = ' ';
    -- SUBSTRING positions are 1-based; starting at 0 would read an empty string.
    DECLARE #i INT = 1;
    DECLARE #Len INT = LEN(#String);
    DECLARE #OutString VARCHAR(4000) = '';

    WHILE (#i <= #Len)
    BEGIN
        SET #Char = SUBSTRING(#String, #i, 1);
        -- The binary collation keeps the range to the 26 capitals; comparing
        -- with UPPER() would also match digits, punctuation and spaces.
        IF (#Char LIKE '[A-Z]' COLLATE Latin1_General_BIN AND #Prev <> ' ')
            SET #OutString = #OutString + ' ' + #Char;
        ELSE
            SET #OutString = #OutString + #Char;
        SET #Prev = #Char;
        SET #i += 1;
    END

    SET #OutString = LTRIM(#OutString);
    RETURN #OutString;
END
Test Data
DECLARE #TABLE TABLE (Strings VARCHAR(1000))
INSERT INTO #TABLE
VALUES ('OneTwoThree') ,
('FourFiveSix') ,
('SevenEightNine')
Query
SELECT dbo.Split_On_Upper_Case(Strings) AS Vals
FROM #TABLE
Result Set
╔══════════════════╗
║ Vals ║
╠══════════════════╣
║ One Two Three ║
║ Four Five Six ║
║ Seven Eight Nine ║
╚══════════════════╝
If a single query is needed 26 REPLACE can be used to check every upper case letter like
SELECT #var1 col1, REPLACE(
REPLACE(
REPLACE(
...
REPLACE(#var1, 'A', ' A')
, ...
, 'X', ' X')
, 'Y', ' Y')
, 'Z', ' Z') col2
Not the most beautiful thing but it'll work.
EDIT
Just to add another function to do the same thing in a different way of the other answers
-- Prefixes every character that equals its own upper-case form with a space
-- and returns the concatenation, e.g. 'OneTwo' -> ' One Two'.
--   #param : text to split; only the first 10,000 characters can be
--            addressed by the four-digit number generator below.
-- Returns the spaced text (note the leading space when the first character is
-- upper case); NULL input yields NULL.
-- NOTE(review): the result relies on SQL Server concatenating rows into the
-- variable in the CTE's TOP ... ORDER BY order, which is not a documented
-- guarantee.
CREATE FUNCTION splitCapital (#param Varchar(MAX))
RETURNS Varchar(MAX)
AS
BEGIN
    -- TOP (NULL) is not valid, so NULL input is answered up front.
    IF #param IS NULL
        RETURN NULL;

    Declare #ret Varchar(MAX) = '';
    declare #len int = len(#param);

    WITH Base10(N) AS (
                  SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
        UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7
        UNION ALL SELECT 8 UNION ALL SELECT 9
    ),
    -- The column list must use the name the outer query references; the
    -- original declared it as N and then selected nthChar.
    Chars(nthChar) As (
        Select TOP(#len)
               substring(#param, u.N + t.N*10 + h.N*100 + th.N*1000 + 1, 1)
               Collate Latin1_General_CS_AI
        FROM Base10 u
             CROSS JOIN Base10 t
             CROSS JOIN Base10 h
             CROSS JOIN Base10 th
        WHERE u.N + t.N*10 + h.N*100 + th.N*1000 < #len
        ORDER BY u.N + t.N*10 + h.N*100 + th.N*1000
    )
    SELECT #ret += Case nthChar
                     When UPPER(nthChar) Then ' '
                     Else ''
                   End + nthChar
    FROM Chars

    RETURN #ret;
END
This one uses the possibility of TSQL to concatenate string variable, I had to use the TOP N trick to force the Chars CTE rows in the right order
Build a Numbers table. There are some excellent posts on SO to show you how to do this. Populate it with values up the maximum length of your input string. Select the values from 1 through the actual length of the current input string. Cross join this list of numbers to the input string. Use the result to SUBSTRING() each character. Then you can either compare the resulting list of one-charachter values to a pre-populated table-valued variable or convert each character to an integer using ASCII() and choose only those between 65 ('A') and 90 ('Z'). At this point you have a list which is the position of each upper-case character in your input string. UNION the maximum length of your input string onto the end of this list. You'll see why in just a second. Now you can SUBSTRING() your input variable, starting at the Number given by row N and taking a length of (the Number given by row N+1) - (The number given by row N). This is why you have to UNION the extra Number on the end. Finally concatenate all these substring together, space-separated, using the algorithm of your choice.
Sorry, don't have an instance in front of me to try out code. Sounds like a fun task. I think doing it with nested SELECT statements will get convoluted and un-maintainable; better to lay it out as CTEs, IMHO.
I know that there are already some good answers out there, but if you wanted to avoid creating a function, you could also use a recursive CTE to accomplish this. It's certainly not a clean way of doing this, but it works.
-- Inserts a space before every capital letter A-Z that follows a non-space
-- character, using a recursive CTE instead of a function.
DECLARE
    #camelcase nvarchar(4000) = 'ThisIsCamelCased'
;
WITH
    split
AS
(
    -- Anchor: the untouched input.
    SELECT
        [iteration] = 0
        ,[string] = #camelcase
    UNION ALL
    -- Each step inserts one space after the first "non-space followed by a
    -- capital" match; the recursion ends when no match is left.
    SELECT
        [iteration] = split.[iteration] + 1
        ,[string] = STUFF(split.[string], pattern.[index] + 1, 0, ' ')
    FROM
        split
    CROSS APPLY
        ( SELECT [index] = PATINDEX(N'%[^ ][A-Z]%' COLLATE Latin1_General_Bin, split.[string]) )
        pattern
    WHERE
        pattern.[index] > 0
)
-- The last iteration holds the fully spaced string.
SELECT TOP (1)
    [spaced] = split.[string]
FROM
    split
ORDER BY
    split.[iteration] DESC
-- One recursion per inserted space: the default limit of 100 would fail on
-- inputs needing more insertions, and 4000 covers the declared maximum length.
OPTION (MAXRECURSION 4000)
;
As I said, this isn't a pretty way to write a query, but I use things like this when I'm just writing up some ad-hoc queries where I would not want to add new artifacts to the database. You could also use this to create your function as an inline table valued function, which is always a tad nicer.
Please Try This:
declare #t nvarchar (100) ='IamTheTestString'
declare #len int
declare #Counter int =0
declare #Final nvarchar (100) =''
set #len =len( #t)
while (#Counter <= #len)
begin
set #Final= #Final + Case when ascii(substring (#t,#Counter,1))>=65 and
ascii(substring (#t,#Counter,1))<=90 then ' '+substring (#t,#Counter,1) else
substring (#t,#Counter,1) end
set #Counter=#Counter+1
end
print ltrim(#Final)

Query to get only numbers from a string

I have data like this:
string 1: 003Preliminary Examination Plan
string 2: Coordination005
string 3: Balance1000sheet
The output I expect is
string 1: 003
string 2: 005
string 3: 1000
And I want to implement it in SQL.
First create this UDF
-- Strips every non-digit character from a string.
--   #strAlphaNumeric : text to filter (at most 256 characters).
-- Returns the digits in their original order; NULL input yields '0'.
CREATE FUNCTION dbo.udf_GetNumeric
(
    #strAlphaNumeric VARCHAR(256)
)
RETURNS VARCHAR(256)
AS
BEGIN
    DECLARE #nonDigitPos INT

    WHILE 1 = 1
    BEGIN
        -- Position of the first remaining non-digit; 0 when none is left.
        SET #nonDigitPos = PATINDEX('%[^0-9]%', #strAlphaNumeric)
        IF #nonDigitPos > 0
            -- Delete just that one character and look again.
            SET #strAlphaNumeric = STUFF(#strAlphaNumeric, #nonDigitPos, 1, '')
        ELSE
            BREAK
    END

    RETURN ISNULL(#strAlphaNumeric,0)
END
GO
Now use the function as
SELECT dbo.udf_GetNumeric(column_name)
from table_name
SQL FIDDLE
I hope this solved your problem.
Reference
Try this one -
Query:
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
SELECT LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(string, pos, LEN(string))
FROM (
SELECT string, pos = PATINDEX('%[0-9]%', string)
FROM #temp
) d
) t
Output:
----------
003
005
1000
Query:
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM #temp
Please try:
declare #var nvarchar(max)='Balance1000sheet'
SELECT LEFT(Val,PATINDEX('%[^0-9]%', Val+'a')-1) from(
SELECT SUBSTRING(#var, PATINDEX('%[0-9]%', #var), LEN(#var)) Val
)x
Getting only numbers from a string can be done in a one-liner.
Try this :
SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here'))
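NB: This returns everything from the first digit to the end of the string, e.g. abc123vfg34 returns 123vfg34; to isolate the first number (123), cut the result at its first non-digit with a second PATINDEX.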
I found this approach works about 3x faster than the top voted answer. Create the following function, dbo.GetNumbers:
-- Returns only the digits 0-9 of a string, in their original order.
--   #String : text to filter (at most 8000 characters).
-- Returns the digits, or NULL when the input is NULL or contains no digit.
-- Requires STRING_AGG.
CREATE FUNCTION dbo.GetNumbers(#String VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN;
    WITH
    Numbers
    AS (
        --Step 1.
        --Get a column of numbers to represent
        --every character position in the #String.
        SELECT 1 AS Number
        UNION ALL
        SELECT Number + 1
        FROM Numbers
        WHERE Number < LEN(#String)
        )
    ,Characters
    AS (
        --Step 2.
        --Use the column of numbers generated above
        --to tell substring which character to extract.
        --The position is kept so the digits can be put back in order.
        SELECT Number
              ,SUBSTRING(#String, Number, 1) AS Character
        FROM Numbers
        )
    --Step 3.
    --Pattern match to return only numbers from the CTE
    --and use STRING_AGG to rebuild it into a single string.
    --Without WITHIN GROUP the concatenation order is not guaranteed.
    SELECT #String = STRING_AGG(Character,'') WITHIN GROUP (ORDER BY Number)
    FROM Characters
    WHERE Character LIKE '[0-9]'
    --allows going past the default maximum of 100 loops in the CTE
    OPTION (MAXRECURSION 8000)

    RETURN #String
END
GO
Testing
Testing for purpose:
SELECT dbo.GetNumbers(InputString) AS Numbers
FROM ( VALUES
('003Preliminary Examination Plan') --output: 003
,('Coordination005') --output: 005
,('Balance1000sheet') --output: 1000
,('(111) 222-3333') --output: 1112223333
,('1.38hello#f00.b4r#\-6') --output: 1380046
) testData(InputString)
Testing for performance:
Start off setting up the test data...
--Add table to hold test data
CREATE TABLE dbo.NumTest (String VARCHAR(8000))
--Make an 8000 character string with mix of numbers and letters
DECLARE #Num VARCHAR(8000) = REPLICATE('12tf56se',800)
--Add this to the test table 500 times
DECLARE #n INT = 0
WHILE #n < 500
BEGIN
INSERT INTO dbo.NumTest VALUES (#Num)
SET #n = #n +1
END
Now testing the dbo.GetNumbers function:
SELECT dbo.GetNumbers(NumTest.String) AS Numbers
FROM dbo.NumTest -- Time to complete: 1 min 7s
Then testing the UDF from the top voted answer on the same data.
SELECT dbo.udf_GetNumeric(NumTest.String)
FROM dbo.NumTest -- Time to complete: 3 mins 12s
Inspiration for dbo.GetNumbers
Decimals
If you need it to handle decimals, you can use either of the following approaches, I found no noticeable performance differences between them.
change '[0-9]' to '[0-9.]'
change Character LIKE '[0-9]' to ISNUMERIC(Character) = 1 (SQL treats a single decimal point as "numeric")
Bonus
You can easily adapt this to differing requirements by swapping out WHERE Character LIKE '[0-9]' with the following options:
WHERE Character LIKE '[a-zA-Z]' --Get only letters
WHERE Character LIKE '[0-9a-zA-Z]' --Remove non-alphanumeric
WHERE Character LIKE '[^0-9a-zA-Z]' --Get only non-alphanumeric
With the previous queries I get these results:
'AAAA1234BBBB3333' >>>> Output: 1234
'-çã+0!\aº1234' >>>> Output: 0
The code below returns All numeric chars:
1st output: 12343333
2nd output: 01234
-- Keeps only the digits 0-9 of #StringAlphaNum and selects the result.
-- The loop removes one non-digit per pass and stops when none is left, so it
-- also terminates for inputs without any digit (the result is then '').
declare #StringAlphaNum varchar(255)
declare #NonDigitPosition int

set #StringAlphaNum = 'AAAA1234BBBB3333'

-- Position of the first non-digit character; 0 when the string is all digits.
set #NonDigitPosition = patindex('%[^0-9]%', #StringAlphaNum)

while #NonDigitPosition > 0
begin
    -- Delete exactly that character, then look for the next one.
    set #StringAlphaNum = stuff(#StringAlphaNum, #NonDigitPosition, 1, '')
    set #NonDigitPosition = patindex('%[^0-9]%', #StringAlphaNum)
end

select #StringAlphaNum
declare #puvodni nvarchar(20)
set #puvodni = N'abc1d8e8ttr987avc'
WHILE PATINDEX('%[^0-9]%', #puvodni) > 0 SET #puvodni = REPLACE(#puvodni, SUBSTRING(#puvodni, PATINDEX('%[^0-9]%', #puvodni), 1), '' )
SELECT #puvodni
A solution for SQL Server 2017 and later, using TRANSLATE:
DECLARE #T table (string varchar(50) NOT NULL);
INSERT #T
(string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet');
SELECT
result =
REPLACE(
TRANSLATE(
T.string COLLATE Latin1_General_CI_AI,
'abcdefghijklmnopqrstuvwxyz',
SPACE(26)),
SPACE(1),
SPACE(0))
FROM #T AS T;
Output:
result
003
005
1000
The code works by:
Replacing characters a-z (ignoring case & accents) with a space
Replacing spaces with an empty string.
The string supplied to TRANSLATE can be expanded to include additional characters.
I did not have rights to create functions but had text like
["blahblah012345679"]
And needed to extract the numbers out of the middle
Note this assumes the numbers are grouped together and not at the start and end of the string.
select substring(column_name,patindex('%[0-9]%', column_name),patindex('%[0-9][^0-9]%', column_name)-patindex('%[0-9]%', column_name)+1)
from table name
Although this is an old thread its the first in google search, I came up with a different answer than what came before. This will allow you to pass your criteria for what to keep within a string, whatever that criteria might be. You can put it in a function to call over and over again if you want.
declare #String VARCHAR(MAX) = '-123. a 456-78(90)'
declare #MatchExpression VARCHAR(255) = '%[0-9]%'
declare #return varchar(max)
WHILE PatIndex(#MatchExpression, #String) > 0
begin
set #return = CONCAT(#return, SUBSTRING(#string,patindex(#matchexpression, #string),1))
SET #String = Stuff(#String, PatIndex(#MatchExpression, #String), 1, '')
end
select (#return)
This UDF will work for all types of strings:
-- Removes every non-digit character from a string.
--   #string : text to filter.
-- Returns the remaining digits in order; NULL input yields NULL.
CREATE FUNCTION udf_getNumbersFromString (#string varchar(max))
RETURNS varchar(max)
AS
BEGIN
    -- Position of the first non-digit still present; 0 when none is left.
    DECLARE #nonDigitAt int = PATINDEX('%[^0-9]%', #string)

    WHILE #nonDigitAt > 0
    BEGIN
        -- REPLACE removes every occurrence of that character in one go.
        SET #string = REPLACE(#string, SUBSTRING(#string, #nonDigitAt, 1), '')
        SET #nonDigitAt = PATINDEX('%[^0-9]%', #string)
    END

    RETURN #string
END
Just a little modification to #Epsicron 's answer
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM (values ('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')) as a(string)
no need for a temporary variable
First find the position where the number starts, then reverse the string and find the first digit again (which gives the position of the number's end, counted from the end of the string). If you subtract 1 from each of these positions and subtract both results from the length of the whole string, you get the length of the number. Then extract the number using SUBSTRING.
declare #fieldName nvarchar(100)='AAAA1221.121BBBB'
declare #lenSt int=(select PATINDEX('%[0-9]%', #fieldName)-1)
declare #lenEnd int=(select PATINDEX('%[0-9]%', REVERSE(#fieldName))-1)
select SUBSTRING(#fieldName, PATINDEX('%[0-9]%', #fieldName), (LEN(#fieldName) - #lenSt -#lenEnd))
T-SQL function to read all the integers from text and return the one at the indicated index, starting from left or right, also using a starting search term (optional):
-- Collects the runs of digits found in #text, casts each run to int, and
-- returns the one at position #number_position.
--   #text            : text to scan.
--   #search_term     : optional; when non-empty, only the text after its first
--                      occurrence (#rtl = 0) or before it (#rtl = 1) is scanned.
--   #number_position : 1-based index of the number to return.
--   #rtl             : 0 counts the found numbers in the order they were found,
--                      1 counts them in reverse order.
-- Returns 0 when #text is NULL or empty, when #search_term is given but not
-- found, or when fewer than #number_position numbers were found.
-- NOTE(review): each digit run is cast to int; a run too long for int would
-- presumably raise a conversion error - confirm.
create or alter function dbo.udf_number_from_text(
#text nvarchar(max),
#search_term nvarchar(1000) = N'',
#number_position tinyint = 1,
#rtl bit = 0
) returns int
as
begin
declare #result int = 0;
declare #search_term_index int = 0;
if #text is null or len(#text) = 0 goto exit_label;
set #text = trim(#text);
-- Text exactly as long as the search term leaves nothing to scan.
if len(#text) = len(#search_term) goto exit_label;
if len(#search_term) > 0
begin
set #search_term_index = charindex(#search_term, #text);
if #search_term_index = 0 goto exit_label;
end;
-- Narrow the text to the side of the search term selected by #rtl.
if #search_term_index > 0
if #rtl = 0
set #text = trim(right(#text, len(#text) - #search_term_index - len(#search_term) + 1));
else
set #text = trim(left(#text, #search_term_index - 1));
if len(#text) = 0 goto exit_label;
declare #patt_number nvarchar(10) = '%[0-9]%';
declare #patt_not_number nvarchar(10) = '%[^0-9]%';
declare #number_start int = 1;
declare #number_end int;
-- The identity column records the order in which the numbers were found.
declare #found_numbers table (id int identity(1,1), val int);
-- Repeatedly cut the leading digit run off #text until no digit is left.
while #number_start > 0
begin
set #number_start = patindex(#patt_number, #text);
if #number_start > 0
begin
-- The first digit is the last character: a single-digit number ends the scan.
if #number_start = len(#text)
begin
insert into #found_numbers(val)
select cast(substring(#text, #number_start, 1) as int);
break;
end;
else
begin
-- Drop everything before the first digit, then find where the run ends.
set #text = right(#text, len(#text) - #number_start + 1);
set #number_end = patindex(#patt_not_number, #text);
-- No non-digit left: the rest of the text is one number.
if #number_end = 0
begin
insert into #found_numbers(val)
select cast(#text as int);
break;
end;
else
begin
insert into #found_numbers(val)
select cast(left(#text, #number_end - 1) as int);
-- The terminating non-digit is the last character: nothing left to scan.
if #number_end = len(#text)
break;
else
begin
set #text = trim(right(#text, len(#text) - #number_end));
if len(#text) = 0 break;
end;
end;
end;
end;
end;
-- Pick the requested number; #result keeps its initial 0 when no row matches.
if #rtl = 0
select #result = coalesce(a.val, 0)
from (select row_number() over (order by m.id asc) as c_row, m.val
from #found_numbers as m) as a
where a.c_row = #number_position;
else
select #result = coalesce(a.val, 0)
from (select row_number() over (order by m.id desc) as c_row, m.val
from #found_numbers as m) as a
where a.c_row = #number_position;
exit_label:
return #result;
end;
Example:
select dbo.udf_number_from_text(N'Text text 10 text, 25 term', N'term',2,1);
returns 10;
This is one of the simplest and easiest one. This will work on the entire String for multiple occurences as well.
-- Returns the digits 0-9 of a string, in their original order.
--   #strInput : text to filter (at most 500 characters).
-- Returns the digits, or '' when there are none or the input is NULL.
CREATE FUNCTION dbo.fn_GetNumbers(#strInput NVARCHAR(500))
RETURNS NVARCHAR(500)
AS
BEGIN
    DECLARE #digits NVARCHAR(500) = ''
    DECLARE #position INT = 1
    DECLARE #current NVARCHAR(1)

    -- Walk the input one character at a time and keep only the digits.
    WHILE #position <= LEN(#strInput)
    BEGIN
        SET #current = SUBSTRING(#strInput, #position, 1)
        IF #current LIKE '[0-9]'
            SET #digits = #digits + #current
        SET #position = #position + 1
    END

    RETURN #digits
END
Following a solution using a single common table expression (CTE).
DECLARE #s AS TABLE (id int PRIMARY KEY, value nvarchar(max));
INSERT INTO #s
VALUES
(1, N'003Preliminary Examination Plan'),
(2, N'Coordination005'),
(3, N'Balance1000sheet');
SELECT * FROM #s ORDER BY id;
-- Splits every value of #s into one row per character with a recursive CTE,
-- keeps the digits and glues them back together per id, in position order.
WITH t AS (
    -- Anchor: first character of each non-empty value.
    SELECT
        id,
        1 AS i,
        SUBSTRING(value, 1, 1) AS c
    FROM
        #s
    WHERE
        LEN(value) > 0
    UNION ALL
    -- Recursive step: the next character of the same value.
    SELECT
        t.id,
        t.i + 1 AS i,
        SUBSTRING(s.value, t.i + 1, 1) AS c
    FROM
        t
        JOIN #s AS s ON t.id = s.id
    WHERE
        t.i < LEN(s.value)
)
SELECT
    id,
    STRING_AGG(c, N'') WITHIN GROUP (ORDER BY i ASC) AS value
FROM
    t
WHERE
    c LIKE '[0-9]'
GROUP BY
    id
ORDER BY
    id
-- One recursion per character: the default limit of 100 would fail on longer
-- values, and 0 removes the limit.
OPTION (MAXRECURSION 0);
DECLARE #index NVARCHAR(20);
SET #index = 'abd565klaf12';
WHILE PATINDEX('%[0-9]%', #index) != 0
BEGIN
SET #index = REPLACE(#index, SUBSTRING(#index, PATINDEX('%[0-9]%', #index), 1), '');
END
SELECT #index;
One can replace [0-9] with [a-z] if numbers only are wanted with desired castings using the CAST function.
If we use the User Define Function, the query speed will be greatly reduced. This code extracts the number from the string....
SELECT
Reverse(substring(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) , patindex('%[0-9]%', Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) ), len(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) ))))) )) NumberValue
FROM dbo.TableName
-- Counts the ASCII letters and the digits contained in input_string.
--   input_string : text to inspect.
-- Returns one row (letters, numbers); both are 0 for an empty or NULL input.
CREATE OR REPLACE FUNCTION count_letters_and_numbers(input_string TEXT)
RETURNS TABLE (letters INT, numbers INT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        -- sum() yields bigint (and NULL over zero rows); the declared column
        -- type is INT, so default to 0 and cast explicitly.
        COALESCE(sum(CASE WHEN chars.ch ~ '[A-Za-z]' THEN 1 ELSE 0 END), 0)::INT,
        COALESCE(sum(CASE WHEN chars.ch ~ '[0-9]' THEN 1 ELSE 0 END), 0)::INT
    -- A NULL delimiter splits the string into single characters; an empty-string
    -- delimiter would return the whole string as one element. The alias differs
    -- from the parameter name so column references are not ambiguous.
    FROM unnest(string_to_array(input_string, NULL)) AS chars(ch);
END;
$$ LANGUAGE plpgsql;
For the hell of it...
This solution is different to all earlier solutions, viz:
There is no need to create a function
There is no need to use pattern matching
There is no need for a temporary table
This solution uses a recursive common table expression (CTE)
But first - note the question does not specify where such strings are stored. In my solution below, I create a CTE as a quick and dirty way to put these strings into some kind of "source table".
Note also - this solution uses a recursive common table expression (CTE) - so don't get confused by the usage of two CTEs here. The first is simply to make the data avaliable to the solution - but it is only the second CTE that is required in order to solve this problem. You can adapt the code to make this second CTE query your existing table, view, etc.
Lastly - my coding is verbose, trying to use column and CTE names that explain what is going on and you might be able to simplify this solution a little. I've added in a few pseudo phone numbers with some (expected and atypical, as the case may be) formatting for the fun of it.
-- Extracts the digits of each sample string with a recursive CTE that consumes
-- one character per step and appends it to newString when it casts to a number.
-- SOURCE_TABLE only supplies sample data.
with SOURCE_TABLE as (
select '003Preliminary Examination Plan' as numberString
union all select 'Coordination005' as numberString
union all select 'Balance1000sheet' as numberString
union all select '1300 456 678' as numberString
union all select '(012) 995 8322 ' as numberString
union all select '073263 6122,' as numberString
),
FIRST_CHAR_PROCESSED as (
-- Anchor member: handle the first character of every source string.
-- NOTE(review): "as nvarchar" without a length presumably defaults to 30
-- characters in CAST, which would truncate remainingString for longer inputs - confirm.
select
len(numberString) as currentStringLength,
isNull(cast(try_cast(replace(left(numberString, 1),' ','z') as tinyint) as nvarchar),'') as firstCharAsNumeric,
cast(isNull(cast(try_cast(nullIf(left(numberString, 1),'') as tinyint) as nvarchar),'') as nvarchar(4000)) as newString,
cast(substring(numberString,2,len(numberString)) as nvarchar) as remainingString
from SOURCE_TABLE
union all
-- Recursive member: handle the first character of what is left and append it
-- to newString when try_cast succeeds. nvarchar(3999) + nvarchar(1) keeps the
-- column type equal to the anchor's nvarchar(4000).
select
len(remainingString) as currentStringLength,
cast(try_cast(replace(left(remainingString, 1),' ','z') as tinyint) as nvarchar) as firstCharAsNumeric,
cast(isNull(newString,'') as nvarchar(3999)) + isNull(cast(try_cast(nullIf(left(remainingString, 1),'') as tinyint) as nvarchar(1)),'') as newString,
substring(remainingString,2,len(remainingString)) as remainingString
from FIRST_CHAR_PROCESSED fcp2
-- Stop once the previous step had a single character left.
where fcp2.currentStringLength > 1
)
select
newString
,* -- comment this out when required
from FIRST_CHAR_PROCESSED
-- Only the rows that processed the final character carry the finished result.
where currentStringLength = 1
So what's going on here?
Basically in our CTE we are selecting the first character and using try_cast (see docs) to cast it to a tinyint (which is a large enough data type for a single-digit numeral). Note that the type-casting rules in SQL Server say that an empty string (or a space, for that matter) will resolve to zero, so the nullif is added to force spaces and empty strings to resolve to null (see discussion) (otherwise our result would include a zero character any time a space is encountered in the source data).
The CTE also returns everything after the first character - and that becomes the input to our recursive call on the CTE; in other words: now let's process the next character.
Lastly, the field newString in the CTE is generated (in the second SELECT) via concatenation. With recursive CTEs the data type must match between the two SELECT statements for any given column - including the column size. Because we know we are adding (at most) a single character, we are casting that character to nvarchar(1) and we are casting the newString (so far) as nvarchar(3999). Concatenated, the result will be nvarchar(4000) - which matches the type casting we carry out in the first SELECT.
If you run this query and exclude the WHERE clause, you'll get a sense of what's going on - but the rows may be in a strange order. (You won't necessarily see all rows relating to a single input value grouped together - but you should still be able to follow).
Hope it's an interesting option that may help a few people wanting a strictly expression-based solution.
In Oracle
You can get what you want using this:
-- Oracle: start at the position of the first digit and take as many characters
-- as there are digits in the whole string. The result is the digit run only
-- when all digits in the string are contiguous.
SUBSTR('ABCD1234EFGH',REGEXP_INSTR ('ABCD1234EFGH', '[[:digit:]]'),REGEXP_COUNT ('ABCD1234EFGH', '[[:digit:]]'))
Sample Query:
-- Oracle demo: applies the SUBSTR / REGEXP_INSTR / REGEXP_COUNT expression to
-- three literals (digits at the start, at the end and in the middle) and
-- returns them as one row with columns SAMPLE1..SAMPLE3.
SELECT SUBSTR('003Preliminary Examination Plan ',REGEXP_INSTR ('003Preliminary Examination Plan ', '[[:digit:]]'),REGEXP_COUNT ('003Preliminary Examination Plan ', '[[:digit:]]')) SAMPLE1,
SUBSTR('Coordination005',REGEXP_INSTR ('Coordination005', '[[:digit:]]'),REGEXP_COUNT ('Coordination005', '[[:digit:]]')) SAMPLE2,
SUBSTR('Balance1000sheet',REGEXP_INSTR ('Balance1000sheet', '[[:digit:]]'),REGEXP_COUNT ('Balance1000sheet', '[[:digit:]]')) SAMPLE3 FROM DUAL
If you are using Postgres and you have data like '2000 - some sample text' then try substring and position combination, otherwise if in your scenario there is no delimiter, you need to write regex:
-- Postgres select-list fragment: takes the text that precedes the first '-'.
-- Starting at position 0 (one before the first character) makes the slice one
-- character shorter than the requested length, so for '2000 - some sample text'
-- the space before the dash is excluded as well.
SUBSTRING(Column_name from 0 for POSITION('-' in column_name) - 1) as
number_column_name

SQL take just the numeric values from a varchar

Say i have a few fields like the following:
abd738927
jaksm234234
hfk342
ndma0834
jon99322
Type: varchar.
How do I take just the numeric values from this to display:
738927
234234
342
0834
99322
Have tried substring however the data varies in length, and cast didnt work either due to being unable to convert, any ideas?
Here's the example with PATINDEX:
-- Returns everything from the first digit to the end of fieldName.
-- The statement has no FROM clause as written; add the table that owns fieldName.
select SUBSTRING(fieldName, PATINDEX('%[0-9]%', fieldName), LEN(fieldName))
This assumes (1) the field WILL have a numeric, (2) the numerics are all grouped together, and (3) the numerics don't have any subsequent characters after them.
Extract only numbers (without using while loop) and check each and every character to see if it is a number and extract it
-- Extract the digits of @s without a WHILE loop: master..spt_values (type 'p')
-- supplies the positions 1..len(@s), each position yields one character, and
-- the variable-assigning SELECT appends the character when it is a digit.
-- Local variables must start with '@'; a leading '#' denotes a temp object.
-- NOTE(review): the order in which rows are appended is not specified by the
-- query itself - confirm the output order on your server.
declare @s varchar(100), @result varchar(100)
set @s = 'as4khd0939sdf78'
set @result = ''
select
    @result = @result +
        case when number like '[0-9]' then number else '' end
from
(
    -- one row per character of @s; the column keeps the name "number"
    select substring(@s, number, 1) as number
    from
    (
        select number
        from master..spt_values
        where type = 'p' and number between 1 and len(@s)
    ) as t
) as t
select @result as only_numbers
-- Strip every non-digit from @NonNumeric: find the first non-digit with
-- PATINDEX, remove all occurrences of that character with REPLACE, and repeat
-- until PATINDEX finds nothing.
-- Local variables must start with '@'; a leading '#' denotes a temp object.
DECLARE @NonNumeric varchar(1000) = 'RGI000Testing1000'
DECLARE @Index int
SET @Index = 0
while 1=1
begin
    set @Index = patindex('%[^0-9]%', @NonNumeric)
    if @Index <> 0
    begin
        -- REPLACE removes every occurrence of the offending character at once
        SET @NonNumeric = replace(@NonNumeric, substring(@NonNumeric, @Index, 1), '')
    end
    else
        break;
end
select @NonNumeric -- 0001000
Well, if you don't want to create a function, you can just do something like this:
-- Expression fragment: removes the letters A-Z, '$', ',' and spaces from
-- YOUR_COLUMN with 29 nested REPLACE calls, then casts what is left to float.
-- NOTE(review): whether lowercase letters are removed too depends on the
-- column's collation - confirm.
cast(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(YOUR_COLUMN
,'A',''),'B',''),'C',''),'D',''),'E',''),'F',''),'G',''),'H',''),'I',''),'J','')
,'K',''),'L',''),'M',''),'N',''),'O',''),'P',''),'Q',''),'R',''),'S',''),'T','')
,'U',''),'V',''),'W',''),'X',''),'Y',''),'Z',''),'$',''),',',''),' ','') as float)
I think you're wanting VBA's Val() function. Easy enough to accomplish with IsNumeric()
-- Val: emulates VBA's Val() - returns the leading numeric part of @text as a float.
--   @text   input string (up to 40 characters)
--   returns the number formed by the longest prefix that ISNUMERIC accepts;
--           0 when no prefix qualifies (the empty string casts to 0).
-- Local variables and parameters must start with '@'; '#' denotes a temp object.
create function Val
(
    @text nvarchar(40)
)
returns float
as begin
    -- emulate vba's val() function
    declare @result float
    declare @tmp varchar(40)
    set @tmp = @text
    -- drop one trailing character at a time until the remainder is numeric
    -- NOTE(review): ISNUMERIC accepts some strings that do not cast to float
    -- (currency symbols, a lone sign); confirm this cannot occur in your data.
    while isnumeric(@tmp) = 0 and len(@tmp) > 0 begin
        set @tmp = left(@tmp, len(@tmp) - 1)
    end
    set @result = cast(@tmp as float)
    return @result
end
-- Returns the part of the literal from its first digit to the end.
-- The requested length (LEN - position + 2) is one more than the characters
-- that remain; SUBSTRING simply stops at the end of the string.
select substring(
'jaksm234234',
patindex('%[0-9]%','jaksm234234'),
LEN('jaksm234234')-patindex('%[0-9]%','jaksm234234')+2
)
input table
if you have data like above in the image, then use the below query
-- Keeps only the rows whose field_3 contains none of the characters listed in
-- the bracket set (space, punctuation and the letters a-z).
-- NOTE(review): '#' appears twice in the set, which is redundant; the first
-- one may originally have been '@' - confirm against the source data.
select field_3 from table where PATINDEX('%[ ~`!##$%^&*_()=+\|{};",<>/?a-z]%', field_3)=0
The results will look like this:
Result table
Extract only numbers from a string. Returns a string with all the numbers inside. Example: this1is2one345long6789number will return 123456789
-- GetOnlyNumbers: returns @Temp with every non-digit character removed.
--   @Temp   input string (up to 1000 characters)
--   returns the digits of @Temp in their original order,
--           e.g. 'this1is2one345long6789number' -> '123456789'.
-- Parameters and local variables must start with '@'; '#' denotes a temp object.
CREATE FUNCTION [dbo].[GetOnlyNumbers] (@Temp VARCHAR(1000))
RETURNS VARCHAR (1000) AS BEGIN
    DECLARE @KeepValues AS VARCHAR(50)
    -- pattern matching the first character that is NOT a digit
    SET @KeepValues = '%[^0-9]%'
    -- delete one offending character per iteration until none is left
    WHILE PATINDEX(@KeepValues, @Temp) > 0
        SET @Temp = STUFF(@Temp, PATINDEX(@KeepValues, @Temp), 1, '')
    RETURN @Temp
END
A right with patindex for the reverse string works also for those
-- Takes the trailing run of digits of each value: on the reversed string,
-- PATINDEX finds the last digit of that run (a digit followed by a non-digit;
-- the appended ' ' covers values that consist of digits only), and RIGHT cuts
-- that many characters off the end of the original.
-- The CAST to INT drops leading zeros ('ndma0834' -> 834).
SELECT [Column],
    CAST(RIGHT([Column], PATINDEX('%[0-9][^0-9]%', REVERSE([Column])+' ')) AS INT) as [Num]
FROM (VALUES
    ('abd738927'),
    ('jaksm234234'),
    ('hfk342'),
    ('ndma0834'),
    ('jon99322')   -- no comma after the last row: a trailing comma is a syntax error
) val([Column])
Column
Num
abd738927
738927
jaksm234234
234234
hfk342
342
ndma0834
834
jon99322
99322

How to Replace Multiple Characters in SQL?

This is based on a similar question How to Replace Multiple Characters in Access SQL?
I wrote this since sql server 2005 seems to have a limit on replace() function to 19 replacements inside a where clause.
I have the following task: Need to perform a match on a column, and to improve the chances of a match stripping multiple un-needed chars using replace() function
-- Match t1 to t2 on stringkey after stripping unwanted characters from both sides.
-- Local variables must start with '@'; a leading '#' denotes a temp object.
DECLARE @es NVarChar(1) SET @es = ''   -- replacement: the empty string
DECLARE @p0 NVarChar(1) SET @p0 = '!'
DECLARE @p1 NVarChar(1) SET @p1 = '#'
---etc...
SELECT *
FROM t1
INNER JOIN t2
    ON REPLACE(REPLACE(t1.stringkey, @p0, @es), @p1, @es)
     = REPLACE(REPLACE(t2.stringkey, @p0, @es), @p1, @es)
---etc
If there are more than 19 REPLACE() calls in that where clause, it doesn't work. So the solution I came up with is to create a SQL function, called trimChars in this example (excuse the variables starting at @p22):
-- trimChars: returns @string with a fixed set of 20 punctuation characters
-- (and the space) removed.
--   @string  input text
--   returns  @string after 20 nested REPLACE calls, one per character below.
-- Parameters and local variables must start with '@'; '#' denotes a temp object.
CREATE FUNCTION [trimChars] (
    @string varchar(max)
)
RETURNS varchar(max)
AS
BEGIN
    DECLARE @es NVarChar(1) SET @es = ''   -- replacement: the empty string
    DECLARE @p22 NVarChar(1) SET @p22 = '^'
    DECLARE @p23 NVarChar(1) SET @p23 = '&'
    DECLARE @p24 NVarChar(1) SET @p24 = '*'
    DECLARE @p25 NVarChar(1) SET @p25 = '('
    DECLARE @p26 NVarChar(1) SET @p26 = '_'
    DECLARE @p27 NVarChar(1) SET @p27 = ')'
    DECLARE @p28 NVarChar(1) SET @p28 = '`'
    DECLARE @p29 NVarChar(1) SET @p29 = '~'
    DECLARE @p30 NVarChar(1) SET @p30 = '{'
    DECLARE @p31 NVarChar(1) SET @p31 = '}'
    DECLARE @p32 NVarChar(1) SET @p32 = ' '
    DECLARE @p33 NVarChar(1) SET @p33 = '['
    DECLARE @p34 NVarChar(1) SET @p34 = '?'
    DECLARE @p35 NVarChar(1) SET @p35 = ']'
    DECLARE @p36 NVarChar(1) SET @p36 = '\'
    DECLARE @p37 NVarChar(1) SET @p37 = '|'
    DECLARE @p38 NVarChar(1) SET @p38 = '<'
    DECLARE @p39 NVarChar(1) SET @p39 = '>'
    DECLARE @p40 NVarChar(1) SET @p40 = '#'
    DECLARE @p41 NVarChar(1) SET @p41 = '-'
    -- innermost REPLACE handles @p22, outermost handles @p41
    return REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
    @string, @p22, @es), @p23, @es), @p24, @es), @p25, @es), @p26, @es), @p27, @es), @p28, @es), @p29, @es), @p30, @es), @p31, @es), @p32, @es), @p33, @es), @p34, @es), @p35, @es), @p36, @es), @p37, @es), @p38, @es), @p39, @es), @p40, @es), @p41, @es)
END
This can then be used in addition to the other replace strings
-- Compare the cleaned keys of t1 and t2. trimChars wraps each side separately
-- (the closing parenthesis belongs before the '=', not after the right-hand
-- expression), and a scalar UDF has to be called with its schema name.
-- @p0, @p1 and @es are the variables declared earlier in the batch.
SELECT *
FROM t1
INNER JOIN t2
    ON dbo.trimChars(REPLACE(REPLACE(t1.stringkey, @p0, @es), @p1, @es))
     = dbo.trimChars(REPLACE(REPLACE(t2.stringkey, @p0, @es), @p1, @es))
I created a few more functions to do similar replacing like so trimChars(trimMoreChars(
-- Same comparison with a second cleaning function chained in. Each side is
-- wrapped on its own, and both scalar UDFs are called with their schema name.
-- @p0, @p1 and @es are the variables declared earlier in the batch.
SELECT *
FROM t1
INNER JOIN t2
    ON dbo.trimChars(dbo.trimMoreChars(REPLACE(REPLACE(t1.stringkey, @p0, @es), @p1, @es)))
     = dbo.trimChars(dbo.trimMoreChars(REPLACE(REPLACE(t2.stringkey, @p0, @es), @p1, @es)))
Can someone give me a better solution to this problem in terms of performance and maybe a cleaner implementation?
One useful trick in SQL is the ability to use @var = function(...) in a SELECT to assign a value. If your record set has multiple records, the variable is assigned once per record, so the side effects accumulate:
-- Remove every string listed in @badStrings from @testString. The
-- variable-assigning SELECT runs REPLACE once per row of the table variable,
-- feeding each result into the next row's call.
-- Variables (including table variables) must start with '@'; '#' denotes a temp object.
declare @badStrings table (item varchar(50))
INSERT INTO @badStrings(item)
SELECT '>' UNION ALL
SELECT '<' UNION ALL
SELECT '(' UNION ALL
SELECT ')' UNION ALL
SELECT '!' UNION ALL
SELECT '?' UNION ALL
SELECT '#'
declare @testString varchar(100), @newString varchar(100)
set @testString = 'Juliet ro><0zs my s0x()rz!!?!one!#!#!#!'
set @newString = @testString
SELECT @newString = Replace(@newString, item, '') FROM @badStrings
select @newString -- returns 'Juliet ro0zs my s0xrzone'
I would seriously consider making a CLR UDF instead and using regular expressions (both the string and the pattern can be passed in as parameters) to do a complete search and replace for a range of characters. It should easily outperform this SQL UDF.
I really like @Juliett's solution! I would just use a CTE to get all the invalid characters:
-- Remove each character of @badStrings from @teststring. The recursive CTE
-- yields one row per character of @badStrings, and the variable-assigning
-- SELECT runs REPLACE once per row.
-- Local variables must start with '@'; a leading '#' denotes a temp object.
DECLARE @badStrings VARCHAR(100)
DECLARE @teststring VARCHAR(100)
SET @badStrings = '><()!?#'
SET @teststring = 'Juliet ro><0zs my s0x()rz!!?!one!#!#!#!'
;WITH CTE AS
(
    -- anchor: first character
    SELECT SUBSTRING(@badStrings, 1, 1) AS [String], 1 AS [Start], 1 AS [Counter]
    UNION ALL
    -- recursion: next character, until the whole list has been walked
    SELECT SUBSTRING(@badStrings, [Start] + 1, 1) AS [String], [Start] + 1, [Counter] + 1
    FROM CTE
    WHERE [Counter] < LEN(@badStrings)
)
SELECT @teststring = REPLACE(@teststring, CTE.[String], '') FROM CTE
SELECT @teststring
Juliet ro0zs my s0xrzone
I suggest you to create a scalar user defined function. This is an example (sorry in advance, because the variable names are in spanish):
-- Udf_ReplaceChars: character-by-character translation of @cadena.
--   @cadena          string to manipulate
--   @caracteresElim  characters to be replaced
--   @caracteresReem  replacement characters, matched to @caracteresElim by position
--   returns NULL when the two character lists differ in length; otherwise
--           @cadena with each listed character swapped for its counterpart.
-- Parameters and local variables must start with '@'; '#' denotes a temp object.
CREATE FUNCTION [dbo].[Udf_ReplaceChars] (
    @cadena VARCHAR(500), -- String to manipulate
    @caracteresElim VARCHAR(100), -- String of characters to be replaced
    @caracteresReem VARCHAR(100) -- String of characters for replacement
)
RETURNS VARCHAR(500)
AS
BEGIN
    DECLARE @cadenaFinal VARCHAR(500), @longCad INT, @pos INT, @caracter CHAR(1), @posCarER INT;
    SELECT
        @cadenaFinal = '',
        @longCad = LEN(@cadena),
        @pos = 1;
    -- the two lists are paired by position, so they must be equally long
    IF LEN(@caracteresElim)<>LEN(@caracteresReem)
    BEGIN
        RETURN NULL;
    END
    WHILE @pos <= @longCad
    BEGIN
        -- read the current character, advance, and look it up in the "replace" list
        SELECT
            @caracter = SUBSTRING(@cadena,@pos,1),
            @pos = @pos + 1,
            @posCarER = CHARINDEX(@caracter,@caracteresElim);
        IF @posCarER <= 0
        BEGIN
            -- not listed: copy unchanged
            SET @cadenaFinal = @cadenaFinal + @caracter;
        END
        ELSE
        BEGIN
            -- listed: emit the replacement found at the same position
            SET @cadenaFinal = @cadenaFinal + SUBSTRING(@caracteresReem,@posCarER,1)
        END
    END
    RETURN @cadenaFinal;
END
Here is an example using this function:
SELECT dbo.Udf_ReplaceChars('This is a test.','sat','Z47');
And the result is: 7hiZ iZ 4 7eZ7.
As you can see, each character of the @caracteresElim parameter is replaced by the character in the same position from the @caracteresReem parameter.
While this question was asked about SQL Server 2005, it's worth noting that as of Sql Server 2017, the request can be done with the new TRANSLATE function.
https://learn.microsoft.com/en-us/sql/t-sql/functions/translate-transact-sql
I hope this information helps people who get to this page in the future.
I had a one-off data migration issue where the source data could not output correctly some unusual/technical characters plus the ubiquitous extra commas in CSVs.
We decided that for each such character the source extract should replace them with something that was recognisable to both the source system and the SQL Server that was loading them but which would not be in the data otherwise.
It did mean however that in various columns across various tables these replacement characters would appear and I would have to replace them. Nesting multiple REPLACE functions made the import code look scary and prone to errors in misjudging the placement and number of brackets so I wrote the following function. I know it can process a column in a table of 3,000 rows in less than a second though I'm not sure how quickly it will scale up to multi-million row tables.
-- udf_ReplaceMultipleChars: replaces characters of @OriginalString according
-- to a list of pairs.
--   @OriginalString       text to process
--   @ReplaceTheseChars    pairs laid out as <char><replacement><char><replacement>...
--   @LengthOfReplacement  number of characters copied from @ReplaceTheseChars
--                         for each hit (default 1)
--   returns the rebuilt string.
-- A character is replaced only when CHARINDEX finds it at an odd position of
-- @ReplaceTheseChars, i.e. in a "to be replaced" slot.
-- Parameters and local variables must start with '@'; '#' denotes a temp object.
-- NOTE(review): the result relies on the rows of AllNumbers being appended in
-- ascending Number order, which the query does not request explicitly - confirm.
create function [dbo].[udf_ReplaceMultipleChars]
(
    @OriginalString nvarchar(4000)
    , @ReplaceTheseChars nvarchar(100)
    , @LengthOfReplacement int = 1
)
returns nvarchar(4000)
begin
    declare @RevisedString nvarchar(4000) = N'';
    declare @lengthofinput int =
            (
                select len(@OriginalString)
            );
    -- AllNumbers: 1 .. length of the input, one row per character position
    with AllNumbers
    as (select 1 as Number
        union all
        select Number + 1
        from AllNumbers
        where Number < @lengthofinput)
    select @RevisedString += case
                                 -- (position - 1) % 2 = 0 means an odd position;
                                 -- "not found" gives (0 - 1) % 2 = -1 and falls through
                                 when (charindex(substring(@OriginalString, Number, 1), @ReplaceTheseChars, 1) - 1) % 2
                                      = 0 then
                                     -- the replacement sits right after the matched character
                                     substring(
                                                  @ReplaceTheseChars
                                                , charindex(
                                                               substring(@OriginalString, Number, 1)
                                                             , @ReplaceTheseChars
                                                             , 1
                                                           ) + 1
                                                , @LengthOfReplacement
                                              )
                                 else
                                     substring(@OriginalString, Number, 1)
                             end
    from AllNumbers
    -- one recursion level per character; the input is at most 4000 characters
    option (maxrecursion 4000);
    return (@RevisedString);
end;
It works by taking the string to be evaluated (@OriginalString) together with a string of paired characters (@ReplaceTheseChars) in which the first character is to be replaced by the second, the third by the fourth, the fifth by the sixth and so on.
Here is the string of chars that I needed to replace and their replacements... [']"~,{Ø}°$±|¼¦¼ª½¬½^¾#✓
i.e. A opening square bracket denotes an apostrophe, a closing one a double quote. You can see that there were vulgar fractions as well as degrees and diameter symbols in there.
There is a default #LengthOfReplacement that is included as a starting point if anyone needed to replace longer strings. I played around with that in my project but the single char replacement was the main function.
The condition of the case statement is important. It ensures that it only replaces the character if it is found in your #ReplaceTheseChars variable and that the character has to be found in an odd numbered position (the minus 1 from charindex result ensures that anything NOT found returns a negative modulo value). i.e if you find a tilde (~) in position 5 it will replace it with a comma but if on a subsequent run it found the comma in position 6 it would not replace it with a curly bracket ({).
This can be best demonstrated with an example...
-- Demonstration: run the replacement once, then show the stored value next to
-- the result of a second pass to prove that already-replaced characters are
-- not replaced again.
-- Local variables must start with '@'; the pair list is an N'' literal so
-- that characters outside the database code page (e.g. the check mark) survive.
declare @ProductDescription nvarchar(20) = N'abc~def[¦][123';
select @ProductDescription
    = dbo.udf_ReplaceMultipleChars(
                                      @ProductDescription
                                      /* NB the doubling up of the apostrophe is necessary in the string but resolves to a single apostrophe when passed to the function */
                                    , N'['']"~,{Ø}°$±|¼¦¼ª½¬½^¾#✓'
                                    , default
                                  );
select @ProductDescription
     , dbo.udf_ReplaceMultipleChars(
                                       @ProductDescription
                                     , N'['']"~,{Ø}°$±|¼¦¼ª½¬½^¾#✓'
                                       /* if you didn't know how to type those peculiar chars in then you can build a string like this... '[' + nchar(0x0027) + ']"~,{' + nchar(0x00D8) + '}' + nchar(0x00B0) etc */
                                     , default
                                   );
This will return both the value after the first pass through the function and the second time as follows...
abc,def'¼"'123 abc,def'¼"'123
A table update would just be
-- Apply the replacement to every row of TestTable (no WHERE clause on purpose).
-- The function is dbo.udf_ReplaceMultipleChars: schema "dbo", name prefixed "udf_".
update a
set a.Col1 = dbo.udf_ReplaceMultipleChars(a.Col1, '~,]"', 1)
from TestTable a
Finally (I hear you say!), although I've not had access to the translate function I believe that this function can process the example shown in the documentation quite easily. The TRANSLATE function demo is
-- TRANSLATE maps each character of the second argument to the character at the
-- same position of the third argument: [ -> (, ] -> ), { -> (, } -> ).
SELECT TRANSLATE('2*[3+4]/{7-2}', '[]{}', '()()');
which returns 2*(3+4)/(7-2) although I understand it might not work on 2*[3+4]/[7-2] !!
My function would approach this as follows listing each char to be replaced followed by its replacement [ --> (, { --> ( etc.
-- Pair list read two characters at a time: [ -> (, { -> (, ] -> ), } -> ).
select dbo.udf_ReplaceMultipleChars('2*[3+4]/{7-2}', '[({(])})', 1);
which will also work for
-- Same pair list applied to an input that uses square brackets only.
select dbo.udf_ReplaceMultipleChars('2*[3+4]/[7-2]', '[({(])})', 1);
I hope someone finds this useful and if you get to test its performance against larger tables do let us know one way or another!
-- Remove '?', '*' and '/' from @testVal: the derived table lists the unwanted
-- characters and the variable-assigning SELECT runs REPLACE once per row.
-- Local variables must start with '@'; a leading '#' denotes a temp object.
declare @testVal varchar(20)
set @testVal = '?t/es?ti/n*g 1*2?3*'
select @testVal = REPLACE(@testVal, item, '')
from (select '?' item union select '*' union select '/') list
select @testVal;
One option is to use a numbers/tally table to drive an iterative process via a pseudo-set based query.
The general idea of char replacement can be demonstrated with a simple character map table approach:
-- Character map: srcChar is the character to look for, replaceChar its substitute.
create table charMap (srcChar char(1), replaceChar char(1))
insert into charMap (srcChar, replaceChar)
select 'a', 'z' union all
select 'b', 'y'

-- Sample characters, one per row.
create table testChar (srcChar char(1))
insert into testChar (srcChar)
select '1' union all
select 'a' union all
select '2' union all
select 'b'

-- Mapped characters come back as their substitute, everything else unchanged.
select
    coalesce(m.replaceChar, t.srcChar) as charData
from testChar as t
left join charMap as m
    on t.srcChar = m.srcChar
Then you can bring in the tally table approach to do the lookup on each character position in the string.
-- Tally table: the integers 1..256, used as character positions.
-- Local variables must start with '@'; a leading '#' denotes a temp object.
create table tally (i int)
declare @i int
set @i = 1
while @i <= 256 begin
    insert tally values (@i)
    set @i = @i + 1
end

create table testData (testString char(10))
insert testData values ('123a456')
insert testData values ('123ab456')
insert testData values ('123b456')

-- One row per (string, position): the character found there and its mapped
-- value from charMap (the character itself when there is no mapping).
select
    i,
    SUBSTRING(testString, i, 1) as srcChar,
    coalesce(charMap.replaceChar, SUBSTRING(testString, i, 1)) as charData
from testData cross join tally
left join charMap on SUBSTRING(testString, i, 1) = charMap.srcChar
-- LEN ignores the trailing padding of the char(10) column
where i <= LEN(testString)
I don't know why Charles Bretana deleted his answer, so I'm adding it back in as a CW answer, but a persisted computed column is a REALLY good way to handle these cases where you need cleansed or transformed data almost all the time, but need to preserve the original garbage. His suggestion is relevant and appropriate REGARDLESS of how you decide to cleanse your data.
Specifically, in my current project, I have a persisted computed column which trims all the leading zeros (luckily this is relatively easily handled in straight T-SQL) from some particular numeric identifiers stored inconsistently with leading zeros. This is stored in persisted computed columns in the tables which need it and indexed because that conformed identifier is often used in joins.
Here are the steps
Create a CLR function
See following code:
public partial class UserDefinedFunctions
{
    /// <summary>
    /// SQL CLR scalar function: replaces every match of the regular expression
    /// <paramref name="filter"/> in <paramref name="inputtext"/> with
    /// <paramref name="replacewith"/>.
    /// </summary>
    /// <param name="inputtext">Text to process.</param>
    /// <param name="filter">Regular expression pattern.</param>
    /// <param name="replacewith">Replacement text.</param>
    /// <returns>
    /// The processed text; SQL NULL when any argument is SQL NULL; the
    /// exception message when the replacement throws (for example on an
    /// invalid pattern).
    /// </returns>
    [Microsoft.SqlServer.Server.SqlFunction]
    public static SqlString Replace2(SqlString inputtext, SqlString filter,SqlString replacewith)
    {
        // NULL in, NULL out: SqlString.ToString() would turn a SQL NULL into
        // the literal text "Null", and casting a NULL SqlString to string throws.
        if (inputtext.IsNull || filter.IsNull || replacewith.IsNull)
        {
            return SqlString.Null;
        }

        try
        {
            string result = Regex.Replace(inputtext.Value, filter.Value, replacewith.Value);
            return new SqlString(result);
        }
        catch (Exception s)
        {
            // Kept from the original contract: failures are reported through
            // the return value instead of failing the calling statement.
            return (SqlString)s.Message;
        }
    }
}
Deploy your CLR function
Now Test it
See following code:
-- Scratch table for trying out the CLR function.
create table dbo.test (dummydata varchar(255))
Go
INSERT INTO dbo.test (dummydata)
values ('P#ssw1rd'),
       ('This 12is #test')
Go
-- Strip digits and '#' from every row (whole-table update on purpose).
Update dbo.test
set dummydata = dbo.Replace2(dummydata, '[0-9#]', '')

select dummydata
from dbo.test
dummydata, Psswrd, This is test booom!!!!!!!!!!!!!
Here's a modern solution using STRING_SPLIT that's very concise. The drawback is that you need at least version SQL Server 2016 running at compatibility level 130.
-- Remove every character listed in @disallowed (space-separated) from
-- @strOriginal: STRING_SPLIT yields one row per listed item and the
-- variable-assigning SELECT runs REPLACE once per row.
-- Local variables must start with '@'; a leading '#' denotes a temp object.
Declare @strOriginal varchar(100) = 'Juliet ro><0zs my s0x()rz!!?!one!#!#!#!'
Declare @strModified varchar(100) = @strOriginal
Declare @disallowed varchar(100) = '> < ( ) ! ? #'
Select
    @strModified = Replace(@strModified, value, '')
From
    String_Split(@disallowed, ' ')
Select @strModified
It returns:
Juliet ro0zs my s0xrzone
-- RemoveCharacters: returns @original without any of the characters in @badchars.
--   @original  text to clean
--   @badchars  characters to remove
-- How it works: TRANSLATE maps every bad character to the sentinel '#', then
-- REPLACE deletes the sentinel. Any '#' already present in @original is
-- therefore removed as well.
-- Parameters and local variables must start with '@'; '#' denotes a temp object.
create function RemoveCharacters(@original nvarchar(max) , @badchars nvarchar(max))
returns nvarchar(max)
as
begin
    -- DATALENGTH / 2 instead of LEN: LEN ignores trailing spaces, which would
    -- make the replicate() string shorter than @badchars whenever the list
    -- ends with a space, and TRANSLATE requires both lists to be equally long.
    declare @len int = datalength(@badchars) / 2
    return REPLACE(TRANSLATE(@original, @badchars, replicate('#' , @len )), '#', '')
end
go
select dbo.RemoveCharacters('Hello World!' , 'lo!' )
--returns He Wrd