Related
All,
I am trying to replace the special characters in a string with the URL
encoding values to which they correspond. Below is some example code I have
been working with.
Thanks for the help.
-- Sample data for the URL-encoding question.
-- #url_encoding_lookup maps one special character to its percent-encoded form.
create table #url_encoding_lookup
(
    character varchar(10),
    code      varchar(20)
);

insert into #url_encoding_lookup (character, code)
values
    ('!',  '%21'),
    ('"',  '%22'),
    ('#',  '%23'),
    ('$',  '%24'),
    ('%',  '%25'),
    ('&',  '%26'),
    ('''', '%27'),
    ('(',  '%28'),
    (')',  '%29'),
    ('*',  '%2A'),
    ('+',  '%2B'),
    (',',  '%2C'),
    ('-',  '%2D'),
    ('.',  '%2E'),
    ('/',  '%2F');

-- #data holds the strings whose special characters should be encoded.
create table #data
(
    string varchar(200)
);

-- Explicit column list so the insert keeps working if #data gains columns.
insert into #data (string)
values
    ('Jim (BoB)'),
    ('Will''s Place'),
    ('Auto-Mart');

select string from #data;
select character, code from #url_encoding_lookup;
desired results would be
Jim %28BoB%29
Will%27s Place
Auto%2DMart
-- MySQL stored function: percent-encodes every character listed in
-- url_encoding_lookup(_character, _code) that occurs in `str`.
--
-- The body RETURNs a value, so this must be a FUNCTION, not a PROCEDURE.
-- The name matches the caller shown after it: SELECT _replace_chars(name) ...
--
-- Parameters:
--   str  text to encode (NULL in, NULL out, because REPLACE propagates NULL).
-- Returns:
--   the encoded text. Each encoded character grows from 1 to 3 characters,
--   so the result is declared three times as wide as the input.
DELIMITER //

CREATE FUNCTION _replace_chars(str VARCHAR(200))
RETURNS VARCHAR(600)
READS SQL DATA
BEGIN
  DECLARE _end BOOLEAN DEFAULT FALSE;
  DECLARE _result VARCHAR(600) DEFAULT str;
  DECLARE _find VARCHAR(32);
  DECLARE _replace VARCHAR(32);
  -- '%' must be handled FIRST. Every replacement code itself starts with '%',
  -- so replacing '%' after another character would re-encode the codes that
  -- were already inserted (e.g. '!' -> '%21' -> '%2521').
  DECLARE _cur CURSOR FOR
    SELECT _character, _code
    FROM url_encoding_lookup
    ORDER BY (_character = '%') DESC;
  DECLARE CONTINUE HANDLER FOR NOT FOUND SET _end = TRUE;

  OPEN _cur;
  _loop: LOOP
    FETCH _cur INTO _find, _replace;
    IF _end THEN
      LEAVE _loop;
    END IF;
    SET _result = REPLACE(_result, _find, _replace);
  END LOOP _loop;
  CLOSE _cur;

  RETURN _result;
END//

DELIMITER ;
Then
-- Apply the encoder to every row: one encoded string per `name` value in `data`.
SELECT _replace_chars(name) FROM `data`
Result
Jim %28BoB%29
Will%27s Place
Auto%2DMart
-- Percent-encodes the special characters '!' through '/' in @the_string.
--
-- Parameters:
--   @the_string  text to encode.
-- Returns:
--   the encoded text, or NULL when @the_string is NULL.
--
-- Notes:
--   * T-SQL functions cannot create temp tables, so the lookup lives in a
--     table variable.
--   * The string is walked one character at a time, so the '%' that belongs
--     to an inserted code is never encoded a second time.
ALTER FUNCTION [dbo].[udf_ReplaceYouCoded]
(
    @the_string nvarchar(max)
)
RETURNS NVARCHAR(MAX)
AS
BEGIN
    IF @the_string IS NULL
        RETURN NULL;

    -- Binary collation: only the exact character matches, regardless of the
    -- database's case/width/accent sensitivity.
    DECLARE @url_encoding_lookup TABLE
    (
        character nchar(1) COLLATE Latin1_General_BIN2 NOT NULL PRIMARY KEY,
        code      varchar(3) NOT NULL
    );

    INSERT INTO @url_encoding_lookup (character, code)
    VALUES
        (N'!',  '%21'),
        (N'"',  '%22'),
        (N'#',  '%23'),
        (N'$',  '%24'),
        (N'%',  '%25'),
        (N'&',  '%26'),
        (N'''', '%27'),
        (N'(',  '%28'),
        (N')',  '%29'),
        (N'*',  '%2A'),
        (N'+',  '%2B'),
        (N',',  '%2C'),
        (N'-',  '%2D'),
        (N'.',  '%2E'),
        (N'/',  '%2F');

    DECLARE @temp_field nvarchar(max) = N'';   -- must start non-NULL: NULL + text is NULL
    DECLARE @x int = 1;
    -- DATALENGTH / 2 counts trailing spaces, which LEN() would ignore.
    DECLARE @total int = DATALENGTH(@the_string) / 2;
    DECLARE @ch nchar(1);
    DECLARE @code varchar(3);

    WHILE @x <= @total          -- <= so the last character is processed too
    BEGIN
        SET @ch = SUBSTRING(@the_string, @x, 1);

        SET @code = NULL;
        SELECT @code = code
        FROM @url_encoding_lookup
        WHERE character = @ch;

        -- Use the code when the character is in the lookup, else keep it as is.
        SET @temp_field = @temp_field + COALESCE(@code, @ch);

        SET @x = @x + 1;
    END

    RETURN @temp_field;
END
-- Rebuilds @s one character at a time, replacing special characters with the
-- URL-encoding string stored in url_encoding_lookup(ascii_code, code).
--
-- Parameters:
--   @s  text to encode.
-- Returns:
--   the encoded text; NULL when @s is NULL or when the rebuilt string is
--   empty / only spaces (len() = 0).
--
-- Notes:
--   * Space (32), digits and ASCII letters are copied unchanged.
--   * Every other code from 33 to 255 is replaced by its lookup row. When the
--     lookup has no row for it, '%' + two hex digits is used instead, so one
--     missing row no longer turns the whole result into NULL.
--   * Codes below 32 fall in neither branch and are dropped.
--   * The result is sized 3 x 256 because each encoded character becomes
--     three characters.
ALTER function [dbo].[udf_ReplaceSpecialChars]
(@s varchar(256))
returns varchar(768)
as
begin
    -- declare @s varchar(256) set @s = 'Jim (P)' --test
    if @s is null
        return null

    declare @s2 varchar(768)
    set @s2 = ''                     -- empty accumulator, ready to receive values

    declare @l int
    set @l = len(@s + '.') - 1       -- character count INCLUDING trailing spaces

    declare @p int
    set @p = 1                       -- current position in @s

    declare @c int                   -- ASCII code of the current character
    declare @nc varchar(256)         -- replacement text from the lookup table

    while @p <= @l
    begin
        set @c = ascii(substring(@s, @p, 1))

        if @c = 32 or @c between 48 and 57 or @c between 65 and 90 or @c between 97 and 122
            -- not special: copy the character through
            set @s2 = @s2 + char(@c)
        else if @c between 33 and 255
        begin
            -- special: use the lookup row, or the computed hex code as a fallback
            set @nc = (select code from url_encoding_lookup where ascii_code = @c)
            set @s2 = @s2 + coalesce(@nc, '%' + convert(char(2), convert(binary(1), @c), 2))
        end

        set @p = @p + 1              -- move to the next character
    end

    if len(@s2) = 0
        return null

    return @s2                       -- rebuilt string
end
You can replace the special characters using the following regular expression [\u0100-\uffff]
-- Strips every character in the range U+0100..U+FFFF by replacing it with ''.
-- `column` is a placeholder for the real column name; a FROM clause is still needed.
-- NOTE(review): regexp_replace and \u escapes are engine-specific — confirm support on the target database.
select regexp_replace(column, '[\u0100-\uffff]', '')
we are having to ID some data coming from a bad import and any help would be appreciated.
For instance a string like below and identifier char for the charindex.
SET @InputString = 'The quick brown fox jumped "over" or "under" the log'
SET @IdentifierChar = '"'
The issue we are having is that we can run our test against a hard-coded string like the one above and get the result 'over'. When we put it in a WHILE loop, however, we get 'over', 'or', 'under'. The expected result for us would be only 'over' and 'under', not the 'or'.
Our first go at a test was something like below just to see try and split:
-- Extracts the text between the first pair of @IdentifierChar characters.
DECLARE @InputString    NVARCHAR(MAX)
DECLARE @IdentifierChar NCHAR(1)
SET @InputString    = 'The quick brown fox jumped "over" or "under" the log'
SET @IdentifierChar = '"'

DECLARE @FirstID      INT             -- position of the opening identifier
DECLARE @SecondID     INT             -- position of the closing identifier
DECLARE @Length       INT             -- distance between the two identifiers
DECLARE @TargetString NVARCHAR(MAX)   -- stays NULL when no complete pair exists

SET @FirstID  = CHARINDEX(@IdentifierChar, @InputString, 1)
SET @SecondID = CASE
                    WHEN @FirstID > 0
                        THEN CHARINDEX(@IdentifierChar, @InputString, @FirstID + 1)
                    ELSE 0
                END

-- Without this guard a string with no pair would pass a negative length to
-- SUBSTRING, which raises an error.
IF @SecondID > 0
BEGIN
    SET @Length       = @SecondID - @FirstID
    SET @TargetString = SUBSTRING(@InputString, @FirstID + 1, @Length - 1)
END
Like I said then we literally just threw it in a hard coded loop and set the value of the substring to the last position of the identifier of the specialcharacter just to test and see how the charindex was splitting out the strings between the quotes and we did not think about it getting the 'or' as well.
so here is the dirty loop:
-- Continues from the declarations in the previous snippet (@COUNT itself is
-- not declared there). Kept exactly as asked about: it yields over / or / under.
-- The cause is the last statement of the loop, which leaves @Length pointing AT
-- the closing quote, so the next pass treats that quote as an opening one.
Set @COUNT = 0
Set @Length = 0
WHILE(@COUNT)<3
BEGIN
Set @FirstID = CHARINDEX(@IdentifierChar,@InputString,@Length)
Set @SecondID = CHARINDEX(@IdentifierChar,@InputString,@FirstID+1)
Set @Length = @SecondID-@FirstID
Set @TargetString = SUBSTRING(@InputString,@FirstID+1,@Length-1)
SET @COUNT = @COUNT+1
Set @Length =@SecondID   -- reused as the next search start; should be @SecondID+1
END
There's probably a better way to parse this out, but here's my minimal modification to your code to make it work, with comments where I changed things:
-- Prints each value enclosed in a pair of @IdentifierChar characters
-- (at most two pairs, as in the original question).
DECLARE @InputString Nvarchar(MAX)
DECLARE @IdentifierChar NCHAR(1)
SET @InputString = 'The quick brown fox jumped "over" or "under" the log'
SET @IdentifierChar = '"'
declare @FirstID int
declare @SecondID int
declare @Length int
declare @TargetString Nvarchar(MAX)
declare @COUNT int -- added this missing from your code above
Set @COUNT = 0
Set @Length = 0
WHILE(@COUNT)<2 -- only need 2 here now
BEGIN
    Set @FirstID = CHARINDEX(@IdentifierChar,@InputString,@Length)
    -- Stop when no opening quote is left. Otherwise @FirstID is 0, the next
    -- CHARINDEX wraps around to the first quote again, and the text before
    -- that quote is printed as if it were a quoted value.
    IF @FirstID = 0 BREAK
    Set @SecondID = CHARINDEX(@IdentifierChar,@InputString,@FirstID+1)
    -- Stop on an unmatched opening quote (negative SUBSTRING length otherwise).
    IF @SecondID = 0 BREAK
    Set @Length = @SecondID-@FirstID
    Set @TargetString = SUBSTRING(@InputString,@FirstID+1,@Length-1)
    SET @COUNT = @COUNT+1
    Set @Length =@SecondID+1 -- added one
    print @TargetString -- so we can see what it finds
END
Your main issue was how you updated @Length at the bottom of your loop: when you thought you were pointing PAST the double quote after "over", you were actually pointing right at it, so it was found a second time as an opening quote before " or ".
Here's a User Defined Function that I wrote and keep in my toolbox. This is a slightly off-label use, but it should work well for you.
If we consider this string to be five substrings, delimited by the four double-quote characters, then we can split on those just take substrings 2 and 4. (Getting a third or fourth quoted value would be as easy as getting substrings 6 or 8) Trying to get an element that doesn't exist will just return a NULL.
After executing the CREATE statement below, which will create the dbo.SPLIT_LIST function, you can call it like this:
-- Elements 2 and 4 of the '"'-delimited list are the two quoted values.
declare @InputString varchar(255)
SET @InputString = 'The quick brown fox jumped "over" or "under" the log'
select dbo.SPLIT_LIST(@InputString, '"', 2, ''),
       dbo.SPLIT_LIST(@InputString, '"', 4, '')
And you'll get your two output values. What's nice about a function like this is you can just throw it in your select statements and operate over many records at a time, instead of per each.
CREATE function dbo.SPLIT_LIST(
    @string nvarchar(max),
    @delimiter nvarchar(50),
    @i int,
    @text_qualifier nvarchar(1)
)
returns nvarchar(max)
/*
    Returns the @i-th element (1-based) of a delimited list.

    Parameters:
        @string          the delimited list
        @delimiter       separator between elements (may be several characters)
        @i               1-based index of the element to return
        @text_qualifier  optional quote character; an element that starts with
                         it runs until qualifier+delimiter, so it may contain
                         the delimiter. Pass '' when not used.
    Returns:
        the element with leading/trailing spaces trimmed, or NULL when the
        list has fewer than @i elements.

    Example:
        select dbo.SPLIT_LIST('"twenty,one","twenty,two","twenty,three"', ',', 2, '"')
        returns: 'twenty,two'
*/
as
BEGIN
    declare @value nvarchar(max),
            @d_length int,          --length of the delimiter
            @q_length int,          --length of the text qualifier
            @using_qualifier int

    --datalength/2 rather than len(): len() ignores trailing spaces, so a
    --delimiter such as ', ' would be measured one character short
    set @d_length = datalength(@delimiter) / 2
    set @q_length = datalength(@text_qualifier) / 2
    set @string = ltrim(rtrim(@string))

    --works by chopping off the leading value from the string each round
    while @i > 0 and @string is not null and len(@string) > 0
    begin
        --if the remaining @string starts with the text qualifier,
        --then the currently parsed value should end with the text qualifier+delimiter
        if left(@string, 1) = @text_qualifier
        begin
            set @using_qualifier = 1
            --chop off leading qualifier
            set @string = ltrim(right(@string, len(@string) - @q_length))
        end
        else
        begin
            set @using_qualifier = 0
        end

        if (@using_qualifier = 0) -- If we are NOT using a text qualifier for this element
        begin
            if (charindex(@delimiter, @string) > 0) --If there is a remaining delimiter
            begin
                set @value = ltrim(rtrim(left(@string, charindex(@delimiter, @string) - 1)))
                set @string = ltrim(rtrim(right(@string, len(@string) - charindex(@delimiter, @string) - @d_length + 1)))
            end
            else --no remaining delimiters
            begin
                set @value = @string
                set @string = null
            end
        end
        else -- If we ARE using a text qualifier for this element
        begin
            if (charindex((@text_qualifier + @delimiter), @string) > 0) --If there is a remaining qualifier+delimiter
            begin
                set @value = ltrim(rtrim(left(@string, charindex((@text_qualifier + @delimiter), @string) - 1)))
                set @string = ltrim(rtrim(right(@string, len(@string) - charindex((@text_qualifier + @delimiter), @string) - @d_length - @q_length + 1)))
            end
            else --no remaining qualifier+delimiters
            begin
                --Does the remaining string END with the text qualifier?
                if (charindex(reverse(@text_qualifier), reverse(@string)) = 1)
                begin
                    set @value = ltrim(rtrim(left(@string, len(@string) - @q_length)))
                    set @string = null
                end
                else if (charindex((@text_qualifier), @string) > 0) --Is there a remaining qualifier at all?
                begin
                    set @value = ltrim(rtrim(left(@string, charindex((@text_qualifier), @string) - 1)))
                    set @string = null
                end
                else --no final closing qualifier
                begin
                    set @value = @string
                    set @string = null
                end
            end
        end

        set @i = @i - 1
        --print @value
    end

    if @i = 0 return @value --should exit here
    return NULL --asked for an element past the end of the list
END
Depending on the size of your data set and the complexity of your overall query, you could use a recursive CTE:
-- Returns every double-quoted value (quotes included) found in the input string.
-- inputStr: the single sample string to scan.
;with inputStr as (select 'The quick brown fox jumped "over" or "under" or "around" the log' as s)
-- cte: successively shorter suffixes of the input. The anchor row starts at the
-- first "; each recursive row either drops the leading character (when the row
-- starts with ") or jumps forward to the next " (when it does not).
,cte as (
select right(s,len(s) - charindex('"',s) + 1) as s --get the first occurence of "
from inputStr
union ALL
select right(right(s,len(s)-1),len(s) - charindex('"',s) + 1) as s --get the second occurence of " in the above string
from cte
where charindex('"',s) > 0 --where " exists
)
select left(s,charindex('"',right(s,len(s)-1))+1) as quoted --select from the first " to the second "
from cte
-- An even count of remaining quotes means the row starts at an OPENING quote;
-- rows that start at a closing quote have an odd count and are skipped.
where (len(s) - len(replace(s,'"',''))) % 2 <> 1 --even number of "
and left(s,charindex('"',right(s,len(s)-1))+1) like '"%"'
Just wanted to update. I continued to toy around with the code and got something to work in case anyone wants to use similar logic in the future. This did what we discussed above.
-- Splits @TargetString on spaces, then builds a comma-separated list of the
-- words that are wrapped in @idChar (wrapping characters removed).
-- Limitation: a quoted phrase that contains a space is split into separate
-- words and therefore not returned.
DECLARE @TargetString NVARCHAR(MAX)
DECLARE @stringLen int
DECLARE @splitTbl TABLE(siteId NVARCHAR(MAX))
DECLARE @idChar NCHAR(1)
DECLARE @buildResults NVARCHAR(MAX)
DECLARE @buildLike NVARCHAR(MAX)

SET @TargetString = 'The quick brown fox jumped "over" or "under" the "log"'
SET @idChar = '"'

-- Peel one space-delimited word off the front of the string per iteration.
WHILE CHARINDEX(' ', @TargetString) > 0
BEGIN
    SET @stringLen = CHARINDEX(' ', @TargetString);

    INSERT INTO @splitTbl (siteId)
    SELECT SUBSTRING(@TargetString, 1, @stringLen - 1);

    SET @TargetString = SUBSTRING(@TargetString, @stringLen + 1, LEN(@TargetString));
END

-- Whatever remains is the last word.
INSERT INTO @splitTbl (siteId)
SELECT @TargetString

-- The word must START and END with the identifier. The previous pattern
-- ('%' + id + '%') also matched words such as  "over",  or a lone  "  and then
-- stripped the wrong characters or passed a negative length to SUBSTRING.
SET @buildLike = @idChar + '%' + @idChar

-- NOTE: the order of values built by variable concatenation is not guaranteed.
SELECT @buildResults = COALESCE(@buildResults + ', ', '')
                     + SUBSTRING(siteId, 2, LEN(siteId) - 2)
FROM @splitTbl
WHERE siteId LIKE @buildLike

SELECT @buildResults AS quoted_words
I have contact_firstname column which has some special characters like (#,&,-,_, etc) in the data stored in that column. I want to first find all those special characters in each record and replace those characters with a space. I found a query on this website which helps identify the special characters but I am not sure how to find charindex of each special character in the below string and replace it with a space.
-- Reports whether @MyString contains any character other than a letter or digit.
DECLARE @MyString VARCHAR(100)
SET @MyString = '!Char$Fox#'
IF (@MyString LIKE '%[^a-zA-Z0-9]%')
BEGIN
    PRINT 'Contains "special" characters'
END
I think you have to loop, as Tab Alleman mentioned:
-- Replaces every character that is not a letter or digit with a space.
declare @MyString varchar(100) = '!Char$Fox#'
declare @i int = 1                            -- SUBSTRING positions are 1-based
declare @char varchar(1)
declare @len int = LEN(@MyString + '.') - 1   -- count trailing spaces too
declare @result varchar(100) = ''

while @i <= @len                              -- <= so the last character is processed
begin
    set @char = SUBSTRING(@MyString, @i, 1)

    if @char like '[^a-zA-Z0-9]'
    begin
        set @char = ' '
    end

    set @result = @result + @char
    set @i = @i + 1
end

select @result
You can also do this:
-- Replaces each character listed in @InvalidChars with a space in @MyString.
DECLARE @InvalidChars VARCHAR(100)
DECLARE @MyString VARCHAR(100)
SET @InvalidChars = '!$#'
SET @MyString = '!Char$Fox#'

-- invalid_char: one row per character of @InvalidChars.
-- Recursion depth is at most 99 (VARCHAR(100)), within the default limit of 100.
;WITH invalid_char AS
(
    SELECT 1 AS pos, SUBSTRING(@InvalidChars, 1, 1) AS ch
    UNION ALL
    SELECT pos + 1, SUBSTRING(@InvalidChars, pos + 1, 1)
    FROM invalid_char
    WHERE pos < LEN(@InvalidChars)
)
-- The assignment is evaluated once per row, so every listed character is
-- replaced in turn. NOTE(review): this relies on row-by-row variable
-- assignment; the replacements are independent, so row order does not matter.
SELECT @MyString = REPLACE(@MyString, ch, ' ')
FROM invalid_char

SELECT @MyString
Result:
Char Fox
This is a combination of solutions found here:
How to Replace Multiple Characters in SQL?
T-SQL: Opposite to string concatenation - how to split string into multiple records [duplicate]
Why does the #result value in the code below print out a blank string? I would expect it to concatenate with the previous result.
-- Code as asked about: it prints blank lines. @result is never initialized,
-- so it starts as NULL and every "... + @result" stays NULL.
DECLARE @size int
DECLARE @string nvarchar(10)
DECLARE @result nvarchar(10)   -- never initialized: starts as NULL
SELECT @string = '12345abc123' -- 11 characters: silently truncated to nvarchar(10)
DECLARE @count int, @total int
SELECT
@total = LEN(@string),
@count = 1
WHILE @count <= @total
BEGIN
SELECT @result = SUBSTRING(@string, @count, 1) + '-'+ @result
SELECT @count = @count + 1
PRINT @result
END
You never initialized @result, so it defaults to a SQL NULL. SQL NULLs are contagious: concatenating anything with NULL yields NULL, so when you do
SELECT @result = .... + @result
You're actually doing
SELECT @result = ... + null
and @result simply remains null
Initializing the value to an empty string solves your problem:
SET @result = ''
It is returning a blank because you are concatenating the substring with @result, which initially is NULL.
Try setting @result to an empty string like this:
SELECT @string = '12345abc123', @result = ''
You have to initialize the @result variable.
Declare it like this:
DECLARE @result nvarchar(10) = ''
This should work.
My table has one column that contains strings like: "HRM_APPLICATION_DELAY_IN"
I want to perform the operations below on each row of this column:
convert to lower case
remove underscore "_"
change case (convert to upper case) of the character after the underscore, like: "hrm_Application_Delay_In"
I need help with this conversion. Thanks in advance.
Here is a function to achieve it:
-- Converts an UPPER_SNAKE_CASE string to lowerCamelCase:
-- lower-cases everything, then removes each underscore and upper-cases the
-- character that followed it.
--
-- Parameters:
--   @a  text to convert.
-- Returns:
--   the converted text (NULL in, NULL out).
--
-- Created in dbo explicitly because the callers reference dbo.f_test.
create function dbo.f_test
(
    @a varchar(max)
)
returns varchar(max)
as
begin
    set @a = lower(@a)

    -- '_' is a LIKE wildcard, so it is escaped to match a literal underscore.
    while @a like '%\_%' escape '\'
    begin
        -- Replace "_x" (two characters starting at the first underscore) with "X".
        select @a = stuff(@a, v, 2, upper(substring(@a, v + 1, 1)))
        from (select charindex('_', @a) v) a
    end

    return @a
end
Example:
select dbo.f_test('HRM_APPLICATION_DELAY_IN')
Result:
hrmApplicationDelayIn
To update your table here is an example how to write the syntax with the function:
-- Template: replace <yourtable> and <yourcolumn> with real names.
-- Only rows that still contain an underscore are touched.
UPDATE <yourtable>
SET <yourcolumn> = dbo.f_test(<yourcolumn>)
WHERE <yourcolumn> LIKE '%\_%' ESCAPE '\'
For a variable this is overkill, but I'm using this to demonstrate a pattern
-- Lower-cases @str, then upper-cases each character that follows an underscore
-- (underscores are kept). Built one character per recursion step:
--   one  = result built so far
--   last = the character consumed in the previous step
--   rest = the lower-cased remainder still to process
declare @str varchar(100) = 'HRM_APPLICATION_DELAY_IN';

;with c(one,last,rest) as (
    select cast(lower(left(@str,1)) as varchar(max)),
           left(@str,1), stuff(lower(@str),1,1,'')
    union all
    select one+case when last='_'
                    then upper(left(rest,1))
                    else left(rest,1) end,
           left(rest,1), stuff(rest,1,1,'')
    from c
    where rest > ''
)
-- Every row is a prefix of the next one; max() is used to pick the complete string.
select max(one)
from c;
That can be extended to a column in a table
-- Sample table
-- Sample table
declare @tbl table (
    id int identity not null primary key clustered,
    str varchar(100)
);

insert into @tbl (str) values
    ('HRM_APPLICATION_DELAY_IN'),
    ('HRM_APPLICATION_DELAY_OUT'),
    ('_HRM_APPLICATION_DELAY_OUT'),
    (''),
    (null),
    ('abc<de_fg>hi');

-- the query: same character-by-character recursion as the variable version,
-- carried per row via id
;with c(id,one,last,rest) as (
    select id,cast(lower(left(str,1)) as varchar(max)),
           left(str,1), stuff(lower(str),1,1,'')
    from @tbl
    union all
    select id,one+case when last='_'
                       then upper(left(rest,1))
                       else left(rest,1) end,
           left(rest,1), stuff(rest,1,1,'')
    from c
    where rest > ''
)
select id,max(one)
from c
group by id
-- one recursion step per character, so lift the default limit of 100
option (maxrecursion 0);
-- result
ID COLUMN_1
1 hrm_Application_Delay_In
2 hrm_Application_Delay_Out
3 _Hrm_Application_Delay_Out
4
5 (null)
6 abc<de_Fg>hi
-- Loop-free alternative: lower-case the text, replace each "_<letter>" pair
-- with the upper-case letter (a through z, innermost first), then remove any
-- underscore that is left over.
select
replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(lower('HRM_APPLICATION_DELAY_IN'),'_a','A'),'_b','B'),'_c','C'),'_d','D'),'_e','E'),'_f','F'),
'_g','G'),'_h','H'),'_i','I'),'_j','J'),'_k','K'),'_l','L'),
'_m','M'),'_n','N'),'_o','O'),'_p','P'),'_q','Q'),'_r','R'),
'_s','S'),'_t','T'),'_u','U'),'_v','V'),'_w','W'),'_x','X'),
'_y','Y'),'_z','Z'),'_','')
The two steps below can solve the problem. As an example I use sys.tables; you can use any source of names.
-- WARNING: renames EVERY user table in the current database.
-- New name = @ModPrefix + the old name in PascalCase with all non-letters
-- removed (e.g. hrm_application_delay -> PurHrmApplicationDelay).
declare @Ret varchar(8000), @RetVal varchar(8000), @i int, @count int = 1;
declare @c varchar(10), @Text sysname, @SchemaName sysname, @PrevCase varchar(1), @ModPrefix varchar(10);
declare @QualifiedName nvarchar(600);

-- Snapshot the table list first so the renames cannot disturb the cursor.
DECLARE @FileDataTable TABLE (SchemaName sysname NOT NULL, TableName sysname NOT NULL)
INSERT INTO @FileDataTable (SchemaName, TableName)
select schema_name(schema_id), name
FROM sys.tables
where name not like 'sys%'

SET @ModPrefix = 'Pur'

DECLARE crsTablesTruncIns CURSOR LOCAL FAST_FORWARD
FOR select SchemaName, TableName FROM @FileDataTable order by TableName

OPEN crsTablesTruncIns
FETCH NEXT FROM crsTablesTruncIns INTO @SchemaName, @Text

WHILE @@FETCH_STATUS = 0
BEGIN
    select @i = 1, @RetVal = '', @PrevCase = '';

    while (@i <= len(@Text))
    begin
        SET @c = substring(@Text, @i, 1)

        -- Upper-case the first character and any character after an underscore.
        IF (@PrevCase = '_' OR @i = 1)
            SET @Ret = UPPER(@c)
        ELSE
            SET @Ret = LOWER(@c)

        -- Only letters are carried into the new name.
        if (@c like '[a-zA-Z]')
            SET @RetVal = @RetVal + @Ret

        if (@c = '_')
            SET @PrevCase = '_'
        else
            SET @PrevCase = ''

        SET @i = @i + 1
    end

    SET @RetVal = @ModPrefix + @RetVal
    print cast(@count as varchar(12)) + ' ' + @RetVal
    SET @count = @count + 1

    -- sp_rename needs the schema for tables outside the default schema.
    SET @QualifiedName = QUOTENAME(@SchemaName) + N'.' + QUOTENAME(@Text)

    IF OBJECT_ID(QUOTENAME(@SchemaName) + N'.' + QUOTENAME(@RetVal)) IS NOT NULL
        -- Name unchanged, or two tables map to the same new name.
        print '  skipped: ' + @RetVal + ' already exists'
    ELSE
        EXEC sp_rename @QualifiedName, @RetVal

    FETCH NEXT FROM crsTablesTruncIns INTO @SchemaName, @Text
END

CLOSE crsTablesTruncIns
DEALLOCATE crsTablesTruncIns
I'd like to show you my nice and simple solution. It uses a tally function to split the string by a pattern, in our case by the underscore. To understand tally functions, read this article.
So, this is how my tally function looks like:
-- Splits @String on the single-character delimiter @Delim.
--
-- Returns one row per element:
--   Value  the element with leading/trailing spaces trimmed
--   Ix     position of the delimiter that precedes the element within
--          @Delim + @String + @Delim (increasing, so usable for ordering)
--
-- Limits:
--   * An empty element after a trailing delimiter is not returned, because the
--     tally stops at LEN(@String).
--   * The tally is read from master.dbo.syscolumns, so strings longer than
--     that table's row count are not fully split.
CREATE FUNCTION [dbo].[tvf_xt_tally_split](
     @String NVARCHAR(max)
    ,@Delim CHAR(1))
RETURNS TABLE
as
return
(
    -- Tally: the numbers 1..LEN(@String)
    WITH Tally AS (
        SELECT top (select isnull(LEN(@String),100)) n = ROW_NUMBER() OVER(ORDER BY [name])
        from master.dbo.syscolumns
    )
    -- Keep each position N that holds a delimiter and take the text up to the next one.
    SELECT LTRIM(RTRIM(SUBSTRING(@Delim + @String + @Delim,N+1,CHARINDEX(@Delim,@Delim + @String + @Delim,N+1)-N-1))) Value, N as Ix
    FROM Tally
    WHERE N < LEN(@Delim + @String + @Delim)
    AND SUBSTRING(@Delim + @String + @Delim,N,1) = @Delim
)
This function returns a table where each row represents the part of the string between two @Delim characters (in our case between underscores). The rest of the work is simple: just a combination of the LEFT, RIGHT, LEN, UPPER and LOWER functions.
-- Converts ' HRM_APPLICATION_DELAY_IN' to 'hrm_Application_Delay_In'
-- using dbo.tvf_xt_tally_split.
declare @string varchar(max)
set @string = ' HRM_APPLICATION_DELAY_IN'

-- convert to lower case
set @string = LOWER(@string)

declare @output varchar(max)

-- build string: capitalize each element and join the elements with '_'.
--  * SUBSTRING(Value, 2, ...) returns '' for an empty element, where
--    RIGHT(Value, LEN(Value) - 1) raised an error (leading or doubled underscore).
--  * FOR XML PATH with ORDER BY Ix keeps the elements in their original order;
--    variable concatenation gave no ordering guarantee.
select @output = stuff((
        select '_' + UPPER(left(Value, 1)) + SUBSTRING(Value, 2, LEN(Value))
        from dbo.tvf_xt_tally_split(@string, '_')
        order by Ix
        for xml path(''), type
    ).value('.', 'varchar(max)'), 1, 1, '')

-- lower first char
select LOWER(left(@output, 1)) + SUBSTRING(@output, 2, LEN(@output))