SQL - Replacing all "ASCII/special characters" in a string

SQL - Replacing all "ASCII/special characters" in a string - sql

Edit: I have about 80 characters that are causing problems in my application so I don't want to hard code a REPLACE for every single character. I think it would be easier to create a separate table with two columns,"special characters" and "replacement characters", and I will remove those columns from the original table which contains the column "StringTest". My goal will be figuring out how to use the characters table to replace characters in the string table.
I am trying to replace all "special characters" (ie À, Æ, Ç) with "MappedCharacters" (A, AE, C) in SQL Server. I have tried two different techniques, one using a cursor, one without a cursor, to search through a string and replace all special characters with mapped characters. Each of my methods only replaces characters they are in the same row as the string.
Example before:
num SpecialCharacter MappedCharacter StringTest
1 À A StringÀÆ
2 Æ AE ÆStringÆ
3 Ç C StrÇÀing
Example after:
num SpecialCharacter MappedCharacter StringTest
1 À A StringAÆ
2 Æ AE AEStringAE
3 Ç C StrCÀing
Preferred Output:
num SpecialCharacter MappedCharacter StringTest
1 À A StringAAE
2 Æ AE AEStringAE
3 Ç C StrCAing
So you can see that I want to replace all "special characters" in StringTest but only characters that are in the same row are getting replaced.
I haven't quite figured out how to do that just yet.
Here are the two SQL code that I have been trying to modify (I only need one to work)
First Method:
DECLARE #cASCIINum INT;
DECLARE #cSpecialChar VARCHAR(50);
DECLARE #cMappedChar VARCHAR(50);
DECLARE #cStringTest VARCHAR(50);
DECLARE #mapCursor as CURSOR;
SET #mapCursor = CURSOR FOR
SELECT [ASCIINum]
,[SpecialChar]
,[MappedChar]
,[StringTest]
FROM [intranet].[dbo].[CharMapTestTab];
OPEN #mapCursor;
FETCH NEXT FROM #mapCursor INTO #cASCIINum,
#cSpecialChar,
#cMappedChar,
#cStringTest;
WHILE ##FETCH_STATUS = 0
BEGIN
UPDATE [intranet].[dbo].[CharMapTestTab]
SET StringTest = REPLACE(StringTest, SpecialChar, MappedChar)
WHERE SpecialChar <> MappedChar
END
CLOSE #mapCursor;
DEALLOCATE #mapCursor;
Second Method:
DECLARE #ASCIINum INT = 0
WHILE (1 = 1)
BEGIN
SELECT #ASCIINum = ASCIINum
FROM [intranet].[dbo].[CharMapTestTab]
WHERE ASCIINum > #ASCIINum
ORDER BY ASCIINum
IF ##ROWCOUNT = 0 BREAK;
UPDATE [intranet].[dbo].[CharMapTestTab]
SET StringTest = REPLACE(StringTest, SpecialChar, MappedChar)
WHERE SpecialChar <> MappedChar
SELECT TOP 1000 [ASCIINum]
,[SpecialChar]
,[MappedChar]
,[StringTest]
FROM [intranet].[dbo].[CharMapTestTab]
END

Try this, it works better than looping because there is only 1 update:
-- create test table vc
create table vc(StringTest varchar(20))
insert vc values('StringÀÆ'), ('ÆStringÆ')
go
-- create test table CharacterMapping
create table CharacterMapping(SpecialCharacter char(1), MappedCharacter varchar(2))
insert CharacterMapping values('À', 'A'),('Æ', 'AE'), ('Ç', 'C')
go
--build the varchar for updating
declare #x varchar(max) = 'StringTest'
select #x = 'replace('+#x+', ''' + SpecialCharacter + ''','''+MappedCharacter+''')'
from CharacterMapping
set #x = 'update vc set StringTest=' + #x +' from vc'
exec (#x)
select * from vc
Result:
StringAAE
AEStringAE

I would make a separate mapping table which contains the bad character and its corresponding good character, one set per row. Then loop over that table and do a replace for each character set.
DECLARE #map TABLE (
id INT,
badChar CHAR,
goodChar CHAR
)
DECLARE #strings TABLE (
searchString VARCHAR(50)
)
INSERT INTO #map
VALUES
(1, 'y', 'a'),
(2, 'z', 'b')
DECLARE #curRow INT, #totalRows INT
SET #curRow = 1
SELECT #totalRows = COUNT(*) FROM #map
INSERT INTO #strings
VALUES
('zcccyccz'),
('cccyccz')
WHILE #curRow <= #totalRows
BEGIN
UPDATE #strings
SET searchString = REPLACE(searchString, badChar, goodChar)
FROM #map
WHERE id = #curRow
SET #curRow = #curRow + 1
END
SELECT * FROM #strings
--Output
--bcccaccb
--cccaccb

It would be helpful to know how many rows are in your table and how many you estimate to have "special characters". Also, are there only 3 special characters? if you have 40 or less special characters, it may look ridiculous, but I'd just nest as many REPLACE() calls as you have special characters, like:
UPDATE YourTable SET YourColumn = REPLACE(
REPLACE(
REPLACE(YourColumn,'Ç','C')
,'Æ','AE')
,'À','A')
if most rows have special characters, I'd skip any WHERE. if only a few rows have special characters, I'd use a CTE to identify them:
;WITH AllSpecialRows AS
(
SELECT PrimaryKey FROM YourTable WHERE YourColumn LIKE '%À%'
UNION
SELECT PrimaryKey FROM YourTable WHERE YourColumn LIKE '%Æ%'
UNION
SELECT PrimaryKey FROM YourTable WHERE YourColumn LIKE '%Ç%'
)
UPDATE y
SET YourColumn = REPLACE(
REPLACE(
REPLACE(YourColumn,'Ç','C')
,'Æ','AE')
,'À','A')
FROM YourTable y
INNER JOIN AllSpecialRows s ON y.PrimaryKey =s.PrimaryKey

update table
set column = REPLACE(column,'À','A')
where column like ('%À%')
update table
set column = REPLACE(column,'Æ','AE')
where column like ('%Æ%')
I will leave the 3rd to you
Or this might be more efficient
update table
set column = REPLACE(REPLACE(column,'À','A'),'Æ','AE')
where column like ('%À%')
or column like ('%Æ%')
If you really want to process a list of mapped characters then this is not a proper answer

#t-clausen.dk answer with Table variables and temp tables, just to avoid people mess up their dev databases with additional tables.
TABLE Variables:
-- Create test table variable #CharacterMapping
DECLARE #CharacterMapping TABLE (SpecialCharacter char(1), MappedCharacter varchar(2))
INSERT #CharacterMapping VALUES('À', 'A'), ('Æ', 'AE'), ('Ç', 'C')
--Build the varchar for updating
DECLARE #x varchar(max) = 'StringTest'
SELECT #x = 'replace('+#x+', ''' + SpecialCharacter + ''',''' + MappedCharacter + ''')'
FROM #CharacterMapping
SET #x = 'DECLARE #vc TABLE(StringTest varchar(20));'
+ ' insert #vc values(''StringÀÆ''), (''ÆStringÆ'');'
+ 'update #vc set StringTest=' + #x +' from #vc;'
+ 'SELECT * FROM #vc;'
Exec (#x)
GO
With Temp table:
-- Create test temp table #vc
CREATE TABLE #vc(StringTest varchar(20))
INSERT #vc VALUES('StringÀÆ'), ('ÆStringÆ')
-- Create test table CharacterMapping
DECLARE #CharacterMapping TABLE (SpecialCharacter char(1), MappedCharacter varchar(2))
INSERT #CharacterMapping VALUES('À', 'A'), ('Æ', 'AE'), ('Ç', 'C')
--Build the varchar for updating
DECLARE #x varchar(max) = 'StringTest'
SELECT #x = 'replace('+#x+', ''' + SpecialCharacter + ''',''' + MappedCharacter + ''')'
FROM #CharacterMapping
SET #x = 'update #vc set StringTest=' + #x +' from #vc'
-- Execute
EXEC (#x)
-- Select the results
SELECT * FROM #vc;
-- Drop temp table
DROP TABLE #vc;
GO

Related

To find a substring matching separated by commas

I have a table say "user"which is having a col "access" having multi values separated by comma.
and i have another table " codes" which has a column "SCRCODES" having some user codes as single valued.
so i need to check whether the multi values in the col "access" of the table "user" is having any of the values present in the "SCRCODES" col of the table "codes"
someone please advise on this.
Thanks

i think this will help you:
ALTER FUNCTION [dbo].[Split]
(
#RowData NVARCHAR(MAX) ,
#SplitOn NVARCHAR(5)
)
RETURNS #ReturnValue TABLE ( Data NVARCHAR(MAX) )
AS
BEGIN
DECLARE #Counter INT
SET #Counter = 1
WHILE ( CHARINDEX(#SplitOn, #RowData) > 0 )
BEGIN
INSERT INTO #ReturnValue
( data
)
SELECT Data = LTRIM(RTRIM(SUBSTRING(#RowData, 1,
CHARINDEX(#SplitOn,
#RowData) - 1)))
SET #RowData = SUBSTRING(#RowData,
CHARINDEX(#SplitOn, #RowData) + 1,
LEN(#RowData))
SET #Counter = #Counter + 1
END
INSERT INTO #ReturnValue
( data )
SELECT Data = LTRIM(RTRIM(#RowData))
RETURN
END;
GO
DECLARE #str VARCHAR(MAX)
SET #str = select access from users where oid = "1"
SELECT *
FROM codes c, users u where c.SCRCODES in dbo.Split(#str, ',')

I assume that your sercodes does not contain comma.
You can do something like this:
select sercodes from codes
inner join users
on user.codeid = codes.codeid
where charindex(sercodes + ',', access) > 0 or charindex(',' + sercodes , access) > 0
The idea is that access will be stored like this way "read, write, execute". So, it will be either end with comma or start with comma and part of the string..
Please let me know whether it is working. You can give actual table data and design to get more accurate query.

Find all combinations

My teacher asks an algorithm that find all combinations. I have a set of data and the length can be variable. So combinations should be like this:
a
b
c
aa
ab
ac
...
ccbc
ccca
cccb
cccc
They will be stored in the "word" table that contains a single varchar field.
I did it with loop because I don't like recursivity and jt has better performance:
DROP PROCEDURE combi;
CREATE PROCEDURE combi
AS
BEGIN
DELETE FROM word
DECLARE #i BIGINT
DECLARE #j INT
DECLARE #word NVARCHAR(24)
DECLARE #str NVARCHAR(62)
DECLARE #combinations BIGINT
DECLARE #currentlength TINYINT
DECLARE #maxcurrentlength TINYINT
SET #maxcurrentlength=4
SET #str='azertyuiopqsdfghjklmwxcvbnAZERTYUIOPQSDFGHJKLMWXCVBN0123456789' -- length=62
SET #currentlength=1
-- loop on the length of the text
WHILE #currentlength<=#maxcurrentlength BEGIN
SET #combinations=POWER(62,#currentlength)
SET #i=0
-- get all combinations
WHILE i<#combinations BEGIN
SET #word=''
SET #j=0
-- generate word
WHILE #j<#currentlength BEGIN
SET #word=#word+SUBSTRING(#str, (FLOOR(#i / POWER(62,#currentlength-#j-1) ) % 62) +1, 1)
SET #j=#j+1
END
INSERT INTO word VALUES (#word)
SET #i=#i+1
END
SET #currentlength=#currentlength+1
END
END;
EXEC combi;
The problem is when I use a length of 8, my server crashes: it seems that POWER(62,#currentlength-#j-1) is the problem.

I'm slightly confused about how you ask the question. You ask to "find all combinations" which could very easily be done with CROSS JOIN. If you need to get a length of 4 then you join the table with available values to itself 4 times and you are pretty much done. If you need to get the strings in 1 field you could concatenate them in the select. Like this:
declare #values table (
value nvarchar(100))
insert #values values ('a'),('b'),('c')
select v1.value+v2.value+v3.value+v4.value
from #values v1 cross join
#values v2 cross join
#values v3 cross join
#values v4
order by v1.value+v2.value+v3.value+v4.value

Here is a generic solution using a recursive CTE:
CREATE TABLE t (i nchar(1))
INSERT INTO t VALUES ('a'),('b'),('c')
;WITH cte AS (
SELECT cast(i AS nvarchar(4000)) AS combo, 1 AS ct
FROM t
UNION ALL
SELECT cte.combo + t.i, ct + 1
FROM cte
CROSS JOIN t
WHERE ct <= 4 -- your maximum length
)
SELECT combo
FROM cte
ORDER BY ct, combo
SQL Fiddle.
You must be aware that the number of results grows exponentially with the maximum length, so performance deteriorates rapidly with growing maximum length.

You are likely overflowing the int type you are passing to POWER() as the documentation for power suggests POWER() returns the same type you feed it.
Try using:
SET #word=#word+SUBSTRING(#str, (FLOOR(#i / POWER(CAST(62 AS BIGINT),#currentlength-#j-1) ) % 62) +1, 1)

If you need to parameterise it so that you can set the required length then this algorithm would do it and it's more relational database orientated.
declare #characters table (character nchar(1))
declare #words table (word nvarchar(100))
insert #characters values ('a'),('b'),('c')
INSERT #words (word ) VALUEs ('')
DECLARE #Required_length int
DECLARE #length int
SET #Required_length = 4
SET #length = 0
WHILE #length <= #Required_length
BEGIN
SET #length = #length+1
INSERT #words (word )
SELECT w.word + c.character
FROM #words w JOIN #characters c ON LEN(w.word) = #length-1
END
SELECT word from #words where len(word) = #Required_length
Start with a zero length word
Add all the possible characters to the zero length word to get all
the one character words
Add all the possible characters to the end of all the one character
words to get all the two character words
Add all the possible characters to the end of all the two character words
to get all the three character words
etc....
You can make it run more efficiently by including the length as a column in the word table so that you don't need to calculate the lengths when you're filtering by them but as this has been set by your teacher I'm not going to do all your work for you

First insert of all characters
SET NOCOUNT ON;
create table ##chars (col char(1))
declare #i int
set #i=65
while #i<=90 /* A-Z */
begin
insert into ##chars values( CHAR(#i))
set #i=#i+1
end
set #i=97
while #i<=122 /* a-z */
begin
insert into ##chars values( CHAR(#i))
set #i=#i+1
end
set #i=48
while #i<=57 /* 0-9 */
begin
insert into ##chars values( CHAR(#i))
set #i=#i+1
end
Now, set number for combinations
create table ##result(word varchar(10))
declare #wide int
set #wide=4 /* set how many combinations are calculated */
insert into ##result select * from ##chars
while #wide>1
begin
begin tran w
insert into ##result select a.word+b.col from ##result a, ##chars b
commit tran w
set #wide=#wide-1
end
select * from ##result
/*
drop table ##chars
drop table ##result
*/

Split words with a capital letter in sql

Does anyone know how to split words starting with capital letters from a string?
Example:
DECLARE #var1 varchar(100) = 'OneTwoThreeFour'
DECLARE #var2 varchar(100) = 'OneTwoThreeFourFive'
DECLARE #var3 varchar(100) = 'One'
SELECT #var1 as Col1, <?> as Col2
SELECT #var2 as Col1, <?> as Col2
SELECT #var3 as Col1, <?> as Col2
expected result:
Col1 Col2
OneTwoThreeFour One Two three Four
OneTwoThreeFourFive One Two Three Four Five
One One
If this is not possible (or if too long) an scalar function would be okay as well.

Here is a function I created that is similar to the "removing non-alphabetic characters". How to strip all non-alphabetic characters from string in SQL Server?
This one uses a case sensitive collation which actively seeks out a non-space/capital letter combination and then uses the STUFF function to insert the space. This IS a scalar UDF, so some folks will immediately say that it will be slower than other solutions. To that notion, I say, please test it. This function does not use any table data and only loops as many times as necessary, so it will likely give you very good performance.
Create Function dbo.Split_On_Upper_Case(#Temp VarChar(1000))
Returns VarChar(1000)
AS
Begin
Declare #KeepValues as varchar(50)
Set #KeepValues = '%[^ ][A-Z]%'
While PatIndex(#KeepValues collate Latin1_General_Bin, #Temp) > 0
Set #Temp = Stuff(#Temp, PatIndex(#KeepValues collate Latin1_General_Bin, #Temp) + 1, 0, ' ')
Return #Temp
End
Call it like this:
Select dbo.Split_On_Upper_Case('OneTwoThreeFour')
Select dbo.Split_On_Upper_Case('OneTwoThreeFour')
Select dbo.Split_On_Upper_Case('One')
Select dbo.Split_On_Upper_Case('OneTwoThree')
Select dbo.Split_On_Upper_Case('stackOverFlow')
Select dbo.Split_On_Upper_Case('StackOverFlow')

Here is a function I have just created.
FUNCTION
CREATE FUNCTION dbo.Split_On_Upper_Case
(
#String VARCHAR(4000)
)
RETURNS VARCHAR(4000)
AS
BEGIN
DECLARE #Char CHAR(1);
DECLARE #i INT = 0;
DECLARE #OutString VARCHAR(4000) = '';
WHILE (#i <= LEN(#String))
BEGIN
SELECT #Char = SUBSTRING(#String, #i,1)
IF (#Char = UPPER(#Char) Collate Latin1_General_CS_AI)
SET #OutString = #OutString + ' ' + #Char;
ELSE
SET #OutString = #OutString + #Char;
SET #i += 1;
END
SET #OutString = LTRIM(#OutString);
RETURN #OutString;
END
Test Data
DECLARE #TABLE TABLE (Strings VARCHAR(1000))
INSERT INTO #TABLE
VALUES ('OneTwoThree') ,
('FourFiveSix') ,
('SevenEightNine')
Query
SELECT dbo.Split_On_Upper_Case(Strings) AS Vals
FROM #TABLE
Result Set
╔══════════════════╗
║ Vals ║
╠══════════════════╣
║ One Two Three ║
║ Four Five Six ║
║ Seven Eight Nine ║
╚══════════════════╝

If a single query is needed 26 REPLACE can be used to check every upper case letter like
SELECT #var1 col1, REPLACE(
REPLACE(
REPLACE(
...
REPLACE(#var1, 'A', ' A')
, ...
, 'X', ' X')
, 'Y', ' Y')
, 'Z', ' Z') col2
Not the most beautiful thing but it'll work.
EDIT
Just to add another function to do the same thing in a different way of the other answers
CREATE FUNCTION splitCapital (#param Varchar(MAX))
RETURNS Varchar(MAX)
BEGIN
Declare #ret Varchar(MAX) = '';
declare #len int = len(#param);
WITH Base10(N) AS (
SELECT 0 UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3
UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7
UNION ALL SELECT 8 UNION ALL SELECT 9
), Chars(N) As (
Select TOP(#len)
nthChar
= substring(#param, u.N + t.N*10 + h.N*100 + th.N*1000 + 1, 1)
Collate Latin1_General_CS_AI
FROM Base10 u
CROSS JOIN Base10 t
CROSS JOIN Base10 h
CROSS JOIN Base10 th
WHERE u.N + t.N*10 + h.N*100 + th.N*1000 < #len
ORDER BY u.N + t.N*10 + h.N*100 + th.N*1000
)
SELECT #ret += Case nthChar
When UPPER(nthChar) Then ' '
Else ''
End + nthChar
FROM Chars
RETURN #ret;
END
This one uses the possibility of TSQL to concatenate string variable, I had to use the TOP N trick to force the Chars CTE rows in the right order

Build a Numbers table. There are some excellent posts on SO to show you how to do this. Populate it with values up the maximum length of your input string. Select the values from 1 through the actual length of the current input string. Cross join this list of numbers to the input string. Use the result to SUBSTRING() each character. Then you can either compare the resulting list of one-charachter values to a pre-populated table-valued variable or convert each character to an integer using ASCII() and choose only those between 65 ('A') and 90 ('Z'). At this point you have a list which is the position of each upper-case character in your input string. UNION the maximum length of your input string onto the end of this list. You'll see why in just a second. Now you can SUBSTRING() your input variable, starting at the Number given by row N and taking a length of (the Number given by row N+1) - (The number given by row N). This is why you have to UNION the extra Number on the end. Finally concatenate all these substring together, space-separated, using the algorithm of your choice.
Sorry, don't have an instance in front of me to try out code. Sounds like a fun task. I think doing it with nested SELECT statements will get convoluted and un-maintainable; better to lay it out as CTEs, IMHO.

I know that there are already some good answers out there, but if you wanted to avoid creating a function, you could also use a recursive CTE to accomplish this. It's certainly not a clean way of doing this, but it works.
DECLARE
#camelcase nvarchar(4000) = 'ThisIsCamelCased'
;
WITH
split
AS
(
SELECT
[iteration] = 0
,[string] = #camelcase
UNION ALL
SELECT
[iteration] = split.[iteration] + 1
,[string] = STUFF(split.[string], pattern.[index] + 1, 0, ' ')
FROM
split
CROSS APPLY
( SELECT [index] = PATINDEX(N'%[^ ][A-Z]%' COLLATE Latin1_General_Bin, split.[string]) )
pattern
WHERE
pattern.[index] > 0
)
SELECT TOP (1)
[spaced] = split.[string]
FROM
split
ORDER BY
split.[iteration] DESC
;
As I said, this isn't a pretty way to write a query, but I use things like this when I'm just writing up some ad-hoc queries where I would not want to add new artifacts to the database. You could also use this to create your function as an inline table valued function, which is always a tad nicer.

Please Try This:
declare #t nvarchar (100) ='IamTheTestString'
declare #len int
declare #Counter int =0
declare #Final nvarchar (100) =''
set #len =len( #t)
while (#Counter <= #len)
begin
set #Final= #Final + Case when ascii(substring (#t,#Counter,1))>=65 and
ascii(substring (#t,#Counter,1))<=90 then ' '+substring (#t,#Counter,1) else
substring (#t,#Counter,1) end
set #Counter=#Counter+1
end
print ltrim(#Final)

Dynamically Create tables and Insert into it from another table with CSV values

Have a Table with the CSV Values in the columns as below
ID Name text
1 SID,DOB 123,12/01/1990
2 City,State,Zip NewYork,NewYork,01234
3 SID,DOB 456,12/21/1990
What is need to get is 2 tables in this scenario as out put with the corresponding values
ID SID DOB
1 123 12/01/1990
3 456 12/21/1990
ID City State Zip
2 NewYork NewYork 01234
Is there any way of achieving it using a Cursor or any other method in SQL server?

There are several ways that this can be done. One way that I would suggest would be to split the data from the comma separated list into multiple rows.
Since you are using SQL Server, you could implement a recursive CTE to split the data, then apply a PIVOT function to create the columns that you want.
;with cte (id, NameItem, Name, textItem, text) as
(
select id,
cast(left(Name, charindex(',',Name+',')-1) as varchar(50)) NameItem,
stuff(Name, 1, charindex(',',Name+','), '') Name,
cast(left(text, charindex(',',text+',')-1) as varchar(50)) textItem,
stuff(text, 1, charindex(',',text+','), '') text
from yt
union all
select id,
cast(left(Name, charindex(',',Name+',')-1) as varchar(50)) NameItem,
stuff(Name, 1, charindex(',',Name+','), '') Name,
cast(left(text, charindex(',',text+',')-1) as varchar(50)) textItem,
stuff(text, 1, charindex(',',text+','), '') text
from cte
where Name > ''
and text > ''
)
select id, SID, DOB
into table1
from
(
select id, nameitem, textitem
from cte
where nameitem in ('SID', 'DOB')
) d
pivot
(
max(textitem)
for nameitem in (SID, DOB)
) piv;
See SQL Fiddle with Demo. The recursive version will work great but if you have a large dataset, you could have some performance issues so you could also use a user defined function to split the data:
create FUNCTION [dbo].[Split](#String1 varchar(MAX), #String2 varchar(MAX), #Delimiter char(1))
returns #temptable TABLE (colName varchar(MAX), colValue varchar(max))
as
begin
declare #idx1 int
declare #slice1 varchar(8000)
declare #idx2 int
declare #slice2 varchar(8000)
select #idx1 = 1
if len(#String1)<1 or #String1 is null return
while #idx1 != 0
begin
set #idx1 = charindex(#Delimiter,#String1)
set #idx2 = charindex(#Delimiter,#String2)
if #idx1 !=0
begin
set #slice1 = left(#String1,#idx1 - 1)
set #slice2 = left(#String2,#idx2 - 1)
end
else
begin
set #slice1 = #String1
set #slice2 = #String2
end
if(len(#slice1)>0)
insert into #temptable(colName, colValue) values(#slice1, #slice2)
set #String1 = right(#String1,len(#String1) - #idx1)
set #String2 = right(#String2,len(#String2) - #idx2)
if len(#String1) = 0 break
end
return
end;
Then you can use a CROSS APPLY to get the result for each row:
select id, SID, DOB
into table1
from
(
select t.id,
c.colname,
c.colvalue
from yt t
cross apply dbo.split(t.name, t.text, ',') c
where c.colname in ('SID', 'DOB')
) src
pivot
(
max(colvalue)
for colname in (SID, DOB)
) piv;
See SQL Fiddle with Demo

You'd need to approach this as a multi-step ETL project. I'd probably start with exporting the two types of rows into a couple staging tables. So, for example:
select * from yourtable /* rows that start with a number */
where substring(text,1,1) in
('0','1','2','3','4','5','6','7','8','9')
select * from yourtable /* rows that don't start with a number */
where substring(text,1,1)
not in ('0','1','2','3','4','5','6','7','8','9')
/* or simply this to follow your example explicitly */
select * from yourtable where name like 'sid%'
select * from yourtable where name like 'city%'
Once you get the two types separated then you can split them out with one of the already written split functions found readily out on the interweb.
Aaron Bertrand (who is on here often) has written up a great post on the variety of ways to split comma delimted strings using SQL. Each of the methods are compared and contrasted here.
http://www.sqlperformance.com/2012/07/t-sql-queries/split-strings
If your row count is minimal (under 50k let's say) and it's going to be a one time operation than pick the easiest way and don't worry too much about all the performance numbers.
If you have a ton of rows or this is an ETL process that will run all the time then you'll really want to pay attention to that stuff.

A simple solution using cursors to build temporary tables. This has the limitation of making all columns VARCHAR and would be slow for large amounts of data.
--** Set up example data
DECLARE #Source TABLE (ID INT, Name VARCHAR(50), [text] VARCHAR(200));
INSERT INTO #Source
(ID, Name, [text])
VALUES (1, 'SID,DOB', '123,12/01/1990')
, (2, 'City,State,Zip', 'NewYork,NewYork,01234')
, (3, 'SID,DOB', '456,12/21/1990');
--** Declare variables
DECLARE #Name VARCHAR(200) = '';
DECLARE #Text VARCHAR(1000) = '';
DECLARE #SQL VARCHAR(MAX);
--** Set up cursor for the tables
DECLARE cursor_table CURSOR FAST_FORWARD READ_ONLY FOR
SELECT s.Name
FROM #Source AS s
GROUP BY Name;
OPEN cursor_table
FETCH NEXT FROM cursor_table INTO #Name;
WHILE ##FETCH_STATUS = 0
BEGIN
--** Dynamically create a temp table with the specified columns
SET #SQL = 'CREATE TABLE ##Table (' + REPLACE(#Name, ',', ' VARCHAR(50),') + ' VARCHAR(50));';
EXEC(#SQL);
--** Set up cursor to insert the rows
DECLARE row_cursor CURSOR FAST_FORWARD READ_ONLY FOR
SELECT s.Text
FROM #Source AS s
WHERE Name = #Name;
OPEN row_cursor;
FETCH NEXT FROM row_cursor INTO #Text;
WHILE ##FETCH_STATUS = 0
BEGIN
--** Dynamically insert the row
SELECT #SQL = 'INSERT INTO ##Table VALUES (''' + REPLACE(#Text, ',', ''',''') + ''');';
EXEC(#SQL);
FETCH NEXT FROM row_cursor INTO #Text;
END
--** Display the table
SELECT *
FROM ##Table;
--** Housekeeping
CLOSE row_cursor;
DEALLOCATE row_cursor;
DROP TABLE ##Table;
FETCH NEXT FROM cursor_table INTO #Name;
END
CLOSE cursor_table;
DEALLOCATE cursor_table;

SQL server - Split and sum of a single cell

I have a table cell of type nvarchar(max) that typically looks like this:
A03 B32 Y660 P02
e.g. a letter followed by a number, separated by spaces. What I want to do is get a sum of all those numbers in a SQL procedure. Something rather simple in other languages, but I am fairly new to SQL and besides it seems to me like a rather clumsy language to play around with strings.
Aaanyway, I imagine it would go like this:
1) Create a temporary table and fill it using a split function
2) Strip the first character of every cell
3) Convert the data to int
4) Update target table.column set to sum of said temporary table.
So I got as far as this:
CREATE PROCEDURE [dbo].[SumCell] #delimited nvarchar(max), #row int
AS
BEGIN
declare #t table(data nvarchar(max))
declare #xml xml
set #xml = N'<root><r>' + replace(#delimited,' ','</r><r>') + '</r></root>'
insert into #t(data)
select
r.value('.','varchar(5)') as item
from #xml.nodes('//root/r') as records(r)
UPDATE TargetTable
SET TargetCell = SUM(#t.data) WHERE id = #row
END
Obviously, the first char stripping and conversion to int part is missing and on top of that, I get a "must declare the scalar variable #t" error...

Question is not very clear so assuming your text is in a single cell like A3 B32 Y660 P20 following snippet can be used to get the sum.
DECLARE #Cell NVARCHAR(400), #Sum INT, #CharIndex INT
SELECT #Cell = 'A3 B32 Y660 P20',#Sum=0
WHILE (LEN(LTRIM(#Cell))>0)
BEGIN
SELECT #CharIndex = CHARINDEX(' ',#Cell,0)
SELECT #Sum = #Sum +
SUBSTRING(#Cell,2,CASE WHEN #CharIndex>2 THEN #CharIndex-2 ELSE LEN(#Cell)-1 END )
SELECT #Cell = SUBSTRING(#Cell,#CharIndex+1,LEN(#Cell))
IF NOT (#CharIndex >0) BREAK;
END
--#Sum has the total of cell numbers
SELECT #Sum

I'm making the assumption that you really want to be able to find the sum of values in your delimited list for a full selection of a table. Therefore, I believe the most complicated part of your question is to split the values. The method I tend to use requires a numbers table, So I'll start with that:
--If you really want to use a temporary numbers table don't use this method!
create table #numbers(
Number int identity(1,1) primary key
)
declare #counter int
set #counter = 1
while #counter<=10000
begin
insert into #numbers default values
set #counter = #counter + 1
end
I'll also create some test data
create table #data(
id int identity(1,1),
cell nvarchar(max)
)
insert into #data(cell) values('A03 B32 Y660 P02')
insert into #data(cell) values('Y72 A12 P220 B42')
Then, I'd put the split functionality into a CTE to keep things clean:
;with split as (
select d.id,
[valOrder] = row_number() over(partition by d.cell order by n.Number),
[fullVal] = substring(d.cell, n.Number, charindex(' ',d.cell+' ',n.Number) - n.Number),
[char] = substring(d.cell, n.Number, 1),
[numStr] = substring(d.cell, n.Number+1, charindex(' ',d.cell+' ',n.Number) - n.Number)
from #data d
join #numbers n on substring(' '+d.cell, n.Number, 1) = ' '
where n.Number <= len(d.cell)+1
)
select id, sum(cast(numStr as int))
from split
group by id

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL - Replacing all "ASCII/special characters" in a string - sql

Related

To find a substring matching separated by commas

Find all combinations

Split words with a capital letter in sql

Dynamically Create tables and Insert into it from another table with CSV values

SQL server - Split and sum of a single cell

Categories

Resources