SQL Server 2005 Using CHARINDEX() To split a string - sql

How can I split the following string based on the '-' character?
So if I had this string: LD-23DSP-1430
How could I split it into separate columns like this:
LD 23DSP 1430
Also, is there a way to split each character into a separate field if I needed to (without the '-')? I'm trying to find a way to replace each letter with the NATO alphabet.
So this would be..... Lima Delta Twenty Three Delta Sierra Papa Fourteen Thirty.... in one field.
I know I can get the left side like this:
LEFT(#item, CHARINDEX('-', #item) - 1)

I wouldn't exactly say it is easy or obvious, but with just two hyphens, you can reverse the string and it is not too hard:
-- Splits a value of the form part1-part2-part3 (exactly two hyphens) into three named columns.
with t as (select 'LD-23DSP-1430' as val),
pos as (
    -- Locate both hyphens once: the first from the left, the last from the right.
    select t.val,
           charindex('-', t.val) as first_dash,
           charindex('-', reverse(t.val)) as last_dash_from_end
    from t
)
select pos.val,
       -- everything before the first hyphen
       left(pos.val, pos.first_dash - 1) as part1,
       -- everything between the two hyphens
       substring(pos.val, pos.first_dash + 1, len(pos.val) - pos.last_dash_from_end - pos.first_dash) as part2,
       -- everything after the last hyphen
       right(pos.val, pos.last_dash_from_end - 1) as part3
from pos;
Beyond that, you might want to use a split() function instead.

Here's a little function that will do "NATO encoding" for you:
CREATE FUNCTION dbo.NATOEncode (
#String varchar(max)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN (
    -- Spells out #String as NATO phonetic alphabet code words.
    -- Returns one row with one column, NATOString: the code words for the
    -- letters and digits of #String, in their original order, separated by
    -- single spaces. Characters that have no entry in the lookup list below
    -- (for example '-') are skipped by the inner join.
    -- NOTE(review): whether lower-case letters match depends on the collation
    -- used for the comparison in the join - confirm for your database.
    WITH L1 (N) AS (SELECT 1 UNION ALL SELECT 1),
    L2 (N) AS (SELECT 1 FROM L1, L1 B),
    L3 (N) AS (SELECT 1 FROM L2, L2 B),
    L4 (N) AS (SELECT 1 FROM L3, L3 B),
    L5 (N) AS (SELECT 1 FROM L4, L4 C),
    L6 (N) AS (SELECT 1 FROM L5, L5 C),
    -- Each level squares the row count of the previous one; Nums numbers the rows 1, 2, 3, ...
    Nums (Num) AS (SELECT Row_Number() OVER (ORDER BY (SELECT 1)) FROM L6)
    SELECT
        NATOString = Substring((
            SELECT
                Convert(varchar(max), ' ' + D.Word)
            FROM
                Nums N
                INNER JOIN (VALUES
                    ('A', 'Alpha'),
                    ('B', 'Bravo'),
                    ('C', 'Charlie'),
                    ('D', 'Delta'),
                    ('E', 'Echo'),
                    ('F', 'Foxtrot'),
                    ('G', 'Golf'),
                    ('H', 'Hotel'),
                    ('I', 'India'),
                    ('J', 'Juliet'),
                    ('K', 'Kilo'),
                    ('L', 'Lima'),
                    ('M', 'Mike'),
                    ('N', 'November'),
                    ('O', 'Oscar'),
                    ('P', 'Papa'),
                    ('Q', 'Quebec'),
                    ('R', 'Romeo'),
                    ('S', 'Sierra'),
                    ('T', 'Tango'),
                    ('U', 'Uniform'),
                    ('V', 'Victor'),
                    ('W', 'Whiskey'),
                    ('X', 'X-Ray'),
                    ('Y', 'Yankee'),
                    ('Z', 'Zulu'),
                    ('0', 'Zero'),
                    ('1', 'One'),
                    ('2', 'Two'),
                    ('3', 'Three'),
                    ('4', 'Four'),
                    ('5', 'Five'),
                    ('6', 'Six'),
                    ('7', 'Seven'),
                    ('8', 'Eight'),
                    ('9', 'Niner')
                ) D (Digit, Word)
                    ON Substring(#String, N.Num, 1) = D.Digit
            WHERE
                N.Num <= Len(#String)
            -- Without an explicit ORDER BY the concatenation order is not guaranteed.
            ORDER BY
                N.Num
            FOR XML PATH(''), TYPE
        -- Start at position 2 to drop the leading space added in front of the first word.
        ).value('.[1]', 'varchar(max)'), 2, 2147483647)
);
This function will work on even very long strings, and performs pretty well (I ran it against a 100,000-character string and it returned in 589 ms). Here's an example of how to use it:
SELECT NATOString FROM dbo.NATOEncode('LD-23DSP-1430');
-- Output: Lima Delta Two Three Delta Sierra Papa One Four Three Zero
I intentionally made it a table-valued function so it could be inlined into a query if you run it against many rows at once, just use CROSS APPLY or wrap the above example in parentheses to use it as a value in the SELECT clause (you can put a column name in the function parameter position).

Try the following query:
-- Splits a three-part, hyphen-delimited value into three result columns.
DECLARE #item VARCHAR(MAX) = 'LD-23DSP-1430'
SELECT
-- Part 1: SUBSTRING with start 0 returns one character fewer than the requested length,
-- so asking for CHARINDEX('-') characters stops just before the first hyphen.
SUBSTRING( #item, 0, CHARINDEX('-', #item)) ,
-- Part 2: take the text after the first hyphen, then cut that remainder at its own first hyphen.
SUBSTRING(
SUBSTRING( #item, CHARINDEX('-', #item)+1,LEN(#ITEM)) ,
0 ,
CHARINDEX('-', SUBSTRING( #item, CHARINDEX('-', #item)+1,LEN(#ITEM)))
),
-- Part 3: apply the part-1 technique to the reversed string, then reverse the result back.
REVERSE(SUBSTRING( REVERSE(#ITEM), 0, CHARINDEX('-', REVERSE(#ITEM))))

USE [master]
GO
/****** this function returns Pakistan where as if you want to get ireland simply replace (SELECT SUBSTRING(#NEWSTRING,CHARINDEX('$#$#$',#NEWSTRING)+5,LEN(#NEWSTRING))) with
SELECT #NEWSTRING = (SELECT SUBSTRING(#NEWSTRING, 0,CHARINDEX('$#$#$',#NEWSTRING)))******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE FUNCTION [dbo].[FN_RETURN_AFTER_SPLITER]
(
#SPLITER varchar(max))
RETURNS VARCHAR(max)
AS
BEGIN
    -- Returns the part of #SPLITER that follows the first occurrence of the
    -- five-character marker '$#$#$'.
    --   * marker present -> the text after it
    --   * marker absent  -> #SPLITER unchanged (previously the first four
    --                       characters were silently dropped, because
    --                       CHARINDEX returned 0 and 0 + 5 was used as start)
    --   * NULL input     -> NULL
    DECLARE #MARKER_POS BIGINT
    SET #MARKER_POS = CHARINDEX('$#$#$', #SPLITER)

    IF #MARKER_POS = 0
        RETURN #SPLITER

    -- DATALENGTH is used as the length because LEN ignores trailing spaces and
    -- could cut a tail that consists mostly of spaces.
    RETURN SUBSTRING(#SPLITER, #MARKER_POS + 5, DATALENGTH(#SPLITER))
END
--select [dbo].[FN_RETURN_AFTER_SPLITER] ('Ireland$#$#$Pakistan')

Create FUNCTION [dbo].[fnSplitString]
(
#string NVARCHAR(200),
#delimiter CHAR(1)
)
-- Splits #string at every occurrence of #delimiter and returns one row per piece.
-- splitdata is as wide as the input, so a piece can never be too long for the column
-- (it was NVARCHAR(10), which cannot hold longer pieces).
RETURNS #output TABLE(splitdata NVARCHAR(200)
)
BEGIN
    DECLARE #start INT, #end INT
    -- #start: first character of the current piece; #end: position of the delimiter that closes it
    SELECT #start = 1, #end = CHARINDEX(#delimiter, #string)
    WHILE #start < LEN(#string) + 1 BEGIN
        -- No further delimiter: the last piece runs to the end of the string.
        IF #end = 0
            SET #end = LEN(#string) + 1
        INSERT INTO #output (splitdata)
        VALUES(SUBSTRING(#string, #start, #end - #start))
        -- Continue right after the delimiter that was just consumed.
        SET #start = #end + 1
        SET #end = CHARINDEX(#delimiter, #string, #start)
    END
    RETURN
END

-- Splits #variable into one row per character using a recursive CTE.
DECLARE #variable VARCHAR(100) = 'LD-23DSP-1430';
WITH Split
-- Anchor row: the first character (charone) and the remainder of the string (R).
AS ( SELECT #variable AS list ,
charone = LEFT(#variable, 1) ,
R = RIGHT(#variable, LEN(#variable) - 1) ,
-- MasterOne is 'A' on the anchor row and 'B' on every recursive row.
'A' AS MasterOne
UNION ALL
-- Recursive step: peel the next character off the remainder.
SELECT Split.list ,
LEFT(Split.R, 1) ,
R = RIGHT(split.R, LEN(Split.R) - 1) ,
'B' AS MasterOne
FROM Split
-- Stop once the remainder is empty.
WHERE LEN(Split.R) > 0
)
SELECT *
FROM Split
-- Raise the recursion limit above the default of 100 levels.
OPTION ( MAXRECURSION 10000 );

Related

Replace the even characters to upper case and the remaining characters to lower case

Is there an SQL query to replace the even characters to upper case and the remaining characters to lower case in a string?
For example if the string is 'sagar' the result should be like
sAgAr
What would be the appropriate solution for this?
I can't resist answering. This seems like such a natural for a recursive CTE:
-- Upper-cases the characters at even positions and lower-cases the rest, one position per recursion step.
with t as (
select 'abcdef' as str
),
cte as (
-- Anchor: start from the fully lower-cased string; pos is the last position already processed.
select cast(lower(str) as varchar(max)) as str, 1 as pos
from t
union all
-- Recursive step: rewrite the character at pos + 1.
-- When pos is odd, pos + 1 is an even position, so that character is upper-cased.
select stuff(str, pos + 1, 1,
(case when pos % 2 = 1 then upper(substring(str, pos + 1, 1))
else lower(substring(str, pos + 1, 1))
end)
) as str, 1 + pos
from cte
where pos < len(str)
)
-- The row with the highest pos holds the fully processed string.
select top (1) *
from cte
order by pos desc;
I wrote the code below and it works fine.
Tested on the master database.
-- Builds a copy of #name with the characters at even positions upper-cased and
-- the others lower-cased, then prints it once (for 'sagar' this prints sAgAr).
declare #name nvarchar(50)
declare #result nvarchar(50)
declare #i int
set #i=1
set #name='sagar'
set #result=''
while(#i<=LEN(#name))
begin
    -- Append the current character in the case required by its position.
    set #result = #result
        + case
              when #i%2=0 then Upper(SUBSTRING(#name,#i,1))
              else Lower(SUBSTRING(#name,#i,1))
          end
    set #i=#i+1
end
print #result
Give the name of your own choice while setting the #name parameter and you can get the required result
Using a tally table...
-- For every name, upper-cases the characters at even positions and lower-cases
-- the rest, using a tally (numbers) CTE instead of a loop.
declare #table table ([name] varchar(64))
insert into #table
values
('sAgAr')
,('abcdefghijk')
,('LMNOPQ')
;WITH
E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))dt(n)), -- 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), -- 10^2 = 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), -- 10^4 = 10,000 rows max
cteTally(N) AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
select
    t.[name]
    ,case_applied = (
        -- One row per character position of the current name, glued back
        -- together by FOR XML PATH('').
        SELECT case
                   when c.N % 2 = 0 then upper(substring(t.[name], c.N, 1))
                   else lower(substring(t.[name], c.N, 1))
               end
        FROM cteTally c
        WHERE c.N <= len(t.[name])
        -- The characters must be concatenated in position order.
        ORDER BY c.N
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)')
from
    #table t
This is by splitting into rows and recreating strings again:
-- Splits every value into one row per character, applies the case rule per
-- position (even -> upper, odd -> lower) and concatenates the rows again.
declare #test table ([value] nvarchar(20))
insert into #test values ('sagar'), ('Blueprint'), ('turtLe')
;with cte as (
    select [value]
         , num
         , case
               when num % 2 = 0 then upper(substring([value], num, 1))
               else lower(substring([value], num, 1))
           end as [char]
    from #test
    cross join (values (1), (2), (3), (4), (5), (6), (7), (8), (9), (10), (11), (12)) numbers(num) --add more for > 12 characters
    where num <= len([Value]))
select distinct [Value], [CaseApplied] = (
        SELECT '' + c.[char]
        FROM cte AS c
        WHERE c.[value] = cte.[value]
        -- Concatenate in position order; without this the order is not guaranteed.
        ORDER BY c.num
        -- TYPE + .value() returns the text as-is instead of XML-escaped (e.g. & as &amp;).
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)')
from cte
Here's one way:
-- Upper-cases every second character (positions 2, 4, 6, ...) of #mystringLOW in place.
-- The other characters are left exactly as they are in the input.
DECLARE #mystringLOW varchar(100) = 'sagar'
DECLARE #even_pos int = 2
WHILE LEN(#mystringLOW) >= #even_pos
BEGIN
    -- Overwrite the character at the current even position with its upper-case form.
    SET #mystringLOW = STUFF(#mystringLOW, #even_pos, 1, UPPER(SUBSTRING(#mystringLOW, #even_pos, 1)))
    -- Jump to the next even position.
    SET #even_pos = #even_pos + 2
END
SELECT #mystringLOW AS [my answer]
Produces:
my answer
---------
sAgAr

How to grab everything before a 3rd occurrence of a character in SQL query

How can I grab all the data before the 3rd '-'?
Below I have the sample data:
000700- - - 8
015111- - 005 -
019999- - 005 -
A01- 01200- 0 - 5
A01-012000- - 5
A02-015450- - 5
A02-015450- 003 - 1
D08-020700- - 8
D08-020710- - 5
D08-020710- 013 - 1
D08-020710- 013 - 3
This is what I have done, and I get the proper info, but because some of the spaces are being removed I cannot do a proper comparison to get the data in Crystal Reports.
reverse(substring(reverse(a.ProjectionCode),
charindex('-', reverse(a.ProjectionCode)) + 1,
len(reverse(a.ProjectionCode))))) as PhaseCode
Try this:
-- Returns the text that precedes the LAST dash in #str: CHARINDEX on the reversed
-- string gives the distance of the last dash from the end of the string.
-- For the sample value, which has exactly three dashes, that is the text before the 3rd dash.
declare #str varchar(200) = 'A02-015450- 003 - 1';
select SUBSTRING(#str, 1, LEN(#str) - CHARINDEX('-', REVERSE(#str), 1));
This should get you what you need and will work even in the event that there are more or less than 3 dashes for a given row.
-- Test Data...
-- NOTE(review): assumes #TestData (SomeString) already exists; its creation is not shown here.
INSERT #TestData (SomeString) VALUES
('000700- - - 8'),
('015111- - 005 -'),
('019999- - 005 -'),
('A01- 01200- 0 - 5'),
('A01-012000- - 5'),
('A02-015450- - 5'),
('A02-015450- 003 - 1'),
('D08-020700- - 8'),
('D08-020710- - 5'),
('D08-020710- 013 - 1'),
('D08-020710- 013 - 3'),
('1-2-3-4-5-6-7-8-9'), -- More than 3 dashes
('zaq12wsx-vfr445tgb'), -- Fewer than 3 dashes
('987654321345678'); -- No dashes
-- Query...
-- d1, d2 and d3 hold the positions of the 1st, 2nd and 3rd dash. NULLIF turns
-- CHARINDEX's "not found" result (0) into NULL, and each search starts one
-- character after the previous dash.
SELECT
td.SomeString,
-- When d3.Dash is NULL, ISNULL substitutes LEN + 1, so LEFT takes LEN characters.
LEFT(td.SomeString, ISNULL(d3.Dash, LEN(td.SomeString) + 1)- 1)
FROM
#TestData td
CROSS APPLY ( VALUES (NULLIF(CHARINDEX('-', td.SomeString, 1), 0)) ) d1 (Dash)
CROSS APPLY ( VALUES (NULLIF(CHARINDEX('-', td.SomeString, d1.Dash + 1), 0)) ) d2 (Dash)
CROSS APPLY ( VALUES (NULLIF(CHARINDEX('-', td.SomeString, d2.Dash + 1), 0)) ) d3 (Dash);
EDIT: Adding function and usage code...
iTVF
CREATE FUNCTION dbo.tfn_TextLeftOfThirdDash
/* =============================================================================================
iTVF that returns all text to the left of the 3rd dash in a string of text.
Parameter: #String - the text to search.
Returns:   one row with one column, StringRemain.
============================================================================================= */
(
#String VARCHAR(8000)
)
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
SELECT
-- When d3.Dash is NULL, ISNULL substitutes LEN + 1, so LEFT takes LEN characters.
StringRemain = LEFT(#String, ISNULL(d3.Dash, LEN(#String) + 1)- 1)
FROM
-- d1, d2, d3: positions of the 1st, 2nd and 3rd dash; NULLIF turns "not found" (0) into NULL.
( VALUES (NULLIF(CHARINDEX('-', #String, 1), 0)) ) d1 (Dash)
CROSS APPLY ( VALUES (NULLIF(CHARINDEX('-', #String, d1.Dash + 1), 0)) ) d2 (Dash)
CROSS APPLY ( VALUES (NULLIF(CHARINDEX('-', #String, d2.Dash + 1), 0)) ) d3 (Dash);
GO
How to use the function...
SELECT
td.SomeString,
l3d.StringRemain
FROM
#TestData td
CROSS APPLY dbo.tfn_TextLeftOfThirdDash(td.SomeString) l3d;
HTH,
Jason
It is not totally clear what you expect as output here. But taking a guess you could use PARSENAME here since your data is always exactly three dashes. Please realize that if you have any periods in your data this will NOT work correctly. In a perfect world this data would not be delimited in a single column like this. It should be spread across columns. What you have here is violating 1NF.
-- Sample data: values made of four segments separated by three dashes.
declare #Something table
(
SomeValue varchar(50)
)
insert #Something
values
('000700- - - 8')
,('015111- - 005 -')
,('019999- - 005 -')
,('A01- 01200- 0 - 5')
,('A01-012000- - 5')
,('A02-015450- - 5')
,('A02-015450- 003 - 1')
,('D08-020700- - 8')
,('D08-020710- - 5')
,('D08-020710- 013 - 1')
,('D08-020710- 013 - 3 ')
-- The dashes are replaced with periods so that PARSENAME can pick one segment.
-- PARSENAME numbers the parts from the right, so part 2 is the third of the four
-- segments only; it is not the whole text before the third dash.
select *
, parsename(replace(SomeValue, '-', '.'), 2)
from #Something
The tsql function for searching a string, charindex, accepts an argument for where to start. This start position can be calculated using another call to charindex..
-- Returns everything before the 3rd '-' in col.
-- Each CHARINDEX starts one character after the dash found by the call nested
-- inside it. NULLIF turns "not found" (0) into NULL so that a missing dash
-- propagates outwards instead of producing a negative length; in that case
-- (fewer than three dashes) the whole value is returned rather than raising
-- "Invalid length parameter passed to the LEFT or SUBSTRING function".
SELECT
    LEFT(col,
        ISNULL(
            NULLIF(CHARINDEX('-', col,
                NULLIF(CHARINDEX('-', col,
                    NULLIF(CHARINDEX('-', col), 0) + 1
                ), 0) + 1
            ), 0) - 1,
            LEN(col)
        )
    )
FROM table
SQL Server has nothing natively to determine the nth position of a character or string. My method is to create my own. Something like this:
CREATE FUNCTION [dbo].[nthOccurencePos](#input VARCHAR(128), #delimiter CHAR(1), #nth INT)
RETURNS INT
AS
BEGIN
    -- Returns the 1-based position of the #nth occurrence of #delimiter in #input.
    --   * Returns 0 when #input holds fewer than #nth delimiters or #nth < 1
    --     (the same "not found" convention as CHARINDEX).
    --   * Returns NULL when any argument is NULL.
    -- A plain loop is used instead of a recursive CTE so the result does not
    -- depend on the recursion limit and "not found" is reported consistently.
    IF #input IS NULL OR #delimiter IS NULL OR #nth IS NULL
        RETURN NULL;

    DECLARE #pos INT, #found INT;
    SET #pos = 0;    -- position of the most recently found delimiter
    SET #found = 0;  -- number of delimiters found so far

    WHILE #found < #nth
    BEGIN
        -- Move over one character and find the index of the next delimiter.
        SET #pos = CHARINDEX(#delimiter, #input, #pos + 1);
        IF #pos = 0 BREAK;  -- When you come up empty, stop.
        SET #found = #found + 1;
    END

    RETURN CASE WHEN #nth >= 1 AND #found = #nth THEN #pos ELSE 0 END;
END
Now use this function to get everything before the 3rd hyphen like this:
DECLARE #tbl TABLE(sample VARCHAR(128))
INSERT #tbl VALUES
('000700- - - 8')
,('015111- - 005 -')
,('019999- - 005 -')
,('A01- 01200- 0 - 5')
,('A01-012000- - 5')
,('A02-015450- - 5')
,('A02-015450- 003 - 1')
,('D08-020700- - 8')
,('D08-020710- - 5')
,('D08-020710- 013 - 1')
,('D08-020710- 013 - 3 ');
SELECT LEFT(sample, dbo.nthOccurencePos(sample, '-', 3) - 1) BeforeHyphen3
FROM #tbl;
Although you don't use this in your case, it is more common to need the nth column of data within a delimited string. For example:
DECLARE #delim NVARCHAR(128);
SET #delim = 'abcdefg-hijklmnop-qrstuv-wxyz';
SELECT SUBSTRING(#delim, dbo.nthOccurencePos(#delim, '-', 2) + 1, dbo.nthOccurencePos(#delim, '-', 3) - dbo.nthOccurencePos(#delim, '-', 2) - 1) as thirdColumn

MSSQL - Masking data based on mapping table

wanted to perform data masking according to mapping as below by using MSSQL 2008R2:
Mapping Table
A = C
B = A
C = E
1 = 3
2 = 1
3 = 9
Original
ABC123
Masked
CAE319
The idea would be to use REPLACE; however, each later REPLACE call also replaces the values produced by the earlier ones.
select Replace(Replace(Replace(Replace(Replace(REPLACE('ABC123', 'A', 'C'), 'B', 'A'), 'C', 'E'), '1', '3'), '2', '1'), '3', '9')
Result: CAE319
P.S. The values were edited, because REVERSE (or replacing in reverse order) cannot be used in this case.
any idea?
If you want a more table approach.
There are two code segments below which will Mask or UnMask a string. Easily converted into a UDF or even placed in a CROSS APPLY
-- Mapping table: each MapFrom character is replaced by its MapTo character.
Declare #Mask table (MapFrom varchar(10),MapTo varchar(10))
Insert into #Mask values
('A','C'),
('B','D'),
('C','E'),
('1','2'),
('2','3'),
('3','9')
Declare #Yourtable table (ID int,SomeCol varchar(max))
Insert Into #Yourtable values
(1,'ABC123')
-- To Mask
-- Every character of #U is looked up once in #Mask, so a replaced character is
-- never replaced a second time. Characters without a mapping are kept (IsNull).
Declare #U varchar(max) ='ABC123'
Select NewSting = Stuff((Select ''+S
From (
Select N
,S=IsNull(MapTo,Substring(#U,N,1))
-- N numbers the character positions 1..Len(#U); master..spt_values only serves as a row source.
From (Select Top (Len(#U)) N=Row_Number() Over (Order By (Select null)) From master..spt_values) N
Left Join #Mask on Substring(#U,N,1)=MapFrom
) X
Order By N
For XML Path ('')),1,0,'')
-- To UnMask
-- Same technique with the lookup reversed: match on MapTo and emit MapFrom.
Declare #M varchar(max) = 'CDE239'
Select NewSting = Stuff((Select ''+S
From (
Select N
,S=IsNull(MapFrom,Substring(#M,N,1))
From (Select Top (Len(#M)) N=Row_Number() Over (Order By (Select null)) From master..spt_values) N
Left Join #Mask on Substring(#M,N,1)=MapTo
) X
Order By N
For XML Path ('')),1,0,'')
Just change the order of replace and reverse the result
select REVERSE( Replace(Replace(Replace(Replace(Replace(REPLACE('321CBA', '3', '9'), '2', '3'), '1', '2'), 'C', 'E'), 'B', 'D'), 'A', 'C'))
RESULT :
CDE239
EDIT:
-- Masks #result character by character using the #Mask mapping table.
-- Each position is rewritten exactly once, so a replacement can never be
-- replaced again by a later mapping.
Declare #Mask table (MapFrom varchar(10),MapTo varchar(10))
Insert into #Mask values
('A','C'),
('B','A'),
('C','E'),
('1','3'),
('2','1'),
('3','9')
DECLARE #pos INT
,#result VARCHAR(100)
,#mask_to NCHAR(1);
SET #result = 'ABC123';
SET #pos = 1
WHILE #pos < LEN(#result) + 1
BEGIN
    -- Reset first: when no mapping row matches, the SELECT below assigns
    -- nothing and the variable would otherwise keep the previous iteration's value.
    SET #mask_to = NULL;
    SELECT #mask_to = MapTo
    FROM #Mask
    WHERE MapFrom = substring(#result, #pos, 1)
    -- Characters without a mapping are left unchanged.
    SET #result = STUFF(#result, #pos, 1, ISNULL(#mask_to, substring(#result, #pos, 1)));
    SET #pos = #pos + 1;
END
SELECT #result
RESULT
CAE319

Converting CHAR string to nth letter in Alphabet string in SQL

I have to build a process that takes a VARCHAR string (for example 'AHT559') and converts it to a INT only string by converting the Alphabetic chars to INTEGERS based on the nth letter in the alphabet. The above would thus result in: 010820559.
I have done this in SAS before, but I'm relatively new to SQL. What would be the best way to do this in SQL?
Here is what I've done in SAS:
DO _i = 1 TO length( account );
IF (rank( char( account, _i ) ) -64) < 0 THEN agreement_hash = CATS( agreement_hash, char( account, _i ) );
ELSE IF (rank( char( account, _i ) ) -64) < 10 THEN agreement_hash = CATS( agreement_hash, 0, rank( char( account, _i ) )-64 );
ELSE agreement_hash = CATS( agreement_hash, rank( char( account, _i ) )-64 );
END;
If the format of the values is always the same as you state in the comments and you only need to process a single value at a time you can do some simple string manipulation to convert the characters to integers using their ASCII values, and subtracting 64 to get the number of the alphabetic character:
SELECT ASCII('A') -- produces 65
SELECT ASCII('A') - 64 -- produces 1
This is a little long winded and could be done in less lines of code, but it's separated for clarity.
DECLARE #val NVARCHAR(10) = 'AHT559'
-- get first, second and third character numeric values
DECLARE #first INT = ASCII(SUBSTRING(#val, 1, 1)) - 64
DECLARE #second INT = ASCII(SUBSTRING(#val, 2, 1)) - 64
DECLARE #third INT = ASCII(SUBSTRING(#val, 3, 1)) - 64
-- join them together adding a '0' if < 10
SELECT RIGHT('0' + CAST(#first AS VARCHAR(2)), 2)
+ RIGHT('0' + CAST(#second AS VARCHAR(2)), 2)
+ RIGHT('0' + CAST(#third AS VARCHAR(2)), 2)
+ RIGHT(#val, 3)
Tested on 4 million rows:
-- temp table creation - takes approx 100 seconds on my machine
CREATE TABLE #temp (val NVARCHAR(6))
DECLARE #rowno INT = 1
SELECT #rowno = 1
WHILE #rowno <= 4000000
BEGIN
INSERT INTO #temp ( val ) VALUES ( 'AHT559' )
SELECT #rowno = #rowno + 1
END
To run this code against the entire temp table takes < 20 seconds on my machine:
SELECT val AS OrignalValue,
RIGHT('0' + CAST( ASCII(SUBSTRING(val, 1, 1)) - 64 AS VARCHAR(2)), 2)
+ RIGHT('0' + CAST( ASCII(SUBSTRING(val, 2, 1)) - 64 AS VARCHAR(2)), 2)
+ RIGHT('0' + CAST( ASCII(SUBSTRING(val, 3, 1)) - 64 AS VARCHAR(2)), 2)
+ RIGHT(val, 3) AS FormattedValue
FROM #temp
Here is a similar script for sqlserver, any character which is not a capital letter is assumed a digit in this syntax:
-- Replaces every character of #x that falls between 'A' and 'Z' with its two-digit position in the alphabet.
DECLARE #x varchar(100) = 'AHT559'
-- Walk from the last character to the first: replacing one character with two
-- digits then never shifts the positions that are still to be processed.
DECLARE #p int = len(#x)
WHILE #p > 0
SELECT #x =
CASE WHEN substring(#x, #p, 1) between 'A' and 'Z'
-- ascii - 64 is the letter's position (A = 1); adding 100 and keeping the two
-- right-most characters zero-pads it (1 -> 101 -> '01').
THEN stuff(#x, #p, 1, right(ascii(substring(#x, #p, 1)) - 64 + 100, 2))
ELSE #x END,
#p -= 1
SELECT #x
Result:
010820559
You could use something like the below, possibly as a scalar function to do this conversion.
DECLARE #i INT
DECLARE #Item NVARCHAR(4000) = 'AHT1234'
DECLARE #ItemTable TABLE
(
Item NCHAR(1)
)
SET #i = 1
--Split the input string into separate characters, store in temp table
WHILE (#i <= LEN(#Item))
BEGIN
INSERT INTO #ItemTable(Item)
VALUES(SUBSTRING(#Item, #i, 1))
SET #i = #i + 1
END
DECLARE #AlphaTable TABLE (
Letter NCHAR(1),
Position NVARCHAR(2)
)
-- Populate this with the whole alphabet obviously. Could be a permanent rather than temp table.
INSERT INTO #AlphaTable
( Letter, Position )
VALUES ( N'A', '01'),
(N'H', '08'),
(N'T', '20')
DECLARE #Output NVARCHAR(50)
-- Convert the output and concatenate it back to a single output.
SELECT #Output = COALESCE(#output, '') + Converted
FROM (
SELECT CASE WHEN ISNUMERIC(Item) = 1
THEN CONVERT(NVARCHAR(1), Item)
ELSE (SELECT Position FROM #AlphaTable WHERE Letter = CONVERT(NCHAR(1), Item))
END AS Converted
FROM #ItemTable
) AS T1
SELECT #Output
GO
Try this.
-- Converts every letter A-Z of #STR to its two-digit position in the alphabet
-- (A -> 01 ... Z -> 26) and copies every other character unchanged.
-- The string is processed in a single left-to-right pass, so letters and digits
-- may appear in any order, and no intermediate copy can truncate long input.
DECLARE #STR VARCHAR(MAX)= 'AHT559',
        #POS INT = 1,
        #CH CHAR(1),
        #OUTPUT VARCHAR(MAX)=''
WHILE #POS <= Len(#STR)
BEGIN
    SELECT #CH = Upper(Substring(#STR, #POS, 1))
    -- Compare character codes (65 = 'A', 90 = 'Z') so the letter test does not depend on the collation.
    SET #OUTPUT += CASE
                       WHEN Ascii(#CH) BETWEEN 65 AND 90
                           THEN RIGHT('0' + Cast(Ascii(#CH) - 64 AS VARCHAR(2)), 2)
                       ELSE #CH
                   END
    SET #POS += 1
END
SELECT #OUTPUT
How about using a CTE to create every combination of the first 3 letters and using that to match to:
SQL Fiddle
MS SQL Server 2008 Schema Setup:
CREATE TABLE Accounts
(
Account VARCHAR(6)
)
INSERT INTO Accounts
VALUES ('AHT559'), ('BXC556'),
('CST345')
Query 1:
;WITH AlphaToNum
AS
(
SELECT *
FROM (VALUES
('A', '01'), ('B', '02'), ('C', '03'), ('D', '04'),
('E', '05'), ('F', '06'), ('G', '07'), ('H', '08'),
('I', '09'), ('J', '10'), ('K', '11'), ('L', '12'),
('M', '13'), ('N', '14'), ('O', '15'), ('P', '16'),
('Q', '17'), ('R', '18'), ('S', '19'), ('T', '20'),
('U', '21'), ('V', '22'), ('W', '23'), ('X', '24'),
('Y', '25'), ('Z', '26')
) X(alpha, num)
),
MappingTable
As
(
SELECT A1.alpha + A2.alpha + A3.alpha as match, A1.num + A2.num + A3.num as val
FROM AlphaToNum A1
CROSS APPLY AlphaToNum A2
CROSS APPLY AlphaToNum A3
)
SELECT A.Account, M.val + SUBSTRING(A.Account,4, 3) As ConvertedAccount
FROM MappingTable M
INNER JOIN Accounts A
ON LEFT(A.Account,3) = M.match
Results:
| Account | ConvertedAccount |
|---------|------------------|
| AHT559 | 010820559 |
| BXC556 | 022403556 |
| CST345 | 031920345 |
This is probably best done using a CLR UDF, but a full answer is too long for this format.
Basically you need to create a UDF (User defined function) that takes a string (nvarchar...) as an input and returns a string as an output. You can do that with C# quite easily, and you need to wrap it with the CLR integration requirements.
You can see here for relevant information.
The code could look something like:
// Converts every upper-case letter A-Z in the input to its two-digit, 1-based
// position in the alphabet (A -> 01 ... Z -> 26) and copies every other
// character unchanged, e.g. "AHT559" -> "010820559".
// Returns the input as-is when it is SQL NULL.
[Microsoft.SqlServer.Server.SqlFunction(
IsDeterministic=true,
IsPrecise=true,
SystemDataAccess=SystemDataAccessKind.None)]
public static SqlString ToNthAlpha(SqlString value)
{
    if (value.IsNull)
        return value;

    StringBuilder res = new StringBuilder();
    foreach (char c in value.Value)
    {
        if (c >= 'A' && c <= 'Z')
            // + 1 makes the position 1-based; the letter itself is not copied.
            res.AppendFormat("{0:00}", c - 'A' + 1);
        else
            res.Append(c);
    }
    return new SqlString(res.ToString());
}

T-SQL split string based on delimiter

I have some data that I would like to split based on a delimiter that may or may not exist.
Example data:
John/Smith
Jane/Doe
Steve
Bob/Johnson
I am using the following code to split this data into First and Last names:
SELECT SUBSTRING(myColumn, 1, CHARINDEX('/', myColumn)-1) AS FirstName,
SUBSTRING(myColumn, CHARINDEX('/', myColumn) + 1, 1000) AS LastName
FROM MyTable
The results I would like:
FirstName---LastName
John--------Smith
Jane--------Doe
Steve-------NULL
Bob---------Johnson
This code works just fine as long as all the rows have the anticipated delimiter, but errors out when a row does not:
"Invalid length parameter passed to the LEFT or SUBSTRING function."
How can I re-write this to work properly?
Maybe this will help you.
-- Splits myColumn at the first '/' into FirstName and LastName.
-- Rows without a '/' return the whole value as FirstName and NULL as LastName
-- (previously an empty string), matching the result requested in the question.
SELECT CASE
           WHEN CHARINDEX('/', myColumn) = 0
               THEN myColumn
           ELSE LEFT(myColumn, CHARINDEX('/', myColumn) - 1)
       END AS FirstName
      ,CASE
           WHEN CHARINDEX('/', myColumn) > 0
               THEN SUBSTRING(myColumn, CHARINDEX('/', myColumn) + 1, 1000)
           -- no ELSE: a missing delimiter yields NULL
       END AS LastName
FROM MyTable
For those looking for answers for SQL Server 2016+. Use the built-in STRING_SPLIT function
Eg:
DECLARE #tags NVARCHAR(400) = 'clothing,road,,touring,bike'
SELECT value
FROM STRING_SPLIT(#tags, ',')
WHERE RTRIM(value) <> '';
Reference: https://msdn.microsoft.com/en-nz/library/mt684588.aspx
Try filtering out the rows that contain strings with the delimiter and work on those only like:
SELECT SUBSTRING(myColumn, 1, CHARINDEX('/', myColumn)-1) AS FirstName,
SUBSTRING(myColumn, CHARINDEX('/', myColumn) + 1, 1000) AS LastName
FROM MyTable
WHERE CHARINDEX('/', myColumn) > 0
Or
SELECT SUBSTRING(myColumn, 1, CHARINDEX('/', myColumn)-1) AS FirstName,
SUBSTRING(myColumn, CHARINDEX('/', myColumn) + 1, 1000) AS LastName
FROM MyTable
WHERE myColumn LIKE '%/%'
-- FirstName: the text before the first '/', or the whole value when there is no '/'.
-- LastName:  the text after the last '/', or an empty string when there is no '/'.
SELECT CASE CHARINDEX('/', myColumn)
           WHEN 0 THEN myColumn
           ELSE SUBSTRING(myColumn, 1, CHARINDEX('/', myColumn) - 1)
       END AS FirstName,
       CASE CHARINDEX('/', myColumn)
           WHEN 0 THEN ''
           -- The position of the first '/' in the reversed value is the distance of the last '/' from the end.
           ELSE RIGHT(myColumn, CHARINDEX('/', REVERSE(myColumn)) - 1)
       END AS LastName
FROM MyTable
ALTER FUNCTION [dbo].[split_string](
#delimited NVARCHAR(MAX),
#delimiter NVARCHAR(100)
) RETURNS #t TABLE (id INT IDENTITY(1,1), val NVARCHAR(MAX))
AS
BEGIN
    -- Splits #delimited at every occurrence of #delimiter and returns one row
    -- per piece (val) with a sequence number (id).
    -- The text is turned into XML of the form <t>piece</t><t>piece</t>... and
    -- the <t> nodes are read back as rows.
    -- NOTE(review): the input is not XML-escaped, so text containing characters
    -- such as & or < makes the XML conversion fail - confirm that callers never
    -- pass such data.
    DECLARE #xml XML
    SET #xml = N'<t>' + REPLACE(#delimited,#delimiter,N'</t><t>') + N'</t>'
    INSERT INTO #t(val)
    -- Read the pieces as nvarchar so Unicode characters survive; varchar would
    -- lose them even though both the parameter and the val column are NVARCHAR.
    SELECT r.value('.','nvarchar(MAX)') as item
    FROM #xml.nodes('/t') as records(r)
    RETURN
END
I just wanted to give an alternative way to split a string with multiple delimiters, in case you are using a SQL Server version under 2016.
The general idea is to split out all of the characters in the string, determine the position of the delimiters, then obtain substrings relative to the delimiters. Here is a sample:
-- Sample data
DECLARE #testTable TABLE (
TestString VARCHAR(50)
)
INSERT INTO #testTable VALUES
('Teststring,1,2,3')
,('Test')
DECLARE #delimiter VARCHAR(1) = ','
-- Generate numbers with which we can enumerate
;WITH Numbers AS (
SELECT 1 AS N
UNION ALL
SELECT N + 1
FROM Numbers
WHERE N < 255
),
-- Enumerate letters in the string and select only the delimiters
Letters AS (
SELECT n.N
, SUBSTRING(t.TestString, n.N, 1) AS Letter
, t.TestString
, ROW_NUMBER() OVER ( PARTITION BY t.TestString
ORDER BY n.N
) AS Delimiter_Number
FROM Numbers n
INNER JOIN #testTable t
ON n <= LEN(t.TestString)
WHERE SUBSTRING(t.TestString, n, 1) = #delimiter
UNION
-- Include 0th position to "delimit" the start of the string
SELECT 0
, NULL
, t.TestString
, 0
FROM #testTable t
)
-- Obtain substrings based on delimiter positions
SELECT t.TestString
, ds.Delimiter_Number + 1 AS Position
, SUBSTRING(t.TestString, ds.N + 1, ISNULL(de.N, LEN(t.TestString) + 1) - ds.N - 1) AS Delimited_Substring
FROM #testTable t
LEFT JOIN Letters ds
ON t.TestString = ds.TestString
LEFT JOIN Letters de
ON t.TestString = de.TestString
AND ds.Delimiter_Number + 1 = de.Delimiter_Number
OPTION (MAXRECURSION 0)
The examples above work fine when there is only one delimiter, but they don't scale well to multiple delimiters. Note that the query below uses STRING_SPLIT, so it will only work on SQL Server 2016 and above.
/*Some Sample Data*/
DECLARE #mytable TABLE ([id] VARCHAR(10), [name] VARCHAR(1000));
INSERT INTO #mytable
VALUES ('1','John/Smith'),('2','Jane/Doe'), ('3','Steve'), ('4','Bob/Johnson')
/*Split based on delimeter*/
SELECT P.id, [1] 'FirstName', [2] 'LastName', [3] 'Col3', [4] 'Col4'
FROM(
SELECT A.id, X1.VALUE, ROW_NUMBER() OVER (PARTITION BY A.id ORDER BY A.id) RN
FROM #mytable A
CROSS APPLY STRING_SPLIT(A.name, '/') X1
) A
PIVOT (MAX(A.[VALUE]) FOR A.RN IN ([1],[2],[3],[4],[5])) P
These all helped me get to this. I am still on 2012 but now have something quick that will allow me to split a string, even if string has varying numbers of delimiters, and grab the nth substring from that string. It's quick too. I know this post is old, but it took me forever to find something so hopefully this will help someone else.
CREATE FUNCTION [dbo].[SplitsByIndex]
(#separator VARCHAR(20) = ' ',
#string VARCHAR(MAX),
#position INT
)
RETURNS VARCHAR(MAX)
AS
BEGIN
DECLARE #results TABLE
(id INT IDENTITY(1, 1),
chrs VARCHAR(8000)
);
DECLARE #outResult VARCHAR(8000);
WITH X(N)
AS (SELECT 'Table1'
FROM(VALUES(0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0)) T(C)),
Y(N)
AS (SELECT 'Table2'
FROM X A1,
X A2,
X A3,
X A4,
X A5,
X A6,
X A7,
X A8), -- Up to 16^8 = 4 billion
T(N)
AS (SELECT TOP (ISNULL(LEN(#string), 0)) ROW_NUMBER() OVER(
ORDER BY
(
SELECT NULL
)) - 1 N
FROM Y),
Delim(Pos)
AS (SELECT t.N
FROM T
WHERE(SUBSTRING(#string, t.N, LEN(#separator + 'x') - 1) LIKE #separator
OR t.N = 0)),
Separated(value)
AS (SELECT SUBSTRING(#string, d.Pos + LEN(#separator + 'x') - 1, LEAD(d.Pos, 1, 2147483647) OVER(
ORDER BY
(
SELECT NULL
))-d.Pos - LEN(#separator))
FROM Delim d
WHERE #string IS NOT NULL)
INSERT INTO #results(chrs)
SELECT s.value
FROM Separated s
WHERE s.value <> #separator;
SELECT #outResult =
(
SELECT chrs
FROM #results
WHERE id = #position
);
RETURN #outResult;
END;
This can be used like this:
SELECT [dbo].[SplitsByIndex](' ',fieldname,2)
from tablename
I would protect the substring operation by always appending a delimiter to the test strings. This makes the parsing much simpler. Your code may now rely on finding the right pattern, and not need to cope with special cases.
-- Splits myColumn at the first '/'.
-- The delimiter is appended to the value that is SEARCHED, so CHARINDEX always
-- finds a '/' and never yields a negative length; the pieces are cut from the
-- original value, so the appended '/' never shows up in the output.
SELECT LEFT(myColumn, CHARINDEX('/', myColumn + '/') - 1) AS FirstName,
       -- Nothing after the delimiter (or no delimiter at all) gives NULL.
       NULLIF(SUBSTRING(myColumn, CHARINDEX('/', myColumn + '/') + 1, 1000), '') AS LastName
FROM MyTable
It eliminates edge cases and conditionals and cases.
Always add an extra delimiter at the end, then the challenge case is no problem.