SQL server split string into columns by delimiter (dynamic length) [duplicate] - sql

This question already has answers here:
how to separate string into different columns?
(5 answers)
How to split a comma-separated value to columns
(38 answers)
Closed 3 years ago.
SQL server cannot use MySQL split_index function, and my environment face accessibility blocking to use some function on server like "CREATE" "INSERT"
Are there any method to split strings by fixed delimiter into columns ?
Has 3 delimiters but length is dynamic.
e.g.
STRING : sometimes - "AA.0.HJ", sometimes - "AABBCC.099.0",sometimes - "0.91.JAH21"
The combinations of substring is not work.
SUBSTRING(STRING ,
CHARINDEX('.', STRING )+1,
LEN(STRING )-CHARINDEX('.', STRING )
Origin:
STRING
AA.0.HJ
AABBCC.099.0
0.91.JAH21
Target :
STRING First Second Third
AA.0.HJ AA 0 HJ
AABBCC.099.0 AABBCC 099 0
0.91.JAH21 0 91 JAH21
What is the solution in this situation ?

An xml-based solution
declare #tmp table (STRING varchar(500))
insert into #tmp
values
('AA.0.HJ')
,('AABBCC.099.0')
,('0.91.JAH21')
;WITH Splitted
AS (
SELECT STRING
,CAST('<x>' + REPLACE(STRING, '.', '</x><x>') + '</x>' AS XML) AS Parts
FROM #tmp
)
SELECT STRING
,Parts.value(N'/x[1]', 'varchar(50)') AS [First]
,Parts.value(N'/x[2]', 'varchar(50)') AS [Second]
,Parts.value(N'/x[3]', 'varchar(50)') AS [Third]
FROM Splitted;
Output:

You can use parsename
Declare #t table (name varchar(50))
insert into #t values ('AA.0.HJ')
insert into #t values ('AABBCC.099.0')
select parsename(name,3),parsename(name,2),parsename(name,1) from #t

Related

sql extract rightmost number in string and increment

i have transaction codes like
"A0004", "1B2005","20CCCCCCC21"
I need to extract the rightmost number and increment the transaction code by one
"AA0004"----->"AA0005"
"1B2005"------->"1B2006"
"20CCCCCCCC21"------>"20CCCCCCCC22"
in SQL Server 2012.
unknown length of string
right(n?) always number
dealing with unsignificant number of string and number length is out of my league.
some logic is always missing.
LEFT(#a,2)+RIGHT('000'+CONVERT(NVARCHAR,CONVERT(INT,SUBSTRING( SUBSTRING(#a,2,4),2,3))+1)),3
First, I want to be clear about this: I totally agree with the comments to the question from a_horse_with_no_name and Jeroen Mostert.
You should be storing one data point per column, period.
Having said that, I do realize that a lot of times the database structure can't be changed - so here's one possible way to get that calculation for you.
First, create and populate sample table (Please save us this step in your future questions):
DECLARE #T AS TABLE
(
col varchar(100)
);
INSERT INTO #T (col) VALUES
('A0004'),
('1B2005'),
('1B2000'),
('1B00'),
('20CCCCCCC21');
(I've added a couple of strings as edge cases you didn't mention in the question)
Then, using a couple of cross apply to minimize code repetition, I came up with that:
SELECT col,
LEFT(col, LEN(col) - LastCharIndex + 1) +
REPLICATE('0', LEN(NumberString) - LEN(CAST(NumberString as int))) +
CAST((CAST(NumberString as int) + 1) as varchar(100)) As Result
FROM #T
CROSS APPLY
(
SELECT PATINDEX('%[^0-9]%', Reverse(col)) As LastCharIndex
) As Idx
CROSS APPLY
(
SELECT RIGHT(col, LastCharIndex - 1) As NumberString
) As NS
Results:
col Result
A0004 A0005
1B2005 1B2006
1B2000 1B2001
1B00 1B01
20CCCCCCC21 20CCCCCCC22
The LastCharIndex represents the index of the last non-digit char in the string.
The NumberString represents the number to increment, as a string (to preserve the leading zeroes if they exists).
From there, it's simply taking the left part of the string (that is, up until the number), and concatenate it to a newly calculated number string, using Replicate to pad the result of addition with the exact number of leading zeroes the original number string had.
Try This
DECLARE #test nvarchar(1000) ='"A0004", "1B2005","20CCCCCCC21"'
DECLARE #Temp AS TABLE (ID INT IDENTITY,Data nvarchar(1000))
INSERT INTO #Temp
SELECT #test
;WITH CTE
AS
(
SELECT Id,LTRIM(RTRIM((REPLACE(Split.a.value('.' ,' nvarchar(max)'),'"','')))) AS Data
,RIGHT(LTRIM(RTRIM((REPLACE(Split.a.value('.' ,' nvarchar(max)'),'"','')))),1)+1 AS ReqData
FROM
(
SELECT ID,
CAST ('<S>'+REPLACE(Data,',','</S><S>')+'</S>' AS XML) AS Data
FROM #Temp
) AS A
CROSS APPLY Data.nodes ('S') AS Split(a)
)
SELECT CONCAT('"'+Data+'"','-------->','"'+CONCAT(LEFT(Data,LEN(Data)-1),CAST(ReqData AS VARCHAR))+'"') AS ExpectedResult
FROM CTE
Result
ExpectedResult
-----------------
"A0004"-------->"A0005"
"1B2005"-------->"1B2006"
"20CCCCCCC21"-------->"20CCCCCCC22"
STUFF(#X
,LEN(#X)-CASE PATINDEX('%[A-Z]%',REVERSE(#X)) WHEN 0 THEN LEN(#X) ELSE PATINDEX('%[A-Z]%',REVERSE(#X))-1 END+1
,LEN(((RIGHT(#X,CASE PATINDEX('%[A-Z]%',REVERSE(#X)) WHEN 0 THEN LEN(#X) ELSE PATINDEX('%[A-Z]%',REVERSE(#X))-1 END)/#N)+1)#N)
,((RIGHT(#X,CASE PATINDEX('%[A-Z]%',REVERSE(#X)) WHEN 0 THEN LEN(#X) ELSE PATINDEX('%[A-Z]%',REVERSE(#X))-1 END)/#N)+1)#N)
works on number only strings
99 becomes 100
mod(#N) increments

TSQL: Find a continuous number in a string

I need to find a continuous 6 or 7 digit number in a string from column name Filename. The string has other numbers in it with dashes(or another character, like an underscore), but I only need the continuous number
The StudentID needs to be extracted from the filename. (I know the data is just wow, multiple vendors, multiple file naming formats is the cause.) Another option would be to just list the starting position of the continuous number.
Desired outcome:
Actual outcome:
Test Code:
DROP TABLE #StuID
CREATE TABLE #StuID (
FILENAME VARCHAR(MAX)
,StudentID INT
)
INSERT INTO #StuID
( FILENAME )
VALUES
('Smith John D, 11-23-1980, 1234567.pdf')
,('Doe Jane, _01_22_1980_123456.pdf')
,('John Doe, 567891.pdf' )
--This is what I tried.
SELECT FILENAME
, substring(FileName, patindex('%[0-9][0-9][0-9][0-9][0-9][0-9]%', FileName), 8) AS StudentID
FROM #StuID
Because you want 6 or 7 digits, case might be the simplest solution:
SELECT FILENAME,
(CASE WHEN FileName LIKE '%[0-9][0-9][0-9][0-9][0-9][0-9][0-9]%'
THEN substring(FileName, patindex('%[0-9][0-9][0-9][0-9][0-9][0-9]%', FileName), 7)
WHEN FileName LIKE '%[0-9][0-9][0-9][0-9][0-9][0-9]%'
THEN substring(FileName, patindex('%[0-9][0-9][0-9][0-9][0-9]%', FileName), 6)
END) AS StudentID
FROM #StuID
Another approach I like a lot is a cast to XML and a XQuery filter:
WITH Casted([FileName],ToXml) AS
(
SELECT [FILENAME]
,CAST('<x>' + REPLACE(REPLACE(REPLACE([FILENAME],' ','</x><x>'),'.','</x><x>'),'_','</x><x>') + '</x>' AS XML)
FROM #StuID
)
SELECT [FileName]
,numbers.value('text()[1]','int')
FROM Casted
CROSS APPLY ToXml.nodes('/x[not(empty(. cast as xs:int?))]') A(numbers);
This will split the string in its fragments and return all fragments, which are numbers.
You can easily reduce the set to StudentIDs by using any convenient WHERE clause or you add to the XQuery filter the length of 6 or 7:
CROSS APPLY ToXml.nodes('/x[not(empty(. cast as xs:int?))
and (string-length(.)=6 or string-length(.)=7)]') A(numbers)
EDIT
This would be most on point:
CROSS APPLY ToXml.nodes('/x[. cast as xs:int? >= 100000 and . cast as xs:int? <10000000]') A(numbers)
If you know that filetype is pdf then:
SELECT FILENAME
, substring(REPLACE(FileName, '.pdf',''), patindex('%[0-9][0-9][0-9][0-9][0-9][0-9]%', FileName), 8)
AS StudentID
FROM #StuID;
db<>fiddle demo
More generic one (SQL Server 2017):
SELECT FILENAME
, substring(s.c, patindex('%[0-9][0-9][0-9][0-9][0-9][0-9]%', s.c), 8) AS StudentID
FROM #StuID
CROSS APPLY (SELECT trim(' !"#$%&\''()*+,-./:;<=>?#ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~' FROM filename) AS c) s
db<>fiddle demo2

How to split string by forward slash in SQL Server? [duplicate]

This question already has answers here:
How do I split a delimited string so I can access individual items?
(46 answers)
Closed 4 years ago.
There is no default split function in SQL Server 2012 that I'm using.
I want to split the string (ex: /Folder1/Folder2/) by /.
if string is /Folder1/ then output should be Folder1,
if string is /Folder1/Folder2/ then output should be Folder2,
if string is /Folder1/Folder2/Folder3/ then output should be Folder3.
Try this:
declare #tbl table (path varchar(100));
insert into #tbl values
('/Folder1/'),
('/Folder1/Folder2/'),
('/Folder1/Folder2/Folder3/');
select *,
replace(substring(path, len(path) - charindex('/', reverse(path), 2) + 1, 1000), '/', '')
from #tbl

Removing leading zeros in a string in sqlserver

I want to remove leading zeros for a varchar column. Actually we are storing version information in a column. Find below example versions.
2.00.001
The output would be : 2.0.1
Input : 2.00.00.001
The output would be: 2.0.0.1
Input : 2.00
The output would be : 2.0
The dots in the version column not constant. It may be two or three or four
I found some solutions in google but those are not working. Find below are the queries I tried.
SELECT SUBSTRING('2.00.001', PATINDEX('%[^0 ]%', '2.00.001' + ' '), LEN('2.00.001'))
SELECT REPLACE(LTRIM(REPLACE('2.00.001', '0', ' ')),' ', '0')
Please suggest me the best approach in sqlserver.
One way is to use a string splitting function with cross apply, for xml path, and stuff.
For an explanation on how stuff and for xml works together to concatenate a string from selected rows, read this SO post.
Using a string splitting function will enable you to convert each number part of the string to int, that will remove the leading zeroes. Executing a select statement on the result of the string splitting function will enable you to get your int values back into a varchar value, seperated by dot.
The stuff function will remove the first dot.
Create the string splitting function:
CREATE FUNCTION SplitStrings_XML
(
#List NVARCHAR(MAX),
#Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
(
SELECT Item = y.i.value('(./text())[1]', 'nvarchar(4000)')
FROM
(
SELECT x = CONVERT(XML, '<i>'
+ REPLACE(#List, #Delimiter, '</i><i>')
+ '</i>').query('.')
) AS a CROSS APPLY x.nodes('i') AS y(i)
);
GO
I've chosen to use an xml based function because it's fairly simple. If you are using 2016 version you can use the built in string_split function. For earlier versions, I would stronly suggest reading Aaron Bertrand's Split strings the right way – or the next best way.
Create and populate sample table (Please save us this step in your future questions)
DECLARE #T AS TABLE
(
col varchar(20)
)
INSERT INTO #T VALUES
('2.00.001'),
('2.00.00.001'),
('2.00')
The query:
SELECT col, result
FROM #T
CROSS APPLY
(
SELECT STUFF(
(
SELECT '.' + CAST(CAST(Item as int) as varchar(20))
FROM SplitStrings_XML(col, '.')
FOR XML PATH('')
)
, 1, 1, '') As result
) x
Results:
col result
2.00.001 2.0.1
2.00.00.001 2.0.0.1
2.00 2.0
You can see it in action on this link on rextester
No need for Split/Parse Function, and easy to expand if there could be more than 5 groups
Declare #YourTable table (YourCol varchar(25))
Insert Into #YourTable Values
('2.00.001'),
('2.00.00.001'),
('2.00')
Update #YourTable
Set YourCol = concat(Pos1,'.'+Pos2,'.'+Pos3,'.'+Pos4,'.'+Pos5)
From #YourTable A
Cross Apply (
Select Pos1 = ltrim(rtrim(xDim.value('/x[1]','int')))
,Pos2 = ltrim(rtrim(xDim.value('/x[2]','int')))
,Pos3 = ltrim(rtrim(xDim.value('/x[3]','int')))
,Pos4 = ltrim(rtrim(xDim.value('/x[4]','int')))
,Pos5 = ltrim(rtrim(xDim.value('/x[5]','int')))
From (Select Cast('<x>' + replace((Select replace(A.YourCol,'.','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml) as xDim) as A
) B
Select * from #YourTable
Returns
YourCol
2.0.1
2.0.0.1
2.0
Easy, fast, compatible and readable way – without tables or XML tricks.
Correctly handles all cases including empty string, NULL, or numbers like 00100.
Supports unlimited number of groups. Runs on all SQL Server versions.
Step 1: Remove leading zeros from all groups.
Step 2: Place single zero to groups where no digits remained.
[Edit: Not sure why it was downvoted twice. Check the solution: ]
The function:
CREATE FUNCTION dbo.fncGetNormalizedVersionNumber(#Version nvarchar(200))
RETURNS nvarchar(200) AS
BEGIN
-- Preprocessing: Surround version string by dots so all groups have the same format.
SET #Version = '.' + #Version + '.';
-- Step 1: Remove any leading zeros from groups as long as string length decreases.
DECLARE #PreviousLength int = 0;
WHILE #PreviousLength <> LEN(#Version)
BEGIN
SET #PreviousLength = LEN(#Version);
SET #Version = REPLACE(#Version, '.0', '.');
END;
-- Step 2: Insert 0 to any empty group as long as string length increases.
SET #PreviousLength = 0;
WHILE #PreviousLength <> LEN(#Version)
BEGIN
SET #PreviousLength = LEN(#Version);
SET #Version = REPLACE(#Version, '..', '.0.');
END;
-- Strip leading and trailing dot added by preprocessing.
RETURN SUBSTRING(#Version, 2, LEN(#Version) - 2);
END;
Usage:
SELECT dbo.fncGetNormalizedVersionNumber('020.00.00.000100');
20.0.0.100
Performance per 100,000 calculations:
solution using helper function + helper tables + XML: 54519 ms
this solution (used on table column): 2574 ms (→ 21 times faster) (UPDATED after comment.)
For SQL Server 2016:
SELECT
STUFF
((SELECT
'.' + CAST(CAST(value AS INT) AS VARCHAR)
FROM STRING_SPLIT('2.00.001', '.')
FOR XML PATH (''))
, 1, 1, '')
According to this: https://sqlperformance.com/2016/03/sql-server-2016/string-split
It's the fastest way :)
Aaron Bertrand knows it's stuff.
For an interesting and deep read about splitting strings on SQL Server plese read this gem of knowledge: http://www.sqlservercentral.com/articles/Tally+Table/72993/
It has some clever strategies
I am not sure this is what you are looking for but you can give a go, it should handle up to 4 zeros.
DECLARE #VERSION NVARCHAR(20) = '2.00.00.001'
SELECT REPLACE(REPLACE(REPLACE(#VERSION, '0000','0'),'000','0'),'00','0')
2.0.0.01
SET #VERSION = '2.00.00.01'
SELECT REPLACE(REPLACE(REPLACE(#VERSION, '0000','0'),'000','0'),'00','0')
2.0.0.01
SET #VERSION = '2.000.0000.0001'
SELECT REPLACE(REPLACE(REPLACE(#VERSION, '0000','0'),'000','0'),'00','0')
2.0.0.01
Try this one
SUBSTRING(str_col, PATINDEX('%[^0]%', str_col+'.'), LEN(str_col))
Here is another sample:
CREATE TABLE #tt(s VARCHAR(15))
INSERT INTO #tt VALUES
('2.00.001'),
('2.00.00.001'),
('2.00')
SELECT t.s,STUFF(c.s,1,1,'') AS news FROM #tt AS t
OUTER APPLY(
SELECT '.'+LTRIM(z.n) FROM (VALUES(CONVERT(XML,'<n>'+REPLACE(t.s,'.','</n><n>')+'</n>'))) x(xs)
CROSS APPLY(SELECT n.value('.','int') FROM x.xs.nodes('n') AS y(n)) z(n)
FOR XML PATH('')
) c(s)
s news
--------------- -----------
2.00.001 2.0.1
2.00.00.001 2.0.0.1
2.00 2.0

Split string into new column [duplicate]

This question already has answers here:
How do I split a delimited string so I can access individual items?
(46 answers)
Closed 7 years ago.
There is a column containing following e.g. abcd/ef/g/hij.
Characters between the / are dynamic not fix.
I want to split in a select query the content into 4 separate new columns.
The already answered question is different, I want to split the content in a string seperated by / into new columns.
You can use REPLACE to replace '/' with '.'. Then use PARSENAME to get each separate part of the string:
CREATE TABLE #tmp (str VARCHAR(50))
INSERT INTO #tmp VALUES
('abcd/ef/g/hij'),
('1111111/222/33/4444')
SELECT PARSENAME(x.s, 4) AS [1], PARSENAME(x.s, 3) AS [2],
PARSENAME(x.s, 2) AS [3], PARSENAME(x.s, 1) AS [4]
FROM #tmp
CROSS APPLY (SELECT REPLACE(str, '/', '.')) AS x(s)
Output:
1 2 3 4
---------------------
abcd ef g hij
1111111 222 33 4444
If you ask me, fastest ad-hoc method would be to turn your data into xml and use nodes() method:
declare #temp table (data nvarchar(max))
insert into #temp
select 'abcd/ef/g/hij' union all
select '1/2/3'
select t.data, n.c.value('.', 'nvarchar(max)')
from #temp as t
outer apply (select cast('<t>' + replace(t.data, '/', '</t><t>') + '</t>' as xml) as data) as d
outer apply d.data.nodes('t') as n(c)
You need to find the position of the / characters using CHARINDEX and slice the string up that way. It will be a large expression, because to find the third slash, you need to use the 3rd parameter of CHARINDEX, passing the result of another CHARINDEX, which also has its 3rd parameter being used. Except for the last (fourth) fragment, you also need to use CHARINDEX to find and remove text after the next slash.
Something like this will extract the text after the third slash:
RIGHT(s, CHARINDEX('/', s, CHARINDEX('/', s, CHARINDEX('/', s)+1)+1)+1)
I leave the rest to you.