Pad Zero before first hypen and remove spaces and add BA and IN - sql

I have data as below
98-45.3A-22
104-44.0A-23
00983-29.1-22
01757-42.5A-22
04968-37.3A2-23
Output Looking for output as below in SQL Server
00098-BA45.3A-IN-22
00104-BA44.0A-IN-23
00983-BA29.1-IN-22
01757-BA42.5A-IN-22
04968-BA37.3A2-IN-23

I splitted parts to cope with tricky data templates. This should work even with non-dash-2-digit tail:
WITH Src AS
(
SELECT * FROM (VALUES
('98-45.3A-22'),
('104-44.0A-23'),
('00983-29.1-22'),
('01757-42.5A-22'),
('04968-37.3A2-23')
) T(X)
), Parts AS
(
SELECT *,
RIGHT('00000'+SUBSTRING(X, 1, CHARINDEX('-',X, 1)-1),5) Front,
'BA'+SUBSTRING(X, CHARINDEX('-',X, 1)+1, 2) BA,
SUBSTRING(X, PATINDEX('%.%',X), LEN(X)-CHARINDEX('-', REVERSE(X), 1)-PATINDEX('%.%',X)+1) P,
SUBSTRING(X, LEN(X)-CHARINDEX('-', REVERSE(X), 1)+1, LEN(X)) En
FROM Src
)
SELECT Front+'-'+BA+P+'-IN'+En
FROM Parts
It returns:
00098-BA45.3A-IN-22
00104-BA44.0A-IN-23
00983-BA29.1-IN-22
01757-BA42.5A-IN-22
04968-BA37.3A2-IN-23

Try this,
DECLARE #String VARCHAR(100) = '98-45.3A-22'
SELECT ISNULL(REPLICATE('0',6 - CHARINDEX('-',#String)),'') -- Add leading Zeros
+ STUFF(
STUFF(#String,CHARINDEX('-',#String),1,'-BA'), -- Add 'BA'
CHARINDEX('-',#String,CHARINDEX('-',#String)+1)+2, -- 2 additional for the character 'BA'
1,'-IN') -- Add 'IN'
What if I have more than 6 digit number before first hyphen and want to remove the leading zeros to make it 6 digits.
DECLARE #String VARCHAR(100) = '0000098-45.3A-22'
SELECT CASE WHEN CHARINDEX('-',#String) <= 6
THEN ISNULL(REPLICATE('0',6 - CHARINDEX('-',#String)),'') -- Add leading Zeros
+ STUFF(
STUFF( #String,CHARINDEX('-',#String),1,'-BA'), -- Add 'BA'
CHARINDEX('-',#String,CHARINDEX('-',#String)+1)+2, -- 2 additional for the character 'BA'
1,'-IN') -- Add 'IN'
ELSE STUFF(
STUFF(
STUFF(#String,CHARINDEX('-',#String),1,'-BA'), -- Add 'BA'
CHARINDEX('-',#String,CHARINDEX('-',#String)+1)+2, -- 2 additional for the character 'BA'
1,'-IN'), -- Add 'IN'
1, CHARINDEX('-',#String) - 6, '' -- remove extra leading Zeros
)
END

Making assumptions that the format is consistent (e.g. always ends with "-" + 2 characters....)
DECLARE #Data TABLE (Col1 VARCHAR(100))
INSERT #Data ( Col1 )
SELECT Col1
FROM (
VALUES ('98-45.3A-22'), ('104-44.0A-23'),
('00983-29.1-22'), ('01757-42.5A-22'),
('04968-37.3A2-23')
) x (Col1)
SELECT RIGHT('0000' + LEFT(Col1, CHARINDEX('-', Col1) - 1), 5)
+ '-BA' + SUBSTRING(Col1, CHARINDEX('-', Col1) + 1, CHARINDEX('.', Col1) - CHARINDEX('-', Col1))
+ SUBSTRING(Col1, CHARINDEX('.', Col1) + 1, LEN(Col1) - CHARINDEX('.', Col1) - 3)
+ '-IN-' + RIGHT(Col1, 2)
FROM #Data
It's not ideal IMO to do this string manipulation all the time in SQL. You could shift it out to your presentation layer, or store the pre-formatted value in the db to save the cost of this every time.

Use REPLICATE AND CHARINDEX:
Replicate: will repeat given character till reach required count specify in function
CharIndex: Finds the first occurrence of any character
Declare #Data AS VARCHAR(50)='98-45.3A-22'
SELECT REPLICATE('0',6-CHARINDEX('-',#Data)) + #Data
SELECT
SUBSTRING
(
(REPLICATE('0',6-CHARINDEX('-',#Data)) +#Data)
,0
,6
)
+'-'+'BA'+ CAST('<x>' + REPLACE(#Data,'-','</x><x>') + '</x>' AS XML).value('/x[2]','varchar(max)')
+'-'+ 'IN'+ '-' + CAST('<x>' + REPLACE(#Data,'-','</x><x>') + '</x>' AS XML).value('/x[3]','varchar(max)')

In another way by using PARSENAME() you can use this query:
WITH t AS (
SELECT
PARSENAME(REPLACE(REPLACE(s, '.', '###'), '-', '.'), 3) AS p1,
REPLACE(PARSENAME(REPLACE(REPLACE(s, '.', '###'), '-', '.'), 2), '###', '.') AS p2,
PARSENAME(REPLACE(REPLACE(s, '.', '###'), '-', '.'), 1) AS p3
FROM yourTable)
SELECT RIGHT('00000' + p1, 5) + '-BA' + p2 + '-IN-' + p3
FROM t;

Related

Search between the third and forth occurrence in a string

I have a string that contains some text:
Example:
XPTOP XPTOP 4WS00632 BLACK VERNIS
I want to extract only
4WS00632
and ignore everything else.
I did try indexing the spaces, using charindex for searching all the spaces and then a substring to start at x, end at y.
But, no luck. Sometimes it returns "4WS0063" or "P 4WS00632".
This data is not coherent only "XPTOP XPTOP" is somewhat coherent.
For instance "4WS00632" this has 8 digits, but it might have 9 or 12.
So, I really need to catch everything in between the 3rd space and 4th space.
Yet another option is with a bit of JSON
Example or dbFiddle
Select A.SomeCol
,NewVal = JSON_VALUE('["'+replace(SomeCol,' ','","')+'"]','$[2]')
From YourTable A
Results
SomeCol NewVal
XPTOP XPTOP 4WS00632 BLACK VERNIS 4WS00632
Try this technique:
DECLARE #text NVARCHAR(MAX) = 'XPTOP XPTOP 4WS00632 BLACK VERNIS';
DECLARE #text_xml XML = '<a>' + REPLACE(#text, ' ', '</a><a>')+ '</a>';
WITH DataSource (element_pos, element_text) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY T.c)
,T.c.value('(.)[1]', 'nvarchar(128)')
FROM #text_xml.nodes('a') T(c)
)
SELECT element_text
FROM DataSource
WHERE element_pos = 3;
The idea is to split the string by space and order the strings by their position. Then simply get the 3rd one.
One method is string_split() along with some like comparisons:
select t.*, s.value
from t cross apply
(select s.value
from string_split(t.str, ' ') s
where t.str like concat('% % % ', s.value, ' %') and
t.str not like concat('% % % % ', s.value, ' %')
) s
This is one way of using a combination of charindex and stuff.
It could be done in a single statement but I broke it into a couple of logical steps to make it clearer.
create table #test (col1 varchar(100))
insert into #test values ('XPTOP XPTOP 4WS00632 BLACK VERNIS')
with s1(s) as (select Stuff(col1,1,CharIndex(' ',col1),'') from #test),
s2(s) as (select Stuff(s,1, CharIndex(' ',s),'') from s1)
select Left(s, charIndex(' ',s) )
from s2;
Here is a solution using charindex - and substring:
Declare #testTable Table (TestString varchar(100));
Insert Into #testTable (TestString)
Values ('XPTOP XPTOP 4WS00632 BLACK VERNIS')
, ('XYZ XYZ JW2B0037VK3 S17 SENAPE BLACK');
Select tt.TestString
, part1 = ltrim(substring(v.TestString, 1, p01.pos - 2))
, part2 = ltrim(substring(v.TestString, p01.pos, p02.pos - p01.pos - 1))
, part3 = ltrim(substring(v.TestString, p02.pos, p03.pos - p02.pos - 1))
From #testTable As tt
Cross Apply (Values (concat(tt.TestString, replicate(' ', 3)))) As v(TestString)
Cross Apply (Values (charindex(' ', v.TestString, 1) + 1)) As p01(pos)
Cross Apply (Values (charindex(' ', v.TestString, p01.pos) + 1)) As p02(pos)
Cross Apply (Values (charindex(' ', v.TestString, p02.pos) + 1)) As p03(pos)

Best way to pad section of this string with 0s

This is 2 examples of what the string currently look like:
6731-121-1
9552-3-1
This is what I want to pad them to look like
0006731-121-1
0009552-003-1
So I want them to be padded with 7 zeroes before the first '-' then 3 zeroes between the first and second '-'
What would be the best way to accomplish this in SQL SELECT statement.
SELECT RIGHT('0000000'
+ ISNULL(
LEFT(OE.exception_id, CHARINDEX('-', OE.exception_id)
- 1) ,
''
) ,7) + '-'
+ SUBSTRING(OE.exception_id, CHARINDEX('-', ( OE.exception_id )), 10) exception_id
ParseName() could be an option here
Example
Declare #YourTable Table ([YourCol] varchar(50))
Insert Into #YourTable Values
('6731-121-1')
,('9552-3-1')
Select *
,NewVal = right('0000000'+parsename(replace(YourCol,'-','.'),3),7)
+'-'
+right('000'+parsename(replace(YourCol,'-','.'),2),3)
+'-'
+parsename(replace(YourCol,'-','.'),1)
From #YourTable
Returns
YourCol NewVal
6731-121-1 0006731-121-1
9552-3-1 0009552-003-1
In situations with more than 3 periods
Example: '1.2.3.4.5'
Or any value is empty
3 examples: '1..3', '1.2.3.', '.2'
Parsename will return null for all values. You will need to split the column using a different method.
Here is an alternative to parsename:
DECLARE #table table(col varchar(100))
INSERT #table values('6731-121-1'),('9552-3-1')
SELECT
col,
REPLICATE('0', 8-x) + STUFF(col, x+1, 0,REPLICATE('0', 4 - (y-x))) newcol
FROM #table
CROSS APPLY
(SELECT CHARINDEX('-', col) x) x
CROSS APPLY
(SELECT CHARINDEX('-', col + '-', x+1) y) y
col newcol
6731-121-1 0006731-121-1
9552-3-1 0009552-003-1

How to get the middle word using Substring via Charindex of Second Position?

Basically what I am trying to do is that I want to get the middle word, using the second occurrence of the same character (on this case, dash "-").
This is the sample input:
declare #word nvarchar(max)
set #word = 'Technical Materials - Conversion - Team Dashboard'
There are three parts on this sentence, and they are divided by '-' dash line.
The first part is 'Technical Materials' which I am able to get using:
SELECT LTRIM(RTRIM(SUBSTRING(#word, 0, CHARINDEX('-', #word, 0))))
The last set was 'Team Dashboard' which I am able to get using:
SELECT CASE WHEN LEN(#word) - LEN(REPLACE(#word, '-', '')) = 1
THEN NULL
ELSE
RIGHT(#word,CHARINDEX('-', REVERSE(#word))-1)
END
The problem was, I am having a hard time getting the middle words which is 'Conversion' in this example.
If the format is fixed, you can use PARSENAME to achieve your expectation:
DECLARE #Word AS NVARCHAR(MAX) = 'Technical Materials - Conversion - Team Dashboard'
SELECT PARSENAME(REPLACE(#Word, '-', '.'), 2)
if you want to trim the extra spaces, then:
SELECT LTRIM(RTRIM(PARSENAME(REPLACE(#Word, '-', '.'), 2)))
Try this query:
SELECT
SUBSTRING(#word,
CHARINDEX('-', #word) + 2,
CHARINDEX('-', #word, CHARINDEX('-', #word) + 1) -
CHARINDEX('-', #word) - 3)
FROM yourTable
The general strategy here is to use SUBSTRING(), which requires the starting and ending positions of the middle string in question. We can use CHARINDEX to find both the first and second dash in the string. From this, we can compute the positions of the middle substring we want.
Demo here:
Rextester
This will find the text between the first 2 occurrences of '-'
DECLARE #word nvarchar(max)
SET #word = 'Technical Materials - Conversion - Team Dashboard'
SELECT SUBSTRING(x, 0, charindex('-', x))
FROM (values(stuff(#word, 1, charindex('-', #word), ''))) x(x)
This will find the middle element. In case of an even number of elements it will pick the first of the 2 middle elements
DECLARE #word nvarchar(max)
SET #word = 'Technical Materials - Conversion - Team Dashboard'
;WITH CTE(txt, rn, cnt) as
(
SELECT
t.c.value('.', 'VARCHAR(2000)'),
row_number() over (order by (select 1)), count(*) over()
FROM (
SELECT x = CAST('<t>' +
REPLACE(#word, ' - ', '</t><t>') + '</t>' AS XML)
) a
CROSS APPLY x.nodes('/t') t(c)
)
SELECT txt
FROM CTE
WHERE (cnt+1) / 2 = rn

Extract string between after second / and before -

I have a field that holds an account code. I've managed to extract the first 2 parts OK but I'm struggling with the last 2.
The field data is as follows:
812330/50110/0-0
812330/50110/BDG001-0
812330/50110/0-X001
I need to get the string between the second "/" and the "-" and after the "-" .Both fields have variable lengths, so I would be looking to output 0 and 0 on the first record, BDG001 and 0 on the second record and 0 and X001 on the third record.
Any help much appreciated, thanks.
You can use CHARINDEX and LEFT/RIGHT:
CREATE TABLE #tab(col VARCHAR(1000));
INSERT INTO #tab VALUES ('812330/50110/0-0'),('812330/50110/BDG001-0'),
('812330/50110/0-X001');
WITH cte AS
(
SELECT
col,
r = RIGHT(col, CHARINDEX('/', REVERSE(col))-1)
FROM #tab
)
SELECT col,
r,
sub1 = LEFT(r, CHARINDEX('-', r)-1),
sub2 = RIGHT(r, LEN(r) - CHARINDEX('-', r))
FROM cte;
LiveDemo
EDIT:
or even simpler:
SELECT
col
,sub1 = SUBSTRING(col,
LEN(col) - CHARINDEX('/', REVERSE(col)) + 2,
CHARINDEX('/', REVERSE(col)) -CHARINDEX('-', REVERSE(col))-1)
,sub2 = RIGHT(col, CHARINDEX('-', REVERSE(col))-1)
FROM #tab;
LiveDemo2
EDIT 2:
Using PARSENAME SQL SERVER 2012+ (if your data does not contain .):
SELECT
col,
sub1 = PARSENAME(REPLACE(REPLACE(col, '/', '.'), '-', '.'), 2),
sub2 = PARSENAME(REPLACE(REPLACE(col, '/', '.'), '-', '.'), 1)
FROM #tab;
LiveDemo3
...Or you can do this, so you only go from left side to right, so you don't need to count from the end in case you have more '/' or '-' signs:
SELECT
SUBSTRING(columnName, CHARINDEX('/' , columnName, CHARINDEX('/' , columnName) + 1) + 1,
CHARINDEX('-', columnName) - CHARINDEX('/' , columnName, CHARINDEX('/' , columnName) + 1) - 1) AS FirstPart,
SUBSTRING(columnName, CHARINDEX('-' , columnName) + 1, LEN(columnName)) AS LastPart
FROM table_name
One method way is to download a split() function off the web and use it. However, the values end up in separate rows, not separate columns. An alternative is a series of nested subqueries, CTEs, or outer applies:
select t.*, p1.part1, p12.part2, p12.part3
from table t outer apply
(select t.*,
left(t.field, charindex('/', t.field)) as part1,
substring(t.field, charindex('/', t.field) + 1) as rest1
) p1 outer apply
(select left(p1.rest1, charindex('/', p1.rest1) as part2,
substring(p1.rest1, charindex('/', p1.rest1) + 1, len(p1.rest1)) as part3
) p12
where t.field like '%/%/%';
The where clause guarantees that the field value is in the right format. Otherwise, you need to start sprinkling the code with case statements to handle misformated data.

T-SQL split string based on delimiter

I have some data that I would like to split based on a delimiter that may or may not exist.
Example data:
John/Smith
Jane/Doe
Steve
Bob/Johnson
I am using the following code to split this data into First and Last names:
SELECT SUBSTRING(myColumn, 1, CHARINDEX('/', myColumn)-1) AS FirstName,
SUBSTRING(myColumn, CHARINDEX('/', myColumn) + 1, 1000) AS LastName
FROM MyTable
The results I would like:
FirstName---LastName
John--------Smith
Jane--------Doe
Steve-------NULL
Bob---------Johnson
This code works just fine as long as all the rows have the anticipated delimiter, but errors out when a row does not:
"Invalid length parameter passed to the LEFT or SUBSTRING function."
How can I re-write this to work properly?
May be this will help you.
SELECT SUBSTRING(myColumn, 1, CASE CHARINDEX('/', myColumn)
WHEN 0
THEN LEN(myColumn)
ELSE CHARINDEX('/', myColumn) - 1
END) AS FirstName
,SUBSTRING(myColumn, CASE CHARINDEX('/', myColumn)
WHEN 0
THEN LEN(myColumn) + 1
ELSE CHARINDEX('/', myColumn) + 1
END, 1000) AS LastName
FROM MyTable
For those looking for answers for SQL Server 2016+. Use the built-in STRING_SPLIT function
Eg:
DECLARE #tags NVARCHAR(400) = 'clothing,road,,touring,bike'
SELECT value
FROM STRING_SPLIT(#tags, ',')
WHERE RTRIM(value) <> '';
Reference: https://msdn.microsoft.com/en-nz/library/mt684588.aspx
Try filtering out the rows that contain strings with the delimiter and work on those only like:
SELECT SUBSTRING(myColumn, 1, CHARINDEX('/', myColumn)-1) AS FirstName,
SUBSTRING(myColumn, CHARINDEX('/', myColumn) + 1, 1000) AS LastName
FROM MyTable
WHERE CHARINDEX('/', myColumn) > 0
Or
SELECT SUBSTRING(myColumn, 1, CHARINDEX('/', myColumn)-1) AS FirstName,
SUBSTRING(myColumn, CHARINDEX('/', myColumn) + 1, 1000) AS LastName
FROM MyTable
WHERE myColumn LIKE '%/%'
SELECT CASE
WHEN CHARINDEX('/', myColumn, 0) = 0
THEN myColumn
ELSE LEFT(myColumn, CHARINDEX('/', myColumn, 0)-1)
END AS FirstName
,CASE
WHEN CHARINDEX('/', myColumn, 0) = 0
THEN ''
ELSE RIGHT(myColumn, CHARINDEX('/', REVERSE(myColumn), 0)-1)
END AS LastName
FROM MyTable
ALTER FUNCTION [dbo].[split_string](
#delimited NVARCHAR(MAX),
#delimiter NVARCHAR(100)
) RETURNS #t TABLE (id INT IDENTITY(1,1), val NVARCHAR(MAX))
AS
BEGIN
DECLARE #xml XML
SET #xml = N'<t>' + REPLACE(#delimited,#delimiter,'</t><t>') + '</t>'
INSERT INTO #t(val)
SELECT r.value('.','varchar(MAX)') as item
FROM #xml.nodes('/t') as records(r)
RETURN
END
I just wanted to give an alternative way to split a string with multiple delimiters, in case you are using a SQL Server version under 2016.
The general idea is to split out all of the characters in the string, determine the position of the delimiters, then obtain substrings relative to the delimiters. Here is a sample:
-- Sample data
DECLARE #testTable TABLE (
TestString VARCHAR(50)
)
INSERT INTO #testTable VALUES
('Teststring,1,2,3')
,('Test')
DECLARE #delimiter VARCHAR(1) = ','
-- Generate numbers with which we can enumerate
;WITH Numbers AS (
SELECT 1 AS N
UNION ALL
SELECT N + 1
FROM Numbers
WHERE N < 255
),
-- Enumerate letters in the string and select only the delimiters
Letters AS (
SELECT n.N
, SUBSTRING(t.TestString, n.N, 1) AS Letter
, t.TestString
, ROW_NUMBER() OVER ( PARTITION BY t.TestString
ORDER BY n.N
) AS Delimiter_Number
FROM Numbers n
INNER JOIN #testTable t
ON n <= LEN(t.TestString)
WHERE SUBSTRING(t.TestString, n, 1) = #delimiter
UNION
-- Include 0th position to "delimit" the start of the string
SELECT 0
, NULL
, t.TestString
, 0
FROM #testTable t
)
-- Obtain substrings based on delimiter positions
SELECT t.TestString
, ds.Delimiter_Number + 1 AS Position
, SUBSTRING(t.TestString, ds.N + 1, ISNULL(de.N, LEN(t.TestString) + 1) - ds.N - 1) AS Delimited_Substring
FROM #testTable t
LEFT JOIN Letters ds
ON t.TestString = ds.TestString
LEFT JOIN Letters de
ON t.TestString = de.TestString
AND ds.Delimiter_Number + 1 = de.Delimiter_Number
OPTION (MAXRECURSION 0)
The examples above work fine when there is only one delimiter, but it doesn't scale well for multiple delimiters. Note that this will only work for SQL Server 2016 and above.
/*Some Sample Data*/
DECLARE #mytable TABLE ([id] VARCHAR(10), [name] VARCHAR(1000));
INSERT INTO #mytable
VALUES ('1','John/Smith'),('2','Jane/Doe'), ('3','Steve'), ('4','Bob/Johnson')
/*Split based on delimeter*/
SELECT P.id, [1] 'FirstName', [2] 'LastName', [3] 'Col3', [4] 'Col4'
FROM(
SELECT A.id, X1.VALUE, ROW_NUMBER() OVER (PARTITION BY A.id ORDER BY A.id) RN
FROM #mytable A
CROSS APPLY STRING_SPLIT(A.name, '/') X1
) A
PIVOT (MAX(A.[VALUE]) FOR A.RN IN ([1],[2],[3],[4],[5])) P
These all helped me get to this. I am still on 2012 but now have something quick that will allow me to split a string, even if string has varying numbers of delimiters, and grab the nth substring from that string. It's quick too. I know this post is old, but it took me forever to find something so hopefully this will help someone else.
CREATE FUNCTION [dbo].[SplitsByIndex]
(#separator VARCHAR(20) = ' ',
#string VARCHAR(MAX),
#position INT
)
RETURNS VARCHAR(MAX)
AS
BEGIN
DECLARE #results TABLE
(id INT IDENTITY(1, 1),
chrs VARCHAR(8000)
);
DECLARE #outResult VARCHAR(8000);
WITH X(N)
AS (SELECT 'Table1'
FROM(VALUES(0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0), (0)) T(C)),
Y(N)
AS (SELECT 'Table2'
FROM X A1,
X A2,
X A3,
X A4,
X A5,
X A6,
X A7,
X A8), -- Up to 16^8 = 4 billion
T(N)
AS (SELECT TOP (ISNULL(LEN(#string), 0)) ROW_NUMBER() OVER(
ORDER BY
(
SELECT NULL
)) - 1 N
FROM Y),
Delim(Pos)
AS (SELECT t.N
FROM T
WHERE(SUBSTRING(#string, t.N, LEN(#separator + 'x') - 1) LIKE #separator
OR t.N = 0)),
Separated(value)
AS (SELECT SUBSTRING(#string, d.Pos + LEN(#separator + 'x') - 1, LEAD(d.Pos, 1, 2147483647) OVER(
ORDER BY
(
SELECT NULL
))-d.Pos - LEN(#separator))
FROM Delim d
WHERE #string IS NOT NULL)
INSERT INTO #results(chrs)
SELECT s.value
FROM Separated s
WHERE s.value <> #separator;
SELECT #outResult =
(
SELECT chrs
FROM #results
WHERE id = #position
);
RETURN #outResult;
END;
This can be used like this:
SELECT [dbo].[SplitsByIndex](' ',fieldname,2)
from tablename
I would protect the substring operation by always appending a delimiter to the test strings. This makes the parsing much simpler. Your code may now rely on finding the right pattern, and not need to cope with special cases.
SELECT SUBSTRING(myColumn + '/', 1, CHARINDEX('/', myColumn)-1) AS FirstName,
SUBSTRING(myColumn + '/', CHARINDEX('/', myColumn) + 1, 1000) AS LastName
FROM MyTable
It eliminates edge cases and conditionals and cases.
Always add an extra delimiter at the end, then the challenge case is no problem.