sql extract rightmost number in string and increment - sql

i have transaction codes like
"A0004", "1B2005","20CCCCCCC21"
I need to extract the rightmost number and increment the transaction code by one
"AA0004"----->"AA0005"
"1B2005"------->"1B2006"
"20CCCCCCCC21"------>"20CCCCCCCC22"
in SQL Server 2012.
unknown length of string
right(n?) always number
Dealing with a prefix and a numeric suffix of unknown length (including insignificant leading zeros) is out of my league.
Some piece of the logic is always missing in my attempts, for example:
LEFT(#a,2)+RIGHT('000'+CONVERT(NVARCHAR,CONVERT(INT,SUBSTRING( SUBSTRING(#a,2,4),2,3))+1)),3

First, I want to be clear about this: I totally agree with the comments to the question from a_horse_with_no_name and Jeroen Mostert.
You should be storing one data point per column, period.
Having said that, I do realize that a lot of times the database structure can't be changed - so here's one possible way to get that calculation for you.
First, create and populate sample table (Please save us this step in your future questions):
-- Sample data for the increment query.
-- "#T" names a temporary table, which must be created with CREATE TABLE
-- (DECLARE ... TABLE is only valid for @table variables). A temp table also
-- survives batch boundaries, so later snippets can keep using FROM #T.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T
(
    col varchar(100)
);

INSERT INTO #T (col) VALUES
('A0004'),
('1B2005'),
('1B2000'),
('1B00'),
('20CCCCCCC21');
(I've added a couple of strings as edge cases you didn't mention in the question)
Then, using a couple of cross apply to minimize code repetition, I came up with that:
-- Increments the trailing run of digits of each code by one, keeping the
-- original width (leading zeros) unless the increment needs an extra digit.
--   'A0004' -> 'A0005'   'A0099' -> 'A0100'   'A99' -> 'A100'   '0009' -> '0010'
-- Codes that do not end in a digit yield NULL (there is nothing to increment).
-- The digit run is cast to bigint, so it must not exceed 18 digits.
SELECT col,
       LEFT(col, LEN(col) - DigitCount)
       + CASE
             -- A carry made the number at least as wide as before: use it as is.
             WHEN LEN(Incremented) >= DigitCount THEN Incremented
             -- Otherwise left-pad the NEW number back to the original width.
             ELSE RIGHT(REPLICATE('0', DigitCount) + Incremented, DigitCount)
         END AS Result
FROM #T
CROSS APPLY
(
    -- Number of trailing digits. The 'X' sentinel guarantees a non-digit is
    -- found even when the whole string consists of digits.
    SELECT PATINDEX('%[^0-9]%', REVERSE(col) + 'X') - 1 AS DigitCount
) AS Idx
CROSS APPLY
(
    -- NULLIF turns an empty digit run into NULL instead of silently treating it as 0.
    SELECT CAST(CAST(NULLIF(RIGHT(col, DigitCount), '') AS bigint) + 1 AS varchar(20)) AS Incremented
) AS NS
Results:
col Result
A0004 A0005
1B2005 1B2006
1B2000 1B2001
1B00 1B01
20CCCCCCC21 20CCCCCCC22
The LastCharIndex represents the index of the last non-digit char in the string.
The NumberString represents the number to increment, as a string (to preserve the leading zeroes if they exists).
From there, it's simply taking the left part of the string (that is, up until the number), and concatenate it to a newly calculated number string, using Replicate to pad the result of addition with the exact number of leading zeroes the original number string had.

Try This
-- Splits a comma separated list of quoted transaction codes and shows each
-- code next to its successor (trailing number + 1, original width preserved).
DECLARE @test nvarchar(1000) = N'"A0004", "1B2005","20CCCCCCC21"';
DECLARE @Temp AS TABLE (ID INT IDENTITY, Data nvarchar(1000));

INSERT INTO @Temp (Data)
SELECT @test;

WITH CTE AS
(
    -- One row per list element, with quotes and surrounding blanks removed.
    SELECT A.ID,
           LTRIM(RTRIM(REPLACE(Split.a.value('.', 'nvarchar(max)'), '"', ''))) AS Data
    FROM
    (
        -- Turn "a,b,c" into <S>a</S><S>b</S><S>c</S> so nodes() can split it.
        SELECT ID,
               CAST('<S>' + REPLACE(Data, ',', '</S><S>') + '</S>' AS XML) AS Data
        FROM @Temp
    ) AS A
    CROSS APPLY A.Data.nodes('S') AS Split(a)
)
SELECT CONCAT('"', CTE.Data, '"',
              '-------->',
              '"',
              LEFT(CTE.Data, LEN(CTE.Data) - Tail.DigitCount),
              CASE
                  WHEN LEN(Nxt.Incremented) >= Tail.DigitCount THEN Nxt.Incremented
                  ELSE RIGHT(REPLICATE('0', Tail.DigitCount) + Nxt.Incremented, Tail.DigitCount)
              END,
              '"') AS ExpectedResult
FROM CTE
CROSS APPLY
(
    -- Length of the trailing digit run; the sentinel covers all-digit codes.
    SELECT PATINDEX('%[^0-9]%', REVERSE(CTE.Data) + 'X') - 1 AS DigitCount
) AS Tail
CROSS APPLY
(
    -- The whole trailing number is incremented (not just its last character),
    -- so 'A0009' becomes 'A0010'. A code without trailing digits gives NULL
    -- here, which CONCAT renders as '' and therefore leaves the code unchanged.
    SELECT CAST(CAST(NULLIF(RIGHT(CTE.Data, Tail.DigitCount), '') AS bigint) + 1 AS varchar(20)) AS Incremented
) AS Nxt;
Result
ExpectedResult
-----------------
"A0004"-------->"A0005"
"1B2005"-------->"1B2006"
"20CCCCCCC21"-------->"20CCCCCCC22"

-- Replaces the trailing number of @X with the next multiple of @N
-- (@N = 1 gives a plain "+1"); the original width is kept unless the new
-- number needs more digits. Returns NULL when @X does not end in a digit.
--   @X = 'A0004', @N = 1 -> 'A0005'      @X = '99', @N = 1 -> '100'
DECLARE @X varchar(100) = 'A0004';
DECLARE @N int = 1;

SELECT STUFF(@X,
             LEN(@X) - t.DigitCount + 1,   -- first position of the trailing number
             t.DigitCount,                 -- replace the whole digit run
             CASE
                 WHEN LEN(n.NextValue) >= t.DigitCount THEN n.NextValue
                 ELSE RIGHT(REPLICATE('0', t.DigitCount) + n.NextValue, t.DigitCount)
             END) AS NextCode
FROM
(
    -- Count trailing digits; the sentinel handles number-only strings.
    SELECT PATINDEX('%[^0-9]%', REVERSE(@X) + 'X') - 1 AS DigitCount
) AS t
CROSS APPLY
(
    -- Integer division, then +1, then * @N: the next multiple of @N above the current value.
    SELECT CAST((CAST(RIGHT(@X, t.DigitCount) AS bigint) / @N + 1) * @N AS varchar(20)) AS NextValue
) AS n;
works on number only strings
99 becomes 100
mod(#N) increments

Related

SQL Long String into Substrings by a length and whitespaces

I want to split a string ("Hello this is a String That is very odd")
into substrings of at most a defined length (e.g. 8), so that the string is never cut in the middle of a word but only at whitespace.
length: 11
("Hello this is a String That is very odd") --> ("Hello this"),("is a String"),("That is"),("very odd")
I already have an array of the indexes of the whitespace characters, but I don't know how to continue from there.
I would appreciate your help.
There is no easy solution...
So the simple answer is: Do not use SQL-Server for this issue. It's just the wrong tool.
Nevertheless this can be done (if you have to):
-- Some declared table to mock your scenario
DECLARE @tbl TABLE(ID INT IDENTITY, YourString NVARCHAR(1000));
INSERT INTO @tbl (YourString) VALUES
 (N'Hello this is a String That is very odd')
,(N'blah')
,(N'And one withaverylongword');

-- Maximum length of one portion. 8 would be too little for this sample.
DECLARE @portionLenght INT = 12;

-- The query
WITH cte AS
(
    -- One row per word. The string is rewritten as a JSON array so OPENJSON
    -- returns every word together with its zero-based index in [key].
    -- STRING_ESCAPE keeps quotes and backslashes from breaking the JSON, and
    -- [key] (a string) is cast to INT so it joins and sorts numerically.
    SELECT t.ID
          ,CAST(A.[key] AS INT) AS fragmentPosition
          ,A.[value] AS fragment
    FROM @tbl t
    CROSS APPLY OPENJSON(CONCAT('["',REPLACE(STRING_ESCAPE(t.YourString,'json'),' ','","'),'"]')) A
)
,recCTE AS
(
    -- Anchor: the first word of every string opens portion 0.
    SELECT ID,fragmentPosition,fragment
          ,0 AS growingIndex
          ,CAST(fragment AS NVARCHAR(MAX)) AS growingString
    FROM cte
    WHERE fragmentPosition=0

    UNION ALL

    -- Step: append the next word, or open a new portion when appending it
    -- would exceed @portionLenght.
    SELECT cte.ID
          ,cte.fragmentPosition
          ,cte.fragment
          ,recCTE.growingIndex + CASE WHEN B.newLength>@portionLenght THEN 1 ELSE 0 END
          ,CASE WHEN B.newLength>@portionLenght THEN cte.fragment ELSE CONCAT(recCTE.growingString,N' ',cte.fragment) END
    FROM recCTE
    INNER JOIN cte ON cte.ID=recCTE.ID AND cte.fragmentPosition=recCTE.fragmentPosition+1
    CROSS APPLY(SELECT LEN(CONCAT(recCTE.growingString,N' ',cte.fragment))) B(newLength)
)
,final AS
(
    -- lastGrowing = 1 marks the last (i.e. complete) state of every portion.
    SELECT ID,fragmentPosition,fragment,growingIndex,growingString
          ,ROW_NUMBER() OVER(PARTITION BY ID,growingIndex ORDER BY fragmentPosition DESC) AS lastGrowing
    FROM recCTE
)
SELECT ID,fragmentPosition,fragment,growingIndex,growingString,lastGrowing
FROM final
WHERE lastGrowing=1
ORDER BY ID,fragmentPosition
-- One recursion level per word: lift the default limit of 100 levels.
OPTION (MAXRECURSION 0);
The result (with length=12)
1 Hello this
1 is a String
1 That is very
1 odd
2 blah
3 And one
3 withaverylongword
The idea in short
we use a trick with OPENJSON to transform your string into a json array and split it with a guaranteed sort order.
we use a recursive CTE to run through your fragments.
each iteration will calculate the total length of the former parts together with the new fragment
depending on this calculation the fragment will either be appended or a new portion is opened.
the final CTE will add a partitioned ROW_NUMBER() to find the last entry per portion.
And no, you should not use this... :-)

Filter IDs with just numbers excluding letters

So I have results that begins with 2 letters followed by 3 numbers, for example:
ID_Sample
AB001
BC003
AB100
BC400
How can I do a query that ignores the letters and just looks up the numbers to do a filter? For example:
WHERE ID_Sample >= 100
I tried using a "Replace" to get rid of known letters, but I figured there might be a better way. For example:
Select
Replace(id_sample,'AB','')
Choosing the 3 numerals on the right would work too.
For your sample data, you can just start at the third character and convert to a number:
-- Drop the two leading letters and compare the rest as a number.
-- ">=" keeps IDs whose numeric part is exactly 100, matching the requested filter (ID_Sample >= 100).
where try_convert(int, stuff(ID_Sample, 1, 2, '')) >= 100
Or, if you know that the number is 3 characters:
-- Compare the last three characters as a number.
-- ">=" keeps IDs whose numeric part is exactly 100, matching the requested filter (ID_Sample >= 100).
where try_convert(int, right(ID_Sample, 3)) >= 100
+1 for Gordon's answer. This is a fun problem that you can solve using TRANSLATE if you're using SQL 2017+.
First, in case you've never used it, Per BOL TRANSLATE:
Returns the string provided as a first argument after some characters
specified in the second argument are translated into a destination set
of characters specified in the third argument.
This:
-- Every 'A' becomes 'X', every 'B' becomes 'Y', every 'C' becomes 'Z';
-- all other characters are passed through unchanged.
SELECT TRANSLATE(
    '123AABBCC!!!',
    'ABC',
    'XYZ'
);
Returns: 123XXYYZZ!!!
Here's the solution using TRANSLATE:
-- Sample Data
DECLARE @t TABLE (ID_Sample CHAR(6));
INSERT @t (ID_Sample) VALUES ('AB001'),('BC003'),('AB100'),('BC400'),('CC555');

-- Solution: translate every letter to '0' so the remaining text casts to INT
-- ('AB100' -> '00100' -> 100). This relies on the letters being a prefix;
-- letters between digits would change the number.
SELECT
    ID_Sample     = t.ID_Sample,
    ID_Sample_Int = s.NewString
FROM @t AS t
-- S1: characters to replace; S2: 26 zeros, one replacement per letter of S1.
CROSS JOIN (VALUES('ABCDEFGHIJKLMNOPQRSTUVWXYZ', REPLICATE('0', 26))) AS f(S1,S2)
-- TRY_CAST yields NULL instead of an error if anything non-numeric is left.
CROSS APPLY (VALUES(TRY_CAST(TRANSLATE(t.ID_Sample,f.S1,f.S2) AS INT))) AS s(NewString)
WHERE s.NewString >= 100;
Without the WHERE clause filter you get:
ID_Sample ID_Sample_Int
--------- -------------
AB001 1
BC003 3
AB100 100
BC400 400
CC555 555
... the WHERE clause filters out the first two rows.
Check these methods- Unit test also done!
-- Sample data: two letters followed by three digits.
DECLARE @Table AS TABLE (ID_Sample varchar(20));
SET NOCOUNT ON;
INSERT INTO @Table (ID_Sample)
VALUES ('AB001'),('BC003'),('AB100'),('BC400');

-- All four variants isolate the numeric part and keep rows where it is
-- at least 100 (">=", as asked for in the question, so 'AB100' is included).
-- TRY_CAST returns NULL for non-numeric remainders, which filters those rows out.

--substring_method: three characters starting at position 3
SELECT ID_Sample FROM @Table
WHERE TRY_CAST(SUBSTRING(ID_Sample,3,3) AS int) >= 100;

--right_method: the last three characters
SELECT ID_Sample FROM @Table
WHERE TRY_CAST(RIGHT(ID_Sample,3) AS int) >= 100;

--stuff_method: delete the first two characters
SELECT ID_Sample FROM @Table
WHERE TRY_CAST(STUFF(ID_Sample,1,2,'') AS int) >= 100;

--replace_method: remove every occurrence of the two-letter prefix
SELECT ID_Sample FROM @Table
WHERE TRY_CAST(REPLACE(ID_Sample,LEFT(ID_Sample,2),'') AS int) >= 100;

How to trim/replace any letters in the value?

I have few columns in my old database that have values where number and letters are combined together. This is something that I have to clean and import in the new table. The most of the values that need to be converted look like this:
40M or 85M or NR or 5NR ...
Since there wasn't any validation what user can enter in the old system there still can be values like: 40A or 3R and so on. I want to import only numeric values in my new table. So if there is any letters in the value I want to trim them. What is the best way to do that in SQL Server? I have tried this:
CASE WHEN CHARINDEX('M',hs_ptr1) <> 0 THEN 1 ELSE 0 END AS hs_ptr1
but this will only identify if one letter is in the value. If anyone can help please let me know. Thanks!
you can use patindex to search for the pattern. Try this code:
Code:
-- Extracts the first run of digits from each value ('' when there is none).
CREATE TABLE #temp
(
    TXT NVARCHAR(50)
)

INSERT INTO #temp (TXT)
VALUES ('40M'), ('85M'), ('NR'), ('5NR')

SELECT LEFT(tail.subsrt, PATINDEX('%[^0-9]%', tail.subsrt + 't') - 1)
FROM #temp AS src
-- Position of the first digit (0 when the value has no digit).
CROSS APPLY (SELECT PATINDEX('%[0-9]%', src.TXT) AS pos) AS firstDigit
-- Everything from that position on; the appended 't' above guarantees
-- that a terminating non-digit is always found.
CROSS APPLY (SELECT SUBSTRING(src.TXT, firstDigit.pos, LEN(src.TXT)) AS subsrt) AS tail

DROP TABLE #temp
Here's a way without a function....
-- Extracts the leading number of each value after removing the punctuation
-- characters ( ) - space and comma, e.g. '3(-4_' -> '34'.
DECLARE @table TABLE (c varchar(256));

INSERT INTO @table (c)
VALUES
('40M'),
('30'),
('5NR'),
('3(-4_');

SELECT REPLACE(LEFT(tail.s, PATINDEX('%[^0-9.-]%', tail.s + 'X') - 1), '.', '') AS digits
FROM @table AS t
-- Strip the punctuation once instead of repeating the REPLACE chain four times.
CROSS APPLY
(
    SELECT REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(t.c, '(', ''), ')', ''), '-', ''), ' ', ''), ',', '') AS s
) AS cleaned
-- Everything from the first digit / dot / minus sign onwards.
CROSS APPLY
(
    SELECT SUBSTRING(cleaned.s, PATINDEX('%[0-9.-]%', cleaned.s), 8000) AS s
) AS tail;
You go with the PATINDEX function and search for a character that is not a digit. If such an index exists, then grab everything to the left of it. Something like that:
-- Keeps everything to the left of the first non-digit character.
-- The pattern is a string literal and therefore needs single quotes: with
-- QUOTED_IDENTIFIER ON, double quotes denote an identifier.
-- Note: PATINDEX returns 0 when the value contains digits only, which makes
-- the length -1; that case is not handled by this simple form.
SELECT LEFT(your_field_name, PATINDEX('%[^0-9]%', your_field_name) - 1)
FROM your_table_name
UPDATE
Well, you need to take care of any edge cases. E.g. if there isn't a non-digit data the function will return 0, thus the calculation yields -1, which, indeed, is an invalid length.
I would suggest you to leverage a Common Table Expression to calculate the index of the non-digit data and then construct an IIF expression to select the correct char data. E.g.
-- Returns the leading digits of your_field_name; values consisting of digits
-- only are returned unchanged.
WITH cte AS
(
    -- Position of the first non-digit character, 0 when there is none.
    SELECT *, PATINDEX('%[^0-9]%', your_field_name) AS FirstNonDigitPos
    FROM your_table_name
)
SELECT any_other_field,
       CASE
           WHEN FirstNonDigitPos = 0 THEN your_field_name
           -- Reuse the position computed in the CTE instead of calling PATINDEX again.
           ELSE LEFT(your_field_name, FirstNonDigitPos - 1)
       END AS leading_digits
FROM cte

Removing leading zeros in a string in sqlserver

I want to remove leading zeros for a varchar column. Actually we are storing version information in a column. Find below example versions.
2.00.001
The output would be : 2.0.1
Input : 2.00.00.001
The output would be: 2.0.0.1
Input : 2.00
The output would be : 2.0
The dots in the version column not constant. It may be two or three or four
I found some solutions in google but those are not working. Find below are the queries I tried.
SELECT SUBSTRING('2.00.001', PATINDEX('%[^0 ]%', '2.00.001' + ' '), LEN('2.00.001'))
SELECT REPLACE(LTRIM(REPLACE('2.00.001', '0', ' ')),' ', '0')
Please suggest me the best approach in sqlserver.
One way is to use a string splitting function with cross apply, for xml path, and stuff.
For an explanation on how stuff and for xml works together to concatenate a string from selected rows, read this SO post.
Using a string splitting function enables you to convert each numeric part of the string to int, which removes the leading zeroes. Selecting from the result of the string splitting function then lets you turn the int values back into a single varchar value, separated by dots.
The stuff function will remove the first dot.
Create the string splitting function:
-- Splits @List at every occurrence of @Delimiter and returns one row per
-- element in the column Item (NULL for empty elements).
-- The list is converted to <i>..</i><i>..</i> XML, so the XML special
-- characters & < > are escaped first; otherwise such input would make the
-- conversion to XML fail. value() returns the elements unescaped again.
CREATE FUNCTION SplitStrings_XML
(
    @List NVARCHAR(MAX),
    @Delimiter NVARCHAR(255)
)
RETURNS TABLE
WITH SCHEMABINDING
AS
RETURN
(
    SELECT Item = y.i.value('(./text())[1]', 'nvarchar(4000)')
    FROM
    (
        SELECT x = CONVERT(XML, '<i>'
            + REPLACE(esc.List, esc.Delimiter, '</i><i>')
            + '</i>').query('.')
        FROM
        (
            -- Escape list and delimiter identically so delimiters still match.
            SELECT List      = REPLACE(REPLACE(REPLACE(@List, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'),
                   Delimiter = REPLACE(REPLACE(REPLACE(@Delimiter, '&', '&amp;'), '<', '&lt;'), '>', '&gt;')
        ) AS esc
    ) AS a CROSS APPLY a.x.nodes('i') AS y(i)
);
GO
I've chosen to use an XML-based function because it's fairly simple. If you are using SQL Server 2016 or later you can use the built-in string_split function. For earlier versions, I would strongly suggest reading Aaron Bertrand's Split strings the right way – or the next best way.
Create and populate sample table (Please save us this step in your future questions)
-- Sample version strings.
-- "#T" names a temporary table, which must be created with CREATE TABLE
-- (DECLARE ... TABLE is only valid for @table variables); the query that
-- follows reads FROM #T.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T
(
    col varchar(20)
);

INSERT INTO #T (col) VALUES
('2.00.001'),
('2.00.00.001'),
('2.00');
The query:
-- For every version string: split at the dots, cast each part to int (which
-- drops leading zeros), glue the parts back together with '.' via
-- FOR XML PATH(''), and remove the leading dot with STUFF.
SELECT src.col,
       joined.result
FROM #T AS src
CROSS APPLY
(
    SELECT STUFF((SELECT '.' + CAST(CAST(parts.Item AS int) AS varchar(20))
                  FROM SplitStrings_XML(src.col, '.') AS parts
                  FOR XML PATH('')),
                 1, 1, '') AS result
) AS joined
Results:
col result
2.00.001 2.0.1
2.00.00.001 2.0.0.1
2.00 2.0
You can see it in action on this link on rextester
No need for Split/Parse Function, and easy to expand if there could be more than 5 groups
-- Normalizes up to five dot separated groups per version string in place.
DECLARE @YourTable TABLE (YourCol varchar(25));

INSERT INTO @YourTable (YourCol) VALUES
('2.00.001'),
('2.00.00.001'),
('2.00');

-- No WHERE clause on purpose: every row of the sample table is normalized.
UPDATE T
-- '.' + NULL is NULL and CONCAT ignores NULLs, so missing groups add nothing.
SET YourCol = CONCAT(B.Pos1, '.' + B.Pos2, '.' + B.Pos3, '.' + B.Pos4, '.' + B.Pos5)
FROM @YourTable AS T
CROSS APPLY (
    -- Reading each group as int drops its leading zeros; the cast back to
    -- varchar is required for the '.' + PosN concatenation above.
    -- A group that does not exist yields NULL.
    SELECT Pos1 = CAST(X.xDim.value('/x[1]', 'int') AS varchar(11))
          ,Pos2 = CAST(X.xDim.value('/x[2]', 'int') AS varchar(11))
          ,Pos3 = CAST(X.xDim.value('/x[3]', 'int') AS varchar(11))
          ,Pos4 = CAST(X.xDim.value('/x[4]', 'int') AS varchar(11))
          ,Pos5 = CAST(X.xDim.value('/x[5]', 'int') AS varchar(11))
    FROM (
        -- FOR XML PATH('') escapes XML special characters in the value; the
        -- placeholder is then replaced by element boundaries.
        SELECT CAST('<x>' + REPLACE((SELECT REPLACE(T.YourCol, '.', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml) AS xDim
    ) AS X
) AS B;

SELECT YourCol FROM @YourTable;
Returns
YourCol
2.0.1
2.0.0.1
2.0
Easy, fast, compatible and readable way – without tables or XML tricks.
Correctly handles all cases including empty string, NULL, or numbers like 00100.
Supports unlimited number of groups. Runs on all SQL Server versions.
Step 1: Remove leading zeros from all groups.
Step 2: Place single zero to groups where no digits remained.
[Edit: Not sure why it was downvoted twice. Check the solution: ]
The function:
-- Removes leading zeros from every dot separated group of a version string,
-- leaving a single 0 in groups that consist of zeros only or are empty.
--   '020.00.00.000100' -> '20.0.0.100'    '' -> '0'    NULL -> NULL
-- @Version: version string, groups separated by '.'.
-- Returns:  the normalized string (truncated to 200 characters by the return type).
CREATE FUNCTION dbo.fncGetNormalizedVersionNumber(@Version nvarchar(200))
RETURNS nvarchar(200) AS
BEGIN
    -- Preprocessing: surround the version by dots so all groups have the same
    -- format. A wider work variable is used because the two extra dots (and the
    -- zeros inserted in step 2) would not fit into nvarchar(200) for long inputs.
    DECLARE @Work nvarchar(4000) = N'.' + @Version + N'.';

    -- Step 1: strip one leading zero per group and pass until none is left.
    WHILE CHARINDEX(N'.0', @Work) > 0
        SET @Work = REPLACE(@Work, N'.0', N'.');

    -- Step 2: put a single zero into every group that ended up empty.
    WHILE CHARINDEX(N'..', @Work) > 0
        SET @Work = REPLACE(@Work, N'..', N'.0.');

    -- Strip the leading and trailing dot added by the preprocessing.
    RETURN SUBSTRING(@Work, 2, LEN(@Work) - 2);
END;
GO
Usage:
SELECT dbo.fncGetNormalizedVersionNumber('020.00.00.000100');
20.0.0.100
Performance per 100,000 calculations:
solution using helper function + helper tables + XML: 54519 ms
this solution (used on table column): 2574 ms (→ 21 times faster) (UPDATED after comment.)
For SQL Server 2016:
-- Normalizes '2.00.001' to '2.0.1' on SQL Server 2016.
-- STRING_SPLIT is not used because it does not guarantee the order of its
-- output rows (and has no ordinal column in 2016), while the order of the
-- groups is essential for a version number. OPENJSON returns the array index
-- in [key], which gives a reliable sort order.
SELECT
STUFF
((SELECT
'.' + CAST(CAST(j.[value] AS INT) AS VARCHAR(11))
FROM OPENJSON('["' + REPLACE('2.00.001', '.', '","') + '"]') AS j
ORDER BY CAST(j.[key] AS INT)
FOR XML PATH (''))
, 1, 1, '')
According to this: https://sqlperformance.com/2016/03/sql-server-2016/string-split
It's the fastest way :)
Aaron Bertrand knows his stuff.
For an interesting and deep read about splitting strings on SQL Server, please read this gem of knowledge: http://www.sqlservercentral.com/articles/Tally+Table/72993/
It has some clever strategies
I am not sure this is what you are looking for but you can give a go, it should handle up to 4 zeros.
-- Collapses runs of zeros by nested REPLACE calls, innermost first:
-- '0000' -> '0', then '000' -> '0', then '00' -> '0'.
-- As the printed results below show, a leading zero in front of another
-- digit is NOT removed ('001' ends up as '01', not '1').
-- NOTE(review): the replacements are not limited to the start of a group, so
-- zeros inside a number are collapsed as well (e.g. '1.100' would become '1.10').
DECLARE #VERSION NVARCHAR(20) = '2.00.00.001'
SELECT REPLACE(REPLACE(REPLACE(#VERSION, '0000','0'),'000','0'),'00','0')
2.0.0.01
SET #VERSION = '2.00.00.01'
SELECT REPLACE(REPLACE(REPLACE(#VERSION, '0000','0'),'000','0'),'00','0')
2.0.0.01
SET #VERSION = '2.000.0000.0001'
SELECT REPLACE(REPLACE(REPLACE(#VERSION, '0000','0'),'000','0'),'00','0')
2.0.0.01
Try this one
-- Returns str_col starting at its first character that is not '0'.
-- The '.' appended inside PATINDEX guarantees a match even when str_col
-- consists of zeros only. Only zeros at the very start of the string are
-- skipped; zeros after a later dot are left untouched.
SUBSTRING(str_col, PATINDEX('%[^0]%', str_col+'.'), LEN(str_col))
Here is another sample:
-- Sample version strings in a temp table.
CREATE TABLE #tt
(
    s VARCHAR(15)
)

INSERT INTO #tt
VALUES ('2.00.001'), ('2.00.00.001'), ('2.00')

-- Split each string at the dots via XML, read every part as int (dropping
-- leading zeros), re-join the parts with '.', and cut off the leading dot.
SELECT
    src.s,
    STUFF(joined.s, 1, 1, '') AS news
FROM #tt AS src
OUTER APPLY
(
    SELECT '.' + LTRIM(part.n)
    FROM (VALUES (CONVERT(XML, '<n>' + REPLACE(src.s, '.', '</n><n>') + '</n>'))) AS doc(xs)
    CROSS APPLY
    (
        SELECT node.n.value('.', 'int')
        FROM doc.xs.nodes('n') AS node(n)
    ) AS part(n)
    FOR XML PATH('')
) AS joined(s)
s news
--------------- -----------
2.00.001 2.0.1
2.00.00.001 2.0.0.1
2.00 2.0

How to sort a varchar column that contains numbers and letters in SQL Server?

I have a varchar column that contain numbers (1-99999) and letters (AM0001-BF9999).
Since it has letters so i can't just convert it to int.
Is there a way to maybe use grouping_id to sort this column by numbers (small to large) then follow by letters (alphabetically)?
Thanks..
You need to know what the maximum length of your field is. Assuming 25 characters for illustrative purposes, this will work:
-- Right-aligns every value in a 25 character field (left-padded with
-- spaces) and sorts by the padded value.
select
    padded.v
from ( values
     ('1-99999')
    ,('AM0001-BF9999')
) as raw_data(v)
cross apply (
    select right(space(25) + raw_data.v, 25) as v
) as padded
order by padded.v
to yield:
v
-------------------------
1-99999
AM0001-BF9999
You can try using the ISNUMERIC function like this:
-- Sorts purely numeric values first (by numeric value), then all other
-- values alphabetically.
-- A digits-only test replaces isnumeric(), which also accepts values such
-- as '$', '.', '-' or '1e5' that convert(int, ...) rejects with an error.
-- A separate group key replaces the 999999 sentinel, which would collide
-- with numbers of that size or larger.
select * from test_table
order by
    case when test_column <> '' and test_column not like '%[^0-9]%' then 0 else 1 end,
    case when test_column <> '' and test_column not like '%[^0-9]%'
         then try_convert(bigint, test_column) end,
    test_column
Sql fiddle demo.
That's what you get when you denormalize your database schema.
Prefix and number should be stored separately.
That said, this is what I did when I had the same problem:
-- Sort by the first number found in the field, then by the field itself.
SELECT *
FROM YOUR_TABLE
ORDER BY
    dbo.GetNumbers(YOUR_FIELD),
    YOUR_FIELD
-- Returns the first run of digit / dot / minus characters in @Data as an int.
--   'AM0001' -> 1      '12x' -> 12      'abc' -> 0 (no number found)
-- Returns NULL when @Data is NULL or when the run is not a valid int
-- (e.g. '1.5'); the plain CAST used before raised a conversion error there.
CREATE FUNCTION dbo.GetNumbers(@Data VarChar(8000))
RETURNS int
AS
BEGIN
    -- Everything from the first digit, dot or minus sign onwards.
    DECLARE @Tail varchar(8000) = SUBSTRING(@Data, PATINDEX('%[0-9.-]%', @Data), 8000);

    -- Cut at the first character that cannot belong to the number; the 'X'
    -- sentinel guarantees such a character exists.
    RETURN TRY_CONVERT(int, LEFT(@Tail, PATINDEX('%[^0-9.-]%', @Tail + 'X') - 1));
END
GO
See also this post for extracting numbers from strings
http://blogs.lessthandot.com/index.php/DataMgmt/DataDesign/extracting-numbers-with-sql-server/