SQL2000 safely cast a VARCHAR(256) to INT - sql-server-2000

I'm having some problem safely casting a varchar to int on SQL2000.
Part 1 of my problem was that IsNumeric returns false positives if you're looking for integers only. I'm aware of why IsNumeric does this (floats, money, etc. are numeric too), so I looked for an IsInteger function on Google.
I found the following User Defined Function (UDF):
-- Returns 1 when #num, after one optional leading '-' is removed, is non-empty
-- and contains no character other than the digits 0-9; otherwise returns 0.
-- The value is not checked against the range of INT.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
CREATE FUNCTION dbo.IsInteger
(
#num VARCHAR(64)
)
RETURNS BIT
BEGIN
-- Drop a single leading minus sign so only the magnitude is inspected below.
IF LEFT(#num, 1) = '-'
SET #num = SUBSTRING(#num, 2, LEN(#num))
RETURN CASE
-- PATINDEX = 0 means no character outside the set "digit or '-'" was found.
WHEN PATINDEX('%[^0-9-]%', #num) = 0
AND CHARINDEX('-', #num) <= 1
AND #num NOT IN ('.', '-', '+', '^')
AND LEN(#num)>0
-- No '-' may remain once the leading one has been stripped.
AND #num NOT LIKE '%-%'
THEN
1
ELSE
0
END
END
this seems to do a good job checking for integers:
-- Demo: apply the check-then-convert pattern to a whole-number string and to a
-- decimal string; the CASE only calls convert() when dbo.IsInteger returns 1.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
declare #num varchar(256);
declare #num2 varchar(256);
set #num = '22312311';
set #num2 = '22312311.0';
SELECT #num AS [character],
dbo.IsInteger(#num) AS [isInteger],
CASE dbo.IsInteger(#num)WHEN 1 THEN convert(int, #num) ELSE NULL END AS [integer]
UNION
SELECT #num2 AS [character],
dbo.IsInteger(#num2) AS [isInteger],
CASE dbo.IsInteger(#num2)WHEN 1 THEN convert(int, #num2) ELSE NULL END AS [integer];
However it won't validate if the integer is within range (-2^31 <=> 2^31 - 1)
-- Demo of the overflow case: the text is all digits, so the CASE goes on to
-- convert(int, ...) even though the value is 2^31, one above the INT maximum.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
declare #num varchar(256);
set #num = '2147483648';
SELECT #num AS [character],
dbo.IsInteger(#num) AS [isInteger],
CASE dbo.IsInteger(#num)WHEN 1 THEN convert(int, #num) ELSE NULL END AS [integer];
Which throws
Server: Msg 248, Level 16, State 1, Line 3
The conversion of the nvarchar value '2147483648' overflowed an int column. Maximum integer value exceeded.
SQL2000 doesn't have TRY/CATCH (answer presumes ISNUMERIC() returns no false positives) and casting errors cause the entire batch to fail even within UDF's according to this website:
When an error occurs in a UDF,
execution of the function is aborted
immediately and so is the query, and
unless the error is one that aborts
the batch, execution continues on the
next statement – but @@error is 0!
and even if they didn't, they would still obscure @@error. I also can't cast to bigint, since it might still crash (albeit not as often), and this query is part of a UNION which is output to XML, which is further validated and transformed with XSLT by a VB6 COM DLL and displayed on a website coded back in 2001, so I really (no, really) do not want to change the query output!
So this leaves me stuck on this seemingly easy task:
if varchar is castable to int cast to int otherwise give me NULL
Any pointers / solutions would be much appreciated, but please note that I can't, under any circumstances, change the source column's datatype or change the validation applied when data is entered.

Edit:
You cannot have numbers beyond decimal(38,0) in SQL Server (+/- (10^38 - 1)), so you cannot trap or convert them. That means limiting the input to a maximum length of 37 characters and using a CAST to decimal(38,0):
-- Convert MyColumn to INT when it holds a whole number inside the INT range;
-- otherwise yield NULL.
-- The validity test is repeated inside the CASE and the range test is nested
-- under it, so each CAST is reached only after the check that makes it safe,
-- rather than depending on the WHERE clause having been applied first.
SELECT
    CASE
        WHEN ISNUMERIC(MyColumn + '.0e0') = 1 AND LEN(MyColumn) <= 37 THEN
            CASE
                WHEN CAST(MyColumn AS decimal(38,0)) BETWEEN -2147483648 AND 2147483647
                    THEN CAST(MyColumn AS int)
                ELSE NULL
            END
        ELSE NULL
    END
FROM
    MyTable
WHERE
    ISNUMERIC(MyColumn + '.0e0') = 1 AND LEN(MyColumn) <= 37
Respect to this article for the .0e0 trick
EDIT OP
This question led me to the following updated IsInteger function.
-- Returns 1 when @num is a whole number that fits in an INT, otherwise 0.
CREATE FUNCTION dbo.IsInteger
(
    @num VARCHAR(256)
)
RETURNS BIT
BEGIN
    RETURN CASE
        -- The '.0e0' suffix is the trick that makes ISNUMERIC accept whole
        -- numbers only. The length cap keeps the text within what
        -- decimal(38,0) can hold, so the conversion below cannot overflow.
        WHEN ISNUMERIC(@num + '.0e0') = 1 AND LEN(@num) <= 37 THEN
            -- Nested so the conversion is attempted only after the checks pass.
            CASE
                WHEN CONVERT(decimal(38,0), @num) BETWEEN -2147483648 AND 2147483647 THEN 1
                ELSE 0
            END
        ELSE 0
    END
END

You could just add a couple more checks into the function:
-- Returns 1 when @num is an optional leading '-' followed by digits only and
-- the signed value lies within the INT range; otherwise returns 0.
CREATE FUNCTION [dbo].[IsInteger]
(
    @num VARCHAR(64)
)
RETURNS BIT
BEGIN
    DECLARE @digits VARCHAR(64)
    DECLARE @IsInt BIT

    -- Inspect the magnitude in a copy so the original signed text is still
    -- available for the range check. Testing the stripped value instead would
    -- reject -2147483648, because 2147483648 is above the INT maximum.
    SET @digits = @num
    IF LEFT(@digits, 1) = '-'
        SET @digits = SUBSTRING(@digits, 2, LEN(@digits))

    -- The magnitude must be non-empty and contain nothing but digits.
    SET @IsInt = CASE
                     WHEN LEN(@digits) > 0 AND PATINDEX('%[^0-9]%', @digits) = 0 THEN 1
                     ELSE 0
                 END

    IF @IsInt = 1
    BEGIN
        -- Up to 11 characters (a sign plus 10 digits, or 11 digits) always
        -- fits in a BIGINT, so this conversion cannot overflow.
        IF LEN(@num) <= 11
        BEGIN
            IF CONVERT(bigint, @num) NOT BETWEEN -2147483648 AND 2147483647
                SET @IsInt = 0
        END
        ELSE
            SET @IsInt = 0
    END

    RETURN @IsInt
END
I've not had a chance to test but I think it should work - I've left it as verbose as possible

Related

How to update values using case statement

I have created update statement like below
-- Sets SalePrice3 from Price for every row of dbo.S_Item (there is no WHERE):
-- negative prices give -1, the single digits 1-9 give the digit doubled
-- (11, 22, ...), and 0 gives the literal 00. Any other Price matches no branch
-- and, with no ELSE, the CASE yields NULL for it.
UPDATE dbo.S_Item
SET SalePrice3 = CASE WHEN Price <0 THEN '-1'
when Price=1 then 11
when Price=2 then 22
when Price=3 then 33
when Price=4 then 44
when Price=5 then 55
when Price=6 then 66
when Price=7 then 77
when Price=8 then 88
when Price=9 then 99
when Price=0 then 00
end
But I want to update more values using the statement above. For example, if Price = 123 it should be updated to 112233; if Price = 456, to 445566; and if Price = 725, to 772255. How can I achieve this?
-- Doubles every character of @Number's decimal text and returns the result as
-- a BigInt (123 -> 112233).
-- Callers are expected to pass non-negative values: a leading '-' would be
-- doubled as well and the final cast would then fail. A 10-digit input
-- produces 20 digits, which is more than a BigInt can hold.
Create Function ReplicateDigits (
    @Number Int)
Returns BigInt
Begin
    -- Convert once, with an explicit length: 11 characters covers any Int,
    -- sign included.
    Declare @Digits Varchar(11) = Cast(@Number As Varchar(11)),
            @Step SmallInt = 1,
            @Result Varchar(22) = ''
    While (@Step <= Len(@Digits))
    Begin
        Set @Result = @Result + Replicate(SubString(@Digits, @Step, 1), 2)
        Set @Step = @Step + 1
    End
    Return Cast(@Result As BigInt)
End
Go
Then:
-- Negative prices are stored as -1; every other Price is passed through
-- dbo.ReplicateDigits. The statement has no WHERE, so it touches every row.
UPDATE dbo.S_Item
SET SalePrice3 = CASE
WHEN Price <0 THEN '-1'
Else dbo.ReplicateDigits(Price)
End
Let me know if it was useful.
If the point is just in duplication of every digit, here's another implementation of the duplication method:
-- Returns the decimal text of @Input with every character written twice
-- (725 -> '772255'). The result is text, not a number; varchar(20) is sized
-- for the doubled form of a positive int of up to 10 digits.
CREATE FUNCTION dbo.DuplicateDigits(@Input int)
RETURNS varchar(20)
AS
BEGIN
    DECLARE @Result varchar(20) = CAST(@Input AS varchar(20));
    DECLARE @Pos int = LEN(@Result);
    -- Walk from the last character to the first so that inserting a copy at
    -- @Pos never shifts the characters still waiting to be processed.
    WHILE @Pos > 0
    BEGIN
        -- A delete-length of 0 makes STUFF a pure insert of the character at @Pos.
        SET @Result = STUFF(@Result, @Pos, 0, SUBSTRING(@Result, @Pos, 1));
        SET @Pos = @Pos - 1;
    END;
    RETURN @Result;
END;
The method consists in iterating through the digits backwards, extracting each using SUBSTRING and duplicating it using STUFF.
And you would be using this function same as in Meysam Tolouee's answer:
-- Derive SalePrice3 from Price: negative prices are stored as -1, every other
-- price gets each of its digits doubled. The function argument is the source
-- column Price, not the SalePrice3 column that is being overwritten.
UPDATE dbo.S_Item
SET SalePrice3 = CASE
                     WHEN Price < 0 THEN '-1'
                     ELSE dbo.DuplicateDigits(Price)
                 END;
To explain a little why the function's returned type is varchar, it is because that guarantees that the function returns the result no matter what the input's [reasonable] length is. The maximum length of 20 has been chosen merely because the input is [assumed to be] int and positive int values consist of up to 10 digits.
However, whether varchar(20) converts to the type of SalePrice3 is another matter, which should be considered separately.
You must create a procedure to achieve the desired result, rather than using a single query.

Query to get only numbers from a string

I have data like this:
string 1: 003Preliminary Examination Plan
string 2: Coordination005
string 3: Balance1000sheet
The output I expect is
string 1: 003
string 2: 005
string 3: 1000
And I want to implement it in SQL.
First create this UDF
-- Returns @strAlphaNumeric with every character other than 0-9 removed.
-- A NULL result is returned as '0'.
CREATE FUNCTION dbo.udf_GetNumeric
(
    @strAlphaNumeric VARCHAR(256)
)
RETURNS VARCHAR(256)
AS
BEGIN
    DECLARE @intAlpha INT
    -- Position of the first non-digit; 0 when none is left.
    SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
    WHILE @intAlpha > 0
    BEGIN
        -- Delete that one character and look for the next non-digit.
        SET @strAlphaNumeric = STUFF(@strAlphaNumeric, @intAlpha, 1, '')
        SET @intAlpha = PATINDEX('%[^0-9]%', @strAlphaNumeric)
    END
    -- The return type is text, so the fallback is the string '0'.
    RETURN ISNULL(@strAlphaNumeric, '0')
END
GO
Now use the function as
SELECT dbo.udf_GetNumeric(column_name)
from table_name
SQL FIDDLE
I hope this solved your problem.
Reference
Try this one -
Query:
-- Extracts the first run of digits from each sample string.
-- NOTE(review): '#temp' looks like a mangled '@temp' table variable - confirm.
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
-- Outer query: keep the leading digits of subsrt. The appended 't' gives
-- PATINDEX a non-digit to find when subsrt consists of digits only.
SELECT LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
-- subsrt = the text from position pos to the end of the string.
SELECT subsrt = SUBSTRING(string, pos, LEN(string))
FROM (
-- pos = position of the first digit in the string.
SELECT string, pos = PATINDEX('%[0-9]%', string)
FROM #temp
) d
) t
Output:
----------
003
005
1000
Query:
-- Same extraction as a single SUBSTRING call.
-- NOTE(review): '#temp' looks like a mangled '@temp' table variable - confirm.
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
-- Start = position of the first digit. Length = (position of the first digit
-- that is followed by a non-digit) - start + 1. The appended 't' supplies that
-- trailing non-digit when the string ends in a digit.
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM #temp
Please try:
-- Extracts the first run of digits from a single variable.
-- NOTE(review): '#var' looks like a mangled '@var' variable - confirm.
declare #var nvarchar(max)='Balance1000sheet'
-- Outer: keep the digits at the start of Val; the appended 'a' guarantees
-- PATINDEX finds a non-digit even when Val is all digits.
SELECT LEFT(Val,PATINDEX('%[^0-9]%', Val+'a')-1) from(
-- Inner: Val = the text from the first digit to the end of the string.
SELECT SUBSTRING(#var, PATINDEX('%[0-9]%', #var), LEN(#var)) Val
)x
Getting only numbers from a string can be done in a one-liner.
Try this :
-- Returns the first run of digits only: the inner SUBSTRING starts at the first
-- digit, and LEFT cuts the result at its first non-digit (the appended 'x'
-- guarantees one exists when the string ends in a digit).
LEFT(SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here')), PATINDEX('%[^0-9]%', SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here')) + 'x') - 1)
NB: Only works for the first int in the string, ex: abc123vfg34 returns 123.
I found this approach works about 3x faster than the top voted answer. Create the following function, dbo.GetNumbers:
-- Returns the digits 0-9 of @String, in their original order, with every other
-- character removed. Returns NULL when @String contains no digit.
CREATE FUNCTION dbo.GetNumbers(@String VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN;
    WITH
    Numbers
    AS (
        --Step 1.
        --Get a column of numbers to represent
        --every character position in the @String.
        SELECT 1 AS Number
        UNION ALL
        SELECT Number + 1
        FROM Numbers
        WHERE Number < LEN(@String)
    )
    ,Characters
    AS (
        --Number is carried along so the digits can be put back together
        --in position order in step 3.
        SELECT Number, Character
        FROM Numbers
        CROSS APPLY (
            --Step 2.
            --Use the column of numbers generated above
            --to tell substring which character to extract.
            SELECT SUBSTRING(@String, Number, 1) AS Character
        ) AS c
    )
    --Step 3.
    --Pattern match to return only numbers from the CTE
    --and use STRING_AGG to rebuild it into a single string.
    --WITHIN GROUP fixes the concatenation order; without it the order in
    --which STRING_AGG combines the rows is not guaranteed.
    SELECT @String = STRING_AGG(Character, '') WITHIN GROUP (ORDER BY Number)
    FROM Characters
    WHERE Character LIKE '[0-9]'
    --allows going past the default maximum of 100 loops in the CTE
    OPTION (MAXRECURSION 8000)
    RETURN @String
END
GO
Testing
Testing for purpose:
SELECT dbo.GetNumbers(InputString) AS Numbers
FROM ( VALUES
('003Preliminary Examination Plan') --output: 003
,('Coordination005') --output: 005
,('Balance1000sheet') --output: 1000
,('(111) 222-3333') --output: 1112223333
,('1.38hello#f00.b4r#\-6') --output: 1380046
) testData(InputString)
Testing for performance:
Start off setting up the test data...
--Add table to hold test data
CREATE TABLE dbo.NumTest (String VARCHAR(8000))
--Make a test string with a mix of numbers and letters: the 8-character
--pattern '12tf56se' repeated 800 times, i.e. 6400 characters
--NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
DECLARE #Num VARCHAR(8000) = REPLICATE('12tf56se',800)
--Add this to the test table 500 times
DECLARE #n INT = 0
WHILE #n < 500
BEGIN
INSERT INTO dbo.NumTest VALUES (#Num)
SET #n = #n +1
END
Now testing the dbo.GetNumbers function:
SELECT dbo.GetNumbers(NumTest.String) AS Numbers
FROM dbo.NumTest -- Time to complete: 1 min 7s
Then testing the UDF from the top voted answer on the same data.
SELECT dbo.udf_GetNumeric(NumTest.String)
FROM dbo.NumTest -- Time to complete: 3 mins 12s
Inspiration for dbo.GetNumbers
Decimals
If you need it to handle decimals, you can use either of the following approaches, I found no noticeable performance differences between them.
change '[0-9]' to '[0-9.]'
change Character LIKE '[0-9]' to ISNUMERIC(Character) = 1 (SQL treats a single decimal point as "numeric")
Bonus
You can easily adapt this to differing requirements by swapping out WHERE Character LIKE '[0-9]' with the following options:
WHERE Character LIKE '[a-zA-Z]' --Get only letters
WHERE Character LIKE '[0-9a-zA-Z]' --Remove non-alphanumeric
WHERE Character LIKE '[^0-9a-zA-Z]' --Get only non-alphanumeric
With the previous queries I get these results:
'AAAA1234BBBB3333' >>>> Output: 1234
'-çã+0!\aº1234' >>>> Output: 0
The code below returns All numeric chars:
1st output: 12343333
2nd output: 01234
-- Removes every character other than 0-9 from @StringAlphaNum and selects the
-- result. The loop is driven by PATINDEX rather than ISNUMERIC, so it also
-- terminates when the input contains no digit at all, and it does not stop
-- early on text that merely looks numeric (signs, currency symbols, etc.).
declare @StringAlphaNum varchar(255)
declare @NonDigitPos int
set @StringAlphaNum = 'AAAA1234BBBB3333'
-- Position of the first non-digit; 0 once only digits remain.
set @NonDigitPos = patindex('%[^0-9]%', @StringAlphaNum)
while @NonDigitPos > 0
begin
    -- Delete exactly that character, then look for the next non-digit.
    set @StringAlphaNum = stuff(@StringAlphaNum, @NonDigitPos, 1, '')
    set @NonDigitPos = patindex('%[^0-9]%', @StringAlphaNum)
end
select @StringAlphaNum
-- Strips every non-digit: each pass finds the first non-digit character and
-- REPLACE removes all of its occurrences, until PATINDEX finds none.
-- NOTE(review): '#puvodni' looks like a mangled '@puvodni' variable - confirm.
declare #puvodni nvarchar(20)
set #puvodni = N'abc1d8e8ttr987avc'
WHILE PATINDEX('%[^0-9]%', #puvodni) > 0 SET #puvodni = REPLACE(#puvodni, SUBSTRING(#puvodni, PATINDEX('%[^0-9]%', #puvodni), 1), '' )
SELECT #puvodni
A solution for SQL Server 2017 and later, using TRANSLATE:
-- Removes the letters a-z and spaces from each sample string; any other
-- character is left in place.
-- NOTE(review): '#T' looks like a mangled '@T' table variable - confirm.
DECLARE #T table (string varchar(50) NOT NULL);
INSERT #T
(string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet');
SELECT
result =
-- Outer REPLACE: turn every space into an empty string.
REPLACE(
-- TRANSLATE maps each of the 26 listed letters to a space (SPACE(26)); the
-- collation applied to the input is named CI_AI.
TRANSLATE(
T.string COLLATE Latin1_General_CI_AI,
'abcdefghijklmnopqrstuvwxyz',
SPACE(26)),
SPACE(1),
SPACE(0))
FROM #T AS T;
Output:
result
003
005
1000
The code works by:
Replacing characters a-z (ignoring case & accents) with a space
Replacing spaces with an empty string.
The string supplied to TRANSLATE can be expanded to include additional characters.
I did not have rights to create functions but had text like
["blahblah012345679"]
And needed to extract the numbers out of the middle
Note this assumes the numbers are grouped together and not at the start and end of the string.
-- Start at the first digit; the length runs up to the first digit that is
-- followed by a non-digit, so the number must not be the last thing in the string.
select substring(column_name, patindex('%[0-9]%', column_name), patindex('%[0-9][^0-9]%', column_name) - patindex('%[0-9]%', column_name) + 1)
from table_name
Although this is an old thread its the first in google search, I came up with a different answer than what came before. This will allow you to pass your criteria for what to keep within a string, whatever that criteria might be. You can put it in a function to call over and over again if you want.
-- Collects, in order, every character of #String that matches #MatchExpression.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
declare #String VARCHAR(MAX) = '-123. a 456-78(90)'
declare #MatchExpression VARCHAR(255) = '%[0-9]%'
declare #return varchar(max)
WHILE PatIndex(#MatchExpression, #String) > 0
begin
-- Append the first matching character (#return is unassigned on the first pass).
set #return = CONCAT(#return, SUBSTRING(#string,patindex(#matchexpression, #string),1))
-- Remove that character so the next pass finds the following match.
SET #String = Stuff(#String, PatIndex(#MatchExpression, #String), 1, '')
end
select (#return)
This UDF will work for all types of strings:
-- Returns @string with every character other than 0-9 removed.
CREATE FUNCTION udf_getNumbersFromString (@string varchar(max))
RETURNS varchar(max)
AS
BEGIN
    -- Position of the first non-digit; 0 once only digits remain.
    DECLARE @nonDigitPos int = PATINDEX('%[^0-9]%', @string)
    WHILE @nonDigitPos > 0
    BEGIN
        -- REPLACE removes every occurrence of that character in one pass.
        SET @string = REPLACE(@string, SUBSTRING(@string, @nonDigitPos, 1), '')
        SET @nonDigitPos = PATINDEX('%[^0-9]%', @string)
    END
    RETURN @string
END
Just a little modification to @Epsicron's answer:
-- Start = position of the first digit. Length = (position of the first digit
-- that is followed by a non-digit) - start + 1; the appended 't' supplies that
-- non-digit when the string ends in a digit. The sample rows come from an
-- inline VALUES list aliased as a(string).
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM (values ('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')) as a(string)
no need for a temporary variable
Firstly find out the number's starting length then reverse the string to find out the first position again(which will give you end position of number from the end). Now if you deduct 1 from both number and deduct it from string whole length you'll get only number length. Now get the number using SUBSTRING
-- Returns the span from the first digit to the last digit of #fieldName,
-- including anything in between (here '1221.121').
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
declare #fieldName nvarchar(100)='AAAA1221.121BBBB'
-- Number of characters before the first digit.
declare #lenSt int=(select PATINDEX('%[0-9]%', #fieldName)-1)
-- Number of characters after the last digit, found by searching the reversed string.
declare #lenEnd int=(select PATINDEX('%[0-9]%', REVERSE(#fieldName))-1)
-- Span length = total length minus the leading and trailing non-digit counts.
select SUBSTRING(#fieldName, PATINDEX('%[0-9]%', #fieldName), (LEN(#fieldName) - #lenSt -#lenEnd))
T-SQL function to read all the integers from text and return the one at the indicated index, starting from left or right, also using a starting search term (optional):
-- Scans #text for runs of digits, stores each run as an int in a table
-- variable, and returns the run at #number_position, counted from the first
-- run found (#rtl = 0) or from the last one (#rtl = 1).
-- When #search_term is non-empty, only the text after its first occurrence
-- (#rtl = 0) or before it (#rtl = 1) is scanned.
-- Returns 0 when #text is NULL or empty, when the search term is not found,
-- or when no run exists at the requested position.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
create or alter function dbo.udf_number_from_text(
#text nvarchar(max),
#search_term nvarchar(1000) = N'',
#number_position tinyint = 1,
#rtl bit = 0
) returns int
as
begin
declare #result int = 0;
declare #search_term_index int = 0;
if #text is null or len(#text) = 0 goto exit_label;
set #text = trim(#text);
-- Equal lengths are treated as "nothing left to scan".
if len(#text) = len(#search_term) goto exit_label;
if len(#search_term) > 0
begin
set #search_term_index = charindex(#search_term, #text);
if #search_term_index = 0 goto exit_label;
end;
-- Narrow #text to the part after (#rtl = 0) or before (#rtl = 1) the search term.
if #search_term_index > 0
if #rtl = 0
set #text = trim(right(#text, len(#text) - #search_term_index - len(#search_term) + 1));
else
set #text = trim(left(#text, #search_term_index - 1));
if len(#text) = 0 goto exit_label;
declare #patt_number nvarchar(10) = '%[0-9]%';
declare #patt_not_number nvarchar(10) = '%[^0-9]%';
declare #number_start int = 1;
declare #number_end int;
-- Runs are stored in the order found; id records that order.
declare #found_numbers table (id int identity(1,1), val int);
-- Each pass consumes one run of digits from the front of #text.
while #number_start > 0
begin
set #number_start = patindex(#patt_number, #text);
if #number_start > 0
begin
-- The only digit left is the final character of #text.
if #number_start = len(#text)
begin
insert into #found_numbers(val)
select cast(substring(#text, #number_start, 1) as int);
break;
end;
else
begin
-- Drop everything before the first digit, then find where the run ends.
set #text = right(#text, len(#text) - #number_start + 1);
set #number_end = patindex(#patt_not_number, #text);
-- No non-digit left: the rest of #text is one run.
if #number_end = 0
begin
insert into #found_numbers(val)
select cast(#text as int);
break;
end;
else
begin
insert into #found_numbers(val)
select cast(left(#text, #number_end - 1) as int);
if #number_end = len(#text)
break;
else
begin
-- Continue with the text that follows the non-digit ending this run.
set #text = trim(right(#text, len(#text) - #number_end));
if len(#text) = 0 break;
end;
end;
end;
end;
end;
-- Pick the requested run; #result keeps its initial 0 when no row matches.
if #rtl = 0
select #result = coalesce(a.val, 0)
from (select row_number() over (order by m.id asc) as c_row, m.val
from #found_numbers as m) as a
where a.c_row = #number_position;
else
select #result = coalesce(a.val, 0)
from (select row_number() over (order by m.id desc) as c_row, m.val
from #found_numbers as m) as a
where a.c_row = #number_position;
exit_label:
return #result;
end;
Example:
-- Second number counting back from the search term 'term'.
select dbo.udf_number_from_text(N'Text text 10 text, 25 term', N'term',2,1);
returns 10;
This is one of the simplest and easiest one. This will work on the entire String for multiple occurences as well.
-- Returns the characters of @strInput that match [0-9], in their original
-- order; every other character is dropped.
CREATE FUNCTION dbo.fn_GetNumbers(@strInput NVARCHAR(500))
RETURNS NVARCHAR(500)
AS
BEGIN
    DECLARE @strOut NVARCHAR(500) = N'',
            @intCounter INT = 1,
            @chCurrent NCHAR(1)
    WHILE @intCounter <= LEN(@strInput)
    BEGIN
        -- Read the current character once and append it only when it is a digit.
        SET @chCurrent = SUBSTRING(@strInput, @intCounter, 1)
        IF @chCurrent LIKE '[0-9]'
            SET @strOut = @strOut + @chCurrent
        SET @intCounter = @intCounter + 1
    END
    RETURN @strOut
END
Following a solution using a single common table expression (CTE).
-- Keeps only the digits of each value, using one recursive CTE that emits one
-- row per character and STRING_AGG to reassemble the digits per id.
-- NOTE(review): '#s' looks like a mangled '@s' table variable - confirm.
-- NOTE(review): there is no MAXRECURSION hint, so the recursion depth is
-- bounded by the server default - confirm values longer than that are not expected.
DECLARE #s AS TABLE (id int PRIMARY KEY, value nvarchar(max));
INSERT INTO #s
VALUES
(1, N'003Preliminary Examination Plan'),
(2, N'Coordination005'),
(3, N'Balance1000sheet');
SELECT * FROM #s ORDER BY id;
WITH t AS (
-- Anchor: the first character of every non-empty value.
SELECT
id,
1 AS i,
SUBSTRING(value, 1, 1) AS c
FROM
#s
WHERE
LEN(value) > 0
UNION ALL
-- Recursive step: the next character of the same value, until i reaches its length.
SELECT
t.id,
t.i + 1 AS i,
SUBSTRING(s.value, t.i + 1, 1) AS c
FROM
t
JOIN #s AS s ON t.id = s.id
WHERE
t.i < LEN(s.value)
)
-- Keep the digit rows and concatenate them per id in position order.
SELECT
id,
STRING_AGG(c, N'') WITHIN GROUP (ORDER BY i ASC) AS value
FROM
t
WHERE
c LIKE '[0-9]'
GROUP BY
id
ORDER BY
id;
-- Removes every digit from #index, leaving only the non-digit characters:
-- each pass finds the first digit and REPLACE removes all of its occurrences.
-- NOTE(review): '#index' looks like a mangled '@index' variable - confirm.
DECLARE #index NVARCHAR(20);
SET #index = 'abd565klaf12';
WHILE PATINDEX('%[0-9]%', #index) != 0
BEGIN
SET #index = REPLACE(#index, SUBSTRING(#index, PATINDEX('%[0-9]%', #index), 1), '');
END
SELECT #index;
One can replace [0-9] with [a-z] if numbers only are wanted with desired castings using the CAST function.
If we use the User Define Function, the query speed will be greatly reduced. This code extracts the number from the string....
-- Returns the part of [FieldName] from its first digit to its last digit,
-- including any non-digit characters in between. The same trimmed
-- "first digit to end" substring is built three times: it is reversed, cut
-- from the first digit of the reversed text onward, and reversed back.
SELECT
Reverse(substring(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) , patindex('%[0-9]%', Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) ), len(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) ))))) )) NumberValue
FROM dbo.TableName
-- Returns one row with the number of ASCII letters and the number of digits
-- in input_string. NULL or empty input yields (0, 0).
CREATE OR REPLACE FUNCTION count_letters_and_numbers(input_string TEXT)
RETURNS TABLE (letters INT, numbers INT) AS $$
BEGIN
    RETURN QUERY
    SELECT
        -- sum() over integers yields bigint, so cast back to the declared INT;
        -- COALESCE covers the zero-row case, where sum() is NULL.
        COALESCE(sum(CASE WHEN ch ~ '[A-Za-z]' THEN 1 ELSE 0 END), 0)::int AS letters,
        COALESCE(sum(CASE WHEN ch ~ '[0-9]' THEN 1 ELSE 0 END), 0)::int AS numbers
    -- A NULL delimiter makes string_to_array split into single characters.
    -- The alias differs from the parameter name so the references stay unambiguous.
    FROM unnest(string_to_array(input_string, NULL)) AS ch;
END;
$$ LANGUAGE plpgsql;
For the hell of it...
This solution is different to all earlier solutions, viz:
There is no need to create a function
There is no need to use pattern matching
There is no need for a temporary table
This solution uses a recursive common table expression (CTE)
But first - note the question does not specify where such strings are stored. In my solution below, I create a CTE as a quick and dirty way to put these strings into some kind of "source table".
Note also - this solution uses a recursive common table expression (CTE) - so don't get confused by the usage of two CTEs here. The first is simply to make the data avaliable to the solution - but it is only the second CTE that is required in order to solve this problem. You can adapt the code to make this second CTE query your existing table, view, etc.
Lastly - my coding is verbose, trying to use column and CTE names that explain what is going on and you might be able to simplify this solution a little. I've added in a few pseudo phone numbers with some (expected and atypical, as the case may be) formatting for the fun of it.
-- SOURCE_TABLE only supplies sample strings; FIRST_CHAR_PROCESSED is the
-- recursive CTE that peels one character off per step and appends it to
-- newString when it converts to a tinyint.
-- NOTE(review): remainingString is cast to nvarchar without a length in the
-- anchor member - confirm this does not truncate longer inputs.
with SOURCE_TABLE as (
select '003Preliminary Examination Plan' as numberString
union all select 'Coordination005' as numberString
union all select 'Balance1000sheet' as numberString
union all select '1300 456 678' as numberString
union all select '(012) 995 8322 ' as numberString
union all select '073263 6122,' as numberString
),
FIRST_CHAR_PROCESSED as (
-- Anchor: process the first character of each source string.
select
len(numberString) as currentStringLength,
isNull(cast(try_cast(replace(left(numberString, 1),' ','z') as tinyint) as nvarchar),'') as firstCharAsNumeric,
-- nullIf turns an empty string into NULL before try_cast is applied.
cast(isNull(cast(try_cast(nullIf(left(numberString, 1),'') as tinyint) as nvarchar),'') as nvarchar(4000)) as newString,
cast(substring(numberString,2,len(numberString)) as nvarchar) as remainingString
from SOURCE_TABLE
union all
-- Recursive step: process the first character of what remained.
select
len(remainingString) as currentStringLength,
cast(try_cast(replace(left(remainingString, 1),' ','z') as tinyint) as nvarchar) as firstCharAsNumeric,
-- nvarchar(3999) + nvarchar(1) gives nvarchar(4000), matching the anchor's column type.
cast(isNull(newString,'') as nvarchar(3999)) + isNull(cast(try_cast(nullIf(left(remainingString, 1),'') as tinyint) as nvarchar(1)),'') as newString,
substring(remainingString,2,len(remainingString)) as remainingString
from FIRST_CHAR_PROCESSED fcp2
where fcp2.currentStringLength > 1
)
-- Only the rows where one character was left to process are selected as results.
select
newString
,* -- comment this out when required
from FIRST_CHAR_PROCESSED
where currentStringLength = 1
So what's going on here?
Basically in our CTE we are selecting the first character and using try_cast (see docs) to cast it to a tinyint (which is a large enough data type for a single-digit numeral). Note that the type-casting rules in SQL Server say that an empty string (or a space, for that matter) will resolve to zero, so the nullif is added to force spaces and empty strings to resolve to null (see discussion) (otherwise our result would include a zero character any time a space is encountered in the source data).
The CTE also returns everything after the first character - and that becomes the input to our recursive call on the CTE; in other words: now let's process the next character.
Lastly, the field newString in the CTE is generated (in the second SELECT) via concatenation. With recursive CTEs the data type must match between the two SELECT statements for any given column - including the column size. Because we know we are adding (at most) a single character, we are casting that character to nvarchar(1) and we are casting the newString (so far) as nvarchar(3999). Concatenated, the result will be nvarchar(4000) - which matches the type casting we carry out in the first SELECT.
If you run this query and exclude the WHERE clause, you'll get a sense of what's going on - but the rows may be in a strange order. (You won't necessarily see all rows relating to a single input value grouped together - but you should still be able to follow).
Hope it's an interesting option that may help a few people wanting a strictly expression-based solution.
In Oracle
You can get what you want using this:
-- First run of consecutive digits in the string. REGEXP_SUBSTR stops at the
-- first non-digit, so later digit groups cannot stretch the result.
REGEXP_SUBSTR('ABCD1234EFGH', '[[:digit:]]+')
Sample Query:
-- Each column is the first run of consecutive digits in its sample string.
SELECT REGEXP_SUBSTR('003Preliminary Examination Plan ', '[[:digit:]]+') SAMPLE1,
       REGEXP_SUBSTR('Coordination005', '[[:digit:]]+') SAMPLE2,
       REGEXP_SUBSTR('Balance1000sheet', '[[:digit:]]+') SAMPLE3
FROM DUAL
If you are using Postgres and you have data like '2000 - some sample text' then try substring and position combination, otherwise if in your scenario there is no delimiter, you need to write regex:
SUBSTRING(Column_name from 0 for POSITION('-' in column_name) - 1) as
number_column_name

How to compare software versions using SQL Server?

When trying to compare software versions 5.12 to 5.8, version 5.12 is newer, however mathematically 5.12 is less than 5.8. How would I compare the two versions so that a newer version returns 'Y'?
SELECT CASE WHEN 5.12 > 5.8 THEN 'Y' ELSE 'N' END
Possible Solutions
Add a 0 after the decimal in 5.8 so that it compares 5.08 to 5.12, however it seems like this would require a bit of code.
Simply compare values after the decimal (ie. 12 > 8), however this fails when the version rolls to 6.0.
Use reverse logic and assume that if 5.12 is less than 5.8 to return 'Y'. I believe this would fail when the version rolls to 6.0.
You could use hierarchyid
Which you can use by putting a / at the end and start of the string and casting it
e.g.
SELECT CASE WHEN cast('/5.12/' as hierarchyid) > cast('/5.8/' as hierarchyid) THEN 'Y' ELSE 'N' END
That returns a Y
-- Compares two 'major.minor' version strings numerically, so that 5.12 ranks
-- above 5.8. Both strings must contain a '.'.
declare @v1 varchar(100) = '5.12'
declare @v2 varchar(100) = '5.8'
select
    case
        -- Major part: the text before the first '.'.
        when convert(int, left(@v1, charindex('.', @v1) - 1)) < convert(int, left(@v2, charindex('.', @v2) - 1)) then 'v2 is newer'
        when convert(int, left(@v1, charindex('.', @v1) - 1)) > convert(int, left(@v2, charindex('.', @v2) - 1)) then 'v1 is newer'
        -- Minor part: the text after the first '.'. Each string is sliced with
        -- its own length, so a longer minor part in @v2 is not cut short.
        when convert(int, substring(@v1, charindex('.', @v1) + 1, len(@v1))) < convert(int, substring(@v2, charindex('.', @v2) + 1, len(@v2))) then 'v2 is newer'
        when convert(int, substring(@v1, charindex('.', @v1) + 1, len(@v1))) > convert(int, substring(@v2, charindex('.', @v2) + 1, len(@v2))) then 'v1 is newer'
        else 'same!'
    end
There was a very good solution from a duplicate question here:
How to compare SQL strings that hold version numbers like .NET System.Version class?
After playing with the query for a while, I learned that it was not able to compare the last part when there are 4 or more parts (say, if the version number was 1.2.3.4, it would always treat the last one as 0). I have fixed that issue as well as came up with another function to compare two version numbers.
-- Returns the #part-th dot-separated component of #version as an int.
-- Returns NULL when #version is NULL, and 0 when the version has fewer than
-- #part components.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
CREATE Function [dbo].[VersionNthPart](#version as nvarchar(max), #part as int) returns int as
Begin
Declare
#ret as int = null,
#start as int = 1,
#end as int = 0,
#partsFound as int = 0,
#terminate as bit = 0
if #version is not null
Begin
Set #ret = 0
-- Each pass moves #start/#end across one component until the requested one is reached.
while #partsFound < #part
Begin
Set #end = charindex('.', #version, #start)
If #end = 0 -- did not find the dot. Either it was last part or the part was missing.
begin
if #part - #partsFound > 1 -- also this isn't the last part so it must bail early.
begin
set #terminate = 1
end
-- Force the loop to end after this pass.
Set #partsFound = #part
SET #end = len(#version) + 1; -- get the full length so that it can grab the whole of the final part.
end
else
begin
SET #partsFound = #partsFound + 1
end
-- Convert only when the requested component was actually reached.
If #partsFound = #part and #terminate = 0
begin
Set #ret = Convert(int, substring(#version, #start, #end - #start))
end
Else
begin
-- Skip past the dot to the start of the next component.
Set #start = #end + 1
end
End
End
return #ret
End
GO
-- Compares #Source and #Target component by component, for up to #Parts
-- components, using dbo.VersionNthPart, and stops at the first difference.
-- NOTE(review): when a part value is NULL neither comparison below is true, so
-- the result stays 0 - confirm that is the intended outcome for NULL versions.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
CREATE FUNCTION [dbo].[CompareVersionNumbers]
(
#Source nvarchar(max),
#Target nvarchar(max),
#Parts int = 4
)
RETURNS INT
AS
BEGIN
/*
-1 : target has higher version number (later version)
0 : same
1 : source has higher version number (later version)
*/
DECLARE #ReturnValue as int = 0;
DECLARE #PartIndex as int = 1;
DECLARE #SourcePartValue as int = 0;
DECLARE #TargetPartValue as int = 0;
-- Keep going while the components compared so far are equal.
WHILE (#PartIndex <= #Parts AND #ReturnValue = 0)
BEGIN
SET #SourcePartValue = [dbo].[VersionNthPart](#Source, #PartIndex);
SET #TargetPartValue = [dbo].[VersionNthPart](#Target, #PartIndex);
IF #SourcePartValue > #TargetPartValue
SET #ReturnValue = 1
ELSE IF #SourcePartValue < #TargetPartValue
SET #ReturnValue = -1
SET #PartIndex = #PartIndex + 1;
END
RETURN #ReturnValue
END
Usage/Test case:
declare #Source as nvarchar(100) = '4.9.21.018'
declare #Target as nvarchar(100) = '4.9.21.180'
SELECT [dbo].[CompareVersionNumbers](#Source, #Target, DEFAULT) -- default version parts are 4
SET #Source = '1.0.4.1'
SET #Target = '1.0.1.8'
SELECT [dbo].[CompareVersionNumbers](#Source, #Target, 4) -- typing out # of version parts also works
SELECT [dbo].[CompareVersionNumbers](#Source, #Target, 2) -- comparing only 2 parts should be the same
SET #Target = '1.0.4.1.5'
SELECT [dbo].[CompareVersionNumbers](#Source, #Target, 4) -- only comparing up to parts 4 so they are the same
SELECT [dbo].[CompareVersionNumbers](#Source, #Target, 5) -- now comparing 5th part which should indicate that the target has higher version number
I recommend to create a SQL CLR function:
public partial class UserDefinedFunctions
{
    /// <summary>
    /// Returns true when version string <paramref name="x"/> is strictly
    /// greater than version string <paramref name="y"/>, compared as
    /// System.Version values rather than as text or decimals.
    /// </summary>
    /// <param name="x">Full version string, e.g. "a.b.c.d".</param>
    /// <param name="y">Full version string to compare against.</param>
    [SqlFunction(Name = "CompareVersion")]
    public static bool CompareVersion(SqlString x, SqlString y)
    {
        // SqlString converts to string only through an explicit cast.
        return Version.Parse((string)x) > Version.Parse((string)y);
    }
}
Notes:
SqlString has explicit cast to string.
Pass full version string as of a.b.c.d
I encountered this when trying to filter SQL rows based on semantic versioning. My solution was a bit different, in that I wanted to store configuration rows tagged with a semantic version number and then select rows compatible with a running version of our software.
Assumptions:
My software will include a configuration setting containing the current version number
Data-driven configuration rows will include a min version number
I need to be able to select configuration rows where min <= current.
Examples:
Version 1.0.0 should include: 1.0.0, 1.0.0-*, 1.0.0-beta.1
Version 1.0.0 should exclude: 1.0.1, 1.1.0, 2.0.0
Version 1.1.0-beta.2 should include: 1.0.0, 1.0.1, 1.1.0-beta.1, 1.1.0-beta.2
Version 1.1.0-beta.2 should exclude: 1.1.0, 1.1.1, 1.2.0, 2.0.0, 1.1.1-beta.1
The MSSQL UDF is:
-- Builds a string form of a semantic version for use in string comparisons.
-- The text before the first '-' and the text after it are each split on '.';
-- components that ISNUMERIC accepts are written as 8-character zero-padded
-- numbers, all other components are copied as they are.
-- NOTE(review): the '#' prefixes look like mangled T-SQL '@' variable sigils - confirm.
CREATE FUNCTION [dbo].[SemanticVersion] (
#Version nvarchar(50)
)
RETURNS nvarchar(255)
AS
BEGIN
DECLARE #hyphen int = CHARINDEX('-', #version)
-- A '*' wildcard is replaced by a space before the version is split.
SET #Version = REPLACE(#Version, '*', ' ')
DECLARE
-- #left = text before the first hyphen, #right = text after it (NULL when there is none).
#left nvarchar(50) = CASE #hyphen WHEN 0 THEN #version ELSE SUBSTRING(#version, 1, #hyphen-1) END,
#right nvarchar(50) = CASE #hyphen WHEN 0 THEN NULL ELSE SUBSTRING(#version, #hyphen+1, 50) END,
#normalized nvarchar(255) = '',
#buffer int = 8
-- Consume #left one dot-terminated component at a time.
WHILE CHARINDEX('.', #left) > 0 BEGIN
SET #normalized = #normalized + CASE ISNUMERIC(LEFT(#left, CHARINDEX('.', #left)-1))
WHEN 0 THEN LEFT(#left, CHARINDEX('.', #left)-1)
-- STR right-aligns the number in #buffer characters; the padding spaces become zeros.
WHEN 1 THEN REPLACE(STR(LEFT(#left, CHARINDEX('.', #left)-1), #buffer), SPACE(1), '0')
END + '.'
SET #left = SUBSTRING(#left, CHARINDEX('.', #left)+1, 50)
END
-- Last component of #left (no trailing dot).
SET #normalized = #normalized + CASE ISNUMERIC(#left)
WHEN 0 THEN #left
WHEN 1 THEN REPLACE(STR(#left, #buffer), SPACE(1), '0')
END
SET #normalized = #normalized + '-'
IF (#right IS NOT NULL) BEGIN
-- Same treatment for the components after the hyphen.
WHILE CHARINDEX('.', #right) > 0 BEGIN
SET #normalized = #normalized + CASE ISNUMERIC(LEFT(#right, CHARINDEX('.', #right)-1))
WHEN 0 THEN LEFT(#right, CHARINDEX('.', #right)-1)
WHEN 1 THEN REPLACE(STR(LEFT(#right, CHARINDEX('.', #right)-1), #buffer), SPACE(1), '0')
END + '.'
SET #right = SUBSTRING(#right, CHARINDEX('.', #right)+1, 50)
END
SET #normalized = #normalized + CASE ISNUMERIC(#right)
WHEN 0 THEN #right
WHEN 1 THEN REPLACE(STR(#right, #buffer), SPACE(1), '0')
END
END ELSE
-- No suffix after the hyphen: append a fixed 'zzzzzzzzzz' marker
-- (presumably so a release sorts after its pre-releases - confirm).
SET #normalized = #normalized + 'zzzzzzzzzz'
RETURN #normalized
END
SQL tests include:
-- Each statement returns 'Success' when the two normalized strings compare as expected, otherwise 'Failure'.
-- Pre-release precedence chain: alpha < alpha.1 < alpha.beta < beta < beta.2 < beta.11 < rc.1 < release.
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-alpha') < dbo.SemanticVersion('1.0.0-alpha.1') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-alpha.1') < dbo.SemanticVersion('1.0.0-alpha.beta') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-alpha.beta') < dbo.SemanticVersion('1.0.0-beta') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-beta') < dbo.SemanticVersion('1.0.0-beta.2') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-beta.2') < dbo.SemanticVersion('1.0.0-beta.11') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-beta.11') < dbo.SemanticVersion('1.0.0-rc.1') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-rc.1') < dbo.SemanticVersion('1.0.0') THEN 'Success' ELSE 'Failure' END
-- Wildcard ('*') minimum versions compared against concrete versions.
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-*') <= dbo.SemanticVersion('1.0.0') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.*') <= dbo.SemanticVersion('1.0.0') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.*') <= dbo.SemanticVersion('1.0.0') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('*') <= dbo.SemanticVersion('1.0.0') THEN 'Success' ELSE 'Failure' END
-- (repeats the '1.0.0-*' vs '1.0.0' check from the start of this group)
SELECT CASE WHEN dbo.SemanticVersion('1.0.0-*') <= dbo.SemanticVersion('1.0.0') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.1-*') > dbo.SemanticVersion('1.0.0') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.0.1-*') <= dbo.SemanticVersion('1.0.1') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.1.*') > dbo.SemanticVersion('1.0.9') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.1.*') <= dbo.SemanticVersion('1.2.0') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('1.*') <= dbo.SemanticVersion('2.0.0') THEN 'Success' ELSE 'Failure' END
-- Pre-release tags that themselves contain a hyphen.
SELECT CASE WHEN dbo.SemanticVersion('1.*') > dbo.SemanticVersion('0.9.9-beta-219') THEN 'Success' ELSE 'Failure' END
SELECT CASE WHEN dbo.SemanticVersion('*') <= dbo.SemanticVersion('0.0.1-alpha-1') THEN 'Success' ELSE 'Failure' END
Two steps, first compare the left of the decimal point and after that compare the right.
Possible solution:
-- Compare two 'major.minor' version strings numerically: major first, then minor.
declare #v1 varchar(100) = '5.12',
        #v2 varchar(100) = '5.8'

select
    case
        when CONVERT(int, parts.v1_major) < CONVERT(int, parts.v2_major) then 'v2 is newer'
        when CONVERT(int, parts.v1_major) > CONVERT(int, parts.v2_major) then 'v1 is newer'
        when CONVERT(int, parts.v1_minor) < CONVERT(int, parts.v2_minor) then 'v2 is newer'
        when CONVERT(int, parts.v1_minor) > CONVERT(int, parts.v2_minor) then 'v1 is newer'
        else 'same!'
    end as 'Version Test'
from (
    -- Text on either side of the first '.'; converted to int only where the CASE needs it.
    select
        LEFT(#v1, CHARINDEX('.', #v1) - 1)           as v1_major,
        RIGHT(#v1, LEN(#v1) - CHARINDEX('.', #v1))   as v1_minor,
        LEFT(#v2, CHARINDEX('.', #v2) - 1)           as v2_major,
        RIGHT(#v2, LEN(#v2) - CHARINDEX('.', #v2))   as v2_minor
) as parts
Do not store in a string what is not a string. Alternative is creating your own data type (in C# - allowed for some time) that stored the versions as a sequence of bytes and implements proper comparison logic.
As suggested by AF, you can compare the integer part and then the decimal part. Apart from the answers already given, there is one more way to do it, using PARSENAME. You could try something like this:
case when cast(#var as int)>cast(#var2 as int) then 'Y'
when cast(PARSENAME(#var,1) as int) > cast(PARSENAME(#var2,1) as int) THEN 'Y'
-- Returns 'Y' when #var is a newer 'major.minor' version than #var2, else 'N'.
-- NOTE: FLOAT cannot keep trailing zeros, so 5.10 and 5.1 are the same value here.
Declare #var float
Declare #var2 float
set #var=5.14
set #var2=5.8
-- Text form of each value; PARSENAME(text, 1) is the part after the last '.'.
Declare #text nvarchar(50), #text2 nvarchar(50)
set #text = cast(#var as nvarchar(50))
set #text2 = cast(#var2 as nvarchar(50))
Select case
    -- A higher major wins outright.
    when cast(#var as int) > cast(#var2 as int) then 'Y'
    -- Minors are only compared when the majors are equal; otherwise a lower
    -- major with a larger minor (4.9 vs 5.8) would be reported as newer.
    when cast(#var as int) = cast(#var2 as int)
     and (case when charindex('.', #text) > 0 then cast(PARSENAME(#text, 1) as int) else 0 end)
       > (case when charindex('.', #text2) > 0 then cast(PARSENAME(#text2, 1) as int) else 0 end) then 'Y'
    else 'N' END
You don't say so in the question, but your comment under Tomtom's answer suggests you are storing the version numbers as [decimals][d]. I guess that you have a table like this:
-- Sample table: one row per release, version stored as a decimal number.
CREATE TABLE ReleaseHistory
(
    VersionNumber DECIMAL(6,3) NOT NULL
);
GO
-- Seed the six sample releases.
INSERT INTO ReleaseHistory (VersionNumber)
SELECT seed.VersionNumber
FROM (VALUES (5.12), (5.8), (12.34), (3.14), (0.78), (1.0)) AS seed (VersionNumber);
GO
The following query is an attempt to rank versions by the order in which they would be released:
-- Rank every release by the numeric value of its version column.
SELECT rh.VersionNumber,
       RANK() OVER (ORDER BY rh.VersionNumber ASC) AS ReleaseOrder
FROM ReleaseHistory AS rh;
It produces the following result set:
VersionNumber ReleaseOrder
--------------------------------------- --------------------
0.780 1
1.000 2
3.140 3
5.120 4
5.800 5
12.340 6
This is not what we expect. Version 5.8 was released before version 5.12!
Split the version number into its major and minor components to rank the version numbers properly. One way to do this is to convert the decimal value to a string and split on the period. The T-SQL syntax for this is ugly (the language is not designed for string processing):
-- Ranks releases by (major, minor), where both parts are taken from the text form of VersionNumber.
-- VersionStrings: the decimal value rendered as text.
WITH VersionStrings AS (
SELECT CAST(VersionNumber AS VARCHAR(6)) AS VersionString
FROM ReleaseHistory
),
-- VersionNumberComponents: integer text before the first '.' (major) and after it (minor).
VersionNumberComponents AS (
SELECT
CAST(SUBSTRING(VersionString, 1, CHARINDEX('.', VersionString) - 1) AS INT) AS MajorVersionNumber,
CAST(SUBSTRING(VersionString, CHARINDEX('.', VersionString) + 1, LEN(VersionString) - CHARINDEX('.', VersionString)) AS INT) AS MinorVersionNumber
FROM VersionStrings
)
-- NOTE(review): with VersionNumber declared DECIMAL(6,3) the text presumably carries three decimals
-- ('5.120', '5.800'), making the minor parts 120 and 800 -- confirm this gives the intended order.
SELECT
CAST(MajorVersionNumber AS VARCHAR(3)) + '.' + CAST(MinorVersionNumber AS VARCHAR(3)) AS VersionString,
RANK() OVER (ORDER BY MajorVersionNumber, MinorVersionNumber) AS ReleaseOrder
FROM VersionNumberComponents;
But it provides the expected result:
VersionString ReleaseOrder
------------- --------------------
0.780 1
1.0 2
3.140 3
5.120 4
5.800 5
12.340 6
As Tomtom replied, decimal is not a good type to store a version number. It would be better to store the version number in two positive integer columns, one containing the major version number and the other containing the minor version number.
This is based on SeanW's answer, but this solution allows for the format [major].[minor].[build]. It may be used for SQL 2K and when a cursor is not an option.
-- Compare two [major].[minor].[build] version strings part by part, as integers.
declare #v1 varchar(100) = '1.4.020'
declare #v2 varchar(100) = '1.4.003'

-- Positions of the first and second dot in each version string.
declare #v1_dot1_pos smallint = CHARINDEX('.', #v1)
declare #v1_dot2_pos smallint = CHARINDEX('.', #v1, #v1_dot1_pos + 1)
declare #v2_dot1_pos smallint = CHARINDEX('.', #v2)
declare #v2_dot2_pos smallint = CHARINDEX('.', #v2, #v2_dot1_pos + 1)

-- Numeric value of each part.
DECLARE #v1_major int, #v1_minor int, #v1_build int
DECLARE #v2_major int, #v2_minor int, #v2_build int

SET #v1_major = CONVERT(int, LEFT(#v1, #v1_dot1_pos - 1))
SET #v1_minor = CONVERT(int, SUBSTRING(#v1, #v1_dot1_pos + 1, (#v1_dot2_pos - #v1_dot1_pos) - 1))
SET #v1_build = CONVERT(int, RIGHT(#v1, (LEN(#v1) - #v1_dot2_pos)))

SET #v2_major = CONVERT(int, LEFT(#v2, #v2_dot1_pos - 1))
SET #v2_minor = CONVERT(int, SUBSTRING(#v2, #v2_dot1_pos + 1, (#v2_dot2_pos - #v2_dot1_pos) - 1))
SET #v2_build = CONVERT(int, RIGHT(#v2, (LEN(#v2) - #v2_dot2_pos)))

-- The first part that differs decides the outcome.
SELECT
    CASE
        WHEN #v1_major < #v2_major THEN 'v2 is newer'
        WHEN #v1_major > #v2_major THEN 'v1 is newer'
        WHEN #v1_minor < #v2_minor THEN 'v2 is newer'
        WHEN #v1_minor > #v2_minor THEN 'v1 is newer'
        WHEN #v1_build < #v2_build THEN 'v2 is newer'
        WHEN #v1_build > #v2_build THEN 'v1 is newer'
        ELSE '!Same'
    END
The solution that was implemented:
CREATE FUNCTION [dbo].[version_compare]
(
    #v1 VARCHAR(5), #v2 VARCHAR(5)
)
RETURNS tinyint
AS
BEGIN
    -- Compares two 'major.minor' version strings numerically.
    -- Returns 1 when #v1 is newer, 2 when #v2 is newer, 0 when they are equal.
    -- A version without a '.' is treated as 'major.0' instead of raising
    -- "Invalid length parameter passed to the LEFT or SUBSTRING function".
    -- Each part must fit in a tinyint (0-255).
    DECLARE #v1_dot int, #v2_dot int,
            #v1_int tinyint, #v1_frc tinyint,
            #v2_int tinyint, #v2_frc tinyint,
            #ResultVar tinyint

    SET #ResultVar = 0
    SET #v1_dot = CHARINDEX('.', #v1)
    SET #v2_dot = CHARINDEX('.', #v2)

    -- Major: text before the dot, or the whole string when there is no dot.
    SET #v1_int = CONVERT(tinyint, CASE WHEN #v1_dot = 0 THEN #v1 ELSE LEFT(#v1, #v1_dot - 1) END)
    SET #v2_int = CONVERT(tinyint, CASE WHEN #v2_dot = 0 THEN #v2 ELSE LEFT(#v2, #v2_dot - 1) END)

    -- Minor: text after the dot, or 0 when there is no dot. SUBSTRING is used
    -- rather than RIGHT(..., LEN(...) - dot) because LEN ignores trailing
    -- spaces, which made RIGHT cut the wrong number of characters.
    SET #v1_frc = CASE WHEN #v1_dot = 0 THEN 0 ELSE CONVERT(tinyint, SUBSTRING(#v1, #v1_dot + 1, LEN(#v1))) END
    SET #v2_frc = CASE WHEN #v2_dot = 0 THEN 0 ELSE CONVERT(tinyint, SUBSTRING(#v2, #v2_dot + 1, LEN(#v2))) END

    SELECT #ResultVar = CASE
        WHEN #v2_int > #v1_int THEN 2
        WHEN #v1_int > #v2_int THEN 1
        WHEN #v2_frc > #v1_frc THEN 2
        WHEN #v1_frc > #v2_frc THEN 1
        ELSE 0 END

    -- Return the result of the function
    RETURN #ResultVar
END
GO
This recursive query would convert any '.'-separated version numbers into comparable strings left-padding each element to 10 characters thus allowing to compare versions with or without build number and accommodating for non-numeric characters:
-- Builds a string key per version in which every '.'-separated element occupies exactly 10 characters.
-- cte: sample version strings.
WITH cte (VersionNumber) AS (
SELECT '1.23.456' UNION ALL
SELECT '2.3' UNION ALL
SELECT '0.alpha-3'
),
-- parsed: VersionNumber holds the text still to be consumed (a trailing '.' is appended once),
-- Padded accumulates the consumed elements, each reduced to the rightmost 10 characters of '0000000000' + element.
parsed (VersionNumber, Padded) AS (
-- Anchor member: consume the first element.
SELECT
CAST(SUBSTRING(VersionNumber, CHARINDEX('.', VersionNumber) + 1, LEN(VersionNumber)) + '.' AS NVARCHAR(MAX)),
CAST(RIGHT(REPLICATE('0', 10) + LEFT(VersionNumber, CHARINDEX('.', VersionNumber) - 1), 10) AS NVARCHAR(MAX))
FROM cte
UNION ALL
-- Recursive member: consume the next element for as long as a '.' remains.
SELECT
SUBSTRING(VersionNumber, CHARINDEX('.', VersionNumber) + 1, LEN(VersionNumber)),
Padded + RIGHT(REPLICATE('0', 10) + LEFT(VersionNumber, CHARINDEX('.', VersionNumber) - 1), 10)
FROM parsed WHERE CHARINDEX('.', VersionNumber) > 0
)
-- Only rows with nothing left to consume carry the complete key.
SELECT Padded
FROM parsed
WHERE VersionNumber = ''
ORDER BY Padded;
Padded
------------------------------
0000000000000alpha-3
000000000100000000230000000456
00000000020000000003
I have created (with inspiration from Eva Lacy (above)), this function:
CREATE or alter function dbo.IsVersionNewerThan
(
#Source nvarchar(max),
#Target nvarchar(max)
)
RETURNS table
as
/*
Inline table-valued function returning a single row with one column, IsGreather.
Both versions are wrapped in '/' and cast to hierarchyid, then compared:
-1 : target has higher version number (later version)
0 : same (the two input strings are equal)
1 : source has higher version number (later version)
NOTE(review): -1 is the fall-through branch, so it is also returned for any pair
that is neither greater as hierarchyid nor textually equal -- confirm intended.
test harness:
; WITH tmp
AS
(
SELECT '1.0.0.5' AS Version
UNION ALL SELECT '0.0.0.0'
UNION ALL SELECT '1.5.0.6'
UNION ALL SELECT '2.0.0'
UNION ALL SELECT '2.0.0.0'
UNION ALL SELECT '2.0.1.1'
UNION ALL SELECT '15.15.1323.22'
UNION ALL SELECT '15.15.622.55'
)
SELECT tmp.version, isGreather from tmp
outer apply (select * from dbo.IsVersionNewerThan(tmp.Version, '2.0.0.0')) as IsG
*/
return (
select CASE
when cast('/' + #Source + '/' as hierarchyid) > cast('/' + #Target + '/' as hierarchyid) THEN 1
when #Source = #Target then 0
else -1
end as IsGreather
)
go
The test script is included as a comment.
It works, as long as you do not have versions like '1.5.06.2' (note the zero).
SQL Server thinks this function has is_inlineable = 1, which bodes well for the performance.
Then my SQL code can look like this:
-- Prints 'yes' when #version is the same as or newer than 2.30.1.12, otherwise 'no'.
declare #version varchar(10) = '2.30.1.12';
set #version = '2.30.1.1';

declare #answer varchar(3) = 'no';
if exists (
    select *
    from dbo.IsVersionNewerThan(#version, '2.30.1.12')
    where IsGreather >= 0
)
    set #answer = 'yes';

print #answer
Here is what I did by modifying some code I found on StackOverflow and writing some myself. This is version 1 of the code so please let me know what you think. Usage examples and test cases are in the code comments.
First create this function if not using SQL 2016 or greater and you do not have access to STRING_SPLIT:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: <Author,,Name>
-- Create date: <Create Date,,>
-- Description: modified from https://stackoverflow.com/questions/10914576/t-sql-split-string/42000063#42000063
-- =============================================
CREATE FUNCTION [dbo].[SplitStringToRows]
(
#List VARCHAR(4000)
, #Delimiter VARCHAR(50)
)
RETURNS TABLE
AS
RETURN
(
-- Splits #List on #Delimiter and returns one row per piece:
--   [Index] : ROW_NUMBER() assigned over the resulting XML nodes
--   Part    : the piece, as nvarchar(max)
--For testing
-- SELECT * FROM SplitStringToRows ('1.0.123','.')
-- DECLARE #List VARCHAR(MAX) = '1.0.123', #Delimiter VARCHAR(50) = '.';
WITH Casted AS
(
-- Replace every delimiter with a placeholder token, run the text through FOR XML PATH(''),
-- then turn each token into '</x><x>' so that every piece becomes its own <x> element.
SELECT CAST(N'<x>' + REPLACE((SELECT REPLACE(#List,#Delimiter,N'§§Split$me$here§§') AS [*] FOR XML PATH('')),N'§§Split$me$here§§',N'</x><x>') + N'</x>' AS XML) AS SplitMe
)
-- NOTE(review): ORDER BY (SELECT 0) does not define an order; [Index] presumably follows document order -- confirm.
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS [Index]
, x.value(N'.',N'nvarchar(max)') AS Part
FROM Casted
CROSS APPLY SplitMe.nodes(N'/x') AS A(x)
)
Then create this function:
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: Soenhay
-- Create date: 7/1/2017
-- Description: Returns -1 if VersionStringA is less than VersionStringB.
-- Returns 0 if VersionStringA equals VersionStringB.
-- Returns 1 if VersionSTringA is greater than VersionStringB.
-- =============================================
CREATE FUNCTION dbo.CompareVersionStrings
(
    #VersionStringA VARCHAR(50)
    ,#VersionStringB VARCHAR(50)
)
RETURNS TABLE
AS
RETURN
(
    -- Compares two version strings of the form major.minor[.build[.revision]]
    -- (the versioning system used in C#) and returns one row with one column:
    --   Compare = -1 when A < B, 0 when A = B, 1 when A > B.
    -- The first of the four components that differs decides the result; a
    -- missing component counts as 0. Components beyond the fourth are ignored.
    -- When both sides of a component consist only of digits they are compared
    -- by numeric value (zero-padded to a common width), so '1.10' is greater
    -- than '1.9' and '1.01' equals '1.1'. Otherwise the component is compared
    -- as text.
    --For applications the build and revision numbers will by dynamically set based on the current date and time of the build.
    --Example: [assembly: AssemblyFileVersion("1.123.*")]//http://stackoverflow.com/questions/15505841/the-version-specified-for-the-file-version-is-not-in-the-normal-major-minor-b
    --Each component should be between 0 and 65534 ( UInt16.MaxValue - 1 )
    --Max version number would be 65534.65534.65534.65534
    --For Testing
    -- SELECT * FROM dbo.CompareVersionStrings('', '')
    -- SELECT * FROM dbo.CompareVersionStrings('asdf.asdf', 'asdf.asdf') --returns 0
    -- SELECT * FROM dbo.CompareVersionStrings('asdf', 'fdas') --returns -1
    -- SELECT * FROM dbo.CompareVersionStrings('zasdf', 'fdas') --returns 1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0.123.123', '1.1.123.123') --Should return -1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0.123.123', '1.0.123.123') --Should return 0
    -- SELECT * FROM dbo.CompareVersionStrings('1.1.123.123', '1.0.123.123') --Should return 1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0.123.123', '1.0.124.123') --Should return -1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0.124.123', '1.0.123.123') --Should return 1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0.123.123', '1.0.123.124') --Should return -1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0.123.124', '1.0.123.123') --Should return 1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0', '1.1') --Should return -1
    -- SELECT * FROM dbo.CompareVersionStrings('1.0', '1.0') --Should return 0
    -- SELECT * FROM dbo.CompareVersionStrings('1.1', '1.0') --Should return 1
    -- SELECT * FROM dbo.CompareVersionStrings('1.1.123', '1.1.123.123') --Should return -1
    -- SELECT * FROM dbo.CompareVersionStrings('1.1.123.123', '1.1.123') --Should return 1
    -- SELECT * FROM dbo.CompareVersionStrings('1.10', '1.9') --Should return 1 (numeric, not text, comparison)
    -- SELECT * FROM dbo.CompareVersionStrings('1.01', '1.1') --Should return 0
    WITH
    -- The four component slots of a version string.
    Indexes AS
    (
        SELECT 1 AS [Index], 'major' AS Name
        UNION ALL SELECT 2, 'minor'
        UNION ALL SELECT 3, 'build'
        UNION ALL SELECT 4, 'revision'
    )
    , SplitA AS
    (
        SELECT [Index], Part FROM dbo.SplitStringToRows(#VersionStringA, '.')
    )
    , SplitB AS
    (
        SELECT [Index], Part FROM dbo.SplitStringToRows(#VersionStringB, '.')
    )
    -- One row per slot with the raw text of both sides; a missing component becomes '0'.
    , Parts AS
    (
        SELECT
            i.[Index]
            ,i.Name
            ,ISNULL(a.Part, '0') AS A
            ,ISNULL(b.Part, '0') AS B
        FROM Indexes i
        LEFT JOIN SplitA a
            ON a.[Index] = i.[Index]
        LEFT JOIN SplitB b
            ON b.[Index] = i.[Index]
    )
    -- Flag the slots where both sides are non-empty and digits only.
    , Flagged AS
    (
        SELECT
            [Index]
            ,Name
            ,A
            ,B
            ,CASE WHEN A <> '' AND A NOT LIKE '%[^0-9]%'
                   AND B <> '' AND B NOT LIKE '%[^0-9]%'
                  THEN 1 ELSE 0 END AS BothNumeric
        FROM Parts
    )
    -- Comparison keys: digit-only pairs are zero-padded to 50 characters (the
    -- parameters are VARCHAR(50), so no component is longer), which makes text
    -- order equal numeric order without a cast that could fail.
    , Keys AS
    (
        SELECT
            [Index]
            ,CASE WHEN BothNumeric = 1 THEN RIGHT(REPLICATE('0', 50) + A, 50) ELSE A END AS A
            ,CASE WHEN BothNumeric = 1 THEN RIGHT(REPLICATE('0', 50) + B, 50) ELSE B END AS B
        FROM Flagged
    )
    , Compared AS
    (
        SELECT
            [Index]
            ,CASE WHEN A = B THEN 0
                  WHEN A < B THEN -1
                  WHEN A > B THEN 1
             END AS Compare
        FROM Keys
    )
    -- The first non-zero per-slot result, in major..revision order, is the answer.
    SELECT
        COALESCE(NULLIF(major, 0), NULLIF(minor, 0), NULLIF(build, 0), NULLIF(revision, 0), 0) AS Compare
    FROM
    (
        SELECT
            MAX(CASE WHEN [Index] = 1 THEN Compare ELSE NULL END) AS major
            ,MAX(CASE WHEN [Index] = 2 THEN Compare ELSE NULL END) AS minor
            ,MAX(CASE WHEN [Index] = 3 THEN Compare ELSE NULL END) AS build
            ,MAX(CASE WHEN [Index] = 4 THEN Compare ELSE NULL END) AS revision
        FROM Compared
    ) pivoted
)
GO
I'll give you the shortest answer to this.
-- Two sample versions held as numeric literals.
with cte as (
select 7.11 as ver
union all
select 7.6
)
-- Sorts by the part before the '.' (PARSENAME(ver, 2)), then by the part after the '.'
-- of the float form (PARSENAME(..., 1)), and keeps the first row.
-- NOTE(review): PARSENAME returns character data, so both sort keys are compared as text,
-- not as numbers -- confirm that this is the intended ordering.
select top 1 ver from cte
order by parsename(ver, 2), parsename(cast(ver as float), 1)
Maybe converting build number to a value can help to understand the hierarchy between build versions.
-- Converts the server's dotted product version into a single number in which each component
-- is weighted by a power of 10, so that builds can be compared numerically.
DECLARE #version VARCHAR(25), #dot1 AS TINYINT, #dot2 AS TINYINT, #dot3 AS TINYINT, #MaxPower AS TINYINT, #Value AS BIGINT
SELECT #version = CAST(SERVERPROPERTY('ProductVersion') AS VARCHAR) --'14.0.1000.169' --'10.50.1600'
-- Positions of the first, second and third dot.
SELECT #dot1 = CHARINDEX('.', #version, 1)
SELECT #dot2 = CHARINDEX('.', #version, #dot1 + 1)
SELECT #dot3 = CHARINDEX('.', #version, #dot2 + 1)
-- No third dot (three-part version): treat the end of the string as the third dot.
SELECT #dot3 = CASE
WHEN #dot3 = 0 THEN LEN(#version) + 1
ELSE #dot3
END
-- Character length of the widest of the first three components; #Value = 10 ^ that length.
SELECT #MaxPower = MAX(DotColumn) FROM (VALUES (#dot1-1), (#dot2-#dot1-1), (#dot3-#dot2-1)) AS DotTable(DotColumn)
SELECT #Value = POWER(10, #MaxPower)
--SELECT #version, #dot1, #dot2, #dot3, #MaxPower, #Value
-- Result = part1 * #Value^3 + part2 * #Value^2 + part3 * #Value + part4 (0 when there is no fourth part).
-- NOTE(review): the fourth part is added unscaled and its width is not included in #MaxPower --
-- presumably it is expected to stay below #Value; confirm.
SELECT
-- #version AS [Build],
CAST(LEFT(#version, #dot1-1) AS INT) * POWER(#Value, 3) +
CAST(SUBSTRING(#version, #dot1+1, #dot2-#dot1-1) AS INT) * POWER(#Value, 2) +
CAST(SUBSTRING(#version, #dot2+1, #dot3-#dot2-1) AS INT) * #Value +
CASE
WHEN #dot3 = LEN(#version)+1 THEN CAST(0 AS INT)
ELSE CAST(SUBSTRING(#version, #dot3+1, LEN(#version)-#dot3) AS INT)
END AS [Value]
Inspired by #Sean's answer: since I needed it for 4 parts, I wrote this (it is easily adaptable to more parts; see the comment on the function at the end of the code):
-- Compares two dotted version strings part by part, as integers.
--   v1, v2 : versions with 1 to 4 '.'-separated numeric parts, e.g. '1', '2.3', '5.6.78.9'.
--   returns: -1 when v1 < v2, 1 when v1 > v2, 0 when they are equal.
-- An empty or missing part (and a NULL version) counts as 0, so '1.2' equals '1.2.0.0'.
-- A non-numeric part raises an invalid-input error from the integer cast, but only
-- if that part is actually reached (comparison stops at the first differing part).
-- The result depends only on the arguments, so the function is declared
-- IMMUTABLE and PARALLEL SAFE; this lets the planner pre-evaluate constant calls,
-- use the function in indexes, and run it in parallel workers.
CREATE OR REPLACE FUNCTION compareversions(v1 text,v2 text)
RETURNS smallint
LANGUAGE plpgsql
IMMUTABLE
PARALLEL SAFE
COST 100
AS $$
declare
    -- Number of parts compared; raise this to support longer versions.
    part_count constant int := 4;
    part1 int;
    part2 int;
begin
    for i in 1..part_count loop
        -- split_part yields '' for a missing part and NULL for a NULL version;
        -- both are mapped to '0' BEFORE casting, because casting '' to int fails.
        part1 := cast(coalesce(nullif(split_part(v1, '.', i), ''), '0') as int);
        part2 := cast(coalesce(nullif(split_part(v2, '.', i), ''), '0') as int);

        -- The first differing part decides the result.
        if part1 < part2 then
            return -1;
        elsif part1 > part2 then
            return 1;
        end if;
    end loop;

    -- All parts are equal.
    return 0;
end;
$$;
;
-- Attach the usage description to the function in the database catalog.
COMMENT ON FUNCTION compareversions(v1 text,v2 text)
IS 'Function to compare 2 versions as strings, versions can have from 1 to 4 parts (e.g. "1", "2.3", "3.4.5", "5.6.78.9") but it is easy to add a part.
A version having less than 4 parts is considered having its last part(s) set to 0, i.e. "2.3" is considered as "2.3.0.0" so that comparing "1.2.3" to "1.2.3.0" returns "equal"). Indeed we consider first part is always major, second minor, etc ... whatever the number of part for any version.
Function returns:
- -1 when v1 < v2
- 1 when v1 > v2
- 0 when v1 = v2
And, according to return value:
- To compare if v1 < v2 check compareversions(v1, v2) == -1
- To compare if v1 > v2 check compareversions(v1, v2) == 1
- To compare if v1 == v2 check compareversions(v1, v2) == 0
- To compare if v1 <= v2 check compareversions(v1, v2) <= 0
- To compare if v1 >= v2 check compareversions(v1, v2) >= 0'
;
With this you can also for example compare a version "1.2" with "1.2.1" (will return -1, v1 < v2) as "1.2" will be considered as "1.2.0", it is not an usual check but in case during time a digit is added to version a "1.2" will actually be considered equal to "1.2.0".
And it's also easily modulable for another version format, for X.Y-Z for example, v1_1, etc... will be (not tested but you got the idea):
-- Declaration fragment for an 'X.Y-Z' version format (goes in the function's declare section).
-- v1_1 = X : text before the first '.'
declare v1_1 text := split_part(v1, '.', 1);
-- tmp = Y-Z : text after the first '.'
declare tmp text := split_part(v1, '.', 2);
-- v1_2 = Y : part of tmp before the '-'
declare v1_2 text := split_part(tmp, '-', 1);
-- v1_3 = Z : part of tmp after the '-'
declare v1_3 text := split_part(tmp, '-', 2);
-- do the same for v2
#MartinSmith answer works best for up-to 5 decimals but if more than that (which might be rare). Here is what I could have done:
-- Compares two dotted versions by removing the dots and comparing the remaining digits as integers.
DECLARE #AppVersion1 VARCHAR(20) = '2.7.2.2.3.1'
DECLARE #AppVersion2 VARCHAR(20) = '2.7.2.2.4'
-- #V1 receives the digits of the LONGER version string, #V2 those of the other one.
DECLARE #V1 AS INT = CASE WHEN LEN(#AppVersion1) < LEN(#AppVersion2) THEN CAST(REPLACE(#AppVersion2,'.','') AS INT) ELSE CAST(REPLACE(#AppVersion1,'.','') AS INT) END;
DECLARE #V2 AS INT = CASE WHEN LEN(#AppVersion1) < LEN(#AppVersion2) THEN CAST(REPLACE(#AppVersion1,'.','') AS INT) ELSE CAST(REPLACE(#AppVersion2,'.','') AS INT) END;
-- Right-pad #V2 with zeros until it has as many digits as #V1.
IF(LEN(#V2)< LEN(#V1))
BEGIN
SET #V2 = CAST( LTRIM(CAST(#V2 AS VARCHAR)) + ISNULL(REPLICATE('0',LEN(#V1)-LEN(#V2)),'') AS INT);
END;
-- NOTE(review): 'Y' means the longer string's number is greater, which is not necessarily #AppVersion1 -- confirm intent.
-- NOTE(review): removing the dots loses component boundaries (e.g. '2.10' and '21.0' both become 210).
SELECT CASE WHEN #V1 > #V2 THEN 'Y' ELSE 'N' END

SQL take just the numeric values from a varchar

Say i have a few fields like the following:
abd738927
jaksm234234
hfk342
ndma0834
jon99322
Type: varchar.
How do I take just the numeric values from this to display:
738927
234234
342
0834
99322
Have tried substring however the data varies in length, and cast didnt work either due to being unable to convert, any ideas?
Here's the example with PATINDEX:
-- Returns fieldName from its first digit onward (PATINDEX gives the position of the first digit).
select SUBSTRING(fieldName, PATINDEX('%[0-9]%', fieldName), LEN(fieldName))
This assumes (1) the field WILL have a numeric, (2) the numerics are all grouped together, and (3) the numerics don't have any subsequent characters after them.
Extract only numbers (without using while loop) and check each and every character to see if it is a number and extract it
-- Collects the digit characters of #s into #result without an explicit loop.
Declare #s varchar(100),#result varchar(100)
set #s='as4khd0939sdf78'
set #result=''
-- Append each single character to #result when it is a digit, otherwise append ''.
-- NOTE(review): the result depends on the rows being processed in ascending position order,
-- and the query has no ORDER BY -- confirm this is acceptable.
select
#result=#result+
case when number like '[0-9]' then number else '' end from
(
-- One row per character of #s.
select substring(#s,number,1) as number from
(
-- Positions 1..LEN(#s), taken from the numbers stored in master..spt_values (type 'p').
select number from master..spt_values
where type='p' and number between 1 and len(#s)
) as t
) as t
select #result as only_numbers
-- Strip every non-digit character from #NonNumeric.
DECLARE #NonNumeric varchar(1000) = 'RGI000Testing1000'
-- Position of the first remaining non-digit (0 when none is left).
DECLARE #Index int = patindex('%[^0-9]%',#NonNumeric)
while #Index <> 0
begin
    -- Remove all occurrences of the offending character in one pass.
    SET #NonNumeric = replace(#NonNumeric,substring(#NonNumeric,#Index, 1), '')
    SET #Index = patindex('%[^0-9]%',#NonNumeric)
end
select #NonNumeric -- 0001000
Well, if you don't want to create a function, you can just do something like this:
-- Removes the letters A-Z, '$', ',' and spaces from YOUR_COLUMN, then casts what is left to float.
cast(replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(replace(
replace(replace(replace(replace(replace(replace(replace(replace(replace(YOUR_COLUMN
,'A',''),'B',''),'C',''),'D',''),'E',''),'F',''),'G',''),'H',''),'I',''),'J','')
,'K',''),'L',''),'M',''),'N',''),'O',''),'P',''),'Q',''),'R',''),'S',''),'T','')
,'U',''),'V',''),'W',''),'X',''),'Y',''),'Z',''),'$',''),',',''),' ','') as float)
I think you're wanting VBA's Val() function. Easy enough to accomplish with IsNumeric()
create function Val
(
    #text nvarchar(40)
)
returns float
as begin
    -- Emulates VBA's Val(): drop characters from the right-hand end of the text
    -- until ISNUMERIC accepts what remains (or nothing is left), then return
    -- the remainder as a float.
    declare #candidate varchar(40)
    set #candidate = #text

    while len(#candidate) > 0 and isnumeric(#candidate) = 0
        set #candidate = left(#candidate, len(#candidate) - 1)

    return cast(#candidate as float)
end
-- Take the text from the first digit onward.
select substring(src.txt, src.first_digit, LEN(src.txt) - src.first_digit + 2)
from (
    select 'jaksm234234' as txt,
           patindex('%[0-9]%', 'jaksm234234') as first_digit
) as src
input table
if you have data like above in the image, then use the below query
-- Keeps only the rows whose field_3 contains none of the characters listed in the bracket expression.
select field_3 from table where PATINDEX('%[ ~`!##$%^&*_()=+\|{};",<>/?a-z]%', field_3)=0
Results will look like this
Result table
Extract only numbers from a string. Returns a string with all the numbers inside. Example: this1is2one345long6789number will return 123456789
CREATE FUNCTION [dbo].[GetOnlyNumbers] (#Temp VARCHAR(1000))
RETURNS VARCHAR (1000) AS BEGIN
    -- Returns #Temp with every non-digit character removed.
    DECLARE #NonDigitPos INT
    SET #NonDigitPos = PATINDEX('%[^0-9]%', #Temp)

    -- Delete the first remaining non-digit until none is found.
    WHILE #NonDigitPos > 0
    BEGIN
        SET #Temp = STUFF(#Temp, #NonDigitPos, 1, '')
        SET #NonDigitPos = PATINDEX('%[^0-9]%', #Temp)
    END

    RETURN #Temp
END
A right with patindex for the reverse string works also for those
-- Extracts the trailing run of digits from each value and converts it to INT.
-- PATINDEX on the reversed text (plus a sentinel space) returns the length of
-- that trailing run, which RIGHT() then cuts from the original value.
SELECT [Column],
       CAST(RIGHT([Column], PATINDEX('%[0-9][^0-9]%', REVERSE([Column])+' ')) AS INT) as [Num]
FROM (VALUES
    ('abd738927'),
    ('jaksm234234'),
    ('hfk342'),
    ('ndma0834'),
    -- No comma after the last row: a trailing comma is a syntax error.
    ('jon99322')
) val([Column])
Column
Num
abd738927
738927
jaksm234234
234234
hfk342
342
ndma0834
834
jon99322
99322

T-SQL trim &nbsp (and other non-alphanumeric characters)

We have some input data that sometimes appears with &nbsp characters on the end.
The data comes in from the source system as varchar() and our attempts to cast as decimal fail b/c of these characters.
Ltrim and Rtrim don't remove the characters, so we're forced to do something like:
-- Remove non-breaking spaces (character 160) from every row that contains one.
UPDATE myTable
SET myColumn = replace(myColumn, char(160), '')
WHERE myColumn LIKE '%' + char(160) + '%'
This works for the &nbsp, but is there a good way to do this for any non-alphanumeric (or in this case numeric) characters?
This will remove all non-alphanumeric characters
CREATE FUNCTION [dbo].[fnRemoveBadCharacter]
(
    #BadString nvarchar(20)
)
RETURNS nvarchar(20)
AS
BEGIN
    -- Returns #BadString with every character other than a letter, a digit
    -- or an underscore removed.
    WHILE PATINDEX('%[^a-zA-Z0-9_]%', #BadString) > 0
        -- Delete the first disallowed character still present.
        SET #BadString = STUFF(#BadString, PATINDEX('%[^a-zA-Z0-9_]%', #BadString), 1, '')

    RETURN #BadString
END
Use the function like:
-- Clean the column in place; replace the table, column and WHERE condition with your own.
UPDATE TableToUpdate
SET ColumnToUpdate = dbo.fnRemoveBadCharacter(ColumnToUpdate)
WHERE whatever
This page has a sample of how you can remove non-alphanumeric chars:
-- Put something like this into a user function:
-- remove every character that is not a digit from #cString.
DECLARE #cString VARCHAR(32)
SET #cString = '90$%45623 *6%}~:#'

-- Delete the first remaining non-digit until none is left.
WHILE PATINDEX('%[^0-9]%', #cString) > 0
    SET #cString = STUFF(#cString, PATINDEX('%[^0-9]%', #cString), 1, '')

SELECT #cString
How is the table being populated? While it is possible to scrub this in sql a better approach would be to change the column type to int and scrub the data before it's loaded into the database (SSIS). Is this an option?
For large datasets I have had better luck with this function that checks the ASCII value. I have added options to keep only alpha, numeric or alphanumeric based on the parameters.
--CleanType 1 - keep letters and digits (remove everything else)
--          2 - keep letters only
--          3 - keep digits only
--LeaveSpaces 1 - additionally keep space characters
CREATE FUNCTION [dbo].[fnCleanString] (
    #InputString varchar(8000)
    , #CleanType int
    , #LeaveSpaces bit
) RETURNS varchar(8000)
AS
BEGIN
    -- Builds the result by walking the input one character at a time and
    -- keeping a character when its ASCII code is in a range allowed by
    -- #CleanType, or when it is a space and #LeaveSpaces = 1.
    DECLARE #Position int = 1
        , #LastPosition int = LEN(#InputString) + 1  -- the scan runs one position past LEN()
        , #Char varchar(1)
        , #Code int
        , #Cleaned varchar(8000) = ''

    WHILE #Position <= #LastPosition
    BEGIN
        SET #Char = SUBSTRING(#InputString, #Position, 1)
        SET #Code = ASCII(#Char)

        IF (#Code BETWEEN 48 AND 57 AND #CleanType IN (1,3))       -- 0-9
            OR (#Code BETWEEN 65 AND 90 AND #CleanType IN (1,2))   -- A-Z
            OR (#Code BETWEEN 97 AND 122 AND #CleanType IN (1,2))  -- a-z
            OR (#Code = 32 AND #LeaveSpaces = 1)                   -- space
            SET #Cleaned = #Cleaned + #Char

        SET #Position = #Position + 1
    END

    RETURN #Cleaned
END
If the mobile could start with a Plus(+) I will use the function like this
CREATE FUNCTION [dbo].[Mobile_NoAlpha](#Mobile VARCHAR(1000))
RETURNS VARCHAR(1000)
AS
BEGIN
    -- Returns #Mobile reduced to its digits, keeping a single leading '+' when
    -- the input starts with one. Returns NULL for NULL input.
    DECLARE #StartsWithPlus BIT = 0

    --check if the mobile starts with a plus(+)
    IF LEFT(#Mobile, 1) = '+'
    BEGIN
        SET #StartsWithPlus = 1
        --Take out the plus before stripping the invalid characters.
        --SUBSTRING from position 2 is used instead of RIGHT(..., LEN(...) - 1):
        --LEN ignores trailing spaces, so RIGHT also cut leading digits
        --whenever the input ended in spaces ('+12 ' became '+2').
        SET #Mobile = SUBSTRING(#Mobile, 2, LEN(#Mobile))
    END

    --Delete the first remaining non-digit until none is left.
    WHILE PatIndex('%[^0-9]%', #Mobile) > 0
        SET #Mobile = Stuff(#Mobile, PatIndex('%[^0-9]%', #Mobile), 1, '')

    IF #StartsWithPlus = 1
        SET #Mobile = '+' + #Mobile

    RETURN #Mobile
END