sql natural sort by strings mixed with numbers in one label - sql

I came with a problem of sorting using ORDER BY. I found a lot of similar questions, but no answer fits my needs. The task is:
I have column [LABEL] which contains strings, and i want to get an order like this:
label
'1'
'2'
'11R'
'11T9'
'11T10'
'RT_5'
'RT_6'
'RT_10'
'RT_10b'
'RT_10dyn'
and so on...
instead of:
'1'
'11R'
'11T10'
'11T9'
'2S'
'RT_10'
'RT_10b'
'RT_10dyn'
'RT_5'
'RT_6'
The label column may contain any combination of characters.
The problem is to find the numbers inside the labels and, where possible, sort by those numbers first and then by the other characters...

After a few hours here is the solution:
I created a function to change the labels in specific way:
Each NUMBER in the input @in is replaced by the same number
written with @digits characters, WITH leading zeros.
For example:
@digits = 4, @in = 'aa300bb' return = '_aa0300bb_'.
@digits = 5, @in = 'aa300bb' return = '_aa00300bb_'.
@digits = 3, @in = 'a2c4e5' return = '_a002c004e005_'.
And here is the function:
-- Drop dbo.fnMixSort if it already exists so the CREATE FUNCTION batch can be re-run.
-- Type codes: FN = SQL scalar, IF = inline table-valued, TF = multi-statement table-valued,
-- FS = CLR scalar, FT = CLR table-valued function.
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[fnMixSort]')
AND type in (N'FN', N'IF', N'TF', N'FS', N'FT'))
DROP FUNCTION [dbo].[fnMixSort]
GO
-- Builds a natural-sort key for a label.
-- Every run of digits in @in is left-padded with zeros to @digits characters and the
-- whole label is wrapped in '_' ... '_', so that ordering by the returned key sorts
-- embedded numbers numerically ('a2' before 'a10').
--   @in      label to convert
--   @digits  minimum width of every number in the key
-- Returns NULL for NULL input. A number that already has @digits or more digits is
-- left unchanged (it is never truncated or split).
CREATE FUNCTION [dbo].[fnMixSort] (
    @in NVARCHAR(250),
    @digits int
) RETURNS NVARCHAR(1000) AS
BEGIN
    DECLARE
        @result nvarchar(1000), -- key under construction; wider than @in so padding is not cut off at 250
        @starts int,            -- position in @result where the next scan begins
        @i int,                 -- offset (within @temp) where the next NUMBER starts
        @j int,                 -- offset (within @temp) where the next NUMBER ends
        @numlen int,            -- number of digits in the current NUMBER
        @pad int,               -- number of zeros to insert in front of it
        @temp nvarchar(1000)    -- not-yet-scanned tail of @result

    SET @starts = 1
    SET @result = N'_' + @in + N'_' -- sentinels: protect against EMPTY input and terminate a trailing number

    WHILE (1=1)
    BEGIN
        SET @temp = SUBSTRING(@result, @starts, LEN(@result))

        SET @i = COALESCE(PATINDEX('%[0-9]%', @temp), 0)
        IF @i = 0 BREAK -- no more NUMBERs in the label

        -- first digit that is followed by a non-digit = last digit of the current number
        SET @j = COALESCE(PATINDEX('%[0-9][^0-9]%', @temp), 0)
        -- no such digit means the digits run to the end of the string (key was truncated)
        IF @j = 0 SET @j = LEN(@temp)

        SET @numlen = @j - @i + 1
        SET @pad = CASE WHEN @digits > @numlen THEN @digits - @numlen ELSE 0 END

        IF @pad > 0
            SET @result = STUFF(@result, @starts + @i - 1, 0, REPLICATE(N'0', @pad))

        -- continue right after the (now padded) number
        SET @starts = @starts + @i - 1 + @pad + @numlen
    END

    RETURN @result
END
GO
lets create some table to check the function:
-- Recreate the scratch table used to demonstrate dbo.fnMixSort.
IF OBJECT_ID(N'[dbo].[aaaa_test]', N'U') IS NOT NULL
    DROP TABLE [dbo].[aaaa_test]
GO

CREATE TABLE [dbo].[aaaa_test] (
    Label [varchar](255) NULL
)

-- Sample labels (duplicates are intentional).
INSERT INTO [dbo].[aaaa_test] ([Label])
VALUES
    ('bb'), ('aa12'), (''), ('30'), ('10rt'),
    ('12ru'), ('1rt'), ('9rt'), ('aa8'), ('aa10'), ('aa'),
    ('12rz'), ('12rt'), ('9rt5'), ('9_rt_10_23'), ('9_rt_10_5'), ('9rt12'),
    ('12rz34'), ('12rz3'), ('12rz35c'), ('12rz105b'), ('12rt'), ('9rt5'), ('9rt10'), ('9rt12')

-- Show every label next to its sort key, ordered by that key.
SELECT
    [label],
    dbo.fnMixSort(Label, 5) AS [fnMixSort_returns]
FROM [dbo].[aaaa_test]
ORDER BY [fnMixSort_returns]
And the result
label fnMixSort_returns
----------------------------------
1rt _00001rt_
9_rt_10_5 _00009_rt_00010_00005_
9_rt_10_23 _00009_rt_00010_00023_
9rt _00009rt_
9rt5 _00009rt00005_
9rt5 _00009rt00005_
9rt10 _00009rt00010_
9rt12 _00009rt00012_
9rt12 _00009rt00012_
10rt _00010rt_
12rt _00012rt_
12rt _00012rt_
12ru _00012ru_
12rz _00012rz_
12rz3 _00012rz00003_
12rz34 _00012rz00034_
12rz35c _00012rz00035c_
12rz105b _00012rz00105b_
30 _00030_
aa _aa_
aa8 _aa00008_
aa10 _aa00010_
aa12 _aa00012_
bb _bb_
it was my first time to post here...
hope it will help someone oneday..

You can substr [LABEL] column into different columns and then order by those columns. As null is sorted first you don't need to do anything extra for values with less character.
How ever you can also follow this thread here.
Here in this solution the logic is :-
If ID is numeric, add 21 '0's in front of the ID value and get the last 20 characters.
If ID is not numeric, add 21 ‘’s at the end of the ID value and get the first 20 characters.
Or this is a better solution for you query Sort Alphanumeric value
Let us see if it helps.

ANOTHER SOLUTION: different exchanged_label:
/** ==========================================================
FUNCTION DESCRIPTION
-------------------------------------------------------------
Function for special sorting - natural-mix sorting.
Order by : number in word are treated as number, not as a
characters only.
So 'a2' is before 'a10' and '9R' is before '10R' ...
-------------------------------------------------------------
Function puts special prefix before each number.
If number has 1 digit -> with prefix is 0A
If number has 2 digits -> with prefix is 0B
... ... ...
If number has 16 digits -> with prefix is 0P
If number has 17 digits -> with prefix is 0PA
If number has 18 digits -> with prefix is 0PB
... ... ...
If number has 32 digits -> with prefix is 0PP
If number has 33 digits -> with prefix is 0PPA
... and so on...
For example:
aa123bb9 -> aa0C123bb0A9
**/
CODE
-- Builds a natural-sort key by prefixing every run of digits in @in with a
-- length marker: '0', then one 'P' per full 16 digits, then a letter 'A'..'P'
-- for the remainder (1 digit -> 0A, 16 -> 0P, 17 -> 0PA, 33 -> 0PPA).
-- The label is wrapped in '_' ... '_'. Returns NULL for NULL input.
CREATE FUNCTION [dbo].[fnMixSort] ( @in NVARCHAR(1000) ) RETURNS NVARCHAR(1000) AS
BEGIN
    DECLARE
        @starts int,             -- position in @in where the next scan begins
        @i int,                  -- offset (within @temp) where the next NUMBER starts
        @j int,                  -- offset (within @temp) where the next NUMBER ends
        @numlen int,             -- number of digits in the current NUMBER
        @prefix nvarchar(1000),  -- length marker inserted in front of the NUMBER
        @temp nvarchar(1000)     -- not-yet-scanned tail of @in

    SET @starts = 1
    SET @in = '_' + @in + '_' -- extended LABEL: protection from EMPTY input

    WHILE (1=1)
    BEGIN
        SET @temp = SUBSTRING(@in, @starts, LEN(@in))

        SET @i = COALESCE(PATINDEX('%[0-9]%', @temp), 0)
        IF @i = 0 BREAK -- no more NUMBERs in the LABEL

        -- first digit that is followed by a non-digit = last digit of the current number
        SET @j = COALESCE(PATINDEX('%[0-9][^0-9]%', @temp), 0)
        -- no such digit means the digits run to the end of the (truncated) string
        IF @j = 0 SET @j = LEN(@temp)

        SET @numlen = @j - @i + 1

        -- leading '0' keeps numbers sorting as numbers, i.e. before letters
        SET @prefix = '0'
                    + REPLICATE('P', (@numlen - 1) / 16)
                    + CHAR((@numlen - 1) % 16 + 65) -- char(65) is 'A'

        SET @in = STUFF(@in, @i + @starts - 1, 0, @prefix)

        -- continue right after the prefix and the number it describes
        SET @starts = @starts + @i - 1 + LEN(@prefix) + @numlen
    END -- while

    RETURN @in
END
GO
results:
1rt _0A1rt_
9_rt_10_5 _0A9_rt_0B10_0A5_
9_rt_10_23 _0A9_rt_0B10_0B23_
9rt _0A9rt_
9rt5 _0A9rt0A5_
9rt5 _0A9rt0A5_
9rt10 _0A9rt0B10_
9rt12 _0A9rt0B12_
9rt12 _0A9rt0B12_
10rt _0B10rt_
12rt _0B12rt_
12rt _0B12rt_
12ru _0B12ru_
12rz _0B12rz_
12rz3 _0B12rz0A3_
12rz34 _0B12rz0B34_
12rz105b _0B12rz0C105b_
30 _0B30_
9234567890123456123456789012345rz38c _0PO9234567890123456123456789012345rz0B38c_
12345678901234561234567890123456rz35c _0PP12345678901234561234567890123456rz0B35c_
123456789012345612345678901234561rz36c _0PPA123456789012345612345678901234561rz0B36c_
aa _aa_
aa0A _aa0A0A_
aa0b _aa0A0b_
aa8 _aa0A8_
aa10 _aa0B10_
aa12 _aa0B12_
bb _bb_

Same approach as pi.314 but rewrite for PostgreSQL:
-- Builds a natural-sort key: every run of digits in "value" is left-padded with
-- zeros to "digits" characters and the text is wrapped in '_' ... '_'.
-- A number that already has "digits" or more digits is kept as is; lpad() alone
-- would cut it down to its first "digits" characters and make distinct labels
-- collide. A NULL "value" is treated as empty by CONCAT and yields '__'.
CREATE OR REPLACE FUNCTION fnNumberAwareSort(value varchar, digits integer)
RETURNS varchar
AS $$
DECLARE
    numbers VARCHAR[];  -- the digit runs, in order of appearance
    texts VARCHAR[];    -- the text between the digit runs (always one more than numbers)
BEGIN
    value = CONCAT('_', value, '_');
    SELECT ARRAY(SELECT res[1] FROM regexp_matches(value, '\d+', 'g') AS res) INTO numbers;
    texts = regexp_split_to_array(value, '\d+');

    -- Re-interleave: text, padded number, text, padded number, ..., text.
    value = texts[1];
    FOR i IN 2..array_upper(texts, 1) LOOP
        value = value
             || CASE
                    WHEN length(numbers[i-1]) >= digits THEN numbers[i-1]
                    ELSE lpad(numbers[i-1], digits, '0')
                END
             || texts[i];
    END LOOP;
    RETURN value;
END;
$$ LANGUAGE plpgsql;

Related

Return all words starting with a character in a column

I have a VARCHAR column with data like this:
abc = :abc and this = :that
I need a query to find all of the special "words" that start with a colon in this column of data. I don't really need any other data (IDs or otherwise) and duplicates would be OK. I can remove duplicates in Excel later if need be. So if this was the only row, I'd like something like this as the output:
SpecialWords
:abc
:that
I'm thinking it'll require a CHARINDEX or something like that. But since there could be more than one special word in the column, I can't just find the first : and strip out the rest.
Any help is greatly appreciated! Thanks in advance!
You have to split this value based on spaces and return only fields that starts with a colon :, i provided 2 solutions to achieve this based on the result type you need (Table or Single Value)
Table-Valued Function
You can create a TV function to split this column into a table:
-- Splits @DelimitedString on single spaces and returns, in order of appearance,
-- every token that starts with a colon (e.g. ':abc').
-- Returns one row per matching token; ElementID numbers the rows starting at 1.
CREATE FUNCTION [dbo].[GETVALUES]
(
    @DelimitedString varchar(8000)
)
RETURNS @tblArray TABLE
(
    ElementID int IDENTITY(1,1), -- Array index
    Element varchar(1000) -- Array element contents
)
AS
BEGIN
    DECLARE @Index smallint,       -- position of the next space, 0 when none is left
            @Token varchar(8000)   -- text in front of that space

    -- LEN ignores trailing spaces, so the loop also ends when only spaces remain
    WHILE LEN(@DelimitedString) > 0
    BEGIN
        SET @Index = CHARINDEX(' ', @DelimitedString)

        SET @Token = CASE
                         WHEN @Index = 0 THEN @DelimitedString
                         ELSE LEFT(@DelimitedString, @Index - 1)
                     END

        IF @Token LIKE ':%'
            INSERT INTO @tblArray (Element)
            VALUES (@Token)

        IF @Index = 0
            BREAK

        -- Drop the token and its delimiter. A fixed maximum length is used instead of
        -- LEN(...) - start + 1: LEN ignores trailing spaces, which made that length
        -- negative (a run-time error) whenever the input ended with a space.
        SET @DelimitedString = SUBSTRING(@DelimitedString, @Index + 1, 8000)
    END
    RETURN
END
And you can use it like the following:
-- Example: list the colon-prefixed words of one string.
DECLARE @SQLStr varchar(100)
SELECT @SQLStr = 'abc = :abc and this = :that and xyz = :asd'
SELECT
    ElementID,
    Element
FROM
    dbo.GETVALUES(@SQLStr)
Result:
Scalar-Valued Function
If you need to return a single value (not a table), you can use this function, which returns all the values with each one preceded by a carriage return + line feed (CHAR(13) + CHAR(10))
-- Splits @DelimitedString on single spaces and returns every token that starts
-- with a colon, concatenated into one string. Each token is preceded by
-- CHAR(13) + CHAR(10), so a non-empty result starts with a line break.
-- Returns '' when no token matches.
CREATE FUNCTION dbo.GetValues2
(
    @DelimitedString varchar(8000)
)
RETURNS varchar(8000)
AS
BEGIN
    DECLARE @Index smallint,       -- position of the next space, 0 when none is left
            @Token varchar(8000),  -- text in front of that space
            @Result varchar(8000)

    SET @Result = ''

    -- LEN ignores trailing spaces, so the loop also ends when only spaces remain
    WHILE LEN(@DelimitedString) > 0
    BEGIN
        SET @Index = CHARINDEX(' ', @DelimitedString)

        SET @Token = CASE
                         WHEN @Index = 0 THEN @DelimitedString
                         ELSE LEFT(@DelimitedString, @Index - 1)
                     END

        IF @Token LIKE ':%'
            SET @Result = @Result + char(13) + char(10) + @Token

        IF @Index = 0
            BREAK

        -- Fixed maximum length instead of LEN(...) - start + 1, which went negative
        -- (a run-time error) whenever the input ended with a space.
        SET @DelimitedString = SUBSTRING(@DelimitedString, @Index + 1, 8000)
    END
    RETURN @Result
END
GO
you can use it as the following
-- Example: return the colon-prefixed words of one string as a single value.
DECLARE @SQLStr varchar(100)
SELECT @SQLStr = 'abc = :abc and this = :that and xyz = :asd'
SELECT dbo.GetValues2(@SQLStr)
Result
in the table result line feed are not visible, just copy the data to an editor and it will appears as shown in the image
References
Splitting the string in sql server
One way is to write a specialized SPLIT function. I would suggest getting a TSQL Split function off the internet and see if you can adapt the code to your needs.
Working from scratch, you could write a function that loops over the column value using CHARINDEX until it doesn't find any more : characters.
How about using a charindex?
rextester sample:
-- Sample data: one candidate string per row.
CREATE TABLE mytable (testcolumn varchar(20))

INSERT INTO mytable (testcolumn)
VALUES ('this = :that'),
       ('yes'),
       (':no'),
       ('abc = :abc')

-- Everything after the LAST colon of each row that contains a colon.
SELECT RIGHT(testcolumn, CHARINDEX(':', REVERSE(testcolumn)) - 1)
FROM mytable
WHERE testcolumn LIKE '%:%'
reference:
SQL Select everything after character
Update
Addressing Sami's:
Didn't see that two words could be in one colon, how about this?
select replace(substring(testcolumn, charindex(':', testcolumn), len(testcolumn)), ':', '')
Update again
I see, the actual statement is this = :that and that = :this
If performance is important then you want to use an inline table valued function to split the string and extract what you need. You could use delimitedSplit8K or delimitedSplit8K_lead for this.
-- Split on spaces with the set-based splitter and keep the colon-prefixed items.
declare @string varchar(8000) = 'abc = :abc and this = :that';
select item
from dbo.DelimitedSplit8K(@string, ' ')
where item like ':%';
returns:
item
------
:abc
:that
And for even better performance than what I posted above you could use ngrams8k like so:
-- Find every ':' and take the text from it up to the next space
-- (or to the end of the string when no space follows).
declare @string varchar(8000) = 'abc = :abc and this = :that';
select position, item =
substring(@string, position,
isnull(nullif(charindex(' ',@string,position+1),0),8000)-position)
from dbo.ngrams8k(@string, 1)
where token = ':';
This even gives you the location of the item you are searching for:
position item
---------- -------
7 :abc
23 :that

I have a column with datatype nvarchar and I want to sort it in ascending order. How do I achieve it in SSRS?

This is what I'm getting
abc 1
abc 12
abc 15
abc 2
abc 3
And this is how I want
abc 1
abc 2
abc 3
abc 12
abc 15
Query that I use:
-- Plain character-by-character ordering of col1.
SELECT *
FROM view_abc
ORDER BY col1 ASC
Use a function to strip out the non numeric characters and leave just the value. Use another function to strip out all the numeric data. You can then sort on the two returned values.
It seems like a bit of work at first but once the functions are in you can re-use them in the future. Here's two functions I use regularly when we get data in from external sources and it's not very normalised.
They may not be the most efficient functions in the world but they work for my purposes
1st a function to leave just the alphabetic portion (letters A-Z and a-z).
-- Returns @inputString with everything except the letters A-Z and a-z removed.
-- Returns '' for NULL or empty input.
CREATE FUNCTION [fn].[StripToAlpha]
(
    @inputString nvarchar(4000)
)
RETURNS varchar(4000)
AS
BEGIN
    -- Variable names are spelled identically everywhere so the function also
    -- compiles on servers with a case-sensitive collation.
    DECLARE @Counter as int
    DECLARE @strReturnVal varchar(4000)
    DECLARE @Len as int
    DECLARE @Ascii as int

    SET @Counter = 1
    SET @Len = LEN(@inputString)
    SET @strReturnVal = ''

    WHILE @Counter <= @Len
    BEGIN
        SET @Ascii = ASCII(SUBSTRING(@inputString, @Counter, 1))
        -- 65-90 = 'A'-'Z', 97-122 = 'a'-'z'
        IF (@Ascii BETWEEN 65 AND 90) OR (@Ascii BETWEEN 97 AND 122)
        BEGIN
            SET @strReturnVal = @strReturnVal + (SUBSTRING(@inputString, @Counter, 1))
        END
        SET @Counter = @Counter + 1
    END
    RETURN @strReturnVal
END
2nd a function to extract the value from a text field, this also handle percentages (e.g. abc 23% comes out as 0.23) but this is not required in your case.
You'll need to CREATE an 'fn' schema or change the schema name first...
-- Extracts a numeric value from free text: keeps only digits, '.' and '%',
-- then converts what is left to FLOAT. If the kept text ends with '%', the value
-- is divided by 100 ('abc 23%' -> 0.23).
-- Returns NULL for NULL input and 0 for empty or all-space input.
-- NOTE(review): text that leaves more than one '.' or a '%' in the middle
-- (e.g. 'v1.2.3', '5% of 10') still fails the FLOAT conversion, as it did before.
CREATE FUNCTION [fn].[ConvertToValue]
(
    @inputString nvarchar(4000)
)
RETURNS Float
AS
BEGIN
    -- Variable names are spelled identically everywhere so the function also
    -- compiles on servers with a case-sensitive collation.
    DECLARE @Counter as int
    DECLARE @strReturnVal varchar(4000)
    DECLARE @ReturnVal Float
    DECLARE @Len as int
    DECLARE @Ascii as int

    IF @inputString IS NULL
    BEGIN
        RETURN NULL
    END

    -- LEN ignores trailing spaces, so this is 0 for empty and for all-space input
    SET @Len = LEN(@inputString)
    IF @Len = 0
    BEGIN
        RETURN 0
    END

    SET @strReturnVal = ''
    SET @Counter = 1
    WHILE @Counter <= @Len
    BEGIN
        SET @Ascii = ASCII(SUBSTRING(@inputString, @Counter, 1))
        -- 48-57 = '0'-'9', 46 = '.', 37 = '%'
        IF (@Ascii BETWEEN 48 AND 57) OR (@Ascii IN (46, 37))
        BEGIN
            SET @strReturnVal = @strReturnVal + (SUBSTRING(@inputString, @Counter, 1))
        END
        SET @Counter = @Counter + 1
    END

    IF RIGHT(@strReturnVal, 1) = '%'
        -- Divide as FLOAT directly: casting the quotient back to a string first
        -- keeps only about six significant digits.
        SET @ReturnVal = CAST(LEFT(@strReturnVal, LEN(@strReturnVal) - 1) AS FLOAT) / 100
    ELSE
        SET @ReturnVal = CAST(@strReturnVal AS FLOAT)

    RETURN @ReturnVal
END
Now we have the two functions created you can simply do
-- Sort by the alphabetic part first, then by the numeric value.
SELECT *
FROM view_abc
ORDER BY
    fn.StripToAlpha(Col1) ASC,
    fn.ConvertToValue(Col1) ASC
Try this
Edited :
-- Split 'abc 123' at the first space: the text before it is CharColumn,
-- the rest (cast to INT) is IntColumn; order by text, then by number.
SELECT
    CAST(
        SUBSTRING(
            ColumnNameToOrder,
            CHARINDEX(' ', ColumnNameToOrder, 0),
            LEN(ColumnNameToOrder)
        ) AS INT
    ) AS IntColumn,
    SUBSTRING(ColumnNameToOrder, 0, CHARINDEX(' ', ColumnNameToOrder, 0)) AS CharColumn,
    *
FROM view_abc
ORDER BY
    Charcolumn,
    Intcolumn
Instead of ColumnNameToOrder, you can put your column name which contains the data like 'abc 123'...
Tell me if it works please.
This is what I have come up with. Maybe it can help you, or at least point you in the right direction.
I tested the values. When the values have a zero, the order is like you would like the order to be. Like this:
abc 01
abc 02
abc 03
abc 12
abc 15
So you can run this query to update the existing values, to add the zero.
-- Rewrite every 5-character value as 'abc 0' followed by its 5th character.
UPDATE abc
SET
    col1 = 'abc 0' + SUBSTRING(col1, 5, 1)
WHERE
    LEN(col1) = 5
Or you can do the above query like this if the first three characters can vary:
-- Insert a leading zero into values shaped like 'xyz 1': any three characters,
-- a space, then exactly one digit. The three-character prefix is kept.
-- The pattern accepts any prefix (not only 'abc') so the statement really does
-- handle varying first three characters.
UPDATE abc
SET col1 = (SUBSTRING(col1, 1, 3) + ' 0' + SUBSTRING(col1, 5, 1))
WHERE col1 LIKE '___ [0-9]'
This will override the existing value in the col1 column, only when the length of the current String is of length 5.
Then you can run the following query to get the results:
-- With zero-padded numbers a plain character sort gives the wanted order.
SELECT
    col1
FROM abc
ORDER BY
    col1
=cint(right(Fields!Col1.Value, instrrev(Fields!Col1.Value, " ")-1))
This will work in SSRS and sort correctly, but will only work if Col1 always contains a space, and that the characters after the space can be converted to an integer.

Change characters but keep length

I am migrating sensitive data to a database, and I need to hide details of the text. We would like to keep the volume and length of the text, but change the meaning.
For example:
"James has been well received, and should be helped when ever he finds it hard to speak"
should change to:
"jhdfy dfw aslk dfe kjdfkjd, kjf kjdsf df iotryy erhr lsdj jf ytwe it kjdf tr kjsdd"
Is there a way to update all rows, setting the text column to this kind of random text? I really only want to change the letters (a-z, A-Z) and keep everything else.
One option is to use a bunch of nested replaces . . . but that would probably hit on the maximum number of nested functions.
You could write a painful query using outer apply:
select
from t outer apply
(select replace(t.col, 'a', 'z') as col1) outer apply
(select replace(col1, 'b', 'y') ) outer apply
. . .
However, you might want to write your own function. In other databases, this is called translate() (after the Unix command). If you Google SQL Server translate, I think you'll find examples on the web.
One way is to split the string character by character and replace each row with a random string. And then concatenate them back to get the desired output
-- Replaces every letter of @str with a random lowercase letter and keeps all
-- other characters, so the result has the same length and shape as the input.
-- Limitation: master..spt_values type 'P' supplies only the numbers 0..2047, so
-- at most the first 2048 characters are processed.
DECLARE @str VARCHAR(MAX) = 'James has been well received, and should be helped when ever he finds it hard to speak'
;WITH Cte(pos, orig, random) AS(
    SELECT
        v.number,
        SUBSTRING(t.a, v.number + 1, 1),
        CASE
            WHEN SUBSTRING(t.a, v.number + 1, 1) LIKE '[a-z]'
                -- % 26 covers 'a'..'z'; the modulo is taken before ABS so that
                -- CHECKSUM = -2147483648 cannot overflow ABS
                THEN CHAR(ABS(CHECKSUM(NEWID()) % 26) + 97)
            ELSE SUBSTRING(t.a, v.number + 1, 1)
        END
    FROM (SELECT @str) t(a)
    CROSS JOIN master..spt_values v
    WHERE
        v.number < LEN(t.a)
        AND v.type = 'P'
)
SELECT
    OrignalString = @str,
    RandomString = (
        SELECT '' + random
        FROM Cte
        ORDER BY pos -- without this the characters may be concatenated in any order
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'
    )
TRY IT HERE
OK this is possible using a user defined function (UDF) and a view.
SQL Server does not allow random number generation in a UDF but does allow it in a view. Ref: http://blog.sqlauthority.com/2012/11/20/sql-server-using-rand-in-user-defined-functions-udf/
So here is the solution
-- Exposes RAND() through a view so a user-defined function can read it.
CREATE VIEW [dbo].[rndView]
AS
    SELECT RAND() AS rndResult
GO
-- Returns the rndResult value read from the rndView view, which gives
-- user-defined functions a way to obtain a RAND() value.
CREATE FUNCTION [dbo].[RandFn]()
RETURNS float
AS
BEGIN
    DECLARE @rndValue float
    SELECT @rndValue = rndResult
    FROM rndView
    RETURN @rndValue
END
GO
-- Returns @stringToParse with every letter A-Z / a-z replaced by a random letter
-- of the same case; all other characters are copied unchanged, so length and
-- layout of the text are preserved. Returns '' for NULL input.
-- Reads its random numbers from dbo.RandFn().
--
-- Usage:
--   DECLARE @s VARCHAR(MAX) = 'James has been well received'
--   SELECT [dbo].[randomstring](@s)
--   UPDATE SpecialTable SET SpecialString = [dbo].[randomstring](SpecialString)
CREATE FUNCTION [dbo].[randomstring] ( @stringToParse VARCHAR(MAX))
RETURNS
varchar(max)
AS
BEGIN
    DECLARE @StringToreturn varchar(max) = ''
    DECLARE @charCounter int = 1
    -- LEN ignores trailing spaces; appending a sentinel character counts them too
    DECLARE @len int = LEN(@stringToParse + 'x') - 1
    DECLARE @UpperA int = 65   -- 'A'
    DECLARE @UpperZ int = 90   -- 'Z'
    DECLARE @LowerA int = 97   -- 'a'
    DECLARE @LowerZ int = 122  -- 'z'
    DECLARE @thisChar char(1)
    DECLARE @thisCode int
    DECLARE @randomChar char(1)
    DECLARE @offset int        -- 0..25, index of the replacement letter

    -- <= so the last character is processed too
    WHILE @charCounter <= @len
    BEGIN
        SET @thisChar = SUBSTRING(@stringToParse, @charCounter, 1)
        SET @thisCode = ASCII(@thisChar)
        SET @randomChar = @thisChar

        -- only swap if a-z or A-Z
        IF (@thisCode BETWEEN @UpperA AND @UpperZ) OR (@thisCode BETWEEN @LowerA AND @LowerZ)
        BEGIN
            -- scale the random float to 26 letters
            SET @offset = CAST(FLOOR(dbo.RandFn() * 26) AS int)
            IF @offset > 25 SET @offset = 25 -- guard in case the random value is exactly 1

            SET @randomChar = CHAR(
                CASE WHEN @thisCode <= @UpperZ THEN @UpperA ELSE @LowerA END + @offset
            )
        END

        SET @StringToreturn = @StringToreturn + @randomChar
        SET @charCounter = @charCounter + 1
    END

    RETURN @StringToreturn
END
GO

Sorting VARCHAR column with alphanumeric entries

I am using SQL Server, the column is a VARCHAR(50) and I want to sort it like this:
1A
1B
2
2
3
4A
4B
4C
5A
5B
5C
5N
14 Draft
21
22A
22B
23A
23B
23C
23D
23E
25
26
FR01584
MISC
What I have so far is:
-- Asker's attempt: builds a 20-character string sort key per row.
Select *
From viewASD
ORDER BY
-- Values whose first character is numeric:
Case When IsNumeric(LEFT(asdNumNew,1)) = 1
--   entirely numeric -> append '0', then left-pad with zeros to 20 characters
Then CASE When IsNumeric(asdNumNew) = 1
Then Right(Replicate('0',20) + asdNumNew + '0', 20)
--   numeric start with other characters -> left-pad the whole value with zeros to 20
Else Right(Replicate('0',20) + asdNumNew, 20)
END
-- Values whose first character is not numeric: first 20 characters
-- (Replicate('',21) repeats an empty string, so it adds nothing)
When IsNumeric(LEFT(asdNumNew,1)) = 0
Then Left(asdNumNew + Replicate('',21), 20)
End
But this SQL statement puts '14 Draft' right after '26'.
Could someone help? Thanks
Your ORDER BY clause is... oddly complex.
It looks like you want to sort by any leading numeric digits in integer order, and then sort by the remainder. If so, you should do that as separate clauses, rather than trying to do it all in one. The specific issue you're having is that you're only allowing for a single-digit number, instead of two or more. (And there's No such thing as two.)
Here's your fix, along with a SQLFiddle, using two separate calculated columns tests for your ORDER BY. (Note that this assumes the numeric portion of asdNumNew will fit in a T-SQL int. If not, you'll need to adjust the CAST and the maximum value on the first ELSE.)
-- Sort by the leading run of digits as an integer, then by the rest of the text.
-- Rows that do not start with a digit (including NULL and '') sort after all
-- numbered rows, by their full text.
-- A digit-pattern test is used instead of ISNUMERIC, which also accepts values
-- such as '1e5', '1.5' or '$' that then fail the INT cast.
-- Appending 'x' guarantees PATINDEX finds a non-digit even in all-digit values.
-- Assumes the leading number fits in an INT.
SELECT * FROM viewASD
ORDER BY
    CASE
        WHEN asdNumNew LIKE '[0-9]%'
            THEN CAST(
                LEFT(
                    asdNumNew,
                    PATINDEX('%[^0-9]%', asdNumNew + 'x') - 1
                ) AS int)
        ELSE 2147483648 -- one above the INT maximum: sorts after every numbered row
    END,
    CASE
        WHEN asdNumNew LIKE '[0-9]%'
            THEN SUBSTRING(
                asdNumNew,
                PATINDEX('%[^0-9]%', asdNumNew + 'x'),
                50
            )
        ELSE asdNumNew
    END
If all numbers within the string are reasonably small, say not exceeding 10 digits,
you may expand all the numbers in the string to be exactly 10 digits:
123A -> 0000000123A
S4 -> S0000000004
A3B89 -> A0000000003B0000000089
and so on and then sort them
-- Left-pads every run of digits within @S with zeros to @maxlen characters.
-- Runs that already have @maxlen or more digits are kept unchanged (never cut).
-- All other characters are copied as they are.
create function [dbo].ExpandNumbers(@S VarChar(4000), @maxlen integer) returns VarChar(4000)
as
begin
    declare @result VarChar(4000);
    declare @buffer VarChar(4000); -- digits of the number currently being read
    declare @Ch Char;
    declare @i integer;

    set @buffer = '';
    set @result = '';
    set @i = 1;

    while (@i <= len(@S))
    begin
        set @Ch = substring(@S, @i, 1);

        if ((@Ch >= '0') and (@Ch <= '9'))
            set @buffer = @buffer + @Ch
        else
        begin
            -- a non-digit ends the current number: flush it, padded
            if (len(@buffer) > 0)
                set @result = @result
                            + case
                                  when len(@buffer) >= @maxlen then @buffer
                                  else replicate('0', @maxlen - len(@buffer)) + @buffer
                              end;
            set @buffer = '';
            set @result = @result + @Ch;
        end;

        set @i = @i + 1;
    end;

    -- flush a number that ends the string
    if (len(@buffer) > 0)
        set @result = @result
                    + case
                          when len(@buffer) >= @maxlen then @buffer
                          else replicate('0', @maxlen - len(@buffer)) + @buffer
                      end;

    return @result;
end;
-- Final query: order by the expanded key.
-- ExpandNumbers takes two arguments; 10 is the width used in the examples above.
select *
from viewASD
order by [dbo].ExpandNumbers(asdNumNew, 10)
I had something similar, but with the possibility of dashes as leading characters as well as trailing spaces. This code worked for me.
-- Splits my_column into a leading numeric value and the remaining text,
-- then orders by the number first and the text second.
SELECT
my_column,
-- position of the first character that is not a digit (0 when there is none)
PATINDEX('%[^0-9]%',my_column) AS first_alpha_position,
CONVERT(INT,
CASE
-- all digits, or starting with a dash: use the absolute value of the whole column
-- NOTE(review): relies on implicit conversion of my_column to a number - confirm
-- it cannot fail for values such as '-12A'
WHEN PATINDEX('%[^0-9]%',my_column) = 0 OR PATINDEX('-%',my_column) = 1
THEN ABS(my_column)
-- otherwise: the digits in front of the first non-digit
ELSE SUBSTRING(my_column,1,PATINDEX('%[^0-9]%',my_column) -1)
END) AS numeric_value,
-- text from the first non-digit onwards, without leading spaces
LTRIM(
SUBSTRING(my_column,PATINDEX('%[^0-9]%',my_column),LEN(my_column)-PATINDEX('%[^0-9]%',my_column)+1)
) AS alpha_chars
FROM my_table
ORDER BY numeric_value,alpha_chars
TRY THIS
-- Demo: order by the leading digits (as INT) first, then by the whole value.
DECLARE @t table (Number nvarchar(20))
INSERT INTO @t
SELECT 'L010'
UNION ALL SELECT 'L011'
UNION ALL SELECT 'L011'
UNION ALL SELECT 'L001'
UNION ALL SELECT 'L012'
UNION ALL SELECT '18'
UNION ALL SELECT '8'
UNION ALL SELECT '17'
UNION ALL SELECT 'B004'
UNION ALL SELECT 'B006'
UNION ALL SELECT 'B008'
UNION ALL SELECT 'B018'
UNION ALL SELECT 'UG001'
UNION ALL SELECT 'UG011'
UNION ALL SELECT 'G001'
UNION ALL SELECT 'G002'
UNION ALL SELECT 'G011';
SELECT Number
FROM @t
ORDER BY
    -- digits in front of the first non-digit; an empty prefix casts to 0
    CAST
    (
        SUBSTRING
        (
            Number
            , 1
            , CASE
                WHEN patindex('%[^0-9]%',Number) > 0 THEN patindex('%[^0-9]%',Number) - 1
                ELSE LEN(Number) END
        ) AS int
    )
    , Number
What worked for me is I split up the numeric and the alpha parts and then sorted based on the Alpha, then the Numeric:
-- Returns the digits contained in @UnitNumber as an INT, with every non-digit
-- character removed first. Returns 9999 when @UnitNumber is NULL.
CREATE FUNCTION [admin].[GetUnitNumberAsIntFunc](@UnitNumber varchar(20))
RETURNS int
BEGIN
    DECLARE @intPosition int -- position of the next non-digit, 0 when none is left
    SET @intPosition = PATINDEX('%[^0-9]%', @UnitNumber)
    -- loop on the declared variable (the loop previously referenced an undeclared one)
    WHILE @intPosition > 0
    BEGIN
        SET @UnitNumber = STUFF(@UnitNumber, @intPosition, 1, '')
        SET @intPosition = PATINDEX('%[^0-9]%', @UnitNumber)
    END
    RETURN ISNULL(@UnitNumber,9999)
END;
GO
-- Returns @UnitNumber with up to 6 characters removed, starting at its first digit
-- ('UG001' -> 'UG').
-- Returns '9999' when @UnitNumber is NULL or contains no digit, because STUFF
-- yields NULL for a start position of 0.
-- NOTE(review): for values that start with digits (e.g. '22A') the removal begins
-- at position 1 and can consume the letters too - confirm this is intended.
CREATE FUNCTION [admin].[GetUnitNumberAsStrFunc](@UnitNumber varchar(20))
RETURNS varchar(20)
BEGIN
    DECLARE @intPosition int -- position of the first digit, 0 when there is none
    SET @intPosition = PATINDEX('%[0-9]%', @UnitNumber)
    SET @UnitNumber = STUFF(@UnitNumber, @intPosition, 6, '')
    RETURN ISNULL(@UnitNumber,9999)
END;

T-SQL trim &nbsp (and other non-alphanumeric characters)

We have some input data that sometimes appears with &nbsp characters on the end.
The data comes in from the source system as varchar() and our attempts to cast as decimal fail b/c of these characters.
Ltrim and Rtrim don't remove the characters, so we're forced to do something like:
-- Remove every non-breaking space (character code 160) from myColumn,
-- touching only the rows that contain one.
UPDATE myTable
SET
    myColumn = REPLACE(myColumn, CHAR(160), '')
WHERE
    CHARINDEX(CHAR(160), myColumn) > 0
This works for the &nbsp, but is there a good way to do this for any non-alphanumeric (or in this case numeric) characters?
This will remove all non-alphanumeric characters (underscores are kept):
-- Returns @BadString with every character removed that does not match the
-- pattern [a-zA-Z0-9_], i.e. letters, digits and the underscore are kept.
-- Input and result are limited to 20 characters by the declared types.
CREATE FUNCTION [dbo].[fnRemoveBadCharacter]
(
    @BadString nvarchar(20)
)
RETURNS nvarchar(20)
AS
BEGIN
    DECLARE @nPos INTEGER -- position of the next unwanted character, 0 when none is left
    SELECT @nPos = PATINDEX('%[^a-zA-Z0-9_]%', @BadString)
    WHILE @nPos > 0
    BEGIN
        -- delete that one character, then look for the next one
        SELECT @BadString = STUFF(@BadString, @nPos, 1, '')
        SELECT @nPos = PATINDEX('%[^a-zA-Z0-9_]%', @BadString)
    END
    RETURN @BadString
END
Use the function like:
UPDATE TableToUpdate
SET ColumnToUpdate = dbo.fnRemoveBadCharacter(ColumnToUpdate)
WHERE whatever
This page has a sample of how you can remove non-alphanumeric chars:
-- Put something like this into a user function:
-- removes every character that is not a digit from @cString, one at a time.
DECLARE @cString VARCHAR(32)
DECLARE @nPos INTEGER
SELECT @cString = '90$%45623 *6%}~:@'
SELECT @nPos = PATINDEX('%[^0-9]%', @cString)
WHILE @nPos > 0
BEGIN
    SELECT @cString = STUFF(@cString, @nPos, 1, '')
    SELECT @nPos = PATINDEX('%[^0-9]%', @cString)
END
SELECT @cString
How is the table being populated? While it is possible to scrub this in sql a better approach would be to change the column type to int and scrub the data before it's loaded into the database (SSIS). Is this an option?
For large datasets I have had better luck with this function that checks the ASCII value. I have added options to keep only alpha, numeric or alphanumeric based on the parameters.
-- Returns @InputString reduced to the characters selected by @CleanType.
--   @CleanType   1 - keep letters and digits (remove everything non-alphanumeric)
--                2 - keep letters only
--                3 - keep digits only
--   @LeaveSpaces 1 - additionally keep space characters
-- Characters are classified by ASCII code: 48-57 digits, 65-90 'A'-'Z',
-- 97-122 'a'-'z', 32 space.
CREATE FUNCTION [dbo].[fnCleanString] (
    @InputString varchar(8000)
    , @CleanType int
    , @LeaveSpaces bit
) RETURNS varchar(8000)
AS
BEGIN
    -- // Declare variables
    -- ===========================================================
    DECLARE @Length int
        , @CurLength int = 1
        , @Code int
        , @ReturnString varchar(8000) = ''

    -- DATALENGTH, unlike LEN, counts trailing spaces, so they survive when
    -- @LeaveSpaces = 1. It can exceed the character count for multi-byte data;
    -- the extra positions yield an empty SUBSTRING and are skipped.
    SELECT @Length = DATALENGTH(@InputString)

    -- // Begin looping through each char checking ASCII value
    -- ===========================================================
    WHILE (@CurLength <= @Length)
    BEGIN
        SET @Code = ASCII(SUBSTRING(@InputString, @CurLength, 1))

        IF (@Code BETWEEN 48 AND 57 AND @CleanType IN (1,3))
            OR (@Code BETWEEN 65 AND 90 AND @CleanType IN (1,2))
            OR (@Code BETWEEN 97 AND 122 AND @CleanType IN (1,2))
            OR (@Code = 32 AND @LeaveSpaces = 1)
        BEGIN
            SET @ReturnString = @ReturnString + SUBSTRING(@InputString, @CurLength, 1)
        END

        SET @CurLength = @CurLength + 1
    END
    RETURN @ReturnString
END
If the mobile could start with a Plus(+) I will use the function like this
-- Returns @Mobile reduced to its digits. If the value starts with a plus sign,
-- that one leading '+' is kept in front of the digits.
CREATE FUNCTION [dbo].[Mobile_NoAlpha](@Mobile VARCHAR(1000))
RETURNS VARCHAR(1000)
AS
BEGIN
    DECLARE @StartsWithPlus BIT = 0

    --check if the mobile starts with a plus(+)
    IF LEFT(@Mobile, 1) = '+'
    BEGIN
        SET @StartsWithPlus = 1
        --Take out the plus before stripping the invalid characters.
        --SUBSTRING from position 2 is used because RIGHT(x, LEN(x) - 1) returns the
        --wrong characters when the value has trailing spaces (LEN ignores them).
        SET @Mobile = SUBSTRING(@Mobile, 2, 1000)
    END

    --remove every character that is not a digit, one at a time
    WHILE PatIndex('%[^0-9]%', @Mobile) > 0
        SET @Mobile = Stuff(@Mobile, PatIndex('%[^0-9]%', @Mobile), 1, '')

    IF @StartsWithPlus = 1
        SET @Mobile = '+' + @Mobile

    RETURN @Mobile
END