USPS ACS Keyline Check Digit - sql

I have implemented the "MOD 10" check digit algorithm using SQL, for the US Postal Service Address Change Service Keyline according to the method in their document, but it seems I'm getting the wrong numbers! Our input strings have only numbers in them, making the calculation a little easier. When I compare my results with the results from their testing application, I get different numbers. I don't understand what is going on? Does anyone see anything wrong with my algorithm? It's got to be something obvious...
The documentation for the method can be found on page 12-13 of this document:
http://www.usps.com/cpim/ftp/pubs/pub8a.pdf
The sample application can be found at:
http://ribbs.usps.gov/acs/documents/tech_guides/KEYLINE.EXE
PLEASE NOTE: I fixed the code below, based on the help from forum users. This is so that future readers will be able to use the code in its entirety.
ALTER function [dbo].[udf_create_acs] (#MasterCustomerId varchar(26))
returns varchar(30)
as
begin
    -- Builds a USPS ACS keyline: '#' + customer id + ' ' + check digit + '#'.
    -- The check digit follows the "mod 10" method of USPS "Publication 8A"
    -- (http://www.usps.com/cpim/ftp/pubs/pub8a.pdf).
    -- The input is expected to contain digits only.
    declare #odd_position_values varchar(10)
    declare #position int
    declare #digit varchar(1)
    declare #weighted_sum int

    -- Pre-computed weight of each digit when it sits in an odd position:
    -- the digit is doubled and the digits of the product are added
    -- (e.g. 7 -> 14 -> 1 + 4 = 5). A digit's weight is its zero-based
    -- index in this string, so '7' is found at index 5.
    set #odd_position_values = '0516273849'
    set #weighted_sum = 0
    set #position = 0

    -- Walk the id one character at a time (positions are 1-based).
    while (#position < len(#MasterCustomerId)) begin
        set #position = #position + 1
        set #digit = substring(#MasterCustomerId, #position, 1)
        set #weighted_sum = #weighted_sum +
            case
                -- even position: the digit counts at face value
                when #position % 2 = 0 then convert(int, #digit)
                -- odd position: look up the pre-computed weight
                else charindex(#digit, #odd_position_values) - 1
            end
    end

    -- Check digit = amount needed to reach the next multiple of 10;
    -- the outer "% 10" turns a result of 10 into 0.
    return '#' + #MasterCustomerId + ' ' + convert(varchar, (10 - (#weighted_sum % 10)) % 10) + '#'
end

I'm not sure why you're messing with the whole string representations when you're working in a set-based language.
I'd probably do it like below. I ran four tests through and they were all successful. You can expand this easily to handle characters as well and you could even make the table permanent if you really wanted to do that.
CREATE FUNCTION dbo.Get_Mod10
(
#original_string VARCHAR(26)
)
RETURNS VARCHAR(30)
AS
BEGIN
    -- Returns the "mod 10" check digit for a string of digits.
    -- Spaces are removed first. Characters in odd positions (1-based)
    -- contribute odd_value, characters in even positions contribute
    -- even_value. Characters that are not in the mapping table contribute
    -- nothing, because the lookup below finds no row for them.
    DECLARE
        #value_mapping TABLE (original_char CHAR(1) NOT NULL, odd_value TINYINT NOT NULL, even_value TINYINT NOT NULL)

    -- odd_value is the digit doubled with the digits of the product added
    -- (7 -> 14 -> 5); even_value is the digit itself.
    -- UNION ALL: the rows are distinct by construction, so no de-dup sort is needed.
    INSERT INTO #value_mapping
    (
        original_char,
        odd_value,
        even_value
    )
    SELECT '0', 0, 0 UNION ALL
    SELECT '1', 2, 1 UNION ALL
    SELECT '2', 4, 2 UNION ALL
    SELECT '3', 6, 3 UNION ALL
    SELECT '4', 8, 4 UNION ALL
    SELECT '5', 1, 5 UNION ALL
    SELECT '6', 3, 6 UNION ALL
    SELECT '7', 5, 7 UNION ALL
    SELECT '8', 7, 8 UNION ALL
    SELECT '9', 9, 9

    DECLARE
        #i INT,
        #clean_string VARCHAR(26),
        #len_string TINYINT,
        #sum SMALLINT

    SET #clean_string = REPLACE(#original_string, ' ', '')
    SET #len_string = LEN(#clean_string)
    SET #i = 1
    SET #sum = 0

    WHILE (#i <= #len_string)
    BEGIN
        SELECT
            #sum = #sum + CASE WHEN #i % 2 = 0 THEN even_value ELSE odd_value END
        FROM
            #value_mapping
        WHERE
            original_char = SUBSTRING(#clean_string, #i, 1)

        SET #i = #i + 1
    END

    -- Distance to the next multiple of 10; the outer "% 10" maps 10 to 0.
    -- Converted explicitly so the declared VARCHAR return type does not
    -- depend on an implicit int-to-varchar conversion.
    RETURN CONVERT(VARCHAR(30), (10 - (#sum % 10)) % 10)
END
GO

set #check_digit = convert(varchar, (10 - (#total_value % 10)) % 10)

Why do we have an additional mod:
convert(varchar, 10 % <<-- ?
The document says that only the last digit needs to be subtracted from 10. Did I miss anything?

Related

Change characters but keep length

I am migrating sensitive data to a database, and I need to hide details of the text. We would like to keep the volume and length of the text, but change the meaning.
For example:
"James has been well received, and should be helped when ever he finds it hard to speak"
should change to:
"jhdfy dfw aslk dfe kjdfkjd, kjf kjdsf df iotryy erhr lsdj jf ytwe it kjdf tr kjsdd"
Is there a way to update all rows and set the text column to this kind of random text? I really only want to change the letters (a-z, A-Z) and keep everything else.
One option is to use a bunch of nested replaces . . . but that would probably hit on the maximum number of nested functions.
You could write a painful query using outer apply:
select
from t outer apply
(select replace(t.col, 'a', 'z') as col1) outer apply
(select replace(col1, 'b', 'y') ) outer apply
. . .
However, you might want to write your own function. In other databases, this is called translate() (after the Unix command). If you Google SQL Server translate, I think you'll find examples on the web.
One way is to split the string character by character and replace each row with a random string. And then concatenate them back to get the desired output
-- Replaces every letter of #str with a random lower-case letter and keeps
-- all other characters, so the length and shape of the text are preserved.
DECLARE #str VARCHAR(MAX) = 'James has been well received, and should be helped when ever he finds it hard to speak'
;WITH Cte(pos, orig, random) AS(
    SELECT
        v.number,                             -- zero-based character position
        SUBSTRING(t.a, v.number + 1, 1),
        CASE
            WHEN SUBSTRING(t.a, v.number + 1, 1) LIKE '[a-z]'
                -- 26 letters: "% 26" reaches 'z' (the former "% 25" stopped at 'y').
                -- ABS is applied after the modulo so it can never overflow.
                THEN CHAR(ABS(CHECKSUM(NEWID()) % 26) + 97)
            ELSE SUBSTRING(t.a, v.number + 1, 1)
        END
    FROM (SELECT #str) t(a)
    -- NOTE(review): spt_values is used as a numbers table; the rows with
    -- type = 'P' presumably cover only a limited range (0-2047), so longer
    -- strings would be cut off. Confirm before using on long text.
    CROSS JOIN master..spt_values v
    WHERE
        v.number < LEN(t.a)
        AND v.type = 'P'
)
SELECT
    OrignalString = #str,
    RandomString = (
        SELECT '' + random
        FROM Cte
        ORDER BY pos                          -- rebuild in original character order
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)')
TRY IT HERE
OK this is possible using a user defined function (UDF) and a view.
SQL Server does not allow random number generation in a UDF but does allow it in a view. Ref: http://blog.sqlauthority.com/2012/11/20/sql-server-using-rand-in-user-defined-functions-udf/
So here is the solution
CREATE VIEW [dbo].[rndView]
AS
-- Single-row view exposing one RAND() value as rndResult.
-- It exists so that a user-defined function can read a random number
-- through the view.
SELECT rndResult = RAND()
GO
CREATE FUNCTION [dbo].[RandFn]()
RETURNS float
AS
BEGIN
    -- Returns the random float exposed by rndView (its rndResult column).
    RETURN (SELECT rndResult FROM rndView)
END
GO
CREATE FUNCTION [dbo].[randomstring] ( #stringToParse VARCHAR(MAX))
RETURNS
varchar(max)
AS
BEGIN
/*
Returns a copy of #stringToParse in which every ASCII letter is replaced by
a random letter of the same case; all other characters are kept, so the
length of the text is preserved. A NULL input returns an empty string.

ASCII codes used below:
A = 65
Z = 90
a = 97
z = 122

Usage:
declare #stringToParse VARCHAR(MAX) = 'James has been well received, and should be helped when ever he finds it hard to speak'
Select [dbo].[randomstring] ( #stringToParse )

Update SpecialTable
Set SpecialString = [dbo].[randomstring] (SpecialString)
*/
    declare #StringToreturn varchar(max) = ''
    declare #charCounter int = 1
    -- LEN() ignores trailing spaces; appending a sentinel character and
    -- subtracting 1 counts them, so trailing spaces are not dropped.
    declare #len int = len(#stringToParse + 'x') - 1
    declare #UpperA int = 65
    declare #UpperZ int = 90
    declare #LowerA int = 97
    declare #LowerZ int = 122
    declare #AlphabetSize int = 26
    declare #thisChar char(1)
    declare #thisCode int
    declare #randomChar char(1)

    -- "<=" so that the last character is processed too.
    WHILE #charCounter <= #len
    BEGIN
        set #thisChar = SUBSTRING(#stringToParse, #charCounter, 1)
        set #thisCode = ASCII(#thisChar)
        set #randomChar = #thisChar

        -- only swap if a-z or A-Z; dbo.RandFn() returns a float that is
        -- scaled to an offset 0..25, so every letter including Z/z can occur
        if #thisCode >= #UpperA and #thisCode <= #UpperZ begin
            -- upper case
            set #randomChar = CHAR(#UpperA + convert(int, floor(dbo.RandFn() * #AlphabetSize)))
        end
        else if #thisCode >= #LowerA and #thisCode <= #LowerZ begin
            -- lower case
            set #randomChar = CHAR(#LowerA + convert(int, floor(dbo.RandFn() * #AlphabetSize)))
        end

        set #StringToreturn = #StringToreturn + #randomChar
        SET #charCounter = #charCounter + 1
    END

    return #StringToreturn
END
GO

sql natural sort by strings mixed with numbers in one label

I came with a problem of sorting using ORDER BY. I found a lot of similar questions, but no answer fits my needs. The task is:
I have column [LABEL] which contains strings, and i want to get an order like this:
label
'1'
'2'
'11R'
'11T9'
'11T10'
'RT_5'
'RT_6'
'RT_10'
'RT_10b'
'RT_10dyn'
and so on...
instead of:
'1'
'11R'
'11T10'
'11T9'
'2S'
'RT_10'
'RT_10b'
'RT_10dyn'
'RT_5'
'RT_6'
The label column may contain any combination of characters.
The problem is to find the numbers inside the labels and, where possible, sort by those numbers first and then by the other characters...
After a few hours here is the solution:
I created a function to change the labels in specific way:
Each NUMBER in the input #in is replaced by the same number
written with #digits characters, padded WITH leading zeros.
For example:
#digit = 4, #in = 'aa300bb' return = '_aa0300bb_'.
#digit = 5, #in = 'aa300bb' return = '_aa00300bb_'.
#digit = 3, #in = 'a2c4e5' return = '_a002c004e005_'.
And here is the function:
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[fnMixSort]')
AND type in (N'FN', N'IF', N'TF', N'FS', N'FT'))
DROP FUNCTION [dbo].[fnMixSort]
GO
CREATE FUNCTION [dbo].[fnMixSort] (
#in NVARCHAR(250),
#digits int
) RETURNS NVARCHAR(1000) AS
BEGIN
    -- Builds a sort key for "natural" ordering of labels that mix text and
    -- numbers: the label is wrapped in '_' and every run of digits is
    -- left-padded with zeros to #digits characters, e.g.
    --   #digits = 4, #in = 'aa300bb' -> '_aa0300bb_'
    --   #digits = 3, #in = 'a2c4e5'  -> '_a002c004e005_'
    -- A number that already has #digits or more digits is left unchanged.
    -- A NULL #in returns NULL.
    DECLARE
        #work nvarchar(1000),   -- key being built; wider than #in so padding is not cut at 250
        #starts int,            -- position in #work where the next search begins
        #i int,                 -- position (within #temp) where the next NUMBER starts
        #j int,                 -- position (within #temp) where the next NUMBER ends
        #num_len int,           -- number of digits in the current NUMBER
        #pad int,               -- number of zeros to insert before the current NUMBER
        #temp nvarchar(1000)    -- unprocessed tail of #work

    set #starts = 1
    set #work = N'_' + #in + N'_' -- extended LABEL: protection from EMPTY input

    while (1=1)
    begin
        select #temp = substring(#work, #starts, len(#work))

        -- #i #j - start/end position of first number in the unprocessed tail
        SELECT #i = COALESCE( PATINDEX('%[0-9]%', #temp), 0)
        if #i = 0 break -- no more NUMBERs in the LABEL

        -- last digit of the number = first digit followed by a non-digit;
        -- the trailing '_' normally guarantees a match
        SELECT #j = COALESCE( PATINDEX('%[0-9][^0-9]%', #temp), 0)
        if #j = 0 break -- defensive: terminator missing (key was truncated)

        set #num_len = #j - #i + 1
        set #pad = #digits - #num_len

        if #pad > 0
            -- insert the missing zeros right before the number
            select #work = STUFF(#work, #i + #starts - 1, 0, REPLICATE(N'0', #pad))
        else
            -- number is already wide enough (or #digits is NULL): insert nothing
            set #pad = 0

        -- continue right after the (possibly padded) number, never inside it
        select #starts = #starts + #i - 1 + #pad + #num_len
    end

    -- -------- return ---------
    RETURN #work
END
GO
lets create some table to check the function:
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[aaaa_test]')
AND type in (N'U'))
DROP TABLE [dbo].[aaaa_test]
GO
CREATE TABLE [dbo].[aaaa_test](
Label [varchar](255) NULL
)
INSERT INTO [dbo].[aaaa_test] ([Label])
VALUES ('bb'),('aa12'),(''),('30'),('10rt'),
('12ru'),('1rt'),('9rt'),('aa8'),('aa10'),('aa'),
('12rz'),('12rt'),('9rt5'),('9_rt_10_23'),('9_rt_10_5'),('9rt12'),
('12rz34'),('12rz3'),('12rz35c'),('12rz105b'),('12rt'),('9rt5'),('9rt10'),('9rt12')
select
[label]
,dbo.fnMixSort(Label,5) as [fnMixSort_returns]
from [dbo].[aaaa_test]
order by dbo.fnMixSort(Label,5)
And the result
label fnMixSort_returns
----------------------------------
1rt _00001rt_
9_rt_10_5 _00009_rt_00010_00005_
9_rt_10_23 _00009_rt_00010_00023_
9rt _00009rt_
9rt5 _00009rt00005_
9rt5 _00009rt00005_
9rt10 _00009rt00010_
9rt12 _00009rt00012_
9rt12 _00009rt00012_
10rt _00010rt_
12rt _00012rt_
12rt _00012rt_
12ru _00012ru_
12rz _00012rz_
12rz3 _00012rz00003_
12rz34 _00012rz00034_
12rz35c _00012rz00035c_
12rz105b _00012rz00105b_
30 _00030_
aa _aa_
aa8 _aa00008_
aa10 _aa00010_
aa12 _aa00012_
bb _bb_
it was my first time to post here...
hope it will help someone oneday..
You can substr [LABEL] column into different columns and then order by those columns. As null is sorted first you don't need to do anything extra for values with less character.
How ever you can also follow this thread here.
Here in this solution the logic is :-
If ID is numeric, add 21 '0's in front of the ID value and get the last 20 characters.
If ID is not numeric, add 21 ‘’s at the end of the ID value and get the first 20 characters.
Or this is a better solution for you query Sort Alphanumeric value
Let us see if it helps.
ANOTHER SOLUTION: different exchanged_label:
/** ==========================================================
FUNCTION DESCRIPTION
-------------------------------------------------------------
Function for special sorting - natural-mix sorting.
Order by : number in word are treated as number, not as a
characters only.
So 'a2' is before 'a10' and '9R' is before '10R' ...
-------------------------------------------------------------
Function puts special prefix before each number.
If number has 1 digit -> with prefix is 0A
If number has 2 digits -> with prefix is 0B
... ... ...
If number has 16 digits -> with prefix is 0P
If number has 17 digits -> with prefix is 0PA
If number has 18 digits -> with prefix is 0PB
... ... ...
If number has 32 digits -> with prefix is 0PP
If number has 33 digits -> with prefix is 0PPA
... and so on...
For example:
aa123bb9 -> aa0C123bb0A9
**/
CODE
-- Builds a sort key for natural ("mixed") ordering: the label is wrapped in
-- '_' and a length prefix is inserted in front of every run of digits.
-- The prefix is '0', then one 'P' for every full 16 digits beyond the first
-- 16, then a letter for the remaining length ('A' = 1 digit ... 'P' = 16).
-- Example: aa123bb9 -> _aa0C123bb0A9_
CREATE FUNCTION [dbo].[fnMixSort] ( #in NVARCHAR(1000) ) RETURNS NVARCHAR(1000) AS
BEGIN
DECLARE
#starts int, -- position in #in where the next search begins
#i int, -- position (within #temp) where next NUMBER starts
#j int, -- position (within #temp) where next NUMBER ends
#temp nvarchar(1000) -- first the unprocessed tail of #in, then reused to build the prefix
set #starts = 1
set #in = '_' + #in + '_' -- extended LABEL: protection from EMPTY input
while (1=1)
begin
-- unprocessed tail of the label
select #temp = substring(#in, #starts, len(#in))
SELECT #i = COALESCE( PATINDEX('%[0-9]%',#temp ), 0)
if #i = 0 break -- no more NUMBERs in the LABEL
-- first digit that is followed by a non-digit = last digit of the number
SELECT #j = COALESCE( PATINDEX('%[0-9][^0-9]%',#temp ), 0)
select #temp = '0' -- numbers->must still be numbers: before letters
-- one 'P' per full block of 16 digits; #j is reduced by 16 each time
while (#j >= #i + 16)
begin
select #j = #j - 16
select #temp = #temp + 'P'
end
-- letter for the remaining length: 1 digit -> 'A', 16 digits -> 'P'
select #temp = #temp + CHAR(#j - #i + 65) -- char(65) is 'A'
-- insert the prefix right before the number
select #in = STUFF(#in, #i + #starts - 1, 0, #temp)
-- skip the inserted prefix and the whole number; (LEN(#temp)-2)*16 restores
-- the amount subtracted from #j in the loop above
select #starts = #starts + LEN(#temp) + (LEN(#temp)-2)*16 + #j
end -- while
RETURN #in
END
GO
results:
1rt _0A1rt_
9_rt_10_5 _0A9_rt_0B10_0A5_
9_rt_10_23 _0A9_rt_0B10_0B23_
9rt _0A9rt_
9rt5 _0A9rt0A5_
9rt5 _0A9rt0A5_
9rt10 _0A9rt0B10_
9rt12 _0A9rt0B12_
9rt12 _0A9rt0B12_
10rt _0B10rt_
12rt _0B12rt_
12rt _0B12rt_
12ru _0B12ru_
12rz _0B12rz_
12rz3 _0B12rz0A3_
12rz34 _0B12rz0B34_
12rz105b _0B12rz0C105b_
30 _0B30_
9234567890123456123456789012345rz38c _0PO9234567890123456123456789012345rz0B38c_
12345678901234561234567890123456rz35c _0PP12345678901234561234567890123456rz0B35c_
123456789012345612345678901234561rz36c _0PPA123456789012345612345678901234561rz0B36c_
aa _aa_
aa0A _aa0A0A_
aa0b _aa0A0b_
aa8 _aa0A8_
aa10 _aa0B10_
aa12 _aa0B12_
bb _bb_
Same approach as pi.314 but rewrite for PostgreSQL:
-- Builds a natural-sort key (PostgreSQL): wraps the value in underscores
-- and left-pads every run of digits with zeros to "digits" characters,
-- e.g. fnNumberAwareSort('aa300bb', 4) -> '_aa0300bb_'.
-- A number that already has "digits" or more characters is kept whole
-- (lpad on its own would truncate it).
CREATE OR REPLACE FUNCTION fnNumberAwareSort(value varchar, digits integer)
RETURNS varchar
AS '
DECLARE
    numbers VARCHAR[];  -- every run of digits, in order of appearance
    texts VARCHAR[];    -- the text between the runs; one element more than numbers
    padded VARCHAR;     -- result being assembled
BEGIN
    value = CONCAT(''_'', value, ''_'');
    SELECT ARRAY(SELECT res[1] FROM regexp_matches(value, ''\d+'', ''g'') AS res) INTO numbers;
    texts = regexp_split_to_array(value, ''\d+'');

    padded = texts[1];
    -- array_length is NULL for an empty array, hence the COALESCE
    FOR i IN 1..COALESCE(array_length(numbers, 1), 0) LOOP
        padded = padded
            || CASE
                   WHEN length(numbers[i]) >= digits THEN numbers[i]
                   ELSE lpad(numbers[i], digits, ''0'')
               END
            || texts[i + 1];
    END LOOP;

    RETURN padded;
END;
' LANGUAGE plpgsql;

Sorting VARCHAR column with alphanumeric entries

I am using SQL Server, the column is a VARCHAR(50) and I want to sort it like this:
1A
1B
2
2
3
4A
4B
4C
5A
5B
5C
5N
14 Draft
21
22A
22B
23A
23B
23C
23D
23E
25
26
FR01584
MISC
What I have so far is:
Select *
From viewASD
ORDER BY
Case When IsNumeric(LEFT(asdNumNew,1)) = 1
Then CASE When IsNumeric(asdNumNew) = 1
Then Right(Replicate('0',20) + asdNumNew + '0', 20)
Else Right(Replicate('0',20) + asdNumNew, 20)
END
When IsNumeric(LEFT(asdNumNew,1)) = 0
Then Left(asdNumNew + Replicate('',21), 20)
End
But this SQL statement puts '14 Draft' right after '26'.
Could someone help? Thanks
Your ORDER BY clause is... oddly complex.
It looks like you want to sort by any leading numeric digits in integer order, and then sort by the remainder. If so, you should do that as separate clauses, rather than trying to do it all in one. The specific issue you're having is that you're only allowing for a single-digit number, instead of two or more. (And there's No such thing as two.)
Here's your fix, along with a SQLFiddle, using two separate calculated columns tests for your ORDER BY. (Note that this assumes the numeric portion of asdNumNew will fit in a T-SQL int. If not, you'll need to adjust the CAST and the maximum value on the first ELSE.)
-- Sorts asdNumNew by its leading number (as an integer) and then by the
-- text that follows it; values that do not start with a digit sort last,
-- in plain string order.
-- The digit test uses LIKE instead of ISNUMERIC: ISNUMERIC also accepts
-- values such as '1e5', '12.5', '$' or '-', which cannot be CAST to int.
SELECT * FROM viewASD
ORDER BY
    -- key 1: the leading run of digits as a number
    CASE
        WHEN asdNumNew LIKE '[0-9]%'
            -- the appended 'x' guarantees a non-digit is found even when the
            -- whole value is numeric
            THEN CAST(
                LEFT(
                    asdNumNew,
                    PATINDEX('%[^0-9]%', asdNumNew + 'x') - 1
                ) as int)
        -- larger than any int, so non-numeric values sort after all numbers
        ELSE 2147483648
    END,
    -- key 2: whatever follows the leading number (NULL when nothing follows)
    CASE
        WHEN asdNumNew LIKE '[0-9]%'
            THEN NULLIF(
                SUBSTRING(
                    asdNumNew,
                    PATINDEX('%[^0-9]%', asdNumNew + 'x'),
                    50
                ),
                '')
        ELSE asdNumNew
    END
If all numbers within the string are reasonably small, say not exceeding 10 digits,
you may expand all the numbers in the string to be exactly 10 digits:
123A -> 0000000123A
S4 -> S0000000004
A3B89 -> A0000000003B0000000089
and so on and then sort them
-- Expand all numbers within S by zeros to be MaxLen.
-- Every run of digits in #S is left-padded with '0' to #maxlen characters;
-- all other characters are copied unchanged, e.g. with #maxlen = 10:
--   'A3B89' -> 'A0000000003B0000000089'
-- A number that already has #maxlen or more digits is copied as is
-- (it is not cut down to its last #maxlen digits).
create function [dbo].ExpandNumbers(#S VarChar(4000), #maxlen integer) returns VarChar(4000)
as
begin
    declare #result VarChar(4000);
    declare #buffer VarChar(4000);   -- digits of the number currently being read
    declare #Ch Char;
    declare #i integer;

    set #buffer = '';
    set #result = '';
    set #i = 1;

    while (#i <= len(#S))
    begin
        set #Ch = substring(#S, #i, 1);

        if ((#Ch >= '0') and (#Ch <= '9'))
            set #buffer = #buffer + #Ch
        else
        begin
            -- a non-digit ends the current number: flush it, padded
            if (len(#buffer) > 0)
                set #result = #result +
                    case
                        when len(#buffer) >= #maxlen then #buffer
                        else right(replicate('0', #maxlen) + #buffer, #maxlen)
                    end;

            set #buffer = '';
            set #result = #result + #Ch;
        end;

        set #i = #i + 1;
    end;

    -- flush a number that runs to the end of the string
    if (len(#buffer) > 0)
        set #result = #result +
            case
                when len(#buffer) >= #maxlen then #buffer
                else right(replicate('0', #maxlen) + #buffer, #maxlen)
            end;

    return #result;
end;
-- Final query is
-- (ExpandNumbers takes two arguments; 10 is the pad width used in the
-- examples above)
select *
from viewASD
order by [dbo].ExpandNumbers(asdNumNew, 10)
I had something similar, but with the possibility of dashes as leading characters as well as trailing spaces. This code worked for me.
SELECT
my_column,
PATINDEX('%[^0-9]%',my_column) AS first_alpha_position,
CONVERT(INT,
CASE
WHEN PATINDEX('%[^0-9]%',my_column) = 0 OR PATINDEX('-%',my_column) = 1
THEN ABS(my_column)
ELSE SUBSTRING(my_column,1,PATINDEX('%[^0-9]%',my_column) -1)
END) AS numeric_value,
LTRIM(
SUBSTRING(my_column,PATINDEX('%[^0-9]%',my_column),LEN(my_column)-PATINDEX('%[^0-9]%',my_column)+1)
) AS alpha_chars
FROM my_table
ORDER BY numeric_value,alpha_chars
TRY THIS
DECLARE #t table (Number nvarchar(20))
INSERT INTO #t
SELECT 'L010'
UNION ALL SELECT 'L011'
UNION ALL SELECT 'L011'
UNION ALL SELECT 'L001'
UNION ALL SELECT 'L012'
UNION ALL SELECT '18'
UNION ALL SELECT '8'
UNION ALL SELECT '17'
UNION ALL SELECT 'B004'
UNION ALL SELECT 'B006'
UNION ALL SELECT 'B008'
UNION ALL SELECT 'B018'
UNION ALL SELECT 'UG001'
UNION ALL SELECT 'UG011'
UNION ALL SELECT 'G001'
UNION ALL SELECT 'G002'
UNION ALL SELECT 'G011';
SELECT Number
FROM #t
ORDER BY
CAST
(
SUBSTRING
(
Number
, 1
, CASE
WHEN patindex('%[^0-9]%',Number) > 0 THEN patindex('%[^0-9]%',Number) - 1
ELSE LEN(Number) END
) AS int
)
, Number
What worked for me is I split up the numeric and the alpha parts and then sorted based on the Alpha, then the Numeric:
-- Returns the numeric part of a unit number: every non-digit character is
-- removed and the remaining digits are returned as an int.
-- A NULL input returns 9999.
CREATE FUNCTION [admin].[GetUnitNumberAsIntFunc](#UnitNumber varchar(20))
RETURNS int
BEGIN
    DECLARE #intPosition int   -- position of the next non-digit, 0 when none is left

    SET #intPosition = PATINDEX('%[^0-9]%', #UnitNumber)
    -- the loop must test #intPosition (the declared variable), not #intNumber
    WHILE #intPosition > 0
    BEGIN
        SET #UnitNumber = STUFF(#UnitNumber, #intPosition, 1, '')
        SET #intPosition = PATINDEX('%[^0-9]%', #UnitNumber)
    END

    RETURN ISNULL(#UnitNumber,9999)
END;
-- Returns the alphabetic part of a unit number: every digit is removed and
-- the remaining characters are returned. A NULL input returns '9999'.
-- Digits are removed one at a time. The former single
-- STUFF(..., position, 6, '') returned NULL (and therefore '9999') for
-- values without digits and discarded any letters after the number.
CREATE FUNCTION [admin].[GetUnitNumberAsStrFunc](#UnitNumber varchar(20))
RETURNS varchar(20)
BEGIN
    DECLARE #intPosition int   -- position of the next digit, 0 when none is left

    SET #intPosition = PATINDEX('%[0-9]%', #UnitNumber)
    WHILE #intPosition > 0
    BEGIN
        SET #UnitNumber = STUFF(#UnitNumber, #intPosition, 1, '')
        SET #intPosition = PATINDEX('%[0-9]%', #UnitNumber)
    END

    RETURN ISNULL(#UnitNumber,9999)
END;

Query to get only numbers from a string

I have data like this:
string 1: 003Preliminary Examination Plan
string 2: Coordination005
string 3: Balance1000sheet
The output I expect is
string 1: 003
string 2: 005
string 3: 1000
And I want to implement it in SQL.
First create this UDF
CREATE FUNCTION dbo.udf_GetNumeric
(
#strAlphaNumeric VARCHAR(256)
)
RETURNS VARCHAR(256)
AS
BEGIN
    -- Strips every character that is not 0-9 from the input and returns
    -- the digits that remain, in their original order.
    -- A NULL input is returned as '0'.
    DECLARE #nonDigitPos INT   -- position of the next character to delete

    WHILE 1 = 1
    BEGIN
        SET #nonDigitPos = PATINDEX('%[^0-9]%', #strAlphaNumeric)

        IF #nonDigitPos > 0
            -- delete exactly one offending character, then search again
            SET #strAlphaNumeric = STUFF(#strAlphaNumeric, #nonDigitPos, 1, '' )
        ELSE
            BREAK
    END

    RETURN ISNULL(#strAlphaNumeric,0)
END
GO
Now use the function as
SELECT dbo.udf_GetNumeric(column_name)
from table_name
SQL FIDDLE
I hope this solved your problem.
Reference
Try this one -
Query:
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
SELECT LEFT(subsrt, PATINDEX('%[^0-9]%', subsrt + 't') - 1)
FROM (
SELECT subsrt = SUBSTRING(string, pos, LEN(string))
FROM (
SELECT string, pos = PATINDEX('%[0-9]%', string)
FROM #temp
) d
) t
Output:
----------
003
005
1000
Query:
DECLARE #temp TABLE
(
string NVARCHAR(50)
)
INSERT INTO #temp (string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM #temp
Please try:
declare #var nvarchar(max)='Balance1000sheet'
SELECT LEFT(Val,PATINDEX('%[^0-9]%', Val+'a')-1) from(
SELECT SUBSTRING(#var, PATINDEX('%[0-9]%', #var), LEN(#var)) Val
)x
Getting only numbers from a string can be done in a one-liner.
Try this :
-- SUBSTRING(...) jumps to the first digit; LEFT(...) then cuts the result at
-- the first non-digit (the appended 'x' covers strings that end in a digit),
-- so only the first run of digits is returned.
LEFT(SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here')), PATINDEX('%[^0-9]%', SUBSTRING('your-string-here', PATINDEX('%[0-9]%', 'your-string-here'), LEN('your-string-here')) + 'x') - 1)
NB: Only works for the first int in the string, ex: abc123vfg34 returns 123.
I found this approach works about 3x faster than the top voted answer. Create the following function, dbo.GetNumbers:
CREATE FUNCTION dbo.GetNumbers(#String VARCHAR(8000))
RETURNS VARCHAR(8000)
AS
BEGIN;
    -- Returns only the characters 0-9 of #String, in their original order.
    -- When the input contains no digit the aggregate has no rows to
    -- combine and the result is NULL.
    WITH
    Numbers
    AS (
        --Step 1.
        --Get a column of numbers to represent
        --every character position in the #String.
        SELECT 1 AS Number
        UNION ALL
        SELECT Number + 1
        FROM Numbers
        WHERE Number < LEN(#String)
    )
    ,Characters
    AS (
        --Step 2.
        --Use the column of numbers generated above
        --to tell substring which character to extract,
        --and keep the position so the order can be restored.
        SELECT
            n.Number AS Pos,
            SUBSTRING(#String, n.Number, 1) AS Character
        FROM Numbers AS n
    )
    --Step 3.
    --Pattern match to return only numbers from the CTE
    --and use STRING_AGG to rebuild it into a single string.
    --WITHIN GROUP fixes the order; without it STRING_AGG gives no
    --guarantee that the digits come back in position order.
    SELECT #String = STRING_AGG(Character,'') WITHIN GROUP (ORDER BY Pos)
    FROM Characters
    WHERE Character LIKE '[0-9]'
    --allows going past the default maximum of 100 loops in the CTE
    OPTION (MAXRECURSION 8000)

    RETURN #String
END
GO
Testing
Testing for purpose:
SELECT dbo.GetNumbers(InputString) AS Numbers
FROM ( VALUES
('003Preliminary Examination Plan') --output: 003
,('Coordination005') --output: 005
,('Balance1000sheet') --output: 1000
,('(111) 222-3333') --output: 1112223333
,('1.38hello#f00.b4r#\-6') --output: 1380046
) testData(InputString)
Testing for performance:
Start off setting up the test data...
--Add table to hold test data
CREATE TABLE dbo.NumTest (String VARCHAR(8000))
--Make an 8000 character string with mix of numbers and letters
DECLARE #Num VARCHAR(8000) = REPLICATE('12tf56se',800)
--Add this to the test table 500 times
DECLARE #n INT = 0
WHILE #n < 500
BEGIN
INSERT INTO dbo.NumTest VALUES (#Num)
SET #n = #n +1
END
Now testing the dbo.GetNumbers function:
SELECT dbo.GetNumbers(NumTest.String) AS Numbers
FROM dbo.NumTest -- Time to complete: 1 min 7s
Then testing the UDF from the top voted answer on the same data.
SELECT dbo.udf_GetNumeric(NumTest.String)
FROM dbo.NumTest -- Time to complete: 3 mins 12s
Inspiration for dbo.GetNumbers
Decimals
If you need it to handle decimals, you can use either of the following approaches, I found no noticeable performance differences between them.
change '[0-9]' to '[0-9.]'
change Character LIKE '[0-9]' to ISNUMERIC(Character) = 1 (SQL treats a single decimal point as "numeric")
Bonus
You can easily adapt this to differing requirements by swapping out WHERE Character LIKE '[0-9]' with the following options:
WHERE Letter LIKE '[a-zA-Z]' --Get only letters
WHERE Letter LIKE '[0-9a-zA-Z]' --Remove non-alphanumeric
WHERE Letter LIKE '[^0-9a-zA-Z]' --Get only non-alphanumeric
With the previous queries I get these results:
'AAAA1234BBBB3333' >>>> Output: 1234
'-çã+0!\aº1234' >>>> Output: 0
The code below returns All numeric chars:
1st output: 12343333
2nd output: 01234
-- Removes every character that is not 0-9 from #StringAlphaNum and selects
-- what is left ('AAAA1234BBBB3333' -> '12343333').
-- The loop ends when no non-digit is left, so it also terminates for input
-- without any digit; the former "while isnumeric(...) = 0" test never
-- became false for an empty result.
declare #StringAlphaNum varchar(255)
declare #NonDigitPosition int

set #StringAlphaNum = 'AAAA1234BBBB3333'

set #NonDigitPosition = patindex('%[^0-9]%', #StringAlphaNum)
while #NonDigitPosition > 0
begin
    -- delete one non-digit character, then look for the next one
    set #StringAlphaNum = stuff(#StringAlphaNum, #NonDigitPosition, 1, '')
    set #NonDigitPosition = patindex('%[^0-9]%', #StringAlphaNum)
end

select #StringAlphaNum
declare #puvodni nvarchar(20)
set #puvodni = N'abc1d8e8ttr987avc'
WHILE PATINDEX('%[^0-9]%', #puvodni) > 0 SET #puvodni = REPLACE(#puvodni, SUBSTRING(#puvodni, PATINDEX('%[^0-9]%', #puvodni), 1), '' )
SELECT #puvodni
A solution for SQL Server 2017 and later, using TRANSLATE:
DECLARE #T table (string varchar(50) NOT NULL);
INSERT #T
(string)
VALUES
('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet');
SELECT
result =
REPLACE(
TRANSLATE(
T.string COLLATE Latin1_General_CI_AI,
'abcdefghijklmnopqrstuvwxyz',
SPACE(26)),
SPACE(1),
SPACE(0))
FROM #T AS T;
Output:
result
003
005
1000
The code works by:
Replacing characters a-z (ignoring case & accents) with a space
Replacing spaces with an empty string.
The string supplied to TRANSLATE can be expanded to include additional characters.
I did not have rights to create functions but had text like
["blahblah012345679"]
And needed to extract the numbers out of the middle
Note this assumes the numbers are grouped together and not at the start and end of the string.
select substring(column_name,patindex('%[0-9]%', column_name),patindex('%[0-9][^0-9]%', column_name)-patindex('%[0-9]%', column_name)+1)
from table name
Although this is an old thread its the first in google search, I came up with a different answer than what came before. This will allow you to pass your criteria for what to keep within a string, whatever that criteria might be. You can put it in a function to call over and over again if you want.
declare #String VARCHAR(MAX) = '-123. a 456-78(90)'
declare #MatchExpression VARCHAR(255) = '%[0-9]%'
declare #return varchar(max)
WHILE PatIndex(#MatchExpression, #String) > 0
begin
set #return = CONCAT(#return, SUBSTRING(#string,patindex(#matchexpression, #string),1))
SET #String = Stuff(#String, PatIndex(#MatchExpression, #String), 1, '')
end
select (#return)
This UDF will work for all types of strings:
CREATE FUNCTION udf_getNumbersFromString (#string varchar(max))
RETURNS varchar(max)
AS
BEGIN
    -- Returns the input with every character other than 0-9 removed.
    -- Each pass finds the first non-digit and removes all of its
    -- occurrences from the string with REPLACE.
    DECLARE #nonDigitPos int

    SET #nonDigitPos = PATINDEX('%[^0-9]%', #string)
    WHILE #nonDigitPos > 0
    BEGIN
        SET #string = REPLACE(#string, SUBSTRING(#string, #nonDigitPos, 1), '')
        SET #nonDigitPos = PATINDEX('%[^0-9]%', #string)
    END

    RETURN #string
END
Just a little modification to #Epsicron 's answer
SELECT SUBSTRING(string, PATINDEX('%[0-9]%', string), PATINDEX('%[0-9][^0-9]%', string + 't') - PATINDEX('%[0-9]%',
string) + 1) AS Number
FROM (values ('003Preliminary Examination Plan'),
('Coordination005'),
('Balance1000sheet')) as a(string)
no need for a temporary variable
Firstly find out the number's starting length then reverse the string to find out the first position again(which will give you end position of number from the end). Now if you deduct 1 from both number and deduct it from string whole length you'll get only number length. Now get the number using SUBSTRING
declare #fieldName nvarchar(100)='AAAA1221.121BBBB'
declare #lenSt int=(select PATINDEX('%[0-9]%', #fieldName)-1)
declare #lenEnd int=(select PATINDEX('%[0-9]%', REVERSE(#fieldName))-1)
select SUBSTRING(#fieldName, PATINDEX('%[0-9]%', #fieldName), (LEN(#fieldName) - #lenSt -#lenEnd))
T-SQL function to read all the integers from text and return the one at the indicated index, starting from left or right, also using a starting search term (optional):
-- Reads the runs of digits in #text as integers and returns the one at
-- #number_position, counted from the left (#rtl = 0) or from the right
-- (#rtl = 1). Returns 0 when the requested number does not exist.
-- Parameters:
--   #text            text to scan
--   #search_term     optional; when not empty, only the text after its first
--                    occurrence (#rtl = 0) or before it (#rtl = 1) is scanned
--   #number_position which of the found numbers to return (row_number() position)
--   #rtl             0 = count numbers in the order found, 1 = count in reverse
-- NOTE(review): each digit run is cast to int, so a run too large for int
-- would presumably raise an error - confirm against expected input.
create or alter function dbo.udf_number_from_text(
#text nvarchar(max),
#search_term nvarchar(1000) = N'',
#number_position tinyint = 1,
#rtl bit = 0
) returns int
as
begin
-- 0 is the answer for every early exit below
declare #result int = 0;
declare #search_term_index int = 0;
if #text is null or len(#text) = 0 goto exit_label;
set #text = trim(#text);
-- NOTE(review): exits with 0 whenever the trimmed text and the search term
-- have the same length, whatever they contain - confirm this is intended
if len(#text) = len(#search_term) goto exit_label;
if len(#search_term) > 0
begin
set #search_term_index = charindex(#search_term, #text);
-- a search term was given but does not occur in the text
if #search_term_index = 0 goto exit_label;
end;
if #search_term_index > 0
if #rtl = 0
-- keep only the text that follows the search term
set #text = trim(right(#text, len(#text) - #search_term_index - len(#search_term) + 1));
else
-- keep only the text that precedes the search term
set #text = trim(left(#text, #search_term_index - 1));
if len(#text) = 0 goto exit_label;
declare #patt_number nvarchar(10) = '%[0-9]%';
declare #patt_not_number nvarchar(10) = '%[^0-9]%';
declare #number_start int = 1;
declare #number_end int;
-- numbers in the order they are found; id records that order
declare #found_numbers table (id int identity(1,1), val int);
-- each pass extracts the first number of #text and then drops the consumed part
while #number_start > 0
begin
set #number_start = patindex(#patt_number, #text);
if #number_start > 0
begin
if #number_start = len(#text)
begin
-- the only digit left is the last character
insert into #found_numbers(val)
select cast(substring(#text, #number_start, 1) as int);
break;
end;
else
begin
-- cut everything in front of the number
set #text = right(#text, len(#text) - #number_start + 1);
-- position of the first non-digit after the number, 0 if none
set #number_end = patindex(#patt_not_number, #text);
if #number_end = 0
begin
-- the rest of the text is one number
insert into #found_numbers(val)
select cast(#text as int);
break;
end;
else
begin
insert into #found_numbers(val)
select cast(left(#text, #number_end - 1) as int);
-- the non-digit is the last character: nothing left to scan
if #number_end = len(#text)
break;
else
begin
-- continue with the text after that non-digit
set #text = trim(right(#text, len(#text) - #number_end));
if len(#text) = 0 break;
end;
end;
end;
end;
end;
-- pick the requested number; when no row matches, #result keeps its initial 0
if #rtl = 0
select #result = coalesce(a.val, 0)
from (select row_number() over (order by m.id asc) as c_row, m.val
from #found_numbers as m) as a
where a.c_row = #number_position;
else
select #result = coalesce(a.val, 0)
from (select row_number() over (order by m.id desc) as c_row, m.val
from #found_numbers as m) as a
where a.c_row = #number_position;
exit_label:
return #result;
end;
Example:
select dbo.udf_number_from_text(N'Text text 10 text, 25 term', N'term',2,1);
returns 10;
This is one of the simplest and easiest one. This will work on the entire String for multiple occurences as well.
CREATE FUNCTION dbo.fn_GetNumbers(#strInput NVARCHAR(500))
RETURNS NVARCHAR(500)
AS
BEGIN
    -- Collects every character of #strInput that matches [0-9], in order,
    -- and returns them as one string (empty when there is none).
    DECLARE #digitsFound NVARCHAR(500) = ''
    DECLARE #position INT = 0
    DECLARE #currentChar NVARCHAR(1)

    WHILE #position < LEN(#strInput)
    BEGIN
        SET #position = #position + 1
        SET #currentChar = SUBSTRING(#strInput, #position, 1)

        -- keep digits, skip everything else
        IF #currentChar LIKE '[0-9]'
            SET #digitsFound = #digitsFound + #currentChar
    END

    RETURN #digitsFound
END
Following a solution using a single common table expression (CTE).
DECLARE #s AS TABLE (id int PRIMARY KEY, value nvarchar(max));
INSERT INTO #s
VALUES
(1, N'003Preliminary Examination Plan'),
(2, N'Coordination005'),
(3, N'Balance1000sheet');
SELECT * FROM #s ORDER BY id;
WITH t AS (
SELECT
id,
1 AS i,
SUBSTRING(value, 1, 1) AS c
FROM
#s
WHERE
LEN(value) > 0
UNION ALL
SELECT
t.id,
t.i + 1 AS i,
SUBSTRING(s.value, t.i + 1, 1) AS c
FROM
t
JOIN #s AS s ON t.id = s.id
WHERE
t.i < LEN(s.value)
)
SELECT
id,
STRING_AGG(c, N'') WITHIN GROUP (ORDER BY i ASC) AS value
FROM
t
WHERE
c LIKE '[0-9]'
GROUP BY
id
ORDER BY
id;
DECLARE #index NVARCHAR(20);
SET #index = 'abd565klaf12';
WHILE PATINDEX('%[0-9]%', #index) != 0
BEGIN
SET #index = REPLACE(#index, SUBSTRING(#index, PATINDEX('%[0-9]%', #index), 1), '');
END
SELECT #index;
One can replace [0-9] with [a-z] if numbers only are wanted with desired castings using the CAST function.
If we use the User Define Function, the query speed will be greatly reduced. This code extracts the number from the string....
SELECT
Reverse(substring(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) , patindex('%[0-9]%', Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) )))) ), len(Reverse(rtrim(ltrim( substring([FieldName] , patindex('%[0-9]%', [FieldName] ) , len([FieldName]) ))))) )) NumberValue
FROM dbo.TableName
-- Counts the ASCII letters and the digits in input_string (PostgreSQL).
-- Returns one row (letters, numbers); both are 0 for an empty or NULL input.
CREATE OR REPLACE FUNCTION count_letters_and_numbers(input_string TEXT)
RETURNS TABLE (letters INT, numbers INT) AS $$
BEGIN
    RETURN QUERY SELECT
        -- count() returns bigint; cast to match the declared INT columns
        count(CASE WHEN chars.ch ~ '[A-Za-z]' THEN 1 END)::int,
        count(CASE WHEN chars.ch ~ '[0-9]' THEN 1 END)::int
    -- a NULL delimiter splits the string into single characters (an empty
    -- delimiter would return the whole string as one element); the alias
    -- differs from the parameter name so references are not ambiguous
    FROM unnest(string_to_array(input_string, NULL)) AS chars(ch);
END;
$$ LANGUAGE plpgsql;
For the hell of it...
This solution is different to all earlier solutions, viz:
There is no need to create a function
There is no need to use pattern matching
There is no need for a temporary table
This solution uses a recursive common table expression (CTE)
But first - note the question does not specify where such strings are stored. In my solution below, I create a CTE as a quick and dirty way to put these strings into some kind of "source table".
Note also - this solution uses a recursive common table expression (CTE) - so don't get confused by the usage of two CTEs here. The first is simply to make the data avaliable to the solution - but it is only the second CTE that is required in order to solve this problem. You can adapt the code to make this second CTE query your existing table, view, etc.
Lastly - my coding is verbose, trying to use column and CTE names that explain what is going on and you might be able to simplify this solution a little. I've added in a few pseudo phone numbers with some (expected and atypical, as the case may be) formatting for the fun of it.
-- Extracts the digits of each numberString with a recursive CTE: every
-- level looks at the first remaining character, appends it to newString
-- when it casts to a number, and passes the rest of the string on.
-- NOTE(review): try_cast of a lone '+' or '-' to an integer type may yield 0
-- rather than NULL in SQL Server, which would add a '0' to the result -
-- confirm before using this on signed or hyphenated input.
with SOURCE_TABLE as (
    select '003Preliminary Examination Plan' as numberString
    union all select 'Coordination005' as numberString
    union all select 'Balance1000sheet' as numberString
    union all select '1300 456 678' as numberString
    union all select '(012) 995 8322 ' as numberString
    union all select '073263 6122,' as numberString
),
FIRST_CHAR_PROCESSED as (
    -- anchor: process the first character of every source string
    select
        len(numberString) as currentStringLength,
        isNull(cast(try_cast(replace(left(numberString, 1),' ','z') as tinyint) as nvarchar),'') as firstCharAsNumeric,
        cast(isNull(cast(try_cast(nullIf(left(numberString, 1),'') as tinyint) as nvarchar),'') as nvarchar(4000)) as newString,
        -- explicit length: a bare "as nvarchar" in a cast means nvarchar(30)
        -- and would silently cut the remaining text to 30 characters
        cast(substring(numberString,2,len(numberString)) as nvarchar(4000)) as remainingString
    from SOURCE_TABLE
    union all
    -- recursion: process the first character of what is left
    select
        len(remainingString) as currentStringLength,
        cast(try_cast(replace(left(remainingString, 1),' ','z') as tinyint) as nvarchar) as firstCharAsNumeric,
        -- nvarchar(3999) + nvarchar(1) = nvarchar(4000), matching the anchor column
        cast(isNull(newString,'') as nvarchar(3999)) + isNull(cast(try_cast(nullIf(left(remainingString, 1),'') as tinyint) as nvarchar(1)),'') as newString,
        substring(remainingString,2,len(remainingString)) as remainingString
    from FIRST_CHAR_PROCESSED fcp2
    where fcp2.currentStringLength > 1
)
select
    newString
    ,* -- comment this out when required
from FIRST_CHAR_PROCESSED
where currentStringLength = 1
-- one recursion level per character; the default limit is 100 levels
option (maxrecursion 4000)
So what's going on here?
Basically in our CTE we are selecting the first character and using try_cast (see docs) to cast it to a tinyint (which is a large enough data type for a single-digit numeral). Note that the type-casting rules in SQL Server say that an empty string (or a space, for that matter) will resolve to zero, so the nullif is added to force spaces and empty strings to resolve to null (see discussion) (otherwise our result would include a zero character any time a space is encountered in the source data).
The CTE also returns everything after the first character - and that becomes the input to our recursive call on the CTE; in other words: now let's process the next character.
Lastly, the field newString in the CTE is generated (in the second SELECT) via concatenation. With recursive CTEs the data type must match between the two SELECT statements for any given column - including the column size. Because we know we are adding (at most) a single character, we are casting that character to nvarchar(1) and we are casting the newString (so far) as nvarchar(3999). Concatenated, the result will be nvarchar(4000) - which matches the type casting we carry out in the first SELECT.
If you run this query and exclude the WHERE clause, you'll get a sense of what's going on - but the rows may be in a strange order. (You won't necessarily see all rows relating to a single input value grouped together - but you should still be able to follow).
Hope it's an interesting option that may help a few people wanting a strictly expression-based solution.
In Oracle
You can get what you want using this:
-- start at the position of the first digit and take as many characters
-- as there are digits in the whole string
SUBSTR(
    'ABCD1234EFGH',
    REGEXP_INSTR('ABCD1234EFGH', '[[:digit:]]'),
    REGEXP_COUNT('ABCD1234EFGH', '[[:digit:]]')
)
Sample Query:
-- Applies the same expression to three sample strings: start at the first digit
-- and take as many characters as the string contains digits.
WITH samples AS (
    SELECT '003Preliminary Examination Plan ' AS s1,
           'Coordination005' AS s2,
           'Balance1000sheet' AS s3
    FROM DUAL
)
SELECT SUBSTR(s1, REGEXP_INSTR(s1, '[[:digit:]]'), REGEXP_COUNT(s1, '[[:digit:]]')) AS SAMPLE1,
       SUBSTR(s2, REGEXP_INSTR(s2, '[[:digit:]]'), REGEXP_COUNT(s2, '[[:digit:]]')) AS SAMPLE2,
       SUBSTR(s3, REGEXP_INSTR(s3, '[[:digit:]]'), REGEXP_COUNT(s3, '[[:digit:]]')) AS SAMPLE3
FROM samples
If you are using Postgres and you have data like '2000 - some sample text', try a combination of substring and position; otherwise, if there is no delimiter in your data, you will need to write a regex:
-- Text before the first '-'. String positions are 1-based, so FROM 0 FOR n yields
-- only the first n - 1 characters; for '2000 - some sample text' that is '2000'.
SUBSTRING(column_name FROM 0 FOR POSITION('-' IN column_name) - 1) AS number_column_name

How do I convert an int to a zero padded string in T-SQL?

Let's say I have an int with the value of 1. How can I convert that int to a zero padded string, such as 00000001?
-- Zero-pad @MyInt to @StrLen characters: STR right-aligns the number in a
-- space-padded field of that width, REPLACE then turns the spaces into '0'.
DECLARE @MyInt integer
SET @MyInt = 123
DECLARE @StrLen tinyint
SET @StrLen = 8
SELECT REPLACE(STR(@MyInt, @StrLen), ' ', '0')
Another way is:
-- STR(value, 8, 0): width 8, no decimals; the space padding is then replaced by '0'
DECLARE @iVal int = 1
SELECT REPLACE(STR(@iVal, 8, 0), ' ', '0')
as of SQL Server 2012 you can now do this:
format(@int, '0000#')
This works for me:
-- prepend three zeros, then keep only the rightmost three characters
SELECT RIGHT('000' + CONVERT(VARCHAR(3), Table.Field), 3)
FROM Table
...
I created this user function
T-SQL Code :
-- Returns @intVal as text, left-padded with '0' to @intLen characters.
--   @intVal: the integer to format
--   @intLen: desired total width; values above 24 are capped at 24
-- A value that already has @intLen or more characters is returned unpadded.
CREATE FUNCTION CIntToChar(@intVal Int, @intLen Int) RETURNS nvarchar(24) AS BEGIN
    DECLARE @digits nvarchar(24) = CONVERT(nvarchar(24), @intVal)
    IF @intLen > 24
        SET @intLen = 24
    -- REPLICATE returns NULL for a negative count, which would turn the whole result into NULL
    IF @intLen <= LEN(@digits)
        RETURN @digits
    RETURN REPLICATE('0', @intLen - LEN(@digits)) + @digits
END
Example :
-- pad 867 to a width of six characters
SELECT dbo.CIntToChar(867, 6) AS COD_ID
OUTPUT
000867
Use FORMAT(<your number>,'00000000') use as many zeroes as you need to have digits in your final outcome.
Here is official documentation of the FORMAT function
If I'm trying to pad to a specific total length, I use the REPLICATE and DATALENGTH functions, like so:
-- Pads @INT with leading zeros to a total width of 3 characters.
DECLARE @INT INT
DECLARE @UNPADDED VARCHAR(3)
DECLARE @PADDED VARCHAR(3)
SET @INT = 2
SET @UNPADDED = CONVERT(VARCHAR(3), @INT)
-- number of zeros needed = target width minus the current length of the text
SET @PADDED = REPLICATE('0', 3 - DATALENGTH(@UNPADDED)) + @UNPADDED
SELECT @INT, @UNPADDED, @PADDED
I used variables here for simplicity, but you see, you can specify the final length of the total string and not worry about the size of the INT that you start with as long as it's <= the final string length.
I always use:
-- prefix with plenty of zeros, then keep the rightmost 8 characters
SET @padded = RIGHT('z0000000000000'
+ convert(varchar(30), @myInt), 8)
The z stops SQL from implicitly converting the string into an int for the addition/concatenation.
If the int can go negative you have a problem, so to get around this I sometimes do this:
-- Zero-pads to 8 digits; for negative values the sign is emitted first
-- and the absolute value is padded.
DECLARE @iVal int
set @iVal = -1
select
case
when @iVal >= 0 then right(replicate('0',8) + cast(@iVal as nvarchar(8)),8)
else '-' + right(replicate('0',8) + cast(@iVal*-1 as nvarchar(8)),8)
end
A very straightforward way to think about padding with '0's: if your numbers should have a fixed width of 4 digits, you prepend four '0's and keep the rightmost 4 characters:
select RIGHT( '0000'+ Convert(varchar, @_int), 4) as txtnum
; if your fixed space is 3, you inject 3'0's
select RIGHT( '000'+ Convert(varchar, @_int), 3) as txtnum
; below I inject '00' to generate 99 labels for each bldg
-- Emits one result set per floor number 1..99, each listing a
-- '<BldgName>.Floor_NN.balcony' label for every building.
declare @_int int
set @_int = 1
while @_int < 100 Begin
    -- '00' + number, rightmost 2 characters: 1 -> '01', 10 -> '10'
    select BldgName + '.Floor_' + RIGHT( '00'+ Convert(varchar, @_int), 2)
    + '.balcony' from dbo.tbl_FloorInfo group by BldgName
    set @_int = @_int +1
End
Result is:
'BldgA.Floor_01.balcony'
'BldgB.Floor_01.balcony'
'BldgC.Floor_01.balcony'
..
..
'BldgA.Floor_10.balcony'
'BldgB.Floor_10.balcony'
'BldgC.Floor_10.balcony'
..
..
..
'BldgA.Floor_99.balcony'
'BldgB.Floor_99.balcony'
'BldgC.Floor_99.balcony'
Or if you really want to go hard-core... ;-)
-- Deliberately long-winded version: builds the zero prefix one character at a time.
declare @int int
set @int = 1
declare @string varchar(max)
set @string = cast(@int as varchar(max))
declare @length int
set @length = len(@string)
declare @MAX int
set @MAX = 8
-- only pad when the number is shorter than the target width
if @length < @MAX
begin
    declare @zeros varchar(8)
    set @zeros = ''
    declare @counter int
    set @counter = 0
    -- append one '0' per missing position
    while (@counter < (@MAX - @length))
    begin
        set @zeros = @zeros + '0'
        set @counter = @counter + 1
    end
    set @string = @zeros + @string
end
print @string
And then there's this one, using REPLICATE:
-- seven zeros followed by the digit '1'
SELECT
    REPLICATE('0', 7) + '1'
Of course, you can replace the literals 7 and '1' with appropriate functions as needed; the above gives you your example. For example:
SELECT REPLICATE('0', 8 - LEN(CONVERT(nvarchar, @myInt))) + CONVERT(nvarchar, @myInt)
will pad an integer of less than 8 places with zeros up to 8 characters.
Now, a negative number in the second argument of REPLICATE will return NULL. So, if that's a possibility (say, #myInt could be over 100 million in the above example), then you can use COALESCE to return the number without leading zeros if there are more than 8 characters:
SELECT COALESCE(REPLICATE('0', 8 - LEN(CONVERT(nvarchar, @myInt))) + CONVERT(nvarchar, @myInt), CONVERT(nvarchar, @myInt))
I think Charles Bretana's answer is the simplest and fastest. A similar solution without using STR is:
-- Reverse the digits, let CHAR(5) pad them on the right with spaces,
-- reverse back so the padding is on the left, then turn the spaces into '0'.
SELECT REPLACE(REVERSE(
CONVERT(CHAR(5 /*<= Target length*/)
, REVERSE(CONVERT(VARCHAR(100), @MyInt)))
), ' ', '0')