TSQL: Find a continuous number in a string - sql

I need to find a continuous 6 or 7 digit number in a string from column name Filename. The string has other numbers in it with dashes(or another character, like an underscore), but I only need the continuous number
The StudentID needs to be extracted from the filename. (I know the data is just wow, multiple vendors, multiple file naming formats is the cause.) Another option would be to just list the starting position of the continuous number.
Desired outcome:
Actual outcome:
Test Code:
-- Test fixture: sample file names; StudentID is left NULL because it is the
-- value the queries below are supposed to extract.
-- Guarded drop so the script also runs the first time, when #StuID does not exist yet.
IF OBJECT_ID('tempdb..#StuID') IS NOT NULL
    DROP TABLE #StuID;

CREATE TABLE #StuID (
    FILENAME VARCHAR(MAX)
    ,StudentID INT
);

INSERT INTO #StuID
( FILENAME )
VALUES
('Smith John D, 11-23-1980, 1234567.pdf')
,('Doe Jane, _01_22_1980_123456.pdf')
,('John Doe, 567891.pdf' );
-- This is what I tried: read 8 characters starting at the first run of six digits.
SELECT
    s.FILENAME,
    SUBSTRING(
        s.FileName,
        PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9]%', s.FileName),
        8
    ) AS StudentID
FROM #StuID AS s

Because you want 6 or 7 digits, case might be the simplest solution:
-- Extract the student ID: prefer a 7-digit run, otherwise fall back to a 6-digit run.
-- Each PATINDEX uses the same pattern as the LIKE that guards it, so the start
-- position always belongs to the run that was actually matched.
SELECT FILENAME,
       (CASE WHEN FileName LIKE '%[0-9][0-9][0-9][0-9][0-9][0-9][0-9]%'
             THEN SUBSTRING(FileName, PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9][0-9]%', FileName), 7)
             WHEN FileName LIKE '%[0-9][0-9][0-9][0-9][0-9][0-9]%'
             THEN SUBSTRING(FileName, PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9]%', FileName), 6)
             -- no ELSE: names without a 6- or 7-digit run yield NULL
        END) AS StudentID
FROM #StuID

Another approach I like a lot is a cast to XML and an XQuery filter:
-- Split each file name into <x> elements at spaces, dots and underscores,
-- then keep only the elements whose content can be cast to xs:int.
WITH Casted AS
(
    SELECT
        src.[FILENAME] AS [FileName],
        CAST(
            '<x>'
            + REPLACE(REPLACE(REPLACE(src.[FILENAME], ' ', '</x><x>'), '.', '</x><x>'), '_', '</x><x>')
            + '</x>'
            AS XML
        ) AS ToXml
    FROM #StuID AS src
)
SELECT
    Casted.[FileName],
    numbers.value('text()[1]', 'int')
FROM Casted
CROSS APPLY Casted.ToXml.nodes('/x[not(empty(. cast as xs:int?))]') AS A(numbers);
This splits the string into fragments and returns every fragment that is a number.
You can easily reduce the set to StudentIDs with any convenient WHERE clause, or by adding a length check of 6 or 7 characters to the XQuery filter:
CROSS APPLY ToXml.nodes('/x[not(empty(. cast as xs:int?))
and (string-length(.)=6 or string-length(.)=7)]') A(numbers)
EDIT
This would be most on point:
CROSS APPLY ToXml.nodes('/x[. cast as xs:int? >= 100000 and . cast as xs:int? <10000000]') A(numbers)

If you know that filetype is pdf then:
-- Remove the '.pdf' extension, then read up to 8 characters starting at the
-- first six-digit run (the start position is looked up in the original name).
SELECT
    t.FILENAME,
    SUBSTRING(p.NameWithoutExt, p.IdStart, 8) AS StudentID
FROM #StuID AS t
CROSS APPLY (
    SELECT
        REPLACE(t.FileName, '.pdf', '') AS NameWithoutExt,
        PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9]%', t.FileName) AS IdStart
) AS p;
db<>fiddle demo
More generic one (SQL Server 2017):
-- Trim every listed non-digit character from both ends of the name, so the
-- remaining string starts and ends with a digit; then read up to 8 characters
-- from the first six-digit run. TRIM(characters FROM string) needs SQL Server 2017+.
SELECT FILENAME
     , SUBSTRING(s.c, PATINDEX('%[0-9][0-9][0-9][0-9][0-9][0-9]%', s.c), 8) AS StudentID
FROM #StuID
CROSS APPLY (
    -- The list is printable ASCII (space through '~') without '0'-'9';
    -- '@' belongs between '?' and 'A', and '' is the escaped single quote.
    SELECT TRIM(' !"#$%&''()*+,-./:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~' FROM filename) AS c
) s
db<>fiddle demo2

Related

sql extract rightmost number in string and increment

i have transaction codes like
"A0004", "1B2005","20CCCCCCC21"
I need to extract the rightmost number and increment the transaction code by one
"AA0004"----->"AA0005"
"1B2005"------->"1B2006"
"20CCCCCCCC21"------>"20CCCCCCCC22"
in SQL Server 2012.
unknown length of string
right(n?) always number
dealing with unsignificant number of string and number length is out of my league.
some logic is always missing.
-- Attempt from the question: keep the first two characters of @a, then append the
-- incremented number re-padded to 3 digits (variable sigil and parentheses restored).
LEFT(@a,2)+RIGHT('000'+CONVERT(NVARCHAR,CONVERT(INT,SUBSTRING(SUBSTRING(@a,2,4),2,3))+1),3)
First, I want to be clear about this: I totally agree with the comments to the question from a_horse_with_no_name and Jeroen Mostert.
You should be storing one data point per column, period.
Having said that, I do realize that a lot of times the database structure can't be changed - so here's one possible way to get that calculation for you.
First, create and populate sample table (Please save us this step in your future questions):
DECLARE @T AS TABLE
(
    col varchar(100)
);
INSERT INTO @T (col) VALUES
('A0004'),
('1B2005'),
('1B2000'),
('1B00'),
('20CCCCCCC21');
(I've added a couple of strings as edge cases you didn't mention in the question)
Then, using a couple of cross apply to minimize code repetition, I came up with that:
-- Increment the trailing number of each code while keeping its zero-padded width.
-- Padding is computed from the incremented value, so a carry ('A0009' -> 'A0010')
-- does not lengthen the code; the number only grows when it overflows its
-- original width ('A99' -> 'A100').
SELECT col,
    LEFT(col, LEN(col) - LastCharIndex + 1) +
    CASE WHEN LEN(Incremented) < LEN(NumberString)
         THEN REPLICATE('0', LEN(NumberString) - LEN(Incremented))
         ELSE ''
    END +
    Incremented As Result
FROM @T
CROSS APPLY
(
    -- Position, counted from the end, of the last non-digit character.
    -- PATINDEX returns 0 for an all-digit value; map that to LEN(col) + 1 so the
    -- whole string is treated as the number instead of passing -1 to RIGHT.
    SELECT ISNULL(NULLIF(PATINDEX('%[^0-9]%', REVERSE(col)), 0), LEN(col) + 1) As LastCharIndex
) As Idx
CROSS APPLY
(
    -- The trailing digits, kept as a string so leading zeroes are not lost.
    SELECT RIGHT(col, LastCharIndex - 1) As NumberString
) As NS
CROSS APPLY
(
    -- bigint leaves room for digit runs that would overflow int
    SELECT CAST(CAST(NumberString as bigint) + 1 as varchar(100)) As Incremented
) As Inc
Results:
col Result
A0004 A0005
1B2005 1B2006
1B2000 1B2001
1B00 1B01
20CCCCCCC21 20CCCCCCC22
LastCharIndex is the position of the last non-digit character, counted from the end of the string (it is found by running PATINDEX on the reversed string).
NumberString is the number to increment, kept as a string (to preserve the leading zeroes if they exist).
From there, it's simply a matter of taking the left part of the string (that is, everything up to the number) and concatenating the newly calculated number string to it, using REPLICATE to put the leading zeroes back in front of the incremented number.
Try This
-- Split a comma-separated list of quoted codes and increment the trailing number
-- of each code. The whole trailing digit run is incremented (not just the last
-- digit), so a carry such as 'A0009' -> 'A0010' keeps the original width.
DECLARE @test nvarchar(1000) = '"A0004", "1B2005","20CCCCCCC21"';
DECLARE @Temp AS TABLE (ID INT IDENTITY, Data nvarchar(1000));

INSERT INTO @Temp (Data)
SELECT @test;

WITH CTE
AS
(
    -- One row per list item, with the double quotes and surrounding blanks removed.
    SELECT A.ID,
           LTRIM(RTRIM(REPLACE(Split.a.value('.', 'nvarchar(max)'), '"', ''))) AS Data
    FROM
    (
        -- Turn the comma-separated list into <S>item</S> elements.
        SELECT ID,
               CAST('<S>' + REPLACE(Data, ',', '</S><S>') + '</S>' AS XML) AS Data
        FROM @Temp
    ) AS A
    CROSS APPLY A.Data.nodes('S') AS Split(a)
)
SELECT CONCAT('"', CTE.Data, '"',
              '-------->',
              '"', LEFT(CTE.Data, LEN(CTE.Data) - N.DigitCount),
              CASE WHEN LEN(I.Incremented) < N.DigitCount
                   THEN REPLICATE('0', N.DigitCount - LEN(I.Incremented))
                   ELSE ''
              END,
              I.Incremented, '"') AS ExpectedResult
FROM CTE
CROSS APPLY
(
    -- Length of the trailing digit run; PATINDEX returns 0 when the value is all digits.
    SELECT ISNULL(NULLIF(PATINDEX('%[^0-9]%', REVERSE(CTE.Data)), 0), LEN(CTE.Data) + 1) - 1 AS DigitCount
) AS N
CROSS APPLY
(
    SELECT CAST(CAST(RIGHT(CTE.Data, N.DigitCount) AS bigint) + 1 AS varchar(20)) AS Incremented
) AS I;
Result
ExpectedResult
-----------------
"A0004"-------->"A0005"
"1B2005"-------->"1B2006"
"20CCCCCCC21"-------->"20CCCCCCC22"
-- Overwrite the trailing number of @X with the next multiple of @N
-- (integer-divide by @N, add 1, multiply by @N; @N = 1 is a plain +1).
-- The repeated CASE is the length of the trailing run after the last letter:
-- the whole string when @X contains no letter.
-- NOTE(review): the number of characters replaced is the length of the new
-- number, so leading zeroes in the old number are not handled ('A0004' style input).
STUFF(@X
,LEN(@X)-CASE PATINDEX('%[A-Z]%',REVERSE(@X)) WHEN 0 THEN LEN(@X) ELSE PATINDEX('%[A-Z]%',REVERSE(@X))-1 END+1
,LEN(((RIGHT(@X,CASE PATINDEX('%[A-Z]%',REVERSE(@X)) WHEN 0 THEN LEN(@X) ELSE PATINDEX('%[A-Z]%',REVERSE(@X))-1 END)/@N)+1)*@N)
,((RIGHT(@X,CASE PATINDEX('%[A-Z]%',REVERSE(@X)) WHEN 0 THEN LEN(@X) ELSE PATINDEX('%[A-Z]%',REVERSE(@X))-1 END)/@N)+1)*@N)
works on number only strings
99 becomes 100
mod(#N) increments

concatenate a zero onto sql server select value shows 4 digits still and not 5

I have zip codes that on import didn't have zero from excel file. So I was doing a select in order to concatenate 0 to front of every 4 digit zip code.
I was trying this, but it still spits out 4 digits
(0 + [ZIP]) as 'fullzip'
ZIP is a float in db table
my full sql
-- Query from the question: first 1000 rows whose ZIP is 4 characters long,
-- with the integer literal 0 added to ZIP as 'fullzip'.
SELECT TOP 1000
    z.[ZIP],
    (0 + z.[ZIP]) AS 'fullzip',
    z.[ZIP_Name],
    z.[ZIP_CountyFIPS],
    z.[ZIP_County],
    z.[ZIP_State],
    z.[Utility_Name],
    z.[Holding_Company],
    z.[Utility_ID],
    z.[GAS_LDC_Type],
    z.[ELEC_Non_IOU_Type],
    z.[Percent_of_Overlap],
    z.[Utility_Territory_Type]
FROM cc.dbo.ServiceableZipCodes AS z
WHERE LEN(z.[ZIP]) = 4
If zip is a float, I'd convert to char,
then do the string math.
-- left-pad the converted ZIP with zeroes and keep the last 5 characters
RIGHT(REPLICATE('0', 5) + CONVERT(varchar(5), ZIP), 5)
Doesn't assume minimum values
of 4 digits.
Another option where you can use the implicit conversion (no need to convert)
-- Demo with a table variable: ZIP codes stored as float, some missing leading zeroes.
DECLARE @YourTable TABLE (Zip float);
INSERT INTO @YourTable (Zip) VALUES
(12345),
(1234),
(123);

-- LEFT implicitly converts the float to a string, so no explicit CONVERT is
-- needed; the value is then left-padded with zeroes to 5 characters.
SELECT RIGHT('00000' + LEFT(Zip, 5), 5)
FROM @YourTable;
Returns
(No column name)
12345
01234
00123
Instead of using CONVERT to turn ZIP into a varchar, I would use CAST, as it is ANSI standard.
-- Return ZIP as a 5-character string, restoring the leading zeroes lost on import.
SELECT
TOP 1000
[ZIP]
-- pad every value shorter than 5 characters (3-digit ZIPs lost two zeroes),
-- and leave longer values untouched
,ZipCode = CASE WHEN LEN(oa_zip.fullzip) < 5 THEN RIGHT('00000' + oa_zip.fullzip, 5) ELSE oa_zip.fullzip END
,[ZIP_Name]
,[ZIP_CountyFIPS]
,[ZIP_County]
,[ZIP_State]
,[Utility_Name]
,[Holding_Company]
,[Utility_ID]
,[GAS_LDC_Type]
,[ELEC_Non_IOU_Type]
,[Percent_of_Overlap]
,[Utility_Territory_Type]
FROM
cc.dbo.ServiceableZipCodes
-- explicit length: a bare VARCHAR in CAST silently means VARCHAR(30)
OUTER APPLY (SELECT fullzip = CAST(ZIP AS VARCHAR(30))) oa_zip
You're using 0, which is an integer literal, not a string value. Because ZIP is a float, both operands are numeric, so `+` performs arithmetic addition (the integer 0 is implicitly converted and added, which is a no-op) instead of string concatenation. To be safe, use the string literal '0' instead of the integer literal 0, and use the CONCAT function to force concatenation rather than relying on `+`, which may still be treated as addition:
SELECT
CONCAT( '0', [Zip] ) AS [FullZip],
-- etc

Split string into new column [duplicate]

This question already has answers here:
How do I split a delimited string so I can access individual items?
(46 answers)
Closed 7 years ago.
There is a column containing following e.g. abcd/ef/g/hij.
The characters between the / separators are dynamic, not fixed.
I want to split in a select query the content into 4 separate new columns.
The already answered question is different: I want to split the content of a string separated by / into new columns.
You can use REPLACE to replace '/' with '.'. Then use PARSENAME to get each separate part of the string:
CREATE TABLE #tmp (str VARCHAR(50));

INSERT INTO #tmp
VALUES
    ('abcd/ef/g/hij'),
    ('1111111/222/33/4444');

-- Swap '/' for '.' so PARSENAME can pick the parts; part 4 is the leftmost
-- fragment and part 1 the rightmost.
SELECT
    PARSENAME(dotted.s, 4) AS [1],
    PARSENAME(dotted.s, 3) AS [2],
    PARSENAME(dotted.s, 2) AS [3],
    PARSENAME(dotted.s, 1) AS [4]
FROM #tmp AS t
CROSS APPLY (SELECT REPLACE(t.str, '/', '.')) AS dotted(s);
Output:
1 2 3 4
---------------------
abcd ef g hij
1111111 222 33 4444
If you ask me, fastest ad-hoc method would be to turn your data into xml and use nodes() method:
-- Split '/'-separated strings into one row per fragment via XML nodes().
DECLARE @temp TABLE (data nvarchar(max));

INSERT INTO @temp (data)
SELECT 'abcd/ef/g/hij' UNION ALL
SELECT '1/2/3';

SELECT t.data, n.c.value('.', 'nvarchar(max)')
FROM @temp AS t
-- wrap every fragment in <t>...</t> so the string becomes an XML fragment list
OUTER APPLY (SELECT CAST('<t>' + REPLACE(t.data, '/', '</t><t>') + '</t>' AS xml) AS data) AS d
OUTER APPLY d.data.nodes('t') AS n(c);
You need to find the position of the / characters using CHARINDEX and slice the string up that way. It will be a large expression, because to find the third slash, you need to use the 3rd parameter of CHARINDEX, passing the result of another CHARINDEX, which also has its 3rd parameter being used. Except for the last (fourth) fragment, you also need to use CHARINDEX to find and remove text after the next slash.
Something like this will extract the text after the third slash:
-- The nested CHARINDEX calls locate the third '/'; SUBSTRING then returns everything
-- after it (LEN(s) as the length simply means "to the end of the string").
SUBSTRING(s, CHARINDEX('/', s, CHARINDEX('/', s, CHARINDEX('/', s)+1)+1)+1, LEN(s))
I leave the rest to you.

Teradata : Sum up values in a column

Problem Statement
Example is shown in below image :
The last 2 rows have patterns like "1.283 2 3" in a single cell. The numbers in the column are separated by spaces. We need to add those numbers up and present the result in the format shown in Output.
So, the cell having "1.283 2 3" must be converted to 6.283
Challenges facing :
The column values are in string format.
The numbers must be cast to a numeric type before they can be added.
We do not want to export the data to a UNIX box and manipulate it there.
In TD14 there is a built-in table UDF named STRTOK_SPLIT_TO_TABLE; before that release you need to implement your own UDF or use a recursive query.
I modified an existing string splitting script to use blanks as delimiter:
-- Sample data: one row per group, holding a blank-separated list of numbers as text.
CREATE VOLATILE TABLE Strings
(
groupcol INT NOT NULL,
string VARCHAR(991) NOT NULL
) ON COMMIT PRESERVE ROWS;
INSERT INTO Strings VALUES (1,'71.792');
INSERT INTO Strings VALUES (2,'71.792 1 2');
INSERT INTO Strings VALUES (3,'1.283 2 3');
-- Recursive split: each step peels the first word off the string and carries the
-- rest forward in "remaining"; "pos" numbers the words starting at 1.
WITH RECURSIVE cte
(groupcol,
--string,
len,
remaining,
word,
pos
) AS (
-- Seed step: take the first word of every input string.
SELECT
GroupCol,
--String,
-- number of characters before the first blank; a blank is appended so that a
-- string without any blank still has one to find
POSITION(' ' IN String || ' ') - 1 AS len,
-- everything after that blank, with leading blanks removed
-- (reuses the "len" alias computed in this same SELECT)
TRIM(LEADING FROM SUBSTRING(String || ' ' FROM len + 2)) AS remaining,
TRIM(SUBSTRING(String FROM 1 FOR len)) AS word,
1
FROM strings
UNION ALL
-- Recursive step: repeat the same split on what is left of the string.
SELECT
GroupCol,
--String,
POSITION(' ' IN remaining)- 1 AS len_new,
TRIM(LEADING FROM SUBSTRING(remaining FROM len_new + 2)),
TRIM(SUBSTRING(remaining FROM 1 FOR len_new)),
pos + 1
FROM cte
-- stop once nothing is left to split
WHERE remaining <> ''
)
-- Sum the words of each group as DECIMAL(18,3); NULLIF turns an empty word into
-- NULL before the cast.
SELECT
groupcol,
-- remove the NULLIF to get 0 for blank strings
SUM(CAST(NULLIF(word, '') AS DECIMAL(18,3)))
FROM cte
GROUP BY 1
This might use a lot of spool, hopefully you're not running that on a large table.

String parsing in SQL Server

I have string column email_id; the data will look like this:
email_id
"1"
"6"
"3 4"
"8"
"0 3"
"0 5 7"
I want to get list of ids as integer. If I have two numbers in my string, I want the last one. My result should look like;
SELECT some_function (email_id ) FROM table
1
6
4
8
3
7
Is it possible to do this in SQL Server?
-- Keep the characters to the right of the first space and cast them to INT.
SELECT
    CAST(
        RIGHT(t.email_id, LEN(t.email_id) - CHARINDEX(' ', t.email_id))
        AS INT
    )
FROM yourTable AS t
This works if and only if all your values can reliably be cast to an INT and there is never more than one space.
EDIT To deal with a list of n values
This isn't pretty, but it avoids recursion and/or loops. If someone gives an answer without REVERSE(), test to see whether it's faster than this or not.
-- Reverse the string, cut at the first space (a space is appended so single
-- values also match), reverse the piece back and cast it to INT.
SELECT
    CAST(
        REVERSE(LEFT(r.reversed_id, CHARINDEX(' ', r.reversed_id + ' ') - 1))
        AS INT
    )
FROM yourTable AS t
CROSS APPLY (SELECT REVERSE(t.email_id) AS reversed_id) AS r
-- Return the last space-separated number. Removing the spaces would glue the
-- digits together ('3 4' -> 34), so find the last space through the reversed
-- string and keep only what follows it (the appended ' ' covers single values).
SELECT CAST(RIGHT(your_column, CHARINDEX(' ', REVERSE(your_column) + ' ') - 1) AS int) FROM your_table