How do I select a substring from two different patindex? - sql

I have many different types of string, but they all follow the two same patterns:
ABC123-S-XYZ789
ABC123-P-XYZ789
QUESTION 1:
I know how I can extract the first part: ABC123
But how do I extract the second part??? XYZ789
QUESTION 2:
I can't tell beforehand whether the string follows the -S- pattern or the -P- pattern; it can be different each time. Does anyone know how I can solve this?
Thanks! / Sophie

You can try following code:
-- Return the part that follows the '-S-' / '-P-' marker (NULL when neither is present).
-- @a stands for the string being parsed; substitute the column name when reading from a table.
SELECT CASE
           -- Skip past the 3-character marker and take the rest of the string.
           -- (RIGHT(@a, CHARINDEX(...) - 1) would only work when both halves have equal length.)
           WHEN @a LIKE '%-S-%' THEN SUBSTRING(@a, CHARINDEX('-S-', @a) + 3, LEN(@a))
           WHEN @a LIKE '%-P-%' THEN SUBSTRING(@a, CHARINDEX('-P-', @a) + 3, LEN(@a))
           ELSE NULL
       END AS ColName
FROM tablename

Is this what you need?
-- Split a value shaped like 'ABC123-S-XYZ789' into the text before the first '-',
-- the text after the last '-', and the single-letter marker in between.
-- Local variables must start with '@'.
DECLARE @Input VARCHAR(100) = 'ABC123-S-XYZ789'
SELECT
    -- Everything up to (not including) the first dash.
    FirstPart = SUBSTRING(@Input, 1, CHARINDEX('-', @Input) - 1),
    -- The last dash is found by searching the reversed string; 100 is the declared
    -- maximum length, i.e. "take everything to the end".
    SecondPart = SUBSTRING(@Input, LEN(@Input) - CHARINDEX('-', REVERSE(@Input)) + 2, 100),
    -- NULL when neither marker is present.
    Pattern = CASE
                  WHEN @Input LIKE '%-S-%' THEN 'S'
                  WHEN @Input LIKE '%-P-%' THEN 'P'
              END

You can use parsename() if the string has always this kind of parts such as ABC123-S-XYZ789
select col, parsename(replace(col, '-', '.'), 1)
However, the parsename() requires the SQL Server+12 if not then you can use reverse()
select col, reverse(left(reverse(col), charindex('-', reverse(col))-1))

If you're using SQL Server 2016 or newer, you can use STRING_SPLIT
CREATE TABLE #temp (string VARCHAR(100));
INSERT #temp VALUES ('ABC123-S-XYZ789'),('ABC123-P-XYZ789');
-- STRING_SPLIT does not report where each piece sat in the source string, and
-- ORDER BY string ties for every piece of the same row, so ROW_NUMBER() would be arbitrary.
-- Turning the value into a JSON array lets OPENJSON expose the array index as [key],
-- which yields a reliable 1-based position.
SELECT t.string,
       j.value,
       CAST(j.[key] AS INT) + 1 AS rn
FROM #temp t
CROSS APPLY OPENJSON('["' + REPLACE(STRING_ESCAPE(t.string, 'json'), '-', '","') + '"]') j;
I can't tell beforehand if the string follows the -S- pattern or the -P- pattern
You can then use a CTE to get a specific part of the string:
-- Number the pieces of each string by their real position, then keep the wanted one.
-- OPENJSON's [key] is the zero-based array index, so the numbering is deterministic
-- (ROW_NUMBER() over STRING_SPLIT output ordered by the source string is not).
WITH cte AS (
    SELECT t.string,
           j.value,
           CAST(j.[key] AS INT) + 1 AS rn
    FROM #temp t
    CROSS APPLY OPENJSON('["' + REPLACE(STRING_ESCAPE(t.string, 'json'), '-', '","') + '"]') j
)
SELECT * FROM cte WHERE rn = 2

Related

How to pull out information from a long string of data

I have this data point:
455-U-202007302233,455-L-202007302233,422-U-202008011052,422-L-202008011052,857-U-202008041142,857-L-202008061215
Column: ,[t810str]
How would I be able to modify column [t810str] in order to pull out the last comma set before 857?
Desired Result = 422-L-202008011052
First you need to implement some kind of splitter that respects ordinal position (STRING_SPLIT does not). I'm therefore going to make use of DelimitedSplit8k_LEAD. Then you can split the value, and use LAG to get the prior value. Finally you can filter on where the item has a value LIKE '857%' but the previous does not:
WITH CTE AS(
SELECT DS.Item,
LAG(DS.Item) OVER (PARTITION BY YourColumn ORDER BY DS.itemNumber) AS PrevItem
FROM (VALUES('455-U-202007302233,455-L-202007302233,422-U-202008011052,422-L-202008011052,857-U-202008041142,857-L-202008061215'))V(YourColumn)
CROSS APPLY dbo.DelimitedSplit8K_LEAD(V.YourColumn,',') DS)
SELECT C.PrevItem
FROM CTE C
WHERE C.Item LIKE '857%'
AND C.PrevItem NOT LIKE '857%';
Based on your data and the assumption that items are 18 characters (your data do not indicate otherwise):
-- Assumes every item is exactly 18 characters long: cut the list just before the
-- first ',857' and keep the last 18 characters of what remains.
DECLARE @t AS NVARCHAR(255) = '455-U-202007302233,455-L-202007302233,422-U-202008011052,422-L-202008011052,857-U-202008041142,857-L-202008061215';
SELECT RIGHT(LEFT(@t, CHARINDEX(',857', @t) - 1), 18)
Using cross apply (which you can also rewrite using a CTE or a subquery for readability). This removes everything after first occurrence of 857 and then grabs the last set that's left. So even if you have multiple 857 and varying length of delimited strings, this should work
select *, right(remind , charindex (',' ,reverse(remind))-1)
from t t1
cross apply (select stuff(col, charindex(',857',col), len(col),'') as remind) t2
DEMO
Another solution use a recursive CTE
-- Walk the comma-separated list one item at a time. Each row holds the item just
-- split off (Part) and the still-unprocessed tail (Remind).
DECLARE @Var VARCHAR(200) = '455-U-202007302233,455-L-202007302233,422-U-202008011052,422-L-202008011052,857-U-202008041142,857-L-202008061215';
WITH CTE AS
(
    -- Anchor: the first item and everything after the first comma.
    SELECT 0 N,
           LEFT(@Var, CHARINDEX(',', @Var) - 1) Part,
           RIGHT(@Var, LEN(@Var) - CHARINDEX(',', @Var)) Remind
    UNION ALL
    -- Step: peel the next item off the tail; recursion stops once the tail has no comma.
    SELECT N + 1,
           LEFT(Remind, CHARINDEX(',', Remind) - 1),
           RIGHT(Remind, LEN(Remind) - CHARINDEX(',', Remind))
    FROM CTE
    WHERE CHARINDEX(',', Remind) <> 0
)
-- The wanted item is the first one whose remaining tail begins with '857'.
SELECT TOP 1 Part
FROM CTE
WHERE LEFT(Remind, 3) = '857'
ORDER BY N;
Demo
Implemented with string functions (and assuming your data items can have variable length :-) it might look a bit confusing (therefore I'd prefer @Larnu's answer):
-- Return the item immediately before the first item that starts with '857',
-- without assuming a fixed item length.
DECLARE @string VARCHAR(2000) = '455-U-202007302233,455-L-202007302233,422-U-202008011052,422-L-202008011052,857-U-202008041142,857-L-202008061215'
SELECT SUBSTRING(
           @string,
           -- Start: position of ',857' minus the distance back to the previous comma, plus one.
           CHARINDEX(',857', @string)
               - CHARINDEX(',', REVERSE(LEFT(@string, CHARINDEX(',857', @string) - 1)))
               + 1,
           -- Length: distance back to the previous comma, excluding the comma itself.
           CHARINDEX(',', REVERSE(LEFT(@string, CHARINDEX(',857', @string) - 1))) - 1
       )
Parts of the latter separated:
DECLARE @string VARCHAR(2000) = '455-U-202007302233,455-L-202007302233,422-U-202008011052,422-L-202008011052,857-U-202008041142,857-L-202008061215'
-- Position of the comma in front of the first item starting with 857.
SELECT CHARINDEX(',857',@string)
-- Everything before that comma.
SELECT LEFT(@string, PATINDEX('%,857%',@string) - 1)
-- The same text reversed, so that the last comma becomes the first one.
SELECT REVERSE( LEFT(@string, PATINDEX('%,857%',@string) - 1) )
-- Distance from the end of that text back to its last comma.
SELECT CHARINDEX(',', REVERSE( LEFT(@string, PATINDEX('%,857%',@string) - 1)) )

Select text from between two characters in string

I have data in database an example of data below
folder/subfolder/file/doc
folder/subfolder/doc
how do I get the 1st instance of characters from between the '/'
I want to extract 'folder/subfolder'
I have tried the following but not what I need. this gets 'folder/'
LEFT([Cat], CHARINDEX('/', [Cat]) ) as 'doc_cat',
and the below gets the last part
RIGHT([Cat], CHARINDEX('/', [Cat]) ) as 'doc_cat2',
I want to get the 1st part of and second part of string
Here is one method:
select left(doc_cat_1, charindex('/', doc_cat_1) - 1)
from t cross apply
(select stuff(cat, 1, charindex('/', cat), '') as doc_cat_1
) v1;
The string handling capabilities of SQL Server are pretty lousy. Apply at least makes it easier to handle intermediate results.
You can use LEFT and CHARINDEX
-- Text up to (not including) the second '/'. The '/' and '//' suffixes guarantee that
-- both CHARINDEX searches find a delimiter, so a value with fewer than two slashes is
-- returned whole instead of passing a negative length to LEFT.
LEFT([Cat], CHARINDEX('/', [Cat] + '//', CHARINDEX('/', [Cat] + '/') + 1) - 1) AS 'doc_cat'
One more way to accomplish using XML -
-- Table variables must start with '@'.
declare @s table (patterns nvarchar(100))
insert into @s (patterns)
values ('folder/subfolder/file/doc'), ('folder/subfolder/doc'), ('folder/subfolder')

-- Turn 'a/b/c' into <x>a</x><x>b</x><x>c</x> once per row, then read the first two nodes.
select x.doc.value('/x[1]', 'varchar(100)') + '/'
     + x.doc.value('/x[2]', 'varchar(100)')
from @s s
cross apply (
    select cast(concat('<x>', replace(s.patterns, '/', '</x><x>'), '</x>') as xml) as doc
) x
If you're on SQL 2016 or newer, you could use STRING_SPLIT()
-- Number each path segment by its real position and return the requested one.
-- OPENJSON's [key] is the zero-based array index; ROW_NUMBER() over STRING_SPLIT output
-- ordered by the source column would tie on every row and number the pieces arbitrarily.
WITH cte AS (
    SELECT s.cat,
           j.value,
           CAST(j.[key] AS INT) + 1 AS rn
    FROM someTable s
    CROSS APPLY OPENJSON('["' + REPLACE(STRING_ESCAPE(s.cat, 'json'), '/', '","') + '"]') j
)
SELECT cat, value FROM cte WHERE rn = 2;
The advantage here is that rn could be any number you need.
Fiddle here.

Split string and take last element

I have a table with this values:
Articles/Search/ArtMID/2681/ArticleID/2218/Diet.aspx
OurStory/MeettheFoodieandtheMD.aspx
TheFood/OurMenu.aspx
I want to get this
Diet.aspx
MeettheFoodieandtheMD.aspx
OurMenu.aspx
How can i do this?
The way to do it in SQL :
SELECT SUBSTRING( string , LEN(string) - CHARINDEX('/',REVERSE(string)) + 2 , LEN(string) ) FROM SAMPLE;
SQLFiddle here http://sqlfiddle.com/#!3/41ead/11
SELECT REVERSE(LEFT(REVERSE(columnName), CHARINDEX('/', REVERSE(columnName)) - 1))
FROM tableName
SQLFiddle Demo
OTHER SOURCE(s)
REVERSE
LEFT
CHARINDEX
Please try:
select url,(CASE WHEN CHARINDEX('/', url, 1)=0 THEN url ELSE RIGHT(url, CHARINDEX('/', REVERSE(url)) - 1) END)
from(
select 'Articles/Search/ArtMID/2681/ArticleID/2218/Diet.aspx' as url union
select 'OurStory/MeettheFoodieandtheMD.aspx' as url union
select 'MeettheFoodieandtheMD.aspx' as url
)xx
Try this. It's easier.
-- Last '/'-separated element. Appending '/' to the reversed value guarantees CHARINDEX
-- finds a delimiter, so a value without any '/' is returned whole instead of passing
-- a negative length to RIGHT.
SELECT RIGHT(string, CHARINDEX('/', REVERSE(string) + '/') - 1) FROM TableName
SELECT REVERSE ((
SELECT TOP 1 value FROM STRING_SPLIT(REVERSE('Articles/Search/ArtMID/2681/ArticleID/2218/Diet.aspx'), '/')
)) AS fName
Result: Diet.aspx
Standard STRING_SPLIT does not allow to take last value.
The trick is to reverse the string (REVERSE) before splitting with STRING_SPLIT, get the first value from the end (TOP 1 value) and then the result needs to be reversed again (REVERSE) to restore the original chars sequence.
Here is the common approach, when working with SQL table:
SELECT REVERSE ((
SELECT TOP 1 VALUE FROM STRING_SPLIT(REVERSE(mySearchString), '/')
)) AS myLastValue
FROM myTable
A slightly more compact way of doing this (similar to ktaria's answer but in SQL Server) would be
SELECT TOP 1 REVERSE(value) FROM STRING_SPLIT(REVERSE(fullPath), '/') AS fileName
The equivalent for PostgreSQL:
SELECT reverse(split_part(reverse(column_name), '/', 1));
Please try the code below:
SELECT SUBSTRING( attachment, LEN(attachment)
- CHARINDEX('/', REVERSE(attachment)) + 2, LEN(attachment) ) AS filename
FROM filestable;
more simple and elegant :
select reverse(SPLIT_PART(reverse('Articles/Search/ArtMID/2681/ArticleID/2218/Diet.aspx'), '/',1))
You can try this too
-- Pick the piece that occurs furthest to the right. Both the piece and the full string
-- are wrapped in '/' so that the final piece (which has no trailing '/') is still found;
-- with any other terminator the last piece scores 0 and the second-to-last one wins.
-- NOTE(review): CHARINDEX finds the first occurrence, so this still misbehaves when the
-- last piece also appears earlier in the string (e.g. 'a/b/a').
( SELECT TOP(1) value
  FROM STRING_SPLIT(@string, '/')
  ORDER BY CHARINDEX('/' + value + '/', '/' + @string + '/') DESC)
I corrected jazzytomato's solution for single character tokens (D) and for single tokens (Diet.aspx)
-- Last '/'-separated element, also for values that contain no '/' at all.
-- The sentinel '/' is appended to the reversed string: if it were prepended, CHARINDEX would
-- always return 1 and the result would always be empty.
SELECT SUBSTRING(string, LEN(string) - CHARINDEX('/', REVERSE(string) + '/') + 2, LEN(string)) FROM SAMPLE;
The easiest way in MySQL:
SELECT SUBSTRING_INDEX(string, '/', -1) FROM SAMPLE;
I have a simpler solution:
SELECT SUBSTRING_INDEX(string, 'SUBSTRING_INDEX(string, '/', -1)', 1) FROM SAMPLE;
reverse(SUBSTRING(reverse(yourString),0,CHARINDEX('/',reverse(yourString)))) as stringLastPart
-- Split a Windows path into its segments, store them in order, and read back the last one.
Create Table #temp
(
ID int identity(1,1) not null,
value varchar(100) not null
)
DECLARE @fileName VARCHAR(100);
-- STRING_SPLIT does not promise any row order, so the identity values would not reliably
-- follow the position in the path. OPENJSON exposes the array index as [key]; ordering the
-- INSERT ... SELECT by it makes ID match the segment position.
-- STRING_ESCAPE doubles each backslash, hence the '\\' search pattern.
INSERT INTO #temp(value)
SELECT j.value
FROM OPENJSON('["' + REPLACE(STRING_ESCAPE('C:\Users\Documents\Datavalidation\Input.csv', 'json'), '\\', '","') + '"]') j
ORDER BY CAST(j.[key] AS INT);
SET @fileName=(SELECT TOP 1 value from #temp ORDER BY ID DESC);
SELECT @fileName AS File_Name;
DROP TABLE #temp

Extract one value from a column containing multiple delimited values

How can I get the value from the sixth field in the following column? I am trying to get the 333 field:
ORGPATHTXT
2123/2322/12323/111/222/333/3822
I believe I have to use select substring, but am unsure how to format the query
Assuming SQL Server
The easiest way I can think of is create a Split function that splits based on '/' and you extract the sixth item like below
declare #text varchar(50) = '2123/2322/12323/111/222/333/3822'
select txt_value from fn_ParseText2Table(#text, '/') t where t.Position = 6
I used the function in this url. See it worked at SQLFiddle
Try this - for a string variable or wrap into a function to use with a select query (Sql-Demo)
-- Local variables must start with '@'.
Declare @s varchar(50)='2123/2322/12323/111/222/333/3822'
-- The five-row VALUES list makes the assignment run five times; each pass removes
-- everything up to and including the first '/', leaving the sixth field at the front.
Select @s = right(@s,len(@s)- case charindex('/',@s,1) when 0 then len(@s)
else charindex('/',@s,1) end)
From ( values (1),(2),(3),(4),(5)) As t(num)
-- Return the leading field (or the whole remainder when no '/' is left).
Select case when charindex('/',@s,1)>0 then left(@s,charindex('/',@s,1)-1)
else @s end
--Results
333
I'd like to offer a solution that uses CROSS APPLY to split up any delimited string in MSSQL and ROW_NUMBER() to return the 6th element. This assumes you have a table with ORGPATHTXT as a field (it can easily be converted to work without the table though):
SELECT ORGPATHTXT
FROM (
SELECT
Split.a.value('.', 'VARCHAR(100)') AS ORGPATHTXT,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY (SELECT 1)) RN
FROM
(SELECT ID, CAST ('<M>' + REPLACE(ORGPATHTXT, '/', '</M><M>') + '</M>' AS XML) AS String
FROM MyTable
) AS A
CROSS APPLY String.nodes ('/M') AS Split(a)
) t
WHERE t.RN = 6;
Here is some sample Fiddle to go along with it.
Good luck.
For sql, you can use
declare @string varchar(65) = '2123/2322/12323/111/222/333/3822'
-- SUBSTRING takes (expression, start, length), not (expression, start, end):
-- the sixth field starts at character 25 and is 3 characters long.
select substring(@string, 25, 3)
If you are using MySQL, then you can use:
-- The inner call keeps the first six fields ('2123/.../333'); the outer call with -1
-- keeps only the last of those, i.e. the sixth field.
select substring_index(substring_index(orgpathtxt, '/', 6), '/', -1)
Let me just say that it is less convenient in most other databases.
Also you can use option with dynamic management function sys.dm_fts_parser
-- Local variables must start with '@'.
DECLARE @s nvarchar(50) = '2123/2322/12323/111/222/333/3822'
-- Parse the quoted string with the full-text parser (LCID 1033) and keep the
-- sixth term, skipping the additional rows whose display_term starts with 'nn'.
SELECT display_term
FROM sys.dm_fts_parser('"'+ @s + '"', 1033, NULL, 0)
WHERE display_term NOT LIKE 'nn%' AND occurrence = 6

Strip non-numeric characters from a string

I'm currently doing a data conversion project and need to strip all alphabetical characters from a string. Unfortunately I can't create or use a function as we don't own the source machine making the methods I've found from searching for previous posts unusable.
What would be the best way to do this in a select statement? Speed isn't too much of an issue as this will only be running over 30,000 records or so and is a once off statement.
You can do this in a single statement. You're not really creating a statement with 200+ REPLACEs are you?!
update tbl
set S = U.clean
from tbl
cross apply
(
select Substring(tbl.S,v.number,1)
-- this table will cater for strings up to length 2047
from master..spt_values v
where v.type='P' and v.number between 1 and len(tbl.S)
and Substring(tbl.S,v.number,1) like '[0-9]'
order by v.number
for xml path ('')
) U(clean)
Working SQL Fiddle showing this query with sample data
Replicated below for posterity:
create table tbl (ID int identity, S varchar(500))
insert tbl select 'asdlfj;390312hr9fasd9uhf012 3or h239ur ' + char(13) + 'asdfasf'
insert tbl select '123'
insert tbl select ''
insert tbl select null
insert tbl select '123 a 124'
Results
ID S
1 390312990123239
2 123
3 (null)
4 (null)
5 123124
CTE comes for HELP here.
;WITH CTE AS
(
SELECT
[ProductNumber] AS OrigProductNumber
,CAST([ProductNumber] AS VARCHAR(100)) AS [ProductNumber]
FROM [AdventureWorks].[Production].[Product]
UNION ALL
SELECT OrigProductNumber
,CAST(STUFF([ProductNumber], PATINDEX('%[^0-9]%', [ProductNumber]), 1, '') AS VARCHAR(100) ) AS [ProductNumber]
FROM CTE WHERE PATINDEX('%[^0-9]%', [ProductNumber]) > 0
)
SELECT * FROM CTE
WHERE PATINDEX('%[^0-9]%', [ProductNumber]) = 0
OPTION (MAXRECURSION 0)
output:
OrigProductNumber ProductNumber
WB-H098 098
VE-C304-S 304
VE-C304-M 304
VE-C304-L 304
TT-T092 092
RichardTheKiwi's script in a function for use in selects without cross apply,
also added dot because in my case I use it for double and money values within a varchar field
-- Returns @string with every character other than digits and '.' removed;
-- commas are first converted to '.' so decimal values written with either separator survive.
-- The return type matches the parameter length so long inputs are not silently cut at 1000.
-- NOTE: master..spt_values (type 'P') only supplies positions up to 2047, so characters
-- beyond that position are dropped.
CREATE FUNCTION dbo.ReplaceNonNumericChars (@string VARCHAR(5000))
RETURNS VARCHAR(5000)
AS
BEGIN
    SET @string = REPLACE(@string, ',', '.')
    -- Visit each character position in order and concatenate the ones that qualify.
    SET @string = (SELECT SUBSTRING(@string, v.number, 1)
                   FROM master..spt_values v
                   WHERE v.type = 'P'
                     AND v.number BETWEEN 1 AND LEN(@string)
                     AND (SUBSTRING(@string, v.number, 1) LIKE '[0-9]'
                          OR SUBSTRING(@string, v.number, 1) LIKE '[.]')
                   ORDER BY v.number
                   FOR XML PATH('')
                  )
    RETURN @string
END
GO
Thanks RichardTheKiwi +1
Well if you really can't use a function, I suppose you could do something like this:
SELECT REPLACE(REPLACE(REPLACE(LOWER(col),'a',''),'b',''),'c','')
FROM dbo.table...
Obviously it would be a lot uglier than that, since I only handled the first three letters, but it should give the idea.