SQL Query: Remove multiple characters strings in a long varchar(max) column - sql

I have the following string in a varchar(max) column:
PREV - FirstName: John / LAST - FirstName: Johan; PREV- LastName: Crescot / LAST - LastName: Crescott;
After every semicolon, any number of further PREV and LAST value pairs can follow, depending on the number of changes made in the source system.
I need to write a query that returns ONLY the PREV values. In case of the string above, the desired result would be:
FirstName: John; LastName: Crescot
All the slash (/) delimiters and dashes need to be removed as well, as you can see in the required result.
Could anyone help me with this? Thank you all!

If open to a UDF, consider the following.
Tired of extracting strings by hand (charindex, patindex, left, right...), I modified a parse function to accept two different delimiters; in this case 'PREV' and '/'.
Example
-- Demo: for every row, return only the PREV values as one '; '-separated string.
DECLARE @YourTable TABLE (ID int, SomeCol varchar(max));

INSERT INTO @YourTable (ID, SomeCol)
VALUES
    (1, 'PREV - FirstName: John / LAST - FirstName: Johan; PREV- LastName: Crescot / LAST - LastName: Crescott;');

SELECT
    A.ID,
    B.NewVal
FROM @YourTable AS A
CROSS APPLY (
    -- Each extracted piece looks like ' - FirstName: John '. Strip the dash, trim it,
    -- and glue the pieces together with '; '. STUFF removes the leading '; '.
    -- TYPE + .value() keeps characters such as '&' from coming back as XML entities.
    SELECT NewVal = STUFF(
        (
            SELECT '; ' + LTRIM(RTRIM(REPLACE(E.RetVal, '-', '')))
            FROM [dbo].[udf-Str-Extract](A.SomeCol, 'PREV', '/') AS E
            ORDER BY E.RetSeq
            FOR XML PATH(''), TYPE
        ).value('.', 'varchar(max)'),
        1, 2, ''
    )
) AS B;
Returns
ID NewVal
1 FirstName: John; LastName: Crescot
The UDF if Interested
-- Returns every piece of @String that sits between an occurrence of @Delimiter1
-- and the next @Delimiter2.
--   @String      text to search
--   @Delimiter1  opening delimiter
--   @Delimiter2  closing delimiter
-- Result columns: RetSeq (1-based order by position), RetPos (start position of the
-- piece in @String), RetVal (the text up to, but excluding, @Delimiter2).
-- Pieces whose closing delimiter is the very first character (empty content) are skipped.
-- NOTE(review): the text before the first @Delimiter1 is also scanned as a candidate
-- (the "SELECT 1" seed in cte3); confirm that this is intended.
CREATE FUNCTION [dbo].[udf-Str-Extract] (@String varchar(max), @Delimiter1 varchar(100), @Delimiter2 varchar(100))
RETURNS TABLE
AS
RETURN (
    -- cte1: ten rows; cross-joined six times below to provide up to 10^6 numbers
    WITH cte1(N) AS (
        SELECT 1 FROM (VALUES (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) N(N)
    ),
    -- cte2: one number per character position of @String
    cte2(N) AS (
        SELECT TOP (ISNULL(DATALENGTH(@String), 0))
               ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
        FROM (
            SELECT N = 1
            FROM cte1 N1
            CROSS JOIN cte1 N2
            CROSS JOIN cte1 N3
            CROSS JOIN cte1 N4
            CROSS JOIN cte1 N5
            CROSS JOIN cte1 N6
        ) A
    ),
    -- cte3: candidate start positions = 1 plus the position right after each @Delimiter1
    cte3(N) AS (
        SELECT 1
        UNION ALL
        SELECT t.N + DATALENGTH(@Delimiter1)
        FROM cte2 t
        WHERE SUBSTRING(@String, t.N, DATALENGTH(@Delimiter1)) = @Delimiter1
    ),
    -- cte4: length of each candidate = distance to the next @Delimiter1.
    -- When there is no further @Delimiter1 the rest of the string is taken; the full
    -- string length is used as the bound so a long tail is not cut off.
    cte4(N, L) AS (
        SELECT S.N,
               ISNULL(NULLIF(CHARINDEX(@Delimiter1, @String, S.N), 0) - S.N, DATALENGTH(@String))
        FROM cte3 S
    )
    SELECT
        RetSeq = ROW_NUMBER() OVER (ORDER BY N),
        RetPos = N,
        -- NULLIF keeps LEFT from ever receiving -1 if it is evaluated before the filter
        RetVal = LEFT(RetVal, NULLIF(CHARINDEX(@Delimiter2, RetVal), 0) - 1)
    FROM (
        SELECT N, L, RetVal = SUBSTRING(@String, N, L)
        FROM cte4
    ) A
    WHERE CHARINDEX(@Delimiter2, RetVal) > 1
)
/*
Max Length of String 1MM characters
Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[udf-Str-Extract] (@String,'[[',']]')
*/

-- Sample data: one row holding ';'-separated "PREV ... / LAST ..." pairs.
CREATE TABLE #temp (val varchar(max));

INSERT INTO #temp (val)
VALUES ('PREV - FirstName: John / LAST - FirstName: Johan; PREV - LastName: Crescot / LAST - LastName');

-- Split on ';', keep the part of each segment before '/', drop the PREV label,
-- and join the cleaned values back together with '; '.
SELECT STUFF(
    (
        SELECT '; ' + LTRIM(RTRIM(REPLACE(REPLACE(LEFT(seg.txt, NULLIF(cut.slash_pos, 0) - 1), 'PREV -', ''), 'PREV-', '')))
        FROM (
            -- ';' is swapped for a placeholder BEFORE the XML escaping, otherwise the
            -- ';' that ends entities such as '&amp;' would be split as well.
            SELECT CAST('<a>' + REPLACE((SELECT REPLACE(val, ';', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</a><a>') + '</a>' AS xml) AS t
            FROM #temp
        ) AS tl
        CROSS APPLY tl.t.nodes('/a') AS Tbl(Col)
        -- varchar(max): a shorter type would truncate long segments and lose the '/'
        CROSS APPLY (SELECT Tbl.Col.value('./text()[1]', 'varchar(max)') AS txt) AS seg
        CROSS APPLY (SELECT CHARINDEX('/', seg.txt) AS slash_pos) AS cut
        WHERE cut.slash_pos > 0            -- skip segments without a PREV / LAST pair
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'),
    1, 2, ''                               -- remove the leading '; '
);
This method does not need any additional UDF.
Breaking down the above query for easy understanding:
1. Convert one row of string to multiple rows based on semicolon ';'
-- Step 1: one output row per ';'-separated segment of #temp.val.
SELECT
    -- varchar(max) so long segments are not truncated
    Tbl.Col.value('./text()[1]', 'varchar(max)')
FROM (
    -- replace ';' with a placeholder before the XML escaping so entities such as
    -- '&amp;' (which end in ';') are not split
    SELECT CAST('<a>' + REPLACE((SELECT REPLACE(val, ';', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</a><a>') + '</a>' AS xml) AS t
    FROM #temp
) AS tl
CROSS APPLY tl.t.nodes('/a') AS Tbl(Col);
2.Over the above extracted value ,use replace and stuff commands to remove unnecessary characters
-- Step 2: per segment, keep the text before '/', remove the PREV label and trim.
-- Segments without a '/' yield NULL.
SELECT
    LTRIM(RTRIM(REPLACE(REPLACE(LEFT(seg.txt, NULLIF(CHARINDEX('/', seg.txt), 0) - 1), 'PREV -', ''), 'PREV-', ''))) AS ColName
FROM (
    -- placeholder first, XML escaping second: keeps entities such as '&amp;' intact
    SELECT CAST('<a>' + REPLACE((SELECT REPLACE(val, ';', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</a><a>') + '</a>' AS xml) AS t
    FROM #temp
) AS tl
CROSS APPLY tl.t.nodes('/a') AS Tbl(Col)
-- read the node once; varchar(max) avoids truncating long segments
CROSS APPLY (SELECT Tbl.Col.value('./text()[1]', 'varchar(max)') AS txt) AS seg;
3. Use stuff and xml path to make the multiple rows back to a single row separated by semicolons as required
-- Step 3: concatenate the cleaned segments into a single '; '-separated value.
SELECT STUFF(
    (
        SELECT '; ' + LTRIM(RTRIM(REPLACE(REPLACE(LEFT(seg.txt, NULLIF(cut.slash_pos, 0) - 1), 'PREV -', ''), 'PREV-', '')))
        FROM (
            -- placeholder first, XML escaping second: keeps entities such as '&amp;' intact
            SELECT CAST('<a>' + REPLACE((SELECT REPLACE(val, ';', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</a><a>') + '</a>' AS xml) AS t
            FROM #temp
        ) AS tl
        CROSS APPLY tl.t.nodes('/a') AS Tbl(Col)
        -- varchar(max) avoids truncating long segments
        CROSS APPLY (SELECT Tbl.Col.value('./text()[1]', 'varchar(max)') AS txt) AS seg
        CROSS APPLY (SELECT CHARINDEX('/', seg.txt) AS slash_pos) AS cut
        WHERE cut.slash_pos > 0            -- skip segments without a '/'
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'),
    1, 2, ''                               -- remove the leading '; '
);

Related

TSQL - Extract text between two words

I did find some info on the site but I am unable to make it work correctly. I have a text field [User] that contains USER: John.Smith SessionId: {There is a space after User: and one after the name}
Everything I tried will either remove the first section or the last one, none remove both. Or will give me this message Invalid length parameter passed to the LEFT or SUBSTRING function
I want to have the name John.Smith extracted from that field.
If possible I do not want to declare any tables.
Thanks
Why not use replace()?
-- Remove the 'USER: ' label and the ' SessionId:' label from col.
select
    replace(
        replace(col, 'USER: ', ''),
        ' SessionId:',
        ''
    )
If open to a TVF
Example
-- One output row per value found between 'USER:' and 'SessionId:' in SomeCol.
SELECT
    A.ID,
    B.RetSeq,
    B.RetVal
FROM YourTable AS A
CROSS APPLY [dbo].[tvf-Str-Extract](SomeCol, 'USER:', 'SessionId:') AS B
Returns
ID RetSeq RetVal
1 1 John.Smith
The Function if Interested
-- Returns the pieces of @String found between @Delim1 and the following @Delim2.
--   @String  text to search
--   @Delim1  opening delimiter
--   @Delim2  closing delimiter
-- Result columns: RetSeq (1-based sequence of the returned pieces) and
-- RetVal (text before @Delim2; the piece is left/right trimmed before the cut).
CREATE FUNCTION [dbo].[tvf-Str-Extract] (@String varchar(max), @Delim1 varchar(100), @Delim2 varchar(100))
RETURNS TABLE
AS
RETURN (
    SELECT
        RetSeq = ROW_NUMBER() OVER (ORDER BY RetSeq),
        -- NULLIF keeps LEFT from ever receiving -1 if it is evaluated before the filter
        RetVal = LEFT(RetVal, NULLIF(CHARINDEX(@Delim2, RetVal), 0) - 1)
    FROM (
        SELECT
            -- constant ORDER BY expression: numbers the nodes in the order they are produced
            RetSeq = ROW_NUMBER() OVER (ORDER BY 1/0),
            RetVal = LTRIM(RTRIM(B.i.value('(./text())[1]', 'varchar(max)')))
        FROM (
            -- @Delim1 is swapped for a placeholder, the text is XML-escaped via FOR XML PATH,
            -- then the placeholder becomes an element boundary
            VALUES (CONVERT(xml, '<x>' + REPLACE((SELECT REPLACE(@String, @Delim1, '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>').query('.'))
        ) AS A(XMLData)
        CROSS APPLY XMLData.nodes('x') AS B(i)
    ) C1
    -- keep only pieces that contain @Delim2 after at least one character
    WHERE CHARINDEX(@Delim2, RetVal) > 1
)
/*
Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[tvf-Str-Extract] (@String,'[[',']]')
*/
I got SUBSTRING() to work:
-- Takes a substring of USER starting at character 7; the length is (LEN(USER)-7)
-- minus the position of 'SessionId' inside USERID.
-- NOTE(review): the expression references two column names (USER and USERID) and USER
-- is also a built-in T-SQL function name; confirm the intended column and bracket it.
-- NOTE(review): the computed length depends on how much text follows 'SessionId';
-- verify the result against real rows before relying on it.
SUBSTRING(USER, 7,(LEN(USER)-7)-(charindex('SessionId',USERID)))
Where:
7 = # of characters in "USERID:"
LEN(User)-7 counts the character length less the 7 from "USERID:"
charindex('SessionId',USERID) gives you the character location where "SessionId" starts

Finding line and position of text in string

I need to get the line number and position (on that line) of a specific word in a text.
For example:
--
This is my first line.
This is my second line.
--
If I would check for 'second' I should get something back like: 2,12
Anyone any suggestion?
Assuming you are looking for the first occurrence per line, and assuming a LINE is delimited by char(13) and not punctuation.
Example
-- Demo: report "line,position" for every line of SomeText that contains @Search
-- (first occurrence per line).
DECLARE @YourTable TABLE (ID int, SomeText varchar(max));

INSERT INTO @YourTable (ID, SomeText)
VALUES
    (1, 'This is my first line.
This is my second line.'),
    (2, 'This another but has a second note
Which not related to the prior "second" note');

DECLARE @Search varchar(100) = 'second';

SELECT
    A.ID,
    Position = CONCAT(B.RetSeq, ',', CHARINDEX(@Search, B.RetVal))
FROM @YourTable AS A
CROSS APPLY (
    SELECT
        -- constant ORDER BY expression: numbers the lines in the order the nodes are produced
        RetSeq = ROW_NUMBER() OVER (ORDER BY 1/0),
        -- not trimmed, so the reported position counts leading blanks of the line
        RetVal = Ln.i.value('(./text())[1]', 'varchar(max)')
    FROM (
        -- CRLF and lone CR are normalised to LF first, so text with any line-ending
        -- style is split; the LF then becomes the element boundary
        SELECT x = CAST('<x>' + REPLACE((SELECT REPLACE(REPLACE(REPLACE(A.SomeText, CHAR(13) + CHAR(10), CHAR(10)), CHAR(13), CHAR(10)), CHAR(10), '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml).query('.')
    ) AS Src
    CROSS APPLY Src.x.nodes('x') AS Ln(i)
) AS B
WHERE CHARINDEX(@Search, B.RetVal) > 0;
Returns
ID Position
1 2,12
2 1,24
2 2,33
EDIT - Requested EDIT
-- Same search, but only the first matching line per ID:
-- TOP 1 WITH TIES keeps every row whose per-ID row number is 1.
SELECT TOP 1 WITH TIES
    A.ID,
    Position = CONCAT(B.RetSeq, ',', CHARINDEX(@Search, B.RetVal))
FROM @YourTable AS A
CROSS APPLY (
    SELECT
        -- constant ORDER BY expression: numbers the lines in the order the nodes are produced
        RetSeq = ROW_NUMBER() OVER (ORDER BY 1/0),
        RetVal = Ln.i.value('(./text())[1]', 'varchar(max)')
    FROM (
        -- CRLF and lone CR are normalised to LF first so any line-ending style is split
        SELECT x = CAST('<x>' + REPLACE((SELECT REPLACE(REPLACE(REPLACE(A.SomeText, CHAR(13) + CHAR(10), CHAR(10)), CHAR(13), CHAR(10)), CHAR(10), '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml).query('.')
    ) AS Src
    CROSS APPLY Src.x.nodes('x') AS Ln(i)
) AS B
WHERE CHARINDEX(@Search, B.RetVal) > 0
ORDER BY ROW_NUMBER() OVER (PARTITION BY A.ID ORDER BY B.RetSeq);
Returns
ID Position
1 2,12
2 1,24
Here is an alternative solution that does not make use of a subquery; it relies on SQL Server string functions such as CHARINDEX and REVERSE.
-- Returns 'line,column' of the first occurrence of @match in t.value, or NULL when
-- @match does not occur. Lines are delimited by CHAR(10).
SELECT
    CASE
        WHEN hit.pos = 0 THEN NULL
        ELSE CONCAT(
            -- line = number of line feeds before the match + 1
            -- (the 'x' sentinel stops LEN from ignoring trailing blanks)
            LEN(pre.txt + 'x') - LEN(REPLACE(pre.txt, CHAR(10), '') + 'x') + 1,
            ',',
            -- column = distance back to the previous line feed; on the first line
            -- (no line feed before the match) it is the absolute position
            CASE WHEN nl.rev_pos = 0 THEN hit.pos ELSE nl.rev_pos - 1 END
        )
    END
FROM t
CROSS APPLY (SELECT CHARINDEX(@match, t.value) AS pos) AS hit
-- text up to and including the first character of the match
CROSS APPLY (SELECT LEFT(t.value, hit.pos) AS txt) AS pre
CROSS APPLY (SELECT CHARINDEX(CHAR(10), REVERSE(pre.txt)) AS rev_pos) AS nl;
The principle is to first find the position of the searched string (the match variable), and then compute the position of the previous line break (CHAR(10), the line feed; it could also be CHAR(13), the carriage return, depending on your end-of-line settings), using REVERSE. With these two values at hand, we can compute the position of the match on the line, and the line number (for this, we compare the length of the substring up to the match with its length without line breaks). Special care has to be taken when the match is on the first line.
db<>fiddle here
-- Runnable demo: 'line,column' of the first occurrence of @match in a three-line text.
-- Yields NULL when @match does not occur. Lines are delimited by CHAR(10).
DECLARE @match varchar(100) = 'second';

WITH t AS (
    SELECT 'This is my first line.
This is my second line.
This is my third line.' AS value
)
SELECT
    CASE
        WHEN hit.pos = 0 THEN NULL
        ELSE CONCAT(
            -- line = number of line feeds before the match + 1
            -- (the 'x' sentinel stops LEN from ignoring trailing blanks)
            LEN(pre.txt + 'x') - LEN(REPLACE(pre.txt, CHAR(10), '') + 'x') + 1,
            ',',
            -- column = distance back to the previous line feed; on the first line
            -- (no line feed before the match) it is the absolute position
            CASE WHEN nl.rev_pos = 0 THEN hit.pos ELSE nl.rev_pos - 1 END
        )
    END
FROM t
CROSS APPLY (SELECT CHARINDEX(@match, t.value) AS pos) AS hit
-- text up to and including the first character of the match
CROSS APPLY (SELECT LEFT(t.value, hit.pos) AS txt) AS pre
CROSS APPLY (SELECT CHARINDEX(CHAR(10), REVERSE(pre.txt)) AS rev_pos) AS nl;
GO
| (No column name) |
| :--------------- |
| 2,12 |

Is it possible to compare comma delimited string in T-SQL without looping?

Let's say I have 2 tables that both have a column called Brand. The values are comma delimited, so for example one of the tables has
ACER,ASUS,HP
AMD,NVIDIA,SONY
as value. Then the other table has
HP,GIGABYTE
MICROSOFT
SAMSUNG,PHILIPS
as values.
I want to compare these table to get all matched record, in my example ACER,ASUS,HP and HP,GIGABYTE match because both has HP. Right now I'm using loop to achieve this, I'm wondering if it's possible to do this in a single query syntax.
You are correct in wanting to step away from the loop.
Since you are on 2012, String_Split() is off the table. However, there are any number of split/parse TVF functions in-the-wild.
Example 1 - without a TVF
-- Example 1: match rows of two tables whose comma-delimited Brand lists share
-- at least one element, without a helper function.
DECLARE @T1 TABLE (Brand varchar(50));
INSERT INTO @T1 (Brand)
VALUES
    ('ACER,ASUS,HP'),
    ('AMD,NVIDIA,SONY');

DECLARE @T2 TABLE (Brand varchar(50));
INSERT INTO @T2 (Brand)
VALUES
    ('HP,GIGABYTE'),
    ('MICROSOFT'),
    ('SAMSUNG,PHILIPS');

SELECT DISTINCT
    T1_Brand = A.Brand,
    T2_Brand = B.Brand
FROM (
    -- one row per (Brand list, single brand) for @T1
    SELECT T.Brand, P.RetVal
    FROM @T1 AS T
    CROSS APPLY (
        SELECT RetVal = LTRIM(RTRIM(Nd.i.value('(./text())[1]', 'varchar(max)')))
        -- FOR XML PATH escapes '&', '<' and '>' so brands such as 'AT&T' do not break the CAST
        FROM (SELECT x = CAST('<x>' + REPLACE((SELECT REPLACE(T.Brand, ',', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml)) AS Src
        CROSS APPLY Src.x.nodes('x') AS Nd(i)
    ) AS P
) AS A
INNER JOIN (
    -- one row per (Brand list, single brand) for @T2
    SELECT T.Brand, P.RetVal
    FROM @T2 AS T
    CROSS APPLY (
        SELECT RetVal = LTRIM(RTRIM(Nd.i.value('(./text())[1]', 'varchar(max)')))
        FROM (SELECT x = CAST('<x>' + REPLACE((SELECT REPLACE(T.Brand, ',', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml)) AS Src
        CROSS APPLY Src.x.nodes('x') AS Nd(i)
    ) AS P
) AS B
    ON A.RetVal = B.RetVal;
Example 2 - with a TVF
-- Example 2: same comparison, with the split delegated to [dbo].[tvf-Str-Parse].
SELECT DISTINCT
    T1_Brand = A.Brand,
    T2_Brand = B.Brand
FROM (
    SELECT T.Brand, P.RetVal
    FROM @T1 AS T
    CROSS APPLY [dbo].[tvf-Str-Parse](T.Brand, ',') AS P
) AS A
INNER JOIN (
    SELECT T.Brand, P.RetVal
    FROM @T2 AS T
    CROSS APPLY [dbo].[tvf-Str-Parse](T.Brand, ',') AS P
) AS B
    ON A.RetVal = B.RetVal;
Both Would Return
T1_Brand T2_Brand
ACER,ASUS,HP HP,GIGABYTE
The UDF if interested
-- Splits @String on @Delimiter.
--   @String     text to split
--   @Delimiter  separator (up to 10 characters)
-- Result columns: RetSeq (1-based row number) and RetVal (trimmed element;
-- NULL for an empty element).
CREATE FUNCTION [dbo].[tvf-Str-Parse] (@String varchar(max), @Delimiter varchar(10))
RETURNS TABLE
AS
RETURN (
    SELECT
        RetSeq = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)),
        RetVal = LTRIM(RTRIM(B.i.value('(./text())[1]', 'varchar(max)')))
    FROM (
        -- delimiter -> placeholder, then XML-escape the text via FOR XML PATH,
        -- then placeholder -> element boundary; this keeps '<', '>' and '&' in the data safe
        SELECT x = CAST('<x>' + REPLACE((SELECT REPLACE(@String, @Delimiter, '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml).query('.')
    ) AS A
    CROSS APPLY x.nodes('x') AS B(i)
);
--Thanks Shnugo for making this XML safe
--Select * from [dbo].[tvf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[tvf-Str-Parse]('John Cappelletti was here',' ')
--Select * from [dbo].[tvf-Str-Parse]('this,is,<test>,for,< & >',',')
Had the same problem with comparing "," delimited strings
you can use "XML" to do that and compare the outputs and return the same/different value:
-- Returns the elements of @TestInput that also occur in @TestInput2, comma-separated.
DECLARE @TestInput  nvarchar(255),
        @TestInput2 nvarchar(255);
SET @TestInput  = 'ACER,ASUS,HP';
SET @TestInput2 = 'HP,GIGABYTE';

WITH FirstStringSplit(S1) AS
(
    -- FOR XML PATH escapes '&', '<' and '>' before the CAST, so such characters are safe
    SELECT CAST('<x>' + REPLACE((SELECT REPLACE(@TestInput, ',', N'§§Split§§') AS [*] FOR XML PATH('')), N'§§Split§§', '</x><x>') + '</x>' AS XML)
)
,SecondStringSplit(S2) AS
(
    SELECT CAST('<x>' + REPLACE((SELECT REPLACE(@TestInput2, ',', N'§§Split§§') AS [*] FOR XML PATH('')), N'§§Split§§', '</x><x>') + '</x>' AS XML)
)
SELECT STUFF(
    (
        SELECT ',' + A.part1.value('.', 'nvarchar(max)')
        FROM FirstStringSplit
        CROSS APPLY S1.nodes('/x') AS A(part1)
        WHERE A.part1.value('.', 'nvarchar(max)') IN (
            SELECT B.part2.value('.', 'nvarchar(max)')
            FROM SecondStringSplit
            CROSS APPLY S2.nodes('/x') AS B(part2)
        )
        -- TYPE + .value() returns the text un-escaped (no '&amp;' in the result)
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'),
    1, 1, ''                                -- remove the leading ','
) AS [Same Value];
Edit:
Changed 'Stuff' to 'XML'

SQL replace/ remove multiple date and time stamp from any part of a string

I have a nvarchar field that contains multiple date & time stamps and various text. The date and time can be at any position in the field.
I want to select only the text from the field. I have tried with REPLACE and PATINDEX to no avail.
Please can anyone share how i would write my select on this example notes field which contains this string:
ADMIN1 21/04/2017 02:01:01 This student is here and trying to gain a masters.
ITSYS2 09/05/2017 03:51:04 60 APL Credits on xout
The following will exclude dates and times from the note_detail. This is an in-line approach, but just about any split/parse function will do the trick as well.
Example
-- Removes date (n/nn/n...) and time (n:nn:n...) tokens from note_detail, in-line.
DECLARE @YourTable TABLE (studend_id int, note_detail varchar(max));

INSERT INTO @YourTable (studend_id, note_detail)
VALUES
    (1, 'CHIDLOL 21/04/2017 02:01:01 ' + CHAR(13) + CHAR(10) + 'This studend is here and trying to gain a masters. THOMASXC 09/05/2014 03:54:04 60 APL Credon on xout');

SELECT
    A.studend_id,
    new_note_detail = B.S
FROM @YourTable AS A
CROSS APPLY (
    SELECT S = STUFF(
        (
            SELECT ' ' + W.RetVal
            FROM (
                -- split the note into words: CR/LF become blanks, blanks become element boundaries
                SELECT
                    RetSeq = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)),
                    RetVal = LTRIM(RTRIM(Nd.i.value('(./text())[1]', 'varchar(max)')))
                FROM (
                    SELECT x = CAST('<x>' + REPLACE((SELECT REPLACE(REPLACE(REPLACE(A.note_detail, CHAR(13), ' '), CHAR(10), ' '), ' ', '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml).query('.')
                ) AS Src
                CROSS APPLY Src.x.nodes('x') AS Nd(i)
            ) AS W
            -- drop words that look like a date or a time; empty words (NULL) drop out as well
            WHERE W.RetVal NOT LIKE '%[0-9]/[0-9][0-9]/[0-9]%'
              AND W.RetVal NOT LIKE '%[0-9]:[0-9][0-9]:[0-9]%'
            ORDER BY W.RetSeq
            -- TYPE + .value() returns the text un-escaped (no '&amp;' in the result)
            FOR XML PATH(''), TYPE
        ).value('.', 'varchar(max)'),
        1, 1, ''                            -- remove the leading blank
    )
) AS B;
Returns
studend_id new_note_detail
1 CHIDLOL This studend is here and trying to gain a masters. THOMASXC 60 APL Credon on xout
Edit - Option 2 with a Parse Function
-- Option 2: same clean-up, with the word split delegated to [dbo].[udf-Str-Parse].
SELECT
    A.studend_id,
    new_note_detail = B.S
FROM @YourTable AS A
CROSS APPLY (
    SELECT S = STUFF(
        (
            SELECT ' ' + B1.RetVal
            FROM [dbo].[udf-Str-Parse](REPLACE(REPLACE(A.note_detail, CHAR(13), ' '), CHAR(10), ' '), ' ') AS B1
            -- drop words that look like a date or a time
            WHERE B1.RetVal NOT LIKE '%[0-9]/[0-9][0-9]/[0-9]%'
              AND B1.RetVal NOT LIKE '%[0-9]:[0-9][0-9]:[0-9]%'
            ORDER BY B1.RetSeq
            -- TYPE + .value() returns the text un-escaped (no '&amp;' in the result)
            FOR XML PATH(''), TYPE
        ).value('.', 'varchar(max)'),
        1, 1, ''                            -- remove the leading blank
    )
) AS B;
The UDF if Interested
-- Splits @String on @Delimiter.
--   @String     text to split
--   @Delimiter  separator (up to 10 characters)
-- Result columns: RetSeq (1-based row number) and RetVal (trimmed element;
-- NULL for an empty element).
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max), @Delimiter varchar(10))
RETURNS TABLE
AS
RETURN (
    SELECT
        RetSeq = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)),
        RetVal = LTRIM(RTRIM(B.i.value('(./text())[1]', 'varchar(max)')))
    FROM (
        -- delimiter -> placeholder, then XML-escape the text via FOR XML PATH,
        -- then placeholder -> element boundary; this keeps '<', '>' and '&' in the data safe
        SELECT x = CAST('<x>' + REPLACE((SELECT REPLACE(@String, @Delimiter, '§§Split§§') AS [*] FOR XML PATH('')), '§§Split§§', '</x><x>') + '</x>' AS xml).query('.')
    ) AS A
    CROSS APPLY x.nodes('x') AS B(i)
);
--Thanks Shnugo for making this XML safe
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
--Select * from [dbo].[udf-Str-Parse]('this,is,<test>,for,< & >',',')

Issue with some characters in the splitting strings

Below is the stored procedure
-- Lists the products (name and id) offered by agent 1 in New York.
-- States and Products are comma-separated lists that are split through XML.
;WITH cte AS (
    SELECT
        AgentId,
        CAST('<r>' + REPLACE(States, ',', '</r><r>') + '</r>' AS XML) AS States,
        -- a raw '&' is not valid XML: it has to be written as the entity '&amp;'
        -- or the CAST fails; .value() below turns it back into '&'
        CAST('<r>' + REPLACE(REPLACE(Products, '&', '&amp;'), ',', '</r><r>') + '</r>' AS XML) AS Products
    FROM #tbVendor
)
,FinalList AS (
    -- one row per (agent, state, product) combination
    SELECT
        AgentId,
        RTRIM(LTRIM(sTable.sColumn.value('.', 'VARCHAR(MAX)'))) AS States,
        RTRIM(LTRIM(PTable.PColumn.value('.', 'VARCHAR(MAX)'))) AS Products
    FROM cte
    CROSS APPLY States.nodes('//r') AS sTable(sColumn)
    CROSS APPLY Products.nodes('//r') AS PTable(PColumn)
)
SELECT DISTINCT
    F.Products AS ProductName,
    T.ProductId AS ProductId
FROM FinalList F
-- look up the id of each product by its exact name
CROSS APPLY (SELECT ProductId FROM #tbProduct TP WHERE TP.ProductName = F.Products) AS T
WHERE F.States = 'New York'
  AND F.AgentId = 1
ORDER BY T.ProductId ASC
This is the SQL fiddle
http://rextester.com/SVXKFH57654
It works fine, but it eliminates the records with a "-" character in the ProductName field, e.g. Non-Stick Utensils, etc.
I am not able to tackle this issue... Please help me!!!
For the splitting string you use XML. In this case, you will have problems with some characters. For example &, <, >. You can avoid this by using another method of splitting a string
Table function splitting strings:
-- Splits @str on the single-character separator @sep without going through XML,
-- so characters such as '&', '<' and '>' need no special handling.
--   @str  text to split
--   @sep  separator character (default ',')
-- Returns one row per element in column Col.
-- The split is a recursive CTE: lists with more than 100 elements need
-- OPTION (MAXRECURSION n) on the calling statement.
CREATE FUNCTION [dbo].[SplitStr] (
    @str varchar(MAX)
   ,@sep char(1) = ','
)
RETURNS TABLE
AS
RETURN
(
    -- n1 = position of the separator before the element (0 for the first one),
    -- n2 = position of the separator after it; @sep is appended so the last
    -- element is terminated as well
    WITH Split (n1, n2)
    AS
    (
        SELECT CAST(0 AS bigint) AS n1, CHARINDEX(@sep, @str + @sep) AS n2
        UNION ALL
        SELECT n2 AS n1, CHARINDEX(@sep, @str + @sep, n2 + 1) AS n2
        FROM Split
        -- DATALENGTH counts trailing blanks (LEN does not), so a final element
        -- that consists only of blanks is not dropped
        WHERE n2 < DATALENGTH(@str)
    )
    SELECT SUBSTRING(@str, n1 + 1, n2 - n1 - 1) AS Col FROM Split
)
GO
Using this function:
-- Expand every vendor row into one row per (state, product) combination.
SELECT
    v.AgentId,
    st.Col AS States,
    pr.Col AS Products
FROM #tbVendor AS v
CROSS APPLY [dbo].[SplitStr](v.States, ',') AS st
CROSS APPLY [dbo].[SplitStr](v.Products, ',') AS pr
this is equivalent to your code
-- XML-based split of both comma-separated columns into tabular form:
-- every list becomes a sequence of <r> elements, which nodes() turns into rows.
;WITH vendor_xml AS (
    SELECT
        AgentId,
        CAST('<r>' + REPLACE(States, ',', '</r><r>') + '</r>' AS XML) AS States,
        CAST('<r>' + REPLACE(Products, ',', '</r><r>') + '</r>' AS XML) AS Products
    FROM #tbVendor
)
SELECT
    vx.AgentId,
    st.r.value('.', 'VARCHAR(MAX)') AS States,
    pr.r.value('.', 'VARCHAR(MAX)') AS Products
FROM vendor_xml AS vx
CROSS APPLY vx.States.nodes('//r') AS st(r)
CROSS APPLY vx.Products.nodes('//r') AS pr(r)