Finding line and position of text in string - sql

I need to get the line number and position (on that line) of a specific word in a text.
For example:
--
This is my first line.
This is my second line.
--
If I would check for 'second' I should get something back like: 2,12
Anyone any suggestion?

Assuming you are looking for the first occurrence per line, and assuming a LINE is delimited by char(13) and not punctuation.
Example
-- Sample data: every row carries a multi-line text value to search in.
Declare @YourTable table (ID int, SomeText varchar(max))
Insert Into @YourTable (ID, SomeText) values
 (1,'This is my first line.
This is my second line.')
,(2,'This another but has a second note
Which not related to the prior "second" note')

-- Text to look for (T-SQL local variables are prefixed with @).
Declare @Search varchar(100) = 'second'

-- One output row per line that contains @Search, formatted as "line,column"
-- (column = position of the first occurrence on that line).
Select A.ID
      ,Position = concat(B.RetSeq, ',', charindex(@Search, B.RetVal))
 From  @YourTable A
 Cross Apply (
        -- Split SomeText into one <x> node per line:
        --   1. CRLF and lone CR are folded to LF so any line-ending style splits the same way;
        --   2. LF is swapped for a marker BEFORE "For XML Path" so &, <, > in the text get escaped;
        --   3. the marker is turned into element boundaries and the string is cast to xml.
        -- RetVal is deliberately NOT trimmed: trimming would shift the reported column on indented lines.
        -- NOTE(review): RetSeq relies on nodes() returning rows in document order - commonly relied on, confirm for your version.
        Select RetSeq = row_number() over (order by (Select null))
              ,RetVal = N.i.value('(./text())[1]', 'varchar(max)')
         From (Select x = Cast('<x>' + replace((Select replace(replace(replace(A.SomeText, char(13)+char(10), char(10)), char(13), char(10)), char(10), '§§Split§§') as [*] For XML Path('')), '§§Split§§', '</x><x>') + '</x>' as xml).query('.')) as S
         Cross Apply S.x.nodes('x') AS N(i)
       ) B
 Where charindex(@Search, B.RetVal) > 0
Returns
ID Position
1 2,12
2 1,24
2 2,33
EDIT - Requested EDIT
-- Same search, but only the FIRST matching line per ID is returned.
-- Assumes @YourTable and @Search are declared earlier in the same batch.
-- "Top 1 with Ties" + "Order by Row_Number() ... Partition By ID" keeps every row whose
-- per-ID rank is 1, i.e. the earliest matching line of each ID.
Select Top 1 with Ties
       A.ID
      ,Position = concat(B.RetSeq, ',', charindex(@Search, B.RetVal))
 From  @YourTable A
 Cross Apply (
        -- One <x> node per line; CRLF / CR / LF are all treated as line breaks.
        -- The marker is inserted before "For XML Path" so &, <, > in the text get escaped.
        -- NOTE(review): RetSeq relies on nodes() returning rows in document order - confirm for your version.
        Select RetSeq = row_number() over (order by (Select null))
              ,RetVal = N.i.value('(./text())[1]', 'varchar(max)')
         From (Select x = Cast('<x>' + replace((Select replace(replace(replace(A.SomeText, char(13)+char(10), char(10)), char(13), char(10)), char(10), '§§Split§§') as [*] For XML Path('')), '§§Split§§', '</x><x>') + '</x>' as xml).query('.')) as S
         Cross Apply S.x.nodes('x') AS N(i)
       ) B
 Where charindex(@Search, B.RetVal) > 0
 Order by Row_Number() over (Partition By A.ID Order by B.RetSeq)
Returns
ID Position
1 2,12
2 1,24

Here is an alternative solution that does not make use of a subquery; it relies on SQL Server string functions, such as CHARINDEX and REVERSE.
-- First occurrence of @match in t.value, reported as "line,column".
-- Returns NULL when @match does not occur at all.
-- Lines are delimited by CHAR(10) (line feed); CRLF text works too because the LF is what is counted.
SELECT
    CASE
        WHEN m.pos = 0 THEN NULL
        ELSE CONCAT(
                 -- line number = 1 + number of line feeds in front of the match;
                 -- the 'x' sentinel stops LEN() from ignoring trailing spaces
                 LEN(p.prefix + 'x') - LEN(REPLACE(p.prefix, CHAR(10), '') + 'x') + 1,
                 ',',
                 -- column = distance back to the previous line feed; when there is none
                 -- (match on the first line) the column is simply the absolute position
                 COALESCE(NULLIF(CHARINDEX(CHAR(10), REVERSE(p.prefix)), 0), m.pos)
             )
    END
from t
-- absolute, 1-based position of the match (0 = not found)
CROSS APPLY (SELECT CHARINDEX(@match, t.value) AS pos) AS m
-- everything in front of the match; guarded so LEFT() never receives a negative length
CROSS APPLY (SELECT CASE WHEN m.pos > 0 THEN LEFT(t.value, m.pos - 1) END AS prefix) AS p;
The principle is to first find the position of the searched string (the match variable), and then compute the position of the previous line break (CHAR(10), the line feed - this could also be CHAR(13), the carriage return, depending on your end-of-line settings), using REVERSE. With these two values at hand, we can compute the position of the match on the line, and the line number (for this, we compare the length of the substring until the match to its length without line breaks). Special care has to be taken when the match is on the first line.
db<>fiddle here
-- Self-contained demo: locate @match inside a three-line literal and print "line,column".
Declare @match varchar(100) = 'second';
with t as (SELECT 'This is my first line.
This is my second line.
This is my third line.' value)
SELECT
    CASE
        WHEN m.pos = 0 THEN NULL                      -- no occurrence at all
        ELSE CONCAT(
                 -- line number = 1 + number of line feeds in front of the match;
                 -- the 'x' sentinel stops LEN() from ignoring trailing spaces
                 LEN(p.prefix + 'x') - LEN(REPLACE(p.prefix, CHAR(10), '') + 'x') + 1,
                 ',',
                 -- column = distance back to the previous line feed, or the absolute
                 -- position when the match sits on the first line
                 COALESCE(NULLIF(CHARINDEX(CHAR(10), REVERSE(p.prefix)), 0), m.pos)
             )
    END
from t
-- absolute, 1-based position of the match (0 = not found)
CROSS APPLY (SELECT CHARINDEX(@match, t.value) AS pos) AS m
-- text in front of the match; guarded so LEFT() never receives a negative length
CROSS APPLY (SELECT CASE WHEN m.pos > 0 THEN LEFT(t.value, m.pos - 1) END AS prefix) AS p;
GO
| (No column name) |
| :--------------- |
| 2,12 |

Related

Extract strings till the second delim SQL

I wanted to extract all the details till the second /(forward slash)from my table in SQL Server. Any ideas?
website
AA.AA/AB/123
www.google.com/en/abcd/
yahoo.com/us/dev
gmail.com
ouput
website
AA.AA/AB
www.google.com/en
yahoo.com/us
gmail.com
Perhaps this will suit your needs:
-- Sample rows taken from the question.
DECLARE @Table TABLE (Col1 NVARCHAR(100))
INSERT @Table (Col1) VALUES
('website'),
('AA.AA/AB/123'),
('www.google.com/en/abcd/'),
('yahoo.com/us/dev'),
('gmail.com')

-- Keep everything in front of the second '/'; values with fewer than two slashes are returned unchanged.
SELECT
    CASE
        WHEN s.second_slash > 0 THEN LEFT(t.Col1, s.second_slash - 1)  -- "- 1" drops the slash itself
        ELSE t.Col1
    END AS Col1
FROM @Table AS t
-- Position of the second '/': search again starting one past the first one.
-- With no slash at all the inner CHARINDEX is 0, the search starts at 1 and also yields 0.
CROSS APPLY (SELECT CHARINDEX('/', t.Col1, CHARINDEX('/', t.Col1) + 1) AS second_slash) AS s
If you are using SQL Server 2017 or 2019, you can use STRING_AGG() to reassemble the output from STRING_SPLIT():
-- Rebuild the first two '/'-separated parts of Col1; one output row per distinct Col1.
SELECT x.prefix
FROM dbo.table_name AS t
CROSS APPLY
(
    -- Aggregate per source row so duplicate Col1 values are not glued together,
    -- and state the concatenation order explicitly with WITHIN GROUP.
    SELECT STRING_AGG(ss.value, '/') WITHIN GROUP (ORDER BY ss.rn) AS prefix
    FROM
    (
        -- NOTE(review): rn assumes STRING_SPLIT emits parts left-to-right, which is not documented as guaranteed.
        SELECT value, ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS rn
        FROM STRING_SPLIT(t.Col1, '/')
    ) AS ss
    WHERE ss.rn <= 2
) AS x
WHERE t.Col1 IS NOT NULL          -- a NULL input has no parts and produces no row
GROUP BY t.Col1, x.prefix;
You might say:
"But Aaron, the output of STRING_SPLIT() isn't guaranteed to be in order; in fact the documentation warns about that."
This is true; the documentation does say that. But in current versions the output is extremely unlikely to be in anything but left-to-right order. I still suggest you be wary of relying on this, since it could break at any time (I warn about this in more detail here).
If you are on an older version, or don't trust it, you can use a table-valued function that preserves the order of the input string, for example from this answer:
-- Inline table-valued function: splits @List on @Delim and returns each part with its ordinal.
--   @List  : the delimited string to split
--   @Delim : the delimiter
-- Returns  : [Value] (part with leading/trailing spaces trimmed) and idx (rank of the
--            part's starting position, i.e. its order within @List).
-- The numbers come from ROW_NUMBER() over sys.all_objects, so only the first
-- <row count of sys.all_objects> characters of @List can be examined.
CREATE FUNCTION [dbo].[SplitString]
(
    @List  NVARCHAR(MAX),
    @Delim VARCHAR(255)
)
RETURNS TABLE
AS
    RETURN ( SELECT [Value], idx = RANK() OVER (ORDER BY n) FROM
      (
        SELECT n = Number,
          -- part runs from position Number up to the next delimiter
          -- (a delimiter is appended so the last part terminates as well)
          [Value] = LTRIM(RTRIM(SUBSTRING(@List, [Number],
          CHARINDEX(@Delim, @List + @Delim, [Number]) - [Number])))
        FROM (SELECT Number = ROW_NUMBER() OVER (ORDER BY name)
          FROM sys.all_objects) AS x
          WHERE Number <= LEN(@List)
          -- keep only positions that follow a delimiter
          -- (a delimiter is prepended so position 1 qualifies)
          AND SUBSTRING(@Delim + @List, [Number], LEN(@Delim)) = @Delim
      ) AS y
    );
With that function in place, you can then do the following, and now feel safer about relying on order (at the cost of a more expensive query):
-- parts: every Col1 together with its ordered pieces as produced by dbo.SplitString.
;WITH parts AS
(
    SELECT t.Col1, s.idx, s.Value
    FROM dbo.table_name AS t
    CROSS APPLY dbo.SplitString(t.Col1, '/') AS s
)
-- For each distinct Col1, glue the first two pieces back together with '/'.
-- FOR XML PATH('') concatenates '/'+Value in idx order; STUFF removes the leading '/'.
SELECT STUFF(
           (SELECT '/' + p.Value
            FROM parts AS p
            WHERE p.idx <= 2
              AND p.Col1 = t.Col1
            ORDER BY p.idx
            FOR XML PATH(''), TYPE
           ).value(N'./text()[1]', N'nvarchar(max)'),
           1, 1, '')
FROM dbo.table_name AS t
GROUP BY t.Col1;
I find cross apply handy for these situations
-- Keep the text in front of the second '/'; rows with fewer than two slashes are returned unchanged.
-- Testing i2 > 0 (rather than "str like '%/%'") avoids left(str, -1), which raises
-- "Invalid length parameter" when the value contains exactly one slash.
select case when t3.i2 > 0 then left(t.str, t3.i2 - 1) else t.str end as str
from t
cross apply (select charindex('/', t.str) as i1) t2                 --position of first slash (0 = none)
cross apply (select charindex('/', t.str, t2.i1 + 1) as i2) t3      --position of second slash (0 = none)
Below is the simple query you can try. In the below query please replace 'colName' with your column name and Table_1 with your table name.
-- Text in front of the second '/'. Values with fewer than two slashes are returned unchanged
-- instead of failing with "Invalid length parameter passed to the LEFT or SUBSTRING function".
SELECT CASE
           WHEN s.second_slash > 0 THEN LEFT(t.[colName], s.second_slash - 1)
           ELSE t.[colName]
       END AS [AfterSecondPipe]
FROM [Table_1] AS t
-- position of the second '/' (0 when there is no second slash)
CROSS APPLY (SELECT CHARINDEX('/', t.[colName], CHARINDEX('/', t.[colName]) + 1) AS second_slash) AS s

TSQL - Extract text between two words

I did find some info on the site but I am unable to make it work correctly. I have a text field [User] that contains USER: John.Smith SessionId: {There is a space after User: and one after the name}
Everything I tried will either remove the first section or the last one, none remove both. Or will give me this message Invalid length parameter passed to the LEFT or SUBSTRING function
I want to have the name John.Smith extracted from that field.
If possible I do not want to declare any tables.
Thanks
Why not use replace()?
-- Remove the 'USER: ' label first, then the ' SessionId:' label; whatever else the value holds is kept.
select
    replace(
        replace(col, 'USER: ', ''),
        ' SessionId:', ''
    )
If open to a TVF
Example
-- Run the extractor against every row: returns the row's ID plus all columns of the
-- function result for the text found between 'USER:' and 'SessionId:'.
Select src.ID
      ,ext.*
 From  YourTable as src
 Cross Apply [dbo].[tvf-Str-Extract](src.SomeCol, 'USER:', 'SessionId:') as ext
Returns
ID RetSeq RetVal
1 1 John.Smith
The Function if Interested
-- Inline table-valued function: returns every piece of @String that sits between
-- an opening delimiter (@Delim1) and the next closing delimiter (@Delim2).
--   @String : text to search
--   @Delim1 : opening delimiter; the text is split on it
--   @Delim2 : closing delimiter; each split segment is cut just in front of it
-- Returns   : RetSeq (1-based sequence of the hits) and RetVal (the extracted text)
CREATE FUNCTION [dbo].[tvf-Str-Extract] (@String varchar(max),@Delim1 varchar(100),@Delim2 varchar(100))
Returns Table
As
Return (
    Select RetSeq = row_number() over (order by RetSeq)
          ,RetVal = left(RetVal,charindex(@Delim2,RetVal)-1)
    From  (
            -- Split @String on @Delim1 into <x> nodes. The marker is inserted before
            -- "For XML Path" so that &, <, > inside the text are escaped.
            Select RetSeq = row_number() over (order by 1/0)
                  ,RetVal = ltrim(rtrim(B.i.value('(./text())[1]', 'varchar(max)')))
            From  ( values (convert(xml,'<x>' + replace((Select replace(@String,@Delim1,'§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>').query('.'))) as A(XMLData)
            Cross Apply XMLData.nodes('x') AS B(i)
          ) C1
    -- keep only segments in which @Delim2 occurs after at least one character
    Where charindex(@Delim2,RetVal)>1
)
/*
Declare @String varchar(max) = 'Dear [[FirstName]] [[LastName]], ...'
Select * From [dbo].[tvf-Str-Extract] (@String,'[[',']]')
*/
I got SUBSTRING() to work:
SUBSTRING([User], 7, NULLIF(CHARINDEX(' SessionId', [User]), 0) - 7)
Where:
7 = position of the first character of the name, i.e. one past the 6 characters of "USER: "
CHARINDEX(' SessionId', [User]) gives you the character location of the space in front of "SessionId", i.e. one past the end of the name
Subtracting 7 from that location gives the length of the name; NULLIF turns a missing "SessionId" into a NULL result instead of an "Invalid length parameter" error
[User] is written in brackets because USER is a reserved keyword in T-SQL

SQL Server: Find words in string that don't exist in dictionary

Consider the following tables:
-- User-entered keyword strings, one row per record.
DROP TABLE IF EXISTS ##tableA;
CREATE TABLE ##tableA (
    id       int,
    keywords VARCHAR(MAX)
);
INSERT INTO ##tableA (id, keywords)
VALUES (1, 'apple,orange,potato'),
       (2, 'I typed a sentence here because I can''t follow directions.'),
       (3, 'potato and apple');

-- The list of accepted keywords.
DROP TABLE IF EXISTS ##dictionary;
CREATE TABLE ##dictionary (
    id      int,
    keyword VARCHAR(255)
);
INSERT INTO ##dictionary (id, keyword)
VALUES (1, 'apple'),
       (2, 'orange'),
       (3, 'lemon'),
       (4, 'potato');
We have users entering keywords into the keywords column in ##tableA. I want to return the id of any record that contains a word not in ##dictionary.
In the case above:
- id 1 would not be returned because each comma separated keyword is found in the dictionary
- id 2 would be returned because it contains words that are not in the dictionary
- id 3 would be returned because it contains the word "and", which is not in the dictionary
The ideal situation I think would somehow break up the keywords column from ##tableA into individual keywords, then check each of them against the keyword column in ##dictionary.
Here is an inline approach
Example
-- Rows of ##tableA that contain at least one word missing from ##dictionary.
Select Distinct A.*
 From  ##tableA A
 Cross Apply (
        -- Commas are turned into spaces, then the text is split on spaces into <x> nodes.
        -- The marker is inserted before "For XML Path" so that &, <, > in the text are escaped.
        Select RetSeq = Row_Number() over (Order By (Select null))
              ,RetVal = LTrim(RTrim(N.i.value('(./text())[1]', 'varchar(max)')))
         From (Select x = Cast('<x>' + replace((Select replace(replace(A.keywords,',',' '),' ','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as S
         Cross Apply S.x.nodes('x') AS N(i)
       ) B
 -- Skip empty tokens (e.g. produced by ", " or double spaces); they are not words.
 Where B.RetVal <> ''
   and Not Exists (Select 1 From ##dictionary C Where C.keyword = B.RetVal)
Returns
id keywords
2 I typed a sentence here because I can't follow directions.
3 potato and apple
Just another BRUTE FORCE OPTION - Just for fun
-- 1) Serialise ##tableA as <row id="..." keywords="..."/> elements.
Declare @S varchar(max) = (Select id, keywords From ##tableA For XML Raw)

-- 2) Blank out every dictionary keyword in the serialised text
--    (the assignment is applied once per ##dictionary row).
--    NOTE(review): this is a plain substring replace over the whole XML string, so a keyword
--    that also occurs inside another word or inside the markup itself would be removed there too.
Select @S = replace(@S, keyword, '') From ##dictionary

-- 3) Any row whose keywords attribute still holds a letter contained a non-dictionary word.
Select id = B.i.value('@id', 'int')
 From (Select x = Cast(@S as xml).query('.')) as A
 Cross Apply x.nodes('row') AS B(i)
 Where B.i.value('@keywords', 'varchar(max)') like '%[a-z]%'
Under SQL Server 2017, you can use STRING_SPLIT:
-- ids of ##tableA rows that contain at least one word missing from ##dictionary.
SELECT
    a.id
FROM
    ##tableA AS a
    -- split on spaces first, then split every piece on commas
    CROSS APPLY STRING_SPLIT(a.keywords, ' ') AS splitBySpace
    CROSS APPLY STRING_SPLIT(splitBySpace.value, ',') AS splitBySpaceOrComma
WHERE
    -- empty pieces (from ", " or repeated separators) are not words
    splitBySpaceOrComma.value <> ''
    -- NOT EXISTS instead of NOT IN: NOT IN returns no rows at all once ##dictionary holds a NULL keyword
    AND NOT EXISTS (
        SELECT 1
        FROM ##dictionary AS d
        WHERE d.keyword = splitBySpaceOrComma.value
    )
GROUP BY
    a.id;
Using:
Splitter
you can split lines by delimiter then use the result to match against the dictionary. like this:
-- keywords values that contain an item missing from ##dictionary.
-- ' and ' is treated as an additional separator by rewriting it to ','.
SELECT t.keywords
FROM ##tablea AS t
CROSS APPLY (SELECT REPLACE(t.keywords, ' and ', ',')) AS new(kwds)
CROSS APPLY dbo.DelimitedSplit8K(new.kwds, ',') AS s
-- NOT EXISTS instead of NOT IN: NOT IN returns no rows at all once ##dictionary holds a NULL keyword
WHERE NOT EXISTS (SELECT 1 FROM ##dictionary AS d WHERE d.keyword = s.item)
Try this:
-- Rows of ##tableA in which something is left over after removing the four
-- dictionary keywords (ids 1-4), all spaces and all commas.
select t.*
from ##tableA as t
-- d: the four dictionary keywords pivoted into a single row
cross join (
    select max(case when id = 1 then keyword end) as firstKeyword,
           max(case when id = 2 then keyword end) as secondKeyword,
           max(case when id = 3 then keyword end) as thirdKeyword,
           max(case when id = 4 then keyword end) as fourthKeyword
    from ##dictionary
) as d
-- k: keywords text with every dictionary keyword blanked out
cross apply (
    select replace(replace(replace(replace(t.keywords, d.firstKeyword, ''), d.secondKeyword, ''), d.thirdKeyword, ''), d.fourthKeyword, '') as without_keywords
) as k
-- r: the same text without spaces and commas
cross apply (
    select replace(replace(k.without_keywords, ' ', ''), ',', '') as leftover
) as r
where len(r.leftover) > 0
First, you need to pivot your data from ##dictionary; then you can replace your keywords, as well as spaces and commas, with '' and see in the end if there are any characters left.

Pad zeros for nvarchar column in table

I want to add zero for only single digit value before the dot (.)
When i use
Input:
1.3.45 TU 3
1.2.5 TU 8
Expected Output:
01034503
01020508
Current query:
-- Asker's current attempt: strips 'TU', dots and spaces, but pads nothing.
-- #Column and Table are the asker's placeholders for the real column and table names.
select
    replace(
        replace(
            replace(#Column, 'TU', ''),   -- 1) remove TU
            '.', ''                       -- 2) remove dot
        ),
        ' ', ''                           -- 3) remove space
    )
from Table;
Current Output:
13453
1258
If SQL Server, you can use a Split/Parse function to normalize the string
-- Sample rows taken from the question.
Declare @YourTable Table (YourField varchar(25))
Insert Into @YourTable (YourField) values
 ('1.3.45 TU 3'),
 ('1.2.5 TU 8')

-- Split each value on '.' and ' ', keep the numeric parts, left-pad each to two digits
-- and concatenate them back in their original order.
Select A.*
      ,NewField = B.String
 From  @YourTable A
 Cross Apply (
        Select String = ltrim((Select cast(P.RetVal as varchar(25))
                                From (Select RetSeq
                                            ,RetVal = Right('00' + RetVal, 2)          -- two-digit zero padding
                                       From [dbo].[udf-Str-Parse](replace(A.YourField,' ','.'),'.')
                                       Where Try_Convert(int,RetVal) >= 0 ) P         -- drops 'TU' and empty parts
                                Order By P.RetSeq                                      -- make the concatenation order explicit
                                For XML Path ('')))
       ) B
Returns
YourField NewField
1.3.45 TU 3 01034503
1.2.5 TU 8 01020508
The UDF if needed
-- Inline table-valued function: splits @String on @Delimiter.
--   @String    : the delimited text
--   @Delimiter : the separator
-- Returns      : RetSeq (1-based sequence number) and RetVal (trimmed part; NULL for an empty part)
-- The delimiter is first swapped for a marker and the text is passed through "For XML Path"
-- so that &, <, > in @String are escaped before the cast to xml; without that step such
-- characters make the cast fail.
-- NOTE(review): RetSeq relies on nodes() returning rows in document order - confirm for your version.
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimiter varchar(10))
Returns Table
As
Return (
    Select RetSeq = Row_Number() over (Order By (Select null))
          ,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
    From  (Select x = Cast('<x>' + Replace((Select Replace(@String,@Delimiter,'§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>') + '</x>' as xml).query('.')) as A
    Cross Apply x.nodes('x') AS B(i)
);
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
Where are the zeros? You want something like this:
-- Zero-pad every numeric part to two digits: '1.3.45 TU 3' -> '01034503'.
-- ' TU ' is rewritten to '.', giving a four-part dotted value that PARSENAME can take apart
-- (PARSENAME numbers the parts from the right: 4 = first part, 1 = last part).
-- #Column and Table are the asker's placeholders for the real column and table names.
-- Values that do not have exactly four parts yield NULL.
select right('0' + parsename(p.dotted, 4), 2)
     + right('0' + parsename(p.dotted, 3), 2)
     + right('0' + parsename(p.dotted, 2), 2)
     + right('0' + parsename(p.dotted, 1), 2)
from Table
cross apply (select replace(#Column, ' TU ', '.') as dotted) as p;

Issue with some characters in the splitting strings

Below is the stored procedure
-- cte: turn the comma-separated States / Products lists of each vendor row into XML
-- (<r>item</r><r>item</r>...). The XML special characters &, <, > are escaped first
-- ('&' before the others) so that values containing them do not break the CAST.
;WITH cte AS (
    SELECT
        AgentId,
        CAST('<r>' + REPLACE(REPLACE(REPLACE(REPLACE(States,   '&', '&amp;'), '<', '&lt;'), '>', '&gt;'), ',', '</r><r>') + '</r>' AS XML) AS States,
        CAST('<r>' + REPLACE(REPLACE(REPLACE(REPLACE(Products, '&', '&amp;'), '<', '&lt;'), '>', '&gt;'), ',', '</r><r>') + '</r>' AS XML) AS Products
    FROM @tbVendor
)
-- FinalList: one row per (agent, state, product) combination, values trimmed.
,FinalList AS (
    SELECT
        AgentId,
        RTRIM(LTRIM (sTable.sColumn.value('.', 'VARCHAR(MAX)'))) AS States,
        RTRIM(LTRIM (PTable.PColumn.value('.', 'VARCHAR(MAX)'))) AS Products
    FROM cte
    CROSS APPLY States.nodes('//r') AS sTable(sColumn)
    CROSS APPLY Products.nodes('//r') AS PTable(PColumn)
)
-- Products offered by agent 1 in New York, matched to their ProductId by exact name.
SELECT DISTINCT F.Products AS ProductName
    ,T.ProductId AS ProductId
FROM FinalList F
CROSS APPLY (SELECT ProductId FROM @tbProduct TP WHERE TP.ProductName = F.Products) AS T
WHERE F.States = 'New York'
    AND F.AgentId = 1
ORDER BY T.ProductId ASC
This is the SQL fiddle
http://rextester.com/SVXKFH57654
It works fine, but it eliminates the records with a "-" character in the ProductName field, e.g. Non-Stick Utensils, etc.
I am not able to tackle this issue... Please help me!!!
For the splitting string you use XML. In this case, you will have problems with some characters. For example &, <, >. You can avoid this by using another method of splitting a string
Table function splitting strings:
-- Inline table-valued function: splits @str on the single-character separator @sep
-- with a recursive CTE (no XML involved, so &, <, > need no special handling).
--   @str : the delimited text
--   @sep : the separator character, ',' by default
-- Returns: one row per part in column Col.
-- NOTE(review): every part costs one recursion level; the calling query may need
-- OPTION (MAXRECURSION n) for long lists - confirm against your server's limit.
CREATE FUNCTION [dbo].[SplitStr] (
    @str varchar(MAX)
    ,@sep char(1)=','
)
RETURNS TABLE
AS
RETURN
(
    -- n1 = position of the separator in front of the current part (0 for the first part)
    -- n2 = position of the separator behind it (a separator is appended so the last part ends too)
    WITH Split ( n1, n2)
    AS
    (
        SELECT CAST(0 as bigint) as n1, CHARINDEX(@sep, @str + @sep) as n2
        UNION ALL
        SELECT n2 as n1, CHARINDEX(@sep, @str + @sep, n2 + 1) as n2
        FROM Split
        WHERE n2 < LEN(@str)
    )
    -- the part is the text strictly between the two separators
    SELECT SUBSTRING(@str, n1+1, n2-n1-1) as Col FROM Split
)
GO
Using this function:
-- One row per (agent, state, product) combination: both comma-separated
-- columns of @tbVendor are expanded with dbo.SplitStr.
SELECT
    tbVendor.AgentId
    ,States.Col as States
    ,Products.Col as Products
FROM @tbVendor as tbVendor
CROSS APPLY [dbo].[SplitStr](States, ',') as States
CROSS APPLY [dbo].[SplitStr](Products, ',') as Products
this is equivalent to your code
-- First convert all comma separated data into tabular form as:
-- cte wraps every list item in <r>...</r> and casts the result to XML.
-- No escaping is applied here, so values containing &, < or > make the CAST fail.
;WITH cte AS (
    SELECT
        AgentId,
        CAST('<r>' + REPLACE(States, ',', '</r><r>') + '</r>' AS XML) AS States,
        CAST('<r>' + REPLACE(Products, ',', '</r><r>') + '</r>' AS XML) AS Products
    FROM @tbVendor
)
-- One row per (agent, state, product) combination.
SELECT
    AgentId,
    sTable.sColumn.value('.', 'VARCHAR(MAX)') AS States,
    PTable.PColumn.value('.', 'VARCHAR(MAX)') AS Products
FROM cte
CROSS APPLY States.nodes('//r') AS sTable(sColumn)
CROSS APPLY Products.nodes('//r') AS PTable(PColumn)