Removing an entire line when only the beginning string is known - sql

I am trying to remove any line that has a particular substring Signed By: in it.
So the original string would look like:
information on line 1
information on line 2
Signed By: John Smith
information on an additional line
Signed By: Jane Doe
And after the removal of the lines
information on line 1
information on line 2
information on an additional line
The issue I am running into is that, while I can easily replace Signed By:, I also need to remove the name after it, which can have a very different number of characters.
EDIT
To make the issue clearer, all of this is contained in a single field within the database. So I would get the entire original string if I was to say
-- Fetch the full, multi-line note text of a single row.
-- There is no ORDER BY, so which row TOP returns is up to the engine.
SELECT TOP (1)
    NoteValue
FROM Notes

You can use a split function to split the string into rows based on char(13) or char(10). Then, stuff it back together with for xml.
ONLINE DEMO
-- Demo: drop every line that contains 'Signed By:' from a multi-line value.
-- Table variables need the @ prefix (# names a temp table).
declare @table table (strr varchar(4000))

insert into @table (strr)
values
('information on line 1
Signed By: John Smith
information on line 2
Signed By: John Smith
information on an additional line
Signed By: Jane Doe')

select
    -- STUFF strips the leading CR+LF (2 characters) added in front of the first kept line.
    stuff((
        select char(13) + char(10) + x.Item
        -- Remove CRs and split on LF so CRLF and LF-only text are both handled;
        -- splitting on CR alone never splits LF-only text and leaves an LF on each item.
        from dbo.DelimitedSplit8K(replace(t.strr, char(13), ''), char(10)) x
        where x.Item not like '%Signed By:%'
        -- Without an explicit ORDER BY the re-assembled line order is not guaranteed.
        order by x.ItemNumber
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 2, '') as NoteValue
from @table t
RETURNS
information on line 1
information on line 2
information on an additional line
HERE IS THE SPLITTER I USE
USE [Test01]
GO
/****** Object: UserDefinedFunction [dbo].[DelimitedSplit8K] Script Date: 09/15/2017 9:59:16 AM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
/*
  dbo.DelimitedSplit8K -- inline table-valued string splitter.

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(8000).
    @pDelimiter : single-character delimiter.
  Returns one row per element:
    ItemNumber  : 1-based position of the element, ordered by its start offset.
    Item        : element text; leading, trailing or adjacent delimiters give empty strings.
  A NULL @pString yields one row whose Item is NULL, because cteStart always emits position 1.
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K] (@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
/* "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
   enough to cover VARCHAR(8000) */
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
GO

You could try
-- Keep only the text that precedes the first 'Signed by' marker; notes without the
-- marker are returned unchanged. Everything after the first marker is dropped,
-- including later lines that are not signatures.
-- PATINDEX takes the pattern first and the searched expression second.
SELECT TOP 1
    CASE
        WHEN PATINDEX('%Signed by%', NoteValue) > 0
            THEN LEFT(NoteValue, PATINDEX('%Signed by%', NoteValue) - 1)
        ELSE NoteValue
    END AS NoteValue
FROM Notes

Another solution using TSQL
-- Removes, in place, every line of @document from the marker text to the end of that
-- line, including the line terminator, so no blank lines are left behind.
-- Handles CRLF, LF-only and CR-only line endings.
DECLARE @document varchar(max);
SET @document = 'information on line 1
information on line 2
Signed By: John Smith
information on an additional line
Signed By: Jane Doe';

DECLARE @ReplaceText varchar(100) = 'Signed';
DECLARE @index int = CHARINDEX(@ReplaceText, @document);
DECLARE @cr int, @lf int, @lineEnd int, @removeLen int;

WHILE @index > 0
BEGIN
    -- The line ends at whichever of CR / LF comes first after the marker (0 = none left).
    SET @cr = CHARINDEX(CHAR(13), @document, @index);
    SET @lf = CHARINDEX(CHAR(10), @document, @index);
    SET @lineEnd = CASE
                       WHEN @cr = 0 THEN @lf
                       WHEN @lf = 0 THEN @cr
                       WHEN @cr < @lf THEN @cr
                       ELSE @lf
                   END;

    IF @lineEnd > 0
    BEGIN
        -- Remove marker..end of line plus the terminator (2 characters for CRLF, otherwise 1).
        SET @removeLen = @lineEnd - @index
                       + CASE WHEN SUBSTRING(@document, @lineEnd, 2) = CHAR(13) + CHAR(10) THEN 2 ELSE 1 END;
        SET @document = STUFF(@document, @index, @removeLen, '');
    END
    ELSE
    BEGIN
        -- Marker is on the last line: cut it off, then drop the line break that preceded it.
        SET @document = LEFT(@document, @index - 1);
        IF RIGHT(@document, 2) = CHAR(13) + CHAR(10)
            SET @document = LEFT(@document, DATALENGTH(@document) - 2);
        ELSE IF RIGHT(@document, 1) IN (CHAR(13), CHAR(10))
            SET @document = LEFT(@document, DATALENGTH(@document) - 1);
    END

    -- Everything before @index is already clean, so continue searching from there.
    SET @index = CHARINDEX(@ReplaceText, @document, @index);
END

SELECT @document

Related

How to put mailto around email addresses in text string

I am trying to figure out how to be able to select/find and format each email address contained in a piece of text.
Example string:
Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – paulhall@email.com - Mikel Zubizarreta – mikelzubizarreta@email.com
The output of the string should be:
Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – <a href='mailto:paulhall@email.com'>paulhall@email.com</a> - Mikel Zubizarreta – <a href='mailto:mikelzubizarreta@email.com'>mikelzubizarreta@email.com</a>
This are the attempts I have come up with:
Within a select:
-- Select-list item: when CONDITION contains an '@', wrap the address returned by
-- dbo.FN_GET_EMAIL_FROM_STRING in a mailto anchor; otherwise pass CONDITION through.
-- The helper returns a single value, so only that one address (every occurrence of it)
-- is wrapped; other addresses in the same text are left untouched.
, CASE
    WHEN CHARINDEX('@',CONDITION) > 0 THEN
        REPLACE(CONDITION, dbo.FN_GET_EMAIL_FROM_STRING(CONDITION), '<a href=''mailto:' + dbo.FN_GET_EMAIL_FROM_STRING(CONDITION) + '''>' + dbo.FN_GET_EMAIL_FROM_STRING(CONDITION) + '</a>')
    ELSE CONDITION
  END [CONDITION]
Contents of dbo.FN_GET_EMAIL_FROM_STRING(CONDITION):
/*
  dbo.FN_GET_EMAIL_FROM_STRING
  Returns one space-delimited token of @TextContainingEmail that contains an '@',
  or NULL when no token does. Only one token is returned (TOP 1 without ORDER BY),
  so additional addresses in the same text are ignored.
*/
ALTER FUNCTION [dbo].[FN_GET_EMAIL_FROM_STRING]
(
    @TextContainingEmail VARCHAR(1000)
)
RETURNS VARCHAR(1000)
AS
BEGIN
    DECLARE @retval VARCHAR(1000);

    SELECT TOP (1)
        @retval = Items
    FROM
        -- The delimiter parameter is CHAR(1), so '' is padded to a single space;
        -- the space is spelled out here to make that explicit.
        dbo.FN_SPLIT_STRING(@TextContainingEmail, ' ')
    WHERE
        Items LIKE '%@%';

    RETURN @retval;
END;
Contents of: FN_SPLIT_STRING(#TextContainingEmail, '')
/*
  dbo.FN_SPLIT_STRING
  Splits @STRING on @Delimiter and returns one row per piece in @Results.Items.
  A NULL @STRING returns no rows.

  Behaviour kept from the original:
    - After each piece is taken, commas are stripped from the remaining text, so
      later pieces lose punctuation commas (the first piece keeps its own).
  Fixes:
    - The remaining text is trimmed BEFORE the delimiter is located. The position
      was previously taken from a trimmed copy and applied to the untrimmed string,
      which sliced at the wrong offset when leading spaces were present.
    - Comma stripping is skipped when the delimiter itself is a comma, otherwise it
      removed every remaining delimiter after the first piece.
*/
ALTER FUNCTION [dbo].[FN_SPLIT_STRING]
(
    @STRING NVARCHAR(4000)
  , @Delimiter CHAR(1)
)
RETURNS @Results TABLE(Items NVARCHAR(4000))
AS
BEGIN
    DECLARE @INDEX INT;
    DECLARE @SLICE NVARCHAR(4000);

    -- Must start non-zero so the loop body runs at least once.
    SELECT @INDEX = 1;

    IF @STRING IS NULL
        RETURN;

    WHILE @INDEX != 0
    BEGIN
        -- Trim first so the delimiter position refers to the string that is sliced below.
        SELECT @STRING = LTRIM(RTRIM(@STRING));

        -- Position of the first delimiter (0 when none is left).
        SELECT @INDEX = CHARINDEX(@Delimiter, @STRING);

        -- Everything left of the delimiter is the next piece; with no delimiter, the rest is.
        IF @INDEX != 0
            SELECT @SLICE = LEFT(@STRING, @INDEX - 1);
        ELSE
            SELECT @SLICE = @STRING;

        INSERT INTO @Results (Items)
        VALUES (@SLICE);

        -- Chop the consumed piece (and its delimiter) off the front.
        SELECT @STRING = RIGHT(@STRING, LEN(@STRING) - @INDEX);

        IF @Delimiter <> ','
            SELECT @STRING = REPLACE(@STRING, ',', '');

        -- Nothing left to split.
        IF LEN(@STRING) = 0
            BREAK;
    END;

    RETURN;
END;
But the output for the string I used as an example at the top of this post, ends up looking like this:
Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – <a href='mailto:paulhall@email.com'>paulhall@email.com</a> - Mikel Zubizarreta – mikelzubizarreta@email.com
As you can see, it sort of works, but it only adds the 'mailto' tag to the first email address and not the second one.
This solution uses the splitter function created by Eirikur Eiriksson based on the original function by Jeff Moden. The whole explanation of this function can be found here.
I'll just copy the code for the function.
/*
  dbo.DelimitedSplit8K_LEAD -- inline table-valued string splitter that uses LEAD()
  to find where each element ends instead of a CHARINDEX per element.

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(8000).
    @pDelimiter : single-character delimiter.
  Returns one row per element:
    ItemNumber  : 1-based position of the element, ordered by its start offset.
    Item        : element text.
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K_LEAD]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 0 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "zero base" and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT 0 UNION ALL
    SELECT TOP (DATALENGTH(ISNULL(@pString,1))) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    -- N = 0 supplies the start of the first element.
    SELECT t.N+1
    FROM cteTally t
    WHERE (SUBSTRING(@pString,t.N,1) = @pDelimiter OR t.N = 0)
)
--===== Do the actual split. LEAD() fetches the start of the next element; its default of 1
--      makes the NULLIF/ISNULL pair fall back to length 8000 for the final element.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY s.N1),
       Item = SUBSTRING(@pString,s.N1,ISNULL(NULLIF((LEAD(s.N1,1,1) OVER (ORDER BY s.N1) - 1),0)-s.N1,8000))
FROM cteStart s
;
GO
This way we can identify the email addresses independently and concatenate the string again using FOR XML.
-- Demo: wrap every e-mail-looking token of a text in a mailto anchor.
-- #SampleData is a genuine temp table, so its # prefix is intentional.
CREATE TABLE #SampleData(
    String varchar(8000)
)

INSERT INTO #SampleData (String) VALUES('Notification: Organizer must notify at least 30 days prior to the event. Provide the event information, including: day of contact information, location, date, schedule, activities, etc. Paul T. Hall – paulhall@email.com - Mikel Zubizarreta – mikelzubizarreta@email.com')

-- Split on spaces, decorate tokens shaped like x@y.z, then glue the tokens back together
-- in their original order. STUFF removes the leading space added in front of the first token.
SELECT STUFF(( SELECT ' ' + CASE WHEN s.Item LIKE '_%@_%._%' THEN '<a href=''mailto:' + s.Item + '''>' + s.Item + '</a>'
                                 ELSE s.Item END
               FROM dbo.DelimitedSplit8K_LEAD( d.String, ' ') s
               ORDER BY s.ItemNumber
               -- TYPE + .value() returns the text without XML entity escaping of < and >.
               FOR XML PATH(''), TYPE).value('./text()[1]', 'varchar(max)'), 1, 1, '')
FROM #SampleData d

SQL Server - Split column data and retrieve last second value

I have a column name MasterCode in XYZ Table where data is stored in below form.
.105248.105250.104150.111004.
Now first of all I want to split the data into :
105248
105250
104150
111004
Then I want to retrieve only the second-to-last value from the above.
So In the above given array, value returned should be 104150.
Use a split string function, but not the built-in one, since it returns only the values and you lose the position information.
You can use Jeff Moden's DelimitedSplit8K that will return the item and the item index:
/*
  dbo.DelimitedSplit8K -- inline table-valued string splitter.

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(8000).
    @pDelimiter : single-character delimiter.
  Returns one row per element:
    ItemNumber  : 1-based position of the element, ordered by its start offset.
    Item        : element text; leading, trailing or adjacent delimiters give empty strings.
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
Then you can use it to split the string and it will return a table like this:
-- Split the sample code on '.'; the leading and trailing dots produce an empty
-- first and last item.
DECLARE @string varchar(100) = '.105248.105250.104150.111004.';
SELECT ItemNumber, Item
FROM [dbo].[DelimitedSplit8K](@string, '.')
ItemNumber Item
1
2 105248
3 105250
4 104150
5 111004
6
You want only the parts where there actually is an item, so add a where clause, and you want the second from last so add row_number(), and you want the entire thing in a common table expression so that you can query it:
-- Number the non-empty items from the end of the string (rn = 1 is the last one)
-- and pick the second from last.
DECLARE @string varchar(100) = '.105248.105250.104150.111004.';
WITH CTE AS
(
    SELECT Item, ROW_NUMBER() OVER(ORDER BY ItemNumber DESC) As rn
    FROM [dbo].[DelimitedSplit8K](@string, '.')
    WHERE Item <> ''
)
-- And the query:
SELECT Item
FROM CTE
WHERE rn = 2
Result: 104150
If there are always four parts, you can use PARSENAME():
-- SUBSTRING drops the leading and trailing dot; PARSENAME counts parts from the right,
-- so part 2 is the second-from-last value. PARSENAME handles at most four parts.
DECLARE @s varchar(64) = '.105248.105250.104150.111004.';
SELECT PARSENAME(SUBSTRING(@s, 2, LEN(@s)-2),2);
Depending on your version of SQL SERVER, you can also use the STRING_SPLIT function.
-- STRING_SPLIT returns no position, so one is derived from where '.value.' first
-- occurs in the source string. Duplicate values share that first occurrence, so
-- their relative order is not reliable.
DECLARE @string varchar(100) = '.105248.105250.104150.111004.';
SELECT value,
       ROW_NUMBER() OVER (ORDER BY CHARINDEX('.' + value + '.', '.' + @string + '.')) AS Pos
FROM STRING_SPLIT(@string,'.')
WHERE RTRIM(value) <> '';
It doesn't return the original position like Jeff's splitter, but does compare very favourably if you check Aaron Bertrand's Article :
Performance Surprises and Assumptions : STRING_SPLIT()
Edit:
Added position, but although works in this case may have issues with duplicate values
You can create a SQL Server table-valued function with parameters for the string value and the delimiter, and call that function to get the expected results.
/*
  dbo.SplitString -- inline table-valued splitter built on a recursive CTE.

  Parameters (T-SQL parameter names must start with @):
    @str       : text to split, NVARCHAR(4000).
    @separator : single-character separator.
  Returns:
    ID : 0-based position of the token (p - 1).
    s  : token text; empty for leading, trailing or adjacent separators.

  Each separator costs one recursion level. The default recursion limit is 100 and an
  inline function cannot carry its own OPTION clause, so callers splitting longer lists
  must add OPTION (MAXRECURSION n) to the calling statement.
*/
ALTER function [dbo].[SplitString]
(
    @str nvarchar(4000),
    @separator char(1)
)
returns table
AS
return (
    -- p = token ordinal, a = start of the token, b = position of the separator that ends it (0 = none)
    with tokens(p, a, b) AS (
        select
            1,
            1,
            charindex(@separator, @str)
        union all
        select
            p + 1,
            b + 1,
            charindex(@separator, @str, b + 1)
        from tokens
        where b > 0
    )
    select
        p-1 ID,
        substring(
            @str,
            a,
            -- the last token has no closing separator: take the rest of the string
            case when b > 0 then b-a ELSE 4000 end)
        AS s
    from tokens
)
To call the function
-- List the non-empty tokens of the sample code.
SELECT *
FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.')
WHERE ISNULL(S,'') <> ''
Output
ID s
1 105248
2 105250
3 104150
4 111004
To get only second value you can write your query as shown below
DECLARE @MaxID INT

-- Step 1: highest ID among the non-empty tokens, i.e. the last value.
SELECT @MaxID = MAX (ID)
FROM (SELECT * FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> '') A

-- Step 2: highest ID below that, i.e. the second-from-last value.
-- (An aggregate without GROUP BY already returns one row, so TOP 1 is not needed.)
SELECT @MaxID = MAX (ID)
FROM (
    SELECT * FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> ''
) a
WHERE ID < @MaxID

-- Step 3: return that token.
SELECT * FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> '' AND ID = @MaxID
Output
ID s
3 104150
If you want 1 as value of ID then you can write your query as shown below in last line of query.
-- Same lookup, reporting a constant 1 as ID; @MaxID comes from the preceding statements.
SELECT 1 AS ID , S FROM [DBO].[SPLITSTRING] ('.105248.105250.104150.111004.', '.') WHERE ISNULL(S,'') <> '' AND ID = @MaxID
Then the output will be
ID S
1 104150
Hope this will help you.
Try this
-- Split a dot-delimited code via XML and return the second-from-last non-empty value.
-- Table variables need the @ prefix (# names a temp table).
DECLARE @DATA AS TABLE (Data nvarchar(1000))
INSERT INTO @DATA (Data)
SELECT '.105248.105250.104150.111004.'
;WITH CTE
AS
(
    -- Rank by POSITION from the end. Ranking by the value itself (ORDER BY Data DESC)
    -- picks the second-largest value (105250), not the second-from-last one (104150).
    SELECT Fnl.Data, ROW_NUMBER() OVER (ORDER BY Fnl.Pos DESC) AS Rnk
    FROM
    (
        SELECT Split.a.value('.','nvarchar(100)') AS Data,
               -- 1-based document position: number of sibling nodes that precede this one, plus 1
               Split.a.value('for $i in . return count(../*[. << $i]) + 1', 'int') AS Pos
        FROM (
            -- Turn every '.' into an element boundary; the <R> root gives all <S> nodes a
            -- common parent. Text containing XML special characters (&, <) would make the CAST fail.
            SELECT CAST('<R><S>'+REPLACE(Data,'.','</S><S>')+'</S></R>' AS XML ) As Data
            FROM @DATA
        ) DT
        CROSS APPLY Data.nodes('/R/S') AS Split(a)
    ) AS Fnl
    WHERE Fnl.Data <> ''
)
SELECT Data FROM CTE
WHERE Rnk = 2
Result
Data
-----
105248
105250
104150
111004
It can also be achieved using only string functions:
-- Rebuild the two-row sample table.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL
    DROP TABLE #temp

SELECT v.code
INTO #temp
FROM (
    SELECT '.105248.105250.104150.111004.' AS code
    UNION ALL
    SELECT '.205248.205250.204150.211004.'
) AS v

-- rev_head: the code with its last value (and the trailing dot) cut off, reversed, so the
-- wanted value sits in front of the first dot. Taking that prefix and reversing it again
-- yields the second-from-last value.
SELECT
    REVERSE(LEFT(r.rev_head, CHARINDEX('.', r.rev_head) - 1)) AS second_last_value
FROM #temp AS t
CROSS APPLY (
    SELECT REVERSE(LEFT(t.code, LEN(t.code) - CHARINDEX('.', REVERSE(t.code), 2))) AS rev_head
) AS r
Result:
second_last_value
-----------------------------
104150
204150

Extract All Instances of String into Concatenated Result

Using SQL Server 2014, I'm wanting to search within a field and return all instances of a string that is found, plus the following word. For example, the text in the column may be:
"exec sproc1 and then some more text here and then maybe execute sproc2 exec storedproc3 and maybe exec sproc1"
I'd like to elegantly return "sproc1, sproc2, storedproc3, sproc1", as each was the word following either exec or execute (as delimited by spaces). As you can see in the example, the leading word may vary, as may the length of the sproc name. I've been able to return the first usage of exec/execute; my issue is that sometimes there are multiple (see below).
-- Select-list expression: extracts the word that follows the FIRST 'EXECUTE' (or, failing
-- that, the first 'EXEC') in a TSQL job step command, with '[' and ']' stripped from it.
-- Every CHARINDEX starts at position 1, so later exec/execute calls in the same command are ignored.
REPLACE(REPLACE(CASE
-- Branch 1: the command contains 'EXECUTE'. +8 skips the 7-character keyword plus the
-- one character that follows it.
WHEN [sJSTP].[subsystem]='TSQL' AND CHARINDEX('EXECUTE',[sJSTP].[command],1)>0
THEN SUBSTRING([sJSTP].[command],CHARINDEX('EXECUTE',[sJSTP].[command],1)+8,
-- Length: up to the next space after the name when there is one, otherwise LEN(command)
-- (i.e. the rest of the string).
IIF(
CHARINDEX(' ',[sJSTP].[command],CHARINDEX('EXECUTE',[sJSTP].[command],1)+8)>0,
CHARINDEX(' ',[sJSTP].[command],CHARINDEX('EXECUTE',[sJSTP].[command],1)+8)-CHARINDEX('EXECUTE',[sJSTP].[command],1)-8,
LEN([sJSTP].[command])))
-- Branch 2: the command contains 'EXEC' and does not contain 'DCEXEC'. +5 skips the
-- 4-character keyword plus the one character that follows it.
WHEN [sJSTP].[subsystem]='TSQL' AND CHARINDEX('EXEC',[sJSTP].[command],1)>0 AND CHARINDEX('DCEXEC',[sJSTP].[command],1)<=0
THEN SUBSTRING([sJSTP].[command],CHARINDEX('EXEC',[sJSTP].[command],1)+5,
IIF(
CHARINDEX(' ',[sJSTP].[command],CHARINDEX('EXEC',[sJSTP].[command],1)+5)>0,
CHARINDEX(' ',[sJSTP].[command],CHARINDEX('EXEC',[sJSTP].[command],1)+5)-CHARINDEX('EXEC',[sJSTP].[command],1)-5,
LEN([sJSTP].[command])))
-- No ELSE: steps matching neither branch yield NULL.
END,'[',''),']','') AS sprocname
The ultimate use of this is parsing job commands from the msdb..sysjobsteps table to see what stored procedures are being used.
Edit: Add sample data
Sample 1:
exec quarterly_run 1, 'BW'
exec quarterly_run_2 1, 'QR '
exec quarterly_run 2, 'VAS'
exec quarterly_run 1, 'WR'
exec quarterly_run 3, 'RW'
exec quarterly_run_2 1, 'ASF'
exec quarterly_run_3 1, 'ALL'
Sample 2:
declare @rundate datetime, @rptqtr datetime, @qtr int
set @rundate = getdate()
set @rptqtr = '06/30/2016'
set @qtr = (select datediff(quarter,@rptqtr,@rundate))
exec quarterly_extract @qtr
Sample 3:
exec Daily_Sync_Process
exec Daily_Process
Just another inline option, and not limited to 8K
Example
-- Demo: for every row, list the words that directly follow Exec / Execute, comma separated.
-- Table variables need the @ prefix; the sample commands are T-SQL, so their variables
-- use @ as well.
Declare @YourTable table (ID int,SomeCol varchar(max))
Insert into @YourTable values
(1,'exec quarterly_run 1, ''BW'' exec quarterly_run_2 1, ''QR '' exec quarterly_run 2, ''VAS'' exec quarterly_run 1, ''WR'' exec quarterly_run 3, ''RW'' exec quarterly_run_2 1, ''ASF'' exec quarterly_run_3 1, ''ALL''')
,(2,'declare @rundate datetime, @rptqtr datetime, @qtr int
set @rundate = getdate() set @rptqtr = ''06/30/2016''
set @qtr = (select datediff(quarter,@rptqtr,@rundate))
exec quarterly_extract @qtr
')
,(3,'exec Daily_Sync_Process exec Daily_Process')
;with cte as (
    Select A.ID
          ,C.*
    From @YourTable A
    -- Line breaks become spaces so the text can be tokenized on a single character.
    Cross Apply (values (replace(replace(SomeCol,char(13),' '),char(10),' '))) B(CleanString)
    Cross Apply (
        -- Keep a token only when the token before it is Exec/Execute; all others become NULL.
        Select RetSeq,RetVal = case when Lag(RetVal,1) over (Order by RetSeq) in ('Exec','Execute') then RetVal else null end
        From (
            -- RetSeq numbers the tokens; ORDER BY (SELECT NULL) relies on nodes() returning
            -- them in document order.
            Select RetSeq = Row_Number() over (Order By (Select null))
                  ,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
            -- The inner FOR XML PATH('') XML-escapes the text; the placeholder is then swapped
            -- for element boundaries so each word becomes its own <x> node.
            From (Select x = Cast('<x>' + replace((Select replace(CleanString,' ','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
            Cross Apply x.nodes('x') AS B(i)
        ) C1
    ) C
)
-- Concatenate the kept tokens per ID; NULL tokens drop out of the concatenation and
-- STUFF removes the leading ', '.
Select A.ID
      ,NewString = Stuff((Select ', ' +RetVal From cte Where ID=A.ID Order By RetSeq For XML Path ('')),1,2,'')
From cte A
Group By A.ID
Returns
ID NewString
1 quarterly_run, quarterly_run_2, quarterly_run, quarterly_run, quarterly_run, quarterly_run_2, quarterly_run_3
2 quarterly_extract
3 Daily_Sync_Process, Daily_Process
So if you want to get what is immediately following exec then I'd split on the space, and then use a self join. Here is code using the function below, which is Jeff Moden's splitter.
-- Tokenize every job step command on spaces, then pair each exec/execute token with
-- the token that follows it in the same job step.
with cte as(
    select
        job_id
        ,step_name
        ,step_id
        ,s.ItemNumber
        ,s.Item
    from msdb..sysjobsteps
    --split on the space
    cross apply dbo.DelimitedSplit8K(command,' ') s)
select
    c.job_id
    ,c.step_id
    ,c.step_name
    ,c.Item
    -- aliased so the result does not carry two columns both named Item
    ,c2.Item as NextItem
from cte c
--self join to get exec myproc in the same row
--(LEFT JOIN: the WHERE clause filters on c, so the rows a FULL JOIN would add where c has
-- no match were discarded anyway)
left join
    cte c2 on
    c2.ItemNumber = c.ItemNumber + 1
    and c.job_id = c2.job_id
    and c.step_id = c2.step_id
--we only care where the base table has exec or execute (not executed, etc)
where c.Item = 'exec' or c.Item = 'execute'
order by
    c.job_id, c.step_id, c.ItemNumber
It's important to realize this would fail when, for example, the command was exec  someproc, which has two spaces between the tokens. You can fix that with a replace(), but you'd have to nest the replace multiple times to account for as many consecutive spaces as you want to handle. You'd apply it to the command column passed to the splitter function:
--here we replace two spaces with 1 for the entire command
--(the search string must contain TWO spaces; replacing one space with one space does nothing)
cross apply dbo.DelimitedSplit8K(replace(command,'  ',' '),' ') s)
SPLITER FUNCTION
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
/*
  dbo.DelimitedSplit8K -- inline table-valued string splitter.

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(8000).
    @pDelimiter : single-character delimiter.
  Returns one row per element:
    ItemNumber  : 1-based position of the element, ordered by its start offset.
    Item        : element text; leading, trailing or adjacent delimiters give empty strings.
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K] (@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
/* "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000... enough to cover VARCHAR(8000)*/
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
GO

SQL Server Remove multiple character strings within one string

I have the following string of text in my database:
Value1 - Value2: Value3 - Value4: Value5 - Value6:
I need to remove the dash AND everything between the dash up until the colon
The above result would become:
Value1: Value3: Value5:
Basically, there could be any number of values, but there could also be just a single pair.
Thing to note: The values could be any string!
Is there an easy way to do this? Preferably without a UDF. Could anyone help me out with this? Thanks in advance!
Edit: I agree this is a very poor implementation. The rest of the database isn't like this at all; it's just one table. The query I get from this will be used in a view where all values are separated into multiple aliases. Thanks for understanding.
You can use a split function... though your values shouldn't be stored like this in the first place.
-- Demo: split the text on ':' and, for each piece, keep what precedes the dash,
-- turning the dash itself into a ':' (one output row per piece).
-- Table variables need the @ prefix (# names a temp table).
declare @table table (col1 varchar(256))
insert into @table (col1)
values
('Value1 - Value2: Value3 - Value4: Value5 - Value6:')

select
    -- LEFT(...) keeps the text up to and including the dash; the space that preceded the
    -- dash therefore remains in front of the ':'.
    ReturnVal = replace(ltrim(left(s.Item,charindex('-',s.Item))),'-',':')
from
    @table t
cross apply dbo.DelimitedSplit8K(t.col1,':') s
where
    -- the trailing ':' of the source text produces an empty last item
    s.Item <> ''
RETURNS
+-----------+
| ReturnVal |
+-----------+
| Value1 : |
| Value3 : |
| Value5 : |
+-----------+
Or, an ugly hack to get it back how you want it
-- Same transformation, re-assembled into one string per source row.
-- The split pieces are concatenated in their original order by a subquery correlated to
-- the row. The former 'V' + STUFF(...,1,1,'') pair only removed and re-added the first
-- character, and the DISTINCT over a second cross apply existed only to collapse the
-- duplicate rows that cross apply created, so neither is needed.
select
    ReturnVal = (
        select replace(left(s.Item,charindex('-',s.Item)),'-',':')
        from dbo.DelimitedSplit8K(t.col1,':') s
        order by s.ItemNumber
        FOR XML PATH(''), TYPE).value('.', 'NVARCHAR(MAX)')
from
    @table t
RETURNS
ReturnVal
Value1 : Value3 : Value5 :
JEFF MODEN SPLITTER
/*
  dbo.DelimitedSplit8K -- inline table-valued string splitter.

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(8000).
    @pDelimiter : single-character delimiter.
  Returns one row per element:
    ItemNumber  : 1-based position of the element, ordered by its start offset.
    Item        : element text; leading, trailing or adjacent delimiters give empty strings.
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K] (@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
/* "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
   enough to cover VARCHAR(8000) */
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a CROSS JOIN E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a CROSS JOIN E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l
;
GO
-- Split NameValue at its first underscore into a name part and a value part.
-- '_' is appended for the search only, so a row without an underscore yields the whole
-- text as the name and an empty value instead of failing on a negative SUBSTRING length.
-- Values and Table are reserved words and must be bracket-quoted to be used as identifiers.
SELECT substring(NameValue, 1, charindex('_', NameValue + '_')-1) AS Names,
       substring(NameValue, charindex('_', NameValue + '_')+1, LEN(NameValue)) AS [Values]
FROM [Table]
EDIT: Something like this put in a function or stored procedure combined with a temp table should work for more than one line, depending on the line delimiter you should also remove CHAR(13) before you start:
-- Walks a multi-line text line by line (LF separated) and splits each line at its first
-- underscore into a name and a value, emitting one result set per line.
DECLARE @helper varchar(512)
DECLARE @current varchar(512)
-- NOTE(review): NAMEVALUE is not declared in this snippet; presumably it stands for the
-- column or parameter holding the source text -- substitute accordingly.
SET @helper = NAMEVALUE
WHILE CHARINDEX(CHAR(10), @helper) > 0 BEGIN
    -- Take the first remaining line. The position must come from @helper, which shrinks on
    -- every pass; reading it from the original text reused the first line's length.
    SET @current = SUBSTRING(@helper, 1, CHARINDEX(CHAR(10), @helper)-1)
    -- '_' is appended for the search only, so a line without an underscore does not raise
    -- an invalid-length error.
    SELECT SUBSTRING(@current, 1, CHARINDEX('_', @current + '_')-1) AS Names,
           SUBSTRING(@current, CHARINDEX('_', @current + '_')+1, LEN(@current)) AS [Values]
    -- Drop the processed line together with its line feed.
    SET @helper = SUBSTRING(@helper, CHARINDEX(CHAR(10), @helper)+1, LEN(@helper))
END
-- Whatever is left after the last line feed is the final line.
SELECT SUBSTRING(@helper, 1, CHARINDEX('_', @helper + '_')-1) AS Names,
       SUBSTRING(@helper, CHARINDEX('_', @helper + '_')+1, LEN(@helper)) AS [Values]

Function is slow but query runs fast

I have a simple Table-Valued function that takes around 5 second to execute. The function holds a query which returns the data in 1 sec. I have read through some blogs where it is said that this might be due to parameter sniffing but couldn't find a resolution yet. How can I fix the function if it is due to parameter sniffing?
/*
  dbo.fn_PurchaseRecord -- multi-statement table-valued function.

  Parameters (T-SQL parameter names must start with @):
    @ID           : purchase record ID to match (defaults to NULL, which matches no row
                    because the filter is an equality comparison).
    @Name         : buyer name to match.
    @PurchaseDate : purchase date to match.
  Returns @result with ID, Name (the product name), BasePrice and Amount (BasePrice * 10.25)
  for the matching rows of data.PurchaseRecord.
*/
CREATE FUNCTION [dbo].[fn_PurchaseRecord]
(
    @ID INT = NULL,
    @Name nvarchar(MAX),
    @PurchaseDate DATE
)
RETURNS @result TABLE
(
    [ID] [int] NULL,
    [Name] [varchar](20) NULL,
    [BasePrice] [FLOAT] NULL,
    [Amount] [FLOAT]
)
AS BEGIN
    WITH CTE_Purchase AS
    (
        SELECT
            -- the table is aliased i; no alias ht exists in this query
            i.ID,
            i.ProductName AS Name,
            i.BasePrice AS BasePrice
        FROM
            data.PurchaseRecord i WITH (NOLOCK)
        WHERE
            i.ID = @ID
            AND
            i.Date = @PurchaseDate
            AND
            i.BuyerName = @Name
    )
    INSERT INTO @result (ID, Name, BasePrice, Amount)
    SELECT
        ID,
        Name,
        BasePrice,
        BasePrice*10.25
    FROM
        CTE_Purchase;
    RETURN;
END
Why not a single-statement TVF ?
/*
  dbo.fn_PurchaseRecordTESTFIRST -- inline (single-statement) table-valued function.

  Parameters (T-SQL parameter names must start with @):
    @ID           : purchase record ID to match.
    @Name         : buyer name to match.
    @PurchaseDate : purchase date to match.
  Returns ID, Name (the product name), BasePrice and Amount (BasePrice * 10.25) for the
  matching rows of data.PurchaseRecord, read directly from the table without an
  intermediate table variable.
*/
CREATE FUNCTION [dbo].[fn_PurchaseRecordTESTFIRST]
(
    @ID INT = NULL,
    @Name nvarchar(MAX),
    @PurchaseDate DATE
)
RETURNS TABLE
AS
RETURN (
    SELECT i.ID
          ,Name = i.ProductName
          ,i.BasePrice
          ,Amount = i.BasePrice*10.25
    FROM data.PurchaseRecord i
    WHERE i.ID = @ID
      AND i.Date = @PurchaseDate
      AND i.BuyerName = @Name
)
If parameter sniffing is happening it's the least of your worries - Sean hit nail on the head when saying that Multi-statement Table Valued Functions (mTVFs) should be avoided like the plague. By design, they're going to be much slower than an inline Table Valued Function (iTVF) in that you define a table, populate it, then return it. iTVF's, on the other hand, can be thought of as views that accept parameters and returns data directly from the underlying tables.
Another HUGE problem with mTVFs is that they kill parallelism; this means that if you have 2 CPUs or 2,000 CPUs, only ONE will work on resolving your query. No exceptions. Let's have a look at Jeff Moden's DelimitedSplit8K:
/*
  dbo.DelimitedSplit8K -- inline table-valued string splitter (the iTVF baseline of the
  performance comparison).

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(8000).
    @pDelimiter : single-character delimiter.
  Returns one row per element:
    ItemNumber  : 1-based position of the element, ordered by its start offset.
    Item        : element text; leading, trailing or adjacent delimiters give empty strings.
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K]
--===== Define I/O parameters
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
--WARNING!!! DO NOT USE MAX DATA-TYPES HERE! IT WILL KILL PERFORMANCE!
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l;
GO
Now let's build an mTVF version like so and do a performance test...
/*
  dbo.DelimitedSplit8K_MTVF -- multi-statement variant of DelimitedSplit8K used for the
  performance comparison: the same split logic, but the rows are first inserted into the
  @table return variable and then returned.

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(8000).
    @pDelimiter : single-character delimiter.
  Returns @table (ItemNumber int, Item varchar(100)).
  NOTE(review): Item is declared varchar(100), narrower than the 8000-character input;
  widen it before using this function outside the benchmark.
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K_MTVF]
(@pString VARCHAR(8000), @pDelimiter CHAR(1))
RETURNS @table TABLE (ItemNumber int, Item varchar(100))
AS
BEGIN
    --===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
    -- enough to cover VARCHAR(8000)
    WITH E1(N) AS (
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
        SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
    ), --10E+1 or 10 rows
    E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
    E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
    cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
        -- for both a performance gain and prevention of accidental "overruns"
        SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
    ),
    cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
        SELECT 1 UNION ALL
        SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
    ),
    cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
        SELECT s.N1,
               ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
        FROM cteStart s
    )
    --===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
    INSERT @table (ItemNumber, Item)
    SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
           Item = SUBSTRING(@pString, l.N1, l.L1)
    FROM cteLen l;
    RETURN;
END
GO
Before continuing I want to address @John Cappelletti's statement:
I've seen claims like this before [about MAX data types], but I've yet to see any compelling stats
For some compelling stats let's make a minor tweak to the iTVF version of DelimitedSplit8K and change the input string to varchar(max):
/*
  dbo.DelimitedSplit8K_VCMAXINPUT -- DelimitedSplit8K with the input widened to
  VARCHAR(max); it exists only to measure what a MAX input type costs. The split logic
  is unchanged, so the tally still stops at 10,000 rows and the final element is still
  capped at 8000 characters.

  Parameters (T-SQL parameter names must start with @):
    @pString    : text to split, VARCHAR(max).
    @pDelimiter : single-character delimiter.
  Returns one row per element: ItemNumber (1-based position) and Item (element text).
*/
CREATE FUNCTION [dbo].[DelimitedSplit8K_VCMAXINPUT]
(@pString VARCHAR(max), @pDelimiter CHAR(1))
RETURNS TABLE WITH SCHEMABINDING AS
RETURN
--===== "Inline" CTE Driven "Tally Table" produces values from 1 up to 10,000...
-- enough to cover VARCHAR(8000)
WITH E1(N) AS (
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL
    SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1 UNION ALL SELECT 1
), --10E+1 or 10 rows
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS (--==== This provides the "base" CTE and limits the number of rows right up front
    -- for both a performance gain and prevention of accidental "overruns"
    SELECT TOP (ISNULL(DATALENGTH(@pString),0)) ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
),
cteStart(N1) AS (--==== This returns N+1 (starting position of each "element" just once for each delimiter)
    SELECT 1 UNION ALL
    SELECT t.N+1 FROM cteTally t WHERE SUBSTRING(@pString,t.N,1) = @pDelimiter
),
cteLen(N1,L1) AS (--==== Return start and length (for use in substring)
    SELECT s.N1,
           ISNULL(NULLIF(CHARINDEX(@pDelimiter,@pString,s.N1),0)-s.N1,8000)
    FROM cteStart s
)
--===== Do the actual split. The ISNULL/NULLIF combo handles the length for the final element when no delimiter is found.
SELECT ItemNumber = ROW_NUMBER() OVER(ORDER BY l.N1),
       Item = SUBSTRING(@pString, l.N1, l.L1)
FROM cteLen l;
GO
Now we have three versions of the function: the original iTVF, one that accepts varchar(max) and an mTVF version. Now a performance test.
-- sample data
-- Benchmark harness: times each splitter variant over the same 10,000 sample strings.
-- #string is a genuine temp table; the timing variables use the @ prefix T-SQL requires.
IF OBJECT_ID('tempdb..#string') IS NOT NULL DROP TABLE #string;
-- Each row holds one to five concatenated GUIDs; their dashes act as the delimiter.
SELECT TOP (10000)
    id = IDENTITY(int, 1,1),
    txt = REPLICATE(newid(), ABS(checksum(newid())%5)+1)
INTO #string
FROM sys.all_columns a, sys.all_columns b;
SET NOCOUNT ON;
-- Performance tests:
-- Each test assigns the split items to a variable, so no result set is returned to the
-- client, and prints the elapsed milliseconds. GO 5 runs the batch five times.
PRINT 'ITVF 8K'+char(13)+char(10)+replicate('-',90);
GO
DECLARE @st datetime2 = getdate(), @x varchar(20);
SELECT @x = ds.Item
FROM #string s
CROSS APPLY dbo.DelimitedSplit8K(s.txt, '-') ds;
PRINT datediff(ms, @st, getdate());
GO 5
PRINT 'MTVF 8K'+char(13)+char(10)+replicate('-',90);
GO
DECLARE @st datetime2 = getdate(), @x varchar(20);
SELECT @x = ds.Item
FROM #string s
CROSS APPLY dbo.DelimitedSplit8K_MTVF(s.txt, '-') ds;
PRINT datediff(ms, @st, getdate());
GO 5
PRINT 'ITVF VCMAX'+char(13)+char(10)+replicate('-',90);
GO
DECLARE @st datetime2 = getdate(), @x varchar(20);
SELECT @x = ds.Item
FROM #string s
CROSS APPLY dbo.DelimitedSplit8K_VCMAXINPUT(s.txt, '-') ds;
PRINT datediff(ms, @st, getdate());
GO 5
and the results:
ITVF 8K
------------------------------------------------------------------------------------------
Beginning execution loop
280
267
284
300
280
Batch execution completed 5 times.
MTVF 8K
------------------------------------------------------------------------------------------
Beginning execution loop
1190
1190
1157
1173
1187
Batch execution completed 5 times.
ITVF VCMAX
------------------------------------------------------------------------------------------
Beginning execution loop
1204
1220
1190
1190
1203
Batch execution completed 5 times.
Both the mTVF and iTVF version that takes varchar(max) are 4-5 times slower. Again: Avoid mTVFs like the plague and avoid max data types whenever possible.