How to encode XML in T SQL without the additional XML overhead - sql

I have a database which (For whatever reason) has a column containing pipe delimited data.
I want to parse this data quickly, so I've thought of converting this column (nvarchar) into an XML by replacing the pipes with XML attributes and putting it into an XML data typed column somewhere else.
It works, except in the case where that column has a character that requires encoding, such as a '<' character.
I found I could encode XML using FOR XML clause, however, that appears to inject some XML tags around the data.
For example: (this gives error on bad character)
SELECT CAST('<f>' + replace(value,'|','</f><f>') + '</f>' AS XML)
FROM TABLE
this gives xml encoded value, but wraps it in "< value> < /value>" tag
SELECT value
FROM table
FOR XML PATH('')
Any ideas on how I can get the XML encoded value without this extra tag added, so I can convert the pipe format to XML after it's done (preferably in one swoop)?
EDIT: since people are asking, this is what 5 potential rows of data might look like
foo
foo|bar
foo|bar|1
foo||
baz|
And the results would be
Col1, Col2, Col3
foo,null,null
foo,bar,null
foo,bar,1
foo,null,null
baz,null,null
I'm achieving this by using the resulting XML type in a subquery such as the following (there can be up to 4 columns, or 3 pipes, in any given row):
SELECT
*,
x.query('f[1]').value('.','nVarChar(2048)') Col1
,x.query('f[2]').value('.','nVarChar(2048)') Col2
,x.query('f[3]').value('.','nvarchar(2048)') Col3
,x.query('f[4]').value('.','nvarchar(2048)') Col4
FROM
(
SELECT *,
CAST('<f>' + REPLACE(Value,'|','</f><f>') + '</f>' AS XML) as x
FROM table
) y
@srutzky makes a great point. No, I do not need to do XML here at all. If I can find a fast & clean way to parse pipes in a set based operation, I'll do that. Will review the SQL# documentation...

-- Escape the XML-reserved characters by hand, then turn every pipe into an
-- element boundary so the whole value can be cast to XML.
-- '&' is escaped first (innermost REPLACE); otherwise the ampersands that the
-- other replacements introduce would themselves be escaped a second time.
SELECT CAST('<values><f>' +
    REPLACE(
        REPLACE(
            REPLACE(
                REPLACE(
                    REPLACE(value, '&', '&amp;'),
                '"', '&quot;'),
            '<', '&lt;'),
        '>', '&gt;'),
    '|', '</f><f>') + '</f></values>' AS XML)
FROM TABLE;

You could try the following BUT you need to make sure the content is "xml safe", in other words the content does not contain values which xml will reject (look into xml element content parsing).
Try the following...it's test script to see if it does what you want..
UPDATE:
ok, it might help if I read the question all the way through...2 steps...split the pipes and then xml all the split items...try this:
Create the following function:
-- Splits @s on @delimiter and returns the pieces as rows of @t(val).
--   @s                   string to split
--   @trimPieces          1 = LTRIM/RTRIM each piece before it is returned
--   @returnEmptyStrings  1 = keep every piece; 0 = keep only pieces whose LEN
--                        is greater than 0 (LEN ignores trailing spaces, so a
--                        blank-only piece is dropped as well)
--   @delimiter           separator. The position arithmetic below steps exactly
--                        one character past each match and counts delimiters by
--                        the number of characters removed, so only a
--                        one-character delimiter is split correctly.
-- The result set carries no ordering column; callers must not rely on row order.
CREATE FUNCTION [dbo].[udf_SPLIT]
(
    @s nvarchar(max),
    @trimPieces bit,
    @returnEmptyStrings bit,
    @delimiter nvarchar(10)
)
RETURNS @t TABLE (val nvarchar(max))
AS
BEGIN
    DECLARE @i int, @j int
    -- @j = characters lost when the delimiter is stripped out, i.e. the number
    -- of delimiters when the delimiter is a single character.
    SELECT @i = 0, @j = (LEN(@s) - LEN(REPLACE(@s, @delimiter, '')))
    -- Each row peels one piece off the front of the remaining text:
    --   s = text still to be processed
    --   n = text before the next delimiter
    --   m = text after the next delimiter (the whole of s when none is left)
    ;WITH cte AS
    (
        SELECT i = @i + 1,
               s = @s,
               n = substring(@s, 0, charindex(@delimiter, @s)),
               m = substring(@s, charindex(@delimiter, @s) + 1, len(@s) - charindex(@delimiter, @s))
        UNION ALL
        SELECT i = cte.i + 1,
               s = cte.m,
               n = substring(cte.m, 0, charindex(@delimiter, cte.m)),
               m = substring(cte.m, charindex(@delimiter, cte.m) + 1, len(cte.m) - charindex(@delimiter, cte.m))
        FROM cte
        WHERE i <= @j
    )
    INSERT INTO @t (val)
    SELECT [pieces]
    FROM (
        -- Rows 1..@j contribute the text before their delimiter (n); the final
        -- row (@j + 1) contributes whatever is left over (m).
        SELECT CASE
                   WHEN @trimPieces = 1 THEN LTRIM(RTRIM(CASE WHEN i <= @j THEN n ELSE m END))
                   ELSE CASE WHEN i <= @j THEN n ELSE m END
               END AS [pieces]
        FROM cte
    ) t
    WHERE (@returnEmptyStrings = 0 AND LEN(pieces) > 0)
       OR (@returnEmptyStrings = 1)
    -- One recursion step per piece: lift the default limit of 100 levels.
    OPTION (maxrecursion 0)
    RETURN
END
next try the following to test...
-- Split the sample string on '|' (trimmed, empty pieces dropped) and emit each
-- piece as an <f> element.
-- No manual escaping is applied: FOR XML PATH entitizes '<', '>' and '&' in the
-- text content itself, so escaping beforehand would double-encode the values.
DECLARE @str nvarchar(500) = 'test|<html>this</html>|boogie woogie| SDGDSFG| game<br /> on |working| this|'
SELECT [val] AS [f]
FROM [dbo].[udf_SPLIT](@str, 1, 0, '|')
FOR XML PATH('')
If not totally correct, hopefully will put you on right path...
HTH
Dave

Your idea was absolutely OK: By making an XML out of your string the XML engine will convert all special characters properly. After your splitting the XML should be correct.
If your string is stored in a column you can avoid the automatically given name by either doing kind of computation (something like '' + YourColumn) or you give the column an alias AS [*]:
Try it like this:
DECLARE #str VARCHAR(100)='300|2€&ÄÖÜ|This is text -> should be text|2015-12-31';
SELECT #str FOR XML PATH('');
/*
300|2€&ÄÖÜ|This is text -> should be text|2015-12-31
*/
DECLARE #Xml XML=(SELECT CAST('<x>' + REPLACE((SELECT #str FOR XML PATH('')),'|','</x><x>')+'</x>' AS XML));
SELECT #Xml.value('/x[1]','int') AS IntTypeSave
,#Xml.value('/x[3]','varchar(max)') AS VarcharTypeSave
,#Xml.value('/x[4]','datetime') AS DateTypeSave;
/*
300 This is text -> should be text 2015-12-31 00:00:00.000
*/
SELECT X.value('.','varchar(max)') AS EachX
FROM #Xml.nodes('/x') AS Each(X);
/*
300
2€&ÄÖÜ
This is text -> should be text
2015-12-31
*/

Related

The argument 1 of the XML data type method “value” must be a string literal

If I pass the @Count variable, I get this error.
Below is my query
DECLARE #Error_Description NVARCHAR(Max)
DECLARE #Count VARCHAR(20)
DECLARE #x NVARCHAR(Max)
SELECT #Error_Description = 'The external columns for Excel Source are out of synchronization with the data source columns.
The column "szReferencceNumber" needs to be added to the external columns.
The column "SMSa" needs to be added to the external columns.
The column "as" needs to be added to the external columns.'
SELECT #Count = (LEN(#Error_Description) - LEN(REPLACE(#Error_Description, '"', ''))) / LEN('"')
SELECT #Count
SELECT COALESCE(LTRIM(CAST(('<X>' + REPLACE(#Error_Description, '"', '</X><X>') + '</X>') AS XML).value('(/X)[' + #Count + ']', 'varchar(128)')), '')
The first parameter to value must be a string literal. To select the nodes with a dynamic index you can do the following
SELECT
n.value('.', 'varchar(128)') as Result
from (SELECT CAST(('<X>' + REPLACE(#Error_Description, '"', '</X><X>') + '</X>') AS XML)) ca(x)
CROSS APPLY x.nodes('(/X)') n(n)
WHERE n.value('for $l in . return count(../*[. << $l]) + 1', 'int') %2 = 0
This returns the value for every second node. So achieves your desired results of getting the values enclosed in quotes.
Result
---------------------
szReferencceNumber
SMSa
as
if you're using 2012+, and you can use nvarchar(4000) (not MAX), you could get a copy of DelimitedSplitN4K_LEAD and grab rows where the value of ItemNumber is even:
DECLARE #Error_Description nvarchar(4000);
SELECT #Error_Description = N'The external columns for Excel Source are out of synchronization with the data source columns.
The column "szReferencceNumber" needs to be added to the external columns.
The column "SMSa" needs to be added to the external columns.
The column "as" needs to be added to the external columns.';
SELECT DS.Item
FROM dbo.DelimitedSplitN4K_LEAD(#Error_Description,'"') DS
WHERE DS.ItemNumber % 2 = 0;
If you're on SQL server 2016+, then you could use some JSON manipulation (which supports MAX values):
SELECT OJ.value
FROM (VALUES(#Error_Description))V(Error_Description)
CROSS APPLY (VALUES('["' + REPLACE(REPLACE(REPLACE(V.Error_Description,'"','","'),NCHAR(13),''),NCHAR(10),'')+ '"]'))R(JSON)
CROSS APPLY OPENJSON(R.JSON) OJ
WHERE OJ.[Key] % 2 = 1;
You can use your #Count within the XQuery predicate, but not via concatenation. There is sql:variable():
TheXml.value('(/X)[sql:variable("#Count") cast as xs:int?][1]', 'varchar(128)')
It would help to declare the variable #Count as INT in order to avoid the XQuery cast.
Hint: You need the final [1] to enforce the singleton that .value() demands.
This is all based on the @Shnugo answer above; thanks a lot, Shnugo.
I have a long script saved in to a temp table
select * from #Radhe
I want to print the whole script.
DECLARE #SQL NVARCHAR(MAX)
DECLARE #XML3 XML
--load the script to XML
SELECT #XML3 = (SELECT #Radhe.Item AS x FROM #Radhe FOR XML PATH(''))
--print line by line
declare #i int = 1
select #sql = 'radhe'
while #sql is not null
begin
SELECT #sql = #xml3.value('(/x/text())[sql:variable("#i")
cast as xs:int?][1]', 'varchar(max)')
print #sql
select #i = #i + 1
if #i > 10000 --limit it to 10000 lines
set #sql = null
end
and it works.
It took me a long time to get this done.
Hope I can help a fellow DBA or developer.

Read each string that follows a char in sql

So let us say you have a string:
set #string = 'aaa,2,dqw,3,asdad,5,4'
I would like to read the chars that are after a char and a ","
So the result to this string would be:
Result
--------
2
3
5
How could I do this?is there a way to use CHARINDEX for this?
If your string is just like your example, using Charindex(',', <string>) works too.
Otherwise, use PATINDEX. It functions similarly,but you can also set it to recognize all numeric characters.
IF PATINDEX('%[0-9]%', @string) <> 0
BEGIN
    -- Drop everything in front of the first digit.
    SET @string = SUBSTRING(@string, PATINDEX('%[0-9]%', @string), LEN(@string))
    -- NOTE(review): the original SUBSTRING call had no length argument; the
    -- assumed intent is to capture the leading run of digits - confirm.
    -- The appended ',' guarantees that a non-digit is found even when the
    -- digits run to the end of the string.
    SET @var = LEFT(@string, PATINDEX('%[^0-9]%', @string + ',') - 1)
END
you now can do as you please with those variables.
To your exception, just use one more case statement and rid of it. No reason the code has to be too complicated.
One more Approach using numbers table and split string functions
declare #string varchar(max)
set #string = 'aaa,2,dqw,3,asdad,5,4'
;with cte
as
(
select * from [dbo].[SplitStrings_Numbers](#string,',')
)
select
* from cte where isnumeric(item)=1
Output:
2
3
5
4
If you are sure there are no special characters in your data, you can use the above, but ISNUMERIC sometimes reports certain non-numeric characters as numbers:
SELECT
ISNUMERIC('123') as '123' --1
,ISNUMERIC('.') as '.' --Period ---1
,ISNUMERIC(',') as ',' --Comma ---1
In this case,you can use TRY_Parse available from SQL server 2012..
declare #string varchar(max)
set #string = 'aaa,2,dqw,3,asdad,5,4'
;with cte
as
(
select b.* from [dbo].[SplitStrings_Numbers](#string,',') a
cross apply
(select try_parse(a.item as int) ) b(val)
)
select
* from cte where val is not null

Split the query string with repeatative special characters using SQL

This is my String
Declare #qstr as varchar(max)='hireteammember.aspx?empemail=kuldeep#asselsolutions.com&empid=376&empname=kuldeep&adminname=TMA1&term=5&teamid=161&contactid=614¥1&WP=100¥5¥Months&Amt=500&DueDay=5&StrDt=12/31/2013&MemCatg=Employees&StrTm=21:05&PlnHrs=5&WrkDays=true¥true¥true¥true¥true¥false¥false'
I want to extract the values of empid, empname, adminname, term, teamid, contactid, WP, Months, DueDay, StrDt, MemCatg, StrTm, PlnHrs and WrkDays and assign them to new variables.
I have used
select ( SUBSTRING(#qstr,CHARINDEX('=',#qstr)+1,CHARINDEX('&',#qstr)-CHARINDEX('=',#qstr)-1)))
but I only get the value of 'empemail'. For the next occurrences of the special character '&' I am not able to get the values of the further terms, even if I use '&' instead of '='.
Help me to split the whole string
How about using XML to split the values into rows, and then splitting them into columns.
Something like
Declare #qstr as varchar(max)='hireteammember.aspx?empemail=kuldeep#asselsolutions.com&empid=376&empname=kuldeep&adminname=TMA1&term=5&teamid=161&contactid=614¥1&WP=100¥5¥Months&Amt=500&DueDay=5&StrDt=12/31/2013&MemCatg=Employees&StrTm=21:05&PlnHrs=5&WrkDays=true¥true¥true¥true¥true¥false¥false'
DECLARe #str VARCHAR(MAX) = SUBSTRING(#qstr,CHARINDEX('?',#qstr,0) + 1, LEN(#qstr)-CHARINDEX('?',#qstr,0))
DECLARE #xml XML
SELECT #xml = CAST('<d>' + REPLACE(#str, '&', '</d><d>') + '</d>' AS XML)
;WITH Vals AS (
SELECT T.split.value('.', 'nvarchar(max)') AS data
FROM #xml.nodes('/d') T(split)
)
SELECT LEFT(data,CHARINDEX('=',data,0) - 1),
RIGHT(data,LEN(data) - CHARINDEX('=',data,0))
FROM Vals
SQL Fiddle DEMO
-- Splits a query string of the form 'k1=v1&k2=v2&...' into (keyName, keyValue) rows.
--   @s  the query string, without the leading '?' (at most 8000 characters)
-- A chunk that contains no '=' is returned with an empty keyName and the whole
-- chunk as keyValue.
-- The splitter recurses once per '&'; callers with more than 100 pairs must add
-- OPTION (MAXRECURSION 0) to the calling statement.
CREATE FUNCTION dbo.SplitQueryString (@s varchar(8000))
RETURNS table
AS
RETURN (
    -- One row per chunk: pos is the '&' that ends the chunk (0 for the last
    -- chunk), lastPos is the '&' that precedes it (0 for the first chunk).
    WITH splitter_cte AS (
        SELECT CHARINDEX('&', @s) as pos, 0 as lastPos
        UNION ALL
        SELECT CHARINDEX('&', @s, pos + 1), pos
        FROM splitter_cte
        WHERE pos > 0
    ),
    -- Cut each 'key=value' chunk out of @s and locate its '='.
    pair_cte AS (
        SELECT chunk,
               CHARINDEX('=', chunk) as pos
        FROM (
            SELECT SUBSTRING(@s, lastPos + 1,
                             case when pos = 0 then 8000 -- last chunk: take the rest
                                  else pos - lastPos - 1 end) as chunk
            FROM splitter_cte) as t1
    )
    -- SUBSTRING starting at 0 with length pos yields the pos - 1 characters
    -- that precede the '='.
    SELECT substring(chunk, 0, pos) as keyName,
           substring(chunk, pos + 1, 8000) as keyValue
    FROM pair_cte
)
GO
declare #queryString varchar(2048)
set #queryString = 'foo=bar&temp=baz&key=value';
SELECT *
FROM dbo.SplitQueryString(#queryString)
OPTION(MAXRECURSION 0);
when run produces the following output.
keyName keyValue
------- --------
foo bar
temp baz
key value
(3 row(s) affected)
I believe that this will do exactly what you are asking.
SQL FIDDLE DEMO
If the order of the values in the html string remains same i would suggest using the whole string name like
-- Start right after the 'empemail=' key (not one character into it) and stop
-- in front of the next key.
select SUBSTRING(@qstr, CHARINDEX('empemail=', @qstr) + LEN('empemail='), CHARINDEX('&empid=', @qstr) - CHARINDEX('empemail=', @qstr) - LEN('empemail='))
If you are still looking for nth occurance then refer to this link
Declare #qstr as varchar(max)='hireteammember.aspx?empemail=kuldeep#asselsolutions.com&empid=376&empname=kuldeep&adminname=TMA1&term=5&teamid=161&contactid=614¥1&WP=100¥5¥Months&Amt=500&DueDay=5&StrDt=12/31/2013&MemCatg=Employees&StrTm=21:05&PlnHrs=5&WrkDays=true¥true¥true¥true¥true¥false¥false'
(select ( SUBSTRING(#qstr,CHARINDEX('&empname=',#qstr)+1,CHARINDEX('&adminname=',#qstr)-CHARINDEX('&empname=',#qstr)-1)))
(select ( SUBSTRING(#qstr,CHARINDEX('?empemail=',#qstr)+1,CHARINDEX('&empid=',#qstr)-CHARINDEX('?empemail=',#qstr)-1)))
like this i have splitted and updated The whole string. Thank you All for your answers, Your answers Helped me to solve this

TSQL Reverse FOR XML Encoding

I am using FOR XML in a query to join multiple rows together, but the text contains quotes, "<", ">", etc. I need the actual character instead of the encoded value like "&quot;" etc. Any suggestions?
Basically, what you're asking for is invalid XML, and luckily SQL Server will not produce it. You can take the generated XML and extract the content, and this operation will revert the escaped characters to their text representation. This reversion normally occurs in the presentation layer, but it can occur in SQL Server itself, for instance by using XML methods to extract the content of the produced FOR XML output. For example:
declare #text varchar(max) = 'this text has < and >';
declare #xml xml;
set #xml = (select #text as [node] for xml path('nodes'), type);
select #xml;
select x.value(N'.', N'varchar(max)') as [text]
from #xml.nodes('//nodes/node') t(x);
I have a similar requirement to extract column names for use in PIVOT query.
The solution I used was as follows:
SELECT #columns = STUFF((SELECT '],[' + Value
FROM Table
ORDER BY Value
FOR XML PATH('')), 1, 2, '') + ']'
This produces a single string:
[Value 1],[Value 2],[Value 3]
I hope this points you in the right direction.
--something like this?
SELECT * INTO #Names FROM (
SELECT Name='<>&' UNION ALL
SELECT Name='ab<>'
) Names;
-- 1)
SELECT STUFF(
(SELECT ', ' + Name FROM #Names FOR XML PATH(''))
,1,2,'');
-- 2)
SELECT STUFF(
(SELECT ', ' + Name FROM #Names FOR XML PATH(''),TYPE).value('text()[1]','nvarchar(max)')
,1,2,'');
-- 2) is slower but will not return encoded value.
Hope it help.

Replace with wildcard, in SQL

I know MS T-SQL does not support regular expression, but I need similar functionality. Here's what I'm trying to do:
I have a varchar table field which stores a breadcrumb, like this:
/ID1:Category1/ID2:Category2/ID3:Category3/
Each Category name is preceded by its Category ID, separated by a colon. I'd like to select and display these breadcrumbs but I want to remove the Category IDs and colons, like this:
/Category1/Category2/Category3/
Everything between the leading slash (/) up to and including the colon (:) should be stripped out.
I don't have the option of extracting the data, manipulating it externally, and re-inserting back into the table; so I'm trying to accomplish this in a SELECT statement.
I also can't resort to using a cursor to loop through each row and clean each field with a nested loop, due to the number of rows returned in the SELECT.
Can this be done?
Thanks all - Jay
I think your best bet is going to be to use a recursive user-defined function (UDF). I've included some code here that you can use to pass in a string to achieve the results you're looking for.
-- Removes the "ID:" prefix from every segment of a breadcrumb such as
-- '/ID1:Category1/ID2:Category2/'.
--   @cIndex      position in @theString at which to look for the next ':'
--   @breadcrumb  result accumulated so far (pass '/' on the first call)
--   @theString   the full breadcrumb being processed
-- Returns @breadcrumb extended with the text of every remaining segment.
-- The function calls itself once per segment; SQL Server limits nested calls to
-- 32 levels, so longer breadcrumbs will fail.
CREATE FUNCTION ufn_StripIDsFromBreadcrumb (@cIndex int, @breadcrumb varchar(max), @theString varchar(max))
RETURNS varchar(max)
AS
BEGIN
    DECLARE @nextColon int
    DECLARE @nextSlash int
    SET @nextColon = CHARINDEX(':', @theString, @cIndex)
    -- No further "ID:" prefix: nothing left to strip. Without this guard the
    -- search would restart at the beginning of the string and recurse until
    -- the nesting limit is hit.
    IF @nextColon = 0
        RETURN @breadcrumb
    SET @nextSlash = CHARINDEX('/', @theString, @nextColon)
    -- Last segment is not terminated by '/': take the rest of the string
    -- instead of passing a negative length to SUBSTRING.
    IF @nextSlash = 0
        RETURN @breadcrumb + SUBSTRING(@theString, @nextColon + 1, LEN(@theString))
    -- Append the category name together with its trailing '/'.
    SET @breadcrumb = @breadcrumb + SUBSTRING(@theString, @nextColon + 1, @nextSlash - @nextColon)
    IF @nextSlash != LEN(@theString)
    BEGIN
        exec @breadcrumb = ufn_StripIDsFromBreadcrumb @cIndex = @nextSlash, @breadcrumb = @breadcrumb, @theString = @theString
    END
    RETURN @breadcrumb
END
You could then execute it with:
DECLARE #myString varchar(max)
EXEC #myString = ufn_StripIDsFromBreadcrumb 1, '/', '/ID1:Category1/ID2:Category2/ID3:Category3/'
PRINT #myString
This works for SQL Server 2005 and up.
create table strings (
string varchar(1000)
)
insert into strings values( '/ID1:Category1/ID2:Category2/ID3:Category3/' )
insert into strings values( '/ID4:Category4/ID5:Category5/ID8:Category6/' )
insert into strings values( '/ID7:Category7/ID8:Category8/ID9:Category9/' )
go
with
replace_with_wildcard ( restrung ) as
(
select replace( string, '', '' )
from strings
union all
select
replace( restrung, substring( restrung, patindex( '%ID%', restrung ), 4 ), '' )
from replace_with_wildcard
where patindex( '%ID%', restrung ) > 0
)
select restrung
from replace_with_wildcard
where charindex( ':', restrung ) = 0
order by restrung
drop table strings
You might be able to do this using a Split function. The following split function relies on the existence of a Numbers table which literally contains a sequential list of numbers like so:
Create Table dbo.Numbers( Value int not null primary key clustered )
GO
With Nums As
(
Select ROW_NUMBER() OVER( Order By o.object_id ) As Num
From sys.objects as o
cross join sys.objects as o2
)
Insert dbo.Numbers( Value )
Select Num
From Nums
Where Num Between 1 And 10000
GO
Create Function [dbo].[udf_Split] (@DelimitedList nvarchar(max), @Delimiter nvarchar(2) = ',')
Returns @SplitResults TABLE (Position int NOT NULL PRIMARY KEY, Value nvarchar(max))
AS
/*
PURPOSE: to split the @DelimitedList based on the @Delimiter.
    Returns one row per item: Position is the character position at which the
    item starts and Value is the item text.

REQUIRES: dbo.Numbers, a table of sequential integers starting at 1. Only
    delimiters located at positions covered by dbo.Numbers are found, so a list
    longer than the largest number in that table is split only partially.

DESIGN NOTES:
    1. The list is first padded so that it both starts and ends with the
       delimiter; every item then sits between two consecutive delimiters.
       Position is measured in this padded list.
    2. For a delimiter found at position N.Value:
         item start  = CharIndex(@Delimiter, A.list, N.Value) + @DelimiterLength
         item length = position of the next delimiter - item start
    3. We need to calculate the delimiter length because the LEN function
       excludes trailing spaces. Thus if a delimiter of ", " (a comma followed
       by a space) is used, the LEN function will return 1.
    4. The DataLength function returns the number of bytes in the string.
       However, since we're using an nvarchar for the delimiter, the number of
       bytes will be double the number of characters.
*/
Begin
    Declare @DelimiterLength int
    Set @DelimiterLength = DataLength(@Delimiter) / 2
    -- Pad the list with a leading and a trailing delimiter when missing.
    If Left(@DelimitedList, @DelimiterLength) <> @Delimiter
        Set @DelimitedList = @Delimiter + @DelimitedList
    If Right(@DelimitedList, @DelimiterLength) <> @Delimiter
        Set @DelimitedList = @DelimitedList + @Delimiter
    Insert @SplitResults(Position, Value)
    Select CharIndex(@Delimiter, A.list, N.Value) + @DelimiterLength
        , Substring (
            A.List
            , CharIndex(@Delimiter, A.list, N.Value) + @DelimiterLength
            , CharIndex(@Delimiter, A.list, N.Value + 1)
                - ( CharIndex(@Delimiter, A.list, N.Value) + @DelimiterLength )
            )
    From dbo.Numbers As N
        Cross Join (Select @DelimitedList As list) As A
    -- Keep only the positions at which a delimiter starts; the closing
    -- delimiter at the very end of the list is excluded by N.Value < LEN.
    Where N.Value > 0
        And N.Value < LEN(A.list)
        And Substring(A.list, N.Value, @DelimiterLength) = @Delimiter
    Order By N.Value
    Return
End
You then might be able to run a query like so where you strip out the prefixes:
Select Table, Substring(S.Value, CharIndex(':', S.Value) + 1, Len(S.Value))
From Table
Cross Apply dbo.udf_Split(Table.ListColumn, '/') As S
This would give you values like:
Category1
Category2
Category3
You could then use FOR XML PATH to combine them again:
-- Rebuild one breadcrumb per row: split ListColumn on '/', strip the "ID:"
-- prefix from each item, and concatenate the items back in their original
-- order. Stuff(..., 1, 1, '') removes the leading '/' of the concatenation.
Select Table.PK
    , Stuff( (
        Select '/' + Substring(S1.Value, CharIndex(':', S1.Value) + 1, Len(S1.Value))
        From Table As Table1
            Cross Apply dbo.udf_Split(Table1.ListColumn, '/') As S1
        Where Table1.PK = Table.PK
        Order By S1.Position
        For Xml Path('')
        ), 1, 1, '') As BreadCrumb
From Table
For SQL Server 2005+, you can get regex support by:
Enabling CLR (doesn't require instance restart)
Uploading your CLR functionality (in this case, regex replace)
Using native TSQL, you'll need to define REPLACE statements for everything you want to remove:
-- One REPLACE per known prefix; every prefix to remove must be listed explicitly.
SELECT REPLACE(
           REPLACE(
               REPLACE('/ID1:Category1/ID2:Category2/ID3:Category3/', 'ID1:', ''),
           'ID2:', ''),
       'ID3:', '')
Regex or otherwise, you need to be sure these patterns don't appear in the actual data.
You can use SQL CLR. Here's an MSDN article:
declare #test1 nvarchar(max)
set #test1='/ID1:Category1/ID2:Category2/ID3:Category3/'
while(CHARINDEX('ID',#test1)<>0)
Begin
select #test1=REPLACE(#test1,SUBSTRING(#test1,CHARINDEX('ID',#test1),CHARINDEX(':',#test1)-
CHARINDEX('ID',#test1)+1),'')
End
select #test1