T-SQL retrieving sql attribute from xml using variable - sql

I am trying to retrieve an XML attribute from an XML variable, passing in the name of the desired attribute. The first select statement works just fine, retrieving the correct attribute values. However, when I try to put the desired attribute path in a SQL variable, all that is displayed is the string /root/attribs/@id instead of the actual value. I have tried numerous permutations of the @Path variable, all to no avail.
What am I missing here?
-- Sample document: one <attribs> element carrying three attributes.
DECLARE @XMLString XML = '<root><attribs flags="1" id="test_id" platform="test_platform" /></root>';

-- Works: each path is a string literal, so @flags / @id / @platform are attribute steps.
SELECT
    flags = x.c.value('(/root/attribs/@flags)[1]', 'nvarchar(50)'),
    id = x.c.value('(/root/attribs/@id)[1]', 'nvarchar(50)'),
    [platform] = x.c.value('(/root/attribs/@platform)[1]', 'nvarchar(50)')
FROM
    @XMLString.nodes('/*') x ( c );

-- The attempt being asked about: the whole path is held in a variable.
-- The reported result is the text '/root/attribs/@id' itself, not the attribute value.
DECLARE @Path NVARCHAR(50) = '/root/attribs/@id';
SELECT
    result = x.c.value('(sql:variable("@Path"))[1]', 'nvarchar(50)')
FROM
    @XMLString.nodes('/*') x ( c );

This will allow you to specify the attribute name.
-- Sample document: one <attribs> element carrying three attributes.
DECLARE @XMLString xml = '
<root>
<attribs flags="1" id="test_id" platform="test_platform" />
</root>';
-- Only the attribute NAME is variable; the path itself stays a literal.
DECLARE @Attribute nvarchar(max) = 'flags';
-- @* selects every attribute of <attribs>; the predicate keeps the one
-- whose local name equals the value of @Attribute.
SELECT
    t.x.value('(/root/attribs/@*[local-name() = sql:variable("@Attribute")])[1]', 'nvarchar(max)')
FROM @XMLString.nodes('/*') t(x);

Related

Get list of xml from parent xml in sql

Need to get the list of xml present inside an xml. This is what I have tried.
DECLARE @xml xml;
SET @xml = '<a><b /><c><d /><d /><d /></c></a>';
-- The attempt being asked about: value() with the path /a/b/c.
-- In the sample, <c> is a child of <a>, not of <b>; the reported result is NULL.
DECLARE @i_xml xml = @xml.value('(/a/b/c)[1]', 'VARCHAR(100)');
SELECT @i_xml;
But this gives me NULL
Based on your sample XML, <c> and <b> are siblings: both are direct children of <a>, so the path /a/b/c matches nothing. Also, value() returns a scalar result; use query() instead to get XML back.
DECLARE @xml xml;
SET @xml = '<a><b /><c><d /><d /><d /></c></a>';
-- query() returns XML rather than a scalar: here, every child element of <c> under <a>.
DECLARE @i_xml xml = @xml.query('(//a/c/child::*)');
SELECT @i_xml;

How to get all PDF links from HTML content using T-SQL

I am trying to retrieve all PDF links from a string column which contains HTML.
Example text of one column is:
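<p>text here <a href="example.com/abc.pdf">link</a>
some other text <a href="example.com">home</a>
<a href="www.example.com/abc123.pdf">link 2</a></p>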
I need all links with .pdf extension.
I already tried function like this
-- Asker's attempt, kept as posted (only the variable sigils are restored).
-- Walks @t looking for href=" and stores the text between it and the first .pdf" as a link.
--   @t : HTML text to scan.
-- Returns a table variable @Links(link) with the extracted strings.
ALTER function [dbo].[GetLinks] (@t nvarchar(max))
returns @Links table (link nvarchar(max))
as
begin
    declare @strtpos int
    set @strtpos=100 -- any value above 6, so the loop body runs at least once
    declare @endpos int
    declare @lnk nvarchar(max)
    while @strtpos > 6
    begin
        -- PATINDEX returns 0 when nothing matches, so "no href" gives 6 and ends the loop
        set @strtpos = PATINDEX('%href="%', @t)+6
        if @strtpos>6 begin
            --set @endpos = CHARINDEX ('"',@t,@strtpos+1)
            -- searches the whole of @t for the first .pdf", not just the href found above
            set @endpos = PATINDEX('%.pdf"%',@t)+4
            -- NOTE(review): because of the +4 this test is true even when no .pdf" exists
            if @endpos>0 begin
                set @lnk = substring(@t ,@strtpos, @endpos - @strtpos)
                -- the loop condition is now driven by the extracted link text, not by the rest of @t
                set @strtpos = PATINDEX('%href="%', @lnk)+6
                set @t= RIGHT (@t, len(@t) - @endpos)
                insert @Links values(@lnk)
            end
        end
    end
    return
end
And calling this function from SQL Server like this:
-- Sample call; the anchors carry the href values the function is expected to find.
select top 1 * from dbo.GetLinks(' <p>text here <a href="example.com/abc.pdf">link</a>
some other text <a href="example.com">home</a>
<a href="www.example.com/abc123.pdf">link 2</a></p>')
This returns the first link only when I match CHAR, but when I match string ".pdf" it returns long string. Please let me know if I am doing something wrong or need to change approach for this.
If your HTML column can be converted to XML, as your example suggests, you can parse the href values in T-SQL using the xml data type methods:
-- Multi-statement table-valued function: one row per <a> element found anywhere in @t,
-- carrying the value of its href attribute.
--   @t : the HTML, which has to be convertible to the xml type.
CREATE FUNCTION dbo.GetLinks (@t xml)
RETURNS @Links TABLE (link nvarchar(max))
AS
BEGIN
    INSERT INTO @Links (link)
    SELECT
        -- '@href' is evaluated relative to each <a> node produced by nodes()
        AnchorTag.value('@href', 'nvarchar(MAX)') AS link
    FROM @t.nodes('//a') AS AnchorTags(AnchorTag);
    RETURN;
END;
GO
The same approach can be used with an inline TVF:
-- Inline table-valued function: one row per <a> element found anywhere in @t,
-- carrying the value of its href attribute.
--   @t : the HTML, which has to be convertible to the xml type.
CREATE FUNCTION dbo.GetLinks (@t xml)
RETURNS TABLE
AS
RETURN (
    SELECT
        -- '@href' is evaluated relative to each <a> node produced by nodes()
        AnchorTag.value('@href', 'nvarchar(MAX)') AS link
    FROM @t.nodes('//a') AS AnchorTags(AnchorTag)
);
GO
Xquery expression can do it simply
DECLARE @html xml = '<p>text here <a href="example.com/abc.pdf">link</a><b v="3">ok</b>some other text <a href="example.com">home</a><a title="er">kj</a><a href="www.example.com/abc123.pdf">link 2</a></p>';
-- The predicate keeps only <a> elements that have an href attribute containing ".pdf",
-- so anchors without href (or with a non-PDF href) never reach value().
SELECT [pdfLink] = a.value('@href','varchar(max)')
FROM @html.nodes('//a[@href[contains(., ".pdf")]]') c(a);
If you are on SQL Server 2016+ you can use STRING_SPLIT.
DECLARE @string VARCHAR(8000) = '
<p>text here <a href="example.com/abc.pdf">link</a>
some other text <a href="example.com">home</a>
<a href="www.example.com/abc123.pdf">link 2</a></p>';
-- Splitting on the double quote isolates each quoted attribute value as its own row;
-- the LIKE then keeps the values that end in .pdf.
SELECT TheUrl = split.value
FROM STRING_SPLIT(@string,'"') AS split
WHERE split.value LIKE '%.pdf';
Returns:
TheUrl
---------------------------
example.com/abc.pdf
www.example.com/abc123.pdf
If you can't convert your HTML into XML for whatever reason, you can still do this with regular string manipulation, though it is not pretty.
This solution (ironically) utilises an xml based string splitter to allow for multi-character delimiters, the output of which is then further filtered to only return the .pdf links:
-- Inline table-valued string splitter that accepts multi-character delimiters.
-- It rewrites @str as <x>item</x><x>item</x>... , casts that to xml and shreds the <x> nodes.
--   @str       : string to split. NOTE(review): it is cast to xml, so input presumably must not
--                contain characters that are special in XML (the caller below strips < and >).
--   @Delimiter : text to split on (up to 10 characters).
--   @num       : position of the single item to return; null returns every item.
-- Returns (rn, item): rn is a row_number with no explicit ordering column
-- (presumably document order - verify), item is the trimmed text of each piece.
create or alter function [dbo].[fn_StringSplitXML]
(
    @str varchar(max) = ''       -- String to split.
   ,@Delimiter varchar(10) = ',' -- Delimiting value to split on.
   ,@num int = null              -- Which value to return.
)
returns table
as
return
    select rn
          ,item
    from(select rn = row_number() over(order by(select null))
               ,item = ltrim(rtrim(n.i.value('(./text())[1]','varchar(max)')))
         from(select x = cast('<x>'+replace(@str,@Delimiter,'</x><x>')+'</x>' as xml).query('.')) as s
              cross apply s.x.nodes('x') as n(i)
        ) as a
    where rn = @num
          or @num is null
;
go
declare @html varchar(1000) =
'<p>text here <a href="example.com/abc.pdf">link</a>
some other text <a href="example.com">home</a>
<a href="www.example.com/abc123.pdf">link 2</a></p>
<input type="text" name="self closed tag" />
<b>some more text</b>
';
-- Both angle brackets are removed before the split, then the text is split on href=" so that
-- every piece after the first starts with a link target.
select left(s.item
           ,patindex('%.pdf%',s.item)+3 -- keep everything up to and including the "f" of .pdf
           ) as link
from dbo.fn_StringSplitXML(replace(replace(@html
                                          ,'>'
                                          ,''
                                          )
                                  ,'<'
                                  ,''
                                  )
                          ,'href="'
                          ,null
                          ) as s
where patindex('%.pdf%',s.item) > 0; -- drop the pieces that contain no .pdf at all
Output
link
example.com/abc.pdf
www.example.com/abc123.pdf

SQL Server Xquery sql:variable usage

I need to use a dynamic string for an xquery path but .query/.nodes methods require a literal string as parameter. So I decided to try sql:variable
-- @xmlData holds the document (placeholder text here); @node holds the path the asker wants to apply.
DECLARE @xmlData XML, @node varchar(max);
SET @xmlData = 'Some XML Here';
SET @node = '/path1/path2/path3';
When I query with
-- Literal path: the form that works.
select @xmlData.query('/path1/path2/path3')
It returns the intended result
But when I query with
-- Whole path supplied through sql:variable: the form being asked about.
select @xmlData.query('sql:variable("@node")')
It returns the variable value itself as "/path1/path2/path3"
What is wrong here?
This should do the trick:
-- Matches any top-level element (*) whose name equals the value of @node.
select @xmlData.query('/*[local-name()=sql:variable("@node")]')
It matches any node with a wildcard *, but there is an extra predicate that the name has to match the variable
For performance reasons, you should preferably use /text() to get the inner text, and use .value() to get a single value.
-- Same name test as above, but returns the element's first text node as a scalar.
select @xmlData.value('(/*[local-name()=sql:variable("@node")]/text())[1]', 'nvarchar(100)')
sql:variable is used to substitute a single scalar variable into an XPath expression, so can't be used to define a full path. You can use it to test against a single node's name, though, e.g.:
declare @xmlData XML = '<path1><path2><path3>foo</path3></path2></path1>';
-- Literal path.
select [example1] = @xmlData.query('/path1/path2/path3');
--example1
--<path3>foo</path3>
-- One variable per path step: each step is a wildcard filtered by element name.
declare @node1 varchar(max) = 'path1';
declare @node2 varchar(max) = 'path2';
declare @node3 varchar(max) = 'path3';
select [example2] = @xmlData.query('//*[local-name()= sql:variable("@node1")]/*[local-name()= sql:variable("@node2")]/*[local-name()= sql:variable("@node3")]');
--example2
--<path3>foo</path3>

Use xQuery to extract attribute values that also exist in SQL variable

XML structure:
<ns0:message xmlns:ns0='xxx:testing'>
<ns0:field name='AAA'>...</ns0:field>
<ns0:field name='BBB'>...</ns0:field>
<ns0:field name='VVV'>...</ns0:field>
<ns0:field name='CAR'>...</ns0:field>
</ns0:message>
I have SQL that will extract the values of the attributes titled name:
-- Concatenates the name attribute of every matching <ns0:field> into one string.
-- Each value is prefixed with ',' + CHAR(13); substring(..., 3) then drops that first two-character prefix.
SELECT
( (SELECT ',' + CHAR(13) + P.N.value('@name', 'varchar(max)')
FROM myTable.message_xml.nodes('declare namespace ns0="xxx:testing";
ns0:message/ns0:field[ @name = "AAA" or
@name = "BBB"]') P(N)
FOR XML PATH(''), type).value('substring(text()[1], 3)', 'varchar(max)')) as ATTRIBUTE_VALUES
This returns a column that looks like:
ATTRIBUTE_VALUES
---------------
AAA,
BBB
My problem is that the list of potential attribute values is quite large.
Instead of repeating @name = "AAA" in my query for every attribute value I want to check for, I was hoping I could declare the list as a variable, like:
-- The asker's attempt: the whole candidate list held in one string.
DECLARE @ATTRIBUTES VarChar(Max);
SET @ATTRIBUTES = '(AAA,BBB,CAR,XYZ)';
And then just stick the variable in the sql like:
[@name = sql:variable("@ATTRIBUTES")]
but this is not working for any combination of parens,commas,etc I use to build the variable.
You can use contains function.
Declare a variable like so:
-- Every candidate name is wrapped in | on both sides so a name can be searched for as |name|.
DECLARE @ATTRIBUTES VarChar(Max) = '|AAA|BBB|CAR|XYZ|';
And in your query, instead of @name = "AAA", use:
contains(sql:variable("@ATTRIBUTES"), concat("|", @name, "|"))

Extracting a single value from a json array in sql server

I am using MS SQL Server to get a search result in JSON format. There is only ever one row returned in my use case, but the API was designed as a search tool, so it can return more than one value, hence the array. The issue I am having is extracting the id value from the array that is returned.
json @response (Array):
{"hits":[{"id":1320172,"email":"xyz@domain.eu","first_name":"IMA","last_name":"TESTERTOO","created":"2018-12-12T11:52:58+00:00","roles":["Learner"],"status":true}],"total":1}
I have tried a number of things but I can't seem to get the path right.
-- The three attempts from the question, in the order they were tried.
SET @MyUserid = JSON_QUERY(@Reponse, '$.hits[0].id')
SET @MyUserid =JSON_VALUE(@Reponse,'$.hits[0].id')
SET @MyUserid = JSON_QUERY(@Reponse, '$.id')
On most examples I have found the json is not a single line array so I feel like I am missing something there. I'm inexperienced with working with json so any help would be greatly appreciated.
You can try this
DECLARE @json NVARCHAR(MAX)=
N'{"hits":[{"id":1320172,"email":"xyz@domain.eu","first_name":"IMA","last_name":"TESTERTOO","created":"2018-12-12T11:52:58+00:00","roles":["Learner"],"status":true}],"total":1}';
--This will return just one selected value
SELECT JSON_VALUE(@json,'$.hits[0].id');
--This will return the whole everything:
--the outer OPENJSON reads the top-level object (hits kept as JSON, total as int),
--the inner OPENJSON turns each element of the hits array into one typed row.
SELECT A.total
      ,B.id
      ,B.email
      ,B.first_name
      ,B.last_name
      ,B.created
      ,B.roles
      ,B.[status]
FROM OPENJSON(@json)
     WITH(hits nvarchar(max) AS JSON, total int) A
CROSS APPLY OPENJSON(A.hits)
     WITH(id int
         ,email nvarchar(max)
         ,first_name nvarchar(max)
         ,last_name nvarchar(max)
         ,created nvarchar(max)
         ,roles nvarchar(max) AS JSON
         ,[status] bit) B;