Parse XML with multilevel nesting in SQL - sql

I'm trying to Parse some XML having multiple nesting levels in SQL. The problem I hit into here is how to write my query generic enough for it to Parse the XML below without having to hard-code the node path e:g
EXEC sp_xml_preparedocument #handle OUTPUT, #xml
INSERT #Root(ID)
SELECT *
FROM OPENXML(#handle, '/Root')
WITH (ID VARCHAR(100))
INSERT #ConditionSet(Operator)
SELECT *
FROM OPENXML(#handle, '/Root/ConditionSet')
WITH (Operator VARCHAR(100))
INSERT #ConditionSet(Operator)
SELECT *
FROM OPENXML(#handle, '/Root/ConditionSet/ConditionSet')
WITH (Operator VARCHAR(100))
INSERT #ConditionSet(Operator)
SELECT *
FROM OPENXML(#handle, '/Root/ConditionSet/ConditionSet/ConditionSet')
WITH (Operator VARCHAR(100))
Is there a better way to Parse the following XML in SQL and represent all data in tabular form?
<?xml version="1.0"?>
-<Root ID="414141" Source="AudienceBuilder">
-<ConditionSet Operator="I">
-<Condition Operator="E" ID="74373">
-<Relationship ID="56756">
<Relationship ID="67868"/>
</Relationship>
-<Value>
<![CDATA[ABC]]>
</Value>
</Condition>
-<ConditionSet Operator="O">
-<Condition Operator="E" ID="6566">
-<Relationship ID="7658">
<Relationship ID="6547"/>
</Relationship>
-<Value>
<![CDATA[DEF]]>
</Value>
</Condition>
-<Condition Operator="E" ID="96967">
-<Relationship ID="3884">
<Relationship ID="9954"/>
</Relationship>
-<Value>
<![CDATA[GHI]]>
</Value>
</Condition>
-<ConditionSet Operator="A">
-<Condition Operator="E" ID="31654">
-<Relationship ID="57894">
<Relationship ID="8532"/>
</Relationship>
-<Value>
<![CDATA[JKL]]>
</Value>
</Condition>
-<Condition Operator="E" ID="65636">
-<Relationship ID="843">
<Relationship ID="7473"/>
</Relationship>
-<Value>
<![CDATA[MNO]]>
</Value>
</Condition>
</ConditionSet>
</ConditionSet>
</ConditionSet>
</Root>
Any inputs/recommendations are highly appreciated :)
Thank you
Zeaous

Assuming you have your XML in a SQL Server variable called #XML, you can use the native XQuery support in SQL Server 2005 and newer to do this much more elegantly and efficiently:
DECLARE #XML XML = '...(your XML here).....'
SELECT
RootID = #xml.value('(/Root/#ID)[1]', 'int'),
ConditionSetOperator = XC.value('#Operator', 'varchar(50)'),
ConditionID = XC2.value('#ID', 'int'),
ConditionOperator = XC2.value('#Operator', 'varchar(50)')
FROM
#Xml.nodes('//ConditionSet') AS XT(XC)
CROSS APPLY
xc.nodes('Condition') AS XT2(XC2)
This gives me an output of
With XQuery operators like .nodes() or .value(), you can easily "shred" an XML document into relational data and store that as needed.
The first call to #xml.nodes('//ConditionSet') will get a "pseudo" table for each matching node - so each <ConditionSet> node will be returned in the "pseudo" table XT as column XC, and then I can easily grab attributes (or XML elements) from that XML fragment using XQuery methods like .value().
Or I can even grab the list of sub-nodes <Condition> for each of those <ConditionSet> nodes - using the CROSS APPLY with a second call to .nodes()

Related

Query xml using xquery in SQL Server 2016

I have my XML in following format:
<resultset xmlns="qm_system_resultset" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<result>
<result_id>F5</result_id>
<exception>NO</exception>
<recurring_count>0</recurring_count>
<defect>NO</defect>
<unresolved>NO</unresolved>
<exception_approval />
<comments />
<exception_expiration>3000-01-01</exception_expiration>
<exception_stat_only>NO</exception_stat_only>
<result_data>
<phraseprefix>rx</phraseprefix>
<phrasenumber>0001</phrasenumber>
<languagedesc>Khmer</languagedesc>
<englishphrase> each time.</englishphrase>
<phrasedesc> គ្រាប់ក្នុងមួយដង។</phrasedesc>
<qm_translatedphrase>day.</qm_translatedphrase>
</result_data>
</result>
<result>
<result_id>26</result_id>
<exception>NO</exception>
<recurring_count>0</recurring_count>
<defect>NO</defect>
<unresolved>NO</unresolved>
<exception_approval />
<comments />
<exception_expiration>3000-01-01</exception_expiration>
<exception_stat_only>NO</exception_stat_only>
<result_data>
<phraseprefix>hold</phraseprefix>
<phrasenumber>0001</phrasenumber>
<languagedesc>Hmong</languagedesc>
<englishphrase>Hold than 160.</englishphrase>
<phrasedesc>Tsis 160.</phrasedesc>
<qm_translatedphrase>Do not use </qm_translatedphrase>
</result_data>
</result>
Using TSQL/XML query how do I achieve this RESULT
[phraseprefix][phrasenumber]
rx 0001
hold 0001
...
I tried the following query, but I got null values for both the columns:
DECLARE #input XML = (SELECT result_xml
FROM QM_Data_Audit.QM_Package.test_results
WHERE result_id = 2446338)
SELECT
resultset.value('(phraseprefix)[1]', 'varchar(max)') AS 'phrasenumber',
resultset.value('(phrasenumber)[1]', 'int') AS 'phrasenumber'
FROM #input.nodes('/resultset/result/result_data') AS List(resultset)
My apologies if the question is asked previously, I am new to querying XML.
Appreciate your help.
Your xml has a namespace declared so you need to provide this when querying it. In this example this can be acheived with the WITH XMLNAMESPACES statement:
DECLARE #input XML = (SELECT result_xml
FROM QM_Data_Audit.QM_Package.test_results
WHERE result_id = 2446338);
WITH XMLNAMESPACES(DEFAULT 'qm_system_resultset')
SELECT
resultset.value('(phraseprefix)[1]', 'varchar(max)') AS 'phrasenumber',
resultset.value('(phrasenumber)[1]', 'varchar(max)') AS 'phrasenumber'
FROM #input.nodes('/resultset/result/result_data') AS List(resultset)
You'll want to set the data type for phrasenumber to varchar as well to preserve the leading 0s if you need them.

Save XML with attribute to Table in SQL Server

Hi I have XML data with attribute as input for SQL, i need this to be inserted in my table.
XML Data is
<?xml version="1.0" encoding="ISO-8859-1"?>
<MESSAGEACK>
<GUID GUID="kfafb30" SUBMITDATE="2015-10-15 11:30:29" ID="1">
<ERROR SEQ="1" CODE="28681" />
</GUID>
<GUID GUID="kfafb3" SUBMITDATE="2015-10-15 11:30:29" ID="1">
<ERROR SEQ="2" CODE="286381" />
</GUID>
</MESSAGEACK>
I want this to be inserted in below Format
GUID SUBMIT DATE ID ERROR SEQ CODE
kfafb3 2015-10-15 11:30:29 1 1 28681
kfafb3 2015-10-15 11:30:29 1 1 2868
please help.
Look into XPath and xml Data Type Methods in MSDN. This is one possible way :
declare #xml As XML = '...you XML string here...'
INSERT INTO YourTable
SELECT
guid.value('#GUID', 'varchar(100)') as 'GUID'
,guid.value('#SUBMITDATE', 'datetime') as 'SUBMIT DATE'
,guid.value('#ID', 'int') as 'ID'
,guid.value('ERROR[1]/#SEQ', 'int') as 'SEQ'
,guid.value('ERROR[1]/#CODE', 'int') as 'CODE'
FROM #xml.nodes('/MESSAGEACK/GUID') as x(guid)
Result :
just paste this into an empty query window and execute. Adapt to your needs:
DECLARE #xml XML=
'<?xml version="1.0" encoding="ISO-8859-1"?>
<MESSAGEACK>
<GUID GUID="kfafb30" SUBMITDATE="2015-10-15 11:30:29" ID="1">
<ERROR SEQ="1" CODE="28681" />
</GUID>
<GUID GUID="kfafb3" SUBMITDATE="2015-10-15 11:30:29" ID="1">
<ERROR SEQ="2" CODE="286381" />
</GUID>
</MESSAGEACK>';
SELECT Msg.Node.value('#GUID','varchar(max)') AS [GUID] --The value is no GUID, if the original values are, you could use uniqueidentifier instead of varchar(max)
,Msg.Node.value('#SUBMITDATE','datetime') AS SUBMITDATE
,Msg.Node.value('#ID','int') AS ID
,Msg.Node.value('(ERROR/#SEQ)[1]','int') AS [ERROR SEQ]
,Msg.Node.value('(ERROR/#CODE)[1]','int') AS CODE
FROM #xml.nodes('/MESSAGEACK/GUID') AS Msg(Node)

How can I query a value in SQL Server TEXT column that contains XML (not xml column type)

I have table DOCUMENTS with:
DOCUMENTS
____________________
DOCUMENTID int
USERID int
CONTENT text
I have following XML stored in TEXT column with name CONTENT in a SQL Server database
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<IDMSDocument>
<DocumentContent>
<Attribute Name="Number" GUID="{FFFFFFFF-0000-0000-0000-000000000001}">
<IDMSType>3060</IDMSType>
<Value Type="Integer">
<Value>122</Value>
</Value>
</Attribute>
<Attribute Name="Date" GUID="{FFFFFFFF-0000-0000-0000-000000000002}">
<IDMSType>3061</IDMSType>
<Value Type="DateTime">
<Date>10-09-2014</Date>
</Value>
</Attribute>
<Attribute Name="Публикуване" GUID="{CA646F55-5229-4FC5-AA27-494B25023F4E}">
<IDMSType>3062</IDMSType>
<Value Type="String">
<Value>Да</Value>
</Value>
</Attribute>
<Attribute Name="Дата" GUID="{AC0465B0-4FB4-49E2-B4FA-70901068FD9B}">
<IDMSType>3063</IDMSType>
<Value Type="DateTime">
<Date>01-10-2014</Date>
</Value>
</Attribute>
<Attribute Name="Предмет на поръчка" GUID="{04A4EC72-6F33-461F-98DD-D8D271997788}">
<IDMSType>3065</IDMSType>
<Value Type="String">
<Value>Избор на консултанти</Value>
</Value>
</Attribute>
</DocumentContent>
</IDMSDocument>
I find a way how to query dingle XML attribute in a single row from table, with heavy conversion :
DECLARE #strContent NVARCHAR(MAX);
DECLARE #xmlContent XML;
SET #strContent = (select content from DOCUMENTS where DocumentID=24);
SET #xmlContent = CAST(REPLACE(CAST(#strContent AS NVARCHAR(MAX)),'utf-8','utf-16') AS XML)
SELECT #xmlContent.query('/IDMSDocument/DocumentContent/Attribute[5]/Value/Value')
where #xmlContent.value('(/IDMSDocument/DocumentContent/Attribute[5]/Value/Value)[1]', 'nvarchar(max)') like '%search%'
I need to make query like "select all rows in table that in fifth attribute in XML have value 'search' "
For me the general problem is that column type is not XML, but I have text column with stored xml inside. when I try cast, query, value directly server return that I can use it Only with XML column.
I would be very grateful if someone suggest how to do this!
Thanks!
Kamen
SQL Server doesn't allow inline XML conversion to have functions applied, i.e.: no CAST(...).query(), use a CTE to handle the conversion:
;WITH cte AS (
SELECT DocumentID, UserID, CAST(Content AS XML) AS XMLContent
FROM Documents
)
SELECT XMLContent.query('/IDMSDocument/DocumentContent/Attribute[5]/Value/Value')
FROM cte
WHERE XMLContent.value('(/IDMSDocument/DocumentContent/Attribute[5]/Value/Value)[1]', 'nvarchar(max)') like '%search%'
One thing though: I saw Unicode (Russian?) characters in the Content column. It may be better to use utf-16 in your XML and ntext for column type.
text and ntext are also on the way out. If this is new code, use nvarchar(max)

How do I set the xmlns attribute on the root element in the generated XML by using T-SQL's xml data type method: query?

I've created a simplified version of my problem:
DECLARE #X XML =
'<Root xmlns="TestNS" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Test>
<Id>1</Id>
<InnerCollection>
<InnerItem>
<Value>1</Value>
</InnerItem>
<InnerItem>
<Value>2</Value>
</InnerItem>
<InnerItem>
<Value>3</Value>
</InnerItem>
</InnerCollection>
</Test>
<Test>
<Id>2</Id>
<InnerCollection>
<InnerItem>
<Value>5</Value>
</InnerItem>
<InnerItem>
<Value>6</Value>
</InnerItem>
<InnerItem>
<Value>7</Value>
</InnerItem>
</InnerCollection>
</Test>
</Root>'
I'm trying to write a query that takes each <Test> element and breaks it into a row. On each row I want to select the Id and the InnerCollection as XML. I want to create this InnerCollection XML for the first row (Id:1):
<InnerCollection xmlns="Reed.Api" xmlnsi="http//www.w3.org/2001/XMLSchema-instance">
<InnerItem>
<Value>1</Value>
</InnerItem>
<InnerItem>
<Value>2</Value>
</InnerItem>
<InnerItem>
<Value>3</Value>
</InnerItem>
</InnerCollection>
I tried doing that with this query but it puts a namespace I don't want on the elements:
;WITH XMLNAMESPACES
(
DEFAULT 'TestNS'
, 'http://www.w3.org/2001/XMLSchema-instance' AS i
)
SELECT
X.value('Id[1]', 'INT') Id
-- Creates a p1 namespace that I don't want.
, X.query('InnerCollection') InnerCollection
FROM #X.nodes('//Test') AS T(X)
My Google-fu isn't very strong today, but I imagine it doesn't make it any easier that the darn function is called query. I'm open to using other methods to create that XML value other than the query method.
I could use this method:
;WITH XMLNAMESPACES
(
DEFAULT 'TestNS'
, 'http://www.w3.org/2001/XMLSchema-instance' AS i
)
SELECT
X.value('Id[1]', 'INT') Id
,CAST(
(SELECT
InnerNodes.Node.value('Value[1]', 'INT') AS 'Value'
FROM X.nodes('./InnerCollection[1]//InnerItem') AS InnerNodes(Node)
FOR XML PATH('InnerItem'), ROOT('InnerCollection')
) AS XML) AS InnerCollection
FROM #X.nodes('//Test') AS T(X)
But that involves calling nodes on it to break it out into something selectable, and then selecting it back into XML using FOR XML... when it was XML to begin with. This seems like a inefficient method of doing this, so I'm hoping someone here will have a better idea.
This is how to do the SELECT using the query method to create the XML on each row that my question was looking for:
;WITH XMLNAMESPACES
(
'http://www.w3.org/2001/XMLSchema-instance' AS i
, DEFAULT 'TestNS'
)
SELECT
Test.Row.value('Id[1]', 'INT') Id
, Test.Row.query('<InnerCollection xmlns="TestNS" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">{InnerCollection}</InnerCollection>')
FROM #X.nodes('/Root/Test') AS Test(Row)

SQL XML Replacing elements

Please help! Is it possible to replace elements within an xml field of an sql database with other elements. I have tried using .modify(replace value of) but I can only replace text within elements rather than nodes.
Ultimately I am trying to update an element which may or may not contain other elements, with another element (possibly of the same name) within an XML field. (I am using SQL Server 2008)
E.g:
<Root>
<Sub>
<Value1>
</Value1>
<Value2>
</Value2>
<Value3>
</Value3>
</Sub>
</Root>
Would be replaced by:
<Root>
<SubVERSION2>
<Value1>
</Value1>
<Value2>
</Value2>
<Value3>
</Value3>
</SubVERSION2>
</Root>
Any help would be very much appreciated!
You can recreate your XML:
declare #x xml = '<Root>
<Sub>
<Value1>1
</Value1>
<Value2>2
</Value2>
<Value3>3
</Value3>
</Sub>
</Root>'
select cast(('<Root>' +
cast(
(
select t.c.query('.')
from #x.nodes('Root/Sub/*') t(c)
for xml path(''), root('SubVERSION2')
) as nvarchar(max)) + '</Root>') as xml)
produces desired output:
<Root>
<SubVERSION2>
<Value1>1
</Value1>
<Value2>2
</Value2>
<Value3>3
</Value3>
</SubVERSION2>
</Root>
declare #T table(XMLCol xml)
insert into #T values ('
<Root>
<Sub>
<Value1></Value1>
<Value2></Value2>
<Value3></Value3>
</Sub>
</Root>')
update #T set
XMLCol = XMLCol.query('for $s in Root/Sub
return
<Root>
<SubVERSION2>
{ $s/* }
</SubVERSION2>
</Root>')
Result:
<Root>
<SubVERSION2>
<Value1 />
<Value2 />
<Value3 />
</SubVERSION2>
</Root>