Parsing a xml string using sql - sql

I am looking to parse an XML string using SQL. I would like to have the data in separate columns. Could someone please help?
The string:
<item id="1" value="0"></item><item id="2" value="14"></item><item id="0" value="0"></item>

This is how you can do it in SQL Server (e.g., v2008):
-- Shred a sequence of <item> elements into one row per element.
-- In XQuery an attribute is addressed with the '@' prefix (@id, @value).
create table #temp (xml_data xml);

insert into #temp (xml_data)
values ('<item id="1" value="0"></item><item id="2" value="14"></item><item id="0" value="0"></item>');

select C.value('@id', 'int') as [id]
     , C.value('@value', 'int') as [value]
from #temp as t
cross apply t.xml_data.nodes('item') as X(C);  -- one row per top-level <item>

drop table #temp;
Which returns:
id value
----------- -----------
1 0
2 14
0 0

Related

Need a where clause for an XML Node in a SQL Server 2019 stored procedure

I have 1.5 million XML documents stored in a SQL Server 2019 database and I need to have a where clause that has multiple nodes in a stored procedure.
<PROJECTS xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<row>
<APPLICATION_ID>1015</APPLICATION_ID>
<ORG_STATE>SC</ORG_STATE>
<ORG_CITY>Charleston</ORG_CITY>
<ORG_ZIPCODE>29407</ORG_ZIPCODE>
<PIS>
<PI>
<PI_NAME>BO, LEO (contact)</PI_NAME>
<PI_ID>9983950 (contact)</PI_ID>
</PI>
<PI>
<PI_NAME>KUZ, BEN I</PI_NAME>
<PI_ID>1862593</PI_ID>
</PI>
</PIS>
<PROJECT_START>08/15/2019</PROJECT_START>
<PROJECT_END>05/31/2024</PROJECT_END>
<INDIRECT_COST_AMT>103034</INDIRECT_COST_AMT>
<TOTAL_COST>638854</TOTAL_COST>
<TOTAL_COST_SUB_PROJECT />
</row>
</PROJECTS>
I need to pull all XML files where PI_ID equals 9983950. The number of PI's in the PIS node could be one or 5.
I'm using this code:
-- Up to 100 stored documents whose //row[1] node has ORG_CITY = 'Charleston'.
SELECT TOP 100
    [APPLICATION_ID]
  , [FileName]
  , [XMLData]
  , row_node.value('ORG_CITY[1]', 'VARCHAR(30)') AS ORG_CITY
FROM [NIH_EXPORTER].[dbo].[ADMIN_Exporter_Files_XML]
CROSS APPLY XMLData.nodes('//row[1]') AS rows_x(row_node)
WHERE row_node.value('ORG_CITY[1]', 'VARCHAR(30)') = 'Charleston'
This works when I need the city, but I'm not sure how to filter on a value when there are multiple nodes.
Please try the following solution.
SQL
-- DDL and sample data population, start
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, xmldata XML);
INSERT INTO @tbl (xmldata) VALUES
(N'<PROJECTS xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<row>
<APPLICATION_ID>1015</APPLICATION_ID>
<ORG_STATE>SC</ORG_STATE>
<ORG_CITY>Charleston</ORG_CITY>
<ORG_ZIPCODE>29407</ORG_ZIPCODE>
<PIS>
<PI>
<PI_NAME>BO, LEO (contact)</PI_NAME>
<PI_ID>9983950 (contact)</PI_ID>
</PI>
<PI>
<PI_NAME>KUZ, BEN I</PI_NAME>
<PI_ID>1862593</PI_ID>
</PI>
</PIS>
<PROJECT_START>08/15/2019</PROJECT_START>
<PROJECT_END>05/31/2024</PROJECT_END>
<INDIRECT_COST_AMT>103034</INDIRECT_COST_AMT>
<TOTAL_COST>638854</TOTAL_COST>
<TOTAL_COST_SUB_PROJECT/>
</row>
</PROJECTS>');
-- DDL and sample data population, end

-- PI identifier to search for; exposed to XQuery through sql:variable().
DECLARE @PI_ID VARCHAR(20) = '9983950';

-- Keep documents in which any PIS/PI/PI_ID contains the identifier.
-- contains() is a substring match, so values carrying a suffix such as
-- "9983950 (contact)" still qualify.
-- NOTE(review): a substring match would also accept a longer ID that embeds
-- this one; tighten the predicate if that can occur in the data.
SELECT ID
     , xmldata.value('(/PROJECTS/row/ORG_CITY/text())[1]', 'VARCHAR(30)') AS ORG_CITY
FROM @tbl
WHERE xmldata.exist('/PROJECTS/row/PIS/PI/PI_ID[contains(./text()[1], sql:variable("@PI_ID"))]') = 1;
You can check if any required PIS/PI node exists with CROSS APPLY down the chain.
-- Same city filter, additionally requiring at least one PIS/PI whose PI_ID
-- starts with 9983950. The inner TOP (1) stops at the first matching PI, so
-- each row node contributes at most one output row.
SELECT TOP 100
    [APPLICATION_ID]
  , [FileName]
  , [XMLData]
  , nref.value('ORG_CITY[1]', 'VARCHAR(30)') AS ORG_CITY
FROM [NIH_EXPORTER].[dbo].[ADMIN_Exporter_Files_XML]
CROSS APPLY XMLData.nodes('//row[1]') AS R(nref)
CROSS APPLY (
    SELECT TOP (1) NULL AS x
    FROM R.nref.nodes('./PIS/PI') AS p(n)
    WHERE p.n.value('./PI_ID[1]', 'VARCHAR(30)') LIKE '9983950%'
) AS pi_hit
WHERE nref.value('ORG_CITY[1]', 'VARCHAR(30)') = 'Charleston'

Shredding XML in SQL Server 2017

Given the following SQL:
-- Recreate the scratch table and load one sample document into it.
DROP TABLE IF EXISTS #testXML;
CREATE TABLE #testXML (InputXML xml);
INSERT INTO #testXML (InputXML)
VALUES ('<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<document>
<table name="tableName1">
<column name="ID">000010313500011171011710001 </column>
<column name="StartDate">10/27/2019</column>
<column name="EndDate">11/02/2019</column>
</table>
</document>');
I'm trying to get output like this:
ID StartDate EndDate
000010313500011171011710001 10/27/2019 11/02/2019
Here's my start, but I'm just flailing at this point.
-- Attempted shred: list each table name next to each column name.
-- The name attribute is addressed as @name in XQuery.
-- NOTE(review): both APPLYs start from the document root, so the second one
-- is not correlated with the first; with several <table> elements every
-- table would be paired with every column.
SELECT
    px1.tbl.value('@name','nvarchar(50)') as TableName
  , px2.col.value('@name','nvarchar(50)') as ColName
from #testXML px
cross apply inputxml.nodes ('/document/table') as px1(tbl)
cross apply inputxml.nodes ('/document/table/column') as px2(col)
This is on SQL Server 2017.
Your SQL needs to be adjusted as follows, by leveraging the @name attribute value.
SQL
-- DDL and sample data population, start
DECLARE @tbl TABLE (InputXML xml);
INSERT INTO @tbl (InputXML)
VALUES ('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<document>
<table name="tableName1">
<column name="ID">000010313500011171011710001</column>
<column name="StartDate">10/27/2019</column>
<column name="EndDate">11/02/2019</column>
</table>
</document>');
-- DDL and sample data population, end

-- One output row per <table>; each output column picks the <column> child
-- whose name attribute matches, using the predicate [@name="..."].
SELECT col.value('(column[@name="ID"]/text())[1]','nvarchar(50)') as ID
     , col.value('(column[@name="StartDate"]/text())[1]','DATE') as StartDate
     , col.value('(column[@name="EndDate"]/text())[1]','DATE') as EndDate
FROM @tbl tbl
CROSS APPLY tbl.InputXML.nodes('/document/table') AS tab(col);
Output
+-----------------------------+------------+------------+
| ID | StartDate | EndDate |
+-----------------------------+------------+------------+
| 000010313500011171011710001 | 2019-10-27 | 2019-11-02 |
+-----------------------------+------------+------------+

shred multiple xml elements into multiple columns

I am trying to shred this xml column into multiple xml columns but it seems to be in the wrong format.
Here is my data from using xml path()
<claim id="1111111">
<InsHistDB>2</InsHistDB>
<ClaimHistID>111111</ClaimHistID>
<PatID>00000001</PatID>
<ProcedureData>
<row proc_logid="0000009" proc_logdb="1000000" createdate="2000-09-21T00:00:00" pldate="2000-09-21T00:00:00" adacode="D0120" />
<row proc_logid="1211557" proc_logdb="1000010" createdate="2015-09-21T00:00:00" pldate="2015-09-21T00:00:00" adacode="D0220" />
<row proc_logid="1211558" proc_logdb="1000010" createdate="2015-09-21T00:00:00" pldate="2015-09-21T00:00:00" adacode="D0230" />
<row proc_logid="1211556" proc_logdb="1000010" createdate="2015-09-21T00:00:00" pldate="2015-09-21T00:00:00" adacode="D0272" />
</ProcedureData>
</claim>
The select statement is currently this
-- Serialises one claim as XML. With FOR XML PATH, a column alias that starts
-- with '@' is emitted as an attribute of the <claim> element.
SELECT TOP (1000) [ClaimID] as '@id'
,[InsHistDB]
,[ClaimHistID]
,[PatID]
-- ,[ProcedureData].value('declare namespace ns= "ProcedureData"; (/ns:ProcedureData/ns:row[1])','nvarchar(50)') as pp
,[ProcedureData]
--,[ProcedureData].query('proc_logid').value('.','varchar(50)') as 'proc_1'
FROM [Mine].[dbo].[claim]
where claimid=1111111
FOR XML PATH('claim')
What I am wanting to do is divide out the {Proc_Log_id} into different columns so the row should read.
Claim ID INSHISTDB CLaimHistID PATID Proc_Id1 Proc_ID2 ProcID3 procID4
11111 2 1111111 000000001 0000009 1211557 1211558 1211556
Is this possible, or am I just spinning my wheels? This will be for multiple PatIDs, so the query is expected to run without the WHERE clause. I also believe there can be up to 10 proc_logids per XML data point. I am fine with NULL values, as I plan to pivot and normalize this data.
Thanks for reading.
The syntax to select the 3rd node is column.value('(/foo/bar)[3]','varchar(25)').
You are right to consider other options. The 'query' or 'nodes' functions in particular are better suited as they could return the values as a second result set or a new XML document.
You can first grab the IDs from your XML content, and then use PIVOT as you mentioned. Since you are not sure about the number of columns, you can use a dynamic pivot to make it more flexible.
First get the data using XML nodes and value and I stored the value in temp table as it would be convenient to do pivot.
-- Sample claim document held in an xml variable.
Declare @xmlstring xml =
'<claim id="1111111">
<InsHistDB>2</InsHistDB>
<ClaimHistID>111111</ClaimHistID>
<PatID>00000001</PatID>
<ProcedureData>
<row proc_logid="0000009" proc_logdb="1000000" createdate="2000-09-21T00:00:00" pldate="2000-09-21T00:00:00" adacode="D0120" />
<row proc_logid="1211557" proc_logdb="1000010" createdate="2015-09-21T00:00:00" pldate="2015-09-21T00:00:00" adacode="D0220" />
<row proc_logid="1211558" proc_logdb="1000010" createdate="2015-09-21T00:00:00" pldate="2015-09-21T00:00:00" adacode="D0230" />
<row proc_logid="1211556" proc_logdb="1000010" createdate="2015-09-21T00:00:00" pldate="2015-09-21T00:00:00" adacode="D0272" />
</ProcedureData>
</claim>';

-- Start from a clean temp table on every run.
if object_id('tempdb..#temp1') is not null
    drop table #temp1;

-- One row per (claim, procedure row). ProcID numbers the procedure rows
-- within each claim (Proc_ID1, Proc_ID2, ...) and later becomes the pivot
-- column name.
-- NOTE(review): the numbering is ordered by proc_logid compared as text,
-- not by document order.
select m.col.value('@id','varchar(150)') as ClaimID
     , m.col.value('(InsHistDB)[1]','varchar(150)') as InsHistDB
     , m.col.value('(ClaimHistID)[1]','varchar(150)') as ClaimHistID
     , m.col.value('(PatID)[1]','varchar(150)') as PatID
     , t.new.value('(@proc_logid)[1]', 'Varchar(150)') as IDcol
     , concat('Proc_ID', cast(ROW_NUMBER() over (Partition by m.col.value('@id','varchar(150)') order by t.new.value('(@proc_logid)[1]', 'Varchar(150)')) as varchar(10))) AS ProcID
into #temp1
from @xmlstring.nodes('/claim') as m(col)
-- Shred the rows relative to the current claim node so that, when several
-- claims are present, each claim is paired only with its own procedure rows.
CROSS APPLY m.col.nodes('ProcedureData/row') as t(new);
You will have data in temp table like this. I have concatenated rownumber for a given claimID with proc ID to create column in the way you have mentioned.
ClaimID InsHistDB ClaimHistID PatID IDcol ProcID
1111111 2 111111 00000001 0000009 Proc_ID1
1111111 2 111111 00000001 1211557 Proc_ID2
1111111 2 111111 00000001 1211558 Proc_ID3
1111111 2 111111 00000001 1211556 Proc_ID4
Then you can make use of dynamic pivot to get your expected output.
-- Build the pivot column list from the distinct ProcID values in #temp1, then
-- run the PIVOT through dynamic SQL because the set of columns is not known
-- in advance.
DECLARE @cols AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX);

-- Comma-separated, bracket-quoted list; STUFF removes the leading comma.
SET @cols = STUFF((SELECT distinct ',' + QUOTENAME(c.procID)
FROM #temp1 c
FOR XML PATH(''))
,1,1,'');

set @query = 'SELECT ClaimID, InsHistDB,ClaimHistID, PatID, '+@cols+' from
(
select ClaimID, InsHistDB,ClaimHistID, PatID, procID, IDcol
from #temp1
) x
pivot
(
max(IDcol)
for procID in (' + @cols + ')
) p ';

Exec sp_executesql @query;
Final output from the query:
ClaimID InsHistDB ClaimHistID PatID Proc_ID1 Proc_ID2 Proc_ID3 Proc_ID4
1111111 2 111111 00000001 0000009 1211557 1211558 1211556

Select multiple xml nodes using crossapply

I'm currently trying to select multiple nodes from an XML variable that gets passed into a stored procedure, but I only get one node from its XML. How can I list all of the item names and string values?
Test code:
-- Test data: the XML is stored as ntext, so it is cast to xml before querying.
DECLARE @T TABLE (AllXml ntext);
INSERT @T VALUES('<error>
<item name="item 1">
<value string="string 1" />
</item>
<item name="item 2">
<value string="string 2" />
</item>
<item name="item 3">
<value string="string 3" />
</item>
</error>');

-- The [1] positional filter keeps only the first match of each path, so this
-- returns a single row per stored document (the first item only).
SELECT
    CAST(AllXml as xml).value('(/error/item/@name)[1]', 'varchar(100)' ),
    CAST(AllXml as xml).value('(/error/item/value/@string)[1]', 'varchar(max)' )
FROM @T
Desired result:
Item 1 string 1
Item 2 string 2
Item 3 string 3
You can achieve it using CROSS APPLY and a sub-select.
-- One row per <item>: the derived table casts the ntext column to xml once,
-- then nodes() shreds every /error/item element.
SELECT
    m.c.value('(@name)[1]', 'varchar(100)') AS Name,
    m.c.value('(value/@string)[1]', 'varchar(max)') AS Value
FROM
(
    SELECT CAST(AllXml as xml) AllXml
    FROM @T
) AS data
CROSS APPLY AllXml.nodes('/error/item') as m(c)
Or you can use one more CROSS APPLY, as below:
-- One row per <item>: the first APPLY casts the ntext column to xml, the
-- second shreds every /error/item element from that xml value.
SELECT
    m.c.value('(@name)[1]', 'varchar(100)') AS Name,
    m.c.value('(value/@string)[1]', 'varchar(max)') AS Value
FROM @T
CROSS APPLY (SELECT CAST(AllXml AS XML)) as D(D)
CROSS APPLY D.D.nodes('/error/item') as m(c)

Transform XML to Table data using XQuery in SQL Server

Is it possible to achieve the following table from of output using XQuery in SQL Server
Edit: my requirement has changed. Consider that I have a table which stores the XML below together with a UserID.
-- Sample security document: states with zero or more cities.
DECLARE @XML xml
set @XML = '<Security>
<FiscalYear Name="2012">
<Country Id="204">
<State Id="1">
<City Id="10"></City>
</State>
<State Id="2">
<City Id="20"></City>
<City Id="30"></City>
</State>
<State Id ="3"></State>
</Country >
</FiscalYear>
</Security>'

-- Temp table holding one security document per user.
CREATE TABLE #tmp_user
(UserID INT,SecurityXML XML)

INSERT INTO #tmp_user
( UserID, SecurityXML )
VALUES ( 1,
@XML
)
Now, how can I get output like this?
Output:
UserID StateID CityID
1 1 10
1 2 20
1 2 30
1 3 0
Is it possible to achieve?
I modified your XML a bit because it was invalid. Change the end tag </Subsidiary> to </Country>.
declare @XML xml
set @XML =
'<Security>
<FiscalYear Name="2012">
<Country Id="204">
<State Id="1">
<City Id="10"></City>
</State>
<State Id="2">
<City Id="20"></City>
<City Id="30"></City>
</State>
<State Id ="3"></State>
</Country>
</FiscalYear>
</Security>'

-- One row per (State, City). OUTER APPLY keeps states that have no <City>
-- child; their CityID comes back NULL and is reported as 0 via COALESCE.
select S.N.value('@Id', 'int') as StateID,
       coalesce(C.N.value('@Id', 'int'), 0) as CityID
from @XML.nodes('/Security/FiscalYear/Country/State') as S(N)
outer apply S.N.nodes('City') as C(N)
A version using a table instead of XML variable
-- Same shred, reading the XML from #tmp_user: one row per user, state and
-- city. States without a <City> child are kept by OUTER APPLY and get
-- CityID 0 via COALESCE.
select T.UserID,
       S.N.value('@Id', 'int') as StateID,
       coalesce(C.N.value('@Id', 'int'), 0) as CityID
from #tmp_user as T
cross apply T.SecurityXML.nodes('/Security/FiscalYear/Country/State') as S(N)
outer apply S.N.nodes('City') as C(N)