Shredding XML in SQL Server 2017 - sql

Given the following SQL:
-- Recreate the scratch temp table and load the single sample document.
DROP TABLE IF EXISTS #testXML;
CREATE TABLE #testXML (InputXML XML);
INSERT INTO #testXML (InputXML)
VALUES ('<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
<document>
<table name="tableName1">
<column name="ID">000010313500011171011710001 </column>
<column name="StartDate">10/27/2019</column>
<column name="EndDate">11/02/2019</column>
</table>
</document>')
I'm trying to get output like this:
ID StartDate EndDate
000010313500011171011710001 10/27/2019 11/02/2019
Here's my start, but I'm just flailing at this point.
-- First attempt: list the name attribute of every <table> next to the name
-- attribute of every <column>. XQuery addresses attributes with the @ prefix.
-- Both CROSS APPLYs start from the document root, so every <table> row is
-- paired with every <column> in the document.
SELECT
    px1.tbl.value('@name', 'nvarchar(50)') AS TableName,
    px2.col.value('@name', 'nvarchar(50)') AS ColName
FROM #testXML px
CROSS APPLY inputxml.nodes('/document/table') AS px1(tbl)
CROSS APPLY inputxml.nodes('/document/table/column') AS px2(col)
This is on SQL Server 2017.

Your SQL needs to be adjusted as follows, by leveraging the @name attribute value.
SQL
-- DDL and sample data population, start
-- Table variables are declared with the @ prefix.
DECLARE @tbl TABLE (InputXML xml);
INSERT INTO @tbl (InputXML)
VALUES ('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<document>
<table name="tableName1">
<column name="ID">000010313500011171011710001</column>
<column name="StartDate">10/27/2019</column>
<column name="EndDate">11/02/2019</column>
</table>
</document>');
-- DDL and sample data population, end

-- One output row per <table>; each <column> is picked out by its @name
-- attribute and turned into a relational column.
-- NOTE(review): the DATE conversions presumably rely on the session reading
-- 10/27/2019 as month/day/year - confirm the DATEFORMAT/language setting.
SELECT col.value('(column[@name="ID"]/text())[1]', 'nvarchar(50)') AS ID
     , col.value('(column[@name="StartDate"]/text())[1]', 'DATE') AS StartDate
     , col.value('(column[@name="EndDate"]/text())[1]', 'DATE') AS EndDate
FROM @tbl tbl
CROSS APPLY tbl.InputXML.nodes('/document/table') AS tab(col);
Output
+-----------------------------+------------+------------+
| ID | StartDate | EndDate |
+-----------------------------+------------+------------+
| 000010313500011171011710001 | 2019-10-27 | 2019-11-02 |
+-----------------------------+------------+------------+

Related

Need a where clause for an XML Node in a SQL Server 2019 stored procedure

I have 1.5 million XML documents stored in a SQL Server 2019 database and I need to have a where clause that has multiple nodes in a stored procedure.
<PROJECTS xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<row>
<APPLICATION_ID>1015</APPLICATION_ID>
<ORG_STATE>SC</ORG_STATE>
<ORG_CITY>Charleston</ORG_CITY>
<ORG_ZIPCODE>29407</ORG_ZIPCODE>
<PIS>
<PI>
<PI_NAME>BO, LEO (contact)</PI_NAME>
<PI_ID>9983950 (contact)</PI_ID>
</PI>
<PI>
<PI_NAME>KUZ, BEN I</PI_NAME>
<PI_ID>1862593</PI_ID>
</PI>
</PIS>
<PROJECT_START>08/15/2019</PROJECT_START>
<PROJECT_END>05/31/2024</PROJECT_END>
<INDIRECT_COST_AMT>103034</INDIRECT_COST_AMT>
<TOTAL_COST>638854</TOTAL_COST>
<TOTAL_COST_SUB_PROJECT />
</row>
</PROJECTS>
I need to pull all XML files where PI_ID equals 9983950. The number of PI's in the PIS node could be one or 5.
I'm using this code:
-- Up to 100 stored documents, one result row per node matched by //row[1],
-- limited to rows whose ORG_CITY is 'Charleston'.
SELECT TOP 100
    [APPLICATION_ID],
    [FileName],
    [XMLData],
    nref.value('ORG_CITY[1]', 'VARCHAR(30)') AS ORG_CITY
FROM [NIH_EXPORTER].[dbo].[ADMIN_Exporter_Files_XML]
CROSS APPLY XMLData.nodes('//row[1]') AS R(nref)
WHERE nref.value('ORG_CITY[1]', 'VARCHAR(30)') = 'Charleston'
That works when I need the city, but I'm not sure how to find the value when there are multiple nodes.
Please try the following solution.
SQL
-- DDL and sample data population, start
-- Table variables and scalar variables are declared with the @ prefix.
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, xmldata XML);
INSERT INTO @tbl (xmldata) VALUES
(N'<PROJECTS xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<row>
<APPLICATION_ID>1015</APPLICATION_ID>
<ORG_STATE>SC</ORG_STATE>
<ORG_CITY>Charleston</ORG_CITY>
<ORG_ZIPCODE>29407</ORG_ZIPCODE>
<PIS>
<PI>
<PI_NAME>BO, LEO (contact)</PI_NAME>
<PI_ID>9983950 (contact)</PI_ID>
</PI>
<PI>
<PI_NAME>KUZ, BEN I</PI_NAME>
<PI_ID>1862593</PI_ID>
</PI>
</PIS>
<PROJECT_START>08/15/2019</PROJECT_START>
<PROJECT_END>05/31/2024</PROJECT_END>
<INDIRECT_COST_AMT>103034</INDIRECT_COST_AMT>
<TOTAL_COST>638854</TOTAL_COST>
<TOTAL_COST_SUB_PROJECT/>
</row>
</PROJECTS>');
-- DDL and sample data population, end

-- The PI_ID to search for; it is passed into XQuery through sql:variable().
DECLARE @PI_ID VARCHAR(20) = '9983950';

-- Keep documents where any PIS/PI/PI_ID text contains the requested id.
-- contains() is a substring match, so "9983950 (contact)" also qualifies.
SELECT ID
     , xmldata.value('(/PROJECTS/row/ORG_CITY/text())[1]', 'VARCHAR(30)') AS ORG_CITY
FROM @tbl
WHERE xmldata.exist('/PROJECTS/row/PIS/PI/PI_ID[contains(./text()[1], sql:variable("@PI_ID"))]') = 1;
You can check if any required PIS/PI node exists with CROSS APPLY down the chain.
-- Same listing as the question's query, additionally requiring that at least
-- one PIS/PI under the row has a PI_ID starting with 9983950.
SELECT TOP 100
    [APPLICATION_ID],
    [FileName],
    [XMLData],
    nref.value('ORG_CITY[1]', 'VARCHAR(30)') AS ORG_CITY
FROM [NIH_EXPORTER].[dbo].[ADMIN_Exporter_Files_XML]
CROSS APPLY XMLData.nodes('//row[1]') AS R(nref)
-- TOP (1) keeps the apply to at most one row per <row>, so a row with several
-- matching PIs is not duplicated; a row with no match is dropped.
CROSS APPLY (
    SELECT TOP (1) NULL AS x
    FROM R.nref.nodes('./PIS/PI') AS p(n)
    WHERE p.n.value('./PI_ID[1]', 'VARCHAR(30)') LIKE '9983950%'
) AS pi_match
WHERE nref.value('ORG_CITY[1]', 'VARCHAR(30)') = 'Charleston'

Oracle XMLtable giving cross joined data

I have written an Oracle XML SQL query and it is giving output like below. I have given the whole code below, from creating the table to the query, for your quick help.
output: (WRONG)
ID, NAME
1 name1
1 name2
2 name1
2 name2
Want to make output like below (Required Output):
ID, NAME
1 name1
2 name2
Code:
-- Single-column table holding the XML document to shred.
CREATE TABLE xml_tbl (
    instance_detail_xml SYS.XMLTYPE
);
-- SQL*Plus setting: switch off substitution-variable scanning for the script.
SET DEFINE OFF;
INSERT INTO xml_tbl (instance_detail_xml)
VALUES ('<?xml version="1.0" encoding="UTF-8" standalone=''yes''?>
<driXML>
<sDet>
<cols>
<col>
<id>1</id>
<name>name1</name>
</col>
<col>
<id>2</id>
<name>name2</name>
</col>
</cols>
</sDet>
</driXML>
');
I tried the below sql, you may modify it or create a new one using it:
-- Asker's attempt: all <id> nodes and all <name> nodes are pulled out as two
-- separate fragments and then expanded independently, so every id is paired
-- with every name.
SELECT
    xt_id.id,
    xt_name.name
FROM xml_tbl xt
CROSS JOIN XMLTABLE(
    '/driXML'
    PASSING xt.instance_detail_xml
    COLUMNS
        id_xml   XMLTYPE PATH 'sDet/cols/col/id',
        name_xml XMLTYPE PATH 'sDet/cols/col/name'
) ri_xml
CROSS JOIN XMLTABLE('/id' PASSING ri_xml.id_xml COLUMNS id NUMBER PATH '.') xt_id
CROSS JOIN XMLTABLE('/name' PASSING ri_xml.name_xml COLUMNS name VARCHAR2(50) PATH '.') xt_name
;
You can just directly extract the id and name in the first XMLTABLE and if you descend through the hierarchy so that the path is '/driXML/sDet/cols/col' then each row from the XMLTYPE will be one col element and the id and name will correlate to that.
Oracle Setup:
-- Create and populate the sample table in one statement (CREATE TABLE ... AS SELECT).
CREATE TABLE xml_tbl (instance_detail_xml) AS
SELECT
    XMLTYPE('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<driXML>
<sDet>
<cols>
<col>
<id>1</id>
<name>name1</name>
</col>
<col>
<id>2</id>
<name>name2</name>
</col>
</cols>
</sDet>
</driXML>')
FROM dual;
Query:
-- One XMLTABLE row per <col> element; id and name are read relative to that
-- element, so they stay correlated.
SELECT
    c.id,
    c.name
FROM xml_tbl xt
CROSS JOIN XMLTABLE(
    '/driXML/sDet/cols/col'
    PASSING xt.instance_detail_xml
    COLUMNS
        id   NUMBER       PATH 'id',
        name VARCHAR2(50) PATH 'name'
) c;
Output:
ID | NAME
-: | :----
1 | name1
2 | name2
db<>fiddle here

Parsing a xml string using sql

I am looking to parse an XML string using SQL. I would like to have the data in separate columns. Could someone please help?
The string:
<item id="1" value="0"></item><item id="2" value="14"></item><item id="0" value="0"></item>
This is how you can do it in SQL Server (e.g., v2008):
-- Load the XML fragment into a temp table, then return one row per <item>
-- with its id and value attributes (XQuery addresses attributes with @).
create table #temp (xml_data xml)
insert into #temp (xml_data) values ('<item id="1" value="0"></item><item id="2" value="14"></item><item id="0" value="0"></item>')
select C.value('@id', 'int') as [id]
,C.value('@value', 'int') as [value]
from #temp cross apply
#temp.xml_data.nodes('item') as X(C)
drop table #temp
Which returns:
id value
----------- -----------
1 0
2 14
0 0

Collation of multiple XMLs into a single XML using SSIS or SQL

I have a table with the below structure.
-- Sample data: three rows, each carrying one small XML document.
-- Table variables are declared with the @ prefix.
Declare @YourTable Table ([ColA] int,[ColB] xml)
Insert Into @YourTable ([ColA],[ColB]) Values
(123,'<XMLData><ID>1</ID></XMLData>')
,(456,'<XMLData><ID>2</ID></XMLData>')
,(333,'<XMLData><ID>3</ID></XMLData>')
The XMLs are of same structure.
Now i need to concatenate all the XMLs in ColumnB into a single XML using either SSIS or SQL. The expected result should be
<XMLData>
<ID>1</ID>
<ID>2</ID>
<ID>3</ID>
</XMLData>
Any help is much appreciated.
Cheers!
Example
-- Sample data (table variables are declared with the @ prefix).
Declare @YourTable Table ([ColA] int,[ColB] xml)
Insert Into @YourTable ([ColA],[ColB]) Values
(123,'<XMLData><ID>1</ID></XMLData>')
,(456,'<XMLData><ID>2</ID></XMLData>')
,(333,'<XMLData><ID>3</ID></XMLData>')

-- query('XMLData/*') strips each row's own <XMLData> wrapper and keeps its
-- children; the [*] alias makes FOR XML PATH inline that content, and
-- ROOT('XMLData') wraps everything in one new root element.
Select [*] = [ColB].query('XMLData/*')
From @YourTable A
For XML Path(''), ROOT('XMLData')
Returns
<XMLData>
<ID>1</ID>
<ID>2</ID>
<ID>3</ID>
</XMLData>
Assuming your table name is TBL with below data model:
COLUMN_A | COLUMN_B
123 | XML1
456 | XML2
333 | XML3
Use below for SQL Server
-- The data model above names the key column COLUMN_A (there is no COLUMN_1).
-- NOTE(review): if COLUMN_B is of type xml it presumably has to be cast to
-- NVARCHAR(MAX) first, since xml values cannot be sorted - confirm.
SELECT COLUMN_A, STRING_AGG(COLUMN_B, ', ') WITHIN GROUP (ORDER BY COLUMN_B) AS CONCATENATED_XML
FROM TBL GROUP BY COLUMN_A;
Use below query for Oracle
-- The delimiter is an empty string written with straight quotes; the key column is COLUMN_A per the data model above.
SELECT COLUMN_A, LISTAGG(COLUMN_B, '') WITHIN GROUP (ORDER BY COLUMN_B) AS CONCATENATED_XML FROM TBL GROUP BY COLUMN_A;

Use SQL Server modify('insert') to append data to xml column

Consider the following situation. I have the following table
-- One row per (account, subscription). SubscriptionData_XML holds the
-- original document; SubscriptionData_AFTER_XML receives the amended copy.
CREATE TABLE [dbo].[GoldenEgg]
(
    rowIndex int NOT NULL IDENTITY(1,1),
    AccountNumber varchar(256) NULL,
    SubscriptionID int NOT NULL,
    SubscriptionData_XML xml NULL,
    -- Every column needs a data type, and a comma must separate the last
    -- column from the table-level constraint.
    SubscriptionData_AFTER_XML xml NULL,
    CONSTRAINT [PK_GoldenEgg]
        PRIMARY KEY CLUSTERED ([rowIndex] ASC)
        WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF,
              IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON,
              ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]
GoldenEgg sample data:
SubscriptionData_XML data for SubscriptionID 6070:
<NVPList xmlns="http://www.whatevernamspace.com/v1" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Item>
<Name>AccountNumbers</Name>
<Value>
<ValueItem>39448474</ValueItem>
</Value>
</Item>
</NVPList>
I want to append all account numbers for each SubscriptionID to the already existing xml <Value> node in the SubscriptionData_XML column and I do not want to add account numbers that already exist in the xml.
So for SubscriptionID 6070 account number 39448474 should only be listed once in the xml like so:
<NVPList xmlns="http://www.whatevernamspace.com/v1" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Item>
<Name>AccountNumbers</Name>
<Value>
<ValueItem>39448474</ValueItem>
<ValueItem>56936495</ValueItem>
<ValueItem>70660044</ValueItem>
<ValueItem>41447395</ValueItem>
</Value>
</Item>
</NVPList>
If there are not other nodes within your XML you might choose the FLWOR-query.
Some hints:
first I create a mock-up table and fill it with data
I use an updatable CTE to collect the data
I use a FOR XML sub-select without a namespace to build the <Value> node without bothering about already existing IDs in your actual XML
I use a FLWOR-query() to build up the full XML out of the just created Value-node
As this CTE is updateable, I can use it directly for the UPDATE
The final SELECT * FROM #tbl shows to you, that all AFTER_XML are filled
Try this:
-- Mock-up table (table variables are declared with the @ prefix).
DECLARE @tbl TABLE(rowIndex INT IDENTITY,AccountNumber INT,SubscriptionID INT, SubscriptionData_XML XML,SubscriptionData_AFTER_XML XML);
INSERT INTO @tbl (AccountNumber,SubscriptionID,SubscriptionData_XML,SubscriptionData_AFTER_XML) VALUES
(1111,6070,N'<NVPList xmlns="http://www.whatevernamspace.com/v1" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Item>
<Name>AccountNumbers</Name>
<Value>
<ValueItem>39448474</ValueItem>
</Value>
</Item>
</NVPList>',NULL)
,(2222,6070,NULL,NULL)
,(3333,6070,NULL,NULL)
,(4444,6070,NULL,NULL)
,(5555,6071,N'<NVPList xmlns="http://www.whatevernamspace.com/v1" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Item>
<Name>AccountNumbers</Name>
<Value>
<ValueItem>39448474</ValueItem>
</Value>
</Item>
</NVPList>',NULL)
,(6666,6071,NULL,NULL)
,(7777,6071,NULL,NULL)
,(8888,6071,NULL,NULL);
--Here starts the updateable CTE
--Only rows that already carry an XML document are selected (and later updated).
WITH UpdateableCTE AS
(
SELECT t1.rowIndex
,t1.SubscriptionData_AFTER_XML
,(
--All account numbers of the same subscription as namespace-free <Value><ValueItem>..</ValueItem></Value>
SELECT t2.AccountNumber AS ValueItem
FROM @tbl AS t2
WHERE t2.SubscriptionID=t1.SubscriptionID
FOR XML PATH(''),ROOT('Value'),TYPE
).query
--The FLWOR query rebuilds the whole document inside the default namespace,
--taking <Name> from the existing XML (x.XmlName) and re-creating each ValueItem
(N'declare default element namespace "http://www.whatevernamspace.com/v1";
let $nd:=/*:Value
return
<NVPList>
<Item>
<Name>{sql:column("XmlName")}</Name>
<Value>
{
for $vi in $nd/*:ValueItem
return <ValueItem>{$vi/text()}</ValueItem>
}
</Value>
</Item>
</NVPList>
'
) AS NewXML
FROM @tbl AS t1
CROSS APPLY( SELECT t1.SubscriptionData_XML.value('(//*:Name)[1]','nvarchar(max)') AS XmlName) AS x
WHERE SubscriptionData_XML IS NOT NULL
)
--The UPDATE statement
UPDATE UpdateableCTE SET SubscriptionData_AFTER_XML=NewXML
FROM UpdateableCTE;
--The SELECT to check the success
SELECT * FROM @tbl
I was able to accomplish this task with a sql UPDATE statement using the xml modify() method and without using any loops. Here is a breakdown of the solution:
1) I had to get all the AccountNumbers for the SubscriptionID and format them
into xml <ValueItem> nodes.
SQL QUERY 1:
-- For every row that already has subscription XML, build an XML fragment of
-- <ValueItem> nodes holding the distinct account numbers of that subscription.
SELECT
    ge.SubscriptionID,
    CAST((
        SELECT DISTINCT ValueItem = ISNULL(acct.AccountNumber, '')
        FROM dbo.GoldenEgg AS acct
        WHERE acct.SubscriptionID = ge.SubscriptionID
        FOR XML PATH('')
    ) AS xml) AS AccountNumberXml
FROM dbo.GoldenEgg AS ge
WHERE ge.SubscriptionData_XML IS NOT NULL
SQL QUERY 1 RESULT:
SQL QUERY 1 XML RESULT (SubscriptionID 6070):
<ValueItem>39448474</ValueItem>
<ValueItem>41447395</ValueItem>
<ValueItem>56936495</ValueItem>
<ValueItem>70660044</ValueItem>
2) Now that I have the AccountNumbers in a single value, I can use the xml modify() method to insert the AccountNumberXml value into the last position of the <Value> xml node. I will do this using an UPDATE statement with an INNER JOIN. Also note that I initially set SubscriptionData_AFTER_XML equal to SubscriptionData_XML before doing anything.
SQL QUERY 2:
-- Append the per-subscription <ValueItem> fragment (t1.AccountNumberXml) as
-- the last children of the first /NVPList/Item/Value node of the AFTER copy.
UPDATE ge
SET SubscriptionData_AFTER_XML.modify('declare default element namespace "http://www.whatevernamspace.com/v1";
insert sql:column("t1.AccountNumberXml") as last into (/NVPList/Item/Value)[1]')
FROM dbo.GoldenEgg AS ge
INNER JOIN (
    -- Distinct account numbers of each subscription as <ValueItem> nodes.
    SELECT
        sub.SubscriptionID,
        CAST((
            SELECT DISTINCT ValueItem = ISNULL(acct.AccountNumber, '')
            FROM dbo.GoldenEgg AS acct
            WHERE acct.SubscriptionID = sub.SubscriptionID
            FOR XML PATH('')
        ) AS xml) AS AccountNumberXml
    FROM dbo.GoldenEgg AS sub
    WHERE sub.SubscriptionData_AFTER_XML IS NOT NULL
) AS t1
    ON t1.SubscriptionID = ge.SubscriptionID
WHERE ge.SubscriptionData_AFTER_XML IS NOT NULL
SQL QUERY 2 RESULT:
SQL QUERY 2 XML RESULT (SubscriptionID 6070 SubscriptionData_AFTER_XML column):
<NVPList xmlns="http://www.whatevernamspace.com/v1" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Item>
<Name>AccountNumbers</Name>
<Value>
<ValueItem>39448474</ValueItem>
<ValueItem xmlns="">39448474</ValueItem>
<ValueItem xmlns="">41447395</ValueItem>
<ValueItem xmlns="">56936495</ValueItem>
<ValueItem xmlns="">70660044</ValueItem>
</Value>
</Item>
</NVPList>
As you may see there are now two problems with the final xml result in the SubscriptionData_AFTER_XML column.
Problem 1
For subscriptionID 6070 AccountNumber 39448474 is being repeated in the <ValueItem> node list, which I do not want. To fix this I have to query the current AccountNumber values in the xml and exclude those AccountNumbers from the previous INNER JOIN
SQL QUERY 3:
This query will give me a result set with all the current AccountNumbers in the SubscriptionData_XML column, which I can then use to exclude these AccountNumbers from the SQL QUERY 1 result set
-- One row per <ValueItem> already present in the stored subscription XML.
SELECT
    ge.SubscriptionID,
    t.c.value('.', 'varchar(MAX)') AS CurrentValueItems
FROM dbo.GoldenEgg AS ge
CROSS APPLY ge.SubscriptionData_XML.nodes('declare default element namespace "http://www.whatevernamspace.com/v1";
/NVPList/Item/Value/ValueItem') AS t(c)
WHERE ge.SubscriptionData_XML IS NOT NULL
SQL QUERY 3 RESULT:
Now putting it all together to get the correct final result
SQL QUERY 4:
-- Append to the AFTER copy only those account numbers of the subscription
-- that are not already listed as <ValueItem> in SubscriptionData_XML.
UPDATE ge
SET SubscriptionData_AFTER_XML.modify
('declare default element namespace "http://www.whatevernamspace.com/v1";
insert sql:column("t1.AccountNumberXml") as last into (/NVPList/Item/Value)[1]')
FROM dbo.GoldenEgg ge
-- t1: per subscription, the XML fragment of new <ValueItem> nodes to insert
INNER JOIN (SELECT
ge2.SubscriptionID,
CAST((SELECT DISTINCT ValueItem = ISNULL(ge1.AccountNumber,'')
FROM dbo.GoldenEgg ge1
--make sure we are not inserting AccountNumbers that already exists in the subscription data
-- the subquery lists the ValueItem texts currently stored for ge2's subscription
WHERE ge1.AccountNumber NOT IN (SELECT t.c.value('.', 'varchar(MAX)') as CurrentValueItems
FROM dbo.GoldenEgg
CROSS APPLY SubscriptionData_XML.nodes('declare default element namespace "http://www.whatevernamspace.com/v1";
/NVPList/Item/Value/ValueItem') as t(c)
WHERE SubscriptionData_XML IS NOT NULL
AND SubscriptionID = ge2.SubscriptionID)
-- restrict the candidate account numbers to the same subscription
AND ge1.SubscriptionID = ge2.SubscriptionID
FOR XML PATH('')) AS xml) as AccountNumberXml
FROM dbo.GoldenEgg ge2
WHERE ge2.SubscriptionData_AFTER_XML IS NOT NULL) t1 ON t1.SubscriptionID = ge.SubscriptionID
-- only rows whose AFTER copy has been initialised are modified
WHERE ge.SubscriptionData_AFTER_XML IS NOT NULL
SQL QUERY 4 XML RESULT (SubscriptionID 6070 SubscriptionData_AFTER_XML column):
As you can see AccountNumber 39448474 is now only listed once in the xml
<NVPList xmlns="http://www.whatevernamspace.com/v1" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Item>
<Name>AccountNumbers</Name>
<Value>
<ValueItem>39448474</ValueItem>
<ValueItem xmlns="">41447395</ValueItem>
<ValueItem xmlns="">56936495</ValueItem>
<ValueItem xmlns="">70660044</ValueItem>
</Value>
</Item>
</NVPList>
Problem 2
When the AccountNumber node list is inserted, it is inserted with an empty xmlns="" namespace. This is the query I used to remove the empty xmlns="" namespace.
SQL QUERY 5:
-- Strip the empty xmlns="" declarations by round-tripping the XML through
-- NVARCHAR(MAX), removing the text, and converting back to XML.
UPDATE dbo.GoldenEgg
SET SubscriptionData_AFTER_XML = CAST(
        REPLACE(
            CAST(SubscriptionData_AFTER_XML AS NVARCHAR(MAX)),
            N'xmlns=""',
            ''
        ) AS XML
    )
WHERE SubscriptionData_AFTER_XML IS NOT NULL
SQL QUERY 5 XML RESULT (SubscriptionID 6070):
<NVPList xmlns="http://www.whatevernamspace.com/v1" xmlns:i="http://www.w3.org/2001/XMLSchema-instance">
<Item>
<Name>AccountNumbers</Name>
<Value>
<ValueItem>39448474</ValueItem>
<ValueItem>41447395</ValueItem>
<ValueItem>56936495</ValueItem>
<ValueItem>70660044</ValueItem>
</Value>
</Item>
</NVPList>
I hope this helps anyone who may need to do something similar