Unable to Query XML Document with SQL/OPENXML - sql

I would like to query the following XML-File using SQL:
<?xml version="1.0" encoding="UTF-8"?>
<GL_MarketDocument xmlns="urn:iec62325.351:tc57wg16:451-6:generationloaddocument:3:0">
<mRID>2f6f8b82348440b1b121bca06311945d</mRID>
<time_Period.timeInterval>
<start>2020-03-02T23:00Z</start>
<end>2020-03-03T18:30Z</end>
</time_Period.timeInterval>
</GL_MarketDocument>
Using this code I would like to get the value for "mRID":
-- Load the XML file into a string variable, parse it, and read mRID through OPENXML.
DECLARE @DocHandle int
DECLARE @XmlDocument varchar(MAX)
-- SINGLE_BLOB returns the whole file as one value, exposed here as column I.
SELECT @XmlDocument=I
FROM OPENROWSET (BULK 'TP_10V1001C--00013H_ENTSOE-ETP__00a8f07d-95bd-4075-b1f7-3f54ce6162f3.xml', SINGLE_BLOB) as ImportFile(I)
-- The third argument binds the prefix "d" to the document's default namespace for the XPath below.
EXEC sp_xml_preparedocument @DocHandle OUTPUT, @XmlDocument, N'<root xmlns:d="urn:iec62325.351:tc57wg16:451-6:generationloaddocument:3:0"/>' ;
SELECT *
FROM OPENXML (@DocHandle, N'/d:GL_MarketDocument')
WITH ([mRID] varchar(50))
-- Release the parsed document.
EXEC sp_xml_removedocument @DocHandle
However, the result is:
mRID
NULL
How to get the correct value for mRID ('2f6f8b82348440b1b121bca06311945d') instead of NULL?

Microsoft's proprietary OPENXML and its companions sp_xml_preparedocument and sp_xml_removedocument are kept just for backward compatibility with the obsolete SQL Server 2000. Their use is now limited to a very few fringe cases.
Starting from SQL Server 2005 onwards, it is better to use XQuery language, based on the w3c standards, while dealing with the XML data type.
Your XML has a default namespace, so it should be taken into account.
SQL, from a variable
-- Parse the sample document held in an XML variable and return its mRID value.
DECLARE @XMLDocument XML =
N'<GL_MarketDocument xmlns="urn:iec62325.351:tc57wg16:451-6:generationloaddocument:3:0">
<mRID>2f6f8b82348440b1b121bca06311945d</mRID>
<time_Period.timeInterval>
<start>2020-03-02T23:00Z</start>
<end>2020-03-03T18:30Z</end>
</time_Period.timeInterval>
</GL_MarketDocument>';
-- The document declares a default namespace, so the same one is declared for the XQuery paths.
WITH XMLNAMESPACES (DEFAULT 'urn:iec62325.351:tc57wg16:451-6:generationloaddocument:3:0')
SELECT c.value('(mRID/text())[1]','NVARCHAR(100)') AS mRID
FROM @XMLDocument.nodes('/GL_MarketDocument') AS t(c);
SQL, directly from the XML file
-- Read the file as a single BLOB, cast it to XML, then shred it under the document's default namespace.
WITH XMLNAMESPACES (DEFAULT 'urn:iec62325.351:tc57wg16:451-6:generationloaddocument:3:0')
, xml_source (xml_doc) AS
(
    SELECT TRY_CAST(f.BulkColumn AS XML)
    FROM OPENROWSET(BULK N'e:\Temp\TP_10V1001C--00013H_ENTSOE-ETP__00a8f07d-95bd-4075-b1f7-3f54ce6162f3.xml', SINGLE_BLOB) AS f
)
SELECT
    d.n.value('(mRID/text())[1]','NVARCHAR(100)') AS mRID
FROM xml_source
CROSS APPLY xml_source.xml_doc.nodes('/GL_MarketDocument') AS d(n);
Output
+----------------------------------+
| mRID |
+----------------------------------+
| 2f6f8b82348440b1b121bca06311945d |
+----------------------------------+

Related

Insert XML file into SQL Server

I'm trying to import an XML file into a SQL table. I found a few examples of code to do this, but I can't seem to get it to work. I've tried a few variations in my code but at this point I'm not sure if the issue is the XML file structure or my SQL.
Below is the code I'm using as well as the XML file (truncated to one record).
-- Staging table that holds each imported file as a single XML value.
CREATE TABLE workspace.dbo.tbt_SED_XMLwithOpenXML
(
Id INT IDENTITY PRIMARY KEY,
XMLData XML,
LoadedDateTime DATETIME
)
-- Load the whole file as one BLOB and store it as XML together with the load time.
INSERT INTO workspace.dbo.tbt_SED_XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE()
FROM OPENROWSET(BULK 'File.xml', SINGLE_BLOB) AS x;
DECLARE @XML AS XML, @hDoc AS INT, @SQL NVARCHAR (MAX)
SELECT @XML = XMLData FROM workspace.dbo.tbt_SED_XMLwithOpenXML WHERE ID = '1' -- The row to process
-- Parse the stored XML and obtain a document handle for OPENXML.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
INSERT INTO workspace.dbo.tb_SED_Emails
SELECT email
FROM OPENXML(@hDoc, 'responseData/manifest/contact_data')
WITH
(
email [varchar](128) 'email'
)
-- Release the parsed document.
EXEC sp_xml_removedocument @hDoc
GO
XML File Example:
<?xml version="1.0" encoding="utf-8"?>
<methodResponse>
<item>
<methodName>
<![CDATA[]]>
</methodName>
<responseData>
<manifest>
<contact_data>
<email>jason.kang#stanfordalumni.org</email>
</contact_data>
</manifest>
</responseData>
<responseNum>
<![CDATA[1]]>
</responseNum>
<responseCode>
<![CDATA[]]>
</responseCode>
</item>
</methodResponse>
Try to use the built-in, native XQuery support instead of the clunky old OPENXML stuff:
-- One row per <contact_data> element in each stored document, returning its first <email> child.
SELECT contact.n.value('(email)[1]', 'varchar(255)') AS Email
FROM workspace.dbo.tbt_SED_XMLwithOpenXML AS staged
CROSS APPLY staged.XMLData.nodes('/methodResponse/item/responseData/manifest/contact_data') AS contact(n)
That should output the desired e-mail address for you:
You are using the wrong xPath expression.
Change 'responseData/manifest/contact_data' to 'methodResponse/item/responseData/manifest/contact_data'.

getting NULL in sql table while parsing xml in SQL Server 2008

I'm trying to parse an XML document with a query.
Here is a sample of my XML:
<export xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://bbhgtm.gov.com/oos/export/1" xmlns:oos="http://bbhgtm.gov.com/oos/types/1">
<notificationOK>
<oos:id>8373125</oos:id>
<oos:notificationNumber>0173200001513000422</oos:notificationNumber>
Here is my query
-- Prepare the document with the prefixes "q" and "oos" bound to its namespaces, then read two columns via OPENXML.
-- @x holds the XML document; it is declared outside this snippet.
declare @hdoc int
EXEC sp_xml_preparedocument @hdoc OUTPUT, @x,
'
<export xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:q="http://bbhgtm.gov.com/oos/export/1"
xmlns:oos="http://bbhgtm.gov.com/oos/types/1"/>
'
select *
from openxml(@hdoc, '/notificationOK/oos:id/oos:notificationNumber/', 1)
WITH(
versionNumber int 'oos:versionNumber'
,createDate datetime 'oos:createDate'
)
-- Release the parsed document.
EXEC sp_xml_removedocument @hdoc
But I'm getting NULL in my SQL table.
What to do?
You're ignoring the XML namespaces on your XML document!
<export xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://bbhgtm.gov.com/oos/export/1"
xmlns:oos="http://bbhgtm.gov.com/oos/types/1">
See those xmlns=..... and xmlns:oos=...... attributes? Those define XML namespaces that need to be taken into account when querying!
Also, I'd recommend to use the built-in, native XQuery support rather than the clumsy OPENXML code.
Try this code here:
-- Shred the sample document with XQuery; both the default namespace and the "oos" prefix
-- must be declared so the paths match the namespaced elements.
-- notificationNumber is read as text: it has a leading zero that a numeric type would drop.
DECLARE @input XML =
'<export xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://bbhgtm.gov.com/oos/export/1"
xmlns:oos="http://bbhgtm.gov.com/oos/types/1">
<notificationOK>
<oos:id>8373125</oos:id>
<oos:notificationNumber>0173200001513000422</oos:notificationNumber>
</notificationOK>
</export>'
;WITH XMLNAMESPACES('http://bbhgtm.gov.com/oos/types/1' AS oos,
DEFAULT 'http://bbhgtm.gov.com/oos/export/1')
SELECT
id = XC.value('(oos:id)[1]', 'int'),
NotificationNumber = XC.value('(oos:notificationNumber)[1]', 'varchar(50)')
FROM
@input.nodes('/export/notificationOK') AS XT(XC)
This results in an output something like this:

Bulk Import XML into SQL Server

I was looking at these examples on Microsoft.com here:
http://support.microsoft.com/kb/316005
http://msdn.microsoft.com/en-us/library/aa225754%28v=sql.80%29.aspx
But part of its steps says that VBScript code has to be executed, and I wasn't able to find where the VBScript should be executed. Can it be executed in SQL Server itself?
The code from the site looks something like this:
' Create the SQLXML bulk-load object, point it at the database, and run it with the
' mapping file and the data file.
Set objBL = CreateObject("SQLXMLBulkLoad.SQLXMLBulkLoad")
' A VBScript string literal cannot span lines, so the two halves are joined with & _.
objBL.ConnectionString = "provider=SQLOLEDB.1;data source=MySQLServer;" & _
    "database=MyDatabase;uid=MyAccount;pwd=MyPassword"
objBL.ErrorLogFile = "c:\error.log"
objBL.Execute "c:\customermapping.xml", "c:\customers.xml"
' Release the COM object.
Set objBL = Nothing
This looks like it could be executed in classic ASP or something, but I prefer to keep it inside SQL Server. Does anyone know how to execute something like this entirely within SQL Server? Or does anyone have a better method for bulk importing XML into SQL Server?
SQL Server is capable of reading XML and inserting it as you need. Here is an example of an XML file and insertion pulled from here:
XML:
<Products>
<Product>
<SKU>1</SKU>
<Desc>Book</Desc>
</Product>
<Product>
<SKU>2</SKU>
<Desc>DVD</Desc>
</Product>
<Product>
<SKU>3</SKU>
<Desc>Video</Desc>
</Product>
</Products>
Insert statement that is parsing the XML:
-- Load C:\Products.xml as one BLOB, cast it to XML, and insert one row per <Product> element.
INSERT INTO Products (sku, product_desc)
SELECT
    product_node.n.query('SKU').value('.', 'INT'),
    product_node.n.query('Desc').value('.', 'VARCHAR(30)')
FROM (
    SELECT CAST(raw_file.bytes AS XML)
    FROM OPENROWSET(BULK 'C:\Products.xml', SINGLE_BLOB) AS raw_file(bytes)
) AS parsed(xml_doc)
CROSS APPLY parsed.xml_doc.nodes('Products/Product') AS product_node(n);
I tried this and for 975 rows from a 1MB XML file, this took about 2.5 minutes to execute on a very fast PC.
I switched to using OpenXml in a multi-step process, and the process now takes less than a second.
-- Staging table that holds each imported file as a single XML value.
CREATE TABLE XMLwithOpenXML
(
Id INT IDENTITY PRIMARY KEY,
XMLData XML,
LoadedDateTime DATETIME
)
-- Load the whole file as one BLOB and store it as XML together with the load time.
INSERT INTO XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE()
FROM OPENROWSET(BULK 'clients.xml', SINGLE_BLOB) AS x;
DECLARE @XML AS XML, @hDoc AS INT
-- Id is an INT column, so it is compared to an integer literal.
SELECT @XML = XMLData FROM XMLwithOpenXML WHERE Id = 1 -- The row to process
-- Parse the stored XML and obtain a document handle for OPENXML.
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
INSERT INTO Clients
SELECT CustomerID, CustomerName
FROM OPENXML(@hDoc, 'Clients/Client')
WITH
(
CustomerID [varchar](50) 'ID',
CustomerName [varchar](100) 'Name'
)
-- Release the parsed document.
EXEC sp_xml_removedocument @hDoc
GO
I got this from here:
http://www.mssqltips.com/sqlservertip/2899/importing-and-processing-data-from-xml-files-into-sql-server-tables/
Basically you load the XML into a table as a big blob of text, then you use OpenXml to process it.

Using XQuery in Sql Server to Parse XML Complex types

I have the following XML :
<Feed>
<FeedId>10</FeedId>
<Component>
<Date>2011-10-01</Date>
<Date>2011-10-02</Date>
</Component>
</Feed>
Now if possible I would like to parse the XML into sql so it's serialized into the following relational data:
FeedId Component_Date
10 2011-10-01
10 2011-10-02
However using the following SQL:
-- Shred the <Feed> document with a single nodes() call on /Feed.
DECLARE @XML XML;
DECLARE @XMLNodes XML;
SET @XML = '<Feed><FeedId>10</FeedId><Component><Date>2011-10-01</Date><Date>2011-10-02</Date></Component></Feed>';
SELECT t.a.query('FeedId').value('.', 'INT') AS FeedId
,t.a.query('Component/Date').value('.', 'VARCHAR(80)') AS [Component_Date]
FROM @XML.nodes(' /Feed') AS t(a)
The closest I get is :
FeedId Component_Date
10 2011-10-012011-10-02
So the date values appear in the same row, is it possible to achieve what I want using XQuery?
You need a second call to .nodes() since you have multiple entries inside your XML - try this:
-- Shred each <Feed>, then each <Date> under its <Component>, so every date gets its own row.
-- Expects the XML document in the variable @XML.
SELECT
t.a.value('(FeedId)[1]', 'INT') AS FeedId,
c.d.value('(.)[1]', 'DATETIME') AS [Component_Date]
FROM
@XML.nodes('/Feed') AS t(a)
CROSS APPLY
t.a.nodes('Component/Date') AS c(d)
Gives me an output of:
FeedId Component_Date
10 2011-10-01 00:00:00.000
10 2011-10-02 00:00:00.000
OK, I can do it using OPENXML method:
-- OPENXML alternative: one row per <Date> element, reaching up two levels for the FeedId.
DECLARE @idoc int
DECLARE @XML XML;
SET @XML = '<Feed><FeedId>10</FeedId><Component><Date>2011-10-01</Date><Date>2011-10-02</Date></Component></Feed>';
EXEC sp_xml_preparedocument @idoc OUTPUT, @XML
-- SELECT stmt using OPENXML rowset provider
SELECT FeedId, ComponentDate
FROM OPENXML (@idoc, '/Feed/Component/Date',1)
WITH (
FeedId Int '../../FeedId',
ComponentDate Date 'text()'
)
-- Free the parsed document; otherwise it stays in server memory until the session ends.
EXEC sp_xml_removedocument @idoc

How do I overcome OpenXML's 8000 character limit?

I'm loading an XML document in SQL using OpenXML. When declaring the variable, the maximum I can go up to is 8000 characters:
DECLARE @xml_text varchar(8000)
Since text and ntext are not allowed to be used with OpenXML, what other alternatives do I have to load the entire XML (over 20000 chars) in SQL?
You should be able to use varchar(max) (SQL 2005 and higher)
-- varchar(max) lifts the 8000-character cap on the XML text handed to sp_xml_preparedocument.
DECLARE @idoc int
DECLARE @doc varchar(max)
SET @doc = '
<myxml>
<node nodeid="1" nodevalue="value 1">
</node>
</myxml>'
EXEC sp_xml_preparedocument @idoc OUTPUT, @doc
SELECT
nodeid, nodevalue
FROM
OPENXML (@idoc, '/myxml/node',1) WITH ( nodeid varchar(10), nodevalue varchar(20) )
-- Free the parsed document; otherwise it stays in server memory until the session ends.
EXEC sp_xml_removedocument @idoc
If you're using SQL 2005 or better you could use the XML data type itself. This way you would be able to avoid using OPENXML:
-- Use the XML data type directly: one row per top-level <Customer> element.
DECLARE @XDoc XML
SET @XDoc = '<Customer>
<FirstName>Fred</FirstName>
<LastName>Flinstone</LastName>
</Customer>
<Customer>
<FirstName>Barney</FirstName>
<LastName>Rubble</LastName>
</Customer>'
SELECT
Tbl.Col.value('FirstName[1]', 'VARCHAR(MAX)') AS FirstName,
Tbl.Col.value('LastName[1]', 'VARCHAR(MAX)') AS LastName
FROM @XDoc.nodes('/Customer') Tbl(Col)