SQL Server OPENXML v XQUERY Performance - sql

I've been testing both OPENXML and XQuery, and OPENXML is outperforming XQuery by a lot (from what I have been reading, I believed it should be the other way around).
I prefer to use XQuery, so I'm looking for some help. Is there any way to improve the XQuery?
I have around 10,000 of these XMLs in a table (and they are much larger than this example).
<Root>
<Data Code="123" Ref="1">
<Node>
<Details Date="2021-06-28" Code="ABC" />
<Types>
<Type Type="3" Count="29" />
<Type Type="5" Count="0">
<TypeDetails Date2="2021-06-30" />
</Type>
</Types>
<Invoice Number="1234" Version="1" />
</Node>
</Data>
</Root>
OpenXML query:
-- Shreds every XML document stored in [dbo].[table] into finaltable with the
-- legacy OPENXML rowset provider, one document per cursor iteration.
DECLARE name_cursor CURSOR
FOR
    SELECT [XML]
    FROM [dbo].[table];

OPEN name_cursor;

DECLARE @xmlVal XML;
DECLARE @idoc INT;

FETCH NEXT FROM name_cursor INTO @xmlVal;

WHILE (@@FETCH_STATUS = 0)
BEGIN
    -- Parse the current document; the handle is released again below.
    EXEC sp_xml_preparedocument @idoc OUTPUT, @xmlVal;

    -- One row per <Type>; values of ancestors and their children are reached
    -- by walking back up from the <Type> row node.
    INSERT INTO finaltable
    SELECT Code, Ref, [Date], DetailsCode, Number, Version, [Type], [Count], Date2
    FROM OPENXML(@idoc, 'Root/Data/Node/Types/Type')
    WITH
    (
        Code        [varchar](50) '../../../@Code',
        Ref         [varchar](50) '../../../@Ref',
        [Date]      [varchar](50) '../../Details/@Date',
        -- Named differently from Data/@Code so the SELECT list is unambiguous.
        DetailsCode [varchar](50) '../../Details/@Code',
        Number      [varchar](50) '../../Invoice/@Number',
        Version     [varchar](50) '../../Invoice/@Version',
        [Type]      [varchar](50) '@Type',
        [Count]     [varchar](50) '@Count',
        Date2       [varchar](50) 'TypeDetails/@Date2'
    );

    EXEC sp_xml_removedocument @idoc;

    FETCH NEXT FROM name_cursor INTO @xmlVal;
END

CLOSE name_cursor;
DEALLOCATE name_cursor;
XQuery (I have also tried different versions with CROSS APPLY for each node):
-- Set-based shredding with the xml type's nodes()/value() methods:
-- one row per <Type> element of every document in [dbo].[table].
-- NOTE(review): each value reached through the parent axis (../) walks back
-- up from <Type>; this is a commonly cited cost in SQL Server XQuery plans.
-- Confirm against the actual execution plan.
INSERT INTO finaltable
SELECT
    c.value('(../../../@Code)[1]', 'nvarchar(50)')          AS Code
    ,c.value('(../../../@Ref)[1]', 'nvarchar(50)')          AS Ref
    ,c.value('(../../Details/@Date)[1]', 'nvarchar(50)')    AS Date
    ,c.value('(../../Details/@Code)[1]', 'nvarchar(50)')    AS Code
    ,c.value('@Type', 'nvarchar(50)')                       AS Type
    ,c.value('@Count', 'nvarchar(50)')                      AS Count
    ,c.value('(TypeDetails/@Date2)[1]', 'nvarchar(50)')     AS Date2
    ,c.value('(../../Invoice/@Number)[1]', 'nvarchar(50)')  AS Number
    ,c.value('(../../Invoice/@Version)[1]', 'nvarchar(50)') AS Version
FROM [dbo].[table]
CROSS APPLY [XML].nodes('Root/Data/Node/Types/Type') AS t(c)

Related

Insert XML file into SQL Server

I'm trying to import an XML file into a SQL table. I found a few examples of code to do this, but I can't seem to get it to work. I've tried a few variations in my code but at this point I'm not sure if the issue is the XML file structure or my SQL.
Below is the code I'm using as well as the XML file (truncated to one record).
-- Staging table holding each loaded XML file together with its load time.
CREATE TABLE workspace.dbo.tbt_SED_XMLwithOpenXML
(
    Id INT IDENTITY PRIMARY KEY,
    XMLData XML,
    LoadedDateTime DATETIME
)
-- Load the whole file as a single XML value.
INSERT INTO workspace.dbo.tbt_SED_XMLwithOpenXML(XMLData, LoadedDateTime)
SELECT CONVERT(XML, BulkColumn) AS BulkColumn, GETDATE()
FROM OPENROWSET(BULK 'File.xml', SINGLE_BLOB) AS x;
DECLARE @XML AS XML, @hDoc AS INT
SELECT @XML = XMLData FROM workspace.dbo.tbt_SED_XMLwithOpenXML WHERE Id = 1 -- The row to process
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
-- NOTE(review): this row pattern is evaluated from the document root, and the
-- sample document's root element is <methodResponse>.
INSERT INTO workspace.dbo.tb_SED_Emails
SELECT email
FROM OPENXML(@hDoc, 'responseData/manifest/contact_data')
WITH
(
    email [varchar](128) 'email'
)
EXEC sp_xml_removedocument @hDoc
GO
XML File Example:
<?xml version="1.0" encoding="utf-8"?>
<methodResponse>
<item>
<methodName>
<![CDATA[]]>
</methodName>
<responseData>
<manifest>
<contact_data>
<email>jason.kang@stanfordalumni.org</email>
</contact_data>
</manifest>
</responseData>
<responseNum>
<![CDATA[1]]>
</responseNum>
<responseCode>
<![CDATA[]]>
</responseCode>
</item>
</methodResponse>
Try to use the built-in, native XQuery support instead of the clunky old OPENXML stuff:
-- One row per <contact_data> element of each stored document, returning the
-- text of its first <email> child.
SELECT XC.value('(email)[1]', 'varchar(255)') AS Email
FROM workspace.dbo.tbt_SED_XMLwithOpenXML
    CROSS APPLY XMLData.nodes('/methodResponse/item/responseData/manifest/contact_data') AS XT(XC)
That should output the desired e-mail address for you:
You are using the wrong xPath expression.
Change 'responseData/manifest/contact_data' to 'methodResponse/item/responseData/manifest/contact_data'.

SQL Server - OPENXML for XML within XML

I have an issue where I am reading data from an XML document and one of the columns is itself another XML document. When I set the data type to varchar(max) or nvarchar(max) I get NULL, and when I use the XML data type I get "Element-centric mapping must be used with OPENXML when one of the columns is of type XML."
The column that contains the raw XML is raw_xml.
My goal is to save that XML to another table along with attributes of the parent XML, but I am failing to even read it.
-- Load the file into an XML variable (the encoding declaration is rewritten
-- so the CLOB can be cast), then read the <Report> attributes with OPENXML.
DECLARE @xml XML
SELECT @xml = CAST(REPLACE(y, 'UTF-16', 'UTF-8') AS XML)
FROM OPENROWSET(BULK 'D:\myxml.xml', SINGLE_CLOB) x(y)
DECLARE @hdoc int
EXEC sp_xml_preparedocument @hdoc OUTPUT, @xml
-- Flag 1 = attribute-centric mapping.
SELECT *
FROM OPENXML(@hdoc, '/LoanApplications/LoanApplication/LoanApplicationStates/LoanApplicationState/Customers/Customer/Reports/Report', 1)
WITH(
    active varchar(255),
    valid varchar(255),
    bureau varchar(255),
    report_date datetime,
    score varchar(255),
    --raw_xml varchar(255),
    raw_xml [xml],
    create_date datetime,
    update_date datetime,
    updated_by varchar(255),
    deal_detail_id bigint,
    bundle_id bigint
)
EXEC sp_xml_removedocument @hdoc
Sample XML. The data stored in raw_xml doesn't look like well-formed XML, but I'm not sure whether that is because of the formatting or not.
<Reports>
<Report active="True" valid="True" bureau="EX" report_date="2016-03-27T19:06:32.3470000" score="111" raw_xml="&lt;Response Score=&quot;111&quot;" create_date="2016-03-27T19:06:32.4430000" update_date="2016-03-27T19:06:32.4430000" updated_by="System [No User Available]" deal_detail_id="123" bundle_id="5656" />
</Reports>
Thx for the example...
If you execute this
-- The raw_xml attribute value is entity-escaped so the literal is well-formed
-- XML; value() returns it unescaped.
DECLARE @xml XML=
' <Reports>
<Report active="True" valid="True" bureau="EX" report_date="2016-03-27T19:06:32.3470000" score="111" raw_xml="&lt;Response Score=&quot;111&quot;" create_date="2016-03-27T19:06:32.4430000" update_date="2016-03-27T19:06:32.4430000" updated_by="System [No User Available]" deal_detail_id="123" bundle_id="5656" />
</Reports>';
SELECT @xml.value('(/Reports/Report/@raw_xml)[1]','varchar(max)');
You get this
<Response Score="111"
The problem is that your attribute "raw_xml" seems to be the beginning of an XML document, but it is not complete... You will not be able to cast this to XML...
Did you manipulate the XML in order to reduce it for an example, or is this your "real data"? If it's real, you might be in trouble...
UPDATE
Just to show you how to get the data without FROM OPENXML:
-- Reads every attribute of each <Report> element with nodes()/value(),
-- typed directly to the target SQL types.
DECLARE @xml XML=
' <Reports>
<Report active="True" valid="True" bureau="EX" report_date="2016-03-27T19:06:32.3470000" score="111" raw_xml="&lt;Response Score=&quot;111&quot;" create_date="2016-03-27T19:06:32.4430000" update_date="2016-03-27T19:06:32.4430000" updated_by="System [No User Available]" deal_detail_id="123" bundle_id="5656" />
</Reports>';
SELECT R.value('@active','bit') AS active
    ,R.value('@valid','bit') AS valid
    ,R.value('@bureau','varchar(max)') AS bureau
    ,R.value('@report_date','datetime') AS report_date
    ,R.value('@score','int') AS score
    ,R.value('@raw_xml','varchar(max)') AS raw_xml
    ,R.value('@create_date','datetime') AS create_date
    ,R.value('@update_date','datetime') AS update_date
    ,R.value('@updated_by','varchar(max)') AS updated_by
    ,R.value('@deal_detail_id','bigint') AS deal_detail_id
    ,R.value('@bundle_id','bigint') AS bundle_id
FROM @xml.nodes('/Reports/Report') AS A(R);

Xml to Database

I have an xml file as below:
<?xml version="1.0" encoding="UTF-8" ?>
- <DeliveryPackage xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" VersionNo="Q214" CreationTime="2014-04-16T02:29:04.000000000" MapVersion="IND 14 2 00" Language_Code_Desc="../DEFINITIONS/language.xml" Country_Code_Desc="../DEFINITIONS/country.xml" Supplier_Code_Desc="../DEFINITIONS/supplier.xml" XY_Type="WGS84" Category_Code_Desc="../DEFINITIONS/category.xml" Char_Set="UTF-8" UpdateType="BulkUpdate" Coverage="IND" Category="7997">
- <POI>
<Action>Add</Action>
<SupplierID>3</SupplierID>
- <Identity>
<POI_Entity_ID>BEL_Q112FD_308076</POI_Entity_ID>
- <Names>
- <POI_Name Language_Code="ENG">
<Text>100 Raw Gym</Text>
</POI_Name>
</Names>
<Category_ID Type="NT">7997</Category_ID>
<Product_Type>23</Product_Type>
</Identity>
- <Locations>
- <Location>
- <Address>
- <ParsedAddress>
- <ParsedStreetAddress>
- <ParsedStreetName>
<StreetName Language_Code="ENG">NH-1</StreetName>
</ParsedStreetName>
</ParsedStreetAddress>
- <ParsedPlace>
<PlaceLevel2 Language_Code="ENG">Punjab</PlaceLevel2>
<PlaceLevel3 Language_Code="ENG">Amritsar</PlaceLevel3>
<PlaceLevel4 Language_Code="ENG">Amritsar</PlaceLevel4>
</ParsedPlace>
<CountryCode>IND</CountryCode>
</ParsedAddress>
</Address>
- <GeoPosition>
<Latitude>31.6294</Latitude>
<Longitude>74.81178</Longitude>
</GeoPosition>
- <MapLinkID>
<LinkID>1027669120</LinkID>
<Side_of_Street>R</Side_of_Street>
<Percent_from_RefNode>70</Percent_from_RefNode>
</MapLinkID>
- <Confidence>
<Match_Level>GeoPoint</Match_Level>
</Confidence>
</Location>
</Locations>
- <Contacts>
- <Contact Language_Code="ENG" Type="Main">
<Number Preferred="YES" Type="Phone Number">+(91)-9988692233</Number>
</Contact>
</Contacts>
</POI>
I want to insert all attributes in sql server table, starting from Action to Number.
I already tried on 1st three columns,like below:
At first I put complete xml file in a table 'data',then use the following code:
-- Reads the first three POI values from the document stored in table data.
DECLARE @XML AS XML, @hDoc AS INT
SELECT @XML = XMLData FROM data
EXEC sp_xml_preparedocument @hDoc OUTPUT, @XML
SELECT [Action], SupplierID, Poi_Entity_Id
FROM OPENXML(@hDoc, 'POI')
WITH
(
    -- '@name' column patterns address attributes of the row node.
    [Action] [varchar](50) '@Action',
    SupplierID [int] '@SupplierID',
    Poi_Entity_Id [varchar](100) 'Poi_Entity_Id'
)
EXEC sp_xml_removedocument @hDoc
GO
but getting null values in all columns...
I don't want to do it manually as I have 1000 records in my XML.
Please Help
Your data is stored in elements, not attributes, and you forgot to add DeliveryPackage node into the xpath:
-- One row per <POI> under <DeliveryPackage>; values are read from child
-- elements, not attributes.
select
    T.C.value('Action[1]', 'nvarchar(128)') as [Action],
    T.C.value('SupplierID[1]', 'nvarchar(128)') as SupplierID,
    T.C.value('(Identity/POI_Entity_ID)[1]', 'nvarchar(128)') as POI_Entity_ID
from @XML.nodes('DeliveryPackage/POI') as T(C)
Or, if you want to use openxml:
-- OPENXML variant: element column patterns relative to each <POI> row node.
declare @hDoc int
exec dbo.sp_xml_preparedocument @hDoc output, @XML
select
    [Action], SupplierID, Poi_Entity_Id
from openxml(@hDoc, 'DeliveryPackage/POI')
with
(
    [Action] [varchar](50) 'Action[1]',
    SupplierID [int] 'SupplierID[1]',
    Poi_Entity_Id [varchar](100) '(Identity/POI_Entity_ID)[1]'
)
-- Release the parsed document handle.
exec dbo.sp_xml_removedocument @hDoc

insert data from xml column into temp table

I have a xml column that look like
-- Sample document: the root declares a default namespace and each child
-- element declares its own.
SET @XMLData = '<ArrayOfEntityNested xmlns:i="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.Bijak">
<EntityNested>
<Id xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto">1</Id>
<Date xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak">0001-01-01T00:00:00</Date>
<Description xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak">deesc</Description>
<Number xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak" i:nil="true" />
</EntityNested>
</ArrayOfEntityNested>'
I need insert data from the XML into a temp table.
here
For this I use the following code, but it's not working: it does not insert any data into the temp table.
--Variables Decleration
--Variables Declaration
DECLARE @XMLData VARCHAR(MAX)
DECLARE @idoc INT
-- Creating Temporary Table
CREATE TABLE #TEMP_TABLE
(
    REC_ID INT IDENTITY(1,1),
    [Id] INT,
    [Date] VARCHAR(50),
    [Number] VARCHAR(50)
);
--Case 1
SET @XMLData = '<ArrayOfEntityNested xmlns:i="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.Bijak">
<EntityNested>
<Id xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto">1</Id>
<Date xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak">0001-01-01T00:00:00</Date>
<Number xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak" i:nil="true" />
</EntityNested>
</ArrayOfEntityNested>
'
--Reading Data from XML and inserting into Temp Table
EXECUTE sp_xml_preparedocument @idoc OUTPUT, @XMLData
-- The temp table itself is used as the OPENXML rowset schema.
INSERT INTO #TEMP_TABLE
SELECT *
FROM OpenXML(@idoc,'/ArrayOfEntityNested/EntityNested', 1)
WITH #TEMP_TABLE
EXECUTE sp_xml_removedocument @idoc
--Displaying data from Temp Table
SELECT * FROM #TEMP_TABLE
DROP TABLE #TEMP_TABLE;
But that doesn't work. If the XML has no namespaces and looks like this:
-- Same shape of document without any namespace declarations.
SET @XMLData = '<ArrayOfEntityNested>
<EntityNested>
<Id>1</Id>
<Date>0001-01-01T00:00:00</Date>
<Description>deesc</Description>
<EmployeeId>2</EmployeeId>
<IsDeleted>false</IsDeleted>
<LoadingPermitTruckId>7541</LoadingPermitTruckId>
</EntityNested>
</ArrayOfEntityNested>'
then it works.
Please help me.
First of all - please use appropriate data types! If your source data is XML - why aren't you using the XML datatype?
Also, if you have a Date in your table - why isn't that a DATE or DATETIME type?? And why is the Number a VARCHAR(50) ??
Makes no sense......
Then: you're not looking at the XML namespaces that are present in the XML document - but you must!
And lastly - I would recommend using the native XQuery support instead of the legacy sp_xml_preparedocument / OpenXML approach....
Seems much easier, much clearer to me...
Use this:
-- variable declaration
-- variable declaration: the source document is held as native XML
DECLARE @XMLData XML
-- creating temporary table with column types matching the data
CREATE TABLE #TEMP_TABLE
(
    REC_ID INT IDENTITY(1,1),
    [Id] INT,
    [Date] DATETIME2(3),
    [Number] INT
);
and then use proper XQuery statements, including the XML namespaces to handle the data:
SET @XMLData = '<ArrayOfEntityNested xmlns:i="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.Bijak">
<EntityNested>
<Id xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto">1</Id>
<Date xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak">0001-01-01T00:00:00</Date>
<Number xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak" i:nil="true" />
</EntityNested>
<EntityNested>
<Id xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto">42</Id>
<Date xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak">2013-12-22T14:45:00</Date>
<Number xmlns="http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak">373</Number>
</EntityNested>
</ArrayOfEntityNested>'
-- Bind a prefix to each namespace used in the document: ns1 for the
-- root/EntityNested level, ns2 for Id, ns3 for Date and Number.
;WITH XMLNAMESPACES ('http://schemas.datacontract.org/2004/07/Gbms.Dto.Bijak' AS ns1,
    'http://schemas.datacontract.org/2004/07/Gbms.Dto' AS ns2,
    'http://schemas.datacontract.org/2004/07/Gbms.Dto.VirginBijak' AS ns3)
INSERT INTO #TEMP_TABLE([Id], [Date], [Number])
SELECT
    xc.value('(ns2:Id)[1]', 'int'),
    xc.value('(ns3:Date)[1]', 'DateTime2'),
    xc.value('(ns3:Number)[1]', 'int')
FROM
    @XMLData.nodes('/ns1:ArrayOfEntityNested/ns1:EntityNested') AS xt(xc)
-- Reads <Contact> attributes into a table variable with OPENXML.
DECLARE @idoc int
DECLARE @doc varchar(1000)
SET @doc ='
<OutLookContact>
<Contact FirstName="Asif" LastName="Ghafoor" EmailAddress1="asifghafoor@my.web.pk" />
<Contact FirstName="Rameez" LastName="Ali" EmailAddress1="rameezali@my.web.pk" />
</OutLookContact>'
--Create an internal representation of the XML document.
EXEC sp_xml_preparedocument @idoc OUTPUT, @doc
-- Execute a SELECT statement that uses the OPENXML rowset provider.
DECLARE @Temp TABLE(FirstName VARCHAR(250), LastName VARCHAR(250), Email1 VARCHAR(250))
INSERT INTO @Temp(FirstName, LastName, Email1)
SELECT FirstName, LastName, EmailAddress1
FROM OPENXML (@idoc, '/OutLookContact/Contact', 1)
WITH (FirstName varchar(50), LastName varchar(50), EmailAddress1 varchar(50))
-- Release the parsed document; the handle otherwise stays allocated.
EXEC sp_xml_removedocument @idoc
SELECT FirstName, LastName, Email1 FROM @Temp
it will be a lot easier if you try to use a tool called pentaho. http://en.wikipedia.org/wiki/Pentaho
it is an open source tool which is used for data integration.you can create a database connection from mysql or oracle to it and do the transformation.it is easy to use.

TSQL - use variables in OPENXML

Suppose I have a query like this -
-- @i is the document handle; flag 2 = element-centric mapping, with the
-- authors table used as the rowset schema.
SELECT * FROM
OPENXML(@i, '/root/authors', 2)
WITH authors
Now, I want to pass '/root' via a parameter/variable like -
DECLARE @nodeName varchar(MAX) -- EDIT: Solution - use a fixed length instead of MAX
SET @nodeName = '/root'
and use #nodeName instead. Then concatenate the rest of the elements dynamically.
I am getting error just by using #nodeName in the OPENXML parameter.
Better to use the new XML type.
-- Returns the (locationId, typeId) pairs of xrefLocationTypes whose ids occur
-- in the supplied XML id lists.
--   @locationIds  XML of the form <IDList><ID>n</ID>...</IDList>
--   @typeIds      same form; optional. When omitted (NULL) no type filter is
--                 applied - previously a NULL list matched no rows at all.
create proc [dbo].[getLocationTypes](@locationIds XML,
    @typeIds XML = null)
as
begin
    set nocount on

    SELECT locationId, typeId
    FROM xrefLocationTypes
    WHERE locationId IN (SELECT Item.value('.', 'int')
                         FROM @locationIds.nodes('IDList/ID') AS x(Item))
      AND (@typeIds IS NULL
           OR typeId IN (SELECT Item.value('.', 'int')
                         FROM @typeIds.nodes('IDList/ID') AS x(Item)))
    ORDER BY locationId, typeId
end
And then you would call this like:
-- First argument: location id list; second argument: type id list.
EXEC dbo.getLocationTypes
    '<IDList><ID>1</ID><ID>3</ID></IDList>',
    '<IDList><ID>200</ID><ID>300</ID><ID>400</ID></IDList>'
I tried the following in SQL 2008 R2 and it works fine.
-- Demonstrates passing the OPENXML row pattern through a fixed-length
-- VARCHAR variable.
DECLARE @idoc int
DECLARE @doc varchar(1000)
SET @doc ='
<ROOT>
<Customer CustomerID="VINET" ContactName="Paul Henriot">
<Order CustomerID="VINET" EmployeeID="5" OrderDate="1996-07-04T00:00:00">
<OrderDetail OrderID="10248" ProductID="11" Quantity="12"/>
<OrderDetail OrderID="10248" ProductID="42" Quantity="10"/>
</Order>
</Customer>
<Customer CustomerID="LILAS" ContactName="Carlos Gonzlez">
<Order CustomerID="LILAS" EmployeeID="3" OrderDate="1996-08-16T00:00:00">
<OrderDetail OrderID="10283" ProductID="72" Quantity="3"/>
</Order>
</Customer>
</ROOT>'
--Create an internal representation of the XML document.
EXEC sp_xml_preparedocument @idoc OUTPUT, @doc
-- Execute a SELECT statement that uses the OPENXML rowset provider.
DECLARE @NodeName VARCHAR(100)
SET @NodeName = '/ROOT/Customer'
SELECT CustomerID, ContactName
FROM OPENXML (@idoc, @NodeName, 1)
WITH (CustomerID varchar(10),
    ContactName varchar(20))
-- Release the parsed document; the handle otherwise stays allocated.
EXEC sp_xml_removedocument @idoc
It could be that other versions of SQL Server only support NVARCHAR, not VARCHAR, for this parameter.
I hope this helps.