how to get xsd element format result from sql script - sql

I need to create xml files to migrate a huge amount of data from one database to another database.
How to get result xsd format as below from ms sql script query?
Please share if you have any idea.
The xml file format is below:
<Batch>
<retail:customer xmlns:core="http://www.bactor.com/core" xmlns:retail=""http://www.bactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:addresses>
<retail:address>
<retail:country>GB</retail:country>
<retail:countryCodeId></retail:countryCodeId>
<retail:isPreferred>true</retail:isPreferred>
<retail:isActive>true</retail:isActive>
<retail:typeId>PERSONAL_ADDRESS</retail:typeId>
<retail:postCode>2344</retail:postCode>
<retail:street1>1234214</retail:street1>
<retail:isTemplate>false</retail:isTemplate>
<retail:referenceId></retail:referenceId>
<retail:addressReferenceId>0727-:83f5bd9f331:e8e438a1:fa34668911272008</retail:addressReferenceId>
</retail:address>
</retail:addresses>
<retail:contactMethod></retail:contactMethod>
<retail:contactable>false</retail:contactable>
<retail:maritalStatus></retail:maritalStatus>
<retail:nationality></retail:nationality>
<retail:noChildren>0</retail:noChildren>
<retail:customerNumber>1</retail:customerNumber>
<retail:emailAddresses>
<retail:emailAddress>alice#wonderland.hearts</retail:emailAddress>
<retail:preferred>true</retail:preferred>
<retail:restrictedReason></retail:restrictedReason>
<retail:status></retail:status>
<retail:typeId>PERSONAL_EMAIL</retail:typeId>
<retail:referenceId></retail:referenceId>
<retail:active>false</retail:active>
</retail:emailAddresses>
<retail:phoneNumbers>
<retail:countryCode></retail:countryCode>
<retail:number>11222445566</retail:number>
<retail:preferred>true</retail:preferred>
<retail:restrictedReason></retail:restrictedReason>
<retail:status></retail:status>
<retail:typeId>LANDLINE</retail:typeId>
<retail:referenceId></retail:referenceId>
<retail:active>true</retail:active>
<retail:phoneNumberReferenceId>e437-:83f5bd9f331:e8e438a1:fa34668911272008</retail:phoneNumberReferenceId>
</retail:phoneNumbers>
<retail:customerName>
<retail:surname>AppleGate</retail:surname>
<retail:forename>Alice</retail:forename>
<retail:title>Mrs</retail:title>
<retail:sex>FEMALE</retail:sex>
<retail:dateOfBirth>2012-09-12T00:00:00+01:00</retail:dateOfBirth>
</retail:customerName>
<retail:businessContactMethod></retail:businessContactMethod>
<retail:preferredContactTime></retail:preferredContactTime>
<retail:allowInternalMarketing>true</retail:allowInternalMarketing>
<retail:allowExternalMarketing>true</retail:allowExternalMarketing>
<retail:employeeKey></retail:employeeKey>
<retail:customerType>RETAIL</retail:customerType>
<retail:organisation></retail:organisation>
<retail:taxIdentification></retail:taxIdentification>
<retail:companyNumber></retail:companyNumber>
<retail:createdBy></retail:createdBy>
<retail:createdAt></retail:createdAt>
<retail:status>New</retail:status>
<retail:source></retail:source>
</retail:customer>
<retail:customer xmlns:core="http://www.enactor.com/core" xmlns:retail="http://www.enactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:addresses>
<retail:address>
<retail:country>GB</retail:country>
<retail:countryCodeId></retail:countryCodeId>
<retail:isPreferred>true</retail:isPreferred>
<retail:isActive>true</retail:isActive>
<retail:typeId>PERSONAL_ADDRESS</retail:typeId>
<retail:postCode>2344</retail:postCode>
<retail:street1>1234214</retail:street1>
<retail:isTemplate>false</retail:isTemplate>
<retail:referenceId></retail:referenceId>
<retail:addressReferenceId>0727-:83f5bd9f331:e8e438a1:fa34668911272008</retail:addressReferenceId>
</retail:address>
</retail:addresses>
<retail:contactMethod></retail:contactMethod>
<retail:contactable>false</retail:contactable>
<retail:maritalStatus></retail:maritalStatus>
<retail:nationality></retail:nationality>
<retail:noChildren>0</retail:noChildren>
<retail:customerNumber>1</retail:customerNumber>
<retail:emailAddresses>
<retail:emailAddress>alice#wonderland.hearts</retail:emailAddress>
<retail:preferred>true</retail:preferred>
<retail:restrictedReason></retail:restrictedReason>
<retail:status></retail:status>
<retail:typeId>PERSONAL_EMAIL</retail:typeId>
<retail:referenceId></retail:referenceId>
<retail:active>false</retail:active>
</retail:emailAddresses>
<retail:phoneNumbers>
<retail:countryCode></retail:countryCode>
<retail:number>11222445566</retail:number>
<retail:preferred>true</retail:preferred>
<retail:restrictedReason></retail:restrictedReason>
<retail:status></retail:status>
<retail:typeId>LANDLINE</retail:typeId>
<retail:referenceId></retail:referenceId>
<retail:active>true</retail:active>
<retail:phoneNumberReferenceId>e437-:83f5bd9f331:e8e438a1:fa34668911272008</retail:phoneNumberReferenceId>
</retail:phoneNumbers>
<retail:customerName>
<retail:surname>AppleGate</retail:surname>
<retail:forename>Alice</retail:forename>
<retail:title>Mrs</retail:title>
<retail:sex>FEMALE</retail:sex>
<retail:dateOfBirth>2012-09-12T00:00:00+01:00</retail:dateOfBirth>
</retail:customerName>
<retail:businessContactMethod></retail:businessContactMethod>
<retail:preferredContactTime></retail:preferredContactTime>
<retail:allowInternalMarketing>true</retail:allowInternalMarketing>
<retail:allowExternalMarketing>true</retail:allowExternalMarketing>
<retail:employeeKey></retail:employeeKey>
<retail:customerType>RETAIL</retail:customerType>
<retail:organisation></retail:organisation>
<retail:taxIdentification></retail:taxIdentification>
<retail:companyNumber></retail:companyNumber>
<retail:createdBy></retail:createdBy>
<retail:createdAt></retail:createdAt>
<retail:status>New</retail:status>
<retail:source></retail:source>
</retail:customer>
</Batch>
I am also trying with the below a pieces of sql script. but it is not working.
WITH XMLNAMESPACES ('http://www.enactor.com/retail' as ns1)
SELECT top 100 [id]
,[Title]
,[First_Name]
, RowNum = Row_NUMBER() OVER (Order by id)
FROM [Firinne].[dbo].[Contact] as Customer
For XML PATH('retail:Customer')

Try this:
DECLARE #xml XML
;WITH XMLNAMESPACES('http://www.w3.org/2001/XMLSchema-instance' AS xsi, 'http://www.bactor.com/retail' AS retail, 'http://www.bactor.com/core' AS core)
SELECT #xml = (SELECT
[retail:Customer].id AS [retail:id],
[retail:Customer].contactMethod AS [retail:contactMethod],
[retail:Customer].contactable AS [retail:contactable],
[retail:address].emailAddress AS [retail:emailAddress],
[retail:address].typeId AS [retail:typeId]
FROM dbo.Customers AS [retail:Customer]
JOIN dbo.emailAddresses AS [retail:address] ON [retail:Customer].id = [retail:address].customerID
FOR XML AUTO, ELEMENTS)
SET #xml = '<Batch>' + CAST(#xml AS NVARCHAR(max)) + '</Batch>'
SELECT #xml
Output:
<Batch>
<retail:Customer xmlns:core="http://www.bactor.com/core" xmlns:retail="http://www.bactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:id>1</retail:id>
<retail:contactMethod>1</retail:contactMethod>
<retail:contactable>1</retail:contactable>
<retail:address>
<retail:emailAddress>some#some.some</retail:emailAddress>
<retail:typeId>1</retail:typeId>
</retail:address>
<retail:address>
<retail:emailAddress>some#some.some</retail:emailAddress>
<retail:typeId>2</retail:typeId>
</retail:address>
</retail:Customer>
<retail:Customer xmlns:core="http://www.bactor.com/core" xmlns:retail="http://www.bactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:id>2</retail:id>
<retail:contactMethod>1</retail:contactMethod>
<retail:contactable>0</retail:contactable>
<retail:address>
<retail:emailAddress>some#some.some</retail:emailAddress>
<retail:typeId>3</retail:typeId>
</retail:address>
</retail:Customer>
</Batch>
EDIT:
You can do it like:
DECLARE #xml XML
;WITH XMLNAMESPACES('http://www.w3.org/2001/XMLSchema-instance' AS xsi, 'http://www.bactor.com/retail' AS retail, 'http://www.bactor.com/core' AS core)
SELECT #xml = (SELECT
[retail:Customer].id AS [retail:id],
[retail:Customer].contactMethod AS [retail:contactMethod],
[retail:Customer].contactable AS [retail:contactable],
(SELECT
[retail:address].emailAddress ,
[retail:address].typeId
FROM dbo.emailAddresses AS [retail:address] WHERE [retail:Customer].id = [retail:address].customerID
FOR XML AUTO, TYPE, ELEMENTS, ROOT('retail:Addresses')
)
FROM dbo.Customers AS [retail:Customer]
FOR XML AUTO, ELEMENTS)
SET #xml = '<Batch>' + CAST(#xml AS NVARCHAR(max)) + '</Batch>'
SELECT #xml
But namespaces are added to all child:
<Batch>
<retail:Customer xmlns:core="http://www.bactor.com/core" xmlns:retail="http://www.bactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:id>1</retail:id>
<retail:contactMethod>1</retail:contactMethod>
<retail:contactable>1</retail:contactable>
<retail:Addresses xmlns:core="http://www.bactor.com/core" xmlns:retail="http://www.bactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:address>
<emailAddress>some#some.some</emailAddress>
<typeId>1</typeId>
</retail:address>
<retail:address>
<emailAddress>some#some.some</emailAddress>
<typeId>2</typeId>
</retail:address>
</retail:Addresses>
</retail:Customer>
<retail:Customer xmlns:core="http://www.bactor.com/core" xmlns:retail="http://www.bactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:id>2</retail:id>
<retail:contactMethod>1</retail:contactMethod>
<retail:contactable>0</retail:contactable>
<retail:Addresses xmlns:core="http://www.bactor.com/core" xmlns:retail="http://www.bactor.com/retail" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<retail:address>
<emailAddress>some#some.some</emailAddress>
<typeId>3</typeId>
</retail:address>
</retail:Addresses>
</retail:Customer>
</Batch>
It is a known issue, and you can see details here:
https://connect.microsoft.com/SQLServer/feedback/details/265956/suppress-namespace-attributes-in-nested-select-for-xml-statements
Additional namespaces doesn't do any harm but increasing size of generated xml document.

Related

SELECT query to get child elements from xml datatype

I have the following SQL script (and the XML structure at the bottom):
DECLARE #questions XML
SELECT
t.Col.value('QuestionId[1]', 'int') AS QuestionId,
t.Col.value('Options[1]/string[1]', 'varchar(MAX)') Options
FROM
#questions.nodes ('//Question') t(Col)
WHERE
t.Col.value('QuestionId[1]', 'int') = 5
The SELECT query is returning only first row for Options child string (Blue). How can I get all the values as 4 rows (Blue, Red, White, Black) by changing t.Col.value('Options[1]/string[1]', 'varchar(MAX)') ?
SET #questions = '<?xml version="1.0" encoding="UTF-8"?>
<Questions>
<Question>
<RowType>Question</RowType>
<Required>False</Required>
<QuestionText>select color</QuestionText>
<QuestionType>Radio Buttons</QuestionType>
<QuestionId>5</QuestionId>
<Options>
<string>Blue</string>
<string>Red</string>
<string>White</string>
<string>Black</string>
</Options>
</Question>
<Question>
<RowType>Question</RowType>
<Required>False</Required>
<QuestionText>select color</QuestionText>
<QuestionType>Radio Buttons</QuestionType>
<QuestionId>6</QuestionId>
<Options />
</Question>
</Questions>'
You need apply :
SELECT t.col.value('(./QuestionId)[1]','int') AS QuestionId,
t1.Col.value('(text())[1]', 'varchar(max)') AS Options
FROM #questions.nodes ('/Questions/Question') t(Col) OUTER APPLY
t.Col.nodes('Options/*') t1(Col);
You can go down to reach <string> tag level with Questions/Question/Options/string and then go back one level to get QuestionId:
SELECT
t.col.value('(//QuestionId)[1]','int') AS QuestionId,
t.Col.value('(.)[1]' ,'varchar(50)') AS Options
FROM #questions.nodes ('Questions/Question/Options/string') t(Col)
WHERE t.Col.value('(//QuestionId)[1]', 'int') = 5
Results:
As pointed out in comments the above solution will not work when another <question> tag appears before the one that must be selected.
This is the new input scenario with 4 <question> tags:
<?xml version="1.0" encoding="UTF-8"?>
<Questions>
<Question>
<RowType>Question</RowType>
<Required>False</Required>
<QuestionText>select color</QuestionText>
<QuestionType>Radio Buttons</QuestionType>
<QuestionId>6</QuestionId>
<Options />
</Question>
<Question>
<RowType>Question</RowType>
<Required>False</Required>
<QuestionText>select color</QuestionText>
<QuestionType>Radio Buttons</QuestionType>
<QuestionId>5</QuestionId>
<Options>
<string>Blue</string>
<string>Red</string>
<string>White</string>
<string>Black</string>
</Options>
</Question>
<Question>
<RowType>Question</RowType>
<Required>False</Required>
<QuestionText>select color</QuestionText>
<QuestionType>Radio Buttons</QuestionType>
<QuestionId>7</QuestionId>
<Options />
</Question>
<Question>
<RowType>Question</RowType>
<Required>False</Required>
<QuestionText>select color</QuestionText>
<QuestionType>Radio Buttons</QuestionType>
<QuestionId>8</QuestionId>
<Options>
<string>Blue</string>
<string>Red</string>
<string>White</string>
<string>Black</string>
</Options>
</Question>
</Questions>
Using the following query:
SELECT
t.col.value('((.)/QuestionId)[1]','int') AS QuestionId,
u.Col.value('(.)[1]' ,'varchar(50)') AS Options
FROM #questions.nodes ('Questions/*') t(Col)
OUTER APPLY t.Col.nodes('Options/*') u(Col)
these are the results:
applying the where clause leads to the desired results:

Find element or attribute value anywhere in XML

I am trying to find the value of an element / attribute regardless of where it exists in the XML.
XML:
<?xml version="1.0" encoding="UTF-8"?>
<cXML payloadID="12345677-12345567" timestamp="2017-07-26T09:11:05">
<Header>
<From>
<Credential domain="1212">
<Identity>01235 </Identity>
<SharedSecret/>
</Credential>
</From>
<To>
<Credential domain="1212">
<Identity>01234</Identity>
</Credential>
</To>
<Sender>
<UserAgent/>
<Credential domain="8989">
<Identity>10678</Identity>
<SharedSecret>Testing123</SharedSecret>
</Credential>
</Sender>
</Header>
<Request deploymentMode="Prod">
<ConfirmationRequest>
<ConfirmationHeader noticeDate="2017-07-26T09:11:05" operation="update" type="detail">
<Total>
<Money>0.00</Money>
</Total>
<Shipping>
<Description>Delivery</Description>
</Shipping>
<Comments>WO# generated</Comments>
</ConfirmationHeader>
<OrderReference orderDate="2017-07-25T15:22:11" orderID="123456780000">
<DocumentReference payloadID="5678-4567"/>
</OrderReference>
<ConfirmationItem quantity="1" lineNumber="1">
<ConfirmationStatus quantity="1" type="detail">
<ItemIn quantity="1">
<ItemID>
<SupplierPartID>R954-89</SupplierPartID>
</ItemID>
<ItemDetail>
<UnitPrice>
<Money currency="USD">0.00</Money>
</UnitPrice>
<Description>Test Descritpion 1</Description>
<UnitOfMeasure>QT</UnitOfMeasure>
</ItemDetail>
</ItemIn>
</ConfirmationStatus>
</ConfirmationItem>
<ConfirmationItem quantity="1" lineNumber="2">
<ConfirmationStatus quantity="1" type="detail">
<ItemIn quantity="1">
<ItemID>
<SupplierPartID>Y954-89</SupplierPartID>
</ItemID>
<ItemDetail>
<UnitPrice>
<Money currency="USD">0.00</Money>
</UnitPrice>
<Description>Test Descritpion 2</Description>
<UnitOfMeasure>QT</UnitOfMeasure>
</ItemDetail>
</ItemIn>
</ConfirmationStatus>
</ConfirmationItem>
</ConfirmationRequest>
</Request>
</cXML>
I want to get the value of the payloadID on the DocumentReference element. This is what I have tried so far:
BEGIN
Declare #Xml xml
Set #Xml = ('..The XML From Above..' as xml)
END
--no value comes back
Select c.value('(/*/DocumentReference/#payloadID)[0]','nvarchar(max)') from #Xml.nodes('//cXML') x(c)
--no value comes back
Select c.value('#payloadID','nvarchar(max)') from #Xml.nodes('/cXML/*/DocumentReference') x(c)
--check if element exists and it does
Select #Xml.exist('//DocumentReference');
I tried this in an xPath editor: //DocumentReference/#payloadID
This does work, but I am not sure what the equivalent syntax is in SQL
Calling .nodes() (like suggested in comment) is an unecessary overhead...
Better try it like this:
SELECT #XML.value('(//DocumentReference/#payloadID)[1]','nvarchar(max)')
And be aware, that XPath starts counting at 1. Your example with [0] cannot work...
--no value comes back
Select c.value('(/*/DocumentReference/#payloadID)[0]','nvarchar(max)') from...

dc:creator from XML into SQL table

I am trying to store an XML file (Code below) but the the dc:creator is causing an error. I have found from other related questions on here stating that I should use ;WITH XMLNAMESPACES(''http://purl.org/dc/elements/1.1/'' AS dc) but this has not worked either any ideas on what might be the problem/solution ? .
XML file:
<?xml version="1.0" encoding="UTF-8"?>
-<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xml:base="http://talksport.com/rss/sports-news/football/feed" version="2.0">
-<channel>
<title>Football</title>
<link>http://talksport.com/rss/sports-news/football/feed</link>
<description/>
<language>en</language>
<atom:link type="application/rss+xml" rel="self" href="http://talksport.com/rss/sports-news/football/feed"/>
-<item>
<title>Hillsborough families 'back introduction of rail seating' as bereaved family says 'standing did not kill our 96'</title>
<link>http://talksport.com/football/hillsborough-families-back-introduction-rail-seating-bereaved-family-says-standing-did-not</link>
<description/>
<pubDate>Wed, 19 Jul 2017 08:18:37 +0000</pubDate>
<dc:creator>talkSPORT</dc:creator>
<guid isPermaLink="false">247276 at http://talksport.com</guid>
</item>
</rss>
This is the stored procedure:
CREATE PROCEDURE feed.usp_importXML(#file VARCHAR(8000))
AS
BEGIN
DECLARE #Query VARCHAR(8000)
SET #Query ='
DECLARE #xmlFile as XML
SET #xmlFile = ( cast
SELECT CONVERT(XML,BulkColumn) as BulkColumn
FROM OPENROWSET (BULK '''+#file+''', SINGLE_BLOB) AS t)
INSERT INTO feed.tempXML (title,link,source)
SELECT
title = t.value (''title[1]'', ''NVARCHAR(300)''),
link = t.value (''link[1]'', ''NVARCHAR(300)''),
source = t.value(''(dc:creator)[1]'',''NVARCHAR(30)'')
FROM #xmlFile.nodes(''/rss/channel/item'') AS xTable(t);'
EXEC(#Query)
END
GO
In your case it might be enough to replace the dc: with a wildcard: *:. Assuming your XML is written into an XML file already, you might try as such:
DECLARE #xmlFile XML=
N'<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xml:base="http://talksport.com/rss/sports-news/football/feed" version="2.0">
<channel>
<title>Football</title>
<link>http://talksport.com/rss/sports-news/football/feed</link>
<description />
<language>en</language>
<atom:link type="application/rss+xml" rel="self" href="http://talksport.com/rss/sports-news/football/feed" />
<item>
<title>Hillsborough families ''back introduction of rail seating'' as bereaved family says ''standing did not kill our 96''</title>
<link>http://talksport.com/football/hillsborough-families-back-introduction-rail-seating-bereaved-family-says-standing-did-not</link>
<description />
<pubDate>Wed, 19 Jul 2017 08:18:37 +0000</pubDate>
<dc:creator>talkSPORT</dc:creator>
<guid isPermaLink="false">247276 at http://talksport.com</guid>
</item>
</channel>
</rss>';
SELECT
title = t.value ('title[1]', 'NVARCHAR(300)'),
link = t.value ('link[1]', 'NVARCHAR(300)'),
source = t.value('(*:creator)[1]','NVARCHAR(30)')
FROM #XmlFile.nodes('/rss/channel/item') AS xTable(t);
This will work too:
WITH XMLNAMESPACES('http://purl.org/dc/elements/1.1/' AS dc)
SELECT
title = t.value ('title[1]', 'NVARCHAR(300)'),
link = t.value ('link[1]', 'NVARCHAR(300)'),
source = t.value('(dc:creator)[1]','NVARCHAR(30)')
FROM #XmlFile.nodes('/rss/channel/item') AS xTable(t);
And you can declare the namespace within .value too:
SELECT
title = t.value ('title[1]', 'NVARCHAR(300)'),
link = t.value ('link[1]', 'NVARCHAR(300)'),
source = t.value('declare namespace dc="http://purl.org/dc/elements/1.1/";(dc:creator)[1]','NVARCHAR(30)')
FROM #XmlFile.nodes('/rss/channel/item') AS xTable(t);

FORXML SQL Group By Element

I am trying to group some elements together under one node. This is my current SQL;
declare #xml xml
set #xml = (
select (
select
'DERIVED' '#type',
m.NuixDerivedFieldName '#name', (
SELECT
NuixFieldType as 'metadata/#type',
NuixFieldName as 'metadata/#name'
from eddsdbo.MetadataMapping m1
where m1.NuixDerivedFieldName = m.NuixDerivedFieldName
for xml path ('first-non-blank'), type
)
from (select distinct NuixDerivedFieldName from eddsdbo.MetadataMapping) m
for xml path ('metadata'))
)
;WITH XMLNAMESPACES(DEFAULT 'http://nuix.com/fbi/metadata-profile')
select #xml for XML PATH ('metadata-list'), ROOT ('metadata-profile')
Which gives me the following output;
<metadata-profile xmlns="http://nuix.com/fbi/metadata-profile">
<metadata-list>
<metadata type="DERIVED" name="Barcode" xmlns="">
<first-non-blank>
<metadata type="CUSTOM" name="Barcode" />
</first-non-blank>
<first-non-blank>
<metadata type="EVIDENCE" name="Barcode" />
</first-non-blank>
</metadata>
I want to group together elements together which have the same 'name' attribute of the metadata element under the <first-non-blank> element.
The desired output should be;
<metadata-profile xmlns="http://nuix.com/fbi/metadata-profile">
<metadata-list>
<metadata type="DERIVED" name="Barcode" xmlns="">
<first-non-blank>
<metadata type="CUSTOM" name="Barcode" />
<metadata type="EVIDENCE" name="Barcode" />
</first-non-blank>
</metadata>
...
My database looks something like this;
NuixFieldName NuixFieldType NuixDerivedFieldName
------------------------------ ------------------------------ ------------------------------
_EmailEntryID PROPERTY EmailEntryID
Audited Audited Audited
Author PROPERTY Author
Barcode CUSTOM Barcode
Barcode EVIDENCE Barcode
I would also like to remove the xlmns namespace identifier from the metadata elements.
Thanks in advance!
You could try this
DECLARE #SampleData AS TABLE
(
NuixFieldName varchar(20),
NuixFieldType varchar(20),
NuixDerivedFieldName varchar(20)
)
INSERT INTO #SampleData
VALUES
('_EmailEntryID','PROPERTY','EmailEntryID'),
('Audited', 'Audited ','Audited'),
('Author ', 'PROPERTY','Author '),
('Barcode', 'CUSTOM ','Barcode'),
('Barcode', 'EVIDENCE','Barcode')
DECLARE #xml XML
SET #xml = (
SELECT
-- sd.NuixDerivedFieldName AS [#name],
'DERIVED' AS [#type],
sd.NuixDerivedFieldName AS [#name],
(
SELECT
sd2.NuixFieldType as '#type',
sd2.NuixFieldName as '#name'
FROM #SampleData sd2 WHERE sd2.NuixDerivedFieldName = sd.NuixDerivedFieldName
FOR XML PATH ('metadata'),ROOT('first-non-blank'), TYPE
)
FROM (select DISTINCT sd.NuixDerivedFieldName from #SampleData sd ) sd
FOR XML PATH('metadata'), ROOT('metadata-list'),TYPE
)
;WITH XMLNAMESPACES(DEFAULT 'http://nuix.com/fbi/metadata-profile')
SELECT #xml FOR XML PATH (''),ROOT('metadata-profile')
return:
<metadata-profile xmlns="http://nuix.com/fbi/metadata-profile">
<metadata-list>
<metadata type="DERIVED" name="Audited">
<first-non-blank>
<metadata type="Audited " name="Audited" />
</first-non-blank>
</metadata>
<metadata type="DERIVED" name="Author ">
<first-non-blank>
<metadata type="PROPERTY" name="Author " />
</first-non-blank>
</metadata>
<metadata type="DERIVED" name="Barcode">
<first-non-blank>
<metadata type="CUSTOM " name="Barcode" />
<metadata type="EVIDENCE" name="Barcode" />
</first-non-blank>
</metadata>
<metadata type="DERIVED" name="EmailEntryID">
<first-non-blank>
<metadata type="PROPERTY" name="_EmailEntryID" />
</first-non-blank>
</metadata>
</metadata-list>
</metadata-profile>

bulk insert and parse a complex XML file into several tables

I have the following sql stored procedure to bulk insert and parse an xml file and insert its data into several tables in a database.
The sql below works, however its inserting duplicate records into the #questions table and the #cards table.
Any help on this would be much appreciated. Thanks in advance.
Here is the XML file:
<?xml version="1.0" encoding="UTF-8"?>
<Users>
<User>
<UserInfo>
<Id>0001</Id>
<FirstName>John</FirstName>
<LastName>Doe</LastName>
<Email>Doejk#net.com</Email>
</UserInfo>
<Questions>
<Question>
<Id>1</Id>
<AnswerId>1</AnswerId>
</Question>
<Question>
<Id>2</Id>
<AnswerId>3</AnswerId>
</Question>
</Questions>
<Cards>
<Card>
<Id>1234</Id>
<Type>Digital</Type>
<Status>Active</Status>
</Card>
<Card>
<Id>1334</Id>
<Type>Physical</Type>
<Status>Not Active</Status>
</Card>
</Cards>
</User>
<User>
<UserInfo>
<Id>0002</Id>
<FirstName>Mary</FirstName>
<LastName>Doe</LastName>
<Email>Doem#net.com</Email>
</UserInfo>
<Questions>
<Question>
<Id>3</Id>
<AnswerId>6</AnswerId>
</Question>
<Question>
<Id>4</Id>
<AnswerId>7</AnswerId>
</Question>
</Questions>
<Cards>
<Card>
<Id>3333</Id>
<Type>Digital</Type>
<Status>Active</Status>
</Card>
<Card>
<Id>4444</Id>
<Type>Physical</Type>
<Status>Active</Status>
</Card>
</Cards>
</User>
</Users>
Here is SQL code
/*************************************************************************
-- CREATE TEMP TABLES --
**************************************************************************/
CREATE TABLE #XMLDATA
(RecordId int IDENTITY(1,1) NOT NULL,
XmlData xml NOT NULL)
CREATE TABLE #USERS
(UserID int null,
firstname varchar(50) null,
lastname varchar(50) null,
email varchar(50) null)
CREATE TABLE #CARDS
(CardId int null,
userid int null,
card_type varchar(50) null,
card_status varchar(50) null)
CREATE TABLE #QUESTIONS
(UserID int null,
Question_ID int null,
Answer_ID int null)
/*************************************************************************
-- LOAD THE WHOLE XML AS SINGLE BLOB --
**************************************************************************/
INSERT INTO #XMLDATA(XmlData)
SELECT *
FROM OPENROWSET(
BULK 'c:\users.xml', SINGLE_BLOB)
/*************************************************************************
-- INSERT USERS --
**************************************************************************/
INSERT INTO #USERS
(userid, firstname, lastname, email)
SELECT href.value('(Id/text())[1]', 'integer'),
href.value('(FirstName/text())[1]', 'varchar(50)'),
href.value('(LastName/text())[1]', 'varchar(50)'),
href.value('(Email/text())[1]', 'varchar(30)'),
FROM #XMLDATA CROSS APPLY
XmlData.nodes('Users') AS userinfo(href)
/*************************************************************************
-- INSERT QUESTIONS INFORMATION --
**************************************************************************/
INSERT INTO #QUESTIONS
(UserId, Question_ID, Answer_ID)
SELECT sref.value('(Id/text())[1]', 'integer'),
qref.value('(Id/text())[1]', 'integer'),
qref.value('(AnswerId/text())[1]', 'integer')
FROM #XMLDATA CROSS APPLY
XmlData.nodes('Users/Questions/Question') AS Ques(qref) CROSS APPLY
XmlData.nodes('Users') AS userinf(sref)
/*************************************************************************
-- INSERT CARD INFORMATION --
**************************************************************************/
INSERT INTO #CARDS
(userid, card_id, card_type,card_status)
SELECT sref.value('(Id/text())[1]', 'integer'),
cref.value('(Id/text())[1]', 'integer'),
cref.value('(Type/text())[1]', 'varchar(50)'),
cref.value('(Status/text())[1]', 'varchar(50)')
FROM #XMLDATA CROSS APPLY
XmlData.nodes('/Users/Cards/Card') AS cardlist(cref) CROSS APPLY
XmlData.nodes('/Users') AS userinf(sref)
I'm not sure what your CROSS JOIN is about.
Here is some shredding.
Tip: Comment out the INSERT portion and just do the SELECT portion until you get those tweaked correctly.
DECLARE #data XML;
SET #data =
N'
<Users>
<User>
<UserInfo>
<Id>0001</Id>
<FirstName>John</FirstName>
<LastName>Doe</LastName>
<Email>Doejk#net.com</Email>
</UserInfo>
<Questions>
<Question>
<Id>1</Id>
<AnswerId>1</AnswerId>
</Question>
<Question>
<Id>2</Id>
<AnswerId>3</AnswerId>
</Question>
</Questions>
<Cards>
<Card>
<Id>1234</Id>
<Type>Digital</Type>
<Status>Active</Status>
</Card>
<Card>
<Id>1334</Id>
<Type>Physical</Type>
<Status>Not Active</Status>
</Card>
</Cards>
</User>
<User>
<UserInfo>
<Id>0002</Id>
<FirstName>Mary</FirstName>
<LastName>Doe</LastName>
<Email>Doem#net.com</Email>
</UserInfo>
<Questions>
<Question>
<Id>3</Id>
<AnswerId>6</AnswerId>
</Question>
<Question>
<Id>4</Id>
<AnswerId>7</AnswerId>
</Question>
</Questions>
<Cards>
<Card>
<Id>3333</Id>
<Type>Digital</Type>
<Status>Active</Status>
</Card>
<Card>
<Id>4444</Id>
<Type>Physical</Type>
<Status>Active</Status>
</Card>
</Cards>
</User>
</Users>';
SELECT T.myEntity.value('(Id)[1]', 'VARCHAR(20)')
, T.myEntity.value('(FirstName)[1]', 'VARCHAR(20)')
, T.myEntity.value('(LastName)[1]', 'VARCHAR(20)')
, T.myEntity.value('(Email)[1]', 'VARCHAR(20)')
FROM #data.nodes('Users/User/UserInfo') AS T(myEntity);
SELECT
T.myEntity.value('(../../UserInfo/Id)[1]', 'VARCHAR(20)')
, T.myEntity.value('(Id)[1]', 'INT')
, T.myEntity.value('(AnswerId)[1]', 'INT')
FROM #data.nodes('Users/User/Questions/Question') AS T(myEntity);
SELECT
T.myEntity.value('(../../UserInfo/Id)[1]', 'VARCHAR(20)')
, T.myEntity.value('(Id)[1]', 'INT')
, T.myEntity.value('(Type)[1]', 'VARCHAR(20)')
, T.myEntity.value('(Status)[1]', 'VARCHAR(20)')
FROM #data.nodes('Users/User/Cards/Card') AS T(myEntity);
CREATE TABLE #XMLDATA
(RecordId int IDENTITY(1,1) NOT NULL,
XmlData xml NOT NULL)
CREATE TABLE #USERS
(UserID int null,
firstname varchar(50) null,
lastname varchar(50) null,
email varchar(50) null)