I have a problem converting XML content to JSON (with a plain Oracle SELECT statement) when the original XML contains more than one sub-level of data: with my code, the result for level 2 and deeper is emitted as a string and not as a JSON object. Could someone please tell me where the fault in my code is, or what I'm doing wrong?
source:
<envelope>
<sender>
<name>IZS</name>
<country>SU</country>
<address>LOCATION 10B</address>
<address>1000 CITY</address>
<sender_identifier>SU46794093</sender_identifier>
<sender_address>
<sender_agent>SKWWSI20XXX</sender_agent>
<sender_mailbox>SI56031098765414228</sender_mailbox>
</sender_address>
</sender>
</envelope>
transformation select statement:
-- Converts the sample <envelope> document to JSON, using one JSON_OBJECTAGG
-- per XML nesting level (level 1: /envelope/*, level 2: /sender/*,
-- level 2.2: /sender/sender_address/*).
-- NOTE: only the outer (level 1) CASE result is declared FORMAT JSON. The
-- level 2 CASE result is not, so the nested sender_address object is emitted
-- as an escaped string instead of a JSON object.
WITH SAMPLE AS (
  SELECT XMLTYPE ('
<envelope>
<sender>
<name>IZS</name>
<country>SU</country>
<address>LOCATION 10B</address>
<address>1000 CITY</address>
<sender_identifier>SU46794093</sender_identifier>
<sender_address>
<sender_agent>SKWWSI20XXX</sender_agent>
<sender_mailbox>SI56031098765414228</sender_mailbox>
</sender_address>
</sender>
</envelope>') XMLDOC
  FROM DUAL
)
SELECT JSON_SERIALIZE (
         JSON_OBJECT (
           KEY 'envelope' VALUE
             JSON_OBJECTAGG (
               KEY ID_LEVEL1 VALUE
                 CASE ID_LEVEL1
                   WHEN 'sender' THEN
                     -- level 2: children of <sender>
                     (SELECT JSON_OBJECTAGG (
                               KEY ID_LEVEL2 VALUE
                                 CASE ID_LEVEL2
                                   WHEN 'sender_address' THEN
                                     -- level 2.2: children of <sender_address>
                                     (SELECT JSON_OBJECTAGG (KEY ID_LEVEL22 VALUE TEXT_LEVEL22)
                                      FROM XMLTABLE ('/sender/sender_address/*'
                                             PASSING XML_LEVEL2
                                             COLUMNS ID_LEVEL22   VARCHAR2 (128) PATH './name()',
                                                     TEXT_LEVEL22 VARCHAR2 (128) PATH './text()'
                                           )
                                     )
                                   ELSE
                                     TEXT_LEVEL2
                                 END)
                      FROM XMLTABLE ('/sender/*'
                             PASSING XML_LEVEL2
                             COLUMNS ID_LEVEL2   VARCHAR2 (1024) PATH './name()',
                                     TEXT_LEVEL2 VARCHAR2 (1024) PATH './text()'
                           )
                     )
                   ELSE
                     -- plain text is wrapped in quotes so it is valid under FORMAT JSON
                     '"' || TEXT_LEVEL1 || '"'
                 END FORMAT JSON)
         ) PRETTY
       ) JSON_DOC
FROM SAMPLE,
     XMLTABLE ('/envelope/*'
       PASSING XMLDOC
       COLUMNS ID_LEVEL1   VARCHAR2 (1024) PATH './name()',
               TEXT_LEVEL1 VARCHAR2 (1024) PATH './text()',
               XML_LEVEL2  XMLTYPE         PATH '.'
     );
wrong result:
{
"envelope" :
{
"sender" :
{
"name" : "IZS",
"country" : "SU",
"address" : "LOCATION 10B",
"address" : "1000 CITY",
"sender_identifier" : "SU46794093",
"sender_address" : "{\"sender_agent\":\"SKWWSI20XXX\",\"sender_mailbox\":\"SI56031098765414228\"}"
}
}
}
wrong part:
***"sender_address" : "{\"sender_agent\":\"SKWWSI20XXX\",\"sender_mailbox\":\"SI56031098765414228\"}"***
For the level 1 text you're wrapping the value in double-quotes and specifying format json; you aren't doing that for level 2. If you change:
ELSE
TEXT_LEVEL2
END
to:
ELSE
'"' || TEXT_LEVEL2 || '"'
END FORMAT JSON)
then the result is:
{
"envelope" :
{
"sender" :
{
"name" : "IZS",
"country" : "SU",
"address" : "LOCATION 10B",
"address" : "1000 CITY",
"sender_identifier" : "SU46794093",
"sender_address" :
{
"sender_agent" : "SKWWSI20XXX",
"sender_mailbox" : "SI56031098765414228"
}
}
}
}
fiddle
The problem is that you need a kind of conditional "FORMAT JSON" in the "SELECT JSON_OBJECTAGG ( KEY ID_LEVEL2 VALUE CASE ID_LEVEL2" expression: it is needed when ID_LEVEL2 is 'sender_address' but not in the ELSE branch. However, the syntax requires you to put it after the END of the CASE, so it also applies to the "ELSE TEXT_LEVEL2" part, and of course that fails.
The following BigQuery code does not display the guillemets « and » correctly. In the output of the code below, notice that the guillemets are 'translated' as xAB and xBB. The expected answer should preserve the current translation but replace xAB with « and xBB with ».
-- decode(word): when `word` starts with '&#x', rewrites every '&#' prefix to
-- '0' (so '&#x62A' becomes '0x62A'), splits on ';', turns each non-empty piece
-- into a code point (SAFE_CAST to INT64, falling back to ASCII() of the piece)
-- and builds a string from those code points. Any other `word` is returned
-- unchanged.
CREATE TEMP FUNCTION decode(word STRING) AS ((
  SELECT
    IF(
      STARTS_WITH(word, '&#x'),
      SAFE.CODE_POINTS_TO_STRING(ARRAY(
        SELECT IFNULL(SAFE_CAST(value AS INT64), ASCII(value))
        FROM UNNEST(SPLIT(REPLACE(word, '&#', '0'), ';')) AS value
        WHERE value != ''
      )),
      word
    )
));
-- Splits `title` into runs of hex entities and runs of plain text, decodes
-- each run with decode(), and re-joins the pieces in their original order.
-- The title is a string literal (single quotes only); wrapping it in backticks
-- would make BigQuery read it as a quoted identifier instead.
-- NOTE: the pattern `&#x.{3};` only matches entities with exactly three
-- characters between '&#x' and ';', so two-character entities such as
-- '&#xAB;' are not picked up as entities by this version.
WITH
  DATA AS (
    SELECT
      'Arabic' AS lang,
      'https://www.elwatannews.com/news/details/5516935' AS url,
      'تطورات «مذبحة أبو حزام ».. دفن 10 جثث وضبط 19 من عائلتي المجزرة' AS title
  )
SELECT
  url,
  lang,
  (
    SELECT
      STRING_AGG(decode(chars), '' ORDER BY OFFSET)
    FROM
      UNNEST(REGEXP_EXTRACT_ALL(title, r'(?:&#x.{3};)+|[^&]+')) chars
      WITH OFFSET
  ) AS translate
FROM
  DATA
-- decode(word): when `word` starts with '&#x', rewrites every '&#' prefix to
-- '0' (so '&#x62A' becomes '0x62A'), splits on ';', turns each non-empty piece
-- into a code point (SAFE_CAST to INT64, falling back to ASCII() of the piece)
-- and builds a string from those code points. Any other `word` is returned
-- unchanged.
CREATE TEMP FUNCTION decode(word STRING) AS ((
  SELECT
    IF(
      STARTS_WITH(word, '&#x'),
      SAFE.CODE_POINTS_TO_STRING(ARRAY(
        SELECT IFNULL(SAFE_CAST(value AS INT64), ASCII(value))
        FROM UNNEST(SPLIT(REPLACE(word, '&#', '0'), ';')) AS value
        WHERE value != ''
      )),
      word
    )
));
-- Same pipeline as in the question, but the entity pattern accepts two or
-- three characters between '&#x' and ';' (`.{2,3}`), so short entities are
-- extracted as entities and passed to decode() as well.
WITH
  DATA AS (
    SELECT
      'Arabic' AS lang,
      'https://www.elwatannews.com/news/details/5516935' AS url,
      'تطورات «مذبحة أبو حزام ».. دفن 10 جثث وضبط 19 من عائلتي المجزرة' AS title
  )
SELECT
  -- url,
  lang,
  (
    SELECT
      STRING_AGG(decode(chars), '' ORDER BY OFFSET)
    FROM
      UNNEST(REGEXP_EXTRACT_ALL(title, r'(?:&#x.{2,3};)+|[^&]+')) chars
      WITH OFFSET
  ) AS translate
FROM
  DATA
with output
I need to convert the following text to XML
{"name":"daniel & sophia","age":20,"year":2009,"weight":15.1,"points":3,"alias":"dani,da"}{"name":"charls & lina","age":22,"year":2007,"weight":19.0"points":3,"alias":"carlos,lini"}
to
<participants>
<participant>
<name>daniel & sophia</name>
<age>20</age>
<year>2009</year>
<weight>15.1</weight>
<points>3</points>
<alias>dani,da</alias>
</participant>
<participant>
<name>charls & lina</name>
<age>22</age>
<year>2007</year>
<weight>19.0</weight>
<points>3</points>
<alias>carlos,lini</alias>
</participant>
</participants>
I tried to insert the data into a temporary table and then replace the "{}" characters. Then I tried to convert it with the XML function, but I really don't know how to reproduce the name of each item.
-- Rebuild the scratch table on every run.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
    DROP TABLE #tmp
CREATE TABLE #tmp
(
    Id    INT IDENTITY,
    Campo VARCHAR(MAX)
)
INSERT INTO #tmp (Campo)
VALUES ('{"name":"daniel & sophia","age":20,"year":2009,"weight":15.1,"points":3,"alias":"dani,da"}{"name":"charls & lina","age":22,"year":2007,"weight":19.0"points":3,"alias":"carlos,lini"}')
-- Attempt at a string-level conversion:
--   1. XML-escape Campo via FOR XML PATH(''), then turn { and } into
--      <participant> / </participant> and append the closing root tag;
--   2. open a <dato> after each <participant>" ;
--   3. turn every "," into </dato><dato> ;
--   4. close the last <dato> before each </participant>.
-- Only "," is treated as a separator, so pairs separated by ," after a
-- numeric value stay glued together inside one <dato>.
SELECT
    CONVERT(
        XML,
        '<participants>' +
        REPLACE(
            REPLACE(
                REPLACE(
                    REPLACE(
                        REPLACE((SELECT Campo AS [*] FOR XML PATH('')), '{', '<participant>'),
                        '}',
                        '</participant>'
                    ) + '</participants>',
                    '<participant>"',
                    '<participant><dato>'
                ),
                '","',
                '</dato><dato>'
            ),
            '</participant>',
            '</dato></participant>'
        )
    ) AS xmlname
FROM #tmp
And this is what I get, but It is wrong:
<participants>
<participant>
<dato>name":"daniel & sophia</dato>
<dato>age":20,"year":2009,"weigth":15.1,"points":3,"alias":"dani,da"</dato>
</participant>
<participant>
<dato>name":"charls & lina</dato>
<dato>age":22,"year":2007,"weigth":19.0,"points":3,"alias":"carlos,lini"</dato>
</participant>
</participants>
NOTE: The number of nodes within the participant node is
unknown; it can be more than 100, and I would really like this to be a
dynamic query (without using EXEC "sql code").
John Cappelletti's answer is great, as long, as you know the column names in advance. The following approach will help you in cases, where you have to deal with such structures dynamically.
It is ugly to create XML on string level (as I do in the final SELECT, e.g. SELECT '<' + innerNvp.Name + '>' + ...), but it is a working way to deal with column names dynamically. Otherwise you'd either have to know all columns in advance, or you'd need to go the path of dynamic SQL with EXEC. There's one thing to keep in mind: the names in your structure (like "name") must be valid XML tag names.
one general hint: All approaches here try to cut your parts on string level. This might break, if there is a } or a ," or a : in an unexpected place...
-- Turns the concatenated {..}{..} text into element-centred XML without knowing
-- the key names in advance. The text is cut on string level in three passes
-- ('}' -> rows, ',"' -> name/value pairs, ':' -> name and value), so a
-- separator character inside a value will break the split.
-- @str is a local variable, so it takes the @ prefix (# denotes a temp table).
DECLARE @str NVARCHAR(MAX)='{"name":"daniel & sophia","age":20,"year":2009,"weigth":15.1,"points":3,"alias":"dani,da"}{"name":"charls & lina","age":22,"year":2009,"weigth":15.1,"points":3,"alias":"carlos,lini"}';
WITH SplittedAtClosingCurly AS
(
    -- (SELECT ... AS [*] FOR XML PATH('')) XML-escapes the text (e.g. &) before
    -- the separators are replaced with </x><x> and the result is cast to XML.
    SELECT CAST('<x>' + REPLACE((SELECT @str AS [*] FOR XML PATH('')),'}','</x><x>') + '</x>' AS XML) AS TheRows
)
,SplittedAtCommaQuote AS
(
    -- One row per {...} group; each group is split again at ," into pairs.
    SELECT ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS RowNr
          ,CAST('<x>' + REPLACE((SELECT Rw.value(N'text()[1]','nvarchar(max)') AS [*] FOR XML PATH('')),',"','</x><x>') + '</x>' AS XML) AS TheRow
    FROM SplittedAtClosingCurly
    CROSS APPLY TheRows.nodes(N'/x[text()]') AS A(Rw)
)
,SplittedAtDoubleDot AS
(
    -- One row per pair; each pair is split at ':' into name (x[1]) and value (x[2]).
    SELECT RowNr
          ,ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) AS TplNr
          ,CAST('<x>' + REPLACE((SELECT Tpl.value(N'text()[1]','nvarchar(max)') AS [*] FOR XML PATH('')),':','</x><x>') + '</x>' AS XML) AS TheTupel
    FROM SplittedAtCommaQuote
    CROSS APPLY TheRow.nodes(N'/x[text()]') AS A(Tpl)
)
,DerivedTable_NameValuePairs AS
(
    -- Strip the leftover { and " characters from both parts.
    SELECT RowNr
          ,TplNr
          ,REPLACE(REPLACE(TheTupel.value(N'/x[1]/text()[1]','nvarchar(max)'),'{',''),'"','') AS Name
          ,REPLACE(REPLACE(TheTupel.value(N'/x[2]/text()[1]','nvarchar(max)'),'{',''),'"','') AS Value
    FROM SplittedAtDoubleDot
)
-- Per RowNr, build the <name>value</name> elements as text (the element names
-- are only known at run time) and cast the concatenated text back to XML.
SELECT CAST(
       (
           SELECT '<' + innerNvp.Name + '>' + (SELECT innerNvp.Value AS [*] FOR XML PATH('')) + '</' + innerNvp.Name + '>'
           FROM DerivedTable_NameValuePairs AS innerNvp
           WHERE innerNvp.RowNr=nvp.RowNr
           ORDER BY TplNr
           FOR XML PATH(''),TYPE
       ).value(N'text()[1]','nvarchar(max)') AS XML)
FROM DerivedTable_NameValuePairs AS nvp
GROUP BY RowNr
FOR XML PATH('participant'),ROOT('participants')
This query returns an equivalent attribute-type xml like this
-- Rebuild the scratch table and load the sample text.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
DROP TABLE #tmp
CREATE TABLE #tmp
(
Id INT IDENTITY,
Campo VARCHAR(MAX)
)
INSERT INTO #tmp
(
Campo
)
VALUES
(
'{"name":"daniel & sophia","age":20,"year":2009,"weigth":15.1,"points":3,"alias":"dani,da"}{"name":"charls & lina","age":22,"year":2009,"weigth":15.1,"points":3,"alias":"carlos,lini"}'
)
-- Rewrites each {...} group as one <participant .../> element with attributes.
-- REPLACE order, innermost first:
--   {"  -> <participant      "}  -> " />
--   ":" -> ":   and  ","  -> ,"      (drop the quotes around string values)
--   ":  -> ":"  and  ,"   -> ","     (quote every value uniformly)
--   ":" -> ="   and  ","  -> "<space>  (name="value" pairs)
--   &   -> ,                         (a bare & cannot be cast to XML)
SELECT (select (CAST(replace(replace(replace(replace(replace(replace(
replace(replace(Replace(t.Campo, '{"', '<participant '), '"}','" />'), '":"', '":'),'","',',"')
, '":', '":"'),',"','","'), '":"', '="'), '","', '" '), '&',',') AS XML)
)
FOR XML PATH (''), ROOT ('participants')) as xml
FROM #tmp t
& is illegal in xml, then i replaced it with ,.
Result:
<participants>
<participant name="daniel , sophia" age="20" year="2009" weigth="15.1" points="3" alias="dani,da"/>
<participant name="charls , lina" age="22" year="2009" weigth="15.1" points="3" alias="carlos,lini"/>
</participants>
** -- Updated for Updated Question --**
With the help of a Parse/Split UDF. Just about any Parse/Split UDF would do the trick. I did however supply mine.
This approach can be applied to any portion of your core data.
Example
-- Sample data in a table variable (table variables take the @ prefix).
Declare @YourTable table (ID int,Campo varchar(max))
Insert Into @YourTable values
(1,'{"name":"daniel & sophia","age":20,"year":2009,"weight":15.1,"points":3,"alias":"dani,da"}{"name":"charls & lina","age":22,"year":2007,"weight":19.0"points":3,"alias":"carlos,lini"}')
-- Splits Campo into {...} groups (B) and then into name/value items (C),
-- pivots the known item names into columns with conditional aggregation and
-- emits one <participant> element per group.
Select [name]   = max(case when Item='name'   then Value end)
      ,[age]    = max(case when Item='age'    then Value end)
      ,[year]   = max(case when Item='year'   then Value end)
      ,[weight] = max(case when Item='weight' then Value end)
      ,[points] = max(case when Item='points' then Value end)
      ,[alias]  = max(case when Item='alias'  then Value end)
 From (
        Select A.ID
              ,RowNr = B.RetSeq
              -- text before the first ':' is the item name, the rest is the value
              ,Item  = replace(replace(left(C.RetVal,charindex(':',C.RetVal)-1),'"',''),'{','')
              ,Value = replace(replace(right(C.RetVal,len(C.RetVal)-charindex(':',C.RetVal)),'"',''),'}','')
         From  @YourTable A
         Cross Apply [dbo].[udf-Str-Parse](A.Campo,'}{') B -- NOTE: Should really be },{
         Cross Apply [dbo].[udf-Str-Parse](B.RetVal,',"') C
         -- YOUR WHERE STATEMENT HERE
      ) A
 Group By ID,RowNr
 Order By ID,RowNr
 For XML Path('participant'),Root('participants'),Type
Returns
<participants>
<participant>
<name>daniel & sophia</name>
<age>20</age>
<year>2009</year>
<weight>15.1</weight>
<points>3</points>
<alias>dani,da</alias>
</participant>
<participant>
<name>charls & lina</name>
<age>22</age>
<year>2007</year>
<weight>19.0points:3</weight>
<alias>carlos,lini</alias>
</participant>
</participants>
The UDF if Interested
-- Inline table-valued split function.
--   @String    : text to split
--   @Delimiter : separator (up to 10 characters)
-- Returns one row per piece: RetSeq (row number) and RetVal (trimmed piece).
-- The delimiter is first swapped for the marker '§§Split§§', the text is
-- XML-escaped via FOR XML PATH(''), and only then is the marker turned into
-- </x><x>, so characters such as < > & in @String survive the cast to XML.
-- NOTE(review): RetSeq is numbered with ORDER BY (SELECT NULL); it presumably
-- follows the node order, but that ordering is not stated explicitly here.
CREATE FUNCTION [dbo].[udf-Str-Parse] (@String varchar(max),@Delimiter varchar(10))
Returns Table
As
Return (
    Select RetSeq = Row_Number() over (Order By (Select null))
          ,RetVal = LTrim(RTrim(B.i.value('(./text())[1]', 'varchar(max)')))
    From (Select x = Cast('<x>' + replace((Select replace(@String,@Delimiter,'§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as A
    Cross Apply x.nodes('x') AS B(i)
);
--Thanks Shnugo for making this XML safe
--Select * from [dbo].[udf-Str-Parse]('Dog,Cat,House,Car',',')
--Select * from [dbo].[udf-Str-Parse]('John Cappelletti was here',' ')
--Select * from [dbo].[udf-Str-Parse]('this,is,<test>,for,< & >',',')
One final note:
If you can't use or want an UDF, this can easily be converted to in-line.
EDIT - In-line Approach
-- Same pivot as the UDF version, with the split logic written in-line:
-- B splits Campo at '}{' into groups, C splits each group at ',"' into items.
Select [name]   = max(case when Item='name'   then Value end)
      ,[age]    = max(case when Item='age'    then Value end)
      ,[year]   = max(case when Item='year'   then Value end)
      ,[weight] = max(case when Item='weight' then Value end)
      ,[points] = max(case when Item='points' then Value end)
      ,[alias]  = max(case when Item='alias'  then Value end)
 From (
        Select A.ID
              ,RowNr = B.RetSeq
              ,Item  = replace(replace(left(C.RetVal,charindex(':',C.RetVal)-1),'"',''),'{','')
              ,Value = replace(replace(right(C.RetVal,len(C.RetVal)-charindex(':',C.RetVal)),'"',''),'}','')
         From  YourTable A
         Cross Apply (
                -- split A.Campo into one row per {...} group
                Select RetSeq = Row_Number() over (Order By (Select null))
                      ,RetVal = LTrim(RTrim(N.i.value('(./text())[1]', 'varchar(max)')))
                From  (Select x = Cast('<x>' + replace((Select replace(A.Campo,'}{','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as S
                Cross Apply x.nodes('x') AS N(i)
              ) B
         Cross Apply (
                -- split each group into one row per name/value item
                Select RetSeq = Row_Number() over (Order By (Select null))
                      ,RetVal = LTrim(RTrim(N.i.value('(./text())[1]', 'varchar(max)')))
                From  (Select x = Cast('<x>' + replace((Select replace(B.RetVal,',"','§§Split§§') as [*] For XML Path('')),'§§Split§§','</x><x>')+'</x>' as xml).query('.')) as S
                Cross Apply x.nodes('x') AS N(i)
              ) C
         -- Your WHERE STATEMENT here --
      ) A
 Group By ID,RowNr
 Order By ID,RowNr
 For XML Path('participant'),Root('participants'),Type
I add this as a new answer, it is an addition to TriV's answer actually:
The transformation to attribute centered XML is a good idea. You might go one step further with a FLWOR XQuery approach:
-- Attribute-centred sample document. The ampersand must be written as &amp;
-- inside the literal, otherwise the assignment to XML fails to parse.
DECLARE @xml XML=
N'<participants>
<participant name="daniel &amp; sophia" age="20" year="2009" weigth="15.1" points="3" alias="dani,da"/>
<participant name="charls &amp; lina" age="22" year="2009" weigth="15.1" points="3" alias="carlos,lini"/>
</participants>';
-- FLWOR: for every participant, emit one <data name="..." value="..."/> per
-- attribute ($p/@* selects all attributes of the current participant).
SELECT @xml.query
('
<participants>
{
for $p in /participants/participant
return
<participant>
{
for $attr in $p/@*
return <data name="{local-name($attr)}" value="{string($attr)}"/>
}
</participant>
}
</participants>
');
The result
<participants>
<participant>
<data name="name" value="daniel & sophia" />
<data name="age" value="20" />
<data name="year" value="2009" />
<data name="weigth" value="15.1" />
<data name="points" value="3" />
<data name="alias" value="dani,da" />
</participant>
<participant>
<data name="name" value="charls & lina" />
<data name="age" value="22" />
<data name="year" value="2009" />
<data name="weigth" value="15.1" />
<data name="points" value="3" />
<data name="alias" value="carlos,lini" />
</participant>
</participants>
Regrettably, this approach does not support dynamically created elements.
UPDATE: Slightly different, even closer:
The following query will place the values as element's text(), while the element's name is still an attribute...
-- Variant: the attribute value becomes the text of <data>, while the original
-- attribute name is kept in the name="..." attribute.
-- @xml is the XML variable holding the attribute-centred <participants> document.
SELECT @xml.query
('
<participants>
{
for $p in /participants/participant
return
<participant>
{
for $attr in $p/@*
return <data name="{local-name($attr)}">{string($attr)}</data>
}
</participant>
}
</participants>');
The result
<participants>
<participant>
<data name="name">daniel & sophia</data>
<data name="age">20</data>
<data name="year">2009</data>
<data name="weigth">15.1</data>
<data name="points">3</data>
<data name="alias">dani,da</data>
</participant>
<participant>
<data name="name">charls & lina</data>
<data name="age">22</data>
<data name="year">2009</data>
<data name="weigth">15.1</data>
<data name="points">3</data>
<data name="alias">carlos,lini</data>
</participant>
</participants>
I want to figure out one issue.
I have already asked a question about a simple ordering issue, but now I want to control the ordering in more detail.
check below this link :
SQL Server : FOR XML sorting control by attribute
I made a example case.
SQL Query.
-- Example case: builds <Main> with two <Sample> elements, each holding a
-- <Node> whose children are emitted in the order the sub-selects are written
-- (20, 30, 10), i.e. not sorted by the order attribute.
-- A column alias starting with @ makes FOR XML PATH emit that column as an
-- attribute of the enclosing element.
select (
    select '123' AS '@id', (
        select
        (
            select 'test' AS '@testid' , '20' AS '@order'
            FOR XML path ('tree') , TYPE
        ),
        (
            select 'test2' AS '@testid' , '30' AS '@order'
            FOR XML path ('tree-order') , TYPE
        ),
        (
            select 'test' AS '@testid' , '10' AS '@order'
            FOR XML path ('tree') , TYPE
        )
        FOR XML path ('Node') , TYPE
    )
    FOR XML path ('Sample') , TYPE
),
(
    select '456' AS '@id', (
        select
        (
            select 'test' AS '@testid' , '20' AS '@order'
            FOR XML path ('tree') , TYPE
        ),
        (
            select 'test2' AS '@testid' , '30' AS '@order'
            FOR XML path ('tree-order') , TYPE
        ),
        (
            select 'test' AS '@testid' , '10' AS '@order'
            FOR XML path ('tree') , TYPE
        )
        FOR XML path ('Node') , TYPE
    )
    FOR XML path ('Sample') , TYPE
)
FOR XML path ('Main') , TYPE
Result :
<Main>
<Sample id="123">
<Node>
<tree testid="test" order="20" />
<tree-order testid="test2" order="30" />
<tree testid="test" order="10" />
</Node>
</Sample>
<Sample id="456">
<Node>
<tree testid="test" order="20" />
<tree-order testid="test2" order="30" />
<tree testid="test" order="10" />
</Node>
</Sample>
</Main>
Expected result :
<Main>
<Sample id="123">
<Node>
<tree testid="test" order="10" />
<tree testid="test" order="20" />
<tree-order testid="test2" order="30" />
</Node>
</Sample>
<Sample id="456">
<Node>
<tree testid="test" order="10" />
<tree testid="test" order="20" />
<tree-order testid="test2" order="30" />
</Node>
</Sample>
</Main>
final result :
<Main>
<Sample id="123">
<Node>
<tree testid="test" />
<tree testid="test" />
<tree-order testid="test2" />
</Node>
</Sample>
<Sample id="456">
<Node>
<tree testid="test" />
<tree testid="test" />
<tree-order testid="test2" />
</Node>
</Sample>
</Main>
That's order by tree-order.
Finally, I don't want to show the order information as an attribute.
Does anyone have a good idea?
Thank you to everybody who takes an interest in this.
Updated ----------------------------------------
Thank you, everybody. I finally solved the problem (both the ordering and the removal of the attribute) as shown below:
-- Step 1: rebuild Main/Sample/Node with the children of each Node sorted by
--         their order attribute; Sample keeps its own attributes ($s/@*).
-- Step 2: delete the order attribute from the sorted tree and tree-order elements.
-- @data is the XML variable holding the unsorted <Main> document (declared
-- outside this snippet).
-- NOTE(review): @order is untyped here, so the sort is presumably textual;
-- that is fine for equal-width values such as 10/20/30 - confirm before using
-- it with values of different lengths.
declare @resultData xml = (select @data.query('
element Main {
for $s in Main/Sample
return element Sample {
$s/@*,
for $n in $s/Node
return element Node {
for $i in $n/*
order by $i/@order
return $i
}
}
}'));
SET @resultData.modify('delete (Main/Sample/Node/tree/@order)');
SET @resultData.modify('delete (Main/Sample/Node/tree-order/@order)');
select @resultData
-- Single-pass alternative: sorts the children of each Node by their order
-- attribute and rebuilds every child with only its testid attribute, so no
-- separate delete step is needed.
-- The inner loop iterates over $n/* (the children of the current Node); a
-- bare Node/* path would not be evaluated relative to $n. The braces are
-- balanced: Main { Sample { Node { ... } } }.
-- @data is the XML variable holding the unsorted <Main> document (declared
-- outside this snippet).
select @data.query('
element Main {
for $s in Main/Sample
return element Sample {
$s/@*,
for $n in $s/Node
return element Node {
for $i in $n/*
order by $i/@order
return
if ($i/self::tree)
then element tree { $i/@testid }
else element tree-order { $i/@testid }
}
}
}')
What's interesting to me is that in your original post, you're stating that you're generating the XML as the result of a SQL query. If it were me, I'd control the ordering at that level.