SQL Server - parse GPX file - sql

I'm trying to parse a GPX file within SQL Server 2019, but I'm hitting a snag with namespaces, I think.
From what I can see - if the GPX file contains :
xmlns="http://www.topografix.com/GPX/1/1"
SQL returns a NULL. But if I remove that from the GPX file, the SQL returns a string of coords - as expected.
SQL code :
-- Question's original attempt: load the GPX file into a table variable, then
-- shred it. OPENROWSET(BULK ...) needs a literal path, hence the dynamic SQL.
DECLARE @XML TABLE (XML_COLUMN XML)
DECLARE @sqlstmt NVARCHAR(255)
DECLARE @file NVARCHAR(255) = 'd:\demo_2.gpx'
SET @sqlstmt= 'SELECT * FROM OPENROWSET (BULK ''' + @file + ''', SINGLE_CLOB) AS xmlData'
INSERT INTO @XML
EXEC (@sqlstmt)
-- NOTE: the GPX namespace is bound to the prefix ns here, yet no path step below
-- carries the ns: prefix, so the paths address elements in no namespace and match
-- nothing when the file declares xmlns="http://www.topografix.com/GPX/1/1".
-- This is the behaviour the question is asking about; it is left as asked.
;WITH XMLNAMESPACES ('http://www.topografix.com/GPX/1/1' AS ns), X_CTE AS
(
    SELECT
        T1.Name.query('.') AS Name,
        T2.X_Content.query('.') AS X_Content
    FROM
        @XML AS X
    CROSS APPLY
        XML_Column.nodes('/gpx/trk') AS T1(Name)
    CROSS APPLY
        XML_Column.nodes('/gpx/trk/trkseg/trkpt') AS T2(X_Content)
),
XML_Data AS
(
    SELECT
        X_Content.value('(/trkpt/@lat)[1]', 'VARCHAR(50)') AS LAT,
        X_Content.value('(/trkpt/@lon)[1]', 'VARCHAR(50)') AS LON
    FROM
        X_CTE
)
SELECT
    STUFF((SELECT '[' + LON + ',' + LAT + ']' + ','
           FROM XML_Data
           WHERE 1 = 1
           FOR XML PATH('')), 1, 0, '') AS mapString;
GPX file content (demo_2.gpx)
<?xml version="1.0" encoding="UTF-8"?>
<gpx creator="Garmin Connect" version="1.1"
xsi:schemaLocation="http://www.topografix.com/GPX/1/1 http://www.topografix.com/GPX/11.xsd"
xmlns:ns3="http://www.garmin.com/xmlschemas/TrackPointExtension/v1"
xmlns="http://www.topografix.com/GPX/1/1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ns2="http://www.garmin.com/xmlschemas/GpxExtensions/v3">
<metadata>
<link href="connect.garmin.com">
<text>Garmin Connect</text>
</link>
<time>2022-05-29T08:37:21.000Z</time>
</metadata>
<trk>
<name>My Route</name>
<type>e_bike_mountain</type>
<trkseg>
<trkpt lat="54.37033147551119327545166015625" lon="-3.075514398515224456787109375">
<ele>65.8000030517578125</ele>
<time>2022-05-29T11:37:02.000Z</time>
<extensions>
<ns3:TrackPointExtension>
<ns3:atemp>17.0</ns3:atemp>
<ns3:hr>155</ns3:hr>
</ns3:TrackPointExtension>
</extensions>
</trkpt>
<trkpt lat="54.37033147551119327545166015625" lon="-3.075514398515224456787109375">
<ele>65.8000030517578125</ele>
<time>2022-05-29T11:37:03.000Z</time>
<extensions>
<ns3:TrackPointExtension>
<ns3:atemp>17.0</ns3:atemp>
<ns3:hr>155</ns3:hr>
</ns3:TrackPointExtension>
</extensions>
</trkpt>
</trkseg>
</trk>
</gpx>
Really pulling the last bits of remaining hair out with this one, if anyone can assist, that would be totally awesome!

Your XML defines a default namespace, that is applied to all XML nodes - as long as no other namespace is defined explicitly, by means of a prefix:
<gpx creator="Garmin Connect" version="1.1"
...
xmlns="http://www.topografix.com/GPX/1/1"
The xmlns= declaration, without an alias (like xmlns:ns=...), is the default XML namespace for your XML document.
Now if you actually define your XML namespace in the query, with an alias ns like so:
;WITH XMLNAMESPACES ('http://www.topografix.com/GPX/1/1' AS ns)
then you must also use that alias in all your relevant XPath queries:
SELECT
...
FROM
    @XML AS X
CROSS APPLY
    -- every element step carries the ns: alias declared in WITH XMLNAMESPACES
    X.XML_COLUMN.nodes('/ns:gpx/ns:trk') AS T1(Name)
CROSS APPLY
    X.XML_COLUMN.nodes('/ns:gpx/ns:trk/ns:trkseg/ns:trkpt') AS T2(X_Content)
Alternatively, and much simpler - define the XML namespace as your default namespace in the XQuery in T-SQL; then you do not need to apply the namespace alias everywhere:
;WITH XMLNAMESPACES (DEFAULT 'http://www.topografix.com/GPX/1/1')
And in the end - you could write your XQuery much simpler - try this:
-- Shred the GPX document into one row per <trkpt>.
-- DEFAULT binds the unprefixed element names in the path to the GPX 1.1
-- namespace; attributes are never in a default namespace, so @lat / @lon need
-- no prefix. @XML is the table variable loaded in the question.
WITH XMLNAMESPACES (DEFAULT 'http://www.topografix.com/GPX/1/1')
SELECT
    LAT = XC.value('@lat', 'VARCHAR(50)'),
    LON = XC.value('@lon', 'VARCHAR(50)')
FROM
    @XML AS X
CROSS APPLY
    X.XML_COLUMN.nodes('/gpx/trk/trkseg/trkpt') AS XT(XC);
This should return the same values - with much less code and indirection.

Sorted, with many thanks to marc_s - my final working code ...
-- Load a GPX 1.1 file from disk and return its track points as a single
-- '[lon,lat],[lon,lat],...' string in the column mapString.
DECLARE @XML TABLE (XML_COLUMN XML);
DECLARE @file NVARCHAR(255) = N'd:\erc\gpx\demo_4.gpx';

-- OPENROWSET(BULK ...) only accepts a literal path, hence the dynamic SQL.
--  * NVARCHAR(MAX): a long path can no longer truncate the statement.
--  * REPLACE doubles any quote in the path so it cannot end the literal early.
--  * SINGLE_BLOB hands the raw bytes to the XML parser, which then applies the
--    encoding named in the file's own <?xml ... encoding="..."?> declaration.
DECLARE @sqlstmt NVARCHAR(MAX) =
    N'SELECT BulkColumn FROM OPENROWSET (BULK ''' + REPLACE(@file, N'''', N'''''')
    + N''', SINGLE_BLOB) AS xmlData';

INSERT INTO @XML (XML_COLUMN)
EXEC (@sqlstmt);

-- DEFAULT binds the unprefixed element names in the path to the GPX namespace;
-- attributes (@lat, @lon) are never in a default namespace.
WITH XMLNAMESPACES (DEFAULT 'http://www.topografix.com/GPX/1/1'),
track_point AS
(
    SELECT
        LAT = XC.value('@lat', 'VARCHAR(50)'),
        LON = XC.value('@lon', 'VARCHAR(50)')
    FROM
        @XML AS X
    CROSS APPLY
        X.XML_COLUMN.nodes('/gpx/trk/trkseg/trkpt') AS XT(XC)
)
-- The separator is emitted in front of every pair and STUFF deletes the first
-- one. (Deleting 0 characters, as before, removed nothing and left a dangling
-- comma at the end of the string.)
SELECT STUFF
((
    SELECT ',[' + LON + ',' + LAT + ']'
    FROM track_point
    FOR XML PATH('')
), 1, 1, '') AS mapString;

Related

Sql Server - For XML - Get null value from element

I am trying to turn a null XML element into a NULL value while doing the concatenation, and I am making some silly mistake. I want to differentiate between an empty value and a null value. I am using OpenXML to parse the XML data, and something is missing in the code to read the null-based Param element.
I am using SQL Server 2014.
Please suggest.
-- Question's original attempt: shred the <Param> elements with OPENXML and join
-- them into a quoted, comma-separated string.
DECLARE @message_body XML;
DECLARE @XMLParameterData Table
(SeqID INT Identity(1,1),
ParamValue varchar(max))
DECLARE @docRef int
DECLARE @dataPath nvarchar(255)
DECLARE @mappingType int = 2 --Element-Centric mapping
Select @message_body = N'<AsyncRequest xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ParamList> <Param>Bruce</Param>
<Param>Wa''yne</Param>
<Param>Bruce#karan.com</Param>
<Param>Coke</Param>
<Param>20000</Param>
<Param xsi:nil="true"/>
<Param></Param>
</ParamList>
</AsyncRequest>';
Set @dataPath = '/AsyncRequest/ParamList/Param'
EXEC sp_xml_preparedocument @docRef output, @message_body
-- NOTE: the '.' column pattern reads only the element content, so the
-- xsi:nil="true" element arrives as an empty string, exactly like <Param></Param>.
-- That is the behaviour the question asks about; it is left as asked.
INSERT INTO @XMLParameterData(ParamValue)
Select * From OpenXML(@docRef, @dataPath, @mappingType)
WITH
(
valx varchar(max) '.'
)
-- the xml document ref needs to be released ASAP
EXEC sp_xml_removedocument @docRef
SELECT * From @XMLParameterData
DECLARE @CSVString varchar(max)
-- The separator ', ' is two characters long, so STUFF must delete two characters;
-- deleting one left a leading space in the result.
SELECT @CSVString = STUFF(
(SELECT ', ' +
CHAR(34) + ParamValue + CHAR(34)
FROM @XMLParameterData
ORDER BY SeqID
FOR XML PATH('')
), 1, 2, '')
SELECT @CSVString as CSVTest
Output :-
"Bruce", "Wa'yne", "Bruce#karan.com", "Coke", "20000", "", ""
Desired output :-
"Bruce", "Wa'yne", "Bruce#karan.com", "Coke", "20000", NULL, ""
Keep it simple! Use .nodes() instead of OPENXML, and a CASE WHEN to check whether @xsi:nil = "true":
-- Join the <Param> values into a quoted CSV string, emitting a bare NULL for
-- elements flagged xsi:nil and "" for genuinely empty elements.
DECLARE @message_body XML,
        @output nvarchar(max);
select @message_body = N'<AsyncRequest xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ParamList>
<Param>Bruce</Param>
<Param>Wa''yne</Param>
<Param>Bruce#karan.com</Param>
<Param>Coke</Param>
<Param>20000</Param>
<Param xsi:nil="true"/>
<Param></Param>
</ParamList>
</AsyncRequest>';
-- xsi is a predeclared prefix in SQL Server XQuery, so no WITH XMLNAMESPACES is
-- needed. xsi:nil is an xs:boolean, whose lexical forms for true are 'true' and '1'.
-- TYPE + .value() returns the concatenated text un-escaped, so a parameter that
-- contains &, < or > is not returned as an XML entity.
SELECT @output = STUFF((
    SELECT
        CASE WHEN t.v.value('@xsi:nil', 'nvarchar(max)') IN ('true', '1') THEN ',NULL'
             ELSE ',"' + t.v.value('.', 'nvarchar(max)') + '"'
        END
    FROM @message_body.nodes('AsyncRequest/ParamList/Param') AS t(v)
    FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 1, '');
SELECT @output;
Will return:
"Bruce","Wa'yne","Bruce#karan.com","Coke","20000",NULL,""
How about this (I have slightly simplified your code by using xml.nodes rather than an xml document).
It uses the XQuery expression .[not(@xsi:nil = "true")] to return NULL where xsi:nil is true.
I then use COALESCE to return the string 'NULL' when a NULL is returned:
-- Shred the <Param> elements with .nodes(), storing NULL for xsi:nil elements,
-- then join the stored values into a quoted, comma-separated string.
DECLARE @message_body XML;
DECLARE @XMLParameterData Table
(SeqID INT Identity(1,1),
ParamValue varchar(max))
Select @message_body = N'<AsyncRequest xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"><ParamList><Param>Bruce</Param><Param>Wa''yne</Param>
<Param>Bruce#karan.com</Param>
<Param>Coke</Param>
<Param>20000</Param>
<Param xsi:nil="true"/>
<Param></Param>
</ParamList>
</AsyncRequest>';
-- The predicate drops the context node when xsi:nil is "true", so .value()
-- returns NULL for that element.
INSERT INTO @XMLParameterData(ParamValue)
SELECT T.c.value('.[not(@xsi:nil = "true")]', 'varchar(max)') AS result
FROM @message_body.nodes('/AsyncRequest/ParamList/Param') AS T(c)
SELECT * From @XMLParameterData
DECLARE @CSVString varchar(max)
-- The separator ', ' is two characters long, so STUFF deletes two characters.
-- NOTE: COALESCE sits inside the quoting, so the placeholder is emitted as "NULL"
-- (with quotes). For a bare NULL use
-- COALESCE(CHAR(34) + ParamValue + CHAR(34), 'NULL') instead.
SELECT @CSVString = STUFF(
(SELECT ', ' +
CHAR(34) + COALESCE(ParamValue, 'NULL') + CHAR(34)
FROM @XMLParameterData
ORDER BY SeqID
FOR XML PATH('')
), 1, 2, '')
SELECT @CSVString as CSVTest
This returns:
"Bruce", "Wa'yne", "Bruce#karan.com", "Coke", "20000", "NULL", ""
What you are trying to achieve is not standard behavior. You probably expect the following:
-- One row per <Param>: NULL when the element carries xsi:nil="true", otherwise
-- its string value (an empty string for <Param></Param>).
DECLARE @message_body XML = N'<AsyncRequest xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ParamList>
<Param>Bruce</Param>
<Param>Wa''yne</Param>
<Param>Bruce#karan.com</Param>
<Param>Coke</Param>
<Param>20000</Param>
<Param xsi:nil="true"/>
<Param></Param>
</ParamList>
</AsyncRequest>';
SELECT X.value('.[not(@xsi:nil="true")]', 'nvarchar(MAX)') AS Value
FROM @message_body.nodes('//Param') AS T(X);
Which yields:
Value
-----
Bruce
Wa'yne
Bruce#karan.com
Coke
20000
NULL
(empty string here)
You may want the text from the nodes instead, which is more standardized:
-- text()[1] is the first text node of each <Param>; an element that has no text
-- node (nil or empty) yields NULL.
SELECT X.value('text()[1]', 'nvarchar(MAX)') AS Value
FROM @message_body.nodes('//Param') AS T(X);
Value
-----
Bruce
Wa'yne
Bruce#karan.com
Coke
20000
NULL
NULL
Note that <element/> and <element></element> are synonyms: the element is empty no matter how you write it. Ask yourself: is the first form, <element/>, an empty string? That could lead to a long discussion - it's all a matter of interpretation. You may also consider the xml:space attribute to handle whitespace.

how to execute subquery without declaring XML?

Why does this query not execute?
-- Does not run as written: the parenthesised SELECT produces a string, not a value
-- of type xml, so .nodes() cannot be applied to it directly.
SELECT [Value] = T.c.value('.','varchar(30)') FROM (SELECT '<s>'+ REPLACE ((select tag_id+',' from tbl_container_track for xml path('')),',','</s> <s>')+ '</s>').nodes('/s') T(c)
But this one is working ?
-- Working variant: the concatenated '<s>..</s>' string is implicitly converted to
-- xml when it is assigned to @X, after which .nodes() can shred it.
declare @X xml
SELECT @X = (SELECT '<s>'+ REPLACE ((select tag_id+',' from tbl_container_track for xml path('')),',','</s> <s>')+ '</s>')
SELECT [Value] = T.c.value('.','varchar(30)') FROM @X.nodes('/s') T(c)
Can someone help me simplify this without declaring @X?
Try this - you missed the CAST to the XML data type:
-- Same split without a variable. The concatenated string must be CAST to xml
-- before .nodes() can be applied, and the derived table's single column needs a
-- name (src.Data) so that CROSS APPLY has something to reference.
-- Note: every tag_id is followed by ',', so the final element is an empty
-- <s></s> and the last row returned is an empty string.
SELECT [Value] = T.c.value('.', 'varchar(30)')
FROM (
    SELECT CAST('<s>'
                + REPLACE((SELECT tag_id + ',' FROM tbl_container_track FOR XML PATH('')), ',', '</s> <s>')
                + '</s>' AS XML)
) AS src (Data)
CROSS APPLY src.Data.nodes('/s') AS T(c);

Iterative Query for Spatial Records

I have created following query in SQL Server
-- Question's original attempt: build a WKT polygon from the <Vertex> elements.
declare @x XML
set @x='<Dataset_Extent>
<EXTENT_TYPE>Bounding_Polygon</EXTENT_TYPE>
<Vertex>
<LON>66.91292909247741</LON>
<LAT>30.27001012181008</LAT>
<X>299232</X>
<Y>3350549</Y>
<COL>1</COL>
<ROW>1</ROW>
</Vertex>
<Vertex>
<LON>66.99456841960638</LON>
<LAT>30.27128639618252</LAT>
<X>307089.5</X>
<Y>3350549</Y>
<COL>15715</COL>
<ROW>1</ROW>
</Vertex>
<Vertex>
<LON>66.99700791329992</LON>
<LAT>30.1509623521339</LAT>
<X>307089.5</X>
<Y>3337207.5</Y>
<COL>15715</COL>
<ROW>26683</ROW>
</Vertex>
<Vertex>
<LON>66.91546772378466</LON>
<LAT>30.14969219541345</LAT>
<X>299232</X>
<Y>3337207.5</Y>
<COL>1</COL>
<ROW>26683</ROW>
</Vertex>
<Center>
<LON>66.9549932872921</LON>
<LAT>30.21048776638499</LAT>
<X>303160.75</X>
<Y>3343878.25</Y>
<COL>7858</COL>
<ROW>13342</ROW>
</Center>
</Dataset_Extent>';
declare @wkt varchar(8000);
-- NOTE: the two for-clauses iterate every LON against every LAT and
-- distinct-values() then de-duplicates the combined sequence, so the LON/LAT
-- pairing of each vertex is lost. This is the behaviour the question asks about;
-- it is left as asked.
select @wkt=CONVERT(varchar(7000),@x.query('distinct-values(
for $lon in /Dataset_Extent/Vertex/LON/text()
for $lat in /Dataset_Extent/Vertex/LAT/text()
return ($lon cast as xs:string?,$lat cast as xs:string?,","))
'));
--concatenate the word POLYGON
set @wkt='POLYGON(('+@wkt +'))';
print @wkt
This gives me the following output:
POLYGON((66.91292909247741 66.99456841960638 30.27128639618252 66.99700791329992 66.91546772378466 30.27001012181008,30.1509623521339 30.14969219541345))
The Lon/Lat pairs are not in the proper order; I require the output in the following format:
POLYGON((66.91292909247741 30.27001012181008,66.99456841960638 30.27128639618252,66.99700791329992 30.1509623521339,66.91546772378466 30.14969219541345,66.91292909247741 30.27001012181008))
The starting vertex Lon/Lat should also repeat at end to make polygon close.
What do I need to do this to fix the problems?
this will do the job...
-- Build the WKT ring from the <Vertex> elements of @x (the XML variable declared
-- in the question), then append the first vertex again to close the polygon.
DECLARE @wkt NVARCHAR(MAX);

-- One "LON LAT" pair per vertex, separated by ',' + CR.
-- .value() with an explicit varchar(50) replaces CONVERT(varchar, ...), whose
-- default length of 30 would silently truncate longer coordinates.
-- NOTE(review): accumulating into a variable across the rows of a SELECT relies on
-- row-by-row evaluation order - confirm the vertex order in the output before
-- relying on it.
SELECT @wkt = COALESCE(@wkt + ',' + CHAR(13), '')
    + CONCAT(t.x.value('(LON/text())[1]', 'varchar(50)'), ' ',
             t.x.value('(LAT/text())[1]', 'varchar(50)'))
FROM @x.nodes('/Dataset_Extent/Vertex') AS t(x);

-- Repeat the first vertex so the ring ends where it started.
SELECT @wkt = COALESCE(@wkt + ',' + CHAR(13), '')
    + CONCAT(t.x.value('(LON/text())[1]', 'varchar(50)'), ' ',
             t.x.value('(LAT/text())[1]', 'varchar(50)'))
FROM @x.nodes('/Dataset_Extent/Vertex[1]') AS t(x);

SET @wkt = 'POLYGON((' + @wkt + '))';
PRINT @wkt;
Not exactly beautiful, but I couldn't figure out how to subquery the first node... Good luck! I try to avoid XPath as much as possible if I can :)
Shred the XML on /Dataset_Extent/Vertex and build the string using the for xml path trick.
-- Build a closed WKT polygon from @x (the XML variable declared in the question):
-- one ",LON LAT" fragment per <Vertex>, leading comma removed by STUFF, then the
-- first vertex appended again to close the ring.
declare @wkt varchar(8000);
set @wkt = 'POLYGON((' +
           stuff((select ',' + T.X.value('(LON/text())[1]', 'varchar(50)') +
                         ' ' + T.X.value('(LAT/text())[1]', 'varchar(50)')
                  from @x.nodes('/Dataset_Extent/Vertex') as T(X)
                  for xml path('')), 1, 1, '') +
           ',' + @x.value('(/Dataset_Extent/Vertex/LON/text())[1]', 'varchar(50)') +
           ' ' + @x.value('(/Dataset_Extent/Vertex/LAT/text())[1]', 'varchar(50)') +
           '))';
Against a table it would look like this instead.
-- Table variant: one closed WKT polygon per row of XMLFiles, built from the
-- <Vertex> elements stored in the XMLData column.
SELECT
    'POLYGON(('
    + STUFF(
        (
            -- ",LON LAT" for every vertex; STUFF removes the leading comma
            SELECT ',' + vertex.v.value('(LON/text())[1]', 'varchar(50)')
                 + ' ' + vertex.v.value('(LAT/text())[1]', 'varchar(50)')
            FROM src.XMLData.nodes('/Dataset_Extent/Vertex') AS vertex(v)
            FOR XML PATH('')
        ), 1, 1, '')
    -- first vertex again, to close the ring
    + ',' + src.XMLData.value('(/Dataset_Extent/Vertex/LON/text())[1]', 'varchar(50)')
    + ' ' + src.XMLData.value('(/Dataset_Extent/Vertex/LAT/text())[1]', 'varchar(50)')
    + '))'
FROM XMLFiles AS src;

How to Generate xml from sql for below pattern

I'm writing a stored procedure in which I have to create an XML column from the database.
µ = CHAR(181) this is value separator,
¶ = CHAR(182) this is row separator
This is the statement I wrote. I know it's not well formed.
-- Question's attempt: CHAR(182) is the row separator and CHAR(181) the value
-- separator. The ID/Slno values end up as element text
-- (<Section> ID =20211 Slno=1</Section>), not as attributes.
SELECT @xmlString= CAST('<root><Section> ID =' + REPLACE(REPLACE ('20211µ1¶20212µ2', CHAR(182),
'</Section><Section> ID ='),CHAR(181), ' Slno=') + '</Section></root>' AS XML)
This is the pattern which I need to display like this.
<root>
<sections id="20211" slno="1" ></sections>
<sections id="20215" slno="2" ></sections>
</root>
-- Turn 'id µ slno ¶ id µ slno ...' into <root><sections id=".." slno=".."/>...</root>.
declare @s varchar(50) = '20211µ1¶20212µ2'
declare @xmlString xml
-- Step 1: rewrite the separators as markup, so each row becomes
--         <item><value>id</value><value>slno</value></item>, and shred it.
;with C as
(
    select T.N.value('value[1]', 'int') as id,
           T.N.value('value[2]', 'int') as slno
    from (select cast('<item><value>'+replace(replace(@s, 'µ','</value><value>'), '¶','</value></item><item><value>')+'</value></item>' as xml)) as X(XMLCol)
    cross apply X.XMLCol.nodes('item') as T(N)
)
-- Step 2: a column alias that starts with @ makes FOR XML PATH emit the value as
--         an attribute of the row element.
select @xmlString =
(
    select C.id as [@id] ,
           C.slno as [@slno]
    from C
    for xml path('sections'), root('root'), type
)
select @xmlString
Result:
<root>
<sections id="20211" slno="1" />
<sections id="20212" slno="2" />
</root>

TSQL Reverse FOR XML Encoding

I am using FOR XML in a query to join multiple rows together, but the text contains quotes, "<", ">", etc. I need the actual characters instead of the encoded values like "&quot;", "&lt;", etc. Any suggestions?
Basically, what you're asking for is invalid XML, and luckily SQL Server will not produce it. You can take the generated XML and extract its content; this operation reverts the escaped characters to their text representation. This reversal normally occurs in the presentation layer, but it can also be done in SQL Server itself, for instance by using the XML methods to extract the content of the FOR XML output. For example:
-- FOR XML escapes < and > when it builds the document; reading the node back
-- with .value() returns the original, un-escaped text.
declare @text varchar(max) = 'this text has < and >';
declare @xml xml;
set @xml = (select @text as [node] for xml path('nodes'), type);
select @xml;
select x.value(N'.', N'varchar(max)') as [text]
from @xml.nodes('//nodes/node') t(x);
I have a similar requirement to extract column names for use in PIVOT query.
The solution I used was as follows:
-- Build a comma-separated list of bracket-quoted column names for a PIVOT.
--  * QUOTENAME adds the [ ] delimiters and doubles any ] inside a value (it
--    returns NULL for input longer than 128 characters).
--  * TYPE + .value() returns the concatenated text un-escaped, so &, < and > in a
--    value do not come back as XML entities.
--  * [Table] and Value are placeholders for the real source table and column.
SELECT @columns = STUFF((SELECT ',' + QUOTENAME(Value)
                         FROM [Table]
                         ORDER BY Value
                         FOR XML PATH(''), TYPE).value('.', 'nvarchar(max)'), 1, 1, '');
This produces a single string:
[Value 1],[Value 2],[Value 3]
I hope this points you in the right direction.
--something like this?
-- Demo data: two names made only of characters that FOR XML entity-encodes.
SELECT Names.Name
INTO #Names
FROM (VALUES ('<>&'), ('ab<>')) AS Names (Name);

-- 1) Plain FOR XML PATH(''): the joined string comes back entity-encoded.
SELECT STUFF(
    (SELECT ', ' + Name FROM #Names FOR XML PATH(''))
    , 1, 2, '');

-- 2) FOR XML ... TYPE read back through .value(): the joined string is decoded.
SELECT STUFF(
    (SELECT ', ' + Name FROM #Names FOR XML PATH(''), TYPE).value('text()[1]', 'nvarchar(max)')
    , 1, 2, '');
-- 2) is slower, but it does not return the entity-encoded value.
Hope it helps.