Select XML nodes as rows - sql

I am selecting from a table that has an XML column using T-SQL. I would like to select a certain type of node and have a row created for each one.
For instance, suppose I am selecting from a people table. This table has an XML column for addresses. The XML is formatted similar to the following:
<address>
<street>Street 1</street>
<city>City 1</city>
<state>State 1</state>
<zipcode>Zip Code 1</zipcode>
</address>
<address>
<street>Street 2</street>
<city>City 2</city>
<state>State 2</state>
<zipcode>Zip Code 2</zipcode>
</address>
How can I get results like this:
Name City State
Joe Baker Seattle WA
Joe Baker Tacoma WA
Fred Jones Vancouver BC

Here is your solution:
/* TEST TABLE */
-- Table variable standing in for the real people table: one row per person,
-- with all of that person's <address> elements held in a single XML fragment.
-- T-SQL variables (including table variables) use the @ sigil; # is reserved
-- for temp tables and cannot appear in a DECLARE.
DECLARE @PEOPLE AS TABLE ([Name] VARCHAR(20), [Address] XML);
INSERT INTO @PEOPLE ([Name], [Address])
SELECT
'Joel',
'<address>
<street>Street 1</street>
<city>City 1</city>
<state>State 1</state>
<zipcode>Zip Code 1</zipcode>
</address>
<address>
<street>Street 2</street>
<city>City 2</city>
<state>State 2</state>
<zipcode>Zip Code 2</zipcode>
</address>'
UNION ALL SELECT
'Kim',
'<address>
<street>Street 3</street>
<city>City 3</city>
<state>State 3</state>
<zipcode>Zip Code 3</zipcode>
</address>';
SELECT [Name], [Address] FROM @PEOPLE;
-- BUILD XML
-- Rewrite every <address> as an attribute-only element; FOR XML AUTO then nests
-- those elements under one <people Name="..."> element per source row.
DECLARE @x XML;
SELECT @x =
( SELECT
[Name]
, [Address].query('
for $a in //address
return <address
street="{$a/street}"
city="{$a/city}"
state="{$a/state}"
zipcode="{$a/zipcode}"
/>
')
FROM @PEOPLE AS people
FOR XML AUTO
);
-- RESULTS
-- nodes() yields one row per <address>; '@attr' reads an attribute of that
-- node and '../@Name' reads the Name attribute of its parent <people> element.
SELECT [Name] = T.Item.value('../@Name', 'varchar(20)'),
street = T.Item.value('@street' , 'varchar(20)'),
city = T.Item.value('@city' , 'varchar(20)'),
state = T.Item.value('@state' , 'varchar(20)'),
zipcode = T.Item.value('@zipcode', 'varchar(20)')
FROM @x.nodes('//people/address') AS T(Item);
/* OUTPUT*/
Name | street | city | state | zipcode
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Joel | Street 1 | City 1 | State 1 | Zip Code 1
Joel | Street 2 | City 2 | State 2 | Zip Code 2
Kim | Street 3 | City 3 | State 3 | Zip Code 3

Here's how I do it generically:
I shred the source XML via a call such as
-- Sample list document: a single wrapper element whose children are the items
-- to shred. Element names are arbitrary; only the two-level shape matters.
DECLARE @xmlEntityList xml
SET @xmlEntityList =
'
<ArbitrarilyNamedXmlListElement>
<ArbitrarilyNamedXmlItemElement><SomeVeryImportantInteger>1</SomeVeryImportantInteger></ArbitrarilyNamedXmlItemElement>
<ArbitrarilyNamedXmlItemElement><SomeVeryImportantInteger>2</SomeVeryImportantInteger></ArbitrarilyNamedXmlItemElement>
<ArbitrarilyNamedXmlItemElement><SomeVeryImportantInteger>3</SomeVeryImportantInteger></ArbitrarilyNamedXmlItemElement>
</ArbitrarilyNamedXmlListElement>
'
-- Destination for the typed value extracted from each shredded item.
DECLARE @tblEntityList TABLE(
SomeVeryImportantInteger int
)
-- The function returns one XML fragment per item; query() narrows each
-- fragment to its SomeVeryImportantInteger element and value() casts it to int.
INSERT @tblEntityList(SomeVeryImportantInteger)
SELECT
XmlItem.query('//SomeVeryImportantInteger[1]').value('.','int') as SomeVeryImportantInteger
FROM
[dbo].[tvfShredGetOneColumnedTableOfXmlItems] (@xmlEntityList)
by utilizing the table-valued function
/* Example Inputs */
/*
DECLARE @xmlListFormat xml
SET @xmlListFormat =
'
<ArbitrarilyNamedXmlListElement>
<ArbitrarilyNamedXmlItemElement>004421UB7</ArbitrarilyNamedXmlItemElement>
<ArbitrarilyNamedXmlItemElement>59020UH24</ArbitrarilyNamedXmlItemElement>
<ArbitrarilyNamedXmlItemElement>542514NA8</ArbitrarilyNamedXmlItemElement>
</ArbitrarilyNamedXmlListElement>
'
declare @tblResults TABLE
(
XmlItem xml
)
*/
-- =============================================
-- Author: 6eorge Jetson
-- Create date: 01/02/3003
-- Description: Shreds a list of XML items conforming to
-- the expected generic @xmlListFormat
-- Parameters: @xmlListFormat - XML whose top-level element wraps the
--             item elements; element names are not inspected.
-- Returns: one row per item, with the item (element included)
--             as a standalone XML value in column XmlItem.
-- =============================================
CREATE FUNCTION [dbo].[tvfShredGetOneColumnedTableOfXmlItems]
(
-- XML list document to shred
@xmlListFormat xml
)
RETURNS
@tblResults TABLE
(
-- One shredded item per row
XmlItem xml
)
AS
BEGIN
-- '/child::*/child::*' selects every element that is a child of a top-level
-- element, whatever the names; query('.') returns each one as its own XML value.
INSERT @tblResults
SELECT
tblShredded.colXmlItem.query('.') as XmlItem
FROM
@xmlListFormat.nodes('/child::*/child::*') as tblShredded(colXmlItem)
RETURN
END
--SELECT * FROM @tblResults

In case this is useful to anyone else out there looking for a "generic" solution, I created a CLR procedure that can take an Xml fragment as above and "shred" it into a tabular resultset, without you providing any additional information about the names or types of the columns, or customizing your call in any way for the given Xml fragment:
http://architectshack.com/ClrXmlShredder.ashx
There are of course some restrictions (the xml must be "tabular" in nature like this sample, the first row needs to contain all the elements/columns that will be supported, etc) - but I do hope it's a few steps ahead of what's available built-in.

Here's an alternate solution:
-- Emits one row per <city> found in each row's addresses XML by walking the
-- city positions with a recursive CTE.
-- NOTE(review): #demo is not defined in this snippet - presumably a table with
-- id, name and an XML addresses column; confirm against the full script.
;with cte as
(
-- cnt = number of /address/city nodes in this row's XML
select id, name, addresses, addresses.value('count(/address/city)','int') cnt
from #demo
)
, cte2 as
(
-- anchor: read the city at position cnt and carry cnt-1 as the next position
select id, name, addresses, addresses.value('((/address/city)[sql:column("cnt")])[1]','nvarchar(256)') city, cnt-1 idx
from cte
where cnt > 0
union all
-- recursive step: read the city at position idx, then decrement it;
-- the idx > 0 join condition stops the walk once position 1 has been read
select cte.id, cte.name, cte.addresses, cte.addresses.value('((/address/city)[sql:column("cte2.idx")])[1]','nvarchar(256)'), cte2.idx-1
from cte2
inner join cte on cte.id = cte2.id and cte2.idx > 0
)
select id, name, city
from cte2
order by id, city
FYI: I've posted another version of this SQL on the code review site here: https://codereview.stackexchange.com/questions/108805/select-field-in-an-xml-column-where-both-xml-and-table-contain-multiple-matches

If you can use it, the linq api is convenient for XML:
// Projects every "address" element into an anonymous object exposing the
// values of its street, city, state and zipcode child elements.
// NOTE(review): how dataContext.People.Addresses is materialised is not shown
// here - confirm it yields the XML container holding the address elements.
var addresses = dataContext.People.Addresses
.Elements("address")
.Select(address => new {
street = address.Element("street").Value,
city = address.Element("city").Value,
state = address.Element("state").Value,
zipcode = address.Element("zipcode").Value,
});

Related

Shortening Replace statement with RegEx?

Below is the SQL Query I am using in order to get some information. Within this information is an XML column. I am wanting to read this XML and parse out the needed ID inside the <> brackets. This query below does do that but I am looking for a cleaner way of doing it [if it exists]:
SELECT
tblAT.*,
tblA.*,
tblEM.[Custom] AS fullXML,
REPLACE(
REPLACE(
CONVERT(
VARCHAR(MAX),
tblEM.[Custom].query('/Ind/ABC')
)
, '<ABC>'
, ''
)
,'</ABC>'
,''
) AS ABC
FROM
ATable AS tblA
JOIN
LLink AS tblL
ON tblL.A_AID = tblA.AID
JOIN
AssetsT AS tblAT
ON tblAT.AID = tblL.BAID
JOIN
ExternalMetadata AS tblEM
ON tblEM.AID = tblA.AID
WHERE
tblAT.ATID = 12
AND
tblA.AID = 30610
AND
tblA.CreatedDate > '2021-05-11 08:58:00'
The XML structure looks like this:
<Ind>
<ABC>some value here</ABC>
</Ind>
The part:
-- Converts the /Ind/ABC fragment to text and strips its opening and closing
-- tags; the path matches the <Ind> root used by the query and sample XML above.
REPLACE(
REPLACE(
CONVERT(
VARCHAR(MAX),
tblEM.[Custom].query('/Ind/ABC')
)
, '<ABC>'
, ''
)
,'</ABC>'
,''
) AS ABC
is what I am wanting to replace with perhaps a simpler type of removing the <> from the beginning and the end of the XML.
I was hoping to be able to do a type of regex replace using /<[^>]*>/g in order to lessen the query length.
I am using SQL version 13.0.5103.6.
So is there any way of cleaning up the replace query area?
No need for any RegEx and/or multiple REPLACE() calls.
The XML data type can be handled easily with XQuery.
Check it out
SQL
-- DDL and sample data population, start
-- Table variable holding the sample document (variables use the @ sigil).
DECLARE @tbl TABLE (ID INT IDENTITY PRIMARY KEY, [Custom] XML);
INSERT INTO @tbl ([Custom]) VALUES
(N'<Ind>
<ABC>some value here</ABC>
</Ind>');
-- DDL and sample data population, end
-- value() needs a singleton path, hence the (...)[1]; text() targets the
-- element's text node so the content comes back as a plain scalar with no
-- tags to strip.
SELECT ID
, [Custom]
, [Custom].value('(/Ind/ABC/text())[1]', 'varchar(30)') AS result
FROM @tbl;
Output
+----+---------------------------------------+-----------------+
| ID | Custom | result |
+----+---------------------------------------+-----------------+
| 1 | <Ind><ABC>some value here</ABC></Ind> | some value here |
+----+---------------------------------------+-----------------+

How to parse out a column in sql with multiple values

I have a column in a sql server table named [City_St_Zip] that contains records that look like this
Dallas, TX 12345
What I would like to do is separate the column into three different columns (i.e. City, State and Zip)
like this:
Dallas
TX
12345
I am not sure how to go about this in SQL
I have tried the following
DECLARE #X NVARCHAR(100),
DECLARE #T NVARCHAR(100),
SELECT
#X = [City_St_Zip],
#T = [NewDivision]
FROM
dbo.Invoice
CROSS APPLY STRING_SPLIT(#X, ',');
This yielded 0 results so I am pretty sure I did that incorrectly
Any suggestions? I am using SQL Server 2019
EDIT:
I also tried this which is closer to what I want
SELECT
value
FROM
dbo.Invoice
CROSS APPLY STRING_SPLIT([City_St_Zip], ',');
That gives me a result set of:
Dallas
TX 12345
So I guess this is convoluted and needs both a comma and a space delimiter. Would I just put the value through another STRING_SPLIT?
SQL Server has poor string processing support. And, string_split() is not guaranteed to keep the values in order. And string searches are dangerous -- think New York, New York.
So, a brute force method:
select left(col, charindex(',', col) - 1) as city,
substring(col, charindex(',', col) + 2, 2) as state,
right(col, 5) as zipcode
Here is a db<>fiddle.
If you want to use STRING_SPLIT then this will work without variables.
Sample data:
create table dbo.Invoice
(
id int identity(1,1) primary key,
[City_St_Zip] nvarchar(100)
);
insert into dbo.Invoice
([City_St_Zip]) values
('Dallas, TX 12345'),
('Fort Worth, TX 12345')
GO
2 rows affected
Query:
-- Splits City_St_Zip on the comma, pivots the two pieces into columns [1] and
-- [2], then carves the state and zip out of the second piece.
-- NOTE(review): rn is numbered with ORDER BY (SELECT 0) and STRING_SPLIT does
-- not document an output order, so [1] = city / [2] = state+zip is an
-- assumption - confirm before relying on it.
SELECT inv.*, a.*
FROM dbo.Invoice inv
OUTER APPLY
(
SELECT
[1] AS [City],
-- first three characters of piece 2, with the leading blank removed
LTRIM(LEFT([2], 3)) AS [State],
-- everything from the fourth character of piece 2 onward, trimmed
TRIM(SUBSTRING([2],4,LEN([2]))) AS [Zip]
FROM
( SELECT spl.value
, ROW_NUMBER() OVER (ORDER BY (SELECT 0)) AS rn
FROM STRING_SPLIT(inv.[City_St_Zip],',') spl
) s
PIVOT (MAX(value) FOR rn IN ([1],[2])) p
) a;
Result:
id | City_St_Zip | City | State | Zip
-: | :------------------- | :--------- | :---- | :----
1 | Dallas, TX 12345 | Dallas | TX | 12345
2 | Fort Worth, TX 12345 | Fort Worth | TX | 12345
db<>fiddle here
Extra:
Using the XML type, this SQL will also work in an earlier version like Sql Server 2012.
SELECT inv.*
, a.City
, RTRIM(LEFT(a.StateZip, CHARINDEX(' ',a.StateZip))) AS State
, LTRIM(SUBSTRING(a.StateZip, CHARINDEX(' ',a.StateZip),LEN(a.StateZip))) AS Zip
FROM dbo.Invoice inv
OUTER APPLY
(
SELECT X.x AS CityStateZipXml
, X.x.value('/x[1]','nvarchar(max)') AS City
, LTRIM(X.x.value('/x[2]','nvarchar(max)')) AS StateZip
FROM (
SELECT CAST(CONCAT('<x>', REPLACE(inv.[City_St_Zip],',','</x><x>'),'</x>') AS XML)
) AS X(x)
) a;
* Updated as per SQL Server*
-- Demo table holding "City, ST Zip" strings in one column.
create table ctry
(
city_st_zip nvarchar(100)
);
insert into ctry (city_st_zip) values('Dallas, TX 12345');
--SQL USED--
-- City  : everything before the first comma.
-- State : the two characters that follow ", ".
-- Zip   : everything after the LAST space. REVERSE turns "last space" into
--         "first space", so the length no longer depends on the city name
--         (the earlier CHARINDEX(' ', col) - 2 form returned ' 12345' for
--         Dallas and '345' for Fort Worth).
SELECT
LEFT([city_st_zip], CHARINDEX(',', [city_st_zip]) - 1) AS [City],
SUBSTRING([city_st_zip], CHARINDEX(',', [city_st_zip]) + 2, 2) as [State],
RIGHT([city_st_zip], CHARINDEX(' ', REVERSE([city_st_zip])) - 1) AS [Zip]
FROM ctry;
--Result--
City State Zip
Dallas TX 12345

Create String Template from two stored procedures

I have two stored procedures that return result sets.
How can I use them to populate a string and update another column?
exec getEmailSignatureDetails 'Jane', 'Doe'
exec getFeaturedAccount 'June'
These both return columns that I would like to map to variables.
I would then like to put the variables into a string.
Then update a column in another table with that string.
Output from getEmailSignatureDetails:
addCity | addLine | addSt | addZip | fName | lName
--------------+-------------+-------+---------+-------+------
San Francisco | 777 SV Lane | CA | 94016 | Jane | Doe
Output from getFeaturedAccount:
month | img
------+----------
June | base64...
I would like to turn this into a string like
Your package has been delivered to
#fName #lName
#addLine
#addCity #addSt, #addZip
#img
And then update a column with this string matching on name.
If I understand your question, you are looking to dynamically fill in a template via macro substitution.
Example
-- Create some Sample Data
-- Table variables standing in for the result sets of the two stored procedures.
Declare @getEmailSignatureDetails Table ([addCity] varchar(50),[addLine] varchar(50),[addSt] varchar(50),[addZip] varchar(50),[fName] varchar(50),[lName] varchar(50))
Insert Into @getEmailSignatureDetails Values
('San Francisco','777 SV Lane','CA','94016','Jane','Doe')
Declare @getFeaturedAccount Table ([month] varchar(50),[img] varchar(50))
Insert Into @getFeaturedAccount Values
('June','base64..')
-- Declare the Template
-- Placeholders are '#' + column name, the same marker the question's template uses.
Declare @Template varchar(max) ='
Your package has been delivered to
#fName #lName
#addLine
#addCity #addSt, #addZip
#img
'
-- Populate the Template
-- The inner query serialises the joined row with FOR XML RAW (every column
-- becomes an attribute of <row>), then './@*' turns each attribute into a
-- (Field, Value) pair. The assignment applies one replace() per pair.
-- NOTE(review): relies on SELECT @var = f(@var, ...) being evaluated for every
-- row, which SQL Server does not formally guarantee - confirm for production use.
Select @Template = replace(@Template,'#'+Field,Value)
From (
Select C.*
From (values (convert(XML,(Select * From @getEmailSignatureDetails Join @getFeaturedAccount on [month]='June' For XML Raw ) ) ) ) A(XMLData)
Cross Apply (
Select Field = a.value('local-name(.)','varchar(100)')
,Value = a.value('.','varchar(max)')
From A.XMLData.nodes('/row') as C1(n)
Cross Apply C1.n.nodes('./@*') as C2(a)
Where a.value('local-name(.)','varchar(100)') not in ('Column1','Column2')
) C
) A
Updated Template
Your package has been delivered to
Jane Doe
777 SV Lane
San Francisco CA, 94016
base64..
If it helps with the visualization, the sub-query is a "dynamic" unpivot and generates the following:
Field Value
addCity San Francisco
addLine 777 SV Lane
addSt CA
addZip 94016
fName Jane
lName Doe
month June
img base64.. -- (presumably would be the image)

Need horizontal columns in vertical

I have compiled the data in a table called Employees. Table definition is
Name Age
Sam 25
Mike 28
Is it possible to write a query that can give me the output in format
SAM
25
MIKE
28
I am unable to write this query. Is it possible to do it.
If not, how can i achieve that.
I can do it using a cursor but it will largely degrade the performance of my proc.
Easily modified to suit your needs
-- Sample rows; the values are kept verbatim so they match the "Returns" listing.
Declare @User table (id int,First_Name varchar(50),Last_Name varchar(50),EMail varchar(50))
Insert into @User values
(1,'John','Smith','john.smith#gmail.com'),
(2,'Jane','Doe' ,'jane.doe#gmail.com')
-- FOR XML RAW serialises each row as <row col="value" .../>, one attribute per column.
Declare @XML xml
Set @XML = (Select * from @User for XML RAW)
-- One output row per attribute of each <row>: '@id' reads the key attribute,
-- and './@*[local-name(.)!="id"]' enumerates every other attribute, so the
-- column list never has to be spelled out.
Select ID = r.value('@id','int')
,Item = Attr.value('local-name(.)','varchar(100)')
,Value = Attr.value('.','varchar(max)')
From @XML.nodes('/row') as A(r)
Cross Apply A.r.nodes('./@*[local-name(.)!="id"]') as B(Attr)
Returns
ID Item Value
1 First_Name John
1 Last_Name Smith
1 EMail john.smith#gmail.com
2 First_Name Jane
2 Last_Name Doe
2 EMail jane.doe#gmail.com
As requested, but I see little value in it
-- Builds and runs one "Select Value = cast(<column> as varchar(500))" per
-- column of @Table, stacked with Union All in ordinal order.
Declare @Table varchar(150) = 'YourTableName'
-- '>>>' is a sentinel: the first concatenation produces '>>>Union All ...',
-- which the Replace strips so the statement does not start with Union All.
Declare @SQL varchar(max) = '>>>'
-- QuoteName brackets (and escapes) each identifier taken from the catalog.
-- NOTE(review): the Order By on a variable-concatenating Select is not a
-- documented guarantee of concatenation order - confirm if order matters.
Select @SQL = Replace(@SQL + SQL ,'>>>Union All ','')
From (Select Seq=ORDINAL_POSITION,SQL='Union All Select Value=cast('+QuoteName(Column_Name)+' as varchar(500)) From '+QuoteName(Table_Schema)+'.'+QuoteName(Table_Name) From INFORMATION_SCHEMA.COLUMNS where Table_Name=@Table) A
Order By Seq
--Print @SQL
-- Nothing was appended when no column matched; skip Exec rather than run '>>>'.
If @SQL <> '>>>'
Exec(@SQL)
Sample Return
Value
22 Star Ave, Riverside, RI 02915
22 Planet Ave, Riverside, RI 02915
100 Peck Ave, Riverside, RI 02915
1086 Willett Ave, Riverside, RI 02915
4
5
6

Trouble combining rows into one column using CAST(

Ok SO, here's your time to shine!
No really, I'm getting my butt kicked by an MS-SQL query that I can't seem to get to work.
What I am trying to do is search on a patient name; but also return patients who have a similar first or last name to the querying patient's last name. So "John Smith" can return anyone named "John Smith" or anyone who has a first or last name like "smith". If a patient has multiple disease states, then combine those disease states into a single column. I have the following tables (though of course there are many more columns, but these are the most important):
Patient Table
PatientID FirstName LastName UserIDFK
10000 John Smith 1
10001 Miss Smith 2
10002 Smith Bomb 3
10003 Bobby Smith 4
-- etc
DiseaseStateForUser
UserIDFK DiseaseStateRefId
1 1
1 2
2 2
3 1
3 2
4 1
GlobalLookUp
RefId Ref_Code
1 HIV
2 HEPC
The results I'm looking for are this:
PatientID FirstName LastName DiseaseStates
10000 John Smith HIV|HEPC
10001 Miss Smith HEPC
10002 Smith Bomb HIV|HEPC
10003 Bobby Smith HIV
I've taken the examples from these questions (and countless others):
Is there a way to create a SQL Server function to “join” multiple
rows from a subquery into a single delimited
field?
Simulating group_concat MySQL function in MS SQL Server
2005?
As well as from this blog post Emulating MySQL’s GROUP_CONCAT() Function in SQL Server 2005 I came up with the following SQL procedure
DECLARE
#PatientID INT=null,
#FirstName Varchar(15)= null,
#LastName Varchar(15)= 'Smith',
#Name Varchar(15) = 'John Smith',
Select
Patient.First_Name,
Patient.Last_Name,
patient.PatientID,
(select CAST(GlobalLookUp.Ref_Code + '|' as VARCHAR(MAX))
from
TBL_PATIENT patient
,TBL_GBLLOOKUP GlobalLookUp
,TBL_DiseaseStateForUser DiseaseStateForUser
-- Try and make a collection of all the PatientIDs
-- that match the search criteria
-- so that only these are used to build
-- the DiseaseStatesColumn
,(Select
Patient.PatientID
FROM TBL_PATIENT patient
,TBL_SITEMASTER SiteMaster
,TBL_USERMASTER UserMaster
,TBL_USERSINSITES UserInSites
,TBL_GBLLOOKUP GlobalLookUp
,TBL_DiseaseStateForUser DiseaseStateForUser
WHERE (((patient.[Last_Name] like #LastName + '%') OR (patient.[Last_Name] Like #Name + '%' ))
OR ((patient.[First_Name] Like #Name + '%' ))
OR (patient.[First_Name] + ' ' + patient.[Last_Name] Like #Name + '%' ))
AND UserMaster.User_Id = UserInSites.User_Id_FK
AND UserInSites.Site_Id_FK = SiteMaster.Site_Id
AND UserInSites.Is_Active = 'True'
AND patient.[User_Id_FK] = UserMaster.[User_Id]
AND (DiseaseStateForUser.User_Id_FK = patient.User_Id_FK
AND DiseaseStateForUser.DiseaseState_RefId_FK = GlobalLookUp.Ref_Id)
and DiseaseStateForUser.Is_Active='True'
AND patient.[Is_Active] = 'TRUE'
group by Patient.PatientID) as PATIENTIDs
where patient.PatientID = PATIENTIDs.PatientID
AND (DiseaseStateForUser.User_Id_FK = patient.User_Id_FK
AND DiseaseStateForUser.DiseaseState_RefId_FK = GlobalLookUp.Ref_Id)
For XML PATH('')) as MultiDiseaseState
FROM TBL_PATIENT patient, TBL_SITEMASTER SiteMaster ,TBL_USERMASTER UserMaster,TBL_USERSINSITES UserInSites, TBL_GBLLOOKUP GlobalLookUp, TBL_DiseaseStateForUser DiseaseStateForUser
WHERE (((patient.[Last_Name] like #LastName + '%') OR (patient.[Last_Name] Like #Name + '%' ))
or ((patient.[First_Name] Like #Name + '%' ))
OR (patient.[First_Name] + ' ' + patient.[Last_Name] Like #Name + '%' ))
AND patient.PatientID = patient.PatientID
AND UserMaster.User_Id = UserInSites.User_Id_FK
AND UserInSites.Site_Id_FK = SiteMaster.Site_Id
AND UserInSites.Is_Active = 'True'
AND patient.[User_Id_FK] = UserMaster.[User_Id]
AND DiseaseStateForUser.User_Id_FK = patient.User_Id_FK
AND DiseaseStateForUser.DiseaseState_RefId_FK = GlobalLookUp.Ref_Id
and DiseaseStateForUser.Is_Active='True'
AND patient.[Is_Active] = 'TRUE'
group by PatientID, patient.First_Name, patient.Last_Name, GlobalLookUp.Ref_Code
order by PatientID
Unfortunately, this query nets me the following:
PatientID FirstName LastName MultiDiseaseState
10000 John Smith HIV|HEPC|HEPC|HIV|HEPC|HIV
10001 Miss Smith HIV|HEPC|HEPC|HIV|HEPC|HIV
10002 Smith Bomb HIV|HEPC|HEPC|HIV|HEPC|HIV
10003 Bobby Smith HIV|HEPC|HEPC|HIV|HEPC|HIV
In other words, the select CAST(GlobalLookUp.Ref_Code + '|' as VARCHAR(MAX)) call is building up the MultiDiseaseState column with all of the disease states for ALL of the selected patients.
I know there is something fundamentally wrong with the most inner SELECT statement, but I'm having a hard time figuring out what it is and how to write the query so that it builds only the disease states for a given patient.
Kind of a long post, but are there any suggestions people can make given the code snippets I've provided?
You should be able to use the Stuff function (I think it's only on SQL 2005 and higher) to make this work, I took your example data and wrote a demonstration off of that
SET NOCOUNT ON
CREATE TABLE #Patient
(
PatientID INT,
FirstName varchar(25),
LastName varchar(25),
UserIDFK INT
)
INSERT INTO #PATIENT SELECT 10000,'John','Smith',1
INSERT INTO #PATIENT SELECT 10001,'Miss','Smith',2
INSERT INTO #PATIENT SELECT 10002,'Smith','Bomb',3
INSERT INTO #PATIENT SELECT 10003,'Bobby','Smith',4
CREATE TABLE #DiseaseStateForUser
(
UserIDFK int,
DiseaseStateRefId int
)
INSERT INTO #DiseaseStateForUser SELECT 1,1
INSERT INTO #DiseaseStateForUser SELECT 1,2
INSERT INTO #DiseaseStateForUser SELECT 2,2
INSERT INTO #DiseaseStateForUser SELECT 3,1
INSERT INTO #DiseaseStateForUser SELECT 3,2
INSERT INTO #DiseaseStateForUser SELECT 4,1
CREATE TABLE #GlobalLookUp
(
RefId int,
Ref_Code varchar(10)
)
INSERT INTO #GlobalLookUp SELECT 1,'HIV'
INSERT INTO #GlobalLookUp SELECT 2,'HEPC'
-- One row per patient with that patient's disease codes joined by '|'.
SELECT
p.PatientID,
p.UserIDFK,
p.FirstName,
p.LastName,
-- The correlated subquery emits '|CODE' per disease state of this patient;
-- FOR XML PATH('') glues them into one string and STUFF drops the leading '|'.
-- TYPE + value() returns the text un-entitized (so '&' does not become '&amp;'),
-- and ORDER BY makes the code order deterministic.
STUFF(
(SELECT '|' + l.Ref_Code
FROM #DiseaseStateForUser AS u
INNER JOIN #GlobalLookUp AS l
ON u.DiseaseStateRefId = l.RefId
WHERE u.UserIDFK = p.UserIDFK
ORDER BY l.RefId
FOR XML PATH(''), TYPE
).value('.', 'varchar(max)')
, 1, 1, '') AS DiseaseStates
FROM #PATIENT AS p
GROUP BY p.PatientID, p.FirstName, p.LastName, p.UserIDFK