Compare 2 or more column values in arbitrary order - sql

I need a quick way to compare 2 or more values from different tables where the orders are arbitrarily stored in sql server. The data comes from a 3rd party who will not change.
Example data below shows the same item described in two ways. the remaining columns contain other data that i am joining.
table1
i j other columns...
1 2 ...
table2
i j other columns
2 1 ...
1 2 ...
Right now, for 2 columns, I do a union query to cover both directions (i=i, j=j / i=j, j=i). But if you expand to 3 columns, that is 6 possible orders (3! permutations), and the number keeps growing with every extra column.
-- Match rows whose (i, j) pair agrees in either column order.
-- UNION (not UNION ALL) removes rows produced by both branches.
select *
from Table1
inner join Table2
    on  Table2.i = Table1.i
    and Table2.j = Table1.j
union
select *
from Table1
inner join Table2
    on  Table2.j = Table1.i
    and Table2.i = Table1.j
is there a way to order data returned from the first two columns before doing the comparison so i don't have to create all the unions?

Edit: New xml approach
I wonder how this approach performs:
-- Order-independent key per row: wrap i, j and k in <c> elements, let XQuery
-- order them, then flatten the ordered sequence back to text.
-- Requires a table variable @Table1 (pk, i, j, k) declared in the same batch.
select *,
       cast(  '<c>' + cast(i as varchar(11)) + '</c>'
            + '<c>' + cast(j as varchar(11)) + '</c>'
            + '<c>' + cast(k as varchar(11)) + '</c>'
       as xml).query('for $a in /c order by $a return concat($a, "/")').value('.', 'varchar(100)') as compare
from @Table1 o
This can be wrapped in a function and referenced in a persisted column... which should scale very well for you:
-- Demo table: pk is an identity surrogate key; i, j, k hold the values whose
-- storage order is arbitrary.
create table dbo.Table1
(
    pk int identity(1,1) primary key,
    i  int,
    j  int,
    k  int
);

-- Explicit column list so the insert does not depend on column positions.
insert into dbo.Table1 (i, j, k)
values (1, 2, 3),
       (3, 1, 2),
       (4, 5, 6),
       (9, 9, 9);
go
-- Builds an order-independent comparison key from three integers.
--   @i, @j, @k : the values to compare, in any order.
--   returns    : the three values, ordered by XQuery and each followed by '/',
--                as varchar(100).
-- Each value is wrapped in a <c> element so the XQuery FLWOR expression can
-- order them; .value('.') flattens the ordered result to text.
-- NOTE(review): a NULL argument makes the concatenated string NULL under the
-- default CONCAT_NULL_YIELDS_NULL setting - confirm that is acceptable.
create function dbo.fn_GenerateCompare(@i int, @j int, @k int)
returns varchar(100)
with schemabinding
as
begin
    return
    (
        select cast(  '<c>' + cast(@i as varchar(11)) + '</c>'
                    + '<c>' + cast(@j as varchar(11)) + '</c>'
                    + '<c>' + cast(@k as varchar(11)) + '</c>'
               as xml).query('for $a in /c order by $a return concat($a, "/")').value('.', 'varchar(100)')
    );
end
go
-- Store the comparison key on the table as a persisted computed column.
alter table dbo.Table1
    add Compare as dbo.fn_GenerateCompare(i, j, k) persisted;

-- Show every row together with its key.
select *
from dbo.Table1
Returns:
pk i j k Compare
-- - - - -------
1 1 2 3 1/2/3
2 3 1 2 1/2/3
3 4 5 6 4/5/6
4 9 9 9 9/9/9
Your query should now be really simple. Slap an index on the new Compare column and it should fly.
Original Post:
I like the sorted list idea proposed by Thorsten. Here's a rough idea of how it might be done. Performance would be greatly improved by persisting this compare column on the table (via a trigger or a persisted computed column).
-- Demo of the "sorted list" comparison. @Table1 and @Table2 are table
-- variables, so the whole script has to run as a single batch.
declare @Table1 table (pk int identity(1,1) primary key, i int, j int, k int);
declare @Table2 table (pk int identity(1,1) primary key, i int, j int, k int);

insert into @Table1 (i, j, k)
values (1, 2, 3), (3, 1, 2), (4, 5, 6), (9, 9, 9);

insert into @Table2 (i, j, k)
values (2, 1, 3), (6, 4, 5);

--since the order is unimportant, concatenate the columns into a sorted array
--note how 1,2,3 and 3,1,2 both result in the same compare value:
-- (UNPIVOT turns i, j, k into rows; ORDER BY + FOR XML PATH('') glues them
--  back together in sorted order)
select *
from @Table1 o
cross apply (
    select cast(value as varchar(11)) + '/'
    from @Table1
    unpivot (value for c in (i, j, k)) as u
    where pk = o.pk
    order by value
    for xml path('')
) d(compare);

--now, bring in the 2nd table
select [src] = 1, pk, compare
from @Table1 o
cross apply (
    select cast(value as varchar(11)) + '/'
    from @Table1
    unpivot (value for c in (i, j, k)) as u
    where pk = o.pk
    order by value
    for xml path('')
) d(compare)
union all
select [src] = 2, pk, compare
from @Table2 o
cross apply (
    select cast(value as varchar(11)) + '/'
    from @Table2
    unpivot (value for c in (i, j, k)) as u
    where pk = o.pk
    order by value
    for xml path('')
) d(compare);

--now just group them to find the matching rows
-- (any key that occurs more than once across both tables is a match)
select min(src) as src, min(pk) as pk, compare
from (
    select [src] = 1, pk, compare
    from @Table1 o
    cross apply (
        select cast(value as varchar(11)) + '/'
        from @Table1
        unpivot (value for c in (i, j, k)) as u
        where pk = o.pk
        order by value
        for xml path('')
    ) d(compare)
    union all
    select [src] = 2, pk, compare
    from @Table2 o
    cross apply (
        select cast(value as varchar(11)) + '/'
        from @Table2
        unpivot (value for c in (i, j, k)) as u
        where pk = o.pk
        order by value
        for xml path('')
    ) d(compare)
) grouped
group by compare
having count(*) > 1;

Related

Split string and divide value

I got a table Test with columns A and B.
The A column contains different values in one entry, e.g. abc;def;ghi, all separated by ;. And the B column contains numeric values, but only one.
What I want is to separate the values from column A into multiple rows.
So:
abc;def;ghi;jkl
-->
abc
def
ghi
jkl
Column B holds a single value, e.g. 20, and I want that value divided evenly across the resulting rows.
So the final result should be:
abc 5
def 5
ghi 5
jkl 5
The issue is that the amount of values in column A must be variable.
First you need to create this function
-- Splits @delimited on @delimiter and returns one row per piece.
--   @delimited : the string to split
--   @delimiter : the separator text
--   returns    : id (running number), val (the piece), origVal (the input string)
-- Works by rewriting the string as <root><r>piece</r>...</root> and shredding
-- it with nodes().
-- NOTE(review): input containing XML-special characters such as & or < is not
-- escaped here and will make the conversion to xml fail.
CREATE FUNCTION Split
(
    @delimited nvarchar(max),
    @delimiter nvarchar(100)
) RETURNS @t TABLE
(
    -- Id column can be commented out, not required for sql splitting string
    id int identity(1,1), -- I use this column for numbering splitted parts
    val nvarchar(max),
    origVal nvarchar(max)
)
AS
BEGIN
    declare @xml xml
    set @xml = N'<root><r>' + replace(@delimited, @delimiter, N'</r><r>') + N'</r></root>'

    -- nvarchar(max) keeps Unicode characters that the nvarchar input may carry
    insert into @t (val, origVal)
    select r.value('.', 'nvarchar(max)') as item,
           @delimited
    from @xml.nodes('/root/r') as records(r)

    RETURN
END
GO
then this query might help
-- One output row per ';'-separated piece of Test.A, with Test.B shared equally.
-- CROSS APPLY is required here: a table-valued function cannot take a column
-- of another table as an argument inside a plain INNER JOIN. It also ties each
-- piece to the row it came from, so no join back on the original string is needed.
-- Divisor = number of ';' in A, plus 1.
-- NOTE: if B is an integer type this is integer division.
Select x.val,
       Test.B / (len(Test.A) - len(replace(Test.A, ';', '')) + 1)
from Test
cross apply dbo.Split(Test.A, ';') as x
this part len(test.A) - len(replace(Test.A, ';', '')) will count the number of ; in string
Be aware this query might have some malfunctioning if there will be duplicate strings in A column, in this situation you need to pass the unique value (for example ID) to split function and return it in the result table, then join it by this value (ie. x.origVal = Test.A => x.origID = Test.ID)
You can use some tricks with CTE, STUFF and windows functions
-- Sample rows: A is a ';'-terminated list, B is the amount to share out.
DECLARE @t TABLE
    (
      ID INT ,
      A NVARCHAR(MAX) ,
      B INT
    );

INSERT INTO @t ( ID, A, B )
VALUES  ( 1, 'a;b;c;d;', 20 ),
        ( 2, 'x;y;z;', 40 );

-- Recursive split: every step peels the first item (D) off the front of the
-- remaining list (A) and stops once nothing is left.
-- NOTE: recursion depth is limited by MAXRECURSION (100 by default).
WITH    cte ( ID, B, D, A )
          AS ( SELECT   ID ,
                        B ,
                        LEFT(A, CHARINDEX(';', A + ';') - 1) ,
                        STUFF(A, 1, CHARINDEX(';', A + ';'), '')
               FROM     @t
               UNION ALL
               SELECT   ID ,
                        B ,
                        LEFT(A, CHARINDEX(';', A + ';') - 1) ,
                        STUFF(A, 1, CHARINDEX(';', A + ';'), '')
               FROM     cte
               WHERE    A > ''
             )
    -- Portion = B divided by the number of items found for the same ID
    SELECT  ID ,
            B ,
            D ,
            CAST(B AS DECIMAL) / COUNT(*) OVER ( PARTITION BY ID ) AS Portion
    FROM    cte
Output:
ID B D Portion
1 20 a 5.00000000000
1 20 b 5.00000000000
1 20 c 5.00000000000
1 20 d 5.00000000000
2 40 x 13.33333333333
2 40 y 13.33333333333
2 40 z 13.33333333333
This is an example of how you can achieve the required result:
-- Sample rows: ColumnA is a ';'-separated list, ColumnB the amount to share out.
DECLARE @table AS TABLE
    (
      ColumnA VARCHAR(100) ,
      ColumnB FLOAT
    );

INSERT  INTO @table
        ( ColumnA, ColumnB )
VALUES  ( 'abc;def;ghi;jkl', 20 ),
        ( 'asf;ret;gsd;jas', 30 ),
        ( 'dfa;aef;gffhi;fjfkl', 40 );

-- C           : recursive number generator (character positions).
-- SetForSplit : one row per item start (position 1, or a position holding ';'),
--               carrying the rest of the string from that item onward.
WITH    C AS ( SELECT   n = 1
               UNION ALL
               SELECT   n + 1
               FROM     C
               WHERE    n <= 100
             ),
        SetForSplit
          AS ( SELECT   T.ColumnA ,
                        T.ColumnB ,
                        C.n ,
                        ( CASE WHEN LEFT(SUBSTRING(T.ColumnA, n, 100), 1) = ';'
                               THEN SUBSTRING(T.ColumnA, n + 1, 100) + ';'
                               ELSE SUBSTRING(T.ColumnA, n, 100) + ';'
                          END ) AS SomeText
               FROM     @table AS T
                        JOIN C ON C.n <= LEN(T.ColumnA)
               WHERE    SUBSTRING(T.ColumnA, n, 1) = ';'
                        OR n = 1
             )
    -- The item is the text up to the first ';' of SomeText; ColumnB is divided
    -- by the number of items found in the same ColumnA.
    SELECT  ROW_NUMBER() OVER ( PARTITION BY ColumnA
                                ORDER BY LEFT(SomeText, CHARINDEX(';', SomeText) - 1) ) AS RowN ,
            LEFT(SomeText, CHARINDEX(';', SomeText) - 1) AS ColA ,
            ColumnB / COUNT(*) OVER ( PARTITION BY ColumnA ) AS ColB
    FROM    SetForSplit
    ORDER BY ColumnA
This is a full working example:
-- Sample rows: [A] is a ';'-separated list, [B] the amount to share out.
DECLARE @DataSource TABLE
(
    [A] VARCHAR(MAX)
   ,[B] INT
);

INSERT INTO @DataSource ([A], [B])
VALUES ('a;b;c;d', 20)
      ,('x;y;z',   40);

-- Each list is rewritten as <t>item</t>... and shredded with nodes().
-- The divisor is the number of <t> elements of the same source row, so two
-- rows that happen to share the same [B] value do not influence each other.
-- NOTE: [B] is INT, so this is integer division.
SELECT T.c.value('.', 'VARCHAR(100)')
      ,[B] / DS.[Bxml].value('count(/t)', 'INT')
FROM @DataSource
CROSS APPLY
(
    SELECT CONVERT(XML, '<t>' + REPLACE([A], ';', '</t><t>') + '</t>')
) DS([Bxml])
CROSS APPLY DS.[Bxml].nodes('/t') AS T(c)
And of course you can ROUND the division as you like.

UPDATE set FROM select [duplicate]

How do I get:
id Name Value
1 A 4
1 B 8
2 C 9
to
id Column
1 A:4, B:8
2 C:9
No CURSOR, WHILE loop, or User-Defined Function needed.
Just need to be creative with FOR XML and PATH.
[Note: This solution only works on SQL 2005 and later. Original question didn't specify the version in use.]
-- Demo data: several (Name, Value) pairs per ID.
create table #YourTable ([ID] int, [Name] char(1), [Value] int)

insert into #YourTable ([ID], [Name], [Value]) values (1, 'A', 4)
insert into #YourTable ([ID], [Name], [Value]) values (1, 'B', 8)
insert into #YourTable ([ID], [Name], [Value]) values (2, 'C', 9)

-- One row per ID. The correlated subquery renders ', Name:Value' for every row
-- of that ID, FOR XML PATH('') glues the pieces together, and STUFF removes
-- the leading ', '.
select
    [ID],
    stuff(
        (
            select ', ' + [Name] + ':' + cast([Value] as varchar(max))
            from #YourTable
            where (ID = Results.ID)
            for xml path(''), type
        ).value('(./text())[1]', 'VARCHAR(MAX)'),
        1, 2, ''
    ) as NameValues
from #YourTable Results
group by ID

drop table #YourTable
If it is SQL Server 2017 or SQL Server Vnext, SQL Azure you can use STRING_AGG as below:
-- Built-in aggregate: joins 'name:value' for every row of an id with ', '.
select
    id,
    string_agg(concat(name, ':', [value]), ', ')
from #YourTable
group by id
Using XML PATH will not concatenate perfectly as you might expect: it will replace "&" with "&amp;" and will also mess with "<" and ">" (they become "&lt;" and "&gt;").
...maybe a few other things, not sure...but you can try this
I came across a workaround for this... you need to replace:
FOR XML PATH('')
)
with:
FOR XML PATH(''),TYPE
).value('(./text())[1]','VARCHAR(MAX)')
...or NVARCHAR(MAX) if thats what youre using.
why the hell doesn't SQL have a concatenate aggregate function? this is a PITA.
I ran into a couple of problems when I tried converting Kevin Fairchild's suggestion to work with strings containing spaces and special XML characters (&, <, >) which were encoded.
The final version of my code (which doesn't answer the original question but may be useful to someone) looks like this:
-- Demo data whose names contain XML-special characters (&, <).
create table #YourTable ([ID] int, [Name] varchar(max), [Value] int)

insert into #YourTable ([ID], [Name], [Value]) values (1, 'Oranges & Lemons', 4)
insert into #YourTable ([ID], [Name], [Value]) values (1, '1 < 2', 8)
insert into #YourTable ([ID], [Name], [Value]) values (2, 'C', 9)

-- One row per ID with its names joined by ', '.
select
    [ID],
    stuff(
        (
            select ', ' + cast([Name] as varchar(max))
            from #YourTable
            where (ID = Results.ID)
            for xml path(''), type
            /* .value() turns XML entities (e.g. &gt; &lt; &amp;) back into plain characters */
        ).value('.', 'VARCHAR(MAX)'),
        1, 2, ''
    ) as NameValues
from #YourTable Results
group by ID

drop table #YourTable
Rather than using a space as a delimiter and replacing all the spaces with commas, it just pre-pends a comma and space to each value then uses STUFF to remove the first two characters.
The XML encoding is taken care of automatically by using the TYPE directive.
Another option using Sql Server 2005 and above
---- test data
---- test data
declare @t table (OUTPUTID int, SCHME varchar(10), DESCR varchar(10))
insert @t select 1125439 ,'CKT','Approved'
insert @t select 1125439 ,'RENO','Approved'
insert @t select 1134691 ,'CKT','Approved'
insert @t select 1134691 ,'RENO','Approved'
insert @t select 1134691 ,'pn','Approved'

---- actual query
-- cte  : one 'SCHME (DESCR)' string per row, numbered within each outputid.
-- cte2 : starts at row 1 of each outputid and appends row rn+1 on every
--        recursion step, so the longest string per outputid is the full list.
;with cte(outputid, combined, rn)
as
(
    select outputid,
           SCHME + ' (' + DESCR + ')',
           rn = ROW_NUMBER() over (PARTITION by outputid order by schme, descr)
    from @t
)
,cte2(outputid, finalstatus, rn)
as
(
    select OUTPUTID, convert(varchar(max), combined), 1
    from cte
    where rn = 1
    union all
    select cte2.outputid,
           convert(varchar(max), cte2.finalstatus + ', ' + cte.combined),
           cte2.rn + 1
    from cte2
    inner join cte
        on cte.OUTPUTID = cte2.outputid
       and cte.rn = cte2.rn + 1
)
-- every later step extends the previous string, so MAX picks the complete list
select outputid, MAX(finalstatus)
from cte2
group by outputid
Install the SQLCLR Aggregates from http://groupconcat.codeplex.com
Then you can write code like this to get the result you asked for:
-- Demo table. Created schema-qualified so it is the same object that the
-- INSERT and SELECT below address as dbo.foo.
CREATE TABLE dbo.foo
(
    id INT,
    name CHAR(1),
    Value CHAR(1)
);

INSERT INTO dbo.foo
    (id, name, Value)
VALUES (1, 'A', '4'),
       (1, 'B', '8'),
       (2, 'C', '9');

-- dbo.GROUP_CONCAT is the SQLCLR aggregate from the library named above;
-- it is not a built-in function.
SELECT id,
       dbo.GROUP_CONCAT(name + ':' + Value) AS [Column]
FROM dbo.foo
GROUP BY id;
Eight years later... Microsoft SQL Server vNext Database Engine has finally enhanced Transact-SQL to directly support grouped string concatenation. The Community Technical Preview version 1.0 added the STRING_AGG function and CTP 1.1 added the WITHIN GROUP clause for the STRING_AGG function.
Reference: https://msdn.microsoft.com/en-us/library/mt775028.aspx
SQL Server 2005 and later allow you to create your own custom aggregate functions, including for things like concatenation- see the sample at the bottom of the linked article.
This is just an addition to Kevin Fairchild's post (very clever by the way). I would have added it as a comment, but I don't have enough points yet :)
I was using this idea for a view I was working on; however, the items I was concatenating contained spaces. So I modified the code slightly to not use spaces as delimiters.
Again thanks for the cool workaround Kevin!
-- Demo data: several (Name, Value) pairs per ID.
create table #YourTable ([ID] int, [Name] char(1), [Value] int)

insert into #YourTable ([ID], [Name], [Value]) values (1, 'A', 4)
insert into #YourTable ([ID], [Name], [Value]) values (1, 'B', 8)
insert into #YourTable ([ID], [Name], [Value]) values (2, 'C', 9)

-- The column alias A makes FOR XML PATH('') wrap each piece in <A>...</A>.
-- The REPLACE chain turns the tag boundaries into ', ' and then strips the
-- remaining opening and closing tags.
select
    [ID],
    replace(
        replace(
            replace(
                (
                    select [Name] + ':' + cast([Value] as varchar(max)) as A
                    from #YourTable
                    where (ID = Results.ID)
                    for xml path('')
                ),
                '</A><A>', ', '),
            '<A>', ''),
        '</A>', '') as NameValues
from #YourTable Results
group by ID

drop table #YourTable
An example would be
In Oracle you can use LISTAGG aggregate function.
Original records
name type
------------
name1 type1
name2 type2
name2 type3
Sql
-- Oracle: one row per name with its types joined by '; '.
-- The list is ordered by type; ordering by name would be meaningless because
-- name is constant within each group. "your_table" is a placeholder, since
-- TABLE is a reserved word and cannot be used as a table name.
SELECT name,
       LISTAGG(type, '; ') WITHIN GROUP (ORDER BY type) AS type
FROM your_table
GROUP BY name
Result in
name type
------------
name1 type1
name2 type2; type3
This kind of question is asked here very often, and the solution is going to depend a lot on the underlying requirements:
https://stackoverflow.com/search?q=sql+pivot
and
https://stackoverflow.com/search?q=sql+concatenate
Typically, there is no SQL-only way to do this without either dynamic sql, a user-defined function, or a cursor.
Just to add to what Cade said, this is usually a front-end display thing and should therefore be handled there. I know that sometimes it's easier to write something 100% in SQL for things like file export or other "SQL only" solutions, but most of the times this concatenation should be handled in your display layer.
Don't need a cursor... a while loop is sufficient.
------------------------------
-- Setup
------------------------------
-- @Source holds the raw (id, Name, Value) rows; @Target receives one row per
-- id with the concatenated result.
DECLARE @Source TABLE
(
    id int,
    Name varchar(30),
    Value int
)
DECLARE @Target TABLE
(
    id int,
    Result varchar(max)
)
INSERT INTO @Source(id, Name, Value) SELECT 1, 'A', 4
INSERT INTO @Source(id, Name, Value) SELECT 1, 'B', 8
INSERT INTO @Source(id, Name, Value) SELECT 2, 'C', 9
------------------------------
-- Technique
------------------------------
-- Seed the target with one row per distinct id.
INSERT INTO @Target (id)
SELECT id
FROM @Source
GROUP BY id

DECLARE @id int, @Result varchar(max)
SET @id = (SELECT MIN(id) FROM @Target)

-- Walk the ids in ascending order; the loop ends when no larger id exists.
WHILE @id is not null
BEGIN
    SET @Result = null

    -- Variable concatenation across rows: the first row starts the string,
    -- every later row is appended after ', '.
    -- NOTE(review): the order in which rows are appended is not specified
    -- by this query.
    SELECT @Result =
        CASE
            WHEN @Result is null
            THEN ''
            ELSE @Result + ', '
        END + s.Name + ':' + convert(varchar(30), s.Value)
    FROM @Source s
    WHERE id = @id

    UPDATE @Target
    SET Result = @Result
    WHERE id = @id

    -- next id, or NULL when this was the last one
    SET @id = (SELECT MIN(id) FROM @Target WHERE @id < id)
END

SELECT *
FROM @Target
Let's get very simple:
-- Minimal pattern: the inner query renders ', ' + value for every row,
-- FOR XML PATH('') glues the pieces into one string, and STUFF(..., 1, 2, '')
-- removes the leading ', '.
SELECT stuff(
(
select ', ' + x from (SELECT 'xxx' x union select 'yyyy') tb
FOR XML PATH('')
)
, 1, 2, '')
Replace this line:
select ', ' + x from (SELECT 'xxx' x union select 'yyyy') tb
With your query.
You can improve performance significant the following way if group by contains mostly one item:
-- Same Name:Value list per ID, with a fast path for IDs that have one row.
-- Groups with exactly one row are answered from the aggregates alone; only
-- multi-row groups pay for the correlated FOR XML subquery. Both branches
-- return the same 'Name:Value' format, and the whole CASE carries one alias.
SELECT
    [ID],
    CASE
        WHEN COUNT(*) = 1
            THEN MAX([Name]) + ':' + CAST(MAX([Value]) AS VARCHAR(MAX))
        ELSE
            STUFF((
                SELECT ', ' + [Name] + ':' + CAST([Value] AS VARCHAR(MAX))
                FROM #YourTable
                WHERE (ID = Results.ID)
                FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
            ,1,2,'')
    END AS NameValues
FROM #YourTable Results
GROUP BY ID
didn't see any cross apply answers, also no need for xml extraction. Here is a slightly different version of what Kevin Fairchild wrote. It's faster and easier to use in more complex queries:
-- Name:Value list per ID, with the FOR XML subquery placed in a CROSS APPLY.
-- The applied value is identical for all rows of an ID, so MAX simply picks it
-- while the rows are collapsed by GROUP BY.
select
    T.ID,
    max(X.cl) NameValues
from #YourTable T
cross apply
(
    select stuff(
               (
                   select ', ' + [Name] + ':' + cast([Value] as varchar(max))
                   from #YourTable
                   where (ID = T.ID)
                   for xml path('')
               ),
               1, 2, '') [cl]
) X
group by T.ID
Using the Stuff and for xml path operator to concatenate rows to string :Group By two columns -->
-- Demo data: ID 1 has two rows for Name 'B'.
CREATE TABLE #YourTable ([ID] INT, [Name] CHAR(1), [Value] INT)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (1,'A',4)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (1,'B',8)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (1,'B',5)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (2,'C',9)

-- Query 1: one row per ID with all of its Name:Value pairs.
-- The subquery correlates on ID only; Name is not part of this GROUP BY, so
-- it cannot be referenced from the outer row here.
SELECT
    [ID],
    STUFF((
        SELECT ', ' + [Name] + ':' + CAST([Value] AS VARCHAR(MAX))
        FROM #YourTable
        WHERE (ID = Results.ID)
        FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
    ,1,2,'') AS NameValues
FROM #YourTable Results
GROUP BY ID

-- Query 2: one row per (ID, Name) with the values of that combination.
SELECT
    [ID],[Name] , --these are acting as the group by clause
    STUFF((
        SELECT ', ' + CAST([Value] AS VARCHAR(MAX))
        FROM #YourTable
        WHERE (ID = Results.ID and Name = Results.[Name])
        FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
    ,1,2,'') AS NameValues
FROM #YourTable Results
GROUP BY ID, Name

DROP TABLE #YourTable
Using Replace Function and FOR JSON PATH
-- Employee names per department, built from FOR JSON output.
-- The subquery renders {"ENAME":"x"},{"ENAME":"y"}; removing the opening and
-- closing object text leaves the names separated by the commas between objects.
select
    T3.DEPT,
    replace(replace(T3.ENAME, '{"ENAME":"', ''), '"}', '') as ENAME_LIST
from
(
    select
        DEPT,
        (
            select ENAME as [ENAME]
            from EMPLOYEE T2
            where T2.DEPT = T1.DEPT
            for json path, without_array_wrapper
        ) ENAME
    from EMPLOYEE T1
    group by DEPT
) T3
For sample data and more ways click here
If you have clr enabled you could use the Group_Concat library from GitHub
Another example without the garbage: ",TYPE).value('(./text())[1]','VARCHAR(MAX)')"
-- Inline sample rows: n = row number, g = group, v = value.
with t as
(
    select 1 n, 1 g, 1 v
    union all
    select 2 n, 1 g, 2 v
    union all
    select 3 n, 2 g, 3 v
)
-- One row per g; cg is the ', '-separated list of that group's v values.
select
    g,
    stuff(
        (
            select ', ' + cast(v as varchar(max))
            from t sub_t
            where sub_t.g = main_t.g
            for xml path('')
        ),
        1, 2, ''
    ) cg
from t main_t
group by g
Input-output is
************************* -> *********************
* n * g * v * * g * cg *
* - * - * - * * - * - *
* 1 * 1 * 1 * * 1 * 1, 2 *
* 2 * 1 * 2 * * 2 * 3 *
* 3 * 2 * 3 * *********************
*************************
I used this approach which may be easier to grasp. Get a root element, then concat to choices any item with the same ID but not the 'official' name
-- @IdxList: one row per IdxId. choices starts as the quoted "official" title
-- (MAX(Title)); AisName keeps that title unquoted for the later comparison.
Declare @IdxList as Table (id int, choices varchar(max), AisName varchar(255))

Insert into @IdxList (id, choices, AisName)
Select IdxId, '''' + Max(Title) + '''', Max(Title)
From [dbo].[dta_Alias]
where IdxId is not null
group by IdxId

-- Append a quoted alternative title to choices.
-- NOTE(review): when several alias rows match one @IdxList row, UPDATE ... FROM
-- applies only one of them; confirm a single extra title per id is intended.
Update L
set choices = L.choices + ',''' + A.Title + ''''
From @IdxList as L
JOIN [dta_Alias] as A
    ON L.id = A.IdxId
   And A.Title <> L.AisName
where A.IdxId is not null

-- Only ids that actually received an additional choice.
Select * from @IdxList where choices like '%,%'
For all my healthcare folks out there:
-- One row per NOTE_ID with its text lines joined by a single space, in [line]
-- order. The separator is prepended to every line, so STUFF(..., 1, 1, '')
-- removes exactly that one leading space and never cuts into the note text.
SELECT
    s.NOTE_ID
   ,STUFF((
        SELECT ' ' + s1.[note_text]
        FROM HNO_NOTE_TEXT s1
        WHERE (s1.NOTE_ID = s.NOTE_ID)
        ORDER BY s1.[line] ASC
        FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
    ,1
    ,1
    ,'') AS NOTE_TEXT_CONCATINATED
FROM HNO_NOTE_TEXT s
GROUP BY s.NOTE_ID

Delete duplicate values from concatenated string

I have the following table:
Object Field Values
------------------------------------
1 1 A;A;A;B;A;A
2 1 A;B;C;C
2 2 X
3 1 X;Y;Z
3 2 V;V;V;V;V;V;V;V;V;V;V
How can I select from this table only the unique values from the concatenated values? So:
Object Field Values
---------------------
1 1 A;B
2 1 A;B;C
2 2 X
3 1 X;Y;Z
3 2 V
In any scripting language, I would loop through the values from Values, explode on ; and loop through that array with some logic filtering out duplicates. However, I need to do this only using SQL (Server 2008).
Can anybody tell me if and how this can be done?
Any help is greatly appreciated :-)
To do this first create a split function. This is the one I use but if you search the internet (or even SO) for "SQL Server Split Function" you will find a number of alternatives if you don't like this:
-- Inline table-valued split function (use CREATE FUNCTION on first install).
--   @StringToSplit : the delimited string
--   @Delimiter     : single-character separator
--   returns        : ID (running number), Position (start position of the
--                    item in the string), Value (the item)
-- A numbers list 1..LEN+1 is generated from sys.all_objects; a position starts
-- an item when the character just before it is a delimiter, which the
-- delimiter-padded string makes true for position 1 as well.
ALTER FUNCTION [dbo].[Split](@StringToSplit NVARCHAR(MAX), @Delimiter NCHAR(1))
RETURNS TABLE
AS
RETURN
(
    SELECT  ID = ROW_NUMBER() OVER(ORDER BY n.Number),
            Position = n.Number,
            Value = SUBSTRING(@StringToSplit, n.Number, CHARINDEX(@Delimiter, @StringToSplit + @Delimiter, n.Number) - n.Number)
    FROM    (   SELECT  TOP (LEN(@StringToSplit) + 1) Number = ROW_NUMBER() OVER(ORDER BY a.object_id)
                FROM    sys.all_objects a
                        CROSS JOIN sys.all_objects b
            ) n
    WHERE   SUBSTRING(@Delimiter + @StringToSplit + @Delimiter, n.Number, 1) = @Delimiter
);
Then you can split your field, So running:
-- One output row per ';'-separated item of every source row.
select
    t.Object,
    t.Field,
    s.Value
from T
cross apply dbo.Split(t.[Values], ';') as s
Will turn this:
Object Field Values
------------------------------------
1 1 A;A;A;B;A;A
into:
Object Field Values
------------------------------------
1 1 A
1 1 A
1 1 A
1 1 B
1 1 A
1 1 A
Then you can apply the DISTINCT Operator:
-- Same split, with repeated items of a row collapsed by DISTINCT.
select distinct
    t.Object,
    t.Field,
    s.Value
from T
cross apply dbo.Split(t.[Values], ';') as s;
To give:
Object Field Values
------------------------------------
1 1 A
1 1 B
Then you can concatenate your rows back into a single column giving a final query:
-- Per source row: split, keep distinct items, then glue them back together.
-- FOR XML PATH('') joins the ';item' pieces; STUFF removes the leading ';'.
select
    t.Object,
    t.Field,
    [Values] = stuff(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '')
from T
cross apply
(
    select distinct ';' + s.Value
    from dbo.Split(t.[Values], ';') as s
    for xml path(''), type
) as s (x)
SQL Fiddle appears to be down, but once you have the Split function created the below is a full working example:
-- Sample data from the question.
create table #T (Object int, Field int, [Values] varchar(max));

insert #T
values
    (1, 1, 'A;A;A;B;A;A'),
    (2, 1, 'A;B;C;C'),
    (2, 2, 'X'),
    (3, 1, 'X;Y;Z'),
    (3, 2, 'V;V;V;V;V;V;V;V;V;V;V');

-- Split each row with dbo.Split, keep distinct items, and re-join them.
select
    t.Object,
    t.Field,
    [Values] = stuff(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '')
from #T as T
cross apply
(
    select distinct ';' + s.Value
    from dbo.Split(t.[Values], ';') as s
    for xml path(''), type
) as s (x);
EDIT
Based on your comment that you can't create tables or modify the DDL, I thought I would account for the situation where you can't create a function either. You can expand the above split function out into your query, so you don't actually need to create a function:
-- Sample data from the question.
create table #T (Object int, Field int, [Values] varchar(max));

insert #T
values
    (1, 1, 'A;A;A;B;A;A'),
    (2, 1, 'A;B;C;C'),
    (2, 2, 'X'),
    (3, 1, 'X;Y;Z'),
    (3, 2, 'V;V;V;V;V;V;V;V;V;V;V');

-- Same result without a split function: the numbers list and the item
-- extraction are written directly inside the CROSS APPLY.
select
    t.Object,
    t.Field,
    [Values] = stuff(x.value('.', 'NVARCHAR(MAX)'), 1, 1, '')
from #T as T
cross apply
(
    select distinct
           ';' + substring(t.[Values], Number, charindex(';', t.[Values] + ';', Number) - Number)
    from
    (
        -- positions 1 .. LEN + 1 of the current row's string
        select top (len(t.[Values]) + 1)
               Number = row_number() over (order by a.object_id)
        from sys.all_objects a
        cross join sys.all_objects b
    ) n
    -- an item starts wherever the preceding character is ';'
    where substring(';' + t.[Values] + ';', n.Number, 1) = ';'
    for xml path(''), type
) as s (x);
Here is a standalone solution:
-- Sample data held in a table variable (run as one batch).
DECLARE @t table(Object int, Field int, [Values] varchar(max))

INSERT @t (Object, Field, [Values]) values
(1, 1, 'A;A;A;B;A;A'),
(2, 1, 'A;B;C;C'),
(3, 1, 'X'),
(4, 1, 'X;Y;Z'),
(5, 1, 'V;V;V;V;V;V;V;V;V;V;V')

-- Per row: rewrite the list as <t>item</t>..., shred it with nodes(), keep the
-- distinct items, glue them back with FOR XML PATH('') and strip the leading ';'.
SELECT t.Object, t.Field, x.[NewValues]
FROM @t t
CROSS APPLY
(
    SELECT STUFF((
        SELECT distinct ';' + n.c.value('.', 'VARCHAR(2000)') value
        FROM (
            SELECT x = CAST('<t>' +
                REPLACE(t.[Values], ';', '</t><t>') + '</t>' AS XML)
        ) a
        CROSS APPLY a.x.nodes('/t') n(c)
        for xml path(''), type
    ).value('.', 'varchar(max)'), 1, 1, '') [NewValues]
) x
Result:
Object Field NewValues
1 1 A;B
2 1 A;B;C
3 1 X
4 1 X;Y;Z
5 1 V
According to @GarethD's comment this may perform slowly.
Test data:
-- Performance fixture: five seed rows, then six passes that each append three
-- copies of the current contents (every pass quadruples the row count).
create table #t (Object int identity(1,1), Field int, [Values] varchar(max))

insert #t values
(1, 'A;A;A;B;A;A'), (1, 'A;B;C;C'), (1, 'X'), (1, 'X;Y;Z'), (1, 'V;V;V;V;V;V;V;V;V;V;V')

declare @pass int
set @pass = 0
while @pass < 6
begin
    insert #t
    select field, [values] from #t
    union all
    select field, [values] from #t
    union all
    select field, [values] from #t

    set @pass = @pass + 1
end
Performance testing my script:
-- XML-based de-duplication, run against the #t performance fixture.
select
    t.Object,
    t.Field,
    x.[NewValues]
from #t t
cross apply
(
    select stuff(
        (
            select distinct ';' + t.c.value('.', 'VARCHAR(2000)') value
            from
            (
                select x = cast('<t>' + replace([Values], ';', '</t><t>') + '</t>' as xml)
            ) a
            cross apply x.nodes('/t') t(c)
            for xml path(''), type
        ).value('.', 'varchar(max)'),
        1, 1, '') [NewValues]
) x
Result less than 1 sec.
Performance testing GarethD's script
(had to edit testdata to get all rows. Identical rows were considered as 1 row):
-- CTE: distinct (Object, Field, item) rows, split with an inline numbers list.
with CTE as
(
    select distinct
           t.Object,
           t.Field,
           s.Value
    from #T as T
    cross apply
    (
        select ID = row_number() over (order by n.Number),
               Position = Number,
               Value = substring(t.[Values], Number, charindex(';', t.[Values] + ';', Number) - Number)
        from
        (
            select top (len(t.[Values]) + 1)
                   Number = row_number() over (order by a.object_id)
            from sys.all_objects a
            cross join sys.all_objects b
        ) n
        where substring(';' + t.[Values] + ';', n.Number, 1) = ';'
    ) as s
)
-- Re-join the distinct items of every (Object, Field) with ';'.
select
    Object,
    Field,
    [Values] = stuff(
                   (
                       select ';' + Value
                       from CTE as T2
                       where T2.Object = T.Object
                         and T2.Field = T.Field
                       for xml path(''), type
                   ).value('.', 'VARCHAR(MAX)'),
                   1, 1, '')
from CTE as T
group by Object, Field;
Result 6 seconds
If any row has null in values this script will also crash.
Just as a Scalar Value Function alternative without the CTE...
-- Scalar function: splits @String on @Delimiter, removes duplicate items and
-- returns the distinct items (sorted) joined with @Delimiter again.
--   @String    : the delimited list
--   @Delimiter : separator, up to 5 characters
--   returns    : the de-duplicated list; NULL when no item was found
-- (Use CREATE FUNCTION on first install.)
ALTER FUNCTION [SplitRemoveDupes] (
     @String VARCHAR(MAX)
    ,@Delimiter VARCHAR(5)
)
RETURNS VARCHAR(MAX)
AS
BEGIN
    DECLARE @SplitLength INT
    DECLARE @DelimiterLength INT
    DECLARE @DedupedValues VARCHAR(MAX)
    DECLARE @SplittedValues TABLE
    (
        OccurenceId INT IDENTITY(1,1),
        SplitValue VARCHAR(200)
    )

    -- DATALENGTH rather than LEN: LEN ignores trailing spaces, which would
    -- miscount a delimiter such as ', '.
    SET @DelimiterLength = DATALENGTH(@Delimiter)

    WHILE LEN(@String) > 0
    BEGIN
        -- length of the next item: up to the next delimiter, or the rest
        SELECT @SplitLength = (
            CASE CHARINDEX(@Delimiter, @String)
                WHEN 0
                    THEN LEN(@String)
                ELSE CHARINDEX(@Delimiter, @String) - 1
            END
        )

        INSERT INTO @SplittedValues (SplitValue)
        SELECT SUBSTRING(@String, 1, @SplitLength)

        -- drop the item plus the whole delimiter (not just one character)
        SELECT @String = STUFF(@String, 1, @SplitLength + @DelimiterLength, '')
    END

    -- TYPE + .value() keeps characters such as & and < unescaped; STUFF then
    -- removes the leading delimiter using its real length.
    SET @DedupedValues = STUFF((
            SELECT DISTINCT (@Delimiter + SplitValue)
            FROM @SplittedValues s
            ORDER BY (@Delimiter + SplitValue)
            FOR XML PATH(''), TYPE
        ).value('.', 'VARCHAR(MAX)'), 1, @DelimiterLength, '')

    RETURN @DedupedValues
END
Call it inline...
-- [Values] and [Table] are bracketed because VALUES and TABLE are reserved words.
SELECT Object, Field, [dbo].[SplitRemoveDupes]([Values], ';') AS [Values] FROM [Table]

Duplicates without using While or Cursor in T-SQL

ID Name
1 A
1 B
1 C
2 X
2 Y
3 P
3 Q
3 R
These are the columns in a table. I want to get output like
ID Company
1 A,B,C
2 X, Y
3 P,Q,R
Restriction is that I cannot use WHILE or CURSOR. Please write a query for the same.
This query should do it - uses FOR XML PATH which is new in SQL Server 2005 - hope you are on 2005 or higher, you didn't clearly specify.....
-- One row per ID; Company is the ','-separated list of that ID's names.
-- The subquery prefixes every name with ',', and STUFF drops the first one.
select
    ID,
    stuff(
        cast(
            (
                select ',' + Name
                from dbo.YourTable t2
                where t2.ID = dbo.YourTable.ID
                for xml path(''), type
            ) as varchar(max)),
        1, 1, '') as 'Company'
from dbo.YourTable
group by ID
Here's a solution using the CROSS APPLY method:
-- Distinct ids first, then one CROSS APPLY per id builds its name list.
-- Each name is followed by ', ' (so the list ends with a trailing separator).
select
    a.id,
    sub.names
from
(
    select distinct id
    from YourTable
) a
cross apply
(
    select b.name + ', ' as [text()]
    from YourTable b
    where b.id = a.id
    for xml path('')
) sub(names)
For 2005 version:
-- Demo table and rows.
create table dbo.TEST ([Type] integer, [Name] nvarchar(100), [Qty] integer)
go

insert dbo.TEST values (1, N'a', 5)
insert dbo.TEST values (1, N'b', 6)
insert dbo.TEST values (2, N'c', 44)
insert dbo.TEST values (3, N'd', 1)
go

-- 'data()' makes FOR XML PATH('') emit the values separated by spaces;
-- REPLACE then turns those spaces into commas.
select
    [Type],
    [Description] = replace(
                        (
                            select [Name] + ':' + cast([Qty] as varchar) as 'data()'
                            from TEST
                            where [Type] = t.[Type]
                            for xml path('')
                        ),
                        ' ', ',')
from dbo.TEST t
group by [Type]
go

drop table dbo.TEST
You can group on the ID to get the unique values, then get the comma separated string for each using a for xml query:
-- One row per ID; Company is the ', '-separated list of that ID's names.
-- The inner query reads the same table as the outer one, and STUFF removes the
-- leading ', ' without capping the length of the result.
select
    a.ID,
    stuff((
        select ', ' + t.Name
        from TheTable t
        where t.ID = a.ID
        for xml path('')
    ), 1, 2, '') as Company
from
    TheTable a
group by
    a.ID

How to use GROUP BY to concatenate strings in SQL Server?

How do I get:
id Name Value
1 A 4
1 B 8
2 C 9
to
id Column
1 A:4, B:8
2 C:9
No CURSOR, WHILE loop, or User-Defined Function needed.
Just need to be creative with FOR XML and PATH.
[Note: This solution only works on SQL 2005 and later. Original question didn't specify the version in use.]
-- Demo data: several (Name, Value) pairs per ID.
create table #YourTable ([ID] int, [Name] char(1), [Value] int)

insert into #YourTable ([ID], [Name], [Value]) values (1, 'A', 4)
insert into #YourTable ([ID], [Name], [Value]) values (1, 'B', 8)
insert into #YourTable ([ID], [Name], [Value]) values (2, 'C', 9)

-- One row per ID. The correlated subquery renders ', Name:Value' for every row
-- of that ID, FOR XML PATH('') glues the pieces together, and STUFF removes
-- the leading ', '.
select
    [ID],
    stuff(
        (
            select ', ' + [Name] + ':' + cast([Value] as varchar(max))
            from #YourTable
            where (ID = Results.ID)
            for xml path(''), type
        ).value('(./text())[1]', 'VARCHAR(MAX)'),
        1, 2, ''
    ) as NameValues
from #YourTable Results
group by ID

drop table #YourTable
If it is SQL Server 2017 or SQL Server Vnext, SQL Azure you can use STRING_AGG as below:
-- Built-in aggregate: joins 'name:value' for every row of an id with ', '.
select
    id,
    string_agg(concat(name, ':', [value]), ', ')
from #YourTable
group by id
Using XML PATH will not concatenate perfectly as you might expect: it will replace "&" with "&amp;" and will also mess with "<" and ">" (they become "&lt;" and "&gt;").
...maybe a few other things, not sure...but you can try this
I came across a workaround for this... you need to replace:
FOR XML PATH('')
)
with:
FOR XML PATH(''),TYPE
).value('(./text())[1]','VARCHAR(MAX)')
...or NVARCHAR(MAX) if thats what youre using.
why the hell doesn't SQL have a concatenate aggregate function? this is a PITA.
I ran into a couple of problems when I tried converting Kevin Fairchild's suggestion to work with strings containing spaces and special XML characters (&, <, >) which were encoded.
The final version of my code (which doesn't answer the original question but may be useful to someone) looks like this:
-- Demo data whose names contain XML-special characters (&, <).
create table #YourTable ([ID] int, [Name] varchar(max), [Value] int)

insert into #YourTable ([ID], [Name], [Value]) values (1, 'Oranges & Lemons', 4)
insert into #YourTable ([ID], [Name], [Value]) values (1, '1 < 2', 8)
insert into #YourTable ([ID], [Name], [Value]) values (2, 'C', 9)

-- One row per ID with its names joined by ', '.
select
    [ID],
    stuff(
        (
            select ', ' + cast([Name] as varchar(max))
            from #YourTable
            where (ID = Results.ID)
            for xml path(''), type
            /* .value() turns XML entities (e.g. &gt; &lt; &amp;) back into plain characters */
        ).value('.', 'VARCHAR(MAX)'),
        1, 2, ''
    ) as NameValues
from #YourTable Results
group by ID

drop table #YourTable
Rather than using a space as a delimiter and replacing all the spaces with commas, it just pre-pends a comma and space to each value then uses STUFF to remove the first two characters.
The XML encoding is taken care of automatically by using the TYPE directive.
Another option using Sql Server 2005 and above
---- test data
---- test data
declare @t table (OUTPUTID int, SCHME varchar(10), DESCR varchar(10))
insert @t select 1125439 ,'CKT','Approved'
insert @t select 1125439 ,'RENO','Approved'
insert @t select 1134691 ,'CKT','Approved'
insert @t select 1134691 ,'RENO','Approved'
insert @t select 1134691 ,'pn','Approved'

---- actual query
-- cte  : one 'SCHME (DESCR)' string per row, numbered within each outputid.
-- cte2 : starts at row 1 of each outputid and appends row rn+1 on every
--        recursion step, so the longest string per outputid is the full list.
;with cte(outputid, combined, rn)
as
(
    select outputid,
           SCHME + ' (' + DESCR + ')',
           rn = ROW_NUMBER() over (PARTITION by outputid order by schme, descr)
    from @t
)
,cte2(outputid, finalstatus, rn)
as
(
    select OUTPUTID, convert(varchar(max), combined), 1
    from cte
    where rn = 1
    union all
    select cte2.outputid,
           convert(varchar(max), cte2.finalstatus + ', ' + cte.combined),
           cte2.rn + 1
    from cte2
    inner join cte
        on cte.OUTPUTID = cte2.outputid
       and cte.rn = cte2.rn + 1
)
-- every later step extends the previous string, so MAX picks the complete list
select outputid, MAX(finalstatus)
from cte2
group by outputid
Install the SQLCLR Aggregates from http://groupconcat.codeplex.com
Then you can write code like this to get the result you asked for:
-- Sample table for the GROUP_CONCAT demo.
CREATE TABLE foo (id INT, name CHAR(1), Value CHAR(1));

INSERT INTO dbo.foo (id, name, Value)
VALUES
    (1, 'A', '4'),
    (1, 'B', '8'),
    (2, 'C', '9');

-- One row per id. dbo.GROUP_CONCAT is not built in: it is the SQLCLR
-- aggregate that has to be installed before this query can run.
SELECT
    f.id,
    dbo.GROUP_CONCAT(f.name + ':' + f.Value) AS [Column]
FROM dbo.foo AS f
GROUP BY f.id;
Eight years later... Microsoft SQL Server vNext Database Engine has finally enhanced Transact-SQL to directly support grouped string concatenation. The Community Technical Preview version 1.0 added the STRING_AGG function and CTP 1.1 added the WITHIN GROUP clause for the STRING_AGG function.
Reference: https://msdn.microsoft.com/en-us/library/mt775028.aspx
SQL Server 2005 and later allow you to create your own custom aggregate functions, including for things like concatenation- see the sample at the bottom of the linked article.
This is just an addition to Kevin Fairchild's post (very clever by the way). I would have added it as a comment, but I don't have enough points yet :)
I was using this idea for a view I was working on; however, the items I was concatenating contained spaces. So I modified the code slightly to not use spaces as delimiters.
Again thanks for the cool workaround Kevin!
-- Demo: concatenate 'Name:Value' pairs per ID without using a space as the
-- delimiter, so values that contain spaces survive.
CREATE TABLE #YourTable (
    [ID]    INT,
    [Name]  CHAR(1),
    [Value] INT
)

INSERT INTO #YourTable ([ID], [Name], [Value]) VALUES (1, 'A', 4)
INSERT INTO #YourTable ([ID], [Name], [Value]) VALUES (1, 'B', 8)
INSERT INTO #YourTable ([ID], [Name], [Value]) VALUES (2, 'C', 9)

SELECT
    grp.[ID],
    -- The column alias A makes FOR XML PATH('') wrap every value in
    -- <A>...</A>; the three REPLACE calls turn the boundaries between
    -- elements into ', ' and then drop the first and last tags.
    REPLACE(
        REPLACE(
            REPLACE(
                (
                    SELECT item.[Name] + ':' + CAST(item.[Value] AS VARCHAR(MAX)) AS A
                    FROM #YourTable AS item
                    WHERE item.ID = grp.ID
                    FOR XML PATH('')
                ),
                '</A><A>', ', '
            ),
            '<A>', ''
        ),
        '</A>', ''
    ) AS NameValues
FROM #YourTable AS grp
GROUP BY grp.ID

DROP TABLE #YourTable
An example would be
In Oracle you can use LISTAGG aggregate function.
Original records
name type
------------
name1 type1
name2 type2
name2 type3
Sql
-- Collapse all type values of each name into one '; '-separated string.
-- The list is ordered by type: ordering by name (the GROUP BY key) cannot
-- distinguish rows inside a group, so it would leave the order unspecified.
-- NOTE: "table" stands in for the real table name.
SELECT name, LISTAGG(type, '; ') WITHIN GROUP (ORDER BY type)
FROM table
GROUP BY name
Result in
name type
------------
name1 type1
name2 type2; type3
This kind of question is asked here very often, and the solution is going to depend a lot on the underlying requirements:
https://stackoverflow.com/search?q=sql+pivot
and
https://stackoverflow.com/search?q=sql+concatenate
Typically, there is no SQL-only way to do this without either dynamic sql, a user-defined function, or a cursor.
Just to add to what Cade said, this is usually a front-end display thing and should therefore be handled there. I know that sometimes it's easier to write something 100% in SQL for things like file export or other "SQL only" solutions, but most of the time this concatenation should be handled in your display layer.
Don't need a cursor... a while loop is sufficient.
------------------------------
-- Setup
------------------------------
-- @Source holds the rows to concatenate; @Target receives one row per id.
-- Table variables and scalar variables must be named with a leading @.
DECLARE @Source TABLE
(
    id int,
    Name varchar(30),
    Value int
)
DECLARE @Target TABLE
(
    id int,
    Result varchar(max)
)
INSERT INTO @Source(id, Name, Value) SELECT 1, 'A', 4
INSERT INTO @Source(id, Name, Value) SELECT 1, 'B', 8
INSERT INTO @Source(id, Name, Value) SELECT 2, 'C', 9
------------------------------
-- Technique
------------------------------
-- Seed the target with every distinct id; Result is filled in by the loop.
INSERT INTO @Target (id)
SELECT id
FROM @Source
GROUP BY id

DECLARE @id int, @Result varchar(max)

-- Walk the ids in ascending order; @id becomes NULL after the last one.
SET @id = (SELECT MIN(id) FROM @Target)
WHILE @id IS NOT NULL
BEGIN
    SET @Result = NULL

    -- Variable-assignment SELECT: each matching row appends its 'Name:Value'
    -- to @Result, preceded by ', ' for every row after the first.
    SELECT @Result =
        CASE
            WHEN @Result IS NULL
            THEN ''
            ELSE @Result + ', '
        END + s.Name + ':' + CONVERT(varchar(30), s.Value)
    FROM @Source s
    WHERE s.id = @id

    UPDATE @Target
    SET Result = @Result
    WHERE id = @id

    -- Advance to the next larger id, or NULL when none is left.
    SET @id = (SELECT MIN(id) FROM @Target WHERE @id < id)
END

SELECT id, Result
FROM @Target
Let's get very simple:
-- Concatenate the rows of the inner query into a single ', '-separated string.
SELECT stuff(
(
-- Each value is prefixed with ', '; FOR XML PATH('') then glues all rows
-- together into one string.
select ', ' + x from (SELECT 'xxx' x union select 'yyyy') tb
FOR XML PATH('')
)
-- Remove the 2 characters starting at position 1, i.e. the leading ', '.
, 1, 2, '')
Replace this line:
select ', ' + x from (SELECT 'xxx' x union select 'yyyy') tb
With your query.
You can improve performance significantly in the following way if most groups contain only one item:
-- One row per ID with its 'Name:Value' pairs joined by ', '.
-- Groups that contain a single row take a cheap branch that formats the pair
-- directly; only multi-row groups go through the FOR XML subquery.
-- (Intended as a shortcut when most groups have one row; confirm the gain
-- against the actual execution plan.)
SELECT
    [ID],
    CASE
        -- COUNT(*) = 1 identifies a true single-row group. Comparing
        -- MAX([Name]) with MIN([Name]) would also match multi-row groups
        -- whose rows share a name and would drop their values.
        WHEN COUNT(*) = 1 THEN
            MAX([Name]) + ':' + CAST(MAX([Value]) AS VARCHAR(MAX))
        ELSE
            STUFF((
                SELECT ', ' + [Name] + ':' + CAST([Value] AS VARCHAR(MAX))
                FROM #YourTable
                WHERE (ID = Results.ID)
                FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)')
            , 1, 2, '')
    -- The column alias belongs after END; it cannot appear inside a branch.
    END AS NameValues
FROM #YourTable Results
GROUP BY ID
I didn't see any CROSS APPLY answers; there is also no need for XML extraction. Here is a slightly different version of what Kevin Fairchild wrote. It's faster and easier to use in more complex queries:
-- One row per ID with its 'Name:Value' pairs joined by ', '.
-- CROSS APPLY builds the list for each source row; MAX() then keeps a single
-- copy of that (identical) list per ID.
SELECT
    src.ID,
    MAX(joined.cl) AS NameValues
FROM #YourTable AS src
CROSS APPLY (
    SELECT STUFF(
        (
            SELECT ', ' + [Name] + ':' + CAST([Value] AS VARCHAR(MAX))
            FROM #YourTable
            WHERE ID = src.ID
            FOR XML PATH('')
        ),
        1, 2, ''    -- remove the leading ', '
    ) AS [cl]
) AS joined
GROUP BY src.ID
Using STUFF and FOR XML PATH to concatenate rows into a string, including a version that groups by two columns:
-- Sample data: ID 1 has two rows named 'B', so grouping by (ID, Name) gives a
-- different result from grouping by ID alone.
CREATE TABLE #YourTable ([ID] INT, [Name] CHAR(1), [Value] INT)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (1,'A',4)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (1,'B',8)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (1,'B',5)
INSERT INTO #YourTable ([ID],[Name],[Value]) VALUES (2,'C',9)

-- Query 1: one row per ID, concatenating every Name:Value pair of that ID.
SELECT
    [ID],
    STUFF((
        SELECT ', ' + [Name] + ':' + CAST([Value] AS VARCHAR(MAX)) -- builds each 'Name:Value' item
        FROM #YourTable
        -- Correlate on ID only: the outer query groups by ID, so the outer
        -- Name column is not available here (referencing it is an error).
        WHERE (ID = Results.ID)
        FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
    ,1,2,'') AS NameValues
FROM #YourTable Results
GROUP BY ID

-- Query 2: one row per (ID, Name), concatenating only the values.
SELECT
    [ID],
    [Name], -- both columns appear in the GROUP BY below
    STUFF((
        SELECT ', ' + CAST([Value] AS VARCHAR(MAX)) -- builds the value list for one ID/Name combination
        FROM #YourTable
        WHERE (ID = Results.ID AND Name = Results.[Name])
        FOR XML PATH(''),TYPE).value('(./text())[1]','VARCHAR(MAX)')
    ,1,2,'') AS NameValues
FROM #YourTable Results
GROUP BY ID, Name

DROP TABLE #YourTable
Using Replace Function and FOR JSON PATH
-- One row per DEPT with its employee names in a single string.
-- The inner query renders the names of a department as JSON objects
-- ({"ENAME":"..."} separated by commas); the two REPLACE calls then strip the
-- object wrapper around every name, leaving the commas in place.
SELECT
    dept_names.DEPT,
    REPLACE(REPLACE(dept_names.ENAME, '{"ENAME":"', ''), '"}', '') AS ENAME_LIST
FROM (
    SELECT
        d.DEPT,
        (
            SELECT e.ENAME AS [ENAME]
            FROM EMPLOYEE AS e
            WHERE e.DEPT = d.DEPT
            FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
        ) AS ENAME
    FROM EMPLOYEE AS d
    GROUP BY d.DEPT
) AS dept_names
For sample data and more ways click here
If you have clr enabled you could use the Group_Concat library from GitHub
Another example without the garbage: ",TYPE).value('(./text())[1]','VARCHAR(MAX)')"
-- Inline sample rows (n, g, v), then one output row per g whose cg column is
-- the ', '-separated list of that group's v values.
WITH t AS (
    SELECT 1 AS n, 1 AS g, 1 AS v
    UNION ALL SELECT 2, 1, 2
    UNION ALL SELECT 3, 2, 3
)
SELECT
    grp.g,
    STUFF(
        (
            SELECT ', ' + CAST(item.v AS VARCHAR(MAX))
            FROM t AS item
            WHERE item.g = grp.g
            FOR XML PATH('')
        ),
        1, 2, ''    -- remove the leading ', '
    ) AS cg
FROM t AS grp
GROUP BY grp.g
Input-output is
************************* -> *********************
* n * g * v * * g * cg *
* - * - * - * * - * - *
* 1 * 1 * 1 * * 1 * 1, 2 *
* 2 * 1 * 2 * * 2 * 3 *
* 3 * 2 * 3 * *********************
*************************
I used this approach which may be easier to grasp. Get a root element, then concat to choices any item with the same ID but not the 'official' name
-- Builds, per IdxId, a quoted comma-separated list of titles: the "official"
-- title (MAX(Title)) first, followed by every other title with the same IdxId.
-- Table variables must be named with a leading @.
DECLARE @IdxList AS TABLE (id int, choices varchar(max), AisName varchar(255))

-- Root element: one row per IdxId, seeded with its quoted official title.
INSERT INTO @IdxList (id, choices, AisName)
SELECT IdxId, '''' + MAX(Title) + '''', MAX(Title)
FROM [dbo].[dta_Alias]
WHERE IdxId IS NOT NULL
GROUP BY IdxId

-- Append all remaining titles of each id in one pass. A plain
-- UPDATE ... FROM joined to dta_Alias would apply only one of the matching
-- rows per target row, so the extra titles are first aggregated into a single
-- string per id and then appended once.
UPDATE l
SET choices = l.choices + extra.list
FROM @IdxList AS l
CROSS APPLY (
    SELECT (
        SELECT ',''' + a.Title + ''''
        FROM [dbo].[dta_Alias] AS a
        WHERE a.IdxId = l.id
          AND a.Title <> l.AisName
        FOR XML PATH(''), TYPE
    ).value('.', 'varchar(max)') AS list
) AS extra
WHERE extra.list IS NOT NULL   -- ids without other titles stay unchanged

-- Only ids whose list contains a comma are returned.
SELECT id, choices, AisName
FROM @IdxList
WHERE choices LIKE '%,%'
For all my healthcare folks out there:
-- One row per NOTE_ID with all of its note_text lines joined by a single
-- space, in [line] order.
SELECT
    s.NOTE_ID,
    STUFF((
        -- The separator is put in front of every line so that STUFF below
        -- removes exactly that one leading space. Appending it as a suffix
        -- and then stripping two leading characters would cut off the start
        -- of the note text.
        SELECT ' ' + s1.[note_text]
        FROM HNO_NOTE_TEXT s1
        WHERE s1.NOTE_ID = s.NOTE_ID
        ORDER BY s1.[line] ASC
        FOR XML PATH(''), TYPE).value('(./text())[1]', 'VARCHAR(MAX)')
    , 1, 1, '') AS NOTE_TEXT_CONCATINATED
FROM HNO_NOTE_TEXT s
GROUP BY s.NOTE_ID