Cannot pivot table with my query? - sql

I have a table which looks like this -
Id AttributeName AttributeValue
A1 Atr1 A1V1
A1 Atr2 A1V2
A1 Atr3 A1V3
A2 Atr1 A2V1
A2 Atr2 A2V2
A2 Atr3 A3V3
Each ID in this table has exactly the same attributes, i.e. Atr1, Atr2, Atr3. The values of these attributes are unique.
I want to pivot this table and get the following output -
Id Atr1 Atr2 Atr3
A1 A1V1 A1V2 A1V3
A2 A2V1 A2V2 A2V3
How do I do this ?
I tried a query and it failed with the error - Msg 156, Level 15, State 1, Line 21
Incorrect syntax near the keyword 'FOR'.
-- Table for the sample data. NOTE(review): "#MyTable" was presumably "@MyTable" (a table variable) before extraction -- confirm
DECLARE #MyTable TABLE
(Id varchar(25),
AttributeName varchar(30),
AttributeValue varchar(30))
-- Load sample data: three attribute rows for each of the two Ids
INSERT INTO #MyTable VALUES ('A1', 'Atr1', 'A1V1')
INSERT INTO #MyTable VALUES ('A1', 'Atr2', 'A1V2')
INSERT INTO #MyTable VALUES ('A1', 'Atr3', 'A1V3')
INSERT INTO #MyTable VALUES ('A2', 'Atr1', 'A2V1')
INSERT INTO #MyTable VALUES ('A2', 'Atr2', 'A2V2')
INSERT INTO #MyTable VALUES ('A2', 'Atr3', 'A3V3')
SELECT Id, [Atr1], [Atr2],[Atr3] -- one output column per attribute name
FROM
(
SELECT ID, AttributeName, AttributeValue
FROM #MyTable) AS SourceTable
PIVOT
(
AttributeValue -- bare column, no aggregate function: PIVOT needs one here (e.g. MAX), which is why the parser stops at FOR
FOR AttributeName IN ([ATR1], [ATR2], [ATR3])
) AS pvt

Just to expand on the other answers, the PIVOT function requires some type of aggregation. Since the value that you want to convert from a row into a column is a string, then you are limited to using either the max() or min() aggregate function.
While @Muhammed Ali's answer will work when you have a single AttributeName/AttributeValue pair per ID, if you have multiple values for the same ID and AttributeName, then you will only get back either the max or the min value.
For example if your sample data is:
-- Sample data in which Id A1 carries two different values for Atr1
INSERT INTO #MyTable (Id, AttributeName, AttributeValue)
VALUES
    ('A1', 'Atr1', 'A1V1'),
    ('A1', 'Atr1', 'A1V4'),
    ('A1', 'Atr2', 'A1V2'),
    ('A1', 'Atr3', 'A1V3'),
    ('A2', 'Atr1', 'A2V1'),
    ('A2', 'Atr2', 'A2V2'),
    ('A2', 'Atr3', 'A3V3');
Even though you have multiple rows for the combination of A1 and Atr1, the other queries are only returning the max(attributevalue):
| ID | ATR1 | ATR2 | ATR3 |
|----|------|------|------|
| A1 | A1V4 | A1V2 | A1V3 |
| A2 | A2V1 | A2V2 | A3V3 |
I would guess that you would actually want to return all of the combinations. I suggest expanding your query to include the windowing function, row_number() in your query. This query generates a unique value that will then be included in the grouping aspect of the PIVOT and will allow you to return more than one row for each ID.
By adding the row_number(), the query will be similar to the following:
-- seq numbers repeated values within each (Id, AttributeName); because seq is
-- neither aggregated nor pivoted it becomes part of PIVOT's implicit grouping,
-- so each repeated value lands on its own output row.
SELECT
    Id,
    [Atr1],
    [Atr2],
    [Atr3]
FROM
(
    SELECT
        ID,
        AttributeName,
        AttributeValue,
        ROW_NUMBER() OVER (PARTITION BY ID, AttributeName
                           ORDER BY AttributeValue) AS seq
    FROM #MyTable
) AS src
PIVOT
(
    MAX(AttributeValue)
    FOR AttributeName IN ([ATR1], [ATR2], [ATR3])
) AS pvt
ORDER BY Id;
See SQL Fiddle with Demo. You will get a result that returns all rows:
| ID | ATR1 | ATR2 | ATR3 |
|----|------|--------|--------|
| A1 | A1V1 | A1V2 | A1V3 |
| A1 | A1V4 | (null) | (null) |
| A2 | A2V1 | A2V2 | A3V3 |
If you are having trouble grasping the concept of PIVOT, then I would suggest look at using a combination of an aggregate function with a CASE expression to get the result. You can then see the grouping of the sequence/id:
-- Same result as the PIVOT form, with the grouping on (ID, seq) spelled out.
SELECT
    ID AS Id,
    MAX(CASE AttributeName WHEN 'Atr1' THEN AttributeValue END) AS Atr1,
    MAX(CASE AttributeName WHEN 'Atr2' THEN AttributeValue END) AS Atr2,
    MAX(CASE AttributeName WHEN 'Atr3' THEN AttributeValue END) AS Atr3
FROM
(
    SELECT
        ID,
        AttributeName,
        AttributeValue,
        ROW_NUMBER() OVER (PARTITION BY ID, AttributeName
                           ORDER BY AttributeValue) AS seq
    FROM #MyTable
) AS src
GROUP BY ID, seq
See SQL Fiddle with Demo

-- Table variable holding one row per (Id, AttributeName) pair.
-- A table variable must be named with a leading @; "DECLARE #name TABLE" does not parse.
DECLARE @MyTable TABLE
(
    Id             varchar(25),
    AttributeName  varchar(30),
    AttributeValue varchar(30)
);

-- Load sample data
INSERT INTO @MyTable (Id, AttributeName, AttributeValue)
VALUES
    ('A1', 'Atr1', 'A1V1'),
    ('A1', 'Atr2', 'A1V2'),
    ('A1', 'Atr3', 'A1V3'),
    ('A2', 'Atr1', 'A2V1'),
    ('A2', 'Atr2', 'A2V2'),
    ('A2', 'Atr3', 'A3V3');

-- PIVOT always needs an aggregate. With a single value per Id/attribute,
-- MAX simply returns that value.
SELECT
    Id,
    [Atr1],
    [Atr2],
    [Atr3]
FROM
(
    SELECT Id, AttributeName, AttributeValue
    FROM @MyTable
) AS SourceTable
PIVOT
(
    MAX(AttributeValue)
    -- Spelled exactly like the stored attribute names and the select list,
    -- so the query also works under a case-sensitive collation.
    FOR AttributeName IN ([Atr1], [Atr2], [Atr3])
) AS pvt;
You are missing the aggregate function (e.g. MAX) in your PIVOT syntax.
Result Set
Id Atr1 Atr2 Atr3
A1 A1V1 A1V2 A1V3
A2 A2V1 A2V2 A3V3

Try this:
-- Conditional aggregation: one row per id, one column per attribute.
-- Each CASE must return AttributeValue (not AttributeName), and the grouping
-- must be on id alone; grouping by AttributeValue as well would leave one row
-- per value instead of one row per id.
SELECT
    id,
    MAX(CASE AttributeName WHEN 'Atr1' THEN AttributeValue ELSE '' END) AS atr1,
    MAX(CASE AttributeName WHEN 'Atr2' THEN AttributeValue ELSE '' END) AS atr2,
    MAX(CASE AttributeName WHEN 'Atr3' THEN AttributeValue ELSE '' END) AS atr3
FROM YourTable
GROUP BY id
ORDER BY id

Related

How to use Group by format for XML format data in SQL server

I have this data with me.
-- Sample marks, several subjects per ID.
-- Created as a temp table ("DECLARE #T TABLE" does not parse) so that the
-- query shown separately below, which reads from #T, runs against it as written.
CREATE TABLE #T
(
    ID        int,
    [subject] varchar(30),
    Marks     int
);

INSERT INTO #T (ID, [subject], Marks)
VALUES
    (1, 'Maths',   78),
    (1, 'Science', 89),
    (2, 'Maths',   90),
    (3, 'Maths',   91),
    (4, 'Maths',   92);
I tried this query:
-- Serialises each source row on its own as a <body> element (one output row per input row).
SELECT
    t1.ID,
    (SELECT t1.* FOR XML PATH('body'), TYPE) AS TheRowAsXml
FROM #T AS t1
But it is giving 1 Row for every Entry.
I want to group by ID
My expected output is like:
<body>
<ID>1</ID>
<subject>Maths</subject>
<Marks>78</Marks>
<subject>Science</subject>
<Marks>89</Marks>
</body>
For ID 1 and so on
Please any suggestion would be appreciated
You can generate using subqueries, as given below:
-- Sample marks, several subjects per ID (table variables are named with @).
DECLARE @T TABLE
(
    ID        int,
    [subject] varchar(30),
    Marks     int
);

INSERT INTO @T (ID, [subject], Marks)
VALUES
    (1, 'Maths',   78),
    (1, 'Science', 89),
    (2, 'Maths',   90),
    (3, 'Maths',   91),
    (4, 'Maths',   92);

-- One row per id: the correlated subquery renders every subject/marks pair of
-- that id as XML text, and CONCAT wraps it in <body> together with the <ID> element.
-- DISTINCT collapses the duplicates that come from scanning one row per mark.
SELECT DISTINCT
    t.id,
    CONCAT(
        '<body><ID>', CAST(t.id AS varchar(10)), '</ID>',
        (
            SELECT s.subject, s.marks
            FROM @T AS s
            WHERE s.id = t.id
            FOR XML PATH('')
        ),
        '</body>'
    ) AS Rowxml
FROM @T AS t
+----+-------------------------------------------------------------------------------------------------------------+
| id | Rowxml |
+----+-------------------------------------------------------------------------------------------------------------+
| 1 | <body><ID>1</ID><subject>Maths</subject><marks>78</marks><subject>Science</subject><marks>89</marks></body> |
| 2 | <body><ID>2</ID><subject>Maths</subject><marks>90</marks></body> |
| 3 | <body><ID>3</ID><subject>Maths</subject><marks>91</marks></body> |
| 4 | <body><ID>4</ID><subject>Maths</subject><marks>92</marks></body> |
+----+-------------------------------------------------------------------------------------------------------------+
UPDATE
one more cleaner approach
-- One <body> element per id: the CROSS APPLY renders that id's subject/marks pairs,
-- and GROUP BY t.id collapses the identical copies produced for each source row
-- (MAX just picks one of them).
-- NOTE(review): the inner FOR XML has no TYPE directive, so rowxml is a string;
-- verify that the outer FOR XML does not entity-escape the markup -- confirm.
-- NOTE(review): "#t"/"#T" were presumably "@T" before extraction -- confirm.
SELECT id "ID",max(t2.rowxml)
from #t as t
CROSS APPLY
(SELECT subject, marks
from #T WHERE id = t.id
for xml path('')
) as t2(rowxml)
group by t.id
for xml path('body')

SQL: Deleting row which values already exist

I have a table that look like this:
ID | DATE | NAME | VALUE_1 | VALUE_2
1 | 27.11.2015 | Homer | A | B
2 | 27.11.2015 | Bart | C | B
3 | 28.11.2015 | Homer | A | C
4 | 28.11.2015 | Maggie | C | B
5 | 28.11.2015 | Bart | C | B
I currently delete duplicate rows (thank to this thread) using this code :
-- Number rows within each (VALUE_1, VALUE_2) combination, newest first,
-- then delete everything except the newest row of each combination.
WITH cte AS
(
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY [VALUE_1], [VALUE_2]
            ORDER BY [DATE] DESC
        ) AS RN
    FROM [MY_TABLE]
)
DELETE
FROM cte
WHERE RN > 1
But this code doesn't delete exactly the rows I want. I would like to delete only rows whose values already exist, so in my example I would like to delete only row 5, because row 2 has the same values and is older.
Code to create my table and insert values:
-- Table and sample rows for the duplicate-removal question
CREATE TABLE [t_diff_values]
(
    [id]   INT IDENTITY NOT NULL PRIMARY KEY,
    [date] DATETIME     NOT NULL,
    [name] VARCHAR(255) NOT NULL DEFAULT '',
    [val1] CHAR(1)      NOT NULL DEFAULT '',
    [val2] CHAR(1)      NOT NULL DEFAULT ''
);

INSERT INTO [t_diff_values]
    ([date], [name], [val1], [val2])
VALUES
    ('2015-11-27', 'Homer',  'A', 'B'),
    ('2015-11-27', 'Bart',   'C', 'B'),
    ('2015-11-28', 'Homer',  'A', 'C'),
    ('2015-11-28', 'Maggie', 'C', 'B'),
    ('2015-11-28', 'Bart',   'C', 'B');
You need to add one more CTE where you will index all islands and then apply your duplicate logic in second CTE:
-- Sample data (table variables are named with @; "DECLARE #t TABLE" does not parse)
DECLARE @t TABLE
(
    ID      INT,
    DATE    DATE,
    VALUE_1 CHAR(1),
    VALUE_2 CHAR(1)
);

INSERT INTO @t (ID, DATE, VALUE_1, VALUE_2)
VALUES
    (1, '20151127', 'A', 'B'),
    (2, '20151128', 'C', 'B'),
    (3, '20151129', 'A', 'B'),
    (4, '20151130', 'A', 'B');

WITH cte1 AS
(
    -- gr = overall position by date minus position within the same value pair.
    -- It stays constant across a run of consecutive rows sharing (VALUE_1, VALUE_2),
    -- so (VALUE_1, VALUE_2, gr) identifies one "island".
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY DATE)
        - ROW_NUMBER() OVER (PARTITION BY VALUE_1, VALUE_2 ORDER BY DATE) AS gr
    FROM @t
),
cte2 AS
(
    -- rn numbers the rows inside each island, oldest first
    SELECT
        *,
        ROW_NUMBER() OVER (PARTITION BY VALUE_1, VALUE_2, gr ORDER BY DATE) AS rn
    FROM cte1
)
-- Keep the oldest row of every island, delete the repeats that follow it
DELETE FROM cte2
WHERE rn > 1;

SELECT *
FROM @t;
Try this
CREATE TABLE [dbo].[Employee]
(
    [ID]   INT         NOT NULL,
    [Date] DATETIME    NOT NULL,
    [VAL1] VARCHAR(20) NOT NULL,
    [VAL2] VARCHAR(20) NOT NULL
);

INSERT INTO [dbo].[Employee] ([ID], [Date], [VAL1], [VAL2])
VALUES
    (1, '2015-11-27 10:44:33.087', 'A', 'B'),
    (2, '2015-11-28 10:44:33.087', 'C', 'B'),
    (3, '2015-11-29 10:44:33.087', 'A', 'B'),
    (4, '2015-11-30 10:44:33.087', 'A', 'B');
-- The statement before WITH must end with a semicolon, otherwise the CTE does not parse.

WITH cte AS
(
    SELECT
        *,
        -- rn: position inside the (VAL1, VAL2) group, newest first
        rn = ROW_NUMBER() OVER (PARTITION BY [VAL1], [VAL2]
                                ORDER BY [Date] DESC),
        -- cc: number of rows in the group
        cc = COUNT(*) OVER (PARTITION BY [VAL1], [VAL2])
    FROM [Employee]
)
-- Removes the rows that are neither the newest (rn = 1) nor the oldest (rn = cc)
-- of their group.
DELETE
FROM cte
WHERE rn > 1
  AND rn < cc;

SELECT *
FROM [Employee];
You could use this query:
-- Number the rows by ID, then delete each row whose immediate successor
-- (RN + 1) carries the same VALUE_1 and VALUE_2.
WITH cte AS
(
    SELECT
        RN = ROW_NUMBER() OVER (ORDER BY ID),
        *
    FROM #data
)
DELETE FROM c1
FROM cte AS c1
INNER JOIN cte AS c2
    ON  c1.RN + 1   = c2.RN
    AND c1.VALUE_1  = c2.VALUE_1
    AND c1.VALUE_2  = c2.VALUE_2
Here I order them by ID. If the next one (RN+1) has similar V1 and V2, it is deleted.
Output:
ID DATE VALUE_1 VALUE_2
1 2015-11-27 A B
2 2015-11-28 C B
4 2015-11-30 A B
Data:
-- Sample data for the query above. Created as a temp table ("DECLARE #data TABLE"
-- does not parse) so that the query, which reads from #data, works as written.
CREATE TABLE #data
(
    ID      int,
    [DATE]  date,
    VALUE_1 char(1),
    VALUE_2 char(1)
);

INSERT INTO #data (ID, [DATE], VALUE_1, VALUE_2)
VALUES
    (1, '20151127', 'A', 'B'),
    (2, '20151128', 'C', 'B'),
    (3, '20151129', 'A', 'B'),
    (4, '20151130', 'A', 'B');

Transfer Rows to columns

I have something like this in the xml
c1.1 test
c1.2 10
c1.3 100
c2.1 test1
c2.2 10
c2.3 1000
and i want to transform into like this
test 10 100
test1 10 1000
Please help. I tried with PIVOT and could not crack it. I need to mention that c1.1, c1.2, c1.3 form a series, and these 3 have to end up in 1 row.
Pivot will do exactly what you want, something like this:
-- First two characters of the code pick the output row, the last character
-- picks the output column.
SELECT *
FROM
(
    SELECT
        LEFT(type, 2)  AS row,
        RIGHT(type, 1) AS col,
        value
    FROM Table1
) AS S
PIVOT
(
    MAX(value)
    FOR col IN ([1], [2], [3])
) AS P
Example in SQL Fiddle
SQL Fiddle
MS SQL Server 2014 Schema Setup:
-- Code/value pairs: the part before the dot names the series,
-- the part after it the position within the series.
CREATE TABLE Table1
(
    [Code]  varchar(20),
    [Value] varchar(20)
);

INSERT INTO Table1 ([Code], [Value])
VALUES
    ('c1.1', 'test'),
    ('c1.2', '10'),
    ('c1.3', '100'),
    ('c2.1', 'test1'),
    ('c2.2', '10'),
    ('c2.3', '1000');
Query 1:
-- For every row, look one and two rows ahead within the same series
-- (text before the dot), ordered by the text after the dot. Only the first row
-- of a three-row series has a non-NULL Value3, so the filter keeps one row per series.
SELECT
    derived.Value1,
    derived.Value2,
    derived.Value3
FROM
(
    SELECT
        t1.Value AS Value1,
        LEAD(t1.Value, 1) OVER (
            PARTITION BY LEFT(t1.code, CHARINDEX('.', t1.code) - 1)
            ORDER BY SUBSTRING(t1.code, CHARINDEX('.', t1.code) + 1, LEN(t1.code))
        ) AS Value2,
        LEAD(t1.Value, 2) OVER (
            PARTITION BY LEFT(t1.code, CHARINDEX('.', t1.code) - 1)
            ORDER BY SUBSTRING(t1.code, CHARINDEX('.', t1.code) + 1, LEN(t1.code))
        ) AS Value3
    FROM table1 AS t1
) AS derived
WHERE derived.Value3 IS NOT NULL
Results:
| Value1 | Value2 | Value3 |
|--------|--------|--------|
| test | 10 | 100 |
| test1 | 10 | 1000 |
On SQL Server 2008 we can do this using ROW_NUMBER. I have written this code based on the sample data given.
-- Sample data (table variables are named with @; "DECLARE #Table1 TABLE" does not parse)
DECLARE @Table1 TABLE
(
    val varchar(4),
    col varchar(5)
);

INSERT INTO @Table1 (val, col)
VALUES
    ('c1.1', 'test'),
    ('c1.2', '10'),
    ('c1.3', '100'),
    ('c2.1', 'test1'),
    ('c2.2', '10'),
    ('c2.3', '1000');

-- R  : the text after the last dot (read from the reversed string), i.e. the target column.
-- RN : position of the row among the rows sharing the same R, ordered by val;
--      it is the only column left over after the pivot, so it becomes the row grouping.
SELECT
    [1] AS [Value1],
    [2] AS [Value2],
    [3] AS [Value3]
FROM
(
    SELECT
        SUBSTRING(REVERSE(val), 0, CHARINDEX('.', REVERSE(val))) AS R,
        col,
        ROW_NUMBER() OVER (
            PARTITION BY SUBSTRING(REVERSE(val), 0, CHARINDEX('.', REVERSE(val)))
            ORDER BY val
        ) AS RN
    FROM @Table1
) AS T
PIVOT
(
    MAX(col)
    FOR R IN ([1], [2], [3])
) AS PVT

Merge a two way relation in the same table in SQL Server

Current Data
ID | Name1 | Name2
<guid1> | XMind | MindNode
<guid2> | MindNode | XMind
<guid3> | avast | Hitman Pro
<guid4> | Hitman Pro | avast
<guid5> | PPLive | Hola!
<guid6> | ZenMate | Hola!
<guid7> | Hola! | PPLive
<guid8> | Hola! | ZenMate
Required Output
ID1 | ID2 | Name1 | Name2
<guid1> | <guid2> | XMind | MindNode
<guid3> | <guid4> | avast | Hitman Pro
<guid5> | <guid7> | PPLive | Hola!
<guid6> | <guid8> | Hola! | ZenMate
These are relations between apps. I want to show that Avast and Hitman has a relation but in this view i do not need to show in what "direction" they have an relation. It's a given in this view that the relation goes both ways.
EDIT: It seems my example was too simple. The solution doesn't work with more data.
-- Sample relations, each stored once per direction
-- (table variables are named with @; "DECLARE #a TABLE" does not parse).
DECLARE @a TABLE
(
    ID    INT,
    Name1 VARCHAR(50),
    Name2 VARCHAR(50)
);

INSERT INTO @a (ID, Name1, Name2)
VALUES
    (1, 'XMind',                    'MindNode'),
    (2, 'MindNode',                 'XMind'),
    (3, 'avast',                    'Hitman Pro'),
    (4, 'Hitman Pro',               'avast'),
    (5, 'PPLive Video Accelerator', 'Hola! Better Internet'),
    (6, 'ZenMate',                  'Hola! Better Internet'),
    (7, 'Hola! Better Internet',    'PPLive Video Accelerator'),
    (8, 'Hola! Better Internet',    'ZenMate');

-- Pair each row with the rows whose Name2 equals its Name1,
-- keeping only the pairing in which the first ID is the smaller one.
SELECT
    a1.ID    AS ID1,
    a2.ID    AS ID2,
    a1.Name1,
    a2.Name1 AS Name2
FROM @a AS a1
INNER JOIN @a AS a2
    ON  a1.Name1 = a2.Name2
    AND a1.ID < a2.ID -- avoid duplicates
This works however so i guess it's the Guid that is messing with me.
EDIT AGAIN:
I haven't looked at this for a while and I thought it worked, but I just realized it does not. I've struggled all morning with this, but I must admit that SQL is not really my strong suit. The thing is this.
-- Sample relations (table variables are named with @; "DECLARE #a TABLE" does not parse)
DECLARE @a TABLE
(
    ID    int,
    Name1 VARCHAR(50),
    Name2 VARCHAR(50)
);

INSERT INTO @a (ID, Name1, Name2)
VALUES
    (1,  'XMind',                    'MindNode'),
    (2,  'MindNode',                 'XMind'),
    (3,  'avast',                    'Hitman Pro'),
    (4,  'PPLive Video Accelerator', 'Hola! Better Internet'),
    (5,  'ZenMate',                  'Hola! Better Internet'),
    (6,  'Hitman Pro',               'avast'),
    (7,  'Hola! Better Internet',    'PPLive Video Accelerator'),
    (8,  'Hola! Better Internet',    'ZenMate'),
    (9,  'XX',                       'A'),
    (10, 'XX',                       'BB'),
    (11, 'BB',                       'XX'),
    (12, 'A',                        'XX'),
    (13, 'XX',                       'CC'),
    (14, 'CC',                       'XX');

WITH CTE AS
(
    SELECT
        a1.ID    AS ID1,
        a2.ID    AS ID2,
        a1.Name1,
        a2.Name1 AS Name2,
        -- Checksum of the two names concatenated in sorted order, so both
        -- directions of a pair produce the same value
        CHECKSUM(CASE WHEN a1.Name1 > a2.Name1 THEN a2.Name1 + a1.Name1 ELSE a1.Name1 + a2.Name1 END) AS ck, -- just for display
        ROW_NUMBER() OVER (
            PARTITION BY CHECKSUM(CASE WHEN a1.Name1 > a2.Name1 THEN a2.Name1 + a1.Name1 ELSE a1.Name1 + a2.Name1 END)
            ORDER BY CHECKSUM(CASE WHEN a1.Name1 > a2.Name1 THEN a2.Name1 + a1.Name1 ELSE a1.Name1 + a2.Name1 END)
        ) AS rn
    FROM @a AS a1
    -- The join matches on one name only, so the two IDs of an output row need
    -- not belong to the same relation.
    INNER JOIN @a AS a2
        ON a1.Name1 = a2.Name2
)
SELECT ID1, ID2, Name1, Name2
FROM CTE AS C1
WHERE rn = 1
When i use this code it sure works fine with the names but it doesn't match the ID's correctly.
The result is
ID1 | ID2 | Name1 | Name2
12 | 9 | A | X (Correct)
7 | 5 | Hola! | ZenMate (Not Correct)
[..]
I've pulled my hair all morning but i can't figure this out. I still use Guid's as ID's and just use Int's here to make it a bit more readable.
-- Minimal sample (table variables are named with @; "DECLARE #a TABLE" does not parse)
DECLARE @a TABLE
(
    ID    INT,
    Name1 VARCHAR(50),
    Name2 VARCHAR(50)
);

INSERT INTO @a (ID, Name1, Name2)
VALUES
    (1, 'XMind',      'MindNode'),
    (2, 'MindNode',   'XMind'),
    (3, 'avast',      'Hitman Pro'),
    (4, 'Hitman Pro', 'avast');

-- Pair each row with the rows whose Name2 equals its Name1,
-- keeping only the pairing in which the first ID is the smaller one.
SELECT
    a1.ID    AS ID1,
    a2.ID    AS ID2,
    a1.Name1,
    a2.Name1 AS Name2
FROM @a AS a1
INNER JOIN @a AS a2
    ON  a1.Name1 = a2.Name2
    AND a1.ID < a2.ID -- avoid duplicates
Referring to the amendment and extension of your question, a more complicated solution is required.
We build a CHECKSUM over a1.Name1 and a2.Name1, concatenating the two names in sorted order so that both directions of a relation produce the same checksum.
Using this we generate with ROW_NUMBER (Transact-SQL) a number and use only rows from the result with number 1.
-- Sample relations keyed by GUID
-- (table variables are named with @; "DECLARE #a TABLE" does not parse).
DECLARE @a TABLE
(
    ID    uniqueidentifier,
    Name1 VARCHAR(50),
    Name2 VARCHAR(50)
);

INSERT INTO @a (ID, Name1, Name2)
VALUES
    (NEWID(), 'XMind',                    'MindNode'),
    (NEWID(), 'MindNode',                 'XMind'),
    (NEWID(), 'avast',                    'Hitman Pro'),
    (NEWID(), 'Hitman Pro',               'avast'),
    (NEWID(), 'PPLive Video Accelerator', 'Hola! Better Internet'),
    (NEWID(), 'ZenMate',                  'Hola! Better Internet'),
    (NEWID(), 'Hola! Better Internet',    'PPLive Video Accelerator'),
    (NEWID(), 'Hola! Better Internet',    'ZenMate'),
    (NEWID(), 'XX',                       'A'),
    (NEWID(), 'A',                        'XX'),
    (NEWID(), 'XX',                       'BB'),
    (NEWID(), 'BB',                       'XX'),
    (NEWID(), 'XX',                       'CC'),
    (NEWID(), 'CC',                       'XX');

WITH CTE AS
(
    SELECT
        a1.ID    AS ID1,
        a2.ID    AS ID2,
        a1.Name1,
        a2.Name1 AS Name2,
        -- Checksum of the two names concatenated in sorted order, so both
        -- directions of a pair produce the same value
        CHECKSUM(CASE WHEN a1.Name1 > a2.Name1 THEN a2.Name1 + a1.Name1 ELSE a1.Name1 + a2.Name1 END) AS ck, -- just for display
        -- Numbers the rows sharing a checksum; only number 1 is kept below
        ROW_NUMBER() OVER (
            PARTITION BY CHECKSUM(CASE WHEN a1.Name1 > a2.Name1 THEN a2.Name1 + a1.Name1 ELSE a1.Name1 + a2.Name1 END)
            ORDER BY CHECKSUM(CASE WHEN a1.Name1 > a2.Name1 THEN a2.Name1 + a1.Name1 ELSE a1.Name1 + a2.Name1 END)
        ) AS rn
    FROM @a AS a1
    INNER JOIN @a AS a2
        ON a1.Name1 = a2.Name2
)
SELECT *
FROM CTE AS C1
WHERE rn = 1
Edit:
If you only want to get those where both fields are fitting the needed query would simply be:
-- Match a row only with its exact reverse (both names swapped),
-- reporting each pair once with the smaller ID first.
SELECT
    a1.ID    AS ID1,
    a2.ID    AS ID2,
    a1.Name1,
    a2.Name1 AS Name2
FROM #a AS a1
INNER JOIN #a AS a2
    ON  a1.Name1 = a2.Name2
    AND a1.Name2 = a2.Name1
    AND a1.ID < a2.ID
If the output should contain only two-way relations ('XX' + 'A') AND ('A' + 'XX'), try this:
;WITH m (ID1, ID2, Name1, Name2) AS
(
    -- Every row joined to its exact reverse; n keeps a single row
    -- per (Name1, Name2) combination.
    SELECT T.ID1, T.ID2, T.Name1, T.Name2
    FROM
    (
        SELECT
            a1.ID    AS ID1,
            a2.ID    AS ID2,
            a1.Name1 AS Name1,
            a2.Name1 AS Name2,
            ROW_NUMBER() OVER (PARTITION BY a1.Name1, a2.Name1 ORDER BY (SELECT 1)) AS n
        FROM #a AS a1
        INNER JOIN #a AS a2
            ON  a1.Name1 = a2.Name2
            AND a1.Name2 = a2.Name1
    ) AS T
    WHERE T.n = 1
)
-- Flip the rows where ID1 > ID2 so both directions of a pair look identical,
-- then let DISTINCT fold them into one.
SELECT DISTINCT *
FROM
(
    SELECT ID1, ID2, Name1, Name2
    FROM m
    WHERE ID1 <= ID2

    UNION ALL

    SELECT ID2, ID1, Name2, Name1
    FROM m
    WHERE ID1 > ID2
) AS dm
It produces the output as follows:
+------+-----+--------------------------+-----------------------+
| ID1 | ID2 | Name1 | Name2 |
+------+-----+--------------------------+-----------------------+
| 1 | 2 | XMind | MindNode |
| 3 | 6 | avast | Hitman Pro |
| 4 | 7 | PPLive Video Accelerator | Hola! Better Internet |
| 5 | 8 | ZenMate | Hola! Better Internet |
| 9 | 12 | XX | A |
| 10 | 11 | XX | BB |
| 13 | 14 | XX | CC |
+------+-----+--------------------------+-----------------------+
Just rank your rows with ROW_NUMBER function and use this rank in join instead of original ID column:
-- Sample relations keyed by GUID
-- (table variables are named with @; "DECLARE #a TABLE" does not parse).
DECLARE @a TABLE
(
    ID    UNIQUEIDENTIFIER,
    Name1 VARCHAR(50),
    Name2 VARCHAR(50)
);

INSERT INTO @a (ID, Name1, Name2)
VALUES
    (NEWID(), 'XMind',                    'MindNode'),
    (NEWID(), 'MindNode',                 'XMind'),
    (NEWID(), 'avast',                    'Hitman Pro'),
    (NEWID(), 'Hitman Pro',               'avast'),
    (NEWID(), 'PPLive Video Accelerator', 'Hola! Better Internet'),
    (NEWID(), 'ZenMate',                  'Hola! Better Internet'),
    (NEWID(), 'Hola! Better Internet',    'PPLive Video Accelerator'),
    (NEWID(), 'Hola! Better Internet',    'ZenMate');

-- rn gives every row a number that is used instead of the GUID to pick one
-- direction of each pair.
WITH cte AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY (SELECT 1)) AS rn
    FROM @a
)
SELECT
    a1.ID    AS ID1,
    a2.ID    AS ID2,
    a1.Name1,
    a2.Name1 AS Name2
FROM cte AS a1
INNER JOIN cte AS a2
    ON  a1.Name1 = a2.Name2
    AND a2.Name1 = a1.Name2
    AND a1.rn < a2.rn
Output:
ID1 ID2 Name1 Name2
Guid Guid XMind MindNode
Guid Guid avast Hitman Pro
Guid Guid PPLive Video Accelerator Hola! Better Internet
Guid Guid ZenMate Hola! Better Internet
I suggest you to use this simple way:
-- t1: every relation once, with the two names put in sorted order.
-- t2: the stored row in that order; t3: the stored row in the reverse order.
SELECT
    t2.ID,
    t3.ID AS ID2,
    t1.Name1,
    t1.Name2
FROM
(
    SELECT DISTINCT
        CASE WHEN Name1 <= Name2 THEN Name1 ELSE Name2 END AS Name1,
        CASE WHEN Name1 <= Name2 THEN Name2 ELSE Name1 END AS Name2
    FROM #a
) AS t1
INNER JOIN #a AS t2
    ON t1.Name1 + t1.Name2 = t2.Name1 + t2.Name2
INNER JOIN #a AS t3
    ON t1.Name1 + t1.Name2 = t3.Name2 + t3.Name1
For this:
ID | ID2 | Name1 | Name2
----+-----+-----------------------+---------------------------
12 | 9 | A | XX
3 | 4 | avast | Hitman Pro
11 | 10 | BB | XX
14 | 13 | CC | XX
7 | 5 | Hola! Better Internet | PPLive Video Accelerator
8 | 6 | Hola! Better Internet | ZenMate
2 | 1 | MindNode | XMind
You can solve this using a CROSS APPLY
-- For every row, look up its exact reverse with a smaller ID;
-- rows without such a partner are dropped by CROSS APPLY.
SELECT
    a2.ID AS ID_1,
    a1.ID AS ID_2,
    a2.Name1,
    a2.Name2
FROM #a AS a1
CROSS APPLY
(
    SELECT aa.ID, aa.Name2, aa.Name1
    FROM #a AS aa
    WHERE aa.Name1 = a1.Name2
      AND a1.Name1 = aa.Name2
      AND a1.ID > aa.ID
) AS a2
You can try also:
SELECT
    MIN(t.ID) AS ID1,
    MAX(t.ID) AS ID2,
    t.Name1,
    t.Name2
FROM
(
    -- All IDs, with the two names of each row put in a fixed order
    -- (swap > for < to flip that order)
    SELECT
        ID,
        CASE WHEN Name1 > Name2 THEN Name1 ELSE Name2 END AS Name1,
        CASE WHEN Name1 > Name2 THEN Name2 ELSE Name1 END AS Name2
    FROM table1
) AS t
GROUP BY
    t.Name1,
    t.Name2
You can even transform this into a single query, without the inner one, but I think it is more readable this way and makes my approach easier to understand.

SQL Server Group Concat with Different characters

I have looked through a number of solutions to emulating "Group concat" functionality in SQL Server. I wanted to make a more human readable solution though and I can't work out how to do it.
I have a view:
ParentID | ChildName
Which contains the records, for example:
1 | Max
1 | Jessie
2 | Steven
2 | Lucy
2 | Jake
3 | Mark
I want to "Group Concat" these to get:
1 | Max and Jessie
2 | Steven, Lucy and Jake
3 | Mark
So If there is only 1 child, just return name, if there are more than one, concat the last 2 with an ' and ' and all others with a ', '.
I am a bit stuck on how to do this without resorting to CLR, which I don't want to do. I am happy with a function - but speed is an issue and how do I determine the child number so I can choose between ' and ', ', ' or ''?
make a more human readable solution
Sorry, this is the best I can do with your requirement.
SQL Fiddle
MS SQL Server 2008 Schema Setup:
-- Children per parent, used by the concatenation query
CREATE TABLE YourTable
(
    ParentID  int,
    ChildName varchar(10)
);

INSERT INTO YourTable (ParentID, ChildName)
VALUES
    (1, 'Max'),
    (1, 'Jessie'),
    (2, 'Steven'),
    (2, 'Lucy'),
    (2, 'Jake'),
    (3, 'Mark');
Query 1:
-- Builds one human-readable list of child names per parent ("a, b and c").
with T as
(
select ParentID,
ChildName,
-- rn: position of the child within its parent, by name
row_number() over(partition by ParentID order by ChildName) as rn,
-- cc: number of children of the parent
count(*) over(partition by ParentID) as cc
from YourTable
)
select T1.ParentID,
(
-- Every name gets a separator in front of it: ' and ' for the rn = 1 row when
-- the parent has more than one child, ', ' otherwise.
select case
when T2.rn = 1 and T2.cc > 1 then ' and '
else ', '
end + T2.ChildName
from T as T2
where T1.ParentID = T2.ParentID
-- Descending rn puts the rn = 1 row (the ' and ' one) last in the string
order by T2.rn desc
for xml path(''), type
-- The first name is always prefixed with ', '; substring(..., 3) drops those two characters
).value('substring(text()[1], 3)', 'varchar(max)') as ChildNames
from T as T1
group by T1.ParentID
Results:
| PARENTID | CHILDNAMES |
------------------------------------
| 1 | Max and Jessie |
| 2 | Steven, Lucy and Jake |
| 3 | Mark |
-- Joins all child names of a parent with ' and '.
-- The separator is 5 characters long (space, 'and', space), so STUFF has to
-- strip 5 characters from the front; stripping only 4 leaves a leading space.
SELECT
    a.ParentID,
    STUFF(
        (
            SELECT ' and ' + c.ChildName
            FROM Table1 AS c
            WHERE c.ParentID = a.ParentID
            FOR XML PATH('')
        ),
        1, 5, ''
    ) AS cnmae
FROM Table1 AS a
GROUP BY a.ParentID
SQL FIDDLE DEMO
Good logical question. Please check the query below (bit lengthy, but could not stop myself posting my small logic :)).
-- Sample children per parent, kept in a temp table
CREATE TABLE #SampleTable ([ParentID] int, [ChildName] varchar(6));
INSERT INTO #SampleTable VALUES (1, 'Max')
INSERT INTO #SampleTable VALUES (1, 'Jessie')
INSERT INTO #SampleTable VALUES (2, 'Steven')
INSERT INTO #SampleTable VALUES (2, 'Lucy')
INSERT INTO #SampleTable VALUES (2, 'Jake')
INSERT INTO #SampleTable VALUES (3, 'Mark')
select * From #SampleTable
-- Recursive CTE that appends one child name per step.
-- xC1 = position of the last name appended, xC2 = number of children of the parent.
;WITH T(xParentID, xChildName, xChildNameResult, xC1, xC2)AS
(
-- Anchor: the first child (by name) of every parent starts the result string
SELECT * FROM(
SELECT
ParentID ,
ChildName,
CAST(ChildName AS NVARCHAR(MAX)) AS ChildNameResult,
ROW_NUMBER() OVER (PARTITION BY [ParentID] ORDER BY ChildName) C1,
COUNT(*) OVER (PARTITION BY [ParentID]) C2
FROM #SampleTable)x WHERE x.C1=1
UNION ALL
-- Recursive step: append the next child (C1 = xC1 + 1) of the same parent,
-- using ' and ' before the last child (C1 = C2) and ', ' before the others
SELECT ParentID, ChildName,
CAST(T.xChildNameResult+(CASE WHEN C1=1 THEN '' WHEN C1=C2 THEN ' and ' ELSE ', ' END)+ChildName AS NVARCHAR(MAX)), C1, C2
FROM
(
SELECT
ParentID ,
ChildName,
ROW_NUMBER() OVER (PARTITION BY ParentID order by ChildName) C1,
COUNT(*) OVER (PARTITION BY ParentID) C2
FROM #SampleTable
)y INNER JOIN T ON y.ParentID=T.xParentID and y.c1=T.xC1+1
-- Only the rows that have consumed every child (xC1 = xC2) hold the complete list
)SELECT xParentID, xChildNameResult FROM T where xC1=xC2
-- MAXRECURSION 0 removes the recursion-depth limit
OPTION (MAXRECURSION 0);