Related
I want to transpose the below rows dynamically into columns.
**Process Id Attribute Values**
1 Equipment Normal
1 Complaints No
1 Availability 30 min
2 Phone1 123456789
2 Phone2 987654321
I have tried to pivot it but I am unable to get the desired results. I need the below output
Process ID Attribute1 Value1 Attribute2 Value2 Attribute3 Value3
1 Equipment Normal Complaints No Availability 30 min
2 Phone1 123456789 Phone2 987654321 NULL NULL
One process can have one or more attributes. So if a process has 10 attributes, it should create 10 columns in the desired output. Any suggestions?
All recent ANSI-compliant databases should support this. Since no DBMS was specified, I stick to the newest ANSI standard.
-- Pivot up to three (attribute, value) pairs per process into columns.
WITH
-- Sample input. "ord" records the intended display order of each pair within
-- its process: rows in a table have no inherent order, so it must be explicit.
-- The value column is named "val" because VALUES is a reserved word.
input_rows (process_id, ord, attribute, val) AS (
              SELECT 1, 1, 'Equipment',    'Normal'
    UNION ALL SELECT 1, 2, 'Complaints',   'No'
    UNION ALL SELECT 1, 3, 'Availability', '30 min'
    UNION ALL SELECT 2, 1, 'Phone1',       '123456789'
    UNION ALL SELECT 2, 2, 'Phone2',       '987654321'
),
-- Number the pairs of each process 1..n. The ORDER BY makes the numbering
-- deterministic; without it the engine may assign seq in any order.
with_seq AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY process_id ORDER BY ord) AS seq,
        process_id,
        attribute,
        val
    FROM input_rows
)
-- MAX over a CASE that matches a single seq picks that one row's value;
-- slots with no matching row stay NULL.
SELECT
    process_id,
    MAX(CASE seq WHEN 1 THEN attribute END) AS attrib1,
    MAX(CASE seq WHEN 1 THEN val END)       AS val1,
    MAX(CASE seq WHEN 2 THEN attribute END) AS attrib2,
    MAX(CASE seq WHEN 2 THEN val END)       AS val2,
    MAX(CASE seq WHEN 3 THEN attribute END) AS attrib3,
    MAX(CASE seq WHEN 3 THEN val END)       AS val3
FROM with_seq
GROUP BY process_id
ORDER BY process_id;
-- out process_id | attrib1 | val1 | attrib2 | val2 | attrib3 | val3
-- out ------------+-----------+-----------+------------+-----------+--------------+-------
-- out 1 | Equipment | Normal | Complaints | No | Availability | 30 min
-- out 2 | Phone1 | 123456789 | Phone2 | 987654321 | |
Try this below code. It will provide desired output.
My sample Algorithm
1. Create manual named columns by using ROW_NUMBER()
2. Create Dynamic columns for Attribute,Value named as #columnsAttribute, #columnsValue
3. Create Dynamic group by columns for both Attribute & Value named as #Allcolumns
4. Dynamic query creation with pivot for both Attribute & Value
-- Dynamic pivot of (attribute, value) rows into Attribute1, Value1, Attribute2, Value2, ...
-- columns, one output row per processid. The number of column pairs follows the data.

-- Source rows. sortorder gives the intended position of each pair inside its
-- process, so that the numbering below is deterministic.
DECLARE @tblAttribute AS TABLE (
    processid INT,
    sortorder INT,
    attribute NVARCHAR(100),
    value     NVARCHAR(200)
);

INSERT INTO @tblAttribute (processid, sortorder, attribute, value)
VALUES (1, 1, 'Equipment',    'Normal'),
       (1, 2, 'Complaints',   'No'),
       (1, 3, 'Availability', '30 min'),
       (1, 4, 'test',         'testvalue'),
       (2, 1, 'Phone1',       '123456789'),
       (2, 2, 'Phone2',       '987654321');

IF OBJECT_ID('tempdb..#tblDynamicAttribute') IS NOT NULL
    DROP TABLE #tblDynamicAttribute;

-- Step 1: number the pairs of each process and derive the target column names.
-- A temp table is used because the dynamic batch below cannot see a table variable.
WITH ctetbl AS (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY processid ORDER BY sortorder) AS rno,
        processid,
        attribute,
        value
    FROM @tblAttribute
)
SELECT
    processid,
    rno,
    'Attribute' + CAST(rno AS NVARCHAR(10)) AS DynamicAttribute,
    'Value'     + CAST(rno AS NVARCHAR(10)) AS DynamicValue,
    attribute,
    value
INTO #tblDynamicAttribute
FROM ctetbl;

DECLARE @Allcolumns       AS NVARCHAR(MAX),
        @columnsAttribute AS NVARCHAR(MAX),
        @columnsValue     AS NVARCHAR(MAX),
        @sql              AS NVARCHAR(MAX);

-- Steps 2 and 3: build the column lists. They are ordered by the numeric rno
-- (so Attribute10 sorts after Attribute2) and concatenated with FOR XML PATH,
-- which honours ORDER BY.
SELECT @Allcolumns = STUFF((
        SELECT ',max(' + QUOTENAME(B.DynamicAttribute) + ') as ' + QUOTENAME(B.DynamicAttribute)
             + ',max(' + QUOTENAME(B.DynamicValue)     + ') as ' + QUOTENAME(B.DynamicValue)
        FROM (SELECT DISTINCT rno, DynamicAttribute, DynamicValue FROM #tblDynamicAttribute) AS B
        ORDER BY B.rno
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

SELECT @columnsAttribute = STUFF((
        SELECT ',' + QUOTENAME(B.DynamicAttribute)
        FROM (SELECT DISTINCT rno, DynamicAttribute FROM #tblDynamicAttribute) AS B
        ORDER BY B.rno
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

SELECT @columnsValue = STUFF((
        SELECT ',' + QUOTENAME(B.DynamicValue)
        FROM (SELECT DISTINCT rno, DynamicValue FROM #tblDynamicAttribute) AS B
        ORDER BY B.rno
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 1, '');

-- Step 4: pivot attributes, then values, and collapse to one row per processid.
SET @sql = N'
select x.processid,' + @Allcolumns + N'
from (
    select processid,' + @columnsAttribute + N',' + @columnsValue + N'
    from (
        select processid, attribute, value, DynamicAttribute, DynamicValue
        from #tblDynamicAttribute
    ) as t
    pivot (
        max(attribute)
        for DynamicAttribute in (' + @columnsAttribute + N')
    ) as pivot_table
    pivot (
        max(value)
        for DynamicValue in (' + @columnsValue + N')
    ) as pivot_table1
) as x
group by x.processid
order by x.processid;';

-- With no source rows the column lists, and therefore @sql, are NULL: nothing to run.
IF @sql IS NOT NULL
    EXECUTE sp_executesql @sql;

DROP TABLE #tblDynamicAttribute;
Sample Output
I am trying to transpose rows to columns but I didn't find any good answers.
Here is an example of what I want:
Input tables:
TABLE A
ID | NAME
1 | BOB
2 | JIM
3 | ROB
TABLE B
ID | CLUB
1 | 2
1 | 3
1 | 4
2 | 2
2 | 1
3 | 5
OUTPUT will be:
ID | CLUB1 | CLUB2 | CLUB3
1 | 2 | 3 | 4
2 | 2 | 1 |
3 | 5 | |
You need to enumerate the values to pivot them:
-- Number each id's clubs 1..n in ascending club order, then spread the first
-- three positions across columns (positions with no row stay NULL).
with numbered as (
    select
        b.id,
        b.club,
        row_number() over (partition by b.id order by b.club) as seqnum
    from b
)
select
    id,
    max(case seqnum when 1 then club end) as club_1,
    max(case seqnum when 2 then club end) as club_2,
    max(case seqnum when 3 then club end) as club_3
from numbered
group by id;
use conditional aggregation
-- Conditional aggregation keyed on each club's position within its id.
-- Testing "id = 1/2/3" while also grouping by id can only ever fill the one
-- column matching the row's own id, so the rows are numbered per id first.
select
    id,
    max(case when seqnum = 1 then club end) as club1,
    max(case when seqnum = 2 then club end) as club2,
    max(case when seqnum = 3 then club end) as club3
from (
    select
        id,
        club,
        row_number() over (partition by id order by club) as seqnum
    from tablename
) as numbered
group by id;
use case when
-- One row per person in tablea with that person's first three clubs as columns.
-- Clubs are numbered per id in ascending club order; the CASE expressions test
-- that position rather than the person's name, which could fill at most one
-- column per row.
select
    a.id,
    max(case when b.seqnum = 1 then b.club end) as club1,
    max(case when b.seqnum = 2 then b.club end) as club2,
    max(case when b.seqnum = 3 then b.club end) as club3
from tablea as a
inner join (
    select
        id,
        club,
        row_number() over (partition by id order by club) as seqnum
    from tableb
) as b
    on a.id = b.id
group by a.id;
Sample Data
-- Rebuild #TempTab: the sample (ID, CLUB) rows plus the pivot column name
-- (CLUB1, CLUB2, ...) each row should land in.
IF OBJECT_ID('tempdb..#TempTab') IS NOT NULL
    DROP TABLE #TempTab;

WITH CTE (ID, CLUB) AS (
    SELECT 1, 2 UNION ALL
    SELECT 1, 3 UNION ALL
    SELECT 1, 4 UNION ALL
    SELECT 2, 2 UNION ALL
    SELECT 2, 1 UNION ALL
    SELECT 3, 5
)
SELECT
    ID,
    CLUB,
    -- Ordering by CLUB makes the numbering deterministic (ascending club per ID).
    -- Ordering by ID, the partition key itself, leaves every row tied, so the
    -- engine may number them in any order.
    'CLUB' + CAST(ROW_NUMBER() OVER (PARTITION BY ID ORDER BY CLUB) AS VARCHAR(10)) AS CLUBData
INTO #TempTab
FROM CTE;
Dynamic sql
-- Build and run a PIVOT over #TempTab whose column list is taken from the
-- distinct CLUBData values present in the data.
DECLARE @Column NVARCHAR(MAX),   -- comma-separated, bracket-quoted pivot column names
        @Sql    NVARCHAR(MAX);   -- the generated statement

-- Sort by length and then by name so that CLUB10 follows CLUB9 rather than CLUB1.
-- STUFF removes the leading ', ' separator (two characters).
SELECT @Column = STUFF((
        SELECT ', ' + QUOTENAME(CLUBData)
        FROM #TempTab
        GROUP BY CLUBData
        ORDER BY LEN(CLUBData), CLUBData
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 2, '');

SET @Sql = N'SELECT Id, ' + @Column + N'
FROM
(
    SELECT ID, CLUB, CLUBData FROM #TempTab
) AS Src
PIVOT
(
    MAX(CLUB) FOR CLUBData IN (' + @Column + N')
) AS pvt
';

PRINT @Sql;
EXEC (@Sql);
Result
Id CLUB1 CLUB2 CLUB3
-------------------------
1 3 4 2
2 1 2 NULL
3 5 NULL NULL
I have a query like below:
-- Sample data: quantity of each item per employee.
-- Created as a temp table (#t): "DECLARE #t TABLE" is not valid T-SQL, and a
-- temp table can also be read by dynamic SQL.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;

CREATE TABLE #t
(
    EmpName VARCHAR(10)
  , Qty     INT
  , Item    VARCHAR(12)
);

INSERT INTO #t (EmpName, Qty, Item)
VALUES ('Jane', 3, 'Dog')
     , ('Carle', 1, 'Cat')
     , ('Abay', 5, 'Goat')
     , ('Jane', 1, 'Dog')
     , ('Carle', 10, 'Cat')
     , ('Jane', 2, 'Dog')
     , ('Jane', 8, 'Goat')
     , ('Jane', 3, 'Ram')
     , ('Carle', 2, 'Dog');

-- Total quantity per employee and item, one column per item.
-- Items an employee has no rows for come out as NULL.
SELECT
    EmpName, [Dog], [Cat], [Goat], [Ram]
FROM
    (SELECT
         EmpName, Qty, Item
     FROM #t) AS b
PIVOT (SUM(Qty) FOR Item IN ([Dog], [Cat], [Goat], [Ram])) AS p;
And the result is as seen in the screenshot below:
I want to calculate the average Qty across Item without ignoring null values in the calculation. For example, in row 1, EmpName Abay should be 5 divided by 4 (number of columns), as seen in this screenshot:
How do I get the average column?
I'm not really familiar with the PIVOT query, so here is an alternative using conditional aggregation:
-- One row per employee: per-item totals (0 when the employee has no rows for
-- the item) and the overall total divided by the fixed item count of 4.
SELECT
    Empname,
    SUM(CASE Item WHEN 'Dog'  THEN Qty ELSE 0 END) AS Dog,
    SUM(CASE Item WHEN 'Cat'  THEN Qty ELSE 0 END) AS Cat,
    SUM(CASE Item WHEN 'Goat' THEN Qty ELSE 0 END) AS Goat,
    SUM(CASE Item WHEN 'Ram'  THEN Qty ELSE 0 END) AS Ram,
    SUM(ISNULL(Qty, 0)) / 4.0                      AS Average
FROM #t
GROUP BY EmpName;
Note that this will only work if you only have 4 Items. Otherwise, you need to resort to dynamic crosstab.
ONLINE DEMO
For dynamic crosstab, I used a temporary table instead of a table variable:
-- Dynamic crosstab over #t: one SUM(CASE ...) column per distinct Item, plus an
-- Average column equal to the employee's total divided by the number of distinct items.
DECLARE @sql NVARCHAR(MAX) = N'';

SELECT @sql =
N'SELECT
    Empname' + CHAR(10);

-- One column per item. Single quotes inside Item are doubled so the value stays
-- inside its string literal, and QUOTENAME protects the column alias.
-- NULL items are skipped because concatenating NULL would wipe out @sql.
SELECT @sql = @sql +
N'    , SUM(CASE WHEN Item = ''' + REPLACE(Item, '''', '''''') + N''' THEN Qty ELSE 0 END) AS ' + QUOTENAME(Item) + CHAR(10)
FROM (
    SELECT DISTINCT Item FROM #t WHERE Item IS NOT NULL
) AS t;

-- NULLIF avoids a divide-by-zero when #t holds rows but no non-NULL Item.
SELECT @sql = @sql +
N'    , SUM(ISNULL(Qty, 0)) / (SELECT NULLIF(COUNT(DISTINCT Item), 0) * 1.0 FROM #t) AS [Average]' + CHAR(10) +
N'FROM #t
GROUP BY EmpName;';

EXEC sp_executesql @sql;
ONLINE DEMO
Try a combination of AVG and ISNULL, i.e. AVG(ISNULL(Dog, 0)).
One simple method is:
-- Average across the four item columns of the pivoted table t, counting a
-- missing (NULL) item as 0.
select
    t.empname,
    t.goat,
    t.cat,
    t.dog,
    t.ram,
    s.total / 4.0 as average
from t
cross apply (
    select coalesce(t.goat, 0) + coalesce(t.cat, 0) + coalesce(t.dog, 0) + coalesce(t.ram, 0) as total
) as s;
Another simple method uses outer apply:
-- Average across the four item columns of the pivoted table t, counting a
-- missing (NULL) item as 0. The columns are unpivoted into rows with VALUES
-- and averaged per source row.
select
    t.*,
    a.average
from t
outer apply (
    -- "* 1.0" forces a decimal average: AVG over integer input returns an
    -- integer, which would turn (5 + 0 + 0 + 0) / 4 into 1 instead of 1.25.
    select avg(coalesce(x.qty, 0) * 1.0) as average
    from (values (t.goat), (t.cat), (t.dog), (t.ram)) as x (qty)
) as a;
-- Sample data: quantity of each item per employee.
-- Created as a temp table (#t): "DECLARE #t TABLE" is not valid T-SQL.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;

CREATE TABLE #t
(
    EmpName VARCHAR(10)
  , Qty     INT
  , Item    VARCHAR(12)
);

INSERT INTO #t (EmpName, Qty, Item)
VALUES ('Jane', 3, 'Dog')
     , ('Carle', 1, 'Cat')
     , ('Abay', 5, 'Goat')
     , ('Jane', 1, 'Dog')
     , ('Carle', 10, 'Cat')
     , ('Jane', 2, 'Dog')
     , ('Jane', 8, 'Goat')
     , ('Jane', 3, 'Ram')
     , ('Carle', 2, 'Dog');

-- The windowed SUM attaches each employee's overall total to every source row.
-- total is constant per employee, so it travels through the PIVOT as a grouping
-- column and can then be divided by the fixed item count of 4.
SELECT EmpName
     , [Dog]
     , [Cat]
     , [Goat]
     , [Ram]
     , p.total / 4.0 AS av
FROM (
    SELECT EmpName, Qty, Item, SUM(Qty) OVER (PARTITION BY EmpName) AS total
    FROM #t
) AS b
PIVOT (SUM(Qty) FOR Item IN ([Dog], [Cat], [Goat], [Ram])) AS p;
EmpName Dog Cat Goat Ram av
---------- ----------- ----------- ----------- ----------- ---------------------------------------
Abay NULL NULL 5 NULL 1.250000
Carle 2 11 NULL NULL 3.250000
Jane 6 NULL 8 3 4.250000
V2: Dynamic script:
-- Dynamic version: the pivot columns and the divisor of the average are both
-- taken from the distinct Item values found in #t.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;

CREATE TABLE #t
(
    EmpName VARCHAR(10)
  , Qty     INT
  , Item    VARCHAR(12)
);

INSERT INTO #t (EmpName, Qty, Item)
VALUES ('Jane', 3, 'Dog')
     , ('Carle', 1, 'Cat')
     , ('Abay', 5, 'Goat')
     , ('Jane', 1, 'Dog')
     , ('Carle', 10, 'Cat')
     , ('Jane', 2, 'Dog')
     , ('Jane', 8, 'Goat')
     , ('Jane', 3, 'Ram')
     , ('Carle', 2, 'Dog');

-- An extra item, to show that a new column appears without editing the query.
INSERT INTO #t (EmpName, Qty, Item) VALUES ('Abay', 100, 'abc');

DECLARE @cols NVARCHAR(MAX),   -- bracket-quoted item names, comma separated
        @sql  NVARCHAR(MAX),   -- the generated statement
        @cnt  INT;             -- number of distinct items (divisor of the average)

-- QUOTENAME escapes any ']' inside an item name, which hand-written '[' + Item + ']' does not.
SELECT @cols = ISNULL(@cols + ',', '') + QUOTENAME(Item)
     , @cnt  = ISNULL(@cnt + 1, 1)
FROM #t
WHERE Item IS NOT NULL
GROUP BY Item;

PRINT @cols;
PRINT @cnt;

SET @sql = N'SELECT EmpName, ' + @cols + N', p.total * 1.0 / ' + CAST(@cnt AS NVARCHAR(10)) + N' AS av' + CHAR(13)
         + N' FROM (SELECT EmpName, Qty, Item, SUM(Qty) OVER (PARTITION BY EmpName) AS total FROM #t) AS b' + CHAR(13)
         + N' PIVOT (SUM(Qty) FOR Item IN (' + @cols + N')) AS p';

-- With no items @cols, and therefore @sql, is NULL: nothing to run.
IF @sql IS NOT NULL
    EXEC (@sql);
EmpName abc Cat Dog Goat Ram av
---------- ----------- ----------- ----------- ----------- ----------- ---------------------------------------
Carle NULL 11 2 NULL NULL 2.600000
Jane NULL NULL 6 8 3 3.400000
Abay 100 NULL NULL 5 NULL 21.000000
Avoid NULL from your pivot sentence and compute AVG.
-- Pivot the per-item totals, replacing NULL (no rows for that item) with 0,
-- then take AVG of each item column per employee.
;WITH zero_filled AS
(
    SELECT
        p.EmpName,
        ISNULL(p.[Dog], 0)  AS Dog,
        ISNULL(p.[Cat], 0)  AS Cat,
        ISNULL(p.[Goat], 0) AS Goat,
        ISNULL(p.[Ram], 0)  AS Ram
    FROM (SELECT EmpName, Qty, Item FROM #t) AS src
    PIVOT (SUM(Qty) FOR Item IN ([Dog], [Cat], [Goat], [Ram])) AS p
)
SELECT
    empname,
    AVG(dog)  AS dog,
    AVG(cat)  AS cat,
    AVG(goat) AS goat,
    AVG(ram)  AS ram
FROM zero_filled
GROUP BY empname;
+---------+-----+-----+------+-----+
| empname | dog | cat | goat | ram |
+---------+-----+-----+------+-----+
| Abay | 0 | 0 | 5 | 0 |
+---------+-----+-----+------+-----+
| Carle | 2 | 11 | 0 | 0 |
+---------+-----+-----+------+-----+
| Jane | 6 | 0 | 8 | 3 |
+---------+-----+-----+------+-----+
-- Pivot the per-item totals and append their average over the four fixed
-- items, treating an item the employee has no rows for (NULL) as 0.
SELECT
    EmpName,
    [Dog],
    [Cat],
    [Goat],
    [Ram],
    (ISNULL(p.cat, 0) + ISNULL(p.dog, 0) + ISNULL(p.Goat, 0) + ISNULL(p.Ram, 0)) / 4.0 AS average
FROM (
    SELECT EmpName, Qty, Item
    FROM #t
) AS b
PIVOT (
    SUM(Qty) FOR Item IN ([Dog], [Cat], [Goat], [Ram])
) AS p;
FYI, this question is already answered but I have some new requirements which is very complex to implement so, I am posting it as a new question instead of editing the old question: (Previous Question)
I have two tables "Controls" and "ControlChilds" (in the ControlChilds table we have added a new column called ControlChildComments which we need to show in PIVOT output)
Parent Table Structure:
-- Parent table: one row per control of a project.
Create table Controls(
    ProjectID    Varchar(20) NOT NULL,
    ControlID    INT NOT NULL,
    ControlCode  Varchar(2) NOT NULL,
    -- An explicit scale is required: a bare DECIMAL means DECIMAL(18, 0) in
    -- SQL Server, which would store the sample value 30.44 as 30.
    ControlPoint Decimal(10, 2) NULL,
    ControlScore Decimal(10, 2) NULL,
    ControlValue Varchar(50) NULL
)
Sample Data
ProjectID | ControlID | ControlCode | ControlPoint | ControlScore | ControlValue
P001 1 A 30.44 65 Invalid
P001 2 C 45.30 85 Valid
Child Table Structure:
-- Child table: the child values (and optional comments) of a control,
-- linked to Controls through ControlID.
CREATE TABLE ControlChilds
(
    ControlID            INT          NOT NULL,
    ControlChildID       INT          NOT NULL,
    ControlChildValue    VARCHAR(200) NULL,
    ControlChildComments VARCHAR(200) NULL
)
Sample Data
ControlID | ControlChildID | ControlChildValue | ControlChildComments
1 100 Yes Something
1 101 No NULL
1 102 NA Others
1 103 Others NULL
2 104 Yes New one
2 105 SomeValue NULL
Based on my previous question (Previous Question) I got this output (you can refer to the PIVOT queries which produce this output in the answer given by @bluefeet. Thanks again, @bluefeet.)
But now my requirement is changed and I need ControlChildComments after each Child values. For example, A_Child1, A_Child1Comments, A_Child2, A_Child2Comments etc...
Another tricky thing is I need to show the comments only when they are not null otherwise I shouldn't show the column. For example, in this case, it should be like this:
A_Child1, A_Child1Comments, A_Child2, A_Child3, A_Child3Comments, A_Child4, C_Child1, C_Child1Comments, C_Child2
Is this possible? I tried lot of things but the results are not accurate.
Since you now have multiple columns in your ControlChilds table that you need to PIVOT, you will need to use the similar method of unpivoting them first that you applied with the Controls table.
You will need to unpivot both the ChildControlValue and ChildControlComments using code similar to:
-- Unpivot every child row into two rows (its value and its comment). Each is
-- labelled <ControlCode>_<ChildValue|ChildComments><n>, where n is the child's
-- position within its project and control, ordered by ControlChildId.
with numbered_children as
(
    select
        c.ProjectId,
        c.ControlCode,
        cc.ControlChildValue,
        cc.ControlChildComments,
        row_number() over (partition by c.ProjectId, c.ControlCode
                           order by cc.ControlChildId) as seq
    from controls c
    inner join controlchilds cc
        on c.controlid = cc.controlid
)
select
    d.ProjectId as projectId,
    d.ControlCode + '_' + u.subCol + cast(d.seq as varchar(10)) as col,
    u.value
from numbered_children d
cross apply
(
    values ('ChildValue',    d.ControlChildValue),
           ('ChildComments', d.ControlChildComments)
) u (subCol, value);
See SQL Fiddle with Demo. This gets your data in the format:
| PROJECTID | COL | VALUE |
|-----------|------------------|-----------|
| P001 | A_ChildValue1 | Yes |
| P001 | A_ChildComments1 | Something |
| P001 | A_ChildValue2 | No |
| P001 | A_ChildComments2 | (null) |
| P001 | A_ChildValue3 | NA |
You then use this code in your existing query:
-- Static pivot: one row per project with the control-level columns and the
-- numbered child value / comment columns of controls A and C.
-- The column names must match what the source query generates:
-- <ControlCode>_<ControlPoint|ControlScore|ControlValue> and
-- <ControlCode>_<ChildValue|ChildComments><n>. Names the source never
-- produces (such as C_Child1) would only yield NULL columns.
select ProjectId,
  A_ControlPoint, A_ControlScore, A_ControlValue,
  A_ChildValue1, A_ChildComments1, A_ChildValue2,
  A_ChildComments2, A_ChildValue3, A_ChildComments3,
  A_ChildValue4, A_ChildComments4,
  C_ControlPoint, C_ControlScore, C_ControlValue,
  C_ChildValue1, C_ChildComments1,
  C_ChildValue2, C_ChildComments2
from
(
  -- control-level attributes, unpivoted to (ProjectId, col, val)
  select
    ProjectId,
    col = ControlCode + '_' + col,
    val
  from
  (
    select
      c.ProjectId,
      c.ControlCode,
      c.ControlPoint,
      c.ControlScore,
      c.ControlValue
    from controls c
  ) d
  cross apply
  (
    -- the numeric columns are cast so that all three branches are strings
    select 'ControlPoint', cast(ControlPoint as varchar(10)) union all
    select 'ControlScore', cast(ControlScore as varchar(10)) union all
    select 'ControlValue', ControlValue
  ) c (col, val)
  union all
  -- child values and comments, numbered per project and control by ControlChildId
  select
    ProjectId,
    col = ControlCode + '_' + subCol + cast(seq as varchar(10)),
    value
  from
  (
    select c.ProjectId,
      c.ControlCode,
      cc.ControlChildValue,
      cc.ControlChildComments,
      row_number() over(partition by c.ProjectId, c.ControlCode
                        order by cc.ControlChildId) seq
    from controls c
    inner join controlchilds cc
      on c.controlid = cc.controlid
  ) d
  cross apply
  (
    select 'ChildValue', ControlChildValue union all
    select 'ChildComments', ControlChildComments
  ) c (subCol, value)
) src
pivot
(
  max(val)
  for col in (A_ControlPoint, A_ControlScore, A_ControlValue,
              A_ChildValue1, A_ChildComments1, A_ChildValue2,
              A_ChildComments2, A_ChildValue3, A_ChildComments3,
              A_ChildValue4, A_ChildComments4,
              C_ControlPoint, C_ControlScore, C_ControlValue,
              C_ChildValue1, C_ChildComments1,
              C_ChildValue2, C_ChildComments2)
) piv;
See SQL Fiddle with Demo. Finally, you'll implement this in your dynamic SQL script:
-- Dynamic version of the pivot: the column list is generated from the controls
-- and child rows present in the data, then spliced into the PIVOT statement.
DECLARE @cols  AS NVARCHAR(MAX),   -- bracket-quoted pivot column names, comma separated
        @query AS NVARCHAR(MAX);   -- the generated statement

-- Column order: by control code, then control-level columns (seq 0) before the
-- child columns (seq = child position), and within one position by "so"
-- (point, score, value, child value, child comments).
select @cols = STUFF((SELECT ',' + QUOTENAME(col)
                      from
                      (
                        select ControlCode,
                          col = ControlCode + '_' + col,
                          seq,
                          so
                        from controls
                        cross apply
                        (
                          select 'ControlPoint', 0, 0 union all
                          select 'ControlScore', 0, 1 union all
                          select 'ControlValue', 0, 2
                        ) c (col, seq, so)
                        union all
                        select ControlCode,
                          col = ControlCode + '_' + subcol + cast(rn as varchar(10)),
                          rn,
                          so
                        from
                        (
                          select c.ControlCode,
                            row_number() over(partition by c.ProjectId, c.ControlCode
                                              order by cc.ControlChildId) seq
                          from controls c
                          inner join controlchilds cc
                            on c.controlid = cc.controlid
                        ) d
                        cross apply
                        (
                          select 'ChildValue', seq, 3 union all
                          select 'ChildComments', seq, 4
                        ) c (subcol, rn, so)
                      ) src
                      group by ControlCode, seq, col, so
                      order by ControlCode, seq, so
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)')
                     , 1, 1, '');

set @query = N'SELECT ProjectId, ' + @cols + N'
from
(
  select ProjectId,
    col = ControlCode + ''_'' + col,
    val
  from
  (
    select
      c.ProjectId,
      c.ControlCode,
      c.ControlPoint,
      c.ControlScore,
      c.ControlValue
    from controls c
  ) d
  cross apply
  (
    select ''ControlPoint'', cast(ControlPoint as varchar(10)) union all
    select ''ControlScore'', cast(ControlScore as varchar(10)) union all
    select ''ControlValue'', ControlValue
  ) c (col, val)
  union all
  select
    ProjectId,
    col = ControlCode + ''_'' + subCol + cast(seq as varchar(10)),
    value
  from
  (
    select c.ProjectId,
      c.ControlCode,
      cc.ControlChildValue,
      cc.ControlChildComments,
      row_number() over(partition by c.ProjectId, c.ControlCode
                        order by cc.ControlChildId) seq
    from controls c
    inner join controlchilds cc
      on c.controlid = cc.controlid
  ) d
  cross apply
  (
    select ''ChildValue'', ControlChildValue union all
    select ''ChildComments'', ControlChildComments
  ) c (subCol, value)
) x
pivot
(
  max(val)
  for col in (' + @cols + N')
) p ';

-- With no controls @cols, and therefore @query, is NULL: nothing to run.
if @query is not null
  exec sp_executesql @query;
See SQL Fiddle with Demo. Both of these give the following result:
| PROJECTID | A_CONTROLPOINT | A_CONTROLSCORE | A_CONTROLVALUE | A_CHILDVALUE1 | A_CHILDCOMMENTS1 | A_CHILDVALUE2 | A_CHILDCOMMENTS2 | A_CHILDVALUE3 | A_CHILDCOMMENTS3 | A_CHILDVALUE4 | A_CHILDCOMMENTS4 | C_CONTROLPOINT | C_CONTROLSCORE | C_CONTROLVALUE | C_CHILDVALUE1 | C_CHILDCOMMENTS1 | C_CHILDVALUE2 | C_CHILDCOMMENTS2 |
|-----------|----------------|----------------|----------------|---------------|------------------|---------------|------------------|---------------|------------------|---------------|------------------|----------------|----------------|----------------|---------------|------------------|---------------|------------------|
| P001 | 30.44 | 65.00 | Invalid | Yes | Something | No | (null) | NA | Others | Others | (null) | 45.30 | 85.00 | Valid | Yes | New one | SomeValue | (null) |
Here is an example of a dynamic crosstab. Since you have multiple columns you would need to adjust the dynamic portion of this to suit.
-- Dynamic crosstab: one row per ID with its Subject1 values spread over
-- Subject1..SubjectN columns, N being the largest number of rows any ID has.
if OBJECT_ID('Something') is not null
    drop table Something;

create table Something
(
    ID int,
    Subject1 varchar(50)
);

insert into Something (ID, Subject1)
select 10868952, 'NUR/3110/D507' union all
select 10868952, 'NUR/3110/D512' union all
select 10868952, 'NUR/4010/D523' union all
select 10868952, 'NUR/4010/HD20' union all
select 12345, 'asdfasdf';

-- Fixed head of the generated statement: number each ID's rows by Subject1.
declare @StaticPortion nvarchar(2000) =
'with OrderedResults as
(
select *, ROW_NUMBER() over(partition by ID order by Subject1) as RowNum
from Something
)
select ID';

-- One "MAX(CASE WHEN RowNum = n ...)" column per position, appended below.
declare @DynamicPortion nvarchar(max) = '';

-- Fixed tail of the generated statement.
declare @FinalStaticPortion nvarchar(2000) = ' from OrderedResults Group by ID order by ID';

-- Inline tally: 10 rows cross-joined up to 10,000 numbers, of which only
-- 1..(largest group size) are used.
with E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1)) dt(n)),
E2(N) AS (SELECT 1 FROM E1 a, E1 b), -- 10 x 10 = 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), -- 100 x 100 = 10,000 rows max
cteTally(N) AS
(
    SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
select @DynamicPortion = @DynamicPortion +
    ', MAX(Case when RowNum = ' + CAST(N as varchar(6)) + ' then Subject1 end) as Subject' + CAST(N as varchar(6)) + CHAR(10)
from cteTally t
where t.N <=
(
    select top 1 Count(*)
    from Something
    group by ID
    order by COUNT(*) desc
);

-- Show the generated statement. To run it instead, pass the same
-- concatenation to sp_executesql.
select @StaticPortion + @DynamicPortion + @FinalStaticPortion;
Here's a simplified example of my problem. I have a table where there's a "Name" column with duplicate entries:
ID Name
--- ----
1 AAA
2 AAA
3 AAA
4 BBB
5 CCC
6 CCC
7 DDD
8 DDD
9 DDD
10 DDD
Doing a GROUP BY like SELECT Name, COUNT(*) AS [Count] FROM Table GROUP BY Name results in this:
Name Count
---- -----
AAA 3
BBB 1
CCC 2
DDD 4
I'm only concerned about the duplicates, so I'll add a HAVING clause, SELECT Name, COUNT(*) AS [Count] FROM Table GROUP BY Name HAVING COUNT(*) > 1:
Name Count
---- -----
AAA 3
CCC 2
DDD 4
Trivial so far, but now things get tricky: I need a query to get me all the duplicate records, but with a nice incrementing indicator added to the Name column. The result should look something like this:
ID Name
--- --------
1 AAA
2 AAA (2)
3 AAA (3)
5 CCC
6 CCC (2)
7 DDD
8 DDD (2)
9 DDD (3)
10 DDD (4)
Note row 4 with "BBB" is excluded, and the first duplicate keeps the original Name.
Using an EXISTS statement gives me all the records I need, but how do I go about creating the new Name value?
-- All rows whose Name occurs more than once.
-- TABLE is a reserved word, so the placeholder table name must be bracket-quoted.
SELECT T1.Id, T1.Name
FROM [Table] AS T1
WHERE EXISTS (
    SELECT 1
    FROM [Table] AS T2
    WHERE T2.Name = T1.Name
    GROUP BY T2.Name
    HAVING COUNT(*) > 1)
ORDER BY T1.Name, T1.Id
I need to create an UPDATE statement that will fix all the duplicates, i.e. change the Name as per this pattern.
Update:
Figured it out now. It was the PARTITION BY clause I was missing.
-- List the duplicated names with a running suffix: the first row of each name
-- (lowest Id) keeps the plain name, later rows get ' (2)', ' (3)', ...
-- Names that occur only once are left out, as the question requires.
With Dups As
(
    Select Id, Name
        , Row_Number() Over ( Partition By Name Order By Id ) As Rnk
        , Count(*) Over ( Partition By Name ) As Cnt   -- rows sharing this Name
    From [Table]   -- TABLE is a reserved word and must be bracket-quoted
)
Select D.Id
    , D.Name + Case
        When D.Rnk > 1 Then ' (' + Cast(D.Rnk As varchar(10)) + ')'
        Else ''
        End As Name
From Dups As D
Where D.Cnt > 1
Order By D.Id
If you want an update statement you can use pretty much the same structure:
-- Rename duplicates in place: every row after the first (lowest Id) of a name
-- gets the suffix ' (n)', n being its position within that name.
With Dups As
(
    Select Id, Name
        , Row_Number() Over ( Partition By Name Order By Id ) As Rnk
    From [Table]   -- TABLE is a reserved word and must be bracket-quoted
)
Update T
Set Name = T.Name + ' (' + Cast(D.Rnk As varchar(10)) + ')'
From [Table] As T
Inner Join Dups As D
    On D.Id = T.Id   -- assumes Id is unique in [Table] -- TODO confirm
Where D.Rnk > 1      -- first occurrences keep their name, so leave them untouched
Just update the subquery directly:
-- Update through the numbered derived table: rows after the first of each name
-- get ' (n)' appended. Numbering is ordered by Id so the lowest Id keeps the
-- plain name; ordering by Name inside "partition by Name" would leave the
-- choice to the engine.
update d
set Name = Name + ' (' + cast(r as varchar(10)) + ')'
from ( select Name,
              row_number() over (partition by Name order by Id) as r
       from [table]
     ) d
where r > 1
-- Duplicated names with a per-name running number appended to all but the first row.
SELECT ROW_NUMBER() OVER (ORDER BY Name, Id) AS RowNum,
       Name,
       -- ROW_NUMBER() returns bigint, so it must be cast before string
       -- concatenation; otherwise SQL Server tries to convert '(' to a number
       -- and the query fails.
       Name + CASE
                  WHEN ROW_NUMBER() OVER (PARTITION BY Name ORDER BY Id) > 1
                      THEN ' (' + CAST(ROW_NUMBER() OVER (PARTITION BY Name ORDER BY Id) AS VARCHAR(10)) + ')'
                  ELSE ''
              END AS concatenatedName
FROM [Table]   -- TABLE is a reserved word and must be bracket-quoted
WHERE Name IN
(
    SELECT Name
    FROM [Table]
    GROUP BY Name
    HAVING COUNT(*) > 1
)
This will get you what you originally asked for. For the update statement, you'll want to do a while and update the top 1
-- Iteratively rename duplicates: while any Name occurs more than once, take
-- one such name and append ' (<current count>)' to its highest-Id row. Each
-- pass lowers that name's count by one, so the loop ends when every name is unique.
-- NOTE(review): a single set-based UPDATE using ROW_NUMBER() avoids the loop entirely.
DECLARE @Pointer VARCHAR(20),   -- NOTE(review): must be at least as wide as the Name column -- confirm
        @Count   INT;

WHILE EXISTS (SELECT Name FROM [Table] GROUP BY Name HAVING COUNT(1) > 1)
BEGIN
    -- ORDER BY makes the choice of the next duplicated name deterministic.
    SELECT TOP (1) @Pointer = Name, @Count = COUNT(1)
    FROM [Table]
    GROUP BY Name
    HAVING COUNT(1) > 1
    ORDER BY Name;

    -- Rename the row with the highest Id, so the lowest Id ends up keeping the
    -- plain name. @Count is cast because '(' + int raises a conversion error.
    WITH last_dup AS
    (
        SELECT TOP (1) Name
        FROM [Table]
        WHERE Name = @Pointer
        ORDER BY Id DESC
    )
    UPDATE last_dup
    SET Name = Name + ' (' + CAST(@Count AS VARCHAR(10)) + ')';
END
There's no need to do an UPDATE at all. The following will create the table for INSERT as desired
-- Number the rows of each name without window ranking: for every row (later),
-- count the rows of the same name whose Id is not greater. That count is the
-- row's position within its name; positions above 1 are appended as ' (n)'.
SELECT
    ROW_NUMBER() OVER (ORDER BY later.Id) AS Id,
    later.Name + CASE
                     WHEN COUNT(*) > 1 THEN ' (' + CAST(COUNT(*) AS VARCHAR(30)) + ')'
                     ELSE ''
                 END AS [Name]
FROM tb AS earlier
INNER JOIN tb AS later
    ON earlier.Name = later.Name
   AND earlier.Id <= later.Id
GROUP BY
    later.Name,
    later.Id
Here's an even simpler UPDATE statement:
-- Window functions are only allowed in SELECT and ORDER BY, so ROW_NUMBER()
-- cannot be used directly in the SET or WHERE of an UPDATE. The numbering is
-- computed in a CTE and the update goes through that CTE.
WITH numbered AS
(
    SELECT
        [Name],
        ROW_NUMBER() OVER (PARTITION BY [Name] ORDER BY Id) AS rn
    FROM tb
)
UPDATE numbered
SET [Name] = [Name] + ' (' + CONVERT(VARCHAR(10), rn) + ')'
WHERE rn > 1   -- the first row of each name keeps its original name