Using pivot table with column and row totals in sql server 2008 - sql

I have a table with following columns
defect_id, developer_name, status, summary, root_cause,
Secondary_RC, description, Comments, environment_name
The column root_cause has Enviro, Requi, Dev, TSc, TD, Unkn as its values and
column environment_name has QA1, QA2, QA3
I need to prepare a report in the below format
Enviro Requi Dev TSc TD Unkn Total
QA1 9 1 14 17 2 3 46
QA2 8 1 14 0 5 1 29
QA3 1 1 7 0 0 1 10
Total 18 3 35 17 7 5 85
I have prepare the report till
Enviro Requi Dev TSc TD Unkn
QA1 9 1 14 17 2 3
QA2 8 1 14 0 5 1
QA3 1 1 7 0 0 1
I used the below query to get the above result
select *
from
(
select environment_name as " ", value
from test1
unpivot
(
value
for col in (root_cause)
) unp
) src
pivot
(
count(value)
for value in ([Enviro] , [Requi] , [Dev] , [Tsc], [TD] , [Unkn])
) piv
Can anyone help to get the totals for columns and rows?

There may be various approaches to this. You can calculate all the totals after the pivot, or you can get the totals first, then pivot all the results. It is also possible to have kind of middle ground: get one kind of the totals (e.g. the row-wise ones), pivot, then get the other kind, although that might be overdoing it.
The first of the mentioned approaches, getting all the totals after the pivot, could be done in a very straightforward way, and the only thing potentially new to you in the below implementation might be GROUP BY ROLLUP():
SELECT
[ ] = ISNULL(environment_name, 'Total'),
[Enviro] = SUM([Enviro]),
[Requi] = SUM([Requi]),
[Dev] = SUM([Dev]),
[Tsc] = SUM([Tsc]),
[TD] = SUM([TD]),
[Unkn] = SUM([Unkn]),
Total = SUM([Enviro] + [Requi] + [Dev] + [Tsc] + [TD] + [Unkn])
FROM (
SELECT environment_name, root_cause
FROM test1
) s
PIVOT (
COUNT(root_cause)
FOR root_cause IN ([Enviro], [Requi], [Dev], [Tsc], [TD], [Unkn])
) p
GROUP BY
ROLLUP(environment_name)
;
Basically, the GROUP BY ROLLUP() part produces the Total row for you. The grouping is first done by environment_name, then the grand total row is added.
To do just the opposite, i.e. get the totals prior to pivoting, you could employ GROUP BY CUBE() like this:
SELECT
[ ] = environment_name,
[Enviro] = ISNULL([Enviro], 0),
[Requi] = ISNULL([Requi] , 0),
[Dev] = ISNULL([Dev] , 0),
[Tsc] = ISNULL([Tsc] , 0),
[TD] = ISNULL([TD] , 0),
[Unkn] = ISNULL([Unkn] , 0),
Total = ISNULL(Total , 0)
FROM (
SELECT
environment_name = ISNULL(environment_name, 'Total'),
root_cause = ISNULL(root_cause, 'Total'),
cnt = COUNT(*)
FROM test1
WHERE root_cause IS NOT NULL
GROUP BY
CUBE(environment_name, root_cause)
) s
PIVOT (
SUM(cnt)
FOR root_cause IN ([Enviro], [Requi], [Dev], [Tsc], [TD], [Unkn], Total)
) p
;
Both methods can be tested and played with at SQL Fiddle:
Method 1
Method 2
Note. I've omitted the unpivoting step in both suggestions because unpivoting a single column seemed clearly redundant. If there's more to it, though, adjusting either of the queries should be easy.

You can find Total for root_cause and environment_name using ROLLUP.
RNO_COLTOTAL - Logic to place Total in last column, since the columns Tsc,Unkn will overlap the column Total when pivoting, since its ordering alphabetically.
RNO_ROWTOTAL - Logic to place Total in last row since a value that is starting with U,W,X,Y,Z can overlap the value Total, since its ordering alphabetically.
SUM(VALUE) - Can define on what aggregate function we can use with ROLLUP.
QUERY 1
SELECT CASE WHEN root_cause IS NULL THEN 1 ELSE 0 END RNO_COLTOTAL,
CASE WHEN environment_name IS NULL THEN 1 ELSE 0 END RNO_ROWTOTAL,
ISNULL(environment_name,'Total')environment_name,
ISNULL(root_cause,'Total')root_cause,
SUM(VALUE) VALUE
INTO #NEWTABLE
FROM
(
-- Find the count for environment_name,root_cause
SELECT DISTINCT *,COUNT(*) OVER(PARTITION BY environment_name,root_cause)VALUE
FROM #TEMP
)TAB
GROUP BY root_cause,environment_name
WITH CUBE
We will get the following logic when CUBE is used
We declare variables for pivoting.
#cols - Column values for pivoting.
#NulltoZeroCols - Replace null values with zero.
QUERY 2
DECLARE #cols NVARCHAR (MAX)
SELECT #cols = COALESCE (#cols + ',[' + root_cause + ']',
'[' + root_cause + ']')
FROM (SELECT DISTINCT RNO_COLTOTAL,root_cause FROM #NEWTABLE) PV
ORDER BY RNO_COLTOTAL,root_cause
DECLARE #NulltoZeroCols NVARCHAR (MAX)
SET #NullToZeroCols = SUBSTRING((SELECT ',ISNULL(['+root_cause+'],0) AS ['+root_cause+']'
FROM(SELECT DISTINCT RNO_COLTOTAL,root_cause FROM #NEWTABLE GROUP BY RNO_COLTOTAL,root_cause)TAB
ORDER BY RNO_COLTOTAL FOR XML PATH('')),2,8000)
Now pivot it dynamically
DECLARE #query NVARCHAR(MAX)
SET #query = 'SELECT environment_name,'+ #NulltoZeroCols +' FROM
(
SELECT RNO_ROWTOTAL,environment_name,root_cause,VALUE
FROM #NEWTABLE
) x
PIVOT
(
MIN(VALUE)
FOR [root_cause] IN (' + #cols + ')
) p
ORDER BY RNO_ROWTOTAL,environment_name;'
EXEC SP_EXECUTESQL #query
RESULT

I think you need to calculate the Total separately. Using this simple query for the total (sorry, had to give the alias name for your " " column):
select environment_name as en,
count (*) AS Total
FROM test1
WHERE value in ('Enviro', 'Requi', 'Dev', 'Tsc', 'TD', 'Unkn')
GROUP BY environment_name
you can easily join both queries together to get the required report:
SELECT * FROM
(select *
from
(
select environment_name as en, value
from test1
unpivot
(
value
for col in (root_cause)
) unp
) src
pivot
(
count(value)
for value in ([Enviro] , [Requi] , [Dev] , [Tsc], [TD] , [Unkn])
) piv
) AS a
INNER JOIN
( select environment_name as en,
count (*) AS Total
FROM test1
WHERE value in ('Enviro', 'Requi', 'Dev', 'Tsc', 'TD', 'Unkn')
GROUP BY environment_name
) AS b ON a.en = b.en
UNION ALL
SELECT * FROM
(select *
from
(
select 'Total' as en, value
from test1
unpivot
(
value
for col in (root_cause)
) unp
) src
pivot
(
count(value)
for value in ([Enviro] , [Requi] , [Dev] , [Tsc], [TD] , [Unkn])
) piv
) AS a
INNER JOIN
( select 'Total' as en,
count (*) AS Total
FROM test1
WHERE value in ('Enviro', 'Requi', 'Dev', 'Tsc', 'TD', 'Unkn')
) AS b
I have not tested it, but believe it will work

You can also put you select pivot, without totals, in a temp table and then add the totalS with a new select:
SELECT environment_name=ISNULL(environment_name, ‘Total’) , Enviro=SUM(Enviro), Requi=SUM(Requi), Dev=SUM(Dev), TSc=SUM(TSc), TD=SUM(TD), Unkn =SUM(Unkn),
Total = sum(Enviro+Requi+Dev+TSc+TD+Unkn)
FROM #temp
GROUP BY ROLLUP(environment_name)

Related

SQL Recursive CTE Replacing records in each recursion

I have a table like this:
ItemID ItemFormula
100 'ID_3+ID_5'
110 'ID_2+ID_6'
120 'ID_100+ID_110'
130 'ID_120+ID_4'
This is the simplified version of a formula table with nearly 1000 records and up to 40 levels of reference (items used in other items). The task is breaking down the formulas to just one level reference where no other items are in one item. For example in the table above for id=130 I should have '((ID_3+ID_5)+(ID_2+ID_6))+ID_4'
EDIT: The operations do not limit to "+" and items have a character between them to be recognizable. For the sake of simplicity, I removed that character.
I can use recursive CTE for that. but my problem is that due to high levels of reference, my recursive select has lots of records joining so it takes a lot to complete.
My question is that: Can I keep the previous recursion only each time the recursion happens?
Here is my CTE Code
WITH Formula
AS (SELECT A.ItemID
,'ID_' + CONVERT(VARCHAR(20), A.ItemID) AS ItemText
,CONVERT(VARCHAR(MAX), A.ItemFormula) AS ItemFormula
FROM (VALUES (100,'ID_3+ID_5'),
(110,'ID_2+ID_6'),
(120,'ID_100+ID_110'),
(130,'ID_120+ID_4')
) A (ItemID,ItemFormula)
)
,REC
AS
(
SELECT A.ItemID
,A.ItemText
,A.ItemFormula
,1 AS LevelID
FROM Formula A
UNION ALL
SELECT A.ItemID
,A.ItemText
,' '
+ TRIM (REPLACE (REPLACE (A.ItemFormula, B.ItemText, ' ( ' + B.ItemFormula + ' ) '), ' ', ' '))
+ ' ' AS ItemFormula
,A.LevelID + 1 AS LevelID
FROM REC A
CROSS APPLY
(
SELECT *
FROM
(
SELECT *
,ROW_NUMBER () OVER (ORDER BY GETDATE ()) AS RowNum
FROM Formula B2
WHERE CHARINDEX (B2.ItemText, A.ItemFormula) > 0
) B3
WHERE B3.RowNum = 1
) B
)
,FinalQ
AS
(
SELECT A2.ItemID
,A2.ItemFormula
,A2.LevelID
FROM
(
SELECT A.ItemID
,REPLACE (TRIM (A.ItemFormula), ' ', '') AS ItemFormula
,A.LevelID
,ROW_NUMBER () OVER (PARTITION BY A.ItemID ORDER BY A.LevelID DESC) AS RowNum
FROM REC A
) A2
WHERE A2.RowNum = 1
)
SELECT * FROM FinalQ A2 ORDER BY A2.ItemID;
Thanks in advance.
My question is that: Can I keep the previous recursion only each time the recursion happens?
No. The recursive CTE will keep adding rows to the ones found in previous iterations. You don't have some kind of control that would allow you to remove rows of the recursive CTE during its iterations.
You can, however, filter them out after the recursive CTE is complete, maybe on a secondary CTE that takes into account only the last meaninful rows (by some kind of rule to be defined).
The only vaguely similar idea is found in PostgreSQL where you can use the UNION clause in addition to UNION ALL, to avoid producing more identical rows. But this is different to what you need, anyway.
This is an enormously complicated problem. Here are the ideas:
Find which items do not need any insertions. These are the ones that have no references to any others.
Build an ordering for item insertion. An insertion can go into an item, assuming that the item is already defined. A recursive CTE can be used for this.
Enumerate the insertions. Everything from (1) gets a "1". The rest are in order.
Process the insertions in the insertion order.
Here is my solution:
with ordering as (
select itemid, itemtext, itemformula, convert(varchar(max), null) as otheritemtext, 1 as lev
from formula f
where not exists (select 1
from formula f2 join
string_split(f.itemformula, '+') s
on f2.itemtext = s.value
where f2.itemid <> f.itemid
)
union all
select f.itemid, f.itemtext, f.itemformula, convert(varchar(max), s.value), lev + 1
from formula f cross apply
string_split(f.itemformula, '+') s join
ordering o
on o.itemtext = s.value
-- where lev <= 2
),
ordered as (
select distinct o.*,
dense_rank() over (order by (case when lev = 1 then -1 else lev end), (case when lev = 1 then '' else otheritemtext end)) as seqnum
from ordering o
),
cte as (
select o.itemid, o.itemtext, o.itemformula, convert(varchar(max), o.otheritemtext) as otheritemtext,
o.itemformula as newformula, o.seqnum, 1 as lev
from ordered o
where seqnum = 1
union all
select cte.itemid, o.itemtext, o.itemformula, convert(varchar(max), cte.itemtext),
replace(o.itemformula, o.otheritemtext, concat('(', cte.newformula, ')')), o.seqnum, cte.lev + 1
from cte join
ordered o
on cte.itemtext = o.otheritemtext and cte.seqnum < o.seqnum
)
select *
from cte;
And the db<>fiddle.
You could take advantage of the logical order of the formulas if any (Item_100 can not reference Item_150) and process items in a descending order.
The following uses LIKE and it will not work for formulas which have overlapping patterns (eg ID_10 & ID_100) you could fix that by some string manipulation or by keeping ItemIDs of fixed length (eg. ID_10010 & ID_10100: start numbering of items from a high number like 10000)
declare #f table
(
ItemId int,
ItemFormula varchar(1000)
);
insert into #f(ItemId, ItemFormula)
values
(100, 'ID_3+ID_5'),
(110, 'ID_2+ID_6'),
(120, 'ID_100+ID_110'),
(130, 'ID_120+ID_4'),
(140, '(ID_130+ID_110)/ID_100'),
(150, 'sqrt(ID_140, ID_130)'),
(160, 'ID_150-ID_120+ID_140');
;with cte
as
(
select f.ItemId, replace(cast(f.ItemFormula as varchar(max)), isnull('ID_' + cast(r.ItemId as varchar(max)), ''), isnull('(' + r.ItemFormula+ ')', '')) as therepl, 1 as lvl
from #f as f
outer apply (
select *
from
(
select rr.*, row_number() over(order by rr.ItemId desc) as rownum
from #f as rr
where f.ItemFormula like '%ID_' + cast(rr.ItemId as varchar(1000)) + '%'
) as src
where rownum = 1
) as r
union all
select c.ItemId, replace(c.therepl, 'ID_' + cast(r.ItemId as varchar(max)), '(' + r.ItemFormula+ ')'), c.lvl+1
from cte as c
cross apply (
select *
from
(
select rr.*, row_number() over(order by rr.ItemId desc) as rownum
from #f as rr
where c.therepl like '%ID_' + cast(rr.ItemId as varchar(1000)) + '%'
) as src
where rownum = 1
) as r
),
rown
as
(
select *, row_number() over (partition by itemid order by lvl desc) as rownum
from cte
)
select *
from rown
where rownum = 1;

How to pivot multiple columns without aggregation

I use SqlServer and i have to admit that i'm not realy good with it ...
This might be and easy question for the advanced users (I hope)
I have two tables which look like this
First table (ID isn't the primary key)
ID IdCust Ref
1 300 123
1 300 124
2 302 345
And the second (ID isn't the primary key)
ID Ref Code Price
1 123 A 10
1 123 Y 15
2 124 A 14
3 345 C 18
In the second table, the column "Ref" is the foreign key of "Ref" in the first table
I'm trying to produce the following output:
[EDIT]
The column "Stock", "Code" and "Price" can have x values, so I don't know it, in advance...
I tried so many things like "PIVOT" but it didn't give me the right result, so i hope someone can solve my problem ...
Use row_number() function and do the conditional aggregation :
select id, IdCust, Ref,
max(case when Seq = 1 then stock end) as [Stock A], -- second table *id*
max(case when Seq = 1 then code end) as [Code 1],
max(case when Seq = 1 then price end) as [Price1],
max(case when Seq = 2 then stock end) as [Stock B], -- second table *id*
max(case when Seq = 2 then code end) as [Code 2],
max(case when Seq = 2 then price end) as [Price2]
from (select f.*, s.Id Stock, s.Code, s.Price,
row_number() over (partition by f.Ref order by s.id) as Seq
from first f
inner join second s on s.Ref = f.Ref
) t
group by id, IdCust, Ref;
However, this would go with known values else you would need go with dynamic solution for that.
#YogeshSharma's provided an excellent answer.
Here's the same done using Pivot; SQL Fiddle Demo.
Functionally there's no difference between the two answers. However, Yogesh's solution's simpler to understand, and performs better; so personally I'd opt for that... I included this answer only because you mention PIVOT in the question:
select ft.Id
, ft.IdCust
, ft.Ref
, x.Stock1
, x.Code1
, x.Price1
, x.Stock2
, x.Code2
, x.Price2
from FirstTable ft
left outer join (
select Ref
, max([Stock1]) Stock1
, max([Stock2]) Stock2
, max([Code1]) Code1
, max([Code2]) Code2
, max([Price1]) Price1
, max([Price2]) Price2
from
(
select Ref
, Id Stock
, Code
, Price
, ('Stock' + cast(Row_Number() over (partition by Ref order by Id, Code) as nvarchar)) StockLineNo
, ('Code' + cast(Row_Number() over (partition by Ref order by Id, Code) as nvarchar)) CodeLineNo
, ('Price' + cast(Row_Number() over (partition by Ref order by Id, Code) as nvarchar)) PriceLineNo
from SecondTable
) st
pivot (max(Stock) for StockLineNo in ([Stock1],[Stock2])) pvtStock
pivot (max(Code) for CodeLineNo in ([Code1],[Code2])) pvtCode
pivot (max(Price) for PriceLineNo in ([Price1],[Price2])) pvtPrice
Group by Ref
) x
on x.Ref = ft.Ref
order by ft.Ref
Like Yogesh's solution, this will only handle as many columns as you specify; it won't dynamically alter the number of columns to match the data. For that you'd need to do dynamic SQL. However; if you need to do that, it's more likely you're attempting to solve the problem in the wrong way... so consider your design / determine if you really need additional columns per result rather than additional rows / some alternate approach...
Here's a Dynamic SQL implementation based on #YogeshSharma's answer: DBFiddle
declare #sql nvarchar(max) = 'select id, IdCust, Ref'
select #sql = #sql + '
,max(case when Seq = 1 then stock end) as [Stock' + rowNumVarchar + ']
,max(case when Seq = 1 then code end) as [Code' + rowNumVarchar + ']
,max(case when Seq = 1 then price end) as [Price' + rowNumVarchar + ']
'
from
(
select distinct cast(row_number() over (partition by ref order by ref) as nvarchar) rowNumVarchar
from second s
) z
set #sql = #sql + '
from (select f.*, s.Id Stock, s.Code, s.Price,
row_number() over (partition by f.Ref order by s.id) as Seq
from first f
inner join second s on s.Ref = f.Ref
) t
group by id, IdCust, Ref;
'
print #sql --see what the SQL produced is
exec (#sql)
(Here's a SQL Fiddle link for this one; but it's not working despite the SQL being valid

How to get data from 2 rows which has same data in all columns except one in MSSQL

As in my title I want to take data from 2 rows but In my case each 2nd row has one different value compare to the first row.
I want to take all the common data along with the different data as a single row .
Here you can see each row has same values in another row except the 2nd rows last column.
Thanks.
Edits Result :
I suspect you have a some kind of ordering columns that could specify your actual data ordering if so, then you can use row_number() function
select * from (
select *,
row_number() over (partition by <common data cols> order by ? desc) Seq
from table t
) t
where seq = 1;
EDIT : I don't believe your inventort_item_id columns but yes you could use creation_date for ordering purpose
SELECT
EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE,
BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION AS POPULAR_PRODUCTS_IMAGE_PATHS
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY ITEM_CODE ORDER BY creation_date DESC) as Seq
FROM ECOM_PRODUCT_IMAGES EPI
) EPI
INNER JOIN ECOM_POPULAR_PRODUCTS_MAPPING EPPIM ON EPPIM.ITEM_CODE = EPI.ITEM_CODE
INNER JOIN LOM_MST_PRODUCT LMP ON LMP.PROD_CODE = EPI.ITEM_CODE
INNER JOIN LOM_LNK_PROD_COMP LLPC ON LLPC.COLOC_PROD_CODE = LMP.PROD_CODE
WHERE EPI.Seq = 1 AND
EPPIM.ITEM_STATUS = 'ACTIVE';
EDIT 2: In that case you need to use GROUP BY clause with conditional aggregation
SELECT
EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE,
MAX(CASE WHEN EPI.Seq = 2
THEN (BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION)
END) AS POPULAR_PRODUCTS_IMAGE_PATHS,
MAX(CASE WHEN EPI.Seq = 1
THEN (BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION)
END) AS PATH_NEW
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY ITEM_CODE ORDER BY creation_date DESC) as Seq
FROM ECOM_PRODUCT_IMAGES EPI
) EPI
INNER JOIN ECOM_POPULAR_PRODUCTS_MAPPING EPPIM ON EPPIM.ITEM_CODE = EPI.ITEM_CODE
INNER JOIN LOM_MST_PRODUCT LMP ON LMP.PROD_CODE = EPI.ITEM_CODE
INNER JOIN LOM_LNK_PROD_COMP LLPC ON LLPC.COLOC_PROD_CODE = LMP.PROD_CODE
WHERE EPPIM.ITEM_STATUS = 'ACTIVE'
GROUP BY EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE;
here is my approach, also using a window function.
sample data
if object_id('tempdb..#x') is not null drop table #x
CREATE TABLE #x (ITEM_CODE VARCHAR(10), PROD_DESC VARCHAR(20),
COLOR_PROD_PRICE DECIMAL, POPULAR_PRODUCTS_IMAGE_PATHS VARCHAR(200))
INSERT INTO #X(ITEM_CODE,PROD_DESC,COLOR_PROD_PRICE,POPULAR_PRODUCTS_IMAGE_PATHS) VALUES
('P0001', 'Axe Brand', 88.000, 'some_path_to_img1.jpg'),
('P0001', 'Axe Brand', 88.000, 'some_path_to_img2.jpg'),
('P0002', 'Almond Nuts', 499.000, 'some_path_to_img1.jpg'),
('P0002', 'Almond Nuts', 499.000, 'some_path_to_img2.jpg')
query - just change #x to your table and it should work
;WITH my_cte as
(
SELECT *,
ROW_NUMBER() OVER(PARTITION BY ITEM_CODE ORDER BY POPULAR_PRODUCTS_IMAGE_PATHS) AS 'track_row'
FROM #x
)
SELECT a.ITEM_CODE, a.PROD_DESC, a.COLOR_PROD_PRICE,
a.POPULAR_PRODUCTS_IMAGE_PATHS + ' ' + b.POPULAR_PRODUCTS_IMAGE_PATHS AS 'POPULAR_PRODUCTS_IMAGE_PATHS'
FROM my_cte AS a
INNER JOIN
my_cte AS b ON a.ITEM_CODE=b.ITEM_CODE
WHERE a.track_row=1 AND b.track_row=2
output
ITEM_CODE PROD_DESC COLOR_PROD_PRICE POPULAR_PRODUCTS_IMAGE_PATHS
P0001 Axe Brand 88 some_path_to_img1.jpg some_path_to_img2.jpg
P0002 Almond Nuts 499 some_path_to_img1.jpg some_path_to_img2.jpg

Convert rows to constant columns in SQL Server?

I have the following query with its result:
SELECT * FROM dbo.DeviceView AS dv
WHERE DeviceId = 5
Result:
Id Name AttachId ColorId Date
--- ------- ---------- ------- -------
5 Apple iPhone 5s A1533 NULL 1 2013-09-10 00:00:00.000
5 Apple iPhone 5s A1533 NULL 8 2013-09-10 00:00:00.000
5 Apple iPhone 5s A1533 NULL 19 2013-09-10 00:00:00.000
ColorId is within diffrent values and it can be more or less than 3 values
I want to convert ColorId to 3 columns, such as first value in ColorId1 and second value in ColorId2 and third value in ColorId3.
eg:
Id Name AttachId ColorId1 ColorId2 ColorId3 Date
--- ------- ---------- ---------- ---------- ---------- -------
5 Apple iPhone 5s A1533 NULL 1 8 19 2013-09-10 00:00:00.000
How can I convert it to the following?
Edit:
All other fields except ColorId are the same.
You can use this generic query based on PIVOT and CTE's that can easily be extended for any number of colors and that performs very well:
-- First, we assign unique numbers to each of the ColorId's. These will become column names
;WITH NumberedColors (ColorId,ColorNumber) AS (
SELECT ColorId,'Color'+CAST((ROW_NUMBER() OVER (ORDER BY ColorId)) AS VARCHAR) AS ColorNumber
FROM dbo.DeviceView
GROUP BY ColorId
),
-- Here we return the dbo.DeviceView extended with ColorNumber column name
DeviceViewWithNumberedColors (Id,Name,AttachId,[Date],ColorNumber,ColorId) AS (
SELECT Id,Name,AttachId,Date,NC.ColorNumber,NC.ColorId
FROM dbo.DeviceView DV
INNER JOIN NumberedColors NC ON DV.ColorId=NC.ColorId
)
-- Finally, we use the PIVOT to assign color's to the appropriate columns
SELECT *
FROM (
SELECT Id,Name,AttachId,[Date],ColorId,ColorNumber
FROM DeviceViewWithNumberedColors D
) AS Source
PIVOT (
SUM(ColorId) FOR ColorNumber IN ([Color1],[Color2],[Color3],[Color4],[Color5],[Color6],[Color7],[Color8],[Color9],[Color10])
) Piv
In the PIVOT clause, make sure you have enough Color columns. If this can not be hard-coded, i.e. the number of colors could grow beyond a fixed number, then use dynamic SQL to generate this query.
If you want exactly three colors, you can use conditional aggregation or pivot:
select id, name, attachid,
max(case when seqnum = 1 then color end) as color1,
max(case when seqnum = 2 then color end) as color2,
max(case when seqnum = 3 then color end) as color3,
date
from (select t.*,
row_number() over (partition by id order by (select null)) as seqnum
from t
) t
group by id, name, attachid, date;
If count of ColorId values is not a fixed number you must use dynamic sql.
DECLARE #sql nvarchar(MAX) = 'SELECT Id, Name, AttachId'
SELECT #sql = #sql
+ ', MAX(IIF(ValueNum = ' + LTRIM(STR(ColumnNum)) + ', ColorId, NULL)) AS ColorId' + LTRIM(STR(ColumnNum))
FROM (
SELECT DISTINCT
DENSE_RANK() OVER (ORDER BY ColorId) AS ColumnNum
FROM dbo.DeviceView
WHERE Id IN (
SELECT TOP 1 Id
FROM dbo.DeviceView
GROUP BY Id
ORDER BY COUNT(DISTINCT ColorId) DESC)
) AS c
EXEC (#sql
+ ', [Date] FROM ('
+ 'SELECT Id, Name, AttachId, ColorId, [Date], DENSE_RANK() OVER (PARTITION BY Id ORDER BY ColorId) AS ValueNum FROM dbo.DeviceView'
+ ') AS it '
+ 'GROUP BY Id, Name, AttachId, [Date]');

Find overlapping sets of data in a table

I need to identify duplicate sets of data and give those sets who's data is similar a group id.
id threshold cost
-- ---------- ----------
1 0 9
1 100 7
1 500 6
2 0 9
2 100 7
2 500 6
I have thousands of these sets, most are the same with different id's. I need find all the like sets that have the same thresholds and cost amounts and give them a group id. I'm just not sure where to begin. Is the best way to iterate and insert each set into a table and then each iterate through each set in the table to find what already exists?
This is one of those cases where you can try to do something with relational operators. Or, you can just say: "let's put all the information in a string and use that as the group id". SQL Server seems to discourage this approach, but it is possible. So, let's characterize the groups using:
select d.id,
(select cast(threshold as varchar(8000)) + '-' + cast(cost as varchar(8000)) + ';'
from data d2
where d2.id = d.id
for xml path ('')
order by threshold
) as groupname
from data d
group by d.id;
Oh, I think that solves your problem. The groupname can serve as the group id. If you want a numeric id (which is probably a good idea, use dense_rank():
select d.id, dense_rank() over (order by groupname) as groupid
from (select d.id,
(select cast(threshold as varchar(8000)) + '-' + cast(cost as varchar(8000)) + ';'
from data d2
where d2.id = d.id
for xml path ('')
order by threshold
) as groupname
from data d
group by d.id
) d;
Here's the solution to my interpretation of the question:
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
;
WITH BaseTable AS
(
SELECT 1 id, 0 as threshold, 9 as cost
UNION SELECT 1, 100, 7
UNION SELECT 1, 500, 6
UNION SELECT 2, 0, 9
UNION SELECT 2, 100, 7
UNION SELECT 2, 500, 6
UNION SELECT 3, 1, 9
UNION SELECT 3, 100, 7
UNION SELECT 3, 500, 6
)
, BaseCTE AS
(
SELECT
id
--,dense_rank() over (order by threshold, cost ) as GroupId
,
(
SELECT CAST(TblGrouping.threshold AS varchar(8000)) + '/' + CAST(TblGrouping.cost AS varchar(8000)) + ';'
FROM BaseTable AS TblGrouping
WHERE TblGrouping.id = BaseTable.id
ORDER BY TblGrouping.threshold, TblGrouping.cost
FOR XML PATH ('')
) AS MultiGroup
FROM BaseTable
GROUP BY id
)
,
CTE AS
(
SELECT
*
,DENSE_RANK() OVER (ORDER BY MultiGroup) AS GroupId
FROM BaseCTE
)
SELECT *
INTO #tempGrouping
FROM CTE
-- SELECT * FROM #tempGrouping;
UPDATE BaseTable
SET BaseTable.GroupId = #tempGrouping.GroupId
FROM BaseTable
INNER JOIN #tempGrouping
ON BaseTable.Id = #tempGrouping.Id
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
Where BaseTable is your table, and and you don't need the CTE "BaseTable", because you have a data table.
You may need to take extra-precautions if your threshold and cost fields can be NULL.