Convert rows to constant columns in SQL Server? - sql

I have the following query with its result:
SELECT * FROM dbo.DeviceView AS dv
WHERE DeviceId = 5
Result:
Id Name AttachId ColorId Date
--- ------- ---------- ------- -------
5 Apple iPhone 5s A1533 NULL 1 2013-09-10 00:00:00.000
5 Apple iPhone 5s A1533 NULL 8 2013-09-10 00:00:00.000
5 Apple iPhone 5s A1533 NULL 19 2013-09-10 00:00:00.000
ColorId is within diffrent values and it can be more or less than 3 values
I want to convert ColorId to 3 columns, such as first value in ColorId1 and second value in ColorId2 and third value in ColorId3.
eg:
Id Name AttachId ColorId1 ColorId2 ColorId3 Date
--- ------- ---------- ---------- ---------- ---------- -------
5 Apple iPhone 5s A1533 NULL 1 8 19 2013-09-10 00:00:00.000
How can I convert it to the following?
Edit:
All other fields except ColorId are the same.

You can use this generic query based on PIVOT and CTE's that can easily be extended for any number of colors and that performs very well:
-- First, we assign unique numbers to each of the ColorId's. These will become column names
;WITH NumberedColors (ColorId,ColorNumber) AS (
SELECT ColorId,'Color'+CAST((ROW_NUMBER() OVER (ORDER BY ColorId)) AS VARCHAR) AS ColorNumber
FROM dbo.DeviceView
GROUP BY ColorId
),
-- Here we return the dbo.DeviceView extended with ColorNumber column name
DeviceViewWithNumberedColors (Id,Name,AttachId,[Date],ColorNumber,ColorId) AS (
SELECT Id,Name,AttachId,Date,NC.ColorNumber,NC.ColorId
FROM dbo.DeviceView DV
INNER JOIN NumberedColors NC ON DV.ColorId=NC.ColorId
)
-- Finally, we use the PIVOT to assign color's to the appropriate columns
SELECT *
FROM (
SELECT Id,Name,AttachId,[Date],ColorId,ColorNumber
FROM DeviceViewWithNumberedColors D
) AS Source
PIVOT (
SUM(ColorId) FOR ColorNumber IN ([Color1],[Color2],[Color3],[Color4],[Color5],[Color6],[Color7],[Color8],[Color9],[Color10])
) Piv
In the PIVOT clause, make sure you have enough Color columns. If this can not be hard-coded, i.e. the number of colors could grow beyond a fixed number, then use dynamic SQL to generate this query.

If you want exactly three colors, you can use conditional aggregation or pivot:
select id, name, attachid,
max(case when seqnum = 1 then color end) as color1,
max(case when seqnum = 2 then color end) as color2,
max(case when seqnum = 3 then color end) as color3,
date
from (select t.*,
row_number() over (partition by id order by (select null)) as seqnum
from t
) t
group by id, name, attachid, date;

If count of ColorId values is not a fixed number you must use dynamic sql.
DECLARE #sql nvarchar(MAX) = 'SELECT Id, Name, AttachId'
SELECT #sql = #sql
+ ', MAX(IIF(ValueNum = ' + LTRIM(STR(ColumnNum)) + ', ColorId, NULL)) AS ColorId' + LTRIM(STR(ColumnNum))
FROM (
SELECT DISTINCT
DENSE_RANK() OVER (ORDER BY ColorId) AS ColumnNum
FROM dbo.DeviceView
WHERE Id IN (
SELECT TOP 1 Id
FROM dbo.DeviceView
GROUP BY Id
ORDER BY COUNT(DISTINCT ColorId) DESC)
) AS c
EXEC (#sql
+ ', [Date] FROM ('
+ 'SELECT Id, Name, AttachId, ColorId, [Date], DENSE_RANK() OVER (PARTITION BY Id ORDER BY ColorId) AS ValueNum FROM dbo.DeviceView'
+ ') AS it '
+ 'GROUP BY Id, Name, AttachId, [Date]');

Related

How to pivot multiple columns without aggregation

I use SqlServer and i have to admit that i'm not realy good with it ...
This might be and easy question for the advanced users (I hope)
I have two tables which look like this
First table (ID isn't the primary key)
ID IdCust Ref
1 300 123
1 300 124
2 302 345
And the second (ID isn't the primary key)
ID Ref Code Price
1 123 A 10
1 123 Y 15
2 124 A 14
3 345 C 18
In the second table, the column "Ref" is the foreign key of "Ref" in the first table
I'm trying to produce the following output:
[EDIT]
The column "Stock", "Code" and "Price" can have x values, so I don't know it, in advance...
I tried so many things like "PIVOT" but it didn't give me the right result, so i hope someone can solve my problem ...
Use row_number() function and do the conditional aggregation :
select id, IdCust, Ref,
max(case when Seq = 1 then stock end) as [Stock A], -- second table *id*
max(case when Seq = 1 then code end) as [Code 1],
max(case when Seq = 1 then price end) as [Price1],
max(case when Seq = 2 then stock end) as [Stock B], -- second table *id*
max(case when Seq = 2 then code end) as [Code 2],
max(case when Seq = 2 then price end) as [Price2]
from (select f.*, s.Id Stock, s.Code, s.Price,
row_number() over (partition by f.Ref order by s.id) as Seq
from first f
inner join second s on s.Ref = f.Ref
) t
group by id, IdCust, Ref;
However, this would go with known values else you would need go with dynamic solution for that.
#YogeshSharma's provided an excellent answer.
Here's the same done using Pivot; SQL Fiddle Demo.
Functionally there's no difference between the two answers. However, Yogesh's solution's simpler to understand, and performs better; so personally I'd opt for that... I included this answer only because you mention PIVOT in the question:
select ft.Id
, ft.IdCust
, ft.Ref
, x.Stock1
, x.Code1
, x.Price1
, x.Stock2
, x.Code2
, x.Price2
from FirstTable ft
left outer join (
select Ref
, max([Stock1]) Stock1
, max([Stock2]) Stock2
, max([Code1]) Code1
, max([Code2]) Code2
, max([Price1]) Price1
, max([Price2]) Price2
from
(
select Ref
, Id Stock
, Code
, Price
, ('Stock' + cast(Row_Number() over (partition by Ref order by Id, Code) as nvarchar)) StockLineNo
, ('Code' + cast(Row_Number() over (partition by Ref order by Id, Code) as nvarchar)) CodeLineNo
, ('Price' + cast(Row_Number() over (partition by Ref order by Id, Code) as nvarchar)) PriceLineNo
from SecondTable
) st
pivot (max(Stock) for StockLineNo in ([Stock1],[Stock2])) pvtStock
pivot (max(Code) for CodeLineNo in ([Code1],[Code2])) pvtCode
pivot (max(Price) for PriceLineNo in ([Price1],[Price2])) pvtPrice
Group by Ref
) x
on x.Ref = ft.Ref
order by ft.Ref
Like Yogesh's solution, this will only handle as many columns as you specify; it won't dynamically alter the number of columns to match the data. For that you'd need to do dynamic SQL. However; if you need to do that, it's more likely you're attempting to solve the problem in the wrong way... so consider your design / determine if you really need additional columns per result rather than additional rows / some alternate approach...
Here's a Dynamic SQL implementation based on #YogeshSharma's answer: DBFiddle
declare #sql nvarchar(max) = 'select id, IdCust, Ref'
select #sql = #sql + '
,max(case when Seq = 1 then stock end) as [Stock' + rowNumVarchar + ']
,max(case when Seq = 1 then code end) as [Code' + rowNumVarchar + ']
,max(case when Seq = 1 then price end) as [Price' + rowNumVarchar + ']
'
from
(
select distinct cast(row_number() over (partition by ref order by ref) as nvarchar) rowNumVarchar
from second s
) z
set #sql = #sql + '
from (select f.*, s.Id Stock, s.Code, s.Price,
row_number() over (partition by f.Ref order by s.id) as Seq
from first f
inner join second s on s.Ref = f.Ref
) t
group by id, IdCust, Ref;
'
print #sql --see what the SQL produced is
exec (#sql)
(Here's a SQL Fiddle link for this one; but it's not working despite the SQL being valid

How to get data from 2 rows which has same data in all columns except one in MSSQL

As in my title I want to take data from 2 rows but In my case each 2nd row has one different value compare to the first row.
I want to take all the common data along with the different data as a single row .
Here you can see each row has same values in another row except the 2nd rows last column.
Thanks.
Edits Result :
I suspect you have a some kind of ordering columns that could specify your actual data ordering if so, then you can use row_number() function
select * from (
select *,
row_number() over (partition by <common data cols> order by ? desc) Seq
from table t
) t
where seq = 1;
EDIT : I don't believe your inventort_item_id columns but yes you could use creation_date for ordering purpose
SELECT
EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE,
BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION AS POPULAR_PRODUCTS_IMAGE_PATHS
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY ITEM_CODE ORDER BY creation_date DESC) as Seq
FROM ECOM_PRODUCT_IMAGES EPI
) EPI
INNER JOIN ECOM_POPULAR_PRODUCTS_MAPPING EPPIM ON EPPIM.ITEM_CODE = EPI.ITEM_CODE
INNER JOIN LOM_MST_PRODUCT LMP ON LMP.PROD_CODE = EPI.ITEM_CODE
INNER JOIN LOM_LNK_PROD_COMP LLPC ON LLPC.COLOC_PROD_CODE = LMP.PROD_CODE
WHERE EPI.Seq = 1 AND
EPPIM.ITEM_STATUS = 'ACTIVE';
EDIT 2: In that case you need to use GROUP BY clause with conditional aggregation
SELECT
EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE,
MAX(CASE WHEN EPI.Seq = 2
THEN (BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION)
END) AS POPULAR_PRODUCTS_IMAGE_PATHS,
MAX(CASE WHEN EPI.Seq = 1
THEN (BASE_PATH + '' + EPI.IMAGE_FOLDER_NAME + '/' + EPI.IMAGE_DESCRIPTION)
END) AS PATH_NEW
FROM (SELECT *,
ROW_NUMBER() OVER (PARTITION BY ITEM_CODE ORDER BY creation_date DESC) as Seq
FROM ECOM_PRODUCT_IMAGES EPI
) EPI
INNER JOIN ECOM_POPULAR_PRODUCTS_MAPPING EPPIM ON EPPIM.ITEM_CODE = EPI.ITEM_CODE
INNER JOIN LOM_MST_PRODUCT LMP ON LMP.PROD_CODE = EPI.ITEM_CODE
INNER JOIN LOM_LNK_PROD_COMP LLPC ON LLPC.COLOC_PROD_CODE = LMP.PROD_CODE
WHERE EPPIM.ITEM_STATUS = 'ACTIVE'
GROUP BY EPI.ITEM_CODE, LMP.PROD_DESC, LLPC.COLOC_PROD_PRICE;
here is my approach, also using a window function.
sample data
if object_id('tempdb..#x') is not null drop table #x
CREATE TABLE #x (ITEM_CODE VARCHAR(10), PROD_DESC VARCHAR(20),
COLOR_PROD_PRICE DECIMAL, POPULAR_PRODUCTS_IMAGE_PATHS VARCHAR(200))
INSERT INTO #X(ITEM_CODE,PROD_DESC,COLOR_PROD_PRICE,POPULAR_PRODUCTS_IMAGE_PATHS) VALUES
('P0001', 'Axe Brand', 88.000, 'some_path_to_img1.jpg'),
('P0001', 'Axe Brand', 88.000, 'some_path_to_img2.jpg'),
('P0002', 'Almond Nuts', 499.000, 'some_path_to_img1.jpg'),
('P0002', 'Almond Nuts', 499.000, 'some_path_to_img2.jpg')
query - just change #x to your table and it should work
;WITH my_cte as
(
SELECT *,
ROW_NUMBER() OVER(PARTITION BY ITEM_CODE ORDER BY POPULAR_PRODUCTS_IMAGE_PATHS) AS 'track_row'
FROM #x
)
SELECT a.ITEM_CODE, a.PROD_DESC, a.COLOR_PROD_PRICE,
a.POPULAR_PRODUCTS_IMAGE_PATHS + ' ' + b.POPULAR_PRODUCTS_IMAGE_PATHS AS 'POPULAR_PRODUCTS_IMAGE_PATHS'
FROM my_cte AS a
INNER JOIN
my_cte AS b ON a.ITEM_CODE=b.ITEM_CODE
WHERE a.track_row=1 AND b.track_row=2
output
ITEM_CODE PROD_DESC COLOR_PROD_PRICE POPULAR_PRODUCTS_IMAGE_PATHS
P0001 Axe Brand 88 some_path_to_img1.jpg some_path_to_img2.jpg
P0002 Almond Nuts 499 some_path_to_img1.jpg some_path_to_img2.jpg

Case statement not supporting horizontal search with column name in query

I am new to ORACLE SQL and I am trying to learn it quickly.
I have following table definition:
Create table Sales_Biodata
(
Saler_Id INTEGER NOT NULL UNIQUE,
Jan_Sales INTEGER NOT NULL,
Feb_Sales INTEGER NOT NULL,
March_Sales INTEGER NOT NULL
);
Insert into Sales_Biodata (SALER_ID,JAN_SALES,Feb_Sales,March_Sales)
values ('101',22,525,255);
Insert into Sales_Biodata (SALER_ID,JAN_SALES,Feb_Sales,March_Sales)
values ('102',22,55,25);
Insert into Sales_Biodata (SALER_ID,JAN_SALES,Feb_Sales,March_Sales)
values ('103',45545,5125,2865);
My objective is the following:
1- Searching the highest sales and second highest sales against each saler_id.
For example in our above case:
For saler_id =101 highest sales is 525 and second highest sales is 255
similary for saler_id=102 highest sales is 55 and second highest sales is 25
For my above approach I am using the following query:
Select Saler_Id,
(
CASE
WHEN JAN_SALES>FEB_SALES AND JAN_SALES>MARCH_SALES THEN JAN_SALES
WHEN FEB_SALES>JAN_SALES AND FEB_SALES>MARCH_SALES THEN FEB_SALES
WHEN MARCH_SALES>JAN_SALES AND MARCH_SALES>FEB_SALES THEN MARCH_SALES
WHEN JAN_SALES=FEB_SALES AND JAN_SALES=MARCH_SALES THEN JAN_SALES
WHEN JAN_SALES=FEB_SALES AND JAN_SALES>MARCH_SALES THEN JAN_SALES
WHEN JAN_SALES=MARCH_SALES AND JAN_SALES>FEB_SALES THEN JAN_SALES
WHEN FEB_SALES=JAN_SALES AND FEB_SALES>MARCH_SALES THEN FEB_SALES
WHEN FEB_SALES=MARCH_SALES AND FEB_SALES>JAN_SALES THEN FEB_SALES
WHEN MARCH_SALES=JAN_SALES AND MARCH_SALES>FEB_SALES THEN MARCH_SALES
WHEN MARCH_SALES=FEB_SALES AND MARCH_SALES>JAN_SALES THEN MARCH_SALES
ELSE 'NEW_CASE_FOUND'
END
) FIRST_HIGHEST,
(
CASE
WHEN JAN_SALES>FEB_SALES AND FEB_SALES>MARCH_SALES THEN FEB_SALES
WHEN FEB_SALES>JAN_SALES AND JAN_SALES>MARCH_SALES THEN JAN_SALES
WHEN JAN_SALES>MARCH_SALES AND MARCH_SALES>FEB_SALES THEN MARCH_SALES
ELSE 'NEW_CASE_FOUND'
END
) SECOND_HIGHEST
from
Sales_Biodata;
but I am getting the following errors:
ORA-00932: inconsistent datatypes: expected NUMBER got CHAR
00932. 00000 - "inconsistent datatypes: expected %s got %s"
*Cause:
*Action:
Error at Line: 60 Column: 6
Please guide me on the following:
1- How to search the data horizontally for maximum and second maximum.
2- Please guide me on alternate approaches for searching data for a row horizontally.
Getting the maximum value is simply:
select greatest(jan_sales, feb_sales, mar_sales)
If you want the second value:
select (case when jan_sales = greatest(jan_sales, feb_sales, mar_sales)
then greatest(feb_sales, mar_sales)
when feb_sales = greatest(jan_sales, feb_sales, mar_sales)
then greatest(jan_sales, mar_sales)
else greatest(jan_sales, feb_sales)
end)
However, this is the wrong approach to the whole problem. The main issues is that you have the wrong data structure. Store values in rows not columns. So, you need to unpivot your data and re-aggregation, such as:
select saler_id,
max(case when seqnum = 1 then sales end) as sales_1,
max(case when seqnum = 2 then sales end) as sales_2,
max(case when seqnum = 3 then sales end) as sales_3
from (select s.*, dense_rank() over (partition by saler_id order by sales desc) as seqnum
from (select saler_id, jan_sales as sales Sales_Biodata union all
select saler_id, feb_sales Sales_Biodata union all
select saler_id, mar_sales Sales_Biodata
) s
) s
group by saler_id;
Your data model is wrong.
The first thing I would do is to unpivot data using this query:
select * from sales_biodata
unpivot (
val for mon in ( JAN_SALES,FEB_SALES,MARCH_SALES )
)
;
and after this, getting two top values is relatively easy:
SELECT *
FROM (
SELECT t.*,
dense_rank() over (partition by saler_id order by val desc ) x
FROM (
select * from sales_biodata
unpivot (
val for mon in ( JAN_SALES,FEB_SALES,MARCH_SALES )
)
) t
)
WHERE x <= 2
the above query will give a result in this format:
SALER_ID MON VAL X
---------- ----------- ---------- ----------
101 FEB_SALES 525 1
101 MARCH_SALES 255 2
102 FEB_SALES 55 1
102 MARCH_SALES 25 2
103 JAN_SALES 45545 1
103 FEB_SALES 5125 2
If you have more month than 3 months, you can easily extend this query changing this part:
val for mon in ( JAN_SALES,FEB_SALES,MARCH_SALES, April_sales, MAY_SALES, JUNE_SALES, JULY_SALES, ...... NOVEMBER_SALES, DECEMBER_SALES )
If you want both two values in one row, you need to pivot data back:
WITH src_data AS(
SELECT saler_id, val, x
FROM (
SELECT t.*,
dense_rank() over (partition by saler_id order by val desc ) x
FROM (
select * from sales_biodata
unpivot (
val for mon in ( JAN_SALES,FEB_SALES,MARCH_SALES )
)
) t
)
WHERE x <= 2
)
SELECT *
FROM src_data
PIVOT(
max(val) FOR x IN ( 1 As "First value", 2 As "Second value" )
);
This gives a result in this form:
SALER_ID First value Second value
---------- ----------- ------------
101 525 255
102 55 25
103 45545 5125
EDIT - why MAX is used in the PIVOT query
The short answer is: because the syntax reuires an aggregate function here.
See this link for the syntax: http://docs.oracle.com/cd/E11882_01/server.112/e41084/statements_10002.htm#CHDCEJJE
A broader answer:
The PIVOT clause is only a syntactic sugar that simplifies a general "classic" pivot query which is using aggregate function and GROUP BY clause, like this:
SELECT id,
max( CASE WHEN some_column = 'X' THEN value END ) As x,
max( CASE WHEN some_column = 'Y' THEN value END ) As y,
max( CASE WHEN some_column = 'Z' THEN value END ) As z
FROM table11
GROUP BY id
More on PIVOT queries you can find on the net, there is a lot of excelent explanations how the pivot query works.
The above pivot query, written in "standard" SQL, is equivalent to this Oracle's query:
SELECT *
FROM table11
PIVOT (
max(value) FOR some_column IN ( 'X', 'Y', 'Z' )
)
These PIVOT queries transform records like this:
ID SOME_COLUMN VALUE
---------- ----------- ----------
1 X 10
1 X 15
1 Y 20
1 Z 30
into one record (for each id) like this:
ID 'X' 'Y' 'Z'
---------- ---------- ---------- ----------
1 15 20 30
Please note, that the source table contains two values for id=1 and some_column='X' -> 10 and 15. PIVOT queries uses aggregate function to support that "general" case, where there could be many source records for one record in the output. In this example 'MAX' function is used to pick greater value 15.
However PIVOT queries supports also your specific case where there is only one source record for each value in the result.
You are coming across the error as the string 'new case found' is added in the else part and the rest of the case statement deals with number . data type in the when and else clause should match.
Coming to alternate approaches you may use unpivot and get the months sales data into a single row and use analytical functions to get the 1st highest or second highest.
As others have said, the problem is that the WHEN clauses in your CASE statement are returning INTEGER values, but the ELSE is returning a character string. I completely agree with the comments regarding normalization but if you really just want to make this query work you'll need to convert the results of each WHEN clause to character, as in:
Select Saler_Id,
(
CASE
WHEN JAN_SALES>FEB_SALES AND JAN_SALES>MARCH_SALES THEN TO_CHAR(JAN_SALES)
WHEN FEB_SALES>JAN_SALES AND FEB_SALES>MARCH_SALES THEN TO_CHAR(FEB_SALES)
WHEN MARCH_SALES>JAN_SALES AND MARCH_SALES>FEB_SALES THEN TO_CHAR(MARCH_SALES)
WHEN JAN_SALES=FEB_SALES AND JAN_SALES=MARCH_SALES THEN TO_CHAR(JAN_SALES)
WHEN JAN_SALES=FEB_SALES AND JAN_SALES>MARCH_SALES THEN TO_CHAR(JAN_SALES)
WHEN JAN_SALES=MARCH_SALES AND JAN_SALES>FEB_SALES THEN TO_CHAR(JAN_SALES)
WHEN FEB_SALES=JAN_SALES AND FEB_SALES>MARCH_SALES THEN TO_CHAR(FEB_SALES)
WHEN FEB_SALES=MARCH_SALES AND FEB_SALES>JAN_SALES THEN TO_CHAR(FEB_SALES)
WHEN MARCH_SALES=JAN_SALES AND MARCH_SALES>FEB_SALES THEN TO_CHAR(MARCH_SALES)
WHEN MARCH_SALES=FEB_SALES AND MARCH_SALES>JAN_SALES THEN TO_CHAR(MARCH_SALES)
ELSE 'NEW_CASE_FOUND'
END
) FIRST_HIGHEST,
(
CASE
WHEN JAN_SALES>FEB_SALES AND FEB_SALES>MARCH_SALES THEN TO_CHAR(FEB_SALES)
WHEN FEB_SALES>JAN_SALES AND JAN_SALES>MARCH_SALES THEN TO_CHAR(JAN_SALES)
WHEN JAN_SALES>MARCH_SALES AND MARCH_SALES>FEB_SALES THEN TO_CHAR(MARCH_SALES)
ELSE 'NEW_CASE_FOUND'
END
) SECOND_HIGHEST
from
Sales_Biodata;
Best of luck.

Find overlapping sets of data in a table

I need to identify duplicate sets of data and give those sets who's data is similar a group id.
id threshold cost
-- ---------- ----------
1 0 9
1 100 7
1 500 6
2 0 9
2 100 7
2 500 6
I have thousands of these sets, most are the same with different id's. I need find all the like sets that have the same thresholds and cost amounts and give them a group id. I'm just not sure where to begin. Is the best way to iterate and insert each set into a table and then each iterate through each set in the table to find what already exists?
This is one of those cases where you can try to do something with relational operators. Or, you can just say: "let's put all the information in a string and use that as the group id". SQL Server seems to discourage this approach, but it is possible. So, let's characterize the groups using:
select d.id,
(select cast(threshold as varchar(8000)) + '-' + cast(cost as varchar(8000)) + ';'
from data d2
where d2.id = d.id
for xml path ('')
order by threshold
) as groupname
from data d
group by d.id;
Oh, I think that solves your problem. The groupname can serve as the group id. If you want a numeric id (which is probably a good idea, use dense_rank():
select d.id, dense_rank() over (order by groupname) as groupid
from (select d.id,
(select cast(threshold as varchar(8000)) + '-' + cast(cost as varchar(8000)) + ';'
from data d2
where d2.id = d.id
for xml path ('')
order by threshold
) as groupname
from data d
group by d.id
) d;
Here's the solution to my interpretation of the question:
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
;
WITH BaseTable AS
(
SELECT 1 id, 0 as threshold, 9 as cost
UNION SELECT 1, 100, 7
UNION SELECT 1, 500, 6
UNION SELECT 2, 0, 9
UNION SELECT 2, 100, 7
UNION SELECT 2, 500, 6
UNION SELECT 3, 1, 9
UNION SELECT 3, 100, 7
UNION SELECT 3, 500, 6
)
, BaseCTE AS
(
SELECT
id
--,dense_rank() over (order by threshold, cost ) as GroupId
,
(
SELECT CAST(TblGrouping.threshold AS varchar(8000)) + '/' + CAST(TblGrouping.cost AS varchar(8000)) + ';'
FROM BaseTable AS TblGrouping
WHERE TblGrouping.id = BaseTable.id
ORDER BY TblGrouping.threshold, TblGrouping.cost
FOR XML PATH ('')
) AS MultiGroup
FROM BaseTable
GROUP BY id
)
,
CTE AS
(
SELECT
*
,DENSE_RANK() OVER (ORDER BY MultiGroup) AS GroupId
FROM BaseCTE
)
SELECT *
INTO #tempGrouping
FROM CTE
-- SELECT * FROM #tempGrouping;
UPDATE BaseTable
SET BaseTable.GroupId = #tempGrouping.GroupId
FROM BaseTable
INNER JOIN #tempGrouping
ON BaseTable.Id = #tempGrouping.Id
IF OBJECT_ID('tempdb..#tempGrouping') IS NOT NULL DROP Table #tempGrouping;
Where BaseTable is your table, and and you don't need the CTE "BaseTable", because you have a data table.
You may need to take extra-precautions if your threshold and cost fields can be NULL.

Using pivot table with column and row totals in sql server 2008

I have a table with following columns
defect_id, developer_name, status, summary, root_cause,
Secondary_RC, description, Comments, environment_name
The column root_cause has Enviro, Requi, Dev, TSc, TD, Unkn as its values and
column environment_name has QA1, QA2, QA3
I need to prepare a report in the below format
Enviro Requi Dev TSc TD Unkn Total
QA1 9 1 14 17 2 3 46
QA2 8 1 14 0 5 1 29
QA3 1 1 7 0 0 1 10
Total 18 3 35 17 7 5 85
I have prepare the report till
Enviro Requi Dev TSc TD Unkn
QA1 9 1 14 17 2 3
QA2 8 1 14 0 5 1
QA3 1 1 7 0 0 1
I used the below query to get the above result
select *
from
(
select environment_name as " ", value
from test1
unpivot
(
value
for col in (root_cause)
) unp
) src
pivot
(
count(value)
for value in ([Enviro] , [Requi] , [Dev] , [Tsc], [TD] , [Unkn])
) piv
Can anyone help to get the totals for columns and rows?
There may be various approaches to this. You can calculate all the totals after the pivot, or you can get the totals first, then pivot all the results. It is also possible to have kind of middle ground: get one kind of the totals (e.g. the row-wise ones), pivot, then get the other kind, although that might be overdoing it.
The first of the mentioned approaches, getting all the totals after the pivot, could be done in a very straightforward way, and the only thing potentially new to you in the below implementation might be GROUP BY ROLLUP():
SELECT
[ ] = ISNULL(environment_name, 'Total'),
[Enviro] = SUM([Enviro]),
[Requi] = SUM([Requi]),
[Dev] = SUM([Dev]),
[Tsc] = SUM([Tsc]),
[TD] = SUM([TD]),
[Unkn] = SUM([Unkn]),
Total = SUM([Enviro] + [Requi] + [Dev] + [Tsc] + [TD] + [Unkn])
FROM (
SELECT environment_name, root_cause
FROM test1
) s
PIVOT (
COUNT(root_cause)
FOR root_cause IN ([Enviro], [Requi], [Dev], [Tsc], [TD], [Unkn])
) p
GROUP BY
ROLLUP(environment_name)
;
Basically, the GROUP BY ROLLUP() part produces the Total row for you. The grouping is first done by environment_name, then the grand total row is added.
To do just the opposite, i.e. get the totals prior to pivoting, you could employ GROUP BY CUBE() like this:
SELECT
[ ] = environment_name,
[Enviro] = ISNULL([Enviro], 0),
[Requi] = ISNULL([Requi] , 0),
[Dev] = ISNULL([Dev] , 0),
[Tsc] = ISNULL([Tsc] , 0),
[TD] = ISNULL([TD] , 0),
[Unkn] = ISNULL([Unkn] , 0),
Total = ISNULL(Total , 0)
FROM (
SELECT
environment_name = ISNULL(environment_name, 'Total'),
root_cause = ISNULL(root_cause, 'Total'),
cnt = COUNT(*)
FROM test1
WHERE root_cause IS NOT NULL
GROUP BY
CUBE(environment_name, root_cause)
) s
PIVOT (
SUM(cnt)
FOR root_cause IN ([Enviro], [Requi], [Dev], [Tsc], [TD], [Unkn], Total)
) p
;
Both methods can be tested and played with at SQL Fiddle:
Method 1
Method 2
Note. I've omitted the unpivoting step in both suggestions because unpivoting a single column seemed clearly redundant. If there's more to it, though, adjusting either of the queries should be easy.
You can find Total for root_cause and environment_name using ROLLUP.
RNO_COLTOTAL - Logic to place Total in last column, since the columns Tsc,Unkn will overlap the column Total when pivoting, since its ordering alphabetically.
RNO_ROWTOTAL - Logic to place Total in last row since a value that is starting with U,W,X,Y,Z can overlap the value Total, since its ordering alphabetically.
SUM(VALUE) - Can define on what aggregate function we can use with ROLLUP.
QUERY 1
SELECT CASE WHEN root_cause IS NULL THEN 1 ELSE 0 END RNO_COLTOTAL,
CASE WHEN environment_name IS NULL THEN 1 ELSE 0 END RNO_ROWTOTAL,
ISNULL(environment_name,'Total')environment_name,
ISNULL(root_cause,'Total')root_cause,
SUM(VALUE) VALUE
INTO #NEWTABLE
FROM
(
-- Find the count for environment_name,root_cause
SELECT DISTINCT *,COUNT(*) OVER(PARTITION BY environment_name,root_cause)VALUE
FROM #TEMP
)TAB
GROUP BY root_cause,environment_name
WITH CUBE
We will get the following logic when CUBE is used
We declare variables for pivoting.
#cols - Column values for pivoting.
#NulltoZeroCols - Replace null values with zero.
QUERY 2
DECLARE #cols NVARCHAR (MAX)
SELECT #cols = COALESCE (#cols + ',[' + root_cause + ']',
'[' + root_cause + ']')
FROM (SELECT DISTINCT RNO_COLTOTAL,root_cause FROM #NEWTABLE) PV
ORDER BY RNO_COLTOTAL,root_cause
DECLARE #NulltoZeroCols NVARCHAR (MAX)
SET #NullToZeroCols = SUBSTRING((SELECT ',ISNULL(['+root_cause+'],0) AS ['+root_cause+']'
FROM(SELECT DISTINCT RNO_COLTOTAL,root_cause FROM #NEWTABLE GROUP BY RNO_COLTOTAL,root_cause)TAB
ORDER BY RNO_COLTOTAL FOR XML PATH('')),2,8000)
Now pivot it dynamically
DECLARE #query NVARCHAR(MAX)
SET #query = 'SELECT environment_name,'+ #NulltoZeroCols +' FROM
(
SELECT RNO_ROWTOTAL,environment_name,root_cause,VALUE
FROM #NEWTABLE
) x
PIVOT
(
MIN(VALUE)
FOR [root_cause] IN (' + #cols + ')
) p
ORDER BY RNO_ROWTOTAL,environment_name;'
EXEC SP_EXECUTESQL #query
RESULT
I think you need to calculate the Total separately. Using this simple query for the total (sorry, had to give the alias name for your " " column):
select environment_name as en,
count (*) AS Total
FROM test1
WHERE value in ('Enviro', 'Requi', 'Dev', 'Tsc', 'TD', 'Unkn')
GROUP BY environment_name
you can easily join both queries together to get the required report:
SELECT * FROM
(select *
from
(
select environment_name as en, value
from test1
unpivot
(
value
for col in (root_cause)
) unp
) src
pivot
(
count(value)
for value in ([Enviro] , [Requi] , [Dev] , [Tsc], [TD] , [Unkn])
) piv
) AS a
INNER JOIN
( select environment_name as en,
count (*) AS Total
FROM test1
WHERE value in ('Enviro', 'Requi', 'Dev', 'Tsc', 'TD', 'Unkn')
GROUP BY environment_name
) AS b ON a.en = b.en
UNION ALL
SELECT * FROM
(select *
from
(
select 'Total' as en, value
from test1
unpivot
(
value
for col in (root_cause)
) unp
) src
pivot
(
count(value)
for value in ([Enviro] , [Requi] , [Dev] , [Tsc], [TD] , [Unkn])
) piv
) AS a
INNER JOIN
( select 'Total' as en,
count (*) AS Total
FROM test1
WHERE value in ('Enviro', 'Requi', 'Dev', 'Tsc', 'TD', 'Unkn')
) AS b
I have not tested it, but believe it will work
You can also put you select pivot, without totals, in a temp table and then add the totalS with a new select:
SELECT environment_name=ISNULL(environment_name, ‘Total’) , Enviro=SUM(Enviro), Requi=SUM(Requi), Dev=SUM(Dev), TSc=SUM(TSc), TD=SUM(TD), Unkn =SUM(Unkn),
Total = sum(Enviro+Requi+Dev+TSc+TD+Unkn)
FROM #temp
GROUP BY ROLLUP(environment_name)