Convert row to column when data are not numbers - sql

I have a Question table, which has a unknown number of questions.(first table in the figure)
I also have an AnswerSheet table, which records student's answer to question.(second table in the figure)
Create table Question
(
Id int,
Text nvarchar(50),
PRIMARY KEY (Id)
)
Create table AnswerSheet
(
StudentId int,
QuestionId int,
Answer nvarchar(50),
PRIMARY KEY (StudentId,QuestionId),
FOREIGN KEY (QuestionId) REFERENCES Question (Id)
)
insert into Question
values(1,'What''s your age'),
(2,'What''s your gender'),
(3,'When do you go home'),
....
insert into AnswerSheet
values(500,1,'20'),
(500,2,'Male'),
(500,3,'5:00pm'),
(501,1,'50'),
(502,2,'I don''t know##'),
....
How do I write a SQL to generate a table like this?
StudentId What's your age What's your gender When do you go home ...
--------- ---------------- ------------------- -------------------
500 20 Male 5:00pm ...
501 50 NULL NULL
502 NULL I don''t know## NULL ...
I feel Pivot is promising but I'm not sure how to use it especially PIVOT requires an aggreation function but my data are not numbers.

Assuming you wanted to go Dynamic
Example
Declare #SQL varchar(max) = Stuff((Select ',' + QuoteName(Text) From Question Order by ID For XML Path('')),1,1,'')
Select #SQL = '
Select *
From (
Select StudentID
,Col = B.Text
,Value = A.Answer
From AnswerSheet A
Join Question B on A.QuestionID=B.ID
) A
Pivot (max(Value) For [Col] in (' + #SQL + ') ) p'
Exec(#SQL);
Returns
StudentID What's your age What's your gender When do you go home
500 20 Male 5:00pm
501 50 NULL NULL
502 NULL I don't know## NULL
If it Helps, the Generated SQL Looks Like This
Select *
From (
Select StudentID
,Col = B.Text
,Value = A.Answer
From AnswerSheet A
Join Question B on A.QuestionID=B.ID
) A
Pivot (max(Value) For [Col] in ([What's your age],[What's your gender],[When do you go home]) ) p

I know this question is answered by accepted one, but I hope this approach helps others.
simply you can achieve your goal without using Pivot, via using Group by as next:-
Select b.StudentId,
Min(Case a.text When 'What''s your age' Then b.answer End) 'What''s your age',
Min(Case a.text When 'What''s your gender' Then b.answer End) 'What''s your gender',
Min(Case a.text When 'When do you go home' Then b.answer End) 'When do you go home'
from Question a inner join AnswerSheet b
on a.id = b.Questionid
Group By StudentId
and you mentioned unknown number of questions, so the next code for dynamic:-
DECLARE #DynamicQuestions VARCHAR(8000)
SELECT #DynamicQuestions = Stuff(
(SELECT N' Min(Case a.text When''' + replace (Text,'''','''''')
+ ''' Then b.answer End) '''
+ replace (Text,'''','''''') + ''','
FROM Question FOR XML PATH(''),TYPE)
.value('text()[1]','nvarchar(max)'),1,1,N'')
select #DynamicQuestions =
left(#DynamicQuestions,len(#DynamicQuestions)-1) -- for Removing last comma
exec ('Select b.StudentId, '+ #DynamicQuestions +
'from Question a inner join AnswerSheet b
on a.id = b.Questionid
Group By StudentId' )
Result:-
StudentId What's your age What's your gender When do you go home
500 20 Male 5:00pm
501 50 NULL NULL
502 NULL I don't know## NULL

Related

How to pivot data in SQL with multiple conditions

I am working on a data where it looks like this
I want to pivot it in this way :
I have written in this way :
select *
from (select * FROM records) as test
PIVOT (
max(value) for
[Question_ID] in ((SELECT distinct [Question_ID] from records order by 1))) AS PivotTable
Can you please help me?
If you know the number of columns in PIVOT, you can directly hardcode like below.
SELECT * FROM
(
SELECT * FROM test
) t
PIVOT (
MAX(value) for
[Question_ID] in ([120],[122],[149],[150],[151],[189],[253],[554],[774] )) AS Pivot_Table
If not,you have to use Dynamic Pivot table for unknown number of column values.
DECLARE
#columns NVARCHAR(MAX) = '',
#sql NVARCHAR(MAX) = '';
-- select the QuestionIDs
SELECT
#columns+=QUOTENAME(Question_ID) + ','
FROM
test
GROUP BY Question_ID
ORDER BY Question_ID;
-- remove the last comma
SET #columns = LEFT(#columns, LEN(#columns) - 1);
-- construct dynamic SQL
SET #sql ='
SELECT * FROM
(
SELECT * FROM TEST
) t
PIVOT(
MAX(value) for
[Question_ID] IN ('+ #columns +')
) AS pivot_table;';
-- execute the dynamic SQL
EXECUTE sp_executesql #sql;
CHECK DEMO HERE
you got your wish result
select *
from (select * FROM #temp) as test--your table name replace with #temp
PIVOT (
max(value) for
[Question_ID] in ([120],[122],[149],[150],[151],[189],[253],[554],[774] )) AS PivotTable
or if you want to dynamic your query
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX);
SET #cols = STUFF((SELECT distinct ',' + QUOTENAME(Question_ID)
FROM #temp c
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
--print #cols
set #query = 'SELECT ID, ' + #cols + ' from
(
select ID
, Question_ID
, [Value]
from #temp
) x
pivot
(
max([Value])
for [Question_ID] in (' + #cols + ')
) p '
execute(#query)
drop table #temp
----------
If you are sure that the number of columns will not change, then you can use case statement
CREATE TABLE #TBL (ID INT, Question_ID INT, [Value] VARCHAR(20))
INSERT INTO #TBL VALUES
(1,149,'abc'),(1,150,'def'),(1,151,'aff'),(1,122,'www'),
(2,120,'add'),(2,150,'sub'),(2,189,'asf'),(3,253,'asd'),
(3,150,'gfre'),(3,554,'cvew'),(3,744,'qwert')
-- aNSWER
SELECT
ID,
MAX(CASE WHEN Question_ID = 149 THEN [Value] END) AS '149',
MAX(CASE WHEN Question_ID = 150 THEN [Value] END) AS '150',
MAX(CASE WHEN Question_ID = 151 THEN [Value] END) AS '151',
MAX(CASE WHEN Question_ID = 122 THEN [Value] END) AS '122',
MAX(CASE WHEN Question_ID = 120 THEN [Value] END) AS '120',
MAX(CASE WHEN Question_ID = 150 THEN [Value] END) AS '150',
MAX(CASE WHEN Question_ID = 189 THEN [Value] END) AS '189',
MAX(CASE WHEN Question_ID = 253 THEN [Value] END) AS '253',
MAX(CASE WHEN Question_ID = 150 THEN [Value] END) AS '150',
MAX(CASE WHEN Question_ID = 554 THEN [Value] END) AS '554',
MAX(CASE WHEN Question_ID = 744 THEN [Value] END) AS '744'
FROM #TBL
GROUP BY ID
DROP TABLE #TBL
Output
ID 149 150 151 122 120 150 189 253 150 554 744
1 abc def aff www NULL def NULL NULL def NULL NULL
2 NULL sub NULL NULL add sub asf NULL sub NULL NULL
3 NULL gfre NULL NULL NULL gfre NULL asd gfre cvew qwert

Group Columns based on Row ID

I have a table pulling data, like:
ID FID Value
001 20 200
001 20 400
001 50 600
002 50 100
How do write a query to get a column for each row ID that would sum the Value's?
For example, I want to return the following:
ID 20 50
001 600 600
002 NULL 100
A pattern like this:
SELECT
ID,
SUM(CASE WHEN FID = 20 THEN Value END) as sum20,
SUM(CASE WHEN FID = 50 THEN Value END) as sum50 --extend by adding more CASE WHEN rows
FROM
table
GROUP BY ID
..has the advantage of working in databases that don't support PIVOT syntax.
If you'd like PIVOT syntax:
SELECT ID, [20], [50] --extend by providing more values in square brackets
FROM
table
PIVOT
(
SUM(Value)
FOR FID IN ([20], [50]) --extend by providing more values in square brackets
) pvt
If you have dynamic list of FID's you can use dynamic query as below:
Declare #cols1 varchar(max)
Declare #query nvarchar(max)
Select #cols1 = stuff((select Distinct ','+QuoteName(Fid) from #data for xml path('')),1,1,'')
Set #query = ' Select * from (
Select Id, [Fid], [Value] from #data ) a
pivot (sum([Value]) for [Fid] in (' + #cols1 + ') ) p '
Exec sp_executesql #query

Joining two tables and getting rows as columns

I have two tables. One is a transaction table of user id with following details.
user_id Product_id timestamp transaction_id
123 A_1 ID1
123 A_2 ID1
124 A_1 ID2
125 A_2
Now there is a product_id mapping with the division to which the product belongs
Mapping:
Product_id Division
A_1 Grocery
A_2 Electronics and so on
I need a final table where I have one record for each user id and the corresponding items bought in each division as separate columns. Like
User_ID Grocery Electronics
123 1 1
124 1 0
I did something like this:
select user_id, case (when Division ="Grocery" then count(product_id) else 0) end as Grocery
when Division="Electronics" then count(product_id) else 0) end as Electronics
from
( select user_id, a.product_id, b.division from transact as a
left join
mapping b
on a.product_id=b.product_id
)
group by user_id
Does this sound good?
When you use conditional aggregation, the case is the argument to the aggregation function:
select user_id,
sum(case when m.Division = 'Grocery' then 1 else 0 end) as Grocery,
sum(case when m.Division = 'Electronics' then 1 else 0 end) as Electronics
from transact t left join
mapping m
on t.product_id = m.product_id
group by user_id
SQL Fiddle
In addition:
Your table aliases should be abbreviations for the table. Makes the code easier to understand.
You don't need a subquery.
Use single quotes for string constants. This is the SQL standard.
This is a dynamic version of Gordon Linoff's answer, which uses conditional aggregation. This works for SQL-Server.
DECLARE #sql1 VARCHAR(4000) = ''
DECLARE #sql2 VARCHAR(4000) = ''
DECLARE #sql3 VARCHAR(4000) = ''
SELECT #sql1 =
'SELECT
t.user_id
'
SELECT #sql2 = #sql2 +
' , SUM(CASE WHEN m.division = ''' + division + ''' THEN 1 ELSE 0 END) AS [' + division + ']' + CHAR(10)
FROM(
SELECT DISTINCT division FROM Mapping
)t
ORDER BY division
SELECT #sql3 =
'FROM Transact t
LEFT JOIN Mapping m
ON m.product_id = t.product_id
GROUP BY t.user_id'
PRINT (#sql1 + #sql2 + #sql3)
EXEC (#sql1 + #sql2 + #sql3)
SQL Fiddle
as per my understanding it can be acheived using PIVOT also
declare #transact table (user_id int,product_id varchar(5),transaction_id varchar(5))
insert into #transact (user_id,product_id,transaction_id)values (123,'A_1','ID1'),(123,'A_2','ID1'),(124,'A_1','ID2'),(125,'A_2',NULL)
declare #mapping table (product_id varchar(5),division varchar(20))
insert into #mapping (product_id,division)values ('A_1','Grocery'),('A_2','Electronics')
select * from (
select t.user_id,tt.division as Division,tt.product_id As product_id from #transact t
left join #mapping tt
on t.product_id = tt.product_id)T
PIVOT(COUNT(product_id)FOR division IN ([Grocery],[Electronics]))P

Dynamic SELECT statement, generate columns based on present and future values

Currently building a SELECT statement in SQL Server 2008 but would like to make this SELECT statement dynamic, so the columns can be defined based on values in a table. I heard about pivot table and cursors, but seems kind of hard to understand at my current level, here is the code;
DECLARE #date DATE = null
IF #date is null
set # date = GETDATE() as DATE
SELECT
Name,
value1,
value2,
value3,
value4
FROM ref_Table a
FULL OUTER JOIN (
SELECT
PK_ID ID,
sum(case when FK_ContainerType_ID = 1 then 1 else null) Box,
sum(case when FK_ContainerType_ID = 2 then 1 else null) Pallet,
sum(case when FK_ContainerType_ID = 3 then 1 else null) Bag,
sum(case when FK_ContainerType_ID = 4 then 1 else null) Drum
from
Packages
WHERE
#date between PackageStart AND PackageEnd
group by PK_ID ) b on a.Name = b.ID
where
Group = 0
The following works great for me , but PK_Type_ID and the name of the column(PackageNameX,..) are hard coded, I need to be dynamic and it can build itself based on present or futures values in the Package table.
Any help or guidance on the right direction would be greatly appreciated...,
As requested
ref_Table (PK_ID, Name)
1, John
2, Mary
3, Albert
4, Jane
Packages (PK_ID, FK_ref_Table_ID, FK_ContainerType_ID, PackageStartDate, PackageEndDate)
1 , 1, 4, 1JAN2014, 30JAN2014
2 , 2, 3, 1JAN2014, 30JAN2014
3 , 3, 2, 1JAN2014, 30JAN2014
4 , 4, 1, 1JAN2014, 30JAN2014
ContainerType (PK_ID, Type)
1, Box
2, Pallet
3, Bag
4, Drum
and the result should look like this;
Name Box Pallet Bag Drum
---------------------------------------
John 1
Mary 1
Albert 1
Jane 1
The following code like I said works great, the issue is the Container table is going to grow and I need to replicated the same report without hard coding the columns.
What you need to build is called a dynamic pivot. There are plenty of good references on Stack if you search out that term.
Here is a solution to your scenario:
IF OBJECT_ID('tempdb..##ref_Table') IS NOT NULL
DROP TABLE ##ref_Table
IF OBJECT_ID('tempdb..##Packages') IS NOT NULL
DROP TABLE ##Packages
IF OBJECT_ID('tempdb..##ContainerType') IS NOT NULL
DROP TABLE ##ContainerType
SET NOCOUNT ON
CREATE TABLE ##ref_Table (PK_ID INT, NAME NVARCHAR(50))
CREATE TABLE ##Packages (PK_ID INT, FK_ref_Table_ID INT, FK_ContainerType_ID INT, PackageStartDate DATE, PackageEndDate DATE)
CREATE TABLE ##ContainerType (PK_ID INT, [Type] NVARCHAR(50))
INSERT INTO ##ref_Table (PK_ID,NAME)
SELECT 1,'John' UNION
SELECT 2,'Mary' UNION
SELECT 3,'Albert' UNION
SELECT 4,'Jane'
INSERT INTO ##Packages (PK_ID, FK_ref_Table_ID, FK_ContainerType_ID, PackageStartDate, PackageEndDate)
SELECT 1,1,4,'2014-01-01','2014-01-30' UNION
SELECT 2,2,3,'2014-01-01','2014-01-30' UNION
SELECT 3,3,2,'2014-01-01','2014-01-30' UNION
SELECT 4,4,1,'2014-01-01','2014-01-30'
INSERT INTO ##ContainerType (PK_ID, [Type])
SELECT 1,'Box' UNION
SELECT 2,'Pallet' UNION
SELECT 3,'Bag' UNION
SELECT 4,'Drum'
DECLARE #DATE DATE, #PARAMDEF NVARCHAR(MAX), #COLS NVARCHAR(MAX), #SQL NVARCHAR(MAX)
SET #DATE = '2014-01-15'
SET #COLS = STUFF((SELECT DISTINCT ',' + QUOTENAME(T.[Type])
FROM ##ContainerType T
FOR XML PATH, TYPE).value('.', 'NVARCHAR(MAX)'),1,1,'')
SET #SQL = 'SELECT [Name], ' + #COLS + '
FROM (SELECT [Name], [Type], 1 AS Value
FROM ##ref_Table R
JOIN ##Packages P ON R.PK_ID = P.FK_ref_Table_ID
JOIN ##ContainerType T ON P.FK_ContainerType_ID = T.PK_ID
WHERE #DATE BETWEEN P.PackageStartDate AND P.PackageEndDate) X
PIVOT (COUNT(Value) FOR [Type] IN (' + #COLS + ')) P
'
PRINT #COLS
PRINT #SQL
SET #PARAMDEF = '#DATE DATE'
EXEC SP_EXECUTESQL #SQL, #PARAMDEF, #DATE=#DATE
Output:
Name Bag Box Drum Pallet
Albert 0 0 0 1
Jane 0 1 0 0
John 0 0 1 0
Mary 1 0 0 0
Static Query:
SELECT [Name],[Box],[Pallet],[Bag],[Drum] FROM
(
SELECT *
FROM
(
SELECT rf.Name,cnt.[Type], pk.PK_ID AS PKID, rf.PK_ID AS RFID
FROM ref_Table rf INNER JOIN Packages pk ON rf.PK_ID = pk.FK_ref_Table_ID
INNER JOIN ContanerType cnt ON cnt.PK_ID = pk.FK_ContainerType_ID
) AS SourceTable
PIVOT
(
COUNT(PKID )
FOR [Type]
IN ( [Box],[Pallet],[Bag],[Drum])
) AS PivotTable
) AS Main
ORDER BY RFID
Dynamic Query:
DECLARE #columnList nvarchar (MAX)
DECLARE #pivotsql nvarchar (MAX)
SELECT #columnList = STUFF(
(
SELECT ',' + '[' + [Type] + ']'
FROM ContanerType
FOR XML PATH( '')
)
,1, 1,'' )
SET #pivotsql =
N'SELECT [Name],' + #columnList + ' FROM
(
SELECT *
FROM
(
SELECT rf.Name,cnt.[Type], pk.PK_ID AS PKID, rf.PK_ID AS RFID
FROM ref_Table rf INNER JOIN Packages pk ON rf.PK_ID = pk.FK_ref_Table_ID
INNER JOIN ContanerType cnt ON cnt.PK_ID = pk.FK_ContainerType_ID
) AS SourceTable
PIVOT
(
COUNT(PKID )
FOR [Type]
IN ( ' + #columnList + ')
) AS PivotTable
) AS Main
ORDER BY RFID;'
EXEC sp_executesql #pivotsql
Following my tutorial below will help you to understand the PIVOT functionality:
We write sql queries in order to get different result sets like full, partial, calculated, grouped, sorted etc from the database tables. However sometimes we have requirements that we have to rotate our tables. Sounds confusing?
Let's keep it simple and consider the following two screen grabs.
SQL Table:
Expected Results:
Wow, that's look like a lot of work! That is a combination of tricky sql, temporary tables, loops, aggregation......, blah blah blah
Don't worry let's keep it simple, stupid(KISS).
MS SQL Server 2005 and above has a function called PIVOT. It s very simple to use and powerful. With the help of this function we will be able to rotate sql tables and result sets.
Simple steps to make it happen:
Identify all the columns those will be part of the desired result set.
Find the column on which we will apply aggregation(sum,ave,max,min etc)
Identify the column which values will be the column header.
Specify the column values mentioned in step3 with comma separated and surrounded by square brackets.
So, if we now follow above four steps and extract information from the above sales table, it will be as below:
Year, Month, SalesAmount
SalesAmount
Month
[Jan],[Feb] ,[Mar] .... etc
We are nearly there if all the above steps made sense to you so far.
Now we have all the information we need. All we have to do now is to fill the below template with required information.
Template:
Our SQL query should look like below:
SELECT *
FROM
(
SELECT SalesYear, SalesMonth,Amount
FROM Sales
) AS SourceTable
PIVOT
(
SUM(Amount )
FOR SalesMonth
IN ( [Jan],[Feb] ,[Mar],
[Apr],[May],[Jun] ,[Jul],
[Aug],[Sep] ,[Oct],[Nov] ,[Dec])
) AS PivotTable;
In the above query we have hard coded the column names. Well it's not fun when you have to specify a number of columns.
However, there is a work arround as follows:
DECLARE #columnList nvarchar (MAX)
DECLARE #pivotsql nvarchar (MAX)
SELECT #columnList = STUFF(
(
SELECT ',' + '[' + SalesMonth + ']'
FROM Sales
GROUP BY SalesMonth
FOR XML PATH( '')
)
,1, 1,'' )
SET #pivotsql =
N'SELECT *
FROM
(
SELECT SalesYear, SalesMonth,Amount
FROM Sales
) AS SourceTable
PIVOT
(
SUM(Amount )
FOR SalesMonth
IN ( ' + #columnList +' )
) AS PivotTable;'
EXEC sp_executesql #pivotsql
Hopefully this tutorial will be a help to someone somewhere.
Enjoy coding.

How do I dynamically pivot row values into columns without aggregare?

Lets say I have the following table representing results from a user survey system.
SurveyID ResponseID QuestionID Answer
-------- ---------- ---------- ------
1 1 1 'Answer 1'
1 1 2 'Answer 2'
1 1 3 'Answer 3'
1 2 1 'red'
1 2 2 'blue'
1 2 3 'green'
What I want is a pivoted output such that shown below.
SurveyID ResponseID Q1 Q2 Q3
-------- ---------- -- -- --
1 1 'Answer 1' 'Answer 2' 'Answer 3'
1 2 'red' 'blue' 'green'
I know how to achieve this if there were always only the same three questions but this database hosts multiple surveys which could have any number of unique QuestionIDs so I need the Q1, Q2, Q3 columns to be dynamic depending upon the number and IDs of that survey's questions.
I thought this would be a fairly standard problem but I cannot find anything that fully satisfies this issue. Any solution must work with SQL Server 2005.
Hope that makes sense. Thanks.
1) Pivots need an aggregate. You may know in advance that you are only interested in one row, but SQL doesn't know that. If you are only dealing with one row per group, just use MIN() as your aggregate.
2) Dynamic pivot is not a standard problem for SQL. That's a task for the presentation layer, not the data layer. You will have to use dynamic SQL, which still won't be able to handle an arbitrary number of columns and will open up injection attacks if you aren't careful.
If you still want to do it this way:
CREATE TABLE #t (Surveyid int, Responseid int, Questionid int, Answer varchar(max))
INSERT #t VALUES (1,1,1,'Answer1'),(1,1,2,'Answer2'),(1,1,3,'Answer3'),(1,2,1,'red'),(1,2,2,'blue'),(1,2,3,'green')
DECLARE #qids nvarchar(4000)
SELECT #qids = COALESCE(#qids+',','') + qid
FROM (SELECT DISTINCT QUOTENAME(Questionid) qid FROM #t) t
EXEC ('SELECT [SurveyID],[ResponseID],'+#qids+' FROM #t PIVOT(MIN(Answer) FOR Questionid IN('+#qids+')) p')
OK, finally discovered how to do this so thought I would share.
DECLARE #SurveyID SMALLINT;
DECLARE #SQL as VARCHAR(MAX);
DECLARE #Columns AS VARCHAR(MAX);
DECLARE #ColumnHeadings AS Varchar(MAX);
SET #SurveyID = 1;
SELECT
#Columns = COALESCE(#Columns + ', ','') + '[' + QuestionID + ']'
,#ColumnHeadings = COALESCE(#ColumnHeadings + ', ','') + '[' + QuestionID + '] AS [Q' + QuestionNumber + ']'
FROM
(
SELECT DISTINCT
CAST(a.QuestionID AS VARCHAR) AS QuestionID
,CASE WHEN q.QuestionNumber IS NULL THEN '' ELSE q.QuestionNumber END AS QuestionNumber
FROM dbo.Answers AS a
JOIN dbo.Questions AS q
ON a.QuestionID = q.ID
JOIN dbo.SurveyResponses AS r
ON a.ResponseID = r.ID
WHERE r.SurveyID = #SurveyID
)
SET #SQL = '
WITH PivotData AS
(
SELECT
a.QuestionID
,a.ResponseID
,a.Answer
FROM dbo.Answers AS a
JOIN dbo.SurveyResponses AS r
ON a.ResponseID = r.ID
)
SELECT
ResponseID
,' + #ColumnHeadings + '
FROM PivotData
PIVOT
(
MAX(Answer)
FOR QuestionID
IN (' + #Columns + ')
) AS PivotResult
ORDER BY ResponseID' ASC
EXEC (#SQL);