Create a sparse matrix from observations - sql

I am new to SQL server, I am a R user but R can't be used with my huge dataset (not enough memory).
What I want to do:
I want to create a sparse matrix from a table with only 2 columns, I dont have any value column, it seems easy but I didn't find the right way to do it.
My data:
ID_patient | ID_product
-----------------------
123 A
123 B
111 C
222 A
333 D
333 E
Ouput wanted:
ID_patient | A | B | C | D | E |
----------------------------------------------------
123 1 1
111 1
222 1
333 1 1
I have read that I can use the GROUP BY function or Pivot feature but what I have tried so far failed.
Edit
I don't know all the products, so the right way to do that is by using dynamic pivot ?

You can try something like PIVOT
See demo
Select * from
(select *, copy=Id_product from t)t
pivot
(
count(copy) for ID_product in ([A],[B],[C],[D],[E]))p
If you don't know A, B, C, D, .. before hand then you should go for dynamic pivot
update:
updated dynamic piv demo
declare #cols nvarchar(max)
declare #query nvarchar(max)
select #cols= Stuff((select ','+ quotename( ID_product) from
(select distinct id_product from t) t for xml path ('')),1,1,'')
select #query='Select * from
( select *, copy=Id_product from t ) t
pivot
(count(copy) for ID_product in ( '+#cols+' ))p '
exec(#query)

Try this
IF OBJECT_ID('tempdb..#Temp')IS NOT NULL
DROP TABLE #Temp
DECLARE #temp AS TABLE (ID_patient INT, ID_product varchar(10))
INSERT INTO #temp
SELECT 123,'A' UNION ALL
SELECT 123,'B' UNION ALL
SELECT 111,'C' UNION ALL
SELECT 222,'A' UNION ALL
SELECT 333,'D' UNION ALL
SELECT 333,'E'
SELECT * INTO #Temp
FROM #temp
DECLARE #Sql nvarchar(max),
#Col nvarchar(max),
#Col2 nvarchar(max)
SELECT #Col=STUFF((SELECT DISTINCT ', '+QUOTENAME(ID_product) FROM #Temp FOR XML PATH ('')),1,1,'')
SELECT #Col2=STUFF((SELECT DISTINCT ', '+'ISNULL('+QUOTENAME(ID_product)+','' '') AS '+QUOTENAME(ID_product) FROM #Temp FOR XML PATH ('')),1,1,'')
SET #Sql='
SELECT ID_patient,'+#Col2+'
FROM
(
SELECT *, DENSE_RANK()OVER( PARTITION BY ID_patient ORDER By ID_patient) AS [Val] FROM #Temp
)AS Src
PIVOT
(MAX(Val) FOR ID_product IN ('+#Col+')
)AS PVT '
PRINT #Sql
EXEC (#Sql)
Result
ID_patient A B C D E
------------------------------
111 0 0 1 0 0
123 1 1 0 0 0
222 1 0 0 0 0
333 0 0 0 1 1

Related

How to split group result into unlimited individual columns

If I have a table:
id tstamp x y
-----------------
1 111 1 A
2 111 2 B
3 111 3 C
4 222 1 D
5 222 2 E
6 222 3 F
7 333 1 G
8 333 2 H
9 333 3 I
... nnn ... ...
How can I split it into the following, where the number of columns is dependent on a range of tstamp?
x y111 y222 y333 ynnn
----------------------------------
1 A D G ...
2 B E H ...
3 C F I ...
It seems fundamentally simple, but I can't find any relevant example or API? There's a good 10 or so similar questions but they all deal with string handling, csv, xml???
I imagine it would be pretty straight forward to do in c# or another scripting language, but I would expect it should be an easy thing in sql?
you need to construct column values of tstamp for PIVOT and use dynamic query
DECLARE #cols NVARCHAR(MAX)
SELECT #cols = STUFF(( SELECT DISTINCT TOP 100 PERCENT
'],[' + tstamp
FROM Table1
ORDER BY '],[' + tstamp
FOR XML PATH('')
), 1, 2, '') + ']'
DECLARE #query NVARCHAR(4000)
SET #query = N'SELECT x, '+
#cols +'
FROM
(SELECT x, y, tstamp
FROM Table1
) p
PIVOT
(
MAX(y)
FOR tstamp IN
( '+
#cols +' )
) AS pvt
'
exec sp_executesql #query

SQL Transpose table - sqlserver [duplicate]

This question already has answers here:
Efficiently convert rows to columns in sql server
(5 answers)
Closed 8 years ago.
i have the following table
create table mytab (
mID int primary key,
pname varchar(100) not null,
pvalue varchar(100) not null
)
example data looks like
mID |pname |pvalue
-----------------------
1 |AAR | 2.3
1 |AAM | 1.2
1 |GXX | 5
2 |AAR | 5.4
2 |AAM | 3.0
3 |AAR | 0.2
I want to flip the table so that i get
mID | AAR | AAM | GXX|
---------------------------------
1 | 2.3 | 1.2 | 5|
2 | 5.4 | 3.0 | 0|
3 | 0.2 | 0 | 0
Is this somehow possible and if so, is there a way to create a dynamic query because there are lots of these pname pvalue pairs
Write Dynamic Pivot Query as:
DECLARE #cols AS NVARCHAR(MAX)
DECLARE #query AS NVARCHAR(MAX)
DECLARE #colsFinal AS NVARCHAR(MAX)
select #cols = STUFF((SELECT distinct ',' +
QUOTENAME(pname)
FROM mytab
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
, 1, 1, '')
select #colsFinal = STUFF((SELECT distinct ',' +
'ISNULL('+QUOTENAME(pname)+',0) AS '+ QUOTENAME(pname)
FROM mytab
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
, 1, 1, '')
--Edited query to replace null with 0 in Final result set.
SELECT #query = 'SELECT mID, '+#colsFinal +'
FROM mytab
PIVOT
(
MAX(pvalue)
FOR pname IN(' + #cols + ')) AS p;'
exec sp_executesql #query
Check demo here..
Use pivot as following format:
select *
From (select *
From mytab)p
PIVOT(SUM(pvalue) FOR pname IN ([AAR],[AAM],[GXX]))pvt
In order to dynamic PIVOT use flowing reference:
Dynamic PIVOT Sample1
Dynamic PIVOT Sample2
Declare #t table (mID INT, pname VARCHAR(10), pvalue FLOAT)
INSERT INTO #t (mID,pname,pvalue)values (1,'AAR',2.3)
INSERT INTO #t (mID,pname,pvalue)values (1,'AAM', 1.2)
INSERT INTO #t (mID,pname,pvalue)values (1,'GXX', 5)
INSERT INTO #t (mID,pname,pvalue)values (2,'AAR', 5.4)
INSERT INTO #t (mID,pname,pvalue)values (2,'AAM', 0.3)
INSERT INTO #t (mID,pname,pvalue)values (3,'AAR', 0.2)
select mid,
CASE WHEN [AAR]IS NOT NULL THEN [AAR] ELSE ISNULL([AAR],0)END [AAR],
CASE WHEN [AAM]IS NOT NULL THEN [AAM] ELSE ISNULL([AAM],0)END [AAM],
CASE WHEN [GXX]IS NOT NULL THEN [GXX] ELSE ISNULL([GXX],0)END [GXX]
from
(
select mID, pvalue,pname
from #t
) d
pivot
(
max(pvalue)
for pname in ( [AAR], [AAM], [GXX])
) piv;

SQL Server - Complex Dynamic Pivot multiple columns

FYI, this question is already answered but I have some new requirements which is very complex to implement so, I am posting it as a new question instead of editing the old question: (Previous Question)
I have two tables "Controls" and "ControlChilds" (in the ControlChilds table we have added a new column called ControlChildComments which we need to show in PIVOT output)
Parent Table Structure:
Create table Controls(
ProjectID Varchar(20) NOT NULL,
ControlID INT NOT NULL,
ControlCode Varchar(2) NOT NULL,
ControlPoint Decimal NULL,
ControlScore Decimal NULL,
ControlValue Varchar(50)
)
Sample Data
ProjectID | ControlID | ControlCode | ControlPoint | ControlScore | ControlValue
P001 1 A 30.44 65 Invalid
P001 2 C 45.30 85 Valid
Child Table Structure:
Create table ControlChilds(
ControlID INT NOT NULL,
ControlChildID INT NOT NULL,
ControlChildValue Varchar(200) NULL,
ControlChildComments Varchar(200) NULL
)
Sample Data
ControlID | ControlChildID | ControlChildValue | ControlChildComments
1 100 Yes Something
1 101 No NULL
1 102 NA Others
1 103 Others NULL
2 104 Yes New one
2 105 SomeValue NULL
Based on my previous question (Previous Question) I got this output (You can refer to the PIVOT queries which produces this output in the answer given by #bluefeet. Thanks again #bluefeet.)
But now my requirement is changed and I need ControlChildComments after each Child values. For example, A_Child1, A_Child1Comments, A_Child2, A_Child2Comments etc...
Another tricky thing is I need to show the comments only when they are not null otherwise I shouldn't show the column. For example, in this case, it should be like this:
A_Child1, A_Child1Comments, A_Child2, A_Child3, A_Child3Comments, A_Child4, C_Child1, C_Child1Comments, C_Child2
Is this possible? I tried lot of things but the results are not accurate.
Since you now have multiple columns in your ControlChilds table that you need to PIVOT, you will need to use the similar method of unpivoting them first that you applied with the Controls table.
You will need to unpivot both the ChildControlValue and ChildControlComments using code similar to:
select
projectId,
col = ControlCode+'_'+subCol+cast(seq as varchar(10)),
value
from
(
select c.ProjectId,
c.ControlCode,
cc.ControlChildValue,
cc.ControlChildComments,
row_number() over(partition by c.ProjectId, c.ControlCode
order by cc.ControlChildId) seq
from controls c
inner join controlchilds cc
on c.controlid = cc.controlid
) d
cross apply
(
select 'ChildValue', ControlChildValue union all
select 'ChildComments', ControlChildComments
) c (subCol, value);
See SQL Fiddle with Demo. This gets your data in the format:
| PROJECTID | COL | VALUE |
|-----------|------------------|-----------|
| P001 | A_ChildValue1 | Yes |
| P001 | A_ChildComments1 | Something |
| P001 | A_ChildValue2 | No |
| P001 | A_ChildComments2 | (null) |
| P001 | A_ChildValue3 | NA |
You then use this code in your existing query:
select ProjectId,
A_ControlPoint, A_ControlScore, A_ControlValue,
A_ChildValue1, A_ChildComments1, A_ChildValue2,
A_ChildComments2, A_ChildValue3, A_ChildComments3,
A_ChildValue4, A_ChildComments4,
C_ControlPoint, C_ControlScore, C_ControlValue,
C_Child1, C_Child2
from
(
select
ProjectId,
col = ControlCode +'_'+col,
val
from
(
select
c.ProjectId,
c.ControlCode,
c.ControlPoint,
c.ControlScore,
c.ControlValue
from controls c
) d
cross apply
(
select 'ControlPoint', cast(controlpoint as varchar(10)) union all
select 'ControlScore', cast(ControlScore as varchar(10)) union all
select 'ControlValue', ControlValue
) c (col, val)
union all
select
projectId,
col = ControlCode+'_'+subCol+cast(seq as varchar(10)),
value
from
(
select c.ProjectId,
c.ControlCode,
cc.ControlChildValue,
cc.ControlChildComments,
row_number() over(partition by c.ProjectId, c.ControlCode
order by cc.ControlChildId) seq
from controls c
inner join controlchilds cc
on c.controlid = cc.controlid
) d
cross apply
(
select 'ChildValue', ControlChildValue union all
select 'ChildComments', ControlChildComments
) c (subCol, value)
) src
pivot
(
max(val)
for col in (A_ControlPoint, A_ControlScore, A_ControlValue,
A_ChildValue1, A_ChildComments1, A_ChildValue2,
A_ChildComments2, A_ChildValue3, A_ChildComments3,
A_ChildValue4, A_ChildComments4,
C_ControlPoint, C_ControlScore, C_ControlValue,
C_Child1, C_Child2)
) piv;
See SQL Fiddle with Demo. Finally, you'll implement this in your dynamic SQL script:
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX)
select #cols = STUFF((SELECT ',' + QUOTENAME(col)
from
(
select ControlCode,
col = ControlCode +'_'+col,
seq,
so
from controls
cross apply
(
select 'ControlPoint', 0, 0 union all
select 'ControlScore', 0, 1 union all
select 'ControlValue', 0, 2
) c (col, seq, so)
union all
select ControlCode,
col = ControlCode+'_'+subcol+cast(rn as varchar(10)),
rn,
so
from
(
select ControlCode,
row_number() over(partition by c.ProjectId, c.ControlCode
order by cc.ControlChildId) seq
from controls c
inner join controlchilds cc
on c.controlid = cc.controlid
) d
cross apply
(
select 'ChildValue', seq, 3 union all
select 'ChildComments', seq, 4
) c (subcol, rn, so)
) src
group by ControlCode, seq, col, so
order by ControlCode, seq, so
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT ProjectId, ' + #cols + '
from
(
select ProjectId,
col = ControlCode +''_''+col,
val
from
(
select
c.ProjectId,
c.ControlCode,
c.ControlPoint,
c.ControlScore,
c.ControlValue
from controls c
) d
cross apply
(
select ''ControlPoint'', cast(controlpoint as varchar(10)) union all
select ''ControlScore'', cast(ControlScore as varchar(10)) union all
select ''ControlValue'', ControlValue
) c (col, val)
union all
select
projectId,
col = ControlCode+''_''+subCol+cast(seq as varchar(10)),
value
from
(
select c.ProjectId,
c.ControlCode,
cc.ControlChildValue,
cc.ControlChildComments,
row_number() over(partition by c.ProjectId, c.ControlCode
order by cc.ControlChildId) seq
from controls c
inner join controlchilds cc
on c.controlid = cc.controlid
) d
cross apply
(
select ''ChildValue'', ControlChildValue union all
select ''ChildComments'', ControlChildComments
) c (subCol, value)
) x
pivot
(
max(val)
for col in (' + #cols + ')
) p '
exec sp_executesql #query;
See SQL Fiddle with Demo. Both of these gives a result:
| PROJECTID | A_CONTROLPOINT | A_CONTROLSCORE | A_CONTROLVALUE | A_CHILDVALUE1 | A_CHILDCOMMENTS1 | A_CHILDVALUE2 | A_CHILDCOMMENTS2 | A_CHILDVALUE3 | A_CHILDCOMMENTS3 | A_CHILDVALUE4 | A_CHILDCOMMENTS4 | C_CONTROLPOINT | C_CONTROLSCORE | C_CONTROLVALUE | C_CHILDVALUE1 | C_CHILDCOMMENTS1 | C_CHILDVALUE2 | C_CHILDCOMMENTS2 |
|-----------|----------------|----------------|----------------|---------------|------------------|---------------|------------------|---------------|------------------|---------------|------------------|----------------|----------------|----------------|---------------|------------------|---------------|------------------|
| P001 | 30.44 | 65.00 | Invalid | Yes | Something | No | (null) | NA | Others | Others | (null) | 45.30 | 85.00 | Valid | Yes | New one | SomeValue | (null) |
Here is an example of a dynamic crosstab. Since you have multiple columns you would need to adjust the dynamic portion of this to suit.
if OBJECT_ID('Something') is not null
drop table Something
create table Something
(
ID int,
Subject1 varchar(50)
)
insert Something
select 10868952, 'NUR/3110/D507' union all
select 10868952, 'NUR/3110/D512' union all
select 10868952, 'NUR/4010/D523' union all
select 10868952, 'NUR/4010/HD20' union all
select 12345, 'asdfasdf'
declare #MaxCols int
declare #StaticPortion nvarchar(2000) =
'with OrderedResults as
(
select *, ROW_NUMBER() over(partition by ID order by Subject1) as RowNum
from Something
)
select ID';
declare #DynamicPortion nvarchar(max) = '';
declare #FinalStaticPortion nvarchar(2000) = ' from OrderedResults Group by ID order by ID';
with E1(N) AS (select 1 from (values (1),(1),(1),(1),(1),(1),(1),(1),(1),(1))dt(n)),
E2(N) AS (SELECT 1 FROM E1 a, E1 b), --10E+2 or 100 rows
E4(N) AS (SELECT 1 FROM E2 a, E2 b), --10E+4 or 10,000 rows max
cteTally(N) AS
(
SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) FROM E4
)
select #DynamicPortion = #DynamicPortion +
', MAX(Case when RowNum = ' + CAST(N as varchar(6)) + ' then Subject1 end) as Subject' + CAST(N as varchar(6)) + CHAR(10)
from cteTally t
where t.N <=
(
select top 1 Count(*)
from Something
group by ID
order by COUNT(*) desc
)
select #StaticPortion + #DynamicPortion + #FinalStaticPortion
--declare #SqlToExecute nvarchar(max) = #StaticPortion + #DynamicPortion + #FinalStaticPortion;
--exec sp_executesql #SqlToExecute

Dynamic Pivot (row to columns)

I have a Table1:
ID Instance Name Size Tech
1 0 D1 123 ABC
1 1 D2 234 CDV
2 2 D3 234 CDV
2 3 D4 345 SDF
I need the resultset using Dynamic PIVOT to look like along with the headers:
ID | Instance0_Name | Instance0_Size | Instance0_Tech | Instance1_Name | Instance1_Size | Instance1_tech
1 | D1 | 123 | ABC | D2 | 234 | CDV
Any help would be appreciated. using Sql Server 2008.
Sorry for the earlier post.
Your desired output is not exactly clear, but you can use the both the UNPIVOT and PIVOT function to get the result
If you know the number of columns, then you can hard code the values:
select *
from
(
select id,
'Instance'+cast(instance as varchar(10))+'_'+col col,
value
from
(
select id,
Instance,
Name,
cast(Size as varchar(50)) Size,
Tech
from yourtable
) x
unpivot
(
value
for col in (Name, Size, Tech)
) u
) x1
pivot
(
max(value)
for col in
([Instance0_Name], [Instance0_Size], [Instance0_Tech],
[Instance1_Name], [Instance1_Size], [Instance1_Tech],
[Instance2_Name], [Instance2_Size], [Instance2_Tech],
[Instance3_Name], [Instance3_Size], [Instance3_Tech])
) p
See SQL Fiddle with Demo
Then if you have an unknown number of values, you can use dynamic sql:
DECLARE #query AS NVARCHAR(MAX),
#colsPivot as NVARCHAR(MAX)
select #colsPivot = STUFF((SELECT ','
+ quotename('Instance'+ cast(instance as varchar(10))+'_'+c.name)
from yourtable t
cross apply sys.columns as C
where C.object_id = object_id('yourtable') and
C.name not in ('id', 'instance')
group by t.instance, c.name
order by t.instance
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query
= 'select *
from
(
select id,
''Instance''+cast(instance as varchar(10))+''_''+col col,
value
from
(
select id,
Instance,
Name,
cast(Size as varchar(50)) Size,
Tech
from yourtable
) x
unpivot
(
value
for col in (Name, Size, Tech)
) u
) x1
pivot
(
max(value)
for col in ('+ #colspivot +')
) p'
exec(#query)
See SQL Fiddle with Demo
If the result is not correct, then please edit your OP and post the result that you expect from both of the Ids you provided.

How to write a query table value as column name?

My Sql table is similar like below
Code Value ID
A 100 1
A 200 2
A 300 3
B 200 1
B 500 2
B 600 3
C 800 1
C 700 2
C 200 3
How I can write query in sql server 2008 to get values in below format.
ID A B C
1 100 200 800
2 200 500 700
3 300 600 200
You can use SUM function for that:
SELECT ID,
SUM(CASE Code when 'A' then Value else 0 end)as A,
SUM(CASE Code when 'B' then Value else 0 end)as B,
SUM(CASE Code when 'C' then Value else 0 end)as C
FROM myTable
GROUP BY ID;
See this SQLFiddle
Use PIVOT
select ID,[A],[B],[C]
from your_table T
PIVOT (MAX(Value) FOR Code in ([A],[B],[C]) )P
IF the number if Codes are not fixed you could use dynamic pivot
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX)
select #cols = STUFF((SELECT distinct ',' + QUOTENAME(Code)
from your_table
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT ID, ' + #cols + '
from your_table
pivot
(
MAX([Value])
for Code in (' + #cols + ')
) p '
print(#query)
execute(#query)
The answer is PIVOT
DECLARE #t TABLE (Code varchar(10), Value int, Id int)
INSERT INTO #t VALUES
('A',100,1),
('A',200,2),
('A',300,3),
('B',200,1),
('B',500,2),
('B',600,3),
('C',800,1),
('C',700,2),
('C',200,3);
SELECT ID,[A],[B],[C]
FROM #t
PIVOT (SUM(Value) FOR Code IN ([A],[B],[C]))P
Result
ID A B C
1 100 200 800
2 200 500 700
3 300 600 200