Dynamic bit-based flattening of multiple rows by pivoting into additional columns - sql

I have data that looks like this:
ID | Value
-----------
1 | a
1 | b
2 | a
2 | c
3 | a
3 | d
And I would like it to look like this:
ID | Value_a | Value_b | Value_c | Value_d
---------------------------------------------
1 | 1 | 1 | 0 | 0
2 | 1 | 0 | 1 | 0
3 | 1 | 0 | 0 | 1
I think a dynamic conditional aggregation is required. Any help would be appreciated.

Conditional aggregation goes like:
-- One output row per id. Each flag column is 1 when at least one row of that
-- id carries the given value, and 0 otherwise.
SELECT
    id,
    MAX(CASE value WHEN 'a' THEN 1 ELSE 0 END) AS value_a,
    MAX(CASE value WHEN 'b' THEN 1 ELSE 0 END) AS value_b,
    MAX(CASE value WHEN 'c' THEN 1 ELSE 0 END) AS value_c,
    MAX(CASE value WHEN 'd' THEN 1 ELSE 0 END) AS value_d
FROM mytable
GROUP BY id

Here is a sample implementation of dynamic conditional aggregation:
-- Create the test table.
CREATE TABLE #values (
    [ID]    int,
    [Value] char(1)
);

-- Populate the test table.
INSERT INTO #values ([ID], [Value])
VALUES
    (1, 'a'),
    (1, 'b'),
    (2, 'a'),
    (2, 'c'),
    (3, 'a'),
    (3, 'd');

-- @columns collects one ", MAX(CASE ...) AS [Value_x]" fragment per distinct value;
-- @query holds the complete dynamic statement.
DECLARE @columns nvarchar(max);
DECLARE @query   nvarchar(max);

-- Build the fragments with FOR XML PATH so the concatenation and its order are
-- well defined. QUOTENAME(..., '''') escapes the value used as a string literal,
-- and QUOTENAME(...) brackets the generated column alias.
SELECT @columns = (
    SELECT
        N', MAX(CASE WHEN [Value] = ' + QUOTENAME(v.[Value], '''')
        + N' THEN 1 ELSE 0 END) AS ' + QUOTENAME(N'Value_' + v.[Value])
    FROM (
        SELECT DISTINCT [Value]
        FROM #values
        WHERE [Value] IS NOT NULL
    ) AS v
    ORDER BY v.[Value]
    FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)');

-- Assemble the dynamic query; an empty table yields just the [ID] column.
SET @query = N'SELECT [ID]' + COALESCE(@columns, N'') + N' FROM #values GROUP BY [ID]';

-- Execute the dynamic query.
EXEC (@query);
this is the result:
Now you can add a value (for example id = 4 and value = 'e') replacing the original insert with this one:
-- Same sample rows as before plus id 4, which introduces the new value 'e'.
INSERT INTO #values ([ID], [Value])
VALUES
    (1, 'a'),
    (1, 'b'),
    (2, 'a'),
    (2, 'c'),
    (3, 'a'),
    (3, 'd'),
    (4, 'a'),
    (4, 'e')
this is the new output:

Related

Update the Unique number for the co-related records between two columns in the group

I need to identify and update co-related records associated rank under Req_Result column as depicted below.
Table name is tblSource.
+------+-----+-----------------+---------+
| Item | key | DenseRankWrtKey | Req_Res |
+------+-----+-----------------+---------+
| a | 1 | 1 | 1 |
+------+-----+-----------------+---------+
| a | 2 | 2 | 1 |
+------+-----+-----------------+---------+
| a | 3 | 3 | 1 |
+------+-----+-----------------+---------+
| b | 2 | 2 | 1 |
+------+-----+-----------------+---------+
| b | 9 | 7 | 1 |
+------+-----+-----------------+---------+
| c | 1 | 1 | 1 |
+------+-----+-----------------+---------+
| c | 6 | 5 | 1 |
+------+-----+-----------------+---------+
| d | 5 | 4 | 4 |
+------+-----+-----------------+---------+
| e | 8 | 6 | 6 |
+------+-----+-----------------+---------+
| f | 2 | 2 | 1 |
+------+-----+-----------------+---------+
| f | 6 | 5 | 1 |
+------+-----+-----------------+---------+
Item and Key are co-related columns and DenseRankWrtKey is created by using Dense rank with respect to key. I need to assign the same DenseRankWrtKey values to all the co-related values.
Scenario explained:
Item a has the key value 1 and 1 is co-related with c as well, so all related values for a and 1 are a,b,c,f,2,3,7,6,5 hence all these values are assigned as 1 by referring DenseRank column, d and e are not further related to any other values hence its value is kept as is from DenseRank column.
I tried the queries
-- Attempted update: copies DenseRankWrtKey from any row of tblSource that has
-- the same DenseRankWrtKey as the row being updated.
UPDATE tgt
SET tgt.Req_Res = src.DenseRankWrtKey
FROM tblSource AS tgt
INNER JOIN tblSource AS src
    ON src.DenseRankWrtKey = tgt.DenseRankWrtKey
which is not sufficient.
Just try for this table too:
-- Table variable fixture; Req_Res is left NULL for the solution to fill in.
DECLARE @Table AS TABLE
(
    Id              INT IDENTITY(1,1) PRIMARY KEY,
    Item            varchar(100),
    [key]           INT,
    DenseRankWrtKey INT,
    Req_Res         INT
)
INSERT INTO @Table (Item, [key], DenseRankWrtKey)
VALUES
    ('p', 10, 1),
    ('q', 10, 1),
    ('r', 20, 2),
    ('s', 30, 3),
    ('t', 30, 3),
    ('u', 40, 4),
    ('v', 40, 4),
    ('w', 40, 4),
    ('p', 50, 5),
    ('q', 50, 5),
    ('r', 50, 5),
    ('s', 50, 5),
    ('t', 50, 5),
    ('u', 50, 5),
    ('v', 50, 5),
    ('w', 50, 5)
I find this way easier to read and maintain
-- Sample data; Req_Res starts NULL and is filled in by the loop below.
DECLARE @TestTable TABLE (Item CHAR(1), ItemKey INT, DenseRankWrtKey INT, Req_Res INT)

INSERT @TestTable (Item, ItemKey, DenseRankWrtKey) VALUES
  ('a', 1, 1)
, ('a', 2, 2)
, ('a', 3, 3)
, ('b', 2, 2)
, ('b', 9, 7)
, ('c', 1, 1)
, ('c', 6, 5)
, ('d', 5, 4)
, ('e', 8, 6)
, ('f', 2, 2)
, ('f', 6, 5)

-- Rows already assigned to the group that is currently being expanded.
DECLARE @OtpTable TABLE (Item CHAR(1), ItemKey INT, DenseRankWrtKey INT)
DECLARE @RC INT = 1      -- rows seeded by the outer step; 0 means no unlabelled row is left
DECLARE @Spread INT      -- rows labelled by the most recent expansion pass
DECLARE @GroupRank INT   -- label shared by every row of the current group

WHILE @RC > 0
BEGIN
    DELETE @OtpTable

    -- Seed a new group with the unlabelled row that has the smallest rank, so
    -- the group label is deterministic (TOP without ORDER BY picks an arbitrary row).
    ;WITH UpdateCTE AS (
        SELECT TOP (1) *
        FROM @TestTable
        WHERE Req_Res IS NULL
        ORDER BY DenseRankWrtKey
    )
    UPDATE UpdateCTE
    SET Req_Res = DenseRankWrtKey
    OUTPUT Inserted.Item, Inserted.ItemKey, Inserted.DenseRankWrtKey INTO @OtpTable

    -- @@ROWCOUNT must be read immediately after the UPDATE.
    SET @RC = @@ROWCOUNT
    SET @Spread = @RC

    -- Capture the seed's rank once; @OtpTable later also holds rows with other ranks.
    SELECT @GroupRank = MIN(DenseRankWrtKey) FROM @OtpTable

    -- Keep pulling in unlabelled rows that share an Item or an ItemKey with a row
    -- already in the group, until a pass labels nothing new.
    WHILE @Spread > 0
    BEGIN
        UPDATE T
        SET Req_Res = @GroupRank
        OUTPUT Inserted.Item, Inserted.ItemKey, Inserted.DenseRankWrtKey INTO @OtpTable
        FROM @TestTable T
        WHERE T.Req_Res IS NULL
          AND EXISTS (SELECT 1 FROM @OtpTable OT WHERE (T.Item = OT.Item OR T.ItemKey = OT.ItemKey))

        SET @Spread = @@ROWCOUNT
    END
END

SELECT * FROM @TestTable
You cannot do this update in a single statement.
-- Working temp table; Id is filled in after the load, Req_Res by the loop below.
CREATE TABLE #Table
(
    Id INT
    ,Item varchar(30)
    ,[key] INT
    ,DenseRankWrtKey INT
    ,Req_Res INT
)
INSERT INTO #Table
(
    Item
    ,[key]
    ,DenseRankWrtKey
)
VALUES
<YOUR DATA>
-- Number the rows by (DenseRankWrtKey, Item) and store that number in Id.
;WITH CTE
AS
(
    SELECT
        T.Item
        ,T.[Key]
        ,Id = RANK() OVER(order by T.DenseRankWrtKey,T.Item)
    FROM
        #Table AS T
)
UPDATE
    T
SET
    T.Id = CTE.Id
FROM
    CTE
    INNER JOIN #Table AS T ON T.Item = CTE.Item AND T.[key] = CTE.[key]
-- @LoopVal: the DenseRankWrtKey currently being processed.
-- @LoopReq: the Req_Res already stored on the row picked for that rank, if any.
DECLARE @LoopVal INT = 0
    ,@LoopReq INT = NULL
WHILE 1 = 1
BEGIN
    -- Advance to the next higher DenseRankWrtKey.
    SELECT TOP 1
        @LoopVal = T.DenseRankWrtKey
        ,@LoopReq = T.Req_Res
    FROM
        #Table AS T
    WHERE
        T.DenseRankWrtKey > @LoopVal
    ORDER BY
        T.DenseRankWrtKey ASC
    -- No higher rank left: done. Must be tested directly after the SELECT.
    IF @@ROWCOUNT = 0
        BREAK;
    -- Label still-unlabelled rows that share a key with a row of the current rank.
    UPDATE T2
    SET Req_Res = CASE WHEN @LoopReq IS NOT NULL THEN @LoopReq ELSE T.DenseRankWrtKey END
    FROM
        #Table AS T
        INNER JOIN #Table AS T2 ON T.[key] = T2.[key]
    WHERE
        T.DenseRankWrtKey = @LoopVal
        AND T2.Req_Res IS NULL
    -- Label still-unlabelled rows that share an Item with an already labelled row.
    UPDATE
        T
    SET
        T.Req_Res = CASE WHEN @LoopReq IS NOT NULL THEN @LoopReq ELSE T2.Req_Res END
    FROM
        #Table AS T
        INNER JOIN #Table AS T2 ON T.Item = T2.Item
            AND T2.Req_Res IS NOT NULL
            AND T.Req_Res IS NULL
END
SELECT * FROM #Table
ORDER BY
    DenseRankWrtKey
DROP TABLE #Table
GO

How to build up a sparse output from given records in SQL Server statements?

Given some records
-- Sample records held in a table variable.
DECLARE @t TABLE
(
    idx   varchar(10),
    class varchar(10),
    head  varchar(10),
    qty   VARCHAR(10)
)

-- NOTE(review): the qty position mixes integer literals with ''. A multi-row
-- VALUES list resolves each column to a single type by data type precedence,
-- so '' presumably arrives as 0 (stored as '0') - confirm which is intended.
INSERT @t (idx, class, head, qty)
VALUES
    ('row1', 'H1', 'C1', 1), ('row1', 'H1', 'C2', 2),
    ('row1', 'H1', 'C3', 3), ('row2', 'H2', 'D1', 2),
    ('row2', 'H2', 'D2', 3), ('row2', 'H2', 'D3', 4),
    ('row3', 'H1', 'C2', 8), ('row3', 'H2', 'D2', 9),
    ('row3', 'H2', 'D3', 10), ('row4', '', '', ''),
    ('row5', 'H2', 'D2', 10), ('row5', 'H2', 'D3', 11),
    ('row5', 'H3', 'E1', 12), ('row6', '', '', '')

SELECT idx, class, head, qty
FROM @t
OUTPUT:
idx memo class head qty
--------------------------------
row1 ida H1 C1 1
row1 ida H1 C2 2
row1 ida H1 C3 3
row2 id H2 D1 2
row2 id H2 D2 3
row2 id H2 D3 4
row3 id H1 C2 8
row3 id H2 D2 9
row3 id H2 D3 10
row4 ida
row5 idf H2 D2 10
row5 idf H2 D3 11
row5 idf H3 E1 12
row6 id
How can I efficiently produce a sparse array output like the one below? ZEROs could be replaced by blank strings. A similar question and answer for Mathematica can be found here: https://mathematica.stackexchange.com/questions/186835/building-a-sparse-array-from-given-lists-the-2nd-case
"arrays" are not something SQL is generally known for although Postgres does have array features.
In T-SQL you can "pivot" your table, like this:
-- Spread each head value into its own column, one output row per idx.
SELECT idx, [C1], [C2], [C3], [D1], [D2], [D3], [E1]
FROM (
    SELECT
        idx,
        head,
        qty
    FROM @t
) AS sourcedata
PIVOT
(
    MAX([qty])
    FOR [head] IN ([C1], [C2], [C3], [D1], [D2], [D3], [E1])
) AS p
ORDER BY idx
which will produce this:
+----+------+------+------+------+------+------+------+------+
| | idx | C1 | C2 | C3 | D1 | D2 | D3 | E1 |
+----+------+------+------+------+------+------+------+------+
| 1 | row1 | 1 | 2 | 3 | NULL | NULL | NULL | NULL |
| 2 | row2 | NULL | NULL | NULL | 2 | 3 | 4 | NULL |
| 3 | row3 | NULL | 8 | NULL | NULL | 9 | 10 | NULL |
| 4 | row4 | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
| 5 | row5 | NULL | NULL | NULL | NULL | 10 | 11 | 12 |
| 6 | row6 | NULL | NULL | NULL | NULL | NULL | NULL | NULL |
+----+------+------+------+------+------+------+------+------+
and you can even generate the pivot query if needed:
-- @cols: comma-separated, bracket-quoted list of the distinct non-empty head values.
-- @query: the generated pivot statement.
DECLARE @cols AS NVARCHAR(MAX)
DECLARE @query AS NVARCHAR(MAX)

SET @cols = STUFF((SELECT DISTINCT ',' + QUOTENAME(head)
                   FROM some_table s
                   WHERE head IS NOT NULL AND head <> ''
                   FOR XML PATH(''), TYPE
                  ).value('.', 'NVARCHAR(MAX)')
                 , 1, 1, '')   -- strip the leading comma

-- The pivot must spread [head] (the column that @cols was built from);
-- idx stays as the row identifier.
SET @query = 'SELECT idx, ' + @cols + '
FROM (
SELECT
idx, head, qty
FROM some_table
) sourcedata
pivot
(
max([qty])
FOR [head] IN (' + @cols + ')
) p
order by idx '

SELECT @query -- use select to inspect the generated sql
--execute(@query) -- once satisfied that sql is OK, use execute
but you can't use @t as the data source when executing the @query, because a table variable declared outside is not visible inside the dynamically executed SQL.
I have no idea if this really helps, because there are no columnar references back to the classes.
EDIT
To replace NULLs in the final output requires changing the first query seen above to this:
-- Same pivot as before, with NULL cells replaced by empty strings.
-- Each output column keeps the name of the head value it represents.
SELECT
      idx
    , COALESCE( CAST( [C1] AS varchar(10) ), '' ) AS [C1] -- converted into STRINGS
    , COALESCE( CAST( [C2] AS varchar(10) ), '' ) AS [C2]
    , COALESCE( CAST( [C3] AS varchar(10) ), '' ) AS [C3]
    , COALESCE( CAST( [D1] AS varchar(10) ), '' ) AS [D1]
    , COALESCE( CAST( [D2] AS varchar(10) ), '' ) AS [D2]
    , COALESCE( CAST( [D3] AS varchar(10) ), '' ) AS [D3]
    , COALESCE( CAST( [E1] AS varchar(10) ), '' ) AS [E1]
FROM (
    SELECT
          idx
        , head
        , qty
    FROM @t
) AS sourcedata
PIVOT
(
    MAX( [qty] )
    FOR [head] IN ([C1], [C2], [C3], [D1], [D2], [D3], [E1])
) AS p
ORDER BY
    idx

Can I modify below GroupBy clause into better one

Consider following cases
CASE 1: tbl_a
---------------
| colA | colB |
--------------- expected O/P: 1 0
| 1 | 0 |
---------------
CASE 2: tbl_a
---------------
| colA | colB |
--------------- expected O/P: 1 1
| 1 | 1 |
---------------
CASE 3: tbl_a
---------------
| colA | colB |
--------------- expected O/P: 1 1
| 1 | 0 |
| 1 | 1 |
---------------
CASE 4: tbl_a
---------------
| colA | colB |
--------------- expected O/P: NULL NULL
| null | null |
---------------
The query is simple, If there is a record where colA = 1 and colB = 1 then return it, if no such record exists then return the existing record for colA = 1.
I have tried various ways. I came with groupBy clause but is there a simple way to do it.
If I use ColA = 1 And colB = 1 then it fails for case 1 it returns no rows.
-- Largest colB per colA value (the FROM clause is required for the query to run).
SELECT colA, MAX(colB) AS colB
FROM tbl_a
GROUP BY colA
Is this the valid query? Any help is greatly appreciated.
Please try the following. It provides the desired results for the posted cases.
-- Keep only rows with colA = 1 or colA NULL, then return the first row when
-- sorted by colA and colB descending. Relies on SQL Server ordering NULL as
-- the lowest value, so colA = 1 rows come before the NULL row.
SELECT TOP (1)
    colA,
    colB
FROM tbl_a
WHERE colA IS NULL
   OR colA = 1
ORDER BY
    colA DESC,
    colB DESC;
A different approach using row_number with ordering based on your priority and getting the row with the minimum row number as the result.
-- Rank rows by priority: (colA = 1, colB = 1) first, then any colA = 1 row,
-- then everything else; return the top-ranked row. ROW_NUMBER() always starts
-- at 1, so filtering on rn = 1 replaces the former MIN(rn) OVER() comparison.
WITH ranked AS (
    SELECT
        colA,
        colB,
        ROW_NUMBER() OVER (
            ORDER BY CASE
                         WHEN colA = 1 AND colB = 1 THEN 1
                         WHEN colA = 1 THEN 2
                         ELSE 3
                     END
        ) AS rn
    FROM t
)
SELECT colA, colB
FROM ranked
WHERE rn = 1
I think the query that you wrote fits the data too well, but doesn't work for the general case which you described.
Try the script below and change the value of the @scenario variable to see what it returns for different data.
Use / adapt the last query for your table structure.
-- Test harness: pick a scenario letter to load the matching sample rows.
DECLARE @tbl TABLE (colA int, colB int)
DECLARE @scenario char(1) = 'D'

IF @scenario = 'A'
    INSERT @tbl (colA, colB) VALUES (1, 0)
ELSE IF @scenario = 'B'
    INSERT @tbl (colA, colB) VALUES (1, 1)
ELSE IF @scenario = 'C'
    INSERT @tbl (colA, colB) VALUES (1, 0), (1, 1)
ELSE IF @scenario = 'D'
    INSERT @tbl (colA, colB) VALUES (null, null)

-- Priority: (1, 1) rows; otherwise colA = 1 rows; otherwise the all-NULL row.
-- The NULL branch checks for any colA = 1 row so that a NULL row is not
-- returned alongside a colA = 1 row.
SELECT colA, colB
FROM @tbl
WHERE (colA = 1 AND colB = 1)
   OR (colA = 1 AND NOT EXISTS (SELECT 1 FROM @tbl WHERE colA = 1 AND colB = 1))
   OR (colA IS NULL AND colB IS NULL AND NOT EXISTS (SELECT 1 FROM @tbl WHERE colA = 1))
You can also test the query with "more random data" in each scenario, like below:
-- Test harness with noisier sample rows per scenario letter.
DECLARE @tbl TABLE (colA int, colB int)
DECLARE @scenario char(1) = 'B'

IF @scenario = 'A'
    INSERT @tbl (colA, colB) VALUES (1, 0), (0, 1), (0, 0), (0, null)
ELSE IF @scenario = 'B'
    INSERT @tbl (colA, colB) VALUES (1, 1), (1, 0), (null, null)
ELSE IF @scenario = 'C'
    INSERT @tbl (colA, colB) VALUES (1, 0), (1, 1), (0, 0), (1, 0), (null, 0)
ELSE IF @scenario = 'D'
    INSERT @tbl (colA, colB) VALUES (null, null)

-- Priority: (1, 1) rows; otherwise colA = 1 rows; otherwise the all-NULL row.
-- The NULL branch checks for any colA = 1 row so that a NULL row is not
-- returned alongside a colA = 1 row.
SELECT colA, colB
FROM @tbl
WHERE (colA = 1 AND colB = 1)
   OR (colA = 1 AND NOT EXISTS (SELECT 1 FROM @tbl WHERE colA = 1 AND colB = 1))
   OR (colA IS NULL AND colB IS NULL AND NOT EXISTS (SELECT 1 FROM @tbl WHERE colA = 1))

How to subtract two rows from one another if they share a value in another column

I am currently working in a database with the following structure:
Var | Value | ID
--------------
A | 1 | 1
B | 2 | 1
C | 3 | 1
A | 2 | 2
B | 4 | 2
C | 6 | 2
What I am trying to achieve is to subtract the value of Var C from the other Vars (A and B) sharing the same ID as Var C. In this case the output would be:
Var | Value | ID
--------------
A | -2 | 1
B | -1 | 1
C | 3 | 1
A | -4 | 2
B | -2 | 2
C | 6 | 2
To be honest I have absolutely no idea how to start on achieving this. I am familiar with many other programming languages, but SQL is still a challenge with difficult/specific queries.
Do a self join:
-- Pair every row with the 'C' row of the same id; 'C' rows keep their own
-- value, every other row has the 'C' value subtracted.
SELECT
    cur.var,
    CASE cur.var
        WHEN 'C' THEN cur.value
        ELSE cur.value - c_row.value
    END AS value,
    cur.id
FROM tablename AS cur
INNER JOIN tablename AS c_row
    ON c_row.id = cur.id
   AND c_row.var = 'C'
Note that value is a reserved word in ANSI SQL, so it should be delimited as "Value".
You could pre-analyse the "C" Values and then use this to remove them?
-- Sample data in a table variable.
DECLARE @Data TABLE (
    [Var] VARCHAR(1),
    Value INT,
    ID    INT);

INSERT INTO @Data ([Var], Value, ID)
VALUES
    ('A', 1, 1),
    ('B', 2, 1),
    ('C', 3, 1),
    ('A', 2, 2),
    ('B', 4, 2),
    ('C', 6, 2);

-- CValues isolates the 'C' value of each ID so it can be subtracted from the
-- other rows of the same ID; 'C' rows themselves are returned unchanged.
WITH CValues AS (
    SELECT
        ID,
        Value
    FROM
        @Data
    WHERE
        [Var] = 'C')
SELECT
    d.[Var],
    CASE WHEN d.[Var] != 'C' THEN d.Value - c.Value ELSE d.Value END AS Value,
    d.ID
FROM
    @Data AS d
    LEFT JOIN CValues AS c ON c.ID = d.ID;
...but yes, a self-join is probably a better solution:
-- Sample data in a table variable.
DECLARE @Data TABLE (
    [Var] VARCHAR(1),
    Value INT,
    ID    INT);

INSERT INTO @Data ([Var], Value, ID)
VALUES
    ('A', 1, 1),
    ('B', 2, 1),
    ('C', 3, 1),
    ('A', 2, 2),
    ('B', 4, 2),
    ('C', 6, 2);

-- Join each row to the 'C' row of the same ID and subtract its value;
-- 'C' rows themselves are returned unchanged.
SELECT
    d.[Var],
    CASE WHEN d.[Var] != 'C' THEN d.Value - c.Value ELSE d.Value END AS Value,
    d.ID
FROM
    @Data AS d
    LEFT JOIN @Data AS c ON c.[Var] = 'C' AND c.ID = d.ID;

How to use SQL table pivot for a table with multiple aggregates

I have a procedure that returns the following table:
And I want to pivot it around so that for each Name that is returned, you have a row for Planned, Actual and Difference.
For example:
| Key | Name1 | Name2 | Name3 | Name4
| Planned | 0 | 0 | 0 | 0
| Actual | 8957 | 5401 | NULL | NULL
|Difference| -8957 | -5401 | NULL | NULL
I'm trying to use the PIVOT function, but I've never used it before and am struggling to get my head around it. How would one achieve something similar to the above?
Without a pivot, you can use a cross join instead
Note that this only works if you know how many names you will have every time you run it, and if each name appears only once in the original table (otherwise the MAX function below is not appropriate).
-- Sample data: one row per name with its planned/actual/difference figures.
CREATE TABLE #test (ID int, Name char(5), planned int, actual int, difference_between int)

INSERT INTO #test (ID, Name, planned, actual, difference_between)
VALUES
    (54, 'Name1', 0, 8957, -8957),
    (54, 'Name2', 0, 5401, -5401),
    (54, 'Name3', 0, NULL, NULL),
    (54, 'Name4', 0, NULL, NULL)

-- Cross join every source row with the numbers 1..3 (1 = Planned, 2 = Actual,
-- 3 = Difference), then collapse to one output row per number with one
-- column per name.
SELECT
    CASE t.occurno WHEN 1 THEN 'Planned' WHEN 2 THEN 'Actual' WHEN 3 THEN 'Difference' END AS [Key]
    , MAX(CASE WHEN Name = 'Name1' THEN CASE t.occurno WHEN 1 THEN planned WHEN 2 THEN actual WHEN 3 THEN difference_between ELSE 0 END END) AS Name1
    , MAX(CASE WHEN Name = 'Name2' THEN CASE t.occurno WHEN 1 THEN planned WHEN 2 THEN actual WHEN 3 THEN difference_between ELSE 0 END END) AS name2
    , MAX(CASE WHEN Name = 'Name3' THEN CASE t.occurno WHEN 1 THEN planned WHEN 2 THEN actual WHEN 3 THEN difference_between ELSE 0 END END) AS name3
    , MAX(CASE WHEN Name = 'Name4' THEN CASE t.occurno WHEN 1 THEN planned WHEN 2 THEN actual WHEN 3 THEN difference_between ELSE 0 END END) AS name4
FROM #test
CROSS JOIN (VALUES (1), (2), (3)) AS t (occurno)
GROUP BY t.occurno
ORDER BY t.occurno
-- Source data: one row per name.
CREATE TABLE #T
(
    Name varchar(255),
    Planned int,
    Actual int,
    [Difference] int
)

INSERT INTO #T (Name, Planned, Actual, [Difference])
VALUES
    ('Name1', 0, 8957, -8957),
    ('Name2', 0, 5401, -5401),
    ('Name3', 0, NULL, NULL),
    ('Name4', 0, NULL, NULL)

-- Unpivot the data: one row per (measure, name) pair.
CREATE TABLE #T2
(
    [Key] varchar(255),
    Name varchar(255),
    Value int
)

INSERT INTO #T2 ([Key], Name, Value)
SELECT [Key], Name, Value
FROM
    (SELECT Name, Planned, Actual, [Difference] FROM #T) P
UNPIVOT
    (Value FOR [Key] IN (Planned, Actual, [Difference])) AS UNP

-- Build the ordered, bracket-quoted list of column names. FOR XML PATH is used
-- instead of "SELECT @v = @v + ... ORDER BY", whose result is not guaranteed.
DECLARE @columns nvarchar(max)
SET @columns = STUFF((
        SELECT ', ' + QUOTENAME(Name)
        FROM (SELECT DISTINCT Name FROM #T2) AS T
        ORDER BY Name
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 2, '')   -- strip the leading ", "

-- Build the dynamic SQL that pivots the names back into columns.
DECLARE @sql nvarchar(max)
SET @sql =
'
SELECT *
FROM
(SELECT
[Key], Name, Value
FROM
#T2
) AS SourceTable
PIVOT
(
SUM(Value)
FOR Name in ('+@columns+')
) AS PivotTable
order by
case [Key]
when ''Planned'' then 1
when ''Actual'' then 2
when ''Difference'' then 3 end
'
EXEC sp_executesql @sql

DROP TABLE #T2
DROP TABLE #T