Restructure table by removing NULL values - sql

I have a table in SQL that looks like this:
Customer Product 1999 2000 2001 2002 2003
Smith 51 NULL NULL 15 14 NULL
Jones 14 11 7 NULL NULL NULL
Jackson 13 NULL NULL NULL 3 9
The figures under each year column are amounts, in dollars. Each customer has amounts in two consecutive years, and the remaining years are NULL. I would like to restructure this table so that, instead of a wide list of year columns, it has just two columns, Amount-Year1 and Amount-Year2: the two non-NULL years are selected and placed in those columns in chronological order. This would greatly reduce the size of my table.
So far I've been able to re-structure it so that there is one amount column and one year column, but I then get multiple rows per customer, which unfortunately I can't have (due to downstream analysis). Can anyone think of a way to get the two Amount-Year columns?
I would like the final table to look like this:
Customer Product Amount_Y1 Amount_Y2
Smith 51 15 14
Jones 14 11 7
Jackson 13 3 9
I don't mind that I lose the information about the specific years, as I can get that from another source. The actual table has data for all years between 1999 and 2018, and there will be further years in the future.
Thanks

Thankfully UNPIVOT removes NULLs anyway, so we can do this with UNPIVOT, ROW_NUMBER() and PIVOT:
-- Sample data: one row per customer/product with one nullable amount column per year.
-- A table variable must be declared with an @ name.
declare @t table (
    Customer varchar(15),
    Product  int,
    [1999]   int,
    [2000]   int,
    [2001]   int,
    [2002]   int,
    [2003]   int
);

insert into @t (Customer, Product, [1999], [2000], [2001], [2002], [2003]) values
    ('Smith'  , 51, NULL, NULL,   15,   14, NULL),
    ('Jones'  , 14,   11,    7, NULL, NULL, NULL),
    ('Jackson', 13, NULL, NULL, NULL,    3,    9);

-- Step 1: UNPIVOT turns the year columns into rows and drops the NULL amounts.
-- Step 2: ROW_NUMBER numbers the surviving amounts per customer/product in year order
--         (Year holds the source column name, so the ordering is by that name).
-- Step 3: PIVOT spreads row numbers 1 and 2 back out into two columns.
with Numbered as (
    select
        Customer,
        Product,
        Value,
        ROW_NUMBER() OVER (PARTITION BY Customer, Product
                           ORDER BY Year) as rn
    from
        @t t
    unpivot
        (Value for Year in ([1999],[2000],[2001],[2002],[2003])) u
)
select
    Customer,
    Product,
    [1],
    [2]
from
    Numbered n
pivot
    (SUM(Value) for rn in ([1],[2])) w;
Results:
Customer Product 1 2
--------------- ----------- ----------- -----------
Jackson 13 3 9
Jones 14 11 7
Smith 51 15 14

Use COALESCE; that will do the job for you. The query is dynamic, so if year columns are added or removed in the future you do not have to change anything.
Sample query: (Assuming table to be table1 and column names to be same as year).
-- Builds and runs
--   SELECT Customer, Product,
--          COALESCE(<columns ascending by name>)  AS Amount_Y1,
--          COALESCE(<columns descending by name>) AS Amount_Y2
--   FROM Table1
-- The column list is read from the catalog: every column of table1 other than
-- Customer and Product is treated as a year column.
DECLARE @columnsdesc nvarchar(max), @columnsasc nvarchar(max);

-- Comma-separated, bracket-quoted column names, highest name first.
SELECT @columnsdesc = STUFF((
        SELECT ',' + QUOTENAME(c.name)
        FROM sys.columns AS c
        INNER JOIN sys.objects AS o ON o.object_id = c.object_id
        WHERE o.type = 'U' AND o.name = 'table1' AND c.name NOT IN ('Customer', 'Product')
        ORDER BY c.name DESC
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 1, '');

-- Same list, lowest name first.
SELECT @columnsasc = STUFF((
        SELECT ',' + QUOTENAME(c.name)
        FROM sys.columns AS c
        INNER JOIN sys.objects AS o ON o.object_id = c.object_id
        WHERE o.type = 'U' AND o.name = 'table1' AND c.name NOT IN ('Customer', 'Product')
        ORDER BY c.name ASC
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 1, '');

DECLARE @sql nvarchar(max);
SET @sql = N'SELECT Customer, Product, COALESCE(' + @columnsasc + N') as Amount_Y1,
COALESCE(' + @columnsdesc + N') as Amount_Y2
FROM Table1';

EXEC (@sql);
If you're dealing with a temporary table, then the code will change slightly:
Test it here: http://rextester.com/MRVR48808
-- Temp-table variant: the column metadata lives in tempdb, and the table is
-- resolved with OBJECT_ID('tempdb..#Table1') so that only this session's #Table1
-- is inspected (a LIKE '#table1%' name match can also hit other sessions' tables
-- or names such as #table10).
DECLARE @columnsdesc nvarchar(max), @columnsasc nvarchar(max);

-- Comma-separated, bracket-quoted column names, highest name first.
SELECT @columnsdesc = STUFF((
        SELECT ',' + QUOTENAME(c.name)
        FROM tempdb.sys.columns AS c --Changes here
        WHERE c.object_id = OBJECT_ID('tempdb..#Table1') --Changes here
          AND c.name NOT IN ('Customer', 'Product')
        ORDER BY c.name DESC
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 1, '');

-- Same list, lowest name first.
SELECT @columnsasc = STUFF((
        SELECT ',' + QUOTENAME(c.name)
        FROM tempdb.sys.columns AS c --Changes here
        WHERE c.object_id = OBJECT_ID('tempdb..#Table1') --Changes here
          AND c.name NOT IN ('Customer', 'Product')
        ORDER BY c.name ASC
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 1, '');

DECLARE @sql nvarchar(max);
SET @sql = N'SELECT Customer, Product, COALESCE(' + @columnsasc + N') as Amount_Y1,
COALESCE(' + @columnsdesc + N') as Amount_Y2
FROM #Table1'; --Changes here

EXEC (@sql);

Try using COALESCE as follows : For one field from beginning to end and for the second in the reverse manner.
-- Y1: first non-NULL amount scanning the year columns from 1999 to 2003.
-- Y2: first non-NULL amount scanning the year columns from 2003 back to 1999.
SELECT
    Customer,
    Product,
    COALESCE([1999], [2000], [2001], [2002], [2003]) AS Y1,
    COALESCE([2003], [2002], [2001], [2000], [1999]) AS Y2
FROM #TEMPDATA

I would do this using cross apply:
-- For every row of t, list its (amount, year) pairs, keep the non-NULL amounts,
-- number them in year order, and return the first two as Amount_Y1 / Amount_Y2.
select t.customer, t.product, v.Amount_Y1, v.Amount_Y2
from t cross apply
     (select max(case when which = 1 then val end) as Amount_Y1,
             max(case when which = 2 then val end) as Amount_Y2
      from (select val, yr, row_number() over (order by yr) as which
            from (values (t.[1999], 1999), (t.[2000], 2000), (t.[2001], 2001),
                         (t.[2002], 2002), (t.[2003], 2003)
                 ) yrs(val, yr)
            where val is not null
           ) ranked
     ) v; -- closes the CROSS APPLY subquery; v is the alias the outer SELECT refers to

Related

Is it possible to Sum columns if columns name is like '2020%'

I have a table on my server where they have already pivoted the data so now I am sitting with a table that looks something like this.
Client_No
20200201
20200401
20220101
20220201
20220301
20220401
20220501
123456789
3
1
0
0
0
0
0
321654987
4
4
0
4
2
1
0
this table gets updated monthly so to automate the script I want to be able to do something like this
Select Client_No
, Sum(column like '2022%')
From [table_name]
Is this possible? Basically I want the script to sum all the columns that start with 2022, and yes I am currently running this in SSMS
Since you mentioned that you already have table which is pivoted you will have to unpivot it first.
-- Turn the listed date columns back into rows (Years = source column name,
-- Orders = its value), then total the rows whose column name starts with 2022.
WITH SRC AS (
    SELECT Client_No, Years, Orders
    FROM
        (SELECT Client_No, [20200201], [20220101], [20200401], [20220201]
         FROM myTable) AS p
    UNPIVOT
        (Orders FOR Years IN
            ([20200201], [20200401], [20220101], [20220201])
        ) AS unpvt
)
SELECT
    CLIENT_NO,
    SUM(CAST(ORDERS AS BIGINT)) OrdersNumber
FROM SRC
WHERE LEFT(SRC.Years, 4) = '2022'
GROUP BY CLIENT_NO
Or if you don't want to put down all columns you can use dynamic unpivot
-- Dynamic version: the list of columns to unpivot is read from the catalog, so
-- new monthly columns are picked up without editing the query.
DECLARE @Pivot_Column nvarchar(max);
DECLARE @Query nvarchar(max);

-- Every column of myTable except Client_No, bracket-quoted and comma-separated.
-- OBJECT_ID resolves one specific table, so a same-named table in another schema
-- cannot add duplicate column names to the list.
SET @Pivot_Column = (
    SELECT STRING_AGG(CAST(QUOTENAME(c.name) AS nvarchar(max)), ',')
    FROM sys.columns AS c
    WHERE c.object_id = OBJECT_ID(N'myTable')
      AND c.name <> 'Client_No'
);

SELECT @Query = N'
SELECT CLIENT_NO, SUM(CAST(ORDERS AS BIGINT)) OrdersNumber FROM (
    SELECT Client_No, Years, Orders
    FROM
        (SELECT Client_No,' + @Pivot_Column + N'
         FROM myTable) p
    UNPIVOT
        (Orders FOR Years IN
            (' + @Pivot_Column + N')
        ) AS unpvt
) SRC
WHERE LEFT(SRC.Years,4) = ''2022''
GROUP BY CLIENT_NO
';

EXEC sp_executesql @Query;
Note: STRING_AGG is available in SQL Server 2017 (14.x) and later.
To do a Sum, you usually need a GROUP BY clause. In this case you might be better-off using a sub-query to create a calculated column for grouping. Try this:
-- Derive a year key from the first four characters of the date-like column, then
-- total ItemValue per client and year.
-- datemask_or_whatever and ItemValue are placeholders for the real column names.
WITH SubQuery AS (
    SELECT
        Client_No,
        LEFT(datemask_or_whatever, 4) AS Yr, --column like '2022%'
        ItemValue
    FROM [table_name]
)
SELECT
    Client_No,
    Yr,
    SUM(ItemValue)
FROM SubQuery
GROUP BY Client_No, Yr

Create a pivot of same columns in to 1 row

I'm using SQL Server. I have a query which returns the data of all the fields. The main point is that one field can belong to multiple records; the RecordID differentiates them.
I've a data set like this.
This is my current data set
My current query:
-- One row per stored field value for records 45 and 46, with the field's name.
SELECT
    fd.FieldName,
    FV.FieldID,
    Data,
    R.RecordID
FROM FieldValues AS FV
INNER JOIN Records AS R
    ON R.RecordID = FV.RecordID
INNER JOIN Forms AS F
    ON F.FormID = R.FormID
INNER JOIN Fields AS fd
    ON fd.FieldID = FV.FieldID
WHERE R.RecordID IN (45, 46)
I need to create 1 row of each columns that belongs to the same RecordID like this.
Service Name Location city VendorCode RecordID
Raj ABC LOCATION ABC CITY 32 45
BEN ABC LOCATION ABC CITY -- 46
The above is my desired output.
I've tried with pivot but have not succeeded.
If you don't like to deal with dynamic pivot and you do know the key of the rows you want to convert into columns, you can use standard sql with max and case when
-- Conditional aggregation: each CASE keeps Data only for one field name, and MAX
-- collapses the rows of a record into a single value per output column.
SELECT
    MAX(CASE WHEN fd.FieldName = 'SelectService'     THEN Data END) AS ServiceName,
    MAX(CASE WHEN fd.FieldName = 'EnterYourLocation' THEN Data END) AS Location,
    MAX(CASE WHEN fd.FieldName = 'City'              THEN Data END) AS city,
    MAX(CASE WHEN fd.FieldName = 'VendorCodeOption'  THEN Data END) AS VendorCode,
    R.RecordId
FROM FieldValues AS FV
INNER JOIN Records AS R
    ON R.RecordID = FV.RecordID
INNER JOIN Forms AS F
    ON F.FormID = R.FormID
INNER JOIN Fields AS fd
    ON fd.FieldID = FV.FieldID
WHERE R.RecordID IN (45, 46)
GROUP BY R.RecordId
This is the solution with PIVOT; note that it does not yet include the joins from the original query, so adjust the source queries to add them:
-- Build the bracket-quoted, comma-terminated list of distinct field names.
-- NOTE(review): this reads FieldName straight from FieldValues; in the question's
-- query FieldName comes from the Fields table, so the source may need that join.
DECLARE @columns nvarchar(max);
SET @columns = N'';

SELECT @columns = COALESCE(@columns + QUOTENAME(CAST(col AS nvarchar(128))) + N',', N'')
FROM (SELECT FieldName AS col FROM FieldValues GROUP BY FieldName) AS m;

-- Drop the trailing comma.
SET @columns = LEFT(@columns, LEN(@columns) - 1);

DECLARE @SQLString nvarchar(max);
SET @SQLString = N'
select * from
( select RecordId, FieldName, Data from FieldValues) m
PIVOT
( MAX(Data)
FOR FieldName in (' + @columns + N')
) AS PVT';

EXECUTE sp_executesql @SQLString;

Embed child records into single row with parent information

I have the following tables
BATCH
BatchID Name CustomerID DateCreated Status
12 A 1 01/01/2013 Active
13 B 12 01/01/2013 Inactive
14 C 245 01/01/2013 Complete
BATCHDETAIL
BatchDetailID BatchID Weight Price DestinationCode
1 12 55 500.00 99
2 12 119 1500.00 55
3 13 12 133 1212
A batch record can have many batch detail records linked via the FK BatchDetail.BatchID
I want to write a query to select a single row back to the user which combines the information in the BATCH record and the Weight,Price and DestinationCode from both BATCHDETAIL records for BatchID = 12
So the output would be :
BatchID Name CustomerID DateCreated Status WeightA PriceA DestinationCodeA WeightB PriceB DestinationCodeB
12 A 1 01/01/2013 Active 55 500.00 99 119 1500 55
So you can see I want to have 1 row with all information combined in the one row and differentiate each detail record with A or B ( Lets assume a maximum of 2 detail records is only allowed )
I have thought of creating a table with these fields and then building up the information in a series of select statements and finally doing a select on the temp table but getting the query into a single block of SQL would be ideal.
Here is a solution using dynamic SQL:
-- Get the MAX total number of records per BatchID (how many sets of columns do we need?)
DECLARE @requiredLevels int = (SELECT MAX(C) FROM (SELECT COUNT(*) C FROM BATCHDETAIL GROUP BY BatchID) Q);

-- Fragments of the final statement, built in one pass over 1..@requiredLevels:
--   @finalFieldsSQL    : ", WeightA, PriceA, DestinationCodeA, WeightB, ..." for the outer SELECT
--   @subqueryFieldsSQL : ", MAX([1]) ColumnNameA, MAX([2]) ColumnNameB, ..." (ColumnName is
--                        replaced per measure below)
--   @pivotFieldsSQL    : ", [1], [2], ..." for the PIVOT IN list
-- nvarchar(max) is used so that a large number of levels cannot truncate the text.
DECLARE
      @finalFieldsSQL    nvarchar(max) = N''
    , @subqueryFieldsSQL nvarchar(max) = N''
    , @pivotFieldsSQL    nvarchar(max) = N''
    , @level             int = 1
    , @suffix            char(1);

WHILE @level <= @requiredLevels
BEGIN
    -- Level 1 -> 'A', level 2 -> 'B', ... (CHAR(65) is 'A').
    SET @suffix = CHAR(64 + @level);

    SET @finalFieldsSQL = @finalFieldsSQL
        + ', Weight' + @suffix + ', Price' + @suffix + ', DestinationCode' + @suffix;
    SET @subqueryFieldsSQL = @subqueryFieldsSQL
        + ', MAX([' + CAST(@level AS varchar(10)) + ']) ColumnName' + @suffix;
    SET @pivotFieldsSQL = @pivotFieldsSQL
        + ', [' + CAST(@level AS varchar(10)) + ']';

    SET @level = @level + 1;
END;

-- Strip the leading ", " from the PIVOT list.
SET @pivotFieldsSQL = SUBSTRING(@pivotFieldsSQL, 3, LEN(@pivotFieldsSQL) - 2);

-- Build the final SQL statement and execute.
-- Each measure (Weight, Price, DestinationCode) is pivoted separately on the
-- per-batch row number R and then LEFT JOINed back to BATCH.
DECLARE @SQL nvarchar(max) =
N'
SELECT
    B.BatchID, B.Name, B.CustomerID, B.DateCreated, [Status]' + @finalFieldsSQL + N'
FROM
    BATCH B
LEFT JOIN
    (
        SELECT
            BatchID' + REPLACE(@subqueryFieldsSQL, 'ColumnName', 'Weight') + N'
        FROM
            (
                SELECT BD.BatchID, [Weight], ROW_NUMBER() OVER (PARTITION BY B.BatchID ORDER BY BatchDetailID) R
                FROM
                    BATCH B
                    JOIN BATCHDETAIL BD ON B.BatchID = BD.BatchID
            ) Q
        PIVOT
            (
                MAX([Weight])
                FOR R IN (' + @pivotFieldsSQL + N')
            ) P
        GROUP BY BatchID
    ) W
    ON B.BatchID = W.BatchID
LEFT JOIN
    (
        SELECT
            BatchID' + REPLACE(@subqueryFieldsSQL, 'ColumnName', 'Price') + N'
        FROM
            (
                SELECT BD.BatchID, Price, ROW_NUMBER() OVER (PARTITION BY B.BatchID ORDER BY BatchDetailID) R
                FROM
                    BATCH B
                    JOIN BATCHDETAIL BD ON B.BatchID = BD.BatchID
            ) Q
        PIVOT
            (
                MAX(Price)
                FOR R IN (' + @pivotFieldsSQL + N')
            ) P
        GROUP BY BatchID
    ) P
    ON B.BatchID = P.BatchID
LEFT JOIN
    (
        SELECT
            BatchID' + REPLACE(@subqueryFieldsSQL, 'ColumnName', 'DestinationCode') + N'
        FROM
            (
                SELECT BD.BatchID, DestinationCode, ROW_NUMBER() OVER (PARTITION BY B.BatchID ORDER BY BatchDetailID) R
                FROM
                    BATCH B
                    JOIN BATCHDETAIL BD ON B.BatchID = BD.BatchID
            ) Q
        PIVOT
            (
                MAX(DestinationCode)
                FOR R IN (' + @pivotFieldsSQL + N')
            ) P
        GROUP BY BatchID
    ) D
    ON B.BatchID = D.BatchID
';

EXEC (@SQL);
If you don't want to show batches that have no detail records, replace LEFT JOIN with JOIN in the final statement (3 occurrences).
you can use Pivot and UnPivot to achieve this result. Try Something like this:
-- Returns one row per batch with the Weight/Price/DestinationCode of its first and
-- second detail rows (ordered by BatchDetailID) as the ...A and ...B columns.
SELECT
BatchID,[Name],[CustomerID],[DateCreated],[Status],
MAX(Weight1) as WeightA,
MAX(Price1) as PriceA,
MAX(DestinationCode1) as DestinationCodeA,
MAX(Weight2) as WeightB,
MAX(Price2) as PriceB,
MAX(DestinationCode2) as DestinationCodeB
FROM (
-- BATCHPIVOT = measure name + position of the detail row within its batch,
-- e.g. Weight1, Price2.
SELECT *,COL + CAST(DENSE_RANK() OVER (PARTITION BY Batchid ORDER BY BatchDetailID ASC) AS VARCHAR) AS BATCHPIVOT
FROM
(
-- The three measures are cast to one common type so they can share the single
-- Val column produced by UNPIVOT.
SELECT b.*,cast(d.Weight as varchar(255)) as Weight, cast(d.Price as varchar(255)) as Price, cast(d.DestinationCode as varchar(255)) as DestinationCode,d.BatchDetailID
FROM #Batch B
INNER JOIN #BATCHDETAIL D on b.BatchID = d.BatchID
) AS cp
UNPIVOT
(
Val FOR Col IN ([Weight], [Price], [DestinationCode])
) AS up
) AS query
PIVOT (MAX(Val)
FOR BATCHPIVOT IN (Weight1,Price1,DestinationCode1, Weight2, Price2, DestinationCode2)) AS Pivot1
-- The pivot input still carries Col and BatchDetailID (from SELECT *), so the
-- pivoted values are spread over several rows per batch; this GROUP BY together
-- with the MAX() calls above folds them into one row.
GROUP BY BatchID,[Name],[CustomerID],[DateCreated],[Status]
This is just a static query; you can make the script dynamic if you need to:
Complete Script:
-- Sample parent table.
CREATE TABLE #Batch
(
    BatchID       int,
    [Name]        char(1),
    [CustomerID]  int,
    [DateCreated] date,
    [Status]      varchar(50)
);

-- Sample child table; BatchID refers to #Batch.BatchID.
CREATE TABLE #BATCHDETAIL
(
    BatchDetailID   int,
    BatchID         int,
    [Weight]        int,
    Price           money,
    DestinationCode int
);

-- Explicit column lists keep the inserts valid if the table definitions change;
-- dates use the yyyymmdd form, which does not depend on the session's DATEFORMAT.
INSERT INTO #Batch (BatchID, [Name], [CustomerID], [DateCreated], [Status])
VALUES (12, 'A',   1, '20130101', 'Active')
     , (13, 'B',  12, '20130101', 'Inactive')
     , (14, 'C', 245, '20130101', 'Complete');

INSERT INTO #BATCHDETAIL (BatchDetailID, BatchID, [Weight], Price, DestinationCode)
VALUES (1, 12,  55,  500.00,   99)
     , (2, 12, 119, 1500.00,   55)
     , (3, 13,  12,  133,    1212);
-- Same result as a nested-subquery formulation, written as a chain of CTEs:
--   cp    : batch columns plus the three measures cast to a common type
--   query : measures unpivoted to rows and tagged with <measure><position in batch>
-- The final SELECT pivots the tags back to columns and folds each batch to one row.
;WITH cp AS (
    SELECT
        b.*,
        CAST(d.Weight AS varchar(255))          AS Weight,
        CAST(d.Price AS varchar(255))           AS Price,
        CAST(d.DestinationCode AS varchar(255)) AS DestinationCode,
        d.BatchDetailID
    FROM #Batch AS b
    INNER JOIN #BATCHDETAIL AS d
        ON b.BatchID = d.BatchID
),
query AS (
    SELECT
        *,
        COL + CAST(DENSE_RANK() OVER (PARTITION BY Batchid ORDER BY BatchDetailID ASC) AS VARCHAR) AS BATCHPIVOT
    FROM cp
    UNPIVOT
    (
        Val FOR Col IN ([Weight], [Price], [DestinationCode])
    ) AS up
)
SELECT
    BatchID,
    [Name],
    [CustomerID],
    [DateCreated],
    [Status],
    MAX(Weight1)          AS WeightA,
    MAX(Price1)           AS PriceA,
    MAX(DestinationCode1) AS DestinationCodeA,
    MAX(Weight2)          AS WeightB,
    MAX(Price2)           AS PriceB,
    MAX(DestinationCode2) AS DestinationCodeB
FROM query
PIVOT
(
    MAX(Val)
    FOR BATCHPIVOT IN (Weight1, Price1, DestinationCode1, Weight2, Price2, DestinationCode2)
) AS Pivot1
GROUP BY BatchID, [Name], [CustomerID], [DateCreated], [Status]

T:SQL: select values from rows as columns

I have a table for Profiles stores profile properties values in row style, ex:
[ProfileID] [PropertyDefinitionID] [PropertyValue]
1 6 Jone
1 7 Smith
1 8 Mr
1 3 50000
and another table for property definitions :
[PropertyDefinitionID] [PropertyName]
6 FirstName
7 LastName
8 Prefix
3 Salary
How to use PIVOT or any other way to show it in this way:
[ProfileID] [FirstName] [LastName] [Salary]
1 Jone Smith 50000
It's easy to do this without PIVOT keyword, just by grouping
-- Conditional aggregation: each CASE keeps the value of one named property, and
-- MIN collapses a profile's property rows into one row.
SELECT
    P.ProfileID,
    MIN(CASE WHEN PD.PropertyName = 'FirstName' THEN P.PropertyValue END) AS FirstName,
    MIN(CASE WHEN PD.PropertyName = 'LastName'  THEN P.PropertyValue END) AS LastName,
    MIN(CASE WHEN PD.PropertyName = 'Salary'    THEN P.PropertyValue END) AS Salary
FROM Profiles AS P
LEFT JOIN PropertyDefinitions AS PD
    ON PD.PropertyDefinitionID = P.PropertyDefinitionID
GROUP BY P.ProfileID
you can also do this with PIVOT keyword
-- PIVOT form: the derived table exposes exactly the grouping column (ProfileID),
-- the value column and the column holding the future column names.
SELECT
    PIV.ProfileID,
    PIV.[FirstName],
    PIV.[LastName],
    PIV.[Salary]
FROM
(
    SELECT
        P.ProfileID,
        P.PropertyValue,
        PD.PropertyName
    FROM Profiles AS P
    LEFT JOIN PropertyDefinitions AS PD
        ON PD.PropertyDefinitionID = P.PropertyDefinitionID
) AS P
PIVOT
(
    MIN(P.PropertyValue)
    FOR P.PropertyName IN ([FirstName], [LastName], [Salary])
) AS PIV
UPDATE: For dynamic number of properties - take a look at Increment value in SQL SELECT statement
It looks like you might have an unknown number of PropertyName's that you need to turn into columns. If that is the case, then you can use dynamic sql to generate the result:
-- Dynamic PIVOT: one output column per distinct PropertyName.
DECLARE @cols  AS NVARCHAR(MAX),
        @query AS NVARCHAR(MAX);

-- Comma-separated, bracket-quoted list of property names; STUFF removes the
-- leading comma, and TYPE/.value() keeps XML-special characters unescaped.
SELECT @cols = STUFF((SELECT DISTINCT ',' + QUOTENAME(PropertyName)
                      FROM propertydefinitions
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)')
                    , 1, 1, '');

SET @query = N'SELECT profileid, ' + @cols + N' from
(
    select p.profileid,
           p.propertyvalue,
           d.propertyname
    from profiles p
    left join propertydefinitions d
        on p.PropertyDefinitionID = d.PropertyDefinitionID
) x
pivot
(
    max(propertyvalue)
    for propertyname in (' + @cols + N')
) p ';

EXECUTE (@query);
See SQL Fiddle with Demo.

How to count in SQL all fields with null values in one record?

Is there any way to count all fields with null values for specific record excluding PrimaryKey column?
Example:
ID Name Age City Zip
1 Alex 32 Miami NULL
2 NULL 24 NULL NULL
As output I need to get 1 and 3. Without explicitly specifying column names.
-- Sample data held in a table variable (table variables need an @ name).
declare @T table
(
    ID   int,
    Name varchar(10),
    Age  int,
    City varchar(10),
    Zip  varchar(10)
);

insert into @T (ID, Name, Age, City, Zip) values
    (1, 'Alex', 32, 'Miami', NULL),
    (2, NULL,   24, NULL,    NULL);

-- Each row is serialised to XML with ELEMENTS XSINIL, which emits NULL columns as
-- elements carrying xsi:nil="true"; the XQuery then counts those elements.
-- In XPath the attribute test is written @ns:nil.
with xmlnamespaces('http://www.w3.org/2001/XMLSchema-instance' as ns)
select ID,
       (
           select *
           from @T as T2
           where T1.ID = T2.ID
           for xml path('row'), elements xsinil, type
       ).value('count(/row/*[@ns:nil = "true"])', 'int') as NullCount
from @T as T1;
Result:
ID NullCount
----------- -----------
1 1
2 3
Update:
Here is a better version. Thanks to Martin Smith.
-- Variant without the self-join: the current row (T1.*) is serialised directly.
-- Expects the table variable @T from the sample setup to be declared in the same batch.
;with xmlnamespaces('http://www.w3.org/2001/XMLSchema-instance' as ns)
select ID,
       (
           select T1.*
           for xml path('row'), elements xsinil, type
       ).value('count(/row/*[@ns:nil = "true"])', 'int') as NullCount
from @T as T1
Update:
And with a bit faster XQuery expression.
-- Variant that counts the ns:nil attributes themselves instead of filtering
-- elements on the attribute's value.
-- Expects the table variable @T from the sample setup to be declared in the same batch.
;with xmlnamespaces('http://www.w3.org/2001/XMLSchema-instance' as ns)
select ID,
       (
           select T1.*
           for xml path('row'), elements xsinil, type
       ).value('count(//*/@ns:nil)', 'int') as NullCount
from @T as T1
-- Number of NULL fields per row: one CASE per non-key column of the example
-- table (Name, Age, City, Zip), each contributing 1 when the column is NULL.
SELECT
    id,
    CASE WHEN Name IS NULL THEN 1 ELSE 0 END
  + CASE WHEN Age  IS NULL THEN 1 ELSE 0 END
  + CASE WHEN City IS NULL THEN 1 ELSE 0 END
  + CASE WHEN Zip  IS NULL THEN 1 ELSE 0 END AS NullCount
FROM YourTable
If you do not want explicit column names in query, welcome to dynamic querying
-- Generates the per-row CASE sum for every nullable column of the table, so no
-- column names have to be written by hand.
DECLARE @sql NVARCHAR(MAX) = N'';

-- One "CASE ... END +" term per nullable column.
SELECT @sql = @sql + N' CASE WHEN ' + QUOTENAME(c.name) + N' IS NULL THEN 1 ELSE 0 END +'
FROM sys.tables AS t
INNER JOIN sys.columns AS c
    ON t.object_id = c.object_id
WHERE
    c.is_nullable = 1
    AND t.object_id = OBJECT_ID('YourTableName');

-- The trailing "+" of the last term is completed with a literal 0.
SET @sql = N'SELECT id, ' + @sql + N' 0 AS Cnt FROM [YourTableName]';

EXEC (@sql);
This should solve your problem:
-- Counts the rows in which at least one of Name, City or Zip is NULL or an empty
-- string. Note that this is a count of rows, not of NULL fields within a row.
-- String literals need single quotes: with QUOTED_IDENTIFIER ON, "" is read as an identifier.
select count(id)
from YourTable
where ( isnull(Name, '') = '' or isnull(City, '') = '' or isnull(Zip, '') = '' )
Not a smart solution, but it should do the work.
-- Builds one SELECT that returns, for every column of TABLE1, the number of rows
-- where it is NULL and the number where it is not.
DECLARE @tempSQL nvarchar(max);
SET @tempSQL = N'SELECT ';

-- Two aggregates per column; the column reference is bracket-quoted so names
-- containing spaces or reserved words still produce valid SQL.
SELECT @tempSQL = @tempSQL + 'sum(case when ' + QUOTENAME(cols.name) + ' is null then 1 else 0 end) "Null Values for ' + cols.name + '",
sum(case when ' + QUOTENAME(cols.name) + ' is null then 0 else 1 end) "Non-Null Values for ' + cols.name + '",'
FROM sys.columns AS cols
WHERE cols.object_id = object_id('TABLE1');

-- Drop the trailing comma and append the FROM clause.
SET @tempSQL = SUBSTRING(@tempSQL, 1, LEN(@tempSQL) - 1) + ' FROM TABLE1;';

EXEC sp_executesql @tempSQL;