Count number of NULL values in each column in SQL - sql

I am trying to write a script that will show the number of non-null values in each column as well as the total number of rows in the table.
I have found a couple ways to do this:
SELECT sum(case my_column when null then 1 else 0) "Null Values",
sum(case my_column when null then 0 else 1) "Non-Null Values"
FROM my_table;
and
SELECT count(*) FROM my_table WHERE my_column IS NULL
UNION ALL
SELECT count(*) FROM my_table WHERE my_column IS NOT NULL
But these require me to type in each column name manually. Is there a way to perform this action for each column without listing them?

You should use execute:
DECLARE #t nvarchar(max)
SET #t = N'SELECT '
SELECT #t = #t + 'sum(case when ' + c.name + ' is null then 1 else 0 end) "Null Values for ' + c.name + '",
sum(case when ' + c.name + ' is null then 0 else 1 end) "Non-Null Values for ' + c.name + '",'
FROM sys.columns c
WHERE c.object_id = object_id('my_table');
SET #t = SUBSTRING(#t, 1, LEN(#t) - 1) + ' FROM my_table;'
EXEC sp_executesql #t

As Paolo said, but here is an example:
DECLARE #TableName VARCHAR(512) = 'invoiceTbl';
DECLARE #SQL VARCHAR(1024);
WITH SQLText AS (
SELECT
ROW_NUMBER() OVER (ORDER BY c.Name) AS RowNum,
'SELECT ''' + c.name + ''', SUM(CASE WHEN ' + c.Name + ' IS NULL THEN 1 ELSE 0 END) AS NullValues FROM ' + #TableName AS SQLRow
FROM
sys.tables t
INNER JOIN sys.columns c ON c.object_id = t.object_id
WHERE
t.name = #TableName),
Recur AS (
SELECT
RowNum,
CONVERT(VARCHAR(MAX), SQLRow) AS SQLRow
FROM
SQLText
WHERE
RowNum = 1
UNION ALL
SELECT
t.RowNum,
CONVERT(VARCHAR(MAX), r.SQLRow + ' UNION ALL ' + t.SQLRow)
FROM
SQLText t
INNER JOIN Recur r ON t.RowNum = r.RowNum + 1
)
SELECT #SQL = SQLRow FROM Recur WHERE RowNum = (SELECT MAX(RowNum) FROM Recur);
EXEC(#SQL);

may be this works
select count(case when Column1 is null then 1 end) as Column1NullCount,
count(case when Column2 is null then 1 end) as Column2NullCount,
count(case when Column3 is null then 1 end) as Column3NullCount
...
from My_Table

you can go with dynamic sql and sys tables.
depending on the version of sql you are using the syntax will change slightly but these are the steps:
- list the columns reading sys.columns and save the list in a temp table or table variable
- make a loop through that temp table and build the sql using the same logic you would apply manually
- execute the dynamic sql built on previous step with sp_executesql

Related

Aggregate dynamic columns in SQL Server

I have a narrow table containing unique key and source data
Unique_Key
System
1
IT
1
ACCOUNTS
1
PAYROLL
2
IT
2
PAYROLL
3
IT
4
HR
5
PAYROLL
I want to be able to pick a system as a base - in this case IT - then create a dynamic SQL query where it counts:
distinct unique key in the chosen system
proportion of shared unique key with other systems. These systems could be dynamic and there are lot more than 4
I'm thinking of using dynamic SQL and PIVOT to first pick out all the system names outside of IT. Then using IT as a base, join to that table to get the information.
select distinct Unique_Key, System_Name
into #staging
from dbo.data
where System_Name <> 'IT'
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX);
SET #cols = STUFF((SELECT distinct ',' + QUOTENAME(System_Name)
FROM #staging
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT Unique_Key, ' + #cols + ' into dbo.temp from
(
select Unique_Key, System_Name
from #staging
) x
pivot
(
count(System_Name)
for System_Name in (' + #cols + ')
) p '
execute(#query)
select *
from
(
select distinct Unique_Key
from dbo.data
where System_Name = 'IT'
) a
left join dbo.temp b
on a.Unique_Key = b.Unique_Key
So the resulting table is:
Unique_Key
PAYROLL
ACCOUNTS
HR
1
1
1
0
2
1
0
0
3
0
0
0
What I want is one step further:
Distinct Count IT Key
PAYROLL
ACCOUNTS
HR
3
67%
33%
0%
I can do a simple join with specific case when/sum statement but wondering if there's a way to do it dynamically so I don't need to specify every system name.
Appreciate any tips/hints.
You can try to use dynamic SQL as below, I would use condition aggregate function get pivot value then we might add OUTER JOIN or EXISTS condition in dynamic SQL.
I would use sp_executesql instead of exec to avoid sql-injection.
DECLARE #System_Name NVARCHAR(50) = 'IT'
DECLARE #cols AS NVARCHAR(MAX),
#query AS NVARCHAR(MAX),
#parameter AS NVARCHAR(MAX);
SET #parameter = '#System_Name NVARCHAR(50)'
select DISTINCT System_Name
into #staging
from dbo.data t1
WHERE t1.System_Name <> #System_Name
SET #cols = STUFF((SELECT distinct ', SUM(IIF(System_Name = '''+ System_Name+''',1,0)) * 100.0 / SUM(IIF(System_Name = #System_Name,0,1)) ' + QUOTENAME(System_Name)
FROM #staging
FOR XML PATH(''), TYPE
).value('.', 'NVARCHAR(MAX)')
,1,1,'')
set #query = 'SELECT SUM(IIF(System_Name = #System_Name,0,1)) [Distinct Count IT Key], ' + #cols + ' from dbo.data t1
WHERE EXISTS (
SELECT 1
FROM dbo.data tt
WHERE tt.Unique_Key = t1.Unique_Key
AND tt.System_Name = #System_Name
) '
EXECUTE sp_executesql #query, #parameter, #System_Name
sqlfiddle
When writing Dynamic Query, you start off with a non-dynamic query. Make sure you gets the result of the query is correct before you convert to dynamic query.
For the result that you required, the query will be
with cte as
(
select it.Unique_Key, ot.System_Name
from data it
left join data ot on it.Unique_Key = ot.Unique_Key
and ot.System_Name <> 'IT'
where it.System_Name = 'IT'
)
select [ITKey] = count(distinct Unique_Key),
[ACCOUNTS] = count(case when System_Name = 'ACCOUNTS' then Unique_Key end) * 100.0
/ count(distinct Unique_Key),
[HR] = count(case when System_Name = 'HR' then Unique_Key end) * 100.0
/ count(distinct Unique_Key),
[PAYROLL] = count(case when System_Name = 'PAYROLL' then Unique_Key end) * 100.0
/ count(distinct Unique_Key)
from cte;
Once you get the result correct, it is not that difficult to convert to dynamic query. Use string_agg() or for xml path for those repeated rows
declare #sql nvarchar(max);
; with cte as
(
select distinct System_Name
from data
where System_Name <> 'IT'
)
select #sql = string_agg(sql1 + ' / ' + sql2, ',' + char(13))
from cte
cross apply
(
select sql1 = char(9) + quotename(System_Name) + ' = '
+ 'count(case when System_Name = ''' + System_Name + ''' then Unique_Key end) * 100.0 ',
sql2 = 'count(distinct Unique_Key)'
) a
select #sql = 'with cte as' + char(13)
+ '(' + char(13)
+ ' select it.Unique_Key, ot.System_Name' + char(13)
+ ' from data it' + char(13)
+ ' left join data ot on it.Unique_Key = ot.Unique_Key' + char(13)
+ ' and ot.System_Name <> ''IT''' + char(13)
+ ' where it.System_Name = ''IT''' + char(13)
+ ')' + char(13)
+ 'select [ITKey] = count(distinct Unique_Key), ' + char(13)
+ #sql + char(13)
+ 'from cte;' + char(13)
print #sql;
exec sp_executesql #sql;
db<>fiddle demo
This solution changes the aggregation function of the PIVOT itself.
First, let's add a column [has_it] to #staging that keeps track of whether each Unique_Key has an IT row:
select Unique_Key, System_Name, case when exists(select 1 from data d2 where d2.Unique_Key=d1.Unique_Key and d2.System_Name='IT') then 1 else 0 end as has_it
into #staging
from data d1
where System_Name <> 'IT'
group by Unique_Key, System_Name
Now, the per-System aggregation (sum) of this column divided by the final total unique keys needed (example case=3) returns the requests numbers. Change the PIVOT to the following and it's ready as-is, without further queries:
set #query = ' select *
from
(
select System_Name,cnt as [Distinct Count IT Key],has_it*1.0/cnt as divcnt
from #staging
cross join
(
select count(distinct Unique_Key) as cnt
from dbo.data
where System_Name = ''IT''
)y
) x
pivot
(
sum(divcnt)
for System_Name in (' + #cols + ')
) p'

Grabbing Null and blank Columns to a table

I'm trying to find a way to grab the count of nulls and blanks of every column in a table and put the results into a table. I found this but don't know how to push it to a table? Would also like to add in a Count(distinct fieldname) as well. Any help would be appreciated!
DECLARE #t nvarchar(max)
SET #t = N'SELECT '
SELECT #t = #t + 'sum(case when ' + c.name + ' is null or ' + c.name + ' = '''' then 1 else 0 end) "' + c.name + '",
sum(case when ' + c.name + ' is null then 0 else 1 end) "Non-Null Values for ' + c.name + '",'
FROM sys.columns c
WHERE c.object_id = object_id('TableName');
SET #t = SUBSTRING(#t, 1, LEN(#t) - 1) + ' FROM TableName;'
EXEC sp_executesql #t
**Edit
This provides the results in one row. Is there a way to do it so each field gets it's own row in a table?
FieldName CountofNullsBlanks
FieldA 0
FieldB 100
Here:
DECLARE #t nvarchar(max)
SET #t = N'SELECT '+convert(nvarchar(max),count(*))+' as [Distinct fieldnames],' -- added the distinct
SELECT #t = #t + 'sum(case when ' + c.name + ' is null or ' + c.name + ' = '''' then 1 else 0 end) "' + c.name + '",
sum(case when ' + c.name + ' is null then 0 else 1 end) "Non-Null Values for ' + c.name + '",'
FROM sys.columns c
WHERE c.object_id = object_id('YOURTABLE');
SET #t = SUBSTRING(#t, 1, LEN(#t) - 1) + 'into ##a FROM YOURTABLE;' --added the into ##a
EXEC sp_executesql #t
Notice the "into ##a" towards the end; This saves the result "table" to a global temp, which will be available until all sessions that accessed it finish. Careful with the name or it might cause conflicts in the database.

Get table's column names which are always null

I have a table with a lot of columns. Some of them are always NULL and does not contain any value.
Is there a way to list those columns using a SQL query instead testing them one by one ?
I'd like to avoid :
SELECT Col1 from MyTable where Col1 IS NOT NULL
SELECT Col2 from MyTable where Col2 IS NOT NULL
...
Assuming you are using Sql-Server, just assign your table name to #strTablename
In this example i assumed that dbo.MyTable is the table name
DECLARE #strTablename varchar(100) = 'dbo.MyTable'
DECLARE #strQuery varchar(max) = ''
DECLARE #strUnPivot as varchar(max) = ' UNPIVOT ([Count] for [Column] IN ('
CREATE TABLE ##tblTemp([Column] varchar(50), [Count] Int)
SELECT #strQuery = ISNULL(#strQuery,'') + 'Count([' + name + ']) as [' + name + '] ,' from sys.columns where object_id = object_id(#strTablename) and is_nullable = 1
SELECT #strUnPivot = ISNULL(#strUnPivot,'') + '[' + name + '] ,' from sys.columns where object_id = object_id(#strTablename) and is_nullable = 1
SET #strQuery = 'SELECT [Column],[Count] FROM ( SELECT ' + SUBSTRING(#strQuery,1,LEN(#strQuery) - 1) + ' FROM ' + #strTablename + ') AS p ' + SUBSTRING(#strUnPivot,1,LEN(#strUnPivot) - 1) + ')) AS unpvt '
INSERT INTO ##tblTemp EXEC (#strQuery)
SELECT [Column] from ##tblTemp Where [Count] =0
DROP TABLE ##tblTemp
MAX(col) is null only when all rows are null for this column. So check this for every column and concatenate those names for which the expression is null.
select
'null columns: ' ||
case when max(col1) is null then 'col1 ' else '' end ||
case when max(col2) is null then 'col2 ' else '' end ||
case when max(col3) is null then 'col3 ' else '' end ||
case when max(col4) is null then 'col4 ' else '' end ||
...
from mytable;

T-SQL dynamic row checking

I am trying to run a check on a table where, for each row, I want to see whether the column has a a null in it and add a 1 like here:
SELECT
((CASE WHEN col1 IS NULL THEN 1 ELSE 0 END)
+ (CASE WHEN col2 IS NULL THEN 1 ELSE 0 END)
+ (CASE WHEN col3 IS NULL THEN 1 ELSE 0 END)
...
...
+ (CASE WHEN col10 IS NULL THEN 1 ELSE 0 END)) AS sum_of_nulls
FROM table
WHERE Customer=some_cust_id'
This method does what its supposed to in a static way but is it possible to dynamically build the columns based on the table as opposed to writing out each case statement.
Many thanks!
Here's what I came up with
declare
#sql nvarchar(max),
#columns nvarchar(max),
#table nvarchar(128) = '<your table here>'
select
#columns = stuff((select '+case when ' + quotename(column_name) + ' is null then 1 else 0 end'
from information_schema.columns
where table_name = #table
order by ordinal_position
for xml path('')), 1, 1, ''),
#sql = 'select sum_of_nulls = ' + #columns + ', *
from dbo.' + quotename(#table)
exec sp_executesql #sql
You can slot in a where clause after the table name if you want also. Right now, it just returns for everything in the table.

Dynamic SQL to generate column names?

I have a query where I'm trying pivot row values into column names and currently I'm using SUM(Case...) As 'ColumnName' statements, like so:
SELECT
SKU1,
SUM(Case When Sku2=157 Then Quantity Else 0 End) As '157',
SUM(Case When Sku2=158 Then Quantity Else 0 End) As '158',
SUM(Case When Sku2=167 Then Quantity Else 0 End) As '167'
FROM
OrderDetailDeliveryReview
Group By
OrderShipToID,
DeliveryDate,
SKU1
The above query works great and gives me exactly what I need. However, I'm writing out the SUM(Case... statements by hand based on the results of the following query:
Select Distinct Sku2 From OrderDetailDeliveryReview
Is there a way, using T-SQL inside a stored procedure, that I can dynamically generate the SUM(Case... statements from the Select Distinct Sku2 From OrderDetailDeliveryReview query and then execute the resulting SQL code?
Having answered a lot of these over the years by generating dynamic pivot SQL from the metadata, have a look at these examples:
SQL Dynamic Pivot - how to order columns
SQL Server 2005 Pivot on Unknown Number of Columns
What SQL query or view will show "dynamic columns"
How do I Pivot on an XML column's attributes in T-SQL
How to apply the DRY principle to SQL Statements that Pivot Months
In your particular case (using the ANSI pivot instead of SQL Server 2005's PIVOT feature):
DECLARE #template AS varchar(max)
SET #template = 'SELECT
SKU1
{COLUMN_LIST}
FROM
OrderDetailDeliveryReview
Group By
OrderShipToID,
DeliveryDate,
SKU1
'
DECLARE #column_list AS varchar(max)
SELECT #column_list = COALESCE(#column_list, ',') + 'SUM(Case When Sku2=' + CONVERT(varchar, Sku2) + ' Then Quantity Else 0 End) As [' + CONVERT(varchar, Sku2) + '],'
FROM OrderDetailDeliveryReview
GROUP BY Sku2
ORDER BY Sku2
Set #column_list = Left(#column_list,Len(#column_list)-1)
SET #template = REPLACE(#template, '{COLUMN_LIST}', #column_list)
EXEC (#template)
I know that SO search engine is not perfect, but your question has been answered in SQL Server PIVOT Column Data.
Also see Creating cross tab queries and pivot tables in SQL.
Why do this using hard coded column names when you can pull all this dynamically from any table?
Using UNPIVOT and COALESCE, I can dynamically pull a list of columns from any table and associated column values for any record in a record listing and combine them in a list of column names with values by row. Here is the code. Just drop in your database and table name. The column/value table will be generated for you in SQL Server. Keep in mind, to get a shared column of values for the columns you want to convert to sql variant or text strings. But a great way to get a sample column list of values with matching column names and types with our while loops or cursors. Its pretty fast:
-- First get a list of all known columns in your database, dynamically...
DECLARE #COLUMNS nvarchar(max)
SELECT #COLUMNS =
CASE
WHEN A.DATA_TYPE = 'nvarchar' OR A.DATA_TYPE = 'ntext' THEN
COALESCE(#COLUMNS + ',','') + 'CAST(CONVERT(nvarchar(4000),['+A.[name]+']) AS sql_variant) AS ['+A.[name]+']'
WHEN A.DATA_TYPE = 'datetime' OR A.DATA_TYPE = 'smalldatetime' THEN
COALESCE(#COLUMNS + ',','') + 'CAST(CONVERT(nvarchar,['+A.[name]+'],101) AS sql_variant) AS ['+A.[name]+']'
ELSE
COALESCE(#COLUMNS + ',','') + 'CAST(['+A.[name]+'] AS sql_variant) AS ['+A.[name]+']'
END
FROM
(
SELECT
A.name,
C.DATA_TYPE
FROM YOURDATABASENAME.dbo.syscolumns A
INNER JOIN YOURDATABASENAME.dbo.sysobjects B ON B.id = A.id
LEFT JOIN
(
SELECT
COLUMN_NAME,
DATA_TYPE
FROM YOURDATABASENAME.INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'YOURTABLENAME'
) C ON C.COLUMN_NAME = A.name
WHERE B.name = 'YOURTABLENAME'
AND C.DATA_TYPE <> 'timestamp'
) A
-- Test that the formatted columns list is returned...
--SELECT #COLUMNS
-- This gets a second string list of all known columns in your database, dynamically...
DECLARE #COLUMNS2 nvarchar(max)
SELECT #COLUMNS2 = COALESCE(#COLUMNS2 + ',','') + '['+A.[name]+']'
FROM
(
SELECT
A.name,
C.DATA_TYPE
FROM YOURDATABASENAME.dbo.syscolumns A
INNER JOIN YOURDATABASENAME.dbo.sysobjects B ON B.id = A.id
LEFT JOIN
(
SELECT
COLUMN_NAME,
DATA_TYPE
FROM YOURDATABASENAME.INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'YOURTABLENAME'
) C ON C.COLUMN_NAME = A.name
WHERE B.name = 'YOURTABLENAME'
AND C.DATA_TYPE <> 'timestamp'
) A
-- Test that the formatted columns list is returned...
--SELECT #COLUMNS2
-- Now plug in the list of the dynamic columns list into an UNPIVOT to get a Column Name / Column Value list table...
DECLARE #sql nvarchar(max)
SET #sql =
'
SELECT
ColumnName,ColumnValue
FROM
(
SELECT
'+#COLUMNS+'
FROM YOURDATABASENAME.dbo.YOURTABLENAME
WHERE CHANGE_ID IN (SELECT ChangeId FROM YOURDATABASENAME.dbo.OperatorProcess WHERE OperatorProcessID = 3)
) AS SourceTable
UNPIVOT
(
ColumnValue FOR ColumnName IN ('+#COLUMNS2+')
) AS PivotTable
'
EXEC (#sql)
-- Darshankar Madhusudan i can do dynamic columnheading table easly...
--thanks
declare #incr int = 1,
#col int,
#str varchar(max),
#tblcrt varchar(max),
#insrt varchar(max),
set #tblcrt = 'DECLARE #Results table ('
set #str = ''
set #insrt = ''
select #col = max(column_id) From tempdb.sys.all_columns where object_id = object_id('tempdb.dbo.#aaa')
while #incr <= #col
BEGIN
SELECT #STR = #STR +case when #incr = 1 then '''' else ',''' end +rtrim(ltrim(NAME))+'''' FROM TEMPDB.SYS.ALL_COLUMNS WHERE OBJECT_ID = OBJECT_ID('TEMPDB.DBO.#AAA') and column_id = #incr
set #tblcrt = #tblcrt + case when #incr = 1 then '' else ',' end + 'Fld'+CAST(#incr as varchar(3)) +' varchar(50)'
set #insrt = #insrt + case when #incr = 1 then '' else ',' end + 'Fld'+CAST(#incr as varchar(3))
SET #INCR = #INCR + 1
END
set #tblcrt = #tblcrt + ')'
set #insrt = 'insert into #Results('+#insrt+') values (' + #STR +')'
set #tblcrt = #tblcrt+ ';' + #insrt + 'select * from #Results '
exec(#tblcrt)