How can I search multiple fields and count nulls for all? - sql

Is there an easy way to count nulls in all fields in a table without writing 40+ very similar, but slightly different, queries? I would think there is some kind of statistics maintained for all tables, and this may be the easiest way to go with it, but I don't know for sure. Thoughts, anyone? Thanks!!
BTW, I am using SQL Server 2008.

Not sure if you consider this simple or not, but this will total the NULLs by column in a table.
DECLARE #table sysname;
SET #table = 'MyTable'; --replace this with your table name
DECLARE #colname sysname;
DECLARE #sql NVARCHAR(MAX);
DECLARE COLS CURSOR FOR
SELECT c.name
FROM sys.tables t
INNER JOIN sys.columns c ON t.object_id = c.object_id
WHERE t.name = #table;
SET #sql = 'SELECT ';
OPEN COLS;
FETCH NEXT FROM COLS INTO #colname;
WHILE ##FETCH_STATUS = 0
BEGIN
SET #sql = #sql + 'COUNT(CASE WHEN ' + #colname + ' IS NULL THEN 1 END) AS ' + #colname + '_NULLS,'
FETCH NEXT FROM COLS INTO #colname;
END;
CLOSE COLS;
DEALLOCATE COLS;
SET #sql = LEFT(#sql,LEN(#sql) - 1) --trim tailing ,
SET #sql = #sql + ' FROM ' + #table;
EXEC sp_executesql #sql;

SELECT COUNT( CASE WHEN field01 IS NULL THEN 1 END) +
COUNT( CASE WHEN field02 IS NULL THEN 1 END) +
...
COUNT( CASE WHEN field40 IS NULL THEN 1 END) as total_nulls

This answer will return a table containing the name of each column of a specified table. (#tab is the name of the table you're trying to count NULLs in.)
You can loop through the column names, count NULLs in each column, and add the result to a total running count.

Related

Looping through a column in SQL table that contains names of other tables

I have fairly new to using SQL, currently I have a table that has a column that contains the names of all the tables I want to use for one query, so what I want to do is to loop through that column and go to every single one of these tables and then search one of their columns for a value (there could be multiple values), so whenever a table contains the value, I will list the name of the table. Could someone give me a hint of how this is done? Is cursor needed for this?
I don't have enough reputation to comment but is the table with the column that contain the table names all in one column, meaning that all the table names are comma separated or marked with some sort of separator? This would cause the query to be a little more complicated as you would have to take care of that before you start looping through your table.
However, this would require a cursor, as well as some dynamic sql.
I will give a basic example of how you can go about this.
declare #value varchar(50)
declare #tableName varchar(50)
declare #sqlstring nvarchar(100)
set #value = 'whateveryouwant'
declare #getTableName = cursor for
select tableName from TablewithTableNames
OPEN #getTableName
fetch NEXT
from #getTableName into #tableName
while ##FETCH_STATUS = 0
BEGIN
set #sqlstring = 'Select Count(*) from ' + #tableName + 'where ColumnNameYouwant = ' + #value
exec #sqlstring
If ##ROWcount > 0
insert into #temptable values (#tableName)
fetch next
from #getTableName into #tableName
END
select * from #temptable
drop table #temptable
close #getTableName
deallocate #getTableName
I'm currently not able to test this out as for time constraint reasons, but this is how I would go about doing this.
You could try something like this:
--Generate dynamic SQL
DECLARE #TablesToSearch TABLE (
TableName VARCHAR(50));
INSERT INTO #TablesToSearch VALUES ('invoiceTbl');
DECLARE #SQL TABLE (
RowNum INT,
SQLText VARCHAR(500));
INSERT INTO
#SQL
SELECT
ROW_NUMBER() OVER (ORDER BY ts.TableName) AS RowNum,
'SELECT * FROM ' + ts.TableName + ' WHERE ' + c.name + ' = 1;'
FROM
#TablesToSearch ts
INNER JOIN sys.tables t ON t.name = ts.TableName
INNER JOIN sys.columns c ON c.object_id = t.object_id;
--Now run the queries
DECLARE #Count INT;
SELECT #Count = COUNT(*) FROM #SQL;
WHILE #Count > 0
BEGIN
DECLARE #RowNum INT;
DECLARE #SQLText VARCHAR(500);
SELECT TOP 1 #RowNum = RowNum, #SQLText = SQLText FROM #SQL;
EXEC (#SQLText);
DELETE FROM #SQL WHERE RowNum = #RowNum;
SELECT #Count = COUNT(*) FROM #SQL;
END;
You would need to change the "1" I am using as an example to the value you are looking for and probably add a CONVERT/ CAST to make sure the column is the right data type?
You actually said that you wanted the name of the table, so you would need to change the SQL to:
'SELECT ''' + ts.TableName + ''' FROM ' + ts.TableName + ' WHERE ' + c.name + ' = 1;'
Another thought, it would probably be best to insert the results from this into a temporary table so you can dump out the results in one go at the end?

INSERT INTO command doesn't work

I have one outer cursor and one inner cursor also have two tables to work with. Now with the outer cursor i'm making new columns in the table 1 and naming them by the values from the table two, and that works just fine. Problem is with the inner cursor witch i used to insert the values into those new columns from one specific column from another table. This seams not to work, but what confusing me is that i do not get any error messages. Now i hope you understand what i'm trying to do, here is the code so comment for more description about the problem :
DECLARE #rbr_param nvarchar(255)
DECLARE #vrednost nvarchar(255)
DECLARE #cName nvarchar(255)
DECLARE #sql nvarchar (255)
DECLARE curs CURSOR FOR SELECT DISTINCT rbr_param FROM dbo.parametri_pomocna ORDER BY rbr_param
OPEN curs
FETCH NEXT FROM curs
INTO #rbr_param
WHILE ##FETCH_STATUS = 0
BEGIN
SET #cName = 'P_'+#rbr_param+'_P'
EXEC('ALTER TABLE dbo.Parametri ADD ' + #cName + ' nvarchar(255)')
DECLARE vrd CURSOR FOR SELECT DISTINCT vrednost FROM dbo.parametri_pomocna
OPEN vrd
FETCH NEXT FROM vrd
INTO #vrednost
WHILE ##FETCH_STATUS = 0
BEGIN
SET #sql = 'INSERT INTO dbo.Parametri'+(#cName)+ ' SELECT vrednost FROM dbo.parametri_pomocna WHERE vrednost = '+#vrednost+ ' AND rbr_param = '+#rbr_param
if exists (select * from INFORMATION_SCHEMA.COLUMNS where table_name = 'dbo.Parametri' and column_name = '#cName')
begin
exec(#sql)
end
FETCH NEXT FROM vrd
INTO #vrednost
END --end vrd
CLOSE vrd
DEALLOCATE vrd
FETCH NEXT FROM curs
INTO #rbr_param
END
CLOSE curs
DEALLOCATE curs
You have two problems here:
if exists ( select * from INFORMATION_SCHEMA.COLUMNS
where table_name = 'dbo.Parametri'
and column_name = '#cName'
)
(1) This view will never have table_name = schema name and table name.
(2) You have enclosed your variable name in single quotes for some reason.
For both of these reasons, your IF condition will never return true.
Try:
IF EXISTS
(
SELECT 1 FROM sys.columns
WHERE [object_id] = OBJECT_ID('dbo.Parametri')
AND name = #cName
)
(And here is why I prefer catalog views over INFORMATION_SCHEMA.)
Also this double-nested cursor thing seems quite inefficient and a lot more code than necessary to achieve what I think you're trying to do. How about something like this instead:
DECLARE #sql NVARCHAR(MAX);
SET #sql = N'';
SELECT #sql = #sql + N'ALTER TABLE dbo.Parametri ADD '
+ QUOTENAME('P_' + rbr_param + '_P') + ' NVARCHAR(255);'
FROM dbo.parametri_pomocna GROUP BY rbr_param;
EXEC sp_executesql #sql;
SET #sql = N'';
SELECT #sql = #sql + N'INSERT dbo.Parametri('+QUOTENAME('P_' + rbr_param + '_P')+ ')
SELECT vrednost
FROM dbo.parametri_pomocna WHERE rbr_param = ''' + rbr_param + '''
GROUP BY vrednost;'
FROM dbo.parametri_pomocna
GROUP BY rbr_param;
EXEC sp_executesql #sql;

Count the number of nulls in each column

I've run into a DB that has tables that are excessively wide. (600+ columns) Even asking for the top 100 rows with no parameters takes 4 seconds. I'd like to slim these tables down a bit.
To figure out which columns can be most easily moved to new tables, or removed entirely, I would like to know how many nulls are in each column. This should tell me what information is likely to be least important.
How would I write a query that can find all columns and count the nulls inside those columns?
Edit The DB is SQL server 2008. I'm really hoping not to type each of the columns individually. It looks like sys.columns could help with this?
Edit2 The columns are all different types.
Try this
declare #Table_Name nvarchar(max), #Columns nvarchar(max), #stmt nvarchar(max)
declare table_cursor cursor local fast_forward for
select
s.name,
stuff(
(
select
', count(case when ' + name +
' is null then 1 else null end) as count_' + name
from sys.columns as c
where c.object_id = s.object_id
for xml path(''), type
).value('data(.)', 'nvarchar(max)')
, 1, 2, '')
from sys.tables as s
open table_cursor
fetch table_cursor into #Table_Name, #Columns
while ##FETCH_STATUS = 0
begin
select #stmt = 'select ''' + #Table_Name + ''' as Table_Name, ' + #Columns + ' from ' + #Table_Name
exec sp_executesql
#stmt = #stmt
fetch table_cursor into #Table_Name, #Columns
end
close table_cursor
deallocate table_cursor
select count(case when Column1 is null then 1 end) as Column1NullCount,
count(case when Column2 is null then 1 end) as Column2NullCount,
count(case when Column3 is null then 1 end) as Column3NullCount,
...
from MyTable

SQL query shorthand for not selecting null columns when doing select all

when I do:
SELECT *
FROM SOMETABLE
I get all the columns from SOMETABLE, but I DON'T want the columns which are NULL (for all records). How do I do this?
Reason: this table has 20 columns, 10 of these are set but 10 of them are null for certain queries. And it is time consuming to type the columnnames....
Thanks,
Voodoo
SQL supports the * wildcard which means all columns. There is no wildcard for all columns except the ones you don't want.
Type out the column names. It can't be more work than asking questions on Stack Overflow. Also, copy & paste is your friend.
Another suggestion is to define a view that selects the columns you want, and then subsequently you can select * from the view any time you want.
It's possible to do, but kind of complicated. You can retrieve the list of columns in a table from INFORMATION_SCHEMA.COLUMNS. For each column, you can run a query to see if any non-null row exists. Finally, you can run a query based on the resulting column list.
Here's one way to do that, with a cursor:
declare #table_name varchar(256)
set #table_name = 'Airports'
declare #rc int
declare #query nvarchar(max)
declare #column_list varchar(256)
declare columns cursor local for select column_name
from INFORMATION_SCHEMA.COLUMNS where TABLE_NAME = #table_name
open columns
declare #column_name varchar(256)
fetch next from columns into #column_name
while ##FETCH_STATUS = 0
begin
set #query = 'select #rc = count(*) from ' + #table_name + ' where ' +
#column_name + ' is not null'
exec sp_executesql #query = #query, #params = N'#rc int output',
#rc = #rc output
if #rc > 0
set #column_list = case when #column_list is null then '' else
#column_list + ', ' end + #column_name
fetch next from columns into #column_name
end
close columns
deallocate columns
set #query = 'select ' + #column_list + ' from ' + #table_name
exec sp_executesql #query = #query
This runs on SQL Server. It might be close enough for Sybase. Hopefully, this demonstrates that typing out a column list isn't that bad :-)

How to detect and remove a column that contains only null values?

In my table table1 there are 6 columns Locations,a,b,c,d,e.
Locations [a] [b] [c] [d] [e]
[1] 10.00 Null Null 20.00 Null
[2] Null 30.00 Null Null Null
i need the result like
Locations [a] [b] [d]
[1] 10.00 Null 20.00
[2] Null 30.00 Null
My question is how to detect and delete column that contains all null values using sql query.
Is it possible?
If yes then please help and give sample.
Here is a fast (and ugly) stored proc that takes the name of the table and print (or drop if you want it to) the fields that are full of nulls.
ALTER procedure mysp_DropEmptyColumns
#tableName nvarchar(max)
as begin
declare #FieldName nvarchar(max)
declare #SQL nvarchar(max)
declare #CountDef nvarchar(max)
declare #FieldCount int
declare fieldNames cursor local fast_forward for
select c.name
from syscolumns c
inner join sysobjects o on c.id=o.id
where o.xtype='U'
and o.Name=#tableName
open fieldNames
fetch next from fieldNames into #FieldName
while (##fetch_status=0)
begin
set #SQL=N'select #Count=count(*) from "'+#TableName+'" where "'+#FieldName+'" is not null'
SET #CountDef = N'#Count int output';
exec sp_executeSQL #SQL, #CountDef, #Count = #FieldCount output
if (#FieldCount=0)
begin
set #SQL = 'alter table '+#TableName+' drop column '+#FieldName
/* exec sp_executeSQL #SQL */
print #SQL
end
fetch next from fieldNames into #FieldName
end
close fieldNames
end
This uses a cursor, and is a bit slow and convoluted, but I suspect that this is a kind of procedure that you'll be running often
How to detect whether a given column has only the NULL value:
SELECT 1 -- no GROUP BY therefore use a literal
FROM Locations
HAVING COUNT(a) = 0
AND COUNT(*) > 0;
The resultset will either consist of zero rows (column a has a non-NULL value) or one row (column a has only the NULL value). FWIW this code is Standard SQL-92.
SQL is more about working on rows rather than columns.
If you're talking about deleting rows where c is null, use:
delete from table1 where c is null
If you're talking about dropping a column when all rows have null for that column, I would just find a time where you could lock out the DB from users and execute one of:
select c from table1 group by c
select distinct c from table1
select count(c) from table1 where c is not null
Then, if you only get back just NULL (or 0 for that last one), weave your magic (the SQL Server command may be different):
alter table table1 drop column c
Do this for whatever columns you want.
You really need to be careful if you're deleting columns. Even though they may be full of nulls, there may be SQL queries out there that use that column. Dropping the column will break those queries.
SELECT * FROM table1 WHERE c IS NOT NULL -- or also SELECT COUNT(*)
To detect if indeed this column has no values at all.
ALTER TABLE table1 DROP COLUMN c
is the query to remove the column if it is deemed desirable.
Try this stored procedure with your table name as input.
alter proc USP_DropEmptyColumns
#TableName varchar(255)
as
begin
Declare #col varchar(255), #cmd varchar(max)
DECLARE getinfo cursor for
SELECT c.name FROM sys.tables t JOIN sys.columns c ON t.Object_ID = c.Object_ID
WHERE t.Name = #TableName
OPEN getinfo
FETCH NEXT FROM getinfo into #col
WHILE ##FETCH_STATUS = 0
BEGIN
SELECT #cmd = 'IF NOT EXISTS (SELECT top 1 * FROM [' + #TableName + '] WHERE [' + #col + '] IS NOT NULL)
BEGIN
ALTER TABLE [' + #TableName + '] DROP Column [' + #col + ']
end'
EXEC(#cmd)
FETCH NEXT FROM getinfo into #col
END
CLOSE getinfo
DEALLOCATE getinfo
end
PROC PRINT DATA=TABLE1;RUN;
PROC TRANSPOSE DATA=TABLE1 OUT=TRANS1;VAR A B C D E;RUN;
DATA TRANS2;SET TRANS1;IF COL1 = . AND COL2 = . THEN DELETE;RUN;
PROC TRANSPOSE DATA=TRANS2 OUT=TABLE2 (DROP=_NAME_);VAR COL1-COL2;RUN;
PROC PRINT DATA=TABLE2;RUN;
If you want to perform the stored proc on all the tables in your database.
DECLARE #table_name AS VARCHAR(128);
DECLARE table_cursor CURSOR FOR
SELECT name FROM sys.tables;
OPEN table_cursor;
FETCH NEXT FROM table_cursor INTO #table_name;
WHILE ##FETCH_STATUS = 0
BEGIN
EXEC USP_DropEmptyColumns #table_name;
FETCH NEXT FROM table_cursor INTO #table_name;
END;
CLOSE table_cursor;
DEALLOCATE table_cursor;