How to check a condition against all the columns of a table? - sql

I have a table which has more than 30 columns (all are varchar). I need to list all the columns which contain blank values, i.e. ' '.
I tried using 'coalesce' but it is only for NULL.

The following query will give you all the columns in a table that might have null or '' values.
It is written so that you can run it for all tables in your database but you can limit it to a single table, as I have done for this specific example, checking a table called testingNulls:
-- Lists every column of a table that holds at least one NULL or empty value.
-- @table / @col receive the current table and column name from the cursor,
-- @sql holds the dynamic statement that is built for that column.
declare @table sysname, @col sysname, @sql nvarchar(max)
-- #nullcolumns stores the hits, to have one result set instead of one row per cursor cycle
if object_id('tempdb..#nullcolumns') is not null drop table #nullcolumns
create table #nullcolumns (tablename varchar(255), columnname varchar(255))
declare getinfo cursor local fast_forward for
select t.name, c.name
from sys.tables t join sys.columns c on t.object_id = c.object_id
where t.name = 'testingnulls' --here the condition for the table name
open getinfo
fetch next from getinfo into @table, @col
while @@fetch_status = 0
begin
    -- quotename() brackets the identifiers safely; replace() doubles any quote
    -- character in the names where they are emitted as string literals
    select @sql = 'if exists (select 1 from ' + quotename(@table) + ' where ' + quotename(@col) + ' is null or ' + quotename(@col) + ' like '''' ) begin insert into #nullcolumns (tablename, columnname) select ''' + replace(@table, '''', '''''') + ''', ''' + replace(@col, '''', '''''') + ''' end'
    print(@sql)
    exec(@sql)
    fetch next from getinfo into @table, @col
end
close getinfo
deallocate getinfo
--this should be the result you need:
select * from #nullcolumns
You can see a working example here. I hope this is what you need.

List all columns that contain a blank in some record? You'd use a query per column and collect the results with UNION ALL:
-- Template: returns the name of every column for which at least one row holds
-- a value containing a space character (like '% %').
-- One EXISTS branch per column; the "..." line is a placeholder for the
-- branches of the remaining columns and must be filled in before running.
select 'COL1' where exists (select * from mytable where col1 like '% %')
union all
select 'COL2' where exists (select * from mytable where col2 like '% %')
union all
...
union all
select 'COL30' where exists (select * from mytable where col30 like '% %');

If you want like select * from [your_table_name] where [col1] = '' and [col2] = ''....., then use dynamic sql query like below.
Query
-- Builds and runs:
--   select * from [your_table_name] where [col1] = '' and [col2] = '' ...
-- with one predicate per column listed in information_schema.columns.
declare @sql as varchar(max);
select @sql = 'select * from [your_table_name] where '
    + stuff((
        select ' and ' + quotename([column_name]) + ' = ' + char(39) + char(39)
        from information_schema.columns
        where table_name = 'your_table_name'
        -- TYPE + .value() keeps characters such as & < > in column names unescaped
        for xml path(''), type
      ).value('.', 'varchar(max)')
    , 1, 5, ''   -- strip the leading ' and '
    );
exec(@sql);
Update
Or else if you want to list the column names which have a blank value, then you can use the below dynamic sql query.
Query
-- Lists the names of the columns that contain a NULL, empty or all-space value
-- in at least one row. Every column is unpivoted into (col1 = value,
-- col2 = column name) via UNION ALL, then filtered on the trimmed value.
declare @sql as varchar(max);
select @sql = stuff((
        select ' union all select ' + quotename([column_name]) + ' as [col1], '
            + char(39) + replace([column_name], char(39), char(39) + char(39)) + char(39) + ' as [col2]'
            + ' from your_table_name'
        from information_schema.columns
        where table_name = 'your_table_name'
        for xml path(''), type
      ).value('.', 'varchar(max)')
    , 1, 11, ''   -- strip the leading ' union all '
    );
set @sql = 'select distinct t.col2 as [blank_cols] from(' + @sql
    + ')t
where coalesce(ltrim(rtrim(t.col1)), ' + char(39) + char(39) + ') = '
    + char(39) + char(39) + ';';
exec(@sql);
Find a demo here
But still I'm not sure that this is what you are looking out for.

you have not many choices but to specify all the columns in your where clause
WHERE COL1 = '' AND COL2 = '' AND COL3 = '' AND . . .
or you can use Dynamic SQL to form your query, but that is not an easy path to go

If you want to count number of columns having '' value in a table (not for each row) then use the following
-- Template: each max(CASE ...) term is 1 when at least one row of t has ''
-- in that column and 0 otherwise, so the sum is the number of such columns.
-- The "..." line is a placeholder for the terms of the remaining columns.
SELECT max(CASE WHEN col1 = '' THEN 1 ELSE 0 END) +
max(CASE WHEN col2 = '' THEN 1 ELSE 0 END) +
max(CASE WHEN col3 = '' THEN 1 ELSE 0 END) +
...
FROM t
demo

I created a dynamic SQL script that you can use by providing the table name only
Here it is
-- Builds and runs "select * from <table> where [c1] = ' ' and [c2] = ' ' ..."
-- for the table named in @table (requires STRING_AGG).
declare @sql nvarchar(max)
declare @table sysname = 'ProductAttributes'
select @sql =
    'select * from ' + @table + ' where ' +
    -- cast to nvarchar(max) so string_agg is not limited to 8000 bytes on wide tables
    string_agg(cast(quotename(name) + ' = '' '' ' as nvarchar(max)), ' and ')
from sys.columns
where object_id = OBJECT_ID(@table)
select @sql
exec sp_executesql @sql
Unfortunately, the STRING_AGG function used here for string concatenation is only available from SQL Server 2017 onwards.
But it is also possible to use SQL XML Path to concatenate WHERE clause fragments
-- Variant that concatenates the WHERE fragments with FOR XML PATH instead of
-- STRING_AGG. Declares its own variables, so run it as a separate batch.
declare @sql nvarchar(max)
declare @table sysname = 'ProductAttributes'
SELECT @sql = 'select * from ' + @table + ' where ' +
    STUFF(
        (
            SELECT ' and ' + quotename([name]) + ' = '' '' '
            from sys.columns
            where object_id = OBJECT_ID(@table)
            FOR XML PATH(''), TYPE
        ).value('.', 'NVARCHAR(MAX)'),
        1, 5, ''   -- strip the leading ' and '
    )
select @sql as sqlscript
exec sp_executesql @sql

Related

SQL count distinct or not null for each column for many columns

I need to analyze a large table with hundreds of columns. A lot of columns are unused.
To investigate I could do something like
-- every distinct value stored in Column1
SELECT DISTINCT t.Column1
FROM myTable AS t
or
-- number of distinct Column1 values (a NULL, if present, counts as one value)
SELECT COUNT(*)
FROM (
    SELECT DISTINCT Column1
    FROM MyTable
) AS C
Then I do the same for column2 and so on. However these queries only work for one column which is time consuming and does not give overview in one glance.
Any idea how to build such investigation query for all columns in one?
You need only 1 query where you have to list all the columns of the table:
-- Template: one COUNT(DISTINCT ...) per column, all in a single pass over MyTable.
-- The dotted line is a placeholder for the remaining columns (each needs a
-- preceding comma) and must be replaced before running.
SELECT COUNT(DISTINCT Column1) column1_count,
COUNT(DISTINCT Column2) column2_count,
COUNT(DISTINCT Column3) column3_count
.....................................
FROM MyTable;
For local purposes only, you can make it dynamic like this:
Get the columns of the table
The query is built the same way as in the other answers and then executed with EXEC().
-- Builds and runs one SELECT returning COUNT(DISTINCT col) for every column of Customer.
-- @columns numbers the column names so the loop can visit them one by one.
DECLARE @columns AS TABLE (RowId INT IDENTITY(1,1), ColumnName sysname)
DECLARE @ii int = 1          -- IDENTITY(1,1) numbers the rows from 1
DECLARE @max int = 0
DECLARE @sqlQuery nvarchar(MAX)
INSERT INTO @columns (ColumnName)
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = N'Customer'
ORDER BY ORDINAL_POSITION
SET @sqlQuery = 'SELECT '
SELECT @max = COUNT(*) FROM @columns
WHILE @ii <= @max
BEGIN
    -- QUOTENAME keeps column names with spaces or reserved words valid
    SELECT @sqlQuery = CONCAT(@sqlQuery, 'COUNT(DISTINCT ', QUOTENAME(ColumnName), ') ', QUOTENAME(LOWER(ColumnName) + '_count'), ', ')
    FROM @columns
    WHERE RowId = @ii
    SET @ii = @ii + 1
END
SELECT @sqlQuery = CONCAT(@sqlQuery, 'FROM Customer')
-- remove the comma left behind after the last column
SELECT @sqlQuery = REPLACE(@sqlQuery, ', FROM', ' FROM')
select @sqlQuery
EXEC (@sqlQuery)
You should flesh out your requirement a bit more. If all you want to know is if a column contains only NULLs, you'll want to check for max(ColumnName) is null
-- Lists every column, across all user tables, that contains no non-NULL value.
-- @sql holds one generated probe query per column; ##emptyColumns collects the hits.
declare @sql table (id int identity(1,1), QueryString nvarchar(max))
-- wide enough for a bracket-quoted [schema].[table].[column] name
create table ##emptyColumns (emptyColumn nvarchar(400))
declare @i int = 0
declare @iMax int
declare @runthis nvarchar(max)
-- count(col) = 0 is true exactly when the column holds only NULLs (or the table
-- is empty) and, unlike max(col), it also works for bit columns
insert @sql (QueryString)
select 'select ''' + replace(QUOTENAME(s.name) + '.' + QUOTENAME(t.name) + '.' + QUOTENAME(c.name), '''', '''''') + ''' as ''column''
from ' + QUOTENAME(s.name) + '.' + QUOTENAME(t.name) + '
having count(' + QUOTENAME(c.name) + ') = 0'
from sys.tables t
inner join sys.columns c on c.object_id = t.object_id
inner join sys.schemas s on s.schema_id = t.schema_id
order by s.name
, t.name
, c.column_id
select @iMax = count(*)
from @sql
print @iMax
while @i < @iMax
begin
    set @i = @i + 1
    select @runthis = 'insert into ##emptyColumns
' + QueryString
    from @sql
    where id = @i
    execute sp_executesql @runthis
end
select *
from ##emptyColumns
drop table ##emptyColumns
One further option you might consider:
-- Builds one UNION ALL query with a row per column of MySchema.MyTable:
-- column name, number of NULLs, number of distinct non-NULL values.
declare @sql nvarchar(max)
select @sql = isnull(@sql + ' union all ', '') + 'select ''' + replace(COLUMN_NAME, '''', '''''') + ''' as column_name,
sum(case when ' + quotename(COLUMN_NAME) + ' is null then 1 else 0 end) as null_values,
count(distinct ' + quotename(COLUMN_NAME) + ') as count_distinct
from ' + quotename(TABLE_SCHEMA) + '.' + quotename(TABLE_NAME) + '
'
from information_schema.columns
where TABLE_SCHEMA = 'MySchema' and TABLE_NAME = 'MyTable'
exec (@sql)
If you had very big tables with large numbers of columns and were only interested in empty columns you could look into something like checksum_agg(checksum(column_name)). It may help improve performance.
You'd need to be wary of column data types, as they are not all compatible with distinct.

Dynamic union of table if a certain field exists

I'm trying to build a dynamic union over tables that have certain fields (in my example field1 and field2). The union already works but over any table. Now I need to include only the ones that have field1 and field2.
-- Builds a UNION ALL of [field1], [field2] over every dbo table named CUST_TABLE*.
DECLARE @SQL VARCHAR(max)
SET @SQL = ''
SELECT @SQL = @SQL + CASE Len(@SQL) WHEN 0 THEN '' ELSE ' UNION ALL ' END
    + ' SELECT [field1], [field2] FROM dbo.' + QUOTENAME(NAME)
FROM sys.tables
WHERE NAME LIKE 'CUST_TABLE%'
  -- the generated query hard-codes dbo, so only pick up dbo tables
  AND schema_id = SCHEMA_ID('dbo')
EXEC (@SQL)
I guess I need to combine this query somehow:
-- names of the CUST_TABLE* tables that expose a column called field1
SELECT col.TABLE_NAME
FROM INFORMATION_SCHEMA.COLUMNS AS col
WHERE col.COLUMN_NAME = 'field1'
  AND col.TABLE_NAME LIKE 'CUST_TABLE%'
You are close. Query the view INFORMATION_SCHEMA.COLUMNS. Aggregate per table name and make sure both columns exist for the table by counting them in the HAVING clause.
-- Builds a UNION ALL of [field1], [field2] over the dbo tables named CUST_TABLE*
-- that have BOTH columns; the HAVING clause counts each column per table.
DECLARE @SQL VARCHAR(max)
SET @SQL = ''
SELECT @SQL = @SQL + CASE Len(@SQL) WHEN 0 THEN '' ELSE ' UNION ALL ' END
    + ' SELECT [field1], [field2] FROM dbo.' + QUOTENAME(table_name)
FROM information_schema.columns
WHERE table_name LIKE 'CUST_TABLE%'
  -- the generated query hard-codes dbo, so only inspect dbo tables
  AND table_schema = 'dbo'
GROUP BY table_name
HAVING COUNT(CASE WHEN COLUMN_NAME = 'FIELD1' THEN 1 END) > 0
   AND COUNT(CASE WHEN COLUMN_NAME = 'FIELD2' THEN 1 END) > 0
EXEC (@SQL)

select columns with value NA

How to select columns in a table that only contain a specific value for all the rows? I am trying to find these columns to do an update on those values with a NULL value. In my columns I have varied range of values including NA
I am using SQL Server 2012.
I've tried the following, but this only gives me the column names. Can I add a condition to it for columns with value 'NA'?
-- column names and declared defaults of dbo.ABC
SELECT
    col.COLUMN_NAME AS NAMES,
    col.COLUMN_DEFAULT
FROM INFORMATION_SCHEMA.COLUMNS AS col
WHERE col.TABLE_NAME = 'ABC'
  AND col.TABLE_SCHEMA = 'dbo'
I am a beginner in SQL. Trying to figure out how to do this.
If min of column equals to max then that column contains same values:
-- Template: reports 1 for a column when its smallest and largest value are equal,
-- i.e. all non-NULL rows hold the same value, and 0 otherwise.
-- NOTE(review): min/max skip NULLs, so a column mixing one value with NULLs also
-- reports 1, and an all-NULL column reports 0.
-- The "..." line is a placeholder for the remaining columns.
Select
case when min(col1) = max(col1) then 1 else 0 end as Col1IsSame,
case when min(col2) = max(col2) then 1 else 0 end as Col2IsSame,
...
from Table
With dynamic query:
-- Builds and runs one SELECT with a "min = max" flag per column of dbo.[Table].
declare @s nvarchar(max) = 'select '
select @s = @s + 'case when min(' + quotename(COLUMN_NAME) + ') = max(' +
    quotename(COLUMN_NAME) + ') then 1 else 0 end as ' + quotename(COLUMN_NAME) + ','
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = 'dbo'
AND TABLE_NAME = 'Table'
-- drop the trailing comma; [Table] is bracketed because TABLE is a reserved keyword
Set @s = substring(@s, 1, len(@s) - 1) + ' from [dbo].[Table]'
exec(@s)
TRY THIS QUERY
-- Sets every 'NA' value to NULL in all nullable character columns of one table.
-- @OIM is the object id of the table to clean up (dbo.ABC in this example).
DECLARE @OIM int = OBJECT_ID(N'dbo.ABC')
DECLARE @tableName nvarchar(300)
DECLARE @SQLQUERY nvarchar(MAX) = N''
SET @tableName = QUOTENAME(OBJECT_SCHEMA_NAME(@OIM)) + N'.' + QUOTENAME(OBJECT_NAME(@OIM))
-- one UPDATE statement per column; 'NA' is emitted as a quoted string literal
SELECT @SQLQUERY = @SQLQUERY + N'UPDATE ' + @tableName + N' SET ' + QUOTENAME(c.name) + N' = NULL WHERE ' + QUOTENAME(c.name) + N' = ''NA'' ; '
FROM sys.columns AS c
WHERE c.object_id = @OIM
  AND c.is_nullable = 1   -- a NOT NULL column cannot be set to NULL
  AND TYPE_NAME(c.system_type_id) IN ('char', 'varchar', 'nchar', 'nvarchar')
PRINT @SQLQUERY
EXECUTE sp_executesql @SQLQUERY

Dynamically Count Null Values in SQL Server

I'm a little new at SQL so please bear with me. I am attempting to write a query that will allow me to loop through an entire table and find the number of times null values appear in each column. This is easy to do the hard way by typing the following:
-- number of NULLs per column, written out by hand
-- (a NULL test needs the searched form CASE WHEN col IS NULL)
Select
SUM(CASE WHEN COL_1 IS NULL THEN 1 ELSE 0 END) AS COL_1_NULLS
,SUM(CASE WHEN COL_2 IS NULL THEN 1 ELSE 0 END) AS COL_2_NULLS
FROM TABLE1
This is easy but it can become arduous if you want to do this for multiple tables or if a single table has a lot of columns.
I'm looking for a way to write a query that passes a table name into it and then loops through each column in the defined table (possibly pulling the column name by ordinal position via a join to a metadata view?) and then sums the number of nulls in the column. Before anyone jumps on the nitpick bandwagon please keep in mind that this basic idea could be used for more than just finding nulls. Any assistance with this issue is greatly appreciated.
You need to use dynamic sql:
-- Builds and runs one SELECT that counts the NULLs in every column of MYTABLE.
declare @custom_sql nvarchar(max)
-- the dummy first column lets every generated term start with a comma
set @custom_sql = 'SELECT null as first_row'
select
    @custom_sql = @custom_sql + ', ' + 'SUM(CASE WHEN ' + quotename(COLUMN_NAME) + ' IS NULL THEN 1 ELSE 0 END) as ' + quotename(COLUMN_NAME + '_NULLS')
from
    INFORMATION_SCHEMA.COLUMNS where table_name = 'MYTABLE'
set @custom_sql = @custom_sql + ' FROM MYTABLE'
exec(@custom_sql)
You can also use the COALESCE term (just for a slightly different approach):
-- Same idea without a dummy column: COALESCE yields '' for the first term
-- (while @custom_sql is still NULL) and ', ' + previous text afterwards.
declare @custom_sql nvarchar(max)
select
    @custom_sql = COALESCE(@custom_sql + ', ', '') + 'SUM(CASE WHEN ' + quotename(COLUMN_NAME) + ' IS NULL THEN 1 ELSE 0 END) as ' + quotename(COLUMN_NAME + '_NULLS')
from
    INFORMATION_SCHEMA.COLUMNS where table_name = 'users'
set @custom_sql = 'SELECT ' + @custom_sql
set @custom_sql = @custom_sql + ' FROM Users'
print @custom_sql
exec(@custom_sql)
I don't know how to make a generic query, but you can always generate the script like this
-- Generates (but does not run) a SELECT counting the NULLs in every column of TABLE1.
declare @sql nvarchar(max) = 'select 1 as dummy'
select @sql = @sql + '
, sum(case when ' + quotename(c.name) + ' is null then 1 else 0 end) as ' + quotename(c.name + '_NULLS')
from sys.columns c
join sys.tables t on t.object_id = c.object_id
where t.name = 'TABLE1'
set @sql = @sql + ' from TABLE1'
select @sql
Then you can execute the result eg. with exec sp_executesql #sql
For a cooler approach, you can use ISNULL to skip the first comma.
-- Counts the NULLs per column of @tablename; ISNULL skips the comma before the first term.
declare @sql nvarchar(max)
declare @tablename nvarchar(255) = 'xxxx'
Select @sql = ISNULL(@sql + ',','') + ' ' + quotename(COLUMN_NAME + '_count') + ' = Sum(case when ' + quotename(COLUMN_NAME) + ' is null then 1 else 0 end)' + char(13)
From information_schema.columns
where table_name = @tablename
set @sql = 'Select' + @sql + ' From ' + @tablename
print @sql
exec sp_executesql @sql

get a count of each value from every column in a table SQL Server

So I looked this up and this question is very similar but it's missing a key piece: SQL Server count number of distinct values in each column of a table
So in that question they want the distinct count for each column. What I am looking to do is to get a count of each distinct value for each column in a table (and I'm doing this for all the tables in a particular database which is why I'm looking to try to automate this as much as possible). Currently my code looks like this which I have to run for each column:
-- occurrences of each mycol1 value, most frequent first
SELECT
    mycol1,
    COUNT(*) AS [Count]
FROM mytable
GROUP BY mycol1
ORDER BY COUNT(*) DESC
Ideally my output would look like this:
ColumnName1 Count
val1 24457620
val2 17958530
val3 13350
ColumnName2 Count
val1 24457620
val2 17958530
val3 13350
val4 12
and so on for all the columns in the table
This answer below (provided by @beargle) from that previous question is really close to what I'm looking to do but I can't seem to figure out a way to get it to work for what I am trying to do so I would appreciate any help.
-- Generates (but does not run) a UNION query returning COUNT(DISTINCT ...) per column of @Table.
DECLARE @Table SYSNAME = 'TableName';
-- REVERSE and STUFF used to remove trailing UNION in string
SELECT REVERSE(STUFF(REVERSE((SELECT 'SELECT ''' + name
        + ''' AS [Column], COUNT(DISTINCT('
        + QUOTENAME(name) + ')) AS [Count] FROM '
        + QUOTENAME(@Table) + ' UNION '
    -- get column name from sys.columns
    FROM sys.columns
    WHERE object_id = Object_id(@Table)
    -- concatenate result strings with FOR XML PATH
    FOR XML PATH (''))), 1, 7, ';'));
You could use:
-- Builds and runs one UNION query listing, per column of @Table, every value and its row count.
DECLARE @Table SYSNAME = 'TableName';
DECLARE @SQL NVARCHAR(MAX) = ''
SELECT @SQL = STUFF((SELECT ' UNION SELECT ''' + REPLACE(name, '''', '''''')
        + ''' AS [Column], '
        + 'CAST(' + QUOTENAME(name)
        + ' AS NVARCHAR(MAX)) AS [ColumnValue], COUNT(*) AS [Count] FROM '
        + QUOTENAME(@Table) + ' GROUP BY ' + QUOTENAME(name)
    FROM sys.columns
    WHERE object_id = Object_id(@Table)
    -- concatenate result strings with FOR XML PATH;
    -- TYPE + .value() keeps & < > in column names from being XML-escaped
    FOR XML PATH (''), TYPE).value('.', 'NVARCHAR(MAX)'), 1, 7, '');
EXECUTE sp_executesql @SQL;
Which will produce SQL Like the following for a table with two columns (Column1 and Column2)
-- Example of the generated statement for a table with columns Column1 and Column2:
-- one branch per column, each returning the column label, the value cast to
-- NVARCHAR(MAX), and the number of rows holding that value.
SELECT 'Column1' AS [Column],
CAST([Column1] AS NVARCHAR(MAX)) AS [ColumnValue],
COUNT(*) AS [Count]
FROM [TableName]
GROUP BY [Column1]
UNION
SELECT 'Column2' AS [Column],
CAST([Column2] AS NVARCHAR(MAX)) AS [ColumnValue],
COUNT(*) AS [Count]
FROM [TableName]
GROUP BY [Column2]
EDIT
If you want a new result set for each column then use:
-- Builds and runs one "value, count" query per column of @Table; each query
-- returns its own result set.
DECLARE @Table SYSNAME = 'TableName';
DECLARE @SQL NVARCHAR(MAX) = '';
SELECT @SQL = (SELECT ' SELECT ' + QUOTENAME(name)
        + ', COUNT(*) AS [Count] FROM '
        + QUOTENAME(@Table) + ' GROUP BY ' + QUOTENAME(name) + ';'
    FROM sys.columns
    WHERE object_id = Object_id(@Table)
    -- concatenate result strings with FOR XML PATH;
    -- TYPE + .value() keeps & < > in column names from being XML-escaped
    FOR XML PATH (''), TYPE).value('.', 'NVARCHAR(MAX)');
EXECUTE sp_executesql @SQL;
Which would produce SQL Like:
-- Example of the generated statements for a table [TableName] with columns
-- Column1 and Column2: one independent "value, count" query per column.
SELECT [Column1],
COUNT(*) AS [Count]
FROM [TableName]
GROUP BY [Column1];
SELECT [Column2],
COUNT(*) AS [Count]
FROM [TableName]
GROUP BY [Column2];
thought i would take a stab at this whilst waiting for a backup to restore
hope this does what you require
-- For every column of every user table, stores each value together with the
-- number of rows holding it in #Temp, then returns the collected rows.
create Table #Temp
(tableName nvarchar(128),   -- identifiers can be up to 128 characters long
columnName nvarchar(128),
value varchar(1000),
distinctItems int)
Declare @tabName as sysname
Declare @colName as sysname
Declare @tabid as int
Declare @query as nvarchar(max)
Declare cursorTables Cursor local fast_forward
for
select t.object_id , t.name , c.name from sys.tables t inner join sys.columns c on t.object_id = c.object_id
open cursorTables
Fetch Next from cursorTables into
@tabid,@tabName,@colName
while @@Fetch_Status = 0
Begin
    -- the table is schema-qualified through its object id so tables outside the
    -- default schema resolve; quotename()/replace() keep unusual names valid
    set @query = 'Insert into #Temp (tableName, columnName, value, distinctItems) SELECT ''' + replace(@tabName, '''', '''''') + ''' , ''' + replace(@colName, '''', '''''') + ''', ' + quotename(@colName) + ', COUNT(' + quotename(@colName) + ') AS Expr1 FROM ' + quotename(object_schema_name(@tabid)) + '.' + quotename(@tabName) + ' group by ' + quotename(@colName)
    print @query
    exec sp_executesql @query
    Fetch Next from cursorTables into
    @tabid,@tabName,@colName
End
Close cursorTables
Deallocate cursorTables
select * from #temp
drop table #temp
It produces some not very useful results on PK values, and I suspect it would not work on columns larger than varchar(1000), but it works on a few of my dbs.
This version makes a good snippet:
-- Generates and runs one "table, column, value, count" query per matching column.
-- Adjust the three LIKE filters to pick the schema, table and columns.
DECLARE @sql NVARCHAR(MAX) = N'';
SELECT @sql += 'SELECT ''' + REPLACE(t.name, '''', '''''') + ''', ''' + REPLACE(c.name, '''', '''''') + ''', ' + QUOTENAME(c.name) + ', COUNT(' + QUOTENAME(c.name) + ') AS C FROM ' + QUOTENAME(s.name) + '.' + QUOTENAME(t.name) + ' GROUP BY ' + QUOTENAME(c.name) + ';' + CHAR(13)
FROM sys.tables AS t
INNER JOIN sys.columns AS c ON t.object_id = c.object_id
INNER JOIN sys.schemas AS s ON t.[schema_id] = s.[schema_id]
WHERE s.name LIKE 'stage' AND t.name LIKE 'table' AND c.name LIKE '%whatever%';
--PRINT @sql;
EXEC sp_executesql @sql