Dynamic union of table if a certain field exists - sql

I'm trying to build a dynamic union over tables that have certain fields (in my example field1 and field2). The union already works but over any table. Now I need to include only the ones that have field1 and field2.
-- Builds one UNION ALL statement over every table whose name starts with
-- CUST_TABLE and executes it. There is no column check yet, so a matching
-- table that lacks field1 or field2 makes the generated statement fail.
DECLARE @SQL NVARCHAR(MAX) = N'';

SELECT @SQL = @SQL
    + CASE LEN(@SQL) WHEN 0 THEN N'' ELSE N' UNION ALL ' END
    + N' SELECT [field1], [field2] FROM '
    -- Use each table's real schema instead of assuming dbo; QUOTENAME also
    -- escapes a "]" inside a name.
    + QUOTENAME(SCHEMA_NAME(t.schema_id)) + N'.' + QUOTENAME(t.name)
FROM sys.tables AS t
WHERE t.name LIKE 'CUST[_]TABLE%';  -- [_] is a literal underscore; a bare _ matches any character

EXEC (@SQL);
I guess I need to combine this query somehow:
-- One row per CUST_TABLE% table (or view) that has a column named field1.
SELECT c.TABLE_NAME
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.COLUMN_NAME = 'field1'
  AND c.TABLE_NAME LIKE 'CUST_TABLE%'

You are close. Query the view INFORMATION_SCHEMA.COLUMNS. Aggregate per table name and make sure both columns exist for the table by counting them in the HAVING clause.
-- Builds and runs one UNION ALL statement over every CUST_TABLE* table that
-- has BOTH field1 and field2.
--   * Rows are filtered to the two column names in WHERE, so a group (one
--     table) qualifies exactly when it keeps two rows.
--   * The column-name literals use the same case as the generated SELECT, so
--     the check also works in a case-sensitive database.
--   * Grouping includes the schema: equally named tables in different schemas
--     are treated separately and each is addressed by its own schema.
DECLARE @SQL NVARCHAR(MAX);

SELECT @SQL = STUFF((
        SELECT N' UNION ALL SELECT [field1], [field2] FROM '
               + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME)
        FROM INFORMATION_SCHEMA.COLUMNS AS c
        WHERE c.TABLE_NAME LIKE 'CUST[_]TABLE%'   -- [_] = literal underscore
          AND c.COLUMN_NAME IN ('field1', 'field2')
        GROUP BY c.TABLE_SCHEMA, c.TABLE_NAME
        HAVING COUNT(*) = 2
        ORDER BY c.TABLE_SCHEMA, c.TABLE_NAME
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 11, N'');   -- strip the leading ' UNION ALL '

-- @SQL stays NULL when no table qualifies; there is nothing to run then.
IF @SQL IS NOT NULL
    EXEC (@SQL);

Related

SQL count distinct or not null for each column for many columns

I need to analyze a large table with hundreds of columns. A lot of columns are unused.
To investigate I could do something like
-- Every distinct value stored in Column1 (NULL, if present, appears once).
SELECT Column1
FROM myTable
GROUP BY Column1
or
-- Number of distinct values in Column1; the inner query de-duplicates and the
-- outer query counts the remaining rows (a NULL value counts as one row).
SELECT COUNT(*)
FROM (
    SELECT DISTINCT Column1
    FROM MyTable
) AS distinct_values
Then I do the same for Column2 and so on. However, each of these queries covers only one column, which is time-consuming and does not give an overview at a glance.
Any idea how to build such investigation query for all columns in one?
You need only 1 query where you have to list all the columns of the table:
-- Template: one COUNT(DISTINCT ...) per column, all evaluated in a single
-- statement over MyTable.
SELECT
    COUNT(DISTINCT Column1) AS column1_count,
    COUNT(DISTINCT Column2) AS column2_count,
    COUNT(DISTINCT Column3) AS column3_count
    -- ... add one COUNT(DISTINCT <column>) line per remaining column
FROM MyTable;
For local purposes only, you can make it dynamic like this:
1. Get the columns of the table from INFORMATION_SCHEMA.COLUMNS.
2. Build the query text from those column names (as in the other answers) and execute it with EXEC().
-- Generates and runs "SELECT COUNT(DISTINCT col) AS [col_count], ... FROM table"
-- for every column of one table.
--   @schemaName / @tableName : the table to profile (dbo.Customer here).
-- The column list is built in one set-based statement, so there is no loop
-- counter, no fixed-width name buffer that could truncate long column names,
-- and no trailing comma to patch out afterwards.
DECLARE @schemaName SYSNAME = N'dbo';
DECLARE @tableName  SYSNAME = N'Customer';
DECLARE @sqlQuery   NVARCHAR(MAX);

SELECT @sqlQuery = STUFF((
        SELECT N', COUNT(DISTINCT ' + QUOTENAME(c.COLUMN_NAME) + N') AS ['
               + REPLACE(LOWER(c.COLUMN_NAME), N']', N']]') + N'_count]'
        FROM INFORMATION_SCHEMA.COLUMNS AS c
        WHERE c.TABLE_SCHEMA = @schemaName
          AND c.TABLE_NAME = @tableName
          -- DISTINCT is not supported on these types; including them would
          -- make the whole generated statement fail.
          AND c.DATA_TYPE NOT IN ('text', 'ntext', 'image', 'xml', 'geography', 'geometry')
        ORDER BY c.ORDINAL_POSITION
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 2, N'');   -- strip the leading ', '

-- @sqlQuery is NULL when the table has no (eligible) columns or does not exist.
IF @sqlQuery IS NOT NULL
BEGIN
    SET @sqlQuery = N'SELECT ' + @sqlQuery
                  + N' FROM ' + QUOTENAME(@schemaName) + N'.' + QUOTENAME(@tableName);

    SELECT @sqlQuery;      -- show the generated statement
    EXEC (@sqlQuery);
END
You should flesh out your requirement a bit more. If all you want to know is if a column contains only NULLs, you'll want to check for max(ColumnName) is null
-- Lists every column of every user table that holds no non-NULL value.
-- One probe statement is generated per column, stored in @queries, executed
-- in id order, and each hit is collected in #emptyColumns.
--
-- NOT EXISTS (... IS NOT NULL) is used instead of MAX(col) IS NULL because
-- MAX is not defined for every data type (bit, for example), while IS NOT NULL
-- works for all of them and can stop at the first non-NULL row.
-- A session-local temp table is used so that two sessions running the script
-- at the same time do not collide on one shared table name.
IF OBJECT_ID('tempdb..#emptyColumns') IS NOT NULL
    DROP TABLE #emptyColumns;

-- Wide enough for three bracket-quoted names joined by dots.
CREATE TABLE #emptyColumns (emptyColumn NVARCHAR(800) NOT NULL);

DECLARE @queries TABLE (id INT IDENTITY(1,1) PRIMARY KEY, QueryString NVARCHAR(MAX) NOT NULL);
DECLARE @i INT = 0;
DECLARE @iMax INT;
DECLARE @runthis NVARCHAR(MAX);

INSERT INTO @queries (QueryString)
SELECT N'INSERT INTO #emptyColumns (emptyColumn) SELECT N'''
       -- label: [schema].[table].[column]; embedded single quotes are doubled
       + REPLACE(QUOTENAME(s.name) + N'.' + QUOTENAME(t.name) + N'.' + QUOTENAME(c.name), N'''', N'''''')
       + N''' WHERE NOT EXISTS (SELECT 1 FROM '
       + QUOTENAME(s.name) + N'.' + QUOTENAME(t.name)
       + N' WHERE ' + QUOTENAME(c.name) + N' IS NOT NULL)'
FROM sys.tables AS t
INNER JOIN sys.schemas AS s
    ON s.schema_id = t.schema_id
INNER JOIN sys.columns AS c
    ON c.object_id = t.object_id
ORDER BY s.name, t.name, c.column_id;

SELECT @iMax = COUNT(*)
FROM @queries;

PRINT @iMax;   -- number of columns that will be probed

WHILE @i < @iMax
BEGIN
    SET @i = @i + 1;

    SELECT @runthis = QueryString
    FROM @queries
    WHERE id = @i;

    EXECUTE sys.sp_executesql @runthis;
END;

SELECT emptyColumn
FROM #emptyColumns;

DROP TABLE #emptyColumns;
One further option you might consider:
-- Generates one SELECT per column of MySchema.MyTable, glued with UNION ALL.
-- Each branch returns: the column name, how many rows are NULL in it, and how
-- many distinct non-NULL values it holds.
-- Identifiers go through QUOTENAME, and the name used as a string literal has
-- its single quotes doubled, so unusual column names cannot break the
-- generated statement.
DECLARE @sql NVARCHAR(MAX);

SELECT @sql = COALESCE(@sql + N' UNION ALL ', N'')
    + N'SELECT N''' + REPLACE(c.COLUMN_NAME, N'''', N'''''') + N''' AS column_name, '
    + N'SUM(CASE WHEN ' + QUOTENAME(c.COLUMN_NAME) + N' IS NULL THEN 1 ELSE 0 END) AS null_values, '
    + N'COUNT(DISTINCT ' + QUOTENAME(c.COLUMN_NAME) + N') AS count_distinct '
    + N'FROM ' + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME)
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_SCHEMA = 'MySchema'
  AND c.TABLE_NAME = 'MyTable';

-- @sql is NULL when the table has no columns / does not exist.
IF @sql IS NOT NULL
    EXEC (@sql);
If you had very big tables with large numbers of columns and were only interested in empty columns you could look into something like checksum_agg(checksum(column_name)). It may help improve performance.
You'd need to be wary of column data types, as they are not all compatible with distinct.

Ambiguous column name when select a column to return value if column is available in table by Execute Dynamic SQL commands

I want to return a column's value if the column exists in the table and a default value if it does not. When I join the two tables SHAIN1 and RIREKI14, I get an "ambiguous column name" error. If I select from only one table the query works, but as soon as I join the two tables it fails.
-- Question's statement, reformatted with valid variable sigils.
-- Selects KOM001 when dbo.RIREKI14 has such a column, otherwise NULL.
-- The column reference is intentionally left unqualified: that is what the
-- question reports as "ambiguous" once SHAIN1 is joined in.
DECLARE @sql NVARCHAR(MAX) = N' SELECT 1 as id, '
    + (CASE
           WHEN EXISTS (SELECT *
                        FROM INFORMATION_SCHEMA.COLUMNS
                        WHERE TABLE_SCHEMA = 'dbo'
                          AND TABLE_NAME = 'RIREKI14'
                          AND COLUMN_NAME = 'KOM001')
           THEN N'KOM001'
           ELSE N'NULL'
       END)
    + N' as day '
    + N' From RIREKI14 join SHAIN1 on RIREKI14.INCODE = SHAIN1.INCODE ';

EXEC sp_executesql @sql;
Help me please!
This is your logic:
-- Same logic as the question, laid out so the generated statement is readable.
-- The only text that varies is the select-list expression: the bare column
-- name KOM001 when dbo.RIREKI14 has that column, the keyword NULL otherwise.
DECLARE @sql NVARCHAR(MAX) = N'
SELECT 1 as id, '
    + (CASE
           WHEN EXISTS (SELECT 1
                        FROM INFORMATION_SCHEMA.COLUMNS
                        WHERE TABLE_SCHEMA = 'dbo'
                          AND TABLE_NAME = 'RIREKI14'
                          AND COLUMN_NAME = 'KOM001')
           THEN N'KOM001'     -- unqualified on purpose, as in the question
           ELSE N'NULL'
       END)
    + N' as day ' + N'
From RIREKI14 join
     SHAIN1
     on RIREKI14.INCODE = SHAIN1.INCODE
';

EXEC sp_executesql @sql;
The only possibility for an ambiguous column name is the name coming from the case. So, let's qualify it:
-- Fixed version: both tables get an alias and the optional column is
-- referenced as r.KOM001, so the name can only resolve against RIREKI14 even
-- if the joined table has a column with the same name.
DECLARE @sql NVARCHAR(MAX) = N'
SELECT 1 as id, '
    + (CASE
           WHEN EXISTS (SELECT 1
                        FROM INFORMATION_SCHEMA.COLUMNS
                        WHERE TABLE_SCHEMA = 'dbo'
                          AND TABLE_NAME = 'RIREKI14'
                          AND COLUMN_NAME = 'KOM001')
           THEN N'r.KOM001'
           ELSE N'NULL'
       END)
    + N' as day ' + N'
From RIREKI14 r join
     SHAIN1 s
     on r.INCODE = s.INCODE
';

EXEC sp_executesql @sql;

How to check a condition against all the columns of a table?

I have a table with more than 30 columns (all varchar). I need to list all the columns that contain blank values, i.e. ' '.
I tried using 'coalesce' but it is only for NULL.
The following query will give you all the columns in a table that might have null or '' values.
It is written so that you can run it for all tables in your database but you can limit it to a single table, as I have done for this specific example, checking a table called testingNulls:
--two variables needed for table name and column name, when looping through all tables
-- Lists the columns of a table that contain at least one NULL or blank value.
-- A cursor walks the catalog; for each column one probe statement is built
-- and executed, and hits are collected in #nullcolumns so the caller gets a
-- single result set.
--   * The blank test is "= ''": under SQL Server's trailing-space padding
--     rules this matches '' as well as values made only of spaces, which
--     LIKE '' does not.
--   * The blank test is only generated for char/varchar/nchar/nvarchar
--     columns; comparing other types with '' would convert '' to that type
--     (e.g. 0 for int) and report false hits. Other types are NULL-checked only.
--   * Table and column names reach the INSERT as sp_executesql parameters
--     rather than being spliced into string literals.
DECLARE @schema SYSNAME, @table SYSNAME, @col SYSNAME, @type SYSNAME, @sql NVARCHAR(MAX);

-- result table: one row per (table, column) hit
IF OBJECT_ID('tempdb..#nullcolumns') IS NOT NULL
    DROP TABLE #nullcolumns;
CREATE TABLE #nullcolumns (tablename SYSNAME NOT NULL, columnname SYSNAME NOT NULL);

DECLARE getinfo CURSOR LOCAL FAST_FORWARD FOR
    SELECT s.name, t.name, c.name, TYPE_NAME(c.system_type_id)
    FROM sys.tables AS t
    INNER JOIN sys.schemas AS s
        ON s.schema_id = t.schema_id
    INNER JOIN sys.columns AS c
        ON c.object_id = t.object_id
    WHERE t.name = 'testingnulls';   -- remove or change this filter to scan other tables

OPEN getinfo;
FETCH NEXT FROM getinfo INTO @schema, @table, @col, @type;

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @sql = N'IF EXISTS (SELECT 1 FROM ' + QUOTENAME(@schema) + N'.' + QUOTENAME(@table)
             + N' WHERE ' + QUOTENAME(@col) + N' IS NULL'
             + CASE
                   WHEN @type IN (N'char', N'varchar', N'nchar', N'nvarchar')
                   THEN N' OR ' + QUOTENAME(@col) + N' = '''''
                   ELSE N''
               END
             + N') INSERT INTO #nullcolumns (tablename, columnname) VALUES (@t, @c);';

    PRINT @sql;
    EXEC sys.sp_executesql @sql, N'@t SYSNAME, @c SYSNAME', @t = @table, @c = @col;

    FETCH NEXT FROM getinfo INTO @schema, @table, @col, @type;
END;

CLOSE getinfo;
DEALLOCATE getinfo;

-- this should be the result you need:
SELECT tablename, columnname
FROM #nullcolumns;
You can see a working example here. I hope this is what you need.
List all columns that contain a blank in some record? You'd use a query per column and collect the results with UNION ALL:
-- Template: one branch per column. A branch returns the column's name when at
-- least one row has a value containing a space character in that column.
SELECT 'COL1' WHERE EXISTS (SELECT * FROM mytable WHERE col1 LIKE '% %')
UNION ALL
SELECT 'COL2' WHERE EXISTS (SELECT * FROM mytable WHERE col2 LIKE '% %')
UNION ALL
-- ... one such branch, followed by UNION ALL, for each column in between ...
SELECT 'COL30' WHERE EXISTS (SELECT * FROM mytable WHERE col30 LIKE '% %');
If you want like select * from [your_table_name] where [col1] = '' and [col2] = ''....., then use dynamic sql query like below.
Query
-- Generates and runs
--   SELECT * FROM [your_table_name] WHERE [c1] = '' AND [c2] = '' AND ...
-- i.e. the rows in which every column is blank.
-- FOR XML ... TYPE plus .value() returns the concatenation as plain text;
-- without it, characters such as & < > inside a column name come back
-- entity-encoded and corrupt the generated statement.
DECLARE @sql NVARCHAR(MAX);

SELECT @sql = N'select * from [your_table_name] where '
    + STUFF((
          SELECT N' and ' + QUOTENAME(c.COLUMN_NAME) + N' = ' + NCHAR(39) + NCHAR(39)
          FROM INFORMATION_SCHEMA.COLUMNS AS c
          WHERE c.TABLE_NAME = 'your_table_name'
          ORDER BY c.ORDINAL_POSITION
          FOR XML PATH(''), TYPE
      ).value('.', 'NVARCHAR(MAX)'), 1, 5, N'');   -- strip the leading ' and '

-- @sql is NULL when the table has no columns / does not exist.
IF @sql IS NOT NULL
    EXEC (@sql);
Update
Or else if you want to list the column names which have a blank value, then you can use the below dynamic sql query.
Query
-- Lists the names of the columns of your_table_name that contain at least one
-- blank value (NULL, empty, or spaces only).
-- Step 1 builds one "SELECT <value> AS col1, '<column name>' AS col2" branch
-- per column, glued with UNION ALL. Step 2 wraps that in a query that keeps
-- the distinct column names whose trimmed value is empty.
-- Each value is cast to NVARCHAR(MAX) so that columns of different data types
-- can share the col1 column without conversion errors.
DECLARE @sql NVARCHAR(MAX);

SELECT @sql = STUFF((
        SELECT N' union all select cast(' + QUOTENAME(c.COLUMN_NAME) + N' as nvarchar(max)) as [col1], N'
               + NCHAR(39) + REPLACE(c.COLUMN_NAME, NCHAR(39), NCHAR(39) + NCHAR(39)) + NCHAR(39)
               + N' as [col2] from [your_table_name]'
        FROM INFORMATION_SCHEMA.COLUMNS AS c
        WHERE c.TABLE_NAME = 'your_table_name'
        ORDER BY c.ORDINAL_POSITION
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 11, N'');   -- strip the leading ' union all '

SET @sql = N'select distinct t.col2 as [blank_cols] from (' + @sql
         + N') t
where coalesce(ltrim(rtrim(t.col1)), ' + NCHAR(39) + NCHAR(39) + N') = '
         + NCHAR(39) + NCHAR(39) + N';';

-- @sql is NULL when the table has no columns / does not exist.
IF @sql IS NOT NULL
    EXEC (@sql);
Find a demo here
But still I'm not sure that this is what you are looking out for.
you have not many choices but to specify all the columns in your where clause
WHERE COL1 = '' AND COL2 = '' AND COL3 = '' AND . . .
or you can use Dynamic SQL to form your query, but that is not an easy path to go
If you want to count number of columns having '' value in a table (not for each row) then use the following
-- Template: each MAX(CASE ...) term is 1 when any row has '' in that column
-- and 0 otherwise; the sum is the number of columns that contain a blank.
SELECT
      MAX(CASE WHEN col1 = '' THEN 1 ELSE 0 END)
    + MAX(CASE WHEN col2 = '' THEN 1 ELSE 0 END)
    + MAX(CASE WHEN col3 = '' THEN 1 ELSE 0 END)
    -- + ... one MAX(CASE ...) term per remaining column
FROM t
demo
I created a dynamic SQL script that you can use by providing the table name only
Here it is
-- Given only a table name, generates and runs
--   SELECT * FROM [schema].[table] WHERE [c1] = ' ' AND [c2] = ' ' AND ...
-- Requires STRING_AGG (SQL Server 2017 or later).
--   * The table is resolved once to an object id and re-emitted as a quoted
--     two-part name, so the value of @table is never spliced in verbatim.
--   * The STRING_AGG input is cast to NVARCHAR(MAX); otherwise the aggregate
--     is limited to 8000 bytes and fails on wide tables.
DECLARE @sql NVARCHAR(MAX);
DECLARE @table SYSNAME = N'ProductAttributes';
DECLARE @objectId INT = OBJECT_ID(@table);

SELECT @sql =
    N'select * from ' + QUOTENAME(OBJECT_SCHEMA_NAME(@objectId)) + N'.' + QUOTENAME(OBJECT_NAME(@objectId))
    + N' where '
    + STRING_AGG(CAST(QUOTENAME(c.name) + N' = '' ''' AS NVARCHAR(MAX)), N' and ')
          WITHIN GROUP (ORDER BY c.column_id)
FROM sys.columns AS c
WHERE c.object_id = @objectId;

-- @sql is NULL when @table does not resolve to an object with columns.
IF @sql IS NULL
    RAISERROR(N'Table not found or it has no columns.', 16, 1);
ELSE
BEGIN
    SELECT @sql;
    EXEC sp_executesql @sql;
END
Unfortunately, the STRING_AGG function used here for string concatenation is only available from SQL Server 2017 onward.
But it is also possible to use SQL XML Path to concatenate WHERE clause fragments
-- FOR XML PATH variant of the same generator, for versions without STRING_AGG.
-- Declares its own variables so it can be run as a separate batch.
-- Builds: SELECT * FROM [schema].[table] WHERE [c1] = ' ' AND [c2] = ' ' AND ...
DECLARE @sql NVARCHAR(MAX);
DECLARE @table SYSNAME = N'ProductAttributes';
DECLARE @objectId INT = OBJECT_ID(@table);

SELECT @sql =
    N'select * from ' + QUOTENAME(OBJECT_SCHEMA_NAME(@objectId)) + N'.' + QUOTENAME(OBJECT_NAME(@objectId))
    + N' where '
    + STUFF((
          SELECT N' and ' + QUOTENAME(c.name) + N' = '' '''
          FROM sys.columns AS c
          WHERE c.object_id = @objectId
          ORDER BY c.column_id
          FOR XML PATH(''), TYPE
      ).value('.', 'NVARCHAR(MAX)'), 1, 5, N'');   -- NVARCHAR keeps Unicode names; strips leading ' and '

-- @sql is NULL when @table does not resolve to an object with columns.
IF @sql IS NULL
    RAISERROR(N'Table not found or it has no columns.', 16, 1);
ELSE
BEGIN
    SELECT @sql AS sqlscript;
    EXEC sp_executesql @sql;
END

select columns with value NA

How to select columns in a table that only contain a specific value for all the rows? I am trying to find these columns to do an update on those values with a NULL value. In my columns I have varied range of values including NA
I am using SQL Server 2012.
I've tried the following, but this only gives me the column names. Can I add a condition to it so that it returns only the columns whose value is 'NA'?
-- Column names and declared defaults of dbo.ABC, read from the catalog.
SELECT
    c.COLUMN_NAME AS NAMES,
    c.COLUMN_DEFAULT
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_NAME = 'ABC'
  AND c.TABLE_SCHEMA = 'dbo'
I am a beginner in SQL. Trying to figure out how to do this.
If min of column equals to max then that column contains same values:
-- Template: a column whose smallest and largest value are equal holds a
-- single value; each expression yields 1 in that case and 0 otherwise.
SELECT
    CASE WHEN MIN(col1) = MAX(col1) THEN 1 ELSE 0 END AS Col1IsSame,
    CASE WHEN MIN(col2) = MAX(col2) THEN 1 ELSE 0 END AS Col2IsSame
    -- ... one CASE expression per remaining column
FROM Table
With dynamic query:
-- Generates and runs one SELECT over dbo.[Table] that returns, per column,
-- 1 when MIN(column) = MAX(column) (the column holds a single value) and 0
-- otherwise.
--   * COALESCE supplies 'SELECT ' for the first column and ', ' for every
--     later one, so there is no trailing comma to cut off afterwards.
--   * Identifiers are quoted; Table is a reserved word and does not parse
--     unquoted.
DECLARE @s NVARCHAR(MAX);

SELECT @s = COALESCE(@s + N', ', N'select ')
    + N'case when min(' + QUOTENAME(c.COLUMN_NAME) + N') = max(' + QUOTENAME(c.COLUMN_NAME)
    + N') then 1 else 0 end as ' + QUOTENAME(c.COLUMN_NAME)
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_SCHEMA = 'dbo'
  AND c.TABLE_NAME = 'Table'
  -- MIN/MAX are not defined for these types and would fail the whole statement.
  AND c.DATA_TYPE NOT IN ('bit', 'text', 'ntext', 'image', 'xml', 'geography', 'geometry');

-- @s is NULL when no column qualified.
IF @s IS NOT NULL
BEGIN
    SET @s = @s + N' from [dbo].[Table]';
    EXEC (@s);
END
TRY THIS QUERY
-- For one table, generates and runs an UPDATE per nullable character column
-- that replaces the value 'NA' with NULL.
--   @OIM : object_id of the table to clean (dbo.ABC here — adjust as needed).
-- One UPDATE is emitted per column from a set-based catalog query, so no
-- scalar subquery has to return exactly one column name. The searched value
-- is passed to sp_executesql as the parameter @na instead of being written
-- into the statement text.
DECLARE @OIM INT = OBJECT_ID(N'dbo.ABC');
DECLARE @SQLQUERY NVARCHAR(MAX) = N'';
DECLARE @ParamDefinition NVARCHAR(2000) = N'@na NVARCHAR(10)';

SELECT @SQLQUERY = @SQLQUERY
    + N'UPDATE ' + QUOTENAME(OBJECT_SCHEMA_NAME(c.object_id)) + N'.' + QUOTENAME(OBJECT_NAME(c.object_id))
    + N' SET ' + QUOTENAME(c.name) + N' = NULL WHERE ' + QUOTENAME(c.name) + N' = @na; '
FROM sys.columns AS c
WHERE c.object_id = @OIM
  AND c.is_nullable = 1   -- a NOT NULL column cannot be set to NULL
  AND TYPE_NAME(c.system_type_id) IN (N'char', N'varchar', N'nchar', N'nvarchar');

PRINT @SQLQUERY;

-- Nothing was generated when @OIM did not resolve or no column qualified.
IF @SQLQUERY <> N''
    EXECUTE sp_executesql @SQLQUERY, @ParamDefinition, @na = N'NA';

Dynamically Count Null Values in SQL Server

I'm a little new at SQL, so please bear with me. I am attempting to write a query that will allow me to loop through an entire table and find the number of times NULL values appear in each column. This is easy to do the hard way by typing the following:
-- Hand-written version: counts the NULLs in each listed column of TABLE1.
-- A NULL test needs the searched form "CASE WHEN <col> IS NULL"; the simple
-- form "CASE <col> WHEN IS NULL" is not valid syntax.
SELECT
    SUM(CASE WHEN COL_1 IS NULL THEN 1 ELSE 0 END) AS COL_1_NULLS
   ,SUM(CASE WHEN COL_2 IS NULL THEN 1 ELSE 0 END) AS COL_2_NULLS
FROM TABLE1
This is easy but it can become arduous if you want to do this for multiple tables or if a single table has a lot of columns.
I'm looking for a way to write a query that takes a table name and then loops through each column in that table (possibly pulling the column name by ordinal position via a join to a metadata view?) and sums the number of NULLs in the column. Before anyone jumps on the nitpick bandwagon, please keep in mind that this basic idea could be used for more than just finding NULLs. Any assistance with this issue is greatly appreciated.
You need to use dynamic sql:
-- Generates and runs one SELECT over MYTABLE with a NULL count per column.
-- The statement starts with a dummy first column (first_row), so every real
-- column can be appended with a leading comma.
-- Column references go through QUOTENAME and the alias is bracket-quoted, so
-- names containing spaces, reserved words or "]" still produce valid SQL.
-- Assumes the table name is unique across schemas in this database.
DECLARE @custom_sql NVARCHAR(MAX);
SET @custom_sql = N'SELECT null as first_row';

SELECT @custom_sql = @custom_sql
    + N', SUM(CASE WHEN ' + QUOTENAME(c.COLUMN_NAME) + N' IS NULL THEN 1 ELSE 0 END) as ['
    + REPLACE(c.COLUMN_NAME, N']', N']]') + N'_NULLS]'
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_NAME = 'MYTABLE';

SET @custom_sql = @custom_sql + N' FROM [MYTABLE]';
EXEC (@custom_sql);
You can also use the COALESCE term (just for a slightly different approach):
-- Same NULL-count generator without a dummy column: COALESCE yields '' for
-- the first column and ', ' before every later one.
-- The table name is held in one variable, so the catalog filter and the FROM
-- clause cannot differ in spelling or case (this matters under a
-- case-sensitive collation).
-- Assumes the table name is unique across schemas in this database.
DECLARE @table_name SYSNAME = N'Users';
DECLARE @custom_sql NVARCHAR(MAX);

SELECT @custom_sql = COALESCE(@custom_sql + N', ', N'')
    + N'SUM(CASE WHEN ' + QUOTENAME(c.COLUMN_NAME) + N' IS NULL THEN 1 ELSE 0 END) as ['
    + REPLACE(c.COLUMN_NAME, N']', N']]') + N'_NULLS]'
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_NAME = @table_name;

-- @custom_sql stays NULL when the table has no columns / does not exist.
IF @custom_sql IS NOT NULL
BEGIN
    SET @custom_sql = N'SELECT ' + @custom_sql;
    SET @custom_sql = @custom_sql + N' FROM ' + QUOTENAME(@table_name);
    PRINT @custom_sql;
    EXEC (@custom_sql);
END
I don't know how to make a generic query, but you can always generate the script like this
-- Generates (but does not run) a NULL-count script for TABLE1 and returns the
-- script text as a one-row result.
-- The table is emitted with its real schema, and names are quoted with
-- QUOTENAME / doubled "]", which hand-written [ ] brackets do not handle.
DECLARE @sql NVARCHAR(MAX) = N'select 1 as dummy';
DECLARE @from NVARCHAR(600);

SELECT @sql = @sql + N'
, sum(case when ' + QUOTENAME(c.name) + N' is null then 1 else 0 end) as ['
        + REPLACE(c.name, N']', N']]') + N'_NULLS]',
       @from = QUOTENAME(SCHEMA_NAME(t.schema_id)) + N'.' + QUOTENAME(t.name)
FROM sys.columns AS c
INNER JOIN sys.tables AS t
    ON t.object_id = c.object_id
WHERE t.name = 'TABLE1';

-- Fall back to the bare name when no catalog row was found.
SET @sql = @sql + N' from ' + COALESCE(@from, N'[TABLE1]');

SELECT @sql;
Then you can execute the result eg. with exec sp_executesql #sql
For a cooler approach, you can use ISNULL to skip the first comma.
-- NULL-count generator for the table named in @tablename, producing
--   SELECT [col_count] = SUM(CASE WHEN [col] IS NULL THEN 1 ELSE 0 END), ...
-- The variable is spelled @sql everywhere: on a server with a case-sensitive
-- collation, @sql and @SQL are two different variables.
-- @tablename is SYSNAME because QUOTENAME returns NULL for longer input.
DECLARE @sql NVARCHAR(MAX);
DECLARE @tablename SYSNAME = N'xxxx';

SELECT @sql = COALESCE(@sql + N',', N'')
    + N' [' + REPLACE(c.COLUMN_NAME, N']', N']]') + N'_count] = SUM(CASE WHEN '
    + QUOTENAME(c.COLUMN_NAME) + N' IS NULL THEN 1 ELSE 0 END)' + NCHAR(13)
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_NAME = @tablename;

-- @sql stays NULL when the table has no columns / does not exist.
IF @sql IS NOT NULL
BEGIN
    SET @sql = N'Select' + @sql + N' From ' + QUOTENAME(@tablename);
    PRINT @sql;
    EXEC sp_executesql @sql;
END