Select all columns except those with only null values - sql

Is there a way to select the column names of a certain table, except those columns that contain only null values, without knowing how many columns the table has?
-------------------------
| col1 | col2 | col3 |
------------------------
| val1 | null | val2 |
| val1 | null | null |
| null | null | val2 |
-------------------------
Should result in:
------------------------------------
| cols_except_those_with_null_only |
-----------------------------------
| col1 |
| col3 |
------------------------------------
Thanks!

Create a stored procedure with following content:
-- Lists every column of tblTest that holds at least one non-NULL value.
-- '#cols' is a session temp table (it stays visible inside the dynamic SQL
-- below); '@'-prefixed names are local variables.
create table #cols (colname sysname, nonNullCount int)

insert into #cols (colname)
select name from syscolumns where id = object_id('tblTest')

declare @c sysname
declare @sql nvarchar(4000)

declare curCols cursor local fast_forward for select colname from #cols
open curCols
fetch next from curCols into @c
while @@fetch_status = 0 begin
    -- The column name has to be spliced in as an identifier, so bracket it with
    -- QUOTENAME; the value used in the WHERE clause is passed as a real parameter.
    set @sql = N'update #cols set nonNullCount = (select count(*) from tblTest where '
             + quotename(@c) + N' is not null) where colname = @colname'
    exec sp_executesql @sql, N'@colname sysname', @colname = @c
    fetch next from curCols into @c
end
close curCols
deallocate curCols

-- Result header spelled as requested in the question ("except", not "expect").
declare @rv table (cols_except_those_with_null_only sysname)
insert into @rv (cols_except_those_with_null_only)
select colname from #cols
where nonNullCount > 0

drop table #cols

select cols_except_those_with_null_only from @rv

Try this; it's not the tidiest, but it will work. Just set the table-name variable declared at the top to your table name.
-- Returns the names of the columns of @Table that contain at least one non-NULL value.
-- '@' names are local variables; '#Columns' is a temp table that collects the result.
DECLARE @Table AS SYSNAME
SET @Table = 'Example'
DECLARE @TempColumn SYSNAME
DECLARE @Sql NVARCHAR(MAX)      -- MAX: two bracket-quoted identifiers do not fit reliably in 300 chars
DECLARE @HasNoNulls INT         -- number of non-NULL values found in the current column

CREATE TABLE #Columns (
    ColumnName SYSNAME
)

-- NOTE(review): TABLE_NAME alone matches same-named tables in every schema;
-- add a TABLE_SCHEMA filter if that can happen in your database.
DECLARE ColumnCursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT COLUMN_NAME
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = @Table

OPEN ColumnCursor
FETCH NEXT FROM ColumnCursor
INTO @TempColumn

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Identifiers cannot be bound as parameters, so they are bracket-quoted instead.
    SET @Sql = N'SELECT @HasNoNullsOut = COUNT(*) FROM ' + QUOTENAME(@Table)
             + N' WHERE ' + QUOTENAME(@TempColumn) + N' IS NOT NULL'
    PRINT @Sql
    EXECUTE sp_executesql @Sql, N'@HasNoNullsOut int OUTPUT', @HasNoNullsOut = @HasNoNulls OUTPUT

    IF @HasNoNulls > 0
    BEGIN
        INSERT INTO #Columns (ColumnName)
        VALUES (@TempColumn)
    END

    FETCH NEXT FROM ColumnCursor
    INTO @TempColumn
END

CLOSE ColumnCursor
DEALLOCATE ColumnCursor

SELECT ColumnName FROM #Columns
DROP TABLE #Columns

With this structure you can write a query in a stored procedure that returns each column name of the table, so you can then check every column for null values without caring how many columns the table has.
-- Lists every column of the named table by joining the object catalog
-- (sysobjects) to the column catalog (syscolumns) on the object id.
SELECT
    a.[name] AS [Table],
    b.[name] AS [Column]
FROM sysobjects AS a
INNER JOIN syscolumns AS b
    ON a.[id] = b.[id]
-- TABLE is a reserved word and not a column of either view; the table's
-- name is exposed as sysobjects.name.
WHERE a.[name] = 'yourtable'

Related

How to loop through a list of table names and see if a specific value in a column exists?

I have produced a table in SQL with a list of tables. This list of tables is stored under the column 'table_name'. I want to loop through each entry under 'table_name' and return a 1 if that table has a value in a specific column or 0 if that table does not have a value in a specific column.
How would I do that?
Edited With sample data
table_name
tabel1
table2
table3
table4
Pseudo Code
For i in table_name
if count(table_name["col_name"] = "value") > 0
return 1
else
return 0
Try this:
-- Demo: for each table named in #tables, report whether any row has A = 1.
-- '#'-prefixed names are temp tables; '@'-prefixed names are local variables
-- and table variables.

-- Two sample tables: #t contains the value being searched for, #t2 does not.
drop table if exists #t
create table #t (A int)
insert into #t (A)
select 1

drop table if exists #t2
create table #t2 (A int)
insert into #t2 (A)
select 0

-- The list of table names to check.
drop table if exists #tables
create table #tables (tab varchar(100))

declare @loop table (rn int, tab varchar(100))   -- numbered copy of the list, used as the loop index
declare @res table (cnt int)                     -- receives the count returned by the dynamic query
declare
    @i int = 1
   ,@tab varchar(100) = ''
   ,@query nvarchar(max)

insert into #tables (tab)
select '#t'
union all
select '#t2'

insert into @loop (rn, tab)
select row_number() over (order by tab), tab from #tables

while @i <= (select max(rn) from @loop)
begin
    select @tab = tab from @loop where rn = @i

    -- Column is referenced with the same case it was created with, so the
    -- query also works under a case-sensitive collation.
    set @query = 'select count(*) from ' + @tab + ' where A=1'

    insert into @res (cnt)
    exec (@query)

    if (select cnt from @res) > 0 select 'Exists' else select 'Not Exists'

    -- Intentionally unfiltered: empty the buffer before the next table.
    delete @res
    set @i = @i + 1
end
Assuming you have a singular column/value in question, you can try the following in SSMS:
-- For every table listed in @Tables that has a column named @col, counts the
-- rows where that column equals @val, returning one row per table.
DECLARE @Tables table ( table_name varchar(50) );
INSERT INTO @Tables ( table_name ) VALUES
    ( 'Child' ), ( 'COS' ), ( 'CustomData' ), ( 'Misc' ), ( 'Misc2' );

DECLARE
    @col varchar(50) = 'id', -- column to be queried
    @val varchar(50) = '1', -- value to be queried
    @sql varchar(MAX) = ''; -- must start as an empty string: NULL + text stays NULL

-- Builds one SELECT per table and glues them together with UNION (which also
-- collapses duplicate entries in @Tables). QUOTENAME(x, '''') emits a safely
-- escaped string literal; QUOTENAME(x) emits a bracketed identifier.
SELECT
    @sql = @sql + CASE WHEN LEN( @sql ) > 0 THEN ' UNION ' ELSE '' END
        + 'SELECT ' + QUOTENAME( table_name, '''' ) + ' AS [table_name], COUNT(*) AS [value_count] FROM '
        + QUOTENAME( table_name ) + ' WHERE ' + QUOTENAME( @col ) + '=' + QUOTENAME( @val, '''' )
FROM @Tables t WHERE EXISTS (
    -- skip tables that do not have the column, so the dynamic SQL cannot fail on them
    SELECT * FROM sys.columns c WHERE c.object_id = OBJECT_ID( t.table_name ) AND c.[name] = @col
);

EXEC( @sql );
In my environment this returns:
+------------+-------------+
| table_name | value_count |
+------------+-------------+
| Child | 1 |
| Misc | 1 |
| Misc2 | 0 |
+------------+-------------+
This is the (beautified) dynamic SQL created:
SELECT 'Child' AS [table_name], COUNT(*) AS [value_count] FROM [Child] WHERE [id]='1'
UNION
SELECT 'Misc' AS [table_name], COUNT(*) AS [value_count] FROM [Misc] WHERE [id]='1'
UNION
SELECT 'Misc2' AS [table_name], COUNT(*) AS [value_count] FROM [Misc2] WHERE [id]='1'
The EXISTS in the WHERE clause eliminates any tables that do not have the column in question, and thereby any errors related to it.

MS SQL Server - How to define Table names from list in From clause

how can I make below code work. I got table names stored in temp table which I need to put in From clause
-- Question script (not working as posted): tries to build one dynamic SELECT
-- per table name stored in the temp table #temp.
-- NOTE(review): the '#'-prefixed scalars below (#BranchID, #TABLENAME, #SQL) are
-- used as local variables; T-SQL requires an '@' prefix for those - presumably an
-- extraction artifact, confirm against the original post.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
CREATE TABLE #temp
(
TABLENAME NVARCHAR(50)
)
INSERT INTO #temp (TABLENAME)
VALUES ('SALES'),
('CUSTOMER'),
('ORDERS')
DECLARE #BranchID AS INT,
#TABLENAME AS NVARCHAR(20),
#SQL AS NVARCHAR(MAX)
SET #BranchID = 8
-- The subquery returns all three rows of #temp, while a scalar assignment
-- accepts only a single value.
SET #TABLENAME = (SELECT TABLENAME FROM #temp)
-- The table name is spliced in without surrounding quotes, so the generated
-- statement treats it as a column reference rather than a string literal.
-- The INT branch id is concatenated with '+' without a cast to a string type.
SET #SQL = 'SELECT [B#] AS BranchID , ' + #TABLENAME + ' AS TABLENAME
FROM [DB001].[dbo].[' + #TABLENAME + ']
WHERE [B#] = ' + #BranchID + '
GROUP BY [B#]'
EXEC(#SQL)
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
I want result to appear like below
BranchID | TABLENAME
--------------------
8 | SALES
8 | CUSTOMER
NULL | ORDERS
You need to double the single quotes, so that the table name is emitted as a string literal inside the dynamic SQL:
SELECT [B#] AS BranchID , ''' + #TABLENAME + ''' AS TABLENAME
You can also use parameters for this. But since you have to munge the query string, that is less important.
Hi, I don't fully understand what you are looking for, but here are two examples that I think could help you.
-- Option 1: pair every table name stored in #temp with the branch id.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
CREATE TABLE #temp
(
    TABLENAME NVARCHAR(50)
)
INSERT INTO #temp (TABLENAME)
VALUES ('SALES'),
       ('CUSTOMER'),
       ('ORDERS')

DECLARE @BranchID AS INT
SET @BranchID = 8

SELECT @BranchID AS BranchId, TABLENAME
FROM #temp

IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
GO

-- Option 2: query each table whose name is stored in #temp. The tables must
-- already exist in [DB001].[dbo]. Each iteration returns its own result set,
-- and a table without matching rows returns an empty one.
IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp
CREATE TABLE #temp
(
    TABLENAME NVARCHAR(50)
)
INSERT INTO #temp (TABLENAME)
VALUES ('SALES'),
       ('CUSTOMER'),
       ('ORDERS')

DECLARE @BranchID AS INT,
        @TABLENAME AS NVARCHAR(50),   -- same length as #temp.TABLENAME, so names are not truncated
        @SQL AS NVARCHAR(MAX)
SET @BranchID = 8

DECLARE curhandler CURSOR LOCAL FAST_FORWARD FOR SELECT TABLENAME FROM #temp
OPEN curhandler
FETCH NEXT FROM curhandler INTO @TABLENAME
WHILE @@FETCH_STATUS = 0
BEGIN
    -- The table name is needed twice: as an identifier in FROM (bracket-quoted
    -- with QUOTENAME) and as a value in the SELECT list (bound as a parameter,
    -- together with the branch id).
    SET @SQL = N'SELECT [B#] AS BranchID, @TableNameValue AS TABLENAME
FROM [DB001].[dbo].' + QUOTENAME(@TABLENAME) + N'
WHERE [B#] = @BranchIDValue
GROUP BY [B#]'
    EXEC sp_executesql @SQL,
         N'@TableNameValue NVARCHAR(50), @BranchIDValue INT',
         @TableNameValue = @TABLENAME,
         @BranchIDValue = @BranchID
    FETCH NEXT FROM curhandler INTO @TABLENAME
END
CLOSE curhandler
DEALLOCATE curhandler

IF OBJECT_ID('tempdb..#temp') IS NOT NULL DROP TABLE #temp

How to compare columns and return only 1 of them in SQL

Background: I need to write a function in T-SQL on SQL Server 2008 10.0.5869.
Here's the table I'm working on (for the sake of simplicity - I only put in 3 columns here - but I have 10 columns for the actual work):
ID | Column1 | Column2 | Column3
1 | 2014-05 | 2015-02 | 2013-04
2 | 2012-09 | 2011-02 | 2013-03
ID is varchar and Column(x) are all datetime.
My end goal is to design a function fn_CompareDate to do something like this:
select fn_CompareDate(ID) from table where ID = 1
The query above should return the latest date from Column(x)s which should be 2015-02.
I used CASE WHEN but it would be almost impossible to use it for 10 columns. Is there another way to achieve the same result?
One approach is to use apply:
-- Latest of the date columns for one row: CROSS APPLY turns the row's columns
-- into a one-column set via a table value constructor and takes its MAX.
-- [table] stands for your table name (bracketed because TABLE is a reserved word).
select d.maxd
from [table] as t
cross apply
     (select max(val.d) as maxd
      -- the constructor must be parenthesised as a derived table, one row per column
      from (values (t.column1), (t.column2), (t.column3)) as val(d)
     ) as d
-- ID is a varchar column, so compare against a string literal
where t.id = '1';
EDIT:
You can do this without values():
-- Variant without a VALUES constructor: the outer row's date columns are
-- stacked with UNION ALL inside the applied subquery and reduced with MAX.
select latest.maxd
from table src
cross apply (
    select max(d) as maxd
    from (
        select id, column1 as d
        union all select id, column2
        union all select id, column3
        union all select id, column4
    ) stacked
    where src.id = stacked.id
) latest
where src.id = 1;
I think the function below serves the requirement better:
-- Returns the latest of COLUMN1..COLUMN3 for the row whose ID equals @ID.
--   @ID     : key of the row to inspect (ID is a varchar column).
--   returns : the greatest non-NULL date of that row, or NULL when no row
--             matches or all listed columns are NULL.
-- [table] stands for your table name (bracketed because TABLE is a reserved word).
CREATE FUNCTION fn_CompareDate(@ID VARCHAR(10))
RETURNS DATETIME
AS
BEGIN
    DECLARE @maxDate DATETIME;

    SELECT @maxDate =
        -- unpivot the row's columns into a one-column set and take its maximum
        (SELECT MAX(v)
         FROM (VALUES (COLUMN1), (COLUMN2), (COLUMN3)) AS value(v))
    FROM [table]
    WHERE ID = @ID;

    RETURN @maxDate;
END;
Now run the below query
select dbo.fn_CompareDate(ID) from table where ID = 1
Hope you got it.
You can use dynamic SQL and INFORMATION_SCHEMA.COLUMNS. It is supposed to work in SQL Server 2008. Try this:
-- Returns (as a single-row result set) the latest value found across all
-- non-ID columns of one row, discovering the columns at run time.
--   @ID        : key of the row to inspect.
--   @tableName : table to read (unqualified name).
--   @dbName    : database that contains the table.
-- NOTE(review): INFORMATION_SCHEMA.COLUMNS describes the current database only,
-- so the TABLE_CATALOG filter matches only when the procedure is created in @dbName.
CREATE PROCEDURE sp_CompareDate
    @ID int,
    @tableName NVARCHAR(MAX) = 'table2', -- Your table name
    @dbName NVARCHAR(MAX) = 'temp' -- Your database name
AS
BEGIN
    DECLARE @maxFieldValue DATETIME
    DECLARE @curFieldName SYSNAME
    DECLARE @curFieldValue DATETIME
    DECLARE @sql NVARCHAR(MAX)

    DECLARE fieldCursor CURSOR LOCAL FAST_FORWARD FOR
        SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_NAME = @tableName
          AND TABLE_CATALOG = @dbName AND COLUMN_NAME != 'ID'

    OPEN fieldCursor
    FETCH NEXT FROM fieldCursor INTO @curFieldName

    WHILE (@@FETCH_STATUS = 0)
    BEGIN
        -- Reset first: when no row matches, the SELECT assigns nothing and the
        -- previous column's value would otherwise be reused.
        SET @curFieldValue = NULL

        -- Identifiers are bracket-quoted; the row id is bound as a parameter.
        SET @sql = N'USE ' + QUOTENAME(@dbName) + N'; SELECT @curDate = ' + QUOTENAME(@curFieldName)
                 + N' FROM ' + QUOTENAME(@tableName) + N' WHERE ID = @rowId'
        EXEC sp_executesql @sql,
             N'@curDate DATETIME OUTPUT, @rowId INT',
             @curDate = @curFieldValue OUTPUT,
             @rowId = @ID;

        -- A plain "<" never succeeds while the running maximum is still NULL,
        -- so the first non-NULL value has to be accepted explicitly.
        IF (@curFieldValue IS NOT NULL
            AND (@maxFieldValue IS NULL OR @maxFieldValue < @curFieldValue))
            SET @maxFieldValue = @curFieldValue

        FETCH NEXT FROM fieldCursor INTO @curFieldName
    END

    CLOSE fieldCursor;
    DEALLOCATE fieldCursor;

    SELECT @maxFieldValue
END
Hope this helps.
I found the 2nd solution from this question works quite well for me:
Create a function similar to this:
-- Stacks the date columns of the row identified by @id into one column and
-- returns the greatest value. [table] stands for your table name.
select max(all_dates.col)
from
(
    select column1 as [col] from [table] where id = @id
    union all
    select column2 from [table] where id = @id
    union all
    select column3 from [table] where id = @id
) as all_dates   -- SQL Server requires an alias on a derived table

Selecting columns from a query

I'm using a request to get a collection of columns name:
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE [...]
From this collection, I'd like to count every not null, not empty value from the original table group by column name.
Let's say I have a table containing
COL1 | COL2 | COL3
------------------
VAL1 | VAL2 | NULL
VAL3 | | VAL4
VAL5 | |
I'm looking for a request to get:
COL1 | 3
COL2 | 1
COL3 | 1
It's for analytics purpose.
Thanks for your help!
Here is a simple process. Run the following query:
-- Generates one "SELECT '<column>', COUNT([<column>]) ..." statement per column;
-- COUNT(column) counts the non-NULL values of that column.
-- INFORMATION_SCHEMA.COLUMNS exposes the schema as TABLE_SCHEMA.
SELECT 'SELECT ''' + REPLACE(COLUMN_NAME, '''', '''''') + ''', COUNT(' + QUOTENAME(COLUMN_NAME) + ') as NotNull FROM '
     + QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME) + ' union all '
FROM INFORMATION_SCHEMA.COLUMNS
WHERE [...]
Copy the results into a query window, remove the final union all, and run the query.
The below code seems to work for your issue
-- Sample data from the question.
create table sample
(
    col1 varchar(10),
    col2 varchar(10),
    col3 varchar(10)
)
INSERT INTO sample (COL1,COL2,COL3) VALUES ('VAL1 ',' VAL2 ',NULL);
INSERT INTO sample (COL1,COL2,COL3) VALUES ('VAL3 ',' ',' VAL4');
INSERT INTO sample (COL1,COL2,COL3) VALUES ('VAL5 ',' ',' ');

-- Builds one conditional COUNT per column of "sample" and runs them in a single
-- SELECT, producing one result row with one count per column.
DECLARE @cols1 NVARCHAR(MAX);
DECLARE @sql NVARCHAR(MAX);

SELECT @cols1 = STUFF((
    -- LEN() ignores trailing blanks and yields NULL for NULL, so the CASE only
    -- counts values that are neither NULL nor empty/blank.
    -- "!=" is kept on purpose: FOR XML PATH would entitize "<" and ">".
    SELECT ', COUNT(CASE WHEN len(' + QUOTENAME(t1.name) + ')!=0 THEN 1 END) AS ' + QUOTENAME(t1.name)
    FROM sys.columns AS t1
    WHERE t1.object_id = OBJECT_ID('sample')
    ORDER BY t1.column_id
    FOR XML PATH('')
), 1, 2, '');   -- strip the leading ', '

SET @sql = '
SELECT ' + @cols1 + '
FROM sample
'
EXEC(@sql)
Here is my slightly longer take on this:
-- Counts, for every column of @tableName, the values that are neither NULL nor empty.
declare @cols table (colID integer, colName sysname)
declare @results table (colName sysname, valueCount bigint)

-- table name
declare @tableName nvarchar(256) = 'INSERT TABLE NAME HERE'
declare @objectId int = object_id(@tableName)
-- rebuilt from the catalog so that schema and name are each bracket-quoted
declare @quotedTable nvarchar(600) =
    quotename(object_schema_name(@objectId)) + N'.' + quotename(object_name(@objectId))

-- select column names from table
insert into @cols (colID, colName)
select column_id, name from sys.columns where object_id = @objectId order by column_id

declare @currentColID int = 0
declare @currentName sysname = ''
declare @currentCount bigint = 0
declare @sql nvarchar(max) -- where the dynamic sql will be stored

-- go through all columns
while (1 = 1)
begin
    -- step by id: pick the next column after the one processed last
    select top 1 @currentColID = c.colID, @currentName = c.colName
    from @cols c
    where c.colID > @currentColID
    order by c.colID

    if @@ROWCOUNT = 0 break;

    -- dynamic query to get non-empty, not-null counts; both conditions must
    -- hold (with OR, empty strings would be counted as well)
    select @sql = N'select @p1 = COUNT(' + quotename(@currentName) + N') from ' + @quotedTable
                + N' where ' + quotename(@currentName) + N' is not null and LEN('
                + quotename(@currentName) + N') > 0'

    exec sp_executesql @sql, N'@p1 bigint output', @p1 = @currentCount output

    -- insert result to buffer
    insert into @results (colName, valueCount) values (@currentName, @currentCount)
end

-- print the buffer
select colName, valueCount from @results
Have fun :)

Select a non-empty value for each column in each table in each database

There might be a better way to do this. But I'm trying to find columns that might contain personal information.
Problem is that the tables are poorly named (non-english, abbreviations). So I'm running this dynamic script, that will return all tables in all databases and their columns.
-- Question script: builds and runs one dynamic statement that lists every
-- column of every user table (type 'U') in every database of sys.databases.
-- NOTE(review): '#SQL' is used as a local variable; T-SQL requires an '@'
-- prefix for variables - presumably an extraction artifact, confirm against
-- the original post.
USE master;
DECLARE #SQL varchar(max)
-- Seed row (NULL, NULL) that opens the CTE, so each database can be appended
-- with a leading UNION.
SET #SQL=';WITH cteCols (dbName, colName) AS (SELECT NULL, NULL '
-- Appends one UNION branch per row of sys.databases; the database name is
-- spliced into both the output value and the three-part catalog view names.
SELECT #SQL=#SQL+'UNION
SELECT
'''+d.name COLLATE Czech_CI_AS +'.''+sh.name COLLATE Czech_CI_AS +''.''+o.name COLLATE Czech_CI_AS ''dbSchTab''
, c.name COLLATE Czech_CI_AS ''colName''
FROM ['+d.name+'].sys.columns c
JOIN ['+d.name+'].sys.objects o ON c.object_id=o.object_id
JOIN ['+d.name+'].sys.schemas sh ON o.schema_id=sh.schema_id
WHERE o.[type] = ''U'' COLLATE Czech_CI_AS'
FROM sys.databases d
-- Closes the CTE and returns its rows ordered by the first column.
SET #SQL = #SQL + ')
SELECT
*
FROM cteCols cs
ORDER BY 1;'
EXEC (#SQL);
Result:
+---------------------+------------+
| DatabaseSchemaTable | ColumnName |
+---------------------+------------+
| dev1.dbo.Users | Col1 |
| dev1.dbo.Users | Col2 |
| dev1.dbo.Users | Col3 |
| dev1.dbo.Users | Col4 |
+---------------------+------------+
But because of the poor column naming, I can't tell what data is in these columns. I'd like to select a TOP (1) non NULL value from each column, but I'm struggling.
Required result:
+---------------------+------------+--------------+
| DatabaseSchemaTable | ColumnName | ColumnValue |
+---------------------+------------+--------------+
| dev1.dbo.Users | Col1 | 20 |
| dev1.dbo.Users | Col2 | 2018-02-06 |
| dev1.dbo.Users | Col3 | 202-555-0133 |
| dev1.dbo.Users | Col4 | John Doe |
+---------------------+------------+--------------+
Ideas I had:
I would need to either transpose each of the tables (probably not a
job for PIVOT)
I could join with the table dynamically and only display the current column. But I can't use dynamic column in correlated subquery.
Any ideas?
I would create a temporary table such as ##cols, and then use this temporary table to loop through the table, running update queries on the table itself. Mind you, we have a lot of spaces and other potentially troublesome characters in our field names. Therefore I updated your cte with some QUOTENAMEs around the field / table / schema / db names.
-- Builds ##Cols with one row per column of every user table in every database,
-- then fills top1Value with one non-NULL sample value of each column.
-- '##Cols' is a global temp table (visible to the dynamic SQL); '@' names are variables.
USE master;

-- allow the script to be re-run in the same session
IF OBJECT_ID('tempdb..##Cols') IS NOT NULL DROP TABLE ##Cols;

DECLARE @SQL varchar(max);

-- Seed row that fixes the CTE's column list; it is filtered out again below.
SET @SQL=';WITH cteCols (dbName, colName, top1Value) AS (SELECT NULL, NULL, CAST(NULL AS VARCHAR(MAX)) '

-- One UNION branch per database. The database name appears both inside a
-- string literal (single quotes doubled) and as an identifier (QUOTENAME).
SELECT @SQL=@SQL+' UNION
SELECT
'''+REPLACE(QUOTENAME(d.[name]), '''', '''''') COLLATE Czech_CI_AS +'.''+QUOTENAME(sh.name) COLLATE Czech_CI_AS +''.''+QUOTENAME(o.name) COLLATE Czech_CI_AS ''dbSchTab''
, QUOTENAME(c.name) COLLATE Czech_CI_AS ''colName'', CAST(NULL AS VARCHAR(MAX)) AS ''top1Value''
FROM '+QUOTENAME(d.[name])+'.sys.columns c
JOIN '+QUOTENAME(d.[name])+'.sys.objects o ON c.object_id=o.object_id
JOIN '+QUOTENAME(d.[name])+'.sys.schemas sh ON o.schema_id=sh.schema_id
WHERE o.[type] = ''U'' COLLATE Czech_CI_AS'
FROM sys.databases d;

SET @SQL = @SQL + ')
SELECT
cs.dbName, cs.colName, cs.top1Value
INTO ##Cols
FROM cteCols cs
WHERE cs.colName IS NOT NULL;'
EXEC (@SQL);

-- Sized for bracket-quoted names: one identifier (258) and a three-part name (776).
DECLARE @colName NVARCHAR(258), @dbName NVARCHAR(776), @SQL2 NVARCHAR(MAX);

-- STATIC: iterate over a snapshot, because the loop updates ##Cols itself.
DECLARE C CURSOR LOCAL STATIC FOR SELECT [colName],[dbName] FROM ##Cols;
OPEN C;
FETCH NEXT FROM C INTO @colName, @dbName;
WHILE @@FETCH_STATUS=0
BEGIN
    -- @colName / @dbName are already bracket-quoted, so they can be spliced in
    -- as identifiers; as row keys they are bound as parameters instead.
    -- The explicit CAST covers column types with no implicit conversion to varchar.
    SET @SQL2 = N'UPDATE ##Cols SET [top1Value] = (SELECT TOP 1 CAST(x.' + @colName
              + N' AS VARCHAR(MAX)) FROM ' + @dbName + N' x WHERE x.' + @colName
              + N' IS NOT NULL) WHERE [colName] = @col AND [dbName] = @tab';
    EXEC sp_executesql @SQL2,
         N'@col NVARCHAR(258), @tab NVARCHAR(776)',
         @col = @colName,
         @tab = @dbName;
    FETCH NEXT FROM C INTO @colName, @dbName
END;
CLOSE C;
DEALLOCATE C;

SELECT dbName, colName, top1Value
FROM ##Cols
ORDER BY dbName, colName;
It's not pretty, but it'd suit your needs.
You might try this:
--In this table we write our findings
CREATE TABLE ##TargetTable(ID INT IDENTITY, TableName VARCHAR(500), FirstRowXML XML);
--the undocumented sp "MSforeachtable" allows to create a statement where the
--question mark is a place holder for the actual table
--(SELECT TOP 1 * FROM ? FOR XML PATH('row')) will create one single XML with all first row's values
EXEC sp_MSforeachtable 'INSERT INTO ##TargetTable(TableName,FirstRowXML) SELECT ''?'', (SELECT TOP 1 * FROM ? FOR XML PATH(''row''))';
--Now it is easy to get what you want
SELECT ID
,TableName
,col.value('local-name(.)','nvarchar(max)') AS colname
,col.value('text()[1]','nvarchar(max)') AS colval
FROM ##TargetTable
CROSS APPLY FirstRowXML.nodes('/row/*') A(col);
GO
DROP TABLE ##TargetTable
Just use SELECT TOP X to get more than one row...
UPDATE
The following will create a table with all columns of all tables of all databases and fetch one value per row.
-- Collects every column of every base table in every database into
-- ##TargetTable, together with a generated command that fetches one value of
-- that column; the commands are then executed one by one to fill OneValue.
-- '##TargetTable' is a global temp table; '@' names are local variables.
CREATE TABLE ##TargetTable(ID INT IDENTITY
,TABLE_CATALOG NVARCHAR(300),TABLE_SCHEMA NVARCHAR(300),TABLE_NAME NVARCHAR(300),COLUMN_NAME NVARCHAR(300)
,DATA_TYPE NVARCHAR(300),CHARACTER_MAXIMUM_LENGTH INT, IS_NULLABLE VARCHAR(10),Command NVARCHAR(MAX),OneValue NVARCHAR(MAX));

-- sp_MSforeachdb (undocumented) runs the statement once per database, replacing
-- the question mark with the database name. USE [?] keeps names that need
-- quoting (spaces, hyphens) working.
-- Nullable columns use MAX() so that a non-NULL value is returned when one
-- exists; the CAST happens inside MAX() because MAX() rejects some types
-- (for example bit and xml).
EXEC sp_MSforeachdb
'USE [?];
INSERT INTO ##TargetTable(TABLE_CATALOG,TABLE_SCHEMA,TABLE_NAME,COLUMN_NAME,DATA_TYPE,CHARACTER_MAXIMUM_LENGTH,IS_NULLABLE,Command)
SELECT ''?''
,c.TABLE_SCHEMA
,c.TABLE_NAME
,c.COLUMN_NAME
,c.DATA_TYPE
,c.CHARACTER_MAXIMUM_LENGTH
,c.IS_NULLABLE
, CASE WHEN c.IS_NULLABLE=''YES''
THEN ''SELECT MAX(CAST('' + QUOTENAME(c.COLUMN_NAME) + '' AS NVARCHAR(MAX)))''
ELSE ''SELECT TOP 1 CAST('' + QUOTENAME(c.COLUMN_NAME) + '' AS NVARCHAR(MAX))''
END
+ '' FROM '' + QUOTENAME(''?'') + ''.'' + QUOTENAME(c.TABLE_SCHEMA) + ''.'' + QUOTENAME(c.TABLE_NAME)
FROM INFORMATION_SCHEMA.COLUMNS c
INNER JOIN INFORMATION_SCHEMA.TABLES t ON c.TABLE_CATALOG=t.TABLE_CATALOG AND c.TABLE_SCHEMA=t.TABLE_SCHEMA AND c.TABLE_NAME=T.TABLE_NAME AND t.TABLE_TYPE=''BASE TABLE''
WHERE c.DATA_TYPE NOT IN(''BINARY'',''VARBINARY'',''IMAGE'',''NTEXT'')';

DECLARE @ID INT,@Command NVARCHAR(MAX);

-- STATIC: iterate over a snapshot, because the loop updates ##TargetTable itself.
DECLARE cur CURSOR LOCAL STATIC FOR SELECT ID,Command FROM ##TargetTable;
OPEN cur;
FETCH NEXT FROM cur INTO @ID,@Command;
WHILE @@FETCH_STATUS=0
BEGIN
    -- Wrap the stored SELECT in an UPDATE that writes its result back to the same row.
    SET @Command = N'UPDATE ##TargetTable SET OneValue=(' + @Command + N') WHERE ID=' + CAST(@ID AS NVARCHAR(100))
    PRINT @Command;
    EXEC(@Command);
    FETCH NEXT FROM cur INTO @ID,@Command;
END
CLOSE cur;
DEALLOCATE cur;
GO
SELECT * FROM ##TargetTable;
GO
DROP TABLE ##TargetTable;