I have loaded the column names from a table into a temporary table called #COLUMN_NAMES. I would like to build a WHILE loop that passes each row of that temp table (one column name per row) through the query that builds my #Data_Quality_Check table, shown below.
There are 54 rows of column_names in my table #Column_names currently. Ideally I would like to have the while loop process each of the variables and put the output into the #Data_Quality_Check table.
-- Rebuild the scratch list of column names for ssrs_sourcedata.
-- Any copy left over from an earlier run in this session is dropped first.
IF OBJECT_ID('tempdb..#COLUMN_NAMES') IS NOT NULL
BEGIN
    DROP TABLE #COLUMN_NAMES;
END;

-- One row per column the catalog view reports for that table name
-- (the lookup is by table name only; no schema filter is applied).
SELECT
    c.COLUMN_NAME AS Column_Names
INTO #COLUMN_NAMES
FROM information_schema.columns AS c
WHERE c.table_name = 'ssrs_sourcedata';
----- >>>> !!!!!! INSERT CODE TO BUILD WHILE LOOP !!!!
-- Template of the per-column data-quality query. The result (one row per
-- periodenddate) is materialised into the temp table #DATA_QUALITY_CHECK.
-- NOTE(review): `#Column_Name` stands in for the column being profiled; the
-- statement is not runnable until a real column name is substituted for it
-- (for example by building the statement as dynamic SQL).
-- Drop a leftover copy of the result table before SELECT ... INTO recreates it.
if object_id('tempdb..#DATA_QUALITY_CHECK') is not null
drop table #DATA_QUALITY_CHECK
SELECT
periodenddate,
-- Label is a string literal naming the profiled column.
'#Column_Name' AS Label,
-- NOTE(review): SUM and AVG only accept numeric columns — confirm every
-- profiled column is numeric or these two aggregates will fail.
MIN(#Column_Name) AS Min_Value,
MAX(#Column_Name) AS Max_Value,
SUM(#Column_Name) AS Sum_Value,
AVG(#Column_Name) AS Avg_Value,
SUM(Case when #Column_Name IS NULL THEN 1 ELSE 0 END) AS Null_Count,
-- Counts values whose text form has length 0.
-- NOTE(review): T-SQL LEN ignores trailing spaces, so all-blank values are
-- presumably counted here too; and CAST(... AS VARCHAR) with no length uses
-- 30 characters, so the length columns below cannot exceed 30 — confirm.
SUM(Case when len(cast(#Column_Name AS VARCHAR)) = 0 THEN 1 ELSE 0 END) AS Space_Count,
MAX(len(cast(#Column_Name AS VARCHAR))) AS max_length,
MIN(len(cast(#Column_Name AS VARCHAR))) AS min_length
INTO
#DATA_QUALITY_CHECK
FROM
dbcrms.report.ssrs_sourcedata
WHERE
-- Only the single reporting period 2017-06-30 is profiled.
periodenddate = '2017-06-30'
GROUP BY
periodenddate
Use dynamic SQL for this. You need to create the empty #DATA_QUALITY_CHECK temp table first, though, because the loop below inserts into it:
-- Runs the data-quality aggregate once for every column name stored in
-- #COLUMN_NAMES and appends one result row per column to #DATA_QUALITY_CHECK.
--
-- Prerequisite: #DATA_QUALITY_CHECK must already exist with the ten columns
-- named in the INSERT list below.
-- NOTE(review): SUM/AVG fail on non-numeric columns; restrict #COLUMN_NAMES to
-- numeric columns (or drop those two aggregates) if the table has other types.
DECLARE @cnt INT = 1;                 -- 1-based position of the column being processed
DECLARE @mcnt INT;                    -- number of column names to process
DECLARE @vsql NVARCHAR(MAX);          -- generated statement for the current column
DECLARE @column_name SYSNAME;         -- raw column name read from #COLUMN_NAMES
DECLARE @quoted_column NVARCHAR(258); -- [bracketed] identifier, safe to embed
DECLARE @label_literal NVARCHAR(258); -- 'quoted' string literal, safe to embed

SELECT @mcnt = COUNT(*)
FROM #COLUMN_NAMES;

WHILE @cnt <= @mcnt
BEGIN
    -- Pick the @cnt-th name in alphabetical order. The derived table needs an
    -- alias in T-SQL.
    SELECT @column_name = numbered.column_names
    FROM (
        SELECT
            column_names,
            ROW_NUMBER() OVER (ORDER BY column_names) AS rn
        FROM #COLUMN_NAMES
    ) AS numbered
    WHERE numbered.rn = @cnt;

    SET @quoted_column = QUOTENAME(@column_name);
    SET @label_literal = QUOTENAME(@column_name, '''');

    -- The INSERT column list matches the ten selected expressions one-to-one,
    -- and the date literal uses doubled quotes because it sits inside a string.
    -- VARCHAR(MAX) avoids the 30-character default of CAST(... AS VARCHAR).
    SET @vsql = N'
INSERT INTO #DATA_QUALITY_CHECK
    (periodenddate, Label, min_value, max_value, sum_value, avg_value,
     null_count, space_count, max_length, min_length)
SELECT
    periodenddate,
    ' + @label_literal + N' AS Label,
    MIN(' + @quoted_column + N') AS Min_Value,
    MAX(' + @quoted_column + N') AS Max_Value,
    SUM(' + @quoted_column + N') AS Sum_Value,
    AVG(' + @quoted_column + N') AS Avg_Value,
    SUM(CASE WHEN ' + @quoted_column + N' IS NULL THEN 1 ELSE 0 END) AS Null_Count,
    SUM(CASE WHEN LEN(CAST(' + @quoted_column + N' AS VARCHAR(MAX))) = 0 THEN 1 ELSE 0 END) AS Space_Count,
    MAX(LEN(CAST(' + @quoted_column + N' AS VARCHAR(MAX)))) AS max_length,
    MIN(LEN(CAST(' + @quoted_column + N' AS VARCHAR(MAX)))) AS min_length
FROM dbcrms.report.ssrs_sourcedata
WHERE periodenddate = ''2017-06-30''
GROUP BY periodenddate;';

    EXEC sys.sp_executesql @vsql;

    SET @cnt = @cnt + 1;
END;
Related
Here is my sql:
-- Start from a clean slate: remove #COLUMN_NAMES if this session already has one.
IF OBJECT_ID('tempdb..#COLUMN_NAMES') IS NOT NULL
    DROP TABLE #COLUMN_NAMES;

-- Capture every column name the catalog lists for a table called ssrs_sourcedata.
SELECT
    cols.COLUMN_NAME AS Column_Names
INTO #COLUMN_NAMES
FROM information_schema.columns AS cols
WHERE cols.table_name = 'ssrs_sourcedata';
----- select * from #COLUMN_NAMES ----
-- Template of the per-column data-quality query; its result is written to the
-- temp table #DATA_QUALITY_CHECK via SELECT ... INTO.
-- NOTE(review): `#Column_Name` is a placeholder for the column being profiled;
-- the statement cannot run until a real column name is substituted for it.
-- Remove a leftover result table so SELECT ... INTO can recreate it.
if object_id('tempdb..#DATA_QUALITY_CHECK') is not null
drop table #DATA_QUALITY_CHECK
SELECT
periodenddate,
-- Label is a string literal naming the profiled column.
'#Column_Name' AS Label,
-- NOTE(review): SUM and AVG require a numeric column — confirm for each column.
MIN(#Column_Name) AS Min_Value,
MAX(#Column_Name) AS Max_Value,
SUM(#Column_Name) AS Sum_Value,
AVG(#Column_Name) AS Avg_Value,
SUM(Case when #Column_Name IS NULL THEN 1 ELSE 0 END) AS Null_Count,
-- Counts values whose text form has length 0.
-- NOTE(review): CAST(... AS VARCHAR) with no length uses 30 characters in
-- T-SQL, so the length columns below presumably cap at 30 — confirm.
SUM(Case when len(cast(#Column_Name AS VARCHAR)) = 0 THEN 1 ELSE 0 END) AS
Space_Count,
MAX(len(cast(#Column_Name AS VARCHAR))) AS max_length,
MIN(len(cast(#Column_Name AS VARCHAR))) AS min_length
INTO #DATA_QUALITY_CHECK
-- Only the single reporting period 2017-06-30 is profiled.
FROM dbcrms.report.ssrs_sourcedata WHERE periodenddate = '2017-06-30'
GROUP BY periodenddate
How do I create a variable from the #column_names table, pass it through the data quality check query, and store the results in the data quality check table? Also, how do I get the SQL to process every column, from the first to the last? There are 51 records in the #column_names table.
-- Table variable holding the column names of ssrs_sourcedata.
-- Table variables must be named with a leading @ (a leading # is only valid
-- for temp tables created with CREATE TABLE / SELECT ... INTO).
DECLARE @Column_Name TABLE
(
    -- SYSNAME matches the type of INFORMATION_SCHEMA.COLUMNS.COLUMN_NAME,
    -- so Unicode column names are stored without loss.
    Column_Name SYSNAME NOT NULL
);

INSERT INTO @Column_Name (Column_Name)
SELECT c.COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_NAME = 'ssrs_sourcedata';
You can now use this table in your query.
You can use a cursor as follows:
-- Profiles every column listed in #COLUMN_NAMES (expects a Column_Names column)
-- and stores one result row per column in the global temp table
-- ##DATA_QUALITY_CHECK.
-- NOTE(review): SUM/AVG fail on non-numeric columns, and the INT result
-- columns assume integer-like data — confirm against the source table.
DECLARE @Column_Name SYSNAME;          -- column currently being profiled
DECLARE @SQL NVARCHAR(MAX);            -- generated statement for that column
DECLARE @quoted_column NVARCHAR(258);  -- [bracketed] identifier, safe to embed
DECLARE @label_literal NVARCHAR(258);  -- 'quoted' string literal, safe to embed

-- Allow the script to be re-run in the same session.
IF OBJECT_ID('tempdb..##DATA_QUALITY_CHECK') IS NOT NULL
    DROP TABLE ##DATA_QUALITY_CHECK;

-- The table has one column per expression selected by the generated
-- statement (periodenddate and Space_Count included).
CREATE TABLE ##DATA_QUALITY_CHECK
(
    periodenddate DATETIME,  -- NOTE(review): type assumed; match the source column
    Label         VARCHAR(255),
    Min_Value     INT,
    Max_Value     INT,
    Sum_Value     INT,
    Avg_Value     INT,
    Null_Count    INT,
    Space_Count   INT,
    max_length    INT,
    min_length    INT
);

DECLARE COLUMN_NAME_CURSOR CURSOR LOCAL FAST_FORWARD
FOR
    SELECT COLUMN_NAMES
    FROM #COLUMN_NAMES;

OPEN COLUMN_NAME_CURSOR;
FETCH NEXT FROM COLUMN_NAME_CURSOR INTO @Column_Name;

WHILE @@FETCH_STATUS = 0
BEGIN
    SET @quoted_column = QUOTENAME(@Column_Name);
    SET @label_literal = QUOTENAME(@Column_Name, '''');

    -- Label is emitted as a string literal (the column's name), not as a
    -- reference to the column itself. VARCHAR(MAX) avoids the 30-character
    -- default of CAST(... AS VARCHAR).
    SET @SQL = N'
INSERT INTO ##DATA_QUALITY_CHECK
    (periodenddate, Label, Min_Value, Max_Value, Sum_Value, Avg_Value,
     Null_Count, Space_Count, max_length, min_length)
SELECT
    periodenddate,
    ' + @label_literal + N' AS Label,
    MIN(' + @quoted_column + N') AS Min_Value,
    MAX(' + @quoted_column + N') AS Max_Value,
    SUM(' + @quoted_column + N') AS Sum_Value,
    AVG(' + @quoted_column + N') AS Avg_Value,
    SUM(CASE WHEN ' + @quoted_column + N' IS NULL THEN 1 ELSE 0 END) AS Null_Count,
    SUM(CASE WHEN LEN(CAST(' + @quoted_column + N' AS VARCHAR(MAX))) = 0 THEN 1 ELSE 0 END) AS Space_Count,
    MAX(LEN(CAST(' + @quoted_column + N' AS VARCHAR(MAX)))) AS max_length,
    MIN(LEN(CAST(' + @quoted_column + N' AS VARCHAR(MAX)))) AS min_length
FROM dbcrms.report.ssrs_sourcedata
WHERE periodenddate = ''2017-06-30''
GROUP BY periodenddate;';

    --PRINT @SQL
    EXEC sys.sp_executesql @SQL;

    FETCH NEXT FROM COLUMN_NAME_CURSOR INTO @Column_Name;
END;

CLOSE COLUMN_NAME_CURSOR;
DEALLOCATE COLUMN_NAME_CURSOR;
I have the following sql:
-- Counts NULL and non-NULL values for every column of every table in one
-- schema (optionally a single table) of the current database and returns
-- the counts from the temp table #Nulls.
SET NOCOUNT ON;

DECLARE @Schema NVARCHAR(100) = N'dbo';  -- schema to scan
DECLARE @Table  NVARCHAR(100) = NULL;    -- NULL = every table in @Schema
DECLARE @sql    NVARCHAR(MAX) = N'';     -- accumulates one SELECT per column

IF OBJECT_ID('tempdb..#Nulls') IS NOT NULL
    DROP TABLE #Nulls;

CREATE TABLE #Nulls
(
    TableName      SYSNAME,
    ColumnName     SYSNAME,
    ColumnPosition INT,
    NullCount      INT,
    NonNullCount   INT
);

-- Build one counting statement per column. Names embedded as string values
-- have their single quotes doubled; names used as identifiers go through
-- QUOTENAME. ColumnPosition is emitted as a number, not a quoted string.
SELECT @sql += N'select TableName = N'''
        + REPLACE(QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME), N'''', N'''''')
    + N''', ColumnName = N''' + REPLACE(QUOTENAME(c.COLUMN_NAME), N'''', N'''''')
    + N''', ColumnPosition = ' + CONVERT(NVARCHAR(10), c.ORDINAL_POSITION)
    + N', CountNulls = sum(case when ' + QUOTENAME(c.COLUMN_NAME) + N' is null then 1 else 0 end)'
    + N', CountnonNulls = count(' + QUOTENAME(c.COLUMN_NAME) + N')'
    + N' from ' + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME) + N';' + NCHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_SCHEMA = @Schema
  AND (@Table IS NULL OR c.TABLE_NAME = @Table);

-- Every result set produced by the generated batch is appended to #Nulls.
INSERT INTO #Nulls (TableName, ColumnName, ColumnPosition, NullCount, NonNullCount)
EXEC sys.sp_executesql @sql;

SELECT
    n.TableName,
    n.ColumnName,
    n.ColumnPosition,
    n.NullCount,
    n.NonNullCount
FROM #Nulls AS n;
However, I would like the database name in the first column and the schema name in the second (as separate columns, not joined to the table name as they currently are), and, as a final column, the number of NULLs expressed as a percentage of the total. How can I do that?
Thank you
If you want your code to gather data about tables in all databases on the server, you need to iterate over each database separately:
-- Counts NULL and non-NULL values for every column of every table in every
-- online user database on the server and returns them from #Nulls.
--
-- A USE statement run through sp_executesql only lasts for that dynamic batch,
-- so it cannot switch the database for later statements. Instead the column
-- metadata is read through three-part names ([db].INFORMATION_SCHEMA.COLUMNS)
-- and the generated counting statements are database-qualified as well.
-- No schema filter is applied (the original declared a schema variable but
-- never used it); only the optional @Table filter restricts the scan.
SET NOCOUNT ON;

DECLARE @Table    NVARCHAR(100) = NULL;  -- NULL = every table
DECLARE @sql      NVARCHAR(MAX);         -- counting statements for one database
DECLARE @meta_sql NVARCHAR(MAX);         -- metadata query for one database
DECLARE @db_name  SYSNAME;               -- database currently being scanned

IF OBJECT_ID('tempdb..#Nulls') IS NOT NULL
    DROP TABLE #Nulls;

CREATE TABLE #Nulls
(
    DbName         SYSNAME,
    SchemaName     SYSNAME,
    TableName      SYSNAME,
    ColumnName     SYSNAME,
    ColumnPosition INT,
    NullCount      INT,
    NonNullCount   INT
);

-- Scratch list of the columns found in the database being scanned.
IF OBJECT_ID('tempdb..#Cols') IS NOT NULL
    DROP TABLE #Cols;

CREATE TABLE #Cols
(
    SchemaName     SYSNAME NOT NULL,
    TableName      SYSNAME NOT NULL,
    ColumnName     SYSNAME NOT NULL,
    ColumnPosition INT     NOT NULL
);

DECLARE db_cursor CURSOR LOCAL FAST_FORWARD FOR
    SELECT d.name
    FROM sys.databases AS d
    -- Iterate over all databases, except for the system ones
    WHERE d.name NOT IN ('master', 'model', 'msdb', 'tempdb')
      AND d.state_desc = 'ONLINE';  -- offline databases cannot be queried

OPEN db_cursor;
FETCH NEXT FROM db_cursor INTO @db_name;

WHILE @@FETCH_STATUS = 0
BEGIN
    -- 1. Read this database's column metadata.
    TRUNCATE TABLE #Cols;

    SET @meta_sql = N'SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME, ORDINAL_POSITION FROM '
        + QUOTENAME(@db_name)
        + N'.INFORMATION_SCHEMA.COLUMNS WHERE (@Table IS NULL OR TABLE_NAME = @Table);';

    INSERT INTO #Cols (SchemaName, TableName, ColumnName, ColumnPosition)
    EXEC sys.sp_executesql @meta_sql, N'@Table NVARCHAR(100)', @Table = @Table;

    -- 2. Build one counting statement per column, starting from an empty
    --    string for every database. Schema and table are reported in
    --    separate columns.
    SET @sql = N'';

    SELECT @sql += N'select DbName = N''' + REPLACE(QUOTENAME(@db_name), N'''', N'''''')
        + N''', SchemaName = N''' + REPLACE(QUOTENAME(c.SchemaName), N'''', N'''''')
        + N''', TableName = N''' + REPLACE(QUOTENAME(c.TableName), N'''', N'''''')
        + N''', ColumnName = N''' + REPLACE(QUOTENAME(c.ColumnName), N'''', N'''''')
        + N''', ColumnPosition = ' + CONVERT(NVARCHAR(10), c.ColumnPosition)
        + N', CountNulls = sum(case when ' + QUOTENAME(c.ColumnName) + N' is null then 1 else 0 end)'
        + N', CountnonNulls = count(' + QUOTENAME(c.ColumnName) + N')'
        + N' from ' + QUOTENAME(@db_name) + N'.' + QUOTENAME(c.SchemaName)
        + N'.' + QUOTENAME(c.TableName) + N';' + NCHAR(10)
    FROM #Cols AS c;

    -- 3. Run the batch; skip databases with no matching columns.
    IF @sql <> N''
    BEGIN
        INSERT INTO #Nulls
            (DbName, SchemaName, TableName, ColumnName, ColumnPosition, NullCount, NonNullCount)
        EXEC sys.sp_executesql @sql;
    END;

    FETCH NEXT FROM db_cursor INTO @db_name;
END;

CLOSE db_cursor;
DEALLOCATE db_cursor;

SELECT
    n.DbName,
    n.SchemaName,
    n.TableName,
    n.ColumnName,
    n.ColumnPosition,
    n.NullCount,
    n.NonNullCount
FROM #Nulls AS n;
-- Counts NULL / non-NULL values per column for one schema of the current
-- database, then reports both counts as percentages of the table's row count.
SET NOCOUNT ON;

DECLARE @Schema NVARCHAR(100) = N'dbo';  -- schema to scan
DECLARE @Table  NVARCHAR(100) = NULL;    -- NULL = every table in @Schema
DECLARE @sql    NVARCHAR(MAX) = N'';     -- accumulates one SELECT per column

IF OBJECT_ID('tempdb..#Nulls') IS NOT NULL
    DROP TABLE #Nulls;

CREATE TABLE #Nulls
(
    DataBaseName   SYSNAME,
    SchemaName     SYSNAME,
    TableName      SYSNAME,
    ColumnName     SYSNAME,
    ColumnPosition INT,
    NullCount      INT,
    NonNullCount   INT
);

-- One counting statement per column. Names embedded as string values have
-- their single quotes doubled; ColumnPosition is emitted as a number.
SELECT @sql += N'select DBName = N''' + REPLACE(QUOTENAME(c.TABLE_CATALOG), N'''', N'''''')
    + N''', SchemaName = N''' + REPLACE(QUOTENAME(c.TABLE_SCHEMA), N'''', N'''''')
    + N''', TableName = N''' + REPLACE(QUOTENAME(c.TABLE_NAME), N'''', N'''''')
    + N''', ColumnName = N''' + REPLACE(QUOTENAME(c.COLUMN_NAME), N'''', N'''''')
    + N''', ColumnPosition = ' + CONVERT(NVARCHAR(10), c.ORDINAL_POSITION)
    + N', CountNulls = sum(case when ' + QUOTENAME(c.COLUMN_NAME) + N' is null then 1 else 0 end)'
    + N', CountnonNulls = count(' + QUOTENAME(c.COLUMN_NAME) + N')'
    + N' from ' + QUOTENAME(c.TABLE_SCHEMA) + N'.' + QUOTENAME(c.TABLE_NAME) + N';' + NCHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_SCHEMA = @Schema
  AND (@Table IS NULL OR c.TABLE_NAME = @Table);

INSERT INTO #Nulls
    (DataBaseName, SchemaName, TableName, ColumnName, ColumnPosition, NullCount, NonNullCount)
EXEC sys.sp_executesql @sql;

-- The CASE guards keep empty tables (NULL / 0 counts) from dividing by zero;
-- multiplying by 100.0 forces decimal rather than integer division.
SELECT
    n.DataBaseName,
    n.SchemaName,
    n.TableName,
    n.ColumnName,
    n.ColumnPosition,
    n.NullCount,
    n.NonNullCount,
    NullCntPercentage =
        CASE
            WHEN ISNULL(n.NullCount, 0) = 0 THEN 0
            ELSE CAST(n.NullCount * 100.0 / (n.NullCount + n.NonNullCount) AS DECIMAL(15, 2))
        END,
    NonNullCntPercentage =
        CASE
            WHEN n.NonNullCount = 0 THEN 0
            ELSE CAST(n.NonNullCount * 100.0 / (n.NullCount + n.NonNullCount) AS DECIMAL(15, 2))
        END
FROM #Nulls AS n
ORDER BY n.TableName, n.ColumnPosition;
Below is the output
DataBaseName SchemaName TableName ColumnName ColumnPosition NullCount NonNullCount NullCntPercentage NonNullCntPercentage
[MyDB] [dbo] [EmpDtl1] [EmpId] 1 1 6 14.29 85.71
[MyDB] [dbo] [EmpDtl1] [EmpName] 2 1 6 14.29 85.71
I have the following SQL. I would like to add a date key to it, but I have been struggling to do so. The SQL is as follows:
-- Per-column NULL / non-NULL counts for one schema of the current database,
-- followed by a report that adds both counts as percentages.
SET NOCOUNT ON;

DECLARE @Schema NVARCHAR(100) = N'dbo';  -- schema to scan
DECLARE @Table  NVARCHAR(100) = NULL;    -- NULL = every table in @Schema
DECLARE @sql    NVARCHAR(MAX) = N'';     -- generated batch, one SELECT per column

IF OBJECT_ID('tempdb..#Nulls') IS NOT NULL
    DROP TABLE #Nulls;

CREATE TABLE #Nulls
(
    DataBaseName   SYSNAME,
    SchemaName     SYSNAME,
    TableName      SYSNAME,
    ColumnName     SYSNAME,
    ColumnPosition INT,
    NullCount      INT,
    NonNullCount   INT
);

-- Generate the counting statements. String values get their single quotes
-- doubled, identifiers are bracketed with QUOTENAME, and the ordinal position
-- is emitted as a numeric literal.
SELECT @sql +=
      N'select DBName = N'''        + REPLACE(QUOTENAME(col.TABLE_CATALOG), N'''', N'''''')
    + N''', SchemaName = N'''       + REPLACE(QUOTENAME(col.TABLE_SCHEMA),  N'''', N'''''')
    + N''', TableName = N'''        + REPLACE(QUOTENAME(col.TABLE_NAME),    N'''', N'''''')
    + N''', ColumnName = N'''       + REPLACE(QUOTENAME(col.COLUMN_NAME),   N'''', N'''''')
    + N''', ColumnPosition = '      + CONVERT(NVARCHAR(10), col.ORDINAL_POSITION)
    + N', CountNulls = sum(case when ' + QUOTENAME(col.COLUMN_NAME) + N' is null then 1 else 0 end)'
    + N', CountnonNulls = count('   + QUOTENAME(col.COLUMN_NAME) + N')'
    + N' from ' + QUOTENAME(col.TABLE_SCHEMA) + N'.' + QUOTENAME(col.TABLE_NAME) + N';'
    + NCHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS AS col
WHERE col.TABLE_SCHEMA = @Schema
  AND (@Table IS NULL OR col.TABLE_NAME = @Table);

INSERT INTO #Nulls
    (DataBaseName, SchemaName, TableName, ColumnName, ColumnPosition, NullCount, NonNullCount)
EXEC sys.sp_executesql @sql;

-- Percentages are 0 when the corresponding count is NULL/0, which also keeps
-- empty tables from dividing by zero.
SELECT
    r.DataBaseName,
    r.SchemaName,
    r.TableName,
    r.ColumnName,
    r.ColumnPosition,
    r.NullCount,
    r.NonNullCount,
    NullCntPercentage =
        CASE
            WHEN ISNULL(r.NullCount, 0) = 0 THEN 0
            ELSE CAST(r.NullCount * 100.0 / (r.NullCount + r.NonNullCount) AS DECIMAL(15, 2))
        END,
    NonNullCntPercentage =
        CASE
            WHEN r.NonNullCount = 0 THEN 0
            ELSE CAST(r.NonNullCount * 100.0 / (r.NullCount + r.NonNullCount) AS DECIMAL(15, 2))
        END
FROM #Nulls AS r
ORDER BY r.TableName, r.ColumnPosition;
I would like to add the date key using the following
convert(nvarchar(11),getdate(),112)
However, I have been struggling to add this to the dynamic SQL (the first stage); I do not want it added to the second stage, which computes the percentages.
Thank you
The following query returns, for each field of the table, the percentage of rows in which that field is NULL. What I want is the sum of those percentages for a specific ProductID. I would also like an extra column giving the percentage of fields that have no value, i.e. that equal "". Any ideas?
-- For every column of Production.Product, reports the percentage of rows in
-- which that column is NULL (one output column per table column).
USE AdventureWorks;

DECLARE @TotalCount DECIMAL(10, 2),  -- row count of the table, used as divisor
        @SQL        NVARCHAR(MAX);   -- generated SELECT; starts out NULL

SELECT @TotalCount = COUNT(*)
FROM [AdventureWorks].[Production].[Product];

-- COALESCE emits 'SELECT ' for the first column (while @SQL is still NULL) and
-- a comma separator for every later one. The alias is bracketed by hand with
-- any ']' doubled so unusual column names cannot break the statement.
SELECT @SQL =
    COALESCE(@SQL + N', ', N'SELECT ')
    + N'cast(sum (case when ' + QUOTENAME(COLUMN_NAME)
    + N' IS NULL then 1 else 0 end)/@TotalCount*100.00 as decimal(10,2)) as ['
    + REPLACE(COLUMN_NAME, N']', N']]') + N' NULL %]' + NCHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'Product'
  AND TABLE_SCHEMA = 'Production';

-- NULLIF turns a zero row count into NULL so an empty table yields NULL
-- percentages instead of a divide-by-zero error.
SET @SQL = N'set @TotalCount = NULLIF(@TotalCount,0)' + NCHAR(10)
    + @SQL + NCHAR(10)
    + N'FROM [AdventureWorks].Production.Product';

PRINT @SQL;

EXECUTE sys.sp_executesql @SQL, N'@TotalCount decimal(10,2)', @TotalCount;
You can use the following:
-- For the Production.Product row(s) with a given ProductID, reports
--   null_percent  : percentage of column values that are NULL
--   empty_percent : percentage of column values that are NULL or "empty"
--                   (0 for numeric types, '' for character types)
-- Both are averaged over all matching rows and are NULL when no row matches.
-- NOTE(review): text/ntext columns are matched by the '%text' branch but
-- cannot be compared with '=' in T-SQL; Production.Product presumably has
-- none — confirm before pointing this at another table.
USE AdventureWorks;

DECLARE @colCount   INT;                   -- number of columns in the table
DECLARE @nullCheck  NVARCHAR(MAX) = N'';   -- '+ count(col)' terms
DECLARE @emptyCheck NVARCHAR(MAX) = N'';   -- '+ sum(case ...)' terms
DECLARE @SQL        NVARCHAR(MAX);
DECLARE @KeyToCheck INT = 123;             -- adapt as necessary

-- Each term starts with '+', so the first term is a unary plus and needs no
-- special-casing.
SELECT
    @nullCheck += '
    + ' + 'count(' + QUOTENAME(COLUMN_NAME) + ')'
   ,@emptyCheck += '
    + ' +
        CASE
            WHEN DATA_TYPE IN ('bigint', 'int', 'smallint', 'tinyint', 'bit', 'money',
                               'smallmoney', 'numeric', 'decimal', 'float', 'real') THEN
                -- check numeric data for zero
                'sum(case when coalesce(' + QUOTENAME(COLUMN_NAME) + ', 0) = 0 then 1 else 0 end)'
            WHEN DATA_TYPE LIKE '%char' OR DATA_TYPE LIKE '%text' THEN
                -- check character data types for empty string
                'sum(case when coalesce(' + QUOTENAME(COLUMN_NAME) + ', '''') = '''' then 1 else 0 end)'
            ELSE
                -- otherwise, only check for null
                'sum(case when ' + QUOTENAME(COLUMN_NAME) + ' IS NULL then 1 else 0 end)'
        END
   ,@colCount = COUNT(*) OVER ()
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'Product'
  AND TABLE_SCHEMA = 'Production';

-- count(col) counts non-NULL values, hence "100.00 - ..." for the NULL share.
-- The outer CASE avoids dividing by zero when no row has the requested key.
SET @SQL = 'SELECT case when count(*) > 0 then 100.00 - (' + @nullCheck + '
    ) * 100.00 / ' + CAST(@colCount AS NVARCHAR(MAX)) + '.00 / count(*) end as null_percent
   , case when count(*) > 0 then (' + @emptyCheck + '
    ) * 100.00 / ' + CAST(@colCount AS NVARCHAR(MAX)) + '.00 / count(*) end as empty_percent
FROM Production.Product
WHERE ProductID = ' + CAST(@KeyToCheck AS NVARCHAR(MAX));

PRINT @SQL;

EXECUTE (@SQL);
I simplified one of your expressions: Instead of sum (case when <column> IS NULL then 1 else 0 end), you can just use count(<column>). When using count with an expression instead of *, it counts the rows where this expression is non-null. As this is the opposite from what you need, I added the 100.00 - as the start of the SELECT.
For the "empty check", this would make the logic more complex to understand, hence I left the original logic there and extended it. There, I implemented a check for emptiness for numeric and character/text data types. You can easily extend that for date, binary data etc. with whichever logic you use to determine if a column is empty.
I also found it simpler to leave the leading + in the two variables @nullCheck and @emptyCheck, as it is valid SQL to start an expression with a unary plus.
I also extended the statement so that, if there is more than one record with ProductId = 123, it shows the average across all records, i.e. the total sum divided by the count of rows. The outermost case expressions just avoid a division-by-zero error if count(*) is zero, i.e. no record with ProductId = 123 is found. In that case the return value is null.
You could use AVG function:
-- Percentage of rows in which `value` is NULL.
-- Decimal literals are required: AVG over the integers 100 / 0 would use
-- integer arithmetic and truncate the result.
-- `Table` is a placeholder name; it is a reserved word, so it must be bracketed.
SELECT AVG(CASE WHEN value IS NULL THEN 100.0 ELSE 0.0 END) AS Percents
FROM [Table];
UPDATE:
Here is your script:
-- For the row(s) of @TABLE_SCHEMA.@TABLE_NAME whose pkId equals @PK, reports
-- for every column the percentage of those rows in which the column is NULL.
DECLARE @SQL          NVARCHAR(MAX),  -- generated SELECT; starts out NULL
        @TABLE_NAME   SYSNAME,
        @TABLE_SCHEMA SYSNAME,
        @PK           NVARCHAR(MAX);  -- key value, passed as a parameter

SET @TABLE_NAME   = N'tblBigTable';
SET @TABLE_SCHEMA = N'dbo';
SET @PK           = N'8';

-- COALESCE emits 'SELECT ' for the first column and ', ' before later ones.
-- 100.0 / 0.0 keep AVG in decimal arithmetic (integer AVG would truncate).
-- Identifiers are bracketed so unusual column names cannot break the statement.
SELECT
    @SQL = COALESCE(@SQL + N', ', N'SELECT ')
         + N'AVG(CASE WHEN ' + QUOTENAME(COLUMN_NAME)
         + N' IS NULL THEN 100.0 ELSE 0.0 END) AS ['
         + REPLACE(COLUMN_NAME, N']', N']]') + N' NULL %]'
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = @TABLE_SCHEMA
  AND TABLE_NAME = @TABLE_NAME;

-- The table is schema-qualified to match the metadata filter above, and the
-- key is bound as a parameter instead of being concatenated into the text.
SET @SQL = @SQL
    + N' FROM ' + QUOTENAME(@TABLE_SCHEMA) + N'.' + QUOTENAME(@TABLE_NAME)
    + N' WHERE pkId = @PK';

PRINT @SQL;

EXECUTE sys.sp_executesql @SQL, N'@PK NVARCHAR(MAX)', @PK = @PK;
I have below table structure in MS SQL
AirQuoteID Name SalesValue
7 M 49.50
7 N 23.10
7 +45 233.20
7 +100 233.20
7 +250 2333.10
I want a query which can return
AirQuoteID M N +45 +100 +250
7 49.50 23.10 233.20 233.20 2333.10
What will be the optimum solution. The Values are dynamic
You need to look at the PIVOT operator.
-- Static PIVOT example: the CTE supplies the sample rows inline, and each
-- distinct Name value becomes its own column holding that row's SalesValue.
;WITH T (AirQuoteID, Name, SalesValue) AS
(
    SELECT 7, 'M',    49.50
    UNION ALL
    SELECT 7, 'N',    23.10
    UNION ALL
    SELECT 7, '+45',  233.20
    UNION ALL
    SELECT 7, '+100', 233.20
    UNION ALL
    SELECT 7, '+250', 2333.10
)
SELECT
    p.AirQuoteID,
    p.[M],
    p.[N],
    p.[+45],
    p.[+100],
    p.[+250]
FROM T
PIVOT
(
    -- One value exists per (AirQuoteID, Name), so MAX simply returns it.
    MAX(SalesValue)
    FOR Name IN ([M], [N], [+45], [+100], [+250])
) AS p;
However if the values for the columns are not fixed you will need to use dynamic SQL.
As the data is dynamic, a static PIVOT won't help.
The link http://www.sqlteam.com/article/dynamic-cross-tabs-pivot-tables posted by @Martin really helped.
first create the below procedure
-- Dynamic cross-tab: runs @select with one extra aggregate column appended for
-- every distinct non-NULL value of the @pivot expression.
--
-- Parameters
--   @select  : base SELECT statement; must contain ' FROM ' (the generated
--              columns are inserted just before it) and normally a GROUP BY.
--   @sumfunc : aggregate to apply per pivot value, e.g. 'AVG(titles.SalesValue)'.
--   @pivot   : expression whose distinct values become the column headings.
--   @table   : FROM clause text used to read the distinct pivot values.
--
-- NOTE(review): all four arguments are concatenated into dynamic SQL, so only
-- trusted text may be passed in. The work table ##pivot is a global temp
-- table, so concurrent calls from different sessions would collide.
CREATE PROCEDURE [dbo].[crosstab]
    @select  varchar(8000),
    @sumfunc varchar(100),
    @pivot   varchar(100),
    @table   varchar(100)
AS
DECLARE @sql varchar(8000), @delim varchar(1)
SET NOCOUNT ON
SET ANSI_WARNINGS OFF

-- A ##pivot left behind by an earlier failed call would make SELECT INTO fail.
IF OBJECT_ID('tempdb..##pivot') IS NOT NULL
    DROP TABLE ##pivot

-- Create ##pivot empty (WHERE 1=2) so its column takes the pivot expression's
-- data type, then fill it with the distinct non-NULL pivot values.
print ('SELECT ' + @pivot + ' AS [pivot] INTO ##pivot FROM ' + @table + ' WHERE 1=2')
EXEC ('SELECT ' + @pivot + ' AS [pivot] INTO ##pivot FROM ' + @table + ' WHERE 1=2')
EXEC ('INSERT INTO ##pivot SELECT DISTINCT ' + @pivot + ' FROM ' + @table + ' WHERE '
    + @pivot + ' Is Not Null')

-- Replace the aggregate's closing ')' with ' END)' so a CASE can be wrapped
-- around its argument.
SELECT @sql = '', @sumfunc = stuff(@sumfunc, len(@sumfunc), 1, ' END)')

-- Character and date pivot values must be quoted in the generated WHEN
-- clause; other (numeric) types are emitted bare.
SELECT @delim = CASE Sign(CharIndex('char', data_type) + CharIndex('date', data_type))
                    WHEN 0 THEN '' ELSE '''' END
FROM tempdb.information_schema.columns
WHERE table_name = '##pivot' AND column_name = 'pivot'

-- One "'value' = AGG( CASE <pivot> WHEN value THEN <arg> END)" column per
-- pivot value. Single quotes inside a value are doubled so they cannot
-- terminate the generated literals.
SELECT @sql = @sql + '''' + replace(convert(varchar(100), [pivot]), '''', '''''') + ''' = ' +
    stuff(@sumfunc, charindex('(', @sumfunc) + 1, 0, ' CASE ' + @pivot + ' WHEN '
    + @delim + replace(convert(varchar(100), [pivot]), '''', '''''') + @delim + ' THEN ') + ', '
FROM ##pivot

DROP TABLE ##pivot

-- With no pivot values there is nothing to append (and LEFT(..., -1) would
-- raise an error), so @select is then run unchanged.
IF @sql <> ''
BEGIN
    -- Strip the trailing comma (LEN ignores the trailing space after it).
    SELECT @sql = left(@sql, len(@sql) - 1)
    -- Splice the generated columns in just before the first ' FROM '.
    SELECT @select = stuff(@select, charindex(' FROM ', @select) + 1, 0, ', ' + @sql + ' ')
END

EXEC (@select)
SET ANSI_WARNINGS ON
GO
The article didn't mention how to retrieve the data when everything lives in the same table, so below is how you do it:
-- Calls the crosstab procedure with four positional arguments:
--   1. the base SELECT: AirQuoteID from AirSaleQuoteRateSlab (aliased titles),
--      joined to its own distinct (AirQuoteID, Name) pairs and grouped by
--      AirQuoteID
--   2. the aggregate applied per pivot value: AVG(titles.SalesValue)
--   3. the pivot expression whose values become column headings: titles.Name
--   4. the source used to read the distinct pivot values
EXECUTE crosstab 'select titles.AirQuoteID from AirSaleQuoteRateSlab titles
inner join
(select distinct AirQuoteID,[Name] from AirSaleQuoteRateSlab) sales
on (sales.AirQuoteID=titles.AirQuoteID)
group by titles.AirQuoteID', 'AVG(titles.SalesValue)','titles.Name','AirSaleQuoteRateSlab titles'
-- Cross-tab by conditional aggregation: one output row per AirQuoteID, with
-- each known Name value summed into its own column (0 when the quote has no
-- row for that Name).
SELECT
    t.AirQuoteID,
    SUM(CASE WHEN t.Name = 'M'    THEN t.SalesValue ELSE 0 END) AS [M],
    SUM(CASE WHEN t.Name = 'N'    THEN t.SalesValue ELSE 0 END) AS [N],
    SUM(CASE WHEN t.Name = '+45'  THEN t.SalesValue ELSE 0 END) AS [+45],
    SUM(CASE WHEN t.Name = '+100' THEN t.SalesValue ELSE 0 END) AS [+100],
    SUM(CASE WHEN t.Name = '+250' THEN t.SalesValue ELSE 0 END) AS [+250]
FROM Table1 AS t
GROUP BY t.AirQuoteID;