Check if set of rows fall in specific values in SQL? - sql

I want to check whether a given table has specific column names. The check should return true if the table has all of them and false if any one of them is missing. This is my query:
-- List the name of every column recorded for MyTable in database db.
SELECT c.COLUMN_NAME
FROM db.INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_NAME = 'MyTable'
I want to check if these names in the query result:
('UpdatedDate', 'CreatedDate', 'UpdatedBy')

If you know how many names are in the list, this query will help you:
-- Returns one row: 'true' when MyTable has all three required columns, otherwise 'false'.
-- There is deliberately no GROUP BY: grouping by COLUMN_NAME would give every group a
-- count of 1, so a comparison against 3 could never succeed.
-- COUNT(DISTINCT ...) keeps the answer correct when MyTable exists in several schemas.
SELECT
    CASE
        WHEN COUNT(DISTINCT COLUMN_NAME) = 3 THEN 'true'
        ELSE 'false'
    END AS has_all_columns
FROM db.INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'MyTable'
  AND COLUMN_NAME IN ('UpdatedDate', 'CreatedDate', 'UpdatedBy');

Create a table variable holding the column names that need to be checked, then compare it against information_schema.columns. I am not sure how efficient this is.
Query
-- Checks whether MyTable contains every column named in the comma-separated list @cols.
-- Returns a single row with [status] = 'true' when all are present, otherwise 'false'.
-- Local variables must start with @ in T-SQL; a # prefix is only valid for temporary objects.
declare @cols as varchar(max) = 'UpdatedDate,CreatedDate,UpdatedBy';
-- Turn 'a,b,c' into the VALUES list ('a'),('b'),('c') -- char(39) is a single quote.
declare @cols2 as varchar(max) = '(' + char(39);
set @cols2 += replace(@cols, ',', '''),(''') + ''');';
-- The table variable lives inside the dynamic batch because its rows are only known at run time.
declare @sql as varchar(max) = 'declare @tbl as table([col] varchar(1000));';
set @sql += 'insert into @tbl values' + @cols2;
set @sql += 'declare @tot as int;set @tot = (select count(*) from @tbl);';
-- Compare the number of requested names that exist on MyTable with the number requested.
set @sql += 'select case when count(*) = @tot
then ''true'' else ''false'' end as [status]
from @tbl t1 where exists(
select 1 from information_schema.columns t2
where t1.[col] = t2.[column_name]
and t2.[table_name] = ''MyTable'');';
exec(@sql);

Related

SQL count distinct or not null for each column for many columns

I need to analyze a large table with hundreds of columns. A lot of columns are unused.
To investigate I could do something like
-- One row per distinct value stored in Column1.
SELECT Column1
FROM myTable
GROUP BY Column1
or
-- Number of distinct Column1 values, using a derived table in place of a CTE.
SELECT COUNT(*)
FROM (
    SELECT DISTINCT Column1
    FROM MyTable
) AS distinct_values
Then I do the same for column2 and so on. However these queries only work for one column which is time consuming and does not give overview in one glance.
Any idea how to build such investigation query for all columns in one?
You need only 1 query where you have to list all the columns of the table:
-- One pass over MyTable that returns, per listed column, the number of distinct non-NULL values.
SELECT
    COUNT(DISTINCT Column1) AS column1_count,
    COUNT(DISTINCT Column2) AS column2_count,
    COUNT(DISTINCT Column3) AS column3_count
    -- add ", COUNT(DISTINCT ColumnN) AS columnN_count" here for each remaining column
FROM MyTable;
For local purposes only, you can make it dynamic like this:
Get the columns of the table.
The query is then built the same way as in the other answers and executed with EXEC().
-- Builds and runs a single SELECT that returns COUNT(DISTINCT col) for every column of Customer.
-- Local variables use the @ sigil required by T-SQL.
-- ColumnName is sysname so that long column names are not truncated.
DECLARE @columns AS TABLE (RowId INT IDENTITY(1,1), ColumnName sysname)
DECLARE @ii INT = 1   -- IDENTITY(1,1) numbers rows from 1, so the loop starts there
DECLARE @max INT = 0
DECLARE @sqlQuery NVARCHAR(MAX)

INSERT INTO @columns (ColumnName)
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = N'Customer'
ORDER BY ORDINAL_POSITION

SET @sqlQuery = 'SELECT '
SELECT @max = COUNT(*) FROM @columns

WHILE @ii <= @max
BEGIN
    -- QUOTENAME keeps names containing spaces or reserved words valid in the generated SQL.
    SELECT @sqlQuery = CONCAT(@sqlQuery, 'COUNT(DISTINCT ', QUOTENAME(ColumnName), ') ', QUOTENAME(LOWER(ColumnName) + '_count'), ', ')
    FROM @columns
    WHERE RowId = @ii

    SET @ii = @ii + 1
END

SELECT @sqlQuery = CONCAT(@sqlQuery, 'FROM Customer')
-- Drop the comma left after the last generated column.
SELECT @sqlQuery = REPLACE(@sqlQuery, ', FROM', ' FROM')

-- Show the generated statement, then run it.
SELECT @sqlQuery
EXEC (@sqlQuery)
You should flesh out your requirement a bit more. If all you want to know is if a column contains only NULLs, you'll want to check for max(ColumnName) is null
-- Lists every column of every user table whose max() is NULL, i.e. the column holds no
-- non-NULL value. One probing query is generated per column and its result is collected
-- in the global temporary table ##emptyColumns.
-- NOTE(review): max() is not defined for every data type (bit, for example), so such
-- columns will make their probing query fail -- confirm against the target schema.
declare @sql table (id int identity(1,1), QueryString nvarchar(max))
if object_id('tempdb..##emptyColumns') is not null
    drop table ##emptyColumns
-- Wide enough for a fully quoted three-part name [schema].[table].[column].
create table ##emptyColumns (emptyColumn nvarchar(400))
declare @i int = 0
declare @iMax int
declare @runthis nvarchar(max)

-- The label is embedded in a string literal, so single quotes inside names are doubled.
insert @sql (QueryString)
select 'select ''' + replace(QUOTENAME(s.name) + '.' + QUOTENAME(o.name) + '.' + QUOTENAME(c.name), '''', '''''') + ''' as ''column''
from ' + QUOTENAME(s.name) + '.' + QUOTENAME(o.name) + '
having max(' + QUOTENAME(c.name) + ') is null'
from sys.sysobjects o
inner join sys.syscolumns c on c.id = o.id
inner join sys.schemas s on s.schema_id = o.uid
where o.type = 'U'
order by s.name
, o.name
, c.colorder

select @iMax = count(*)
from @sql
print @iMax

-- Run each generated query; a query yields a row only when its column is entirely NULL.
while @i < @iMax
begin
    set @i = @i + 1
    select @runthis = 'insert into ##emptyColumns
' + QueryString
    from @sql
    where id = @i
    execute sp_executesql @runthis
end

select *
from ##emptyColumns
drop table ##emptyColumns
One further option you might consider:
-- Builds one "select ... union all select ..." statement with a row per column of
-- MySchema.MyTable: the column name, how many NULLs it holds and how many distinct values.
-- Identifiers are wrapped with QUOTENAME; the name used as a string label has its quotes doubled.
declare @sql nvarchar(max)
select @sql = isnull(@sql + ' union all ', '') + 'select ''' + replace(COLUMN_NAME, '''', '''''') + ''' as column_name,
sum(case when ' + quotename(COLUMN_NAME) + ' is null then 1 else 0 end) as null_values,
count(distinct ' + quotename(COLUMN_NAME) + ') as count_distinct
from ' + quotename(TABLE_SCHEMA) + '.' + quotename(TABLE_NAME) + '
'
from information_schema.columns
where TABLE_SCHEMA = 'MySchema' and TABLE_NAME = 'MyTable'
exec (@sql)
If you had very big tables with large numbers of columns and were only interested in empty columns you could look into something like checksum_agg(checksum(column_name)). It may help improve performance.
You'd need to be wary of column data types, as they are not all compatible with distinct.

How to make 0 in all column in a specific table where value is null using MSSQL

Let the table name is "MyTable"
My current data looks like:
Following, I need after a query on above table:
Actually I need to update all column where value is "NULL", in a single query.
Use ISNULL if you want to see the NULL as 0. Like this
-- Read Column1 with 0 shown in place of NULL; the stored data is not changed.
SELECT ISNULL(t.Column1, 0)
FROM YourTable AS t
Or, if what you need is to update the value to 0 when it is NULL and keep it as it is otherwise, use a CASE expression in the UPDATE statement, like this:
-- Write 0 over NULLs in Column1 and Column2; non-NULL values are assigned back unchanged.
UPDATE YourTable
SET
    Column1 = CASE WHEN Column1 IS NOT NULL THEN Column1 ELSE 0 END,
    Column2 = CASE WHEN Column2 IS NOT NULL THEN Column2 ELSE 0 END
and so on for the rest of the columns. Or this is also possible
-- Same effect using ISNULL: each listed column gets 0 where it was NULL.
-- The column name is spelled with one consistent casing so the statement also
-- resolves under a case-sensitive collation.
UPDATE YourTable
SET
    Column1 = ISNULL(Column1, 0),
    Column2 = ISNULL(Column2, 0)
You can use something like this.
It is elegant, but it will update all columns in the table. On huge tables this might kill the server.
-- Builds a single UPDATE that sets every nullable column of @TableName to ISNULL(col, 0).
-- Local variables use the @ sigil required by T-SQL.
DECLARE @TableName sysname = 'tablename'
DECLARE @UptQuery nvarchar(max)

-- FOR XML PATH concatenates ",[col]=ISNULL([col], 0)" for each column; STUFF removes the leading comma.
-- Columns that are NOT NULL, or computed, are skipped: they need no change or cannot be assigned.
SELECT @UptQuery = STUFF(T.X.query('name').value('.', 'nvarchar(max)'), 1, 1, '')
FROM
    (SELECT ',' + QUOTENAME(name) + '=ISNULL(' + QUOTENAME(name) + ', 0)' name
     FROM sys.columns
     WHERE object_id = OBJECT_ID(@TableName)
       AND is_nullable = 1
       AND is_computed = 0
     FOR XML PATH(''), TYPE) T(X)

-- Nothing to run when the table has no qualifying column (or does not exist).
IF @UptQuery IS NOT NULL
    EXEC ('Update ' + @TableName + ' set ' + @UptQuery)
Old answer
It will go through all columns for a table and update everything with 0 if it is null. It is a lot of updates, and I think it is still better to design the table correctly from the start.
-- Walks the nullable columns of @TableName one at a time and sets each to 0 where it is NULL.
-- Local variables use the @ sigil required by T-SQL; #temp is a genuine temporary table.
DECLARE @TableName sysname = 'tablename'
DECLARE @ColName sysname
DECLARE @QuotedCol nvarchar(258)

SELECT name
INTO #temp
FROM sys.columns
WHERE object_id = OBJECT_ID(@TableName)
  AND is_nullable = 1
  AND is_computed = 0

WHILE EXISTS (SELECT 1 FROM #temp)
BEGIN
    -- TOP (1) replaces the deprecated SET ROWCOUNT 1 / SET ROWCOUNT 0 pair.
    SELECT TOP (1) @ColName = name FROM #temp

    -- EXEC() accepts only literals and variables, so the quoted name is prepared first.
    SET @QuotedCol = QUOTENAME(@ColName)
    EXEC ('Update ' + @TableName + ' set ' + @QuotedCol + ' = ISNULL(' + @QuotedCol + ', 0) where ' + @QuotedCol + ' is null')

    DELETE #temp WHERE name = @ColName
END

DROP TABLE #temp
-- Replace NULL with 0 column by column; repeat the assignment for every column that needs it.
-- "table" is a reserved word, so the placeholder table name is bracketed.
UPDATE [table]
SET column1 = COALESCE(column1, 0) -- , column2 = COALESCE(column2, 0), ...
-- Generates and runs "Update <table> SET [c1]= isnull([c1],0), [c2]= isnull([c2],0), ..."
-- for every column of @tableName.
-- Local variables use the @ sigil required by T-SQL; sysname avoids truncating long table names.
declare @tableName sysname
set @tableName = 'MyTable'

DECLARE @MakeString AS NVARCHAR(MAX)
-- FOR XML PATH('') with the 'data()' alias concatenates the assignments, each followed by a comma.
SELECT @MakeString =
    (SELECT cname + ',' AS 'data()'
     FROM (SELECT QUOTENAME(COLUMN_NAME) + '= isnull(' + QUOTENAME(COLUMN_NAME) + ',0)' AS cname
           FROM INFORMATION_SCHEMA.COLUMNS
           WHERE TABLE_NAME = @tableName
          ) AS ccc
     FOR XML PATH(''))
-- Remove the comma that follows the last assignment.
SET @MakeString = LEFT(@MakeString, LEN(@MakeString) - 1)

DECLARE @Sql AS NVARCHAR(MAX)
set @Sql = 'Update ' + QUOTENAME(@tableName) + '
SET ' + @MakeString + ''
EXEC(@Sql);
-- One statement per column: only rows where the column is NULL are touched.
-- "Table" is a reserved word, so the placeholder table name is bracketed.
UPDATE [Table] SET ColumnName1 = 0 WHERE ColumnName1 IS NULL
-- ... repeat for ColumnName2, ColumnName3 and so on

Counting rows in the table which have 1 or more missing values

Could you please advise how to find the number of rows in the table which have 1 or more missing values? The missing values are represented in my table by question marks = '?'. The table has 15 columns and ~50k rows. When I run the following query for some of the columns I can receive some results:
-- Count the rows whose column_name equals the missing-value marker '?'.
SELECT COUNT(*)
FROM table_name AS t
WHERE t.column_name = '?'
However I have also columns which bring me result: "Error converting data type varchar to float"
I would like to be able to find the number of rows in the table which have 1 or more missing values using 1 query/not run separately for each column.
Thank you in advance for your support!
-- Count rows in which at least one column holds the missing-value marker '?'.
-- Every column is cast to text so numeric columns can be compared with '?'.
-- The columns are tested separately: concatenating them with + yields NULL as soon as
-- one column is NULL, which would silently drop that row from the count.
Select Count(*)
From mySchema.myTable
Where Cast(Col1 As NVarChar(128)) = N'?'
   Or Cast(Col2 As NVarChar(128)) = N'?'
   Or Cast(Coln As NVarChar(128)) = N'?'
It's ugly and WILL be slow and you may need to modify the Casts accordingly, but should do the trick.
This should work for any column:
-- Count rows where column_name is missing: either the text form is '?' or the value is NULL.
select count(*)
from table_name
where cast(column_name as varchar(255)) = '?'
   or column_name is null;
Try following query:
Just set table name and it will get all columns
You can also set value_to_match to '?' (as in your case) or to any other value you want.
-- Counts the rows of @table_name in which the columns, cast to text, equal @value_to_match.
-- Local variables use the @ sigil required by T-SQL, and the row count check uses @@ROWCOUNT.
DECLARE @table_name sysname = 'table_name'
DECLARE @value_to_match nvarchar(max) = '1'
DECLARE @query nvarchar(max) = ''
-- ' OR ' counts a row when any column matches; use ' AND ' to require every column to match.
DECLARE @Condition nvarchar(max) = ' OR '
DECLARE @sql nvarchar(max)

-- One "cast([col] as nvarchar(500)) = N'<value>'" test per column, each followed by @Condition.
-- Quotes in the value are doubled so it stays a single string literal in the generated SQL.
SELECT @query = @query + ' cast(' + QUOTENAME(COLUMN_NAME) + ' as nvarchar(500)) = N''' + REPLACE(@value_to_match, '''', '''''') + '''' + @Condition
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = @table_name

IF @@ROWCOUNT = 0
BEGIN
    SELECT 'Table doesn''t Exists'
    RETURN
END

-- Remove the trailing @Condition whatever its length. DATALENGTH / 2 is the nvarchar
-- character count including trailing spaces, which LEN would ignore.
SET @query = LEFT(@query, DATALENGTH(@query) / 2 - DATALENGTH(@Condition) / 2)

SET @sql = 'select count(9) FROM ' + QUOTENAME(@table_name) + ' WHERE ' + @query
PRINT (@sql)
EXEC (@sql)

SQL schema and value

I have a select statement I want to make. I want to select
-- Column names of table1, exposed under the heading FieldName.
SELECT c.COLUMN_NAME AS FieldName
FROM INFORMATION_SCHEMA.COLUMNS AS c
WHERE c.TABLE_NAME = 'table1'
However I want to create another column named Value which is a particular row in table1
so I have rows of the column name and the corresponding single value. Any thoughts on how to approach this?
The following query produces a value (the minimum) for each column:
-- Generates, for each column of the table named in const.tablename, the text of a query
-- returning the column name and that column's minimum value as varchar(8000).
-- The quotes are placed so the generated text reads: select 'col' AS FieldName, (select ...).
SELECT 'select ''' + c.COLUMN_NAME + ''' AS FieldName, (select cast(MIN(' + QUOTENAME(c.COLUMN_NAME) + ') as varchar(8000)) from ' + QUOTENAME(const.tablename) + ')'
FROM INFORMATION_SCHEMA.COLUMNS c cross join
(select 'AllCurveNames' as tablename) const
WHERE c.TABLE_NAME = const.tablename
However, this produces a separate query for each row. To combine them together, you need a string aggregate concatenation. This is how you would do it in SQL Server:
-- Concatenates one "select '<col>' AS FieldName, (select cast(MIN(<col>) ...))" query per
-- column into a single UNION ALL statement and executes it.
-- The local variable uses the @ sigil required by T-SQL.
declare @sql varchar(max);
-- TYPE + .value() returns the concatenated text without XML entity encoding (&lt; &gt; &amp;).
-- The table name is a string literal here; replace 'AllCurveNames' with the table to inspect.
SELECT @sql = (select 'select ''' + c.COLUMN_NAME + ''' AS FieldName, (select cast(MIN(' + QUOTENAME(c.COLUMN_NAME) + ') as varchar(8000)) from ' + QUOTENAME(const.tablename) + ') union all '
               FROM INFORMATION_SCHEMA.COLUMNS c cross join
                    (select 'AllCurveNames' as tablename) const
               WHERE c.TABLE_NAME = const.tablename
               for xml path(''), type
              ).value('.', 'varchar(max)');
-- LEN ignores the final space, so removing 9 characters strips the trailing "union all".
select @sql = LEFT(@sql, len(@sql) - 9);
exec(@sql);
Use a cross join, which is implicit if you just select from two tables with no join (i.e., from t1, t2):
-- Pairs every column name of table1 with MyField from the Table1 row whose ID is 123.
-- The row filter references Table1, the table actually joined (the name MyTable is not
-- in scope in this query), and the cross join is written explicitly.
SELECT
    c.COLUMN_NAME AS FieldName,
    t.MyField
FROM INFORMATION_SCHEMA.COLUMNS AS c
CROSS JOIN Table1 AS t
WHERE c.TABLE_NAME = 'table1'
  AND t.ID = 123
I actually came up with a bit of a crazy solution but it works:
-- For each column of @tbl_name, reads the value from one row (TOP 1, no ORDER BY) and
-- stores the column name and that value, converted to varchar(255), in #tbl.
-- Local variables use the @ sigil required by T-SQL; #tbl is a genuine temporary table.
declare @tbl_name as sysname
declare @field as sysname
declare @val as varchar(255)
declare @SQL as nvarchar(4000)

create table #tbl ([FieldName] [varchar](255), [FieldVal] [varchar](255))

set @tbl_name = 'table1'

DECLARE mah_cursor CURSOR FAST_FORWARD
FOR
    SELECT COLUMN_NAME
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = @tbl_name

OPEN mah_cursor
FETCH NEXT FROM mah_cursor INTO @field

WHILE @@FETCH_STATUS = 0
BEGIN
    -- Identifiers cannot be passed as parameters, so they are quoted and concatenated;
    -- the value itself comes back through the @val OUTPUT parameter.
    set @SQL = 'set @val = (Select top 1 ' + QUOTENAME(@field) + ' from ' + QUOTENAME(@tbl_name) + ')'
    print @SQL
    exec sp_executesql @stmt = @SQL, @params = N'@val varchar(255) OUTPUT', @val = @val OUTPUT

    insert into #tbl ([FieldName], [FieldVal]) values (@field, @val)

    FETCH NEXT FROM mah_cursor INTO @field
END

CLOSE mah_cursor
DEALLOCATE mah_cursor

select * from #tbl
drop table #tbl
It loops through each value and adds it. The Fast_Forward feature optimizes the query for high performance

Remove trailing empty space in a field content

I am using SQL server MSDE 2000. I have a field called notes of type nvarchar(65).
The content is 'Something ' with an extra space after the content (quotes for clarity) in all the records. I used the following command.
-- Strip leading and trailing spaces from notes in every row of TABLE1.
UPDATE TABLE1
SET
    notes = LTRIM(RTRIM(notes))
But it does not work. Is there any alternate way to do it?
Are you sure the query isn't working? Try:
-- Show the first 100 notes values wrapped in '~' so leading/trailing characters become visible.
SELECT TOP (100)
    '~' + t.notes + '~'
FROM TABLE1 AS t
TOP 100 will limit the results to the first 100 rows, enough to get an idea if there's really a space in the output. If there is, and RTRIM/LTRIM is not removing it - then you aren't dealing with a whitespace character. In that case, try:
-- Removes a character from notes that is not ordinary whitespace.
-- The PATINDEX pattern locates the first character outside the set
-- a-z, A-Z, 0-9, space and single quote.
UPDATE TABLE1
SET notes = REPLACE(notes,
-- SUBSTRING isolates that single character; REPLACE then deletes every occurrence of it.
SUBSTRING(notes, PATINDEX('%[^a-zA-Z0-9 '''''']%', notes), 1),
'')
-- Only rows that actually contain such a character are updated.
WHERE PATINDEX('%[^a-zA-Z0-9 '''''']%', notes) <> 0
... OR you could literally just copy/paste the blank ' ' (space) at the end of a field as a result of your query into your replace statement and update everything from there.
-- Delete every occurrence of the quoted blank character from notes, wherever it appears in the value.
UPDATE TABLE1
SET
    notes = REPLACE(notes, ' ', '')
And just in case you need to TRIM all spaces in all columns, you can use this script to do it dynamically:
-- Trims leading and trailing spaces from every varchar/nvarchar column of dbo.<@MyTable>
-- with one generated UPDATE statement.
-- Local variables use the @ sigil required by T-SQL; #tempcols is a genuine temporary table.

--Just change table name
declare @MyTable sysname
set @MyTable = 'MyTable'

--temp table to get column names and a row id
--restricted to the dbo schema because the generated UPDATE targets [dbo]
select column_name, ROW_NUMBER() OVER (ORDER BY column_name) as id
into #tempcols
from INFORMATION_SCHEMA.COLUMNS
where DATA_TYPE IN ('varchar', 'nvarchar')
  and TABLE_NAME = @MyTable
  and TABLE_SCHEMA = 'dbo'

declare @tri int
select @tri = count(*) from #tempcols

declare @i int
select @i = 1   -- ROW_NUMBER() starts at 1

declare @trimmer nvarchar(max)
declare @comma varchar(2)   -- two characters: a comma followed by a space
set @comma = ', '

--Build Update query
select @trimmer = 'UPDATE [dbo].' + QUOTENAME(@MyTable) + ' SET '

WHILE @i <= @tri
BEGIN
    -- no separator after the last assignment
    IF (@i = @tri)
    BEGIN
        set @comma = ''
    END

    SELECT @trimmer = @trimmer + CHAR(10) + QUOTENAME(COLUMN_NAME) + ' = LTRIM(RTRIM(' + QUOTENAME(COLUMN_NAME) + '))' + @comma
    FROM #tempcols
    where id = @i

    select @i = @i + 1
END

--execute the entire query (skipped when the table has no text columns, which would leave an empty SET list)
IF @tri > 0
    EXEC sp_executesql @trimmer

drop table #tempcols