SQL distinct and count for a column in multiple tables - sql

So what I want is to have a table of distinct values and the count for those values. basically I want it to look like this:
DistinctValue | Count
Bob | 4
Fred | 5
George | 2
Joeseph | 1
for a single table I use :
SELECT ColumnName, COUNT(*) from TableName group by Column
How would I do this so that it would span across multiple tables. I have about say 30, possibly more, tables I need to do this for.
Any help would be greatly appreciated. Let me know if there's more information you need. Oh, and there's no worry about the column name because all the tables have the same column name.

WITH mytbl AS (
SELECT ColumnName, COUNT(*) AS myCount from TableName group by Column
UNION ALL
SELECT ColumnName, COUNT(*) from TableName2 group by Column
... a union all for every table
)
SELECT ColumnName, SUM(myCount)
FROM mytbl
GROUP BY ColumnName
-- If you are using an earlier version of MS SQL, the UNION statements can be put in a big sub select or a table variable.
-- IE, they'd take the place of mytbl in the last query replace mytbl in the bottom query with the UNIONS from the CTE

SELECT
t.name,
count(c.name) as columnsname
FROM
sys.tables t
inner join sys.columns c
ON t.object_id = c.object_id
group by t.name

You'll need to create and execute dynamic tsql to get your results:
DECLARE #Tsql NVARCHAR(MAX) = ''
DECLARE #ColumnName SYSNAME = 'YourColumnName'
SELECT #Tsql = #Tsql + 'SELECT ''[' + c.TABLE_SCHEMA + '].[' + c.TABLE_NAME + ']'' AS TableName, ' +
'[' + #ColumnName + '], COUNT(*) AS RecordCount FROM [' + c.TABLE_SCHEMA + '].[' + c.TABLE_NAME + '] GROUP BY [' + #ColumnName + '] UNION ' + CHAR(13) + CHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS c
JOIN INFORMATION_SCHEMA.TABLES t
ON t.TABLE_SCHEMA = c.TABLE_SCHEMA
AND t.TABLE_NAME = c.TABLE_NAME
--Comment out the next line if you want data/counts for views too.
AND t.TABLE_TYPE = 'BASE TABLE'
WHERE c.COLUMN_NAME = #ColumnName
--Remove the last UNION (and carriage-return, line-feed)
SELECT #Tsql = LEFT(#Tsql, LEN(#Tsql) - 8)
--Verify query.
PRINT #Tsql
--Uncomment when ready to proceed.
--EXEC (#Tsql)

Related

How to combine multiple results from a loop into one temp table

I have this statement:
Declare #sql varchar(max) = ''
declare #tablename as varchar(255) = 'test'
select #sql = #sql + 'select [' + c.name + '],count(*) as ''' + c.name
+ ''' from [' + t.name + ']'
from sys.columns c
inner join sys.tables t on c.object_id = t.object_id
where t.name = #tablename
EXEC (#sql)
But it gives the output comes out in different results windows and when I try to combine it with a union all the text doesn't fit it. I want to try and get the results into a temp table for SQL server is there anyway i can do this?
I'm trying to get:
Column Name Count Distinct Count
a 100 1
b 100 5
c 100 73
d 100 9
The statement above isn't for distinct count but i'm hoping I could replicate the same logic.
I suspect the query you want to construct really looks like:
select 'a' as column_name, count(column_name), count(distinct column_name)
from t
union all
select 'a' as column_name, count(column_name), count(distinct column_name)
from t
union all
. . .
To construct this in SQL Server, you can use logic like this:
declare #q nvarchar(max);
set #q = '
select ''[column_name]'' as column_name, count([column_name]) as cnt, count(distinct [column_name]) as distinct_count
from [table_name]
';
declare #sql nvarchar(max);
select #sql = string_agg(replace(replace(#q,
'[column_name]',
quotename(column_name)
),
'[table_name]',
quotename(table_name)
), ' union all '
)
from information_schema.columns c
where table_name = #name; -- should probably check the schema too!
exec sp_executesql #sql;

How to get the length one column in all tables in one database?

I am having trouble to get the max length of records in one column in all tables.
I would only like to display the max length for each table for the specific column.
Below is what I have tried, I already found the way to return the column I need, but now, i need to get the max len. I know this is not the right way.
select max(len(site)) as site from
(
SELECT t.name AS TableName
FROM sys.columns c
JOIN sys.tables t ON c.object_id = t.object_id
WHERE c.name LIKE 'site%')A
The expected result will display the column name, the table name and also the max length of the records for that column.
Thanks in advance
I don't understand what are you really trying to do, but I think you want something like
CREATE TABLE T1(
Site1 VARCHAR(45)
);
CREATE TABLE T2(
Site2 VARCHAR(45)
);
INSERT INTO T1 VALUES ('A'), ('AA');
INSERT INTO T2 VALUES ('BBB'), ('BBBBB');
DECLARE #SQL NVARCHAR(MAX) = 'SELECT ';
SELECT #SQL = #SQL +
N'(SELECT MAX(LEN(' + --You can also add ISNULL([Col],0) to get 0
QUOTENAME(c.name) + ')) FROM '+
QUOTENAME(t.name) + ') AS ' +
QUOTENAME(t.name + '.'+c.name) + ', '
FROM sys.columns c
JOIN sys.tables t ON c.object_id = t.object_id
WHERE c.name LIKE 'site%';
SET #SQL = LEFT(#SQL, LEN(#SQL)-1);
EXEC sp_executesql #SQL;
Which will returns:
+----------+----------+
| T1.Site1 | T2.Site2 |
+----------+----------+
| 2 | 5 |
+----------+----------+
Live Demo
Try this:You will get individual Scripts to execute.
select 'select Max(len('+COLUMN_NAME+')),'''+COLUMN_NAME+ ''' as ColumnName ,'''+TABLE_NAME+''' as TableName from ' +table_name
from INFORMATION_SCHEMA.COLUMNS where COLUMN_NAME = 'YourColumnName'
If I understand your question, you may try to generate a dynamic SQL statement and execute this statement:
-- Declarations
DECLARE #stm nvarchar(max)
SET #stm = N''
-- Dynamic SQL
SELECT #stm = (
SELECT CONCAT(
N'UNION ALL ',
N'SELECT ''',
t.name,
N''' AS TableName, ''',
c.name,
N''' AS ColumnName, ',
N'ValueLength = (SELECT MAX(LEN(',
QUOTENAME(c.name),
')) FROM ',
QUOTENAME(t.name),
N')'
)
FROM sys.columns c
JOIN sys.tables t ON c.object_id = t.object_id
WHERE c.name LIKE 'site%'
ORDER BY t.name, c.name
FOR XML PATH('')
)
SET #stm = STUFF(#stm, 1, 10, N'')
-- Execution
PRINT #stm
EXEC sp_executesql #stm

List of all tables that contains specific data in column

With this query, I get all tables that contains column named "Status_ID"
SELECT
c.name AS 'ColumnName'
,t.name AS 'TableName'
FROM sys.columns c
JOIN sys.tables t ON c.object_id = t.object_id
WHERE c.name LIKE 'Status_ID'
Data in Status_ID may only have values from 1 to 6.
What I want is to get a list of all tables, where Status_ID = 2 at least once.
(Exclude all tables from the code above, that do not contain data with Status_ID = 2)
Solution 1:
Run the below query, which will give you a select query with all the tables containing status_id column. then copy and execute the select query to find the data.
SELECT 'select * from ' + TABLE_NAME + ' where Status_ID = ''2'''
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME = 'Status_ID'
Solution 2:
You may need to use the following solution to find the data in all tables
Find a string by searching all tables in SQL Server Management Studio 2008
This should do the trick:
DECLARE #sql NVARCHAR(MAX) = 'DECLARE #tables NVARCHAR(MAX) = '''' ;' ;
DECLARE #tables NVARCHAR(MAX) = '';
SELECT #SQL += 'IF EXISTS (SELECT ''X'' FROM ' + QUOTENAME(t.name)
+ 'WHERE STATUS_ID =2) SET #tables+= ' + '''' + + ',' +
QUOTENAME(t.name) + ''''
+ ';'
FROM sys.columns c
JOIN sys.tables t ON c.object_id = t.object_id
WHERE c.name LIKE 'STATUS_ID'
SET #sql += 'SELECT SUBSTRING(#TABLES,2,LEN(#TABLES));'
EXEC(#SQL);

Looping through column names with dynamic SQL

I just came up with an idea for a piece of code to show all the distinct values for each column, and count how many records for each. I want the code to loop through all columns.
Here's what I have so far... I'm new to SQL so bear with the noobness :)
Hard code:
select [Sales Manager], count(*)
from [BT].[dbo].[test]
group by [Sales Manager]
order by 2 desc
Attempt at dynamic SQL:
Declare #sql varchar(max),
#column as varchar(255)
set #column = '[Sales Manager]'
set #sql = 'select ' + #column + ',count(*) from [BT].[dbo].[test] group by ' + #column + 'order by 2 desc'
exec (#sql)
Both of these work fine. How can I make it loop through all columns? I don't mind if I have to hard code the column names and it works its way through subbing in each one for #column.
Does this make sense?
Thanks all!
You can use dynamic SQL and get all the column names for a table. Then build up the script:
Declare #sql varchar(max) = ''
declare #tablename as varchar(255) = 'test'
select #sql = #sql + 'select [' + c.name + '],count(*) as ''' + c.name + ''' from [' + t.name + '] group by [' + c.name + '] order by 2 desc; '
from sys.columns c
inner join sys.tables t on c.object_id = t.object_id
where t.name = #tablename
EXEC (#sql)
Change #tablename to the name of your table (without the database or schema name).
This is a bit of an XY answer, but if you don't mind hardcoding the column names, I suggest you do just that, and avoid dynamic SQL - and the loop - entirely. Dynamic SQL is generally considered the last resort, opens you up to security issues (SQL injection attacks) if not careful, and can often be slower if queries and execution plans cannot be cached.
If you have a ton of column names you can write a quick piece of code or mail merge in Word to do the substitution for you.
However, as far as how to get column names, assuming this is SQL Server, you can use the following query:
SELECT c.name
FROM sys.columns c
WHERE c.object_id = OBJECT_ID('dbo.test')
Therefore, you can build your dynamic SQL from this query:
SELECT 'select '
+ QUOTENAME(c.name)
+ ',count(*) from [BT].[dbo].[test] group by '
+ QUOTENAME(c.name)
+ 'order by 2 desc'
FROM sys.columns c
WHERE c.object_id = OBJECT_ID('dbo.test')
and loop using a cursor.
Or compile the whole thing together into one batch and execute. Here we use the FOR XML PATH('') trick:
DECLARE #sql VARCHAR(MAX) = (
SELECT ' select ' --note the extra space at the beginning
+ QUOTENAME(c.name)
+ ',count(*) from [BT].[dbo].[test] group by '
+ QUOTENAME(c.name)
+ 'order by 2 desc'
FROM sys.columns c
WHERE c.object_id = OBJECT_ID('dbo.test')
FOR XML PATH('')
)
EXEC(#sql)
Note I am using the built-in QUOTENAME function to escape column names that need escaping.
You want to know the distinct coulmn values in all the columns of the table ? Just replace the table name Employee with your table name in the following code:
declare #SQL nvarchar(max)
set #SQL = ''
;with cols as (
select Table_Schema, Table_Name, Column_Name, Row_Number() over(partition by Table_Schema, Table_Name
order by ORDINAL_POSITION) as RowNum
from INFORMATION_SCHEMA.COLUMNS
)
select #SQL = #SQL + case when RowNum = 1 then '' else ' union all ' end
+ ' select ''' + Column_Name + ''' as Column_Name, count(distinct ' + quotename (Column_Name) + ' ) As DistinctCountValue,
count( '+ quotename (Column_Name) + ') as CountValue FROM ' + quotename (Table_Schema) + '.' + quotename (Table_Name)
from cols
where Table_Name = 'Employee' --print #SQL
execute (#SQL)

SQL Select tables / list tables

I would like to ask for help.
As I have 500 plus tables, and I need to search those table's column having some similar words. Is it possible to search and list those tables?
E.g
Table 1 - Name, age, height
Table 2 - Result, Name, Score
Table 3 - Name, Pic, Parent1, Parent2
I wan to do a query to select all the table that any of the column contain the word "%Name%", is this possible?
Just run following query in your db and replace your search string with string and it will work.
SQL for find particular word/value from all columns and tables in a database
DECLARE #SQL VARCHAR(MAX)
DECLARE #valueToFind VARCHAR(1000)
DECLARE #columnName VARCHAR(1000)
SET #valueToFind = 'string'
SET #columnName = '%%'
CREATE TABLE #TMP
(Clmn VARCHAR(500),
CNT INT)
SELECT #SQL=COALESCE(#SQL,'')+CAST('INSERT INTO #TMP Select ''' + TABLE_SCHEMA + '.' + TABLE_NAME + '.' + COLUMN_NAME + ''' AS Clmn, count(*) CNT FROM '
+ TABLE_SCHEMA + '.[' + TABLE_NAME +
'] WHERE [' + COLUMN_NAME + '] LIKE ''%' + #valueToFind + '%'' ;' AS VARCHAR(8000))
FROM INFORMATION_SCHEMA.COLUMNS
JOIN sysobjects B
ON INFORMATION_SCHEMA.COLUMNS.TABLE_NAME = B.NAME
WHERE COLUMN_NAME LIKE #columnName AND xtype = 'U'
AND DATA_TYPE IN ('char','nchar','ntext','nvarchar','text','varchar')
--PRINT (#SQL)
EXEC(#SQL)
SELECT * FROM #TMP WHERE CNT > 0
DROP TABLE #TMP
-----------------------------------------------------------------------------------------