sql - union tables with same prefix - sql

I have a set of tables with same prefix and same structure. All I need is to "combine" them as one.
use thisdb
declare #maxrow int, #result nvarchar(4000), #tempname nvarchar(20)
set #maxrow = (select count(*) from information_schema.tables where table_schema = 'dbo' and table_name like N'AAbc%')
set #result = ''
set #tempname = 'mytemp'
select #result = #result + case when [row] = 1 then replace([name],'from',concat('into ##',#tempname, ' from')) when [row] = #maxrow then replace([name],'union all','') else [name] end + ' '
from
(select ['table_name,'] union all') as [name], row_number() over(order by table_name asc) as [Row]
from information_schema.tables
where table_schema = 'dbo' and table_name like N'AAbc%') x
execute sp_executesql #result
Basically I retrieve tables with certain pattern from information_schema.tables, then get rid of the last union all.
Above method works for 20-30 tables, as the #result won't exceed the limit for nvarchar. But I'm curious how to get this job done if number of tables N is very large? Or there's better way to deal with this problem?

This should suffice. "Union all" replaced with "Go".
declare #script nvarchar(max)
set #script = (
SELECT STUFF(( SELECT 'insert #MyTable(sharedColName1, SharedColName2) select sharedColName1, SharedColName2 from ' + Table_Name + ' Go '
FROM ( SELECT DISTINCT
Table_Name
FROM INFORMATION_SCHEMA.Tables
where table_Name Like '<prefix_for_tables_with_identical_names>%'
) x
FOR
XML PATH('')
), 1, 0, '') A
)
sp_executeSql #script

Related

SQL count distinct or not null for each column for many columns

I need to analyze a large table with hundreds of columns. A lot of columns are unused.
To investigate I could do something like
SELECT DISTINCT Column1
FROM myTable
or
WITH C AS
(
SELECT DISTINCT Column1
FROM MyTable
)
SELECT COUNT(*)
FROM C
Then I do the same for column2 and so on. However these queries only work for one column which is time consuming and does not give overview in one glance.
Any idea how to build such investigation query for all columns in one?
You need only 1 query where you have to list all the columns of the table:
SELECT COUNT(DISTINCT Column1) column1_count,
COUNT(DISTINCT Column2) column2_count,
COUNT(DISTINCT Column3) column3_count
.....................................
FROM MyTable;
For local purposes only, you can make it dynamic like this:
Get the columns of the table
the query is created as the colleagues did and then it is executed with the EXEC()
DECLARE #columns as Table(RowId INT IDENTITY(1,1), ColumnName nVarchar(50))
DECLARE #ii int = 0
DECLARE #max int = 0
DECLARE #sqlQuery nVarchar(MAX)
INSERT INTO #columns
SELECT COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = N'Customer'
SET #sqlQuery = 'SELECT '
SELECT #max = COUNT(*) FROM #columns
WHILE #ii <= #max
BEGIN
SELECT #sqlQuery = CONCAT(#sqlQuery,'COUNT(DISTINCT ',ColumnName,') ',LOWER(ColumnName),'_count, ')
FROM #columns
WHERE RowId = #ii
SET #ii = #ii + 1
END
SELECT #sqlQuery = CONCAT(#sqlQuery,'FROM Customer')
SELECT #sqlQuery = REPLACE(#sqlQuery,', FROM',' FROM')
select #sqlQuery
EXEC (#sqlQuery)
You should flesh out your requirement a bit more. If all you want to know is if a column contains only NULLs, you'll want to check for max(ColumnName) is null
declare #sql table (id int identity(1,1), QueryString nvarchar(max))
create table ##emptyColumns (emptyColumn nvarchar(128))
declare #i int = 0
declare #iMax int
declare #runthis nvarchar(max)
insert #sql
select 'select ''' + QUOTENAME(s.name) + '.' + QUOTENAME(o.name) + quotename(c.name) + ''' as ''column''
from ' + QUOTENAME(s.name) + '.' + QUOTENAME(o.name) + '
having max(' + c.name + ') is null'
from sys.sysobjects o
inner join sys.syscolumns c on c.id = o.id
inner join sys.schemas s on s.schema_id = o.uid
where o.type = 'U'
order by s.name
, o.name
, c.colorder
select #iMax = count(*)
from #sql
print #iMax
while #i < #iMax
begin
set #i = #i + 1
select #runthis = 'insert into ##emptyColumns
' + QueryString
from #sql
where id = #i
execute sp_executesql #runthis
end
select *
from ##emptyColumns
drop table ##emptyColumns
One further option you might consider:
declare #sql nvarchar(max)
select #sql = isnull(#sql + ' union all ', '') + 'select ''' + COLUMN_NAME + ''',
sum(case when ' + COLUMN_NAME + ' is null then 1 else 0 end) as null_values,
count(distinct ' + COLUMN_NAME + ') as count_distinct
from ' + TABLE_SCHEMA + '.' + TABLE_NAME + '
'
from information_schema.columns
where TABLE_SCHEMA = 'MySchema' and TABLE_NAME = 'MyTable'
exec (#sql)
If you had very big tables with large numbers of columns and were only interested in empty columns you could look into something like checksum_agg(checksum(column_name)). It may help improve performance.
You'd need to be wary of column data types, as they are not all compatible with distinct.

Get data from tables that I have from another query

I'm trying to get data from all tables that I have from another query as follows:
DECLARE #count int
SET #count = (SELECT COUNT(*) FROM (SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME LIKE '%Project%') AS SUBQUERY)
WHILE(#count!=0)
BEGIN
SELECT * from (SELECT TABLE_NAME from (SELECT TABLE_NAME,
ROW_NUMBER() over (order by table_name) as row_number
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_NAME LIKE '%Project%') as sub
WHERE row_number = #count) as another_sub;
SET #count = #count-1
end
What I get with this right now is 5 table names LIKE '%Project%'. I want to get the data from all of these 5 tables, not just their names. Also I don't want to join or union the tables. How can I achieve this?
DECLARE #SQL varchar(max) = '';
SELECT #SQL = #SQL + 'SELECT * FROM ' + QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME) + ';' + CHAR(13) + CHAR(10)
FROM INFORMATION_SCHEMA.TABLES
WHERE TABLE_NAME LIKE '%Project%';
print #SQL;
--EXEC(#SQL);

Select table with name from column value

I have this SQL query
SELECT table_name
INTO #LukaTestTable
FROM information_schema.columns
WHERE column_name = 'GUID'
ORDER BY TABLE_NAME ;
How get the value of the column GUID in all tables with the name from TABLE_NAME?
Or can I get table like?
TABLE_NAME GUID_VALUE
In order to be able to query a table before you know what table you want you have to use dynamic queries. If you have an arbitrary number of tables, in SQL Server you could achieve this with cursors:
DECLARE
#column VARCHAR(256) = 'GUID',
#table VARCHAR(256),
#query VARCHAR(4000);
DECLARE #Tables CURSOR LOCAL FAST_FORWARD
FOR
SELECT TABLE_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME = #column
ORDER BY TABLE_NAME;
OPEN #Tables
FETCH NEXT FROM #Tables INTO #table
WHILE ##FETCH_STATUS = 0
BEGIN
SET #query = 'SELECT ' + #column + ' FROM ' + #table
EXEC(#query)
FETCH NEXT FROM #Tables INTO #table
END
CLOSE #Tables
DEALLOCATE #Tables;
If you're sure your 'GUID' column will have same data type - you can create a temp table and insert to it in WHILE loop and SELECT everything in one go at the end.
You can use dynamic sql like this:
declare #sql varchar(max) = cast (' ' as varchar(max));
select #sql = #sql +
(select ' select GUID from ' + quotename(table_name) + ' union all ' as 'text()'
from #LukaTestTable
for xml path(''));
set #sql = reverse(stuff(reverse(#sql), 1, 10, ''));
--print #sql
exec(#sql);
And if you want table_name as e separate column you should first save it in your #LukaTestTable
Try this below script using While loop and dynamic Sql
IF OBJECT_ID('tempdb..#GetSqlQuery') IS NOT NULL
DROP TABLE #GetSqlQuery
IF OBJECT_ID('tempdb..#GetSpecficcolumnvalue') IS NOT NULL
DROP TABLE #GetSpecficcolumnvalue
GO
CREATE TABLE #GetSqlQuery
(
ID INT IDENTITY
,SELECtQuery nvarchar(max)
,TableName varchar(200)
)
CREATE TABLE #GetSpecficcolumnvalue
(
ID INT IDENTITY
,GetValue nvarchar(max)
,TableName varchar(200)
)
INSERT INTO #GetSqlQuery(SELECtQuery,TableName)
SELECT 'SELECT GUID,'''+TABLE_NAME+''' As TableName FROM '+QUOTENAME(TABLE_NAME) AS SELECtQuery,QUOTENAME(TABLE_NAME) AS TableName
FROM(
SELECT
TABLE_SCHEMA,
TABLE_NAME,
COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME ='GUID'
)dt
ORDER BY TABLE_NAME
DEclare #MinId INt,#MaxId INT,#Sql nvarchar(maX),#TableName varchar(1000)
SELECT #MinId =MIN(Id),#MaxId=MAX(ID) FROM #GetSqlQuery
WHILE (#MinId<=#MaxId)
BEgin
SELECT #Sql=SELECtQuery ,#TableName=TableName From #GetSqlQuery WHERE ID=#MinId
PRINT #Sql
INSERT INTO #GetSpecficcolumnvalue(GetValue,TableName)
EXEC (#Sql)
SET #MinId=#MinId+1
END
SELECT ROW_NUMBER()OVER(ORDER BY (SELECT 1)) AS Seq,GetValue,TableName
FROM
(
SELECT *, ROW_NUMBER ()OVER(PArtition by GetValue,TableName ORDER BY
TableName) AS dup FROM #GetSpecficcolumnvalue
)dt where dt.dup=1

Get top three most common values from every column in a table

I'm trying to write a query that will produce a very small sample of data from each column of a table, in which the sample is made up of the top 3 most common values. This particular problem is part of a bigger task, which is to write scripts that can characterize a database and its tables, its data integrity, and also quickly survey common values in the table on a per-column basis. Think of this as an automated "analysis" of a table.
On a single column basis, I do this already by simply calculating the frequency of values and then sorting by frequency. If I had a column called "color" and all colors were in it, and it just so happened that the color "blue" was in most rows, then the top 1 most frequently occurring value would be "blue". In SQL that is easy to calculate.
However, I'm not sure how I would do this over multiple columns.
Currently, when I do a calculation over all columns of a table, I perform the following type of query:
USE database;
DECLARE #t nvarchar(max)
SET #t = N'SELECT '
SELECT #t = #t + 'count(DISTINCT CAST(' + c.name + ' as varchar(max))) "' + c.name + '",'
FROM sys.columns c
WHERE c.object_id = object_id('table');
SET #t = SUBSTRING(#t, 1, LEN(#t) - 1) + ' FROM table;'
EXEC sp_executesql #t
However, its not entirely clear to me how I would do that here.
(Sidenote:columns that are of type text, ntext, and image, since those would cause errors while counting distinct values, but i'm less concerned about solving that)
But the problem of getting top three most frequent values per column has got me absolutely stumped.
Ideally, I'd like to end up with something like this:
Col1 Col2 Col3 Col4 Col5
---------------------------------------------------------------------
1,2,3 red,blue,green 29,17,0 c,d,j nevada,california,utah
I hacked this together, but it seems to work:
I cant help but think I should be using RANK().
USE <DB>;
DECLARE #query nvarchar(max)
DECLARE #column nvarchar(max)
DECLARE #table nvarchar(max)
DECLARE #i INT = 1
DECLARE #maxi INT = 10
DECLARE #target NVARCHAR(MAX) = <table>
declare #stage TABLE (i int IDENTITY(1,1), col nvarchar(max), tbl nvarchar(max))
declare #results table (ColumnName nvarchar(max), ColumnValue nvarchar(max), ColumnCount int, TableName NVARCHAR(MAX))
insert into #stage
select c.name, o.name
from sys.columns c
join sys.objects o on o.object_id=c.object_id and o.type = 'u'
and c.system_type_id IN (select system_type_id from sys.types where [name] not in ('text','ntext','image'))
and o.name like #target
SET #maxi = (select max(i) from #stage)
while #i <= #maxi
BEGIN
set #column = (select col from #stage where i = #i)
set #table = (select tbl from #stage where i = #i)
SET #query = N'SELECT ' +''''+#column+''''+' , '+ #column
SELECT #query = #query + ', COUNT( ' + #column + ' ) as count' + #column + ' , ''' + #table + ''' as tablename'
select #query = #query + ' from ' + #table + ' group by ' + #column
--Select #query
insert into #results
EXEC sp_executesql #query
SET #i = #i + 1
END
select * from #results
; with cte as (
select *, ROW_NUMBER() over (partition by Columnname order by ColumnCount desc) as rn from #results
)
select * from cte where rn <=3
Start with this SQL Statement builder, and modify it to suit your liking:
EDIT Added Order by Desc
With ColumnSet As
(
Select TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME
From INFORMATION_SCHEMA.COLUMNS
Where 1=1
And TABLE_NAME IN ('Table1')
And COLUMN_NAME IN ('Column1', 'Column2')
)
Select 'Select Top 3 ' + COLUMN_NAME + ', Count (*) NumInstances From ' + TABLE_SCHEMA + '.'+ TABLE_NAME + ' Group By ' + COLUMN_NAME + ' Order by Count (*) Desc'
From ColumnSet

SQL schema and value

I have a select statement I want to make. I want to select
SELECT COLUMN_NAME AS FieldName FROM
INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = 'table1'
However I want to create another column named Value which is a particular row in table1
so I have rows of the column name and the corresponding single value. Any thoughts on how to approach this?
The following query produces a value (the minimum) for each column:
SELECT '''select '+COLUMN_NAME+''' AS FieldName, (select cast(MIN('+COLUMN_NAME+') as varchar(8000)) from '+const.tablename+')'
FROM INFORMATION_SCHEMA.COLUMNS c cross join
(select 'AllCurveNames' as tablename) const
WHERE c.TABLE_NAME = const.tablename
However, this produces a separate query for each row. To combine them together, you need a string aggregate concatenation. This is how you would do it in SQL Server:
declare #sql varchar(max);
SELECT #sql = (select 'select '''+COLUMN_NAME+''' AS FieldName, (select cast(MIN('+COLUMN_NAME+') as varchar(8000)) from '+const.tablename + ') union all '
FROM INFORMATION_SCHEMA.COLUMNS c cross join
(select WHATEVER as tablename) const
WHERE c.TABLE_NAME = const.tablename
for xml path('')
);
select #sql = LEFT(#sql, len(#sql) - 9);
exec(#sql);
Use a cross join, which is implicit if you just select from two tables with no join (i.e., from t1, t2):
SELECT COLUMN_NAME AS FieldName,
Table1.MyField
FROM
INFORMATION_SCHEMA.COLUMNS, Table1
WHERE
TABLE_NAME = 'table1'
AND
MyTable.ID = 123
I actually came up with a bit of a crazy solution but it works:
declare #tbl_name as varchar(255)
declare #field as varchar(255)
declare #val as varchar(255)
declare #SQL as nvarchar(4000)
create table #tbl ( [FieldName][varchar](255), [FieldVal][varchar](255))
set #tbl_name = 'table1'
DECLARE mah_cursor CURSOR FAST_FORWARD
FOR
SELECT COLUMN_NAME FROM
INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_NAME = #tbl_name
OPEN mah_cursor
FETCH NEXT FROM mah_cursor INTO #field
WHILE ##FETCH_STATUS = 0
BEGIN
set #SQL = 'set #val = (Select top 1 ' + #field + ' from ' + #tbl_name + ')'
print #SQL
exec sp_executesql #query = #SQL, #params = N'#val varchar(255) OUTPUT', #val = #val OUTPUT
insert into #tbl ([FieldName],[FieldVal] ) values (#field, #val)
FETCH NEXT FROM mah_cursor INTO #field
END
CLOSE mah_cursor
DEALLOCATE mah_cursor
select * from #tbl
drop table #tbl
It loops through each value and adds it. The Fast_Forward feature optimizes the query for high performance