I have a scenario inside a stored procedure where a temporary table will be generated with an unknown number of columns (Column1 ... ColumnN). One of the columns will be the total (sum) of a few of the other columns.
The client's requirement is to show each column as a percentage of the total column:
(C1*100)/Total as P1 ,(C2*100)/Total as P2.....
I have been unable to find a solution to this problem other than doing it in the front end using LINQ. I am wondering if there is any way to achieve this in SQL, as that would give me performance benefits. The last thing I want to do is to loop through the rows and columns in C#, which will hammer the server.
I have done something similar before; I adapted it to your case, and the comments should help you follow it. I assumed the schema name is dbo; change it if yours is different.
-------------1. first step --------------
-- Create a small two-column table to exercise the script in step 2.
CREATE TABLE [dbo].[tblTest](
    [ID] [int] NULL,
    [isTrue] [bit] NULL
) ON [PRIMARY];

-- Insert sample data. The column list is explicit so the statement keeps
-- working if columns are added or reordered later. SQL Server converts the
-- string literals 'true'/'false' to bit 1/0.
INSERT INTO [dbo].[tblTest] ([ID], [isTrue])
VALUES (1, 'true'), (2, 'false'), (3, 'false'), (4, 'true'), (5, 'false');

-- Show what was inserted. The table name is written with the same casing and
-- schema as in CREATE TABLE so it also resolves on case-sensitive collations.
SELECT [ID], [isTrue]
FROM [dbo].[tblTest];
-------------2. second step --------------
-- Walk the columns of @TableName one by one and copy every value into
-- #Results as (ColumnName, ColumnValue, ColIndex) rows, i.e. an unpivot of a
-- table whose column list is not known in advance.
DECLARE @TableName  nvarchar(256) = N'[dbo].[tblTest]',
        @SearchStr  nvarchar(128) = N'id',
        @SearchStr2 nvarchar(110);  -- quoted LIKE pattern, only used by the optional filter below
SET @SearchStr2 = QUOTENAME('%' + @SearchStr + '%', '''');

DECLARE @ColumnName  sysname,
        @ColumnIndex int,
        @Sql         nvarchar(max);

CREATE TABLE #Results (ColumnName nvarchar(370), ColumnValue nvarchar(3630), ColIndex int);

-- Restrict the lookup to the schema named in @TableName (dbo when none is
-- given) so a same-named table in another schema does not add its columns.
DECLARE getItemID CURSOR LOCAL FAST_FORWARD
FOR
    SELECT COLUMN_NAME, ORDINAL_POSITION
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME = PARSENAME(@TableName, 1)
      AND TABLE_SCHEMA = COALESCE(PARSENAME(@TableName, 2), N'dbo')
    ORDER BY ORDINAL_POSITION;

OPEN getItemID;
FETCH NEXT FROM getItemID INTO @ColumnName, @ColumnIndex;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- The statement is assembled in a variable: EXEC() only accepts string
    -- literals and variables, and the int index must be cast before it can be
    -- concatenated into text.
    SET @Sql = N'SELECT ' + QUOTENAME(@ColumnName, '''')
             + N', LEFT(' + QUOTENAME(@ColumnName) + N', 3630), '
             + CAST(@ColumnIndex AS nvarchar(10))
             + N' FROM ' + @TableName + N' WITH (NOLOCK)';

    -- Enable the next line to keep only values matching @SearchStr:
    -- SET @Sql = @Sql + N' WHERE ' + QUOTENAME(@ColumnName) + N' LIKE ' + @SearchStr2;

    INSERT INTO #Results (ColumnName, ColumnValue, ColIndex)
    EXEC sys.sp_executesql @Sql;

    FETCH NEXT FROM getItemID INTO @ColumnName, @ColumnIndex;
END;
CLOSE getItemID;
DEALLOCATE getItemID;

SELECT ColumnName, ColumnValue, ColIndex
FROM #Results
ORDER BY ColIndex;

DROP TABLE #Results;
-- Per-journey percentage of running time spent in each notch.
-- The notch labels are only known at run time, so the pivot column list and
-- the percentage expressions are generated and executed as dynamic SQL.
DECLARE @cols    AS NVARCHAR(MAX),  -- [label1],[label2],...
        @calCols AS NVARCHAR(MAX),  -- ROUND(R.[label]*100/total, 2) AS [label],...
        @query   AS NVARCHAR(MAX);

-- Seconds spent per (journey, notch label).
SELECT labelled.journeyid,
       labelled.NotchL AS notchl,
       CAST(SUM(DATEDIFF(second, labelled.starttime, labelled.endtime)) AS FLOAT) AS Duration
INTO #temptable
FROM (SELECT nl.journeyid,
             nl.starttime,
             nl.endtime,
             CASE
                 WHEN nl.notch = 'I' THEN 'IDLE'
                 WHEN nl.notch = 'C' THEN 'COASTING'
                 -- The numeric comparison sits in a nested CASE so it is only
                 -- evaluated for values ISNUMERIC accepts; comparing 'I' or 'C'
                 -- with 0 would raise a conversion error.
                 WHEN ISNUMERIC(nl.notch) = 1
                     THEN CASE WHEN CAST(nl.notch AS FLOAT) < 0 THEN 'DYN'
                               ELSE 'N' + nl.notch
                          END
                 ELSE 'N' + nl.notch
             END AS NotchL
      FROM notchlog AS nl) AS labelled
GROUP BY labelled.journeyid,
         labelled.NotchL;

-- Quoted, comma-separated list of the labels (the pivot output columns).
SELECT @cols = STUFF((SELECT ',' + QUOTENAME(notchl)
                      FROM #temptable
                      WHERE notchl IS NOT NULL
                      GROUP BY notchl
                      ORDER BY notchl
                      FOR XML PATH(''), TYPE
                     ).value('.', 'NVARCHAR(MAX)')
                     , 1, 1, '');

-- One percentage expression per label. NULLIF avoids a divide-by-zero error
-- for journeys whose total running time is 0 (the percentage becomes NULL).
SELECT @calCols = STUFF((SELECT ',' + 'ROUND(R.' + QUOTENAME(notchl)
                                + '*100/NULLIF(NL.RunningTime,0),2) AS '
                                + QUOTENAME(notchl)
                         FROM #temptable
                         WHERE notchl IS NOT NULL
                         GROUP BY notchl
                         ORDER BY notchl
                         FOR XML PATH(''), TYPE
                        ).value('.', 'NVARCHAR(MAX)')
                        , 1, 1, '');

IF @cols IS NULL
    -- No labels means no pivot columns; the generated statement would be NULL.
    PRINT 'notchlog produced no notch labels; nothing to pivot.';
ELSE
BEGIN
    -- #temptable is visible inside sp_executesql because it was created in
    -- the calling scope.
    SET @query = N'SELECT NL.JourneyID, NL.RunningTime, ' + @calCols + N'
FROM (SELECT p.Journeyid, ' + @cols + N'
      FROM (SELECT Journeyid, NotchL, Duration FROM #temptable) AS x
      PIVOT (MAX(Duration) FOR NotchL IN (' + @cols + N')) AS p) AS R
INNER JOIN (SELECT JourneyID,
                   SUM(DATEDIFF(second, starttime, endtime)) AS RunningTime
            FROM NotchLog
            GROUP BY JourneyID) AS NL
    ON NL.JourneyID = R.Journeyid
INNER JOIN Journeys AS J
    ON J.JourneysID = R.Journeyid;';

    EXEC sys.sp_executesql @query;
END;

DROP TABLE #temptable;
Related
I need to analyze a large table with hundreds of columns. A lot of columns are unused.
To investigate I could do something like
-- Every distinct value stored in a single column.
SELECT DISTINCT
    Column1
FROM myTable;
or
-- How many distinct values a single column holds. The distinct list is a
-- derived table here; a NULL, if present, is one row of that list and is
-- therefore counted.
SELECT COUNT(*)
FROM (
    SELECT DISTINCT Column1
    FROM MyTable
) AS C
Then I do the same for Column2 and so on. However, these queries only work for one column at a time, which is time-consuming and does not give an overview at a glance.
Any idea how to build such investigation query for all columns in one?
You need only 1 query where you have to list all the columns of the table:
-- One pass over the table: the number of distinct non-NULL values per column.
SELECT COUNT(DISTINCT Column1) AS column1_count,
       COUNT(DISTINCT Column2) AS column2_count,
       COUNT(DISTINCT Column3) AS column3_count
       -- , COUNT(DISTINCT ColumnN) AS columnN_count   (repeat for every remaining column)
FROM MyTable;
For local purposes only, you can make it dynamic like this:
1. Get the columns of the table.
2. Build the query text the same way as in the other answers, then run it with EXEC().
-- Generate and run "SELECT COUNT(DISTINCT [c1]) AS [c1_count], ... FROM <table>"
-- for every column of one table.
-- NOTE(review): the schema defaults to dbo here (the original looked the
-- table up by name only) -- adjust @schemaName if Customer lives elsewhere.
DECLARE @schemaName sysname = N'dbo',
        @tableName  sysname = N'Customer',
        @selectList nvarchar(max),
        @sqlQuery   nvarchar(max);

-- Build the whole select list in one statement instead of looping; STUFF
-- strips the leading ", " so no trailing-comma clean-up is needed afterwards.
SELECT @selectList = STUFF((
        SELECT N', COUNT(DISTINCT ' + QUOTENAME(COLUMN_NAME) + N') AS '
             + QUOTENAME(LOWER(COLUMN_NAME) + N'_count')
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = @schemaName
          AND TABLE_NAME   = @tableName
        ORDER BY ORDINAL_POSITION
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 2, N'');

IF @selectList IS NULL
    -- Table not found (or no columns): avoid running "SELECT FROM ...".
    PRINT N'No columns found for ' + QUOTENAME(@schemaName) + N'.' + QUOTENAME(@tableName);
ELSE
BEGIN
    SET @sqlQuery = N'SELECT ' + @selectList
                  + N' FROM ' + QUOTENAME(@schemaName) + N'.' + QUOTENAME(@tableName);

    SELECT @sqlQuery AS generated_query;  -- show the generated statement
    EXEC sys.sp_executesql @sqlQuery;
END;
You should flesh out your requirement a bit more. If all you want to know is if a column contains only NULLs, you'll want to check for max(ColumnName) is null
-- List every column, in every user table of the current database, that holds
-- no non-NULL value.
-- The per-column test is NOT EXISTS (... WHERE col IS NOT NULL). It reports the
-- same columns as "MAX(col) IS NULL", but also works for types MAX() rejects
-- (bit, text, xml, ...) and can stop at the first non-NULL value found.
DECLARE @sql TABLE (id int IDENTITY(1,1) PRIMARY KEY, QueryString nvarchar(max));

-- Local temp table: visible to sp_executesql below, private to this session.
-- Wide enough for a quoted three-part [schema].[table].[column] name.
CREATE TABLE #emptyColumns (emptyColumn nvarchar(400));

DECLARE @i       int = 0,
        @iMax    int,
        @runthis nvarchar(max);

-- One probe statement per column. The label is the quoted three-part name;
-- apostrophes in it are doubled so it stays a valid string literal.
INSERT @sql (QueryString)
SELECT N'SELECT N'''
     + REPLACE(QUOTENAME(s.name) + N'.' + QUOTENAME(t.name) + N'.' + QUOTENAME(c.name), N'''', N'''''')
     + N''' AS [column]'
     + N' WHERE NOT EXISTS (SELECT 1 FROM ' + QUOTENAME(s.name) + N'.' + QUOTENAME(t.name)
     + N' WHERE ' + QUOTENAME(c.name) + N' IS NOT NULL)'
FROM sys.tables AS t
INNER JOIN sys.schemas AS s
    ON s.schema_id = t.schema_id
INNER JOIN sys.columns AS c
    ON c.object_id = t.object_id
ORDER BY s.name,
         t.name,
         c.column_id;

SELECT @iMax = COUNT(*)
FROM @sql;

PRINT @iMax;  -- number of columns that will be probed

WHILE @i < @iMax
BEGIN
    SET @i = @i + 1;

    SELECT @runthis = N'INSERT INTO #emptyColumns (emptyColumn) ' + QueryString
    FROM @sql
    WHERE id = @i;

    EXECUTE sys.sp_executesql @runthis;
END;

SELECT emptyColumn
FROM #emptyColumns;

DROP TABLE #emptyColumns;
One further option you might consider:
-- For each column of one table: the number of NULLs and of distinct values,
-- produced by a single generated UNION ALL statement.
DECLARE @schema sysname = N'MySchema',
        @table  sysname = N'MyTable',
        @sql    nvarchar(max);

SELECT @sql = STUFF((
        SELECT N' UNION ALL SELECT ' + QUOTENAME(COLUMN_NAME, '''') + N' AS column_name,'
             + N' SUM(CASE WHEN ' + QUOTENAME(COLUMN_NAME) + N' IS NULL THEN 1 ELSE 0 END) AS null_values,'
             + N' COUNT(DISTINCT ' + QUOTENAME(COLUMN_NAME) + N') AS count_distinct'
             + N' FROM ' + QUOTENAME(TABLE_SCHEMA) + N'.' + QUOTENAME(TABLE_NAME)
        FROM INFORMATION_SCHEMA.COLUMNS
        WHERE TABLE_SCHEMA = @schema
          AND TABLE_NAME   = @table
        ORDER BY ORDINAL_POSITION
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)'), 1, 11, N'');  -- 11 = length of the leading ' UNION ALL '

-- @sql stays NULL when the table is not found; skip execution in that case.
IF @sql IS NOT NULL
    EXEC sys.sp_executesql @sql;
If you had very big tables with large numbers of columns and were only interested in empty columns you could look into something like checksum_agg(checksum(column_name)). It may help improve performance.
You'd need to be wary of column data types, as they are not all compatible with distinct.
I have a final temporary table (#tempTable) with unknown columns number.
My final select is like this, it works :
-- Current form: every column of the temp table via the wildcard.
SELECT
    temp.*
FROM #tempTable AS temp
But instead of a '*' I would like to call each columns individually :
-- Desired form: each column of the temp table listed by name.
SELECT
    temp.col1,
    temp.col2
FROM #tempTable AS temp
To do so I need to iterate through my columns names and create a procedure, I tried something like this :
-- Asker's attempt, kept as posted (it does not run): walk the temp table's
-- column names in alphabetical order and issue one SELECT per column.
-- NOTE(review): variables are written with a '#' prefix in this listing; T-SQL
-- variables need '@'. This looks like an extraction artifact -- confirm
-- against the original post.
DECLARE #ColName VARCHAR(255)
-- Start with the alphabetically first column name of the temp table.
SELECT #ColName = min(name) FROM tempdb.sys.columns
WHERE object_id = Object_id('tempdb..#TEMPTABLE');
WHILE #ColName is not null
BEGIN
-- i need to do it all in once and not each time....
-- The variable name sits inside quotes here, so its literal text (not its
-- value) is concatenated, and there is no space before FROM.
declare #sql varchar(max) = 'SELECT tp.'+'#COlName'+'FROM #TEMPTABLE tp'
exec(#sql)
-- Increment the value, how to go to next column ?
-- The predicate below chains "=" and ">" in one expression, which is not
-- valid; the intent appears to be an additional "AND name > <current name>".
select #ColName = min(name) FROM tempdb.sys.columns WHERE object_id =
Object_id('tempdb..#TEMPTABLE') > #ColName -- does not work because it is a string (column name)
END
Try this:
-- Build "select temp.[c1], temp.[c2], ... from #TEMPTABLE temp" in one
-- variable and run it once.
IF OBJECT_ID('tempdb..#TEMPTABLE') IS NULL
    -- Without the table no column is appended and the trimming below would
    -- cut into the word "select".
    RAISERROR('#TEMPTABLE does not exist in this session.', 16, 1);
ELSE
BEGIN
    DECLARE @ColName NVARCHAR(MAX) = N'select ';

    -- Append one quoted column per row. NOTE: this variable-concatenation
    -- form does not guarantee the order in which columns are appended.
    SELECT @ColName = @ColName + N' temp.' + QUOTENAME(name) + N','
    FROM tempdb.sys.columns
    WHERE object_id = OBJECT_ID('tempdb..#TEMPTABLE');

    --delete last character, which is comma and append table name
    SET @ColName = SUBSTRING(@ColName, 1, LEN(@ColName) - 1) + N' from #TEMPTABLE temp';

    EXEC (@ColName);
END;
This query builds the whole column list and combines it into a single select ... from ... statement. I increased the size of the varchar variable so it can accommodate long queries.
Also, IMO a variable name such as @sql or @query would be more meaningful.
A set based approach
-- Demo: create a temp table, then generate and run a SELECT that names each
-- of its columns explicitly, in column order.
IF OBJECT_ID('tempdb..#TEMPTABLE','U') IS NOT NULL
    DROP TABLE #TEMPTABLE;

CREATE TABLE #TEMPTABLE (
     Id   INT IDENTITY(1,1)
    ,Col1 INT
    ,Col2 BIGINT
    ,Col3 BIGINT
    ,Col4 DATETIME
    ,Col5 DATETIME
);

DECLARE @SQL NVARCHAR(MAX);

-- TYPE + .value() returns the concatenation as plain text, so characters such
-- as & or < in a column name are not turned into XML entities. STUFF removes
-- the leading ", ".
SELECT @SQL = N'SELECT ' + STUFF((
        SELECT N', temp.' + QUOTENAME(S.name)
        FROM tempdb.sys.columns AS S
        WHERE S.object_id = OBJECT_ID('tempdb..#TEMPTABLE')
        ORDER BY S.column_id
        FOR XML PATH(''), TYPE
    ).value('.', 'NVARCHAR(MAX)'), 1, 2, N'')
    + N' FROM #TEMPTABLE temp';

EXEC sys.sp_executesql @SQL;
Table:
Col
------
Table1
table2
table3
Query:
-- Pseudo-query: @tablename stands for each table name listed above. A variable
-- cannot be used directly as a table name, hence the dynamic SQL in the answers.
select count(*)
from @tablename
I want to pass table1, table2 and table3 as values for @tablename in the select query and get the count for each table.
Desired output:
2 (table 1 count) 3 (table 2 count) 4 (table 3 count)
you can use dynamic sql and a cursor to run through them:
Create temp table for testing:
-- Table variable holding the table names to count (test data).
DECLARE @tablenametable TABLE (tablename VARCHAR(100));
INSERT INTO @tablenametable (tablename)
VALUES ('table1'), ('table2'), ('table3');

-- Use a cursor to run through all table names in the table and build one
-- COUNT(*) statement per name.
DECLARE @tablename VARCHAR(100);
DECLARE @sql VARCHAR(MAX);

DECLARE dbcursor CURSOR LOCAL FAST_FORWARD
FOR
    SELECT tablename
    FROM @tablenametable;

OPEN dbcursor;
FETCH NEXT FROM dbcursor INTO @tablename;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- Names are concatenated as stored; wrap @tablename in QUOTENAME() if the
    -- list can contain names that need delimiters.
    SET @sql = 'select count(*) from ' + @tablename;
    PRINT (@sql);  -- prints the statement only; nothing is executed yet
    FETCH NEXT FROM dbcursor INTO @tablename;
END;
CLOSE dbcursor;
DEALLOCATE dbcursor;
Give the following results:
select count(*) from table1
select count(*) from table2
select count(*) from table3
Just change PRINT(@sql) to EXEC(@sql) when you're happy with it.
You can use dynamic sql query.
Query
-- Build one "select <count> + '(<table> Count)' ... union all ..." statement
-- from the table names stored in [your_table_name].[Col], then run it.
DECLARE @sql AS nvarchar(max);

-- TYPE + .value() returns the text unescaped; without it, line breaks and
-- special characters in the generated SQL come back as XML entities.
SELECT @sql = STUFF((
        SELECT N' union all '
             + N'select cast(count(*) as varchar(100)) + '
             + N'''(' + REPLACE([Col], N'''', N'''''') + N' Count)'''
             + N' as [table_counts]'
             + N' from ' + QUOTENAME([Col])
        FROM [your_table_name]
        FOR XML PATH(''), TYPE
    ).value('.', 'nvarchar(max)')
    , 1, 11, N''  -- 11 = length of the leading ' union all '
);

-- @sql is NULL when [your_table_name] has no rows; skip execution then.
IF @sql IS NOT NULL
    EXEC (@sql);
Find a demo here
As mentioned here, you have to use dynamic SQL.
First approach is where you specify table name yourself:
-- Count the rows of one table whose name is held in a variable.
-- @SQL was varchar(30), which truncates the statement for any table name
-- longer than 9 characters; it is nvarchar(max) now.
DECLARE @tablename sysname,
        @SQL       nvarchar(max);
SET @tablename = N'Table1'; --here you specify the name
SET @SQL = CONCAT(N'SELECT COUNT(*) FROM ', QUOTENAME(@tablename)); --here you build the query
EXEC sys.sp_executesql @SQL;
Second approach lets you use table with names of tables:
-- Count the rows of every table named in @TableNames with one generated
-- UNION ALL statement; the result has one (TableName, Count) row per table.
DECLARE @SQL nvarchar(max) = N'';
DECLARE @TableNames TABLE (name sysname);
INSERT INTO @TableNames (name) VALUES (N'Table1'), (N'Table2'), (N'Table3');

--here you build the query
SELECT @SQL = @SQL
            + N' SELECT ' + QUOTENAME(name, '''') + N' AS [TableName], COUNT(*) AS [Count] FROM '
            + QUOTENAME(name) + N' UNION ALL'
FROM @TableNames;

-- An empty list leaves @SQL empty; trimming it would call LEFT() with a
-- negative length, so only continue when something was generated.
IF @SQL <> N''
BEGIN
    -- get rid of last "UNION ALL"
    SET @SQL = LEFT(@SQL, LEN(@SQL) - LEN(N' UNION ALL'));
    --execute the query
    EXEC sys.sp_executesql @SQL;
END;
The result of it will be:
TableName Count
Table1 3
Table2 6
Table3 4
You can use sys.dm_db_partition_stats like this [1]:
-- Row count per listed table, read from partition metadata instead of
-- scanning the tables. index_id < 2 keeps only the heap (0) or clustered
-- index (1) entry, so rows are not counted once per nonclustered index.
SELECT
    t.col            AS tableName,
    SUM(s.row_count) AS tableCount
FROM yourTable AS t
INNER JOIN sys.dm_db_partition_stats AS s
    ON  s.index_id < 2
    AND OBJECT_NAME(s.object_id) = t.col
GROUP BY t.col;
[1]. Related answer
One line output version will be:
-- Same counts, concatenated into a single line of text.
-- Both select-list items are deliberately left unnamed: FOR XML PATH('')
-- emits unnamed columns as plain text instead of wrapping them in elements.
SELECT
    SUM(s.row_count),
    ' (' + t.col + ' count) '
FROM yourTable AS t
INNER JOIN sys.dm_db_partition_stats AS s
    ON  s.index_id < 2
    AND OBJECT_NAME(s.object_id) = t.col
GROUP BY t.col
FOR XML PATH('');
output:
2 (Table1 count) 3 (table2 count) 4 (table3 count)
I'm trying to write a query that will produce a very small sample of data from each column of a table, in which the sample is made up of the top 3 most common values. This particular problem is part of a bigger task, which is to write scripts that can characterize a database and its tables, its data integrity, and also quickly survey common values in the table on a per-column basis. Think of this as an automated "analysis" of a table.
On a single column basis, I do this already by simply calculating the frequency of values and then sorting by frequency. If I had a column called "color" and all colors were in it, and it just so happened that the color "blue" was in most rows, then the top 1 most frequently occurring value would be "blue". In SQL that is easy to calculate.
However, I'm not sure how I would do this over multiple columns.
Currently, when I do a calculation over all columns of a table, I perform the following type of query:
-- Count the distinct values of every column of one table in a single
-- generated statement.
USE [database];  -- placeholder: replace with your database name

DECLARE @t nvarchar(max);
SET @t = N'SELECT ';

-- One COUNT(DISTINCT ...) per column; values are cast to varchar(max) so
-- columns of different types can be handled by the same expression.
SELECT @t = @t + N'count(DISTINCT CAST(' + QUOTENAME(c.name) + N' as varchar(max))) AS '
          + QUOTENAME(c.name) + N','
FROM sys.columns AS c
WHERE c.object_id = OBJECT_ID(N'[table]');  -- placeholder: replace with your table name

-- Drop the trailing comma and append the FROM clause.
SET @t = SUBSTRING(@t, 1, LEN(@t) - 1) + N' FROM [table];';

EXEC sys.sp_executesql @t;
However, it's not entirely clear to me how I would do that here.
(Side note: columns of type text, ntext, and image would cause errors while counting distinct values, but I'm less concerned about solving that.)
But the problem of getting top three most frequent values per column has got me absolutely stumped.
Ideally, I'd like to end up with something like this:
Col1 Col2 Col3 Col4 Col5
---------------------------------------------------------------------
1,2,3 red,blue,green 29,17,0 c,d,j nevada,california,utah
I hacked this together, but it seems to work:
I can't help but think I should be using RANK().
-- For every column of the table(s) matching @target, collect the frequency of
-- each value, then return the three most frequent values per column.
USE <DB>;  -- placeholder: replace <DB> with the database to analyse

DECLARE @query  nvarchar(max),
        @column sysname,
        @schema sysname,
        @table  sysname,
        @label  nvarchar(300),  -- schema.table, reported in the results
        @i      int = 1,
        @maxi   int,
        @target nvarchar(128) = N'<table>';  -- placeholder: LIKE pattern for the table name(s)

DECLARE @stage TABLE (i int IDENTITY(1,1) PRIMARY KEY, col sysname, sch sysname, tbl sysname);
DECLARE @results TABLE (ColumnName sysname, ColumnValue nvarchar(max), ColumnCount int, TableName nvarchar(300));

-- Stage the columns to analyse. Columns whose base type cannot be used in
-- GROUP BY (text, ntext, image, xml) are skipped; matching on system_type_id
-- also catches alias types built on them.
INSERT INTO @stage (col, sch, tbl)
SELECT c.name, SCHEMA_NAME(o.schema_id), o.name
FROM sys.columns AS c
INNER JOIN sys.objects AS o
    ON o.object_id = c.object_id
   AND o.type = 'U'
WHERE o.name LIKE @target
  AND NOT EXISTS (SELECT 1
                  FROM sys.types AS ty
                  WHERE ty.system_type_id = c.system_type_id
                    AND ty.name IN ('text', 'ntext', 'image', 'xml'))
ORDER BY o.name, c.column_id;

SET @maxi = (SELECT MAX(i) FROM @stage);  -- NULL when nothing matched: the loop is skipped

WHILE @i <= @maxi
BEGIN
    SELECT @column = col, @schema = sch, @table = tbl
    FROM @stage
    WHERE i = @i;

    SET @label = @schema + N'.' + @table;

    -- COUNT(*) rather than COUNT(column) so the NULL group reports its real
    -- size instead of 0. Names are passed as parameters and identifiers are
    -- quoted, so unusual column or table names cannot break the statement.
    SET @query = N'SELECT @col, CONVERT(nvarchar(max), ' + QUOTENAME(@column) + N'), COUNT(*), @tbl'
               + N' FROM ' + QUOTENAME(@schema) + N'.' + QUOTENAME(@table)
               + N' GROUP BY ' + QUOTENAME(@column);

    INSERT INTO @results (ColumnName, ColumnValue, ColumnCount, TableName)
    EXEC sys.sp_executesql @query,
                           N'@col sysname, @tbl nvarchar(300)',
                           @col = @column,
                           @tbl = @label;

    SET @i = @i + 1;
END;

-- Full frequency table.
SELECT ColumnName, ColumnValue, ColumnCount, TableName
FROM @results;

-- Top three values per column; partitioned by table as well, so same-named
-- columns of different tables are ranked separately.
WITH ranked AS (
    SELECT ColumnName, ColumnValue, ColumnCount, TableName,
           ROW_NUMBER() OVER (PARTITION BY TableName, ColumnName
                              ORDER BY ColumnCount DESC) AS rn
    FROM @results
)
SELECT ColumnName, ColumnValue, ColumnCount, TableName, rn
FROM ranked
WHERE rn <= 3;
Start with this SQL Statement builder, and modify it to suit your liking:
EDIT Added Order by Desc
-- Statement builder: returns one "top 3 most frequent values" query (as
-- text) per selected column. Copy the generated statements and run them.
WITH ColumnSet AS
(
    SELECT TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE TABLE_NAME IN ('Table1')                    -- tables to analyse
      AND COLUMN_NAME IN ('Column1', 'Column2')       -- columns to analyse
)
-- Identifiers are quoted so names containing spaces or reserved words still
-- produce runnable statements.
SELECT 'Select Top 3 ' + QUOTENAME(COLUMN_NAME)
     + ', Count(*) As NumInstances From ' + QUOTENAME(TABLE_SCHEMA) + '.' + QUOTENAME(TABLE_NAME)
     + ' Group By ' + QUOTENAME(COLUMN_NAME)
     + ' Order By Count(*) Desc;' AS sql_statement
FROM ColumnSet
I have a field namely Modified_Dt of type Datetime in all of my tables, to keep track of last modified date and time for a record.
Now, let's say I need to know which tables have records that have been modified recently (for example, today).
How do I write a query for that? How do I query multiple tables?
By the way, I am using MS SQL Server 2008 R2.
USE master
GO
-- Print the name of every table in DBStackExchange that has a Modified_Dt
-- column and at least one row modified today (or later).
DECLARE @SchemaName sysname,
        @ObjectName sysname,
        @FullName   nvarchar(300),
        @Sql        nvarchar(max);

-- Only real tables (sys.tables), together with their schema, so tables
-- outside dbo are found and views are ignored.
DECLARE TablesList CURSOR LOCAL FAST_FORWARD
FOR
    SELECT s.name, t.name
    FROM [DBStackExchange].sys.tables AS t
    INNER JOIN [DBStackExchange].sys.schemas AS s
        ON s.schema_id = t.schema_id
    INNER JOIN [DBStackExchange].sys.columns AS c
        ON c.object_id = t.object_id
    WHERE c.name = N'Modified_Dt';

OPEN TablesList;
FETCH NEXT FROM TablesList INTO @SchemaName, @ObjectName;
WHILE @@FETCH_STATUS = 0
BEGIN
    SET @FullName = QUOTENAME(@SchemaName) + N'.' + QUOTENAME(@ObjectName);

    -- Compare as datetime against midnight of today. Comparing the values as
    -- dd/mm/yyyy strings (style 103) orders them by day-of-month first and
    -- gives wrong answers.
    SET @Sql = N'IF EXISTS (SELECT 1 FROM [DBStackExchange].' + @FullName
             + N' WHERE Modified_Dt >= DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0))'
             + N' PRINT @name;';

    EXEC sys.sp_executesql @Sql, N'@name nvarchar(300)', @name = @FullName;

    FETCH NEXT FROM TablesList INTO @SchemaName, @ObjectName;
END;
CLOSE TablesList;
DEALLOCATE TablesList;
Note: Replace 'DBStackExchange' with your Database name
-- Collect the latest Modified_Dt of every table that has such a column, then
-- list the tables modified today.
DECLARE @T TABLE (T_Name nvarchar(300), M datetime);
DECLARE @T_Name nvarchar(300),
        @SQLT   nvarchar(max);

-- Only tables that actually have a Modified_Dt column are visited; querying
-- the column on any other table would fail. Names are schema-qualified and
-- quoted.
DECLARE c CURSOR LOCAL FAST_FORWARD FOR
    SELECT QUOTENAME(SCHEMA_NAME(t.schema_id)) + N'.' + QUOTENAME(t.name)
    FROM sys.tables AS t
    WHERE EXISTS (SELECT 1
                  FROM sys.columns AS col
                  WHERE col.object_id = t.object_id
                    AND col.name = N'Modified_Dt');

OPEN c;
FETCH NEXT FROM c INTO @T_Name;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- MAX() yields the most recent value; for an empty table it is NULL and
    -- the row is dropped by the final filter.
    SET @SQLT = N'SELECT @name, MAX(Modified_Dt) FROM ' + @T_Name;

    INSERT @T (T_Name, M)
    EXEC sys.sp_executesql @SQLT, N'@name nvarchar(300)', @name = @T_Name;

    FETCH NEXT FROM c INTO @T_Name;
END;
CLOSE c;
DEALLOCATE c;

-- Midnight of the current day as the lower bound.
SELECT T_Name, M
FROM @T
WHERE M >= DATEADD(day, DATEDIFF(day, 0, GETDATE()), 0);
Here is an answer without cursor or temporary table
-- Latest modification time per table, for every table that has a column
-- named @ColumnName, restricted to changes after @ModifiedSince.
DECLARE @ColumnName AS sysname = N'Modified_Dt';
DECLARE @ModifiedSince AS datetime = '20140709';
DECLARE @sql AS nvarchar(max) = N'';

-- Build a query with UNION ALL between all tables containing @ColumnName.
-- Tables are schema-qualified so those outside the default schema resolve;
-- apostrophes in the label are doubled to keep the string literal valid.
WITH AllTables AS (
    SELECT SCHEMA_NAME(Tables.schema_id) AS SchemaName
          ,Tables.name AS TableName
          ,Columns.name AS ColumnName
    FROM sys.tables AS Tables
    INNER JOIN sys.columns AS Columns
        ON Tables.object_id = Columns.object_id
    WHERE Columns.name = @ColumnName
)
SELECT @sql = @sql +
    N'UNION ALL SELECT N''' + REPLACE(SchemaName + N'.' + TableName, N'''', N'''''') + N'''' +
    N', ' + QUOTENAME(ColumnName) +
    N' FROM ' + QUOTENAME(SchemaName) + N'.' + QUOTENAME(TableName) + CHAR(13)
FROM AllTables;

IF @sql = N''
    -- Nothing to query: STUFF on an empty string would produce a NULL statement.
    PRINT N'No table has a column named ' + @ColumnName;
ELSE
BEGIN
    -- Create a query which selects last change from all tables.
    -- @ModifiedSince is passed as a real parameter, not concatenated.
    SET @sql =
        N'WITH AllChanges(TableName, ModifiedTime) AS ( ' +
        STUFF(@sql, 1, LEN(N'UNION ALL'), N'') + -- Remove first UNION ALL
        N') ' +
        N'SELECT TableName ' +
        N'      ,MAX(ModifiedTime) AS LastModified ' +
        N'FROM AllChanges ' +
        N'WHERE ModifiedTime > @ModifiedSince ' +
        N'GROUP BY TableName;';

    EXECUTE sys.sp_executesql @sql, N'@ModifiedSince datetime', @ModifiedSince;
END;