Flattening a one-row table into a key-value pair table - SQL

What's the best way to get a key-value pair result set that represents the column values of a single row?
Given the following table A with only 1 row
Column1 Column2 Column3 ...
Value1 Value2 Value3
I want to query it and insert into another table B:
Key Value
Column1 Value1
Column2 Value2
Column3 Value3
The set of columns in table A is not known in advance.
NOTE: I was looking at FOR XML and PIVOT features as well as dynamic SQL to do something like this:
DECLARE @sql nvarchar(max)
SET @sql = (SELECT STUFF((SELECT ',' + column_name
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_name='TableA'
ORDER BY column_name FOR XML PATH('')), 1, 1, ''))
SET @sql = 'SELECT ' + @sql + ' FROM TableA'
EXEC(@sql)
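(For the sample table, this builds and runs SELECT Column1,Column2,Column3 FROM TableA - it discovers the columns dynamically but still returns one wide row, not key-value pairs.)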

A version with no dynamic SQL involved. If you have column names that are invalid as XML element names, this will fail.
select T2.N.value('local-name(.)', 'nvarchar(128)') as [Key],
T2.N.value('text()[1]', 'nvarchar(max)') as Value
from (select *
from TableA
for xml path(''), type) as T1(X)
cross apply T1.X.nodes('/*') as T2(N)
A working sample:
declare @T table
(
Column1 varchar(10),
Column2 varchar(10),
Column3 varchar(10)
)
insert into @T values('V1','V2','V3')
select T2.N.value('local-name(.)', 'nvarchar(128)') as [Key],
T2.N.value('text()[1]', 'nvarchar(max)') as Value
from (select *
from @T
for xml path(''), type) as T1(X)
cross apply T1.X.nodes('/*') as T2(N)
Result:
Key Value
-------------------- -----
Column1 V1
Column2 V2
Column3 V3
Update
For a query with more than one table you can use for xml auto to get the table names in the XML. Note that if you use an alias for a table name in the query, you will get the alias instead.
select X2.N.value('local-name(..)', 'nvarchar(128)') as TableName,
X2.N.value('local-name(.)', 'nvarchar(128)') as [Key],
X2.N.value('text()[1]', 'nvarchar(max)') as Value
from (
-- Your query starts here
select T1.T1ID,
T1.T1Col,
T2.T2ID,
T2.T2Col
from T1
inner join T2
on T1.T1ID = T2.T1ID
-- Your query ends here
for xml auto, elements, type
) as X1(X)
cross apply X1.X.nodes('//*[text()]') as X2(N)

I think you're halfway there. Just use UNPIVOT and dynamic SQL as Martin recommended:
CREATE TABLE TableA (
Code VARCHAR(10),
Name VARCHAR(10),
Details VARCHAR(10)
)
INSERT TableA VALUES ('Foo', 'Bar', 'Baz')
GO
DECLARE @sql nvarchar(max)
SET @sql = (SELECT STUFF((SELECT ',' + column_name
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_name='TableA'
ORDER BY ordinal_position FOR XML PATH('')), 1, 1, ''))
SET @sql = N'SELECT [Key], Val FROM (SELECT ' + @sql + ' FROM TableA) x '
+ 'UNPIVOT ( Val FOR [Key] IN (' + @sql + ')) AS unpiv'
EXEC (@sql)
Results:
Key Val
------------ ------------
Code Foo
Name Bar
Details Baz
There is a caveat, of course. All your columns will need to be the same data type for the above code to work. If they are not, you will get this error:
Msg 8167, Level 16, State 1, Line 1
The type of column "Col" conflicts with the type of
other columns specified in the UNPIVOT list.
To get around this, you'll need to build two column strings: one to list the columns and one to cast them all to the data type of your Val column.
For multiple column types:
CREATE TABLE TableA (
Code INT,
Name VARCHAR(10),
Details VARCHAR(10)
)
INSERT TableA VALUES (1, 'Foo', 'Baf')
GO
DECLARE
@sql nvarchar(max),
@cols nvarchar(max),
@conv nvarchar(max)
SET @cols = (SELECT STUFF((SELECT ',' + column_name
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_name='TableA'
ORDER BY ordinal_position FOR XML PATH('')), 1, 1, ''))
SET @conv = (SELECT STUFF((SELECT ', CONVERT(VARCHAR(50), '
+ column_name + ') AS ' + column_name
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_name='TableA'
ORDER BY ordinal_position FOR XML PATH('')), 1, 1, ''))
SET @sql = N'SELECT [Key], Val FROM (SELECT ' + @conv + ' FROM TableA) x '
+ 'UNPIVOT ( Val FOR [Key] IN (' + @cols + ')) AS unpiv'
EXEC (@sql)

Perhaps you're making this more complicated than it needs to be. Partly because I couldn't wrap my little brain around the number of PIVOT/UNPIVOT/whatever combinations and the dynamic SQL "sea of red" that would be necessary to pull this off. Since you know the table has exactly one row, pulling the value for each column can just be a subquery in a set of UNIONed queries.
DECLARE @sql NVARCHAR(MAX) = N'INSERT dbo.B([Key], Value) '
SELECT @sql += CHAR(13) + CHAR(10)
+ ' SELECT [Key] = ''' + REPLACE(name, '''', '''''') + ''',
Value = (SELECT ' + QUOTENAME(name) + ' FROM dbo.A) UNION ALL'
FROM sys.columns
WHERE [object_id] = OBJECT_ID('dbo.A');
SET @sql = LEFT(@sql, LEN(@sql)-9) + ';';
PRINT @sql;
-- EXEC sp_executesql @sql;
Result (I only created 4 columns, but this would work for any number):
INSERT dbo.B([Key], Value)
SELECT [Key] = 'Column1',
Value = (SELECT [Column1] FROM dbo.A) UNION ALL
SELECT [Key] = 'Column2',
Value = (SELECT [Column2] FROM dbo.A) UNION ALL
SELECT [Key] = 'Column3',
Value = (SELECT [Column3] FROM dbo.A) UNION ALL
SELECT [Key] = 'Column4',
Value = (SELECT [Column4] FROM dbo.A);
The most efficient thing in the world? Likely not. But again, for a one-row table, and hopefully a one-off task, I think it will work just fine. Just watch out for column names that contain apostrophes, if you allow those things in your shop...
EDIT: sorry, I couldn't leave it that way. The code above now handles apostrophes in column names and other sub-optimal naming choices.


Extract all records from a JSON column, using JSON type

I have a couple of tables (see reproducible code at the bottom):
tbl1_have
id json_col
1 {"a_i":"a","a_j":1}
1 {"a_i":"b","a_j":2}
2 {"a_i":"c","a_j":3}
2 {"a_i":"d","a_j":4}
tbl2_have
id json_col
1 [{"a_i":"a","a_j":1},{"a_i":"b","a_j":2}]
2 [{"a_i":"c","a_j":3},{"a_i":"d","a_j":4}]
I wish to extract all JSON columns without providing an explicit data type conversion for each column, since in my use case the names and number of nested attributes vary.
The expected output is the same for both cases:
tbl_want
id a_i a_j
1 a 1
1 b 2
2 c 3
2 d 4
with a_i and a_j correctly stored as character and numeric columns, which means I'd like to map JSON types to SQL types (say INT and VARCHAR() here) automatically.
The following gets me half way for both tables:
SELECT id, a_i, a_j FROM tbl2_have CROSS APPLY OPENJSON(json_col)
WITH(a_i VARCHAR(100), a_j INT)
id a_i a_j
1 1 a 1
2 1 b 2
3 2 c 3
4 2 d 4
How can I work around mentioning the types explicitly in with() ?
Reproducible code:
CREATE TABLE tbl1_have (id INT, json_col VARCHAR(100))
INSERT INTO tbl1_have VALUES
(1, '{"a_i":"a","a_j":1}'),
(1, '{"a_i":"b","a_j":2}'),
(2, '{"a_i":"c","a_j":3}'),
(2, '{"a_i":"d","a_j":4}')
CREATE TABLE tbl2_have (id INT, json_col VARCHAR(100))
INSERT INTO tbl2_have VALUES
(1, '[{"a_i":"a","a_j":1},{"a_i":"b","a_j":2}]'),
(2, '[{"a_i":"c","a_j":3},{"a_i":"d","a_j":4}]')
SELECT id, a_i, a_j FROM tbl1_have CROSS APPLY OPENJSON(json_col)
WITH(a_i VARCHAR(100), a_j INT)
SELECT id, a_i, a_j FROM tbl2_have CROSS APPLY OPENJSON(json_col)
WITH(a_i VARCHAR(100), a_j INT)
I am assuming that you don't know the names and types of the keys in advance. You need to use dynamic SQL.
You first need to use OPENJSON without the WITH clause on the {objects} like so:
select string_agg(quotename(k) + case t
when 0 then ' nchar(1)' -- javascript null
when 1 then ' nvarchar(max)' -- javascript string
when 2 then ' float' -- javascript number
when 3 then ' bit' -- javascript boolean
else ' nvarchar(max) as json' -- javascript array or object
end, ', ') within group (order by k)
from (
select j2.[key], max(j2.[type])
from test
cross apply openjson(case when json_col like '{%}' then '[' + json_col + ']' else json_col end) as j1
cross apply openjson(j1.value) as j2
group by j2.[key]
) as kt(k, t)
The inner query gives you the name and type of all the keys across all json values in the table. The outer query builds the WITH clause for dynamic SQL.
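For the sample data in the question, this should generate a clause along the lines of [a_i] nvarchar(max), [a_j] float, since a_i holds JSON strings (type 1) and a_j holds JSON numbers (type 2).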
The rest is relatively straight forward, use the generated clause in your dynamic SQL. Here is the complete example:
declare @table_name nvarchar(100) = 'test';
declare @with_clause nvarchar(100);
declare @query1 nvarchar(999) = N'select @with_clause_temp = string_agg(quotename(k) + case t
when 0 then '' nchar(1)''
when 1 then '' nvarchar(max)''
when 2 then '' float''
when 3 then '' bit''
else '' nvarchar(max) as json''
end, '', '') within group (order by k)
from (
select j2.[key], max(j2.[type])
from ' + quotename(@table_name) + '
cross apply openjson(case when json_col like ''{%}'' then ''['' + json_col + '']'' else json_col end) as j1
cross apply openjson(j1.value) as j2
group by j2.[key]
) as kt(k, t)';
exec sp_executesql @query1, N'@with_clause_temp nvarchar(100) out', @with_clause out;
declare @query2 nvarchar(999) = N'select id, j.*
from ' + quotename(@table_name) + '
cross apply openjson(json_col)
with (' + @with_clause + ') as j';
exec sp_executesql @query2;
I have found a solution that may work for your use case. I am no SQL expert by any means, and I did not manage to automatically detect the datatypes of the dynamic columns, but I found a solution for your two examples.
First I tried to get all column names dynamically from json_col. I found an answer on Stack Overflow and got this piece of code:
STUFF(
(
SELECT DISTINCT ', '+QUOTENAME(columnname) FROM #tmpTbl FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 1, '');
This will output all column names as a string separated by commas, in your example: ' [a_i], [a_j]'. This can then be used to dynamically SELECT columns.
As already mentioned above, I was not able to write a datatype detection algorithm; I just hardcoded the columns to have nvarchar(100) as the datatype.
To dynamically get the column names with the corresponding datatype (hardcoded as nvarchar(100)) I used a slightly modified version of the above query:
STUFF(
(
SELECT DISTINCT ', '+QUOTENAME(columnname)+' nvarchar(100)' FROM #tmpTbl FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 1, '');
Then I just used them in the WITH clause.
Full version for the table tbl1_have
DECLARE @cols NVARCHAR(MAX), @colsWithType NVARCHAR(MAX), @query NVARCHAR(MAX);
DROP TABLE IF EXISTS #tmpTbl
SELECT outerTable.[id] AS columnid, innerTable.[key] AS columnname, innerTable.[value] AS columnvalue
INTO #tmpTbl
FROM tbl1_have outerTable CROSS APPLY OPENJSON(json_col) AS innerTable
SELECT * FROM #tmpTbl
SET @cols = STUFF(
(
SELECT DISTINCT ', '+QUOTENAME(columnname) FROM #tmpTbl FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 1, '');
SET @colsWithType = STUFF(
(
SELECT DISTINCT ', '+QUOTENAME(columnname)+' nvarchar(100)' FROM #tmpTbl FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 1, '');
SET @query = N'SELECT id, '+@cols+' FROM tbl1_have CROSS APPLY OPENJSON(json_col)
WITH('+@colsWithType+')';
exec sp_executesql @query
Full Version for the table tbl2_have:
DECLARE @cols NVARCHAR(MAX), @colsWithType NVARCHAR(MAX), @query NVARCHAR(MAX);
DROP TABLE IF EXISTS #tmpTbl
DROP TABLE IF EXISTS #tmpTbl2
SELECT *
INTO #tmpTbl
FROM tbl2_have CROSS APPLY OPENJSON(json_col)
SELECT outerTable.[id] AS columnid, innerTable.[key] AS columnname, innerTable.[value] AS columnvalue
INTO #tmpTbl2
FROM #tmpTbl outerTable CROSS APPLY OPENJSON([value]) AS innerTable
SELECT * FROM #tmpTbl
SELECT * FROM #tmpTbl2
SET @cols = STUFF(
(
SELECT DISTINCT ', '+QUOTENAME(columnname) FROM #tmpTbl2 FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 1, '');
SET @colsWithType = STUFF(
(
SELECT DISTINCT ', '+QUOTENAME(columnname)+' nvarchar(100)' FROM #tmpTbl2 FOR XML PATH(''), TYPE
).value('.', 'nvarchar(max)'), 1, 1, '');
SET @query = N'SELECT id, '+@cols+' FROM tbl2_have CROSS APPLY OPENJSON(json_col)
WITH('+@colsWithType+')';
exec sp_executesql @query
Would using the Value returned from OPENJSON work? It probably maps to a string data type, however, you do not have to know the type upfront. The official doc of the OPENJSON rowset function indicates that it returns a Key:Value pair as well as a Type for each parse. The Type value may be useful, however, it determines the datatype while parsing. I bet that Value is always a string type, as it would have to be.
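For reference, this is roughly what the schema-less OPENJSON output looks like for one of the sample objects:
SELECT [key], [value], [type] FROM OPENJSON('{"a_i":"a","a_j":1}')
key value type
a_i a 1
a_j 1 2
Here type 1 means string and type 2 means number, while value itself always comes back as text.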
;WITH X AS
(
SELECT id, a_i=J.[Key], a_j=J.[Value] FROM tbl2_have CROSS APPLY OPENJSON(json_col) J
)
SELECT
id,
a_i=MAX(CASE WHEN J.[Key]='a_i' THEN J.[Value] ELSE NULL END),
a_j=MAX(CASE WHEN J.[Key]='a_j' THEN J.[Value] ELSE NULL END)
FROM X CROSS APPLY OPENJSON(X.a_j) J
GROUP BY
id,a_i,a_j

Select not null columns as a comma separated string from dynamic sql

I am trying to do my best to avoid using cursors. There is a comma-separated list of columns in an nvarchar variable that looks like this:
@columnList = 'col1,col2,col5'
There is a table with lots of varchar columns:
myTable: [col1],[col2],[col3],[col4],[col5],[col6],[col7]
This is how I select data using a dynamic sql:
exec ('select ' + @columnList + ' from myTable')
This query returns the following results:
[col1], [col2] , [col5]
null , "txt1" , null
"txt2", null , null
null , "txt3" , "txt4"
This is what I need to get:
@resultList = "txt1,txt2,txt3,txt4"
How do I select a comma-separated string containing only the not-null values? I know how to convert a table to a comma-separated string, so getting something like:
[column]
"txt1"
"txt2"
"txt3"
"txt4"
Is also fine. Any suggestions? Feel free to suggest a basic approach, I don't expect you to write the actual code for me.
You can use a solution like the following, using just a REPLACE to create the SQL query:
DECLARE @columnList VARCHAR(100)
SET @columnList = 'col1,col2,col5'
SET @columnList = REPLACE(@columnList, ',', ' AS colName FROM myTable UNION ALL SELECT ');
EXEC('SELECT * FROM (SELECT ' + @columnList + ' AS colName FROM myTable)t WHERE NOT t.colName IS NULL');
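For @columnList = 'col1,col2,col5', the REPLACE expands the list so that the executed statement becomes:
SELECT * FROM (SELECT col1 AS colName FROM myTable UNION ALL SELECT col2 AS colName FROM myTable UNION ALL SELECT col5 AS colName FROM myTable)t WHERE NOT t.colName IS NULL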
You can also use a solution using UNPIVOT:
DECLARE @columnList VARCHAR(100);
SET @columnList = 'col1,col2,col5';
EXEC('SELECT colName FROM (SELECT ' + @columnList + ' FROM myTable) t1 UNPIVOT (colName FOR columnNames IN (' + @columnList + ')) AS t2');
Since you mention you already know how to aggregate into a comma-separated string, here's how to unpivot your table with cross apply (a sketch combining it with STRING_AGG follows the code):
select unpivoted.*
from myTable
cross apply
( values
('col1',col1)
,('col2',col2)
,('col3',col3) -- etc
)unpivoted(colname,colval)
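From there, filtering the NULLs and aggregating is one more step; a minimal sketch for SQL Server 2017+, combining the cross apply above with STRING_AGG (the column list is assumed from the question):
select string_agg(unpivoted.colval, ',')
from myTable
cross apply
( values
('col1',col1)
,('col2',col2)
,('col5',col5)
)unpivoted(colname,colval)
where unpivoted.colval is not null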
This works on SQL Server 2017 and later; on earlier versions, the STRING_AGG usage should be replaced with an XML-based solution.
You could first concatenate your results into a single line and then use STRING_AGG to aggregate them into a single string:
;with t as
(
SELECT * FROM (VALUES
(null , 'txt1' , null),
('txt2', null , null),
(null , 'txt3' , 'txt4')) x(col1, col2, col5)
)
SELECT STRING_AGG(STUFF(CONCAT(',' + col1, ',' + col2, ',' + col5), 1, 1, ''), ',')
FROM t
The CTE is just for showcasing; you can simply do it in your dynamic query:
DECLARE @columnList NVARCHAR(MAX) = 'col1,col2,col5'
DECLARE @query NVARCHAR(MAX) = ''
SELECT @query = 'SELECT STRING_AGG(STUFF(CONCAT('','' + ' + REPLACE(@columnList, ',', ', '','' + ') + '), 1, 1, ''''), '','') from mytable'
EXEC sp_executesql @query
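The REPLACE turns the column list into a CONCAT argument list, so the statement actually executed is:
SELECT STRING_AGG(STUFF(CONCAT(',' + col1, ',' + col2, ',' + col5), 1, 1, ''), ',') from mytable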

How to dynamically calculate the sums of many columns in a GROUP?

In the table below, I have a variable number of columns, and that number is in the 1000s. I need to sum all the values of each of the 1000 columns grouped by the person's name. So, smith's total test_score_1, total test_score_2,...total test_score_1000. And then Jackson's total test_score_1, total test_score_2,...total test_score_1000.
I don't know the number of 'test_score_n' columns beforehand and they are always changing.
So given this table:
name test_score_1 test_score_2 ... test_score_1000
smith 2 1 0
jackson 0 3 1
jackson 1 1 2
jackson 3 0 3
smith 4 5 1
How can I produce the table below?
name test_score_1 test_score_2 ... test_score_1000
smith 6 6 1
jackson 4 4 6
SQL to generate the SQL
DECLARE @generatedSQL nvarchar(max);
SET @generatedSQL = (
SELECT
'SELECT ' +
SUBSTRING(X.foo, 2, 2000) +
'FROM ' +
QUOTENAME(SCHEMA_NAME(t.schema_id)) + '.' + QUOTENAME(t.name) +
' GROUP BY name' --fix this line , edited
FROM
sys.tables t
CROSS APPLY
(
SELECT
', SUM(' + QUOTENAME(c.name) + ')'
FROM
sys.columns c
WHERE
c.object_id = t.object_id
AND
c.name <> 'Name'
FOR XML PATH('')
) X (foo)
WHERE
t.name = 'MyTable'
);
EXEC (@generatedSQL);
Demo: http://rextester.com/MAFCP19297
SQL
DECLARE @cols varchar(max), @sql varchar(max);
SELECT @cols =
COALESCE(@cols + ', ', '') + 'SUM(' + COLUMN_NAME + ') AS ' + COLUMN_NAME
FROM INFORMATION_SCHEMA.COLUMNS
WHERE table_name = '<tbl name>'
AND COLUMN_NAME <> 'name'
-- The AND below may be optional - see "Additional Notes #1"
AND TABLE_CATALOG = '<database name>';
SET @sql = 'SELECT name, ' + @cols + ' FROM tbl GROUP BY name;';
EXEC (@sql);
Explanation
The DECLARE creates two variables - one for storing the column summing part of the SQL and the other for storing the whole dynamically created SQL statement to run.
The SELECT queries the INFORMATION_SCHEMA.COLUMNS system table to get the names of all the columns in tbl apart from the name column. (Alternatively the sys tables could be used - answers to this question discuss the relative merits of each). These row values are then converted into a single comma separated value using this method (which is arguably a little simpler than the alternative FOR XML PATH ('') method). The comma-separated values are a bit more than just the column names - they SUM over each column name and then assign the result with an alias of the same name.
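The COALESCE trick works because the variable starts out NULL: on the first row COALESCE(@cols + ', ', '') yields the empty string, and on every subsequent row it yields the accumulated list plus a separator. A stripped-down illustration with a hypothetical three-value source:
DECLARE @cols varchar(max);
SELECT @cols = COALESCE(@cols + ', ', '') + name
FROM (VALUES ('a'), ('b'), ('c')) AS v(name);
SELECT @cols; -- a, b, c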
The SET then builds a simple SQL statement that selects the name and all the summed values - e.g: SELECT name, SUM(test_score_1) AS test_score_1, SUM(test_score_2) AS test_score_2, SUM(test_score_1000) AS test_score_1000 FROM tbl GROUP BY name;.
The EXEC then runs the above query.
Additional Notes
If there is a possibility that the table name may not be unique across all databases, then the following clause is needed in the select: AND TABLE_CATALOG = '<database name>'
My initial answer to this question was mistakenly using MySQL rather than SQL Server - this has now been corrected but the previous version is still in the edit history and might be helpful to someone...
Try this dynamic column generation SQL script:
DECLARE @Sql nvarchar(max)
SET @Sql=( SELECT DISTINCT 'SELECT'+
STUFF((SELECT ', '+ ' SUM( '+ COLUMN_NAME +' ) AS '+ QUOTENAME( COLUMN_NAME )
FROM INFORMATION_SCHEMA.COLUMNS Where TABLE_NAME ='Tab1000'
FOR XML PATH (''),type).value('.','varchar(max)'),1,2,'')
+' From Tab1000' From INFORMATION_SCHEMA.COLUMNS Where TABLE_NAME ='Tab1000')
EXEC (@Sql)
Try the below script
(set @tableName to [yourTablename] and @nameColumn to the name of the field you want to group by)
Declare @tableName varchar(50)='totalscores'
Declare @nameColumn nvarchar(50)='name'
Declare @query as nvarchar(MAX);
select @query = 'select ' + nameColumn + cast(sumColumns as nvarchar(max)) + ' from ' + @tableName +' group by ' + nameColumn from (
select @nameColumn nameColumn, (SELECT
', SUM(' + QUOTENAME(c.name) + ') ' + QUOTENAME(c.name)
FROM
sys.columns c
WHERE
c.object_id=t.object_id and c.name != @nameColumn
order by c.name
FOR
XML path(''), type
) sumColumns
from sys.tables t where t.name= @tableName
)t
EXECUTE(@query)
Replace tablename with your table name.
Declare @query as nvarchar(MAX) = (SELECT
'SELECT name,' + SUBSTRING(tbl.col, 2, 2000) + ' FROM ' + QUOTENAME(SCHEMA_NAME(t.schema_id)) + '.' + QUOTENAME(t.name) + ' Group By name'
FROM
sys.tables t
CROSS APPLY
(
SELECT
', SUM(' + QUOTENAME(columns.name) + ') as ' + columns.name
FROM
sys.columns columns
WHERE
columns.object_id = t.object_id and columns.name != 'name'
FOR XML PATH('')
) tbl (col)
WHERE
t.name = 'tablename')
select @query
EXECUTE(@query)
GBN's dynamic SQL would be my first choice (+1), and would be more performant. However, if you are interested in breaking this horrible cycle of 1,000+ columns, consider the following:
Example
Declare @YourTable Table ([col 1] int,[col 2] int,[col 1000] varchar(50))
Insert Into @YourTable Values
(2,1,0)
,(4,5,1)
Select Item = replace(C.Item,'_x0020_', ' ')
,Value = sum(C.Value)
From @YourTable A
Cross Apply (Select XMLData= cast((Select A.* for XML RAW) as xml)) B
Cross Apply (
Select Item = a.value('local-name(.)','varchar(100)')
,Value = a.value('.','int')
From B.XMLData.nodes('/row') as C1(n)
Cross Apply C1.n.nodes('./@*') as C2(a)
Where a.value('local-name(.)','varchar(100)') not in ('Fields','ToExclude')
) C
Group By C.Item
Returns
Item Value
col 1 6
col 2 6
col 1000 1

Select non-empty columns using SQL Server

I am using SQL Server 2012. I have a table with 90 columns. I am trying to select only the columns that contain data. After searching, I used the following procedure:
1- Getting all column counts using one select query
2- Pivoting the result into a temp table
3- Creating the select query
4- Executing this query
Here is the query I used:
DECLARE @strTablename varchar(100) = 'dbo.MyTable'
DECLARE @strQuery varchar(max) = ''
DECLARE @strSecondQuery varchar(max) = 'SELECT '
DECLARE @strUnPivot as varchar(max) = ' UNPIVOT ([Count] for [Column] IN ('
CREATE TABLE ##tblTemp([Column] varchar(50), [Count] Int)
SELECT @strQuery = ISNULL(@strQuery,'') + 'Count([' + name + ']) as [' + name + '] ,' from sys.columns where object_id = object_id(@strTablename) and is_nullable = 1
SELECT @strUnPivot = ISNULL(@strUnPivot,'') + '[' + name + '] ,' from sys.columns where object_id = object_id(@strTablename) and is_nullable = 1
SET @strQuery = 'SELECT [Column],[Count] FROM ( SELECT ' + SUBSTRING(@strQuery,1,LEN(@strQuery) - 1) + ' FROM ' + @strTablename + ') AS p ' + SUBSTRING(@strUnPivot,1,LEN(@strUnPivot) - 1) + ')) AS unpvt '
INSERT INTO ##tblTemp EXEC (@strQuery)
SELECT @strSecondQuery = @strSecondQuery + '[' + [Column] + '],' from ##tblTemp WHERE [Count] > 0
DROP TABLE ##tblTemp
SET @strSecondQuery = SUBSTRING(@strSecondQuery,1,LEN(@strSecondQuery) - 1) + ' FROM ' + @strTablename
EXEC (@strSecondQuery)
The problem is that this query is TOO SLOW. Is there a better way to achieve this?
Notes:
The table has only one clustered index on the primary key column ID and no other indexes.
The table is not editable.
The table contains very large amounts of data.
The query takes about 1 minute to execute.
Thanks in advance.
I do not know if this is faster, but you might use one trick: FOR XML AUTO will omit columns without content:
DECLARE @tbl TABLE(col1 INT,col2 INT,col3 INT);
INSERT INTO @tbl VALUES (1,2,NULL),(1,NULL,NULL),(NULL,NULL,NULL);
SELECT *
FROM @tbl AS tbl
FOR XML AUTO
This is the result: col3 is missing...
<tbl col1="1" col2="2" />
<tbl col1="1" />
<tbl />
Knowing this, you can find the list of columns that contain data in at least one row, like this:
DECLARE @ColList VARCHAR(MAX)=
STUFF
(
(
SELECT DISTINCT ',' + Attr.value('local-name(.)','nvarchar(max)')
FROM
(
SELECT
(
SELECT *
FROM @tbl AS tbl
FOR XML AUTO,TYPE
) AS TheXML
) AS t
CROSS APPLY t.TheXML.nodes('/tbl/@*') AS A(Attr)
FOR XML PATH('')
),1,1,''
);
SELECT @ColList
The content of @ColList is now col1,col2. You can place this string in a dynamically created SELECT.
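From there, the final step is just a dynamic SELECT; a minimal sketch, assuming the real table is dbo.MyTable as in the question:
DECLARE @sql NVARCHAR(MAX) = N'SELECT ' + @ColList + N' FROM dbo.MyTable';
EXEC sp_executesql @sql;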
UPDATE: Hints
It would be clever to replace the SELECT * with a column list created from INFORMATION_SCHEMA.COLUMNS, excluding all not-nullable columns and - if needed and possible - types which contain very large data (BLOBs).
UPDATE2: Performance
I don't know what your very large data actually means... I just tried this on a table with about 500,000 rows (with SELECT *) and it returned correctly in less than one minute. I hope this is fast enough...
Try using this condition:
where @columnname IS NOT NULL AND @columnname <> ' '

Get top three most common values from every column in a table

I'm trying to write a query that will produce a very small sample of data from each column of a table, in which the sample is made up of the top 3 most common values. This particular problem is part of a bigger task, which is to write scripts that can characterize a database and its tables, its data integrity, and also quickly survey common values in the table on a per-column basis. Think of this as an automated "analysis" of a table.
On a single column basis, I do this already by simply calculating the frequency of values and then sorting by frequency. If I had a column called "color" and all colors were in it, and it just so happened that the color "blue" was in most rows, then the top 1 most frequently occurring value would be "blue". In SQL that is easy to calculate.
However, I'm not sure how I would do this over multiple columns.
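For reference, the single-column version is just a TOP 3 over a GROUP BY; something like this, assuming a hypothetical table with a color column:
SELECT TOP 3 color, COUNT(*) AS freq
FROM dbo.MyTable
GROUP BY color
ORDER BY freq DESC;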
Currently, when I do a calculation over all columns of a table, I perform the following type of query:
USE database;
DECLARE @t nvarchar(max)
SET @t = N'SELECT '
SELECT @t = @t + 'count(DISTINCT CAST(' + c.name + ' as varchar(max))) "' + c.name + '",'
FROM sys.columns c
WHERE c.object_id = object_id('table');
SET @t = SUBSTRING(@t, 1, LEN(@t) - 1) + ' FROM table;'
EXEC sp_executesql @t
However, it's not entirely clear to me how I would do that here.
(Side note: I exclude columns of type text, ntext, and image, since those would cause errors while counting distinct values, but I'm less concerned about solving that.)
But the problem of getting top three most frequent values per column has got me absolutely stumped.
Ideally, I'd like to end up with something like this:
Col1 Col2 Col3 Col4 Col5
---------------------------------------------------------------------
1,2,3 red,blue,green 29,17,0 c,d,j nevada,california,utah
I hacked this together, but it seems to work. I can't help but think I should be using RANK().
USE <DB>;
DECLARE @query nvarchar(max)
DECLARE @column nvarchar(max)
DECLARE @table nvarchar(max)
DECLARE @i INT = 1
DECLARE @maxi INT = 10
DECLARE @target NVARCHAR(MAX) = <table>
declare @stage TABLE (i int IDENTITY(1,1), col nvarchar(max), tbl nvarchar(max))
declare @results table (ColumnName nvarchar(max), ColumnValue nvarchar(max), ColumnCount int, TableName NVARCHAR(MAX))
insert into @stage
select c.name, o.name
from sys.columns c
join sys.objects o on o.object_id=c.object_id and o.type = 'u'
and c.system_type_id IN (select system_type_id from sys.types where [name] not in ('text','ntext','image'))
and o.name like @target
SET @maxi = (select max(i) from @stage)
while @i <= @maxi
BEGIN
set @column = (select col from @stage where i = @i)
set @table = (select tbl from @stage where i = @i)
SET @query = N'SELECT ' +''''+@column+''''+' , '+ @column
SELECT @query = @query + ', COUNT( ' + @column + ' ) as count' + @column + ' , ''' + @table + ''' as tablename'
select @query = @query + ' from ' + @table + ' group by ' + @column
--Select @query
insert into @results
EXEC sp_executesql @query
SET @i = @i + 1
END
select * from @results
; with cte as (
select *, ROW_NUMBER() over (partition by Columnname order by ColumnCount desc) as rn from @results
)
select * from cte where rn <=3
Start with this SQL Statement builder, and modify it to suit your liking:
EDIT: Added Order by Desc
With ColumnSet As
(
Select TABLE_SCHEMA, TABLE_NAME, COLUMN_NAME
From INFORMATION_SCHEMA.COLUMNS
Where 1=1
And TABLE_NAME IN ('Table1')
And COLUMN_NAME IN ('Column1', 'Column2')
)
Select 'Select Top 3 ' + COLUMN_NAME + ', Count (*) NumInstances From ' + TABLE_SCHEMA + '.'+ TABLE_NAME + ' Group By ' + COLUMN_NAME + ' Order by Count (*) Desc'
From ColumnSet
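For example, with TABLE_SCHEMA = dbo, this emits one statement per column, each of the form:
Select Top 3 Column1, Count (*) NumInstances From dbo.Table1 Group By Column1 Order by Count (*) Desc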