I have been given a task to find how many nulls there are for each column in a given table. The table has many columns (50-80, depending on the individual table). I would like the result pivoted so the column names are records, like this:
column_name | null_count
------------------------
columnA     | 253
columnB     | 25
columnC     | 0
columnD     | 456
...         | ...
Currently, I do
SELECT 'columnA' as column_name, sum(case when columnA IS NULL then 1 else 0 end) null_count from [table] UNION
SELECT 'columnB', sum(case when columnB IS NULL then 1 else 0 end) from [table] UNION
...
for all the rows. This is kind of tedious and I would like to know if there is a more flexible way to do this in SQL Server Management Studio. Maybe something that can step through each record in [database].INFORMATION_SCHEMA.COLUMNS.
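For example, something along these lines is roughly what I have in mind - an untested sketch that builds the UNION ALL query from the metadata and executes it (dbo.MyTable is a placeholder table name, and STRING_AGG needs SQL Server 2017 or later):
-- Untested sketch: generate the per-column null-count query from metadata and run it.
DECLARE @sql NVARCHAR(MAX);

SELECT @sql = STRING_AGG(CONVERT(NVARCHAR(MAX),
           'SELECT ''' + COLUMN_NAME + ''' AS column_name, '
         + 'SUM(CASE WHEN ' + QUOTENAME(COLUMN_NAME) + ' IS NULL THEN 1 ELSE 0 END) AS null_count '
         + 'FROM dbo.MyTable'), ' UNION ALL ')
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = 'dbo'
  AND TABLE_NAME = 'MyTable';

EXEC sys.sp_executesql @sql;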
Please try the following solution.
It uses SQL Server's XML and XQuery magic.
No need for dynamic SQL.
We are leveraging the fact that the FOR XML clause omits elements for columns with NULL values.
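For example, for the first sample row below (columnA = 1, the other columns NULL), SELECT * FROM dbo.tbl WHERE ID = 1 FOR XML PATH('root') returns elements only for the non-NULL columns:
<root>
<ID>1</ID>
<columnA>1</columnA>
</root>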
SQL
USE tempdb;
GO
DROP TABLE IF EXISTS dbo.tbl;
-- DDL and sample data population, start
CREATE TABLE dbo.tbl (ID INT IDENTITY PRIMARY KEY, columnA INT, columnB VARCHAR(5), columnC INT);
INSERT dbo.tbl (columnA, columnB, columnC) VALUES
(1, NULL, NULL),
(2, 'city', NULL),
(NULL, 'cat', NULL),
(100, NULL, NULL);
-- DDL and sample data population, end
DECLARE @total_row_counter BIGINT = (SELECT COUNT_BIG(*) FROM dbo.tbl);
;WITH rs AS
(
SELECT column_name = c.value('local-name(.)', 'sysname')
FROM dbo.tbl AS p
CROSS APPLY (SELECT *
FROM dbo.tbl AS c
WHERE c.ID = p.ID
FOR XML PATH('root'), TYPE) AS t1(x)
CROSS APPLY x.nodes('/root/*') AS t2(c)
)
SELECT sch.column_name, null_counter = @total_row_counter - COUNT_BIG(rs.column_name)
FROM INFORMATION_SCHEMA.COLUMNS AS sch
LEFT OUTER JOIN rs ON sch.COLUMN_NAME = rs.column_name
WHERE TABLE_CATALOG = 'tempdb'
AND TABLE_SCHEMA = 'dbo'
AND TABLE_NAME = 'tbl'
GROUP BY sch.column_name
ORDER BY sch.column_name;
SQL #2
It covers table column names that contain spaces. A minor change to the FOR XML clause automatically converts spaces into _x0020_ in the XML element names:
<root>
<ID>3</ID>
<column_x0020_B>cat</column_x0020_B>
</root>
The rest is identical.
USE tempdb;
GO
DROP TABLE IF EXISTS dbo.tbl;
-- DDL and sample data population, start
CREATE TABLE dbo.tbl (ID INT IDENTITY PRIMARY KEY, columnA INT, [column B] VARCHAR(5), columnC INT);
INSERT dbo.tbl (columnA, [column B], columnC) VALUES
(1, NULL, NULL),
(2, 'city', NULL),
(NULL, 'cat', NULL),
(100, NULL, NULL);
-- DDL and sample data population, end
DECLARE @total_row_counter BIGINT = (SELECT COUNT_BIG(*) FROM dbo.tbl);
;WITH rs AS
(
SELECT column_name = REPLACE(c.value('local-name(.)', 'sysname'), '_x0020_', SPACE(1))
FROM dbo.tbl AS p
CROSS APPLY (SELECT *
FROM dbo.tbl AS [root]
WHERE [root].ID = p.ID
FOR XML AUTO, ELEMENTS, TYPE) AS t1(x)
CROSS APPLY x.nodes('/root/*') AS t2(c)
)
SELECT sch.column_name, null_counter = @total_row_counter - COUNT_BIG(rs.column_name)
FROM INFORMATION_SCHEMA.COLUMNS AS sch
LEFT OUTER JOIN rs ON sch.COLUMN_NAME = rs.column_name
WHERE TABLE_CATALOG = 'tempdb'
AND TABLE_SCHEMA = 'dbo'
AND TABLE_NAME = 'tbl'
GROUP BY sch.column_name
ORDER BY sch.column_name;
Output
+-------------+--------------+
| column_name | null_counter |
+-------------+--------------+
| columnA | 1 |
| columnB | 2 |
| columnC | 4 |
| ID | 0 |
+-------------+--------------+
Related
I have created a CTE (Common Table Expression) as follows:
DECLARE @N VARCHAR(100)
WITH CAT_NAM AS (
SELECT ID, NAME
FROM TABLE1
WHERE YEAR(DATE) = YEAR(GETDATE())
)
SELECT @N = STUFF((
SELECT ','''+ NAME+''''
FROM CAT_NAM
WHERE ID IN (20,23,25,30,37)
FOR XML PATH ('')
),1,1,'')
The result of the above CTE is 'A','B','C','D','F'.
Now I need to check 4 different columns, CAT_NAM_1, CAT_NAM_2, CAT_NAM_3, CAT_NAM_4, against the result of the CTE and form them into one column, like the following:
Select
case when CAT_NAM_1 in (@N) then CAT_NAM_1
when CAT_NAM_2 in (@N) then CAT_NAM_2
when CAT_NAM_3 in (@N) then CAT_NAM_3
when CAT_NAM_4 in (@N) then CAT_NAM_4
end as CAT
from table2
When I try to do the above I get an error. Please help me do this.
If my approach is wrong, please help me with the right one.
I am not exactly sure what you are trying to do, but if I understand correctly, the following script shows one possible technique. I have created some table variables to mimic the data you presented and then wrote a SELECT statement to do what I think you asked (but I am not sure).
DECLARE @TABLE1 AS TABLE (
ID INT NOT NULL,
[NAME] VARCHAR(10) NOT NULL,
[DATE] DATE NOT NULL
);
INSERT INTO @TABLE1(ID,[NAME],[DATE])
VALUES (20, 'A', '2021-01-01'), (23, 'B', '2021-02-01'),
(25, 'C', '2021-03-01'),(30, 'D', '2021-04-01'),
(37, 'E', '2021-05-01'),(40, 'F', '2021-06-01');
DECLARE @TABLE2 AS TABLE (
ID INT NOT NULL,
CAT_NAM_1 VARCHAR(10) NULL,
CAT_NAM_2 VARCHAR(10) NULL,
CAT_NAM_3 VARCHAR(10) NULL,
CAT_NAM_4 VARCHAR(10) NULL
);
INSERT INTO @TABLE2(ID,CAT_NAM_1,CAT_NAM_2,CAT_NAM_3,CAT_NAM_4)
VALUES (1,'A',NULL,NULL,NULL),(2,NULL,'B',NULL,NULL);
;WITH CAT_NAM AS (
SELECT ID, [NAME]
FROM @TABLE1
WHERE YEAR([DATE]) = YEAR(GETDATE())
AND ID IN (20,23,25,30,37,40)
)
SELECT CASE
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_1) THEN CAT_NAM_1
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_2) THEN CAT_NAM_2
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_3) THEN CAT_NAM_3
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_4) THEN CAT_NAM_4
ELSE '?' -- not sure what you want if there is no match
END AS CAT
FROM @TABLE2;
You can do a bit of set-based logic for this (with the CAT_NAM CTE from the question in scope):
SELECT
ct.NAME
FROM table2 t2
CROSS APPLY (
SELECT v.NAME
FROM (VALUES
(t2.CAT_NAM_1),
(t2.CAT_NAM_2),
(t2.CAT_NAM_3),
(t2.CAT_NAM_4)
) v(NAME)
INTERSECT
SELECT cn.NAME
FROM CAT_NAM cn
WHERE cn.ID IN (20,23,25,30,37)
) ct;
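For context, here is a rough sketch of how that could be wired together with the CAT_NAM CTE from the question (table and column names are taken from the question, so treat it as illustrative only; note that CROSS APPLY drops rows with no matching value, so switch to OUTER APPLY if you want to keep them):
;WITH CAT_NAM AS (
    -- CTE from the question: category names for the current year
    SELECT ID, NAME
    FROM TABLE1
    WHERE YEAR([DATE]) = YEAR(GETDATE())
)
SELECT ct.NAME AS CAT
FROM table2 t2
CROSS APPLY (
    -- intersect the four column values with the allowed names
    SELECT v.NAME
    FROM (VALUES (t2.CAT_NAM_1), (t2.CAT_NAM_2), (t2.CAT_NAM_3), (t2.CAT_NAM_4)) v(NAME)
    INTERSECT
    SELECT cn.NAME
    FROM CAT_NAM cn
    WHERE cn.ID IN (20,23,25,30,37)
) ct;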
I want to group a result set by a column and combine the remaining columns into a JSON array, but I'm not sure how to aggregate the results for this.
I want the following output:
A_ID | Translations
--------------------
1 | [{"Name": "english_1","LCID": "en-gb"},{"Name": "french_1","LCID": "fr-fr"}]
2 | [{"Name": "english_2","LCID": "en-gb"},{"Name": "french_2","LCID": "fr-fr"}]
But I cannot group the results by A_ID without an aggregate, so I get the following:
A_ID | Translations
--------------------
1 | [{"Name": "english_1","LCID": "en-gb"}]
1 | [{"Name": "french_1","LCID": "fr-fr"}]
2 | [{"Name": "english_2","LCID": "en-gb"}]
2 | [{"Name": "french_2","LCID": "fr-fr"}]
Here is an example:
DROP TABLE IF EXISTS #tabA;
DROP TABLE IF EXISTS #tabTranslations;
DROP TABLE IF EXISTS #tabLanguages;
go
CREATE TABLE #tabA
(
Id int not null
);
CREATE TABLE #tabTranslations
(
translationId int not null,
Name nvarchar(32) not null,
aId int not null, -- Foreign key.
languageId int not null --Foreign key
);
CREATE TABLE #tabLanguages
(
languageId int not null,
LCID nvarchar(32) not null
);
go
INSERT INTO #tabA (Id)
VALUES
(1),
(2);
INSERT INTO #tabTranslations (translationId, Name, aId, languageId)
VALUES
(1, 'english_1', 1, 1),
(2, 'french_1', 1, 2),
(3, 'english_2', 2, 1),
(4, 'french_2', 2, 2);
INSERT INTO #tabLanguages (languageId, LCID)
VALUES
(1, 'en-gb'),
(2, 'fr-fr');
go
select
_a.Id as A_ID,
(
select
_translation.Name,
_language.LCID
for json path
)
from #tabA as _a
inner join #tabTranslations as _translation ON _translation.aId = _a.Id
inner join #tabLanguages as _language ON _language.languageId = _translation.languageId
-- group by _a.Id ??
;
go
DROP TABLE IF EXISTS #tabA;
DROP TABLE IF EXISTS #tabTranslations;
DROP TABLE IF EXISTS #tabLanguages;
go
Alternative solution:
I know I can do the following, but I obviously don't want to hard-code the available LCIDs (maybe I could generate the SQL query and EXEC it, but this feels too complex), and I would also prefer an array:
select
_a.Id as A_ID,
(
SELECT
MAX(CASE WHEN [LCID] = 'en-gb' THEN [Name] END) 'en-gb',
MAX(CASE WHEN [LCID] = 'fr-fr' THEN [Name] END) 'fr-fr'
FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
) as Translations
from #tabA as _a
inner join #tabTranslations as _translation ON _translation.aId = _a.Id
inner join #tabLanguages as _language ON _language.languageId = _translation.languageId
group by _a.Id;
result:
A_ID | Translations
--------------------
1 | { "en-Gb": "english_1", "fr-FR": "french_1"}
2 | { "en-Gb": "english_2", "fr-FR": "french_2"}
If I understand you correctly, the next approach may help. Use an additional CROSS APPLY operator and FOR JSON PATH to get your expected results:
Statement:
SELECT *
FROM #tabA AS t
CROSS APPLY (
SELECT _translation.Name AS Name, _language.LCID AS LCID
FROM #tabA _a
inner join #tabTranslations as _translation ON _translation.aId = _a.Id
inner join #tabLanguages as _language ON _language.languageId = _translation.languageId
WHERE _a.Id = t.Id
for json path
) _c(Translations)
Output:
Id Translations
1 [{"Name":"english_1","LCID":"en-gb"},{"Name":"french_1","LCID":"fr-fr"}]
2 [{"Name":"english_2","LCID":"en-gb"},{"Name":"french_2","LCID":"fr-fr"}]
I'm trying to create some reports for auditing, but I have a very specific question.
There are about 120 columns, each with a specific numeric answer. I'd like to return the column name and the values of the rows of that column. I'm aware I'll get a lot of results, but that's not a problem.
For example I have:
KEY |ColumnA | ColumnB
1 |Value A | ValueB
2 |ValueA2 | ValueB2
But what I want is:
1 |ColumnA | Value A
2 |ColumnA | Value A2
1 |ColumnB | Value B
2 |ColumnB | Value B2
I've tried returning all rows and then joining on itself, but it didn't provide me with the output I needed.
A simple UNPIVOT will do the work :)
declare @tbl table ([Key] int, ColumnA varchar(15), ColumnB varchar(15));
insert into @tbl values
(1, 'Value A', 'ValueB'),
(2, 'ValueA2', 'ValueB2');
select [key], [column], [value] from
(select * from @tbl) p
unpivot
([value] for [column] in (ColumnA, ColumnB)) u
order by [column]
It's so simple... If you know the column names, you could use a simple UNION ALL:
SELECT [KEY], 'ColumnA' AS ColumnName, ColumnA AS ColumnValue FROM tblAuditing
UNION ALL
SELECT [KEY], 'ColumnB', ColumnB FROM tblAuditing
The following query should do what you want - you need to do a customized sort for the column names:
CREATE TABLE #temp (ColumnA VARCHAR(20), ColumnB VARCHAR(20))
INSERT INTO #temp VALUES ('Value A','Value B'),('Value A2','Value B2')
SELECT t.Col, t.Val
FROM (SELECT *,ROW_NUMBER() OVER (ORDER BY (SELECT 1)) RNO FROM #temp t) tmp
CROSS APPLY (VALUES (tmp.ColumnA,'ColumnA',tmp.RNO),(tmp.ColumnB,'ColumnB',tmp.RNO)) AS T(Val,Col,sort)
ORDER BY T.Col, Sort
The result is as below,
Col Val
ColumnA Value A
ColumnA Value A2
ColumnB Value B
ColumnB Value B2
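Applied directly to the question's table (assuming it is called tblAuditing and has a [KEY] column, as in the other answer), the same pattern would look something like this sketch:
-- Sketch only: one VALUES row per source column to unpivot, keyed by [KEY].
SELECT t.[KEY], v.Col, v.Val
FROM tblAuditing AS t
CROSS APPLY (VALUES ('ColumnA', t.ColumnA),
                    ('ColumnB', t.ColumnB)) AS v(Col, Val)
ORDER BY v.Col, t.[KEY];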
We have these tables
CREATE TABLE tbl01
(
[id] int NOT NULL PRIMARY KEY,
[name] nvarchar(50) NOT NULL
)
CREATE TABLE tbl02
(
[subId] int NOT NULL PRIMARY KEY ,
[id] int NOT NULL REFERENCES tbl01(id),
[val] nvarchar(50) NULL,
[code] int NULL
)
If we run this query:
SELECT
tbl01.id, tbl01.name, tbl02.val, tbl02.code
FROM
tbl01
INNER JOIN
tbl02 ON tbl01.id = tbl02.id
we get these results:
-------------------------------
id | name | val | code
-------------------------------
1 | one | FirstVal | 1
1 | one | SecondVal | 2
2 | two | YourVal | 1
2 | two | OurVal | 2
3 | three | NotVal | 1
3 | three | ThisVal | 2
-------------------------------
You can see that every two rows are related to the same "id".
The question is: for each id we need to retrieve one record containing all the val values, with each val returned in a column determined by the value of the code column:
if(code = 1) then val as val-1
else if (code = 2) then val as val-2
Like this:
-------------------------------
id | name | val-1 | val-2
-------------------------------
1 | one | FirstVal | SecondVal
2 | two | YourVal | OurVal
3 | three | NotVal | ThisVal
-------------------------------
Any advice?
You can use MAX and GROUP BY to achieve this:
SELECT id,
name,
MAX([val1]) [val-1],
MAX([val2]) [val-2]
FROM ( SELECT tbl01.id, tbl01.name,
CASE code
WHEN 1 THEN tbl02.val
ELSE ''
END [val1],
CASE code
WHEN 2 THEN tbl02.val
ELSE ''
END [val2]
FROM tbl01
INNER JOIN tbl02 ON tbl01.id = tbl02.id
) Tbl
GROUP BY id, name
Is it the PIVOT operator (http://technet.microsoft.com/en-us/library/ms177410(v=sql.105).aspx) that you are looking for?
You've already got a few answers, but here's one using PIVOT as an alternative. The good thing is this approach is easy to scale if additional columns are required later.
-- SETUP TABLES
DECLARE @t1 TABLE (
[id] int NOT NULL PRIMARY KEY,
[name] nvarchar(50) NOT NULL
)
DECLARE @t2 TABLE(
[subId] int NOT NULL PRIMARY KEY ,
[id] int NOT NULL,
[val] nvarchar(50) NULL,
[code] int NULL
)
-- SAMPLE DATA
INSERT @t1 ( id, name )
VALUES ( 1, 'one'), (2, 'two'), (3, 'three')
INSERT @t2
( subId, id, val, code )
VALUES ( 1,1,'FirstVal', 1), ( 2,1,'SecondVal', 2)
,( 3,2,'YourVal', 1), ( 4,2,'OurVal', 2)
,( 5,3,'NotVal', 1), ( 6,3,'ThisVal', 2)
-- SELECT (using PIVOT)
SELECT id, name, [1] AS 'val-1', [2] AS 'val-2'
FROM
(
SELECT t2.id, t1.name, t2.val, t2.code
FROM @t1 AS t1 JOIN @t2 AS t2 ON t2.id = t1.id
) AS src
PIVOT
(
MIN(val)
FOR code IN ([1], [2])
) AS pvt
results:
id name val-1 val-2
---------------------------------
1 one FirstVal SecondVal
2 two YourVal OurVal
3 three NotVal ThisVal
If there are always only two values, you could join them or, even easier, group them:
SELECT tbl01.id as id, MIN(tbl01.name) as name, MIN(tbl02.val) as [val-1], MAX(tbl02.val) as [val-2]
FROM tbl01
INNER JOIN tbl02 ON tbl01.id = tbl02.id
GROUP BY tbl01.id
Note: this query will always put the lowest value in the first column and the highest in the second. If this is not wanted, use the join query below.
Join query
If you always want code 1 in the first column and code 2 in the second:
SELECT tbl01.id as id, tbl01.name as name, t1.val as [val-1], t2.val as [val-2]
FROM tbl01
INNER JOIN tbl02 AS t1 ON tbl01.id = t1.id AND t1.code = 1
INNER JOIN tbl02 AS t2 ON tbl01.id = t2.id AND t2.code = 2
Variable number of columns
You cannot get a variable number of columns unless you build the query dynamically in application code or in a T-SQL stored procedure.
My advice:
If it is always two values, join them in the query; if not, let your server-side code transform the data (or, even better, find a way that makes it unnecessary to transform the data).
Try this - it uses a pivot function but it also creates the dynamic columns depending on code.
DECLARE @ColumnString varchar(200)
DECLARE @sql varchar(1000)
CREATE TABLE #ColumnValue
(
Value varchar(500)
)
INSERT INTO #ColumnValue (Value)
SELECT DISTINCT '[' + 'value' + Convert(Varchar(20),ROW_NUMBER() Over(Partition by id Order by id )) + ']'
FROM Test
SELECT @ColumnString = COALESCE(@ColumnString + ',', '') + Value
FROM #ColumnValue
Drop table #ColumnValue
SET @sql =
'
SELECT *
FROM
(
SELECT
id,name,val,''value'' + Convert(Varchar(20),ROW_NUMBER() Over(Partition by id Order by id ))as [values]
FROM Test
) AS P
PIVOT
(
MAX(val) FOR [values] IN ('+@ColumnString+')
) AS pv
'
--print @sql
EXEC (@sql)
Edit: for anyone with a similar problem, there's a good article covering various solutions here.
Given the following tables recs and audit, how would one transform them in SQL into the resultant table below?
A little background: the former table is a simplified example of a standard SQL table used in a CRUD application collecting data. On any update to a column, a record is written to an audit table in EAV form. There is now a requirement to transform the recs table into a historical table, with a copy of each row as it was at a point in time, for reporting (the data will ultimately be stored in a star schema data warehouse).
It seems like this would be straightforward enough in a procedural language and manageable (if ugly) using cursors, but is there a set-based approach that would work?
I'm using T-SQL right now, but I imagine that I could port any examples or ideas from any sufficiently rich SQL dialect.
Setup
create table recs
(
ID int identity(1,1) not null primary key,
Column1 nvarchar(30) not null,
Column2 nvarchar(30) not null,
sys_updated_on datetime not null
)
create table audit
(
ID int identity(1,1) not null primary key,
recs_id int not null,
fieldname nvarchar(30) not null,
old_value nvarchar(30) not null,
new_value nvarchar(30) not null,
sys_updated_on datetime not null
)
insert into recs (Column1, Column2, sys_updated_on)
values ('A', 'B', '2012-10-31 22:00')
, ('C', 'D', '2012-10-31 22:30')
insert into audit (recs_id, fieldname, old_value, new_value, sys_updated_on)
values (1, 'Column1', 'Z', 'A', '2012-10-31 22:00')
, (2, 'Column2','X', 'D', '2012-10-31 22:30')
, (1, 'Column1', 'Y', 'Z', '2012-10-31 21:00')
Resultant Data
Recs
ID Column1 Column2 sys_updated_on
1 A B 31/10/2012 22:00:00
2 C D 31/10/2012 22:30:00
Audit
ID recs_id fieldname old_value new_value sys_updated_on
1 1 Column1 Z A 31/10/2012 22:00:00
2 2 Column2 X D 31/10/2012 22:30:00
3 1 Column1 Y Z 31/10/2012 21:00:00
Desired result
recs_id sys_updated_on Column1 Column2
1 null Y B
1 31/10/2012 21:00:00 Z B
1 31/10/2012 22:00:00 A B
2 null C X
2 31/10/2012 22:30:00 C D
Interesting....
Try this
;with cte as
(
select recs_id, sys_updated_on, column1, column2,
ROW_NUMBER() over (order by sys_updated_on) rn
from audit a
pivot
(max(old_value) for fieldname in (column1,column2)) p
)
select
recs_id,
case when ud1>ud2 then ud1 else ud2 end as updateddate,
coalesce(cte.column1,mc1,recs.column1) as Column1,
coalesce(cte.column2,mc2,recs.column2) as Column2
from cte
outer apply
(
select top 1
column1 as mc1, sys_updated_on as ud1
from cte prev1
where prev1.recs_id=cte.recs_id and prev1.rn<cte.rn
order by prev1.rn desc
) r1
outer apply
(
select top 1
column2 as mc2, sys_updated_on as ud2
from cte prev2
where prev2.recs_id=cte.recs_id and prev2.rn<cte.rn
order by prev2.rn desc
) r2
inner join recs on cte.recs_id = recs.id
where cte.sys_updated_on is not null
union
select id, sys_updated_on, Column1, Column2 from recs
order by recs_id, updateddate