I have been given a task to find how many nulls there are for each column in a given table. The table has many columns (50-80, depending on the individual table). I would like the result pivoted so the column names are records, like this:
column_name | null_count
------------------------
columnA     | 253
columnB     | 25
columnC     | 0
columnD     | 456
...         | ...
Currently, I do
SELECT 'columnA' as column_name, sum(case when columnA IS NULL then 1 else 0 end) null_count from [table] UNION
SELECT 'columnB', sum(case when columnB IS NULL then 1 else 0 end) from [table] UNION
...
for all the rows. This is kind of tedious and I would like to know if there is a more flexible way to do this in SQL Server Management Studio. Maybe something that can step through each record in [database].INFORMATION_SCHEMA.COLUMNS.
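For example, something along these lines is roughly what I have in mind - an untested sketch that builds the UNION ALL query from the metadata and executes it (dbo.MyTable is a placeholder table name, and STRING_AGG needs SQL Server 2017 or later):
-- Untested sketch: generate the per-column null-count query from metadata and run it.
DECLARE @sql NVARCHAR(MAX);

SELECT @sql = STRING_AGG(CONVERT(NVARCHAR(MAX),
           'SELECT ''' + COLUMN_NAME + ''' AS column_name, '
         + 'SUM(CASE WHEN ' + QUOTENAME(COLUMN_NAME) + ' IS NULL THEN 1 ELSE 0 END) AS null_count '
         + 'FROM dbo.MyTable'), ' UNION ALL ')
FROM INFORMATION_SCHEMA.COLUMNS
WHERE TABLE_SCHEMA = 'dbo'
  AND TABLE_NAME = 'MyTable';

EXEC sys.sp_executesql @sql;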
Please try the following solution.
It uses SQL Server's XML and XQuery magic.
No need for dynamic SQL.
We are leveraging the fact that the FOR XML clause omits elements for columns with NULL values.
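For example, for the first sample row below (columnA = 1, the other columns NULL), SELECT * FROM dbo.tbl WHERE ID = 1 FOR XML PATH('root') returns elements only for the non-NULL columns:
<root>
<ID>1</ID>
<columnA>1</columnA>
</root>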
SQL
USE tempdb;
GO
DROP TABLE IF EXISTS dbo.tbl;
-- DDL and sample data population, start
CREATE TABLE dbo.tbl (ID INT IDENTITY PRIMARY KEY, columnA INT, columnB VARCHAR(5), columnC INT);
INSERT dbo.tbl (columnA, columnB, columnC) VALUES
(1, NULL, NULL),
(2, 'city', NULL),
(NULL, 'cat', NULL),
(100, NULL, NULL);
-- DDL and sample data population, end
DECLARE @total_row_counter BIGINT = (SELECT COUNT_BIG(*) FROM dbo.tbl);
;WITH rs AS
(
SELECT column_name = c.value('local-name(.)', 'sysname')
FROM dbo.tbl AS p
CROSS APPLY (SELECT *
FROM dbo.tbl AS c
WHERE c.ID = p.ID
FOR XML PATH('root'), TYPE) AS t1(x)
CROSS APPLY x.nodes('/root/*') AS t2(c)
)
SELECT sch.column_name, null_counter = @total_row_counter - COUNT_BIG(rs.column_name)
FROM INFORMATION_SCHEMA.COLUMNS AS sch
LEFT OUTER JOIN rs ON sch.COLUMN_NAME = rs.column_name
WHERE TABLE_CATALOG = 'tempdb'
AND TABLE_SCHEMA = 'dbo'
AND TABLE_NAME = 'tbl'
GROUP BY sch.column_name
ORDER BY sch.column_name;
SQL #2
It covers table column names that contain spaces. A minor change to the FOR XML clause automatically converts spaces into _x0020_ in the XML element names:
<root>
<ID>3</ID>
<column_x0020_B>cat</column_x0020_B>
</root>
The rest is identical.
USE tempdb;
GO
DROP TABLE IF EXISTS dbo.tbl;
-- DDL and sample data population, start
CREATE TABLE dbo.tbl (ID INT IDENTITY PRIMARY KEY, columnA INT, [column B] VARCHAR(5), columnC INT);
INSERT dbo.tbl (columnA, [column B], columnC) VALUES
(1, NULL, NULL),
(2, 'city', NULL),
(NULL, 'cat', NULL),
(100, NULL, NULL);
-- DDL and sample data population, end
DECLARE @total_row_counter BIGINT = (SELECT COUNT_BIG(*) FROM dbo.tbl);
;WITH rs AS
(
SELECT column_name = REPLACE(c.value('local-name(.)', 'sysname'), '_x0020_', SPACE(1))
FROM dbo.tbl AS p
CROSS APPLY (SELECT *
FROM dbo.tbl AS [root]
WHERE [root].ID = p.ID
FOR XML AUTO, ELEMENTS, TYPE) AS t1(x)
CROSS APPLY x.nodes('/root/*') AS t2(c)
)
SELECT sch.column_name, null_counter = @total_row_counter - COUNT_BIG(rs.column_name)
FROM INFORMATION_SCHEMA.COLUMNS AS sch
LEFT OUTER JOIN rs ON sch.COLUMN_NAME = rs.column_name
WHERE TABLE_CATALOG = 'tempdb'
AND TABLE_SCHEMA = 'dbo'
AND TABLE_NAME = 'tbl'
GROUP BY sch.column_name
ORDER BY sch.column_name;
Output
+-------------+--------------+
| column_name | null_counter |
+-------------+--------------+
| columnA | 1 |
| columnB | 2 |
| columnC | 4 |
| ID | 0 |
+-------------+--------------+
Related
I have created a CTE (Common Table Expression) as follows:
DECLARE @N VARCHAR(100)
WITH CAT_NAM AS (
SELECT ID, NAME
FROM TABLE1
WHERE YEAR(DATE) = YEAR(GETDATE())
)
SELECT @N = STUFF((
SELECT ','''+ NAME+''''
FROM CAT_NAM
WHERE ID IN (20,23,25,30,37)
FOR XML PATH ('')
),1,1,'')
The result of the above CTE is 'A','B','C','D','F'.
Now I need to check 4 different columns, CAT_NAM_1, CAT_NAM_2, CAT_NAM_3, CAT_NAM_4, against the result of the CTE and form them into one column, like the following:
Select
case when CAT_NAM_1 in (@N) then CAT_NAM_1
when CAT_NAM_2 in (@N) then CAT_NAM_2
when CAT_NAM_3 in (@N) then CAT_NAM_3
when CAT_NAM_4 in (@N) then CAT_NAM_4
end as CAT
from table2
When I try to do the above I get an error. Please help me do this.
If my approach is wrong, please help me with the right one.
I am not exactly sure what you are trying to do, but if I understand correctly, the following script shows one possible technique. I have created some table variables to mimic the data you presented and then wrote a SELECT statement to do what I think you asked (but I am not sure).
DECLARE @TABLE1 AS TABLE (
ID INT NOT NULL,
[NAME] VARCHAR(10) NOT NULL,
[DATE] DATE NOT NULL
);
INSERT INTO @TABLE1(ID,[NAME],[DATE])
VALUES (20, 'A', '2021-01-01'), (23, 'B', '2021-02-01'),
(25, 'C', '2021-03-01'),(30, 'D', '2021-04-01'),
(37, 'E', '2021-05-01'),(40, 'F', '2021-06-01');
DECLARE @TABLE2 AS TABLE (
ID INT NOT NULL,
CAT_NAM_1 VARCHAR(10) NULL,
CAT_NAM_2 VARCHAR(10) NULL,
CAT_NAM_3 VARCHAR(10) NULL,
CAT_NAM_4 VARCHAR(10) NULL
);
INSERT INTO @TABLE2(ID,CAT_NAM_1,CAT_NAM_2,CAT_NAM_3,CAT_NAM_4)
VALUES (1,'A',NULL,NULL,NULL),(2,NULL,'B',NULL,NULL);
;WITH CAT_NAM AS (
SELECT ID, [NAME]
FROM @TABLE1
WHERE YEAR([DATE]) = YEAR(GETDATE())
AND ID IN (20,23,25,30,37,40)
)
SELECT CASE
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_1) THEN CAT_NAM_1
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_2) THEN CAT_NAM_2
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_3) THEN CAT_NAM_3
WHEN EXISTS(SELECT 1 FROM CAT_NAM WHERE CAT_NAM.[NAME] = CAT_NAM_4) THEN CAT_NAM_4
ELSE '?' -- not sure what you want if there is no match
END AS CAT
FROM @TABLE2;
You can do a bit of set-based logic for this (with the CAT_NAM CTE from the question in scope):
SELECT
ct.NAME
FROM table2 t2
CROSS APPLY (
SELECT v.NAME
FROM (VALUES
(t2.CAT_NAM_1),
(t2.CAT_NAM_2),
(t2.CAT_NAM_3),
(t2.CAT_NAM_4)
) v(NAME)
INTERSECT
SELECT cn.NAME
FROM CAT_NAM cn
WHERE cn.ID IN (20,23,25,30,37)
) ct;
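For context, here is a rough sketch of how that could be wired together with the CAT_NAM CTE from the question (table and column names are taken from the question, so treat it as illustrative only; note that CROSS APPLY drops rows with no matching value, so switch to OUTER APPLY if you want to keep them):
;WITH CAT_NAM AS (
    -- CTE from the question: category names for the current year
    SELECT ID, NAME
    FROM TABLE1
    WHERE YEAR([DATE]) = YEAR(GETDATE())
)
SELECT ct.NAME AS CAT
FROM table2 t2
CROSS APPLY (
    -- intersect the four column values with the allowed names
    SELECT v.NAME
    FROM (VALUES (t2.CAT_NAM_1), (t2.CAT_NAM_2), (t2.CAT_NAM_3), (t2.CAT_NAM_4)) v(NAME)
    INTERSECT
    SELECT cn.NAME
    FROM CAT_NAM cn
    WHERE cn.ID IN (20,23,25,30,37)
) ct;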
I want to group a result set by a column and combine the remaining columns into a JSON array, but I'm not sure how to aggregate the results for this.
I want the following output:
A_ID | Translations
--------------------
1 | [{"Name": "english_1","LCID": "en-gb"},{"Name": "french_1","LCID": "fr-fr"}]
2 | [{"Name": "english_2","LCID": "en-gb"},{"Name": "french_2","LCID": "fr-fr"}]
But I cannot group the results by A_ID without an aggregate, so I get the following:
A_ID | Translations
--------------------
1 | [{"Name": "english_1","LCID": "en-gb"}]
1 | [{"Name": "french_1","LCID": "fr-fr"}]
2 | [{"Name": "english_2","LCID": "en-gb"}]
2 | [{"Name": "french_2","LCID": "fr-fr"}]
Here is an example:
DROP TABLE IF EXISTS #tabA;
DROP TABLE IF EXISTS #tabTranslations;
DROP TABLE IF EXISTS #tabLanguages;
go
CREATE TABLE #tabA
(
Id int not null
);
CREATE TABLE #tabTranslations
(
translationId int not null,
Name nvarchar(32) not null,
aId int not null, -- Foreign key.
languageId int not null --Foreign key
);
CREATE TABLE #tabLanguages
(
languageId int not null,
LCID nvarchar(32) not null
);
go
INSERT INTO #tabA (Id)
VALUES
(1),
(2);
INSERT INTO #tabTranslations (translationId, Name, aId, languageId)
VALUES
(1, 'english_1', 1, 1),
(2, 'french_1', 1, 2),
(3, 'english_2', 2, 1),
(4, 'french_2', 2, 2);
INSERT INTO #tabLanguages (languageId, LCID)
VALUES
(1, 'en-gb'),
(2, 'fr-fr');
go
select
_a.Id as A_ID,
(
select
_translation.Name,
_language.LCID
for json path
)
from #tabA as _a
inner join #tabTranslations as _translation ON _translation.aId = _a.Id
inner join #tabLanguages as _language ON _language.languageId = _translation.languageId
-- group by _a.Id ??
;
go
DROP TABLE IF EXISTS #tabA;
DROP TABLE IF EXISTS #tabTranslations;
DROP TABLE IF EXISTS #tabLanguages;
go
Alternative solution:
I know I can do the following, but I obviously don't want to hard-code the available LCIDs (maybe I could generate the SQL query and EXEC it, but this feels too complex), and I would also prefer an array:
select
_a.Id as A_ID,
(
SELECT
MAX(CASE WHEN [LCID] = 'en-gb' THEN [Name] END) 'en-gb',
MAX(CASE WHEN [LCID] = 'fr-fr' THEN [Name] END) 'fr-fr'
FOR JSON PATH, WITHOUT_ARRAY_WRAPPER
) as Translations
from #tabA as _a
inner join #tabTranslations as _translation ON _translation.aId = _a.Id
inner join #tabLanguages as _language ON _language.languageId = _translation.languageId
group by _a.Id;
result:
A_ID | Translations
--------------------
1 | { "en-Gb": "english_1", "fr-FR": "french_1"}
2 | { "en-Gb": "english_2", "fr-FR": "french_2"}
If I understand you correctly, the next approach may help. Use an additional CROSS APPLY operator and FOR JSON PATH to get your expected results:
Statement:
SELECT *
FROM #tabA AS t
CROSS APPLY (
SELECT _translation.Name AS Name, _language.LCID AS LCID
FROM #tabA _a
inner join #tabTranslations as _translation ON _translation.aId = _a.Id
inner join #tabLanguages as _language ON _language.languageId = _translation.languageId
WHERE _a.Id = t.Id
for json path
) _c(Translations)
Output:
Id Translations
1 [{"Name":"english_1","LCID":"en-gb"},{"Name":"french_1","LCID":"fr-fr"}]
2 [{"Name":"english_2","LCID":"en-gb"},{"Name":"french_2","LCID":"fr-fr"}]
I'm trying to create some reports for auditing, but I have a very specific question.
There are about 120 columns, each with a specific numeric answer. I'd like to return the column name and the values of the rows of that column. I'm aware I'll get a lot of results, but that's not a problem.
For example I have:
KEY |ColumnA | ColumnB
1 |Value A | ValueB
2 |ValueA2 | ValueB2
But what I want is:
1 |ColumnA | Value A
2 |ColumnA | Value A2
1 |ColumnB | Value B
2 |ColumnB | Value B2
I've tried returning all rows and then joining on itself, but it didn't provide me with the output I needed.
A simple UNPIVOT will do the work :)
declare @tbl table ([Key] int, ColumnA varchar(15), ColumnB varchar(15));
insert into @tbl values
(1, 'Value A', 'ValueB'),
(2, 'ValueA2', 'ValueB2');
select [key], [column], [value] from
(select * from @tbl) p
unpivot
([value] for [column] in (ColumnA, ColumnB)) u
order by [column]
It's so simple... If you know the column names, you could use a simple UNION ALL:
SELECT [KEY], 'ColumnA' AS ColumnName, ColumnA AS ColumnValue FROM tblAuditing
UNION ALL
SELECT [KEY], 'ColumnB', ColumnB FROM tblAuditing
The following query should do what you want - you need to do a customized sort for the column names:
CREATE TABLE #temp (ColumnA VARCHAR(20), ColumnB VARCHAR(20))
INSERT INTO #temp VALUES ('Value A','Value B'),('Value A2','Value B2')
SELECT t.Col, t.Val
FROM (SELECT *,ROW_NUMBER() OVER (ORDER BY (SELECT 1)) RNO FROM #temp t) tmp
CROSS APPLY (VALUES (tmp.ColumnA,'ColumnA',tmp.RNO),(tmp.ColumnB,'ColumnB',tmp.RNO)) AS T(Val,Col,sort)
ORDER BY T.Col, Sort
The result is as below,
Col Val
ColumnA Value A
ColumnA Value A2
ColumnB Value B
ColumnB Value B2
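Applied directly to the question's table (assuming it is called tblAuditing and has a [KEY] column, as in the other answer), the same pattern would look something like this sketch:
-- Sketch only: one VALUES row per source column to unpivot, keyed by [KEY].
SELECT t.[KEY], v.Col, v.Val
FROM tblAuditing AS t
CROSS APPLY (VALUES ('ColumnA', t.ColumnA),
                    ('ColumnB', t.ColumnB)) AS v(Col, Val)
ORDER BY v.Col, t.[KEY];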
We have these tables
CREATE TABLE tbl01
(
[id] int NOT NULL PRIMARY KEY,
[name] nvarchar(50) NOT NULL
)
CREATE TABLE tbl02
(
[subId] int NOT NULL PRIMARY KEY ,
[id] int NOT NULL REFERENCES tbl01(id),
[val] nvarchar(50) NULL,
[code] int NULL
)
If we run this query:
SELECT
tbl01.id, tbl01.name, tbl02.val, tbl02.code
FROM
tbl01
INNER JOIN
tbl02 ON tbl01.id = tbl02.id
we get these results:
-------------------------------
id | name | val | code
-------------------------------
1 | one | FirstVal | 1
1 | one | SecondVal | 2
2 | two | YourVal | 1
2 | two | OurVal | 2
3 | three | NotVal | 1
3 | three | ThisVal | 2
-------------------------------
You can see that every two rows are related to the same "id".
The question is: for each id we need to retrieve one record containing all the val values, with each val returned in a column determined by the value of the code column:
if(code = 1) then val as val-1
else if (code = 2) then val as val-2
Like this:
-------------------------------
id | name | val-1 | val-2
-------------------------------
1 | one | FirstVal | SecondVal
2 | two | YourVal | OurVal
3 | three | NotVal | ThisVal
-------------------------------
Any advice?
You can use MAX and GROUP BY to achieve this:
SELECT id,
name,
MAX([val1]) [val-1],
MAX([val2]) [val-2]
FROM ( SELECT tbl01.id, tbl01.name,
CASE code
WHEN 1 THEN tbl02.val
ELSE ''
END [val1],
CASE code
WHEN 2 THEN tbl02.val
ELSE ''
END [val2]
FROM tbl01
INNER JOIN tbl02 ON tbl01.id = tbl02.id
) Tbl
GROUP BY id, name
Is it the PIVOT operator (http://technet.microsoft.com/en-us/library/ms177410(v=sql.105).aspx) that you are looking for?
You've already got a few answers, but here's one using PIVOT as an alternative. The good thing is this approach is easy to scale if additional columns are required later.
-- SETUP TABLES
DECLARE @t1 TABLE (
[id] int NOT NULL PRIMARY KEY,
[name] nvarchar(50) NOT NULL
)
DECLARE @t2 TABLE(
[subId] int NOT NULL PRIMARY KEY ,
[id] int NOT NULL,
[val] nvarchar(50) NULL,
[code] int NULL
)
-- SAMPLE DATA
INSERT @t1 ( id, name )
VALUES ( 1, 'one'), (2, 'two'), (3, 'three')
INSERT @t2
( subId, id, val, code )
VALUES ( 1,1,'FirstVal', 1), ( 2,1,'SecondVal', 2)
,( 3,2,'YourVal', 1), ( 4,2,'OurVal', 2)
,( 5,3,'NotVal', 1), ( 6,3,'ThisVal', 2)
-- SELECT (using PIVOT)
SELECT id, name, [1] AS 'val-1', [2] AS 'val-2'
FROM
(
SELECT t2.id, t1.name, t2.val, t2.code
FROM @t1 AS t1 JOIN @t2 AS t2 ON t2.id = t1.id
) AS src
PIVOT
(
MIN(val)
FOR code IN ([1], [2])
) AS pvt
results:
id name val-1 val-2
---------------------------------
1 one FirstVal SecondVal
2 two YourVal OurVal
3 three NotVal ThisVal
If there are always only two values, you could join them or, even easier, group them:
SELECT tbl01.id as id, MIN(tbl01.name) as name, MIN(tbl02.val) as [val-1], MAX(tbl02.val) as [val-2]
FROM tbl01
INNER JOIN tbl02 ON tbl01.id = tbl02.id
GROUP BY tbl01.id
Note: this query will always put the lowest value in the first column and the highest in the second. If this is not wanted, use the join query below.
Join query
If you always want code 1 in the first column and code 2 in the second:
SELECT tbl01.id as id, tbl01.name as name, t1.val as [val-1], t2.val as [val-2]
FROM tbl01
INNER JOIN tbl02 AS t1 ON tbl01.id = t1.id AND t1.code = 1
INNER JOIN tbl02 AS t2 ON tbl01.id = t2.id AND t2.code = 2
Variable number of columns
You cannot get a variable number of columns unless you build the query dynamically in application code or in a T-SQL stored procedure.
My advice:
If it is always two values, join them in the query; if not, let your server-side code transform the data (or, even better, find a way that makes it unnecessary to transform the data).
Try this - it uses a pivot function but it also creates the dynamic columns depending on code.
DECLARE @ColumnString varchar(200)
DECLARE @sql varchar(1000)
CREATE TABLE #ColumnValue
(
Value varchar(500)
)
INSERT INTO #ColumnValue (Value)
SELECT DISTINCT '[' + 'value' + Convert(Varchar(20),ROW_NUMBER() Over(Partition by id Order by id )) + ']'
FROM Test
SELECT @ColumnString = COALESCE(@ColumnString + ',', '') + Value
FROM #ColumnValue
Drop table #ColumnValue
SET @sql =
'
SELECT *
FROM
(
SELECT
id,name,val,''value'' + Convert(Varchar(20),ROW_NUMBER() Over(Partition by id Order by id ))as [values]
FROM Test
) AS P
PIVOT
(
MAX(val) FOR [values] IN ('+@ColumnString+')
) AS pv
'
--print @sql
EXEC (@sql)
Edit: for anyone with a similar problem, there's a good article covering various solutions here.
Given the following tables recs and audit, how would one transform them in SQL into the resultant table below?
A little background: the former table is a simplified example of a standard SQL table used in a CRUD application collecting data. On any update to a column, a record is written to an audit table in EAV form. There is now a requirement to transform the recs table into a historical table, with a copy of each row as it was at a point in time, for reporting (the data will ultimately be stored in a star schema data warehouse).
It seems like this would be straightforward enough in a procedural language and manageable (if ugly) using cursors, but is there a set-based approach that would work?
I'm using T-SQL right now, but I imagine that I could port any examples or ideas from any sufficiently rich SQL dialect.
Setup
create table recs
(
ID int identity(1,1) not null primary key,
Column1 nvarchar(30) not null,
Column2 nvarchar(30) not null,
sys_updated_on datetime not null
)
create table audit
(
ID int identity(1,1) not null primary key,
recs_id int not null,
fieldname nvarchar(30) not null,
old_value nvarchar(30) not null,
new_value nvarchar(30) not null,
sys_updated_on datetime not null
)
insert into recs (Column1, Column2, sys_updated_on)
values ('A', 'B', '2012-10-31 22:00')
, ('C', 'D', '2012-10-31 22:30')
insert into audit (recs_id, fieldname, old_value, new_value, sys_updated_on)
values (1, 'Column1', 'Z', 'A', '2012-10-31 22:00')
, (2, 'Column2','X', 'D', '2012-10-31 22:30')
, (1, 'Column1', 'Y', 'Z', '2012-10-31 21:00')
Resultant Data
Recs
ID Column1 Column2 sys_updated_on
1 A B 31/10/2012 22:00:00
2 C D 31/10/2012 22:30:00
Audit
ID recs_id fieldname old_value new_value sys_updated_on
1 1 Column1 Z A 31/10/2012 22:00:00
2 2 Column2 X D 31/10/2012 22:30:00
3 1 Column1 Y Z 31/10/2012 21:00:00
Desired result
recs_id sys_updated_on Column1 Column2
1 null Y B
1 31/10/2012 21:00:00 Z B
1 31/10/2012 22:00:00 A B
2 null C X
2 31/10/2012 22:30:00 C D
Interesting....
Try this
;with cte as
(
select recs_id, sys_updated_on, column1, column2,
ROW_NUMBER() over (order by sys_updated_on) rn
from audit a
pivot
(max(old_value) for fieldname in (column1,column2)) p
)
select
recs_id,
case when ud1>ud2 then ud1 else ud2 end as updateddate,
coalesce(cte.column1,mc1,recs.column1) as Column1,
coalesce(cte.column2,mc2,recs.column2) as Column2
from cte
outer apply
(
select top 1
column1 as mc1, sys_updated_on as ud1
from cte prev1
where prev1.recs_id=cte.recs_id and prev1.rn<cte.rn
order by prev1.rn desc
) r1
outer apply
(
select top 1
column2 as mc2, sys_updated_on as ud2
from cte prev2
where prev2.recs_id=cte.recs_id and prev2.rn<cte.rn
order by prev2.rn desc
) r2
inner join recs on cte.recs_id = recs.id
where cte.sys_updated_on is not null
union
select id, sys_updated_on, Column1, Column2 from recs
order by recs_id, updateddate