SQL: Loop through 8 million records and update them

I have an audit table that has about 8 million records. I have recently added two new columns which I need to populate from existing columns with some rules/conditions. Basically, initially, whenever a FK was updated in a table, the old and new FK ids were stored in the audit table. For example:
Table A
ID Name
1 First A
2 Second A
3 Third A
Table B
ID AID Name
1 1 First B
2 1 Second B
3 2 Third B
Audit
ID TableName FieldName OldValue NewValue
Now if I update the first record of Table B from (1, 1, First B) to (1, 3, First B), then the audit table will store the change as:
Audit
ID TableName FieldName OldValue NewValue
1 Table B AID 1 3
Now I have updated the Audit table to also store the actual text value of the FK, i.e. the above change will be stored as:
Audit
ID TableName FieldName OldValue NewValue OldText NewText
1 Table B AID 1 3 First A Third A
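For reference, a rough sketch of the audit table shape this implies; the column names come from the examples above and the code below, while the data types are assumptions:
-- Assumed shape of the Audit table after adding the two new columns
-- (the examples above label the key "ID"; the code below refers to it as AuditID).
CREATE TABLE Audit (
    AuditID   int IDENTITY(1,1) PRIMARY KEY,
    TableName nvarchar(128),
    FieldName nvarchar(128),
    OldValue  nvarchar(200),
    NewValue  nvarchar(200),
    OldText   nvarchar(max) NULL,  -- new column to backfill
    NewText   nvarchar(max) NULL   -- new column to backfill
);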
The problem is that I already have about 8 million records for which I need to populate the new columns. I have written the query below to do that:
declare @sql nvarchar(max);
declare @start int = 1;
while @start <= 8000000
begin
    select top 10000 @sql = COALESCE(@sql + 'Update Audit set ', 'Update Audit set') +
        isnull(' OldText = (' + dbo.GetFKText(i.TableName, i.FieldName) + case when len(isnull(i.OldValue, '')) < 1 then null else i.OldValue end + '),', ' OldText = OldValue, ') +
        isnull(' NewText = (' + dbo.GetFKText(i.TableName, i.FieldName) + case when len(isnull(i.NewValue, '')) < 1 then null else i.NewValue end + ')', ' NewText = NewValue ') +
        ' where AuditID = ' + cast(i.AuditID as nvarchar(200)) + ' and lower(ltrim(rtrim(TableName))) <> ''audit'';'
    from Audit i
    where i.AuditID >= @start;

    exec sp_executesql @sql;
    set @start = @start + 10000;
end
The get-text function (basically I am getting the column whose name is (TableName)+'Name' or (TableName)+(SomeText)+'Name'; this is just a convention that I have followed in all the tables):
declare @res nvarchar(max) = '';
declare @fn nvarchar(200);
declare @ttn nvarchar(200);
declare @tcn nvarchar(200);

SELECT top 1
    @ttn = kcu.table_name,
    @tcn = kcu.column_name
FROM INFORMATION_SCHEMA.CONSTRAINT_COLUMN_USAGE ccu
INNER JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS rc
    ON ccu.CONSTRAINT_NAME = rc.CONSTRAINT_NAME
INNER JOIN INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
    ON kcu.CONSTRAINT_NAME = rc.UNIQUE_CONSTRAINT_NAME
WHERE ccu.TABLE_NAME = @TableName and ccu.COLUMN_NAME = @FieldName;

if isnull(@ttn, '') != '' and ISNULL(@tcn, '') != ''
begin
    select @fn = COLUMN_NAME
    from (SELECT top 1 COLUMN_NAME,
                 case when COLUMN_NAME like (@ttn + 'Name')  then 0
                      when COLUMN_NAME like (@ttn + '%Name') then 1
                      when COLUMN_NAME like (@ttn + 'Code')  then 2
                      when COLUMN_NAME like (@ttn + '%Code') then 3
                      else 4 end as CPriority
          FROM JVO.INFORMATION_SCHEMA.COLUMNS
          WHERE TABLE_NAME = @ttn and (COLUMN_NAME like '%Name' or COLUMN_NAME like '%Code')
          order by CPriority) as aa;

    RETURN 'select ' + @fn + ' from ' + @ttn + ' where ' + @tcn + ' = ';
end
return null;
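To make the dynamic SQL above easier to follow, here is a hypothetical example of one generated statement, assuming a FK column TableB.AID referencing TableA(ID) with a display column named TableAName (illustrative names, not the real schema):
-- dbo.GetFKText('TableB', 'AID') would return:  select TableAName from TableA where ID =
-- so for one audit row the loop builds:
Update Audit set
    OldText = (select TableAName from TableA where ID = 1),
    NewText = (select TableAName from TableA where ID = 3)
 where AuditID = 1 and lower(ltrim(rtrim(TableName))) <> 'audit';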
It's working but really slow: it updates about 1 million records in 13 hours. Can anyone help improve this query or suggest an alternative way to do the update?
Thanks
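A rough sketch of a set-based alternative, assuming that for a given TableName/FieldName pair the FK always resolves to the same lookup table, key column, and display column (which is what the GetFKText convention implies); the lookup names below are placeholders that would be resolved once per pair, for example by reusing the INFORMATION_SCHEMA logic inside GetFKText:
-- Placeholders: resolve these once per distinct (TableName, FieldName) pair.
DECLARE @auditTable  sysname = 'TableB',
        @auditField  sysname = 'AID',
        @lookupTable sysname = 'TableA',      -- assumed referenced table
        @lookupKey   sysname = 'ID',          -- assumed referenced key column
        @lookupText  sysname = 'TableAName';  -- assumed display column

-- Note: non-numeric OldValue/NewValue rows may need filtering or TRY_CONVERT on the join.
DECLARE @sql nvarchar(max) = N'
UPDATE a SET
    OldText = COALESCE(o.' + QUOTENAME(@lookupText) + N', a.OldValue),
    NewText = COALESCE(n.' + QUOTENAME(@lookupText) + N', a.NewValue)
FROM Audit a
LEFT JOIN ' + QUOTENAME(@lookupTable) + N' o ON o.' + QUOTENAME(@lookupKey) + N' = a.OldValue
LEFT JOIN ' + QUOTENAME(@lookupTable) + N' n ON n.' + QUOTENAME(@lookupKey) + N' = a.NewValue
WHERE a.TableName = @tn AND a.FieldName = @fn
  AND LOWER(LTRIM(RTRIM(a.TableName))) <> ''audit'';';

EXEC sp_executesql @sql, N'@tn sysname, @fn sysname', @tn = @auditTable, @fn = @auditField;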

Related

ms sql server: how to check whether a table has an “id” column and count rows if “id” exists

There are a lot of tables in my SQL Server db. Most of them have an 'id' column, but some do not. I want to know which table(s) don't have the 'id' column, and to count the rows where id is null if an 'id' column exists. The query results may look like this:
TABLE_NAME | HAS_ID | ID_NULL_COUNT | ID_NOT_NULL_COUNT
table1 | false | 0 | 0
table2 | true | 10 | 100
How do I write this query?
Building query:
WITH cte AS (
SELECT t.*, has_id = CASE WHEN COLUMN_NAME = 'ID' THEN 'true' ELSE 'false' END
FROM INFORMATION_SCHEMA.TABLES t
OUTER APPLY (SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS c
WHERE t.TABLE_NAME = c.TABLE_NAME
AND t.[TABLE_SCHEMA] = c.[TABLE_SCHEMA]
AND c.COLUMN_NAME = 'id') s
WHERE t.TABLE_SCHEMA IN (...)
)
SELECT
query_to_run = REPLACE(REPLACE(REPLACE(REPLACE(REPLACE(
'SELECT tab_name = ''<tab_name>'',
has_id = ''<has_id>'',
id_null_count = <id_null_count>,
id_not_null_count = <id_not_null_count>
FROM <schema_name>.<tab_name>'
,'<tab_name>', TABLE_NAME)
,'<schema_name>', TABLE_SCHEMA)
,'<has_id>', has_id)
,'<id_null_count>', CASE WHEN has_id = 'false' THEN '0' ELSE 'SUM(CASE WHEN id IS NULL THEN 1 END)' END)
,'<id_not_null_count>', CASE WHEN has_id = 'false' THEN '0' ELSE 'COUNT(id)' END)
FROM cte;
Copy the output and execute it in a separate window. UNION ALL could be added to get a single resultset.
db<>fiddle demo
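As a rough, simplified sketch of the UNION ALL idea (only for the tables that do have an id column, and assuming SQL Server 2017+ for STRING_AGG), the generated statements can be stitched together and executed in one go instead of being copied into a separate window:
DECLARE @sql nvarchar(max);

-- One generated SELECT per table that has an id column, joined with UNION ALL
SELECT @sql = STRING_AGG(CAST(
        'SELECT tab_name = ''' + c.TABLE_NAME + ''', '
      + 'id_null_count = SUM(CASE WHEN id IS NULL THEN 1 ELSE 0 END), '
      + 'id_not_null_count = COUNT(id) '
      + 'FROM ' + QUOTENAME(c.TABLE_SCHEMA) + '.' + QUOTENAME(c.TABLE_NAME)
        AS nvarchar(max)), ' UNION ALL ')
FROM INFORMATION_SCHEMA.COLUMNS c
WHERE c.COLUMN_NAME = 'id';

EXEC sp_executesql @sql;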
This might be useful for you... it lists out the row count for all tables that have an "id" column. It filters out tables that start with "sys" because those are mostly internal tables. If you have a table of your own that starts with "sys", you'll probably want to delete that part of the WHERE clause.
SELECT DISTINCT OBJECT_NAME(r.[object_id]) AS [TableName], [row_count] AS [RowCount]
FROM sys.dm_db_partition_stats r
WHERE index_id = 1
AND EXISTS (SELECT 1 FROM sys.columns c WHERE c.[object_id] = r.[object_id] AND c.[name] = N'id')
AND OBJECT_NAME(r.[object_id]) NOT LIKE 'sys%'
ORDER BY [TableName]
Note you can change the c.[name] = N'id' predicate to any column name, or change the EXISTS to NOT EXISTS to find only tables without an id column.
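For example, the same query flipped to NOT EXISTS lists the tables that have no id column at all:
SELECT DISTINCT OBJECT_NAME(r.[object_id]) AS [TableName], [row_count] AS [RowCount]
FROM sys.dm_db_partition_stats r
WHERE index_id = 1
AND NOT EXISTS (SELECT 1 FROM sys.columns c WHERE c.[object_id] = r.[object_id] AND c.[name] = N'id')
AND OBJECT_NAME(r.[object_id]) NOT LIKE 'sys%'
ORDER BY [TableName]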
pmbAustin's answer shows how to list all tables without an "ID" column.
To find out how many rows are in each table, SQL Server has a built-in report for you.
Right click the database in SSMS, click "Reports", "Standard Reports" then "Disk Usage by Table"
You now know how many rows are in each table, and from pmbAustin's answer you know which tables do and do not have "ID" columns. With a simple VLOOKUP in Excel you can combine these two datasets to arrive at any answer you wish.
This will give you info about which tables do or do not have a column named "ID":
SELECT Table_Name
, case when column_name not like '%ID%' then 'false'
else 'true'
end as HAS_ID
FROM INFORMATION_SCHEMA.COLUMNS;
Here is a small demo
And here is one way to select all the tables that have a column named ID and check whether that column's values are null or not:
CREATE TABLE #AllIDSNullable (TABLE_NAME NVARCHAR(256) NOT NULL
    , HAS_ID VARCHAR(10)
    , ID_NULL_COUNT INT DEFAULT 0
    , ID_NOT_NULL_COUNT INT DEFAULT 0);

DECLARE CT CURSOR FOR
    SELECT Table_Name
    FROM INFORMATION_SCHEMA.COLUMNS
    WHERE column_name = 'ID';

DECLARE @name NVARCHAR(MAX), @SQL NVARCHAR(MAX);

OPEN CT;
FETCH NEXT FROM CT INTO @name;
WHILE @@FETCH_STATUS = 0
BEGIN
    -- restrict the insert to the current cursor table
    SET @SQL = 'INSERT #AllIDSNullable (TABLE_NAME, HAS_ID) SELECT Table_Name, case when column_name not like ''%ID%'' then ''false'' else ''true'' end FROM INFORMATION_SCHEMA.COLUMNS WHERE Table_Name = ''' + @name + ''' AND column_name = ''ID'';';
    EXEC (@SQL);
    SET @SQL = 'UPDATE #AllIDSNullable SET ID_NULL_COUNT = (SELECT COUNT(*) FROM [' + @name + '] WHERE ID IS NULL), ID_NOT_NULL_COUNT = (SELECT COUNT(*) FROM [' + @name + '] WHERE ID IS NOT NULL) WHERE TABLE_NAME = ''' + @name + ''';';
    EXEC (@SQL);
    FETCH NEXT FROM CT INTO @name;
END;
CLOSE CT;
DEALLOCATE CT;

SELECT *
FROM #AllIDSNullable;
Here is a demo

How to access the column name of a table using a while loop

I have the below table (#Temp):
RowNo Item
1 A
2 B
My requirement is: if Item equals 'B', do an action.
declare @count int = 1
WHILE (@count < (select count(*) from #Temp))
Begin
    -- Here I have to access my column name (Item), so that I can check its value to B
    set @count = @count + 1
End
Please suggest
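A minimal T-SQL sketch of the loop described above, assuming RowNo is a contiguous sequence starting at 1 (the action itself is just a placeholder):
DECLARE @count int = 1,
        @rows  int = (SELECT COUNT(*) FROM #Temp),
        @item  varchar(10);

WHILE @count <= @rows
BEGIN
    SELECT @item = Item FROM #Temp WHERE RowNo = @count;

    IF @item = 'B'
        PRINT 'Row ' + CAST(@count AS varchar(10)) + ': do the action here';  -- placeholder action

    SET @count = @count + 1;
END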
You can use DESCRIBE:
DESCRIBE my_table;
Or in newer versions you can use INFORMATION_SCHEMA:
SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_SCHEMA = 'my_database' AND TABLE_NAME = 'my_table';
Or you can use SHOW COLUMNS:
SHOW COLUMNS FROM my_table;
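Those commands (DESCRIBE, SHOW COLUMNS) are MySQL syntax; on SQL Server, which the #Temp temp table implies, the column names of a temp table live in tempdb and can be read like this:
-- Column names of the #Temp temp table on SQL Server
SELECT c.name AS COLUMN_NAME
FROM tempdb.sys.columns c
WHERE c.[object_id] = OBJECT_ID('tempdb..#Temp');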

SQL Select Column From Table Based on Another Select Statement

I have a table containing column names from another table. I want to run an update statement to update some values in that table, based on the other.
EX:
TableA
ID|Column1|Column2
1 | 1.3 | 2.3
2 | 0 | 7
3 | 2.5 | 12.1
TableB
ID|ColumnName|MaxValue
1 | Column1 | NULL
2 | Column2 | NULL
Something along the lines of this: in this case, I would want to update MaxValue in TableB to be the max value from TableA, where ColumnName names a column in TableA.
Is this possible?
You can do it with a cursor and some dynamic SQL. This isn't the best thing to do, but if you need a quick and dirty solution, here you go:
DECLARE @colName VARCHAR(50), @str VARCHAR(2000), @id int

DECLARE c CURSOR FOR
    SELECT id, columnName
    FROM tableB

OPEN c
FETCH NEXT FROM c INTO @id, @colName
WHILE @@fetch_status = 0
BEGIN
    SET @str = 'update tableB set MaxValue = ( select max(' + @colName + ') from tableA ) where id = ' + CONVERT(VARCHAR, @id)
    EXEC ( @str )
    FETCH NEXT FROM c INTO @id, @colName
END
CLOSE c
DEALLOCATE c
If you do not want to use dynamic SQL, you could always do something like this
Update TableB
Set MaxValue = MaxValues.MaxValue
From TableB
Join
(
Select MaxValue = Max(Column1)
,ColumnName = 'Column1'
From TableA
Union All
Select MaxValue = Max(Column2)
,ColumnName = 'Column2'
From TableA
-- Union All ... and so on for all columns
) MaxValues
On TableB.ColumnName = MaxValues.ColumnName
Remember, if the TableA DDL changes, you must update this DML.
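If keeping that statement in sync by hand becomes a burden, the UNION ALL block can also be generated from TableB itself; a sketch, assuming every ColumnName value in TableB is a valid TableA column:
DECLARE @sql nvarchar(max);

-- Build one MAX(...) branch per row of TableB
SELECT @sql = COALESCE(@sql + CHAR(10) + 'Union All' + CHAR(10), '')
    + 'Select MaxValue = Max(' + QUOTENAME(ColumnName) + '), ColumnName = ''' + ColumnName + ''' From TableA'
FROM TableB;

-- Wrap the branches in the same UPDATE ... JOIN shown above and run it
SET @sql = 'Update TableB
Set MaxValue = MaxValues.MaxValue
From TableB
Join (' + CHAR(10) + @sql + CHAR(10) + ') MaxValues
    On TableB.ColumnName = MaxValues.ColumnName;';

EXEC sp_executesql @sql;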

Doubt in Query - SQL Server 2005

I have a table with 100 columns. Around 50 to 60 of those columns contain NULL values. Now I need to replace these NULL values with 0 in all 50 to 60 columns. I tried the following UPDATE query:
UPDATE [tableName]
SET col1=0, col2 = 0, ... col60 = 0
WHERE col1 IS NULL AND Col2 IS NULL ... Col60 IS NULL
Is there any other query to update all 60 columns without specifying each column, or is there any other approach?
You have to specify all columns, but you can skip the WHERE clause and have one update deal with them all at once:
UPDATE [tableName] SET
col1=COALESCE(col1, 0),
col2=COALESCE(col2, 0),
col3=COALESCE(col3, 0),
col4=COALESCE(col4, 0),
[...]
You could try this workaround if every value in the columns is NULL:
Edit the table definition and set the columns as "Calculated" and use 0 as formula
Save the table
Remove the formula
It is not very elegant but works
I don't think there's an alternative - but the query you posted will only update records where all the columns are null.
If you want to update individual columns, you need to break it up into individual updates:
update table
set col1 = 0
where col1 is null
update table
set col2 = 0
where col2 is null
To avoid writing this query by hand, you can generate it using dynamic SQL:
DECLARE @Table NVARCHAR(255) = 'Your table'
DECLARE @sSQL NVARCHAR(MAX) = 'UPDATE ' + @Table + ' SET ' + CHAR(13);

WITH c AS ( SELECT c.name
            FROM sys.all_columns c
            JOIN sys.tables t ON c.object_id = t.object_id
            WHERE t.name = @Table
          )
SELECT @sSQL = @sSQL + c.name + '=ISNULL(' + c.name + ',0)' + ','
    + CHAR(13)
FROM c

IF LEN(@sSQL) > 0
    SET @sSQL = LEFT(@sSQL, LEN(@sSQL) - 2)

PRINT @sSQL
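Once the printed statement looks right, it can be executed directly, e.g.:
-- run the generated UPDATE (keep PRINT while testing)
EXEC sp_executesql @sSQL;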

Count number of rows across multiple tables in one query

I have a SQL Server 2005 database that stores data for multiple users. Each table that contains user-owned data has a column called OwnerID that identifies the owner; most but not all tables have this column.
I want to be able to count the number of rows 'owned' by a user in each table. In other words, I want a query that returns the names of each table that contains an OwnerID column, and counts the number of rows in each table that match a given OwnerID value.
I can return just the names of the matching tables using this query:
SELECT OBJECT_NAME(object_id) [Table] FROM sys.columns
WHERE name = 'OwnerID' ORDER BY OBJECT_NAME(object_id);
That query returns a list of table names like this:
+---------+
| Table |
+---------+
| Alpha |
| Beta |
| Gamma |
| ... |
+---------+
But is it possible to write a query that can also count the number of rows in each table that match a given OwnerID? i.e.:
+---------+------------+
| Table | RowCount |
+---------+------------+
| Alpha | 2042 |
| Beta | 49 |
| Gamma | 740 |
| ... | ... |
+---------+------------+
Note: The list of table names needs to be returned dynamically, it is not suitable to hard-code table names into this query.
Edit: the answer...
(I can't edit your answers yet but I can edit my own question so I'm putting it here...)
Damien_The_Unbeliever had essentially the correct answer, but SQL Server doesn't allow string concatenation in a stored procedure's parameter list, so I had to build the query in a variable prior to the EXEC call. The final query is as follows:
DECLARE @OwnerID int;
SET @OwnerID = 1;
DECLARE @ForEachSQL varchar(100);
SET @ForEachSQL = 'INSERT INTO #t(TableName,RowsOwned) SELECT ''?'', COUNT(*) FROM ? WHERE OwnerID = ' + CONVERT(varchar(11), @OwnerID);
CREATE TABLE #t(TableName sysname, RowsOwned int);
EXEC sp_MSforeachtable @ForEachSQL,
    @whereAnd = 'AND o.id IN (SELECT id FROM syscolumns where name=''OwnerID'')';
SELECT * FROM #t ORDER BY TableName;
DROP TABLE #t;
You can use sp_MSForeachtable, and the #whereand parameter, to specify a filter so you're only working against tables with an OwnerID column. Create a temp table, and populate that for each matching table. Something like:
create table #t(tablename sysname, Cnt int)
exec sp_MSforeachtable 'insert into #t(tablename,Cnt) select ''?'',COUNT(*) from ?',
    @whereAnd = 'and o.id in (select id from syscolumns where name=''OwnerID'')'
select * from #t
Two major caveats to mention - first is that sp_MSforeachtable is "undocumented", so you use it at your own risk - it could be suddenly removed from SQL Server by any kind of servicing, or in the next release.
The second is that having a dynamic schema is usually a sign that something else has gone wrong in modelling - possibly attribute splitting (where sales for January and February are given different tables, even though they're logically the same thing and should appear in the same table, possibly with an additional column to distinguish them).
And, of course, you wanted to filter based on a particular clientID, so the query would be more like:
'insert into #t(tablename,Cnt) select ''?'',COUNT(*) from ? where OwnerID=' + @OwnerID
(Assuming @OwnerID is the owner sought, and is an int)
This would get the info from sysindexes. It can be slightly out of date but will give you a rough count
SELECT
[TableName] = so.name,
[RowCount] = MAX(si.rows)
FROM
sysobjects so,
sysindexes si
WHERE
so.xtype = 'U'
AND
si.id = OBJECT_ID(so.name)
GROUP BY
so.name
ORDER BY
2 DESC
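For comparison, the same rough counts are also available from the newer catalog views (a sketch; sys.partitions is available from SQL Server 2005 onward):
-- Approximate row counts per user table from sys.partitions
SELECT t.name AS TableName,
       SUM(p.rows) AS [RowCount]
FROM sys.tables t
JOIN sys.partitions p
    ON p.[object_id] = t.[object_id]
   AND p.index_id IN (0, 1)  -- heap or clustered index
GROUP BY t.name
ORDER BY [RowCount] DESC;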
If you needed it to be 100% right then you could use the undocumented feature sp_MSForEachTable
DECLARE @SQL VARCHAR(255)
SET @SQL = 'DBCC UPDATEUSAGE (' + DB_NAME() + ')'
EXEC(@SQL)

CREATE TABLE #foo
(
    tablename VARCHAR(255),
    rc INT
)

INSERT #foo
EXEC sp_msForEachTable
    'SELECT PARSENAME(''?'', 1), COUNT(*) FROM ?'

SELECT tablename, rc
FROM #foo
ORDER BY rc DESC

DROP TABLE #foo
You can use this:
DECLARE @nSQL NVARCHAR(MAX)

SELECT @nSQL = COALESCE(@nSQL + 'UNION ALL ' + CHAR(10), '')
    + 'SELECT ''' + TABLE_NAME + ''' AS TableName, COUNT(*) FROM ' + QUOTENAME(TABLE_NAME) + CHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME = 'OwnerID'

-- This will PRINT out the dynamically generated SQL statement.
-- Just replace this with EXECUTE(@nSQL) when you are happy to run it.
PRINT @nSQL
Update: To search for a specific OwnerId:
DECLARE @nSQL NVARCHAR(MAX)
DECLARE @OwnerId INTEGER
SET @OwnerId = 1

SELECT @nSQL = COALESCE(@nSQL + 'UNION ALL ' + CHAR(10), '')
    + 'SELECT ''' + TABLE_NAME + ''' AS TableName, COUNT(*) FROM ' + QUOTENAME(TABLE_NAME) + ' WHERE OwnerId = @OwnerId' + CHAR(10)
FROM INFORMATION_SCHEMA.COLUMNS
WHERE COLUMN_NAME = 'OwnerID'

EXECUTE sp_executesql @nSQL, N'@OwnerId INTEGER', @OwnerId
SELECT
O.ID,
O.NAME,
I.ROWCNT
FROM SYSOBJECTS O
INNER JOIN SYSINDEXES I
ON O.ID = I.ID
WHERE O.UID = 5
AND O.XTYPE = 'U'
AND I.STATUS = 0
Try using this query; it will give you the id of the table, the table name, and the number of rows for that table.
UID = 5 means I want to check a particular schema which has id = 5. You can check a schema's id using SELECT SCHEMA_ID('<schema name>');
XTYPE = 'U' means User defined tables only.