Update table in chunks - sql

I am trying to update a large table in chunks, each in its own transaction.
The query below runs endlessly if column1 is not updated for some reason. The real statement contains a nested query that does not always return a value, so some column1 values remain NULL after the update, which puts the loop into an endless cycle.
How can I specify a position for "UPDATE TOP" to start from?
Thanks in advance.
DECLARE @counter int
DECLARE @total int
DECLARE @batch int

SET @total = (SELECT COUNT(*) FROM [table] WITH (NOLOCK))
SET @counter = 0
SET @batch = 1000

WHILE (@counter < (@total / @batch) + 1)
BEGIN
    BEGIN TRANSACTION

    SET @counter = @counter + 1

    UPDATE TOP (@batch) [table]
    SET column1 = 'something'
    WHERE column1 IS NULL

    COMMIT TRANSACTION
END
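One way to stop the loop from revisiting rows that stay NULL is to walk the table by key range instead of relying on the WHERE clause to shrink. This is only a minimal sketch: it assumes the table has an indexed integer key column named id (that name, and the key being reasonably dense, are assumptions; substitute whatever key you actually have).

DECLARE @lastId int = 0
DECLARE @maxId int
DECLARE @batch int = 1000

SELECT @maxId = MAX(id) FROM [table]

WHILE @lastId < @maxId
BEGIN
    BEGIN TRANSACTION

    UPDATE [table]
    SET column1 = 'something'
    WHERE column1 IS NULL
      AND id >  @lastId
      AND id <= @lastId + @batch   -- only touch this slice of the key range

    COMMIT TRANSACTION

    -- Advance the position even if nothing in this slice could be updated,
    -- so rows that remain NULL can no longer cause an endless loop.
    SET @lastId = @lastId + @batch
END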

Related

Batch updates to SQL Table won't stop

I have a large table (18 million records) which I am updating using the following batch update snippet:
SET NOCOUNT ON;
DECLARE @rows INT, @count INT, @message VARCHAR(100);
SET @rows = 1;
SET @count = 0;

WHILE @rows > 0
BEGIN
    BEGIN TRAN

    UPDATE TOP (100000) tblName
    SET col_name = 'xxxxxx'

    SET @rows = @@ROWCOUNT
    SET @count = @count + @rows
    RAISERROR ('count %d', 0, 1, @count) WITH NOWAIT

    COMMIT TRAN
END
Even though the code has the #count increment logic, it races past the 18 million records I am trying to update. What am I missing here and what should I add/remove to make the updates stop at the 18,206,650 records that I have in the table?
Thanks,
RV.
Silly me. I was missing the WHERE clause on the update statement. Sorry, y'all.
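For completeness, here is what the loop looks like with a WHERE clause in place. The predicate below is only an illustrative guess (skip rows that already hold the target value); use whatever condition actually identifies the rows you still need to touch.

SET NOCOUNT ON;
DECLARE @rows INT, @count INT;
SET @rows = 1;
SET @count = 0;

WHILE @rows > 0
BEGIN
    BEGIN TRAN

    UPDATE TOP (100000) tblName
    SET col_name = 'xxxxxx'
    WHERE col_name <> 'xxxxxx' OR col_name IS NULL   -- the missing filter

    SET @rows = @@ROWCOUNT
    SET @count = @count + @rows
    RAISERROR ('count %d', 0, 1, @count) WITH NOWAIT

    COMMIT TRAN
END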

SQL server transfer column from one table to another

I am trying to transfer data from a column in one table to a column in another table; both columns hold unique identifiers. When I transfer the data, it is copied in after the end of the existing data in the second table. I first insert 135 random integers into one column of the second table (Cars11), and then I want to copy the values over from another table in the same database, but the copied data only starts after the 135th row. The first table is carBrand with column model_id; the second table is Cars11 with column model_idss - or whatever the names are...
THE QUESTION IS: why is the copied information placed after my first input? For example, I insert 135 random ints, then try to copy the information from the other table, and when it is pasted, it lands in rows 136 through 270.
My query for the new table looks like this:
DECLARE @Min_Value AS int
SET @Min_Value = 15000
DECLARE @Max_Value AS int
SET @Max_Value = 1000000
DECLARE @n AS int
SET @n = 135

BEGIN TRANSACTION

DECLARE @uid uniqueidentifier
SET @uid = NEWID()
DECLARE @i AS int
SET @i = 1

WHILE @i <= @n
BEGIN
    INSERT INTO Cars11 ([Model_prices])
    VALUES (FLOOR(RAND(CHECKSUM(@uid)) * (@Max_Value - @Min_Value + 1) + @Min_Value))

    SET @i += 1
    SET @uid = NEWID()
END

COMMIT TRANSACTION

INSERT INTO Cars11 (model_idss)
SELECT model_id
FROM carBrand
WHERE model_id <= 135;
It would be easier to parse your query if you used the code sample block (ctrl-k)
You need to do an UPDATE instead of an INSERT for the second statement against Cars11.
UPDATE changes already existing records; INSERT creates new records.
Something like this:
DECLARE @Min_Value AS int
SET @Min_Value = 15000
DECLARE @Max_Value AS int
SET @Max_Value = 1000000
DECLARE @n AS int
SET @n = 135

BEGIN TRANSACTION

DECLARE @uid uniqueidentifier
SET @uid = NEWID()
DECLARE @i AS int
SET @i = 1

WHILE @i <= @n
BEGIN
    INSERT INTO Cars11 ([Model_prices])
    VALUES (FLOOR(RAND(CHECKSUM(@uid)) * (@Max_Value - @Min_Value + 1) + @Min_Value))

    SET @i += 1
    SET @uid = NEWID()
END

COMMIT TRANSACTION

UPDATE Cars11
SET model_idss = (SELECT model_id
                  FROM carBrand
                  WHERE Cars11.model_idss = carBrand.model_id
                    AND carBrand.model_id <= 135);
Here are some other options for updating a column based on a query result
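One of those options is the join form of UPDATE, sketched below with the same table and column names. The join condition simply mirrors the subquery version above; in practice you would join on whichever key actually relates the two tables.

UPDATE c
SET c.model_idss = b.model_id
FROM Cars11 AS c
INNER JOIN carBrand AS b
    ON c.model_idss = b.model_id
WHERE b.model_id <= 135;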

UPDATE in Batches Does Not End and Remaining Data Does Not Get Updated

I need to update a table in batches, but it does not work. I tried the two options below.
Both options update the first 10 rows, but the update keeps running and only those 10 rows end up updated.
It seems like the update never finishes, and the count shows more than the number of records in the table to be updated.
Please advise.
-- OPTION #1
SET NOCOUNT OFF
IF OBJECT_ID('tempdb..#Table') IS NOT NULL
BEGIN
DROP TABLE #Table
END
-- select count(*) from #Table where ID = 0
-- select * from #Table
CREATE TABLE #Table ( ID INT )
WHILE (1 = 1)
AND ( Select count(*) from #Table ) < 10000
BEGIN
BEGIN TRANSACTION
INSERT INTO #Table (ID)
VALUES (1)
IF @@ROWCOUNT = 10000 -- terminating condition;
BEGIN
COMMIT TRANSACTION
BREAK
END
END
-- UPDATE
WHILE (1 = 1)
BEGIN
BEGIN TRANSACTION
UPDATE TOP (10) upd
SET ID = 0
FROM #Table upd
IF @@ROWCOUNT = 0 -- terminating condition;
BEGIN
COMMIT TRANSACTION
BREAK
END
END
-- OPTION #2
SET NOCOUNT OFF
IF OBJECT_ID('tempdb..#Table2') IS NOT NULL
BEGIN
DROP TABLE #Table2
END
-- select count(*) from #Table2 where ID = 0
-- select * from #Table2
CREATE TABLE #Table2 ( ID INT )
--DECLARE @rows INT
--DECLARE @count INT
WHILE (1 = 1)
AND ( Select count(*) from #Table2 ) < 10000
BEGIN
BEGIN TRANSACTION
INSERT INTO #Table2 (ID)
VALUES (1)
IF @@ROWCOUNT = 10000 -- terminating condition;
BEGIN
COMMIT TRANSACTION
BREAK
END
END
DECLARE @rows INT
DECLARE @count INT
-- UPDATE
SET @rows = 1
SET @count = 0
WHILE @rows > 0
BEGIN
BEGIN TRANSACTION
UPDATE TOP (10) #Table2 -- upd
SET ID = 0
-- FROM #Table upd
SET @rows = @@ROWCOUNT
SET @count = @count + @rows
RAISERROR('COUNT %d', 0, 1, @count) WITH NOWAIT
COMMIT TRANSACTION
END
OK, there were a couple of issues with your code.
TOP in an UPDATE works, but you can't control which rows it picks (there is no ORDER BY), so it's more straightforward to restrict the rows with a sub-query instead, as shown.
You were setting all the IDs to 1, so there was no way to uniquely identify a row; you could only update all of them. I have assumed that in your real-life problem you would have unique IDs, and I have modified the code to suit.
I'm unsure about the intention of the various nested transactions; they don't appear to accomplish much, and they don't match the logic.
IF OBJECT_ID('tempdb..#Table2') IS NOT NULL
BEGIN
DROP TABLE #Table2;
END
CREATE TABLE #Table2 (ID INT);
DECLARE @Count int = 0;
WHILE (select count(*) from #Table2) < 10000 BEGIN
INSERT INTO #Table2 (ID)
VALUES (@Count)
-- Make sure we have a unique id for the test, else we can't identify 10 records
set @Count = @Count + 1;
END
-- While exists an 'un-updated' record continue
WHILE exists (select 1 from #Table2 where ID > 0) BEGIN
-- Update any top 10 'un-updated' records
UPDATE #Table2 SET
ID = 0
where id in (select top 10 id from #Table2 where ID > 0)
END
DROP TABLE #Table2
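As a side note, the same update loop could instead be driven by @@ROWCOUNT rather than the EXISTS probe. A minimal sketch against the same #Table2, run in place of the EXISTS loop above:

DECLARE @BatchRows int = 1;

WHILE @BatchRows > 0
BEGIN
    -- TOP is allowed in an UPDATE; the WHERE clause is what lets the loop finish
    UPDATE TOP (10) #Table2
    SET ID = 0
    WHERE ID > 0;

    SET @BatchRows = @@ROWCOUNT;
END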

Updating Null records of a table by invoking stored procedure throws error 'Subquery returned more than one value'

I am trying to update all NULL values of a column with a UUID (generated with the help of a stored procedure, GetOptimizedUuid). While doing so I am getting an error:
Subquery returned more than 1 value
I understand the causes of the error, but none of my fixes helped.
I tried some loops, but that didn't fix it either:
BEGIN
    DECLARE @no INT;
    DECLARE @i INT;

    SET @no = (SELECT COUNT(id) FROM table1)
    SET @i = 0;

    WHILE @i < @no
    BEGIN
        DECLARE @TempUuid TABLE (SeqUuid UNIQUEIDENTIFIER, OptimizedUuid UNIQUEIDENTIFIER)

        INSERT INTO @TempUuid
        EXECUTE [Sample].[dbo].[GetOptimizedUuid]

        UPDATE table1
        SET col2 = (SELECT OptimizedUuid FROM @TempUuid)
        WHERE col2 IS NULL;

        SET @i = @i + 1;
    END
END
Help me sort this out, thanks!
Not entirely sure what you're doing - why do you need to call this GetOptimizedUuid stored procedure? Can't you just use NEWID() to get a new GUID?
Anyway - assuming you have to call this stored procedure, I'd call it once, before the loop, to get all the IDs you need. Then, inside the loop, you take the top (1) UUID from the table, update one row in your database table, and remove the UUID you've just used from the temp table - otherwise you keep re-using the same ID over and over again.
Try something like this:
BEGIN
    DECLARE @no INT;
    DECLARE @i INT;

    SET @no = (SELECT COUNT(id) FROM table1)
    SET @i = 0;

    -- define and fill the table *ONCE* and *BEFORE* the loop
    DECLARE @TempUuid TABLE (SeqUuid UNIQUEIDENTIFIER, OptimizedUuid UNIQUEIDENTIFIER)

    INSERT INTO @TempUuid
    EXECUTE [Sample].[dbo].[GetOptimizedUuid]

    -- declare a UUID to use
    DECLARE @NewUuid UNIQUEIDENTIFIER;

    WHILE @i < @no
    BEGIN
        -- get the first UUID from the temp table
        SELECT TOP (1) @NewUuid = OptimizedUuid
        FROM @TempUuid;

        -- update *one* row of your table with it
        UPDATE TOP (1) table1
        SET col2 = @NewUuid
        WHERE col2 IS NULL;

        -- *REMOVE* the UUID that you've just used from the table
        DELETE FROM @TempUuid
        WHERE OptimizedUuid = @NewUuid;

        SET @i = @i + 1;
    END
END
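As mentioned above, if a plain NEWID() is acceptable instead of the stored procedure, the whole loop collapses to a single statement, since NEWID() is evaluated once per row:

UPDATE table1
SET col2 = NEWID()
WHERE col2 IS NULL;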

Deleting large number of rows in chunks

I have about 8 tables with 10 million rows or more each, and I want the fastest/most elegant way to delete from them. I have decided to delete in chunks. With my changes added, the script looks very ugly, and I want to know how to format it to look better. Also, is this the best way to be doing this?
DECLARE @ChunkSize int
SET @ChunkSize = 50000

WHILE @ChunkSize <> 0
BEGIN
    DELETE TOP (@ChunkSize) FROM TABLE1
    WHERE CREATED < @DATE

    SET @ChunkSize = @@ROWCOUNT
END

DECLARE @ChunkSize int
SET @ChunkSize = 50000

WHILE @ChunkSize <> 0
BEGIN
    DELETE TOP (@ChunkSize) FROM TABLE2
    WHERE CREATED < @DATE

    SET @ChunkSize = @@ROWCOUNT
END
.......
I would be doing this for all 8 tables which doesn't seem practical. Any advice on how to clean this up?
Prior to SQL Server 2016 SP1, partitioning is only available in Enterprise Edition, so you can either delete in batches or, if the amount of data to be removed is small compared to the total, copy the good data to another table instead.
For the batch work, I would make a few suggestions to simplify your code a bit.
DECLARE @ChunkSize int
SELECT @ChunkSize = 50000 -- use SELECT instead of SET so @@ROWCOUNT will be <> 0

WHILE @@ROWCOUNT <> 0
BEGIN
    DELETE TOP (@ChunkSize) FROM TABLE1
    WHERE CREATED < @DATE
END

SELECT @ChunkSize = @ChunkSize -- this will ensure that @@ROWCOUNT = 1 again

WHILE @@ROWCOUNT <> 0
BEGIN
    DELETE TOP (@ChunkSize) FROM TABLE2
    WHERE CREATED < @DATE
END
You may have to play with the chunk size to find what works well with your data, but 50k is a reasonable starting point.
If you want to avoid repeating your loop for each table, you could use dynamic SQL
IF OBJECT_ID('tempdb..#tableNames') IS NOT NULL DROP TABLE #tableNames

SELECT name INTO #tableNames FROM sys.tables WHERE name IN (/* Names of tables you want to delete from */)

DECLARE @table varchar(50)
DECLARE @query nvarchar(max)

WHILE EXISTS (SELECT '1' FROM #tableNames)
BEGIN
    SET @table = (SELECT TOP 1 name FROM #tableNames)
    DELETE FROM #tableNames WHERE name = @table

    SET @query = 'DECLARE @ChunkSize int
        SET @ChunkSize = 50000
        WHILE @ChunkSize <> 0
        BEGIN
            DELETE TOP (@ChunkSize) FROM ' + @table + '
            WHERE CREATED < @DATE
            SET @ChunkSize = @@ROWCOUNT
        END'

    EXEC sp_executesql @query
END
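A small hardening of the same loop, as a sketch only: quote the table name with QUOTENAME and pass the cutoff date to sp_executesql as a parameter instead of expecting a @DATE variable to exist inside the dynamic batch. It assumes the same #tableNames temp table as above; @PurgeBefore is an illustrative name and value.

DECLARE @table sysname
DECLARE @query nvarchar(max)
DECLARE @PurgeBefore datetime = '20200101'   -- illustrative cutoff; use your own value

WHILE EXISTS (SELECT 1 FROM #tableNames)
BEGIN
    SET @table = (SELECT TOP 1 name FROM #tableNames)
    DELETE FROM #tableNames WHERE name = @table

    SET @query = N'DECLARE @ChunkSize int
        SET @ChunkSize = 50000
        WHILE @ChunkSize <> 0
        BEGIN
            DELETE TOP (@ChunkSize) FROM ' + QUOTENAME(@table) + N'
            WHERE CREATED < @PurgeBefore
            SET @ChunkSize = @@ROWCOUNT
        END'

    -- the cutoff is forwarded into the dynamic batch as a proper parameter
    EXEC sp_executesql @query, N'@PurgeBefore datetime', @PurgeBefore = @PurgeBefore
END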