SQL - slow While loop

I have the following T-SQL code that is either adding or updating 1 record at a time to a temp table. Does anyone have any suggestions to speed this process up?
DECLARE @Total AS INT
SELECT @Total = COUNT(AgentsID) FROM #TempAgentsConcat
DECLARE @counter AS INT
SET @counter = 1
DECLARE @CurrentVal AS NVARCHAR(1024)
DECLARE @RowCount AS INT
DECLARE @OBJ_ID AS INT
WHILE (@counter <= @Total)
BEGIN
    SELECT @OBJ_ID = Id FROM #TempAgentsConcat WHERE AgentsId = @counter
    SELECT @CurrentVal = SVRMachine FROM #TempAgentsConcat WHERE ID = @OBJ_ID
    IF EXISTS (SELECT * FROM #TempEndpoints WHERE ID = @OBJ_ID)
    BEGIN
        UPDATE #TempEndpoints SET SVRMachine = @CurrentVal WHERE ID = @OBJ_ID
    END
    ELSE
    BEGIN
        INSERT INTO #TempEndpoints (SVRMachine, IPPort, ID)
        VALUES (@CurrentVal, NULL, @OBJ_ID)
    END
    SET @counter = @counter + 1
END

It looks like you are trying to merge one table into another. First, let's talk about a couple of issues in your query:
1. Avoid using loops unless it's absolutely necessary.
2. You are assigning two different variables by reading the same row in two queries.
You can do this in a single query:
SELECT @OBJ_ID = Id, @CurrentVal = SVRMachine FROM #TempAgentsConcat WHERE AgentsId = @counter
instead of two:
SELECT @OBJ_ID = Id FROM #TempAgentsConcat WHERE AgentsId = @counter
SELECT @CurrentVal = SVRMachine FROM #TempAgentsConcat WHERE ID = @OBJ_ID
Now let's rewrite the query without the loop. The answer by @Cetin is one solution. Your requirement looks like a classic example of merging tables, so you can also use SQL MERGE (SQL Server 2008 and above). You can read more about MERGE here; check out example 'C'.
Using MERGE, your query will look like this:
MERGE INTO #TempEndpoints AS Target
USING (SELECT SVRMachine, Id FROM #TempAgentsConcat)
    AS Source (SVRMachine, ID)
ON Target.ID = Source.ID
WHEN MATCHED THEN
    UPDATE SET SVRMachine = Source.SVRMachine
WHEN NOT MATCHED BY TARGET THEN
    INSERT (ID, IPPort, SVRMachine) VALUES (Source.ID, NULL, Source.SVRMachine);
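Note that a MERGE statement must be terminated with a semicolon, or SQL Server raises a syntax error. For concurrent workloads the usual advice is to add a WITH (HOLDLOCK) hint on the target so the upsert is atomic, though with a session-scoped temp table as the target that is not a concern here.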

Why would you use a lot of variables and a loop? SQL Server (like any SQL database) works best with sets rather than loops:
UPDATE #TempEndpoints
SET SVRMachine = ac.SVRMachine
FROM #TempAgentsConcat ac
WHERE #TempEndpoints.Id = ac.ID;
INSERT INTO #TempEndpoints
(
SVRMachine,
ID
)
SELECT SVRMachine,
ID
FROM #TempAgentsConcat ac
WHERE NOT EXISTS
(
SELECT * FROM #TempEndpoints ep WHERE ep.ID = ac.ID
);
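This UPDATE-then-INSERT pair is the loop-free equivalent of the MERGE shown above: the NOT EXISTS guard plays the role of WHEN NOT MATCHED BY TARGET. Each statement touches every row at most once, which is why it scales so much better than the row-by-row loop.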

Related

Generating dummy data from an existing data set is slow using a cursor

I'm trying to generate dummy data from the existing data I have in the tables. All I want is to increase the number of records in Table1 to a specified amount N. The other tables should grow based on the foreign key references.
The tables have one-to-many relationships: for one record in Table1 I can have multiple entries in Table2, and in Table3 I can have many records keyed on the IDs of Table2.
Since the IDs are identity primary keys, I capture each new one either with
SET @NEWLY_INSERTED_ID = SCOPE_IDENTITY()
after inserting into Table1 and use it in the insert for Table2, or by inserting the new IDs into a temp table and joining on it to achieve the same result for Table3.
Here's the approach I'm taking with the CURSOR.
DECLARE @MyId AS INT;
DECLARE @myCursor AS CURSOR;
DECLARE @DESIRED_ROW_COUNT INT = 70000
DECLARE @ROWS_INSERTED INT = 0
DECLARE @CURRENT_ROW_COUNT INT = 0
DECLARE @NEWLY_INSERTED_ID INT
DECLARE @LANGUAGE_PAIR_IDS TABLE ( LangugePairId INT, NewId INT, SourceLanguage varchar(100), TargetLangauge varchar(100) )
WHILE (@ROWS_INSERTED < @DESIRED_ROW_COUNT)
BEGIN
    SET @myCursor = CURSOR FOR
        SELECT Id FROM MyTable
    SET @CURRENT_ROW_COUNT = (SELECT COUNT(ID) FROM MyTable)
    OPEN @myCursor;
    FETCH NEXT FROM @myCursor INTO @MyId;
    WHILE @@FETCH_STATUS = 0
    BEGIN
        IF ((@CURRENT_ROW_COUNT < @DESIRED_ROW_COUNT) AND (@ROWS_INSERTED < @DESIRED_ROW_COUNT))
        BEGIN
            INSERT INTO [dbo].[MyTable]
                   ([Column1]
                   ,[Column2]
                   ,[Column3]
                   )
            SELECT
                   convert(numeric(9,0), rand() * 899999999) + 100000000
                   ,Column2
                   ,Column3
            FROM MyTable
            WHERE Id = @MyId
            SET @NEWLY_INSERTED_ID = SCOPE_IDENTITY()
            INSERT INTO [dbo].[Language]
                   ([MyTable1Id]
                   ,[Target]
                   ,[Source])
            OUTPUT inserted.Id, inserted.MyTable1Id, inserted.Source, inserted.[Target] INTO @LANGUAGE_PAIR_IDS (LangugePairId, NewId, SourceLanguage, TargetLangauge)
            SELECT
                   @NEWLY_INSERTED_ID
                   ,[Target]
                   ,[Source]
            FROM [dbo].[Language]
            WHERE MyTable1Id = @MyId
            ORDER BY Id
            DECLARE @tbl AS TABLE (newLanguageId INT, oldLanguageId INT, sourceLanguage VARCHAR(100), targetLanguage VARCHAR(100))
            INSERT INTO @tbl (newLanguageId, oldLanguageId, sourceLanguage, targetLanguage)
            SELECT 0, Id, [Source], [Target] FROM [Language] WHERE MyTable1Id = @MyId ORDER BY Id
            UPDATE t
            SET t.newLanguageId = lp.LangugePairId
            FROM @tbl t
            JOIN @LANGUAGE_PAIR_IDS lp
                ON t.sourceLanguage = lp.SourceLanguage
                AND t.targetLanguage = lp.TargetLangauge
            INSERT INTO [dbo].[Manager]
                   ([LanguagePairId]
                   ,[UserId]
                   ,[MyDate])
            SELECT
                   tbl.newLanguageId
                   ,m.[UserId]
                   ,m.[MyDate]
            FROM Manager m
            INNER JOIN @tbl tbl
                ON m.LanguagePairId = tbl.oldLanguageId
            WHERE m.LanguagePairId IN (SELECT Id FROM [Language] WHERE MyTable1Id = @MyId) -- returns the old language pair id
            SET @ROWS_INSERTED += 1
            SET @CURRENT_ROW_COUNT += 1
        END
        ELSE
        BEGIN
            PRINT 'REACHED EXIT'
            SET @ROWS_INSERTED = @DESIRED_ROW_COUNT
            BREAK
        END
        FETCH NEXT FROM @myCursor INTO @MyId;
    END
    CLOSE @myCursor
    DEALLOCATE @myCursor
END
The above code works! It generates the data I need. However, it's very, very slow. To give some sense of scale: the initial load of data was ~60,000 records in Table1, ~74,000 in Table2 and ~3,400 in Table3.
I tried to insert 9,000 rows into Table1. With the above code, it took 17:05:01 (hh:mm:ss) to complete.
Any suggestions on how I can optimize the query to run a little faster? My goal is to insert 1-2 million records into Table1 without having to wait for days. I'm not tied to the CURSOR; I'm happy to achieve the same result in any other way.
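A hedged sketch of the usual loop-free pattern for this kind of cloning (the table and column names below are simplified placeholders, not the real schema): the trick is that MERGE, unlike INSERT, allows its OUTPUT clause to reference source columns, so the old-ID-to-new-ID mapping can be captured for a whole batch in a single statement and the child tables copied with set-based joins.
DECLARE @IdMap TABLE (OldId int, NewId int);
-- Clone every Table1 row once; ON 1 = 0 never matches, so every source row
-- falls into WHEN NOT MATCHED and gets inserted.
MERGE INTO Table1 AS t
USING (SELECT Id, Col1 FROM Table1) AS s
ON 1 = 0
WHEN NOT MATCHED THEN
    INSERT (Col1) VALUES (s.Col1)
OUTPUT s.Id, inserted.Id INTO @IdMap (OldId, NewId);
-- Child rows can now be multiplied set-based through the mapping:
INSERT INTO Table2 (Table1Id, SomeCol)
SELECT m.NewId, t2.SomeCol
FROM Table2 t2
JOIN @IdMap m ON m.OldId = t2.Table1Id;
Repeating the same OUTPUT-and-map step from Table2 to Table3 removes the cursor entirely, and since each pass clones every existing row, reaching 1-2 million rows takes only a handful of passes.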

How do I update a SQL table in batches?

I already spent some time trying to figure this out, but I am still somewhat stuck and I can't really find the solution online; I think I am missing the keywords.
I want to update a SQL table in batches, meaning I have a few million entries and want to update index 0-999, then 1000-1999, and so on, step by step, to avoid a huge database lock.
This is what I found:
DECLARE @Rows INT,
        @BatchSize INT;
SET @BatchSize = 2500;
SET @Rows = @BatchSize;
WHILE (@Rows = @BatchSize)
BEGIN
    UPDATE TOP(@BatchSize) db1
    SET db1.attr = db2.attr
    FROM DB1 db1
    LEFT JOIN DB2 db2
        ON db1.attr2 = db2.attr2
    SET @Rows = @@ROWCOUNT;
END;
I simplified my statement a little bit as you can see, but it should still be clear how I approached the whole problem.
However, this thing loops forever, and looking at the output it changed far more rows than there are in the database.
I later checked the same loop with a SELECT statement inside and found that it seems to simply select the first @BatchSize rows of the table over and over, even though I thought it would progress through the index with every iteration.
How can I change this so it actually progresses by @BatchSize indices every iteration instead of simply targeting the same rows every time?
You need some limiting factor to decide which rows are hit each loop. Generally you will use an id field. There are lots of ways to approach it, but here is one way:
DECLARE @MinID int = 1;
DECLARE @MaxID int = 2500;
DECLARE @Rows int = 1;
DECLARE @Batchsize int = 2500;
WHILE (@Rows > 0)
BEGIN
    UPDATE db1
    SET db1.attr = db2.attr
    FROM DB1 db1
    LEFT JOIN DB2 db2 ON db1.attr2 = db2.attr2
    WHERE db1.ID BETWEEN @MinID AND @MaxID
    -- note: this exits when a batch range contains no rows, so if the ID
    -- sequence has large gaps, loop while @MinID <= MAX(ID) instead
    SET @Rows = @@ROWCOUNT
    SET @MinID = @MinID + @Batchsize
    SET @MaxID = @MaxID + @Batchsize
END
Replace db1.ID with whatever field works best in your table schema.
Note, your approach would work if you had some kind of WHERE clause on the update query that prevented the same rows from being returned.
Ex. UPDATE table SET id = 1 WHERE id = 2 won't pull the same rows in a second execution
One way to do it is using a CTE with ROW_NUMBER (the CTE has to be defined inside the loop, since a CTE only lives for the single statement that follows it):
DECLARE @BatchSize int = 2500,
        @Batch int = 0,
        @Count int;
SELECT @Count = COUNT(*) FROM db1;
WHILE @Batch * @BatchSize < @Count
BEGIN
    ;WITH CTE AS
    (
        SELECT attr,
               attr2,
               ROW_NUMBER() OVER(ORDER BY attr, attr2) AS RN
        FROM db1
    )
    UPDATE c
    SET attr = db2.attr
    FROM CTE c
    LEFT JOIN DB2 db2 ON c.attr2 = db2.attr2
    WHERE c.RN > @Batch * @BatchSize
      AND c.RN <= (@Batch + 1) * @BatchSize;
    SET @Batch += 1;
END
This will update 2500 records each step of the loop.
You are just updating the same rows every time. You need an AND db1.attr <> db2.attr condition so rows that have already been updated drop out of the TOP(@BatchSize) selection.
And why a LEFT JOIN? If you really want to assign NULL values where there is no match, use a separate update for that.
DECLARE @Rows INT,
        @BatchSize INT;
SET @BatchSize = 2500;
SET @Rows = @BatchSize;
WHILE (@Rows = @BatchSize)
BEGIN
    UPDATE TOP(@BatchSize) db1
    SET db1.attr = db2.attr
    FROM DB1 db1
    JOIN DB2 db2
        ON db1.attr2 = db2.attr2
        AND db1.attr <> db2.attr
    SET @Rows = @@ROWCOUNT;
END;
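One caveat with the <> filter: comparing against NULL yields UNKNOWN, so rows where db1.attr or db2.attr is NULL are never picked up by this condition. If NULLs are possible in attr, the join filter needs explicit NULL handling, e.g.:
AND (db1.attr <> db2.attr
     OR (db1.attr IS NULL AND db2.attr IS NOT NULL)
     OR (db1.attr IS NOT NULL AND db2.attr IS NULL))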
And you can do this:
SELECT 1
WHILE (@@ROWCOUNT > 0)
BEGIN
    UPDATE TOP(2000) db1
    SET db1.attr = db2.attr
    FROM DB1 db1
    JOIN DB2 db2
        ON db1.attr2 = db2.attr2
        AND db1.attr <> db2.attr
END;
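The initial SELECT 1 simply seeds @@ROWCOUNT with a nonzero value so the loop body runs at least once; each UPDATE TOP(2000) then refreshes @@ROWCOUNT, and the loop exits as soon as an iteration finds no remaining rows with db1.attr <> db2.attr.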

Can we create a view after a script from a variable?

I would like to create a view at the end of the following script.
I know that 'CREATE VIEW' must be the first statement in a query batch. My problem is that this query must use a variable (@listOfIDRUB), and this variable is only filled correctly at the end of my little script.
I also tried to create the view before my first declaration, but that caused a problem with the DECLAREs.
So, is it possible to easily create a view from the result of my script, or do I have to do something else?
DECLARE @CounterResId int;
DECLARE @lePath varchar(255);
DECLARE @listOfIDRUB TABLE (EXTERNALREFERENCE uniqueidentifier, ID varchar(255), DOCID varchar(255));
DECLARE @Max int;
SET @lePath = '';
SET @CounterResId = 1;
SET @Max = (SELECT COUNT(*) FROM SYNTHETIC..EXTRANET_PURGE WHERE TYPE_SUPPR = 'ResId')
WHILE (@CounterResId <= @Max)
BEGIN;
    SET @lePath =
        (SELECT tmp.lePath FROM
            (
                SELECT ROW_NUMBER() OVER(ORDER BY path) AS NumLigne, CONCAT(path, '%') AS lePath FROM RUBRIQUE
                WHERE MODELE = 'CAEEE64D-2B00-44EF-AA11-6B72ABD9FE38'
                AND CODE IN (SELECT ID FROM SYNTHETIC..EXTRANET_PURGE WHERE TYPE_SUPPR = 'ResId')
            ) tmp
        WHERE tmp.NumLigne = @CounterResId)
    INSERT INTO @listOfIDRUB (EXTERNALREFERENCE, ID, DOCID)
    SELECT sec.EXTERNALREFERENCE, sec.ID, sec.DOCUMENTID
    FROM WEBACCESS_FRONT..SECTIONS sec
    INNER JOIN rubrique rub ON rub.ID_RUBRIQUE = sec.EXTERNALREFERENCE
    INNER JOIN template_tree_item tti ON tti.id_template_tree_item = rub.modele
    INNER JOIN template t ON t.id_template = tti.template
    WHERE t.CODE IN (SELECT TEMPLATE_CODE FROM SYNTHETIC..EasyFlowEngineListTemplateCode)
    AND rub.path LIKE @lePath
    PRINT @CounterResId;
    PRINT @lePath;
    SET @CounterResId = @CounterResId + 1;
END;
SELECT * FROM @listOfIDRUB;
Instead of the final SELECT * FROM @listOfIDRUB,
I wanted CREATE VIEW test AS SELECT * FROM @listOfIDRUB.
I have also tried CREATE VIEW test AS (my whole script).
Whenever you ask something about SQL, please state your RDBMS (product and version). The answers depend heavily on this...
From your code I assume this is SQL Server.
So, to your question: no. A VIEW must be a single, "inlineable" (ad-hoc) statement.
You might think about a multi-statement UDF, but in almost all cases that is a bad thing (bad performance). Only go this way if your result table will consist of rather few rows!
Without knowing your tables this is rather blind walking, but you might try this (add parameters if you can move external operations, e.g. filtering, into the function):
CREATE FUNCTION dbo.MyFunction()
RETURNS @listOfIDRUB TABLE (EXTERNALREFERENCE uniqueidentifier, ID varchar(255), DOCID varchar(255))
AS
BEGIN
    DECLARE @CounterResId int;
    DECLARE @lePath varchar(255);
    DECLARE @Max int;
    SET @lePath = '';
    SET @CounterResId = 1;
    SET @Max = (SELECT COUNT(*) FROM SYNTHETIC..EXTRANET_PURGE WHERE TYPE_SUPPR = 'ResId')
    WHILE (@CounterResId <= @Max)
    BEGIN
        SET @lePath =
            (SELECT tmp.lePath FROM
                (
                    SELECT ROW_NUMBER() OVER(ORDER BY path) AS NumLigne, CONCAT(path, '%') AS lePath FROM RUBRIQUE
                    WHERE MODELE = 'CAEEE64D-2B00-44EF-AA11-6B72ABD9FE38'
                    AND CODE IN (SELECT ID FROM SYNTHETIC..EXTRANET_PURGE WHERE TYPE_SUPPR = 'ResId')
                ) tmp
            WHERE tmp.NumLigne = @CounterResId)
        INSERT INTO @listOfIDRUB (EXTERNALREFERENCE, ID, DOCID)
        SELECT sec.EXTERNALREFERENCE, sec.ID, sec.DOCUMENTID
        FROM WEBACCESS_FRONT..SECTIONS sec
        INNER JOIN rubrique rub ON rub.ID_RUBRIQUE = sec.EXTERNALREFERENCE
        INNER JOIN template_tree_item tti ON tti.id_template_tree_item = rub.modele
        INNER JOIN template t ON t.id_template = tti.template
        WHERE t.CODE IN (SELECT TEMPLATE_CODE FROM SYNTHETIC..EasyFlowEngineListTemplateCode)
        AND rub.path LIKE @lePath
        --PRINT @CounterResId;
        --PRINT @lePath;
        SET @CounterResId = @CounterResId + 1;
    END;
    RETURN;
END
You can call it like this (very similar to a VIEW)
SELECT * FROM dbo.MyFunction();
And you might even use it in joins...
And last but not least, I'm quite sure one could solve this without the DECLAREs and the loop, too...
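For instance, a rough loop-free sketch (untested, against the same tables): the loop above only exists to feed one LIKE pattern at a time, but joining on LIKE processes all the patterns in a single pass, and a single-statement query like this could then sit directly inside a CREATE VIEW.
SELECT sec.EXTERNALREFERENCE, sec.ID, sec.DOCUMENTID
FROM WEBACCESS_FRONT..SECTIONS sec
INNER JOIN rubrique rub ON rub.ID_RUBRIQUE = sec.EXTERNALREFERENCE
INNER JOIN template_tree_item tti ON tti.id_template_tree_item = rub.modele
INNER JOIN template t ON t.id_template = tti.template
INNER JOIN (
    -- one row per pattern, replacing the per-iteration @lePath lookup
    SELECT CONCAT(path, '%') AS lePath
    FROM RUBRIQUE
    WHERE MODELE = 'CAEEE64D-2B00-44EF-AA11-6B72ABD9FE38'
      AND CODE IN (SELECT ID FROM SYNTHETIC..EXTRANET_PURGE WHERE TYPE_SUPPR = 'ResId')
) p ON rub.path LIKE p.lePath
WHERE t.CODE IN (SELECT TEMPLATE_CODE FROM SYNTHETIC..EasyFlowEngineListTemplateCode);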

SQL Server generate script for views and how to decide order?

I am generating the scripts for views using SQL Server's built-in feature (Tasks -> Generate Scripts). I am creating a separate file for each object (each view). I have around 400 files (containing the SQL scripts of all the views) to be executed on another database, and to do that automatically I have created a BAT file which takes care of it.
There are views which depend on other views, and because of that many views fail to execute. Is there any way to set the order of execution and get rid of the failures?
Any pointers would be a great help.
Please let me know if you need more details.
Thanks
Jony
Could you try this query? You can then execute the CREATE scripts in ascending order of gen (generation).
DECLARE @cnt int = 0, @index int;
DECLARE @viewNames table (number int, name varchar(max))
DECLARE @viewGen table (id uniqueidentifier, gen int, name varchar(max), parentId uniqueidentifier)
INSERT INTO @viewNames
SELECT ROW_NUMBER() OVER(ORDER BY object_Id), name FROM sys.views
SELECT @cnt = COUNT(*) FROM @viewNames
SET @index = @cnt;
WHILE ((SELECT COUNT(*) FROM @viewGen) < @cnt)
BEGIN
    DECLARE @viewName varchar(200)
    SELECT @viewName = name FROM @viewNames WHERE number = @index;
    DECLARE @depCnt int = 0;
    SELECT @depCnt = COUNT(*) FROM sys.dm_sql_referencing_entities ('dbo.' + @viewName, 'OBJECT')
    IF (@depCnt = 0)
    BEGIN
        INSERT INTO @viewGen SELECT NEWID(), 0, name, null FROM @viewNames WHERE number = @index;
    END
    ELSE
    BEGIN
        IF EXISTS (SELECT * FROM sys.dm_sql_referencing_entities ('dbo.' + @viewName, 'OBJECT') AS r INNER JOIN @viewGen AS v ON r.referencing_entity_name = v.name)
        BEGIN
            DECLARE @parentId uniqueidentifier = NEWID();
            INSERT INTO @viewGen SELECT @parentId, 0, name, null FROM @viewNames WHERE number = @index;
            UPDATE v
            SET v.gen = (v.gen + 1), parentId = @parentId
            FROM @viewGen AS v
            INNER JOIN sys.dm_sql_referencing_entities('dbo.' + @viewName, 'OBJECT') AS r ON r.referencing_entity_name = v.name
            UPDATE @viewGen
            SET gen = gen + 1
            WHERE Id = @parentId OR parentId IN (SELECT Id FROM @viewGen WHERE parentId = @parentId)
        END
    END
    SET @index = @index - 1
    IF (@index < 0) BEGIN SET @index = @cnt; END
END
SELECT gen AS [order], name FROM @viewGen ORDER BY gen
Expecting result:
order name
0 vw_Ancient
1 vw_Child1
1 vw_Child2
2 vw_GrandChild
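An alternative sketch, assuming the dependencies are only view-to-view and contain no cycles: a recursive CTE over sys.sql_expression_dependencies can compute each view's dependency depth directly, yielding the same generation ordering without the wrap-around loop.
;WITH deps AS
(
    -- generation 0: views that reference no other view
    SELECT v.object_id, 0 AS gen
    FROM sys.views v
    WHERE NOT EXISTS (SELECT 1
                      FROM sys.sql_expression_dependencies d
                      JOIN sys.views rv ON rv.object_id = d.referenced_id
                      WHERE d.referencing_id = v.object_id)
    UNION ALL
    -- each referencing view sits one generation above what it references
    SELECT d.referencing_id, deps.gen + 1
    FROM sys.sql_expression_dependencies d
    JOIN deps ON deps.object_id = d.referenced_id
    JOIN sys.views v ON v.object_id = d.referencing_id
)
SELECT MAX(gen) AS [order], OBJECT_NAME(object_id) AS name
FROM deps
GROUP BY object_id
ORDER BY [order];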

Efficient SQL Server stored procedure

I am using SQL Server 2008 and running the following stored procedure, which needs to "clean" a 70 million row table by moving about 50 million rows to another table; id_col is an integer (primary identity key).
According to my last run it is working correctly, but it is projected to take about 200 days:
SET NOCOUNT ON
-- define the last ID handled
DECLARE @LastID integer
SET @LastID = 0
DECLARE @tempDate datetime
SET @tempDate = dateadd(dd, -20, getdate())
-- define the ID to be handled now
DECLARE @IDToHandle integer
DECLARE @iCounter integer
DECLARE @watch1 nvarchar(50)
DECLARE @watch2 nvarchar(50)
SET @iCounter = 0
-- select the next to handle
SELECT TOP 1 @IDToHandle = id_col
FROM MAIN_TABLE
WHERE id_col > @LastID AND DATEDIFF(DD, someDateCol, otherDateCol) < 1
AND DATEDIFF(dd, someDateCol, @tempDate) > 0 AND (some_other_int_col = 1745 OR some_other_int_col = 1548 OR some_other_int_col = 4785)
ORDER BY id_col
-- as long as we have s......
WHILE @IDToHandle IS NOT NULL
BEGIN
    IF ((SELECT COUNT(1) FROM SOME_OTHER_TABLE_THAT_CONTAINS_20k_ROWS WHERE some_int_col = @IDToHandle) = 0 AND (SELECT COUNT(1) FROM A_70k_rows_table WHERE some_int_col = @IDToHandle) = 0)
    BEGIN
        INSERT INTO SECONDERY_TABLE
        SELECT col1, col2, col3.....
        FROM MAIN_TABLE WHERE id_col = @IDToHandle
        EXEC [dbo].[DeleteByID] @ID = @IDToHandle -- deletes the row from 2 other tables that are related to the MAIN_TABLE and then from the MAIN_TABLE
        SET @iCounter = @iCounter + 1
    END
    IF (@iCounter % 1000 = 0)
    BEGIN
        SET @watch1 = 'iCounter - ' + CAST(@iCounter AS VARCHAR)
        SET @watch2 = 'IDToHandle - ' + CAST(@IDToHandle AS VARCHAR)
        RAISERROR (@watch1, 10, 1) WITH NOWAIT
        RAISERROR (@watch2, 10, 1) WITH NOWAIT
    END
    -- set the last handled to the one we just handled
    SET @LastID = @IDToHandle
    SET @IDToHandle = NULL
    -- select the next to handle
    SELECT TOP 1 @IDToHandle = id_col
    FROM MAIN_TABLE
    WHERE id_col > @LastID AND DATEDIFF(DD, someDateCol, otherDateCol) < 1
    AND DATEDIFF(dd, someDateCol, @tempDate) > 0 AND (some_other_int_col = 1745 OR some_other_int_col = 1548 OR some_other_int_col = 4785)
    ORDER BY id_col
END
Any ideas or directions for improving this procedure's run time are welcome.
Yes, try this:
Declare @Ids Table (id int Primary Key not Null)
Declare @tempDate datetime = DateAdd(dd, -20, GetDate()) -- same cutoff as in the original proc

Insert @Ids(id)
Select id_col
From MAIN_TABLE m
Where someDateCol >= otherDateCol
And someDateCol < @tempDate -- If there are times in these datetime fields,
                            -- then you may need to modify this condition.
And some_other_int_col In (1745, 1548, 4785)
And Not Exists (Select * From SOME_OTHER_TABLE_THAT_CONTAINS_20k_ROWS
                Where some_int_col = m.id_col)
And Not Exists (Select * From A_70k_rows_table
                Where some_int_col = m.id_col)

Select id from @Ids -- this is to confirm the above code generates the correct list of Ids
return -- this line stops the batch (no inserts/deletes) until you have verified @Ids is correct
-- Once you have verified that @Ids is correctly populated,
-- delete or comment out the select and return lines above so the insert runs.

Begin Transaction

Delete ot -- eliminate row-by-row call to second stored proc
From OtherTable ot
Join MAIN_TABLE m On m.id_col = ot.FKCol
Join @Ids i On i.id = m.id_col

Insert SECONDERY_TABLE (col1, col2, etc.)
Select col1, col2, col3.....
From MAIN_TABLE m Join @Ids i On i.id = m.id_col

Delete m -- eliminate row-by-row call to second stored proc
From MAIN_TABLE m
Join @Ids i On i.id = m.id_col

Commit Transaction
Explanation:
You had numerous filter conditions that were not SARGable, i.e., they force a complete table scan on every iteration of your loop instead of being able to use an existing index. Always try to avoid filter conditions that apply processing logic to a table column's value before comparing it to some other value; doing so eliminates the opportunity for the query optimizer to use an index.
You were also executing the inserts one at a time. It is far better to generate the list of PK Ids that need to be processed (all at once) and then do all the inserts at once, in one statement.
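For example, the original filter datediff(dd, someDateCol, @tempDate) > 0 applies a function to the column on every row, while the roughly equivalent someDateCol < @tempDate in the rewrite (equivalent up to time-of-day handling, as the inline comment notes) compares the bare column to a constant and can seek an index on someDateCol.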