Insertion of records based on some condition - SQL

I'm trying to insert a few records from a temporary table using a SQL Server stored procedure. The temporary table has a percentage column and a PQ number column, and more than one row can share the same PQ number. A group of rows with the same PQ number should only be inserted if their percentages sum to 100%. I can't work out how to write the WHERE clause for this condition.
CREATE PROCEDURE [dbo].[Upsert_DebitSheet]
    @filename VARCHAR(250)
AS
BEGIN
    SET XACT_ABORT ON
RETRY: -- Label RETRY
    BEGIN TRANSACTION
    BEGIN TRY
        SET NOCOUNT ON;

        INSERT INTO [dbo].[DebitSheet]([Date], [RMMName], [Invoice], [PQNumber], [CAF],
                                       [Percentage], [Amount], [FileName])
        SELECT
            *, @filename
        FROM
            (SELECT
                 [Date], [RMMName], [Invoice], [PQNumber], [CAF],
                 [Percentage], [Amount]
             FROM
                 [dbo].[TempDebitSheet]
             WHERE /* condition needed here */) result

        SELECT @@ROWCOUNT

        TRUNCATE TABLE [dbo].[TempDebitSheet]

        COMMIT TRANSACTION
    END TRY
    BEGIN CATCH
        PRINT ERROR_MESSAGE()
        ROLLBACK TRANSACTION

        IF ERROR_NUMBER() = 1205 -- Deadlock error number
        BEGIN
            WAITFOR DELAY '00:00:00.05' -- Wait 50 ms
            GOTO RETRY -- Go to label RETRY
        END
    END CATCH
    SET ROWCOUNT 0;
END
Temporary Table (sample data shown in screenshot)
MainTable (expected result shown in screenshot)

You can use a subquery in the WHERE clause:
INSERT INTO [dbo].[DebitSheet]
    ([Date]
    ,[RMMName]
    ,[Invoice]
    ,[PQNumber]
    ,[CAF]
    ,[Percentage]
    ,[Amount]
    ,[FileName])
SELECT [Date]
    ,[RMMName]
    ,[Invoice]
    ,[PQNumber]
    ,[CAF]
    ,[Percentage]
    ,[Amount]
    ,@filename
FROM [dbo].[TempDebitSheet]
WHERE EXISTS (
    SELECT tmp.[PQNumber]
    FROM [dbo].[TempDebitSheet] tmp
    WHERE tmp.[PQNumber] = [TempDebitSheet].[PQNumber]
    GROUP BY tmp.[PQNumber]
    HAVING SUM(tmp.[Percentage]) = 100
)

Modify your query like this:
Insert into ...
Select result.*, @filename from (....) result
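Putting the two answers together, the statement inside the procedure might look roughly like this (a sketch reusing the column names from the question):
INSERT INTO [dbo].[DebitSheet]([Date], [RMMName], [Invoice], [PQNumber], [CAF],
                               [Percentage], [Amount], [FileName])
SELECT result.*, @filename
FROM
    (SELECT [Date], [RMMName], [Invoice], [PQNumber], [CAF],
            [Percentage], [Amount]
     FROM [dbo].[TempDebitSheet] t
     -- keep only rows whose PQNumber group sums to exactly 100%
     WHERE EXISTS (SELECT 1
                   FROM [dbo].[TempDebitSheet] tmp
                   WHERE tmp.[PQNumber] = t.[PQNumber]
                   GROUP BY tmp.[PQNumber]
                   HAVING SUM(tmp.[Percentage]) = 100)) result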

Related

Fastest way to insert data into a SQL table using data from the same table or any other dummy data

I have a table in SQL something like below
PersonDetails
{
    ID,               // identity column
    Age int,
    FirstName varchar,
    LastName varchar,
    CreatedDateTime DateTime,
}
This table currently has around 100 million rows; I need to increase the data to about 1 billion rows to test how long it would take to create some indexes on this table.
I can use the data from the same table, inserting the rows again and again.
What is the fastest way to achieve it?
I made a simple while loop in SQL something like below
Declare @maxrows bigint = 900000000,
        @currentrows bigint,
        @batchsize bigint = 10000000;

select @currentrows = count(*) from [dbo].[PersonDetails] with(nolock)

while @currentrows < @maxrows
begin
    insert into [dbo].[PersonDetails]
    select top(@batchsize)
         [Age]
        ,[FirstName]
        ,[LastName]
        ,[CreatedDateTime]
    from [dbo].[PersonDetails]

    select @currentrows = count(*) from [dbo].[PersonDetails] with(nolock)
end
But the problem is that this query fails with the error below after inserting some of the data.
The transaction log for database 'DBNAME' is full due to 'LOG_BACKUP'.
I can either add some delay in each insert, or reduce the batch size.
What is the best way to move forward here?
If this database lives in a testing environment, change the recovery model to simple:
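For example (a minimal sketch; 'DBNAME' is taken from the error message above, substitute your own database name):
-- SIMPLE recovery lets the log be truncated at each checkpoint instead of waiting for a log backup
ALTER DATABASE [DBNAME] SET RECOVERY SIMPLE;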
If not (I am guessing you are not able to change the database settings), I would advise performing the operation in small batches and committing each iteration. It will be something like the following:
while @currentrows < @maxrows
begin
    BEGIN TRY
        BEGIN TRANSACTION;

        insert into [dbo].[PersonDetails]
        select top(@batchsize)
             [Age]
            ,[FirstName]
            ,[LastName]
            ,[CreatedDateTime]
        from [dbo].[PersonDetails]

        COMMIT TRANSACTION;

        select @currentrows = count(*) from [dbo].[PersonDetails] with(nolock)
    END TRY
    BEGIN CATCH
        IF @@TRANCOUNT > 0
        BEGIN
            ROLLBACK TRANSACTION;
        END;
        THROW;
    END CATCH;

    -- WAITFOR DELAY '00:00:01';
end
You can also use WAITFOR DELAY in the loop so that it does not hog resources and block other queries.
I would create a temp table and then transfer the data to the actual table (tempdb always uses the simple recovery model, so the repeated inserts there put less pressure on your database's transaction log).
Something like this:
select * into #PersonDetails from [dbo].[PersonDetails]

Declare @maxrows bigint = 900000000,
        @currentrows bigint,
        @batchsize bigint = 10000000;

select @currentrows = count(*) from [dbo].[PersonDetails] with(nolock)

while @currentrows < @maxrows
begin
    insert into #PersonDetails
    select top(@batchsize)
         [Age]
        ,[FirstName]
        ,[LastName]
        ,[CreatedDateTime]
    from #PersonDetails

    select @currentrows = count(*) from #PersonDetails
end
insert into [dbo].[PersonDetails] (Age, FirstName, LastName, CreatedDateTime) select Age, FirstName, LastName, CreatedDateTime from #PersonDetails

SQL Count Unique Deleted Sets

I currently have a simple table in my database that stores sets and values. I want to be able to delete all entries in the database and return the number of distinct sets that were deleted.
create table sets(
    SetId varchar(50),
    Value int
)
If I have two sets, each with two values, then the table will be loaded with four entries.
Set1, 0
Set1, 1
Set2, 0
Set2, 1
If I delete everything I want to be able to count how many unique SetIds were deleted, so in the example above it should return 2.
Right now I can accomplish this by creating a temp table that holds the deleted SetIds and then counting the distinct values:
CREATE TABLE #temp
(
SetId varchar(50)
);
delete from Sets
OUTPUT DELETED.SetId INTO #temp
select count(distinct SetId) from #temp;
Is there a better way to accomplish this without having to use a temp table?
If you have many rows and want to avoid a temp table (a lot of IO):
declare #cnt int;
set xact_abort on
begin transaction
begin try
select #cnt = count(distinct SetId) from sets;
delete from sets;
commit transaction
end try
begin catch
rollback;
end catch
Or:
declare #cnt int;
set xact_abort on
begin transaction
begin try
select #cnt = count(distinct SetId) from sets;
truncate table sets
commit transaction
end try
begin catch
rollback;
end catch

Catching multiple errors in loop SQL query

I have the insert query below, which selects records from the OriginalData table, where every column is of datatype nvarchar(max), and inserts them into the Temp table, which has specific column definitions, e.g. MainAccount is of type INT.
I am doing a row-by-row insert because if there is a record in the OriginalData table where the MainAccount value is 'Test', it will obviously cause a conversion error and the insert will fail. The BEGIN TRY block is used to update the source table with the error.
However, if there are multiple errors on the same row, I want to be able to capture them all, not just the first one.
TRUNCATE TABLE [Temp]

DECLARE @RowId INT, @MaxRowId INT

SET @RowId = 1

SELECT @MaxRowId = MAX(RowId)
FROM [Staging].[FactFinancialsCoded_Abbas_InitialValidationTest]

WHILE (@RowId <= @MaxRowId)
BEGIN
    BEGIN TRY
        INSERT INTO [Temp] (ExtractSource, MainAccount,
                            RecordLevel1Code, RecordLevel2Code, RecordTypeNo,
                            TransDate, Amount, PeriodCode, CompanyCode)
        SELECT
            ExtractSource, MainAccount,
            RecordLevel1Code, RecordLevel2Code, RecordTypeNo,
            TransDate, Amount, PeriodCode, DataAreaId
        FROM
            [Staging].[FactFinancialsCoded_Abbas_InitialValidationTest]
        WHERE
            RowId = @RowId;

        PRINT @RowId;
    END TRY
    BEGIN CATCH
        UPDATE [Staging].[FactFinancialsCoded_Abbas_InitialValidationTest]
        SET ValidationErrors = ERROR_MESSAGE()
        WHERE RowId = @RowId
    END CATCH

    SET @RowId += 1;
END
Instead of doing it this way, I handle this by using TRY_PARSE() or TRY_CONVERT() on each column that I am converting to a non-string column.
If you then need to store the validation failures in another table, you can make a second pass getting all the rows that have a non-null value in the source table and a null value in the destination table, and insert those rows into your "failed validation" table.
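A rough sketch of the set-based version, reusing the tables and columns from the question (the target types INT, DATETIME and DECIMAL are assumptions; use whatever [Temp] actually defines):
-- Single set-based pass: values that cannot be converted become NULL instead of failing the insert
INSERT INTO [Temp] (ExtractSource, MainAccount,
                    RecordLevel1Code, RecordLevel2Code, RecordTypeNo,
                    TransDate, Amount, PeriodCode, CompanyCode)
SELECT
    ExtractSource,
    TRY_CONVERT(INT, MainAccount),
    RecordLevel1Code, RecordLevel2Code, RecordTypeNo,
    TRY_CONVERT(DATETIME, TransDate),
    TRY_CONVERT(DECIMAL(18, 2), Amount),
    PeriodCode, DataAreaId
FROM [Staging].[FactFinancialsCoded_Abbas_InitialValidationTest];

-- Second pass: flag rows where a non-null source value failed to convert
UPDATE src
SET ValidationErrors = 'MainAccount is not a valid INT'
FROM [Staging].[FactFinancialsCoded_Abbas_InitialValidationTest] AS src
WHERE src.MainAccount IS NOT NULL
  AND TRY_CONVERT(INT, src.MainAccount) IS NULL;
Rows flagged this way can then be copied into a separate "failed validation" table, as described above.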

Batch deletion correctly formatted?

I have multiple tables with millions of rows in them. To be safe and not overflow the transaction log, I am deleting them in batches of 100,000 rows at a time. I have to first filter based on date and then delete all rows older than a certain date.
To do this I am creating a table in my stored procedure which holds the IDs of the rows that need to be deleted:
I then insert into that table and delete the rows from the desired table using loops. This seems to run successfully but it is extremely slow. Is this being done correctly? Is this the fastest way to do it?
DECLARE @FILL_ID_TABLE TABLE (
    FILL_ID varchar(16)
)

DECLARE @TODAYS_DATE date
SELECT @TODAYS_DATE = GETDATE()

-- This deletes all data older than 2 weeks ago from today
DECLARE @_DATE date
SET @_DATE = DATEADD(WEEK, -2, @TODAYS_DATE)

DECLARE @BatchSize int
SELECT @BatchSize = 100000

BEGIN TRAN FUTURE_TRAN
BEGIN TRY
    INSERT INTO @FILL_ID_TABLE
    SELECT DISTINCT ID
    FROM dbo.ID_TABLE
    WHERE CREATED < @_DATE

    SELECT @BatchSize = 100000

    WHILE @BatchSize <> 0
    BEGIN
        DELETE TOP (@BatchSize) FROM TABLE1
        OUTPUT DELETED.* INTO dbo.TABLE1_ARCHIVE
        WHERE ID IN (SELECT ROLLUP_ID FROM @FILL_ID_TABLE)

        SET @BatchSize = @@ROWCOUNT
    END

    SELECT @BatchSize = 100000

    WHILE @BatchSize <> 0
    BEGIN
        DELETE TOP (@BatchSize) FROM TABLE2
        OUTPUT DELETED.* INTO dbo.TABLE2_ARCHIVE
        WHERE ID IN (SELECT FILL_ID FROM @FILL_ID_TABLE)

        SET @BatchSize = @@ROWCOUNT
    END

    PRINT 'Succeed'
    COMMIT TRANSACTION FUTURE_TRAN
END TRY
BEGIN CATCH
    PRINT 'Failed'
    ROLLBACK TRANSACTION FUTURE_TRAN
END CATCH
Try a join instead of a subquery:
DELETE TOP (@BatchSize) T1
OUTPUT DELETED.* INTO dbo.TABLE1_ARCHIVE
FROM TABLE1 AS T1
JOIN @FILL_ID_TABLE AS FIL ON FIL.ROLLUP_ID = T1.Id

SQL Server 2005 BULK INSERT's Committed Count

I use BULK INSERT with the BATCHSIZE option.
How can I get the committed row count when the BULK INSERT fails during processing?
like:
BEGIN TRY
    BULK INSERT t1 FROM 'C:\temp\temp.dat' WITH (BATCHSIZE = 1000)
END TRY
BEGIN CATCH
    PRINT CONVERT(VARCHAR, @@ROWCOUNT)
END CATCH
The @@ROWCOUNT returned 0.
You could count the table rows into a variable first:
DECLARE @cntBefore bigint;
SELECT @cntBefore = COUNT(*) FROM t1;

BEGIN TRY
    BULK INSERT t1 FROM 'C:\temp\temp.dat' WITH (BATCHSIZE = 1000)
END TRY
BEGIN CATCH
    DECLARE @cntAfter bigint;
    SELECT @cntAfter = COUNT(*) FROM t1;
    PRINT 'Imported ' + CONVERT(VARCHAR, @cntAfter - @cntBefore)
END CATCH
Alternatively, use ROWS_PER_BATCH instead of BATCHSIZE to optimise the import; the load then runs as a single transaction, so on failure all rows are rolled back.
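A minimal sketch of that alternative, with the same file path as in the question:
-- ROWS_PER_BATCH is only a hint for the optimiser; without BATCHSIZE the whole file loads
-- in a single transaction, so any failure rolls back every row instead of leaving earlier
-- batches committed.
BULK INSERT t1 FROM 'C:\temp\temp.dat' WITH (ROWS_PER_BATCH = 1000)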