SQL Long running query / maxing out server resource e.g. RAM/CPU - sql
I originally posted the following thread SQL Query - long running / taking up CPU resource
My issue was the SARGability of my query. Having addressed that (see the previous thread; in short, I was wrapping many columns in ISNULL, which prevented SQL Server from using the indexes efficiently), I am now having further issues.
My SQL server settings are as below:
cost threshold for parallelism
5
max degree of parallelism
0
My query still takes 2:13 to run and causes CPU/memory spikes. The server is well specified (e.g. 64 GB RAM), so hardware resources should not be the issue. See the query below:
-- Builds the candidate rows for a 'PICKUP' email: RESULTS rows joined to their JOB and
-- Consultant, restricted to approved/open jobs and results that have not been emailed,
-- minus rows already present in SMTP_Production or Unsubscribe, minus the excluded id lists.
-- The outer SELECT then keeps the rows numbered rn = 1 per (Email, CandidateId).
WITH CTE AS
(
SELECT R.Id AS ResultId,
r.JobId,
r.CandidateId,
R.Email,
-- The next four columns are constants, not values read from RESULTS.
CAST(0 AS BIT) AS EmailSent,
NULL AS EmailSentDate,
'PICKUP' AS EmailStatus,
GETDATE() AS CreateDate,
C.Id AS UserId,
C.Email AS UserEmail,
NULL AS Subject
FROM RESULTS R
INNER JOIN JOB J ON R.JobId = J.Id
INNER JOIN Consultant C ON J.UserId = C.Id
WHERE
J.DCApproved = 1
AND (J.Closed = 0 OR J.Closed IS NULL)
-- NOTE(review): as written this is equivalent to R.Email IS NOT NULL, because an empty
-- string satisfies the IS NOT NULL branch; AND was presumably intended -- confirm.
AND (R.Email <> '' OR R.Email IS NOT NULL)
AND (R.EmailSent = 0 OR R.EmailSent IS NULL)
AND R.EmailSentDate IS NULL -- email has not been sent
AND (R.EmailStatus = '' OR R.EmailStatus IS NULL)
AND (R.IsEmailSubscribe = 'True' OR R.IsEmailSubscribe IS NULL)
-- not already been emailed for this job
AND NOT EXISTS (
SELECT SMTP.Email
FROM SMTP_Production SMTP
WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
)
-- not unsubscribed
-- (the IS NULL branch can only match when R.Email is NULL)
AND NOT EXISTS (
SELECT u.Id FROM Unsubscribe u
WHERE (u.EmailAddress = R.Email OR (u.EmailAddress IS NULL AND R.Email IS NULL))
)
-- candidate has no SMTP_Production row in 'PICKUP' status, for any job
AND NOT EXISTS (
SELECT SMTP.Id FROM SMTP_Production SMTP
WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
)
-- The three NOT IN lists below are placeholders in the post; the actual ids are not shown.
AND C.Id NOT IN (
-- LIST OF IDS
)
AND J.Id NOT IN (
-- LIST OF IDS
)
AND J.ClientId NOT IN
(
-- LIST OF IDS
)
)
SELECT
CTE.ResultId,
CTE.JobId,
CTE.CandidateId,
CTE.Email,
CTE.EmailSent,
CTE.EmailSentDate,
CTE.EmailStatus,
CTE.CreateDate,
CTE.UserId,
CTE.UserEmail,
NULL
FROM CTE
-- NOTE(review): CTE is referenced twice (here and inside DCTE); SQL Server presumably
-- evaluates the whole filtered join once per reference -- confirm in the execution plan.
INNER JOIN
(
-- EmailSentDate is the constant NULL defined in the CTE, so this ORDER BY does not
-- determine which row of each (Email, CandidateId) group receives rn = 1.
SELECT *, row_number() over(partition by CTE.Email, CTE.CandidateId order by CTE.EmailSentDate desc) as rn
FROM CTE
) DCTE ON CTE.ResultId = DCTE.ResultId AND DCTE.rn = 1
See Indexes for the Results table below, something does not seem right on the below:
/****** Object: Index [_dta_index_Results_7_2107154552__K35_K2_K3_K34_K36_K8_K33_K1] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [_dta_index_Results_7_2107154552__K35_K2_K3_K34_K36_K8_K33_K1] ON [dbo].[Results]
(
[EmailSentDate] ASC,
[JobId] ASC,
[AryaCandidateId] ASC,
[EmailSent] ASC,
[EmailStatus] ASC,
[Email] ASC,
[IsEmailSubscribe] ASC,
[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [ACI_CMT_APPLICANTS] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [ACI_CMT_APPLICANTS] ON [dbo].[Results]
(
[Email] ASC
)
INCLUDE ( [Id],
[AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [ACI_Job] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [ACI_Job] ON [dbo].[Results]
(
[AryaCandidateId] ASC,
[JobId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [ACI_Results] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [ACI_Results] ON [dbo].[Results]
(
[AryaCandidateId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [gen_smtp_auto] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [gen_smtp_auto] ON [dbo].[Results]
(
[EmailSentDate] ASC,
[Email] ASC,
[IsEmailSubscribe] ASC,
[EmailSent] ASC,
[EmailStatus] ASC
)
INCLUDE ( [Id],
[JobId],
[AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Hot] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Hot] ON [dbo].[Results]
(
[JobId] ASC,
[Action] ASC
)
INCLUDE ( [Engaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [IX_Results] Script Date: 17/10/2018 15:06:18 ******/
-- NOTE(review): the key columns (Id, JobId) are the leading key columns of [IX_Results_1]
-- (Id, JobId, AryaCandidateId), which is defined immediately after this index, so this
-- index looks redundant -- confirm with index usage statistics before dropping it.
CREATE NONCLUSTERED INDEX [IX_Results] ON [dbo].[Results]
(
[Id] ASC,
[JobId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [IX_Results_1] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [IX_Results_1] ON [dbo].[Results]
(
[Id] ASC,
[JobId] ASC,
[AryaCandidateId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [JobMetrics] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [JobMetrics] ON [dbo].[Results]
(
[JobId] ASC,
[Source] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [KEY_CAMPAIGN] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [KEY_CAMPAIGN] ON [dbo].[Results]
(
[ResumeDownloadedDate] ASC,
[ResumeDownloadStatus] ASC,
[KeywordCampaignId] ASC,
[Source] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [MISSING_CREATEDATE] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [MISSING_CREATEDATE] ON [dbo].[Results]
(
[CreateDate] ASC
)
INCLUDE ( [Id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [MISSING_MOVERSPROB] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [MISSING_MOVERSPROB] ON [dbo].[Results]
(
[MoversProbability] ASC
)
INCLUDE ( [Id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [MISSING_SORTORDER] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [MISSING_SORTORDER] ON [dbo].[Results]
(
[SortOrder] ASC
)
INCLUDE ( [Id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Proto_Resume_Downloa] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Proto_Resume_Downloa] ON [dbo].[Results]
(
[JobId] ASC,
[ResumeDownloadedDate] ASC,
[ResumeDownloadStatus] ASC,
[Location] ASC,
[Source] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Result_Email] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Result_Email] ON [dbo].[Results]
(
[Email] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Result_Email_Send] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Result_Email_Send] ON [dbo].[Results]
(
[EmailSentDate] ASC
)
INCLUDE ( [Id],
[JobId],
[AryaCandidateId],
[Email],
[IsEmailSubscribe],
[EmailSent],
[EmailStatus]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Results_JobId_ACI_Email] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Results_JobId_ACI_Email] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [Id],
[AryaCandidateId],
[Email]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [RESULTS_JOBID_ALL] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [RESULTS_JOBID_ALL] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [Id],
[AryaCandidateId],
[CandidateScore],
[FirstName],
[LastName],
[Telephone],
[Email],
[AddressLine1],
[Location],
[Postcode],
[Resume],
[CurrentJob],
[CurrentCompany],
[Skills],
[Experience],
[Education],
[AryaUpdateDate],
[Industry],
[Source],
[LinkedIn],
[Facebook],
[Twitter],
[MoversLabel],
[MoversProbability],
[SortOrder],
[CreateDate],
[ResumeId],
[IsEmailSubscribe],
[EmailSent],
[EmailSentDate],
[EmailStatus],
[Registered],
[HasVoyagerData],
[Action],
[Engaged],
[FormattedCV],
[CV],
[DerivedSource],
[VoyCode],
[IsEmailEngaged],
[IsSMSEngaged],
[KeywordCampaignId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [ResultsGetResultsbyConsultantId] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [ResultsGetResultsbyConsultantId] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [Id],
[AryaCandidateId],
[CandidateScore],
[FirstName],
[LastName],
[Telephone],
[Email],
[AddressLine1],
[Location],
[Postcode],
[Resume],
[CurrentJob],
[CurrentCompany],
[Skills],
[Experience],
[Education],
[AryaUpdateDate],
[Industry],
[Source],
[LinkedIn],
[Facebook],
[Twitter],
[MoversLabel],
[MoversProbability],
[DOB],
[SortOrder],
[ResumeDownloaded],
[ResumeDownloadedDate],
[ResumeDownloadStatus],
[CreateDate],
[ResumeId],
[IsEmailSubscribe],
[EmailSent],
[EmailSentDate],
[EmailStatus],
[Action],
[Engaged],
[SentToArya],
[IgnoreEmailSent],
[IgnoreEmailSentDate],
[FormattedCV],
[CV],
[DerivedSource],
[IsEmailEngaged],
[IsSMSEngaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JOB_ACI_ACTION_ENGAGED] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Stats_Results_JOB_ACI_ACTION_ENGAGED] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId],
[Action],
[Engaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [Stats_Results_JobId_ACI] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_ACI] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_ACI_Action_Engaged] Script Date: 17/10/2018 15:06:18 ******/
-- NOTE(review): same table, key column (JobId) and INCLUDE list (AryaCandidateId, Action,
-- Engaged) as [Stats_Results_JOB_ACI_ACTION_ENGAGED] earlier in this script; the two
-- definitions differ only in the index name.
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_ACI_Action_Engaged] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId],
[Action],
[Engaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_ACI_DERIVED] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_ACI_DERIVED] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId],
[DerivedSource]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_SOURCE_ACI] Script Date: 17/10/2018 15:06:18 ******/
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_SOURCE_ACI] ON [dbo].[Results]
(
[JobId] ASC,
[Source] ASC
)
INCLUDE ( [AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_Source_ACI_V2] Script Date: 17/10/2018 15:06:18 ******/
-- NOTE(review): same table, key columns (JobId, Source) and INCLUDE list (AryaCandidateId)
-- as [Stats_Results_JobId_SOURCE_ACI] defined just before it; the two definitions differ
-- only in the index name.
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_Source_ACI_V2] ON [dbo].[Results]
(
[JobId] ASC,
[Source] ASC
)
INCLUDE ( [AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
ALTER TABLE [dbo].[Results] ADD CONSTRAINT [DF_Results_CreateDate] DEFAULT (getdate()) FOR [CreateDate]
GO
I have some suggestions for you to reduce the execution time:
if the email is empty for the master record, you don't need to run the sub query:
so instead of this statement:
AND NOT EXISTS (
SELECT u.Id FROM Unsubscribe u
WHERE (u.EmailAddress = R.Email OR (u.EmailAddress IS NULL AND R.Email IS NULL))
)
use below statement:
-- Not unsubscribed. A NULL R.Email cannot equal any u.EmailAddress, so such rows pass
-- without needing a match against Unsubscribe. Note: unlike the original predicate, a row
-- with a NULL R.Email is no longer excluded when Unsubscribe contains a NULL EmailAddress.
AND (
    R.Email IS NULL
    OR NOT EXISTS (
        SELECT u.Id FROM Unsubscribe u
        WHERE u.EmailAddress = R.Email
    )
)
I recommend reducing the use of OR conditions as much as you can; where possible, try UNION instead of OR. You can find some examples at the link below:
SQL Performance UNION vs OR
As I understand it, you can use JobId to filter the SMTP_Production records. If so:
instead of this statement
AND NOT EXISTS (
SELECT SMTP.Id FROM SMTP_Production SMTP
WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId -- can we add SMTP.JobId = R.JobId
)
you can use below
-- NOTE(review): with SMTP.JobId = R.JobId added, every row matched here is also matched by
-- the query's earlier "not already been emailed for this job" NOT EXISTS (same JobId and
-- CandidateId, no status filter), so this predicate no longer excludes anything extra.
AND NOT EXISTS (
SELECT SMTP.Id FROM SMTP_Production SMTP
WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId and SMTP.JobId = R.JobId
)
final version of the query might be like this:
-- Pending 'PICKUP' emails: one row per (Email, CandidateId) for results on approved, open
-- jobs that have not been emailed, are not queued in SMTP_Production and are not
-- unsubscribed. Output columns and their order match the original query.
--
-- The duplicate filter (rn = 1) is computed inside the CTE, so the filtered join is
-- referenced once instead of being joined back to itself.
WITH CTE AS
(
    SELECT
        R.Id AS ResultId,
        R.JobId,
        R.CandidateId,
        R.Email,
        -- Constants describing the new email row; they are not read from RESULTS.
        CAST(0 AS BIT) AS EmailSent,
        NULL AS EmailSentDate,
        'PICKUP' AS EmailStatus,
        GETDATE() AS CreateDate,
        C.Id AS UserId,
        C.Email AS UserEmail,
        NULL AS Subject,
        -- Ordered by R.Id so the surviving row per (Email, CandidateId) is deterministic;
        -- the original ordered by the constant NULL EmailSentDate, i.e. arbitrarily.
        ROW_NUMBER() OVER (
            PARTITION BY R.Email, R.CandidateId
            ORDER BY R.Id
        ) AS rn
    FROM RESULTS R
    INNER JOIN JOB J ON R.JobId = J.Id
    INNER JOIN Consultant C ON J.UserId = C.Id
    WHERE
        J.DCApproved = 1
        -- NULL-tolerant forms are kept on purpose: "col <> 1" evaluates to UNKNOWN for
        -- NULL and would silently drop the rows the IS NULL branches are meant to keep.
        AND (J.Closed = 0 OR J.Closed IS NULL)
        AND (R.EmailSent = 0 OR R.EmailSent IS NULL)
        AND (R.IsEmailSubscribe = 'True' OR R.IsEmailSubscribe IS NULL)
        -- Has an email address. "<> ''" is UNKNOWN for NULL, so this one comparison rejects
        -- both NULL and empty addresses (the OR ... IS NOT NULL form let '' through).
        AND R.Email <> ''
        AND R.EmailSentDate IS NULL -- email has not been sent
        AND (R.EmailStatus = '' OR R.EmailStatus IS NULL)
        -- not already been emailed for this job
        AND NOT EXISTS (
            SELECT 1
            FROM SMTP_Production SMTP
            WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
        )
        -- not unsubscribed (R.Email is guaranteed non-NULL by the filter above, so no
        -- NULL-matching branch is needed)
        AND NOT EXISTS (
            SELECT 1
            FROM Unsubscribe u
            WHERE u.EmailAddress = R.Email
        )
        -- candidate has nothing waiting in 'PICKUP' status for ANY job. Adding
        -- SMTP.JobId = R.JobId here would make this check a subset of the per-job check
        -- above and therefore a no-op, so it stays candidate-wide.
        AND NOT EXISTS (
            SELECT 1
            FROM SMTP_Production SMTP
            WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
        )
        -- NOTE(review): NOT IN evaluates to UNKNOWN when the tested column is NULL, so
        -- rows with a NULL J.ClientId are excluded -- confirm that is intended.
        AND C.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.ClientId NOT IN
        (
            -- LIST OF IDS
        )
)
SELECT
    CTE.ResultId,
    CTE.JobId,
    CTE.CandidateId,
    CTE.Email,
    CTE.EmailSent,
    CTE.EmailSentDate,
    CTE.EmailStatus,
    CTE.CreateDate,
    CTE.UserId,
    CTE.UserEmail,
    CTE.Subject -- always NULL; same position as the unnamed NULL column it replaces
FROM CTE
WHERE CTE.rn = 1
Related
Azure DB/MSSQL 2017 query performance regression
I have this pretty simple table with 17m records in it: CREATE TABLE [dbo].[LineNumbers]( [Id] [int] IDENTITY(1,1) NOT NULL, [LineDescriptionId] [int] NOT NULL, [ProtocolId] [int] NULL, [Value] [int] NULL, CONSTRAINT [PK_LineNumbers] PRIMARY KEY CLUSTERED ( [Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] Query to the table with additional join works fine, if there is no ProtocolId in it: select top 1 ln.LineDescriptionId from LineNumbers ln join LineDescriptions ld on ld.Id = ln.LineDescriptionId and ld.ProtocolSetId = 25 -- Elapsed time: 00:00:00.1718750 Execution plan: https://www.brentozar.com/pastetheplan/?id=rJV34gvR7 But when I try to add ProtocolId to the field list, query time grows dramatically: select top 1 ln.ProtocolId from LineNumbers ln join LineDescriptions ld on ld.Id = ln.LineDescriptionId and ld.ProtocolSetId = 25 -- Elapsed time: 00:02:19.6464843 Execution plan: https://www.brentozar.com/pastetheplan/?id=SkG-hyDCQ Also, this works smooth: select top 1 (select ProtocolId from LineNumbers where LineNumbers.Id = ln.Id) as ProtocolId from LineNumbers ln join LineDescriptions ld on ld.Id = ln.LineDescriptionId and ld.ProtocolSetId = 25 -- Elapsed time: 00:00:00.1718750 Tried this queries and variations on Azure DB and local MSSQL 2017. Results are the same. As long as I keep ProtocolId out of the field list everything is fine. Is there some mistake in my data scheme (everything was created via migrations of Entity Framework)? 
CREATE TABLE [dbo].[LineNumbers]( [Id] [int] IDENTITY(1,1) NOT NULL, [LineDescriptionId] [int] NOT NULL, [ProtocolId] [int] NULL, [Value] [int] NULL, CONSTRAINT [PK_LineNumbers] PRIMARY KEY CLUSTERED ( [Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO /****** Object: Index [IX_LineNumbers_LineDescriptionId] Script Date: 21.11.2018 10:47:09 ******/ CREATE NONCLUSTERED INDEX [IX_LineNumbers_LineDescriptionId] ON [dbo].[LineNumbers] ( [LineDescriptionId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO /****** Object: Index [IX_LineNumbers_LineDescriptionId_Value] Script Date: 21.11.2018 10:47:09 ******/ CREATE NONCLUSTERED INDEX [IX_LineNumbers_LineDescriptionId_Value] ON [dbo].[LineNumbers] ( [LineDescriptionId] ASC, [Value] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO /****** Object: Index [IX_LineNumbers_ProtocolId] Script Date: 21.11.2018 10:47:09 ******/ CREATE NONCLUSTERED INDEX [IX_LineNumbers_ProtocolId] ON [dbo].[LineNumbers] ( [ProtocolId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO ALTER TABLE [dbo].[LineNumbers] WITH NOCHECK ADD CONSTRAINT [FK_LineNumbers_LineDescriptions_LineDescriptionId] FOREIGN KEY([LineDescriptionId]) REFERENCES [dbo].[LineDescriptions] ([Id]) ON DELETE CASCADE GO ALTER TABLE [dbo].[LineNumbers] CHECK CONSTRAINT [FK_LineNumbers_LineDescriptions_LineDescriptionId] GO ALTER TABLE [dbo].[LineNumbers] WITH NOCHECK ADD CONSTRAINT [FK_LineNumbers_Protocols_ProtocolId] FOREIGN KEY([ProtocolId]) REFERENCES [dbo].[Protocols] ([Id]) GO ALTER TABLE 
[dbo].[LineNumbers] CHECK CONSTRAINT [FK_LineNumbers_Protocols_ProtocolId] GO
Eventually, I solved it by adding nonclustered index on field LineNumbers.LineDescriptionId with inclusion of LineNumbers.ProtocolId CREATE NONCLUSTERED INDEX [IX_LineNumbers_LineDescriptionId_ProtocolId] ON [dbo].[LineNumbers]([LineDescriptionId] ASC) INCLUDE ([ProtocolId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) Result: SELECT TOP 1 ln.ProtocolId FROM LineNumbers ln JOIN LineDescriptions ld ON ld.Id = ln.LineDescriptionId AND ld.ProtocolSetId = 25 -- Elapsed time: 00:00:00.1403155 Execution plan: https://www.brentozar.com/pastetheplan/?id=Syywn1wRQ Why does it work that way? For example, if I'd do similar use case with PostgreSQL then there is no need in any additional indexes at all (beside obvious FK indexes on ProtocolId and LineDescriptionId fields).
Updating SQL Database Table with 30M Rows
I have a MS SQL table with approx 30M rows that I need to update a field based on the previous records. Here is an update that works but it is taking an incredible amount of time: UPDATE AccountTransaction SET EndingBalance = (SELECT COALESCE(SUM(b.amount), 0) FROM AccountTransaction AS b WHERE b.AccountId = AccountTransaction.AccountId and b.Date <= AccountTransaction.Date and (b.Date != AccountTransaction.Date or b.CreatedDate < AccountTransaction.CreatedDate)) + Amount Here is the full DDL: CREATE TABLE [dbo].[AccountTransaction]( [AccountTransactionId] [uniqueidentifier] NOT NULL, [AccountId] [uniqueidentifier] NOT NULL, [Amount] [decimal](16, 2) NOT NULL, [EndingBalance] [decimal](16, 2) NOT NULL, [Date] [date] NOT NULL, [CreatedDate] [datetime2](3) NOT NULL, CONSTRAINT [PkAccountTransaction] PRIMARY KEY CLUSTERED ( [AccountTransactionId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO CREATE NONCLUSTERED INDEX [IxAccountTransaction_AccountId_Date_CreatedDate] ON [dbo].[AccountTransaction] ( [AccountId] ASC, [Date] ASC, [CreatedDate] ASC ) INCLUDE ([AccountTransactionId], [Amount], [EndingBalance]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO CREATE NONCLUSTERED INDEX [IxAccountTransaction_AccountId] ON [dbo].[AccountTransaction] ( [AccountId] ASC ) INCLUDE ([AccountTransactionId], [Amount], [EndingBalance], [Date], [CreatedDate]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO
The following should yield much better performance and will be able to take advantage of the IxAccountTransaction_AccountId_Date_CreatedDate index...
-- Running balance per account, accumulated in (Date, CreatedDate) order. CreatedDate is
-- part of the ORDER BY because the question's UPDATE breaks same-day ties by CreatedDate;
-- ordering by Date alone would accumulate same-day transactions in an arbitrary order.
-- ROWS (not the default RANGE frame) makes each row see only the rows before it plus itself.
-- NOTE(review): rows of one account with identical Date AND CreatedDate are still ordered
-- arbitrarily relative to each other -- confirm whether such ties can occur.
WITH cte_Runningtotal AS (
    SELECT
        at1.EndingBalance,
        NewEB = SUM(at1.Amount) OVER (
            PARTITION BY at1.AccountId
            ORDER BY at1.[Date], at1.CreatedDate
            ROWS UNBOUNDED PRECEDING
        )
    FROM dbo.AccountTransaction at1
)
UPDATE rt
SET rt.EndingBalance = rt.NewEB
FROM cte_Runningtotal rt;
SQL Server slow select from large Table
i have 2 Really Big sql server Database tables for IOT Project First TABLE IS Message (rows count 7,423,889,085 rows) CREATE TABLE [aymax].[Message]( [MessageId] [bigint] IDENTITY(1,1) NOT NULL, [ObjectId] [int] NOT NULL, [TimeStamp] [datetime] NOT NULL CONSTRAINT [DF__Message__TimeSta__3B75D760] DEFAULT (getdate()), [GpsTime] [datetime] NOT NULL, [VisibleSatelites] [int] NOT NULL, [X] [float] NOT NULL, [Y] [float] NOT NULL, CONSTRAINT [Message_PK] PRIMARY KEY NONCLUSTERED ( [MessageId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO Second table is MessageSensors , row count (26,359,568,037 rows) , this table have value for each sensor in message table CREATE TABLE [aymax].[MessageSensors]( [MessageId] [bigint] NOT NULL, [DataSourceId] [int] NOT NULL, [Value] [float] NOT NULL CONSTRAINT [DF__AnalogDat__Value__5812160E] DEFAULT ((0)), CONSTRAINT [AnalogData_PK] PRIMARY KEY CLUSTERED ( [MessageId] ASC, [DataSourceId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] ) ON [PRIMARY] GO my problem that seek by time interval between 2 date time is really slow , also it became more slow if i select with message sensor data , also when i use sp_BlitzIndex check from brentozar.com it say that i have "Indexaphobia: High value missing index" [aymax].[MessageSensors] (EQUALITY: [DataSourceId], [Value] INCLUDES: [MessageId] ) [aymax].[MessageSensors] EQUALITY: [Value] INCLUDES: [MessageId], [DataSourceId] I belive that create this 2 index is will increase storage alot , also will take too much time to be created , i need your advice for both table regarding index my current indexes 1- CREATE NONCLUSTERED INDEX [IX_gpstime_objectid] ON [aymax].[Message] ( [GpsTime] ASC ) INCLUDE ( [MessageId], [ObjectId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, 
DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) GO 2- alter TABLE [aymax].[Message] ADD CONSTRAINT [Message_PK] PRIMARY KEY NONCLUSTERED ( [MessageId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) GO 3rd- ALTER TABLE [aymax].[MessageSensors] ADD CONSTRAINT [AnalogData_PK] PRIMARY KEY CLUSTERED ( [MessageId] ASC, [DataSourceId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) GO 4- CREATE NONCLUSTERED INDEX [MessageData_DataSourceId_IDX] ON [aymax].[MessageSensors] ( [DataSourceId] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) GO any help please , i need to make a fast retrieve from message , and message sensor update while doing some investigate i found that select float value will slow up the result too much , from 1 second to 3 minutes SELECT m.messageid, m.objectid, m.gpstime, m.x, m.y, -- slow is here if i replace md.value with md.messageId will return fast , md.value is float md.Value , 0 FROM aymax.[message] m WITH (nolock) left JOIN aymax.MessageSensors md WITH (nolock) ON m.messageid = md.messageid AND md.datasourceid = 425732 WHERE m.objectid = 14099 AND m.gpstime BETWEEN '2017-04-01 19:46:18.607' AND '2017-04-10 19:05:18.607'
Possible solutions: 1) A filtered index (filter by date so that old data is not indexed): https://learn.microsoft.com/en-us/sql/relational-databases/indexes/create-filtered-indexes. 2) A clustered index on (GpsTime, MessageId), especially if you have no plans for other indexes; this requires rebuilding the table. 3) Partitioning (see @Siyaul's comments).
Insert blocking update query on table in SQL Server
I have a table "Alert_List" with two sessions are running on it. Session 1: insert into Alert_List values ( "required values") Session 2: Update A set Active = 0 from Alert_List A left join STG_Alert B on A.Alert_List_Id = B.Alert_List_Id left join STG_UserID C on A.UserID = C.UserID where B.Alert_List_Id is null and C.UserID is null Here STG_Alert,STG_UserIDare staging tables which will have a few selected Alert_List_Id and UserID which are not required to be deactivated. Indexes on Alert_List as below. ALTER TABLE [dbo].[Alert_List] ADD CONSTRAINT [PK_EA_List] PRIMARY KEY CLUSTERED ( [Alert_List_Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) GO CREATE NONCLUSTERED INDEX [Alert_List_INDEX] ON [dbo].[Alert_List] ( [AVE_Id] ASC, [Request_Id] ASC, [User_Id] ASC, Active ASC, [Process_Id] ASC, [Status_Id] ASC ) INCLUDE ( [Alert_List_Id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) GO CREATE NONCLUSTERED INDEX [indexeaml] ON [dbo].[Alert_List] ( [User_Id] ASC, [Process_Id] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) GO One query from above two failing with below error. Transaction (Process ID 85) was deadlocked on lock resources with another process and has been chosen as the deadlock victim. Rerun the transaction. I dont want to use WITH(NOLOCK) nor ALLOW_SNAPSHOT_ISOLATION in this case. Let me know if you have any other ways to solve this issue.
How do I speed up this Row_Number query?
This is my query: SELECT * FROM (SELECT ROW_NUMBER() OVER (ORDER BY NAME asc) peta_rn, peta_query.* FROM (SELECT BOOK, PAGETRIMMED, NAME, TYPE, PDF FROM CCWiseDocumentNames2 cdn INNER JOIN CCWiseInstr2 cwi ON cwi.ID = cdn.ID) as peta_query) peta_paged WHERE peta_rn > 1331900 AND peta_rn <= 1331950 Currently this query takes about 4 seconds to get the results. Is there any way to bring it under 1 second? Index is already created on cwi.ID and cdn.ID. Below is the actual execution plan from sql server: Any help would be useful. This is the table structure: /****** Object: Table [dbo].[CCWiseInstr2] Script Date: 9/17/2013 3:54:27 AM ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO SET ANSI_PADDING ON GO CREATE TABLE [dbo].[CCWiseInstr2]( [ID] [int] NULL, [BK_PG] [varchar](50) NULL, [DATE] [datetime] NULL, [ITYPE] [varchar](50) NULL, [BOOK] [int] NULL, [PAGE] [varchar](50) NULL, [NOBP] [varchar](50) NULL, [DESC] [varchar](240) NULL, [TIF] [varchar](50) NULL, [INDEXNAME] [varchar](50) NULL, [CONFIRM] [varchar](50) NULL, [PDF] [varchar](50) NULL, [PAGETRIMMED] [varchar](10) NULL ) ON [PRIMARY] GO SET ANSI_PADDING OFF GO /****** Object: Index [IX_CCWiseInstr2_ID] Script Date: 9/17/2013 3:54:27 AM ******/ CREATE NONCLUSTERED INDEX [IX_CCWiseInstr2_ID] ON [dbo].[CCWiseInstr2] ( [ID] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO /****** Object: Table [dbo].[CCWiseDocumentNames2] Script Date: 9/17/2013 3:54:18 AM ******/ SET ANSI_NULLS ON GO SET QUOTED_IDENTIFIER ON GO SET ANSI_PADDING ON GO CREATE TABLE [dbo].[CCWiseDocumentNames2]( [ID] [int] NULL, [BK_PG] [varchar](50) NULL, [NAME] [varchar](100) NULL, [OTHERNAM] [varchar](100) NULL, [TYPE] [varchar](50) NULL, [INDEXNAME] [varchar](50) NULL ) ON [PRIMARY] GO SET ANSI_PADDING OFF GO /****** Object: Index [IX_CCWiseDocumentNames2_ID] Script Date: 9/17/2013 3:54:18 AM ******/ CREATE 
NONCLUSTERED INDEX [IX_CCWiseDocumentNames2_ID] ON [dbo].[CCWiseDocumentNames2] ( [ID] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO SET ANSI_PADDING ON GO /****** Object: Index [IX_CCWiseDocumentNames2_NAME] Script Date: 9/17/2013 3:54:18 AM ******/ CREATE NONCLUSTERED INDEX [IX_CCWiseDocumentNames2_NAME] ON [dbo].[CCWiseDocumentNames2] ( [NAME] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY] GO
I think the problem is that both tables need to be matched completely and then sorted on name before the where clause can throw out the trash. I'm not sure this will help, but it's worth a shot - try adding the name to the index: CREATE NONCLUSTERED INDEX [IX_CCWiseDocumentNames2_ID] ON [dbo].[CCWiseDocumentNames2] ( [ID] ASC, [Name] ASC )WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
You don't need a PK or an identity column to create a clustered index on the ID column. A clustered index allows repeated values; the only thing to worry about is INSERT performance if new IDs are not appended at the end but inserted in the middle. Why are you using row_number() in the outer query? I think the same result can be achieved in a single select (you may have to change the ranking function and use partitioning). By the way, if your inner query does not return unique NAME values and you use row_number() without partitioning, then peta_rn may be misleading (the same name gets many different peta_rn values). I'm just guessing, because I don't know exactly what you are trying to achieve. Go with the clustered index and you'll bring it under 1s, no problem.