I originally posted the following thread SQL Query - long running / taking up CPU resource
My issue was the SARGability of my query. Having addressed that (see the previous thread; in short, I was using a lot of ISNULL functions, which prevented the indexes from being used effectively), I am now having further issues.
My SQL server settings are as below:
cost threshold for parallelism
5
max degree of parallelism
0
My query still takes 2:13 to run and causes CPU/memory spikes. The server is well specified (e.g. 64GB RAM), so hardware resources are not the issue. See the query below:
-- Selects the Results rows that should be queued ('PICKUP') for e-mailing:
-- approved, open jobs; a non-empty e-mail address; nothing sent or queued yet.
-- One row is returned per (Email, CandidateId).
--
-- Review fixes relative to the query as first written:
--   * `R.Email <> '' OR R.Email IS NOT NULL` is true for every non-NULL value,
--     so empty addresses slipped through. `R.Email <> ''` on its own rejects
--     both '' and NULL, because NULL <> '' evaluates to UNKNOWN.
--   * The ranked copy of the CTE was joined back to the CTE, which evaluated
--     the whole filter twice. The row number is now computed inside the CTE
--     and filtered once.
--   * The row number was ordered by EmailSentDate, which is a constant NULL in
--     this CTE, so the surviving row per group was arbitrary. Ordering by R.Id
--     keeps the lowest Id, which is one of the rows the old form could return.
-- Assumes J.Id and C.Id are unique, so each Results row appears once -- TODO confirm.
WITH CTE AS
(
    SELECT
        R.Id AS ResultId,
        R.JobId,
        R.CandidateId,
        R.Email,
        CAST(0 AS BIT) AS EmailSent,
        NULL AS EmailSentDate,
        'PICKUP' AS EmailStatus,
        GETDATE() AS CreateDate,
        C.Id AS UserId,
        C.Email AS UserEmail,
        NULL AS Subject,
        ROW_NUMBER() OVER (PARTITION BY R.Email, R.CandidateId ORDER BY R.Id) AS rn
    FROM RESULTS R
    INNER JOIN JOB J ON R.JobId = J.Id
    INNER JOIN Consultant C ON J.UserId = C.Id
    WHERE
        J.DCApproved = 1
        AND (J.Closed = 0 OR J.Closed IS NULL)
        AND R.Email <> '' -- also rejects NULL
        AND (R.EmailSent = 0 OR R.EmailSent IS NULL)
        AND R.EmailSentDate IS NULL -- email has not been sent
        AND (R.EmailStatus = '' OR R.EmailStatus IS NULL)
        AND (R.IsEmailSubscribe = 'True' OR R.IsEmailSubscribe IS NULL)
        -- not already been emailed for this job
        AND NOT EXISTS (
            SELECT 1
            FROM SMTP_Production SMTP
            WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
        )
        -- not unsubscribed
        -- (R.Email is never NULL here, so only the equality branch can match)
        AND NOT EXISTS (
            SELECT 1 FROM Unsubscribe u
            WHERE (u.EmailAddress = R.Email OR (u.EmailAddress IS NULL AND R.Email IS NULL))
        )
        -- candidate has no mail waiting in PICKUP state, for any job
        AND NOT EXISTS (
            SELECT 1 FROM SMTP_Production SMTP
            WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
        )
        AND C.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.ClientId NOT IN
        (
            -- LIST OF IDS
        )
)
SELECT
    CTE.ResultId,
    CTE.JobId,
    CTE.CandidateId,
    CTE.Email,
    CTE.EmailSent,
    CTE.EmailSentDate,
    CTE.EmailStatus,
    CTE.CreateDate,
    CTE.UserId,
    CTE.UserEmail,
    NULL
FROM CTE
WHERE CTE.rn = 1
See the indexes on the Results table below; something does not seem right with them:
/****** Object: Index [_dta_index_Results_7_2107154552__K35_K2_K3_K34_K36_K8_K33_K1] Script Date: 17/10/2018 15:06:18 ******/
-- Eight-column key led by EmailSentDate.
-- NOTE(review): [gen_smtp_auto] and [Result_Email_Send] further down also lead on
-- EmailSentDate and carry exactly the same eight columns (as key or INCLUDE
-- columns), so the three indexes overlap heavily. Check index usage before
-- keeping all of them.
-- The "_dta_index_" prefix suggests a Database Engine Tuning Advisor
-- recommendation - TODO confirm.
CREATE NONCLUSTERED INDEX [_dta_index_Results_7_2107154552__K35_K2_K3_K34_K36_K8_K33_K1] ON [dbo].[Results]
(
[EmailSentDate] ASC,
[JobId] ASC,
[AryaCandidateId] ASC,
[EmailSent] ASC,
[EmailStatus] ASC,
[Email] ASC,
[IsEmailSubscribe] ASC,
[Id] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [ACI_CMT_APPLICANTS] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on Email, with Id and AryaCandidateId stored as included columns.
-- NOTE(review): [Result_Email] (keyed on Email alone, no INCLUDE) is a strict
-- subset of this index.
CREATE NONCLUSTERED INDEX [ACI_CMT_APPLICANTS] ON [dbo].[Results]
(
[Email] ASC
)
INCLUDE ( [Id],
[AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [ACI_Job] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on (AryaCandidateId, JobId).
CREATE NONCLUSTERED INDEX [ACI_Job] ON [dbo].[Results]
(
[AryaCandidateId] ASC,
[JobId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [ACI_Results] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on AryaCandidateId alone.
-- NOTE(review): this key is the leading column of [ACI_Job] above, so [ACI_Job]
-- can serve the same seeks.
CREATE NONCLUSTERED INDEX [ACI_Results] ON [dbo].[Results]
(
[AryaCandidateId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [gen_smtp_auto] Script Date: 17/10/2018 15:06:18 ******/
-- Five-column key led by EmailSentDate; Id, JobId and AryaCandidateId are included.
-- NOTE(review): holds the same eight columns as the [_dta_index_...] index and
-- [Result_Email_Send], all of which lead on EmailSentDate.
CREATE NONCLUSTERED INDEX [gen_smtp_auto] ON [dbo].[Results]
(
[EmailSentDate] ASC,
[Email] ASC,
[IsEmailSubscribe] ASC,
[EmailSent] ASC,
[EmailStatus] ASC
)
INCLUDE ( [Id],
[JobId],
[AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Hot] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on (JobId, Action), with Engaged stored as an included column.
CREATE NONCLUSTERED INDEX [Hot] ON [dbo].[Results]
(
[JobId] ASC,
[Action] ASC
)
INCLUDE ( [Engaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [IX_Results] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on (Id, JobId).
-- NOTE(review): this key is a left prefix of [IX_Results_1] below.
CREATE NONCLUSTERED INDEX [IX_Results] ON [dbo].[Results]
(
[Id] ASC,
[JobId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [IX_Results_1] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on (Id, JobId, AryaCandidateId); extends [IX_Results] by one column.
CREATE NONCLUSTERED INDEX [IX_Results_1] ON [dbo].[Results]
(
[Id] ASC,
[JobId] ASC,
[AryaCandidateId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [JobMetrics] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on (JobId, Source).
-- NOTE(review): [Stats_Results_JobId_SOURCE_ACI] and
-- [Stats_Results_JobId_Source_ACI_V2] further down have the same key plus an
-- included AryaCandidateId, so they cover everything this index does.
CREATE NONCLUSTERED INDEX [JobMetrics] ON [dbo].[Results]
(
[JobId] ASC,
[Source] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [KEY_CAMPAIGN] Script Date: 17/10/2018 15:06:18 ******/
-- Four-column key: ResumeDownloadedDate, ResumeDownloadStatus, KeywordCampaignId, Source.
CREATE NONCLUSTERED INDEX [KEY_CAMPAIGN] ON [dbo].[Results]
(
[ResumeDownloadedDate] ASC,
[ResumeDownloadStatus] ASC,
[KeywordCampaignId] ASC,
[Source] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [MISSING_CREATEDATE] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on CreateDate, with Id included.
CREATE NONCLUSTERED INDEX [MISSING_CREATEDATE] ON [dbo].[Results]
(
[CreateDate] ASC
)
INCLUDE ( [Id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [MISSING_MOVERSPROB] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on MoversProbability, with Id included.
CREATE NONCLUSTERED INDEX [MISSING_MOVERSPROB] ON [dbo].[Results]
(
[MoversProbability] ASC
)
INCLUDE ( [Id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [MISSING_SORTORDER] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on SortOrder, with Id included.
CREATE NONCLUSTERED INDEX [MISSING_SORTORDER] ON [dbo].[Results]
(
[SortOrder] ASC
)
INCLUDE ( [Id]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Proto_Resume_Downloa] Script Date: 17/10/2018 15:06:18 ******/
-- Five-column key led by JobId: JobId, ResumeDownloadedDate, ResumeDownloadStatus,
-- Location, Source.
CREATE NONCLUSTERED INDEX [Proto_Resume_Downloa] ON [dbo].[Results]
(
[JobId] ASC,
[ResumeDownloadedDate] ASC,
[ResumeDownloadStatus] ASC,
[Location] ASC,
[Source] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Result_Email] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on Email alone.
-- NOTE(review): [ACI_CMT_APPLICANTS] has the same key and additionally includes
-- Id and AryaCandidateId, so it can serve every seek this index can.
CREATE NONCLUSTERED INDEX [Result_Email] ON [dbo].[Results]
(
[Email] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Result_Email_Send] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on EmailSentDate alone; the other seven e-mail-related columns are included.
-- NOTE(review): same eight columns as [gen_smtp_auto] and the [_dta_index_...]
-- index, which also lead on EmailSentDate.
CREATE NONCLUSTERED INDEX [Result_Email_Send] ON [dbo].[Results]
(
[EmailSentDate] ASC
)
INCLUDE ( [Id],
[JobId],
[AryaCandidateId],
[Email],
[IsEmailSubscribe],
[EmailSent],
[EmailStatus]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Results_JobId_ACI_Email] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on JobId, with Id, AryaCandidateId and Email included.
-- NOTE(review): every column here is also present in [RESULTS_JOBID_ALL] and
-- [ResultsGetResultsbyConsultantId], which are keyed on JobId as well.
CREATE NONCLUSTERED INDEX [Results_JobId_ACI_Email] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [Id],
[AryaCandidateId],
[Email]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [RESULTS_JOBID_ALL] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on JobId with 42 included columns.
-- NOTE(review): [ResultsGetResultsbyConsultantId] below has the same key and a
-- largely identical INCLUDE list. Only this index includes Registered,
-- HasVoyagerData, VoyCode and KeywordCampaignId. Both indexes must be
-- maintained on every write that touches any listed column.
CREATE NONCLUSTERED INDEX [RESULTS_JOBID_ALL] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [Id],
[AryaCandidateId],
[CandidateScore],
[FirstName],
[LastName],
[Telephone],
[Email],
[AddressLine1],
[Location],
[Postcode],
[Resume],
[CurrentJob],
[CurrentCompany],
[Skills],
[Experience],
[Education],
[AryaUpdateDate],
[Industry],
[Source],
[LinkedIn],
[Facebook],
[Twitter],
[MoversLabel],
[MoversProbability],
[SortOrder],
[CreateDate],
[ResumeId],
[IsEmailSubscribe],
[EmailSent],
[EmailSentDate],
[EmailStatus],
[Registered],
[HasVoyagerData],
[Action],
[Engaged],
[FormattedCV],
[CV],
[DerivedSource],
[VoyCode],
[IsEmailEngaged],
[IsSMSEngaged],
[KeywordCampaignId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [ResultsGetResultsbyConsultantId] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on JobId with 45 included columns.
-- NOTE(review): compared with [RESULTS_JOBID_ALL] above, only this index
-- includes DOB, ResumeDownloaded, ResumeDownloadedDate, ResumeDownloadStatus,
-- SentToArya, IgnoreEmailSent and IgnoreEmailSentDate; all other included
-- columns are shared between the two.
CREATE NONCLUSTERED INDEX [ResultsGetResultsbyConsultantId] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [Id],
[AryaCandidateId],
[CandidateScore],
[FirstName],
[LastName],
[Telephone],
[Email],
[AddressLine1],
[Location],
[Postcode],
[Resume],
[CurrentJob],
[CurrentCompany],
[Skills],
[Experience],
[Education],
[AryaUpdateDate],
[Industry],
[Source],
[LinkedIn],
[Facebook],
[Twitter],
[MoversLabel],
[MoversProbability],
[DOB],
[SortOrder],
[ResumeDownloaded],
[ResumeDownloadedDate],
[ResumeDownloadStatus],
[CreateDate],
[ResumeId],
[IsEmailSubscribe],
[EmailSent],
[EmailSentDate],
[EmailStatus],
[Action],
[Engaged],
[SentToArya],
[IgnoreEmailSent],
[IgnoreEmailSentDate],
[FormattedCV],
[CV],
[DerivedSource],
[IsEmailEngaged],
[IsSMSEngaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JOB_ACI_ACTION_ENGAGED] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on JobId, with AryaCandidateId, Action and Engaged included.
-- NOTE(review): [Stats_Results_JobId_ACI_Action_Engaged] below has an identical
-- key and INCLUDE list; the two indexes differ only in name.
CREATE NONCLUSTERED INDEX [Stats_Results_JOB_ACI_ACTION_ENGAGED] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId],
[Action],
[Engaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Index [Stats_Results_JobId_ACI] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on JobId, with AryaCandidateId included.
-- NOTE(review): a subset of the other Stats_Results_JobId_ACI_* indexes in this
-- group, which have the same key and include AryaCandidateId plus more columns.
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_ACI] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_ACI_Action_Engaged] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on JobId, with AryaCandidateId, Action and Engaged included.
-- NOTE(review): exact duplicate of [Stats_Results_JOB_ACI_ACTION_ENGAGED] above.
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_ACI_Action_Engaged] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId],
[Action],
[Engaged]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_ACI_DERIVED] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on JobId, with AryaCandidateId and DerivedSource included.
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_ACI_DERIVED] ON [dbo].[Results]
(
[JobId] ASC
)
INCLUDE ( [AryaCandidateId],
[DerivedSource]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_SOURCE_ACI] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on (JobId, Source), with AryaCandidateId included.
-- NOTE(review): [Stats_Results_JobId_Source_ACI_V2] below has an identical key
-- and INCLUDE list, and [JobMetrics] has the same key without the INCLUDE.
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_SOURCE_ACI] ON [dbo].[Results]
(
[JobId] ASC,
[Source] ASC
)
INCLUDE ( [AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [Stats_Results_JobId_Source_ACI_V2] Script Date: 17/10/2018 15:06:18 ******/
-- Keyed on (JobId, Source), with AryaCandidateId included.
-- NOTE(review): exact duplicate of [Stats_Results_JobId_SOURCE_ACI] above.
CREATE NONCLUSTERED INDEX [Stats_Results_JobId_Source_ACI_V2] ON [dbo].[Results]
(
[JobId] ASC,
[Source] ASC
)
INCLUDE ( [AryaCandidateId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
-- Default constraint: CreateDate is set to getdate() when an INSERT does not supply a value.
ALTER TABLE [dbo].[Results] ADD CONSTRAINT [DF_Results_CreateDate] DEFAULT (getdate()) FOR [CreateDate]
GO
I have some suggestions for reducing the execution time.
If the email is empty (NULL) for the master record, you don't need to run the subquery.
So instead of this statement:
AND NOT EXISTS (
SELECT u.Id FROM Unsubscribe u
WHERE (u.EmailAddress = R.Email OR (u.EmailAddress IS NULL AND R.Email IS NULL))
)
use below statement:
-- R.Email cannot be NULL at this point (the Email filter earlier in the WHERE
-- clause already rejects NULLs), so a plain equality match is enough: neither
-- the "both sides NULL" branch nor an extra "OR R.Email IS NULL" is needed.
AND NOT EXISTS (
    SELECT 1 FROM Unsubscribe u
    WHERE u.EmailAddress = R.Email
)
I recommend reducing the use of OR as much as you can; try using UNION instead of OR. You can find some examples at the link below:
SQL Performance UNION vs OR
As I understand it, you can use JobId to filter the SMTP_Production records. If you can do so,
then instead of this statement:
AND NOT EXISTS (
SELECT SMTP.Id FROM SMTP_Production SMTP
WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId -- can we add SMTP.JobId = R.JobId
)
you can use below
-- NOTE(review): with SMTP.JobId = R.JobId added, every row this predicate would
-- exclude is already excluded by the query's "not already been emailed for this
-- job" NOT EXISTS (same JobId and CandidateId, any EmailStatus). It also stops
-- excluding candidates who have a PICKUP mail pending for a different job.
-- Only add the JobId filter if the PICKUP check is meant to be per job.
AND NOT EXISTS (
SELECT SMTP.Id FROM SMTP_Production SMTP
WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId and SMTP.JobId = R.JobId
)
final version of the query might be like this:
-- Suggested final form of the PICKUP candidate query: Results rows for approved,
-- open jobs with a non-empty e-mail address and nothing sent or queued yet,
-- reduced to one row per (Email, CandidateId).
--
-- Review fixes:
--   * The Unsubscribe predicate had unbalanced parentheses, which closed the CTE
--     early and made the statement unparseable. It is now a plain NOT EXISTS;
--     R.Email is never NULL at that point, so no NULL branch is needed.
--   * `J.Closed <> 1`, `R.EmailSent <> 1` and `R.IsEmailSubscribe <> 'False'`
--     evaluate to UNKNOWN for NULL and therefore dropped rows whose flag was
--     never set. The NULL-tolerant forms are restored.
--   * `R.Email <> '' OR R.Email IS NOT NULL` accepted empty strings;
--     `R.Email <> ''` alone rejects both '' and NULL.
--   * The PICKUP check stays per candidate. With SMTP.JobId = R.JobId added it
--     would be fully covered by the per-job check above it and would no longer
--     exclude candidates with a mail pending for another job.
--   * The self-join to a ranked copy of the CTE is replaced by a row number
--     computed once inside the CTE. It is ordered by R.Id because
--     EmailSentDate is a constant NULL here and cannot order anything.
-- Assumes J.Id and C.Id are unique, so each Results row appears once -- TODO confirm.
WITH CTE AS
(
    SELECT
        R.Id AS ResultId,
        R.JobId,
        R.CandidateId,
        R.Email,
        CAST(0 AS BIT) AS EmailSent,
        NULL AS EmailSentDate,
        'PICKUP' AS EmailStatus,
        GETDATE() AS CreateDate,
        C.Id AS UserId,
        C.Email AS UserEmail,
        NULL AS Subject,
        ROW_NUMBER() OVER (PARTITION BY R.Email, R.CandidateId ORDER BY R.Id) AS rn
    FROM RESULTS R
    INNER JOIN JOB J ON R.JobId = J.Id
    INNER JOIN Consultant C ON J.UserId = C.Id
    WHERE
        J.DCApproved = 1
        AND (J.Closed = 0 OR J.Closed IS NULL)
        AND R.Email <> '' -- also rejects NULL
        AND (R.EmailSent = 0 OR R.EmailSent IS NULL)
        AND R.EmailSentDate IS NULL -- email has not been sent
        AND (R.EmailStatus = '' OR R.EmailStatus IS NULL)
        AND (R.IsEmailSubscribe = 'True' OR R.IsEmailSubscribe IS NULL)
        -- not already been emailed for this job
        AND NOT EXISTS (
            SELECT 1
            FROM SMTP_Production SMTP
            WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
        )
        -- not unsubscribed
        AND NOT EXISTS (
            SELECT 1 FROM Unsubscribe u
            WHERE u.EmailAddress = R.Email
        )
        -- candidate has no mail waiting in PICKUP state, for any job
        AND NOT EXISTS (
            SELECT 1 FROM SMTP_Production SMTP
            WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
        )
        AND C.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.ClientId NOT IN
        (
            -- LIST OF IDS
        )
)
SELECT
    CTE.ResultId,
    CTE.JobId,
    CTE.CandidateId,
    CTE.Email,
    CTE.EmailSent,
    CTE.EmailSentDate,
    CTE.EmailStatus,
    CTE.CreateDate,
    CTE.UserId,
    CTE.UserEmail,
    NULL
FROM CTE
WHERE CTE.rn = 1
I have two really big SQL Server database tables for an IoT project.
The first table is Message (row count: 7,423,889,085 rows):
-- Message table: one row per message, identified by the IDENTITY column MessageId.
-- TimeStamp defaults to getdate(); every column is NOT NULL.
-- NOTE(review): the primary key is declared NONCLUSTERED and this script defines
-- no clustered index, so as scripted the table is stored as a heap.
CREATE TABLE [aymax].[Message](
[MessageId] [bigint] IDENTITY(1,1) NOT NULL,
[ObjectId] [int] NOT NULL,
[TimeStamp] [datetime] NOT NULL CONSTRAINT [DF__Message__TimeSta__3B75D760] DEFAULT (getdate()),
[GpsTime] [datetime] NOT NULL,
[VisibleSatelites] [int] NOT NULL,
[X] [float] NOT NULL,
[Y] [float] NOT NULL,
CONSTRAINT [Message_PK] PRIMARY KEY NONCLUSTERED
(
[MessageId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
The second table is MessageSensors (row count: 26,359,568,037 rows); this table holds the value of each sensor for the messages in the Message table:
-- MessageSensors table: at most one Value per (MessageId, DataSourceId) pair,
-- enforced by the clustered primary key. Value defaults to 0.
-- Rows are therefore stored in (MessageId, DataSourceId) order.
CREATE TABLE [aymax].[MessageSensors](
[MessageId] [bigint] NOT NULL,
[DataSourceId] [int] NOT NULL,
[Value] [float] NOT NULL CONSTRAINT [DF__AnalogDat__Value__5812160E] DEFAULT ((0)),
CONSTRAINT [AnalogData_PK] PRIMARY KEY CLUSTERED
(
[MessageId] ASC,
[DataSourceId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
) ON [PRIMARY]
GO
My problem is that seeking by a time interval between two datetimes is really slow, and it becomes even slower if I also select the message sensor data. Also, when I run the sp_BlitzIndex check from brentozar.com, it says that I have:
"Indexaphobia: High value missing index"
[aymax].[MessageSensors] (EQUALITY: [DataSourceId], [Value] INCLUDES: [MessageId] )
[aymax].[MessageSensors] EQUALITY: [Value] INCLUDES: [MessageId], [DataSourceId]
I believe that creating these two indexes will increase storage a lot and will also take too much time to create. I need your advice on indexing for both tables.
My current indexes:
1-
-- Keyed on GpsTime alone; MessageId and ObjectId are included columns.
-- X and Y are not part of this index.
-- NOTE(review): the query in this post filters on ObjectId = <value> plus a
-- GpsTime range and returns X and Y. An index keyed on (ObjectId, GpsTime) that
-- includes X and Y may match it more closely - verify against the actual plan.
CREATE NONCLUSTERED INDEX [IX_gpstime_objectid] ON [aymax].[Message]
(
[GpsTime] ASC
)
INCLUDE ( [MessageId],
[ObjectId]) WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
2-
-- Nonclustered primary key on MessageId.
-- NOTE(review): a constraint named [Message_PK] with the same definition is
-- already declared inline in the CREATE TABLE [aymax].[Message] script, so this
-- appears to be the same constraint listed again, not an additional one.
alter TABLE [aymax].[Message] ADD CONSTRAINT [Message_PK] PRIMARY KEY NONCLUSTERED
(
[MessageId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
3rd-
-- Clustered primary key on (MessageId, DataSourceId).
-- NOTE(review): a constraint named [AnalogData_PK] with the same key is already
-- declared inline in the CREATE TABLE [aymax].[MessageSensors] script, so this
-- appears to be the same constraint listed again, not an additional one.
ALTER TABLE [aymax].[MessageSensors] ADD CONSTRAINT [AnalogData_PK] PRIMARY KEY CLUSTERED
(
[MessageId] ASC,
[DataSourceId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
4-
-- Keyed on DataSourceId alone; Value is neither a key nor an included column.
-- NOTE(review): a query that filters on DataSourceId and returns Value cannot be
-- answered from this index alone - presumably this relates to the slowdown
-- reported when md.Value is selected; confirm with the execution plan.
CREATE NONCLUSTERED INDEX [MessageData_DataSourceId_IDX] ON [aymax].[MessageSensors]
(
[DataSourceId] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON)
GO
Any help, please? I need fast retrieval from Message and MessageSensors.
Update:
While investigating, I found that selecting the float value slows the query down enormously, from 1 second to 3 minutes:
-- Messages of one object inside a GPS-time window, with the reading of a single
-- data source attached when one exists (the LEFT JOIN keeps messages without it).
-- Both tables are read WITH (NOLOCK), as in the query as first written.
SELECT
    msg.messageid,
    msg.objectid,
    msg.gpstime,
    msg.x,
    msg.y,
    -- This column is the slow part: returning sens.messageid here instead of
    -- the float sens.Value makes the query come back fast.
    sens.Value,
    0
FROM aymax.[message] AS msg WITH (NOLOCK)
LEFT JOIN aymax.MessageSensors AS sens WITH (NOLOCK)
    ON sens.messageid = msg.messageid
    AND sens.datasourceid = 425732
WHERE msg.objectid = 14099
    -- closed range on both ends, same as BETWEEN
    AND msg.gpstime >= '2017-04-01 19:46:18.607'
    AND msg.gpstime <= '2017-04-10 19:05:18.607'
Possible solutions:
A filtered index (filter by date and do not index old data):
https://learn.microsoft.com/en-us/sql/relational-databases/indexes/create-filtered-indexes
A clustered index on (GpsTime, MessageId), especially if you have no plans for other indexes. This requires rebuilding your table.
Partitioning (see @Siyaul's comments).
This is my query:
-- Returns rows 1331901 through 1331950 of the document-name / instrument join,
-- numbered by NAME. Output columns: peta_rn, BOOK, PAGETRIMMED, NAME, TYPE, PDF.
-- Rows that share the same NAME have no defined relative order, so their
-- peta_rn values are not guaranteed to be stable between executions.
SELECT
    peta_paged.peta_rn,
    peta_paged.BOOK,
    peta_paged.PAGETRIMMED,
    peta_paged.NAME,
    peta_paged.TYPE,
    peta_paged.PDF
FROM
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY cdn.NAME ASC) AS peta_rn,
        cwi.BOOK,
        cwi.PAGETRIMMED,
        cdn.NAME,
        cdn.TYPE,
        cwi.PDF
    FROM CCWiseDocumentNames2 AS cdn
    INNER JOIN CCWiseInstr2 AS cwi
        ON cdn.ID = cwi.ID
) AS peta_paged
WHERE peta_paged.peta_rn BETWEEN 1331901 AND 1331950
Currently this query takes about 4 seconds to return the results. Is there any way to bring it under 1 second?
Indexes already exist on cwi.ID and cdn.ID. Below is the actual execution plan from SQL Server:
Any help would be useful.
This is the table structure:
/****** Object: Table [dbo].[CCWiseInstr2] Script Date: 9/17/2013 3:54:27 AM ******/
-- Every column of this table is declared NULL, and the script defines neither a
-- primary key nor a clustered index for it. The only index shown is the
-- nonclustered one on ID below.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
CREATE TABLE [dbo].[CCWiseInstr2](
[ID] [int] NULL,
[BK_PG] [varchar](50) NULL,
[DATE] [datetime] NULL,
[ITYPE] [varchar](50) NULL,
[BOOK] [int] NULL,
[PAGE] [varchar](50) NULL,
[NOBP] [varchar](50) NULL,
[DESC] [varchar](240) NULL,
[TIF] [varchar](50) NULL,
[INDEXNAME] [varchar](50) NULL,
[CONFIRM] [varchar](50) NULL,
[PDF] [varchar](50) NULL,
[PAGETRIMMED] [varchar](10) NULL
) ON [PRIMARY]
GO
SET ANSI_PADDING OFF
GO
/****** Object: Index [IX_CCWiseInstr2_ID] Script Date: 9/17/2013 3:54:27 AM ******/
-- Nonclustered, non-unique index on ID. BOOK, PAGETRIMMED and PDF (the columns
-- the paging query reads from this table) are not part of it.
CREATE NONCLUSTERED INDEX [IX_CCWiseInstr2_ID] ON [dbo].[CCWiseInstr2]
(
[ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
/****** Object: Table [dbo].[CCWiseDocumentNames2] Script Date: 9/17/2013 3:54:18 AM ******/
-- Every column of this table is declared NULL, and the script defines neither a
-- primary key nor a clustered index for it. Two single-column nonclustered
-- indexes follow: one on ID and one on NAME.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
SET ANSI_PADDING ON
GO
CREATE TABLE [dbo].[CCWiseDocumentNames2](
[ID] [int] NULL,
[BK_PG] [varchar](50) NULL,
[NAME] [varchar](100) NULL,
[OTHERNAM] [varchar](100) NULL,
[TYPE] [varchar](50) NULL,
[INDEXNAME] [varchar](50) NULL
) ON [PRIMARY]
GO
SET ANSI_PADDING OFF
GO
/****** Object: Index [IX_CCWiseDocumentNames2_ID] Script Date: 9/17/2013 3:54:18 AM ******/
-- Nonclustered, non-unique index on ID (the join column of the paging query).
CREATE NONCLUSTERED INDEX [IX_CCWiseDocumentNames2_ID] ON [dbo].[CCWiseDocumentNames2]
(
[ID] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
SET ANSI_PADDING ON
GO
/****** Object: Index [IX_CCWiseDocumentNames2_NAME] Script Date: 9/17/2013 3:54:18 AM ******/
-- Nonclustered, non-unique index on NAME (the ORDER BY column of the paging query).
CREATE NONCLUSTERED INDEX [IX_CCWiseDocumentNames2_NAME] ON [dbo].[CCWiseDocumentNames2]
(
[NAME] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO
I think the problem is that both tables need to be matched completely and then sorted on name before the where clause can throw out the trash.
I'm not sure this will help, but it's worth a shot - try adding the name to the index:
-- Rebuilds the existing IX_CCWiseDocumentNames2_ID index with NAME added as a
-- second key column. An index with this name already exists on the table, so
-- DROP_EXISTING must be ON; with OFF the statement fails because the name is taken.
CREATE NONCLUSTERED INDEX [IX_CCWiseDocumentNames2_ID] ON [dbo].[CCWiseDocumentNames2]
(
[ID] ASC,
[NAME] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = ON, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
You don't need a PK or an identity column to create a clustered index, so you can still create one on the ID column. It allows repeating values; the only thing you should worry about is INSERT performance if new IDs are not appended at the end but inserted in the middle.
Why are you using row_number() in outer query? I think the same result can be achieved in single select (maybe you'll have to change the ranking function and use partitioning).
Btw, if your inner query does not return unique NAME values and you're using row_number without partitioning, then peta_rn may return misleading values (rows with the same name get different peta_rn values, in no guaranteed order). I'm just guessing because I don't know exactly what you are trying to achieve.
Go with the clustered index and you'll bring it under 1s no problem.