SQL Server query inefficient for table with high I/O operations - sql

I'm trying to write an SQL script that returns an item from a list if that item can be found in the list; if not, it returns the most recent item added to the list. I came up with a solution using COUNT and an IF-ELSE statement. However, my table has very frequent I/O operations and I think this solution is inefficient. Does anyone have a way to optimize this solution, or a better approach?
here is my solution:
-- Count the '00' notifications for this code/reference pair.
DECLARE #result_set INT
SET #result_set = (
    SELECT COUNT(*)
    FROM (
        SELECT *
        FROM notification p
        WHERE p.code = #code
          AND p.reference = #reference
          AND p.response = '00'
    ) matched
)
-- At least one match: return every matching row.
IF #result_set > 0
BEGIN
    SELECT *
    FROM notification p
    WHERE p.code = #code
      AND p.reference = #reference
      AND p.response = '00'
END
-- No match: fall back to the row with the highest id for the same pair.
ELSE
BEGIN
    SELECT TOP 1 p.*
    FROM notification p WITH (NOLOCK)
    WHERE p.code = #code
      AND p.reference = #reference
    ORDER BY p.id DESC
END
I also think there should be a way around repeating this select statement:
-- The filter that appears twice in the script above: inside the COUNT probe and again in the IF branch.
SELECT *
FROM notification p
WHERE p.code = #code
AND p.reference = #reference
AND p.response ='00'
I'm just not proficient enough in SQL to figure it out.

You can do something like this:
-- Return a single row for the code/reference pair: a row with response '00'
-- when one exists, otherwise the row with the highest id.
SELECT TOP (1) n.*
FROM notification n
WHERE n.code = #code
  AND n.reference = #reference
ORDER BY
    CASE WHEN n.response = '00' THEN 1 ELSE 2 END,  -- '00' rows sort first
    n.id DESC;                                      -- newest id breaks ties
This will return the row with response of '00' first and then any other row. I would expect another column in the ORDER BY to handle recency, but your sample code doesn't provide any clue on what this might be.

-- ItemIWant: every '00' row for the requested code/reference pair.
WITH ItemIWant AS (
    SELECT *
    FROM notification p
    WHERE p.code = #code
      AND p.reference = #reference
      AND p.response = '00'
)
SELECT *
FROM ItemIWant
UNION ALL
-- Fallback: the newest row for the pair, emitted only when ItemIWant is empty.
-- TOP 1 ... ORDER BY lives in a derived table because an ORDER BY placed after
-- a UNION ALL sorts the combined result instead of choosing the TOP 1 row.
SELECT latest.*
FROM (
    SELECT TOP 1 *
    FROM notification p
    WHERE p.code = #code
      AND p.reference = #reference
    ORDER BY p.id DESC
) latest
WHERE NOT EXISTS (SELECT * FROM ItemIWant);
This will do that with minimal passes on the table. It will only return the top row if there are no rows returned by ItemIWant. There is no conditional logic so it can be compiled and indexed effectively.

Related

SQL Query - long running / taking up CPU resource

Hello I have the below SQL query that is taking on average 40 minutes to run, one of the tables that it references has over 7 million records in it.
I have run this through the database tuning advisor and applied all recommendations. I have also assessed it within the activity monitor in SQL Server, and no further indexes etc. have been recommended.
Any suggestions would be great, thanks in advance
-- Inserts one 'PICKUP' row into smtp_production per selected Result row and
-- writes each inserted ResultId (with the current time) into ResultstoUpdate.
-- CTE: the candidate rows, with constant values for the columns of the new row.
WITH CTE AS
(
SELECT r.Id AS ResultId,
r.JobId,
r.CandidateId,
r.Email,
CAST(0 AS BIT) AS EmailSent,
NULL AS EmailSentDate,
'PICKUP' AS EmailStatus,
GETDATE() AS CreateDate,
C.Id AS UserId,
C.Email AS UserEmail,
NULL AS Subject
FROM Result R
INNER JOIN Job J ON R.JobId = J.Id
INNER JOIN User C ON J.UserId = C.Id
WHERE
-- Each ISNULL(...) below substitutes a default for NULL before comparing.
ISNULL(J.Approved, CAST(0 AS BIT)) = CAST(1 AS BIT)
AND ISNULL(J.Closed, CAST(0 AS BIT)) = CAST(0 AS BIT)
AND ISNULL(R.Email,'') <> '' -- has an email address
AND ISNULL(R.EmailSent, CAST(0 AS BIT)) = CAST(0 AS BIT) -- email has not been sent
AND R.EmailSentDate IS NULL -- email has not been sent
AND ISNULL(R.EmailStatus,'') = '' -- email has not been sent
AND ISNULL(R.IsEmailSubscribe, 'True') <> 'False' -- not unsubscribed
-- not already been emailed for this job
AND NOT EXISTS (
SELECT SMTP.Email
FROM SMTP_Production SMTP
WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
)
-- not unsubscribed
AND NOT EXISTS (
SELECT u.Id FROM Unsubscribe u
WHERE ISNULL(u.EmailAddress, '') = ISNULL(R.Email, '')
)
-- candidate has no SMTP_Production row whose status is still 'PICKUP'
AND NOT EXISTS (
SELECT SMTP.Id FROM SMTP_Production SMTP
WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
)
-- exclusion lists (the id values are elided in this listing)
AND C.Id NOT IN (
-- list of ids
)
AND J.Id NOT IN (
-- list of ids
)
AND J.ClientId NOT IN
(
-- list of ids
)
)
INSERT INTO smtp_production (ResultId, JobId, CandidateId, Email, EmailSent, EmailSentDate, EmailStatus, CreateDate, ConsultantId, ConsultantEmail, Subject)
OUTPUT INSERTED.ResultId,GETDATE() INTO ResultstoUpdate
SELECT
CTE.ResultId,
CTE.JobId,
CTE.CandidateId,
CTE.Email,
CTE.EmailSent,
CTE.EmailSentDate,
CTE.EmailStatus,
CTE.CreateDate,
CTE.UserId,
CTE.UserEmail,
NULL
FROM CTE
-- Keep one CTE row per (Email, CandidateId): the row numbered rn = 1.
-- EmailSentDate is NULL for every CTE row (see the CTE select list), so the
-- ORDER BY does not determine which row of a group receives rn = 1.
INNER JOIN
(
SELECT *, row_number() over(partition by CTE.Email, CTE.CandidateId order by CTE.EmailSentDate desc) as rn
FROM CTE
) DCTE ON CTE.ResultId = DCTE.ResultId AND DCTE.rn = 1
Please see my updated query below:
-- Inserts one 'PICKUP' row into smtp_production per selected RESULTS row and
-- writes each inserted ResultId (with the current time) into ResultstoUpdate.
-- NULL handling is spelled out with plain comparisons instead of ISNULL() so
-- that no filtered column is wrapped in a function.
WITH CTE AS
(
    SELECT R.Id AS ResultId,
           R.JobId,
           R.CandidateId,
           R.Email,
           CAST(0 AS BIT) AS EmailSent,
           NULL AS EmailSentDate,
           'PICKUP' AS EmailStatus,
           GETDATE() AS CreateDate,
           C.Id AS UserId,
           C.Email AS UserEmail,
           NULL AS Subject
    FROM RESULTS R
    INNER JOIN JOB J ON R.JobId = J.Id
    INNER JOIN Consultant C ON J.UserId = C.Id
    WHERE
        J.DCApproved = 1
        AND (J.Closed = 0 OR J.Closed IS NULL)
        -- has an email address: a NULL email fails "<> ''" as well, so no
        -- IS NOT NULL branch is needed (OR-ing one in would admit '' values)
        AND R.Email <> ''
        AND (R.EmailSent = 0 OR R.EmailSent IS NULL)        -- email has not been sent
        AND R.EmailSentDate IS NULL                         -- email has not been sent
        AND (R.EmailStatus = '' OR R.EmailStatus IS NULL)   -- email has not been sent
        -- not unsubscribed: anything other than 'False', including NULL
        AND (R.IsEmailSubscribe <> 'False' OR R.IsEmailSubscribe IS NULL)
        -- not already been emailed for this job
        AND NOT EXISTS (
            SELECT SMTP.Email
            FROM SMTP_Production SMTP
            WHERE SMTP.JobId = R.JobId AND SMTP.CandidateId = R.CandidateId
        )
        -- not unsubscribed; R.Email is known to be non-NULL and non-empty here,
        -- so a plain equality covers every case the NULL-safe comparison did
        AND NOT EXISTS (
            SELECT u.Id FROM Unsubscribe u
            WHERE u.EmailAddress = R.Email
        )
        -- candidate has no SMTP_Production row whose status is still 'PICKUP'
        AND NOT EXISTS (
            SELECT SMTP.Id FROM SMTP_Production SMTP
            WHERE SMTP.EmailStatus = 'PICKUP' AND SMTP.CandidateId = R.CandidateId
        )
        AND C.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.Id NOT IN (
            -- LIST OF IDS
        )
        AND J.ClientId NOT IN
        (
            -- LIST OF IDS
        )
)
INSERT INTO smtp_production (ResultId, JobId, CandidateId, Email, EmailSent, EmailSentDate, EmailStatus, CreateDate, UserId, UserEmail, Subject)
OUTPUT INSERTED.ResultId, GETDATE() INTO ResultstoUpdate
SELECT
    CTE.ResultId,
    CTE.JobId,
    CTE.CandidateId,
    CTE.Email,
    CTE.EmailSent,
    CTE.EmailSentDate,
    CTE.EmailStatus,
    CTE.CreateDate,
    CTE.UserId,
    CTE.UserEmail,
    NULL
FROM CTE
-- Keep one row per (Email, CandidateId): the row numbered rn = 1.
-- NOTE(review): EmailSentDate is NULL for every CTE row, so the ORDER BY does
-- not decide which row of a group wins; confirm whether a real tie-breaker
-- (for example ResultId) is wanted.
INNER JOIN
(
    SELECT *, ROW_NUMBER() OVER (PARTITION BY CTE.Email, CTE.CandidateId ORDER BY CTE.EmailSentDate DESC) AS rn
    FROM CTE
) DCTE ON CTE.ResultId = DCTE.ResultId AND DCTE.rn = 1
GO
Using ISNULL in your WHERE and JOIN clauses is probably the main cause here. Using functions against columns in your query causes the query to become non-SARGable (meaning that it can't use any of the indexes on your table(s) and so it has to scan the whole thing). Note: using functions against variables in the WHERE is normally fine. For example WHERE SomeColumn = DATEADD(DAY, #n, #SomeDate). Things like WHERE SomeColumn = ISNULL(#Variable,0) have the smell of a "catch-all query", so can be performance hitters, depending on your setup. This isn't the discussion at hand though.
For clauses like ISNULL(J.Closed, CAST(0 AS BIT)) = CAST(0 AS BIT) this is therefore a big headache for the query optimiser and your query is riddled with them. You'll need to replace these with clauses like:
WHERE (J.Closed = 0 OR J.Closed IS NULL)
Although it makes no difference, there's no need to CAST the 0 there either. SQL Server can see you're making a comparison to a bit and will therefore interpret the 0 as one as well.
You also have a EXISTS with the WHERE clause ISNULL(u.EmailAddress, '') = ISNULL(R.Email, ''). This will need to become:
WHERE (u.EmailAddress = R.Email
OR (u.EmailAddress IS NULL AND R.Email IS NULL))
You'll need to change all of your ISNULL usage in your WHERE clauses (the CTE and the subqueries) and you should see a decent performance increase.
Generally, 7 million records are a joke for modern databases. If you talk about problems, you are supposed to be talking about problems on billions of rows, not 7 million.
Which indicates problems with the query. High CPU is generally a sign of non matching fields (compare string in one table to number in another ) or... functions called too often. Long running normally is a sign of either missing indices or.... non sargeability. Which you really do a lot to force.
Non-sargability means that indices CAN NOT be used. An example of this is:
ISNULL(J.Approved, CAST(0 AS BIT)) = CAST(1 AS BIT)
The ISNULL(field, value) means that an index on field is not usable - basically "goodbye index, hello table scan". It also means - well....
J.Approved = 1 -- a NULL Approved was mapped to 0 by the ISNULL, so it never matched 1; no IS NULL branch is needed
has the same meaning, but it sargeable. Pretty much EVERY of your conditions is written in a non sargeable way - welcome to db hell. Start rewriting.
You may want to read up more on sargeability at https://www.techopedia.com/definition/28838/sargeable
Also make sure you have indices on all relevant foreign keys (and the referenced primary keys) - otherwise, again, welcome table scans.

More effective way to write following SQL query

I am writing a query to return a list of articles for the news portal homepage.
Requirement is following.
Each category which needs to be on the homepage needs to display 5 articles by following criteria.
Each category needs to have one article which is main news for the category, followed by 4 most popular news at the time being.
If there is no first news set for the category, then display the 5 most popular instead.
I wrote a SQL Function which has CategoryID parameter and another SQL procedure which calls that function N Times.
Is there more efficient way to write this query?
Function
-- Returns the homepage articles of one root category: the category-first
-- article (if any) followed by the most popular articles by Coefficient.
-- Only active articles whose PublishDate is in the past are considered.
-- The function can return up to 6 rows; callers limit the result themselves.
CREATE FUNCTION [dbo].[Fn_FetchHomepageCategory]
(
    -- Root category whose articles are requested
    #categoryId int
)
RETURNS #ArticlesToReturn TABLE
(   Id int,
    Title nvarchar(500),
    Slug nvarchar(500),               -- filled from Articles.InitialTitle
    Summary nvarchar(1500),
    IsCategoryFirst bit,
    RootCategoryId int,
    RootCategory nvarchar(500),       -- filled from Categories.Name
    OldFacebookCommentsUrl nvarchar(500),
    Icon nvarchar(500),               -- filled from ArticleIcons.CssClass (NULL when no icon)
    TopicName nvarchar(500),
    MainArticlePhoto nvarchar(500),
    FrontPagePhoto nvarchar(500),
    PublishDate datetime
)
AS
BEGIN
    -- select category first news if any
    -- NOTE(review): TOP 1 has no ORDER BY, so the pick is arbitrary when more
    -- than one article is flagged IsCategoryFirst; confirm the intended order.
    INSERT INTO #ArticlesToReturn
        (Id, Title, Slug, Summary, IsCategoryFirst, RootCategoryId, RootCategory,
         OldFacebookCommentsUrl, Icon, TopicName, MainArticlePhoto, FrontPagePhoto, PublishDate)
    SELECT TOP 1
        ART.Id, ART.Title, ART.InitialTitle, ART.Summary, ART.IsCategoryFirst,
        ART.RootCategoryId, CAT.Name, ART.OldFacebookCommentsUrl, ICO.CssClass,
        ART.TopicName, ART.MainArticlePhoto, ART.FrontPagePhoto, ART.PublishDate
    FROM Articles ART WITH (NOLOCK)
    INNER JOIN ArticleViewCountSum AVS WITH (NOLOCK) ON AVS.ArticleId = ART.Id
    INNER JOIN Categories CAT WITH (NOLOCK) ON CAT.Id = ART.RootCategoryId
    LEFT JOIN ArticleIcons ICO WITH (NOLOCK) ON ICO.Id = ART.IconId
    WHERE ART.RootCategoryId = #categoryId
      AND ART.PublishDate < GETDATE()
      AND ART.Active = 1
      AND ART.IsCategoryFirst = 1
    -- select 5 most popular by coefficient, skipping the article inserted
    -- above so the category-first article is never returned twice
    INSERT INTO #ArticlesToReturn
        (Id, Title, Slug, Summary, IsCategoryFirst, RootCategoryId, RootCategory,
         OldFacebookCommentsUrl, Icon, TopicName, MainArticlePhoto, FrontPagePhoto, PublishDate)
    SELECT TOP 5
        ART.Id, ART.Title, ART.InitialTitle, ART.Summary, ART.IsCategoryFirst,
        ART.RootCategoryId, CAT.Name, ART.OldFacebookCommentsUrl, ICO.CssClass,
        ART.TopicName, ART.MainArticlePhoto, ART.FrontPagePhoto, ART.PublishDate
    FROM Articles ART WITH (NOLOCK)
    INNER JOIN ArticleViewCountSum AVS WITH (NOLOCK) ON AVS.ArticleId = ART.Id
    INNER JOIN Categories CAT WITH (NOLOCK) ON CAT.Id = ART.RootCategoryId
    LEFT JOIN ArticleIcons ICO WITH (NOLOCK) ON ICO.Id = ART.IconId
    WHERE ART.RootCategoryId = #categoryId
      AND ART.PublishDate < GETDATE()
      AND ART.Active = 1
      AND NOT EXISTS (
            SELECT 1
            FROM #ArticlesToReturn PICKED
            WHERE PICKED.Id = ART.Id
          )
    ORDER BY ART.Coefficient DESC
    RETURN
END
Stored procedure:
-- Produces the homepage result sets in order: the main article, the featured
-- list, then one result set per hard-coded homepage category.
CREATE PROCEDURE [dbo].[Fetch_HomePageArticles]
AS
BEGIN
    -- Suppress row-count messages so they do not interfere with the SELECT results.
    SET NOCOUNT ON;

    -- One timestamp shared by the publish-date filters below.
    DECLARE #dateNow datetime = GETDATE();

    -- first main news
    SELECT TOP (1) *
    FROM Articles
    WHERE IsFirst = 1
      AND PublishDate < #dateNow;

    --TODO: featured
    SELECT TOP (10) *
    FROM Featured
    WHERE PublishDate < #dateNow
      AND Active = 1
    ORDER BY PublishDate DESC;

    -- Up to five articles for each homepage category id.
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(3);
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(150);
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(1523);
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(1509);
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(1569);
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(1545);
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(1548);
    SELECT TOP (5) * FROM Fn_FetchHomepageCategory(67);
END
I tried to modify function to have only one SELECT and included Order BY IsFirstCategory DESC, but query ran much slower then.
One potential improvement would be merging two SELECT clauses in the Fn_FetchHomepageCategory function into one single query by adding a new made-up Coefficient parameter:
-- Single-pass version: the five top-ranked articles of the category, where an
-- article flagged IsCategoryFirst is ranked as if its coefficient were 1000000.
SELECT TOP 5
    ART.Id, ART.Title, ART.InitialTitle, ART.Summary, ART.IsCategoryFirst,
    ART.RootCategoryId, CAT.Name, ART.OldFacebookCommentsUrl, ICO.CssClass,
    ART.TopicName, ART.MainArticlePhoto, ART.FrontPagePhoto, ART.PublishDate
FROM Articles ART WITH (NOLOCK)
INNER JOIN ArticleViewCountSum AVS WITH (NOLOCK)
    ON AVS.ArticleId = ART.Id
INNER JOIN Categories CAT WITH (NOLOCK)
    ON CAT.Id = ART.RootCategoryId
LEFT JOIN ArticleIcons ICO WITH (NOLOCK)
    ON ICO.Id = ART.IconId
WHERE ART.RootCategoryId = #categoryId
  AND ART.PublishDate < GETDATE()
  AND ART.Active = 1
ORDER BY
    -- made-up score that puts the category-first article on top
    CASE WHEN ART.IsCategoryFirst = 1 THEN 1000000 ELSE ART.Coefficient END DESC
You can replace 1000000 with another big number. Its only point is assigning the highest co-efficiency score possible to the post that have IsCategoryFirst = 1.
Please note that it works fine only if you have only one post with IsCategoryFirst = 1.

How to select minimum non duplicated value in a column?

Can you help me with SQL statements to find minimum non duplicated value?
This is my sql statement
-- Current date: the VARCHAR(10) conversion with style 120 keeps only the
-- yyyy-mm-dd part, which is then converted back to DATETIME.
DECLARE #currentDate DATETIME = CONVERT(VARCHAR(10), GETDATE(), 120)
UPDATE Dinfo
SET WinnerID = single_offer.CustomerID
FROM Daily_Info Dinfo
INNER JOIN (
    -- (DailyInfoID, CustomerID) pairs having exactly one non-NULL OfferPrice today
    SELECT CO.DailyInfoID, CO.CustomerID
    FROM Customer_Offer CO
    WHERE CO.OfferDate = #currentDate
    GROUP BY CO.DailyInfoID, CO.CustomerID
    HAVING COUNT(CO.OfferPrice) = 1
) single_offer
    ON Dinfo.DailyID = single_offer.DailyInfoID
and i want to update my winner who offered minimum unique offer. How can i select it?
If you want to find data, then I would expect a select. I think the following query might do what you want:
-- Lowest offer price that occurs exactly once among the offers dated #currentDate.
SELECT MIN(priced.offerprice)
FROM (
    SELECT offer.*,
           COUNT(*) OVER (PARTITION BY offer.offerprice) AS cnt  -- offers sharing this price
    FROM Customer_Offer offer
    WHERE offer.OfferDate = #currentDate
) priced
WHERE priced.cnt = 1;
If you want to update information based on this, then use join:
-- Sets WinnerID to the customer who made the lowest offer price that occurs
-- exactly once among the offers dated #currentDate.
-- The CROSS JOIN is not correlated to Daily_Info, so every Daily_Info row
-- receives the same winner.
UPDATE dinfo
SET WinnerID = c.CustomerID
FROM Daily_Info dinfo   -- the alias must be bound to the real table name
CROSS JOIN (
    SELECT TOP 1 priced.*
    FROM (
        SELECT co.*,
               COUNT(*) OVER (PARTITION BY co.offerprice) AS cnt  -- offers sharing this price
        FROM Customer_Offer co
        WHERE co.OfferDate = #currentDate
    ) priced
    WHERE priced.cnt = 1
    ORDER BY priced.offerprice
) c
This follows the structure of your query, but it is going to update all rows in dinfo. You might want some other conditions so that only one row is updated.

SQL Text Matching Query Tuning

I'm trying to do some free text search matching, and wondering if I can improve this query (using MSSQL 2008):
#FreeText is a table, where each row is a search word
-- Number of search words; a product qualifies only when it matches all of them.
-- (DECLARE needs an explicit data type.)
DECLARE #WordCount INT = (SELECT COUNT(*) FROM #FreeText)
SELECT p.ID
FROM Product p
OUTER APPLY
(
    -- How many search words occur somewhere in this product's txt
    SELECT COUNT(pm.ID) AS MatchCount
    FROM Product pm
    INNER JOIN #FreeText ft
        ON pm.txt LIKE '%' + ft.[text] + '%'
    WHERE pm.ID = p.ID
      AND (SELECT TOP 1 [text] FROM #FreeText) IS NOT NULL
) MC
WHERE MC.MatchCount = #WordCount
So I'm wondering if there is any way to avoid the "FROM Product pm" in the outer apply?
I cannot always INNER JOIN #FreeText because sometimes we don't use free text searching.
Any thoughts or tips would be greatly appreciated, also let me know if I can clarify anything. Thanks in advance.
P.S. I do know that MS SQL has a FREETEXT() search, but I unfortunately cannot use that at the moment.
Here's a query without OUTER APPLY that returns all results when there are no search criteria.
-- Search words, one per row.
DECLARE #FreeText TABLE ([text] varchar(200))
INSERT INTO #FreeText ([text]) VALUES ('a')
INSERT INTO #FreeText ([text]) VALUES ('c')
-- NULL is not a usable search word: remove any such rows.
DELETE FROM #FreeText WHERE [text] IS NULL
DECLARE #WordCount int
SELECT #WordCount = COUNT(*) FROM #FreeText
-- A product is returned when it matched every search word, or when there are
-- no search words at all.
SELECT p.ID
FROM Product p
LEFT JOIN #FreeText ft
    ON p.txt LIKE '%' + ft.[text] + '%'
WHERE #WordCount = 0 OR ft.[text] IS NOT NULL
GROUP BY p.ID
HAVING #WordCount = 0 OR COUNT(*) = #WordCount
Note: it would be my preference to not use the "freetext" query when there is not any freetext criteria - instead use another query (simpler). If you choose to go that route - go back to an INNER JOIN and drop the OR #WordCount = 0 x2.

SQL get single value inside existing query?

I have a query that returns a bunch of rows.
But using the same query i would like to:
1. get the total row count in the table
2. get the row number where a certain username is located
Right now im doing like so:
-- Returns one page of users ranked by score, plus the total number of
-- user_details rows and the rank of 'myUsername' on every returned row.
BEGIN
DECLARE #startRowIndex INT;
DECLARE #PageIndex INT;
DECLARE #RowsPerPage INT;
SET #PageIndex = 0;
SET #RowsPerPage = 15;
-- First row number of the requested page (page 0 starts at row 1).
SET #startRowIndex = (#PageIndex * #RowsPerPage) + 1;
-- messageentries: one row per grouped user, numbered by descending score.
WITH messageentries
AS (SELECT Row_number()
OVER(ORDER BY score DESC) AS row,
Count(DISTINCT town.townid) AS towns,
user_details.username,
user_score.score,
allience.alliencename,
allience.allienceid,
allience.alliencetagname,
-- scalar subquery in the select list: total number of user_details rows
(SELECT Count(* ) FROM user_details) AS numberofrows
FROM user_details
INNER JOIN user_score
ON user_details.username = user_score.username
INNER JOIN town
ON user_details.username = town.townownername
LEFT OUTER JOIN allience_roles
ON user_details.useralliencerole = allience_roles.roleid
LEFT OUTER JOIN allience
ON allience_roles.allienceid = allience.allienceid
GROUP BY user_details.username,
user_score.score,
allience.alliencename,
allience.allienceid,
allience.alliencetagname)
-- "myself" is the row number of 'myUsername', read from the same CTE.
SELECT *, (SELECT row FROM messageentries WHERE username = 'myUsername') AS myself
FROM messageentries
-- keep only the rows that belong to the requested page
WHERE row BETWEEN #startRowIndex AND #StartRowIndex + #RowsPerPage - 1
END
That works, but isn't the two nested selects going to run once for every row in the table? :/
...
(SELECT Count(* ) FROM user_details) AS numberofrows
...
(SELECT row FROM messageentries WHERE username = 'myUsername') AS myself
So my question being how can i get the values i want as "low-cost" as possible, and preferably in the same query?
Thanks in advance :)
try this...
-- Returns one page of users ranked by score, plus the total number of
-- user_details rows and the rank of 'myUsername' on every returned row.
-- #startRowIndex and #RowsPerPage are the variables declared in the question's script.
-- The total is computed once up front instead of inside the query.
DECLARE #NumberOfRows INT
SELECT #NumberOfRows = Count(* ) FROM user_details;  -- the statement before WITH must end with ';'
-- messageentries: one row per grouped user, numbered by descending score.
WITH messageentries
AS (SELECT Row_number()
             OVER(ORDER BY score DESC) AS row,
           Count(DISTINCT town.townid) AS towns,
           user_details.username,
           user_score.score,
           allience.alliencename,
           allience.allienceid,
           allience.alliencetagname,
           #NumberOfRows AS numberofrows
    FROM user_details
    INNER JOIN user_score
        ON user_details.username = user_score.username
    INNER JOIN town
        ON user_details.username = town.townownername
    LEFT OUTER JOIN allience_roles
        ON user_details.useralliencerole = allience_roles.roleid
    LEFT OUTER JOIN allience
        ON allience_roles.allienceid = allience.allienceid
    GROUP BY user_details.username,
             user_score.score,
             allience.alliencename,
             allience.allienceid,
             allience.alliencetagname)
-- Both sources expose a column named "row", so every reference is qualified.
-- LEFT JOIN ... ON 1 = 1 attaches the user's row number to each page row and
-- still returns the page (with myself = NULL) when the username is not found.
SELECT page.*, MyRowNumber.row AS myself
FROM messageentries page
LEFT JOIN (SELECT row FROM messageentries WHERE username = 'myUsername') MyRowNumber
    ON 1 = 1
WHERE page.row BETWEEN #startRowIndex AND #startRowIndex + #RowsPerPage - 1
(SELECT Count(* ) FROM user_details)
This one will be cached (most probably materialized in a Worktable).
(SELECT row FROM messageentries WHERE username = 'myUsername')
For this one, most probably a Lazy Spool (or Eager Spool) will be built, which will be used to pull this value.