Optimizing a query that uses a lot of aggregates - sql

Hey guys, I asked yesterday about optimizing a query, and with some help I managed to get my query from taking 20 seconds to running instantly.
This was the question (and it contains an example of the underlying table) -> Is there any way to improve the efficiency of this SQL query and make it run faster?
The problem was solved by being smarter about aggregates and group by. However, I still have some trouble with group by. I want to post another query that is against the exact same table and see if anyone sees anyway to improve the efficiency of this query.
So originally, this query was a whole mess. I tried to apply a similar GROUP BY technique that fixed the first query to this query as well but it didn't really pan out.
Here is what I ended up with after I cleaned everything up:
BEGIN
    -- Report parameters. T-SQL local variables are prefixed with '@';
    -- '#' is the temp-table prefix and is not accepted by DECLARE/SET.
    DECLARE @LocalCompanyCode VARCHAR(5)
    SET @LocalCompanyCode = '09'
    DECLARE @LocalDivisionCode VARCHAR(5)
    SET @LocalDivisionCode = '001'
    -- "New" window:       @LocalCustomerBaseFromDate  .. @LocalCustomerBaseToDate + 1 day
    -- "Recurring" window: @LocalRecurringBaseFromDate .. @LocalCustomerBaseFromDate
    -- "Lifetime" window:  @LocalLifetimeBaseFromDate  .. @LocalRecurringBaseFromDate
    -- NOTE(review): BETWEEN is inclusive on both ends, so an order stamped exactly on a
    -- shared boundary is counted in two windows -- confirm whether that is intended.
    DECLARE @LocalCustomerBaseFromDate DATETIME
    SET @LocalCustomerBaseFromDate = '1/1/2018'
    DECLARE @LocalCustomerBaseToDate DATETIME
    SET @LocalCustomerBaseToDate = '9/1/2019'
    DECLARE @LocalRecurringBaseFromDate DATETIME
    SET @LocalRecurringBaseFromDate = '1/1/2017'
    DECLARE @LocalLifetimeBaseFromDate DATETIME
    SET @LocalLifetimeBaseFromDate = '1/1/2016'

    -- Outer level (TTT): keep only emails flagged as exactly one customer type.
    SELECT
        *
    FROM (
        -- Middle level: per-email figures plus report-wide totals via window functions.
        -- These windows run before the outer WHERE, so they cover every email in TT.
        SELECT
            Email
            ,Date_Created
            ,BrandNewCustomer
            ,RecurringCustomer
            ,ReactivatedCustomer
            ,TotalOrders
            ,TotalCustomerValue
            ,TotalQuantity
            ,TotalOrdersNewBase
            ,TotalCustomerValueNewBase
            ,TotalQuantityNewBase
            ,TotalOrdersRecurringBase
            ,TotalCustomerValueRecurringBase
            ,TotalQuantityRecurringBase
            ,TotalOrdersLifetimeBase
            ,TotalCustomerValueLifetimeBase
            ,TotalQuantityLifetimeBase
            ,SUM(TotalCustomerValueNewBase) OVER () BaseCustomersTotal
            ,SUM(TotalCustomerValueRecurringBase) OVER () RecurringCustomersTotal
            ,SUM(TotalCustomerValueLifetimeBase) OVER () LifetimeCustomersTotal
            ,SUM(TotalCustomerValue) OVER () AllCustomersTotal
            -- DENSE_RANK ascending + DENSE_RANK descending - 1 gives the number of distinct
            -- values of the ORDER BY expression over the whole result set. Date_Created here
            -- is the per-email MAX from TT; emails outside the window collapse to one NULL
            -- value, which is counted as a single extra distinct value when present.
            ,(DENSE_RANK() OVER (ORDER BY (CASE WHEN Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN Email END) ASC) +
              DENSE_RANK() OVER (ORDER BY (CASE WHEN Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN Email END) DESC) - 1
             ) AS TotalCustomersOverCustomerBase
            ,(DENSE_RANK() OVER (ORDER BY (CASE WHEN Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN Email END) ASC) +
              DENSE_RANK() OVER (ORDER BY (CASE WHEN Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN Email END) DESC) - 1
             ) AS TotalCustomersOverRecurringBase
            ,(DENSE_RANK() OVER (ORDER BY (CASE WHEN Date_Created BETWEEN @LocalLifetimeBaseFromDate AND @LocalRecurringBaseFromDate THEN Email END) ASC) +
              DENSE_RANK() OVER (ORDER BY (CASE WHEN Date_Created BETWEEN @LocalLifetimeBaseFromDate AND @LocalRecurringBaseFromDate THEN Email END) DESC) - 1
             ) AS TotalCustomersOverLifetimeBase
            ,(DENSE_RANK() OVER (ORDER BY Email ASC)
              + DENSE_RANK() OVER (ORDER BY Email DESC)
              - 1) AS TotalCustomersOverBase
            ,SUM(CASE WHEN (BrandNewCustomer + RecurringCustomer + ReactivatedCustomer) = 1 THEN 1 ELSE 0 END) OVER () KeptCustomers
            ,SUM(CASE WHEN (BrandNewCustomer + RecurringCustomer + ReactivatedCustomer) = 0 THEN 1 ELSE 0 END) OVER () LostCustomers
        FROM (
            -- Per-email aggregation (TT): totals overall and per date window, plus 0/1 flags
            -- classifying the email as brand new, recurring or reactivated.
            SELECT
                T.Email
                ,MAX(T.Date_Created) Date_Created
                ,COUNT(*) TotalOrders
                ,SUM(T.Order_Sell_price) TotalCustomerValue
                ,SUM(T.Quantity_Ordered) TotalQuantity
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN 1 ELSE 0 END) TotalOrdersNewBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN Order_Sell_price ELSE 0 END) TotalCustomerValueNewBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN Quantity_Ordered ELSE 0 END) TotalQuantityNewBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN 1 ELSE 0 END) TotalOrdersRecurringBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN Order_Sell_price ELSE 0 END) TotalCustomerValueRecurringBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN Quantity_Ordered ELSE 0 END) TotalQuantityRecurringBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalLifetimeBaseFromDate AND @LocalRecurringBaseFromDate THEN 1 ELSE 0 END) TotalOrdersLifetimeBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalLifetimeBaseFromDate AND @LocalRecurringBaseFromDate THEN Order_Sell_price ELSE 0 END) TotalCustomerValueLifetimeBase
                ,SUM(CASE WHEN T.Date_Created BETWEEN @LocalLifetimeBaseFromDate AND @LocalRecurringBaseFromDate THEN Quantity_Ordered ELSE 0 END) TotalQuantityLifetimeBase
                -- Brand new: ordered in the new window, nothing in the recurring or lifetime windows.
                -- NOTE(review): the lifetime test sums Quantity_Ordered while the other tests count
                -- orders -- confirm this difference is intended.
                ,CASE WHEN
                    (   ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN 1 ELSE 0 END),0) >= 1
                    AND ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN 1 ELSE 0 END),0) = 0
                    AND ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalLifetimeBaseFromDate AND @LocalRecurringBaseFromDate THEN Quantity_Ordered ELSE 0 END),0) = 0)
                 THEN 1 ELSE 0 END BrandNewCustomer
                -- Recurring: ordered in both the new window and the recurring window.
                ,CASE WHEN
                    (   ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN 1 ELSE 0 END),0) >= 1
                    AND ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN 1 ELSE 0 END),0) >= 1)
                 THEN 1 ELSE 0 END RecurringCustomer
                -- Reactivated: ordered in the new window, skipped the recurring window,
                -- but has quantity in the lifetime window.
                ,CASE WHEN
                    (   ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalCustomerBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate) THEN 1 ELSE 0 END),0) >= 1
                    AND ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalRecurringBaseFromDate AND @LocalCustomerBaseFromDate THEN 1 ELSE 0 END),0) = 0
                    AND ISNULL(SUM(CASE WHEN T.Date_Created BETWEEN @LocalLifetimeBaseFromDate AND @LocalRecurringBaseFromDate THEN Quantity_Ordered ELSE 0 END),0) >= 1)
                 THEN 1 ELSE 0 END ReactivatedCustomer
            FROM (
                -- Innermost level (T): one row per qualifying order line, numbered per email
                -- by ascending Coal_Date.
                SELECT
                    F.Email
                    ,F.Coal_Date Date_Created
                    ,Month(F.Coal_Date) Month
                    ,Year(F.Coal_Date) Year
                    ,F.Customer_Purchase_Order_Number
                    ,F.Order_Status
                    ,Row_Number() OVER (PARTITION BY Email ORDER BY Coal_Date ASC) OrderCount
                    ,F.Order_Sell_price
                    ,F.Order_Quantity_Ordered Quantity_Ordered
                FROM
                    FinalEcomTable F
                WHERE
                    1=1
                    -- A NULL parameter disables the corresponding filter.
                    AND (F.Company_Code = @LocalCompanyCode OR @LocalCompanyCode IS NULL)
                    AND (F.Division_Code = @LocalDivisionCode OR @LocalDivisionCode IS NULL)
                    AND F.Coal_Date BETWEEN @LocalLifetimeBaseFromDate AND DATEADD(dayofyear, 1, @LocalCustomerBaseToDate)
                    AND F.Order_Status <> 'CANCELLED'
                    AND F.Odet_Line_Number = 1
            ) T
            GROUP BY T.Email
        ) TT
    ) TTT
    WHERE (BrandNewCustomer + RecurringCustomer + ReactivatedCustomer) = 1
    ORDER BY email DESC
END
And here is the full execution plan:
https://www.brentozar.com/pastetheplan/?id=SkDyXPfDH
Those 4 dense_rank lines by themselves almost double the run time. I realize now that it's usually these kinds of lines that do that.
I'll try to explain a bit about my thinking behind the query. So it is based on the same table as my first query. In reality, I am dealing with lots of data across three different linked servers, so I decided to create one master table that I would update every day and instead of calling to the linked servers and causing everything to run super slow, I would just call to that table.
So the innermost query takes each customer email and then, for each email, gets each order and which order number it is (first, second, etc.). Is it a problem that I am using OVER () for my Row_Number? I would want to GROUP BY Email, but then how would I get each actual order number? If I group by email, then I have to do something like MAX() of each order number, but I want the actual order number. Is there any way to do this?
Then the next outer query takes this data and actually groups by email. Over this group by I calculate totals over certain date ranges. The idea for this report is that there are three date ranges. New, which is what the actual report is based on and that range is something like the past 3 months. I look at all the customers in the past three months and then I see if they have any purchases in the recurring date range (a year before the three months) or the lifetime date range. Then I determine whether they are new customer or a recurring customer (or reactivated) by just putting a 1 on which type of customer it is.
Then my final outer query takes that data and calculates overall totals like Total value of all my recurring customers or total kept vs lost customers and so on. And like I mentioned before the dense_ranks really trip me up here. I feel like if I could add a Group By to this last query, I can solve a lot of my performance problems, but I can't seem to figure out how to integrate it.
Currently, this query runs about 10-15 seconds and I feel it could be instant. I tried creating the index the execution plan suggested but it didn't do anything.

Related

SQL joining most recent event by criteria to missing value

I have a SQL table that records interactions and the changes that happen in an interaction by interactionkey, user, group, and skill. I want to find the duration of each of the actions (A,B,C) by the grouping variables. Whenever action C happens, the skillKey is left blank (not NULL) and I need it to take the value of the last Skill in that interaction by the user and group so it is grouped together. The first table is the raw SQL data for 1 interaction, and the second is how I need it to look. Edit: I'm using Microsoft SQL Server Management Studio.
Here's what I have so far, but it doesn't account for the last skill the user used in the group and interactionkey, so the SkillKey remains blank and its duration is added up separately:
-- Pivots summed action durations into one column per action (A, B, C) for each
-- interaction / user / calendar day / skill / group.
-- Rows whose SkillKey is blank still form their own group here; this query
-- does not carry the previous skill forward.
SELECT
    [InteractionKey],
    [User],
    [Date] AS [StartTime],   -- the derived table exposes only the day, not the raw StartTime
    [SkillKey],
    [GroupKey],
    SUM(CASE WHEN ActionKey = 'A' THEN ActionDuration ELSE 0 END) AS 'ActionADuration',
    SUM(CASE WHEN ActionKey = 'B' THEN ActionDuration ELSE 0 END) AS 'ActionBDuration',
    SUM(CASE WHEN ActionKey = 'C' THEN ActionDuration ELSE 0 END) AS 'ActionCDuration'
FROM
    (SELECT
        [ActionKey],
        [InteractionKey],
        [SkillKey],
        [GroupKey],
        SUM(ActionDuration) AS 'ActionDuration',
        [User],
        -- Every derived-table column needs a name; the outer query groups by [Date].
        CAST(StartTime AS DATE) AS [Date]
    FROM
        [InteractionTable]
    GROUP BY
        -- [User] is bracketed because USER is a reserved word in T-SQL.
        InteractionKey, [User], SkillKey, GroupKey, ActionKey,
        CAST(StartTime AS DATE)) sub
GROUP BY
    InteractionKey, [User], [Date], SkillKey, GroupKey
ORDER BY
    InteractionKey
Raw SQL Server table:
Desired output:
All you need is to prepare another "table" with SkillKey already filled as you need, and then use this table in your query.
I will use CTE in my code,
and it is different for different versions of SQL Server.
The first one is preferable, but it's for servers starting with 2012.
If you are on lower version use the second query.
-- for @@version >= 2012 (uses LAG)
-- cte: InteractionTable with a blank SkillKey replaced by the SkillKey of the
-- previous row for the same interaction / user / group.
-- Columns are listed explicitly: "select *, ... as SkillKey" would give the CTE
-- two columns named SkillKey, which SQL Server rejects.
-- NOTE(review): LAG looks back exactly one row, so two consecutive blank rows
-- would leave the second one blank -- confirm against the data.
with cte as
(
    select
        ActionKey,
        InteractionKey,
        GroupKey,
        ActionDuration,
        [User],
        StartTime,
        case
            when SkillKey <> ''
                then SkillKey
            else lag(SkillKey) over (partition by InteractionKey, [User], GroupKey order by StartTime)
        end as SkillKey
    from InteractionTable
)
SELECT
    [InteractionKey],
    [User],
    [Date] AS [StartTime],   -- the derived table exposes only the day, not the raw StartTime
    [SkillKey],
    [GroupKey],
    SUM(CASE WHEN ActionKey = 'A' THEN ActionDuration ELSE 0 END) AS 'ActionADuration',
    SUM(CASE WHEN ActionKey = 'B' THEN ActionDuration ELSE 0 END) AS 'ActionBDuration',
    SUM(CASE WHEN ActionKey = 'C' THEN ActionDuration ELSE 0 END) AS 'ActionCDuration'
FROM
    (SELECT
        [ActionKey],
        [InteractionKey],
        [SkillKey],
        [GroupKey],
        SUM(ActionDuration) AS 'ActionDuration',
        [User],
        CAST(StartTime AS DATE) AS [Date]
    FROM
        cte
    GROUP BY
        -- [User] is bracketed because USER is a reserved word in T-SQL.
        InteractionKey, [User], SkillKey, GroupKey, ActionKey,
        CAST(StartTime AS DATE)) sub
GROUP BY
    InteractionKey, [User], [Date], SkillKey, GroupKey
ORDER BY
    InteractionKey
The second:
-- for @@version >= 2005 (no LAG; emulate it with ROW_NUMBER and a self-join)
-- r:   InteractionTable rows numbered by StartTime within each
--      interaction / user / group.
-- cte: each row paired with its immediate predecessor in the same partition;
--      a blank SkillKey takes the predecessor's value.
-- The join includes the partition keys: rn restarts at 1 in every partition, so
-- matching on rn alone would pair rows from unrelated interactions.
with r as
(
    select
        ActionKey,
        InteractionKey,
        SkillKey,
        GroupKey,
        ActionDuration,
        [User],
        StartTime,
        row_number() over (partition by InteractionKey, [User], GroupKey order by StartTime) as rn
    from InteractionTable
)
,cte as
(
    -- Explicit column list: "r1.*, ... as SkillKey" would give the CTE two
    -- columns named SkillKey, which SQL Server rejects.
    select
        r1.ActionKey,
        r1.InteractionKey,
        r1.GroupKey,
        r1.ActionDuration,
        r1.[User],
        r1.StartTime,
        case
            when r1.SkillKey <> ''
                then r1.SkillKey
            else r2.SkillKey
        end as SkillKey
    from r r1
    left join r r2
        on  r2.InteractionKey = r1.InteractionKey
        and r2.[User]         = r1.[User]
        and r2.GroupKey       = r1.GroupKey
        and r1.rn             = r2.rn + 1
)
SELECT
    [InteractionKey],
    [User],
    [Date] AS [StartTime],   -- the derived table exposes only the day, not the raw StartTime
    [SkillKey],
    [GroupKey],
    SUM(CASE WHEN ActionKey = 'A' THEN ActionDuration ELSE 0 END) AS 'ActionADuration',
    SUM(CASE WHEN ActionKey = 'B' THEN ActionDuration ELSE 0 END) AS 'ActionBDuration',
    SUM(CASE WHEN ActionKey = 'C' THEN ActionDuration ELSE 0 END) AS 'ActionCDuration'
FROM
    (SELECT
        [ActionKey],
        [InteractionKey],
        [SkillKey],
        [GroupKey],
        SUM(ActionDuration) AS 'ActionDuration',
        [User],
        CAST(StartTime AS DATE) AS [Date]
    FROM
        cte
    GROUP BY
        -- [User] is bracketed because USER is a reserved word in T-SQL.
        InteractionKey, [User], SkillKey, GroupKey, ActionKey,
        CAST(StartTime AS DATE)) sub
GROUP BY
    InteractionKey, [User], [Date], SkillKey, GroupKey
ORDER BY
    InteractionKey

SQL CASE WHEN THEN logics of calculating the types of a column

Have a tableA like this:
I want to get a tableB like this (grouped by startTime and endTime, with the count of Severity in the cnt column and the count of each type of Severity in its own column):
The simple count (cnt column) works fine. But for the others I tried CASE WHEN THEN logic and it does not seem to work (line 10, for example). Can you please help me with the SQL query in this case?
You need conditional aggregation :
-- One row per (starttime, endtime): total row count, then a tally of rows
-- for each severity value.
SELECT
    starttime,
    endtime,
    COUNT(*),
    SUM(CASE severity WHEN 'low'  THEN 1 ELSE 0 END),
    SUM(CASE severity WHEN 'med'  THEN 1 ELSE 0 END),
    SUM(CASE severity WHEN 'high' THEN 1 ELSE 0 END)
FROM table t
GROUP BY
    starttime,
    endtime;
Try below query: with case when
-- COUNT ignores NULLs, so a CASE without ELSE counts only the matching rows.
SELECT
    starttime,
    endtime,
    COUNT(severity)                            AS cnt,
    COUNT(CASE severity WHEN 'LOW'  THEN 1 END) cnt_low,
    COUNT(CASE severity WHEN 'MED'  THEN 1 END) cnt_med,
    COUNT(CASE severity WHEN 'HIGH' THEN 1 END) AS cnt_high
FROM tablename
GROUP BY
    starttime,
    endtime
use case when and aggregate function sum
-- Per (startTime, endTime): total rows plus one conditional SUM per severity.
SELECT
    startTime,
    endTime,
    COUNT(*)                                           AS Cnt,
    SUM(CASE Severity WHEN 'MED'  THEN 1 ELSE 0 END)   AS cntMed,
    SUM(CASE Severity WHEN 'LOW'  THEN 1 ELSE 0 END)   AS cntLow,
    SUM(CASE Severity WHEN 'HIGH' THEN 1 ELSE 0 END)   AS cntHIGH
FROM yourtable
GROUP BY
    startTime,
    endTime

SSRS: how to get top 3 in order Z to A

I try to get in my diagram the top 3 of the worst value in SSRS:
my Code:
-- Yesterday's counter totals per date / team / name, limited to three rows.
-- The TOP 3 has no ORDER BY, so which three groups come back is not defined.
SELECT *
FROM (
    SELECT TOP 3
        intervaldate AS Datum,
        Name,
        teamname AS Team,
        SUM(CASE CounterName WHEN 'Blown away'  THEN calculationUnits ELSE 0 END) AS Blown,
        SUM(CASE CounterName WHEN 'Thrown away' THEN calculationUnits ELSE 0 END) AS Thrown,
        SUM(CASE CounterName WHEN 'total'       THEN calculationUnits ELSE 0 END) AS Total
    FROM Counting
    WHERE
        -- Half-open range: midnight yesterday (inclusive) to midnight today (exclusive).
        IntervalDate >= dateadd(day, datediff(day, 1, GETDATE()), 0)
        AND IntervalDate < dateadd(day, datediff(day, 0, GETDATE()), 0)
        AND Name IN (SELECT SystemID FROM tSystemView WHERE SystemViewID = 2)
    GROUP BY
        intervaldate,
        teamName,
        Name
) c
Expression of the diagram:
=Sum(Fields!Blown.Value + Fields!Thrown.Value) / Sum(Fields!Total.Value) * 100
And I sorted it from highest to lowest
But it does not show me the right order.
If I choose every "Name" then it shows me other value then the top 3:
all Names with value:
top 3:
It's because your top 3 statement is in the SQL while your sort is in the report. Without an order by SQL picks the top 3 random records. Also, unless there is more SQL you are not showing, the outer select is unnecessary. Add an order by <column> desc below your group by.
-- Calcs: yesterday's Blown / Thrown / Total units per date, team and name.
with Calcs as
(
    select intervaldate as Datum,
           Name,
           TeamName,
           SUM(case when CounterName = 'Blown away' then calculationUnits else 0 end) as Blown,
           Sum(case when CounterName = 'Thrown away' then calculationUnits else 0 end) as Thrown,
           Sum(case when CounterName = 'total' then calculationUnits else 0 end) as Total
    from Counting
    where IntervalDate >= dateadd(day,datediff(day,1,GETDATE()),0)
      AND IntervalDate < dateadd(day,datediff(day,0,GETDATE()),0)
      and Name in (Select SystemID from tSystemView where SystemViewID = 2)
    group by intervaldate, teamName, Name
)
-- Rank the groups by their (Blown + Thrown) / Total ratio and keep the top three.
select b.*
from
(
    select a.*,
           -- NULLIF avoids a divide-by-zero error when a group has Total = 0; its ratio
           -- becomes NULL, which SQL Server sorts last under DESC (first under ASC).
           -- "* 1.0" forces decimal division in case calculationUnits is an integer
           -- type, where integer division would truncate most ratios to 0.
           row_number() over (order by (Blown + Thrown) * 1.0 / nullif(Total, 0) desc) as R_Ord -- Change between ASC/DESC depending on needs
    from Calcs a
) b
where R_Ord <= 3

How to show 0 value using COUNT and SELECT on a SQL query

I have ONLY 1 table called Meeting that stores all meeting requests.
This table can be EMPTY.
It has several columns including requestType (which can only be "MT") meetingStatus (can only be either pending, approved, denied or canceled) and meetingCreatedTime
I want to count how many requests of each status's type (in other words how many requests are pending, how many are approved, denied and canceled) for the last 30 days
Problem is that if there is no request then nothing display but I want to display 0, how do I do it? Here is my query now:
-- One row per requestType seen in the last 30 days, with a scalar subquery per
-- meeting status counting matching rows in that period. The subqueries are not
-- correlated to the outer requestType.
-- When no Meeting row falls in the period, the outer query returns no rows at all.
SELECT
    [requestType],
    (
        SELECT COUNT ([requestType])
        FROM [Meeting]
        WHERE CAST([meetingCreatedTime] AS DATE) >= CAST(DateAdd(DAY,-30,Getdate()) AS DATE)
          AND [meetingStatus] = 'Approved'
    ) As 'Approved',
    (
        SELECT COUNT ([requestType])
        FROM [Meeting]
        WHERE CAST([meetingCreatedTime] AS DATE) >= CAST(DateAdd(DAY,-30,Getdate()) AS DATE)
          AND [meetingStatus] = 'Pending'
    ) As 'Pending',
    (
        SELECT COUNT ([requestType])
        FROM [Meeting]
        WHERE CAST([meetingCreatedTime] AS DATE) >= CAST(DateAdd(DAY,-30,Getdate()) AS DATE)
          AND [meetingStatus] = 'Canceled'
    ) As 'Canceled',
    (
        SELECT COUNT ([requestType])
        FROM [Meeting]
        WHERE CAST([meetingCreatedTime] AS DATE) >= CAST(DateAdd(DAY,-30,Getdate()) AS DATE)
          AND [meetingStatus] = 'Denied'
    ) As 'Denied'
FROM [Meeting]
WHERE CAST([meetingCreatedTime] AS DATE) >= CAST(DateAdd(DAY,-30,Getdate()) AS DATE)
GROUP BY [requestType]
Result:
What I want is:
-- Status counts per request type for the last 30 days.
-- The date filter sits in the ON clause so a request type with no recent
-- meetings still yields a row (all counts 0) instead of being filtered out.
-- NOTE: RT is derived from Meeting itself, so a completely empty Meeting table
-- still produces no rows; a separate request-type lookup table would fix that.
SELECT
    RT.requestType,
    SUM(CASE WHEN M.meetingStatus = 'Approved' THEN 1 ELSE 0 END) AS Approved,
    SUM(CASE WHEN M.meetingStatus = 'Pending' THEN 1 ELSE 0 END) AS Pending,
    SUM(CASE WHEN M.meetingStatus = 'Canceled' THEN 1 ELSE 0 END) AS Canceled,
    SUM(CASE WHEN M.meetingStatus = 'Denied' THEN 1 ELSE 0 END) AS Denied  -- no trailing comma before FROM
FROM
    (SELECT DISTINCT requestType FROM Meeting) RT
LEFT OUTER JOIN Meeting M ON
    M.requestType = RT.requestType AND
    M.meetingCreatedTime >= DATEADD(DAY, -30, GETDATE())
GROUP BY
    RT.requestType
The SUMs are a much clearer (IMO) and much more efficient way of getting the counts that you need. Using the requestType table (assuming that you have one) lets you get results for every request type even if there are no meetings of that type in the date range. The LEFT OUTER JOIN to the meeting table allows the request type to still show up even if there are no meetings for that time period.
All of your CASTs between date values seem unnecessary.
Move those subqueries into simple sum/case statements:
-- Status counts for the last 30 days, driven by an inline one-row list of
-- request types so a row is returned even when Meeting has no matching rows.
select rt.request_type,
       sum(case when m.[meetingStatus] = 'Approved' then 1 else 0 end) as Approved,
       sum(case when m.[meetingStatus] = 'Pending' then 1 else 0 end) as Pending,
       sum(case when m.[meetingStatus] = 'Canceled' then 1 else 0 end) as Canceled,
       sum(case when m.[meetingStatus] = 'Denied' then 1 else 0 end) as Denied
from ( select 'MT' ) rt (request_type) --hopefully you have lookup table for this
left
join [Meeting] m on
     -- The Meeting column is named requestType; request_type exists only on rt.
     rt.request_type = m.[requestType] and
     CAST(m.[meetingCreatedTime] AS DATE) >= CAST(DateAdd(DAY,-30,Getdate()) AS DATE)
group
   by rt.request_type;
This is one possible approach to force one line to be visible in any case. Adapt this to your needs...
Copy it into an empty query window and execute... play around with the WHERE part...
-- Demo: always return exactly one row of counts.
-- A fallback row of zeros (Marker = 2) is appended with UNION ALL; TOP 1 ordered
-- by Marker picks the real counts when that SELECT yields a row, else the zeros.
-- Table variables are declared with '@' ('#' is not valid in DECLARE ... TABLE).
DECLARE @Test TABLE (ID INT IDENTITY, GroupingKey VARCHAR(100));
INSERT INTO @Test VALUES ('a'),('a'),('b');
SELECT TOP 1 tbl.CountOfA
            ,tbl.CountOfB
            ,tbl.CountOfC
FROM
(
    SELECT 1 AS Marker
          ,(SELECT COUNT(*) FROM @Test WHERE GroupingKey='a') AS CountOfA
          ,(SELECT COUNT(*) FROM @Test WHERE GroupingKey='b') AS CountOfB
          ,(SELECT COUNT(*) FROM @Test WHERE GroupingKey='c') AS CountOfC
    WHERE (1=1) --play here with (1=0) and (1=1)
    UNION ALL
    SELECT 2,0,0,0
) AS tbl
ORDER BY Marker

SQL Query: Cannot perform aggregate functions on sub queries

I have the following SQL query
-- Daily unit totals for user 'PJW' from 2013-01-01 onward, newest day first,
-- intended to split Units into chargeable and non-chargeable sums.
-- As written, SQL Server rejects this statement: each IN (SELECT ...) subquery
-- sits inside a SUM(), and an aggregate may not contain a subquery.
SELECT
[Date],
DATENAME(dw,[Date]) AS Day,
-- Units whose ChargeCode is listed in tblChargeCodes with Chargeable = 1
SUM(CASE WHEN ChargeCode IN (SELECT ChargeCode FROM tblChargeCodes WHERE Chargeable = 1) THEN Units ELSE 0 END) ChargeableTotal,
-- Units whose ChargeCode is listed in tblChargeCodes with Chargeable = 0
SUM(CASE WHEN ChargeCode IN (SELECT ChargeCode FROM tblChargeCodes WHERE Chargeable = 0) THEN Units ELSE 0 END) NotChargeableTotal,
SUM(Units) AS TotalUnits
FROM
tblTimesheetEntries
WHERE
UserID = 'PJW'
AND Date >= '2013-01-01'
GROUP BY
[Date]
ORDER BY
[Date] DESC;
But I get the error message:
Cannot perform an aggregate function on an expression containing an aggregate or a subquery.
Because I am using sub queries in the Case Else Summation.
How can I revise my query to get 2 x Sums of [Units] one for Chargeable = true, and one for Chargeable = false, even though the Chargeable field is in a different table to all the other information. The two tables are linked by ChargeCode which appears in both tblTimesheetEntries and tblChargeCodes.
Have you tried joining the tables on the chargeCode:
-- Daily unit totals for user 'PJW' from 2013-01-01 onward, newest day first.
-- The Chargeable flag comes from a LEFT JOIN on ChargeCode, so entries without
-- a matching charge code still count toward TotalUnits but toward neither split.
SELECT
    e.[Date],
    DATENAME(dw, e.[Date]) AS Day,
    SUM(CASE c.Chargeable WHEN 1 THEN e.Units ELSE 0 END) ChargeableTotal,
    SUM(CASE c.Chargeable WHEN 0 THEN e.Units ELSE 0 END) NotChargeableTotal,
    SUM(e.Units) AS TotalUnits
FROM tblTimesheetEntries e
LEFT JOIN tblChargeCodes c
    ON c.ChargeCode = e.ChargeCode
WHERE
    e.UserID = 'PJW'
    AND e.Date >= '2013-01-01'
GROUP BY
    e.[Date]
ORDER BY
    e.[Date] DESC;