Is it possible to rewrite this SQL query with multiple Inner Joins so that it executes much faster? - sql

I have a table with 200,000 rows. I have created a View where I am removing slices of data from this table based on different criteria which fit my definition of what constitutes a duplicate record. I have the code for doing so below and I was wondering if anyone could suggest a faster/more efficient method of writing this query. It currently takes about 20 seconds to execute but I was hoping for a couple of seconds at most to execute this query if not less. I am using SQL Server 2005. My knowledge of SQL is very beginner and I appreciate any help.
-- Returns the rows of dbo.dsm_hardware_basic that are row number 1 (ordered by
-- LastAgentExecution descending) in each of the six groupings joined below.
WITH dsm_hardware_basic_cte AS
(
    -- Source columns plus two derived user names: the text after the last '/'
    -- of each registration value, with '_ADMIN' removed.
    SELECT TOP 100 PERCENT
        hw.[UUID],
        hw.[Name],
        hw.[LastAgentExecution],
        hw.[MaxUserRegistration],
        REPLACE(RIGHT(hw.[MaxUserRegistration], CHARINDEX('/', REVERSE(hw.[MaxUserRegistration])) - 1), '_ADMIN', '') AS [MaxUserUsername],
        hw.[LastUserRegistration],
        REPLACE(RIGHT(hw.[LastUserRegistration], CHARINDEX('/', REVERSE(hw.[LastUserRegistration])) - 1), '_ADMIN', '') AS [LastUserUsername],
        hw.[IPAddress],
        hw.[HostName],
        hw.[MACAddress]
    FROM dbo.dsm_hardware_basic AS hw
)
SELECT TOP 100 PERCENT
    base.[UUID],
    base.[Name],
    base.[LastAgentExecution],
    base.[MaxUserRegistration],
    base.[LastUserRegistration],
    base.[IPAddress],
    base.[HostName],
    base.[MACAddress]
FROM dsm_hardware_basic_cte AS base
-- Grouping 1: same Name and MACAddress.
INNER JOIN
(
    SELECT
        g.[UUID],
        ROW_NUMBER() OVER (PARTITION BY g.[Name], g.[MACAddress] ORDER BY g.[LastAgentExecution] DESC) AS [NameMACRowNum]
    FROM dsm_hardware_basic_cte AS g
) AS by_name_mac
    ON by_name_mac.[UUID] = base.[UUID]
   AND by_name_mac.[NameMACRowNum] = 1
-- Grouping 2: same Name and HostName.
INNER JOIN
(
    SELECT
        g.[UUID],
        ROW_NUMBER() OVER (PARTITION BY g.[Name], g.[HostName] ORDER BY g.[LastAgentExecution] DESC) AS [NameHostNameRowNum]
    FROM dsm_hardware_basic_cte AS g
) AS by_name_host
    ON by_name_host.[UUID] = base.[UUID]
   AND by_name_host.[NameHostNameRowNum] = 1
-- Grouping 3: same HostName and MACAddress.
INNER JOIN
(
    SELECT
        g.[UUID],
        ROW_NUMBER() OVER (PARTITION BY g.[HostName], g.[MACAddress] ORDER BY g.[LastAgentExecution] DESC) AS [HostNameMACRowNum]
    FROM dsm_hardware_basic_cte AS g
) AS by_host_mac
    ON by_host_mac.[UUID] = base.[UUID]
   AND by_host_mac.[HostNameMACRowNum] = 1
-- Grouping 4: same HostName and IPAddress.
INNER JOIN
(
    SELECT
        g.[UUID],
        ROW_NUMBER() OVER (PARTITION BY g.[HostName], g.[IPAddress] ORDER BY g.[LastAgentExecution] DESC) AS [HostNameIPAddressRowNum]
    FROM dsm_hardware_basic_cte AS g
) AS by_host_ip
    ON by_host_ip.[UUID] = base.[UUID]
   AND by_host_ip.[HostNameIPAddressRowNum] = 1
-- Grouping 5: same Name and MaxUserUsername.
INNER JOIN
(
    SELECT
        g.[UUID],
        ROW_NUMBER() OVER (PARTITION BY g.[Name], g.[MaxUserUsername] ORDER BY g.[LastAgentExecution] DESC) AS [NameMaxUserRowNum]
    FROM dsm_hardware_basic_cte AS g
) AS by_name_max_user
    ON by_name_max_user.[UUID] = base.[UUID]
   AND by_name_max_user.[NameMaxUserRowNum] = 1
-- Grouping 6: same Name and LastUserUsername.
INNER JOIN
(
    SELECT
        g.[UUID],
        ROW_NUMBER() OVER (PARTITION BY g.[Name], g.[LastUserUsername] ORDER BY g.[LastAgentExecution] DESC) AS [NameLastUserRowNum]
    FROM dsm_hardware_basic_cte AS g
) AS by_name_last_user
    ON by_name_last_user.[UUID] = base.[UUID]
   AND by_name_last_user.[NameLastUserRowNum] = 1

I don't know what your needs are, but I'd try re-writing the query as such:
-- Single-scan rewrite of the six-join de-duplication query.
-- Step 1 derives the user names; step 2 numbers the rows within each duplicate
-- grouping; the final SELECT keeps a row only when it is row 1 in ALL six
-- groupings, which is what the chained INNER JOINs of the original required.
WITH hardware_with_usernames AS (
    -- The derived user names need their own CTE: a column alias cannot be
    -- referenced by a window function in the same SELECT list that defines it.
    SELECT
        d.[UUID]
        ,d.[Name]
        ,d.[LastAgentExecution]
        ,d.[MaxUserRegistration]
        ,REPLACE(RIGHT(d.[MaxUserRegistration], CHARINDEX('/', REVERSE(d.[MaxUserRegistration])) - 1),'_ADMIN','') AS [MaxUserUsername]
        ,d.[LastUserRegistration]
        ,REPLACE(RIGHT(d.[LastUserRegistration], CHARINDEX('/', REVERSE(d.[LastUserRegistration])) - 1),'_ADMIN','') AS [LastUserUsername]
        ,d.[IPAddress]
        ,d.[HostName]
        ,d.[MACAddress]
    FROM dbo.dsm_hardware_basic AS d
),
dsm_hardware_basic_cte AS (
    -- One row number per duplicate definition; 1 marks the row with the
    -- greatest LastAgentExecution in its group.
    SELECT
        u.[UUID]
        ,u.[Name]
        ,u.[LastAgentExecution]
        ,u.[MaxUserRegistration]
        ,u.[LastUserRegistration]
        ,u.[IPAddress]
        ,u.[HostName]
        ,u.[MACAddress]
        ,ROW_NUMBER() OVER (PARTITION BY u.[Name], u.[MACAddress] ORDER BY u.[LastAgentExecution] DESC) AS [NameMACRowNum]
        ,ROW_NUMBER() OVER (PARTITION BY u.[Name], u.[HostName] ORDER BY u.[LastAgentExecution] DESC) AS [NameHostNameRowNum]
        ,ROW_NUMBER() OVER (PARTITION BY u.[HostName], u.[MACAddress] ORDER BY u.[LastAgentExecution] DESC) AS [HostNameMACRowNum]
        ,ROW_NUMBER() OVER (PARTITION BY u.[HostName], u.[IPAddress] ORDER BY u.[LastAgentExecution] DESC) AS [HostNameIPAddressRowNum]
        ,ROW_NUMBER() OVER (PARTITION BY u.[Name], u.[MaxUserUsername] ORDER BY u.[LastAgentExecution] DESC) AS [NameMaxUserRowNum]
        ,ROW_NUMBER() OVER (PARTITION BY u.[Name], u.[LastUserUsername] ORDER BY u.[LastAgentExecution] DESC) AS [NameLastUserRowNum]
    FROM hardware_with_usernames AS u
)
SELECT
    c.[UUID]
    ,c.[Name]
    ,c.[LastAgentExecution]
    ,c.[MaxUserRegistration]
    ,c.[LastUserRegistration]
    ,c.[IPAddress]
    ,c.[HostName]
    ,c.[MACAddress]
FROM dsm_hardware_basic_cte AS c
WHERE
    -- AND, not OR: a row must survive every duplicate filter.
    c.[NameMACRowNum] = 1
    AND c.[NameHostNameRowNum] = 1
    AND c.[HostNameMACRowNum] = 1
    AND c.[HostNameIPAddressRowNum] = 1
    AND c.[NameMaxUserRowNum] = 1
    AND c.[NameLastUserRowNum] = 1
I think that your query and mine are logically equivalent. The optimizer might be smart enough to have reduced your query to mine, but give it a spin and see! A couple of notes:
I used table aliases in order to make it a bit more readable (in my opinion)
I removed the "top 100 percent" clause from your select. It's not needed; that's typically a hack that people put in place so they can do an order by in the view to get, well, an "ordered view". Don't do that. :)

According to your query plan, sorting by LastAgentExecution takes 19% of the time. Start by creating an index on this column.
However, if I were you, I would change the habit of using the "ROW_NUMBER() OVER (PARTITION BY [Name], [MACAddress] ORDER BY [LastAgentExecution] DESC)" type of syntax, because it does not seem to be very effective.

Instead of inner joins, try replacing them with the "Exists" clause like this
-- Keeps an outer dsm_hardware_basic_cte row only when that same UUID is
-- row number 1 within its (Name, MACAddress) group.
WHERE EXISTS
(
    -- EXISTS only tests for a matching row, so the select list is irrelevant.
    SELECT 1
    FROM
    (
        SELECT [UUID]
            ,ROW_NUMBER() OVER (PARTITION BY [Name], [MACAddress] ORDER BY [LastAgentExecution] DESC) AS [NameMACRowNum]
        FROM dsm_hardware_basic_cte
    ) AS duplicate_NameMAC_filtered
    WHERE duplicate_NameMAC_filtered.[UUID] = dsm_hardware_basic_cte.[UUID]
      AND duplicate_NameMAC_filtered.[NameMACRowNum] = 1
)
Not sure it should be Exists or Not Exists, but it will be simple to change once the rest is working.

Related

SQL select row with max value or distinct value and sum all

I have the following data that is returned to me. I need to get a distinct or max sum of all the commission by taxid for a single repnbr. The 'qtrlycommrep' column is the value I'm trying to get to, but not able to. For repnbr c590, I need to get the 854.66 commission amount, which is the max for each taxid.
What am I doing wrong?
Any help would be much appreciated!
Here's what I've tried so far. Using the Row_number
-- For each repnbr, returns the QtrLYComm of the row numbered 1 when that
-- rep's rows are ordered by QtrLYComm descending.
select distinct
    ranked.Repnbr,
    ranked.QtrLYComm as qtrlycommrep
from (
    select distinct
        repnbr,
        QtrLYComm,
        row_number() over (partition by repnbr order by QtrLYComm desc) as rn
    from #qtrly
) as ranked
where ranked.rn = 1
Cross Apply
-- For each repnbr, returns the largest qtrlycomm among that rep's rows.
select distinct
    q.repnbr,
    x.QtrLYComm as qtrlycommrep
from #qtrly as q
cross apply (
    select top 1
        i.QtrLYComm
    from #qtrly as i
    -- The outer column must be qualified: a bare "Repnbr" binds to i.repnbr,
    -- which makes the predicate always true and the apply uncorrelated.
    where i.repnbr = q.repnbr
    order by i.qtrlycomm desc
) as x;
inner join
-- Returns every #qtrly row whose qtrlycomm equals the maximum qtrlycomm
-- of its repnbr.
select
    q.repnbr,
    q.qtrlycomm as qtrlycommrep
from #qtrly as q
inner join (
    -- One row per repnbr carrying that rep's largest qtrlycomm.
    select
        repnbr,
        max(qtrlycomm) as maxvalue
    from #qtrly
    group by repnbr
) as m
    on  m.repnbr = q.repnbr
    and m.maxvalue = q.qtrlycomm;
order by row_number
-- TOP 1 WITH TIES keeps every row whose sort value ties for first place.
-- The sort value is the per-repnbr row number, so one row (the one numbered 1)
-- is returned per repnbr.
select top 1 with ties
    q.repnbr,
    q.qtrlycomm as qtrlycommrep
from #qtrly as q
order by
    row_number() over (
        partition by q.repnbr
        order by q.qtrlycomm desc
    )
You want one value per tax id. You need to include that. For instance:
-- Sums, per repnbr, the largest QtrLYComm of each (repnbr, taxid) pair.
select
    top_per_tax.Repnbr,
    sum(top_per_tax.QtrLYComm) as qtrlycommrep
from (
    -- seqnum = 1 marks the largest QtrLYComm within each (repnbr, taxid).
    select
        src.repnbr,
        src.QtrLYComm,
        row_number() over (partition by src.repnbr, src.taxid order by src.QtrLYComm desc) as seqnum
    from #qtrly as src
) as top_per_tax
where top_per_tax.seqnum = 1
group by top_per_tax.Repnbr;
However, I would be inclined to use two levels of aggregation:
-- Collapses identical (repnbr, taxid, QtrLYComm) rows first, then sums the
-- remaining QtrLYComm values per repnbr.
select
    uniq.Repnbr,
    sum(uniq.QtrLYComm) as qtrlycommrep
from (
    select distinct
        src.repnbr,
        src.taxid,
        src.QtrLYComm
    from #qtrly as src
) as uniq
group by uniq.Repnbr;

How to get the max row number in the Partition over by SQL Syntax?

We have duplicate file name record in the file name column on various dates. I need to pick the Max - Rownumber of filename and its corresponding date.
-- Numbers the rows of each [FileName] group and lists all rows with the
-- highest row numbers first.
with cte as
(
    select
        s.[FileName],
        s.ProcessDate,
        -- The window is ordered by the partition column itself, so the
        -- numbering inside a group follows no particular order.
        ROW_NUMBER() over (partition by s.[FileName] order by s.[FileName]) as RowNumber
    from StagingTable as s
)
select
    cte.[FileName],
    cte.ProcessDate,
    cte.RowNumber
from cte
order by cte.RowNumber desc
This is not addressing your question but I think this is what you want. This will give you the last ProcessDate with one SELECT:
-- One row per [FileName]: its latest ProcessDate and how many rows carry that name.
SELECT
    s.[FileName],
    MAX(s.ProcessDate) AS LastProcessDate,
    COUNT(*) AS FileNameOccurence
FROM StagingTable AS s
GROUP BY s.[FileName]
If so, don't make it more complicated with nested queries.
-- Returns, for each [FileName], the row with the most recent ProcessDate.
with cte as
(
    select [FileName], ProcessDate,
        -- Order by ProcessDate, not by the partition column: ordering by
        -- [FileName] inside a [FileName] partition ties every row, so row 1
        -- would be an arbitrary pick.
        ROW_NUMBER() over (partition by [FileName] order by ProcessDate desc) RowNumber
    from StagingTable
)
select * from cte
where RowNumber = 1
Assuming you actually want the latest file where there are more than one with the same file name - You don't necessarily need a CTE - a subquery would work just fine
-- Returns, for each [FileName], the row with the most recent ProcessDate.
select
    files.[FileName],
    files.ProcessDate,
    files.RN
from (
    -- RN = 1 is the latest ProcessDate within each [FileName].
    select
        s.[FileName],
        s.ProcessDate,
        ROW_NUMBER() over (partition by s.[FileName] order by s.ProcessDate desc) as RN
    from StagingTable as s
) as files
where files.RN = 1

SQL Server Partition Order - No tie DenseRank values even if rows are same

This question is best explained with an image and the script I have currently... How can I extract a FULL one row per assignment, with the lowest rank, and if there are 2 rows with a denserank as 1, then choose either of them?...
-- Adds a dense rank per [Assignment], ordered by [Text] descending.
-- Rows with equal [Text] inside an assignment share the same rank.
select
    src.*,
    DENSE_RANK() over (
        partition by src.[Assignment]
        order by src.[Text] desc
    ) as [DenseRank]
from [dbo].[CLEANSED_T3B_Step1_Res_Withdups____CP] as src
-- Keeps the rank-1 row of each [Assignment]; NewID() is the final sort key,
-- acting as the tie-breaker between rows with equal [Text].
select A.*
from
(
    select
        src.*,
        DENSE_RANK() over (
            partition by src.[Assignment]
            order by src.[Text] desc, NewID()
        ) as [DenseRank]
    from [dbo].[CLEANSED_T3B_Step1_Res_Withdups____CP] as src
) as A
where A.[DenseRank] = 1
Second script is working perfectly!
-- Materialises the rank-1 rows of each [Assignment] into
-- [dbo].[CLEANSED_T3B_Step1_COMPLETED].
-- Ranking order: first character of [Text] descending, then
-- [Diff_Doc_Clearing_Date] descending, then [Amount] ascending.
SELECT * INTO
[dbo].[CLEANSED_T3B_Step1_COMPLETED]
from
(
    select *
        ,Dense_RANK() over (
            partition by [Assignment]
            order by left([Text],1) desc, [Diff_Doc_Clearing_Date] desc, [Amount] asc
        ) as [DenseRank] -- the OVER clause must be closed before the column alias
    from [dbo].[CLEANSED_T3B_Step1_Res_Withdups____CP]
)
as A
where A.[DenseRank] = 1
No longer need just a random first Tied '1st place', now need to get the one with the highest day diff and then also the highest amount after. SO have adapted everything in this version 3.
It seems you don't want to use DENSE_RANK but ROW_NUMBER.
-- ROW_NUMBER never repeats a value inside a partition, so exactly one row
-- per assignment is numbered 1.
with cte as (
    select
        t.*,
        row_number() over (
            partition by t.assignment
            order by t.[text] desc
        ) as rn
    from tablename as t
)
select cte.*
from cte
where cte.rn = 1
Order by 'newid()' as the 'tie-breaker'
-- NEWID() as the last sort key is the tie-breaker between rows with equal [Text].
ORDER BY
    [Text],
    NEWID()

If Rownumber = 1 and Condition Then Condition

I've got a question about a sql result and how to achieve the following.
In the screenshot, there is a row number (RN) for every ID, and every ID has another column which holds a status of 'old' or 'processed'. What I want is: if RN = 1 and the status is 'processed', then all other rows of this ID should also have the status 'processed'.
Is there a possibility to achieve this in sql?
-- Numbers the rows of each [NODE_NAME], ordered by
-- REPORTING_RELEVANT_STATUS_ID descending and then BILLING_PERIOD descending.
SELECT
    ROW_NUMBER() OVER (
        PARTITION BY src.[NODE_NAME]
        ORDER BY src.REPORTING_RELEVANT_STATUS_ID DESC, src.BILLING_PERIOD DESC
    ) AS RN,
    src.[CI_EQUIPMENT_ID] AS ID_PART,
    src.[REPORTING_RELEVANT_STATUS_ID] AS REPORTING_RELEVANT,
    src.[BILLING_PERIOD],
    src.[NODE_NAME]
FROM Table AS src
Put your query in CTE? then JOIN it with actual table:
-- Lists every row of Table. When the row's CI_EQUIPMENT_ID matches an RN = 1
-- row whose status is 'PROCESSED', that status is shown instead of the row's own.
;WITH cte AS (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY src.[NODE_NAME]
            ORDER BY src.REPORTING_RELEVANT_STATUS_ID DESC, src.BILLING_PERIOD DESC
        ) AS RN,
        src.[CI_EQUIPMENT_ID] AS ID_PART,
        src.[REPORTING_RELEVANT_STATUS_ID] AS REPORTING_RELEVANT,
        src.[BILLING_PERIOD],
        src.[NODE_NAME]
    FROM Table AS src
)
SELECT
    t.[CI_EQUIPMENT_ID] AS ID_PART,
    -- c.RN is NULL exactly when the LEFT JOIN found no match.
    CASE
        WHEN c.RN IS NULL THEN t.[REPORTING_RELEVANT_STATUS_ID]
        ELSE c.REPORTING_RELEVANT
    END AS REPORTING_RELEVANT,
    t.[BILLING_PERIOD],
    t.[NODE_NAME]
FROM Table AS t
LEFT JOIN (
    SELECT
        cte.RN,
        cte.ID_PART,
        cte.REPORTING_RELEVANT
    FROM cte
    WHERE cte.RN = 1
      AND cte.REPORTING_RELEVANT = 'PROCESSED'
) AS c
    ON c.ID_PART = t.[CI_EQUIPMENT_ID]
Create a common table expression, then update the CTE values based on RN = 1 and Status = 'Processed' using a self join on the CTE.
Here:
-- Sets the status to 'Processed' on every row whose ID_PART has an RN = 1 row
-- that is already 'Processed'.
-- NOTE(review): updating through a CTE that is self-joined and contains
-- ROW_NUMBER() should be verified on the target SQL Server version.
;WITH CTE (RN, ID_PART, REPORTING_RELEVANT, BILLING_PERIOD, NODE_NAME) AS
(
    SELECT RN = ROW_NUMBER() OVER (PARTITION BY [NODE_NAME]
            ORDER BY REPORTING_RELEVANT_STATUS_ID DESC, BILLING_PERIOD DESC)
        ,[CI_EQUIPMENT_ID] AS ID_PART
        ,[REPORTING_RELEVANT_STATUS_ID] AS REPORTING_RELEVANT
        ,[BILLING_PERIOD]
        ,[NODE_NAME]
    FROM Table
)
UPDATE CTE1
-- The CTE exposes the status as REPORTING_RELEVANT; it has no "Status" column.
SET CTE1.REPORTING_RELEVANT = 'Processed'
FROM CTE AS CTE1
INNER JOIN CTE AS CTE2
    ON CTE1.ID_PART = CTE2.ID_PART
WHERE CTE2.RN = 1
  AND CTE2.REPORTING_RELEVANT = 'Processed'

Better solution for PARTITION BY?

I want to optimize this
-- Returns, per ApplicationID, the row with the latest AgreementStartDate,
-- together with the number of RM_TbPackages rows for that application.
WITH a AS
(
    SELECT
        d.*,
        ROW_NUMBER() OVER (
            PARTITION BY d.applicationid
            ORDER BY d.AgreementStartDate DESC
        ) AS rn,
        -- Correlated scalar subquery: evaluated against the current row's ApplicationID.
        (
            SELECT COUNT(*)
            FROM RM_TbPackages AS p
            WHERE p.ApplicationID = d.ApplicationID
        ) AS PackageCount
    FROM CM_VwSupplierApplications AS d
)
SELECT a.*
FROM a
WHERE a.rn = 1
ORDER BY a.ApplicationID
As per the comment, there is nothing wrong with the partition. One possible inefficiency is the subquery (select count(*) from RM_TbPackages where d.ApplicationID=ApplicationID) - a set based approach to this by computing all counts per Application and then joining to the count should improve performance:
-- Returns, per ApplicationID, the row with the latest AgreementStartDate plus
-- its package count, computing all counts once and joining to them.
WITH a AS
(
    SELECT
        -- d.* rather than *: a bare * would also pull in x.ApplicationID and
        -- x.PackageCount, giving the CTE duplicate column names.
        d.*,
        ROW_NUMBER() OVER (PARTITION BY d.ApplicationID ORDER BY d.AgreementStartDate DESC) AS rn,
        -- Applications without packages get 0, matching what the correlated
        -- COUNT(*) subquery returned.
        COALESCE(x.PackageCount, 0) AS PackageCount
    FROM CM_VwSupplierApplications AS d
    -- LEFT JOIN so applications with no packages are not dropped.
    LEFT JOIN
    (
        SELECT p.ApplicationID, COUNT(*) AS PackageCount
        FROM RM_TbPackages AS p
        GROUP BY p.ApplicationID
    ) AS x
        ON x.ApplicationID = d.ApplicationID
)
SELECT a.*
FROM a
WHERE a.rn = 1
ORDER BY a.ApplicationID;
This query will run faster since it is not making a subselect for every row in CM_VwSupplierApplications:
-- Returns, per ApplicationID, the row with the latest AgreementStartDate plus
-- the number of RM_TbPackages rows for that application.
;WITH a AS
(
    SELECT
        d.*,
        ROW_NUMBER() OVER (PARTITION BY d.ApplicationID ORDER BY d.AgreementStartDate DESC) AS rn
    FROM CM_VwSupplierApplications AS d
)
SELECT a.*, b.PackageCount
FROM a
OUTER APPLY
(
    SELECT COUNT(*) AS PackageCount
    FROM RM_TbPackages AS p
    -- Correlate on a: the alias d exists only inside the CTE definition.
    WHERE p.ApplicationID = a.ApplicationID
) AS b
WHERE a.rn = 1
ORDER BY a.ApplicationID
To improve it even more, you could consider index on table CM_VwSupplierApplications on the columns applicationid and AgreementStartDate