SQL Server 2005 - Row_Number() - sql

I'm trying to understand the unusual behaviour seen when ordering results in a descending order using the row_number() function when using a DISITINCT on the outermost select in my query as below:
-- Question's original query: numbers the matching Application_Leg rows by
-- Reference_Number (descending) with ROW_NUMBER(), keeps the rows whose number
-- falls in the BETWEEN range, then applies DISTINCT to that page.
-- There is no ORDER BY on the outermost SELECT.
SELECT DISTINCT (ID), State_Id, Name_Of_Trip, Date_Of_Travel, Creation_Date, Locking_Id, Applicant_Name, Reference_Number, State_Name
FROM (
SELECT app.ID, app.State_Id, app.Name_Of_Trip, app.Date_Of_Travel, app.Creation_Date, app.Locking_Id, app.Applicant_Name, app.Reference_Number,
State.Name AS State_Name, ROW_NUMBER() OVER(ORDER BY Reference_Number DESC) as rowNum
FROM Application_Leg AS app
INNER JOIN State AS state
ON app.State_Id = state.ID
WHERE (app.State_Id = 5 OR app.State_Id = 6 OR app.State_Id = 8) AND app.Organisation_Id=12
-- Correlated on al.ID = app.ID: keeps only legs whose Leg_Number also appears on a row
-- joined to an Organisation with Approval_Required = 1 and with Mode_Of_Transport = 1.
AND Leg_Number IN
(SELECT DISTINCT Leg_Number
from Application_Leg as al
INNER JOIN
Organisation as org
ON al.Organisation_Id = org.ID
WHERE al.ID=app.ID AND org.Approval_Required=1 AND Mode_Of_Transport=1))
as pagedApplications
-- ROW_NUMBER() starts at 1, so this range keeps row numbers 1 through 10.
WHERE rowNum BETWEEN 0 AND (0 + 10)
When the outermost DISTINCT is taken out then the descending order is fine but when it is included the results are not shown in descending order.

ORDER BY in ROW_NUMBER clause does not guarantee the order of the resultset.
ROW_NUMBER usually uses sorting in the query plan which results in the fact that the values come out presorted.
This is a side effect and should not be relied upon.
DISTINCT uses Hash Match (Aggregate) which breaks sorting.
Add ORDER BY clause to the end of the query:
-- Pages the approved legs by Reference_Number (descending) and de-duplicates the page.
-- The ORDER BY inside ROW_NUMBER() only decides how row numbers are assigned;
-- the outer ORDER BY is what guarantees the order of the final result set.
SELECT DISTINCT
    ID, State_Id, Name_Of_Trip, Date_Of_Travel, Creation_Date,
    Locking_Id, Applicant_Name, Reference_Number, State_Name
FROM (
    SELECT
        app.ID, app.State_Id, app.Name_Of_Trip, app.Date_Of_Travel,
        app.Creation_Date, app.Locking_Id, app.Applicant_Name, app.Reference_Number,
        state.Name AS State_Name,
        ROW_NUMBER() OVER (ORDER BY app.Reference_Number DESC) AS rowNum
    FROM Application_Leg AS app
    INNER JOIN State AS state
        ON app.State_Id = state.ID
    WHERE app.State_Id IN (5, 6, 8)
      AND app.Organisation_Id = 12
      -- Correlated on al.ID = app.ID: the leg must belong to an organisation
      -- that requires approval and have Mode_Of_Transport = 1.
      AND app.Leg_Number IN
          (
              SELECT al.Leg_Number
              FROM Application_Leg AS al
              INNER JOIN Organisation AS org
                  ON al.Organisation_Id = org.ID
              WHERE al.ID = app.ID
                AND org.Approval_Required = 1
                AND Mode_Of_Transport = 1
          )
) AS pagedApplications
-- ROW_NUMBER() starts at 1, so this keeps row numbers 1 through 10.
WHERE rowNum BETWEEN 0 AND (0 + 10)
ORDER BY
    -- Must name a select-list column when DISTINCT is used; the column is
    -- Reference_Number (the earlier spelling ReferenceNumber does not exist).
    Reference_Number DESC
Also note that it will not return 10 distinct results, it will return DISTINCT of the first 10 results.
If you want the former, use this:
-- Returns the first 10 DISTINCT applications by Reference_Number (descending),
-- i.e. de-duplication happens before the TOP 10 cut rather than after paging.
SELECT DISTINCT TOP 10
    ID, State_Id, Name_Of_Trip, Date_Of_Travel, Creation_Date,
    Locking_Id, Applicant_Name, Reference_Number, State_Name
FROM (
    SELECT
        app.ID, app.State_Id, app.Name_Of_Trip, app.Date_Of_Travel,
        app.Creation_Date, app.Locking_Id, app.Applicant_Name, app.Reference_Number,
        state.Name AS State_Name
    FROM Application_Leg AS app
    INNER JOIN State AS state
        ON app.State_Id = state.ID
    WHERE app.State_Id IN (5, 6, 8)
      AND app.Organisation_Id = 12
      -- EXISTS replaces the IN (...) test: same ID and same Leg_Number, on a row
      -- whose organisation requires approval and whose Mode_Of_Transport is 1.
      AND EXISTS
          (
              SELECT al.Leg_Number
              FROM Application_Leg AS al
              INNER JOIN Organisation AS org
                  ON al.Organisation_Id = org.ID
              WHERE al.ID = app.ID
                -- Column is Leg_Number (was misspelled LegNumber).
                AND al.Leg_Number = app.Leg_Number
                AND org.Approval_Required = 1
                AND Mode_Of_Transport = 1
          )
) AS pagedApplications
ORDER BY
    -- Column is Reference_Number (was misspelled ReferenceNumber).
    Reference_Number DESC

Have you tried adding an order by to your outer select?

Related

I need to only select the minimum value of my query

I have a view created in SQL Server Management Studio that brings in certain data, I need to only select the rows with the minimum sequence. For example, in the screenshot see the job number "50773-4", I would only need to see the row with SEQ number 2. I've tried to Group by Min, but to no avail. Any help would be appreciated.
-- Question's original view query: returns MIN(Sequence) for every group of
-- Job, Numeric2, UVText4, Job.Status, Job_Operation.Status, Decimal1, Work_Center
-- and Department, restricted to groups whose UVText4 is 'Machining'.
-- Because Work_Center and Department are part of the GROUP BY, a job yields one row
-- per distinct combination of those columns, not a single minimum-sequence row.
SELECT
TOP (100) PERCENT dbo.Job_Operation.Job,
MIN(dbo.Job_Operation.Sequence) AS SEQ,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department
FROM
dbo.Job_Operation
INNER JOIN dbo.Job ON dbo.Job_Operation.Job = dbo.Job.Job
INNER JOIN dbo.User_Values ON dbo.Job.User_Values = dbo.User_Values.User_Values
INNER JOIN dbo.Work_Center ON dbo.Job_Operation.Work_Center = dbo.Work_Center.Work_Center
GROUP BY
dbo.Job_Operation.Job,
dbo.User_Values.Numeric2,
dbo.Work_Center.UVText4,
dbo.Job.Status,
dbo.Job_Operation.Status,
dbo.User_Values.Decimal1,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department
-- Filters on a grouping column, so only 'Machining' groups survive.
HAVING
(dbo.Work_Center.UVText4 = 'Machining')
ORDER BY
dbo.User_Values.Decimal1 DESC,
SEQ
[enter image description here]
I would try the RANK() window function. Perhaps:
-- Sketch only: column1/column2 are placeholders and no FROM clause is given.
-- Ranks the rows of each job by seq; rows that tie on seq share the same rank.
SELECT column1,
column2,
rank() OVER (PARTITION BY job ORDER BY seq) AS seq_by_job
Then use this as a nested statement, and filter on only the min rank (i.e. WHERE nested_statement.seq_by_job = 1)
here is one way :
-- Returns, for each job, the 'Machining' operation with the lowest Sequence.
SELECT
    TOP (100) PERCENT
    Job,
    Sequence AS SEQ,
    Work_Center,
    Department
FROM (
    SELECT
        jo.Job,
        jo.Sequence,
        jo.Work_Center,
        wc.Department,
        uv.Decimal1,
        -- Number the operations of each job by Sequence. Partitioning by Job alone
        -- gives exactly one rn = 1 row per job; also partitioning by Work_Center /
        -- Department would give one row per work center instead.
        ROW_NUMBER() OVER (PARTITION BY jo.Job ORDER BY jo.Sequence ASC) AS rn
    FROM dbo.Job_Operation AS jo
    INNER JOIN dbo.Job AS j
        ON jo.Job = j.Job
    INNER JOIN dbo.User_Values AS uv
        ON j.User_Values = uv.User_Values
    INNER JOIN dbo.Work_Center AS wc
        ON jo.Work_Center = wc.Work_Center
    -- Filter before numbering: UVText4 is not exposed by the derived table, and
    -- filtering afterwards would drop jobs whose lowest sequence is not Machining.
    WHERE wc.UVText4 = 'Machining'
) AS tt
WHERE rn = 1
-- Same ordering as the question's query; Decimal1 is carried through for this purpose.
ORDER BY
    Decimal1 DESC,
    SEQ
You can do:
-- grouped_ops: lowest Sequence per group of job, work center, department and the
--              status / user-value columns, kept only where UVText4 is 'Machining'.
-- ranked_ops:  numbers each job's grouped rows by that lowest sequence.
-- Final query: keeps the first-numbered row per job, highest Decimal1 first.
WITH grouped_ops AS (
    SELECT
        jo.Job,
        MIN(jo.Sequence) AS SEQ,
        jo.Work_Center,
        wc.Department,
        uv.Decimal1
    FROM dbo.Job_Operation AS jo
    INNER JOIN dbo.Job AS j
        ON jo.Job = j.Job
    INNER JOIN dbo.User_Values AS uv
        ON j.User_Values = uv.User_Values
    INNER JOIN dbo.Work_Center AS wc
        ON jo.Work_Center = wc.Work_Center
    GROUP BY
        jo.Job,
        uv.Numeric2,
        wc.UVText4,
        j.Status,
        jo.Status,
        uv.Decimal1,
        jo.Work_Center,
        wc.Department
    HAVING wc.UVText4 = 'Machining'
),
ranked_ops AS (
    SELECT
        g.*,
        ROW_NUMBER() OVER (PARTITION BY g.Job ORDER BY g.SEQ) AS rn
    FROM grouped_ops AS g
)
SELECT job, seq, work_center, department
FROM ranked_ops
WHERE rn = 1
ORDER BY Decimal1 DESC

What will be the query for this?

-- NOTE(review): the opening of this statement looks truncated/garbled in this copy.
-- The definition of t1 (presumably a CTE built from public.stadium and public.match)
-- is not visible here — confirm against the original post.
JOIN public.match m ON (s.stadium_id = m.stadium_id)
group
AS (
)
-- Keeps, for each round_number, the row(s) ranked first by spectators (descending);
-- RANK() gives tied rows the same rank, so ties are all returned.
SELECT round_number
,stadium_name
,spectators
FROM (
SELECT round_number
,stadium_name
,spectators
,RANK() OVER (
PARTITION BY round_number ORDER BY spectators DESC
) AS rank1
FROM t1
) AS s1
WHERE rank1 = 1
<br>
Any smaller query than this?
I think you can just use window functions:
-- For each round, returns the match with the highest attendance and its stadium.
-- row_number() keeps exactly one row per round, even when attendances tie.
select ms.*
from (select m.round_number, s.stadium_name, m.no_spectators,
             row_number() over (partition by m.round_number
                                order by m.no_spectators desc) as seqnum
      from public.stadium s join
           public.match m
           on s.stadium_id = m.stadium_id
     ) ms
where ms.seqnum = 1
-- Alias m only exists inside the derived table; out here the column belongs to ms.
order by ms.round_number;
I don't see why aggregation would be needed for the inner query.
You can use a subquery to get the max first
-- For each round, returns the stadium(s) of the match(es) with the highest attendance
-- by joining every match to its round's maximum.
select m.round_number, s.stadium_name, a.MaxSpec
from public.stadium s
JOIN public.match m ON (s.stadium_id = m.stadium_id)
JOIN
(
    -- Highest attendance per round.
    select m.round_number, MAX(m.no_spectators) as MaxSpec
    from public.stadium s
    JOIN public.match m ON (s.stadium_id = m.stadium_id)
    group by m.round_number
) a
    -- Match on the round as well as the value; joining on the value alone would also
    -- return matches from other rounds that happen to have the same attendance.
    on  a.round_number = m.round_number
    and a.MaxSpec = m.no_spectators
Just one more way to skin this cat. Throw your MAX(no_spectators) into a WHERE clause.
-- For each round, returns the stadium(s) of the match(es) with the highest attendance.
SELECT
    m.round_number,
    s.stadium_name,
    m.no_spectators
FROM
    PUBLIC.stadium s
JOIN
    PUBLIC.match m
    ON s.stadium_id = m.stadium_id
WHERE
    -- Correlate the MAX on round_number so each round is compared with its own
    -- maximum; an uncorrelated MAX would return only the overall busiest match.
    m.no_spectators = (
        SELECT MAX(m2.no_spectators)
        FROM PUBLIC.match m2
        WHERE m2.round_number = m.round_number
    );
That should do for an intro class.

Multiple subquery join in View with group by returns duplicate rows

I have created a view using subquery but I want this view to return few mendatory column which cant be added in group by subquery, so I have to create one more select statement and join with other group by subquery
I am come up with following query,
But problem I am facing is if group by seller has 28 rows it returns 28 duplicate rows, also I want whole query to order by TotalOrderItem.
-- Question's original view: joins per-seller aggregates (aggrgateDT) to the raw
-- purchase rows (RequiredColumns) to pick up Id/Created/Modified/CreatorId/ModifierId.
Alter VIEW [dbo].[SellersPerformance] AS
Select
RequiredColumns.Id as Id,
aggrgateDT.SellerId as SellerId,
aggrgateDT.TenantId as TenantId,
aggrgateDT.Active as Active,
aggrgateDT.TotalOrderedItem as TotalOrderItem,
aggrgateDT.MoveToPurchase as MoveToPurchase,
aggrgateDT.GoodPurchase as GoodPurchase,
RequiredColumns.Created as Created,
RequiredColumns.Modified as Modified,
RequiredColumns.CreatorId as CreatorId,
RequiredColumns.ModifierId as ModifierId
From
(
-- aggrgateDT: one row per (SellerId, TenantId, Active) with order counts.
-- count(o.Id) counts only purchase rows that found a matching AmazonOrder.
(Select
sellerId, p.TenantId, p.Active, count(*) as TotalOrderedItem,
count(*) - count(o.Id) as MoveToPurchase,
count(o.Id) as GoodPurchase,
count(case when o.ApplicationStatus = 'Perfect' then 1 end) as Perfect,
count(case when o.ApplicationStatus = 'R-Perfect' then 1 end) as R_Perfect
FROM [dbo].[AmazonOrderPurchaseInfo] p
left join [dbo].[AmazonOrder] o
on p.AmazonOrderId = o.Id
AND p.Id = o.[AmazonOrderPurchaseInfoId]
group by SellerId, p.TenantId, p.Active
order by TotalOrderedItem offset 0 rows
) aggrgateDT
-- RequiredColumns is not aggregated and is joined on SellerId only, so every
-- purchase row of a seller produces its own output row for that seller's aggregate.
Left outer Join (
SELECT
NEWID() Id,
purchase.Created AS Created,
purchase.Modified AS Modified,
purchase.CreatorId AS CreatorId,
purchase.ModifierId AS ModifierId,
purchase.SellerId As SellerId
From dbo.AmazonOrderPurchaseInfo purchase
) RequiredColumns ON aggrgateDT.SellerId = RequiredColumns.SellerId
)
GO
You may try Group by for this.
-- Seller performance view: per-seller order counts joined to the purchase rows'
-- audit columns, with an outer GROUP BY to collapse rows that are identical in
-- every selected column.
Alter VIEW [dbo].[SellersPerformance] AS
select
    res.Id, res.SellerId, res.TenantId, res.Active, res.TotalOrderItem,
    res.MoveToPurchase, res.GoodPurchase, res.Created, res.Modified,
    res.CreatorId, res.ModifierId
from
(
    Select
        RequiredColumns.Id as Id,
        aggrgateDT.SellerId as SellerId,
        aggrgateDT.TenantId as TenantId,
        aggrgateDT.Active as Active,
        aggrgateDT.TotalOrderedItem as TotalOrderItem,
        aggrgateDT.MoveToPurchase as MoveToPurchase,
        aggrgateDT.GoodPurchase as GoodPurchase,
        RequiredColumns.Created as Created,
        RequiredColumns.Modified as Modified,
        RequiredColumns.CreatorId as CreatorId,
        RequiredColumns.ModifierId as ModifierId
    From
    (
        -- One row per (SellerId, TenantId, Active) with order counts.
        Select
            sellerId, p.TenantId, p.Active, count(*) as TotalOrderedItem,
            count(*) - count(o.Id) as MoveToPurchase,
            count(o.Id) as GoodPurchase,
            count(case when o.ApplicationStatus = 'Perfect' then 1 end) as Perfect,
            count(case when o.ApplicationStatus = 'R-Perfect' then 1 end) as R_Perfect
        FROM [dbo].[AmazonOrderPurchaseInfo] p
        left join [dbo].[AmazonOrder] o
            on p.AmazonOrderId = o.Id
            AND p.Id = o.[AmazonOrderPurchaseInfoId]
        group by SellerId, p.TenantId, p.Active
        order by TotalOrderedItem offset 0 rows
    ) aggrgateDT
    Left outer Join
    (
        -- NOTE(review): NEWID() produces a different value for every row, so rows
        -- can only collapse in the outer GROUP BY if Id is made deterministic.
        SELECT
            NEWID() Id,
            purchase.Created AS Created,
            purchase.Modified AS Modified,
            purchase.CreatorId AS CreatorId,
            purchase.ModifierId AS ModifierId,
            purchase.SellerId As SellerId
        From dbo.AmazonOrderPurchaseInfo purchase
    ) RequiredColumns ON aggrgateDT.SellerId = RequiredColumns.SellerId
) as res
-- The GROUP BY belongs to the outer query (after the derived table is closed and
-- aliased as res), and the column is TenantId (was misspelled TenandId).
group by
    res.Id, res.SellerId, res.TenantId, res.Active, res.TotalOrderItem,
    res.MoveToPurchase, res.GoodPurchase, res.Created, res.Modified,
    res.CreatorId, res.ModifierId
GO
Here if Id, Created, Modified, CreatorId, ModifierId columns will have same id then you may get your expected result.

Adding a Helper SQL Index

I have the following View which seems to work quickly enough but when I look at the Execution Plan, it shows the Top N Sort in the second query taking ~90% due to it being repeated for every row in the first query.
Should I be adding an Index to the Loan table to help the ORDER BY clause?
-- One row per Item, with the columns of its most recent Loan (if any), the number
-- of loans for the item, and the number of non-removed items of the same resource.
CREATE VIEW [dbo].[ResourceItemStatus] AS
SELECT
i.ID AS ItemID,
i.ResourceID,
i.DateAdded,
i.LocationID,
i.OwnerID,
i.Barcode,
i.MissingReasonID,
i.DateRemoved,
ll.PatronID,
ll.ID AS LoanID,
ll.IssueDateTime,
ll.DueDate,
ll.ReturnDateTime,
ll.LoanTypeID,
ll.RenewalCount,
ll.DeleteSummary,
ll.ReturnStatusID,
ll.FineID,
-- Correlated scalar subqueries: both are evaluated against the current Item row.
(SELECT COUNT(*) FROM Loan WHERE Loan.ItemID = i.ID) AS LoanCount,
(SELECT COUNT(*) FROM Item WHERE Item.DateRemoved IS NULL AND Item.ResourceID = i.ResourceID) AS AvailableItemCount
FROM Item i
-- OUTER APPLY keeps items that have no loans; their ll.* columns are NULL.
OUTER APPLY
(
SELECT TOP 1
l.ID,
l.ItemID,
l.PatronID,
l.IssueDateTime,
l.DueDate,
l.ReturnDateTime,
l.LoanTypeID,
l.RenewalCount,
l.DeleteSummary,
l.ReturnStatusID,
l.FineID
FROM Loan l
WHERE l.ItemID = i.ID
-- Latest loan first; l.ID breaks ties between loans with the same IssueDateTime.
ORDER BY l.IssueDateTime DESC, l.ID DESC
) AS ll
Try Windowed Aggregates instead of Scalar Subqueries/Outer Apply:
-- One row per Item with its most recent Loan (if any), computed with windowed
-- aggregates instead of per-row scalar subqueries / OUTER APPLY.
SELECT
    i.ID AS ItemID,
    i.ResourceID,
    i.DateAdded,
    i.LocationID,
    i.OwnerID,
    i.Barcode,
    i.MissingReasonID,
    i.DateRemoved,
    ll.PatronID,
    ll.ID AS LoanID,
    ll.IssueDateTime,
    ll.DueDate,
    ll.ReturnDateTime,
    ll.LoanTypeID,
    ll.RenewalCount,
    ll.DeleteSummary,
    ll.ReturnStatusID,
    ll.FineID,
    -- Items without any loan have no ll row, so report 0 rather than NULL.
    COALESCE(ll.LoanCount, 0) AS LoanCount,
    -- Counts the non-removed items sharing this item's resource. The table is
    -- aliased as i, so the column must be referenced through that alias.
    COUNT(CASE WHEN i.DateRemoved IS NULL THEN 1 END)
        OVER (PARTITION BY i.ResourceID) AS AvailableItemCount
FROM Item AS i
LEFT JOIN
(
    SELECT
        l.ID,
        l.ItemID,
        l.PatronID,
        l.IssueDateTime,
        l.DueDate,
        l.ReturnDateTime,
        l.LoanTypeID,
        l.RenewalCount,
        l.DeleteSummary,
        l.ReturnStatusID,
        l.FineID,
        -- Total loans of the item, repeated on each of its loan rows.
        COUNT(*) OVER (PARTITION BY l.ItemID) AS LoanCount,
        -- rn = 1 marks the latest loan; l.ID breaks ties on IssueDateTime.
        ROW_NUMBER()
            OVER (PARTITION BY l.ItemID
                  ORDER BY l.IssueDateTime DESC, l.ID DESC) AS rn
    FROM Loan AS l
) AS ll
    ON ll.ItemID = i.ID
    -- At most one ll row per item, so the join does not multiply Item rows.
    AND ll.rn = 1

How do I group by the most recent date?

I have a HISTORY table that has multiple rows for the same record and I am trying to get the latest (closest to today's date) record. I am attempting to group by the closest date but am having a difficult time. Please check out the query below and advise me.
-- Question's original query: every complaint (Qry1) left-joined to its
-- 'Resolved_Ind' history rows (Qry2), one output row per matching history row.
SELECT DISTINCT *
FROM
(SELECT etc.Complaint.Complaint_ID AS Complaint_ID
FROM etc.Complaint) AS Qry1
LEFT JOIN
(SELECT etc.Complaint.Complaint_ID AS Complaint_ID,
o.Action_User AS Resolved_User,
o.Action_Date AS LastActionDate
FROM etc.Complaint
LEFT OUTER JOIN etc.History as o
-- The complaint id is taken from a substring of Primary_Key starting at character 15.
ON SUBSTRING(Primary_Key,15,LEN(Primary_Key) - 15) = etc.Complaint.Complaint_ID
AND TABLE_NAME = 'Resolution' AND o.Field_Name = 'Resolved_Ind'
AND New_Value = 1) AS Qry2
ON Qry1.Complaint_ID = Qry2.Complaint_ID
-- NOTE(review): MAX() is used here although the query has no GROUP BY; this is the
-- part the question is asking about.
ORDER BY Qry1.Complaint_ID, MAX(Qry2.LastActionDate)
does this change help?
-- Every complaint with the user and date of its latest 'Resolved_Ind' history row.
SELECT DISTINCT *
FROM
    (SELECT etc.Complaint.Complaint_ID AS Complaint_ID FROM etc.Complaint) AS Qry1
LEFT JOIN
    (SELECT etc.Complaint.Complaint_ID AS Complaint_ID,
            h.Action_User AS Resolved_User,
            o.Action_Date AS LastActionDate
     FROM etc.Complaint
     LEFT OUTER JOIN
     (
         -- Latest resolution date per complaint. The history filters live here
         -- because the grouped result exposes only the id and the MAX date.
         SELECT SUBSTRING(Primary_Key,15,LEN(Primary_Key) - 15) AS hist_Complaint_ID,
                MAX(Action_Date) AS Action_Date
         FROM etc.History
         WHERE TABLE_NAME = 'Resolution'
           AND Field_Name = 'Resolved_Ind'
           AND New_Value = 1
         GROUP BY SUBSTRING(Primary_Key,15,LEN(Primary_Key) - 15)
     ) AS o
         ON o.hist_Complaint_ID = etc.Complaint.Complaint_ID
     -- Join back to History on the latest date to fetch the user of that row;
     -- Action_User cannot come out of the grouped subquery above.
     LEFT OUTER JOIN etc.History AS h
         ON SUBSTRING(h.Primary_Key,15,LEN(h.Primary_Key) - 15) = o.hist_Complaint_ID
        AND h.Action_Date = o.Action_Date
        AND h.TABLE_NAME = 'Resolution'
        AND h.Field_Name = 'Resolved_Ind'
        AND h.New_Value = 1
    ) AS Qry2
    ON Qry1.Complaint_ID = Qry2.Complaint_ID
ORDER BY Qry1.Complaint_ID, Qry2.LastActionDate
You can use ROW_NUMBER and a CTE to get it:
-- Every complaint with its latest 'Resolved_Ind' history row (user and date).
WITH cte AS (
    SELECT etc.Complaint.Complaint_ID AS Complaint_ID,
           o.Action_User AS Resolved_User,
           o.Action_Date AS LastActionDate,  -- comma was missing before row_number()
           -- rn = 1 marks the most recent history row of each complaint.
           row_number() over (partition by etc.Complaint.Complaint_ID
                              order by o.Action_Date desc) AS rn
    FROM etc.Complaint
    LEFT OUTER JOIN etc.History as o
        -- The complaint id is taken from a substring of Primary_Key.
        ON SUBSTRING(Primary_Key,15,LEN(Primary_Key) - 15) = etc.Complaint.Complaint_ID
        AND TABLE_NAME = 'Resolution' AND o.Field_Name = 'Resolved_Ind'
        AND New_Value = 1
)
-- Complaints without matching history still get one row (rn = 1) with NULL columns.
SELECT * FROM cte
WHERE rn = 1