SQL Select TOP 1 for each group in subquery - sql

Good morning,
I want to alter my query in such a way, that only the top 1, filtered from h.started asc is selected.
select h.started, * from wshhistory h
join asset a on h.assetid = a.uid
inner join
(
select Count(*) as TotalLatest, a.uid, a.deleted from asset a
join wshhistory h on a.uid = h.assetid
where h.latest = 1
group by a.uid, a.deleted
having Count(*) > 1
) X
on X.uid = h.assetid
where X.deleted = 0 and h.latest = 1
order by h.assetid desc
I searched all over, and found in most posts, to use:
ROW_NUMBER() OVER (PARTITION BY a.uid ORDER BY h.started asc) as rn
But I can't seem to use this since I need use group by, and this results in the error message:
Column 'wshhistory.started' is invalid in the select list because it
is not contained in either an aggregate function or the GROUP BY
clause.
To give some extra info about my query:
I need to search where I have duplicates of Latest = 1 (table: wshhistory), of the same assetid. And then I need to set the them all on 0 except the latest one.

I think you want something like this:
with toupdate as (
select h.*,
row_number() over (partition by h.assetid order by h.started desc) as seqnum
from wshhistory h
where h.latest = 1
)
update toupdate
set latest = 0
where seqnum > 1 and
exists (select 1
from asset a
where a.uid = toupdate.assetid and a.deleted = 0
);
Sample data and desired results are much easier to work with than non-working queries.

Related

I need to only select the minimum value of my query

I have a view created in SQL Server Management Studio that brings in certain data, I need to only select the rows with the minimum sequence. For example, in the screenshot see the job number "50773-4", I would only need to see the row with SEQ number 2. I've tried to Group by Min, but to no avail. Any help would be appreciated.
SELECT
TOP (100) PERCENT dbo.Job_Operation.Job,
MIN(dbo.Job_Operation.Sequence) AS SEQ,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department
FROM
dbo.Job_Operation
INNER JOIN dbo.Job ON dbo.Job_Operation.Job = dbo.Job.Job
INNER JOIN dbo.User_Values ON dbo.Job.User_Values = dbo.User_Values.User_Values
INNER JOIN dbo.Work_Center ON dbo.Job_Operation.Work_Center = dbo.Work_Center.Work_Center
GROUP BY
dbo.Job_Operation.Job,
dbo.User_Values.Numeric2,
dbo.Work_Center.UVText4,
dbo.Job.Status,
dbo.Job_Operation.Status,
dbo.User_Values.Decimal1,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department
HAVING
(dbo.Work_Center.UVText4 = 'Machining')
ORDER BY
dbo.User_Values.Decimal1 DESC,
SEQ
[enter image description here]
I would try the RANK() window function. Perhaps:
SELECT column1,
column2,
rank() OVER (PARTITION BY job ORDER BY seq) AS seq_by_job
Then use this as a nested statement, and filter on only the min rank (i.e. WHERE nested_statement.seq_by_job = 1)
here is one way :
SELECT
TOP (100) PERCENT Job,
Sequence AS SEQ,
Work_Center,
Department
FROM
( select dbo.Job_Operation.Job,
dbo.Job_Operation.Sequence,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department,
dbo.User_Values.Decimal1 ,
ROW_NUMBER() over (partition by dbo.Job_Operation.Job,
dbo.User_Values.Numeric2,
dbo.Work_Center.UVText4,
dbo.Job.Status,
dbo.Job_Operation.Status,
dbo.User_Values.Decimal1,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department
Order by dbo.Job_Operation.Sequence asc) rn
FROM
dbo.Job_Operation
INNER JOIN dbo.Job ON dbo.Job_Operation.Job = dbo.Job.Job
INNER JOIN dbo.User_Values ON dbo.Job.User_Values = dbo.User_Values.User_Values
INNER JOIN dbo.Work_Center ON dbo.Job_Operation.Work_Center = dbo.Work_Center.Work_Center
) tt
WHERE rn = 1
and UVText4 = 'Machining'
You can do:
with
q as (
SELECT
dbo.Job_Operation.Job,
MIN(dbo.Job_Operation.Sequence) AS SEQ,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department,
dbo.User_Values.Decimal1
FROM
dbo.Job_Operation
INNER JOIN dbo.Job ON dbo.Job_Operation.Job = dbo.Job.Job
INNER JOIN dbo.User_Values
ON dbo.Job.User_Values = dbo.User_Values.User_Values
INNER JOIN dbo.Work_Center
ON dbo.Job_Operation.Work_Center = dbo.Work_Center.Work_Center
GROUP BY
dbo.Job_Operation.Job,
dbo.User_Values.Numeric2,
dbo.Work_Center.UVText4,
dbo.Job.Status,
dbo.Job_Operation.Status,
dbo.User_Values.Decimal1,
dbo.Job_Operation.Work_Center,
dbo.Work_Center.Department
HAVING
(dbo.Work_Center.UVText4 = 'Machining')
),
r as (
select *,
row_number() over(partition by job order by seq) as rn
from q
)
select job, seq, work_center, department
from r
where rn = 1
order by Decimal1 DESC

HIDE Column in Select query?

Hi I'm trying to hide the TotalRank Column that gets displayed during my results because I only needed it in this query to help sort items. I don't need this information displayed.
SELECT TOP 15 G.CharacterName, G.JobCode, G.PvPExp, D.PVPWin, D.PVPLose, D.PVPGiveUp, RANK() OVER (ORDER BY TotalRank ASC ) as TotalRank
FROM PvPRanking as G
INNER JOIN PVPScores as D
ON G.CharacterID = D.CharacterID
You can use th following
SELECT TOP 15 G.CharacterName, G.JobCode, G.PvPExp, D.PVPWin, D.PVPLose, D.PVPGiveUp
FROM PvPRanking as G
INNER JOIN PVPScores as D
ON G.CharacterID = D.CharacterID
ORDER BY RANK() OVER (ORDER BY TotalRank ASC )
You can create and alias to the resulting table and select everything from it except for the total rank column.
SELECT t1.CharacterName, t1.JobCode, t1.PvPExp, t1.PVPWin, t1.PVPLose, t1.PVPGiveUp
FROM
(SELECT TOP 15 G.CharacterName, G.JobCode, G.PvPExp, D.PVPWin, D.PVPLose, D.PVPGiveUp,
RANK() OVER (ORDER BY TotalRank ASC ) as TotalRank
FROM PvPRanking as G
INNER JOIN PVPScores as D
ON G.CharacterID = D.CharacterID) AS t1

What will be the query for this?

JOIN public.match m ON (s.stadium_id = m.stadium_id)
group
AS (
)
SELECT round_number
,stadium_name
,spectators
FROM (
SELECT round_number
,stadium_name
,spectators
,RANK() OVER (
PARTITION BY round_number ORDER BY spectators DESC
) AS rank1
FROM t1
) AS s1
WHERE rank1 = 1
<br>
Any smaller query than this?
I think you can just use window functions:
select ms.*
from (select m.round_number, s.stadium_name, m.no_spectators,
row_number() over (partition by m.round_number order by m.no_spectators desc) as seqnum
from public.stadium s join
public.match m
on s.stadium_id = m.stadium_id
) ms
where seqnum = 1
order by m.round_number;
I don't see why aggregation would be needed for the inner query.
You can use a subquery to get the max first
select m.round_number, s.stadium_name, MaxSpec
from public.stadium s
JOIN public.match m ON (s.stadium_id = m.stadium_id)
JOIN
(
select m.round_number, MAX(m.no_spectators) as MaxSpec
from public.stadium s
JOIN public.match m ON (s.stadium_id = m.stadium_id)
group by m.round_number
)a on m.no_spectators = a.MaxSpec
Just one more way to skin this cat. Throw your MAX(no_spectators) into a WHERE clause.
SELECT
m.round_number,
s.stadium_name,
m.no_spectators
FROM
PUBLIC.stadium s
JOIN
PUBLIC.match m
ON s.stadium_id = m.stadium_id
WHERE
m.no_spectators = (SELECT MAX(no_spectators) FROM PUBLIC.match);
That should do for an intro class.

show only MAX rows

i have a table which shows me workhours of postoffices, the problem is that sometimes there are duplicates: for example i have saturday showing twice from one postoffice with the same time.
Solution is to show only 1 saturday with MAX(ID), but i can't deal with that and tha think is that i don't have to show id in select.
this is my script
SELECT h.ID,
h.POSTINDEX,
H.LONGNAME_UA,
h.SHORTNAME_UA,
pt.LONGNAME_UA,
h.parent_Id,
WORKCOMMENT,
INTERVALTYPE,
TO_CHAR(TFROM, 'HH24:MI') AS TFROM,
TO_CHAR(TTO, 'HH24:MI'),
WD.NAME_UA,
WD.NAME_EN,
WD.NAME_RU,
WD.SHORTNAME_UA,
pt.isVPZ,
lr.NAME_UA,
lr.CODE
FROM ADDR_PO_WORKSCHEDULE tt
LEFT JOIN ADDR_POSTOFFICE h
ON tt.POSTOFFICE_ID = h.ID
INNER JOIN mdm_lockReason lr
ON lr.id = H.LOCK_REASON
INNER JOIN ADDR_POSTOFFICEtype pt
ON pt.ID = H.POSTOFFICETYPE_ID
INNER JOIN ADDR_PO_WORKDAYS wd
ON wd.ID = tt.dayofweek
where tt.datestop = TO_DATE('9999-12-31','YYYY-MM-DD') AND tt.postoffice_id = 8221
HAVING MAX(tt.ID)
ORDER BY h.postIndex,
h.POSTOFFICETYPE_ID,
dayofweek,
intervaltype,
tFrom,
tto
as you can see there i've add HAVING MAX(tt.ID) but i understand that it is incorrect and don't know how to solve that. help please!
You can use row_number():
with t as (
<your query here with no `having` and with tt.id as tt_id>
)
select t.*
from (select t.*, row_number() over (order by tt_id desc) as seqnum
from t
) t
where seqnum = 1;
If the source of your duplicate rows is coming from the ADDR_PO_WORKSCHEDULE table, then you can just de-dupe it before you use it in the rest of your query:
SELECT
h.ID, h.POSTINDEX, h.LONGNAME_UA, h.SHORTNAME_UA, pt.LONGNAME_UA, h.parent_Id,
WORKCOMMENT, INTERVALTYPE, TO_CHAR(TFROM, 'HH24:MI') AS TFROM, TO_CHAR(TTO, 'HH24:MI'),
WD.NAME_UA, WD.NAME_EN, WD.NAME_RU, WD.SHORTNAME_UA, pt.isVPZ, lr.NAME_UA, lr.CODE
FROM (
SELECT src.*
FROM (
SELECT t.*,
ROW_NUMBER() OVER(
PARTITION BY tt.postoffice_id, tt.dayofweek -- Group by post office / day of week
ORDER BY <order_columns> -- Rank rows within each group
) AS RowNum
FROM ADDR_PO_WORKSCHEDULE t
) src
WHERE RowNum = 1
) tt
LEFT JOIN ADDR_POSTOFFICE h ON tt.POSTOFFICE_ID = h.ID
INNER JOIN mdm_lockReason lr ON lr.id = H.LOCK_REASON
INNER JOIN ADDR_POSTOFFICEtype pt ON pt.ID = H.POSTOFFICETYPE_ID
INNER JOIN ADDR_PO_WORKDAYS wd ON wd.ID = tt.dayofweek
WHERE tt.datestop = TO_DATE('9999-12-31','YYYY-MM-DD')
AND tt.postoffice_id = 8221
ORDER BY h.postIndex, h.POSTOFFICETYPE_ID, dayofweek, intervaltype, tFrom, tto
Just replace with the columns you want to use to decide which of the "duplicate" rows to keep. If the order doesn't matter, just remove the ORDER BY altogether.
Also, you may want to qualify all column references, since you are selecting from multiple tables.
You can use analytical function as following:
Select * from
(Select <your select column list>,
Row_number() over (partition by h.postindex, trunc(tfrom) order by tt.id desc nulls last) as rn
From <from clause>
Where <where clause>
)
Where rn = 1;
group by is not needed.

Adding a Helper SQL Index

I have the following View which seems to work quickly enough but when I look at the Execution Plan, it shows the Top N Sort in the second query taking ~90% due to it being repeated for every row in the first query.
Should I be adding an Index to the Loan table to help the ORDER BY clause?
CREATE VIEW [dbo].[ResourceItemStatus] AS
SELECT
i.ID AS ItemID,
i.ResourceID,
i.DateAdded,
i.LocationID,
i.OwnerID,
i.Barcode,
i.MissingReasonID,
i.DateRemoved,
ll.PatronID,
ll.ID AS LoanID,
ll.IssueDateTime,
ll.DueDate,
ll.ReturnDateTime,
ll.LoanTypeID,
ll.RenewalCount,
ll.DeleteSummary,
ll.ReturnStatusID,
ll.FineID,
(SELECT COUNT(*) FROM Loan WHERE Loan.ItemID = i.ID) AS LoanCount,
(SELECT COUNT(*) FROM Item WHERE Item.DateRemoved IS NULL AND Item.ResourceID = i.ResourceID) AS AvailableItemCount
FROM Item i
OUTER APPLY
(
SELECT TOP 1
l.ID,
l.ItemID,
l.PatronID,
l.IssueDateTime,
l.DueDate,
l.ReturnDateTime,
l.LoanTypeID,
l.RenewalCount,
l.DeleteSummary,
l.ReturnStatusID,
l.FineID
FROM Loan l
WHERE l.ItemID = i.ID
ORDER BY l.IssueDateTime DESC, l.ID DESC
) AS ll
Try Windowed Aggregates instead of Scalar Subqueries/Outer Apply:
SELECT
i.ID AS ItemID,
i.ResourceID,
i.DateAdded,
i.LocationID,
i.OwnerID,
i.Barcode,
i.MissingReasonID,
i.DateRemoved,
ll.PatronID,
ll.ID AS LoanID,
ll.IssueDateTime,
ll.DueDate,
ll.ReturnDateTime,
ll.LoanTypeID,
ll.RenewalCount,
ll.DeleteSummary,
ll.ReturnStatusID,
ll.FineID,
coalesce(ll.LoanCount, 0)
COUNT(case when Item.DateRemoved IS NULL then 1 end)
over (partition by ResourceID) AS AvailableItemCount
FROM Item i
LEFT JOIN
(
SELECT
l.ID,
l.ItemID,
l.PatronID,
l.IssueDateTime,
l.DueDate,
l.ReturnDateTime,
l.LoanTypeID,
l.RenewalCount,
l.DeleteSummary,
l.ReturnStatusID,
l.FineID,
COUNT(*) over (partition by ItemId) AS LoanCount,
row_number()
over (partition by ItemId
order by l.IssueDateTime DESC, l.ID DESC) as rn
FROM Loan l
) as ll
on ll.ItemID = i.ID
and ll.rn = 1