Selecting multiple fields from row with max value of column, per group - sql

I'm quite certain I've painted myself into a corner and I can't figure my way out.
The Users table and OrderHistories tables both have 1+ million records:
-- Per-user report: e-mail, address, number of orders, cart subtotal, and the
-- date and subtotal of the user's most recent order.
SELECT
u.Id ,
u.Email AS EmailAddress ,
c.Address_Address1 AS "Address 1" ,
-- Correlated scalar subquery: counts this user's OrderHistories rows.
-- NOTE(review): this subquery correlates on u.UserName while the two below
-- correlate on u.Id -- confirm which column OrderHistories.UserId refers to.
(
SELECT
COUNT(*)
FROM
dbo.OrderHistories oh
WHERE
oh.UserId = u.UserName
) AS NumberOfOrders ,
Carts.SubtotalAmount AS CartTotal ,
-- Correlated scalar subquery: latest CreateDate among this user's orders.
(
SELECT
MAX(oh.CreateDate)
FROM
dbo.OrderHistories AS oh
WHERE
oh.UserId = u.Id
) AS LastOrderDate ,
-- Subtotal of the order whose CreateDate equals the user's latest CreateDate.
-- The inner derived table joins Users to OrderHistories with no reference to
-- the outer row; the correlation to the outer u.Id is applied only in the
-- enclosing WHERE clause.
(
SELECT
LastOrders.SubtotalAmount AS LastOrderSubtotal
FROM
(
SELECT
UserId ,
CreateDate ,
SubtotalAmount ,
MAX(CreateDate) OVER ( PARTITION BY UserId ) MyLastOrderDate
FROM
Users u
INNER JOIN dbo.OrderHistories oh
ON u.Id = oh.UserId
) AS LastOrders
WHERE
LastOrders.MyLastOrderDate = LastOrders.CreateDate
AND LastOrders.UserId = u.Id
) AS LastOrderSubtotal
FROM
Users u
INNER JOIN Customers AS c
ON u.Id = c.Id
LEFT JOIN dbo.Carts
ON c.Id = Carts.CustomerId
This particular subquery is my current problem (EXTREMELY inefficient), but I'm not experienced enough to understand exactly why, or how I should be doing it instead (I can't get there from here!):
-- Scalar subquery used as the LastOrderSubtotal column of the query above.
(
SELECT
LastOrders.SubtotalAmount AS LastOrderSubtotal
FROM
(
-- Every Users/OrderHistories pair, each tagged with the latest CreateDate in
-- its UserId partition. Nothing in this derived table refers to the outer row.
SELECT
UserId ,
CreateDate ,
SubtotalAmount ,
MAX(CreateDate) OVER ( PARTITION BY UserId ) MyLastOrderDate
FROM
Users u
INNER JOIN dbo.OrderHistories oh
ON u.Id = oh.UserId
) AS LastOrders
WHERE
-- Keep only the row(s) dated at the partition maximum, for the outer user.
LastOrders.MyLastOrderDate = LastOrders.CreateDate
AND LastOrders.UserId = u.Id
) AS LastOrderSubtotal
Anyone mind telling me how terrible I am and then segue right into a suggested improvement?

Just from looking at your query, you may be able to simplify it using cross apply() like so:
-- One row per user that has at least one order: contact details, cart subtotal,
-- total number of orders, and the date and subtotal of the newest order.
SELECT
    u.Id,
    u.Email                AS EmailAddress,
    c.Address_Address1     AS [Address 1],
    Carts.SubtotalAmount   AS CartTotal,
    latest.NumberOfOrders  AS NumberOfOrders,
    latest.CreateDate      AS LastOrderDate,
    latest.SubtotalAmount  AS LastOrderSubtotal
FROM Users AS u
INNER JOIN Customers AS c
    ON c.Id = u.Id
LEFT JOIN dbo.Carts
    ON Carts.CustomerId = c.Id
CROSS APPLY (
    -- Newest order of the current user. The windowed COUNT is evaluated over
    -- all of the user's orders before TOP keeps a single row.
    SELECT TOP (1)
        recent.CreateDate,
        recent.SubtotalAmount,
        COUNT(*) OVER (PARTITION BY recent.UserId) AS NumberOfOrders
    FROM dbo.OrderHistories AS recent
    WHERE recent.UserId = u.Id
    ORDER BY recent.CreateDate DESC
) AS latest
If you want rows that may not have an OrderHistory, switch to outer apply().
Reference:
apply() - msdn
The power of T-SQL's APPLY operator - Rob Farley
APPLY: It Slices! It Dices! It Does It All! - Brad Shulz

Related

How I can select highest review from a user?

I need to select reviews for product, but unique by user (i.e. one review from user).
With my code, I select all reviews, and I can see few reviews left by one user.
-- Reviews with status 2 for order products whose option belongs to product
-- sub-category 111 and is active, with the reviewer's details attached.
-- Returns every matching review, so one user can appear several times.
SELECT
tr.reviewText, tr.reviewDate, tr.reviewRating,
u.userName AS userName,
u.userFirstName AS userFirstName, u.userSurname AS userSurname,
u.countryId AS countryId
FROM
tblReviews tr
INNER JOIN
tblOrderProduct op ON op.orderProductId = tr.orderProductId
AND op.productOptionId IN (SELECT productOptionId
FROM tblProductOption
WHERE productSubCuId = 111
AND productOptionActive = 1)
-- Order and user are LEFT JOINed, so user columns are NULL when no match exists.
LEFT JOIN
tblOrder o ON o.orderId = op.orderId
LEFT JOIN
tblUser u ON u.userRandomId = o.userRandomId
WHERE
tr.reviewsStatusId = 2
-- Highest rating first, newest first within a rating; first page of 100 rows.
ORDER BY
tr.reviewRating DESC, tr.reviewDate DESC
OFFSET 0 ROWS FETCH NEXT 100 ROWS ONLY
Can I get just one review from each user?
Maybe I need select userId -> group results by userId and select one per group? [I tried to do so, but I didn't succeed :( ]
You can use row_number to number the reviews and select any one like below:
-- Returns at most one review per user (the user's most recent one), sorted by
-- rating then date, limited to the first 100 users.
-- Sorting and paging are applied AFTER the rn = 1 filter: paging inside the CTE
-- would cut the list to 100 raw reviews first, so the de-duplicated result
-- could contain fewer than 100 users and would carry no guaranteed order.
;with per_user_one_review
as
(SELECT tr.reviewText, tr.reviewDate, tr.reviewRating,
u.userName as userName,
u.userFirstName as userFirstName, u.userSurname as userSurname,
u.countryId as countryId,
-- rn = 1 marks the newest review within each user's partition.
-- NOTE(review): reviews with no matching tblUser row all share the NULL
-- partition and collapse to a single row -- confirm that is acceptable.
row_number() over (partition by u.userRandomId order by tr.reviewDate desc) rn
FROM tblReviews tr
INNER JOIN tblOrderProduct op
ON op.orderProductId = tr.orderProductId
AND op.productOptionId IN (
SELECT productOptionId FROM tblProductOption
WHERE productSubCuId = 111 AND productOptionActive = 1
)
LEFT JOIN tblOrder o ON o.orderId = op.orderId
LEFT JOIN tblUser u ON u.userRandomId = o.userRandomId
WHERE tr.reviewsStatusId = 2
)
select reviewText, reviewDate, reviewRating,
userName, userFirstName, userSurname, countryId, rn
from per_user_one_review
where rn = 1
ORDER BY reviewRating DESC, reviewDate DESC
OFFSET 0 ROWS FETCH NEXT 100 ROWS ONLY
It will pick the latest review (reviewDate desc) from the user.
If you need the last review you could use a join with the subquery for max review date grouped by orderProductId
(and as a suggestion you could use an inner join instead of an IN clause based on a subquery)
-- Latest approved (reviewsStatusId = 2) review per order product, for products
-- in sub-category 111 with an active option; first 100 rows by rating, then date.
select tr.reviewText
, tr.reviewDate
, tr.reviewRating
, u.userName
, u.userFirstName
, u.userSurname
, u.countryId
from tblReviews tr
INNER JOIN (
-- Latest review date per order product, computed over approved reviews only.
-- Without the status filter here, a newer unapproved review would set max_date
-- and the outer WHERE would then discard the order product entirely.
select max(reviewDate) max_date, orderProductId
from tblReviews
where reviewsStatusId = 2
group by orderProductId
) t1 on t1.orderProductId = tr.orderProductId and t1.max_date = tr.reviewDate
INNER JOIN tblOrderProduct op ON op.orderProductId = tr.orderProductId
-- Inner join on the filtered option list replaces the IN (subquery) predicate.
INNER JOIN (
SELECT productOptionId
FROM tblProductOption
WHERE productSubCuId = 111 AND productOptionActive = 1
) t2 ON op.productOptionId = t2.productOptionId
LEFT JOIN tblOrder o ON o.orderId = op.orderId
LEFT JOIN tblUser u ON u.userRandomId = o.userRandomId
WHERE tr.reviewsStatusId = 2
ORDER BY tr.reviewRating DESC, tr.reviewDate DESC
OFFSET 0 ROWS FETCH NEXT 100 ROWS ONLY

SQL query optimization - make only one join on table

I have a large SQL query, where I need to select some data.
-- Publications with type, publisher, journal and creator, plus a derived
-- status, status date, ordering flag and per-user e-mail counts, each computed
-- with its own correlated subquery.
-- NOTE(review): "#UserId" is probably the variable @UserId mangled in transit -- confirm.
SELECT p.Id, p.UserId, u.Name AS CreatedBy, p.JournalId, p.Title, pt.Name AS PublicationType, p.CreatedDate, p.MagazineTitle, /*ps.StatusId,*/ p.Authors, pb.Name AS Publisher, p.Draft,jns.Name AS JournalTitle,
-- StatusId: status at the publication's latest StatusDate, else ActionId + 6
-- of the first quote/sale line (lowest Id), else 1.
-- NOTE(review): the outer "SELECT StatusId" filters on StatusDate only, not on
-- PublicationId, so it can match status rows of other publications sharing
-- that date -- confirm whether that is intended.
ISNULL(
ISNULL(
(SELECT StatusId FROM [PublicationsStatus] WHERE StatusDate=
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id )),--AND ps.UserId = #UserId ORDER BY StatusDate DESC),
(SELECT TOP(1) ActionId + 6 FROM [PublicationsQuoteSaleLines] AS pqsl WHERE pqsl.PublicationId = p.Id ORDER BY pqsl.Id)
),
1
)AS StatusId
-- StatusDate: latest status date of the publication, else its creation date.
,ISNULL(
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id ),--AND ps.UserId = #UserId),
p.CreatedDate
) AS StatusDate
-- StDate: the same value with the latest status date cast to DATE.
,ISNULL(
(cast((SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id) as date) ),--AND ps.UserId = #UserId),
p.CreatedDate
) AS StDate
-- OrderCriteria: 0 when the derived StatusId (same expression as above) is
-- 1, 7 or 8; otherwise 1.
,CASE
WHEN ISNULL(
ISNULL(
(SELECT StatusId FROM [PublicationsStatus] WHERE StatusDate=
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id )),--AND ps.UserId = #UserId ORDER BY StatusDate DESC),
(SELECT TOP(1) ActionId + 6 FROM [PublicationsQuoteSaleLines] AS pqsl WHERE pqsl.PublicationId = p.Id ORDER BY pqsl.Id)
),
1 ) IN (1, 7, 8) THEN 0
ELSE 1 END AS OrderCriteria
-- E-mails sent to this user for the publication, by EmailType (1 and 3).
,(SELECT COUNT(*) FROM SentEmails AS se WHERE se.PublicationId = p.Id AND se.EmailType = 1 AND se.UserId = #UserId) AS NumberOfAlerts
,(SELECT COUNT(*) FROM SentEmails AS se WHERE se.PublicationId = p.Id AND se.EmailType = 3 AND se.UserId = #UserId) AS NumberOfReminders
FROM Publications AS p
LEFT JOIN PublicationTypes AS pt ON p.PublicationTypeId = pt.Id
LEFT JOIN Publishers AS pb ON p.PublisherId = pb.Id
LEFT JOIN Journals As jns ON p.JournalId = jns.Id
LEFT JOIN Users AS u ON u.Id = p.UserId
The problem is that the query is slow. As you can see, I have the same expression in both OrderCriteria and StatusId, and StatusDate comes from the same table.
I thought that I could improve the performance by making only one `LEFT JOIN`, something like this:
-- Attempted replacement for the per-column status subqueries.
LEFT JOIN (
SELECT
PublicationId,
StatusId AS StatusId,
StatusDate AS StatusDate
FROM [PublicationsStatus] WHERE StatusDate=
(
-- No WHERE or GROUP BY here: this is the single latest StatusDate in the whole
-- table, not the latest date per publication.
SELECT MAX(StatusDate) FROM PublicationsStatus
)
) AS ps ON ps.PublicationId = p.Id
but I did not get the same results this way.
Can you please advise?
I tried to simplify your query using a few CTE to avoid doing the same subquery multiple times. You can try this out and see if it's still slow.
-- Publications with type, publisher, journal and creator, plus a derived
-- status, status date, ordering flag and per-user e-mail counts.
-- Each lookup is computed once in a CTE or APPLY and reused, instead of being
-- repeated as a correlated subquery in several output columns.
-- Expects the variable @UserId to be declared by the caller.
;WITH MaxStatusDateByPublication AS
(
    -- Latest status date per publication.
    SELECT
        PublicationId = ps.PublicationId,
        MaxStatusDate = MAX(ps.StatusDate)
    FROM
        [PublicationsStatus] AS ps
    GROUP BY
        ps.PublicationId
),
StatusForMaxDateByPublication AS
(
    -- Status row(s) carrying that latest date, matched on BOTH publication and
    -- date. If two status rows of one publication share the latest date, both
    -- are returned and the publication appears twice in the final result.
    SELECT
        StatusId = ps.StatusId,
        m.PublicationId,
        m.MaxStatusDate
    FROM
        MaxStatusDateByPublication AS m
        INNER JOIN [PublicationsStatus] AS ps ON
            m.PublicationId = ps.PublicationId AND
            m.MaxStatusDate = ps.StatusDate
),
SentEmailsByPublicationAndType AS
(
    -- E-mails sent to @UserId, counted per publication and e-mail type.
    SELECT
        s.PublicationID,
        s.EmailType,
        AmountSentEmails = COUNT(1)
    FROM
        SentEmails AS s
    WHERE
        s.EmailType IN (1, 3) AND
        s.UserID = @UserId
    GROUP BY
        s.PublicationID,
        s.EmailType
)
SELECT
    p.Id,
    p.UserId,
    u.Name AS CreatedBy,
    p.JournalId,
    p.Title,
    pt.Name AS PublicationType,
    p.CreatedDate,
    p.MagazineTitle,
    p.Authors,
    pb.Name AS Publisher,
    p.Draft,
    jns.Name AS JournalTitle,
    -- Latest status, else ActionId + 6 of the first quote/sale line, else 1.
    COALESCE(ms.StatusId, sl.StatusId, 1) AS StatusId,
    ISNULL(ms.MaxStatusDate, p.CreatedDate) AS StatusDate,
    ISNULL(CONVERT(DATE, ms.MaxStatusDate), p.CreatedDate) AS StDate,
    CASE
        WHEN COALESCE(ms.StatusId, sl.StatusId, 1) IN (1, 7, 8) THEN 0
        ELSE 1
    END AS OrderCriteria,
    ISNULL(ty1.AmountSentEmails, 0) AS NumberOfAlerts,
    ISNULL(ty3.AmountSentEmails, 0) AS NumberOfReminders
FROM
    Publications AS p
    LEFT JOIN PublicationTypes AS pt ON p.PublicationTypeId = pt.Id
    LEFT JOIN Publishers AS pb ON p.PublisherId = pb.Id
    LEFT JOIN Journals AS jns ON p.JournalId = jns.Id
    LEFT JOIN Users AS u ON u.Id = p.UserId
    LEFT JOIN StatusForMaxDateByPublication AS ms ON p.Id = ms.PublicationId
    -- Each e-mail-count join must reference its own alias in both conditions.
    LEFT JOIN SentEmailsByPublicationAndType AS ty1 ON
        p.Id = ty1.PublicationID AND
        ty1.EmailType = 1
    LEFT JOIN SentEmailsByPublicationAndType AS ty3 ON
        p.Id = ty3.PublicationID AND
        ty3.EmailType = 3
    -- First quote/sale line (lowest Id) of the publication, if any.
    OUTER APPLY (
        SELECT TOP 1
            StatusId = ActionId + 6
        FROM
            [PublicationsQuoteSaleLines] AS pqsl
        WHERE
            pqsl.PublicationId = p.Id
        ORDER BY
            pqsl.Id ASC) AS sl
Try to avoid writing the same expression several times (especially if it involves subqueries inside a column!). Using a few CTEs and proper indentation will help readability.
This is a complex query and involves several tables. If your query runs slowly, it might be for many different reasons. Try executing each subquery on its own to check whether it is slow, then try joining them one by one. Indexes on the join columns will probably improve performance if they don't exist already. Posting the full query execution plan might help.

How to optimize multiple subqueries to the same data set

Imagine I have a query like the following one:
-- For every user: number of posts and comments of TYPE 1 ("interesting") and
-- TYPE 2 ("boring"). Each count is a separate correlated scalar subquery, so
-- POSTS and COMMENTS are each referenced twice.
SELECT
u.ID,
-- Posts of TYPE 1 by this user.
( SELECT
COUNT(*)
FROM
POSTS p
WHERE
p.USER_ID = u.ID
AND p.TYPE = 1
) AS interesting_posts,
-- Posts of TYPE 2 by this user.
( SELECT
COUNT(*)
FROM
POSTS p
WHERE
p.USER_ID = u.ID
AND p.TYPE = 2
) AS boring_posts,
-- Comments of TYPE 1 by this user.
( SELECT
COUNT(*)
FROM
COMMENTS c
WHERE
c.USER_ID = u.ID
AND c.TYPE = 1
) AS interesting_comments,
-- Comments of TYPE 2 by this user.
( SELECT
COUNT(*)
FROM
COMMENTS c
WHERE
c.USER_ID = u.ID
AND c.TYPE = 2
) AS boring_comments
FROM
USERS u;
( Hopefully it's correct because I just came up with it and didn't test it )
where I try to calculate the number of interesting and boring posts and comments that the user has.
Now, the problem with this query is that we have 2 sequential scans on both the posts and comments table and I wonder if there is a way to avoid that?
I could probably LEFT JOIN both posts and comments to the users table and do some aggregation but it's gonna generate a lot of rows before aggregation and I am not sure if that's a good way to go.
Aggregate posts and comments and outer join them to the users table.
-- For every user: number of posts and comments of type 1 ("interesting") and
-- type 2 ("boring"). Posts and comments are each aggregated once per user and
-- then outer-joined, so users with no posts or comments still appear, with 0.
select
u.id as user_id,
-- coalesce turns the NULL produced by an unmatched outer join into 0.
coalesce(p.interesting, 0) as interesting_posts,
coalesce(p.boring, 0) as boring_posts,
coalesce(c.interesting, 0) as interesting_comments,
coalesce(c.boring, 0) as boring_comments
from users u
left join
(
-- count() skips NULLs, so each CASE counts only rows of the given type.
select
user_id,
count(case when type = 1 then 1 end) as interesting,
count(case when type = 2 then 1 end) as boring
from posts
group by user_id
) p on p.user_id = u.id
left join
(
select
user_id,
count(case when type = 1 then 1 end) as interesting,
count(case when type = 2 then 1 end) as boring
from comments
group by user_id
) c on c.user_id = u.id;
compare results and execution plan (here you scan posts once):
-- Per-user post counts by type computed in a single pass over POSTS, joined to
-- USERS together with a per-user comment count.
with post_counts as (
-- Windowed counts repeat on every post row of a user; DISTINCT collapses them.
select distinct
count(1) filter (where TYPE = 1) over (partition by USER_ID) interesting_posts
, count(1) filter (where TYPE = 2) over (partition by USER_ID) boring_posts
, USER_ID
from POSTS
)
, posts_per_user as (
-- USER_ID is selected next to aggregates, so it must be grouped.
select USER_ID, max(interesting_posts) interesting_posts, max(boring_posts) boring_posts
from post_counts
group by USER_ID
)
SELECT
u.ID, interesting_posts,boring_posts
, ( SELECT
COUNT(*)
FROM
COMMENTS c
WHERE
c.USER_ID = u.ID
) AS comments
FROM
USERS u
-- NOTE(review): an inner join omits users that have no posts at all; use
-- LEFT JOIN with coalesce(..., 0) if those users must be listed.
JOIN posts_per_user p on p.USER_ID = u.ID

Sql query correct syntax

I'm trying to return in a reporting service, the count of ID and NUM, that have the USERID AND CREATION_DATE entered by the user.
My aim is to get one row as a result, containing both counts. The counts are correct, but several rows are returned
(one for each row that matches the parameters specified by the user). How can I get only one row containing only the fields COUNTID and COUNTNUM?
I'm using Microsoft sql server.
-- Two independent counts, filtered by the $P{userId} and $P{creationDate}
-- placeholders (presumably report parameters substituted before execution).
SELECT
(SELECT COUNT(ID)
FROM PART
WHERE USERID = $P{userId} and CREATION_DATE = $P{creationDate}) as COUNTID ,
(SELECT COUNT(NUM)
FROM IDENTITY
WHERE USERID = $P{userId} and CREATION_DATE = $P{creationDate}) as COUNTNUM
-- The outer FROM lists both tables with no join condition and no WHERE, so the
-- same pair of counts is emitted once per combination of PART and IDENTITY rows.
FROM
PART,
IDENTITY
If you only want to return one row, and each of your subqueries is returning the "count" you want, you could just remove the FROM clause from the outer query. Something like this:
-- Both counts returned as one row: with no outer FROM clause the SELECT
-- produces exactly one row holding the two scalar subquery results.
SELECT
    (
        SELECT COUNT(part_rows.ID)
        FROM PART part_rows
        WHERE part_rows.USERID = $P{userId}
          AND part_rows.CREATION_DATE = $P{creationDate}
    ) AS COUNTID,
    (
        SELECT COUNT(identity_rows.NUM)
        FROM IDENTITY identity_rows
        WHERE identity_rows.USERID = $P{userId}
          AND identity_rows.CREATION_DATE = $P{creationDate}
    ) AS COUNTNUM
Personally, I'd write the query a little differently. I'd use the subqueries as inline views cross joined in the FROM clause, with each of the inline views returning a single row. Like this:
-- Both counts returned as one row: each inline view aggregates without
-- GROUP BY and so yields a single row; cross joining them gives one row.
SELECT
    part_count.countid,
    identity_count.countnum
FROM (
    SELECT COUNT(pr.ID) AS countid
    FROM PART pr
    WHERE pr.USERID = $P{userId}
      AND pr.CREATION_DATE = $P{creationDate}
) part_count
CROSS JOIN (
    SELECT COUNT(ir.NUM) AS countnum
    FROM IDENTITY ir
    WHERE ir.USERID = $P{userId}
      AND ir.CREATION_DATE = $P{creationDate}
) identity_count
Use a join and a Group By -- like this:
-- Counts PART.ID and IDENTITY.NUM for one user and creation date, using a
-- join on (USERID, CREATION_DATE) and a single GROUP BY.
-- NOTE(review): the join pairs every matching PART row with every matching
-- IDENTITY row, so both COUNTs run over the joined rows; if either table has
-- more than one row per (USERID, CREATION_DATE) the counts are inflated --
-- verify against the data model.
SELECT P.USERID, P.CREATION_DATE, COUNT(P.ID) AS COUNTID, COUNT(I.NUM) AS COUNTNUM
FROM PART P
JOIN IDENTITY I ON P.USERID = I.USERID AND P.CREATION_DATE = I.CREATION_DATE
WHERE P.USERID = $P{userId} and P.CREATION_DATE = $P{creationDate}
GROUP BY P.USERID, P.CREATION_DATE
As a side bonus if you take out the WHERE you can see the results for all users and all dates.
Note, if not all users and dates are in the PART table or Identity table do this:
-- Counts PART.ID and IDENTITY.NUM for one user and creation date, starting from
-- every (USERID, CREATION_DATE) pair present in EITHER table, so a pair that
-- exists in only one table still produces a row (with 0 for the other count).
SELECT B.USERID, B.CREATION_DATE, COUNT(P.ID) AS COUNTID, COUNT(I.NUM) AS COUNTNUM
FROM (
-- UNION already removes duplicate pairs, so no DISTINCT is needed. The column
-- must be spelled CREATION_DATE to match the references to B.CREATION_DATE.
SELECT USERID, CREATION_DATE FROM PART
UNION
SELECT USERID, CREATION_DATE FROM IDENTITY
) AS B
LEFT JOIN PART P ON B.USERID = P.USERID AND B.CREATION_DATE = P.CREATION_DATE
LEFT JOIN IDENTITY I ON B.USERID = I.USERID AND B.CREATION_DATE = I.CREATION_DATE
WHERE B.USERID = $P{userId} and B.CREATION_DATE = $P{creationDate}
GROUP BY B.USERID, B.CREATION_DATE
NOTE: This second query is much more correct, but might not be needed depending on your data. Since you don't tell us anything about your data or data model it is hard for me to know if the first query will work.

Only get latest date/time in SQL Server

I have created this code to pull tickets from our database and also pull the last ticket note date and last ticket user. It works except for if two people (or more) created a note on the last date (no matter the time difference) it will create multiple lines for each ticket. How do I fix this? here is the code:
-- Tickets with status 1 at location 1, with category, status, priority, owner,
-- technician, and the date and author of the latest ticket note.
Select distinct t.ticketID,
t.OpenDate,
c.categoryname,
s.statusname,
p.priorityname,
u.firstname,
u.lastname,
tu.firstname as 'tech_firstname',
tu.lastname as 'tech_lastname',
ltn.maxdate as 'last date',
ltu.firstname + ' ' + ltu.lastname as 'Last User'
from ticket t
left join category c on t.categoryid = c.categoryid
left join [status] s on t.statusid = s.statusid
left join [priority] p on t.priorityid = p.priorityid
left join [user] u on t.userid = u.userid
left join [user] tu on t.technicianid = tu.userid
left join ticketnote tn on t.ticketid = tn.ticketid
-- The derived table groups by ticketid AND userid, so it holds one "latest"
-- row per user per ticket; every user's own latest note can therefore match.
inner join (
Select Max(TicketNoteDate) as MaxDate, max(cast(ticketnotedate as time)) as MaxTime, ticketid, userid
From ticketNote
group by ticketid, userid) ltn on tn.ticketid = ltn.ticketid and tn.ticketnotedate = ltn.maxdate and cast(tn.ticketnotedate as time) = ltn.maxtime
left join [user] ltu on ltn.userid = ltu.userid
where t.statusid = 1
and t.LocationID = 1
order by t.ticketid
As suggested in comments, use row_number in your subquery where you get the maximum value
-- Numbers each ticket's notes from newest to oldest; rn = 1 is the ticket's
-- latest note. The partition is by ticketid only: adding userid would yield one
-- rn = 1 row per user per ticket, which is the duplication being fixed.
SELECT row_number() over
(partition by ticketid
order by TicketNoteDate desc ) as rn,
ticketid,
userid,
ticketnotedate
from ticketNote
and then join with outer query with condition being rn=1
I rewrote it getting the max date of all tickets, then finding all tickets with that max date and ordering by time descending and getting the first row and then retrieving extra data (like user) for that entry. I had no way of testing this, but here goes...
-- Tickets with status 1 at location 1, with category, status, priority, owner,
-- technician, and the date and author of each ticket's single latest note.
-- Tickets without any note are excluded by the inner join.
Select distinct t.ticketID,
t.OpenDate,
c.categoryname,
s.statusname,
p.priorityname,
u.firstname,
u.lastname,
tu.firstname as 'tech_firstname',
tu.lastname as 'tech_lastname',
ltn.maxdate as 'last date',
ltu.firstname + ' ' + ltu.lastname as 'Last User'
from ticket t
left join category c on t.categoryid = c.categoryid
left join [status] s on t.statusid = s.statusid
left join [priority] p on t.priorityid = p.priorityid
left join [user] u on t.userid = u.userid
left join [user] tu on t.technicianid = tu.userid
inner join (
-- Rank each ticket's notes newest first. ticketnotedate carries both date and
-- time, so one descending sort replaces the separate max-date/max-time logic.
select tn.ticketid,
tn.userid,
tn.ticketnotedate as maxdate,
row_number() over (partition by tn.ticketid order by tn.ticketnotedate desc) as rownum
from ticketnote tn
-- rownum = 1 keeps exactly one (the newest) note per ticket.
) ltn on ltn.ticketid = t.ticketid and ltn.rownum = 1
left join [user] ltu on ltn.userid = ltu.userid
where t.statusid = 1
and t.LocationID = 1
order by t.ticketid