Sql query correct syntax - sql

I'm trying to return, in a reporting service, the count of ID and the count of NUM for the rows that have the USERID and CREATION_DATE entered by the user.
My aim is to get a single row as the result, containing both counts. The counts are correct, but several rows are returned
(they appear to correspond to the rows that match the parameters specified by the user). How can I get only one row containing only the fields COUNTID and COUNTNUM?
I'm using Microsoft sql server.
SELECT
(SELECT COUNT(ID)
FROM PART
WHERE USERID = $P{userId} and CREATION_DATE = $P{creationDate}) as COUNTID ,
(SELECT COUNT(NUM)
FROM IDENTITY
WHERE USERID = $P{userId} and CREATION_DATE = $P{creationDate}) as COUNTNUM
FROM
PART,
IDENTITY

If you only want to return one row, and each of your subqueries is returning the "count" you want, you could just remove the FROM clause from the outer query. Something like this:
-- Returns exactly one row: the outer SELECT has no FROM clause, so each
-- scalar subquery is evaluated once and its count becomes one column.
-- $P{userId} / $P{creationDate} are the placeholders for the user-entered values.
SELECT
    ( SELECT COUNT(p.ID)
        FROM PART p
       WHERE p.USERID = $P{userId}
         AND p.CREATION_DATE = $P{creationDate}
    ) AS COUNTID,
    -- IDENTITY is a reserved keyword in SQL Server, so the table name is delimited.
    ( SELECT COUNT(i.NUM)
        FROM [IDENTITY] i
       WHERE i.USERID = $P{userId}
         AND i.CREATION_DATE = $P{creationDate}
    ) AS COUNTNUM
Personally, I'd write the query a little differently. I'd use the subqueries as inline views cross joined in the FROM clause, with each of the inline views returning a single row. Like this:
-- Each inline view aggregates without GROUP BY, so it yields exactly one row;
-- cross joining two one-row sets therefore yields exactly one result row.
SELECT cp.countid,
       ci.countnum
  FROM ( SELECT COUNT(p.ID) AS countid
           FROM PART p
          WHERE p.USERID = $P{userId}
            AND p.CREATION_DATE = $P{creationDate}
       ) cp
 CROSS JOIN
       -- IDENTITY is a reserved keyword in SQL Server, so the table name is delimited.
       ( SELECT COUNT(i.NUM) AS countnum
           FROM [IDENTITY] i
          WHERE i.USERID = $P{userId}
            AND i.CREATION_DATE = $P{creationDate}
       ) ci

Use a join and a Group By -- like this:
-- Each table is aggregated on its own BEFORE the join. Joining the raw rows
-- first would pair every PART row with every IDENTITY row of the same
-- user/date, inflating both counts to (PART rows x IDENTITY rows).
SELECT P.USERID, P.CREATION_DATE, P.COUNTID, I.COUNTNUM
FROM (
    SELECT USERID, CREATION_DATE, COUNT(ID) AS COUNTID
    FROM PART
    GROUP BY USERID, CREATION_DATE
) AS P
JOIN (
    -- IDENTITY is a reserved keyword in SQL Server, so the table name is delimited.
    SELECT USERID, CREATION_DATE, COUNT(NUM) AS COUNTNUM
    FROM [IDENTITY]
    GROUP BY USERID, CREATION_DATE
) AS I ON P.USERID = I.USERID AND P.CREATION_DATE = I.CREATION_DATE
WHERE P.USERID = $P{userId} AND P.CREATION_DATE = $P{creationDate}
As a side bonus if you take out the WHERE you can see the results for all users and all dates.
Note, if not all users and dates are in the PART table or Identity table do this:
-- B lists every user/date pair present in either table (UNION already removes
-- duplicates, so no DISTINCT is needed). Each table is counted separately and
-- then LEFT JOINed, so a pair missing from one table reports 0 for that count
-- and rows from one table cannot multiply the count of the other.
SELECT B.USERID, B.CREATION_DATE,
       COALESCE(P.COUNTID, 0)  AS COUNTID,
       COALESCE(I.COUNTNUM, 0) AS COUNTNUM
FROM (
    SELECT USERID, CREATION_DATE FROM PART
    UNION
    -- IDENTITY is a reserved keyword in SQL Server, so the table name is delimited.
    SELECT USERID, CREATION_DATE FROM [IDENTITY]
) AS B
LEFT JOIN (
    SELECT USERID, CREATION_DATE, COUNT(ID) AS COUNTID
    FROM PART
    GROUP BY USERID, CREATION_DATE
) AS P ON B.USERID = P.USERID AND B.CREATION_DATE = P.CREATION_DATE
LEFT JOIN (
    SELECT USERID, CREATION_DATE, COUNT(NUM) AS COUNTNUM
    FROM [IDENTITY]
    GROUP BY USERID, CREATION_DATE
) AS I ON B.USERID = I.USERID AND B.CREATION_DATE = I.CREATION_DATE
WHERE B.USERID = $P{userId} AND B.CREATION_DATE = $P{creationDate}
NOTE: This second query is much more correct, but might not be needed depending on your data. Since you don't tell us anything about your data or data model it is hard for me to know if the first query will work.

Related

SQL query optimization - make only one join on table

I have a large SQL query, where I need to select some data.
SELECT p.Id, p.UserId, u.Name AS CreatedBy, p.JournalId, p.Title, pt.Name AS PublicationType, p.CreatedDate, p.MagazineTitle, /*ps.StatusId,*/ p.Authors, pb.Name AS Publisher, p.Draft,jns.Name AS JournalTitle,
ISNULL(
ISNULL(
(SELECT StatusId FROM [PublicationsStatus] WHERE StatusDate=
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id )),--AND ps.UserId = #UserId ORDER BY StatusDate DESC),
(SELECT TOP(1) ActionId + 6 FROM [PublicationsQuoteSaleLines] AS pqsl WHERE pqsl.PublicationId = p.Id ORDER BY pqsl.Id)
),
1
)AS StatusId
,ISNULL(
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id ),--AND ps.UserId = #UserId),
p.CreatedDate
) AS StatusDate
,ISNULL(
(cast((SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id) as date) ),--AND ps.UserId = #UserId),
p.CreatedDate
) AS StDate
,CASE
WHEN ISNULL(
ISNULL(
(SELECT StatusId FROM [PublicationsStatus] WHERE StatusDate=
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id )),--AND ps.UserId = #UserId ORDER BY StatusDate DESC),
(SELECT TOP(1) ActionId + 6 FROM [PublicationsQuoteSaleLines] AS pqsl WHERE pqsl.PublicationId = p.Id ORDER BY pqsl.Id)
),
1 ) IN (1, 7, 8) THEN 0
ELSE 1 END AS OrderCriteria
,(SELECT COUNT(*) FROM SentEmails AS se WHERE se.PublicationId = p.Id AND se.EmailType = 1 AND se.UserId = #UserId) AS NumberOfAlerts
,(SELECT COUNT(*) FROM SentEmails AS se WHERE se.PublicationId = p.Id AND se.EmailType = 3 AND se.UserId = #UserId) AS NumberOfReminders
FROM Publications AS p
LEFT JOIN PublicationTypes AS pt ON p.PublicationTypeId = pt.Id
LEFT JOIN Publishers AS pb ON p.PublisherId = pb.Id
LEFT JOIN Journals As jns ON p.JournalId = jns.Id
LEFT JOIN Users AS u ON u.Id = p.UserId
The problem is that the query is slow. As you can see, I compute the same expression for both OrderCriteria and StatusId, and StatusDate comes from the same table.
I thought I could fix the performance by using only one LEFT JOIN, something like this:
LEFT JOIN (
SELECT
PublicationId,
StatusId AS StatusId,
StatusDate AS StatusDate
FROM [PublicationsStatus] WHERE StatusDate=
(
SELECT MAX(StatusDate) FROM PublicationsStatus
)
) AS ps ON ps.PublicationId = p.Id
but I did not get the same results this way.
Can you please advise?
I tried to simplify your query using a few CTE to avoid doing the same subquery multiple times. You can try this out and see if it's still slow.
-- Publication list with its latest status, status date, ordering flag and
-- per-user counts of sent alert (type 1) and reminder (type 3) emails.
;WITH MaxStatusDateByPublication AS
(
    -- Latest StatusDate per publication.
    SELECT
        PublicationId = ps.PublicationId,
        MaxStatusDate = MAX(ps.StatusDate)
    FROM
        [PublicationsStatus] AS ps
    GROUP BY
        ps.PublicationId
),
StatusForMaxDateByPublication AS
(
    -- StatusId of the row carrying that latest date.
    -- NOTE(review): if a publication has several status rows with the same
    -- latest StatusDate, this returns one row per such status -- confirm the data.
    SELECT
        StatusId = ps.StatusId,
        m.PublicationId,
        m.MaxStatusDate
    FROM
        MaxStatusDateByPublication AS m
        INNER JOIN [PublicationsStatus] AS ps ON
            m.PublicationId = ps.PublicationId AND
            m.MaxStatusDate = ps.StatusDate
),
SentEmailsByPublicationAndType AS
(
    -- Emails sent by @UserId, counted per publication and email type.
    SELECT
        s.PublicationID,
        s.EmailType,
        AmountSentEmails = COUNT(1)
    FROM
        SentEmails AS s
    WHERE
        s.EmailType IN (1, 3) AND
        s.UserID = @UserId
    GROUP BY
        s.PublicationID,
        s.EmailType
)
SELECT
    p.Id,
    p.UserId,
    u.Name AS CreatedBy,
    p.JournalId,
    p.Title,
    pt.Name AS PublicationType,
    p.CreatedDate,
    p.MagazineTitle,
    p.Authors,
    pb.Name AS Publisher,
    p.Draft,
    jns.Name AS JournalTitle,
    -- Latest status, else first quote/sale line action + 6, else 1.
    COALESCE(ms.StatusId, sl.StatusId, 1) AS StatusId,
    ISNULL(ms.MaxStatusDate, p.CreatedDate) AS StatusDate,
    ISNULL(CONVERT(DATE, ms.MaxStatusDate), p.CreatedDate) AS StDate,
    CASE
        WHEN COALESCE(ms.StatusId, sl.StatusId, 1) IN (1, 7, 8) THEN 0
        ELSE 1
    END AS OrderCriteria,
    ISNULL(ty1.AmountSentEmails, 0) AS NumberOfAlerts,
    ISNULL(ty3.AmountSentEmails, 0) AS NumberOfReminders
FROM
    Publications AS p
    LEFT JOIN PublicationTypes AS pt ON p.PublicationTypeId = pt.Id
    LEFT JOIN Publishers AS pb ON p.PublisherId = pb.Id
    LEFT JOIN Journals AS jns ON p.JournalId = jns.Id
    LEFT JOIN Users AS u ON u.Id = p.UserId
    LEFT JOIN StatusForMaxDateByPublication AS ms ON p.Id = ms.PublicationId
    -- Each email-type join must reference its own alias on both conditions.
    LEFT JOIN SentEmailsByPublicationAndType AS ty1 ON
        p.Id = ty1.PublicationID AND
        ty1.EmailType = 1
    LEFT JOIN SentEmailsByPublicationAndType AS ty3 ON
        p.Id = ty3.PublicationID AND
        ty3.EmailType = 3
    -- First quote/sale line (lowest Id) of the publication, if any.
    OUTER APPLY (
        SELECT TOP 1
            StatusId = ActionId + 6
        FROM
            [PublicationsQuoteSaleLines] AS pqsl
        WHERE
            pqsl.PublicationId = p.Id
        ORDER BY
            pqsl.Id ASC) AS sl
Try to avoid writing the same expression several times (especially if it involves subqueries inside a column!). Using a few CTEs and proper indenting will help readability.
This is a complex query and involves several tables. If your query runs slowly, it might be for many different reasons. Try executing each subquery on its own to check whether it is slow, then try joining them one by one. Indexes on the join columns will probably improve performance if they don't exist already. Posting the full query execution plan might help.

Selecting multiple fields from row with max value of column, per group

I'm quite certain I've painted myself into a corner and I can't figure my way out.
The Users table and OrderHistories tables both have 1+ million records:
SELECT
u.Id ,
u.Email AS EmailAddress ,
c.Address_Address1 AS "Address 1" ,
(
SELECT
COUNT(*)
FROM
dbo.OrderHistories oh
WHERE
oh.UserId = u.UserName
) AS NumberOfOrders ,
Carts.SubtotalAmount AS CartTotal ,
(
SELECT
MAX(oh.CreateDate)
FROM
dbo.OrderHistories AS oh
WHERE
oh.UserId = u.Id
) AS LastOrderDate ,
(
SELECT
LastOrders.SubtotalAmount AS LastOrderSubtotal
FROM
(
SELECT
UserId ,
CreateDate ,
SubtotalAmount ,
MAX(CreateDate) OVER ( PARTITION BY UserId ) MyLastOrderDate
FROM
Users u
INNER JOIN dbo.OrderHistories oh
ON u.Id = oh.UserId
) AS LastOrders
WHERE
LastOrders.MyLastOrderDate = LastOrders.CreateDate
AND LastOrders.UserId = u.Id
) AS LastOrderSubtotal
FROM
Users u
INNER JOIN Customers AS c
ON u.Id = c.Id
LEFT JOIN dbo.Carts
ON c.Id = Carts.CustomerId
This particular subquery is my current problem (EXTREMELY inefficient), but I'm not experienced enough to understand exactly why, or how I should be doing it instead (I can't get there from here!):
(
SELECT
LastOrders.SubtotalAmount AS LastOrderSubtotal
FROM
(
SELECT
UserId ,
CreateDate ,
SubtotalAmount ,
MAX(CreateDate) OVER ( PARTITION BY UserId ) MyLastOrderDate
FROM
Users u
INNER JOIN dbo.OrderHistories oh
ON u.Id = oh.UserId
) AS LastOrders
WHERE
LastOrders.MyLastOrderDate = LastOrders.CreateDate
AND LastOrders.UserId = u.Id
) AS LastOrderSubtotal
Anyone mind telling me how terrible I am and then segue right into a suggested improvement?
Just from looking at your query, you may be able to simplify it using cross apply() like so:
-- One row per user/customer/cart combination, enriched with the user's most
-- recent order (date and subtotal) and the user's total number of orders.
SELECT
    u.Id,
    u.Email                   AS EmailAddress,
    c.Address_Address1        AS [Address 1],
    Carts.SubtotalAmount      AS CartTotal,
    last_order.NumberOfOrders AS NumberOfOrders,
    last_order.CreateDate     AS LastOrderDate,
    last_order.SubtotalAmount AS LastOrderSubtotal
FROM Users u
INNER JOIN Customers AS c
    ON u.Id = c.Id
LEFT JOIN dbo.Carts
    ON c.Id = Carts.CustomerId
-- CROSS APPLY runs the inner query once per outer row; users for whom it
-- returns no row (no order history) are dropped from the result.
CROSS APPLY (
    SELECT TOP 1
        hist.CreateDate,
        hist.SubtotalAmount,
        -- The window count spans all of the user's orders; TOP 1 is applied afterwards.
        COUNT(*) OVER (PARTITION BY hist.UserId) AS NumberOfOrders
    FROM dbo.OrderHistories hist
    WHERE hist.UserId = u.Id
    ORDER BY hist.CreateDate DESC
) AS last_order
If you want rows that may not have an OrderHistory, switch to outer apply().
Reference:
apply() - msdn
The power of T-SQL's APPLY operator - Rob Farley
APPLY: It Slices! It Dices! It Does It All! - Brad Shulz

Get Distinct results of all columns based on MAX DATE of one

Using SQL Server 2012
I have seen a few threads about this topic but I can't find one that involves multiple joins in the query. I can't create a VIEW on this database so the joins are needed.
The Query
SELECT
p.Price
,s.Type
,s.Symbol
, MAX(d.Date) Maxed
FROM AdventDW.dbo.FactPrices p
INNER JOIN dbo.DimSecurityMaster s
ON s.SecurityID = p.SecurityID
INNER JOIN dbo.DimDateTime d
ON
p.DateTimeKey = d.DateTimeKey
GROUP BY p.Price ,
s.Type ,
s.Symbol
ORDER BY s.Symbol
The query works but does not produce distinct results. I am using ORDER BY to validate the results, but it is not required once I get it working. The result set looks like this.
Price Type Symbol Maxed
10.57 bfus *bbkd 3/31/1989
10.77 bfus *bbkd 2/28/1990
100.74049 cbus 001397AA6 8/2/2005
100.8161 cbus 001397AA6 7/21/2005
The result set I want is
Price Type Symbol Maxed
10.77 bfus *bbkd 2/28/1990
100.74049 cbus 001397AA6 8/2/2005
Here are a few other StackOverflow threads I tried but couldn't get to work with my specific query:
How can I SELECT rows with MAX(Column value), DISTINCT by another column in SQL?
SQL Selecting distinct rows from multiple columns based on max value in one column
If you want data for the maximum date, use row_number() rather than group by:
-- Keeps, for every (Type, Symbol), only the price row with the latest date.
SELECT ts.*
FROM (SELECT p.Price, s.Type, s.Symbol, d.Date,
             -- Numbers the rows of each (Type, Symbol) from newest to oldest.
             ROW_NUMBER() OVER (PARTITION BY s.Type, s.Symbol
                                ORDER BY d.Date DESC
                               ) AS seqnum
      FROM AdventDW.dbo.FactPrices p INNER JOIN
           dbo.DimSecurityMaster s
           ON s.SecurityID = p.SecurityID INNER JOIN
           dbo.DimDateTime d
           ON p.DateTimeKey = d.DateTimeKey
     ) ts
WHERE ts.seqnum = 1
-- Only the derived-table alias ts is visible here; the inner alias s is not.
ORDER BY ts.Symbol;
You should use a derived table since you really only want to group the DateTimeKey table to get the MAX date.
-- The derived table computes the latest date per (Type, Symbol); the outer
-- query then keeps only the price rows that fall on that date. Grouping
-- DimDateTime by DateTimeKey and joining back on DateTimeKey would keep every
-- price row, because each row simply matches its own key.
-- NOTE(review): several prices on the same latest date all come back, and a
-- NULL Type would never match the equality join -- confirm against the data.
SELECT p.Price ,
       s.Type ,
       s.Symbol ,
       tmp.MaxDate
FROM AdventDW.dbo.FactPrices p
INNER JOIN dbo.DimSecurityMaster s ON s.SecurityID = p.SecurityID
INNER JOIN dbo.DimDateTime d ON p.DateTimeKey = d.DateTimeKey
INNER JOIN
    ( SELECT s2.Type ,
             s2.Symbol ,
             MAX(d2.Date) AS MaxDate
      FROM AdventDW.dbo.FactPrices p2
      INNER JOIN dbo.DimSecurityMaster s2 ON s2.SecurityID = p2.SecurityID
      INNER JOIN dbo.DimDateTime d2 ON p2.DateTimeKey = d2.DateTimeKey
      GROUP BY s2.Type ,
               s2.Symbol ) tmp ON tmp.Type = s.Type
                              AND tmp.Symbol = s.Symbol
                              AND tmp.MaxDate = d.Date
ORDER BY s.Symbol;
/*
Step 1: keep the original grouped query unchanged in its logic and store its
result in a temp table. It yields one row per (Price, Type, Symbol) with the
latest date seen for that combination. No ORDER BY here: rows stored in a
table have no guaranteed order, so sorting during the insert has no effect.
*/
SELECT
    p.Price
  , s.Type
  , s.Symbol
  , MAX(d.Date) Maxed
INTO #tmpT
FROM AdventDW.dbo.FactPrices p
INNER JOIN dbo.DimSecurityMaster s
    ON s.SecurityID = p.SecurityID
INNER JOIN dbo.DimDateTime d
    ON p.DateTimeKey = d.DateTimeKey
GROUP BY p.Price ,
         s.Type ,
         s.Symbol

-- Step 2: per (Symbol, Type), keep only the row with the latest date.
-- Maxed must NOT be part of PARTITION BY: partitioning by it puts every
-- distinct date in its own group, so every row would get number 1.
SELECT innerTable.Price, innerTable.Symbol, innerTable.Type, innerTable.Maxed
FROM (
    SELECT
        ROW_NUMBER() OVER (PARTITION BY t1.Symbol, t1.Type ORDER BY t1.Maxed DESC) AS rn
      , *
    FROM #tmpT AS t1
) AS innerTable
WHERE innerTable.rn = 1

DROP TABLE #tmpT

sql syntax group by

I'm struggling with this, as I'm not good with SQL and the designer won't work with the use of OVER. Basically, this gets a list of topics where the user is following an associated tag.
I need to group by T.TopicId to stop duplicates. If a user follows more than one tag associated with a topic, the topic is listed twice (once for each tag).
When I add a GROUP BY to the SQL I get multiple errors; I've tried rearranging things and can't get it to work. As I said, I'm useless with SQL statements.
#id int = null
AS
SELECT
*
FROM
(SELECT
ROW_NUMBER()
OVER
(ORDER BY TopicOrder desc
,
(CASE
WHEN M.MessageCreationDate > T.TopicCreationDate THEN M.MessageCreationDate
ELSE T.TopicCreationDate
END) desc)
AS RowNumber
,T.TopicId, T.TopicTitle, T.TopicShortName, T.TopicDescription, T.TopicCreationDate, T.TopicViews, T.TopicReplies, T.UserId, T.TopicTags, T.TopicIsClose,
T.TopicOrder, T.LastMessageId, T.UserName, M.MessageCreationDate, M.UserId AS MessageUserId, MU.UserName AS MessageUserName, U.UserGroupId,
U.UserPhoto, T.UserFullName
FROM Tags INNER JOIN
TopicsComplete AS T ON T.TopicId = Tags.TopicId LEFT OUTER JOIN
Messages AS M ON M.TopicId = T.TopicId AND M.MessageId = T.LastMessageId AND M.Active = 1 LEFT OUTER JOIN
Users AS MU ON MU.UserId = M.UserId LEFT OUTER JOIN
Users AS U ON U.UserId = T.UserId LEFT OUTER JOIN
tagfollows AS TF ON #id = TF.userid
WHERE (Tags.Tag = TF.tag)
)T
If anyone could help it would be much appreciated, thanks! :)
I think you only need to convert the join to tagfollows into an EXISTS subquery (and remove the redundant nesting):
-- Lists each topic at most once when user @id follows at least one of its
-- tags. The tag match is an EXISTS test instead of a join, so a topic with
-- several followed tags does not produce several rows.
SELECT
    ROW_NUMBER()
        OVER ( ORDER BY TopicOrder desc
                      -- most recent of last-message date and topic creation date
                      , CASE WHEN M.MessageCreationDate > T.TopicCreationDate
                             THEN M.MessageCreationDate
                             ELSE T.TopicCreationDate
                        END desc )
        AS RowNumber,
    T.TopicId, T.TopicTitle, T.TopicShortName, T.TopicDescription,
    T.TopicCreationDate, T.TopicViews, T.TopicReplies, T.UserId,
    T.TopicTags, T.TopicIsClose, T.TopicOrder, T.LastMessageId,
    T.UserName, M.MessageCreationDate,
    M.UserId AS MessageUserId,
    MU.UserName AS MessageUserName,
    U.UserGroupId, U.UserPhoto, T.UserFullName
FROM
    TopicsComplete AS T
  LEFT OUTER JOIN
    Messages AS M ON M.TopicId = T.TopicId
                 AND M.MessageId = T.LastMessageId
                 AND M.Active = 1
  LEFT OUTER JOIN
    Users AS MU ON MU.UserId = M.UserId
  LEFT OUTER JOIN
    Users AS U ON U.UserId = T.UserId
WHERE EXISTS
      ( SELECT *
        FROM Tags
          INNER JOIN tagfollows AS TF
            ON Tags.Tag = TF.tag
        WHERE T.TopicId = Tags.TopicId
          -- @id is the procedure parameter holding the user's id
          AND @id = TF.userid
      ) ;
You say you want to show posts with tags in the set that the user is following, but you don't want the post to show up multiple times when it has multiple matching tags. That's a perfect use for an EXISTS subquery. Here's an example from that MSDN page.
-- People named 'Johnson' who have at least one matching Employee row.
SELECT person.FirstName,
       person.LastName
  FROM Person.Person AS person
 WHERE EXISTS (
           -- EXISTS only tests whether a row is found; the select list is irrelevant.
           SELECT 1
             FROM HumanResources.Employee AS emp
            WHERE emp.BusinessEntityID = person.BusinessEntityID
              AND person.LastName = 'Johnson'
       );
You're really interested in the person table (like your posts table), but you want to show records that have at least one matching record in employee (like your tags table).

Make sure that min() only retrieves one value

I have a SQL query that retrieves data from a table that lists athletes' 100- and 200-meter race times. The query retrieves only the best race time of each athlete, based on the athlete_id; it also needs to know whether the race time is a 100- or 200-meter time (event_code).
So a runner can have several race times, but the query only gets the best race time of each runner in each event.
The problem is that if an athlete has run exactly the same best race time two or more times, the query retrieves all of those race times. How can I make sure the query only retrieves one value?
Here is the code:
select r.*
from result r
inner join (
select athelete_id, min(result_time) as FastestTime
from result
where event_code = 1
group by athelete_id
) rm on r.athelete_id = rm.athelete_id and r.result_time = rm.FastestTime
It is a pain in SQL Server 2000. This would be much easier using row_number, but that requires 2005.
However, the idea is simple, you just need one more layer of subqueries:
-- One row per athlete for event 1: the fastest time, with ties broken by the
-- lowest result_id.
select r.*
from result r join
     -- middle level: lowest result_id among each athlete's fastest-time rows
     (select r2.athelete_id, MIN(r2.result_id) as minresult_id
      from result r2 inner join
           -- innermost level: each athlete's fastest time in event 1
           (select athelete_id, min(result_time) as FastestTime
            from result
            where event_code = 1
            group by athelete_id
           ) rm
           on r2.athelete_id = rm.athelete_id and r2.result_time = rm.FastestTime
      -- without this filter a row from another event that happens to have the
      -- same time could be chosen as the tie-breaker
      where r2.event_code = 1
      group by r2.athelete_id
     ) aft
     on r.result_id = aft.minresult_id
The innermost subquery is basically your subquery. Then, this is aggregated by athelete_id, to get the minimum result_id, which is used for the final join.
Sorry, I misunderstood the question a bit, but you can handle it for all competitors with something like this:
select MIN(r.id), r.athelete_id, r.result_time, r....
from result r
inner join (
select athelete_id, min(result_time) as FastestTime
from result
where event_code = 1
group by athelete_id
) rm on r.athelete_id = rm.athelete_id and r.result_time = rm.FastestTime
GROUP BY r.athelete_id, r.result_time, r....
--(as how many column you have, except r.ID)
Try this:
-- NOTE(review): TOP 1 caps the WHOLE result set at a single row, so this
-- returns one result in total rather than one per athlete. There is also no
-- ORDER BY, so which of the matching rows is returned is not defined.
-- The outer query does not filter on event_code either.
select top 1 r.*
from result r
inner join (
-- each athlete's fastest time in event 1
select athelete_id, min(result_time) as FastestTime
from result
where event_code = 1
group by athelete_id
) rm on r.athelete_id = rm.athelete_id and r.result_time = rm.FastestTime
OR
-- NOTE(review): SET ROWCOUNT 1 stops the following query after its first row,
-- so this also yields a single row in total, not one per athlete. The setting
-- is never reset in this snippet (SET ROWCOUNT 0 turns it off), so later
-- statements in the same session would presumably stay limited as well.
SET ROWCOUNT 1
select r.*
from result r
inner join (
-- each athlete's fastest time in event 1
select athelete_id, min(result_time) as FastestTime
from result
where event_code = 1
group by athelete_id
) rm on r.athelete_id = rm.athelete_id and r.result_time = rm.FastestTime
Largely a similar approach to Gordon's, but breaks ties using the earliest event (which should be the identity column, if that's what result_id is, but that's not always guaranteed).
-- One row per athlete for the chosen event: the fastest time, with ties broken
-- by the earliest result_date. Local variables use the @ prefix in T-SQL
-- (# denotes a temporary table).
DECLARE @event_code INT;
SET @event_code = 1;

SELECT r.*
FROM dbo.result AS r
INNER JOIN
(
    -- y: per athlete, the fastest time and the earliest date it was achieved
    SELECT r2.athelete_id, x.mintime, mindate = MIN(r2.result_date)
    FROM
    (
        -- x: per athlete, the fastest time in the event
        SELECT athelete_id, mintime = MIN(result_time)
        FROM dbo.result
        WHERE event_code = @event_code
        GROUP BY athelete_id
    ) AS x
    INNER JOIN dbo.result AS r2
        ON x.athelete_id = r2.athelete_id
        AND x.mintime = r2.result_time
    WHERE r2.event_code = @event_code
    GROUP BY r2.athelete_id, x.mintime
) AS y
    ON r.athelete_id = y.athelete_id
    AND r.result_time = y.mintime
    AND r.result_date = y.mindate
-- the event filter is repeated at every level so rows from other events never match
WHERE r.event_code = @event_code;
Much easier in SQL Server 2005, of course:
-- Ranks each athlete's event-1 results from fastest to slowest (earliest date
-- first on ties) and keeps only the top-ranked row per athlete.
;WITH x AS
(
    SELECT athelete_id, --... other columns,
        rn = ROW_NUMBER() OVER (PARTITION BY athelete_id ORDER BY result_time, result_date)
    FROM dbo.result
    WHERE event_code = 1
)
-- The separating comma lives inside the placeholder comment so the statement
-- stays valid as written; move it out when adding real columns.
SELECT athelete_id --, ... other columns
FROM x WHERE rn = 1;