converting subquery to join in postgresql - sql

-- Ten most recently created posts with posttypeid = 1, each with its owner's
-- userid and the number of posts whose parentid refers back to it.
SELECT
    p.postid AS postid,
    p.posttypeid AS posttypeid,
    (
        SELECT count(parentid) AS no_of_answers
        FROM post
        WHERE parentid = p.postid
    ),
    p.title AS title,
    p.body AS body,
    p.creationdate AS creationdate,
    p.modifieddate AS modifieddate,
    p.modifieduserid AS modifieduserid,
    p.score AS score,
    p.views AS no_of_views,
    u.userid AS userid
FROM post AS p
LEFT OUTER JOIN user_quest AS u
    ON p.owneruserid = u.userid
WHERE p.posttypeid = 1
ORDER BY p.creationdate DESC
LIMIT 10 OFFSET 0;
I need to convert this subquery to a join. Please help.

You could use CTE:
-- answer_counts: number of child posts per parentid, computed once for the
-- whole table and then attached to each listed post.
WITH answer_counts AS (
    SELECT
        parentid,
        count(parentid) AS no_of_answers
    FROM post
    GROUP BY parentid
)
SELECT
    p.postid,
    p.posttypeid,
    p.title,
    -- posts without any child rows have no match in answer_counts; report 0
    COALESCE(ac.no_of_answers, 0) AS no_of_answers,
    p.body,
    p.creationdate,
    p.modifieddate,
    p.modifieduserid,
    p.score,
    p.views AS no_of_views,
    u.userid
FROM post AS p
LEFT JOIN answer_counts AS ac
    ON ac.parentid = p.postid
LEFT JOIN user_quest AS u
    ON p.owneruserid = u.userid
WHERE p.posttypeid = 1
ORDER BY p.creationdate DESC
LIMIT 10 OFFSET 0;
Or move your subquery into the JOIN:
-- Same result as the CTE form, with the per-parent counts written as an
-- inline derived table.
SELECT
    p.postid,
    p.posttypeid,
    p.title,
    -- posts without any child rows have no match in the derived table; report 0
    COALESCE(ac.no_of_answers, 0) AS no_of_answers,
    p.body,
    p.creationdate,
    p.modifieddate,
    p.modifieduserid,
    p.score,
    p.views AS no_of_views,
    u.userid
FROM post AS p
LEFT JOIN (
    SELECT
        parentid,
        count(parentid) AS no_of_answers
    FROM post
    GROUP BY parentid
) AS ac
    ON ac.parentid = p.postid
LEFT JOIN user_quest AS u
    ON p.owneruserid = u.userid
WHERE p.posttypeid = 1
ORDER BY p.creationdate DESC
LIMIT 10 OFFSET 0;
But I would prefer the CTE. The performance of CTEs and subqueries should, in theory, be the same, since both provide the same information to the query optimizer. One difference is that a CTE used more than once can easily be identified and calculated once. It also looks prettier because it is easier to read, at least I think so.

Related

How to pass a variable to a subselect in a view

I have a table of posts, post_likes, and I need a query that will give me both totals for likes for posts, and also a given specific user's likes for those posts. This means I need a good way of giving MY_USER_ID as input data.
Here's a query that works
-- Per-post total like score plus the vote of one specific user.
-- MY_USER_ID is a placeholder for the id of the user whose vote is wanted;
-- it must be replaced with a real value before the statement can run.
create view post_view as
select
    post.id,
    -- 0 instead of NULL for posts that have no likes at all
    coalesce(sum(post_like.score), 0) as score,
    (
        select score
        from post_like
        where post.id = post_like.post_id
          and post_like.fedi_user_id = MY_USER_ID
    ) as my_vote  -- last select item: no trailing comma before FROM
from post
left join post_like on post.id = post_like.post_id
group by post.id;
BUT my ORM (rust diesel) doesn't allow me to set or query for that necessary MY_USER_ID field, since it's a subquery.
I'd really love to be able to do something like:
-- Desired call shape: filter the view by the user id directly.
SELECT *
FROM post_view
WHERE my_user_id = 'X';
Expose my_user_id on select clause of view
-- One row per (user, post) pair, whether or not the user liked the post:
-- the post's total score over all likes and that user's own vote.
create view post_view as
select
    u.id as my_user_id,
    p.id as post_id,
    -- total over every user's like of the post; 0 instead of NULL when nobody voted
    coalesce(sum(pl.score) over (partition by p.id), 0) as score,
    coalesce(pl.score, 0) as my_vote -- each u.id's vote, 0 when that user has not voted
from "user" u -- quoted because USER is a reserved word in PostgreSQL
cross join post p
left join post_like pl
    on u.id = pl.fedi_user_id
   and p.id = pl.post_id;

select * from post_view where my_user_id = 'X';
UPDATE
This can obtain post's scores even when no user is given
-- Per-user rows (total score + that user's vote) for every post, plus one
-- extra row per post under the sentinel my_user_id '' for callers that have
-- no user to filter by.
create view post_view as
with all_post as
(
    -- total like score per post; 0 instead of NULL for posts without likes
    select
        p.id as post_id,
        coalesce(sum(pl.score), 0) as score
    from post p
    left join post_like pl on p.id = pl.post_id
    group by p.id
)
select
    u.id as my_user_id,
    ap.post_id,
    ap.score,
    coalesce(pl.score, 0) as my_vote
from "user" u -- quoted because USER is a reserved word in PostgreSQL
cross join all_post ap
left join post_like pl
    on u.id = pl.fedi_user_id
   and ap.post_id = pl.post_id
union all
select
    -- NOTE(review): assumes the user id column is a text type so that '' unions cleanly — confirm
    '' as my_user_id,
    ap.post_id,
    ap.score,
    0 as my_vote
from all_post ap
;

select * from post_view where my_user_id = 'X';
When no user is passed, select the rows whose my_user_id is '':
-- Rows carrying the '' sentinel instead of a real user id.
SELECT *
FROM post_view
WHERE my_user_id = '';
You can move that condition into the ON clause:
-- Variant that restricts the joined likes to one user inside the ON clause,
-- so every post is still returned and score sums only that user's likes.
-- MY_USER_ID is a placeholder that must be replaced with a real value.
CREATE VIEW post_view AS
SELECT
    post.id,
    COALESCE(SUM(post_like.score), 0) AS score
FROM post
LEFT JOIN post_like
    ON post.id = post_like.post_id
   AND post_like.fedi_user_id = MY_USER_ID
GROUP BY post.id;
You can move the logic to the select using conditional aggregation:
-- Per-post total score and one user's own vote in a single pass over the
-- likes. MY_USER_ID is a placeholder that must be replaced with a real value.
select
    p.id,
    coalesce(sum(pl.score), 0) as score,
    -- sum the SCORE of the user's like rows (not the number of such rows);
    -- NULL when that user has not voted on the post
    sum(pl.score) filter (where pl.fedi_user_id = MY_USER_ID) as my_vote
from post p
left join post_like pl
    on p.id = pl.post_id
group by p.id;

SQL query optimization - make only one join on table

I have a large SQL query, where I need to select some data.
-- Publication list with each publication's status id, status date and the
-- number of type-1 / type-3 e-mails sent for it by one user.
-- Parameter: @UserId (T-SQL variables are prefixed with @).
-- StatusId falls back from the latest PublicationsStatus row, to the first
-- quote/sale line's ActionId + 6, to the constant 1.
SELECT p.Id, p.UserId, u.Name AS CreatedBy, p.JournalId, p.Title, pt.Name AS PublicationType, p.CreatedDate, p.MagazineTitle, /*ps.StatusId,*/ p.Authors, pb.Name AS Publisher, p.Draft,jns.Name AS JournalTitle,
ISNULL(
ISNULL(
(SELECT StatusId FROM [PublicationsStatus] WHERE StatusDate=
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id )),--AND ps.UserId = @UserId ORDER BY StatusDate DESC),
(SELECT TOP(1) ActionId + 6 FROM [PublicationsQuoteSaleLines] AS pqsl WHERE pqsl.PublicationId = p.Id ORDER BY pqsl.Id)
),
1
)AS StatusId
,ISNULL(
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id ),--AND ps.UserId = @UserId),
p.CreatedDate
) AS StatusDate
,ISNULL(
(cast((SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id) as date) ),--AND ps.UserId = @UserId),
p.CreatedDate
) AS StDate
-- OrderCriteria repeats the StatusId expression: 0 for statuses 1, 7, 8, otherwise 1
,CASE
WHEN ISNULL(
ISNULL(
(SELECT StatusId FROM [PublicationsStatus] WHERE StatusDate=
(SELECT MAX(StatusDate) FROM [PublicationsStatus] AS ps WHERE ps.PublicationId = p.Id )),--AND ps.UserId = @UserId ORDER BY StatusDate DESC),
(SELECT TOP(1) ActionId + 6 FROM [PublicationsQuoteSaleLines] AS pqsl WHERE pqsl.PublicationId = p.Id ORDER BY pqsl.Id)
),
1 ) IN (1, 7, 8) THEN 0
ELSE 1 END AS OrderCriteria
,(SELECT COUNT(*) FROM SentEmails AS se WHERE se.PublicationId = p.Id AND se.EmailType = 1 AND se.UserId = @UserId) AS NumberOfAlerts
,(SELECT COUNT(*) FROM SentEmails AS se WHERE se.PublicationId = p.Id AND se.EmailType = 3 AND se.UserId = @UserId) AS NumberOfReminders
FROM Publications AS p
LEFT JOIN PublicationTypes AS pt ON p.PublicationTypeId = pt.Id
LEFT JOIN Publishers AS pb ON p.PublisherId = pb.Id
LEFT JOIN Journals As jns ON p.JournalId = jns.Id
LEFT JOIN Users AS u ON u.Id = p.UserId
The problem is that the query is slow. AS you can see I have the same thing at OrderCriteria and the StatusId. The StatusDate I'm getting from the same table.
I thought that I could improve the performance by making only one LEFT JOIN, something like this:
-- Attempted single join against PublicationsStatus (fragment to append to the
-- FROM clause of the query on Publications AS p).
LEFT JOIN (
SELECT
PublicationId,
StatusId AS StatusId,
StatusDate AS StatusDate
FROM [PublicationsStatus] WHERE StatusDate=
(
-- This MAX has no filter on PublicationId, so it is the latest date in the
-- whole table rather than the latest date of each publication.
SELECT MAX(StatusDate) FROM PublicationsStatus
)
) AS ps ON ps.PublicationId = p.Id
but I did not get the same results this way.
Can you please advise?
I tried to simplify your query using a few CTE to avoid doing the same subquery multiple times. You can try this out and see if it's still slow.
-- Publication list with status id, status date and per-user e-mail counters,
-- with each repeated lookup computed once in a CTE and joined in.
-- Parameter: @UserId (T-SQL variables are prefixed with @).
;WITH MaxStatusDateByPublication AS
(
    -- Latest status date per publication.
    SELECT
        PublicationId = ps.PublicationId,
        MaxStatusDate = MAX(ps.StatusDate)
    FROM
        [PublicationsStatus] AS ps
    GROUP BY
        ps.PublicationId
),
StatusForMaxDateByPublication AS
(
    -- Status row(s) of each publication that carry that latest date.
    SELECT
        StatusId = PS.StatusId,
        M.PublicationId,
        M.MaxStatusDate
    FROM
        MaxStatusDateByPublication AS M
        INNER JOIN [PublicationsStatus] AS PS ON
            M.PublicationId = PS.PublicationId AND
            M.MaxStatusDate = PS.StatusDate
),
SentEmailsByPublicationAndType AS
(
    -- E-mails of type 1 and 3 sent by @UserId, counted per publication and type.
    SELECT
        S.PublicationID,
        S.EmailType,
        AmountSentEmails = COUNT(1)
    FROM
        SentEmails AS S
    WHERE
        S.EmailType IN (1, 3) AND
        S.UserID = @UserId
    GROUP BY
        S.PublicationID,
        S.EmailType
)
SELECT
    p.Id,
    p.UserId,
    u.Name AS CreatedBy,
    p.JournalId,
    p.Title,
    pt.Name AS PublicationType,
    p.CreatedDate,
    p.MagazineTitle,
    p.Authors,
    pb.Name AS Publisher,
    p.Draft,
    jns.Name AS JournalTitle,
    -- latest status, else first quote/sale line's ActionId + 6, else 1
    COALESCE(MS.StatusId, SL.StatusId, 1) AS StatusId,
    ISNULL(MS.MaxStatusDate, p.CreatedDate) AS StatusDate,
    ISNULL(CONVERT(DATE, MS.MaxStatusDate), p.CreatedDate) AS StDate,
    CASE
        WHEN COALESCE(MS.StatusId, SL.StatusId, 1) IN (1, 7, 8) THEN 0
        ELSE 1
    END AS OrderCriteria,
    ISNULL(TY1.AmountSentEmails, 0) AS NumberOfAlerts,
    ISNULL(TY3.AmountSentEmails, 0) AS NumberOfReminders
FROM
    Publications AS p
    LEFT JOIN PublicationTypes AS pt ON p.PublicationTypeId = pt.Id
    LEFT JOIN Publishers AS pb ON p.PublisherId = pb.Id
    LEFT JOIN Journals AS jns ON p.JournalId = jns.Id
    LEFT JOIN Users AS u ON u.Id = p.UserId
    LEFT JOIN StatusForMaxDateByPublication AS MS ON p.Id = MS.PublicationId
    -- each e-mail-count join must reference its own alias for both conditions
    LEFT JOIN SentEmailsByPublicationAndType AS TY1 ON
        p.Id = TY1.PublicationID AND
        TY1.EmailType = 1
    LEFT JOIN SentEmailsByPublicationAndType AS TY3 ON
        p.Id = TY3.PublicationID AND
        TY3.EmailType = 3
    OUTER APPLY (
        SELECT TOP 1
            StatusId = ActionId + 6
        FROM
            [PublicationsQuoteSaleLines] AS pqsl
        WHERE
            pqsl.PublicationId = p.Id
        ORDER BY
            pqsl.Id ASC) AS SL
Try to avoid writing the same expression several times (especially if it involves subqueries inside a column!). Using a few CTEs and proper indenting will help readability.
This is a complex query and involves several tables. If your query runs slowly, it might be for many different reasons. Try executing each subquery on its own to check whether it is slow, then try joining them one by one. Indexes on the join columns will probably improve your performance if they don't exist already. Posting the full query execution plan might help.

sql syntax group by

I'm struggling with this as I'm not good with SQL, and the designer won't work with the OVER clause. Basically, this gets a list of topics if the user is following an associated tag.
I need to group by T.TopicId to stop duplicates. If a user is selecting more than one tag associated with a topic it will list the topic twice (once for each tag)
When I add a GROUP BY in SQL I get multiple errors; I've tried rearranging things and can't get it to work. As I said, I'm useless with SQL statements.
-- Parameter list and body of a stored procedure (its CREATE header is not
-- shown): numbered list of topics joined to the tags that user @id follows.
-- A topic appears once per matching followed tag.
@id int = null
AS
SELECT
*
FROM
(SELECT
ROW_NUMBER()
OVER
(ORDER BY TopicOrder desc
,
(CASE
WHEN M.MessageCreationDate > T.TopicCreationDate THEN M.MessageCreationDate
ELSE T.TopicCreationDate
END) desc)
AS RowNumber
,T.TopicId, T.TopicTitle, T.TopicShortName, T.TopicDescription, T.TopicCreationDate, T.TopicViews, T.TopicReplies, T.UserId, T.TopicTags, T.TopicIsClose,
T.TopicOrder, T.LastMessageId, T.UserName, M.MessageCreationDate, M.UserId AS MessageUserId, MU.UserName AS MessageUserName, U.UserGroupId,
U.UserPhoto, T.UserFullName
FROM Tags INNER JOIN
TopicsComplete AS T ON T.TopicId = Tags.TopicId LEFT OUTER JOIN
Messages AS M ON M.TopicId = T.TopicId AND M.MessageId = T.LastMessageId AND M.Active = 1 LEFT OUTER JOIN
Users AS MU ON MU.UserId = M.UserId LEFT OUTER JOIN
Users AS U ON U.UserId = T.UserId LEFT OUTER JOIN
tagfollows AS TF ON @id = TF.userid
WHERE (Tags.Tag = TF.tag)
)T
If anyone could help it would be much appreciated, thanks! :)
I think you only need to convert the join to tagfollows into an EXISTS subquery (and remove the redundant nesting):
-- Numbered list of topics that carry at least one tag followed by user @id.
-- The tag match is an EXISTS test, so a topic is returned once however many
-- of its tags the user follows.
SELECT
    ROW_NUMBER()
      OVER ( ORDER BY TopicOrder desc
                    , CASE WHEN M.MessageCreationDate > T.TopicCreationDate
                          THEN M.MessageCreationDate
                          ELSE T.TopicCreationDate
                      END desc )
      AS RowNumber,
    T.TopicId, T.TopicTitle, T.TopicShortName, T.TopicDescription,
    T.TopicCreationDate, T.TopicViews, T.TopicReplies, T.UserId,
    T.TopicTags, T.TopicIsClose, T.TopicOrder, T.LastMessageId,
    T.UserName, M.MessageCreationDate,
    M.UserId AS MessageUserId,
    MU.UserName AS MessageUserName,
    U.UserGroupId, U.UserPhoto, T.UserFullName
FROM
    TopicsComplete AS T
  LEFT OUTER JOIN
    Messages AS M ON M.TopicId = T.TopicId
                 AND M.MessageId = T.LastMessageId
                 AND M.Active = 1
  LEFT OUTER JOIN
    Users AS MU ON MU.UserId = M.UserId
  LEFT OUTER JOIN
    Users AS U ON U.UserId = T.UserId
WHERE EXISTS
      ( SELECT *
        FROM Tags
          INNER JOIN tagfollows AS TF
            ON Tags.Tag = TF.tag
        WHERE T.TopicId = Tags.TopicId
          AND @id = TF.userid   -- T-SQL parameters are prefixed with @
      ) ;
You say you want to show posts with tags in the set that the user is following, but you don't want the post to show up multiple times when it has multiple matching tags. That's a perfect use for an EXISTS subquery. Here's an example from that MSDN page.
-- Names of persons with last name 'Johnson' that have at least one matching
-- Employee row on BusinessEntityID.
SELECT
    p.FirstName,
    p.LastName
FROM Person.Person AS p
WHERE EXISTS (
    SELECT 1
    FROM HumanResources.Employee AS e
    WHERE p.BusinessEntityID = e.BusinessEntityID
      AND p.LastName = 'Johnson'
);
You're really interested in the person table (like your posts table), but you want to show records that have at least one matching record in employee (like your tags table).

Multiple counts on different tables in same query

We have a tool that allows users to create their own groups. Within these groups, users can write posts. What I am trying to determine is the relationship between size of the group and total number of posts in that group.
I can do SQL statements to get a list of group names and the number of users in that group (Query 1) and a list of group names and the number of posts (Query 2) but I would like for both to be in the same query.
Query 1
-- Query 1: number of PersonGroup rows per group of Type 0 whose LastViewed
-- lies between @startDate and @enddate.
-- [Group] is bracketed because GROUP is a reserved word (T-SQL quoting assumed).
select count(pg.personID) as GroupSize, g.GroupName
from [Group] g inner join PersonGroup pg on g.GroupID = pg.GroupID
where LastViewed between @startDate and @enddate and
g.Type = 0
group by g.GroupID, g.GroupName
order by GroupSize
Query 2
-- Query 2: number of GroupPost rows per group of Type 0 created between
-- @startDate and @enddate (only rows whose PostID exists in Post).
-- [Group] is bracketed because GROUP is a reserved word (T-SQL quoting assumed).
select count(gp.PostID) as TotalPosts, g.GroupName
from [Group] g inner join GroupPost gp on g.GroupID = gp.GroupID
inner join Post p on gp.PostID = p.PostID
where g.Type = 0 and
gp.Created between @startDate and @enddate
group by g.GroupID, g.GroupName
order by TotalPosts
**Note: A person can post the same "post" to multiple groups
I believe from this data I could build a Histogram (# of groups with 10-20 users, 21-30 users, etc..) and incorporate average number of posts for groups in those different bins.
A simple solution would be to use those queries as Sub queries, and combine them:
-- Group size and post count side by side for groups of Type 0 that appear
-- in both subqueries. Parameters: @startDate, @enddate.
-- The derived tables carry no ORDER BY: SQL Server rejects it there without
-- TOP/OFFSET, and row order inside a derived table does not affect the join.
SELECT
    grps.GroupName,
    grps.GroupSize,
    psts.TotalPosts
FROM (
    -- PersonGroup rows per group within the LastViewed window
    select count(pg.personID) as GroupSize, g.GroupName, g.GroupID
    from [Group] g inner join PersonGroup pg on g.GroupID = pg.GroupID
    where LastViewed between @startDate and @enddate and
    g.Type = 0
    group by g.GroupID, g.GroupName) grps
JOIN (
    -- GroupPost rows per group within the Created window
    select count(gp.PostID) as TotalPosts, g.GroupName, g.GroupID
    from [Group] g inner join GroupPost gp on g.GroupID = gp.GroupID
    inner join Post p on gp.PostID = p.PostID
    where g.Type = 0 and
    gp.Created between @startDate and @enddate
    group by g.GroupID, g.GroupName) psts
ON psts.GroupID = grps.GroupID
Paul's solution assumes that the two sets of groups (by posts and by users) are the same. This may not be true, so either a full outer join or a union all is needed.
My preference is the following:
-- Group size and post count per group, including groups that have only
-- members or only posts. Each branch emits one flag row per member / post;
-- the outer query sums the flags. Parameters: @startDate, @enddate.
with groups as
(
    -- the set of groups both branches are restricted to
    select g.GroupId, g.GroupName
    from [Group] g  -- bracketed: GROUP is a reserved word (T-SQL quoting assumed)
    where g.Type = 0
    -- NOTE(review): assumes LastViewed is a column of Group — confirm against the schema
    and g.LastViewed between @startDate and @enddate
)
select GroupId, GroupName, SUM(GroupSize) as GroupSize, SUM(TotalPosts) as TotalPosts
from
(
    select groups.GroupId, groups.GroupName, 1 as GroupSize, 0 as TotalPosts
    from groups
    join PersonGroup pg
        on pg.GroupId = groups.GroupId
    union all
    select groups.GroupId, groups.GroupName, 0 as GroupSize, 1 as TotalPosts
    from groups
    join GroupPost gp
        on groups.GroupId = gp.GroupId
    join Post p
        on gp.PostId = p.PostId
) as flags  -- a derived table must have an alias
group by GroupId, GroupName
The "with" clause defines the set of groups that you are using. This places the definition in one place, making it obvious that the two subqueries have the same filtering. The two subqueries simply have flags indicating each of the two variables, which are then aggregated at the higher level. Sometimes it is more efficient to also do the aggregation inside the subqueries, particularly when there are indexes.

SQL query not sorting desc

This SQL command works fine in SQLite Manager,
but in my Android application it doesn't sort in descending order...
-- Tickets with status 5 joined to their comments and the commenting users,
-- collapsed to one row per ticket and sorted by comment date, newest first.
select t._id, u.name, c.commdate, c.message
from tickets t, users u, comments c
where c.userid = u._id and c.ticketid = t._id
and t.status = 5
group by t._id
-- NOTE(review): this HAVING only tests the value of max(c.commdate) as a
-- condition; it does not by itself pick the row of the latest comment — confirm intent
having max(c.commdate)
order by c.commdate desc
I have a feeling the "having" part is a problem. Try (untested);
-- Latest comment (with its author's name) for every ticket in status 5,
-- newest comment first. Tickets without comments are not returned.
SELECT t._id, u.name, c.commdate, c.message
FROM tickets t
JOIN comments c ON (t._id = c.ticketid)
-- Keep only each ticket's most recent comment: the derived table yields one
-- max date per ticket, and the inner join discards all older comments.
JOIN (
    SELECT c2.ticketid, MAX(c2.commdate) AS max_commdate
    FROM comments c2
    GROUP BY c2.ticketid
) AS latest ON (c.ticketid = latest.ticketid AND c.commdate = latest.max_commdate)
JOIN users u ON (c.userid = u._id)
WHERE t.status = 5
ORDER BY c.commdate DESC