Join table on conditions, count on conditions - sql

SELECT *, null AS score,
'0' AS SortOrder
FROM products
WHERE datelive = -1
AND hidden = 0
UNION
SELECT e.*, (SUM(r.a)/(COUNT(*)*1.0)+
SUM(r.b)/(COUNT(*)*1.0)+
SUM(r.c)/(COUNT(*)*1.0)+
SUM(r.d)/(COUNT(*)*1.0))/4 AS score,
'1' AS SortOrder
FROM products e
LEFT JOIN reviews r
ON r.productID = e.productID
WHERE e.hidden = 0
AND e.datelive != -1
GROUP BY e.productID
HAVING COUNT(*) >= 5
UNION
SELECT e.*, (SUM(r.a)/(COUNT(*)*1.0)+
SUM(r.b)/(COUNT(*)*1.0)+
SUM(r.c)/(COUNT(*)*1.0)+
SUM(r.d)/(COUNT(*)*1.0))/4 AS score,
'2' AS SortOrder
FROM products e
LEFT JOIN reviews r
ON r.productID = e.productID
WHERE e.hidden = 0
AND e.datelive != -1
GROUP BY e.productID
HAVING COUNT(*) < 5
ORDER BY SortOrder ASC, score DESC
This creates an SQL object for displaying products on a page. The first request grabs items of type datelive = -1, the second of type datelive != -1 but r.count(*) >= 5, and the third of type datelive != -1 and r.count(*) < 5. The reviews table is structured similar to the below:
reviewID | productID | a | b | c | d | approved
-------------------------------------------------
1 1 5 4 5 5 1
2 5 3 2 5 5 0
3 2 5 5 4 3 1
... ... ... ... ... ... ...
I'm trying to work it such that r.count(*) only cares for rows of type approved = 1, since tallying data based on unapproved reviews isn't ideal. How can I join these tables such that the summations of scores and the number of rows is dependent only on approved = 1?
I've tried adding in AND r.approved = 1 in the WHERE conditional for the joins and it doesn't do what I'd like. It does sort it properly, but then it no longer includes items with zero reviews.

You seem to be nearly there.
In your question you talked about adding the AND r.approved = 1 to the join criteria but by the sounds of it you are actually adding it to the WHERE clause.
If you instead properly add it to the join criteria like below then it should work fine:
SELECT *, null AS score,
'0' AS SortOrder
FROM products
WHERE datelive = -1
AND hidden = 0
UNION
SELECT e.*, (SUM(r.a)/(COUNT(*)*1.0)+
SUM(r.b)/(COUNT(*)*1.0)+
SUM(r.c)/(COUNT(*)*1.0)+
SUM(r.d)/(COUNT(*)*1.0))/4 AS score,
'1' AS SortOrder
FROM products e
LEFT JOIN reviews r ON r.productID = e.productID
WHERE e.hidden = 0
AND e.datelive != -1
GROUP BY e.productID
HAVING COUNT(*) >= 5
UNION
SELECT e.*, (SUM(r.a)/(COUNT(*)*1.0)+
SUM(r.b)/(COUNT(*)*1.0)+
SUM(r.c)/(COUNT(*)*1.0)+
SUM(r.d)/(COUNT(*)*1.0))/4 AS score,
'2' AS SortOrder
FROM products e
LEFT JOIN reviews r ON r.productID = e.productID AND r.approved = 1
WHERE e.hidden = 0
AND e.datelive != -1
GROUP BY e.productID
HAVING COUNT(*) < 5
ORDER BY SortOrder ASC, score DESC
SQL Fiddle here.
Notice again how I have simply put the AND r.approved = 1 directly after LEFT JOIN reviews r ON r.productID = e.productID which adds an extra criteria to the join.
As I mentioned in my comment, the WHERE clause will filter rows out of the combined record set after the join has been made. In some cases the RDBMS may optimise it out and put it into the join criteria but only where that would make no difference to the result set.

Calculating the non-zero sums and joining it to your result may solve it;
fiddle
SELECT a.productID,
NULL AS score,
'0' AS SortOrder
FROM products a
WHERE datelive = -1
AND hidden = 0
UNION
SELECT e.productID,
(min(x.a)/(min(x.cnt)*1.0)+ min(x.b)/(min(x.cnt)*1.0)+ min(x.c)/(min(x.cnt)*1.0)+ min(x.d)/(min(x.cnt)*1.0))/4 AS score,
'1' AS SortOrder
FROM products e
JOIN reviews r ON r.productID = e.productID
LEFT JOIN
(SELECT ee.productID,
sum(rr.a) AS a,
sum(rr.b) AS b,
sum(rr.c) AS c,
sum(rr.d) AS d,
count(*) AS cnt
FROM products ee
LEFT JOIN reviews rr ON ee.productID = rr.productID
GROUP BY ee.productID) x ON e.productID = x.productID
WHERE e.hidden = 0
AND e.datelive != -1
GROUP BY e.productID HAVING COUNT(*) >= 5
UNION
SELECT e.productID,
(min(x.a)/(min(x.cnt)*1.0)+ min(x.b)/(min(x.cnt)*1.0)+ min(x.c)/(min(x.cnt)*1.0)+ min(x.d)/(min(x.cnt)*1.0))/4 AS score,
'2' AS SortOrder
FROM products e
LEFT JOIN reviews r ON r.productID = e.productID
LEFT JOIN
(SELECT ee.productID,
sum(rr.a) AS a,
sum(rr.b) AS b,
sum(rr.c) AS c,
sum(rr.d) AS d,
count(*) AS cnt
FROM products ee
LEFT JOIN reviews rr ON ee.productID = rr.productID
GROUP BY ee.productID) x ON e.productID = x.productID
WHERE e.hidden = 0
AND e.datelive != -1
GROUP BY e.productID HAVING COUNT(*) < 5
ORDER BY SortOrder ASC,
score DESC

You could create a temp table that only contains rows where approved = 1, and then join on the temp table instead of reviews.
create table tt_reviews like reviews;
insert into tt_reviews
select * from reviews
where approved = 1;
alter table tt_reviews add index(productID);
Then replace reviews with tt_reviews in your above query.

Related

Limit result from inner join query to 2 rows

My query is giving me result from grouped data but now I want only two rows
I have tried HAVING COUNT(*) <= 2 but issue is is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause.
my query is
select f.CompanyName, f.EmployeeCity, f.PrioritySL ,f.EmployeeSeniorityLevel ,f.EmployeeID
from (
select ConcatKey, min(PrioritySL) as PSL
from dbo.WalkerItContacts group by ConcatKey
) as x inner join dbo.WalkerItContacts as f on f.ConcatKey = x.ConcatKey and f.PrioritySL = x.PSL
where f.PrioritySL != '10'
Company apple have 9 records I want only 2 records
my data
company name priority
a 10
a 1
a 3
b 2
b 4
b 3
b 5
c 1
c 10
c 2
my expected data
company name priority
a 1
a 3
b 2
b 3
c 1
c 2
Add a 'top 2' clause to the outer query:
select top 2 f.CompanyName, f.EmployeeCity, f.PrioritySL ,f.EmployeeSeniorityLevel ,f.EmployeeID
from (
select ConcatKey, min(PrioritySL) as PSL
from dbo.WalkerItContacts group by ConcatKey
) as x inner join dbo.WalkerItContacts as f on f.ConcatKey = x.ConcatKey and f.PrioritySL = x.PSL
where f.PrioritySL != '10'
and f.CompanyName= 'Apple'
will give you two rows. Add a order clause by in the outer query so you can control which two rows are returned.
You can phrase this more succinctly and with better performance as:
select top (2) wic.*
from (select wic,
rank() over (partition by CompanyName, ConcatKey order by PrioritySL) as seqnum
from dbo.WalkerItContacts wic
) wic
where seqnum = 1 and
wic.PrioritySL <> 10 and
wic.CompanyName = 'Apple';
I think you could solve your problem using the ROW_NUMBER() function to count the rows and filter it in the WHERE clause to only show 2 rows per group.
I think something like this might work for you:
SELECT rownum, f.CompanyName, f.EmployeeCity, f.PrioritySL,
f.EmployeeSeniorityLevel, f.EmployeeID
FROM ( SELECT ConcatKey, MIN(PrioritySL) AS PSL, ROW_NUMBER() OVER(PARTITION BY
f.CompanyName) AS rownum
FROM dbo.WalkerItContacts
GROUP BY ConcatKey) AS x
INNER JOIN dbo.WalkerItContacts AS f ON f.ConcatKey = x.ConcatKey
AND f.PrioritySL = x.PSL
WHERE f.PrioritySL != '10' AND rownum <= 2
ORDER BY f.CompanyName ASC;
Hope this helps some.

SQL correct query or not

given these relationships, how could you query the following:
The tourists (name and email) that booked at least a pension whose rating is greater than 9, but didn't book any 3 star hotel with a rating less than 9.
Is the following correct?
SELECT Tourists.name, Tourists.email
FROM Tourists
WHERE EXISTS (
SELECT id FROM Bookings
INNER JOIN Tourists ON Bookings.touristId=Tourists.id
INNER JOIN AccomodationEstablishments ON Bookings.accEstId=AccomodationEstablishments.id
INNER JOIN AccomodationTypes ON AccomodationEstablishments.accType=AccomodationTypes.id
WHERE AccomodationTypes.name = 'Pension' AND
AccomodationEstablishments.rating > 9
) AND NOT EXISTS (
SELECT id FROM Bookings
INNER JOIN Tourists ON Bookings.touristId=Tourists.id
INNER JOIN AccomodationEstablishments ON Bookings.accEstId=AccomodationEstablishments.id
INNER JOIN AccomodationTypes ON AccomodationEstablishments.accType=AccomodationTypes.id
WHERE AccomodationTypes.name = 'Hotel' AND
AccomodationEstablishments.noOfStars = 3 AND
AccomodationEstablishments.rating < 9
)
I would do this using aggregation and having:
SELECT t.name, t.email
FROM Bookings b INNER JOIN
Tourists t
ON b.touristId = t.id INNER JOIN
AccomodationEstablishments ae
ON b.accEstId = ae.id INNER JOIN
AccomodationTypes a
ON ae.accType = a.id
GROUP BY t.name, t.email
HAVING SUM(CASE WHEN a.name = 'Pension' AND ae.rating > 9 THEN 1 ELSE 0 END) > 0 AND
SUM(a.name = 'Hotel' AND ae.noOfStars = 3 AND ae.rating < 9 THEN 1 ELSE 0 END)= 0;
Your method also works, but you probably need t.id in the subqueries.

Struggling with left join

I'm struggling with left joining the earliest row in this left join.
The results are showing a 2011 date, but i know for a fact this particular row should be returning 2008.
SELECT TOP 1000
f.name as [Franchisee]
,p.paid_date as paid_date
FROM franchisees_franchisee f
OUTER APPLY (SELECT TOP 1 *
FROM era_project_invoice_payment p
WHERE f.franchiseeid = p.franchiseeid
and p.deleted = 0 and p.payment_confirmed = 1
ORDER BY p.eraprojectinvoicepaymentid ASC) p
where
f.deleted = 0
and f.name LIKE '%VKlinkosch%'
Below returns the correct, 2008 date.
SELECT TOP 1000
f.name as [Franchisee]
,min(p.paid_date) as paid_date
from [era_uat_shared].[dbo].[franchisees_franchisee] f
left join era_project_invoice_payment p
on f.franchiseeid = p.franchiseeid
where f.deleted = 0
and f.name LIKE '%VKlinkosch%'
GROUP BY f.name
Problem is, I need more than just the Paid Date from the payments table! :(
SELECT
f.name as [Franchisee]
, p.*
FROM franchisees_franchisee f
INNER JOIN
(
SELECT
ROW_NUMBER() OVER (PARTITION BY franchiseeid ORDER BY paid_date ASC) rn
, p.*
FROM
era_project_invoice_payment p
WHERE
deleted = 0
AND payment_confirmed = 1
) p
ON
f.franchiseeid = p.franchiseeid
AND f.deleted = 0
AND f.name LIKE '%VKlinkosch%'
AND p.rn = 1

(ORDER BY CASE WHEN) ordering by subquery

I need to order my results by int column ascending, but I want to get only rows with numbers (0...10000) but default ordering gives me rows with null values for this column before numbers. I googled solution which set rows with null into the end of ordering (after all numbers) it looks like
SELECT ProductName
FROM Products
ORDER BY
CASE WHEN Position is null THEN 1 ELSE 0 END,
Position
So I my query looks like:
SELECT c.CompanyId, c.CompanyName, c.CompanyCategoryId, cc.CompanyCategoryName, c.HQCountryISO, c.CrunchBaseUrl,c.AngelListUrl,
(SELECT MAX(mf.NumLikes) FROM MeasurementFacebook mf
JOIN FacebookAccount f ON f.CompanyId = c.CompanyId
WHERE f.FacebookAccountId in (mf.FacebookAccountId)) as Likes,
(SELECT MAX(mt.NumFollowers) FROM MeasurementTwitter mt
JOIN TwitterAccount t ON t.CompanyId = c.CompanyId
WHERE t.TwitterAccountId in (mt.TwitterAccountId)) as Followers,
(SELECT MAX(ma.AlexaRanking) FROM MeasurementAlexa ma
JOIN Website w ON w.CompanyId = c.CompanyId
WHERE w.WebsiteId in (ma.WebsiteId)) as AlexaRank
FROM Company c
JOIN CompanyCategory cc ON c.CompanyCategoryId = cc.CompanyCategoryId
WHERE c.HQCountryISO = 'FRA'
ORDER BY CASE WHEN AlexaRank IS NULL THEN 1 ELSE 0 END, AlexaRank
OFFSET 0 ROWS FETCH NEXT 10 ROWS ONLY
As you can see, AlexaRank is the result of third subquery, and I want to order result by this column. But I have an error which says:
Msg 207, Level 16, State 1, Line 14
Invalid column name 'AlexaRank'.
What I'm doing wrong? Thanks
While you can use an alias in the ORDER BY clause, you can't use an alias in an expression, easiest solution is to plop it in a cte/subquery:
;WITH cte AS (SELECT c.CompanyId
, c.CompanyName
, c.CompanyCategoryId
, cc.CompanyCategoryName
, c.HQCountryISO
, c.CrunchBaseUrl
,c.AngelListUrl
,(SELECT MAX(mf.NumLikes)
FROM MeasurementFacebook mf
JOIN FacebookAccount f ON f.CompanyId = c.CompanyId
WHERE f.FacebookAccountId in (mf.FacebookAccountId)) as Likes
,(SELECT MAX(mt.NumFollowers)
FROM MeasurementTwitter mt
JOIN TwitterAccount t ON t.CompanyId = c.CompanyId
WHERE t.TwitterAccountId in (mt.TwitterAccountId)) as Followers
,(SELECT MAX(ma.AlexaRanking)
FROM MeasurementAlexa ma
JOIN Website w ON w.CompanyId = c.CompanyId
WHERE w.WebsiteId in (ma.WebsiteId)) as AlexaRank
FROM Company c
JOIN CompanyCategory cc ON c.CompanyCategoryId = cc.CompanyCategoryId
WHERE c.HQCountryISO = 'FRA')
SELECT *
FROM cte
ORDER BY CASE WHEN AlexaRank IS NULL THEN 1 ELSE 0 END, AlexaRank
OFFSET 0 ROWS FETCH NEXT 10 ROWS ONLY
Very inefficient code but you could do something like the following. Basically wrap your initial query in a common table expression so you don't need to rewrite your 3rd sub-select in your order by.
SELECT * FROM (
SELECT c.companyid,
c.companyname,
c.companycategoryid,
cc.companycategoryname,
c.hqcountryiso,
c.crunchbaseurl,
c.angellisturl,
(SELECT Max(mf.numlikes)
FROM measurementfacebook mf
JOIN facebookaccount f
ON f.companyid = c.companyid
WHERE f.facebookaccountid IN ( mf.facebookaccountid )) AS Likes,
(SELECT Max(mt.numfollowers)
FROM measurementtwitter mt
JOIN twitteraccount t
ON t.companyid = c.companyid
WHERE t.twitteraccountid IN ( mt.twitteraccountid )) AS Followers,
(SELECT Max(ma.alexaranking)
FROM measurementalexa ma
JOIN website w
ON w.companyid = c.companyid
WHERE w.websiteid IN ( ma.websiteid )) AS AlexaRank
FROM company c
JOIN companycategory cc
ON c.companycategoryid = cc.companycategoryid
WHERE c.hqcountryiso = 'FRA' ) Q
ORDER BY CASE
WHEN Q.AlexaRank IS NULL THEN 1
ELSE 0
END,
Q.AlexaRank

SQL Dynamic join?

Please see http://sqlfiddle.com/#!3/2506f/2/0
I have two tables. One is a general record, and the other is a table containing related documents that link to that record.
In my example I've created a straightforward query which shows all records and their associated documents. This is fine, but I want a more complex situation.
In the 'mainrecord' table there is a 'multiple' field. If this is 0, then I only want the most recent document from the documents table (that is, with the highest ID). If it is 1, I want to join all linked documents.
So, rather than the result of the query being this:-
ID NAME MULTIPLE DOCUMENTNAME IDLINK
1 One 1 first document 1
1 One 1 second document 1
2 Two 0 third document 2
2 Two 0 fourth document 2
3 Three 1 fifth document 3
3 Three 1 sixth document 3
It should look like this:-
ID NAME MULTIPLE DOCUMENTNAME IDLINK
1 One 1 first document 1
1 One 1 second document 1
2 Two 0 fourth document 2
3 Three 1 fifth document 3
3 Three 1 sixth document 3
Is there a way of including this condition into my query to get the results I'm after. I'm happy to explain further if needed.
Thanks in advance.
WITH myData
AS
(SELECT mainrecord.*, documentlinks.documentName, documentlinks.idlink,
Row_number()
OVER (
partition BY mainrecord.ID
ORDER BY mainrecord.ID ASC) AS ROWNUM
FROM mainrecord INNER JOIN documentlinks
ON mainrecord.id = documentlinks.idlink)
SELECT *
FROM mydata o
WHERE multiple = 0 AND rownum =
(SELECT max(rownum) FROM mydata i WHERE i.id = o.id)
UNION
SELECT *
FROM myData
WHERE multiple = 1
http://sqlfiddle.com/#!3/2506f/57
Another solution (tested at SQL-Fiddle):
SELECT m.*,
d.id as did, d.documentName, d.IDLink
FROM mainrecord AS m
JOIN documentlinks AS d
ON d.IDLink = m.id
AND m.multiple = 1
UNION ALL
SELECT m.*,
d.id as did, d.documentName, d.IDLink
FROM mainrecord AS m
JOIN
( SELECT d.IDLink
, MAX(d.id) AS did
FROM mainrecord AS m
JOIN documentlinks AS d
ON d.IDLink = m.id
AND m.multiple = 0
GROUP BY d.IDLink
) AS g
ON g.IDLink = m.id
JOIN documentlinks AS d
ON d.id = g.did
ORDER BY id, did ;
This will probably do:
SELECT mainrecord.name, documentlinks.documentname
FROM documentlinks
INNER JOIN mainrecord ON mainrecord.id = documentlinks.IDLink AND multiple = 1
UNION
SELECT mainrecord.name, documentlinks.documentname
FROM (SELECT max(id) id, IDLink FROM documentlinks group by IDLink) maxdocuments
INNER JOIN documentlinks ON documentlinks.id = maxdocuments.id
INNER JOIN mainrecord ON mainrecord.id = documentlinks.IDLink AND multiple = 0
How about this:
select * from mainrecord a inner join documentlinks b on a.Id=b.IDLink
where b.id=(case
when a.multiple=1 then b.id
else (select max(id) from documentlinks c where c.IDLink=b.IDLink) end)
SQL FIDDLE
select
m.ID, m.name, m.multiple, dl.idlink,
dl.documentName
from mainrecord as m
left outer join documentlinks as dl on dl.IDlink = m.id
where
m.multiple = 1 or
not exists (select * from documentlinks as t where t.idlink = m.id and t.id < dl.id)