Filter on two many to many relations - sql

I'm trying to apply multiple filters on a table that is joined to two other tables.
My tables
main
Id
Name
tags
Id
Name
main_primary_tags
mainId
tagId
main_secondary_tags
mainId
tagId
I want to select some items in main table which have some primary and secondary tags.
The difficulty is that I need an exact match.
For example, if I want to select items which have the "Tag1" and "Tag2" tags, I want only the items that have both of these tags, not the items that have just one of them (which is what the IN operator would give me).
And I can filter on primary AND secondary..
Thanks for your help!

I would aggregate the tags into arrays and compare the arrays:
-- For every row in main, collect its primary and its secondary tag names
-- into sorted arrays, then keep only the rows whose arrays equal the wanted
-- lists. Array "=" is order-sensitive, so the literal arrays in the WHERE
-- clause must be written in the same order that ORDER BY produces.
select main.id as main_id,
       mt.tags as main_tags,
       st.tags as secondary_tags
from main
  left join lateral (
    -- the tag name lives in tags (alias t1), not in the link table
    select array_agg(t1.name order by t1.name) as tags
    from main_primary_tags mpt
      join tags t1 on t1.id = mpt.tagid
    where mpt.mainid = main.id
  ) mt on true
  left join lateral (
    select array_agg(t2.name order by t2.name) as tags
    from main_secondary_tags mst
      join tags t2 on t2.id = mst.tagid
    where mst.mainid = main.id
  ) st on true
where mt.tags = array['Tag1', 'Tag2']
  and st.tags = array['Tag1', 'Tag2']
Note that the = operator for arrays depends on the order of the elements, so it's important to list them in the same order that the ORDER BY sorts them.

You can also do it like this
-- Return the rows of "main" that carry a (primary tag, secondary tag) name
-- pair listed in tag_filter. The wanted pairs are built with UNION ALL.
with tag_filter as (
    select 'Tag1' as primary_tag, 'Tag2' as secondary_tag
    union all
    select 'Tag1', 'Tag2'
)
select m.*
from "main" as m
    join main_primary_tags as p_link
        on p_link.mainId = m.Id
    join main_secondary_tags as s_link
        on s_link.mainId = m.Id
    join tags as primary_t
        on primary_t.Id = p_link.tagId
    join tags as secondary_t
        on secondary_t.Id = s_link.tagId
    join tag_filter as tf
        on tf.primary_tag = primary_t."Name"
       and tf.secondary_tag = secondary_t."Name"
Or if you want something more compact in the tag_filter CTE
-- Return the rows of "main" whose primary/secondary tag names match a pair
-- in tag_filter; the pairs are supplied through a VALUES list.
with tag_filter (primary_tag, secondary_tag) as (
    values ('Tag1', 'Tag2'),
           ('Tag1', 'Tag2')
)
select m.*
from "main" as m
    join main_primary_tags as p_link
        on p_link.mainId = m.Id
    join main_secondary_tags as s_link
        on s_link.mainId = m.Id
    join tags as primary_t
        on primary_t.Id = p_link.tagId
    join tags as secondary_t
        on secondary_t.Id = s_link.tagId
    join tag_filter as tf
        on tf.primary_tag = primary_t."Name"
       and tf.secondary_tag = secondary_t."Name"
And in the case the tag_filter is much smaller than the main table, then you can also spool the tagId in the CTE, and then join with that instead.
-- tag_name_filter: the wanted (primary, secondary) tag name pairs.
with tag_name_filter (primary_tag, secondary_tag) as (
    values ('Tag1', 'Tag2'),
           ('Tag1', 'Tag2')
),
-- tag_filter: the same pairs translated from names to tag ids, so the main
-- query can match on the link tables without joining tags again.
tag_filter as (
    select primary_t.Id   as primary_tag_id,
           secondary_t.Id as secondary_tag_id
    from tag_name_filter as wanted
        join tags as primary_t
            on primary_t."Name" = wanted.primary_tag
        join tags as secondary_t
            on secondary_t."Name" = wanted.secondary_tag
)
select m.*
from "main" as m
    join main_primary_tags as p_link
        on p_link.mainId = m.Id
    join main_secondary_tags as s_link
        on s_link.mainId = m.Id
    join tag_filter as tf
        on tf.primary_tag_id = p_link.tagId
       and tf.secondary_tag_id = s_link.tagId

Related

multi level cascade inner join select query

I've following table structure
I'd like to select post_id when any one of short_name (country name), name (state table) or region_name matches. Executing the following query returns the correct result for region_name, but not for short_name (country name) or name (state table).
-- Posts joined to their region, country and state, filtered by a name match
-- on any of the three. LIKE patterns are string literals and must be quoted.
select *
from t_post_city
inner join t_region on t_region.region_id = t_post_city.city_id
inner join t_country on t_region.country_id = t_country.country_id
inner join t_states on t_region.province_id = t_states.state_id
where t_country.short_name like '%india%'
or t_states.name like '%rajasthan%'
or t_region.region_name like '%sitapura%'
Please tell me where I'm going wrong!
-- Same search written with outer joins: a t_post_city row is not discarded
-- by the joins themselves when a region, country or state row is missing;
-- only the WHERE clause decides which rows are returned.
SELECT *
FROM t_post_city
    LEFT JOIN t_region
        ON t_region.region_id = t_post_city.city_id
    LEFT JOIN t_country
        ON t_region.country_id = t_country.country_id
    LEFT JOIN t_states
        ON t_region.province_id = t_states.state_id
WHERE t_country.short_name LIKE '%india%'
   OR t_states.name LIKE '%rajasthan%'
   OR t_region.region_name LIKE '%sitapura%'
Use a distinct table expression for each of your three criteria. Write your OR logic using UNION in SQL:
-- One branch per search criterion. UNION merges the branches and removes
-- duplicate post_id values, which gives the OR semantics. Each branch joins
-- only the tables its own predicate needs.

-- posts whose region name matches
select post_id
from t_post_city
inner join t_region on t_region.region_id = t_post_city.city_id
where t_region.region_name like '%sitapura%'
union
-- posts whose country name matches
select post_id
from t_post_city
inner join t_region on t_region.region_id = t_post_city.city_id
inner join t_country on t_region.country_id = t_country.country_id
where t_country.short_name like '%india%'
union
-- posts whose state name matches
select post_id
from t_post_city
inner join t_region on t_region.region_id = t_post_city.city_id
inner join t_states on t_region.province_id = t_states.state_id
where t_states.name like '%rajasthan%';

Distinct on id with ordering by possible duplicate names

I have the following requirements for a query:
It needs to be ordered by a column of an inner joined table (see from_products_products below).
It must allow duplicate names in from_products_products.
It cannot return duplicate records from the origin table (distinct on products.id).
The following query will eliminate the duplicate names, which is not desired, as I had to put a distinct on from_products_products.name because of the use in order by:
-- Distributed (or untyped), non-archived products of profile 45781, made
-- distinct on (source product name, product id) and sorted the same way.
SELECT DISTINCT ON (from_products_products.name, products.id)
    "products".*
FROM "products"
    INNER JOIN "suppliers_plugin_source_products"
        ON "suppliers_plugin_source_products"."to_product_id" = "products"."id"
    INNER JOIN "products" "from_products_products"
        ON "from_products_products"."id" = "suppliers_plugin_source_products"."from_product_id"
    INNER JOIN "suppliers_plugin_source_products" "sources_from_products_products_join"
        ON "sources_from_products_products_join"."to_product_id" = "products"."id"
    INNER JOIN "suppliers_plugin_suppliers"
        ON "suppliers_plugin_suppliers"."id" = "sources_from_products_products_join"."supplier_id"
WHERE "products"."profile_id" = 45781
  AND ("products"."type" IN ('SuppliersPlugin::DistributedProduct') OR "products"."type" IS NULL)
  AND (products.archived <> true)
ORDER BY
    from_products_products.name ASC,
    products.id
Using GROUP BY has the same effect and also doesn't remove the duplicates.
The original query that gives duplicate products when the INNER JOIN doesn't match any product:
-- Distributed (or untyped), non-archived products of profile 45781, one row
-- per join combination, sorted by the name of the source product.
SELECT
    "products".*
FROM "products"
    INNER JOIN "suppliers_plugin_source_products"
        ON "suppliers_plugin_source_products"."to_product_id" = "products"."id"
    INNER JOIN "products" "from_products_products"
        ON "from_products_products"."id" = "suppliers_plugin_source_products"."from_product_id"
    INNER JOIN "suppliers_plugin_source_products" "sources_from_products_products_join"
        ON "sources_from_products_products_join"."to_product_id" = "products"."id"
    INNER JOIN "suppliers_plugin_suppliers"
        ON "suppliers_plugin_suppliers"."id" = "sources_from_products_products_join"."supplier_id"
WHERE "products"."profile_id" = 45781
  AND ("products"."type" IN ('SuppliersPlugin::DistributedProduct') OR "products"."type" IS NULL)
  AND (products.archived <> true)
ORDER BY
    from_products_products.name ASC
So, how to overcome this on PostgreSQL?
PS: This is part of open-source software Noosfero-ecosol
Does this do what you want?
-- Step 1 (CTE t): keep exactly one row per product. DISTINCT ON keeps the
-- first row of each products.id group, so the inner ORDER BY also sorts by
-- the source product name; this makes the kept row deterministic (the
-- alphabetically first source name) instead of arbitrary.
with t as (
SELECT DISTINCT ON (products.id) "products".*,
from_products_products.name as from_products_name
FROM "products"
INNER JOIN "suppliers_plugin_source_products" ON "suppliers_plugin_source_products"."to_product_id" = "products"."id"
INNER JOIN "products" "from_products_products" ON "from_products_products"."id" = "suppliers_plugin_source_products"."from_product_id"
INNER JOIN "suppliers_plugin_source_products" "sources_from_products_products_join" ON "sources_from_products_products_join"."to_product_id" = "products"."id"
INNER JOIN "suppliers_plugin_suppliers" ON "suppliers_plugin_suppliers"."id" = "sources_from_products_products_join"."supplier_id"
WHERE "products"."profile_id" = 45781 AND (("products"."type" IN ('SuppliersPlugin::DistributedProduct') OR "products"."type" IS NULL)) AND (products.archived <> true)
ORDER BY products.id, from_products_products.name
)
-- Step 2: sort the de-duplicated products by the source name that was kept.
select t.*
from t
order by from_products_name
It seems to meet your requirements.
EDIT:
If the above does what you want, I can think of five options:
The above using a CTE.
Basically the same logic, using a subquery.
Using window functions, which is structurally very similar.
Using group by.
Using a where clause for the filtering logic.
Here is the group by method:
-- One row per product (GROUP BY products.id), carrying the smallest source
-- product name of its group, and sorted by that name.
SELECT
    "products".*,
    MIN(from_products_products.name) AS from_products_name
FROM "products"
    INNER JOIN "suppliers_plugin_source_products"
        ON "suppliers_plugin_source_products"."to_product_id" = "products"."id"
    INNER JOIN "products" "from_products_products"
        ON "from_products_products"."id" = "suppliers_plugin_source_products"."from_product_id"
    INNER JOIN "suppliers_plugin_source_products" "sources_from_products_products_join"
        ON "sources_from_products_products_join"."to_product_id" = "products"."id"
    INNER JOIN "suppliers_plugin_suppliers"
        ON "suppliers_plugin_suppliers"."id" = "sources_from_products_products_join"."supplier_id"
WHERE "products"."profile_id" = 45781
  AND ("products"."type" IN ('SuppliersPlugin::DistributedProduct') OR "products"."type" IS NULL)
  AND (products.archived <> true)
GROUP BY products.id
ORDER BY from_products_name;
This form depends on products.id being declared as a primary key. Alternatively, you can put all the columns from that table in the group by.
Rewriting (simplifying the aliases) yields:
-- The original query with short aliases: p1 = target product,
-- p2 = source product, spsp / spsp2 = the two joins of the link table.
SELECT p1.*
FROM products AS p1
    JOIN suppliers_plugin_source_products AS spsp
        ON spsp.to_product_id = p1.id
    JOIN products AS p2
        ON p2.id = spsp.from_product_id
    -- second join of the same link table on the same key: looks redundant
    JOIN suppliers_plugin_source_products AS spsp2
        ON spsp2.to_product_id = p1.id
    JOIN suppliers_plugin_suppliers AS sps
        ON sps.id = spsp2.supplier_id
WHERE p1.profile_id = 45781
  AND (p1."type" IN ('SuppliersPlugin::DistributedProduct') OR p1."type" IS NULL)
  AND p1.archived <> true
-- sorts by a column of p2, which is not part of the select list
ORDER BY p2.name ASC
;
The outer query only refers to the product tables p1 and p2.
Assuming that JOINing the "suppliers_plugin_source_products" table twice was unintentional, this can be reduced to:
-- Pair each product p1 with every source product p2 that is linked to it
-- through suppliers_plugin_source_products, and list the p1 rows ordered by
-- the name of p2.
SELECT p1.*
FROM products AS p1
    INNER JOIN products AS p2
        ON EXISTS (
            SELECT 1
            FROM suppliers_plugin_source_products AS link
                -- this join to the suppliers table might not be necessary
                INNER JOIN suppliers_plugin_suppliers AS sup
                    ON sup.id = link.supplier_id
            WHERE link.to_product_id = p1.id
              AND link.from_product_id = p2.id
        )
WHERE p1.profile_id = 45781
  AND (p1."type" IN ('SuppliersPlugin::DistributedProduct') OR p1."type" IS NULL)
  AND p1.archived <> true
ORDER BY p2.name ASC
;

select between two many-to-many relations

I have four tables: Level, Tag, Level_Tag and Tag_hierarchy. How can I select all tags of a level that satisfy the condition id_tag = id_parent, which means the tag is a root? I can select from the join table (maybe not with good performance?), but I don't know how to add the other self join here.
-- All tags attached to one level, via the tag_level link table.
SELECT
    l.name,
    t.id,
    t.name
FROM level AS l
    INNER JOIN tag_level AS tl
        ON l.id = tl.id_level
    INNER JOIN tag AS t
        ON tl.id_tag = t.id
WHERE l.Id = #id
The Tag table contains thousands of rows and I'm really worried about memory and performance issues.
Could you please help me on this? Here is the schema
Try this:
-- cte: the ids of root tags, i.e. hierarchy rows where the tag is its own
-- parent (id_tag = id_parent).
;with cte as
(select id_tag
from tag_hierarchy where id_tag = id_parent)
-- Root tags that are attached to the requested level.
select l.name, t.id, t.name
from cte c
inner join tag t on t.id = c.id_tag
inner join tag_level tl on t.id = tl.id_tag
inner join level l on tl.id_level = l.id
-- the level key is level.id, as used in the join above
where l.id = #id
Maybe you can add another exists. Like this:
-- Tags of one level, restricted to tags that have a hierarchy row in which
-- the tag is its own parent (id_tag = id_parent).
SELECT level.name, tag.id, tag.name
FROM level
    INNER JOIN tag_level ON level.id = tag_level.id_level
    INNER JOIN tag ON tag_level.id_tag = tag.id
WHERE (level.Id = #id)
  AND EXISTS (
        SELECT 1
        FROM Tag_hierarchy AS th
        WHERE th.id_tag = tag.id
          AND th.id_tag = th.id_parent
      )

JOIN / LEFT JOIN conflict in SQL Server

I have a tricky query. I need to select all recent versions of 2 types of members of administrator groups. Here is the query:
-- Administrator groups that have a matching row in tblGroup.
SELECT refGroup.*
FROM tblSystemAdministratorGroups AS refGroup
    INNER JOIN tblGroup AS refMem
        ON refGroup.AttributeValue = refMem.ObjectUID
This query will return all the administrator groups. The next step will be getting the members of these groups. Since I have 2 types of memberships (Explicit, Computed), I will have to use a LEFT JOIN to make sure that I am not excluding any rows.
-- Administrator groups together with their computed and explicit members;
-- the outer joins keep groups that lack one kind of member.
SELECT refGroup.*
FROM tblSystemAdministratorGroups AS refGroup
    -- The join below could be left out; it is here only to clarify the architecture
    INNER JOIN tblGroup AS refMem
        ON refGroup.AttributeValue = refMem.ObjectUID
    LEFT JOIN tblGroup_ComputedMember AS cm
        ON refMem.ObjectUID = cm.GroupObjectID
    LEFT JOIN tblGroup_ExplicitMember AS em
        ON refMem.ObjectUID = em.GroupObjectID
The last piece in the puzzle is to get the latest version of each member. For that I will have to use JOIN to exclude older versions:
-- Keep only the computed-member row with the highest ID per ObjectID.
INNER JOIN (
    SELECT MAX([ID]) AS MaxId
    FROM [OmadaReporting].[dbo].tblGroup_ComputedMember
    GROUP BY ObjectID
) AS MostRecentCM
    ON MostRecentCM.MaxId = cm.Id
and
-- Keep only the explicit-member row with the highest ID per ObjectID.
INNER JOIN (
    SELECT MAX([ID]) AS MaxId
    FROM [OmadaReporting].[dbo].tblGroup_ExplicitMember
    GROUP BY ObjectID
) AS MostRecentEM
    ON MostRecentEM.MaxId = em.Id
The full query will be:
-- Administrator groups with both member types, each restricted to its latest
-- version by an inner join against the per-ObjectID maximum ID.
SELECT refGroup.*
FROM tblSystemAdministratorGroups AS refGroup
    INNER JOIN tblGroup AS refMem
        ON refGroup.AttributeValue = refMem.ObjectUID
    LEFT JOIN tblGroup_ComputedMember AS cm
        ON refMem.ObjectUID = cm.GroupObjectID
    INNER JOIN (
        SELECT MAX([ID]) AS MaxId
        FROM [OmadaReporting].[dbo].tblGroup_ComputedMember
        GROUP BY ObjectID
    ) AS MostRecentCM
        ON MostRecentCM.MaxId = cm.Id
    LEFT JOIN tblGroup_ExplicitMember AS em
        ON refMem.ObjectUID = em.GroupObjectID
    INNER JOIN (
        SELECT MAX([ID]) AS MaxId
        FROM [OmadaReporting].[dbo].tblGroup_ExplicitMember
        GROUP BY ObjectID
    ) AS MostRecentEM
        ON MostRecentEM.MaxId = em.Id
The issue is clear: the two JOINs that exclude old versions also filter the rows of the whole select statement, so no rows are returned. What would be the best way to avoid this situation and return the intended values?
-- Administrator groups with only the latest version (highest ID per
-- ObjectID) of each computed and explicit member.
-- The "latest version" test sits in each ON clause and not in WHERE: a WHERE
-- filter on cm would discard groups that have no computed member and turn
-- the LEFT JOIN into an inner join.
SELECT refGroup.*
FROM tblSystemAdministratorGroups refGroup
JOIN tblGroup refMem ON refGroup.AttributeValue = refMem.ObjectUID
LEFT JOIN (
-- every computed-member row, tagged with the newest ID of its ObjectID
select GroupObjectID, ID, max(ID) over (partition by ObjectID) as maxID
from tblGroup_ComputedMember
) cm ON refMem.ObjectUID = cm.GroupObjectID and cm.ID = cm.maxID
LEFT JOIN (
-- every explicit-member row, tagged with the newest ID of its ObjectID
select GroupObjectID, ID, max(ID) over (partition by ObjectID) as maxID
from tblGroup_ExplicitMember
) em ON refMem.ObjectUID = em.GroupObjectID and em.ID = em.maxID
What about using LEFT join in your last two joins?
-- Outer-join variant: rows without a latest computed member are kept, with
-- MostRecentCM.MaxId left NULL.
LEFT OUTER JOIN (
    SELECT MAX([ID]) AS MaxId
    FROM [OmadaReporting].[dbo].tblGroup_ComputedMember
    GROUP BY ObjectID
) AS MostRecentCM
    ON MostRecentCM.MaxId = cm.Id
And then in Where clause filter values as:
-- Keep a row when at least one of the two member types has a latest version.
WHERE (MostRecentCM.MaxId IS NOT NULL)
   OR (MostRecentEM.MaxId IS NOT NULL)

translating sql sub query to join

I had a long query; I shortened it by using joins instead, and the resulting query is below, but it still has a subquery. How can I convert this subquery to a join?
-- Every attribute row (with first image and category) of the posts that have
-- at least one attribute whose value_text is 'SFX'.
SELECT
    pav.post_id     AS Id,
    img.path        AS Path,
    attr.name       AS Name,
    pc.title        AS Category,
    pav.value_text  AS Valuess,
    post.created_on AS createdOn
FROM postings AS post
    INNER JOIN post_attributes_values AS pav
        ON post.post_id = pav.post_id
    LEFT JOIN images AS img
        ON post.post_id = img.post_id
       AND img.sequence = '1'
    INNER JOIN attributes AS attr
        ON pav.attr_id = attr.attr_id
    INNER JOIN categories_parent_categories AS pc
        ON attr.cat_id = pc.category_id
WHERE pav.post_id IN (
    SELECT DISTINCT post_id
    FROM post_attributes_values
    WHERE value_text = 'SFX'
)
After reading your last comment to Matei's answer I have come to realize that you actually want ALL the posts where one of the attributes has value of 'SFX'. If I understood correctly, your only alternative is to add derived table and join by post_id:
-- Every attribute row of the posts that have at least one 'SFX' attribute.
-- The IN subquery is replaced by a join to the derived table sfxPosts, which
-- holds the distinct post_id values that have such an attribute.
select
    pav.post_id as Id,
    img.path as Path,
    attr.name as Name,
    pc.title as Category,
    pav.value_text as Valuess,
    post.created_on as createdOn
from postings post
    inner join post_attributes_values pav
        on post.post_id = pav.post_id
    left join images img
        on post.post_id = img.post_id and img.sequence = '1'
    inner join attributes attr
        on pav.attr_id = attr.attr_id
    inner join categories_parent_categories pc
        on attr.cat_id = pc.category_id
    inner join (
        select distinct post_id
        from post_attributes_values
        where value_text = 'SFX'
    ) sfxPosts
        on pav.post_id = sfxPosts.post_id
(Query reformatted thanks to instant sql formatter.)
Maybe this? Please test it
-- Variant without a subquery: the 'SFX' test is part of the join to
-- post_attributes_values, so only attribute rows whose value_text is 'SFX'
-- are returned.
SELECT
    pav.post_id     AS Id,
    img.path        AS Path,
    attr.name       AS Name,
    pc.title        AS Category,
    pav.value_text  AS Valuess,
    post.created_on AS createdOn
FROM postings AS post
    INNER JOIN post_attributes_values AS pav
        ON post.post_id = pav.post_id
       AND pav.value_text = 'SFX'
    LEFT JOIN images AS img
        ON post.post_id = img.post_id
       AND img.sequence = '1'
    INNER JOIN attributes AS attr
        ON pav.attr_id = attr.attr_id
    INNER JOIN categories_parent_categories AS pc
        ON attr.cat_id = pc.category_id