select all records with multiple category - sql

I am trying to figure out how to select all records that are associated with all categories on a list.
For instance take this DB setup:
create table blog (
id integer PRIMARY KEY,
url varchar(100)
);
create table blog_category (
id integer PRIMARY KEY,
name varchar(50)
);
create table blog_to_blog_category (
blog_id integer,
blog_category_id integer
);
insert into blog values
(1, 'google.com'),
(2, 'pets.com'),
(3, 'petsearch.com');
insert into blog_category values
(1, 'search'),
(2, 'pets'),
(3, 'misc');
insert into blog_to_blog_category values
(1,1),
(2,2),
(3,1),
(3,2),
(3,3);
I can query on the main table like this:
select b.*, string_agg(bc.name, ', ') from blog b
join blog_to_blog_category btbc on b.id = btbc.blog_id
join blog_category bc on btbc.blog_category_id = bc.id
where b.url like '%.com'
group by b.id
But lets say I want to only return blogs that have BOTH category 1 & 2 connected with them how do I do that?
This would return just the petsearch.com domain as it is only record to have both of those categories.

Here you go:
Added a check to count the blog_category id (HAVING Clause) and if it is 2 then it should be either 1 or 2 (IN Clause),
select b.*, string_agg(bc.name, ', ') from blog b
join blog_to_blog_category btbc on b.id = btbc.blog_id
join blog_category bc on btbc.blog_category_id = bc.id
where b.url like '%.com' and bc.id in (1,2)
group by b.id
having count(distinct bc.id ) =2

here is one way:
select * from blog where id in (
select blog_id
from blog_to_blog_category bbc
where blog_category_id in (1, 2)
group by blog_id
having count(distinct blog_category_id) = 2
)

Related

Select records that do not have at least one child element

How can I make an SQL query to select records that do not have at least one child element?
I have 3 tables: article (~40K rows), calendar (~450K rows) and calendar_cost (~500K rows).
It is necessary to select such entries of the article table:
there are no entries in the calendar table,
if there are entries in the calendar table, then all of them should not have any entries in the calendar_cost table.
create table article (
id int PRIMARY KEY,
name varchar
);
create table calendar (
id int PRIMARY KEY,
article_id int REFERENCES article (id) ON DELETE CASCADE,
number varchar
);
create table calendar_cost (
id int PRIMARY KEY,
calendar_id int REFERENCES calendar (id) ON DELETE CASCADE,
cost_value numeric
);
insert into article (id, name) values
(1, 'Article 1'),
(2, 'Article 2'),
(3, 'Article 3');
insert into calendar (id, article_id, number) values
(101, 1, 'Point 1-1'),
(102, 1, 'Point 1-2'),
(103, 2, 'Point 2');
insert into calendar_cost (id, calendar_id, cost_value) values
(400, 101, 100.123),
(401, 101, 400.567);
As a result, "Article 2" (condition 2) and "Article 3" (condition 1) will suit us.
My SQL query is very slow (the second condition part), how can I do it optimally? Is it possible to do without "union all" operator?
-- First condition
select a.id from article a
left join calendar c on a.id = c.article_id
where c.id is null
union all
-- Second condition
select a.id from article a
where id not in(
select aa.id from article aa
join calendar c on aa.id = c.article_id
join calendar_cost cost on c.id = cost.calendar_id
where aa.id = a.id limit 1
)
UPDATE
This is how you can fill my tables with random data for about the same amount of data. The #Bohemian query is very fast, and the rest are very slow. But as soon as I applied 2 indexes, as #nik advised, all queries began to be executed very, very quickly!
do $$
declare
article_id int;
calendar_id bigint;
i int; j int;
begin
create table article (
id int PRIMARY KEY,
name varchar
);
create table calendar (
id serial PRIMARY KEY,
article_id int REFERENCES article (id) ON DELETE CASCADE,
number varchar
);
create INDEX ON calendar(article_id);
create table calendar_cost (
id serial PRIMARY KEY,
calendar_id bigint REFERENCES calendar (id) ON DELETE CASCADE,
cost_value numeric
);
create INDEX ON calendar_cost(calendar_id);
for article_id in 1..45000 loop
insert into article (id, name) values (article_id, 'Article ' || article_id);
for i in 0..floor(random() * 25) loop
insert into calendar (article_id, number) values (article_id, 'Number ' || article_id || '-' || i) returning id into calendar_id;
for j in 0..floor(random() * 2) loop
insert into calendar_cost (calendar_id, cost_value) values (calendar_id, round((random() * 100)::numeric, 3));
end loop;
end loop;
end loop;
end $$;
#Bohemian
Planning Time: 0.405 ms
Execution Time: 1196.082 ms
#nbk
Planning Time: 0.702 ms
Execution Time: 165.129 ms
#Chris Maurer
Planning Time: 0.803 ms
Execution Time: 800.000 ms
#Stu
Planning Time: 0.446 ms
Execution Time: 280.842 ms
So which query to choose now as the right one is a matter of taste.
No need to split the conditions: The only condition you need to check for is that there are no calendar_cost rows whatsoever, which is the case if there are no calendar rows.
The trick is to use outer joins, which still return the parent table but have all null values when there is no join. Further, count() does not count null values, so requiring that the count of calendar_cost is zero is all you need.
select a.id
from article a
left join calendar c on c.article_id = a.id
left join calendar_cost cost on cost.calendar_id = c.id
group by a.id
having count(cost.calendar_id) = 0
See live demo.
If there are indexes on the id columns (the usual case), this query will perform quite well given the small table sizes.
Your second condition should start just like your first one: find all the calendar entries without calendar cost and only afterwards join it to article.
select a.id
from article a
Inner Join (
Select article_id
From calendar c left join calendar_cost cc
On c.id=cc.calendar_id
Where cc.calendar_id is null
) cnone
On a.id = cnone.article_id
This approach is based on the thought that calendar entries without calendar_cost is relatively rare compared to all the calendar entries.
Your query is not valid as IN clauses don't support LIMIT
Adding some indexes on article_id and calender_id
Will help the performance
As you can see in the query plan
create table article (
id int PRIMARY KEY,
name varchar(100)
);
create table calendar (
id int PRIMARY KEY,
article_id int REFERENCES article (id) ON DELETE CASCADE,
number varchar(100)
,index(article_id)
);
create table calendar_cost (
id int PRIMARY KEY,
calendar_id int REFERENCES calendar (id) ON DELETE CASCADE,
cost_value numeric
,INDEX(calendar_id)
);
insert into article (id, name) values
(1, 'Article 1'),
(2, 'Article 2'),
(3, 'Article 3');
insert into calendar (id, article_id, number) values
(101, 1, 'Point 1-1'),
(102, 1, 'Point 1-2'),
(103, 2, 'Point 2');
insert into calendar_cost (id, calendar_id, cost_value) values
(400, 101, 100.123),
(401, 101, 400.567);
Records: 3 Duplicates: 0 Warnings: 0
Records: 3 Duplicates: 0 Warnings: 0
Records: 2 Duplicates: 0 Warnings: 2
select a.id from article a
left join calendar c on a.id = c.article_id
where c.id is null
id
3
-- First condition
EXPLAIN
select a.id from article a
left join calendar c on a.id = c.article_id
where c.id is null
union all
-- Second condition
select a.id from article a
JOIN (
select aa.id from article aa
join calendar c on aa.id = c.article_id
join calendar_cost cost on c.id = cost.calendar_id
LIMIT 1
) t1 ON t1.id <> a.id
id
select_type
table
partitions
type
possible_keys
key
key_len
ref
rows
filtered
Extra
1
PRIMARY
a
null
index
null
PRIMARY
4
null
3
100.00
Using index
1
PRIMARY
c
null
ref
article_id
article_id
5
fiddle.a.id
3
33.33
Using where; Not exists; Using index
2
UNION
<derived3>
null
system
null
null
null
null
1
100.00
null
2
UNION
a
null
index
null
PRIMARY
4
null
3
66.67
Using where; Using index
3
DERIVED
cost
null
index
calendar_id
calendar_id
5
null
2
100.00
Using where; Using index
3
DERIVED
c
null
eq_ref
PRIMARY,article_id
PRIMARY
4
fiddle.cost.calendar_id
1
100.00
Using where
3
DERIVED
aa
null
eq_ref
PRIMARY
PRIMARY
4
fiddle.c.article_id
1
100.00
Using index
fiddle
Try the following using a combination of exists criteria.
Usually, with supporting indexes, this is more performant than simply joining tables as it offers a short-circuit to get out as soon as a match is found, where as joining typically filters after all rows are joined.
select a.id
from article a
where not exists (
select * from calendar c
where c.article_id = a.id
)
or (exists (
select * from calendar c
where c.article_id = a.id
)
and not exists (
select * from calendar_cost cc
where cc.calendar_id in (select id from calendar c where c.article_id = a.id)
)
);

HAVING clause with subquery -- Checking if group has at least one row matching conditions

Suppose I have the following table
DROP TABLE IF EXISTS #toy_example
CREATE TABLE #toy_example
(
Id int,
Pet varchar(10)
);
INSERT INTO #toy
VALUES (1, 'dog'),
(1, 'cat'),
(1, 'emu'),
(2, 'cat'),
(2, 'turtle'),
(2, 'lizard'),
(3, 'dog'),
(4, 'elephant'),
(5, 'cat'),
(5, 'emu')
and I want to fetch all Ids that have certain pets (for example either cat or emu, so Ids 1, 2 and 5).
DROP TABLE IF EXISTS #Pets
CREATE TABLE #Pets
(
Animal varchar(10)
);
INSERT INTO #Pets
VALUES ('cat'),
('emu')
SELECT Id
FROM #toy_example
GROUP BY Id
HAVING COUNT(
CASE
WHEN Pet IN (SELECT Animal FROM #Pets)
THEN 1
END
) > 0
The above gives me the error Cannot perform an aggregate function on an expression containing an aggregate or a subquery. I have two questions:
Why is this an error? If I instead hard code the subquery in the HAVING clause, i.e. WHEN Pet IN ('cat','emu') then this works. Is there a reason why SQL server (I've checked with SQL server 2017 and 2008) does not allow this?
What would be a nice way to do this? Note that the above is just a toy example. The real problem has many possible "Pets", which I do not want to hard code. It would be nice if the suggested method could check for multiple other similar conditions too in a single query.
If I followed you correctly, you can just join and aggregate:
select t.id, count(*) nb_of_matches
from #toy_example t
inner join #pets p on p.animal = t.pet
group by t.id
The inner join eliminates records from #toy_example that have no match in #pets. Then, we aggregate by id and count how many recors remain in each group.
If you want to retain records that have no match in #pets and display them with a count of 0, then you can left join instead:
select t.id, count(*) nb_of_records, count(p.animal) nb_of_matches
from #toy_example t
left join #pets p on p.animal = t.pet
group by t.id
How about this approach?
SELECT e.Id
FROM #toy_example e JOIN
#pets p
ON e.pet = p.animal
GROUP BY e.Id
HAVING COUNT(DISTINCT e.pet) = (SELECT COUNT(*) FROM #pets);

MySQL GROUP_CONCAT within GROUP_CONCAT different Group By values

I am trying to get a group_concat to work within another group_concat but grouped by different values.
3 Tables Products, Customers , and Product_Customer ( which holds what product each customer bought and what size )
#Creates the Customer Table
CREATE TABLE Customer
(
Cus_Code INT AUTO_INCREMENT PRIMARY KEY,
Cus_Name VARCHAR(20)
);
#Creates the Product Table
CREATE TABLE Product
(
Prod_Code INT AUTO_INCREMENT PRIMARY KEY,
Prod_Name VARCHAR(30)
);
#Creates the Product_Customer Table
CREATE TABLE Product_Customer
(
Cus_Code INT references Customer(Cus_Code),
Prod_Code INT references Product(Prod_Code),
Size INT,
);
Sample Data
#Inserts data into Customer Table
INSERT INTO Customer (Cus_Name)
VALUES
('Aaron')
('Bob')
('Charlie')
#Inserts data into Product Table
INSERT INTO Product (Prod_Name)
VALUES
('A')
('B')
('C')
#Inserts data into Product_Customer Table
INSERT INTO Product_Customer (Cus_Code, Prod_Code, Size)
VALUES
(1, 1, 1),
(1, 1, 2),
(1, 2, 1),
(2, 1, 1),
(2, 2, 1),
(2, 2, 2),
(3, 1, 1),
(3, 2, 1),
(3, 3, 1),
(3, 3, 2)
Desired Output Something like this
Customer Name | Product(Size)
Aaron | A(1,2), B(1)
Bob | A(1), B(1,2)
Charlie | A(1), B(1), C(1,2)
So i need the Size grouped by the product_code , then all that grouped by customer code
I have tried with variations of the following but to no avail
SELECT Customer.Cus_Name, GROUP_CONCAT(DISTINCT Product.Prod_Code, '(', s.list, ')' SEPARATOR ', ') AS 'Products'
FROM Product
JOIN (
SELECT Product.Prod_Code AS id, GROUP_CONCAT(DISTINCT Product_Customer.Size SEPARATOR ',') AS list
FROM Product
INNER JOIN Product_Customer ON Product.Prod_Code = Product_Customer.Prod_Code
GROUP BY id;
) AS s ON s.id = Product_Customer.Prod_Code
INNER JOIN Product_Customer ON Product.Prod_Code = Product_Customer.Prod_Code
INNER JOIN Customer ON Product_Customer.Cus_Code = Customer.Cus_Code
GROUP BY Customer.Cus_Code;
It seems to include all the sizes bought for that product, not what size each customer bought.
Any help would be appreciated
SELECT c.Cus_Name,
GROUP_CONCAT(cncat.Size ORDER BY cncat.Prod_Name SEPARATOR ', ') AS Size
FROM Customer c
INNER JOIN
(
SELECT pc.Cus_Code,
p.Prod_Name,
CONCAT(p.Prod_Name, '(', GROUP_CONCAT(pc.size), ')') Size
FROM Product_Customer pc
INNER JOIN Product p
ON pc.Prod_Code = p.Prod_Code
GROUP BY pc.Cus_Code,
p.Prod_Name
) AS cncat
ON c.Cus_Code = cncat.Cus_Code
GROUP BY c.Cus_Name
SQLFiddle Demo
I think the following version will do what you want:
SELECT c.Cus_Name, ps.prodsizes
FROM Customer c JOIN
(select cus_code, group_concat(prod_code, '(', sizes, ')' separator ', ') as prodsizes
from (select pc.cus_code, pc.prod_code, group_concat(distinct p.size separator ',') as sizes
from Product_Customer pc join
Product p
on pc.prod_code = p.prod_code
group by pc.cus_code, pc.prod_code
) cp
group by cus_code
) ps
on ps.cus_code = c.cus_code
GROUP BY c.Cus_Code;
Note that there are two levels of aggregation to get the products and sizes together, first at the customer product level then at the customer level.
I also introduces table aliases to make the query easier to write and read. There is no need for a distinct at the outer level, because duplicates are combined in the subquery.

how to do get multiple columns + count in a single query?

I usually don't ask for "scripts" but for mechanisms but I think that in this case if i'll see an example I would understand the principal.
I have three tables as shown below:
and I want to get the columns from all three, plus a count of the number of episodes in each series and to get a result like this:
Currently, I am opening multiple DB threads and I am afraid that as I get more visitors on my site it will eventually respond really slowly.
Any ideas?
Thanks a lot!
First join all the tables together to get the columns. Then, to get a count, use a window function:
SELECT count(*) over (partition by seriesID) as NumEpisodesInSeries,
st.SeriesId, st.SeriesName, et.episodeID, et.episodeName,
ct.createdID, ct.CreatorName
FROM series_table st join
episode_table et
ON et.ofSeries = st.seriesID join
creator_table ct
ON ct.creatorID = st.byCreator;
Do your appropriate joins between the tables and their IDs as you would expect, and also join onto the result of a subquery that determines the total episode count using the Episodes table.
SELECT SeriesCount.NumEpisodes AS #OfEpisodesInSeries,
S.id AS SeriesId,
S.name AS SeriesName,
E.id AS EpisodeId,
E.name AS EpisodeName,
C.id AS CreatorId,
C.name AS CreatorName
FROM
Series S
INNER JOIN
Episodes E
ON E.seriesId = S.id
INNER JOIN
Creators C
ON S.creatorId = C.id
INNER JOIN
(
SELECT seriesId, COUNT(id) AS NumEpisodes
FROM Episodes
GROUP BY seriesId
) SeriesCount
ON SeriesCount.seriesId = S.id
SQL Fiddle Schema:
CREATE TABLE Series (id int, name varchar(20), creatorId int)
INSERT INTO Series VALUES(1, 'Friends', 1)
INSERT INTO Series VALUES(2, 'Family Guy', 2)
INSERT INTO Series VALUES(3, 'The Tonight Show', 1)
CREATE TABLE Episodes (id int, name varchar(20), seriesId int)
INSERT INTO Episodes VALUES(1, 'Joey', 1)
INSERT INTO Episodes VALUES(2, 'Ross', 1)
INSERT INTO Episodes VALUES(3, 'Phoebe', 1)
INSERT INTO Episodes VALUES(4, 'Stewie', 2)
INSERT INTO Episodes VALUES(5, 'Kevin Kostner', 3)
INSERT INTO Episodes VALUES(6, 'Brad Pitt', 3)
INSERT INTO Episodes VALUES(7, 'Tom Hanks', 3)
INSERT INTO Episodes VALUES(8, 'Morgan Freeman', 3)
CREATE TABLE Creators (id int, name varchar(20))
INSERT INTO Creators VALUES(1, 'Some Guy')
INSERT INTO Creators VALUES(2, 'Seth McFarlane')
Try this:
http://www.sqlfiddle.com/#!3/5f938/17
select min(ec.num) as NumEpisodes,s.Id,S.Name,
Ep.ID as EpisodeID,Ep.name as EpisodeName,
C.ID as CreatorID,C.Name as CreatorName
from Episodes ep
join Series s on s.Id=ep.SeriesID
join Creators c on c.Id=s.CreatorID
join (select seriesId,count(*) as Num from Episodes
group by seriesId) ec on s.id=ec.seriesID
group by s.Id,S.Name,Ep.ID,Ep.name,C.ID,C.Name
Thanks Gordon
I would do the following:
SELECT (SELECT Count(*)
FROM episodetbl e1
WHERE e1.ofseries = s.seriesid) AS "#ofEpisodesInSeries",
s.seriesid,
s.seriesname,
e.episodeid,
e.episodename,
c.creatorid,
c.creatorname
FROM seriestbl s
INNER JOIN creatortbl c
ON s.bycreator = c.creatorid
INNER JOIN episodetbl e
ON e.ofseries = s.seriesid

How to show fields from most recently added detail in a view?

QUERY:
drop table #foot
create table #foot (
id int primary key not null,
name varchar(50) not null
)
go
drop table #note
create table #note (
id int primary key not null,
note varchar(MAX) not null,
foot_id int not null references #foot(id)
)
go
insert into #foot values
(1, 'Joe'), (2, 'Mike'), (3, 'Rob')
go
insert into #note (id, note, foot_id) values (1, 'Joe note 1', 1)
go
insert into #note (id, note, foot_id) values(2, 'Joe note 2', 1)
go
insert into #note (id, note, foot_id) values(3, 'Mike note 1', 2)
go
select F.name, N.note, N.id
from #foot F left outer join #note N on N.foot_id=F.id
RESULT:
QUESTION:
How can I create a view/query resulting in one row for each master record (#foot) along with fields from the most recently inserted detail (#note), if any?
GOAL:
(NOTE: the way I would tell which one is most recent is the id which would be higher for newer records)
select t.name, t.note, t.id
from (select F.name, N.note, N.id,
ROW_NUMBER() over(partition by F.id order by N.id desc) as RowNum
from #foot F
left outer join #note N
on N.foot_id=F.id) t
where t.RowNum = 1
Assuming the ID created in the #note table is always incremental (imposed by using IDENTITY or by controlling the inserts to always increment the by by max value) you can use the following query (which uses rank function):
WITH Dat AS
(
SELECT f.name,
n.note,
n.id,
RANK() OVER(PARTITION BY n.foot_id ORDER BY n.id DESC) rn
FROM #foot f LEFT OUTER JOIN #note n
ON n.foot_id = f.id
)
SELECT *
FROM Dat
WHERE rn = 1