SQL Join Table as JSON data - sql

I am trying to join reviews and likes onto products, but for some reason the output of the "reviews" column is duplicated once for every row of the other foreign table, likes: the number of entries in "reviews" is
(number of likes) * (number of reviews).
I have no idea why this is happening.
My desired output is that the "reviews" column contains an array of JSON data such that one array is equal to one row of a related review
Products
Title Image
----------------------
Photo photo.jpg
Book book.jpg
Table table.jpg
Users
Username
--------
Admin
John
Jane
Product Likes
product_id user_id
---------------------
1 1
1 2
2 1
2 3
Product Reviews
product_id user_id review
-------------------------------------
1 1 Great Product!
1 2 Looks Great
2 1 Could be better
This is the query
-- Asker's original query: both child tables are joined straight onto
-- products and everything is aggregated in a single GROUP BY.
SELECT
    p.*,
    array_to_json(array_agg(pr.*)) AS reviews,
    EXISTS (
        SELECT *
        FROM product_like AS lk
        JOIN users AS u
            ON u.id = lk.user_id
        WHERE u.id = 4
          AND lk.product_id = p.id
    ) AS liked,
    COUNT(pl.product_id) AS totalLikes
FROM products AS p
LEFT JOIN product_review AS pr
    ON pr.product_id = p.id
LEFT JOIN product_like AS pl
    ON pl.product_id = p.id
GROUP BY p.id
Query to create schema and insert data
-- Schema and sample rows for the products / users / reviews / likes example.
CREATE TABLE products (
    id    SERIAL,
    title VARCHAR(50),
    image VARCHAR(50),
    PRIMARY KEY (id)
);

CREATE TABLE users (
    id       SERIAL,
    username VARCHAR(50),
    PRIMARY KEY (id)
);

INSERT INTO products (title, image)
VALUES
    ('Photo', 'photo.jpg'),
    ('Book', 'book.jpg'),
    ('Table', 'table.jpg');

INSERT INTO users (username)
VALUES
    ('Admin'),
    ('John'),
    ('Jane');

-- One row per review a user wrote for a product.
CREATE TABLE product_review (
    id         SERIAL,
    product_id INT NOT NULL,
    user_id    INT NOT NULL,
    review     VARCHAR(50),
    PRIMARY KEY (id),
    FOREIGN KEY (product_id) REFERENCES products,
    FOREIGN KEY (user_id) REFERENCES users
);

INSERT INTO product_review (product_id, user_id, review)
VALUES
    (1, 1, 'Great Product!'),
    (1, 2, 'Looks Great'),
    (2, 1, 'Could be better');

-- One row per like a user gave to a product.
CREATE TABLE product_like (
    id         SERIAL,
    product_id INT NOT NULL,
    user_id    INT NOT NULL,
    PRIMARY KEY (id),
    FOREIGN KEY (product_id) REFERENCES products,
    FOREIGN KEY (user_id) REFERENCES users
);

INSERT INTO product_like (product_id, user_id)
VALUES
    (1, 1),
    (1, 2),
    (2, 1),
    (2, 3);
fiddle with the schema and query:
http://sqlfiddle.com/#!15/dff2c/1
Thanks in advance

The reason you are getting multiple results is that the one-to-many relationships from products to product_review and to product_like duplicate rows before aggregation: every review row of a product is paired with every like row of the same product. To work around that, perform the aggregation of those tables in subqueries and join the derived tables instead:
-- One row per product with its reviews as a JSON array, a flag telling
-- whether user 4 liked it, and its like count. Reviews and likes are each
-- aggregated in their own derived table before joining, so neither
-- one-to-many join can multiply the other's rows.
SELECT
    p.*,
    pr.reviews,
    -- product_like.user_id is NOT NULL with a foreign key to users, so the
    -- like row alone proves the user exists; no join to users is needed.
    EXISTS (
        SELECT 1
        FROM product_like AS lk
        WHERE lk.user_id = 4
          AND lk.product_id = p.id
    ) AS liked,
    -- Products without likes have no row in pl; report 0 instead of NULL.
    COALESCE(pl.total_likes, 0) AS totalLikes
FROM products AS p
LEFT JOIN (
    SELECT
        product_id,
        array_to_json(array_agg(product_review.*)) AS reviews
    FROM product_review
    GROUP BY product_id
) AS pr
    ON pr.product_id = p.id
LEFT JOIN (
    SELECT
        product_id,
        COUNT(*) AS total_likes
    FROM product_like
    GROUP BY product_id
) AS pl
    ON pl.product_id = p.id
-- Without an ORDER BY the row order is unspecified.
ORDER BY p.id
Output:
id title image reviews liked totallikes
1 Photo photo.jpg [{"id":1,"product_id":1,"user_id":1,"review":"Great Product!"},{"id":2,"product_id":1,"user_id":2,"review":"Looks Great"}] f 2
2 Book book.jpg [{"id":3,"product_id":2,"user_id":1,"review":"Could be better"}] f 2
3 Table table.jpg f 0
Demo on dbfiddle

Related

Select records that do not have at least one child element

How can I make an SQL query to select records that do not have at least one child element?
I have 3 tables: article (~40K rows), calendar (~450K rows) and calendar_cost (~500K rows).
It is necessary to select such entries of the article table:
there are no entries in the calendar table,
if there are entries in the calendar table, then all of them should not have any entries in the calendar_cost table.
-- Sample schema: article -> calendar -> calendar_cost, each child removed
-- together with its parent (ON DELETE CASCADE).
CREATE TABLE article (
    id   INT PRIMARY KEY,
    name VARCHAR
);

CREATE TABLE calendar (
    id         INT PRIMARY KEY,
    article_id INT REFERENCES article (id) ON DELETE CASCADE,
    number     VARCHAR
);

CREATE TABLE calendar_cost (
    id          INT PRIMARY KEY,
    calendar_id INT REFERENCES calendar (id) ON DELETE CASCADE,
    cost_value  NUMERIC
);

-- Article 3 gets no calendar rows.
INSERT INTO article (id, name)
VALUES
    (1, 'Article 1'),
    (2, 'Article 2'),
    (3, 'Article 3');

INSERT INTO calendar (id, article_id, number)
VALUES
    (101, 1, 'Point 1-1'),
    (102, 1, 'Point 1-2'),
    (103, 2, 'Point 2');

-- Only calendar 101 (Article 1) gets cost rows.
INSERT INTO calendar_cost (id, calendar_id, cost_value)
VALUES
    (400, 101, 100.123),
    (401, 101, 400.567);
As a result, "Article 2" (condition 2) and "Article 3" (condition 1) will suit us.
My SQL query is very slow (the second condition part), how can I do it optimally? Is it possible to do without "union all" operator?
-- First condition
-- Asker's original two-part query.
SELECT art.id
FROM article AS art
LEFT JOIN calendar AS cal
    ON art.id = cal.article_id
WHERE cal.id IS NULL

UNION ALL

-- Second condition
SELECT art.id
FROM article AS art
WHERE art.id NOT IN (
    SELECT inner_art.id
    FROM article AS inner_art
    JOIN calendar AS cal
        ON inner_art.id = cal.article_id
    JOIN calendar_cost AS cc
        ON cal.id = cc.calendar_id
    WHERE inner_art.id = art.id
    LIMIT 1
)
UPDATE
This is how you can fill my tables with random data of about the same volume. The @Bohemian query is very fast, and the rest are very slow. But as soon as I applied the 2 indexes that @nik advised, all queries began to execute very, very quickly!
-- Anonymous PL/pgSQL block that creates the three tables and two indexes,
-- then fills them with randomly sized test data.
do $$
declare
-- NOTE: these variables share their names with table columns used below.
article_id int;
calendar_id bigint;
i int; j int;
begin
create table article (
id int PRIMARY KEY,
name varchar
);
create table calendar (
id serial PRIMARY KEY,
article_id int REFERENCES article (id) ON DELETE CASCADE,
number varchar
);
-- Index on the foreign-key column used to join calendar to article.
create INDEX ON calendar(article_id);
create table calendar_cost (
id serial PRIMARY KEY,
calendar_id bigint REFERENCES calendar (id) ON DELETE CASCADE,
cost_value numeric
);
-- Index on the foreign-key column used to join calendar_cost to calendar.
create INDEX ON calendar_cost(calendar_id);
-- One article per iteration, ids 1 through 45000.
for article_id in 1..45000 loop
insert into article (id, name) values (article_id, 'Article ' || article_id);
-- The upper bound is random, so the number of calendar rows varies per article.
for i in 0..floor(random() * 25) loop
-- RETURNING captures the generated calendar id for the cost rows below.
insert into calendar (article_id, number) values (article_id, 'Number ' || article_id || '-' || i) returning id into calendar_id;
-- Random upper bound again; the loop range is inclusive, so at least one cost row is inserted.
for j in 0..floor(random() * 2) loop
-- cost_value is a random number below 100, rounded to 3 decimals.
insert into calendar_cost (calendar_id, cost_value) values (calendar_id, round((random() * 100)::numeric, 3));
end loop;
end loop;
end loop;
end $$;
@Bohemian
Planning Time: 0.405 ms
Execution Time: 1196.082 ms
@nbk
Planning Time: 0.702 ms
Execution Time: 165.129 ms
@Chris Maurer
Planning Time: 0.803 ms
Execution Time: 800.000 ms
@Stu
Planning Time: 0.446 ms
Execution Time: 280.842 ms
So which query to choose now as the right one is a matter of taste.
No need to split the conditions: The only condition you need to check for is that there are no calendar_cost rows whatsoever, which is the case if there are no calendar rows.
The trick is to use outer joins, which still return the parent table but have all null values when there is no join. Further, count() does not count null values, so requiring that the count of calendar_cost is zero is all you need.
-- Articles that have no calendar_cost rows at all. The outer joins keep
-- every article, and COUNT(column) skips the NULLs they produce.
SELECT art.id
FROM article AS art
LEFT JOIN calendar AS cal
    ON cal.article_id = art.id
LEFT JOIN calendar_cost AS cc
    ON cc.calendar_id = cal.id
GROUP BY art.id
HAVING COUNT(cc.calendar_id) = 0
See live demo.
If there are indexes on the id columns (the usual case), this query will perform quite well given the small table sizes.
Your second condition should start just like your first one: find all the calendar entries without calendar cost and only afterwards join it to article.
-- Second condition only: articles that have calendar rows, none of which
-- has a calendar_cost row.
-- Grouping by article and requiring a zero cost count enforces "all calendar
-- rows are cost-less"; a plain "cc.calendar_id IS NULL" filter would accept
-- an article as soon as ANY of its calendar rows lacks a cost (Article 1 in
-- the sample data) and would repeat the id once per such row.
SELECT a.id
FROM article AS a
INNER JOIN (
    SELECT c.article_id
    FROM calendar AS c
    LEFT JOIN calendar_cost AS cc
        ON cc.calendar_id = c.id
    GROUP BY c.article_id
    -- COUNT(column) ignores the NULLs produced by the outer join.
    HAVING COUNT(cc.calendar_id) = 0
) AS cnone
    ON cnone.article_id = a.id
This approach is based on the thought that calendar entries without calendar_cost are relatively rare compared to all the calendar entries.
Your query is not valid in MySQL, because subqueries inside an IN clause don't support LIMIT there.
Adding some indexes on article_id and calendar_id
will help the performance,
as you can see in the query plan:
-- MySQL version of the sample schema, with secondary indexes on the two
-- columns used to join child rows to their parents.
create table article (
id int PRIMARY KEY,
name varchar(100)
);
-- NOTE(review): older MySQL versions parse but ignore inline REFERENCES
-- clauses, so these foreign keys may not be enforced; confirm for the
-- target version.
create table calendar (
id int PRIMARY KEY,
article_id int REFERENCES article (id) ON DELETE CASCADE,
number varchar(100)
,index(article_id)
);
-- NOTE(review): a bare NUMERIC in MySQL means DECIMAL(10,0), so the
-- fractional cost values inserted below are presumably rounded on insert,
-- which would explain the two warnings reported for the last INSERT.
create table calendar_cost (
id int PRIMARY KEY,
calendar_id int REFERENCES calendar (id) ON DELETE CASCADE,
cost_value numeric
,INDEX(calendar_id)
);
insert into article (id, name) values
(1, 'Article 1'),
(2, 'Article 2'),
(3, 'Article 3');
insert into calendar (id, article_id, number) values
(101, 1, 'Point 1-1'),
(102, 1, 'Point 1-2'),
(103, 2, 'Point 2');
insert into calendar_cost (id, calendar_id, cost_value) values
(400, 101, 100.123),
(401, 101, 400.567);
Records: 3 Duplicates: 0 Warnings: 0
Records: 3 Duplicates: 0 Warnings: 0
Records: 2 Duplicates: 0 Warnings: 2
-- Articles that have no calendar rows (anti-join via LEFT JOIN / IS NULL).
SELECT art.id
FROM article AS art
LEFT JOIN calendar AS cal
    ON cal.article_id = art.id
WHERE cal.id IS NULL
id
3
-- Query plan for the two-part query, with the NOT IN subquery replaced by a
-- join against a one-row derived table.
-- First condition: articles without any calendar row.
EXPLAIN
select a.id from article a
left join calendar c on a.id = c.article_id
where c.id is null
union all
-- Second condition
-- NOTE(review): t1 has no ORDER BY, so LIMIT 1 keeps one unspecified article
-- that has a cost row, and the "<>" join then returns every other article.
-- That matches the expected ids on the three-row sample, but it does not look
-- equivalent to a per-article check in general; confirm before reuse.
select a.id from article a
JOIN (
select aa.id from article aa
join calendar c on aa.id = c.article_id
join calendar_cost cost on c.id = cost.calendar_id
LIMIT 1
) t1 ON t1.id <> a.id
id
select_type
table
partitions
type
possible_keys
key
key_len
ref
rows
filtered
Extra
1
PRIMARY
a
null
index
null
PRIMARY
4
null
3
100.00
Using index
1
PRIMARY
c
null
ref
article_id
article_id
5
fiddle.a.id
3
33.33
Using where; Not exists; Using index
2
UNION
<derived3>
null
system
null
null
null
null
1
100.00
null
2
UNION
a
null
index
null
PRIMARY
4
null
3
66.67
Using where; Using index
3
DERIVED
cost
null
index
calendar_id
calendar_id
5
null
2
100.00
Using where; Using index
3
DERIVED
c
null
eq_ref
PRIMARY,article_id
PRIMARY
4
fiddle.cost.calendar_id
1
100.00
Using where
3
DERIVED
aa
null
eq_ref
PRIMARY
PRIMARY
4
fiddle.c.article_id
1
100.00
Using index
fiddle
Try the following using a combination of exists criteria.
Usually, with supporting indexes, this is more performant than simply joining tables, as it offers a short-circuit to get out as soon as a match is found, whereas joining typically filters after all rows are joined.
-- Articles for which no (calendar, calendar_cost) pair exists. This covers
-- both required cases at once: an article without calendar rows has no such
-- pair, and neither does one whose calendar rows all lack cost rows.
-- Logically, "NOT EXISTS calendar OR (EXISTS calendar AND NOT EXISTS cost)"
-- reduces to "NOT EXISTS cost", so one correlated subquery is sufficient.
SELECT a.id
FROM article AS a
WHERE NOT EXISTS (
    SELECT 1
    FROM calendar AS c
    INNER JOIN calendar_cost AS cc
        ON cc.calendar_id = c.id
    WHERE c.article_id = a.id
);

Get column values from mapping tables "id | value" binding

I am trying to get all the columns associated with my item; some columns are "key | value" paired, and that's where my problem is. My idea for a structure looks like this
I can retrieve 1 item from Posts along with all associated tag names with this query, but the problem is that I can only get 1 post
-- Up to 10 posts with the names of their four mapped tags. Each tag slot in
-- Tagmap is resolved through its own join to Tag.
SELECT TOP(10)
    post.title,
    post.post_id,
    t1.name AS tag1,
    t2.name AS tag2,
    t3.name AS tag3,
    t4.name AS tag4
FROM Posts AS post
INNER JOIN Tagmap AS map
    ON post.post_id = map.post_id
INNER JOIN Tag AS t1
    ON t1.tag_id = map.tag_id1
INNER JOIN Tag AS t2
    ON t2.tag_id = map.tag_id2
INNER JOIN Tag AS t3
    ON t3.tag_id = map.tag_id3
INNER JOIN Tag AS t4
    ON t4.tag_id = map.tag_id4
Here is the DDL for the table, or you can get it from this PasteBin link:
-- Tag and Posts are created before Tagmap because Tagmap's foreign keys
-- reference both; creating Tagmap first fails on the missing Posts table.
-- All identity columns are seeded at 0, so the first row of each table has id 0.
CREATE TABLE Tag
(
    tag_id int NOT NULL identity(0,1) primary key,
    name nvarchar(30) NOT NULL
);

CREATE TABLE Posts
(
    post_id int NOT NULL identity(0,1) primary key,
    title nvarchar(50) not null
);

CREATE TABLE Tagmap
(
    id int NOT NULL identity(0,1) primary key,
    post_id int FOREIGN KEY REFERENCES Posts(post_id),
    tag_id1 int FOREIGN KEY REFERENCES Tag(tag_id),
    tag_id2 int FOREIGN KEY REFERENCES Tag(tag_id),
    tag_id3 int FOREIGN KEY REFERENCES Tag(tag_id),
    tag_id4 int FOREIGN KEY REFERENCES Tag(tag_id)
);

-- Explicit column lists keep the inserts valid if columns are added later.
INSERT INTO Posts (title) VALUES ('Title1');
INSERT INTO Posts (title) VALUES ('Title2');
INSERT INTO Tag (name) VALUES ('Tag number one');
INSERT INTO Tag (name) VALUES ('Tag number two');
INSERT INTO Tag (name) VALUES ('Tag number three');
INSERT INTO Tag (name) VALUES ('Tag number four');
-- Maps post 0 to tags 0 through 3.
INSERT INTO Tagmap (post_id, tag_id1, tag_id2, tag_id3, tag_id4) VALUES (0, 0, 1, 2, 3);
My question: is my approach totally off? Should I change the structure or is it good?
If so how can it be better and how can I retrieve all these "key | value" columns along with my posts?
First, you should fix your data structure, so you have one row in tagMap per post_id and tag_id -- not four!
But even with your current structure, I imagine that not all posts have four tags. So, with your current data model you should be using LEFT JOIN, rather than INNER JOIN.

Select rows that have a specific set of items associated with them through a junction table

Suppose we have the following schema:
-- Customers, items, and the junction table recording which customer has
-- which item.
CREATE TABLE customers (
    id   INTEGER PRIMARY KEY,
    name TEXT
);

CREATE TABLE items (
    id   INTEGER PRIMARY KEY,
    name TEXT
);

CREATE TABLE customers_items (
    customerid INTEGER,
    itemid     INTEGER,
    FOREIGN KEY (customerid) REFERENCES customers (id),
    FOREIGN KEY (itemid) REFERENCES items (id)
);
Now we insert some example data:
-- Two customers, then two items; insertion order is kept.
INSERT INTO customers (name)
VALUES ('John');

INSERT INTO customers (name)
VALUES ('Jane');

INSERT INTO items (name)
VALUES ('duck');

INSERT INTO items (name)
VALUES ('cake');
Let's assume that John and Jane have id's of 1 and 2 and duck and cake also have id's of 1 and 2.
Let's give a duck to John and both a duck and a cake to Jane.
-- Customer 1 gets item 1; customer 2 gets items 1 and 2.
INSERT INTO customers_items (customerid, itemid)
VALUES (1, 1);

INSERT INTO customers_items (customerid, itemid)
VALUES (2, 1);

INSERT INTO customers_items (customerid, itemid)
VALUES (2, 2);
Now, what I want to do is to run two types of queries:
Select names of customers who have BOTH a duck and a cake (should return 'Jane' only).
Select names of customers that have a duck and DON'T have a cake (should return 'John' only).
For the two type of queries listed, you could use the EXISTS clause. Below is an example query using the exists clause:
-- Customers who have a duck but do not have a cake.
-- For "has BOTH a duck and a cake", change NOT EXISTS to EXISTS.
-- Each subquery is correlated to the outer customer through
-- customers_items.customerid, so customers need not be joined again inside it.
SELECT cust.name
FROM customers AS cust
WHERE EXISTS (
        SELECT 1
        FROM customers_items AS ci
        INNER JOIN items AS i
            ON i.id = ci.itemid
        WHERE ci.customerid = cust.id
          AND i.name = 'duck'
    )
  AND NOT EXISTS (
        SELECT 1
        FROM customers_items AS ci
        INNER JOIN items AS i
            ON i.id = ci.itemid
        WHERE ci.customerid = cust.id
          AND i.name = 'cake'
    )
Here is a working example: http://sqlfiddle.com/#!6/3d362/2

Report with two independent columns

I have 3 tables with data, and I would like to create a report which shows me people with their books and furniture. The biggest problem is to get an independent list with two columns: books and furniture.
-- People plus two independent child tables (books, furnitures), each
-- pointing at its owner through personId.
CREATE TABLE people (
    id INT PRIMARY KEY
)
CREATE TABLE books (
    id       INT PRIMARY KEY,
    personId INT FOREIGN KEY REFERENCES people (id)
)
CREATE TABLE furnitures (
    id       INT PRIMARY KEY,
    personId INT FOREIGN KEY REFERENCES people (id)
)
go
-- Person 1 owns only books, person 2 only furniture, person 3 both.
INSERT INTO people (id)
VALUES (1), (2), (3)
INSERT INTO books (id, personId)
VALUES (1, 1), (2, 1), (3, 1), (4, 1), (5, 1), (6, 3), (7, 3)
INSERT INTO furnitures (id, personId)
VALUES (1, 2), (2, 2), (3, 2), (4, 3), (5, 3), (6, 3), (7, 3), (8, 3)
I want to get a report in this form:
You need to do a join but you don't have a key. So, let's create one using row_number(). The rest is just a full outer join to combine the data:
-- Lists each person's books and furniture side by side. Both lists are
-- numbered per person, and the n-th book is paired with the n-th piece of
-- furniture; the FULL JOIN keeps the surplus rows of the longer list.
SELECT
    COALESCE(bk.personId, fu.personId) AS personId,
    bk.id AS bookid,
    fu.id AS furnitureid
FROM (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY personId ORDER BY id) AS seqnum
    FROM books AS src
) AS bk
FULL JOIN (
    SELECT
        src.*,
        ROW_NUMBER() OVER (PARTITION BY personId ORDER BY id) AS seqnum
    FROM furnitures AS src
) AS fu
    ON fu.personId = bk.personId
   AND bk.seqnum = fu.seqnum;

Self join many-to-many relationship

From sample data below, assuming Julie (1) has friends Adam, David, John (2, 3, 4).
Adam (2) has friends Julie, David, John (1, 3, 4).
ID Name
1 Julie
2 Adam
3 David
4 John
5 Sam
This makes a self join and a many-to-many relationship within ONE table.
In addition to the above problem, say Julie (1) adds Sam (5) as a friend; technically and practically speaking, Sam (5) is now a friend of Julie (1) as well. This makes things more complicated because the relationship is bi-directional.
So I'm wondering:
How do I design the database?
How do I make a query that will return all friends of every users?
Thank you!
Example Data:
PEOPLE
PERS_ID PERS_NAME
1 Julie
2 Adam
3 David
4 John
5 Sam
FRIENDSHIPS
PERS_ID FRIEND_ID
1 2
1 3
1 4
2 3
2 4
Query:
-- Lists every person together with each of their friends, treating a
-- friendship row as bi-directional: a person matches a row on either side,
-- and the friend is whoever is on that row and is not the person.
SELECT
    people.pers_id    AS person,
    people.pers_name  AS person_name,
    peoplef.pers_id   AS friend_id,
    peoplef.pers_name AS friend_name
FROM people
INNER JOIN friendships
    ON people.pers_id = friendships.pers_id
    OR people.pers_id = friendships.friend_id
INNER JOIN people AS peoplef
    ON peoplef.pers_id <> people.pers_id
   AND (
        peoplef.pers_id = friendships.pers_id
        OR peoplef.pers_id = friendships.friend_id
       )
-- Named sort keys instead of column positions, so the ordering survives
-- changes to the select list.
ORDER BY person_name, friend_name
SQL Fiddle demo: http://sqlfiddle.com/#!2/97b41/6/0
This will work regardless of whether or not you record both directions on the friendships table.
Pretty much agree with the others. You need a link table. I'll give a bit more detail.. some examples of keys and indexes and the query you wanted (bi-directional).
-- Users; the identity is seeded at 0, so the first inserted user has ID 0.
CREATE TABLE dbo.tblUser
(
    ID int identity(0,1),
    name varchar(20),
    -- Table-level constraint, separated from the column list by a comma.
    CONSTRAINT PK_tblUser PRIMARY KEY (ID)
);

-- Many to many link table with FKs
CREATE TABLE dbo.tblFriend
(
    ID1 int not null constraint FK_tblUser_ID1 foreign key references dbo.tblUser(ID),
    ID2 int not null constraint FK_tblUser_ID2 foreign key references dbo.tblUser(ID),
    CONSTRAINT PK_tblFriend PRIMARY KEY (ID1, ID2)
);

-- Add index (So you can get an index seek if using ID2)
CREATE INDEX IX_tblFriend_ID2 ON dbo.tblFriend (ID2);

-- Test data
INSERT INTO dbo.tblUser (name)
VALUES ('Julie'), ('Adam'), ('David'), ('John'), ('Sam');

INSERT INTO dbo.tblFriend (ID1, ID2)
VALUES (0, 1), (2, 0);

-- Get bi-directional friend to friend relationships: each stored pair is
-- emitted once as (ID1, ID2) and once reversed. UNION (not UNION ALL) removes
-- the duplicates that appear when both directions are stored.
SELECT U1.name AS User1, U2.name AS User2
FROM dbo.tblFriend AS F
INNER JOIN dbo.tblUser AS U1 ON U1.ID = F.ID1
INNER JOIN dbo.tblUser AS U2 ON U2.ID = F.ID2
UNION
SELECT U2.name AS User1, U1.name AS User2
FROM dbo.tblFriend AS F
INNER JOIN dbo.tblUser AS U1 ON U1.ID = F.ID1
INNER JOIN dbo.tblUser AS U2 ON U2.ID = F.ID2
ORDER BY User1, User2;
One approach could be that you create second table that stores the person and friend ids. In this scenario, consider the following tables.
-- Users plus a Friend table holding one row per friendship pair.
CREATE TABLE User (
    id   INT AUTO_INCREMENT PRIMARY KEY,
    name VARCHAR(20)
);

CREATE TABLE Friend (
    user_id   INT,
    friend_id INT
);

INSERT INTO User (name)
VALUES
    ('Julie'),
    ('Adam'),
    ('David'),
    ('John'),
    ('Sam');

-- Two friendship pairs, each stored once.
INSERT INTO Friend (user_id, friend_id)
VALUES
    (1, 5),
    (3, 1);
Now the Friend table will store the user_id and his/her friend_id. For getting the list of friends for a particular user, you can search the id matching in either of these two columns. Below are sample queries.
-- Get Friends of Julie: match her id on either side of a pair and return
-- the id from the opposite side.
SELECT
    1 AS user_id,
    CASE
        WHEN user_id = 1 THEN friend_id
        ELSE user_id
    END AS friend_id
FROM Friend
WHERE user_id = 1
   OR friend_id = 1;

-- Get Friends of David
SELECT
    3 AS user_id,
    CASE
        WHEN user_id = 3 THEN friend_id
        ELSE user_id
    END AS friend_id
FROM Friend
WHERE user_id = 3
   OR friend_id = 3;
I hope you get the idea from this and can play around with it.
I tried what you have written in your question:
-- Table variable holding the sample people. Table variables are declared
-- with an @ prefix; "#" names a temp table and is not valid in DECLARE.
DECLARE @table TABLE
(
    id int,
    name varchar(40)
);

INSERT INTO @table (id, name)
VALUES
    (1, 'Julie'),
    (2, 'Adam'),
    (3, 'David'),
    (4, 'John'),
    (5, 'Sam');

-- Pairs Julie (1) and Adam (2) with every other person except Sam (5).
SELECT
    t1.name,
    t2.name AS friend
FROM @table AS t1
INNER JOIN @table AS t2
    ON t1.id <> t2.id
WHERE t1.id IN (1, 2)
  AND t2.id <> 5
ORDER BY t1.id;
order by t1.id