SQL query. Get similar rows ordered by desc - sql

I have a simple table with two fields.
1) book_id - int
2) tag_id - int
One book can have multiple tags, such as
book_id:1 - tag_id: 2, 3, 5, 9
Here is the question: how can I get the books that are similar to a specific book? They should also be ordered descending by something like a "likeness" count.
Example: I want to get all book_ids that share tags with book_id = 1, ordered by the number of shared tags.
Specific book: book_id: 1 - tag_id: 2, 3, 5 , 9
Result:
book_id: 54 - tag_id: 2, 3, 5, 14
book_id: 104 - tag id: 2, 3, 10

You can order the books by the number of tags they have in common with your given book:
-- Rank other books by the number of tags they share with a given book.
--   bt  : tag rows of the reference book (bound through the ? parameter)
--   bt2 : tag rows of candidate books carrying one of those tags
-- NOTE(review): count(*) equals the number of shared tags only if
-- (book_id, tag_id) pairs are unique in book_tags -- confirm.
select bt2.book_id, count(*) as tags_in_common
from book_tags bt
inner join book_tags bt2
    on bt.tag_id = bt2.tag_id
    -- leave out the reference book itself; it would otherwise match all of
    -- its own tags and always come out on top
    and bt2.book_id <> bt.book_id
where bt.book_id = ?
group by bt2.book_id
order by tags_in_common desc;

Let's say your table and data look like this:
-- Scratch (temp) table: one row per (book, tag) pair.
create table #books_tags (
    book_id int,
    tag_id  int
);

-- Sample data: five books with three or four tags each.
insert into #books_tags (book_id, tag_id)
values
    (1, 2),   (1, 3),   (1, 5),   (1, 9),    -- book_id:1
    (54, 2),  (54, 3),  (54, 5),  (54, 14),  -- book_id:54
    (104, 2), (104, 3), (104, 10),           -- book_id:104
    (2, 3),   (2, 5),   (2, 11),  (2, 14),   -- book_id:2
    (3, 3),   (3, 9),   (3, 10),  (3, 11);   -- book_id:3
Then your query is this:
-- For every ordered pair of distinct books that share at least one tag,
-- report how many tags they share and which ones.
--   a : tag rows of the book being described
--   b : tag rows of another book carrying the same tag
-- An inner join is used on purpose: with a left join, tags that no other
-- book has would form a group with similar_book_id = NULL whose count(*)
-- would wrongly be reported as matching tags.
select a.book_id,
       b.book_id as similar_book_id,
       count(*) as matching_tags,
       string_agg(a.tag_id, ',') as tag_ids  -- the shared tag ids, comma separated
from #books_tags a
inner join #books_tags b
    on b.tag_id = a.tag_id
    and b.book_id <> a.book_id
group by a.book_id, b.book_id
order by matching_tags desc, a.book_id
(in SQL Server 2017 or later)

Related

SQL. Where condition for multiple values of column

I'm looking for a hint on how to filter for multiple values within a column.
I'm interested in an "AND" condition over several values of column X (i.e. the statement WHERE Column X IN (1,2,3) doesn't fulfil my needs, because it matches rows having any one of the values).
Consider this example table:
I'm interested in finding the COD_OPE that has both status 6 and status 7. In this example I want to find only COD_OPE = 3.
If I use WHERE status IN (6, 7), I also get COD_OPE 1 and 2, which each have only one of the two statuses.
Any smart way to find cod_ope = 3?
Thank you!
Code for table in the example:
-- Example table: one row per observation of an operation's status in a year.
CREATE TABLE [TABLE] (
    COD_OPE          int,
    STATUS           int,
    Observation_date int
)

-- Sample rows, grouped by COD_OPE.
INSERT INTO [TABLE] (COD_OPE, STATUS, Observation_date)
VALUES
    (1, 1, 2022), (1, 1, 2021), (1, 1, 2020), (1, 6, 2019), (1, 6, 2018),
    (2, 1, 2022), (2, 7, 2021), (2, 4, 2020), (2, 4, 2019), (2, 7, 2018),
    (3, 1, 2022), (3, 1, 2021), (3, 4, 2020), (3, 7, 2019), (3, 6, 2018)

-- Show everything that was inserted.
select * from [TABLE]
Use aggregation:
-- Keep only the rows carrying one of the two wanted statuses, then require
-- that both distinct statuses occur for the same COD_OPE.
SELECT t.COD_OPE
FROM [TABLE] AS t
WHERE t.STATUS = 6
   OR t.STATUS = 7
GROUP BY t.COD_OPE
HAVING COUNT(DISTINCT t.STATUS) = 2;

Joining two different tables with a common third table on a common column

Here are the tables
Table: Status
ID, StatusDesc
1, New
2, Active
3, Cancelled
4, Complete
Table: Order (foreign key relationship with Status table above)
ID, OrderNumber, StatusID
1, 1001 , 1
2, 1002, 1
3, 1003, 2
4, 1004, 3
5, 1500, 4
Table: LineItem(foreign key relationship with Order and Status tables above)
ID, OrderNumber, LineItemNumber, StatusID
1, 1001 , 1, 1
2, 1001 , 2, 1
3, 1002 , 1, 2
4, 1002 , 2, 1
5, 1003 , 1, 2
6, 1004 , 1, 3
7, 1004 , 2, 4
8, 1500 , 1, 3
As you can see, the table Status holds the statuses common for both Order and LineItem tables.
I want to produce the result which will include columns like this, status description for both Order and LineItem:
OrderNumber, LineItemNumber, StatusDesc_Order, StatusDesc_LineItem
How to do this?
You could join the Status table twice to achieve this:
-- One row per line item, showing the status text of its order and of the
-- line item itself. The Status table is joined twice under different
-- aliases: once through the order, once through the line item.
SELECT
    ord.OrderNumber,
    item.LineItemNumber,
    ord_status.StatusDesc  AS StatusDesc_Order,
    item_status.StatusDesc AS StatusDesc_LineItem
FROM [Order] AS ord
INNER JOIN [LineItem] AS item
    ON item.OrderNumber = ord.OrderNumber
INNER JOIN [Status] AS ord_status
    ON ord_status.ID = ord.StatusID
INNER JOIN [Status] AS item_status
    ON item_status.ID = item.StatusID
I do suggest, however, that you try to stay away from table names that are reserved keywords, like Order and Status. It is also good practice to explicitly add schema prefixes before the table names in the query (i.e. dbo.Status or another user-defined schema).
If the required results really are that simple then just use a couple of sub-queries e.g.
-- SETUP TEST DATA
-- SETUP TEST DATA
-- Table variables: T-SQL requires the @ prefix for variables declared with
-- "declare ... table" (a # prefix is only valid for temp tables created
-- with CREATE TABLE).
declare @Order table (id int, OrderNumber int, StatusId int)
insert into @Order (id, OrderNumber, StatusId)
values (1, 1001, 1), (2, 1002, 1), (3, 1003, 2), (4, 1004, 3), (5, 1500, 4)
declare @LineItem table (id int, OrderNumber int, LineItemNumber int, StatusId int)
insert into @LineItem (id, OrderNumber, LineItemNumber, StatusId)
-- last row uses LineItemNumber 1 so the data matches the question's LineItem table
values (1, 1001, 1, 1), (2, 1001, 2, 1), (3, 1002, 1, 2), (4, 1002, 2, 1), (5, 1003, 1, 2), (6, 1004, 1, 3), (7, 1004, 2, 4), (8, 1500, 1, 3)
declare @Status table (id int, StatusDesc varchar(32))
insert into @Status(id, StatusDesc)
values (1,'New'), (2,'Active'), (3,'Cancelled'), (4,'Complete')
-- QUERY DATA
-- One row per line item with two scalar sub-queries:
--   StatusDesc_Order    : status text of the order the line item belongs to
--   StatusDesc_LineItem : status text of the line item itself
select LI.OrderNumber, LI.LineItemNumber
, (select S.StatusDesc from @Status S where S.id = (select O.StatusId from @Order O where O.OrderNumber = LI.OrderNumber)) [StatusDesc_Order]
, (select S.StatusDesc from @Status S where S.id = LI.StatusId) [StatusDesc_LineItem]
from @LineItem LI
order by LI.OrderNumber, LI.LineItemNumber
Note: If you provide your sample data in this format in future questions you make your question much easier to answer.

Select Id where column takes all values in

Please help me with an SQL query. Here go test tables with data:
-- Cats: one row per cat; the id is generated automatically.
CREATE TABLE "Cats" (
    "CatId" SERIAL PRIMARY KEY,
    "Name"  character varying NOT NULL
);

-- Measures: one weight reading for a cat on a given day number.
CREATE TABLE "Measures" (
    "MeasureId"  SERIAL PRIMARY KEY,
    "CatId"      integer NOT NULL REFERENCES "Cats" ("CatId"),
    "Weight"     double precision NOT NULL,
    "MeasureDay" integer NOT NULL
);

-- Three cats; with a fresh SERIAL sequence they receive CatId 1, 2 and 3.
INSERT INTO "Cats" ("Name")
VALUES ('A'), ('B'), ('C');

-- Readings as (CatId, Weight, MeasureDay).
INSERT INTO "Measures" ("CatId", "Weight", "MeasureDay")
VALUES
    -- cat 1: days 1, 2, 5
    (1, 5.0, 1), (1, 5.3, 2), (1, 6.1, 5),
    -- cat 2: days 1 through 5
    (2, 3.2, 1), (2, 3.5, 2), (2, 3.8, 3), (2, 4.0, 4), (2, 4.0, 5),
    -- cat 3: days 1 through 4
    (3, 6.6, 1), (3, 6.9, 2), (3, 7.0, 3), (3, 6.9, 4);
How do I select those CatId that have measures for ALL 5 days (MeasureDay takes all values in (1, 2, 3, 4, 5)) ?
On this test data, the query should return 2 since only Cat with CatId = 2 has measures for all days (1, 2, 3, 4, 5).
I assume that I should use GROUP BY "CatId" and HAVING clauses, but what kind of query should be inside HAVING?
Try it like this, using GROUP BY:
-- Cats that have a measure on every one of the days 1..5.
-- The identifiers are double-quoted because the tables were created with
-- quoted mixed-case names; unquoted, PostgreSQL would fold them to lower
-- case (catid, measures, ...) and fail to find them.
select "CatId"
from "Measures"
where "MeasureDay" in (1, 2, 3, 4, 5)  -- ignore measures taken on any other day
group by "CatId"
having count(distinct "MeasureDay") = 5;  -- all five wanted days are present
You can use aggregation and a having clause:
-- Cats that have a measure on every one of the days 1..5, expressed purely
-- with aggregation.
-- The identifiers are double-quoted because the tables were created with
-- quoted mixed-case names, which PostgreSQL does not case-fold.
select m."CatId"
from "Measures" m
group by m."CatId"
-- Count only the five wanted days: without the filter, a cat measured on
-- five arbitrary distinct days (e.g. 1, 2, 3, 6, 7) would also qualify.
having count(distinct m."MeasureDay") filter (where m."MeasureDay" in (1, 2, 3, 4, 5)) = 5;

Retrieving consecutive rows (and the counts) with the same values

I've got a table with almost 10 million rows and would like to run this query on the latest million, or hundred thousand or so.
Here's a SQL fiddle with example data and input/output: http://sqlfiddle.com/#!9/340a41
Is this even possible?
-- Example table: a sequence of named readings ordered by id.
CREATE TABLE object (
    `id`    int,
    `name`  varchar(7),
    `value` int
);

-- Sample rows as (id, name, value), in ascending id order.
INSERT INTO object (`id`, `name`, `value`)
VALUES
    (1, 'a', 1),  (2, 'b', 2),  (3, 'c', 100),
    (4, 'a', 1),  (5, 'b', 2),  (6, 'c', 200),
    (7, 'a', 2),  (8, 'b', 2),  (9, 'c', 300),
    (10, 'a', 2), (11, 'b', 2),
    (12, 'a', 2), (13, 'b', 2), (14, 'c', 400)
;
-- Want:
-- name, max(id), count(id)
-- 'a', 4, 2
-- 'b', 14, 5
-- 'a', 12, 3
If you want the latest and the id is implemented sequentially, then you can do this using limit or top. In SQL Server:
-- SQL Server: the 100000 rows with the highest ids, highest id first.
select top (100000)
    o.*
from object as o
order by o.id desc;
In MySQL, you would use limit:
-- MySQL: the 100000 rows with the highest ids, highest id first.
select
    o.*
from object as o
order by o.id desc
limit 100000
-- Aggregate per name over only the latest 1,000,000 rows (highest ids).
-- The inner query carries both a SQL Server and a MySQL row limiter; keep
-- only the one that matches your server.
select name, count(id) cnt, max(id) max_id, max(value) max_v
from
(select
top 1000000 -- MS SQL Server
id, name, value
from myTable
order by id desc
limit 1000000 -- MySQL (LIMIT must come after ORDER BY)
) latest -- a derived table needs an alias on both servers
group by name
Remove the line that doesn't match your server.

Display names of col 1 having the same col 2 in at least two col 3s from a three-column many-to-many table

I have a question about SQL Server query.
The tables can be described as following:
Suppliers (SID, sName, status, sCity), PK = SID
Parts (PID, pName, color, weight, pCity), PK = PID
Projects (JID, jName, city), PK = JID
SPJ (SID, PID, JID, quantity), PK = (SID, PID, JID), FK = SID, PID, JID
I am trying to get supplier numbers and names for suppliers of the same part to at least two projects, here is my query for the task:
-- Asker's attempt, kept as asked: counts the SPJ rows per supplier
-- (joined to Parts) and keeps suppliers with more than one such row.
SELECT
    sup.SID,
    sup.sName,
    COUNT(*)
FROM SPJ AS link
INNER JOIN Suppliers AS sup
    ON sup.SID = link.SID
INNER JOIN Parts AS prt
    ON prt.PID = link.PID
GROUP BY
    sup.SID,
    sup.sName
HAVING
    COUNT(*) >= 2
ORDER BY
    sup.SID,
    sup.sName
Please help me correct this query. Thank you in advance
This does exactly what you ask:
-- Suppliers that deliver at least one part to two or more projects.
-- The sub-query lists the supplier of every (supplier, part) pair that has
-- two or more SPJ rows; (SupplierID, PartID, ProjectID) is the primary key
-- of SPJ, so each row of a pair is a different project.
SELECT
    sup.SupplierID,
    sup.sName
FROM
    dbo.Suppliers AS sup
WHERE
    sup.SupplierID IN (
        SELECT link.SupplierID
        FROM dbo.SPJ AS link
        GROUP BY link.SupplierID, link.PartID
        HAVING COUNT(*) > 1
    );
Please see my SQLFiddle to try it out on a real data set. Note there is also a query showing all the individual rows that participate to make a supplier qualify (see the Qualifies column). Here is a preview of the setup script:
-- Suppliers: auto-numbered id, unique name.
CREATE TABLE dbo.Suppliers (
    SupplierID smallint IDENTITY(1,1) NOT NULL,
    sName      varchar(40) NOT NULL,
    CONSTRAINT PK_Suppliers PRIMARY KEY CLUSTERED (SupplierID),
    CONSTRAINT UQ_Suppliers_sName UNIQUE (sName)
);

-- Parts: auto-numbered id, unique name.
CREATE TABLE dbo.Parts (
    PartID int IDENTITY(1,1) NOT NULL,
    pName  varchar(100) NOT NULL,
    CONSTRAINT PK_Parts PRIMARY KEY CLUSTERED (PartID),
    CONSTRAINT UQ_Parts_pName UNIQUE (pName)
);

-- Projects: auto-numbered id, unique name.
CREATE TABLE dbo.Projects (
    ProjectID int IDENTITY(1,1) NOT NULL,
    jName     varchar(50) NOT NULL,
    CONSTRAINT PK_Projects PRIMARY KEY CLUSTERED (ProjectID),
    CONSTRAINT UQ_Projects_jName UNIQUE (jName)
);

-- SPJ: which supplier delivers which part to which project.
-- The three foreign keys together form the primary key.
CREATE TABLE dbo.SPJ (
    SupplierID smallint NOT NULL,
    PartID     int NOT NULL,
    ProjectID  int NOT NULL,
    CONSTRAINT FK_SPJ_Suppliers FOREIGN KEY (SupplierID) REFERENCES dbo.Suppliers (SupplierID),
    CONSTRAINT FK_SPJ_Parts FOREIGN KEY (PartID) REFERENCES dbo.Parts (PartID),
    CONSTRAINT FK_SPJ_Projects FOREIGN KEY (ProjectID) REFERENCES dbo.Projects (ProjectID),
    CONSTRAINT PK_SPJ PRIMARY KEY CLUSTERED (SupplierID, PartID, ProjectID)
);
-- Seed data for the demo schema. Only names are supplied for the three
-- lookup tables; no id values are given, so the ids the SPJ rows refer to
-- are presumably generated in insertion order starting at 1 -- keep the
-- row order unchanged.

-- Six suppliers.
INSERT dbo.Suppliers
VALUES
('Joe'' Specialty Goods'),
('Major Supplier'),
('Small Supplier'),
('Tamandua Manufacturing, Ltd.'),
('Things ''n'' Stuff'),
('Stuff ''n'' Things');
-- Twelve parts.
INSERT dbo.Parts
VALUES
('Watchamacallit'),
('Thingamajiggy'),
('Thingamabob'),
('Doohickey'),
('Whatsisname'),
('Thingy'),
('Gizmo'),
('Gadget'),
('Widget'),
('Titanium Widget'),
('Electronic Thingamabob'),
('One-Product Patrick');
-- Five projects.
INSERT dbo.Projects
VALUES
('Video Game Manufacturing'),
('Autodoc Research'),
('Consumer Electronics Expansion'),
('Space Shuttle'),
('Tablet Project Cobalt');
-- Deliveries; no column list is given, so the three values in each row
-- presumably map to the SPJ columns in table order (supplier, part, project).
-- In this data suppliers 2 and 4 have a part delivered to several projects;
-- suppliers 1, 3, 5 and 6 never repeat a part across projects.
INSERT dbo.SPJ
VALUES
(1, 2, 1),
(1, 3, 1),
(1, 7, 1),
(1, 11, 1),
(2, 1, 1),
(2, 1, 2),
(2, 1, 3),
(2, 1, 4),
(2, 2, 1),
(2, 2, 2),
(2, 2, 3),
(2, 2, 4),
(2, 2, 5),
(2, 10, 1),
(2, 10, 2),
(2, 10, 3),
(2, 10, 4),
(2, 10, 5),
(3, 5, 4),
(4, 4, 2),
(4, 4, 3),
(5, 9, 1),
(5, 8, 2),
(5, 7, 3),
(5, 6, 4),
(5, 5, 5),
(6, 1, 1),
(6, 2, 1),
(6, 3, 1),
(6, 4, 1),
(6, 5, 1),
(6, 6, 1),
(6, 7, 1),
(6, 8, 1),
(6, 9, 1),
(6, 10, 1),
(6, 11, 1);
-- One row per (supplier, part) pair that occurs with more than one project,
-- together with the number of projects.
SELECT
    link.SID,
    link.PID,
    COUNT(link.JID)
FROM SPJ AS link
GROUP BY
    link.SID,
    link.PID
HAVING COUNT(link.JID) >= 2
You could also do it like this :
-- Suppliers that deliver the same part to at least two different projects.
-- The derived table keeps the qualifying (supplier, part) pairs; DISTINCT
-- collapses suppliers that qualify through more than one part.
SELECT DISTINCT
    qualifying.SupplierID,
    sup.sName
FROM Suppliers AS sup
INNER JOIN (
    SELECT
        SupplierID,
        PartID,
        COUNT(DISTINCT ProjectID) AS NumProjects
    FROM SPJ
    GROUP BY
        SupplierID,
        PartID
    HAVING
        COUNT(DISTINCT ProjectID) > 1
) AS qualifying
    ON qualifying.SupplierID = sup.SupplierID
-- Suppliers (SID, sName) that supply the same part to at least two projects.
-- The table is named Suppliers in the question's schema. The projects are
-- counted per part (GROUP BY PID) inside the sub-query: counting per
-- supplier only would also accept a supplier that delivers two different
-- parts to a single project each.
SELECT s.SID, s.sName
FROM Suppliers AS s
WHERE EXISTS (
    SELECT 1
    FROM SPJ
    WHERE SPJ.SID = s.SID
    GROUP BY SPJ.PID
    -- (SID, PID, JID) is the primary key of SPJ, so every row of a
    -- (supplier, part) pair is a different project
    HAVING COUNT(SPJ.JID) > 1
)