join within recursive with adjacency - sql

I have something like this:
CREATE TABLE categories (
id varchar(250) PRIMARY KEY,
name varchar(250) NOT NULL,
parentid varchar(250)
);
CREATE TABLE products (
id varchar(250) PRIMARY KEY,
name varchar(250) NOT NULL,
price double precision,
category varchar(250) NOT NULL
);
INSERT INTO categories VALUES ('1', 'Rack', '');
INSERT INTO categories VALUES ('2', 'Women', '1');
INSERT INTO categories VALUES ('3', 'Shorts', '2');
INSERT INTO products VALUES ('1', 'Jean', 2.99, '3');
INSERT INTO products VALUES ('2', 'Inflatable Boat', 5.99, '1');
Now, if I wanted to see the total price of products for each category, I could do something like this:
SELECT
categories.name,
SUM(products.price) AS CATPRICE
FROM
categories,
products
WHERE products.category = categories.id
GROUP BY categories.name
;
Which produces output:
name | catprice
--------+----------
Rack | 5.99
Shorts | 2.99
(2 rows)
But notice that "Shorts" is a descendant of "Rack". I want a query that will produce output like this:
name | catprice
--------+----------
Rack | 8.98
(1 row)
So that all product prices are added together under the root category. There are multiple root categories in the category table; only one has been shown for simplicity.
This is what I have thus far:
-- "nodes_cte" is the virtual table that is being created as the recursion continues
-- The contents of the ()s are the columns that are being built
WITH RECURSIVE nodes_cte(name, id, parentid, depth, path) AS (
-- Base case?
SELECT tn.name, tn.id, tn.parentid, 1::INT AS depth, tn.id::TEXT AS path FROM categories AS tn, products AS tn2
LEFT OUTER JOIN categories ON tn2.CATEGORY = categories.ID
WHERE tn.parentid IS NULL
UNION ALL
-- nth case
SELECT c.name, c.id, c.parentid, p.depth + 1 AS depth, (p.path || '->' || c.id::TEXT) FROM nodes_cte AS p, categories AS c, products AS c2
LEFT OUTER JOIN categories ON c2.CATEGORY = categories.ID
WHERE c.parentid = p.id
)
SELECT * FROM nodes_cte AS n ORDER BY n.id ASC;
I have no clue what I've done wrong. The above query returns zero results.

Your recursive query is off by a little. Give this a try:
EDIT -- To make this work with the SUM, use this:
-- Roll every product price up to its root category.
-- nodes_cte walks the category tree only: (name, id) always identify the root,
-- while id2 is the category currently being visited. Products are joined AFTER
-- the recursion; joining them inside it would emit one CTE row per product and
-- make every descendant category's prices be counted once per such row.
WITH RECURSIVE nodes_cte(name, id, id2, parentid) AS (
    -- Base case: root categories (parent stored as '' or as NULL)
    SELECT c.name,
           c.id,
           c.id AS id2,
           c.parentid
    FROM categories c
    WHERE c.parentid = ''
       OR c.parentid IS NULL
    UNION ALL
    -- nth case: children of the category visited in the previous step,
    -- still labelled with the root's name and id
    SELECT n.name,
           n.id,
           c.id AS id2,
           c.parentid
    FROM nodes_cte n
    JOIN categories c ON c.parentid = n.id2
)
-- One row per root; catprice is NULL when no product exists anywhere in its tree
SELECT n.id,
       n.name,
       SUM(p.price) AS catprice
FROM nodes_cte n
LEFT JOIN products p ON p.category = n.id2
GROUP BY n.id, n.name;
And here is the Fiddle: http://sqlfiddle.com/#!1/7ac6d/19
Good luck.

Related

sql cte distinct count

I want to add the total number of products of the brand to the total column.
http://sqlfiddle.com/#!18/2d00a/1
this my query;
WITH cte AS
(
SELECT id
FROM category
WHERE id = 1
UNION ALL
SELECT c.id
FROM category c, cte
WHERE c.parentid = cte.id
), cte2 AS
(
SELECT brandid, d.catid
FROM products d, cte
WHERE d.catid = cte.id
UNION ALL
SELECT u.brandid, COUNT(DISTINCT u.id)
FROM products u, cte
WHERE catid = cte.id
GROUP BY brandid
)
SELECT DISTINCT
brandid, logo, brand, id, id as total
FROM
brand, cte2
WHERE
id = cte2.brandid
ORDER BY
brand ASC
SELECT b.id,
b.logo,
b.brand,
count(p.id) total
FROM brand b
LEFT JOIN products p ON p.brandid = b.id
GROUP BY b.id, b.logo, b.brand
Assuming you want to filter products based on category 1 (where you include the subcategories as well)
-- Count products per brand, restricted to category 1 and its subcategories.
-- NOTE(review): the self-referencing CTE has no RECURSIVE keyword, which is the
-- T-SQL form (the question links a SQL Server fiddle); engines such as
-- PostgreSQL would need WITH RECURSIVE -- confirm the target engine.
WITH categories_and_subcategories AS (
-- Anchor: the starting category
SELECT id FROM category
WHERE id = 1
UNION ALL
-- Recursive step: categories whose parent has already been collected
SELECT c.id
FROM category c
INNER JOIN categories_and_subcategories cs
ON c.parentid = cs.id),
-- Products that belong to any of the collected categories
filtered_products AS (
SELECT p.id,
p.name,
p.catid,
p.brandid
FROM products p
INNER JOIN categories_and_subcategories c
ON p.catid = c.id
)
-- LEFT JOIN keeps brands without matching products; count(p.id) ignores the
-- NULLs those rows produce, so such brands report a total of 0
SELECT b.id,
b.logo,
b.brand,
count(p.id) total
FROM brand b
LEFT JOIN filtered_products p ON p.brandid = b.id
GROUP BY b.id, b.logo, b.brand
Here is the Query as per my Understanding to get the total products per brand. DBFIDDLE working code
-- Sample schema and data (T-SQL) for counting products per brand.
CREATE TABLE brand (
    [id] INT
    ,[brand] VARCHAR(13)
    ,[logo] VARCHAR(13)
);

INSERT INTO brand ([id], [brand], [logo])
VALUES (1, 'samsung', 'logo.jpg');

CREATE TABLE products (
    [id] INT
    ,[name] VARCHAR(13)
    ,[catid] INT
    ,[brandid] INT
);

-- Each product gets its own id; with two rows sharing id 1 a GROUP BY on the
-- product id only looked like a per-brand total.
INSERT INTO products ([id], [name], [catid], [brandid])
VALUES (1, 'samsung tv', 2, 1)
    ,(2, 'samsung2 tv', 2, 1);

-- One row per brand. The product id is deliberately NOT selected or grouped
-- on, otherwise the count would be per product (always 1) instead of per brand.
-- LEFT JOIN + COUNT(p.id) reports 0 for brands that have no products.
SELECT b.id
    ,b.brand
    ,b.logo
    ,COUNT(p.id) AS [total]
FROM brand b
LEFT JOIN products p ON b.id = p.brandid
GROUP BY b.id
    ,b.brand
    ,b.logo;
Results:

SQL - find parent in same column

I can't express the problem well enough to find the answer online, so I'm hoping someone could provide me with a link to a solution, because I think this is a rather common task.
We have a hierarchy of products and want to determine the parents.
All product names are in the same column and the logic is the following
ProductId ProductName
-----------------------------------------------------------------------------
1 ABC
2 ABCD
3 ABCD1
4 ABCD2
Result should be
ABCD1 & ABCD2 are children of ABCD and
ABCD is child of ABC
ProductId ProductName ParentName ParentId
------------------------------------------------------------------------------
1 ABC NULL NULL
2 ABCD ABC 1
3 ABCD1 ABCD 2
...
Hmmm. I think this does what you want:
-- For each product, attach the other product whose name is the longest prefix
-- of this product's name; OUTER APPLY keeps products with no such match and
-- leaves their parent columns NULL.
select p.*, pp.ProductName as parentName, pp.ProductId as parentId
from products p outer apply
(select top (1) pp.*
from products pp
-- candidate parents: names that p's name starts with, excluding p itself
where p.ProductName like pp.ProductName + '%' and
p.ProductId <> pp.ProductId
-- longest prefix first, so top (1) picks the closest ancestor
order by len(pp.ProductName) desc
) pp;
No doubt that gordon's answer is best here, but still I gave it a go as well:
USE tempdb;

-- Sample data
CREATE TABLE #T (ProductID INT, ProductName VARCHAR(100));

INSERT INTO #T (ProductID, ProductName)
VALUES (1, 'ABC'), (2, 'ABCD'), (3, 'ABCD1'), (4, 'ABCD2');

-- Every other product whose name is a prefix of T's name is an ancestor
-- candidate; only the longest one is the direct parent, so candidates are
-- ranked per product and just the first is kept. (Returning all candidates
-- would list ABCD1 under both ABC and ABCD.)
-- The LEFT JOIN keeps products without any candidate, i.e. the roots, with
-- NULL parent columns, so no separate "root" query is needed.
-- The preceding statement must be terminated with ';' because T-SQL requires
-- it before a WITH that is not the first statement of the batch.
WITH Candidates AS
(
    SELECT T.ProductID,
           T.ProductName,
           T2.ProductID   AS ParentID,
           T2.ProductName AS ParentName,
           ROW_NUMBER() OVER (PARTITION BY T.ProductID
                              ORDER BY LEN(T2.ProductName) DESC) AS rn
    FROM #T AS T
    LEFT JOIN #T AS T2
        ON T.ProductName LIKE T2.ProductName + '%'
       AND T.ProductID <> T2.ProductID
)
SELECT ProductID,
       ProductName,
       ParentID,
       ParentName
FROM Candidates
WHERE rn = 1
ORDER BY ProductID;
If there's only 1 character difference.
Then you can LEFT JOIN to the ProductName & one wildcard character '_'
SELECT
p1.ProductId,
p1.ProductName,
p2.ProductName AS ParentName,
p2.ProductId AS ParentId
FROM Products p1
LEFT JOIN Products p2 ON p1.ProductName LIKE CONCAT(p2.ProductName,'_')
ORDER BY p1.ProductId;
Example snippet:
-- Sample data held in a table variable. Table variables are declared with an
-- @ prefix; the # prefix is for temporary tables made with CREATE TABLE and is
-- not valid in DECLARE.
DECLARE @Products TABLE (
    ProductId INT PRIMARY KEY IDENTITY(1,1),
    ProductName VARCHAR(30) NOT NULL,
    UNIQUE (ProductName)
);

INSERT INTO @Products (ProductName) VALUES
('ABC')
,('ABCD')
,('ABCD1')
,('ABCD2')
;

-- The parent is the product whose name is this product's name minus its last
-- character: '_' matches exactly one character. Products without such a
-- match keep NULL parent columns because of the LEFT JOIN.
SELECT
    p1.ProductId,
    p1.ProductName,
    p2.ProductName AS ParentName,
    p2.ProductId AS ParentId
FROM @Products p1
LEFT JOIN @Products p2 ON p1.ProductName LIKE CONCAT(p2.ProductName, '_')
ORDER BY p1.ProductId;
Result:
ProductId ProductName ParentName ParentId
1 ABC NULL NULL
2 ABCD ABC 1
3 ABCD1 ABCD 2
4 ABCD2 ABCD 2
If it's possible that there's more than 1 character difference then:
-- Nearest parent when names may differ by more than one character.
-- ROW_NUMBER() restarts at 1 for every product and numbers its candidate
-- parents from the longest name down; ordering by that number and taking
-- TOP (1) WITH TIES returns every row numbered 1, i.e. one row per product.
SELECT TOP (1) WITH TIES
p1.ProductId,
p1.ProductName,
p2.ProductName AS ParentName,
p2.ProductId AS ParentId
FROM Products p1
-- '_%' requires at least one extra character, so a product never matches itself
LEFT JOIN Products p2 ON p1.ProductName LIKE CONCAT(p2.ProductName,'_%')
ORDER BY ROW_NUMBER() OVER (PARTITION BY p1.ProductId ORDER BY LEN(p2.ProductName) DESC);
Did you try using Case with the condition and represent each condition as a new column. You can refer to the syntax https://www.w3schools.com/sql/sql_case.asp
You can use Common Table Expression (CTE) to do the job.
with product_table (ProductId, ProductName) as
(
select 1 ProductId , 'ABC' ProductName union all
select 2 ProductId , 'ABCD' ProductName union all
select 3 ProductId , 'ABCD1' ProductName union all
select 4 ProductId , 'ABCD2' ProductName --union all
)
,product_result (ProductId, ProductName, ParentName, ParentId) as
(
select ProductId, ProductName, convert(varchar,null) ParentName, convert(int, null) ParentId
from product_table
where ProductName = 'ABC' --start with
union all
select d.ProductId, d.ProductName, convert(varchar,p.ProductName) ParentName, p.ProductId ParentId
from product_table d
, product_result p
where d.ProductName like p.ProductName+'_'
)
select *
from product_result
The first part, product_table, must be replaced by your own product table. It is used here only to generate a temporary dataset.
Your final query will look like:
with product_result (ProductId, ProductName, ParentName, ParentId) as
(
select ProductId, ProductName, convert(varchar,null) ParentName, convert(int, null) ParentId
from <YOUR_PRODUCT_TABLE_GOES_HERE>
where ProductName = 'ABC' --start with
union all
select d.ProductId, d.ProductName, convert(varchar,p.ProductName) ParentName, p.ProductId ParentId
from <YOUR_PRODUCT_TABLE_GOES_HERE> d
, product_result p
where d.ProductName like p.ProductName+'_'
)
select *
from product_result
CTEs have been available since SQL Server 2005. For more info, see WITH common_table_expression (Transact-SQL).

Joining two tables with aggregates

I've got two tables described below:
CREATE TABLE categories
(
id integer NOT NULL,
category integer NOT NULL,
name text,
CONSTRAINT kjhfskfew PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
CREATE TABLE products_
(
id integer NOT NULL,
date date,
id_employee integer,
CONSTRAINT grh PRIMARY KEY (id)
)
WITH (
OIDS=FALSE
);
Now I have to do report in which I need following information:
categories.category, categories.name (all of them, so string_agg is ok) - could be many assigned to one category and products_.id_employee -> but not with comma as above with category name but the one with newest date assigned (and here is my problem);
I've tried already constructions as:
SELECT
DISTINCT ON (category ) category,
string_agg(name, ','),
(SELECT
id_employee
FROM products_
WHERE date = (SELECT
max(date)
FROM products_
WHERE id IN (SELECT
id
FROM categories
WHERE id = c.id)))
FROM categories c
ORDER BY category;
But PostgreSQL says that the subquery is returning too many rows...
Please help!
EXAMPLE INSERTS:
INSERT INTO categories(
id, category, name)
VALUES (1,22,'car'),(2,22,'bike'),(3,22,'boat'),(4,33,'soap'),(5,44,'chicken');
INSERT INTO products_(
id, date, id_employee)
VALUES (1,'2009-11-09',11),(2,'2010-09-09',2),(3,'2013-01-01',4),(5,'2014-09-01',90);
OK, I've solved this problem.
This one works just fine:
WITH max_date AS (
SELECT
category,
max(date) AS date,
string_agg(name, ',') AS names
FROM test.products_
JOIN test.categories c
USING (id)
GROUP BY c.category
)
SELECT
max(id_employee) AS id_employee,
md.category,
names
FROM test.products_ p
LEFT JOIN max_date md
USING (date)
LEFT JOIN test.categories
USING (category)
WHERE p.date = md.date AND p.id IN (SELECT
id
FROM test.categories
WHERE category = md.category)
GROUP BY category, names;
It seems that id is being used to join the two tables, which seems strange to me.
In any case, the base query for the category names is:
-- All names of a category as one comma-separated string, one row per category.
-- (No comma after the last select item: a trailing comma before FROM is a
-- syntax error.)
SELECT c.category,
       string_agg(c.name, ',') AS names
FROM categories c
GROUP BY c.category;
The question is: how to get the employee from the most recent date? This approach uses the row_number() function:
-- Per category: all names as a comma-separated string, plus the id_employee
-- of the product row with the newest date in that category.
SELECT c.category,
       string_agg(c.name, ',') AS names,
       cp.id_employee
FROM categories c
LEFT JOIN (
    -- Number each category's rows newest-first. NULLS LAST matters: with a
    -- plain DESC, PostgreSQL sorts NULL dates first, so a category row that
    -- has no matching product would take seqnum = 1 and hide the real
    -- newest employee. p.id breaks ties between equal dates deterministically.
    SELECT c2.category,
           p.id_employee,
           row_number() OVER (PARTITION BY c2.category
                              ORDER BY p.date DESC NULLS LAST, p.id DESC) AS seqnum
    FROM categories c2
    LEFT JOIN products_ p ON c2.id = p.id
) cp
    ON cp.category = c.category
   AND cp.seqnum = 1
GROUP BY c.category, cp.id_employee;

How to group count and join in sequel?

I've looked through all the documentation and I'm having an issue putting together this query in Sequel.
select a.*, IFNULL(b.cnt, 0) as cnt FROM a LEFT OUTER JOIN (select a_id, count(*) as cnt from b group by a_id) as b ON b.a_id = a.id ORDER BY cnt
Think of table A as products, and table B as a record indicating that A was purchased.
So far I have:
A.left_outer_join(B.group_and_count(:a_id), a_id: :id).order(:count)
Essentially I just want to group and count table B, join it with A, but since B does not necessarily have any records for A and I'm ordering it by the number in B, I need to default a value.
DB[:a].
left_outer_join(DB[:b].group_and_count(:a_id).as(:b), :a_id=>:id).
order(:cnt).
select_all(:a).
select_more{IFNULL(:b__cnt, 0).as(:cnt)}
I can help you in MS SQL syntax.
Let's say your tables are Product and Order.
CREATE TABLE Product (
Id INT NOT NULL,
NAME VARCHAR(100) NOT NULL)
CREATE TABLE [Order] (
Id INT NOT NULL,
ProductId INT)
INSERT INTO Product (Id, Name) VALUES
(1, 'Tea'), (2, 'Coffee'), (3, 'Hot Chocolate')
INSERT INTO [Order] (Id, ProductId) VALUES
(1, 1), (2, 1), (3, 1), (4, 2)
This query will give the number of orders each product has, including ones without any orders.
-- Order count for every product, most-ordered first. Products that were never
-- ordered still appear: the outer join yields a NULL order id for them and
-- COUNT over that column skips NULLs, giving 0.
SELECT
    prod.Id       AS ProductId,
    prod.Name     AS ProductName,
    COUNT(ord.Id) AS Orders
FROM Product AS prod
LEFT JOIN [Order] AS ord
    ON ord.ProductId = prod.Id
GROUP BY prod.Id, prod.Name
ORDER BY Orders DESC

SQL - identifying rows for a value in one table, where all joined rows only has a specific value

IN SQL Server, I have a result set from a joined many:many relationship.
Considering Products linked to Orders via a link table ,
Table - Products
ID
ProductName
Table - Orders
ID
OrderCountry
LinkTable OrderLines (columns not shown)
I'd like to be able to filter these results to show only the results where for an entity from one table, all the values in the other table only have a given value in a particular column. In terms of my example, for each product, I want to return only the joined rows when all the orders they're linked to are for country 'uk'
So if my linked result set is
productid, product, orderid, ordercountry
1, Chocolate, 1, uk
2, Banana, 2, uk
2, Banana, 3, usa
3, Strawberry, 4, usa
I want to filter so that only those products that have only been ordered in the UK are shown (i.e. Chocolate). I'm sure this should be straightforward, but it's Friday afternoon and the SQL part of my brain has given up for the day...
You could do something like this, where first you get all products only sold in one country, then you proceed to get all orders for those products
-- Step 1: products whose orders all come from exactly one distinct country.
with distinctProducts as
(
select LinkTable.ProductID
from Orders
inner join LinkTable on LinkTable.OrderID = Orders.ID
group by LinkTable.ProductID
-- distinct: several orders from the same country still count as one country
having count(distinct Orders.OrderCountry) = 1
)
-- Step 2: list the order rows of those products, keeping only the ones whose
-- single country is the UK.
select pr.ID as ProductID
,pr.ProductName
,o.ID as OrderID
,o.OrderCountry
from Products pr
inner join LinkTable lt on lt.ProductID = pr.ID
inner join Orders o on o.ID = lt.OrderID
inner join distinctProducts dp on dp.ProductID = pr.ID
where o.OrderCountry = 'UK'
In the hope that some of this may be generally reusable:
;with startingRS (productid, product, orderid, ordercountry) as (
select 1, 'Chocolate', 1, 'uk' union all
select 2, 'Banana', 2, 'uk' union all
select 2, 'Banana', 3, 'usa' union all
select 3, 'Strawberry', 4, 'usa'
), countryRankings as (
select productid,product,orderid,ordercountry,
RANK() over (PARTITION by productid ORDER by ordercountry) as FirstCountry,
RANK() over (PARTITION by productid ORDER by ordercountry desc) as LastCountry
from
startingRS
), singleCountry as (
select productid,product,orderid,ordercountry
from countryRankings
where FirstCountry = 1 and LastCountry = 1
)
select * from singleCountry where ordercountry='uk'
In the startingRS, you put whatever query you currently have to generate the intermediate results you've shown. The countryRankings CTE adds two new columns, that ranks the countries within each productid.
The singleCountry CTE reduces the result set back down to those results where country ranks as both the first and last country within the productid (i.e. there's only a single country for this productid). Finally, we query for those results which are just from the uk.
If you want, for example, all productid rows with a single country of origin, you just skip this last where clause (and you'd get 3,strawberry,4,usa in your results also)
So if you've got a current query that looks like:
select p.productid,p.product,o.orderid,o.ordercountry
from product p inner join order o on p.productid = o.productid --(or however these joins work for your tables)
Then you'd rewrite the first CTE as:
;with startingRS (productid, product, orderid, ordercountry) as (
select p.productid,p.product,o.orderid,o.ordercountry
from product p inner join order o on p.productid = o.productid
), /* rest of query */
Hmm. Based on Philip's earlier approach, try adding something like this to exclude rows where there's been the same product ordered in another country:
SELECT pr.Id, pr.ProductName, od.Id, od.OrderCountry
from Products pr
inner join LinkTable lt
on lt.ProductId = pr.ID
inner join Orders od
on od.ID = lt.OrderId
where
od.OrderCountry = 'UK'
AND NOT EXISTS
(
SELECT
*
FROM
Products MatchingProducts
inner join LinkTable lt
on lt.ProductId = MatchingProducts.ID
inner join Orders OrdersFromOtherCountries
on OrdersFromOtherCountries.ID = lt.OrderId
WHERE
MatchingProducts.ID = Pr.ID AND
OrdersFromOtherCountries.OrderCountry != od.OrderCountry
)
-- Order rows of products that were ordered in the UK and nowhere else.
-- mytable: one (productid, ordercountry) row per order line.
;WITH mytable (productid, ordercountry)
AS
(SELECT productid, ordercountry
 FROM Orders od INNER JOIN LinkTable lt ON od.orderid = lt.OrderId)
SELECT * FROM mytable
INNER JOIN dbo.Products pr ON pr.productid = mytable.productid
-- Exclude products seen in more than one DISTINCT country. A plain
-- COUNT(ordercountry) counts order lines, which would also throw out a
-- product that was simply ordered twice in the UK.
WHERE pr.productid NOT IN (SELECT productid FROM mytable
                           GROUP BY productid
                           HAVING COUNT(DISTINCT ordercountry) > 1)
AND ordercountry = 'uk';
SELECT pr.Id, pr.ProductName, od.Id, od.OrderCountry
from Products pr
inner join LinkTable lt
on lt.ProductId = pr.ID
inner join Orders od
on od.ID = lt.OrderId
where od.OrderCountry = 'UK'
This probably isn't the most efficient way to do this, but ...
-- Products that have at least one 'uk' order and no order from anywhere else.
-- EXISTS / NOT EXISTS replace IN / NOT IN: OrderLines.ProductId is nullable,
-- and a single NULL in a NOT IN list makes the predicate unknown for every
-- product, so the query would return nothing. The correlated form also
-- avoids the DISTINCT de-duplication.
SELECT p.ProductName
FROM Product p
WHERE EXISTS
(
    SELECT 1
    FROM OrderLines ol
    INNER JOIN [Order] o
        ON ol.OrderId = o.OrderId
    WHERE ol.ProductId = p.ProductId
      AND o.OrderCountry = 'uk'
)
AND NOT EXISTS
(
    SELECT 1
    FROM OrderLines ol
    INNER JOIN [Order] o
        ON ol.OrderId = o.OrderId
    WHERE ol.ProductId = p.ProductId
      AND o.OrderCountry != 'uk'
)
TestData
create table product
(
ProductId int,
ProductName nvarchar(50)
)
go
create table [order]
(
OrderId int,
OrderCountry nvarchar(50)
)
go
create table OrderLines
(
OrderId int,
ProductId int
)
go
insert into Product VALUES (1, 'Chocolate')
insert into Product VALUES (2, 'Banana')
insert into Product VALUES (3, 'Strawberry')
insert into [order] values (1, 'uk')
insert into [order] values (2, 'uk')
insert into [order] values (3, 'usa')
insert into [order] values (4, 'usa')
insert into [orderlines] values (1, 1)
insert into [orderlines] values (2, 2)
insert into [orderlines] values (3, 2)
insert into [orderlines] values (4, 3)
insert into [orderlines] values (3, 2)
insert into [orderlines] values (3, 3)