Distinct SQL when many to many - sql

3 tables sto_Product, sto_ProductXCategory and sto_Category
sto_Product has all the product info (bvin=Primary Key)
sto_Category has all the Categoryinfo (bvin=Primary Key)
sto_ProductXCategory (ProductId and CategoryId are PK and FK)
currently it results in duplicate products because some products are duplicate in sto_ProductXCategory (with different CategoryId's).
this SQL statement is part of a larger stored procedure so I'm trying to keep it similar.
This is the statement I'm trying to fix
-- Paged product list for one manufacturer; @Sortorder picks the column used for ranking.
-- T-SQL local variables must be prefixed with '@' ('#' names a temp table, not a variable).
DECLARE @Sortorder int = 6;

WITH product AS
(
    -- Products with an empty parentID, Status = 1, a price within the int range,
    -- the given manufacturer, and at least one row in sto_ProductXCategory.
    SELECT DISTINCT p.*
    FROM sto_Product p
    INNER JOIN sto_ProductXCategory px ON (p.bvin = px.productid)
    WHERE p.parentID = ''
      AND p.Status = 1
      AND p.siteprice >= 0
      AND p.siteprice <= 2147483647
      AND (
            (p.manufacturerid = '8036ab2c-641e-487b-a577-76c09c5c3cc9')
          )
),
AllProducts AS
(
    SELECT
        RowNum =
            CASE
                WHEN @Sortorder = 0
                    THEN ROW_NUMBER() OVER (ORDER BY px.SortOrder)
                WHEN @Sortorder = 1
                    THEN ROW_NUMBER() OVER (ORDER BY px.SortOrder)
                WHEN @Sortorder = 2
                    THEN ROW_NUMBER() OVER (ORDER BY ProductName)
                WHEN @Sortorder = 3
                    THEN ROW_NUMBER() OVER (ORDER BY SitePrice)
                WHEN @Sortorder = 4
                    THEN ROW_NUMBER() OVER (ORDER BY SitePrice DESC)
                WHEN @Sortorder = 6
                    THEN ROW_NUMBER() OVER (ORDER BY ProductName DESC)
                ELSE ROW_NUMBER() OVER (ORDER BY px.SortOrder)
            END,
        p.*
    FROM product p
    -- This join yields one row per (product, category) pair; it is the source of
    -- the duplicate products the question is asking about.
    INNER JOIN sto_ProductXCategory px ON (p.bvin = px.productid)
)
-- First page (rows 1-12) plus the total number of ranked rows.
SELECT AllProducts.RowNum, *,
       (SELECT COUNT(*) FROM AllProducts) AS TotalRowCount
FROM AllProducts
WHERE RowNum BETWEEN (1) AND (12)
ORDER BY AllProducts.RowNum
Sample Dataset
sto_Product
bvin,sku,ProductName,SitePrice
0001,001,AProductName,100
0002,002,BProductName,50
0003,003,CProductName,75
sto_Category
bvin,CategoryName
1000,ACategoryName
2000,BCategoryName
3000,CCategoryName
sto_ProductXCategory
ProductId,CategoryId,SortOrder
0001,1000,1
0001,2000,2
0001,3000,3
0002,2000,4
0003,2000,5
0003,3000,6
thanks in advance

You've stated that
...currently it results in duplicate products because some products are duplicate in sto_ProductXCategory (with different CategoryId's).
But you haven't said what you want to happen instead.
For my purposes here, I'll assume you want only one row per product, ignoring the different values of CategoryID appearing in sto_ProductXCategory
In AllProducts you have:
from product p inner join sto_ProductXCategory px on(p.bvin = px.productid)
This needs to be changed to filter out the duplicate rows in sto_ProductXCategory that you do not want in your output dataset:
-- Keep a single category link per product: the one with the lowest CategoryId.
FROM product AS p
INNER JOIN (
    SELECT productid,
           MIN(categoryid) AS Category
    FROM sto_ProductXCategory
    GROUP BY productid
) AS first_cat
    ON first_cat.productid = p.bvin
INNER JOIN sto_ProductXCategory AS px
    ON px.Productid = p.bvin
   AND px.CategoryID = first_cat.Category
How's that?

Related

SQL how to retrieve latest result from each joined table

I would like to retrieve one large table of products with the latest rows from all the joined tables, using the MAX(ID) of each group (productToken), which is the unique name of the product. The joined tables are the product's store (store), availability (status), description (products), and price. All of them contain the unique productToken, and each table can change over time, independently, when a new record is added, so my aim is to compose one big table (with current info about the products) by retrieving the latest record from each table. My code is below. It worked well for the first product added, but things got strange after I added new records to any of the tables (the query retrieved no results).
-- Latest row of every joined table, chosen as the highest ID within each token group.
-- Table and column names are spelled consistently here (productsStore, productToken);
-- the misspelled variants referenced names that do not exist.
-- NOTE(review): '$shopToken' looks like a value interpolated by the calling code;
-- prefer a bound parameter if it comes from user input.
SELECT *
FROM products
JOIN productsStore ON products.productToken = productsStore.productToken
JOIN productsStatus ON products.productToken = productsStatus.productToken
JOIN productsPrice ON products.productToken = productsPrice.productToken
JOIN categories ON products.categoryToken = categories.categoryToken
WHERE products.shopToken = '$shopToken'
AND products.productID IN
    (SELECT MAX(productID)
     FROM products
     GROUP BY productToken)
AND productsPrice.productPriceID IN
    (SELECT MAX(productPriceID)
     FROM productsPrice
     GROUP BY productToken)
AND productsStatus.productStatusID IN
    (SELECT MAX(productStatusID)
     FROM productsStatus
     GROUP BY productToken)
AND productsStore.productStoreID IN
    (SELECT MAX(productStoreID)
     FROM productsStore
     GROUP BY productToken)
AND categories.categoryID IN
    (SELECT MAX(categoryID)
     FROM categories
     GROUP BY categoryToken)
ORDER BY categories.categoryID DESC
I would like to retrieve one large table of products with the latest rows from all the joined tables
I think that you want equality conditions with correlated subqueries in the WHERE clause rather than IN conditions with aggregate queries. This lets you filter each joined table down to the "latest" record for the given productToken.
-- Latest row per token from every joined table, using one correlated MAX(id) subquery per table.
-- Product-related tables are correlated on productToken; categories are keyed by
-- categoryToken, so their "latest" row is looked up by that token instead.
SELECT *
FROM products p
JOIN productsStore psr ON psr.productToken = p.productToken
JOIN productsStatus psu ON psu.productToken = p.productToken
JOIN productsPrice ppr ON ppr.productToken = p.productToken
JOIN categories c ON c.categoryToken = p.categoryToken
WHERE
    p.shopToken = '$shopToken'
    AND p.productID = (SELECT MAX(p1.productID) FROM products p1 WHERE p1.productToken = p.productToken)
    AND psr.productStoreID = (SELECT MAX(psr1.productStoreID) FROM productsStore psr1 WHERE psr1.productToken = p.productToken)
    AND psu.productStatusID = (SELECT MAX(psu1.productStatusID) FROM productsStatus psu1 WHERE psu1.productToken = p.productToken)
    AND ppr.productPriceID = (SELECT MAX(ppr1.productPriceID) FROM productsPrice ppr1 WHERE ppr1.productToken = p.productToken)
    AND c.categoryID = (SELECT MAX(c1.categoryID) FROM categories c1 WHERE c1.categoryToken = p.categoryToken)
If you are running MySQL 8.0 (or MariaDB 10.3 or higher), you can use ROW_NUMBER() in subqueries instead:
-- Latest row per token from every table: each derived table numbers its rows
-- newest-first (highest ID = 1) within the token and the join keeps only rn = 1.
SELECT *
FROM (
    SELECT p.*, ROW_NUMBER() OVER (PARTITION BY p.productToken ORDER BY p.productID DESC) rn
    FROM products p
) p
INNER JOIN (
    SELECT psr.*, ROW_NUMBER() OVER (PARTITION BY psr.productToken ORDER BY psr.productStoreID DESC) rn
    FROM productsStore psr
) psr ON psr.productToken = p.productToken AND psr.rn = 1
INNER JOIN (
    SELECT psu.*, ROW_NUMBER() OVER (PARTITION BY psu.productToken ORDER BY psu.productStatusID DESC) rn
    FROM productsStatus psu
) psu ON psu.productToken = p.productToken AND psu.rn = 1
INNER JOIN (
    -- The price table's ID column is productPriceID (not productsPriceID).
    SELECT ppr.*, ROW_NUMBER() OVER (PARTITION BY ppr.productToken ORDER BY ppr.productPriceID DESC) rn
    FROM productsPrice ppr
) ppr ON ppr.productToken = p.productToken AND ppr.rn = 1
INNER JOIN (
    -- Categories are grouped and joined by categoryToken, not productToken.
    SELECT c.*, ROW_NUMBER() OVER (PARTITION BY c.categoryToken ORDER BY c.categoryID DESC) rn
    FROM categories c
) c ON c.categoryToken = p.categoryToken AND c.rn = 1
WHERE p.shopToken = '$shopToken' AND p.rn = 1

Multiple subquery join in View with group by returns duplicate rows

I have created a view using a subquery, but I want this view to return a few mandatory columns which can't be added to the GROUP BY subquery, so I have to create one more SELECT statement and join it with the GROUP BY subquery.
I have come up with the following query.
The problem I am facing is that if a seller's group has 28 rows, the view returns 28 duplicate rows; I also want the whole query to be ordered by TotalOrderItem.
-- Per-seller purchase counts joined to the audit columns of every purchase-info row
-- of that seller (so a seller appears once per purchase-info row).
ALTER VIEW [dbo].[SellersPerformance]
AS
SELECT
    meta.Id              AS Id,
    agg.SellerId         AS SellerId,
    agg.TenantId         AS TenantId,
    agg.Active           AS Active,
    agg.TotalOrderedItem AS TotalOrderItem,
    agg.MoveToPurchase   AS MoveToPurchase,
    agg.GoodPurchase     AS GoodPurchase,
    meta.Created         AS Created,
    meta.Modified        AS Modified,
    meta.CreatorId       AS CreatorId,
    meta.ModifierId      AS ModifierId
FROM
(
    (
        -- Counts per (seller, tenant, active flag); rows without a matching
        -- AmazonOrder are counted as MoveToPurchase.
        SELECT
            sellerId,
            p.TenantId,
            p.Active,
            COUNT(*)               AS TotalOrderedItem,
            COUNT(*) - COUNT(o.Id) AS MoveToPurchase,
            COUNT(o.Id)            AS GoodPurchase,
            COUNT(CASE WHEN o.ApplicationStatus = 'Perfect' THEN 1 END)   AS Perfect,
            COUNT(CASE WHEN o.ApplicationStatus = 'R-Perfect' THEN 1 END) AS R_Perfect
        FROM [dbo].[AmazonOrderPurchaseInfo] AS p
        LEFT JOIN [dbo].[AmazonOrder] AS o
            ON o.Id = p.AmazonOrderId
           AND o.[AmazonOrderPurchaseInfoId] = p.Id
        GROUP BY SellerId, p.TenantId, p.Active
        ORDER BY TotalOrderedItem OFFSET 0 ROWS
    ) AS agg
    LEFT OUTER JOIN
    (
        -- One row per purchase-info record, each with a freshly generated Id.
        SELECT
            NEWID()        AS Id,
            src.Created    AS Created,
            src.Modified   AS Modified,
            src.CreatorId  AS CreatorId,
            src.ModifierId AS ModifierId,
            src.SellerId   AS SellerId
        FROM dbo.AmazonOrderPurchaseInfo AS src
    ) AS meta
        ON meta.SellerId = agg.SellerId
)
GO
You may try Group by for this.
-- Same view as in the question, wrapped in a derived table (res) that is grouped on
-- every output column so that fully identical rows collapse into one.
-- NOTE(review): Id comes from NEWID(), so rows only collapse when the grouped
-- columns really are equal; verify this gives the expected result.
Alter VIEW [dbo].[SellersPerformance] AS
select res.Id, res.SellerId, res.TenantId, res.Active, res.TotalOrderItem, res.MoveToPurchase, res.GoodPurchase, res.Created, res.Modified, res.CreatorId, res.ModifierId
from
(
    Select
        RequiredColumns.Id as Id,
        aggrgateDT.SellerId as SellerId,
        aggrgateDT.TenantId as TenantId,
        aggrgateDT.Active as Active,
        aggrgateDT.TotalOrderedItem as TotalOrderItem,
        aggrgateDT.MoveToPurchase as MoveToPurchase,
        aggrgateDT.GoodPurchase as GoodPurchase,
        RequiredColumns.Created as Created,
        RequiredColumns.Modified as Modified,
        RequiredColumns.CreatorId as CreatorId,
        RequiredColumns.ModifierId as ModifierId
    From
    (
        (Select
            sellerId, p.TenantId, p.Active, count(*) as TotalOrderedItem,
            count(*) - count(o.Id) as MoveToPurchase,
            count(o.Id) as GoodPurchase,
            count(case when o.ApplicationStatus = 'Perfect' then 1 end) as Perfect,
            count(case when o.ApplicationStatus = 'R-Perfect' then 1 end) as R_Perfect
        FROM [dbo].[AmazonOrderPurchaseInfo] p
        left join [dbo].[AmazonOrder] o
            on p.AmazonOrderId = o.Id
            AND p.Id = o.[AmazonOrderPurchaseInfoId]
        group by SellerId, p.TenantId, p.Active
        order by TotalOrderedItem offset 0 rows
        ) aggrgateDT
        Left outer Join (
            SELECT
                NEWID() Id,
                purchase.Created AS Created,
                purchase.Modified AS Modified,
                purchase.CreatorId AS CreatorId,
                purchase.ModifierId AS ModifierId,
                purchase.SellerId As SellerId
            From dbo.AmazonOrderPurchaseInfo purchase
        ) RequiredColumns ON aggrgateDT.SellerId = RequiredColumns.SellerId
    ) -- closes the parenthesised join
) as res -- closes the derived table that the outer select reads from
group by res.Id, res.SellerId, res.TenantId, res.Active, res.TotalOrderItem, res.MoveToPurchase, res.GoodPurchase, res.Created, res.Modified, res.CreatorId, res.ModifierId
GO
Here, if the Id, Created, Modified, CreatorId and ModifierId columns hold the same values across a seller's rows, you may get your expected result.

Getting Latest 3 orders by Supplier ID

I have the following SQL Server code to get information from a combination of 4 tables.
I would like to modify it to only retrieve the latest 3 orders (pmpOrderDate) by supplier (pmpSupplierOrganizationID).
-- Purchase orders with supplier name, one row per order line whose part class starts with 'PUMP'.
SELECT
    ord.pmpPurchaseOrderID,
    ord.pmpOrderDate,
    ord.pmpSupplierOrganizationID,
    org.cmoName
FROM PurchaseOrders AS ord
JOIN PurchaseOrderLines AS ln
    ON ln.pmlPurchaseOrderID = ord.pmpPurchaseOrderID
JOIN Organizations AS org
    ON org.cmoOrganizationID = ord.pmpSupplierOrganizationID
JOIN Parts AS prt
    ON prt.impPartID = ln.pmlPartID
WHERE prt.impPartClassID LIKE 'PUMP%'
Can you please help?
EDIT:
I wasn't fully clear on my actual requirements. To clarify further, what I need in the end is to display the latest 3 unique Purchase Orders per Supplier ID, where at least one PartID in the order's PurchaseOrderLines has a PartClassID beginning with the string 'PUMP'.
Use a ROW_NUMBER to partition by pmpSupplierOrganizationID and order by pmpOrderDate.
-- Latest three purchase orders per supplier that contain at least one 'PUMP%' part.
WITH cteTopOrders AS (
    SELECT PO.pmpPurchaseOrderID, PO.pmpOrderDate, PO.pmpSupplierOrganizationID, O.cmoName,
           -- Newest order first; the order ID breaks ties on equal dates so the
           -- numbering is deterministic.
           ROW_NUMBER() OVER (PARTITION BY PO.pmpSupplierOrganizationID
                              ORDER BY PO.pmpOrderDate DESC, PO.pmpPurchaseOrderID DESC) AS RowNum
    FROM PurchaseOrders PO
    INNER JOIN Organizations O ON PO.pmpSupplierOrganizationID = O.cmoOrganizationID
    -- EXISTS instead of joining the lines: a join would return one row per matching
    -- line, so a single order with three pump lines would fill all three slots.
    WHERE EXISTS (
        SELECT 1
        FROM PurchaseOrderLines POL
        INNER JOIN Parts P ON POL.pmlPartID = P.impPartID
        WHERE POL.pmlPurchaseOrderID = PO.pmpPurchaseOrderID
          AND P.impPartClassID LIKE 'PUMP%'
    )
)
SELECT pmpPurchaseOrderID, pmpOrderDate, pmpSupplierOrganizationID, cmoName
FROM cteTopOrders
WHERE RowNum <= 3;
I'm a fan of lateral joins for this . . . cross apply:
-- For every supplier, the three most recent purchase orders that contain at least
-- one part whose class starts with 'PUMP'; suppliers without such orders are omitted.
select t.pmpPurchaseOrderID, t.pmpOrderDate, t.pmpSupplierOrganizationID, O.cmoName
from Organizations O cross apply
     (select top (3) PO.pmpPurchaseOrderID, PO.pmpOrderDate, PO.pmpSupplierOrganizationID
      from PurchaseOrders PO
      where PO.pmpSupplierOrganizationID = O.cmoOrganizationID and
            -- EXISTS keeps one row per order; joining the lines would let a single
            -- order with several pump lines take more than one of the three slots.
            exists (select 1
                    from PurchaseOrderLines POL join
                         Parts P
                         on POL.pmlPartID = P.impPartID
                    where POL.pmlPurchaseOrderID = PO.pmpPurchaseOrderID and
                          P.impPartClassID like 'PUMP%'
                   )
      -- order ID as tie-breaker so TOP (3) is deterministic on equal dates
      order by PO.pmpOrderDate desc, PO.pmpPurchaseOrderID desc
     ) t
You need a nested row_number to get the three rows per supplier and another OLAP-function on top of it:
-- Step 1: number every order-line row per supplier, newest order date first.
with OrderRowNum as
(
    SELECT PO.pmpPurchaseOrderID, PO.pmpOrderDate, PO.pmpSupplierOrganizationID, O.cmoName, P.impPartClassID,
           row_number()
           over (partition by PO.pmpSupplierOrganizationID
                 order by pmpOrderDate desc) as rn
    FROM PurchaseOrders PO
    Inner Join PurchaseOrderLines POL ON PO.pmpPurchaseOrderID = POL.pmlPurchaseOrderID
    Inner Join Organizations O On PO.pmpSupplierOrganizationID = O.cmoOrganizationID
    Inner Join Parts P ON POL.pmlPartID = P.impPartID
)
-- Step 2: keep the last three rows per supplier and flag suppliers where any of them is a PUMP part.
, CheckPUMP as
(
    select *,
           -- check if at least one of the three rows contains PUMP
           -- (columns of OrderRowNum carry no table alias, so no "PO." prefix here)
           max(case when impPartClassID Like 'PUMP%' then 1 else 0 end)
           over (partition by pmpSupplierOrganizationID) as PUMPflag
    from OrderRowNum
    where rn <= 3 -- get the last three rows per supplier
)
select *
from CheckPUMP
where PUMPflag = 1

Adding a Helper SQL Index

I have the following View which seems to work quickly enough but when I look at the Execution Plan, it shows the Top N Sort in the second query taking ~90% due to it being repeated for every row in the first query.
Should I be adding an Index to the Loan table to help the ORDER BY clause?
-- One row per item with its most recent loan (if any), the item's loan count and
-- the number of not-removed items sharing its resource.
CREATE VIEW [dbo].[ResourceItemStatus]
AS
SELECT
    i.ID AS ItemID,
    i.ResourceID,
    i.DateAdded,
    i.LocationID,
    i.OwnerID,
    i.Barcode,
    i.MissingReasonID,
    i.DateRemoved,
    latest.PatronID,
    latest.ID AS LoanID,
    latest.IssueDateTime,
    latest.DueDate,
    latest.ReturnDateTime,
    latest.LoanTypeID,
    latest.RenewalCount,
    latest.DeleteSummary,
    latest.ReturnStatusID,
    latest.FineID,
    (
        SELECT COUNT(*)
        FROM Loan AS lc
        WHERE lc.ItemID = i.ID
    ) AS LoanCount,
    (
        SELECT COUNT(*)
        FROM Item AS sib
        WHERE sib.DateRemoved IS NULL
          AND sib.ResourceID = i.ResourceID
    ) AS AvailableItemCount
FROM Item AS i
OUTER APPLY
(
    -- Most recent loan of this item: latest issue time, highest ID on ties.
    SELECT TOP 1
        ln.ID,
        ln.ItemID,
        ln.PatronID,
        ln.IssueDateTime,
        ln.DueDate,
        ln.ReturnDateTime,
        ln.LoanTypeID,
        ln.RenewalCount,
        ln.DeleteSummary,
        ln.ReturnStatusID,
        ln.FineID
    FROM Loan AS ln
    WHERE ln.ItemID = i.ID
    ORDER BY ln.IssueDateTime DESC, ln.ID DESC
) AS latest
Try Windowed Aggregates instead of Scalar Subqueries/Outer Apply:
-- Windowed rewrite: Loan is read once, with the per-item loan count and a newest-first
-- row number computed together; only each item's newest loan (rn = 1) is joined.
SELECT
    i.ID AS ItemID,
    i.ResourceID,
    i.DateAdded,
    i.LocationID,
    i.OwnerID,
    i.Barcode,
    i.MissingReasonID,
    i.DateRemoved,
    ll.PatronID,
    ll.ID AS LoanID,
    ll.IssueDateTime,
    ll.DueDate,
    ll.ReturnDateTime,
    ll.LoanTypeID,
    ll.RenewalCount,
    ll.DeleteSummary,
    ll.ReturnStatusID,
    ll.FineID,
    -- items without any loan have no joined row, hence the 0 default
    coalesce(ll.LoanCount, 0) AS LoanCount,
    -- the join yields exactly one row per item, so this counts not-removed items per resource
    COUNT(case when i.DateRemoved IS NULL then 1 end)
        over (partition by i.ResourceID) AS AvailableItemCount
FROM Item i
LEFT JOIN
(
    SELECT
        l.ID,
        l.ItemID,
        l.PatronID,
        l.IssueDateTime,
        l.DueDate,
        l.ReturnDateTime,
        l.LoanTypeID,
        l.RenewalCount,
        l.DeleteSummary,
        l.ReturnStatusID,
        l.FineID,
        COUNT(*) over (partition by l.ItemID) AS LoanCount,
        row_number()
            over (partition by l.ItemID
                  order by l.IssueDateTime DESC, l.ID DESC) as rn
    FROM Loan l
) as ll
    on ll.ItemID = i.ID
    and ll.rn = 1

Query in SQL Server 2014 for a report (I need the last ROW of a table)

I'm using SQL Server 2014 and I have a problem with a query.
In my report I want ALL the items of the order with ID_Order = 9 that have been delivered. For items that were delivered in two deliveries (Item Code = Art3, for example), I want only the last row, that is, the last delivery of that item, with NO repetition.
I already tried these two queries without success:
Attempt #1: DISTINCT
-- Attempt #1: DISTINCT over the joined order/delivery rows of order 9.
-- ORDER is a reserved keyword in T-SQL, so the table name must be delimited.
SELECT DISTINCT
    [Order].ItemCode, Delivery.Qty, Delivery.ID_Delivery,
    [Order].ID_Order
FROM
    Delivery
INNER JOIN
    [Order] ON [Order].ID_Order = Delivery.ID_Order
WHERE
    [Order].ID_Order = '9'
Attempt #2: subquery
-- Attempt #2: collapse identical (ItemCode, Qty) pairs of order 9 with GROUP BY.
-- The derived table needs the alias (a) that GROUP BY refers to, the select list
-- must not end with a comma, and the reserved word ORDER has to be delimited.
SELECT
    a.ItemCode,
    a.Qty
FROM
    (SELECT
        [Order].ItemCode, Delivery.Qty
    FROM
        Delivery
    INNER JOIN
        [Order] ON [Order].ID_Order = Delivery.ID_Order
    WHERE
        [Order].ID_Order = '9') AS a
GROUP BY
    a.ItemCode, a.Qty
Try this query --
-- Last delivery per item of order 9: deliveries are numbered newest-first
-- (highest ID_Delivery = 1) within each ItemCode and only number 1 is returned.
;WITH ranked_delivery AS
(
    SELECT
        ord.ID_Order,
        dlv.ID_Delivery,
        ord.ItemCode,
        ord.Quantity,
        ROW_NUMBER() OVER (PARTITION BY ord.ItemCode
                           ORDER BY dlv.ID_Delivery DESC) AS rn
    FROM Customer_Order AS ord
    INNER JOIN Delivery AS dlv
        ON dlv.ID_Order = ord.ID_Order
       AND dlv.ItemCode = ord.ItemCode
    WHERE ord.ID_Order = 9
)
SELECT
    ID_Order,
    ID_Delivery,
    ItemCode,
    Quantity
FROM ranked_delivery
WHERE rn = 1
-- Last delivery of every item of order 9.
-- The MAX(ID_Delivery) lookup is correlated on the item as well as the order;
-- correlating on the order alone keeps only the single newest delivery of the whole order.
-- NOTE(review): assumes Delivery has an ItemCode column matching [Order].ItemCode - confirm.
SELECT
    [Order].ItemCode, Delivery.Qty, Delivery.ID_Delivery,
    [Order].ID_Order
FROM
    Delivery
INNER JOIN
    [Order] ON [Order].ID_Order = Delivery.ID_Order
           AND [Order].ItemCode = Delivery.ItemCode
WHERE
    [Order].ID_Order = '9'
    AND Delivery.ID_Delivery =
    (
        SELECT MAX(D.ID_Delivery)
        FROM Delivery D
        WHERE D.ID_Order = Delivery.ID_Order
          AND D.ItemCode = Delivery.ItemCode
    )
I hope it will work for you.