SQL query with JOIN and WHERE IN clause - sql

UPDATE: Initially, I had the order date at line item table and realized that was a mistake and moved it to the Order table. Have updated my example query as well. Sorry
I am trying to write a query to load all orders whose line item order date is after a certain date along with loading all other orders which are out there for the same product returned by the first part of the query. Maybe an example could help
-- Sample schema: one row per order; the order date is held on the order itself.
CREATE TABLE DemandOrder
(
    OrderId   INT,
    OrderDate DATE,
    Customer  VARCHAR(25)
);

-- One row per product line on an order; the queries below join it to DemandOrder on OrderId.
CREATE TABLE LineItem
(
    OrderId    INT,
    LineItemId INT,
    ProductId  VARCHAR(10)
);

-- Dates are written as yyyymmdd so they load identically whatever the session's
-- language / DATEFORMAT setting is (mm/dd/yyyy literals do not).
INSERT INTO DemandOrder (OrderId, OrderDate, Customer) VALUES (1, '20140123', 'ABC');
INSERT INTO DemandOrder (OrderId, OrderDate, Customer) VALUES (2, '20140124', 'DEF');
INSERT INTO DemandOrder (OrderId, OrderDate, Customer) VALUES (3, '20140124', 'XYZ');
INSERT INTO DemandOrder (OrderId, OrderDate, Customer) VALUES (4, '20140123', 'ABC');

INSERT INTO LineItem (OrderId, LineItemId, ProductId) VALUES (1, 1, 'A');
INSERT INTO LineItem (OrderId, LineItemId, ProductId) VALUES (1, 2, 'C');
INSERT INTO LineItem (OrderId, LineItemId, ProductId) VALUES (2, 1, 'B');
INSERT INTO LineItem (OrderId, LineItemId, ProductId) VALUES (3, 1, 'A');
INSERT INTO LineItem (OrderId, LineItemId, ProductId) VALUES (4, 1, 'C');
In the above example, I need to query for all orders where the order date is on or after 01/24, along with all other orders that contain any of the products returned by the first part of the query. The result should have orders 1, 2 & 3.
Here is the updated sql code (using ErikE's suggestions from a post below)
-- Returns every order that contains at least one product which also appears on
-- an order dated on or after the cutoff (the qualifying orders themselves included).
SELECT DISTINCT                 -- an order that matches through several line items is listed once
    O.*
FROM dbo.[DemandOrder] AS O
INNER JOIN dbo.LineItem AS LI
    ON O.OrderID = LI.OrderID
WHERE EXISTS (
    SELECT *
    FROM dbo.DemandOrder AS O2
    INNER JOIN dbo.LineItem AS L2
        ON O2.OrderId = L2.OrderId
    WHERE O2.OrderDate >= '20140124'     -- yyyymmdd is not affected by language / DATEFORMAT
      AND LI.ProductID = L2.ProductID    -- correlate on the product, not on the order
);
Thanks for your help and suggestions

You can also do this with window functions:
-- Written against a schema where LineItem carries Product and OrderDate.
-- Each order/line-item row is tagged with the latest order date seen for that
-- line item's product; rows whose product was ordered on/after the cutoff are kept.
SELECT o.*
FROM (
    SELECT ord.*,
           MAX(li.OrderDate) OVER (PARTITION BY li.Product) AS maxOrderDate
    FROM [Order] AS ord              -- ORDER is a reserved word, so the table name must be delimited
    INNER JOIN LineItem AS li
        ON ord.OrderId = li.OrderId
) AS o
WHERE o.maxOrderDate >= '20140124';  -- yyyymmdd is language-neutral
You might actually want select distinct in the outer query, to prevent duplicates if one order has multiple products shipped after the given date.
As for your query, you can simplify it. The order table is not needed:
-- Orders having a line item whose product appears on any line item dated on/after the cutoff.
SELECT o.*
FROM [Order] AS o                    -- ORDER is a reserved word, so the table name must be delimited
INNER JOIN LineItem AS li
    ON o.OrderId = li.OrderId
WHERE li.Product IN (
    -- distinct alias so the subquery does not shadow the outer "li"
    SELECT recent.Product
    FROM LineItem AS recent
    WHERE recent.OrderDate >= '20140124'
);

You can also do this with window functions:
-- Written against a schema where LineItem carries Product and OrderDate.
-- maxProductOrderDate is the latest order date of the line item's product across
-- all orders; an order row is kept when that date is on/after the cutoff.
SELECT o.*
FROM (
    SELECT ord.*,
           MAX(li.OrderDate) OVER (PARTITION BY li.Product) AS maxProductOrderDate
    FROM [Order] AS ord              -- ORDER is a reserved word, so the table name must be delimited
    INNER JOIN LineItem AS li
        ON ord.OrderId = li.OrderId
) AS o
WHERE o.maxProductOrderDate >= '20140124';  -- yyyymmdd is language-neutral
You might actually want select distinct in the outer query, to prevent duplicates if one order has multiple products shipped after the given date.
As for your query, you can simplify it because you do not need the order table in the subquery, unless you need it for filtering purposes:
-- Orders having a line item whose product appears on any line item dated on/after the cutoff.
-- One row is produced per matching line item of an order.
SELECT o.*
FROM [Order] AS o                    -- ORDER is a reserved word, so the table name must be delimited
INNER JOIN LineItem AS li
    ON o.OrderId = li.OrderId
WHERE li.Product IN (
    -- distinct alias so the subquery does not shadow the outer "li"
    SELECT recent.Product
    FROM LineItem AS recent
    WHERE recent.OrderDate >= '20140124'
);
You probably want select distinct o.* in the outer query, to avoid duplicates when an order has two or more products that match the condition.

To get a result set with 1 row per order (meaning you're not interested in line item data, just the order summary), something like this should do:
-- One row per order: first collect the ids of orders that contain a product which
-- was also ordered on/after the lower-bound date, then join back to the order table.
SELECT o.*
FROM (
    SELECT DISTINCT t1.OrderId
    FROM dbo.LineItem AS t1
    WHERE EXISTS (
        SELECT *
        FROM dbo.LineItem AS t2
        WHERE t2.Product = t1.Product
          AND t2.OrderDate >= @SomeLowerBoundDateTimeValue   -- T-SQL variable; '#' would denote a temp table
    )
) AS t
INNER JOIN dbo.[Order] AS o          -- ORDER is a reserved word, so the table name must be delimited
    ON o.OrderId = t.OrderId
The first item in the from clause is a derived table consisting of the set of order ids associated with a product that was part of an order dated on or after the specified date. Having done that, the rest is trivial: just join against the order table.
Generally, for performance, you want to use correlated subqueries with [not] exists (...) in preference to uncorrelated subqueries with [not] in (...).
exists short circuits as soon as possible; in does not as it must construct the entire result set of the subquery.

I believe this is going to be close to what you're looking for.
All orders that have at least one productID that matches any product ID in an order 1/24/2014 or later.
-- All orders that have at least one product which also appears on an order dated
-- 2014-01-24 or later. The order table is filtered directly with EXISTS: joining
-- it to LineItem in the outer query is not needed (the correlation is on
-- O.OrderID) and would repeat each order once per line item.
SELECT
    O.*
FROM dbo.[Order] AS O
WHERE EXISTS (
    SELECT *
    FROM dbo.LineItem AS L2                 -- a line item of the candidate order ...
    INNER JOIN dbo.LineItem AS L3           -- ... any line item for the same product ...
        ON L2.ProductID = L3.ProductID
    INNER JOIN dbo.[Order] AS O2            -- ... and the order that line item belongs to
        ON L3.OrderID = O2.OrderID
    WHERE O2.OrderDate >= '20140124'
      AND O.OrderID = L2.OrderID
)
;

first i guess that your result should be OrderId: 2 and 3 because OrderDate is 01/24...
If you want to get that result you could try to do this.
-- Lists order / line-item rows whose line-item date is on or after the cutoff.
SELECT
    o1.OrderId,
    o1.CustomerName,
    l1.OrderDate,
    l1.ProductId
FROM [Order] AS o1                   -- ORDER is a reserved word, so the table name must be delimited
INNER JOIN LineItem AS l1
    ON o1.OrderId = l1.OrderId
WHERE l1.OrderDate >= '20140124'     -- well-formed, language-neutral literal (was '01/242014')
Hope this works and solve your question.
Regards!!!

This is what you're looking for, I believe.
Here's what's happening:
JOIN LineItem liBase: grab the initial records from LineItem based on the MinDate specification
JOIN LineItem liMatches: Self JOIN to the LineItem table using the ProductIDs collected in the initial JOIN
JOIN LineItem projection: Using the OrderIDs collected in the previous JOIN, grab the records from the LineItem table (in an additional self JOIN)
SELECT projection.*: projection is the set of results that we are after. SELECT them
Here's the query:
-- parms holds the cutoff date once so it can be referenced like a parameter.
;WITH parms (MinDate) AS (
    -- style 101 = mm/dd/yyyy, so the conversion does not depend on session settings
    SELECT CONVERT(DATETIME, '01/24/2014', 101)
)
-- DISTINCT: the chain of joins reaches the same line item once per matching
-- base row / product, so duplicates are removed from the final projection.
SELECT DISTINCT projection.*
FROM parms AS p
INNER JOIN LineItem AS liBase                 -- line items dated on/after the cutoff
    ON liBase.OrderDate >= p.MinDate
INNER JOIN LineItem AS liMatches              -- every line item for those products
    ON liMatches.ProductId = liBase.ProductId
INNER JOIN LineItem AS projection             -- all line items of the orders found that way
    ON projection.OrderId = liMatches.OrderId
ORDER BY projection.OrderId
;
Same query, but with data generation (generates the LineItem and Order data sets that you presented in your question).
-- parms holds the cutoff date once so it can be referenced like a parameter.
;WITH parms (MinDate) AS (
    -- style 101 = mm/dd/yyyy, so the conversion does not depend on session settings
    SELECT CONVERT(DATETIME, '01/24/2014', 101)
)
-- Inline sample data standing in for the LineItem table.
, LineItem (OrderId, LineItemID, OrderDate, ProductId) AS (
    SELECT 1, 1, CONVERT(DATETIME, '01/23/2014', 101), 'B' UNION ALL
    SELECT 4, 1, CONVERT(DATETIME, '01/23/2014', 101), 'C' UNION ALL
    SELECT 2, 1, CONVERT(DATETIME, '01/24/2014', 101), 'A' UNION ALL
    SELECT 3, 1, CONVERT(DATETIME, '01/24/2014', 101), 'B'
)
-- Inline sample data standing in for the Order table (not referenced by the query below).
, [Order] (OrderId, CustomerName) AS (
    SELECT 1, 'ABC' UNION ALL
    SELECT 2, 'XYZ' UNION ALL
    SELECT 3, 'DEF'
)
-- DISTINCT: the chain of joins reaches the same line item once per matching
-- base row / product, so duplicates are removed from the final projection.
SELECT DISTINCT projection.*
FROM parms AS p
INNER JOIN LineItem AS liBase                 -- line items dated on/after the cutoff
    ON liBase.OrderDate >= p.MinDate
INNER JOIN LineItem AS liMatches              -- every line item for those products
    ON liMatches.ProductId = liBase.ProductId
INNER JOIN LineItem AS projection             -- all line items of the orders found that way
    ON projection.OrderId = liMatches.OrderId
ORDER BY projection.OrderId
;

Related

SQL where nested select not null

I have a Customers table with CustomerID and CustomerName.
I then have a Orders table with CustomerID, datetime OrderPlaced and datetime OrderDelivered.
Bearing in mind that not all customers have placed orders, I would like to get a list of CustomerName, OrderPlaced and OrderDelivered but only for customers that have placed orders and whose orders have already been delivered, and only the most recent OrderPlaced per customer.
I started by doing (fully aware that this does not implement the OrderDelivered limitation to it yet, but already not doing what I want):
-- NOTE: this is the asker's failing attempt, kept exactly as posted.
-- The WHERE clause refers to OrderDelivered, which is only an alias defined in
-- the SELECT list, and the subquery has no condition tying Orders to the
-- current Customer row.
SELECT CustomerID,
(SELECT TOP 1 OrderDelivered
FROM Orders ORDER BY OrderDelivered DESC) AS OrderDelivered
FROM Customer
WHERE OrderDelivered IS NOT NULL
But already MS SQL doesn't like this, it says that it doesn't know what OrderDelivered is on the WHERE clause.
How can I accomplish this?
Personally, I would move your subquery into the FROM and use CROSS APPLY. Then you can far more easily reference the column:
-- For each customer, return the most recent non-NULL OrderDelivered value.
-- CROSS APPLY drops customers for whom the inner query yields no row.
SELECT C.CustomerID,
       O.OrderDelivered
FROM Customer AS C
CROSS APPLY (
    SELECT TOP 1 oa.OrderDelivered
    FROM Orders AS oa
    WHERE oa.CustomerID = C.CustomerID      --Guess column name for orders
      AND oa.OrderDelivered IS NOT NULL     -- inside the subquery only the alias "oa" is in scope
    ORDER BY oa.OrderDelivered DESC
) AS O;
As, however, this is a CROSS APPLY, then the results will already be filtered; so no need for the WHERE.
If you want the most recent delivered order, then one method uses apply:
-- For each customer, pick the most recently placed order that has been delivered.
-- CROSS APPLY drops customers for whom the inner query returns no row.
SELECT
    c.*,
    latest.OrderPlaced,
    latest.OrderDelivered
FROM customer AS c
CROSS APPLY (
    SELECT TOP (1) ord.*
    FROM orders AS ord
    WHERE ord.OrderDelivered IS NOT NULL
      AND ord.CustomerID = c.CustomerID
    ORDER BY ord.OrderPlaced DESC
) AS latest;
You can achieve this by using the OVER clause (https://learn.microsoft.com/en-us/sql/t-sql/queries/select-over-clause-transact-sql).
-- Table variables are declared with '@' ('#' denotes a temp table and cannot be DECLAREd).
DECLARE @customers TABLE (CustomerId INT, CustomerName NVARCHAR(20));
DECLARE @orders TABLE (CustomerId INT, OrderPlaced DATETIME, OrderDelivered DATETIME);

INSERT INTO @customers (CustomerId, CustomerName) VALUES
    (1, 'a'),
    (2, 'b');

INSERT INTO @orders (CustomerId, OrderPlaced, OrderDelivered) VALUES
    (1, '2019-01-01', NULL),
    (2, '2019-01-03', '2019-02-01'),
    (2, '2019-01-05', NULL);

-- Window functions do not collapse rows: without DISTINCT a customer with several
-- delivered orders would be listed once per delivered order, each time with the
-- same "latest" values.
SELECT DISTINCT
    c.CustomerName,
    -- Latest OrderPlaced among the customer's delivered orders
    FIRST_VALUE(o.OrderPlaced)
        OVER (PARTITION BY c.CustomerId ORDER BY o.OrderPlaced DESC) AS OrderPlaced,
    -- The matching OrderDelivered
    FIRST_VALUE(o.OrderDelivered)
        OVER (PARTITION BY c.CustomerId ORDER BY o.OrderPlaced DESC) AS OrderDelivered
FROM @customers AS c
INNER JOIN @orders AS o
    ON o.CustomerId = c.CustomerId
WHERE o.OrderDelivered IS NOT NULL;   -- undelivered orders are removed before the windows are evaluated

SQL INNER JOIN Without Repeats

Getting the next table:
Column1 - OrderID - Earliest orders of customers from Column2
Column2 - CustomerID - Customers from orders in Column1
Column3 - OrderID - All *Other* orders of customers from Column2
which do not appear in Column1
This is my query and I'm looking for a way to apply the rules mentioned above:
-- Asker's attempt: pairs each order placed between 2014-01-01 and 2014-03-31
-- (alias early) with every order (alias other) of the same customer.
-- Because the WHERE clause filters on "early", rows that the RIGHT JOIN padded
-- with NULLs on that side are discarded again.
SELECT
    early.orderid,
    cust.customerid,
    other.Orderid
FROM orders AS early
    INNER JOIN customers AS cust
        ON cust.customerid = early.customerid
    RIGHT JOIN orders AS other
        ON other.customerid = cust.customerid
WHERE early.orderdate <= '2014-03-31'
  AND early.orderdate >= '2014-01-01'
ORDER BY early.orderid
Thanks in advance
Not entirely sure why you want to get a result out like this as the earliest order will repeat for each order for the given customer.
-- Pairs each customer's lowest order id with every other order of that customer.
SELECT
    firsts.orderid,
    cust.customerid,
    ord.Orderid
FROM orders AS ord
    INNER JOIN customers AS cust
        ON cust.customerid = ord.customerid
    INNER JOIN (
        -- lowest order id per customer
        SELECT src.customerid,
               MIN(src.OrderId) AS OrderId
        FROM orders AS src
        GROUP BY src.customerid
    ) AS firsts
        ON firsts.CustomerId = cust.CustomerId
       AND ord.Orderid <> firsts.orderid      -- leave the earliest order out of the third column
To find the first order per customer, look for first order dates per customer and then pick the one or one of the orders made by the customer then. (If orderdate really is just a date one customer can have placed more than one order that day, so we pick one of them. With MIN(orderid) we are likely to get the first one of that bunch :-)
Outer join the other orders and you are done.
If your dbms supports IN clauses on tuples, you get a quite readable statement:
-- Requires a DBMS that supports IN on tuples.
-- first_order: per customer, the lowest order id among the orders placed on that
-- customer's earliest order date. Every other order of the customer is then
-- outer-joined, so customers with a single order still appear (with NULL).
select first_order.orderid, first_order.customerid, later_order.orderid
from
(
    select customerid, min(orderid) as orderid
    from orders
    where (customerid, orderdate) in
    (
        select customerid, min(orderdate)
        from orders
        group by customerid
    )
    group by customerid          -- one row per customer; needed because of min(orderid)
) first_order
left join orders later_order
    on later_order.customerid = first_order.customerid
    and later_order.orderid <> first_order.orderid
;
If your dbms doesn't support IN clauses on tuples, the statement looks a bit more clumsy:
-- Variant for a DBMS without tuple IN.
-- first_order_dates: earliest order date per customer.
-- first_order: per customer, the lowest order id among the orders on that date.
-- Every other order of the customer is then outer-joined.
select first_order.orderid, first_order.customerid, later_order.orderid
from
(
    select first_orders.customerid, min(first_orders.orderid) as orderid
    from orders first_orders
    inner join
    (
        select customerid, min(orderdate) as orderdate   -- alias needed: the join below reads it by name
        from orders
        group by customerid
    ) first_order_dates
        on first_order_dates.customerid = first_orders.customerid
        and first_order_dates.orderdate = first_orders.orderdate
    group by first_orders.customerid
) first_order
left join orders later_order
    on later_order.customerid = first_order.customerid
    and later_order.orderid <> first_order.orderid
;

Sum of all values except the first

I have the following three tables:
Customers:
Cust_ID,
Cust_Name
Products:
Prod_ID,
Prod_Price
Orders:
Order_ID,
Cust_ID,
Prod_ID,
Quantity,
Order_Date
How do I display each customer and how much they spent, excluding their very first purchase?
[A] - I can get the total by multiplying Products.Prod_Price and Orders.Quantity, then GROUP by Cust_ID
[B] - I also can get the first purchase by using TOP 1 on Order_Date for each customer.
But I couldnt figure out how to produce [A]-[B] in one query.
Any help will be greatly appreciated.
For SQL-Server 2005, 2008 and 2008R2:
-- Number each customer's purchases by order date, then total everything except
-- the purchase numbered 1. Customers with a single purchase produce no row.
; WITH NumberedPurchases AS (
    SELECT
        c.Cust_ID,
        c.Cust_Name,
        o.Quantity * p.Prod_Price AS Amount,
        ROW_NUMBER() OVER (PARTITION BY c.Cust_ID ORDER BY o.Order_Date) AS Rn
    FROM Customers AS c
    INNER JOIN Orders AS o
        ON o.Cust_ID = c.Cust_ID
    INNER JOIN Products AS p
        ON p.Prod_ID = o.Prod_ID
)
SELECT
    Cust_ID,
    Cust_Name,
    SUM(Amount) AS AmountSpent
FROM NumberedPurchases
WHERE Rn > 1
GROUP BY Cust_ID, Cust_Name ;
For SQL-Server 2012, using the FIRST_VALUE() analytic function:
-- Per customer: total of all purchases minus the amount of the first purchase
-- (by order date). Both values are window results repeated on every row of the
-- customer, so DISTINCT reduces the output to one row per customer.
SELECT DISTINCT
    cust.Cust_ID,
    cust.Cust_Name,
    SUM(ord.Quantity * prod.Prod_Price)
        OVER (PARTITION BY cust.Cust_ID)
    - FIRST_VALUE(ord.Quantity * prod.Prod_Price)
        OVER (PARTITION BY cust.Cust_ID ORDER BY ord.Order_Date)
      AS AmountSpent
FROM Customers AS cust
INNER JOIN Orders AS ord
    ON ord.Cust_ID = cust.Cust_ID
INNER JOIN Products AS prod
    ON prod.Prod_ID = ord.Prod_ID ;
Another way (that works in 2012 only) using OFFSET FETCH and CROSS APPLY:
-- For each customer, the applied subquery lists purchases by order date and
-- skips the first one with OFFSET; the remaining amounts are summed.
-- Customers whose subquery returns no rows are dropped by CROSS APPLY.
SELECT
    cust.Cust_ID,
    cust.Cust_Name,
    SUM(later.Quantity * later.Prod_Price) AS AmountSpent
FROM Customers AS cust
CROSS APPLY (
    SELECT
        ord.Quantity,
        prod.Prod_Price
    FROM Orders AS ord
    INNER JOIN Products AS prod
        ON prod.Prod_ID = ord.Prod_ID
    WHERE ord.Cust_ID = cust.Cust_ID
    ORDER BY ord.Order_Date
    OFFSET 1 ROW          -- no FETCH NEXT ... ROWS ONLY clause: every remaining row is wanted
) AS later
GROUP BY
    cust.Cust_ID,
    cust.Cust_Name ;
Tested at SQL-Fiddle
Note that the second solution returns also the customers with only one order (with the Amount as 0) while the other two solutions do not return those customers.
Which version of SQL? If 2012 you might be able to do something interesting with OFFSET 1, but I'd have to ponder much more how that works with grouping.
EDIT: Adding a 2012 specific solution inspired by @ypercube
I wanted to be able to use OFFSET 1 within the WINDOW to do it all in one step, but the syntax I want isn't valid:
-- NOT valid T-SQL: OFFSET is not accepted inside an OVER (...) clause.
-- Shown only to illustrate the intent (sum everything after the first row).
SUM(o.Quantity * p.Prod_Price) OVER (PARTITION BY c.Cust_ID
ORDER BY o.Order_Date
OFFSET 1)
Instead I can specify the row boxing, but have to filter the result set to the correct set. The query plan is different from @ypercube's, but they both show 50% when run together. They each run twice as fast as my original answer below.
-- For every order row, AmountSpent is the total of the customer's orders that
-- come after it (by Order_ID). Keeping only each customer's first row (rn = 1)
-- therefore yields "everything except the first order"; a customer with a single
-- order has no following rows, so the NULL sum is reported as 0.
WITH FollowingTotals AS (
    SELECT
        c.Cust_ID,
        c.Cust_Name,
        SUM(o.Quantity * p.Prod_Price) OVER (
            PARTITION BY c.Cust_ID
            ORDER BY o.Order_ID
            ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING
        ) AS AmountSpent,
        ROW_NUMBER() OVER (PARTITION BY c.Cust_ID ORDER BY o.Order_ID) AS rn
    FROM Customers AS c
    INNER JOIN Orders AS o
        ON o.Cust_ID = c.Cust_ID
    INNER JOIN Products AS p
        ON p.Prod_ID = o.Prod_ID
)
SELECT
    Cust_ID,
    Cust_Name,
    ISNULL(AmountSpent, 0) AS AmountSpent
FROM FollowingTotals
WHERE rn = 1
My more general solution is similar to peter.petrov's, but his didn't work "out of the box" on my sample data. That might be an issue with my sample data or not. Differences include use of CTE and a NOT EXISTS with a correlated subquery.
-- Throw-away sample schema and data for the query below.
CREATE TABLE Customers (Cust_ID INT, Cust_Name VARCHAR(10))
CREATE TABLE Products (Prod_ID INT, Prod_Price MONEY)
CREATE TABLE Orders (Order_ID INT, Cust_ID INT, Prod_ID INT, Quantity INT, Order_Date DATE)

INSERT INTO Customers VALUES
    (1, 'Able'),
    (2, 'Bob'),
    (3, 'Charlie')

INSERT INTO Products VALUES
    (1, 10.0)

-- Customer 1 has three orders, customer 2 has two, customer 3 has one.
INSERT INTO Orders VALUES
    (1, 1, 1, 1, GetDate()),
    (2, 1, 1, 1, GetDate()),
    (3, 1, 1, 1, GetDate()),
    (4, 2, 1, 1, GetDate()),
    (5, 2, 1, 1, GetDate()),
    (6, 3, 1, 1, GetDate())

-- CustomersFirstOrder: the lowest Order_ID of each customer, treated as the first purchase.
;WITH CustomersFirstOrder AS (
    SELECT
        Cust_ID,
        MIN(Order_ID) AS Order_ID
    FROM Orders
    GROUP BY Cust_ID
)
-- Total per customer over all orders that are not a first purchase. The LEFT JOIN
-- keeps customers with nothing left to sum; their NULL total is shown as 0.
SELECT
    c.Cust_ID,
    c.Cust_Name,
    ISNULL(SUM(Quantity * Prod_Price), 0) AS CustomerOrderTotalAfterInitialPurchase
FROM Customers AS c
LEFT JOIN (
    SELECT
        Cust_ID,
        Quantity,
        Prod_Price
    FROM Orders AS o
    INNER JOIN Products AS p
        ON o.Prod_ID = p.Prod_ID
    WHERE NOT EXISTS (
        SELECT 1
        FROM CustomersFirstOrder AS firstOrder
        WHERE firstOrder.Order_ID = o.Order_ID
    )
) AS laterOrders
    ON c.Cust_ID = laterOrders.Cust_ID
GROUP BY
    c.Cust_ID,
    c.Cust_Name

-- Clean up the sample tables.
DROP TABLE Customers
DROP TABLE Products
DROP TABLE Orders
Try this. It should do it.
-- Amount spent per customer, leaving out the orders placed on the customer's
-- earliest order date. The LEFT JOIN ... IS NULL pair is an anti-join: an order
-- survives only when it does NOT match the customer's first order date.
SELECT c1.cust_name ,
       c1.cust_id ,
       SUM(o1.Quantity * p1.Prod_Price) AS AmountSpent   -- amount = quantity x unit price, not price alone
FROM orders o1
JOIN products p1 ON o1.prod_id = p1.prod_id
JOIN customers c1 ON o1.cust_id = c1.cust_id
LEFT JOIN ( SELECT o2.cust_id ,
                   MIN(o2.Order_Date) AS Order_Date      -- first order date per customer
            FROM orders o2
            GROUP BY o2.cust_id
          ) t ON o1.cust_id = t.cust_id
             AND o1.Order_Date = t.Order_Date
WHERE t.Order_Date IS NULL
GROUP BY c1.cust_name ,
         c1.cust_id
You have to number orders by Customer and then you can have the amount for the first order and next orders with a CTE and ROW_NUMBER() like this:
-- NumberedOrders: every order with its amount and its position within the
-- customer's own order history (1 = first order).
; WITH NumberedOrders
AS ( SELECT Customers.Cust_Id ,
            Customers.Cust_Name ,
            -- Numbering restarts for each customer and follows the order date;
            -- Order_ID breaks ties between orders placed on the same date.
            ROW_NUMBER() OVER ( PARTITION BY Customers.Cust_Id
                                ORDER BY Orders.Order_Date, Orders.Order_ID ) AS Order_Number ,
            Orders.Order_Date ,
            Products.Prod_price * Orders.Quantity AS Amount
     FROM Orders
     INNER JOIN Customers ON Orders.Cust_Id = Customers.Cust_Id
     INNER JOIN Products ON Orders.Prod_Id = Products.Prod_Id
   )
-- Per customer: amount of the first order, amount of all other orders, and the grand total.
SELECT Cust_Id ,
       SUM(CASE WHEN Order_Number = 1 THEN Amount
                ELSE 0
           END) AS A_First_Order ,
       SUM(CASE WHEN Order_Number = 1 THEN 0
                ELSE Amount
           END) AS B_Other_orders ,
       SUM(Amount) AS C_All_orders
FROM NumberedOrders
GROUP BY Cust_Id
ORDER BY Cust_Id

How to get last children records with parent record from database

I have database with two tables:
Customers (Id PK, LastName)
and
Orders (Id PK, CustomerId FK, ProductName, Price, etc.)
I want to retrieve only customer' last orders details together with customer name.
I use .NET L2SQL but I think it's SQL question more than LINQ question so I post here SQL query I tried:
-- One correlated scalar subquery per order column: each one looks up the
-- customer's order with the highest Id and returns a single column from it.
SELECT
    [t0].[LastName],
    (
        SELECT TOP (1) [t1].[ProductName]
        FROM [Orders] AS [t1]
        WHERE [t1].[CustomerId] = [t0].[Id]
        ORDER BY [t1].[Id] DESC
    ) AS [ProductName],
    (
        SELECT TOP (1) [t3].[Price]
        FROM [Orders] AS [t3]
        WHERE [t3].[CustomerId] = [t0].[Id]
        ORDER BY [t3].[Id] DESC
    ) AS [Price]
FROM [Customers] AS [t0]
Problem is that Orders has more columns (30) and with each column the query gets bigger and slower because I need to add next subqueries.
Is there any better way?
In SQL Server 2005 and above:
-- Latest order (highest order id) per customer, together with the customer's name.
-- The LEFT JOIN keeps customers without orders; for them the order columns are
-- NULL and the name column is what identifies the row.
SELECT *
FROM (
    SELECT c.LastName AS CustomerLastName,   -- aliased so it cannot clash with a column coming from o.*
           o.*,
           ROW_NUMBER() OVER (PARTITION BY c.id ORDER BY o.id DESC) AS rn
    FROM customers AS c
    LEFT JOIN orders AS o
        ON o.customerId = c.id
) AS q
WHERE rn = 1
or this:
-- For each customer, attach the order with the highest id. OUTER APPLY keeps
-- customers for whom the inner query finds no order (order columns are NULL).
SELECT *
FROM customers AS c
OUTER APPLY (
    SELECT TOP 1 *
    FROM orders AS ord
    WHERE ord.customerId = c.id
    ORDER BY ord.id DESC
) AS o
In SQL Server 2000:
-- Variant without APPLY or window functions: join each customer to the single
-- order whose id is the highest among that customer's orders. The LEFT JOIN
-- keeps customers that have no orders.
SELECT *
FROM customers c                 -- alias is the plain Latin letter "c", matching the c.id reference below
LEFT JOIN orders o
    ON o.id =
       (
           SELECT TOP 1 id
           FROM orders oi
           WHERE oi.customerId = c.id
           ORDER BY oi.id DESC
       )

SQL - identifying rows for a value in one table, where all joined rows only has a specific value

IN SQL Server, I have a result set from a joined many:many relationship.
Considering Products linked to Orders via a link table ,
Table - Products
ID
ProductName
Table - Orders
ID
OrderCountry
LinkTable OrderLines (columns not shown)
I'd like to be able to filter these results to show only the results where for an entity from one table, all the values in the other table only have a given value in a particular column. In terms of my example, for each product, I want to return only the joined rows when all the orders they're linked to are for country 'uk'
So if my linked result set is
productid, product, orderid, ordercountry
1, Chocolate, 1, uk
2, Banana, 2, uk
2, Banana, 3, usa
3, Strawberry, 4, usa
I want to filter so that only those products that have only been ordered in the UK are shown (i.e. Chocolate). I'm sure this should be straight-forward, but its Friday afternoon and the SQL part of my brain has given up for the day...
You could do something like this, where first you get all products only sold in one country, then you proceed to get all orders for those products
-- distinctProducts: products whose orders all come from exactly one country.
WITH distinctProducts AS (
    SELECT link.ProductID
    FROM Orders AS ord
    INNER JOIN LinkTable AS link
        ON link.OrderID = ord.ID
    GROUP BY link.ProductID
    HAVING COUNT(DISTINCT ord.OrderCountry) = 1
)
-- Order rows of those products, restricted to the ones whose country is 'UK'.
SELECT
    pr.ID AS ProductID,
    pr.ProductName,
    o.ID AS OrderID,
    o.OrderCountry
FROM Products AS pr
INNER JOIN distinctProducts AS dp
    ON dp.ProductID = pr.ID
INNER JOIN LinkTable AS lt
    ON lt.ProductID = pr.ID
INNER JOIN Orders AS o
    ON o.ID = lt.OrderID
WHERE o.OrderCountry = 'UK'
In the hope that some of this may be generally reusable:
-- startingRS: stand-in for the joined product/order result set.
;WITH startingRS (productid, product, orderid, ordercountry) AS (
    SELECT v.productid, v.product, v.orderid, v.ordercountry
    FROM (VALUES
        (1, 'Chocolate',  1, 'uk'),
        (2, 'Banana',     2, 'uk'),
        (2, 'Banana',     3, 'usa'),
        (3, 'Strawberry', 4, 'usa')
    ) AS v (productid, product, orderid, ordercountry)
),
-- countryRankings: rank each row's country within its product, ascending and descending.
countryRankings AS (
    SELECT
        productid,
        product,
        orderid,
        ordercountry,
        RANK() OVER (PARTITION BY productid ORDER BY ordercountry)      AS FirstCountry,
        RANK() OVER (PARTITION BY productid ORDER BY ordercountry DESC) AS LastCountry
    FROM startingRS
),
-- singleCountry: a row ranked first in both directions belongs to a product
-- whose lowest and highest country are the same, i.e. a single-country product.
singleCountry AS (
    SELECT
        productid,
        product,
        orderid,
        ordercountry
    FROM countryRankings
    WHERE LastCountry = 1
      AND FirstCountry = 1
)
SELECT *
FROM singleCountry
WHERE ordercountry = 'uk'
In the startingRS, you put whatever query you currently have to generate the intermediate results you've shown. The countryRankings CTE adds two new columns, that ranks the countries within each productid.
The singleCountry CTE reduces the result set back down to those results where country ranks as both the first and last country within the productid (i.e. there's only a single country for this productid). Finally, we query for those results which are just from the uk.
If you want, for example, all productid rows with a single country of origin, you just skip this last where clause (and you'd get 3,strawberry,4,usa in your results also)
So if you've got a current query that looks like:
-- Placeholder for the query that currently produces the joined result set.
-- ORDER is a reserved word, so a table with that name has to be delimited.
select p.productid,p.product,o.orderid,o.ordercountry
from product p inner join [order] o on p.productid = o.productid --(or however these joins work for your tables)
Then you'd rewrite the first CTE as:
-- The existing query becomes the body of the first CTE.
-- ORDER is a reserved word, so a table with that name has to be delimited.
;with startingRS (productid, product, orderid, ordercountry) as (
select p.productid,p.product,o.orderid,o.ordercountry
from product p inner join [order] o on p.productid = o.productid
), /* rest of query */
Hmm. Based on Philip's earlier approach, try adding something like this to exclude rows where there's been the same product ordered in another country:
-- UK order rows of products for which no linked order has a country different
-- from the current row's country.
SELECT
    pr.Id,
    pr.ProductName,
    od.Id,
    od.OrderCountry
FROM Products AS pr
INNER JOIN LinkTable AS lt
    ON lt.ProductId = pr.ID
INNER JOIN Orders AS od
    ON od.ID = lt.OrderId
WHERE od.OrderCountry = 'UK'
  AND NOT EXISTS (
        -- any order of the same product placed from another country
        SELECT *
        FROM LinkTable AS otherLink
        INNER JOIN Orders AS OrdersFromOtherCountries
            ON OrdersFromOtherCountries.ID = otherLink.OrderId
        WHERE otherLink.ProductId = pr.ID
          AND OrdersFromOtherCountries.OrderCountry != od.OrderCountry
      )
-- mytable: one row per product/order link with the order's country.
-- Column names follow the question's schema (Products.ID, Orders.ID).
;WITH mytable (productid, ordercountry)
AS
(
    SELECT lt.ProductId, od.OrderCountry
    FROM Orders od
    INNER JOIN LinkTable lt ON od.ID = lt.OrderId
)
SELECT *
FROM mytable
INNER JOIN dbo.Products pr ON pr.ID = mytable.productid
WHERE pr.ID NOT IN (
        -- products ordered from more than one country; DISTINCT matters because a
        -- product ordered twice from the same country must not be excluded
        SELECT productid
        FROM mytable
        GROUP BY productid
        HAVING COUNT(DISTINCT ordercountry) > 1
      )
  AND mytable.ordercountry = 'uk'
-- Every product/order row whose order country is 'UK'. Nothing here excludes
-- products that were also ordered from other countries.
SELECT
    pr.Id,
    pr.ProductName,
    od.Id,
    od.OrderCountry
FROM Products AS pr
INNER JOIN LinkTable AS lt
    ON pr.ID = lt.ProductId
INNER JOIN Orders AS od
    ON lt.OrderId = od.ID
WHERE od.OrderCountry = 'UK'
This probably isn't the most efficient way to do this, but ...
-- Products that were ordered from 'uk' and from no other country.
-- EXISTS / NOT EXISTS are used instead of IN / NOT IN: a NULL ProductId in
-- OrderLines would make a NOT IN test return no rows at all.
SELECT p.ProductName
FROM Product p
WHERE EXISTS
      (
          -- at least one order line for this product on a 'uk' order
          SELECT 1
          FROM OrderLines ol
          INNER JOIN [Order] o
              ON ol.OrderId = o.OrderId
          WHERE ol.ProductId = p.ProductId
            AND o.OrderCountry = 'uk'
      )
  AND NOT EXISTS
      (
          -- no order line for this product on an order from any other country
          SELECT 1
          FROM OrderLines ol
          INNER JOIN [Order] o
              ON ol.OrderId = o.OrderId
          WHERE ol.ProductId = p.ProductId
            AND o.OrderCountry != 'uk'
      )
TestData
-- Test schema: products, orders, and the link rows between them.
create table product
(
    ProductId   int,
    ProductName nvarchar(50)
)
go
create table [order]
(
    OrderId      int,
    OrderCountry nvarchar(50)
)
go
create table OrderLines
(
    OrderId   int,
    ProductId int
)
go
-- Three products.
insert into Product (ProductId, ProductName) values
    (1, 'Chocolate'),
    (2, 'Banana'),
    (3, 'Strawberry')
-- Two 'uk' orders and two 'usa' orders.
insert into [order] (OrderId, OrderCountry) values
    (1, 'uk'),
    (2, 'uk'),
    (3, 'usa'),
    (4, 'usa')
-- Order lines; the pair (3, 2) is inserted twice, exactly as in the original script.
insert into [orderlines] (OrderId, ProductId) values
    (1, 1),
    (2, 2),
    (3, 2),
    (4, 3),
    (3, 2),
    (3, 3)