Intersect Select Statements on Specific Columns - sql

I've a table of SalesDetails, looking like this:
InvoiceID, LineID, Product
1,1,Apple
1,2,Banana
2,1,Apple
2,2,Mango
3,1,Apple
3,2,Banana
3,3,Mango
My requirement is to return rows where an Invoice contained sales of both: Apple AND Banana, but if there are other products on such an invoice, I don't want those.
So the result should be:
1,1,Apple
1,2,Banana
3,1,Apple
3,2,Banana
I tried the following:
Select * from SalesDetails where Product = 'Apple'
Intersect
Select * from SalesDetails where Product = 'Banana'
Didn't work, because it seems Intersect needs to match all the columns.
What I'm hoping to do is:
Select * from SalesDetails where Product = 'Apple'
Intersect ----On InvoiceID-----
Select * from SalesDetails where Product = 'Banana'
Is there a way to do this?
Or do I have to first Intersect on InvoiceIDs only using my criteria, then select the rows of those InvoiceIDs where the criteria is matched again, I.e.:
Select * From SalesDetails
Where Product In ('Apple', 'Banana') And InvoiceID In
(
Select InvoiceID from SalesDetails where Product = 'Apple'
Intersect
Select InvoiceID from SalesDetails where Product = 'Banana'
)
Which seems somewhat wasteful as it's examining the criteria twice.

Okay this time I've managed to get reuse of the Apple/Banana info by using a CTE.
-- Step 1: keep only the Apple/Banana lines.
-- Step 2: find the invoices that show two different products in that subset.
-- Step 3: return the filtered lines belonging to those invoices.
with fruit_lines as (
    select *
    from SalesDetails
    where Product in ('Apple', 'Banana')
),
invoices_with_both as (
    select invoiceid
    from fruit_lines
    group by invoiceid
    having count(distinct product) = 2
)
select *
from fruit_lines
where invoiceid in (select invoiceid from invoices_with_both)
SQL Fiddle

Do it with conditional aggregation:
-- Return the Apple and Banana lines of invoices that contain BOTH products.
-- COUNT(DISTINCT ...) over the conditional expression counts how many of the
-- two products occur on the invoice, so an invoice with two Apple lines and
-- no Banana line is not accepted (a plain SUM(...) >= 2 would accept it).
-- Literals use the same casing as the sample data so the query also works
-- under a case-sensitive collation.
select *
from SalesDetails
where product in ('Apple', 'Banana')
  and invoiceid in (
      select invoiceid
      from SalesDetails
      group by invoiceid
      having count(distinct case when product in ('Apple', 'Banana') then product end) = 2
  )

I think OP's suggestion is about the best one can do. The following might be faster, although I expect the difference to be slight and I have not done any benchmarking.
-- Apple lines of invoices that also carry a Banana line ...
SELECT apple.*
FROM SalesDetails AS apple
WHERE apple.Product = 'Apple'
  AND EXISTS (
      SELECT 1
      FROM SalesDetails AS banana
      WHERE banana.InvoiceID = apple.InvoiceID
        AND banana.Product = 'Banana'
  )
UNION ALL
-- ... followed by Banana lines of invoices that also carry an Apple line.
SELECT banana.*
FROM SalesDetails AS banana
WHERE banana.Product = 'Banana'
  AND EXISTS (
      SELECT 1
      FROM SalesDetails AS apple
      WHERE apple.InvoiceID = banana.InvoiceID
        AND apple.Product = 'Apple'
  )

A self-join will solve the problem.
-- Self semi-join on SalesDetails: keep an Apple/Banana line (T1) when the same
-- invoice has a line (T2) for the other product.
-- EXISTS is used instead of INNER JOIN so a T1 row is returned once even when
-- the partner product appears on several lines of the invoice; a plain join
-- would repeat the T1 row once per matching T2 row.
SELECT T1.*
FROM SalesDetails T1
WHERE T1.Product IN ('Apple', 'Banana')
  AND EXISTS (
      SELECT 1
      FROM SalesDetails T2
      WHERE T2.InvoiceId = T1.InvoiceId
        AND T2.Product IN ('Apple', 'Banana')
        AND T2.Product <> T1.Product
  )

-- Sample data: Id = invoice, val = line number, name = product.
-- A temp table is created because "declare #t table" is not valid T-SQL
-- (a table variable would have to be named with @, a temp table with #).
create table #t (Id int, val int, name varchar(10));
insert into #t (Id, val, name) values
(1,1,'Apple'),
(1,2,'Banana'),
(2,1,'Apple'),
(2,2,'Mango'),
(3,1,'Apple'),
(3,2,'Banana'),
(3,3,'Mango');

-- Only Apple/Banana lines are kept, so within one invoice MIN(name) and
-- MAX(name) differ exactly when both products are present. This decides on
-- the product names themselves rather than on how Id and val happen to relate.
;with fruit as (
    select Id,
           val,
           name,
           min(name) over (partition by Id) as first_name,
           max(name) over (partition by Id) as last_name
    from #t
    where name in ('Apple', 'Banana')
)
select Id, val, name
from fruit
where first_name <> last_name
order by Id, val;

Another way is to use PIVOT, like this:
-- Sample data is loaded into a temp table: "DECLARE #DataSource TABLE" is not
-- valid T-SQL, while CREATE TABLE #DataSource is, and it keeps every later
-- reference to #DataSource working as written.
CREATE TABLE #DataSource
(
    [InvoiceID] TINYINT
   ,[LineID] TINYINT
   ,[Product] VARCHAR(12)
);
INSERT INTO #DataSource ([InvoiceID], [LineID], [Product])
VALUES (1,1,'Apple')
      ,(1,2,'Banana')
      ,(2,1,'Apple')
      ,(2,2,'Mango')
      ,(3,1,'Apple')
      ,(3,2,'Banana')
      ,(3,3,'Mango');
-- One output row per InvoiceID, with the highest LineID of each product in the
-- [Apple] and [Banana] columns; an invoice missing either product has NULL in
-- that column and is filtered out.
SELECT *
FROM #DataSource
PIVOT
(
    MAX([LineID]) FOR [Product] IN ([Apple], [Banana])
) PVT
WHERE [Apple] IS NOT NULL
    AND [Banana] IS NOT NULL;
It will give you the results in a pivoted format (one row per invoice, with the Apple and Banana line IDs as columns), but you can UNPIVOT them if you want.
Or you can use window function like this:
-- Count, per invoice, how many Apple/Banana lines it has and keep the lines
-- of invoices where that count is exactly two.
;WITH FruitLines AS
(
    SELECT *
          ,COUNT(*) OVER (PARTITION BY [InvoiceID]) AS [Match]
    FROM #DataSource
    WHERE [Product] IN ('Apple', 'Banana')
)
SELECT *
FROM FruitLines
WHERE [Match] = 2

First, you want to COUNT the number of rows per InvoiceID that match the criteria Product = 'Apple' or 'Banana'. Then do a SELF-JOIN and filter the rows such that the COUNT is >= 2, i.e. the number of Products in your criteria.
SQL Fiddle
-- t: per invoice, the number of DIFFERENT products among Apple/Banana.
-- Counting distinct products (rather than rows) keeps an invoice with two
-- Apple lines and no Banana line from reaching the threshold.
SELECT sd.*
FROM (
    SELECT InvoiceID, COUNT(DISTINCT Product) AS CC
    FROM SalesDetails
    WHERE Product IN ('Apple', 'Banana')
    GROUP BY InvoiceID
) t
INNER JOIN SalesDetails sd
    ON sd.InvoiceID = t.InvoiceID
WHERE
    t.CC >= 2
    -- the join brings back every line of the invoice; keep only the two products
    AND sd.Product IN ('Apple', 'Banana')

-- Pair every 'banana' line with every 'apple' line of the same invoice.
-- The result is wide: the banana line's columns come first, followed by the
-- apple line's columns.
WITH banana_lines AS (
    SELECT *
    FROM [dbo].[SalesDetails]
    WHERE [Product] = 'banana'
),
apple_lines AS (
    SELECT *
    FROM [dbo].[SalesDetails]
    WHERE [Product] = 'apple'
)
SELECT *
FROM banana_lines b
INNER JOIN apple_lines a
    ON b.[InvoiceID] = a.[InvoiceID]

Here is a method using window functions:
-- Tag every line with two per-invoice flags (does the invoice contain an
-- Apple line / a Banana line), then keep each Apple or Banana line whose
-- invoice also contains the other product.
with flagged as (
    select sd.*,
           max(case when product = 'Apple' then 1 else 0 end)
               over (partition by invoiceid) as HasApple,
           max(case when product = 'Banana' then 1 else 0 end)
               over (partition by invoiceid) as HasBanana
    from salesdetails sd
)
select flagged.*
from flagged
where (HasBanana > 0 and product = 'Apple')
   or (HasApple > 0 and product = 'Banana');

If you only want to write the condition once and are sure that each Product will only be once in any Order, you can use this:
-- Keep the Apple/Banana lines of invoices that have exactly two such lines.
-- Relies on each product appearing at most once per invoice.
-- The derived table carries an alias (required by SQL Server) and includes
-- LineID so the output has the columns shown in the expected result.
SELECT *
FROM (
    SELECT InvoiceID,
           LineID,
           Product,
           COUNT(*) OVER (PARTITION BY InvoiceID) AS matchcount
    FROM SalesDetails
    WHERE Product IN ('Apple','Banana')
) matched
WHERE matchcount = 2;

This is what I ended up using, inspired by @Leon Bambrick:
(Expanded a little to support multiple products in the criteria)
-- cteUnionBase: every line whose product belongs to either product family.
-- cteIntersect: invoices that have at least one Apple-family line and at
--               least one Banana-family line.
-- Final result: all base lines of those invoices.
WITH cteUnionBase AS
(
    SELECT *
    FROM SalesDetails
    WHERE Product IN ('Apple Red','Apple Yellow','Apple Green','Banana Small','Banana Large')
),
cteIntersect AS
(
    SELECT InvoiceID
    FROM cteUnionBase
    WHERE Product IN ('Apple Red','Apple Yellow','Apple Green')
    INTERSECT
    SELECT InvoiceID
    FROM cteUnionBase
    WHERE Product IN ('Banana Small','Banana Large')
)
SELECT cteUnionBase.*
FROM cteUnionBase
INNER JOIN cteIntersect
    ON cteIntersect.InvoiceID = cteUnionBase.InvoiceID

Related

SQL Aggregate most frequent value with GROUP BY

I have a table like this:
PARTNUMBER | QUANTITY | DESCRIPTION
'foo' 2 'a'
'foo' 2 'a1'
'bar' 2 'b'
'bar' 2 'b'
'bar' 2 'b1'
'bizz' 2 'c'
I'm trying to group by PARTNUMBER, aggregate by QUANTITY, and aggregate DESCRIPTION by most-frequent appearance.
I tried using a sub-query to aggregate DESCRIPTION by its most frequent occurrence, but I'm having some trouble getting it right, especially with GROUP BY.
Here is what I have:
SELECT SUM(QUANTITY) AS QUANTITY, PARTNNUMBER,
(SELECT TOP(1) [DESCRIPTION]
FROM [PBJobDB].[dbo].[DEVICES]
/*WHERE DESCRIPTION = t1.PARTNO ?? */
GROUP BY [DESCRIPTION], PARTNNUMBER
ORDER BY COUNT([DESCRIPTION]) DESC) as [DESCRIPTION]
FROM `database.table`
GROUP BY PARTNUMBER, [DESCRIPTION]
The subquery is not getting the most frequent DESCRIPTION by PARTNUMBER, and instead gives the most frequent DESCRIPTION in the whole table.
I would like the output to look like this:
PARTNUMBER | QUANTITY | DESCRIPTION
'foo' 4 'a'
'bar' 6 'b'
'bizz' 2 'c'
I tried the query below; please check whether it works for you:
-- Total quantity per part plus the part's most frequent description.
-- The inner query counts rows per description for the current part; TOP 1
-- with ORDER BY CNT DESC picks the most frequent one, and DESCP breaks ties
-- so the result is deterministic. Grouping by (DESCP, CNT) and testing
-- CNT = MAX(CNT) is true for every row, so it cannot select the mode.
SELECT T1.PARTNUMBER,
       SUM(T1.QUANTITY) AS QUANTITY,
       (
           SELECT TOP 1 A.DESCP
           FROM (SELECT [DESCRIPTION] AS DESCP, COUNT(*) AS CNT
                 FROM testtable
                 WHERE PARTNUMBER = T1.PARTNUMBER
                 GROUP BY [DESCRIPTION]) A
           ORDER BY A.CNT DESC, A.DESCP
       ) AS [DESCRIPTION]
FROM testtable T1
GROUP BY T1.PARTNUMBER
This is what ended up working for me...
-- Total quantity per part plus the part's most frequent description.
-- GROUP BY already yields one row per PARTNUMBER, so DISTINCT is not needed.
-- The extra ORDER BY key makes the choice deterministic when two descriptions
-- are equally frequent.
-- NOTE(review): the outer table was written as the backtick-quoted placeholder
-- `database.table`, which is not valid T-SQL; it is assumed to be the same
-- DEVICES table the subquery reads - confirm.
select t1.PARTNUMBER, sum(t1.QUANTITY) AS QUANTITY, (
    select TOP(1) t2.[DESCRIPTION]
    from [PBJobDB].[dbo].[DEVICES] AS t2
    where t2.PARTNUMBER = t1.PARTNUMBER
    group by t2.[DESCRIPTION]
    order by count(*) desc, t2.[DESCRIPTION] ) as [DESCRIPTION]
from [PBJobDB].[dbo].[DEVICES] AS t1
/* WHERE `column` IS NULL AND `other_column` = 'some_value' */
GROUP BY t1.PARTNUMBER
You are looking for the mode. I would use two levels of aggregation:
-- Inner level: one row per (partnumber, description) with its quantity and a
-- rank by how often the description appears (count(*)), ties broken by
-- description. Ranking by frequency rather than by summed quantity is what
-- "most frequent description" asks for.
-- Outer level: total quantity per part and the rank-1 description.
select partnumber, sum(quantity) as total_quantity,
       max(case when seqnum = 1 then description end) as description
from (select partnumber, description, sum(quantity) as quantity,
             row_number() over (partition by partnumber
                                order by count(*) desc, description) as seqnum
      from t
      group by partnumber, description
     ) pd
group by partnumber;
I would use Sum() over to get total quantity. Below is the example that worked for me.
-- per_description: one row per (PARTNUMBER, DESCRIPTION) with its quantity and
--   a rank R by number of occurrences (DESCRIPTION breaks ties so the pick is
--   deterministic).
-- per_part: replaces the per-description quantity with the part's total.
--   The window needs no ORDER BY: ordering by the partition column made every
--   row a peer, so the "running" sum was already the partition total.
-- Final filter keeps the most frequent description of each part.
SELECT PARTNUMBER, QUANTITY, DESCRIPTION
FROM (
    SELECT PARTNUMBER,
           SUM(Quantity) OVER (PARTITION BY PARTNUMBER) AS Quantity,
           DESCRIPTION,
           R
    FROM (
        SELECT ROW_NUMBER() OVER (PARTITION BY PARTNUMBER
                                  ORDER BY COUNT(DESCRIPTION) DESC, DESCRIPTION) AS R,
               SUM(Quantity) AS Quantity,
               PARTNUMBER,
               DESCRIPTION
        FROM #Temp
        GROUP BY PARTNUMBER, DESCRIPTION
    ) AS per_description
) AS per_part
WHERE R = 1

Combine the result of two select queries into one table

I have the following two tables:
STOCK_ON_HAND: This is showing me all of the stock that I have on hand
STOCK_ON_ORDER: This is showing me all of the stock that I have on order
I have the following two queries to summarise the tables:
SELECT STOCK_CODE, SUM(QTY)
FROM STOCK_ON_HAND
GROUP BY STOCK_CODE
HAVING SUM(QTY) <>0;
And
SELECT STOCK_CODE, SUM(ON_ORDER)
FROM STOCK_ON_ORDER
GROUP BY STOCK_CODE
HAVING SUM(ON_ORDER) <>0;
I basically want to combine the above with into one table showing the following fields:
STOCK_CODE
STOCK_ON_HAND
STOCK_ON_ORDER
What would the best approach to achieve this?
For this you need a FULL OUTER JOIN which Access does not support directly, but you can simulate it with UNION like this:
SELECT h.STOCK_CODE, h.HQTY AS STOCK_ON_HAND, o.OQTY AS STOCK_ON_ORDER
FROM (
    SELECT STOCK_CODE, SUM(QTY) AS HQTY
    FROM STOCK_ON_HAND
    GROUP BY STOCK_CODE
    HAVING SUM(QTY) <> 0
) AS h
LEFT JOIN (
    SELECT STOCK_CODE, SUM(ON_ORDER) AS OQTY
    FROM STOCK_ON_ORDER
    GROUP BY STOCK_CODE
    HAVING SUM(ON_ORDER) <> 0
) AS o
ON h.STOCK_CODE = o.STOCK_CODE
UNION
SELECT o.STOCK_CODE, h.HQTY AS STOCK_ON_HAND, o.OQTY AS STOCK_ON_ORDER
FROM (
    SELECT STOCK_CODE, SUM(ON_ORDER) AS OQTY
    FROM STOCK_ON_ORDER
    GROUP BY STOCK_CODE
    HAVING SUM(ON_ORDER) <> 0
) AS o
LEFT JOIN (
    SELECT STOCK_CODE, SUM(QTY) AS HQTY
    FROM STOCK_ON_HAND
    GROUP BY STOCK_CODE
    HAVING SUM(QTY) <> 0
) AS h
ON h.STOCK_CODE = o.STOCK_CODE
If you don't want to see nulls in the results (if they exist) but replace them with 0s, use the function Nz(), like this:
SELECT h.STOCK_CODE, Nz(h.HQTY, 0) AS STOCK_ON_HAND, Nz(o.OQTY, 0) AS STOCK_ON_ORDER
Or get the distinct STOCK_CODEs from both tables and LEFT JOIN them to each of the tables:
SELECT c.STOCK_CODE, h.HQTY AS STOCK_ON_HAND, o.OQTY AS STOCK_ON_ORDER
FROM ((
SELECT STOCK_CODE FROM STOCK_ON_HAND
UNION
SELECT STOCK_CODE FROM STOCK_ON_ORDER
) AS c LEFT JOIN (
SELECT STOCK_CODE, SUM(QTY) AS HQTY
FROM STOCK_ON_HAND
GROUP BY STOCK_CODE
HAVING SUM(QTY) <> 0
) h ON h.STOCK_CODE = c.STOCK_CODE )
LEFT JOIN (
SELECT STOCK_CODE, SUM(ON_ORDER) AS OQTY
FROM STOCK_ON_ORDER
GROUP BY STOCK_CODE
HAVING SUM(ON_ORDER) <> 0
) o ON o.STOCK_CODE = c.STOCK_CODE
WHERE h.HQTY IS NOT NULL OR o.OQTY IS NOT NULL
I would recommend doing a UNION ALL before aggregating:
SELECT s.STOCK_CODE, SUM(s.on_hand) AS STOCK_ON_HAND, SUM(s.on_order) AS STOCK_ON_ORDER
FROM (SELECT STOCK_CODE, SUM(QTY) as on_hand, 0 as on_order
      FROM STOCK_ON_HAND
      GROUP BY STOCK_CODE
      UNION ALL
      SELECT STOCK_CODE, 0, SUM(ON_ORDER) as on_order
      FROM STOCK_ON_ORDER
      GROUP BY STOCK_CODE
     ) s
GROUP BY s.STOCK_CODE
HAVING SUM(s.on_hand) <> 0 OR SUM(s.on_order) <> 0;
If MS Access does not support UNION ALL in the FROM clause, you can use a view to set that up.

Select Customer ID who hasnt purchased product X

I have a table of customer IDs and Products Purchased. A customer ID can purchase multiple products over time.
customerID, productID
In BigQuery I need to find the CustomerID for those who have not purchased product A.
I've been going around in circles trying to do self joins, inner joins, but I'm clueless.
Any help appreciated.
-- One row per customer; the per-row flag is 1 for a purchase of product 'A',
-- so a maximum of 0 means the customer never bought it.
select customerID
from your_table
group by customerID
having max(case when productID = 'A' then 1 else 0 end) = 0
and to check if it only contains a name
sum(case when productID contains 'XYZ' then 1 else 0 end) = 0
Below is for BigQuery Standard SQL
#standardSQL
-- Customers with zero rows where Product is 'A'.
SELECT CustomerID
FROM `project.dataset.yourTable`
GROUP BY CustomerID
HAVING SUM(IF(Product = 'A', 1, 0)) = 0
You can test / play with it using dummy data as below
#standardSQL
-- Inline dummy rows stand in for the real table so the query can be run as is.
WITH `project.dataset.yourTable` AS (
  SELECT *
  FROM UNNEST([
    STRUCT(1234 AS CustomerID, 'A' AS Product),
    (11234, 'A'),
    (4567, 'A'),
    (7896, 'C'),
    (5432, 'B')
  ])
)
-- Customers with no purchase of product 'A'.
SELECT CustomerID
FROM `project.dataset.yourTable`
GROUP BY CustomerID
HAVING COUNTIF(Product = 'A') = 0
how would I adjust this so it could be productID contains "xyz"
#standardSQL
-- Inline dummy rows stand in for the real table so the query can be run as is.
WITH `project.dataset.yourTable` AS (
  SELECT *
  FROM UNNEST([
    STRUCT(1234 AS CustomerID, 'Axyz' AS Product),
    (11234, 'A'),
    (4567, 'A'),
    (7896, 'Cxyz'),
    (5432, 'B')
  ])
)
-- Customers with no purchase whose Product contains 'xyz'.
SELECT CustomerID
FROM `project.dataset.yourTable`
GROUP BY CustomerID
HAVING COUNTIF(REGEXP_CONTAINS(Product, 'xyz')) = 0
If you have a customer table, you might want:
-- Customers for whom no purchase row of product 'A' exists; this includes
-- customers with no purchases at all.
-- t is the purchases table from the question (columns customerID, productID).
-- NOTE(review): the customers table is hypothetical; its key is assumed to be
-- named customerID as well - confirm against the real schema.
select c.*
from customers c
where not exists (select 1 from t where t.customerID = c.customerID and t.productID = 'A');
This will return customers who have made no purchases as well as those who have purchased all but product A. Of course, the definition of a customer in your data might be that the customer has made a purchase, in which case I like Juergen's solution.

DB2 how to sum two column from two different table

Select sum(amt) as totalA from tableA where id>10;
Select sum(amount) as totalB from tableB where people = 'JOSH';
What is the best way if the objective is to have sum(totalA + totalB)?
-- Compute each table's partial total as its own row, then add the rows up.
with partial_totals (total) as (
    select sum(amt) from tableA where id > 10
    union all
    select sum(amount) from tableB where people = 'JOSH'
)
select sum(total)
from partial_totals

SQL, conditional column values?

One of the columns in my table is "DataSource". Datasource can have two values, lets say "A" or "B". I would like to always take the row when Datasource = "A", however, if there isn't an entry for A I would like to take "B".
How does one do this in SQL Server?
EDIT:
So for a particular product (ProductID), one ProductID may have two rows, each containing a different DataSource, whereas another ProductID may only have one DataSource:
{ProductID DataSource}
{1 A},
{1 B},
{2 B}
Here I would wish to select the top and bottom row
A few options following your edit...
-- Take every 'A' row; take a 'B' row only when the same ProductID has no 'A' row.
SELECT *
FROM table
WHERE DataSource = 'A'
   OR (
        DataSource = 'B'
        AND NOT EXISTS (
            SELECT *
            FROM table AS lookup
            WHERE lookup.ProductID = table.ProductID
              AND lookup.DataSource = 'A'
        )
      )
-- lookup: the preferred DataSource per ProductID. 'A' sorts before 'B', so
-- MIN (not MAX) picks 'A' whenever it exists; GROUP BY is required to get one
-- value per product.
-- Only the base table's columns are returned; the join merely filters.
SELECT
  table.*
FROM
  table
INNER JOIN
  (SELECT ProductID, MIN(DataSource) AS DataSource FROM table GROUP BY ProductID) AS lookup
    ON  lookup.ProductID  = table.ProductID
    AND lookup.DataSource = table.DataSource
-- Number each product's rows by DataSource and keep the first one.
SELECT
  ranked.*
FROM
(
  SELECT
    *,
    ROW_NUMBER() OVER (PARTITION BY ProductID ORDER BY DataSource) AS sequence_id
  FROM
    table
) AS ranked
WHERE
  ranked.sequence_id = 1
-- 'A' rows, plus 'B' rows of products that have no 'A' row.
-- Both branches read the same table and return the same columns; NOT EXISTS
-- is used instead of NOT IN so a NULL ProductID cannot empty the result.
-- The branches are disjoint, so UNION ALL is sufficient.
select p.ProductID, p.DataSource
from product p
where p.DataSource = 'A'
UNION ALL
select p.ProductID, p.DataSource
from product p
where p.DataSource = 'B'
  and not exists (select 1
                  from product a
                  where a.ProductID = p.ProductID
                    and a.DataSource = 'A')
-- Returns a single row for the whole table: the one whose DataSource sorts first.
SELECT top 1 *
from YourTable
ORDER BY DataSource ASC
Else
if the real values are NOT 'A' or 'B', which are just placeholders for the @A and @B variables
-- @A is a T-SQL variable holding the preferred DataSource value; rows with
-- that value sort first, everything else after.
SELECT top 1 *
from YourTable
ORDER BY CASE DataSource WHEN @A THEN 0 ELSE 1 END ASC
or
if there more variances than AB
-- @A and @B are T-SQL variables holding the first and second preferred
-- DataSource values; any other value sorts last.
SELECT top 1 *
from YourTable
ORDER BY CASE DataSource WHEN @A THEN 0 WHEN @B THEN 1 ELSE 2 END ASC
OR
If you need to group by ProductId
-- Per ProductId, rank rows by preference (@A first, then @B, then anything
-- else) and keep the best-ranked row.
-- ROW_NUMBER is a function and needs its parentheses; @A / @B are variables.
select * from (
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY ProductId
                              ORDER BY CASE DataSource WHEN @A THEN 0 WHEN @B THEN 1 ELSE 2 END ASC) AS ordinal
    from YourTable
) t
WHERE t.ordinal = 1
Based on your sample data, this works:
-- 'A' sorts before 'B', so MIN returns 'A' when the product has one and 'B'
-- otherwise. The aggregate is aliased so the result column has a name.
SELECT ProductID, MIN(DataSource) AS DataSource
FROM #tab
GROUP BY ProductID;
And here is my test data:
-- Test data in a temp table: "declare #tab table" is not valid T-SQL, while
-- CREATE TABLE #tab is, and it matches the queries that read FROM #tab.
create table #tab (ProductID int, DataSource char(1));
insert into #tab (ProductID, DataSource) values (1, 'A');
insert into #tab (ProductID, DataSource) values (1, 'B');
insert into #tab (ProductID, DataSource) values (2, 'B');
If there are more columns in the table than the two you show then:
-- preferred: the lowest DataSource per product; joining back on both columns
-- returns the full rows that carry that DataSource.
;WITH preferred AS
(
    SELECT ProductID, MIN(DataSource) AS DataSource
    FROM #tab
    GROUP BY ProductID
)
SELECT T1.*
FROM #tab T1
INNER JOIN preferred
    ON  preferred.ProductID  = T1.ProductID
    AND preferred.DataSource = T1.DataSource
ok, if I understood you requirement correctly then this would work
-- The scalar subquery yields 'A' when at least one 'A' row exists and NULL
-- otherwise; ISNULL then falls back to 'B'.
-- The subquery must be wrapped in parentheses and needs its own FROM, and
-- string literals take single quotes (double quotes denote identifiers).
select TOP 1 * from yourtable where DATASOURCE = ISNULL((SELECT MAX(DATASOURCE) FROM yourtable WHERE DATASOURCE = 'A'), 'B')
The idea here is simple: you change the DATASOURCE condition based on whether any rows with your first preference ("A") exist; if they do, the "A" condition is applied, otherwise the "B" condition is applied.