SQL Aggregate most frequent value with GROUP BY - sql

I have a table like this:
PARTNUMBER | QUANTITY | DESCRIPTION
'foo' 2 'a'
'foo' 2 'a1'
'bar' 2 'b'
'bar' 2 'b'
'bar' 2 'b1'
'bizz' 2 'c'
I'm trying to group by PARTNUMBER, aggregate by QUANTITY, and aggregate DESCRIPTION by most-frequent appearance.
I tried using a sub-query to aggregate DESCRIPTION by its most frequent occurrence, but I'm having some trouble getting it right, especially with GROUP BY.
Here is what I have:
SELECT SUM(QUANTITY) AS QUANTITY, PARTNNUMBER,
(SELECT TOP(1) [DESCRIPTION]
FROM [PBJobDB].[dbo].[DEVICES]
/*WHERE DESCRIPTION = t1.PARTNO ?? */
GROUP BY [DESCRIPTION], PARTNNUMBER
ORDER BY COUNT([DESCRIPTION]) DESC) as [DESCRIPTION]
FROM `database.table`
GROUP BY PARTNUMBER, [DESCRIPTION]
The subquery is not getting the most frequent DESCRIPTION by PARTNUMBER, and instead gives the most frequent DESCRIPTION in the whole table.
I would like the output to look like this:
PARTNUMBER | QUANTITY | DESCRIPTION
'foo' 4 'a'
'bar' 6 'b'
'bizz' 2 'c'

I tried below one, please check whether its working for you,
SELECT PARTNUMBER,SUM(QUANTITY) AS QUANTITY,
(
SELECT TOP 1 DESCP FROM
(SELECT [DESCRIPTION]'DESCP',COUNT(*)'CNT'
FROM testtable
WHERE PARTNUMBER = t1.PARTNUMBER
GROUP BY [DESCRIPTION]) A
GROUP BY DESCP,CNT HAVING CNT=MAX(CNT)
)as [DESCRIPTION]
FROM testtable T1
GROUP BY PARTNUMBER

This is what ended up working for me...
select distinct t1.PARTNUMBER , sum(t1.QUANTITY) AS QUANTITY, (
select TOP(1) [DESCRIPTION]
from [PBJobDB].[dbo].[DEVICES] AS t2
where t2.PARTNUMBER = t1.PARTNUMBER
group by [DESCRIPTION]
order by count(*) desc ) as [DESCRIPTION]
from `database.table` AS t1
/* WHERE `column` IS NULL AND `other_column` = 'some_value' */
GROUP BY t1.PARTNUMBER

You are looking for the mode. I would use two levels of aggregation:
select partnumber, sum(quantity) as total_quantity,
max(case when seqnum = 1 then description end) as description
from (select partnumber, description, sum(quantity) as quantity,
row_number() over (partition by partnumber order by sum(quantity) desc, description) as seqnum
from t
group by partnumber, description
) pd
group by partnumber;

I would use Sum() over to get total quantity. Below is the example that worked for me.
SELECT PARTNUMBER, QUANTITY, DESCRIPTION
FROM (
SELECT PARTNUMBER,SUM(Quantity) OVER (PARTITION BY PARTNUMBER ORDER BY CAST(PARTNUMBER AS VARCHAR(30)) ) Quantity,DESCRIPTION,R
FROM (
SELECT ROW_NUMBER() OVER (PARTITION BY PARTNUMBER ORDER BY COUNT(DESCRIPTION) DESC) R,
SUM(Quantity) Quantity, --OVER (PARTITION BY PARTNUMBER ORDER BY CAST(PARTNUMBER AS VARCHAR(30)) ) Quantity,
PARTNUMBER,
DESCRIPTION
FROM #Temp
GROUP BY PARTNUMBER,DESCRIPTION
) AS S
) AS S
WHERE R = 1

Related

Combine the result of two select queries into one table

I have the following two tables:
STOCK_ON_HAND: This is showing me all of the stock that I have on hand
STOCK_ON_ORDER: This is showing me all of the stock that I have on order
I have the following two queries to summarise the tables:
SELECT STOCK_CODE, SUM(QTY)
FROM STOCK_ON_HAND
GROUP BY STOCK_CODE
HAVING SUM(QTY) <>0;
And
SELECT STOCK_CODE, SUM(ON_ORDER)
FROM STOCK_ON_ORDER
GROUP BY STOCK_CODE
HAVING SUM(STOCK_ON_ORDER) <>0;
I basically want to combine the above with into one table showing the following fields:
STOCK_CODE
STOCK_ON_HAND
STOCK_ON_ORDER
What would the best approach to achieve this?
For this you need a FULL OUTER JOIN which Access does not support directly, but you can simulate it with UNION like this:
SELECT h.STOCK_CODE, h.HQTY AS STOCK_ON_HAND, o.OQTY AS STOCK_ON_ORDER
FROM (
SELECT STOCK_CODE, SUM(QTY) AS HQTY
FROM STOCK_ON_HAND
GROUP BY STOCK_CODE
HAVING SUM(QTY) <> 0
) h LEFT JOIN (
SELECT STOCK_CODE, SUM(ON_ORDER) AS OQTY
FROM STOCK_ON_ORDER
GROUP BY STOCK_CODE
HAVING SUM(ON_ORDER) <> 0
) o ON o.STOCK_CODE = h.STOCK_CODE
UNION
SELECT o.STOCK_CODE, h.HQTY AS STOCK_ON_HAND, o.OQTY AS STOCK_ON_ORDER
FROM (
SELECT STOCK_CODE, SUM(QTY) AS HQTY
FROM STOCK_ON_HAND
GROUP BY STOCK_CODE
HAVING SUM(QTY) <> 0
) h RIGHT JOIN (
SELECT STOCK_CODE, SUM(ON_ORDER) AS OQTY
FROM STOCK_ON_ORDER
GROUP BY STOCK_CODE
HAVING SUM(ON_ORDER) <> 0
) o ON o.STOCK_CODE = h.STOCK_CODE
If you don't want to see nulls in the results (if they exist) but replace them with 0s, use the function Nz(), like this:
SELECT h.STOCK_CODE, Nz(h.HQTY, 0) AS STOCK_ON_HAND, Nz(o.HQTY, 0) AS STOCK_ON_ORDER
Or get the distinct STOCK_CODEs from both tables and LEFT JOIN them to each of the tables:
SELECT c.STOCK_CODE, h.HQTY AS STOCK_ON_HAND, o.OQTY AS STOCK_ON_ORDER
FROM ((
SELECT STOCK_CODE FROM STOCK_ON_HAND
UNION
SELECT STOCK_CODE FROM STOCK_ON_ORDER
) AS c LEFT JOIN (
SELECT STOCK_CODE, SUM(QTY) AS HQTY
FROM STOCK_ON_HAND
GROUP BY STOCK_CODE
HAVING SUM(QTY) <> 0
) h ON h.STOCK_CODE = c.STOCK_CODE )
LEFT JOIN (
SELECT STOCK_CODE, SUM(ON_ORDER) AS OQTY
FROM STOCK_ON_ORDER
GROUP BY STOCK_CODE
HAVING SUM(ON_ORDER) <> 0
) o ON o.STOCK_CODE = c.STOCK_CODE
I would recommend doing a UNION ALL before aggregating:
SELECT STOCK_CODE, SUM(on_hand), SUM(on_order)
FROM (SELECT STOCK_CODE, SUM(QTY) as on_hand, 0 on_order
FROM STOCK_ON_HAND
GROUP BY STOCK_CODE
UNION ALL
SELECT STOCK_CODE, 0, SUM(ON_ORDER) as on_order
FROM STOCK_ON_ORDER
GROUP BY STOCK_CODE
) s
GROUP BY STOCK_CODE
HAVING on_hand <> 0 OR on_order <> 0;
If MS Access does not support UNION ALL in the FROM clause, you can use a view to set that up.

get only row that meet condition if such row exist and if not get the row that meet another condition

this sounds like a simple question but I just cant find the right way.
given the simplified table
with t as (
select ordernumber, orderdate, case when ordertype in (5,21) then 1 else 0 end is_restore , ordertype, row_number() over(order by orderdate) rn from
(
select to_date('29.08.08','DD.MM.YY') orderdate,'313' ordernumber, 1 as ordertype from dual union all
select to_date('13.03.15','DD.MM.YY') orderdate, '90/4/2' ordernumber, 5 as ordertype from dual
)
)
select * from t -- where clause should be here
for every row is_restore guaranteed to be 1 or 0.
if table has a row where is_restore=1 then select ordernumber,orderdate of that row and nothing else.
If a table does not have a row where is_restore=1 then select ordernumber,orderdate of the row where rn=1(row where rn=1 is guaranteed to exist in a table)
Given the requirements above what do I need to put in where clause to get the following?
You could use ROW_NUMBER:
CREATE TABLE t
AS
select ordernumber, orderdate,
case when ordertype in (5,21) then 1 else 0 end is_restore, ordertype,
row_number() over(order by orderdate) rn
from (
select to_date('29.08.08','DD.MM.YY') orderdate,'313' ordernumber,
1 as ordertype
from dual union all
select to_date('13.03.15','DD.MM.YY') orderdate, '90/4/2' ordernumber,
5 as ordertype
from dual);
-------------------
with cte as (
select t.*,
ROW_NUMBER() OVER(/*PARTITION BY ...*/ ORDER BY is_restore DESC, rn) AS rnk
from t
)
SELECT *
FROM cte
WHERE rnk = 1;
db<>fiddle demo
Here is sql, that doesn't use window functions, maybe it will be useful for those, whose databases don't support OVER ( ... ) or when there are indexed fields, on which query is based.
SELECT
*
FROM t
WHERE t.is_restore = 1
OR (
NOT EXISTS (SELECT 1 FROM t WHERE t.is_restore = 1)
AND t.rn = 1
)

aggregation according to different conditions on same column

I have a table #tbl like below, i need to write a query like if there are more than 3 records availble
for particular cid then avg(val of particular cid ) for particular cid should be dispalyed against each id and if there are less than
3 records availble for particular cid then avg(val of all records availble).
Please suggest.
declare #tbl table(id int, cid int, val float )
insert into #tbl
values(1,100,20),(2,100,30),(3,100,25),(4,100,31),(5,100,50),
(6,200,30),(7,200,30),(8,300,90)
Your description is not clear, but I believe you need windowed functions:
WITH cte AS (
SELECT *, COUNT(*) OVER(PARTITION BY cid) AS cnt
FROM #tbl
)
SELECT id, (SELECT AVG(val) FROM cte) AS Av
FROM cte
WHERE cnt <=3
UNION ALL
SELECT id, AVG(val) OVER(PARTITION BY cid) AS Av
FROM cte
WHERE cnt > 3
ORDER BY id;
DBFiddle Demo
EDIT:
SELECT id,
CASE WHEN COUNT(*) OVER(PARTITION BY cid) <= 3 THEN AVG(val) OVER()
ELSE AVG(val) OVER(PARTITION BY cid)
END
FROM #tbl
ORDER BY id;
DBFiddle Demo2
You can try with the following. First calculate the average for each Cid depending in it's number of occurences, then join each Cid with the Id to display all table.
;WITH CidAverages AS
(
SELECT
T.cid,
Average = CASE
WHEN COUNT(1) >= 3 THEN AVG(T.val)
ELSE (SELECT AVG(Val) FROM #tbl) END
FROM
#tbl AS T
GROUP BY
T.cid
)
SELECT
T.*,
C.Average
FROM
#tbl AS T
INNER JOIN CidAverages AS C ON T.cid = C.cid
Given the clarifications in comments, I am thinking this is the intention
declare #tbl table(id int, cid int, val float )
insert into #tbl
values(1,100,20),(2,100,30),(3,100,25),(4,100,31),(5,100,50),
(6,200,30),(7,200,30),(8,300,90);
select distinct
cid
, case
when count(*) over (partition by cid) > 3 then avg(val) over (partition by cid)
else avg (val) over (partition by 1)
end as avg
from #tbl;
http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=fdf4c4457220ec64132de7452a034976
cid avg
100 31.2
200 38.25
300 38.25
There are a number of aspects of a query like this that when run at scale though are going to be pretty bad on the query plan, I'd want to test this at a larger scale and tune before using.
The description was not clear on what happened if it was exactly 3, it mentions 'more than 3' and 'less than 3' - within this code the 'more than' was used to determine which category it was in, and less than interpreted to mean 'less than or equal to 3'

Remove duplicate rows as an additional column

I have a sql table for student records and I have some duplicate rows for the student dimension cause of the major, so now I have something like this:
ID Major
----------
1 CS
1 Mgt
What I want is to combine this two rows in this form:
ID Major Major2
----------
1 CS Mgt
You need a number for pivoting. Then you can pivot using either pivot or conditional aggregation:
select id,
max(case when seqnum = 1 then major end) as major_1,
max(case when seqnum = 2 then major end) as major_2
from (select t.*,
row_number() over (partition by id order by (select null)) as seqnum
from t
) t
group by id;
Note: you should validate that "2" is large enough to count the majors. You can get the maximum using:
select top 1 id, count(*)
from t
group by id
order by count(*) desc;
If you have at most two different values of major:
select a.id as id,
a.major as major,
b.major as major2
from YOUR_TABLE a
left join YOUR_TABLE b on
a.id = b.id
and (b.major is null or a.major > b.major)
This will help you
Select
ID,
(select top 1 Major from <Your_Table> where id=T.Id order by Major) Major,
(case when count(Id)>1 then (select top 1 Major from #temp where id=T.Id order by Major desc) else null end) Major2
from <Your_Table> T
Group By
ID
You can use pivot function directly
SELECT [ID],[CS] AS Major , [Mgt] AS Major2 from Your_Table_Name
PIVOT
(max(Major)for [Major] IN ([CS] , [Mgt]))as p

Intersect Select Statements on Specific Columns

I've a table of SalesDetails, looking like this:
InvoiceID, LineID, Product
1,1,Apple
1,2,Banana
2,1,Apple
2,2,Mango
3,1,Apple
3,2,Banana
3,3,Mango
My requirement is to return rows where an Invoice contained sales of both: Apple AND Banana, but if there are other products on such an invoice, I don't want those.
So the result should be:
1,1,Apple
1,2,Banana
3,1,Apple
3,2,Banana
I tried the following:
Select * from SalesDetails where Product = 'Apple'
Intersect
Select * from SalesDetails where Product = 'Banana'
Didn't work, because it seems Intersect needs to match all the columns.
What I'm hoping to do is:
Select * from SalesDetails where Product = 'Apple'
Intersect ----On InvoiceID-----
Select * from SalesDetails where Product = 'Banana'
Is there a way to do this?
Or do I have to first Intersect on InvoiceIDs only using my criteria, then select the rows of those InvoiceIDs where the criteria is matched again, I.e.:
Select * From SalesDetails
Where Product In ('Apple', 'Banana') And InvoiceID In
(
Select InvoiceID from SalesDetails where Product = 'Apple'
Intersect
Select InvoiceID from SalesDetails where Product = 'Banana'
)
Which seems somewhat wasteful as it's examining the criteria twice.
Okay this time I've managed to get reuse of the Apple/Banana info by using a CTE.
with sd as (
Select * from SalesDetails
where (Product in ('Apple', 'Banana'))
)
Select * from sd where invoiceid in (Select invoiceid from
sd group by invoiceid having Count(distinct product) = 2)
SQL Fiddle
Do it with conditional aggregation:
select *
from SalesDetails
where product in ('apple', 'banana') and invoiceid in(
select invoiceid
from SalesDetails
group by invoiceid
having sum(case when product in('apple', 'banana') then 1 else 0 end) >= 2)
I think OP's suggestion is about the best one can do. The following might be faster, although I expect the difference to be slight and I have not done any benchmarking.
Select * From SalesDetails
Where Product ='Apple' And InvoiceID In
(
Select InvoiceID from SalesDetails where Product = 'Banana'
)
union all
select * from SalesDetails
Where Product ='Banana' And InvoiceID In
(
Select InvoiceID from SalesDetails where Product = 'Apple'
)
A self-join will solve the problem.
SELECT T1.*
FROM SalesDetails T1
INNER JOIN SalesDetails T2 ON T1.InvoiceId = T2.InvoiceId
AND (T1.Product = 'Apple' AND T2.Product = 'Banana'
OR T1.Product = 'Banana' AND t2.Product = 'Apple')
declare #t table (Id int,val int,name varchar(10))
insert into #t (id,val,name)values
(1,1,'Apple'),
(1,2,'Banana'),
(2,1,'Apple'),
(2,2,'Mango'),
(3,1,'Apple'),
(3,2,'Banana'),
(3,3,'Mango')
;with cte as (
select ID,val,name,ROW_NUMBER()OVER (PARTITION BY id ORDER BY val)RN from #t)
,cte2 AS(
select TOP 1 c.Id,c.val,c.name,C.RN from cte c
WHERE RN = 1
UNION ALL
select c.Id,c.val,c.name,C.RN from cte c
WHERE c.Id <> c.val)
select Id,val,name from (
select Id,val,name,COUNT(RN)OVER (PARTITION BY Id )R from cte2 )R
WHERE R = 2
Other was is to do PIVOT like this:
DECLARE #DataSource TABLE
(
[InvoiceID] TINYINT
,[LineID] TINYINT
,[Product] VARCHAR(12)
);
INSERT INTO #DataSource ([InvoiceID], [LineID], [Product])
VALUES (1,1,'Apple')
,(1,2,'Banana')
,(2,1,'Apple')
,(2,2,'Mango')
,(3,1,'Apple')
,(3,2,'Banana')
,(3,3,'Mango');
SELECT *
FROM #DataSource
PIVOT
(
MAX([LineID]) FOR [Product] IN ([Apple], [Banana])
) PVT
WHERE [Apple] IS NOT NULL
AND [Banana] IS NOT NULL;
It will give you the results in this format, but you are able to UNVPIVOT them if you want:
Or you can use window function like this:
;WITH DataSource AS
(
SELECT *
,SUM(1) OVER (PARTITION BY [InvoiceID]) AS [Match]
FROM #DataSource
WHERE [Product] = 'Apple' OR [Product] = 'Banana'
)
SELECT *
FROM DataSource
WHERE [Match] =2
First, you want to COUNT the number of rows per InvoiceID that matched the criteria Product = 'Apple' or 'Banana'. Then do a SELF-JOIN and filter the rows such that the COUNT must be >= 2, or the number of Products in your critera.
SQL Fiddle
SELECT sd.*
FROM (
SELECT InvoiceID, CC = COUNT(*)
FROM SalesDetails
WHERE Product IN('Apple', 'Banana')
GROUP BY InvoiceID
)t
INNER JOIN SalesDetails sd
ON sd.InvoiceID = t.InvoiceID
WHERE
t.CC >= 2
AND sd.Product IN('Apple', 'Banana')
WITH cte
AS
(
SELECT *
FROM [dbo].[SalesDetails]
WHERE [Product]='banana')
,cte1
AS
(SELECT *
FROM [dbo].[SalesDetails]
WHERE [Product]='apple')
SELECT *
FROM cte c INNER JOIN cte1 c1
ON c.[InvoiceID]=c1.[InvoiceID]
Here is a method using window functions:
select sd.*
from (select sd.*,
max(case when product = 'Apple' then 1 else 0 end) over (partition by invoiceid) as HasApple,
max(case when product = 'Banana' then 1 else 0 end) over (partition by invoiceid) as HasBanana
from salesdetails sd
) sd
where (product = 'Apple' and HasBanana > 0) or
(product = 'Banana' and HasApple > 0);
If you only want to write the condition once and are sure that each Product will only be once in any Order, you can use this:
SELECT * FROM (
SELECT InvoiceID, Product
,COUNT(*) OVER (PARTITION BY InvoiceID) matchcount
FROM SalesDetails
WHERE Product IN ('Apple','Banana') ) WHERE matchcount = 2;
This is what I ended up using, inspired by #Leon Bambrick:
(Expanded a little to support multiple products in the criteria)
WITH cteUnionBase AS
(SELECT * FROM SalesDetails
WHERE Product IN ('Apple Red','Apple Yellow','Apple Green','Banana Small','Banana Large')),
cteBanana AS
(SELECT * FROM cteUnionBase
WHERE Product IN ('Banana Small','Banana Large')),
cteApple AS
(SELECT * FROM cteUnionBase
WHERE Product IN ('Apple Red','Apple Yellow','Apple Green')),
cteIntersect AS
(
SELECT InvoiceID FROM cteApple
Intersect
SELECT InvoiceID FROM cteBanana
)
SELECT cteUnionBase.*
FROM cteUnionBase INNER JOIN cteIntersect
on cteUnionBase.InvoiceID = cteIntersect.InvoiceID