Condition based on column created after partition SQL - sql

I used over partition to create a table with SalespersonID, AvgLineProfit, AvgLineProfitPerSalesPerson.
select distinct
invoices.SalespersonPersonID,
sum(invoicelines.Quantity) over(partition by invoices.SalespersonPersonID) as QuantityPerSalesPerson,
avg(invoicelines.LineProfit) over() as AvgLineProfit,
avg(invoicelines.LineProfit) over(partition by invoices.SalespersonPersonID) as AvgProfitPerSalesPerson
from Sales.InvoiceLines as invoicelines
join Sales.Invoices as invoices on invoicelines.InvoiceID = invoices.InvoiceID
order by invoices.SalespersonPersonID
Now I want to add another column with the SalesPersonID if AvgLineProfitPerSalesPerson > AvgLineProfit, or NULL if not.
How can I use the created columns in my condition?
I tried creating new select query or using 'case when'
case when AvgProfitPerSalesPerson > AvgLineProfit then SalespersonPersonID over() as new_column
but it doesn't work.
Here is how the output looks like
SalespersonPersonID
AvgLinesProfit
AvgProfitPerSalesPerson
1
20
22
2
20
23
3
20
19
4
20
19

You can't refer to aliases in the same select. so you have to repeate the whole thing:
select ...
, case when avg(invoicelines.LineProfit) over(partition by invoices.SalespersonPersonID) > avg(invoicelines.LineProfit) over() then SalespersonPersonID end as new_column
or wrap your original select in a subquery then it works:
select *, case when case when AvgProfitPerSalesPerson > AvgLineProfit...
from (
select distinct
invoices.SalespersonPersonID,
sum(invoicelines.Quantity) over(partition by invoices.SalespersonPersonID) as QuantityPerSalesPerson,
avg(invoicelines.LineProfit) over() as AvgLineProfit,
avg(invoicelines.LineProfit) over(partition by invoices.SalespersonPersonID) as AvgProfitPerSalesPerson
from Sales.InvoiceLines as invoicelines
join Sales.Invoices as invoices on invoicelines.InvoiceID = invoices.InvoiceID
) x
order by SalespersonPersonID

In this way it works
select
new_table.SalespersonPersonID,
new_table.QuantityPerSalesPerson,
new_table.AvgLineProfit,
new_table.AvgProfitPerSalesPerson,
case when new_table.AvgProfitPerSalesPerson > new_table.AvgLineProfit then new_table.SalespersonPersonID
end as succesfull_salesperson
from
(select distinct
invoices.SalespersonPersonID,
sum(invoicelines.Quantity) over(partition by invoices.SalespersonPersonID) as QuantityPerSalesPerson,
avg(invoicelines.LineProfit) over() as AvgLineProfit,
avg(invoicelines.LineProfit) over(partition by invoices.SalespersonPersonID) as AvgProfitPerSalesPerson
from Sales.InvoiceLines as invoicelines
join Sales.Invoices as invoices on invoicelines.InvoiceID = invoices.InvoiceID) as new_table
order by SalespersonPersonID

Related

SQL MAX in two columns by statistic

I have this query:
DECLARE #startTime DATETIME = DATEADD(MINUTE, -100, GETDATE()) --StartTime
SELECT
COUNT(*) Frecuency, mes.receivedqty AS Qty, ac.item AS Item
FROM
mesReservationReceipts mes (nolock)
INNER JOIN
ACCS_Reservation ac (nolock) ON ac.IDReservation = mes.idReservation
WHERE
ac.item IN (SELECT ac2.item
FROM mesReservationReceipts m2
INNER JOIN ACCS_Reservation ac2 ON ac2.IDReservation = m2.idReservation
WHERE m2.receivedate > #startTime)
GROUP BY
mes.receivedqty, ac.item
I get this result, but only I want the yellow highlighted rows - how can I get those? Please help!:
Note: I tried with MAX(Frequency) but that does not work because it should be grouped by the qty, and its the same case. I put a MAX(Qty), but for example, if the Qty is more than Statistic, add in the table result (and I only want the real statistic qty).
You can write something like this
SELECT * FROM(SELECT Frequency,Receivedqty,Item,
ROW_NUMBER() OVER(Partition by Item ORDER BY Quantity desc) as RowId
FROM (
----your query-----
))as q
Where q.RowId = 1
You can use row_number() to get the highest amount in each column. Then filter:
select item, Frecuency, qty
from (select ac.item as Item, count(*) as Frecuency, mes.receivedqty as Qty,
row_number() over (order by count(*) desc) as seqnum_f,
row_number() over (order by mes.receivedqty desc) as seqnum_r
from mesReservationReceipts mes join
ACCS_Reservation ac
on ac.IDReservation = mes.idReservation
where ac.item in (select ac2.item
from mesReservationReceipts m2 inner join
ACCS_Reservation ac2
on ac2.IDReservation = m2.idReservation
where m2.receivedate > #startTime
)
group by mes.receivedqty, ac.item
) ma
where 1 in (seqnum_f, seqnum_r);
Use rank() if you want duplicates, in the event that the highest values have duplicates.

how to use rank/join and where together

I have used multiple inner joins in my code, and I provided rank, but now I want to select a particular rank. So how to use rank in a where statement?
Here is my code, but now please help me to proceed further:
select [YEAR],
[IDManufacturer],
sum([TotalPrice]),
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
order by sum([TotalPrice]) desc
Now I want to select only rank 3 and 4. How to do that?
You could either do sub-query or CTE, i would suggest try with 2 methods and look at execution plan pick which performs better:
Sub Query
SELECT * FROM
(select [YEAR],
[IDManufacturer],
sum([TotalPrice]) TotalPrice,
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
) as SQ
Where sales_rank = 3 or sales_rank = 4
go
Common Table Expression
; with CTE as
(select [YEAR],
[IDManufacturer],
sum([TotalPrice]) TotalPrice,
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
)
SELECT * FROM CTE WHERE sales_rank = 3 or sales_rank = 4
If you want only rank 3 and 4 then try this:
select * from (
select [YEAR],
[IDManufacturer],
sum([TotalPrice]),
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
order by sum([TotalPrice]) desc
) t where sales_rank in (3,4)
If you only want the 3rd and 4th values -- and assuming no ties -- then use offset/fetch:
offset 2 rows fetch first 2 rows only
The offset 2 is because offset starts counting at 0 rather than 1.

SQL show the first and last value in count

I need to write a query that show the max and the min count of order the customer order.
I tried:
(SELECT TOP 1 CustomerID, COUNT(*) AS Number_Of_Orders
FROM Orders
GROUP BY CustomerID
ORDER BY COUNT(*) ASC)
UNION ALL
(SELECT TOP 1 CustomerID, COUNT(*) AS Number_Of_Orders
FROM Orders
GROUP BY CustomerID
ORDER BY COUNT(*) DESC)
But I don't succeed to union between the output, I got the error Incorrect syntax near the keyword 'ORDER'.
How can I get that?
I'm not sure I would want to run the aggregation twice, so use window functions:
SELECT CustomerID, Number_Of_Orders
FROM (SELECT CustomerID, COUNT(*) AS Number_Of_Orders,
ROW_NUMBER() OVER (ORDER BY COUNT(*) ASC) as seqnum_asc,
ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC) as seqnum_desc
FROM Orders
GROUP BY CustomerID
) c
WHERE seqnum_asc = 1 OR seqnum_desc = 1;
You can't use ORDER BY inside the queries that you want to unify with UNION, but you can do this:
SELECT * FROM
(SELECT TOP 1 CustomerID, COUNT(*) AS Number_Of_Orders
FROM Orders
GROUP BY CustomerID
ORDER BY COUNT(*) ASC) t
UNION ALL
SELECT * FROM
(SELECT TOP 1 CustomerID, COUNT(*) AS Number_Of_Orders
FROM Orders
GROUP BY CustomerID
ORDER BY COUNT(*) DESC) t
This does the trick although it is inefficient because you execute twice the same code and sort twice.
You can use window functions for this. This will give you multiple customers for min/max if there are ties (fiddle):
SELECT CustomerID
, OrderCount
, CASE WHEN OrderCount = MinOrderCount THEN 'Customer with min orders'
WHEN OrderCount = MaxOrderCount THEN 'Customer with max orders' END AS Type
FROM (
SELECT CustomerID
, COUNT(*) AS OrderCount
, MIN(COUNT(*)) OVER () AS MinOrderCount
, MAX(COUNT(*)) OVER () AS MaxOrderCount
FROM Orders
GROUP BY CustomerID
) AS x
WHERE OrderCount = MinOrderCount OR OrderCount = MaxOrderCount

SQL Server Group By Complex Query

In SQL Server, suppose we have a SALES_HISTORY table as below.
CustomerNo PurchaseDate ProductId
1 20120411 12
1 20120330 13
2 20120312 14
3 20120222 16
3 20120109 16
... and many records for each purchase of each customer...
How can I write the appropriate query for finding:
For each customer,
find the product he bought at MOST,
find the percentage of this product over all products he bought.
The result table must have columns like:
CustomerNo,
MostPurchasedProductId,
MostPurchasedProductPercentage
Assuming SQL Server 2005+, you can do the following:
;WITH CTE AS
(
SELECT *,
COUNT(*) OVER(PARTITION BY CustomerNo, ProductId) TotalProduct,
COUNT(*) OVER(PARTITION BY CustomerNo) Total
FROM YourTable
), CTE2 AS
(
SELECT *,
RN = ROW_NUMBER() OVER(PARTITION BY CustomerNo
ORDER BY TotalProduct DESC)
FROM CTE
)
SELECT CustomerNo,
ProductId MostPurchasedProductId,
CAST(TotalProduct AS NUMERIC(16,2))/Total*100 MostPurchasedProductPercent
FROM CTE2
WHERE RN = 1
You still need to deal when you have more than one product as the most purchased one. Here is a sqlfiddle with a demo for you to try.
Could do a lot prettier, but it works:
with cte as(
select CustomerNo, ProductId, count(1) as c
from SALES_HISTORY
group by CustomerNo, ProductId)
select CustomerNo, ProductId as MostPurchasedProductId, (t.c * 1.0)/(select sum(c) from cte t2 where t.CustomerNo = t2.CustomerNo) as MostPurchasedProductPercentage
from cte t
where c = (select max(c) from cte t2 where t.CustomerNo = t2.CustomerNo)
SQL Fiddle

Grab top 5 rows and combine the rest SQL Server

I have a column for group name and a column for amount spent.
I need to sum the amounts group them based on the group name and then grab the highest five. After that, I need to combine the the rest into it's own group w/ a total of their amount spent. This is what i have right now
SELECT groupName, SUM(amount) AS theAmountSpent
FROM purchases
GROUP BY groupName
ORDER BY theAmountSpent DESC
This groups and orders them, but i dont know how to then grab the remaining groups to combine them. Any help would be appreciated.
Alternate CTE-approach using row_number() (SQL Server 2005+):
WITH cte AS (
SELECT ROW_NUMBER() OVER (ORDER BY (SUM(amount)) DESC) AS num,
groupName, SUM(amount) AS theAmountSpent
FROM purchases
GROUP BY groupName
)
SELECT groupName, theAmountSpent FROM cte WHERE num BETWEEN 1 AND 5 --top 5
UNION ALL
SELECT 'Sum rest', SUM(theAmountSpent) FROM cte WHERE num > 5 -- sum of rest
If I'm understanding you correctly, this should do it:
SELECT top 5 groupName, SUM(amount) AS theAmountSpent
into #tempSpent FROM purchases
GROUP BY groupName
ORDER BY theAmountSpent DESC
Select * from #tempSpent -- get the top 5
--get sum for the rest
SELECT SUM(amount) AS theAmountSpent
FROM purchases
where groupName not in (select groupName from #tempSpent)
Drop table #tempSpent
Another idea from Larsts code:
WITH cte
AS
(
SELECT case
when ROW_NUMBER() OVER (ORDER BY (SUM(amount)) DESC) <=5
then ROW_NUMBER() OVER (ORDER BY (SUM(amount)) DESC)
else 6 end AS num
, groupName
, SUM(amount) AS theAmountSpent
FROM purchases
GROUP BY groupName
)
SELECT num
, max(groupName)
, sum(theAmountSpent )
FROM cte
group by num