SQL Windowing Ranks Functions - sql

-- Top five products by total internet sales in calendar year 2007.
-- The three ranking columns are computed over the per-product totals,
-- highest total first.
SELECT
    Rankings.*
FROM (
    SELECT
        Sales.Product,
        Sales.SalesAmount,
        ROW_NUMBER() OVER (ORDER BY Sales.SalesAmount DESC) AS RowNum,
        RANK()       OVER (ORDER BY Sales.SalesAmount DESC) AS RankOf2007,
        DENSE_RANK() OVER (ORDER BY Sales.SalesAmount DESC) AS DRankOf2007
    FROM (
        -- One row per product: sales summed over the orders dated in 2007.
        SELECT
            p.EnglishProductName AS Product,
            SUM(f.SalesAmount)   AS SalesAmount,
            d.CalendarYear       AS CalenderYear
        FROM FactInternetSales AS f
        INNER JOIN DimDate AS d
            ON f.OrderDateKey = d.DateKey
        INNER JOIN DimProduct AS p
            ON f.ProductKey = p.ProductKey
        WHERE d.CalendarYear = 2007
        GROUP BY
            p.EnglishProductName,
            d.CalendarYear
    ) AS Sales
) AS Rankings
WHERE Rankings.RankOf2007 <= 5
ORDER BY Rankings.SalesAmount DESC
I am currently summing the Sales Amount per product for 2007, sorting the totals in descending order, and ranking the products on those totals, so that the product with the highest Sales Amount in that year gets rank 1, and so forth.
Currently my result table looks like the one shown in the image (apart from the RankOf2008 and DRankOf2008 columns). I would like to add the 2008 rankings for the same top 5 products of 2007 (a NULL value if any of those top 5 products of 2007 was not sold in 2008) to the same table as side-by-side columns, as shown in the image above.

Maybe you need something like this.
First rank all products within each year (PARTITION BY the year), so that every product has a per-year rank, and then fetch the required data with the help of a CTE.
-- Rank every product within each calendar year, then pair each top-five
-- product of 2007 with that same product's ranks in 2008. The LEFT JOIN
-- keeps the 2007 row and yields NULL 2008 ranks when there is no 2008 row.
WITH sales
AS (
    -- Total sales per product and calendar year (all years).
    SELECT
        p.EnglishProductName AS Product,
        SUM(f.SalesAmount)   AS SalesAmount,
        d.CalendarYear       AS CalenderYear
    FROM FactInternetSales AS f
    INNER JOIN DimDate AS d
        ON f.OrderDateKey = d.DateKey
    INNER JOIN DimProduct AS p
        ON f.ProductKey = p.ProductKey
    GROUP BY
        p.EnglishProductName,
        d.CalendarYear
),
cte
AS (
    -- Despite their names, RankOf2007 / DRankOf2007 hold the rank within
    -- whichever year the row belongs to (see PARTITION BY).
    SELECT
        Product,
        SalesAmount,
        CalenderYear,
        ROW_NUMBER() OVER (PARTITION BY CalenderYear ORDER BY SalesAmount DESC) AS RowNum,
        RANK()       OVER (PARTITION BY CalenderYear ORDER BY SalesAmount DESC) AS RankOf2007,
        DENSE_RANK() OVER (PARTITION BY CalenderYear ORDER BY SalesAmount DESC) AS DRankOf2007
    FROM sales
)
SELECT
    y2007.*,
    y2008.DRankOf2007 AS [DRankOf2008],
    y2008.RankOf2007  AS [RankOf2008]
FROM cte AS y2007
LEFT JOIN cte AS y2008
    ON y2008.Product = y2007.Product
    AND y2008.CalenderYear = 2008
WHERE y2007.CalenderYear = 2007
    AND y2007.[RankOf2007] <= 5

Use conditional aggregation in your innermost query (i.e. select both years and sum conditionally for one of the years):
-- Top five products of 2007 with their 2007 and 2008 sales totals and ranks.
-- Both years are aggregated in a single pass using conditional SUMs; a year
-- in which a product has no sales produces a NULL total for that year.
select
    p.productkey,
    p.englishproductname as product,
    ranked.salesamount2007,
    ranked.salesamount2008,
    ranked.rankof2007,
    ranked.rankof2008
from
(
    select
        productkey,
        salesamount2007,
        salesamount2008,
        rank() over (order by salesamount2007 desc) as rankof2007,
        -- A product with no 2008 sales has a NULL total. Report a NULL rank
        -- for it rather than a position behind every product that did sell.
        -- SQL Server sorts NULLs last in descending order, so the ranks of
        -- the products that did sell are unaffected by those rows.
        case
            when salesamount2008 is not null
                then rank() over (order by salesamount2008 desc)
        end as rankof2008
    from
    (
        select
            s.productkey,
            sum(case when d.calendaryear = 2007 then s.salesamount end) as salesamount2007,
            sum(case when d.calendaryear = 2008 then s.salesamount end) as salesamount2008
        from factinternetsales s
        inner join dimdate d on d.datekey = s.orderdatekey
        where d.calendaryear in (2007, 2008)
        group by s.productkey
    ) aggregated
) ranked
join dimproduct p on p.productkey = ranked.productkey
where ranked.rankof2007 <= 5
-- Best 2007 seller first.
order by ranked.rankof2007;
For the case there are no rows for a product in 2008, salesamount2008 will be null. In standard SQL we would consider this in the ORDER BY clause:
rank() over (order by salesamount2008 desc nulls last) as rankof2008
But SQL Server doesn't comply with the SQL standard here and doesn't feature NULLS FIRST/LAST in the ORDER BY clause. Fortunately, it sorts nulls last when sorting in descending order, so it implicitly does just what we want here.
By the way: we could do the aggregation and ranking in a single step, but in that case we'd have to repeat the SUM expressions. It's a matter of personal preference, whether to do this in one step (shorter query) or two steps (no repetitive expressions).

Related

how to use rank/join and where together

I have used multiple inner joins in my code, and I provided rank, but now I want to select a particular rank. So how to use rank in a where statement?
Here is my code, but now please help me to proceed further:
-- Total sales per manufacturer and year for 2009 and 2010,
-- ranked by that total (largest total = rank 1) and listed largest first.
select
    [YEAR],
    [IDManufacturer],
    sum([TotalPrice]),
    rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE] as d
inner join [dbo].[FACT_TRANSACTIONS] as f
    on f.Date = d.DATE
inner join [dbo].[DIM_MODEL] as m
    on m.IDModel = f.IDModel
where [YEAR] in (2009, 2010)
group by
    IDManufacturer,
    [year]
order by sum([TotalPrice]) desc
Now I want to select only rank 3 and 4. How to do that?
You could use either a subquery or a CTE. I would suggest trying both methods, looking at the execution plans, and picking whichever performs better:
Sub Query
-- Derived-table form: compute the rank inside, filter on it outside,
-- because a window function cannot be referenced in the WHERE clause of
-- the query that defines it.
select SQ.*
from (
    select
        [YEAR],
        [IDManufacturer],
        sum([TotalPrice]) as TotalPrice,
        rank() over (order by sum(totalprice) desc) as sales_rank
    from [dbo].[DIM_DATE] as d
    inner join [dbo].[FACT_TRANSACTIONS] as f
        on f.Date = d.DATE
    inner join [dbo].[DIM_MODEL] as m
        on m.IDModel = f.IDModel
    where [YEAR] in (2009, 2010)
    group by
        IDManufacturer,
        [year]
) as SQ
where SQ.sales_rank in (3, 4)
go
Common Table Expression
-- CTE form: ranked_sales holds one row per manufacturer and year with its
-- rank by total sales; the outer query keeps ranks 3 and 4 only.
; with ranked_sales as
(
    select
        [YEAR],
        [IDManufacturer],
        sum([TotalPrice]) as TotalPrice,
        rank() over (order by sum(totalprice) desc) as sales_rank
    from [dbo].[DIM_DATE] as d
    inner join [dbo].[FACT_TRANSACTIONS] as f
        on f.Date = d.DATE
    inner join [dbo].[DIM_MODEL] as m
        on m.IDModel = f.IDModel
    where [YEAR] in (2009, 2010)
    group by
        IDManufacturer,
        [year]
)
select *
from ranked_sales
where sales_rank >= 3
    and sales_rank <= 4
If you want only rank 3 and 4 then try this:
-- Manufacturers/years whose total sales rank 3rd or 4th.
-- Two things SQL Server requires of a derived table are respected here:
--   * every column must have a name, so the SUM is aliased;
--   * ORDER BY is not allowed inside it (without TOP/OFFSET), so the
--     sort is applied to the outer query instead.
select *
from (
    select
        [YEAR],
        [IDManufacturer],
        sum([TotalPrice]) as TotalPrice,
        rank() over (order by sum(totalprice) desc) as sales_rank
    from [dbo].[DIM_DATE]
    join [dbo].[FACT_TRANSACTIONS]
        on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
    join [dbo].[DIM_MODEL]
        on [dbo].[DIM_MODEL].IDModel = [dbo].[FACT_TRANSACTIONS].IDModel
    where [YEAR] in (2009, 2010)
    group by IDManufacturer, [year]
) t
where sales_rank in (3, 4)
order by TotalPrice desc
If you only want the 3rd and 4th values -- and assuming no ties -- then use offset/fetch:
offset 2 rows fetch first 2 rows only
The offset 2 is because offset starts counting at 0 rather than 1.

How to choose max of one column per other column

I am using SQL Server and I have a table "a"
month segment_id price
-----------------------------
1 1 100
1 2 200
2 3 50
2 4 80
3 5 10
I want to make a query which presents the original columns where the price will be the max per month
The result should be:
month segment_id price
----------------------------
1 2 200
2 4 80
3 5 10
I tried to write SQL code:
-- Asker's attempt, kept as written. It is rejected because segment_id (and
-- month) are selected next to an aggregate without appearing in a GROUP BY.
Select
month, segment_id, max(price) as MaxPrice
from
a
but I got an error:
Column segment_id is invalid in the select list because it is not contained in either an aggregate function or the GROUP BY clause
I tried to fix it in many ways but didn't find how to fix it
Because you need a group by clause without segment_id
-- Highest price in each month; one output row per month.
select
    a.month,
    max(a.price) as MaxPrice
from a
group by a.month
as you want results per each month, and segment_id is non-aggregated in your original select statement.
If you want to keep segment_id and have the maximum price of the month repeated on every row of that month, you need to use max() as a window (analytic) function, without a GROUP BY clause:
-- Every row of a, with the maximum price of its month repeated on each row.
-- The window deliberately has no ORDER BY: with one, the default frame ends
-- at the current row and max() becomes a running maximum, not the monthly one.
Select
    month,
    segment_id,
    max(price) over (partition by month) as MaxPrice
from a
Edit (following your latest edit to the desired results): you need one more window function, row_number(), as @Gordon already mentioned:
-- For every month keep the single row with the highest price.
-- Rows are numbered within their month from the highest price down,
-- and only the row numbered 1 is returned.
select
    q.month,
    q.segment_id,
    q.price
from (
    select
        a.*,
        row_number() over (partition by a.month order by a.price desc) as Rn
    from a
) as q
where q.Rn = 1
I would recommend a correlated subquery:
-- Rows whose price equals the highest price found for their own month.
select t.*
from t
where t.price = (
    select max(peer.price)
    from t as peer
    where peer.month = t.month
);
The "canonical" solution is to use row_number():
-- Number the rows of each month from the highest price down and keep
-- the first one; the helper column seqnum is part of the output.
select ranked.*
from (
    select
        t.*,
        row_number() over (partition by t.month order by t.price desc) as seqnum
    from t
) as ranked
where ranked.seqnum = 1;
With the right indexes, the correlated subquery often performs better.
Only because it was not mentioned.
Yet another option is the WITH TIES clause.
To be clear, the approach by Gordon and Barbaros would be a nudge more performant, but this technique does not require or generate an extra column.
-- TOP (1) WITH TIES returns every row that ties on the ORDER BY value of the
-- first row. The ORDER BY value is the per-month row number, so the rows
-- numbered 1 (the highest price of each month) all tie and are all returned.
select top (1) with ties
    yt.*
from YourTable as yt
order by
    row_number() over (partition by yt.month order by yt.price desc)
With not exists:
-- Keep a row only when no other row of the same month has a higher price.
select t.*
from tablename as t
where not exists (
    select 1
    from tablename as pricier
    where pricier.month = t.month
        and pricier.price > t.price
)
or:
-- Join each row to the maximum price of its month and keep the matches.
-- The base table is aliased t, which the select list and the ON clause need.
select t.*
from tablename as t
inner join (
    select
        month,
        max(price) as price
    from tablename
    group by month
) as g
    on g.month = t.month
    and g.price = t.price

How do you partition results weekly in SQL?

I have to find the 3 highest spending customers from Customer (customer name, id) and order (order id, order amt, order date) for every week. If I run the query today, it should show the top 3 for all weeks for which order date exists.
I am thinking about doing a Partition by over the date (weekly), but I can't find any method to do that? Has anyone done a weekly partition of results?
I know it's not right, but this is what I have:
-- Asker's non-working sketch, kept as written (the asker states it is not right).
-- Visible problems: OVER is attached to a plain column instead of a window
-- function, the outer SELECT has no FROM, there is a stray comma before
-- "from", and customer_name / order_amt are neither grouped nor aggregated.
Select Top 3 customer_name, id OVER (partition by [week])
(
Select c.customer_name, c.id, o.order_amt,
from customer c
Join Order o
on c.id=o.id
group by c.id
)
According to your table structure, where orders.order_id is customer.id
use this statement
-- Top three spending customers for every calendar week.
-- Weeks are identified by (year, week number), so that the same week number
-- in different years is not merged into one bucket.
-- dense_rank keeps ties, so a week can list more than three customers.
select
    *
from
(
    select
        details.*
        ,dense_rank() over (
            partition by year_num, week_num
            order by order_amt desc
        ) as rank_num
    from
    (
        -- Total order amount per customer and week.
        select
            c.id as customer_id
            ,c.name
            ,sum(o.order_amt) as order_amt
            ,datepart(YEAR, o.order_date) as year_num
            ,datepart(WEEK, o.order_date) as week_num
        from customer c
        -- Join follows the answer's stated assumption that orders.order_id
        -- carries the customer id; adjust if the schema differs.
        join orders o on c.id = o.order_id
        group by
            c.id
            ,c.name
            ,datepart(YEAR, o.order_date)
            ,datepart(WEEK, o.order_date)
    ) details
) dets
where dets.rank_num <= 3
Updated : changed statement to use just 2 tables
the query should be something like this
-- Three largest orders of every week, with the customer who placed them.
-- Changes from the sketch: the outer query has its FROM, the reserved word
-- Order is bracketed, and [week] is derived from the order date.
-- NOTE(review): o.order_date and the join on c.id = o.id are assumptions
-- taken from the question's description; confirm against the real schema.
select
    d.customer_name,
    d.id,
    d.order_amt,
    d.[week]
from
(
    select
        c.customer_name,
        c.id,
        o.order_amt,
        datepart(week, o.order_date) as [week],
        row_number() over (
            partition by datepart(week, o.order_date)
            order by o.order_amt desc
        ) as rn
    from customer c
    join [Order] o
        on c.id = o.id
) d
where d.rn <= 3
this is an idea,
-- Three largest orders per week, built in two steps.
-- CTE  : orders joined to customers, tagged with their week number.
-- CTE1 : rows numbered within each week, largest amount first.
-- NOTE(review): o.order_date stands in for the sketch's "datecol" and the
-- join on c.id = o.id is taken from the question; confirm both.
;WITH CTE
AS (
    SELECT
        c.customer_name
        ,c.id
        ,o.order_amt
        ,datepart(wk, o.order_date) AS Weekcol
    FROM customer c
    JOIN [Order] o
        ON c.id = o.id
)
,CTE1
AS (
    -- Only CTE's own column names are visible here; the aliases c and o
    -- exist solely inside CTE.
    SELECT
        customer_name
        ,id
        ,order_amt
        ,Weekcol
        ,ROW_NUMBER() OVER (
            PARTITION BY Weekcol ORDER BY order_amt DESC
        ) AS rowNUm
    FROM CTE
)
SELECT *
FROM CTE1
WHERE rowNUm <= 3

SQL RANK with multiple WHERE clause

I have got few sales offices, together with their sales. I am trying to set-up report that will basically tell how is each office performing. Getting some SUMs, COUNTs are quite easy, however I am struggling with getting rank of single office.
I would like to have this query return the rank of single office, during the entire period and/or specified time (eg. BETWEEN '2015-01-01' AND '2015-01-15')
I need to also exclude some offices from the rank list (eg. OfficeName NOT IN ('GGG','QQQ')), so using the sample data, the rank of office 'XYZ' would be 5.
If OfficeName = 'XYZ' is included in the WHERE clause, the RANK is obviously 1, because SQL filters out the rows that do not satisfy the WHERE clause before the rank is computed.
Is there any way of doing the same, without using the TemporaryTable ?
-- Total value and rank of every office except the excluded ones,
-- largest total first.
SELECT
    OfficeName,
    SUM(Value) AS SUM,
    RANK() OVER (ORDER BY SUM(Value) DESC) AS Rank
FROM Transactions AS t
INNER JOIN Office AS o
    ON t.TransID = o.ID
WHERE OfficeName NOT IN ('GGG', 'QQQ')
--AND OfficeName = 'XYZ'
GROUP BY OfficeName
ORDER BY SUM(Value) DESC;
I am using MS SQL server 2008.
SQL Fiddle with some random data is here: http://sqlfiddle.com/#!3/fac7a/35
Many thanks for help!
if i understand you correctly you want to do:
-- Rank all non-excluded offices first, then pick one office from the
-- ranked set, so the filter on the office does not change its rank.
SELECT ranked.*
FROM (
    SELECT
        OfficeName,
        SUM(Value) AS SUM,
        RANK() OVER (ORDER BY SUM(Value) DESC) AS Rank
    FROM Transactions AS t
    INNER JOIN Office AS o
        ON t.TransID = o.ID
    WHERE OfficeName NOT IN ('GGG', 'QQQ')
    GROUP BY OfficeName
) AS ranked
WHERE ranked.OfficeName = 'XYZ';
You just need to wrap your code as derived table or use a CTE like this and then do the filter for OfficeName = 'XYZ'.
-- OfficeRanks: one row per non-excluded office with its total and rank.
-- The final SELECT filters that ranked set down to a single office.
;WITH OfficeRanks AS
(
    SELECT
        OfficeName,
        SUM(Value) AS SUM,
        RANK() OVER (ORDER BY SUM(Value) DESC) AS Rank
    FROM Transactions AS t
    INNER JOIN Office AS o
        ON t.TransID = o.ID
    WHERE OfficeName NOT IN ('GGG', 'QQQ')
    GROUP BY OfficeName
)
SELECT *
FROM OfficeRanks
WHERE OfficeName = 'XYZ';
Here is an amusing way to do this without a subquery:
-- Single-query variant: rank all offices, then sort 'XYZ' to the front
-- and keep only the first row.
SELECT TOP (1)
    OfficeName,
    SUM(Value) AS SUM,
    RANK() OVER (ORDER BY SUM(Value) DESC) AS Rank
FROM Transactions AS t
INNER JOIN Office AS o
    ON t.TransID = o.ID
WHERE OfficeName NOT IN ('GGG', 'QQQ')
GROUP BY OfficeName
ORDER BY
    CASE OfficeName WHEN 'XYZ' THEN 0 ELSE 1 END;

Summing the most recent rows, grouped by the id

-- PostgreSQL: one row per item_id, namely the one with the latest updated_at.
SELECT DISTINCT ON (p.item_id)
    p.*
FROM prices AS p
ORDER BY
    p.item_id,
    p.updated_at DESC
The above query retrieves the most recent prices, how would I get the total sum of all the current prices?
Is it possible without using a subselect?
This is trivial using a subquery:
-- Sum of the most recent price of every item: the inner query keeps the
-- latest row per item_id, the outer query totals those rows.
select sum(latest.price)
from (
    select distinct on (pr.item_id)
        pr.*
    from prices as pr
    order by
        pr.item_id,
        pr.updated_at desc
) as latest
If you don't mind repeated rows, I think the following might work:
-- The table is aliased p, so the column must be referenced as p.price.
-- NOTE: the window SUM is evaluated before DISTINCT ON discards rows, so the
-- value repeated on every output row is the total over all rows of prices,
-- not only over the most recent row of each item.
select distinct on (p.item_id)
    sum(p.price) over ()
from prices p
order by p.item_id, p.updated_at desc
You might be able to add a limit clause to this to get what you want. By the way, I would write this as:
-- Sum of the most recent price of every item, using only ROW_NUMBER().
-- The derived table has no ORDER BY: the sum does not depend on row order,
-- and some engines (e.g. SQL Server) reject ORDER BY in a derived table.
select sum(latest.price)
from (
    select
        p.*,
        row_number() over (partition by p.item_id order by p.updated_at desc) as seqnum
    from prices p
) latest
where latest.seqnum = 1
ROW_NUMBER() is standard SQL. The DISTINCT ON clause is specific to Postgres.