How do you partition results weekly in SQL? - sql

I have to find the 3 highest spending customers from Customer (customer name, id) and order (order id, order amt, order date) for every week. If I run the query today, it should show the top 3 for all weeks for which order date exists.
I am thinking about doing a Partition by over the date (weekly), but I can't find any method to do that? Has anyone done a weekly partition of results?
I know it's not right, but this is what I have:
Select Top 3 customer_name, id OVER (partition by [week])
(
Select c.customer_name, c.id, o.order_amt,
from customer c
Join Order o
on c.id=o.id
group by c.id
)

According to your table structure, where orders.order_id is customer.id
use this statement
select
*
from
(
select
details.*
,dense_rank() over (partition by week_num order by order_amt desc) as rank_num
from
(
select
c.id as customer_id
,c.name
,sum(o.order_amt) as order_amt
,datepart(WEEK,o.order_date) as week_num
from customer c
join orders o on c.id=o.order_id
group by c.id,c.name,datepart(WEEK,o.order_date)
)details
)dets
where dets.rank_num<=3
Updated : changed statement to use just 2 tables

the query should be something like this
Select customer_name, id, order_amt, [week]
(
Select c.customer_name, c.id, o.order_amt, [week],
rn = row_number() over (partition by [week] order by o.order_amt desc)
from customer c
Join Order o
on c.id=o.id
) d
where rn <= 3

this is an idea,
;WITH CTE
AS (
SELECT c.customer_name
,c.id
,o.order_amt
,datepart(wk, datecol) AS Weekcol
)
,CTE1
AS (
SELECT c.customer_name
,c.id
,o.order_amt
,ROW_NUMBER() OVER (
PARTITION BY Weekcol ORDER BY order_amt DESC
) AS rowNUm
FROM CTE
)
SELECT *
FROM CTE1
WHERE rowNUm <= 3

Related

SQL select row with max value or distinct value and sum all

I have the following data that is returned to me. I need to get a distinct or max sum of all the commission by taxid for a single repnbr. The 'qtrlycommrep' column is the value I'm trying to get to, but not able to. For repnbr c590, I need to get the 854.66 commission amount, which is the max for each taxid.
What am I doing wrong?
Any help would be much appreciated!
Here's what I've tried so far. Using the Row_number
select distinct
sub.Repnbr
, (sub.QtrLYComm) as qtrlycommrep
from (
select distinct repnbr, QtrLYComm
, rn = row_number() over(partition by repnbr order by QtrLYComm desc)
from #qtrly
) sub
where sub.rn = 1
Cross Apply
select distinct
#qtrly.repnbr
, x.QtrLYComm as qtrlycommrep
from #qtrly
cross apply (
select top 1
*
from #qtrly as i
where i.repnbr = Repnbr
order by i.qtrlycomm desc
) as x;
inner join
select
#qtrly.repnbr, #qtrly.qtrlycomm as qtrlycommrep
from #qtrly
inner join (
select maxvalue = max(qtrlycomm), repnbr
from #qtrly
group by repnbr
) as m
on #qtrly.repnbr = m.repnbr
and #qtrly.qtrlycomm = m.maxvalue;
order by row_number
select top 1 with ties
#qtrly.repnbr, #qtrly.qtrlycomm as qtrlycommrep
from #qtrly
order by
row_number() over(partition by repnbr
order by qtrlycomm desc)
You want one value per tax id. You need to include that. For instance:
select q.Repnbr, sum(q.QtrLYComm) as qtrlycommrep
from (select q.*,
row_number() over(partition by repnbr, taxid order by QtrLYComm desc) as seqnum
from #qtrly q
) q
where seqnum = 1
group by q.Repnbr;
However, I would be inclined to use two levels of aggregation:
select q.Repnbr, sum(q.QtrLYComm) as qtrlycommrep
from (select distinct repnbr, taxid, QtrLYComm
from #qtrly q
) q
group by q.Repnbr;

how to use rank/join and where together

I have used multiple inner joins in my code, and I provided rank, but now I want to select a particular rank. So how to use rank in a where statement?
Here is my code, but now please help me to proceed further:
select [YEAR],
[IDManufacturer],
sum([TotalPrice]),
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
order by sum([TotalPrice]) desc
Now I want to select only rank 3 and 4. How to do that?
You could either do sub-query or CTE, i would suggest try with 2 methods and look at execution plan pick which performs better:
Sub Query
SELECT * FROM
(select [YEAR],
[IDManufacturer],
sum([TotalPrice]) TotalPrice,
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
) as SQ
Where sales_rank = 3 or sales_rank = 4
go
Common Table Expression
; with CTE as
(select [YEAR],
[IDManufacturer],
sum([TotalPrice]) TotalPrice,
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
)
SELECT * FROM CTE WHERE sales_rank = 3 or sales_rank = 4
If you want only rank 3 and 4 then try this:
select * from (
select [YEAR],
[IDManufacturer],
sum([TotalPrice]),
rank() over (order by sum(totalprice) desc) as sales_rank
from [dbo].[DIM_DATE]
join [dbo].[FACT_TRANSACTIONS]
on [dbo].[FACT_TRANSACTIONS].Date = [dbo].[DIM_DATE].DATE
join [dbo].[DIM_MODEL]
on [dbo].[DIM_MODEL].IDModel=[dbo].[FACT_TRANSACTIONS].IDModel
where [YEAR] in (2009,2010)
group by IDManufacturer,[year]
order by sum([TotalPrice]) desc
) t where sales_rank in (3,4)
If you only want the 3rd and 4th values -- and assuming no ties -- then use offset/fetch:
offset 2 rows fetch first 2 rows only
The offset 2 is because offset starts counting at 0 rather than 1.

Select most recent status for each ID and department code

I have the following table:
I want to get the most recent status for each dept_code that a CL_ID has. So the desired output would be this:
I have tried the following but this give me just the most recent status for each client and not each of their dept_codes.
SELECT *
FROM [CIMSHR6_MERGED].[dbo].[C3CLSTAT] C
INNER JOIN
(SELECT CLIENT_NUMBER, MAX(STATUS_DATE) AS SDATE
FROM [CIMSHR6_MERGED].[dbo].[C3CLSTAT]
GROUP BY CLIENT_NUMBER) X
ON X.CLIENT_NUMBER = C.CLIENT_NUMBER
AND X.SDATE = C.STATUS_DATE
ORDER BY C.CLIENT_NUMBER
Any help would be much appreciated. Thanks.
A convenient method that works in SQL Server is:
select top (1) cl.*
from [CIMSHR6_MERGED].[dbo].[C3CLSTAT] cl
order by row_number() over (partition by cl_id, dept_code order by status_date desc);
A method that is efficient with the right indexes in almost any database is:
select cl.*
from [CIMSHR6_MERGED].[dbo].[C3CLSTAT] cl
where cl.status_date = (select max(cl2.status_date)
from [CIMSHR6_MERGED].[dbo].[C3CLSTAT] cl2
where cl2.cl_id = cl.cl_id and cl2.dept_code = cl.dept_code
);
The right index is on (cl_id, dept_code, status_date).
I would also use ROW_NUMBER, but with a subquery:
SELECT CL_ID, Status_date, Status, Dept_code
FROM
(
SELECT *,
ROW_NUMBER() OVER (PARTITION BY CL_ID, Dept_code ORDER BY Status_date DESC) rn
FROM CIMSHR6_MERGED].[dbo].[C3CLSTAT]
) t
WHERE rn = 1;
1) Firstly group everything on Dept_Code,CL_ID and assign rank for each row with in the group in descending order.
2) Select all the rows with rnk=1 which would display your desired result.
SELECT Z.CL_ID,
Z.Status_Date,
Z.Status,
Z.Dept_Code
FROM
(
SELECT *,
RANK() OVER( PARTITION BY Dept_Code,CL_ID, ORDER BY Status_Date DESC ) AS rnk
FROM [CIMSHR6_MERGED].[dbo].[C3CLSTAT]
) Z
WHERE Z.rnk = 1;
This would work for almost all databases
select * from c3clstat c
where exists
(select 1 from c3clstat c1
where c1.cl_id=c.cl_id
and c1.dept_code=c.dept_code
group by cl_id,dept_code
having c.status_date=max(c1.status_date)
)

Selecting City from Customer ID in SQL

Customer have ordered from different cities. Thus we have multiple cities against same customer_id. I want to display that city against customer id which has occurred maximum number of times , in case where customer has ordered same number of orders from multiple cities that city should be selected from where he has placed last order. I have tried something like
SELECT customer_id,delivery_city,COUNT(DISTINCT delivery_city)
FROM analytics.f_order
GROUP BY customer_id,delivery_city
HAVING COUNT(DISTINCT delivery_city) > 1
WITH cte as (
SELECT customer_id,
delivery_city,
COUNT(delivery_city) as city_count,
MAX(order_date) as last_order
FROM analytics.f_order
GROUP BY customer_id, delivery_city
), ranking as (
SELECT *, row_number() over (partition by customer_id
order by city_count DESC, last_order DESC) as rn
FROM cte
)
SELECT *
FROM ranking
WHERE rn = 1
select customer_id,
delivery_city,
amount
from
(
select t.*,
rank() over (partition by customer_id order by amount asc) as rank
from(
SELECT customer_id,
delivery_city,
COUNT(DISTINCT delivery_city) as amount
FROM analytics.f_order
GROUP BY customer_id,delivery_city
) t
)
where rank = 1

Better solution for PARTITION BY?

I want to optimize this
WITH a as
(SELECT *
,ROW_NUMBER() OVER (PARTITION BY applicationid ORDER BY AgreementStartDate desc) rn
,(select count(*) from RM_TbPackages where d.ApplicationID=ApplicationID) as PackageCount
FROM CM_VwSupplierApplications d)
select * from a
where rn=1
order by a.ApplicationID
As per the comment, there is nothing wrong with the partition. One possible inefficiency is the subquery (select count(*) from RM_TbPackages where d.ApplicationID=ApplicationID) - a set based approach to this by computing all counts per Application and then joining to the count should improve performance:
WITH a as
(
SELECT * ,
ROW_NUMBER() OVER (PARTITION BY applicationid ORDER BY AgreementStartDate desc) rn,
x.PackageCount
FROM CM_VwSupplierApplications d
INNER JOIN
(select ApplicationID, count(*) as PackageCount
from RM_TbPackages
group by ApplicationID )x
on x.ApplicationID = d.ApplicationID
)
select * from a
where rn=1
order by a.ApplicationID;
This query will run faster since it is not making a subselect for ever row in CM_VwSupplierApplications:
;WITH a AS
(
SELECT * ,ROW_NUMBER() OVER (PARTITION BY applicationid ORDER BY AgreementStartDate desc) rn
FROM CM_VwSupplierApplications d
)
SELECT a.*, b.PackageCount
FROM a
OUTER APPLY
( SELECT count(*) PackageCount
FROM RM_TbPackages
WHERE d.ApplicationID=ApplicationID) b
WHERE a.rn=1
ORDER BY a.ApplicationID
To improve it even more, you could consider index on table CM_VwSupplierApplications on the columns applicationid and AgreementStartDate