Conditional Max in SQL - sql

I have the following query in SQL Server:
-- Asker's attempt. MAX(MonthNumber) is nested inside another MAX(), which is
-- what SQL Server rejects with the error quoted below.
SELECT EmployeeID,
TotalQuantity AS TotalQty,
TotalSales,
MAX(CASE WHEN MonthNumber = MAX(MonthNumber)
THEN TotalSales END) as RecentMonthSale
FROM vwSales
GROUP BY EmployeeID, TotalQuantity , TotalSales
But it gives me the error:
Cannot perform an aggregate function on an expression
containing an aggregate or a subquery.
Input View is as follows:
EmployeeID TotalSales MonthNumber
1 4000 1
1 6000 2
2 8500 1
2 6081 2
Desired output:
EmployeeID TotalSale RecentMonthSale
1 10000 6000
2 14581 6081
3 11458 1012
I want the following columns in my output: EmployeeID, TotalQuantity, TotalSale, RecentMonthSale. My view has the following columns: EmployeeID, TotalSale, TotalQuantity, MonthNumber.

This query will show the output that you need, and will scan the table only one time.
-- Per employee: total sales over all months, plus the sales of that employee's
-- most recent month. vwSales is read once.
select EmployeeID,
       sum(TotalSales) as TotalSale,
       sum(case when MonthRank = 1 then TotalSales else 0 end) as RecentMonthSales
from
(
    -- Rank each employee's months newest-first. The PARTITION BY is required so
    -- "most recent" is decided per employee, not across the whole view.
    select EmployeeID,
           MonthNumber,
           TotalSales,
           rank() over(partition by EmployeeID order by MonthNumber desc) as MonthRank
    from
    (
        -- Collapse to one row per employee and month.
        select EmployeeID, MonthNumber, sum(TotalSales) as TotalSales
        from vwSales
        group by EmployeeID, MonthNumber
    ) monthly
) ranked
group by EmployeeID

-- Same result as a nested-subquery formulation, written as named steps.
WITH latest_month AS (
    -- Highest MonthNumber present for each employee.
    SELECT EmployeeID,
           MAX(MonthNumber) AS MaxMonth
    FROM vwSales
    GROUP BY EmployeeID
),
recent AS (
    -- The view row(s) that belong to each employee's latest month.
    SELECT s.EmployeeID,
           s.TotalSale AS RecentMonthSale
    FROM vwSales AS s
    INNER JOIN latest_month AS lm
        ON s.EmployeeID = lm.EmployeeID
       AND s.MonthNumber = lm.MaxMonth
)
SELECT s.EmployeeID,
       SUM(s.TotalSale) AS Total,
       r.RecentMonthSale
FROM vwSales AS s
LEFT JOIN recent AS r
    ON r.EmployeeID = s.EmployeeID
GROUP BY s.EmployeeID,
         r.RecentMonthSale
If you just execute each of the subqueries and view their results you should get a good idea for how this works

Related

Find customers with at least 5 transactions in At most 3 consecutive days

I have a table in SQL Server that contains customers' transactions From 2022-02-10 to 2022-03-10.
I want to find customers that have at least 5 transactions within at most three consecutive days.
For example, output of below table should be CustomerId = 2 and customerid=3
Id
CustomerId
Transactiondate
1
1
2022-03-01
2
1
2022_03_01
3
1
2022_03_05
4
1
2022_03_07
5
1
2022_03_07
6
2
2022_03_05
7
2
2022_03_05
8
2
2022_03_06
9
2
2022_03_06
10
2
2022_03_07
1
3
2022-03-01
2
3
2022_03_01
3
3
2022_03_01
4
3
2022_03_03
5
3
2022_03_03
I tried this query but it doesn't have good performance for a large table:
-- For each transaction p1, count the same customer's OTHER transactions whose date
-- falls in the 3-day window starting at p1's date. Four others plus p1 itself
-- means at least 5 transactions in the window.
select distinct p1.customerid
from trntbl p1
join trntbl p2 on p2.id <> p1.id
and p2.customerid = p1.customerid
and p2.TransactionDate >= p1.TransactionDate
-- DATEADD is the T-SQL function; the window is anchored on p1.TransactionDate.
and p2.TransactionDate < dateadd(day, 3, p1.TransactionDate)
group by p1.customerid, p1.id
having count(*) >= 4
If customers must have done transactions in three consecutive days (meaning that 5 transactions in a day then nothing in the next two days wouldn't count), then this can be done with two self joins:
-- Customers with transactions on three consecutive calendar days whose combined
-- transaction count over those three days is at least 5.
-- NOTE(review): assumes Transactiondate carries no time component - confirm.
with cte as
(
    -- Transactions per customer per day.
    select CustomerId, Transactiondate, count(*) ct
    from table_name
    group by CustomerId, Transactiondate
)
select distinct t1.CustomerId
from cte t1
inner join cte t2
    on t1.Transactiondate = dateadd(day, 1, t2.Transactiondate)
   and t1.CustomerId = t2.CustomerId
inner join cte t3
    on t2.Transactiondate = dateadd(day, 1, t3.Transactiondate)
   and t3.CustomerId = t2.CustomerId
-- Without this filter the per-day counts were computed but never used.
where t1.ct + t2.ct + t3.ct >= 5
;
Fiddle
Although this is a gaps-and-islands problem, there are shortcuts you can take.
You can group it up by date, then get the row 2 previous, and filter by only rows where the 2 previous row is exactly two days apart.
-- Returns customers that have rows on three consecutive calendar dates.
-- No transaction-count condition is applied in this query.
SELECT DISTINCT
CustomerId
FROM (
SELECT
t.CustomerId,
v.Date,
-- Date of the row two positions earlier among this customer's distinct dates.
Prev2 = LAG(v.Date, 2) OVER (PARTITION BY t.CustomerId ORDER BY v.Date)
FROM YourTable t
-- Strip any time portion so grouping is by calendar day.
CROSS APPLY (VALUES( CAST(Transactiondate AS date) )) v(Date)
GROUP BY
t.CustomerId,
v.Date
) t
-- Two rows back being exactly 2 days earlier means the three dates are consecutive.
WHERE DATEDIFF(day, t.Prev2, t.Date) = 2
db<>fiddle
If the base table only has a maximum of one row per date then you can forgo the GROUP BY.
This is actually a gaps and islands problem, you can solve by using analytic window functions to subtract sequential row_number from consecutive days and then grouping, after first "plugging" any gaps with the help of a numbers table.
-- numbers: integers 0..19 (TOP(20) row numbers minus 1), used as day offsets.
with numbers as (select top(20) Row_Number() over(order by (select null))-1 n from master.dbo.spt_values),
-- dRanges: first and last transaction date per customer.
dRanges as (
select customerId,
Min(Transactiondate) CustStartDate,
Max(Transactiondate) CustEndDate
from t
group by CustomerId
-- dates: every calendar date from CustStartDate up to CustEndDate, limited by
-- the offsets available in numbers.
), dates as (
select *
from dranges r
outer apply (
select DateAdd(day,n,r.CustStartDate) SeqDate
from numbers n
where DateAdd(day,n,r.CustStartDate) < = r.CustEndDate
)d
-- q: transaction count per customer per date.
), q as (
select customerId, transactiondate, Count(*) qty
from t
group by CustomerId, Transactiondate
-- g: one row per generated date with its count (0 when no transactions) and a
-- grouping key: the date minus its row number within the customer.
), g as (
select d.CustomerId, d.SeqDate, IsNull(q.qty,0)Qty,
DateAdd(day, - row_number() over (partition by d.customerid order by d.SeqDate), d.SeqDate) as dGrp
from dates d
left join q on q.Transactiondate = d.SeqDate and q.CustomerId = d.CustomerId
)
-- Keep groups spanning at most 3 generated dates with at least 5 transactions.
-- NOTE(review): dGrp is derived from the gap-filled date sequence - verify it
-- splits a customer's range into the intended islands.
select customerId
from g
group by CustomerId, dGrp
having Count(*) <= 3 and Sum(qty) >= 5
DB<>Fiddle
You could make use of datediff function and verify if the sum of the date differences are between 3 and 5 (provided the max of the differences is just 1) since the dates might be unique (for example customerid 2 can have transaction dates as 5,6,7,8,9 of March 2022) and this should be taken into account too.
-- Sample data lives in a table variable; table variables must be named with '@'
-- ('#' denotes a temp table and cannot be used with DECLARE).
declare @tbl table(id int identity,customerid int,transactiondate date)
insert into @tbl(customerid,transactiondate)
values(1,'2022-03-01')
,(1,'2022-03-01')
,(1,'2022-03-05')
,(1,'2022-03-07')
,(1,'2022-03-07')
,(2,'2022-03-05')
,(2,'2022-03-05')
,(2,'2022-03-06')
,(2,'2022-03-06')
,(2,'2022-03-07')
-- Outer query: look at each customer's 5th transaction (by date) and require that
-- the running max day-gap is 1 and the running sum of gaps is between 3 and 5.
select customerid from (
select *
,SUM(datediff)over(partition by customerid order by transactiondate)[sum]
,max(datediff)over(partition by customerid order by transactiondate)[max]
from(
select customerid , transactiondate,
-- Day gap to the next transaction; for the customer's last row (no LEAD value)
-- the gap from the previous transaction is used instead.
DATEDIFF(DAY
,
case when LEAD(transactiondate,1)over(partition by customerid order by transactiondate)
is null then
LAG(transactiondate,1,transactiondate)
over(partition by customerid order by transactiondate)
else
transactiondate end
, case when LEAD(transactiondate,1)over(partition by customerid order by transactiondate)
is null then
transactiondate
else
LEAD(transactiondate,1,transactiondate)
over(partition by customerid order by transactiondate)end) as [datediff]
,ROW_NUMBER()over(partition by customerid order by transactiondate)rownum
from @tbl
)t
)t1
where t1.rownum = 5
and t1.max = 1
and t1.sum between 3 and 5

Avoid Unions to get TOP count

Here are two tables:
LocationId Address City State Zip
1 2100, 1st St Austin TX 76819
2 2200, 2nd St Austin TX 76829
3 2300, 3rd St Austin TX 76839
4 2400, 4th St Austin TX 76849
5 2500, 5th St Austin TX 76859
6 2600, 6th St Austin TX 76869
TripId PassengerId FromLocationId ToLocationId
1 746896 1 2
2 746896 2 1
3 234456 1 3
4 234456 3 1
5 234456 1 4
6 234456 4 1
7 234456 1 6
8 234456 6 1
9 746896 1 2
10 746896 2 1
11 746896 1 2
12 746896 2 1
I want TOP 5 locations which each passenger has traveled to (does not matter if its from or to location). I can get it using a UNION, but was wondering if there was a better way to do this.
My Solution:
-- Asker's UNION approach: visit counts per location as a trip origin, unioned with
-- visit counts per location as a trip destination, for one passenger.
-- GROUP BY lists only the non-aggregated columns; an aggregate (or an alias)
-- is not allowed there.
-- NOTE(review): the sample Location table shows a single Address column; Address1 /
-- Address2 are kept as the asker wrote them - confirm against the real schema.
select top 5 *
from
(select count(l.LocationId) as cnt, l.LocationId, l.Address1, l.Address2, l.City, l.State, l.Zip
from
Trip t
join LOCATION l on t.FromLocationId = l.LocationId
where t.PassengerId = 746896
group by l.LocationId, l.Address1, l.Address2, l.City, l.State, l.Zip
UNION
select count(l.LocationId) as cnt, l.LocationId, l.Address1, l.Address2, l.City, l.State, l.Zip
from
Trip t
join LOCATION l on t.ToLocationId = l.LocationId
where t.PassengerId = 746896
group by l.LocationId, l.Address1, l.Address2, l.City, l.State, l.Zip
) as tbl
order by cnt desc
This will give you top 5 location.
-- Five most-visited locations over all trips, counting a location once for every
-- appearance as origin or destination.
SELECT TOP 5 tmp.fromlocationid AS locationid,
Count(tmp.fromlocationid) AS Times
FROM (SELECT fromlocationid
FROM trip
UNION ALL
SELECT tolocationid
FROM trip) tmp
GROUP BY tmp.fromlocationid
-- TOP without ORDER BY returns an arbitrary 5 groups; order by the visit count.
ORDER BY Times DESC, locationid
Method 1: This will give you top 5 location of each passenger.
-- Top 5 most-visited locations for each passenger (origin and destination both count).
WITH cte AS
( SELECT passengerid,
         locationid,
         Count(locationid) AS Times,
         -- Number each passenger's locations from most to least visited; ordering by
         -- the partition key itself would make the numbering arbitrary.
         Row_number() OVER(partition BY passengerid ORDER BY Count(locationid) DESC, locationid) AS RowNum
  FROM (SELECT tripid, passengerid, fromlocationid AS locationid
        FROM trip
        UNION ALL
        SELECT tripid, passengerid, tolocationid AS locationid
        FROM trip) tmp
  GROUP BY passengerid, locationid )
SELECT *
FROM cte
WHERE rownum <= 5
ORDER BY passengerid, Times DESC
Method 2: Same result without Union Operator (Top 5 location of each passenger)
-- Top 5 most-visited locations for each passenger, using UNPIVOT to fold the
-- from/to columns into a single locationid column.
WITH cte AS
( SELECT passengerid,
         locationid,
         Count(locationid) AS Times,
         -- Rank by visit count; ordering by the partition key would be arbitrary.
         Row_number() OVER(partition BY passengerid ORDER BY Count(locationid) DESC, locationid) AS RowNum
  FROM trip
  UNPIVOT ( locationid
            FOR subject IN (fromlocationid, tolocationid) ) u
  GROUP BY passengerid, locationid )
SELECT *
FROM cte
WHERE rownum <= 5
ORDER BY passengerid, times DESC
If you also want to get the location details, you can simply join the location table.
-- Final SELECT only: `cte` is not defined in this fragment, so it must follow a
-- WITH cte AS (...) definition that exposes locationid, passengerid, times and rownum.
SELECT cte.* , location.*
FROM cte
INNER JOIN location ON location.locationid = cte.locationid
WHERE rownum <= 5
ORDER BY passengerid, times DESC
Reference
- https://stackoverflow.com/a/19056083/6327676
You'll need to replace the SELECT *'s with the columns you need; however, something like this should work:
-- Visits: every trip row joined to its origin location (FromLocationId only), with
-- the number of trips sharing the same passenger and location attached to each row.
WITH Visits AS (
SELECT *,
COUNT(*) OVER (PARTITION BY t.PassengerID, L.LocationID) AS Visits
FROM Trip T
JOIN [Location] L ON T.FromLocationId = L.LocationId),
-- Rankings: dense rank of that visit count within each passenger, highest first.
Rankings AS (
SELECT *,
DENSE_RANK() OVER (PARTITION BY V.PassengerID ORDER BY Visits DESC) AS Ranking
FROM Visits V)
-- One row per trip is returned, so a location appears once for each of its trips.
SELECT *
FROM Rankings
WHERE Ranking <= 5;
Further simplified solution
-- Visit counts per (direction, location) for the two passengers, de-duplicated on
-- (count, location), then the three highest counts.
SELECT TOP 3
    per_direction.cnt,
    per_direction.locationId
FROM (
    SELECT DISTINCT
        COUNT(locationId) AS cnt,
        locationId
    FROM trip
    UNPIVOT (
        locationId FOR direction IN (fromLocationId, toLocationId)
    ) AS legs
    WHERE passengerId IN (746896, 234456)
    GROUP BY direction, locationId
) AS per_direction
ORDER BY per_direction.cnt DESC;
Solution combining columns
The main issue for me is avoiding union to combine the two columns.
The UNPIVOT command can do this.
-- Three most-visited locations for the two passengers; origin and destination
-- columns are folded into one via UNPIVOT before counting.
SELECT TOP 3
    counted.cnt,
    counted.locationId
FROM (
    SELECT
        COUNT(visits.locationId) AS cnt,
        visits.locationId
    FROM (
        SELECT valu AS locationId,
               passengerId
        FROM trip
        UNPIVOT (
            valu FOR loc IN (fromLocationId, toLocationId)
        ) AS legs
    ) AS visits
    WHERE visits.passengerId IN (746896, 234456)
    GROUP BY visits.locationId
) AS counted
ORDER BY counted.cnt DESC;
http://sqlfiddle.com/#!18/cec8b/136
If you want to get the counts by direction:
-- Three highest visit counts per (location, direction) for the two passengers.
SELECT TOP 3
    counted.cnt,
    counted.locationId,
    counted.direction
FROM (
    SELECT
        COUNT(visits.locationId) AS cnt,
        visits.locationId,
        visits.direction
    FROM (
        SELECT valu AS locationId,
               direction,
               passengerId
        FROM trip
        UNPIVOT (
            valu FOR direction IN (fromLocationId, toLocationId)
        ) AS legs
    ) AS visits
    WHERE visits.passengerId IN (746896, 234456)
    GROUP BY visits.locationId, visits.direction
) AS counted
ORDER BY counted.cnt DESC;
http://sqlfiddle.com/#!18/cec8b/139
Same Results as you ( minus some minor descriptions )
-- Counts per (location, direction), projected to (count, location) and
-- de-duplicated, then the three highest counts.
SELECT TOP 3
    deduped.cnt,
    deduped.locationId
FROM (
    SELECT DISTINCT
        counted.cnt,
        counted.locationId
    FROM (
        SELECT
            COUNT(visits.locationId) AS cnt,
            visits.locationId
        FROM (
            SELECT valu AS locationId,
                   direction,
                   passengerId
            FROM trip
            UNPIVOT (
                valu FOR direction IN (fromLocationId, toLocationId)
            ) AS legs
        ) AS visits
        WHERE visits.passengerId IN (746896, 234456)
        GROUP BY visits.locationId, visits.direction
    ) AS counted
) AS deduped
ORDER BY deduped.cnt DESC;
You can do this without union all:
-- Five most-visited locations for one passenger. CROSS APPLY (VALUES ...) turns
-- each trip into two rows, one for the origin and one for the destination.
SELECT TOP (5)
    t.passengerid,
    visited.locationid,
    COUNT(*)
FROM trip AS t
CROSS APPLY (
    VALUES (t.fromlocationid),
           (t.tolocationid)
) AS visited (locationid)
INNER JOIN location AS l
    ON visited.locationid = l.locationid
WHERE t.PassengerId = 746896
GROUP BY
    t.passengerid,
    visited.locationid
ORDER BY COUNT(*) DESC;
If you want an answer for all passengers, it would be a similar idea, using row_number(), but your query suggests you want the answer only for one customer at a time.
You can include additional fields from location as well.
Here is a SQL Fiddle.

T-SQL: Select partitions which have more than 1 row

I've managed to use this query
-- Sales per (PartGrp, VendorPn, customer), numbered within each
-- (PartGrp, VendorPn) from the highest-selling customer down.
SELECT
    inv.PartGrp,
    inv.VendorPn,
    inv.customer,
    SUM(inv.sales) AS totalSales,
    ROW_NUMBER() OVER (
        PARTITION BY inv.PartGrp, inv.VendorPn
        ORDER BY SUM(inv.sales) DESC
    ) AS seqnum
FROM BG_Invoice AS inv
GROUP BY
    inv.PartGrp,
    inv.VendorPn,
    inv.customer
ORDER BY
    inv.PartGrp,
    inv.VendorPn,
    totalSales DESC
To get a result set like this. A list of sales records grouped by a group, a product ID (VendorPn), a customer, the customer's sales, and a sequence number which is partitioned by the group and the productID.
PartGrp VendorPn Customer totalSales seqnum
------------------------------------------------------------
AGS-AS 002A0002-252 10021013 19307.00 1
AGS-AS 002A0006-86 10021013 33092.00 1
AGS-AS 010-63078-8 10020987 10866.00 1
AGS-SQ B71040-39 10020997 7174.00 1
AGS-SQ B71040-39 10020998 2.00 2
AIRFRAME 0130-25 10017232 1971.00 1
AIRFRAME 0130-25 10000122 1243.00 2
AIRFRAME 0130-25 10008637 753.00 3
HARDWARE MS28775-261 10005623 214.00 1
M250 23066682 10013266 175.00 1
How can I filter the result set to only return rows which have more than 1 seqnum? I would like the result set to look like this
PartGrp VendorPn Customer totalSales seqnum
------------------------------------------------------------
AGS-SQ B71040-39 10020997 7174.00 1
AGS-SQ B71040-39 10020998 2.00 2
AIRFRAME 0130-25 10017232 1971.00 1
AIRFRAME 0130-25 10000122 1243.00 2
AIRFRAME 0130-25 10008637 753.00 3
Out of the first result set example, only rows with VendorPn "B71040-39" and "0130-25" had multiple customers purchase the product. All products which had only 1 customer were removed. Note that my desired result set isn't simply seqnum > 1, because i still need the first seqnum per partition.
I would change your query to be like this:
-- Keep only (PartGrp, VendorPn) partitions that contain more than one customer.
-- Window functions are evaluated after HAVING, so the partition size is computed
-- in a derived table and filtered with WHERE in the outer query.
SELECT PartGrp,
       VendorPn,
       customer,
       totalSales,
       seqnum,
       cnt
FROM (
    SELECT PartGrp,
           VendorPn,
           customer,
           SUM(sales) AS totalSales,
           ROW_NUMBER() OVER (PARTITION BY PartGrp, VendorPn ORDER BY SUM(sales) DESC) AS seqnum,
           -- Number of customer rows in this (PartGrp, VendorPn) partition.
           COUNT(*) OVER (PARTITION BY PartGrp, VendorPn) AS cnt
    FROM BG_Invoice
    GROUP BY PartGrp, VendorPn, customer
) ranked
WHERE cnt > 1
ORDER BY PartGrp, VendorPn, totalSales DESC
You can try something like:
-- Keep every row of a (PartGrp, VendorPn) partition whose highest seqnum exceeds 1,
-- i.e. partitions with more than one customer, including their seqnum = 1 row.
-- A window alias cannot be used in HAVING, and filtering on seqnum <> 1 directly
-- would drop the first row of each partition.
WITH ranked AS (
    SELECT PartGrp,
           VendorPn,
           customer,
           SUM(sales) AS totalSales,
           ROW_NUMBER() OVER (PARTITION BY PartGrp, VendorPn ORDER BY SUM(sales) DESC) AS seqnum
    FROM BG_Invoice
    GROUP BY PartGrp, VendorPn, customer
),
sized AS (
    SELECT PartGrp,
           VendorPn,
           customer,
           totalSales,
           seqnum,
           MAX(seqnum) OVER (PARTITION BY PartGrp, VendorPn) AS maxSeqnum
    FROM ranked
)
SELECT PartGrp, VendorPn, customer, totalSales, seqnum
FROM sized
WHERE maxSeqnum > 1
ORDER BY PartGrp, VendorPn, totalSales DESC
-- Rows of every (PartGrp, VendorPn) partition that has a row with seqnum > 1.
WITH CTE AS (
    SELECT PartGrp,
           VendorPn,
           customer,
           SUM(sales) AS totalSales,
           ROW_NUMBER() OVER (PARTITION BY partgrp, vendorpn ORDER BY SUM(sales) DESC) AS seqnum
    FROM BG_Invoice
    GROUP BY PartGrp, VendorPn, customer
)
SELECT a.PartGrp,
       a.VendorPn,
       a.customer,
       a.totalSales,
       a.seqnum
FROM CTE AS a
-- Semi-join: the partition qualifies when it holds at least a second row.
WHERE EXISTS (
    SELECT 1
    FROM CTE AS b
    WHERE b.PartGrp = a.PartGrp
      AND b.VendorPn = a.VendorPn
      AND b.seqnum > 1
)
ORDER BY a.PartGrp,
         a.VendorPn,
         a.totalSales DESC;

Taking the Largest SUM from a table

I'm trying to get the Employee with the highest sales
Employee DeptNo Date Sales
Chris 2 2012/1/1 1000
Joe 1 2012/1/1 900
Arthur 3 2012/1/1 1100
Chris 2 2012/3/1 1200
Joe 1 2012/2/1 1500
Arthur 3 2010/2/1 1200
Joe 1 2010/3/1 900
Arthur 3 2010/3/1 1100
Arthur 3 2010/4/1 1200
Joe 1 2012/4/1 1500
Chris 2 2010/4/1 1800
I've tried using two subqueries, and then comparing them together to find the higher value
-- Asker's attempt: two copies of the per-employee totals are combined with a comma
-- (cross) join, and a c1 row is emitted once for every c2 row it beats on BOTH
-- total sales and employee name - hence the repeated rows.
SELECT c1.Employee,
c1.TOTAL_SALES
FROM (SELECT Employee,
Sum(sales) AS TOTAL_SALES
FROM EmployeeSales
GROUP BY Employee) c1,
(SELECT Employee,
Sum(sales) AS TOTAL_SALES
FROM EmployeeSales
GROUP BY Employee) c2
WHERE ( c1.TOTAL_SALES > c2.TOTAL_SALES
AND c1.Employee > c2.Employee )
But the resulting query gives me two rows of
Employee TOTAL_SALES
joe 4800
joe 4800
What am I doing wrong?
I would use a CTE.
-- Employee with the highest total sales: number employees by total (highest
-- first) in a derived table and keep row 1.
Select
    [ranked].[Employee]
    ,[ranked].[Total_Sales]
From (
    Select
        [Employee]
        ,sum([Sales]) as [Total_Sales]
        ,Row_Number() Over(order by sum([sales]) Desc) as [RN]
    From [EmployeeSales]
    Group by [Employee]
) as [ranked]
Where [ranked].[RN] = 1
Example of working code SQL Fiddle:
http://sqlfiddle.com/#!3/bd772/2
To return all employees with the highest total sales, you can use SQL Server's proprietary TOP WITH TIES:
-- All employees tied for the highest total sales. Table and column names follow
-- the question's EmployeeSales(Employee, Sales) layout.
SELECT TOP (1) WITH TIES Employee, SUM(Sales) AS Total_Sales
FROM EmployeeSales
GROUP BY Employee
ORDER BY SUM(Sales) DESC
-- Single highest-selling name. LIMIT is MySQL/PostgreSQL/SQLite syntax and is not
-- accepted by SQL Server, where TOP (1) plays this role.
SELECT name, SUM(sales) as total_sales
FROM employees
GROUP BY name
ORDER by total_sales DESC
LIMIT 1;
A better solution is to group by an employee id so we are sure they are the same person. Since there can be two Chris's.
I would use a window partition
-- Highest-selling employee: number the per-employee totals from largest to
-- smallest and keep the first.
with ranked_sales as (
    select employee,
           sum(sales) as sales,
           row_number() over (order by sum(sales) desc) as rank
    from EmployeeSales
    group by employee
)
select ranked_sales.employee,
       ranked_sales.sales,
       ranked_sales.rank
from ranked_sales
where ranked_sales.rank = 1
And I agree with what someone said (Shawn) about having an employeeID and group by that for this, rather than the name.
(I removed the partition from the row_number() call as it is not needed for this)
you can use CTE for that
-- Employee with the highest total sales.
WITH CTE
AS ( SELECT employee,
            SUM(sales) AS sales,
            -- Number across ALL employees; partitioning by employee would give every
            -- employee RN = 1 and return the whole table.
            ROW_NUMBER() OVER (ORDER BY SUM(sales) DESC) AS RN
     FROM EmployeeSales
     -- Required: employee is selected alongside an aggregate.
     GROUP BY employee )
SELECT employee,
       sales
FROM CTE
WHERE RN = 1

Is there a way to do something like SQL NOT top statement?

I'm trying to make a SQL statement that gives me the top X records and then sums all the others. The first part is easy...
-- Three departments with the highest total sales. TOP needs an ORDER BY;
-- without one, which three groups come back is arbitrary.
select top 3 Department, Sum(sales) as TotalSales
from Sales
group by Department
order by Sum(sales) desc
What would be nice is if I union a second query something like...
select NOT top 3 "Others" as Department, Sum(sales) as TotalSales
from Sales
group by Department
... for a result set that looks like,
Department TotalSales
----------- -----------
Mens Clothes 120.00
Jewelry 113.00
Shoes 98.00
Others 312.00
Is there a way to do an equivalent to a NOT operator on a TOP? (I know I can probably make a temp table of the top X and work with that, but I'd prefer a solution that was just a single sql statement.)
-- Top 3 departments by sales, with every other department folded into 'Others'.
WITH totals AS (
    -- Per-department total and its position, largest total first.
    SELECT department,
           SUM(sales) AS sales,
           ROW_NUMBER() OVER (ORDER BY SUM(sales) DESC) AS rn
    FROM sales
    GROUP BY department
),
q AS (
    SELECT rn,
           CASE
               WHEN rn <= 3 THEN department
               ELSE 'Others'
           END AS dept,
           sales
    FROM totals
)
SELECT dept,
       SUM(sales)
FROM q
GROUP BY dept
-- 'Others' carries the largest row numbers, so it sorts last.
ORDER BY MAX(rn)
-- Top 3 departments keep their own group (1..3); all remaining departments share
-- group 4 and are reported as 'Others'.
WITH cte AS (
    SELECT Department,
           SUM(sales) AS TotalSales
    FROM Sales
    GROUP BY Department
),
numbered AS (
    SELECT Department,
           TotalSales,
           ROW_NUMBER() OVER (ORDER BY TotalSales DESC) AS rn
    FROM cte
),
cte2 AS (
    SELECT Department,
           TotalSales,
           CASE WHEN rn <= 3 THEN rn ELSE 4 END AS Grp
    FROM numbered
)
SELECT MAX(CASE
               WHEN Grp = 4 THEN 'Others'
               ELSE Department
           END) AS Department,
       SUM(TotalSales) AS TotalSales
FROM cte2
GROUP BY Grp
ORDER BY Grp
You can use a union to sum all other departments. A common table expression makes this a little bit more readable:
-- Top 3 departments by total sales, followed by one 'Other' row that sums the
-- sales of every department not in that top 3.
; WITH Top3Sales AS (
    SELECT TOP 3
        Department,
        SUM(sales) AS TotalSales
    FROM Sales
    GROUP BY Department
    ORDER BY SUM(sales) DESC
)
SELECT top3.Department,
       top3.TotalSales
FROM Top3Sales AS top3
UNION ALL
SELECT 'Other',
       SUM(rest.Sales)
FROM Sales AS rest
WHERE rest.Department NOT IN (
    SELECT t.Department
    FROM Top3Sales AS t
)
Example at data.stackexchange.com.
-- Top 3 departments by total sales plus an 'Others' row for the rest.
-- Both TOP 3 selections use the same deterministic ORDER BY so they pick the same
-- departments. The ordered TOP is wrapped in a derived table because an ORDER BY
-- placed before UNION ALL would apply to the whole union.
SELECT Top3.Department, Top3.TotalSales
FROM (SELECT TOP 3 Department, SUM(Sales) AS TotalSales
      FROM Sales
      GROUP BY Department
      ORDER BY SUM(Sales) DESC, Department) AS Top3
UNION ALL
SELECT 'Others', SUM(s.Sales)
FROM Sales s
WHERE s.Department NOT IN
      (SELECT D.Department
       -- Every derived-table column needs a name, hence the TotalSales alias.
       FROM (SELECT TOP 3 Department, SUM(Sales) AS TotalSales
             FROM Sales
             GROUP BY Department
             ORDER BY SUM(Sales) DESC, Department) D)