Row-wise count group by variables - sql

I have the below query -
-- Net-sales total and standard deviation for product 'ABC',
-- one output row per product number / description pair.
SELECT
    P.PRODUCT_NUMBER,
    P.PRODUCT_DESCRIPTION,
    SUM(S.NET_AMOUNT),
    ROUND(STDDEV(S.NET_AMOUNT), 2) AS STD_DEV
    -- Still wanted here as CNT (pseudo-code): the count of PFI_FACT_SALES rows
    -- with NET_AMOUNT <= 1$ for the same P.PRODUCT_NUMBER
FROM PFI_FACT_SALES S
INNER JOIN PFI_DIM_PRODUCT P
    ON S.PRODUCT_PK_ID = P.PRODUCT_PK_ID
WHERE P.PRODUCT_NUMBER = 'ABC'
GROUP BY
    P.PRODUCT_NUMBER,
    P.PRODUCT_DESCRIPTION;
This is the part I am not able to figure out -
(SELECT COUNT OF NET_AMOUNT < = 1$ FROM PFI_FACT_SALES GROUPED BY THE SAME P.PRODUCT_NUMBER) AS CNT
What would be the best way to get the necessary row level data group by product number & product description?
Thanks.

One way is with a correlated subquery:
-- Per product: net-sales total, standard deviation, and CNT = number of
-- sales rows whose NET_AMOUNT is at most 1.
SELECT
    P.PRODUCT_NUMBER,
    P.PRODUCT_DESCRIPTION,
    SUM(S.NET_AMOUNT),
    ROUND(STDDEV(S.NET_AMOUNT), 2) AS STD_DEV,
    -- A scalar subquery in a grouped query may only reference grouped columns
    -- of the outer query, so it is correlated on the GROUP BY columns (via the
    -- product dimension) rather than on the ungrouped S.PRODUCT_PK_ID.
    -- NOTE(review): a NULL PRODUCT_DESCRIPTION would never match itself here
    -- and yield CNT = 0 -- confirm the column is NOT NULL.
    (SELECT COUNT(*)
     FROM PFI_FACT_SALES S2
     JOIN PFI_DIM_PRODUCT P2
       ON P2.PRODUCT_PK_ID = S2.PRODUCT_PK_ID
     WHERE P2.PRODUCT_NUMBER = P.PRODUCT_NUMBER
       AND P2.PRODUCT_DESCRIPTION = P.PRODUCT_DESCRIPTION
       AND S2.NET_AMOUNT <= 1
    ) AS CNT
FROM PFI_DIM_PRODUCT P
JOIN PFI_FACT_SALES S
  ON P.PRODUCT_PK_ID = S.PRODUCT_PK_ID
WHERE P.PRODUCT_NUMBER = 'ABC'
GROUP BY P.PRODUCT_NUMBER, P.PRODUCT_DESCRIPTION;
I'm pretty sure that you can also do this with a conditional windowed sum:
-- Per product: net-sales total, standard deviation, and CNT = number of
-- sales rows whose NET_AMOUNT is at most 1.
SELECT
    P.PRODUCT_NUMBER,
    P.PRODUCT_DESCRIPTION,
    SUM(S.NET_AMOUNT),
    ROUND(STDDEV(S.NET_AMOUNT), 2) AS STD_DEV,
    -- Conditional count written as an ordinary aggregate. Because the query
    -- already groups by product, no OVER (PARTITION BY ...) is needed; a
    -- window over the ungrouped NET_AMOUNT / PRODUCT_PK_ID columns would be
    -- rejected in a GROUP BY query.
    SUM(CASE WHEN S.NET_AMOUNT <= 1 THEN 1 ELSE 0 END) AS CNT
FROM PFI_DIM_PRODUCT P
JOIN PFI_FACT_SALES S
  ON P.PRODUCT_PK_ID = S.PRODUCT_PK_ID
WHERE P.PRODUCT_NUMBER = 'ABC'
GROUP BY P.PRODUCT_NUMBER, P.PRODUCT_DESCRIPTION;

Related

SQL Case When Slowing Down Query

What I'm looking to do is quantify the total value of purchases and the number of months in which a purchase was made within three different timeframes by account. I only want to look at accounts who made a purchase between 1-1-2020 and 4-1-2021.
I'm wondering if there is a more streamlined way to pull in the fields I'm creating using CASE WHEN below (maybe through a series of queries to create the calculations and the left joining?). This query is taking extremely long to pull back, so I'd like to enhance this code where I can. All of my code and desired output is listed below. Thank you!
Creating a temporary table to pull account numbers:
-- Rebuild #accounts: one row per (account_no, code, code_desc) for accounts
-- with at least one sale dated 2020-01-01 through 2021-04-01 (inclusive).
DROP TABLE IF EXISTS #accounts;

-- DISTINCT alone de-duplicates the three selected columns; an additional
-- GROUP BY on the same columns adds nothing.
-- NOTE(review): if tran_date carries a time part, BETWEEN ... '2021-04-01'
-- only includes midnight of the last day -- confirm the column type.
SELECT DISTINCT
    s.account_no,
    c.code,
    c.code_desc
INTO #accounts
FROM sales AS s
LEFT JOIN customer AS c
    ON s.account_no = c.account_no
WHERE s.tran_date BETWEEN '2020-01-01' AND '2021-04-01';
Confirming row counts:
-- Row count of #accounts. The result is a single row, so there is nothing to
-- order; ordering by the non-aggregated account_no is not allowed next to an
-- ungrouped COUNT(*).
SELECT COUNT(*)
FROM #accounts;
Creating Sales and Sales period count columns for three timeframes:
-- Per account: total VALUE_USD and number of distinct booking months
-- (bk_month + bk_year) in three time frames, limited to accounts in #accounts.
-- NOTE(review): BETWEEN is inclusive, so tran_date = '2021-04-01' is counted in
-- both the Pre and During buckets, while '2022-04-01' itself falls in no
-- bucket (Post uses a strict >) -- confirm the intended boundaries.
SELECT
    s.account_no,
    c.code,
    c.code_desc,
    SUM(CASE
            WHEN s.tran_date BETWEEN '2020-01-01' AND '2021-04-01'
            THEN VALUE_USD
        END) AS Total_Spend_Pre,
    SUM(CASE
            WHEN s.tran_date BETWEEN '2021-04-01' AND '2022-03-31'
            THEN VALUE_USD
        END) AS Total_Spend_During,
    SUM(CASE
            WHEN s.tran_date > '2022-04-01'
            THEN VALUE_USD
        END) AS Total_Spend_Post,
    COUNT(DISTINCT CASE WHEN s.tran_date BETWEEN '2020-01-01' AND '2021-04-01' THEN CONCAT(s.bk_month, s.bk_year) END) AS Pre_Periods,
    COUNT(DISTINCT CASE WHEN s.tran_date BETWEEN '2021-04-01' AND '2022-03-31' THEN CONCAT(s.bk_month, s.bk_year) END) AS During_Periods,
    COUNT(DISTINCT CASE WHEN s.tran_date > '2022-04-01' THEN CONCAT(s.bk_month, s.bk_year) END) AS Post_Periods
FROM sales AS s
LEFT JOIN customer AS c
    ON s.account_no = c.account_no
-- NOTE(review): filtering on c.account_no drops sales that have no customer
-- row, so the LEFT JOIN effectively behaves like an inner join -- confirm
-- whether s.account_no was intended.
WHERE c.account_no IN (SELECT account_no FROM #accounts)
GROUP BY
    s.account_no,
    c.code,
    c.code_desc;
Desired output:
account_no
code
code_desc
Total_Spend_Pre
Total_Spend_During
Total_Spend_Post
Pre_Periods
During_Periods
Post_Periods
25
1234
OTHER
1000
2005
500
2
14
5
11
5678
PC
500
100
2220
5
11
2
You may use your date ranges to join with dataset, and 'Tag' your result like below, this will result in 3 rows, for each group. If you need them in a single row, have PIVOTE over it
-- Per account and date-range tag: total spend and number of distinct
-- booking months. Produces one row per (account, tag).
;WITH DateRanges AS (
    -- Inclusive ranges; a NULL EndDate means "open-ended".
    -- NOTE(review): '2021-04-01' is both the Pre end and the During start, so
    -- sales on that date are counted under both tags -- confirm boundaries.
    SELECT CAST('2020-01-01' AS DATE) AS StartDate, CAST('2021-04-01' AS DATE) AS EndDate, 'Pre' AS Tag
    UNION ALL
    SELECT '2021-04-01', '2022-03-31', 'During'
    UNION ALL
    SELECT '2022-04-01', NULL, 'Post'
)
SELECT
    s.account_no,
    c.code,
    c.code_desc,
    d.Tag,
    SUM(VALUE_USD) AS Total_Spend,
    COUNT(DISTINCT CONCAT(s.bk_month, s.bk_year)) AS RecordCount
FROM sales AS s
-- The range join depends only on sales, so it is joined to sales directly;
-- nesting it under the customer join would put s.tran_date out of scope.
INNER JOIN DateRanges AS d
    ON s.tran_date BETWEEN d.StartDate AND ISNULL(d.EndDate, s.tran_date)
LEFT JOIN customer AS c
    ON s.account_no = c.account_no
WHERE c.account_no IN (SELECT account_no FROM #accounts)
-- Tag is selected un-aggregated, so it has to be part of the grouping.
GROUP BY
    s.account_no,
    c.code,
    c.code_desc,
    d.Tag;
-- Spend and active booking months per account for three activity periods,
-- pivoted to one row per qualifying account.
WITH [cte_accountActivityPeriods] AS (
    -- One row per reporting period; PeriodStart / PeriodFinish are inclusive.
    SELECT [PeriodOrdinal] = 1, [PeriodName] = 'Total Spend Pre',    [PeriodStart] = CONVERT(date, '2020-01-01', 23), [PeriodFinish] = CONVERT(date, '2021-03-31', 23) UNION ALL
    SELECT [PeriodOrdinal] = 2, [PeriodName] = 'Total Spend During', [PeriodStart] = CONVERT(date, '2021-04-01', 23), [PeriodFinish] = CONVERT(date, '2022-03-31', 23) UNION ALL
    SELECT [PeriodOrdinal] = 3, [PeriodName] = 'Total Spend Post',   [PeriodStart] = CONVERT(date, '2022-04-01', 23), [PeriodFinish] = CONVERT(date, '9999-12-31', 23)
)
, [cte_allsalesForActivityPeriod] AS (
    -- Every sale tagged with the period its tran_date falls in; sales outside
    -- all periods are dropped by the inner join.
    SELECT s.[account_no], s.[bk_month], s.[bk_year], p.[PeriodOrdinal], s.[tran_date], s.[value_usd]
    FROM sales AS s
    INNER JOIN [cte_accountActivityPeriods] AS p
        ON s.[tran_date] BETWEEN p.[PeriodStart] AND p.[PeriodFinish]
)
, [cte_uniqueAccounts] AS ( /* Unique and qualifying accounts */
    SELECT DISTINCT accs.[account_no]
    FROM [cte_allsalesForActivityPeriod] AS sap
    INNER JOIN #accounts AS accs
        ON accs.[account_no] = sap.[account_no]
)
, [cte_AllSalesAggregatedByPeriod] AS (
    -- Spend per account, period and booking month.
    SELECT [account_no], [PeriodOrdinal], [bk_month], [bk_year], [PeriodTotalSpend] = SUM([value_usd])
    FROM [cte_allsalesForActivityPeriod]
    GROUP BY [account_no], [PeriodOrdinal], [bk_month], [bk_year]
)
, [cte_PeriodAnalysis] AS (
    -- Roll the monthly rows up to one row per account and period.
    SELECT [account_no]
         , [PeriodOrdinal]
         , [PeriodTotalSpend] = SUM([PeriodTotalSpend])
         -- The input holds one row per distinct bk_month / bk_year within the
         -- account and period, so counting rows counts active months.
         , [ActivePeriods] = COUNT(*)
    FROM [cte_AllSalesAggregatedByPeriod]
    GROUP BY [account_no], [PeriodOrdinal]
)
, [cte_pivot_clumsily] AS (
    /* Conditional aggregation collapses the (up to three) period rows of an
       account into a single row. */
    SELECT ua.[account_no]
         , [Total_Spend_Pre]    = SUM(CASE WHEN pa.[PeriodOrdinal] = 1 THEN pa.[PeriodTotalSpend] ELSE 0 END)
         , [Total_Spend_During] = SUM(CASE WHEN pa.[PeriodOrdinal] = 2 THEN pa.[PeriodTotalSpend] ELSE 0 END)
         , [Total_Spend_Post]   = SUM(CASE WHEN pa.[PeriodOrdinal] = 3 THEN pa.[PeriodTotalSpend] ELSE 0 END)
         , [Pre_Periods]        = SUM(CASE WHEN pa.[PeriodOrdinal] = 1 THEN pa.[ActivePeriods] ELSE 0 END)
         , [During_Periods]     = SUM(CASE WHEN pa.[PeriodOrdinal] = 2 THEN pa.[ActivePeriods] ELSE 0 END)
         , [Post_Periods]       = SUM(CASE WHEN pa.[PeriodOrdinal] = 3 THEN pa.[ActivePeriods] ELSE 0 END)
    FROM [cte_uniqueAccounts] AS ua
    LEFT JOIN [cte_PeriodAnalysis] AS pa
        ON pa.[account_no] = ua.[account_no]
    GROUP BY ua.[account_no]
)
SELECT c.[code]
     , c.[code_desc]
     , pvt.[account_no]
     , pvt.[Total_Spend_Pre]
     , pvt.[Total_Spend_During]
     , pvt.[Total_Spend_Post]
     , pvt.[Pre_Periods]
     , pvt.[During_Periods]
     , pvt.[Post_Periods]
FROM [cte_pivot_clumsily] AS pvt
LEFT JOIN customer AS c
    ON pvt.[account_no] = c.[account_no];

SELECT list expression references column integration_start_date which is neither grouped nor aggregated at

I'm facing an issue with the following query. It gave me this error [SELECT list expression references column integration_start_date which is neither grouped nor aggregated at [34:63]]. In particular, it points to the first 'when' in the result table, which I don't know how to fix. This is on BigQuery if that helps. I see everything is written correctly or I could be wrong. Seeking for help.
-- Fees per invoice month and sponsor, derived from July 2021 'rocket'
-- payroll activities with a positive amount.
with plan_data as (
  -- One row per distinct combination of the eight grouped columns below.
  -- NOTE(review): employee_id is one of the grouping columns (position 7), so
  -- count(distinct a.employee_id) is computed per employee and cannot exceed
  -- 1 -- confirm whether the count was meant per sponsor.
  select format_date("%Y-%m-%d", last_day(date(a.basis_date))) as invoice_date,
         a.sponsor_id as sponsor_id,
         b.company_name as sponsor_name,
         -- 13 characters following the 'merchant_id' key in the meta text,
         -- with double quotes stripped
         REPLACE(SUBSTR(d.meta, STRPOS(d.meta, 'merchant_id') + 12, 13), '"', '') as merchant_id,
         a.state as plan_state,
         date(c.start_date) as plan_start_date,
         a.employee_id as square_employee_id,
         -- earliest 'square' stats date for the sponsor on or after plan start
         date(
           (select min(date)
            from glproductionview.stats_sponsors
            where sponsor_id = a.sponsor_id
              and sponsor_payroll_provider_identifier = 'square'
              and date >= c.start_date)
         ) as integration_start_date,
         count(distinct a.employee_id) as eligible_pts_count -- pts that are in active plan and have payroll activities (payroll deductions) in the reporting month
  from glproductionview.payroll_activities as a
  left join glproductionview.sponsors as b
    on a.sponsor_id = b.id
  left join glproductionview.dc_plans as c
    on a.plan_id = c.id
  left join glproductionview.payroll_connections as d
    on a.sponsor_id = d.sponsor_id
   and d.provider_identifier = 'rocket'
   and a.company_id = d.payroll_id
  where a.payroll_provider_identifier = 'rocket'
    and format_date("%Y-%m", date(a.basis_date)) = '2021-07'
    and a.amount_cents > 0
  group by 1,2,3,4,5,6,7,8
  order by 2 asc
)
select invoice_date,
       sponsor_id,
       sponsor_name,
       eligible_pts_count,
       -- integration_start_date is not one of the grouped columns, so it must
       -- be aggregated before it can be used here; min() takes the earliest
       -- start date in each group. This resolves the "neither grouped nor
       -- aggregated" error.
       case
         when eligible_pts_count <= 5 and date_diff(current_date(), min(integration_start_date), month) <= 12 then 20
         when eligible_pts_count <= 5 and date_diff(current_date(), min(integration_start_date), month) > 12 then 15
         when eligible_pts_count > 5 and date_diff(current_date(), min(integration_start_date), month) <= 12 then count(distinct square_employee_id) * 4
         when eligible_pts_count > 5 and date_diff(current_date(), min(integration_start_date), month) > 12 then count(distinct square_employee_id) * 3
         else 0
       end as fees
from plan_data
group by 1,2,3,4;

How can i get sum of total row values in a row by using group by?

-- Amount due per customer and rental length: insurance cost plus the
-- category's daily rate times the number of rental days.
-- Explicit INNER JOINs keep each join condition next to its table.
select
    customerid,
    (rentaldropoffdate - rentalpickupdate) as days,
    sum(
        RENTALINSURANCEADDITIONALCOST
        + (CATEGORYDAILYRENTALRATE * (rentaldropoffdate - rentalpickupdate))
    ) as TOTAL_DUE
from rental
inner join vehicle
    on rental.vehicleid = vehicle.vehicleid
inner join rentalinsuranceoption
    on rental.rentalinsuranceoptionid = rentalinsuranceoption.rentalinsuranceoptionid
inner join vehiclerentalcategory
    on vehicle.VEHICLERENTALCATEGORYID = vehiclerentalcategory.VEHICLERENTALCATEGORYID
-- Grouping by the day count as well yields one row per customer AND rental
-- length, not one row per customer.
group by
    customerid,
    (rentaldropoffdate - rentalpickupdate)
order by
    TOTAL_DUE desc;
so how can i have the sum of TOTAL_DUE of Customerid 1 in a row instead of two separate values?
Sum the rentaldropoffdate - rentalpickupdate value rather than using it in the GROUP BY:
-- One row per customer: total rental days and total amount due
-- (insurance cost + daily category rate * rental days, summed over rentals).
SELECT
    customerid,
    SUM(rentaldropoffdate - rentalpickupdate) AS days,
    SUM(
        RENTALINSURANCEADDITIONALCOST
        + (CATEGORYDAILYRENTALRATE * (rentaldropoffdate - rentalpickupdate))
    ) AS TOTAL_DUE
FROM rental rnt
INNER JOIN rentalinsuranceoption ins
    ON rnt.rentalinsuranceoptionid = ins.rentalinsuranceoptionid
INNER JOIN VEHICLE veh
    ON rnt.vehicleid = veh.vehicleid
INNER JOIN vehiclerentalcategory cat
    ON veh.VEHICLERENTALCATEGORYID = cat.VEHICLERENTALCATEGORYid
GROUP BY customerid
ORDER BY TOTAL_DUE DESC;
Do a sum on the days and remove days from the group by.
-- One row per customer: total rental days and total amount due.
-- Days are summed instead of grouped on, so a customer's rentals of different
-- lengths collapse into a single row.
select
    customerid,
    sum(rentaldropoffdate - rentalpickupdate) as days,
    sum(
        RENTALINSURANCEADDITIONALCOST
        + (CATEGORYDAILYRENTALRATE * (rentaldropoffdate - rentalpickupdate))
    ) as TOTAL_DUE
from rental
inner join vehicle
    on rental.vehicleid = vehicle.vehicleid
inner join rentalinsuranceoption
    on rental.rentalinsuranceoptionid = rentalinsuranceoption.rentalinsuranceoptionid
inner join vehiclerentalcategory
    on vehicle.VEHICLERENTALCATEGORYID = vehiclerentalcategory.VEHICLERENTALCATEGORYID
group by
    customerid
order by
    TOTAL_DUE desc;

Find median between 2 dates

Anyone know how I can change the Total Median near bottom to show an average of the median instead? For some reason, the Total Median is always 100. Not sure what I should do.
Thanks in advance for any ideas! Current results also below.
-- CTE: distinct (case, DaysApart) rows, where DaysApart is the number of days
-- from CaseAddDt to DispoDt, plus the month, year and a sort key of CaseAddDt.
WITH CTE AS (
SELECT DISTINCT c.CaseID AS CaseID,
DATEDIFF(d, c.CaseAddDt, coip.DispoDt) AS DaysApart
, DATEPART(month,c.CaseAddDt) AS [Month]
, DATEPART(year,c.CaseAddDt) AS [Year]
-- Srt: year + '|' + month, with a leading '0' added for months 1-9
, CAST(DATEPART(year,c.CaseAddDt) AS varchar) + '|' + CASE WHEN DATEPART(month,c.CaseAddDt) IN (10,11,12) THEN CAST(DATEPART(month,c.CaseAddDt) AS varchar) ELSE '0' + CAST(DATEPART(month,c.CaseAddDt) AS varchar) END AS Srt
FROM jw50_Case c
JOIN jw50_CaseInvPers def ON def.CaseID = c.CaseID
AND def.InvolveTypeMasterCode = 1
JOIN
jw50_CountInvPers coip ON coip.CaseID = c.CaseID
AND coip.CaseInvPersID = def.CaseInvPersID
AND coip.DispoCode IN ('CODE','CODE')
AND coip.CountNum > 0
-- At most one matching agency row per case; together with the
-- "caz.CaseAgencyID IS NOT NULL" filter below, cases without such a row are
-- excluded.
OUTER APPLY (
SELECT TOP 1 caz.CaseAgencyID
FROM jw50_CaseAgency caz
WHERE caz.CaseID = c.CaseID
AND caz.AgencyCode = 'ABC'
AND caz.NumberTypeCode IN ('i#','in#')) caz
WHERE
EXISTS (SELECT 1 FROM jw50_CaseAttributes ca WHERE ca.CaseID = c.CaseID AND ca.CaseAttributeCode = 'oa7')
AND caz.CaseAgencyID IS NOT NULL
AND c.CaseStatusCode <> 'AAA'
-- NOTE(review): these string date literals are interpreted according to the
-- session's date format -- confirm month/day order.
AND c.CaseAddDt BETWEEN '01/01/2017' AND '08/01/2017'
-- keep only dispositions dated on or after the case was added
AND c.CaseAddDt <= coip.DispoDt)
SELECT a.CaseID,
a.Month
, a.Year
, a.DaysApart
, a.Srt
-- median of DaysApart within each Month/Year of CaseAddDt
, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY a.DaysApart) OVER (PARTITION BY a.Month, a.Year) AS MonMedian
-- PARTITION BY 1 is a constant, so every row falls in one partition: this is
-- the median of DaysApart over all rows, not an average of the monthly medians.
, PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY a.DaysApart) OVER (PARTITION BY 1) AS TotalMedian
FROM CTE a
Results:

merge and refine two query results sql server

i have two query please tell me how to merge these two queries and also how to refine these query.Merge query on base on GroupID.
-- Per group: number of enabled objects of client 5 whose summed distance over
-- the messages of the last day (GETDATE() - 1 .. GETDATE()) is below 10.
WITH idle AS (
    -- One row per message: fn_GpsUtil_Distance applied to the message's x/y
    -- and the x/y of the same object's next message in GpsTime order.
    SELECT
        Message.ObjectId,
        fn_GpsUtil_Distance(
            Message.x,
            Message.y,
            LEAD(Message.x) OVER (PARTITION BY Message.objectid ORDER BY Message.GpsTime),
            LEAD(Message.y) OVER (PARTITION BY Message.objectid ORDER BY Message.GpsTime)
        ) AS distance,
        [Group].GroupId,
        [Group].Name
    FROM [Group]
    INNER JOIN GroupObject
        ON [Group].GroupId = GroupObject.GroupId
    INNER JOIN Message
        ON GroupObject.ObjectId = Message.ObjectId
    INNER JOIN Object
        ON GroupObject.ObjectId = Object.ObjectId
    WHERE Object.Enabled = 1
      AND Object.ClientId = 5
      AND Message.GpsTime >= GETDATE() - 1
      AND Message.GpsTime <= GETDATE()
),
idlee AS (
    -- Summed distance per object (and the group it belongs to), as an int.
    SELECT
        CONVERT(int, SUM(idle.distance)) AS distance,
        idle.ObjectId,
        idle.GroupId,
        idle.Name
    FROM idle
    GROUP BY idle.ObjectId, idle.GroupId, idle.Name
)
SELECT
    COUNT(idlee.ObjectId) AS 'Count',
    idlee.GroupId,
    idlee.Name
FROM idlee
WHERE idlee.distance < 10
GROUP BY idlee.GroupId, idlee.Name
output
Count GroupID Group
36 15 DC-1
30 16 DC-2
13 17 DC-3
64 13 LC-1
16 14 LC-2
second query which i use to retrieve data
-- Per group: number of enabled objects of client 5 whose summed distance over
-- the messages of the last day (GETDATE() - 1 .. GETDATE()) lies in 100..300.
WITH idle AS (
    -- One row per message: fn_GpsUtil_Distance applied to the message's x/y
    -- and the x/y of the same object's next message in GpsTime order.
    SELECT
        Message.ObjectId,
        fn_GpsUtil_Distance(
            Message.x,
            Message.y,
            LEAD(Message.x) OVER (PARTITION BY Message.objectid ORDER BY Message.GpsTime),
            LEAD(Message.y) OVER (PARTITION BY Message.objectid ORDER BY Message.GpsTime)
        ) AS distance,
        [Group].GroupId,
        [Group].Name
    FROM [Group]
    INNER JOIN GroupObject
        ON [Group].GroupId = GroupObject.GroupId
    INNER JOIN Message
        ON GroupObject.ObjectId = Message.ObjectId
    INNER JOIN Object
        ON GroupObject.ObjectId = Object.ObjectId
    WHERE Object.Enabled = 1
      AND Object.ClientId = 5
      AND Message.GpsTime >= GETDATE() - 1
      AND Message.GpsTime <= GETDATE()
),
idlee AS (
    -- Summed distance per object (and the group it belongs to), as an int.
    SELECT
        CONVERT(int, SUM(idle.distance)) AS distance,
        idle.ObjectId,
        idle.GroupId,
        idle.Name
    FROM idle
    GROUP BY idle.ObjectId, idle.GroupId, idle.Name
)
SELECT
    COUNT(idlee.ObjectId) AS 'Count',
    idlee.GroupId,
    idlee.Name
FROM idlee
WHERE idlee.distance >= 100 AND idlee.distance <= 300
GROUP BY idlee.GroupId, idlee.Name
Count GroupID Group
40 15 DC-1
50 16 DC-2
20 17 DC-3
64 13 LC-1
16 14 LC-2
but i want output like this on Group base.
GroupID Group Count 0<10 Count 100 To 300
15 DC-1 36 40
16 DC-2 30 50
17 DC-3 13 20
13 LC-1 64 64
14 LC-2 16 16
Try this...join the SQLs with UNION, in the first one count(idlee.ObjectId) AS count1, 0 as count2 and in the second one 0 as count1, count(idlee.ObjectId) AS count2.
Then enclose the entire SQL as a temp table and sum Count1 and Count2.
-- Per group: number of objects with summed distance < 10 (CountLess10) and
-- number with summed distance in 100..300 (Count100300), merged into one row.
-- The per-object distance subquery shared by both counts is written once as a
-- CTE instead of being repeated in each branch of the union.
WITH idlee AS (
    SELECT CONVERT(int, SUM(idle.distance)) AS distance, idle.ObjectId, idle.GroupId, idle.Name
    FROM (
        SELECT
            Message.ObjectId,
            -- NOTE(review): SQL Server normally requires a schema-qualified
            -- name (e.g. dbo.) to call a scalar user-defined function --
            -- confirm how fn_GpsUtil_Distance is defined.
            fn_GpsUtil_Distance(
                Message.x,
                Message.y,
                LEAD(Message.x) OVER (PARTITION BY Message.objectid ORDER BY Message.GpsTime),
                LEAD(Message.y) OVER (PARTITION BY Message.objectid ORDER BY Message.GpsTime)
            ) AS distance,
            [Group].GroupId,
            [Group].Name
        FROM [Group]
        INNER JOIN GroupObject ON [Group].GroupId = GroupObject.GroupId
        INNER JOIN Message ON GroupObject.ObjectId = Message.ObjectId
        INNER JOIN Object ON GroupObject.ObjectId = Object.ObjectId
        WHERE (Object.Enabled = 1) AND (Object.ClientId = 5)
          AND (Message.GpsTime >= GETDATE() - 1)
          AND (Message.GpsTime <= GETDATE())
    ) AS idle
    GROUP BY idle.ObjectId, idle.GroupId, idle.Name
)
-- GROUP is a reserved word, so the alias/column is bracket-quoted everywhere.
SELECT Temp.GroupID, Temp.[Group], SUM(Temp.Count1) AS CountLess10, SUM(Temp.Count2) AS Count100300
FROM (
    SELECT COUNT(idlee.ObjectId) AS Count1, 0 AS Count2, idlee.GroupId AS GroupID, idlee.Name AS [Group]
    FROM idlee
    WHERE idlee.distance < 10
    GROUP BY idlee.GroupId, idlee.Name
    -- UNION ALL: rows from both branches are summed afterwards, so no
    -- duplicate elimination is wanted here.
    UNION ALL
    SELECT 0 AS Count1, COUNT(idlee.ObjectId) AS Count2, idlee.GroupId AS GroupID, idlee.Name AS [Group]
    FROM idlee
    WHERE idlee.distance >= 100 AND idlee.distance <= 300
    GROUP BY idlee.GroupId, idlee.Name
) AS Temp
GROUP BY Temp.GroupID, Temp.[Group];
Hope this helps.