Conditional sum and count in Teradata (SQL help) - sql

My data is setup in the following way:
Person Account Revenue Region
A W 100 AU
A W 200 AU
A W 300 AU
B X 200 AU
B X 50 CH
B X 50 CH
Here is code for the sample data:
-- Drop any previous copy of the sample table so the script can be re-run.
IF OBJECT_ID('tempdb..#StackTest') IS NOT NULL
    DROP TABLE #StackTest;

-- One row per revenue line item; the same account can appear in several regions.
CREATE TABLE #StackTest
(
    Person  varchar(1),
    Account varchar(1),
    Revenue int,
    Region  varchar(2)
);

INSERT INTO #StackTest (Person, Account, Revenue, Region)
VALUES
    ('A', 'W', 100, 'AU'),
    ('A', 'W', 200, 'AU'),
    ('A', 'W', 300, 'AU'),
    ('B', 'X', 200, 'AU'),
    ('B', 'X', 50, 'CH'),
    ('B', 'X', 50, 'CH');
I need to write a SQL query that sums revenue only for those accounts whose total revenue (account Q) exceeds a threshold (Y). Similarly, I also need to count only those accounts whose total revenue exceeds the threshold. The threshold depends on the region: if my sum threshold for region AU is 500 and for region CH is 200, then I would want the following output
Output # of accounts exceeding threshold sum Revenue from these accounts
A 1 600
B 0 0
However, my current query is checking each line item separately and not at the account level.
What should I do?

In standard SQL, you would use two levels of aggregation. I suspect that the query is something like:
-- Two-level aggregation.
-- Inner query: total revenue per person and region.
-- Outer query: compare each total with its region's threshold (AU: 500, CH: 200)
-- and roll the qualifying totals up to one row per person.
select person,
       sum(case when region = 'AU' and revenue > 500 then 1
                when region = 'CH' and revenue > 200 then 1
                else 0
           end) as numAccounts,
       sum(case when region = 'AU' and revenue > 500 then revenue
                when region = 'CH' and revenue > 200 then revenue
                else 0
           end) as revenue   -- last select item: no trailing comma before FROM
from (select person, region, sum(revenue) as revenue
      from t
      group by person, region
     ) t
group by person;

The following query will aggregate by person/region, and then apply regional thresholds from separate table in order to generate results.
Updated to account for separate regional thresholds
-- Rebuild the per-region threshold lookup table.
IF OBJECT_ID('tempdb..#Thresholds') IS NOT NULL
    DROP TABLE #Thresholds;

CREATE TABLE #Thresholds (Region VARCHAR(2), Revenue INT);

INSERT #Thresholds VALUES ('AU', 500), ('CH', 200);

-- RegionTotals: distinct-account count and revenue total per person and region.
-- The final query keeps a total only when it meets or exceeds (>=) the
-- threshold of its region, then sums the kept values per person.
WITH RegionTotals AS (
    SELECT
        Person,
        Region,
        COUNT(DISTINCT Account) AS [Count],
        SUM(Revenue) AS [Revenue]
    FROM #StackTest
    GROUP BY Person, Region
)
SELECT
    RT.Person,
    SUM(CASE WHEN RT.[Revenue] >= Limits.Revenue THEN RT.[Count] ELSE 0 END) AS [# of accounts exceeding threshold sum],
    SUM(CASE WHEN RT.[Revenue] >= Limits.Revenue THEN RT.[Revenue] ELSE 0 END) AS [Revenue from these accounts]
FROM RegionTotals RT
INNER JOIN #Thresholds Limits
    ON Limits.Region = RT.Region
GROUP BY RT.Person
ORDER BY RT.Person;

Related

How to get a difference between two query result sets

I have two queries. The output for the first query is the total sales quantity for all Brands and the output for the second query is the total sales quantity only for 'New' Brands.
I need to create only one query (By merging below two queries: Query1 & Query2) where we can see the total sales of 'New' brands per Region, total sales of All brands per region and a new column named difference (Total sales quantity of All brands- Total sales quantity of New brands) side by side.
Expected Output :
InvoiceDate
Brand
Region
Quantity for 'New' Brand
Quantity for All Brand
Difference
2021/10/01
New
A
40
100
60
2021/10/01
New
B
10
90
80
2021/10/01
New
C
50
150
100
2021/10/01
New
D
30
200
170
These are my queries:
Query1:
SELECT InvoiceDate, Brand, Region, Quantity FROM TotalSales; -- For All Brands
Query2:
SELECT InvoiceDate, Brand, Region, Quantity FROM TotalSales WHERE Brand = 'New'; -- For New Brands
There are a couple of ways of doing this...
First - I don't think you want the "Brand" column in your result. That doesn't make much sense. Also, I think you are going to want a summation for the AllBrands total...
1. Use subqueries
-- Per invoice date and region: 'New' brand quantity, all-brand quantity, and the gap.
-- Both derived tables are aggregated to one row per (InvoiceDate, Region), so the
-- join cannot multiply rows when a region has several 'New' sales lines.
SELECT
    allBrands.InvoiceDate,
    allBrands.Region,
    COALESCE(newBrands.Quantity, 0) AS NewQuantity,   -- 0 when the region sold no 'New' brand
    allBrands.Quantity AS allQuantity,
    allBrands.Quantity - COALESCE(newBrands.Quantity, 0) AS Difference
FROM (
    SELECT InvoiceDate, Region, SUM(Quantity) AS Quantity
    FROM TotalSales
    GROUP BY InvoiceDate, Region
) AS allBrands
LEFT OUTER JOIN (
    SELECT InvoiceDate, Region, SUM(Quantity) AS Quantity
    FROM TotalSales
    WHERE Brand = 'New'
    GROUP BY InvoiceDate, Region
) AS newBrands
    ON newBrands.InvoiceDate = allBrands.InvoiceDate
   AND newBrands.Region = allBrands.Region
or 2. use temp tables
-- Stage the all-brand totals, one row per invoice date and region.
SELECT InvoiceDate, Region, SUM(Quantity) AS Quantity
INTO #allSales
FROM TotalSales
GROUP BY InvoiceDate, Region;

-- Stage the 'New' brand totals at the same grain so the join below stays one-to-one.
SELECT InvoiceDate, Region, SUM(Quantity) AS Quantity
INTO #newSales
FROM TotalSales
WHERE Brand = 'New'
GROUP BY InvoiceDate, Region;

-- Read from the temp tables created above (#allSales / #newSales).
SELECT
    allBrands.InvoiceDate,
    allBrands.Region,
    COALESCE(newBrands.Quantity, 0) AS NewQuantity,   -- 0 when the region sold no 'New' brand
    allBrands.Quantity AS allQuantity,
    allBrands.Quantity - COALESCE(newBrands.Quantity, 0) AS Difference
FROM #allSales allBrands
LEFT OUTER JOIN #newSales newBrands
    ON newBrands.InvoiceDate = allBrands.InvoiceDate
   AND newBrands.Region = allBrands.Region;
You want to get the quantity for brand = 'new' and the total quantity for all brands and compare the two.
One way to achieve this is conditional aggregation:
-- totals: 'New' quantity and overall quantity per invoice date and region.
-- The outer query keeps only the groups that sold some 'New' brand and
-- derives the difference from the two pre-computed sums.
with totals as (
    select
        invoicedate,
        region,
        sum(case when brand = 'New' then quantity else 0 end) as qty_new,
        sum(quantity) as qty_all
    from totalsales
    group by invoicedate, region
)
select
    invoicedate,
    'New' as brand,
    region,
    qty_new,
    qty_all,
    qty_all - qty_new as diff
from totals
where qty_new > 0
order by invoicedate, region;
Another is a join
-- qnew: 'New' brand quantity, summed to one row per invoice date and region.
with qnew as
(
    select invoicedate, brand, region, sum(quantity) as quantity
    from totalsales
    where brand = 'New'
    group by invoicedate, brand, region
)
-- qall: quantity over all brands per invoice date and region (no brand column).
, qall as
(
    select invoicedate, region, sum(quantity) as total
    from totalsales
    group by invoicedate, region
)
select
    qnew.invoicedate,
    qnew.brand,
    qnew.region,
    qnew.quantity,
    qall.total,
    qall.total - qnew.quantity as diff
from qnew
-- Join on date and region only: qall is not broken down by brand.
join qall on qall.invoicedate = qnew.invoicedate
         and qall.region = qnew.region
order by qnew.invoicedate, qnew.brand, qnew.region;
You can use simple conditional aggregation (SUM) on the data such as this:
-- Sample data held in a table variable; T-SQL table variables are named with @.
DECLARE @TotalSales TABLE (InvoiceDate DATE, Brand NVARCHAR(16), Region NCHAR(1), Quantity INT);

INSERT INTO @TotalSales (InvoiceDate, Brand, Region, Quantity)
VALUES
    ('10/1/2021', 'New', 'A', 20),
    ('10/1/2021', 'New', 'A', 20),
    ('10/1/2021', 'Old', 'A', 30),
    ('10/1/2021', 'Old', 'A', 30),
    ('10/1/2021', 'New', 'B', 10),
    ('10/1/2021', 'Old', 'B', 30),
    ('10/1/2021', 'Old', 'B', 50),
    ('10/1/2021', 'New', 'C', 50),
    ('10/1/2021', 'Old', 'C', 100),
    ('10/1/2021', 'New', 'D', 10),
    ('10/1/2021', 'New', 'D', 10),
    ('10/1/2021', 'New', 'D', 10),
    ('10/1/2021', 'Old', 'D', 100),
    ('10/1/2021', 'Old', 'D', 70),
    ('11/1/2021', 'Old', 'A', 50);

-- Data: overall and 'New'-only quantity per invoice date and region, computed
-- in one pass so the final SELECT can subtract them without repeating the CASE.
WITH Data AS (
    SELECT
        ts.InvoiceDate,
        ts.Region,
        SUM(ts.Quantity) AS QuantityAll,
        SUM(CASE WHEN ts.Brand = 'New' THEN ts.Quantity ELSE 0 END) AS QuantityNew
    FROM
        @TotalSales ts
    GROUP BY
        ts.InvoiceDate,
        ts.Region
)
SELECT
    d.InvoiceDate,
    d.Region,
    d.QuantityAll,
    d.QuantityNew,
    d.QuantityAll - d.QuantityNew AS TheDifference
FROM
    Data d
ORDER BY
    d.InvoiceDate,
    d.Region
I used a CTE so that we don't have to repeat the conditional SUM(CASE WHEN... for subtracting between QuantityNew and QuantityAll.
Output is:
InvoiceDate Region QuantityAll QuantityNew TheDifference
2021-10-01 A 100 40 60
2021-10-01 B 90 10 80
2021-10-01 C 150 50 100
2021-10-01 D 200 30 170
2021-11-01 A 50 0 50

How to divide results into separate rows based on year?

I have a query that looks at profits and operations costs of different stores based on the fiscal year, and currently the fiscal years and variables are sorted into single, respective columns such as:
FiscalYear Metric Store Amount
2017 Profit A 220
2017 Cost A 180
2018 Profit B 200
2018 Cost B 300
...
I need to cross tab the rows so that for each store, I can compare the 2017 profit against the 2018 profit, and 2017 cost against the 2018 cost.
I broke out profits and costs by creating CASE WHEN statements for the ProfitLossTable, but I don't know how to make it create a "2017 Profit" and "2018 Profit" column, respectively, for each Store.
-- [Profits, Cost]: splits Number into a Profit column (rows with ID = 333)
-- and a Cost column (rows with ID = 555); the other column is NULL on each row.
WITH [Profits, Cost] AS
(
    SELECT ID, StoreID, Number, FYYearID,
        CASE WHEN ID = 333 THEN Number END AS Profit,   -- comma separates the two computed columns
        CASE WHEN ID = 555 THEN Number END AS Cost
    FROM ProfitLossTable
),
-- Location: store id to store name lookup.
Location AS
(
    SELECT StoreID, StoreName
    FROM StoreTable
),
-- FiscalMonth: fiscal-year id to fiscal-year lookup.
FiscalMonth AS
(
    SELECT FYYearID, FYYear
    FROM FiscalMonthTable
)
SELECT A.Profit, A.Cost
FROM [Profits, Cost] A
JOIN Location B
    ON A.StoreID = B.StoreID
JOIN FiscalMonth C
    ON A.FYYearID = C.FYYearID
The code above shows this, and I feel like I am close to creating columns based on year, but I don't know what to do next.
FiscalYear Store Profit Cost
2017 A 220 100
2017 A 180 100
2018 B 200 100
2018 B 300 100
As a working (on my machine anyway ;-p) example using your data:
-- Scratch copy of the question's sample rows.
CREATE TABLE #temp
(
    FiscalYear int          NOT NULL,
    Metric     nvarchar(50) NOT NULL,
    Store      nvarchar(10) NOT NULL,
    Amount     int          NOT NULL
);

INSERT INTO #temp (FiscalYear, Metric, Store, Amount)
VALUES
    (2017, N'Profit', N'A', 220),
    (2017, N'Cost', N'A', 180),
    (2018, N'Profit', N'B', 200),
    (2018, N'Cost', N'B', 300);

-- Show the raw rows first.
SELECT FiscalYear, Metric, Store, Amount
FROM #temp;

-- Turn the fiscal years into columns: one row per Metric,
-- with the summed Amount for 2017 and 2018 side by side.
SELECT
    by_year.Metric,
    by_year.[2017] AS [2017],
    by_year.[2018] AS [2018]
FROM (
    SELECT FiscalYear, Amount, Metric
    FROM #temp
) AS src
PIVOT (
    SUM(Amount) FOR FiscalYear IN ([2017], [2018])
) AS by_year
ORDER BY by_year.Metric;

DROP TABLE #temp;

SQL First In First Out Loyalty Point

fellow developers and analysts. I have some experience in SQL and have resorted to similar posts. However, this is slightly more niche. Thank you in advance for helping.
I have the below dataset (edited. Apology)
Setup
-- Point movements per customer: positive Points are earned, negative are redeemed.
CREATE TABLE CustomerPoints
(
    CustomerID INT,
    [Date]     Date,
    Points     INT
);

INSERT INTO CustomerPoints (CustomerID, [Date], Points)
VALUES
    (1, '20150101', 500),
    (1, '20150201', -400),
    (1, '20151101', 300),
    (1, '20151201', -400);
and need to turn it into (edited. The figures in previous table were incorrect)
Any positive amount of points are points earned whereas negative are redeemed. Because of the FIFO (1st in 1st out concept), of the second batch of points spent (-400), 100 of those were taken from points earned on 20150101 (UK format) and 300 from 20151101.
The goal is to calculate, for each customer, the number of points spent within x and y months of earning. Again, thank you for your help.
I have already answered a similar question here and here
You need to explode points earned and redeemed by single units and then couple them, so each point earned will be matched by a redeemed point.
For each of these matching rows calculate the months elapsed from the earning to the redeeming and then aggregate it all.
For FN_NUMBERS(n) it is a tally table, look at other answers I have linked above.
;with
-- p: every point movement; e: earnings (points > 0); r: redemptions (points < 0)
p as (select * from CustomerPoints),
e as (select * from p where points > 0),
r as (select * from p where points < 0),
-- ex: one row per single earned point, numbered per customer in date order.
-- NOTE(review): assumes FN_NUMBERS(1000) yields a column N counting from 1,
-- so a single movement larger than 1000 points would be truncated - confirm.
ex as (
    select *, ROW_NUMBER() over (partition by CustomerID order by [date]) rn
    from e
    join FN_NUMBERS(1000) on N <= e.points
),
-- rx: one row per single redeemed point, numbered the same way.
rx as (
    select *, ROW_NUMBER() over (partition by CustomerID order by [date]) rn
    from r
    join FN_NUMBERS(1000) on N <= -r.points
),
-- j: the n-th earned point is matched with the n-th redeemed point (FIFO);
-- mm is the number of months between earning and redeeming it.
j as (
    select ex.CustomerID, DATEDIFF(month, ex.date, rx.date) mm
    from ex
    join rx on ex.CustomerID = rx.CustomerID and ex.rn = rx.rn and rx.date > ex.date
)
-- use this select to see points redeemed in current and past semester
-- bucket s = 0 covers months 0..6, bucket s = 1 covers months 7..12 (both ends inclusive);
-- the pivot columns must list exactly those bucket values, [0] and [1].
select *
from j
join (select 0 s union all select 1 s) p
    on j.mm >= (p.s*6)+(p.s) and j.mm <= p.s*6+6
pivot (count(mm) for s in ([0],[1])) p
order by 1, 2
-- use this select to see points redeemed with months detail
--select * from j pivot (count(mm) for mm in ([0],[1],[2],[3],[4],[5],[6],[7],[8],[9],[10],[11],[12])) p order by 1
-- use this select to see points redeemed in rows per month
--select CustomerID, mm, COUNT(mm) PointsRedeemed from j group by CustomerID, mm order by 1
output of default query, 0 is 0-6 months, 1 is 7-12 (age of redemption in months)
CustomerID 0 1
1 700 100
output of 2nd query, 0..12 is the age of redemption in months
CustomerID 0 1 2 3 4 5 6 7 8 9 10 11 12
1 0 700 0 0 0 0 0 0 0 0 0 100 0
output from 3rd query, is the age of redemption in months
CustomerID mm PointsRedeemed
1 1 700
1 11 100
bye

Histogram: Counting orders with variable bins in SQL

I have a table containing orders, items, and prices. I am trying to generate histograms for each item based on the prices.
-- Price observations: one row per customer and item.
CREATE TABLE #Customer_Pricing
(
    customer_id int,
    item_id     VARCHAR(10),
    qty         DECIMAL(5,2),
    price       DECIMAL(5,2)
);
GO
-- Sample rows
INSERT INTO #Customer_Pricing (customer_id, item_id, qty, price)
VALUES
    (128456, 'SOM 555', 8, 2.50),
    (123856, 'SOM 554', 1, 2.50),
    (123456, 'SOM 554', 55, 2.00),
    (123556, 'SOM 555', 2, 2.20),
    (123456, 'SOM 553', 12, 2.13);
For each item, I wanted 3 bins so I determined the bin sizes by dividing the difference of the MAX-MIN by 3, then adding that value to the MIN.
-- Per-item price statistics plus the boundaries of three equal-width price bins.
-- This is only the CTE definition; the statement that consumes it is not shown here.
WITH Stats_Table_CTE (item_id2,max_p, min_p, int_p, r1_upper, r2_lower, r2_upper, r3_lower)
AS
( SELECT item_id
,max(price) -- max_p: highest price of the item
,min(price) -- min_p: lowest price of the item
,(max(price) - min(price))/3 -- int_p: bin width, one third of the price range
,min(price)+(max(price) - min(price))/3-0.01 -- r1_upper: 0.01 below the first boundary
,min(price)+(max(price) - min(price))/3 -- r2_lower: first boundary
,min(price)+((max(price) - min(price))/3)*2-0.01 -- r2_upper: 0.01 below the second boundary
,min(price)+((max(price) - min(price))/3)*2 -- r3_lower: second boundary
FROM #Customer_Pricing
GROUP BY item_id)
Now, I need to count the frequencies for each range and each item. I have attempted to do so by using SUM(CASE...) but was unsuccessful.
-- Asker's attempt at counting rows per price bin; it does not run as written.
-- NOTE(review): Stats_Table_CTE as declared exposes item_id2 and no price column,
-- so item_id and price cannot be resolved from it alone.
SELECT item_id
,SUM(CASE WHEN price <= r1_upper, THEN 1 ELSE 0 END) AS r1_count -- the comma before THEN is not valid CASE syntax
,SUM(CASE WHEN price >= r2_lower AND <= r2_upper, THEN 1 ELSE 0 END) AS r2_count -- "AND <= r2_upper" has no left-hand operand
,SUM(CASE WHEN price >= r3_lower, THEN 1 ELSE 0 END) AS r3_count -- same stray comma before THEN
FROM Stats_Table_CTE
GROUP BY item_id
I also attempted to use COUNT in the form
SELECT item_id, price
count(price <= r1_upper) AS r1_count.... but I got stuck
In one attempt, INNER JOINed the #Customer_Pricing table and Stats_Table_CTE but didn't know where to go from there.
Ideally, I would like the output table to appear as follows: *This is not the actual data, but I included it to show the desired format of the output.
Item ID min_p r1_upper (r2 bins) r3_lower max_p r1_count r2_ct
SOM 553 2.00 2.16 saving space 2.33 2.50 2 1
SOM 554 2.13 2.48 2.88 3.25 1 0
SOM 555 2.31 2.51 2.72 2.92 3 2
*The format of the output table is off, but I have item ID, the bins, and the counts across the top grouped by item
Here is my recommendation:
-- Stats_Table_CTE: price range of each item and the width of one of its three bins.
WITH Stats_Table_CTE AS (
    SELECT item_id,
           max(price) AS maxprice,
           min(price) AS minprice,
           (max(price) - min(price))/3 AS binsize
    FROM #Customer_Pricing
    GROUP BY item_id
)
-- Join every price row back to its item's statistics and count it in exactly one bin.
-- The bins are half-open, [lower, upper), so no row is counted twice; the last bin
-- is open-ended and therefore includes the maximum price.
SELECT cp.item_id,
       SUM(CASE WHEN cp.price < st.minprice + st.binsize
                THEN 1 ELSE 0
           END) AS r1_count,
       SUM(CASE WHEN cp.price >= st.minprice + st.binsize
                 AND cp.price < st.minprice + 2*st.binsize
                THEN 1 ELSE 0
           END) AS r2_count,
       SUM(CASE WHEN cp.price >= st.minprice + 2*st.binsize
                THEN 1 ELSE 0
           END) AS r3_count
FROM #Customer_Pricing cp JOIN
     Stats_Table_CTE st
     ON st.item_id = cp.item_id
GROUP BY cp.item_id
The important part is the join back to #Customer_Pricing. Also important is the simplification of the logic -- you can define the bounds for the bins and use <, rather than having a lower and upper bound for each one. Also, your query had some syntax errors in it.
Note that in many databases, the CTE would not be necessary because you could just use window functions. Your question is not tagged with the database (although I could guess what it is), so that change seems unwarranted.

Results of multiple queries with aggregates combined

I have 2 separate select statements, using aggregate functions in each. I would like to be able to take the results and combine them.
table_a
id int
entered_date datetime (holds utc stamp)
balance money
group_id int
table_b
id int
entered_date date
balance money
transaction_type int
query 1:
-- Earned total per calendar date, restricted to groups 1-4.
SELECT
    CONVERT(date, entered_date),
    SUM(balance) AS earned
FROM table_a
WHERE group_id IN (1, 2, 3, 4)
GROUP BY CONVERT(date, entered_date)
query 2:
-- Spent total per calendar date for transaction type 2.
-- NOTE(review): no FROM clause is present; transaction_type is listed under table_b
-- in the schema above, so table_b is presumably the intended source - confirm.
select convert(date,entered_date), sum(balance) as spent
where transaction_type = 2
group by convert(date,entered_Date)
results:
query 1:
2012-05-13, 5000
2012-05-14, 12000
...
query 2:
2012-05-13, 9000
2012-05-14, 55856
...
I would like to return one row for each date without using temp tables. The result set should have a date, then earned vs. spent. I have a report running using union to get the totals and that is fine, but I need to generate a result set with one record per date that shows earned against spent. Any help with this is appreciated.
Try:
-- AllEarned: earned total per calendar date (table_a, groups 1-4).
;WITH AllEarned AS (
    SELECT CONVERT(date, entered_date) AS EnteredDate,
           SUM(balance) AS Earned
    FROM table_a
    WHERE group_id IN (1, 2, 3, 4)
    GROUP BY CONVERT(date, entered_date)
)
-- AllSpent: spent total per calendar date (table_b, transaction type 2).
, AllSpent AS (
    SELECT CONVERT(date, entered_date) AS EnteredDate,
           SUM(balance) AS Spent
    FROM table_b
    WHERE transaction_type = 2
    GROUP BY CONVERT(date, entered_date)
)
-- AllDates: every date that appears on either side, de-duplicated by UNION.
, AllDates AS (
    SELECT EnteredDate FROM AllEarned
    UNION
    SELECT EnteredDate FROM AllSpent
)
-- One row per date; a side with no activity on that date is NULL.
SELECT
    d.EnteredDate,
    e.Earned,
    s.Spent
FROM AllDates d
LEFT JOIN AllEarned e ON e.EnteredDate = d.EnteredDate
LEFT JOIN AllSpent s ON s.EnteredDate = d.EnteredDate
ORDER BY d.EnteredDate, e.Earned, s.Spent
You can combine these using logic, assuming that both are from the same table
(the second query is missing the from statement):
-- Earned and spent per calendar date from a single pass over table_a.
-- Rows that match neither condition contribute NULL, which SUM ignores.
SELECT
    CONVERT(date, entered_date),
    SUM(CASE WHEN group_id IN (1, 2, 3, 4) THEN balance ELSE NULL END) AS earned,
    SUM(CASE WHEN transaction_type = 2 THEN balance ELSE NULL END) AS spend
FROM table_a
GROUP BY CONVERT(date, entered_date)
-- Earned and spent totals side by side, one row per calendar date.
-- Each side is grouped by the same DATE value it returns (not by the raw
-- datetime), so it yields one row per day and the join stays one-to-one.
SELECT
    COALESCE(a.a_date, b.a_date) AS a_data,   -- date from whichever side has a row
    a.earned,
    b.spent
FROM
    (SELECT
         CONVERT(date, entered_date) AS a_date,
         SUM(balance) AS earned
     FROM table_a
     WHERE group_id IN (1, 2, 3, 4)
     GROUP BY CONVERT(date, entered_date)) a
FULL OUTER JOIN
    -- NOTE(review): the question's schema lists transaction_type on table_b;
    -- table_a is kept as written - confirm the intended source table.
    (SELECT
         CONVERT(date, entered_date) AS a_date,
         SUM(balance) AS spent
     FROM table_a
     WHERE transaction_type = 2
     GROUP BY CONVERT(date, entered_date)) b
    ON a.a_date = b.a_date
Or using FULL OUTER JOIN if there are data that don't meet both conditions. And using CASE WHEN a.a_date IS NULL THEN b.a_date ELSE a.a_date END as a_data
Assuming earned amounts are from table_a and spent amounts are from table_b,
-- movements: earned rows (table_a) and spent rows (table_b) stacked in one shape,
-- each row carrying 0 in the column that does not apply to it.
; WITH movements AS (
    SELECT CONVERT(date, entered_date) AS entered_date,
           balance AS earned,
           0 AS spent
    FROM table_a
    WHERE group_id IN (1, 2, 3, 4)

    UNION ALL

    SELECT CONVERT(date, entered_date) AS entered_date,
           0 AS earned,
           balance AS spent
    FROM table_b
    WHERE transaction_type = 2
)
-- Collapse to one row per date with both totals.
SELECT
    entered_date,
    SUM(earned) AS earned,
    SUM(spent) AS spent
FROM movements
GROUP BY entered_date