how to do the program using subquery approach - sql

-- Per-SKU totals of AMT for rows with STYPE='P', split into June, July and
-- August columns, plus the sum of the three months as totalsales.
-- A CASE with no ELSE yields NULL, so a SKU with no matching rows in one of
-- the months gets NULL for that month and therefore a NULL totalsales.
-- NOTE(review): totalsales refers to aliases defined in this same SELECT list;
-- whether that is accepted is engine-specific -- confirm on the target engine.
SELECT SKU, SUM(CASE WHEN EXTRACT(MONTH FROM SALEDATE)=6 AND STYPE='P'
THEN AMT
END) AS VALUEJUNE,
SUM(CASE WHEN EXTRACT(MONTH FROM SALEDATE)=7 AND STYPE='P'
THEN AMT
END) AS VALUEJULY,
SUM(CASE WHEN EXTRACT(MONTH FROM SALEDATE)=8 AND STYPE='P'
THEN AMT
END) AS VALUEAUGUST
,(VALUEJUNE+VALUEJULY+VALUEAUGUST) AS totalsales
FROM TRNSACT
GROUP BY SKU
ORDER BY totalsales DESC ;

Put most of your query in a derived table, including the GROUP BY. Calculate totalsales on its result:
-- Derived-table form: the inner query aggregates purchases (STYPE = 'P') per
-- SKU into one column per month; the outer query adds the three monthly
-- columns, so no alias is referenced at the level where it is defined.
-- A month with no purchases is NULL, which makes totalsales NULL as well.
SELECT
    monthly.SKU,
    monthly.VALUEJUNE,
    monthly.VALUEJULY,
    monthly.VALUEAUGUST,
    (monthly.VALUEJUNE + monthly.VALUEJULY + monthly.VALUEAUGUST) AS totalsales
FROM (
    SELECT
        SKU,
        SUM(CASE WHEN STYPE = 'P' AND EXTRACT(MONTH FROM SALEDATE) = 6 THEN AMT END) AS VALUEJUNE,
        SUM(CASE WHEN STYPE = 'P' AND EXTRACT(MONTH FROM SALEDATE) = 7 THEN AMT END) AS VALUEJULY,
        SUM(CASE WHEN STYPE = 'P' AND EXTRACT(MONTH FROM SALEDATE) = 8 THEN AMT END) AS VALUEAUGUST
    FROM TRNSACT
    GROUP BY SKU
) monthly
ORDER BY totalsales DESC;

Related

SQL How to group data into separate month columns

So I'm running this query to get the name of the customer, total amount ordered, and number of orders they've submitted. With this query, I get their entire history from March to July, what I want is the name, march amount total/# of orders, april amount total/# of orders, may amount total/# of orders, ..... etc.
-- One row per customer per calendar month: total amount and number of orders.
SELECT
    customer_name,
    MONTH(created_on),
    SUM(amount),
    COUNT(order_id)
FROM customer_orders
-- Closed range: both boundary dates, including 2020-08-01, are matched.
WHERE created_on >= '2020-03-01'
  AND created_on <= '2020-08-01'
GROUP BY
    customer_name,
    MONTH(created_on)
If you want the values in separate columns, then use conditional aggregation:
-- One row per customer with a total-amount and an order-count column for each
-- month from March through July 2020 (conditional aggregation).
-- The *_amount columns are NULL when the customer has no orders in that month;
-- the *_count columns are 0 in that case because of the ELSE 0.
SELECT customer_name,
       SUM(CASE WHEN MONTH(created_on) = 3 THEN amount END) AS march_amount,
       SUM(CASE WHEN MONTH(created_on) = 3 THEN 1 ELSE 0 END) AS march_count,
       SUM(CASE WHEN MONTH(created_on) = 4 THEN amount END) AS april_amount,
       SUM(CASE WHEN MONTH(created_on) = 4 THEN 1 ELSE 0 END) AS april_count,
       SUM(CASE WHEN MONTH(created_on) = 5 THEN amount END) AS may_amount,
       SUM(CASE WHEN MONTH(created_on) = 5 THEN 1 ELSE 0 END) AS may_count,
       SUM(CASE WHEN MONTH(created_on) = 6 THEN amount END) AS june_amount,
       SUM(CASE WHEN MONTH(created_on) = 6 THEN 1 ELSE 0 END) AS june_count,
       SUM(CASE WHEN MONTH(created_on) = 7 THEN amount END) AS july_amount,
       SUM(CASE WHEN MONTH(created_on) = 7 THEN 1 ELSE 0 END) AS july_count
FROM customer_orders
-- Half-open range: everything from 2020-03-01 up to, but not including,
-- 2020-08-01, so each MONTH() value maps to exactly one month of 2020.
WHERE created_on >= '2020-03-01' AND
      created_on < '2020-08-01'
GROUP BY customer_name;
Notice that I changed the date filter so it does not include 2020-08-01.

I am looking to find customers repurchase frequency in SQL from their first purchase date

I am trying to find the customer's repurchase rates from their first order date. For example, for 2016, how many customer purchased 1X in days 1-365 from their initial purchase, how many purchased twice etc.
I have a transaction_detail table which looks like below:
txn_date Customer_ID Transaction_Number Sales
1/2/2019 1 12345 $10
4/3/2018 1 65890 $20
3/22/2019 3 64453 $30
4/3/2019 4 88567 $20
5/21/2019 4 85446 $15
1/23/2018 5 89464 $40
4/3/2019 5 99674 $30
4/3/2019 6 32224 $20
1/23/2018 6 46466 $30
1/20/2018 7 56558 $30
I am able to find the customers who have shopped in 2016 and how many times have they repurchased in 2016, but I need to find the customer who have shopped in 2016 and how many times have they come back from their first purchase date.
I need a starting point for the query, I am not sure how to build this logic in my SQL code.
Any help would be appreciated.
I am using the below query:
-- Yearly cohort matrix: for each first-visit year, how many customers were
-- seen again 0, 1, 2, ... 9 calendar years later.
-- by_year: one row per (customer, calendar year with at least one transaction).
WITH by_year
AS (SELECT
Customer_ID,
to_char(txn_date, 'YYYY') AS visit_year
FROM table
GROUP BY Customer_ID, to_char(txn_date, 'YYYY')),
-- with_first_year: attaches each customer's earliest visit year to every row.
with_first_year
AS (SELECT
Customer_ID,
visit_year,
FIRST_VALUE(visit_year) OVER (PARTITION BY Customer_ID ORDER BY visit_year) AS first_year
FROM by_year),
-- with_year_number: offset in calendar years from the first visit year.
-- NOTE(review): visit_year and first_year come from to_char(), i.e. strings;
-- the subtraction relies on implicit conversion -- confirm on the target engine.
with_year_number
AS (SELECT
Customer_ID,
visit_year,
first_year,
(visit_year - first_year) AS year_number
FROM with_first_year)
-- Final pivot: one row per first_year, one column per year offset.
-- This buckets by calendar year, not by days elapsed since the first purchase.
SELECT
first_year AS first_year,
SUM(CASE WHEN year_number = 0 THEN 1 ELSE 0 END) AS year_0,
SUM(CASE WHEN year_number = 1 THEN 1 ELSE 0 END) AS year_1,
SUM(CASE WHEN year_number = 2 THEN 1 ELSE 0 END) AS year_2,
SUM(CASE WHEN year_number = 3 THEN 1 ELSE 0 END) AS year_3,
SUM(CASE WHEN year_number = 4 THEN 1 ELSE 0 END) AS year_4,
SUM(CASE WHEN year_number = 5 THEN 1 ELSE 0 END) AS year_5,
SUM(CASE WHEN year_number = 6 THEN 1 ELSE 0 END) AS year_6,
SUM(CASE WHEN year_number = 7 THEN 1 ELSE 0 END) AS year_7,
SUM(CASE WHEN year_number = 8 THEN 1 ELSE 0 END) AS year_8,
SUM(CASE WHEN year_number = 9 THEN 1 ELSE 0 END) AS year_9
FROM with_year_number
GROUP BY first_year
ORDER BY first_year
Use window functions and aggregation:
-- Distribution of purchase counts within 365 days of each customer's first
-- purchase: for every count value, how many customers have it, plus the
-- smallest and largest customer_id as examples.
WITH with_first_purchase AS (
    -- Every transaction, tagged with that customer's earliest txn_date.
    SELECT td.*,
           MIN(txn_date) OVER (PARTITION BY Customer_ID) AS min_txn_date
    FROM transaction_detail td
),
first_year_counts AS (
    -- Transactions per customer inside the half-open 365-day window.
    SELECT customer_id,
           COUNT(*) AS cnt
    FROM with_first_purchase
    WHERE txn_date >= min_txn_date
      AND txn_date < min_txn_date + INTERVAL '365' DAY
    GROUP BY customer_id
)
SELECT cnt,
       COUNT(*),
       MIN(customer_id),
       MAX(customer_id)
FROM first_year_counts
GROUP BY cnt
ORDER BY cnt;
So as per my understanding, you want to know the count of the distinct person who first purchased in 2016 and repurchased after one year or more from date of purchase.
-- For customers whose first purchase fell in 2016, count how many waited
-- 1, 2, 3 or 4 whole years before their second purchase.
-- Customers with no second purchase, or one within 12 months, are excluded.
SELECT *
FROM (
    SELECT customer_id,
           -- MONTHS_BETWEEN(later, earlier) is positive. With the arguments the
           -- other way round the result is negative and ">= 12" never matches.
           FLOOR(MONTHS_BETWEEN(lead_txn_date, txn_date) / 12) AS num_years
    FROM (
        SELECT customer_id,
               txn_date,
               -- rn = 1 marks the customer's first purchase overall.
               ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY txn_date) AS rn,
               -- Date of the purchase that follows this one (NULL if none).
               LEAD(txn_date) OVER (PARTITION BY customer_id ORDER BY txn_date) AS lead_txn_date
        FROM your_table
    )
    WHERE rn = 1
      AND txn_date >= DATE '2016-01-01'
      AND txn_date <  DATE '2017-01-01'
      AND MONTHS_BETWEEN(lead_txn_date, txn_date) >= 12
)
PIVOT (
    -- The FOR column must match the alias above (num_years).
    -- Counting customer_id keeps it out of PIVOT's implicit grouping columns,
    -- so the result is a single row of customer counts per year bucket.
    COUNT(customer_id) FOR num_years IN (1, 2, 3, 4)
)
Ultimately, we are finding the number of years between first and second purchase of the customer. And first purchase must be in 2016.
Cheers!!

SUM values BETWEEN specific dates in BigQuery

I need to query a 12, 24, 36 and 48 month total for each customer
I've got a dataset that includes customer information (customer_id, products, spend, qty, purchase_date, etc) I need to display the totals for the different periods per customer
-- Attempt at 12/24/36/48-month spend totals per customer, measured from the
-- customer's first purchase.
-- Each DATETIME_ADD( call below is missing its closing parenthesis before
-- THEN, which is what the reported syntax error points at.
-- MIN(purchase_date) is also nested inside SUM(...), and the query groups by
-- purchase_date in addition to customer_id.
SELECT customer_id, MIN(purchase_date) AS first_purchase,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 1 YEAR THEN spend END) AS 12_mnth_total,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 2 YEAR THEN spend END) AS 24_mnth_total,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 3 YEAR THEN spend END) AS 36_mnth_total,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 4 YEAR THEN spend END) AS 48_mnth_total
FROM SalesTable
GROUP BY customer_id, purchase_date
ORDER BY purchase_date
My query shows me the following error: Syntax error: Expected ")" but got keyword THEN
You seem to want to count from the first purchase. You cannot nest aggregation functions the way that you are doing it. Instead, use a window function to get the minimum date for each customer and then aggregate:
-- Spend per customer within 1, 2, 3 and 4 years of the customer's first
-- purchase. The derived table attaches the first purchase date to every row
-- with a window function, so the outer aggregation never nests MIN inside SUM.
-- A total is NULL when no purchase falls inside that period.
SELECT customer_id, MIN(purchase_date) AS first_purchase,
       -- Aliases that start with a digit must be backtick-quoted in BigQuery.
       -- BETWEEN is inclusive, so a purchase exactly N years after the first
       -- one is counted in that period.
       SUM(CASE WHEN purchase_date BETWEEN min_purchase_date AND DATETIME_ADD(min_purchase_date, INTERVAL 1 YEAR) THEN spend
           END) AS `12_mnth_total`,
       SUM(CASE WHEN purchase_date BETWEEN min_purchase_date AND DATETIME_ADD(min_purchase_date, INTERVAL 2 YEAR) THEN spend
           END) AS `24_mnth_total`,
       SUM(CASE WHEN purchase_date BETWEEN min_purchase_date AND DATETIME_ADD(min_purchase_date, INTERVAL 3 YEAR) THEN spend
           END) AS `36_mnth_total`,
       SUM(CASE WHEN purchase_date BETWEEN min_purchase_date AND DATETIME_ADD(min_purchase_date, INTERVAL 4 YEAR) THEN spend
           END) AS `48_mnth_total`
FROM (SELECT s.*,
             MIN(purchase_date) OVER (PARTITION BY customer_id) AS min_purchase_date
      FROM SalesTable s
     ) t
GROUP BY customer_id
ORDER BY first_purchase;
You ca simplify the logic by removing the first comparison in the case:
-- Same totals with a single upper-bound comparison: every purchase is on or
-- after the customer's first one, so only the upper bound needs to be tested.
-- A total is NULL when no purchase falls inside that period.
SELECT customer_id, MIN(purchase_date) AS first_purchase,
       -- Aliases that start with a digit must be backtick-quoted in BigQuery.
       SUM(CASE WHEN purchase_date <= DATETIME_ADD(min_purchase_date, INTERVAL 1 YEAR) THEN spend
           END) AS `12_mnth_total`,
       SUM(CASE WHEN purchase_date <= DATETIME_ADD(min_purchase_date, INTERVAL 2 YEAR) THEN spend
           END) AS `24_mnth_total`,
       SUM(CASE WHEN purchase_date <= DATETIME_ADD(min_purchase_date, INTERVAL 3 YEAR) THEN spend
           END) AS `36_mnth_total`,
       SUM(CASE WHEN purchase_date <= DATETIME_ADD(min_purchase_date, INTERVAL 4 YEAR) THEN spend
           END) AS `48_mnth_total`
FROM (SELECT s.*,
             -- First purchase date of the customer, repeated on every row.
             MIN(purchase_date) OVER (PARTITION BY customer_id) AS min_purchase_date
      FROM SalesTable s
     ) t
GROUP BY customer_id
ORDER BY first_purchase;
Any purchase is logically on or after the first one.
The function DATETIME_ADD is not closed. I put it here INTERVAL 1 YEAR")".
Wouldnt know the exact sintax but its a good guess.
-- The asker's query with the closing parenthesis added to each DATETIME_ADD(...)
-- call; nothing else is changed.
-- NOTE(review): MIN(purchase_date) is still nested inside SUM(...), and the
-- query still groups by purchase_date as well as customer_id, so this only
-- addresses the parenthesis error -- confirm before relying on it.
SELECT customer_id, MIN(purchase_date) AS first_purchase,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 1 YEAR) THEN spend END) AS 12_mnth_total,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 2 YEAR) THEN spend END) AS 24_mnth_total,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 3 YEAR) THEN spend END) AS 36_mnth_total,
SUM(CASE WHEN purchase_date BETWEEN MIN(purchase_date) AND DATETIME_ADD(MIN(purchase_date), INTERVAL 4 YEAR) THEN spend END) AS 48_mnth_total
FROM SalesTable
GROUP BY customer_id, purchase_date
ORDER BY purchase_date

Error with group by statement?

I'm getting the following error when I run this code. please help me out.
-- Attempt at per-store November/December revenue and average revenue per sale day.
-- The outer query groups by store only, yet its SELECT list also contains the
-- non-aggregated columns CountNov, CountDec, SumNov and SumDec (and the two
-- averages derived from them); this is the query the reported error 3504 is for.
SELECT store, COUNT(DISTINCT saledate), CountNov, CountDec, SumNov, SumDec, (SumNov/CountNov) AS NovAvgRvn, (SumDec/CountDec) AS DecAvgRvn FROM
(
-- Inner query: one row per (store, saledate) with that day's Nov/Dec sums and
-- distinct-date counts; each CASE ... IS NULL wrapper replaces NULL with 0.
SELECT store, saledate,
CASE WHEN SUM(CASE EXTRACT(MONTH FROM saledate) WHEN '11' THEN amt END) IS NULL THEN 0
ELSE SUM(CASE EXTRACT(MONTH FROM saledate) WHEN '11' THEN amt END)
END AS SumNov,
CASE WHEN SUM(CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN amt END) IS NULL THEN 0
ELSE SUM(CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN amt END)
END AS SumDec,
CASE WHEN COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '11' THEN saledate END) IS NULL THEN 0
ELSE COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '11' THEN saledate END)
END AS CountNov,
CASE WHEN COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN saledate END) IS NULL THEN 0
ELSE COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN saledate END)
END AS CountDec
FROM trnsact
WHERE stype = 'p'
GROUP BY store, saledate
) AS T1
WHERE CountDec > 0 AND CountNov > 0
GROUP BY store
ORDER BY store;
Error:
Error Code - 3504
Error Message - [Teradata Database] [TeraJDBC 15.10.00.05] [Error 3504] [SQLState HY000] Selected non-aggregate values must be part of the associated group.
Why are you using a nested query for this?
-- Per-store November and December revenue and number of distinct sale dates,
-- computed in a single aggregation level (no derived table needed).
SELECT store, COUNT(DISTINCT saledate),
       -- ELSE 0 keeps the sums at 0 rather than NULL when a month has no rows.
       SUM(CASE EXTRACT(MONTH FROM saledate) WHEN 11 THEN amt ELSE 0 END) AS SumNov,
       -- December must test month 12; testing 11 here would duplicate SumNov.
       SUM(CASE EXTRACT(MONTH FROM saledate) WHEN 12 THEN amt ELSE 0 END) AS SumDec,
       COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN 11 THEN saledate END) AS CountNov,
       COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN 12 THEN saledate END) AS CountDec
FROM trnsact
WHERE stype = 'p'
GROUP BY store;
Notes:
COUNT() never returns NULL so there is no need for the CASE at all.
With an ELSE clause, the SUM() doesn't return NULL either (there would have to be no rows matching for the SUM() to return NULL, and with no matching rows in the group, the group wouldn't exist).
EXTRACT() returns a number, so compare to a number.
Anything that is in your select statement which is not an aggregate like SUM, AVG etc. need to be included in your group by clause if you have a group by clause
Try this hope it helps:
-- Variant of the asker's query that adds every selected column to both
-- GROUP BY clauses.
-- NOTE(review): the inner GROUP BY lists SumNov, SumDec, CountNov and CountDec,
-- which are aliases of aggregate expressions defined in the same SELECT, and
-- the outer GROUP BY lists saledate, which the outer SELECT only uses inside
-- COUNT(DISTINCT ...) -- confirm that the target engine accepts this and that
-- the resulting grouping is the one intended.
SELECT store, COUNT(DISTINCT saledate), CountNov, CountDec, SumNov, SumDec, (SumNov/CountNov) AS NovAvgRvn, (SumDec/CountDec) AS DecAvgRvn FROM
(
SELECT store, saledate,
CASE WHEN
SUM(CASE EXTRACT(MONTH FROM saledate)
WHEN '11' THEN amt END) IS NULL THEN 0
ELSE SUM(CASE EXTRACT(MONTH FROM saledate) WHEN '11' THEN amt END)
END AS SumNov,
CASE WHEN SUM(CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN amt END) IS NULL THEN 0
ELSE SUM(CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN amt END)
END AS SumDec,
CASE WHEN COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '11' THEN saledate END) IS NULL THEN 0
ELSE COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '11' THEN saledate END)
END AS CountNov,
CASE WHEN COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN saledate END) IS NULL THEN 0
ELSE COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN '12' THEN saledate END)
END AS CountDec
FROM trnsact
WHERE stype = 'p'
GROUP BY store, saledate, SumNov, SumDec, CountNov, CountDec
) AS T1
WHERE CountDec > 0 AND CountNov > 0
GROUP BY store, saledate, CountNov, CountDec, SumNov, SumDec, NovAvgRvn, DecAvgRvn
ORDER BY store;
The GROUP BY in the Derived Table is not doing aggregation on a month level, thus the COUNT(DISTINCT saledate)will be 1. You shouldn't use strings for numeric data (result of EXTRACT). You don't need the CASE(SUM) because COUNT never returns a NULL (you might use COALESCE instead):
I assume you want a query like this instead:
-- Per-store November/December revenue, number of distinct sale dates, and
-- average revenue per sale date, restricted to rows from those two months.
SELECT store,
       -- if you only need the dates from Nov & Dec you can simply do
       -- CountNov + CountDec instead
       COUNT(DISTINCT saledate),
       SUM(CASE EXTRACT(MONTH FROM saledate) WHEN 11 THEN amt ELSE 0 END) AS SumNov,
       SUM(CASE EXTRACT(MONTH FROM saledate) WHEN 12 THEN amt ELSE 0 END) AS SumDec,
       COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN 11 THEN saledate END) AS CountNov,
       COUNT(DISTINCT CASE EXTRACT(MONTH FROM saledate) WHEN 12 THEN saledate END) AS CountDec,
       -- A store with sales in only one of the two months has a count of 0 for
       -- the other; NULLIF turns that into NULL so the average is NULL instead
       -- of a division-by-zero error.
       (SumNov / NULLIF(CountNov, 0)) AS NovAvgRvn,
       (SumDec / NULLIF(CountDec, 0)) AS DecAvgRvn
FROM trnsact
WHERE stype = 'p'
-- don't you need a condition to filter for a specific year/month?
  AND EXTRACT(MONTH FROM saledate) IN (11,12)
GROUP BY store
ORDER BY store;

Incremental adding in sql select

I have a table where customer transactions are stored in this format:
Account Tran_type Tran_Amount tran_particular Tran_date
165266 C 5000 deposit 19_SEP-2014
165266 D 3000 withdrawal 20-SEP-2014
165266 C 8000 Deposit 21-SEP-2014
I am attempting to extract the Information for a Statement like this:
-- Statement lines: each transaction's amount is shown in the CREDIT column
-- when tran_type is 'C' and in the DEBIT column when it is 'D'; the other
-- column shows 0. Rows are listed in date order.
select tran_date, tran_particular,
(case when tran_type = 'C' then tran_amt else 0 end) CREDIT,
(case when tran_type = 'D' then tran_amt else 0 end) DEBIT
from tran_table order by tran_date asc;
Is there a wat to add the Balance column on each row so it would show the Balance after the Transaction? say:
DATE DESC CREDIT DEBIT BALANCE
19-SEP-2014 DEPOSIT 5000 0 5000
20-SEP-2014 WITHDRAWAL 3000 2000
21-SEP-2014 DEPOSIT 8000 0 10000
Please assist.
EDIT I have trie the aswers suggested but it seems my balance is tagged to the date. See the output I have currently:
See the Balance does not change until the date changes.
-- Statement lines with a running balance: credits add to the balance, debits
-- subtract from it.
select tran_date, tran_particular, Credit, Debit,
       -- An explicit ROWS frame makes the total advance on every row. With the
       -- default frame (RANGE), all rows sharing a tran_date are peers and get
       -- the same balance, which only changes when the date changes.
       -- NOTE(review): rows with the same tran_date have no defined relative
       -- order here; add a unique tiebreaker column to the ORDER BY if the
       -- table has one.
       SUM(Delta) OVER (ORDER BY tran_date
                        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Balance
from
(
    select tran_date, tran_particular,
           Case Tran_Type
               When 'C' THEN Tran_Amount
               Else 0
           End AS Credit,
           Case Tran_Type
               When 'D' THEN Tran_Amount
               Else 0
           End AS Debit,
           -- Signed amount: positive for credits, negative for debits.
           Case Tran_Type
               When 'C' THEN Tran_Amount
               When 'D' THEN -1 * Tran_Amount
               Else 0
           End AS Delta
    from TRANSACTIONS
) t
-- Ordering belongs on the outer query; an ORDER BY inside the derived table
-- does not determine the order of the final result.
order by tran_date
Should do it
-- Every column of #tt1 plus a running balance in date order: rows of type 'C'
-- add their amount, every other row subtracts it.
SELECT
    *,
    SUM(CASE type WHEN 'C' THEN amount ELSE -amount END)
        OVER (ORDER BY date
              ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS 'Balance'
FROM #tt1
That will cost a sub-query:
-- Statement lines with a running balance computed by a correlated subquery:
-- for each row, sum the signed amounts of all rows of the same account up to
-- and including this one.
-- NOTE(review): relies on a tran_id column that increases with transaction
-- order; it is not shown in the sample table -- confirm it exists.
SELECT tran_date, tran_particular,
       (CASE WHEN tran_type = 'C' THEN tran_amt ELSE 0 END) CREDIT,
       (CASE WHEN tran_type = 'D' THEN tran_amt ELSE 0 END) DEBIT,
       (SELECT
            -- Credits count positive; everything else counts negative.
            SUM(CASE WHEN trn2.tran_type = 'C' THEN trn2.tran_amt ELSE (-1) * trn2.tran_amt END)
        FROM tran_table trn2
        WHERE
            trn2.Account = trn1.Account
            AND trn2.tran_id <= trn1.tran_id
            -- AND trn2.tran_date <= trn1.tran_date
       )
       BALANCE
FROM
    tran_table trn1 ORDER BY tran_date asc;
In large scale data, having such a sub-query is not recommended. Having a materialized view is more rational.