SQL Sum Group by Custom Group - sql

I have generated the following query, and I want to sum the patient counts by the new IMS_CUST_ID_GRP I created (i.e. roll the result up to this group and remove CUST_ID) in my query. How can I modify my query to return my desired result? I tried using an analytic function, but I get an error.
-- Question's starting query: one row per CUST_ID with that customer's total
-- NEW_PAT_CNT and a bucket label (IMS_CUST_ID_GRP) derived from the total.
-- It does not roll the totals up to one row per bucket.
SELECT (CASE WHEN SUM(NEW_PAT_CNT) = 1 THEN '1'
WHEN SUM(NEW_PAT_CNT) >=2 AND SUM(NEW_PAT_CNT) <=12 THEN '2-12'
WHEN SUM(NEW_PAT_CNT) >=13 AND SUM(NEW_PAT_CNT)<=24 THEN '13-24'
-- NOTE(review): the lower bound here is 24 but the label says 25; a total of 24
-- is already caught by the '13-24' branch above, so this branch effectively starts at 25.
WHEN SUM(NEW_PAT_CNT) >=24 AND SUM(NEW_PAT_CNT) <=48 THEN '25-48'
WHEN SUM(NEW_PAT_CNT) >48 THEN '>48'
END) IMS_CUST_ID_GRP, SUM(NEW_PAT_CNT), CUST_ID
-- Comma-style (implicit) joins; the join conditions are in the WHERE clause.
FROM DEXODS.OPUB_ONE_IMS_IDS_FACT fct,
DEXWHS.D_DATE dt,
DEXWHS.D_ACCOUNT_VEEVA ac
WHERE fct.DATE_DIM_KEY = dt.DATE_ID
AND fct.ACCOUNT_DIM_KEY = ac.ACCOUNT_DIM_KEY
AND NEW_PAT_CNT >0
-- NOTE(review): IN is used with a single unparenthesised literal; confirm the
-- target engine accepts this form.
AND dt.year in n'2016'
GROUP BY CUST_ID
Analytic function which returns an error
-- Asker's second attempt, reported to raise an error: the same bucket CASE,
-- but written with SUM(...) OVER (PARTITION BY CUST_ID) instead of a plain SUM.
-- NOTE(review): the statement still has GROUP BY CUST_ID, and the window
-- functions read NEW_PAT_CNT, which is neither grouped nor aggregated at that
-- level; presumably this is the cause of the error. Confirm against the actual message.
-- Unlike the first query, this one has no dt.year filter.
SELECT (CASE WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) = 1 THEN '1'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >=2 AND SUM(NEW_PAT_CNT)OVER (PARTITION BY CUST_ID) <=12 THEN '2-12'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >=13 AND SUM(NEW_PAT_CNT)OVER (PARTITION BY CUST_ID) <=24 THEN '13-24'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >=24 AND SUM(NEW_PAT_CNT)OVER (PARTITION BY CUST_ID) <=48 THEN '25-48'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >48 THEN '>48'
END) IMS_CUST_ID_GRP, CUST_ID, SUM(NEW_PAT_CNT)
FROM DEXODS.OPUB_ONE_IMS_IDS_FACT fct,
DEXWHS.D_DATE dt,
DEXWHS.D_ACCOUNT_VEEVA ac
WHERE fct.DATE_DIM_KEY = dt.DATE_ID
AND fct.ACCOUNT_DIM_KEY = ac.ACCOUNT_DIM_KEY
AND NEW_PAT_CNT >0
GROUP BY CUST_ID
Dataset with CUST_ID included
IMS_CUST_ID_GRP SUM(NEW_PAT_CNT) CUST_ID
1 1 55671832
1 1 56097728
2-12 4 56106239
2-12 5 56728330
2-12 9 57590869
2-12 2 55609391
2-12 9 55880657
2-12 10 56339375
2-12 3 57371546
25-48 39 55891493
13-24 21 55714333
13-24 22 56542678
Desired Dataset rolled up to IMS_CUST_ID_GRP
IMS_CUST_ID_GRP SUM(NEW_PAT_CNT)
1 2
2-12 42
13-24 43
25-48 39

If you are doing a pivot query, using GROUP BY, then you don't need to use SUM() as an analytic function. Just use it normally:
-- Pivot of patient totals by customer-size bucket (one output row, one column per bucket).
--   Inner query: total NEW_PAT_CNT per CUST_ID, using a plain GROUP BY (no analytic SUM).
--   Outer query: place each customer in a bucket according to that total and add
--   the totals up per bucket, so CUST_ID no longer appears in the result.
-- The buckets are defined on the per-customer total, not on CUST_ID itself.
-- Column aliases are double-quoted identifiers; single quotes would make them string literals.
SELECT
    SUM(CASE WHEN cust_pat_cnt = 1 THEN cust_pat_cnt ELSE 0 END) AS "1",
    SUM(CASE WHEN cust_pat_cnt >= 2 AND cust_pat_cnt <= 12 THEN cust_pat_cnt ELSE 0 END) AS "2-12",
    SUM(CASE WHEN cust_pat_cnt >= 13 AND cust_pat_cnt <= 24 THEN cust_pat_cnt ELSE 0 END) AS "13-24",
    -- lower bound is 25 so that the range matches its label and does not overlap '13-24'
    SUM(CASE WHEN cust_pat_cnt >= 25 AND cust_pat_cnt <= 48 THEN cust_pat_cnt ELSE 0 END) AS "25-48",
    SUM(CASE WHEN cust_pat_cnt > 48 THEN cust_pat_cnt ELSE 0 END) AS ">48"
FROM (
    SELECT CUST_ID,
           SUM(NEW_PAT_CNT) AS cust_pat_cnt
    FROM DEXODS.OPUB_ONE_IMS_IDS_FACT fct
    INNER JOIN DEXWHS.D_DATE dt
        ON fct.DATE_DIM_KEY = dt.DATE_ID
    INNER JOIN DEXWHS.D_ACCOUNT_VEEVA ac
        ON fct.ACCOUNT_DIM_KEY = ac.ACCOUNT_DIM_KEY
    WHERE NEW_PAT_CNT > 0 -- should not be necessary assuming count no less than zero
      -- year filter carried over from the question's first query; remove it to cover all years
      AND dt.year IN (n'2016')
    GROUP BY CUST_ID
) per_customer
Note that I have replaced your implicit joins with explicit INNER JOINs. As you may have heard, you should try to avoid putting commas in the FROM clause.

Related

I am looking to find customers repurchase frequency in SQL from their first purchase date

I am trying to find the customer's repurchase rates from their first order date. For example, for 2016, how many customer purchased 1X in days 1-365 from their initial purchase, how many purchased twice etc.
I have a transaction_detail table which looks like below:
txn_date Customer_ID Transaction_Number Sales
1/2/2019 1 12345 $10
4/3/2018 1 65890 $20
3/22/2019 3 64453 $30
4/3/2019 4 88567 $20
5/21/2019 4 85446 $15
1/23/2018 5 89464 $40
4/3/2019 5 99674 $30
4/3/2019 6 32224 $20
1/23/2018 6 46466 $30
1/20/2018 7 56558 $30
I am able to find the customers who shopped in 2016 and how many times they repurchased within 2016, but I need to find the customers who shopped in 2016 and how many times they came back, counted from their first purchase date.
I need a starting point for the query, I am not sure how to build this logic in my SQL code.
Any help would be appreciated.
I am using the below query:
-- Cohort table: for each first-purchase year, how many customers were active
-- 0, 1, 2, ... 9 calendar years after that first year.
WITH yearly_visits AS (
    -- one row per customer per calendar year in which they have a transaction
    SELECT DISTINCT
        Customer_ID,
        to_char(txn_date, 'YYYY') AS visit_year
    FROM table
),
visit_offsets AS (
    -- attach the customer's first active year and the offset of each active year from it
    SELECT
        Customer_ID,
        visit_year,
        FIRST_VALUE(visit_year) OVER (PARTITION BY Customer_ID ORDER BY visit_year) AS first_year,
        (visit_year
            - FIRST_VALUE(visit_year) OVER (PARTITION BY Customer_ID ORDER BY visit_year)) AS year_number
    FROM yearly_visits
)
SELECT
    first_year,
    COUNT(CASE WHEN year_number = 0 THEN 1 END) AS year_0,
    COUNT(CASE WHEN year_number = 1 THEN 1 END) AS year_1,
    COUNT(CASE WHEN year_number = 2 THEN 1 END) AS year_2,
    COUNT(CASE WHEN year_number = 3 THEN 1 END) AS year_3,
    COUNT(CASE WHEN year_number = 4 THEN 1 END) AS year_4,
    COUNT(CASE WHEN year_number = 5 THEN 1 END) AS year_5,
    COUNT(CASE WHEN year_number = 6 THEN 1 END) AS year_6,
    COUNT(CASE WHEN year_number = 7 THEN 1 END) AS year_7,
    COUNT(CASE WHEN year_number = 8 THEN 1 END) AS year_8,
    COUNT(CASE WHEN year_number = 9 THEN 1 END) AS year_9
FROM visit_offsets
GROUP BY first_year
ORDER BY first_year
Use window functions and aggregation:
-- Distribution of purchase counts within 365 days of each customer's first purchase.
WITH first_purchase AS (
    -- every transaction, tagged with that customer's earliest transaction date
    SELECT td.*,
           MIN(txn_date) OVER (PARTITION BY Customer_ID) AS min_txn_date
    FROM transaction_detail td
),
first_year_counts AS (
    -- number of transactions per customer inside the 365-day window
    SELECT customer_id,
           COUNT(*) AS cnt
    FROM first_purchase
    WHERE txn_date >= min_txn_date
      AND txn_date < min_txn_date + INTERVAL '365' DAY
    GROUP BY customer_id
)
-- one row per purchase count: how many customers have it, plus two sample customer ids
SELECT cnt,
       COUNT(*),
       MIN(customer_id),
       MAX(customer_id)
FROM first_year_counts
GROUP BY cnt
ORDER BY cnt;
So as per my understanding, you want to know the count of the distinct person who first purchased in 2016 and repurchased after one year or more from date of purchase.
-- Counts customers whose first purchase was in 2016, by the number of whole
-- years that passed before their second purchase (1, 2, 3 or 4 years).
SELECT *
FROM (
    -- Only num_years is projected: PIVOT groups by every column it is not told
    -- about, so keeping customer_id here would yield one row per customer.
    SELECT FLOOR(MONTHS_BETWEEN(lead_txn_date, txn_date) / 12) AS num_years
    FROM (
        SELECT customer_id,
               txn_date,
               -- rn = 1 marks the customer's first purchase
               ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY txn_date) AS rn,
               -- date of the purchase that follows this one (NULL if there is none)
               LEAD(txn_date) OVER (PARTITION BY customer_id ORDER BY txn_date) AS lead_txn_date
        FROM your_table
    )
    WHERE rn = 1
      AND txn_date >= DATE '2016-01-01'
      AND txn_date <  DATE '2017-01-01'
      -- later date goes first: MONTHS_BETWEEN(later, earlier) is positive
      AND MONTHS_BETWEEN(lead_txn_date, txn_date) >= 12
)
PIVOT
(
    COUNT(*) FOR num_years IN (1, 2, 3, 4)
)
Ultimately, we are finding the number of years between first and second purchase of the customer. And first purchase must be in 2016.
Cheers!!

Aging bucket in SQL query

I have a table which has a member ID along with LM_Conversion_date and Retired_date. I have managed to get the difference between the two dates, but now I would like to have aging buckets and show the membership numbers that fall under each bucket. Here is an example of my table and how I want to see the data:
Member_no LM_Conversion_date Retired_date Date_difference
100026 08/12/2017 31/12/2017 23
100114 31/08/2017 31/08/2017 0
100620 15/09/2017 30/09/2017 15
100726 10/01/2017 31/12/2016 -10
I want the output to be
All negative 0-15 15-30 >30
100726 100114 100026
100620
Any Help will be much appreciated
You can do this using conditional aggregation:
-- Lists member numbers side by side, one column per aging bucket.
-- The derived table tags each member with a bucket (grp) and numbers the members
-- inside each bucket (seqnum); grouping by seqnum then puts the n-th member of
-- every bucket on the same output row.
select max(case when grp = '<0' then member_no end) as all_negative,
       max(case when grp = '<=15' then member_no end) as [0-15],
       max(case when grp = '<=30' then member_no end) as [15-30],
       max(case when grp = '>30' then member_no end) as [>30]
from (select t.*, v.grp,
             row_number() over (partition by grp order by member_no) as seqnum
      from t cross apply
           -- strictly negative only: a difference of 0 belongs to the 0-15 bucket
           (values (case when date_difference < 0 then '<0'
                         when date_difference <= 15 then '<=15'
                         when date_difference <= 30 then '<=30'
                         else '>30'
                    end)
           ) v(grp)
     ) t
group by seqnum
order by seqnum;
The subquery basically enumerates the members in each group. These are aggregated into separate rows by the aggregation.

rank out of the total in postgres

I am writing a SQL script in which I want to get the total number of appointments per salesperson and also how each salesperson ranks among all salespersons, e.g. salesperson x has 5 appointments and ranks 4 out of 10 salespersons.
**expected results**:
Salesperson x 5 4/10
Salesperson D 6 5/10
Salesperson s 8 7/10
Use rank()
-- Appointments per salesperson, plus each salesperson's rank among all salespersons.
with sales as
(
    -- count(appointment) counts only rows whose appointment is not null
    select Salesperson, count(appointment) appointments
    from SalesTable
    group by Salesperson
)
select sales.*,
       rank() over (order by appointments desc) as salesrank,
       -- empty OVER () = the whole result set, i.e. the number of salespersons
       count(*) over () as total_salespersons,
       -- the 'rank/total' text asked for in the question, e.g. 4/10
       rank() over (order by appointments desc) || '/' || count(*) over () as rank_of_total
from sales
Hi Thanks for your response. I tried it this way it works:
-- Asker's working attempt: per salesperson, the number of appointments today,
-- in the last 7 days, this month and this year, each paired with a
-- 'rank/total' text column.
-- NOTE(review): the outer list names id and sales_person, while the inner query
-- selects supplied_id and salesperson and groups by id, salesperson; these names
-- do not line up as written and should be confirmed against the real schema.
select id,sales_person,"Appointment/Day",rank_for_the_day,"Appointment/Week",rank_for_the_week,"Appointment/Month",
rank_for_the_month,"Appointment/year",rank_for_the_year
from(
select supplied_id,salesperson,sum(case when appointment_date::date=current_date then 1 else 0 end )"Appointment/Day",
-- rank by today's appointment count, concatenated with '/' and a scalar subquery;
-- the subquery returns an appointment total read from table match, not a count of salespersons
rank() over (order by sum(case when appointment_date::date=current_date then 1 else 0 end ) desc )||'/'||
(select sum(case when appointment_date::date=current_date then 1 else 0 end ) from match where date_part( 'year', appointment_date)=2017
and appointment_date is not null and date_part('day',appointment_date)=date_part('day',current_date) ) rank_for_the_day,
-- same pattern for the window current_date-7 .. current_date
sum(case when appointment_date::date between current_date-7 and current_date then 1 else 0 end )"Appointment/Week",
rank() over (order by sum(case when appointment_date::date between current_date-7 and current_date then 1 else 0 end ) desc)||'/'||
(select sum(case when appointment_date::date between current_date-7 and current_date then 1 else 0 end )
from match m where date_part( 'year', appointment_date)=2017 and appointment_date is not null
and date_part('week',appointment_date)=date_part('week',current_date) ) rank_for_the_week,
-- same pattern, matching on the month number of current_date
sum(case when date_part('month',appointment_date)=date_part('month',current_date) then 1 else 0 end )"Appointment/Month",
rank() over (order by sum(case when date_part('month',appointment_date)=date_part('month',current_date) then 1 else 0 end ) desc)||'/'||
(select sum(case when date_part('month',appointment_date)=date_part('month',current_date) then 1 else 0 end )
from match m where date_part( 'year', appointment_date)=2017 and appointment_date is not null
and date_part('month',appointment_date)=date_part('month',current_date) ) rank_for_the_month,
-- same pattern, matching on the year of current_date; every WHERE clause in this
-- statement also pins the year to the literal 2017
sum(case when date_part('year',appointment_date)=date_part('year',current_date) then 1 else 0 end )"Appointment/year",
rank() over (order by sum(case when date_part('year',appointment_date)=date_part('year',current_date) then 1 else 0 end ) desc)||'/'||
(select sum(case when date_part('year',appointment_date)=date_part('year',current_date) then 1 else 0 end )
from match m where date_part( 'year', appointment_date)=2017 and appointment_date is not null
and date_part('year',appointment_date)=date_part('year',current_date) ) rank_for_the_year
from salespersontable
where date_part( 'year', appointment_date)=2017 and appointment_date is not null
group by id,salesperson
-- position 6 in the outer select list is rank_for_the_week, a concatenated text value
)x order by 6 desc
However, I would appreciate a more efficient way to write this query to minimize resource consumption.

SQL query to group by age range from date created

I want to get statistics with sql query. My table is like this:
ID MATERIAL CREATEDATE DEPARTMENT
1 M1 10.10.1980 D1
2 M2 11.02.1970 D2
2 M3 18.04.1971 D3
.....................
.....................
.....................
How can I get a range of data count like this
DEPARTMENT AGE<10 10<AGE<20 20<AGE
D1 24 123 324
D2 24 123 324
Assuming that CREATEDATE is a date column, in PostgreSQL you can use the AGE function:
-- Age of every material, computed from its creation date with PostgreSQL's age().
SELECT
    DEPARTMENT,
    age(CREATEDATE) AS AGE
FROM Materials
and with date_part you can get the age in years. To show the data in the format that you want, you could use this GROUP BY query:
-- Number of materials per department in each age bucket (age in completed years).
-- ELSE 0 makes a bucket with no materials show 0 instead of NULL.
select
    DEPARTMENT,
    sum(case when date_part('year', age(CREATEDATE)) < 10 then 1 else 0 end) as "age<10",
    -- lower bound is inclusive: an age of exactly 10 is counted here
    sum(case when date_part('year', age(CREATEDATE)) >= 10
              and date_part('year', age(CREATEDATE)) < 20 then 1 else 0 end) as "10<age<20",
    sum(case when date_part('year', age(CREATEDATE)) >= 20 then 1 else 0 end) as "20<age"
from
    Materials
group by
    DEPARTMENT
which can be simplified as:
-- Same bucket counts, with the age in completed years computed once in a CTE.
with mat_age as (
    select DEPARTMENT, date_part('year', age(CREATEDATE)) as mage
    from Materials
)
select
    DEPARTMENT,
    -- ELSE 0 makes a bucket with no materials show 0 instead of NULL
    sum(case when mage < 10 then 1 else 0 end) as "age<10",
    sum(case when mage >= 10 and mage < 20 then 1 else 0 end) as "10<age<20",
    sum(case when mage >= 20 then 1 else 0 end) as "20<age"
from
    mat_age
group by
    DEPARTMENT;
if you are using PostgreSQL 9.4 you can use FILTER:
-- Bucket counts per department using aggregate FILTER clauses.
WITH material_ages AS (
    -- age of each material in completed years
    SELECT
        DEPARTMENT,
        date_part('year', age(CREATEDATE)) AS years_old
    FROM Materials
)
SELECT
    DEPARTMENT,
    COUNT(*) FILTER (WHERE years_old < 10)                    AS "age<10",
    COUNT(*) FILTER (WHERE years_old >= 10 AND years_old < 20) AS "10<age<20",
    COUNT(*) FILTER (WHERE years_old >= 20)                   AS "20<age"
FROM material_ages
GROUP BY DEPARTMENT;
The following solution assumes that your CREATEDATE column is stored as a valid Postgres date type. If this is not the case, and it is being stored as text, you will first have to convert it to a date in order for the query to work.
-- Number of materials per department in each age bucket.
-- PostgreSQL has no DATEDIFF function; age() followed by date_part('year', ...)
-- gives the number of completed years since CREATEDATE.
SELECT DEPARTMENT,
       SUM(CASE WHEN years_old < 10 THEN 1 ELSE 0 END) AS "AGE<10",
       SUM(CASE WHEN years_old >= 10 AND
                     years_old < 20 THEN 1 ELSE 0 END) AS "10<AGE<20",
       SUM(CASE WHEN years_old >= 20 THEN 1 ELSE 0 END) AS "20<AGE"
FROM (
    SELECT DEPARTMENT,
           date_part('year', age(CREATEDATE)) AS years_old
    FROM Materials
) material_ages
GROUP BY DEPARTMENT
You can use extract(year FROM age(createdate)) to get the exact age
i.e
-- Completed years between the given timestamp and today.
SELECT EXTRACT(YEAR FROM age(TIMESTAMP '01-01-1989')) AS age
will give you
Result:
age
---
27
so you can use following select statement to get your desired output:
-- Number of rows per dept in each age bucket; the age in completed years is
-- computed once in the derived table t.
SELECT dept
    -- space added between 10 and THEN: PostgreSQL 15+ rejects a numeric literal
    -- that runs straight into a keyword
    ,sum(CASE WHEN age < 10 THEN 1 ELSE 0 END) AS "age<10"
    ,sum(CASE WHEN age >= 10 AND age < 20 THEN 1 ELSE 0 END) AS "10<age<20"
    ,sum(CASE WHEN age >= 20 THEN 1 ELSE 0 END) AS "20<age"
FROM (
    SELECT dept, extract(year FROM age(crdate)) AS age
    FROM dt
    ) t
GROUP BY dept
If you don't want to use a sub select use this.
-- Same bucket counts without a derived table: the age expression is repeated
-- in every CASE.
SELECT dept
    -- space added between 10 and THEN: PostgreSQL 15+ rejects a numeric literal
    -- that runs straight into a keyword
    ,sum(CASE WHEN extract(year FROM age(crdate)) < 10 THEN 1 ELSE 0 END) AS "age<10"
    ,sum(CASE WHEN extract(year FROM age(crdate)) >= 10 AND extract(year FROM age(crdate)) < 20 THEN 1 ELSE 0 END) AS "10<age<20"
    ,sum(CASE WHEN extract(year FROM age(crdate)) >= 20 THEN 1 ELSE 0 END) AS "20<age"
FROM dt
GROUP BY dept

Oracle opening and closing balance - SQL or PL/SQL needed?

-- Question's query: quantity earned (OP_TYPE = 1) and spent (OP_TYPE = 2) per
-- year, month, product and customer. It does not compute opening or closing balances.
-- year and month are unqualified; NOTE(review): which table they come from is
-- not visible here.
select year,
month ,
d.PROD_ID,
T.CUSTOMER_ID,
-- no ELSE branch: rows of the other OP_TYPE contribute NULL, which SUM skips
SUM(CASE WHEN D.OP_TYPE = 1 THEN d.qty END) EARNED,
SUM(CASE WHEN D.OP_TYPE = 2 THEN d.qty END) SPEND
-- comma-style (implicit) joins; the join conditions are in the WHERE clause
FROM TXN_HEADER T ,
TXN_DETAIL d ,
CUSTOMER A,
PRODUCT e
WHERE T.AMOUNT > 0
AND A.TYPE = 0
AND T.CUSTOMER_ID = A.CUSTOMER_ID
AND T.TXN_PK = D.TXN_PK
and d.PROD_ID = e.PROD_ID
and e.unit = 0
group by year, month ,d.PROD_ID, T.CUSTOMER_ID
-- positional sort: year, month, PROD_ID, CUSTOMER_ID
ORDER BY 1,2,3,4
Output is as follows (here opening and closing not generated by query, but I required that has to be from the query)
YEAR MONTH PROD CUSTOMER OPENING EARNED SPEND CLOSING
---- ----- ---- -------- ------- ------ ----- -------
2012 8 548 12033 0 8 2 6
2012 9 509 12033 0 24 0 24
2012 9 509 12047 0 14 0 14
2012 9 548 12033 6 1 0 7
2012 9 548 12047 0 1 0 1
I need to generate the output shown above. For each PROD_ID and CUSTOMER_ID, the previous month's closing balance should be carried forward as the opening balance, and the closing balance should be calculated as opening + earned - spend, month by month, per customer and per product. Is it possible to write this in SQL, or do I need to use PL/SQL?
I'd use analytics, with PROD_ID and CUSTOMER_ID in the partition clause to avoid mixing products and customers.
-- Monthly opening and closing balance per product and customer.
--   MONTHLY_BALANCE: quantity earned (OP_TYPE = 1) and spent (OP_TYPE = 2) per
--   year, month, product and customer.
--   Final select: running totals of (earned - spend) inside each
--   product/customer partition give the balances.
WITH
MONTHLY_BALANCE AS
(
    SELECT
        YEAR,
        MONTH,
        D.PROD_ID,
        T.CUSTOMER_ID,
        -- ELSE 0 so that a month with no rows of a given type shows 0, not NULL
        SUM(CASE WHEN D.OP_TYPE = 1 THEN D.QTY ELSE 0 END) AS EARNED,
        SUM(CASE WHEN D.OP_TYPE = 2 THEN D.QTY ELSE 0 END) AS SPEND
    FROM TXN_HEADER T
    JOIN CUSTOMER A
        ON T.CUSTOMER_ID = A.CUSTOMER_ID
    JOIN TXN_DETAIL D
        ON T.TXN_PK = D.TXN_PK
    JOIN PRODUCT E
        ON D.PROD_ID = E.PROD_ID
    WHERE T.AMOUNT > 0
      AND A.TYPE = 0
      AND E.UNIT = 0
    GROUP BY YEAR, MONTH, D.PROD_ID, T.CUSTOMER_ID
)
SELECT
    YEAR,
    MONTH,
    PROD_ID,
    CUSTOMER_ID,
    -- balance before this month: all earlier months of the same product/customer;
    -- the frame is empty for the first month, so NVL turns the resulting NULL into 0
    NVL(SUM(EARNED - SPEND) OVER (PARTITION BY PROD_ID, CUSTOMER_ID
                                  ORDER BY YEAR, MONTH
                                  ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) AS OPENING,
    EARNED,
    SPEND,
    -- balance after this month: earlier months plus the current one
    SUM(EARNED - SPEND) OVER (PARTITION BY PROD_ID, CUSTOMER_ID
                              ORDER BY YEAR, MONTH
                              ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS CLOSING
FROM MONTHLY_BALANCE
ORDER BY YEAR, MONTH, PROD_ID, CUSTOMER_ID
Your CASE needs an ELSE
CASE WHEN D.OP_TYPE = 1 THEN d.qty ELSE 0 END
Without the ELSE, the CASE returns NULL when D.OP_TYPE is not equal to 1. SUM skips NULL inputs, so a group with no matching rows sums to NULL rather than 0, and anything+NULL=NULL in later arithmetic such as opening + earned - spend. That is why you do not see anything for those columns.
To get OPENING and CLOSING calculated, you may want to use analytic functions like LEAD and LAG.
-- Opening and closing balance per month, product and customer.
--   monthly: quantity earned (OP_TYPE = 1) and spent (OP_TYPE = 2) per month.
--   running: closing balance = running total of (earned - spend) within each
--            product/customer pair.
--   final select: opening balance = the previous month's closing balance for
--            the same product/customer (0 when there is no previous month).
SELECT year, month, prod_id, customer_id,
       LAG(closing, 1, 0) OVER (PARTITION BY prod_id, customer_id
                                ORDER BY year, month) AS opening,
       earned,
       spend,
       closing
FROM (SELECT year, month, prod_id, customer_id, earned, spend,
             SUM(earned - spend) OVER (PARTITION BY prod_id, customer_id
                                       ORDER BY year, month
                                       ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS closing
      FROM (SELECT year,
                   month,
                   d.PROD_ID,
                   T.CUSTOMER_ID,
                   -- ELSE 0 keeps earned/spend from being NULL, which would make closing NULL
                   SUM(CASE WHEN D.OP_TYPE = 1 THEN d.qty ELSE 0 END) AS EARNED,
                   SUM(CASE WHEN D.OP_TYPE = 2 THEN d.qty ELSE 0 END) AS SPEND
            FROM TXN_HEADER T,
                 TXN_DETAIL d,
                 CUSTOMER A,
                 PRODUCT e
            WHERE T.AMOUNT > 0
              AND A.TYPE = 0
              AND T.CUSTOMER_ID = A.CUSTOMER_ID
              AND T.TXN_PK = D.TXN_PK
              AND d.PROD_ID = e.PROD_ID
              AND e.unit = 0
            GROUP BY year, month, d.PROD_ID, T.CUSTOMER_ID) monthly
     ) running
ORDER BY year, month, prod_id, customer_id;