rank out of the total in postgres - sql

I am writing a sql script in which I want to get total number of appointments per salesperson then also get how much he rank out of the rest salesperson. e.g Salesperson x has 5 appointment and he rate 4 out of 10 salespersons.
**expected results**:
Salesperson x 5 4/10
Salesperson D 6 5/10
Salesperson s 8 7/10

Use rank()
with sales as
(
select Salesperson, count(appointment) appointments
from SalesTable
group by Salesperson
)
select sales.*, rank() over (order by appointments desc) as salesrank
from sales

Hi Thanks for your response. I tried it this way it works:
select id,sales_person,"Appointment/Day",rank_for_the_day,"Appointment/Week",rank_for_the_week,"Appointment/Month",
rank_for_the_month,"Appointment/year",rank_for_the_year
from(
select supplied_id,salesperson,sum(case when appointment_date::date=current_date then 1 else 0 end )"Appointment/Day",
rank() over (order by sum(case when appointment_date::date=current_date then 1 else 0 end ) desc )||'/'||
(select sum(case when appointment_date::date=current_date then 1 else 0 end ) from match where date_part( 'year', appointment_date)=2017
and appointment_date is not null and date_part('day',appointment_date)=date_part('day',current_date) ) rank_for_the_day,
sum(case when appointment_date::date between current_date-7 and current_date then 1 else 0 end )"Appointment/Week",
rank() over (order by sum(case when appointment_date::date between current_date-7 and current_date then 1 else 0 end ) desc)||'/'||
(select sum(case when appointment_date::date between current_date-7 and current_date then 1 else 0 end )
from match m where date_part( 'year', appointment_date)=2017 and appointment_date is not null
and date_part('week',appointment_date)=date_part('week',current_date) ) rank_for_the_week,
sum(case when date_part('month',appointment_date)=date_part('month',current_date) then 1 else 0 end )"Appointment/Month",
rank() over (order by sum(case when date_part('month',appointment_date)=date_part('month',current_date) then 1 else 0 end ) desc)||'/'||
(select sum(case when date_part('month',appointment_date)=date_part('month',current_date) then 1 else 0 end )
from match m where date_part( 'year', appointment_date)=2017 and appointment_date is not null
and date_part('month',appointment_date)=date_part('month',current_date) ) rank_for_the_month,
sum(case when date_part('year',appointment_date)=date_part('year',current_date) then 1 else 0 end )"Appointment/year",
rank() over (order by sum(case when date_part('year',appointment_date)=date_part('year',current_date) then 1 else 0 end ) desc)||'/'||
(select sum(case when date_part('year',appointment_date)=date_part('year',current_date) then 1 else 0 end )
from match m where date_part( 'year', appointment_date)=2017 and appointment_date is not null
and date_part('year',appointment_date)=date_part('year',current_date) ) rank_for_the_year
from salespersontable
where date_part( 'year', appointment_date)=2017 and appointment_date is not null
group by id,salesperson
)x order by 6 desc
However,I would appreciate an efficient way to write this query to minimize resource consumption.

Related

Select data where sum for last 7 from max-date is greater than x

I have a data set as such:
Date Value Type
2020-06-01 103 B
2020-06-01 100 A
2020-06-01 133 A
2020-06-11 150 A
2020-07-01 1000 A
2020-07-21 104 A
2020-07-25 140 A
2020-07-28 1600 A
2020-08-01 100 A
Like this:
Type ISHIGH
A 1
B 0
Here's the query i tried,
select type, case when sum(value) > 10 then 1 else 0 end as total_usage
from table_a
where (select sum(value) as usage from tableA where date = max(date)-7)
group by type, date
This is clearly not right. What is a simple way to do this?
It is a simply group by except that you need to be able to access max date before grouping:
select type
, max(date) as last_usage_date
, sum(value) as total_usage
, case when sum(case when date >= cutoff_date then value end) >= 1000 then 'y' end as [is high!]
from t
cross apply (
select dateadd(day, -6, max(date))
from t as x
where x.type = t.type
) as ca(cutoff_date)
group by type, cutoff_date
If you want just those two columns then a simpler approach is:
select t.type, case when sum(value) >= 1000 then 'y' end as [is high!]
from t
left join (
select type, dateadd(day, -6, max(date)) as cutoff_date
from t
group by type
) as a on t.type = a.type and t.date >= a.cutoff_date
group by t.type
Find the max date by type. Then used it to find last 7 days and sum() the value.
with
cte as
(
select [type], max([Date]) as MaxDate
from tableA
group by [type]
)
select c.[type], sum(a.Value),
case when SUM(a.Value) > 1000 then 1 else 0 end as ISHIGH
from cte c
inner join tableA a on a.[type] = c.[type]
and a.[Date] >= DATEADD(DAY, -7, c.MaxDate)
group by c.[type]
This can be done through a cumulative total as follows:
;With CTE As (
Select [type], [date],
SUM([value]) Over (Partition by [type] Order by [date] Desc) As Total,
Row_Number() Over (Partition by [type] Order by [date] Desc) As Row_Num
From Tbl)
Select Distinct CTE.[type], Case When C.[type] Is Not Null Then 1 Else 0 End As ISHIGH
From CTE Left Join CTE As C On (CTE.[type]=C.[type]
And DateDiff(dd,CTE.[date],C.[date])<=7
And C.Total>1000)
Where CTE.Row_Num=1
I think you are quite close with you initial attempt to solve this. Just a tiny edit:
select type, case when sum(value) > 1000 then 1 else 0 end as total_usage
from tableA
where date > (select max(date)-7 from tableA)
group by type

I am looking to find customers repurchase frequency in SQL from their first purchase date

I am trying to find the customer's repurchase rates from their first order date. For example, for 2016, how many customer purchased 1X in days 1-365 from their initial purchase, how many purchased twice etc.
I have a transaction_detail table which looks like below:
txn_date Customer_ID Transaction_Number Sales
1/2/2019 1 12345 $10
4/3/2018 1 65890 $20
3/22/2019 3 64453 $30
4/3/2019 4 88567 $20
5/21/2019 4 85446 $15
1/23/2018 5 89464 $40
4/3/2019 5 99674 $30
4/3/2019 6 32224 $20
1/23/2018 6 46466 $30
1/20/2018 7 56558 $30
I am able to find the customers who have shopped in 2016 and how many times have they repurchased in 2016, but I need to find the customer who have shopped in 2016 and how many times have they come back from their first purchase date.
I need a starting point for the query, I am not sure how to build this logic in my SQL code.
Any help would be appreciated.
I am using the below query:
WITH by_year
AS (SELECT
Customer_ID,
to_char(txn_date, 'YYYY') AS visit_year
FROM table
GROUP BY Customer_ID, to_char(txn_date, 'YYYY')),
with_first_year
AS (SELECT
Customer_ID,
visit_year,
FIRST_VALUE(visit_year) OVER (PARTITION BY Customer_ID ORDER BY visit_year) AS first_year
FROM by_year),
with_year_number
AS (SELECT
Customer_ID,
visit_year,
first_year,
(visit_year - first_year) AS year_number
FROM with_first_year)
SELECT
first_year AS first_year,
SUM(CASE WHEN year_number = 0 THEN 1 ELSE 0 END) AS year_0,
SUM(CASE WHEN year_number = 1 THEN 1 ELSE 0 END) AS year_1,
SUM(CASE WHEN year_number = 2 THEN 1 ELSE 0 END) AS year_2,
SUM(CASE WHEN year_number = 3 THEN 1 ELSE 0 END) AS year_3,
SUM(CASE WHEN year_number = 4 THEN 1 ELSE 0 END) AS year_4,
SUM(CASE WHEN year_number = 5 THEN 1 ELSE 0 END) AS year_5,
SUM(CASE WHEN year_number = 6 THEN 1 ELSE 0 END) AS year_6,
SUM(CASE WHEN year_number = 7 THEN 1 ELSE 0 END) AS year_7,
SUM(CASE WHEN year_number = 8 THEN 1 ELSE 0 END) AS year_8,
SUM(CASE WHEN year_number = 9 THEN 1 ELSE 0 END) AS year_9
FROM with_year_number
GROUP BY first_year
ORDER BY first_year
Use window functions and aggregation:
select cnt, count(*), min(customer_id), max(customer_id)
from (select customer_id, count(*) as cnt
from (select td.*,
min(txn_date) over (partition by Customer_ID) as min_txn_date
from transaction_detail td
) td
where txn_date >= min_txn_date and txn_date < min_txn_date + interval '365' day
group by customer_id
) c
group by cnt
order by cnt;
So as per my understanding, you want to know the count of the distinct person who first purchased in 2016 and repurchased after one year or more from date of purchase.
Select * from
(
Select customer_id,
Floor(months_between(txn_date, lead_txn_date)/12) as num_years
From
(
Select customer_id,
txn_date,
row_number() over (partition by Customer_ID order by txn_date) as rn,
lead(txn_date) over (partition by Customer_ID order by txn_date) as lead_txn_date
From your_table
)
Where txn_date >= date '2016-01-01'
and txn_date < date '2017-01-01'
and rn = 1
And months_between(txn_date, lead_txn_date) >= 12
)
Pivot
(
Count(1) for num_year in (1,2,3,4)
)
Ultimately, we are finding the number of years between first and second purchase of the customer. And first purchase must be in 2016.
Cheers!!

Cohort Analysis in SQL while recounting users

I'm trying to create a cohort query using SQL.
Usually with cohort analysis we look at users and check if a user who performed a specific action at a specific time and count if that user performs the same action over time.
WITH by_week
AS (SELECT
user_id,
TD_DATE_TRUNC('week', login_time) AS login_week
FROM logins
GROUP BY 1, 2),
with_first_week
AS (SELECT
user_id,
login_week,
FIRST_VALUE(login_week) OVER (PARTITION BY user_id ORDER BY login_week) AS first_week
FROM by_week),
with_week_number
AS (SELECT
user_id,
login_week,
first_week,
(login_week - first_week) / (24 * 60 * 60 * 7) AS week_number
FROM with_first_week)
SELECT
TD_TIME_FORMAT(first_week, 'yyyy-MM-dd') AS first_week,
SUM(CASE WHEN week_number = 1 THEN 1 ELSE 0 END) AS week_1,
SUM(CASE WHEN week_number = 2 THEN 1 ELSE 0 END) AS week_2,
SUM(CASE WHEN week_number = 3 THEN 1 ELSE 0 END) AS week_3,
SUM(CASE WHEN week_number = 4 THEN 1 ELSE 0 END) AS week_4,
SUM(CASE WHEN week_number = 5 THEN 1 ELSE 0 END) AS week_5,
SUM(CASE WHEN week_number = 6 THEN 1 ELSE 0 END) AS week_6,
SUM(CASE WHEN week_number = 7 THEN 1 ELSE 0 END) AS week_7,
SUM(CASE WHEN week_number = 8 THEN 1 ELSE 0 END) AS week_8,
SUM(CASE WHEN week_number = 9 THEN 1 ELSE 0 END) AS week_9
FROM with_week_number
GROUP BY 1
ORDER BY 1
But let say now I don't care that much about first time/user-level analysis and I only want to see if my login action increases over time (i.e I want to add up logins of the first cohort during week 2 with logins of the second cohort in week 1). Is there a simple/elegant way to do this?
Edit:
Giving an example below
WeekStart Week1 Week2 Week 3
2017/05/03 66 **53** **49**
2017/05/10 (**53**+74) (**49**+70) **65**
2017/05/17 (**49**+ 70 + 45) (**65** + 80) etc.
I think you need to group by login_week instead of first_week so you count all logins during the given week in every row, not by cohort, and then you have to use >= instead of = so it will sum up this week's cohort with all older cohorts in any given row.
WITH
by_week AS (
SELECT
user_id,
TD_DATE_TRUNC('week', login_time) AS login_week
FROM logins
GROUP BY 1, 2
)
,with_first_week AS (
SELECT
user_id,
login_week,
FIRST_VALUE(login_week) OVER (PARTITION BY user_id ORDER BY login_week) AS first_week
FROM by_week
)
,with_week_number AS (
SELECT
user_id,
login_week,
first_week,
(login_week - first_week) / (24 * 60 * 60 * 7) AS week_number
FROM with_first_week
)
SELECT
TD_TIME_FORMAT(login_week, 'yyyy-MM-dd') AS login_week,
SUM(CASE WHEN week_number>= 1 THEN 1 ELSE 0 END) AS week_1,
SUM(CASE WHEN week_number>= 2 THEN 1 ELSE 0 END) AS week_2,
SUM(CASE WHEN week_number>= 3 THEN 1 ELSE 0 END) AS week_3,
SUM(CASE WHEN week_number>= 4 THEN 1 ELSE 0 END) AS week_4,
SUM(CASE WHEN week_number>= 5 THEN 1 ELSE 0 END) AS week_5,
SUM(CASE WHEN week_number>= 6 THEN 1 ELSE 0 END) AS week_6,
SUM(CASE WHEN week_number>= 7 THEN 1 ELSE 0 END) AS week_7,
SUM(CASE WHEN week_number>= 8 THEN 1 ELSE 0 END) AS week_8,
SUM(CASE WHEN week_number>= 9 THEN 1 ELSE 0 END) AS week_9
FROM with_week_number
GROUP BY 1
ORDER BY 1;

SQL Sum Group by Custom Group

I have generated the following query, and I want to sum the patient counts by the new IMS_CUST_ID_GRP I created (roll it up to this a remove the CUST_ID) in my query. How can i modify my query to return my desired result? I tried using an analytic function but I get an error.
SELECT (CASE WHEN SUM(NEW_PAT_CNT) = 1 THEN '1'
WHEN SUM(NEW_PAT_CNT) >=2 AND SUM(NEW_PAT_CNT) <=12 THEN '2-12'
WHEN SUM(NEW_PAT_CNT) >=13 AND SUM(NEW_PAT_CNT)<=24 THEN '13-24'
WHEN SUM(NEW_PAT_CNT) >=24 AND SUM(NEW_PAT_CNT) <=48 THEN '25-48'
WHEN SUM(NEW_PAT_CNT) >48 THEN '>48'
END) IMS_CUST_ID_GRP, SUM(NEW_PAT_CNT), CUST_ID
FROM DEXODS.OPUB_ONE_IMS_IDS_FACT fct,
DEXWHS.D_DATE dt,
DEXWHS.D_ACCOUNT_VEEVA ac
WHERE fct.DATE_DIM_KEY = dt.DATE_ID
AND fct.ACCOUNT_DIM_KEY = ac.ACCOUNT_DIM_KEY
AND NEW_PAT_CNT >0
AND dt.year in n'2016'
GROUP BY CUST_ID
Analytic function which returns an error
SELECT (CASE WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) = 1 THEN '1'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >=2 AND SUM(NEW_PAT_CNT)OVER (PARTITION BY CUST_ID) <=12 THEN '2-12'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >=13 AND SUM(NEW_PAT_CNT)OVER (PARTITION BY CUST_ID) <=24 THEN '13-24'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >=24 AND SUM(NEW_PAT_CNT)OVER (PARTITION BY CUST_ID) <=48 THEN '25-48'
WHEN SUM(NEW_PAT_CNT) OVER (PARTITION BY CUST_ID) >48 THEN '>48'
END) IMS_CUST_ID_GRP, CUST_ID, SUM(NEW_PAT_CNT)
FROM DEXODS.OPUB_ONE_IMS_IDS_FACT fct,
DEXWHS.D_DATE dt,
DEXWHS.D_ACCOUNT_VEEVA ac
WHERE fct.DATE_DIM_KEY = dt.DATE_ID
AND fct.ACCOUNT_DIM_KEY = ac.ACCOUNT_DIM_KEY
AND NEW_PAT_CNT >0
GROUP BY CUST_ID
Dataset with CUST_ID included
IMS_CUST_ID_GRP SUM(NEW_PAT_CNT) CUST_ID
1 1 55671832
1 1 56097728
2-12 4 56106239
2-12 5 56728330
2-12 9 57590869
2-12 2 55609391
2-12 9 55880657
2-12 10 56339375
2-12 3 57371546
25-48 39 55891493
13-24 21 55714333
13-24 22 56542678
Desired Dataset rolled up to IMS_CUST_ID_GRP
IMS_CUST_ID_GRP SUM(NEW_PAT_CNT)
1 2
2-12 42
13-24 43
25-48 39
If you are doing a pivot query, using GROUP BY, then you don't need to use SUM() as an analytic function. Just use it normally:
SELECT CUST_ID,
SUM(CASE WHEN CUST_ID = 1 THEN NEW_PAT_CNT ELSE 0 END) AS '1',
SUM(CASE WHEN CUST_ID >= 2 AND CUST_ID <= 12 THEN NEW_PAT_CNT ELSE 0 END) AS '2-12',
SUM(CASE WHEN CUST_ID >= 13 AND CUST_ID <= 24 THEN NEW_PAT_CNT ELSE 0 END) AS '13-24',
SUM(CASE WHEN CUST_ID >= 24 AND CUST_ID <= 48 THEN NEW_PAT_CNT ELSE 0 END) AS '25-48',
SUM(CASE WHEN CUST_ID > 48 THEN NEW_PAT_CNT ELSE 0 END) AS '>48'
FROM DEXODS.OPUB_ONE_IMS_IDS_FACT fct
INNER JOIN DEXWHS.D_DATE dt
ON fct.DATE_DIM_KEY = dt.DATE_ID
INNER JOIN DEXWHS.D_ACCOUNT_VEEVA ac
ON fct.ACCOUNT_DIM_KEY = ac.ACCOUNT_DIM_KEY
WHERE NEW_PAT_CNT > 0 -- should not be necessary assuming count no less than zero
GROUP BY CUST_ID
Note that I have replaced your implicit joins with explicit INNER JOINs. As you may have heard, you should try to avoid putting commas in the FROM clause.

Oracle opening and closing balance - SQL or PL/SQL needed?

select year,
month ,
d.PROD_ID,
T.CUSTOMER_ID,
SUM(CASE WHEN D.OP_TYPE = 1 THEN d.qty END) EARNED,
SUM(CASE WHEN D.OP_TYPE = 2 THEN d.qty END) SPEND
FROM TXN_HEADER T ,
TXN_DETAIL d ,
CUSTOMER A,
PRODUCT e
WHERE T.AMOUNT > 0
AND A.TYPE = 0
AND T.CUSTOMER_ID = A.CUSTOMER_ID
AND T.TXN_PK = D.TXN_PK
and d.PROD_ID = e.PROD_ID
and e.unit = 0
group by year, month ,d.PROD_ID, T.CUSTOMER_ID
ORDER BY 1,2,3,4
Output is as follows (here opening and closing not generated by query, but I required that has to be from the query)
YEAR MONTH PROD CUSTOMER OPENING EARNED SPEND CLOSING
---- ----- ---- -------- ------- ------ ----- -------
2012 8 548 12033 0 8 2 6
2012 9 509 12033 0 24 0 24
2012 9 509 12047 0 14 0 14
2012 9 548 12033 6 1 0 7
2012 9 548 12047 0 1 0 1
I required to generate the output as above. Here PROD_ID,CUSTOMER_ID wise dynamically the prev closing balance to be populated as opening and it shoulde calculate closing balance (opening+earned-spend) monthwise,customer wise ,product wise. is it possible to write in SQL or need to go PL/SQL?
I'd use analytics, with PROD_ID and CUSTOMER_ID in the partition clause to avoid mixing products and customers.
WITH
MONTHLY_BALANCE AS
(
SELECT
YEAR,
MONTH,
D.PROD_ID,
T.CUSTOMER_ID,
SUM(CASE WHEN D.OP_TYPE = 1 THEN D.QTY ELSE NULL END) EARNED,
SUM(CASE WHEN D.OP_TYPE = 2 THEN D.QTY ELSE NULL END) SPEND,
FROM TXN_HEADER T
JOIN CUSTOMER A
ON T.CUSTOMER_ID = A.CUSTOMER_ID
JOIN TXN_DETAIL D
ON T.TXN_PK = D.TXN_PK
JOIN PRODUCT E
ON D.PROD_ID = E.PROD_ID
WHERE T.AMOUNT > 0
AND A.TYPE = 0
AND E.UNIT = 0
GROUP BY YEAR, MONTH, D.PROD_ID, T.CUSTOMER_ID
)
SELECT
YEAR,
MONTH,
PROD_ID,
CUSTOMER_ID,
SUM(NVL(EARNED, 0) - NVL(SPEND, 0)) OVER(PARTITION BY PROD_ID, CUSTOMER_ID ORDER BY YEAR, MONTH ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) OPENING,
EARNED,
SPEND,
SUM(NVL(EARNED, 0) - NVL(SPEND, 0)) OVER(PARTITION BY PROD_ID, CUSTOMER_ID ORDER BY YEAR, MONTH ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT_ROW) CLOSING
FROM MONTHLY_BALANCE
ORDER BY 1, 2, 3, 4
Your CASE needs an ELSE
CASE WHEN D.OP_TYPE = 1 THEN d.qty ELSE 0 END
Without the else the CASE will return NULL when D.OP_TYPE is not equal to 1, and anything+NULL=NULL. When your WHEN is not satisfied it returns NULL and that is why you do not see anything for those columns.
To get OPENING and CLOSING calculated as you may want to use analytic functions like LEAD and LAG.
Select year,month,prod_id,customer_id,
LAG(closing,1,0) OVER (order by year,month,prod_id,customer_id) as opening,
earned,spend
,(LAG(closing,1,0) OVER (order by year,month,prod_id,customer_id)+closing) as closing
from (WITH temp AS (select year,
month ,
d.PROD_ID,
T.CUSTOMER_ID,
0 OPEN,
SUM(CASE WHEN D.OP_TYPE = 1 THEN d.qty END) EARNED,
SUM(CASE WHEN D.OP_TYPE = 2 THEN d.qty END) SPEND,
0 CLOSE
FROM TXN_HEADER T ,
TXN_DETAIL d ,
CUSTOMER A,
PRODUCT e
WHERE T.AMOUNT > 0
AND A.TYPE = 0
AND T.CUSTOMER_ID = A.CUSTOMER_ID
AND T.TXN_PK = D.TXN_PK
and d.PROD_ID = e.PROD_ID
and e.unit = 0
group by year, month ,d.PROD_ID, T.CUSTOMER_ID
ORDER BY 1,2,3,4)
SELECT year,month,prod_id,customer_id,open,earned,spend,(open+earned-spend) as closing
from temp);