Trying to Divide a SUM with CASE by COUNT of Total Trips and return Percentage - sql

I have Two Tables Trips & Users.
I am trying to write a SQL query to find the cancellation rate of requests with unbanned users (both client and driver must not be banned) each day between
"2013-10-01" and "2013-10-03". Round Cancellation Rate in a percentage output
So in my case if Total Trips is 4 and Cancellations is 2 then I want to see .50 or if 3 and 1 then .33
Here is what I was trying to do so far...
With CTE AS (
Select Request_at as [DAY],
SUM (CASE
When Status= 'cancelled_by_driver' or Status='cancelled_by_client' THEN 1
Else 0
END)
AS Cancellations,
Count(*) as TotalTrips
FROM Trips
Where Client_id NOT IN (SELECT [Users_id] FROM [Study].[dbo].[Users] Where Banned='YES' and Roll='Client') or Driver_id NOT IN (SELECT [Users_id] FROM [Study].[dbo].[Users] Where Banned='YES' and Roll='Driver')
Group By Request_at )
Select DAY, ((TotalTrips/Cancellations) *100) as CancellationRate
From CTE
But the Divide Function is not working. I am not sure how else to approach this.
This is an error I am getting.
Any help is appreciated.

Try this one:
With CTE AS (
Select Request_at as [DAY],
SUM (CASE
When Status= 'cancelled_by_driver' or Status='cancelled_by_client' THEN 1
Else 0
END)
AS Cancellations,
Count(*) as TotalTrips
FROM Trips
Where Client_id NOT IN (SELECT [Users_id] FROM [Study].[dbo].[Users] Where Banned='YES' and Roll='Client') or Driver_id NOT IN (SELECT [Users_id] FROM [Study].[dbo].[Users] Where Banned='YES' and Roll='Driver')
Group By Request_at )
Select DAY, convert(float, TotalTrips) * 100/NULLIF(Cancellations, 0) as CancellationRate
From CTE

Since we have to query users_id twice, we can put that in a CTE. No need of a second query to calculate the percent.
With Unbanned_users As (
Select [users_id] FROM [Study].[dbo].[Users]
Where Banned<>'YES'
)
Select Request_at as [DAY],
(100*Count(CASE When Status Like 'cancelled%' Then 1 End)+Count(*)/2)
/Count(*) as Cancellation_percent
From Trips t
Inner Join Unbanned_users cl ON t.client_id=cl.users_id
Inner Join Unbanned_users dr ON t.driver_id=dr.users_id
Group By Request_at
This assumes that all of the user ids in Trips, both client and driver, are defined in the Users table.
The +Count(*)/2 is to round 2 out of 3 to 67, instead of the truncated result of 66.

Related

How to add a condition for the query so that it can calculate the unique months and years in PostgreSQL

How can I write down the condition so that he counts the months for me from a certain client (redirect) and source (source)? I need to know how many invoices were issued, and this is counted by month, type January and February are 2 invoices, March April June 3 invoices, etc. I could write max instead of count, but this is not correct, since the client may appear in the middle of the year, for example in May, and he will have the values of the maximum month.
Here is my request:
select TA.redirect,
count(case when TA.source='zlat1' then extract(month from TA.date) else 0 end) number_of_accounts_zlat1,
count(case when TA.source='zlat2' then extract(month from TA.date) else 0 end) number_of_accounts_zlat2,
sum(TA.result_for_the_day) accrued
from total_accounts TA
group by TA.redirect
Here are tables and data + query and result ---->
https://dbfiddle.uk/?rdbms=postgres_14&fiddle=0bc8002e59b03afedeac8d1b8dfc98d1
insert into finace_base (redirect)
select distinct Ta.redirect /*this select will display those names that are not present
in FB if there is other info that u must add to insert then
just add , next to redirect and add whatever u like*/
from total_acounts TA
left join finace_base FB on TA.redirect=FB.redirect
where FB.redirect is null;
update finace_base FB
set zlat1=TA.zlat1,
zlat2=TA.zlat2,
accrued=TA.accrued
from (select TA.redirect,
count(*) filter ( where TA.source='zlat1' ) as zlat1,
count(*) filter ( where TA.source='zlat2' ) as zlat2,
sum(TA.accrued) as accrued
from(
select sum(TA.accrued) as accrued,
TA.date,
TA.redirect,
TA.source
from (select TA.result_for_the_day as accrued,
to_char(TA.date, 'yyyy-mm') as date,
TA.redirect,
TA.source
from total_accounts TA) TA
group by TA.redirect, TA.date, TA.source) TA
group by TA.redirect) TA
where FB.redirect=TA.redirect
i could not add it into comments cause it was too long essentialy you first run the insert into statement and then update it will only do inserts for redirects that are not added yet
select TA.redirect,
count(*) filter ( where TA.source='zlat1' ) as zlat1,
count(*) filter ( where TA.source='zlat2' ) as zlat2,
sum(TA.accrued)
from(
select sum(TA.accrued) as accrued,
TA.date,
TA.redirect,
TA.source
from (select TA.result_for_the_day as accrued,
to_char(TA.date, 'yyyy-mm') as date,
TA.redirect,
TA.source
from total_accounts TA) TA
group by TA.redirect, TA.date, TA.source) TA
group by TA.redirect
there you go thats the answer. giving back to comunity that i have taken :D

Cohort Analysis using SQL (Snowflake)

I am doing a cohort analysis using the table TRANSACTIONS. Below is the table schema,
USER_ID NUMBER,
PAYMENT_DATE_UTC DATE,
IS_PAYMENT_ADDED BOOLEAN
Below is a quick query to see how USER_ID 12345 (an example) goes through the different cohorts based on the date filter provided,
WITH RESULT(
SELECT
USER_ID,
TO_DATE(PAYMENT_DATE_UTC) AS PAYMENT_DATE,
SUM(CASE WHEN IS_PAYMENT_ADDED=TRUE THEN 1 ELSE 0 END) AS PAYMENT_ADDED_COUNT
FROM TRANSACTIONS
GROUP BY 1,2
HAVING PAYMENT_ADDED_COUNT>=1
ORDER BY 2
)
SELECT
COUNT(DISTINCT r.USER_ID),
SUM(r.PAYMENT_ADDED_COUNT)
FROM RESULT r
WHERE r.USER_ID=12345
AND (r.PAYMENT_DATE>='2021-02-01' AND r.PAYMENT_DATE<'2021-02-15')
The result for this query with the time frame (two weeks) would be
| 1 | 55 |
and this USER_ID would be classified as a Regular User Cohort (one who has made more than 10 payments) for the provided date filter
If the same query is run with the time frame as just one day say '2021-02-07', the result would be
| 1 | 10 |
and this USER_ID would be classified as as Occasional User Cohort (one who has made between 1 and 10 payments) for the provided date filter
I have this below query to bucket the USER_ID's into the two different cohorts based on the sum of the payments added,
WITH
ALL_USER_COHORT AS
(SELECT
USER_ID,
SUM(CASE WHEN IS_PAYMENT_ADDED=TRUE THEN 1 ELSE 0 END ) AS PAYMENT_ADDED_COUNT
FROM TRANSACTIONS
GROUP BY USER_ID
),
OCASSIONAL_USER_COHORT AS
(SELECT
USER_ID,
SUM(CASE WHEN IS_PAYMENT_ADDED=TRUE THEN 1 ELSE 0 END ) AS PAYMENT_ADDED_COUNT
FROM TRANSACTIONS
GROUP BY USER_ID
HAVING (PAYMENT_ADDED_COUNT>=1 AND PAYMENT_ADDED_COUNT<=10)
),
REGULAR_USER_COHORT AS
(SELECT
USER_ID,
SUM(CASE WHEN IS_PAYMENT_ADDED=TRUE THEN 1 ELSE 0 END ) AS PAYMENT_ADDED_COUNT
FROM TRANSACTIONS
GROUP BY USER_ID
HAVING PAYMENT_ADDED_COUNT>10
)
SELECT
COUNT(DISTINCT ou.USER_ID) AS "OCCASIONAL USERS",
COUNT(DISTINCT ru.USER_ID) AS "REGULAR USERS"
FROM ALL_USER_COHORT au
LEFT JOIN OCASSIONAL_USER_COHORT ou ON au.USER_ID=ou.USER_ID
LEFT JOIN REGULAR_USER_COHORT ru ON au.USER_ID=ru.USER_ID
LEFT JOIN TRANSACTIONS t ON au.USER_ID=t.USER_ID
WHERE au.USER_ID=12345
AND TO_DATE(t.PAYMENT_DATE_UTC)>='2021-02-07'
Ideally the USER_ID 12345 should be bucketed as "OCCASIONAL USERS" as per the provided date filter but the query buckets it as "REGULAR USERS" instead.
For starters you CTE could have the redundancy removed like so:
WITH all_user_cohort AS (
SELECT
USER_ID,
SUM(IFF(is_payment_added=TRUE, 1,0)) AS payment_added_count
FROM transactions
GROUP BY user_id
), ocassional_user_cohort AS (
SELECT * FROM all_user_cohort
WHERE PAYMENT_ADDED_COUNT between 1 AND 10
), regular_user_cohort AS (
SELECT * FROM all_user_cohort
WHERE PAYMENT_ADDED_COUNT > 10
)
SELECT
COUNT(DISTINCT ou.user_id) AS "OCCASIONAL USERS",
COUNT(DISTINCT ru.user_id) AS "REGULAR USERS"
FROM all_user_cohort AS au
LEFT JOIN ocassional_user_cohort ou ON au.user_id=ou.user_id
LEFT JOIN regular_user_cohort ru ON au.user_id=ru.user_id
LEFT JOIN transactions t ON au.user_id=t.user_id
WHERE au.user_id=12345
AND TO_DATE(t.payment_date_utc)>='2021-03-01'
But the reason you are getting this problem is you are doing the which do the belong in across all time.
What you are wanting is to move the date filter into all_user_cohort, and not making tables when you can just sum the number of rows meeting the need.
WITH all_user_cohort AS (
SELECT
USER_ID,
SUM(IFF(is_payment_added=TRUE, 1,0)) AS payment_added_count
FROM transactions
WHERE TO_DATE(payment_date_utc)>='2021-03-01'
GROUP BY user_id
)
SELECT
SUM(IFF(payment_added_count between 1 AND 10, 1,0)) AS "OCCASIONAL USERS"
SUM(IFF(payment_added_count > 10, 1,0)) AS "REGULAR USERS"
FROM transactions
WHERE au.user_id=12345
Which can also be done differently, if that is more what your looking for, for other reasons.
WITH all_user_cohort AS (
SELECT
USER_ID,
SUM(IFF(is_payment_added=TRUE, 1,0)) AS payment_added_count
FROM transactions
WHERE TO_DATE(payment_date_utc)>='2021-03-01'
GROUP BY user_id
), classify_users AS (
SELECT user_id
,CASE
WHEN payment_added_count between 1 AND 10 THEN 'OCCASIONAL USERS'
WHEN payment_added_count > 10 THEN 'REGULAR USERS'
ELSE 'users with zero payments'
END AS classified
FROM all_user_cohort
)
SELECT classified
,count(*)
FROM classify_users
WHERE user_id=12345
GROUP BY 1

Group by in columns and rows, counts and percentages per day

I have a table that has data like following.
attr |time
----------------|--------------------------
abc |2018-08-06 10:17:25.282546
def |2018-08-06 10:17:25.325676
pqr |2018-08-05 10:17:25.366823
abc |2018-08-06 10:17:25.407941
def |2018-08-05 10:17:25.449249
I want to group them and count by attr column row wise and also create additional columns in to show their counts per day and percentages as shown below.
attr |day1_count| day1_%| day2_count| day2_%
----------------|----------|-------|-----------|-------
abc |2 |66.6% | 0 | 0.0%
def |1 |33.3% | 1 | 50.0%
pqr |0 |0.0% | 1 | 50.0%
I'm able to display one count by using group by but unable to find out how to even seperate them to multiple columns. I tried to generate day1 percentage with
SELECT attr, count(attr), count(attr) / sum(sub.day1_count) * 100 as percentage from (
SELECT attr, count(*) as day1_count FROM my_table WHERE DATEPART(week, time) = DATEPART(day, GETDate()) GROUP BY attr) as sub
GROUP BY attr;
But this also is not giving me correct answer, I'm getting all zeroes for percentage and count as 1. Any help is appreciated. I'm trying to do this in Redshift which follows postgresql syntax.
Let's nail the logic before presenting:
with CTE1 as
(
select attr, DATEPART(day, time) as theday, count(*) as thecount
from MyTable
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
select t1.attr, t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
From here you can pivot to create a day by day if you feel the need
I am trying to enhance the query #johnHC btw if you needs for 7days then you have to those days in case when
with CTE1 as
(
select attr, time::date as theday, count(*) as thecount
from t group by attr,time::date
)
, CTE2 as
(
select theday, sum(thecount) as daytotal
from CTE1
group by theday
)
,
CTE3 as
(
select t1.attr, EXTRACT(DOW FROM t1.theday) as day_nmbr,t1.theday, t1.thecount, t1.thecount/t2.daytotal as percentofday
from CTE1 t1
inner join CTE2 t2
on t1.theday = t2.theday
)
select CTE3.attr,
max(case when day_nmbr=0 then CTE3.thecount end) as day1Cnt,
max(case when day_nmbr=0 then percentofday end) as day1,
max(case when day_nmbr=1 then CTE3.thecount end) as day2Cnt,
max( case when day_nmbr=1 then percentofday end) day2
from CTE3 group by CTE3.attr
http://sqlfiddle.com/#!17/54ace/20
In case that you have only 2 days:
http://sqlfiddle.com/#!17/3bdad/3 (days descending as in your example from left to right)
http://sqlfiddle.com/#!17/3bdad/5 (days ascending)
The main idea is already mentioned in the other answers. Instead of joining the CTEs for calculating the values I am using window functions which is a bit shorter and more readable I think. The pivot is done the same way.
SELECT
attr,
COALESCE(max(count) FILTER (WHERE day_number = 0), 0) as day1_count, -- D
COALESCE(max(percent) FILTER (WHERE day_number = 0), 0) as day1_percent,
COALESCE(max(count) FILTER (WHERE day_number = 1), 0) as day2_count,
COALESCE(max(percent) FILTER (WHERE day_number = 1), 0) as day2_percent
/*
Add more days here
*/
FROM(
SELECT *, (count::float/count_per_day)::decimal(5, 2) as percent -- C
FROM (
SELECT DISTINCT
attr,
MAX(time::date) OVER () - time::date as day_number, -- B
count(*) OVER (partition by time::date, attr) as count, -- A
count(*) OVER (partition by time::date) as count_per_day
FROM test_table
)s
)s
GROUP BY attr
ORDER BY attr
A counting the rows per day and counting the rows per day AND attr
B for more readability I convert the date into numbers. Here I take the difference between current date of the row and the maximum date available in the table. So I get a counter from 0 (first day) up to n - 1 (last day)
C calculating the percentage and rounding
D pivot by filter the day numbers. The COALESCE avoids the NULL values and switched them into 0. To add more days you can multiply these columns.
Edit: Made the day counter more flexible for more days; new SQL Fiddle
Basically, I see this as conditional aggregation. But you need to get an enumerator for the date for the pivoting. So:
SELECT attr,
COUNT(*) FILTER (WHERE day_number = 1) as day1_count,
COUNT(*) FILTER (WHERE day_number = 1) / cnt as day1_percent,
COUNT(*) FILTER (WHERE day_number = 2) as day2_count,
COUNT(*) FILTER (WHERE day_number = 2) / cnt as day2_percent
FROM (SELECT attr,
DENSE_RANK() OVER (ORDER BY time::date DESC) as day_number,
1.0 * COUNT(*) OVER (PARTITION BY attr) as cnt
FROM test_table
) s
GROUP BY attr, cnt
ORDER BY attr;
Here is a SQL Fiddle.

Retrieve records by continuation of days in oracle

I want to retrieve records where cash deposits are more than 4 totaling to 1000000 during a day and continues for more than 5 days.
I have came up with below query.
SELECT COUNT(a.txamt) AS "txcount"
, SUM(a.txamt) AS "txsum"
, b.custcd
, a.txdate
FROM tb_transactions a
INNER JOIN tb_accounts b
ON a.acctno = b.acctno
WHERE a.cashflowtype = 'CR'
GROUP BY b.custcd, a.txdate
HAVING COUNT(a.txamt)>4 and SUM(a.txamt)>='1000000'
ORDER BY a.txdate;
But I'm stuck on how to fetch the records if the pattern continues for 5 days.
How to achieve the desired result?
Something like:
SELECT *
FROM (
SELECT t.*,
COUNT( txdate ) OVER ( PARTITION BY custcd
ORDER BY txdate
RANGE BETWEEN INTERVAL '0' DAY PRECEDING
AND INTERVAL '4' DAY FOLLOWING ) AS
num_days
FROM (
select count(a.txamt) as "txcount",
sum(a.txamt) as "txsum",
b.custcd,
a.txdate
from tb_transactions a inner join tb_accounts b on a.acctno=b.acctno
where a.cashflowtype='CR'
group by b.custcd, a.txdate
having count(a.txamt)>4 and sum(a.txamt)>=1000000
) t
)
WHERE num_days = 5
order by a.txdate;

Aggregating a sub query within query

I am currently working on aggregating the sum qty of "OUT" and "OUT+IN".
Current query is the following:
Select
a.Date
,a.DepartmentID
from
(Select
dris.Date
,dris.RentalItemKey
,dris.WarehouseKey
,ISNULL((Select TOP 1 dris.Date where OutQty=1 order by Date DESC),(Select ri.ReceiveDate from RentalItem ri where ri.RentalItemKey=dris.RentalItemKey)) as LastOutDate
,(Select d.DepartmentKey from Department d where d.Department=i.Department)as DepartmentID
, (CASE WHEN OutQty=1 OR (RepairQty=1 AND RentedQty=1) THEN 'IN' ELSE 'OUT' END) as Status
from DailyRentalItemStatus dris
inner join Inventory i on i.InventoryKey=dris.InventoryKey
where dris.Date='2014-08-02'
and i.ICode='3223700'
and i.Classification IN ('ITEM', 'ACCESSORY')
and i.AvailFor='RENT'
and i.AvailFrom='WAREHOUSE'
and dris.Warehouse='TORONTO')a
and I would like the result to be the following:
Date WarehouseID DepartmentID ICode Owned NotRedundant Out
2014-08-02 001T A00G 3223700 30 30 19
Where Owned is is The items with status as "OUT+IN", out is "OUT" and Not Redundant as where the lastout date is within the last 2 years from the date.
Help would be greatly appreciated.
I think this is close to what you're looking for. Your Not Redundant description, is hard to understand. Which dates are you comparing. The same trick for OUT may be used for that though.
My query also assumes that you always have a department connecting to the inventory table and that there's always a rentalitem.receivedate.
;WITH LastOut as
(Select Max(Date) as LastOutDate, rentalItemKey
from DailyRentalItemStatus
WHERE OutQty=1
)
Select
dris.Date
,dris.WarehouseKey as WarehouseID
,d.DepartmentKey as DepartmentID
, i.Icode
--,ISNULL((Select TOP 1 dris.Date where OutQty=1 order by Date DESC),(Select ri.ReceiveDate from RentalItem ri where ri.RentalItemKey=dris.RentalItemKey)) as LastOutDate
, Count(1) as Owned
, Sum(CASE WHEN NOT (OutQty=1 OR (RepairQty=1 AND RentedQty=1)) THEN 1 ELSE 0 END) as OUT
, Sum(CASE WHEN DateAdd(yy, 2,dris.[date]) >= ISNULL(lastout.lastoutdate, ri.ReceiveDate) then 1 else 0 end) as NonRedundent
from DailyRentalItemStatus dris
inner join Inventory i on i.InventoryKey=dris.InventoryKey
INNER JOIN Department d ON d.Department=i.Department
INNER JOIN RentalItem ri ON ri.RentalItemKey=dris.RentalItemKey
LEFT OUTER JOIN LastOUT ON LastOut.rentalItemKey=dris.RentalItemKey
where dris.Date='2014-08-02'
and i.ICode='3223700'
and i.Classification IN ('ITEM', 'ACCESSORY')
and i.AvailFor='RENT'
and i.AvailFrom='WAREHOUSE'
and dris.Warehouse='TORONTO'
Group BY dris.Date, d.DepartmentKey, Dris.WarehouseKey , i.icode