Unique Count (Print out 0) within Time Range - sql

I have a table TB that contains the columns Time, User_id, and State, with data as below.
INSERT INTO TB
(Time, User_id, State)
VALUES
(1, 1, 'VA'),
(1, 2, 'VA'),
(1, 2, 'DC'),
(2, 1, 'VA'),
(2, 2, 'MD'),
(3, 1, 'MD'),
(3, 112, 'MD'),
(3, 134, 'VA'),
(3, 111, 'MD'),
(4, 12, 'VA'),
(4, 22, 'MD')
;
I would like a unique count of User_id given State = 'DC' every 2 seconds. The result should be looking like this:
NewTime | Count
1 | 1
2 | 0
I found an answer that prints 0 when the count is 0, using
SELECT TIME,
SUM(CASE WHEN State = 'DC' THEN 1 ELSE 0 END) Count
FROM TB
GROUP BY Time
And get the unique count within a time range using
SELECT
((Time - 1) / 2) + 1 as NewTime,
COUNT(DISTINCT User_id) as Count
FROM ...
GROUP BY ((Time - 1) / 2) + 1
But I'm not sure how to combine these two to get the result I want.
Thanks in advance.

Are you looking for this:
-- Distinct users seen in state 'DC', per bucket of Time values.
-- The CASE yields NULL for every non-DC row and COUNT(DISTINCT ...) ignores
-- NULLs, so a bucket that has rows but no DC rows is still returned, with 0.
-- NOTE(review): the two-value bucketing assumes `/` is integer division for
-- the type of Time -- confirm for the target engine.
SELECT
    ((Time - 1) / 2) + 1 AS NewTime,
    COUNT(DISTINCT CASE WHEN state = 'DC' THEN User_id END) AS Count
FROM ...
GROUP BY ((Time - 1) / 2) + 1
This assumes that you have some record (though not necessarily DC) every two seconds.

Related

SQL: how to get sum of grouped items for a given cohort

I have the following orders table:
(1, 2, '2021-03-05', 15, 'books'),
(1, 13, '2022-03-07', 3, 'music'),
(1, 14, '2022-06-15', 900, 'travel'),
(1, 11, '2021-11-17', 25, 'books'),
(1, 16, '2022-08-03', 32, 'books'),
(2, 4, '2021-04-12', 4, 'music'),
(2, 7, '2021-06-29', 9, 'music'),
(2, 20, '2022-11-03', 8, 'music'),
(2, 22, '2022-11-07', 575, 'food'),
(2, 24, '2022-11-20', 95, 'food'),
(3, 3, '2021-03-17', 25, 'books'),
(3, 5, '2021-06-01', 650, 'travel'),
(3, 17, '2022-08-17', 1200, 'travel'),
(3, 19, '2022-10-02', 6, 'music'),
(3, 23, '2022-11-08', 7, 'food'),
(4, 9, '2021-08-20', 3200, 'travel'),
(4, 10, '2021-10-29', 2750, 'travel'),
(4, 15, '2022-07-15', 1820, 'travel'),
(4, 21, '2022-11-05', 8000, 'travel'),
(4, 25, '2022-11-29', 2300, 'travel'),
(5, 1, '2021-01-04', 3, 'music'),
(5, 6, '2021-06-09', 820, 'travel'),
(5, 8, '2021-07-30', 19, 'books'),
(5, 12, '2021-12-10', 22, 'music'),
(5, 18, '2022-09-19', 20, 'books'),
(6, 26, '2023-01-09', 700, 'travel'),
(6, 27, '2023-01-23', 1900, 'travel')
Here's a Fiddle: http://sqlfiddle.com/#!17/71698/3
I would like to get the sum of revenue by product among those customers who have ever purchased a travel product.
In this case, customers 1, 3, 4, 5, and 6 have purchased the travel product. Therefore, the desired result set would look like this:
customer_id
revenue_books
revenue_music
revenue_food
1
72
3
0
3
25
6
7
4
0
0
0
5
39
25
0
6
0
0
0
How would I do this? Thank you!
In my answer I show how to think about the problem to get the result -- break it down into parts and then combine them. Some answers give a less verbose query, but I don't think they will be faster. This should be easier to understand for someone new to SQL.
First the people who have purchased a travel product
-- One row per customer who has at least one 'travel' order.
SELECT user_id
FROM orders
WHERE product = 'travel'
GROUP BY user_id
You care about books, music and food; you can get the totals for those like this:
-- Revenue per customer and product, limited to the three products of interest.
-- WHERE filters rows before grouping, so it has to come before GROUP BY
-- (and there must be no trailing comma after the IN list).
SELECT
    user_id,
    product,
    SUM(revenue) AS TOT
FROM orders
WHERE product IN ('books', 'music', 'food')
GROUP BY user_id, product
Now join them together
-- Revenue for books / music / food, one row per customer who has ever bought
-- a travel product.
WITH sums AS (
    -- Total revenue per customer and product (row filter goes before GROUP BY).
    SELECT
        user_id,
        product,
        SUM(revenue) AS TOT
    FROM orders
    WHERE product IN ('books', 'music', 'food')
    GROUP BY user_id, product
)
SELECT
    u.user_id,
    -- A customer with no orders for a product has no matching row in sums;
    -- COALESCE turns the resulting NULL into the 0 the desired output shows.
    COALESCE(books.TOT, 0) AS book_total,
    COALESCE(music.TOT, 0) AS music_total,
    COALESCE(food.TOT, 0)  AS food_total
FROM (
    -- The cohort: customers with at least one travel order.
    SELECT DISTINCT user_id
    FROM orders
    WHERE product = 'travel'
) AS u
-- The product test lives in ON (not WHERE) so the LEFT JOINs keep every
-- cohort member even when a product is missing.
LEFT JOIN sums AS books ON u.user_id = books.user_id AND books.product = 'books'
LEFT JOIN sums AS music ON u.user_id = music.user_id AND music.product = 'music'
LEFT JOIN sums AS food  ON u.user_id = food.user_id  AND food.product = 'food'
ORDER BY u.user_id
-- Per-customer revenue for books, music and food, restricted to customers
-- that have at least one travel order. Products the customer never bought
-- come out as 0 because of the ELSE 0 branch.
SELECT
    o.user_id,
    SUM(CASE WHEN o.product = 'books' THEN o.revenue ELSE 0 END) AS revenue_books,
    SUM(CASE WHEN o.product = 'music' THEN o.revenue ELSE 0 END) AS revenue_music,
    SUM(CASE WHEN o.product = 'food'  THEN o.revenue ELSE 0 END) AS revenue_food
FROM orders AS o
WHERE EXISTS (
    SELECT 1
    FROM orders AS t
    WHERE t.user_id = o.user_id
      AND t.product = 'travel'
)
GROUP BY o.user_id
http://sqlfiddle.com/#!17/71698/5
EDIT
As suggested, this is another option. But it shows null instead of zero. If zero is needed you should use coalesce()
-- Same report using aggregate FILTER clauses. SUM over zero matching rows is
-- NULL, so each sum is wrapped in COALESCE to report 0 instead.
SELECT
    user_id,
    COALESCE(SUM(revenue) FILTER (WHERE product = 'books'), 0) AS revenue_books,
    COALESCE(SUM(revenue) FILTER (WHERE product = 'music'), 0) AS revenue_music,
    COALESCE(SUM(revenue) FILTER (WHERE product = 'food'), 0)  AS revenue_food
FROM orders
WHERE user_id IN (
    SELECT user_id
    FROM orders
    WHERE product = 'travel'
)
GROUP BY user_id
http://sqlfiddle.com/#!17/71698/7

SQL not using ALIAS column for calculation

Question Statement - From the given trips and users tables for a taxi service, write a query to return the cancellation rate in the first two days in October, rounded to two decimal places, for trips not involving banned riders or drivers.
Question code on Oracle SQL.
create table trips (trip_id int, rider_id int, driver_id int, status varchar2(200), request_date date);
insert into trips values (1, 1, 10, 'completed', to_date ('2020-10-01', 'YYYY/MM/DD'));
insert into trips values (2, 2, 11, 'cancelled_by_driver', to_date ('2020-10-01', 'YYYY/MM/DD'));
insert into trips values (3, 3, 12, 'completed', to_date ('2020-10-01', 'YYYY/MM/DD'));
insert into trips values (4, 4, 10, 'cancelled_by_driver', to_date ('2020-10-02', 'YYYY/MM/DD'));
insert into trips values (5, 1, 11, 'completed', to_date ('2020-10-02', 'YYYY/MM/DD'));
insert into trips values (6, 2, 12, 'completed', to_date ('2020-10-02', 'YYYY/MM/DD'));
insert into trips values (7, 3, 11, 'completed', to_date ('2020-10-03', 'YYYY/MM/DD'));
create table users (user_id int, banned varchar2(200), type varchar2(200));
insert into users values (1, 'no', 'rider');
insert into users values (2, 'yes', 'rider');
insert into users values (3, 'no', 'rider');
insert into users values (4, 'no', 'rider');
insert into users values (10, 'no', 'driver');
insert into users values (11, 'no', 'driver');
insert into users values (12, 'no', 'driver');
My solution code is below. However, I get the following error. Can someone please help?
ORA-00904: "TOTAL_TRIPS": invalid identifier
SOLUTION CODE:
-- Asker's original attempt (kept as posted; this is the query that raises
-- ORA-00904). It computes, per request_date, 1 - completed / total for trips
-- whose rider is not banned.
-- The outer select references trips_completed and total_trips unquoted, while
-- the inner select declares those aliases in double quotes; the error message
-- reports the unquoted reference as "TOTAL_TRIPS", which does not match the
-- quoted lower-case alias.
select request_date, (1-(trips_completed/total_trips)) as "cancel_rate"
from
((
select request_date,
sum(case when status = 'completed' then 1 else 0 end) as "trips_completed",
sum(case when status = 'cancelled_by_driver' then 1 else 0 end) as "trips_cancelled",
-- total counts only the two statuses handled above; any other status adds 0
sum(case when status = 'cancelled_by_driver' then 1 when status= 'completed' then 1 else 0 end) as "total_trips"
from
(
-- u.banned is exposed under two aliases, but users is joined on rider_id only,
-- so both aliases carry the rider's flag and drivers are never checked
select t.rider_id, t.driver_id, t.status, t.request_date, u.banned as "not_banned_rider", u.banned as "not_banned_driver"
from trips t
join users u
on t.rider_id=u.user_id
where u.banned='no'
)
group by request_date
-- excludes exactly 2020-10-03; any other date outside the first two days of
-- October would still pass this filter
having request_date <> to_date ('2020-10-03', 'YYYY/MM/DD')
));
First, don't put identifiers in double quotes. They just clutter up queries.
Some other things to fix:
No need for two levels of subqueries.
Learn to use proper date literal syntax.
I think you want < rather than <>.
So that suggests:
-- Per-day cancellation rate (1 - completed / total) for trips requested
-- before 2020-10-03 whose rider is not banned.
WITH rd AS (
    -- Daily counters; total_trips counts completed and driver-cancelled trips only.
    SELECT
        request_date,
        SUM(CASE WHEN status = 'completed' THEN 1 ELSE 0 END) AS trips_completed,
        SUM(CASE WHEN status = 'cancelled_by_driver' THEN 1 ELSE 0 END) AS trips_cancelled,
        SUM(CASE WHEN status IN ('cancelled_by_driver', 'completed') THEN 1 ELSE 0 END) AS total_trips
    FROM trips t
    INNER JOIN users u
        ON t.rider_id = u.user_id
    WHERE u.banned = 'no'
      AND t.request_date < DATE '2020-10-03'
    GROUP BY request_date
)
SELECT
    request_date,
    (1 - (trips_completed / total_trips)) AS cancel_rate
FROM rd;
This can be further simplified using avg():
-- Per-day cancellation rate for trips requested before 2020-10-03 whose rider
-- is not banned, expressed as an average of a 0/1 flag.
-- The flag is 1 for a driver-cancelled trip and 0 for a completed one, so the
-- average is cancelled / (cancelled + completed), i.e. the same value as
-- 1 - trips_completed / total_trips. (Averaging a "completed" flag instead
-- would return the completion rate.) Any other status yields NULL, which AVG
-- skips, so such trips stay out of the denominator.
select request_date,
       avg(case when status = 'cancelled_by_driver' then 1
                when status = 'completed' then 0
           end) as cancel_rate
from trips t join
     users u
     on t.rider_id = u.user_id
where u.banned = 'no' and
      request_date < date '2020-10-03'
group by request_date ;
Note: This addresses fixing the query in your question. It doesn't actually correctly answer the question, for the following reasons:
I'm pretty sure the question entails one cancellation rate, not one for two dates.
It doesn't take into account banned drivers.
I'm not sure how "cancelled by user" would be handled.
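ORA-00904: "TOTAL_TRIPS": invalid identifier
just means that the identifier total_trips, as referenced in the outer query, does not exist: the inner query declares the alias in double quotes ("total_trips"), which makes it case-sensitive, while the unquoted reference is resolved as TOTAL_TRIPS.
Just use total_trips (without quotes) in both places.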

sql that finds records within 3 days of a condition being met

I am trying to find all records that exist within a date range prior to an event occurring. In my table below, I want to pull all records that are 3 days or less from when the switch field changes from 0 to 1, ordered by date, partitioned by product. My solution does not work, it includes the first record when it should skip as it's outside the 3 day window. I am scanning a table with millions of records, is there a way to reduce the complexity/cost while maintaining my desired results?
http://sqlfiddle.com/#!18/eebe7
CREATE TABLE productlist
([product] varchar(13), [switch] int, [switchday] date)
;
INSERT INTO productlist
([product], [switch], [switchday])
VALUES
('a', 0, '2019-12-28'),
('a', 0, '2020-01-02'),
('a', 1, '2020-01-03'),
('a', 0, '2020-01-06'),
('a', 0, '2020-01-07'),
('a', 1, '2020-01-09'),
('a', 1, '2020-01-10'),
('a', 1, '2020-01-11'),
('b', 1, '2020-01-01'),
('b', 0, '2020-01-02'),
('b', 0, '2020-01-03'),
('b', 1, '2020-01-04')
;
my solution:
-- Asker's original attempt (kept as posted; it does not produce the desired
-- output).
-- switches: tags a row as 'first day switch' when it has switch = 0 and the
-- next row of the same product (by switchday) has switch = 1.
with switches as (
SELECT
*,
case when lead(switch) over (partition by product order by switchday)=1
and switch=0 then 'first day switch'
else null end as leadswitch
from productlist
),
-- switchdays: only the tagged rows, i.e. the last 0-row before each 0 -> 1 change.
switchdays as (
select * from switches
where leadswitch='first day switch'
)
select pl.*
,'lead'
from productlist pl
-- LEFT JOIN keeps every productlist row whether or not a switchdays row
-- matches, and no column of ss is selected or filtered on, so the join does
-- not restrict the result: every switch = 0 row is returned.
left join switchdays ss
on pl.product=ss.product
-- equality on switchday means datediff below is always 0 for matched rows
and pl.switchday = ss.switchday
and datediff(day, pl.switchday, ss.switchday)<=3
where pl.switch=0
desired output, capturing records that occur within 3 days of a switch going from 0 to 1, for each product, ordered by date:
product switch switchday
a 0 2020-01-02 lead
a 0 2020-01-06 lead
a 0 2020-01-07 lead
b 0 2020-01-02 lead
b 0 2020-01-03 lead
If I understand correctly, you can just use lead() twice:
-- Rows with switch = 0 whose next row, or the row after that (same product,
-- ordered by switchday), has switch = 1.
WITH looked_ahead AS (
    SELECT
        pl.*,
        LEAD(switch)    OVER (PARTITION BY product ORDER BY switchday) AS next_switch_1,
        LEAD(switch, 2) OVER (PARTITION BY product ORDER BY switchday) AS next_switch_2
    FROM productlist pl
)
SELECT la.*
FROM looked_ahead la
WHERE la.switch = 0
  AND (la.next_switch_1 = 1 OR la.next_switch_2 = 1);
Here is a db<>fiddle.
EDIT (based on comment):
-- Rows with switch = 0 that lie at most 3 days before the next switch = 1 row
-- of the same product.
select pl.*
from (select pl.*,
             -- Walking each product from the latest date backwards, the running
             -- MIN of the switch = 1 dates is the nearest switch day on or after
             -- the current row (NULL when no later switch exists).
             min(case when switch = 1 then switchday end)
                 over (partition by product order by switchday desc) as next_switch_1_day
      from productlist pl
     ) pl
where switch = 0 and
      -- 3-day window: the desired output keeps 2020-01-06 for the switch on
      -- 2020-01-09, which is exactly 3 days later.
      next_switch_1_day <= dateadd(day, 3, switchday);

How to group and collect data in PostgreSQL by date periods?

Here is a demo data:
create table Invoices (
id INT,
name VARCHAR,
customer_id INT,
total_amount FLOAT,
state VARCHAR,
invoice_date DATE
);
INSERT INTO Invoices
(id, name, customer_id, total_amount, state, invoice_date)
VALUES
(1, 'INV/2020/0001', 2, 100, 'posted', '2020-04-05'),
(2, 'INV/2020/0002', 1, 100, 'draft', '2020-04-05'),
(3, 'INV/2020/0003', 2, 100, 'draft', '2020-05-24'),
(4, 'INV/2020/0004', 1, 100, 'posted', '2020-05-25'),
(5, 'INV/2020/0005', 2, 100, 'posted', '2020-06-05'),
(6, 'INV/2020/0006', 1, 100, 'posted', '2020-07-05'),
(7, 'INV/2020/0007', 1, 100, 'draft', '2020-08-24'),
(8, 'INV/2020/0008', 1, 100, 'posted', '2020-08-25'),
(9, 'INV/2020/0009', 1, 100, 'posted', '2020-09-05'),
(10, 'INV/2020/0010', 1, 100, 'draft', '2020-09-05'),
(11, 'INV/2020/0011', 2, 100, 'draft', '2020-10-24'),
(12, 'INV/2020/0012', 1, 100, 'posted', '2020-10-25'),
(13, 'INV/2020/0013', 2, 100, 'posted', '2020-11-05'),
(14, 'INV/2020/0014', 1, 100, 'posted', '2020-11-05'),
(15, 'INV/2020/0015', 2, 100, 'draft', '2020-11-24'),
(16, 'INV/2020/0016', 1, 100, 'posted', '2020-11-25')
I have a query that computes a sum of all posted invoices for customer with id = 1
SELECT sum(total_amount), customer_id
FROM Invoices
WHERE state = 'posted' AND customer_id = 1
GROUP BY customer_id
I need to group the data (sum(total_amount)) by 3 time periods - 2 or 3 months each (2 or 3 needs to be able to change by changing the number in the query. I want to pass it as a parameter to the query from python code).
Also I need to get the average sums of the period.
Can you help me please?
Expected output for period = 2 months is:
+--------------+--------------+--------------+--------+
| Period_1_sum | Period_2_sum | Period_3_sum | Avg |
+--------------+--------------+--------------+--------+
| 300 | 300 | 100 | 233.33 |
+--------------+--------------+--------------+--------+
You can use conditional aggregation for that:
-- Posted-invoice totals per customer: an overall sum plus one sum per
-- half-open date range. A range with no invoices yields NULL.
SELECT
    customer_id,
    SUM(total_amount) AS total_amount,
    SUM(total_amount) FILTER (
        WHERE invoice_date >= DATE '2020-04-01'
          AND invoice_date <  DATE '2020-07-01'
    ) AS period_1_sum,
    SUM(total_amount) FILTER (
        WHERE invoice_date >= DATE '2020-07-01'
          AND invoice_date <  DATE '2020-10-01'
    ) AS period_2_sum,
    SUM(total_amount) FILTER (
        WHERE invoice_date >= DATE '2020-10-01'
          AND invoice_date <  DATE '2021-01-01'
    ) AS period_3_sum
FROM Invoices
WHERE state = 'posted'
GROUP BY customer_id
By changing the filter condition you can control which rows are aggregated for each period.
Online example

Making a pivot table group by users

I want to see user statistics, so I made this query:
-- Per partner (325, 326) and product manager: item counters and a percentage.
-- prod_man is built as "<meno> <abbreviated priezvisko>.": the abbreviation is
-- the first two characters when the surname starts with Sz, Gy, Ny, Zs or Cs,
-- otherwise just the first character.
SELECT l.partner AS Partner ,
bu.meno||' '||decode(substr(bu.priezvisko, 1, 2), 'Sz',
substr(bu.priezvisko, 1, 2), 'Gy',
substr(bu.priezvisko, 1, 2), 'Ny',
substr(bu.priezvisko, 1, 2), 'Zs',
substr(bu.priezvisko, 1, 2), 'Cs',
substr(bu.priezvisko, 1, 2),
substr(bu.priezvisko, 1, 1))
||'.' AS prod_man -- Hungarian surnames may start with a two-letter digraph
, SUM(CASE
WHEN o.pocet!=0 THEN 1
ELSE 0
END) AS obj_pocet -- number of rows with a non-zero pocet
, SUM(CASE
WHEN o.pocet=0 OR o.p_del+o.p_del_dod>=o.pocet THEN 1
ELSE 0
END) AS nedod_pocet -- number of rows with pocet = 0 or p_del + p_del_dod >= pocet
, ROUND(SUM(CASE
WHEN o.pocet=0 OR o.p_del+o.p_del_dod>=o.pocet THEN 1
ELSE 0
END)/count(*), 3) * 100 AS "%" -- nedod_pocet as a share of all rows in the group, in percent
FROM obj_odb_o o
JOIN obj_odb_l l ON o.rid_o=l.rid
JOIN sklad_karta sk ON sk.id=o.kod_id
-- NOTE(review): sk.id.prod_man is a three-part reference; presumably a single
-- column of sklad_karta was meant -- confirm the real column name.
JOIN bartex_users bu ON bu.id=sk.id.prod_man
WHERE l.partner in (325,
326)
-- the prod_man expression is repeated verbatim because the select alias
-- is not used in GROUP BY here
GROUP BY l.partner
, bu.meno||' '||decode(substr(bu.priezvisko, 1, 2), 'Sz',
substr(bu.priezvisko, 1, 2), 'Gy',
substr(bu.priezvisko, 1, 2), 'Ny',
substr(bu.priezvisko, 1, 2), 'Zs',
substr(bu.priezvisko, 1, 2), 'Cs',
substr(bu.priezvisko, 1, 2),
substr(bu.priezvisko, 1, 1))
||'.'
It's working. Here is the result:
But I want to make a pivot by Months (last 6 months)...
WITH MONTHS AS
(
SELECT ADD_MONTHS(TRUNC(SYSDATE,'MONTH'),-LEVEL+1) AS MONTH,
DECODE(LEVEL,1,'Akt_mesiac','minuly_mesiac'||(LEVEL-1)) AS MONTH_NAME FROM DUAL CONNECT BY LEVEL <=7)
SELECT
partner,
prod_man,
'%',
NVL(Akt_mesiac,0) AS Akt_mesiac,
NVL(minuly_mesiac1,0) AS minuly_mesiac1,
NVL(minuly_mesiac2,0) AS minuly_mesiac2,
NVL(minuly_mesiac3,0) AS minuly_mesiac3,
NVL(minuly_mesiac4,0) AS minuly_mesiac4,
NVL(minuly_mesiac5,0) AS minuly_mesiac5,
NVL(minuly_mesiac6,0) AS minuly_mesiac6
FROM (
SELECT
-- my query - HERE I HAVE PROBLEM HERE
FROM MONTHS M
JOIN obj_odb_l l ON M.MONTH=TRUNC(l.datum_p,'MONTH')
) PIVOT
( SUM(CNT)
FOR MONTH_NAME IN
('Akt_mesiac' AS Akt_mesiac,
'minuly_mesiac1' AS minuly_mesiac1,
'minuly_mesiac2' AS minuly_mesiac2,
'minuly_mesiac3' AS minuly_mesiac3,
'minuly_mesiac4' AS minuly_mesiac4,
'minuly_mesiac5' AS minuly_mesiac5,
'minuly_mesiac6' AS minuly_mesiac6)
);
Table: obj_odb_l l ->date column -> l.datum_p -> trunc(l.datum_p,'MONTH')
How can I make a pivot table ?
Consider adding the month expression, TRUNC(l.datum_p,'MONTH'), into above aggregate query. Then run the query as another CTE in pivot query for JOIN in pivot's data source.
-- Pivots the per-month percentage into one column per month
-- (current month plus the six previous ones).
WITH MONTHS AS (
    -- One row per month: LEVEL 1 is the current month ('Akt_mesiac'),
    -- LEVEL 2..7 are 'minuly_mesiac1' .. 'minuly_mesiac6'.
    SELECT ADD_MONTHS(TRUNC(SYSDATE,'MONTH'),-LEVEL+1) AS MONTH
         , DECODE(LEVEL,1,'Akt_mesiac','minuly_mesiac'||(LEVEL-1)) AS MONTH_NAME
    FROM DUAL CONNECT BY LEVEL <=7
)
, AGG AS (
    -- SAME AGGREGATE QUERY WITH TRUNC(l.datum_p,'MONTH') ADDED TO SELECT AND GROUP BY
    -- POSSIBLY ADD WHERE CONDITION FOR LAST SIX MONTHS (IF DATA GOES BACK YEARS)
)
SELECT *
FROM (
    -- NOTE(review): every column selected here other than PCT and MONTH_NAME
    -- becomes an implicit grouping column of the PIVOT. Because obj_pocet and
    -- nedod_pocet vary by month, they can spread one partner/prod_man over
    -- several output rows -- confirm whether they should stay in this list.
    SELECT AGG.partner
         , AGG.prod_man
         , AGG.obj_pocet
         , AGG.nedod_pocet
         , AGG."%" AS PCT -- "%" IS A QUOTED IDENTIFIER (DOUBLE QUOTES); AVOID SPECIAL CHARS AS NAME
         , M.MONTH_NAME
    FROM MONTHS M
    INNER JOIN AGG
        ON M.MONTH = AGG.MONTH -- NEW FIELD USED FOR JOIN
)
PIVOT
( SUM(PCT) -- ONLY PIVOTS ONE NUM AT A TIME
  FOR MONTH_NAME IN
  ('Akt_mesiac' AS Akt_mesiac,
   'minuly_mesiac1' AS minuly_mesiac1,
   'minuly_mesiac2' AS minuly_mesiac2,
   'minuly_mesiac3' AS minuly_mesiac3,
   'minuly_mesiac4' AS minuly_mesiac4,
   'minuly_mesiac5' AS minuly_mesiac5,
   'minuly_mesiac6' AS minuly_mesiac6)
);