Use RANK to find the last purchase for each customer - sql

I'm trying to find the last purchase for each customer_id. Since there are 3 customers I was expecting to get back 3 rows but I'm getting more.
Can someone tell me what's wrong and how to fix this issue. Any help would be greatly appreciated
-- Session-level default formats for TIMESTAMP and DATE values.
alter session set nls_timestamp_format = 'DD-MON-YYYY HH24:MI:SS.FF';
alter session set nls_date_format = 'DD-MON-YYYY HH24:MI:SS';

-- Three sample customers.
create table customers (customer_id, first_name, last_name) as
    select 1, 'Faith', 'Mazzarone' from dual
    union all
    select 2, 'Lisa', 'Saladino' from dual
    union all
    select 3, 'Jerry', 'Torchiano' from dual;

-- Three sample products.
create table items (product_id, product_name) as
    select 100, 'Black Shoes' from dual
    union all
    select 101, 'Brown Shoes' from dual
    union all
    select 102, 'White Shoes' from dual;

-- Sample purchases: two fixed rows per customer, plus five generated rows
-- for customer 3 (see the CONNECT BY branch at the end).
create table purchases (customer_id, product_id, quantity, purchase_date) as
    select 1, 100, 1, timestamp '2022-10-11 09:54:48' from dual
    union all
    select 1, 100, 1, timestamp '2022-10-11 19:04:18' from dual
    union all
    select 2, 101, 1, timestamp '2022-10-11 09:54:48' from dual
    union all
    select 2, 101, 1, timestamp '2022-10-17 19:04:18' from dual
    union all
    select 3, 101, 1, timestamp '2022-10-11 09:54:48' from dual
    union all
    select 3, 102, 1, timestamp '2022-10-17 19:04:18' from dual
    union all
    -- Row generator: LEVEL runs 1..5, so this branch yields purchases at
    -- 17:00 on 2022-10-12, -14, -16, -18 and -20.
    select 3, 102, 4, timestamp '2022-10-10 17:00:00' + numtodsinterval(level * 2, 'DAY')
    from dual
    connect by level <= 5;
-- Latest purchase of each customer, enriched with customer and product names.
-- ROW_NUMBER (rather than RANK) numbers the rows of a customer uniquely, so
-- exactly one row per customer gets rn = 1 even when purchase_date values tie.
-- The extra sort keys only make the choice among tied rows deterministic.
WITH ranked_purchases AS (
    SELECT
        customer_id,
        product_id,
        quantity,
        purchase_date,
        ROW_NUMBER() OVER (
            PARTITION BY customer_id
            ORDER BY purchase_date DESC, product_id DESC, quantity DESC
        ) AS rn
    FROM purchases
)
SELECT
    p.customer_id,
    c.first_name,
    c.last_name,
    p.product_id,
    i.product_name,
    p.quantity,
    p.purchase_date
FROM ranked_purchases p
INNER JOIN customers c ON c.customer_id = p.customer_id
INNER JOIN items i ON i.product_id = p.product_id
WHERE p.rn = 1
ORDER BY p.customer_id;

First, don't use RANK or DENSE_RANK - they assign the same rank to rows with identical purchase_date values, so a customer can end up with more than one row ranked 1. Use ROW_NUMBER instead.
Second, you have "from cte p" in there twice. Remove the second one.
And lastly, the real answer to your question is that you have a semicolon before the "where rank = 1" and so nothing after the semicolon is being executed. Hence it isn't filtering. A semicolon ends the SQL, completely.

Related

How can I use the LAG function to show revenue this year vs last year in Snowflake?

I would like to show the current revenue vs last year's revenue in the same row, per region and per type. Example: for 2022-04-01, US, Type 1 --> REVENUE: 2456, REVENUE_LAST_YEAR: 4000
2021-04-01, US, Type 1 --> REVENUE: 4000, REVENUE_LAST_YEAR: 0
For some reason, the Lag formula in Snowflake is showing wrong values. Could someone please help ?
-- Sample monthly revenue per region and type.
with indata (dt, region, type, revenue) as (
    select date '2021-04-01', 'US', 'Type 1', 4000
    union all select date '2021-05-01', 'Europe', 'Type 2', 5777
    union all select date '2021-06-01', 'US', 'Type 1', 45433
    union all select date '2021-07-01', 'Europe', 'Type 2', 8955
    union all select date '2021-08-01', 'US', 'Type 1', 45777
    union all select date '2021-09-01', 'Asia', 'Type 1', 7533
    union all select date '2021-10-01', 'US', 'Type 1', 8866
    union all select date '2021-11-01', 'Asia', 'Type 2', 5534
    union all select date '2021-12-01', 'US', 'Type 2', 4000
    union all select date '2022-01-01', 'Asia', 'Type 1', 7244
    union all select date '2022-02-01', 'US', 'Type 1', 6678
    union all select date '2022-03-01', 'Asia', 'Type 1', 5654
    union all select date '2022-04-01', 'US', 'Type 1', 2456
    union all select date '2022-05-01', 'Asia', 'Type 1', 4525
    union all select date '2022-06-01', 'US', 'Type 1', 6677
    union all select date '2022-07-01', 'Asia', 'Type 1', 6654
    union all select date '2022-08-01', 'US', 'Type 1', 6677
    union all select date '2022-09-01', 'Asia', 'Type 2', 5754
    union all select date '2022-10-01', 'US', 'Type 1', 7744
    union all select date '2022-11-01', 'Asia', 'Type 2', 5644
    union all select date '2022-12-01', 'Europe', 'Type 2', 6775
    union all select date '2023-01-01', 'Asia', 'Type 2', 6777
    union all select date '2023-02-01', 'Europe', 'Type 2', 7755
)
-- NOTE(review): this is the attempt that shows the unexpected values. The
-- window is partitioned by revenue as well as region and type, so LAG only
-- looks back among rows that share the same revenue value.
select
    indata.*,
    lag(revenue, 1, 0) over (
        partition by region, type, revenue
        order by year(dt)
    ) revenue_last_year
from indata
order by year(dt)
Partitioning by region, type and month-day:
-- Revenue of the previous row within the same region, type and calendar
-- month-day (0 when that group has no earlier row). Reads the indata CTE.
select
    indata.*,
    lag(revenue, 1, 0) over (
        partition by region, type, to_varchar(dt, 'mmdd')
        order by dt
    ) as revenue_last_year
from indata
order by dt;
Output:

Identify which users have positive balance every day in SQL

I have user transaction data in a bank from several users with the following schema:
-- Bank transactions: one row per movement on a user's account.
-- amount is signed (deposits positive, withdrawals negative) and stored as an
-- exact decimal so that running balances do not accumulate float rounding error.
CREATE TABLE IF NOT EXISTS transactions (
    id         int,
    user_id    int,
    created_at DATE,
    amount     NUMERIC(12, 2)
);

-- Explicit column list: the insert keeps working if columns are added or
-- reordered later.
INSERT INTO transactions (id, user_id, created_at, amount) VALUES
    (1, 1, '2020-01-01', 100),
    (2, 1, '2020-01-02', -50),
    (3, 1, '2020-01-04', -50),
    (4, 2, '2020-01-04', 80),
    (5, 3, '2020-01-06', 10),
    (6, 3, '2020-01-10', -10);
I want to know, for each day from the beginning of the transactions to the current date, which users have a positive balance on their accounts.
In this case, the output of the query would be:
date,user_id
'2020-01-01',1
'2020-01-02',1
'2020-01-03',1
'2020-01-04',1
'2020-01-04',2
'2020-01-05',2
'2020-01-06',2
'2020-01-07',2
...
'2021-05-17',2 -- Today's date, user 2 still has positive balance
'2020-01-06',3
'2020-01-07',3
'2020-01-08',3
'2020-01-09',3
'2020-01-10',3
Is there an easy way to do this using PostgreSQL? Or even better, in BigQuery?
Try this for BigQuery:
-- Days on which each user has a positive running balance (BigQuery).
with transactions as (
  -- Inline sample data; every date is a typed DATE literal.
  select 1 as user_id, date '2020-01-01' as date, 100 as amount union all
  select 1, date '2020-01-02', -50 union all
  select 1, date '2020-01-04', -50 union all
  select 2, date '2020-01-04', 80 union all
  select 3, date '2020-01-06', 10 union all
  select 3, date '2020-01-10', -10
),
-- One row per user and day, so several transactions on the same day cannot
-- duplicate that day in the output.
daily_totals as (
  select user_id, date, sum(amount) as amount
  from transactions
  group by user_id, date
),
-- First transaction date of each user: the start of that user's calendar.
all_users as (
  select user_id, min(date) as min_date
  from daily_totals
  group by user_id
),
-- Every calendar day from the user's first transaction up to today. The date
-- array starts at min_date, so no start date is hard-coded.
all_days as (
  select user_id, date
  from all_users, unnest(generate_date_array(min_date, current_date())) as date
)
select date, user_id
from all_days left join daily_totals using (user_id, date)
-- "where true" is kept from the original; presumably it is there because
-- QUALIFY needed an accompanying WHERE/GROUP BY/HAVING clause - TODO confirm.
where true
-- Running balance per user; days without transactions contribute NULL, which
-- SUM ignores.
qualify sum(amount) over (partition by user_id order by date) > 0
Without qualify:
-- Days on which each user has a positive running balance (BigQuery),
-- written with a derived table instead of QUALIFY.
with transactions as (
  -- Inline sample data; every date is a typed DATE literal.
  select 1 as user_id, date '2020-01-01' as date, 100 as amount union all
  select 1, date '2020-01-02', -50 union all
  select 1, date '2020-01-04', -50 union all
  select 2, date '2020-01-04', 80 union all
  select 3, date '2020-01-06', 10 union all
  select 3, date '2020-01-10', -10
),
-- One row per user and day, so several transactions on the same day cannot
-- duplicate that day in the output.
daily_totals as (
  select user_id, date, sum(amount) as amount
  from transactions
  group by user_id, date
),
-- First transaction date of each user: the start of that user's calendar.
all_users as (
  select user_id, min(date) as min_date
  from daily_totals
  group by user_id
),
-- Every calendar day from the user's first transaction up to today. The date
-- array starts at min_date, so no start date is hard-coded.
all_days as (
  select user_id, date
  from all_users, unnest(generate_date_array(min_date, current_date())) as date
)
select date, user_id
from (
  -- Running balance per user; days without transactions contribute NULL,
  -- which SUM ignores.
  select date, user_id, sum(amount) over (partition by user_id order by date) as balance
  from all_days left join daily_totals using (user_id, date)
)
where balance > 0

Count only when the next row is different from the previous row

I have a table with 12 records. I want to count a row only if the column "AREA_OPERATIVA" is different from the previous row (ordering by date asc).
For example, from row 1 to row 2 it shouldn't count anything because both have same area 'CROSS' but between rows 2 and 3, it should count (or sum 1, I don't care) since 'CROSS' and 'UTRDANIOS' are different. So the final count should be 3 for the whole table.
Is it possible to do this via query or do I need to make a script with a cursor for this purpose?
I've tried this:
-- NOTE(review): first attempt, reported as not giving the expected count.
-- The self-join pairs rows on equal creclama, cdistribuidora and secuencia,
-- so the CASE compares area_operativa between rows that share all three keys.
select
    a.creclama,
    sum(
        case
            when b.area_operativa not like a.area_operativa then 1
            else 0
        end
    ) as increment
from tr_asgareope a
inner join tr_asgareope b
    on  a.creclama = b.creclama
    and a.cdistribuidora = b.cdistribuidora
    and a.secuencia = b.secuencia
where a.creclama = 10008354
group by a.creclama;
But is counting the full 12 rows.
EDIT:
I finally solved this with the following query:
-- Number of times AREA_OPERATIVA changes between consecutive rows of claim
-- 10008354, with rows ordered by modification date (fmodifica).
select
    sum(
        case
            -- "<>" instead of NOT LIKE: a plain inequality, so "%" or "_" in
            -- an area name is not treated as a wildcard. For the last row
            -- siguiente_fila is NULL, the comparison is unknown, and the row
            -- counts as 0.
            when comparacion.area_operativa <> comparacion.siguiente_fila then 1
            else 0
        end
    ) as incremento
from (
    select
        creclama,
        area_operativa,
        -- Area of the next row in modification order.
        lead(area_operativa) over (order by fmodifica) as siguiente_fila
    from TR_ASGAREOPE
    where creclama = 10008354
) comparacion;
Hope it is useful for someone in the future, it really got me stuck for a day. Thank you all guys.
You could try using analytic functions like lead or lag, for example
-- Flags every row whose AREA_OPERATIVA differs from the next row of the same
-- claim (INCREMENT = 1); summing INCREMENT gives the number of changes.
SELECT
    CRECLAMA,
    CASE WHEN AREA_OPERATIVA <> NEXTROW THEN 1 ELSE 0 END AS INCREMENT
FROM (
    SELECT
        CRECLAMA,
        AREA_OPERATIVA,
        -- Partition per claim so rows of different claims are never compared,
        -- and order by the modification date so "next row" is well defined.
        -- Ordering by CRECLAMA alone leaves the order inside a claim arbitrary.
        -- NOTE(review): FMODIFICA is the date column used in the asker's own
        -- query; confirm it is the intended ordering column.
        LEAD(AREA_OPERATIVA) OVER (PARTITION BY CRECLAMA ORDER BY FMODIFICA) AS NEXTROW
    FROM TR_ASGAREOPE
)
Here's an approach using LEAD:
-- Counts, per CRECLAMA, how many times AREA_OPERATIVA changes between
-- consecutive rows in DATE_FIELD order.
-- The inline TR_ASGAREOPE CTE is sample data; a trailing "-- n" marks the row
-- after which the n-th change of that claim occurs.
WITH TR_ASGAREOPE(CRECLAMA, AREA_OPERATIVA, DATE_FIELD) AS
  (SELECT 10008354, 'CROSS',     DATE '2019-01-01' FROM DUAL UNION ALL
   SELECT 10008354, 'CROSS',     DATE '2019-01-02' FROM DUAL UNION ALL -- 1
   SELECT 10008354, 'UTRDANIOS', DATE '2019-01-03' FROM DUAL UNION ALL -- 2
   SELECT 10008354, 'EXP263',    DATE '2019-01-04' FROM DUAL UNION ALL -- 3
   SELECT 10008354, 'EXP6',      DATE '2019-01-05' FROM DUAL UNION ALL
   SELECT 10008354, 'EXP6',      DATE '2019-01-06' FROM DUAL UNION ALL
   SELECT 10008354, 'EXP6',      DATE '2019-01-07' FROM DUAL UNION ALL
   SELECT 10008354, 'EXP6',      DATE '2019-01-08' FROM DUAL UNION ALL
   SELECT 10008354, 'EXP6',      DATE '2019-01-09' FROM DUAL UNION ALL
   SELECT 10008354, 'EXP6',      DATE '2019-01-10' FROM DUAL UNION ALL
   SELECT 10008354, 'EXP6',      DATE '2019-01-11' FROM DUAL UNION ALL
   SELECT 10008354, 'EXP6',      DATE '2019-01-12' FROM DUAL UNION ALL
   SELECT 12345678, 'AREA49',    DATE '2019-02-01' FROM DUAL UNION ALL
   SELECT 12345678, 'AREA49',    DATE '2019-02-02' FROM DUAL UNION ALL -- 1
   SELECT 12345678, 'AREA50',    DATE '2019-02-03' FROM DUAL UNION ALL
   SELECT 12345678, 'AREA50',    DATE '2019-02-04' FROM DUAL UNION ALL -- 2
   SELECT 12345678, 'AREA52',    DATE '2019-02-05' FROM DUAL UNION ALL
   SELECT 12345678, 'AREA52',    DATE '2019-02-06' FROM DUAL UNION ALL
   SELECT 12345678, 'AREA52',    DATE '2019-02-07' FROM DUAL UNION ALL -- 3
   SELECT 12345678, 'AREA53',    DATE '2019-02-08' FROM DUAL UNION ALL -- 4
   SELECT 12345678, 'AREA52',    DATE '2019-02-09' FROM DUAL UNION ALL -- 5
   SELECT 12345678, 'AREA53',    DATE '2019-02-10' FROM DUAL),
cteData AS (
  SELECT CRECLAMA,
         AREA_OPERATIVA,
         -- Partitioning by CRECLAMA keeps the comparison inside one claim even
         -- when the dates of different claims interleave. The last row of each
         -- claim gets NULL and therefore never counts as a change.
         LEAD(AREA_OPERATIVA) OVER (PARTITION BY CRECLAMA
                                    ORDER BY DATE_FIELD) AS NEXT_AREA_OPERATIVA
    FROM TR_ASGAREOPE)
SELECT CRECLAMA, COUNT(*)
  FROM cteData
  WHERE AREA_OPERATIVA <> NEXT_AREA_OPERATIVA
  GROUP BY CRECLAMA
  ORDER BY CRECLAMA;
I added data for another CRECLAMA value to show how it would work.
Results:
CRECLAMA COUNT(*)
10008354 3
12345678 5
dbfiddle here
You can use lag() analytic function :
-- Counts the rows of claim 10008354 whose area_operativa differs from the
-- previous row in "date" order. LAG defaults to the row's own value, so the
-- first row compares against itself.
select
    changes.creclama,
    sum(case when changes.lg = changes.area_operativa then 0 else 1 end) as "increment"
from (
    select
        a.*,
        lag(a.area_operativa, 1, a.area_operativa) over (order by a."date") as lg
    from asgareope a
    where a.creclama = 10008354
) changes
group by changes.creclama
I think you can simply use COUNT(DISTINCT ...) for your problem, provided that AREA_OPERATIVA can never return to a previously used value:
-- Number of area changes per claim, valid only if an area never recurs
-- within a claim. N distinct areas mean N - 1 changes (the first area is not
-- a change); GREATEST guards claims whose AREA_OPERATIVA is always NULL.
SELECT
    CRECLAMA,
    GREATEST(COUNT(DISTINCT AREA_OPERATIVA) - 1, 0) AS CHANGES
FROM TR_ASGAREOPE
GROUP BY CRECLAMA

PL/SQL distinct date for loop

I want to use a FOR loop over the dates in my table, considering only the year and month, not the day.
-- Cursor over the order dates of all orders whose status is 'DELAYED'.
cursor ret_cur is
    select orderdate
    from Orders
    where status = 'DELAYED';

-- Record variable with the same row shape as ret_cur.
ret_rec ret_cur%rowtype;
I currently have
-- Sample row illustrating the format of the data in Orders.
insert into Orders (
    OrderId,
    CustomerId,
    RetailerId,
    ProductId,
    Count,
    UnitPrice,
    OrderDate,
    Status
)
values (
    2,
    2,
    1,
    10,
    45,
    60,
    to_date('20180102','YYYYMMDD'),
    'DELIVERED'
);
this data type in my orders table. (its an example for format)
I want to use DISTINCT to iterate through orderdate based on YYYY-MM. (dont care Day)
I have tried select distinct to_char(orderdate, 'YYYY-MM') but it does not seem to work.
for example, if i have 20180103, 20180104, 20180105 , it should be one iteration since they all have same years and months.
To select days without the time portion you could use trunc(sysdate). For months we have to group by a char value:
-- Row count per year-month, demonstrated on a stand-in table of nine dates
-- relative to today (offsets 0-4 and 30-33 days back).
with fake_dates as (
    select sysdate      as mydatecol from dual union all
    select sysdate - 1  as mydatecol from dual union all
    select sysdate - 2  as mydatecol from dual union all
    select sysdate - 3  as mydatecol from dual union all
    select sysdate - 4  as mydatecol from dual union all
    select sysdate - 30 as mydatecol from dual union all
    select sysdate - 31 as mydatecol from dual union all
    select sysdate - 32 as mydatecol from dual union all
    select sysdate - 33 as mydatecol from dual
)
select to_char(mydatecol,'yyyymm'), count(*)
from fake_dates
group by to_char(mydatecol,'yyyymm')
Result:
201809 3
201810 6
I think you'd like to have such a collation as below :
-- Orders placed in the same year and month as a given date.
-- The date comes from the substitution variable myDate in yyyymmdd form,
-- e.g. 20180103, 20180104 or 20180105.
with Orders (
    OrderId,
    CustomerId,
    RetailerId,
    ProductId,
    Count,
    UnitPrice,
    OrderDate,
    Status
) as (
    -- Single sample order.
    select 2, 2, 1, 10, 45, 60, to_date('20180102','YYYYMMDD'), 'DELIVERED'
    from dual
)
select o.*
from Orders o
where to_char(OrderDate,'yyyy-mm') = to_char(to_date('&myDate','yyyymmdd'),'yyyy-mm');
The best way to do this kind of query is to truncate the date value:
-- Units and value ordered per customer and calendar month.
-- trunc(OrderDate,'MM') maps every date to the first day of its month.
select
    CustomerId,
    trunc(OrderDate,'MM')  OrderMonth,
    sum(Count)             totalCount,
    sum(Count * UnitPrice) totalPrice
from Orders
group by
    CustomerId,
    trunc(OrderDate,'MM')
for example...

SQL last status change date

I am trying to get the dates of last status changes. Below is an example data table.
In brief I want to query the minimum DATE value of the latest STATUS (ordered by CHANGE_NO) for each PRODUCT_ID. Mentioned values are the ones filled with yellow.
So far, I could get only the latest dates for each product.
-- Row with the highest CHANGE_NO of every product.
-- TABLE and DATE are the placeholder names used in the question.
WITH changes AS (
    SELECT
        PRODUCT_ID,
        CHANGE_NO,
        STATUS,
        DATE,
        MAX(CHANGE_NO) OVER (PARTITION BY PRODUCT_ID) MAX_CHANGE_NO
    FROM TABLE
    ORDER BY PRODUCT_ID, CHANGE_NO
)
SELECT *
FROM changes
WHERE MAX_CHANGE_NO = CHANGE_NO
Please kindly share the link if there is already a question/answer for a similar case; I've searched but couldn't find any.
Note: I am using Oracle SQL.
Thanks in advance.
Here's one way to do this with analytic functions (avoiding joins).
with
     test_data ( product_id, change_no, status, dt ) as (
       select 1, 1, 'A', date '2016-10-10' from dual union all
       select 1, 2, 'B', date '2016-10-11' from dual union all
       select 1, 3, 'C', date '2016-10-12' from dual union all
       select 1, 4, 'D', date '2016-10-13' from dual union all
       select 2, 1, 'Y', date '2016-02-02' from dual union all
       select 2, 2, 'X', date '2016-02-03' from dual union all
       select 2, 3, 'X', date '2016-02-04' from dual union all
       select 3, 1, 'H', date '2016-06-20' from dual union all
       select 3, 2, 'G', date '2016-06-21' from dual union all
       select 3, 3, 'T', date '2016-06-22' from dual union all
       select 3, 4, 'K', date '2016-06-23' from dual union all
       select 3, 5, 'K', date '2016-06-24' from dual union all
       select 3, 6, 'K', date '2016-06-25' from dual
     )
-- End of test data (not part of the solution). SQL query begins below this line.
-- For each product: the latest status and the date on which that status began.
-- Status and date are both read from the same row (the last flagged row by
-- change_no), so the result does not depend on dt increasing with change_no.
select   product_id,
         max(status) keep (dense_rank last order by change_no) as status,
         max(dt)     keep (dense_rank last order by change_no) as dt
from     (
           -- flag = 1 marks a row that starts a new status: the first row of
           -- a product (LAG is NULL) or a row whose status differs from the
           -- previous change.
           select product_id, change_no, status, dt,
                  case when lag(status) over (partition by product_id
                                              order by change_no)
                            = status then 0 else 1 end as flag
           from   test_data
         )
where    flag = 1
group by product_id
order by product_id   --  if needed
;
Output
PRODUCT_ID STATUS DT
---------- ------ ----------
1 D 13/10/2016
2 X 03/02/2016
3 K 23/06/2016
-- Rows carrying the earliest DATE of each product's latest status.
-- TABLE and DATE are the placeholder names used in the question.
-- NOTE(review): MIN(DATE) is taken over all rows of the product with that
-- status, so this assumes a status does not recur after it has changed.
WITH with_latest_status AS (
    -- Tag every row with the status of the product's highest CHANGE_NO.
    SELECT
        PRODUCT_ID,
        CHANGE_NO,
        STATUS,
        DATE,
        FIRST_VALUE(STATUS) OVER (PARTITION BY PRODUCT_ID ORDER BY CHANGE_NO DESC) AS LATEST_STATUS
    FROM TABLE
),
latest_status_rows AS (
    -- Keep only rows in the latest status and attach their earliest date.
    SELECT
        PRODUCT_ID,
        CHANGE_NO,
        STATUS,
        DATE,
        MIN(DATE) OVER (PARTITION BY PRODUCT_ID, STATUS) AS MIN_DATE_OF_LATEST_STATUS
    FROM with_latest_status
    WHERE STATUS = LATEST_STATUS
)
SELECT *
FROM latest_status_rows
WHERE DATE = MIN_DATE_OF_LATEST_STATUS
Use the FIRST_VALUE window function to get the latest status for each product_id
Get the MIN date for those status rows
Finally get those rows where min_date = date
If change_no isn't needed in the final result, the query can be simplified to
-- Earliest DATE of each product's latest status, one row per product.
-- TABLE and DATE are the placeholder names used in the question.
WITH with_latest_status AS (
    -- Tag every row with the status of the product's highest CHANGE_NO.
    SELECT
        PRODUCT_ID,
        CHANGE_NO,
        STATUS,
        DATE,
        FIRST_VALUE(STATUS) OVER (PARTITION BY PRODUCT_ID ORDER BY CHANGE_NO DESC) AS LATEST_STATUS
    FROM TABLE
)
SELECT
    PRODUCT_ID,
    STATUS,
    MIN(DATE) AS MIN_DATE_OF_LATEST_STATUS
FROM with_latest_status
WHERE STATUS = LATEST_STATUS
GROUP BY PRODUCT_ID, STATUS