Related
Consider I have a table like this
PASSENGER CITY DATE
43 NEW YORK 1-Jan-21
44 CHICAGO 4-Jan-21
43 NEW YORK 2-Jan-21
43 NEW YORK 3-Jan-21
44 ROME 5-Jan-21
43 LONDON 4-Jan-21
44 CHICAGO 6-Jan-21
44 CHICAGO 7-Jan-21
How would I group Passenger and City column in sequence to get a result like below?
PASSENGER CITY COUNT
43 NEW YORK 3
44 CHICAGO 1
44 ROME 1
43 LONDON 1
44 CHICAGO 2
One way to deal with such a gaps-and-islands problem is to calculate a ranking for the gaps.
Then group by that ranking as well.
-- Gaps-and-islands: count the rows in each run of consecutive days that a
-- passenger spends in the same city.
WITH flagged AS (
    -- gap = 0 when the passenger's previous row for the SAME city is exactly
    -- one calendar day earlier; otherwise 1 (this row starts a new island).
    SELECT passenger,
           city,
           "DATE",
           CASE
               WHEN TRUNC("DATE")
                    - TRUNC(LAG("DATE") OVER (PARTITION BY passenger, city
                                              ORDER BY "DATE")) = 1
               THEN 0
               ELSE 1
           END AS gap
    FROM table_name
),
ranked AS (
    -- The running total of the gap flags gives every island its own number
    -- within the passenger.
    SELECT f.*,
           SUM(gap) OVER (PARTITION BY passenger ORDER BY "DATE") AS rnk
    FROM flagged f
)
SELECT passenger,
       city,
       COUNT(*) AS "Count"
       -- , MIN("DATE") AS StartDate
       -- , MAX("DATE") AS EndDate
FROM ranked
GROUP BY passenger, city, rnk
ORDER BY MIN("DATE"), passenger
PASSENGER
CITY
Count
43
NEW YORK
3
43
LONDON
1
44
CHICAGO
1
44
ROME
1
44
CHICAGO
2
db<>fiddle here
From Oracle 12, you can use MATCH_RECOGNIZE:
-- Collapse each passenger's date-ordered rows into runs of the same city and
-- return one row per run: the partition key, the city and the run length.
SELECT *
FROM table_name
MATCH_RECOGNIZE (
    PARTITION BY passenger
    ORDER BY "DATE"
    MEASURES
        FIRST(city) AS city,
        COUNT(*)    AS count
    ONE ROW PER MATCH               -- default behaviour, spelled out
    AFTER MATCH SKIP PAST LAST ROW  -- default behaviour, spelled out
    PATTERN (same_city+)
    DEFINE
        -- a row extends the current run while its city equals the run's first city
        same_city AS city = FIRST(city)
);
Which, for the sample data:
-- Sample data for the passenger/city example; column types are taken from
-- the first SELECT of the UNION ALL chain.
CREATE TABLE table_name (passenger, city, "DATE") AS
    SELECT 43, 'NEW YORK', DATE '2021-01-01' FROM dual
    UNION ALL
    SELECT 44, 'CHICAGO',  DATE '2021-01-04' FROM dual
    UNION ALL
    SELECT 43, 'NEW YORK', DATE '2021-01-02' FROM dual
    UNION ALL
    SELECT 43, 'NEW YORK', DATE '2021-01-03' FROM dual
    UNION ALL
    SELECT 44, 'ROME',     DATE '2021-01-05' FROM dual
    UNION ALL
    SELECT 43, 'LONDON',   DATE '2021-01-04' FROM dual
    UNION ALL
    SELECT 44, 'CHICAGO',  DATE '2021-01-06' FROM dual
    UNION ALL
    SELECT 44, 'CHICAGO',  DATE '2021-01-07' FROM dual
Outputs:
PASSENGER
CITY
COUNT
43
NEW YORK
3
43
LONDON
1
44
CHICAGO
1
44
ROME
1
44
CHICAGO
2
If you have ordered the input result set (note: tables should be considered to be unordered) and want to maintain the order then:
-- Same run detection, but rows are sequenced by the ROWNUM assigned while
-- reading the table, and each run reports its first rn / "DATE" so the final
-- result can be sorted back into that order.
SELECT *
FROM (
    SELECT src.*,
           ROWNUM AS rn
    FROM table_name src
)
MATCH_RECOGNIZE (
    PARTITION BY passenger
    ORDER BY rn
    MEASURES
        FIRST(rn)     AS rn,
        FIRST("DATE") AS "DATE",
        FIRST(city)   AS city,
        COUNT(*)      AS count
    ONE ROW PER MATCH               -- default behaviour, spelled out
    AFTER MATCH SKIP PAST LAST ROW  -- default behaviour, spelled out
    PATTERN (same_city+)
    DEFINE
        -- a row extends the current run while its city equals the run's first city
        same_city AS city = FIRST(city)
)
ORDER BY rn
Outputs:
PASSENGER
RN
DATE
CITY
COUNT
43
1
01-JAN-21
NEW YORK
3
44
2
04-JAN-21
CHICAGO
1
44
5
05-JAN-21
ROME
1
43
6
04-JAN-21
LONDON
1
44
7
06-JAN-21
CHICAGO
2
db<>fiddle here
I have the following table
EMP_ID ,DATETIME_OF_MOVEMENT,CITY ,RANK
2258325 ,1/18/2020 5:37 ,London ,1
2258325 ,1/19/2020 11:01 ,Manchester ,2
2258325 ,1/20/2020 15:06 ,London ,3
2656700 ,1/20/2020 23:59 ,London ,1
2656700 ,1/21/2020 6:48 ,Manchester ,2
2656700 ,1/21/2020 6:48 ,Liverpool ,3
2656700 ,1/26/2020 10:47 ,London ,4
6631583 ,1/18/2020 18:00 ,London ,1
6631583 ,1/19/2020 14:25 ,Manchester ,2
6631583 ,1/20/2020 8:53 ,Liverpool ,3
6631583 ,1/20/2020 14:48 ,Manchester ,4
6631583 ,1/21/2020 11:34 ,London ,5
I want a query that returns the employees who were in London and came back to London.
The first location should be London, the second should be the first location after London, and the third location should be London again.
I used the following query, but it misses some employees:
-- For every movement row, show it next to the same employee's following two
-- movements (by time). Because the offsets are fixed at 1 and 2, a return to
-- London that happens more than two movements later is not picked up.
select emp_id,
       datetime_of_movement as first_movement,
       city as first_city,
       lead (datetime_of_movement, 1) over (partition by emp_id order by datetime_of_movement) as second_movement,
       lead (city, 1) over (partition by emp_id order by datetime_of_movement) as second_city,
       lead (datetime_of_movement, 2) over (partition by emp_id order by datetime_of_movement) as third_movement,
       lead (city, 2) over (partition by emp_id order by datetime_of_movement) as third_city
from table
result of the code
CUSTOMER_ID,first_movement ,first_city,second_movement,second_city ,third_movement ,third_city
2258325 ,1/18/2020 5:37 ,London ,1/19/2020 11:01,Manchester ,1/20/2020 15:06,London
2656700 ,1/20/2020 23:59,London ,1/21/2020 6:48 ,Manchester ,1/21/2020 6:48 ,Liverpool
6631583 ,1/18/2020 18:00,London ,1/19/2020 14:25,Manchester ,1/20/2020 8:53 ,Liverpool
This code catches an employee whose track of movement looks like this (e.g. emp_id 2258325):
1 - London
2 - any other city
3 - London
It does not work if the employee's movements look like the following:
1 - london
2 - any other city
3 - any other city
4 - London
i want the result to be like
CUSTOMER_ID,first_movement ,first_city,second_movement,second_city,third_movement ,third_city
2258325 ,1/18/2020 5:37 ,London ,1/19/2020 11:01,Manchester ,1/20/2020 15:06,London
2656700 ,1/20/2020 23:59,London ,1/21/2020 6:48 ,Manchester ,1/26/2020 10:47,London
6631583 ,1/18/2020 18:00,London ,1/19/2020 14:25,Manchester ,1/21/2020 11:34,London
Any suggestion please?
-- Inline sample data standing in for the real table. Employee 6631583 has
-- two extra movements (ranks 6 and 7), so London appears at ranks 1, 5 and 7.
WITH YOUR_TABLE (EMP_ID, DATETIME_OF_MOVEMENT, CITY, RANK_) AS (
    SELECT 2258325, '1/18/2020 5:37',  'London',     1 FROM DUAL UNION ALL
    SELECT 2258325, '1/19/2020 11:01', 'Manchester', 2 FROM DUAL UNION ALL
    SELECT 2258325, '1/20/2020 15:06', 'London',     3 FROM DUAL UNION ALL
    SELECT 2656700, '1/20/2020 23:59', 'London',     1 FROM DUAL UNION ALL
    SELECT 2656700, '1/21/2020 6:48',  'Manchester', 2 FROM DUAL UNION ALL
    SELECT 2656700, '1/21/2020 6:48',  'Liverpool',  3 FROM DUAL UNION ALL
    SELECT 2656700, '1/26/2020 10:47', 'London',     4 FROM DUAL UNION ALL
    SELECT 6631583, '1/18/2020 18:00', 'London',     1 FROM DUAL UNION ALL
    SELECT 6631583, '1/19/2020 14:25', 'Manchester', 2 FROM DUAL UNION ALL
    SELECT 6631583, '1/20/2020 8:53',  'Liverpool',  3 FROM DUAL UNION ALL
    SELECT 6631583, '1/20/2020 14:48', 'Manchester', 4 FROM DUAL UNION ALL
    SELECT 6631583, '1/21/2020 11:34', 'London',     5 FROM DUAL UNION ALL
    SELECT 6631583, '1/22/2020 14:48', 'Manchester', 6 FROM DUAL UNION ALL
    SELECT 6631583, '1/24/2020 11:34', 'London',     7 FROM DUAL
)
-- YOUR QUERY STARTS FROM HERE
-- (the marker comment sits on its own line; when it shared a line with
--  "SELECT EMP_ID," the "--" commented the SELECT out and the statement
--  could not be parsed)
SELECT EMP_ID,
       -- row at the employee's first London rank
       MAX (CASE WHEN MINRN = RANK_ THEN DATETIME_OF_MOVEMENT END) AS first_movement,
       MAX (CASE WHEN MINRN = RANK_ THEN CITY END) AS first_CITY,
       -- row immediately after the first London rank
       MAX (CASE WHEN MINRN + 1 = RANK_ THEN DATETIME_OF_MOVEMENT END) AS SECOND_movement,
       MAX (CASE WHEN MINRN + 1 = RANK_ THEN CITY END) AS SECOND_CITY,
       -- row at the employee's LAST London rank: for 6631583 this is rank 7,
       -- not the first return to London at rank 5
       MAX (CASE WHEN MAXRN = RANK_ THEN DATETIME_OF_MOVEMENT END) AS THIRD_movement,
       MAX (CASE WHEN MAXRN = RANK_ THEN CITY END) AS THIRD_CITY
FROM (SELECT T.*,
             MAX (CASE WHEN CITY = 'London' THEN RANK_ END)
                 OVER (PARTITION BY EMP_ID) AS MAXRN,
             MIN (CASE WHEN CITY = 'London' THEN RANK_ END)
                 OVER (PARTITION BY EMP_ID) AS MINRN
      FROM YOUR_TABLE T)
-- keep only employees with at least one row between first and last London
WHERE MAXRN - MINRN > 1
GROUP BY EMP_ID;
Check this: it does not work #Tejash
result should be like this:
-- One row per employee: the first movement, the movement right after it and
-- the last movement, kept only when both the first and the last city are
-- London.
-- The window results are computed in an inline view because column aliases
-- cannot be referenced in the WHERE clause of the query that defines them.
-- NOTE(review): rows of one employee that share the same datetime_of_movement
-- are ordered arbitrarily; add a tie-breaker column to the ORDER BY if the
-- table has one.
SELECT emp_id,
       first_movement,
       first_city,
       second_movement,
       second_city,
       third_movement,
       third_city
FROM (
    SELECT emp_id,
           datetime_of_movement AS first_movement,
           city AS first_city,
           LEAD (datetime_of_movement, 1)
               OVER (PARTITION BY emp_id
                     ORDER BY datetime_of_movement ASC) AS second_movement,
           LEAD (city, 1)
               OVER (PARTITION BY emp_id
                     ORDER BY datetime_of_movement ASC) AS second_city,
           -- LEAD(x, 0) only returns the current row, whatever the sort
           -- direction; LAST_VALUE over the whole partition returns the
           -- employee's final movement.
           LAST_VALUE (datetime_of_movement)
               OVER (PARTITION BY emp_id
                     ORDER BY datetime_of_movement ASC
                     ROWS BETWEEN UNBOUNDED PRECEDING
                              AND UNBOUNDED FOLLOWING) AS third_movement,
           LAST_VALUE (city)
               OVER (PARTITION BY emp_id
                     ORDER BY datetime_of_movement ASC
                     ROWS BETWEEN UNBOUNDED PRECEDING
                              AND UNBOUNDED FOLLOWING) AS third_city,
           ROW_NUMBER ()
               OVER (PARTITION BY emp_id
                     ORDER BY datetime_of_movement ASC) AS rn
    FROM table
)
WHERE rn = 1                    -- start from the employee's first movement
  AND first_city = 'London'     -- string literals take single quotes
  AND third_city = 'London'
I couldn't test this SQL but maybe helpful. please let me know if it works for you.
I think you need a record starting from London then next record and the last London record. You can use the combination of the conditional aggregate and analytical function as following:
SQL> WITH YOUR_TABLE ( -- inline sample data standing in for the real table
2 EMP_ID,
3 DATETIME_OF_MOVEMENT,
4 CITY,
5 RANK
6 ) AS
7 (
8 SELECT 2258325 ,'1/18/2020 5:37' ,'London' ,1 FROM DUAL UNION ALL
9 SELECT 2258325 ,'1/19/2020 11:01' ,'Manchester' ,2 FROM DUAL UNION ALL
10 SELECT 2258325 ,'1/20/2020 15:06' ,'London' ,3 FROM DUAL UNION ALL
11 SELECT 2656700 ,'1/20/2020 23:59' ,'London' ,1 FROM DUAL UNION ALL
12 SELECT 2656700 ,'1/21/2020 6:48' ,'Manchester' ,2 FROM DUAL UNION ALL
13 SELECT 2656700 ,'1/21/2020 6:48' ,'Liverpool' ,3 FROM DUAL UNION ALL
14 SELECT 2656700 ,'1/26/2020 10:47' ,'London' ,4 FROM DUAL UNION ALL
15 SELECT 6631583 ,'1/18/2020 18:00' ,'London' ,1 FROM DUAL UNION ALL
16 SELECT 6631583 ,'1/19/2020 14:25' ,'Manchester' ,2 FROM DUAL UNION ALL
17 SELECT 6631583 ,'1/20/2020 8:53' ,'Liverpool' ,3 FROM DUAL UNION ALL
18 SELECT 6631583 ,'1/20/2020 14:48' ,'Manchester' ,4 FROM DUAL UNION ALL
19 SELECT 6631583 ,'1/21/2020 11:34' ,'London' ,5 FROM DUAL
20 )
21 -- YOUR QUERY STARTS FROM HERE
22 SELECT
23 EMP_ID,
24 MAX(CASE WHEN MINRN = RANK THEN DATETIME_OF_MOVEMENT END) AS first_movement, -- row at the first London rank
25 MAX(CASE WHEN MINRN = RANK THEN CITY END) AS first_CITY,
26 MAX(CASE WHEN MINRN + 1 = RANK THEN DATETIME_OF_MOVEMENT END) AS SECOND_movement, -- row right after the first London rank
27 MAX(CASE WHEN MINRN + 1 = RANK THEN CITY END) AS SECOND_CITY,
28 MAX(CASE WHEN MAXRN = RANK THEN DATETIME_OF_MOVEMENT END) AS THIRD_movement, -- row at the last London rank
29 MAX(CASE WHEN MAXRN = RANK THEN CITY END) AS THIRD_CITY
30 FROM
31 (
32 SELECT T.*,
33 MAX(CASE WHEN CITY = 'London' THEN RANK END) OVER(PARTITION BY EMP_ID) AS MAXRN, -- highest rank at which the employee is in London
34 MIN(CASE WHEN CITY = 'London' THEN RANK END) OVER(PARTITION BY EMP_ID) AS MINRN -- lowest rank at which the employee is in London
35 FROM YOUR_TABLE T
36 )
37 WHERE MAXRN - MINRN > 1 -- at least one row lies between the first and the last London
38 GROUP BY EMP_ID;
EMP_ID FIRST_MOVEMENT FIRST_CITY SECOND_MOVEMENT SECOND_CIT THIRD_MOVEMENT THIRD_CITY
---------- --------------- ---------- --------------- ---------- --------------- ----------
2258325 1/18/2020 5:37 London 1/19/2020 11:01 Manchester 1/20/2020 15:06 London
2656700 1/20/2020 23:59 London 1/21/2020 6:48 Manchester 1/26/2020 10:47 London
6631583 1/18/2020 18:00 London 1/19/2020 14:25 Manchester 1/21/2020 11:34 London
SQL>
Cheers!!
You can try the script below. But it will return only one row, as only one employee has city London where Rank = 3 (and, as per your explanation, you are concerned with location 3 as well).
DEMO HERE
-- Employees whose first three movements are London -> any city -> London.
-- Only matches when the return to London is exactly the third movement
-- (RANK = 3).
SELECT DISTINCT
    A.EMP_ID               AS CUSTOMER_ID,
    A.DATETIME_OF_MOVEMENT AS first_movement,
    A.CITY                 AS first_city,
    B.DATETIME_OF_MOVEMENT AS second_movement,
    B.CITY                 AS second_city,
    C.DATETIME_OF_MOVEMENT AS third_movement,
    C.CITY                 AS third_city
FROM your_table A
INNER JOIN your_table B
    ON B.EMP_ID = A.EMP_ID
   AND B.RANK = A.RANK + 1      -- the movement right after A
INNER JOIN your_table C
    ON C.EMP_ID = A.EMP_ID
   AND C.RANK = A.RANK + 2      -- the movement two steps after A
WHERE A.RANK = 1
  AND A.CITY = 'London'         -- the trip has to start in London
  AND C.CITY = 'London'         -- ... and be back in London
New logic if the last city is not the third city-
DEMO HERE
-- Keep each employee's first, second and last movement, renumber the last one
-- as 3, and report employees that start in London and end in London.
WITH CTE AS
(
    SELECT A.EMP_ID,
           A.DATETIME_OF_MOVEMENT,
           A.CITY,
           A.RANK,
           -- any rank above 2 that survives the filter below is the
           -- employee's last row, relabelled as position 3
           CASE WHEN A.RANK > 2 THEN 3 ELSE A.RANK END AS NEW_RANK
    FROM your_table A
    WHERE A.RANK IN (1, 2)
       OR A.RANK = (SELECT MAX(B.RANK) FROM your_table B WHERE B.EMP_ID = A.EMP_ID)
)
SELECT
    A.EMP_ID               AS CUSTOMER_ID,
    A.DATETIME_OF_MOVEMENT AS first_movement,
    A.CITY                 AS first_city,
    B.DATETIME_OF_MOVEMENT AS second_movement,
    B.CITY                 AS second_city,
    C.DATETIME_OF_MOVEMENT AS third_movement,
    C.CITY                 AS third_city
FROM CTE A
INNER JOIN CTE B
    ON B.EMP_ID = A.EMP_ID
   AND B.NEW_RANK = A.NEW_RANK + 1
INNER JOIN CTE C
    ON C.EMP_ID = A.EMP_ID
   AND C.NEW_RANK = A.NEW_RANK + 2
WHERE A.NEW_RANK = 1
  AND A.CITY = 'London'         -- the trip has to start in London
  AND C.CITY = 'London'         -- ... and the last movement is back in London
I have a table EMPLOYEE as under:
Enroll Date STS EMP_ID EMP_Name DEPT Rank OST BLOCK
12-Jan-17 Q 123 ABC ABC123 12 Y 1000
14-Jan-17 Q 123 ABC DEF123 12 Y 1000
15-Jan-17 R 123 ABC DEF123 12 Y 100
15-Jan-17 R 123 ABC DEF123 12 Y 200
15-Jan-17 R 123 ABC DEF123 12 Y 300
20-Jan-17 R 123 ABC DEF123 10 Y 300
26-Jan-17 R 456 RST DEF456 8 N 200
26-Jan-17 R 456 RST DEF456 8 N 300
2-Feb-17 Q 123 ABC ABC123 12 Y 300
Now I need to remove the duplicate rows for each emp_id (rows are duplicates if EMP_Name, DEPT, OST and Rank are the same). If two rows have these four values the same but enroll_date is different, then I must not delete that row. And if two rows have the same enroll date and the same four fields (OST, EMP_Name, DEPT and Rank), then I need to keep the row with the highest block (1000, followed by 300, followed by 200, and so on).
So after deleting such data my table should have these rows:
Enroll Date STS EMP_ID EMP_Name DEPT Rank OST BLOCK
12-Jan-17 Q 123 ABC ABC123 12 Y 1000
14-Jan-17 Q 123 ABC DEF123 12 Y 1000
15-Jan-17 R 123 ABC DEF123 12 Y 100
2-Feb-17 Q 123 ABC ABC123 12 Y 300
20-Jan-17 R 123 ABC DEF123 10 Y 300
26-Jan-17 R 456 RST DEF456 8 N 200
26-Jan-17 R 456 RST DEF456 8 N 300
I tried using the query below, and will delete the rows which have rn > 1:
-- Number the rows inside each (emp_id, enroll_date, emp_name, dept, ost, rank)
-- group, ordered by enroll_date ascending and then block descending.
SELECT enroll_date,
       sts,
       block,
       emp_id,
       emp_name,
       dept,
       rank,
       ost,
       ROW_NUMBER() OVER (
           PARTITION BY emp_id, enroll_date, emp_name, dept, ost, rank
           ORDER BY enroll_date ASC, block DESC
       ) rn
FROM employee
But I am getting rn as 1 every time.
Can someone check the issue here or suggest some other way to do this?
I am creating a temporary table which will have all non duplicate values:
-- Build a de-duplicated copy of EMPLOYEE: within each calendar day and
-- (emp_id, emp_name, dept, ost, rank) combination keep only the row with the
-- highest block.
create table employee_temp as
with duplicates as (
    select enroll_date, sts, block, emp_id, emp_name, dept, rank, ost,
           row_number() over (
               partition by emp_id, trunc(enroll_date), emp_name, dept, ost, rank
               -- order by block only: ordering by enroll_date first would let
               -- the time of day, not the block, decide which row gets rn = 1
               order by block desc
           ) rn
    from employee
)
select enroll_date, sts, block, emp_id, emp_name, dept, rank, ost
from duplicates
where rn = 1;
It looks like your enroll_date values have non-midnight times, so partitioning by those also made those combinations unique (even though they don't look it when you only show the date part).
My initial thought was that your analytic row_number() was partitioned by too many columns, and that you shouldn't be including the date value you want to order by - it doesn't really make sense to partition by and order by the same thing, as it will be unique. Reducing the columns you actually want to check against, perhaps to:
-- numbering restarts for every emp_id / emp_name / dept / ost / rank
-- combination; earliest enroll_date first, larger block first on ties
ROW_NUMBER() OVER (
    PARTITION BY emp_id, emp_name, dept, ost, rank
    ORDER BY enroll_date ASC, block DESC
)
would produce different ranks rather than all being 1. But I don't think that's right; that would probably make your secondary block ordering somewhat redundant, as you'll maybe be unlikely to have two rows with exactly the same time for one ID. Unlikely but not impossible, perhaps.
Re-reading your wording again I don't think you want to be ordering by the enroll_date at all, and you do want to be partitioning by the date instead; but, given that it contains non-midnight times that you apparently want to ignore for this exercise, the partitioning would have to be on the truncated date (which strips the time back to midnight, by default):
-- numbering restarts for every calendar day (time of day stripped by TRUNC)
-- and emp_id / emp_name / dept / ost / rank combination; largest block first
ROW_NUMBER() OVER (
    PARTITION BY TRUNC(enroll_date), emp_id, emp_name, dept, ost, rank
    ORDER BY block DESC
)
With your sample data as a CTE, including slightly different times within each day, and one extra row to get everything the same but the date, this shows your original rn and my two calculated values:
-- Sample EMPLOYEE rows (with slightly different times inside a day, plus one
-- extra row that differs only by date), followed by the three candidate
-- row_number() calculations side by side.
WITH employee (enroll_date, sts, emp_id, emp_name, dept, rank, ost, block) AS (
    SELECT TO_DATE('12-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'ABC123', 12, 'Y', 1000 FROM dual UNION ALL
    SELECT TO_DATE('14-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'DEF123', 12, 'Y', 1000 FROM dual UNION ALL
    SELECT TO_DATE('15-Jan-17 00:00:01', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 12, 'Y', 100 FROM dual UNION ALL
    SELECT TO_DATE('15-Jan-17 00:00:02', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 12, 'Y', 200 FROM dual UNION ALL
    SELECT TO_DATE('15-Jan-17 00:00:03', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 12, 'Y', 300 FROM dual UNION ALL
    SELECT TO_DATE('20-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'R', 123, 'ABC', 'DEF123', 10, 'Y', 300 FROM dual UNION ALL
    SELECT TO_DATE('26-Jan-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'R', 456, 'RST', 'DEF456', 8, 'N', 200 FROM dual UNION ALL
    SELECT TO_DATE('26-Jan-17 00:00:01', 'DD-Mon-RR HH24:MI:SS'), 'R', 456, 'RST', 'DEF456', 8, 'N', 300 FROM dual UNION ALL
    SELECT TO_DATE('2-Feb-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'ABC123', 12, 'Y', 300 FROM dual UNION ALL
    SELECT TO_DATE('3-Feb-17 00:00:00', 'DD-Mon-RR HH24:MI:SS'), 'Q', 123, 'ABC', 'ABC123', 12, 'Y', 300 FROM dual
)
SELECT TO_CHAR(enroll_date, 'DD-Mon-RR') AS date_only,
       enroll_date,
       sts,
       block,
       emp_id,
       emp_name,
       dept,
       rank,
       ost,
       -- the asker's version: partitioned by the full date-time
       ROW_NUMBER() OVER (
           PARTITION BY emp_id, enroll_date, emp_name, dept, ost, rank
           ORDER BY enroll_date ASC, block DESC
       ) your_rn,
       -- first idea: the date is not part of the partition at all
       ROW_NUMBER() OVER (
           PARTITION BY emp_id, emp_name, dept, ost, rank
           ORDER BY enroll_date ASC, block DESC
       ) my_rn_1,
       -- second idea: partition by the day, order by block only
       ROW_NUMBER() OVER (
           PARTITION BY TRUNC(enroll_date), emp_id, emp_name, dept, ost, rank
           ORDER BY block DESC
       ) AS my_rn_2
FROM employee
ORDER BY enroll_date;
DATE_ONLY ENROLL_DATE S BLOCK EMP_ID EMP DEPT RANK O YOUR_RN MY_RN_1 MY_RN_2
--------- ------------------- - ----- ------ --- ------ ---- - ------- ------- -------
12-Jan-17 2017-01-12 00:00:00 Q 1000 123 ABC ABC123 12 Y 1 1 1
14-Jan-17 2017-01-14 00:00:00 Q 1000 123 ABC DEF123 12 Y 1 1 1
15-Jan-17 2017-01-15 00:00:01 R 100 123 ABC DEF123 12 Y 1 2 3
15-Jan-17 2017-01-15 00:00:02 R 200 123 ABC DEF123 12 Y 1 3 2
15-Jan-17 2017-01-15 00:00:03 R 300 123 ABC DEF123 12 Y 1 4 1
20-Jan-17 2017-01-20 00:00:00 R 300 123 ABC DEF123 10 Y 1 1 1
26-Jan-17 2017-01-26 00:00:00 R 200 456 RST DEF456 8 N 1 1 2
26-Jan-17 2017-01-26 00:00:01 R 300 456 RST DEF456 8 N 1 2 1
02-Feb-17 2017-02-02 00:00:00 Q 300 123 ABC ABC123 12 Y 1 2 1
03-Feb-17 2017-02-03 00:00:00 Q 300 123 ABC ABC123 12 Y 1 3 1
To identify the rows to delete you can use a subquery:
-- Rows that are NOT the highest-block row of their day / emp_id / emp_name /
-- dept / ost / rank group, i.e. the candidates for deletion.
WITH ranked AS (
    SELECT enroll_date, sts, block, emp_id, emp_name, dept, rank, ost,
           ROW_NUMBER() OVER (
               PARTITION BY TRUNC(enroll_date), emp_id, emp_name, dept, ost, rank
               ORDER BY block DESC
           ) AS my_rn_2
    FROM employee
)
SELECT enroll_date, sts, block, emp_id, emp_name, dept, rank, ost
FROM ranked
WHERE my_rn_2 > 1
ORDER BY enroll_date;
ENROLL_DATE S BLOCK EMP_ID EMP DEPT RANK O
------------------- - ----- ------ --- ------ ---- -
2017-01-15 00:00:01 R 100 123 ABC DEF123 12 Y
2017-01-15 00:00:02 R 200 123 ABC DEF123 12 Y
2017-01-26 00:00:00 R 200 456 RST DEF456 8 N
You'll need to decide what actually makes sense for your data and requirements though.
The goal is to select the count of distinct customer_id's who have not made a purchase in the rolling 30 day period prior to every day in the calendar year 2016. I have created a calendar table in my database to join to.
Here is an example table for reference, let's say you have customers orders normalized as follows:
+-------------+------------+----------+
| customer_id | date | order_id |
+-------------+------------+----------+
| 123 | 01/25/2016 | 1000 |
+-------------+------------+----------+
| 123 | 04/27/2016 | 1025 |
+-------------+------------+----------+
| 444 | 02/02/2016 | 1010 |
+-------------+------------+----------+
| 521 | 01/23/2016 | 998 |
+-------------+------------+----------+
| 521 | 01/24/2016 | 999 |
+-------------+------------+----------+
The goal output is effectively a calendar with 1 row for every single day of 2016 with a count on each day of how many customers "lapsed" on that day, meaning their last purchase was 30 days or more prior from that day of the year. The final output will look like this:
+------------+--------------+
| date | lapsed_count |
+------------+--------------+
| 01/01/2016 | 0 |
+------------+--------------+
| 01/02/2016 | 0 |
+------------+--------------+
| ... | ... |
+------------+--------------+
| 03/01/2016 | 12 |
+------------+--------------+
| 03/02/2016 | 9 |
+------------+--------------+
| 03/03/2016 | 7 |
+------------+--------------+
This data does not exist in 2015, therefore it's not possible for Jan-01-2016 to have a count of lapsed customers because that is the first possible day to ever make a purchase.
So for customer_id #123, they purchased on 01/25/2016 and 04/27/2016. They should have 2 lapse counts because their purchases are more than 30 days apart. One lapse occurring on 2/24/2016 and another lapse on 05/27/2016.
Customer_id#444 only purchased once, so they should have one lapse count for 30 days after 02/02/2016 on 03/03/2016.
Customer_id#521 is tricky: since they purchased with a frequency of 1 day, we will not count the first purchase on 01/23/2016, so there is only one lapse, starting from their last purchase on 01/24/2016. The count for the lapse will occur on 02/23/2016 (+30 days).
If you have a table of dates, here is one expensive method:
-- For every calendar date, count the customers whose most recent purchase on
-- or before that date is more than 30 days old.
select date,
       sum(case when prev_date < date - 30 then 1 else 0 end) as lapsed
from (select c.date, cu.customer_id, max(o.date) as prev_date
      from calendar c cross join
           -- every customer paired with every date; aliased cu because c is
           -- already taken by calendar
           (select distinct customer_id from orders) cu left join
           orders o
           on o.date <= c.date and o.customer_id = cu.customer_id
      -- group by the customer list's id: o.customer_id is NULL for dates
      -- that precede the customer's first order
      group by c.date, cu.customer_id
     ) oc
-- prev_date is NULL before a customer's first order, so the comparison is
-- not true and such rows add 0
group by date;
For each date/customer pair, it determines the latest purchase the customer made before the date. It then uses this information to count the lapsed.
To be honest, this will probably work well on a handful of dates, but not for a full year's worth.
Apologies, I didn't read your question properly the first time around. This query will give you all the lapses you have. It takes each order and uses an analytic function to work out the next order date - if the gap is greater than 30 days then a lapse is recorded
-- List every order that is followed by a gap of more than 30 days (or, for a
-- customer's latest order, that is more than 30 days before now), together
-- with the date 30 days after the order.
WITH cust_orders (customer_id, order_date, order_id) AS (
    SELECT 1, TO_DATE('01/01/2016', 'DD/MM/YYYY'), 1001 FROM dual UNION ALL
    SELECT 1, TO_DATE('29/01/2016', 'DD/MM/YYYY'), 1002 FROM dual UNION ALL
    SELECT 1, TO_DATE('01/03/2016', 'DD/MM/YYYY'), 1003 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/01/2016', 'DD/MM/YYYY'), 1004 FROM dual UNION ALL
    SELECT 2, TO_DATE('29/01/2016', 'DD/MM/YYYY'), 1005 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/04/2016', 'DD/MM/YYYY'), 1006 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/06/2016', 'DD/MM/YYYY'), 1007 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/08/2016', 'DD/MM/YYYY'), 1008 FROM dual UNION ALL
    SELECT 3, TO_DATE('01/09/2016', 'DD/MM/YYYY'), 1009 FROM dual UNION ALL
    SELECT 3, TO_DATE('01/12/2016', 'DD/MM/YYYY'), 1010 FROM dual UNION ALL
    SELECT 3, TO_DATE('02/12/2016', 'DD/MM/YYYY'), 1011 FROM dual UNION ALL
    SELECT 3, TO_DATE('03/12/2016', 'DD/MM/YYYY'), 1012 FROM dual UNION ALL
    SELECT 3, TO_DATE('04/12/2016', 'DD/MM/YYYY'), 1013 FROM dual UNION ALL
    SELECT 3, TO_DATE('05/12/2016', 'DD/MM/YYYY'), 1014 FROM dual UNION ALL
    SELECT 3, TO_DATE('06/12/2016', 'DD/MM/YYYY'), 1015 FROM dual UNION ALL
    SELECT 3, TO_DATE('07/12/2016', 'DD/MM/YYYY'), 1016 FROM dual
),
order_gaps AS (
    -- pair every order with the same customer's next order date (NULL for
    -- the customer's latest order)
    SELECT customer_id,
           order_date,
           order_id,
           LEAD(order_date) OVER (PARTITION BY customer_id
                                  ORDER BY order_date) AS next_order_date
    FROM cust_orders
)
SELECT customer_id,
       order_date,
       order_id,
       next_order_date,
       order_date + 30 AS lapse_date
FROM order_gaps
WHERE NVL(next_order_date, sysdate) - order_date > 30
;
Now join that to a set of dates and run a COUNT function (enter the year parameter as YYYY) :
-- Count the lapses that fall on each day of the year :P_year (YYYY).
-- Days without a lapse are not returned.
WITH cust_orders (customer_id, order_date, order_id) AS (
    SELECT 1, TO_DATE('01/01/2016', 'DD/MM/YYYY'), 1001 FROM dual UNION ALL
    SELECT 1, TO_DATE('29/01/2016', 'DD/MM/YYYY'), 1002 FROM dual UNION ALL
    SELECT 1, TO_DATE('01/03/2016', 'DD/MM/YYYY'), 1003 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/01/2016', 'DD/MM/YYYY'), 1004 FROM dual UNION ALL
    SELECT 2, TO_DATE('29/01/2016', 'DD/MM/YYYY'), 1005 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/04/2016', 'DD/MM/YYYY'), 1006 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/06/2016', 'DD/MM/YYYY'), 1007 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/08/2016', 'DD/MM/YYYY'), 1008 FROM dual UNION ALL
    SELECT 3, TO_DATE('01/09/2016', 'DD/MM/YYYY'), 1009 FROM dual UNION ALL
    SELECT 3, TO_DATE('01/12/2016', 'DD/MM/YYYY'), 1010 FROM dual UNION ALL
    SELECT 3, TO_DATE('02/12/2016', 'DD/MM/YYYY'), 1011 FROM dual UNION ALL
    SELECT 3, TO_DATE('03/12/2016', 'DD/MM/YYYY'), 1012 FROM dual UNION ALL
    SELECT 3, TO_DATE('04/12/2016', 'DD/MM/YYYY'), 1013 FROM dual UNION ALL
    SELECT 3, TO_DATE('05/12/2016', 'DD/MM/YYYY'), 1014 FROM dual UNION ALL
    SELECT 3, TO_DATE('06/12/2016', 'DD/MM/YYYY'), 1015 FROM dual UNION ALL
    SELECT 3, TO_DATE('07/12/2016', 'DD/MM/YYYY'), 1016 FROM dual
),
order_gaps AS (
    -- pair every order with the same customer's next order date
    SELECT customer_id,
           order_date,
           order_id,
           LEAD(order_date) OVER (PARTITION BY customer_id
                                  ORDER BY order_date) AS next_order_date
    FROM cust_orders
),
lapses AS (
    -- orders followed by more than 30 days without another order
    SELECT customer_id,
           order_date,
           order_id,
           next_order_date,
           order_date + 30 AS lapse_date
    FROM order_gaps
    WHERE NVL(next_order_date, sysdate) - order_date > 30
),
calendar (date_value) AS (
    -- one row per day of :P_year, generated from DUAL so the row count does
    -- not depend on how many rows ALL_TABLES happens to expose
    SELECT TO_DATE('01/01/' || :P_year, 'DD/MM/YYYY') + (LEVEL - 1)
    FROM dual
    CONNECT BY LEVEL <= TO_DATE('31/12/' || :P_year, 'DD/MM/YYYY')
                        - TO_DATE('01/01/' || :P_year, 'DD/MM/YYYY') + 1
)
SELECT calendar.date_value,
       COUNT(*) AS lapsed_count
FROM lapses
INNER JOIN calendar
    ON calendar.date_value = TRUNC(lapses.lapse_date)
GROUP BY calendar.date_value
ORDER BY calendar.date_value
;
Or if you really want every date printed out then use this :
-- One row for every day of the year :P_year (YYYY) with the number of lapses
-- falling on that day (0 when there are none).
WITH cust_orders (customer_id, order_date, order_id) AS (
    SELECT 1, TO_DATE('01/01/2016', 'DD/MM/YYYY'), 1001 FROM dual UNION ALL
    SELECT 1, TO_DATE('29/01/2016', 'DD/MM/YYYY'), 1002 FROM dual UNION ALL
    SELECT 1, TO_DATE('01/03/2016', 'DD/MM/YYYY'), 1003 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/01/2016', 'DD/MM/YYYY'), 1004 FROM dual UNION ALL
    SELECT 2, TO_DATE('29/01/2016', 'DD/MM/YYYY'), 1005 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/04/2016', 'DD/MM/YYYY'), 1006 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/06/2016', 'DD/MM/YYYY'), 1007 FROM dual UNION ALL
    SELECT 2, TO_DATE('01/08/2016', 'DD/MM/YYYY'), 1008 FROM dual UNION ALL
    SELECT 3, TO_DATE('01/09/2016', 'DD/MM/YYYY'), 1009 FROM dual UNION ALL
    SELECT 3, TO_DATE('01/12/2016', 'DD/MM/YYYY'), 1010 FROM dual UNION ALL
    SELECT 3, TO_DATE('02/12/2016', 'DD/MM/YYYY'), 1011 FROM dual UNION ALL
    SELECT 3, TO_DATE('03/12/2016', 'DD/MM/YYYY'), 1012 FROM dual UNION ALL
    SELECT 3, TO_DATE('04/12/2016', 'DD/MM/YYYY'), 1013 FROM dual UNION ALL
    SELECT 3, TO_DATE('05/12/2016', 'DD/MM/YYYY'), 1014 FROM dual UNION ALL
    SELECT 3, TO_DATE('06/12/2016', 'DD/MM/YYYY'), 1015 FROM dual UNION ALL
    SELECT 3, TO_DATE('07/12/2016', 'DD/MM/YYYY'), 1016 FROM dual
),
order_gaps AS (
    -- pair every order with the same customer's next order date
    SELECT customer_id,
           order_date,
           order_id,
           LEAD(order_date) OVER (PARTITION BY customer_id
                                  ORDER BY order_date) AS next_order_date
    FROM cust_orders
),
lapses AS (
    -- orders followed by more than 30 days without another order
    SELECT customer_id,
           order_date,
           order_id,
           next_order_date,
           order_date + 30 AS lapse_date
    FROM order_gaps
    WHERE NVL(next_order_date, sysdate) - order_date > 30
),
calendar (date_value) AS (
    -- one row per day of :P_year, generated from DUAL so the row count does
    -- not depend on how many rows ALL_TABLES happens to expose
    SELECT TO_DATE('01/01/' || :P_year, 'DD/MM/YYYY') + (LEVEL - 1)
    FROM dual
    CONNECT BY LEVEL <= TO_DATE('31/12/' || :P_year, 'DD/MM/YYYY')
                        - TO_DATE('01/01/' || :P_year, 'DD/MM/YYYY') + 1
)
SELECT calendar.date_value,
       -- counts only matched lapse rows, so days without a lapse show 0
       COUNT(lapses.lapse_date) AS lapsed_count
FROM calendar
LEFT JOIN lapses
    -- TRUNC so that an order_date carrying a time of day still matches its day
    ON TRUNC(lapses.lapse_date) = calendar.date_value
GROUP BY calendar.date_value
ORDER BY calendar.date_value
;
Here's how I'd do it:
WITH your_table (customer_id, order_date, order_id) AS (
    SELECT 123, TO_DATE('24/01/2016', 'dd/mm/yyyy'), 12345 FROM dual UNION ALL
    SELECT 123, TO_DATE('24/01/2016', 'dd/mm/yyyy'), 12346 FROM dual UNION ALL
    SELECT 123, TO_DATE('25/01/2016', 'dd/mm/yyyy'), 12347 FROM dual UNION ALL
    SELECT 123, TO_DATE('24/02/2016', 'dd/mm/yyyy'), 12347 FROM dual UNION ALL
    SELECT 123, TO_DATE('16/03/2016', 'dd/mm/yyyy'), 12348 FROM dual UNION ALL
    SELECT 123, TO_DATE('18/04/2016', 'dd/mm/yyyy'), 12349 FROM dual UNION ALL
    SELECT 456, TO_DATE('20/02/2016', 'dd/mm/yyyy'), 12350 FROM dual UNION ALL
    SELECT 456, TO_DATE('01/03/2016', 'dd/mm/yyyy'), 12351 FROM dual UNION ALL
    SELECT 456, TO_DATE('03/03/2016', 'dd/mm/yyyy'), 12352 FROM dual UNION ALL
    SELECT 456, TO_DATE('18/04/2016', 'dd/mm/yyyy'), 12353 FROM dual UNION ALL
    SELECT 456, TO_DATE('20/05/2016', 'dd/mm/yyyy'), 12354 FROM dual UNION ALL
    SELECT 456, TO_DATE('23/06/2016', 'dd/mm/yyyy'), 12355 FROM dual UNION ALL
    SELECT 456, TO_DATE('19/01/2017', 'dd/mm/yyyy'), 12356 FROM dual
),
-- end of mimicking your_table with data in it
lapsed_info AS (
    SELECT customer_id,
           order_date,
           CASE
               -- orders from the last 30 days cannot have lapsed yet
               WHEN TRUNC(SYSDATE) - order_date <= 30 THEN NULL
               -- no order by the same customer in the 1..30 days after this one
               WHEN COUNT(*) OVER (PARTITION BY customer_id
                                   ORDER BY order_date
                                   RANGE BETWEEN 1 FOLLOWING
                                             AND 30 FOLLOWING) = 0
                   THEN order_date + 30
           END AS lapsed_date
    FROM your_table
),
dates AS (
    -- every day from 01/01/2016 up to and including today
    SELECT TO_DATE('01/01/2016', 'dd/mm/yyyy') + LEVEL - 1 AS dt
    FROM dual
    CONNECT BY TO_DATE('01/01/2016', 'dd/mm/yyyy') + LEVEL - 1 <= TRUNC(SYSDATE)
)
SELECT d.dt,
       COUNT(li.lapsed_date) AS lapsed_count
FROM dates d
LEFT JOIN lapsed_info li
    ON d.dt = li.lapsed_date
GROUP BY d.dt
ORDER BY d.dt;
Results:
DT LAPSED_COUNT
---------- ------------
01/01/2016 0
<snip>
23/01/2016 0
24/01/2016 0
25/01/2016 0
26/01/2016 0
<snip>
19/02/2016 0
20/02/2016 0
21/02/2016 0
22/02/2016 0
23/02/2016 0
24/02/2016 1
25/02/2016 0
<snip>
29/02/2016 0
01/03/2016 0
02/03/2016 0
03/03/2016 0
04/03/2016 0
<snip>
15/03/2016 0
16/03/2016 0
17/03/2016 0
<snip>
20/03/2016 0
21/03/2016 0
22/03/2016 0
<snip>
30/03/2016 0
31/03/2016 0
01/04/2016 0
02/04/2016 1
03/04/2016 0
<snip>
14/04/2016 0
15/04/2016 1
16/04/2016 0
17/04/2016 0
18/04/2016 0
19/04/2016 0
<snip>
17/05/2016 0
18/05/2016 2
19/05/2016 0
20/05/2016 0
21/05/2016 0
<snip>
18/06/2016 0
19/06/2016 1
20/06/2016 0
21/06/2016 0
22/06/2016 0
23/06/2016 0
24/06/2016 0
<snip>
22/07/2016 0
23/07/2016 1
24/07/2016 0
<snip>
18/01/2017 0
19/01/2017 0
20/01/2017 0
<snip>
08/02/2017 0
This takes your data, and uses the analytic count function to work out the number of rows that have a value within 30 days of (but excluding) the current row's date.
Then we apply a case expression to determine that if the row has a date within 30 days of today's date, we'll count those as not lapsed. If a count of 0 was returned, then the row is considered lapsed and we'll output the lapsed date as the order_date plus 30 days. Any other count result means the row has not lapsed.
The above is all worked out in the lapsed_info subquery.
Then all we need to do is list the dates (see the dates subquery) and outer join the lapsed_info subquery to it based on the lapsed_date and then do a count of the lapsed dates for each day.
Given the following table
PAYMENT_Date TRANSACTION_TYPE PAYMENT_AMT
1/1/2012 P 184366
1/1/2012 R -5841
1/2/2012 P 941
1/3/2012 P 901
1/3/2012 R 5841
and the following query:
-- For each row, sum payment_amt over the window that starts at the current
-- row and runs to the end of the (payment_date, transaction_type) ordering.
SELECT payment_date,
       transaction_type,
       payment_amt,
       SUM(payment_amt) OVER (
           ORDER BY payment_date, transaction_type
           RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
       ) AS running_balance
FROM TABLE;
I get these results:
PAYMENT_Date TRANSACTION_TYPE PAYMENT_AMT RUNNING_BALANCE
1/1/2012 P 184366 0
1/1/2012 R -5841 -184366
1/2/2012 P 941 -178525
1/3/2012 P 901 -179466
1/3/2012 R 5841 -180367
EXPECTED:
PAYMENT_Date TRANSACTION_TYPE PAYMENT_AMT RUNNING_BALANCE
1/1/2012 P 184366 0
1/1/2012 R -5841 184366
1/2/2012 P 941 178525
1/3/2012 P 901 179466
1/3/2012 R 5841 180367
Why does RUNNING_BALANCE come back as a negative number?
How can I make it not, besides the obvious abs()?
First, the data and query you posted don't appear to generate the output you're seeing. So there is some sort of copy and paste error somewhere
SQL> with t as ( -- the question's five sample rows, inlined
2 select date '2012-01-01' payment_date, 'P' transaction_type, 184366 payment_amt from dual union all
3 select date '2012-01-01', 'R', -5841 from dual union all
4 select date '2012-01-02', 'P', 941 from dual union all
5 select date '2012-01-03', 'P', 901 from dual union all
6 select date '2012-01-03', 'R', 5841 from dual
7 )
8 select payment_date, transaction_type, payment_amt,
9 SUM(payment_amt) OVER(ORDER BY payment_date, transaction_type
10 RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS RUNNING_BALANCE -- frame: the current row through the last row
11 from T;
PAYMENT_D T PAYMENT_AMT RUNNING_BALANCE
--------- - ----------- ---------------
01-JAN-12 P 184366 186208
01-JAN-12 R -5841 1842
02-JAN-12 P 941 7683
03-JAN-12 P 901 6742
03-JAN-12 R 5841 5841
Normally, a running balance would be done just by omitting the RANGE BETWEEN clause.
SQL> ed
Wrote file afiedt.buf
1 with t as ( -- the question's five sample rows, inlined
2 select date '2012-01-01' payment_date, 'P' transaction_type, 184366 payment_amt from dual union all
3 select date '2012-01-01', 'R', -5841 from dual union all
4 select date '2012-01-02', 'P', 941 from dual union all
5 select date '2012-01-03', 'P', 901 from dual union all
6 select date '2012-01-03', 'R', 5841 from dual
7 )
8 select payment_date, transaction_type, payment_amt,
9 SUM(payment_amt) OVER(ORDER BY payment_date, transaction_type) AS RUNNING_BALANCE -- no frame clause: accumulates from the first row up to the current row
10* from T
SQL> /
PAYMENT_D T PAYMENT_AMT RUNNING_BALANCE
--------- - ----------- ---------------
01-JAN-12 P 184366 184366
01-JAN-12 R -5841 178525
02-JAN-12 P 941 179466
03-JAN-12 P 901 180367
03-JAN-12 R 5841 186208
In your case, though, it sounds like you want the running balance to exclude the current row's payment. That's a bit odd but you can do it by adding an additional LAG:
SQL> ed
Wrote file afiedt.buf
1 with t as ( -- the question's five sample rows, inlined
2 select date '2012-01-01' payment_date, 'P' transaction_type, 184366 payment_amt from dual union all
3 select date '2012-01-01', 'R', -5841 from dual union all
4 select date '2012-01-02', 'P', 941 from dual union all
5 select date '2012-01-03', 'P', 901 from dual union all
6 select date '2012-01-03', 'R', 5841 from dual
7 )
8 select payment_date,
9 transaction_type,
10 payment_amt,
11 NVL( LAG(running_balance) OVER(ORDER BY payment_date, -- previous row's running total; NVL turns the first row's NULL into 0
12 transaction_type), 0) new_running_balance
13 from (select payment_date,
14 transaction_type,
15 payment_amt,
16 SUM(payment_amt) OVER(ORDER BY payment_date, -- running total that includes the current row
17 transaction_type) AS RUNNING_BALANCE
18* from t)
SQL> /
PAYMENT_D T PAYMENT_AMT NEW_RUNNING_BALANCE
--------- - ----------- -------------------
01-JAN-12 P 184366 0
01-JAN-12 R -5841 184366
02-JAN-12 P 941 178525
03-JAN-12 P 901 179466
03-JAN-12 R 5841 180367
I think you need to change:
RANGE BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING
to:
ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
Test in SQLfiddle:
-- Running balance that excludes the current row: sum every row before the
-- current one in (payment_date, transaction_type) order; the first row has
-- no preceding rows, so its NULL sum is replaced by 0.
select payment_date,
       transaction_type,
       payment_amt,
       coalesce(
           sum(payment_amt) over (
               order by payment_date, transaction_type
               rows between unbounded preceding and 1 preceding
           ),
           0
       ) as running_balance
from t;