Select Query Oracle - sql

My Table Structure is like below:
Carrier Terminal timestamp1
1 1 21-Mar-17
2 101 21-Mar-17
3 2 21-Mar-17
4 202 21-Mar-17
5 3 21-Mar-17
6 303 21-Mar-17
where carrier
flight 1,2 = Delta
flight 3,4 = Air France
flight 5,6 = Lufthanse
and
Terminal 1,101 = T1
terminal 2,202 = T2
terminal 3,303 = T3
I am trying output like below:
count(Delta), count(Air France), count(Lufthansa), terminal as column output
2, 0, 0, T1
0, 2, 0, T2
0, 0, 2, T3
I have started like this
select count(Delta), count(Air France), count(Lufthansa), terminal
from table_name
where timestamp between '01-Mar-18 07.00.00.000000 AM' and '30-Mar-18 07.59.59.999999 AM'
I am trying to write a query to have a count of different carriers flown through a particular day for each terminal
Any advice will be highly appreciated.

I'm making a whole lot of assumptions for this to work... I've extracted all the rules you've mentioned in your text and I've assumed that those structures are already in place. Otherwise, let us know :)
-- Inline fixtures standing in for the real flight, carrier and terminal tables.
WITH flights (carrier, terminal, departure) AS (
    SELECT 1,   1, TIMESTAMP '2017-03-01 01:00:00' FROM dual UNION ALL
    SELECT 2, 101, TIMESTAMP '2017-03-01 02:00:00' FROM dual UNION ALL
    SELECT 3,   2, TIMESTAMP '2017-03-01 03:00:00' FROM dual UNION ALL
    SELECT 4, 202, TIMESTAMP '2017-03-01 04:00:00' FROM dual UNION ALL
    SELECT 5,   3, TIMESTAMP '2017-03-01 05:00:00' FROM dual UNION ALL
    SELECT 6, 303, TIMESTAMP '2017-03-01 06:00:00' FROM dual
),
carriers (carrier, carrier_name) AS (
    SELECT 1, 'Delta'      FROM dual UNION ALL
    SELECT 2, 'Delta'      FROM dual UNION ALL
    SELECT 3, 'Air France' FROM dual UNION ALL
    SELECT 4, 'Air France' FROM dual UNION ALL
    SELECT 5, 'Lufthanse'  FROM dual UNION ALL
    SELECT 6, 'Lufthanse'  FROM dual
),
terminals (terminal, terminal_name) AS (
    SELECT   1, 'T1' FROM dual UNION ALL
    SELECT 101, 'T1' FROM dual UNION ALL
    SELECT   2, 'T2' FROM dual UNION ALL
    SELECT 202, 'T2' FROM dual UNION ALL
    SELECT   3, 'T3' FROM dual UNION ALL
    SELECT 303, 'T3' FROM dual
)
-- One output row per terminal name; each carrier gets its own conditional count.
SELECT
    t.terminal_name,
    COUNT(CASE c.carrier_name WHEN 'Delta'      THEN 1 END) AS "Delta",
    COUNT(CASE c.carrier_name WHEN 'Air France' THEN 1 END) AS "Air France",
    COUNT(CASE c.carrier_name WHEN 'Lufthanse'  THEN 1 END) AS "Lufthanse"
FROM flights f
INNER JOIN carriers  c ON c.carrier  = f.carrier
INNER JOIN terminals t ON t.terminal = f.terminal
-- Half-open range: every departure in March 2017.
WHERE f.departure >= TIMESTAMP '2017-03-01 00:00:00'
  AND f.departure <  TIMESTAMP '2017-04-01 00:00:00'
GROUP BY t.terminal_name
ORDER BY t.terminal_name;

with
  t ( flight, gate, ts ) as (
    -- ANSI timestamp literals: independent of NLS_DATE_LANGUAGE / NLS_TIMESTAMP_FORMAT.
    select 1,   1, timestamp '2017-03-21 00:00:00' from dual union all
    select 2, 101, timestamp '2017-03-21 00:00:00' from dual union all
    select 3,   2, timestamp '2017-03-21 00:00:00' from dual union all
    select 4, 202, timestamp '2017-03-21 00:00:00' from dual union all
    select 5,   3, timestamp '2017-03-21 00:00:00' from dual union all
    select 6, 303, timestamp '2017-03-21 00:00:00' from dual
  )
-- End of simulated inputs (for testing only, not part of the solution).
-- SQL query begins below this line. Use your actual table and column names.
select count (case when flight in (1, 2) then 1 end) as delta
     , count (case when flight in (3, 4) then 1 end) as air_france
     , count (case when flight in (5, 6) then 1 end) as lufthansa
     , terminal
from   (
         -- Map each gate to its terminal once, so the CASE is not repeated in GROUP BY.
         select flight
              , case when gate in (1, 101) then 'T1'
                     when gate in (2, 202) then 'T2'
                     when gate in (3, 303) then 'T3' end as terminal
         from   t
         -- Half-open range covering the whole of 21-Mar-2017. The bounds are typed
         -- literals, so no implicit string-to-timestamp conversion takes place.
         where  ts >= timestamp '2017-03-21 00:00:00'
         and    ts <  timestamp '2017-03-22 00:00:00'
       )
group  by terminal
order  by terminal
;
DELTA AIR_FRANCE LUFTHANSA TERMINAL
---------- ---------- ---------- --------
2 0 0 T1
0 2 0 T2
0 0 2 T3

Related

Which statement to use based on its row and transaction in stay that way

I am stuck on this, and it is hard to explain in words (or in the subject heading), so hopefully the data example and the expected output below are enough to give you an idea.
I have run out of ideas as to which Oracle SQL statement is best to use. OVER (PARTITION BY ...) did not seem to work for the scenario below.
Before & After:
DATA
customer_id
row_id
date
type
LB01
22
14/03/2022
CAR 1
LB01
21
11/03/2022
CAR 1
LB01
20
9/11/2001
CAR 1
LB01
19
16/10/2001
CAR 1
LB01
18
10/08/2001
CAR 2
LB01
17
29/05/2001
CAR 2
LB01
16
24/04/2001
CAR 2
LB01
15
13/03/2001
CAR 3
LB01
14
21/12/2000
CAR 3
LB01
13
13/11/2000
CAR 3
LB01
12
20/10/2000
CAR 3
LB01
11
14/03/2000
CAR 1
LB01
10
18/01/2000
CAR 1
LB01
9
24/12/1999
CAR 2
LB01
8
14/09/1999
CAR 2
LB01
7
30/03/1999
CAR 2
LB01
6
24/02/1999
CAR 2
LB01
5
19/02/1999
CAR 4
LB01
4
15/12/1998
CAR 4
LB01
3
15/12/1998
CAR 4
LB01
2
24/09/1998
CAR 4
LB01
1
06/08/1998
CAR 4
The output that I want result like this:
customer_id
type
min_date
max_date
LB01
Car 1
16/10/2001
14/03/2022
LB01
Car 2
24/04/2001
10/08/2001
LB01
Car 3
20/10/2000
13/03/2001
LB01
Car 1
18/01/2000
14/03/2000
LB01
Car 2
24/02/1999
24/12/1999
LB01
Car 4
6/08/1998
19/02/1999
I hope those makes sense for you and hopefully you can assist me on this.
You have to apply Tabibitosan algorithm first:
-- Tabibitosan (gaps-and-islands): inside one (customer_id, type) partition,
-- row_id minus its row_number is constant for a run of consecutive row_ids,
-- so rn identifies each island.
select customer_id, type, rn, min(dat) as min_date, max(dat) as max_dat
from (
  select d.*,
         row_id + 1 - row_number() over (partition by customer_id, type order by row_id) as rn
  from data d
)
group by customer_id, type, rn
-- rn is added as a tie-breaker: without it, islands of the same type are returned in undefined order.
order by customer_id, type, rn
;
CUST TYPE RN MIN_DATE MAX_DAT
---- ----- ---------- -------- --------
LB01 CAR 1 10 18/01/00 14/03/00
LB01 CAR 1 17 16/10/01 14/03/22
LB01 CAR 2 6 24/02/99 24/12/99
LB01 CAR 2 12 24/04/01 10/08/01
LB01 CAR 3 12 20/10/00 13/03/01
LB01 CAR 4 1 06/08/98 19/02/99
Just create your own grouping calculation out of already existing ROW_ID and use MIN_DATE for ordering:
-- Gaps-and-islands on ROW_ID: walking a (CUST_ID, A_TYPE) partition in descending
-- ROW_ID order, ROW_ID falls by 1 while ROW_NUMBER rises by 1 for consecutive rows,
-- so their sum is constant within one island.
SELECT CUST_ID, A_TYPE, Min(A_DATE) "MIN_DATE", Max(A_DATE) "MAX_DATE"
FROM (SELECT t.CUST_ID, t.A_TYPE, t.A_DATE,
             -- Partition on the two columns separately: concatenating them
             -- ('AB' || 'C' = 'A' || 'BC') could merge unrelated partitions.
             t.ROW_ID + ROW_NUMBER() OVER (PARTITION BY t.CUST_ID, t.A_TYPE ORDER BY t.ROW_ID DESC) "GRP"
      FROM tbl t )
GROUP BY CUST_ID, A_TYPE, GRP
ORDER BY CUST_ID, MIN(A_DATE) DESC
With your sample data:
-- Sample rows from the question, supplied as an inline CTE named tbl.
-- The "--" separator lines below mark where A_TYPE changes between consecutive ROW_IDs.
WITH
tbl (CUST_ID, ROW_ID, A_DATE, A_TYPE) AS
( Select 'LB01', 22, To_Date('14/03/2022', 'dd/mm/yyyy'), 'CAR 1' From Dual Union All
Select 'LB01', 21, To_Date('11/03/2022', 'dd/mm/yyyy'), 'CAR 1' From Dual Union All
Select 'LB01', 20, To_Date('09/11/2001', 'dd/mm/yyyy'), 'CAR 1' From Dual Union All
Select 'LB01', 19, To_Date('16/10/2001', 'dd/mm/yyyy'), 'CAR 1' From Dual Union All
--
Select 'LB01', 18, To_Date('10/08/2001', 'dd/mm/yyyy'), 'CAR 2' From Dual Union All
Select 'LB01', 17, To_Date('29/05/2001', 'dd/mm/yyyy'), 'CAR 2' From Dual Union All
Select 'LB01', 16, To_Date('24/04/2001', 'dd/mm/yyyy'), 'CAR 2' From Dual Union All
--
Select 'LB01', 15, To_Date('13/03/2001', 'dd/mm/yyyy'), 'CAR 3' From Dual Union All
Select 'LB01', 14, To_Date('21/12/2000', 'dd/mm/yyyy'), 'CAR 3' From Dual Union All
Select 'LB01', 13, To_Date('13/11/2000', 'dd/mm/yyyy'), 'CAR 3' From Dual Union All
Select 'LB01', 12, To_Date('20/10/2000', 'dd/mm/yyyy'), 'CAR 3' From Dual Union All
--
Select 'LB01', 11, To_Date('14/03/2000', 'dd/mm/yyyy'), 'CAR 1' From Dual Union All
Select 'LB01', 10, To_Date('18/01/2000', 'dd/mm/yyyy'), 'CAR 1' From Dual Union All
--
Select 'LB01', 9, To_Date('24/12/1999', 'dd/mm/yyyy'), 'CAR 2' From Dual Union All
Select 'LB01', 8, To_Date('14/09/1999', 'dd/mm/yyyy'), 'CAR 2' From Dual Union All
Select 'LB01', 7, To_Date('30/03/1999', 'dd/mm/yyyy'), 'CAR 2' From Dual Union All
Select 'LB01', 6, To_Date('24/02/1999', 'dd/mm/yyyy'), 'CAR 2' From Dual Union All
--
Select 'LB01', 5, To_Date('19/02/1999', 'dd/mm/yyyy'), 'CAR 4' From Dual Union All
Select 'LB01', 4, To_Date('15/12/1998', 'dd/mm/yyyy'), 'CAR 4' From Dual Union All
Select 'LB01', 3, To_Date('15/12/1998', 'dd/mm/yyyy'), 'CAR 4' From Dual Union All
Select 'LB01', 2, To_Date('24/09/1998', 'dd/mm/yyyy'), 'CAR 4' From Dual Union All
Select 'LB01', 1, To_Date('06/08/1998', 'dd/mm/yyyy'), 'CAR 4' From Dual
)
Result:
CUST_ID
A_TYPE
MIN_DATE
MAX_DATE
LB01
CAR 1
16-OCT-01
14-MAR-22
LB01
CAR 2
24-APR-01
10-AUG-01
LB01
CAR 3
20-OCT-00
13-MAR-01
LB01
CAR 1
18-JAN-00
14-MAR-00
LB01
CAR 2
24-FEB-99
24-DEC-99
LB01
CAR 4
06-AUG-98
19-FEB-99
PLEASE NOTE: For some reason this website does not let me accept both answers. The solutions provided by #p3consulting and #d r both work on my data once I create a new column holding a fresh row id, so that the row numbers are consecutive.
Do this in a subquery beforehand:
row_number() over(partition by cust_id order by seq_id ) row_id
This is needed when the numbers in your data are not consecutive.
Refer to the post above by #p3consulting regarding the Tabibitosan algorithm.
Below is a script that you can copy and paste into your Oracle SQL client to learn from.
For my question, the script as posted here still does not work for my data, and I am still searching for the right script.
with
sample_data (customer_id, row_id, mock_date, product_type) as (
select 'LB01', '22', to_date ('03/14/22', 'mm/dd/rr'), 'CAR 1' from dual union all
select 'LB01', '21', to_date ('03/11/22', 'mm/dd/rr'), 'CAR 1' from dual union all
select 'LB01', '20', to_date ('11/09/01', 'mm/dd/rr'), 'CAR 1' from dual union all
select 'LB01', '19', to_date ('10/16/01', 'mm/dd/rr'), 'CAR 1' from dual union all
select 'LB01', '18', to_date ('08/10/01', 'mm/dd/rr'), 'CAR 2' from dual union all
select 'LB01', '17', to_date ('05/29/01', 'mm/dd/rr'), 'CAR 2' from dual union all
select 'LB01', '16', to_date ('04/24/01', 'mm/dd/rr'), 'CAR 2' from dual union all
select 'LB01', '15', to_date ('03/13/01', 'mm/dd/rr'), 'CAR 3' from dual union all
select 'LB01', '14', to_date ('12/21/00', 'mm/dd/rr'), 'CAR 3' from dual union all
select 'LB01', '13', to_date ('11/13/00', 'mm/dd/rr'), 'CAR 3' from dual union all
select 'LB01', '12', to_date ('10/20/00', 'mm/dd/rr'), 'CAR 3' from dual union all
select 'LB01', '11', to_date ('03/14/00', 'mm/dd/rr'), 'CAR 1' from dual union all
select 'LB01', '10', to_date ('01/18/00', 'mm/dd/rr'), 'CAR 1' from dual union all
select 'LB01', '09', to_date ('12/24/99', 'mm/dd/rr'), 'CAR 2' from dual union all
select 'LB01', '08', to_date ('09/14/99', 'mm/dd/rr'), 'CAR 2' from dual union all
select 'LB01', '07', to_date ('03/30/99', 'mm/dd/rr'), 'CAR 2' from dual union all
select 'LB01', '06', to_date ('02/24/99', 'mm/dd/rr'), 'CAR 2' from dual union all
select 'LB01', '05', to_date ('02/19/99', 'mm/dd/rr'), 'CAR 4' from dual union all
select 'LB01', '04', to_date ('12/15/98', 'mm/dd/rr'), 'CAR 4' from dual union all
select 'LB01', '03', to_date ('12/15/98', 'mm/dd/rr'), 'CAR 4' from dual union all
select 'LB01', '02', to_date ('09/24/98', 'mm/dd/rr'), 'CAR 4' from dual union all
select 'LB01', '01', to_date ('06/08/98', 'mm/dd/rr'), 'CAR 4' from dual
),
-- Tabibitosan island key. row_id is stored as text here, so it is converted
-- explicitly: the arithmetic no longer relies on implicit conversion, and the
-- ordering is numeric ('9' sorts before '10') rather than character-based.
islands as (
  select d.customer_id,
         d.product_type,
         d.mock_date,
         to_number(d.row_id) + 1
           - row_number() over (partition by d.customer_id, d.product_type
                                order by to_number(d.row_id)) as grp
  from sample_data d
)
select
  customer_id,
  product_type,
  grp,
  min(mock_date) as min_date,
  max(mock_date) as max_date
from islands
group by
  customer_id,
  product_type,
  grp
-- grp values can tie across product types (CAR 2 and CAR 3 both produce 12 here),
-- so order the islands chronologically, newest first, instead of by grp.
order by
  customer_id,
  min(mock_date) desc

how to use windows function during merge in sql

I am working in Oracle SQL. I have two tables which are linked to each other by one column - company_id (see the picture). I want to merge table 1 into table 2 and calculate the 6-month average of income (over the 6 months preceding each date in table 2) for each company_id and each date of table2. I would appreciate any code or ideas on how to solve this task.
You can use an analytic range window to calculate the averages for table1 and then JOIN the result to table2:
-- Rolling income averages per company: each table1 row is averaged with the rows
-- whose dt falls within the preceding 5 (or 11) months, current row included.
WITH rolling AS (
  SELECT src.company_id,
         src.dt,
         ROUND(
           AVG(src.income) OVER (
             PARTITION BY src.company_id
             ORDER BY src.dt
             RANGE BETWEEN INTERVAL '5' MONTH PRECEDING AND CURRENT ROW
           ),
           2
         ) AS avg_income_6,
         ROUND(
           AVG(src.income) OVER (
             PARTITION BY src.company_id
             ORDER BY src.dt
             RANGE BETWEEN INTERVAL '11' MONTH PRECEDING AND CURRENT ROW
           ),
           2
         ) AS avg_income_12
  FROM table1 src
)
-- Keep every table2 row; the averages are NULL when table1 has no row for that company and date.
SELECT t2.*,
       r.avg_income_6,
       r.avg_income_12
FROM table2 t2
LEFT JOIN rolling r
  ON  r.company_id = t2.company_id
  AND r.dt         = t2.dt;
Which, for the sample data:
-- Fixture: monthly income rows for company 1, one row per first-of-month from 2019-01 through 2021-04.
CREATE TABLE table1 (company_id, dt, income) AS
SELECT 1, date '2019-01-01', 65 FROM DUAL UNION ALL
SELECT 1, date '2019-02-01', 58 FROM DUAL UNION ALL
SELECT 1, date '2019-03-01', 12 FROM DUAL UNION ALL
SELECT 1, date '2019-04-01', 81 FROM DUAL UNION ALL
SELECT 1, date '2019-05-01', 38 FROM DUAL UNION ALL
SELECT 1, date '2019-06-01', 81 FROM DUAL UNION ALL
SELECT 1, date '2019-07-01', 38 FROM DUAL UNION ALL
SELECT 1, date '2019-08-01', 69 FROM DUAL UNION ALL
SELECT 1, date '2019-09-01', 54 FROM DUAL UNION ALL
SELECT 1, date '2019-10-01', 90 FROM DUAL UNION ALL
SELECT 1, date '2019-11-01', 10 FROM DUAL UNION ALL
SELECT 1, date '2019-12-01', 12 FROM DUAL UNION ALL
SELECT 1, date '2020-01-01', 11 FROM DUAL UNION ALL
SELECT 1, date '2020-02-01', 83 FROM DUAL UNION ALL
SELECT 1, date '2020-03-01', 18 FROM DUAL UNION ALL
SELECT 1, date '2020-04-01', 28 FROM DUAL UNION ALL
SELECT 1, date '2020-05-01', 52 FROM DUAL UNION ALL
SELECT 1, date '2020-06-01', 21 FROM DUAL UNION ALL
SELECT 1, date '2020-07-01', 54 FROM DUAL UNION ALL
SELECT 1, date '2020-08-01', 30 FROM DUAL UNION ALL
SELECT 1, date '2020-09-01', 12 FROM DUAL UNION ALL
SELECT 1, date '2020-10-01', 25 FROM DUAL UNION ALL
SELECT 1, date '2020-11-01', 86 FROM DUAL UNION ALL
SELECT 1, date '2020-12-01', 4 FROM DUAL UNION ALL
SELECT 1, date '2021-01-01', 10 FROM DUAL UNION ALL
SELECT 1, date '2021-02-01', 72 FROM DUAL UNION ALL
SELECT 1, date '2021-03-01', 65 FROM DUAL UNION ALL
SELECT 1, date '2021-04-01', 25 FROM DUAL;
-- Fixture: the (company_id, dt) pairs for which a rolling average is wanted.
CREATE TABLE table2 (company_id, dt) AS
SELECT 1, date '2019-06-01' FROM DUAL UNION ALL
SELECT 1, date '2019-09-01' FROM DUAL UNION ALL
SELECT 1, date '2019-12-01' FROM DUAL UNION ALL
SELECT 1, date '2020-01-01' FROM DUAL UNION ALL
SELECT 1, date '2020-07-01' FROM DUAL UNION ALL
SELECT 1, date '2020-08-01' FROM DUAL UNION ALL
SELECT 1, date '2021-03-01' FROM DUAL UNION ALL
SELECT 1, date '2021-04-01' FROM DUAL;
Outputs:
COMPANY_ID
DT
AVG_INCOME_6
AVG_INCOME_12
1
2019-06-01 00:00:00
55.83
55.83
1
2019-09-01 00:00:00
60.17
55.11
1
2019-12-01 00:00:00
45.5
50.67
1
2020-01-01 00:00:00
41
46.17
1
2020-07-01 00:00:00
42.67
41.83
1
2020-08-01 00:00:00
33.83
38.58
1
2021-03-01 00:00:00
43.67
38.25
1
2021-04-01 00:00:00
43.67
38
db<>fiddle here
I don't think you need any window function here (if you were thinking of analytic functions); ordinary avg with appropriate join conditions should do the job.
Sample data:
SQL> with
2 table1 (company_id, datum, income) as
3 (select 1, date '2019-01-01', 65 from dual union all
4 select 1, date '2019-02-01', 58 from dual union all
5 select 1, date '2019-03-01', 12 from dual union all
6 select 1, date '2019-04-01', 81 from dual union all
7 select 1, date '2019-05-01', 38 from dual union all
8 select 1, date '2019-06-01', 81 from dual union all
9 select 1, date '2019-07-01', 38 from dual union all
10 select 1, date '2019-08-01', 69 from dual union all
11 select 1, date '2019-09-01', 54 from dual union all
12 select 1, date '2019-10-01', 90 from dual union all
13 select 1, date '2019-11-01', 10 from dual union all
14 select 1, date '2019-12-01', 12 from dual
15 ),
16 table2 (company_id, datum) as
17 (select 1, date '2019-06-01' from dual union all
18 select 1, date '2019-09-01' from dual union all
19 select 1, date '2019-12-01' from dual union all
20 select 1, date '2020-01-01' from dual union all
21 select 1, date '2020-07-01' from dual
22 )
Query begins here:
23 select b.company_id,
24 b.datum ,
25 round(avg(a.income), 2) result
26 from table1 a join table2 b on a.company_id = b.company_id
27 and a.datum > add_months(b.datum, -6)
28 and a.datum <= b.datum
29 group by b.company_id, b.datum;
COMPANY_ID DATUM RESULT
---------- -------- ----------
1 01.06.19 55,83
1 01.09.19 60,17
1 01.12.19 45,5
1 01.01.20 47
SQL>

Count daily fidelity

I have the below table and I would like to count, day by day, the number of distinct people who logged in everyday. For example, for day 1, everyone logged in, so it's 4. For day 4, there's just one person ID who logged in everyday since day 1, so the count would be 1.
DAY
PERSON_ID
1
01
1
02
1
03
1
04
2
01
2
02
2
03
3
01
4
02
4
01
Expected output.
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
1
4
01, 02, 03, 04
2
3
01, 02, 03
3
1
01
4
1
01
EDIT: the query should also work on the below data.
with t ( DAY, PERSON_ID ) AS(
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL)
Expected output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
EXPLANATION
10
3
01, 02, 04
Three unique people in day 10
12
2
01, 02
Day 11 does not have values, so it's not included. From those in day 10, only 2 appear in day 12
13
1
01
From those in day 10 and 12, only 01 appears in day 13
14
1
01
From those in day 10, 12 and 13, only 01 appears in day 14
You can use listagg() with a group by clause. If day always starts from 1 and increases by 1, then you can use the query below. Here, with the help of EXISTS, I have selected only those person_ids that are present on all the previous days.
-- Fixture: one row per (day, person) login, with day as an integer sequence starting at 1.
-- VARCHAR2 is used because Oracle recommends it over VARCHAR.
create table yourtable(DAY int, PERSON_ID varchar2(10));
-- Explicit column lists keep the inserts valid if the table definition changes.
insert into yourtable (day, person_id) values (1, '01');
insert into yourtable (day, person_id) values (1, '02');
insert into yourtable (day, person_id) values (1, '03');
insert into yourtable (day, person_id) values (1, '04');
insert into yourtable (day, person_id) values (2, '01');
insert into yourtable (day, person_id) values (2, '02');
insert into yourtable (day, person_id) values (2, '03');
insert into yourtable (day, person_id) values (3, '01');
insert into yourtable (day, person_id) values (4, '02');
insert into yourtable (day, person_id) values (4, '01');
Query:
-- People who logged in on every day from day 1 up to and including the row's day.
-- Assumes day is an integer sequence that starts at 1 and has no gaps.
select a.day,
       count(a.person_id) as PEOPLE_LOGGED_EVERYDAY,
       LISTAGG(a.person_id,',') WITHIN GROUP(ORDER BY a.person_id) AS PEOPLE
-- De-duplicate first so a repeated (day, person_id) row is neither counted nor listed twice.
from (select distinct day, person_id from yourtable) a
where exists (select 1
              from yourtable b
              where b.day <= a.day
              and   b.person_id = a.person_id
              group by b.person_id
              -- Distinct days, so duplicate logins on one day cannot fake a missing day.
              having count(distinct b.day) = a.day)
group by a.day
order by a.day;
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
1
4
01,02,03,04
2
3
01,02,03
3
1
01
4
1
01
db<>fiddle here
Instead of day sequence if you had increasing dates in day column:
-- Fixture: one row per (day, person) login, with day as a calendar date.
-- VARCHAR2 is used because Oracle recommends it over VARCHAR.
create table yourtable(DAY date, PERSON_ID varchar2(10));
-- Explicit column lists keep the inserts valid if the table definition changes.
insert into yourtable (day, person_id) values (date '2021-01-01', '01');
insert into yourtable (day, person_id) values (date '2021-01-01', '02');
insert into yourtable (day, person_id) values (date '2021-01-01', '03');
insert into yourtable (day, person_id) values (date '2021-01-01', '04');
insert into yourtable (day, person_id) values (date '2021-01-02', '01');
insert into yourtable (day, person_id) values (date '2021-01-02', '02');
insert into yourtable (day, person_id) values (date '2021-01-02', '03');
insert into yourtable (day, person_id) values (date '2021-01-03', '01');
insert into yourtable (day, person_id) values (date '2021-01-04', '02');
insert into yourtable (day, person_id) values (date '2021-01-04', '01');
Query:
-- People who logged in on every calendar day from the table's first day up to the row's day.
-- Assumes day holds dates without a time component and that no calendar day is missing
-- from the table - TODO confirm against the real data.
select a.day,
       count(a.person_id) as PEOPLE_LOGGED_EVERYDAY,
       LISTAGG(a.person_id,',') WITHIN GROUP(ORDER BY a.person_id) AS PEOPLE
-- De-duplicate first so a repeated (day, person_id) row is neither counted nor listed twice.
from (select distinct day, person_id from yourtable) a
where exists (select 1
              from yourtable b
              where b.day <= a.day
              and   b.person_id = a.person_id
              group by b.person_id
              -- Measure against the FIRST DAY IN THE TABLE, not the person's own first day.
              -- Otherwise someone who only started on day 2 but has been consecutive since
              -- would wrongly qualify.
              having count(distinct b.day) = a.day - (select min(day) from yourtable) + 1)
group by a.day
order by a.day;
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
01-JAN-21
4
01,02,03,04
02-JAN-21
3
01,02,03
03-JAN-21
1
01
04-JAN-21
1
01
db<>fiddle here
Revised answer
-- Fixture from the question's EDIT: day values with a gap (no day 11) and a duplicated (10, '04') row.
-- VARCHAR2 is used because Oracle recommends it over VARCHAR.
create table yourtable(DAY int, PERSON_ID varchar2(10));
insert into yourtable(day,person_id)
with cte ( DAY, PERSON_ID ) AS(
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL)
-- Name the columns explicitly so they line up with the INSERT column list.
select day, person_id from cte ;
Query#1 (for Oracle 19c and later)
-- People present on every distinct day that exists in the table up to the row's day
-- (days missing from the table are skipped). LISTAGG(DISTINCT ...) needs Oracle 19c or later.
select day,
       -- DISTINCT keeps the count in step with the de-duplicated list: a repeated
       -- (day, person_id) row would otherwise be counted twice.
       count(distinct person_id) as PEOPLE_LOGGED_EVERYDAY,
       LISTAGG(distinct person_id,',') WITHIN GROUP(ORDER BY person_id) AS PEOPLE
from yourtable a
where exists (select 1
              from yourtable b
              where b.day <= a.day
              and   a.person_id = b.person_id
              group by b.person_id
              -- The person's distinct days must equal all distinct days seen so far.
              having count(distinct b.day) = (select count(distinct c.day)
                                              from yourtable c
                                              where c.day <= a.day))
group by day
order by day;
Query#1 (for Oracle 18c and earlier)
-- Same result as the 19c query without LISTAGG(DISTINCT ...): the qualifying
-- (day, person_id) pairs are de-duplicated in a CTE before aggregation.
with qualifying as (
  select distinct a.day, a.person_id
  from yourtable a
  where exists (
          select 1
          from yourtable b
          where b.day <= a.day
          and   b.person_id = a.person_id
          group by b.person_id
          -- The person's distinct days must equal all distinct days in the table so far.
          having count(distinct b.day) = (select count(distinct c.day)
                                          from yourtable c
                                          where c.day <= a.day)
        )
)
select day,
       count(person_id) as PEOPLE_LOGGED_EVERYDAY,
       listagg(person_id, ',') within group (order by person_id) as PEOPLE
from qualifying
group by day
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
10
3
01,02,04
12
2
01,02
13
1
01
14
1
01
db<>fiddle here
In Standard SQL, I would approach this by doing the following:
Enumerate the days for each person.
Determine the earliest day for each person.
Filter where the earliest day is "1" and the enumeration equals the days.
Then aggregate:
-- Keeps a row only when it is the person's k-th login and falls on day k, and the
-- person's earliest day is 1; then counts and lists those people per day.
SELECT day, COUNT(*),
       LISTAGG(person_id, ',') WITHIN GROUP (ORDER BY person_id)
FROM (
       SELECT src.*,
              ROW_NUMBER() OVER (PARTITION BY src.person_id ORDER BY src.day) AS seqnum,
              MIN(src.day) OVER (PARTITION BY src.person_id) AS min_day
       FROM t src
     ) numbered
WHERE min_day = 1
  AND seqnum = day
GROUP BY day
ORDER BY day;
Not only is this simpler than using match recognize, but I would guess that the performance would be much better too.
You can use either:
-- A person is still on an unbroken streak on a given day when that day's rank among
-- all days in the table equals its rank among that person's own days.
SELECT day,
       COUNT(DISTINCT person_id) AS num_people
FROM (
  SELECT t.day,
         t.person_id,
         DENSE_RANK() OVER (ORDER BY t.day) AS overall_rank,
         DENSE_RANK() OVER (PARTITION BY t.person_id ORDER BY t.day) AS personal_rank
  FROM table_name t
)
WHERE overall_rank = personal_rank
GROUP BY day
ORDER BY day
or MATCH_RECOGNIZE to find the successive days:
-- Counts, per day, the people whose rows form an unbroken run of day ranks starting at rank 1.
SELECT day,
COUNT(
DISTINCT
-- Only rows classified as CONSECUTIVE_DAYS contribute a person_id; any other row
-- yields NULL, which COUNT ignores.
CASE cls WHEN 'CONSECUTIVE_DAYS' THEN person_id END
) AS num_people
FROM (
SELECT t.*,
-- Rank of each distinct day across the whole table; consecutive ranks stand in for
-- "next day present in the data" even when the raw day values have gaps.
DENSE_RANK() OVER (ORDER BY day) AS day_rank
FROM table_name t
)
MATCH_RECOGNIZE(
PARTITION BY person_id
ORDER BY day
MEASURES
-- CLASSIFIER() reports which pattern variable a row was mapped to.
classifier() AS cls
ALL ROWS PER MATCH
-- "^" anchors the match at the first row of each person's partition.
PATTERN ( ^ consecutive_days* )
DEFINE
-- A row qualifies when its day_rank is one more than the previous row's,
-- or equals 1 when there is no previous row.
consecutive_days AS COALESCE( PREV(day_rank) + 1, 1 ) = day_rank
)
GROUP BY day
ORDER BY day
Which, for the sample data:
-- Fixture: logins for days 1-4.
-- NOTE: person '02' is on day 3 here, whereas the question's table lists that login on day 4.
CREATE TABLE table_name ( DAY, PERSON_ID ) AS
SELECT 1, '01' FROM DUAL UNION ALL
SELECT 1, '02' FROM DUAL UNION ALL
SELECT 1, '03' FROM DUAL UNION ALL
SELECT 1, '04' FROM DUAL UNION ALL
SELECT 2, '01' FROM DUAL UNION ALL
SELECT 2, '02' FROM DUAL UNION ALL
SELECT 2, '03' FROM DUAL UNION ALL
SELECT 3, '01' FROM DUAL UNION ALL
SELECT 3, '02' FROM DUAL UNION ALL
SELECT 4, '01' FROM DUAL;
Outputs:
DAY
NUM_PEOPLE
1
4
2
3
3
2
4
1
and for the sample data:
-- Fixture from the question's EDIT: no rows for day 11, and the (10, '04') row appears twice.
CREATE TABLE table_name ( DAY, PERSON_ID ) AS
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL
Outputs:
DAY
NUM_PEOPLE
10
3
12
2
13
1
14
1
db<>fiddle here

Complex query analyzing historical records

I am using Oracle and trying to retrieve the total number of days a person was out of the office during the year. I have 2 tables involved:
Statuses
1 - Active
2 - Out of the Office
3 - Other
ScheduleHistory
RecordID - primary key
PersonID
PreviousStatusID
NextStatusID
DateChanged
I can easily find when the person went on vacation and when they came back, using
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND NextStatusID = 2
and
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND PreviousStatusID = 2
But in case a person went on vacation more than once, how can I calculate the total number of days the person was out of the office? Is it possible to do this programmatically, given only the PersonID?
Here is some sample data:
RecordID PersonID PreviousStatusID NextStatusID DateChanged
-----------------------------------------------------------------------------
1 111 1 2 03/11/2020
2 111 2 1 03/13/2020
3 111 1 3 04/01/2020
4 111 3 1 04/07/2020
5 111 1 2 06/03/2020
6 111 2 1 06/05/2020
7 111 1 2 09/14/2020
8 111 2 1 09/17/2020
So from the data above, for the year 2020 for PersonID 111 the query should return 7
Try this:
-- For each history row, lag_date is the number of days since the same person's
-- previous row (by recordid). Summing it over rows that LEAVE status 2 gives the
-- total days spent in status 2.
-- NOTE(review): to_date() is applied to datechanged, so the column is presumably
-- stored as text in MM/DD/YYYY form - confirm; if it is already a DATE, drop the to_date().
SELECT
    personid,
    SUM(lag_date) tot_days_ooo
FROM (
    SELECT
        h.personid,
        h.previousstatusid,
        to_date(h.datechanged, 'MM/DD/YYYY')
          - LAG(to_date(h.datechanged, 'MM/DD/YYYY')) OVER (
                PARTITION BY h.personid
                ORDER BY h.recordid
            ) AS lag_date
    FROM
        ScheduleHistory h
)
WHERE
    previousstatusid = 2
GROUP BY
    personid;
If you want total days (or weekdays) for each year (and to account for periods when it goes over the year boundary) then:
-- Total days and weekdays per person and calendar year spent in status 2,
-- splitting any period that crosses a year boundary.
--
-- date_ranges: one row per status change. The row's status is the status entered
-- (nextstatusid); it lasts from datechanged until the same person's next change,
-- or is zero-length when there is no later change.
WITH date_ranges ( personid, status, start_date, end_date ) AS (
SELECT personid,
nextstatusid,
datechanged,
LEAD(datechanged, 1, datechanged) OVER(
PARTITION BY personid
ORDER BY datechanged
)
FROM table_name
),
-- split_year_ranges (recursive): cuts each status-2 range into per-year pieces.
-- "year" holds the first day of the piece's year; max_date carries the original range end.
split_year_ranges ( personid, year, start_date, end_date, max_date ) AS (
-- Anchor: the first piece runs from the range start to the earlier of the range end
-- and the first day of the following year.
SELECT personid,
TRUNC( start_date, 'YY' ),
start_date,
LEAST(
end_date,
ADD_MONTHS( TRUNC( start_date, 'YY' ), 12 )
),
end_date
FROM date_ranges
WHERE status = 2
UNION ALL
-- Recursive step: while a piece stopped short of max_date, continue from its end
-- for at most another 12 months.
SELECT personid,
end_date,
end_date,
LEAST( max_date, ADD_MONTHS( end_date, 12 ) ),
max_date
FROM split_year_ranges
WHERE end_date < max_date
)
SELECT personid,
EXTRACT( YEAR FROM year) AS year,
SUM( end_date - start_date ) AS total_days,
-- Weekdays: 5 per whole ISO week between the two week starts, plus the days into the
-- end week (capped at 5), minus the days into the start week (capped at 5).
SUM(
( TRUNC( end_date, 'IW' ) - TRUNC( start_date, 'IW' ) ) * 5 / 7
+ LEAST( end_date - TRUNC( end_date, 'IW' ), 5 )
- LEAST( start_date - TRUNC( start_date, 'IW' ), 5 )
) AS total_weekdays
FROM split_year_ranges
GROUP BY personid, year
ORDER BY personid, year
Which, for the sample data:
-- Fixture: the question's rows for person 111, plus person 222 whose status-2
-- period starts on 2019-12-31 and runs into 2021.
CREATE TABLE table_name ( RecordID, PersonID, PreviousStatusID, NextStatusID, DateChanged ) AS
SELECT 1, 111, 1, 2, DATE '2020-03-11' FROM DUAL UNION ALL
SELECT 2, 111, 2, 1, DATE '2020-03-13' FROM DUAL UNION ALL
SELECT 3, 111, 1, 3, DATE '2020-04-01' FROM DUAL UNION ALL
SELECT 4, 111, 3, 1, DATE '2020-04-07' FROM DUAL UNION ALL
SELECT 5, 111, 1, 2, DATE '2020-06-03' FROM DUAL UNION ALL
SELECT 6, 111, 2, 1, DATE '2020-06-05' FROM DUAL UNION ALL
SELECT 7, 111, 1, 2, DATE '2020-09-14' FROM DUAL UNION ALL
SELECT 8, 111, 2, 1, DATE '2020-09-17' FROM DUAL UNION ALL
SELECT 9, 222, 1, 2, DATE '2019-12-31' FROM DUAL UNION ALL
SELECT 10, 222, 2, 2, DATE '2020-12-01' FROM DUAL UNION ALL
SELECT 11, 222, 2, 2, DATE '2021-01-02' FROM DUAL;
Outputs:
PERSONID
YEAR
TOTAL_DAYS
TOTAL_WEEKDAYS
111
2020
7
7
222
2019
1
1
222
2020
366
262
222
2021
1
1
db<>fiddle here
Provided no vacation crosses a year boundary
-- Pairs each person's n-th change into status 2 with their n-th change into status 1
-- (changes involving status 3 are excluded) and sums the gaps between the paired dates.
WITH numbered_changes AS (
  SELECT h.PersonID,
         h.DateChanged,
         ROW_NUMBER() OVER (PARTITION BY h.PersonID, h.NextStatusID
                            ORDER BY h.DateChanged) AS pair_no
  FROM ScheduleHistory h
  WHERE h.NextStatusID IN (1, 2)
    -- NextStatusID is already limited to 1 or 2, so only the previous status can still be 3.
    AND h.PreviousStatusID <> 3
),
pair_lengths AS (
  SELECT PersonID,
         MAX(DateChanged) - MIN(DateChanged) AS duration
  FROM numbered_changes
  GROUP BY PersonID, pair_no
)
SELECT PersonID,
       SUM(duration) days_out
FROM pair_lengths
GROUP BY PersonID;
db<>fiddle
year_span is used to split an interval spanning across two years in two different records
H1 adds a row number dependent from PersonID to get the right sequence for each person
H2 gets the periods for each status change and extract 1st day of the year of the interval end
H3 split records that span across two years and calculate the right date_start and date_end for each interval
H calculates days elapsed in each interval for each year
final query sum up the records to get output
EDIT
If you need workdays instead of total days, you should not use total_days/7*5 because it is a bad approximation and in some cases gives weird results.
I have posted a solution to jump on fridays to mondays here
-- Days spent in each status per person and calendar year. A period that runs into
-- another calendar year is split into two rows so each part is booked to its own year.
with
-- Lookup: status id -> description.
statuses (sid, sdescr) as (
select 1, 'Active' from dual union all
select 2, 'Out of the Office' from dual union all
select 3, 'Other' from dual
),
-- Sample history rows (inline fixture).
ScheduleHistory(RecordID, PersonID, PreviousStatusID, NextStatusID , DateChanged) as (
select 1, 111, 1, 2, date '2020-03-11' from dual union all
select 2, 111, 2, 1, date '2020-03-13' from dual union all
select 3, 111, 1, 3, date '2020-04-01' from dual union all
select 4, 111, 3, 1, date '2020-04-07' from dual union all
select 5, 111, 1, 2, date '2020-06-03' from dual union all
select 6, 111, 2, 1, date '2020-06-05' from dual union all
select 7, 111, 1, 2, date '2020-09-14' from dual union all
select 8, 111, 2, 1, date '2020-09-17' from dual union all
SELECT 9, 222, 1, 2, date '2019-12-31' from dual UNION ALL
SELECT 10, 222, 2, 2, date '2020-12-01' from dual UNION ALL
SELECT 11, 222, 2, 2, date '2021-01-02' from dual
),
-- Two-row helper used to duplicate periods that span a year boundary.
-- NOTE(review): with only n = 1 and n = 2, a period crossing more than one year
-- boundary is still split into just two pieces - confirm this cannot occur.
year_span (n) as (
select 1 from dual union all
select 2 from dual
),
-- H1: number each person's history rows in RecordID order.
H1 AS (
SELECT ROW_NUMBER() OVER (PARTITION BY PersonID ORDER BY RecordID) PID, H.*
FROM ScheduleHistory H
),
-- H2: pair each row with the same person's next row (PID + 1).
--   DateChanged2 = date of the next change (NULL for the person's last row)
--   Y            = number of calendar years the period touches
--   Y2           = first day of the year in which the next change happens
H2 as (
SELECT
H1.*, H2.DateChanged DateChanged2,
EXTRACT(YEAR FROM H2.DateChanged) - EXTRACT(YEAR FROM H1.DateChanged) + 1 Y,
trunc(H2.DateChanged,'YEAR') Y2
FROM H1 H1
LEFT JOIN H1 H2 ON H1.PID = H2.PID-1 AND H1.PersonID = H2.PersonID
),
-- H3: the join on N.N <= Y leaves single-year periods (Y = 1) as one row and duplicates
-- the others. For a duplicated period, piece N = 1 runs from DateChanged to Y2 and
-- piece N = 2 runs from Y2 to DateChanged2.
H3 AS (
SELECT Y, N, H2.PID, H2.RecordID, H2.PersonID, H2.NextStatusID,
CASE WHEN Y=1 THEN H2.DateChanged ELSE CASE WHEN N=1 THEN H2.DateChanged ELSE Y2 END END D1,
CASE WHEN Y=1 THEN H2.DateChanged2 ELSE CASE WHEN N=1 THEN Y2 ELSE H2.DateChanged2 END END D2
FROM H2
JOIN year_span N ON N.N <=Y
),
-- H: length in days of each piece, booked to the year in which the piece starts.
H AS (
SELECT PersonID, NextStatusID, EXTRACT(year FROM d1) Y, d2-d1 D
FROM H3
)
-- Final: total days per person, status description and year.
select PersonID, sdescr Status, Y, sum(d) d
from H
join statuses s on NextStatusID = s.sid
group by PersonID, sdescr, Y
order by PersonID, sdescr, Y
output
PersonID Status Y d
111 Active 2020 177
111 Other 2020 6
111 Out of the Office 2020 7
222 Out of the Office 2019 1
222 Out of the Office 2020 366
222 Out of the Office 2021 1
check the fiddle here

Oracle list only records that changed

I have the following code that produces a table as shown in the image:
-- Sample data: one row per day from 2018-02-01 to 2018-02-09, with code alternating
-- between runs of 600 and runs of 0.
with test (code, datum) as
(select 600, date '2018-02-01' from dual union all
select 600, date '2018-02-02' from dual union all
select 0, date '2018-02-03' from dual union all
select 0, date '2018-02-04' from dual union all
select 0, date '2018-02-05' from dual union all
select 600, date '2018-02-06' from dual union all
select 600, date '2018-02-07' from dual union all
select 0, date '2018-02-08' from dual union all
select 0, date '2018-02-09' from dual
)
select * from test;
I have tried the following, but does not return what I need.
-- Asker's first attempt. The windows are partitioned by code only, so min_date and
-- max_date are taken over ALL rows sharing a code, not over each consecutive run.
-- The outer filter therefore keeps just the earliest row of each code value.
select * from (
select test.*, min(datum) over (partition by code order by code) as min_date,
max(datum) over (partition by code order by code) as max_date
from test) where min_date = datum;
What I would like to achieve is list only the records where a change occurs on the 'code' column (before and after record where the change occurs).
So the result set should look like this:
02/FEB/18 00:00:00 600
03/FEB/18 00:00:00 0
05/FEB/18 00:00:00 0
06/FEB/18 00:00:00 600
07/FEB/18 00:00:00 600
08/FEB/18 00:00:00 0
I referenced this question, but it does not address the same issue I have.
question
Any help appreciated, thank you.
UPDATE:
This is closer to what I would like to achieve. I can list all rows where columns code and change are not the same. However, I need to list the record after where these values are different as well.
-- Asker's second attempt: shows each row next to the code of the following row.
with test (code, datum) as
(select 600, date '2018-02-01' from dual union all
select 600, date '2018-02-02' from dual union all
select 0, date '2018-02-03' from dual union all
select 0, date '2018-02-04' from dual union all
select 0, date '2018-02-05' from dual union all
select 600, date '2018-02-06' from dual union all
select 600, date '2018-02-07' from dual union all
select 0, date '2018-02-08' from dual union all
select 0, date '2018-02-09' from dual
)
-- change = code of the next row in datum order (NULL on the last row).
,y1 as (
select test.datum, test.code, lead(code) over (order by datum) as change
from test
)
select * from y1;
The final result set should contain the highlighted rows only.
UPDATE 2:
I think I may have got it right, still need to verify but this seems to work:
with test (code, datum) as (
  select 600, date '2018-02-01' from dual union all
  select 600, date '2018-02-02' from dual union all
  select   0, date '2018-02-03' from dual union all
  select   0, date '2018-02-04' from dual union all
  select   0, date '2018-02-05' from dual union all
  select 600, date '2018-02-06' from dual union all
  select 600, date '2018-02-07' from dual union all
  select   0, date '2018-02-08' from dual union all
  select   0, date '2018-02-09' from dual
)
-- Keep the rows on either side of a change: code differs from a neighbouring row's code.
select datum, code, after, before
from (
  select datum,
         code,
         -- The aliases follow the original post: "after" holds the PREVIOUS row's
         -- code (lag) and "before" holds the NEXT row's code (lead).
         lag(code)  over (order by datum) as after,
         lead(code) over (order by datum) as before
  from test
)
where code != before
   or code != after;
Not sure if this will help; I could not see how the expected output in your question is derived.
with test (code, datum) as (
  select 600, date '2018-02-01' from dual union all
  select 600, date '2018-02-02' from dual union all
  select   0, date '2018-02-03' from dual union all
  select   0, date '2018-02-04' from dual union all
  select   0, date '2018-02-05' from dual union all
  select 600, date '2018-02-06' from dual union all
  select 600, date '2018-02-07' from dual union all
  select   0, date '2018-02-08' from dual union all
  select   0, date '2018-02-09' from dual
)
-- Each row appears once with its next neighbour's code and once with its previous
-- neighbour's code (UNION removes exact duplicates); then keep the rows whose
-- neighbour code is 600.
select datum, code, change
from (
  select datum, code, lead(code) over (order by datum) as change
  from test
  union
  select datum, code, lag(code) over (order by datum) as change
  from test
)
where change = 600;
The following script produced the expected result set:
with test (code, datum) as (
  select 600, date '2018-02-01' from dual union all
  select 600, date '2018-02-02' from dual union all
  select   0, date '2018-02-03' from dual union all
  select   0, date '2018-02-04' from dual union all
  select   0, date '2018-02-05' from dual union all
  select 600, date '2018-02-06' from dual union all
  select 600, date '2018-02-07' from dual union all
  select   0, date '2018-02-08' from dual union all
  select   0, date '2018-02-09' from dual
)
-- Rows adjacent to a change in code: a row is kept when its code differs from the
-- previous row's code or from the next row's code.
select datum, code, after, before
from (
  select datum,
         code,
         -- Aliases as in the original: "after" is the PREVIOUS row's code (lag),
         -- "before" is the NEXT row's code (lead).
         lag(code)  over (order by datum) as after,
         lead(code) over (order by datum) as before
  from test
)
where code != before
   or code != after;