Finding records over continuous date range

Finding records over continuous date range - sql

I need to write a query which returns loans that have been taken in a semester, the semesters are defined in this way:
Jan - April - Spring
May - August - Summer
September - Dec - Fall
Now my loans can start at any time and can span for any duration of time:
say a loan starts in Dec 2013 to Oct 2014, then the loan has spanned the four semesters:
Fall 2013
Spring 2014
Summer 2014
Fall 2014
And so when counting the number of loans for each semester for each year, the count of this loan will be present in all the four semesters
Fall 2013
Spring 2014
Summer 2014
Fall 2014
The schema of the loan table is
LOAN(Loan_ID, St_id#, Comp_id#, Start_Date, Date_Returned)
where start_date and date_returned are the corresponding start and end dates of a loan.
What I have so far does not take into account loans that have overlapped into semesters.
SELECT extract(YEAR FROM start_date) AS year,
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END AS semester,
Count(comp_id) AS num_of_loans
FROM loan
GROUP BY (extract(YEAR FROM start_date),
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END)
ORDER BY YEAR, Decode(semester, 'spring', 1, 'summer', 2, 'fall', 3);
Sample Input:
INSERT INTO loan VALUES('L101', '101', 'H101', TO_DATE('2014-10-19','YYYY-MM-DD'), TO_DATE('2014-10-30','YYYY-MM-DD'));
INSERT INTO loan VALUES('L102', '102', 'H101', TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD'));
INSERT INTO loan VALUES('L103', '102', 'H102', TO_DATE('2014-10-24','YYYY-MM-DD'), TO_DATE('2014-10-30','YYYY-MM-DD'));
INSERT INTO loan VALUES('L104', '101', 'H102', TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD'));
INSERT INTO loan VALUES('L105', '102', 'H102', TO_DATE('2014-11-04','YYYY-MM-DD'), TO_DATE('2014-11-10','YYYY-MM-DD'));
INSERT INTO loan VALUES('L106', '103', 'N101', TO_DATE('2014-10-15','YYYY-MM-DD'), TO_DATE('2014-10-20','YYYY-MM-DD'));
INSERT INTO loan VALUES('L107', '201', 'N101', TO_DATE('2013-09-01','YYYY-MM-DD'), TO_DATE('2013-09-19','YYYY-MM-DD'));
INSERT INTO loan VALUES('L108', '201', 'N102', TO_DATE('2013-11-15','YYYY-MM-DD'), TO_DATE('2013-11-19','YYYY-MM-DD'));
INSERT INTO loan VALUES('L109', '202', 'N102', TO_DATE('2013-10-10','YYYY-MM-DD'), TO_DATE('2013-10-19','YYYY-MM-DD'));
INSERT INTO loan VALUES('L110', '202', 'N102', TO_DATE('2013-08-23','YYYY-MM-DD'), TO_DATE('2013-09-02','YYYY-MM-DD'));
INSERT INTO loan VALUES('L111', '202', 'N104', TO_DATE('2014-11-12','YYYY-MM-DD'), TO_DATE('2014-11-15','YYYY-MM-DD'));
INSERT INTO loan VALUES('L112', '203', 'N104', TO_DATE('2014-08-27','YYYY-MM-DD'), TO_DATE('2014-08-31','YYYY-MM-DD'));
INSERT INTO loan VALUES('L113', '301', 'N104', TO_DATE('2014-09-13','YYYY-MM-DD'), TO_DATE('2014-09-23','YYYY-MM-DD'));
INSERT INTO loan VALUES('L114', '301', 'N104', TO_DATE('2014-10-23','YYYY-MM-DD'), TO_DATE('2014-10-24','YYYY-MM-DD'));
INSERT INTO loan VALUES('L115', '301', 'N107', TO_DATE('2014-10-11','YYYY-MM-DD'), TO_DATE('2014-10-14','YYYY-MM-DD'));
INSERT INTO loan VALUES('L116', '302', 'N107', TO_DATE('2014-09-10','YYYY-MM-DD'), TO_DATE('2014-09-15','YYYY-MM-DD'));
INSERT INTO loan VALUES('L117', '101', 'H101', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L118', '101', 'H103', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L119', '101', 'H104', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L120', '101', 'H103', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L121', '101', 'H104', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L122', '101', 'H105', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L123', '101', 'H106', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L124', '101', 'H106', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L125', '101', 'H105', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
Sample Output:
SEMESTER YEARS NUM_LOANS
Spring 2013 0
Summer 2013 3
Fall 2013 5
Spring 2014 1
Summer 2014 2
Fall 2014 20
what I get:
YEAR SEMESTER NUM_OF_LOANS
2013 summer 2
2013 fall 3
2014 summer 1
2014 fall 19
Thanks!

Query ::
WITH FNL AS(
SELECT EXTRACT(YEAR FROM A.START_DATE) AS YEAR,B.SEMESTER AS semester,0 as num_of_loans
FROM LOAN A,(SELECT 1 SEQ,'spring' SEMESTER,0 NUM_OF_LOANS FROM DUAL
UNION ALL
SELECT 2,'summer' SEMESTER,0 num_of_loans from dual
UNION ALL
SELECT 3,'fall' SEMESTER ,0 num_of_loans FROM DUAL) b
GROUP BY B.SEMESTER,EXTRACT(YEAR FROM START_DATE)
union all
SELECT extract(YEAR FROM start_date) AS year,
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END AS semester,
Count(comp_id) AS num_of_loans
FROM loan
GROUP BY (extract(YEAR FROM start_date),
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END)
)
select year,semester,sum(num_of_loans) from fnl group by year,semester
ORDER BY YEAR, Decode(semester, 'spring', 1, 'summer', 2, 'fall', 3);
Output :
Year semester num_of_loans
2013 spring 0
2013 summer 1
2013 fall 3
2014 spring 0
2014 summer 1
2014 fall 20
I hope this is helpful for you. Good luck! :)

I don't have a SQL Server instance available at the moment, so this is untested, but I believe it should look something like this:
-- T-SQL (SQL Server). For every semester between the earliest loan start and the
-- latest loan end, count the loans whose [Start_Date, Date_Returned] span touches it.
-- Semesters: 1 = Spring (Jan-Apr), 2 = Summer (May-Aug), 3 = Fall (Sep-Dec).
-- A loan that has not been returned yet (Date_Returned IS NULL) is treated as
-- still running today, so it is counted in every semester from its start onwards.
DECLARE @minDate DATE, @maxDate DATE;
DECLARE @firstIdx INT, @lastIdx INT, @idx INT;

SELECT @minDate = MIN(Start_Date),
       @maxDate = MAX(COALESCE(Date_Returned, GETDATE()))
FROM LOAN;

-- Linear semester index = year * 3 + zero-based semester.
-- (MONTH - 1) / 4 is integer division: Jan-Apr -> 0, May-Aug -> 1, Sep-Dec -> 2.
-- Comparing one index avoids the bug of comparing year and semester separately.
SELECT @firstIdx = YEAR(@minDate) * 3 + (MONTH(@minDate) - 1) / 4,
       @lastIdx  = YEAR(@maxDate) * 3 + (MONTH(@maxDate) - 1) / 4;
SET @idx = @firstIdx;

-- Make the script re-runnable within the same session.
IF OBJECT_ID('tempdb..#TMP') IS NOT NULL DROP TABLE #TMP;
CREATE TABLE #TMP (Semester INT, Year INT, LoanCount INT);

-- If LOAN is empty both indexes are NULL and the loop body never runs.
WHILE @idx <= @lastIdx
BEGIN
    INSERT INTO #TMP (Semester, Year, LoanCount)
    SELECT @idx % 3 + 1,   -- back to 1..3
           @idx / 3,       -- back to the calendar year
           (SELECT COUNT(*)
            FROM LOAN
            -- loan started in or before this semester ...
            WHERE YEAR(Start_Date) * 3 + (MONTH(Start_Date) - 1) / 4 <= @idx
            -- ... and ended in or after it
              AND YEAR(COALESCE(Date_Returned, GETDATE())) * 3
                  + (MONTH(COALESCE(Date_Returned, GETDATE())) - 1) / 4 >= @idx);
    SET @idx = @idx + 1;
END

SELECT Semester, Year, LoanCount
FROM #TMP
ORDER BY Year, Semester;
Hope this helps. Good Luck

You have to join semesters with loans. Here's a brief example:
with loan as (
-- This is your sample data
select TO_DATE('2014-10-19','YYYY-MM-DD') startdate, TO_DATE('2014-10-30','YYYY-MM-DD') datereturned from dual union all
select TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-24','YYYY-MM-DD'), TO_DATE('2014-10-30','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD') from dual union all
select TO_DATE('2014-11-04','YYYY-MM-DD'), TO_DATE('2014-11-10','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-15','YYYY-MM-DD'), TO_DATE('2014-10-20','YYYY-MM-DD') from dual union all
select TO_DATE('2013-09-01','YYYY-MM-DD'), TO_DATE('2013-09-19','YYYY-MM-DD') from dual union all
select TO_DATE('2013-11-15','YYYY-MM-DD'), TO_DATE('2013-11-19','YYYY-MM-DD') from dual union all
select TO_DATE('2013-10-10','YYYY-MM-DD'), TO_DATE('2013-10-19','YYYY-MM-DD') from dual union all
select TO_DATE('2013-08-23','YYYY-MM-DD'), TO_DATE('2013-09-02','YYYY-MM-DD') from dual union all
select TO_DATE('2014-11-12','YYYY-MM-DD'), TO_DATE('2014-11-15','YYYY-MM-DD') from dual union all
select TO_DATE('2014-08-27','YYYY-MM-DD'), TO_DATE('2014-08-31','YYYY-MM-DD') from dual union all
select TO_DATE('2014-09-13','YYYY-MM-DD'), TO_DATE('2014-09-23','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-23','YYYY-MM-DD'), TO_DATE('2014-10-24','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-11','YYYY-MM-DD'), TO_DATE('2014-10-14','YYYY-MM-DD') from dual union all
select TO_DATE('2014-09-10','YYYY-MM-DD'), TO_DATE('2014-09-15','YYYY-MM-DD') from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null FROM dual
),
timescale as ( -- Timescale boundaries used to build the list of semesters
select to_date ('01.01.2013', 'dd.mm.yyyy') d1, -- Start date (first month generated)
to_date ('01.12.2015', 'dd.mm.yyyy') d2 -- End date (last month generated)
from dual
),
months as ( -- One row per month within the timescale, each labelled with its semester
select y || '*' || -- label has the form <year>*<semester name>
case
when m < 5 then 'Spring'
when m > 8 then 'Fall'
else 'Summer'
end s,
d d1, -- first day of the month
add_months(d,1)-1 d2 -- last day of the month (day before the next month starts)
from (
select add_months (d1, level-1) d,
extract (year from add_months (d1, level-1)) y,
extract (month from add_months (d1, level-1)) m
from timescale
-- keep generating rows while the previous month is before the end date,
-- so the month of the end date itself is still produced
connect by add_months (d1, level-2) < d2
)
),
semestr as( -- List of semesters with their boundaries, built by grouping the list of months
select s, min(d1) d1, max(d2) d2
from months
group by s
)
-- The query itself: for each semester, count the loans whose period overlaps it.
-- nvl(datereturned, d1) makes a loan without a return date always pass the second
-- condition, so an open loan is counted in its start semester and every later one.
select s,
(select count(1) from loan where startdate <= d2 and nvl(datereturned, d1) >= d1 ) x
from semestr
order by d1

Related

Find customer ids who ordered more in 2019 than they did in 2018

This one was asked in an interview.
Below is the structure of the table.
Shipments- Shipment_id,Price, Order_id, Ship_date, Delivery_Location, Price, Ship_method , ShipETA,
Customer-Customer_id, order_id, customer_address, prime_eligible
Order - Order_id , Order_Qty, Order_date , Order_location, Item_id , Shipment_id
Item- Item _id , Item_description, Item_Location
Question: List of customer ids who ordered more in 2019 than they did in 2018.
-- Customers whose total ordered quantity in 2019 exceeds their total in 2018.
-- NOTE(review): ORDER is a reserved word in most dialects; quote the table name
-- the way your engine requires -- TODO confirm target dialect.
SELECT customer_id
FROM Customer join Order using (order_id)
-- Both years must survive the filter: restricting to 2019 alone makes the
-- 2018 sum below always 0, so every customer with 2019 orders would qualify.
WHERE YEAR(Order_date) IN (2018, 2019)
GROUP BY customer_id
HAVING
SUM(CASE WHEN YEAR(Order_date) = 2019 THEN Order_Qty ELSE 0 END)
> SUM(CASE WHEN YEAR(Order_date) = 2018 THEN Order_Qty ELSE 0 END)
Unfortunately, I don't have sample data can anyone help with the approach to solve this one.

Data model you posted looks somewhat "strange"; I wouldn't keep ORDER_ID in CUSTOMER table, it just doesn't belong there. I'd add CUSTOMER_ID into SHIPMENT instead.
Anyway, here's one option:
sample data in lines #1 - 21
temp CTE calculates summaries (ordered quantities) per customer and year (just for 2018 and 2019)
final query just checks who ordered more items in 2019 than in 2018
SQL> with
2 customer (customer_id, order_id) as
3 (select 'A', 1 from dual union all
4 select 'A', 3 from dual union all
5 select 'B', 2 from dual union all
6 select 'B', 4 from dual union all
7 select 'B', 5 from dual union all
8 -- order 6 is dated 2020 and must not influence the result
9 select 'A', 6 from dual
10 ),
11 orders (order_id, order_qty, order_date) as
12 -- A's summaries: 2018: 100 / 2019: 400 --> should be returned (more in 2019)
13 -- B's summaries: 2018: 400 / 2019: 300 --> should not be returned
14 (select 1, 100, date '2018-05-03' from dual union all -- A
15 select 2, 200, date '2018-07-23' from dual union all -- B
16 select 3, 400, date '2019-04-02' from dual union all -- A
17 select 4, 300, date '2019-08-14' from dual union all -- B
18 select 5, 200, date '2018-11-14' from dual union all -- B
19 -- outside 2018/2019; removed by the year filter in TEMP
20 select 6, 900, date '2020-01-01' from dual -- A
21 ),
22 -- summaries per customer and year (2018 and 2019 only)
23 temp as
24 (select c.customer_id,
25 extract(year from o.order_date) as year,
26 sum(o.order_qty) sum_qty
27 from customer c join orders o on o.order_id = c.order_id
28 where extract(year from o.order_date) in (2018, 2019)
29 group by c.customer_id,
30 extract(year from o.order_date)
31 )
32 -- keep customers whose 2019 total exceeds their 2018 total;
33 -- ELSE 0 makes a year without orders count as 0 instead of NULL,
34 -- so a customer who ordered only in 2019 is still returned
35 select t.customer_id
36 from temp t
37 group by t.customer_id
38 having sum(case when t.year = 2019 then t.sum_qty else 0 end) >
39 sum(case when t.year = 2018 then t.sum_qty else 0 end);
CUSTOMER_ID
-----------
A
SQL>

Count daily fidelity

I have the below table and I would like to count, day by day, the number of distinct people who logged in everyday. For example, for day 1, everyone logged in, so it's 4. For day 4, there's just one person ID who logged in everyday since day 1, so the count would be 1.
DAY
PERSON_ID
1
01
1
02
1
03
1
04
2
01
2
02
2
03
3
01
4
02
4
01
Expected output.
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
1
4
01, 02, 03, 04
2
3
01, 02, 03
3
1
01
4
1
01
EDIT: the query should also work on the below data.
with t ( DAY, PERSON_ID ) AS(
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL)
Expected output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
EXPLANATION
10
3
01, 02, 04
Three unique people in day 10
12
2
01, 02
Day 11 does not have values, so it's not included. From those in day 10, only 2 appear in day 12
13
1
01
From those in day 10 and 12, only 01 appears in day 13
14
1
01
From those in day 10, 12 and 13, only 01 appears in day 14

You can use listagg() with a group by clause. If day always starts from 1 and increases by 1, then you can use the query below. Here, with the help of EXISTS, I have selected only those person_id values that are present on all the previous days.
create table yourtable(DAY int, PERSON_ID varchar(10));
insert into yourtable values(1, '01');
insert into yourtable values(1, '02');
insert into yourtable values(1, '03');
insert into yourtable values(1, '04');
insert into yourtable values(2, '01');
insert into yourtable values(2, '02');
insert into yourtable values(2, '03');
insert into yourtable values(3, '01');
insert into yourtable values(4, '02');
insert into yourtable values(4, '01');
Query:
select day, count(person_id) as PEOPLE_LOGGED_EVERYDAY, LISTAGG(person_id,',') WITHIN GROUP(ORDER BY person_id) AS PEOPLE
from yourtable a
where exists (select 1 from yourtable b where b.day<=a.day and a.person_id=b.person_id
group by person_id having count(day)=a.day)
group by day;
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
1
4
01,02,03,04
2
3
01,02,03
3
1
01
4
1
01
db<fiddle here
Instead of day sequence if you had increasing dates in day column:
create table yourtable(DAY date, PERSON_ID varchar(10));
insert into yourtable values(date '2021-01-01', '01');
insert into yourtable values(date '2021-01-01', '02');
insert into yourtable values(date '2021-01-01', '03');
insert into yourtable values(date '2021-01-01', '04');
insert into yourtable values(date '2021-01-02', '01');
insert into yourtable values(date '2021-01-02', '02');
insert into yourtable values(date '2021-01-02', '03');
insert into yourtable values(date '2021-01-03', '01');
insert into yourtable values(date '2021-01-04', '02');
insert into yourtable values(date '2021-01-04', '01');
Query:
select day, count(person_id) as PEOPLE_LOGGED_EVERYDAY, LISTAGG(person_id,',') WITHIN GROUP(ORDER BY person_id) AS PEOPLE
from yourtable a
where exists (select 1 from yourtable b where b.day<=a.day and a.person_id=b.person_id
group by person_id having count(day)=( max(day)- min(day))+1)
group by day;
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
01-JAN-21
4
01,02,03,04
02-JAN-21
3
01,02,03
03-JAN-21
1
01
04-JAN-21
1
01
db<fiddle here
Revised answer
create table yourtable(DAY int, PERSON_ID varchar(10));
insert into yourtable(day,person_id)
with cte ( DAY, PERSON_ID ) AS(
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL)
select * from cte ;
Query#1 (for Oracle 19c and later)
select day, count(person_id) as PEOPLE_LOGGED_EVERYDAY, LISTAGG(distinct person_id,',') WITHIN GROUP(ORDER BY person_id) AS PEOPLE
from yourtable a
where exists (select 1 from yourtable b where b.day<=a.day and a.person_id=b.person_id
group by person_id having count(DISTINCT day)=(select COUNT( distinct DAY) from yourtable where day<=a.day))
group by day;
Query#1 (for Oracle 18c and earlier)
select day, count(person_id) as PEOPLE_LOGGED_EVERYDAY, LISTAGG( person_id,',') WITHIN GROUP(ORDER BY person_id) AS PEOPLE
from
(
select distinct day, person_id
from yourtable a
where exists (select 1 from yourtable b where b.day<=a.day and a.person_id=b.person_id
group by person_id having count(DISTINCT day)=(select COUNT( distinct DAY) from yourtable where day<=a.day))
)t group by day
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
10
3
01,02,04
12
2
01,02
13
1
01
14
1
01
db<fiddle here

In Standard SQL, I would approach this by doing the following:
Enumerate the days for each person.
Determine the earliest day for each person.
Filter where the earliest day is "1" and the enumeration equals the days.
Then aggregate:
-- Per day: how many people have logged in on every day so far, and who they are.
-- A row qualifies when the person's first day is 1 and this is their N-th login
-- on day N, i.e. no earlier day was skipped.
select day, count(*),
       listagg(person_id, ',') within group (order by person_id)
from (
    select src.*,
           min(day) over (partition by person_id) as first_day,
           row_number() over (partition by person_id order by day) as day_seq
    from t src
) ranked
where first_day = 1 and day_seq = day
group by day
order by day;
Not only is this simpler than using match recognize, but I would guess that the performance would be much better too.

You can use either:
SELECT DAY,
COUNT(DISTINCT person_id) AS num_people
FROM (
SELECT t.*,
DENSE_RANK() OVER (ORDER BY day)
- DENSE_RANK() OVER (PARTITION BY person_id ORDER BY day) AS day_grp
FROM table_name t
)
WHERE day_grp = 0
GROUP BY day
ORDER BY day
or MATCH_RECOGNIZE to find the successive days:
SELECT day,
COUNT(
DISTINCT
CASE cls WHEN 'CONSECUTIVE_DAYS' THEN person_id END
) AS num_people
FROM (
SELECT t.*,
DENSE_RANK() OVER (ORDER BY day) AS day_rank
FROM table_name t
)
MATCH_RECOGNIZE(
PARTITION BY person_id
ORDER BY day
MEASURES
classifier() AS cls
ALL ROWS PER MATCH
PATTERN ( ^ consecutive_days* )
DEFINE
consecutive_days AS COALESCE( PREV(day_rank) + 1, 1 ) = day_rank
)
GROUP BY day
ORDER BY day
Which, for the sample data:
CREATE TABLE table_name ( DAY, PERSON_ID ) AS
SELECT 1, '01' FROM DUAL UNION ALL
SELECT 1, '02' FROM DUAL UNION ALL
SELECT 1, '03' FROM DUAL UNION ALL
SELECT 1, '04' FROM DUAL UNION ALL
SELECT 2, '01' FROM DUAL UNION ALL
SELECT 2, '02' FROM DUAL UNION ALL
SELECT 2, '03' FROM DUAL UNION ALL
SELECT 3, '01' FROM DUAL UNION ALL
SELECT 3, '02' FROM DUAL UNION ALL
SELECT 4, '01' FROM DUAL;
Outputs:
DAY
NUM_PEOPLE
1
4
2
3
3
2
4
1
and for the sample data:
CREATE TABLE table_name ( DAY, PERSON_ID ) AS
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL
Outputs:
DAY
NUM_PEOPLE
10
3
12
2
13
1
14
1
db<>fiddle here

Complex query analyzing historical records

I am using Oracle and trying to retrieve the total number of days a person was out of the office during the year. I have 2 tables involved:
Statuses
1 - Active
2 - Out of the Office
3 - Other
ScheduleHistory
RecordID - primary key
PersonID
PreviousStatusID
NextStatusID
DateChanged
I can easily find when the person went on vacation and when they came back, using
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND NextStatusID = 2
and
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND PreviousStatusID = 2
But in case a person went on vacation more than once, how can I calculate the total number of days the person was out of the office? Is it possible to do this programmatically, given only PersonID?
Here is some sample data:
RecordID PersonID PreviousStatusID NextStatusID DateChanged
-----------------------------------------------------------------------------
1 111 1 2 03/11/2020
2 111 2 1 03/13/2020
3 111 1 3 04/01/2020
4 111 3 1 04/07/2020
5 111 1 2 06/03/2020
6 111 2 1 06/05/2020
7 111 1 2 09/14/2020
8 111 2 1 09/17/2020
So from the data above, for the year 2020 for PersonID 111 the query should return 7

Try this:
with aux1 AS (
SELECT
a.*,
to_date(datechanged, 'MM/DD/YYYY') - LAG(to_date(datechanged, 'MM/DD/YYYY')) OVER(
PARTITION BY personid
ORDER BY
recordid
) lag_date
FROM
ScheduleHistory a
)
SELECT
personid,
SUM(lag_date) tot_days_ooo
FROM
aux1
WHERE
previousstatusid = 2
GROUP BY
personid;

If you want total days (or weekdays) for each year (and to account for periods when it goes over the year boundary) then:
WITH date_ranges ( personid, status, start_date, end_date ) AS (
SELECT personid,
nextstatusid,
datechanged,
LEAD(datechanged, 1, datechanged) OVER(
PARTITION BY personid
ORDER BY datechanged
)
FROM table_name
),
split_year_ranges ( personid, year, start_date, end_date, max_date ) AS (
SELECT personid,
TRUNC( start_date, 'YY' ),
start_date,
LEAST(
end_date,
ADD_MONTHS( TRUNC( start_date, 'YY' ), 12 )
),
end_date
FROM date_ranges
WHERE status = 2
UNION ALL
SELECT personid,
end_date,
end_date,
LEAST( max_date, ADD_MONTHS( end_date, 12 ) ),
max_date
FROM split_year_ranges
WHERE end_date < max_date
)
SELECT personid,
EXTRACT( YEAR FROM year) AS year,
SUM( end_date - start_date ) AS total_days,
SUM(
( TRUNC( end_date, 'IW' ) - TRUNC( start_date, 'IW' ) ) * 5 / 7
+ LEAST( end_date - TRUNC( end_date, 'IW' ), 5 )
- LEAST( start_date - TRUNC( start_date, 'IW' ), 5 )
) AS total_weekdays
FROM split_year_ranges
GROUP BY personid, year
ORDER BY personid, year
Which, for the sample data:
CREATE TABLE table_name ( RecordID, PersonID, PreviousStatusID, NextStatusID, DateChanged ) AS
SELECT 1, 111, 1, 2, DATE '2020-03-11' FROM DUAL UNION ALL
SELECT 2, 111, 2, 1, DATE '2020-03-13' FROM DUAL UNION ALL
SELECT 3, 111, 1, 3, DATE '2020-04-01' FROM DUAL UNION ALL
SELECT 4, 111, 3, 1, DATE '2020-04-07' FROM DUAL UNION ALL
SELECT 5, 111, 1, 2, DATE '2020-06-03' FROM DUAL UNION ALL
SELECT 6, 111, 2, 1, DATE '2020-06-05' FROM DUAL UNION ALL
SELECT 7, 111, 1, 2, DATE '2020-09-14' FROM DUAL UNION ALL
SELECT 8, 111, 2, 1, DATE '2020-09-17' FROM DUAL UNION ALL
SELECT 9, 222, 1, 2, DATE '2019-12-31' FROM DUAL UNION ALL
SELECT 10, 222, 2, 2, DATE '2020-12-01' FROM DUAL UNION ALL
SELECT 11, 222, 2, 2, DATE '2021-01-02' FROM DUAL;
Outputs:
PERSONID
YEAR
TOTAL_DAYS
TOTAL_WEEKDAYS
111
2020
7
7
222
2019
1
1
222
2020
366
262
222
2021
1
1
db<>fiddle here

Provided no vacation crosses a year boundary
with grps as (
SELECT sh.*,
row_number() over (partition by PersonID, NextStatusID order by DateChanged) grp
FROM ScheduleHistory sh
WHERE NextStatusID in (1,2) and 3 not in (NextStatusID, PreviousStatusID)
), durations as (
SELECT PersonID, min(DateChanged) DateChanged, max(DateChanged) - min(DateChanged) duration
FROM grps
GROUP BY PersonID, grp
)
SELECT PersonID, sum(duration) days_out
FROM durations
GROUP BY PersonID;
db<>fiddle

year_span is used to split an interval spanning across two years in two different records
H1 adds a row number dependent from PersonID to get the right sequence for each person
H2 gets the periods for each status change and extract 1st day of the year of the interval end
H3 split records that span across two years and calculate the right date_start and date_end for each interval
H calculates days elapsed in each interval for each year
final query sum up the records to get output
EDIT
If you need workdays instead of total days, you should not use total_days/7*5 because it is a bad approximation and in some cases gives weird results.
I have posted a solution that jumps from Fridays to Mondays here
with
statuses (sid, sdescr) as (
select 1, 'Active' from dual union all
select 2, 'Out of the Office' from dual union all
select 3, 'Other' from dual
),
ScheduleHistory(RecordID, PersonID, PreviousStatusID, NextStatusID , DateChanged) as (
select 1, 111, 1, 2, date '2020-03-11' from dual union all
select 2, 111, 2, 1, date '2020-03-13' from dual union all
select 3, 111, 1, 3, date '2020-04-01' from dual union all
select 4, 111, 3, 1, date '2020-04-07' from dual union all
select 5, 111, 1, 2, date '2020-06-03' from dual union all
select 6, 111, 2, 1, date '2020-06-05' from dual union all
select 7, 111, 1, 2, date '2020-09-14' from dual union all
select 8, 111, 2, 1, date '2020-09-17' from dual union all
SELECT 9, 222, 1, 2, date '2019-12-31' from dual UNION ALL
SELECT 10, 222, 2, 2, date '2020-12-01' from dual UNION ALL
SELECT 11, 222, 2, 2, date '2021-01-02' from dual
),
year_span (n) as (
select 1 from dual union all
select 2 from dual
),
H1 AS (
SELECT ROW_NUMBER() OVER (PARTITION BY PersonID ORDER BY RecordID) PID, H.*
FROM ScheduleHistory H
),
H2 as (
SELECT
H1.*, H2.DateChanged DateChanged2,
EXTRACT(YEAR FROM H2.DateChanged) - EXTRACT(YEAR FROM H1.DateChanged) + 1 Y,
trunc(H2.DateChanged,'YEAR') Y2
FROM H1 H1
LEFT JOIN H1 H2 ON H1.PID = H2.PID-1 AND H1.PersonID = H2.PersonID
),
H3 AS (
SELECT Y, N, H2.PID, H2.RecordID, H2.PersonID, H2.NextStatusID,
CASE WHEN Y=1 THEN H2.DateChanged ELSE CASE WHEN N=1 THEN H2.DateChanged ELSE Y2 END END D1,
CASE WHEN Y=1 THEN H2.DateChanged2 ELSE CASE WHEN N=1 THEN Y2 ELSE H2.DateChanged2 END END D2
FROM H2
JOIN year_span N ON N.N <=Y
),
H AS (
SELECT PersonID, NextStatusID, EXTRACT(year FROM d1) Y, d2-d1 D
FROM H3
)
select PersonID, sdescr Status, Y, sum(d) d
from H
join statuses s on NextStatusID = s.sid
group by PersonID, sdescr, Y
order by PersonID, sdescr, Y
output
PersonID Status Y d
111 Active 2020 177
111 Other 2020 6
111 Out of the Office 2020 7
222 Out of the Office 2019 1
222 Out of the Office 2020 366
222 Out of the Office 2021 1
check the fiddle here

Compare data of current week against same week of previous years

I have this table that contains sales by stores & date.
-------------------------------------------
P_DATE - P_STORE - P_SALES
-------------------------------------------
2019-02-05 - S1 - 5000
2019-02-05 - S2 - 9850
2018-06-17 - S1 - 6980
2018-05-17 - S2 - 6590
..
..
..
-------------------------------------------
I want to compare Sum of sales for each store of last 10 weeks of this year with same week of previous years.
I want a result like this :
---------------------------------------------------
Week - Store - Sales-2019 - Sales2018
---------------------------------------------------
20 - S1 - 2580 - 2430
20 - S2 - 2580 - 2430
.
.
10 - S1 - 5905 - 5214
10 - S2 - 4789 - 6530
---------------------------------------------------
I've tried this:
Select
[Week] = DATEPART(WEEK, E_Date),
[Store] = E_store
[Sales 2019] = Case when Year(P_date) = '2019' Then Sum (P_Sales)
[Sales 2018] = Case when Year(P_date) = '2018' Then Sum (P_Sales)
From
PIECE
Group by
DATEPART(WEEK, E_Date),
E_store
I need your help please.

This script will consider 10 weeks including current week-
WITH wk_list (COMMON,DayMinus)
AS
(
SELECT 1,0 UNION ALL
SELECT 1,1 UNION ALL
SELECT 1,2 UNION ALL
SELECT 1,3 UNION ALL
SELECT 1,4 UNION ALL
SELECT 1,5 UNION ALL
SELECT 1,6 UNION ALL
SELECT 1,7 UNION ALL
SELECT 1,8 UNION ALL
SELECT 1,9
)
SELECT
DATEPART(ISO_WEEK, P_DATE) WK,
P_STORE,
SUM(CASE WHEN YEAR(P_DATE) = 2019 THEN P_SALES ELSE 0 END) SALES_2019,
SUM(CASE WHEN YEAR(P_DATE) = 2018 THEN P_SALES ELSE 0 END) SALES_2018
FROM your_table
WHERE YEAR(P_DATE) IN (2019,2018)
AND DATEPART(ISO_WEEK, P_DATE) IN
(
SELECT A.WKNUM-wk_list.DayMinus AS [WEEK NUMBER]
FROM wk_list
INNER JOIN (
SELECT 1 AS COMMON,DATENAME(ISO_WEEK,GETDATE()) WKNUM
) A ON wk_list.COMMON = A.COMMON
)
GROUP BY DATEPART(ISO_WEEK, P_DATE),P_STORE
But if you want to exclude current week, just replace the following part in above script
-- Replacement for the wk_list CTE: offsets 1..10 instead of 0..9, so the current
-- week (offset 0) is excluded. wk_list is the first CTE of the script, so it must
-- be introduced with WITH, not with a leading comma.
WITH wk_list (COMMON,DayMinus)
AS
(
SELECT 1,1 UNION ALL
SELECT 1,2 UNION ALL
SELECT 1,3 UNION ALL
SELECT 1,4 UNION ALL
SELECT 1,5 UNION ALL
SELECT 1,6 UNION ALL
SELECT 1,7 UNION ALL
SELECT 1,8 UNION ALL
SELECT 1,9 UNION ALL
SELECT 1,10
)

Is this what you're looking for?
-- T-SQL. Sample data held in a table variable (table variables use the @ sigil):
-- one row per transaction, already tagged with its week number and year.
DECLARE @t TABLE (TransactionID INT, Week INT, Year INT, Amount MONEY)
INSERT INTO @t
(TransactionID, Week, Year, Amount)
VALUES
(1, 20, 2018, 50),
(2, 20, 2019, 20),
(3, 19, 2018, 35),
(4, 19, 2019, 40),
(5, 20, 2018, 70),
(6, 20, 2019, 80)
-- Sum per (Week, Year), then pivot the years into columns.
-- TOP 10 with ORDER BY Week DESC keeps the 10 highest week numbers.
SELECT TOP 10 Week, [2018], [2019] FROM (SELECT Week, Year, SUM(Amount) As Amount FROM @t GROUP BY Week, Year) t
PIVOT
(
SUM(Amount)
FOR Year IN ([2018], [2019])
) sq
ORDER BY Week DESC

SQL: Create multiple rows for a record based on months between two dates

My table has records as below for different Id's and different start and end dates
ID, Startdate, Enddate
1, 2017-02-14, 2018-11-05
I want to write an SQL without using date dimension table that gives below output: Basically one record for each month between start and end date.
1, 2017, 02
1, 2017, 03
1, 2017, 04
1, 2017, 05
1, 2017, 06
1, 2017, 07
1, 2017, 08
1, 2017, 09
1, 2017, 10
1, 2017, 11
1, 2017, 12
1, 2018, 01
1, 2018, 02
1, 2018, 03
1, 2018, 04
1, 2018, 05
1, 2018, 06
1, 2018, 07
1, 2018, 09
1, 2018, 10
1, 2018, 11

Please use below query example:
-- MySQL 8.0+ (needs recursive CTE support). Emits one (month_, year_) row for
-- every calendar month from the month of @start_date to the month of @end_date.
-- User variables use the @ sigil; '#' would start a comment in MySQL.
SET @start_date = '2017-02-14';
-- Push the end to the last day of its month so that month is always included,
-- whatever the day-of-month of the start date is.
SET @end_date = LAST_DAY('2018-11-05');
WITH RECURSIVE date_range AS
(
    -- Anchor: the start month. CAST gives next_month_date a DATE type, which the
    -- recursive part then inherits.
    SELECT MONTH(@start_date) AS month_,
           YEAR(@start_date) AS year_,
           DATE_ADD(CAST(@start_date AS DATE), INTERVAL 1 MONTH) AS next_month_date
    UNION ALL
    -- Step: emit the month of the previously computed date, then advance one month.
    SELECT MONTH(dr.next_month_date),
           YEAR(dr.next_month_date),
           DATE_ADD(dr.next_month_date, INTERVAL 1 MONTH)
    FROM date_range dr
    WHERE dr.next_month_date <= @end_date
)
SELECT month_, year_
FROM date_range
-- Ascending, to match the chronological order of the requested output.
ORDER BY next_month_date;

This is what I did and it worked like a charm:
-- sample data
WITH table_data
AS (
SELECT 1 AS id
,cast('2017-08-14' AS DATE) AS start_dt
,cast('2018-12-16' AS DATE) AS end_dt
UNION ALL
SELECT 2 AS id
,cast('2017-09-14' AS DATE) AS start_dt
,cast('2019-01-16' AS DATE) AS end_dt
)
-- find minimum date from the data
,starting_date (start_date)
AS (
SELECT min(start_dt)
FROM TABLE_DATA
)
--get all months between min and max dates
,all_dates
AS (
SELECT last_day(add_months(date_trunc('month', start_date), idx * 1)) month_date
FROM starting_date
CROSS JOIN _v_vector_idx
WHERE month_date <= add_months(start_date, abs(months_between((
SELECT min(start_dt) FROM TABLE_DATA), (SELECT max(end_dt) FROM TABLE_DATA))) + 1)
ORDER BY month_date
)
SELECT id
,extract(year FROM month_date)
,extract(month FROM month_date)
,td.start_dt
,td.end_dt
FROM table_data td
INNER JOIN all_dates ad
ON ad.month_date > td.start_dt
AND ad.month_date <= last_day(td.end_dt)
ORDER BY 1
,2

You have to generate the dates and then pick the year and month from them:
-- MySQL. Lists every distinct (year, month) between two dates without a calendar
-- table: five digit tables (0-9) are cross joined to produce day offsets
-- 0..99999, each offset is added to the start date, and the generated days are
-- reduced to distinct year/month pairs.
-- The dates are real DATE values and both bounds are inclusive, so the first and
-- last month are kept even when the start date is the last day of its month or
-- the end date is the first day of its month.
select distinct year(date), month(date)
from (
    select date_add(cast('2017-02-14' as date),
                    interval n5.num*10000 + n4.num*1000 + n3.num*100 + n2.num*10 + n1.num day) as date
    from
        (select 0 as num union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) n1
    cross join
        (select 0 as num union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) n2
    cross join
        (select 0 as num union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) n3
    cross join
        (select 0 as num union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) n4
    cross join
        (select 0 as num union all select 1 union all select 2 union all select 3 union all select 4
         union all select 5 union all select 6 union all select 7 union all select 8 union all select 9) n5
) a
where date >= cast('2017-02-14' as date)
  and date <= cast('2018-11-05' as date)
-- deterministic, chronological output
order by year(date), month(date)

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Finding records over continuous date range - sql

Related

Find customer ids who ordered more in 2019 than they did in 2018

Count daily fidelity

Complex query analyzing historical records

Compare data of current week against same week of previous years

SQL: Create multiple rows for a record based on months between two dates

Categories

Resources