Count daily fidelity - sql

I have the below table and I would like to count, day by day, the number of distinct people who logged in everyday. For example, for day 1, everyone logged in, so it's 4. For day 4, there's just one person ID who logged in everyday since day 1, so the count would be 1.
DAY
PERSON_ID
1
01
1
02
1
03
1
04
2
01
2
02
2
03
3
01
4
02
4
01
Expected output.
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
1
4
01, 02, 03, 04
2
3
01, 02, 03
3
1
01
4
1
01
EDIT: the query should also work on the below data.
with t ( DAY, PERSON_ID ) AS(
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL)
Expected output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
EXPLANATION
10
3
01, 02, 04
Three unique people in day 10
12
2
01, 02
Day 11 does not have values, so it's not included. From those in day 10, only 2 appear in day 12
13
1
01
From those in day 10 and 12, only 01 appears in day 13
14
1
01
From those in day 10, 12 and 13, only 01 appears in day 14

You can use listagg() with a group by clause. If day always starts from 1 and increases by 1, then you can use the query below. Here, with the help of EXISTS, I have selected only those person_id values which are present on all the previous days.
create table yourtable(DAY int, PERSON_ID varchar(10));
insert into yourtable values(1, '01');
insert into yourtable values(1, '02');
insert into yourtable values(1, '03');
insert into yourtable values(1, '04');
insert into yourtable values(2, '01');
insert into yourtable values(2, '02');
insert into yourtable values(2, '03');
insert into yourtable values(3, '01');
insert into yourtable values(4, '02');
insert into yourtable values(4, '01');
Query:
-- Per day: how many people (and which ones) have a row for every day up to and including that day.
-- Relies on days being numbered 1, 2, 3, ... so that "number of rows so far" can be compared with the day number.
SELECT
    cur.day,
    COUNT(cur.person_id) AS people_logged_everyday,
    LISTAGG(cur.person_id, ',') WITHIN GROUP (ORDER BY cur.person_id) AS people
FROM yourtable cur
WHERE EXISTS (
    -- the person's rows up to the current day must number exactly cur.day
    SELECT 1
    FROM yourtable prev
    WHERE prev.person_id = cur.person_id
      AND prev.day <= cur.day
    GROUP BY prev.person_id
    HAVING COUNT(prev.day) = cur.day
)
GROUP BY cur.day;
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
1
4
01,02,03,04
2
3
01,02,03
3
1
01
4
1
01
db<fiddle here
Instead of day sequence if you had increasing dates in day column:
create table yourtable(DAY date, PERSON_ID varchar(10));
insert into yourtable values(date '2021-01-01', '01');
insert into yourtable values(date '2021-01-01', '02');
insert into yourtable values(date '2021-01-01', '03');
insert into yourtable values(date '2021-01-01', '04');
insert into yourtable values(date '2021-01-02', '01');
insert into yourtable values(date '2021-01-02', '02');
insert into yourtable values(date '2021-01-02', '03');
insert into yourtable values(date '2021-01-03', '01');
insert into yourtable values(date '2021-01-04', '02');
insert into yourtable values(date '2021-01-04', '01');
Query:
-- Per calendar day: how many people (and which ones) have logged in on every day
-- from the first day present in the table up to and including that day.
--
-- Fix: the previous HAVING compared the person's row count with the span between the
-- person's OWN first and last day, so someone who first appeared on day 2 and then
-- logged in on day 3 was still counted as "every day". The span is now measured from
-- the earliest day in the whole table to the current day.
--
-- NOTE(review): assumes DAY holds dates without a time-of-day part and at most one row
-- per (day, person_id); otherwise the date difference / row count will not line up.
SELECT
    cur.day,
    COUNT(cur.person_id) AS people_logged_everyday,
    LISTAGG(cur.person_id, ',') WITHIN GROUP (ORDER BY cur.person_id) AS people
FROM yourtable cur
WHERE EXISTS (
    SELECT 1
    FROM yourtable prev
    WHERE prev.person_id = cur.person_id
      AND prev.day <= cur.day
    GROUP BY prev.person_id
    -- expected number of days = (current day - first day in the table) + 1
    HAVING COUNT(prev.day) = (cur.day - (SELECT MIN(d.day) FROM yourtable d)) + 1
)
GROUP BY cur.day;
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
01-JAN-21
4
01,02,03,04
02-JAN-21
3
01,02,03
03-JAN-21
1
01
04-JAN-21
1
01
db<fiddle here
Revised answer
create table yourtable(DAY int, PERSON_ID varchar(10));
insert into yourtable(day,person_id)
with cte ( DAY, PERSON_ID ) AS(
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL)
select * from cte ;
Query#1 (for Oracle 19c and later)
-- Per day: how many distinct people (and which ones) appear on every distinct day
-- present in the table up to and including that day. Gaps in the day numbering are
-- fine because the comparison is between counts of distinct days, not day values.
--
-- Fix: the table may contain duplicate (day, person_id) rows (the sample has
-- (10, '04') twice), so the head count must be COUNT(DISTINCT ...) to agree with
-- LISTAGG(DISTINCT ...); a plain COUNT returned 4 instead of 3 for day 10.
--
-- LISTAGG(DISTINCT ...) requires Oracle 19c or later.
SELECT
    a.day,
    COUNT(DISTINCT a.person_id) AS people_logged_everyday,
    LISTAGG(DISTINCT a.person_id, ',') WITHIN GROUP (ORDER BY a.person_id) AS people
FROM yourtable a
WHERE EXISTS (
    SELECT 1
    FROM yourtable b
    WHERE b.day <= a.day
      AND b.person_id = a.person_id
    GROUP BY b.person_id
    -- the person's distinct days so far must equal all distinct days so far
    HAVING COUNT(DISTINCT b.day) = (SELECT COUNT(DISTINCT d.day) FROM yourtable d WHERE d.day <= a.day)
)
GROUP BY a.day;
Query#1 (for Oracle 18c and earlier)
-- Same result as the 19c query, without LISTAGG(DISTINCT ...): duplicate
-- (day, person_id) rows are removed in the inline view before aggregating.
SELECT
    day,
    COUNT(person_id) AS people_logged_everyday,
    LISTAGG(person_id, ',') WITHIN GROUP (ORDER BY person_id) AS people
FROM (
    SELECT DISTINCT cur.day, cur.person_id
    FROM yourtable cur
    WHERE EXISTS (
        SELECT 1
        FROM yourtable prev
        WHERE prev.person_id = cur.person_id
          AND prev.day <= cur.day
        GROUP BY prev.person_id
        -- the person's distinct days so far must equal all distinct days so far
        HAVING COUNT(DISTINCT prev.day) = (SELECT COUNT(DISTINCT d.day) FROM yourtable d WHERE d.day <= cur.day)
    )
) t
GROUP BY day
Output:
DAY
PEOPLE_LOGGED_EVERYDAY
PEOPLE
10
3
01,02,04
12
2
01,02
13
1
01
14
1
01
db<fiddle here

In Standard SQL, I would approach this by doing the following:
Enumerate the days for each person.
Determine the earliest day for each person.
Filter where the earliest day is "1" and the enumeration equals the days.
Then aggregate:
-- Per day: count and list the people whose n-th login row falls on day n and whose first login is day 1.
-- NOTE(review): this only matches "logged in every day so far" when days are numbered 1, 2, 3, ...
-- without gaps and each person has at most one row per day; it returns nothing for data that
-- starts at another day number (e.g. 10) — confirm against the actual data.
select day, count(*),
listagg(person_id, ',') within group (order by person_id)
from (select t.*,
-- seqnum: ordinal of this row among the person's rows, ordered by day
row_number() over (partition by person_id order by day) as seqnum,
-- min_day: the first day on which the person appears
min(day) over (partition by person_id) as min_day
from t
) t
-- keep a row only when the person started on day 1 and has not skipped a day before this one
where seqnum = day and min_day = 1
group by day
order by day;
Not only is this simpler than using match recognize, but I would guess that the performance would be much better too.

You can use either:
-- Per day: number of distinct people present on every distinct day so far.
-- The rank of the day among all days equals the rank of the day among the
-- person's own days only while the person has not missed any day that exists
-- in the table, so their difference (day_grp) is 0 exactly for those rows.
WITH ranked AS (
    SELECT t.*,
           DENSE_RANK() OVER (ORDER BY day)
             - DENSE_RANK() OVER (PARTITION BY person_id ORDER BY day) AS day_grp
    FROM table_name t
)
SELECT day,
       COUNT(DISTINCT person_id) AS num_people
FROM ranked
WHERE day_grp = 0
GROUP BY day
ORDER BY day
or MATCH_RECOGNIZE to find the successive days:
-- Per day: number of distinct people whose rows form an unbroken run of day ranks
-- starting at the first row of their partition.
SELECT day,
COUNT(
DISTINCT
-- only rows classified as part of the run contribute a person_id; other rows yield NULL and are ignored by COUNT
CASE cls WHEN 'CONSECUTIVE_DAYS' THEN person_id END
) AS num_people
FROM (
SELECT t.*,
-- day_rank: position of the day among all distinct days in the table (gaps in day values are squeezed out)
DENSE_RANK() OVER (ORDER BY day) AS day_rank
FROM table_name t
)
MATCH_RECOGNIZE(
PARTITION BY person_id
ORDER BY day
MEASURES
classifier() AS cls
ALL ROWS PER MATCH
-- ^ anchors the match to the start of each person's partition
PATTERN ( ^ consecutive_days* )
DEFINE
-- first row of the partition (no PREV) must have day_rank 1; each later row must be exactly one rank after the previous row
consecutive_days AS COALESCE( PREV(day_rank) + 1, 1 ) = day_rank
)
GROUP BY day
ORDER BY day
Which, for the sample data:
CREATE TABLE table_name ( DAY, PERSON_ID ) AS
SELECT 1, '01' FROM DUAL UNION ALL
SELECT 1, '02' FROM DUAL UNION ALL
SELECT 1, '03' FROM DUAL UNION ALL
SELECT 1, '04' FROM DUAL UNION ALL
SELECT 2, '01' FROM DUAL UNION ALL
SELECT 2, '02' FROM DUAL UNION ALL
SELECT 2, '03' FROM DUAL UNION ALL
SELECT 3, '01' FROM DUAL UNION ALL
SELECT 3, '02' FROM DUAL UNION ALL
SELECT 4, '01' FROM DUAL;
Outputs:
DAY
NUM_PEOPLE
1
4
2
3
3
2
4
1
and for the sample data:
CREATE TABLE table_name ( DAY, PERSON_ID ) AS
SELECT 10, '01' FROM DUAL UNION ALL
SELECT 10, '02' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 10, '04' FROM DUAL UNION ALL
SELECT 12, '01' FROM DUAL UNION ALL
SELECT 12, '02' FROM DUAL UNION ALL
SELECT 12, '03' FROM DUAL UNION ALL
SELECT 13, '04' FROM DUAL UNION ALL
SELECT 13, '01' FROM DUAL UNION ALL
SELECT 14, '02' FROM DUAL UNION ALL
SELECT 14, '01' FROM DUAL
Outputs:
DAY
NUM_PEOPLE
10
3
12
2
13
1
14
1
db<>fiddle here

Related

Complex query analyzing historical records

I am using Oracle and trying to retrieve the total number of days a person was out of the office during the year. I have 2 tables involved:
Statuses
1 - Active
2 - Out of the Office
3 - Other
ScheduleHistory
RecordID - primary key
PersonID
PreviousStatusID
NextStatusID
DateChanged
I can easily find when the person went on vacation and when they came back, using
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND NextStatusID = 2
and
SELECT DateChanged FROM ScheduleHistory WHERE PersonID=111 AND PreviousStatusID = 2
But in case a person went on vacation more than once, how can I calculate the total number of days that person was out of the office? Is it possible to do this programmatically, given only PersonID?
Here is some sample data:
RecordID PersonID PreviousStatusID NextStatusID DateChanged
-----------------------------------------------------------------------------
1 111 1 2 03/11/2020
2 111 2 1 03/13/2020
3 111 1 3 04/01/2020
4 111 3 1 04/07/2020
5 111 1 2 06/03/2020
6 111 2 1 06/05/2020
7 111 1 2 09/14/2020
8 111 2 1 09/17/2020
So from the data above, for the year 2020 for PersonID 111 the query should return 7
Try this:
-- Total days out of the office per person: for every history row, compute the days elapsed
-- since the person's previous history row, then sum those gaps for rows that leave status 2.
with aux1 AS (
SELECT
a.*,
-- lag_date: days between this row's date and the previous row's date for the same person.
-- NOTE(review): to_date() treats datechanged as an MM/DD/YYYY string; if the column is already
-- a DATE this relies on an implicit conversion through the session date format — confirm the column type.
-- NOTE(review): "previous row" is decided by recordid, which presumably increases with datechanged — verify.
to_date(datechanged, 'MM/DD/YYYY') - LAG(to_date(datechanged, 'MM/DD/YYYY')) OVER(
PARTITION BY personid
ORDER BY
recordid
) lag_date
FROM
ScheduleHistory a
)
SELECT
personid,
SUM(lag_date) tot_days_ooo
FROM
aux1
WHERE
-- rows whose previous status is 2 mark the end of a status-2 period
previousstatusid = 2
GROUP BY
personid;
If you want total days (or weekdays) for each year (and to account for periods when it goes over the year boundary) then:
-- Days and weekdays spent in status 2 per person and calendar year, splitting
-- periods that run across a year boundary into one piece per year.
-- date_ranges: each status change paired with the date of the person's next change
-- (the last change of a person is paired with itself, giving a zero-length range).
WITH date_ranges ( personid, status, start_date, end_date ) AS (
SELECT personid,
nextstatusid,
datechanged,
LEAD(datechanged, 1, datechanged) OVER(
PARTITION BY personid
ORDER BY datechanged
)
FROM table_name
),
-- split_year_ranges: recursive split of each status-2 range at 1 January boundaries;
-- max_date carries the real end of the range through the recursion.
split_year_ranges ( personid, year, start_date, end_date, max_date ) AS (
-- anchor: the part of the range that lies in the year it starts in
SELECT personid,
TRUNC( start_date, 'YY' ),
start_date,
LEAST(
end_date,
ADD_MONTHS( TRUNC( start_date, 'YY' ), 12 )
),
end_date
FROM date_ranges
WHERE status = 2
UNION ALL
-- recursive step: continue from the previous piece's end (a 1 January) for at most one more year
SELECT personid,
end_date,
end_date,
LEAST( max_date, ADD_MONTHS( end_date, 12 ) ),
max_date
FROM split_year_ranges
WHERE end_date < max_date
)
SELECT personid,
EXTRACT( YEAR FROM year) AS year,
SUM( end_date - start_date ) AS total_days,
SUM(
-- whole ISO weeks between the two Mondays contribute 5 weekdays each ...
( TRUNC( end_date, 'IW' ) - TRUNC( start_date, 'IW' ) ) * 5 / 7
-- ... plus the weekdays elapsed in the end week, minus those elapsed in the start week (each capped at 5)
+ LEAST( end_date - TRUNC( end_date, 'IW' ), 5 )
- LEAST( start_date - TRUNC( start_date, 'IW' ), 5 )
) AS total_weekdays
FROM split_year_ranges
GROUP BY personid, year
ORDER BY personid, year
Which, for the sample data:
CREATE TABLE table_name ( RecordID, PersonID, PreviousStatusID, NextStatusID, DateChanged ) AS
SELECT 1, 111, 1, 2, DATE '2020-03-11' FROM DUAL UNION ALL
SELECT 2, 111, 2, 1, DATE '2020-03-13' FROM DUAL UNION ALL
SELECT 3, 111, 1, 3, DATE '2020-04-01' FROM DUAL UNION ALL
SELECT 4, 111, 3, 1, DATE '2020-04-07' FROM DUAL UNION ALL
SELECT 5, 111, 1, 2, DATE '2020-06-03' FROM DUAL UNION ALL
SELECT 6, 111, 2, 1, DATE '2020-06-05' FROM DUAL UNION ALL
SELECT 7, 111, 1, 2, DATE '2020-09-14' FROM DUAL UNION ALL
SELECT 8, 111, 2, 1, DATE '2020-09-17' FROM DUAL UNION ALL
SELECT 9, 222, 1, 2, DATE '2019-12-31' FROM DUAL UNION ALL
SELECT 10, 222, 2, 2, DATE '2020-12-01' FROM DUAL UNION ALL
SELECT 11, 222, 2, 2, DATE '2021-01-02' FROM DUAL;
Outputs:
PERSONID
YEAR
TOTAL_DAYS
TOTAL_WEEKDAYS
111
2020
7
7
222
2019
1
1
222
2020
366
262
222
2021
1
1
db<>fiddle here
Provided no vacation crosses a year boundary
-- Total days out of the office per person, pairing the n-th change into status 2
-- with the n-th change into status 1.
with grps as (
SELECT sh.*,
-- grp: ordinal of this change among the person's changes INTO the same status, by date;
-- the n-th "to 2" row and the n-th "to 1" row therefore share the same grp
row_number() over (partition by PersonID, NextStatusID order by DateChanged) grp
FROM ScheduleHistory sh
-- keep only changes that move between statuses 1 and 2 (status 3 on neither side)
WHERE NextStatusID in (1,2) and 3 not in (NextStatusID, PreviousStatusID)
), durations as (
-- one row per (person, grp): the span between the earliest and latest change in the pair
SELECT PersonID, min(DateChanged) DateChanged, max(DateChanged) - min(DateChanged) duration
FROM grps
GROUP BY PersonID, grp
)
SELECT PersonID, sum(duration) days_out
FROM durations
GROUP BY PersonID;
db<>fiddle
year_span is used to split an interval spanning across two years in two different records
H1 adds a row number dependent from PersonID to get the right sequence for each person
H2 gets the periods for each status change and extract 1st day of the year of the interval end
H3 split records that span across two years and calculate the right date_start and date_end for each interval
H calculates days elapsed in each interval for each year
final query sum up the records to get output
EDIT
If you need workdays instead of total days, you should not use total_days/7*5 because it is a bad approximation and in some cases gives weird results.
I have posted a solution to jump on fridays to mondays here
with
statuses (sid, sdescr) as (
select 1, 'Active' from dual union all
select 2, 'Out of the Office' from dual union all
select 3, 'Other' from dual
),
ScheduleHistory(RecordID, PersonID, PreviousStatusID, NextStatusID , DateChanged) as (
select 1, 111, 1, 2, date '2020-03-11' from dual union all
select 2, 111, 2, 1, date '2020-03-13' from dual union all
select 3, 111, 1, 3, date '2020-04-01' from dual union all
select 4, 111, 3, 1, date '2020-04-07' from dual union all
select 5, 111, 1, 2, date '2020-06-03' from dual union all
select 6, 111, 2, 1, date '2020-06-05' from dual union all
select 7, 111, 1, 2, date '2020-09-14' from dual union all
select 8, 111, 2, 1, date '2020-09-17' from dual union all
SELECT 9, 222, 1, 2, date '2019-12-31' from dual UNION ALL
SELECT 10, 222, 2, 2, date '2020-12-01' from dual UNION ALL
SELECT 11, 222, 2, 2, date '2021-01-02' from dual
),
year_span (n) as (
select 1 from dual union all
select 2 from dual
),
H1 AS (
SELECT ROW_NUMBER() OVER (PARTITION BY PersonID ORDER BY RecordID) PID, H.*
FROM ScheduleHistory H
),
H2 as (
SELECT
H1.*, H2.DateChanged DateChanged2,
EXTRACT(YEAR FROM H2.DateChanged) - EXTRACT(YEAR FROM H1.DateChanged) + 1 Y,
trunc(H2.DateChanged,'YEAR') Y2
FROM H1 H1
LEFT JOIN H1 H2 ON H1.PID = H2.PID-1 AND H1.PersonID = H2.PersonID
),
H3 AS (
SELECT Y, N, H2.PID, H2.RecordID, H2.PersonID, H2.NextStatusID,
CASE WHEN Y=1 THEN H2.DateChanged ELSE CASE WHEN N=1 THEN H2.DateChanged ELSE Y2 END END D1,
CASE WHEN Y=1 THEN H2.DateChanged2 ELSE CASE WHEN N=1 THEN Y2 ELSE H2.DateChanged2 END END D2
FROM H2
JOIN year_span N ON N.N <=Y
),
H AS (
SELECT PersonID, NextStatusID, EXTRACT(year FROM d1) Y, d2-d1 D
FROM H3
)
select PersonID, sdescr Status, Y, sum(d) d
from H
join statuses s on NextStatusID = s.sid
group by PersonID, sdescr, Y
order by PersonID, sdescr, Y
output
PersonID Status Y d
111 Active 2020 177
111 Other 2020 6
111 Out of the Office 2020 7
222 Out of the Office 2019 1
222 Out of the Office 2020 366
222 Out of the Office 2021 1
check the fiddle here

find gap between months in two consecutive year oracle sql

Need to find record having gap between months in a table if the data is present in two different year.
I have column like id, value,month, year.
Id, value, month,year
1, 123, oct, 2020
1, 128, nov, 2020
1, 127, jan ,2021
2, 121, Dec, 2020
2, 154, jan, 2021
Output I need:
Id 1 as there is a gap in month (Dec is Missing for id=1)
Here's one option. Read comments within code.
SQL> with test (id, value, month, year) as
2 -- sample data; you have that, don't type it
3 (select 1, 123, 'oct', 2020 from dual union all
4 select 1, 128, 'nov', 2020 from dual union all
5 select 1, 127, 'jan', 2021 from dual union all
6 select 2, 121, 'dec', 2020 from dual union all
7 select 2, 154, 'jan', 2021 from dual
8 ),
9 temp as
10 -- "convert" month and year to a real date value (first day of that month)
11 (select id,
12 value,
13 to_date(month ||' '|| year, 'mon yyyy', 'nls_date_language=english') datum
14 from test
15 ),
16 temp2 as
17 -- difference in months between DATUM and the same ID's next DATUM (LEAD); NULL for the last row of each ID
18 (select id,
19 months_between
20 (datum,
21 lead(datum) over (partition by id order by datum)
22 ) diff
23 from temp
24 )
25 -- an ID has a gap when two of its successive rows are more than one month apart
26 select distinct id
27 from temp2
28 where abs(diff) > 1;
ID
----------
1
SQL>
It can probably be compressed, but step-by-step CTEs show what's going on.
I would construct a date and use lag():
-- Rows whose previous row (same id, by date) is not exactly one month earlier.
-- dated: builds a real date (first of the month) from the year and month columns.
-- NOTE(review): the 'MON' format parses month names in the session's date language — confirm it matches the data.
with dated as (
    select t.*,
           to_date(year || '-' || month || '-01', 'YYYY-MON-DD') as dte
    from t
),
-- with_prev: adds the date of the preceding row of the same id
with_prev as (
    select d.*,
           lag(dte) over (partition by id order by dte) as prev_dte
    from dated d
)
select w.*
from with_prev w
-- the first row of each id has a NULL prev_dte and is therefore never returned
where prev_dte <> dte - interval '1' month;
Here is a db<>fiddle.
Here is an example using the LAG function to find rows whose prior row exists but is not exactly one month behind (a row with no prior row is treated as valid):
WITH
sample_data (Id,
VALUE,
month,
year)
AS
(SELECT 1, 123, 'oct', 2020 FROM DUAL
UNION ALL
SELECT 1, 128, 'nov', 2020 FROM DUAL
UNION ALL
SELECT 1, 127, 'jan', 2021 FROM DUAL
UNION ALL
SELECT 2, 121, 'Dec', 2020 FROM DUAL
UNION ALL
SELECT 2, 154, 'jan', 2021 FROM DUAL)
SELECT DISTINCT id
FROM (SELECT sd.id,
CASE
WHEN ADD_MONTHS (TO_DATE (sd.year || sd.month, 'YYYYMON'), -1) =
TO_DATE (
LAG (sd.year || sd.month)
OVER (
PARTITION BY id
ORDER BY
sd.year, EXTRACT (MONTH FROM TO_DATE (sd.month, 'MON'))),
'YYYYMON')
OR LAG (sd.id)
OVER (
PARTITION BY id
ORDER BY sd.year, EXTRACT (MONTH FROM TO_DATE (sd.month, 'MON')))
IS NULL
THEN
'Y'
ELSE
'N'
END AS valid_prev_month
FROM sample_data sd)
WHERE valid_prev_month = 'N';

Return Month wise count if no data for month return 0 as count in oracle sql

I have a table with data for January to March (up to the current month), and I am able to get the month-wise count. But the user requirement is to display zero for the rest of the months. Kindly suggest.
For example:
select count(a.emp_id) as cnt ,to_char(a.due_date,'MONTH') as Process_Month from EMP_Request a
where a.due_date is not null
group by to_char(a.due_date,'MONTH')
Output:
cnt Process_month
20 JANUARY
35 FEBUARY
26 March
Desired output:
cnt Process_month
20 JANUARY
35 FEBUARY
26 March
0 APRIL
0 MAY
…….
….
….
0 DECEMBER
Please assist.
Use WWV_FLOW_MONTHS_MONTH to get all the months, and left join it with your query on the month derived from the date column:
-- Request count for every month of the year, 0 for months without requests.
--
-- Fixes: "with cte (" lacked AS; the join condition "cnt.to_char(...)" was not valid SQL;
-- and months without data returned NULL instead of 0. The join is now made on the month
-- number, which also avoids blank-padding and language differences between month names.
--
-- NOTE(review): assumes the APEX view WWV_FLOW_MONTHS_MONTH exposes MONTH_VALUE (1-12)
-- next to MONTH_DISPLAY — confirm in your APEX version.
with months as (
    select month_value,
           month_display
    from WWV_FLOW_MONTHS_MONTH
), cnt as (
    -- requests per month number (1-12), ignoring rows without a due date
    select to_number(to_char(a.due_date, 'MM')) as month_value,
           count(a.emp_id) as cnt
    from EMP_Request a
    where a.due_date is not null
    group by to_number(to_char(a.due_date, 'MM'))
)
select m.month_display as Process_Month,
       coalesce(c.cnt, 0) as cnt
from months m
left join cnt c
    on c.month_value = m.month_value
order by m.month_value
Right join months generator with your query:
-- Request count for each of the 12 months, 0 where there is no data.
-- monthly_counts: requests per two-digit month number ('01'..'12').
with monthly_counts as (
    select to_char(due_date, 'mm') mth_num,
           count(emp_id) as cnt
    from emp_request
    where due_date is not null
    group by to_char(due_date, 'mm')
),
-- all_months: generates '01'..'12' with a hierarchical query on DUAL
all_months as (
    select to_char(level, 'fm00') mth_num
    from dual
    connect by level <= 12
)
select to_char(to_date(mth_num, 'MM'), 'MONTH') month, coalesce(cnt, 0) cnt
from all_months m
left join monthly_counts e using (mth_num)
order by mth_num
dbfiddle demo
Months generator is a simple hierarchical query which gives us 12 values 01, 02... 12:
select to_char(level, 'fm00') mth_num from dual connect by level <= 12
You can also use system views to get these numbers:
select to_char(rownum, 'fm00') mth_num from all_objects where rownum <= 12
or this syntax:
select to_char(column_value, 'fm00') mth_num
from table(sys.odcivarchar2list(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))
It's better to work on numbers which you can sort properly and convert to month names in the last step. This way you have natural months order.
If you want to be sure that month names are always in english, not dependent from local settings then use to_date with third parameter, like here:
select to_char(sysdate, 'month', 'nls_date_language=english') from dual
This is a general problem which is not really a sql problem. SQL doesn't really know about what months you are interested in. So the solution is to tell it in a sub query.
Here is a solution that doesn't use external tables. You simply select all months of the year and outer join your data.
select TO_CHAR(TO_DATE(available_months.m,'MM'),'MONTH') , NVL(sum(data.cnt),0) from
(select to_number(to_char(sysdate,'MM')) m, 7 cnt from dual) data,
(select 1 m from dual union select 2 from dual union select 3 from dual union select 4 from dual
union select 5 from dual union select 6 from dual union select 7 from dual
union select 8 from dual union select 9 from dual union select 10 from dual
union select 11 from dual union select 12 from dual) available_months
where
data.m (+) = available_months.m
group by available_months.m
order by available_months.m;
Or with your data query included is should look like (not tested):
-- Request count for each of the 12 months, 0 where there is no data.
--
-- Fixes: the two inline views were not separated by a comma; the aggregate in "data"
-- had no GROUP BY; and the outer join referenced data.due_date, a column "data" does not
-- expose. "data" now returns the month number m, so it can be outer-joined to
-- available_months.m exactly like the self-contained example.
select TO_CHAR(TO_DATE(available_months.m,'MM'),'MONTH') , NVL(sum(data.cnt),0) from
-- requests per month number (1-12), ignoring rows without a due date
(select to_number(to_char(a.due_date,'MM')) m, count(a.emp_id) as cnt
from EMP_Request a
where a.due_date is not null
group by to_number(to_char(a.due_date,'MM'))) data,
-- the 12 month numbers to report on
(select 1 m from dual union all select 2 from dual union all select 3 from dual union all select 4 from dual
union all select 5 from dual union all select 6 from dual union all select 7 from dual
union all select 8 from dual union all select 9 from dual union all select 10 from dual
union all select 11 from dual union all select 12 from dual) available_months
where
-- (+) keeps months that have no matching row in "data"
data.m (+) = available_months.m
group by available_months.m
order by available_months.m;

SQL: Create multiple rows for a record based on months between two dates

My table has records as below for different Id's and different start and end dates
ID, Startdate, Enddate
1, 2017-02-14, 2018-11-05
I want to write an SQL without using date dimension table that gives below output: Basically one record for each month between start and end date.
1, 2017, 02
1, 2017, 03
1, 2017, 04
1, 2017, 05
1, 2017, 06
1, 2017, 07
1, 2017, 08
1, 2017, 09
1, 2017, 10
1, 2017, 11
1, 2017, 12
1, 2018, 01
1, 2018, 02
1, 2018, 03
1, 2018, 04
1, 2018, 05
1, 2018, 06
1, 2018, 07
1, 2018, 08
1, 2018, 09
1, 2018, 10
1, 2018, 11
Please use below query example:
-- MySQL 8+: one row per month between @start_date and @end_date (inclusive).
--
-- Fix: user variables are written @name; "#" starts a comment in MySQL, so
-- "set #start_date = ..." was a syntax error and the CTE never saw the values.
SET @start_date = '2017-02-14';
-- last day of the end month, so the end month itself is always generated
SET @end_date = LAST_DAY('2018-11-05');
WITH RECURSIVE date_range AS
(
-- anchor: the month of the start date, plus the date one month later
SELECT MONTH(@start_date) AS month_, YEAR(@start_date) AS year_, DATE_ADD(@start_date, INTERVAL 1 MONTH) AS next_month_date
-- every generated row is distinct, so UNION ALL avoids a needless de-duplication
UNION ALL
-- step: emit the month of the previously computed "next" date until it passes the end date
SELECT MONTH(dr.next_month_date) AS month_, YEAR(dr.next_month_date) AS year_, DATE_ADD(dr.next_month_date, INTERVAL 1 MONTH) AS next_month_date
FROM date_range dr
WHERE dr.next_month_date <= @end_date
)
SELECT month_, year_ FROM date_range
ORDER BY next_month_date DESC
This is what I did and it worked like a charm:
-- sample data
WITH table_data
AS (
SELECT 1 AS id
,cast('2017-08-14' AS DATE) AS start_dt
,cast('2018-12-16' AS DATE) AS end_dt
UNION ALL
SELECT 2 AS id
,cast('2017-09-14' AS DATE) AS start_dt
,cast('2019-01-16' AS DATE) AS end_dt
)
-- find minimum date from the data
,starting_date (start_date)
AS (
SELECT min(start_dt)
FROM TABLE_DATA
)
--get all months between min and max dates
,all_dates
AS (
SELECT last_day(add_months(date_trunc('month', start_date), idx * 1)) month_date
FROM starting_date
CROSS JOIN _v_vector_idx
WHERE month_date <= add_months(start_date, abs(months_between((
SELECT min(start_dt) FROM TABLE_DATA), (SELECT max(end_dt) FROM TABLE_DATA))) + 1)
ORDER BY month_date
)
SELECT id
,extract(year FROM month_date)
,extract(month FROM month_date)
,td.start_dt
,td.end_dt
FROM table_data td
INNER JOIN all_dates ad
ON ad.month_date > td.start_dt
AND ad.month_date <= last_day(td.end_dt)
ORDER BY 1
,2
You have to generate date and from that have to pick year and month
select distinct year(date),month( date) from
(select * from (
select
date_add('2017-02-14 00:00:00.000', INTERVAL n5.num*10000+n4.num*1000+n3.num*100+n2.num*10+n1.num DAY ) as date
from
(select 0 as num
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9) n1,
(select 0 as num
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9) n2,
(select 0 as num
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9) n3,
(select 0 as num
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9) n4,
(select 0 as num
union all select 1
union all select 2
union all select 3
union all select 4
union all select 5
union all select 6
union all select 7
union all select 8
union all select 9) n5
) a
where date >'2017-02-14 00:00:00.000' and date < '2018-11-05'
) as t

Finding records over continuous date range

I need to write a query which returns loans that have been taken in a semester, the semesters are defined in this way:
Jan - April - Spring
May - August - Summer
September - Dec - Fall
Now my loans can start at any time and can span for any duration of time:
say a loan starts in Dec 2013 to Oct 2014, then the loan has spanned the four semesters:
Fall 2013
Spring 2014
Summer 2014
Fall 2014
And so when counting the number of loans for each semester for each year, the count of this loan will be present in all the four semesters
Fall 2013
Spring 2014
Summer 2014
Fall 2014
The schema of the loan table is
LOAN(Loan_ID, St_id#, Comp_id#, Start_Date, Date_Returned)
where start_date and date_returned are the corresponding start and end dates of a loan.
What I have so far does not take into account loans that have overlapped into semesters.
SELECT extract(YEAR FROM start_date) AS year,
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END AS semester,
Count(comp_id) AS num_of_loans
FROM loan
GROUP BY (extract(YEAR FROM start_date),
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END)
ORDER BY YEAR, Decode(semester, 'spring', 1, 'summer', 2, 'fall', 3);
Sample Input:
INSERT INTO loan VALUES('L101', '101', 'H101', TO_DATE('2014-10-19','YYYY-MM-DD'), TO_DATE('2014-10-30','YYYY-MM-DD'));
INSERT INTO loan VALUES('L102', '102', 'H101', TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD'));
INSERT INTO loan VALUES('L103', '102', 'H102', TO_DATE('2014-10-24','YYYY-MM-DD'), TO_DATE('2014-10-30','YYYY-MM-DD'));
INSERT INTO loan VALUES('L104', '101', 'H102', TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD'));
INSERT INTO loan VALUES('L105', '102', 'H102', TO_DATE('2014-11-04','YYYY-MM-DD'), TO_DATE('2014-11-10','YYYY-MM-DD'));
INSERT INTO loan VALUES('L106', '103', 'N101', TO_DATE('2014-10-15','YYYY-MM-DD'), TO_DATE('2014-10-20','YYYY-MM-DD'));
INSERT INTO loan VALUES('L107', '201', 'N101', TO_DATE('2013-09-01','YYYY-MM-DD'), TO_DATE('2013-09-19','YYYY-MM-DD'));
INSERT INTO loan VALUES('L108', '201', 'N102', TO_DATE('2013-11-15','YYYY-MM-DD'), TO_DATE('2013-11-19','YYYY-MM-DD'));
INSERT INTO loan VALUES('L109', '202', 'N102', TO_DATE('2013-10-10','YYYY-MM-DD'), TO_DATE('2013-10-19','YYYY-MM-DD'));
INSERT INTO loan VALUES('L110', '202', 'N102', TO_DATE('2013-08-23','YYYY-MM-DD'), TO_DATE('2013-09-02','YYYY-MM-DD'));
INSERT INTO loan VALUES('L111', '202', 'N104', TO_DATE('2014-11-12','YYYY-MM-DD'), TO_DATE('2014-11-15','YYYY-MM-DD'));
INSERT INTO loan VALUES('L112', '203', 'N104', TO_DATE('2014-08-27','YYYY-MM-DD'), TO_DATE('2014-08-31','YYYY-MM-DD'));
INSERT INTO loan VALUES('L113', '301', 'N104', TO_DATE('2014-09-13','YYYY-MM-DD'), TO_DATE('2014-09-23','YYYY-MM-DD'));
INSERT INTO loan VALUES('L114', '301', 'N104', TO_DATE('2014-10-23','YYYY-MM-DD'), TO_DATE('2014-10-24','YYYY-MM-DD'));
INSERT INTO loan VALUES('L115', '301', 'N107', TO_DATE('2014-10-11','YYYY-MM-DD'), TO_DATE('2014-10-14','YYYY-MM-DD'));
INSERT INTO loan VALUES('L116', '302', 'N107', TO_DATE('2014-09-10','YYYY-MM-DD'), TO_DATE('2014-09-15','YYYY-MM-DD'));
INSERT INTO loan VALUES('L117', '101', 'H101', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L118', '101', 'H103', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L119', '101', 'H104', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L120', '101', 'H103', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L121', '101', 'H104', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L122', '101', 'H105', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L123', '101', 'H106', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L124', '101', 'H106', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
INSERT INTO loan VALUES('L125', '101', 'H105', TO_DATE('2014-11-19','YYYY-MM-DD'), null);
Sample Output:
SEMESTER YEARS NUM_LOANS
Spring 2013 0
Summer 2013 3
Fall 2013 5
Spring 2014 1
Summer 2014 2
Fall 2014 20
what I get:
YEAR SEMESTER NUM_OF_LOANS
2013 summer 2
2013 fall 3
2014 summer 1
2014 fall 19
Thanks!
Query ::
WITH FNL AS(
SELECT EXTRACT(YEAR FROM A.START_DATE) AS YEAR,B.SEMESTER AS semester,0 as num_of_loans
FROM LOAN A,(SELECT 1 SEQ,'spring' SEMESTER,0 NUM_OF_LOANS FROM DUAL
UNION ALL
SELECT 2,'summer' SEMESTER,0 num_of_loans from dual
UNION ALL
SELECT 3,'fall' SEMESTER ,0 num_of_loans FROM DUAL) b
GROUP BY B.SEMESTER,EXTRACT(YEAR FROM START_DATE)
union all
SELECT extract(YEAR FROM start_date) AS year,
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END AS semester,
Count(comp_id) AS num_of_loans
FROM loan
GROUP BY (extract(YEAR FROM start_date),
CASE WHEN extract(MONTH FROM start_date) <= 4 THEN 'spring'
WHEN extract(MONTH FROM start_date) > 4 AND extract(MONTH FROM start_date) <=8 THEN 'summer'
ELSE 'fall' END)
)
select year,semester,sum(num_of_loans) from fnl group by year,semester
ORDER BY YEAR, Decode(semester, 'spring', 1, 'summer', 2, 'fall', 3);
Output :
Year semester num_of_loans
2013 spring 0
2013 summer 1
2013 fall 3
2014 spring 0
2014 summer 1
2014 fall 20
i hope this helpful for you.Good Luck.. :)
I don't currently have an operating SQL Server available at the moment, but I believe it should look something like this:
-- SQL Server: number of loans active in each semester between the earliest loan start
-- and the latest known loan date. Semesters: 1 = Jan-Apr, 2 = May-Aug, 3 = Sep-Dec.
--
-- Fixes relative to the sketch this replaces:
--   * variables are @name and must be declared (#name denotes a temp table);
--   * MONTH(x)/4 is integer division, so CEILING() never produced 1..3; (MONTH(x)+3)/4 does;
--   * the semester/year of iteration i is derived from a zero-based semester index, so the
--     first iteration is the starting semester itself and the year rolls over after semester 3;
--   * COUNT * -> COUNT(*), the missing AND and the stray ">+" are gone; overlap is tested on
--     real date ranges instead of comparing year and semester numbers separately;
--   * the LOAN table has Date_Returned, not end_date.
-- A loan with no Date_Returned is treated as still out, i.e. active in every semester
-- from its start onwards.
-- NOTE(review): assumes Start_Date / Date_Returned are DATE (or DATETIME) columns — confirm.
DECLARE @min DATE, @max DATE;

SELECT @min = MIN(Start_Date),
       @max = MAX(COALESCE(Date_Returned, Start_Date))
FROM LOAN;

DECLARE @minYear INT = YEAR(@min),
        @minSem  INT = (MONTH(@min) + 3) / 4,
        @maxYear INT = YEAR(@max),
        @maxSem  INT = (MONTH(@max) + 3) / 4;

-- number of semesters from (@minYear, @minSem) to (@maxYear, @maxSem), both included
DECLARE @semCount INT = (@maxYear - @minYear) * 3 + (@maxSem - @minSem) + 1;

DECLARE @i INT = 0,
        @semIndex INT,
        @curSem INT,
        @curYear INT,
        @semStart DATE,
        @semEnd DATE;

CREATE TABLE #TMP (Semester INT, Year INT, LoanCount INT);

-- if LOAN is empty @semCount is NULL and the loop body never runs
WHILE @i < @semCount
BEGIN
    -- zero-based semester index counted from semester 1 of @minYear
    SET @semIndex = (@minSem - 1) + @i;
    SET @curSem   = @semIndex % 3 + 1;          -- 3 semesters per year
    SET @curYear  = @minYear + @semIndex / 3;   -- integer division

    -- half-open range [@semStart, @semEnd) covering the four months of the semester
    SET @semStart = DATEFROMPARTS(@curYear, (@curSem - 1) * 4 + 1, 1);
    SET @semEnd   = DATEADD(MONTH, 4, @semStart);

    INSERT INTO #TMP (Semester, Year, LoanCount)
    SELECT @curSem,
           @curYear,
           COUNT(*)
    FROM LOAN
    WHERE Start_Date < @semEnd
      AND (Date_Returned IS NULL OR Date_Returned >= @semStart);

    SET @i = @i + 1;
END

SELECT Semester, Year, LoanCount
FROM #TMP
ORDER BY Year, Semester;
Hope this helps. Good Luck
You have to join semesters with loans; here's a brief example:
with loan as (
-- This is your sample data
select TO_DATE('2014-10-19','YYYY-MM-DD') startdate, TO_DATE('2014-10-30','YYYY-MM-DD') datereturned from dual union all
select TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-24','YYYY-MM-DD'), TO_DATE('2014-10-30','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-31','YYYY-MM-DD'), TO_DATE('2014-11-03','YYYY-MM-DD') from dual union all
select TO_DATE('2014-11-04','YYYY-MM-DD'), TO_DATE('2014-11-10','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-15','YYYY-MM-DD'), TO_DATE('2014-10-20','YYYY-MM-DD') from dual union all
select TO_DATE('2013-09-01','YYYY-MM-DD'), TO_DATE('2013-09-19','YYYY-MM-DD') from dual union all
select TO_DATE('2013-11-15','YYYY-MM-DD'), TO_DATE('2013-11-19','YYYY-MM-DD') from dual union all
select TO_DATE('2013-10-10','YYYY-MM-DD'), TO_DATE('2013-10-19','YYYY-MM-DD') from dual union all
select TO_DATE('2013-08-23','YYYY-MM-DD'), TO_DATE('2013-09-02','YYYY-MM-DD') from dual union all
select TO_DATE('2014-11-12','YYYY-MM-DD'), TO_DATE('2014-11-15','YYYY-MM-DD') from dual union all
select TO_DATE('2014-08-27','YYYY-MM-DD'), TO_DATE('2014-08-31','YYYY-MM-DD') from dual union all
select TO_DATE('2014-09-13','YYYY-MM-DD'), TO_DATE('2014-09-23','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-23','YYYY-MM-DD'), TO_DATE('2014-10-24','YYYY-MM-DD') from dual union all
select TO_DATE('2014-10-11','YYYY-MM-DD'), TO_DATE('2014-10-14','YYYY-MM-DD') from dual union all
select TO_DATE('2014-09-10','YYYY-MM-DD'), TO_DATE('2014-09-15','YYYY-MM-DD') from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null from dual union all
select TO_DATE('2014-11-19','YYYY-MM-DD'), null FROM dual
),
timescale as ( -- Timescale boundaries to build a list of semesters
select to_date ('01.01.2013', 'dd.mm.yyyy') d1, -- Start date
to_date ('01.12.2015', 'dd.mm.yyyy') d2 -- End date
from dual
),
months as ( -- List of months withing the timescale , semestr is specified for each month
select y || '*' ||
case
when m < 5 then 'Spring'
when m > 8 then 'Fall'
else 'Summer'
end s,
d d1,
add_months(d,1)-1 d2
from (
select add_months (d1, level-1) d,
extract (year from add_months (d1, level-1)) y,
extract (month from add_months (d1, level-1)) m
from timescale
connect by add_months (d1, level-2) < d2
)
),
semestr as( -- List of semesters with their boundaries, built by grouping the list of months
select s, min(d1) d1, max(d2) d2
from months
group by s
)
-- the query itself - quite easy
select s,
(select count(1) from loan where startdate <= d2 and nvl(datereturned, d1) >= d1 ) x
from semestr
order by d1