Teradata - Split date range into month columns with day count - sql

I need to split different date ranges over a quarter period into month columns with only the days actually used in that month. Each record (range) would be different.
Example:
Table
Record_ID Start_Date End_Date
1 10/27 11/30
2 11/30 12/14
3 12/14 12/31
Range 1 = 10/5 to 12/14
Range 2 = 11/20 to 12/31
Range 3 = 10/28 to 12/2
Output:
Range 1
Oct Nov Dec
27 30 14

Similar to @ULick's answer using sys_calendar.calendar, but a little more succinct:
-- Demo fixture: a session-local table holding one date range per record.
CREATE VOLATILE MULTISET TABLE datetest (
    record_id  INT,
    start_date DATE,
    end_date   DATE
) ON COMMIT PRESERVE ROWS;

INSERT INTO datetest (record_id, start_date, end_date) VALUES (1, '2017-10-05', '2017-12-14');
INSERT INTO datetest (record_id, start_date, end_date) VALUES (2, '2017-11-20', '2017-12-31');

-- The join yields one calendar row per day inside each range (both ends
-- included); each month column counts the days that fall in that month.
SELECT
    dr.record_id,
    SUM(CASE WHEN cal.month_of_year = 10 THEN 1 ELSE 0 END) AS October,
    SUM(CASE WHEN cal.month_of_year = 11 THEN 1 ELSE 0 END) AS November,
    SUM(CASE WHEN cal.month_of_year = 12 THEN 1 ELSE 0 END) AS December
FROM datetest AS dr
INNER JOIN sys_calendar.calendar AS cal
    ON cal.calendar_date BETWEEN dr.start_date AND dr.end_date
GROUP BY dr.record_id;

DROP TABLE datetest;
Because Quarter was mentioned in the question (I'm not sure how it relates here) there is also quarter_of_year and month_of_quarter available in the sys_calendar to slice and dice this even further.
Also, if you are on 16.00+ There is PIVOT functionality which may help get rid of the CASE statements here.

First join with the calendar to get all the dates within the range and get the number of days per each month (incl. full month, not mentioned in Start_Date and End_Date).
Then sum up each month in a column per Range.
-- Sample data: one row per date range to be split by month.
create table SplitDateRange (
    Range      bigint
  , Start_Date date
  , End_Date   date
);
insert into SplitDateRange (Range, Start_Date, End_Date) values ( 1, '2018-10-05', '2018-12-14' );
insert into SplitDateRange (Range, Start_Date, End_Date) values ( 2, '2018-11-20', '2018-12-31' );
insert into SplitDateRange (Range, Start_Date, End_Date) values ( 3, '2018-10-28', '2018-12-02' );

-- Inner query: per range and month, days = last covered date - first covered date + 1.
-- Outer query: pivots those per-month day counts into the Oct/Nov/Dec columns.
select
    per_month.Range
  , sum(case when per_month.mon = 10 then per_month.days else 0 end) as "Oct"
  , sum(case when per_month.mon = 11 then per_month.days else 0 end) as "Nov"
  , sum(case when per_month.mon = 12 then per_month.days else 0 end) as "Dec"
from (
    select
        DR.Range
      , extract(MONTH from C.calendar_date) as mon
      , max(C.calendar_date) - min(C.calendar_date) + 1 as days
    from Sys_Calendar.CALENDAR as C
    inner join SplitDateRange as DR
        on C.calendar_date between DR.Start_Date and DR.End_Date
    group by
        DR.Range
      , extract(MONTH from C.calendar_date)
) per_month
group by per_month.Range
order by per_month.Range
;

Different approach, avoids the cross join to the calendar by applying Teradata Expand On feature for creating time series. More text, but should be more efficient for larger tables/ranges:
-- Pivots the per-month day counts of each record's date range into
-- October/November/December columns, using EXPAND ON instead of a calendar join.
SELECT record_id,
Sum(CASE WHEN mth = 10 THEN days_in_month ELSE 0 END) AS October,
Sum(CASE WHEN mth = 11 THEN days_in_month ELSE 0 END) AS November,
Sum(CASE WHEN mth = 12 THEN days_in_month ELSE 0 END) AS December
FROM
( -- this Derived Table simply avoids repeating the EXTRACT/INTERVAL calculations (can't be done directly in the nested Select)
SELECT record_id,
-- month number of the expanded (per-month) period's first day
Extract(MONTH From Begin(expanded_pd)) AS mth,
-- length in days of the overlap between the original range and that month's period
Cast((INTERVAL( base_pd P_INTERSECT expanded_pd) DAY) AS INT) AS days_in_month
FROM
(
SELECT record_id,
-- end_date+1 so that the last day of the range is counted (a PERIOD's end bound is exclusive)
PERIOD(start_date, end_date+1) AS base_pd,
expanded_pd
FROM datetest
-- creates one row per month
EXPAND ON base_pd AS expanded_pd BY ANCHOR PERIOD Month_Begin
) AS dt
) AS dt
-- positional reference: groups by record_id
GROUP BY 1

Related

Oracle SQL Show all month of a year, with or without value ORA-01841

I have a problem with which I despair, I have data distributed over days, and would like to display this for the entire year in months and once in weeks.
My problem with the months that I get in the select my data displayed (for January, September) but I want that all months for a selected year are displayed, even if they are empty. For this I have made myself a "WITH" (copied) and now try to join this, but get an ORA-01841 error.
And how do I implement the whole construct to display only the weeks.
-- Question's original query (the one reported to raise ORA-01841).
-- Intended result: one row per month of 2022 with the number of distinct
-- LP_BELEGUNG_ID values per ART (1, 2, 3, 99).
-- The CTE produces 12 rows, one per month of 2022, but TO_CHAR turns each
-- into a German month-NAME string; the CTE column list exposes it as MZ.
WITH MONAT_ZAEHLER (MZ) AS
(
SELECT
TO_CHAR(ADD_MONTHS(TO_DATE('01.2022','MM.YYYY'),LEVEL -1),'Month', 'NLS_DATE_LANGUAGE = GERMAN') AS GRD_ROW_ID
FROM
DUAL
CONNECT BY LEVEL <= 12
)
SELECT
TO_CHAR(GEN_DATUM,'Month', 'NLS_DATE_LANGUAGE = GERMAN') AS GRD_ROW_ID
-- one conditional distinct count per ART value
, COUNT( DISTINCT CASE
WHEN LP_BELEGUNG.ART = 1 THEN LP_BELEGUNG.LP_BELEGUNG_ID
ELSE NULL
END ) AS "1"
, COUNT( DISTINCT CASE
WHEN LP_BELEGUNG.ART = 2 THEN LP_BELEGUNG.LP_BELEGUNG_ID
ELSE NULL
END ) AS "2"
, COUNT( DISTINCT CASE
WHEN LP_BELEGUNG.ART = 3 THEN LP_BELEGUNG.LP_BELEGUNG_ID
ELSE NULL
END ) AS "3"
, COUNT( DISTINCT CASE
WHEN LP_BELEGUNG.ART = 99 THEN LP_BELEGUNG.LP_BELEGUNG_ID
ELSE NULL
END ) AS "99"
FROM
LP_BELEGUNG
-- compares a DATE (GEN_DATUM truncated to its month) with the month-name string MZ
FULL OUTER JOIN MONAT_ZAEHLER ON TRUNC(LP_BELEGUNG.GEN_DATUM, 'Month') = MONAT_ZAEHLER.MZ
WHERE
-- GEN_DATUM is NULL on rows that come only from MONAT_ZAEHLER, so this filter drops the empty months
TO_CHAR(GEN_DATUM, 'YYYY') = '2022'
GROUP BY
TO_CHAR(GEN_DATUM,'Month', 'NLS_DATE_LANGUAGE = GERMAN')
The error is because you're converting the month to a name string in the CTE, then trying to convert it again for the GRD_ROW_ID alias.
The solution is basically the same as your previous question, but now you want the CTE to have one row per month - which you are doing, but you should leave it as a date type in the CTE, not convert it to a string there:
-- One row per month of 2022, kept as a DATE (the first day of each month)
-- so it can be compared directly with date columns.
with cte (dt) as (
select add_months(date '2022-01-01', level - 1)
from dual
connect by level <= 12
)
... then convert that actual date value to a string:
SELECT
TO_CHAR(cte.dt, 'Month', 'NLS_DATE_LANGUAGE = GERMAN') AS GRD_ROW_ID
...
... and outer join to your actual table as before, using a date range:
FROM
cte
LEFT JOIN
LP_BELEGUNG
ON
LP_BELEGUNG.GEN_DATUM >= cte.dt AND LP_BELEGUNG.GEN_DATUM < add_months(cte.dt, 1)
GROUP BY
cte.dt
ORDER BY
cte.dt
... this time looking for values where GEN_DATUM is greater than or equal to the cte.dt value (again, as before), which is midnight on the first day of the month; and less than add_months(cte.dt, 1), which is midnight on the first day of the following month. So for January, that will be >= 2022-01-01 00:00:00 and < 2022-02-01 00:00:00, which is all possible dates and times during that month.
GRD_ROW_ID
ANZAHL_ART_1
ANZAHL_ART_2
ANZAHL_ART_3
ANZAHL_ART_4
Januar
0
0
0
0
Februar
0
0
0
0
März
0
0
0
0
April
0
0
0
0
Mai
0
0
0
0
Juni
0
0
0
0
Juli
0
0
0
0
August
0
0
0
0
September
1
1
1
7
Oktober
0
0
0
0
November
0
0
0
0
Dezember
0
0
0
0
fiddle
To get a row for every week of the year you would do something similar again, but in blocks of 7 days:
-- 53 rows: the start dates of consecutive 7-day blocks beginning on 2022-01-01.
with cte (dt) as (
select date '2022-01-01' + 7 * (level - 1)
from dual
connect by level <= 53
)
SELECT
-- label each block as year and week number
TO_CHAR(cte.dt, 'YYYY-WW') AS GRD_ROW_ID
...
FROM
cte
LEFT JOIN
LP_BELEGUNG
ON
-- half-open 7-day window starting at cte.dt
LP_BELEGUNG.GEN_DATUM >= cte.dt AND LP_BELEGUNG.GEN_DATUM < cte.dt + 7
-- and never past the start of the following year, so the last block does not pick up next year's rows
AND LP_BELEGUNG.GEN_DATUM < add_months(trunc(cte.dt, 'YYYY'), 12)
GROUP BY
cte.dt
ORDER BY
cte.dt
which has an extra check in the join to stop it including data from week 53 which is actually in the following year - which I'm guessing you would want to do.
fiddle

Calculate number of workdays PER MONTH from start_date and end_date

So I have a table that looks like this :
task_id | start_date |end_date
I want to calculate the number of workdays (just days from mondays to fridays , no holidays) per month.
for example : if a task took from 02-01-2022 to 05-02-2022 to be accomplished, i need the result to look something like this
task_id | january |february |march |april .............|december
1 21 4 0 0 .......... 0
You can use the generate_series function to generate one row per day between start_date and end_date; those rows are easy to count, and conditional aggregation then pivots the counts into one column per month.
extract returns the month number and the ISO day of week (1 = Monday ... 7 = Sunday) of each generated timestamp, so both can be used as the condition inside the aggregate function.
-- Expands every task into one row per calendar day between start_date and
-- end_date, then counts the Monday-Friday days (isodow 1-5) falling in each month.
SELECT
    tasks.task_id,
    count(*) FILTER (WHERE extract(isodow FROM dt) BETWEEN 1 AND 5 AND extract(MONTH FROM dt) = 1) AS january,
    count(*) FILTER (WHERE extract(isodow FROM dt) BETWEEN 1 AND 5 AND extract(MONTH FROM dt) = 2) AS february,
    count(*) FILTER (WHERE extract(isodow FROM dt) BETWEEN 1 AND 5 AND extract(MONTH FROM dt) = 3) AS march
    -- the remaining months follow the same pattern
FROM T AS tasks
CROSS JOIN generate_series(tasks.start_date, tasks.end_date, '1 day'::interval) AS dt
GROUP BY tasks.task_id
sqlfiddle

db2 compare year and month side by side

I need to compare side by side the companies values by current year vs last year and current month with same month of the previous year.
I use this query to get the values
-- Question's starting query: sums TOTAL per STORE for rows dated today.
-- NOTE(review): DATE is selected but is not part of the GROUP BY - confirm this runs as written.
SELECT STORE, SUM(TOTAL) as VAL, DATE FROM MYTABLE
WHERE DATE=CURRENT_DATE GROUP BY STORE ORDER BY STORE
below the results
STORE | VAL | DATE
1 10 CURRENT_DATE (2018-27-03)
1 20 2018-26-03
1 30 2018-25-03
2 20 CURRENT_DATE (2018-27-03)
2 20 2018-26-02
and i need this
STORE | VALUE CURRENT YEAR | VALUE LAST YEAR
1 60 30 (CALCULATED)
2 40 50 (CALCULATED)
STORE | VALUE CURRENT MONTH | VALUE SAME MONTH OF LAST YEAR
1 60 30 (CALCULATED)
2 20 50 (CALCULATED)
Thank you
You could just join two sub-selects together.
E.g with this DDL and Data
-- Sample schema and rows used by the comparison queries below:
-- one value (VAL) per store (STORE) and day (D).
CREATE TABLE MYTABLE (
    STORE INT,
    VAL   INT,
    D     DATE
);

INSERT INTO MYTABLE (STORE, VAL, D) VALUES
    (1, 10, '2018-03-27'),
    (1, 20, '2018-03-26'),
    (1, 10, '2018-02-25'),
    (1, 35, '2017-03-25'),
    (2, 20, '2018-03-27'),
    (2, 15, '2017-03-26');
This will get you the current month's value and the value for the same month of last year
-- Per store: total for the current month next to the total for the same
-- month one year ago. INT(D)/100 reduces a date to YYYYMM, so subtracting
-- 100 steps back exactly one year.
SELECT
    CUR.STORE,
    CUR.VAL_CURR_MONTH,
    PRV.VAL_CURR_MONTH_LY
FROM (
    SELECT
        STORE,
        SUM(VAL) AS VAL_CURR_MONTH
    FROM MYTABLE
    WHERE INT(D) / 100 = INT(CURRENT_DATE) / 100
    GROUP BY STORE
) AS CUR
LEFT JOIN (
    SELECT
        STORE,
        SUM(VAL) AS VAL_CURR_MONTH_LY
    FROM MYTABLE
    WHERE INT(D) / 100 = INT(CURRENT_DATE) / 100 - 100
    GROUP BY STORE
) AS PRV
    ON CUR.STORE = PRV.STORE
Then this for years
-- Per store: total for the current year next to the total for the previous
-- year. INT(D)/10000 reduces a date to YYYY.
SELECT
    CUR.STORE,
    CUR.VAL_CURR_YEAR,
    PRV.VAL_LY
FROM (
    SELECT
        STORE,
        SUM(VAL) AS VAL_CURR_YEAR
    FROM MYTABLE
    WHERE INT(D) / 10000 = INT(CURRENT_DATE) / 10000
    GROUP BY STORE
) AS CUR
LEFT JOIN (
    SELECT
        STORE,
        SUM(VAL) AS VAL_LY
    FROM MYTABLE
    WHERE INT(D) / 10000 = INT(CURRENT_DATE) / 10000 - 1
    GROUP BY STORE
) AS PRV
    ON CUR.STORE = PRV.STORE
P.S. there are many other ways to manipulate dates, but casting to INT is maybe one of the easier ways
Also, here is a more flexible way to get the "Same Month of Last Year" value. A similar method can get "last Year" values.
-- Per store and month (YEAR_MONTH = YYYYMM): the month's total next to the
-- total of the same month one year earlier (NULL when that month has no rows).
-- Subtracting 100 from a YYYYMM value lands on the same month of the previous
-- year, so the frame is exactly "100 PRECEDING". A frame of 101..100 PRECEDING
-- would also take in the month before it (e.g. 201702 together with 201703
-- for 201803) and AVG would blend the two months.
SELECT T.*
, AVG(VAL) OVER(
PARTITION BY STORE
ORDER BY YEAR_MONTH
RANGE BETWEEN 100 PRECEDING AND 100 PRECEDING
) AS SAME_MONTH_PREV_YEAR
FROM
-- one row per store and month, so the frame holds at most one row
( SELECT STORE
, INTEGER(D)/100 AS YEAR_MONTH
, SUM(VAL) AS VAL
FROM
MYTABLE T
GROUP BY
STORE
, INTEGER(D)/100
) AS T
;
Gives
STORE YEAR_MONTH VAL SAME_MONTH_PREV_YEAR
----- ---------- --- --------------------
1 201703 35 NULL
1 201802 10 NULL
1 201803 30 35
2 201703 15 NULL
2 201803 20 15
It is better to avoid functions on table columns in where clauses. Check following SQLs which are based on P. Vernon sample table.
Note: These SQLs are for DB2 LUW 11.1
For month:
-- Per store: total for the current month and for the same month last year.
-- The WHERE clause keeps only rows from those two months using plain range
-- predicates on D; the CASE expressions then split the rows by year.
SELECT
    STORE,
    SUM(CASE WHEN YEAR(D) = YEAR(CURRENT DATE)     THEN VAL ELSE 0 END) AS VAL_CURR_MONTH,
    SUM(CASE WHEN YEAR(D) = YEAR(CURRENT DATE) - 1 THEN VAL ELSE 0 END) AS VAL_CURR_MONTH_LY
FROM MYTABLE
WHERE D BETWEEN FIRST_DAY(CURRENT DATE) AND LAST_DAY(CURRENT DATE)
   OR D BETWEEN FIRST_DAY(CURRENT DATE - 1 YEAR) AND LAST_DAY(CURRENT DATE - 1 YEAR)
GROUP BY STORE
ORDER BY STORE
For year:
-- Per store: total for the current calendar year (VAL_CY) and for the
-- previous calendar year (VAL_LY).
SELECT STORE, SUM(CASE WHEN YEAR(D) = year(current date) THEN val
ELSE 0 END) as VAL_CY,
SUM(CASE WHEN YEAR(D) = year(current date) - 1 THEN vaL
ELSE 0 END) as VAL_LY
FROM MYTABLE
-- current date minus (month - 1) months falls in January and current date plus
-- (12 - month) months falls in December, so this range runs from the first
-- day of January to the last day of December of the current year
WHERE D between first_day(current date - (month(current date) - 1) months)
and last_day(current date + (12 - month(current date)) months)
-- the same range shifted back by one year
or D between first_day(current date - (month(current date) - 1) months - 1 year)
and last_day(current date + (12 - month(current date)) months - 1 year)
GROUP BY STORE
ORDER BY STORE

Count days from start_date to end_date or end of month

With datediff() I can count the days between two dates, but how can I count the days between the later date or the end of the month and the start date?
-- Sample events with a start and an end date; `jan` is left unset by the
-- inserts (only the UPDATE attempt further down writes to it).
CREATE TABLE table1 (
    id         INT,
    start_date DATETIME,
    end_date   DATETIME,
    jan        INT
);

INSERT INTO table1 (id, start_date, end_date)
VALUES
    (1, '2016-12-12', '2017-01-17'),
    (2, '2017-01-10', '2017-01-10'),
    (3, '2017-01-10', '2017-02-10'),
    (4, '2017-01-03', '2017-02-03'),
    (5, '2016-12-03', '2017-02-03');
If I run:
-- Whole-range day difference per row, labelled with the month the range starts in.
SELECT
    id,
    MONTH(start_date) AS month,
    DATEDIFF(end_date, start_date) AS diff
FROM table1;
it returns
id month diff
1 12 36
2 1 0
3 1 31
4 1 31
5 12 62
but I would like it to return:
id month diff
1 12 19
5 12 28
1 1 17
2 1 0
3 1 21
4 1 28
5 1 31
3 2 10
4 2 3
5 2 3
I'm trying to get the amount of days in a month a event occurs by month.
I've created a separated query to update a new column with the values, but ideally it shouldn't have a new column, since I would need several new columns for each year-month combination and one for each year-month combination:
-- Question's attempt: stores in `jan` the number of days of each row's range
-- that fall in January 2017 (NULL when none of the four cases applies).
-- There is no WHERE clause, so every row of table1 is updated.
update table1 set jan= case
-- range lies entirely inside January
when start_date >= "2017-01-01" and end_date <= last_day("2017-01-01") then datediff(end_date, start_date)+1
-- range starts in January and ends after it
when start_date >= "2017-01-01" and start_date <= last_day("2017-01-01") and end_date > last_day("2017-01-01") then datediff(last_day("2017-01-01"), start_date)+1
-- range starts before January and ends in it
when start_date < "2017-01-01" and end_date between "2017-01-01" and last_day("2017-01-01") then datediff(end_date, "2017-01-01")+1
-- range covers all of January: the day number of the month's last day
when start_date < "2017-01-01" and end_date > last_day("2017-01-01") then day(last_day("2017-01-01"))
else null
end;
Your problem is going to be getting multiple rows... so let's take a different tack.
This ends up being trivial if you have a calendar table: a table with a row-per-date (and a bunch of individual columns and indices):
-- One row per (id, month) holding the number of days of the range that fall
-- in that month, ordered by id and then chronologically.
-- The range is treated as (start_date, end_date]: the start day is excluded
-- and the end day included. The per-month counts then add up to
-- DATEDIFF(end_date, start_date) and match the split asked for in the
-- question (id 1: 19 days in December and 17 in January; id 4: 28 and 3).
-- A range that starts and ends on the same day matches no calendar row and
-- therefore produces no output row.
SELECT Table1.id, Calendar.calendar_month, COUNT(*)
FROM Table1
JOIN Calendar
ON Calendar.calendar_date > Table1.start_date
AND Calendar.calendar_date <= Table1.end_date
GROUP BY Table1.id, Calendar.calendar_month
ORDER BY Table1.id, MIN(Calendar.calendar_date)
Fiddle Demo
I don't know if this is what you're looking for.
-- First branch: per row, the start month and the day difference from
-- start_date to the last day of that month.
-- NOTE(review): neither branch returns id, so the rows cannot be tied back to an event.
select month(start_date) as month,
datediff(LAST_DAY(start_date), start_date) as diff
from table1
UNION ALL
-- Second branch: per row, the end month and either the difference between
-- start_date and end_date (range ends before the last day of the start month)
-- or the difference from the last day of the start month to end_date.
-- NOTE(review): datediff(start_date, end_date) takes the start as first argument,
-- the reverse of the other calls - confirm the sign is intended.
select month(end_date) as month,
IF(end_date < LAST_DAY(start_date), datediff(start_date, end_date),
datediff(end_date, LAST_DAY(start_date)))
from table1;
DEMO

Group entries per week where each column got specific date validation

I am trying to group tickets per week in a year when data field X are between that week. The final result should be something like Week,Datefield1,Datefield2..
What I reached so far:
-- Reporting window. T-SQL local variables take the @ prefix.
DECLARE @YearStartDate datetime
SET @YearStartDate = '2016-01-04 00:00:00.000'
DECLARE @YearEndDate datetime
SET @YearEndDate = '2016-04-24 00:00:00.000'
-- One row per year and week of CreateDate; 'Created' counts the tickets
-- of that week whose CreateDate lies inside the reporting window.
SELECT
Year(T.CreateDate) as 'Year',
'Week ' + cast(datepart(wk, CreateDate) as varchar(2)) as 'Week',
Sum(CASE WHEN CreateDate BETWEEN @YearStartDate and @YearEndDate
THEN 1 Else 0 End) 'Created'
FROM mytable AS T
GROUP BY Year(T.CreateDate),datepart(wk,T.CreateDate)
ORDER BY Year(T.CreateDate),datepart(wk, T.CreateDate)
Result:
Year Week Created
----------- ------- -----------
2016 Week 1 0
2016 Week 2 5
2016 Week 3 3
2016 Week 4 2
Goal:
Year Week Created Schuduled Closed
----------- ------- ----------- ----------- -----------
2016 Week 1 0 0 0
2016 Week 2 5 3 2
2016 Week 3 3 2 2
2016 Week 4 2 2 0
Suggestion?
-- Sketch of the desired query: one count column per date field.
-- The `..` / `...` arguments are placeholders left open by the question.
SELECT
Year(..) as 'Year',
'Week ' + cast(datepart(wk, ...) as varchar(2)) as 'Week',
Sum(CASE WHEN CreateDate BETWEEN @YearStartDate and @YearEndDate
THEN 1 Else 0 End) 'Created',
Sum(CASE WHEN ScheduledDate BETWEEN @YearStartDate and @YearEndDate
THEN 1 Else 0 End) 'Scheduled',
Sum(CASE WHEN ClosedDate BETWEEN @YearStartDate and @YearEndDate
THEN 1 Else 0 End) 'Closed'
FROM mytable AS T
GROUP BY Year(...)
ORDER BY Year(...)
Constraints:
I can count the entries per week for one datafield but for several datafields (created,scheduled,pending,closed) I think I need to change the logic. Maybe create a inner join or left join against the same table.
I was wondering if you could give me some guidance on finding the right path. Thanks for taking the time to help me.
All the best.
Right, join on other similar SELECTs for scheduled and closed should work. Something as
-- Reporting window. T-SQL local variables take the @ prefix.
DECLARE @YearStartDate datetime = '2016-01-04 00:00:00.000';
DECLARE @YearEndDate datetime = '2016-04-24 00:00:00.000';
-- Builds one weekly count per date column (created / scheduled / closed) and
-- joins the three results on year and week. Column names follow the
-- question's sketch (CreateDate, ScheduledDate, ClosedDate).
-- These are inner joins, so a week appears only when it has rows in all
-- three sub-queries.
SELECT CRT.*, SCH.Schuduled, CLS.Closed FROM
-- CRT: tickets created per week
(SELECT
Year(T.CreateDate) as 'Year',
'Week ' + cast(datepart(wk, CreateDate) as varchar(2)) as 'Week',
Sum(CASE WHEN CreateDate BETWEEN @YearStartDate and @YearEndDate THEN 1 Else 0 End) 'Created'
FROM mytable AS T
GROUP BY Year(T.CreateDate),datepart(wk,T.CreateDate)
) CRT
JOIN
-- SCH: tickets scheduled per week
(SELECT
Year(T.ScheduledDate) as 'Year',
'Week ' + cast(datepart(wk, ScheduledDate) as varchar(2)) as 'Week',
Sum(CASE WHEN ScheduledDate BETWEEN @YearStartDate and @YearEndDate THEN 1 Else 0 End) 'Schuduled'
FROM mytable AS T
GROUP BY Year(T.ScheduledDate),datepart(wk,T.ScheduledDate)
) SCH
ON CRT.Year = SCH.Year
AND CRT.Week = SCH.Week
JOIN
-- CLS: tickets closed per week, limited to the same reporting window as the other two counts
(SELECT
Year(T.ClosedDate) as 'Year',
'Week ' + cast(datepart(wk, ClosedDate) as varchar(2)) as 'Week',
Sum(CASE WHEN ClosedDate BETWEEN @YearStartDate and @YearEndDate THEN 1 Else 0 End) 'Closed'
FROM mytable AS T
GROUP BY Year(T.ClosedDate),datepart(wk,T.ClosedDate)
) CLS
ON CRT.Year = CLS.Year
AND CRT.Week = CLS.Week
ORDER BY Year, Week
You have three different date columns. One method would be pre-aggregation along the dimensions, followed by a full outer join. Alternatively, you can use union all and aggregation:
-- Stacks the three date columns into one (dte), each row flagged with the
-- event it represents, then counts the flags per year and week.
-- Column names follow the question (CreateDate, ScheduledDate, ClosedDate).
SELECT Year(dte) as [Year],
'Week ' + datename(wk, dte) as [Week],
Sum(Created) as Created,
Sum(Scheduled) as Scheduled,
Sum(Closed) as Closed
FROM ((SELECT CreateDate as dte, 1 as Created, 0 as Scheduled, 0 as Closed
FROM mytable
) UNION ALL
(SELECT ScheduledDate as dte, 0 as Created, 1 as Scheduled, 0 as Closed
FROM mytable
) UNION ALL
(SELECT ClosedDate as dte, 0 as Created, 0 as Scheduled, 1 as Closed
FROM mytable
)
) t
GROUP BY Year(dte), datename(wk, dte)
-- datename() returns a string; sort on the numeric week so that week 10 follows week 9
ORDER BY Year(dte), MIN(datepart(wk, dte));
I would encourage a couple of things:
Do not use single quotes for column aliases. Only use single quotes for string and date constants.
Do not use reserved words such as "year" for column or table names.
Also note the use of datename(). This returns a string so no conversion is needed.