Find gaps in time not covered by records with start date and end date - sql

I have a table of fee records (f_fee_item) as follows:
Fee_Item_ID int
Fee_Basis_ID int
Start_Date date
End_Date date
(irrelevant columns removed)
Assume that records for the same Fee_Basis_ID won't overlap.
I need to find the Start_Date and End_Date of each gap in the fee records for each Fee_Basis_ID between a supplied #Query_Start_Date and #Query_End_Date. I need this data to calculate fee accruals for all periods where fees have not been charged.
I also need the query to return a record if there are no fee records at all for a given Fee_Basis_ID (Fee_Basis_ID is a foreign key to D_Fee_Basis.Fee_Basis_ID if that helps).
For example:
#Query_Start_Date = '2011-01-01'
#Query_Start_Date = '2011-09-30'
D_Fee_Basis:
F_Fee_Item
1
2
3
F_Fee_Item:
Fee_Item_ID Fee_Basis_ID Start_Date End_Date
1 1 2011-01-01 2011-03-31
2 1 2011-04-01 2011-06-30
3 2 2011-01-01 2011-03-31
4 2 2011-05-01 2011-06-30
Required Results:
Fee_Basis_ID Start_Date End_Date
1 2011-07-01 2011-09-30
2 2011-04-01 2011-04-30
2 2011-07-01 2011-09-30
3 2011-01-01 2011-09-30
I've bee trying different self-joins for days trying to get it working but with no luck.
Please help!!

Here is a solution:
declare #Query_Start_Date date= '2011-01-01'
declare #Query_End_Date date = '2011-09-30'
declare #D_Fee_Basis table(F_Fee_Item int)
insert #D_Fee_Basis values(1)
insert #D_Fee_Basis values(2)
insert #D_Fee_Basis values(3)
declare #F_Fee_Item table(Fee_Item_ID int, Fee_Basis_ID int,Start_Date date,End_Date date)
insert #F_Fee_Item values(1,1,'2011-01-01','2011-03-31')
insert #F_Fee_Item values(2,1,'2011-04-01','2011-06-30')
insert #F_Fee_Item values(3,2,'2011-01-01','2011-03-31')
insert #F_Fee_Item values(4,2,'2011-05-01','2011-06-30')
;with a as
(-- find all days between Start_Date and End_Date
select #Query_Start_Date d
union all
select dateadd(day, 1, d)
from a
where d < #Query_end_Date
), b as
(--find all unused days
select a.d, F_Fee_Item Fee
from a, #D_Fee_Basis Fee
where not exists(select 1 from #F_Fee_Item where a.d between Start_Date and End_Date and Fee.F_Fee_Item = Fee_Basis_ID)
),
c as
(--find all start dates
select d, Fee, rn = row_number() over (order by fee, d) from b
where not exists (select 1 from b b2 where dateadd(day,1, b2.d) = b.d and b2.Fee= b.Fee)
),
e as
(--find all end dates
select d, Fee, rn = row_number() over (order by fee, d) from b
where not exists (select 1 from b b2 where dateadd(day,-1, b2.d) = b.d and b2.Fee= b.Fee)
)
--join start dates with end dates
select c.Fee Fee_Basis_ID, c.d Start_Date, e.d End_Date from c join e on c.Fee = e.Fee and c.rn = e.rn
option (maxrecursion 0)
Link for result:
https://data.stackexchange.com/stackoverflow/q/114193/

Related

SQL Table Join with Weighting

I'm trying to create a table that will calculate the weights of spend for a customer in the months they shop for a period. For example, the following customer (faux data) has this spend profile:
/* Customer spend */
create or replace temp table ts_all_transactions
(
inferred_customer_id varchar(128)
,nw_date date
,spend number(21,2)
);
insert into ts_all_transactions
values
('52f5','2019-06-01',17.35)
,('52f5','2018-11-01',24.85)
,('52f5','2019-12-01',1.40)
,('52f5','2019-01-01',2.45)
,('52f5','2019-03-01',3.90)
,('52f5','2020-01-01',37.55)
,('52f5','2019-10-01',13.20)
,('52f5','2019-09-01',5.70)
;
A calendar containing the months in which a period falls in, along with a weighting is then created:
-- Calculate weights for each period of the time series
-- Create a staging table
create or replace temp table period_dimension as
select abs(seq4()-12) as period,
dateadd(month, seq4(), dateadd(month, -23, date_trunc('Month', current_date()))) as start_date,
dateadd(month, 12, start_date) as end_date
from table(generator(rowcount => 12)) -- number of months after reference date in previous line
;
select * from period_dimension;
create or replace temp table my_date_dimension
(
my_date date not null
,year smallint not null
,month smallint not null
,month_name char(3) not null
,day_of_mon smallint not null
,day_of_week varchar(9) not null
,week_of_year smallint not null
,day_of_year smallint not null
)
as
with my_date as (
select
seq4(),
dateadd(month, seq4(), dateadd(month, -23, date_trunc('Month', current_date()))) as my_date
from table(generator(rowcount=>23))
)
select my_date
,year(my_date)
,month(my_date)
,monthname(my_date)
,day(my_date)
,dayofweek(my_date)
,weekofyear(my_date)
,dayofyear(my_date)
from my_date
;
create or replace table weight_lookup as
select
a.period
,b.my_date
,rank() over (partition by a.period order by b.my_date) as weight
from period_dimension a
inner join my_date_dimension b
where b.my_date >= a.start_date
and b.my_date < a.end_date
order by 1,2
;
-- Create a staging table
create or replace temp table period_dimension2 as
select abs(seq4()-12) as period,
dateadd(month, seq4(), dateadd(month, -23, date_trunc('Month', current_date()))) as start_date,
last_day(dateadd(month, 11, start_date)) as end_date
from table(generator(rowcount => 12)) -- number of months after reference date in previous line
;
The above is then used to calculate an average spend based on the months the customer shops in the period, however, I'm not getting the output I expect:
-- For each month of each period, group all together by period here so we have 12 periods
-- so each period represents 12 rolling months with period 12 being the oldest period
create or replace temp table ts_spend_time as
select
a.inferred_customer_id
,b.period
,max(a.nw_date) as max_mnth /* Month in period where most spend was made */
,sum(a.spend * b.weight) / 78 as avg_spend /* Sum of weights 12,11,10...1 to give 78 */
from ts_all_transactions a
inner join weight_lookup b on a.nw_date = b.my_date
inner join period_dimension2 c on b.my_date = c.start_date and b.period = c.period
where b.my_date >= c.start_date
and b.my_date <= c.end_date
group by 1,2
order by 1 desc, 2,3
;
The output I get from the above code is this:
create or replace temp table ts_spend_time_wrong_out
(
inferred_customer_id varchar(128)
,period number(11)
,max_mnth date
,avg_spend number(38,8)
);
insert into ts_spend_time_wrong_out
values
('52f5',3,'2019-03-01',0.05000000)
,('52f5',5,'2019-01-01',0.03141026)
,('52f5',7,'2018-11-01',0.31858974)
;
I would like to get an output like this:
create or replace temp table ts_spend_time_should_be
(
inferred_customer_id varchar(128)
,period number(11)
,max_mnth date
,avg_spend number(38,8)
);
insert into ts_spend_time_should_be
values
('52f5',1,'01JAN2020',6.301923077)
,('52f5',2,'01JAN2020',7.266025641)
,('52f5',3,'01JAN2020',8.280128205)
,('52f5',4,'01JAN2020',9.294230769)
,('52f5',5,'01DEC2019',4.081410256)
,('52f5',6,'01OCT2019',4.412179487)
,('52f5',7,'01OCT2019',5.276923077)
,('52f5',8,'01SEP2019',3.941666667)
,('52f5',9,'01JUN2019',3.687179487)
,('52f5',10,'01JUN2019',4.309615385)
,('52f5',11,'01JUN2019',4.932051282)
,('52f5',12,'01MAR2019',2.662820513)
;
In the correct solution example, the average spend is calculated by period as follows: ((17.35*2)+(5.7*5)+(13.20*6)+(1.4*8)+(37.55*9)) / 78
How can I resolve this? TIA
firstly you should use row_number() over(order by seq4()) as there can be gaps in a seq()
so working half way through you question
with ts_all_transactions as (
select id, nw_date::date as nw_date, spend from values
('52f5','2019-06-01',17.35)
,('52f5','2018-11-01',24.85)
,('52f5','2019-12-01',1.40)
,('52f5','2019-01-01',2.45)
,('52f5','2019-03-01',3.90)
,('52f5','2020-01-01',37.55)
,('52f5','2019-10-01',13.20)
,('52f5','2019-09-01',5.70)
v(id,nw_date, spend)
), period_dimension as (
select
row_number() over(order by seq4())-1 as rn0,
abs(rn0-12) as period,
dateadd('month', rn0, dateadd(month, -23, date_trunc('Month', current_date()))) as start_date,
dateadd('month', 12, start_date) as end_date
from table(generator(rowcount => 12)) -- number of months after reference date in previous line
), weight_periods as (
select p.period
,p.start_date
,p.end_date
,row_number() over(partition by p.period order by seq4())-1 as rn1
,dateadd('month',-rn1, p.end_date ) as weight_month
,12 - rn1 + 1 as weight
from period_dimension p,
table(generator(rowcount => 12))
), monthly_spends as (
select id
,date_trunc('month', nw_date) as m_date
,sum(spend) as s_spend
from ts_all_transactions
group by 1,2
)
select m.id
,w.period
,w.end_date
,w.weight_month
,m.s_spend
,w.weight
,m.s_spend * w.weight as w_spend
from monthly_spends m
join weight_periods w on m.m_date = w.weight_month
order by 1,2,3,4;
gives:
ID PERIOD END_DATE WEIGHT_MONTH S_SPEND WEIGHT W_SPEND
52f5 1 2020-05-01 2019-06-01 17.35 2 34.70
52f5 1 2020-05-01 2019-09-01 5.70 5 28.50
52f5 1 2020-05-01 2019-10-01 13.20 6 79.20
52f5 1 2020-05-01 2019-12-01 1.40 8 11.20
52f5 1 2020-05-01 2020-01-01 37.55 9 337.95
52f5 2 2020-04-01 2019-06-01 17.35 3 52.05
...
This shows up to this point we can see the inputs to the values you are expecting for the "weighted average" which can be done via :
select m.id
,w.period
,sum(m.s_spend * w.weight) as t_w_spend
,round(t_w_spend / 78,3) as weighted_avg_spend
from monthly_spends m
join weight_periods w on m.m_date = w.weight_month
group by 1,2
order by 1,2;
which gives:
ID PERIOD T_W_SPEND WEIGHTED_AVG_SPEND
52f5 1 491.55 6.302
52f5 2 566.75 7.266
52f5 3 641.95 8.230
52f5 4 724.95 9.294
52f5 5 804.05 10.308
52f5 6 362.35 4.646
52f5 7 386.75 4.958
52f5 8 479.05 6.142
52f5 9 361.70 4.637
52f5 10 336.15 4.310
52f5 11 384.70 4.932
52f5 12 433.25 5.554
which starts the same, but diverge as I think your date periods are done "wrong"
but the next point is you have this line
,max(a.nw_date) as max_mnth /* Month in period where most spend was made */
which does not do what you comment it as doing..
what it does is find the max date value in the aggregate.
to do that you need to go back to the monthly results sql and put a first_value() into the mix then select the results via:
id, period, max_spend_month, sum(w_spend)/78 as weighted_avg_spend
from (
select m.id
,w.period
,w.end_date
,w.weight_month
,m.s_spend
,w.weight
,m.s_spend * w.weight as w_spend
,first_value(w.weight_month) over (partition by m.id, w.period order by m.s_spend desc) as max_spend_month
from monthly_spends m
join weight_periods w on m.m_date = w.weight_month
)
group by 1,2,3
order by 1,2;
which now matches your expectations:
ID PERIOD MAX_SPEND_MONTH WEIGHTED_AVG_SPEND
52f5 1 2020-01-01 6.30192308
52f5 2 2020-01-01 7.26602564
52f5 3 2020-01-01 8.23012821
52f5 4 2020-01-01 9.29423077
52f5 5 2020-01-01 10.30833333
52f5 6 2019-06-01 4.64551282
52f5 7 2019-06-01 4.95833333
52f5 8 2018-11-01 6.14166667
52f5 9 2018-11-01 4.63717949
52f5 10 2018-11-01 4.30961538
52f5 11 2018-11-01 4.93205128
52f5 12 2018-11-01 5.55448718

Fill up date gap by month

I have table of products and their sales quantity in months.
Product Month Qty
A 2018-01-01 5
A 2018-02-01 3
A 2018-05-01 5
B 2018-08-01 10
B 2018-10-01 12
...
I'd like to first fill in the data gap between each product's min and max dates like below:
Product Month Qty
A 2018-01-01 5
A 2018-02-01 3
A 2018-03-01 0
A 2018-04-01 0
A 2018-05-01 5
B 2018-08-01 10
B 2018-09-01 0
B 2018-10-01 12
...
Then I would need to perform an accumulation of each product's sales quantity by month.
Product Month total_Qty
A 2018-01-01 5
A 2018-02-01 8
A 2018-03-01 8
A 2018-04-01 8
A 2018-05-01 13
B 2018-08-01 10
B 2018-09-01 10
B 2018-10-01 22
...
I fumbled over the "cross join" clause, however it seems to generate some unexpected results for me. Could someone help to give a hint how I can achieve this in SQL?
Thanks a lot in advance.
I think a recursive CTE is a simple way to do this. The code is just:
with cte as (
select product, min(mon) as mon, max(mon) as end_mon
from t
group by product
union all
select product, dateadd(month, 1, mon), end_mon
from cte
where mon < end_mon
)
select cte.product, cte.mon, coalesce(qty, 0) as qty
from cte left join
t
on t.product = cte.product and t.mon = cte.mon;
Here is a db<>fiddle.
Hi i think this example can help you and perform what you excepted :
CREATE TABLE #MyTable
(Product varchar(10),
ProductMonth DATETIME,
Qty int
);
GO
CREATE TABLE #MyTableTempDate
(
FullMonth DATETIME
);
GO
INSERT INTO #MyTable
SELECT 'A', '2019-01-01', 214
UNION
SELECT 'A', '2019-02-01', 4
UNION
SELECT 'A', '2019-03-01', 50
UNION
SELECT 'B', '2019-01-01', 214
UNION
SELECT 'B', '2019-02-01', 10
UNION
SELECT 'C', '2019-04-01', 150
INSERT INTO #MyTableTempDate
SELECT '2019-01-01'
UNION
SELECT '2019-02-01'
UNION
SELECT '2019-03-01'
UNION
SELECT '2019-04-01'
UNION
SELECT '2019-05-01'
UNION
SELECT '2019-06-01'
UNION
SELECT '2019-07-01';
------------- FOR NEWER SQL SERVER VERSION > 2005
WITH MyCTE AS
(
SELECT T.Product, T.ProductMonth AS 'MMonth', T.Qty
FROM #MyTable T
UNION
SELECT T.Product, TD.FullMonth AS 'MMonth', 0 AS 'Qty'
FROM #MyTable T, #MyTableTempDate TD
WHERE NOT EXISTS (SELECT 1 FROM #MyTable TT WHERE TT.Product = T.Product AND TD.FullMonth = TT.ProductMonth)
)
-- SELECT * FROM MyCTE;
SELECT Product, MMonth, Qty, SUM( Qty) OVER(PARTITION BY Product ORDER BY Product
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) as 'TotalQty'
FROM MyCTE
ORDER BY Product, MMonth ASC;
DROP TABLE #MyTable
DROP TABLE #MyTableTempDate
I have other way to perform this in lower SQL Server Version (like 2005 and lower)
It's a SELECT on SELECT if it's your case let me know and i provide some other example.
You can create the months with a recursive CTE
DECLARE #MyTable TABLE
(
ProductID CHAR(1),
Date DATE,
Amount INT
)
INSERT INTO #MyTable
VALUES
('A','2018-01-01', 5),
('A','2018-02-01', 3),
('A','2018-05-01', 5),
('B','2018-08-01', 10),
('B','2018-10-01', 12)
DECLARE #StartDate DATE
DECLARE #EndDate DATE
SELECT #StartDate = MIN(Date), #EndDate = MAX(Date) FROM #MyTable
;WITH dates AS (
SELECT #StartDate AS Date
UNION ALL
SELECT DATEADD(Month, 1, Date)
FROM dates
WHERE Date < #EndDate
)
SELECT A.ProductID, d.Date, COALESCE(Amount,0) AS Amount, COALESCE(SUM(Amount) OVER(PARTITION BY A.ProductID ORDER BY A.ProductID, d.Date ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW),0) AS Total
FROM
(
SELECT ProductID, MIN(date) as DateStart, MAX(date) as DateEnd
FROM #MyTable
GROUP BY ProductID -- As I read in your comments that you need different min and max dates per product
) A
JOIN dates d ON d.Date >= A.DateStart AND d.Date <= A.DateEnd
LEFT JOIN #MyTable T ON A.ProductID = T.ProductID AND T.Date = d.Date
ORDER BY A.ProductID, d.Date
Try this below
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL
DROP TABLE #Temp
;WITH CTE(Product,[Month],Qty)
AS
(
SELECT 'A','2018-01-01', 5 UNION ALL
SELECT 'A','2018-02-01', 3 UNION ALL
SELECT 'A','2018-05-01', 5 UNION ALL
SELECT 'B','2018-08-01', 10 UNION ALL
SELECT 'D','2018-10-01', 12
)
SELECT ct.Product,[MonthDays],ct.Qty
INTO #Temp
FROM
(
SELECT c.Product,[Month],
ISNULL(Qty,0) AS Qty
FROM CTE c
)ct
RIGHT JOIN
(
SELECT -- This code is to get month data
CONVERT(VARCHAR(10),'2018-'+ RIGHT('00'+CAST(MONTH(DATEADD(MM, s.number, CONVERT(DATETIME, 0)))AS VARCHAR),2) +'-01',120) AS [MonthDays]
FROM master.dbo.spt_values s
WHERE [type] = 'P' AND s.number BETWEEN 0 AND 11
)DT
ON dt.[MonthDays] = ct.[Month]
SELECT
MAX(Product)OVER(ORDER BY [MonthDays])AS Product,
[MonthDays],
ISNULL(Qty,0) Qty,
SUM(ISNULL(Qty,0))OVER(ORDER BY [MonthDays]) As SumQty
FROM #Temp
Result
Product MonthDays Qty SumQty
------------------------------
A 2018-01-01 5 5
A 2018-02-01 3 8
A 2018-03-01 0 8
A 2018-04-01 0 8
A 2018-05-01 5 13
A 2018-06-01 0 13
A 2018-07-01 0 13
B 2018-08-01 10 23
B 2018-09-01 0 23
D 2018-10-01 12 35
D 2018-11-01 0 35
D 2018-12-01 0 35
First of all, i would divide month and year to get easier with statistics.
I will give you an example query, not based on your table but still helpful.
--here i create the table that will be used as calendar
Create Table MA_MonthYears (
Month int not null ,
year int not null
PRIMARY KEY ( month, year) )
--/////////////////
-- here i'm creating a procedure to fill the ma_monthyears table
declare #month as int
declare #year as int
set #month = 1
set #year = 2015
while ( #year != 2099 )
begin
insert into MA_MonthYears(Month, year)
select #month, #year
if #month < 12
set #month=#month+1
else
set #month=1
if #month = 1
set #year = #year + 1
end
--/////////////////
--here you are the possible result you are looking for
select SUM(Ma_saledocdetail.taxableamount) as Sold, MA_MonthYears.month , MA_MonthYears.year , item
from MA_MonthYears left outer join MA_SaleDocDetail on year(MA_SaleDocDetail.DocumentDate) = MA_MonthYears.year
and Month(ma_saledocdetail.documentdate) = MA_MonthYears.Month
group by MA_SaleDocDetail.Item, MA_MonthYears.year , MA_MonthYears.month
order by MA_MonthYears.year , MA_MonthYears.month

Group time series by time intervals (e.g. days) with aggregate of duration

I have a table containing a time series with following information. Each record represents the event of "changing the mode".
Timestamp | Mode
------------------+------
2018-01-01 12:00 | 1
2018-01-01 18:00 | 2
2018-01-02 01:00 | 1
2018-01-02 02:00 | 2
2018-01-04 04:00 | 1
By using the LEAD function, I can create a query with the following result. Now each record contains the information, when and how long the "mode was active".
Please check the 2nd and the 4th record. They "belong" to multiple days.
StartDT | EndDT | Mode | Duration
------------------+------------------+------+----------
2018-01-01 12:00 | 2018-01-01 18:00 | 1 | 6:00
2018-01-01 18:00 | 2018-01-02 01:00 | 2 | 7:00
2018-01-02 01:00 | 2018-01-02 02:00 | 1 | 1:00
2018-01-02 02:00 | 2018-01-04 04:00 | 2 | 50:00
2018-01-04 04:00 | (NULL) | 1 | (NULL)
Now I would like to have a query that groups the data by day and mode and aggregates the duration.
This result table is needed:
Date | Mode | Total
------------+------+-------
2018-01-01 | 1 | 6:00
2018-01-01 | 2 | 6:00
2018-01-02 | 1 | 1:00
2018-01-02 | 2 | 23:00
2018-01-03 | 2 | 24:00
2018-01-04 | 2 | 04:00
I didn't known how to handle the records that "belongs" to multiple days. Any ideas?
create table ChangeMode ( ModeStart datetime2(7), Mode int )
insert into ChangeMode ( ModeStart, Mode ) values
( '2018-11-15T21:00:00.0000000', 1 ),
( '2018-11-16T17:18:19.1231234', 2 ),
( '2018-11-16T18:00:00.5555555', 1 ),
( '2018-11-16T18:00:01.1234567', 2 ),
( '2018-11-16T19:02:22.8888888', 1 ),
( '2018-11-16T20:00:00.9876543', 2 ),
( '2018-11-17T09:00:00.0000000', 1 ),
( '2018-11-17T23:23:23.0230450', 2 ),
( '2018-11-19T17:00:00.0172839', 1 ),
( '2018-11-20T03:07:00.7033077', 2 )
;
with
-- Determine the earliest and latest dates.
-- Cast to date to remove the time portion.
-- Cast results back to datetime because we're going to add hours later.
MinMaxDates
as
(select cast(min(cast(ModeStart as date))as datetime) as MinDate,
cast(max(cast(ModeStart as date))as datetime) as MaxDate from ChangeMode),
-- How many days have passed during that period
Dur
as
(select datediff(day,MinDate,MaxDate) as Duration from MinMaxDates),
-- Create a list of numbers.
-- These will be added to MinDate to get a list of dates.
NumList
as
( select 0 as Num
union all
select Num+1 from NumList,Dur where Num<Duration ),
-- Create a list of dates by adding those numbers to MinDate
DayList
as
( select dateadd(day,Num,MinDate)as ModeDate from NumList, MinMaxDates ),
-- Create a list of day periods
PeriodList
as
( select ModeDate as StartTime,
dateadd(day,1,ModeDate) as EndTime
from DayList ),
-- Use LEAD to get periods for each record
-- Final record would return NULL for ModeEnd
-- We replace that with end of last day
ModePeriodList
as
( select ModeStart,
coalesce( lead(ModeStart)over(order by ModeStart),
dateadd(day,1,MaxDate) ) as ModeEnd,
Mode from ChangeMode, MinMaxDates ),
ModeDayList
as
( select * from ModePeriodList, PeriodList
where ModeStart<=EndTime and ModeEnd>=StartTime
),
-- Keep the later of the mode start time, and the day start time
-- Keep the earlier of the mode end time, and the day end time
ModeDayPeriod
as
( select case when ModeStart>=StartTime then ModeStart else StartTime end as StartTime,
case when ModeEnd<=EndTime then ModeEnd else EndTime end as EndTime,
Mode from ModeDayList ),
SumDurations
as
( select cast(StartTime as date) as ModeDate,
Mode,
DateDiff_Big(nanosecond,StartTime,EndTime)
/3600000000000
as DurationHours from ModeDayPeriod )
-- List the results in order
-- Use MaxRecursion option in case there are more than 100 days
select ModeDate as [Date], Mode, sum(DurationHours) as [Total Duration Hours]
from SumDurations
group by ModeDate, Mode
order by ModeDate, Mode
option (maxrecursion 0)
Result is:
Date Mode Total Duration Hours
---------- ----------- ---------------------------------------
2018-11-15 1 3.00000000000000
2018-11-16 1 18.26605271947221
2018-11-16 2 5.73394728052777
2018-11-17 1 14.38972862361111
2018-11-17 2 9.61027137638888
2018-11-18 2 24.00000000000000
2018-11-19 1 6.99999519891666
2018-11-19 2 17.00000480108333
2018-11-20 1 3.11686202991666
2018-11-20 2 20.88313797008333
you could use a CTE to create a table of days then join the time slots to it
DECLARE #MAX as datetime2 = (SELECT MAX(CAST(Timestamp as date)) MX FROM process);
WITH StartEnd AS (select p1.Timestamp StartDT,
P2.Timestamp EndDT ,
p1.mode
from process p1
outer apply
(SELECT TOP 1 pOP.* FROM
process pOP
where pOP.Timestamp > p1.Timestamp
order by pOP.Timestamp asc) P2
),
CAL AS (SELECT (SELECT MIN(cast(StartDT as date)) MN FROM StartEnd) DT
UNION ALL
SELECT DATEADD(day,1,DT) DT FROM CAL WHERE CAL.DT < #MAX
),
TMS AS
(SELECT CASE WHEN S.StartDT > C.DT THEN S.StartDT ELSE C.DT END AS STP,
CASE WHEN S.EndDT < DATEADD(day,1,C.DT) THEN S.ENDDT ELSE DATEADD(day,1,C.DT) END AS STE
FROM StartEnd S JOIN CAL C ON NOT(S.EndDT <= C.DT OR S.StartDT>= DATEADD(day,1,C.dt))
)
SELECT *,datediff(MI ,TMS.STP, TMS.ste) as x from TMS
The following uses recursive CTE to build a list of dates (a calendar or number table works equally well). It then intersect the dates with date times so that missing dates are populated with matching data. The important bit is that for each row, if start datetime belongs to previous day then it is clamped to 00:00. Likewise for end datetime.
DECLARE #t TABLE (timestamp DATETIME, mode INT);
INSERT INTO #t VALUES
('2018-01-01 12:00', 1),
('2018-01-01 18:00', 2),
('2018-01-02 01:00', 1),
('2018-01-02 02:00', 2),
('2018-01-04 04:00', 1);
WITH cte1 AS (
-- the min and max dates in your data
SELECT
CAST(MIN(timestamp) AS DATE) AS mindate,
CAST(MAX(timestamp) AS DATE) AS maxdate
FROM #t
), cte2 AS (
-- build all dates between min and max dates using recursive cte
SELECT mindate AS day_start, DATEADD(DAY, 1, mindate) AS day_end, maxdate
FROM cte1
UNION ALL
SELECT DATEADD(DAY, 1, day_start), DATEADD(DAY, 2, day_start), maxdate
FROM cte2
WHERE day_start < maxdate
), cte3 AS (
-- pull end datetime from next row into current
SELECT
timestamp AS dt_start,
LEAD(timestamp) OVER (ORDER BY timestamp) AS dt_end,
mode
FROM #t
), cte4 AS (
-- join datetime with date using date overlap query
-- then clamp start datetime to 00:00 of the date
-- and clamp end datetime to 00:00 of next date
SELECT
IIF(dt_start < day_start, day_start, dt_start) AS dt_start_fix,
IIF(dt_end > day_end, day_end, dt_end) AS dt_end_fix,
mode
FROM cte2
INNER JOIN cte3 ON day_end > dt_start AND dt_end > day_start
)
SELECT dt_start_fix, dt_end_fix, mode, datediff(minute, dt_start_fix, dt_end_fix) / 60.0 AS total
FROM cte4
DB Fiddle
Thanks everybody!
The answer from Cato put me on the right track. Here my final solution:
DECLARE #Start AS datetime;
DECLARE #End AS datetime;
DECLARE #Interval AS int;
SET #Start = '2018-01-01';
SET #End = '2018-01-05';
SET #Interval = 24 * 60 * 60;
WITH
cteDurations AS
(SELECT [Timestamp] AS StartDT,
LEAD ([Timestamp]) OVER (ORDER BY [Timestamp]) AS EndDT,
Mode
FROM tblLog
WHERE [Timestamp] BETWEEN #Start AND #End
),
cteTimeslots AS
(SELECT #Start AS StartDT,
DATEADD(SECOND, #Interval, #Start) AS EndDT
UNION ALL
SELECT EndDT,
DATEADD(SECOND, #Interval, EndDT)
FROM cteTimeSlots WHERE StartDT < #End
),
cteDurationsPerTimesplot AS
(SELECT CASE WHEN S.StartDT > C.StartDT THEN S.StartDT ELSE C.StartDT END AS StartDT,
CASE WHEN S.EndDT < C.EndDT THEN S.EndDT ELSE C.EndDT END AS EndDT,
C.StartDT AS Slot,
S.Mode
FROM cteDurations S
JOIN cteTimeslots C ON NOT(S.EndDT <= C.StartDT OR S.StartDT >= C.EndDT)
)
SELECT Slot,
Mode,
SUM(DATEDIFF(SECOND, StartDT, EndDT)) AS Duration
FROM cteDurationsPerTimesplot
GROUP BY Slot, Mode
ORDER BY Slot, Mode;
With the variable #Interval you are able to define the size of the timeslots.
The CTE cteDurations creates a subresult with the durations of all necessary entries by using the TSQL function LEAD (available in MSSQL >= 2012). This will be a lot faster than an OUTER APPLY.
The CTE cteTimeslots generates a list of timeslots with start time and end time.
The CTE cteDurationsPerTimesplot is a subresult with a JOIN between cteDurations and cteTimeslots. This this the magic JOIN statement from Cato!
And finally the SELECT statement will do the grouping and sum calculation per Slot and Mode.
Once again: Thanks a lot to everybody! Especially to Cato! You saved my weekend!
Regards
Oliver

SQL - Find if column dates include at least partially a date range

I need to create a report and I am struggling with the SQL script.
The table I want to query is a company_status_history table which has entries like the following (the ones that I can't figure out)
Table company_status_history
Columns:
| id | company_id | status_id | effective_date |
Data:
| 1 | 10 | 1 | 2016-12-30 00:00:00.000 |
| 2 | 10 | 5 | 2017-02-04 00:00:00.000 |
| 3 | 11 | 5 | 2017-06-05 00:00:00.000 |
| 4 | 11 | 1 | 2018-04-30 00:00:00.000 |
I want to answer to the question "Get all companies that have been at least for some point in status 1 inside the time period 01/01/2017 - 31/12/2017"
Above are the cases that I don't know how to handle since I need to add some logic of type :
"If this row is status 1 and it's date is before the date range check the next row if it has a date inside the date range."
"If this row is status 1 and it's date is after the date range check the row before if it has a date inside the date range."
I think this can be handled as a gaps and islands problem. Consider the following input data: (same as sample data of OP plus two additional rows)
id company_id status_id effective_date
-------------------------------------------
1 10 1 2016-12-15
2 10 1 2016-12-30
3 10 5 2017-02-04
4 10 4 2017-02-08
5 11 5 2017-06-05
6 11 1 2018-04-30
You can use the following query:
SELECT t.id, t.company_id, t.status_id, t.effective_date, x.cnt
FROM company_status_history AS t
OUTER APPLY
(
SELECT COUNT(*) AS cnt
FROM company_status_history AS c
WHERE c.status_id = 1
AND c.company_id = t.company_id
AND c.effective_date < t.effective_date
) AS x
ORDER BY company_id, effective_date
to get:
id company_id status_id effective_date grp
-----------------------------------------------
1 10 1 2016-12-15 0
2 10 1 2016-12-30 1
3 10 5 2017-02-04 2
4 10 4 2017-02-08 2
5 11 5 2017-06-05 0
6 11 1 2018-04-30 0
Now you can identify status = 1 islands using:
;WITH CTE AS
(
SELECT t.id, t.company_id, t.status_id, t.effective_date, x.cnt
FROM company_status_history AS t
OUTER APPLY
(
SELECT COUNT(*) AS cnt
FROM company_status_history AS c
WHERE c.status_id = 1
AND c.company_id = t.company_id
AND c.effective_date < t.effective_date
) AS x
)
SELECT id, company_id, status_id, effective_date,
ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) -
cnt AS grp
FROM CTE
Output:
id company_id status_id effective_date grp
-----------------------------------------------
1 10 1 2016-12-15 1
2 10 1 2016-12-30 1
3 10 5 2017-02-04 1
4 10 4 2017-02-08 2
5 11 5 2017-06-05 1
6 11 1 2018-04-30 2
Calculated field grp will help us identify those islands:
;WITH CTE AS
(
SELECT t.id, t.company_id, t.status_id, t.effective_date, x.cnt
FROM company_status_history AS t
OUTER APPLY
(
SELECT COUNT(*) AS cnt
FROM company_status_history AS c
WHERE c.status_id = 1
AND c.company_id = t.company_id
AND c.effective_date < t.effective_date
) AS x
), CTE2 AS
(
SELECT id, company_id, status_id, effective_date,
ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) -
cnt AS grp
FROM CTE
)
SELECT company_id,
MIN(effective_date) AS start_date,
CASE
WHEN COUNT(*) > 1 THEN DATEADD(DAY, -1, MAX(effective_date))
ELSE MIN(effective_date)
END AS end_date
FROM CTE2
GROUP BY company_id, grp
HAVING COUNT(CASE WHEN status_id = 1 THEN 1 END) > 0
Output:
company_id start_date end_date
-----------------------------------
10 2016-12-15 2017-02-03
11 2018-04-30 2018-04-30
All you want know is those records from above that overlap with the specified interval.
Demo here with somewhat more complicated use case.
Maybe this is what you are looking for? For these kind of questions, you need to join two instance of your table, in this case I am just joining with next record by Id, which probably is not totally correct. To do it better, you can create a new Id using a windowed function like row_number, ordering the table by your requirement criteria
If this row is status 1 and it's date is before the date range check
the next row if it has a date inside the date range
declare #range_st date = '2017-01-01'
declare #range_en date = '2017-12-31'
select
case
when csh1.status_id=1 and csh1.effective_date<#range_st
then
case
when csh2.effective_date between #range_st and #range_en then true
else false
end
else NULL
end
from company_status_history csh1
left join company_status_history csh2
on csh1.id=csh2.id+1
Implementing second criteria:
"If this row is status 1 and it's date is after the date range check
the row before if it has a date inside the date range."
declare #range_st date = '2017-01-01'
declare #range_en date = '2017-12-31'
select
case
when csh1.status_id=1 and csh1.effective_date<#range_st
then
case
when csh2.effective_date between #range_st and #range_en then true
else false
end
when csh1.status_id=1 and csh1.effective_date>#range_en
then
case
when csh3.effective_date between #range_st and #range_en then true
else false
end
else null -- ¿?
end
from company_status_history csh1
left join company_status_history csh2
on csh1.id=csh2.id+1
left join company_status_history csh3
on csh1.id=csh3.id-1
I would suggest the use of a cte and the window functions ROW_NUMBER. With this you can find the desired records. An example:
DECLARE #t TABLE(
id INT
,company_id INT
,status_id INT
,effective_date DATETIME
)
INSERT INTO #t VALUES
(1, 10, 1, '2016-12-30 00:00:00.000')
,(2, 10, 5, '2017-02-04 00:00:00.000')
,(3, 11, 5, '2017-06-05 00:00:00.000')
,(4, 11, 1, '2018-04-30 00:00:00.000')
DECLARE #StartDate DATETIME = '2017-01-01';
DECLARE #EndDate DATETIME = '2017-12-31';
WITH cte AS(
SELECT *
,ROW_NUMBER() OVER (PARTITION BY company_id ORDER BY effective_date) AS rn
FROM #t
),
cteLeadLag AS(
SELECT c.*, ISNULL(c2.effective_date, c.effective_date) LagEffective, ISNULL(c3.effective_date, c.effective_date)LeadEffective
FROM cte c
LEFT JOIN cte c2 ON c2.company_id = c.company_id AND c2.rn = c.rn-1
LEFT JOIN cte c3 ON c3.company_id = c.company_id AND c3.rn = c.rn+1
)
SELECT 'Included' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date BETWEEN #StartDate AND #EndDate
UNION ALL
SELECT 'Following' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date > #EndDate
AND LagEffective BETWEEN #StartDate AND #EndDate
UNION ALL
SELECT 'Trailing' AS RangeStatus, *
FROM cteLeadLag
WHERE status_id = 1
AND effective_date < #EndDate
AND LeadEffective BETWEEN #StartDate AND #EndDate
I first select all records with their leading and lagging Dates and then I perform your checks on the inclusion in the desired timespan.
Try with this, self-explanatory. Responds to this part of your question:
I want to answer to the question "Get all companies that have been at
least for some point in status 1 inside the time period 01/01/2017 -
31/12/2017"
Case that you want to find those id's that have been in any moment in status 1 and have records in the period requested:
SELECT *
FROM company_status_history
WHERE id IN
( SELECT Id
FROM company_status_history
WHERE status_id=1 )
AND effective_date BETWEEN '2017-01-01' AND '2017-12-31'
Case that you want to find id's in status 1 and inside the period:
SELECT *
FROM company_status_history
WHERE status_id=1
AND effective_date BETWEEN '2017-01-01' AND '2017-12-31'

Query to return all the days of a month

This problem is related to this, which has no solution in sight: here
I have a table that shows me all sessions of an area.
This session has a start date.
I need to get all the days of month of the start date of the session by specific area (in this case)
I have this query:
SELECT idArea, idSession, startDate FROM SessionsPerArea WHERE idArea = 1
idArea | idSession | startDate |
1 | 1 | 01-01-2013 |
1 | 2 | 04-01-2013 |
1 | 3 | 07-02-2013 |
And i want something like this:
date | Session |
01-01-2013 | 1 |
02-01-2013 | NULL |
03-01-2013 | NULL |
04-01-2013 | 1 |
........ | |
29-01-2013 | NULL |
30-01-2013 | NULL |
In this case, the table returns me all the days of January.
The second column is the number of sessions that occur on that day, because there may be several sessions on the same day.
Anyone can help me?
Please try:
DECLARE #SessionsPerArea TABLE (idArea INT, idSession INT, startDate DATEtime)
INSERT #SessionsPerArea VALUES (1,1,'2013-01-01')
INSERT #SessionsPerArea VALUES (1,2,'2013-01-04')
INSERT #SessionsPerArea VALUES (1,3,'2013-07-02')
DECLARE #RepMonth as datetime
SET #RepMonth = '01/01/2013';
WITH DayList (DayDate) AS
(
SELECT #RepMonth
UNION ALL
SELECT DATEADD(d, 1, DayDate)
FROM DayList
WHERE (DayDate < DATEADD(d, -1, DATEADD(m, 1, #RepMonth)))
)
SELECT *
FROM DayList t1 left join #SessionsPerArea t2 on t1.DayDate=startDate and t2.idArea = 1
This will work:
DECLARE #SessionsPerArea TABLE (idArea INT, idSession INT, startDate DATE)
INSERT #SessionsPerArea VALUES
(1,1,'2013-01-01'),
(1,2,'2013-01-04'),
(1,3,'2013-07-02')
;WITH t1 AS
(
SELECT startDate
, DATEADD(MONTH, DATEDIFF(MONTH, '1900-01-01', startDate), '1900-01-01') firstInMonth
, DATEADD(DAY, -1, DATEADD(MONTH, DATEDIFF(MONTH, '1900-01-01', startDate) + 1, '1900-01-01')) lastInMonth
, COUNT(*) cnt
FROM #SessionsPerArea
WHERE idArea = 1
GROUP BY
startDate
)
, calendar AS
(
SELECT DISTINCT DATEADD(DAY, c.number, t1.firstInMonth) d
FROM t1
JOIN master..spt_values c ON
type = 'P'
AND DATEADD(DAY, c.number, t1.firstInMonth) BETWEEN t1.firstInMonth AND t1.lastInMonth
)
SELECT d date
, cnt Session
FROM calendar c
LEFT JOIN t1 ON t1.startDate = c.d
It uses simple join on master..spt_values table to generate rows.
Just an example of calendar table. To return data for a month adjust the number of days between < 32, for a year to 365+1. You can calculate the number of days in a month or between start/end dates with query. I'm not sure how to do this in SQL Server. I'm using hardcoded values to display all dates in Jan-2013. You can adjust start and end dates for diff. month or to get start/end dates with queries...:
WITH data(r, start_date) AS
(
SELECT 1 r, date '2012-12-31' start_date FROM any_table --dual in Oracle
UNION ALL
SELECT r+1, date '2013-01-01'+r-1 FROM data WHERE r < 32 -- number of days between start and end date+1
)
SELECT start_date FROM data WHERE r > 1
/
START_DATE
----------
1/1/2013
1/2/2013
1/3/2013
...
...
1/31/2013