SQL Summary of Status total values by Month of Type2 SC Dimension - sql

I have a voucher status history table stored as a Type 2 slowly changing dimension. I am trying to get the total value of each status for each month before a particular date. This is my schema and insert code:
-- Voucher status history (Type 2 SCD): one row per voucher per status period.
-- In the sample data the open (latest) period of a voucher has DateStatusEnd = NULL.
CREATE TABLE #HDimVouchers(
    [HVoucherKey] [bigint] IDENTITY(1,1) NOT NULL,
    [Voucher_id] [bigint] NOT NULL,
    [VoucherStatusKey] [int] NOT NULL,
    [Voucher_amt] [decimal](18, 2) NULL,
    [DateStatusStart] [date] NULL,
    [DateStatusEnd] [date] NULL
);
--drop table #HDimVouchers
-- Explicit column list: HVoucherKey is an IDENTITY column and is skipped on purpose,
-- so the statement keeps working if columns are later added or reordered.
INSERT INTO #HDimVouchers
    ([Voucher_id], [VoucherStatusKey], [Voucher_amt], [DateStatusStart], [DateStatusEnd])
VALUES
    (10, 2,  10.00, '2019-01-01', '2019-02-15'),
    (10, 4,  10.00, '2019-02-16', null),
    (13, 4,  10.00, '2019-01-10', null),
    (11, 2,  15.00, '2019-01-01', null),
    (12, 2,  20.00, '2019-03-12', '2019-03-12'),
    (12, 4,  20.00, '2019-03-13', null),
    (15, 2, 205.00, '2019-05-25', '2020-04-24'),
    (15, 6, 205.00, '2020-04-25', null),
    (21, 2, 100.00, '2019-02-16', null);
I would like to get the total value by year-month and VoucherStatusKey, something like the below:
[Year-Month] [VoucherStatusKey] [Amount]
201901 2 25
201901 4 10
201902 2 100
201902 4 10
201903 4 20
201905 2 205
201906 2 205
201907 2 205
201908 2 205
201909 2 205
201910 2 205
201911 2 205
201912 2 205
202001 2 205
202002 2 205
202003 2 205
I have had many attempts to get the data as above, but I am struggling to get the correct format and values. Below is something I have tried
-- Asker's attempt (kept as posted): builds a YYYYMM label from DateStatusStart and
-- computes a windowed SUM of Voucher_amt partitioned by (year, month, VoucherStatusKey)
-- of DateStatusStart. Only the month in which a status period STARTS is considered,
-- so months that a period merely spans produce no row.
SELECT convert(nvarchar(4),Year([DateStatusStart])) + RIGHT('00' + CONVERT(NVARCHAR(2), DATEPART(Month, [DateStatusStart])), 2)
,[VoucherStatusKey]
,SUM([Voucher_amt]) OVER (PARTITION BY Year([DateStatusStart]),Month([DateStatusStart]), [VoucherStatusKey] ORDER BY [DateStatusStart]) AS running_total
FROM #HDimVouchers where [DateStatusStart] < '2020-03-31';

Let me assume that you want the value at the end of the month. Then, you can take the following approach:
Generate all appropriate months for each voucher.
Use a join to bring in the appropriate value.
For the first part, you could use a tally or calendar table if one is available. However a recursive CTE is also convenient:
-- Month-end snapshot: for every voucher and every month end from its first month
-- through 2020-03-31, return the amount of the status row in force on that month end.
-- Reads #HDimVouchers, the temp table defined in the question.
with vdates as (
    -- anchor: end of the month in which each voucher first appears
    select voucher_id, eomonth(min(DateStatusStart)) as eom
    from #HDimVouchers
    group by voucher_id
    union all
    -- recursive step: next month end; the last one generated is 2020-03-31
    select voucher_id, eomonth(dateadd(month, 1, eom))
    from vdates
    where eom < '2020-03-01'
)
select vd.voucher_id, vd.eom, hv.Voucher_amt
from vdates vd join
     #HDimVouchers hv
     on hv.voucher_id = vd.voucher_id and
        vd.eom >= hv.DateStatusStart and
        -- a NULL end date marks the still-open status period
        (vd.eom <= hv.DateStatusEnd or hv.DateStatusEnd is null)
order by vd.eom, vd.voucher_id;
Here is a db<>fiddle.

My take on this would be:
-- Total Voucher_amt per month (YYYYMM integer) and VoucherStatusKey from #HDimVouchers.
-- [dates]: one row per month, starting at the month of the earliest DateStatusStart and
-- ending at the hard-coded upper bound 202112. A month ending in 12 rolls over to
-- January of the next year (+100 - 11); every other month is simply +1.
;with [dates] as (
select YEAR(MIN([DateStatusStart]))*100+MONTH(MIN([DateStatusStart])) [YM] from #HDimVouchers
union all
select case when ([dates].[YM] % 100) = 12 then [dates].[YM] + 100 - 11 else [dates].[YM] + 1 end from [dates] where [YM] < 202112
-- [dimkeys]: per voucher, its first start month and last end month as YYYYMM integers;
-- a NULL DateStatusEnd is replaced by 2999-12-31 before taking the maximum.
), [dimkeys] as (
select
[Voucher_id],
YEAR(MIN([DateStatusStart]))*100+MONTH(MIN([DateStatusStart])) [DateStatusStart],
YEAR(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31))))*100+MONTH(MAX(ISNULL([DateStatusEnd], DATEFROMPARTS(2999, 12, 31)))) [DateStatusEnd]
from [#HDimVouchers] group by [Voucher_id]
-- [map]: for each voucher and each month inside its lifetime, pick the DateStatusStart
-- of the history row to use. MappingDate is the first non-NULL of:
--   1. the latest DateStatusStart among rows that start in this month,
--   2. the latest DateStatusStart among rows that start in an earlier month,
--   3. the voucher's earliest DateStatusStart.
), [map] as (
select
[dimkeys].[Voucher_id],
[dates].[YM],
COALESCE(
MAX([d].[DateStatusStart]),
(select MAX([i].[DateStatusStart]) from [#HDimVouchers] [i] where [i].[Voucher_id] = [dimkeys].[Voucher_id] and YEAR([i].[DateStatusStart])*100+MONTH([i].[DateStatusStart]) < [dates].[YM]),
(select MIN([i].[DateStatusStart]) from [#HDimVouchers] [i] where [i].[Voucher_id] = [dimkeys].[Voucher_id])
) [MappingDate]
from [dates]
cross join [dimkeys]
left join [#HDimVouchers] [d] on [d].[Voucher_id] = [dimkeys].[Voucher_id] and YEAR([d].[DateStatusStart])*100+MONTH([d].[DateStatusStart]) = [dates].[YM]
where [dates].[YM] >= [dimkeys].[DateStatusStart] and [dates].[YM] <= [dimkeys].[DateStatusEnd]
group by [dimkeys].[Voucher_id], [dates].[YM]
)
-- Join each (voucher, month) back to the history row chosen by MappingDate and sum the
-- amounts per month and status.
select [map].[YM], [fact].[VoucherStatusKey], SUM([fact].[Voucher_amt]) [Sum]
from [map] join [#HDimVouchers] [fact] on [fact].[Voucher_id] = [map].[Voucher_id] and [fact].[DateStatusStart] = [map].[MappingDate]
group by [map].[YM], [fact].[VoucherStatusKey]
order by [YM], [VoucherStatusKey];
So:
Get all year-month values from start to end
Get all distinct keys with their overall min/max dates (aka when the member exists)
cross join them to get an entry of every key for every yearmonth (within lifetime of member)
add the date which should be used for mapping (this is used to decide to which month to add a member that was changed in a year-month)
only then join this up with the full dimension and group by the SCD Type 2 attribute
Update
For big tables, you can split it up in multiple temp. tables instead of going all out on CTEs. That usually helps a lot on performance.
-- Same monthly summary as the single-statement version, split into temp tables.
-- Step 1: copy the history into #withYm with start/end months precomputed as YYYYMM
-- integers (YmEnd is NULL when DateStatusEnd is NULL).
select *,
YEAR([DateStatusStart])*100+MONTH([DateStatusStart]) [YmStart],
YEAR([DateStatusEnd])*100+MONTH([DateStatusEnd]) [YmEnd]
into [#withYm]
from [#HDimVouchers];
-- Step 2: [dates] generates every month from the earliest YmStart up to the hard-coded
-- bound 202112; [dimkeys] holds each voucher's first and last month (NULL end -> 299912).
;with [dates] as (
select MIN([YmStart]) [YM] from [#withYm]
union all
select case when ([dates].[YM] % 100) = 12 then [dates].[YM] + 100 - 11 else [dates].[YM] + 1 end from [dates] where [YM] < 202112
), [dimkeys] as (
select
[Voucher_id],
MIN([YmStart]) [YmStart],
MAX(ISNULL([YmEnd], 299912)) [YmEnd]
from [#withYm]
group by [Voucher_id]
)
-- #all: one row per voucher and month within that voucher's lifetime.
select
[dimkeys].[Voucher_id],
[dates].[YM]
into [#all]
from [dates]
cross join [dimkeys]
where [dates].[YM] >= [dimkeys].[YmStart] and [dates].[YM] <= [dimkeys].[YmEnd]
-- Step 3: [map] picks the history row per (voucher, month): the latest DateStatusStart
-- within the month, otherwise the latest DateStatusStart from an earlier month.
;with [map] as (
select
[#all].[Voucher_id],
[#all].[YM],
ISNULL(
MAX([d].[DateStatusStart]),
(select MAX([i].[DateStatusStart]) from [#withYm] [i] where [i].[Voucher_id] = [#all].[Voucher_id] and [i].[YmStart] < [#all].[YM])
) [MappingDate]
from [#all]
left join [#HDimVouchers] [d] on [d].[Voucher_id] = [#all].[Voucher_id] and YEAR([d].[DateStatusStart])*100+MONTH([d].[DateStatusStart]) = [#all].[YM]
group by [#all].[Voucher_id], [#all].[YM]
)
-- Step 4: join to the chosen history row and sum per month and status.
select [map].[YM], [fact].[VoucherStatusKey], SUM([fact].[Voucher_amt]) [Sum]
from [map] join [#HDimVouchers] [fact] on [fact].[Voucher_id] = [map].[Voucher_id] and [fact].[DateStatusStart] = [map].[MappingDate]
group by [map].[YM], [fact].[VoucherStatusKey]
order by [YM], [VoucherStatusKey];

Related

SQL - '1' IF hour in month EXISTS, '0' IF NOT EXISTS

I have a table that has aggregations down to the hour level YYYYMMDDHH. The data is aggregated and loaded by an external process (I don't have control over). I want to test the data on a monthly basis.
The question I am looking to answer is: Does every hour in the month exist?
I'm looking to produce output that will return a 1 if the hour exists or 0 if the hour does not exist.
The aggregation table looks something like this...
YYYYMM YYYYMMDD YYYYMMDDHH DATA_AGG
201911 20191101 2019110100 100
201911 20191101 2019110101 125
201911 20191101 2019110103 135
201911 20191101 2019110105 95
… … … …
201911 20191130 2019113020 100
201911 20191130 2019113021 110
201911 20191130 2019113022 125
201911 20191130 2019113023 135
And defined as...
-- Hourly aggregation table. Each row carries its month, day and hour keys as text
-- plus the aggregated value.
-- "CREATE TABLE ... AS" introduces a query (CTAS); a plain column list takes no AS.
CREATE TABLE YYYYMMDDHH_DATA_AGG (
    YYYYMM VARCHAR,
    YYYYMMDD VARCHAR,
    YYYYMMDDHH VARCHAR,
    DATA_AGG INT
);
I'm looking to produce the following below...
YYYYMMDDHH HOUR_EXISTS
2019110100 1
2019110101 1
2019110102 0
2019110103 1
2019110104 0
2019110105 1
... ...
In the example above, two hours do not exist, 2019110102 and 2019110104.
I assume I'd have to join the aggregation table against a computed table that contains all the YYYYMMDDHH combos???
The database is Snowflake, but assume most generic ANSI SQL queries will work.
You can get what you want with a recursive CTE
The recursive CTE generates the list of possible Hours. And then a simple left outer join gets you the flag for if you have any records that match that hour.
-- Flags every hour between the smallest and largest YYYYMMDDHH in the table:
-- HOUR_EXISTS = 1 when a row exists for that hour, otherwise 0.
-- NOTE(review): the same 'YYYYMMDDHH' format string is used to parse and to format the
-- hour key; confirm that the HH element is treated as a 24-hour value on your account.
WITH RECURSIVE CTE (YYYYMMDDHH) as
(
-- anchor: the earliest hour present in the table
SELECT YYYYMMDDHH
FROM YYYYMMDDHH_DATA_AGG
WHERE YYYYMMDDHH = (SELECT MIN(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG)
UNION ALL
-- recursive step: add one hour, stopping once the next hour would pass the latest hour present
SELECT TO_VARCHAR(DATEADD(HOUR, 1, TO_TIMESTAMP(C.YYYYMMDDHH, 'YYYYMMDDHH')), 'YYYYMMDDHH') YYYYMMDDHH
FROM CTE C
WHERE TO_VARCHAR(DATEADD(HOUR, 1, TO_TIMESTAMP(C.YYYYMMDDHH, 'YYYYMMDDHH')), 'YYYYMMDDHH') <= (SELECT MAX(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG)
)
SELECT
C.YYYYMMDDHH,
-- the left join leaves A.YYYYMMDDHH NULL for generated hours with no matching row
IFF(A.YYYYMMDDHH IS NOT NULL, 1, 0) HOUR_EXISTS
FROM CTE C
LEFT OUTER JOIN YYYYMMDDHH_DATA_AGG A
ON C.YYYYMMDDHH = A.YYYYMMDDHH;
If your timerange is too long you'll have issues with the cte recursing too much. You can create a table or temp table with all of the possible hours instead. For example:
-- Materialize 10000 consecutive hours starting at the earliest hour in the table.
-- SEQ4() is not guaranteed to be gap-free, so using it directly as the hour offset
-- can silently skip hours; ROW_NUMBER() gives a dense 0..n-1 offset instead.
CREATE OR REPLACE TEMPORARY TABLE HOURS (YYYYMMDDHH VARCHAR) AS
SELECT TO_VARCHAR(
           DATEADD(HOUR,
                   ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1,
                   TO_TIMESTAMP((SELECT MIN(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG), 'YYYYMMDDHH')),
           'YYYYMMDDHH')
FROM TABLE(GENERATOR(ROWCOUNT => 10000)) V
ORDER BY 1;
-- Flag each generated hour up to the latest hour present: 1 if a row exists, else 0.
SELECT
    H.YYYYMMDDHH,
    IFF(A.YYYYMMDDHH IS NOT NULL, 1, 0) HOUR_EXISTS
FROM HOURS H
LEFT OUTER JOIN YYYYMMDDHH_DATA_AGG A
    ON H.YYYYMMDDHH = A.YYYYMMDDHH
WHERE H.YYYYMMDDHH <= (SELECT MAX(YYYYMMDDHH) FROM YYYYMMDDHH_DATA_AGG);
You can then fiddle with the generator count to make sure you have enough hours.
You can generate a table with every hour of the month and LEFT OUTER JOIN your aggregation to it:
-- EVERY_HOUR: for each distinct YYYYMM in the table, generate hour offsets 0..744
-- (745 rows) from the first day of that month and format them as numeric YYYYMMDDHH.
-- QUALIFY keeps only values below (YYYYMM + 1) * 10000, i.e. hours that still belong
-- to the same month.
-- NOTE(review): YYYYMMDDHH is cast to NUMBER here while the table definition declares
-- the column as VARCHAR, so the final join relies on implicit conversion.
WITH EVERY_HOUR AS (
SELECT TO_CHAR(DATEADD(HOUR, HH, TO_DATE(YYYYMM::TEXT, 'YYYYMM')),
'YYYYMMDDHH')::NUMBER YYYYMMDDHH
FROM (SELECT DISTINCT YYYYMM FROM YYYYMMDDHH_DATA_AGG) t
CROSS JOIN (
SELECT ROW_NUMBER() OVER (ORDER BY NULL) - 1 HH
FROM TABLE(GENERATOR(ROWCOUNT => 745))
) h
QUALIFY YYYYMMDDHH < (YYYYMM + 1) * 10000
)
-- NVL2 returns 1 when the left join found a row for the hour, 0 otherwise
SELECT h.YYYYMMDDHH, NVL2(a.YYYYMM, 1, 0) HOUR_EXISTS
FROM EVERY_HOUR h
LEFT OUTER JOIN YYYYMMDDHH_DATA_AGG a ON a.YYYYMMDDHH = h.YYYYMMDDHH
Here's something that might help get you started. I'm guessing you want to have 'synthetic' [YYYYMMDD] values? Otherwise, if the values aren't there, then they shouldn't appear in the list.
DROP TABLE IF EXISTS #_hours;
DROP TABLE IF EXISTS #_temp;
--Populate a table with hours ranging from 00 to 23
CREATE TABLE #_hours ([hour_value] VARCHAR(2));
-- T-SQL local variables are prefixed with @ (a leading # names a temp table),
-- so the loop counter must be @_i for this batch to parse.
DECLARE @_i INT = 0;
WHILE (@_i < 24)
BEGIN
    INSERT INTO #_hours ([hour_value])
    SELECT FORMAT(@_i, '0#');
    SET @_i += 1;
END;
-- Replicate OP's sample data set
CREATE TABLE #_temp (
    [YYYYMM] INTEGER
    , [YYYYMMDD] INTEGER
    , [YYYYMMDDHH] INTEGER
    , [DATA_AGG] INTEGER
);
INSERT INTO #_temp ([YYYYMM], [YYYYMMDD], [YYYYMMDDHH], [DATA_AGG])
VALUES
    (201911, 20191101, 2019110100, 100),
    (201911, 20191101, 2019110101, 125),
    (201911, 20191101, 2019110103, 135),
    (201911, 20191101, 2019110105, 95),
    (201911, 20191130, 2019113020, 100),
    (201911, 20191130, 2019113021, 110),
    (201911, 20191130, 2019113022, 125),
    (201911, 20191130, 2019113023, 135);
SELECT X.YYYYMM, X.YYYYMMDD, X.YYYYMMDDHH
    -- Case: If 'target_hours' doesn't exist, then 0, else 1
    , CASE WHEN X.target_hours IS NULL THEN '0' ELSE '1' END AS [HOUR_EXISTS]
FROM (
    -- Select right 2 characters from converted [YYYYMMDDHH] to act as 'target values'
    SELECT T.*
        , RIGHT(CAST(T.[YYYYMMDDHH] AS VARCHAR(10)), 2) AS [target_hours]
    FROM #_temp AS T
) AS X
-- Right join to keep all of our hours and only the target hours that match.
RIGHT JOIN #_hours AS H ON H.hour_value = X.target_hours;
Sample output:
YYYYMM YYYYMMDD YYYYMMDDHH HOUR_EXISTS
201911 20191101 2019110100 1
201911 20191101 2019110101 1
NULL NULL NULL 0
201911 20191101 2019110103 1
NULL NULL NULL 0
201911 20191101 2019110105 1
NULL NULL NULL 0
With (almost) standard sql, you can do a cross join of the distinct values of YYYYMMDD to a list of all possible hours and then left join to the table:
-- Every (day, hour) combination for the days present in the table, flagged with
-- 1 when a row exists for that hour and 0 when it does not.
with hours (hour) as (
    select '00' union all
    select '01' union all
    select '02' union all
    select '03' union all
    select '04' union all
    select '05' union all
    select '06' union all
    select '07' union all
    select '08' union all
    select '09' union all
    select '10' union all
    select '11' union all
    select '12' union all
    select '13' union all
    select '14' union all
    select '15' union all
    select '16' union all
    select '17' union all
    select '18' union all
    select '19' union all
    select '20' union all
    select '21' union all
    select '22' union all
    select '23'
),
days as (
    select distinct YYYYMMDD
    from tablename
)
select
    concat(d.YYYYMMDD, h.hour) as YYYYMMDDHH,
    case when t.YYYYMMDDHH is not null then 1 else 0 end as hour_exists
from days as d
cross join hours as h
left join tablename as t
    on t.YYYYMMDDHH = concat(d.YYYYMMDD, h.hour)
order by concat(d.YYYYMMDD, h.hour)
Maybe in Snowflake you can construct the list of hours with a sequence much easier instead of all those UNION ALLs.
This version accounts for the full range of days, across months and years. It's a simple cross join of the set of possible days with the set of possible hours of the day -- left joined to actual dates.
-- First and last day present in the table, as YYYYMMDD numbers.
set first = (select min(yyyymmdd::number) from YYYYMMDDHH_DATA_AGG);
set last = (select max(yyyymmdd::number) from YYYYMMDDHH_DATA_AGG);
with
-- hours: 0..23
hours as (select row_number() over (order by null) - 1 h from table(generator(rowcount=>24))),
-- days: consecutive calendar days starting at $first.
-- $last-$first+1 subtracts YYYYMMDD *numbers*, not dates, so across a month or year
-- boundary it is larger than the real day count (20191201 - 20191130 + 1 = 72).
-- It is therefore only an upper bound; the final WHERE trims the surplus days.
days as (
    select
        row_number() over (order by null) - 1 as n,
        to_date($first::text, 'YYYYMMDD')::date + n as d,
        to_char(d, 'YYYYMMDD') as yyyymmdd
    from table(generator(rowcount=>($last-$first+1)))
)
select days.yyyymmdd || lpad(hours.h,2,0) as YYYYMMDDHH, nvl2(t.yyyymmddhh,1,0) as HOUR_EXISTS
from days cross join hours
left join YYYYMMDDHH_DATA_AGG t on t.yyyymmddhh = days.yyyymmdd || lpad(hours.h,2,0)
-- drop generated days that fall after the last day actually present
where days.d <= to_date($last::text, 'YYYYMMDD')
order by 1
;
$first and $last can be packed in as sub-queries if you prefer.

Count number of days each employee take vacation in a month SQL Server

I have this table:
Vacationtbl:
ID Start End
-------------------------
01 04/10/17 04/12/17
01 04/27/17 05/02/17
02 04/13/17 04/15/17
02 04/17/17 04/20/17
03 06/14/17 06/22/17
Employeetbl:
ID Fname Lname
------------------
01 John AAA
02 Jeny BBB
03 Jeby CCC
I would like to count the number of days each employee takes vacation in April.
My query:
-- Asker's original query (kept as posted): sums the inclusive length of every vacation
-- range per ID. Ranges are not clipped to a month, so a range that crosses a month
-- boundary is counted in full.
SELECT
SUM(DATEDIFF(DAY, Start, End) + 1) AS Days
FROM
Vacationtbl
GROUP BY
ID
01 returns 9 (not correct)
02 returns 7 (correct)
How do I fix the query so that it counts only up to the end of the month and stops there? For example, April has 30 days. For the second row, employee 01 should be counted from 4/27/17 until 4/30/17; 05/01/17 and 05/02/17 belong to May.
Thanks
The Tally/Calendar table is the way to go. However, you can use an ad-hoc tally table.
Example
-- Expand each vacation range into one row per day using master..spt_values as an
-- ad-hoc tally, then count the days per year, month and employee.
Select Year(B.D)  As [Year]
      ,Month(B.D) As [Month]
      ,A.ID
      ,Count(*)   As [Days]
 From Vacationtbl As A
 Cross Apply (
        -- one row per day of the range, from [Start] through [End] inclusive
        Select Top (DateDiff(DAY, A.[Start], A.[End]) + 1)
               DateAdd(DAY, Row_Number() Over (Order By (Select Null)) - 1, A.[Start]) As D
         From master..spt_values
      ) As B
 -- YOUR OPTIONAL WHERE STATEMENT HERE --
 Group By A.ID, Year(B.D), Month(B.D)
 Order By Year(B.D), Month(B.D), A.ID
Returns
Year Month ID Days
2017 4 01 7
2017 4 02 7
2017 5 01 2
EDIT - To Show All ID even if Zero Days
-- Cross join every day of May 2017 with every vacation row and count the days that
-- fall inside a range, so IDs with no vacation in the month still appear with 0.
Select B.ID
      ,Year(Cal.D)  As [Year]
      ,Month(Cal.D) As [Month]
      ,Sum(Case When Cal.D >= B.[Start] And Cal.D <= B.[End] Then 1 Else 0 End) As [Days]
 From (
        -- calendar for the month: one row per day from 05/01/2017 through 05/31/2017
        Select Top (DateDiff(DAY,'05/01/2017','05/31/2017')+1)
               DateAdd(DAY, Row_Number() Over (Order By (Select Null)) - 1, '05/01/2017') As D
         From master..spt_values
      ) As Cal
 Cross Join Vacationtbl As B
 Group By B.ID, Year(Cal.D), Month(Cal.D)
 Order By B.ID, Year(Cal.D), Month(Cal.D)
Returns
ID Year Month Days
1 2017 5 2
2 2017 5 0
dbFiddle if it Helps
EDIT - 2 Corrects for Overlaps (Gaps and Islands)
--Create Some Sample Data
----------------------------------------------------------------------
-- Table variables are declared with @ (a leading # names a temp table).
Declare @Vacationtbl Table ([ID] varchar(50),[Start] date,[End] date)
Insert Into @Vacationtbl ([ID],[Start],[End]) Values
 (01,'04/10/17','04/12/17')
,(01,'04/27/17','05/02/17')
,(02,'04/13/17','04/15/17')
,(02,'04/17/17','04/20/17')
,(02,'04/16/17','04/17/17') -- << Overlap
,(03,'05/16/17','05/17/17')
-- The Actual Query
----------------------------------------------------------------------
-- 1. E: expand every range to distinct (ID, day) rows, which removes overlaps.
-- 2. G: label islands of consecutive days per ID. Within one ID, day minus row number
--       is constant exactly while the days are consecutive. (A Dense_Rank over ALL
--       days is not a safe key: it only advances on days that some ID uses, so two
--       separated ranges can collapse into one island.)
-- 3. B: collapse each island to a [Start]/[End] pair.
-- 4. Count the April 2017 calendar days that fall inside each island.
Select ID
      ,Year  = Year(D)
      ,Month = Month(D)
      ,Days  = sum(case when D between [Start] and [End] then 1 else 0 end)
 From (Select Top (DateDiff(DAY,'04/01/2017','04/30/2017')+1) D=DateAdd(DAY,-1+Row_Number() Over (Order By (Select Null)),'04/01/2017') From master..spt_values ) D
 Cross Join (
    Select ID,[Start] = min(D),[End] = max(D)
     From (
        Select E.*,Grp = DateAdd(DAY,-Row_Number() over (Partition By ID Order By D),D)
         From (
            Select Distinct A.ID,D
             From @Vacationtbl A
             Cross Apply (Select Top (DateDiff(DAY,A.[Start],A.[End])+1) D=DateAdd(DAY,-1+Row_Number() Over (Order By (Select Null)),A.[Start]) From master..spt_values ) B
         ) E
     ) G
     Group By ID,Grp
 ) B
 Group By ID,Year(D),Month(D)
 Order By 1,2,3
Returns
ID Year Month Days
1 2017 4 7
2 2017 4 8
3 2017 4 0
Without a dates table, you could use
-- Vacation days per employee inside April 2017: clip each overlapping range to the
-- month (start no earlier than 04/01, end no later than 04/30) and add up the
-- inclusive day counts.
select Id
      ,sum(datediff(day,
                    case when [start] < '20170401' then '20170401' else [start] end,
                    case when [end] > '20170430' then '20170430' else [end] end) + 1) as VacationDays
from Vacationtbl
-- keep only ranges that overlap the month at all
where [end] >= '20170401' and [start] <= '20170430'
group by Id
There are 3 conditions here
Start is before this month and the end is after this month. In this case you subtract the end and start dates of the month.
End is after month end and start is in the month, in this case subtract month end date from the start.
Start is before this month but the end is in the month. In this case subtract month start date and the end date.
Edit: Based on the OP's comments that the future dates have to be included,
/* Recursive CTE producing one row per month of 2017 with its first and last day.
   Adjust the anchor date and the stop condition to cover another period. */
with dates (month_start_date, month_end_date) as
(
    select cast('2017-01-01' as date), cast(eomonth('2017-01-01') as date)
    union all
    select dateadd(month, 1, month_start_date), eomonth(dateadd(month, 1, month_start_date))
    from dates
    where month_start_date < '2017-12-01'
)
-- Per employee and month: clip each overlapping vacation to the month boundaries
-- and sum the inclusive day counts.
select v.Id
      ,year(d.month_start_date) as yr
      ,month(d.month_start_date) as mth
      ,sum(datediff(day,
                    case when v.[start] < d.month_start_date then d.month_start_date else v.[start] end,
                    case when v.[end] > d.month_end_date then d.month_end_date else v.[end] end) + 1) as VacationDays
from dates d
join Vacationtbl v
  on v.[end] >= d.month_start_date
 and v.[start] <= d.month_end_date
group by v.id, year(d.month_start_date), month(d.month_start_date)
Assuming you want only one month and you want to count all days, you can do this with arithmetic. A separate calendar table is not necessary. The advantage is performance.
I think this would be easier if SQL Server supported least() and greatest(), but case will do:
-- Vacation days per employee within April 2017, computed arithmetically.
-- Fixes over the posted version:
--   * the table is Vacationtbl (was misspelled "vactiontbl");
--   * the VALUES constructor yields ONE row with two columns (news, newe); written as
--     two parenthesised expressions it produced two single-column rows, which does not
--     match the two-column alias;
--   * the derived table gets its own alias (c); reusing "v" clashed with the table alias.
select v.id,
       sum(1 + datediff(day, c.news, c.newe)) as vacation_days_april
from Vacationtbl v cross apply
     (values (case when v.[start] < '2017-04-01' then cast('2017-04-01' as date) else v.[start] end,
              case when v.[end] >= '2017-05-01' then cast('2017-04-30' as date) else v.[end] end)
     ) c(news, newe)
-- ranges entirely outside April clip to news > newe and are dropped
where c.news <= c.newe
group by v.id;
You can readily extend this to any month:
-- Same calculation with the month boundaries supplied by a one-row CTE, so only
-- month_start / month_end need changing to report on another month.
-- Fixes over the posted version: table name spelled Vacationtbl, VALUES builds one
-- two-column row, the derived table has its own alias (c), and the output alias typo
-- "vacation_days_aprile" is corrected.
with m as (
    select cast('2017-04-01' as date) as month_start,
           cast('2017-04-30' as date) as month_end
)
select v.id,
       sum(1 + datediff(day, c.news, c.newe)) as vacation_days_april
from m cross join
     Vacationtbl v cross apply
     (values (case when v.[start] < m.month_start then m.month_start else v.[start] end,
              case when v.[end] >= m.month_end then m.month_end else v.[end] end)
     ) c(news, newe)
-- ranges entirely outside the month clip to news > newe and are dropped
where c.news <= c.newe
group by v.id;
You can even use a similar idea to extend to multiple months, with a different row for each user and each month.
You can use a Calendar or dates table for this sort of thing.
For only 152kb in memory, you can have 30 years of dates in a table with this:
/* dates table */
-- Local variables need the @ prefix (a leading # names a temp table).
declare @fromdate date = '20000101';
declare @years int = 30;
/* 30 years, 19 used data pages ~152kb in memory, ~264kb on disk */
-- n: digits 0..9; five cross joins give up to 100,000 candidate rows, of which TOP
-- keeps exactly the number of days between @fromdate and @fromdate + @years.
;with n as (select n from (values(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) t(n))
select top (datediff(day, @fromdate,dateadd(year,@years,@fromdate)))
    [Date]=convert(date,dateadd(day,row_number() over(order by (select 1))-1,@fromdate))
into dbo.Dates
from n as deka cross join n as hecto cross join n as kilo
cross join n as tenK cross join n as hundredK
order by [Date];
create unique clustered index ix_dbo_Dates_date
on dbo.Dates([Date]);
Without taking the actual step of creating a table, you can use it inside a common table expression with just this:
-- Inline calendar for a date range without creating a table.
-- Local variables need the @ prefix (a leading # names a temp table).
declare @fromdate date = '20170401';
declare @thrudate date = '20170430';
;with n as (select n from (values(0),(1),(2),(3),(4),(5),(6),(7),(8),(9)) t(n))
, dates as (
    -- one row per day from @fromdate through @thrudate inclusive
    select top (datediff(day, @fromdate, @thrudate)+1)
        [Date]=convert(date,dateadd(day,row_number() over(order by (select 1))-1,@fromdate))
    from n as deka cross join n as hecto cross join n as kilo
    cross join n as tenK cross join n as hundredK
    order by [Date]
)
select [Date]
from dates;
Use either like so:
-- One joined row per vacation day that falls inside April 2017; counting the rows
-- per employee gives the vacation days for the month.
select
    v.Id,
    count(*) as VacationDays
from Dates as d
inner join Vacationtbl as v
    on v.[Start] <= d.Date
   and v.[End] >= d.Date
where d.Date between '20170401' and '20170430'
group by v.Id
rextester demo (table): http://rextester.com/PLW73242
rextester demo (cte): http://rextester.com/BCY62752
returns:
+----+--------------+
| Id | VacationDays |
+----+--------------+
| 01 | 7 |
| 02 | 7 |
+----+--------------+
Number and Calendar table reference:
Generate a set or sequence without loops - 2 - Aaron Bertrand
The "Numbers" or "Tally" Table: What it is and how it replaces a loop - Jeff Moden
Creating a Date Table/Dimension in sql Server 2008 - David Stein
Calendar Tables - Why You Need One - David Stein
Creating a date dimension or calendar table in sql Server - Aaron Bertrand
Try this,
-- Sample data. Table variables and scalar variables need the @ prefix.
declare @Vacationtbl table(ID int,Startdate date,Enddate date)
insert into @Vacationtbl (ID, Startdate, Enddate) VALUES
 (1 ,'04/10/17','04/12/17')
,(1 ,'04/27/17','05/02/17')
,(2 ,'04/13/17','04/15/17')
,(2 ,'04/17/17','04/20/17')
-- somehow convert your input into first day of month
Declare @firstDayofGivenMonth date='2017-04-01'
-- last day of that month = first day of the following month minus one day
Declare @LastDayofGivenMonth date=dateadd(day,-1,dateadd(month,datediff(month,0,@firstDayofGivenMonth)+1,0))
;with CTE as
(
    -- clip every vacation to the month boundaries
    select *
    ,case when Startdate<@firstDayofGivenMonth then @firstDayofGivenMonth else Startdate end NewStDT
    ,case when Enddate>@LastDayofGivenMonth then @LastDayofGivenMonth else Enddate end NewEDT
    from @Vacationtbl
    -- only vacations that overlap the month; without this filter a vacation lying
    -- entirely outside the month clips to NewStDT > NewEDT and adds a negative count
    where Startdate <= @LastDayofGivenMonth
      and Enddate >= @firstDayofGivenMonth
)
SELECT
    SUM(DATEDIFF(DAY, NewStDT, NewEDT) + 1) AS Days
FROM
    CTE
GROUP BY
    ID

SQL - Select values from a table based on dates using incrementing dates

I have a SQL table of dates (MM/DD format), targets, and levels, as such:
Date Target Level
10/2 1000 1
10/4 2000 1
10/7 2000 2
I want to use those dates as tiers, or checkpoints, for when to use the respective targets and levels. So, anything on or after those dates (until the next date) would use that target/level. Anything before the first date just uses the values from the first date.
I want to select a range of dates (a 5 week range of dates, with the start date and end date of the range being determined by the current day: 3 weeks back from today, to 2 weeks forward from today) and fill in the targets and levels accordingly, as such:
Date Target Level
10/1 1000 1
10/2 1000 1
10/3 1000 1
10/4 2000 1
10/5 2000 1
10/6 2000 1
10/7 2000 2
10/8 2000 2
...
11/5 2000 2
How do I go about:
Selecting the range of dates (as efficiently as possible)
Filling in the range of dates with the respective target/level from the appropriate date in my table?
Thank you.
You can do this using outer apply. The following creates a list of dates using a recursive CTE:
-- d: today and the preceding days, generated by a recursive CTE that keeps stepping
-- back one day while the current day is not older than 30 days.
with d as (
    select cast(getdate() as date) as dte
    union all
    select dateadd(day, -1, dte)
    from d
    where dte >= getdate() - 30
)   -- this closing parenthesis was missing, leaving the CTE unterminated
-- For each generated day, take the latest row of t dated on or before that day;
-- OUTER APPLY keeps the day (with NULLs) when no such row exists.
select d.dte, t.target, t.level
from d outer apply
     (select top 1 src.*
      from t as src
      where d.dte >= src.dte
      order by src.dte desc
     ) t;   -- a derived table used with APPLY must have an alias
you can use a CTE to generate your 'missing' dates, then use a CROSS APPLY to obtain the target and level that was last active (by querying the TOP 1 DESC where the date is on or before current date) - finally I introduced 'maximum date' as a variable
-- Upper bound of the generated date range. Local variables need the @ prefix.
DECLARE @MAXD as DATETIME = '20161105';
-- DATS: every day from the earliest [Date] in YourTab through @MAXD.
WITH DATS AS (SELECT MIN([Date]) D FROM dbo.YourTab
    UNION ALL
    SELECT dateadd(day,1,D) FROM DATS WHERE D < @MAXD)
select DATS.D, CA.Target, CA.Level from DATS
-- latest target/level dated on or before the generated day; CROSS APPLY drops days
-- that have no such row
CROSS APPLY
    (SELECT TOP 1 Y.Target, Y.Level FROM YourTab Y
     WHERE
     Y.[Date] <= DATS.D
     ORDER BY Y.Date DESC) CA
-- lift the default recursion limit of 100 so long ranges do not fail
option (maxrecursion 0);
I made a bit of a change with dates to go back 3 and forward two weeks - also I switched to outer apply to handle no data in force
-- Range: 3 weeks back from today through 5 weeks after that start (2 weeks ahead).
-- Local variables need the @ prefix.
DECLARE @MIND as DATETIME = dateadd(week,-3,cast(getdate() as date));
DECLARE @MAXD as DATETIME = dateadd(week, 5,@MIND);
-- DATS: every day from @MIND through @MAXD.
WITH DATS AS (SELECT @MIND D
    UNION ALL
    SELECT dateadd(day,1,D) FROM DATS WHERE D < @MAXD)
select DATS.D, CA.Target, CA.Level from DATS
-- OUTER APPLY keeps days with no row on or before them (Target/Level come back NULL)
OUTER APPLY
    (SELECT TOP 1 Y.Target, Y.Level FROM YourTab Y WHERE Y.[Date] <= DATS.D ORDER BY Y.Date DESC) CA
ORDER BY DATS.D
option (maxrecursion 0);
Final change - if there is no earlier value for the date - take first future row
-- Range: 3 weeks back from today through 5 weeks after that start (2 weeks ahead).
-- Local variables need the @ prefix.
DECLARE @MIND as DATETIME = dateadd(week,-3,cast(getdate() as date));
DECLARE @MAXD as DATETIME = dateadd(week, 5,@MIND);
-- DATS: every day from @MIND through @MAXD.
WITH DATS AS (SELECT @MIND D
    UNION ALL
    SELECT dateadd(day,1,D) FROM DATS WHERE D < @MAXD)
-- CA: latest row on or before the day; MQ: the earliest row overall, used as the
-- fallback for days that precede every row in YourTab.
select DATS.D, COALESCE(CA.Target, MQ.Target) Target , COALESCE(CA.Level, MQ.Level) Level from DATS
OUTER APPLY
    (SELECT TOP 1 Y.Target, Y.Level FROM YourTab Y WHERE Y.[Date] <= DATS.D ORDER BY Y.Date DESC) CA
OUTER APPLY
    (
    SELECT TOP 1 M.Target, M.Level FROM YourTab M ORDER BY M.[Date] ASC
    ) MQ
ORDER BY DATS.D
option (maxrecursion 0);
I don't know why you store dates as MM/DD, but you need some conversion into the right datatype. This could do the trick:
-- Fills a 5-week window of days (3 weeks back, 2 weeks forward from now) with the
-- Target/Level of the latest checkpoint on or before each day, falling back to the
-- next checkpoint when no earlier one exists.
-- The MM/DD strings are converted by inserting a 0 after the slash and appending the
-- hard-coded year '/2016', then parsing with style 101.
-- NOTE(review): the zero-padding via REPLACE suits the single-digit days in the sample;
-- confirm the conversion for two-digit days such as '10/10'.
;WITH YourTable AS (
SELECT *
FROM (VALUES
('10/2', 1000, 1),
('10/4', 2000, 1),
('10/7', 2000, 2)
) as t([Date], [Target], [Level])
), dates_cte AS ( --this CTE is generating dates you need
SELECT DATEADD(week,-3,GETDATE()) as d --3 weeks back
UNION ALL
SELECT dateadd(day,1,d)
FROM dates_cte
WHERE d < DATEADD(week,2,GETDATE()) --2 weeks forward
)
-- style 101 gives mm/dd/yyyy; the first 5 characters are mm/dd and REPLACE strips the
-- leading zero of the day
SELECT REPLACE(CONVERT(nvarchar(5),d,101),'/0','/') as [Date],
COALESCE(t.[Target],t1.[Target]) [Target],
COALESCE(t.[Level],t1.[Level]) [Level]
FROM dates_cte dc
OUTER APPLY ( --Here we got PREVIOUS values
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) <= dc.d
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) DESC
) t
OUTER APPLY ( --Here we got NEXT values and use them if there is no PREV
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) >= dc.d
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) ASC
) t1
Output:
Date Target Level
10/5 2000 1
10/6 2000 1
10/7 2000 2
10/8 2000 2
10/9 2000 2
10/10 2000 2
10/11 2000 2
10/12 2000 2
...
11/9 2000 2
EDIT
With Categories:
-- Category-aware variant: the generated days are cross joined with the distinct
-- categories, and the previous/next checkpoint lookups are restricted to the same
-- category.
-- NOTE(review): as in the version without categories, the year '/2016' is hard-coded
-- and the zero-padding via REPLACE suits the single-digit days in the sample.
;WITH YourTable AS (
SELECT *
FROM (VALUES
('10/2', 1000, 1, 'A'),
('10/4', 3000, 1, 'B'),
('10/7', 2000, 2, 'A')
) as t([Date], [Target], [Level], [Category])
-- dates_cte: one row per day from 3 weeks back to 2 weeks forward of the current time
), dates_cte AS (
SELECT DATEADD(week,-3,GETDATE()) as d
UNION ALL
SELECT dateadd(day,1,d)
FROM dates_cte
WHERE d < DATEADD(week,2,GETDATE())
)
SELECT REPLACE(CONVERT(nvarchar(5),d,101),'/0','/') as [Date],
COALESCE(t.[Target],t1.[Target]) [Target],
COALESCE(t.[Level],t1.[Level]) [Level],
c.Category
FROM dates_cte dc
-- one output row per day and category
CROSS JOIN (
SELECT DISTINCT Category
FROM YourTable
) c
-- t: latest checkpoint of this category on or before the day
OUTER APPLY (
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) <= dc.d
AND c.Category = Category
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) DESC
) t
-- t1: earliest checkpoint of this category on or after the day, used when t is empty
OUTER APPLY (
SELECT TOP 1 *
FROM YourTable
WHERE CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) >= dc.d
AND c.Category = Category
ORDER BY CONVERT(datetime,REPLACE([Date],'/','/0')+'/2016',101) ASC
) t1
ORDER BY c.Category, d
Not sure if I'm over simplifying this, but:
-- Template only: names in angle brackets (<value_table>, <value_date>, <Date>) are
-- placeholders to be replaced before this can run.
-- NOTE(review): the join condition refers to Date_Range_Start / Date_Range_End, which
-- are defined only as aliases in the select list; confirm how the ranges are meant to
-- be supplied.
select min(X.Date) Date_Range_Start, max(X.date) Date_Range_End
, V.<value_date>
, isnull(X.Target, 'Out of range') Target
, isnull(X.Level, 'Out of range') Level
from X --replace this with your table
left join <value_table> V --table with dates to be assessed
on V.<Date> between X.Date_Range_Start and X.Date_Range_End
group by Target, Level, V.<value_date>

How can I find a value that doesn't exist in a table?

I have a simplified table called Bookings that has two columns BookDate and BookSlot. The BookDate column will have dates only (no time) and the BookSlot column will contain the time of the day in intervals of 30 minutes from 0 to 1410 inclusive. (i.e. 600 = 10:00am)
How can I find the first slot available in the future (not booked) without running through a loop?
Here is the table definition and test data:
-- Booked slots: BookDate holds the day, BookSlot the minutes after midnight.
Create Table Bookings(
    BookDate DateTime Not Null,
    BookSlot Int Not Null
)
Go
-- Test data: three consecutive slots plus one later slot on 1 July, three consecutive
-- slots on 2 July.
Insert Into Bookings (BookDate, BookSlot)
Values ('2014-07-01', 0),
       ('2014-07-01', 30),
       ('2014-07-01', 60),
       ('2014-07-01', 630),
       ('2014-07-02', 60),
       ('2014-07-02', 90),
       ('2014-07-02', 120);
I want a way to return the first available slot that is not in the table and that is in the future (based on server time).
Based on above test data:
If the current server time was 1st Jul, 00:10am, the result should be 1st Jul, 90min (01:30am).
If the current server time was 2nd Jul, 01:05am, the result should be 2nd Jul, 150min (02:30am).
If there are no bookings in the future, the function would simply return the closest half-hour in the future.
--
SQL Fiddle for this is here:
http://sqlfiddle.com/#!6/0e93d/1
Below is one method that will allow bookings up to 256 days in the future, and allow for an empty Booking table. I assume you are using SQL Server 2005 since your BookDate is dateTime instead of date.
In any case, you might consider storing the slots as a complete datetime instead of separate columns. That will facilitate queries and improve performance.
-- Reference time for "the future". Local variables need the @ prefix.
DECLARE @now DATETIME = '2014-07-01 00:10:00';
-- T4: 8 placeholder rows; T256: T4 x T4 x T4 numbered 0..511 (8 * 8 * 8 rows, despite
-- the CTE names). T256 supplies both the half-hour numbers and the day offsets.
WITH T4
     AS (SELECT N
         FROM   (VALUES(0),
                       (0),
                       (0),
                       (0),
                       (0),
                       (0),
                       (0),
                       (0)) AS t(N)),
     T256
     AS (SELECT Row_number()
                  OVER(
                    ORDER BY (SELECT 0)) - 1 AS n
         FROM   T4 AS a
                CROSS JOIN T4 AS b
                CROSS JOIN T4 AS c),
     -- @now truncated to midnight
     START_DATE
     AS (SELECT Dateadd(DAY, Datediff(DAY, '', @now), '') AS start_date),
     -- @now rounded down to the half hour
     START_TIME
     AS (SELECT Dateadd(MINUTE, Datediff(MINUTE, '', @now) / 30 * 30, '') AS
                start_time),
     -- the 48 half-hour slots of a day: 0, 30, ..., 1410
     DAILY_INTERVALS
     AS (SELECT N * 30 AS interval
         FROM   T256
         WHERE  N < 48)
SELECT TOP (1) Dateadd(DAY, future_days.N, START_DATE.start_date) AS BookDate,
               DAILY_INTERVALS.INTERVAL                           AS BookSlot
FROM   START_DATE
       CROSS APPLY START_TIME
       CROSS APPLY DAILY_INTERVALS
       CROSS APPLY T256 AS future_days
-- candidate slot must start after the current half hour
WHERE  Dateadd(MINUTE, DAILY_INTERVALS.INTERVAL,
              Dateadd(DAY, future_days.N, START_DATE.start_date)) > START_TIME.start_time
       -- ...and must not be booked on the CANDIDATE day. Comparing against start_date
       -- alone checked today's bookings for every future day.
       AND NOT EXISTS(SELECT *
                      FROM   DBO.BOOKINGS AS bk
                      WHERE  bk.BOOKDATE = Dateadd(DAY, future_days.N, START_DATE.start_date)
                             AND bk.BOOKSLOT = DAILY_INTERVALS.INTERVAL)
ORDER  BY BOOKDATE,
          BOOKSLOT;
See this SQL Fiddle
It's a bit complicated but try this:
-- DATA: all bookings numbered in chronological order.
WITH DATA
     AS (SELECT *,
                Row_number()
                  OVER (
                    ORDER BY BOOKDATE, BOOKSLOT) RN
         FROM   BOOKINGS)
-- The free slot is the one right after the booking that precedes the first gap;
-- after the last slot of a day (1410) it is slot 0 of the next day.
SELECT CASE
         WHEN T.BOOKSLOT = 1410 THEN Dateadd(DAY, 1, T.BOOKDATE)
         ELSE T.BOOKDATE
       END Book_Date,
       CASE
         WHEN T.BOOKSLOT = 1410 THEN 0
         ELSE T.BOOKSLOT + 30
       END Book_Slot
FROM   (SELECT TOP 1 T1.*
        FROM   DATA T1
               -- T2 is the booking immediately after T1
               LEFT JOIN DATA T2
                      ON T1.RN = T2.RN - 1
        WHERE  T2.BOOKSLOT - T1.BOOKSLOT > 30
                -- Across days the bookings are contiguous only when T1 is the last slot
                -- (1410) of one day and T2 is slot 0 of the very next day. The posted
                -- query compared against 630, the last sample booking, not the last slot.
                OR ( T1.BOOKDATE != T2.BOOKDATE
                     AND ( T2.BOOKSLOT != 0
                            OR T1.BOOKSLOT != 1410
                            OR T2.BOOKDATE != Dateadd(DAY, 1, T1.BOOKDATE) ) )
                -- no later booking at all
                OR T2.BOOKSLOT IS NULL
        -- TOP 1 needs an explicit order to return the FIRST gap
        ORDER  BY T1.RN)T
Here is the SQL fiddle example.
Explanation
This solution contains 2 parts:
Comparing each line to the next and checking for a gap (can be done easier in SQL 2012)
Adding a half an hour to create the next slot, this includes moving to the next day if needed.
Edit
Added TOP 1 in the query so that only the first slot is returned as requested.
Update
Here is the updated version including 2 new elements (getting current date+ time and dealing with empty table):
-- Capture the server time once so date and slot are derived from the same instant.
-- Local variables need the @ prefix.
DECLARE @now DATETIME = Getdate();
-- @Date: today at midnight; @Slot: the next half-hour boundary in minutes after midnight.
DECLARE @Date DATETIME = Cast(@now AS DATE),
        @Slot INT = Datepart(HOUR, @now) * 60 + Datepart(MINUTE, @now) / 30 * 30 + 30;
-- From 23:30 onwards the next half hour is 00:00 tomorrow, not the invalid slot 1440.
IF @Slot >= 1440
  BEGIN
      SET @Slot = 0;
      SET @Date = Dateadd(DAY, 1, @Date);
  END;

-- DATA: bookings at or after the requested slot, numbered chronologically.
;WITH DATA
     AS (SELECT *,
                Row_number()
                  OVER (
                    ORDER BY BOOKDATE, BOOKSLOT) RN
         FROM   BOOKINGS
         WHERE  BOOKDATE > @Date
                 OR ( BOOKDATE = @Date
                      AND BOOKSLOT >= @Slot ))
SELECT TOP 1 BOOK_DATE,
             BOOK_SLOT
FROM   (SELECT CASE
                 -- the requested slot itself is free: use it. (Testing RN = 1 on the
                 -- gap row missed this whenever the first gap was not after row 1.)
                 WHEN NOT EXISTS (SELECT 1
                                  FROM   BOOKINGS B
                                  WHERE  B.BOOKDATE = @Date
                                         AND B.BOOKSLOT = @Slot) THEN @Date
                 WHEN T.BOOKSLOT = 1410 THEN Dateadd(DAY, 1, T.BOOKDATE)
                 ELSE T.BOOKDATE
               END Book_Date,
               CASE
                 WHEN NOT EXISTS (SELECT 1
                                  FROM   BOOKINGS B
                                  WHERE  B.BOOKDATE = @Date
                                         AND B.BOOKSLOT = @Slot) THEN @Slot
                 WHEN T.BOOKSLOT = 1410 THEN 0
                 ELSE T.BOOKSLOT + 30
               END Book_Slot,
               1   AS ID
        FROM   (SELECT TOP 1 T1.*
                FROM   DATA T1
                       -- T2 is the booking immediately after T1
                       LEFT JOIN DATA T2
                              ON T1.RN = T2.RN - 1
                WHERE  T2.BOOKSLOT - T1.BOOKSLOT > 30
                        -- across days, contiguous only for 1410 -> slot 0 of the next day
                        OR ( T1.BOOKDATE != T2.BOOKDATE
                             AND ( T2.BOOKSLOT != 0
                                    OR T1.BOOKSLOT != 1410
                                    OR T2.BOOKDATE != Dateadd(DAY, 1, T1.BOOKDATE) ) )
                        OR T2.BOOKSLOT IS NULL
                -- TOP 1 needs an explicit order to return the FIRST gap
                ORDER  BY T1.RN)T
        -- fallback when there are no bookings at or after the requested slot
        UNION ALL
        SELECT @Date AS bookDate,
               @Slot AS BookSlot,
               2     ID)X
ORDER  BY X.ID
Play around with the SQL fiddle and let me know what you think.
In SQL Server 2012 and later, you can use the lead() function. The logic is a bit convoluted because of all the boundary conditions. I think this captures it:
-- First free slot that directly follows an existing booking, using LEAD()
-- (SQL Server 2012+). Slots are minutes since midnight in 30-minute steps;
-- 1410 is treated as the last slot of a day.
select top (1)
    -- Only the last slot of the day rolls over to the next date.
    (case when b.BookSlot = 1410 then dateadd(day, 1, b.BookDate) else b.BookDate end) as BookDate,
    (case when b.BookSlot = 1410 then 0 else b.BookSlot + 30 end) as BookSlot
from (select bk.BookDate,
             bk.BookSlot,
             -- Both LEADs use the same full ordering so they describe the same next row.
             lead(bk.BookDate) over (order by bk.BookDate, bk.BookSlot) as next_dt,
             lead(bk.BookSlot) over (order by bk.BookDate, bk.BookSlot) as next_bs
      from bookings bk
     ) b
where b.next_dt is null                                   -- last booking overall
   or (b.BookSlot < 1410
       and (b.next_dt <> b.BookDate                       -- next booking is on a later day
            or b.next_bs > b.BookSlot + 30))              -- gap within the same day
   or (b.BookSlot = 1410
       and (b.next_dt <> dateadd(day, 1, b.BookDate)      -- next day has no bookings
            or b.next_bs <> 0))                           -- next day does not start at 00:00
order by b.BookDate, b.BookSlot;
Using a tally table to generate a list of originally available booking slots out 6 weeks (adjustable below):
-- Build every 30-minute slot for the next six weeks, then return the first
-- one later than the current half-hour window that has no row in #Bookings.
declare @now  as datetime = getdate();  -- captured once so all parts agree
declare @Date as date = @now;
-- Minutes since midnight, rounded down to the current half-hour window.
declare @slot as int = 30 * (datediff(minute, @Date, @now) / 30);

with
slots as (
    -- 8 half-hours x 6 blocks = 48 slots per day: 0, 30, ..., 1410.
    select (row_number() over (order by half_hours.s) - 1) * 30 as BookSlot
    from (
        values (1),(1),(1),(1),(1),(1),(1),(1)  -- 4 hour block
    ) half_hours(s)
    cross join (
        values (1),(1),(1),(1),(1),(1)          -- 6 blocks of 4 hours each day
    ) QuadHours(t)
)
,days as (
    -- 7 days x 6 weeks = 42 consecutive dates starting today.
    select dateadd(day, cast(row_number() over (order by dayList.s) - 1 as int), @Date) as BookDate
    from (
        values (1),(1),(1),(1),(1),(1),(1)      -- 7 days in a week
    ) dayList(s)
    cross join (
        -- set this to number of weeks out to allow bookings to be made
        values (1),(1),(1),(1),(1),(1)          -- allow 6 weeks of bookings at a time
    ) weeks(t)
)
,tally as (
    -- Every (date, slot) combination that could be booked.
    select
        days.BookDate  as BookDate
       ,slots.BookSlot as BookSlot
    from slots
    cross join days
)
select top (1)
    tally.BookDate
   ,tally.BookSlot
from tally
left join #Bookings book
    on  tally.BookDate = book.BookDate
    and tally.BookSlot = book.BookSlot
where book.BookSlot is null  -- anti-join: keep only unbooked slots
  -- tally starts today, so a row is either a later day or must be a later slot today
  and ( tally.BookDate > @Date or tally.BookSlot > @slot )
order by tally.BookDate, tally.BookSlot;
go
try this:
-- List every slot that directly follows a booking and is itself unbooked.
-- Integer minute arithmetic keeps bookslot an INT (the decimal hour maths
-- returned values such as 660.000000), and slot 1410 rolls over to slot 0 of
-- the next day instead of yielding the non-existent slot 1440.
SELECT
    nxt.bookdate,
    nxt.bookslot
FROM bookings AS a
CROSS APPLY (
    -- The slot that comes immediately after booking a.
    SELECT
        CASE WHEN a.bookslot = 1410 THEN DATEADD(DAY, 1, a.bookdate) ELSE a.bookdate END AS bookdate,
        CASE WHEN a.bookslot = 1410 THEN 0 ELSE a.bookslot + 30 END AS bookslot
) AS nxt
LEFT JOIN bookings AS b
    ON  b.bookdate = nxt.bookdate
    AND b.bookslot = nxt.bookslot
WHERE b.bookslot IS NULL  -- anti-join: the following slot has no booking
ORDER BY nxt.bookdate, nxt.bookslot;

SQL Server creating two tables and comparing them

I have a table with 3 columns (in SQL Server 2012). One of the columns is a date column. What I would like to do is split the table for two specified dates and merge them into one table with an extra field. Hopefully the example below will explain.
Example of what I currently have.
Company date no_employees
ABC 2014-05-30 35
DEF 2014-05-30 322
GHI 2014-05-30 65
JKL 2014-05-30 8
MNO 2014-05-30 30
ABC 2014-01-01 33
DEF 2014-01-01 301
GHI 2014-01-01 70
MNO 2014-01-01 30
What I would like a query to return for me (not sure if it's possible):
Company start date no_employees end date no_employees diff
ABC 33 35 2
DEF 301 322 21
GHI 70 65 -5
JKL 0 8 8
MNO 30 30 0
PIVOT (and COALESCE to generate the 0s) seems to do it:
-- Sample data: one headcount snapshot per company and date.
declare @t table (Company char(3), [date] date, no_employees int);

insert into @t (Company, [date], no_employees) values
    ('ABC', '2014-05-30', 35 ),
    ('DEF', '2014-05-30', 322),
    ('GHI', '2014-05-30', 65 ),
    ('JKL', '2014-05-30', 8  ),
    ('MNO', '2014-05-30', 30 ),
    ('ABC', '2014-01-01', 33 ),
    ('DEF', '2014-01-01', 301),
    ('GHI', '2014-01-01', 70 ),
    ('MNO', '2014-01-01', 30 );

-- Pivot the two snapshot dates into start/end columns per company.
-- COALESCE turns a missing snapshot into 0 so diff is always computable.
select u.Company,
       coalesce(u.[start], 0) as [start],
       coalesce(u.[end], 0)   as [end],
       coalesce(u.[end], 0) - coalesce(u.[start], 0) as diff
from
    (select
         Company,
         -- Label each row with the pivot column it belongs to.
         case when [date] = '20140530' then 'end'
              else 'start' end as period,
         no_employees
     from @t
     where [date] in ('20140101', '20140530')
    ) t
pivot (max(no_employees) for period in ([start], [end])) u
order by u.Company;
Result:
Company start end diff
------- ----------- ----------- -----------
ABC 33 35 2
DEF 301 322 21
GHI 70 65 -5
JKL 0 8 8
MNO 30 30 0
This could easily be parameterized for the specific start and end dates to use.
Also, at the moment I'm using MAX because we have to have an aggregate in PIVOT, even though here the sample data contains a maximum of one row. If there's a possibility of multiple rows existing for the start or end date, we'd need to know how you want that handled.
-- Compare headcount per company between two snapshot dates.
-- NOTE(review): reads a temp table #t that is not created in this snippet; it
-- is expected to expose company, recorddate and no_employees - confirm.
declare @lowdate date = '2014-01-01';
declare @highdate date = '2014-05-30';

with x as
(
    -- Headcount on the start date (lowest value if a company has several rows).
    select company, min(no_employees) no_employees
    from #t records
    where recorddate = @lowdate
    group by company
), y as
(
    -- Headcount on the end date (highest value if a company has several rows).
    select company, max(no_employees) no_employees
    from #t records
    where recorddate = @highdate
    group by company
)
-- FULL OUTER JOIN keeps companies present on only one of the two dates;
-- COALESCE reports the missing side as 0.
select coalesce(x.company, y.company) company,
       coalesce(x.no_employees, 0) start_no_employees,
       coalesce(y.no_employees, 0) end_no_employees,
       coalesce(y.no_employees, 0) - coalesce(x.no_employees, 0) diff
from
    x full outer join y
on
    x.company = y.company;
-- Pair each company's earlier snapshot (T1) with every later one (T2).
-- Known limitation: the INNER JOIN omits companies that have a single snapshot.
Create Table #temp (Company varchar(10), CDate date, emp int);

Select T1.Company,
       T1.emp AS start_emp,
       T2.emp AS end_emp,
       -- later minus earlier, so growth is positive
       (T2.emp - T1.emp) AS Diff
from #temp T1
inner join #temp T2
    On  T1.Company = T2.Company
    and T1.CDate < T2.CDate
Order by T1.Company, T1.CDate;
-- Sample data: one headcount snapshot per company and date.
declare @t table (Company char(3), [date] date, no_employees int);

insert into @t (Company, [date], no_employees) values
    ('ABC', '2014-05-30', 35 ),
    ('DEF', '2014-05-30', 322),
    ('GHI', '2014-05-30', 65 ),
    ('JKL', '2014-05-30', 8  ),
    ('MNO', '2014-05-30', 30 ),
    ('ABC', '2014-01-01', 33 ),
    ('DEF', '2014-01-01', 301),
    ('GHI', '2014-01-01', 70 ),
    ('MNO', '2014-01-01', 30 );

-- Start and end headcount per company, plus the change between them.
-- The values are picked by date rather than by MIN/MAX: MIN/MAX lose the
-- direction of the change and cannot tell "unchanged" from "only one snapshot".
select Company,
       coalesce(max(case when [date] = '20140101' then no_employees end), 0) as start_no_employees,
       coalesce(max(case when [date] = '20140530' then no_employees end), 0) as end_no_employees,
       coalesce(max(case when [date] = '20140530' then no_employees end), 0)
     - coalesce(max(case when [date] = '20140101' then no_employees end), 0) as cNT
from @t
where [date] in ('20140101', '20140530')
GROUP BY Company
order by Company;
Select the companies, outer join the start-date records, then outer join the end-date records. Use COALESCE to show 0 instead of NULL.
-- Start and end headcount per company via two outer joins against the list of
-- companies, so a company missing on one date still appears (with 0).
select
    companies.company,
    coalesce(rec20140101.no_employees, 0) as empno_start,
    coalesce(rec20140530.no_employees, 0) as empno_end
from
(
    select distinct company
    from records
) companies -- or use a company table if you have one
left join
(
    -- snapshot on the start date
    select company, no_employees
    from records
    where recorddate = '2014-01-01'
) rec20140101
    on rec20140101.company = companies.company
left join
(
    -- snapshot on the end date
    select company, no_employees
    from records
    where recorddate = '2014-05-30'
) rec20140530
    on rec20140530.company = companies.company;
EDIT: And here is a way to scan the table just once. It's even a little shorter ;-)
-- Single-scan variant: conditional aggregation picks each date's headcount.
-- The CASE yields NULL for rows of the other date, which MIN ignores;
-- COALESCE reports a missing snapshot as 0.
select
    company,
    coalesce(min( case when recorddate = '2014-01-01' then no_employees end ), 0) as empno_start,
    coalesce(min( case when recorddate = '2014-05-30' then no_employees end ), 0) as empno_end
from records
group by company;
Try this:
-- Pair each company's latest snapshot (src) with its earliest one (tgt) and
-- report the difference. The latest and earliest dates are taken from tbl itself.
;with cte as
(
    select
        coalesce(src.company, tgt.company) as company,
        isnull(tgt.no_employees, 0) as [start date no_employees],
        isnull(src.no_employees, 0) as [end date no_employees]
    from
        tbl src
        -- Join on differing dates so a row never pairs with itself; the FULL
        -- join keeps companies that have only one snapshot.
        full outer join tbl tgt
            on  src.company = tgt.company
            and src.[date] <> tgt.[date]
    where (src.[date] = (select max([date]) from tbl) or src.[date] is null)
      and (tgt.[date] = (select min([date]) from tbl) or tgt.[date] is null)
)
select *, [end date no_employees] - [start date no_employees] as diff
from cte