I am trying to find the number of rows that 2 dates fall between. Basically I have an auth dated 1/1/2018 - 4/1/2018 and I need the count of pay periods those dates fall within.
Here is the data I am looking at:
-- Pay-period calendar: one row per two-week pay period (inclusive start/end).
CREATE TABLE #dates
(
    pp_start_date date,
    pp_end_date   date
)

-- Seed eight consecutive pay periods.
INSERT INTO #dates (pp_start_date, pp_end_date)
VALUES
    ('2017-12-28', '2018-01-10'),
    ('2018-01-11', '2018-01-24'),
    ('2018-01-25', '2018-02-07'),
    ('2018-02-08', '2018-02-21'),
    ('2018-02-22', '2018-03-07'),
    ('2018-03-08', '2018-03-21'),
    ('2018-03-22', '2018-04-04'),
    ('2018-04-05', '2018-04-18');
When I run this query,
-- Number the pay periods by start date, then keep every period that ends
-- on or after 2018-01-01 (no upper bound is applied here).
SELECT
    ad.pp_start_date,
    ad.pp_end_date,
    ad.orderby
FROM
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY pp_start_date) AS orderby,
        pp_start_date,
        pp_end_date
    FROM #dates
) AS ad
WHERE ad.pp_end_date >= '2018-01-01'
I somehow want to only get 7 rows. Is this even possible? Thanks in advance for any help!
EDIT - OK, so using a count(*) worked to get the number of rows, but now I am trying to get the number of rows for 2 dynamic dates from another temp table, and I don't see a way to relate the data.
Using the #dates temp table referenced above gives me the date data. Now using this data:
-- Authorisations: one row per person, with the date range the auth covers.
CREATE TABLE #stuff
(
    [month]       date,
    [name]        varchar(20),
    units         int,
    fips_code     int,
    auth_datefrom date,
    auth_dateto   date
);

-- units and fips_code are int columns, so they are supplied as numeric
-- literals rather than quoted strings that rely on implicit conversion.
INSERT INTO #stuff ([month], [name], units, fips_code, auth_datefrom, auth_dateto)
VALUES
    ('2018-01-01', 'SMITH', 50, 760, '2018-01-01', '2018-04-01'),
    ('2018-01-01', 'JONES', 46, 193, '2018-01-01', '2018-04-01'),
    ('2018-01-01', 'DAVID', 84, 109, '2018-02-01', '2018-04-01');
I want to somehow create a statement that counts the rows from the #dates table that overlap the auth dates in the #stuff table; I just can't figure out how to relate or join them.
pp_start_date <= auth_dateto and pp_end_date >= auth_datefrom
Here is my output for #dates
pp_start_date pp_end_date
2017-12-28 2018-01-10
2018-01-11 2018-01-24
2018-01-25 2018-02-07
2018-02-08 2018-02-21
2018-02-22 2018-03-07
2018-03-08 2018-03-21
2018-03-22 2018-04-04
2018-04-05 2018-04-18
Here is my output for #stuff
month name units fips_code auth_datefrom auth_dateto
2018-01-01 SMITH 50 760 2018-01-01 2018-04-01
2018-01-01 JONES 46 193 2018-01-01 2018-04-01
2018-01-01 DAVID 84 109 2018-02-01 2018-04-01
I am trying to use the auth_datefrom and auth_dateto from #stuff to find out how many rows that is from #dates.
try this one.
-- Return the pay periods that overlap the auth range 2018-01-01 .. 2018-04-01.
-- Two inclusive ranges overlap when each one starts on or before the other ends.
-- (Testing "ends before OR starts after" a single date would drop the period
-- that contains the auth start and keep the period after the auth end.)
SELECT
    ad.pp_start_date,
    ad.pp_end_date,
    ad.orderby
FROM
(
    SELECT
        ROW_NUMBER() OVER (ORDER BY pp_start_date) AS orderby,
        pp_start_date,
        pp_end_date
    FROM #dates
) AS ad
WHERE ad.pp_start_date <= '2018-04-01'
  AND ad.pp_end_date   >= '2018-01-01'
Are you looking for this?
-- Pay periods overlapping the auth range 2018-01-01 .. 2018-04-01.
-- Column names match the #dates definition (pp_start_date / pp_end_date).
select d.pp_start_date,
       d.pp_end_date
from #dates d
where d.pp_start_date <= '2018-04-01' and
      d.pp_end_date >= '2018-01-01';
This returns all rows that have a date with the time period you specify.
I'm not sure what the row_number() does. If you want the count, then:
-- Number of pay periods overlapping the auth range 2018-01-01 .. 2018-04-01.
-- Column names match the #dates definition (pp_start_date / pp_end_date).
select count(*)
from #dates d
where d.pp_start_date <= '2018-04-01' and
      d.pp_end_date >= '2018-01-01';
Related
Suppose I have a date range, @StartDate = 2022-01-01 and @EndDate = 2022-02-01, and this is a reporting period.
In addition, I also have customer records, where each customer has a LIVE Date and a ServiceEndDate (or ServiceEndDate = NULL as they are an ongoing customer)
Some customers may have their Live Date and Service end date range extend outside of the reporting period range. I would only want to report for days that they were a customer in the period.
Name
LiveDate
ServiceEndDate
Tom
2021-10-11
2022-01-13
Mark
2022-11-13
2022-02-15
Andy
2022-01-02
2022-02-10
Rob
2022-01-09
2022-01-14
I would like to create a table where column A is the Date (iterating between every date in the reporting period) and column B is a sum of the number of customers that were a customer on that date.
Something like this
Date
NumberOfCustomers
2022-01-01
2
2022-01-02
3
2022-01-03
3
2022-01-04
3
2022-01-05
3
2022-01-06
3
2022-01-07
3
2022-01-08
3
2022-01-09
4
2022-01-10
4
2022-01-11
4
2022-01-12
4
2022-01-13
4
2022-01-14
3
2022-01-15
3
And so on until @EndDate.
Any help would be much appreciated, thanks.
You can join your table to a calendar table containing all the dates you need:
-- Build one row per day of the reporting period with a recursive CTE, then
-- count the customers whose service interval covers each day. An open-ended
-- customer (NULL ServiceEndDate) is treated as ending on 9999-12-31.
WITH calendar AS
(
    SELECT CAST('2022-01-01' AS datetime) AS d
    UNION ALL
    SELECT DATEADD(day, 1, d)
    FROM calendar
    WHERE d < '2022-02-01'
)
SELECT
    d        AS "Date",
    COUNT(*) AS NumberOfCustomers
FROM calendar
INNER JOIN table_name
    ON  d >= LiveDate
    AND d <= COALESCE(ServiceEndDate, '9999-12-31')
GROUP BY d;
Fiddle
I would personally suggest using a Tally, rather than an rCTE, as a Tally is significantly more performant.
-- Sample data: one row per customer with the live date and service end date.
SELECT *
INTO dbo.YourTable
FROM (VALUES('Tom ',CONVERT(date,'2021-10-11 '),CONVERT(date,'2022-01-13')),
            ('Mark',CONVERT(date,' 2022-11-13'),CONVERT(date,' 2022-02-15')),
            ('Andy',CONVERT(date,' 2022-01-02'),CONVERT(date,' 2022-02-10')),
            ('Rob ',CONVERT(date,'2022-01-09 '),CONVERT(date,'2022-01-14')))V(Name,LiveDate,ServiceEndDate);
GO

SELECT *
FROM dbo.YourTable;
GO

-- Reporting period (inclusive). Local variables use the @ prefix in T-SQL.
DECLARE @StartDate date = '20220101',
        @EndDate   date = '20220201';

WITH N AS(
    -- Ten placeholder rows; cross joined three times they give up to 1,000 rows.
    SELECT N
    FROM (VALUES(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL),(NULL))N(N)),
Tally AS(
    -- Offsets 0..DATEDIFF(DAY, @StartDate, @EndDate): 0 comes from the first
    -- branch, 1..n from ROW_NUMBER() in the second.
    SELECT 0 AS I
    UNION ALL
    SELECT TOP (DATEDIFF(DAY, @StartDate, @EndDate))
           ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS I
    FROM N N1
    CROSS JOIN N N2
    CROSS JOIN N N3), --up to 1,000 days
Dates AS(
    -- One row per calendar day in the reporting period.
    SELECT DATEADD(DAY, T.I, @StartDate) AS Date
    FROM Tally T)
-- LEFT JOIN + COUNT(column) keeps days with no live customers and reports 0.
SELECT D.Date,
       COUNT(YT.[Name]) AS NumberOfCustomers
FROM Dates D
     LEFT JOIN dbo.YourTable YT ON D.[Date] >= YT.LiveDate
                               AND (D.[Date] <= YT.ServiceEndDate
                                 OR YT.ServiceEndDate IS NULL)
GROUP BY D.[Date]
ORDER BY D.[Date];
GO

DROP TABLE dbo.YourTable;
Note that the results don't reflect your expected results; I suspect your expected results are wrong. For example, you have 2 people live on 2022-01-01; however, there is only 1 person who is live on that date: Tom.
This solution will also never have Mark as "live" (the rCTE method in the other answer won't either) as their end date is before their Live date. If someone can have their service end before it started, I would suggest you have a data quality issue, and you should be adding a CHECK CONSTRAINT to the table to ensure that value of ServiceEndDate is >= LiveDate.
I have to add the values of weekends and holidays to the value of the previous working day, so that weekends and holidays are not displayed in the report. If there is no previous working day, the row should simply be skipped, as 2018-01-01 is in the output below.
**DAYS VALUE**
2018-01-01 10 Holiday-1
2018-01-02 20
2018-01-03 30
2018-01-04 40
2018-01-05 50
2018-01-06 60 Saturday
2018-01-07 70 Sunday
2018-01-08 80
2018-01-09 90
2018-01-10 100 Holiday-2
OUTPUT
2018-01-02 20
2018-01-03 30
2018-01-04 40
2018-01-05 180
2018-01-08 80
2018-01-09 190
I have tried LEAD, LAG, DATEDIFF and other approaches but have not found a solution, so please help me with this problem.
When there is a row in your Holidays calendar table (I will assume, that weekends are there too), you need to find the max date, prior the current one, for which there is no row in holidays table. Then group by this "real date" and sum the value. Something like this:
-- Sample data held in table variables (T-SQL table variables use the @ prefix).
DECLARE @t TABLE ([DAYS] date, [VALUE] int);
DECLARE @Holidays TABLE ([DAYS] date, Note varchar(100));

INSERT INTO @t ([DAYS], [VALUE]) VALUES
('2018-01-01', 10),
('2018-01-02', 20),
('2018-01-03', 30),
('2018-01-04', 40),
('2018-01-05', 50),
('2018-01-06', 60),
('2018-01-07', 70),
('2018-01-08', 80),
('2018-01-09', 90),
('2018-01-10', 100);

-- Weekends are stored in the holiday calendar as well.
INSERT INTO @Holidays ([DAYS], Note) VALUES
('2018-01-01', 'Holiday-1'),
('2018-01-06', 'Saturday'),
('2018-01-07', 'Sunday'),
('2018-01-10', 'Holiday-2');

-- Map every row to its "real" date: a working day maps to itself, a holiday
-- maps to the latest earlier working day (NULL when there is none).
WITH cte AS (
    SELECT
        CASE
            WHEN h1.[DAYS] IS NOT NULL /* i.e. it is a holiday */
                THEN (SELECT MAX(t2.[DAYS])
                      FROM @t t2
                      WHERE t2.[DAYS] < t1.[DAYS]
                        AND NOT EXISTS (SELECT * FROM @Holidays h2 WHERE h2.[DAYS] = t2.[DAYS]))
            ELSE t1.[DAYS]
        END AS RealDate
        , t1.[VALUE]
    FROM @t t1
    LEFT JOIN @Holidays h1 ON t1.[DAYS] = h1.[DAYS]
)
-- Holidays with no earlier working day have a NULL RealDate and are skipped.
SELECT
    RealDate
    , SUM([VALUE]) AS RealValue
FROM cte
WHERE RealDate IS NOT NULL
GROUP BY RealDate
ORDER BY RealDate;
You can do this with cumulative sums (to define groups) and aggregation. Define the groups as the number of non-holidays on or before a given day, then aggregate. This is the same value for a non-holiday followed by a holiday.
Then aggregate:
-- grp = number of non-holidays on or before each day, so a working day and the
-- holidays that directly follow it share one grp value.
-- min(days) is the working day that starts the group (max(days) would report
-- the trailing holiday instead).
-- grp = 0 holds only holidays with no earlier working day, so it is skipped.
select min(days) as days, sum(value)
from (select t.*,
             sum(case when holiday is null then 1 else 0 end)
                 over (order by days asc rows unbounded preceding) as grp
      from t
     ) t
where grp > 0
group by grp;
EDIT:
With a separate holidays table, you just need to add the join:
-- Same grouping with a separate holidays table: a day is a working day when
-- the left join finds no holidays row (the join key h.date is then NULL).
-- grp = number of working days on or before each day; min(days) is the working
-- day that starts the group, and grp = 0 (leading holidays with no earlier
-- working day) is skipped.
select min(days) as days, sum(value)
from (select t.*,
             sum(case when h.date is null then 1 else 0 end)
                 over (order by t.days asc rows unbounded preceding) as grp
      from t left join
           holidays h
           on t.days = h.date
     ) t
where grp > 0
group by grp;
I am trying to write SQL to generate the following data
Date Count
2018-09-24 2
2018-09-25 2
2018-09-26 2
2018-09-27 2
2018-09-28 2
2018-09-29 1
A sample of the base table I am using is
ID StartDate EndDate
187267 2018-09-24 2018-10-01
187270 2018-09-24 2018-09-30
So I'm trying to get a list of dates between 2 dates and then count how many base data records there are in each date.
I started using a temporary table and attempting to loop through the records to get the results but I'm not sure if this is the right approach.
I have this code so far
-- Recursive CTE that expands a start..end range into one row per date and then
-- counts the rows per date.
-- Both scalar subqueries below select from EngagementAssignment for job 15072
-- (rows with a non-NULL AssetId). Each is used where a single value is
-- required, so the statement only works when exactly one assignment matches.
-- Anchor member: the StartWork value of the matching assignment.
WITH ctedaterange
AS (SELECT [Dates] = (select ea.StartWork from EngagementAssignment ea where ea.EngagementAssignmentId IN(SELECT ea.EngagementAssignmentId
FROM EngagementLevel el INNER JOIN
EngagementAssignment ea ON el.EngagementLevelID = ea.EngagementLevelId
WHERE el.JobID = 15072 and ea.AssetId IS NOT NULL))
UNION ALL
-- Recursive member: step forward by 1 (one day, assuming StartWork is a
-- datetime) while the next value is still <= the matching EndWork.
SELECT [dates] + 1
FROM ctedaterange
WHERE [dates] + 1 < = (select ea.EndWork from EngagementAssignment ea where ea.EngagementAssignmentId IN(SELECT ea.EngagementAssignmentId
FROM EngagementLevel el INNER JOIN
EngagementAssignment ea ON el.EngagementLevelID = ea.EngagementLevelId
WHERE el.JobID = 15072 and ea.AssetId IS NOT NULL)))
-- Count how many generated rows fall on each date.
SELECT [Dates], Count([Dates])
FROM ctedaterange
GROUP BY [Dates]
But I get this error
Subquery returned more than 1 value. This is not permitted when the subquery follows =, !=, <, <= , >, >= or when the subquery is used as an expression.
I get correct results when the job I use only generates one record in the subselect in the where clause, ie:
-- Ids of the assignments on job 15047 that have an asset attached.
SELECT ea.EngagementAssignmentId
FROM EngagementAssignment AS ea
INNER JOIN EngagementLevel AS el
    ON ea.EngagementLevelId = el.EngagementLevelID
WHERE ea.AssetId IS NOT NULL
  AND el.JobID = 15047
generates one record.
The results look like this:
Dates (No column name)
2018-09-24 02:00:00.000 1
2018-09-25 02:00:00.000 1
2018-09-26 02:00:00.000 1
2018-09-27 02:00:00.000 1
2018-09-28 02:00:00.000 1
2018-09-29 02:00:00.000 1
2018-09-30 02:00:00.000 1
2018-10-01 02:00:00.000 1
you can generate according to your range by changing from and to date
-- Generate one row per calendar day from @DateFrom to @DateTo (inclusive).
-- The bounds are typed as date: with DATETIME, GETDATE() carries a time of day
-- and the last generated value could land after @DateTo.
DECLARE
    @DateFrom date = GETDATE(),
    @DateTo   date = '2018-10-30';

WITH DateGenerate
AS (
    -- Anchor: first day of the range (nothing is produced for an inverted range).
    SELECT @DateFrom AS MyDate
    WHERE @DateFrom <= @DateTo
    UNION ALL
    -- Step one day at a time until @DateTo has been emitted.
    SELECT DATEADD(DAY, 1, MyDate)
    FROM DateGenerate
    WHERE MyDate < @DateTo
)
SELECT
    MyDate
FROM
    DateGenerate
-- The default recursion limit is 100 levels, which caps the range at about
-- 100 days; recursion here is already bounded by @DateTo, so lift the cap.
OPTION (MAXRECURSION 0);
Well, if you only have a low date range, you can use a recursive CTE as demonstrated in the other answers. The problem with a recursive CTE is with large ranges, where it starts to be ineffective - So I wanted to show you a different approach, that builds the calendar CTE without using recursion.
First, Create and populate sample table (Please save us this step in your future questions):
-- Sample data in a table variable (table variables use the @ prefix).
DECLARE @T AS TABLE
(
    ID int,
    StartDate date,
    EndDate date
)
INSERT INTO @T (ID, StartDate, EndDate) VALUES
(187267, '2018-09-24', '2018-10-01'),
(187270, '2018-09-24', '2018-09-30')
Then, get the first start date and the number of dates you need in the calendar cte:
-- @StartDate = earliest start date in the data;
-- @DateDiff  = number of days from that date to the latest end date.
DECLARE @DateDiff int, @StartDate Date
SELECT @DateDiff = DATEDIFF(DAY, MIN(StartDate), Max(EndDate)),
       @StartDate = MIN(StartDate)
FROM @T
Now, construct the calendar cte based on row_number (that is, unless you already have a numbers (tally) table you can use):
-- Calendar CTE: one row per day from @StartDate through @StartDate + @DateDiff.
-- sys.objects is only a row source for ROW_NUMBER(); ordering by @@SPID (a
-- constant) satisfies the mandatory ORDER BY without imposing a real sort.
;WITH Calendar(TheDate)
AS
(
    SELECT TOP(@DateDiff + 1) DATEADD(DAY, ROW_NUMBER() OVER(ORDER BY @@SPID)-1, @StartDate)
    FROM sys.objects t0
    -- unremark the next row if you don't get enough records...
    -- CROSS JOIN sys.objects t1
)
Note that I'm using row_number() - 1 and therefore have to select top(@DateDiff + 1)
Finally - the query:
-- For every calendar day, count the records whose inclusive
-- StartDate..EndDate range covers that day.
SELECT TheDate, COUNT(ID) As NumberOfRecords
FROM Calendar
INNER JOIN @T AS T
    ON Calendar.TheDate >= T.StartDate
    AND Calendar.TheDate <= T.EndDate
GROUP BY TheDate
ORDER BY TheDate
Results:
TheDate | NumberOfRecords
2018-09-24 | 2
2018-09-25 | 2
2018-09-26 | 2
2018-09-27 | 2
2018-09-28 | 2
2018-09-29 | 2
2018-09-30 | 2
2018-10-01 | 1
You can see a live demo on rextester.
Can you please try following SQL CTE query where I have used a SQL dates table function [dbo].[DatesTable] which produces a list of dates between min date and max date in the source table
-- boundaries: overall earliest start / latest end across DateRanges.
-- dates: every date in that span, produced by the [dbo].[DatesTable] function.
;WITH boundaries AS
(
    SELECT
        MIN(StartDate) AS minD,
        MAX(EndDate)   AS maxD
    FROM DateRanges
),
dates AS
(
    SELECT
        dt.[date]
    FROM boundaries
    CROSS APPLY [dbo].[DatesTable](minD, maxD) AS dt
)
-- Count the ranges covering each generated date (both ends inclusive).
SELECT
    dates.[date],
    COUNT(*) AS [count]
FROM dates
INNER JOIN DateRanges
    ON  dates.[date] >= DateRanges.StartDate
    AND dates.[date] <= DateRanges.EndDate
GROUP BY dates.[date]
ORDER BY dates.[date]
The output is as expected
Try this: demo
-- Expand each (id, sdate, edate) row into one row per day of its range by
-- advancing sdate a day at a time, then count the ids present on each day.
WITH cte1 AS
(
    SELECT id, sdate, edate
    FROM t
    UNION ALL
    SELECT
        prev.id,
        DATEADD(DAY, 1, prev.sdate),
        prev.edate
    FROM cte1 AS prev
    WHERE prev.edate >= DATEADD(DAY, 1, prev.sdate)
)
SELECT
    sdate,
    COUNT(id) AS total
FROM cte1
GROUP BY sdate
OPTION (MAXRECURSION 0)
Output:
sdate total
2018-09-24 2
2018-09-25 2
2018-09-26 2
2018-09-27 2
2018-09-28 2
2018-09-29 2
2018-09-30 1
I have a SQL table with From and To dates like so:
Row From To
--------------------------------------------------
1 2017-10-28 00:00:00 2017-10-30 00:00:00
2 2017-10-30 00:00:00 2017-10-31 00:00:00
3 2017-10-31 00:00:00 2017-10-31 07:30:00
4 2017-10-31 14:41:00 2017-10-31 15:14:00
5 2017-10-31 17:13:00 2017-11-01 00:00:00
6 2017-11-01 00:00:00 2017-11-01 23:45:00
7 2017-11-02 03:13:00 2017-11-02 07:56:00
I need to group consecutive data into islands. The data is non-overlapping. This is done easily enough using this query:
-- Gaps-and-islands, written as a chain of CTEs:
--   IsNewIsland   flags a row with 1 when it does not touch the previous row
--                 (its [From] is after the previous [To], or there is no previous row)
--   GroupedIsland turns the flags into a running island number
--   Islands       collapses each island to its earliest [From] and latest [To]
;WITH IsNewIsland AS
(
    SELECT
        [From],
        [To],
        CASE
            WHEN [From] <= LAG([To]) OVER (ORDER BY [From]) THEN 0
            ELSE 1
        END AS StartGroup
    FROM dbo.DateTable
),
GroupedIsland AS
(
    SELECT
        [From],
        [To],
        SUM(StartGroup) OVER (ORDER BY [From]) AS StartGroup
    FROM IsNewIsland
),
Islands AS
(
    SELECT
        MIN([From]) AS [From],
        MAX([To])   AS [To]
    FROM GroupedIsland
    GROUP BY StartGroup
)
SELECT
    [From],
    [To]
FROM Islands
And gives me these results:
From To Rows
-----------------------------------------------------
2017-10-28 00:00:00 2017-10-31 07:30:00 1-3
2017-10-31 14:41:00 2017-10-31 15:14:00 4
2017-10-31 17:13:00 2017-11-01 23:45:00 5-6
2017-11-02 03:13:00 2017-11-02 07:56:00 7
The problem I have is that I need to modify the query to cap/split the islands once they have gotten enough records to be a certain total duration. This is an input/hardcoded value. The split includes the entire record, not splitting in the middle of a record's From-To range. As an example, I need to split islands to be 27 hours. This would give this result:
From To Rows
-----------------------------------------------------
2017-10-29 00:00:00 2017-10-30 00:00:00 1
2017-10-30 00:00:00 2017-10-31 07:30:00 2-3
2017-10-31 17:13:00 2017-11-01 23:45:00 5-6
The first island was split because rows 1 and 2 alone created a 27 hour period. Rows 4 and 7 are not enough to create an island, so they are ignored.
I tried pulling this information via a lag function in the inner select to compute the "rolling duration" across rows, but it would not work on islands that spanned more than 2 rows because it would only track the last row's duration and I could not "carry" the calculation forward.
-- Attempt at a rolling duration: when a row touches the previous row, add the
-- previous row's duration (in minutes) to its own. This only looks one row
-- back, so it cannot carry a total across islands of three or more rows.
SELECT
[From],
[To],
-- closing parenthesis added to the first OVER clause so the expression parses
(case when [From] <= lag([To]) over (order by [From])
then (datediff(minute, [From], [To]) + lag(datediff(minute, [From], [To])) over (order by [From]))
else datediff(minute, [From], [To])
end) as RollingDuration,
-- 1 marks the first row of a new island, 0 a continuation
(case when [From] <= lag([To]) over (order by [From])
then 0
else 1
end) as StartGroup
FROM dbo.DateTable
The "least worst" way I can think of doing it is a "quirky update". (Google it, I honestly didn't make it up.)
http://www.sqlservercentral.com/articles/T-SQL/68467/
Copy the data in to a new table with one or more additional (blank) fields
Use a CLUSTERED PRIMARY KEY to ensure the rows are updated in correct sequence
Use UPDATE and user variables to iterate through rows and store results of calculations
Using that I can start a new group if there is a gap, or a running total reaches 27 hours. Then proceed as usual.
-- New table to work through
----------------------------------------------------------------------
-- Addition [group_start] field (identifies groups, and useful data)
-- PRIMARY KEY CLUSTERED to enforce the order rows will be processed
----------------------------------------------------------------------
-- Working copy of the data plus a group_start column that the later update
-- fills in; the clustered key on (group_start, start) fixes the row order.
CREATE TABLE sample
(
    id           INT,
    start        DATETIME,
    cease        DATETIME,
    group_start  DATETIME DEFAULT (0),
    -- To force the order we will iterate the rows, and is useful in last step
    PRIMARY KEY CLUSTERED (group_start, start)
);

INSERT INTO sample (id, start, cease)
VALUES
    (1, '2017-10-28 00:00:00', '2017-10-30 00:00:00'),
    (2, '2017-10-30 00:00:00', '2017-10-31 00:00:00'),
    (3, '2017-10-31 00:00:00', '2017-10-31 07:30:00'),
    (4, '2017-10-31 14:41:00', '2017-10-31 15:14:00'),
    (5, '2017-10-31 17:13:00', '2017-11-01 00:00:00'),
    (6, '2017-11-01 00:00:00', '2017-11-01 23:45:00'),
    (7, '2017-11-02 03:13:00', '2017-11-02 07:56:00');
-- Quirky Update
----------------------------------------------------------------------
-- Update [group_start] to the start of the current group
-- -> new group if gap since previous row
-- -> new group if previous row took group to 27 hours
-- -> else same group as previous row
----------------------------------------------------------------------
-- Running state carried from row to row by the update below
-- (a local variable, hence the @ prefix).
DECLARE @grp_start DATETIME = 0;

WITH
  lagged AS
(
  -- Expose the previous row's cease time so a gap can be detected.
  SELECT *, LAG(cease) OVER (ORDER BY group_start, start) AS lag_cease FROM sample
)
UPDATE
  lagged
SET
  -- Assign the variable and the column in one step so each row sees the
  -- value left behind by the row processed before it.
  @grp_start
    = group_start
    = CASE WHEN start <> lag_cease                        THEN start
           WHEN start >= DATEADD(hour, 27, @grp_start)    THEN start
                                                          ELSE @grp_start END
OPTION
  -- Single-threaded plan: the technique depends on rows being processed serially.
  (MAXDOP 1)
;
-- Standard SQL to apply other logic
----------------------------------------------------------------------
-- MAX() OVER () to find end time of each group
-- WHERE to filter out any groups under 12 hours long
----------------------------------------------------------------------
-- Attach each group's latest cease time to its rows, then keep only the rows
-- of groups that span at least 12 hours from group_start.
SELECT
    bounded_groups.*
FROM
(
    SELECT
        id,
        start,
        cease,
        group_start,
        MAX(cease) OVER (PARTITION BY group_start) AS group_cease
    FROM sample
) AS bounded_groups
WHERE DATEADD(hour, 12, bounded_groups.group_start) <= bounded_groups.group_cease
;
http://dbfiddle.uk/?rdbms=sqlserver_2017&fiddle=1bec5b3fe920c1affd58f23a11e280a0
Here is the table that I am working with:
MemberID MembershipStartDate MembershipEndDate
=================================================================
123 2010-01-01 00:00:00.000 2012-12-31 00:00:00.000
123 2011-01-01 00:00:00.000 2012-12-31 00:00:00.000
123 2013-05-01 00:00:00.000 2013-12-31 00:00:00.000
123 2014-01-01 00:00:00.000 2014-12-31 00:00:00.000
123 2015-01-01 00:00:00.000 2015-03-31 00:00:00.000
What I want is to create one row that shows continuous membership,
and a second row if the membership breaks by more than 2 days, with a new start and end date..
So the output I am looking for is like:
MemberID MembershipStartDate MembershipEndDate
=================================================================
123 2010-01-01 00:00:00.000 2012-12-31 00:00:00.000
123 2013-05-01 00:00:00.000 2015-03-31 00:00:00.000
There is a memberID field attached to these dates which is how they are grouped.
I've had to deal with this kind of thing before
I use something like this
-- Collapse overlapping/adjacent membership rows into continuous periods.
-- Approach: collect every date at which membership could switch on or off,
-- flag whether the member is covered on that date, keep only the dates where
-- the flag changes, then pair each "on" date with the following "off" date.
-- NOTE: any uncovered day ends a period; no multi-day gap tolerance is applied.
USE tempdb

--Create test Data (table variables use the @ prefix)
DECLARE @Membership TABLE (MemberID int, MembershipStartDate date, MembershipEndDate date)

INSERT @Membership
    (MemberID, MembershipStartDate, MembershipEndDate)
VALUES (123,'2010-01-01','2012-12-31'),
       (123,'2011-01-01','2012-12-31'),
       (123,'2013-05-01','2013-12-31'),
       (123,'2014-01-01','2014-12-31'),
       (123,'2015-01-01','2015-03-31')

--Create a table to hold all the dates that might be turning points
DECLARE @SignificantDates TABLE (MemberID int, SignificantDate date, IsMember bit DEFAULT 0)

--Populate table with the start and end dates as well as the days just before and just after each period
--(UNION rather than UNION ALL: a date must appear only once per member)
INSERT @SignificantDates (MemberID, SignificantDate)
SELECT MemberID, MembershipStartDate FROM @Membership
UNION
SELECT MemberID, DATEADD(day,-1,MembershipStartDate) FROM @Membership
UNION
SELECT MemberID, MembershipEndDate FROM @Membership
UNION
SELECT MemberID, DATEADD(day,1,MembershipEndDate) FROM @Membership

--Set the is member flag for each date that is covered by a membership
--(matched on MemberID so one member's periods never flag another member's dates)
UPDATE sd SET IsMember = 1
FROM @SignificantDates sd
JOIN @Membership m
    ON  m.MemberID = sd.MemberID
    AND m.MembershipStartDate <= sd.SignificantDate
    AND sd.SignificantDate <= m.MembershipEndDate

--To demonstrate what we're about to do, Select all the dates and show the IsMember Flag and the previous value
--(LAG ordered ascending, the same ordering the delete below relies on)
SELECT sd.MemberID, sd.SignificantDate, sd.IsMember, prv.prevIsMember
FROM
    @SignificantDates sd
    JOIN (SELECT
              MemberId,
              SignificantDate,
              IsMember,
              LAG(IsMember,1) OVER (PARTITION BY MemberId ORDER BY SignificantDate) AS prevIsMember
          FROM @SignificantDates
         ) AS prv
        ON  sd.MemberID = prv.MemberID
        AND sd.SignificantDate = prv.SignificantDate
ORDER BY sd.MemberID, sd.SignificantDate

--Delete the ones where the flag is the same as the previous value
--(the first date per member has no previous value and is therefore kept)
DELETE sd
FROM
    @SignificantDates sd
    JOIN (SELECT
              MemberId,
              SignificantDate,
              IsMember,
              LAG(IsMember,1) OVER (PARTITION BY MemberId ORDER BY SignificantDate) AS prevIsMember
          FROM @SignificantDates
         ) AS prv
        ON  sd.MemberID = prv.MemberID
        AND sd.SignificantDate = prv.SignificantDate
        AND prv.IsMember = prv.prevIsMember

--SELECT the Start date for each period of membership and the day before the following period of non membership
SELECT
    nxt.MemberId,
    nxt.SignificantDate AS MembershipStartDate,
    DATEADD(day,-1,nxt.NextSignificantDate) AS MembershipEndDate
FROM
(
    SELECT
        MemberID,
        SignificantDate,
        LEAD(SignificantDate,1) OVER (PARTITION BY MemberId ORDER BY SignificantDate) AS NextSignificantDate,
        IsMember
    FROM @SignificantDates
) nxt
WHERE nxt.IsMember = 1