SQL count exposure of life time by age - sql

(Using SQL Server 2008)
I need some help visualizing a solution. Let's say I have the following simple table for members of a pension scheme:
[Date of Birth] [Date Joined] [Date Left]
1970/06/1 2003/01/01 2007/03/01
I need to calculate the number of lives in each age group from 2000 to 2009.
NOTE: "Age" is defined as "age last birthday" (or "ALB") on 1 January of each of those years, e.g. if you are exactly 41.35 or 41.77 etc. years old on 1/1/2009 then you would be ALB 41.
So if the record above were the only entry in the database, then the output would be something like:
[Year] [Age ] [Number of Lives]
2003 32 1
2004 33 1
2005 34 1
2006 35 1
2007 36 1
(For 2000, 2001, 2002, 2008 and 2009 there are no lives on file since the sole member only joined on 1/1/2003 and left on 1/3/2007)
I hope I am making myself clear enough.
Anyone have any suggestions?
Thanks, Karl
[EDIT]
Adding another layer to the problem:
What if I had:
[Date of Birth] [Date Joined] [Date Left] [Gender] [Pension Value]
1970/06/1 2003/01/01 2007/03/01 'M' 100,000
and I want the output to be:
[Year] [Age ] [Gender] sum([Pension Value]) [Number of Lives]
2003 32 M 100,000 1
2004 33 M 100,000 1
2005 34 M 100,000 1
2006 35 M 100,000 1
2007 36 M 100,000 1
Any ideas?

-- Number of lives per calendar year and age last birthday (ALB) on 1 January.
-- years: recursive number table, one row per calendar year from 1900 up to
-- the current year (more than 100 rows, hence MAXRECURSION 0 below).
WITH years AS
(
    SELECT 1900 AS y
    UNION ALL
    SELECT y + 1
    FROM years
    WHERE y < YEAR(GETDATE())
),
-- member: the year parts needed by the join, plus a flag that is 1 when the
-- birthday has not yet occurred on 1 January (every date except 1 January).
member AS
(
    SELECT
        YEAR(Dob) AS Yob,
        YEAR(DJoined) AS YJoined,
        YEAR(DLeft) AS YLeft,
        CASE WHEN MONTH(Dob) = 1 AND DAY(Dob) = 1 THEN 0 ELSE 1 END AS BirthdayPending
    FROM mytable
)
SELECT
    years.y AS [Year],
    -- A plain year difference overstates ALB on 1 January by one year for
    -- anyone not born on 1 January (e.g. born 1970-06-01 is 32, not 33, on 2003-01-01).
    years.y - member.Yob - member.BirthdayPending AS Age,
    COUNT(*) AS [Number of Lives]
FROM member
INNER JOIN years
    ON years.y BETWEEN member.YJoined AND member.YLeft
GROUP BY
    years.y,
    years.y - member.Yob - member.BirthdayPending
OPTION (MAXRECURSION 0)
People born on same year always have the same age in your model
That's why if they go at all, they always go into one group and you just need to generate one row per year for the period they stay in the program.

You can try something like this
-- Self-contained demo: lives per year and age last birthday (ALB) on 1 January.
-- Table variables and scalars use the @ sigil.
DECLARE @Table TABLE(
    [Date of Birth] DATETIME,
    [Date Joined] DATETIME,
    [Date Left] DATETIME
)
-- Unseparated yyyymmdd literals do not depend on the session language.
INSERT INTO @Table ([Date of Birth],[Date Joined],[Date Left]) SELECT '19700601', '20030101', '20070301'
INSERT INTO @Table ([Date of Birth],[Date Joined],[Date Left]) SELECT '19790601', '20020101', '20080301'
DECLARE @StartYear INT,
        @EndYear INT
SELECT @StartYear = 2000,
       @EndYear = 2009
-- sel: one row per report year, @StartYear..@EndYear inclusive.
;WITH sel AS(
    SELECT @StartYear YearVal
    UNION ALL
    SELECT YearVal + 1
    FROM sel
    WHERE YearVal < @EndYear
)
SELECT  Sub.YearVal AS [Year],
        Sub.Age,
        COUNT(Sub.Age) [Number of Lives]  -- COUNT(Age) yields 0 for years nobody was a member
FROM    (
            SELECT  sel.YearVal,
                    -- ALB on 1 January: the birthday of that year is still ahead,
                    -- except for people born on 1 January itself.
                    sel.YearVal - DATEPART(yy, t.[Date of Birth])
                        - CASE WHEN MONTH(t.[Date of Birth]) = 1 AND DAY(t.[Date of Birth]) = 1
                               THEN 0 ELSE 1 END Age
            FROM    sel LEFT JOIN
                    @Table t ON DATEPART(yy, t.[Date Joined]) <= sel.YearVal
                            AND DATEPART(yy, t.[Date Left]) >= sel.YearVal
        ) Sub
GROUP BY Sub.YearVal, Sub.Age
ORDER BY Sub.YearVal, Sub.Age

Try the following sample query
SET NOCOUNT ON
-- Sample members (table variable, @ sigil); each person gets a distinct id.
DECLARE @PersonTable AS TABLE
(
    PersonId Integer,
    DateofBirth DateTime,
    DateJoined DateTime,
    DateLeft DateTime
)
INSERT INTO @PersonTable (PersonId, DateofBirth, DateJoined, DateLeft) VALUES
(1, '19700610', '20030101', '20070301'),
(2, '19700711', '20030101', '20070301'),
(3, '19700312', '20030101', '20070301'),
(4, '19730713', '20030101', '20070301'),
(5, '19720614', '20030101', '20070301')
-- One row per report year, keyed by its 1 January date.
DECLARE @YearTable AS TABLE
(
    YearId Integer,
    StartOfYear DateTime
)
INSERT INTO @YearTable (YearId, StartOfYear) VALUES
(1, '20000101'),
(2, '20010101'),
(3, '20020101'),
(4, '20030101'),
(5, '20040101'),
(6, '20050101'),
(7, '20060101'),
(8, '20070101'),
(9, '20080101'),
(10, '20090101')
;WITH AgeTable AS
(
    SELECT
        y.StartOfYear,
        -- DATEDIFF(YEAR, ...) only counts year boundaries crossed; subtract one
        -- when the birthday in that year falls after the 1 January date.
        DATEDIFF(YEAR, p.DateofBirth, y.StartOfYear)
            - CASE WHEN DATEADD(YEAR, DATEDIFF(YEAR, p.DateofBirth, y.StartOfYear), p.DateofBirth) > y.StartOfYear
                   THEN 1 ELSE 0 END AS Age
    FROM @PersonTable AS p
    -- Only years in which the person was a member on 1 January.
    INNER JOIN @YearTable AS y
        ON y.StartOfYear >= p.DateJoined
       AND y.StartOfYear < p.DateLeft
)
SELECT StartOfYear, Age, COUNT(1) NumIndividuals
FROM AgeTable
GROUP BY StartOfYear, Age
ORDER BY StartOfYear, Age

First some preparation to have something to test with:
-- Test fixture: members plus a helper table of report years.
CREATE TABLE People (
    ID int PRIMARY KEY
   ,[Name] varchar(50)
   ,DateOfBirth datetime
   ,DateJoined datetime
   ,DateLeft datetime
)
go
-- some data to test with
INSERT INTO dbo.People (ID, [Name], DateOfBirth, DateJoined, DateLeft)
VALUES
 (1, 'Bob', '19610402', '19990101', '20070507')
,(2, 'Sadra', '19600711', '19990101', '20080507')
,(3, 'Joe', '19610925', '19990101', '20090211')
go
-- helper table to hold years
CREATE TABLE dimYear (
    CalendarYear int PRIMARY KEY
)
go
-- fill-in years for report (local variables take the @ sigil)
DECLARE
     @yr int
    ,@StartYear int
    ,@EndYear int
SET @StartYear = 2000
SET @EndYear = 2009
SET @yr = @StartYear
WHILE @yr <= @EndYear
BEGIN
    INSERT INTO dimYear (CalendarYear) VALUES (@yr)
    SET @yr = @yr + 1
END
-- show test data and year tables
SELECT ID, [Name], DateOfBirth, DateJoined, DateLeft FROM dbo.People
SELECT CalendarYear FROM dbo.dimYear
go
Then a function to return person's age for each year, if the person is still an active member.
-- Inline table-valued function. For every year in dimYear it returns
-- [CalendarYear] and [Age], where Age is the member's age last birthday on
-- 1 January of that year, or -1 when @DateLeft is not after that 1 January.
CREATE FUNCTION dbo.MemberAge(@DateOfBirth datetime, @DateLeft datetime)
RETURNS TABLE
AS
RETURN (
    SELECT
        y.CalendarYear,
        CASE
            WHEN DATEDIFF(dd, y.YearStart, @DateLeft) > 0
            -- DATEDIFF(yy, ...) counts year boundaries, so take one off when
            -- the birthday in that year falls after 1 January.
            THEN DATEDIFF(yy, @DateOfBirth, y.YearStart)
                 - CASE WHEN DATEADD(yy, DATEDIFF(yy, @DateOfBirth, y.YearStart), @DateOfBirth) > y.YearStart
                        THEN 1 ELSE 0 END
            ELSE -1
        END AS Age
    FROM (
        -- 'yyyy0101' converts to datetime the same way under every language setting.
        SELECT CalendarYear,
               CAST(CAST(CalendarYear AS varchar(4)) + '0101' AS datetime) AS YearStart
        FROM dimYear
    ) AS y
);
go
And the final query:
-- Lives per report year and age, counting a person only for years in which
-- they had already joined on 1 January (MemberAge handles the leaving date).
SELECT
     a.CalendarYear AS "Year"
    ,a.Age AS "Age"
    ,COUNT(*) AS "Number Of Lives"
FROM
    dbo.People AS p
    CROSS APPLY dbo.MemberAge(p.DateOfBirth, p.DateLeft) AS a
WHERE a.Age > 0   -- MemberAge returns -1 for years after the member left
  AND p.DateJoined <= CAST(CAST(a.CalendarYear AS varchar(4)) + '0101' AS datetime)
GROUP BY a.CalendarYear, a.Age
ORDER BY a.CalendarYear, a.Age

Deal with this in pieces (some random thoughts) - create views to test you dev steps if you can:
ALB - do a query that, for a given year, gives you your member's ALB
Member in year - another bit of query that tell you whether a member was a member in a given year
Put those two together and you should be able to create a query that says whether a person was a member in a given year and what their ALB was for that year.
Hmm, tricky - following this chain of thought what you'd then want to do is generate a table that has all the years the person was a member and their ALB in that year (and a unique id)
From 4. select year, alb, count(id) group by year, alb
I'm not sure I'm going in the right direction from about 3 though it should work.
You may find a (temporary) table of years helpful - joining things to a table of dates makes all kinds of things possible.
Not really an answer, but certainly some direction...

Related

Calculate year over year increase SQL Server 2008R2

Below is my table structure. I need to calculate rent for length of lease for each properties:
Let's look at PropertyID = 12077:
Area = 1280
StartDate = 2023-02-01
EndDate = 2027-10-31
BaseRent = 21.53
RentIncreasePercent = .04 (4 percent)
IncreaseRepeatMonths = 12 months (NOTE: First 12 months there won't be any increase)
Since this property lease starts and ends between year 2023 and 2028, I'd like to know (in separate row per year) amount of rent to be collected each year. This would take percent increase every 12 months (compound rent increase) into consideration.
Example:
21.53 * 1280 would give rent for first 12 months. However, lease started in February so year 2023 total rent amount would be = ((21.53 * 1280)/12) * 11
For year 2024, first month rent would be = (21.53 * 1280)/12 because rent only increases every 12 months. For next 11 months of 2024, rent would be ((21.53 * 1.04 * 1280)/12)* 11.
For year 2025, first month rent would be ((21.53 * 1.04 * 1280)/12). However, next 11 months of 2025 would be ((22.39 * 1.04 * 1280)/12)*11. 22.39 comes from the compound increase (21.53 * 1.04).
How would I go about coming up with a view to do this? Most confusing part to me is not knowing how to accommodate for lease start date when it is not starting on January.
-- Sample leases (table variable, @ sigil). The sample values are consistent with
-- BaseRent being an annual rate per unit of area and RentIncreasePercent a fraction.
declare @table table
(
    PropertyID int,
    area int,
    StartDate date,
    EndDate date,
    BaseRent decimal(12,2),
    RentIncreaseBasis varchar(30),
    RentIncreasePercent decimal(5,2),
    IncreaseRepeatMonths int
)
-- Explicit column list and numeric literals (no implicit string-to-decimal conversion).
insert @table (PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths)
values (12076, 5627, '2024-01-01', '2028-12-31', 16.52, '% Increase', 0.03, 12)
insert @table (PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths)
values (12077, 1280, '2023-02-01', '2027-10-31', 21.53, '% Increase', 0.04, 12)
insert @table (PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths)
values (12078, 1000, '2017-03-01', '2025-11-30', 23.52, '% Increase', 0.01, 12)
insert @table (PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths)
values (12079, 2000, '2020-02-01', '2024-09-30', 15.57, '% Increase', 0.05, 12)
insert @table (PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths)
values (12080, 3000, '2018-05-01', '2020-08-31', 18.58, '% Increase', 0.04, 12)
insert @table (PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths)
values (12081, 4000, '2019-08-01', '2020-12-31', 22.56, '% Increase', 0.03, 12)
insert @table (PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths)
values (12082, 5000, '2017-02-01', '2028-03-31', 19.53, '% Increase', 0.02, 12)
select PropertyID, area, StartDate, EndDate, BaseRent, RentIncreaseBasis, RentIncreasePercent, IncreaseRepeatMonths
from @table
I recommend using a calendar table which contains all the months covered by your table.
I hope my example will work in SQL 2008.
-- here is your code
-- the calendar table: one row per month (first day of the month) covering
-- every lease in @table
DECLARE @MonthCalendar table(
    [Month] date PRIMARY KEY
)
DECLARE @MinDate date, @MaxDate date
-- get min and max date; the start is snapped to the first of its month so
-- that every calendar row is a month start
SELECT
    @MinDate = DATEADD(MONTH, DATEDIFF(MONTH, 0, MIN(StartDate)), 0),
    @MaxDate = MAX(EndDate)
FROM @table
-- fill the calendar table
;WITH monthCTE AS(
    SELECT @MinDate [Month]
    UNION ALL
    SELECT DATEADD(MONTH, 1, [Month])
    FROM monthCTE
    WHERE DATEADD(MONTH, 1, [Month]) <= @MaxDate
)
INSERT @MonthCalendar([Month])
SELECT [Month]
FROM monthCTE
OPTION(MAXRECURSION 0);

-- one row per property and lease month, computed once and reused below
DECLARE @MonthlyRent table(
    PropertyID int,
    [Month] date,
    IncreaseCount int,
    TotalPercent decimal(18,8),
    MonthRentAmount decimal(18,4)
)
INSERT @MonthlyRent (PropertyID, [Month], IncreaseCount, TotalPercent, MonthRentAmount)
SELECT
    q.PropertyID,
    q.[Month],
    q.IncreaseCount,
    q.TotalPercent,
    q.BaseRent * q.Area * q.TotalPercent / 12
FROM
(
    SELECT
        t.PropertyID,
        t.Area,
        t.BaseRent,
        m.[Month],
        -- completed increase periods since the lease started
        DATEDIFF(MONTH, t.StartDate, m.[Month]) / t.IncreaseRepeatMonths AS IncreaseCount,
        -- compound factor; the cast widens the scale because POWER returns the
        -- type of its first argument and decimal(5,2) would round to 2 decimals
        POWER(CAST(1 + t.RentIncreasePercent AS decimal(18,8)),
              DATEDIFF(MONTH, t.StartDate, m.[Month]) / t.IncreaseRepeatMonths) AS TotalPercent
    FROM @table t
    JOIN @MonthCalendar m
        ON m.[Month] >= DATEADD(MONTH, DATEDIFF(MONTH, 0, t.StartDate), 0)
       AND m.[Month] <= t.EndDate
    --WHERE t.PropertyID=12077
) q

-- final query: rent per property and month
SELECT PropertyID, [Month], IncreaseCount, MonthRentAmount, TotalPercent
FROM @MonthlyRent
ORDER BY PropertyID, [Month]

-- query for total amounts by PropertyIDs and Years; the year comes from the
-- calendar month, not from the lease start date
SELECT
    PropertyID,
    YEAR([Month]) [Year],
    SUM(MonthRentAmount) YearRentAmount
FROM @MonthlyRent
GROUP BY PropertyID, YEAR([Month])
ORDER BY PropertyID, [Year]

What is the best way to get active employee count per month?

I have Employee like below:
-- Sample employees (table variable, @ sigil); a NULL TerminationDate means still employed.
DECLARE @Employees TABLE
(
    [EmployeeID] [int] IDENTITY(1,1) NOT NULL,
    [HireDate] [datetime] NOT NULL,
    [TerminationDate] [datetime] NULL
)
-- yyyymmdd literals are read the same way under every language setting
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160101', '20160102')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160201', '20170130')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160301', '20160505')
If I need to know the count of active employees for Feb 2016, I used below query:
-- Employees active at any point in February 2016: hired before 1 March and
-- either not terminated (NULL) or terminated on/after 1 February.
SELECT EmployeeID, HireDate, TerminationDate
FROM @Employees
WHERE HireDate < '20160301'
  AND (TerminationDate IS NULL OR TerminationDate >= '20160201')
However, I'm having difficulty on an easy method to find active employees for each month. For example, I want to know count of active employees from Jan 2016 to Jan 2017 every month.
Do I need to have separate table with each month and use some CTE to cross reference both tables and provide report for every month? Any directions will be grateful.
With the inputs so far, I have got to this. It seems to be working fine except for Jan 2016 where I have one employee active though only for 2 days, it is not reporting since I know I'm validating month-end. Any tweaks?
-- Active employee count per month between @startDate and @endDate.
DECLARE @startDate DATETIME
DECLARE @endDate DATETIME
SET @startDate = '20140131'
SET @endDate = '20170531'
DECLARE @Employees TABLE
(
    [EmployeeID] [int] IDENTITY(1,1) NOT NULL,
    [HireDate] [datetime] NOT NULL,
    [TerminationDate] [datetime] NULL
)
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160101', '20160102')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160201', '20170130')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160301', '20160505')
-- One row per month, keyed by the first day of the month. Stepping from a
-- month START avoids the drift of DATEADD(MONTH, 1, '...-01-31'), which lands
-- on 28 February and then stays on the 28th for every later month.
;WITH MyListOfDates (MonthStart)
AS
(
    SELECT DATEADD(MONTH, DATEDIFF(MONTH, 0, @startDate), 0)
    UNION ALL
    SELECT DATEADD(MONTH, 1, MonthStart)
    FROM MyListOfDates
    WHERE DATEADD(MONTH, 1, MonthStart) <= @endDate
)
SELECT YEAR(mld.MonthStart) AS [Year], MONTH(mld.MonthStart) AS [Month], COUNT(*) AS ActiveEmployeeCount
FROM MyListOfDates mld
-- An employee is active in a month when the employment interval overlaps it:
-- hired before the next month starts and not terminated before this one starts.
INNER JOIN @Employees e
    ON e.HireDate < DATEADD(MONTH, 1, mld.MonthStart)
   AND (e.TerminationDate IS NULL OR e.TerminationDate >= mld.MonthStart)
GROUP BY mld.MonthStart
ORDER BY mld.MonthStart
One option is to use an ad-hoc tally table. A tally/calendar table would do the trick as well
I opted for the DatePart DAY to capture any portion of the month
Example
-- Employee count per month over a day-level ad-hoc tally: an employee is
-- counted in a month if employed on at least one day of it.
Declare @Date1 date = '2016-01-01'
Declare @Date2 date = '2017-01-31'
Select [Year]  = DatePart(YEAR, A.D)
      ,[Month] = DatePart(MONTH, A.D)
      ,EmpCnt  = count(DISTINCT B.[EmployeeID])
From  (
        -- one row per day from @Date1 to @Date2 inclusive
        Select Top (DateDiff(DAY, @Date1, @Date2) + 1)
               D = DateAdd(DAY, -1 + Row_Number() Over (Order By (Select Null)), @Date1)
        From master..spt_values n1
        Cross Join master..spt_values n2
      ) A
-- a NULL TerminationDate is treated as employed up to now
Left Join @Employees B on A.D between B.[HireDate] and Coalesce(B.[TerminationDate], GetDate())
Group By DatePart(YEAR, A.D), DatePart(MONTH, A.D)
Order By [Year], [Month]
Returns
Year Month EmpCnt
2016 1 1
2016 2 1
2016 3 2
2016 4 2
2016 5 2
2016 6 1
2016 7 1
2016 8 1
2016 9 1
2016 10 1
2016 11 1
2016 12 1
2017 1 1
As Requested - Some Commentary
First we create a series of dates between X and Y. This is done via an ad-hoc tally table, Row_Number(), and DateAdd(). For example:
-- Ad-hoc tally: one row per day from @Date1 to @Date2 inclusive.
Declare @Date1 date = '2016-01-01'
Declare @Date2 date = '2017-01-31'
Select Top (DateDiff(DAY, @Date1, @Date2) + 1)
       D = DateAdd(DAY, -1 + Row_Number() Over (Order By (Select Null)), @Date1)
From master..spt_values n1
Cross Join master..spt_values n2
Returns
D
2016-01-01
2016-01-02
2016-01-03
2016-01-04
...
2017-01-29
2017-01-30
2017-01-31
Notice that we are performing a cross join on spt_values (n1 and n2). This is because spt_values has only 2,523 records, which on its own would cover only about 6 years of days. The cross join expands the potential span to roughly 6.3 million days --- a ridiculous number, but you would never see that volume because we specify TOP ( nDays )
Once we have this dataset of target days, we then perform a LEFT JOIN to the EMPLOYEE table where D is between the Hire and Term dates. This actually creates a large temporal dataset. For example, if an employee was active for only 10 days, we would see 10 records: one for each day.
Then we perform a simple aggregation COUNT(DISTINCT EmployeeID) group by year and month.
In case anyone interested in the solution using CTEs. Preferred solution is provided by #JohnCappelleti
-- Active employees per month via a day-level recursive CTE: an employee is
-- counted in a month if employed on at least one generated day of it.
DECLARE @startDate DATETIME
DECLARE @endDate DATETIME
SET @startDate = '20140131'
SET @endDate = '20170531'
DECLARE @Employees TABLE
(
    [EmployeeID] [int] IDENTITY(1,1) NOT NULL,
    [HireDate] [datetime] NOT NULL,
    [TerminationDate] [datetime] NULL
)
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160101', '20160102')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160201', '20170130')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160301', '20160505')
-- one row per calendar day, @startDate..@endDate (well over 100 rows, hence MAXRECURSION 0)
;WITH MyListOfDates (MyCalendarDay)
AS
(
    SELECT @startDate
    UNION ALL
    SELECT DATEADD(DAY, 1, MyCalendarDay)
    FROM MyListOfDates
    WHERE MyCalendarDay < @endDate
)
SELECT  YEAR(mld.MyCalendarDay) AS [Year],
        MONTH(mld.MyCalendarDay) AS [Month],
        COUNT(DISTINCT e.EmployeeID) AS ActiveEmployeeCount
FROM MyListOfDates AS mld
CROSS JOIN @Employees AS e
WHERE e.HireDate <= mld.MyCalendarDay
  -- a NULL TerminationDate means the employee is still active
  AND (e.TerminationDate IS NULL OR e.TerminationDate >= mld.MyCalendarDay)
GROUP BY YEAR(mld.MyCalendarDay), MONTH(mld.MyCalendarDay)
ORDER BY [Year], [Month]
OPTION (MAXRECURSION 0)
I have already queried #Techspider to explain the output in tabular form.
I am not using ROW_Number or distinct.
I am not using CROSS Join because My output is Each Month,Each Year (not each day,each month,each year).
Also you have to find each month count
Also finding count for such long duration will slow down
Try this,
-- Active employees per month without generating a row per day: one row per
-- month from spt_values, joined on interval overlap.
DECLARE @startDate DATETIME
DECLARE @endDate DATETIME
SET @startDate = '20160101'
SET @endDate = '20170131'
DECLARE @Employees TABLE
(
    [EmployeeID] [int] IDENTITY(1,1) NOT NULL,
    [HireDate] [datetime] NOT NULL,
    [TerminationDate] [datetime] NULL
)
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160101', '20160102')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160201', '20170130')
INSERT INTO @Employees (HireDate, TerminationDate) VALUES ('20160301', '20160505')
SELECT  DATEPART(year, n.STDT) AS [Year],
        DATEPART(month, n.STDT) AS [Month],
        COUNT(e.[EmployeeID]) AS EmpCount   -- 0 for months with no active employee
FROM
(
    SELECT
        -- first day of the month, and first day of the following month
        DATEADD(month, DATEDIFF(month, 0, @startDate) + v.number, 0)     AS STDT,
        DATEADD(month, DATEDIFF(month, 0, @startDate) + v.number + 1, 0) AS NextSTDT
    FROM master.dbo.spt_values AS v
    -- NOTE(review): type 'P' is the conventional 0..2047 number list in this
    -- undocumented table - confirm on the target server.
    WHERE v.type = 'P'
      AND v.number <= DATEDIFF(month, @startDate, @endDate)
) AS n
-- Overlap test: employed for any part of the month, not for the whole month.
LEFT JOIN @Employees AS e
    ON e.HireDate < n.NextSTDT
   AND (e.TerminationDate IS NULL OR e.TerminationDate >= n.STDT)
GROUP BY n.STDT
ORDER BY n.STDT

CountDistinct() is counting value twice when grouped in SSRS

-- Sample data (table variables, @ sigil).
declare @Table1 as Table ( RegisterId Int Identity, UnitId Int, DateRegistered date);
declare @Table2 as Table ( Id Int Identity, RegisterId Int, Rep1 int, Rep2 int, DateCreated Date );
declare @Table3 as Table ( UnitId int Identity, UnitName varchar(40), SquadName varchar(40))
insert into @Table1 ( UnitId, DateRegistered )
values
( 1, '20160115' );
insert into @Table2 ( RegisterId, Rep1, Rep2, DateCreated )
values
( 1, 3, 4, '20160122' ), ( 1, 10, 4, '20160129' ), ( 1, 32, 45, '20160210' );
-- The column list must name both columns supplied by each row.
insert into @Table3 ( UnitName, SquadName )
values
( 'Tango', 'West' ), ( 'Lima', 'West' ), ( 'Foxtrot', 'West' );
-- Report dataset: one row per check-in (Table2 row) with its unit name and
-- the month/year names of the check-in date.
SELECT t3.UnitName
, t2.RegisterId
, t2.DateCreated
, t2.Rep1 + t2.Rep2 as 'TotalReps'
, DateName(month, t2.DateCreated) as 'Month'
, DateName(year, t2.DateCreated) as 'Year'
FROM @Table1 t1
INNER JOIN @Table2 t2 ON t1.RegisterId = t2.RegisterId
INNER JOIN @Table3 t3 ON t1.UnitId = t3.UnitId
Building a report in SSRS, the above is my query. Report parameters are a start date, enddate and UnitId(s).
In the report I have 3 Row Groups - Month, Year, SquadName. In the report I am using the TotalReps for totalreps, CountDistinct(Field!RegisterId.Value) for the ConfirmedRegisters and Count(Field!RegisterId.Value) for CheckIn. THe TOTALs are just SUMS of the expressions, SUM(CountDistinct(Field!RegisterId.Value)).
The report shows like:
TotalReps ConfirmedRegisters CheckIns
WEST
2016
Jan
21 1 2
Feb
77 1 1
TOTAL 98 1 3
Some definitions. A ConfirmedRegister means the Id exists in Table1 AND Table2. A Checkin is just a count of Table2 Ids. So to be a checkin, there must be a row in Table2 and a ConfirmedREgister can ONLY BE COUNTED ONCE, regardless of the number of checkins and when they happen. So if a Table1 register occurs in Jan 2016 and there are checkins off the registerid in Jan and Feb 2016 as our test data suggests, the report should show a zero in the ConfirmedRegisters columns for Feb because the RegisterId was counted in Jan.
Should be:
TotalReps ConfirmedRegisters CheckIns
WEST
2016
Jan
21 1 2
Feb
77 0 1
TOTAL 98 1 3
Notice the TOTAL Confirmed Registers is showing correct, I guess because it is totalling the whole date range. But The MONTHLY totals are incorrect for the CONFIRMEDREGISTERS columns because it is counting RegisterID for Jan and Feb where it should only count the Jan and put nothing or 0 for Feb.
Not sure if I need do fix this in the query or the report.
I fixed this by using a CTE with a windowing function (row_number() over(partition ...) to mimic a 'First()' type function so I could then count the first occurrence of each RegisterId from Table2.

Year Over Year (YOY) Distinct Count

EDITED:
I'm working in Sql Server 2005 and I'm trying to get a year over year (YOY) count of distinct users for the current fiscal year (say Jun 1-May 30) and the past 3 years. I'm able to do what I need by running a select statement four times, but I can't seem to find a better way at this point. I'm able to get a distinct count for each year in one query, but I need it to a cumulative distinct count. Below is a mockup of what I have so far:
-- Distinct users per fiscal year (not cumulative). The @year...Start/End
-- variables are expected to be declared by the surrounding batch.
SELECT [Year], COUNT(DISTINCT UserID)
FROM
(
    SELECT u.uID AS UserID,
        CASE
            WHEN dd.ddEnd BETWEEN @yearOneStart AND @yearOneEnd THEN 'Year1'
            WHEN dd.ddEnd BETWEEN @yearTwoStart AND @yearTwoEnd THEN 'Year2'
            WHEN dd.ddEnd BETWEEN @yearThreeStart AND @yearThreeEnd THEN 'Year3'
            WHEN dd.ddEnd BETWEEN @yearFourStart AND @yearFourEnd THEN 'Year4'
            ELSE 'Other'
        END AS [Year]
    FROM Users AS u
    INNER JOIN UserDataIDMatch AS udim
        ON u.uID = udim.udim_FK_uID
    INNER JOIN DataDump AS dd
        ON udim.udimUserSystemID = dd.ddSystemID
) AS Data
-- drop rows outside the four fiscal years (comparison operator restored)
WHERE LOWER([Year]) <> 'other'
GROUP BY
    [Year]
I get something like:
Year1 1
Year2 1
Year3 1
Year4 1
But I really need:
Year1 1
Year2 2
Year3 3
Year4 4
Below is a rough schema and set of values (updated for simplicity). I tried to create a SQL Fiddle, but I'm getting a disk space error when I attempt to build the schema.
-- Schema and sample rows: users, their external system ids, and dated records.
CREATE TABLE Users
(
    uID int identity primary key,
    uFirstName varchar(75),
    uLastName varchar(75)
);
INSERT INTO Users (uFirstName, uLastName)
VALUES
('User1', 'User1'),
('User2', 'User2'),
('User3', 'User3'),
('User4', 'User4');
CREATE TABLE UserDataIDMatch
(
    udimID int identity primary key,
    udim_FK_uID int foreign key references Users(uID),
    udimUserSystemID varchar(75)
);
INSERT INTO UserDataIDMatch (udim_FK_uID, udimUserSystemID)
VALUES
(1, 'SystemID1'),
(2, 'SystemID2'),
(3, 'SystemID3'),
(4, 'SystemID4');
CREATE TABLE DataDump
(
    ddID int identity primary key,
    ddSystemID varchar(75),
    ddEnd datetime
);
-- 1 October of each year, as yyyymmdd so the session date format cannot swap
-- day and month.
INSERT INTO DataDump (ddSystemID, ddEnd)
VALUES
('SystemID1', '20131001'),
('SystemID2', '20141001'),
('SystemID3', '20151001'),
('SystemID4', '20161001');
Unless I'm missing something, you just want to know how many records there are where the date is less than or equal to the current fiscal year.
-- Cumulative distinct users per fiscal year (Year1 .. Year4).
DECLARE @YearOneStart DATETIME, @YearOneEnd DATETIME,
        @YearTwoStart DATETIME, @YearTwoEnd DATETIME,
        @YearThreeStart DATETIME, @YearThreeEnd DATETIME,
        @YearFourStart DATETIME, @YearFourEnd DATETIME
SELECT  @YearOneStart = '20130601', @YearOneEnd = '20140531',
        @YearTwoStart = '20140601', @YearTwoEnd = '20150531',
        @YearThreeStart = '20150601', @YearThreeEnd = '20160531',
        @YearFourStart = '20160601', @YearFourEnd = '20170531'
;WITH labelled AS
(
    -- Half-open ranges so a time of day on the last day of a fiscal year still
    -- falls inside that year; rows outside all four years get NULL.
    SELECT u.uID AS UserID,
        CASE
            WHEN dd.ddEnd >= @YearOneStart   AND dd.ddEnd < DATEADD(day, 1, @YearOneEnd)   THEN 'Year1'
            WHEN dd.ddEnd >= @YearTwoStart   AND dd.ddEnd < DATEADD(day, 1, @YearTwoEnd)   THEN 'Year2'
            WHEN dd.ddEnd >= @YearThreeStart AND dd.ddEnd < DATEADD(day, 1, @YearThreeEnd) THEN 'Year3'
            WHEN dd.ddEnd >= @YearFourStart  AND dd.ddEnd < DATEADD(day, 1, @YearFourEnd)  THEN 'Year4'
        END AS [Year]
    FROM Users AS u
    INNER JOIN UserDataIDMatch AS udim
        ON u.uID = udim.udim_FK_uID
    INNER JOIN DataDump AS dd
        ON udim.udimUserSystemID = dd.ddSystemID
),
cte AS
(
    -- Out-of-range rows are removed here: an 'Other' label would sort before
    -- 'Year1' and be added to every cumulative total.
    SELECT UserID, [Year]
    FROM labelled
    WHERE [Year] IS NOT NULL
)
SELECT
    cteMain.[Year],
    (SELECT COUNT(DISTINCT cteInner.UserID)
     FROM cte cteInner
     WHERE cteInner.[Year] <= cteMain.[Year]) AS CumulativeUsers
FROM (SELECT DISTINCT [Year] FROM cte) AS cteMain
ORDER BY cteMain.[Year]
Concept using an existing query
I have done something similar for finding out the number of distinct customers who bought something in between years, I modified it to use your concept of year, the variables you add would be that start day and start month of the year and the start year and end year.
Technically there is a way to avoid using a loop but this is very clear and you can't go past year 9999 so don't feel like putting clever code to avoid a loop makes sense
Tips for speeding up the query
Also when matching dates make sure you are comparing dates, and not comparing a function evaluation of the column as that would mean running the function on every record set and would make indices useless if they existed on dates (which they should). Use date add on
zero to initiate your target dates subtracting 1900 from the year, one from the month and one from the target date.
Then self join on the table where the dates create a valid range (i.e. yearlessthan to yearmorethan) and use a subquery to create a sum based on that range. Since you want accumulative from the first year to the last limit the results to starting at the first year.
At the end you will be missing the first year as by our definition it does not qualify as a range, to fix this just do a union all on the temp table you created to add the missing year and the number of distinct values in it.
-- Cumulative distinct customers per fiscal year, counted from the first
-- fiscal year up to the end of each later year.
DECLARE @yearStartMonth INT = 6, @yearStartDay INT = 1
DECLARE @yearStart INT = 2008, @yearEnd INT = 2012
-- first day of the first fiscal year (date arithmetic from day 0 = 1900-01-01)
DECLARE @firstYearStart DATE =
    DATEADD(day, @yearStartDay-1,
    DATEADD(month, @yearStartMonth-1,
    DATEADD(year, @yearStart-1900, 0)))
-- day before the fiscal-year start that falls in @yearEnd
DECLARE @lastYearEnd DATE =
    DATEADD(day, @yearStartDay-2,
    DATEADD(month, @yearStartMonth-1,
    DATEADD(year, @yearEnd-1900, 0)))
DECLARE @firstdayofcurrentyear DATE = @firstYearStart
DECLARE @yearnumber INT = YEAR(@firstdayofcurrentyear)
DECLARE @tempTableYearBounds TABLE
(
    startDate DATE NOT NULL,
    endDate DATE NOT NULL,   -- last day of the fiscal year, inclusive
    YearNumber INT NOT NULL
)
WHILE @firstdayofcurrentyear < @lastYearEnd
BEGIN
    -- endDate is derived from the start on every pass so it cannot drift
    INSERT INTO @tempTableYearBounds (startDate, endDate, YearNumber)
    VALUES (@firstdayofcurrentyear,
            DATEADD(day, -1, DATEADD(year, 1, @firstdayofcurrentyear)),
            @yearnumber)
    SET @firstdayofcurrentyear = DATEADD(year, 1, @firstdayofcurrentyear)
    SET @yearnumber = @yearnumber + 1
END
-- COUNT(DISTINCT ...) over the whole span up to each year's end counts a
-- customer once, however many years they bought in. Summing per-year distinct
-- counts would count repeat customers once per year.
SELECT
    b.YearNumber AS [Year],
    COUNT(DISTINCT t.CustomerNumber) AS CustomerCount
FROM @tempTableYearBounds AS b
LEFT JOIN Ticket AS t
    ON t.TicketDate >= @firstYearStart
   AND t.TicketDate < DATEADD(day, 1, b.endDate)  -- half-open: keeps times on the last day
GROUP BY b.YearNumber
ORDER BY b.YearNumber
It isn't efficient but at the end the temp table you are dealing with is so small I don't think it really matters, and adds a lot more versatility versus the method you are using.
Update: I updated the query to reflect the result you wanted with my data set, I was basically testing to see if this was faster, it was faster by 10 seconds but the dataset I am dealing with is relatively small. (from 12 seconds to 2 seconds).
Using your data
I changed the tables you gave to temp tables so it didn't affect my environment, and I removed the foreign key because foreign keys are not supported for temp tables. The logic is the same as in the example above, just adapted to your dataset.
-- Cumulative distinct users per fiscal year using the question's schema.
DECLARE @startYear INT = 2013, @endYear INT = 2016
DECLARE @yearStartMonth INT = 10, @yearStartDay INT = 1
-- start of the first fiscal year
DECLARE @startDate DATETIME = DATEADD(day, @yearStartDay-1,
                              DATEADD(month, @yearStartMonth-1,
                              DATEADD(year, @startYear-1900, 0)))
-- start of the fiscal year AFTER @endYear (exclusive upper bound)
DECLARE @endDate DATETIME = DATEADD(day, @yearStartDay-1,
                            DATEADD(month, @yearStartMonth-1,
                            DATEADD(year, @endYear-1899, 0)))
-- one row per fiscal year; the range is half-open: [StartDate, EndDate)
DECLARE @tempDateRangeTable TABLE
(
    [Year] INT NOT NULL,
    StartDate DATETIME NOT NULL,
    EndDate DATETIME NOT NULL
)
DECLARE @currentDate DATETIME = @startDate
DECLARE @nextDate DATETIME
WHILE @currentDate < @endDate
BEGIN
    SET @nextDate = DATEADD(YEAR, 1, @currentDate)
    INSERT INTO @tempDateRangeTable ([Year], StartDate, EndDate)
    VALUES (YEAR(@currentDate), @currentDate, @nextDate)
    SET @currentDate = @nextDate
END
CREATE TABLE Users
(
    uID int identity primary key,
    uFirstName varchar(75),
    uLastName varchar(75)
);
INSERT INTO Users (uFirstName, uLastName)
VALUES
('User1', 'User1'),
('User2', 'User2'),
('User3', 'User3'),
('User4', 'User4');
CREATE TABLE UserDataIDMatch
(
    udimID int identity primary key,
    udim_FK_uID int foreign key references Users(uID),
    udimUserSystemID varchar(75)
);
INSERT INTO UserDataIDMatch (udim_FK_uID, udimUserSystemID)
VALUES
(1, 'SystemID1'),
(2, 'SystemID2'),
(3, 'SystemID3'),
(4, 'SystemID4');
CREATE TABLE DataDump
(
    ddID int identity primary key,
    ddSystemID varchar(75),
    ddEnd datetime
);
-- 1 October of each year, written as yyyymmdd
INSERT INTO DataDump (ddSystemID, ddEnd)
VALUES
('SystemID1', '20131001'),
('SystemID2', '20141001'),
('SystemID3', '20151001'),
('SystemID4', '20161001');
-- Distinct users per individual fiscal year; years without data show 0.
-- No 'Other' bucket is needed because the join only keeps in-range rows.
SELECT tdr.[Year], COUNT(DISTINCT um.udim_FK_uID) AS UserCount
FROM @tempDateRangeTable AS tdr
LEFT JOIN DataDump AS dd
    ON dd.ddEnd >= tdr.StartDate AND dd.ddEnd < tdr.EndDate
LEFT JOIN UserDataIDMatch AS um
    ON um.udimUserSystemID = dd.ddSystemID
GROUP BY tdr.[Year]
ORDER BY tdr.[Year]
-- Cumulative distinct users from the first fiscal year to the end of each
-- year. Counting distinct ids over the whole span counts a user once even if
-- they appear in several years.
SELECT
    'Year' + CAST(tdr.[Year] - @startYear + 1 AS varchar(10)) AS YearNumber,
    COUNT(DISTINCT um.udim_FK_uID) AS UserCount
FROM @tempDateRangeTable AS tdr
LEFT JOIN DataDump AS dd
    ON dd.ddEnd >= @startDate AND dd.ddEnd < tdr.EndDate
LEFT JOIN UserDataIDMatch AS um
    ON um.udimUserSystemID = dd.ddSystemID
GROUP BY tdr.[Year]
ORDER BY tdr.[Year]
Benefits over using a WHEN CASE based approach
More Robust
Do not need to explicitly determine the end and start dates of each year, just like in a logical year just need to know the start and end date. Can easily change what you are looking for with some simple modifications(i.e. say you want all 2 year ranges or 3 year).
Will be faster if the database is indexed properly
Since you are searching based on the same data type you can utilize the indices that should be created on the date columns in the database.
Cons
More Complicated
The query is a lot more complicated to follow, even though it is more robust there is a lot of extra logic in the actual query.
In some circumstance will not provide good boost to execution time
If the dataset is very small, or the number of dates being compared isn't significant then this could not save enough time to be worth it.
In SQL Server, once a WHEN inside a CASE matches, evaluation stops and the remaining WHEN clauses are not evaluated. Hence you can't accumulate that way.
if I understand you correctly, this would show your results.
-- Cumulative distinct users at the end of each of three fiscal years.
-- @FiscalYearStart and @FiscalYearEnd1..3 are expected to be declared by the
-- surrounding batch.
;WITH cte AS
(
    SELECT dd.ddEnd [dateEnd], u.uID AS UserID
    FROM Users AS u
    INNER JOIN UserDataIDMatch AS udim
        ON u.uID = udim.udim_FK_uID
    INNER JOIN DataDump AS dd
        ON udim.udimUserSystemID = dd.ddSystemID
    WHERE dd.ddEnd BETWEEN @FiscalYearStart AND @FiscalYearEnd3
)
-- Each branch aggregates without GROUP BY: it returns exactly one row, and
-- grouping by a variable is not allowed. Labels follow the start of each year.
SELECT DATEPART(year, @FiscalYearStart) AS [Year], COUNT(DISTINCT UserID) AS CntUserID
FROM cte
WHERE dateEnd BETWEEN @FiscalYearStart AND @FiscalYearEnd1
UNION ALL
SELECT DATEPART(year, @FiscalYearEnd1) AS [Year], COUNT(DISTINCT UserID) AS CntUserID
FROM cte
WHERE dateEnd BETWEEN @FiscalYearStart AND @FiscalYearEnd2
UNION ALL
-- NOTE(review): labelled with @FiscalYearEnd2 to follow the pattern of the
-- first two branches - confirm the intended label.
SELECT DATEPART(year, @FiscalYearEnd2) AS [Year], COUNT(DISTINCT UserID) AS CntUserID
FROM cte
WHERE dateEnd BETWEEN @FiscalYearStart AND @FiscalYearEnd3

SQL Server Date Range

I have a SQL Server table that contains the following dates (OpenDate, ClosedDate, WinnerAnnouncedDate).
I have 3 rows, for 3 different categories.
I'm trying to figure out how I would get the following scenario:
Today is 14th March. I want to find out which category had the winner announced, but the following category hasn't started yet.
So if Row 1 had OpenDate = 12th Feb, ClosedDate = 10th March, WinnerAnnounced = 12th March
Row 2 had an OpenDate of 16th March I need it to find Row 1 because the winner has been announced, but the following category hasn't opened yet.
This may seem a little confusing, so I'll be ready to clear things up if required.
I'm not 100% clear on what you're saying, but I think it's something like:
Find the last winner announced from categories that have a start date earlier than now.
If that's the case then something like this might work for you. I'm assuming that your table is called #dates as you haven't included the table name
-- Temp table standing in for the asker's category table.
CREATE TABLE #dates
(
    id                  INT IDENTITY(1,1) PRIMARY KEY,
    openDate            DATETIME,
    closedDate          DATETIME,
    WinnerAnnouncedDate DATETIME
)

-- Three sample categories: one with a winner announced, one closed without a
-- winner, one not yet opened.
INSERT INTO #dates VALUES ('12 feb 2012', '10 march 2012', '13 march 2012')
INSERT INTO #dates VALUES ('12 feb 2012', '10 march 2012', null)
INSERT INTO #dates VALUES ('16 mar 2012', null, null)

-- The row with the highest id among categories that have already opened and
-- have a winner announced. id is the primary key, so TOP 1 by id descending
-- selects the same row as matching on MAX(id).
SELECT TOP 1 d.*
FROM #dates AS d
WHERE d.openDate <= GETDATE()
  AND d.WinnerAnnouncedDate IS NOT NULL
ORDER BY d.id DESC
--drop table #dates
-- Most recently announced winner as of now. Descending order is required:
-- ascending order would return the EARLIEST announcement instead.
SELECT TOP 1 WITH TIES *
FROM atable
WHERE WinnerAnnouncedDate <= GETDATE()
ORDER BY WinnerAnnouncedDate DESC
WITH TIES will return several rows if several WinnerAnnouncedDate values match the condition and have the same top value.