I have two tables used for storing employee attendance details.
One table stores the employee ID with the respective in and out times as datetime values; the second table stores the other employee details such as employee ID, employee name, etc.
I need to generate a report that shows the total hours worked by each employee per day, plus a status column: Present if total hours > 4.5, otherwise Absent.
I also need to count the number of days on which an employee has the status Present, and the number of days on which the employee's total hours are greater than 6 and less than 8.5.
I have written a query that fetches all of these details, but the performance is unacceptable: it takes around 30-35 minutes to fetch everything.
If I exclude the day-counting logic, it takes around 1-2 minutes.
the table structure is
1st: Employee table
EmployeeID, EmployeeName.....other details(not necessary at this moment)
Attendance table
Emp_ID, INOUT_Time
My query
-- Attendance report: one row per employee per day for every day d with
-- @startDate <= d and d + 1 day <= @endDate (the same days the original
-- day-by-day loop visited), with first/last punch, hours worked, a
-- present/absent flag and per-employee day counts.
--
-- Set-based rewrite of the original WHILE loop + correlated COUNT(*) subqueries:
--   * variables use the T-SQL @ sigil;
--   * day windows are half-open, so a punch at exactly midnight belongs to one day only;
--   * hour buckets are half-open, so values such as 5.995 or 8.495 no longer fall in a gap;
--   * the per-employee counts are windowed aggregates computed in the same pass.
DECLARE @startDate datetime;
DECLARE @endDate datetime;
SET @startDate = '20131216';   -- yyyymmdd is independent of the session language
SET @endDate = '20140116';

IF OBJECT_ID('tempdb..##output') IS NOT NULL
    DROP TABLE ##output;

WITH ReportDays AS (
    -- One row per reported day.
    SELECT @startDate AS [Date]
    WHERE DATEADD(DAY, 1, @startDate) <= @endDate
    UNION ALL
    SELECT DATEADD(DAY, 1, [Date])
    FROM ReportDays
    WHERE DATEADD(DAY, 2, [Date]) <= @endDate
),
DailyPunches AS (
    -- First and last punch per employee per day; employees without punches
    -- on a day still get a row (InTime and OutTime are NULL).
    SELECT
        E.EmployeeID AS EmployeeCode,
        ISNULL(LTRIM(RTRIM(E.FirstName)),'') +' '+ISNULL(LTRIM(RTRIM(E.LastName)),'') AS EmployeeName,
        D.[Date],
        MIN(AD.INOUT_Time) AS InTime,
        -- A single punch gives MIN = MAX; treat that as "no out time".
        NULLIF(MAX(AD.INOUT_Time), MIN(AD.INOUT_Time)) AS OutTime
    FROM employees AS E WITH (NOLOCK)
    CROSS JOIN ReportDays AS D
    LEFT JOIN Attendance AS AD
        ON AD.Emp_ID = E.EmployeeID
       AND AD.INOUT_Time >= D.[Date]
       AND AD.INOUT_Time < DATEADD(DAY, 1, D.[Date])
    GROUP BY E.EmployeeID, E.FirstName, E.LastName, D.[Date]
),
Worked AS (
    -- Hours between first and last punch: NULL with no punches, 0 with one punch.
    SELECT
        P.EmployeeCode,
        P.EmployeeName,
        P.[Date],
        P.InTime,
        P.OutTime,
        DATEDIFF(MINUTE, P.InTime, ISNULL(P.OutTime, P.InTime)) / 60.0 AS HoursWorked
    FROM DailyPunches AS P
)
SELECT
    W.EmployeeCode,
    W.EmployeeName,
    W.[Date],
    CONVERT(varchar(10), W.InTime, 108) AS INTime,
    CONVERT(varchar(10), W.OutTime, 108) AS Outtime,
    CONVERT(varchar(10), W.HoursWorked) AS TotalHrs,
    CASE WHEN W.HoursWorked >= 4.5 THEN 'P' ELSE 'Abs' END AS [Status],
    CONVERT(varchar(30), @startDate, 105) AS StartDate,
    CONVERT(varchar(30), @endDate, 105) AS EndDate,
    SUM(CASE WHEN W.HoursWorked >= 4.5 THEN 1 ELSE 0 END)
        OVER (PARTITION BY W.EmployeeCode) AS TotalPresent,
    SUM(CASE WHEN W.HoursWorked >= 8.5 THEN 1 ELSE 0 END)
        OVER (PARTITION BY W.EmployeeCode) AS gt8point5,
    SUM(CASE WHEN W.HoursWorked >= 6 AND W.HoursWorked < 8.5 THEN 1 ELSE 0 END)
        OVER (PARTITION BY W.EmployeeCode) AS gt6lessthan8,
    SUM(CASE WHEN W.HoursWorked >= 4.5 AND W.HoursWorked < 6 THEN 1 ELSE 0 END)
        OVER (PARTITION BY W.EmployeeCode) AS gt4point5lessthan6
INTO ##output
FROM Worked AS W
OPTION (MAXRECURSION 0);   -- the day generator may need more than 100 recursion levels

SELECT
    AttendanceReport.EmployeeCode,
    AttendanceReport.EmployeeName,
    AttendanceReport.[Date],
    AttendanceReport.INTime,
    AttendanceReport.Outtime,
    AttendanceReport.TotalHrs,
    AttendanceReport.[Status],
    AttendanceReport.StartDate,
    AttendanceReport.EndDate,
    AttendanceReport.TotalPresent,
    AttendanceReport.gt8point5,
    AttendanceReport.gt6lessthan8,
    AttendanceReport.gt4point5lessthan6
FROM ##output AS AttendanceReport
ORDER BY AttendanceReport.EmployeeCode, AttendanceReport.[Date];
Any suggestion for improving the performance, especially the day counting logic
A quick look at your query suggest the following indexes.
-- Assuming EmployeeID is not already the clustered index key on Employees:
-- covers the lookup of the name columns the report reads.
CREATE INDEX IX_Employees_EmployeeID
ON Employees (EmployeeID)
INCLUDE (FirstName, LastName)
-- The Attendance table's key column is Emp_ID (not EmployeeID); the report
-- joins on Emp_ID and range-filters on INOUT_Time.
CREATE INDEX IX_Attendance_EmployeeID_INOUTTime
ON Attendance (Emp_ID, INOUT_Time)
You could also create an index for @formatTable, but that depends on what your query plan shows and how many rows are generated.
Related
I have a face reader machine. The only data I get from it is employeecode and punchdate. There is no separate machine for IN and OUT, so I cannot tell which punch is an IN and which is an OUT. I tried numbering the punches with ROW_NUMBER, but I still get a lot of duplication.
Multiple punches per day
Night shift calculation
miss punch
SQL
-- Pairs each employee's punches into (in, out) intervals and reports the
-- first login and last logout per employee.
--
-- Changes from the original:
--   * @MissedPunchThreshold uses the T-SQL variable sigil;
--   * the "recursive member" used to read #Temp1 inside the very INSERT that
--     fills it, so it never contributed rows; it is now a real recursive CTE;
--   * sequence/hour columns are int instead of nvarchar, because they are
--     used in "+ 1" arithmetic and numeric comparisons.

-- Step 1: number every punch per employee in chronological order.
CREATE TABLE #EmployeeTimeSequence (employeecode nvarchar(500),
                                    punchdate datetime,
                                    Punchsequence int);
INSERT INTO #EmployeeTimeSequence (employeecode, punchdate, Punchsequence)
SELECT employeecode,
       punchdate,
       ROW_NUMBER() OVER (PARTITION BY employeecode ORDER BY punchdate) AS PunchSequence
FROM attendance_trial; --Replace this with your dbo.AttLog table if this solution works for you
/*WHERE clause could be added here to filter for specific dates or EnrollNumbers */

/* If the time between punches is greater than this threshold (in hours), it is
   treated as a missed punch in the logic below. Remove this or modify as needed. */
DECLARE @MissedPunchThreshold int;
SET @MissedPunchThreshold = 20;

-- Step 2: walk the punch sequence, pairing an "in" punch with the next punch.
CREATE TABLE #Temp1 (employeecode nvarchar(500),
                     CheckDate datetime,
                     Time_In datetime,
                     Time_Out datetime,
                     HoursBetweenPunch int,
                     PunchOutSequence int);

WITH PunchPairs AS (
    /* Anchor member: the first punch of each employee and the punch after it (if any) */
    SELECT ETS_In.employeecode,
           CAST(ETS_In.punchdate AS datetime) AS CheckDate,
           ETS_In.punchdate AS Time_In,
           ETS_Out.punchdate AS Time_Out,
           DATEDIFF(HOUR, ETS_In.punchdate, ETS_Out.punchdate) AS HoursBetweenPunch,
           ETS_Out.Punchsequence AS PunchOutSequence
    FROM #EmployeeTimeSequence AS ETS_In
    LEFT JOIN #EmployeeTimeSequence AS ETS_Out
        ON ETS_In.employeecode = ETS_Out.employeecode
       AND ETS_Out.Punchsequence = ETS_In.Punchsequence + 1
    WHERE ETS_In.Punchsequence = 1
    UNION ALL
    /* Recursive member: build on top of the previously produced pair */
    SELECT ETS_In.employeecode,
           CAST(ETS_In.punchdate AS datetime) AS CheckDate,
           ETS_In.punchdate AS Time_In,
           ETS_Out.punchdate AS Time_Out,
           DATEDIFF(HOUR, ETS_In.punchdate, ETS_Out.punchdate) AS HoursBetweenPunch,
           ETS_Out.Punchsequence AS PunchOutSequence
    FROM PunchPairs AS ET
    INNER JOIN #EmployeeTimeSequence AS ETS_In -- the next "in" punch
        ON ET.employeecode = ETS_In.employeecode
       AND ETS_In.Punchsequence = CASE
                                      WHEN ET.HoursBetweenPunch > @MissedPunchThreshold -- more than threshold: missed punch
                                          THEN ET.PunchOutSequence     -- reuse the previous "out" punch as the next "in"
                                      ELSE ET.PunchOutSequence + 1     -- otherwise continue with the following punch
                                  END
    INNER JOIN #EmployeeTimeSequence AS ETS_Out -- the matching "out" punch
        ON ETS_In.employeecode = ETS_Out.employeecode
       AND ETS_Out.Punchsequence = ETS_In.Punchsequence + 1
)
INSERT INTO #Temp1 (employeecode, CheckDate, Time_In, Time_Out, HoursBetweenPunch, PunchOutSequence)
SELECT employeecode, CheckDate, Time_In, Time_Out, HoursBetweenPunch, PunchOutSequence
FROM PunchPairs
OPTION (MAXRECURSION 0);  -- one recursion level per pair; the default limit of 100 is too low

-- Step 3: blank out the "out" side of pairs that exceed the missed-punch threshold.
CREATE TABLE #att_final (employeecode nvarchar(500),
                         attendance_dt datetime,
                         Time_In datetime,
                         Time_out datetime,
                         HoursBetweenPunch int);
INSERT INTO #att_final (employeecode, attendance_dt, Time_In, Time_out, HoursBetweenPunch)
SELECT employeecode,
       DATEADD(DAY, DATEDIFF(DAY, 0, CheckDate), 0) AS dt,  -- CheckDate truncated to midnight
       Time_In,
       CASE WHEN HoursBetweenPunch > @MissedPunchThreshold THEN NULL ELSE Time_Out END AS Time_Out,
       CASE WHEN HoursBetweenPunch > @MissedPunchThreshold THEN NULL ELSE HoursBetweenPunch END AS HoursBetweenPunch
FROM #Temp1;

-- Step 4: one row per employee with the earliest login and latest logout.
SELECT #att_final.employeecode,
       MIN((CONVERT(varchar(10), Time_In, 111))) AS attendance_dt,
       MIN(Time_In) AS [LOGIN],
       MAX(Time_out) AS logout
FROM #att_final
GROUP BY #att_final.employeecode;

DROP TABLE #EmployeeTimeSequence;
DROP TABLE #Temp1;
DROP TABLE #att_final;
I need to create a patient census report that shows average number patients present per hour and per day of a week over a given time period. This would allow me to show, for example, over the last 6 months there was an average of 4 people in the ER on Mondays. I have a table valued function that will show the following for patients:
VisitID, FromDateTime, ThruDateTime, LocationID.
I was able to show the number of patients in, for example, the ER for a given day using the code below. But it is limited to one day only. (Adapted from http://www.sqlservercentral.com/Forums/Topic939818-338-1.aspx).
--Census Count by Date Range--
-- Hourly census for one day: for each hour of the day, counts the visits in
-- location 'ER' whose [FromDateTime, ThruDateTime] span overlaps that hour.
DECLARE @BeginDateParameter DateTime, @EndDateParameter DateTime
SET @BeginDateParameter = '20160201'
SET @EndDateParameter = '2016-02-01T23:59:59.000'  -- ISO 8601 form parses the same under any language setting
----------------------------------------------------
-- Create a temp table to hold the necessary values
-- plus an extra "x" field to track processing
----------------------------------------------------
IF OBJECT_ID('tempdb..#Temp') IS NOT NULL DROP TABLE #Temp
CREATE TABLE #Temp (ID INT Identity NOT NULL, VisitID VarChar(100), SourceID VarChar(100),
FromDateTime DateTime, ThruDateTime DateTime, x INT)
----------------------------------------------------
-- Populate the temp table with values from the
-- the actual table in the database
----------------------------------------------------
-- The explicit column list is required: the SELECT supplies three values while
-- #Temp has five insertable columns (SourceID and x stay NULL).
INSERT INTO #Temp (VisitID, FromDateTime, ThruDateTime)
SELECT VisitID, FromDateTime, ThruDateTime
FROM PatientFlowTable(@BeginDateParameter, @EndDateParameter)
WHERE (FromDateTime BETWEEN @BeginDateParameter AND DATEADD(DAY, 1, @EndDateParameter)
OR ThruDateTime BETWEEN @BeginDateParameter AND DATEADD(DAY, 1, @EndDateParameter))
AND LocationID = 'ER'
-- Given Period is taken as inclusive of given hours in the input (eg. 15:25:30 will be taken as 15:00:00)
-- first make sure that the minutes, seconds and milliseconds are removed from input range for clarity
set @BeginDateParameter = dateadd(hh, datepart(hh,@BeginDateParameter), convert(varchar(12),@BeginDateParameter,112))
set @EndDateParameter = dateadd(hh, datepart(hh,@EndDateParameter), convert(varchar(12),@EndDateParameter,112))
-- you may create this CTE by other ways (eg. from permanent Tally table)...
;with dh
as
(
-- 24 rows: start and end of each hour of the day of @BeginDateParameter
select top 24
DATEADD(hour,ROW_NUMBER() OVER (ORDER BY [Object_id])-1,convert(varchar(12),@BeginDateParameter,112)) as HoDstart
,DATEADD(hour,ROW_NUMBER() OVER (ORDER BY [Object_id]),convert(varchar(12),@BeginDateParameter,112)) as HoDend
,ROW_NUMBER() OVER (ORDER BY Object_id)-1 as DayHour
from sys.columns -- or any other (not very big) table which has more than 24 rows, just remember to change
-- [Object_id] in OVER (ORDER BY [Object_id]... to some existing column
)
select d.DayHour, count(w.VisitID) as PatientCount
from dh d
left join #Temp w
on w.[FromDateTime] < d.HoDend
and w.[ThruDateTime] >= d.HoDstart
where d.HoDstart between @BeginDateParameter and @EndDateParameter
group by d.DayHour
order by d.DayHour
-- Raw visits behind the counts. Note that by this point both parameters have
-- been truncated to the hour by the SET statements above.
SELECT VisitID, FromDateTime, ThruDateTime
FROM PatientFlowTable(@BeginDateParameter, @EndDateParameter)
WHERE (FromDateTime BETWEEN @BeginDateParameter AND DATEADD(DAY, 1, @EndDateParameter)
OR ThruDateTime BETWEEN @BeginDateParameter AND DATEADD(DAY, 1, @EndDateParameter))
AND LocationID = 'ER'
Output example for the first three hours show patients that were present in the ER by taking into account their departure time.
Hour PatientCount
0 2
1 3
2 3
For querying short time periods, I would create a table-valued function that generates the hour entries. The results table can be joined into your query.
-- Returns one row per hour step, starting at @startDateTime and advancing one
-- hour at a time while the value is <= @endDateTime (both ends inclusive when
-- the end falls exactly on a step). Returns no rows when the start is after
-- the end.
CREATE FUNCTION [dbo].[f_hours] (@startDateTime DATETIME,
@endDateTime DATETIME)
RETURNS @result TABLE (
[dateTime] DATETIME PRIMARY KEY
)
AS
BEGIN
DECLARE
@dateTime DATETIME = @startDateTime
WHILE (@dateTime <= @endDateTime)
BEGIN
INSERT
INTO @result ([dateTime])
VALUES (@dateTime)
SET @dateTime = DATEADD(hour, 1, @dateTime)
END
RETURN
END
GO
The time required by the function can be measured with SET STATISTICS TIME ON. Generating over 6000 records takes 53 ms on my computer.
-- Time one call of the hour generator; the timings appear in the messages output.
SET STATISTICS TIME ON;
SELECT hrs.[dateTime]
FROM [dbo].[f_hours]('2016-02-01', '2016-02-10 16:00') AS hrs;
SET STATISTICS TIME OFF;
I have an attendance SQL table that stores the start and end day's punch of employee. Each punch (punch in and punch out) is in a separate record.
I want to calculate the total working hour of each employee for a requested month.
I tried to write a scalar function that takes two dates and an employee ID and returns the result of the calculation described above, but it only calculates the difference for one date instead of across all dates.
The data is like this:
000781 2015-08-14 08:37:00 AM EMPIN 539309898
000781 2015-08-14 08:09:48 PM EMPOUT 539309886
My code is:
-- Fragment of the asker's function: three parameters followed by the expression
-- that is meant to return the hours between the EMPIN and EMPOUT punches.
-- NOTE(review): the function header and the SELECT keyword are not shown here.
-- NOTE(review): the leading # on the parameter names looks like a garbled @ sigil - confirm.
#FromDate NVARCHAR(10)
,#ToDate NVARCHAR(10)
,#EmpID NVARCHAR(6)
CONVERT(NVARCHAR,DATEDIFF(HOUR
-- NOTE(review): both subqueries declare the alias "att" but filter on "attt.date",
-- which is the alias of the outer PERS_Attendance reference below.
,(SELECT Time from PERS_Attendance att where attt.date between convert(date,#fromDate) AND CONVERT(Date,#toDate)
AND (EmpID= #EmpID OR ISNULL(#EmpID, '') = '') AND Funckey = 'EMPIN')
,(SELECT Time from PERS_Attendance att where attt.date between convert(date,#fromDate) AND CONVERT(Date,#toDate)
AND (EmpID= #EmpID OR ISNULL(#EmpID, '') = '') AND Funckey = 'EMPOUT') ))
FROM PERS_Attendance attt
One more approach that I think is simple and efficient.
It doesn't require modern functions like LEAD
it works correctly if the same person goes in and out several times during the same day
it works correctly if the person stays in over the midnight or even for several days in a row
it works correctly if the period when person is "in" overlaps the start OR end date-time.
it does assume that data is correct, i.e. each "in" is matched by "out", except possibly the last one.
Here is an illustration of a time-line. Note that start time happens when a person was "in" and end time also happens when a person was still "in":
All we need to do is calculate a plain sum of the time differences between each event (both in and out) and the start time, then do the same for the end time. If the event is an "in", the added duration gets a positive sign; if the event is an "out", the added duration gets a negative sign. The final result is the difference between the sum for the end time and the sum for the start time.
summing for start:
|---| +
|----------| -
|-----------------| +
|--------------------------| -
|-------------------------------| +
--|====|--------|======|------|===|=====|---|==|---|===|====|----|=====|--- time
in out in out in start out in out in end out in out
summing for end:
|---| +
|-------| -
|----------| +
|--------------| -
|------------------------| +
|-------------------------------| -
|--------------------------------------| +
|-----------------------------------------------| -
|----------------------------------------------------| +
I would recommend to calculate durations in minutes and then divide result by 60 to get hours, but it really depends on your requirements. By the way, it is a bad idea to store dates as NVARCHAR.
-- Hours "in" per employee between @StartDate and @EndDate, computed as the
-- difference of two signed sums: every EMPIN event adds its distance to the
-- reference time, every other event subtracts it.
DECLARE @StartDate datetime = '2015-08-01T00:00:00';
DECLARE @EndDate datetime = '2015-09-01T00:00:00';
DECLARE @EmpID nvarchar(6) = NULL;   -- NULL = all employees
WITH
CTE_Start
AS
(
    -- Signed sum of minutes from each event before @StartDate to @StartDate.
    SELECT
        EmpID
        ,SUM(DATEDIFF(minute, (CAST(att.[date] AS datetime) + att.[Time]), @StartDate)
            * CASE WHEN Funckey = 'EMPIN' THEN +1 ELSE -1 END) AS SumStart
    FROM
        PERS_Attendance AS att
    WHERE
        (EmpID = @EmpID OR @EmpID IS NULL)
        AND att.[date] < @StartDate
    GROUP BY EmpID
)
,CTE_End
AS
(
    -- Signed sum of minutes from each event before @EndDate to @EndDate.
    -- The reference time must be @EndDate here: measuring against @StartDate
    -- gives a wrong result for anyone still "in" at @EndDate.
    SELECT
        EmpID
        ,SUM(DATEDIFF(minute, (CAST(att.[date] AS datetime) + att.[Time]), @EndDate)
            * CASE WHEN Funckey = 'EMPIN' THEN +1 ELSE -1 END) AS SumEnd
    FROM
        PERS_Attendance AS att
    WHERE
        (EmpID = @EmpID OR @EmpID IS NULL)
        AND att.[date] < @EndDate
    GROUP BY EmpID
)
SELECT
    CTE_End.EmpID
    ,(SumEnd - ISNULL(SumStart, 0)) / 60.0 AS SumHours
FROM
    CTE_End
    -- LEFT JOIN: an employee may have no events before @StartDate.
    LEFT JOIN CTE_Start ON CTE_Start.EmpID = CTE_End.EmpID
OPTION(RECOMPILE);
There is LEFT JOIN between sums for end and start times, because there can be EmpID that has no records before the start time.
OPTION(RECOMPILE) is useful when you use Dynamic Search Conditions in T‑SQL. If @EmpID is NULL, you'll get results for all people; if it is not NULL, you'll get the result for just one person.
If you need just one number (a grand total) for all people, then wrap the calculation in the last SELECT into SUM(). If you always want a grand total for all people, then remove the @EmpID parameter altogether.
It would be a good idea to have an index on (EmpID,date).
My approach would be as follows:
-- Returns the total hours (as FLOAT) between EMPIN and EMPOUT punches of one
-- employee, pairing punches that share the same [Date].
--   @StartDate, @EndDate : both bounds are EXCLUSIVE ([Date] > start AND [Date] < end).
--                          NOTE(review): a caller passing the first/last day of a
--                          month will not get those two days counted - confirm intent.
--   @Employee            : EmpID to total.
-- Returns NULL when there are no matching in/out pairs.
-- NOTE(review): every EMPIN of a day is joined to every EMPOUT of that day, so
-- the result assumes at most one in/out pair per day.
CREATE FUNCTION [dbo].[MonthlyHoursByEmpID]
(
@StartDate Date,
@EndDate Date,
@Employee NVARCHAR(6)
)
RETURNS FLOAT
AS
BEGIN
DECLARE @TotalHours FLOAT
-- Table variables (temp tables cannot be used inside a function).
DECLARE @In TABLE ([Date] Date, [Time] Time)
DECLARE @Out TABLE ([Date] Date, [Time] Time)
INSERT INTO @In([Date], [Time])
SELECT [Date], [Time]
FROM PERS_Attendance
WHERE [EmpID] = @Employee AND [Funckey] = 'EMPIN' AND ([Date] > @StartDate AND [Date] < @EndDate)
INSERT INTO @Out([Date], [Time])
SELECT [Date], [Time]
FROM PERS_Attendance
WHERE [EmpID] = @Employee AND [Funckey] = 'EMPOUT' AND ([Date] > @StartDate AND [Date] < @EndDate)
-- Minutes between in and out, converted to hours and summed over all days.
SET @TotalHours = (SELECT SUM(CONVERT([float],datediff(minute,I.[Time], O.[Time]))/(60))
FROM @In I
INNER JOIN @Out O
ON I.[Date] = O.[Date])
RETURN @TotalHours
END
Assuming the entries are properly paired (in -> out -> in -> out -> in etc).
SQL Server 2012 and later:
-- Total hours per employee for one month, pairing every EMPOUT row with the
-- row immediately before it (per employee, ordered by [Date]).
-- DATEDIFF(hour, ...) counts hour boundaries crossed, so results are whole hours.
DECLARE @Year int = 2015
DECLARE @Month int = 8
;WITH
cte AS (
    SELECT EmpID,
           InDate = LAG([Date], 1) OVER (PARTITION BY EmpID ORDER BY [Date]),
           OutDate = [Date],
           Funckey
    FROM PERS_Attendance
)
SELECT EmpID,
       TotalHours = SUM(DATEDIFF(hour, InDate, OutDate))
FROM cte
WHERE Funckey = 'EMPOUT'
  -- A shift belongs to the month in which it started.
  AND YEAR(InDate) = @Year
  AND MONTH(InDate) = @Month
GROUP BY EmpID
SQL Server 2005 and later:
-- Same calculation without LAG: number the rows per employee and join each
-- EMPOUT row to the row numbered one less.
-- The month parameters are declared here so the snippet runs on its own; drop
-- these two lines if it is run in the same batch as another snippet that
-- already declares them.
DECLARE @Year int
DECLARE @Month int
SET @Year = 2015
SET @Month = 8
;WITH
cte1 AS (
    SELECT *,
           rn = ROW_NUMBER() OVER (PARTITION BY EmpID ORDER BY [Date])
    FROM PERS_Attendance
),
cte2 AS (
    SELECT a.EmpID, b.[Date] As InDate, a.[Date] AS OutDate,
           HoursWorked = DATEDIFF(hour, b.[Date], a.[Date])
    FROM cte1 a
    LEFT JOIN cte1 b ON a.EmpID = b.EmpID and a.rn = b.rn + 1
    WHERE a.Funckey = 'EMPOUT'
)
SELECT EmpID,
       TotalHours = SUM(HoursWorked)
FROM cte2
-- Rows without a preceding punch have a NULL InDate and are filtered out here.
WHERE YEAR(InDate) = @Year
  AND MONTH(InDate) = @Month
GROUP BY EmpID
I have a table which contains following columns
userid,
game,
gameStarttime datetime,
gameEndtime datetime,
startdate datetime,
currentdate datetime
I can retrieve all the playing times but I want to count the total playing time per DAY and 0 or null if game not played on a specific day.
Take a look at DATEDIFF to do the time calculations. Your requirements are not very clear, but it should work for whatever you're looking to do.
Your end result would probably look something like this:
-- Total playing time in seconds per user and game.
-- The DATEDIFF must be aggregated: a bare DATEDIFF over columns that are not in
-- the GROUP BY list is rejected by SQL Server.
SELECT
userid,
game,
SUM(DATEDIFF(SS, gameStarttime, gameEndtime)) AS [TotalSeconds]
FROM [source]
GROUP BY
userid,
game
In the example query above, SS counts the seconds between the two dates (assuming both are not null). If you need just minutes, then MI will provide the total minutes. However, I imagine total seconds is best, so that you can convert accurately to whatever unit of measure you need, such as hours that might be "1.23" or something like that.
Again, most of this is speculation based on assumptions and what you seem to be looking for. Hope that helps.
MSDN Docs for DATEDIFF: https://msdn.microsoft.com/en-us/library/ms189794.aspx
You may also look up DATEPART if you want the minutes and seconds separately.
UPDATED BASED ON FEEDBACK
The query below breaks out the hour breakdowns by day, splits time across multiple days, and shows "0" for days where no games are played. Also, for your output, I have to assume you have a separate table of users (so you can show users who have no time in your date range).
-- Define start date
-- Hours played per user per day from @BeginDate through today. Sessions that
-- cross midnight are split across the days they touch; days without play show 0.
-- Date literals use ISO forms so they parse the same under any language setting.
-- Define start date
DECLARE @BeginDate DATE = '20150421'
-- Create sample data
DECLARE @Usage TABLE (
    userid int,
    game nvarchar(50),
    gameStartTime datetime,
    gameEndTime datetime
)
DECLARE @Users TABLE (
    userid int
)
INSERT @Users (userid) VALUES (1)
INSERT @Usage (userid, game, gameStartTime, gameEndTime) VALUES
    (1, 'sample', '2015-04-25T22:00:00', '2015-04-26T02:30:00'),
    (1, 'sample', '2015-04-22T16:00:00', '2015-04-22T16:30:00')
-- Generate list of days in range
DECLARE @DayCount INT = DATEDIFF(DD, @BeginDate, GETDATE()) + 1
;WITH CTE AS (
    -- Row source only; cross-joined with itself below to supply enough rows.
    SELECT TOP (225) [object_id] FROM sys.all_objects
), [Days] AS (
    SELECT TOP (@DayCount)
        DATEADD(DD, ROW_NUMBER() OVER (ORDER BY x.[object_id]) - 1, @BeginDate) AS [Day]
    FROM CTE x
    CROSS JOIN CTE y
    ORDER BY
        [Day]
)
SELECT
    [Days].[Day],
    Users.userid,
    SUM(COALESCE(
        CONVERT(MONEY, DATEDIFF(SS,
            -- clip the session start to the beginning of this day
            CASE WHEN CONVERT(DATE, Usage.gameStartTime) < [Days].[Day] THEN [Days].[Day] ELSE Usage.gameStartTime END,
            -- clip the session end to the beginning of the next day
            CASE WHEN CONVERT(DATE, Usage.gameEndTime) > [Days].[Day] THEN DATEADD(DD, 1, [Days].[Day]) ELSE Usage.gameEndTime END
        )) / 3600,
        0)) AS [Hours]
FROM [Days]
CROSS JOIN @Users Users
LEFT OUTER JOIN @Usage Usage
    ON Usage.userid = Users.userid
    AND [Days].[Day] BETWEEN CONVERT(DATE, Usage.gameStartTime) AND CONVERT(DATE, Usage.gameEndTime)
GROUP BY
    [Days].[Day],
    Users.userid
ORDER BY
    [Days].[Day],
    Users.userid
The query above yields the output below for the sample data:
Day userid Hours
---------- ----------- ---------------------
2015-04-21 1 0.00
2015-04-22 1 0.50
2015-04-23 1 0.00
2015-04-24 1 0.00
2015-04-25 1 2.00
2015-04-26 1 2.50
2015-04-27 1 0.00
I've edited my SQL on SQL Fiddle and I think this might get you what you asked for. To me it looks a little simpler than the answer you've accepted.
-- Hours played per user per calendar date, for every date between the earliest
-- StartDate and the latest currentDate in Games; dates without play show 0.
-- NOTE(review): TheDate is compared with CAST(... AS Date), so this presumably
-- expects StartDate values with no time-of-day part - confirm.
DECLARE @FromDate datetime, @ToDate datetime
SELECT @FromDate = MIN(StartDate), @ToDate = MAX(currentDate)
FROM Games
-- This recursive CTE will get you all dates
-- between the first StartDate and the last CurrentDate on your table
;WITH AllDates AS(
    SELECT @FromDate As TheDate
    UNION ALL
    SELECT TheDate + 1
    FROM AllDates
    WHERE TheDate + 1 <= @ToDate
)
SELECT UserId,
       TheDate,
       COALESCE(
           SUM(
               -- When the game starts and ends in the same date
               CASE WHEN DATEDIFF(DAY, GameStartTime, GameEndTime) = 0 THEN
                   DATEDIFF(HOUR, GameStartTime, GameEndTime)
               ELSE
                   -- when the game starts in the current date
                   CASE WHEN DATEDIFF(DAY, GameStartTime, TheDate) = 0 THEN
                       DATEDIFF(HOUR, GameStartTime, DATEADD(Day, 1, TheDate))
                   ELSE -- meaning the game ends in the current date
                       DATEDIFF(HOUR, TheDate, GameEndTime)
                   END
               END
           ),
           0) As HoursPerDay
FROM (
    -- Every (user, date) combination; the start/end time is kept only when it
    -- falls on that date, otherwise it is NULL.
    SELECT DISTINCT UserId,
           TheDate,
           CASE
               WHEN CAST(GameStartTime as Date) = TheDate
               THEN GameStartTime
               ELSE NULL
           END As GameStartTime, -- return null if no game started that day
           CASE
               WHEN CAST(GameEndTime as Date) = TheDate
               THEN GameEndTime
               ELSE NULL
           END As GameEndTime -- return null if no game ended that day
    FROM Games CROSS APPLY AllDates -- pairs every Games row with every date
) InnerSelect
GROUP BY UserId, TheDate
ORDER BY UserId, TheDate
OPTION (MAXRECURSION 0)
Play with it your self on sql fiddle.
Suppose I have a SQL table of Awards, with fields for Date and Amount. I need to generate a table with a sequence of consecutive dates, the amount awarded in each day, and the running (cumulative) total.
Date Amount_Total Amount_RunningTotal
---------- ------------ -------------------
1/1/2010 100 100
1/2/2010 300 400
1/3/2010 0 400
1/4/2010 0 400
1/5/2010 400 800
1/6/2010 100 900
1/7/2010 500 1400
1/8/2010 300 1700
This SQL works, but isn't as quick as I'd like:
-- Daily award totals and a running total over consecutive dates.
-- This is the slow version from the question: both totals are correlated
-- subqueries evaluated once per generated date.
Declare @StartDate datetime, @EndDate datetime
Select @StartDate=Min(Date), @EndDate=Max(Date) from Awards
; With
/* Returns consecutive numbers starting at 1.
   NOTE(review): because n starts at 1 and the recursion continues while
   n <= DateDiff, the generated dates run from @StartDate + 1 day to
   @EndDate + 1 day, i.e. the first day is skipped. */
Nbrs(n) as (
    Select 1 Union All
    Select 1+n
    From Nbrs
    Where n<=DateDiff(d,@StartDate,@EndDate)),
/* Turns each number into a date offset from @StartDate */
AllDays as (
    Select Date=DateAdd(d, n, @StartDate)
    From Nbrs )
/* Returns totals for each day */
Select
    d.Date,
    Amount_Total = (
        Select Sum(a.Amount)
        From Awards a
        Where a.Date=d.Date),
    Amount_RunningTotal = (
        Select Sum(a.Amount)
        From Awards a
        Where a.Date<=d.Date)
From AllDays d
Order by d.Date
Option(MAXRECURSION 1000)
I tried adding an index to Awards.Date, but it made a very minimal difference.
Before I resort to other strategies like caching, is there a more efficient way to code the running total calculation?
I generally use a temporary table for this:
-- Running total via a single UPDATE that carries the total in a variable.
-- NOTE(review): this relies on rows being updated in [Date] (primary key)
-- order, which SQL Server does not document as guaranteed - verify the results.
DECLARE @Temp TABLE
(
    [Date] date PRIMARY KEY,
    Amount int NOT NULL,
    RunningTotal int NULL
)
INSERT @Temp ([Date], Amount)
SELECT [Date], Amount
FROM ...
-- Must start at 0: an uninitialised variable is NULL, and NULL + Amount stays
-- NULL for every row.
DECLARE @RunningTotal int
SET @RunningTotal = 0
UPDATE @Temp
SET @RunningTotal = RunningTotal = @RunningTotal + Amount
SELECT [Date], Amount, RunningTotal
FROM @Temp
ORDER BY [Date]
If you can't make the date column a primary key then you need to include an ORDER BY [Date] in the INSERT statement.
Also, this question's been asked a few times before. See here or search for "sql running total". The solution I posted is, as far as I know, still the one with the best performance, and also easy to write.
I don't have a database setup in front of me so I hope the below works first shot. A pattern like this should result in a much speedier query...you're just joining twice, similar amount of aggregation:
-- Daily total and running total using two joins to the per-date sums.
Declare @StartDate datetime, @EndDate datetime
Select @StartDate=Min(Date), @EndDate=Max(Date) from Awards
;
-- One row per date from @StartDate to @EndDate inclusive.
WITH AllDays(Date) AS (SELECT @StartDate UNION ALL SELECT DATEADD(d, 1, Date)
                       FROM AllDays
                       WHERE Date < @EndDate)
SELECT d.Date,
       -- The join to "running" repeats the single "daily" row once per earlier
       -- date, so SUM(daily.Amount) would multiply the day's total; MAX picks
       -- the one value instead. Days without awards report 0.
       ISNULL(MAX(daily.Amount), 0) Amount_Total,
       ISNULL(SUM(running.Amount), 0) Amount_RunningTotal
FROM AllDays d
LEFT JOIN (SELECT date, SUM(Amount) As Amount
           FROM Awards
           GROUP BY Date) daily
    ON d.Date = daily.Date
LEFT JOIN (SELECT date, SUM(Amount) As Amount
           FROM Awards
           GROUP BY Date) running
    ON (d.Date >= running.Date)
Group by d.Date
Order by d.Date
Option (MAXRECURSION 0)  -- the default limit of 100 fails for ranges longer than 100 days
Note: I changed your table expression up top, it was leaving out the first day before...if this is intentional just slap a where clause on this to exclude it. Let me know in the comments if this doesn't work or doesn't fit and I'll make whatever adjustments.
Here's a working solution based on @Aaronaught's answer. The only gotcha I had to overcome in T-SQL was that @RunningTotal etc. can't be null (they need to be converted to zero).
-- Daily award totals and running total for every date between the first and
-- last Awards.StartDate, built up through three table variables.
Declare @StartDate datetime, @EndDate datetime
Select @StartDate=Min(StartDate),@EndDate=Max(StartDate) from Awards
/* @AllDays: Contains one row per date from @StartDate to @EndDate */
Declare @AllDays Table (
    Date datetime Primary Key)
; With
Nbrs(n) as (
    Select 0 Union All
    Select 1+n from Nbrs
    -- "<" (not "<="): n must stop at DateDiff, otherwise one extra day past
    -- @EndDate is generated.
    Where n<DateDiff(d,@StartDate,@EndDate)
)
Insert into @AllDays (Date)
Select Date=DateAdd(d, n, @StartDate)
From Nbrs
Option(MAXRECURSION 10000) /* Will explode if working with more than 10000 days (~27 years) */
/* @AmountsByDate: Contains one row per date for which we have an Award, along with the totals for that date */
Declare @AmountsByDate Table (
    Date datetime Primary Key,
    Amount money)
Insert into @AmountsByDate (Date, Amount)
Select
    StartDate,
    Amount=Sum(Amount)
from Awards a
Group by StartDate
/* @Result: Joins @AllDays and @AmountsByDate etc. to provide totals and running totals for every day of the award */
Declare @Result Table (
    Date datetime Primary Key,
    Amount money,
    RunningTotal money)
Insert into @Result (Date, Amount, RunningTotal)
Select
    d.Date,
    IsNull(bt.Amount,0),
    RunningTotal=0
from @AllDays d
Left Join @AmountsByDate bt on d.Date=bt.Date
Order by d.Date
/* Carry the running total through a single UPDATE; it must start at 0, not NULL */
Declare @RunningTotal money Set @RunningTotal=0
Update @Result Set @RunningTotal = RunningTotal = @RunningTotal + Amount
Select Date, Amount, RunningTotal from @Result Order by Date