How to get latest record weekly within an SQL statement - sql

I'm trying to export the latest records from SQL Server 2005 database once weekly. This is my table:
agent_name date ID
ALEX 2015-05-25 13
ALEX 2015-05-22 13
ALICE 2015-05-24 10
ALICE 2015-05-26 10
How can I create an output table that looks like this:
agent_name date ID
ALEX 2015-05-25 13
ALICE 2015-05-26 10
My SQL script:
-- Payments of active agents (agent = 'Y') dated within the last 7 days.
SELECT
    pay.agent_name,
    pay.date,
    pay.ID
FROM Payment AS pay
INNER JOIN agentmaster AS am
    ON am.ID2 = pay.ID
WHERE am.agent = 'Y'
  AND pay.date >= DATEADD(day, -7, GETDATE())

If you want latest rows for each ID and agent_name use this:
-- Most recent [date] for every (agent_name, ID) pair.
SELECT
    p.agent_name,
    MAX(p.[date]) AS [date],
    p.ID
FROM Payment AS p
GROUP BY
    p.ID,
    p.agent_name
If you want to have latest rows in each week use this:
-- Latest row within each calendar week.
-- The year is part of the partition because DATEPART(WEEK, ...) only yields a
-- week number, so the same week number from different years would otherwise
-- be merged into one partition.
SELECT a.agent_name,
       a.[date],
       a.ID
FROM (SELECT p.agent_name,
             p.[date],
             p.ID,
             ROW_NUMBER() OVER (PARTITION BY DATEPART(YEAR, p.[date]), DATEPART(WEEK, p.[date])
                                ORDER BY p.[date] DESC) AS seq
      FROM Payment AS p) AS a
WHERE a.seq = 1   -- seq = 1 is the newest row of its week
If you want latest rows for each ID and agent_name at each week use this:
-- Latest row for every (agent_name, ID) within each calendar week.
-- p must expose agent_name, [date] and ID.
;WITH p AS (
/* add your query here */
)
SELECT a.agent_name,
       a.[date],
       a.ID
FROM (SELECT p.agent_name,
             p.[date],
             p.ID,
             -- year + week: the week number alone repeats every year
             ROW_NUMBER() OVER (PARTITION BY p.agent_name, p.ID,
                                             DATEPART(YEAR, p.[date]), DATEPART(WEEK, p.[date])
                                ORDER BY p.[date] DESC) AS seq
      FROM p) AS a
WHERE a.seq = 1

Try This
-- Most recent Date for every (Agent_Name, ID) pair.
SELECT
    am.ID,
    am.Agent_Name,
    MAX(am.[Date]) AS Max_Date
FROM AgentMaster AS am
GROUP BY
    am.ID,
    am.Agent_Name
If you want to add the date filter then add the where condition
-- WHERE-clause fragment: keeps rows dated within the 7 days before the current date/time
date >= DATEADD(day,-7, GETDATE())

This will return records from the first day of the current week to the last day of the current week
-- Payments of active agents (agent = 'Y') dated within the current week.
-- DATEADD(wk, DATEDIFF(wk, 0, GETDATE()), 0) is the first day of the week;
-- adding one more week gives the first day of the following week.
-- A half-open range (>= start, < next start) is used so that rows carrying a
-- time of day on the last day of the week are still included, which a BETWEEN
-- ending at midnight of the last day would drop.
-- NOTE(review): DATEDIFF(wk, ...) counts week boundaries on Sundays, so on a
-- Sunday this window already refers to the week starting the next day - confirm
-- that is acceptable.
SELECT a.agent_name
      ,a.date
      ,a.ID
FROM Payment a INNER JOIN agentmaster b
    ON a.ID = b.ID2
WHERE b.agent = 'Y'
  AND a.date >= DATEADD(wk, DATEDIFF(wk, 0, GETDATE()), 0)
  AND a.date <  DATEADD(wk, DATEDIFF(wk, 0, GETDATE()) + 1, 0)

In this solution, I build a table in a CTE that holds the first day of each week within a period, and then JOIN those first days of the week to our actual query.
Sample data
-- Sample data. A local temp table is used because "DECLARE #tbl TABLE" is not
-- valid T-SQL: a '#' name denotes a temp table, table variables need '@'.
CREATE TABLE #tbl (agent_name VARCHAR(10), [date] date, Id INT)
INSERT INTO #tbl (agent_name, [date], Id)
SELECT 'ALEX' , '2015-05-25' , 13 UNION ALL
SELECT 'ALEX' , '2015-05-22' , 13 UNION ALL
SELECT 'ALICE' , '2015-05-24' , 10 UNION ALL
SELECT 'ALICE' , '2015-05-26' , 10
SELECT agent_name, [date], Id FROM #tbl
Query
-- This period can be changed
-- Period to scan; can be changed. Variables need the '@' prefix, and the
-- 'yyyymmdd' literal form avoids day/month ambiguity.
DECLARE @StartDate date = '20150101'
DECLARE @EndDate date = '20160101'
-- Numbers: integers taken from master..spt_values (type 'P'),
-- presumably a contiguous range starting at 0 - verify on your server.
;WITH Numbers (Number)
AS
(
    SELECT number
    FROM master..spt_values
    WHERE [type] = 'P'
)
-- firstDayOfWeek: every date in [@StartDate, @EndDate] whose weekday number is 1.
,firstDayOfWeek (fWDate)
AS
(
    SELECT DATEADD(DAY, n.Number, @StartDate)
    FROM Numbers AS n
    WHERE n.Number <= DATEDIFF(DAY, @StartDate, @EndDate)
      -- NOTE(review): which day counts as weekday 1 depends on SET DATEFIRST
      AND DATEPART(WEEKDAY, DATEADD(DAY, n.Number, @StartDate)) = 1 --Defines first day of week
)
-- Each row of #tbl is matched to the week [fWDate, fWDate + 7 days) containing
-- its date; the result is the latest date per (Id, agent_name).
SELECT t.agent_name, MAX(t.[date]) AS [date], t.Id
FROM #tbl AS t
INNER JOIN firstDayOfWeek AS fw
    ON t.[date] >= fw.fWDate
   AND t.[date] < DATEADD(DAY, 7, fw.fWDate)
GROUP BY t.Id, t.agent_name

Related

Select date ranges where periods do not overlap

I have two tables each containing the start and end dates of several periods. I want an efficient way to find periods (date ranges) where dates are within the ranges of the first table but not within ranges of the second table.
For example, if this is my first table (with dates that I want)
start_date end_date
2001-01-01 2010-01-01
2012-01-01 2015-01-01
And this is my second table (with dates that I do not want)
start_date end_date
2002-01-01 2006-01-01
2003-01-01 2004-01-01
2005-01-01 2009-01-01
2014-01-01 2018-01-01
Then output looks like
start_date end_date
2001-01-01 2001-12-31
2009-01-02 2010-01-01
2012-01-01 2013-12-31
We can safely assume that periods in the first table are non-overlapping, but we cannot assume that periods in the second table are non-overlapping.
I already have a method for doing this but it is an order of magnitude slower than I can accept. So hoping someone can propose a faster approach.
My present method looks like:
merge table 2 into non-overlapping periods
find the inverse of table 2
join overlapping periods from table 1 and inverted-table-2
I am sure there is a faster way if some of these steps can be merged together.
In more detail
/* (1) merge overlapping periods */
WITH
-- Rows whose start is not covered by any period that began earlier.
spell_starts AS (
    SELECT cur.[start_date], cur.[end_date]
    FROM table_2 AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM table_2 AS earlier
        WHERE earlier.[start_date] < cur.[start_date]
          AND earlier.[end_date] >= cur.[start_date]
    )
),
-- Rows whose end is not covered by any period that ends later.
spell_ends AS (
    SELECT cur.[start_date], cur.[end_date]
    FROM table_2 AS cur
    WHERE NOT EXISTS (
        SELECT 1
        FROM table_2 AS later
        WHERE later.[start_date] <= cur.[end_date]
          AND later.[end_date] > cur.[end_date]
    )
)
-- Pair every spell start with the earliest spell end on or after it.
SELECT st.[start_date], MIN(en.[end_date]) AS [end_date]
FROM spell_starts AS st
INNER JOIN spell_ends AS en
    ON en.[end_date] >= st.[start_date]
GROUP BY st.[start_date]
/* (2) inverse table 2 */
SELECT gaps.[start_date], gaps.[end_date]
FROM (
    /* gap following each merged spell: from the day after it ends to the day
       before the next spell starts ('9999-01-01' when there is no next spell) */
    SELECT DATEADD(DAY, 1, m.[end_date]) AS [start_date]
          ,LEAD(DATEADD(DAY, -1, m.[start_date]), 1, '9999-01-01') OVER (ORDER BY m.[start_date]) AS [end_date]
    FROM merge_table_2 AS m
    UNION ALL
    /* gap preceding the first merged spell, opened at '1900-01-01' */
    SELECT '1900-01-01' AS [start_date]
          ,DATEADD(DAY, -1, MIN(m.[start_date])) AS [end_date]
    FROM merge_table_2 AS m
) AS gaps
/* drop empty gaps and anything outside the sentinel bounds */
WHERE gaps.[start_date] <= gaps.[end_date]
  AND gaps.[start_date] >= '1900-01-01'
  AND gaps.[end_date] <= '9999-01-01'
/* (3) overlap spells */
-- Intersection of every wanted period (table_1) with every allowed gap
-- (inverse_merge_table_2): later of the two starts, earlier of the two ends.
-- Both sides are inclusive day ranges, so two ranges overlap when each one
-- starts on or before the other ends; a strict '<' would drop overlaps that
-- are exactly one day long.
SELECT CASE WHEN t1.start_date < t2.start_date THEN t2.start_date ELSE t1.start_date END AS start_date
      ,CASE WHEN t1.end_date < t2.end_date THEN t1.end_date ELSE t2.end_date END AS end_date
FROM table_1 t1
INNER JOIN inverse_merge_table_2 t2
    ON t1.start_date <= t2.end_date
   AND t2.start_date <= t1.end_date
Hope this helps. I have commented the two CTEs I am using for explanation purposes.
Here you go:
-- (Re)build the two sample tables.
-- The OBJECT_ID guard lets the script run when the tables do not exist yet;
-- an unconditional DROP TABLE raises an error in that case.
IF OBJECT_ID('table1', 'U') IS NOT NULL DROP TABLE table1
select cast('2001-01-01' as date) as start_date, cast('2010-01-01' as date) as end_date into table1
union all select '2012-01-01', '2015-01-01'
IF OBJECT_ID('table2', 'U') IS NOT NULL DROP TABLE table2
select cast('2002-01-01' as date) as start_date, cast('2006-01-01' as date) as end_date into table2
union all select '2003-01-01', '2004-01-01'
union all select '2005-01-01', '2009-01-01'
union all select '2014-01-01', '2018-01-01'
/***** Solution *****/
-- cte: every boundary date in a single column t
with cte as
(
    select start_date as t from table1
    union all
    select end_date from table1
    union all
    -- table2 start moved back one day, so the boundary lies outside the forbidden range
    select dateadd(day, -1, start_date) from table2
    union all
    -- table2 end moved forward one day, for the same reason
    select dateadd(day, 1, end_date) from table2
),
-- cte2: each boundary (s) paired with the next boundary in date order (e);
-- the last boundary is paired with itself
cte2 as
(
    select t as s,
           coalesce(lead(t, 1) over (order by t), t) as e
    from cte
)
-- keep the intervals that sit inside a table1 period and are not fully
-- inside a table2 period widened by one day on each side
select iv.s, iv.e
from cte2 as iv
where iv.s <> iv.e
  and exists (select 1
              from table1 as w
              where iv.s between w.start_date and w.end_date
                and iv.e between w.start_date and w.end_date)
  and not exists (select 1
                  from table2 as x
                  where iv.s between dateadd(day, -1, x.start_date) and dateadd(day, 1, x.end_date)
                    and iv.e between dateadd(day, -1, x.start_date) and dateadd(day, 1, x.end_date))
If you want performance, then you want to use window functions.
The idea is to:
Combine the dates with flags of being in-and-out of the two tables.
Use cumulative sums to determine where dates start being in-and-out.
Then you have a gaps-and-islands problem where you want to combine the results.
Finally, filter on the particular periods you want.
This looks like:
-- Finds the date ranges that are covered by table1 and not covered by table2.
-- dates: one row per boundary; in1/in2 are +1 on the day a period of
-- table1/table2 starts and -1 on the day after it ends.
with dates as (
select start_date as dte, 1 as in1, 0 as in2
from table1
union all
select dateadd(day, 1, end_date), -1, 0
from table1
union all
select start_date, 0, 1 as in2
from table2
union all
select dateadd(day, 1, end_date), 0, -1
from table2
),
-- d: one row per distinct boundary date; ins_1 / ins_2 are running totals of
-- the flags, i.e. how many table1 / table2 periods are open from that date on.
d as (
select dte,
sum(sum(in1)) over (order by dte) as ins_1,
sum(sum(in2)) over (order by dte) as ins_2
from dates
group by dte
)
-- next_dte is the day before the following boundary, so [dte, next_dte] is the
-- stretch during which the counters of this row apply.
-- seqnum - seqnum_2 is constant for consecutive rows in the same 'in'/'out'
-- state, which groups adjacent stretches together.
select min(dte), max(next_dte)
from (select d.*, dateadd(day, -1, lead(dte) over (order by dte)) as next_dte,
row_number() over (order by dte) as seqnum,
row_number() over (partition by case when ins_1 >= 1 and ins_2 = 0 then 'in' else 'out' end order by dte) as seqnum_2
from d
) d
group by (seqnum - seqnum_2)
-- keep only groups that are inside table1 and never inside table2
having max(ins_1) > 0 and max(ins_2) = 0
order by min(dte);
Here is a db<>fiddle.
Thanks to @zip and @Gordon for their answers. Both were superior to my initial approach. However, the following solution was faster than both of their approaches in my environment and context:
-- Periods that are inside table1 and outside table2 (inclusive day ranges).
--
-- acceptable_starts: days on which such a period can begin -
--   * a table1 start that is not inside any table2 period, or
--   * the day after a table2 period ends, when that day is not inside another
--     table2 period and lies inside a table1 period.
WITH acceptable_starts AS (
    SELECT a.[start_date] FROM table1 AS a
    WHERE NOT EXISTS (
        SELECT 1 FROM table2 AS b
        WHERE a.[start_date] BETWEEN b.[start_date] AND b.[end_date]
    )
    UNION ALL
    SELECT DATEADD(DAY, 1, a.[end_date]) FROM table2 AS a
    WHERE NOT EXISTS (
        SELECT 1 FROM table2 AS b
        WHERE DATEADD(DAY, 1, a.[end_date]) BETWEEN b.[start_date] AND b.[end_date]
    )
    -- without this check a day after table2 but outside table1 would be
    -- treated as a start and paired with an unrelated later end
    AND EXISTS (
        SELECT 1 FROM table1 AS w
        WHERE DATEADD(DAY, 1, a.[end_date]) BETWEEN w.[start_date] AND w.[end_date]
    )
),
-- acceptable_ends: the mirror image - a table1 end that is not inside any
-- table2 period, or the day before a table2 period starts (same two checks).
acceptable_ends AS (
    SELECT a.[end_date] FROM table1 AS a
    WHERE NOT EXISTS (
        SELECT 1 FROM table2 AS b
        WHERE a.[end_date] BETWEEN b.[start_date] AND b.[end_date]
    )
    UNION ALL
    SELECT DATEADD(DAY, -1, a.[start_date]) FROM table2 AS a
    WHERE NOT EXISTS (
        SELECT 1 FROM table2 AS b
        WHERE DATEADD(DAY, -1, a.[start_date]) BETWEEN b.[start_date] AND b.[end_date]
    )
    AND EXISTS (
        SELECT 1 FROM table1 AS w
        WHERE DATEADD(DAY, -1, a.[start_date]) BETWEEN w.[start_date] AND w.[end_date]
    )
)
-- Each start is closed by the earliest end on or after it
-- ('<=' so that a one-day period, where start = end, is kept).
SELECT s.[start_date], MIN(e.[end_date]) AS [end_date]
FROM acceptable_starts AS s
INNER JOIN acceptable_ends AS e
    ON s.[start_date] <= e.[end_date]
GROUP BY s.[start_date]

return a result even if there is no data for that day/ week/ month

I have an MSSQL database and I want to get a value for each day/ week/ month in separate queries.
I got this working just fine, except that for intervals where there is no data it won't return anything. Since I'm putting this in a graph, I want it to display a 0, or at least a NULL, instead of skipping days or weeks etc.
I dont know if it will be different for each query but here is my daily query:
-- Commission value per calendar day for placements created
-- from 2018-06-07 up to (not including) 2018-06-12.
SELECT
    CAST(p.CreatedOn AS DATE) AS date,
    SUM(p.CommissionPerc * (p.PlacementFee / 100)) AS value
FROM [placements] AS p
WHERE p.[CreatedOn] >= '2018-06-07'
  AND p.[CreatedOn] < '2018-06-12'
GROUP BY CAST(p.CreatedOn AS DATE)
ORDER BY CAST(p.CreatedOn AS DATE) ASC
This returns a result like:
So it returns 0 when the data is actually 0, but when the data is missing there is no row at all, as for days 9, 10 and 12.
How can i fix this? thanks
Using a recursive CTE you can generate a list of dates.
Which can then be used to LEFT JOIN your table.
Example:
-- DATES2018: every day of 2018, generated recursively one day at a time.
WITH DATES2018 AS
(
    SELECT CAST('2018-01-01' AS DATE) AS [date]
    UNION ALL
    SELECT DATEADD(day, 1, cal.[date])
    FROM DATES2018 AS cal
    WHERE cal.[date] < CAST('2018-12-31' AS DATE)
)
-- The calendar drives the result, so days without placements still appear
-- (with a NULL value).
SELECT
    cal.[Date],
    SUM(pl.CommissionPerc * (pl.PlacementFee / 100.0)) AS [value]
FROM DATES2018 AS cal
LEFT JOIN [Placements] AS pl
    ON CAST(pl.CreatedOn AS DATE) = cal.[Date]
WHERE cal.[Date] BETWEEN '2018-06-07' AND '2018-06-11'
GROUP BY cal.[Date]
ORDER BY cal.[Date] ASC
OPTION (MAXRECURSION 366)   -- the default limit is lower than the number of days generated
But you could also just add a new permanent table with all dates.
And use that table to left join your table.
Btw, if variables are used for the start and end date then that SQL can be optimized.
-- Inclusive reporting range. Variables need the '@' prefix.
DECLARE @StartDate DATE = '2018-06-07';
DECLARE @EndDate DATE = '2018-06-11';
-- DATES: one row per day from @StartDate to @EndDate.
WITH DATES AS
(
    SELECT @StartDate AS [date]
    UNION ALL
    SELECT DATEADD(day, 1, [date])
    FROM DATES
    WHERE [date] < @EndDate
)
SELECT
    d.[Date],
    SUM(p.CommissionPerc * (p.PlacementFee / 100.0)) AS [value]
FROM DATES AS d
LEFT JOIN [Placements] AS p
    -- half-open range on the raw column narrows the rows before the per-day
    -- match; the upper bound is the day after @EndDate, exclusive
    ON p.CreatedOn >= CAST(@StartDate AS DATETIME)
   AND p.CreatedOn <  CAST(DATEADD(day, 1, @EndDate) AS DATETIME)
   AND CAST(p.CreatedOn AS DATE) = d.[Date]
GROUP BY d.[Date]
ORDER BY d.[Date] ASC
OPTION (MAXRECURSION 0)   -- no recursion limit, so the range may be any length
A permanent calendar table would be best, but here's an example that uses a CTE to create the dates needed for a LEFT JOIN. This uses a maximum of 1,000 days but can be extended as needed.
-- Inclusive reporting range. Variables need the '@' prefix.
DECLARE
  @StartDate date = '2018-06-07'
, @EndDate date = '2018-06-12';
WITH
-- t10: 10 rows; t1k: 10 x 10 x 10 = 1,000 rows numbered 0..999
 t10 AS (SELECT n FROM (VALUES(0),(0),(0),(0),(0),(0),(0),(0),(0),(0)) t(n))
,t1k AS (SELECT ROW_NUMBER() OVER (ORDER BY (SELECT 0)) - 1 AS num FROM t10 AS a CROSS JOIN t10 AS b CROSS JOIN t10 AS c)
-- calendar: one row per day from @StartDate to @EndDate
,calendar AS (SELECT DATEADD(day, num, @StartDate) AS calendar_date
              FROM t1k
              WHERE num <= DATEDIFF(day, @StartDate, @EndDate)
             )
SELECT
      calendar.calendar_date AS date
    , SUM( COALESCE(Placements.CommissionPerc * (Placements.PlacementFee / 100),0 ) ) AS value
FROM calendar
-- Match every placement created during the calendar day. An equality join
-- against calendar_date would only match rows whose CreatedOn is exactly
-- midnight when CreatedOn carries a time of day.
LEFT JOIN [placements]
    ON [Placements].[CreatedOn] >= calendar.calendar_date
   AND [Placements].[CreatedOn] <  DATEADD(day, 1, calendar.calendar_date)
GROUP BY calendar.calendar_date
ORDER BY calendar.calendar_date ASC;

Find missing date as compare to calendar

I will explain the problem in short.
-- Distinct dates on or after 2016-01-01.
SELECT DISTINCT [DATE]
FROM #Table
WHERE [DATE] >= '2016-01-01'
Output :
Date
2016-11-23
2016-11-22
2016-11-21
2016-11-19
2016-11-18
Now I need to find the dates that are missing compared to the calendar dates of the year 2016,
i.e. Here date '2016-11-20' is missing.
I want list of missing dates.
Thanks for reading this. Have nice day.
You need to generate dates and you have to find missing ones. Below with recursive cte i have done it
-- CTE: every calendar day of 2016, generated recursively.
;WITH CTE AS
(
    SELECT CONVERT(DATE, '2016-01-01') AS DATE1
    UNION ALL
    SELECT DATEADD(DD, 1, c.DATE1)
    FROM CTE AS c
    WHERE c.DATE1 < '2016-12-31'
)
-- Days of 2016 that do not occur in #TABLE1.
SELECT c.DATE1 AS MISSING_ONE
FROM CTE AS c
EXCEPT
SELECT * FROM #TABLE1
OPTION (MAXRECURSION 0)
Using CTE and get all dates in CTE table then compare with your table.
-- Sample data in a local temp table.
CREATE TABLE #yourTable(_Values DATE)
INSERT INTO #yourTable(_Values)
SELECT '2016-11-23' UNION ALL
SELECT '2016-11-22' UNION ALL
SELECT '2016-11-21' UNION ALL
SELECT '2016-11-19' UNION ALL
SELECT '2016-11-18'
-- First day of the month to check. A scalar variable needs the '@' prefix.
DECLARE @DATE DATE = '2016-11-01'
-- CTEYear: every day from @DATE to the end of that month.
;WITH CTEYear (_Date) AS
(
    SELECT @DATE
    UNION ALL
    SELECT DATEADD(DAY,1,_Date)
    FROM CTEYear
    WHERE _Date < EOMONTH(@DATE,0)
)
-- Days of the month that have no row in #yourTable.
SELECT _Date FROM CTEYear
WHERE NOT EXISTS(SELECT 1 FROM #yourTable WHERE _Date = _Values)
OPTION(maxrecursion 0)
You need to generate the dates and then find the missing ones. A recursive CTE is one way to generate a handful of dates. Another way is to use master..spt_values as a list of numbers:
-- n: consecutive integers starting at 0, one per row of master..spt_values
with n as (
      select row_number() over (order by (select null)) - 1 as n
      from master..spt_values
     ),
-- d: the days 2016-01-01 + 0 .. + 365 (2016 has 366 days)
     d as (
      select dateadd(day, n.n, cast('2016-01-01' as date)) as dte
      from n
      where n <= 365
     )
-- calendar days with no matching row in the table; the calendar column is
-- named dte, so that is the column to return
select d.dte
from d left join
     #table t
     on d.dte = t.date
where t.date is null;
If you are happy enough with ranges of missing dates, you don't need a list of dates at all:
-- For every date that is followed by a gap: the date and the number of
-- missing days before the next date present in the table.
-- '>' rather than '<>' so that a duplicated date (next_date = date) is not
-- reported as a gap of -1 days.
select date, (datediff(day, date, next_date) - 1) as num_missing
from (select t.*, lead(t.date) over (order by t.date) as next_date
      from #table t
      where t.date >= '2016-01-01'
     ) t
where next_date > dateadd(day, 1, date);

SQL calculate date segments within calendar year

What I need is to calculate the missing time periods within the calendar year given a table such as this in SQL:
DatesTable
|ID|DateStart |DateEnd |
1 NULL NULL
2 2015-1-1 2015-12-31
3 2015-3-1 2015-12-31
4 2015-1-1 2015-9-30
5 2015-1-1 2015-3-31
5 2015-6-1 2015-12-31
6 2015-3-1 2015-6-30
6 2015-7-1 2015-10-31
Expected return would be:
1 2015-1-1 2015-12-31
3 2015-1-1 2015-2-28
4 2015-10-1 2015-12-31
5 2015-4-1 2015-5-31
6 2015-1-1 2015-2-28
6 2015-11-1 2015-12-31
It's essentially work blocks. What I need to show is the part of the calendar year which was NOT worked. So for ID = 3, he worked from 3/1 through the rest of the year. But he did not work from 1/1 till 2/28. That's what I'm looking for.
You can do it using LEAD, LAG window functions available from SQL Server 2012+:
-- Lists, per ID, the parts of 2015 not covered by a [DateStart, DateEnd] row.
-- CTE: every interval together with the end of the previous interval (PrevEnd)
-- and the start of the next interval (NextStart) of the same ID, ordered by DateEnd.
;WITH CTE AS (
SELECT ID,
LAG(DateEnd) OVER (PARTITION BY ID ORDER BY DateEnd) AS PrevEnd,
DateStart,
DateEnd,
LEAD(DateStart) OVER (PARTITION BY ID ORDER BY DateEnd) AS NextStart
FROM DatesTable
)
SELECT ID, DateStart, DateEnd
FROM (
-- Get interval right before current [DateStart, DateEnd] interval
-- (a row with NULL DateStart yields the whole year; a row with no gap before it
-- yields NULLs, which the outer WHERE removes)
SELECT ID,
CASE
WHEN DateStart IS NULL THEN '20150101'
WHEN DateStart > start THEN start
ELSE NULL
END AS DateStart,
CASE
WHEN DateStart IS NULL THEN '20151231'
WHEN DateStart > start THEN DATEADD(d, -1, DateStart)
ELSE NULL
END AS DateEnd
FROM CTE
-- start = day after the previous interval ended, or 1 January 2015 when there is none
CROSS APPLY (SELECT COALESCE(DATEADD(d, 1, PrevEnd), '20150101')) x(start)
-- If there is no next interval then get interval right after current
-- [DateStart, DateEnd] interval (up-to end of year)
UNION ALL
SELECT ID, DATEADD(d, 1, DateEnd) AS DateStart, '20151231' AS DateEnd
FROM CTE
WHERE DateStart IS NOT NULl -- Do not re-examine [Null, Null] interval
AND NextStart IS NULL -- There is no next [DateStart, DateEnd] interval
AND DateEnd < '20151231' -- Current [DateStart, DateEnd] interval
-- does not terminate on 31/12/2015
) AS t
WHERE t.DateStart IS NOT NULL
ORDER BY ID, DateStart
The idea behind the above query is simple: for every [DateStart, DateEnd] interval get 'not worked' interval right before it. If there is no interval following the current interval, then also get successive 'not worked' interval (if any).
Also note that I assume that if DateStart is NULL then DateEnd is also NULL for the same ID.
Demo here
If your data is not too big, this approach will work. It expands all the days and ids and then re-groups them:
-- NOTE(review): t(id, startdate, enddate) presumably stands for the work-block
-- table of the question - adjust the names to your schema.
-- d: one row per calendar day of 2015 (the anchor column must be named so the
-- recursive member can refer to it as d)
with d as (
      select cast('2015-01-01' as date) as d
      union all
      select dateadd(day, 1, d)
      from d
      where d < cast('2015-12-31' as date)
     ),
-- td: (day, id) pairs for which that id has no work block covering the day;
-- the id correlation keeps one id's work blocks from hiding another id's days
     td as (
      select d.d, ids.id
      from d cross join
           (select distinct id from t) ids
      where not exists (select 1
                        from t t2
                        where t2.id = ids.id and
                              d.d between t2.startdate and t2.enddate
                       )
     )
-- consecutive days of one id share the same (day - row_number) value, so
-- grouping by it collapses each run of days into one range
select id, min(d) as startdate, max(d) as enddate
from (select td.*,
             dateadd(day, - row_number() over (partition by id order by d), d) as grp
      from td
     ) td
group by id, grp
order by id, grp
-- 364 recursion steps are needed, more than the default limit of 100
option (maxrecursion 0);
An alternative method relies on cumulative sums and similar functionality that is much easier to express in SQL Server 2012+.
Somewhat simpler approach I think.
Basically create a list of dates for all work block ranges (A). Then create a list of dates for the whole year for each ID (B). Then remove the A from B. Compile the remaining list of dates into date ranges for each ID.
-- Calendar-year bounds. Variables need the '@' prefix.
DECLARE @startdate DATETIME, @enddate DATETIME
SET @startdate = '2015-01-01'
SET @enddate = '2015-12-31'
--Build date ranges from remaining date list
;WITH dateRange(ID, dates, Grouping)
AS
(
    -- Rows are numbered by id with dates descending, so date + row number is
    -- constant across a run of consecutive dates of one id: that sum is the group key.
    SELECT dt1.id, dt1.Dates, dt1.Dates + row_number() over (order by dt1.id asc, dt1.Dates desc) AS Grouping
    FROM
    (
        --Remove (A) from (B)
        SELECT distinct dt.ID, tmp.Dates FROM DatesTable dt
        CROSS APPLY
        (
            --GET (B) here: every date from @startdate through @enddate
            SELECT DATEADD(DAY, number, @startdate) [Dates]
            FROM master..spt_values
            WHERE type = 'P' AND DATEADD(DAY, number, @startdate) <= @enddate
        ) tmp
        left join
        (
            --GET (A) here: every date covered by a work block, per Id
            SELECT DISTINCT T.Id,
                   D.Dates
            FROM DatesTable AS T
            INNER JOIN master..spt_values as N on N.number between 0 and datediff(day, T.DateStart, T.DateEnd)
            CROSS APPLY (select dateadd(day, N.number, T.DateStart)) as D(Dates)
            WHERE N.type ='P'
        ) dr
        ON dr.Id = dt.Id and dr.Dates = tmp.Dates
        WHERE dr.id is null   -- keep only dates with no matching worked date
    ) dt1
)
SELECT ID, CAST(MIN(Dates) AS DATE) DateStart, CAST(MAX(Dates) AS DATE) DateEnd
FROM dateRange
GROUP BY ID, Grouping
ORDER BY ID, DateStart
Heres the code:
http://sqlfiddle.com/#!3/f3615/1
I hope this helps!

Query to get results for every hour of the day even data if not present

I am trying to get the query for getting the count of users every hour of day in the table. If the data for that hour is not present, I want to record the hour with count of zero. Also users should be counted only for their first entry. Subsequent entries should be ignored.
Table:
userId creationDate
1 2014-10-08 14:33:20.763
2 2014-10-09 04:24:14.283
3 2014-10-10 18:34:26.260
Desired output:
Date UserCount
2014-10-08 00:00:00.000 1
2014-10-08 01:00:00.000 1
2014-10-08 02:00:00.000 1
2014-10-08 03:00:00.000 0
2014-10-08 04:00:00.000 1
....
.....
2014-10-10 23:00:00.000 1
2014-10-10 00:00:00.000 0
My attempt:
-- Distinct users per date and hour, for hours that have data.
-- The timestamp column is creationDate throughout (it must be the same
-- expression in SELECT and GROUP BY); Table is a reserved word and is bracketed.
SELECT
    CAST(creationDate AS date) AS ForDate,
    DATEPART(hour, creationDate) AS OnHour,
    COUNT(DISTINCT userId) AS Totals
FROM
    [Table]
WHERE
    primaryKey = 123
GROUP BY
    CAST(creationDate AS date), DATEPART(hour, creationDate)
This only gives me per hour for the record that is present. Not the data for the missing hours. I think there is a way by using a cross join to get 0 data even for the missing hours.
Something like this, I came across, but not able to construct a proper query with it.
-- Fragment: numbers the rows of INFORMATION_SCHEMA.COLUMNS and keeps the first
-- 24 (seqnum 1..24), one per hour of the day.
cross join (select
                ROW_NUMBER() over (order by (select NULL)) as seqnum
            from
                INFORMATION_SCHEMA.COLUMNS) hours
where hours.seqnum <= 24
Once again, I am not a SQL expert, but trying hard to construct this result set.
One more attempt :
-- dh: every hour (00:00 .. 23:00) of every day that has at least one row.
with dh as (
      select DATEADD(hour, seqnum - 1, thedatehour ) as DateHour
      from (select distinct cast(cast(creationDate as DATE) as datetime) as thedatehour
            from [Table] a
           ) a
           cross join
           (select ROW_NUMBER() over (order by (select NULL)) as seqnum
            from INFORMATION_SCHEMA.COLUMNS
           ) hours
      where hours.seqnum <= 24
     )
-- The cross join pairs every hour with every row of the table, so each hour
-- reports the distinct users of the whole table (the hour match is commented out).
select dh.DateHour, COUNT(distinct c.userId)
from dh cross join [Table] c
--on dh.DateHour = c.creationDate
group by dh.DateHour
order by dh.DateHour
You need to build up a table of possible hours, and then join this to your actual records.
The best way to build up a table of possible hours is to use a recursive common table expression.
Here's how:
-- Example data
-- Example data in a local temp table ("DECLARE #users TABLE" is not valid
-- T-SQL: '#' names a temp table, table variables need '@').
CREATE TABLE #users (UserID INT, creationDate DATETIME)
INSERT INTO #users
( UserID, creationDate )
VALUES ( 1, '2014-10-08 14:33:20.763'),
       ( 2, '2014-10-09 04:24:14.283'),
       ( 3, '2014-10-10 18:34:26.260')
;WITH u1st AS ( -- determine the FIRST time the user appears
    SELECT UserID, MIN(creationDate) AS creationDate
    FROM #users
    GROUP BY UserID
), hrs AS ( -- recursive CTE of start hours: 00:00 .. 23:00 of every day with data
    SELECT DISTINCT CAST(CAST(creationDate AS DATE) AS DATETIME) AS [StartHour]
    FROM #users AS u
    UNION ALL
    SELECT DATEADD(HOUR, 1, [StartHour]) AS [StartHour] FROM hrs
    WHERE DATEPART(HOUR,[StartHour]) < 23
), uGrp AS ( -- first appearances grouped by the hour they fall in
    SELECT -- note that DATETIMEFROMPARTS is only in SQL Server 2012 and later
        DATETIMEFROMPARTS(YEAR(CreationDate),MONTH(CreationDate),
            DAY(creationDate),DATEPART(HOUR, creationDate),0,0,0)
            AS StartHour,
        COUNT(1) AS UserCount FROM u1st AS u
    GROUP BY YEAR(creationDate), MONTH(creationDate), DAY(creationDate),
        DATEPART(HOUR, creationDate)
)
-- every hour is listed; hours without a first appearance get 0
SELECT hrs.StartHour, ISNULL(uGrp.UserCount, 0) AS UserCount
FROM hrs LEFT JOIN uGrp ON hrs.StartHour = uGrp.StartHour
ORDER BY hrs.StartHour
NB - DATETIMEFROMPARTS is only in SQL SERVER 2012 and greater. If you are using an earlier version of SQL SERVER you could have
WITH u1st AS ( -- determine the FIRST time the user appears
    SELECT UserID, MIN(creationDate) AS creationDate
    FROM #users
    GROUP BY UserID
), hrs AS ( -- recursive CTE of start hours: 00:00 .. 23:00 of every day with data
    SELECT DISTINCT CAST(CAST(creationDate AS DATE) AS DATETIME) AS [StartHour]
    FROM #users AS u
    UNION ALL
    SELECT DATEADD(HOUR, 1, [StartHour]) AS [StartHour] FROM hrs
    WHERE DATEPART(HOUR,[StartHour]) < 23
), uGrp AS ( -- first appearances grouped by the hour they fall in
    SELECT
        -- Truncate to the hour: whole hours since the base date (0), added back
        -- to the base date. This replaces building the value from strings,
        -- where CAST(... AS CHAR(2)) pads one-digit parts with a trailing
        -- space and so defeats the RIGHT('0' + ..., 2) zero-padding.
        DATEADD(HOUR, DATEDIFF(HOUR, 0, creationDate), 0) AS StartHour,
        COUNT(1) AS UserCount FROM u1st AS u
    GROUP BY DATEADD(HOUR, DATEDIFF(HOUR, 0, creationDate), 0)
)
-- every hour is listed; hours without a first appearance get 0
SELECT hrs.StartHour, ISNULL(uGrp.UserCount, 0) AS UserCount
FROM hrs LEFT JOIN uGrp ON hrs.StartHour = uGrp.StartHour
ORDER BY hrs.StartHour
I asked a similar question on dba just this morning...https://dba.stackexchange.com/questions/86435/filling-in-date-holes-in-grouped-by-date-sql-data.
You can use my GetSequence function, or create a Numbers table. I haven't done my own testing yet to validate what was suggested in my scenario.
Try this:
BUILD SAMPLE DATA
-- Sample table with the three rows from the question.
CREATE TABLE yourTable (
    userId       INT,
    creationDate DATETIME
)
INSERT INTO yourTable
VALUES
    (1, '2014-10-08 14:33:20.763'),
    (2, '2014-10-09 04:24:14.283'),
    (3, '2014-10-10 18:34:26.260');
SOLUTION
-- tally: the offsets 0..23, one per hour of the day. 24 rows starting at 0 are
-- needed; offsets 1..23 would never produce the 00:00 hour.
WITH tally(N) AS(
    SELECT TOP(24) ROW_NUMBER() OVER(ORDER BY (SELECT NULL)) - 1 FROM sys.columns
)
-- hourly: every hour of every day that has at least one row in yourTable
,hourly(creationDate) AS(
    SELECT DATEADD(HOUR, t.N, d.creationDate)
    FROM tally t
    CROSS JOIN(
        SELECT DISTINCT DATEADD(DD, DATEDIFF(DD, 0, creationDate), 0) AS creationDate FROM yourTable
    ) d
)
SELECT
    h.creationDate,
    userCount = ISNULL(t.userCount, 0)
FROM hourly h
LEFT JOIN(
    -- users counted once, in the hour of their first entry
    SELECT
        creationDate = DATEADD(HOUR, DATEPART(HOUR, f.firstSeen), DATEADD(DD, DATEDIFF(DD, 0, f.firstSeen), 0)),
        userCount = COUNT(*)
    FROM (
        SELECT userId, MIN(creationDate) AS firstSeen
        FROM yourTable
        GROUP BY userId
    ) f
    GROUP BY DATEADD(DD, DATEDIFF(DD, 0, f.firstSeen), 0), DATEPART(HOUR, f.firstSeen)
)t
    ON t.creationDate = h.creationDate
ORDER BY h.creationDate
CLEANUP
-- remove the sample table again
DROP TABLE yourTable
Create a temporary table (let's say #CreationDateHours) containing create date and hours from 0 to 23.
-- Builds #CreationDateHours: the latest creation date paired with each hour 0..23.
-- Scalar variables need the '@' prefix; Table is a reserved word and is bracketed.
Declare @date as date
-- seed row: latest date, hour 0 (SELECT ... INTO creates the temp table)
SELECT MAX(CAST(creationDate as date)) AS ForDate, 0 as OnHour into #CreationDateHours
FROM [Table]
WHERE
primaryKey = 123
Select @date=ForDate from #CreationDateHours
Declare @i int
Set @i=1
-- add hours 1..23 for the same date
While @i<24
begin
insert into #CreationDateHours (ForDate, OnHour)
select @date, @i
set @i+=1
end
Now, Run this query to get the desired results
-- Every (date, hour) of #CreationDateHours with its distinct-user count;
-- hours without rows get 0.
-- The timestamp column is creationDate throughout (it must be the same
-- expression in SELECT and GROUP BY); Table is a reserved word and is bracketed.
select t1.ForDate, t1.OnHour, isnull(t2.Totals,0) AS Totals
from
#CreationDateHours t1 left join (SELECT
    CAST(creationDate as date) AS ForDate,
    DATEPART(hour, creationDate) AS OnHour,
    COUNT(distinct userId) AS Totals
FROM
    [Table]
WHERE
    primaryKey = 123
GROUP BY
    CAST(creationDate as date), DATEPART(hour, creationDate)) as t2
on t1.ForDate= t2.ForDate and t1.OnHour=t2.OnHour
-- SQLite: number of rows per hour of day; strftime('%H', ...) yields '00'..'23'.
-- count(*) is the aggregate (there is no column named count), and the
-- placeholder table name is quoted because table is a keyword.
select count(*) as count, strftime('%H', creationDate) as hour from "table" group by hour;
OUTPUT:
count hour
n1 01
n2 02
n3 03
... ...
n24 24
Tested on SQLite3
https://www.sqlite.org/lang_datefunc.html
you can see all the formats
(like %d for every day of month)
if you want the whole date
-- date plus hour; the format string must be closed before the column argument
strftime('%Y-%m-%d %H', creationDate)