SQL query count occurrences then group by day but also fill missing days

SQL query count occurrences then group by day but also fill missing days - sql

I have a table called diseaseScores which has calculation results. Each hour a running score is calculated (currentScore). The objective of this query is to group the hourly scores by the day (obsDate), then count the numberOfhours where the running score is at a level considered high. High is greater than 16 (currentScore > 16).
My query so far is:
SELECT
DATEADD(DAY, 0, DATEDIFF(day, 0, obsDate)) AS obsDate,
(CASE
WHEN count(id) > 12 THEN count(id)
ELSE 0
END) numOfHoursAtHigh
FROM
diseaseScores
WHERE
diseaseID = 2
AND siteID = 72160
AND numOfRotationYears = 3
AND currentScore > 16
AND month(obsDate) IN (6)
GROUP BY
DATEADD(DAY, 0, DATEDIFF(day, 0, obsDate))
ORDER BY
DATEADD(DAY, 0, DATEDIFF(day, 0, obsDate));
The query returns results for 13 days of the month. I wish to fill the gaps so I a have a record for each of the day of the month. The gaps need to have a numOfHoursAtHigh result of 0.
How can I do this? This is for SQL Server 2008 +
The result set being returned is:
2016-06-04 00:00:00.000 0
2016-06-05 00:00:00.000 23
2016-06-06 00:00:00.000 23
2016-06-07 00:00:00.000 23
2016-06-08 00:00:00.000 3
2016-06-09 00:00:00.000 23
2016-06-10 00:00:00.000 0
2016-06-17 00:00:00.000 13
2016-06-18 00:00:00.000 23
2016-06-19 00:00:00.000 0
2016-06-20 00:00:00.000 14
2016-06-21 00:00:00.000 23
2016-06-22 00:00:00.000 16
UPDATE : So using a modified version of knobcreekmans approach (was doubling up certain days) I now have this which does fill my gaps and works great for one month. As soon as I ask for two months worth by changing month(obsDate) IN (6) to month(obsDate) IN (6,7) it skips days if they happen to clash in month 6 and 7. Grrrrrr, am so close!
SELECT CAST(obsDate AS DATE) as obsDate,
(CASE
WHEN COUNT(id) > 12 THEN COUNT(id)
ELSE 0
END) numOfHoursAtHigh
FROM diseaseScores
WHERE diseaseID=2
AND siteID=72160
AND numOfRotationYears=3
AND currentScore > 16
AND month(obsDate) IN (6)
GROUP BY CAST(obsDate AS DATE)
UNION
SELECT CAST(obsDate AS DATE) AS obsDate,
0 AS numOfHoursAtHigh
FROM diseaseScores
WHERE diseaseID=2
AND siteID=72160
AND numOfRotationYears=3
AND currentScore <= 17
AND month(obsDate) IN (6)
and day(obsDate) NOT IN --<-- added from here
(
SELECT distinct day(obsDate)
FROM diseaseScores
WHERE diseaseID=2
AND siteID=72160
AND numOfRotationYears=3
AND currentScore > 16
AND month(obsDate) IN (6)
) --<-- to here to omit the duplicates
GROUP BY CAST(obsDate AS DATE)
ORDER BY CAST(obsDate AS DATE)
To answer to the question around the expected result. Its one record for each day of the month (or months) giving to columns. A date and then an integer for numOfHoursAtHigh e.g
2016-06-01 0
2016-06-02 0
2016-06-03 0
2016-06-04 0
2016-06-05 23
2016-06-06 23
2016-06-07 23
2016-06-08 23
2016-06-09 23
2016-06-10 0
2016-06-11 0
2016-06-12 0
2016-06-13 0
2016-06-14 0
2016-06-15 0
2016-06-16 0
2016-06-17 13
2016-06-18 23
2016-06-19 0
2016-06-20 14
2016-06-21 23
2016-06-22 16
2016-06-23 0
2016-06-24 0
2016-06-25 0
2016-06-26 0
2016-06-27 0
2016-06-28 0
2016-06-29 0
2016-06-30 0

You could create another SELECT that is the same as the original, modify the part of your WHERE clause that is filtering out the results you want (currentScore > 16), and then UNION them together.
SELECT CAST(obsDate AS DATE) as obsDate,
(CASE
WHEN COUNT(id) > 12 THEN COUNT(id)
ELSE 0
END) numOfHoursAtHigh
FROM diseaseScores
WHERE diseaseID=2
AND siteID=72160
AND numOfRotationYears=3
AND currentScore > 16
AND month(obsDate) IN (6)
GROUP BY CAST(obsDate AS DATE)
UNION
SELECT CAST(obsDate AS DATE) AS obsDate,
0 AS numOfHoursAtHigh
FROM diseaseScores
WHERE diseaseID=2
AND siteID=72160
AND numOfRotationYears=3
AND currentScore < 17 --<-- note the change
AND month(obsDate) IN (6)
GROUP BY CAST(obsDate AS DATE)
ORDER BY CAST(obsDate AS DATE)

Hi, You can have the below query,
SELECT DS.dateadd(DAY,0, datediff(day,0, obsDate)) as obsDate,
CASE
WHEN DS1.COUNT(id) > 12 THEN COUNT(id)
ELSE 0
END AS numOfHoursAtHigh
FROM diseaseScores DS
INNER JOIN (
SELECT dateadd(DAY,0, datediff(day,0, obsDate)) AS date, COUNT(id)
FROM diseaseScores GROUP BY date
) DS1
ON DS.date = DS1.obsDate
AND DS.diseaseID=2
AND DS.siteID=721DS.60
AND DS.numOfRotationYears=3
AND DS.currentScore > 16
AND DS.month(obsDate) IN (6)
ORDER BY DS.obsDate;

Related

Sql group by latest repeated field

I don't even know what's a good title for this question.
But I'm having a table:
create table trans
(
[transid] INT IDENTITY (1, 1) NOT NULL,
[customerid] int not null,
[points] decimal(10,2) not null,
[date] datetime not null
)
and records:
--cus1
INSERT INTO trans ( customerid , points , date )
VALUES ( 1, 10, '2016-01-01' ) , ( 1, 20, '2017-02-01' ) , ( 1, 22, '2017-03-01' ) ,
( 1, 24, '2018-02-01' ) , ( 1, 50, '2018-02-25' ) , ( 2, 44, '2016-02-01' ) ,
( 2, 20, '2017-02-01' ) , ( 2, 32, '2017-03-01' ) , ( 2, 15, '2018-02-01' ) ,
( 2, 10, '2018-02-25' ) , ( 3, 10, '2018-02-25' ) , ( 4, 44, '2015-02-01' ) ,
( 4, 20, '2015-03-01' ) , ( 4, 32, '2016-04-01' ) , ( 4, 15, '2016-05-01' ) ,
( 4, 10, '2017-02-25' ) , ( 4, 10, '2018-02-27' ) ,( 4, 20, '2018-02-28' ) ,
( 5, 44, '2015-02-01' ) , ( 5, 20, '2015-03-01' ) , ( 5, 32, '2016-04-01' ) ,
( 5, 15, '2016-05-01' ) ,( 5, 10, '2017-02-25' );
-- selecting the data
select * from trans
Produces:
transid customerid points date
----------- ----------- --------------------------------------- -----------------------
1 1 10.00 2016-01-01 00:00:00.000
2 1 20.00 2017-02-01 00:00:00.000
3 1 22.00 2017-03-01 00:00:00.000
4 1 24.00 2018-02-01 00:00:00.000
5 1 50.00 2018-02-25 00:00:00.000
6 2 44.00 2016-02-01 00:00:00.000
7 2 20.00 2017-02-01 00:00:00.000
8 2 32.00 2017-03-01 00:00:00.000
9 2 15.00 2018-02-01 00:00:00.000
10 2 10.00 2018-02-25 00:00:00.000
11 3 10.00 2018-02-25 00:00:00.000
12 4 44.00 2015-02-01 00:00:00.000
13 4 20.00 2015-03-01 00:00:00.000
14 4 32.00 2016-04-01 00:00:00.000
15 4 15.00 2016-05-01 00:00:00.000
16 4 10.00 2017-02-25 00:00:00.000
17 4 10.00 2018-02-27 00:00:00.000
18 4 20.00 2018-02-28 00:00:00.000
19 5 44.00 2015-02-01 00:00:00.000
20 5 20.00 2015-03-01 00:00:00.000
21 5 32.00 2016-04-01 00:00:00.000
22 5 15.00 2016-05-01 00:00:00.000
23 5 10.00 2017-02-25 00:00:00.000
I'm trying to group all the customerid and sum their points. But here's the catch, If the trans is not active for 1 year(the next tran is 1 year and above), the points will be expired.
For this case:
Points for each customers should be:
Customer1 20+22+24+50
Customer2 20+32+15+10
Customer3 10
Customer4 10+20
Customer5 0
Here's what I have so far:
select
t1.transid as transid1,
t1.customerid as customerid1,
t1.date as date1,
t1.points as points1,
t1.rank1 as rank1,
t2.transid as transid2,
t2.customerid as customerid2,
t2.points as points2,
isnull(t2.date,getUTCDate()) as date2,
isnull(t2.rank2,t1.rank1+1) as rank2,
cast(case when(t1.date > dateadd(year,-1,isnull(t2.date,getUTCDate()))) Then 0 ELSE 1 END as bit) as ShouldExpire
from
(
select transid,CustomerID,Date,points,
RANK() OVER(PARTITION BY CustomerID ORDER BY date ASC) AS RANK1
from trans
)t1
left join
(
select transid,CustomerID,Date,points,
RANK() OVER(PARTITION BY CustomerID ORDER BY date ASC) AS RANK2
from trans
)t2 on t1.RANK1=t2.RANK2-1
and t1.customerid=t2.customerid
which gives
from the above table,how do I check for ShouldExpire field having max(rank1) for customer, if it's 1, then totalpoints will be 0, otherwise,sum all the consecutive 0's until there are no more records or a 1 is met?
Or is there a better approach to this problem?

The following query uses LEAD to get the date of the next record withing the same CustomerID slice:
;WITH CTE AS (
SELECT transid, CustomerID, [Date], points,
LEAD([Date]) OVER (PARTITION BY CustomerID
ORDER BY date ASC) AS nextDate,
CASE
WHEN [date] > DATEADD(YEAR,
-1,
-- same LEAD() here as above
ISNULL(LEAD([Date]) OVER (PARTITION BY CustomerID
ORDER BY date ASC),
getUTCDate()))
THEN 0
ELSE 1
END AS ShouldExpire
FROM trans
)
SELECT transid, CustomerID, [Date], points, nextDate, ShouldExpire
FROM CTE
ORDER BY CustomerID, [Date]
Output:
transid CustomerID Date points nextDate ShouldExpire
-------------------------------------------------------------
1 1 2016-01-01 10.00 2017-02-01 1 <-- last exp. for 1
2 1 2017-02-01 20.00 2017-03-01 0
3 1 2017-03-01 22.00 2018-02-01 0
4 1 2018-02-01 24.00 2018-02-25 0
5 1 2018-02-25 50.00 NULL 0
6 2 2016-02-01 44.00 2017-02-01 1 <-- last exp. for 2
7 2 2017-02-01 20.00 2017-03-01 0
8 2 2017-03-01 32.00 2018-02-01 0
9 2 2018-02-01 15.00 2018-02-25 0
10 2 2018-02-25 10.00 NULL 0
11 3 2018-02-25 10.00 NULL 0 <-- no exp. for 3
12 4 2015-02-01 44.00 2015-03-01 0
13 4 2015-03-01 20.00 2016-04-01 1
14 4 2016-04-01 32.00 2016-05-01 0
15 4 2016-05-01 15.00 2017-02-25 0
16 4 2017-02-25 10.00 2018-02-27 1 <-- last exp. for 4
17 4 2018-02-27 10.00 2018-02-28 0
18 4 2018-02-28 20.00 NULL 0
19 5 2015-02-01 44.00 2015-03-01 0
20 5 2015-03-01 20.00 2016-04-01 1
21 5 2016-04-01 32.00 2016-05-01 0
22 5 2016-05-01 15.00 2017-02-25 0
23 5 2017-02-25 10.00 NULL 1 <-- last exp. for 5
Now, you seem to want to calculate the sum of points after the last expiration.
Using the above CTE as a basis you can achieve the required result with:
;WITH CTE AS (
... above query here ...
)
SELECT CustomerID,
SUM(CASE WHEN rnk = 0 THEN points ELSE 0 END) AS sumOfPoints
FROM (
SELECT transid, CustomerID, [Date], points, nextDate, ShouldExpire,
SUM(ShouldExpire) OVER (PARTITION BY CustomerID ORDER BY [Date] DESC) AS rnk
FROM CTE
) AS t
GROUP BY CustomerID
Output:
CustomerID sumOfPoints
-----------------------
1 116.00
2 77.00
3 10.00
4 30.00
5 0.00
Demo here

The tricky part here is to dump all points when they expire, and start accumulating them again. I assumed that if there was only one transaction that we don't expire the points until there's a new transaction, even if that first transaction was over a year ago now?
I also get a different answer for customer #5, as they do appear to have a "transaction chain" that hasn't expired?
Here's my query:
WITH ordered AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY customerid ORDER BY [date]) AS order_id
FROM
trans),
max_transid AS (
SELECT
customerid,
MAX(transid) AS max_transid
FROM
trans
GROUP BY
customerid),
not_expired AS (
SELECT
t1.customerid,
t1.points,
t1.[date] AS t1_date,
CASE
WHEN m.customerid IS NOT NULL THEN GETDATE()
ELSE t2.[date]
END AS t2_date
FROM
ordered t1
LEFT JOIN ordered t2 ON t2.customerid = t1.customerid AND t1.transid != t2.transid AND t2.order_id = t1.order_id + 1 AND t1.[date] > DATEADD(YEAR, -1, t2.[date])
LEFT JOIN max_transid m ON m.customerid = t1.customerid AND m.max_transid = t1.transid
),
max_not_expired AS (
SELECT
customerid,
MAX(t1_date) AS max_expired
FROM
not_expired
WHERE
t2_date IS NULL
GROUP BY
customerid)
SELECT
n.customerid,
SUM(n.points) AS points
FROM
not_expired n
LEFT JOIN max_not_expired m ON m.customerid = n.customerid
WHERE
ISNULL(m.max_expired, '19000101') < n.t1_date
GROUP BY
n.customerid;
It could be refactored to be simpler, but I wanted to show the steps to get to the final answer:
customerid points
1 116.00
2 77.00
3 10.00
4 30.00
5 57.00

can you try this:
SELECT customerid,
Sum(t1.points)
FROM trans t1
WHERE NOT EXISTS (SELECT 1
FROM trans t2
WHERE Datediff(year, t1.date, t2.date) >= 1)
GROUP BY t1.customerid
Hope it helps!

try this:
select customerid,Sum(points)
from trans where Datediff(year, date, GETDATE()) < 1
group by customerid
output:
customerid Points
1 - 74.00
2 - 25.00
3 - 10.00
4 - 30.00

Group by day from a certain hour of the day

SQLServer 2008r2. I have a table which is populated with a record at 10 mins past every hour of every day. Every hour the job is run it also enters 48 records which represent a forecast of what is likely to happen for the next 48 hours. Note - Just before it enters the 48 hour forecast it deletes the forecast which was entered last time. So although it enters a 48 hour forecast every hour there is only ever one forecast in the system. The relevant fields in the table look like this:
currentScore obsDate
9 2017-06-22 08:10:00
9 2017-06-22 07:10:00
9 2017-06-22 06:10:00
10 2017-06-22 05:10:00
... ...
How can I query this table and group by day from a certain time of day? I would like the day to start at 6am the day before and finish at 6am on the day. I only need five records from the table, the day, two before and two in the future. So if its Jun 20 I want June 18, 19, 20, 21 and 22. Here is the query which gets the correct results by calendar day.
SELECT cast(obsDate AS DATE) AS theDate
,sum(CASE
WHEN currentScore < 8
THEN 1
ELSE 0
END) AS currentscore_low
,sum(CASE
WHEN currentScore >= 8
AND currentScore < 17
THEN 1
ELSE 0
END) AS currentscore_medium
,sum(CASE
WHEN currentScore >= 17
THEN 1
ELSE 0
END) AS currentscore_high
FROM diseaseScores
WHERE siteID = 8315
AND obsDate >= cast(getdate() - 2 AS DATE)
GROUP BY cast(obsDate AS DATE)
ORDER BY cast(obsDate AS DATE);
which returns this result:
theDAte low med high
2017-06-18 23 0 0
2017-06-19 22 0 0
2017-06-20 5 19 0
2017-06-21 0 24 0
2017-06-22 0 9 0
There is a new requirement to get the same result but the group by and the subsequent counts need to be from 6am to 6am. e.g
the first rec should be from 2017-06-17 06:00am to 2017-06-18 06:00am
the second rec should be from 2017-06-18 06:00am to 2017-06-19 06:00am
....etc
How can I do this? Thanks in advance
UPDATE, I have done two things:
1..introduce Tims idea
2..I also add an extra field 'numOfScores' to show how many hours worth of data
each line represent
select
cast(dateadd(hour, -6, obsDate) as date) as theDate, count(currentScore) as numOfScores,
sum(case when currentScore < 8 then 1 else 0 end) as currentscore_low,
sum(case when currentScore >= 8 and currentScore < 17
then 1 else 0 end) as currentscore_medium,
sum(case when currentScore >= 17 then 1 else 0 end) as currentscore_high
from diseaseScores
where siteID = 8315 and
obsDate >= cast(getdate() - 2 as date)
group by cast(dateadd(hour, -6, obsDate) as date)
order by cast(dateadd(hour, -6, obsDate) as date);
I now get this result:
2017-06-18 5 5 0 0
2017-06-19 24 23 1 0
2017-06-20 24 1 23 0
2017-06-21 24 8 16 0
2017-06-22 24 1 23 0
2017-06-23 9 0 9 0
This tells me that that there is only 5 hours worth of scores on the 2017-06-18. I want this first line to be 24 hours worth. From 6am on the 17th until 6am on the 18th. This makes me think I am not getting the result I wish
The 23rd only having 9 hours is ok because this is the most recent forecast
UPDATED:
I dont think its easily done in one query (if even possible) so I will just use five queries and specifically state the dates&times to get my outcome. e.g here are the first two:
select
sum(case when currentScore < 9 then 1 else 0 end) as numOfLOWRecs,
sum(case when currentScore > 8 and currentScore < 17 then 1 else 0 end) as currentscore_medium,
sum(case when currentScore >= 17 then 1 else 0 end) as currentscore_high
from diseaseScores where siteID = 9999
and obsDate >= '2017-06-18 06:00' and obsDate < '2017-06-19 06:00'
select
sum(case when currentScore < 9 then 1 else 0 end) as numOfLOWRecs,
sum(case when currentScore > 8 and currentScore < 17 then 1 else 0 end) as currentscore_medium,
sum(case when currentScore >= 17 then 1 else 0 end) as currentscore_high
from diseaseScores where siteID = 9999
and obsDate >= '2017-06-19 06:00' and obsDate < '2017-06-20 06:00'

One trick which might work here would be to simply shift each observation backwards by 6 hours. This would shift 2017-06-17 06:00:00 to 2017-06-17 00:00:00, i.e. now 6am becomes the start of that actual day.
select
cast(dateadd(hour, -6, obsDate) as date) as theDate,
sum(case when currentScore < 8 then 1 else 0 end) as currentscore_low,
sum(case when currentScore >= 8 and currentScore < 17
then 1 else 0 end) as currentscore_medium,
sum(case when currentScore >= 17 then 1 else 0 end) as currentscore_high
from diseaseScores
where siteID = 8315 and
obsDate >= cast(getdate() - 2 as date)
group by cast(dateadd(hour, -6, obsDate) as date)
order by cast(dateadd(hour, -6, obsDate) as date);

Group by day and conditional count issue

Using SQLServer 2008r2 - I have a table which has records inserted every hour. The relevant columns for my query are currentScore (int) and obsDate (smallDateTime). I wish to get five records grouped by day. Today, the two days prior to today (starting at mid-night) and two days in the future. SO if its Jun 20 I want June 18, 19, 20, 21 and 22. I am successfully doing this like so:
select dateadd(DAY,0, datediff(day,0, obsDate)) as theDate,
count(currentScore) as numOfScores
from diseaseScores
where siteID=8315 and obsDate > dateAdd(day, -2, (SELECT CONVERT(DATETIME,
CONVERT(DATE, CURRENT_TIMESTAMP)) + '00:00'))
group by dateadd(DAY,0, datediff(day,0, obsDate))
order by dateadd(DAY,0, datediff(day,0, obsDate))
My record set looks like so:
theDate numOfScores
2017-06-18 00:00:00.000 23
2017-06-19 00:00:00.000 22
2017-06-20 00:00:00.000 24
2017-06-21 00:00:00.000 24
2017-06-22 00:00:00.000 9
I wish to add three more columns which will count the number of currentScore in a certain range. Something like this
CASE
WHEN currentScore < 8 THEN COUNT(where currentScore < 8) as Low
WHEN currentScore > 8 and < 17 THEN COUNT(where currentScore > 8 and < 17) as Med
WHEN currentScore > 17 THEN COUNT(where currentScore > 17 ) as High
Can I do this with a select case? What is the best way to achieve this?
Thanks in advance
Here is the result I wish to achieve:
theDAte numOfScores low med high
2017-06-18 23 23 0 0
2017-06-19 22 22 0 0
2017-06-20 24 5 19 0
2017-06-21 24 0 24 0
2017-06-22 9 0 9 0

First, use cast(. . as date). Much clearer! Then you can do what you want using conditional aggregation:
select cast(obsDate as date) as theDate,
count(currentScore) as numOfScores ,
sum(case when currentScore < 8 then 1 else 0 end) as currentscore_low,
sum(case when currentScore >= 8 and currentScore < 17 then 1 else 0 end) as currentscore_medium,
sum(case when currentScore >= 17 then 1 else 0 end) as currentscore_high
from diseaseScores
where siteID = 8315 and
obsDate >= cast(getdate() - 2 as date)
group by cast(obsDate as date)
order by cast(obsDate as date);
Note: Your original where clause has only half the date condition. I didn't add the other half, but it should be pretty obvious how to get no more than two days in the future.

SQLHow do I modify this query to select unique by hour

(Looking for a better title)
Hello I have the query below
Declare #CDT varchar(23)
Declare #CDT2 varchar(23)
set #cdt = '2016-01-18 00:00:00.000'
set #cdt2 = '2016-01-26 00:00:00.000'
SELECT
spt.number AS [Hour of Day],
(SELECT COUNT(DISTINCT AgentId)
FROM history t2
WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
AND projectid IN (5) and calldatetime between #cdt and #cdt2) AS [Project 5 ],
(SELECT COUNT(DISTINCT AgentId)
FROM history t2
WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
AND projectid IN (124) and calldatetime between #cdt and #cdt2) AS [Project 124],
(SELECT COUNT(DISTINCT AgentId)
FROM history t2
WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
AND projectid IN (576) and calldatetime between #cdt and #cdt2) AS [Project 576]
FROM master..spt_values spt
WHERE spt.number BETWEEN 0 AND 11 AND spt.type = 'p'
GROUP BY spt.number
ORDER BY spt.number
I now need to select a unique number per hour rather than a distinct ammount overall.
for instance if I run this with the "select distinct(Agentid), rest of query here, it will give me a count of agentids, independant of the cases, how do I "WHEN AGENTID is unique"?
I copied examples from the original question
Project id Datetime Agentid
---------- ----------------------- ---------
5 11-23-2015 09:00:00.000 12
5 11-23-2015 10:00:00.000 12
6 11-23-2015 11:00:00.000 12
1 11-23-2015 12:00:00.000 3
3 11-23-2015 13:00:00.000 4
124 11-23-2015 14:00:00.000 7
124 11-23-2015 15:00:00.000 9
124 11-23-2015 16:00:00.000 10
576 11-23-2015 17:00:00.000 10
576 11-23-2015 18:00:00.000 44
576 11-23-2015 19:00:00.000 69
etc 11-23-2015 20:00:00.000 23
Expected output (Ignore the incorrect counts, assume they are correct from above^):
Datetime 5 124 576
------------- --- --- ---
09:00 - 09:59 0 4 5
10:00 - 10:59 4 3 1
11:00 - 11:59 5 2 1
12:00 - 12:59 1 1 1
13:00 - 13:59 6 1 1
14:00 - 14:59 6 1 1
15:00 - 15:59 7 1 2
16:00 - 16:59 8 1 3
17:00 - 17:59 9 1 3
18:00 - 18:59 1 1 2
19:00 - 19:59 12 1 0
20:00 - 20:59 0 0 0
so far
Hour of Day Project 5 Project 124 Project 576
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
7 0 0 0
8 0 0 0
9 0 0 0
10 0 0 0
11 0 0 0

I'm pretty sure you need to do this with subqueries:
SELECT
spt.number AS [Hour of Day],
(SELECT COUNT(DISTINCT AgentId)
FROM YourTable t2
WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
AND projectId IN (5)) AS [Project 5 ],
(SELECT COUNT(DISTINCT AgentId)
FROM YourTable t2
WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
AND projectId IN (124)) AS [Project 124],
(SELECT COUNT(DISTINCT AgentId)
FROM YourTable t2
WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
AND projectId IN (576)) AS [Project 576]
FROM master..spt_values spt
WHERE spt.number BETWEEN 0 AND 11 AND spt.type = 'p'
GROUP BY spt.number
ORDER BY spt.number

Here is the table used by these queries:
DECLARE #wt TABLE (
projectid varchar(4) not null,
edate datetime not null,
agentid int not null );
If you want to get the counts by time and project, use this query:
SELECT edate, projectid, COUNT(*) as nentries
FROM #wt
GROUP BY edate, projectid;
I haven't dealt with bucketing the dates by hour; that is a separate issue.
To get a tabular result set as you have shown:
SELECT edate, [5] AS [Project 5], [124] AS [Project 124], [576] AS [Project 576]
FROM (
SELECT edate, CAST(projectid AS int) AS projectid
FROM #wt
WHERE ISNUMERIC(projectid) <> 0 ) AS s
PIVOT (
COUNT(projectid)
FOR projectid IN ([5], [124], [576])) AS p;
Here is the result set for the PIVOT query using the above data:
However, you have to specify the projects of interest in the query. If you want to have an arbitrary number of projects and get columns for each one, that is going to require dynamic SQL to construct the PIVOT query.
#Tab Alleman: I added some data to illustrate the conditions that will test your scenario. Here is the result set with the same PIVOT query:

Counting number of rows grouped by date and hour

I am tracking customer store entry data in Microsoft SQL Server 2008 R2 that looks something like this:
DoorID DateTimeStamp EntryType
1 2013-09-02 09:01:16.000 IN
1 2013-09-02 09:04:09.000 IN
1 2013-09-02 10:19:29.000 IN
1 2013-09-02 10:19:30.000 IN
1 2013-09-02 10:19:32.000 OUT
1 2013-09-02 10:26:36.000 IN
1 2013-09-02 10:26:40.000 OUT
I don't want to count the OUT rows, just IN.
I believe that it needs to be grouped on Date, and DoorID, then get the hours totals.
I would like it to come out like this.
Date DoorID HourOfDay TotalInPersons
2013-09-02 1 0 0
2013-09-02 1 1 0
2013-09-02 1 2 0
2013-09-02 1 3 0
2013-09-02 1 4 0
2013-09-02 1 5 0
2013-09-02 1 6 0
2013-09-02 1 7 0
2013-09-02 1 8 0
2013-09-02 1 9 2
2013-09-02 1 10 3
2013-09-02 1 11 0
2013-09-02 1 12 0
2013-09-02 1 13 0
2013-09-02 1 14 0
2013-09-02 1 15 0
2013-09-02 1 16 0
2013-09-02 1 17 0
2013-09-02 1 18 0
2013-09-02 1 19 0
2013-09-02 1 20 0
2013-09-02 1 21 0
2013-09-02 1 22 0
2013-09-02 1 23 0

SELECT
[Date] = CONVERT(DATE, DateTimeStamp),
DoorID,
HourOfDay = DATEPART(HOUR, DateTimeStamp),
TotalInPersons = COUNT(*)
FROM dbo.tablename
WHERE EntryType = 'IN'
GROUP BY
CONVERT(DATE, DateTimeStamp),
DoorID,
DATEPART(HOUR, DateTimeStamp)
ORDER BY
[Date], DoorID, HourOfDay;
Of course if you need all hours, even where no rows are represented, here is one solution (which limits the output for any day only to the doors that have at least one IN entry on that day):
;WITH h AS
(
SELECT TOP (24) h = number FROM Master..spt_values
WHERE type = N'P' ORDER BY number
),
doors AS
(
SELECT DISTINCT DoorID, [Date] = CONVERT(DATE,DateTimeStamp)
FROM dbo.tablename WHERE EntryType = 'IN'
)
SELECT
d.[Date],
d.DoorID,
HourOfDay = h.h,
TotalInPersons = COUNT(t.EntryType)
FROM doors AS d CROSS JOIN h
LEFT OUTER JOIN dbo.tablename AS t
ON CONVERT(DATE, t.DateTimeStamp) = d.[Date]
AND t.DoorID = d.DoorID
AND DATEPART(HOUR, t.DateTimeStamp) = h.h
AND t.EntryType = 'IN'
GROUP BY d.[Date], d.DoorID, h.h
ORDER BY d.[Date], d.DoorID, h.h;

How about something like this:
SELECT
CAST(DateTimeStamp AS DATE) AS Date
,DoorID
,DATEPART(HOUR, DateTimeStamp) AS HourOfDay
,COUNT(*) AS TotalInPersons
FROM StoreTable
WHERE EntryType = 'IN'
GROUP BY
CAST(DateTimeStamp AS DATE)
,DoorID
,DATEPART(HOUR, DateTimeStamp)

This should work. I guessed on how you would pull DoorID and TotalPersons, but the overall logic is correct
SELECT CONVERT(date,dateColumn) AS Date,
datepart(hh,dateColumn) AS HourOfDay,
DoorID,
COUNT(people) AS TotalPersons
FROM yourtable
WHERE EntryType = 'IN'
GROUP BY CONVERT(date,dateColumn), datepart(hh,dateColumn), DoorID
ORDER BY CONVERT(date,dateColumn), datepart(hh,dateColumn)

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

SQL query count occurrences then group by day but also fill missing days - sql

Related

Sql group by latest repeated field

Group by day from a certain hour of the day

Group by day and conditional count issue

SQLHow do I modify this query to select unique by hour

Counting number of rows grouped by date and hour

Categories

Resources