How to Make Groups Reset by ID number - sql-server-2012

I wrote a SQL script that numbers a new group every time either 1) The ID number changes compared to the one before it or 2) The order date is more than 15 minutes longer than the one before it. However, instead of having numbers that go from 0 to infinity, I want the group # to reset to 0 every time the ID number changes. Please advise.
This code creates a dummy table of values:
CREATE TABLE #pretemp (
[hospital_code] varchar(255),
s_visit_ident varchar(255),
orderdt datetime
);
INSERT INTO #pretemp ([hospital_code],s_visit_ident,orderdt)
VALUES ('ClevelandClinic', '1', '01-02-2020 12:30'),
('ClevelandClinic', '1','01-02-2020 12:35'),
('ClevelandClinic', '1', '01-02-2020 12:42'),
('ClevelandClinic', '1', '01-02-2020 12:55'),
('ClevelandClinic', '1','01-05-2020 18:44'),
('ClevelandClinic', '2','01-05-2020 07:09'),
('ClevelandClinic', '1','01-05-2020 07:12'),
('ClevelandClinic', '1','01-05-2020 07:18'),
('ClevelandClinic', '1', '01-07-2020 00:00'),
('ClevelandClinic', '3','01-07-2020 00:03'),
('ClevelandClinic', '3','01-07-2020 00:10');
This script creates and adds a group number.
select hospital_code, s_visit_ident, orderdt,
lead(orderdt,1) over (order by hospital_code, s_visit_ident,orderdt) as 'Lead Time',
lead(s_visit_ident,1) over (order by hospital_code,s_visit_Ident,orderdt) as 'Lead pt_id',
lag(Orderdt,1) over (order by hospital_Code, s_visit_ident,orderdt) as 'Lag',
lag(s_visit_ident,1) over (order by hospital_code, s_visit_ident,orderdt) as 'Lag pt_id'
into #temp
from #pretemp
order by hospital_code, s_visit_ident, orderdt,lead(orderdt,1) over (order by
hospital_code,s_visit_ident,orderdt)
select *,
case when [lead pt_id] <> [s_visit_ident] then null else [lead time] end as 'Updated Lead Time',
case when [lead pt_id] <> [s_visit_ident] then null else datediff(mi,orderdt,[lead time]) end as
'Timediff',
case when [lead pt_id] <> s_visit_ident then 0
when datediff(mi,orderdt,[lead time]) <= 15 then 1 else 0 end as 'Tag'
into #temp2
From #temp
order by s_Visit_ident,orderdt,[lead time],hospital_code
-- Add the output column that will hold the group number for each row.
ALTER TABLE #temp2 ADD [group] INT
-- Running group counter. T-SQL local variables take the @ sigil
-- (# is the prefix for temporary tables, not variables).
DECLARE @group INT
SELECT @group = 0
-- Each updated row stores the counter and then recomputes it. The UPDATE has
-- no ORDER BY, so the counter advances in whatever order rows are processed.
UPDATE ss SET [group] = @group, @group = CASE WHEN
s.[lag pt_id] is null then @group
when s.[lead pt_id] is null then @group
when
s.[lag pt_Id] <> s.s_visit_ident then @group + 1
--If it's the first, then if the lag time
when datediff(mi,ss.[lag],s.[orderdt]) > 15 then @group+1
--If it's in the middle
when datediff(mi,ss.[lag],s.orderdt) <= 15 and datediff(mi,s.orderdt,ss.[lead time]) <= 15 then @group
--If it's the last of the group
when datediff(mi,ss.[lag],s.orderdt) <=15 and datediff(mi,s.orderdt,ss.[lead time]) > 15 then @group
else @group + 1
END
FROM #temp2
ss JOIN (SELECT hospital_Code, s_Visit_ident, orderdt,[lag pt_id],[lead pt_id]
FROM #temp2
) s ON (ss.hospital_Code = s.hospital_Code AND ss.s_visit_ident = s.s_visit_Ident AND ss.orderdt = s.orderdt)
SELECT hospital_code AS Hospital, s_visit_Ident AS Patient, orderdt,[group]
FROM #temp2
GROUP BY hospital_Code,s_Visit_ident,orderdt,[group]
ORDER BY [group] asc
These are the results I get:
Hospital Patient orderdt group
ClevelandClinic 1 2020-01-02 12:30:00.000 0
ClevelandClinic 1 2020-01-02 12:35:00.000 0
ClevelandClinic 1 2020-01-02 12:42:00.000 0
ClevelandClinic 1 2020-01-02 12:55:00.000 0
ClevelandClinic 1 2020-01-05 07:12:00.000 1
ClevelandClinic 1 2020-01-05 07:18:00.000 1
ClevelandClinic 1 2020-01-05 18:44:00.000 2
ClevelandClinic 1 2020-01-07 00:00:00.000 3
ClevelandClinic 2 2020-01-05 07:09:00.000 4
ClevelandClinic 3 2020-01-07 00:03:00.000 5
ClevelandClinic 3 2020-01-07 00:10:00.000 5
These are the results I want:
Hospital Patient orderdt group
ClevelandClinic 1 2020-01-02 12:30:00.000 0
ClevelandClinic 1 2020-01-02 12:35:00.000 0
ClevelandClinic 1 2020-01-02 12:42:00.000 0
ClevelandClinic 1 2020-01-02 12:55:00.000 0
ClevelandClinic 1 2020-01-05 07:12:00.000 1
ClevelandClinic 1 2020-01-05 07:18:00.000 1
ClevelandClinic 1 2020-01-05 18:44:00.000 2
ClevelandClinic 1 2020-01-07 00:00:00.000 3
ClevelandClinic 2 2020-01-05 07:09:00.000 0
ClevelandClinic 3 2020-01-07 00:03:00.000 0
ClevelandClinic 3 2020-01-07 00:10:00.000 0
Instead of the group number being added every time, I want the group number to reset to 0 every time I have a new patient ID number. I appreciate any help you can give me.

Looks like this should do it in a single query. The solution uses a common table expression (cte) to calculate when to increment the group. After that, a windowed sum() produces a running total of the group increments. The -1 makes the group numbers start from 0 instead of 1.
-- Step 1: flag every row that starts a new group. A row starts a group when it
-- is the patient's first order (lag() is NULL, so the comparison is unknown and
-- falls through to ELSE) or when it comes MORE than 15 minutes after that
-- patient's previous order. A gap of exactly 15 minutes stays in the same group.
with cte as
(
    select p.hospital_code,
           p.s_visit_ident,
           p.orderdt,
           case when datediff(
                         minute,
                         lag(p.orderdt) over(partition by p.s_visit_ident order by p.orderdt),
                         p.orderdt) <= 15
                then 0 else 1 end as increment_group
    from #pretemp p
)
-- Step 2: a per-patient running total of the flags gives the group number.
-- The first row of each patient always carries a 1, so subtracting 1 makes
-- the numbering restart at 0 for every patient.
select cte.hospital_code,
       cte.s_visit_ident as patient,
       cte.orderdt,
       sum(cte.increment_group)
           over(partition by cte.s_visit_ident
                order by cte.orderdt
                rows between unbounded preceding and current row) - 1 as [group]
from cte
order by cte.s_visit_ident, cte.orderdt;
Fiddle

Related

Calculating slots with double bookings and null val

Example dataset.
CLINIC
APPTDATETIME
PATIENT_ID
NEW_FOLLOWUP_FLAG
TGYN
20/07/2022 09:00:00
1
N
TGYN
20/07/2022 09:45:00
2
F
TGYN
20/07/2022 10:05:00
NULL
NULL
TGYN
20/07/2022 10:05:00
4
F
TGYN
20/07/2022 10:25:00
5
F
TGYN
20/07/2022 10:30:00
NULL
NULL
TGYN
20/07/2022 10:35:00
NULL
NULL
TGYN
20/07/2022 10:40:00
NULL
NULL
TGYN
20/07/2022 10:45:00
NULL
NULL
TGYN
20/07/2022 11:10:00
6
F
TGYN
20/07/2022 11:10:00
7
F
As you can see there are times with multiple patients, times with empty slots and times with both (generally DQ errors).
I'm trying to calculate how many slots were filled and how many of those were new (N) or follow-up (F). If there is a slot with a patient and also a NULL row then I only want to count the row with the patient. If there are only NULL rows for a timeslot then I want to count that as 'unfilled'.
From this dataset I would like to calculate the following for each group of clinic and apptdatetime.
CLINIC
APPTDATE
N Capacity
F Capacity
Unfilled Capacity
TGYN
20/07/2022
1
5
4
What's the best way to go about this?
I've considered taking a list of distinct values for each clinic and date and then joining to that, but wanted to know if there is a more elegant way.
First I set up some demo data in a table from what you provided:
-- Demo data. A temporary table is used so that the name #table, which the
-- queries that follow read from, is valid (a table variable would need @).
CREATE TABLE #table (CLINIC NVARCHAR(4), APPTDATETIME DATETIME, PATIENT_ID INT, NEW_FOLLOWUP_FLAG NVARCHAR(1));
-- Date literals use the ISO 8601 form so they parse the same way regardless
-- of the session's DATEFORMAT / language setting.
INSERT INTO #table (CLINIC, APPTDATETIME, PATIENT_ID, NEW_FOLLOWUP_FLAG) VALUES
('TGYN','2022-07-20T09:00:00', 1 ,'N'),
('TGYN','2022-07-20T09:45:00', 2 ,'F'),
('TGYN','2022-07-20T10:05:00', NULL ,NULL),
('TGYN','2022-07-20T10:05:00', 4 ,'F'),
('TGYN','2022-07-20T10:25:00', 5 ,'F'),
('TGYN','2022-07-20T10:30:00', NULL ,NULL),
('TGYN','2022-07-20T10:35:00', NULL ,NULL),
('TGYN','2022-07-20T10:40:00', NULL ,NULL),
('TGYN','2022-07-20T10:45:00', NULL ,NULL),
('TGYN','2022-07-20T11:10:00', 6 ,'F'),
('TGYN','2022-07-20T11:10:00', 7 ,'F');
Reading through your description it looks like you'd need a couple of case statements and a group by:
-- Per clinic and calendar day: count rows flagged 'N', rows flagged 'F', and
-- rows whose flag is NULL.
-- Every NULL-flag row is counted as unfilled, including a NULL row that shares
-- its APPTDATETIME with a booked patient.
SELECT CLINIC, CAST(APPTDATETIME AS DATE) AS APPTDATE,
SUM(CASE WHEN NEW_FOLLOWUP_FLAG = 'N' THEN 1 ELSE 0 END) AS NCapacity,
SUM(CASE WHEN NEW_FOLLOWUP_FLAG = 'F' THEN 1 ELSE 0 END) AS FCapacity,
SUM(CASE WHEN NEW_FOLLOWUP_FLAG IS NULL THEN 1 ELSE 0 END) AS UnfilledCapacity
FROM #table
GROUP BY CLINIC, CAST(APPTDATETIME AS DATE)
Which returns a result set like this:
CLINIC APPTDATE NCapacity FCapacity UnfilledCapacity
------------------------------------------------------------
TGYN 2022-07-20 1 5 5
Note that I cast the datetime column to a date and grouped by that.
The case statements just test for a condition (is the column null, or F or N) and then just returns a 1, which is summed.
Your title also asked about finding duplicates in the data set. You should likely have a constraint on this table making CLINIC and APPTDATETIME forcibly unique. This would prevent rows even being inserted as dupes.
If you want to find them in the table try something like this:
SELECT CLINIC, APPTDATETIME, COUNT(*) AS Cnt
FROM #table
GROUP BY CLINIC, APPTDATETIME
HAVING COUNT(*) > 1
Which from the test data returned:
CLINIC APPTDATETIME Cnt
-----------------------------------
TGYN 2022-07-20 10:05:00.000 2
TGYN 2022-07-20 11:10:00.000 2
Indicating there are dupes for those clinic/datetime combinations.
HAVING is the magic here, we can count them up and state we only want ones which are greater than 1.
This is basically a straight-forward conditional aggregation with group by, with the slight complication of excluding NULL rows where a corresponding appointment also exists.
For this you can include an anti-semi self-join using not exists so as to exclude counting for unfilled capacity any row where there's also valid data for the same date:
-- Conditional aggregation per clinic and calendar day.
-- The NOT EXISTS removes an empty (NULL patient) row when the SAME clinic also
-- has a booked patient at the same APPTDATETIME, so such a slot is counted
-- only once, as filled. ELSE 0 keeps each total at 0 rather than NULL when a
-- group has no matching rows.
select CLINIC, Convert(date, APPTDATETIME) AppDate,
  Sum(case when NEW_FOLLOWUP_FLAG = 'N' then 1 else 0 end) N_Capacity,
  Sum(case when NEW_FOLLOWUP_FLAG = 'F' then 1 else 0 end) F_Capacity,
  Sum(case when NEW_FOLLOWUP_FLAG is null then 1 else 0 end) U_Capacity
from t
where not exists (
  select * from t t2
  where t.PATIENT_ID is null
    and t2.PATIENT_ID is not null
    and t.CLINIC = t2.CLINIC
    and t.APPTDATETIME = t2.APPTDATETIME
)
group by CLINIC, Convert(date, APPTDATETIME);

How to select rows based on a rolling 30 day window SQL

My question involves how to identify an index discharge.
The index discharge is the earliest discharge. On that date, the 30 day window starts. Any admissions during that time period are considered readmissions, and they should be ignored. Once the 30 day window is over, then any subsequent discharge is considered an index and the 30 day window begins again.
I can't seem to work out the logic for this. I've tried different windowing functions, I've tried cross joins and cross applies. The issue I keep encountering is that a readmission cannot be an index admission. It must be excluded.
I have successfully written a while loop to solve this problem, but I'd really like to get this in a set based format, if it's possible. I haven't been successful so far.
Ultimate goal is this -
id
AdmitDate
DischargeDate
MedicalRecordNumber
IndexYN
1
2021-03-03 00:00:00.000
2021-03-09 13:20:00.000
X0090362
1
4
2021-03-05 00:00:00.000
2021-03-10 16:00:00.000
X0012614
1
6
2021-05-18 00:00:00.000
2021-05-21 22:20:00.000
X0012614
1
7
2021-06-21 00:00:00.000
2021-07-08 13:30:00.000
X0012614
1
8
2021-02-03 00:00:00.000
2021-02-09 17:00:00.000
X0019655
1
10
2021-03-23 00:00:00.000
2021-03-26 16:40:00.000
X0019655
1
11
2021-03-15 00:00:00.000
2021-03-18 15:53:00.000
X4135958
1
13
2021-05-17 00:00:00.000
2021-05-23 14:55:00.000
X4135958
1
15
2021-06-24 00:00:00.000
2021-07-13 15:06:00.000
X4135958
1
Sample code is below.
CREATE TABLE #Admissions
(
[id] INT,
[AdmitDate] DATETIME,
[DischargeDateTime] DATETIME,
[UnitNumber] VARCHAR(20),
[IndexYN] INT
)
INSERT INTO #Admissions
VALUES( 1 ,'2021-03-03' ,'2021-03-09 13:20:00.000' ,'X0090362', NULL)
,(2 ,'2021-03-27' ,'2021-03-30 19:59:00.000' ,'X0090362', NULL)
,(3 ,'2021-03-31' ,'2021-04-04 05:57:00.000' ,'X0090362', NULL)
,(4 ,'2021-03-05' ,'2021-03-10 16:00:00.000' ,'X0012614', NULL)
,(5 ,'2021-03-28' ,'2021-04-16 13:55:00.000' ,'X0012614', NULL)
,(6 ,'2021-05-18' ,'2021-05-21 22:20:00.000' ,'X0012614', NULL)
,(7 ,'2021-06-21' ,'2021-07-08 13:30:00.000' ,'X0012614', NULL)
,(8 ,'2021-02-03' ,'2021-02-09 17:00:00.000' ,'X0019655', NULL)
,(9 ,'2021-02-17' ,'2021-02-22 17:25:00.000' ,'X0019655', NULL)
,(10 ,'2021-03-23' ,'2021-03-26 16:40:00.000' ,'X0019655', NULL)
,(11 ,'2021-03-15' ,'2021-03-18 15:53:00.000' ,'X4135958', NULL)
,(12 ,'2021-04-08' ,'2021-04-13 19:42:00.000' ,'X4135958', NULL)
,(13 ,'2021-05-17' ,'2021-05-23 14:55:00.000' ,'X4135958', NULL)
,(14 ,'2021-06-09' ,'2021-06-14 12:45:00.000' ,'X4135958', NULL)
,(15 ,'2021-06-24' ,'2021-07-13 15:06:00.000' ,'X4135958', NULL)
You can use a recursive CTE to identify all rows associated with each "index" discharge:
-- a: number each patient's stays in discharge order (one sequence per unitnumber).
with a as (
      select a.*,
             row_number() over (partition by unitnumber order by dischargedatetime) as seqnum
      from admissions a
     ),
-- cte: walk each patient's stays in sequence, carrying forward the discharge
-- date of the current index stay. A stay becomes the new index only when it
-- is ADMITTED more than 30 days after the current index discharge; a stay
-- admitted inside the window is a readmission and keeps the existing index.
     cte as (
      select id, admitdate, dischargedatetime, unitnumber, seqnum, dischargedatetime as index_dischargedatetime
      from a
      where seqnum = 1
      union all
      select a.id, a.admitdate, a.dischargedatetime, a.unitnumber, a.seqnum,
             (case when a.admitdate > dateadd(day, 30, cte.index_dischargedatetime)
                   then a.dischargedatetime else cte.index_dischargedatetime
              end) as index_dischargedatetime
      from cte join
           a
           on a.unitnumber = cte.unitnumber
          and a.seqnum = cte.seqnum + 1
     )
-- A row is an index stay when dischargedatetime = index_dischargedatetime.
select *
from cte;
You can then incorporate this into an update:
update admissions
set indexyn = (case when admissions.dischargedatetime = cte.index_dischargedatetime then 'Y' else 'N' end)
from cte
where cte.id = admissions.id;
Here is a db<>fiddle. Note that I changed the type of IndexYN to a character to assign 'Y'/'N', which makes sense given the column name.

Using EXISTS within a GROUP BY clause

Is it possible to do the following:
I have a table that looks like this:
-- Demo data. Created as a temporary table so that the name #tran_TABLE used by
-- the queries that follow is valid (a table variable would need the @ sigil).
CREATE TABLE #tran_TABLE (
EOMONTH DATE,
AccountNumber INT,
CLASSIFICATION_NAME VARCHAR(50),
Value Float
);
-- Explicit column list; AccountNumber is supplied as an integer literal to
-- match the column type instead of relying on implicit string conversion.
INSERT INTO #tran_TABLE (EOMONTH, AccountNumber, CLASSIFICATION_NAME, Value) VALUES
('2018-11-30', 123, 'cat1', 10),
('2018-11-30', 123, 'cat1', 15),
('2018-11-30', 123, 'cat1', 5),
('2018-11-30', 123, 'cat2', 10),
('2018-11-30', 123, 'cat3', 12),
('2019-01-31', 123, 'cat1', 5),
('2019-01-31', 123, 'cat2', 10),
('2019-01-31', 123, 'cat2', 15),
('2019-01-31', 123, 'cat3', 5),
('2019-01-31', 123, 'cat3', 2),
('2019-03-31', 123, 'cat1', 15);
EOMONTH AccountNumber CLASSIFICATION_NAME Value
2018-11-30 123 cat1 10
2018-11-30 123 cat1 15
2018-11-30 123 cat1 5
2018-11-30 123 cat2 10
2018-11-30 123 cat3 12
2019-01-31 123 cat1 5
2019-01-31 123 cat2 10
2019-01-31 123 cat2 15
2019-01-31 123 cat3 5
2019-01-31 123 cat3 2
2019-03-31 123 cat1 15
I want to produce a result where it will check whether in each month, for each AccountNumber (just one in this case) there exists a CLASSIFICATION_NAME cat1, cat2, cat3.
If all 3 exist for the month, then return 1 but if any are missing return 0.
The result should look like:
EOMONTH AccountNumber CLASSIFICATION_NAME
2018-11-30 123 1
2019-01-31 123 1
2019-03-31 123 0
But I want to do it as compactly as possible, without first creating a table that groups everything by CLASSIFICATION_NAME, EOMONTH and AccountNumber and then selects from that table.
For example, in the pseudo code below, is it possible to use maybe an EXISTS statement to do the group by?
SELECT
EOMONTH
,AccountNumber
,CASE WHEN EXISTS (CLASSIFICATION_NAME = 'cat1' AND 'cat2' AND 'cat3') THEN 1 ELSE 0 end
,SUM(Value) AS totalSpend
FROM #tran_TABLE
GROUP BY
EOMONTH
,AccountNumber
You could emulate this behavior by counting the distinct classifications that answer this condition (per group):
SELECT
EOMONTH
,AccountNumber
,CASE COUNT(DISTINCT CASE WHEN classification_name IN ('cat1', 'cat2', 'cat3') THEN classification_name END)
WHEN 3 THEN 1
ELSE 0
END
,SUM(Value) AS totalSpend
FROM #tran_TABLE
GROUP BY
EOMONTH
,AccountNumber
Try this-
-- Per month and account: 1 when the group holds exactly three distinct
-- CLASSIFICATION_NAME values, otherwise 0.
-- The test is on the number of distinct names only; it does not check that
-- the three names are specifically 'cat1', 'cat2' and 'cat3'.
SELECT EOMONTH,
AccountNumber,
CASE
WHEN COUNT(DISTINCT CLASSIFICATION_NAME) = 3 THEN 1
ELSE 0
END CLASSIFICATION_NAME
FROM #tran_TABLE
GROUP BY EOMONTH,AccountNumber
Output is-
2018-11-30 123 1
2019-01-31 123 1
2019-03-31 123 0
Query like this. You can count distinct values.
The column 'Three_Unique_Cat' is 1 when the group has exactly three distinct classification names of any kind. The column 'Three_Cat1_Cat2_Cat3' is 1 only when the group contains all of 'cat1', 'cat2' and 'cat3'.
SELECT
EOMONTH, AccountNumber
,CASE WHEN
COUNT(DISTINCT CLASSIFICATION_NAME)=3 THEN 1
ELSE 0
END AS 'Three_Unique_Cat'
,CASE WHEN
COUNT(DISTINCT CASE WHEN CLASSIFICATION_NAME IN ('cat1','cat2','cat3')
THEN CLASSIFICATION_NAME ELSE NULL END)=3 THEN 1
ELSE 0
END AS 'Three_Cat1_Cat2_Cat3'
,SUM(Value) AS totalSpend
FROM #tran_TABLE
GROUP BY EOMONTH, AccountNumber
Output:
EOMONTH AccountNumber Three_Unique_Cat Three_Cat1_Cat2_Cat3 totalSpend
2018-11-30 123 1 1 52
2019-01-31 123 1 1 37
2019-03-31 123 0 0 15
It's easy, just as below:
select
EOMONTH,
AccountNumber,
case when count(distinct CLASSIFICATION_NAME) = 3 then 1 else 0 end as CLASSIFICATION_NAME
from
tran_TABLE
group by
EOMONTH,
AccountNumber

Sql Pivot on time

Table 1: Daily attendance data:
att_date emp_code emp_name in_time out_time
2018-10-21 9999 Test 2018-10-21 08:00:00.000 2018-10-22 06:00:00.000
Table 2: Trnevents
emp_readr_id DT EVENTID
9999 2018-10-24 07:00:00.000 0
9999 2018-10-24 05:00:00.000 0
9999 2018-10-24 03:00:00.000 0
9999 2018-10-23 21:00:00.000 0
9999 2018-10-23 19:00:00.000 0
9999 2018-10-23 06:00:00.000 0
9999 2018-10-22 06:00:00.000 0
9999 2018-10-21 08:00:00.000 0
I used this query to get all times between in time and out time. The query below works fine, but I am trying to put the times in a single row by using PIVOT. With the pivot, the out time shows up in the next row.
declare #tempProcesstable as table(
[id] [nvarchar](200) NULL,
[time_stamp] datetime NULL,
[AccessType] varchar(3) NULL)
insert into #tempProcesstable
select distinct t1.emp_Reader_id, t1.DT,t1.eventid from daily_attendance_data t2 join trnevents t1
on t1.emp_reader_id=t2.emp_reader_id where (CONVERT(VARCHAR(26), t2.att_Date, 23) >=CONVERT(VARCHAR(26), '2018-10-20', 23)
and CONVERT(VARCHAR(26), t2.att_date, 23) <=CONVERT(VARCHAR(26), '2018-10-21', 23))
and
(t1.DT >=t2.in_time
and t1.DT <=t2.out_time)
-- and t1.emp_reader_id=1000
group by t1.emp_Reader_id,t1.dt,t1.eventid order by t1.emp_reader_id,DT asc
; With CheckIns
As (Select Rowemp_reader_id = Row_Number() Over (Partition by id, Cast(time_stamp As Date) Order By time_stamp),
id, time_stamp,
[Date] = Cast(time_stamp As Date),
[Time] = Cast(time_stamp As Time(0))
From #tempProcesstable)
Select Pvt.id,B.emp_name , [Date], CHECK1, CHECK2,Cast(dateadd(ss,datediff(ss,CHECK1,CHECK2),0) As Time(0)) Total1,
CHECK3, CHECK4,Cast(dateadd(ss,datediff(ss,CHECK3,CHECK4),0) As Time(0)) Total2
From (Select id, [Date], [Time],
CHECKNum = 'CHECK' + Cast(Rowemp_reader_id As varchar(11))
From CheckIns) As P
Pivot (Min([Time])
For CheckNum In (Check1, [Check2], Check3, Check4)
) As Pvt
LEFT OUTER JOIN
dbo.employee AS B ON Pvt.id= B.emp_reader_id
My output:
id emp_name Date CHECK1 CHECK2 Total1 CHECK3 CHECK4 Total2
1048 Singh 2018-10-21 07:06:07 17:34:05 10:27:58 NULL NULL NULL
9999 Test 2018-10-21 08:00:00 NULL NULL NULL NULL NULL NULL
9999 Test 2018-10-22 06:00:00 NULL NULL NULL NULL NULL NULL
Expected output:
I want all times between in time and out time in night to morning also.
can any one help me to rectify this.
id emp_name Date CHECK1 CHECK2 Total1 CHECK3 CHECK4 Total2
1048 Singh 2018-10-21 07:06:07 17:34:05 10:27:58 NULL NULL NULL
9999 Test 2018-10-21 08:00:00 06:00:00 NULL NULL NULL NULL NULL
You can try using the ROW_NUMBER() window function to number the rows within each date,
then use conditional aggregation to do the pivot.
-- One output row per employee and attendance date, with that shift's events
-- spread across CHECK1..CHECK4 in time order.
SELECT emp_readr_id,
       emp_name,
       [Date],
       MAX(CASE WHEN rn = 1 THEN [time] END) AS CHECK1,
       MAX(CASE WHEN rn = 2 THEN [time] END) AS CHECK2,
       MAX(CASE WHEN rn = 3 THEN [time] END) AS CHECK3,
       MAX(CASE WHEN rn = 4 THEN [time] END) AS CHECK4
FROM (
    SELECT t.emp_readr_id,
           d.emp_name,
           -- Use the attendance date, not the event's own date, so an
           -- out-time after midnight stays on the row of the shift it closes.
           CONVERT(VARCHAR(10), d.att_date, 120) AS [Date],
           -- Number events per employee and shift, not across all employees.
           ROW_NUMBER() OVER (PARTITION BY d.emp_code, d.att_date ORDER BY t.DT) AS rn,
           CONVERT(VARCHAR(10), t.DT, 108) AS [time]
    FROM Daily AS d
    -- NOTE(review): assumes Daily.emp_code identifies the same employee as
    -- Trnevents.emp_readr_id, as in the sample rows (9999) - confirm.
    INNER JOIN Trnevents AS t
        ON t.emp_readr_id = d.emp_code
       AND t.DT BETWEEN d.in_time AND d.out_time
) AS t1
GROUP BY emp_readr_id,
         emp_name,
         [Date]
sqlfiddle

SQLHow do I modify this query to select unique by hour

(Looking for a better title)
Hello I have the query below
-- Bounds of the reporting window. T-SQL local variables take the @ sigil
-- (# is the prefix for temporary tables, not variables).
DECLARE @cdt varchar(23)
DECLARE @cdt2 varchar(23)
SET @cdt = '2016-01-18 00:00:00.000'
SET @cdt2 = '2016-01-26 00:00:00.000'
-- One row per hour number taken from master..spt_values; each column counts
-- the distinct agents with a call in that hour of day for one project,
-- restricted to the window above.
SELECT
spt.number AS [Hour of Day],
(SELECT COUNT(DISTINCT AgentId)
FROM history t2
WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
AND projectid IN (5) and calldatetime between @cdt and @cdt2) AS [Project 5 ],
(SELECT COUNT(DISTINCT AgentId)
FROM history t2
WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
AND projectid IN (124) and calldatetime between @cdt and @cdt2) AS [Project 124],
(SELECT COUNT(DISTINCT AgentId)
FROM history t2
WHERE DATEPART(HOUR, t2.calldatetime)=spt.number
AND projectid IN (576) and calldatetime between @cdt and @cdt2) AS [Project 576]
FROM master..spt_values spt
-- Only hour numbers 0 through 11 are produced by this filter.
WHERE spt.number BETWEEN 0 AND 11 AND spt.type = 'p'
GROUP BY spt.number
ORDER BY spt.number
I now need to select a unique number per hour rather than a distinct amount overall.
For instance, if I run this with "select distinct(Agentid), rest of query here", it will give me a count of agentids, independent of the cases. How do I express "WHEN AGENTID is unique"?
I copied examples from the original question
Project id Datetime Agentid
---------- ----------------------- ---------
5 11-23-2015 09:00:00.000 12
5 11-23-2015 10:00:00.000 12
6 11-23-2015 11:00:00.000 12
1 11-23-2015 12:00:00.000 3
3 11-23-2015 13:00:00.000 4
124 11-23-2015 14:00:00.000 7
124 11-23-2015 15:00:00.000 9
124 11-23-2015 16:00:00.000 10
576 11-23-2015 17:00:00.000 10
576 11-23-2015 18:00:00.000 44
576 11-23-2015 19:00:00.000 69
etc 11-23-2015 20:00:00.000 23
Expected output (Ignore the incorrect counts, assume they are correct from above^):
Datetime 5 124 576
------------- --- --- ---
09:00 - 09:59 0 4 5
10:00 - 10:59 4 3 1
11:00 - 11:59 5 2 1
12:00 - 12:59 1 1 1
13:00 - 13:59 6 1 1
14:00 - 14:59 6 1 1
15:00 - 15:59 7 1 2
16:00 - 16:59 8 1 3
17:00 - 17:59 9 1 3
18:00 - 18:59 1 1 2
19:00 - 19:59 12 1 0
20:00 - 20:59 0 0 0
so far
Hour of Day Project 5 Project 124 Project 576
0 0 0 0
1 0 0 0
2 0 0 0
3 0 0 0
4 0 0 0
5 0 0 0
6 0 0 0
7 0 0 0
8 0 0 0
9 0 0 0
10 0 0 0
11 0 0 0
I'm pretty sure you need to do this with subqueries:
-- One row per hour of the day (0-23), generated from master..spt_values.
-- Each column is a correlated subquery counting the distinct agents who have
-- a row in that hour for one project.
SELECT
spt.number AS [Hour of Day],
(SELECT COUNT(DISTINCT AgentId)
FROM YourTable t2
WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
AND projectId IN (5)) AS [Project 5 ],
(SELECT COUNT(DISTINCT AgentId)
FROM YourTable t2
WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
AND projectId IN (124)) AS [Project 124],
(SELECT COUNT(DISTINCT AgentId)
FROM YourTable t2
WHERE DATEPART(HOUR, t2.yourdatetime)=spt.number
AND projectId IN (576)) AS [Project 576]
FROM master..spt_values spt
-- DATEPART(HOUR, ...) returns 0 through 23, so all 24 hour numbers are
-- generated; stopping at 11 would drop every afternoon and evening hour.
WHERE spt.number BETWEEN 0 AND 23 AND spt.type = 'p'
GROUP BY spt.number
ORDER BY spt.number
Here is the table used by these queries:
-- Work table read by the queries that follow. Created as a temporary table so
-- that the name #wt is valid (a table variable would need the @ sigil).
CREATE TABLE #wt (
projectid varchar(4) not null,
edate datetime not null,
agentid int not null );
If you want to get the counts by time and project, use this query:
SELECT edate, projectid, COUNT(*) as nentries
FROM #wt
GROUP BY edate, projectid;
I haven't dealt with bucketing the dates by hour; that is a separate issue.
To get a tabular result set as you have shown:
SELECT edate, [5] AS [Project 5], [124] AS [Project 124], [576] AS [Project 576]
FROM (
SELECT edate, CAST(projectid AS int) AS projectid
FROM #wt
WHERE ISNUMERIC(projectid) <> 0 ) AS s
PIVOT (
COUNT(projectid)
FOR projectid IN ([5], [124], [576])) AS p;
Here is the result set for the PIVOT query using the above data:
However, you have to specify the projects of interest in the query. If you want to have an arbitrary number of projects and get columns for each one, that is going to require dynamic SQL to construct the PIVOT query.
@Tab Alleman: I added some data to illustrate the conditions that will test your scenario. Here is the result set with the same PIVOT query: