First of all, I would like to thank the friends who helped with this complex and difficult query.
I have three tables
Table 1
StaffId FirstName LastName staffType
---------------------------------------
1 Adam Sorme Student
2 Lara Sandra Teacher
3 Jack Jones Student
Table 2
GateId GateName
---------------------------------------
1 frontDoor
2 superDoor
Table 3
Id transitionDate GateId StaffId
---------------------------------------
1 2018-01-1 08:00:00 1 1
2 2018-01-1 10:00:00 2 1
3 2018-01-1 20:00:00 2 1
4 2018-01-2 07:00:00 1 2
5 2018-01-2 10:00:00 1 3
6 2018-01-9 12:00:00 2 2
I want the first and last movements of students for each day. The values must be null if no movement is available between the specified dates:
transitionDate> '2018-01-1 00:00:00 000'
and transitionDate< '2018-01-03 00:00:00 000'
OUTPUT:
Id Date MinTransitionDate MaxTransitionDate FirstGateName LastGateName StaffId StaffType
1 2018-01-01 2018-01-1 08:00:00 2018-01-1 20:00:00 frontDoor superDoor 1 Student
2 2018-01-01 null null null null 3 student
3 2018-01-02 null null null null 1 student
4 2018-01-02 2018-01-2 10:00:00 null frontDoor null 3 student
The following query is partially working.
-- Question's query: one row per (student, calendar day) with the first and
-- last transition of that day and the names of the gates used.
select
q.*,
g1.GateName as first_gate_name,
g2.GateName as last_gate_name
from
(
select s.staffId, d.dte,
min(t.transitionDate) as min_Date,
-- max_Date is suppressed (NULL) when the group holds a single joined row.
max_Date= case when count(1)>1 then max(t.transitionDate) else null end,
max(case when seqnum_asc = 1 then gateId end) as first_gateid,
-- NOTE(review): on a day with exactly one transition that row has both
-- seqnum_asc = 1 and seqnum_desc = 1, so last_gateid repeats first_gateid
-- even though max_Date is NULL.
max(case when seqnum_desc = 1 then gateId end) as last_gateid
-- Every student is paired with every distinct transition date, so days
-- without any movement still produce a row (all transition columns NULL).
from (select s.* from Staff s where stafftype = 'Student') s cross join
(select distinct cast(transitionDate as date) as dte from Transitions) d left join
-- Number each staff member's transitions within a day from both ends.
(select t.*,
row_number() over (partition by StaffId, cast(transitionDate as date) order by transitionDate) as seqnum_asc,
row_number() over (partition by StaffId, cast(transitionDate as date) order by transitionDate desc) as seqnum_desc
from Transitions t
) t
on cast(t.transitiondate as date) = d.dte and
t.staffId = s.staffId and
-- Keep only the first and/or last transition of the day.
1 in (t.seqnum_asc, t.seqnum_desc)
group by s.staffId, d.dte
) q
-- Resolve gate ids to names; left joins keep rows whose gate id is NULL.
left join Gates g1 on g1.gateId = q.first_gateid
left join Gates g2 on g2.gateId = q.last_gateid
see working demo
Problem: in the 4th row max_date is null, so I want the value of last_gateid to be null as well. Can you help me?
screenshot 4. row
https://cdn.pbrd.co/images/H7vyu31.png
A quick fix would be replacing this line
max(case when seqnum_desc = 1 then gateId end) as last_gateid
with
max(case when (seqnum_desc = 1 and seqnum_asc != 1) then t.gateId end) as last_gateid
It is obvious that someone helped you write the query after you gave them more explanation of the problem. I only tweaked the query you provided and used the same logic to give you the desired output, although I would prefer to rewrite it in a more understandable way:
-- First and last gate movement per student per calendar day.
-- Produces one row for every (student, distinct transition date) pair; days
-- on which the student did not move yield NULL transition columns.
select
    q.*,
    g1.GateName as first_gate_name,
    g2.GateName as last_gate_name
from
(
    select
        s.staffId,
        d.dte,
        min(t.transitionDate) as min_Date,
        -- The join keeps at most two rows per group (first and last of the
        -- day). A single row means there was only one movement, so there is
        -- no separate "last" movement to report.
        max_Date = case when count(1) > 1 then max(t.transitionDate) else null end,
        max(case when t.seqnum_asc = 1 then t.gateId end) as first_gateid,
        -- Take the gate of the chronologically last transition. MAX(gateId)
        -- would return the highest gate id instead, which is only correct
        -- when the last gate happens to have the larger id.
        case when count(1) > 1
             then max(case when t.seqnum_desc = 1 then t.gateId end)
        end as last_gateid
    from (select s.* from Staff s where stafftype = 'Student') s
    cross join (select distinct cast(transitionDate as date) as dte from Transitions) d
    left join
    (
        -- Number each staff member's transitions within a day from both ends.
        select
            t.*,
            row_number() over (partition by StaffId, cast(transitionDate as date)
                               order by transitionDate) as seqnum_asc,
            row_number() over (partition by StaffId, cast(transitionDate as date)
                               order by transitionDate desc) as seqnum_desc
        from Transitions t
    ) t
        on cast(t.transitiondate as date) = d.dte
       and t.staffId = s.staffId
       and 1 in (t.seqnum_asc, t.seqnum_desc)
    group by s.staffId, d.dte
) q
left join Gates g1 on g1.gateId = q.first_gateid
left join Gates g2 on g2.gateId = q.last_gateid
-- Sample schema and data for the Staff / Gates / Transitions example.
-- Each table is dropped first so the script can be re-run.
IF OBJECT_ID('dbo.Staff') IS NOT NULL DROP TABLE Staff
create table Staff (StaffId int, FirstName varchar(20), LastName varchar(20), staffType varchar(20))
insert into Staff (StaffId, FirstName, LastName, staffType) values
    (1, 'Adam', 'Sorme', 'Student'),
    (2, 'Lara', 'Sandra', 'Teacher'),
    (3, 'Jack', 'Jones', 'Student')

IF OBJECT_ID('dbo.Gates') IS NOT NULL DROP TABLE Gates
create table Gates (GateId int, GateName varchar(20))
insert into Gates (GateId, GateName) values
    (1, 'frontDoor'),
    (2, 'backDoor')

IF OBJECT_ID('dbo.Transitions') IS NOT NULL DROP TABLE Transitions
create table Transitions (Id int, transitionDate datetime, GateId int, StaffId int)
insert into Transitions (Id, transitionDate, GateId, StaffId) values
    (1, '2018-01-1 08:00:00', 1, 1),
    (2, '2018-01-1 10:00:00', 2, 1),
    (3, '2018-01-1 20:00:00', 2, 1),
    (4, '2018-01-2 07:00:00', 1, 2),
    (5, '2018-01-2 10:00:00', 2, 3),
    (6, '2018-01-9 12:00:00', 2, 2)
--select * from Transitions

-- Reporting window. T-SQL local variables must start with '@'; a leading '#'
-- denotes a temp table and is a syntax error in DECLARE.
DECLARE @Datefrom DATETIME = '2018-01-01'
DECLARE @DateTo DATETIME = '2018-01-03'
-- Step 1: copy Transitions into a temp table and add GateSortID, a rank
-- computed per staff member and ordered by gate id, then transition time.
-- Ranking on the timestamp avoids trusting the Id column, whose order may not
-- follow time if rows were imported out of sequence.
IF OBJECT_ID('tempdb..#Transitions') IS NOT NULL
    DROP TABLE #Transitions

SELECT
    src.*,
    RANK() OVER (
        PARTITION BY src.StaffID
        ORDER BY src.StaffID, src.GateID, src.TransitionDate
    ) AS [GateSortID]
INTO #Transitions
FROM Transitions AS src
----2. Build #FirstandLastDates: one row per staff member holding the
--     transition time of the GateSortID = 1 row (FirstDate) and of the row
--     with the highest GateSortID other than 1 (LastDate, NULL if none).
-- NOTE(review): GateSortID is ranked by gate id before transition time, so
-- rank 1 is the earliest row at the lowest-numbered gate, which is not
-- necessarily the earliest transition overall -- confirm this is intended.
-- NOTE(review): nothing here partitions by day or applies a date range.
IF OBJECT_ID('tempdb..#FirstandLastDates') IS NOT NULL DROP TABLE
#FirstandLastDates
SELECT T.StaffId,
[FirstDate] = FirstDate.transitionDate,
[LastDate] = LastDate.transitionDate
INTO #FirstandLastDates
FROM #Transitions T
JOIN
(
-- Rank-1 row per staff member, joined back to the base table for its date.
SELECT TT.StaffId,
[MinGateSortID] = MIN(GateSortID),
TT.GateID,
G.transitionDate
FROM #Transitions TT
JOIN Transitions G
ON TT.StaffId = G.StaffId
AND TT.Id = G.ID
AND TT.GateId = G.GateId
WHERE GateSortID = 1 -- the minimum rank is always 1
GROUP BY TT.StaffID,TT.GateID,G.transitionDate
)FirstDate
ON T.StaffID= FirstDate.StaffID
-- LEFT JOIN: staff with only a rank-1 row keep a NULL LastDate.
LEFT JOIN
(
SELECT TT.StaffId, TT.GateId, G.TransitionDate
FROM #Transitions TT
JOIN #Transitions G
ON TT.StaffId = G.StaffId
AND TT.Id = G.ID
AND TT.GateId = G.GateId
JOIN
(
-- Highest rank per staff member, ignoring rank 1.
SELECT StaffID,
[MaxGateSortID] = MAX(GateSortID)
FROM #Transitions TT
WHERE GateSortID <> 1 -- if the person has only clocked in and not yet
-- clocked out, do not report the same row as both first and last
GROUP BY StaffID
) LastGate1
ON TT.StaffId = LastGate1.StaffId
AND TT.GateSortID = LastGate1.MaxGateSortID
)LastDate
ON T.StaffId = LastDate.StaffId
-- The outer join fans out per #Transitions row; grouping collapses it again.
GROUP BY T.StaffId,FirstDate.transitionDate,LastDate.transitionDate
----3. Build #FirstandLastGates: one row per staff member holding the gate
--     name of the GateSortID = 1 row (FirstGate) and of the row with the
--     highest GateSortID other than 1 (LastGate, NULL if none).
-- NOTE(review): same ranking caveat as step 2 -- GateSortID orders by gate id
-- before transition time, so "first"/"last" follow that ordering.
IF OBJECT_ID('tempdb..#FirstandLastGates') IS NOT NULL DROP TABLE
#FirstandLastGates
SELECT T.StaffId,
[FirstGate] = FirstGate.GateName,
[LastGate] = LastGate.GateName
INTO #FirstandLastGates
FROM #Transitions T
JOIN
(
-- Gate name of the rank-1 row per staff member.
SELECT StaffID,
[MinGateSortID] = MIN(GateSortID),
TT.GateID,
GateName
FROM #Transitions TT
JOIN Gates G
ON TT.GateId = G.GateId
WHERE GateSortID = 1 -- the minimum rank is always 1
-- (disabled) date-range filter:
--AND TT.transitionDate BETWEEN #Datefrom AND #DateTo
GROUP BY StaffID,TT.GateID,GateName
)FirstGate
ON T.StaffID= FirstGate.StaffID
-- LEFT JOIN: staff with only a rank-1 row keep a NULL LastGate.
LEFT JOIN
(
SELECT TT.StaffId, TT.GateId, G.GateName
FROM #Transitions TT
JOIN Gates G
ON TT.GateId = G.GateId
JOIN
(
-- Highest rank per staff member, ignoring rank 1.
SELECT StaffID,
[MaxGateSortID] = MAX(GateSortID)
FROM #Transitions TT
WHERE GateSortID <> 1 -- if the person has only clocked in and not yet
-- clocked out, do not report the same gate as both first and last
-- (disabled) date-range filter:
--AND TT.transitionDate BETWEEN #Datefrom AND #DateTo
GROUP BY StaffID
) LastGate1
ON TT.StaffId = LastGate1.StaffId
AND TT.GateSortID = LastGate1.MaxGateSortID
)LastGate
ON T.StaffId = LastGate.StaffId
-- The outer join fans out per #Transitions row; grouping collapses it again.
GROUP BY T.StaffId,FirstGate.GateName,LastGate.GateName
-- MAIN OUTPUT
-- One row per distinct combination of first/last date, first/last gate,
-- staff id and staff type, for staff that have at least one transition.
SELECT
    D.FirstDate AS [MinTransitionDate],
    D.LastDate  AS [MaxTransitionDate],
    G.FirstGate AS [FirstGateName],
    G.LastGate  AS [LastGateName],
    T.[StaffId],
    S.StaffType
FROM Staff AS S
INNER JOIN #Transitions AS T
    ON S.StaffId = T.StaffId
INNER JOIN #FirstandLastDates AS D
    ON T.StaffId = D.StaffId
INNER JOIN #FirstandLastGates AS G
    ON T.StaffId = G.StaffId
-- Grouping on exactly the projected columns yields the same de-duplicated
-- rows as grouping per transition id and then applying DISTINCT.
GROUP BY
    D.FirstDate,
    D.LastDate,
    G.FirstGate,
    G.LastGate,
    T.[StaffId],
    S.StaffType
Related
I am new to SQL Server, I have 3 tables consider an employee table
empid
name
location
1
abc
USA
2
efg
UK
Another table named location-table
location
holidaycode
uk
uk1
usa
usa1
And also holidaytable:
holiday-code
date
type
uk1
2022-01-01
LM
uk1
2022-01-01
RMC
Expected result is:
empid
location
holidaycode
date
type 1
2
uk
uk1
2022-01-01
RMC
Suppose I query the tables by empid and date, e.g. the holidays available for empid 2 from a particular date. If on a particular day (2022-01-01) there are both LM and RMC entries, I need only RMC; if RMC is not available, then LM.
Please try this
-- Holiday type for employee 2 on 2022-01-01, one row per holiday code.
-- MAX([type]) picks 'RMC' over 'LM' because it sorts later alphabetically;
-- when only 'LM' exists it is returned instead.
select
    e.empid,
    e.location,
    h.holidaycode,
    h.[date],
    max(h.[type]) as [type 1]   -- named to match the expected result header
from employee as e
inner join location_table as l
    on e.location = l.location
inner join holidaytable as h
    on l.holidaycode = h.holidaycode
where e.empid = 2
  and h.[date] = '2022-01-01'
group by e.empid, e.location, h.holidaycode, h.[date]
Example:
Schema and insert statements:
-- Sample schema and rows for the employee / location / holiday example.
-- Column lists are spelled out so the inserts keep working if columns are
-- added or reordered later.
create table employee (empid int, name varchar(50), location varchar(50));
insert into employee (empid, name, location) values (1, 'abc', 'USA');
insert into employee (empid, name, location) values (2, 'efg', 'UK');

create table location_table (location varchar(50), holidaycode varchar(50));
insert into location_table (location, holidaycode) values ('uk', 'uk1');
insert into location_table (location, holidaycode) values ('usa', 'usa1');

create table holidaytable (holidaycode varchar(50), [date] date, [type] varchar(50));
insert into holidaytable (holidaycode, [date], [type]) values ('uk1', '2022-01-01', 'LM');
insert into holidaytable (holidaycode, [date], [type]) values ('uk1', '2022-01-01', 'RMC');
Query:
-- Employee 2's holiday on 2022-01-01: one row per holiday code, reporting
-- the alphabetically greatest type for that day as [type 1].
SELECT
    emp.empid,
    emp.location,
    hol.holidaycode,
    [date],
    MAX([type]) AS [type 1]
FROM employee AS emp
INNER JOIN location_table AS loc
    ON emp.location = loc.location
INNER JOIN holidaytable AS hol
    ON loc.holidaycode = hol.holidaycode
WHERE hol.[date] = '2022-01-01'
  AND emp.empid = 2
GROUP BY
    emp.empid,
    emp.location,
    hol.holidaycode,
    [date]
Output:
empid
location
holidaycode
date
type 1
2
UK
uk1
2022-01-01
RMC
db<>fiddle here
You can use ROW_NUMBER to get only the first row of each grouping.
Either do ORDER BY type DESC or use ORDER BY CASE WHEN type = 'RMC' THEN 1 ELSE 2 END
-- One holiday row per (holiday code, date): the row whose type sorts last,
-- which prefers 'RMC' over 'LM'. Table names follow the schema used
-- elsewhere in this thread (location_table, holidaytable).
SELECT
    e.empid,
    e.location,
    l.holidaycode,
    h.[date],
    h.[type]
FROM employee AS e
INNER JOIN location_table AS l
    ON l.location = e.location
INNER JOIN (
    -- Rank the types within each holiday code and date; rn = 1 is the winner.
    SELECT
        ht.holidaycode,
        ht.[date],
        ht.[type],
        ROW_NUMBER() OVER (
            PARTITION BY ht.holidaycode, ht.[date]
            ORDER BY ht.[type] DESC
        ) AS rn
    FROM holidaytable AS ht
) AS h
    ON h.holidaycode = l.holidaycode
   AND h.rn = 1
WHERE h.[date] = '2022-01-01';
db<>fiddle
I have data like this. The first row for each Id comes from one time period and the second row for the same Id comes from another time period. I now want to combine the Id and Name, which are the same in both time periods, so that they are shown once; the remaining columns stay as they are. If there are no orders in a time period, it should display 0 or null.
Id Name Qty Price
----------------------
1 Rose 4 540
1 Rose 1 640
2 Lilly 5 550
2 Lilly 18 360
3 Grand 2 460
3 Grand 10 360
4 lotus 0 0
4 Lotus 9 580
now I want data like this..
Id Name Qty Price
4 540
1 rose
1 640
5 550
2 Lilly
18 360
2 460
3 Grand
10 360
0 0
4 Lotus
9 580
This is my procedure
-- Returns two result sets with per-product order count and rounded revenue,
-- one for each of two delivery-date ranges.
--   @Startdate / @Enddate   : first range (inclusive), e.g. '03/01/2016'
--   @Startdate1 / @Enddate1 : second range (inclusive)
-- Parameters must be prefixed with '@', and the last select-list item must
-- not be followed by a comma; both were syntax errors before.
create PROCEDURE [dbo].[Sp_Orders]
(
    @Startdate  varchar(30),
    @Enddate    varchar(30),
    @Startdate1 varchar(30),
    @Enddate1   varchar(30)
)
--[Sp_Orders] '03/01/2016','03/15/2016','02/01/2016','02/28/2016'
AS
BEGIN
    ---First Duration----
    SELECT DISTINCT
        op.ProductId AS id,
        op.Price AS Prc,
        sc.SubCategoryName AS ScName,
        COUNT(op.ProductId) AS Qty,
        ROUND(SUM(op.Price * op.Quantity), 0) AS Revenue
    FROM orderdetails od
    INNER JOIN
        -- De-duplicate order lines before aggregating.
        (SELECT DISTINCT
             Orderid, Productid, ProductFeatures, Price, Quantity
         FROM OrderProducts) op
        ON od.Orderid = op.Orderid
    INNER JOIN products p
        ON p.productid = op.productid
    INNER JOIN subcategory sc
        ON sc.subcategoryid = p.subcategoryid
    WHERE
        -- Style 101 (mm/dd/yyyy) round-trip drops the time portion so the
        -- range is compared on whole days.
        CONVERT(datetime, CONVERT(varchar(50), od.DeliveryDate, 101))
            BETWEEN @Startdate AND @Enddate
    GROUP BY
        op.ProductID, op.Price, sc.SubCategoryName

    ---Second Duration----
    SELECT DISTINCT
        op.ProductID AS id,
        op.Price AS Prc,
        sc.SubCategoryName AS ScName,
        COUNT(op.ProductId) AS Qty,
        ROUND(SUM(op.Price * op.Quantity), 0) AS Revenue
    FROM orderdetails od
    INNER JOIN
        (SELECT DISTINCT
             Orderid, Productid, ProductFeatures, Price, Quantity
         FROM OrderProducts) op
        ON od.Orderid = op.Orderid
    INNER JOIN products p
        ON p.productid = op.productid
    INNER JOIN subcategory sc
        ON sc.subcategoryid = p.subcategoryid
    WHERE
        CONVERT(datetime, CONVERT(varchar(50), od.DeliveryDate, 101))
            BETWEEN @Startdate1 AND @Enddate1
    GROUP BY
        op.ProductID, op.Price, sc.SubCategoryName
END
From what I understood from your Question and Comments:
Schema for your case
-- Fixture: the sample rows from the question loaded into temp table #TAB.
-- Id 4 has one row with no name/qty/price to model a period without orders.
SELECT *
INTO #TAB
FROM (
    VALUES
        (1, 'ROSE',  4,    540),
        (1, 'ROSE',  1,    640),
        (2, 'LILLY', 5,    550),
        (2, 'LILLY', 18,   360),
        (3, 'GRAND', 2,    460),
        (3, 'GRAND', 10,   360),
        (4, NULL,    NULL, NULL),
        (4, 'LOTUS', 9,    580)
) AS A (ID, NAME, QTY, PRICE)
And the Logic to display is as below
-- Show ID and NAME only on the first row of each (NAME, ID) group and blank
-- them on the others; NULL quantities and prices are shown as 0.
;WITH numbered AS (
    SELECT
        -- No meaningful order inside a group: numbering is arbitrary.
        ROW_NUMBER() OVER (PARTITION BY Name, Id ORDER BY (SELECT 1)) AS SNO,
        ID,
        NAME,
        Qty,
        Price,
        ID AS ID2          -- untouched copy of the id, used for sorting
    FROM #TAB
)
SELECT
    CASE WHEN SNO = 1 THEN CAST(ID AS VARCHAR(250)) ELSE '' END AS ID,
    CASE WHEN SNO = 1 THEN ISNULL(NAME, '') ELSE '' END AS NAME,
    ISNULL(Qty, 0) AS Qty,
    ISNULL(Price, 0) AS Price
FROM numbered
ORDER BY ID2, NAME DESC
Try this from your Procedure. And may need to do type cast based on your actual datatypes
-- Returns a single result set combining per-product order count and rounded
-- revenue for two delivery-date ranges. ID and NAME are printed only on the
-- first row of each (sub-category name, product id) group.
--   @STARTDATE / @ENDDATE   : first range (inclusive), e.g. '03/01/2016'
--   @STARTDATE1 / @ENDDATE1 : second range (inclusive)
-- Parameters must be prefixed with '@' (a leading '#' is a syntax error).
CREATE PROCEDURE [DBO].[SP_ORDERS]
(
    @STARTDATE  VARCHAR(30),
    @ENDDATE    VARCHAR(30),
    @STARTDATE1 VARCHAR(30),
    @ENDDATE1   VARCHAR(30)
)
--[SP_ORDERS] '03/01/2016','03/15/2016','02/01/2016','02/28/2016'
AS
BEGIN
    SELECT
        CASE WHEN SNO = 1 THEN CAST(ID AS VARCHAR(250)) ELSE '' END ID,
        CASE WHEN SNO = 1 THEN ISNULL(SCNAME, '') ELSE '' END NAME,
        ISNULL(QTY, 0) QTY,
        ISNULL(REVENUE, 0) PRICE
    FROM (
        -- Number the rows inside each (name, id) group; the order within a
        -- group is arbitrary.
        SELECT
            ROW_NUMBER() OVER (PARTITION BY SCNAME, ID ORDER BY (SELECT 1)) SNO,
            ID, SCNAME, QTY, REVENUE, ID AS ID2
        FROM (
            ---FIRST DURATION----
            SELECT DISTINCT
                OP.PRODUCTID AS ID,
                OP.PRICE AS PRC,
                SC.SUBCATEGORYNAME AS SCNAME,
                COUNT(OP.PRODUCTID) AS QTY,
                ROUND(SUM(OP.PRICE * OP.QUANTITY), 0) AS REVENUE
            FROM ORDERDETAILS OD
            INNER JOIN
                (SELECT DISTINCT ORDERID, PRODUCTID, PRODUCTFEATURES, PRICE, QUANTITY
                 FROM ORDERPRODUCTS) OP
                ON OD.ORDERID = OP.ORDERID
            INNER JOIN PRODUCTS P
                ON P.PRODUCTID = OP.PRODUCTID
            INNER JOIN SUBCATEGORY SC
                ON SC.SUBCATEGORYID = P.SUBCATEGORYID
            WHERE CONVERT(DATETIME, CONVERT(VARCHAR(50), OD.DELIVERYDATE, 101))
                      BETWEEN @STARTDATE AND @ENDDATE
            GROUP BY OP.PRODUCTID, OP.PRICE, SC.SUBCATEGORYNAME

            ---SECOND DURATION----
            UNION ALL --ADDED NOW

            SELECT DISTINCT
                OP.PRODUCTID AS ID,
                OP.PRICE AS PRC,
                SC.SUBCATEGORYNAME AS SCNAME,
                COUNT(OP.PRODUCTID) AS QTY,
                ROUND(SUM(OP.PRICE * OP.QUANTITY), 0) AS REVENUE
            FROM ORDERDETAILS OD
            INNER JOIN
                (SELECT DISTINCT ORDERID, PRODUCTID, PRODUCTFEATURES, PRICE, QUANTITY
                 FROM ORDERPRODUCTS) OP
                ON OD.ORDERID = OP.ORDERID
            INNER JOIN PRODUCTS P
                ON P.PRODUCTID = OP.PRODUCTID
            INNER JOIN SUBCATEGORY SC
                ON SC.SUBCATEGORYID = P.SUBCATEGORYID
            WHERE CONVERT(DATETIME, CONVERT(VARCHAR(50), OD.DELIVERYDATE, 101))
                      BETWEEN @STARTDATE1 AND @ENDDATE1
            GROUP BY OP.PRODUCTID, OP.PRICE, SC.SUBCATEGORYNAME
        ) AS A
    ) B
    ORDER BY ID2, NAME
END
I have produced this output based on your sample data, and it matches your expected output exactly; however, if the real data is inconsistent, it may not give accurate results.
-- Sample data held in a table variable. The declaration, the insert and the
-- query below must run in the same batch because @Table1 is batch-scoped.
-- Table variables are named with '@'; '#Table1' cannot be used with DECLARE.
DECLARE @Table1 TABLE
    (Id VARCHAR(10), Name VARCHAR(5), Qty VARCHAR(10), Price VARCHAR(10))
;
INSERT INTO @Table1
    (Id, Name, Qty, Price)
VALUES
    (1, 'Rose', 4, 540),
    (1, 'Rose', 1, 640),
    (2, 'Lilly', 5, 550),
    (2, 'Lilly', 18, 360),
    (3, 'Grand', 2, 460),
    (3, 'Grand', 10, 360),
    (4, 'Lotus', 0, 0),
    (4, 'Lotus', 9, 580)
;
-- SCRIPT
-- CTE: keep Id/Name only on the first row of each (Id, Name) group.
;WITH CTE AS (
    SELECT
        CASE WHEN RN = 1 THEN ID ELSE NULL END ID,
        CASE WHEN RN = 1 THEN Name ELSE NULL END NAME,
        Qty,
        Price
    FROM (
        SELECT
            Id,
            Name,
            Qty,
            Price,
            ROW_NUMBER() OVER (PARTITION BY ID, NAME ORDER BY NAME) RN
        FROM @Table1
    ) T
)
-- Emit three lines per group: rows 1 and 3 carry Qty/Price, row 2 carries
-- Id/Name; the fourth joined row is discarded.
SELECT
    CASE WHEN RN = 2 THEN T.Id ELSE '' END ID,
    CASE WHEN RN = 2 THEN T.Name ELSE '' END Name,
    CASE WHEN RN IN (1, 3) THEN ISNULL(T.Qty, 0) ELSE '' END Qty,
    -- Previously aliased "qty" a second time, giving two identically named
    -- output columns.
    CASE WHEN RN IN (1, 3) THEN ISNULL(T.Price, 0) ELSE '' END Price
FROM (
    SELECT
        T.ID,
        T.NAME,
        C.Qty,
        C.Price,
        ROW_NUMBER() OVER (PARTITION BY T.ID, T.NAME ORDER BY T.NAME) RN
    FROM @Table1 T
    INNER JOIN CTE C
        -- Parentheses spell out the precedence the original relied on
        -- (AND binds tighter than OR); the logic is unchanged.
        ON (T.Id = C.ID AND T.Name = C.NAME)
        OR (T.Qty = C.Qty OR T.Price = C.Price)
) T
WHERE T.RN <> 4
Data:
Name | Score | Date
==
John | 10 | 09/01/2012
John | 20 | 09/01/2012
John | 5 | 09/01/2012
Frank | 20 | 11/01/2012
I want to run an SQL statement that will pull only the highest score from each day only, so I don't want three scores from one date, only the highest. So the return I'd like from SQL would be:
John | 20 | 09/01/2012
Frank | 20 | 11/01/2012
Is it possible to do this via SQL, currently I can do it after pulling everything by checking dates. But would be great if it's possible to do it direct from the DB.
I have tried a few solutions but as Date and Score aren't distinct, as I want the highest I am unsure of where to proceed. The SQL is being used with a MS Access database.
Here's a complete example. I did it in Oracle, so you might have to tweak the syntax a bit for Access.
-- Oracle example: highest score per day together with who achieved it.
CREATE TABLE tbl1 (name VARCHAR2(100), score INT, dt DATE);

INSERT INTO tbl1 (name, score, dt) VALUES ('John', 10, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('John', 20, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('John', 5, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('Harry', 15, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('Frank', 20, to_date('20121101','YYYYMMDD'));

-- day_best: top score per day. person_best: each person's top score per day.
-- Joining them on day and score keeps the people who hold the day's maximum.
-- Written as an explicit join so the matching conditions cannot be dropped
-- by accident, as they can in a comma join.
SELECT
    day_best.dt,
    person_best.name,
    day_best.score
FROM (
    SELECT dt, MAX(score) score
    FROM tbl1
    GROUP BY dt
) day_best
INNER JOIN (
    SELECT name, dt, MAX(score) score
    FROM tbl1
    GROUP BY name, dt
) person_best
    ON person_best.dt = day_best.dt
   AND person_best.score = day_best.score;
Output:
1 11/1/2012 Frank 20
2 9/1/2012 John 20
Note that if two people have highest score for given day, it will print them both. For example:
-- Same query with data in which John and Harry tie for the best score on
-- 2012-09-01; both are returned.
INSERT INTO tbl1 (name, score, dt) VALUES ('John', 10, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('John', 20, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('John', 5, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('Harry', 20, to_date('20120901','YYYYMMDD'));
INSERT INTO tbl1 (name, score, dt) VALUES ('Frank', 20, to_date('20121101','YYYYMMDD'));

-- day_best: top score per day. person_best: each person's top score per day.
-- The explicit join replaces the comma join; the conditions are unchanged.
SELECT
    day_best.dt,
    person_best.name,
    day_best.score
FROM (
    SELECT dt, MAX(score) score
    FROM tbl1
    GROUP BY dt
) day_best
INNER JOIN (
    SELECT name, dt, MAX(score) score
    FROM tbl1
    GROUP BY name, dt
) person_best
    ON person_best.dt = day_best.dt
   AND person_best.score = day_best.score;
Output:
DT NAME SCORE
1 11/1/2012 Frank 20
2 9/1/2012 John 20
3 9/1/2012 Harry 20
I guess it is pretty simple
-- Best score of each person on each date (one row per name and date).
SELECT
    st.[Name],
    MAX(st.[score]) AS Score,
    st.[Date]
FROM scoresTable AS st
GROUP BY
    st.[Date],
    st.[Name]
Edited:
More accurate but a little complex
-- Who achieved the highest score on each date. Rows that tie for a day's
-- maximum are all returned.
SELECT
    o.[Name],
    i.[Date],
    i.Score
FROM scoresTable AS o
INNER JOIN (
    -- Highest score per date.
    SELECT
        [Date],
        MAX(Score) AS Score
    FROM scoresTable
    GROUP BY [Date]
) AS i
    ON i.[Date] = o.[Date]
   AND o.Score = i.Score
Even More, if your date field contains time too
-- Variant for date columns that also carry a time: rows are bucketed by the
-- style-101 (mm/dd/yyyy) text form of the date before taking the maximum.
SELECT
    o.[Name],
    i.[Date],
    i.Score
FROM scoresTable AS o
INNER JOIN (
    -- Highest score per calendar day.
    SELECT
        CONVERT(varchar, [Date], 101) AS [Date],
        MAX(Score) AS Score
    FROM scoresTable
    GROUP BY CONVERT(varchar, [Date], 101)
) AS i
    ON CONVERT(varchar, o.[Date], 101) = i.[Date]
   AND o.Score = i.Score
I have table the following data structure in SQL Server:
ID Date Allocation
1, 2012-01-01, 0
2, 2012-01-02, 2
3, 2012-01-03, 0
4, 2012-01-04, 0
5, 2012-01-05, 0
6, 2012-01-06, 5
etc.
What I need to do is get all consecutive day periods where Allocation = 0, and in the following form:
Start Date End Date DayCount
2012-01-01 2012-01-01 1
2012-01-03 2012-01-05 3
etc.
Is it possible to do this in SQL, and if so how?
In this answer, I'll assume that the "id" field numbers the rows consecutively when sorted by increasing date, like it does in the example data. (Such a column can be created if it does not exist).
This is an example of a technique described here and here.
1) Join the table to itself on adjacent "id" values. This pairs adjacent rows. Select rows where the "allocation" field has changed. Store the result in a temporary table, also keeping a running index.
-- Build `boundaries`: one row for every pair of adjacent rows (by id) whose
-- allocation differs, numbered with a running index.
-- MySQL user variables are written with '@'; '#' starts a comment in MySQL,
-- so "SET #idx = 0" would not initialise anything.
SET @idx = 0;

CREATE TEMPORARY TABLE boundaries
SELECT
    (@idx := @idx + 1) AS idx,       -- running row number
    a1.date AS prev_end,             -- last day of the earlier period
    a2.date AS next_start,           -- first day of the following period
    a1.allocation AS allocation      -- allocation of the earlier period
FROM allocations AS a1
INNER JOIN allocations AS a2
    ON a2.id = a1.id + 1             -- pair each row with its successor
WHERE a1.allocation != a2.allocation;
This gives you a table having "the end of the previous period", "the start of the next period", and "the value of 'allocation' in the previous period" in each row:
+------+------------+------------+------------+
| idx | prev_end | next_start | allocation |
+------+------------+------------+------------+
| 1 | 2012-01-01 | 2012-01-02 | 0 |
| 2 | 2012-01-02 | 2012-01-03 | 2 |
| 3 | 2012-01-05 | 2012-01-06 | 0 |
+------+------------+------------+------------+
2) We need the start and end of each period in the same row, so we need to combine adjacent rows again. Do this by creating a second temporary table like boundaries but having an idx field 1 greater:
+------+------------+------------+
| idx | prev_end | next_start |
+------+------------+------------+
| 2 | 2012-01-01 | 2012-01-02 |
| 3 | 2012-01-02 | 2012-01-03 |
| 4 | 2012-01-05 | 2012-01-06 |
+------+------------+------------+
Now join on the idx field and we get the answer:
-- Pair each boundary with the previous one (boundaries2 is the same data with
-- idx shifted by one) so a period's start and end land on the same row.
SELECT
    shifted.next_start AS start,
    cur.prev_end AS end,
    cur.allocation
FROM boundaries AS cur
INNER JOIN boundaries2 AS shifted
    ON shifted.idx = cur.idx;
+------------+------------+------------+
| start | end | allocation |
+------------+------------+------------+
| 2012-01-02 | 2012-01-02 | 2 |
| 2012-01-03 | 2012-01-05 | 0 |
+------------+------------+------------+
** Note that this answer gets the "internal" periods correctly but misses the two "edge" periods where allocation = 0 at the beginning and allocation = 5 at the end. Those can be pulled in using UNION clauses but I wanted to present the core idea without that complication.
Following would be one way to do it. The gist of this solution is
Use a CTE to get a list of all consecutive start and enddates with Allocation = 0
Use the ROW_NUMBER window function to assign rownumbers depending on both start- and enddates.
Select only those records where both ROW_NUMBERS equal 1.
Use DATEDIFF to calculate the DayCount
SQL Statement
-- Consecutive-day periods with Allocation = 0.
-- The recursive CTE grows a candidate period from every zero-allocation day,
-- one day at a time. A candidate is kept only if it is the longest one for
-- its start date and the earliest-starting one for its end date.
;WITH r (StartDate, EndDate) AS (
    -- Anchor: every zero-allocation day is a one-day period.
    SELECT t.Date, t.Date
    FROM YourTable AS t
    WHERE t.Allocation = 0

    UNION ALL

    -- Step: extend a period by the next calendar day if that is also zero.
    SELECT r.StartDate, q.Date
    FROM r
    INNER JOIN YourTable AS q
        ON DATEDIFF(dd, r.EndDate, q.Date) = 1
    WHERE q.Allocation = 0
)
SELECT
    s.StartDate AS [Start Date],
    s.EndDate AS [End Date ],
    DATEDIFF(dd, s.StartDate, s.EndDate) + 1 AS [DayCount]
FROM (
    SELECT
        r.StartDate,
        r.EndDate,
        ROW_NUMBER() OVER (PARTITION BY r.StartDate ORDER BY r.EndDate DESC) AS rn1,
        ROW_NUMBER() OVER (PARTITION BY r.EndDate ORDER BY r.StartDate ASC) AS rn2
    FROM r
) AS s
WHERE s.rn2 = 1
  AND s.rn1 = 1
OPTION (MAXRECURSION 0)
Test script
-- Self-contained test of the recursive approach: q supplies the sample rows
-- inline, r grows the zero-allocation periods, and the final select keeps the
-- maximal period for each start/end date.
;WITH q (ID, Date, Allocation) AS (
    SELECT v.a, v.b, v.c
    FROM (VALUES
          (1, '2012-01-01', 0)
        , (2, '2012-01-02', 2)
        , (3, '2012-01-03', 0)
        , (4, '2012-01-04', 0)
        , (5, '2012-01-05', 0)
        , (6, '2012-01-06', 5)
    ) AS v (a, b, c)
)
, r (StartDate, EndDate) AS (
    -- Anchor: every zero-allocation day is a one-day period.
    SELECT q.Date, q.Date
    FROM q
    WHERE q.Allocation = 0

    UNION ALL

    -- Step: extend a period by the next calendar day if that is also zero.
    SELECT r.StartDate, q.Date
    FROM r
    INNER JOIN q
        ON DATEDIFF(dd, r.EndDate, q.Date) = 1
    WHERE q.Allocation = 0
)
SELECT
    s.StartDate,
    s.EndDate,
    DATEDIFF(dd, s.StartDate, s.EndDate) + 1
FROM (
    SELECT
        r.StartDate,
        r.EndDate,
        ROW_NUMBER() OVER (PARTITION BY r.StartDate ORDER BY r.EndDate DESC) AS rn1,
        ROW_NUMBER() OVER (PARTITION BY r.EndDate ORDER BY r.StartDate ASC) AS rn2
    FROM r
) AS s
WHERE s.rn2 = 1
  AND s.rn1 = 1
OPTION (MAXRECURSION 0)
Alternative way with CTE but without ROW_NUMBER(),
Sample data:
-- Sample data: ten consecutive days with their allocation.
-- A single VALUES list replaces the chain of UNIONs, which forced a needless
-- duplicate-elimination step over rows that are already distinct.
if object_id('tempdb..#tab') is not null
    drop table #tab

create table #tab (id int, date datetime, allocation int)

insert into #tab (id, date, allocation)
values
    (1,  '2012-01-01', 0),
    (2,  '2012-01-02', 2),
    (3,  '2012-01-03', 0),
    (4,  '2012-01-04', 0),
    (5,  '2012-01-05', 0),
    (6,  '2012-01-06', 5),
    (7,  '2012-01-07', 0),
    (8,  '2012-01-08', 5),
    (9,  '2012-01-09', 0),
    (10, '2012-01-10', 0)
Query:
-- Consecutive-day periods with allocation = 0, without ROW_NUMBER().
-- cte: for every zero-allocation row s, e_id is the id of the zero-allocation
-- row dated one day later and b_id the id of the one dated one day earlier;
-- each is NULL when no such row exists.
;with cte(s_id, e_id, b_id) as (
select s.id, e.id, b.id
from #tab s
left join #tab e on dateadd(dd, 1, s.date) = e.date and e.allocation = 0
left join #tab b on dateadd(dd, -1, s.date) = b.date and b.allocation = 0
where s.allocation = 0
)
-- count(*) counts the zero-allocation rows whose id lies between the period's
-- start id and end id.
select ts.date as [start date], te.date as [end date], count(*) as [day count] from (
-- A row with no zero-allocation predecessor (b_id is null) starts a period.
select c1.s_id as s, (
-- Its end is the nearest row at or after it that has no zero-allocation
-- successor (e_id is null).
-- NOTE(review): this and the BETWEEN below compare ids, so they assume ids
-- increase with date -- confirm for real data.
select min(s_id) from cte c2
where c2.e_id is null and c2.s_id >= c1.s_id
) as e
from cte c1
where b_id is null
) t
join #tab t1 on t1.id between t.s and t.e and t1.allocation = 0
-- ts / te look up the dates of the start and end rows.
join #tab ts on ts.id = t.s
join #tab te on te.id = t.e
group by t.s, t.e, ts.date, te.date
Live example at data.SE.
Using this sample data:
-- Sample data: six consecutive days with their allocation.
CREATE TABLE MyTable (
    ID INT,
    Date DATETIME,
    Allocation INT
);

INSERT INTO MyTable
VALUES
    (1, {d '2012-01-01'}, 0),
    (2, {d '2012-01-02'}, 2),
    (3, {d '2012-01-03'}, 0),
    (4, {d '2012-01-04'}, 0),
    (5, {d '2012-01-05'}, 0),
    (6, {d '2012-01-06'}, 5);
GO
Try this:
-- Consecutive-day periods with Allocation = 0.
-- DateGroups tags every zero-allocation day with the ID of the day that
-- starts its period (SeedID); StartDates and EndDates then aggregate per seed.
WITH DateGroups (ID, Date, Allocation, SeedID) AS (
    -- Anchor: zero-allocation days whose previous day is missing or non-zero.
    SELECT
        cur.ID,
        cur.Date,
        cur.Allocation,
        cur.ID
    FROM MyTable AS cur
    LEFT JOIN MyTable AS Prev
        ON Prev.Date = DATEADD(d, -1, cur.Date)
       AND Prev.Allocation = 0
    WHERE cur.Allocation = 0
      AND Prev.ID IS NULL

    UNION ALL

    -- Step: pull in the next calendar day while it is also zero, carrying
    -- the seed of the period along.
    SELECT
        nxt.ID,
        nxt.Date,
        nxt.Allocation,
        grp.SeedID
    FROM MyTable AS nxt
    INNER JOIN DateGroups AS grp
        ON nxt.Date = DATEADD(d, 1, grp.Date)
    WHERE nxt.Allocation = 0
),
StartDates (ID, StartDate, DayCount) AS (
    -- First day and number of days of each period.
    SELECT SeedID, MIN(Date), COUNT(ID)
    FROM DateGroups
    GROUP BY SeedID
),
EndDates (ID, EndDate) AS (
    -- Last day of each period.
    SELECT SeedID, MAX(Date)
    FROM DateGroups
    GROUP BY SeedID
)
SELECT
    sd.StartDate,
    ed.EndDate,
    sd.DayCount
FROM StartDates AS sd
INNER JOIN EndDates AS ed
    ON ed.ID = sd.ID;
The first section of the query is a recursive SELECT, which is anchored by all rows that are allocation = 0, and whose previous day either doesn't exist or has allocation != 0. This effectively returns IDs: 1 and 3 which are the starting dates of the periods of time you want to return.
The recursive part of this same query starts from the anchor rows, and finds all subsequent dates that also have allocation = 0. The SeedID keeps track of the anchored ID through all the iterations.
The result so far is this:
ID Date Allocation SeedID
----------- ----------------------- ----------- -----------
1 2012-01-01 00:00:00.000 0 1
3 2012-01-03 00:00:00.000 0 3
4 2012-01-04 00:00:00.000 0 3
5 2012-01-05 00:00:00.000 0 3
The next sub query uses a simple GROUP BY to filter out all the start dates for each SeedID, and also counts the days.
The last sub query does the same thing with the end dates, but this time the day count isn't needed as we already have this.
The final SELECT query joins these two together to combine the start and end dates, and returns them along with the day count.
Give it a try if it works for you
Here SDATE stands for your DATE column; everything else is the same as in your table.
-- Consecutive zero-allocation periods using only correlated subqueries.
-- The final WHERE keeps rows that start a period: ALLOCATION = 0 and the row
-- with the greatest smaller ID either has a non-zero allocation or does not
-- exist.
-- NOTE(review): all lookups navigate by ID, so this assumes IDs increase with
-- SDATE -- confirm for real data.
SELECT SDATE,
-- EDATE: if a later non-zero row exists, use the date of the last row before
-- it (or SDATE itself when no row lies in between); otherwise use the date of
-- the row with the highest ID after the current one.
CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) >0 THEN(
CASE WHEN (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) IS NULL THEN SDATE
ELSE (SELECT SDATE FROM TABLE1 WHERE ID =(SELECT MAX(ID) FROM TABLE1 WHERE ID >TBL1.ID AND ID<(SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0))) END
)ELSE (SELECT SDATE FROM TABLE1 WHERE ID = (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID ))END AS EDATE
-- DAYCOUNT: rows from the current ID up to the next non-zero row, minus that
-- row; when no later non-zero row exists, rows up to the highest later ID.
,CASE WHEN (SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) <0 THEN
(SELECT COUNT(*) FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MAX(ID) FROM TABLE1 WHERE ID > TBL1.ID )) ELSE
(SELECT COUNT(*)-1 FROM TABLE1 WHERE ID BETWEEN TBL1.ID AND (SELECT MIN(ID) FROM TABLE1 WHERE ID > TBL1.ID AND ALLOCATION!=0)) END AS DAYCOUNT
FROM TABLE1 TBL1 WHERE ALLOCATION = 0
AND (((SELECT ALLOCATION FROM TABLE1 WHERE ID=(SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID))<> 0 ) OR (SELECT MAX(ID) FROM TABLE1 WHERE ID < TBL1.ID)IS NULL);
A solution without CTE:
-- Consecutive zero-allocation periods without a CTE.
-- The same derived table is repeated four times: for each row x it computes
-- idn, the number of rows dated on or before x, i.e. x's position by date.
SELECT a.aDate AS StartDate
-- The nearest period end at or after the start is the end of this period.
, MIN(c.aDate) AS EndDate
, (datediff(day, a.aDate, MIN(c.aDate)) + 1) AS DayCount
FROM (
-- a: candidate start row.
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS a
LEFT JOIN (
-- b: the row directly before a with the same allocation; must not exist.
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS b ON a.idn = b.idn + 1 AND b.allocation = a.allocation
LEFT JOIN (
-- c: rows at or after a with the same allocation (candidate end rows).
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS c ON a.idn <= c.idn AND c.allocation = a.allocation
LEFT JOIN (
-- d: the row directly after c with the same allocation; must not exist.
SELECT x.aDate, x.allocation, COUNT(*) idn FROM table1 x
JOIN table1 y ON y.aDate <= x.aDate
GROUP BY x.id, x.aDate, x.allocation
) AS d ON c.idn = d.idn - 1 AND d.allocation = c.allocation
-- Keep zero-allocation starts (no b) paired with period ends (a c with no d).
WHERE b.idn IS NULL AND c.idn IS NOT NULL AND d.idn IS NULL AND a.allocation = 0
GROUP BY a.aDate
Example
I have a Patient table:
PatientId Admitted
--------- ---------------
1 d/m/yy hh:mm:ss
2 d/m/yy hh:mm:ss
3 d/m/yy hh:mm:ss
I have a PatientMeasurement table (0 to many):
PatientId MeasurementId Recorded Value
--------- ------------- --------------- -----
1 A d/h/yy hh:mm:ss 100
1 A d/h/yy hh:mm:ss 200
1 A d/h/yy hh:mm:ss 300
2 A d/h/yy hh:mm:ss 10
2 A d/h/yy hh:mm:ss 20
1 B d/h/yy hh:mm:ss 1
1 B d/h/yy hh:mm:ss 2
I am trying to create a result set that resembles:
PatientId Numerator Denominator
--------- -------- -----------
1 1 1
2 0 1
3 0 1
Essentially, a patient will have a 1 in the numerator if they have at least one value for measurement A and one value for measurement B. In this example, patient 1 has 3 A measurements and 2 B measurements, so the numerator is 1. Patient 2 has 2 A measurements, but no B measurements, so the numerator is 0. Patient 3 has neither an A measurement nor a B measurement, so the numerator is 0.
My query thus far is:
-- Question's first attempt: count each patient's A and B measurements in two
-- derived tables and derive Numerator from the two counts.
-- NOTE(review): the unqualified PatientId in the select list exists in p, a
-- and b; it likely needs a qualifier (p.PatientId).
-- NOTE(review): a.cnt+b.cnt is NULL when either join finds no rows (so the
-- ELSE branch yields 0), and a patient with exactly one A and one B sums to
-- 2, which is not > 2, so they also get 0.
SELECT PatientId, CASE WHEN a.cnt+b.cnt>2 THEN 1 ELSE 0 END Numerator, 1 Denominator
FROM patient p
LEFT OUTER JOIN (
-- Number of 'A' measurements per patient.
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE MeasurementId='A'
-- Disabled: Admitted belongs to the outer patient row and cannot be
-- referenced from inside this derived table.
--AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) a ON p.PatientId=a.PatientId
LEFT OUTER JOIN (
-- Number of 'B' measurements per patient.
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE MeasurementId='B'
--AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) b ON p.PatientId=b.PatientId
This works as expected as long as I don't include the correlated date restriction (Recorded <= dateadd(hh, 12, Admitted)). Unfortunately, correlating an 'inline view' in this manner is not syntactically valid.
This has forced me to re-write the SQL to:
-- Question's rewrite: the per-patient A and B counts become correlated
-- subqueries in the select list of derived table v, so each can apply the
-- "recorded within 12 hours of admission" restriction.
-- NOTE(review): each select-list subquery returns two columns (PatientId and
-- cnt); a scalar subquery normally has to return exactly one -- verify.
-- NOTE(review): v.a+v.b>2 has the same caveats as a.cnt+b.cnt>2: NULL when a
-- subquery returns no row, and one A plus one B is not > 2.
SELECT PatientId, CASE WHEN v.a+v.b>2 THEN 1 ELSE 0 END Numerator, 1 Denominator
FROM (
SELECT PatientId,
(
-- 'A' measurements recorded within 12 hours of admission.
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE PatientId=p.PatientId
AND MeasurementId='A'
AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) a,
(
-- 'B' measurements recorded within 12 hours of admission.
SELECT PatientId, count(*) cnt
FROM PatientMeasurement pm
WHERE PatientId=p.PatientId
AND MeasurementId='B'
AND Recorded <= dateadd(hh, 12, Admitted)
GROUP BY PatientId
) b
FROM Patient p
) v
My question: Is there a better, more-efficient way to do this?
Thanks for your time.
Try this :
-- Numerator is 1 when a patient has at least one A and at least one B
-- measurement recorded within 12 hours of admission, else 0.
WITH GroupPatients AS
(
    -- Measurement counts per (measurement, patient) inside the 12-hour window.
    -- Columns are qualified because PatientId exists in both joined tables.
    SELECT
        pm.MeasurementID,
        pm.PatientId,
        COUNT(*) AS cnt
    FROM PatientMeasurement AS pm
    INNER JOIN Patient AS p
        ON pm.PatientID = p.PatientID
    WHERE pm.MeasurementId IN ('A', 'B')
      AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
    GROUP BY pm.MeasurementID, pm.PatientId
)
SELECT
    p.PatientID,
    CASE
        WHEN COALESCE(GPA.cnt, 0) > 0 AND COALESCE(GPB.cnt, 0) > 0 THEN 1
        ELSE 0
    END AS Numerator,
    1 AS Denominator
FROM Patient AS p
-- Both joins read the single GroupPatients CTE, filtered to one measurement
-- each. The names GroupPatientsA / GroupPatientsB were never defined.
LEFT JOIN GroupPatients AS GPA
    ON p.PatientID = GPA.PatientID
   AND GPA.MeasurementID = 'A'
LEFT JOIN GroupPatients AS GPB
    ON p.PatientID = GPB.PatientID
   AND GPB.MeasurementID = 'B'
I've made one tweak to the business logic too - your spec says Numerator should be one if a patient has both A and B measurements - however, your clause of a.cnt+b.cnt>2 will erroneously return one if either a.cnt or b.cnt are 3 or more and the other is zero.
Another solution can be close to your original attempt using OUTER APPLY:
-- Numerator is 1 when a patient has at least one A and at least one B
-- measurement recorded within 12 hours of admission, else 0.
-- OUTER APPLY lets each count reference the outer patient row (p.Admitted).
SELECT
    p.PatientId,
    -- Test each count separately: a sum such as a.cnt + b.cnt > 2 is wrong
    -- both ways (three A's and no B passes; one A and one B fails).
    CASE WHEN a.cnt > 0 AND b.cnt > 0 THEN 1 ELSE 0 END AS Numerator,
    1 AS Denominator
FROM patient AS p
OUTER APPLY (
    -- COUNT(*) without GROUP BY always returns one row, so cnt is never NULL.
    SELECT COUNT(*) AS cnt
    FROM PatientMeasurement AS pm
    WHERE pm.MeasurementId = 'A'
      AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
      AND pm.PatientId = p.PatientId
) AS a (cnt)
OUTER APPLY (
    SELECT COUNT(*) AS cnt
    FROM PatientMeasurement AS pm
    WHERE pm.MeasurementId = 'B'
      AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
      AND pm.PatientId = p.PatientId
) AS b (cnt)
-- Numerator is 1 when a patient has at least one A and at least one B
-- measurement recorded within 12 hours of admission, else 0.
-- EXISTS stops at the first matching row, so nothing is counted.
-- Column names follow the schema shown in the question (PatientId,
-- MeasurementId); the result column is named Numerator.
SELECT
    p.*,
    CASE
        WHEN EXISTS (
                 SELECT NULL
                 FROM PatientMeasurement AS pm
                 WHERE pm.PatientId = p.PatientId
                   AND pm.MeasurementId = 'A'
                   AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
             )
         AND EXISTS (
                 SELECT NULL
                 FROM PatientMeasurement AS pm
                 WHERE pm.PatientId = p.PatientId
                   AND pm.MeasurementId = 'B'
                   AND pm.Recorded <= DATEADD(hh, 12, p.Admitted)
             )
        THEN 1
        ELSE 0
    END AS Numerator
FROM Patient AS p
Assuming you are using Sql 2005 or 2008, the entire query can be simplified using some window functions and a pivot:
-- Per-patient counts of A and B measurements recorded within 12 hours of
-- admission, pivoted into columns, plus the resulting Numerator.
with pData as
(
    -- Start from Patient so that Admitted is available and patients without
    -- qualifying measurements are kept. The measurement filters live in the
    -- ON clause so they do not turn the left join into an inner join.
    select
        p.PatientId,
        pm.MeasurementId,
        -- Counting pm.MeasurementId gives 0, not 1, for the NULL row of a
        -- patient without matching measurements.
        count(pm.MeasurementId) over (partition by p.PatientId, pm.MeasurementId) as cnt
    from Patient as p
    left join PatientMeasurement as pm
        on pm.PatientId = p.PatientId
       and pm.MeasurementId in ('A', 'B')
       and pm.Recorded <= dateadd(hh, 12, p.Admitted)
)
select
    PatientId,
    coalesce([A], 0) as cntA,
    coalesce([B], 0) as cntB,
    -- Both measurements must be present; summing the counts would let three
    -- A's and no B pass, and one A plus one B fail.
    case when coalesce([A], 0) > 0 and coalesce([B], 0) > 0 then 1 else 0 end as Numerator,
    1 as Denominator
from pData
pivot (max(cnt) for MeasurementId in ([A], [B])) as pvt
-- Numerator is 1 when a patient has at least one A and at least one B
-- measurement recorded within @TimeSlot hours of admission, else 0.
-- T-SQL local variables must start with '@'; '#TimeSlot' is a syntax error.
DECLARE @TimeSlot int;
SET @TimeSlot = 12;   -- window length in hours after admission

WITH
-- pt: every patient with their measurements (if any), flagged by whether the
-- measurement falls inside the window.
pt AS (
    SELECT
        p.PatientID,
        p.Admitted,
        m.MeasurementID,
        m.Recorded,
        CASE
            WHEN m.Recorded <= DATEADD(hh, @TimeSlot, p.Admitted) THEN 1
            ELSE 0
        END AS InTimeSlot
    FROM Patient AS p
    LEFT JOIN PatientMeasurement AS m
        ON p.PatientID = m.PatientID
),
-- cntA / cntB: in-window measurement counts per patient.
cntA AS (
    SELECT PatientID, COUNT(*) AS A_count
    FROM pt
    WHERE MeasurementID = 'A' AND InTimeSlot = 1
    GROUP BY PatientID
),
cntB AS (
    SELECT PatientID, COUNT(*) AS B_count
    FROM pt
    WHERE MeasurementID = 'B' AND InTimeSlot = 1
    GROUP BY PatientID
),
-- cntAB: one row per patient; missing counts become 0.
cntAB AS (
    SELECT
        p.PatientID,
        COALESCE(a.A_count, 0) AS A_cnt,
        COALESCE(b.B_count, 0) AS B_cnt
    FROM Patient AS p
    LEFT JOIN cntA AS a ON p.PatientID = a.PatientID
    LEFT JOIN cntB AS b ON p.PatientID = b.PatientID
),
-- cntN: both measurements must be present.
cntN AS (
    SELECT
        PatientID,
        CASE WHEN A_cnt > 0 AND B_cnt > 0 THEN 1 ELSE 0 END AS Numerator
    FROM cntAB
)
SELECT PatientID, Numerator, 1 AS Denominator
FROM cntN