I have the following table :
Customer_ID PurchaseDatetime
309 2/3/2014 12:29:00
309 2/27/2014 17:11:00
309 4/15/2014 13:24:00
I want to write a query which would calculate the difference between the datetime field of two consecutive rows. Ideally the output should be like
Customer_ID PurchaseDatetime
309 0
309 2/27/2014 17:11:00 - 2/3/2014 12:29:00 // The exact time difference in hours
309 4/15/2014 13:24:00 - 2/27/2014 17:11:00 // The exact time difference in hours
How do I write such a query?
Try this...
CREATE TABLE #Purchases
(
CustomerID INT,
PurchaseDate DATETIME
)
INSERT INTO #Purchases
VALUES
(100004,'2016-05-16 08:00:00'),
(100005,'2016-05-16 09:05:00'),
(100006,'2016-05-16 10:08:40'),
(32141 ,'2016-05-16 11:18:00'),
(84230 ,'2016-05-16 12:25:10'),
(23444 ,'2016-05-16 13:40:00'),
(100001,'2016-05-16 14:50:00')
;WITH CTE AS
(
SELECT
CustomerID,
PurchaseDate,
ROW_NUMBER() OVER (ORDER BY PurchaseDate) AS Seq
FROM #Purchases
)
SELECT
p.CustomerID,
p.PurchaseDate,
pl.PurchaseDate,
DATEADD(SECOND,DATEDIFF(SECOND, pl.PurchaseDate,p.PurchaseDate),0) AS DiffDT,
DATEDIFF(HOUR, pl.PurchaseDate,p.PurchaseDate) HourDiff
FROM CTE AS p
LEFT OUTER JOIN CTE AS pl ON pl.Seq = p.Seq - 1 -- Last batch
ORDER BY p.PurchaseDate
Try This Query...
;with cte as
(select row_number() over(order by (select 100)) Id.Customer_ID,PurchaseDatetime from Table)
select a.Customer_ID,b.PurchaseDatetime-a.PurchaseDatetime from cte a inner join cte b on a.id=b.id-1
Thanks
SELECT *,
PURCHASEDATETIME =
CASE CUSTOMER_ID
WHEN CUSTOMER_ID THEN
DATEDIFF(HH, LAG(PURCHASEDATETIME, 1) OVER(ORDER BY CUSTOMER_ID, PURCHASEDATETIME), PURCHASEDATETIME)
ELSE
NULL
END
FROM table
Related
I have a table like this in SQL Server:
id start_time end_time
1 10:00:00 10:34:00
2 10:38:00 10:52:00
3 10:53:00 11:23:00
4 11:24:00 11:56:00
5 14:20:00 14:40:00
6 14:41:00 14:59:00
7 15:30:00 15:40:00
What I would like to have is a query that outputs consolidated records based on the time difference between two consecutive records (end_time of row n and start_time row n+1) . All records where the time difference is less than 2 minutes should be combined into one time entry and the ID of the first record should be kept. This should also combine more than two records if multiple consecutive records have a time difference less than 2 minutes.
This would be the expected output:
id start_time end_time
1 10:00:00 10:34:00
2 10:38:00 11:56:00
5 14:20:00 14:59:00
7 15:30:00 15:40:00
Thanks in advance for any tips how to build the query.
Edit:
I started with following code to calculate the lead_time and the time difference but do not know how to group and consolidate.
WITH rows AS
(
SELECT *, ROW_NUMBER() OVER (ORDER BY Id) AS rn
FROM #temp
)
SELECT mc.id, mc.start_time, mc.end_time, mp.start_time lead_time, DATEDIFF(MINUTE, mc.[end_time], mp.[start_time]) as DiffToNewSession
FROM rows mc
LEFT JOIN rows mp
ON mc.rn = mp.rn - 1
The window function in t-sql can realize a lot of data statistics, such as
create table #temp(id int identity(1,1), start_time time, end_time time)
insert into #temp(start_time, end_time)
values ('10:00:00', '10:34:00')
, ('10:38:00', '10:52:00')
, ('10:53:00', '11:23:00')
, ('11:24:00', '11:56:00')
, ('14:20:00', '14:40:00')
, ('14:41:00', '14:59:00')
, ('15:30:00', '15:40:00')
;with c0 as(
select *, LAG(end_time,1,'00:00:00') over (order by id) as lag_time
from #temp
), c1 as(
select *, case when DATEDIFF(MI, lag_time, start_time) <= 2 then 1 else -0 end as gflag
from c0
), c2 as(
select *, SUM(case when gflag=0 then 1 else 0 end) over(order by id) as gid
from c1
)
select MIN(id) as id, MIN(start_time) as start_time, MAX(end_time) as end_time
from c2
group by gid
In order to better describe the process of data construction, I simply use c0, c1, c2... to represent levels, you can merge some levels and optimize.
If you can’t use id as a sorting condition, then you need to change the sorting part in the above statement.
You can use a recursive cte to get the result that you want. This method just simple compare current end_time with next start_time. If it is less than the 2 mintues threshold use the same start_time as grp_start. And the end, simple do a GROUP BY on the grp_start
with rcte as
(
-- anchor member
select *, grp_start = start_time
from tbl
where id = 1
union all
-- recursive member
select t.id, t.start_time, t.end_time,
grp_start = case when datediff(second, r.end_time, t.start_time) <= 120
then r.grp_start
else t.start_time
end
from tbl t
inner join rcte r on t.id = r.id + 1
)
select id = min(id), grp_start as start_time, max(end_time) as end_time
from rcte
group by grp_start
demo
I guess this should do the trick without recursion. Again I used several ctes in order to make the solution a bit easier to read. guess it can be reduced a little...
INSERT INTO T1 VALUES
(1,'10:00:00','10:34:00')
,(2,'10:38:00','10:52:00')
,(3,'10:53:00','11:23:00')
,(4,'11:24:00','11:56:00')
,(5,'14:20:00','14:40:00')
,(6,'14:41:00','14:59:00')
,(7,'15:30:00','15:40:00')
GO
WITH cte AS(
SELECT *
,ROW_NUMBER() OVER (ORDER BY id) AS rn
,DATEDIFF(MINUTE, ISNULL(LAG(endtime) OVER (ORDER BY id), starttime), starttime) AS diffMin
,COUNT(*) OVER (PARTITION BY (SELECT 1)) as maxRn
FROM T1
),
cteFirst AS(
SELECT *
FROM cte
WHERE rn = 1 OR diffMin > 2
),
cteGrp AS(
SELECT *
,ISNULL(LEAD(rn) OVER (ORDER BY id), maxRn+1) AS nextRn
FROM cteFirst
)
SELECT f.id, f.starttime, MAX(ISNULL(n.endtime, f.endtime)) AS endtime
FROM cteGrp f
LEFT JOIN cte n ON n.rn >= f.rn AND n.rn < f.nextRn
GROUP BY f.id, f.starttime
I am trying to get a cumulative SUM of a DATEDIFF into a percentage from some basic data I have, here is a small snapshot:
ID IIn IOut
AB123 2015-11-06 15:24:44.057 2015-11-14 01:00:00.000
QA565 2015-10-27 20:12:19.753 2015-11-06 03:00:00.000
UN555 2015-12-29 06:29:23.417 2016-01-03 08:00:00.000
LG602 2015-08-07 16:52:13.573 2015-08-11 03:00:00.000
ETC ETC
I then use DATEDIFF to get a number of days:
SELECT ID, DATEDIFF(hour, IIn, IOut)/24.0 IDays
FROM TimeTable
Which gives me:
ID IDays
AB123 7.416666
QA565 9.291666
UN555 5.083333
LG602 3.458333
What I want is a count of ID'S split by their IDay's (rounded down) with a cumulative % from lowest IDay's to highest like so:
ID IDays IDaysPer
LG602 3 12.5
UN555 5 33.33
AB123 7 62.49
QA565 9 100
You can do this using a couple of windowed aggregates, placing your original query in a CTE for convenience (A subquery would also work):
declare #timeTable table (ID char(5) not null, IIn datetime not null,
IOut datetime not null)
insert into #timeTable(ID,IIn,IOut) values
('AB123','2015-11-06T15:24:44.057','2015-11-14T01:00:00.000'),
('QA565','2015-10-27T20:12:19.753','2015-11-06T03:00:00.000'),
('UN555','2015-12-29T06:29:23.417','2016-01-03T08:00:00.000'),
('LG602','2015-08-07T16:52:13.573','2015-08-11T03:00:00.000')
;With Diffs as (
SELECT ID, DATEDIFF(hour, IIn, IOut)/24.0 IDays
FROM #timeTable
)
select
*,
(
SUM(IDays) OVER (ORDER BY IDays, ID)
/
SUM(IDays) OVER ()
) * 100 as IDaysPer
from
Diffs
order by IDays
Note that I couldn't quite make sense of your "rounded down" requirement but you should be able to use any common rounding technique wrapped around the appropriate calculation. So my outputs don't quite match yours:
ID IDays IDaysPer
----- --------------------------------------- ---------------------------------------
LG602 3.458333 13.696300
UN555 5.083333 33.828300
AB123 7.416666 63.201300
QA565 9.291666 100.000000
Consider TimeTable has already the data
WITH t1 (ID, IDays)
AS (
SELECT ID, DATEDIFF(hour, IIn, IOut) / 24.0 AS IDays
FROM TimeTable
)
SELECT
ID, FLOOR(IDays),
(FLOOR(IDays) / (SELECT SUM(FLOOR(IDays)) FROM t1 t2 WHERE t1.IDays <= t2.IDays)) * 100.0 AS IDaysPer
FROM t1
ORDER BY 2 ASC
Here you go : Output matches with yours...
create table #TEMp
(ID VARCHAR(100)
,IIn datetime
,IOut datetime
)
insert into #temp(ID,IIn,IOut) values
('AB123','2015-11-06T15:24:44.057','2015-11-14T01:00:00.000'),
('QA565','2015-10-27T20:12:19.753','2015-11-06T03:00:00.000'),
('UN555','2015-12-29T06:29:23.417','2016-01-03T08:00:00.000'),
('LG602','2015-08-07T16:52:13.573','2015-08-11T03:00:00.000')
select ID,IDays AS Idays,ROUND(CAST(SUM(IDays) OVER(ORDER BY IDays) AS FLOAT)/CAST(SUM(IDays)OVER() AS FLOAT) * 100,2) AS IdaysPer
from
(
select *,ROUND(DATEDIFF(hour, IIn, IOut)/24,0) IDays
from #TEMP
)T
I have a Table in which there are multiple columns but I need to find the Difference Between Current Row of StartTime Column And Previous Row of EndTime Column.
Example is the Following output.
Batch Number Start Time End Time Difference
100004 8:00:00 8:03:30
100005 8:05:00 8:07:00 00:01:30
100006 8:08:40 8:15:00 00:01:40
32141 8:18:00 8:22:45 00:03:00
84230 8:25:10 8:33:42 00:02:25
23444 8:40:00 8:43:00 00:06:18
100001 8:50:00 8:52:00 00:07:00
I am new to SQL and am using SQL SERVER 2008 R2.
Please help me to get the output in Simple Select Query.
CREATE TABLE #Batches
(
BatchID INT,
StartTime Datetime,
EndTime Datetime,
)
INSERT INTO #Batches
VALUES
(100004,'2016-05-16 08:00:00','2016-05-16 08:03:30'),
(100005,'2016-05-16 08:05:00','2016-05-16 08:07:00'),
(100006,'2016-05-16 08:08:40','2016-05-16 08:15:00'),
(32141 ,'2016-05-16 08:18:00','2016-05-16 08:22:45'),
(84230 ,'2016-05-16 08:25:10','2016-05-16 08:33:42'),
(23444 ,'2016-05-16 08:40:00','2016-05-16 08:43:00'),
(100001,'2016-05-16 08:50:00','2016-05-16 08:52:00')
;WITH CTE AS
(
SELECT
BatchID,
StartTime,
EndTime,
ROW_NUMBER() OVER (ORDER BY StartTime) AS Seq
FROM #Batches
)
SELECT
b.BatchID,
b.StartTime,
b.EndTime,
CONVERT(VARCHAR(20), DATEADD(SECOND,DATEDIFF(SECOND, bl.EndTime,b.StartTime),0),108) AS Diff,
DATEADD(SECOND,DATEDIFF(SECOND, bl.EndTime,b.StartTime),0) AS DiffDT
FROM CTE AS b
LEFT OUTER JOIN CTE AS bl ON bl.Seq = b.Seq - 1 -- Last batch
ORDER BY b.StartTime
I follow this link http://blog.sqlauthority.com/2011/11/24/sql-server-solution-to-puzzle-simulate-lead-and-lag-without-using-sql-server-2012-analytic-function/
This will give you the difference in seconds.
;with cteMain as (
select *, ROW_NUMBER() over (order by Start_time) sn
from table)
select m.batch_number, sLag.End_date, m.Start_time, convert(varchar,DateAdd(Second,DATEDIFF(SECOND, sLag.End_date, m.Start_time),0),108) as time_diff
from cteMain as m LEFT OUTER JOIN cteMain AS sLag ON sLag.sn = m.sn-1
order by m.batch_number
I have been a long time reader of this forum. It has helped me a lot, however I have a question which I cannot find a solution specific to my requirements.
I am given the task to develop a metric to determine how many days the 'Staff Performance Evaulations' are past due. The data comes in the following format:
EmployeeID LastEvalCompleteDate NextEvalDueDate
1001 2010-01-01 2010-11-01
1001 2010-11-20 2011-11-01
1001 2011-10-29 2012-11-15
1002 NULL 2013-12-01
According to the sample data above, the employee 1001 has had 3 evals since 2010-01-01. Employee 1002 has started this year and his first eval is due on 2013-12-01.
What I need to do is to convert the data to this format:
EmployeeID EvalDueDate EvalCompleteDate DaysPastDue
1001 2010-11-01 2010-11-20 19
1001 2011-11-01 2011-10-29 -2
1001 2012-11-15 NULL 342 (based on today's date)
1002 2013-12-01 NULL -39 (based on today's date)
As you noticed, I derive a new row by taking the value of NextEvalDueDate column and mapping it to the EvalDueDate column in my new table. I also take the value in the LastEvalCompleteDate column in the NEXT row and map it to the NextEvalDueDate column.
I am having trouble with iterating through the rows for a given EmployeeID. I tried using ROW_NUMBER() OVER (PARTITION BY ...) but it did not take me anywhere.
I appreciate any kind of help. Thank you.
You went into right direction using ROW_NUMBER() OVER (PARTITION BY ...). Don't know where have you stuck, but it should be something like this:
WITH CTE AS
(
SELECT *, ROW_NUMBER() OVER (PARTITION BY EmployeeID ORDER BY NextEvalDueDate) RN
FROM dbo.Table1
)
SELECT
c1.EmployeeID
, c1.NextEvalDueDate AS EvalDueDate
, c2.LastEvalCompleteDate AS EvalCompleteDate
, DATEDIFF(DAY, c1.NextEvalDueDate, COALESCE(c2.LastEvalCompleteDate, GETDATE())) AS DaysPastDue
FROM CTE c1
LEFT JOIN CTE c2 ON c1.EmployeeID = c2.EmployeeID AND c1.RN = c2.RN - 1
ORDER BY c1.EmployeeID, c1.RN
DECLARE #Results TABLE
(
EmployeeID INT NOT NULL,
RowNum INT NOT NULL,
PRIMARY KEY (RowNum, EmployeeID),
LastEvalCompleteDate DATE,
NextEvalDueDate DATE
);
INSERT #Results (RowNum, EmployeeID, LastEvalCompleteDate, NextEvalDueDate)
SELECT ROW_NUMBER() OVER(PARTITION BY e.EmployeeID ORDER BY e.LastEvalCompleteDate),
e.EmployeeID,
e.LastEvalCompleteDate,
e.NextEvalDueDate
FROM dbo.EmployeeEvaluation e;
WITH Base
AS
(
SELECT crt.RowNum,
crt.EmployeeID,
crt.NextEvalDueDate AS EvalDueDate,
nxt.LastEvalCompleteDate AS EvalCompleteDate
FROM #Results crt
LEFT JOIN #Results nxt ON crt.EmployeeID = nxt.EmployeeID AND crt.RowNum + 1 = nxt.RowNum
)
SELECT r.*,
DATEDIFF(DAY, r.EvalDueDate, ISNULL(r.EvalCompleteDate, GETDATE())) AS DaysPastDue
FROM Base r
ORDER BY r.EmployeeID, r.RowNum
I have hospital patient admission data in Microsoft SQL Server r2 that looks something like this:
PatientID, AdmitDate, DischargeDate
Jones. 1-jan-13 01:37. 1-jan-13 17:45
Smith 1-jan-13 02:12. 2-jan-13 02:14
Brooks. 4-jan-13 13:54. 5-jan-13 06:14
I would like count the number of patients in the hospital day by day and hour by hour (ie at
1-jan-13 00:00. 0
1-jan-13 01:00. 0
1-jan-13 02:00. 1
1-jan-13 03:00. 2
And I need to include the hours when there are no patients admitted in the result.
I can't create tables so making a reference table listing all the hours and days is out, though.
Any suggestions?
To solve this problem, you need a list of date-hours. The following gets this from the admit date cross joined to a table with 24 hours. The table of 24 hours is calculating from information_schema.columns -- a trick for getting small sequences of numbers in SQL Server.
The rest is just a join between this table and the hours. This version counts the patients at the hour, so someone admitted and discharged in the same hour, for instance is not counted. And in general someone is not counted until the next hour after they are admitted:
with dh as (
select DATEADD(hour, seqnum - 1, thedatehour ) as DateHour
from (select distinct cast(cast(AdmitDate as DATE) as datetime) as thedatehour
from Admission a
) a cross join
(select ROW_NUMBER() over (order by (select NULL)) as seqnum
from INFORMATION_SCHEMA.COLUMNS
) hours
where hours <= 24
)
select dh.DateHour, COUNT(*) as NumPatients
from dh join
Admissions a
on dh.DateHour between a.AdmitDate and a.DischargeDate
group by dh.DateHour
order by 1
This also assumes that there are admissions on every day. That seems like a reasonable assumption. If not, a calendar table would be a big help.
Here is one (ugly) way:
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
)
SELECT B.AdmitDate, A.DayHour, COUNT(DISTINCT PatientID) Patients
FROM DayHours A
CROSS JOIN (SELECT DISTINCT CONVERT(DATE,AdmitDate) AdmitDate
FROM YourTable) B
LEFT JOIN YourTable C
ON B.AdmitDate = CONVERT(DATE,C.AdmitDate)
AND A.DayHour = DATEPART(HOUR,C.AdmitDate)
GROUP BY B.AdmitDate, A.DayHour
This is a bit messy and includes a temp table with the test data you provided but
CREATE TABLE #HospitalPatientData (PatientId NVARCHAR(MAX), AdmitDate DATETIME, DischargeDate DATETIME)
INSERT INTO #HospitalPatientData
SELECT 'Jones.', '1-jan-13 01:37:00.000', '1-jan-13 17:45:00.000' UNION
SELECT 'Smith', '1-jan-13 02:12:00.000', '2-jan-13 02:14:00.000' UNION
SELECT 'Brooks.', '4-jan-13 13:54:00.000', '5-jan-13 06:14:00.000'
;WITH DayHours AS
(
SELECT 0 DayHour
UNION ALL
SELECT DayHour+1
FROM DayHours
WHERE DayHour+1 <= 23
),
HospitalPatientData AS
(
SELECT CONVERT(nvarchar(max),AdmitDate,103) as AdmitDate ,DATEPART(hour,(AdmitDate)) as AdmitHour, COUNT(PatientID) as CountOfPatients
FROM #HospitalPatientData
GROUP BY CONVERT(nvarchar(max),AdmitDate,103), DATEPART(hour,(AdmitDate))
),
Results AS
(
SELECT MAX(h.AdmitDate) as Date, d.DayHour
FROM HospitalPatientData h
INNER JOIN DayHours d ON d.DayHour=d.DayHour
GROUP BY AdmitDate, CountOfPatients, DayHour
)
SELECT r.*, COUNT(h.PatientId) as CountOfPatients
FROM Results r
LEFT JOIN #HospitalPatientData h ON CONVERT(nvarchar(max),AdmitDate,103)=r.Date AND DATEPART(HOUR,h.AdmitDate)=r.DayHour
GROUP BY r.Date, r.DayHour
ORDER BY r.Date, r.DayHour
DROP TABLE #HospitalPatientData
This may get you started:
BEGIN TRAN
DECLARE #pt TABLE
(
PatientID VARCHAR(10)
, AdmitDate DATETIME
, DischargeDate DATETIME
)
INSERT INTO #pt
( PatientID, AdmitDate, DischargeDate )
VALUES ( 'Jones', '1-jan-13 01:37', '1-jan-13 17:45' ),
( 'Smith', '1-jan-13 02:12', '2-jan-13 02:14' )
, ( 'Brooks', '4-jan-13 13:54', '5-jan-13 06:14' )
DECLARE #StartDate DATETIME = '20130101'
, #FutureDays INT = 7
;
WITH dy
AS ( SELECT TOP (#FutureDays)
ROW_NUMBER() OVER ( ORDER BY name ) dy
FROM sys.columns c
) ,
hr
AS ( SELECT TOP 24
ROW_NUMBER() OVER ( ORDER BY name ) hr
FROM sys.columns c
)
SELECT refDate, COUNT(p.PatientID) AS PtCount
FROM ( SELECT DATEADD(HOUR, hr.hr - 1,
DATEADD(DAY, dy.dy - 1, #StartDate)) AS refDate
FROM dy
CROSS JOIN hr
) ref
LEFT JOIN #pt p ON ref.refDate BETWEEN p.AdmitDate AND p.DischargeDate
GROUP BY refDate
ORDER BY refDate
ROLLBACK