Finding duplicate records in a specific date range

Finding duplicate records in a specific date range - sql

I have a table where I have 4 columns
Serial(nvarchar), SID(nvarchar), DateCreated(Date), CID(unique and int)
I want to find the records that share the same Serial and SID (duplicates) and whose DateCreated values fall within 180 days of each other.
Please help.
Sample Data
Serial SID DateCreated CID
02302-25-0036 HONMD01 2017-05-01 00:00:00.000 1
02302-25-0036 HONMD01 2017-05-01 00:00:00.000 3
0264607 HONMD01 2017-05-01 00:00:00.000 65
0264607 HONMD01 2016-05-01 00:00:00.000 45
03118-09-0366 PRIVA00 2016-05-20 00:00:00.000 34
03118-09-0366 PRIVA00 2016-05-20 00:00:00.000 87
0969130 140439 2017-05-09 00:00:00.000 32
0969130 140439 2017-05-09 00:00:00.000 23
1049567 INIIL00 2017-04-12 00:00:00.000 76

-- Sample data: one row per record; CID is the unique key.
create table #Test (Serial nvarchar(20), [SID] nvarchar(10), DateCreated datetime, CID int)
Insert into #Test (Serial, [SID], DateCreated, CID) values ('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 1)
, ('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 3)
, ('0264607', 'HONMD01', '2017-05-01 00:00:00.000', 65)
, ('0264607', 'HONMD01', '2016-05-01 00:00:00.000', 45)
, ('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 34)
, ('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 87)
, ('0969130', '140439', '2017-05-09 00:00:00.000', 32)
, ('0969130', '140439', '2017-05-09 00:00:00.000', 23)
, ('1049567', 'INIIL00', '2017-04-12 00:00:00.000', 76)
-- Return every record that has at least one OTHER record with the same
-- Serial and SID created fewer than 180 days before or after it.
-- The earlier self-join version compared t.SID with itself (so SID was never
-- matched) and paired each row with itself (difference 0 days), which returned
-- duplicates no matter how far apart they were.
select t.Serial, t.[SID], t.DateCreated, t.CID
from #Test t
where exists (
    select 1
    from #Test o
    where o.Serial = t.Serial
      and o.[SID] = t.[SID]
      and o.CID <> t.CID -- CID is unique, so this excludes the row itself
      and abs(datediff(day, t.DateCreated, o.DateCreated)) < 180
)

Try to do this:
-- For each row, fetch the closest earlier and closest later DateCreated among
-- rows with the same serial and sid, then keep the rows that have a neighbour
-- fewer than 180 days away (i.e. the duplicates inside the 180-day range).
-- The window is ordered by dateCreated so that "previous"/"next" really are the
-- nearest dates; cid only breaks ties between rows created on the same date.
-- table_name is a placeholder for the real table.
with cte as (
select
serial,
sid,
dateCreated,
cid,
-- sentinel dates stand in for "no earlier / no later row" and are far enough
-- away that they never satisfy the < 180 test below
coalesce(lag(dateCreated) over(partition by serial, sid order by dateCreated, cid), '1900-01-01') as [last],
coalesce(lead(dateCreated) over(partition by serial, sid order by dateCreated, cid), '5999-01-01') as [next]
from table_name
)
select *
from cte
where
datediff(day, [last], dateCreated) < 180
or datediff(day, dateCreated, [next]) < 180

This was a challenging question! I have left PreviousDate and rno in the final output to make it easier to follow. Here is my solution:
-- Sample data.
Create table #t(Serial nvarchar(100),SID nvarchar(100),DateCreated date,CID int)
Insert into #t (Serial, SID, DateCreated, CID) values
('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 1),
('02302-25-0036', 'HONMD01', '2017-05-01 00:00:00.000', 3),
('0264607', 'HONMD01', '2017-05-01 00:00:00.000', 65),
('0264607', 'HONMD01', '2016-05-01 00:00:00.000', 45),
('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 34),
('03118-09-0366', 'PRIVA00', '2016-05-20 00:00:00.000', 87),
('0969130', '140439', '2017-05-09 00:00:00.000', 32),
('0969130', '140439', '2017-05-09 00:00:00.000', 23),
('1049567', 'INIIL00', '2017-04-12 00:00:00.000', 76)
-- iq  : per Serial/SID, the latest DateCreated and the date 180 days before it.
-- iq2 : numbers the rows of each Serial/SID separately for rows inside and
--       outside that 180-day window.
-- Result: every in-window row except the first one of its group.
Select iq2.*
FROM
(Select iq.Serial, iq.SID, iq.DateCreated, iq.CID, iq.PreviousDate,
-- PreviousDate is already maxDate - 180 days, so a row is inside the window
-- when DateCreated >= PreviousDate. Comparing DATEDIFF(DateCreated, PreviousDate)
-- with 180 again would widen the window to 360 days.
ROW_NUMBER() OVER (PARTITION BY iq.Serial,iq.SID, CASE WHEN iq.DateCreated >= iq.PreviousDate THEN 1 ELSE 0 END
-- the ordering columns are constant inside a partition, so which row gets
-- rno = 1 is not defined by this query
ORDER BY iq.Serial,iq.SID) rno
FROM
(select Serial,SID,DateCreated,CID,
MAX(DateCreated) OVER (PARTITION BY Serial,SID) maxDate,
DATEADD(day,-180,MAX(DateCreated) OVER (PARTITION BY Serial,SID)) PreviousDate
from #t
)iq
)iq2
where iq2.rno <> 1
-- rows outside the window are numbered too; never report them as duplicates
and iq2.DateCreated >= iq2.PreviousDate
output :
Serial SID DateCreated CID PreviousDate rno
---------- ------- ---------- ---- ----------- ----
02302-25-0036 HONMD01 2017-05-01 3 2016-11-02 2
03118-09-0366 PRIVA00 2016-05-20 87 2015-11-22 2
0969130 140439 2017-05-09 23 2016-11-10 2
PS: PreviousDate is the latest DateCreated of the Serial/SID group minus 180 days.

Related

SQL Server Query for average value over a date period

-- Sample orders: one row per person.
-- Table variables are named with @ (DECLARE #name TABLE is not valid T-SQL).
DECLARE @SampleOrderTable TABLE
(
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
)
-- Dates use the language-independent yyyymmdd form. Person 2 is dated
-- 2019-12-10 so that the data agrees with the expected result, which lists
-- 12/7/2019 - 12/10/2019 for that person.
INSERT INTO @SampleOrderTable (pkPersonID, OrderDate, Amount)
VALUES (1, '20191210', 762.84),
(2, '20191210', 886.32),
(3, '20191109', 10245.00)
How do I select the 4 days up to and including OrderDate, together with the Amount averaged over that period?
So result data would be:
pkPersonID Date Amount
------------------------------------
1 '12/7/2019' 190.71
1 '12/8/2019' 190.71
1 '12/9/2019' 190.71
1 '12/10/2019' 190.71
2 '12/7/2019' 221.58
2 '12/8/2019' 221.58
2 '12/9/2019' 221.58
2 '12/10/2019' 221.58
3 '11/6/2019' 2561.25
3 '11/7/2019' 2561.25
3 '11/8/2019' 2561.25
3 '11/9/2019' 2561.25

You may try with the following approach, using DATEADD(), windowed COUNT() and VALUES() table value constructor:
Table:
-- Sample orders. Table variables are named with @ (DECLARE #name TABLE is not
-- valid T-SQL), and the query below must run in the same batch.
DECLARE @SampleOrderTable TABLE (
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
)
INSERT INTO @SampleOrderTable (pkPersonID, OrderDate, Amount)
VALUES (1, '20191210', 762.84),
(2, '20191210', 886.32),
(3, '20191109', 10245.00)
-- Statement:
-- CROSS APPLY expands every order into four rows, one for each day offset 0..3
-- before OrderDate. COUNT(...) OVER then counts those expanded rows per person,
-- so Amount is divided by 4 when a person has a single order.
SELECT
t.pkPersonID,
DATEADD(day, -v.Day, t.OrderDate) AS [Date],
CONVERT(numeric(18, 6), t.Amount / COUNT(t.Amount) OVER (PARTITION BY t.pkPersonID)) AS Amount
FROM @SampleOrderTable t
CROSS APPLY (VALUES (0), (1), (2), (3)) v(Day)
ORDER BY t.pkPersonID, [Date]
Result:
pkPersonID Date Amount
1 07/12/2019 00:00:00 190.710000
1 08/12/2019 00:00:00 190.710000
1 09/12/2019 00:00:00 190.710000
1 10/12/2019 00:00:00 190.710000
2 07/12/2019 00:00:00 221.580000
2 08/12/2019 00:00:00 221.580000
2 09/12/2019 00:00:00 221.580000
2 10/12/2019 00:00:00 221.580000
3 06/11/2019 00:00:00 2561.250000
3 07/11/2019 00:00:00 2561.250000
3 08/11/2019 00:00:00 2561.250000
3 09/11/2019 00:00:00 2561.250000

You can use sql functions like AVG, DATEADD and GETDATE.
-- Average Amount of all orders dated within the last 4 days, counted from the
-- current date and time.
-- @SampleOrderTable is the table variable from the question; a table variable
-- is only visible inside the batch that declares it.
-- NOTE(review): this filters relative to GETDATE(), not to each row's OrderDate.
SELECT AVG(Amount) AS AverageAmount
FROM @SampleOrderTable
WHERE OrderDate >= DATEADD(DAY, -4, GETDATE())

-- Sample orders. Table variables are named with @ (DECLARE #name TABLE is not
-- valid T-SQL). Dates use the language-independent yyyymmdd form.
DECLARE @SampleOrderTable TABLE (
pkPersonID INT,
OrderDate DATETIME,
Amount NUMERIC(18, 6)
);
INSERT INTO @SampleOrderTable
(pkPersonID, OrderDate, Amount)
VALUES
(1, '20191220', 762.84),
(2, '20191220', 886.32),
(3, '20191220', 10245.00),
(4, '20191219', 50.00),
(5, '20191219', 100.00),
(6, '20190901', 200.00),
(7, '20190901', 300.00),
(8, '20191215', 400.00),
(9, '20191215', 500.00),
(10, '20190902', 150.00),
(11, '20190902', 1100.00),
(12, '20190902', 1200.00),
(13, '20190902', 1300.00),
(14, '20190902', 1400.00),
(15, '20190902', 1500.00);
-- Average Amount per OrderDate for orders dated no more than 4 calendar days
-- before today. DATEDIFF(DAY, OrderDate, GETDATE()) <= 4 counts day boundaries,
-- so it is the same as OrderDate >= midnight of (today - 4 days); written this
-- way the column is not wrapped in a function and an index on it can be used.
SELECT OrderDate, AVG(Amount) AS Average_Value
FROM @SampleOrderTable
WHERE OrderDate >= DATEADD(DAY, -4, CAST(GETDATE() AS date))
GROUP BY OrderDate;

SQL - Start and End date based on another column

Simplified structure.
I need the start and end times of each period that begins with a record of action type 4 and ends with the next record of action type 1.
An incident can be in that state several times, and I need a separate row for each period.
For example for IncidentId = 1
Row 1 - StartTime = 2017-01-01 14:00 (id:3) - End Time = 2017-01-01 20:00 (id: 5)
Row 2 - StartTime = 2017-01-01 21:00 (id:6) - End Time = 2017-01-02 11:00 (id: 9)
-- Three-column temp table (incident, start, end). Nothing in this script
-- writes to it; presumably it shows the shape of the wanted result.
CREATE TABLE #returntable (
    [incidentid] INT,
    [starttime]  DATETIME,
    [endtime]    DATETIME
)

-- Incident log: one row per logged action.
CREATE TABLE #testtableofdoom (
    [incidentlogid] INT,
    [incidentid]    INT,
    [timestamp]     DATETIME,
    [actiontypeid]  INT
)

INSERT INTO #testtableofdoom ([incidentlogid], [incidentid], [timestamp], [actiontypeid])
VALUES
    (1,  1, '2017-01-01 09:00', 1),
    (2,  1, '2017-01-01 11:00', 1),
    (3,  1, '2017-01-01 14:00', 4),
    (4,  1, '2017-01-01 16:00', 4),
    (5,  1, '2017-01-01 20:00', 1),
    (6,  1, '2017-01-01 21:00', 4),
    (7,  1, '2017-01-02 09:00', 4),
    (8,  2, '2017-01-02 10:00', 1),
    (9,  1, '2017-01-02 11:00', 1),
    (10, 1, '2017-01-02 14:00', 1),
    (11, 2, '2017-01-02 15:00', 4),
    (12, 1, '2017-01-02 16:00', 1),
    (13, 1, '2017-01-02 17:00', 1),
    (14, 1, '2017-01-02 18:00', 1),
    (15, 2, '2017-01-02 15:00', 1);

-- Clean up both temp tables.
DROP TABLE #testtableofdoom
DROP TABLE #returntable

I used table variables instead of temp tables, and shorter column names than you, but this works:
-- Incident log as a table variable (shortened column names):
-- logId = log row id, iId = incident id, dt = timestamp, atId = action type.
-- Table variables are named with @, and the column list needs its closing
-- parenthesis before the INSERT.
declare @tt TABLE (
logId INT, iId INT,
dt DATETIME, atId INT
)
INSERT @tt (logId, iId,
dt, atId) values
(1, 1, '2017-01-01 09:00', 1),
(2, 1, '2017-01-01 11:00', 1),
(3, 1, '2017-01-01 14:00', 4),
(4, 1, '2017-01-01 16:00', 4),
(5, 1, '2017-01-01 20:00', 1),
(6, 1, '2017-01-01 21:00', 4),
(7, 1, '2017-01-02 09:00', 4),
(8, 2, '2017-01-02 10:00', 1),
(9, 1, '2017-01-02 11:00', 1),
(10, 1, '2017-01-02 14:00', 1),
(11, 2, '2017-01-02 15:00', 4),
(12, 1, '2017-01-02 16:00', 1),
(13, 1, '2017-01-02 17:00', 1),
(14, 1, '2017-01-02 18:00', 1),
(15, 2, '2017-01-02 15:00', 1)
-- s = start row (action type 4), e = end row (action type 1).
Select s.logId startLogid, e.logId endLogId,
s.iID, s.dt startTime, e.dt endTime
from @tt s join @tt e
on e.logId =
(Select min(logId) from @tt -- first type-1 row of the same incident after s
where iId = s.iID
and atId = 1
and logId > s.logId)
where s.aTid = 4
-- keep only the first "4" of a run: the previous row of the same incident is
-- a "1", or there is no previous row at all
and ((Select atId from @tt
Where logId =
(Select Max(logId) from @tt
where logId < s.LogId
and iId = s.iId)) = 1
or Not Exists
(Select * from @tt
Where logId < s.LogId
and iId = s.iID))
This produces the following:
startLogid endLogId iID startTime endTime
----------- ----------- ---- ---------------- ----------------
3 5 1 2017-01-01 14:00 2017-01-01 20:00
6 9 1 2017-01-01 21:00 2017-01-02 11:00
11 15 2 2017-01-02 15:00 2017-01-02 15:00
It uses a self-join: s represents the start record (action type 4) and e represents the end record (action type 1). Since logId increments, the end record must have a higher logId than the start record; specifically, it is the lowest logId above the start record's that has the same iId and atId = 1.
-- Excerpt of the query above: the self-join that finds the end row.
Select s.iID, s.dt startTime, e.dt endTime
from @tt s join @tt e
on e.logId =
(Select min(logId) from @tt -- lowest log greater than start logId
where iId = s.iID -- same iId
and atId = 1 -- with atId = 1
and logId > s.logId) -- greater than start logId
finally, the start record must be restricted to those "4" records which either have no other same incident records before it or have a "1" record immediately prior to it.
-- Excerpt of the query above: the filter that selects valid start rows.
where s.aTid = 4
and ((Select atId from @tt -- atId of immed prior = 1
Where logId =
(Select Max(logId) from @tt
where logId < s.LogId
and iId = s.iId)) = 1
or Not Exists -- or there is no prior record
(Select * from @tt
Where logId < s.LogId
and iId = s.iID))

something like this?
-- Start/end pairs for incident 1.
-- A start is a type-4 row whose previous row (by incidentlogid) is not type 4;
-- its end is the earliest later type-1 row of the same incident.
select
d.[timestamp] as StartDate,
(select top 1 d2.[timestamp]
from #testTableOfDoom d2
where d2.incidentid = 1 and d2.[timestamp] > d.[timestamp] and d2.actiontypeid = 1
order by d2.[timestamp] asc
) as EndDate
from
(select
p.[timestamp],
LAG(p.actiontypeid) OVER (ORDER BY p.incidentlogid asc) PrevValue,
p.actiontypeid
from #testTableOfDoom p
where p.incidentid = 1) d
where d.actiontypeid = 4
-- LAG returns NULL for the first row; NULL <> 4 is not true, so without the
-- IS NULL test an incident whose very first row is type 4 would be dropped
and (d.PrevValue is null or d.PrevValue <> 4)

Calculate total time worked in a day with multiple stops and starts

I can use DATEDIFF to find the difference between one set of dates like this
DATEDIFF(MINUTE, @startdate, @enddate)
but how would I find the total time span between multiple sets of dates? I don't know how many sets (stops and starts) I will have.
The data is on multiple rows with start and stops.
ID TimeStamp StartOrStop TimeCode
----------------------------------------------------------------
1 2017-01-01 07:00:00 Start 1
2 2017-01-01 08:15:00 Stop 2
3 2017-01-01 10:00:00 Start 1
4 2017-01-01 11:00:00 Stop 2
5 2017-01-01 10:30:00 Start 1
6 2017-01-01 12:00:00 Stop 2

This code works assuming that your table only stores data for one person and that the rows alternate Start/Stop/Start/Stop.
-- Number the start rows (TimeCode = 1) and the stop rows (TimeCode = 2) in time
-- order, pair the n-th start with the n-th stop, and add up the minutes.
-- <<table>> is a placeholder for the real table name.
WITH StartTime AS (
SELECT
TimeStamp
-- ROW_NUMBER() takes an OVER (ORDER BY ...) clause
, ROW_NUMBER() OVER (ORDER BY TimeStamp) RowNum
FROM
<<table>>
WHERE
TimeCode = 1
), StopTime AS (
SELECT
TimeStamp
, ROW_NUMBER() OVER (ORDER BY TimeStamp) RowNum
FROM
<<table>>
WHERE
TimeCode = 2
)
SELECT
SUM (DATEDIFF( MINUTE, StartTime.TimeStamp, StopTime.TimeStamp )) As TotalTime
FROM
StartTime
JOIN StopTime ON StartTime.RowNum = StopTime.RowNum

This will work if your starts and stops are reliable. Your sample has two starts in order - 10:00 and 10:30 starts. I assume in production you will have an employee id to group on, so I added this to the sample data in place of the identity column.
Also, in production the CTE sets will be reduced by filtering on a date parameter. If there are overnight shifts, you would want your stops CTE to use dateadd(day, 1, @startDate) as the upper bound when retrieving the end date.
Set up sample:
-- Sample data with an employee id in place of the identity column.
-- Table variables are named with @ (declare #name table is not valid T-SQL),
-- and the query below must run in the same batch.
declare @temp table (
EmpId int,
TimeStamp datetime,
StartOrStop varchar(55),
TimeCode int
);
insert into @temp (EmpId, TimeStamp, StartOrStop, TimeCode)
values
(1, '2017-01-01 07:00:00', 'Start', 1),
(1, '2017-01-01 08:15:00', 'Stop', 2),
(1, '2017-01-01 10:00:00', 'Start', 1),
(1, '2017-01-01 11:00:00', 'Stop', 2),
(2, '2017-01-01 10:30:00', 'Start', 1),
(2, '2017-01-01 12:00:00', 'Stop', 2)
-- Query:
-- Number each employee's starts and stops in time order, pair the n-th start
-- with the n-th stop, then summarise per employee.
;with starts as (
select t.EmpId,
t.TimeStamp as StartTime,
row_number() over (partition by t.EmpId order by t.TimeStamp asc) as rn
from @temp t
where Timecode = 1 --Start time code?
),
stops as (
select t.EmpId,
t.TimeStamp as EndTime,
row_number() over (partition by t.EmpId order by t.TimeStamp asc) as rn
from @temp t
where Timecode = 2 --Stop time code?
)
select cast(min(sub.StartTime) as date) as WorkDay,
sub.EmpId as Employee,
min(sub.StartTime) as ClockIn,
-- the final clock-out is the latest stop; min() would report the first one
max(sub.EndTime) as ClockOut,
sum(sub.MinutesWorked) as MinutesWorked
from
(
select strt.EmpId,
strt.StartTime,
stp.EndTime,
datediff(minute, strt.StartTime, stp.EndTime) as MinutesWorked
from starts strt
inner join stops stp
on strt.EmpId = stp.EmpId
and strt.rn = stp.rn
)sub
group by sub.EmpId

This works assuming your table has an incremental ID and interleaving start/stop records
--Data sample as provided
--Table variables are named with @ (declare #name table is not valid T-SQL)
declare @temp table (
Id int,
TimeStamp datetime,
StartOrStop varchar(55),
TimeCode int
);
insert into @temp (Id, TimeStamp, StartOrStop, TimeCode)
values
(1, '2017-01-01 07:00:00', 'Start', 1),
(2, '2017-01-01 08:15:00', 'Stop', 2),
(3, '2017-01-01 10:00:00', 'Start', 1),
(4, '2017-01-01 11:00:00', 'Stop', 2),
(5, '2017-01-01 10:30:00', 'Start', 1),
(6, '2017-01-01 12:00:00', 'Stop', 2)
--let's see every pair start/stop and discard stop/start
--(each start row is joined to the row with the next Id)
select start.timestamp start, stop.timestamp stop,
datediff(mi,start.timestamp,stop.timestamp) minutes
from @temp start inner join @temp stop
on start.id+1= stop.id and start.timecode=1
--Sum all for required result
select sum(datediff(mi,start.timestamp,stop.timestamp) ) totalMinutes
from @temp start inner join @temp stop
on start.id+1= stop.id and start.timecode=1
Results
+-------------------------+-------------------------+---------+
| start | stop | minutes |
+-------------------------+-------------------------+---------+
| 2017-01-01 07:00:00.000 | 2017-01-01 08:15:00.000 | 75 |
| 2017-01-01 10:00:00.000 | 2017-01-01 11:00:00.000 | 60 |
| 2017-01-01 10:30:00.000 | 2017-01-01 12:00:00.000 | 90 |
+-------------------------+-------------------------+---------+
+--------------+
| totalMinutes |
+--------------+
| 225 |
+--------------+
The tricky part is the join clause: we join the table with itself, offset by one ID, which is exactly what on start.id+1= stop.id does.
On the other hand, to exclude stop/start pairs we use start.timecode=1. If there were no column carrying this information, something like stop.id%2=0 would work just as well.

Get sequence of days from days

We have a table of days like:
ID Date
1 2015-07-29
2 2015-07-30
3 2015-07-31
4 2015-08-01
5 2015-08-03
7 2015-08-04
8 2015-08-05
9 2015-08-06
10 2015-08-07
11 2015-08-10
And we want to find all the sequences (day+1). The result should be something like this:
Start End
2015-07-29 2015-08-01
2015-08-03 2015-08-07
2015-08-10 2015-08-10
1. Update
First I modified Deepanshu Kalra answer to use RowNumber instead of the Id (Id is autoincrement, so its possible that ids are missing)
-- @P collects the boundary dates of every run of consecutive days:
--   starts = the first date, plus every date not preceded by the day before
--   ends   = every date not followed by the next day, plus the last date
-- UNION ALL keeps a single-day run twice (once as start, once as end), and the
-- MAX branch supplies the end of the final run; with UNION and no MAX a
-- multi-day final run lost its end date and single-day runs broke the pairing.
-- Assumes MietvertragsArtikelDays holds each date once.
-- Table variables are named with @ (DECLARE #name TABLE is not valid T-SQL).
DECLARE @P TABLE([DATE] DATE)
;WITH Numbered AS (
SELECT ROW_NUMBER() OVER (ORDER BY [DATE]) RowNumber, [DATE] FROM MietvertragsArtikelDays
)
INSERT INTO @P
SELECT MIN([DATE])
FROM MietvertragsArtikelDays
HAVING COUNT(*) > 0 -- no boundary row for an empty table
UNION ALL
SELECT T1.[DATE]
FROM Numbered AS T1 INNER JOIN Numbered AS T2 ON T1.RowNumber=T2.RowNumber+1
WHERE DATEDIFF(DAY,T2.[DATE],T1.[DATE]) <>1
UNION ALL
SELECT T2.[DATE]
FROM Numbered AS T1 INNER JOIN Numbered AS T2 ON T1.RowNumber=T2.RowNumber+1
WHERE DATEDIFF(DAY,T2.[DATE],T1.[DATE]) <>1
UNION ALL
SELECT MAX([DATE])
FROM MietvertragsArtikelDays
HAVING COUNT(*) > 0
-- @X numbers the boundaries in date order: odd RN = start, even RN = its end.
DECLARE @X TABLE([DATE] DATE, RN INT)
INSERT INTO @X
SELECT [DATE], ROW_NUMBER() OVER(ORDER BY [DATE]) AS X FROM @P
SELECT A.[DATE] Start, ISNULL(B.[DATE],A.[DATE]) [End] FROM @X A
LEFT JOIN (SELECT [DATE], RN-1 AS RN FROM @X) B
ON A.RN=B.RN
WHERE A.RN%2=1
2. Update
The most elegant solution is Ughais
-- Gaps-and-islands: within a run of consecutive days, the day number and the
-- row number both grow by one per row, so their difference (grp) is constant
-- for the run and changes at every gap.
-- The row number is ordered by [Date] so the result does not depend on IDs
-- having been handed out in date order. Assumes each date appears once.
;WITH CTE as
(
SELECT *,DATEDIFF(D,0,[Date]) - ROW_NUMBER()OVER(ORDER BY [Date] ASC) grp
FROM MietvertragsArtikelDays
)
SELECT MIN([Date]) AS [Start],MAX([Date]) AS [End]
FROM CTE
GROUP BY grp
ORDER BY [Start]

This is an Islands and Gap problem. You can use ROW_NUMBER and DATEDIFF. Something like this.
SQL Fiddle
Sample Data
-- Sample data. Table variables are named with @ (DECLARE #name TABLE is not
-- valid T-SQL), and the query below must run in the same batch.
DECLARE @Dates TABLE
([ID] int, [Date] datetime);
INSERT INTO @Dates
([ID], [Date])
VALUES
(1, '2015-07-29 00:00:00'),
(2, '2015-07-30 00:00:00'),
(3, '2015-07-31 00:00:00'),
(4, '2015-08-01 00:00:00'),
(5, '2015-08-03 00:00:00'),
(7, '2015-08-04 00:00:00'),
(8, '2015-08-05 00:00:00'),
(9, '2015-08-06 00:00:00'),
(10, '2015-08-07 00:00:00'),
(11, '2015-08-10 00:00:00');
-- Query
-- Within a run of consecutive days the day number and the row number both grow
-- by one per row, so their difference (grp) is constant for the run.
-- The row number is ordered by [Date] so the result does not depend on IDs
-- having been handed out in date order. Assumes each date appears once.
;WITH CTE as
(
SELECT *,DATEDIFF(D,0,[Date]) - ROW_NUMBER()OVER(ORDER BY [Date] ASC) grp
FROM @Dates
)
SELECT MIN([Date]) AS [Start],MAX([Date]) AS [End]
FROM CTE
GROUP BY grp
ORDER BY [Start]
Output
2015-07-29 00:00:00.000 2015-08-01 00:00:00.000
2015-08-03 00:00:00.000 2015-08-07 00:00:00.000
2015-08-10 00:00:00.000 2015-08-10 00:00:00.000

As I started from an answer which was already posted, maybe I made it very complex. But it works.
-- Sample data. Table variables are named with @ (DECLARE #name TABLE is not
-- valid T-SQL).
DECLARE @T TABLE(ID INT, [DATE] DATE)
INSERT INTO @T
SELECT 1, '2015-07-29' UNION ALL
SELECT 2, '2015-07-30' UNION ALL
SELECT 3, '2015-07-31' UNION ALL
SELECT 4, '2015-08-01' UNION ALL
SELECT 5, '2015-08-03' UNION ALL
SELECT 7, '2015-08-04' UNION ALL
SELECT 8, '2015-08-05' UNION ALL
SELECT 9, '2015-08-06' UNION ALL
SELECT 10, '2015-08-07' UNION ALL
SELECT 11, '2015-08-10'
-- @P collects the boundary dates of every run of consecutive days:
--   starts = the first date, plus every date not preceded by the day before
--   ends   = every date not followed by the next day, plus the last date
-- Neighbouring rows are matched by row number in date order rather than by
-- ID + 1, because IDs can have holes (there is no ID 6 in the sample).
-- UNION ALL keeps a single-day run twice (start and end), and the MAX branch
-- supplies the end of the final run. Assumes each date appears once.
DECLARE @P TABLE([DATE] DATE)
;WITH Numbered AS (
SELECT ROW_NUMBER() OVER (ORDER BY [DATE]) RowNumber, [DATE] FROM @T
)
INSERT INTO @P
SELECT MIN([DATE])
FROM @T
HAVING COUNT(*) > 0 -- no boundary row for an empty table
UNION ALL
SELECT T1.[DATE]
FROM Numbered AS T1 INNER JOIN Numbered AS T2 ON T1.RowNumber=T2.RowNumber+1
WHERE DATEDIFF(DAY,T2.[DATE],T1.[DATE]) <>1
UNION ALL
SELECT T2.[DATE]
FROM Numbered AS T1 INNER JOIN Numbered AS T2 ON T1.RowNumber=T2.RowNumber+1
WHERE DATEDIFF(DAY,T2.[DATE],T1.[DATE]) <>1
UNION ALL
SELECT MAX([DATE])
FROM @T
HAVING COUNT(*) > 0
-- @X numbers the boundaries in date order: odd RN = start, even RN = its end.
DECLARE @X TABLE([DATE] DATE, RN INT)
INSERT INTO @X
SELECT [DATE], ROW_NUMBER() OVER(ORDER BY [DATE]) AS X FROM @P
SELECT A.[DATE], B.[DATE] FROM @X A
LEFT JOIN (SELECT [DATE], RN-1 AS RN FROM @X) B
ON A.RN=B.RN
WHERE A.RN%2=1
Please excuse the standards and all. Will edit later in the day. Sorry for that.

This will work whatever the order and value of Id is:
-- Sample data. Table variables are named with @ (Declare #name table is not
-- valid T-SQL).
Declare @dates table(ID int, D datetime)
Insert Into @dates(ID, D)
values (1, '2015-07-29')
, (2, '2015-07-30')
, (3, '2015-07-31')
, (4, '2015-08-01')
, (5, '2015-08-03')
, (7, '2015-08-04')
, (8, '2015-08-05')
, (9, '2015-08-06')
, (10, '2015-08-07')
, (11, '2015-08-10')
; With start(ID, D) as (
-- Get 1st Dates: dates with no row for the day before
Select d1.ID, d1.D From @dates as d1
Left Join @dates as d2 On d1.D = DATEADD(DAY, 1, d2.D)
Where d2.ID is NULL
), loop(startD, endD) as (
-- Loop through consecutives dates: extend each run one day at a time
Select D, D From start
Union All
Select l.startD, s.D From loop as l
Inner Join @dates as s On s.D = DATEADD(DAY, 1, l.endD)
)
-- Get max end date for each start date
Select startD as [Start], max(endD) as [End] From loop group by startD
-- One recursion level is used per day of a run; lift the default limit of
-- 100 levels so that longer runs do not abort the query.
Option (MaxRecursion 0)
Output:
Start End
2015-07-29 2015-08-01
2015-08-03 2015-08-07
2015-08-10 2015-08-10

find time slots with sql

I have a scenario (SQL 2008) where I need to find the occupied timeframes /non-gaps from the below table. For e.g . I have created this dummy table.
-- One row per job: the worker it belongs to and the job's start and end time.
CREATE TABLE Job (
    JobID    INT      NOT NULL,
    WorkerID INT      NOT NULL,
    JobStart DATETIME NOT NULL,
    JobEnd   DATETIME NOT NULL
);

-- Sample rows: workers 25 and 26, some jobs overlapping in time.
INSERT INTO Job
    (JobID, WorkerID, JobStart, JobEnd)
VALUES
    (1, 25, '2012-11-17 16:00', '2012-11-17 17:00'),
    (2, 25, '2012-11-17 16:00', '2012-11-17 16:50'),
    (3, 25, '2012-11-19 18:00', '2012-11-19 18:30'),
    (4, 25, '2012-11-19 17:30', '2012-11-19 18:10'),
    (5, 26, '2012-11-18 16:00', '2012-11-18 17:10'),
    (6, 26, '2012-11-18 16:00', '2012-11-19 16:50');
For this data, the query should return:
WorkerID | StartDate | EndDate
25 2012-11-17 16:00 2012-11-17 17:00
25 2012-11-19 17:30 2012-11-19 18:30
26 2012-11-18 16:00 2012-11-18 17:10
I am able to get the result, but only with a WHILE loop, which is a very iterative method. Is there any way to get the result without using WHILE?

This is a Packing Date and Time Intervals problem. Itzik Ben-Gan has published an article that provides many solutions to it. Using one of Itzik's solutions, here is a query that solves your problem:
SQL Fiddle
-- Packs each worker's overlapping jobs into merged intervals.
-- C1: turns every job into two events, its start (type +1) and its end
--     (type -1). s numbers a worker's start events in JobStart order, e numbers
--     the end events in JobEnd order; the counter that does not apply is NULL.
WITH C1 AS(
SELECT
JobID, WorkerId, JobStart AS ts, +1 AS type, NULL AS e,
ROW_NUMBER() OVER(PARTITION BY WorkerId ORDER BY JobStart, JobId) AS s
FROM Job
UNION ALL
SELECT
JobID, WorkerId, JobEnd AS ts, -1 AS type,
ROW_NUMBER() OVER(PARTITION BY WorkerId ORDER BY JobEnd, JobId) AS e,
NULL AS s
FROM Job
),
-- C2: se numbers all of a worker's events in time order; type DESC places a
--     start before an end that has the same timestamp.
C2 AS(
SELECT *,
ROW_NUMBER() OVER(PARTITION BY WorkerId ORDER BY ts, type DESC, JobId) AS se
FROM C1
),
-- C3: keeps only the events at which no other job is open.
--     Start event: (se - s) is the number of end events ordered before it, so
--       s - (se - s) - 1 is the number of jobs still open just before it.
--     End event: (se - e) is the number of start events ordered before it, so
--       (se - e) - e is the number of jobs still open after it.
--     A value of 0 marks the first start or the last end of a merged interval.
--     The surviving events are then numbered per worker and paired two by two:
--     (row number - 1) / 2 + 1 is integer division, giving 1,1,2,2,...
C3 AS(
SELECT ts, WorkerId,
FLOOR((ROW_NUMBER() OVER(PARTITION BY WorkerId ORDER BY ts) - 1) / 2 + 1) AS grpnum
FROM C2
WHERE COALESCE(s - (se - s) - 1, (se - e) - e) = 0
)
-- Each (WorkerId, grpnum) pair holds one start and one end timestamp.
SELECT
WorkerId,
MIN(ts) AS StartDate,
MAX(ts) AS EndDate
FROM C3
GROUP BY WorkerID, grpnum
ORDER BY WorkerID
Result
WorkerId StartDate EndDate
----------- ----------------------- -----------------------
25 2012-11-17 16:00:00.000 2012-11-17 17:00:00.000
25 2012-11-19 17:30:00.000 2012-11-19 18:30:00.000
26 2012-11-18 16:00:00.000 2012-11-19 16:50:00.000

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Finding duplicate records in a specific date range - sql

Related

SQL Server Query for average value over a date period

SQL - Start and End date based on another column

Calculate total time worked in a day with multiple stops and starts

Get sequence of days from days

find time slots with sql

Categories

Resources