SQL SELECT with time range - sql

I have below click_log table logging hits for some urls
site ip ua direction hit_time
-----------------------------------------------------
1 127.0.0.1 1 2010/01/01 00:00:00
2 127.0.0.1 1 2010/01/01 00:01:00
3 127.0.0.1 0 2010/01/01 00:10:00
.... .........
I want to select incoming hits (direction:1) and group by sites that are:
from same ip and browser
logged within 10 minutes of each other
occurred more than 4 times in 10 minutes.
I'm not sure if above was clear enough. English is not my first language. Let me try to explain with an example.
If site 1 gets 5 hits from the same IP and browser within 10 minutes of the first hit from that IP and browser, I want it to be included in the selection.
Basically I am trying to find abusers.

I think this does what you need. I have included some sample data too.
-- Sample click log: one row per hit. Direction 1 = incoming, 0 = outgoing.
Create Table #t
(
    [Site] int,
    IP varchar(20),
    Direction int,
    Hit_Time datetime
)

Insert Into #t ([Site], IP, Direction, Hit_Time)
Select 1, '127.0.0.1', 1, '2010-01-01 00:00:00' Union All
Select 1, '127.0.0.1', 1, '2010-01-01 00:01:00' Union All
Select 1, '127.0.0.1', 1, '2010-01-01 00:03:00' Union All
Select 1, '127.0.0.1', 1, '2010-01-01 00:04:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 00:00:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 00:01:00' Union All
Select 2, '127.0.0.2', 0, '2010-01-01 00:03:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 00:04:00'

-- A site is reported when some incoming hit is followed, within 10 minutes
-- (inclusive), by enough incoming hits from the same IP to reach 4 in total
-- (the anchoring hit counts as one of them).
-- The window is anchored on each hit rather than on the group's overall
-- Min/Max, so a burst is still found when the same IP also has hits that lie
-- more than 10 minutes before or after it.
Select Distinct a.[Site]
From #t a
Where a.Direction = 1
  And (
        Select Count(*)
        From #t b
        Where b.[Site] = a.[Site]
          And b.IP = a.IP
          And b.Direction = 1
          And b.Hit_Time >= a.Hit_Time
          And b.Hit_Time <= DateAdd(minute, 10, a.Hit_Time)
      ) >= 4

Drop Table #t

You're probably looking for the Between operator as described here:
http://www.w3schools.com/sql/sql_between.asp

What about
-- For every incoming hit, count the incoming hits from the same IP that fall
-- in the 10 minutes starting at that hit (the hit itself included), and keep
-- the hits whose count exceeds 4.
-- The count is computed in a derived table because a select-list alias
-- cannot be referenced in the WHERE clause of the same query level.
SELECT hits.IP, hits.CLICK_COUNT
FROM (
    SELECT CL.IP,
           (SELECT COUNT(*)
            FROM Click_Log AS nearby
            WHERE nearby.IP = CL.IP
              AND nearby.DIRECTION = 1
              AND nearby.HIT_TIME >= CL.HIT_TIME
              AND nearby.HIT_TIME <= DATEADD(MINUTE, 10, CL.HIT_TIME)) AS CLICK_COUNT
    FROM Click_Log CL
    WHERE CL.DIRECTION = 1
) AS hits
WHERE hits.CLICK_COUNT > 4

;WITH rankings AS (
    -- Incoming hits only. groupId identifies one (site, ip, ua) combination;
    -- sequence numbers that combination's hits in time order.
    SELECT [site], ip, ua, hit_time,
           DENSE_RANK() OVER(ORDER BY [site], ip, ua) groupId,
           ROW_NUMBER() OVER(PARTITION BY [site], ip, ua ORDER BY hit_time) sequence
    FROM Hits
    WHERE direction = 1),
periods AS (
    -- For each hit: the number of hits in the 10 minutes ending at it.
    -- COUNT(r2.sequence) counts only matched earlier hits (a LEFT JOIN row
    -- without a match contributes 0), and + 1 adds the current hit itself.
    -- Earlier hits are identified by sequence alone, so hits sharing the same
    -- timestamp are counted as well.
    SELECT r.groupId, r.sequence, COUNT(r2.sequence) + 1 hitCount
    FROM rankings r
    LEFT OUTER JOIN rankings r2
        ON r2.groupId = r.groupId and r2.sequence < r.sequence
        AND r2.hit_time >= DATEADD(second, -10*60, r.hit_time)
    GROUP BY r.groupId, r.sequence
),
groups AS (
    -- Busiest 10-minute window per (site, ip, ua) combination.
    SELECT p.groupId, MAX(p.hitCount) maxHitCount
    FROM periods p
    GROUP BY p.groupId
)
SELECT DISTINCT r.[site], r.ip, r.ua, g.maxHitCount
FROM rankings r
INNER JOIN groups g ON g.groupId = r.groupId
WHERE maxHitCount >= 5
ORDER BY maxHitCount DESC

I have added this answer in response to the OP comment.
I've used the following test data:
-- Test data: site 1 has four incoming hits within four minutes; site 2 has
-- one early hit followed, half an hour later, by four hits within three minutes.
Create Table dbo.Temp
(
    [Site] int,
    IP varchar(20),
    Direction int,
    Hit_Time datetime
)

Insert Into dbo.Temp ([Site], IP, Direction, Hit_Time)
Select 1, '127.0.0.1', 1, '2010-01-01 00:00:00' Union All
Select 1, '127.0.0.1', 1, '2010-01-01 00:01:00' Union All
Select 1, '127.0.0.1', 1, '2010-01-01 00:03:00' Union All
Select 1, '127.0.0.1', 1, '2010-01-01 00:04:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 15:00:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 15:31:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 15:32:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 15:33:00' Union All
Select 2, '127.0.0.2', 1, '2010-01-01 15:34:00'
First you need to create a Function to do the working out:
-- Returns 1 when, between @MinDate and @MaxDate (both inclusive), the given
-- site/IP has at least @MaxCount hits of the given direction inside some
-- window of @Interval minutes; returns 0 otherwise.
--
--   @Site, @IP   site and client address to inspect in dbo.Temp
--   @MinDate     earliest hit time to consider
--   @MaxDate     latest hit time to consider
--   @Direction   hit direction to count
--   @Interval    window length in minutes
--   @MaxCount    number of hits inside one window that makes the IP suspect
--
-- Each candidate window starts at an actual hit and runs @Interval minutes
-- forward. Because every hit is tried as a window start, a burst is found
-- wherever it falls; windows laid out at fixed @Interval steps from @MinDate
-- would miss a burst that straddles two of them.
Create Function dbo.fn_CheckSuspectActivity (@Site int, @IP varchar(20), @MinDate datetime,
    @MaxDate datetime, @Direction int, @Interval int,
    @MaxCount int)
returns int
as begin
    Declare @IsSuspect int
    Set @IsSuspect = 0

    If Exists (
        Select 1
        From dbo.Temp a
        Where a.Site = @Site
          And a.IP = @IP
          And a.Direction = @Direction
          And a.Hit_Time >= @MinDate
          And a.Hit_Time <= @MaxDate
          And (
                -- hits in the window opened by hit "a"
                Select COUNT(*)
                From dbo.Temp b
                Where b.Site = @Site
                  And b.IP = @IP
                  And b.Direction = @Direction
                  And b.Hit_Time >= a.Hit_Time
                  And b.Hit_Time <= DATEADD(minute, @Interval, a.Hit_Time)
                  And b.Hit_Time <= @MaxDate
              ) >= @MaxCount
    )
    Begin
        Set @IsSuspect = 1
    End

    return @IsSuspect
End
Go
Then this select statement should give you the correct answer:
-- List the site of every (Site, IP) pair that the check function flags.
-- The derived table supplies the first and last logged time per pair.
-- Function arguments, in order: Site Number, IP Address, FirstTimeLogged,
-- LastTimeLogged, Direction, IntervalToCheck, MaxOccurences
Select ranges.Site
From (
    Select Site,
           IP,
           MIN(Hit_Time) as MinTime,
           MAX(Hit_TIme) as MaxTime
    From dbo.Temp
    Group By Site, IP
) as ranges
Where dbo.fn_CheckSuspectActivity(ranges.Site, ranges.IP, ranges.MinTime, ranges.MaxTime, 1, 10, 4) = 1
If the first and last dates are less than 10 minutes apart, it checks whether the hit count has been exceeded. If the first and last dates are more than 10 minutes apart, it advances the first date in intervals of 10 minutes and checks whether the hit count was exceeded during each 10-minute period.
I hope this is what you need.
Barry

Related

Return 0 with dates having empty results [duplicate]

I want to show all dates between two dates; when the data for a date is missing, it should show zero in the val column.
declare #temp table (
id int identity(1,1) not null,
CDate smalldatetime ,
val int
)
INSERT STATEMENT FOR DATA TO CHECK
insert into #temp select '10/2/2012',1
insert into #temp select '10/3/2012',1
insert into #temp select '10/5/2012',1
insert into #temp select '10/7/2012',2
insert into #temp select '10/9/2012',2
insert into #temp select '10/10/2012',2
insert into #temp select '10/13/2012',2
insert into #temp select '10/15/2012',2
Retrieve records between first day of month and today
select * from #temp where CDate between '10/01/2012' AND '10/15/2012'
When I run this query it shows me all the data between these two dates, but I also want to include the missing dates with val=0.
SQL FIDDLE WITH SAMPLE DATA
-- Generate one row per calendar day from 10/01/2012 through 10/15/2012 and
-- attach the stored value for that day, or 0 when the day has no row.
;with d(date) as (
    select cast('10/01/2012' as datetime)
    union all
    select dateadd(day, 1, date)
    from d
    where date < '10/15/2012'
)
select t.ID,
       d.date as CDate,
       coalesce(t.val, 0) as val
from d
left join temp as t
    on d.date = t.CDate
order by d.date
OPTION (MAXRECURSION 0) -- use this if your dates are >99 days apart
You need to make up the dates, so I've used a recursive common table expression here.
SQL Fiddle
MAXRECURSION number
Specifies the maximum number of recursions allowed for this query. number is a nonnegative
integer between 0 and 32767. When 0 is specified, no limit is applied. If this option is
not specified, the default limit for the server is 100.
When the specified or default number for MAXRECURSION limit is reached during query
execution, the query is ended and an error is returned.
This will work as long as there are less than 2047 days between from and to dates
-- One row per day from @from through @to (both inclusive), with val = 0 for
-- days that have no row in the data table. master..spt_values type 'P'
-- serves as a number list starting at 0.
declare @from smalldatetime = '10/01/2012'
declare @to smalldatetime = '10/15/2012'
select t.id,
       dateadd(day, s.number, @from) as CDate,
       isnull(t.val, 0) as val
from master..spt_values s
left join #temp t
    on t.CDate = dateadd(day, s.number, @from)
where s.type = 'P'
  -- <= so that the offset of @to itself is included
  and s.number <= datediff(day, @from, @to)
order by s.number
I think the best way to do this is to create your own table with dates (you can also use master.dbo.spt_values, but I personally don't like that solution)
-- Build a table variable with one row per day between the first and the last
-- CDate in temp, then left join the data so missing days come back as 0.
declare @Temp_Dates table (CDate datetime)
declare @Date datetime
declare @LastDate datetime
-- read the range once instead of re-querying the maximum on every iteration
select @Date = min(CDate), @LastDate = max(CDate) from temp
while @Date <= @LastDate
begin
    insert into @Temp_Dates (CDate)
    select @Date
    select @Date = dateadd(dd, 1, @Date)
end
-- val is the column the question asks for; id is kept for existing readers
select D.CDate, isnull(T.id, 0) as id, isnull(T.val, 0) as val
from @Temp_Dates as D
left outer join temp as T on T.CDate = D.CDate
order by D.CDate
you can also use recursive solution with CTE
-- Walk day by day from the first to the last CDATE in TEMP (both inclusive)
-- and record the stored VAL for each day, or 0 when the day has no row.
DECLARE @min DATETIME,
        @max DATETIME,
        @val INT
SELECT @min = Min(CDATE),
       @max = Max(CDATE)
FROM TEMP
DECLARE @temp TABLE
(
    CDATE SMALLDATETIME,
    VAL INT
)
-- <= so that the last date is emitted as well
WHILE @min <= @max
BEGIN
    -- default for a day without a row; the SELECT below leaves @val
    -- untouched when nothing matches
    SET @val = 0
    SELECT @val = VAL
    FROM TEMP
    WHERE CDATE = @min
    INSERT @temp (CDATE, VAL)
    VALUES (@min, @val)
    SET @min = Dateadd(D, 1, @min)
END
SELECT CDATE, VAL
FROM @temp
-- Self-contained example: sample rows in a table variable, then one output
-- row per day from @startDate through @endDate with Val = 0 for missing days.
Declare @temp Table(id int identity(1,1) not null,CDate smalldatetime ,val int)
insert into @temp select '10/2/2012',1
insert into @temp select '10/3/2012',1
insert into @temp select '10/5/2012',1
insert into @temp select '10/7/2012',2
insert into @temp select '10/9/2012',2
insert into @temp select '10/10/2012',2
insert into @temp select '10/13/2012',2
insert into @temp select '10/15/2012',2
DECLARE @startDate DATE= '10/01/2012'
DECLARE @endDate DATE= '10/15/2012'
SELECT t.Id, X.[Date],Val = COALESCE(t.val,0)
FROM
    -- day list: @startDate plus each type 'P' number, up to @endDate
    (SELECT [Date] = DATEADD(Day,Number,@startDate)
     FROM master..spt_values
     WHERE Type='P'
       AND DATEADD(day,Number,@startDate) <= @endDate)X
LEFT JOIN @temp t
    ON X.[Date] = t.CDate
ORDER BY X.[Date]
using a recursive cte with min and max
-- Sample rows in a table variable, then a recursive CTE that walks from the
-- smallest to the largest dt one day at a time; days without a row get val 0.
declare @T table (id int identity(1,1) primary key, dt date not null, val int not null);
insert into @T (dt, val) values
  ('10/2/2012',1)
, ('10/3/2012',1)
, ('10/5/2012',1)
, ('10/7/2012',2)
, ('10/9/2012',2)
, ('10/10/2012',2)
, ('10/13/2012',2)
, ('10/15/2012',2);
with cte as
( -- anchor: first day, carrying the last day along as mx
  select min(dt) as dt, max(dt) as mx
  from @T
  union all
  select dateadd(dd, 1, dt), mx
  from CTE
  where dt < mx
)
select c.dt, isnull(t.val, 0) as val
from cte c
left join @T t
    on c.dt = t.dt
order by c.dt
option (maxrecursion 0);
dt val
---------- -----------
2012-10-02 1
2012-10-03 1
2012-10-04 0
2012-10-05 1
2012-10-06 0
2012-10-07 2
2012-10-08 0
2012-10-09 2
2012-10-10 2
2012-10-11 0
2012-10-12 0
2012-10-13 2
2012-10-14 0
2012-10-15 2

Split the overlapping time in SQL

I have a requirement to equally split the duration for records which has overlapping in the datetime.
Example:
As per the example, if I calculate total runtime of the machine for Order 1, it is 3 hours. But I want it to be 2 hours because in the same machine another order ran between that duration (From 9 AM to 11 AM).
I tried searching the forum, and all the answers pointed to excluding the overlapping duration or doing some other functionality. But I want to split the overlapping duration across all the records.
Sample Table Structure:
-- Sample data: three orders on machine M2 whose run times overlap.
declare @st datetime, @et datetime;
DECLARE @table TABLE (Machine varchar(4),OrderId varchar(6),StartTime DateTime2, EndTime DateTime2)
INSERT INTO @table (Machine, OrderId, StartTime, EndTime) SELECT 'M2','ORD1','2017-11-01 10:30:00.000', '2017-11-01 12:00:00.000'
INSERT INTO @table (Machine, OrderId, StartTime, EndTime) SELECT 'M2','ORD2','2017-11-01 11:00:00.000', '2017-11-01 12:30:00.000'
INSERT INTO @table (Machine, OrderId, StartTime, EndTime) SELECT 'M2','ORD3','2017-11-01 11:30:00.000', '2017-11-01 13:00:00.000'
Expected Result:
Expected Result
Based on the above picture,
Duration for ORD1 = 30 MIN + 15 MIN (30 MIN overlap between ORD1 and ORD2) + 10 MIN (30 MIN overlap between ORD1, ORD2 and ORD3)
Duration for ORD2 = 15 MIN + 10 MIN + 15 MIN
Duration for ORD3 = 10 MIN + 15 MIN + 30 MIN
Total Machine Run time will be 55 + 40 + 55 = 150 MIN (2 Hours and 30 MIN)
Thanks,
Aravinth
You should be able to use "window functions" to determine the overall span then divide by number of orders processed, along these lines:
-- Sketch: for every row, compute over its partition the earliest and latest
-- datetime_start, the row count, and the seconds between those two start
-- times divided by a row count.
-- NOTE(review): this is a template, not runnable as written - the inner query
-- is a placeholder, and "order" / "date(...)" look like they need quoting or
-- a dialect-specific replacement; confirm against the target engine.
select
machine, order, datetime_start, datetime_end
, min(datetime_start) over(partition by machine, order, date(datetime_start)) min_dt
, max(datetime_start) over(partition by machine, order, date(datetime_start)) max_dt
, count(*) over(partition by machine, order, date(datetime_start)) num
-- span in seconds between the partition's first and last start time ...
, datediff(ss,min(datetime_start) over(partition by machine, order, date(datetime_start))
,max(datetime_start) over(partition by machine, order, date(datetime_start)))
-- ... divided by a count whose partition omits the date part used above
-- NOTE(review): the differing partition may be unintentional - confirm.
/ count(*) over(partition by machine, order) as equal_duration
from (
select * from your_query here
)
For more; we would need much more detail from you.
Thanks for all the response. Finally this scenario has been covered with the help of one of our team member. Below is the solution,
-- Computes the run time (in minutes) credited to order 10001 on machine M1
-- when every minute of machine time is shared equally among the orders
-- running at that moment.
DECLARE @table TABLE (OrdId varchar(12), MId varchar(4), ST DateTime, ET DateTime)
INSERT INTO @table (OrdId, MId, ST, ET) SELECT '10001','M1','2017-11-01 10:30:00.000', '2017-11-01 12:00:00.000'
INSERT INTO @table (OrdId, MId, ST, ET) SELECT '10002','M1','2017-11-01 11:00:00.000', '2017-11-01 12:30:00.000'
INSERT INTO @table (OrdId, MId, ST, ET) SELECT '10003','M1','2017-11-01 11:30:00.000', '2017-11-01 14:00:00.000'
INSERT INTO @table (OrdId, MId, ST, ET) SELECT '10004','M2','2017-11-01 14:30:00.000', '2017-11-01 16:00:00.000'

DECLARE @ST datetime, @ET datetime, @NEXT_ST datetime, @RC smallint, @MCHr smallint;
SET @MCHr = 0;

-- time span of the order being measured
SELECT @ST = MIN(ST), @ET = MAX(ET)
FROM @table
WHERE MId = 'M1' AND OrdId = '10001';

-- Walk the span segment by segment. A segment ends at the next moment any
-- order on the machine starts or ends, so the set of running orders is
-- constant inside it.
WHILE @ST < @ET
BEGIN
    SELECT @NEXT_ST = MIN(boundary)
    FROM (
        SELECT ST AS boundary FROM @table WHERE MId = 'M1' AND ST > @ST
        UNION ALL
        SELECT ET AS boundary FROM @table WHERE MId = 'M1' AND ET > @ST
    ) AS b;

    -- never step past the end of the measured order
    IF @NEXT_ST IS NULL OR @NEXT_ST > @ET
        SET @NEXT_ST = @ET;

    -- orders running during this segment (started by @ST, not yet ended)
    SET @RC = (SELECT COUNT(*) FROM @table WHERE MId = 'M1' AND ST <= @ST AND ET > @ST);

    -- integer division: the share is truncated to whole minutes
    IF @RC > 0
        SET @MCHr = @MCHr + DATEDIFF(MI, @ST, @NEXT_ST) / @RC;

    SET @ST = @NEXT_ST;
END;

SELECT @MCHr AS MCHr

Average per quarter hour

I have a table with these columns:
Id, Method, DateTime, time taken
Ex
1, Done, 2014-06-22 08:18:00.000, 2000
2, Not Done, 2014-06-23 04:15:00.000, 5000
3, Done, 2014-06-23 14:15:00.000, 6000
I want to have a result set as, "average time taken by DONE methods in each 15 min interval between 8 AM and 3 PM"
Please guide me on how to proceed on this, I am not sure if cursor fits in this req.
You can use a CTE to generate a list of quarters. Then left join to look up the run times per quarter. A group by will allow you to calculate the average.
In SQL Server 2012, the time type is available, and you can:
-- quarters: the start time of every 15-minute slot from 08:00 up to 14:45.
-- Each slot is then matched with the 'Done' runs whose time of day falls in
-- [slot start, slot start + 15 min); slots without runs yield a NULL average.
; with quarters as
(
    select cast('08:00' as time) as time
    union all
    select dateadd(minute, 15, time)
    from quarters
    where time <= '14:30'
)
select q.time,
       avg(rt.time_taken) as avg_time_taken
from quarters as q
left join RunTime as rt
    on cast(rt.dt as time) >= q.time
    and cast(rt.dt as time) < dateadd(minute, 15, q.time)
    and rt.method = 'Done'
group by q.time
Live example at SQL Fiddle.
For SQL Server 2008 R2 and earlier, you can use integer math instead:
-- quarters: slot start times expressed as minutes since midnight, from
-- 8*60 (08:00) up to 15*60 - 15 (14:45), i.e. the slots covering 08:00-15:00.
; with quarters as
(
    select 8*60 as min
    union all
    select min + 15
    from quarters
    -- stop after the 14:45 slot; "min < 15*60" would add a 15:00-15:15 slot
    where min < 15*60 - 15
)
select q.min / 60 as hour
,      q.min % 60 as minute
,      avg(rt.time_taken) as avg_time_taken
from quarters q
left join
    (
    -- 'Done' runs with their time of day as minutes since midnight
    select datepart(minute, dt) +
           60 * datepart(hour, dt) as min
    ,      time_taken
    from RunTime
    where method = 'Done'
    ) rt
on q.min <= rt.min and rt.min < q.min + 15
group by
    q.min;
Live example at SQL Fiddle.
I'm not entirely sure if this is what you want, but here is the code:
-- Walks from the earliest to the latest [Datetime] in #Test in 15-minute
-- steps and stores, for each step, the average timeTaken of the 'Done' rows
-- that fall inside it.
CREATE TABLE #Test(
    id int IDENTITY(1,1) PRIMARY KEY,
    Method nvarchar(50),
    [Datetime] datetime,
    timeTaken Bigint
)
CREATE TABLE #Result(
    [Between] datetime,
    [And] datetime,
    [Avg] bigint)
INSERT INTO #Test (Method,[Datetime],timeTaken)
VALUES
    ('Done', '2014-06-22 08:18:00.000', 2000),
    ('Not Done', '2014-06-23 04:15:00.000', 5000),
    ('Done', '2014-06-23 14:15:00.000', 6000)
DECLARE @MaxTime datetime,@StartTime datetime,@Next datetime
SELECT @MaxTime = MAX([datetime]),
       @StartTime = MIN([datetime])
FROM #TEST
WHILE @StartTime <= @MaxTime
BEGIN
    SET @Next = DATEADD(MINUTE,15,@StartTime)
    INSERT INTO #Result ([Between], [And], [Avg])
    SELECT @StartTime AS [Between], @Next AS [And],AVG(timeTaken) AS [AVG]
    FROM #Test
    -- half-open interval: a row exactly on a boundary belongs to the step
    -- that starts there, not also to the step that ends there
    WHERE [Datetime] >= @StartTime
      AND [Datetime] < @Next
      AND Method = 'Done'
    SET @StartTime = @Next
END
SELECT [Between], [And], [Avg] FROM #Result
DROP TABLE #Test
DROP TABLE #Result
You can now set a where to the Select * from #result in which you can say between 8 AM and 3 PM
Please let me know if this is what you want
Etienne

In a set of overlapping, version-numbered intervals, find the most recent version at each point in time

I'm working with a set of date intervals where each interval has a version number and new intervals will frequently overlap old ones, or even be subsets of them. From this data I need to calculate a new set of intervals that shows the most recent version number, at each point in time. Is there a set-based solution to this problem?
Here's an illustration:
Interval 1: 11111111111111111111111
Interval 2: 2222222222
Interval 3: 33333333333333
Interval 4: 444444444
Interval 5: 555555555
Result : 11333333333333331155555555544
Here is a sample of the data I'm working with:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 1/1/2011 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2012 12/31/2012 6
1 10/1/2012 11/1/2012 8
... and the desired output:
groupId startDate endDate version
-------- --------- ---------- ------
1 1/1/2010 10/1/2010 1
1 10/1/2010 7/5/2011 2
1 7/5/2011 8/13/2012 3
1 8/13/2012 10/1/2012 6
1 10/1/2012 11/1/2012 8 << note how version 8 supersedes version 6
1 11/1/2012 12/31/2012 6 << version 6 is split into two records
I haven't found any other examples of this problem, my googling only turns up queries that identify gaps and islands or covering sets.
I think I have an iterative solution (SQL Server 2008). It starts with a temp table for intervals in the result set and defines the start and end points for the range that we want to cover by inserting records with special version numbers. Then, it repeatedly identifies gaps between result set intervals and attempts to fill them with the most recent records from the original data set, until there are no more gaps or no more records to add:
GO
-- Create data set and results table
-- Source intervals: each row is one version of a date range within a group.
CREATE TABLE #Data (
    groupId INT,
    startDate DATE,
    endDate DATE,
    versionId INT
)
INSERT INTO #Data (groupId, startDate, endDate, versionId)
VALUES
    (1, '2007-12-22', '2008-12-22', 8),
    (1, '2008-12-22', '2009-12-22', 9),
    (1, '2009-12-22', '2010-12-22', 10),
    (1, '2010-12-22', '2011-12-22', 11),
    (1, '2011-01-01', '2011-11-30', 500),
    (1, '2011-12-22', '2012-12-22', 12),
    (1, '2012-01-22', '2012-12-22', 13),
    (1, '2012-01-22', '2012-12-22', 14),
    (1, '2012-04-22', '2012-12-22', 17),
    (1, '2012-04-22', '2012-12-22', 19),
    (2, '2010-01-01', '2011-01-01', 1),
    (2, '2010-10-01', '2011-07-05', 2),
    (2, '2011-07-05', '2012-08-13', 3),
    (2, '2012-08-13', '2012-12-31', 6),
    (2, '2012-10-01', '2012-11-01', 8)
-- Result table, seeded with two placeholder intervals per group that mark the
-- lower and upper edge of the range to cover. Placeholders carry the special
-- version number @placeholderId and are filtered out of the final output.
CREATE TABLE #Results (
    groupId VARCHAR(10)
    ,startDate DATE
    ,endDate DATE
    ,versionId BIGINT
)
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20121231'
SET @placeholderId = 999999999999999
-- GROUP BY already yields one row per group, so no DISTINCT is needed
INSERT #Results (groupId, startDate, endDate, versionId)
SELECT
    groupId
    -- lower placeholder: spans from the earlier to the later of
    -- (first startDate in the data, @startDate)
    ,CASE WHEN MIN(startDate) < @startDate THEN MIN(startDate) ELSE @startDate END
    ,CASE WHEN MIN(startDate) < @startDate THEN @startDate ELSE MIN(startDate) END
    ,@placeholderId
FROM #Data
GROUP BY groupId
UNION ALL
SELECT
    groupId
    -- upper placeholder: spans from the earlier to the later of
    -- (last endDate in the data, @endDate)
    ,CASE WHEN MAX(endDate) < @endDate THEN MAX(endDate) ELSE @endDate END
    ,CASE WHEN MAX(endDate) < @endDate THEN @endDate ELSE MAX(endDate) END
    ,@placeholderId
FROM #Data
GROUP BY groupId
GO
-- Fill gaps in results table
-- Repeatedly finds the uncovered stretches ("gaps") between the intervals
-- already in #Results and fills each one with the highest-version record from
-- #Data that overlaps it, clipped to the gap. Stops after 10 passes, or as
-- soon as a pass inserts nothing (further passes would see the same state).
DECLARE @startDate DATE
DECLARE @endDate DATE
DECLARE @placeholderId BIGINT
SET @startDate = '20030101'
SET @endDate = '20121231' -- same range end as the batch that seeds #Results
SET @placeholderId = 999999999999999
DECLARE @counter INT
SET @counter = 0
WHILE @counter < 10
BEGIN
    SET @counter = @counter + 1;
    WITH Gaps AS (
        SELECT
            gs.groupId
            ,gs.startDate
            ,MIN(ge.endDate) as endDate
            ,ROW_NUMBER() OVER (ORDER BY gs.groupId, gs.startDate) as gapId
        FROM (
            -- gap starts: interval ends that no other interval continues past
            SELECT groupId, endDate as startDate
            FROM #Results r1
            WHERE NOT EXISTS (
                SELECT *
                FROM #Results r2
                WHERE r2.groupId = r1.groupId
                AND r2.versionId <> r1.versionId
                AND r2.startDate <= r1.endDate
                AND r2.endDate > r1.endDate
            )
            AND NOT (endDate >= @endDate AND versionId = @placeholderId)
        ) gs
        INNER JOIN (
            -- gap ends: interval starts that no other interval leads into
            SELECT groupId, startDate as endDate
            FROM #Results r1
            WHERE NOT EXISTS (
                SELECT *
                FROM #Results r2
                WHERE r2.groupId = r1.groupId
                AND r2.versionId <> r1.versionId
                AND r2.endDate >= r1.startDate
                AND r2.startDate < r1.startDate
            )
            AND NOT (startDate <= @startDate AND versionId = @placeholderId)
        ) ge
            ON ge.groupId = gs.groupId
            AND ge.endDate >= gs.startDate
        GROUP BY gs.groupId, gs.startDate
    )
    INSERT #Results (
        groupId
        ,startDate
        ,endDate
        ,versionId
    )
    SELECT
        d.groupId
        -- clip the chosen record to the gap
        ,CASE WHEN d.startDate < g.startDate THEN g.startDate ELSE d.startDate END
        ,CASE WHEN d.endDate > g.endDate THEN g.endDate ELSE d.endDate END
        ,d.versionId
    FROM #Data d
    INNER JOIN Gaps g
        ON g.groupId = d.groupId
        AND g.startDate <= d.endDate
        AND g.endDate >= d.startDate
    INNER JOIN (
        -- per gap: the highest version that overlaps it, is lower than the
        -- versions bordering the gap, and is not fully covered by a newer record
        SELECT
            d.groupId
            ,gapId
            ,MAX(d.versionId) as versionId
        FROM #Data d
        INNER JOIN Gaps g
            ON g.groupId = d.groupId
            AND g.startDate <= d.endDate
            AND g.endDate >= d.startDate
        WHERE d.versionId < (
            SELECT MIN(versionId)
            FROM #Results r
            WHERE r.groupId = d.groupId
            AND (r.startDate = g.endDate OR r.endDate = g.startDate)
        )
        AND NOT EXISTS (
            SELECT *
            FROM #Data dsup
            WHERE dsup.groupId = d.groupId
            AND dsup.versionId > d.versionId
            AND dsup.startDate <= d.startDate
            AND dsup.endDate >= d.endDate
        )
        GROUP BY
            d.groupId
            ,g.gapId
    ) mg
        ON mg.groupId = g.groupId
        AND mg.gapId = g.gapId
        AND mg.versionId = d.versionId;
    -- nothing was added, so every later pass would find the same gaps
    IF @@ROWCOUNT = 0 BREAK;
END
SELECT groupId, startDate, endDate, versionId
FROM #Results
WHERE versionId <> @placeholderId
order by groupId, startDate
A set-based solution would be much more useful, but I've struggled to find one. Any ideas?
-- create a dates table
-- (one row per calendar day; filled from the number list in
-- master..spt_values: "years" picks the year offset, "days" the day within it)
create table dates (thedate date primary key clustered);
;with dates(thedate) as (
-- dateadd(yy, n, 0) is 1 January of the year n years after date 0;
-- adding days.number moves forward to each day of that year
select dateadd(yy,years.number,0)+days.number
from master..spt_values years
join master..spt_values days
-- limit days.number to the number of days in that year (day-of-year of its last day)
on days.type='p' and days.number < datepart(dy,dateadd(yy,years.number+1,0)-1)
where years.type='p' and years.number between 100 and 150
-- note: 100-150 creates dates in the year range 2000-2050
-- adjust as required
)
insert dbo.dates select * from dates;
-- for each date, determine the prevailing version
-- (the highest versionId among the intervals that contain the date;
-- both startDate and endDate are treated as inside the interval)
select t.groupId, d.thedate, max(t.versionId) versionId
into #tmp1
from dates d
join #Data t on t.startDate <= d.thedate and d.thedate <= t.endDate
group by t.groupId, d.thedate;
-- create index to help
create clustered index cix_tmp1 on #tmp1(groupId, thedate, versionId);
-- collapse runs of consecutive days that carry the same version into one
-- interval per run
-- Within one (groupId, versionId), consecutive days advance the day number
-- and the row number in step, so their difference ("island") is constant for
-- a run and changes whenever a day is skipped. Grouping on it gives every
-- run - including the last one of a group - its own real start and end date,
-- and keeps days that no interval covers out of the neighbouring intervals.
;with t as (
    select a.groupId, a.thedate, a.versionId,
           island = datediff(d, '19000101', a.thedate)
                    - row_number() over (partition by a.groupId, a.versionId order by a.thedate)
    from #tmp1 a
)
select t.groupId, min(t.thedate) startdate, max(t.thedate) enddate, t.versionId
from t
group by t.groupId, t.versionId, t.island
order by groupId, startdate;
Of course, you can do everything in "one query" but do it at your peril, as the performance goes down the drain, big time.
DO NOT USE - for academic interest only-
-- Single-statement variant: generate the calendar, pick the prevailing
-- version per day, and collapse consecutive days into intervals.
;with dates(thedate) as (
    -- one row per calendar day, built from the spt_values number list
    select dateadd(yy,years.number,0)+days.number
    from master..spt_values years
    join master..spt_values days
        on days.type='p' and days.number < datepart(dy,dateadd(yy,years.number+1,0)-1)
    where years.type='p' and years.number between 100 and 150
    -- note: 100-150 creates dates in the year range 2000-2050
    -- adjust as required
), tmp1 as (
    -- prevailing (highest) version for every covered day
    select t.groupId, d.thedate, max(t.versionId) versionId
    from dates d
    join #Data t on t.startDate <= d.thedate and d.thedate <= t.endDate
    group by t.groupId, d.thedate
), t as (
    -- day number minus row number is constant within a run of consecutive
    -- days of one version, so it identifies the run
    select a.groupId, a.thedate, a.versionId,
           island = datediff(d, '19000101', a.thedate)
                    - row_number() over (partition by a.groupId, a.versionId order by a.thedate)
    from tmp1 a
)
-- one row per run; the last run of a group gets a real end date too
select t.groupId, min(t.thedate) startdate, max(t.thedate) enddate, t.versionId
from t
group by t.groupId, t.versionId, t.island
order by groupId, startdate;
Updated due to some feedback from the comments. I'm not going to worry about the end cases that a few people have pointed out since they've been proven trivial to solve in other Answers, but I wanted to go ahead and get a working version out that didn't require DDL... I figure it's just good to have options. :-)
This code should work:
-- Cuts every group's timeline at each interval start and at the day after
-- each interval end, then reports the highest version covering the last day
-- of every resulting segment. The final segment of a group ends at the
-- hard-coded date '2012-12-31'.
;with bounds as (
    -- distinct cut points per group
    select groupID, startDate from #Data
    union
    select groupID, DATEADD(DAY, 1, endDate) as startDate from #Data
),
numbered as (
    select groupId, startDate,
           ROW_NUMBER() over (partition by groupID order by startDate) as rownumber
    from bounds
),
nesty as (
    -- a segment runs from one cut point to the day before the next one
    select starter.groupId, starter.startDate,
           coalesce(DATEADD(DAY,-1,ender.startDate),('2012-12-31')) AS segment_end_date
    from numbered starter
    left outer join numbered ender
        on starter.groupId = ender.groupId
        and starter.rownumber = ender.rownumber - 1
    where starter.startDate <= coalesce(DATEADD(DAY,-1,ender.startDate),('2012-12-31'))
)
select nesty.groupId, nesty.startDate, nesty.segment_end_date, Max(bob.versionId)
from nesty
left outer join #Data bob
    on bob.groupId = nesty.groupId
    and nesty.segment_end_date between bob.startDate and bob.endDate
group by nesty.groupId, nesty.startDate, nesty.segment_end_date
order by nesty.groupId, nesty.startDate
There are a couple of tiny caveats I had to do to get it into a single SQL statement. First, the max end date is not dynamic; I hard coded '2012-12-31'. You can replace it with a MAX(endDate), but you can't put that in the GROUP BY statement. If you can do this in a procedure, you can do:
select into #max_end_date MAX(endDate) from #Data
and replace '2012-12-31' with #max_end_date.
Second, I do not guarantee that two adjacent segments won't have the same value! This may or may not be important to you... that is, if you had the following:
Interval 1: 111111
Interval 2: 22222222222222
Your output would be:
Interval 1: 2222
Interval 2: 2222222222
Still, I think it's worth hitting it in a simple and efficient SQL query. It may not be hard to fix those caveats, but it didn't matter to what I was working on, so I haven't bothered yet.
If the end dates are important, as well as gaps, here's a way you can do it. This solution could also be adapted to work if your versions are datetimes instead of just dates.
First a bunch of functions
One to get the version at a given date
-- Returns the highest VersionID among the intervals of @GroupID that contain
-- @Date, or NULL when no interval contains it.
-- An interval contains @Date when StartDate <= @Date < EndDate + 1 day.
Create Function dbo.VersionAtDate(@GroupID int, @Date datetime) Returns int as
Begin
    Declare @Ret int = Null
    Select
        @Ret = Max(VersionID)
    From
        VersionedIntervals iv
    Where
        iv.GroupID = @GroupID And
        iv.StartDate <= @Date And
        iv.EndDate + 1 > @Date -- if dates were half open intervals this would just be iv.EndDate > @Date
    Return @Ret
End
Next to get the midpoint of two datetimes (minute resolution):
-- Returns the point halfway between @Start and @End, computed in whole
-- minutes (the minute difference is halved with integer division and added
-- to @Start).
Create Function dbo.Midpoint(@Start datetime, @End datetime) Returns datetime as
Begin
    Return DateAdd(Minute, DateDiff(Minute, @Start, @End) / 2, @Start)
End
Version at a midpoint:
-- Returns the version in effect for @GroupID at the midpoint between @Start
-- and @End (dbo.VersionAtDate applied to dbo.Midpoint).
Create Function dbo.VersionAtMidpoint(@GroupID int, @Start datetime, @End datetime) returns int as
Begin
    Return dbo.VersionAtDate(@GroupID, dbo.Midpoint(@Start, @End))
End;
Finally a table valued function to help with the fact that some points are the start of one range and the end of another, and it helps to get two rows from one input for this:
-- returns two rows if a point is the end of one interval and the
-- start of another
--
-- Parameters describe the run collected so far and the next boundary:
--   @GroupID  group being processed
--   @RN       row number of the boundary @Next
--   @Start    start of the current run
--   @End      end of the current run so far
--   @Next     next boundary date
--   @Version  version of the current run
-- Rows written with RN = -1 close a run; they are not picked up again by a
-- caller that joins on RN + 1.
Create Function dbo.EndPoints(@GroupID int, @RN bigint, @Start datetime, @End datetime, @Next datetime, @Version int)
Returns @EndPoints Table (
    GroupID int,
    RN bigint,
    Version int,
    StartDate datetime,
    EndDate datetime
) As
Begin
    Declare @NextVersion int, @VersionAtMidpoint int
    Set @NextVersion = dbo.VersionAtDate(@GroupID, @Next)
    If @NextVersion = @Version
        -- interval carries on
        Insert Into @EndPoints Select @GroupID, @RN, @Version, @Start, @Next
    Else
    Begin
        -- interval has ended
        Set @VersionAtMidpoint = dbo.VersionAtMidPoint(@GroupID, @End, @Next)
        If @VersionAtMidpoint != @Version
            -- we have something like this, start a run of 3s (run of 4s is already ended by previous call)
            -- 3333333
            -- 44
            Insert Into @EndPoints Select @GroupID, @RN, @VersionAtMidpoint, @End, @Next
        Else
        Begin
            -- We have something like this, end the run of 3s and start the run of fours
            -- 33333
            -- 444
            Insert Into @EndPoints Select @GroupID, -1, @Version, @Start, @Next
            Insert Into @EndPoints Select @GroupID, @RN, @NextVersion, @Next, @Next
        End
    End
    Return
End
With all this machinery in place, finally a recursive CTE plus a table variable; you'll need to set maxrecursion appropriately:
-- @Bounds: every distinct start or end date per group, numbered in date order.
Declare @Bounds Table (GroupID int, RN bigint, BoundDate datetime, Primary Key (GroupID, RN))
Insert Into
    @Bounds
Select
    GroupID,
    Row_Number() Over (Partition By GroupID Order By BoundDate),
    BoundDate
From (
    Select
        GroupID,
        StartDate As BoundDate
    From
        dbo.VersionedIntervals
    Union
    Select
        GroupID,
        EndDate
    From
        dbo.VersionedIntervals
) a

-- VersionedBounds walks the boundaries of each group in order. The anchor is
-- the first boundary; each recursive step hands the run collected so far and
-- the next boundary to dbo.EndPoints, which extends the run or closes it and
-- opens the next one.
;With VersionedBounds (GroupID, RN, StartDate, EndDate, Version) as (
    Select
        GroupID,
        RN,
        BoundDate,
        BoundDate,
        dbo.VersionAtDate(GroupID, BoundDate)
    From
        @Bounds
    Where
        RN = 1
    Union All
    Select
        e.GroupID,
        e.RN,
        e.StartDate,
        e.EndDate,
        e.Version
    From
        @Bounds b
    Inner Join
        VersionedBounds v
        On v.GroupID = b.GroupID And b.RN = v.RN + 1
    Cross Apply
        dbo.EndPoints(v.GroupID, b.RN, v.StartDate, v.EndDate, b.BoundDate, v.Version) e
)
-- one output row per run start, with the furthest end date reached for it
Select
    GroupID,
    StartDate,
    Max(EndDate) As EndDate,
    Max(Version) As Version
From
    VersionedBounds
Group By
    GroupID,
    StartDate
Order By
    GroupID,
    StartDate
http://sqlfiddle.com/#!6/b95bd/2

SHOW ALL Dates data between two dates; if no row exists for particular date then show zero in all columns

I want to show all dates between two dates; when the data for a date is missing, it should show zero in the val column.
declare #temp table (
id int identity(1,1) not null,
CDate smalldatetime ,
val int
)
INSERT STATEMENT FOR DATA TO CHECK
insert into #temp select '10/2/2012',1
insert into #temp select '10/3/2012',1
insert into #temp select '10/5/2012',1
insert into #temp select '10/7/2012',2
insert into #temp select '10/9/2012',2
insert into #temp select '10/10/2012',2
insert into #temp select '10/13/2012',2
insert into #temp select '10/15/2012',2
Retrieve records between first day of month and today
select * from #temp where CDate between '10/01/2012' AND '10/15/2012'
When I run this query it shows me all the data between these two dates, but I also want to include the missing dates with val=0.
SQL FIDDLE WITH SAMPLE DATA
-- Generate one row per calendar day from 10/01/2012 through 10/15/2012 and
-- attach the stored value for that day, or 0 when the day has no row.
;with d(date) as (
    select cast('10/01/2012' as datetime)
    union all
    select dateadd(day, 1, date)
    from d
    where date < '10/15/2012'
)
select t.ID,
       d.date as CDate,
       coalesce(t.val, 0) as val
from d
left join temp as t
    on d.date = t.CDate
order by d.date
OPTION (MAXRECURSION 0) -- use this if your dates are >99 days apart
You need to make up the dates, so I've used a recursive common table expression here.
SQL Fiddle
MAXRECURSION number
Specifies the maximum number of recursions allowed for this query. number is a nonnegative
integer between 0 and 32767. When 0 is specified, no limit is applied. If this option is
not specified, the default limit for the server is 100.
When the specified or default number for MAXRECURSION limit is reached during query
execution, the query is ended and an error is returned.
This will work as long as there are less than 2047 days between from and to dates
-- One row per day from @from through @to (both inclusive), with val = 0 for
-- days that have no row in the data table. master..spt_values type 'P'
-- serves as a number list starting at 0.
declare @from smalldatetime = '10/01/2012'
declare @to smalldatetime = '10/15/2012'
select t.id,
       dateadd(day, s.number, @from) as CDate,
       isnull(t.val, 0) as val
from master..spt_values s
left join #temp t
    on t.CDate = dateadd(day, s.number, @from)
where s.type = 'P'
  -- <= so that the offset of @to itself is included
  and s.number <= datediff(day, @from, @to)
order by s.number
I think the best way to do this is to create your own table with dates (you can also use master.dbo.spt_values, but I personally don't like that solution)
-- Build a table variable with one row per day between the first and the last
-- CDate in temp, then left join the data so missing days come back as 0.
declare @Temp_Dates table (CDate datetime)
declare @Date datetime
declare @LastDate datetime
-- read the range once instead of re-querying the maximum on every iteration
select @Date = min(CDate), @LastDate = max(CDate) from temp
while @Date <= @LastDate
begin
    insert into @Temp_Dates (CDate)
    select @Date
    select @Date = dateadd(dd, 1, @Date)
end
-- val is the column the question asks for; id is kept for existing readers
select D.CDate, isnull(T.id, 0) as id, isnull(T.val, 0) as val
from @Temp_Dates as D
left outer join temp as T on T.CDate = D.CDate
order by D.CDate
you can also use recursive solution with CTE
-- Walk day by day from the first to the last CDATE in TEMP (both inclusive)
-- and record the stored VAL for each day, or 0 when the day has no row.
DECLARE @min DATETIME,
        @max DATETIME,
        @val INT
SELECT @min = Min(CDATE),
       @max = Max(CDATE)
FROM TEMP
DECLARE @temp TABLE
(
    CDATE SMALLDATETIME,
    VAL INT
)
-- <= so that the last date is emitted as well
WHILE @min <= @max
BEGIN
    -- default for a day without a row; the SELECT below leaves @val
    -- untouched when nothing matches
    SET @val = 0
    SELECT @val = VAL
    FROM TEMP
    WHERE CDATE = @min
    INSERT @temp (CDATE, VAL)
    VALUES (@min, @val)
    SET @min = Dateadd(D, 1, @min)
END
SELECT CDATE, VAL
FROM @temp
-- Self-contained example: sample rows in a table variable, then one output
-- row per day from @startDate through @endDate with Val = 0 for missing days.
Declare @temp Table(id int identity(1,1) not null,CDate smalldatetime ,val int)
insert into @temp select '10/2/2012',1
insert into @temp select '10/3/2012',1
insert into @temp select '10/5/2012',1
insert into @temp select '10/7/2012',2
insert into @temp select '10/9/2012',2
insert into @temp select '10/10/2012',2
insert into @temp select '10/13/2012',2
insert into @temp select '10/15/2012',2
DECLARE @startDate DATE= '10/01/2012'
DECLARE @endDate DATE= '10/15/2012'
SELECT t.Id, X.[Date],Val = COALESCE(t.val,0)
FROM
    -- day list: @startDate plus each type 'P' number, up to @endDate
    (SELECT [Date] = DATEADD(Day,Number,@startDate)
     FROM master..spt_values
     WHERE Type='P'
       AND DATEADD(day,Number,@startDate) <= @endDate)X
LEFT JOIN @temp t
    ON X.[Date] = t.CDate
ORDER BY X.[Date]
using a recursive cte with min and max
-- Sample rows in a table variable, then a recursive CTE that walks from the
-- smallest to the largest dt one day at a time; days without a row get val 0.
declare @T table (id int identity(1,1) primary key, dt date not null, val int not null);
insert into @T (dt, val) values
  ('10/2/2012',1)
, ('10/3/2012',1)
, ('10/5/2012',1)
, ('10/7/2012',2)
, ('10/9/2012',2)
, ('10/10/2012',2)
, ('10/13/2012',2)
, ('10/15/2012',2);
with cte as
( -- anchor: first day, carrying the last day along as mx
  select min(dt) as dt, max(dt) as mx
  from @T
  union all
  select dateadd(dd, 1, dt), mx
  from CTE
  where dt < mx
)
select c.dt, isnull(t.val, 0) as val
from cte c
left join @T t
    on c.dt = t.dt
order by c.dt
option (maxrecursion 0);
dt val
---------- -----------
2012-10-02 1
2012-10-03 1
2012-10-04 0
2012-10-05 1
2012-10-06 0
2012-10-07 2
2012-10-08 0
2012-10-09 2
2012-10-10 2
2012-10-11 0
2012-10-12 0
2012-10-13 2
2012-10-14 0
2012-10-15 2