Find consecutive working dates for each employee - sql

In SQL 2000 I have a table that contains the following:
ID Date WorkingTime EmployeeID
For August, this table would contain 200 employees with dates of 8/1 - 8/31. I need to find out what is the MIN date of the first 5 consecutive days of working time for each employee starting at the day passed in and going backward.
For Example:
If employee 123 looked as follows and 8/10/2013 was passed in:
ID Date WorkingTime EmployeeID
1 8/1 1 123
2 8/2 0 123
3 8/3 0 123
4 8/4 1 123
5 8/5 1 123
6 8/6 1 123
7 8/7 1 123
8 8/8 1 123
9 8/9 0 123
10 8/10 1 123
The result would be 8/4. This needs to be done all at once for all of the employees in the table, so they would all have different min dates, all starting on 8/10 since that was the date that was passed into the query. This table is very large in real life and contains many dates and employees, not just in August. I thought about using a cursor to go through all of this but I think that would be really slow. I was also thinking of adding all of the working times to a temp table and doing a datediff on them to find the consecutive 5 with a datediff of 1, but I wasn't quite sure how to execute that. Is there a better way I am not thinking of?

-- Sample data: one row per employee per calendar day; WorkingTime = 1 marks a worked day.
DECLARE @MyTable TABLE
(
    ID INT IDENTITY PRIMARY KEY,
    [Date] SMALLDATETIME NOT NULL,
    WorkingTime INT NOT NULL,
    EmployeeID INT NOT NULL
);
INSERT @MyTable ([Date], WorkingTime, EmployeeID)
-- First employee
SELECT '20130801', 1, 123 UNION ALL
SELECT '20130802', 0, 123 UNION ALL
SELECT '20130803', 0, 123 UNION ALL
SELECT '20130804', 1, 123 UNION ALL
SELECT '20130805', 1, 123 UNION ALL
SELECT '20130806', 1, 123 UNION ALL
SELECT '20130807', 1, 123 UNION ALL
SELECT '20130808', 1, 123 UNION ALL
SELECT '20130809', 0, 123 UNION ALL
SELECT '20130810', 1, 123 UNION ALL
-- Second employee
SELECT '20130801', 1, 126 UNION ALL
SELECT '20130802', 1, 126 UNION ALL
SELECT '20130803', 1, 126 UNION ALL
SELECT '20130804', 1, 126 UNION ALL
SELECT '20130805', 1, 126 UNION ALL
SELECT '20130806', 0, 126 UNION ALL
-- Third employee
SELECT '20130801', 0, 127 UNION ALL
SELECT '20130802', 0, 127 UNION ALL
SELECT '20130803', 1, 127 UNION ALL
SELECT '20130804', 1, 127 UNION ALL
SELECT '20130805', 0, 127 UNION ALL
SELECT '20130806', 0, 127;

-- The day passed in by the caller; rows dated after it are ignored.
DECLARE @DateToPull SMALLDATETIME;
SET @DateToPull = '20130810';

-- Worked days only. DaysDiff is the day number since date 0, RowNum is a running
-- counter in (EmployeeID, DaysDiff) order, so DaysDiff - RowNum stays constant
-- across a run of consecutive worked days.
DECLARE @Results TABLE
(
    EmployeeID INT NOT NULL,
    DaysDiff INT NOT NULL,
    PRIMARY KEY(EmployeeID, DaysDiff), -- This is a "clustered index"/index organized table
    RowNum INT IDENTITY NOT NULL,
    [Date] SMALLDATETIME NOT NULL
);
INSERT @Results (EmployeeID, DaysDiff, [Date])
SELECT x.EmployeeID,
       DATEDIFF(DAY, 0, x.[Date]) AS DaysDiff,
       x.[Date]
FROM @MyTable x
WHERE x.WorkingTime = 1
  AND x.[Date] <= @DateToPull
/*
This ORDER BY clause and the clustered index (PRIMARY KEY(EmployeeID, DaysDiff))
should give a hint to SQL Server so that
RowNum IDENTITY values will be generated in this order: EmployeeID, DaysDiff
Note #1: There is not 100% guarantee that insert order will be the same as
ORDER BY x.EmployeeID, DaysDiff
and
clustered index key (EmployeeID, DaysDiff)
Note #2: This INSERT INTO table with identity column simulates the ROW_NUMBER function
which is available starting with SQL2005.
*/
ORDER BY x.EmployeeID, DaysDiff
OPTION (MAXDOP 1); -- It minimizes the risk of messing up the order of RowNum

-- One row per run of consecutive worked days; keep runs of at least 5 days and
-- report, per employee, the start date of the latest such run.
SELECT y.EmployeeID, MAX(y.GroupStartDate) AS FirstGroupStartDate
FROM
(
    SELECT x.EmployeeID, x.GroupID,
           MIN(x.[Date]) AS GroupStartDate, MAX(x.[Date]) AS GroupEndDate,
           DATEDIFF(DAY, MIN(x.[Date]), MAX(x.[Date]))+1 AS ContinuousDays
    FROM
    (
        SELECT r.EmployeeID, r.[Date], r.DaysDiff - r.RowNum AS GroupID
        FROM @Results r
    ) x
    GROUP BY x.EmployeeID, x.GroupID
) y
WHERE y.ContinuousDays > 4
GROUP BY y.EmployeeID;

Below query will give good start for what you want to achieve, modify the schema based on your tables.
SQL fiddle demo
@DateToPull - the date for which you want to pull data.
#TimeSheet is your original table
#SubsetTimeSheet - table with subset of records from #TimeSheet table. Populated with records from first of the month till passed date.
Note: This query can be written more efficiently with newer version of SQL Server.
-- Date passed in by the caller; streaks are searched from the first of its month up to this day.
declare @DateToPull datetime
select @DateToPull = '08/10/2013'

-- Sample source table (stands in for the real timesheet table).
if object_id('tempdb..#TimeSheet') is not null
    drop table #TimeSheet
create table #TimeSheet
(
    ID int identity(1, 1),
    EmployeeID int,
    [WorkDate] datetime,
    WorkingTime bit
)
insert into #TimeSheet(EmployeeID, [WorkDate], WorkingTime)
select 123 , '08/01/2013', 0 union all
select 123 , '08/02/2013', 1 union all
select 123 , '08/03/2013', 0 union all
select 123 , '08/04/2013', 1 union all
select 123 , '08/05/2013', 1 union all
select 123 , '08/06/2013', 1 union all
select 123 , '08/07/2013', 1 union all
select 123 , '08/08/2013', 1 union all
select 123 , '08/09/2013', 0 union all
select 123 , '08/10/2013', 1 union all
select 123 , '08/11/2013', 1 union all
select 123 , '08/12/2013', 1 union all
select 123 , '08/13/2013', 1 union all
select 123 , '08/14/2013', 1 union all
select 123 , '08/15/2013', 0 union all
select 123 , '08/16/2013', 1 union all
select 123 , '08/17/2013', 1 union all
select 123 , '08/18/2013', 1 union all
select 123 , '08/19/2013', 1 union all
select 123 , '08/20/2013', 1

-- Worked days between the first of @DateToPull's month and @DateToPull (inclusive).
if object_id('tempdb..#SubsetTimeSheet') is not null
    drop table #SubsetTimeSheet
create table #SubsetTimeSheet
(
    EmployeeID int,
    [WorkDate] datetime,
    WorkingTime bit
)
insert into #SubsetTimeSheet(EmployeeID, [WorkDate], WorkingTime)
select EmployeeID, [WorkDate], WorkingTime
from #TimeSheet
where
    datediff(dd, [WorkDate], @DateToPull) >= 0
    and datediff(dd, dateadd(dd, -(day(@DateToPull) - 1), @DateToPull), [WorkDate]) >= 0
    and WorkingTime = 1

-- A is the last day of a 5-day streak; B..E are the worked days 1..4 days before it.
-- E is therefore the streak's first day, and max(E.WorkDate) picks the most recent streak.
select A.EmployeeID, max(E.WorkDate) WorkDate
from
    #SubsetTimeSheet A
    inner join #SubsetTimeSheet B
        on datediff(dd, dateadd(dd, -1, A.[WorkDate]), B.WorkDate) = 0 and A.EmployeeID = B.EmployeeID
    inner join #SubsetTimeSheet C
        on datediff(dd, dateadd(dd, -2, A.[WorkDate]), C.WorkDate) = 0 and A.EmployeeID = C.EmployeeID
    inner join #SubsetTimeSheet D
        on datediff(dd, dateadd(dd, -3, A.[WorkDate]), D.WorkDate) = 0 and A.EmployeeID = D.EmployeeID
    inner join #SubsetTimeSheet E
        on datediff(dd, dateadd(dd, -4, A.[WorkDate]), E.WorkDate) = 0 and A.EmployeeID = E.EmployeeID
group by
    A.EmployeeID

Related

T SQL Cte delete where group by is greater than 1

I'm using SQL Server 2016. I have the below table:
SKU Mkt Week Cost Code
ABC 05 1 10 100
ABC 05 2 12 100
DEF 05 3 20 100
DEF 05 3 25 125
XYZ 08 1 10 100
XYZ 08 2 12 100
XZY 08 2 14 125
This is the desired result:
SKU Mkt Week Cost Code
ABC 05 1 10 100
ABC 05 2 12 100
DEF 05 3 25 125
XYZ 08 1 10 100
XZY 08 2 14 125
So if a SKU\Mkt\Week\Cost exist more than once, I want to keep the record where code = 125 and delete the row where the code is 100.
I'm using the below Cte:
;WITH CTE AS
(
SELECT *,
RN = ROW_NUMBER() OVER( PARTITION BY SKU, Mkt, Week
ORDER BY SKU, Mkt, Week)
FROM [table]
WHERE code = 100
)
DELETE FROM CTE
WHERE RN > 1
However, the CTE does not delete anything - what am I missing?
Based on the query and sample data you have provided, you need to pay attention to this part of the CTE's inner query:
WHERE code = 100
when this filter applied you have the following data:
SKU Mkt Week Cost Code
ABC 05 1 10 100
ABC 05 2 12 100
DEF 05 3 20 100
Each of these rows gets 1 as the Row_Number() output, so running the following query will not affect any rows:
DELETE FROM CTE
WHERE RN > 1
To achieve the desired result you need to remove the WHERE section in CTE's inner query.
;WITH CTE AS
(
    -- Rank every row of the table (no code = 100 filter) inside its SKU/Mkt/Week
    -- group; the highest Cost gets RN = 1. Code DESC would serve equally well.
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY SKU, Mkt, Week
                              ORDER BY Cost DESC) AS RN
    FROM [table]
)
-- Everything ranked after the first row of its group is removed.
DELETE FROM CTE
WHERE RN > 1
You need to also add the Cost DESC or Code Desc to Row_Number()'s Order By section.
Ranking function will be evaluated in the select statement , which means the where clause WHERE code = 100 is evaluated before ROW_NUMBER() and so it has already removed the rows with code 125. Use order by Code as well and then apply the code=100 check when deleting from the CTE
;WITH CTE AS
(
    -- Rank rows per SKU/Mkt/Week group with the highest Code first.
    SELECT *,
           ROW_NUMBER() OVER (PARTITION BY SKU, Mkt, Week
                              ORDER BY Code DESC) AS RN
    FROM tt1
)
-- Only non-leading rows that carry code 100 are deleted.
DELETE FROM CTE
WHERE CODE = 100
  AND RN > 1
Try below query to get the desired result -
Sample data and Query
-- Sample data held in a table variable.
Declare @Table table
(SKU varchar(20), Mkt int, [Week] int, Cost int, Code int)
Insert into @Table (SKU, Mkt, [Week], Cost, Code)
values
( 'ABC', 05 , 1, 10 , 100),
( 'ABC' , 05 , 2 , 12 , 100),
('DEF' ,05 , 3 , 20 , 100),
('DEF' ,05 , 3 ,25 , 125),
('XYZ' , 08 , 1 ,10 , 100),
('XYZ' , 08 , 2 ,12 , 100),
('XYZ' , 08, 2 ,14, 125)
;WITH CTE AS
(
    -- Rank rows per SKU/Mkt/Week group, highest Code first, so the row to keep is RN = 1.
    SELECT *,
           RN = ROW_NUMBER() OVER( PARTITION BY SKU, Mkt, [Week]
                                   ORDER BY Code desc)
    FROM @Table
)
-- Remove every row that is not the first of its group.
delete from Cte where RN > 1
Along with moving your Where statement, I believe you also want a second cte to work with the records you are identifying... In the following your first cte identifies the duplicate records while the second cte isolates them so you can perform your delete against those SKUs
Table
Create Table #tbl
(
SKU VarChar(10),
Mkt VarChar(10),
Week Int,
Cost Int,
Code Int
)
-- Sample rows copied from the question; note the last SKU is 'XZY', not 'XYZ'.
Insert Into #tbl (SKU, Mkt, Week, Cost, Code) Values
('ABC','05',1,10,100),
('ABC','05',2,12,100),
('DEF','05',3,20,100),
('DEF','05',3,25,125),
('XYZ','08',1,10,100),
('XYZ','08',2,12,100),
('XZY','08',2,14,125)
Query
;WITH CTE AS
(
    -- Number the rows inside each SKU/Mkt/Week group; RN > 1 only occurs in groups
    -- that hold more than one row.
    SELECT *,
           RN = ROW_NUMBER() OVER( PARTITION BY SKU, Mkt, Week
                                   ORDER BY Code)
    FROM #tbl
)
, cte1 As
(
    -- The SKU/Mkt/Week combinations that have duplicates.
    Select Distinct SKU, Mkt, Week from cte where rn > 1
)
-- Delete the code-100 rows of the duplicated combinations only. Matching on the full
-- SKU/Mkt/Week key keeps code-100 rows of the same SKU in other, non-duplicated weeks.
DELETE c
FROM CTE c
inner join cte1 c1
    On c.SKU = c1.SKU
   And c.Mkt = c1.Mkt
   And c.Week = c1.Week
WHERE c.Code = 100
Select * From #tbl
Result (Your 'desired result' example removed an XYZ record where the week was not duplicated?)
SKU Mkt Week Cost Code
ABC 05 1 10 100
ABC 05 2 12 100
DEF 05 3 25 125
XYZ 08 1 10 100
XYZ 08 2 12 100
XZY 08 2 14 125
Your CTE statement is only considering rows with code = 100. If you remove that filter, the CTE will rank based on all rows from the table. Using this, first find out which SKU/Mkt/Week combinations have multiple rows. Then, among these combinations, identify rows with code = 100 and delete them.
create table #e1
(
SKU varchar(50)
,Mkt varchar(50)
,_Week int
,Cost int
,_code int
)
insert into #e1(SKU, Mkt, _Week, Cost, _code)
select 'ABC', '05', 1, 10, 100 UNION
SELECT 'ABC', '05', 2, 12, 100 union
SELECT 'DEF', '05', 3, 20, 100 UNION
SELECT 'DEF', '05', 3, 25, 125 UNION
SELECT 'XYZ', '08', 1, 10, 100 UNION
SELECT 'XYZ', '08', 2, 12, 100 UNION
SELECT 'XZY', '08', 2, 14, 125
-- Remove the code-100 rows of every SKU/Mkt/_Week combination that occurs more than once.
delete victim
from #e1 as victim
inner join
(
    -- Combinations with more than one row.
    select SKU, Mkt, _Week
    from #e1
    group by SKU, Mkt, _Week
    having count(*) > 1
) as dup
    on dup.SKU = victim.SKU
   and dup.Mkt = victim.Mkt
   and dup._Week = victim._Week
where victim._code = 100
Create table #tab1 (SKU varchar(50),Mkt varchar(50),[Week] varchar(50),Cost varchar(50),Code varchar(50))
insert into #tab1
select 'ABC','05','1','10','100'
union
select 'ABC','05','2','12','100'
union
select 'DEF','05','3','20','100'
union
select 'DEF','05','3','25','125'
union
select 'XYZ','08','1','10','100'
union
select 'XYZ','08','2','12','100'
union
select 'XYZ','08','2','14','125'
-- Rank rows per SKU/Mkt/Week so the highest Code (then highest Cost) is rno = 1, and delete
-- every other row of the group. Code and Cost are varchar columns in #tab1, so they are cast
-- to int to get numeric rather than string ordering.
delete t
from #tab1 t
inner join
(
    select t1.SKU, t1.Mkt, t1.[Week], t1.Cost as Cost, t1.Code as Code,
           ROW_NUMBER() over (partition by t1.SKU, t1.Mkt, t1.[Week]
                              order by cast(t1.Code as int) desc, cast(t1.Cost as int) desc) as rno
    from #tab1 t1
) c
    on c.SKU = t.SKU and c.Mkt = t.Mkt and c.Cost = t.Cost and c.[Week] = t.[Week] and c.Code = t.Code
-- rno > 1 (not = 2) so groups with three or more rows are fully cleaned up.
where c.rno > 1
select * from #tab1
Output:
SKU Mkt Week Cost Code
ABC 05 1 10 100
ABC 05 2 12 100
DEF 05 3 25 125
XYZ 08 1 10 100
XYZ 08 2 14 125

TSQL, Get top N unique rows across ordered groups

I have the following table of values, sorted by arbitrary segment id specified by the user. ( I know how to do that query and below are the results )
SegmentID SequenceID
3 100
3 200
3 400
3 430
1 100
1 200
1 300
1 410
2 100
2 200
2 300
2 420
I need a SQL query ( Sql Server 2012 ) that returns top N Records in order of Precedence where SequenceID is not repeated.
Example: user wants 7 sequences in order of segment preference: 3, 1,2.
The correct answer is
SegmentID SequenceID
3 100
3 200
3 400
3 430
1 300
1 410
2 420
in a nutshell, i need to traverse recordset from top to bottom, grab unique sequences as i go and add to the list.
How can I do that in a TSql statement?
create table #data (SegmentID int,SequenceID int);
insert into #data values
(3,100),
(3,200),
(3,400),
(3,430),
(1,100),
(1,200),
(1,300),
(1,410),
(2,100),
(2,200),
(2,300),
(2,420);
This table declares the ordering preference:
create table #prefs (Preference int, SegmentID int);
insert into #prefs values(1,3),(2,1),(3,2);
-- Number of rows the user asked for (7 in the example).
declare @TopN int = 7;
with cte as
(
    -- For each SequenceID, rank the segments that contain it by user preference;
    -- rn = 1 is the most preferred segment holding that sequence.
    select #data.SegmentID,
           #data.SequenceID,
           Preference,
           row_number() over (partition by SequenceID order by Preference) rn
    from #data
    inner join #prefs on #data.SegmentID = #prefs.SegmentID
)
-- Keep each sequence once, walk the segments in preference order, stop after @TopN rows.
select top (@TopN)
       SegmentId,
       SequenceID
from cte
where rn = 1
order by Preference, SequenceID;
DEMO:
http://rextester.com/JKNKD15000
With cte (SegmentID, SequenceID) as
(SELECT 3, 100 UNION ALL
SELECT 3, 200 UNION ALL
SELECT 3, 400 UNION ALL
SELECT 3, 430 UNION ALL
SELECT 1, 100 UNION ALL
SELECT 1, 200 UNION ALL
SELECT 1, 300 UNION ALL
SELECT 1, 410 UNION ALL
SELECT 2, 100 UNION ALL
SELECT 2, 200 UNION ALL
SELECT 2, 300 UNION ALL
SELECT 2, 420),
userOrder (SegmentID, orderID) as (
SELECT 3, 1 UNION ALL
SELECT 1, 2 UNION ALL
SELECT 2, 3),
Results (SegmentID, SequenceID, RN, orderID) as (
Select A.*
, Row_number() over (Partition by A.SequenceID order by B.orderID) RN
, B.orderID
from cte A
INNER JOIN userOrder B
on A.SegmentID = B.SegmentID)
Select Top 7 *
from results where RN = 1
order by OrderID, SequenceID

Compare getdate() with two different fields

I have 2 tables:
T1 T2
id Effdate E_id DOB
-------------- ------------
1 20161212 2 1950-02-16 00:12:24
2 20130124 5 1978-01-16 10:14:30
I want to compare getdate() < Maxdate(effdate, DOB)?
I am getting datetime conversion error.
for example : getdate() < MAXDATE( 20161212 , 1950-02-16 00:12:24)
expected result should be from table T1:
id Effdate
--------------
1 20161212
If the rows in both tables correspond via id = E_id, you can UNION them and GROUP BY id:
;WITH T1 AS (
SELECT 1 id,
CAST('20161212' as varchar(10)) Effdate
UNION ALL
SELECT 2,
'20130124'
), T2 AS (
SELECT 1 E_id,
CAST('1950-02-16 00:12:24' as datetime) DOB
UNION ALL
SELECT 2,
'1978-01-16 10:14:30'
)
SELECT id,
MAX(CAST(Effdate as datetime)) as MD
FROM (
SELECT *
FROM T1
UNION ALL
SELECT *
FROM T2
) t
GROUP BY id
HAVING MAX(CAST(Effdate as datetime)) >= GETDATE()
Will bring you expected result

Coalesce over Rows in MSSQL 2008,

I'm trying to determine the best approach here in MSSQL 2008.
Here is my sample data
TransDate Id Active
-------------------------
1/18 1pm 5 1
1/18 2pm 5 0
1/18 3pm 5 Null
1/18 4pm 5 1
1/18 5pm 5 0
1/18 6pm 5 Null
If grouped by Id and ordered by the TransDate, I want the last Non Null Value for the Active Column, and the MAX of TransDate
SELECT MAX(TransDate) AS TransDate,
Id,
--LASTNonNull(Active) AS Active
Here would be the results:
TransDate Id Active
---------------------
1/18 6pm 5 0
It would be like a Coalesce but over the rows, instead of two values/columns.
There would be many other columns that would also have this similar method applied, so I really don't want to make a separate join for each of the columns.
Any ideas?
I'd probably use a correlated sub query.
-- Per Id: the latest TransDate, plus the Active value of the most recent row
-- whose Active is not NULL (looked up by a correlated TOP (1) subquery).
SELECT
    MAX(outerRows.TransDate) AS TransDate,
    outerRows.Id,
    (
        SELECT TOP (1) innerRows.Active
        FROM T AS innerRows
        WHERE innerRows.Active IS NOT NULL
          AND innerRows.Id = outerRows.Id
        ORDER BY innerRows.TransDate DESC
    ) AS Active
FROM T AS outerRows
GROUP BY outerRows.Id
A way without a correlated subquery:
SELECT
Id,
MAX(TransDate) AS TransDate,
CAST(RIGHT(MAX(CONVERT(CHAR(23),TransDate,121) + CAST(Active AS CHAR(1))),1) AS BIT) AS Active,
/*You can probably figure out a more efficient thing to
compare than the above depending on your data. e.g.*/
CAST(MAX(DATEDIFF(SECOND,'19500101',TransDate) * CAST(10 AS BIGINT) + Active)%10 AS BIT) AS Active2
FROM T
GROUP BY Id
Or following the comments would cross apply work better for you?
WITH T (TransDate, Id, Active, SomeOtherColumn) AS
(
select GETDATE(), 5, 1, 'A' UNION ALL
select 1+GETDATE(), 5, 0, 'B' UNION ALL
select 2+GETDATE(), 5, null, 'C' UNION ALL
select 3+GETDATE(), 5, 1, 'D' UNION ALL
select 4+GETDATE(), 5, 0, 'E' UNION ALL
select 5+GETDATE(), 5, null,'F'
),
T1 AS
(
SELECT MAX(TransDate) AS TransDate,
Id
FROM T
GROUP BY Id
)
SELECT T1.TransDate,
Id,
CA.Active AS Active,
CA.SomeOtherColumn AS SomeOtherColumn
FROM T1
CROSS APPLY (SELECT TOP (1) Active, SomeOtherColumn
FROM T t2
WHERE t2.Id = T1.Id
AND Active IS NOT NULL
ORDER BY TransDate DESC) CA
This example should help, using analytical functions Max() OVER and Row_Number() OVER
create table tww( transdate datetime, id int, active bit)
insert tww select GETDATE(), 5, 1
insert tww select 1+GETDATE(), 5, 0
insert tww select 2+GETDATE(), 5, null
insert tww select 3+GETDATE(), 5, 1
insert tww select 4+GETDATE(), 5, 0
insert tww select 5+GETDATE(), 5, null
select maxDate as Transdate, id, Active
from (
select *,
max(transdate) over (partition by id) maxDate,
ROW_NUMBER() over (partition by id
order by case when active is not null then 0 else 1 end, transdate desc) rn
from tww
) x
where rn=1
Another option, quite expensive, would be doing it through XML. For educational purposes only
-- Builds <root><node id="..."><data>...</data>...</node></root>, one <node> per id with its
-- rows as <data> children ordered by transdate desc, then reads the first occurrence of
-- each column element under the node.
select
    ID = n.c.value('@id', 'int'),
    trandate = n.c.value('(data/transdate)[1]', 'datetime'),
    active = n.c.value('(data/active)[1]', 'bit')
from
    (select xml=convert(xml,
        (select id [@id],              -- emitted as the id attribute of <node>
            ( select *
              from tww t
              where t.id=tww.id
              order by transdate desc
              for xml path('data'), type)
         from tww
         group by id
         for xml path('node'), root('root'), elements)
    )) x cross apply xml.nodes('root/node') n(c)
It works on the principle that the XML generated has each record as a child node of the ID. Null columns have been omitted, so the first column found using xpath (child/columnname) is the first non-null value similar to COALESCE.
You could use a subquery:
-- Per Id: the latest TransDate and the Active value of the most recent non-NULL row.
SELECT MAX(t1.TransDate) AS TransDate
,      t1.Id
,      (
       SELECT TOP 1 t2.Active
       FROM YourTable t2
       WHERE t1.id = t2.id
             and t2.Active is not null
       ORDER BY
             t2.TransDate desc
       ) AS Active
FROM YourTable t1
-- Required: Id is selected next to an aggregate, so the query must group by it.
GROUP BY t1.Id
I created a temp table named #temp to test my solution, and here is what I came up with:
transdate id active
1/1/2011 12:00:00 AM 5 1
1/2/2011 12:00:00 AM 5 0
1/3/2011 12:00:00 AM 5 null
1/4/2011 12:00:00 AM 5 1
1/5/2011 12:00:00 AM 5 0
1/6/2011 12:00:00 AM 5 null
1/1/2011 12:00:00 AM 6 2
1/2/2011 12:00:00 AM 6 3
1/3/2011 12:00:00 AM 6 null
1/4/2011 12:00:00 AM 6 2
1/5/2011 12:00:00 AM 6 null
This query...
select max(a.transdate) as transdate, a.id, (
select top (1) b.active
from #temp b
where b.active is not null
and b.id = a.id
order by b.transdate desc
) as active
from #temp a
group by a.id
Returns these results.
transdate id active
1/6/2011 12:00:00 AM 5 0
1/5/2011 12:00:00 AM 6 2
Assuming a table named "test1", how about using ROW_NUMBER, OVER and PARTITION BY?
-- Per id: the overall latest transdate, and the active value of the most recent row whose
-- active is not NULL. NULL rows are ranked last instead of being filtered out, so
-- MAX(transdate) still sees them; an id whose active values are all NULL yields NULL.
SELECT a.maxdate AS transdate, a.id, a.active
FROM
(
    SELECT id,
           active,
           MAX(transdate) OVER (PARTITION BY id) AS maxdate,
           ROW_NUMBER() OVER (PARTITION BY id
                              ORDER BY CASE WHEN active IS NULL THEN 1 ELSE 0 END,
                                       transdate DESC) AS rownumber
    FROM test1
) a
WHERE a.rownumber = 1

SQL Server query without using loops

I have a Payment table that looks a little like this:
Id (int identity)
CustomerId (int)
PaymentDate (SmallDateTime)
Now I want to write a query that will find those customers that have made three payments within a period of three months. Given the following data:
Id CustomerId PaymentDate (YYYY-MM-DD)
------------------------------------------
1 1 2010-01-01
2 1 2010-02-01
3 1 2010-03-01
4 1 2010-06-01
5 2 2010-04-01
6 2 2010-05-01
7 2 2010-06-01
8 2 2010-07-01
I would like to produce the following result:
CustomerId LastPaymentDateInPeriod
-------------------------------------
1 2010-03-01
2 2010-07-01
Where LastPaymentDateInPeriod is the PaymentDate with the highest value within a three-month period. If there is more than one three-month period for a given customer it would have to return the highest value from the most recent period (this is what I tried to illustrate for customer 2 in the above example). Note that three payments on three consecutive days would also meet the criteria. The payments just have to fall within a three-month period.
I know how to do this with a cursor and a lot of smaller queries but this is slow (and, I've come to understand, should only be a last resort). So do any of you SqlServer geniuses know how to do this with a query?
Thanks in advance.
This should do the job:
-- Inner query: a payment qualifies as the end of a period when the same customer has at
-- least two earlier payments whose month difference to it is below 3.
-- Outer query: the latest qualifying payment date per customer.
select
    CustomerID,
    max(LastPaymentDateInPeriod) as LastPaymentDateInPeriod
from
(
    select
        cur.CustomerID,
        cur.PaymentDate as LastPaymentDateInPeriod
    from Payment as cur
    inner join Payment as prev
        on prev.CustomerID = cur.CustomerID
       and prev.PaymentDate < cur.PaymentDate
       and datediff(m, prev.PaymentDate, cur.PaymentDate) < 3
    group by
        cur.CustomerID,
        cur.PaymentDate
    having
        count(*) >= 2
) as PaymentPeriods
group by
    CustomerID
update: I've tested this now and it seems to work for #Martin's data
update2: If it's a requirement that Jan 31 and Apr 1 should be considered as less than 3 months apart then the DATEDIFF function call can be replaced with something like this:
-- Returns the number of whole months between @startdate and @enddate: the calendar-month
-- difference, minus one when the day-of-month of @startdate is later than that of @enddate
-- (so Jan 31 -> Apr 1 counts as 2, not 3).
create function fn_monthspan
(
    @startdate datetime,
    @enddate datetime
)
returns int
as
begin
    return datediff(m, @startdate, @enddate)
         - case when datepart(d, @startdate) > datepart(d, @enddate) then 1 else 0 end
end
Bit of a rushed job as I'm off out.
-- Sample payments held in a table variable.
declare @T TABLE
(
    Id int,
    CustomerId int,
    PaymentDate SmallDateTime
)
insert into @T (Id, CustomerId, PaymentDate)
SELECT 1, 1,'2010-01-01' UNION ALL
SELECT 2, 1,'2010-02-01' UNION ALL
SELECT 3, 1,'2010-03-01' UNION ALL
SELECT 4, 1,'2010-06-01' UNION ALL
SELECT 5, 2,'2010-04-01' UNION ALL
SELECT 6, 2,'2010-05-01' UNION ALL
SELECT 7, 2,'2010-06-01' UNION ALL
SELECT 8, 2,'2010-07-01'
;with CTE1 AS
(
    -- Number each customer's payments in date order.
    SELECT Id, CustomerId, PaymentDate,
           ROW_NUMBER() OVER (PARTITION BY CustomerId ORDER BY PaymentDate) RN
    FROM @T
), CTE2 AS
(
    -- For each payment C1, look at it and the customer's next two payments (RN .. RN + 2)
    -- that fall no later than three months after C1. COUNT(*) = 3 means all three fit
    -- in the window; the latest of them is the period's last payment date.
    SELECT C1.Id, C1.CustomerId, MAX(C2.PaymentDate) AS LastPaymentDateInPeriod
    FROM CTE1 C1
    LEFT JOIN CTE1 C2
        ON C1.CustomerId = C2.CustomerId
       AND C2.RN BETWEEN C1.RN AND C1.RN + 2
       and C2.PaymentDate <= DATEADD(MONTH,3,C1.PaymentDate)
    GROUP BY C1.Id, C1.CustomerId
    HAVING COUNT(*)=3
)
-- Most recent qualifying period per customer.
SELECT CustomerId, MAX(LastPaymentDateInPeriod) LastPaymentDateInPeriod
FROM CTE2
GROUP BY CustomerId
This gives you all three payments within a 3 month span.
;
WITH CustomerPayments AS
(
    -- Sample data: customers 1 and 2 have four payments each, customer 3 has two.
    SELECT 1 Id, 1 CustomerId, Convert (DateTime, '2010-01-01') PaymentDate
    UNION ALL SELECT 2, 1, '2010-02-01'
    UNION ALL SELECT 3, 1, '2010-03-01'
    UNION ALL SELECT 4, 1, '2010-06-01'
    UNION ALL SELECT 5, 2, '2010-04-01'
    UNION ALL SELECT 6, 2, '2010-05-01'
    UNION ALL SELECT 7, 2, '2010-06-01'
    UNION ALL SELECT 8, 2, '2010-07-01'
    UNION ALL SELECT 9, 3, '2010-07-01'
    UNION ALL SELECT 10, 3, '2010-07-01'
),
FirstPayment AS
(
    -- Each customer's payment with the lowest Id.
    -- NOTE(review): "first" is by Id, which assumes Ids follow payment order - confirm.
    SELECT Id, CustomerId, PaymentDate
    FROM CustomerPayments
    WHERE Id IN
    (
        SELECT Min (Id) Id
        FROM CustomerPayments
        GROUP BY CustomerId
    )
),
SecondPayment AS
(
    -- Lowest Id per customer once the first payment is excluded.
    SELECT Id, CustomerId, PaymentDate
    FROM CustomerPayments
    WHERE Id IN
    (
        SELECT Min (Id) Id
        FROM CustomerPayments
        WHERE Id NOT IN
        (
            SELECT Id
            FROM FirstPayment
        )
        GROUP BY CustomerId
    )
),
ThirdPayment AS
(
    -- Lowest Id per customer once the first and second payments are excluded.
    SELECT Id, CustomerId, PaymentDate
    FROM CustomerPayments
    WHERE Id IN
    (
        SELECT Min (Id) Id
        FROM CustomerPayments
        WHERE Id NOT IN
        (
            SELECT Id
            FROM FirstPayment
            UNION
            SELECT Id
            FROM SecondPayment
        )
        GROUP BY CustomerId
    )
)
-- Customers that have a first, second and third payment, where the first and the third
-- are less than three calendar months apart (so all three fall inside the window).
SELECT *
FROM
    FirstPayment FP
    INNER JOIN SecondPayment SP
        ON FP.CustomerId = SP.CustomerId
    INNER JOIN ThirdPayment TP
        ON SP.CustomerId = TP.CustomerId
WHERE 1=1
    AND SP.PaymentDate IS NOT NULL
    AND TP.PaymentDate IS NOT NULL
    AND ABS (DATEDIFF (mm, FP.PaymentDate, TP.PaymentDate)) <3
I thought of:
-- Latest payment date of every customer that made at least three payments during the
-- three months preceding the current date.
select p.customerId, max(p.PaymentDate)
from payment as p
where p.customerId in
(
    select recent.customerId
    from payment as recent
    where recent.paymentdate > dateadd(month, -3, getdate())
    group by recent.customerId
    having count(*) >= 3
)
group by p.customerId;