Create episode for each value with new Begin and End Dates - sql

This is in reference to below Question
Loop through each value to the seq num
But now the client wants to see the data differently, so I have started a new thread for this question.
below is the requirement.
This is the data .
ID seqNum DOS Service End Date
1 1 1/1/2017 1/15/2017
1 2 1/16/2017 1/16/2017
1 3 1/17/2017 1/21/2017
1 4 1/22/2017 2/13/2017
1 5 2/14/2017 3/21/2017
1 6 2/16/2017 3/21/2017
Expected outPut:
ID SeqNum DOSBeg DOSEnd
1 1 1/1/2017 1/30/2017
1 2 1/31/2017 3/1/2017
1 3 3/2/2017 3/31/2017
For each DOSBeg, add 29 days to get DOSEnd. Then add 1 day to DOSEnd; the result (1/31/2017) is the new DOSBeg.
Now add 29 days to 1/31/2017, which gives 3/1/2017 as the next DOSEnd. Repeat this until DOSEnd >= the max End Date, i.e. 3/21/2017.
Basically, we need episode of 29 days for each ID.
I tried with this code and it is giving me duplicates.
-- Attempt at generating consecutive 30-day episodes with a recursive CTE.
with cte as (
-- Anchor member: emits one row for every row of #temp (no grouping or filter),
-- so each source row starts its own chain of episodes.
select ID, minDate as DOSBeg,dateadd(day,29,mindate) as DOSEnd
from #temp
union all
-- Recursive member: the next episode begins the day after the previous one ends.
-- There is no WHERE clause here, so nothing in the query stops the recursion.
select ID,dateadd(day,1,DOSEnd) as DOSBeg,dateadd(day,29,dateadd(day,1,DOSEnd)) as DOSEnd
from cte
)
select ID,DOSBeg,DOSEnd
from cte
-- MAXRECURSION 0 removes the server's recursion-depth limit.
OPTION (MAXRECURSION 0)
Here mindate is Minimum DOS for this ID i.e. 1/1/2017
I came up with below logic and this is working fine for me. Is there any better way than this ?
-- Sample data in a table variable. seqNum is generated by IDENTITY, so the
-- INSERT names only the three columns it actually supplies.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date)
insert into @table (id, DOS, ServiceEndDate)
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118')
-- Copy into a temp table so the following statements can read it as #temp.
select * into #temp from @table
--drop table #data
-- One row per ID holding the first DOS and the last ServiceEndDate.
-- Aggregating here (rather than a windowed DISTINCT over every source row)
-- hands the recursive CTE below exactly one anchor row per ID.
select ID
      ,min(DOS) as minDate
      ,max(ServiceEndDate) as maxDate
into #data
from #temp
group by ID;
--drop table #StartDateLogic
-- Step forward 30 days at a time from minDate while the new start date is
-- still on or before maxDate. The statement before WITH must end with ';'.
with cte as
(select ID, minDate as startdate, maxDate
 from #data
 union all
 select ID, dateadd(day,30,startdate) as startdate, maxDate
 from cte
 where maxDate >= dateadd(day,30,startdate))
select ID, startdate
into #StartDateLogic
from cte
OPTION (MAXRECURSION 0);
--final Result set
-- Each episode covers its start date plus the following 29 days.
select ID
      ,ROW_NUMBER() over (Partition by ID order by StartDate) as SeqNum
      ,StartDate
      ,dateadd(day,29,startdate) as EndDate
from #StartDateLogic

You were on the right track with the recursive CTE, but you forgot the anchor.
-- Sample data; seqNum is an IDENTITY column, so it is omitted from the INSERT.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date)
insert into @table (id, DOS, ServiceEndDate)
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118')
;with dates as(
    -- Anchor: the lowest-seqNum row of each id opens that id's first episode.
    -- A row_number filter is used because ORDER BY cannot be attached to a
    -- single branch of a UNION.
    select f.id, f.seqNum, DOSBeg = f.DOS, DOSEnd = dateadd(day,29,f.DOS)
    from (
        select id, seqNum, DOS,
               rn = row_number() over (partition by id order by seqNum)
        from @table
    ) f
    where f.rn = 1
    union all
    -- Recursive step: the next episode starts the day after the previous one
    -- ends; the join to the next seqNum of the same id drives the recursion,
    -- so an id yields at most as many episodes as it has source rows.
    select t.id, t.seqNum, DOSBeg = dateadd(day,1,d.DOSEnd), DOSEnd = dateadd(day,29,dateadd(day,1,d.DOSEnd))
    from dates d
    inner join @table t on
        d.id = t.id and t.seqNum = d.seqNum + 1
)
select *
from dates d
-- Keep episodes ending no later than one month past the id's last ServiceEndDate.
where d.DOSEnd <= (select max(dateadd(month,1,ServiceEndDate)) from @table where id = d.id)
order by id, seqNum

Related

Get consecutive days with condition

There is a table with three columns:
-- Temp table holding the sample visit rows.
CREATE TABLE #t1
(
    Id        INT,
    VisitDate DATE,
    Counter   INT
)
AND test data:
-- Load the nine sample rows in a single statement.
INSERT INTO #t1 (Id, VisitDate, Counter)
VALUES
    (1, '2019-01-01', 50),
    (2, '2019-01-02', 15),
    (3, '2019-01-03', 7),
    (4, '2019-01-04', 7),
    (5, '2019-01-05', 18),
    (6, '2019-01-06', 19),
    (7, '2019-01-07', 11),
    (8, '2019-01-08', 1),
    (9, '2019-01-09', 19)
I need to find runs of three or more consecutive days where Counter is greater than or equal to ten:
Id VisitDate Counter
5 2019-01-05 18
6 2019-01-06 19
7 2019-01-07 11
My SELECT statement is
-- Flag each row (1 when Counter > 10), then add the flag to the previous and
-- to the next row's flag. A sum of 2 means the row and that one neighbour are
-- both above 10, so this keeps any row that belongs to a pair.
;WITH flagged AS
(
    SELECT *
          ,CASE WHEN Counter > 10 THEN 1 ELSE 0 END AS MoreThanTen
    FROM #t1
), neighbours AS
(
    SELECT *
          ,LAG(MoreThanTen)  OVER (ORDER BY VisitDate) AS LagShift
          ,(MoreThanTen + LAG(MoreThanTen)  OVER (ORDER BY VisitDate)) AS LagMoreThanTen
          ,LEAD(MoreThanTen) OVER (ORDER BY VisitDate) AS LeadShift
          ,(MoreThanTen + LEAD(MoreThanTen) OVER (ORDER BY VisitDate)) AS LeadMoreThanTen
    FROM flagged
)
SELECT *
FROM neighbours
WHERE 2 IN (LagMoreThanTen, LeadMoreThanTen)
But the result is not fully consistent
Id VisitDate Counter
1 2019-01-01 50
2 2019-01-02 15
5 2019-01-05 18
6 2019-01-06 19
7 2019-01-07 11
It looks like a gaps-and-islands problem.
Here is one way to do it.
I'm assuming SQL Server based on the T-SQL tag.
Run this query CTE-by-CTE and examine intermediate results to understand how it works.
Query
-- Gaps-and-islands: rn1 numbers all rows by date, rn2 numbers rows separately
-- within each flag value. rn1 - rn2 stays constant along an unbroken run of
-- rows sharing the same flag, so (flag, difference) identifies one island.
WITH
numbered AS
(
    SELECT
         t.*
        ,f.MoreThanTen
        ,ROW_NUMBER() OVER (ORDER BY t.VisitDate) AS rn1
        ,ROW_NUMBER() OVER (PARTITION BY f.MoreThanTen ORDER BY t.VisitDate) AS rn2
    FROM #t1 AS t
    CROSS APPLY (VALUES (CASE WHEN t.Counter>10 THEN 1 ELSE 0 END)) AS f(MoreThanTen)
)
,islands AS
(
    SELECT
         n.*
        ,n.rn1 - n.rn2 AS Diff
        ,COUNT(*) OVER (PARTITION BY n.MoreThanTen, n.rn1 - n.rn2) AS GroupLength
    FROM numbered AS n
)
-- Keep only islands of at least three rows whose rows are above 10.
SELECT
     i.ID
    ,i.VisitDate
    ,i.Counter
FROM islands AS i
WHERE i.Counter > 10
  AND i.GroupLength >= 3
ORDER BY i.VisitDate
;
Result
+----+------------+---------+
| ID | VisitDate | Counter |
+----+------------+---------+
| 5 | 2019-01-05 | 18 |
| 6 | 2019-01-06 | 19 |
| 7 | 2019-01-07 | 11 |
+----+------------+---------+
Try this:
select Id, VisitDate, Counter from (
    select Id, VisitDate, Counter, count(*) over (partition by grp) cnt from (
        select *,
            -- Subtracting an increasing row number from consecutive dates gives the
            -- same anchor date for every day of one run. Date arithmetic is used
            -- instead of day(visitDate), which restarts at 1 every month and would
            -- split or wrongly merge runs that cross a month boundary.
            dateadd(day, -cast(row_number() over (order by visitDate) as int), visitDate) grp
        from #t1
        where [Counter] > 10
    ) a
) a where cnt >= 3 --where group count is greater or equal to three
Based on the comment that the days do not need to be consecutive, only the rows, here is an updated query that uses a similar technique:
-- Rows (not calendar days) must be consecutive: number every row by date, keep
-- only rows above 10, then renumber the survivors. The difference between the
-- two numberings is constant within an unbroken run of kept rows.
;with numbered as (
    select *, row_number() over (order by visitdate) as rn
    from #t1
), kept as (
    select id, visitdate, counter,
           rn - row_number() over (order by visitDate) as grp
    from numbered
    where Counter > 10
), sized as (
    select id, visitdate, counter,
           count(*) over (partition by grp) as cnt
    from kept
)
select id, visitdate, counter
from sized
where cnt >= 3
I think this might be most simply handled by just looking at the sequences using lead() and lag():
-- A row qualifies when it and two adjacent rows (both before, one on each
-- side, or both after) all have counter >= 10.
-- Reads #t1, the temp table created in the question.
select id, visitdate, counter
from (select t1.*,
             lag(counter, 2) over (order by visitdate) as counter_2p,
             lag(counter, 1) over (order by visitdate) as counter_1p,
             lead(counter, 1) over (order by visitdate) as counter_1l,
             lead(counter, 2) over (order by visitdate) as counter_2l
      from #t1 t1
     ) t1
where counter >= 10 and
      ((counter_2p >= 10 and counter_1p >= 10) or
       (counter_1p >= 10 and counter_1l >= 10) or
       (counter_1l >= 10 and counter_2l >= 10)
      );
A self-join also works for this question:
-- Find every window of three calendar-consecutive days that are all above 10,
-- then return the distinct days taking part in any such window. Joining only
-- two days would also return runs of length two. Longer runs are covered
-- because each of their three-day windows matches.
with result as (
    select
         d1.Id as Id1, d1.VisitDate as VisitDate1, d1.Counter as Counter1
        ,d2.Id as Id2, d2.VisitDate as VisitDate2, d2.Counter as Counter2
        ,d3.Id as Id3, d3.VisitDate as VisitDate3, d3.Counter as Counter3
    from #t1 d1
    inner join #t1 d2 on d2.VisitDate = dateadd(day, 1, d1.VisitDate)
    inner join #t1 d3 on d3.VisitDate = dateadd(day, 2, d1.VisitDate)
    where d1.Counter>10 and d2.Counter>10 and d3.Counter>10
)
-- UNION (not UNION ALL) on purpose: overlapping windows repeat the same day.
select Id1 as Id,VisitDate1 as VisitDate,Counter1 as [Counter] from result
union
select Id2 as Id,VisitDate2 as VisitDate,Counter2 as [Counter] from result
union
select Id3 as Id,VisitDate3 as VisitDate,Counter3 as [Counter] from result

get the most two recent dates for each customer

Basically, I need to retrieve the last two dates for customers who purchased on at least two different dates (some customers have purchased on only one date). The data has the following form:
client_id date
1 2016-07-02
1 2016-07-02
1 2016-06-01
2 2015-06-01
and I would like to get it in the following form
client_id previous_date last_date
1 2016-06-01 2016-07-02
Remarks:
a client can have multiple entries for the same date
a client can have entries only for one date, such customer should be discarded
Rank your dates with DENSE_RANK. Then group by client_id and show the last dates (ranked #1 and #2).
-- dense_rank gives equal dates the same rank, so rank 1 is the latest distinct
-- date and rank 2 the one before it, however often a date repeats.
select
  client_id,
  max(case when rn = 2 then date end) as previous_date,
  max(case when rn = 1 then date end) as last_date
from
(
  select
    client_id,
    date,
    dense_rank() over (partition by client_id order by date desc) as rn
  from mytable
) ranked  -- most engines require an alias on a derived table
group by client_id
-- Clients with a single distinct date never reach rank 2 and are dropped.
having max(rn) > 1;
build up:
t=# create table s153 (c int, d date);
CREATE TABLE
t=# insert into s153 values (1,'2016-07-02'), (1,'2016-07-02'),(1,'2016-06-01'),(2,'2016-06-01');
INSERT 0 4
query:
t=# with a as (
  -- collapse repeated purchases on the same day
  select distinct c,d from s153
)
, b as (
  -- Order newest first so the 1st value is the latest date and the 2nd the one
  -- before it. The frame spans the whole partition so every row of a client
  -- sees the same two values; DISTINCT then leaves one row per client.
  select distinct c,
         nth_value(d,1) over w last_date,
         nth_value(d,2) over w prev_date
  from a
  window w as (partition by c order by d desc
               rows between unbounded preceding and unbounded following)
)
-- clients with only one distinct date have no 2nd value
select * from b where prev_date is not null
;
 c | last_date  | prev_date
---+------------+------------
 1 | 2016-07-02 | 2016-06-01
(1 row)
UNTESTED:
We use a common table expression to assign a row number based on the date in descending order and then only include those records having a row number <=2 and then ensure that those having 1 row are excluded by the having.
-- dense_rank (rather than row_number) is used so repeated purchases on the same
-- day share one rank; otherwise two rows for the latest date would take ranks
-- 1 and 2 and the "previous" date would equal the last date.
WITH CTE AS (
    SELECT Client_ID
         , Date
         , dense_rank() over (partition by Client_ID order by Date desc) rn
    FROM mytable)
SELECT Client_ID, min(Date) previous_date, max(Date) last_date
FROM CTE
WHERE rn <= 2          -- only the two most recent distinct dates
GROUP BY Client_ID
HAVING max(rn) > 1     -- discard clients with a single distinct date
All you need is a group by...
--test data
declare @tablename TABLE
(
client_id int,
[date] datetime
);
insert into @tablename (client_id, [date])
values( 1 , '2016-07-02'),
(1 , '2016-07-02'),
(1 , '2016-06-01'),
(2 , '2015-06-01');
--query
-- LAST_DATE is the client's latest date; PREVIOUS_DATE is the latest date that
-- is strictly earlier. A plain MIN would return the first purchase ever, and
-- without the HAVING clause single-date clients would not be discarded.
SELECT d.client_id,
       MAX(CASE WHEN d.[date] < l.last_date THEN d.[date] END) AS [PREVIOUS_DATE],
       l.last_date AS [LAST_DATE]
FROM @tablename AS d
INNER JOIN (SELECT client_id, MAX([date]) AS last_date
            FROM @tablename
            GROUP BY client_id) AS l
    ON l.client_id = d.client_id
GROUP BY d.client_id, l.last_date
HAVING MAX(CASE WHEN d.[date] < l.last_date THEN d.[date] END) IS NOT NULL;
Updated
-- create data
-- Sample purchases: one row per (client, purchase date); dates may repeat.
create table myTable
(
    client_id  integer,
    given_date date
);
insert into myTable (client_id, given_date)
values
    (1, '2016-07-02'),
    (1, '2016-07-02'),
    (1, '2016-06-01'),
    (1, '2016-06-03'),
    (1, '2016-06-09'),
    (2, '2015-06-01'),
    (3, '2016-06-03'),
    (3, '2016-06-09');
-- query
-- Pair each of a client's dates with every different date of the same client,
-- then rank the pairs: latest partner date first, smallest gap next. The top
-- pair per client is kept. A client with a single distinct date produces no
-- pair and therefore no row.
SELECT ranked.client_id, ranked.PREVIOUS_DATE, ranked.LAST_DATE
FROM (
    SELECT
        ROW_NUMBER() OVER (
            PARTITION BY cand.client_id
            ORDER BY other.given_date DESC,
                     (MAX(other.given_date) - cand.given_date)
        ) AS rn,
        cand.client_id,
        cand.given_date AS PREVIOUS_DATE,
        MAX(other.given_date) - cand.given_date AS diff,
        (other.given_date) AS LAST_DATE
    FROM myTable AS cand
    JOIN myTable AS other
        ON other.client_id = cand.client_id
    WHERE cand.given_date <> other.given_date
    GROUP BY cand.client_id, cand.given_date, other.given_date
) AS ranked
WHERE ranked.rn = 1

Determine first year of minimum consecutive year range and count of consecutive years

Given the following table,
PersonID Year
---------- ----------
1 1991
1 1992
1 1993
1 1994
1 1996
1 1997
1 1998
1 1999
1 2000
1 2001
1 2002
1 2003
2 1999
2 2000
... ...
Is there a way with a SQL select query to get the first year of the most recent range of consecutive years meeting a minimum number, as well as the total consecutive years? In this case, for 4 year minimum, for personID 1, it would return 1996 and 8.
This will be joined to another table on personID, so the personID is not specific.
Thanks for your help.
You can create islands of years in the cte and check your conditions:
-- @PersonId: person to inspect; @cnt: minimum length of a consecutive run.
declare @PersonId int = 1, @cnt int = 4
;with cte_numbered as (
    select
        PersonID,
        [Year],
        row_number() over(partition by PersonID order by [Year]) as rn
    from Table1
), cte_grouped as (
    -- [Year] - rn is constant across a run of consecutive years, so grouping
    -- by it yields one row per run: its first year and its length.
    select
        PersonID, min([Year]) as [Year], count(*) as cnt
    from cte_numbered
    group by PersonID, [Year] - rn
)
-- Most recent run that is long enough.
select top 1 *
from cte_grouped
where PersonId = @PersonId and cnt >= @cnt
order by [Year] desc
sql fiddle demo
You also could do something more optimized, like this
-- @PersonId: person to inspect; @cnt: minimum length of a consecutive run.
declare @PersonId int = 1, @cnt int = 4
;with cte_numbered as (
    select
        PersonID,
        [Year],
        row_number() over(partition by PersonID order by [Year]) as rn
    from Table1
    where personId = @personId
), cte_grouped as (
    -- Within one run ([year] - rn constant) cnt is the position of the year
    -- in its run, i.e. the run length up to and including that year.
    select
        row_number() over(partition by [year] - rn order by year) as cnt, year
    from cte_numbered
)
-- Latest year whose run is already long enough; stepping back cnt - 1 years
-- from it gives the first year of that run.
select top 1 cnt, year - cnt + 1 as FirstYear
from cte_grouped
where cnt >= @cnt
order by [Year] desc, cnt desc
sql fiddle demo
Using two CTEs to create row number groupings allows you to group by PersonID and display all personIDs that it applies to:
-- Minimum run length; runs of exactly this length qualify (>=).
Declare @MinimumConsecutiveYears int=4
;With YearGroupings as (
Select
PersonID
,year
,row_number() over(partition by personid order by year asc) rown
From #years
)
, ConsecutiveYears as (
-- year - rown is constant across consecutive years, so each group is one run.
-- Qualifying runs are ranked newest first because the question asks for the
-- most recent run, not the longest one.
Select
PersonID
,min(year) as MinYear
,count(rown) as ConsecutiveYears
,row_number() over(partition by PersonID order by min(year) desc) rown
From YearGroupings
Group By PersonID,year-rown
Having Count(rown)>=@MinimumConsecutiveYears
)
Select PersonID,MinYear,ConsecutiveYears
From ConsecutiveYears
Where Rown=1
Alternatively, without CTEs:
-- Minimum run length; runs of exactly this length qualify (>=).
Declare @MinimumConsecutiveYears int=4
-- Step 1: number each person's years in ascending order.
Select
PersonID
,year
,row_number() over(partition by personid order by year asc) rown
Into #YearGroupings
From #years
-- Step 2: year - rown is constant across consecutive years, so each group is
-- one run. Reads the temp table written by step 1 (#YearGroupings) and ranks
-- qualifying runs newest first, since the most recent run is wanted.
Select
PersonID
,min(year) as MinYear
,count(rown) as ConsecutiveYears
,row_number() over(partition by PersonID order by min(year) desc) rown
Into #ConsecutiveYears
From #YearGroupings
Group By PersonID,year-rown
Having Count(rown)>=@MinimumConsecutiveYears
-- Step 3: the top-ranked run per person.
Select PersonID,MinYear,ConsecutiveYears
From #ConsecutiveYears
Where Rown=1
try this:
declare @minnumber int
set @minnumber = 4      -- minimum number of consecutive years
declare @personid int
set @personid = 0       -- PersonID to inspect
-- [Year] minus its row number is constant across consecutive years, so it
-- labels each run. Filtering on "Row > @minnumber" would merely skip the first
-- rows and count everything after them, whether or not the years are
-- consecutive.
select top 1 orig.[PersonID], min(orig.[Year]) as FirstYear, count(*) as TCYears
from
(
    SELECT [PersonID], [Year],
           [Year] - ROW_NUMBER() OVER (Partition by [PersonID] ORDER BY [Year]) AS Island
    from Table1
    where PersonID = @personid
) orig
group by orig.PersonID, orig.Island
having count(*) >= @minnumber
-- most recent qualifying run first
order by FirstYear desc

How to maintain cumulative sum for each User in SQL server

I had a table like
ID UserID rupees time
1 1 200 2014-01-05
---------------------------------
2 1 500 2014-04-06
----------------------------------
3 2 10 2014-05-05
----------------------------------
4 2 20 2014-05-06
----------------------------------
I want the output to look like this:
ID UserID Rupees time CumulativeSum
1 1 200 2014-01-05 200
-------------------------------------------------
2 1 500 2014-04-06 700
-------------------------------------------------
3 2 10 2014-05-06 10
-------------------------------------------------
4 2 20 2014-05-06 30
---------------------------------------------------
How can I get this table as output?
Please try using CTE:
-- Number each user's rows by [time]; the running total of a row is the sum of
-- that user's rows numbered up to and including it.
;with ordered as (
    select
        *,
        ROW_NUMBER() over(partition by UserId order by [time]) RN
    from tbl
)
select
    cur.UserID,
    cur.rupees,
    cur.[time],
    running.CumulativeSum
from ordered cur
cross apply (
    select SUM(prior.rupees) as CumulativeSum
    from ordered prior
    where prior.UserID = cur.UserID
      and prior.RN <= cur.RN
) running
For records with column value time increasing, try the below query:
-- Running total per user: for each row, sum that user's rows whose [time] is
-- not later than the row's own [time].
select
    cur.UserID,
    cur.rupees,
    cur.[time],
    (
        select SUM(earlier.rupees)
        from tbl earlier
        where earlier.[time] <= cur.[time]
          and earlier.UserID = cur.UserID
    ) CumulativeSum
from tbl cur
For SQL Server 2012 or later, you can use SUM() with an OVER clause that specifies a ROW clause:
-- Sample data in a table variable.
declare @t table (ID int,UserID int,rupees int,[time] date)
insert into @t(ID,UserID,rupees,[time]) values
(1,1,200,'20140105'),
(2,1,500,'20140406'),
(3,2, 10,'20140505'),
(4,2, 20,'20140506')
-- Running total per user: the frame runs from the user's first row (in id
-- order) up to the current row.
select
    *,
    SUM(rupees) OVER (
        PARTITION BY UserID
        ORDER BY id /* or time? */
        ROWS BETWEEN
            UNBOUNDED PRECEDING AND
            CURRENT ROW)
        as total
from @t
Result:
ID UserID rupees time total
----------- ----------- ----------- ---------- -----------
1 1 200 2014-01-05 200
2 1 500 2014-04-06 700
3 2 10 2014-05-05 10
4 2 20 2014-05-06 30
-- Sample data in a table variable.
DECLARE @t table (UserID INT,rupees INT,DateKey Date )
INSERT INTO @t (UserID, rupees, DateKey) VALUES
(1,200,'2014-01-05'),
(2,300,'2014-01-06'),
(2,800,'2014-03-06')
-- Running total per user in date order. The subquery is correlated on UserID
-- and DateKey; comparing rupees instead would mix users together and follow
-- amount order rather than time order.
select tt.UserID,
       tt.rupees,
       tt.DateKey,
       (SELECT SUM(t.rupees)
        from @t t
        where t.UserID = tt.UserID
          and t.DateKey <= tt.DateKey) as CumulativeSum
from @t tt
Hope this too helps you.
-- Sample data in a table variable.
DECLARE @tab TABLE (id INT,userId INT,rupees INT,[time] Date)
INSERT INTO @tab (id, userId, rupees, [time]) VALUES
(1,1,200 ,'2014-01-05'),
(2,1,500 ,'2014-04-06'),
(3,2,10 ,'2014-05-05'),
(4,2,20 ,'2014-05-06')
-- LU is the row being reported; B supplies every row of the same user whose
-- position (by [time]) is at or before LU's, and those rows are summed.
SELECT LU.id,LU.userId,LU.rupees,LU.time,SUM(B.rupees) CumulativeSum
FROM (SELECT *,ROW_NUMBER() OVER (PARTITION BY userId ORDER BY [time]) R FROM @tab) B
JOIN (SELECT *,ROW_NUMBER() OVER (PARTITION BY userId ORDER BY [time]) R FROM @tab) LU
ON B.userId = LU.userId AND B.R <= LU.R
GROUP BY LU.id,LU.userId,LU.rupees,LU.time
Result
I am assuming that you are not using SQL Server 2012, which provides the cumulative sum function. The other answers use some form of the row_number() function, but this seems totally unnecessary. I usually approach cumulative sums using correlated subqueries:
-- Running total per user: sum of the user's rows whose ID is not greater than
-- the current row's ID. "tbl" stands for the real table name; the bare word
-- TABLE is reserved and cannot be used as an identifier.
select ID, UserID, rupees, [time],
       (select sum(rupees)
        from tbl t2
        where t2.UserId = t.UserId and
              t2.ID <= t.ID
       ) as CumulativeSum
from tbl t;
This requires having a column that uniquely identifies each row, and that seems to be the purpose of id. For performance, I would want to have an index on table(UserId, ID, rupees).
-- Running total per user in id order. The explicit ROWS frame makes each row
-- add only itself and the rows before it; the default RANGE frame would also
-- add all rows tied on id.
select *, SUM(rupees) OVER (
PARTITION BY UserID
ORDER BY id
ROWS UNBOUNDED PRECEDING) as CumSum from #tbl

Query to return first date of missing date ranges

Looking for help with a query using SQL 2008 R2... I have a table with client and date fields. Most clients have a record for most dates, however some don't.
For example I have this data:
CLIENTID DT
1 5/1/14
1 5/2/14
2 5/3/14
3 5/1/14
3 5/2/14
I can find the missing dates for each CLIENTID by creating a temp table with all possible dates for the period and then joining that to each CLIENTID and DT and only selecting where there is a NULL.
This is what I can get easily for the date range 5/1/14 to 5/4/14:
CLIENTID DTMISSED
1 5/3/14
1 5/4/14
2 5/1/14
2 5/2/14
2 5/4/14
3 5/3/14
3 5/4/14
However I want to group each consecutive missed period together and get the beginning of each period and the length.
For example, if I use the date range 5/1/14 to 5/4/14 I'd like to get:
CLIENTID DTSTART MISSED
1 5/3/14 2
2 5/1/14 2
2 5/4/14 1
3 5/3/14 2
Thanks for helping!
It's fascinating how much more elegantly, and also more efficiently, this kind of problem can be solved in 2012.
First, the tables:
-- Client activity: one row per (client, date) that has a record.
create table #t (CLIENTID int, DT date)
go
-- Dates use the yyyymmdd form so they parse the same way under every
-- language / DATEFORMAT setting ('5/1/14' can be read as 5 January).
insert #t (CLIENTID, DT) values
(1, '20140501'),
(1, '20140502'),
(2, '20140503'),
(3, '20140501'),
(3, '20140502')
go
-- Every date in the period being checked.
create table #calendar (dt date)
go
insert #calendar (dt) values ('20140501'),('20140502'),('20140503'),('20140504')
go
Here's the 2008 solution:
;with missed as (
    -- every (calendar date, client) pair that has no row in #t
    select c.dt, cl.clientid,
           row_number() over(order by cl.clientid, c.dt) as rn
    from #calendar c
    cross join (select distinct clientid from #t) cl
    where not exists (select * from #t where c.dt=#t.dt and cl.clientid=#t.clientid)
),
linked as (
    -- attach the same client's missed row for the previous day, if any;
    -- prev_dt is NULL when this row starts a new missed period
    select cur.*, prev.dt as prev_dt
    from missed cur
    left join missed prev on cur.clientid=prev.clientid and cur.dt=dateadd(day,1,prev.dt)
),
grouped as (
    -- running count of period starts up to this row (correlated subquery,
    -- since 2008 has no ordered windowed SUM)
    select l.*,
           (select sum(case when l2.prev_dt is null then 1 else 0 end)
            from linked l2
            where l2.rn<=l.rn) as grp
    from linked l
)
-- one row per missed period: its first date and its length in days
select clientid, min(dt) as DTSTART, count(*) as MISSED
from grouped
group by clientid, grp
order by clientid, DTSTART
Compare it to 2012:
;with missed as (
    -- every (calendar date, client) pair that has no row in #t
    select c.dt, cl.clientid
    from #calendar c
    cross join (select distinct clientid from #t) cl
    where not exists (select * from #t where c.dt=#t.dt and cl.clientid=#t.clientid)
),
grouped as (
    -- A row with no missed row for the previous day starts a new period; the
    -- running count of such starts labels each period. The explicit ROWS frame
    -- makes each row add only itself and the rows ordered before it.
    select cur.clientid, cur.dt,
           sum(case when prev.dt is null then 1 else 0 end)
               over(order by cur.clientid, cur.dt rows unbounded preceding) as grp
    from missed cur
    left join missed prev on cur.clientid=prev.clientid and cur.dt=dateadd(day,1,prev.dt)
)
-- one row per missed period: its first date and its length in days
select clientid, min(dt) as DTSTART, count(*) as MISSED
from grouped
group by clientid, grp
order by clientid, DTSTART