Determine first year of minimum consecutive year range and count of consecutive years - sql

Given the following table,
PersonID Year
---------- ----------
1 1991
1 1992
1 1993
1 1994
1 1996
1 1997
1 1998
1 1999
1 2000
1 2001
1 2002
1 2003
2 1999
2 2000
... ...
Is there a way with a SQL select query to get the first year of the most recent range of consecutive years meeting a minimum number, as well as the total consecutive years? In this case, for 4 year minimum, for personID 1, it would return 1996 and 8.
This will be joined to another table on personID, so the personID is not specific.
Thanks for your help.

You can create islands of years in the cte and check your conditions:
-- Most recent run ("island") of consecutive years for one person that is at
-- least @cnt years long. Returns the island's first year and its length.
-- NOTE: assumes one row per (PersonID, [Year]); duplicate years would shift
-- rn and split an island.
declare @PersonId int = 1, @cnt int = 4;

with cte_numbered as (
    -- [Year] - rn stays constant while the years are consecutive, so it
    -- works as a key identifying each island.
    select
        PersonID,
        [Year],
        row_number() over (partition by PersonID order by [Year]) as rn
    from Table1
), cte_grouped as (
    -- One row per island: its first year and the number of years in it.
    select
        PersonID,
        min([Year]) as [Year],
        count(*) as cnt
    from cte_numbered
    group by PersonID, [Year] - rn
)
select top 1
    PersonID,
    [Year],
    cnt
from cte_grouped
where PersonID = @PersonId
  and cnt >= @cnt
order by [Year] desc;   -- latest qualifying island first
sql fiddle demo
You also could do something more optimized, like this
-- Variant that filters to one person up front and uses a running count
-- inside each island instead of a GROUP BY.
declare @PersonId int = 1, @cnt int = 4;

with cte_numbered as (
    select
        PersonID,
        [Year],
        row_number() over (partition by PersonID order by [Year]) as rn
    from Table1
    where PersonID = @PersonId
), cte_grouped as (
    -- cnt is the position of [Year] inside its island ([Year] - rn is the
    -- island key), i.e. the number of consecutive years up to this year.
    select
        row_number() over (partition by [Year] - rn order by [Year]) as cnt,
        [Year]
    from cte_numbered
)
-- The latest year whose running count reaches @cnt is the last year of the
-- most recent qualifying island; its first year is [Year] - cnt + 1.
select top 1
    cnt,
    [Year] - cnt + 1 as FirstYear
from cte_grouped
where cnt >= @cnt
order by [Year] desc, cnt desc;
sql fiddle demo

Using two CTEs to create row number groupings allows you to group by PersonID and display all personIDs that it applies to:
-- For every PersonID, return the first year and length of the most recent
-- run of consecutive years that is at least @MinimumConsecutiveYears long.
Declare @MinimumConsecutiveYears int = 4;

With YearGroupings as (
    -- Number each person's years; [year] - rown is constant within a run.
    Select
        PersonID,
        [year],
        row_number() over (partition by PersonID order by [year] asc) as rown
    From #years
),
ConsecutiveYears as (
    Select
        PersonID,
        min([year]) as MinYear,
        count(rown) as ConsecutiveYears,
        -- Rank runs newest-first so rown = 1 is the most recent qualifying run.
        row_number() over (partition by PersonID order by min([year]) desc) as rown
    From YearGroupings
    Group By PersonID, [year] - rown
    -- >= (not >): a run of exactly the minimum length must qualify.
    Having count(rown) >= @MinimumConsecutiveYears
)
Select
    PersonID,
    MinYear,
    ConsecutiveYears
From ConsecutiveYears
Where rown = 1;
Alternatively, without CTEs:
-- Same logic as the CTE version, materialised step by step in temp tables.
Declare @MinimumConsecutiveYears int = 4;

-- Step 1: number each person's years; [year] - rown is constant within a run.
Select
    PersonID,
    [year],
    row_number() over (partition by PersonID order by [year] asc) as rown
Into #YearGroupings
From #years;

-- Step 2: collapse each run to one row and rank the qualifying runs
-- newest-first per person.
Select
    PersonID,
    min([year]) as MinYear,
    count(rown) as ConsecutiveYears,
    row_number() over (partition by PersonID order by min([year]) desc) as rown
Into #ConsecutiveYears
From #YearGroupings          -- the temp table created in step 1
Group By PersonID, [year] - rown
-- >= (not >): a run of exactly the minimum length must qualify.
Having count(rown) >= @MinimumConsecutiveYears;

-- Step 3: keep the most recent qualifying run per person.
Select
    PersonID,
    MinYear,
    ConsecutiveYears
From #ConsecutiveYears
Where rown = 1;

try this:
-- First year (FirstYear) and length (TCYears) of the most recent run of
-- consecutive years, at least @minnumber long, for a single person.
declare @minnumber int;
set @minnumber = 4;
declare @personid int;
set @personid = 0;   -- set to the PersonID to look up (the question's example uses 1)

select top 1
    runs.[PersonID],
    runs.FirstYear,
    runs.TCYears
from (
    -- One row per run: [Year] - rn is constant while years are consecutive.
    select
        orig.[PersonID],
        min(orig.[Year]) as FirstYear,
        count(*) as TCYears
    from (
        -- add row number, sorted by year column
        select
            row_number() over (partition by [PersonID] order by [Year]) as rn,
            [PersonID],
            [Year]
        from Table1
        where PersonID = @personid
    ) orig
    group by orig.[PersonID], orig.[Year] - orig.rn
) runs
where runs.TCYears >= @minnumber
order by runs.FirstYear desc;   -- most recent qualifying run first

Related

Selecting rows that have row_number more than 1

I have a table as following (using bigquery):
id
year
month
sales
row_number
111
2020
11
1000
1
111
2020
12
2000
2
112
2020
11
3000
1
113
2020
11
1000
1
Is there a way in which I can select rows that have row numbers more than one?
For example, my desired output is:
id
year
month
sales
row_number
111
2020
11
1000
1
111
2020
12
2000
2
I don't want to just exclusively select rows with row_number = 2 but also row_number = 1 as well.
The original code block I used for the first table result is:
-- Monthly sales per id, numbered within each id.
-- NOTE(review): ORDER BY id inside a partition on id does not define an
-- order between rows of the same id; ordering by year, month would make the
-- numbering deterministic.
SELECT
  id,
  year,
  month,
  SUM(sales) AS sales,
  ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
FROM
  table
GROUP BY
  id, year, month
You can use window functions:
-- Keep only the rows whose id occurs more than once in t; the helper
-- column cnt is dropped from the output.
WITH counted AS (
  SELECT
    t.*,
    COUNT(*) OVER (PARTITION BY id) AS cnt
  FROM t
)
SELECT counted.* EXCEPT (cnt)
FROM counted
WHERE cnt > 1;
As applied to your aggregation query:
-- Monthly sales per id, keeping only ids that have more than one
-- (year, month) row after aggregation.
SELECT iym.* EXCEPT (cnt)
FROM (
  SELECT
    id,
    year,
    month,
    SUM(sales) AS sales,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number,
    -- Number of aggregated rows per id. No ORDER BY needed: every row of a
    -- partition on id is a peer, so the count is the same with or without it.
    COUNT(*) OVER (PARTITION BY id) AS cnt
  FROM table
  GROUP BY id, year, month
) iym
WHERE cnt > 1;
You can wrap your query as in below example
-- Template: flag every row of an id that has at least one row_number > 1,
-- then return the flagged rows without the helper column.
WITH flagged AS (
  SELECT
    *,
    COUNTIF(row_number > 1) OVER (PARTITION BY id) > 0 AS flag
  FROM (YOUR_ORIGINAL_QUERY)
)
SELECT * EXCEPT (flag)
FROM flagged
WHERE flag
so it can look as
-- The wrapper applied to the aggregation query: keep all rows of any id
-- that has a row numbered above 1.
WITH monthly AS (
  SELECT
    id,
    year,
    month,
    SUM(sales) AS sales,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
  FROM table
  GROUP BY id, year, month
),
flagged AS (
  SELECT
    *,
    COUNTIF(row_number > 1) OVER (PARTITION BY id) > 0 AS flag
  FROM monthly
)
SELECT * EXCEPT (flag)
FROM flagged
WHERE flag
so when applied to sample data in your question - it will produce below output
Try this:
-- Return every aggregated row whose id also has a row numbered 2,
-- i.e. ids with more than one row.
WITH tmp AS (
  SELECT
    id,
    year,
    month,
    SUM(sales) AS sales,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS row_number
  FROM table
  GROUP BY id, year, month
)
SELECT *
FROM tmp AS a
WHERE EXISTS (
  SELECT 1
  FROM tmp AS b
  WHERE b.id = a.id
    AND b.row_number = 2
)
This is a clear-cut case for an EXISTS subquery.
This is what I use. It's similar to @ElapsedSoul's answer, but from my understanding "IN" is better than "EXISTS" for a static list; I'm not sure whether the performance difference, if any, is significant:
Difference between EXISTS and IN in SQL?
-- Return all aggregated rows for ids that have at least one row with
-- ROW_NUM above 1.
WITH numbered AS (
  SELECT
    id,
    year,
    month,
    SUM(sales) AS sales,
    ROW_NUMBER() OVER (PARTITION BY id ORDER BY id ASC) AS ROW_NUM
  FROM table
  GROUP BY id, year, month
),
repeated_ids AS (
  SELECT id
  FROM numbered
  WHERE ROW_NUM > 1
)
SELECT *
FROM numbered
WHERE id IN (SELECT id FROM repeated_ids);

Create episode for each value with new Begin and End Dates

This is in reference to below Question
Loop through each value to the seq num
But now the client wants to see the data differently, so I have started a new thread for this question.
below is the requirement.
This is the data .
ID seqNum DOS Service End Date
1 1 1/1/2017 1/15/2017
1 2 1/16/2017 1/16/2017
1 3 1/17/2017 1/21/2017
1 4 1/22/2017 2/13/2017
1 5 2/14/2017 3/21/2017
1 6 2/16/2017 3/21/2017
Expected outPut:
ID SeqNum DOSBeg DOSEnd
1 1 1/1/2017 1/30/2017
1 2 1/31/2017 3/1/2017
1 3 3/2/2017 3/31/2017
For each DOSBeg, add 29 days to get DOSEnd. Then add 1 day to DOSEnd (1/30/2017) to get the new DOSBeg (1/31/2017).
Now add 29 days to 1/31/2017, which gives 3/1/2017 as the next DOSEnd. Repeat this until DOSEnd >= the max End Date, i.e. 3/21/2017.
Basically, we need episode of 29 days for each ID.
I tried with this code and it is giving me duplicates.
-- Asker's attempt at generating 30-day episodes (DOSBeg .. DOSBeg + 29 days).
with cte as (
-- Anchor member: one starting episode for every row read from #temp.
-- NOTE(review): if #temp holds several rows per ID, each one seeds its own
-- chain, which would explain the duplicates reported - confirm.
select ID, minDate as DOSBeg,dateadd(day,29,mindate) as DOSEnd
from #temp
union all
-- Recursive member: the next episode starts the day after the previous one ends.
-- It has no WHERE clause, so nothing in this CTE stops the recursion.
select ID,dateadd(day,1,DOSEnd) as DOSBeg,dateadd(day,29,dateadd(day,1,DOSEnd)) as DOSEnd
from cte
)
select ID,DOSBeg,DOSEnd
from cte
-- MAXRECURSION 0 removes the default recursion limit.
OPTION (MAXRECURSION 0)
Here mindate is Minimum DOS for this ID i.e. 1/1/2017
I came up with below logic and this is working fine for me. Is there any better way than this ?
-- Build 30-day episodes per ID, starting at the ID's earliest DOS and
-- continuing while an episode start is on or before the ID's latest
-- ServiceEndDate.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date);

insert into @table
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118');

select * into #temp from @table;

-- Per-row copy of the data carrying each ID's overall first DOS (minDate)
-- and last ServiceEndDate (maxDate).
select distinct
    ID,
    cast(min(DOS) over (partition by ID) as date) as minDate,
    row_number() over (partition by ID order by ID, DOS) as SeqNum,
    DOS,
    max(ServiceEndDate) over (partition by ID) as maxDate
into #data
from #temp;

-- Episode start dates: begin at minDate and step 30 days at a time while the
-- next start is still on or before maxDate. #data has one row per source row,
-- so the chains repeat per ID; DISTINCT below collapses them.
with cte as (
    select ID, minDate as startdate, maxDate
    from #data
    union all
    select ID, dateadd(day, 30, startdate) as startdate, maxDate
    from cte
    where maxDate >= dateadd(day, 30, startdate)
)
select distinct ID, startdate
into #StartDateLogic
from cte
option (maxrecursion 0);

-- Final result set: each episode runs from StartDate to StartDate + 29 days.
select
    ID,
    row_number() over (partition by ID order by ID, StartDate) as SeqNum,
    StartDate,
    dateadd(day, 29, StartDate) as EndDate
from #StartDateLogic;
You were on the right track with the recursive CTE, but you forgot the anchor.
-- Recursive-CTE answer: seed one episode per id from its first row, then
-- chain further episodes by walking the id's rows in seqNum order.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date);

insert into @table
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118');

with dates as (
    -- Anchor: the lowest-seqNum row of every id (row_number() = 1 ties across ids).
    select top 1 with ties id, seqNum, DOSBeg = DOS, DOSEnd = dateadd(day, 29, DOS)
    from @table
    order by row_number() over (partition by id order by seqNum)
    union all
    -- Recursive step: the next episode starts the day after the previous one
    -- ends; recursion advances one source row (seqNum + 1) at a time.
    select t.id, t.seqNum, DOSBeg = dateadd(day, 1, d.DOSEnd), DOSEnd = dateadd(day, 29, dateadd(day, 1, d.DOSEnd))
    from dates d
    inner join @table t on
        d.id = t.id and t.seqNum = d.seqNum + 1
)
select *
from dates d
-- Drop episodes ending more than a month after the id's latest ServiceEndDate.
where d.DOSEnd <= (select max(dateadd(month, 1, ServiceEndDate)) from @table where id = d.id)
order by id, seqNum;

Find date sequence in SQL Server

I'm trying to find the maximum sequence of days by customer in my data.
I want to understand what is the max sequence of days that specific customer made. If someone enter to my app in the 25/8/16 AND 26/08/16 AND 27/08/16 AND 01/09/16 AND 02/09/16 - The max sequence will be 3 days (25,26,27).
In the end (The output) I want to get two fields: custid | MaxDaySequence
I have the following fields in my data table:
custid | orderdate (timestamp)
For example:
custid orderdate
1 25/08/2007
1 03/10/2007
1 13/10/2007
1 15/01/2008
1 16/03/2008
1 09/04/2008
2 18/09/2006
2 08/08/2007
2 28/11/2007
2 04/03/2008
3 27/11/2006
3 15/04/2007
3 13/05/2007
3 19/06/2007
3 22/09/2007
3 25/09/2007
3 28/01/2008
I'm using SQL Server 2014.
Thanks
There is a trick: if you have an incrementing number ordered by your date, then subtracting that number of days from each date gives the same result for all dates that are consecutive. So, like this:
-- One row per customer and run of consecutive order dates.
-- orderdate minus rn days is identical for all dates of a consecutive run.
-- NOTE: assumes one row per customer per day with no time-of-day part;
-- otherwise de-duplicate on CAST(orderdate AS date) first.
SELECT custid,
       min(orderdate) as start_of_group,
       max(orderdate) as end_of_group,
       count(*) as num_days
FROM (
    SELECT custid, orderdate,
           ROW_NUMBER() OVER (PARTITION BY custid ORDER BY orderdate) as rn
    FROM Orders   -- table name is not given in the question; substitute your own
) x
GROUP BY custid, dateadd(day, - rn, orderdate);
You could take the result of this and pull out the max number of days to solve your problem:
-- Longest run of consecutive order dates per customer.
-- NOTE: assumes one row per customer per day with no time-of-day part;
-- otherwise de-duplicate on CAST(orderdate AS date) first.
SELECT custid, max(num_days) as longest
FROM (
    -- Length of every run; orderdate minus rn days identifies the run.
    SELECT custid,
           count(*) as num_days
    FROM (
        SELECT custid, orderdate,
               ROW_NUMBER() OVER (PARTITION BY custid ORDER BY orderdate) as rn
        FROM Orders   -- table name is not given in the question; substitute your own
    ) x
    GROUP BY custid, dateadd(day, - rn, orderdate)
) y
GROUP BY custid
If you want to solve it with MySQL:
-- MySQL version: longest run of consecutive visit days per user, using user
-- variables to emulate ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY dat).
-- NOTE(review): this depends on the assignments being evaluated in row order;
-- on MySQL 8+ prefer the ROW_NUMBER() window function - confirm target version.
select user_id, max(num_days) as longest
from (
    select user_id, count(*) as num_days
    from (
        select
            -- Running row number that restarts whenever user_id changes.
            -- (alias is rnk, not rank: RANK is a reserved word in MySQL 8)
            (case a1.user_id
                when @curType
                    then @curRow := @curRow + 1
                else @curRow := 1 and @curType := a1.user_id
             end) as rnk,
            a1.user_id,
            a1.last_update as dat
        from (
            -- One row per user and calendar day (first 10 characters of the timestamp).
            select a2.user_id, left(from_unixtime(a2.last_update), 10) as 'last_update'
            from visits as a2
            group by 1, 2
        ) as a1,
        (select @curRow := 0, @curType := '') r
        order by a1.user_id desc, dat
    ) x
    -- day minus row number is constant within a run of consecutive days
    group by user_id, date_add(dat, interval -rnk day)
) y
group by user_id
order by longest desc

get maximum date in separated columns (month and year)

I have this table :
Month Year Provider Number
1 2015 1 345
2 2015 1 345
3 2015 1 345
12 2015 2 444
1 2016 2 444
Let's say I want to get all different numbers by provider but only the max month and max year, something like this:
Month Year Provider Number
3 2015 1 345
1 2016 2 444
I have this ugly query that I would like to improve :
-- For every row of dbo.Info, look up the latest year for its
-- (Provider, Number) pair and the latest month within that year.
SELECT
    -- latest month within the latest year of this Provider/Number
    (SELECT max([Month])
     FROM dbo.Info b
     WHERE b.Provider = a.Provider
       AND b.Number = a.Number
       AND [Year] = (SELECT max([Year])
                     FROM dbo.Info c
                     WHERE c.Provider = a.Provider
                       AND c.Number = a.Number)) AS [Month],
    -- latest year of this Provider/Number
    (SELECT max([Year])
     FROM dbo.Info d
     WHERE d.Provider = a.Provider
       AND d.Number = a.Number) AS [Year],
    a.Provider,
    a.Number
FROM dbo.Info a
You could use a row_number and cte
-- Latest ([Year], [Month]) row per Provider: number each provider's rows
-- newest-first and keep the row numbered 1.
SELECT ranked.*
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY Provider
            ORDER BY [Year] DESC, [Month] DESC
        ) AS rNum
    FROM Info
) AS ranked
WHERE ranked.rNum = 1
If you want to create a view then
-- View exposing the latest ([Year], [Month]) row per Provider
-- (all Info columns plus the helper column rNum, which is always 1).
CREATE VIEW SomeViewName
AS
SELECT ranked.*
FROM (
    SELECT
        *,
        ROW_NUMBER() OVER (
            PARTITION BY Provider
            ORDER BY [Year] DESC, [Month] DESC
        ) AS rNum
    FROM Info
) AS ranked
WHERE ranked.rNum = 1
One option is to use row_number:
-- Newest row per provider, written as a CTE: rn = 1 marks the row with the
-- greatest ([year], [month]) within each provider.
with numbered as (
    select
        *,
        row_number() over (
            partition by provider
            order by [year] desc, [month] desc
        ) as rn
    from dbo.Info
)
select *
from numbered
where rn = 1
This assumes the number and provider fields are the same. If not, you may need to also partition by the number field.

TSQL - Get Data grouped by row number

I have a table with ID and Date field
ID |Date
1 |2013-5-22
1 |2013-5-23
1 |2013-5-25
1 |2013-5-26
2 |2013-5-26
2 |2013-5-27
1 |2013-5-27
1 |2013-5-28
With ROW_NUMBER I can group all the data by ID and get the min date and max date:
-- Collapse each ID's consecutive dates into ranges (StartDate .. EndDate).
;WITH q AS(
    SELECT f.*,
        -- day number minus row number is constant within a run of consecutive dates
        grp = DATEDIFF(day, 0, f.Date) - ROW_NUMBER() OVER (PARTITION BY f.ID ORDER BY f.Date)
    FROM myTable f
)
SELECT
    MIN(q.ID) as ID,
    MIN(q.Date) as StartDate,
    MAX(q.Date) as EndDate
FROM q
-- Group by run only; grouping by Date as well would put every row in its own group.
GROUP BY q.grp, q.ID
;
Result:
ID |StartDate |EndDate
1 |2013-5-22 |2013-5-23
2 |2013-5-26 |2013-5-27
1 |2013-5-25 |2013-5-28
Now I need to split the date ranges so that each one spans at most 3 days.
Example:
ID |StartDate |EndDate
1 |2013-5-22 |2013-5-23
2 |2013-5-26 |2013-5-27
1 |2013-5-25 |2013-5-27
1 |2013-5-28 |2013-5-28
Can someone, please, illuminate my way?
ty
EDIT
Sorry
-- One row per ID and run of consecutive dates, with the run's first and
-- last date.
SELECT
    runs.ID                AS ID,
    MIN(runs.Date)         AS StartDate,
    MAX(runs.Date)         AS EndDate
FROM (
    SELECT
        f.*,
        -- days since day 0 minus the per-ID row number: equal for consecutive dates
        DATEDIFF(day, 0, f.Date)
            - ROW_NUMBER() OVER (PARTITION BY f.ID ORDER BY f.Date) AS grp
    FROM MyTable f
) AS runs
GROUP BY runs.ID, runs.grp
;
My first attempt had a bug, try this instead:
-- Collapse each ID's consecutive dates into ranges, splitting any range
-- longer than 3 days into chunks of at most 3 days.
;WITH q AS(
    SELECT ID, Date,
        -- day number minus per-ID row number is constant within a consecutive run
        grp = DATEDIFF(day, 0, Date) - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Date)
    FROM myTable
), r as
(
    select id, date, grp,
        -- 0-based position inside the run, integer-divided by 3 => chunk index.
        -- Partition by ID as well as grp: different IDs can share a grp value,
        -- and their rows must not be numbered together.
        (ROW_NUMBER() OVER (PARTITION BY ID, grp ORDER BY Date) - 1) / 3 a
    from q
)
SELECT
    MIN(ID) as ID,
    MIN(Date) as StartDate,
    MAX(Date) as EndDate
FROM r
GROUP BY grp, ID, a