Sql query to Count Total Consecutive Years from latest year - sql

I have a table Temp:
-- Sample table for the question: one row per (ID, Year) pair.
-- No key or unique constraint is declared, so repeated (ID, Year) rows are allowed.
CREATE TABLE Temp
(
    [ID]   [int],
    [Year] [INT]
)
**ID Year**
1 2016
1 2016
1 2015
1 2012
1 2011
1 2010
2 2016
2 2015
2 2014
2 2012
2 2011
2 2010
2 2009
3 2016
3 2015
3 2004
3 1999
4 2016
4 2015
4 2014
4 2010
5 2016
5 2014
5 2013
I want to calculate the total consecutive years starting from the most recent Year.
Result should look like this:
ID Total Consecutive Yrs
1 2
2 3
3 2
4 3
5 1

-- Build two per-ID sequences (newest year first) that stay equal only while
-- the years are consecutive.
select ID,
       year,
       -- distance from the newest year, plus 1: 1, 2, 3, ... as long as no year is skipped
       first_value(year) over (partition by ID order by year desc) - year + 1 as x,
       -- position among the DISTINCT years: duplicate (ID, year) rows share a rank,
       -- so a repeated year does not push this sequence ahead of x
       dense_rank() over (partition by ID order by year desc) as rn
from Temp
e.g. for ID=1:
1 2016 1 1
1 2015 2 2
1 2012 5 3
1 2011 6 4
1 2010 7 5
As long as there's no gap, both sequences increase the same.
Now check for equal sequences and count the rows:
-- Number of consecutive years per ID, counted back from that ID's newest year.
with cte as
(
    select ID,
           year,
           -- distance from the newest year, plus 1: gap-free only for consecutive years
           first_value(year) over (partition by ID order by year desc) - year + 1 as x,
           -- rank among distinct years; duplicate rows for a year share the same rank
           dense_rank() over (partition by ID order by year desc) as rn
    from Temp
)
select ID,
       -- distinct, so a year stored twice (e.g. ID 1 / 2016) is counted once
       count(distinct year) as total_consecutive_years
from cte
where x = rn -- no gap between this year and the newest year
group by ID
Edit:
Based on your year zero comment:
-- Same as the consecutive-year count, but a year value of 0 is not counted.
with cte as
(
    select ID,
           year,
           -- distance from the newest year, plus 1: gap-free only for consecutive years
           first_value(year) over (partition by ID order by year desc) - year + 1 as x,
           -- rank among distinct years; duplicate rows for a year share the same rank
           dense_rank() over (partition by ID order by year desc) as rn
    from Temp
)
select ID,
       -- count each non-zero year once, even if it is stored more than once;
       -- yields 0 when only year zero qualifies
       count(distinct case when year <> 0 then year end) as total_consecutive_years
from cte
where x = rn -- no gap between this year and the newest year
group by ID

You can use lead and get this counts as below:
-- Consecutive years per Id, counted back from the newest year, using lead().
-- For each Id, the first row (newest first) that is followed by a gap marks
-- the end of the run; its rank among distinct years is the run length.
Select top (1) with ties Id, RowN as [Total Consecutive Years] from (
    Select Id
         -- 1 = the next older row is the same year or exactly one year earlier;
         -- 0 = a gap follows, or there is no older row at all (lead() is NULL),
         --     so an Id whose years never break still gets an end-of-run row
         , Num = case when ([Year] - lead([Year]) over (partition by Id order by [Year] desc) <= 1) then 1 else 0 end
         -- dense_rank, not row_number: a year stored twice must be counted once
         , RowN = dense_rank() over (partition by Id order by [Year] desc)
    from temp
) a
where a.Num = 0
-- keep, per Id, only the end-of-run row closest to the newest year
order by row_number() over (partition by Id order by RowN)
Output as below:
+----+-------------------------+
| Id | Total Consecutive Years |
+----+-------------------------+
| 1 | 2 |
| 2 | 3 |
| 3 | 2 |
| 4 | 3 |
| 5 | 1 |
+----+-------------------------+

You can do this using window functions:
-- Consecutive years per id, counted back from that id's newest year.
with ranked as (
    -- seqnum: 1 for the newest distinct year, 2 for the next one, ...
    select src.*,
           dense_rank() over (partition by id order by year desc) as seqnum
    from temp src
),
runs as (
    -- year + seqnum is constant within a run of consecutive years and is
    -- largest for the run that contains the newest year, so that run gets grp = 1
    select r.*,
           dense_rank() over (partition by id order by year + seqnum desc) as grp
    from ranked r
)
select id, count(distinct year)
from runs
where grp = 1
group by id;
This assumes that "most recent year" is per id.

Gordon Linoff,
Your code is awesome!
Your code pulls consecutive years from the most recent year.
I modified it to pull overall max consecutive years.
Posted here in case anyone else needs it:
-- Overall longest run of consecutive years per id (not only the most recent run).
select id, max(yr_cnt) max_consecutive_years
from (
    -- one row per run of consecutive years
    select id, grp,
           -- distinct: a year stored more than once must not lengthen the run
           count(distinct year) yr_cnt
    from (select t.*,
                 -- year + seqnum is constant within a run of consecutive years
                 dense_rank() over (partition by id order by year + seqnum desc) as grp
          from (select t.*,
                       dense_rank() over (partition by id order by year desc) as seqnum
                from temp t
               ) t
         ) t
    group by id, grp) t2
group by id;

Related

SQL getting top 2 rows by date per PolicyId but with distinct dates

ValId | PolicyId | Date | Value
------+----------+------------+-------
1 | 11 | 2020-06-01 | 2000
2 | 11 | 2020-06-03 | 3000
3 | 11 | 2020-06-03 | 4000
4 | 12 | 2020-06-02 | 8000
5 | 12 | 2020-06-03 | 8500
I wanted to get top 2 latest Val rows for each PolicyId but they cannot be from the same date.
Rows for PolicyId = 12 are returned correctly - ValId 4 and 5.
For PolicyId = 11, rows with ValId 2 and 3 are returned but as they are on the same date I wanted row of ValId 1 to be returned instead of ValId 2.
-- Asker's attempt: the two newest rows per PolicyId.
-- Rows are numbered newest date first, ties broken by highest ValId, so two
-- rows from the same date can both be returned.
WITH Numbered AS
(
    SELECT ValId,
           PolicyId,
           Value,
           Date,
           ROW_NUMBER() OVER (PARTITION BY PolicyId ORDER BY Date DESC, ValId DESC) AS RowNum
    FROM TVal
)
SELECT N.ValId, N.PolicyId, N.Value, N.Date
FROM Numbered AS N
WHERE N.RowNum <= 2
You can enumerate the rows by dates and within dates:
-- Latest two rows per policy, each taken from a different date.
select t.*
from (select tv.*,
             -- rank of the row's date within the policy (1 = latest date);
             -- ordering by date only, so rows on the same date share a rank
             dense_rank() over (partition by policyid order by date desc) as seqnum,
             -- position of the row within its own date (1 = highest valId)
             rank() over (partition by policyid, date order by valId desc) as seqnum_within_date
      from tval tv
     ) t
where seqnum <= 2 and seqnum_within_date = 1;
Using the suggestion from Gordon Linoff I was able to complete the sql as below
-- Latest two rows per policy, each taken from a different date.
-- Step 1 keeps one row per (policy, date); step 2 numbers those rows newest first.
Select v.* from
(
    select t.*,
           row_number() over (partition by policyid order by date desc, valId desc) as seqnum
    from (select tv.*,
                 -- 1 = highest valId on that date for the policy
                 dense_rank() over (partition by policyid, date order by valId desc) as seqnum_within_date
          from tval tv
         ) t
    where seqnum_within_date = 1
) v
where seqnum <= 2

How to select top 2 values for each id

I have a table with values
id sales date
1 5 "2015-01-04"
1 3 "2015-01-03"
1 1 "2015-01-01"
1 1 "2015-01-01"
2 7 "2015-01-05"
2 6 "2015-01-04"
2 4 "2015-01-03"
3 11 "2015-01-08"
3 10 "2015-01-07"
3 9 "2015-01-06"
3 8 "2015-01-05"
I want to select top two values of each id as shown in desired output.
Desired output:
id sales date
1 5 "2015-01-04"
1 3 "2015-01-03"
2 7 "2015-01-05"
2 6 "2015-01-04"
3 11 "2015-01-08"
3 10 "2015-01-07"
My attempt:
can someone help me with this. Thank you in advance!
-- Asker's attempt: list every transaction, grouped by salesperson, newest first.
-- No per-salesperson limit is applied.
select tx.salesperson_id,
       tx.id,
       tx.date
from transactions as tx
order by tx.salesperson_id asc,
         tx.date desc;
This can be done using window functions:
-- Rows belonging to the two most recent dates of each id.
with ranked as (
    select id,
           sales,
           "date",
           -- 1 = newest date for the id; rows sharing a date share the rank
           dense_rank() over (partition by id order by "date" desc) as rnk
    from transactions
)
select id, sales, "date"
from ranked
where rnk <= 2;
If there are multiple rows on the same date this might return more than two rows for the same ID. If you don't want that, use row_number() instead of dense_rank()
row_number() will get what you want.
-- Exactly two rows per id: the ones with the most recent dates.
-- When several rows share a date, which of them is picked is arbitrary.
select t1.id, t1.sales, t1.date, t1.rn
from (select id,
             sales,
             date,
             -- newest first, so rn 1 and 2 are the latest rows (the desired output)
             row_number() over (partition by id order by date desc) as rn
      from transactions) t1
where t1.rn <= 2

SQL Window Function - Number of Rows since last Max

I am trying to create a SQL query that will pull the number of rows since the last maximum value within a windows function over the last 5 rows. In the example below it would return 2 for row 8. The max value is 12 which is 2 rows from row 8.
For row 6 it would return 5 because the max value of 7 is 5 rows away.
|ID | Date | Amount
| 1 | 1/1/2019 | 7
| 2 | 1/2/2019 | 3
| 3 | 1/3/2019 | 4
| 4 | 1/4/2019 | 1
| 5 | 1/5/2019 | 1
| 6 | 1/6/2019 | 12
| 7 | 1/7/2019 | 2
| 8 | 1/8/2019 | 4
I tried the following:
-- Asker's attempt: rolling maximum of amount over the current row and the
-- five rows before it (by date).
SELECT ID,
       date,
       MAX(amount) OVER (
           ORDER BY date ASC
           ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
       ) AS mymax
FROM tbl
This gets me to the max values but I am unable to efficiently determine how many rows away it is. I was able to get close using multiple variables within the SELECT but this did not seem efficient or scalable.
You can calculate the cumulative maximum and then use row_number() on that.
So:
-- Number the rows that share the same rolling maximum, in date order.
with windowed as (
    select src.*,
           -- maximum of amount over the current row and the five rows before it
           max(amount) over (order by date rows between 5 preceding and current row) as running_max
    from tbl src
)
select w.*,
       -- starts at 1 on the first row (by date) carrying a given running_max value
       row_number() over (partition by running_max order by date) as rows_since_last_max
from windowed w;
I think this works for your sample data. It might not work if you have duplicates.
In that case, you can use date arithmetic:
-- Days between each row and the most recent row whose amount equals the
-- rolling maximum.
select t.*,
       datediff(day,
                -- latest date, up to the current row, on which the amount itself
                -- was the rolling maximum; a plain max(date) ordered by date is
                -- always the current row's date and would make the difference 0
                max(case when amount = running_max then date end)
                    over (partition by running_max order by date),
                date
               ) as days_since_most_recent_max5
from (select t.*,
             -- maximum of amount over the current row and the five rows before it
             max(amount) over (order by date rows between 5 preceding and current row) as running_max
      from tbl t
     ) t;
EDIT:
Here is an example using row number:
-- Rows between each row and the most recent row whose amount equals the
-- rolling maximum (0 on the row that holds the maximum itself).
select t.*,
       -- seqnum of the latest row, up to the current one, whose amount is the rolling maximum
       (seqnum - max(case when amount = running_max then seqnum end) over (partition by running_max order by date)) as rows_since_most_recent_max5
from (select t.*,
             -- maximum of amount over the current row and the five rows before it
             max(amount) over (order by date rows between 5 preceding and current row) as running_max,
             -- gap-free row position in date order
             row_number() over (order by date) as seqnum
      from tbl t
     ) t;
It would be :
-- Distance, in ID units, from each row back to the row that holds the rolling
-- maximum. NOTE(review): this relies on ID being gap-free and increasing with
-- date, as in the sample data - confirm for real tables.
select *,ID-
(
    -- max() keeps the subquery scalar when the maximum amount occurs more than
    -- once; ID <= q.ID stops it from matching rows after the current one
    SELECT max(src.ID)
    FROM Table_4 AS src
    WHERE src.amount = q.mymax
      AND src.ID <= q.ID
) as result
from (
    SELECT ID, date,
           -- maximum of amount over the current row and the five rows before it
           MAX(amount)
           OVER (ORDER BY date ASC ROWS 5 PRECEDING) mymax
    FROM Table_4
)as q

How to Generate Row number Partition by two column match in sql

Tbl1
---------------------------------------------------------
Id Date Qty ReOrder
---------------------------------------------------------
1 1-1-18 1 3
2 2-1-18 0 3
3 3-1-18 2 3
4 4-1-18 3< >3
5 5-1-18 2 3
6 6-1-18 0 3
7 7-1-18 1 3
8 8-1-18 0 3
---------------------------------------------------------
I want the result like below
---------------------------------------------------------
Id Date Qty ReOrder
---------------------------------------------------------
1 1-1-18 1 3
5 5-1-18 2 3
---------------------------------------------------------
if ReOrder not same with Qty then date will be same upto after reorder=Qty
You can use cumulative approach with row_number() function :
-- First row (lowest id) of each group, where a group ends at the row whose
-- qty equals reorder. Tbl1 is the table from the question; "table" itself is
-- a reserved word and cannot be used as a table name unquoted.
select top (1) with ties *
from (select *,
             -- scanning from the highest id downwards: NULL until a row with
             -- qty = reorder is reached, 'v' from that row on
             max(case when qty = reorder then 'v' end) over (order by id desc) grp
      from Tbl1
     ) t
-- row_number() is 1 for the lowest id of every group; WITH TIES keeps them all
order by row_number() over(partition by grp order by id);
Unfortunately this will require SQL Server, But you can also do:
-- First row (lowest id) of each group, where a group ends at the row whose
-- qty equals reorder. Tbl1 is the table from the question; "table" itself is
-- a reserved word and cannot be used as a table name unquoted.
select *
from (select *,
             -- 1 = lowest id within the group
             row_number() over(partition by grp order by id) seq
      from (select *,
                   -- scanning from the highest id downwards: NULL until a row
                   -- with qty = reorder is reached, 'v' from that row on
                   max(case when qty = reorder then 'v' end) over (order by id desc) grp
            from Tbl1
           ) t
     ) t
where seq = 1;

How to calculate the number of a day in series of consecutive dates?

I have a table
id name created_at
1 name 1 08/01/2017
2 name 2 08/02/2017
3 name 3 08/03/2017
4 name 4 08/05/2017
5 name 5 08/06/2017
6 name 6 08/07/2017
7 name 7 08/10/2017
8 name 8 08/12/2017
I need to add a column where be rank for all rows, but if they were created from day to day.
The result should be like below
id name created_at days_on
1 name 1 08/01/2017 1
2 name 2 08/02/2017 2
3 name 3 08/03/2017 3
4 name 4 08/05/2017 1
5 name 5 08/06/2017 2
6 name 6 08/07/2017 3
7 name 7 08/10/2017 null
8 name 8 08/12/2017 null
There are many answers describing typical approaches to similar problems, where you can also find an explanation of the techniques used below.
-- Number the rows inside each run of consecutive created_at days; rows that
-- form a run of length 1 get NULL.
with flagged as (
    select id, name, created_at,
           -- 1 when the previous row (by id) is not exactly one day earlier,
           -- including the very first row, whose lag() is NULL; otherwise 0
           ((lag(created_at) over (order by id) + 1) is distinct from created_at)::int as is_start
    from my_table
),
grouped as (
    select id, name, created_at,
           -- running total of run starts = run number
           sum(is_start) over (order by id) as part
    from flagged
)
select
    id, name, created_at,
    case
        when count(*) over (partition by part) > 1
            then row_number() over (partition by part order by id)
    end as rank
from grouped;
DbFiddle.
This is a variation of the group-and-islands problem. Let me show a solution using lag() to define the groups:
lag() to get the previous day
cumulative sum to get the groups
row_number() to assign the final values
This works as:
-- Number the rows inside each run of consecutive created_at days; rows that
-- form a run of length 1 get NULL.
select id, name, created_at,
       (case when count(*) over (partition by grp) > 1
             then row_number() over (partition by grp order by id)
        end) as days_on
from (select t.*,
             -- running count of run starts = run number. "is distinct from"
             -- treats the first row (prev_ca is NULL) as a run start, so it
             -- lands in the same group as the rows that follow it
             sum( (prev_ca is distinct from created_at - interval '1 day')::int ) over (order by id) as grp
      from (select t.*,
                   -- created_at of the previous row in id order
                   lag(created_at) over (order by id) as prev_ca
            from t
           ) t
     ) t;