TSQL - Get Data grouped by row number - sql

I have a table with ID and Date field
ID |Date
1 |2013-5-22
1 |2013-5-23
1 |2013-5-25
1 |2013-5-26
2 |2013-5-26
2 |2013-5-27
1 |2013-5-27
1 |2013-5-28
With ROW_NUMBER I can group all the data by ID and get the min date and max date of each run of consecutive dates:
-- Collapse runs of consecutive dates per ID into (StartDate, EndDate) islands.
-- grp is constant inside a run: the day number and the per-ID row number both
-- advance by 1 for consecutive dates, so their difference does not change.
;WITH q AS (
    SELECT f.*,
           grp = DATEDIFF(day, 0, f.Date) - ROW_NUMBER() OVER (PARTITION BY f.ID ORDER BY f.Date)
    FROM myTable f
)
SELECT
    MIN(q.ID)   AS ID,
    MIN(q.Date) AS StartDate,
    MAX(q.Date) AS EndDate
FROM q
-- Group by the island key only; grouping by Date as well would put every row in its own group.
GROUP BY q.grp, q.ID
;
Result:
ID |StartDate |EndDate
1 |2013-5-22 |2013-5-23
2 |2013-5-26 |2013-5-27
1 |2013-5-25 |2013-5-28
Now I need to split each date range into steps of at most 3 days.
Example:
ID |StartDate |EndDate
1 |2013-5-22 |2013-5-23
2 |2013-5-26 |2013-5-27
1 |2013-5-25 |2013-5-27
1 |2013-5-28 |2013-5-28
Can someone, please, illuminate my way?
ty
EDIT
Sorry
-- Islands of consecutive dates per ID.
-- Within one ID, (day number - row number) is the same for every date of a
-- consecutive run, so it serves as the grouping key for that run.
SELECT
    MIN(islands.ID)   AS ID,
    MIN(islands.Date) AS StartDate,
    MAX(islands.Date) AS EndDate
FROM (
    SELECT src.*,
           DATEDIFF(day, 0, src.Date)
             - ROW_NUMBER() OVER (PARTITION BY src.ID ORDER BY src.Date) AS grp
    FROM MyTable AS src
) AS islands
GROUP BY islands.ID, islands.grp
;

My first attempt had a bug, try this instead:
-- Step 1 (q): tag each row with its island key. grp is constant across a run
--             of consecutive dates within one ID.
-- Step 2 (r): number the rows inside each island and cut them into chunks of 3
--             with integer division (rows 1-3 -> 0, rows 4-6 -> 1, ...).
-- Step 3:     one output row per (ID, island, chunk).
;WITH q AS (
    SELECT ID, Date,
           grp = DATEDIFF(day, 0, Date) - ROW_NUMBER() OVER (PARTITION BY ID ORDER BY Date)
    FROM myTable
), r AS (
    SELECT ID, Date, grp,
           -- Partition by ID as well as grp: grp values are only unique within
           -- an ID, so two IDs can share a grp and must not be numbered together.
           (ROW_NUMBER() OVER (PARTITION BY ID, grp ORDER BY Date) - 1) / 3 AS chunk
    FROM q
)
SELECT
    MIN(ID)   AS ID,
    MIN(Date) AS StartDate,
    MAX(Date) AS EndDate
FROM r
GROUP BY grp, ID, chunk

Related

SQL Server : create group of N rows each and give group number for each group

I want to create a SQL query that SELECT a ID column and adds an extra column to the query which is a group number as shown in the output below.
Each group consists of 3 rows and should have the MIN(ID) as a GroupID for each group. The order by should be ASC on the ID column.
ID GroupNr
------------
100 100
101 100
102 100
103 103
104 103
105 103
106 106
107 106
108 106
I've tried solutions with ROW_NUMBER() and DENSE_RANK(). And also this query:
-- Attempt: MIN(ID) over a sliding frame made of the current row and the 2 rows before it.
-- The frame moves with every row, so this yields a rolling minimum rather than
-- one fixed value per block of 3 rows.
SELECT
*, MIN(ID) OVER (ORDER BY ID ASC ROWS 2 PRECEDING) AS Groupnr
FROM
Table
ORDER BY
ID ASC
Use row_number() to enumerate the rows, arithmetic to assign the group and then take the minimum of the id:
-- Number the rows by ID, bucket them three at a time with integer division,
-- then label every row with the smallest ID in its bucket.
SELECT t.*, MIN(ID) OVER (PARTITION BY grp) as groupnumber
FROM (SELECT t.*,
             ( (ROW_NUMBER() OVER (ORDER BY ID) - 1) / 3) as grp
      FROM Table t   -- alias is required: the inner select list references t.*
     ) t
ORDER BY ID ASC;
It is possible to do this without a subquery, but the logic is rather messy:
-- Same grouping without a subquery. The row's position modulo 3 tells how far
-- back the first row of its group is:
--   remainder 1 -> this row starts the group, 2 -> one row back, 0 -> two rows back.
SELECT
    t.*,
    CASE ROW_NUMBER() OVER (ORDER BY id) % 3
        WHEN 0 THEN LAG(id, 2) OVER (ORDER BY id)
        WHEN 2 THEN LAG(id, 1) OVER (ORDER BY id)
        ELSE id
    END AS groupnumber
FROM table t
ORDER BY id;
Assuming you want the lowest value in the group, and they are always groups of 3, rather than the NTILE (as Saravantn suggests, which splits the data into that many even(ish) groups), you could use a couple of window functions:
-- Inline sample IDs 100..108, bucketed three rows at a time; each row is then
-- labelled with the lowest ID of its bucket.
SELECT
    numbered.ID,
    MIN(numbered.ID) OVER (PARTITION BY numbered.Grp) AS GroupNr
FROM (
    SELECT src.ID,
           (ROW_NUMBER() OVER (ORDER BY src.ID) - 1) / 3 AS Grp
    FROM (VALUES (100), (101), (102),
                 (103), (104), (105),
                 (106), (107), (108)) AS src (ID)
) AS numbered;
-- Join every row to the minimum ID of its 3-row bucket.
--   group_min: one row per bucket, carrying the bucket's smallest ID.
--   members:   every source row with its bucket number.
-- The second output column is named GroupNr (both columns used to be called ID),
-- and the result is ordered by ID ascending as the question asks.
SELECT members.ID, group_min.ID AS GroupNr
FROM (
    SELECT MIN(ID) AS ID, GroupNr
    FROM (
        SELECT ID, (ROW_NUMBER() OVER (ORDER BY ID) - 1) / 3 + 1 AS GroupNr
        FROM Table
    ) AS numbered
    GROUP BY GroupNr
) AS group_min
INNER JOIN (
    SELECT ID, (ROW_NUMBER() OVER (ORDER BY ID) - 1) / 3 + 1 AS GroupNr
    FROM Table
) AS members ON group_min.GroupNr = members.GroupNr
ORDER BY members.ID ASC

Create episode for each value with new Begin and End Dates

This is in reference to below Question
Loop through each value to the seq num
But now Client want to see the data differently and started a new thread for this question.
below is the requirement.
This is the data .
ID seqNum DOS Service End Date
1 1 1/1/2017 1/15/2017
1 2 1/16/2017 1/16/2017
1 3 1/17/2017 1/21/2017
1 4 1/22/2017 2/13/2017
1 5 2/14/2017 3/21/2017
1 6 2/16/2017 3/21/2017
Expected outPut:
ID SeqNum DOSBeg DOSEnd
1 1 1/1/2017 1/30/2017
1 2 1/31/2017 3/1/2017
1 3 3/2/2017 3/31/2017
For each DOSBeg, add 29 days to get DOSEnd. Then add 1 day to DOSEnd to get the next DOSBeg (1/31/2017).
Now add 29 days to 1/31/2017 to get 3/1/2017, which is the next DOSEnd. Repeat this until DOSEnd >= the max End Date, i.e. 3/21/2017.
Basically, we need episode of 29 days for each ID.
I tried with this code and it is giving me duplicates.
-- Attempted recursive CTE that builds consecutive 30-day episodes (DOSBeg .. DOSBeg + 29).
with cte as (
-- Anchor: emits a row for every row read from #temp (no grouping or DISTINCT).
-- NOTE(review): presumably this is where the duplicates come from - confirm against the data.
select ID, minDate as DOSBeg,dateadd(day,29,mindate) as DOSEnd
from #temp
union all
-- Recursive step: the next episode starts the day after the previous DOSEnd.
-- NOTE(review): there is no WHERE clause here, so nothing in the query ends the recursion.
select ID,dateadd(day,1,DOSEnd) as DOSBeg,dateadd(day,29,dateadd(day,1,DOSEnd)) as DOSEnd
from cte
)
select ID,DOSBeg,DOSEnd
from cte
-- MAXRECURSION 0 lifts the default recursion limit.
OPTION (MAXRECURSION 0)
Here mindate is Minimum DOS for this ID i.e. 1/1/2017
I came up with below logic and this is working fine for me. Is there any better way than this ?
-- Sample data. seqNum is an identity column, so the VALUES rows supply only
-- id, DOS and ServiceEndDate.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date);
insert into @table
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118');
select * into #temp from @table;
--drop table #data
-- Annotate every row with its ID's earliest DOS (minDate) and latest ServiceEndDate (maxDate).
select distinct ID, cast(min(DOS) over (partition by ID) as date) as minDate
,row_Number() over (partition by ID order by ID, DOS) as SeqNum,
DOS,
max(ServiceEndDate) over (partition by ID)as maxDate
into #data
from #temp;
--drop table #StartDateLogic
-- Walk forward from minDate in 30-day steps while the next start is still <= maxDate.
-- #data holds several rows per ID with the same minDate, hence the DISTINCT below.
-- The leading semicolon matters: a WITH that is not the first statement of the
-- batch must follow a terminated statement.
;with cte as
(select ID,mindate as startdate,maxdate
from #data
union all
select ID,dateadd(day,30,startdate) as startdate,maxdate
from cte
where maxdate >= dateadd(day,30,startdate))
select distinct ID,startdate
into #StartDateLogic
from cte
OPTION (MAXRECURSION 0);
--final Result set
-- Each episode runs from StartDate through StartDate + 29 days.
select ID
,ROW_NUMBER() over (Partition by ID order by ID,StartDate) as SeqNum
,StartDate
,dateadd(day,29,startdate) as EndDate
from #StartDateLogic;
You were on the right track with the recursive CTE, but you forgot the anchor.
-- Sample data in a table variable. seqNum is an identity column, so the VALUES
-- rows supply only id, DOS and ServiceEndDate.
declare @table table (id int, seqNum int identity(1,1), DOS date, ServiceEndDate date)
insert into @table
values
(1,'20170101','20170115'),
(1,'20170116','20170116'),
(1,'20170117','20170121'),
(1,'20170122','20170213'),
(1,'20170214','20170321'),
(1,'20170216','20170321'),
(2,'20170101','20170103'),
(2,'20170104','20170118')
;with dates as(
-- Anchor: the first row (lowest seqNum) of every id starts that id's first episode.
select top 1 with ties id, seqnum, DOSBeg = DOS, DOSEnd = dateadd(day,29,DOS)
from @table
order by row_number() over (partition by id order by seqnum)
union all
-- Recursive step: the next episode starts the day after the previous DOSEnd and
-- lasts 29 more days. The join on seqNum + 1 only advances while the id has a
-- next source row.
select t.id, t.seqNum, DOSBeg = dateadd(day,1,d.DOSEnd), DOSEnd = dateadd(day,29,dateadd(day,1,d.DOSEnd))
from dates d
inner join @table t on
d.id = t.id and t.seqNum = d.seqNum + 1
)
select *
from dates d
-- Keep only episodes that end within one month after the id's latest ServiceEndDate.
where d.DOSEnd <= (select max(dateadd(month,1,ServiceEndDate)) from @table where id = d.id)
order by id, seqNum

Collapse consecutive similar records into a single record

I have records of people from an old system that I'm trying to convert over to the new system. In the old system, a person might end up with several records for the same location. They could also go from location, to another, and then return to the previous location. Here's some example data:
PersonID | LocationID | StartDate | EndDate
1 | 1 | 1980-07-30 | 2007-07-16
1 | 1 | 2007-07-16 | 2008-01-30
1 | 2 | 2008-01-30 | 2009-03-02
1 | 2 | 2009-03-02 | 2009-11-06
1 | 3 | 2014-07-16 | 2015-01-16
1 | 1 | 2016-01-26 | 2999-12-31
I would like to collapse this data so that I get a date range for any consecutive LocationIDs. For the data above, this is what I would expect:
PersonID | LocationID | StartDate | EndDate
1 | 1 | 1980-07-30 | 2008-01-30
1 | 2 | 2008-01-30 | 2009-11-06
1 | 3 | 2014-07-16 | 2015-01-16
1 | 1 | 2016-01-26 | 2999-12-31
I'm unsure as to how to do this. I previously tried joining to the previous record, but that only works when there's two consecutive locations, not with 3 or more (there could be an undefined number of consecutive records).
-- Pair each row with the same person's next row (the one whose PK_ID is one higher).
-- NewID is this row's PK_ID when the next row has the same location, otherwise
-- the next row's PK_ID (NULL when no next row exists).
SELECT
    cur.PersonID,
    cur.LocationID,
    cur.StartDate,
    cur.EndDate,
    CASE
        WHEN cur.LocationID = nxt.LocationID THEN cur.PK_ID
        ELSE nxt.PK_ID
    END AS NewID
FROM employees AS cur
LEFT JOIN employees AS nxt
    ON  cur.PersonID = nxt.PersonID
    AND cur.PK_ID = nxt.PK_ID - 1
So, how can I write a query to get the results I need?
Note: we're treating '2999-12-31' as our 'NULL' date field.
This is a classic Gaps-and-Islands (Edit- corrected for larger span 2999)
-- Gaps-and-islands over day-expanded rows: every record is exploded into one
-- row per calendar day, days are grouped into islands per person and location,
-- and each island is collapsed back into a (StartDate, EndDate) range.
Select [PersonID]
      ,[LocationID]
      ,[StartDate] = min(D)
      ,[EndDate]   = max(D)
From (
    Select *
          -- Both row numbers are scoped to the person. Numbering all people
          -- together would let one person's days interleave with another's
          -- and split islands that are really continuous.
          ,Grp = Row_Number() over (Partition By [PersonID] Order By D)
               - Row_Number() over (Partition By [PersonID],[LocationID] Order By D)
    from YourTable A
    Cross Apply (
        -- One row per day from StartDate through EndDate inclusive.
        -- spt_values is cross-joined with itself purely as a row source.
        Select Top (DateDiff(DAY,A.[StartDate],A.[EndDate])+1)
               D = DateAdd(DAY,-1+Row_Number() Over (Order By (Select Null)),A.[StartDate])
        From master..spt_values n1
        Cross Join master..spt_values n2
    ) B
) G
Group By [PersonID],[LocationID],Grp
Order By [PersonID],min(D)
Returns
PersonID LocationID StartDate EndDate
1 1 1980-07-30 2008-01-30
1 2 2008-01-30 2009-11-06
1 3 2014-07-16 2015-01-16
1 1 2016-01-26 2999-12-31
Using your original query
-- Same gaps-and-islands collapse, fed by the asker's original self-join query
-- instead of the base table.
Select [PersonID]
      ,[LocationID]
      ,[StartDate] = min(D)
      ,[EndDate]   = max(D)
From (
    Select *
          -- Both row numbers are scoped to the person so that other people's
          -- days cannot interleave and break an island.
          ,Grp = Row_Number() over (Partition By [PersonID] Order By D)
               - Row_Number() over (Partition By [PersonID],[LocationID] Order By D)
    From (
        -- Your Original Query
        select
            a.PersonID,
            a.LocationID,
            a.StartDate,
            a.EndDate,
            case when a.LocationID = b.LocationID then a.PK_ID else b.PK_ID end as NewID
        from employees a
        left outer join employees b
            on a.PersonID = b.PersonID
            and a.PK_ID = b.PK_ID - 1
    ) A
    Cross Apply (
        -- One row per day from StartDate through EndDate inclusive.
        Select Top (DateDiff(DAY,A.[StartDate],A.[EndDate])+1)
               D = DateAdd(DAY,-1+Row_Number() Over (Order By (Select Null)),A.[StartDate])
        From master..spt_values n1
        Cross Join master..spt_values n2
    ) B
) G
Group By [PersonID],[LocationID],Grp
Order By [PersonID],min(D)
Requested Comments
Let's break it down to its components.
1) The CROSS APPLY Portion: This will expand a single record into N records. For example:
-- Demo of the CROSS APPLY step alone: each source record becomes one row per
-- day between StartDate and EndDate (inclusive), exposed as column D.
Declare @YourTable Table ([PersonID] int,[LocationID] int,[StartDate] date,[EndDate] date)
Insert Into @YourTable Values
 (1,1,'1980-07-01','1980-07-03' )
,(1,1,'1980-07-02','1980-07-04' ) -- Notice the Overlap
,(1,2,'2008-01-30','2008-02-05')
Select *
from @YourTable A
Cross Apply (
    -- TOP limits the generated row numbers to the number of days in the range.
    Select Top (DateDiff(DAY,A.[StartDate],A.[EndDate])+1)
           D = DateAdd(DAY,-1+Row_Number() Over (Order By (Select Null)),A.[StartDate])
    From master..spt_values n1
    Cross Join master..spt_values n2
) B
The above query will generate
2) The Grp Portion: Perhaps easier if I provide a simple example:
-- Demo of the Grp step: RN1 and RN2 are shown next to Grp so that
-- Grp = RN1 - RN2 can be checked row by row.
Declare @YourTable Table ([PersonID] int,[LocationID] int,[StartDate] date,[EndDate] date)
Insert Into @YourTable Values
 (1,1,'1980-07-01','1980-07-03' )
,(1,1,'1980-07-02','1980-07-04' ) -- Notice the Overlap
,(1,2,'2008-01-30','2008-02-05')
Select *
      -- Both row numbers are scoped to the person so that several people's
      -- days are never numbered together.
      ,Grp = Row_Number() over (Partition By [PersonID] Order By D)
           - Row_Number() over (Partition By [PersonID],[LocationID] Order By D)
      ,RN1 = Row_Number() over (Partition By [PersonID] Order By D)
      ,RN2 = Row_Number() over (Partition By [PersonID],[LocationID] Order By D)
from @YourTable A
Cross Apply (
    -- One row per day from StartDate through EndDate inclusive.
    Select Top (DateDiff(DAY,A.[StartDate],A.[EndDate])+1)
           D = DateAdd(DAY,-1+Row_Number() Over (Order By (Select Null)),A.[StartDate])
    From master..spt_values n1
    Cross Join master..spt_values n2
) B
The above query Generates:
RN1 and RN2 are breakouts of the GRP, just to illustrate the mechanic. Notice RN1 minus RN2 equals the GRP. Once we have the GRP, it becomes a simple matter of aggregation via a group by
3) Pulling it all Together:
-- Full demo: expand each record to days, compute the island key, and collapse
-- every island back into a single (StartDate, EndDate) range.
Declare @YourTable Table ([PersonID] int,[LocationID] int,[StartDate] date,[EndDate] date)
Insert Into @YourTable Values
 (1,1,'1980-07-01','1980-07-03' )
,(1,1,'1980-07-02','1980-07-04' ) -- Notice the Overlap
,(1,2,'2008-01-30','2008-02-05')
Select [PersonID]
      ,[LocationID]
      ,[StartDate] = min(D)
      ,[EndDate]   = max(D)
From (
    Select *
          -- Both row numbers are scoped to the person so that several people's
          -- days are never numbered together.
          ,Grp = Row_Number() over (Partition By [PersonID] Order By D)
               - Row_Number() over (Partition By [PersonID],[LocationID] Order By D)
    from @YourTable A
    Cross Apply (
        -- One row per day from StartDate through EndDate inclusive.
        Select Top (DateDiff(DAY,A.[StartDate],A.[EndDate])+1)
               D = DateAdd(DAY,-1+Row_Number() Over (Order By (Select Null)),A.[StartDate])
        From master..spt_values n1
        Cross Join master..spt_values n2
    ) B
) G
Group By [PersonID],[LocationID],Grp
Order By [PersonID],min(D)
Returns
For your sample data, you can use the difference of row numbers approach:
-- Difference-of-row-numbers islands: rows of one person that stay at the same
-- location in consecutive order share the same (seqnum_p - seqnum_pl) value.
SELECT
    numbered.personid,
    numbered.locationid,
    MIN(numbered.startdate),
    MAX(numbered.enddate)
FROM (
    SELECT emp.*,
           ROW_NUMBER() OVER (PARTITION BY emp.personid
                              ORDER BY emp.startdate) AS seqnum_p,
           ROW_NUMBER() OVER (PARTITION BY emp.personid, emp.locationid
                              ORDER BY emp.startdate) AS seqnum_pl
    FROM employees AS emp
) AS numbered
GROUP BY numbered.personid,
         numbered.locationid,
         (numbered.seqnum_p - numbered.seqnum_pl);
This assumes that the start and end dates are contiguous. That is, there is no gap for a given employee at the same location.

Find date sequence in SQL Server

I'm trying to find the maximum sequence of days by customer in my data.
I want to understand what is the max sequence of days that specific customer made. If someone enter to my app in the 25/8/16 AND 26/08/16 AND 27/08/16 AND 01/09/16 AND 02/09/16 - The max sequence will be 3 days (25,26,27).
In the end (The output) I want to get two fields: custid | MaxDaySequence
I have the following fields in my data table:
custid | orderdate (timestamp)
For example:
custid orderdate
1 25/08/2007
1 03/10/2007
1 13/10/2007
1 15/01/2008
1 16/03/2008
1 09/04/2008
2 18/09/2006
2 08/08/2007
2 28/11/2007
2 04/03/2008
3 27/11/2006
3 15/04/2007
3 13/05/2007
3 19/06/2007
3 22/09/2007
3 25/09/2007
3 28/01/2008
I'm using SQL Server 2014.
Thanks
There is a trick, if you have an incrementing number ordered by your date then a subtracting that number of days from your dates will be the same if they are consecutive. So like this:
-- One output row per run of consecutive days for each customer.
-- Subtracting an incrementing row number (in days) from each date gives the
-- same anchor date for every day of a consecutive run, so that anchor is the
-- grouping key.
SELECT custid,
       MIN(orderdate) AS start_of_group,
       MAX(orderdate) AS end_of_group,
       COUNT(*)       AS num_days
FROM (
    SELECT custid,
           orderdate,
           ROW_NUMBER() OVER (PARTITION BY custid ORDER BY orderdate) AS rn
    FROM (
        -- orderdate is a timestamp: reduce it to one row per customer per day,
        -- otherwise time-of-day and repeat orders on one day break the runs.
        SELECT DISTINCT custid, CAST(orderdate AS date) AS orderdate
        FROM orders   -- placeholder: substitute the real table name
    ) d
) x
GROUP BY custid, DATEADD(day, -rn, orderdate);
You could take the result of this and pull out the max number of days to solve your problem:
-- Longest run of consecutive days per customer.
-- Inner levels: distinct days -> row number per customer -> one group per run
-- (date minus row number is constant inside a run) -> run length.
SELECT custid, MAX(num_days) AS longest
FROM (
    SELECT custid,
           COUNT(*) AS num_days
    FROM (
        SELECT custid,
               orderdate,
               ROW_NUMBER() OVER (PARTITION BY custid ORDER BY orderdate) AS rn
        FROM (
            -- orderdate is a timestamp: keep one row per customer per day.
            SELECT DISTINCT custid, CAST(orderdate AS date) AS orderdate
            FROM orders   -- placeholder: substitute the real table name
        ) d
    ) x
    GROUP BY custid, DATEADD(day, -rn, orderdate)
) y
GROUP BY custid
If you want to solve it with MySQL:
-- MySQL variant: emulates a per-user row number with user variables
-- (@curRow restarts at 1 whenever user_id changes), then groups each user's
-- days by (day - rank), which is constant inside a run of consecutive days.
-- User variables use the @ sigil; '#' would start a comment in MySQL.
select user_id,max(num_days) as longest
from(
select user_id, count(*) as num_days
from
(
SELECT (CASE a1.user_id
WHEN @curType
THEN @curRow := @curRow + 1
ELSE @curRow := 1 AND @curType := a1.user_id END
) AS rank,
a1.user_id,
a1.last_update as dat
-- a1: one row per user per calendar day (first 10 chars of the timestamp, yyyy-mm-dd)
FROM (select a2.user_id,left(FROM_UNIXTIME(a2.last_update),10) as 'last_update'
from visits as a2 group by 1,2) as a1 ,
-- r: initialises the two user variables
(SELECT @curRow := 0, @curType := '') r
ORDER BY a1.user_id DESC, dat) x
group by user_id, DATE_ADD(dat,INTERVAL -rank day)
) y
group by 1
order by longest desc

Rows inside the greatest streak?

Given the Rows
symbol_id profit date
1 100 2009-08-18 01:01:00
1 100 2009-08-18 01:01:01
1 156 2009-08-18 01:01:04
1 -56 2009-08-18 01:01:06
1 18 2009-08-18 01:01:07
How would I most efficiently select the rows that are involved in the greatest streak (of profit).
The greatest streak would be the first 3 rows, and I would want those rows. The query I came up with is just a bunch of nested queries and derived tables. I am looking for an efficient way to do this using common table expressions or something more advanced.
You haven't defined how 0 profit should be treated or what happens if there is a tie for longest streak. But something like...
-- Rows belonging to the longest streak of same-signed profit per symbol.
-- T1: Grp = overall row number - row number within the same sign. It is
--     constant inside an unbroken run of one sign.
-- T2: length of each run.
;WITH T1 AS
(
SELECT *,
       ROW_NUMBER() OVER (PARTITION BY symbol_id ORDER BY date) -
       ROW_NUMBER() OVER (PARTITION BY symbol_id, SIGN(profit)
                          ORDER BY date) AS Grp
FROM Data
), T2 AS
(
SELECT *,
       -- Grp is only unique within one sign (e.g. +,-,+ yields Grp 0,1,1), so
       -- the sign must be part of the partition or unrelated runs get merged.
       COUNT(*) OVER (PARTITION BY symbol_id, SIGN(profit), Grp) AS StreakLen
FROM T1
)
SELECT TOP 1 WITH TIES *
FROM T2
ORDER BY StreakLen DESC
Or - if you are looking for most profitable streak
-- Rows belonging to the most profitable unbroken run per symbol.
-- T1: rows are split into non-negative profit (flag 1) and negative profit
--     (flag NULL); Grp is constant inside an unbroken run of one kind.
-- T2: total profit of each run.
;WITH T1 AS
(
SELECT *,
       ROW_NUMBER() OVER (PARTITION BY symbol_id ORDER BY date) -
       ROW_NUMBER() OVER (PARTITION BY symbol_id, CASE WHEN profit >= 0 THEN 1 END
                          ORDER BY date) AS Grp
FROM Data
), T2 AS
(
SELECT *,
       -- Grp is only unique within one kind of run (e.g. +,-,+ yields Grp 0,1,1),
       -- so the flag must be part of the partition or losses and gains from
       -- different runs are summed together.
       SUM(profit) OVER (PARTITION BY symbol_id, CASE WHEN profit >= 0 THEN 1 END, Grp) AS StreakProfit
FROM T1
)
SELECT TOP 1 WITH TIES *
FROM T2
ORDER BY StreakProfit DESC
-- Sample data in a table variable (table variables use the @ sigil).
declare @T table
(
symbol_id int,
profit int,
[date] datetime
)
insert into @T values
(1, 100, '2009-08-18 01:01:00'),
(1, 100, '2009-08-18 01:01:01'),
(1, 156, '2009-08-18 01:01:04'),
(1, -56, '2009-08-18 01:01:06'),
(1, 18 , '2009-08-18 01:01:07')
-- C1: number all rows by date.
-- C2: keep only non-negative rows and renumber them; rn minus the new number
--     is constant while the kept rows were adjacent in C1, so it identifies a streak.
;with C1 as
(
select *,
row_number() over(order by [date]) as rn
from @T
),
C2 as
(
select *,
rn - row_number() over(order by rn) as grp
from C1
where profit >= 0
)
-- Return every row of the streak(s) with the highest total profit.
select top 1 with ties *
from C2
order by sum(profit) over(partition by grp) desc
Result:
symbol_id profit date rn grp
----------- ----------- ----------------------- -------------------- --------------------
1 100 2009-08-18 01:01:00.000 1 0
1 100 2009-08-18 01:01:01.000 2 0
1 156 2009-08-18 01:01:04.000 3 0
If that's a MSSQL server then you want to consider using TOP 3 in your select clause
and ORDER BY PROFIT DESC.
If mysql/postgres you might want to consider using limit in your select clause with
the same order by too.
hope this helps.