Cumulative count over dates, discarding some values - sql

I have an Answers table with the following schema:
CREATE TABLE Answers
([id] int, [analyst_id] int, [date] date);
I have to "accumulate-count" how many answers an analyst has per month, discarding any answers given before a period of 3 months after the last answer. Given the following:
INSERT INTO Answers
([id], [analyst_id], [date])
VALUES
(1, 1, '2017/01/01'),
(2, 1, '2017/02/01'), -- should be discarded
(3, 1, '2017/03/01'), -- should be discarded
(4, 1, '2017/05/01'),
(5, 1, '2017/06/01'), -- should be discarded
(6, 1, '2017/07/01'), -- should be discarded
(7, 1, '2017/08/01'),
(8, 2, '2017/01/01'),
(9, 2, '2017/04/01'),
(10, 1, '2018/02/01'),
(11, 2, '2018/03/01');
The expected result is:
analyst_id | month-year | count
-------------------------------
1 | 01/2017 | 1
1 | 02/2017 | 1
1 | 03/2017 | 1
1 | 04/2017 | 1
1 | 05/2017 | 2
1 | 06/2017 | 2
1 | 07/2017 | 2
1 | 08/2017 | 3
1 | 09/2017 | 3
1 | 10/2017 | 3
1 | 11/2017 | 3
1 | 12/2017 | 3
2 | 01/2017 | 1
2 | 02/2017 | 1
2 | 03/2017 | 1
2 | 04/2017 | 2
2 | 05/2017 | 2
2 | 06/2017 | 2
2 | 07/2017 | 2
2 | 08/2017 | 2
2 | 09/2017 | 2
2 | 10/2017 | 2
2 | 11/2017 | 2
2 | 12/2017 | 2
1 | 01/2018 | 0
1 | 02/2018 | 1
1 | 03/2018 | 1
2 | 01/2018 | 0
2 | 02/2018 | 0
2 | 03/2018 | 1
DBMS is a SQL Server 2012.
EDIT
I wrote this fiddle with my current half-solution: http://sqlfiddle.com/#!6/c2e82e/5
Each year, the count need to be reset.

EDIT:
OK, for the updated question, you essentially need to make a "dates" table (here a CTE called "D") that contains all the dates between the minimum and maximum date in your Answers table. Then you can essentially left join your results to that and use a DENSE_RANK() window function to determine the count.
DECLARE #Answers TABLE (ID INT, Analyst_ID INT, [Date] DATE);
INSERT #Answers (ID, Analyst_ID, [Date])
VALUES
(1, 1, '2017/01/01'),
(2, 1, '2017/02/01'),
(3, 1, '2017/03/01'),
(4, 1, '2017/05/01'),
(5, 1, '2017/06/01'),
(6, 1, '2017/07/01'),
(7, 1, '2017/08/01'),
(8, 2, '2017/01/01'),
(9, 2, '2017/04/01'),
(10, 1, '2018/02/01'),
(11, 2, '2018/03/01');
WITH CTE AS
(
SELECT A.Analyst_ID, [Date] = MIN(A.[Date])
FROM #Answers AS A
GROUP BY A.Analyst_ID
UNION ALL
SELECT A.Analyst_ID, A.[Date]
FROM
(
SELECT A.Analyst_ID, A.[Date], RN = ROW_NUMBER() OVER (PARTITION BY A.Analyst_ID ORDER BY A.ID)
FROM #Answers AS A
JOIN CTE
ON CTE.Analyst_ID = A.Analyst_ID
AND DATEADD(MONTH, 3, CTE.[Date]) <= A.[Date]
) AS A
WHERE A.RN = 1
),
D AS -- List of dates between minimum and maximum date in table for each analyst ID.
(
SELECT [Date] = DATEADD(MONTH, RN, (SELECT MIN([Date]) FROM #Answers)),
A.Analyst_ID
FROM (SELECT RN = ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) - 1 FROM sys.objects) AS O
CROSS JOIN (SELECT DISTINCT Analyst_ID FROM #Answers) AS A
WHERE RN <= (SELECT DATEDIFF(MONTH, MIN([Date]), MAX([Date])) FROM #Answers)
)
SELECT D.Analyst_ID,
[Month-Year] = FORMAT(D.[Date], 'MM/yyyy'),
[Count] = CASE WHEN A.[Date] IS NULL THEN 0 ELSE DENSE_RANK() OVER (PARTITION BY D.Analyst_ID, DATEPART(YEAR, A.[Date]) ORDER BY A.[Date]) END
FROM D
OUTER APPLY (SELECT TOP 1 * FROM CTE WHERE CTE.[Date] <= D.[Date] AND DATEDIFF(YEAR, CTE.[Date], D.[Date]) = 0 AND CTE.Analyst_ID = D.Analyst_ID ORDER BY CTE.[Date] DESC) AS A
ORDER BY D.Analyst_ID, D.[Date];

Related

SQL return second max date for each id, date and channel

I have the following table:
id channel_id date
1 | 1 | 2017-01-10
1 | 2 | 2018-02-05
1 | 1 | 2019-03-07
1 | 2 | 2020-03-15
2 | 1 | 2018-01-17
2 | 1 | 2019-07-20
2 | 1 | 2020-01-10
I want to return for previous maximum date for each date and id but two separate columns for both channel_id. So, one column for previous max date for channel_id is equal to 1 and another for previous max date for channel_id is equal to 2. What I want to get can be found below:
id channel_id date prev_date_channel_id1 prev_date_channel_id2
1 | 1 | 2017-01-10 | NULL | NULL |
1 | 2 | 2018-02-05 | 2017-01-10 | NULL |
1 | 1 | 2019-03-07 | 2017-01-10 | 2018-02-05 |
1 | 2 | 2020-03-15 | 2019-03-07 | 2018-02-05 |
2 | 1 | 2018-01-17 | NULL | NULL |
2 | 1 | 2019-07-20 | 2018-01-17 | NULL |
2 | 1 | 2020-01-10 | 2019-07-20 | NULL |
I made a query as below and returns what I want but takes too much time. I'd appreciate any optimization suggestions!
SELECT
a.id,
a.date,
MAX(c.date) AS prev_date_channel_id1,
MAX(d.date) AS prev_date_channel_id2
FROM
table a
LEFT JOIN
table c ON a.id=c.id AND a.date>c.date AND c.channel_id=1
LEFT JOIN
table d ON a.id=d.id AND a.date>d.date AND d.channel_id=2
GROUP BY a.id, a.date
Use lag() for the previous date and a cumulative conditional max for the channel 2 date:
select t.*, lag(date) over (partition by id order by date) as prev_date,
max(case when channel = 2 then date end) over
(partition by id
order by date
rows between unbounded preceding and 1 row preceding
) as prev_date_channel2
from t;
I think there's an error in your "expected output" for the value of prev_date_channel_id1 on the last row (it should be 2019-07-20).
In any case, with appropriate indexing an outer apply top 1 construct might serve you better:
create table t
(
id int,
channel_id int,
[date] date
constraint pk_t primary key clustered (id, channel_id, [date])
);
insert t values
(1, 1, '2017-01-10'),
(1, 2, '2018-02-05'),
(1, 1, '2019-03-07'),
(1, 2, '2020-03-15'),
(2, 1, '2018-01-17'),
(2, 1, '2019-07-20'),
(2, 1, '2020-01-10');
select t1.id,
t1.channel_id,
t1.[date],
prev_date_channel_id1 = c1.dt,
prev_date_channel_id2 = c2.dt
from t t1
outer apply (
select top 1 [date]
from t
where id = t1.id
and channel_id = 1
and [date] < t1.[date]
order by date desc
) c1(dt)
outer apply (
select top 1 [date]
from t
where id = t1.id
and channel_id = 2
and [date] < t1.[date]
order by date desc
) c2(dt)
order by t1.id, t1.[date];
Or possibly faster still, especially with the key changed to constraint pk_t primary key clustered (id, [date], [channel_id]))
select t1.id,
t1.channel_id,
t1.[date],
prev_date_channel_id1 = prev.c1,
prev_date_channel_id2 = prev.c2
from t t1
outer apply (
select c1 = max(iif(channel_id = 1, [date], null)),
c2 = max(iif(channel_id = 2, [date], null))
from t
where id = t1.id
and [date] < t1.[date]
) prev
Assuming you have an index on those three columns, you can use subqueries:
SELECT [T0].[id],
[T0].[channel_id],
[T0].[date],
[prev_date_channel_id1] = (
SELECT MAX([T1].[date])
FROM [t] [T1]
WHERE [T1].[id] = [T0].[id]
AND [T1].[date] < [T0].[date]
AND [T1].[channel_id] = 1
),
[prev_date_channel_id2] = (
SELECT MAX([T1].[date])
FROM [t] [T1]
WHERE [T1].[id] = [T0].[id]
AND [T1].[date] < [T0].[date]
AND [T1].[channel_id] = 2
)
FROM [t] [T0];

SQL Order By On two columns but same prority

I'm stuck on this simple select and don't know what to do.
I Have this:
ID | Group
===========
1 | NULL
2 | 100
3 | 100
4 | 100
5 | 200
6 | 200
7 | 100
8 | NULL
and want this:
ID | Group
===========
1 | NULL
2 | 100
3 | 100
4 | 100
7 | 100
5 | 200
6 | 200
8 | NULL
all group members keep together, but others order by ID.
I can not write this script because of that NULL records. NULL means that there is not any group for this record.
First you want to order your rows by the minimum ID of their group - or their own ID in case they belong to no group.Then you want to order by ID. That is:
order by min(id) over (partition by case when grp is null then id else grp end), id
If IDs and groups can overlap (i.e. the same number can be used for an ID and for a group, e.g. add a record for ID 9 / group 1 to your sample data) you should change the partition clause to something like
order by min(id) over (partition by case when grp is null
then 'ID' + cast(id as varchar)
else 'GRP' + cast(grp as varchar) end),
id;
Rextester demo: http://rextester.com/GPHBW5600
What about data after a null? In a comment you said don't sort the null.
declare #T table (ID int primary key, grp int);
insert into #T values
(1, NULL)
, (3, 100)
, (5, 200)
, (6, 200)
, (7, 100)
, (8, NULL)
, (9, 200)
, (10, 100)
, (11, NULL)
, (12, 150);
select ttt.*
from ( select tt.*
, sum(ff) over (order by tt.ID) as sGrp
from ( select t.*
, iif(grp is null or lag(grp) over (order by id) is null, 1, 0) as ff
from #T t
) tt
) ttt
order by ttt.sGrp, ttt.grp, ttt.id
ID grp ff sGrp
----------- ----------- ----------- -----------
1 NULL 1 1
3 100 1 2
7 100 0 2
5 200 0 2
6 200 0 2
8 NULL 1 3
10 100 0 4
9 200 1 4
11 NULL 1 5
12 150 1 6

Find Comebacks in Football Database With Sql Query

I have 4 tables.
Matches:
| id | HomeTeamID | AwayTeamID |
--------|-------------|------------
| 1 | 1 | 2
| 2 | 1 | 3
| 3 | 3 | 1
Goals:
| id | MatchID | Minute | TeamID
--------|-------------|---------- |---------
| 1 | 1 | 3 | 2
| 2 | 1 | 5 | 1
| 3 | 1 | 15 | 1
| 4 | 2 | 43 | 3
| 5 | 2 | 75 | 1
| 6 | 2 | 85 | 1
| 7 | 3 | 11 | 1
| 8 | 3 | 13 | 3
| 9 | 3 | 77 | 3
Teams:
| id | Name |
--------|-------------|
| 1 | Chelsea |
| 2 | Arsenal |
| 3 | Tottenham |
Managers:
| id | Name | TeamID |
--------|-------------|-------------
| 1 | Conte | 1
| 2 | Wenger | 2
| 3 | Pochettino | 3
I want to get the number of comebacks matches for managers. For example, Conte's team conceded the first goal in match 1 and 2 but they won. So Conte has 2 comebacks. Pochettino has 1 comeback at match 3. I want to find that with SQL Query.
I found the first goal of matches for each team. But after some steps, I'm losing what I'm doing.
SELECT MatchID, MIN(minute), g.TeamID
FROM Goals g
JOIN Managers m ON m.TeamID = g.TeamID
GROUP BY MatchID, g.TeamID
with cte
(
MatchID,TeamID,TotalGoalTime,NoOfGoals,ManagerName,comeback)
as(SELECT MatchID, g.TeamID,sum(minutea) as'TotalGoalTime' ,count(*)as'NoOfGoals',m.name as'ManagerName'
,comeback =ROW_NUMBER() OVER(PARTITION BY MatchID order by sum(minutea) desc)
FROM Goals g
JOIN Managers m ON m.TeamID = g.TeamID
join [Teams] t on t.Id=g.TeamId
GROUP BY MatchID, g.TeamID,m.name )
Select MatchID,TeamID,NoOfGoals,ManagerName from cte where comeback =1
The above query for now gives us the overall comeback, Will update the no of come backs.
In case you want to count every comeback in football match, you can use the solution below. Then the definition of the comeback is every time when a team score one goal more the opponent, after loosing. For example, for the following scenario we have three comebacks:
Team A Team B
0 - 1 //team b scores
1 - 1 //team a scores
2 - 1 //team a scores (comeback for a)
2 - 2 //team b scores
2 - 3 //team b scores (comeback for b)
3 - 3 //team a scores
4 - 3 //team a scores (comeback for a)
From the above it seems that we have a comeback, when score is changed, and the previous score was even. I am using SUM with OVER and ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW ordering by minute in order to calculated the score each time a goal is scored.
Here is full working example:
DECLARE #matches TABLE
(
[id] TINYINT
,[HomeTeamID] TINYINT
,[AwayTeamID] TINYINT
);
DECLARE #Goals TABLE
(
[id] TINYINT
,[MatchID] TINYINT
,[Minute] TINYINT
,[TeamID] TINYINT
);
DECLARE #Teams TABLE
(
[id] TINYINT
,[Name] VARCHAR(12)
);
DECLARE #Managers TABLE
(
[Id] TINYINT
,[Name] VARCHAR(12)
,[TeamID] TINYINT
);
INSERT INTO #matches ([id], [HomeTeamID], [AwayTeamID])
VALUES (1, 1, 2)
,(2, 1, 3)
,(3, 3, 1)
,(4, 1, 4);
INSERT INTO #Goals ([id], [MatchID], [Minute], [TeamID])
VALUES (1, 1, 3, 2)
,(2, 1, 5, 1)
,(3, 1, 15, 1)
,(4, 2, 43, 3)
,(5, 2, 75, 1)
,(6, 2, 85, 1)
,(7, 3, 11, 1)
,(8, 3, 13, 3)
,(9, 3, 77, 3)
,(10, 4, 3, 1)
,(11, 4, 5, 4)
,(12, 4, 10, 4)
,(13, 4, 12, 1)
,(14, 4, 25, 1)
,(15, 4, 46, 4)
,(16, 4, 60, 4)
,(17, 4, 72, 4)
,(18, 4, 84, 4);
INSERT INTO #Teams ([id], [Name])
VALUES (1, 'Chelsea')
,(2, 'Arsenal')
,(3, 'Tottenham')
,(4, 'Real Madrid');
INSERT INTO #Managers ([Id], [Name], [TeamID])
VALUES (1, 'Conte', 1)
,(2, 'Wenger', 2)
,(3, 'Pochettino', 3)
,(4, 'Zidane', 4);
WITH DataSource AS
(
SELECT m.[id]
,m.[HomeTeamID]
,m.[AwayTeamID]
,ROW_NUMBER() OVER (PARTITION BY m.[id] ORDER BY g.[minute]) AS [EventID]
,IIF
(
SUM(IIF(m.[HomeTeamID] = g.[teamID], 1, 0)) OVER (PARTITION BY m.[id] ORDER BY g.[minute] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) - 1
=
SUM(IIF(m.[AwayTeamID] = g.[teamID], 1, 0)) OVER (PARTITION BY m.[id] ORDER BY g.[minute] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
OR
SUM(IIF(m.[HomeTeamID] = g.[teamID], 1, 0)) OVER (PARTITION BY m.[id] ORDER BY g.[minute] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)
=
SUM(IIF(m.[AwayTeamID] = g.[teamID], 1, 0)) OVER (PARTITION BY m.[id] ORDER BY g.[minute] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) -1
,IIF(m.[HomeTeamID] = g.[teamID], 'H', 'A') -- (H)ome come back, (A)way come ba
,'N' -- no come back
) AS [ComeBack]
FROM #matches m
INNER JOIN #Goals g
ON m.[id] = g.[MatchID]
)
SELECT T.[Name]
FROM DataSource DS
INNER JOIN #Teams T
ON IIF([ComeBack] = 'H', [HomeTeamID], [AwayTeamID]) = T.[id]
WHERE DS.[EventID] <> 1
AND DS.[ComeBack] <> 'N';
The above will give us:
Chelsea
Chelsea
Chelsea
Tottenham
Real Madrid
Real Madrid
Note, I have added one more match to demonstrate this.
Here "Comeback" means that team conceded first goal, but team won this game.
I use 2 general subquery, 1) winners which contains MatchID and TeamID from each game which not ended as draw. And 2) first_goals, which contains those TeamID's, which scored first goal in match.
So, joining between these subqueries using:
on winners.MatchID = first_goals.MatchID and winners.TeamID <> first_goals.TeamID
gives us those matchs, where team won, but not scored first goal (i.e "Comeback").
Finally we use simple joins with Teams and Managers tables:
with Goals(id , MatchID , Minute ,TeamID) as (
select 1 , 1 , 3 , 2 union all
select 2 , 1 , 5 , 1 union all
select 3 , 1 , 15 , 1 union all
select 4 , 2 , 43 , 3 union all
select 5 , 2 , 75 , 1 union all
select 6 , 2 , 85 , 1 union all
select 7 , 3 , 11 , 1 union all
select 8 , 3 , 13 , 3 union all
select 9 , 3 , 77 , 3
),
Teams (id, Name) as(
select 1 ,'Chelsea' union all
select 2 ,'Arsenal' union all
select 3 ,'Tottenham'
),
Managers(id, Name, TeamID) as (
select 1 ,'Conte', 1 union all
select 2 ,'Wenger', 2 union all
select 3 ,'Pochettino', 3
)
select winners.TeamID, winners.MatchID, Teams.Name, Managers.Name from (
select t1.* from
(
select TeamID, MatchID, count(*) as goal_scored from Goals
group by TeamID, MatchID
)t1
inner join
(
select MatchID, max(goal_scored) as winner_goals_cnt from (
select TeamID, MatchID, count(*) as goal_scored from Goals
group by TeamID, MatchID
)t
group by MatchID
having min(goal_scored) <> max(goal_scored)
)t2
on t1.MatchID = t2.MatchID and t1.goal_scored = t2.winner_goals_cnt
) winners
inner join
(
select * from (
select Goals.*, row_number() over(partition by MatchID order by Minute, id) rn from Goals
) f
where rn = 1
) first_goals
on winners.MatchID = first_goals.MatchID and winners.TeamID <> first_goals.TeamID
inner join Teams
on winners.TeamID = Teams.id
inner join Managers
on winners.TeamID = Managers.TeamID
I don't follow much of football, but assuming that a comeback is when a team concedes the first goal but then wins the game, I would use the following logic to get the result.
First, we need the teams that had conceded the first goal in a match:
SELECT TeamID, MatchID FROM Goals WHERE GoalID in
(SELECT Min(GoalID) FROM Goals GROUP BY MatchID)
Second, for each match, we need the matches where the team making first goal went on to win.
SELECT MatchID FROM
(SELECT COUNT(GoalID) as TotalGoals, MatchID FROM Goals GROUP BY MatchID) AS MatchSummary
INNER JOIN
(SELECT COUNT(GoalID) as TeamGoals, MatchID FROM
(SELECT TeamID, MatchID FROM Goals WHERE GoalID in
(SELECT Min(GoalID) FROM Goals GROUP BY MatchID)
) as GoalsOfTheTeamThatConcededFirstGoal
GROUP BY MatchID) as SummaryOfTeamThatConcededFirstGoal
ON MatchSummary.MatchID = SummaryOfTeamThatConcededFirstGoal.MatchID
WHERE (TotalGoals - TeamGoals) < TeamGoals
Combining these two queries you will be able to get the TeamIDs for those teams that have made a comeback.
I think this task could be done much better with cursors or temporary tables, but I wanted to keep the answer as simple as possible. Hence I have avoided it. You should definitely explore those two methods.

use preceding in calculation sql

I need to calculate the column E using column B,C,D & previous row of E... I have the sample statement and calculation for reference. Note that prev(E) is the preceding value of E which I need to use in calculation but am unable to.
+---------------------------------------------------------------------------------------------------------------------------------------+
| TransactionDt | total_allotment(B) | invchange(C) | roomssold_flag(D) | available(E) | samplestatement | calculation |
+---------------------------------------------------------------------------------------------------------------------------------------+
| 1/1/16 | 5 | 0 | null | 5 | E=case when D=null then B | 5 |
| 1/2/16 | 5 | 0 | 1 | 4 | E=case when C=0 then prev(E)-D | E=(5-1) |
| 1/3/16 | 5 | 0 | 0 | 4 | E=case when C=0 then prev(E)-D | E=(4-0) |
| 1/4/16 | 6 | 1 | 1 | 5 | E=case when C=1 then B-D | E=(6-1) |
| 1/5/16 | 6 | 0 | 0 | 5 | E=case when C=0 then prev(E)-D | E=(5-0) |
| 1/6/16 | 7 | 1 | 1 | 6 | E=case when C=1 then B-D | E=(7-1) |
+---------------------------------------------------------------------------------------------------------------------------------------+
You can use first_value() function with preceding clause to get privious value:
set dateformat dmy;
declare #t table (TransactionDt smalldatetime, b int, c int, d int, e int);
insert into #t (TransactionDt, b, c, d, e) values
(cast('01.01.2016' as date), 5, 0, null, 5),
(cast('02.01.2016' as date), 5, 0, 1, 4),
(cast('03.01.2016' as date), 5, 0, 0, 4),
(cast('04.01.2016' as date), 6, 1, 1, 5),
(cast('05.01.2016' as date), 6, 0, 0, 5),
(cast('06.01.2016' as date), 7, 1, 1, 6);
select
t.*
,first_value(t.e) over(order by t.TransactionDt asc rows 1 preceding) [prevE]
,case t.c
when 0 then
first_value(t.e)
over(order by t.TransactionDt asc rows 1 preceding)
- t.d
when 1 then
t.b - t.d
end [calculation]
from
#t t
order by
t.TransactionDt
;
Tested on MS SQL 2012.
I'm not big fan of Teradata, but this should work:
select
t.e
,sum(t.e)
over(order by t.TransactionDt asc rows between 1 preceding and 1 preceding) ePrev
,case t.c
when 0 then
sum(t.e)
over(order by t.TransactionDt asc rows between 1 preceding and 1 preceding)
- t.d
when 1 then
t.b - t.d
end calculation
from
(
select cast('01.01.2016' as date format 'dd.mm.yyyy') TransactionDt, 5 b, 0 c, null d, 5 e from (select 1 x) x
union all
select cast('02.01.2016' as date format 'dd.mm.yyyy') TransactionDt, 5 b, 0 c, 1 d, 4 e from (select 1 x) x
union all
select cast('03.01.2016' as date format 'dd.mm.yyyy'), 5, 0, 0, 4 from (select 1 x) x
union all
select cast('04.01.2016' as date format 'dd.mm.yyyy'), 6, 1, 1, 5 from (select 1 x) x
union all
select cast('05.01.2016' as date format 'dd.mm.yyyy'), 6, 0, 0, 5 from (select 1 x) x
union all
select cast('06.01.2016' as date format 'dd.mm.yyyy'), 7, 1, 1, 6 from (select 1 x) x
) t
order by
t.TransactionDt
;
When you need to restart the calculation whenever invchange=1 you have to create a group for partitioning using
sum(invchange)
over (order by TransactionDt
rows unbounded preceding) as grp
invchange seems to be based on a previous row query, so you need to nest it't calculation in a Dervied Table.
Now you it's the total_allotment value minus a Cumulative Sum over roomssold_flag:
select t.*,
b - sum(coalesce(D,0))
over (partition by grp
order by TransactionDt
rows unbounded preceding)
from
(
select TransactionDt,b,c,d,
sum(c) over (order by TransactionDt rows unbounded preceding) as grp
from t
) as t
Btw, using a 0/1 flag to get dynamic partitioning is similar to RESET WHEN

SQL Server 2008 Cumulative Sum that resets value

I want to have the last column cumulative based on ROW_ID that resets every time it starts again with '1'.
Initially my table doesn't have the ROW_ID, this was created using partition so at least I can segregate my records.
It should add the Amt + CumulativeSum (except for the first record) all the way down and reset every time the Row_ID = 1.
I have tried several queries but it doesn't give me the desired result. I am trying to read answers from several forums but to no avail.
Can someone advise the best approach to do this?
For the sake of representation, I made the sample table as straightforward as possible.
ID ROW-ID Amt RunningTotal(Amt)
1 1 2 2
2 2 4 6
3 3 6 12
4 1 2 2
5 2 4 6
6 3 6 12
7 4 8 20
8 5 10 30
9 1 2 2
10 2 4 6
11 3 6 12
12 4 8 20
try this
declare #tb table(ID int, [ROW-ID] int, Amt money)
insert into #tb(ID, [ROW-ID], Amt) values
(1,1,2),
(2,2,4),
(3,3,6),
(4,1,2),
(5,2,4),
(7,4,8),
(8,5,10),
(9,1,2),
(10,2,4),
(11,3,6),
(12,4,8)
select *,sum(amt) over(partition by ([id]-[row-id]) order by id,[row-id]) AS cum from #tb
other version
select *,(select sum(amt) from #tb t where
(t.id-t.[row-id])=(t1.id-t1.[ROW-ID]) and (t.id<=t1.id) ) as cum
from #tb t1 order by t1.id,t1.[row-id]
Try this
SELECT distinct (T1.ID),
T1.ROW_ID,
T1.Amt,
CumulativeSum =
CASE
WHEN T1.RoW_ID=1 THEN T1.Amt
ELSE T1.Amt+ T2.Amt
END
FROM TestSum T1, TestSum T2
WHERE T1.ID = T2.ID+1
http://sqlfiddle.com/#!6/8b2a2/2
The idea is to create partitions from R column. First leave 1 if R = 1, else put 0. Then cumulative sum on that column. When you have partitions you can finally calculate cumulative sums on S column in those partitions:
--- --- ---
| 1 | | 1 | | 1 |
| 2 | | 0 | | 1 | --prev 1 + 0
| 3 | | 0 | | 1 | --prev 1 + 0
| 1 | | 1 | | 2 | --prev 1 + 1
| 2 | => | 0 | => | 2 | --prev 2 + 0
| 3 | | 0 | | 2 | --prev 2 + 0
| 4 | | 0 | | 2 | --prev 2 + 0
| 5 | | 0 | | 2 | --prev 2 + 0
| 1 | | 1 | | 3 | --prev 2 + 1
| 2 | | 0 | | 3 | --prev 3 + 0
--- --- ---
DECLARE #t TABLE ( ID INT, R INT, S INT )
INSERT INTO #t
VALUES ( 1, 1, 2 ),
( 2, 2, 4 ),
( 3, 3, 6 ),
( 4, 1, 2 ),
( 5, 2, 4 ),
( 6, 3, 6 ),
( 7, 4, 8 ),
( 8, 5, 10 ),
( 9, 1, 2 ),
( 10, 2, 4 ),
( 11, 3, 6 ),
( 12, 4, 8 );
For MSSQL 2008:
WITH cte1
AS ( SELECT ID ,
CASE WHEN R = 1 THEN 1
ELSE 0
END AS R ,
S
FROM #t
),
cte2
AS ( SELECT ID ,
( SELECT SUM(R)
FROM cte1 ci
WHERE ci.ID <= co.ID
) AS R ,
S
FROM cte1 co
)
SELECT * ,
( SELECT SUM(S)
FROM cte2 ci
WHERE ci.R = co.R
AND ci.ID <= co.ID
)
FROM cte2 co
For MSSQL 2012:
WITH cte
AS ( SELECT ID ,
SUM(CASE WHEN R = 1 THEN 1
ELSE 0
END) OVER ( ORDER BY ID ) AS R ,
S
FROM #t
)
SELECT * ,
SUM(s) OVER ( PARTITION BY R ORDER BY ID ) AS T
FROM cte
Output:
ID R S T
1 1 2 2
2 1 4 6
3 1 6 12
4 2 2 2
5 2 4 6
6 2 6 12
7 2 8 20
8 2 10 30
9 3 2 2
10 3 4 6
11 3 6 12
12 3 8 20
EDIT:
One more way. This looks way better by execution plan then first example:
SELECT * ,
CASE WHEN R = 1 THEN S
ELSE ( SELECT SUM(S)
FROM #t it
WHERE it.ID <= ot.ID
AND it.ID >= ( SELECT MAX(ID)
FROM #t iit
WHERE iit.ID < ot.ID
AND iit.R = 1
)
)
END
FROM #t ot