T-SQL: Paging WITH TIES - sql

I am trying to implement a paging routine that's a little different.
For the sake of a simple example, let's assume that I have a table defined and populated as follows:
DECLARE #Temp TABLE
(
ParentId INT,
[TimeStamp] DATETIME,
Value INT
);
INSERT INTO #Temp VALUES (1, '1/1/2013 00:00', 6);
INSERT INTO #Temp VALUES (1, '1/1/2013 01:00', 7);
INSERT INTO #Temp VALUES (1, '1/1/2013 02:00', 8);
INSERT INTO #Temp VALUES (2, '1/1/2013 00:00', 6);
INSERT INTO #Temp VALUES (2, '1/1/2013 01:00', 7);
INSERT INTO #Temp VALUES (2, '1/1/2013 02:00', 8);
INSERT INTO #Temp VALUES (3, '1/1/2013 00:00', 6);
INSERT INTO #Temp VALUES (3, '1/1/2013 01:00', 7);
INSERT INTO #Temp VALUES (3, '1/1/2013 02:00', 8);
TimeStamp will always be the same interval, e.g. daily data, 1 hour data, 1 minute data, etc. It will not be mixed.
For reporting and presentation purposes, I want to implement paging that:
Orders by TimeStamp
Starts out using a suggested pageSize (say 4), but will automatically adjust to include additional records matching on TimeStamp. In other words, if 1/1/2013 01:00 is included for one ParentId, the suggested pageSize will be overridden and all records for hour 01:00 will be included for all ParentId's. It's almost like the TOP WITH TIES option.
So running this query with pageSize of 4 would return 6 records. There are 3 hour 00:00 and 1 hour 01:00 by default, but because there are more hour 01:00's, the pageSize would be overridden to return all hour 00:00 and 01:00.
Here's what I have so far, and I think I'm close as it works for the first iteration, but sequent queries for the next pageSize+ rows doesn't work.
WITH CTE AS
(
SELECT ParentId, [TimeStamp], Value,
RANK() OVER(ORDER BY [TimeStamp]) AS rnk,
ROW_NUMBER() OVER(ORDER BY [TimeStamp]) AS rownum
FROM #Temp
)
SELECT *
FROM CTE
WHERE (rownum BETWEEN 1 AND 4) OR (rnk BETWEEN 1 AND 4)
ORDER BY TimeStamp, ParentId
The ROW_NUMBER ensures the minimum pageSize is met, but the RANK will include additional ties.

declare #Temp as Table ( ParentId Int, [TimeStamp] DateTime, [Value] Int );
insert into #Temp ( ParentId, [TimeStamp], [Value] ) values
(1, '1/1/2013 00:00', 6),
(1, '1/1/2013 01:00', 7),
(1, '1/1/2013 02:00', 8),
(2, '1/1/2013 00:00', 6),
(2, '1/1/2013 01:00', 7),
(2, '1/1/2013 02:00', 8),
(3, '1/1/2013 00:00', 6),
(3, '1/1/2013 01:00', 7),
(3, '1/1/2013 02:00', 8);
declare #PageSize as Int = 4;
declare #Page as Int = 1;
with Alpha as (
select ParentId, [TimeStamp], Value,
Rank() over ( order by [TimeStamp] ) as Rnk,
Row_Number() over ( order by [TimeStamp] ) as RowNum
from #Temp ),
Beta as (
select Min( Rnk ) as MinRnk, Max( Rnk ) as MaxRnk
from Alpha
where ( #Page - 1 ) * #PageSize < RowNum and RowNum <= #Page * #PageSize )
select A.*
from Alpha as A inner join
Beta as B on B.MinRnk <= A.Rnk and A.Rnk <= B.MaxRnk
order by [TimeStamp], ParentId;
EDIT:
An alternative query that assigns page numbers as it goes, so that next/previous page can be implemented without overlapping rows:
with Alpha as (
select ParentId, [TimeStamp], Value,
Rank() over ( order by [TimeStamp] ) as Rnk,
Row_Number() over ( order by [TimeStamp] ) as RowNum
from #Temp ),
Beta as (
select ParentId, [TimeStamp], Value, Rnk, RowNum, 1 as Page, 1 as PageRow
from Alpha
where RowNum = 1
union all
select A.ParentId, A.[TimeStamp], A.Value, A.Rnk, A.RowNum,
case when B.PageRow >= #PageSize and A.TimeStamp <> B.TimeStamp then B.Page + 1 else B.Page end,
case when B.PageRow >= #PageSize and A.TimeStamp <> B.TimeStamp then 1 else B.PageRow + 1 end
from Alpha as A inner join
Beta as B on B.RowNum + 1 = A.RowNum
)
select * from Beta
option ( MaxRecursion 0 )
Note that recursive CTEs often scale poorly.

I think your strategy of using row_number() and rank() is overcomplicating things.
Just pick the top 4 timestamps from the data. Then choose any timestamps that match those:
select *
from #temp
where [timestamp] in (select top 4 [timestamp] from #temp order by [TimeStamp])

Related

SQL Query to get the value of a product given a date

I have a table which gives the rate of a product on a particular date, #tableA.
create table #tableA
(
Id int not null,
ValueDate date,
Price decimal(9,2)
)
insert into #tableA (Id, ValueDate, Price)
values
(1, '2020-08-01', 100),
(1, '2020-08-05', 110),
(1, '2020-08-07', 50)
My other table has the id and the date the product is active.
create table #tableB
(
Id int not null,
Dates date
)
insert into #tableB (Id, Dates)
values
(1, '2020-08-01'),
(1, '2020-08-02'),
(1, '2020-08-03'),
(1, '2020-08-04'),
(1, '2020-08-05'),
(1, '2020-08-06'),
(1, '2020-08-07'),
(1, '2020-08-04')
I cannot find an efficient query where my resulting table gives the rate of the product on a given date.
I am expecting this result.
Id Dates ValueDate Price
-------------------------------------
1, '2020-08-01', '2020-08-01', 100
1, '2020-08-02', '2020-08-01', 100
1, '2020-08-03', '2020-08-01', 100
1, '2020-08-04', '2020-08-01', 100
1, '2020-08-05', '2020-08-05', 110
1, '2020-08-06', '2020-08-05', 110
1, '2020-08-07', '2020-08-07', 50
Something like this:
SELECT DISTINCT B.[id]
,B.[Dates]
,DS.*
FROM #tableB B
CROSS APPLY
(
SELECT TOP 1 *
FROM #tableA A
WHERE B.[Id] = A.[Id]
AND B.[Dates] >= A.[ValueDate]
AND A.[Price] IS NOT NULL
ORDER BY A.[ValueDate] DESC
) DS;
or this:
WITH DataSource AS
(
SELECT DISTINCT B.[ID]
,B.[Dates]
,A.[ValueDate]
,A.[Price]
,SUM(IIF(A.[ID] IS NOT NULL, 1, 0)) OVER (ORDER BY B.[Dates]) AS [GroupID]
FROM #tableB B
LEFT JOIN #tableA A
ON B.[Id] = A.[Id]
AND B.[Dates] = A.[ValueDate]
AND A.[Price] IS NOT NULL
)
SELECT [ID]
,[Dates]
,MAX([ValueDate]) OVER (PARTITION BY [GroupID]) AS [ValueDate]
,MAX([Price]) OVER (PARTITION BY [GroupID]) AS [Price]
FROM DataSource;

Partition the date into a weeks from a given date to the last date in the record

I wanted to count the time gap between two rows for the same id if the second is less than an hour after the first, and partition the count for the week.
Suppose given date with time is 2020-07-01 08:00
create table #Temp (
Id integer not null,
Time datetime not null
);
insert into #Temp values (1, '2020-07-01 08:00');
insert into #Temp values (1, '2020-07-01 08:01');
insert into #Temp values (1, '2020-07-01 08:06');
insert into #Temp values (1, '2020-07-01 08:30');
insert into #Temp values (1, '2020-07-08 09:35');
insert into #Temp values (1, '2020-07-15 16:10');
insert into #Temp values (1, '2020-07-15 16:20');
insert into #Temp values (1, '2020-07-17 06:40');
insert into #Temp values (1, '2020-07-17 06:41');
insert into #Temp values (2, '2020-07-01 08:30');
insert into #Temp values (2, '2020-07-01 09:26');
insert into #Temp values (2, '2020-07-01 10:25');
insert into #Temp values (2, '2020-07-09 08:30');
insert into #Temp values (2, '2020-07-09 09:26');
insert into #Temp values (2, '2020-07-09 10:25');
insert into #Temp values (3, '2020-07-21 08:30');
insert into #Temp values (3, '2020-07-21 09:26');
insert into #Temp values (3, '2020-07-21 10:25');
The week should extend up to the last date in the record. Here, the last date is
2020-07-21 10:25
Have to transform the output from this piece of code and divide the duration weekly.
select Id, sum(datediff(minute, Time, next_ts)) as duration_minutes
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
group by Id;
Output:
id duration_minutes
1 41
2 230
3 115
The desired output should divide this duration on a weekly basis,
like Week 1, Week 2, Week 3, and so on.
Desired Output:
If the
start date is 2020-07-01 08:00
end date is 2020-07-21 10:25
id | Week 1 | Week 2 | Week 3
--------------------------------------
1 | 30 | 0 | 11
2 | 115 | 115 | 0
3 | 0 | 0 | 115
similarly, if the
start date is 2020-07-08 08:00
id | Week 1 | Week 2
---------------------------
1 | 11 | 0
2 | 115 | 0
3 | 0 | 115
Is this what you want?
select Id,
1 + datediff(second, '2020-07-01 06:00', time) / (24 * 60 * 60 * 7) as week_num,
sum(datediff(minute, Time, next_ts)) as duration_minutes
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from Temp t
) t
where datediff(minute, Time, next_ts) < 60
group by Id, datediff(second, '2020-07-01 06:00', time) / (24 * 60 * 60 * 7)
order by id, week_num;
Here is a db<>fiddle.
I am not able to understand the logic behind the week periods. Anyone, in the example below I am using the following code to set the week:
'Week ' + CAST(DENSE_RANK() OVER (ORDER BY DATEDIFF(DAY, #FirstDate, next_ts) / 7) AS VARCHAR(12))
You can adjust it to ignore the ours, be more precise or something else to match your real requirements.
Apart from that, you just need to perform a dynamic PIVOT. Here is the full working example:
DROP TABLE IF EXISTS #Temp;
create table #Temp (
Id integer not null,
Time datetime not null
);
insert into #Temp values (1, '2020-07-01 08:00');
insert into #Temp values (1, '2020-07-01 08:01');
insert into #Temp values (1, '2020-07-01 08:06');
insert into #Temp values (1, '2020-07-01 08:30');
insert into #Temp values (1, '2020-07-08 09:35');
insert into #Temp values (1, '2020-07-15 16:10');
insert into #Temp values (1, '2020-07-15 16:20');
insert into #Temp values (1, '2020-07-17 06:40');
insert into #Temp values (1, '2020-07-17 06:41');
insert into #Temp values (2, '2020-07-01 08:30');
insert into #Temp values (2, '2020-07-01 09:26');
insert into #Temp values (2, '2020-07-01 10:25');
insert into #Temp values (2, '2020-07-09 08:30');
insert into #Temp values (2, '2020-07-09 09:26');
insert into #Temp values (2, '2020-07-09 10:25');
insert into #Temp values (3, '2020-07-21 08:30');
insert into #Temp values (3, '2020-07-21 09:26');
insert into #Temp values (3, '2020-07-21 10:25');
DROP TABLE IF EXISTS #TEST
CREATE TABLE #TEST
(
[ID] INT
,[week_day] VARCHAR(12)
,[time_in_minutes] BIGINT
)
DECLARE #FirstDate DATE;
SELECT #FirstDate = MIN(Time)
FROM #Temp
INSERT INTO #TEST
select id
,'Week ' + CAST(DENSE_RANK() OVER (ORDER BY DATEDIFF(DAY, #FirstDate, next_ts) / 7) AS VARCHAR(12))
,datediff(minute, Time, next_ts)
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
DECLARE #columns NVARCHAR(MAX);
SELECT #columns = STUFF
(
(
SELECT ',' + QUOTENAME([week_day])
FROM
(
SELECT DISTINCT CAST(REPLACE([week_day], 'Week ', '') AS INT)
,[week_day]
FROM #TEST
) DS ([rowID], [week_day])
ORDER BY [rowID]
FOR XML PATH(''), TYPE
).value('.', 'VARCHAR(MAX)')
,1
,1
,''
);
DECLARE #DanymicSQL NVARCHAR(MAX);
SET #DanymicSQL = N'
SELECT [ID], ' + #columns + '
FROM #TEST
PIVOT
(
SUM([time_in_minutes]) FOR [week_day] IN (' + #columns + ')
) PVT';
EXEC sp_executesql #DanymicSQL;

Creating a table of sequences (integers and dates)

Sometimes I need to create tables of regularly occuring sequences of dates or integers. I manually create them and that works but it's more difficult to maintain because there's a lot of code duplication. What is the more maintainable way to say a sequence of integers or dates that occur at regular intervals?
Here's my current approach:
DECLARE #IndexDate TABLE (
[Id] INT,
[Date] DATE
)
INSERT INTO #IndexDate (
Id, Date
)
VALUES
(1, CONCAT(YEAR(GETDATE()), '-01-01')),
(2, CONCAT(YEAR(GETDATE()), '-02-01')),
(3, CONCAT(YEAR(GETDATE()), '-03-01')),
(4, CONCAT(YEAR(GETDATE()), '-04-01')),
(5, CONCAT(YEAR(GETDATE()), '-05-01')),
(6, CONCAT(YEAR(GETDATE()), '-06-01')),
(7, CONCAT(YEAR(GETDATE()), '-07-01')),
(8, CONCAT(YEAR(GETDATE()), '-08-01')),
(9, CONCAT(YEAR(GETDATE()), '-09-01')),
(10, CONCAT(YEAR(GETDATE()), '-10-01')),
(11, CONCAT(YEAR(GETDATE()), '-11-01')),
(12, CONCAT(YEAR(GETDATE()), '-12-01'))
SELECT * FROM #IndexDate
You could use recursive cte:
WITH cte(n) AS (
SELECT 1
UNION ALL
SELECT n+ 1
FROM cte
WHERE n < 12
)
SELECT *
FROM cte
OPTION (maxrecursion 0);
WITH cte(d) AS (
SELECT CAST('20190101' AS DATE)
UNION ALL
SELECT DATEADD(m, 1, d)
FROM cte
WHERE d < '20200101'
)
SELECT *
FROM cte
OPTION (maxrecursion 0);
db<>fiddle demo
To match your logic with a recursive CTE, you can do:
with indextable as (
select 1 as id, datefromparts(year(getdate()), 1, 1) as date
union all
select 1 + id, dateadd(month, 1, date)
from indextable
where id < 12
)
select *
from indextable;
For one year, you don't have to worry about option (maxrecursion).
I'm not a big fan of using date as a column name, because it is a keyword, but SQL Server allows it.

SQL Server: stored procedure using recursive CTE finding values matching a total

I need to find within a stored procedure which values match a wanted total following valex's solution recursive query in SQL Server
The following works pretty well assuming the CTE anchor recordset is very small
CREATE TABLE #t ([id] INT, [num] FLOAT);
DECLARE #wanted FLOAT = 100000
INSERT INTO #t ([id], [num])
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
(6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
(11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
(16, 10000), (17, 74000)
/* when you add more records the query becomes too slow, remove this comment
to test*/
/*,(18,10000),(19,78000),(20,10000),(21,10000),(22,80000),(23,19000),
(24,8000),(25,5000),(26,10000),(27,4000),(28,46000),(29,48000),(30,20000),
(31,10000),(32,25000),(33,10000),(34,13000),(35,16000),(36,10000),
(37,5000), 38,5000),(39,30000),(40,15000),(41,10000)*/
;
CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);
WITH CTE AS
(
SELECT
id, num AS CSum,
CAST(id AS VARCHAR(MAX)) AS path
FROM
#t
WHERE num <= #wanted
UNION ALL
SELECT
#t.id, #t.num + CTE.CSum AS CSum,
CTE.path + ',' + CAST(#t.id AS VARCHAR(MAX)) AS path
FROM
#T
INNER JOIN
CTE ON #T.num + CTE.CSum <= #wanted AND CTE.id < #T.id
WHERE
#T.num + CTE.CSum <= #wanted
)
SELECT TOP 1 Path
FROM CTE
WHERE CTE.CSum = #wanted
ORDER BY id
DROP TABLE #t
It will return 3,4 which are the first 2 rows whose [num] values gives the #wanted total.
This works reasonably fast when there are just a few records in the temp table #t but when you remove the comment and all remaining records (from id 17 to id 41) the query just takes forever because the CTE grows exponentially.
Is there a way to speed up the code? i just need the first matching total (the list anchor dataset is ordered so a result like 3,4 is better than 8,20,22)
What if you took an iterative approach? This would be pretty simple to give the ability to stop as soon as a solution is found.
This was put together quickly, so you may can optimize further. I tested for your example (ran in less than 1 second) and several other combinations and levels of depth.
Result Depth Total IdList NumList
------ ----------- ----------- ---------- -------------
Found 1 100000 3,4 53000,47000
Full Code:
-- Configuration
DECLARE #wanted FLOAT = 100000
DECLARE #MaxDepth INT = 10 -- Customize how many levels you want to look
SET NOCOUNT ON
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T
IF OBJECT_ID('tempdb..#T') IS NULL BEGIN
CREATE TABLE #T (Id INT, Num INT)
INSERT INTO #t ([id], [num])
VALUES (1, 17000), (2, 33000), (3, 53000), (4, 47000), (5, 10000),
(6, 53000), (7, 7000), (8, 10000), (9, 20000), (10, 5000),
(11, 40000), (12, 30000), (13, 10000), (14, 8000), (15, 8000),
(16, 10000), (17, 74000)
CREATE NONCLUSTERED INDEX [idx_id] ON #t ([id]);
END
-- Setup processing table
IF OBJECT_ID('tempdb..#U') IS NOT NULL DROP TABLE #U
CREATE TABLE #U (
MaxId INT,
Total INT,
IdList VARCHAR(MAX),
NumList VARCHAR(MAX)
)
-- Initial population from source table
INSERT #U
SELECT Id, Num,
CONVERT(VARCHAR(10), Id),
CONVERT(VARCHAR(10), Num)
FROM #T
-- Iterative approach
DECLARE #Depth INT = 0
WHILE NOT EXISTS (SELECT * FROM #U WHERE Total = #wanted) BEGIN
-- Increment depth
SET #Depth = #Depth + 1
IF #Depth >= #MaxDepth BEGIN
PRINT 'Max depth reached'
RETURN -- Stop processing further
END
-- Calculate sum for this depth
IF OBJECT_ID('tempdb..#V') IS NOT NULL
DROP TABLE #V
SELECT
T.Id AS MaxId,
U.Total + T.Num AS Total,
U.IdList + ',' + CONVERT(VARCHAR(10), T.Id) AS IdList,
U.NumList + ',' + CONVERT(VARCHAR(10), T.Num) AS NumList
INTO #V
FROM #U U
INNER JOIN #T T
ON U.MaxId < T.Id
-- Replace data for next iteration
TRUNCATE TABLE #U
INSERT #U
SELECT * FROM #V
-- Check if no more combinations available
IF ##ROWCOUNT = 0 BEGIN
PRINT 'All combinations tested'
RETURN -- Stop processing further
END
END
-- Return result
SELECT TOP 1 'Found' AS [Result], #Depth AS Depth, Total, IdList, NumList FROM #U WHERE Total = #wanted

Summing up the records as per given conditions

I have a table like below, What I need that for any particular fund and up to any particular date logic will sum the amount value. Let say I need the sum for 3 dates as 01/28/2015,03/30/2015 and 04/01/2015. Then logic will check for up to first date how many records are there in table . If it found more than one record then it'll sum the amount value. Then for next date it'll sum up to the next date but from the previous date it had summed up.
Id Fund Date Amount
1 A 01/20/2015 250
2 A 02/28/2015 300
3 A 03/20/2015 400
4 A 03/30/2015 200
5 B 04/01/2015 500
6 B 04/01/2015 600
I want result to be like below
Id Fund Date SumOfAmount
1 A 02/28/2015 550
2 A 03/30/2015 600
3 B 04/01/2015 1100
Based on your question, it seems that you want to select a set of dates, and then for each fund and selected date, get the sum of the fund amounts from the selected date to the previous selected date. Here is the result set I think you should be expecting:
Fund Date SumOfAmount
A 2015-02-28 550.00
A 2015-03-30 600.00
B 2015-04-01 1100.00
Here is the code to produce this output:
DECLARE #Dates TABLE
(
SelectedDate DATE PRIMARY KEY
)
INSERT INTO #Dates
VALUES
('02/28/2015')
,('03/30/2015')
,('04/01/2015')
DECLARE #FundAmounts TABLE
(
Id INT PRIMARY KEY
,Fund VARCHAR(5)
,Date DATE
,Amount MONEY
);
INSERT INTO #FundAmounts
VALUES
(1, 'A', '01/20/2015', 250)
,(2, 'A', '02/28/2015', 300)
,(3, 'A', '03/20/2015', 400)
,(4, 'A', '03/30/2015', 200)
,(5, 'B', '04/01/2015', 500)
,(6, 'B', '04/01/2015', 600);
SELECT
F.Fund
,D.SelectedDate AS Date
,SUM(F.Amount) AS SumOfAmount
FROM
(
SELECT
SelectedDate
,LAG(SelectedDate,1,'1/1/1900') OVER (ORDER BY SelectedDate ASC) AS PreviousDate
FROM #Dates
) D
JOIN
#FundAmounts F
ON
F.Date BETWEEN DATEADD(DAY,1,D.PreviousDate) AND D.SelectedDate
GROUP BY
D.SelectedDate
,F.Fund
EDIT: Here is alternative to the LAG function for this example:
FROM
(
SELECT
SelectedDate
,ISNULL((SELECT TOP 1 SelectedDate FROM #Dates WHERE SelectedDate < Dates.SelectedDate ORDER BY SelectedDate DESC),'1/1/1900') AS PreviousDate
FROM #Dates Dates
) D
If i change your incorrect sample data to ...
CREATE TABLE TableName
([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
;
INSERT INTO TableName
([Id], [Fund], [Date], [Amount])
VALUES
(1, 'A', '2015-01-28 00:00:00', 250),
(2, 'A', '2015-01-28 00:00:00', 300),
(3, 'A', '2015-03-30 00:00:00', 400),
(4, 'A', '2015-03-30 00:00:00', 200),
(5, 'B', '2015-04-01 00:00:00', 500),
(6, 'B', '2015-04-01 00:00:00', 600)
;
this query using GROUP BY works:
SELECT MIN(Id) AS Id,
MIN(Fund) AS Fund,
[Date],
SUM(Amount) AS SumOfAmount
FROM dbo.TableName t
WHERE [Date] IN ('01/28/2015','03/30/2015','04/01/2015')
GROUP BY [Date]
Demo
Initially i have used Row_number and month function to pick max date of every month and in 2nd cte i did sum of amounts and joined them..may be this result set matches your out put
declare #t table (Id int,Fund Varchar(1),Dated date,amount int)
insert into #t (id,Fund,dated,amount) values (1,'A','01/20/2015',250),
(2,'A','01/28/2015',300),
(3,'A','03/20/2015',400),
(4,'A','03/30/2015',200),
(5,'B','04/01/2015',600),
(6,'B','04/01/2015',500)
;with cte as (
select ID,Fund,Amount,Dated,ROW_NUMBER() OVER
(PARTITION BY DATEDIFF(MONTH, '20000101', dated)ORDER BY dated desc)AS RN from #t
group by ID,Fund,DATED,Amount
),
CTE2 AS
(select SUM(amount)Amt from #t
GROUP BY MONTH(dated))
,CTE3 AS
(Select Amt,ROW_NUMBER()OVER (ORDER BY amt)R from cte2)
,CTE4 AS
(
Select DISTINCT C.ID As ID,
C.Fund As Fund,
C.Dated As Dated
,ROW_NUMBER()OVER (PARTITION BY RN ORDER BY (SELECT NULL))R
from cte C INNER JOIN CTE3 CC ON c.RN = CC.R
Where C.RN = 1
GROUP BY C.ID,C.Fund,C.RN,C.Dated )
select C.R,C.Fund,C.Dated,cc.Amt from CTE4 C INNER JOIN CTE3 CC
ON c.R = cc.R
declare #TableName table([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
declare #Sample table([SampleDate] datetime)
INSERT INTO #TableName
([Id], [Fund], [Date], [Amount])
VALUES
(1, 'A', '20150120 00:00:00', 250),
(2, 'A', '20150128 00:00:00', 300),
(3, 'A', '20150320 00:00:00', 400),
(4, 'A', '20150330 00:00:00', 200),
(5, 'B', '20150401 00:00:00', 500),
(6, 'B', '20150401 00:00:00', 600)
INSERT INTO #Sample ([SampleDate])
values ('20150128 00:00:00'), ('20150330 00:00:00'), ('20150401 00:00:00')
-- select * from #TableName
-- select * from #Sample
;WITH groups AS (
SELECT [Fund], [Date], [AMOUNT], MIN([SampleDate]) [SampleDate] FROM #TableName
JOIN #Sample ON [Date] <= [SampleDate]
GROUP BY [Fund], [Date], [AMOUNT])
SELECT [Fund], [SampleDate], SUM([AMOUNT]) FROM groups
GROUP BY [Fund], [SampleDate]
Explanation:
The CTE groups finds the earliest SampleDate which is later than (or equals to) your
data's date and enriches your data accordingly, thus giving them the group to be summed up in.
After that, you can group on the derived date.