Partition the dates into weeks from a given date to the last date in the record - SQL

I want to count the time gap between two rows for the same id when the second row is less than an hour after the first, and partition those counts by week.
Suppose the given start date and time is 2020-07-01 08:00.
create table #Temp (
Id integer not null,
Time datetime not null
);
insert into #Temp values (1, '2020-07-01 08:00');
insert into #Temp values (1, '2020-07-01 08:01');
insert into #Temp values (1, '2020-07-01 08:06');
insert into #Temp values (1, '2020-07-01 08:30');
insert into #Temp values (1, '2020-07-08 09:35');
insert into #Temp values (1, '2020-07-15 16:10');
insert into #Temp values (1, '2020-07-15 16:20');
insert into #Temp values (1, '2020-07-17 06:40');
insert into #Temp values (1, '2020-07-17 06:41');
insert into #Temp values (2, '2020-07-01 08:30');
insert into #Temp values (2, '2020-07-01 09:26');
insert into #Temp values (2, '2020-07-01 10:25');
insert into #Temp values (2, '2020-07-09 08:30');
insert into #Temp values (2, '2020-07-09 09:26');
insert into #Temp values (2, '2020-07-09 10:25');
insert into #Temp values (3, '2020-07-21 08:30');
insert into #Temp values (3, '2020-07-21 09:26');
insert into #Temp values (3, '2020-07-21 10:25');
The weeks should extend up to the last date in the record. Here, the last date is
2020-07-21 10:25
I have to transform the output of this piece of code and divide the duration by week.
select Id, sum(datediff(minute, Time, next_ts)) as duration_minutes
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
group by Id;
Output:
id duration_minutes
1 41
2 230
3 115
The desired output should divide this duration on a weekly basis,
like Week 1, Week 2, Week 3, and so on.
Desired Output:
If the start date is 2020-07-01 08:00 and the end date is 2020-07-21 10:25:
id | Week 1 | Week 2 | Week 3
--------------------------------------
1 | 30 | 0 | 11
2 | 115 | 115 | 0
3 | 0 | 0 | 115
(For example, for id 1 the qualifying gaps of under 60 minutes add up to 1 + 5 + 24 = 30 minutes in the first week and 10 + 1 = 11 minutes in the third week.)
Similarly, if the start date is 2020-07-08 08:00:
id | Week 1 | Week 2
---------------------------
1 | 11 | 0
2 | 115 | 0
3 | 0 | 115

Is this what you want?
select Id,
1 + datediff(second, '2020-07-01 06:00', time) / (24 * 60 * 60 * 7) as week_num,
sum(datediff(minute, Time, next_ts)) as duration_minutes
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
group by Id, datediff(second, '2020-07-01 06:00', time) / (24 * 60 * 60 * 7)
order by id, week_num;
Here is a db<>fiddle.
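If the number of weeks is fixed and known up front, the week_num from the query above can also be spread into columns with plain conditional aggregation instead of a pivot. A minimal sketch, reusing the same anchor date and assuming three weeks (the [Week n] column names are just illustrative):
select Id,
sum(case when week_num = 1 then duration_minutes else 0 end) as [Week 1],
sum(case when week_num = 2 then duration_minutes else 0 end) as [Week 2],
sum(case when week_num = 3 then duration_minutes else 0 end) as [Week 3]
from (select Id,
1 + datediff(second, '2020-07-01 06:00', Time) / (24 * 60 * 60 * 7) as week_num,
datediff(minute, Time, next_ts) as duration_minutes
from (select t.*,
lead(Time) over (partition by Id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
) w
group by Id
order by Id;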

I am not able to fully understand the logic behind the week periods. Anyway, in the example below I am using the following code to set the week:
'Week ' + CAST(DENSE_RANK() OVER (ORDER BY DATEDIFF(DAY, @FirstDate, next_ts) / 7) AS VARCHAR(12))
You can adjust it to ignore the hours, be more precise, or change it in some other way to match your real requirements.
Apart from that, you just need to perform a dynamic PIVOT. Here is the full working example:
DROP TABLE IF EXISTS #Temp;
create table #Temp (
Id integer not null,
Time datetime not null
);
insert into #Temp values (1, '2020-07-01 08:00');
insert into #Temp values (1, '2020-07-01 08:01');
insert into #Temp values (1, '2020-07-01 08:06');
insert into #Temp values (1, '2020-07-01 08:30');
insert into #Temp values (1, '2020-07-08 09:35');
insert into #Temp values (1, '2020-07-15 16:10');
insert into #Temp values (1, '2020-07-15 16:20');
insert into #Temp values (1, '2020-07-17 06:40');
insert into #Temp values (1, '2020-07-17 06:41');
insert into #Temp values (2, '2020-07-01 08:30');
insert into #Temp values (2, '2020-07-01 09:26');
insert into #Temp values (2, '2020-07-01 10:25');
insert into #Temp values (2, '2020-07-09 08:30');
insert into #Temp values (2, '2020-07-09 09:26');
insert into #Temp values (2, '2020-07-09 10:25');
insert into #Temp values (3, '2020-07-21 08:30');
insert into #Temp values (3, '2020-07-21 09:26');
insert into #Temp values (3, '2020-07-21 10:25');
DROP TABLE IF EXISTS #TEST
CREATE TABLE #TEST
(
[ID] INT
,[week_day] VARCHAR(12)
,[time_in_minutes] BIGINT
)
DECLARE @FirstDate DATE;
SELECT @FirstDate = MIN(Time)
FROM #Temp
INSERT INTO #TEST
select id
,'Week ' + CAST(DENSE_RANK() OVER (ORDER BY DATEDIFF(DAY, @FirstDate, next_ts) / 7) AS VARCHAR(12))
,datediff(minute, Time, next_ts)
from (select t.*,
lead(Time) over (partition by id order by Time) as next_ts
from #Temp t
) t
where datediff(minute, Time, next_ts) < 60
DECLARE @columns NVARCHAR(MAX);
SELECT @columns = STUFF
(
(
SELECT ',' + QUOTENAME([week_day])
FROM
(
SELECT DISTINCT CAST(REPLACE([week_day], 'Week ', '') AS INT)
,[week_day]
FROM #TEST
) DS ([rowID], [week_day])
ORDER BY [rowID]
FOR XML PATH(''), TYPE
).value('.', 'VARCHAR(MAX)')
,1
,1
,''
);
DECLARE @DynamicSQL NVARCHAR(MAX);
SET @DynamicSQL = N'
SELECT [ID], ' + @columns + '
FROM #TEST
PIVOT
(
SUM([time_in_minutes]) FOR [week_day] IN (' + @columns + ')
) PVT';
EXEC sp_executesql @DynamicSQL;
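For the sample data above, the statement that gets built and executed ends up looking roughly like this (shown only to illustrate what the dynamic part produces; the column list is read from #TEST):
SELECT [ID], [Week 1], [Week 2], [Week 3]
FROM #TEST
PIVOT
(
SUM([time_in_minutes]) FOR [week_day] IN ([Week 1], [Week 2], [Week 3])
) PVT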

Related

SQL : 12 hr shift function from 7am - 7am

I need to query all production records by 12-hour shift, 7am-7am. If a record's time is after midnight and before 7am, it actually belongs to the previous day's shift. In the example below I need them all to be 2022-01-01, like the last column. If I query by 2022-01-01 I don't get all the rows. Can I use a function for this that compares the time and makes it the previous day?
declare @temp table
(
Emp_id int,
Time datetime,
shiftDate date
);
insert into @temp values (1, '2022-01-01 08:10:00:000', '2022-01-01')
insert into @temp values (1, '2022-01-01 10:21:00:000', '2022-01-01')
insert into @temp values (1, '2022-01-01 13:10:00:000', '2022-01-01')
insert into @temp values (1, '2022-01-01 22:22:00:000', '2022-01-01')
insert into @temp values (1, '2022-01-02 02:15:00:000', '2022-01-01')
insert into @temp values (1, '2022-01-02 04:22:00:000', '2022-01-01')
insert into @temp values (1, '2022-01-02 06:18:00:000', '2022-01-01')
insert into @temp values (1, '2022-01-02 06:55:00:000', '2022-01-01')
select * from @temp
select * from @temp
where convert(date, [time]) = '2022-01-01'
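One common approach (a sketch, not from the original post) is to shift each timestamp back by 7 hours before converting it to a date, so anything between midnight and 07:00 lands on the previous day's shift:
-- derive the 7am-7am shift date by moving the clock back 7 hours
select *, convert(date, dateadd(hour, -7, [Time])) as derivedShiftDate
from @temp
where convert(date, dateadd(hour, -7, [Time])) = '2022-01-01'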

SQL select items that make datetime range between flag toggle

Say I have a table like this one:
CREATE TABLE TESTTABLE (
ID Integer NOT NULL,
ATMOMENT Timestamp NOT NULL,
ISALARM Integer NOT NULL,
CONSTRAINT PK_TESTTABLE PRIMARY KEY (ID)
);
It has ISALARM flag that toggles between 0 and 1 at random moments ATMOMENT, like in this example dataset:
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('1', '01.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('2', '01.01.2016, 00:01:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('3', '01.01.2016, 00:02:00.000', '0');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('4', '01.01.2016, 00:02:00.000', '0');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('10', '02.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('11', '02.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('12', '02.01.2016, 00:01:00.000', '0');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('20', '03.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('21', '03.01.2016, 00:01:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('22', '03.01.2016, 00:02:00.000', '0');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('23', '03.01.2016, 00:02:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('30', '04.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('31', '04.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('32', '04.01.2016, 00:00:00.000', '0');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('33', '04.01.2016, 00:00:00.000', '0');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('40', '05.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('41', '05.01.2016, 00:00:00.000', '1');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('42', '05.01.2016, 00:00:00.000', '0');
INSERT INTO TESTTABLE (ID, ATMOMENT, ISALARM) VALUES ('43', '05.01.2016, 00:00:00.000', '0');
I need to select all alarm ranges, i.e. the ATMOMENT ranges where ISALARM is set to 1 (for the first time after the previous range is closed) at the range begin and reset back to 0 at the range end. For clarity, say the first reset is enough to close such a range; say also that a simultaneous ISALARM set and reset is treated as the range end (while possibly also as a begin).
Example dataset above is expected to produce something like this:
ALARMBEGIN | LASTALARMBEGIN | ALARMEND
-------------------------- | -------------------------- | --------
'01.01.2016, 00:00:00.000' | '01.01.2016, 00:01:00.000' | '01.01.2016, 00:02:00.000'
'02.01.2016, 00:00:00.000' | '02.01.2016, 00:00:00.000' | '02.01.2016, 00:01:00.000'
'03.01.2016, 00:00:00.000' | '03.01.2016, 00:02:00.000' | '03.01.2016, 00:02:00.000'
'04.01.2016, 00:00:00.000' | '04.01.2016, 00:00:00.000' | '04.01.2016, 00:00:00.000'
'05.01.2016, 00:00:00.000' | '05.01.2016, 00:00:00.000' | '05.01.2016, 00:00:00.000'
My own solution to this (below) looks pretty ugly and runs stunningly slowly (about 1 minute) even though TESTTABLE has a relatively small dataset of only ~2500 records (tested with Firebird 2.5 and PostgreSQL; I'm not good with DB optimization; "CREATE INDEX IDX_TESTTABLE1 ON TESTTABLE (ATMOMENT,ISALARM)" helps, but not very much).
This is pretty strange to me, because a simple linear iteration over all TESTTABLE records (ordered by ATMOMENT), comparing the ISALARM field to that of the previous record, gives me the ranges I want much faster.
Is there an elegant solution that makes this SQL select faster and cleaner?
SELECT DISTINCT a1.ATMOMENT AS ALARMBEGIN, a2.ATMOMENT AS LASTALARMBEGIN, a3.ATMOMENT AS ALARMEND
FROM TESTTABLE a1
JOIN TESTTABLE a2 ON
(a1.ATMOMENT<a2.ATMOMENT
AND NOT EXISTS(SELECT * FROM TESTTABLE x WHERE
x.ISALARM=0 AND a1.ATMOMENT<=x.ATMOMENT AND x.ATMOMENT<a2.ATMOMENT))
OR (a1.ATMOMENT=a2.ATMOMENT)
JOIN TESTTABLE a3 ON
(a2.ATMOMENT<a3.ATMOMENT
AND NOT EXISTS(SELECT * FROM TESTTABLE x WHERE
(x.ISALARM=0 AND a2.ATMOMENT<=x.ATMOMENT AND x.ATMOMENT<a3.ATMOMENT)
OR (x.ISALARM=1 AND a2.ATMOMENT<x.ATMOMENT AND x.ATMOMENT<=a3.ATMOMENT)))
OR (a2.ATMOMENT=a3.ATMOMENT)
WHERE a1.ISALARM<>0 AND a2.ISALARM<>0 AND a3.ISALARM=0
AND (NOT EXISTS(SELECT * FROM TESTTABLE x1 WHERE
x1.ATMOMENT<a1.ATMOMENT)
OR EXISTS(SELECT * FROM TESTTABLE x1 WHERE
x1.ISALARM=0
AND x1.ATMOMENT<a1.ATMOMENT
AND NOT EXISTS(SELECT * FROM TESTTABLE x2 WHERE
x1.ATMOMENT<x2.ATMOMENT AND x2.ATMOMENT<a1.ATMOMENT)))
ORDER BY a1.ATMOMENT
Thank you.
Upd 1
Thanks to Gordon Linoff's and Jayvee's solutions (which are very good with Firebird 3.0 and PostgreSQL), I've decided to rely on the ordering efficiency of Firebird 2.5 and contrived a select which is even uglier than my previous one but runs significantly faster. For those who need it done with Firebird 2.5:
WITH
GROUPEDTABLE_TT (ATMOMENT, NOTISALARMRESET, ISALARMSET)
AS(
SELECT a.ATMOMENT, MIN(a.ISALARM), MAX(a.ISALARM)
FROM TESTTABLE a
GROUP BY a.ATMOMENT),
INTERVALBEGIN_TT
AS(
SELECT a1.ATMOMENT
FROM GROUPEDTABLE_TT a1
WHERE
a1.ISALARMSET<>0
AND (NOT EXISTS (SELECT * FROM GROUPEDTABLE_TT x WHERE
x.ATMOMENT<a1.ATMOMENT)
OR (SELECT FIRST 1 x.NOTISALARMRESET FROM GROUPEDTABLE_TT x WHERE
x.ATMOMENT<a1.ATMOMENT
ORDER BY x.ATMOMENT DESC)=0)),
INTERVALLAST_TT
AS(
SELECT a2.ATMOMENT FROM GROUPEDTABLE_TT a2
WHERE a2.ISALARMSET=1
AND (a2.NOTISALARMRESET=0
OR (a2.NOTISALARMRESET=1
AND (SELECT FIRST 1 x.NOTISALARMRESET FROM GROUPEDTABLE_TT x WHERE
x.ATMOMENT>a2.ATMOMENT
ORDER BY x.ATMOMENT ASC)=0
AND (SELECT FIRST 1 x.ISALARMSET FROM GROUPEDTABLE_TT x WHERE
x.ATMOMENT>a2.ATMOMENT
ORDER BY x.ATMOMENT ASC)=0))),
INTERVALEND_TT
AS(
SELECT a1.ATMOMENT
FROM GROUPEDTABLE_TT a1
WHERE
a1.NOTISALARMRESET=0
AND (a1.ISALARMSET=1
OR (a1.ISALARMSET=0
AND (SELECT FIRST 1 x.ISALARMSET FROM GROUPEDTABLE_TT x WHERE
x.ATMOMENT<a1.ATMOMENT
ORDER BY x.ATMOMENT DESC)=1
AND (SELECT FIRST 1 x.NOTISALARMRESET FROM GROUPEDTABLE_TT x WHERE
x.ATMOMENT<a1.ATMOMENT
ORDER BY x.ATMOMENT DESC)=1))),
ENCLOSEDINTERVALS_TT (BEGINMOMENT, LASTBEGINMOMENT, ENDMOMENT)
AS(
SELECT ib.ATMOMENT,
(SELECT FIRST 1 il.ATMOMENT FROM INTERVALLAST_TT il WHERE
ib.ATMOMENT<=il.ATMOMENT ORDER BY il.ATMOMENT ASC),
(SELECT FIRST 1 ie.ATMOMENT FROM INTERVALEND_TT ie WHERE
ib.ATMOMENT<=ie.ATMOMENT ORDER BY ie.ATMOMENT ASC)
FROM INTERVALBEGIN_TT ib)
SELECT * FROM ENCLOSEDINTERVALS_TT
ORDER BY BEGINMOMENT
Upd 2
...but my selects seem to show quadratic (or at least faster-than-linear) growth of the fetch count as the total record count grows; it's better to use a procedure with a single-pass linear iteration for FB 2.5, or to use FB 3.0 with the solutions below...
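For reference, a single-pass procedure of the kind mentioned above could look roughly like this in Firebird 2.5 PSQL (a sketch only; the procedure name is made up, and rows tied on ATMOMENT are read with the set rows before the reset rows so that a simultaneous set/reset closes the range):
set term ^ ;
create procedure ALARM_RANGES
returns (ALARMBEGIN timestamp, LASTALARMBEGIN timestamp, ALARMEND timestamp)
as
declare variable ATM timestamp;
declare variable AL integer;
declare variable PREV integer;
begin
  PREV = 0;
  for select ATMOMENT, ISALARM from TESTTABLE
      order by ATMOMENT, ISALARM desc
      into :ATM, :AL
  do
  begin
    if (PREV = 0 and AL = 1) then ALARMBEGIN = ATM;  -- a new range opens here
    if (AL = 1) then LASTALARMBEGIN = ATM;           -- remember the last begin seen
    if (PREV = 1 and AL = 0) then
    begin
      ALARMEND = ATM;                                -- first reset closes the range
      suspend;
    end
    PREV = AL;
  end
end^
set term ; ^
-- usage: select * from ALARM_RANGES order by ALARMBEGIN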
This has been tested in PostgreSQL. The idea is to create 3 ordered common table expressions, for beginnings, last beginnings, and ends respectively, and then join the 3 tables.
It can be done with less code by creating only one CTE, flagging the rows with a case expression, and then doing a self-join (which you can do later), but this way the code is more self-explanatory and should be fairly efficient too.
;
with beginnings
as
(
select atmoment, row_number() over(order by atmoment) rn from
(
select *, lag(atmoment,1) over(order by atmoment,isalarm desc) prevtime,
lag(isalarm,1) over(order by atmoment,isalarm desc) prevstatus
from testtable
) t
where coalesce(prevstatus,0)=0 and isalarm=1
),
ends
as
(
select atmoment, row_number() over(order by atmoment) rn from
(
select *, lead(atmoment,1) over(order by atmoment,isalarm) nexttime,
lead(isalarm,1) over(order by atmoment,isalarm) nextstatus
from testtable
) t
where coalesce(nextstatus,1)=1 and isalarm=0
),
lastbeginnings
as
(
select atmoment, row_number() over(order by atmoment) rn from
(
select *, lead(atmoment,1) over(order by atmoment,isalarm desc) nexttime,
lead(isalarm,1) over(order by atmoment,isalarm desc) nextstatus
from testtable
) t
where coalesce(nextstatus,0)=0 and isalarm=1
)
select b.atmoment ALARMBEGIN, lb.atmoment LASTALARMBEGIN, e.atmoment ALARMEND
from beginnings b
join lastbeginnings lb on lb.rn=b.rn
join ends e on e.rn=b.rn
result:
> 2016-01-01 00:00:00 | 2016-01-01 00:01:00 | 2016-01-01 00:02:00
> 2016-01-02 00:00:00 | 2016-01-02 00:00:00 | 2016-01-02 00:01:00
> 2016-01-03 00:00:00 | 2016-01-03 00:02:00 | 2016-01-03 00:02:00
> 2016-01-04 00:00:00 | 2016-01-04 00:00:00 | 2016-01-04 00:00:00
> 2016-01-05 00:00:00 | 2016-01-05 00:00:00 | 2016-01-05 00:00:00
I think you can do this in Firebird 3.0, using row_number():
select isalarm, min(atmoment), max(atmoment)
from (select t.*,
row_number() over (order by atmoment) as seqnum,
row_number() over (partition by isalarm order by atmoment) as seqnum_a
from testtable t
) t
group by isalarm, (seqnum - seqnum_a);
It is a little hard to explain how this works. But if you run the subquery, you'll see how the difference identifies the groups you are interested in.

How to use DateDiff into only one SELECT statement?

I want to make a shorter version of the DATEDIFF logic in my SQL query. In my code, I create two temporary tables and then select from them using the DATEDIFF function.
I would like this code to be simplified so that it uses only ONE SELECT statement providing the same results. Is that possible?
Here is my result:
This is my SQL Query
DECLARE @Temp TABLE (ID int, Stamp datetime)
INSERT INTO @Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO @Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO @Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO @Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO @Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO @Temp (ID, Stamp) VALUES (2, GETDATE()+3)
SELECT ROW_NUMBER() OVER (ORDER BY ID) as c, ID, Stamp INTO #Temp2
FROM @Temp
SELECT ROW_NUMBER() OVER (ORDER BY ID) as d, ID, Stamp INTO #Temp3
FROM @Temp
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM #Temp2 as temp2
LEFT JOIN #Temp3 as temp3 on temp2.ID = temp3.ID and temp2.c = temp3.d + 1
Thanks!
If you are using SQL Server 2012:
select *, isnull(datediff(day, lag(stamp) over(partition by id order by stamp), stamp), 0)
from @temp t1
Otherwise, use this:
;with cte
as
(select *, row_number() over (partition by id order by stamp) as rownum
from @temp t1
)
select c1.id,c1.stamp,isnull(datediff(day,c2.stamp,c1.stamp),0) as datee
from cte c1
left join
cte c2
on c1.id=c2.id and c1.rownum=c2.rownum+1
You could remove the inserts into the temp tables and use subselects within the final query:
DECLARE @Temp TABLE (ID int, Stamp datetime)
INSERT INTO @Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO @Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO @Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO @Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO @Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO @Temp (ID, Stamp) VALUES (2, GETDATE()+3)
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM (SELECT ROW_NUMBER() OVER (ORDER BY ID) as c, ID, Stamp FROM @Temp) as temp2
LEFT JOIN (SELECT ROW_NUMBER() OVER (ORDER BY ID) as d, ID, Stamp FROM @Temp) as temp3
on temp2.ID = temp3.ID and temp2.c = temp3.d + 1
In SQL Server 2012+, you would just use lag():
select t.*,
isnull(datediff(day, lag(stamp) over (partition by id order by stamp), stamp), 0)
from @temp t;
In earlier versions, I would use outer apply:
select t.*,
isnull(datediff(day, t2.stamp, t.stamp), 0)
from @temp t outer apply
(select top 1 t2.*
from @temp t2
where t2.id = t.id and t2.stamp < t.stamp
order by t2.stamp desc
) t2;
Try a CTE:
DECLARE @Temp TABLE (ID int, Stamp datetime)
INSERT INTO @Temp (ID, Stamp) VALUES (1, '2016-08-17')
INSERT INTO @Temp (ID, Stamp) VALUES (1, GETDATE())
INSERT INTO @Temp (ID, Stamp) VALUES (1, GETDATE()+0.5)
INSERT INTO @Temp (ID, Stamp) VALUES (2, '2016-08-16')
INSERT INTO @Temp (ID, Stamp) VALUES (2, GETDATE())
INSERT INTO @Temp (ID, Stamp) VALUES (2, GETDATE()+3)
;WITH CTE AS
(
SELECT ROW_NUMBER() OVER (ORDER BY ID) as RowNo, ID, Stamp
FROM @Temp
)
SELECT temp2.ID, temp2.Stamp, ISNULL(DATEDIFF(day, temp3.Stamp, temp2.Stamp),0) as DateDiff
FROM CTE as temp2
LEFT JOIN CTE as temp3 on temp2.ID = temp3.ID
AND temp2.RowNo = temp3.RowNo + 1

Summing up the records as per given conditions

I have a table like the one below. What I need is that, for any particular fund and up to any particular date, the logic will sum the amount value. Let's say I need the sum for 3 dates: 01/28/2015, 03/30/2015 and 04/01/2015. The logic will first check how many records there are in the table up to the first date; if it finds more than one record, it will sum their amount values. Then for the next date it will sum up to that date, but starting from the previous date it had already summed up to.
Id Fund Date Amount
1 A 01/20/2015 250
2 A 02/28/2015 300
3 A 03/20/2015 400
4 A 03/30/2015 200
5 B 04/01/2015 500
6 B 04/01/2015 600
I want result to be like below
Id Fund Date SumOfAmount
1 A 02/28/2015 550
2 A 03/30/2015 600
3 B 04/01/2015 1100
Based on your question, it seems that you want to select a set of dates, and then for each fund and selected date, get the sum of the fund amounts from the selected date to the previous selected date. Here is the result set I think you should be expecting:
Fund Date SumOfAmount
A 2015-02-28 550.00
A 2015-03-30 600.00
B 2015-04-01 1100.00
Here is the code to produce this output:
DECLARE @Dates TABLE
(
SelectedDate DATE PRIMARY KEY
)
INSERT INTO @Dates
VALUES
('02/28/2015')
,('03/30/2015')
,('04/01/2015')
DECLARE @FundAmounts TABLE
(
Id INT PRIMARY KEY
,Fund VARCHAR(5)
,Date DATE
,Amount MONEY
);
INSERT INTO @FundAmounts
VALUES
(1, 'A', '01/20/2015', 250)
,(2, 'A', '02/28/2015', 300)
,(3, 'A', '03/20/2015', 400)
,(4, 'A', '03/30/2015', 200)
,(5, 'B', '04/01/2015', 500)
,(6, 'B', '04/01/2015', 600);
SELECT
F.Fund
,D.SelectedDate AS Date
,SUM(F.Amount) AS SumOfAmount
FROM
(
SELECT
SelectedDate
,LAG(SelectedDate,1,'1/1/1900') OVER (ORDER BY SelectedDate ASC) AS PreviousDate
FROM @Dates
) D
JOIN
@FundAmounts F
ON
F.Date BETWEEN DATEADD(DAY,1,D.PreviousDate) AND D.SelectedDate
GROUP BY
D.SelectedDate
,F.Fund
EDIT: Here is alternative to the LAG function for this example:
FROM
(
SELECT
SelectedDate
,ISNULL((SELECT TOP 1 SelectedDate FROM @Dates WHERE SelectedDate < Dates.SelectedDate ORDER BY SelectedDate DESC),'1/1/1900') AS PreviousDate
FROM @Dates Dates
) D
If I change your incorrect sample data to ...
CREATE TABLE TableName
([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
;
INSERT INTO TableName
([Id], [Fund], [Date], [Amount])
VALUES
(1, 'A', '2015-01-28 00:00:00', 250),
(2, 'A', '2015-01-28 00:00:00', 300),
(3, 'A', '2015-03-30 00:00:00', 400),
(4, 'A', '2015-03-30 00:00:00', 200),
(5, 'B', '2015-04-01 00:00:00', 500),
(6, 'B', '2015-04-01 00:00:00', 600)
;
this query using GROUP BY works:
SELECT MIN(Id) AS Id,
MIN(Fund) AS Fund,
[Date],
SUM(Amount) AS SumOfAmount
FROM dbo.TableName t
WHERE [Date] IN ('01/28/2015','03/30/2015','04/01/2015')
GROUP BY [Date]
Demo
Initially I have used ROW_NUMBER and the MONTH function to pick the max date of every month, and in the 2nd CTE I did the sum of amounts and joined them; maybe this result set matches your output.
declare @t table (Id int, Fund Varchar(1), Dated date, amount int)
insert into @t (id, Fund, dated, amount) values (1,'A','01/20/2015',250),
(2,'A','01/28/2015',300),
(3,'A','03/20/2015',400),
(4,'A','03/30/2015',200),
(5,'B','04/01/2015',600),
(6,'B','04/01/2015',500)
;with cte as (
select ID,Fund,Amount,Dated,ROW_NUMBER() OVER
(PARTITION BY DATEDIFF(MONTH, '20000101', dated) ORDER BY dated desc) AS RN from @t
group by ID,Fund,DATED,Amount
),
CTE2 AS
(select SUM(amount) Amt from @t
GROUP BY MONTH(dated))
,CTE3 AS
(Select Amt,ROW_NUMBER()OVER (ORDER BY amt)R from cte2)
,CTE4 AS
(
Select DISTINCT C.ID As ID,
C.Fund As Fund,
C.Dated As Dated
,ROW_NUMBER()OVER (PARTITION BY RN ORDER BY (SELECT NULL))R
from cte C INNER JOIN CTE3 CC ON c.RN = CC.R
Where C.RN = 1
GROUP BY C.ID,C.Fund,C.RN,C.Dated )
select C.R,C.Fund,C.Dated,cc.Amt from CTE4 C INNER JOIN CTE3 CC
ON c.R = cc.R
declare @TableName table([Id] int, [Fund] varchar(1), [Date] datetime, [Amount] int)
declare @Sample table([SampleDate] datetime)
INSERT INTO @TableName
([Id], [Fund], [Date], [Amount])
VALUES
(1, 'A', '20150120 00:00:00', 250),
(2, 'A', '20150128 00:00:00', 300),
(3, 'A', '20150320 00:00:00', 400),
(4, 'A', '20150330 00:00:00', 200),
(5, 'B', '20150401 00:00:00', 500),
(6, 'B', '20150401 00:00:00', 600)
INSERT INTO @Sample ([SampleDate])
values ('20150128 00:00:00'), ('20150330 00:00:00'), ('20150401 00:00:00')
-- select * from @TableName
-- select * from @Sample
;WITH groups AS (
SELECT [Fund], [Date], [AMOUNT], MIN([SampleDate]) [SampleDate] FROM @TableName
JOIN @Sample ON [Date] <= [SampleDate]
GROUP BY [Fund], [Date], [AMOUNT])
SELECT [Fund], [SampleDate], SUM([AMOUNT]) FROM groups
GROUP BY [Fund], [SampleDate]
Explanation:
The CTE groups finds the earliest SampleDate that is later than (or equal to) your data's date and enriches your data accordingly, thus giving each row the group it should be summed up in.
After that, you can group on the derived date.
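For the sample data above, the groups CTE effectively maps the rows like this before the final GROUP BY (a small worked illustration):
(A, 01/20, 250) and (A, 01/28, 300) -> SampleDate 01/28, sum 550
(A, 03/20, 400) and (A, 03/30, 200) -> SampleDate 03/30, sum 600
(B, 04/01, 500) and (B, 04/01, 600) -> SampleDate 04/01, sum 1100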

T-SQL: Paging WITH TIES

I am trying to implement a paging routine that's a little different.
For the sake of a simple example, let's assume that I have a table defined and populated as follows:
DECLARE @Temp TABLE
(
ParentId INT,
[TimeStamp] DATETIME,
Value INT
);
INSERT INTO @Temp VALUES (1, '1/1/2013 00:00', 6);
INSERT INTO @Temp VALUES (1, '1/1/2013 01:00', 7);
INSERT INTO @Temp VALUES (1, '1/1/2013 02:00', 8);
INSERT INTO @Temp VALUES (2, '1/1/2013 00:00', 6);
INSERT INTO @Temp VALUES (2, '1/1/2013 01:00', 7);
INSERT INTO @Temp VALUES (2, '1/1/2013 02:00', 8);
INSERT INTO @Temp VALUES (3, '1/1/2013 00:00', 6);
INSERT INTO @Temp VALUES (3, '1/1/2013 01:00', 7);
INSERT INTO @Temp VALUES (3, '1/1/2013 02:00', 8);
TimeStamp will always be the same interval, e.g. daily data, 1 hour data, 1 minute data, etc. It will not be mixed.
For reporting and presentation purposes, I want to implement paging that:
Orders by TimeStamp
Starts out using a suggested pageSize (say 4), but will automatically adjust to include additional records matching on TimeStamp. In other words, if 1/1/2013 01:00 is included for one ParentId, the suggested pageSize will be overridden and all records for hour 01:00 will be included for all ParentId's. It's almost like the TOP WITH TIES option.
So running this query with a pageSize of 4 would return 6 records. The first 4 rows cover the three hour-00:00 records and one hour-01:00 record, but because there are more hour-01:00 records, the pageSize would be overridden to return all hour 00:00 and 01:00 records.
Here's what I have so far, and I think I'm close, as it works for the first iteration; but subsequent queries for the next pageSize+ rows don't work.
WITH CTE AS
(
SELECT ParentId, [TimeStamp], Value,
RANK() OVER(ORDER BY [TimeStamp]) AS rnk,
ROW_NUMBER() OVER(ORDER BY [TimeStamp]) AS rownum
FROM @Temp
)
SELECT *
FROM CTE
WHERE (rownum BETWEEN 1 AND 4) OR (rnk BETWEEN 1 AND 4)
ORDER BY TimeStamp, ParentId
The ROW_NUMBER ensures the minimum pageSize is met, but the RANK will include additional ties.
declare @Temp as Table ( ParentId Int, [TimeStamp] DateTime, [Value] Int );
insert into @Temp ( ParentId, [TimeStamp], [Value] ) values
(1, '1/1/2013 00:00', 6),
(1, '1/1/2013 01:00', 7),
(1, '1/1/2013 02:00', 8),
(2, '1/1/2013 00:00', 6),
(2, '1/1/2013 01:00', 7),
(2, '1/1/2013 02:00', 8),
(3, '1/1/2013 00:00', 6),
(3, '1/1/2013 01:00', 7),
(3, '1/1/2013 02:00', 8);
declare @PageSize as Int = 4;
declare @Page as Int = 1;
with Alpha as (
select ParentId, [TimeStamp], Value,
Rank() over ( order by [TimeStamp] ) as Rnk,
Row_Number() over ( order by [TimeStamp] ) as RowNum
from @Temp ),
Beta as (
select Min( Rnk ) as MinRnk, Max( Rnk ) as MaxRnk
from Alpha
where ( @Page - 1 ) * @PageSize < RowNum and RowNum <= @Page * @PageSize )
select A.*
from Alpha as A inner join
Beta as B on B.MinRnk <= A.Rnk and A.Rnk <= B.MaxRnk
order by [TimeStamp], ParentId;
EDIT:
An alternative query that assigns page numbers as it goes, so that next/previous page can be implemented without overlapping rows:
with Alpha as (
select ParentId, [TimeStamp], Value,
Rank() over ( order by [TimeStamp] ) as Rnk,
Row_Number() over ( order by [TimeStamp] ) as RowNum
from @Temp ),
Beta as (
select ParentId, [TimeStamp], Value, Rnk, RowNum, 1 as Page, 1 as PageRow
from Alpha
where RowNum = 1
union all
select A.ParentId, A.[TimeStamp], A.Value, A.Rnk, A.RowNum,
case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then B.Page + 1 else B.Page end,
case when B.PageRow >= @PageSize and A.TimeStamp <> B.TimeStamp then 1 else B.PageRow + 1 end
from Alpha as A inner join
Beta as B on B.RowNum + 1 = A.RowNum
)
select * from Beta
option ( MaxRecursion 0 )
Note that recursive CTEs often scale poorly.
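To fetch a single page from this recursive version, one could simply filter the final select on the computed page number, e.g.:
select * from Beta where Page = @Page
option ( MaxRecursion 0 )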
I think your strategy of using row_number() and rank() is overcomplicating things.
Just pick the top 4 timestamps from the data. Then choose any timestamps that match those:
select *
from @temp
where [timestamp] in (select top 4 [timestamp] from @temp order by [TimeStamp])
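To page beyond the first page with this style (a hedged sketch, reusing the @Page and @PageSize variables from the answer above), one could page the timestamp list itself with OFFSET/FETCH; note that, as discussed above, timestamps that straddle a page boundary will then appear on both pages:
select *
from @Temp
where [TimeStamp] in (select [TimeStamp]
from @Temp
order by [TimeStamp]
offset (@Page - 1) * @PageSize rows
fetch next @PageSize rows only)
order by [TimeStamp], ParentId;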