SQL Server 2008 equivalent for FETCH OFFSET with WHERE clause - sql

I have a program with which my users can look up all the data traffic that happened in the last 7 days. I use a stored procedure to fetch that data 250 records at a time (the user can page through the results). The problem was that the users got a lot of timeouts when they wanted to see that data.
Here is the stored procedure before I tried to optimize it.
-- Paged overview of archived exports, one result row per (UserName, PacketIds, Category) group.
-- Parameters:
--   #MaxRecCount  page size; used both in TOP(...) and in the RowNumber window below
--   #PageOffset   zero-based page index (page = rows PageOffset*MaxRecCount+1 .. +MaxRecCount)
--   #IncludeData  not referenced anywhere in this snippet
-- NOTE(review): the `#` prefix on the parameter names looks like a rendering artifact of
-- T-SQL's `@` - confirm against the real procedure.
#MaxRecCount INT,
#PageOffset INT,
#IncludeData BIT
SELECT [Client], [Schema], [Version], [Records], [Fetched], [Receipted], [ProvidedAt], [FetchedAt], [ReceiptedAt],[PacketIds], [Record] FROM (
-- NOTE(review): this TOP caps the derived table at #MaxRecCount rows before the outer
-- RowNumber filter runs, so pages with #PageOffset > 0 may come back empty - confirm.
SELECT TOP(#MaxRecCount) MAX(bai_ExportPendingArchive.[UserName]) AS Client,
MAX(bai_ExportPendingArchive.Category) AS [Schema],
MAX(bai_ExportPendingArchive.ContractVersion) AS [Version],
COUNT(*) AS [Records],
-- Counts the rows of the group that have a matching acknowledgement row (LEFT JOIN hit).
SUM (CASE WHEN bai_ExportPendingAckArchive.ExportPendingId IS NULL THEN 0 ELSE 1 END) as [Fetched],
-- Counts the rows of the group whose acknowledgement has a non-NULL Receipted value.
SUM (CASE WHEN bai_ExportPendingAckArchive.Receipted IS NULL THEN 0 ELSE 1 END) as [Receipted],
MAX(bai_ExportArchive.Inserted) AS [ProvidedAt],
MAX(CASE WHEN bai_ExportPendingAckArchive.ExportPendingId IS NULL THEN NULL ELSE bai_ExportPendingAckArchive.Inserted END) AS [FetchedAt],
MAX(CASE WHEN bai_ExportPendingAckArchive.Receipted IS NULL THEN NULL ELSE bai_ExportPendingAckArchive.Receipted END) AS [ReceiptedAt],
bai_ExportArchive.PacketIds AS [PacketIds],
NULL AS [Record],
-- Numbers the groups from newest to oldest by their latest Inserted value.
ROW_NUMBER() Over (Order By MAX(bai_ExportArchive.Inserted) desc) as [RowNumber]
FROM bai_ExportArchive
INNER JOIN bai_ExportPendingArchive ON bai_ExportArchive.Id = bai_ExportPendingArchive.ExportId
LEFT OUTER JOIN bai_ExportPendingAckArchive ON bai_ExportPendingAckArchive.ExportPendingId = bai_ExportPendingArchive.Id
GROUP BY bai_ExportPendingArchive.[UserName], bai_ExportArchive.PacketIds, bai_ExportPendingArchive.Category
-- Keep only the groups whose RowNumber falls inside the requested page.
) AS InnerTable WHERE RowNumber > (#PageOffset * #MaxRecCount) and RowNumber <= (#PageOffset * #MaxRecCount + #MaxRecCount)
ORDER BY RowNumber
#MaxRecCount, #PageOffset and #IncludeData are parameters that come from my C# method.
This version needed about 1 min 35 s to return the data I wanted. To make the stored procedure faster, I inserted a WHERE clause that filters on the Inserted column (I also created an index on this column) and uses OFFSET FETCH:
The stored procedure after the optimization:
-- Paged overview of archived exports, restricted to a window of bai_ExportArchive.Inserted
-- values before grouping. Only the #IncludeData = 0 branch is shown; the BEGIN opened below
-- is not closed within this snippet.
-- Parameters:
--   #MaxRecCount  page size
--   #PageOffset   zero-based page index
--   #IncludeData  selects the branch; 0 runs the query below
#MaxRecCount INT,
#PageOffset INT,
#IncludeData BIT
Declare #pageStart int
Declare #pageEnd int
-- Row position (in bai_ExportArchive ordered by Inserted DESC) where the page starts.
SET #pageStart = #PageOffset * #MaxRecCount
-- Row position where the window ends; the window is 50 rows wider than the page size.
-- NOTE(review): the reason for the extra 50 rows is not visible here - confirm.
SET #pageEnd = #pageStart + #MaxRecCount + 50
IF #IncludeData = 0
BEGIN
SELECT [Client], [Schema], [Version], [Records], [Fetched], [Receipted], [ProvidedAt], [FetchedAt], [ReceiptedAt],[PacketIds], [Record] FROM (
SELECT TOP(#MaxRecCount) bai_ExportPendingArchive.[UserName] AS Client,
bai_ExportPendingArchive.Category AS [Schema],
MAX(bai_ExportPendingArchive.ContractVersion) AS [Version],
COUNT(*) AS [Records],
-- Counts the rows of the group that have a matching acknowledgement row (LEFT JOIN hit).
SUM (CASE WHEN bai_ExportPendingAckArchive.ExportPendingId IS NULL THEN 0 ELSE 1 END) as [Fetched],
-- Counts the rows of the group whose acknowledgement has a non-NULL Receipted value.
SUM (CASE WHEN bai_ExportPendingAckArchive.Receipted IS NULL THEN 0 ELSE 1 END) as [Receipted],
MAX(bai_ExportArchive.Inserted) AS [ProvidedAt],
MAX(CASE WHEN bai_ExportPendingAckArchive.ExportPendingId IS NULL THEN NULL ELSE bai_ExportPendingAckArchive.Inserted END) AS [FetchedAt],
MAX(CASE WHEN bai_ExportPendingAckArchive.Receipted IS NULL THEN NULL ELSE bai_ExportPendingAckArchive.Receipted END) AS [ReceiptedAt],
bai_ExportArchive.PacketIds AS [PacketIds],
NULL AS [Record],
ROW_NUMBER() Over (Order By MAX(bai_ExportArchive.Inserted) desc) as [RowNumber]
FROM bai_ExportArchive
INNER JOIN bai_ExportPendingArchive ON bai_ExportArchive.Id = bai_ExportPendingArchive.ExportId
LEFT OUTER JOIN bai_ExportPendingAckArchive ON bai_ExportPendingAckArchive.ExportPendingId = bai_ExportPendingArchive.Id
-- Upper bound: Inserted value of the row at position #pageStart (newest first).
-- OFFSET ... FETCH is the construct that does not run on SQL Server 2008.
-- NOTE(review): the offsets count bai_ExportArchive rows, while the result rows are
-- groups - confirm that the two line up for paging purposes.
Where bai_ExportArchive.Inserted <= (Select bai_ExportArchive.Inserted from bai_ExportArchive Order by bai_ExportArchive.Inserted DESC Offset #pageStart ROWS FETCH NEXT 1 ROWS Only)
-- Lower bound (exclusive): Inserted value of the row at position #pageEnd.
And bai_ExportArchive.Inserted > (Select bai_ExportArchive.Inserted from bai_ExportArchive Order by bai_ExportArchive.Inserted DESC Offset #pageEnd ROWS FETCH NEXT 1 ROWS Only)
GROUP BY bai_ExportPendingArchive.[UserName], bai_ExportArchive.PacketIds, bai_ExportPendingArchive.Category
) AS InnerTable
ORDER BY RowNumber
This version gives me the data in about 2 s. The only problem is that I work on Microsoft SQL Server 2014, but my users use SQL Server 2008 and later. OFFSET FETCH doesn't work in SQL Server 2008, and now I'm clueless as to how I can optimize my stored procedure so that it is fast and also works on SQL Server 2008.
I'm thankful for any help :)

Try this method to handle the pagination in SQL Server 2005/2008.
First use a CTE for your select query with a ROW_NUMBER() column to identify the record number/count. After that you can select a range of records from this CTE using your PAGE_NUMBER and PAGE_COUNT. Example is below
-- Paging template for SQL Server 2005/2008.
-- #P_PAGE_NUM is 1-based; 0 (or NULL) disables paging and returns every row.
DECLARE #P_PAGE_NUM INT = 0;
DECLARE #P_PAGE_SIZE INT = 20;

WITH CTE AS
(
    /* Put your own SELECT here. Its select list must expose a [ROW_NO] column, e.g.
       SELECT ROW_NUMBER() OVER (ORDER BY put_column-to-sort-here DESC) AS [ROW_NO]
             ,...
       WHERE ....
    */
)
SELECT *
    -- Optional: total number of rows before paging.
    --,( SELECT COUNT(*) FROM CTE) AS [TOTAL_ROW_COUNT]
FROM CTE
WHERE ISNULL(#P_PAGE_NUM, 0) = 0
   OR (
            [ROW_NO] >= (#P_PAGE_NUM - 1) * #P_PAGE_SIZE + 1
        AND [ROW_NO] <= #P_PAGE_NUM * #P_PAGE_SIZE
      )
ORDER BY [ROW_NO]

Related

Sql server aggregate function and GROUP BY Clause error

I have a query below where it compares the number of stagingCabincrew and StagingCockpitCrew columns from the staging schema and compares them to their data schema equivalent 'DataCabinCrew' and 'DataCockpitCrew'.
Below is the query and the results outputted:
-- Compares, per staging flight / departure date / departure airport, the number of
-- cabin ('F') and cockpit ('C') crew rows in the staging schema with the matching
-- counts in the data schema, for departures in the current calendar month.
WITH CTE AS
(
    -- Data side: the newest Data.Crew row per LegKey (RowNumber = 1), its crew-detail
    -- rows with IsPassive = 1, and the leg's flight identifier and scheduled departure.
    SELECT cd.*,
           c.*,
           DataFlight,
           l.ScheduledDepartureDate,
           l.ScheduledDepartureAirport
    FROM
        (SELECT *,
                ROW_NUMBER() OVER(PARTITION BY LegKey
                                  ORDER BY UpdateID DESC) AS RowNumber
         FROM Data.Crew) c
    INNER JOIN Data.CrewDetail cd ON c.UpdateID = cd.CrewUpdateID
        AND cd.IsPassive = 1
        AND RowNumber = 1
    INNER JOIN
        (SELECT *,
                Carrier + CAST(FlightNumber AS VARCHAR) + Suffix AS DataFlight
         FROM Data.Leg) l ON c.LegKey = l.LegKey
)
SELECT StagingFlight,
       sac.DepartureDate,
       sac.DepartureAirport,
       cte.DataFlight,
       cte.ScheduledDepartureDate,
       cte.ScheduledDepartureAirport,
       SUM(CASE WHEN sac.CREWTYPE = 'F' THEN 1 ELSE 0 END) AS StagingCabinCrew,
       SUM(CASE WHEN sac.CREWTYPE = 'C' THEN 1 ELSE 0 END) AS StagingCockpitCrew,
       SUM(CASE WHEN cte.CrewType = 'F' THEN 1 ELSE 0 END) AS DataCabinCrew,
       SUM(CASE WHEN cte.CrewType = 'C' THEN 1 ELSE 0 END) AS DataCockpitCrew
FROM
    -- Staging side: flight identifier plus a per-flight row number (newest UpdateId first).
    (SELECT *,
            Airline + CAST(FlightNumber AS VARCHAR) + Suffix AS StagingFlight,
            ROW_NUMBER() OVER(PARTITION BY Airline + CAST(FlightNumber AS VARCHAR) + Suffix
                              ORDER BY UpdateId DESC) AS StageRowNumber
     FROM Staging.SabreAssignedCrew) sac
LEFT JOIN CTE cte ON StagingFlight = DataFlight
    AND sac.DepartureDate = cte.ScheduledDepartureDate
    AND sac.DepartureAirport = cte.ScheduledDepartureAirport
    AND sac.CREWTYPE = cte.CrewType
-- Current calendar month as a half-open range [first of this month, first of next month).
-- The previous MONTH(d) + YEAR(d) comparison also matched other months with the same sum
-- (e.g. 1 + 2025 = 2 + 2024) and wrapped the column in functions.
-- Assumes DepartureDate is a date/datetime column - TODO confirm.
WHERE sac.DepartureDate >= DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()), 0)
  AND sac.DepartureDate <  DATEADD(MONTH, DATEDIFF(MONTH, 0, GETDATE()) + 1, 0)
  AND StageRowNumber = 1 --AND cte.ScheduledDepartureDate IS NOT NULL
--AND cte.ScheduledDepartureAirport IS NOT NULL
GROUP BY StagingFlight,
         sac.DepartureDate,
         sac.DepartureAirport,
         cte.DataFlight,
         cte.ScheduledDepartureDate,
         cte.ScheduledDepartureAirport
The results are correct, all I need to do is add a condition in the WHERE clause where StagingCabinCrew <> DataCabinCrew AND StagingCockpitCrew <> DataCockpitCrew
If a row appears then we have found an error in the data, I just need helping adding this condition in the WHERE Clause because the columns in the WHERE Clause are referring to a SUM and CASE Function. I just need help manipulating the query so that I can add this WHERE Clause
I will guess you are trying to use a column alias in the WHERE clause of the same query.
You can't do this, because the alias won't be recognized in the WHERE clause.
-- Does NOT work: myField is an alias defined in the SELECT list, and the WHERE clause
-- of the same query does not recognize it.
SELECT field1 + field2 as myField
FROM yourTable
WHERE myField > 3
You need to include it in a sub query
-- Compute the aliased expression inside a CTE so that the outer query can filter on it by name.
WITH cte2 AS (
    SELECT t.field1 + t.field2 AS myField
    FROM yourTable AS t
)
SELECT cte2.myField
FROM cte2
WHERE cte2.myField > 3
or repeat the function
-- Alternative without a subquery: repeat the expression itself in the WHERE clause.
SELECT
    t.field1 + t.field2 AS myField
FROM yourTable AS t
WHERE t.field1 + t.field2 > 3

Calculation of occurrence of strings

I have a table with 3 columns: id, name and vote. It is populated with many records. I need a query that returns the name with the best balance of votes. The vote types are 'yes' and 'no'.
Yes -> Plus 1
No -> Minus 1
This column vote is a string column. I am using SQL SERVER.
Example:
It must return Ann for me
Use conditional Aggregation to tally the votes as Kannan suggests in his answer
If you really only want 1 record then you can do it like so:
-- Returns the single name with the highest vote balance
-- ('yes' counts +1, every other value counts -1).
SELECT TOP 1
     name
    ,SUM(CASE WHEN vote = 'yes' THEN 1 ELSE -1 END) AS VoteTotal
FROM
    #Table
GROUP BY
    name
ORDER BY
     VoteTotal DESC
    -- Tie-break on name so that TOP 1 is deterministic when several names share the
    -- best balance (it still returns only one of them).
    ,name ASC
This will not allow for ties, but you can use the following method, which ranks the responses: filter on RowNum to get only 1 result, or on RankNum to get ties.
-- Step 1: vote balance per name ('yes' = +1, anything else = -1).
;WITH cteTotals AS (
    SELECT
        name,
        SUM(CASE vote WHEN 'yes' THEN 1 ELSE -1 END) AS VoteTotal
    FROM #Table
    GROUP BY name
),
-- Step 2: rank the balances, best first. RowNum is unique per name; RankNum is shared by ties.
cteVoteTotals AS (
    SELECT
        name,
        VoteTotal,
        ROW_NUMBER() OVER (ORDER BY VoteTotal DESC) AS RowNum,
        DENSE_RANK() OVER (ORDER BY VoteTotal DESC) AS RankNum
    FROM cteTotals
)
SELECT
    name,
    VoteTotal
FROM cteVoteTotals
WHERE RowNum = 1
    --RankNum = 1 --if you want with ties use this line instead
Here is the test data used. In the future, do NOT just post an image of your test data; spend the 2 minutes to make a temp table or a table variable so that the people you are asking for help do not have to!
-- Sample votes: three votes each for John, Ann, Marie and Matt.
DECLARE #Table AS TABLE (
    id   INT,
    name VARCHAR(25),
    vote VARCHAR(4)
)

INSERT INTO #Table (id, name, vote)
VALUES
    (1,  'John',  'no'),
    (2,  'John',  'no'),
    (3,  'John',  'yes'),
    (4,  'Ann',   'no'),
    (5,  'Ann',   'yes'),
    (6,  'Ann',   'yes'),
    (9,  'Marie', 'no'),
    (8,  'Marie', 'no'),
    (7,  'Marie', 'yes'),
    (10, 'Matt',  'no'),
    (11, 'Matt',  'yes'),
    (12, 'Matt',  'yes')
Use this code,
-- Vote balance per name: each 'yes' adds 1, every other vote subtracts 1.
SELECT
    r.name,
    SUM(CASE WHEN r.vote = 'yes' THEN 1 ELSE -1 END)
FROM register AS r
GROUP BY r.name
You can get max or minimum based out of this..
This one gives the 'yes' rate for each person:
-- Share of 'Yes' votes per person, as a fraction between 0 and 1.
SELECT
    Name,
    -- Summing 1.0 instead of 1 forces decimal division; with integers the result
    -- would be truncated to 0 (or 1 when every vote is 'Yes').
    SUM(CASE WHEN Vote = 'Yes' THEN 1.0 ELSE 0 END) / COUNT(*) AS Rate
FROM My_Table
GROUP BY Name

How should I make one column fix and others repeatable in sql procedure?

There is one scheme and different bidders against it, so the scenario is that if user send SchemeID to the procedure then it should return the SchemeName, EstimatedCost (once) and all bidders against single scheme i.e. BidderName, BidPrice, SchemeEstimatedCost... in this format
SchemeName EstimatedCost BidderName BidPrice
Scheme 1 13000 John 12000
Aamir 10000
Shumail 9000
Michael 8090
See I am searching data via SchemeID, it should return me detail in this format.
My try:
-- Returns every bid placed on one scheme, together with the scheme's name and cost.
-- Parameter: #fk_Schemes_Bids_SchemeID - id of the scheme whose bids are listed.
-- NOTE(review): the `#` prefix looks like a rendering artifact of T-SQL's `@` - confirm.
ALTER PROCEDURE [dbo].[SelectBidsByBidID]
#fk_Schemes_Bids_SchemeID int
AS
BEGIN
    SELECT
        Schemes.SchemeName,
        Schemes.SchemeCost AS SchemeEstimatedCost,
        Bids.BidBidderName,
        Bids.BidPrice
    FROM Bids
    INNER JOIN Schemes
        ON Schemes.pk_Schemes_SchemeID = Bids.fk_Schemes_Bids_SchemeID
    -- Filter on the caller-supplied scheme id; the literal 3 ignored the parameter.
    WHERE Bids.fk_Schemes_Bids_SchemeID = #fk_Schemes_Bids_SchemeID
END
but it repeats the SchemeName and Estimated cost.
This should do the trick:
-- Lists the bids of one scheme, highest price first, showing the scheme name and
-- estimated cost only on the first row and blanks on the following rows.
-- Parameter: #fk_Schemes_Bids_SchemeID - id of the scheme whose bids are listed.
ALTER PROCEDURE [dbo].[SelectBidsByBidID]
#fk_Schemes_Bids_SchemeID int
AS
BEGIN
    SELECT
        CASE WHEN ranked.rn = 1 THEN ranked.SchemeName ELSE '' END AS SchemeName,
        -- Cast to text so the '' branch stays blank instead of being converted to the
        -- cost column's type. Assumes SchemeCost is a numeric type - TODO confirm.
        CASE WHEN ranked.rn = 1 THEN CAST(ranked.SchemeCost AS varchar(50)) ELSE '' END AS SchemeEstimatedCost,
        ranked.BidBidderName,
        ranked.BidPrice
    FROM (
        -- Number the bids once, so the blanking and the final ordering use the same sequence.
        SELECT
            Schemes.SchemeName,
            Schemes.SchemeCost,
            Bids.BidBidderName,
            Bids.BidPrice,
            ROW_NUMBER() OVER (ORDER BY Bids.BidPrice DESC) AS rn
        FROM Bids
        INNER JOIN Schemes
            ON Schemes.pk_Schemes_SchemeID = Bids.fk_Schemes_Bids_SchemeID
        -- Use the procedure parameter rather than the hard-coded scheme id 3.
        WHERE Bids.fk_Schemes_Bids_SchemeID = #fk_Schemes_Bids_SchemeID
    ) AS ranked
    ORDER BY ranked.rn
END
However, as stated previously, this should not be done in the query, but in the report viewer/generator.
-- Lists the bids of one scheme, highest price first, showing the scheme name and
-- estimated cost only on the first row of the scheme.
-- Parameter: #fk_Schemes_Bids_SchemeID - id of the scheme whose bids are listed.
ALTER PROCEDURE [dbo].[SelectBidsByBidID]
#fk_Schemes_Bids_SchemeID int
AS
BEGIN
    ;WITH cte AS
    (
        SELECT
            Schemes.SchemeName,
            Schemes.SchemeCost AS SchemeEstimatedCost,
            Bids.BidBidderName,
            Bids.BidPrice,
            ROW_NUMBER() OVER (PARTITION BY Schemes.SchemeName, Schemes.SchemeCost
                               ORDER BY Bids.BidPrice DESC) AS rn
        FROM Bids
        INNER JOIN Schemes
            ON Schemes.pk_Schemes_SchemeID = Bids.fk_Schemes_Bids_SchemeID
        -- Use the procedure parameter rather than the hard-coded scheme id 3.
        WHERE Bids.fk_Schemes_Bids_SchemeID = #fk_Schemes_Bids_SchemeID
    )
    -- Outside the CTE only the CTE's own column names are visible: the Schemes./Bids.
    -- prefixes and the SchemeCost name cannot be referenced here.
    SELECT
        CASE WHEN cte.rn = 1 THEN cte.SchemeName ELSE '' END AS SchemeName,
        CASE WHEN cte.rn = 1 THEN CAST(cte.SchemeEstimatedCost AS varchar(max)) ELSE '' END AS SchemeEstimatedCost,
        cte.BidBidderName,
        cte.BidPrice
    FROM cte
    -- Explicit ordering so the row that carries the scheme details comes first.
    ORDER BY cte.SchemeName, cte.SchemeEstimatedCost, cte.rn
END
This should be done in a reporting tool/ any other front end tool. If you have no option try this
-- Lists the bids of one scheme, highest price first, showing the scheme name and
-- estimated cost only on the first row of the scheme.
-- Parameter: #fk_Schemes_Bids_SchemeID - id of the scheme whose bids are listed.
ALTER PROCEDURE [dbo].[SelectBidsByBidID] #fk_Schemes_Bids_SchemeID INT
AS
BEGIN
    SELECT CASE
            WHEN t.sno = 1
                THEN t.SchemeName
            ELSE ''
            END AS SchemeName
        -- Cast to text so the '' branch stays blank instead of being converted to the
        -- cost column's type. Assumes SchemeCost is a numeric type - TODO confirm.
        ,CASE
            WHEN t.sno = 1
                THEN CAST(t.SchemeEstimatedCost AS VARCHAR(50))
            ELSE ''
            END AS SchemeEstimatedCost
        ,t.BidBidderName
        ,t.BidPrice
    FROM (
        SELECT row_number() OVER (
                PARTITION BY Schemes.SchemeName
                ,Schemes.SchemeCost ORDER BY Bids.BidPrice DESC
                ) AS sno
            ,Schemes.SchemeName
            ,Schemes.SchemeCost AS SchemeEstimatedCost
            ,Bids.BidBidderName
            ,Bids.BidPrice
        FROM Bids
        INNER JOIN Schemes ON Schemes.pk_Schemes_SchemeID = Bids.fk_Schemes_Bids_SchemeID
        -- Use the procedure parameter rather than the hard-coded scheme id 3.
        WHERE Bids.fk_Schemes_Bids_SchemeID = #fk_Schemes_Bids_SchemeID
        ) AS t
    -- Explicit ordering so the row that carries the scheme details comes first.
    ORDER BY t.SchemeName
        ,t.SchemeEstimatedCost
        ,t.sno
END

Find start and end dates when one field changes

I have this data in a table
FIELD_A FIELD_B FIELD_D
249052903 10/15/2011 N
249052903 11/15/2011 P ------------- VALUE CHANGED
249052903 12/15/2011 P
249052903 1/15/2012 N ------------- VALUE CHANGED
249052903 2/15/2012 N
249052903 3/15/2012 N
249052903 4/15/2012 N
249052903 5/15/2012 N
249052903 6/15/2012 N
249052903 7/15/2012 N
249052903 8/15/2012 N
249052903 9/15/2012 N
Whenever the value in FIELD_D changes, it forms a new group, and I need the min and max dates in that group. The query should return
FIELD_A GROUP_START GROUP_END
249052903 10/15/2011 10/15/2011
249052903 11/15/2011 12/15/2011
249052903 1/15/2012 9/15/2012
The examples that I have seen so far have the data in Field_D being unique. Here the data can repeat as shown, First it is "N" then it changes to "P" and then back to "N".
Any help will be appreciated
Thanks
You can use analytic functions - LAG, LEAD, and COUNT() OVER to your advantage, if they are supported by your SQL implementation. SQL Fiddle here.
-- Finds, per FIELD_A, the start and end FIELD_B of every run of consecutive rows
-- (ordered by FIELD_B) that share the same FIELD_D.
WITH EndsMarked AS (
    -- Flag the first and last row of each run. A missing neighbour (LAG/LEAD returns
    -- NULL) makes the comparison fall through to ELSE 1, so the first and last rows
    -- of a FIELD_A are flagged as well.
    -- PARTITION BY FIELD_A keeps rows of different FIELD_A values from being compared
    -- with each other; with a single FIELD_A the result is unchanged.
    SELECT
        FIELD_A,
        FIELD_B,
        CASE WHEN FIELD_D = LAG(FIELD_D, 1) OVER (PARTITION BY FIELD_A ORDER BY FIELD_B)
             THEN 0 ELSE 1 END AS IS_START,
        CASE WHEN FIELD_D = LEAD(FIELD_D, 1) OVER (PARTITION BY FIELD_A ORDER BY FIELD_B)
             THEN 0 ELSE 1 END AS IS_END
    FROM T
), GroupsNumbered AS (
    -- Keep only the boundary rows and number the runs: the running count of start
    -- flags is the same for a run's start row and its end row.
    SELECT
        FIELD_A,
        FIELD_B,
        IS_START,
        IS_END,
        COUNT(CASE WHEN IS_START = 1 THEN 1 END)
            OVER (PARTITION BY FIELD_A ORDER BY FIELD_B) AS GroupNum
    FROM EndsMarked
    WHERE IS_START = 1 OR IS_END = 1
)
SELECT
    FIELD_A,
    MIN(FIELD_B) AS GROUP_START,
    MAX(FIELD_B) AS GROUP_END
FROM GroupsNumbered
GROUP BY FIELD_A, GroupNum;
This is fairly easy to express in SQL using subqueries:
-- Tag every row with the first later Field_B (same Field_A) whose Field_D differs;
-- rows of one run share that tag, and the rows of the last run get NULL.
with tagged as (
    select cur.Field_A,
           cur.Field_B,
           cur.Field_D,
           (select min(nxt.Field_B)
              from t nxt
             where nxt.Field_A = cur.Field_A
               and nxt.Field_D <> cur.Field_D
               and nxt.Field_B > cur.Field_B
           ) as TheGroup
      from t cur
)
select Field_A,
       Field_D,
       min(Field_B) as Group_Start,
       max(Field_B) as Group_End
  from tagged
 group by Field_A, Field_D, TheGroup
This is assigning a group identifier using a correlated subquery. The identifier is the first value of Field_B where Field_D changes.
You don't mention the database you are using, so this uses standard SQL.
Don't use SQL for this problem because it is not possible to do it in SQL with a single table scan since it requires comparison between records. It would need a full table scan plus at least a join with itself. It is trivial to implement a solution in a imperative language and it only requires a single table scan.
Edit: a stored procedure would be best.
I modified the answers a bit where you have multiple Field_A's. This should always work :-)
-- Start and end [Field_B] of every run of consecutive rows with the same [Field_D],
-- evaluated separately for each [Field_A].
WITH EndsMarked
AS
(
    SELECT
         [Field_A]
        ,[Field_B]
        ,CASE
            -- First row of a Field_A always opens a run.
            WHEN LAG([Field_D], 1) OVER (PARTITION BY [Field_A] ORDER BY [Field_B]) IS NULL
                AND ROW_NUMBER() OVER (PARTITION BY [Field_A] ORDER BY [Field_B]) = 1
                THEN 1
            -- A run also opens when the previous row's Field_D differs from this row's.
            -- (The earlier "LAG(..) > 0 <> LAG(.., 0) > 0" form is not a valid T-SQL
            -- predicate; LAG(x, 0) is simply the current row's value.)
            WHEN LAG([Field_D], 1) OVER (PARTITION BY [Field_A] ORDER BY [Field_B]) <> [Field_D]
                THEN 1
            ELSE 0
         END AS IS_START
        ,CASE
            -- Last row of a Field_A always closes a run.
            WHEN LEAD([Field_D], 1) OVER (PARTITION BY [Field_A] ORDER BY [Field_B]) IS NULL
                AND ROW_NUMBER() OVER (PARTITION BY [Field_A] ORDER BY [Field_B] DESC) = 1
                THEN 1
            -- A run also closes when the next row's Field_D differs from this row's.
            WHEN [Field_D] <> LEAD([Field_D], 1) OVER (PARTITION BY [Field_A] ORDER BY [Field_B])
                THEN 1
            ELSE 0
         END AS IS_END
    -- Reads T directly: the question's table has only Field_A, Field_B and Field_D, so
    -- the inner SELECT that also listed [Aantal Facturen] was dropped.
    FROM [T]
)
,GroupsNumbered
AS
(
    -- Keep only the boundary rows; the running count of start flags gives a run's
    -- start row and end row the same GroupNum.
    SELECT
         [Field_A]
        ,[Field_B]
        ,IS_START
        ,IS_END
        ,COUNT(CASE
                WHEN IS_START = 1
                    THEN 1
               END) OVER (ORDER BY [Field_A]
                                  ,[Field_B]) AS GroupNum
    FROM EndsMarked
    WHERE IS_START = 1
        OR IS_END = 1
)
SELECT
     [Field_A]
    ,MIN([Field_B]) AS GROUP_START
    ,MAX([Field_B]) AS GROUP_END
FROM GroupsNumbered
GROUP BY [Field_A], GroupNum

SQL Optimize - From History table get value from two different dates

Not sure where to start... But basically I have a report table, an account table, and an account history table. The account history table will have zero or more records, where each record is the state of the account cancelled flag after it changed.
There is other stuff going on, but basically i am looking to return the account detail data, with the state of account cancelled bit on the start date and enddate as different columns.
What is the best way to do this?
I have the following working query below
(Idea) Should I do seperate joins on history table, 1 for each date?
I guess I could do it in three separate queries ( Get Begin Snapshot, End Snapshot, Normal Report query with a join to each snapshot)
something else?
Expected output:
AccountID, OtherData, StartDateCancelled, EndDateCancelled
Test Tables:
-- Test fixtures: one report, four accounts on that report, and the cancelled-flag
-- history of accounts 2, 3 and 4 (account 1 has no history rows).
DECLARE #Report TABLE (ReportID INT, StartDate DATETIME, EndDate DATETIME)
DECLARE #ReportAccountDetail TABLE( ReportID INT, Accountid INT, Cancelled BIT )
DECLARE #AccountHistory TABLE( AccountID INT, ModifiedDate DATETIME, Cancelled BIT )

INSERT INTO #Report
VALUES (1, '1/1/2011', '2/1/2011')

INSERT INTO #ReportAccountDetail
VALUES
    (1, 1, 0),
    (1, 2, 0),
    (1, 3, 1),
    (1, 4, 1)

INSERT INTO #AccountHistory
VALUES
    (2, '1/2/2010', 1),
    (3, '2/1/2011', 1),
    (4, '1/1/2010', 1),
    (4, '2/1/2010', 0),
    (4, '2/1/2011', 1)
Current Query:
-- For report 1, returns one row per account with the account's cancelled flag as of
-- the report's StartDate and as of its EndDate.
SELECT Accountid, OtherData,
-- Take the rank-1 row's value: its history flag when that row is dated on or before
-- StartDate, otherwise DefaultCancel (0).
MAX(CASE WHEN BeginRank = 1 THEN CASE WHEN BeginHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS StartDateCancelled,
-- Same logic relative to EndDate.
MAX(CASE WHEN EndRank = 1 THEN CASE WHEN EndHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS EndDateCancelled
FROM
(
-- One row per (account, history row); accounts without history get a single row with
-- NULL history columns because of the LEFT JOIN.
SELECT c.Accountid,
'OtherData' AS OtherData,
--lots of other data
-- Rank history rows per account: rows dated on or before StartDate sort first, newest
-- first, so rank 1 is the latest such row when one exists.
ROW_NUMBER() OVER (PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS BeginRank,
-- 1 when this history row is dated on or before StartDate.
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END AS BeginHistoryExists,
-- Same ranking relative to EndDate.
ROW_NUMBER() OVER ( PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS EndRank,
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END AS EndHistoryExists,
-- Cast to INT, presumably because the outer MAX cannot aggregate a BIT column.
CAST( ch.Cancelled AS INT) AS HistoryCancelled,
-- Value reported when no history row qualifies.
0 AS DefaultCancel
FROM
#Report AS Report
INNER JOIN #ReportAccountDetail AS C ON Report.ReportID = C.ReportID
--Others joins related for data to return
LEFT JOIN #AccountHistory AS CH ON CH.AccountID = C.AccountID
WHERE Report.ReportID = 1
) AS x
GROUP BY AccountID, OtherData
Welcome input on writing stack overflow questions. Thanks!
ROW_NUMBER() often surprises me and out-performs my expectations. In this case, however, I'd be tempted to just use correlated sub-queries. At least, I'd test them against the alternatives.
Note: I would also use real tables, with real indexes, and a realistic volume of fake data. (If it's worth posting this question, I'm assuming that it's worth testing this realistically.)
-- Per report and account: the cancelled flag of the latest history row dated on or
-- before the report's StartDate / EndDate, or 0 when no such row exists.
SELECT
    rpt.ReportID,
    acct.AccountID,
    acct.OtherData,
    ISNULL(
        (SELECT TOP 1 hist.Cancelled
         FROM AccountHistory AS hist
         WHERE hist.AccountID = acct.AccountID
           AND hist.ModifiedDate <= rpt.StartDate
         ORDER BY hist.ModifiedDate DESC),
        0) AS StartDateCancelled,
    ISNULL(
        (SELECT TOP 1 hist.Cancelled
         FROM AccountHistory AS hist
         WHERE hist.AccountID = acct.AccountID
           AND hist.ModifiedDate <= rpt.EndDate
         ORDER BY hist.ModifiedDate DESC),
        0) AS EndDateCancelled
FROM Report AS rpt
LEFT JOIN ReportAccountDetail AS acct
    ON acct.ReportID = rpt.ReportID
ORDER BY
    rpt.ReportID,
    acct.AccountID
Note: For whatever reason, I've found that TOP 1 and ORDER BY is faster than MAX().
In terms of your suggested answer, I'd modify it slightly to just use ISNULL instead of trying to make the Exists columns work.
I'd also join on the "other data" after all of the working out, rather than inside the inner-most query, so as to avoid having to group by all the "other data".
-- Cancelled flag of every report account as of the report's StartDate and EndDate,
-- defaulting to 0 when no history row applies.
WITH
HistoricData AS
(
    -- One row per (report account, history row dated on or before EndDate); accounts
    -- without such history keep a single row with NULL history columns.
    SELECT
        Report.ReportID,
        c.Accountid,
        c.OtherData,
        -- Rows dated on or before StartDate rank first, newest first.
        ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate DESC) AS BeginRank,
        ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY ch.ModifiedDate DESC) AS EndRank,
        -- Only rows dated on or before StartDate may supply the start value. Without
        -- this guard, an account whose only history lies after StartDate would report
        -- that later flag as its start state.
        -- Cast to INT because MAX cannot aggregate a BIT column.
        CASE WHEN ch.ModifiedDate <= Report.StartDate THEN CAST(ch.Cancelled AS INT) END AS StartCancelled,
        CAST(ch.Cancelled AS INT) AS EndCancelled
    FROM
        #Report AS Report
    INNER JOIN
        #ReportAccountDetail AS C
            ON Report.ReportID = C.ReportID
    LEFT JOIN
        #AccountHistory AS CH
            ON  CH.AccountID = C.AccountID
            AND CH.ModifiedDate <= Report.EndDate
)
,
FlattenedData AS
(
    -- Collapse to one row per report account, keeping the rank-1 values.
    SELECT
        ReportID,
        Accountid,
        OtherData,
        ISNULL(MAX(CASE WHEN BeginRank = 1 THEN StartCancelled END), 0) AS StartDateCancelled,
        ISNULL(MAX(CASE WHEN EndRank   = 1 THEN EndCancelled   END), 0) AS EndDateCancelled
    FROM
        [HistoricData]
    GROUP BY
        ReportID,
        AccountID,
        OtherData
)
SELECT
    *
FROM
    [FlattenedData]
-- Placeholder: join whatever additional data the report needs here.
LEFT JOIN
    [OtherData]
        ON Whatever = YouLike
WHERE
    [FlattenedData].ReportID = 1
And a final possible version...
-- Cancelled flag of every report account as of the report's StartDate and EndDate,
-- using one "latest history row" CTE per date.
WITH
ReportStartHistory AS
(
    -- Latest history row per (report, account) dated on or before the report's StartDate.
    SELECT
        *
    FROM
    (
        SELECT
            [Report].ReportID,
            -- DESC so that SequenceID = 1 is the newest qualifying row; ascending
            -- order would pick the oldest one.
            ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate DESC) AS SequenceID,
            [History].*
        FROM
            Report AS [Report]
        INNER JOIN
            AccountHistory AS [History]
                ON [History].ModifiedDate <= [Report].StartDate
    )
        AS [data]
    WHERE
        SequenceID = 1
)
,
ReportEndHistory AS
(
    -- Latest history row per (report, account) dated on or before the report's EndDate.
    SELECT
        *
    FROM
    (
        SELECT
            [Report].ReportID,
            ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate DESC) AS SequenceID,
            [History].*
        FROM
            Report AS [Report]
        INNER JOIN
            AccountHistory AS [History]
                ON [History].ModifiedDate <= [Report].EndDate
    )
        AS [data]
    WHERE
        SequenceID = 1
)
SELECT
    [Report].ReportID,
    [Account].*,
    ISNULL([ReportStartHistory].Cancelled, 0) AS StartDateCancelled,
    ISNULL([ReportEndHistory].Cancelled, 0)   AS EndDateCancelled
FROM
    Report AS [Report]
-- The account join needs an ON clause. The report's accounts are taken from
-- ReportAccountDetail, which carries the ReportID link.
-- NOTE(review): confirm this is the intended account source.
INNER JOIN
    ReportAccountDetail AS [Account]
        ON [Account].ReportID = [Report].ReportID
LEFT JOIN
    [ReportStartHistory]
        ON  [ReportStartHistory].ReportID  = [Report].ReportID
        AND [ReportStartHistory].AccountID = [Account].AccountID
LEFT JOIN
    [ReportEndHistory]
        ON  [ReportEndHistory].ReportID  = [Report].ReportID
        AND [ReportEndHistory].AccountID = [Account].AccountID