How to get first and last record in HiveSQL if key is different - sql

I need to get the first and last record for a user if one of the key fields is different over time using a Hive table:
This is some sample data:
UserID EntryDate Activity
a3324 1/1/16 walk
a3324 1/2/16 walk
a3324 1/3/16 walk
a3324 1/4/16 run
a5613 1/1/16 walk
a5613 1/2/16 walk
a5613 1/3/16 walk
a5613 1/4/16 walk
And I'm looking for output preferably like this:
a3324 1/1/16 walk 1/4/16 run
Or at least like this:
a3324 walk run
I start writing code like this:
SELECT UserID, MINIMUM(EntryDate), MAXIMUM(EntryDate), Activity
FROM
SELECT UserID, DISTINCT Activity
GROUP BY UserID
HAVING Count(Activity) > 1
But I know that's not it.
I'd also like to be able to specify the cases where the original activity was Walk and the second activity was Run perhaps in the Where clause.
Can you help with an approach?
Thanks

You can use lag /lead to get a solution
SELECT * FROM (
select UserID ,EntryDate , Activityslec,
lead(Activityslec, 1) over (UserID ,EntryDate ) as nextActivityslec
from table) as A
where Activityslec <> nextActivityslec

SELECT
t.UserId
,MIN(CASE WHEN t.RowNumAsc = 1 THEN t.EntryDate END) as MinEntryDate
,MIN(CASE WHEN t.RowNumAsc = 1 THEN t.Activity END) as MinActivity
,MAX(CASE WHEN t.RowNumDesc = 1 THEN t.EntryDate END) as MaxEntryDate
,MAX(CASE WHEN t.RowNumDesc = 1 THEN t.Activity END) as MaxActivity
FROM
(
SELECT
UserId
,EntryDate
,Activity
,ROW_NUMBER() OVER (PARTITION BY UserId ORDER BY EntryDate) as RowNumAsc
,ROW_NUMBER() OVER (PARTITION BY UserId ORDER BY EntryDate DESC) as RowNumDesc
FROM
Table
) t
WHERE
t.RowNumAsc = 1
OR t.RowNumDesc = 1
GROUP BY
t.UserId
Looks like window functions are supported (https://cwiki.apache.org/confluence/display/Hive/LanguageManual+WindowingAndAnalytics) so using 2 row numbers 1 for EntryDate Ascending and another for Descending with Conditional Aggregation should get you to the answer.
And if you don't want to use Analytic Functions (window functions) you can use self left joins and conditional aggregation:
SELECT
t.UserId
,MIN(CASE WHEN mn.UserId IS NULL THEN t.EntryDate END) as MinEntryDate
,MIN(CASE WHEN mn.UserId IS NULL THEN t.Activity END) as MinActivity
,MAX(CASE WHEN mx.UserId IS NULL THEN t.EntryDate END) as MaxEntryDate
,MAX(CASE WHEN mx.UserId IS NULL THEN t.Activity END) as MaxActivity
FROM
Table t
LEFT JOIN Table mn
ON t.UserId = mn.UserId
AND t.EntryDate > mn.EntryDate
LEFT JOIN Table mx
ON t.UserId = mx.UserId
AND t.EntryDate < mx.EntryDate
WHERE
mn.UserId IS NULL
OR mx.UserId IS NULL
GROUP BY
t.UserId
Or a correlated Sub Query way:
SELECT
UserId
,MIN(EntryDate) as MinEntryDate
,(SELECT
Activity
FROM
Activity a
WHERE
u.UserId = a.UserId
AND a.EntryDate = MIN(u.EntryDate)
LIMIT 1
) as MinActivity
,MAX(EntryDate) as MaxEntryDate
,(SELECT
Activity
FROM
Activity a
WHERE
u.UserId = a.UserId
AND a.EntryDate = MAX(u.EntryDate)
LIMIT 1
) as MaxActivity
FROM
Activity u
GROUP BY
UserId

Related

SQL to return 1 or 0 depending on values in a column's audit trail

If I were to have a table such as the one below:
id_
last_updated_by
1
robot
1
human
1
robot
2
robot
3
robot
3
human
Using SQL, how could I group by the ID and create a new column to indicate whether a human has ever updated the record like this:
id_
last_updated_by
updated_by_human
1
robot
1
2
robot
0
3
robot
1
UPDATE
I'm currently doing the following, though I'm not sure how efficient this is. Selecting the latest record and then merging it with my calculated column via a sub-select.
SELECT MAIN.TRANSACTION_ID,
MAIN.CREATED_DATE
MAIN.CREATED_BY_USER_ID,
MAIN.OWNER_USER_ID,
STP.TOUCHED_BY_HUMAN
FROM (
SELECT TRANSACTION_ID,
CREATED_DATE
CREATED_BY_USER_ID_
OWNER_USER_ID_
FROM TABLE_NAME
WHERE CREATED_DATE >= CAST('{start_date} 00:00:00' AS TIMESTAMP)
AND CREATED_DATE <= CAST('{end_date} 23:59:59' AS TIMESTAMP)
QUALIFY row_number() OVER (partition by TRANSACTION_ID order by End_Dt desc) = 1
) MAIN
LEFT JOIN (
SELECT TRANSACTION_ID,
CASE
WHEN CREATED_BY_USER_ID IN ('ROBOT', 'MACHINE') OR
CREATED_BY_USER_ID LIKE 'N%' OR
CREATED_BY_USER_ID IS NULL
THEN 0
ELSE 1 END AS CREATED_BY_HUMAN,
CASE
WHEN OWNER_USER_ID IN ('ROBOT', 'MACHINE') OR
OWNER_USER_ID LIKE 'N%' OR
OWNER_USER_ID IS NULL
THEN 0
ELSE 1 END AS OWNED_BY_HUMAN,
CASE
WHEN CREATED_BY_HUMAN = 0 AND
OWNED_BY_HUMAN = 0
THEN 0
ELSE 1 END AS TOUCHED_BY_HUMAN_
FROM TABLE_NAME
WHERE CREATED_DATE >= CAST('{start_date} 00:00:00' AS TIMESTAMP)
AND CREATED_DATE <= CAST('{end_date} 23:59:59' AS TIMESTAMP)
QUALIFY row_number() OVER (partition by TRANSACTION_ID order by TOUCHED_BY_HUMAN_ desc) = 1
) STP
ON MAIN.TRANSACTION_ID = STP.TRANSACTION_ID
If I'm following your problem, then something like this should work.
SELECT
t.*
,CASE WHEN a.id IS NOT NULL THEN 1 ELSE 0 END AS updated_by_human
FROM table t
LEFT JOIN (SELECT DISTINCT id FROM table WHERE last_updated_by = 'human') a ON t.id = a.id
That takes care of the updated_by_human field, but if you also need to reduce the records in table (only keeping a subset) then you need more information to do that.
Exists clauses are usually not that performant but if your data isn't big this should work.
select id_,
IF (EXISTS (SELECT 1 FROM table_name t2 WHERE t2.last_updated_by = 'human' and t2.id_ = t1.id_), 1, 0) AS updated_by_human
from table_name t1;
here is another way
SELECT *
FROM table_name t1
GROUP BY ti.id_
HAVING COUNT(*) > 0
AND MAX(CASE t1.last_updated_by WHEN 'human' THEN 1 ELSE 0 END) = 1;
Since you didn't specified which column is used to determine this record is the newest record added by a given id, I assume that there will be a column to track the insert/modify timestamp (which is pretty standard table design), let's put it is last_updated_timestamp (if you don't have any, then I still insist you to have one as an auditing trail without timestamp does not make sense)
Given your table name is updating_trail
SELECT updating_trail.*, last_update_trail.modified_by_human
FROM updating_trail
INNER JOIN (
-- determine the id_, the lastest modified_timestamp, and a flag check to determine if there is any record with last_update_by is 'human' -> if yes then give 1
SELECT updating_trail.id_, MAX(last_update_timestamp) AS most_recent_update_ts, MAX(CASE WHEN updating_trail.last_updated_by = 'human' THEN 1 ELSE 0 END) AS modified_by_human
FROM updating_trail
GROUP BY updating_trail.id_
) last_update_trail
ON updating_trail.id_ = last_update_trail.id_ AND updating_trail.last_update_timestamp = last_update_trail.most_recent_update_ts;
Give
id_
last_updated_by
last_update_timestamp
modified_by_human
1
robot
2021-10-19T20:00:00.000Z
1
2
robot
2021-10-19T17:00:00.000Z
0
3
robot
2021-10-19T16:00:00.000Z
1
Check out this sample db fiddle I created for you
This is a 1:1 translation of your query to conditional aggregation:
SELECT TRANSACTION_ID,
CREATED_DATE,
CREATED_BY_USER_ID,
OWNER_USER_ID,
Max(CASE
WHEN CREATED_BY_USER_ID IN ('ROBOT', 'MACHINE') OR
CREATED_BY_USER_ID LIKE 'N%' OR
CREATED_BY_USER_ID IS NULL
THEN 0
ELSE 1
END) Over (PARTITION BY TRANSACTION_ID) AS CREATED_BY_HUMAN
FROM Table_Name
WHERE CREATED_DATE >= Cast('{start_date} 00:00:00' AS TIMESTAMP)
AND CREATED_DATE <= Cast('{end_date} 23:59:59' AS TIMESTAMP)
QUALIFY Row_Number() Over (PARTITION BY TRANSACTION_ID ORDER BY End_Dt DESC) = 1

How to get count of items present in each category but not present in other categories?

I have a table with different visit_types to hospital. They are Inpatient, Outpatient, Emergency
I would like to know the count of subjects solely present under each visit_type but not in other visit_types. In the above example the
Inpatient count - 4
Outpatient count -2
Emergency count - 3
I tried the below but not sure whether it is accurate?
SELECT count(DISTINCT PERSON_ID) FROM Visit WHERE PERSON_ID NOT IN
(select distinct person_id from Visit where visit_type = 'Inpatient')
AND VISIT_type = 'Outpatient';
SELECT count(DISTINCT PERSON_ID) FROM Visit WHERE PERSON_ID NOT IN
(select distinct person_id from Visit where visit_type = 'Inpatient')
AND VISIT_type = 'Emergency';
When I do this, it includes common subjects between Emergency and Outpatient?
How can I get the count correctly?
With a CTE which returns for each person_id all the types:
with cte as (
select person_id,
sum(case visit_type when 'Inpatient' then 1 else 0 end) Inpatient,
sum(case visit_type when 'Outpatient' then 1 else 0 end) Outpatient,
sum(case visit_type when 'Emergency' then 1 else 0 end) Emergency
from Visit
group by person_id
)
select
case
when Inpatient > 0 then 'Inpatient'
when Outpatient > 0 then 'Outpatient'
when Emergency > 0 then 'Emergency'
end visit_type,
count(*) counter
from cte
group by visit_type
See the demo.
Results:
visit_type | counter
:--------- | ------:
Outpatient | 2
Emergency | 3
Inpatient | 4
I would like to know the count of subjects solely present under each category but not in other categories.
You can aggregate by patient, keeping track of the categories. Then aggregate again:
select visit_type, count(*)
from (select patientId, min(visit_type) as visit_type
from t
group by patientId
having min(visit_type) = max(visit_type)
) p
group by visit_type;
An alternative method uses group by but filters before aggregation:
select visit_type, count(*)
from t
where not exists (select 1
from t t2
where t2.patientid = t.patientid and
t2.visit_type <> t.visit_type
)
group by visit_type;
Note: In this case, the count(*) is counting rows. If your data has duplicates, use count(distinct visit_type).
I have no idea what "I consider Inpatient category as base category" is supposed to mean, but the question itself is quite clear.
EDIT:
I am unclear on the relationships between the different categories that you want. You may find it most flexible to use:
select visit_type, count(*)
from (select patientId,
bool_or(visit_type = 'Inpatient') as has_inpatient,
bool_or(visit_type = 'Outpatient') as has_oupatient,
bool_or(visit_type = 'Emergency') as has_emergency,
count(distinct visit_type) as num_visit_types
from t
group by patientId
) p
where num_visit_types = 1
group by visit_type;
This version is the same as the earlier two queries. But you can use the has_ flags for additional filtering -- for instance where num_visit_types = 1 or (num_visit_types = 2 and has_inpatient) if you want people with one type or one type plus "inpatient".
You can use this query!
SELECT
C.visit_type,
COUNT(*) AS count_per_visit_type
FROM (
SELECT
person_id
FROM (
SELECT
person_id,
ARRAY_AGG(DISTINCT visit_type) AS visit_type_array
FROM visit
GROUP BY person_id
) A
WHERE LENGTH(visit_type_array) = 1
) B
JOIN visit C
ON B.person_id = C.person_id
GROUP BY C.visit_type

Calculate Count from a view ( SQL)

I have this View :
SELECT [ID]
,[PersonName]
,[PersonFUNCTION]
,[GUESTName]
,[Team]
,[sector]
,[MeetingCity]
,[GUESTCOMPANY]
,[TypeMeeting]
FROM [DB_TEST].[dbo].[Meetings]
From this view we can Read for Exemple :
The Person with the Name "XXX" (PersonName) who is A CEO (PersonFUNCTION) was in a meeting with "Mark Zuckerberg" (GUESTName) in Paris ( MeetingCity ) and "Facebook" is the ( GUESTCOMPANY ) and finally the meeting was a "One to One Meeting" (TypeMeeting) !
PS : Note that XXX can meet Mark Zuckerberg more than one time , in a different city for example.
What I Want to do is :
Add 3 columns : Count( One to One Meeting ) and Count( One to Few Meeting ) and Count ( Group Meeting )
Count( One to One Meeting ) = how many times the [PersonName] has met the [GUESTName] in a One to One meeting no matter if the city is different or anything else is different ...
So something like that :
SELECT [ID]
,[PersonName]
,[PersonFUNCTION]
,Count( One to One Meeting between PersonName and GUESTName ) ?
,Count( One to Few Meeting between PersonName and GUESTName) ?
,Count ( Group Meeting between PersonName and GUESTName) ?
,[GUESTName]
,[Team]
,[sector]
,[MeetingCity]
,[GUESTCOMPANY]
,[TypeMeeting]
FROM [DB_TEST].[dbo].[Meetings]
Thanks
Something like this should help you get all columns and counts.
SELECT [ID],
[PersonName],
[PersonFUNCTION],
m2.OneToOneCount, --Count( One to One Meeting between PersonName and GUESTName )
m2.OneToFewCount, --Count( One to Few Meeting between PersonName and GUESTName)
m2.GroupCount, --Count ( Group Meeting between PersonName and GUESTName)
[GUESTName],
[Team],
[sector],
[MeetingCity],
[GUESTCOMPANY],
[TypeMeeting]
FROM [DB_TEST].[dbo].[Meetings] m
CROSS APPLY (SELECT COUNT(CASE WHEN m2.[TypeMeeting] = 'OneToOne' THEN 1 END) AS OneToOneCount,
COUNT(CASE WHEN m2.[TypeMeeting] = 'OneToFew' THEN 1 END) AS OneToFewCount,
COUNT(CASE WHEN m2.[TypeMeeting] = 'Group' THEN 1 END) AS GroupCount
FROM [DB_TEST].[dbo].[Meetings] m2
WHERE m2.[PersonName] = m.[PersonName]
AND m2.[GUESTName] = m.[GUESTName]) m2
if you can't use CROSS APPLY, this is a JOIN alternative.
SELECT [ID],
[PersonName],
[PersonFUNCTION],
m2.OneToOneCount, --Count( One to One Meeting between PersonName and GUESTName )
m2.OneToFewCount, --Count( One to Few Meeting between PersonName and GUESTName)
m2.GroupCount, --Count ( Group Meeting between PersonName and GUESTName)
[GUESTName],
[Team],
[sector],
[MeetingCity],
[GUESTCOMPANY],
[TypeMeeting]
FROM [DB_TEST].[dbo].[Meetings] m
JOIN ( SELECT [PersonName],
[GUESTName],
COUNT(CASE WHEN m2.[TypeMeeting] = 'OneToOne' THEN 1 END) AS OneToOneCount,
COUNT(CASE WHEN m2.[TypeMeeting] = 'OneToFew' THEN 1 END) AS OneToFewCount,
COUNT(CASE WHEN m2.[TypeMeeting] = 'Group' THEN 1 END) AS GroupCount
FROM [DB_TEST].[dbo].[Meetings] m2
GROUP BY [PersonName],
[GUESTName]
) m2 ON m2.[PersonName] = m.[PersonName]
AND m2.[GUESTName] = m.[GUESTName]
BM
Another solution might be using SQL Pivot query
Here is sample data and SQL pivot Select statement
SELECT *
FROM (
SELECT
[ID],
[PersonName],
[TypeMeeting]
FROM [Meetings]
) TableData
PIVOT (
Count(ID)
FOR [TypeMeeting] IN (
[T1],[T2],[T3],[T4],[T5],[T6]
)
) PivotTable
I just used 'T1', etc for meeting types, you are required to replace them with actual values within "[]"
Here is the result
If you have many different meeting types you can use dynamic pivot query in SQL Server but I guess above solution will be enough
You can use conditional aggregation:
select PersonName, GuestName,
sum(case when TypeMetting = 'one-to-one' then 1 else 0 end) as Num_OneToOne,
sum(case when TypeMetting = 'one-to-few' then 1 else 0 end) as Num_OneToFew,
sum(case when TypeMetting = 'group' then 1 else 0 end) as Num_Group
from Meetings
group by PersonName, GuestName;
You can use SQL COUNT function with Partition By clause
Try
SELECT Distinct [ID]
,[PersonName]
,[TypeMeeting] --...
,COUNT([TypeMeeting]) OVER (PARTITION BY [PersonName], [TypeMeeting]) Cnt
FROM [Meetings]
It will result as follows

SQL Aggreate Functions

I have table which list a number of cases and assigned primary and secondary technicians. What I am trying to accomplish is to aggregate the number of cases a technician has worked as a primary and secondary tech. Should look something like this...
Technician Primary Secondary
John 4 3
Stacy 3 1
Michael 5 3
The table that I am pulling that data from looks like this:
CaseID, PrimaryTech, SecondaryTech, DOS
In the past I have used something like this, but now my superiors are asking for the number of secondary cases as well...
SELECT PrimaryTech, COUNT(CaseID) as Total
GROUP BY PrimaryTech
I've done a bit of searching, but cant seem to find the answer to my problem.
Select Tech,
sum(case when IsPrimary = 1 then 1 else 0 end) as PrimaryCount,
sum(case when IsPrimary = 0 then 1 else 0 end) as SecondaryCount
from
(
SELECT SecondaryTech as Tech, 0 as IsPrimary
FROM your_table
union all
SELECT PrimaryTech as Tech, 1 as IsPrimary
FROM your_table
) x
GROUP BY Tech
You can group two subqueries together with a FULL JOIN as demonstrated in this SQLFiddle.
SELECT Technician = COALESCE(pri.Technician, sec.Technician)
, PrimaryTech
, SecondaryTech
FROM
(SELECT Technician = PrimaryTech
, PrimaryTech = COUNT(*)
FROM Cases
WHERE PrimaryTech IS NOT NULL
GROUP BY PrimaryTech) pri
FULL JOIN
(SELECT Technician = SecondaryTech
, SecondaryTech = COUNT(*)
FROM Cases
WHERE SecondaryTech IS NOT NULL
GROUP BY SecondaryTech) sec
ON pri.Technician = sec.Technician
ORDER By Technician;
SELECT COALESCE(A.NAME, B.NAME) AS NAME, CASE WHEN A.CASES IS NOT NULL THEN A.CASES ELSE 0 END AS PRIMARY_CASES,
CASE WHEN B.CASES IS NOT NULL THEN B.CASES ELSE 0 END AS SECONDARY_CASES
FROM
(
SELECT COUNT(*) AS CASES, PRIMARYTECH AS NAME FROM YOUR_TABLE
GROUP BY PRIMARYTECH
) AS A
FULL OUTER JOIN
(
SELECT COUNT(*) AS CASES, SECONDARYTECH AS NAME FROM YOUR_TABLE
GROUP BY SECONDARYTECH
) AS B
ON A.NAME = B.NAME

SQL Optimize - From History table get value from two different dates

Not sure where to start... But basically I have a report table, an account table, and an account history table. The account history table will have zero or more records, where each record is the state of the account cancelled flag after it changed.
There is other stuff going on, but basically i am looking to return the account detail data, with the state of account cancelled bit on the start date and enddate as different columns.
What is the best way to do this?
I have the following working query below
(Idea) Should I do seperate joins on history table, 1 for each date?
I guess I could do it in three separate queries ( Get Begin Snapshot, End Snapshot, Normal Report query with a join to each snapshot)
something else?
Expected output:
AccountID, OtherData, StartDateCancelled, EndDateCancelled
Test Tables:
DECLARE #Report TABLE (ReportID INT, StartDate DATETIME, EndDate DATETIME)
DECLARE #ReportAccountDetail TABLE( ReportID INT, Accountid INT, Cancelled BIT )
DECLARE #AccountHistory TABLE( AccountID INT, ModifiedDate DATETIME, Cancelled BIT )
INSERT INTO #Report
SELECT 1,'1/1/2011', '2/1/2011'
--
INSERT INTO #ReportAccountDetail
SELECT 1 AS ReportID, 1 AS AccountID, 0 AS Cancelled
UNION
SELECT 1,2,0
UNION
SELECT 1,3,1
UNION
SELECT 1,4,1
--
INSERT INTO #AccountHistory
SELECT 2 AS CustomerID, '1/2/2010' AS ModifiedDate, 1 AS Cancelled
UNION--
SELECT 3, '2/1/2011', 1
UNION--
SELECT 4, '1/1/2010', 1
UNION
SELECT 4, '2/1/2010', 0
UNION
SELECT 4, '2/1/2011', 1
Current Query:
SELECT Accountid, OtherData,
MAX(CASE WHEN BeginRank = 1 THEN CASE WHEN BeginHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS StartDateCancelled,
MAX(CASE WHEN EndRank = 1 THEN CASE WHEN EndHistoryExists = 1 THEN HistoryCancelled ELSE DefaultCancel END ELSE NULL END ) AS EndDateCancelled
FROM
(
SELECT c.Accountid,
'OtherData' AS OtherData,
--lots of other data
ROW_NUMBER() OVER (PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS BeginRank,
CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END AS BeginHistoryExists,
ROW_NUMBER() OVER ( PARTITION BY c.AccountID ORDER BY
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate desc) AS EndRank,
CASE WHEN ch.ModifiedDate <= Report.EndDate THEN 1 ELSE 0 END AS EndHistoryExists,
CAST( ch.Cancelled AS INT) AS HistoryCancelled,
0 AS DefaultCancel
FROM
#Report AS Report
INNER JOIN #ReportAccountDetail AS C ON Report.ReportID = C.ReportID
--Others joins related for data to return
LEFT JOIN #AccountHistory AS CH ON CH.AccountID = C.AccountID
WHERE Report.ReportID = 1
) AS x
GROUP BY AccountID, OtherData
Welcome input on writing stack overflow questions. Thanks!
ROW_NUMBER() often suprises me and out-performs my expectations. In this case, however, I'd be tempted to just use correlated sub-queries. At least, I'd test them against the alternatives.
Note: I would also use real tables, with real indexes, and a realistic volume of fake data. (If it's worth posting this question, I'm assuming that it's worth testing this realistically.)
SELECT
[Report].ReportID,
[Account].AccountID,
[Account].OtherData,
ISNULL((SELECT TOP 1 Cancelled FROM AccountHistory WHERE AccountID = [Account].AccountID AND ModifiedDate <= [Report].StartDate ORDER BY ModifiedDate DESC), 0) AS StartDateCancelled,
ISNULL((SELECT TOP 1 Cancelled FROM AccountHistory WHERE AccountID = [Account].AccountID AND ModifiedDate <= [Report].EndDate ORDER BY ModifiedDate DESC), 0) AS EndDateCancelled
FROM
Report AS [Report]
LEFT JOIN
ReportAccountDetail AS [Account]
ON [Account].ReportID = [Report].ReportID
ORDER BY
[Report].ReportID,
[Account].AccountID
Note: For whatever reason, I've found that TOP 1 and ORDER BY is faster than MAX().
In terms of your suggested answer, I'd modify it slightly to just use ISNULL instead of trying to make the Exists columns work.
I'd also join on the "other data" after all of the working out, rather than inside the inner-most query, so as to avoid having to group by all the "other data".
WITH
HistoricData AS
(
SELECT
Report.ReportID,
c.Accountid,
c.OtherData,
ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY CASE WHEN ch.ModifiedDate <= Report.StartDate THEN 1 ELSE 0 END DESC, ch.ModifiedDate DESC) AS BeginRank,
ROW_NUMBER() OVER (PARTITION BY c.ReportID, c.AccountID ORDER BY ch.ModifiedDate DESC) AS EndRank,
CH.Cancelled
FROM
#Report AS Report
INNER JOIN
#ReportAccountDetail AS C
ON Report.ReportID = C.ReportID
LEFT JOIN
#AccountHistory AS CH
ON CH.AccountID = C.AccountID
AND CH.ModifiedDate <= Report.EndDate
)
,
FlattenedData AS
(
SELECT
ReportID,
Accountid,
OtherData,
ISNULL(MAX(CASE WHEN BeginRank = 1 THEN Cancelled END), 0) AS StartDateCancelled,
ISNULL(MAX(CASE WHEN EndRank = 1 THEN Cancelled END), 0) AS EndDateCancelled
FROM
[HistoricData]
GROUP BY
ReportID,
AccountID,
OtherData
)
SELECT
*
FROM
[FlattenedData]
LEFT JOIN
[OtherData]
ON Whatever = YouLike
WHERE
[FlattenedData].ReportID = 1
And a final possible version...
WITH
ReportStartHistory AS
(
SELECT
*
FROM
(
SELECT
[Report].ReportID,
ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate) AS SequenceID,
[History].*
FROM
Report AS [Report]
INNER JOIN
AccountHistory AS [History]
ON [History].ModifiedDate <= [Report].StartDate
)
AS [data]
WHERE
SequenceID = 1
)
,
ReportEndHistory AS
(
SELECT
*
FROM
(
SELECT
[Report].ReportID,
ROW_NUMBER() OVER (PARTITION BY [Report].ReportID, [History].AccountID ORDER BY [History].ModifiedDate) AS SequenceID,
[History].*
FROM
Report AS [Report]
INNER JOIN
AccountHistory AS [History]
ON [History].ModifiedDate <= [Report].EndDate
)
AS [data]
WHERE
SequenceID = 1
)
SELECT
[Report].ReportID,
[Account].*,
ISNULL([ReportStartHistory].Cancelled, 0) AS StartDateCancelled,
ISNULL([ReportEndHistory].Cancelled, 0) AS EndDateCancelled
FROM
Report AS [Report]
INNER JOIN
Account AS [Account]
LEFT JOIN
[ReportStartHistory]
ON [ReportStartHistory].ReportID = [Report].ReportID
AND [ReportStartHistory].AccountID = [Account].AccountID
LEFT JOIN
[ReportEndHistory]
ON [ReportEndHistory].ReportID = [Report].ReportID
AND [ReportEndHistory].AccountID = [Account].AccountID