SQL - select last and previous different to last - sql-server-2016

The problem: a simplified membership table containing membership id, starting date for each membership and membership level description:
-- Sample membership history: one row per membership level start date.
CREATE TABLE cover
(
    membership_id   INT,
    cover_from_date DATE,
    description     VARCHAR(57)
);

-- Date literals are kept exactly as posted; how 'd/m/yyyy'-style strings are
-- parsed depends on the session's language / DATEFORMAT setting.
INSERT INTO cover
    (membership_id, cover_from_date, description)
VALUES
    (1, '1/1/2011', 'AA'),
    (1, '1/2/2011', 'BB'),
    (1, '1/3/2011', 'CC'),
    (1, '1/4/2011', 'CC');
The task: to list the current membership and the immediate previous membership different to the current one. So from the above table I would like to see something like:
1, 1/4/2011, CC, 1/2/2011, BB
The attempted solution: I have managed to come up with a solution but it takes an enormous time to run on a large database and I'm sure there are better ways of resolving this problem. My no-doubt over complicated query is as follows:
-- Number each member's cover rows from newest (version_no = 1) to oldest.
WITH cte AS
(
    SELECT
        c.membership_id,
        c.cover_from_date,
        c.description,
        ROW_NUMBER() OVER (PARTITION BY c.membership_id
                           ORDER BY c.cover_from_date DESC) AS version_no
    FROM cover AS c
)
-- For every member: the newest row, plus the newest older row whose
-- description differs from it. The GROUP BY collapses the one-row-per-cover
-- fan-out produced by driving the joins from the full cte.
SELECT
    cte.membership_id,
    cover_now.cover_from_date,
    cover_now.description,
    cover_prev.cover_from_date,
    cover_prev.description
FROM cte
LEFT JOIN cte AS cover_now
    ON  cover_now.membership_id = cte.membership_id
    AND cover_now.version_no = 1
LEFT JOIN cte AS cover_prev
    ON  cover_prev.membership_id = cte.membership_id
    AND cover_prev.version_no =
        (
            -- smallest version number (i.e. most recent row) that differs
            SELECT MIN(x.version_no)
            FROM cte AS x
            WHERE x.membership_id = cover_now.membership_id
              AND x.description <> cover_now.description
              AND x.version_no >= 2
        )
GROUP BY
    cte.membership_id,
    cover_now.cover_from_date,
    cover_now.description,
    cover_prev.cover_from_date,
    cover_prev.description
The entire fiddle is located here. Any tips on how to optimise the query would be appreciated.

First create an index on membership_id and cover_from_date in descending order. It will be heavily used by this query.
-- Supports "newest row per membership" lookups: seek on membership_id, rows already ordered newest-first.
CREATE INDEX cover_by_date
    ON cover (membership_id ASC, cover_from_date DESC);
Then:
-- Current cover row of every membership, plus the most recent earlier row
-- whose description differs from the current one (NULLs when there is none).
select
    membership.membership_id,
    membership.cover_from_date,
    membership.description,
    previous_membership.cover_from_date,
    previous_membership.description
from
(
    -- rank = 1 marks the newest cover row of each membership
    select
        membership_id,
        description,
        cover_from_date,
        row_number() over (partition by membership_id order by cover_from_date desc) as rank
    from cover
) as membership
outer apply
(
    -- Look back from the current row only. The earlier version joined every
    -- row to every older row of the same membership (quadratic per member)
    -- before ranking; here each current row fetches a single older row, which
    -- an index on (membership_id, cover_from_date desc) can serve as a seek.
    select top (1)
        previous.cover_from_date,
        previous.description
    from cover as previous
    where previous.membership_id = membership.membership_id
      and previous.cover_from_date < membership.cover_from_date
      and previous.description <> membership.description
    order by previous.cover_from_date desc
) as previous_membership
where
    membership.rank = 1

Related

Query keeps giving me duplicate records. How can I fix this?

I wrote a query which uses 2 temp tables. And then joins them into 1. However, I am seeing duplicate records in the student visit temp table. (Query is below). How could this be modified to remove the duplicate records of the visit temp table?
-- Latest bridge row(s) per visitor / student / professor, joined to the
-- office-hour visits recorded on or after 2022-10-01.
with clientbridge as (
    select *
    from (
        select visitorid, --Visid
               roomnumber,
               room_id,
               profid,
               student_id,
               cohd.datekey,
               -- RANK keeps ties: every bridge row sharing the latest datekey
               -- of its partition gets rn = 1.
               rank() over (partition by visitorid, student_id, profid
                            order by cohd.datekey desc) as rn
        from university.course_office_hour_bridge cohd
    ) ranked_bridge -- derived tables need an alias on SQL Server
    where rn = 1
),
-----------------Data Header Table
studentvisit as (
    -- Visit key will allow us to track everything they did within that visit.
    select distinct
        visid_visitorid,
        uniquevisitkey,
        studentaccountid_5,
        profid_officenumber_8,
        studentvisitstarttime,
        room_id_115,
        qqq144, --Course Name
        qqq145, -- Course Office Hour Benefit
        qqq146, --Course Office Hour ID
        datekey
    from university.office_hour_details ohd
    where DateKey >= '2022-10-01'
      and (qqq146 <> '')
)
select
    *
-- The bridge CTE is aliased cb so that it matches the join condition
-- (it was aliased ab while the ON clause referred to cb).
from clientbridge cb
inner join studentvisit sv on sv.visid_visitorid = cb.visitorid
I wrote a query which uses 2 temp tables. And then joins them into 1. However, I am seeing duplicate records in the student visit temp table. (Query is below). How could this be modified to remove the duplicate records of the visit temp table?
I think you may have a better shot by joining the two datasets in the same query where you want the data ranked; otherwise the rank computed in the first query is ignored in the results of the second query. Perhaps something like this:
-- Join visits to the bridge first, then rank inside the joined set so the
-- rn = 1 filter applies to the final rows.
;with studentvisit as (
    -- Visit key will allow us to track everything they did within that visit.
    select distinct
        visid_visitorid,
        uniquevisitkey,
        studentaccountid_5,
        profid_officenumber_8,
        studentvisitstarttime,
        room_id_115,
        qqq144, --Course Name
        qqq145, -- Course Office Hour Benefit
        qqq146, --Course Office Hour ID
        datekey
    from university.office_hour_details ohd
    where DateKey >= '2022-10-01'
      and (qqq146 <> '')
)
,clientbridge as (
    select
        sv.*,
        cohd.visitorid, --Visid
        cohd.roomnumber,
        cohd.room_id,
        cohd.profid,
        cohd.student_id,
        -- renamed: sv.* already exposes a datekey column, and a CTE cannot
        -- expose two columns with the same name
        cohd.datekey as bridge_datekey,
        -- visitorid / student_id / profid only exist on the bridge table,
        -- so the partition uses cohd rather than sv
        rank() over (partition by cohd.visitorid, cohd.student_id, cohd.profid
                     order by cohd.datekey desc) as rn
    from university.course_office_hour_bridge cohd
    inner join studentvisit sv on sv.visid_visitorid = cohd.visitorid
)
select
    *
from clientbridge
where rn = 1

SQL query to return duplicate rows for certain column, but with unique values for another column

I have written the query shown here that combines three tables and returns rows where the at_ticket_num from appeal_tickets is duplicated but against a different at_sys_ref value
-- Tickets whose number is attached to more than one appeal system reference.
SELECT TOP 100
    t.t_reference,
    at.at_system_ref,
    at.at_ticket_num,
    a.a_case_ref
FROM tickets AS t
INNER JOIN appeal_tickets AS at
    ON at.at_ticket_num = t.t_number
INNER JOIN appeals_2 AS a
    ON a.a_system_ref = at.at_system_ref
WHERE t.t_reference IN ('AB123','AB234') -- filtering on these values so that I can see that its working
  AND at.at_ticket_num IN
      (
          SELECT at_ticket_num
          FROM appeal_tickets
          GROUP BY at_ticket_num
          HAVING COUNT(DISTINCT at_system_ref) > 1
      )
ORDER BY
    t.t_reference DESC
This is the output:
t_reference at_system_ref at_ticket_num a_case_ref
-------------------------------------------------------
AB123 30838974 23641583 1111979010
AB123 30838976 23641583 1111979010
AB234 30839149 23641520 1111977352
AB234 30839209 23641520 1111988003
I want to modify this so that it only returns records where t_reference is duplicated but against a different a_case_ref. So in above case only records for AB234 would be returned.
Any help would be much appreciated.
You want all ticket appeals that have more than one system reference and more than one case reference it seems. You can join the tables, count the occurrences per ticket and then only keep the tickets that match these criteria.
-- Ticket appeals whose ticket has more than one system reference and more
-- than one case reference.
-- NOTE: SQL Server rejects COUNT(DISTINCT ...) OVER (...); on that engine the
-- two window counts have to be computed another way (correlated subquery or
-- a pair of DENSE_RANK calls).
select *
from
(
    select
        t.t_reference, at.at_system_ref, at.at_ticket_num, a.a_case_ref,
        count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
        count(distinct a.a_case_ref) over (partition by at.at_ticket_num) as caserefs
    from tickets t
    join appeal_tickets at on at.at_ticket_num = t.t_number
    join appeals_2 a on a.a_system_ref = at.at_system_ref
) counted
where sysrefs > 1 and caserefs > 1
-- Only the derived table's columns are visible here; the inner aliases
-- t / at / a are out of scope, so the columns are referenced unqualified.
order by t_reference, at_system_ref, at_ticket_num, a_case_ref;
Correction
It seems that SQL Server still doesn't support COUNT(DISTINCT ...) OVER (...). You can count distinct values in a subquery though. Replace
count(distinct a.a_system_ref) over (partition by at.at_ticket_num) as sysrefs,
by
(
    -- distinct system references attached to the current ticket
    SELECT COUNT(DISTINCT a2.a_system_ref)
    FROM appeal_tickets AS at2
    INNER JOIN appeals_2 AS a2
        ON a2.a_system_ref = at2.at_system_ref
    WHERE at2.at_ticket_num = t.t_number
) AS sysrefs,
An alternative workaround is to use DENSE_RANK in two directions (found here: https://stackoverflow.com/a/53518204/2270762):
-- Distinct count per ticket: rank from the bottom + rank from the top - 1.
-- (Unlike COUNT(DISTINCT), DENSE_RANK would count a NULL as one more value.)
DENSE_RANK() OVER (PARTITION BY at.at_ticket_num ORDER BY a.a_system_ref ASC)
    + DENSE_RANK() OVER (PARTITION BY at.at_ticket_num ORDER BY a.a_system_ref DESC)
    - 1 AS sysrefs,
-- Template: flag every row of a t_reference that carries at least two
-- different a_case_ref values, then keep only the flagged rows.
with data as (
<your query plus one column>,
-- MIN and MAX need the column to compare: they differ exactly when the
-- t_reference has more than one distinct (non-NULL) a_case_ref.
case when
min(a.a_case_ref) over (partition by t.t_reference)
<>
max(a.a_case_ref) over (partition by t.t_reference)
then 1 end as dup
)
select * from data where dup = 1

Trying to simplify a SQL query without UNION

I'm very bad at explaining, so let me try to lay out my issue. I have a table that resembles the following:
Source Value User
======== ======= ======
old1 1 Phil
new 2 Phil
old2 3 Phil
new 4 Phil
old1 1 Mike
old2 2 Mike
new 1 Jeff
new 2 Jeff
What I need to do is create a query that gets values for users based on the source and the value. It should follow this rule:
For every user, get the highest value. However, disregard the 'new'
source if either 'old1' or 'old2' exists for that user.
So based on those rules, my query should return the following from this table:
Value User
======= ======
3 Phil
2 Mike
2 Jeff
I've come up with a query that does close to what is asked:
-- Tag every row with a source priority: 1 for the old sources, 2 for 'new'.
WITH Prioritised AS
(
    SELECT
        CASE
            WHEN [Source] IN ('old1', 'old2') THEN 1
            WHEN [Source] = 'new' THEN 2
        END AS [SourcePriority],
        [Value],
        [User]
    FROM #UserValues
)
-- Highest value per user among priority-1 rows ...
SELECT MAX([Value]), [User]
FROM Prioritised
WHERE [SourcePriority] = 1
GROUP BY [User]
UNION
-- ... combined with the highest value per user among priority-2 rows.
SELECT MAX([Value]), [User]
FROM Prioritised
WHERE [SourcePriority] = 2
GROUP BY [User]
However this returns the following results:
Value User
======= ======
3 Phil
4 Phil
2 Mike
2 Jeff
Obviously that extra value for Phil=4 is not desired. How should I attempt to fix this query? I also understand that this is a pretty convoluted solution and that it can probably be more easily solved by proper use of aggregates, however I'm not too familiar with aggregates yet which resulted in me resorting to a union. Essentially I'm looking for help creating the cleanest-looking solution possible.
Here is the SQL code if anyone wanted to populate the table themselves to give it a try:
-- Sample data: one row per (source, value) reading of a user.
CREATE TABLE #UserValues
(
    [Source] VARCHAR(10),
    [Value]  INT,
    [User]   VARCHAR(10)
)

INSERT INTO #UserValues
    ([Source], [Value], [User])
VALUES
    ('old1', 1, 'Phil'),
    ('new',  2, 'Phil'),
    ('old2', 3, 'Phil'),
    ('new',  4, 'Phil'),
    ('old1', 1, 'Mike'),
    ('old2', 2, 'Mike'),
    ('new',  1, 'Jeff'),
    ('new',  2, 'Jeff')
You can solve it fairly easily without resorting to window functions. In this case, you need the maximum value where ((not new) OR (there isn't an old1 or old2 entry)).
Here's a query that works correctly with your sample data:
-- Highest value per user; 'new' rows only count for users that have no
-- old1 / old2 row at all.
SELECT
    MAX(uv.[Value]) AS [Value],
    uv.[User]
FROM #UserValues AS uv
WHERE uv.[Source] <> 'new'
   OR NOT EXISTS
      (
          SELECT *
          FROM #UserValues AS older
          WHERE older.[User] = uv.[User]
            AND older.[Source] IN ('old1','old2')
      )
GROUP BY
    uv.[User]
You can use priorities order by with row_number() :
-- One row per user: the highest value among the old sources if the user has
-- any, otherwise the highest 'new' value.
-- old1 and old2 share the same priority; ranking old2 ahead of old1 would
-- return old2's value even when the user's old1 value is higher.
select top (1) with ties uv.*
from #UserValues uv
order by row_number() over (partition by [user]
                            order by (case when source in ('old1', 'old2') then 1 else 2 end),
                                     value desc
                           );
However, if the source is limited to these three values, you can also simply do:
. . .
-- Two-level priority: 'new' sorts after every other source, then highest value first.
ORDER BY ROW_NUMBER() OVER (PARTITION BY [user]
                            ORDER BY (CASE source WHEN 'new' THEN 2 ELSE 1 END),
                                     value DESC
                           )
-- raw_data: rank each user's rows by value (rnk) and count the user's
-- old1/old2 rows (cnt_old).
with raw_data
as (
    select row_number() over(partition by a.[user] order by a.value desc) as rnk
          ,count(case when a.source in('old1','old2') then 1 end) over(partition by a.[user]) as cnt_old
          ,a.*
    from uservalues a
)
-- curated_data: drop every 'new' row of a user that has old rows, then
-- re-rank what is left by value.
,curated_data
as (
    select *
          ,row_number() over(partition by rd.[user] order by rd.value desc) as rnk2
    from raw_data rd
    -- All 'new' rows must go, not only the top-ranked one: if a user's two
    -- highest rows are both 'new', removing just rnk = 1 would let the second
    -- 'new' row win over the old sources.
    where 0 = case when rd.source = 'new' and rd.cnt_old > 0 then 1 else 0 end
)
select *
from curated_data
where rnk2 = 1
I am doing the following
raw_data -> First I rank each user's records by value, highest first (rnk). I also count, per user, how many records have old1 or old2 in the source column (cnt_old).
curated_data ->i eliminate records which have the highest value(rnk=1) as new if they have cnt_old >0. Also now i rank(rnk2) the records on the highest value available from this result set.
I select the highest available value from curated_data(ie rnk2=1)
I think you should consider setting up an XREF table to define which source is what priority, for a possible more complicated priorisation in the future. I do it with a temp table:
-- Lookup table: lower SourcePriority wins.
CREATE TABLE #SourcePriority
(
    [Source]         VARCHAR(10),
    [SourcePriority] INT
)

INSERT INTO #SourcePriority
    ([Source], [SourcePriority])
VALUES
    ('old1', 1),
    ('old2', 1),
    ('new',  2)
You might also create a view that looks up the SourcePriority for the original table. I do it with a CTE, followed by a possible implementation of how to look up the highest value within the top priority:
-- Prioritised: every user value with the priority of its source.
;WITH Prioritised AS
(
    SELECT sp.[SourcePriority], uv.[Value], uv.[User]
    FROM #UserValues AS uv
    INNER JOIN #SourcePriority AS sp
        ON sp.[Source] = uv.[Source]
),
-- TopPriorityPerUser: the best (lowest) priority each user has a row for.
TopPriorityPerUser AS
(
    SELECT [User], MIN([SourcePriority]) AS [TopPriority]
    FROM Prioritised
    GROUP BY [User]
)
-- Highest value per user, restricted to rows of that user's best priority.
SELECT MAX(p.[Value]) AS [Value], p.[User]
FROM TopPriorityPerUser AS tp
INNER JOIN Prioritised AS p
    ON  p.[User] = tp.[User]
    AND p.[SourcePriority] = tp.[TopPriority]
GROUP BY p.[User]
I think you want:
-- One row per user: the highest value, ignoring 'new' rows for users that
-- also have old1/old2 rows.
-- [user] must be bracketed: unbracketed USER is the T-SQL function returning
-- the current database user, so it would not partition by the column.
select top (1) with ties uv.*
from (select uv.*,
             sum(case when source in ('old1', 'old2') then 1 else 0 end)
                 over (partition by [user]) as cnt_old
      from #UserValues uv
     ) uv
where cnt_old = 0 or source <> 'new'
order by row_number() over (partition by [user] order by value desc);

SQL Query Help - Negative reporting

Perhaps somebody can help with Ideas or a Solution. A User asked me for a negative report. We have a table with tickets each ticket has a ticket number which would be easy to select but the user wants a list of missing tickets between the first and last ticket in the system.
E.g. Select TicketNr from Ticket order by TicketNr
Result
1,
2,
4,
7,
11
But we actually want the result 3,5,6,8,9,10
-- Ticket register; TicketNr is the number whose gaps are being reported.
CREATE TABLE dbo.Ticket
(
    pknTicketId INT IDENTITY(1,1) NOT NULL,
    TicketNr    INT NULL
) ON [PRIMARY]
GO
SQL Server 2016 - TSQL
Any ideas ?
A bit more information is needed: all solutions so far work on a small table, but our production database has over 4 million tickets, which is why we need to find the missing ones.
First get the minimum and maximum, then generate all possible ticket numbers, and finally select the ones that are missing.
-- FirstAndLast: lowest and highest ticket number in use.
;WITH FirstAndLast AS
(
    SELECT
        MinTicketNr = MIN(T.TicketNr),
        MaxTicketNr = MAX(T.TicketNr)
    FROM
        Ticket AS T
),
-- AllTickets: every number from MinTicketNr to MaxTicketNr, one per recursion step.
AllTickets AS
(
    SELECT
        TicketNr = MinTicketNr,
        MaxTicketNr = T.MaxTicketNr
    FROM
        FirstAndLast AS T
    UNION ALL
    SELECT
        TicketNr = A.TicketNr + 1,
        MaxTicketNr = A.MaxTicketNr
    FROM
        AllTickets AS A
    WHERE
        A.TicketNr + 1 <= A.MaxTicketNr
)
-- Numbers in the range that have no row in Ticket.
SELECT
    A.TicketNr
FROM
    AllTickets AS A
WHERE
    NOT EXISTS (
        SELECT
            'missing ticket'
        FROM
            Ticket AS T
        WHERE
            A.TicketNr = T.TicketNr)
ORDER BY
    A.TicketNr
OPTION
    -- 0 removes the recursion cap. The recursion depth equals the width of
    -- the ticket range, and any fixed cap (the maximum is 32767) aborts the
    -- query on a table holding millions of ticket numbers. The recursion
    -- still terminates because of the A.TicketNr + 1 <= A.MaxTicketNr guard.
    (MAXRECURSION 0)
If you can accept the results in a different format, the following will do what you want:
-- One row per gap: first and last missing number and the size of the gap.
select TicketNr + 1 as first_missing,
       next_TicketNr - 1 as last_missing,
       (next_TicketNr - TicketNr - 1) as num_missing
from (select t.*, lead(TicketNr) over (order by TicketNr) as next_TicketNr
      from Ticket t
     ) t
-- ">" rather than "<>": TicketNr carries no uniqueness constraint, and a
-- duplicated number (next_TicketNr = TicketNr) would otherwise be reported
-- as a gap of size -1.
where next_TicketNr > TicketNr + 1;
This shows each sequence of missing ticket numbers on a single row, rather than a separate row for each of them.
If you do use a recursive CTE, I would recommend doing it only for the missing tickets:
-- Anchor: the first missing number of every gap.
-- Recursive step: keep adding 1 until an existing ticket number is reached.
with cte as (
      select (TicketNr + 1) as missing_TicketNr
      from (select t.*, lead(TicketNr) over (order by TicketNr) as next_ticketNr
            from Ticket t   -- the table is named Ticket, not tickets
           ) t
      -- ">" so that duplicated ticket numbers do not start a bogus gap
      where next_TicketNr > TicketNr + 1
      union all
      select missing_TicketNr + 1
      from cte
      where not exists (select 1 from Ticket t2 where t2.TicketNr = cte.missing_TicketNr + 1)
     )
select *
from cte
-- Recursion depth equals the length of the longest gap; the default cap of
-- 100 levels would abort the query on any gap wider than that.
option (maxrecursion 0);
This version starts with the list of missing ticket numbers. It then adds a new one, as the numbers are not found.
One method is to use recursive cte to find the missing ticket numbers :
-- missing: every number from the lowest to the highest ticket number
-- (mnt walks upward, mxt is carried along as the upper bound).
with missing as (
      select min(TicketNr) as mnt, max(TicketNr) as mxt
      from Ticket t
      union all
      select mnt+1, mxt
      from missing m
      where mnt < mxt
     )
select m.*
from missing m
-- same table as the anchor: Ticket (the two references used different names)
where not exists (select 1 from Ticket t where t.TicketNr = m.mnt)
-- one recursion level per number in the range, so the default cap of 100
-- levels has to be lifted
option (maxrecursion 0);
This should do the trick: SQL Fiddle
-- Table variables are declared with an @ prefix (# is for temp tables created with CREATE TABLE).
declare @ticketsTable table (ticketNo int not null)
insert @ticketsTable (ticketNo) values (1),(2),(4),(7),(11)

-- cte1: existing tickets, flagged as not missing, numbered in ticket order.
;with cte1(ticketNo, isMissing, sequenceNo) AS
(
    select ticketNo
         , 0
         , row_number() over (order by ticketNo)
    from @ticketsTable
)
-- cte2: existing tickets plus, recursively, each number that follows a row
-- without reaching the next existing ticket.
, cte2(ticketNo, isMissing, sequenceNo) AS
(
    select ticketNo, isMissing, sequenceNo
    from cte1
    union all
    select a.ticketNo + 1
         , 1
         , a.sequenceNo
    from cte2 a
    inner join cte1 b
        on b.sequenceNo = a.sequenceNo + 1
       and b.ticketNo != a.ticketNo + 1
)
select *
from cte2
where isMissing = 1
order by ticketNo
-- recursion depth equals the widest gap; lift the default cap of 100 levels
option (maxrecursion 0)
It works by collecting all of the existing tickets, marking them as existing, and assigning each a consecutive number giving their order in the original list.
We can then see the gaps in the list by finding any spots where the consecutive order number shows the next record, but the ticket numbers are not consecutive.
Finally, we recursively fill in the gaps; working from the start of a gap and adding new records until that gap's consecutive numbers no longer has a gap between the related ticket numbers.
I think this one gives you the easiest solution:
-- Numbers strictly between the lowest and highest ticket number that have no
-- row in Ticket.
-- NOTE(review): master..spt_values is an undocumented system table, and its
-- Type = 'P' rows are understood to cover only 0..2047, so gaps above that
-- range would not be reported -- confirm before relying on it.
with cte as (
    select max(TicketNr) maxnum, min(TicketNr) minnum
    from Ticket
)
select a.number
from master..spt_values a
cross join cte
-- the CTE column is minnum (it was referenced as minno, which does not exist)
where Type = 'P' and number < cte.maxnum and number > cte.minnum
except
select TicketNr from Ticket
So After looking at all the solutions
I went with creating a temp table holding the full range of numbers from the starting to the ending ticket, and then selecting from the temp table where the ticket number is not in the ticket table.
The reason is that I kept running into MAXRECURSION problems.

Find Segment with Longest Stay Per Booking

We have a number of bookings and one of the requirements is that we display the Final Destination for a booking based on its segments. Our business has defined the Final Destination as that in which we have the longest stay. And Origin being the first departure point.
Please note this is not the segments with the Longest Travel time i.e. Datediff(minute, DepartDate, ArrivalDate) This is requesting the one with the Longest gap between segments.
This is a simplified version of the tables:
-- Flight segments of a booking; SegNum orders them but is not gap-free.
CREATE TABLE Segments
(
    BookingID     INT,
    SegNum        INT,
    DepartureCity VARCHAR(100),
    DepartDate    DATETIME,
    ArrivalCity   VARCHAR(100),
    ArrivalDate   DATETIME
);

-- One row per booking; BookingID is generated.
CREATE TABLE Bookings
(
    BookingID INT IDENTITY(1,1),
    Locator   VARCHAR(10)
);

INSERT INTO Segments
    (BookingID, SegNum, DepartureCity, DepartDate, ArrivalCity, ArrivalDate)
VALUES
    (1, 2, 'BRU', '2010-03-06 10:40',    'FIH', '2010-03-06 20:20:00'),
    (1, 4, 'FIH', '2010-03-13 21:50:00', 'BRU', '2010-03-14 07:25:00'),
    (2, 2, 'BOD', '2010-02-10 06:50:00', 'AMS', '2010-02-10 08:50:00'),
    (2, 3, 'AMS', '2010-02-10 10:40:00', 'EBB', '2010-02-10 20:40:00'),
    (2, 4, 'EBB', '2010-02-28 22:55:00', 'AMS', '2010-03-01 05:35:00'),
    (2, 5, 'AMS', '2010-03-01 10:25:00', 'BOD', '2010-03-01 12:15:00'),
    (3, 2, 'BRU', '2010-03-09 12:10:00', 'IAD', '2010-03-09 14:46:00'),
    (3, 3, 'IAD', '2010-03-13 17:57:00', 'BRU', '2010-03-14 07:15:00'),
    (4, 2, 'BRU', '2010-07-27',          'ADD', '2010-07-28'),
    (4, 4, 'ADD', '2010-07-28',          'LUN', '2010-07-28'),
    (4, 5, 'LUN', '2010-08-23',          'ADD', '2010-08-23'),
    (4, 6, 'ADD', '2010-08-23',          'BRU', '2010-08-24');

-- Kept as separate statements so the identity values 1..4 are handed out in
-- exactly this order.
INSERT INTO Bookings (Locator) VALUES ('5MVL7J');
INSERT INTO Bookings (Locator) VALUES ('Y2IMXQ');
INSERT INTO Bookings (Locator) VALUES ('YCBL5C');
INSERT INTO Bookings (Locator) VALUES ('X7THJ6');
I have created a SQL Fiddle with real data here:
SQL Fiddle Example
I have tried to do the following, however this doesn't appear to be correct.
-- Per booking: departure/arrival city of the segment followed by the longest
-- stay, where the following segment is looked up as SegNum + 1.
SELECT
    Locator,
    fd.*
FROM Bookings AS ob
OUTER APPLY
(
    SELECT TOP 1
        DepartureCity,
        ArrivalCity
    FROM
    (
        SELECT DISTINCT
            seg.segnum,
            seg.DepartureCity,
            seg.DepartDate,
            seg.ArrivalCity,
            seg.ArrivalDate,
            (
                -- minutes between this arrival and the departure of segment SegNum + 1
                SELECT DISTINCT
                    DATEDIFF(MINUTE, seg.ArrivalDate, s2.DepartDate)
                FROM Segments AS s2
                WHERE s2.BookingID = seg.BookingID
                  AND s2.segnum = seg.segnum + 1
            ) AS LengthOfStay
        FROM Bookings AS b WITH (NOLOCK)
        INNER JOIN Segments AS seg WITH (NOLOCK)
            ON seg.bookingid = b.bookingid
        WHERE b.Locator = ob.locator
    ) AS a
    ORDER BY a.lengthofstay DESC
) AS FD
The results I expect are:
Locator Origin Destination
5MVL7J BRU FIH
Y2IMXQ BOD EBB
YCBL5C BRU IAD
X7THJ6 BRU LUN
I get the feeling that a CTE would be the best approach, however my attempts do this so far failed miserably. Any help would be greatly appreciated.
I have managed to get the following query working but it only works for one at a time due to the top one, but I'm not sure how to tweak it:
-- CTE: every segment with its booking locator, numbered per locator in SegNum order.
WITH CTE AS
(
    SELECT DISTINCT
        seg.DepartureCity,
        seg.DepartDate,
        seg.ArrivalCity,
        seg.ArrivalDate,
        bk.Locator,
        ROW_NUMBER() OVER (PARTITION BY bk.Locator ORDER BY SegNum ASC) AS RN
    FROM Segments AS seg
    INNER JOIN bookings AS bk
        ON bk.BookingID = seg.bookingid
)
-- Origin = departure city of the first segment; destination = arrival city
-- before the single longest stay (TOP 1 is taken across all bookings).
SELECT
    first_leg.Locator,
    first_leg.DepartureCity,
    longest.ArrivalCity
FROM
(
    SELECT TOP 1
        cur.Locator,
        cur.ArrivalCity,
        nxt.DepartureCity,
        DATEDIFF(MINUTE, cur.ArrivalDate, nxt.DepartDate) AS ddiff
    FROM CTE AS cur
    INNER JOIN CTE AS nxt
        ON  nxt.Locator = cur.Locator
        AND nxt.rn = cur.rn + 1
    ORDER BY ddiff DESC
) AS longest
INNER JOIN CTE AS first_leg
    ON first_leg.Locator = longest.Locator
WHERE first_leg.rn = 1
You can try something like this:
-- CTE_Start: renumber each booking's segments 1..n so SegNum gaps do not matter.
;WITH CTE_Start AS
(
    SELECT seg.*,
           ROW_NUMBER() OVER (PARTITION BY seg.BookingID ORDER BY seg.SegNum) AS RN
    FROM dbo.Segments AS seg
)
-- RCTE_Stay: walk the segments in order; Stay is the number of minutes between
-- the previous segment's arrival and this segment's departure (0 for the first).
, RCTE_Stay AS
(
    SELECT first_seg.*, 0 AS Stay
    FROM CTE_Start AS first_seg
    WHERE first_seg.RN = 1

    UNION ALL

    SELECT next_seg.*,
           DATEDIFF(MINUTE, prev_seg.ArrivalDate, next_seg.DepartDate)
    FROM CTE_Start AS next_seg
    INNER JOIN RCTE_Stay AS prev_seg
        ON  prev_seg.BookingID = next_seg.BookingID
        AND prev_seg.RN + 1 = next_seg.RN
)
-- CTE_Final: RN_Stay = 1 marks the segment that departs after the longest stay.
, CTE_Final AS
(
    SELECT st.*,
           ROW_NUMBER() OVER (PARTITION BY st.BookingID ORDER BY st.Stay DESC) AS RN_Stay
    FROM RCTE_Stay AS st
)
-- Origin: departure city of the first segment.
-- Destination: the city left after the longest stay.
SELECT
    b.Locator,
    origin_seg.DepartureCity AS Origin,
    f.DepartureCity AS Destination
FROM CTE_Final AS f
INNER JOIN CTE_Start AS origin_seg
    ON origin_seg.BookingID = f.BookingID
INNER JOIN dbo.Bookings AS b
    ON b.BookingID = f.BookingID
WHERE origin_seg.RN = 1
  AND f.RN_Stay = 1
SQLFiddle DEMO
You can use the OUTER APPLY + TOP operators to find the segment with the next SegNum value. Once the gap between consecutive segments is known, MIN/MAX aggregate functions with an OVER clause are used as conditions in the CASE expressions.
-- cte: per segment, Origin is filled only on the booking's first segment and
-- Destination only on the segment followed by the booking's longest stay.
;WITH cte AS
(
    SELECT
        seg.BookingID,
        CASE
            WHEN MIN(seg.segNum) OVER (PARTITION BY seg.BookingID) = seg.segNum
                THEN seg.DepartureCity
        END AS Origin,
        CASE
            WHEN MAX(DATEDIFF(MINUTE, seg.ArrivalDate, nxt.DepartDate))
                     OVER (PARTITION BY seg.BookingID)
                 = DATEDIFF(MINUTE, seg.ArrivalDate, nxt.DepartDate)
                THEN nxt.DepartureCity
        END AS Destination
    FROM Segments AS seg WITH (NOLOCK)
    OUTER APPLY
    (
        -- the next segment of the same booking, whatever its SegNum is
        SELECT TOP 1
            following.DepartDate,
            following.DepartureCity
        FROM Segments AS following
        WHERE following.BookingID = seg.BookingID
          AND following.SegNum > seg.SegNum
        ORDER BY following.SegNum ASC
    ) AS nxt
)
-- MAX collapses the per-segment rows: it picks the single non-NULL value of each column.
SELECT
    b.Locator,
    MAX(c.Origin) AS Origin,
    MAX(c.Destination) AS Destination
FROM cte AS c
INNER JOIN Bookings AS b
    ON b.BookingID = c.BookingID
GROUP BY b.Locator
See demo on SQLFiddle
The statement below:
-- DataSource: every segment with its duration in seconds (departure to
-- arrival) and its position, per booking, when sorted longest-duration first.
;WITH DataSource AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY seg.BookingID
                           ORDER BY DATEDIFF(SECOND, seg.DepartDate, seg.ArrivalDate) DESC) AS [Row],
        seg.BookingID,
        seg.SegNum,
        seg.DepartureCity,
        seg.DepartDate,
        seg.ArrivalCity,
        seg.ArrivalDate,
        DATEDIFF(SECOND, seg.DepartDate, seg.ArrivalDate) AS DiffInSeconds
    FROM Segments AS seg
)
SELECT *
FROM DataSource AS DS
INNER JOIN Bookings AS B
    ON B.[BookingID] = DS.[BookingID]
Will give the following output:
So, adding the following clause to the above statement:
WHERE Row = 1
will give you what you need.
Few important things:
As you can see from the screenshot below, there are two records with the same difference in seconds. If you want to show both of them (or all of them, if there are more), use the RANK function instead of the ROW_NUMBER function.
The return type of DATEDIFF is INT, so there is a limit on the maximum difference that can be returned in seconds. It is as follows:
If the return value is out of range for int (-2,147,483,648 to
+2,147,483,647), an error is returned. For millisecond, the maximum difference between startdate and enddate is 24 days, 20 hours, 31
minutes and 23.647 seconds. For second, the maximum difference is 68
years.