SQL Server 2008 Group Based on a Sequence - sql

I'm struggling to find if this is possible to use SQL Server 2008 to assign a sequence without having to use cursors. Let's say I have the following table which defines a driver's driving route going from one location to another (null means he is going from home):
RouteID SourceLocationID DestinationLocationID DriverID Created Updated
------- ---------------- --------------------- -------- ------- -------
1 NULL 219 1 10:20 10:23
2 219 266 1 10:21 10:24
3 266 NULL 1 10:22 10:25
4 NULL 54 2 10:23 10:26
5 54 NULL 2 10:24 10:27
6 NULL 300 1 10:25 10:28
7 300 NULL 1 10:26 10:29
I want to group the records between the rows where sourceLID is NULL and the destinationLID is null, so I get the following (generating a sequence number for each grouping set):
DriverID DestinationLocationID TripNumber
-------- --------------------- ----------
1 219 1 (his first trip)
1 266 1
1 300 2 (his second trip)
2 54 1
Is there a way I could use GROUP BY here rather than cursors?

a quick try:
with cte as
( select DestinationLocationID
, DriverID
, tripid = row_number()
over ( partition by driverid
order by DestinationLocationID)
from table1
where sourcelocationid is NULL
UNION ALL
select table1.DestinationLocationID
, table1.DriverID
, cte.tripid
from table1
join cte on table1.SourceLocationID=cte.DestinationLocationID
and table1.DriverID=cte.DriverID
where cte.DestinationLocationID is not null
)
select * from cte

Try this:
select driverid, destinationlocationid, count(destinationlocationid) from
(
select driverid, destinationlocationid from table1 where sourcelocationid is NULL
union all
select driverid, sourcelocationid from table1 where destinationlocationid is NULL
)A group by driverid, destinationlocationid

Try this,
Declare #t table(RouteID int, SourceLocationID int,DestinationLocationID int
,DriverID int,Created time, Updated time)
insert into #t
values(1, NULL, 219, 1, '10:20','10:23'),
(2 ,219,266, 1, '10:21','10:24'),
(3,266, NULL, 1, '10:22','10:25'),
(4, NULL, 54, 2, '10:23','10:26'),
(5,54, NULL, 2, '10:24','10:27'),
(6,NULL,300, 1, '10:25','10:28'),
(7,300,NULL, 1, '10:26','10:29')
;
WITH CTE
AS (
SELECT *
,ROW_NUMBER() OVER (
PARTITION BY DriverID ORDER BY Created
) RN
FROM #t
)
,CTE1
AS (
SELECT *
,1 TripNumber
FROM CTE
WHERE RN = 1
UNION ALL
SELECT A.*
,CASE
WHEN A.SourceLocationID IS NULL
THEN B.TripNumber + 1
ELSE B.TripNumber
END
FROM CTE1 B
INNER JOIN CTE A ON B.DriverID = A.DriverID
WHERE A.RN > B.RN
)
SELECT DISTINCT DestinationLocationID
,DriverID
,TripNumber
FROM CTE1
WHERE DestinationLocationID IS NOT NULL
ORDER BY DriverID

Use a correlated sub-query to count previous trips, plus 1 to get this trip number.
select DriverID,
DestinationLocationID,
(select count(*) + 1
from routes t2
where t1.DriverID = t2.DriverID
and t1.RouteID > t2.RouteID
and DestinationLocationID IS NULL) as TripNumber
from routes t1
where DestinationLocationID IS NOT NULL
order by DriverID, DestinationLocationID;
Executes like this:
SQL>select DriverID,
SQL& DestinationLocationID,
SQL& (select count(*) + 1
SQL& from routes t2
SQL& where t1.DriverID = t2.DriverID
SQL& and t1.RouteID > t2.RouteID
SQL& and DestinationLocationID IS NULL) as TripNumber
SQL&from routes t1
SQL&where DestinationLocationID IS NOT NULL
SQL&order by DriverID, DestinationLocationID;
DriverID DestinationLocationID TripNumber
=========== ===================== ============
1 219 1
1 266 1
1 300 2
2 54 1
4 rows found

Related

Rolling Average in SQL with Partition [duplicate]

declare #t table
(
id int,
SomeNumt int
)
insert into #t
select 1,10
union
select 2,12
union
select 3,3
union
select 4,15
union
select 5,23
select * from #t
the above select returns me the following.
id SomeNumt
1 10
2 12
3 3
4 15
5 23
How do I get the following:
id srome CumSrome
1 10 10
2 12 22
3 3 25
4 15 40
5 23 63
select t1.id, t1.SomeNumt, SUM(t2.SomeNumt) as sum
from #t t1
inner join #t t2 on t1.id >= t2.id
group by t1.id, t1.SomeNumt
order by t1.id
SQL Fiddle example
Output
| ID | SOMENUMT | SUM |
-----------------------
| 1 | 10 | 10 |
| 2 | 12 | 22 |
| 3 | 3 | 25 |
| 4 | 15 | 40 |
| 5 | 23 | 63 |
Edit: this is a generalized solution that will work across most db platforms. When there is a better solution available for your specific platform (e.g., gareth's), use it!
The latest version of SQL Server (2012) permits the following.
SELECT
RowID,
Col1,
SUM(Col1) OVER(ORDER BY RowId ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Col2
FROM tablehh
ORDER BY RowId
or
SELECT
GroupID,
RowID,
Col1,
SUM(Col1) OVER(PARTITION BY GroupID ORDER BY RowId ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Col2
FROM tablehh
ORDER BY RowId
This is even faster. Partitioned version completes in 34 seconds over 5 million rows for me.
Thanks to Peso, who commented on the SQL Team thread referred to in another answer.
For SQL Server 2012 onwards it could be easy:
SELECT id, SomeNumt, sum(SomeNumt) OVER (ORDER BY id) as CumSrome FROM #t
because ORDER BY clause for SUM by default means RANGE UNBOUNDED PRECEDING AND CURRENT ROW for window frame ("General Remarks" at https://msdn.microsoft.com/en-us/library/ms189461.aspx)
Let's first create a table with dummy data:
Create Table CUMULATIVESUM (id tinyint , SomeValue tinyint)
Now let's insert some data into the table;
Insert Into CUMULATIVESUM
Select 1, 10 union
Select 2, 2 union
Select 3, 6 union
Select 4, 10
Here I am joining same table (self joining)
Select c1.ID, c1.SomeValue, c2.SomeValue
From CumulativeSum c1, CumulativeSum c2
Where c1.id >= c2.ID
Order By c1.id Asc
Result:
ID SomeValue SomeValue
-------------------------
1 10 10
2 2 10
2 2 2
3 6 10
3 6 2
3 6 6
4 10 10
4 10 2
4 10 6
4 10 10
Here we go now just sum the Somevalue of t2 and we`ll get the answer:
Select c1.ID, c1.SomeValue, Sum(c2.SomeValue) CumulativeSumValue
From CumulativeSum c1, CumulativeSum c2
Where c1.id >= c2.ID
Group By c1.ID, c1.SomeValue
Order By c1.id Asc
For SQL Server 2012 and above (much better performance):
Select
c1.ID, c1.SomeValue,
Sum (SomeValue) Over (Order By c1.ID )
From CumulativeSum c1
Order By c1.id Asc
Desired result:
ID SomeValue CumlativeSumValue
---------------------------------
1 10 10
2 2 12
3 6 18
4 10 28
Drop Table CumulativeSum
A CTE version, just for fun:
;
WITH abcd
AS ( SELECT id
,SomeNumt
,SomeNumt AS MySum
FROM #t
WHERE id = 1
UNION ALL
SELECT t.id
,t.SomeNumt
,t.SomeNumt + a.MySum AS MySum
FROM #t AS t
JOIN abcd AS a ON a.id = t.id - 1
)
SELECT * FROM abcd
OPTION ( MAXRECURSION 1000 ) -- limit recursion here, or 0 for no limit.
Returns:
id SomeNumt MySum
----------- ----------- -----------
1 10 10
2 12 22
3 3 25
4 15 40
5 23 63
Late answer but showing one more possibility...
Cumulative Sum generation can be more optimized with the CROSS APPLY logic.
Works better than the INNER JOIN & OVER Clause when analyzed the actual query plan ...
/* Create table & populate data */
IF OBJECT_ID('tempdb..#TMP') IS NOT NULL
DROP TABLE #TMP
SELECT * INTO #TMP
FROM (
SELECT 1 AS id
UNION
SELECT 2 AS id
UNION
SELECT 3 AS id
UNION
SELECT 4 AS id
UNION
SELECT 5 AS id
) Tab
/* Using CROSS APPLY
Query cost relative to the batch 17%
*/
SELECT T1.id,
T2.CumSum
FROM #TMP T1
CROSS APPLY (
SELECT SUM(T2.id) AS CumSum
FROM #TMP T2
WHERE T1.id >= T2.id
) T2
/* Using INNER JOIN
Query cost relative to the batch 46%
*/
SELECT T1.id,
SUM(T2.id) CumSum
FROM #TMP T1
INNER JOIN #TMP T2
ON T1.id > = T2.id
GROUP BY T1.id
/* Using OVER clause
Query cost relative to the batch 37%
*/
SELECT T1.id,
SUM(T1.id) OVER( PARTITION BY id)
FROM #TMP T1
Output:-
id CumSum
------- -------
1 1
2 3
3 6
4 10
5 15
Select
*,
(Select Sum(SOMENUMT)
From #t S
Where S.id <= M.id)
From #t M
You can use this simple query for progressive calculation :
select
id
,SomeNumt
,sum(SomeNumt) over(order by id ROWS between UNBOUNDED PRECEDING and CURRENT ROW) as CumSrome
from #t
There is a much faster CTE implementation available in this excellent post:
http://weblogs.sqlteam.com/mladenp/archive/2009/07/28/SQL-Server-2005-Fast-Running-Totals.aspx
The problem in this thread can be expressed like this:
DECLARE #RT INT
SELECT #RT = 0
;
WITH abcd
AS ( SELECT TOP 100 percent
id
,SomeNumt
,MySum
order by id
)
update abcd
set #RT = MySum = #RT + SomeNumt
output inserted.*
For Ex: IF you have a table with two columns one is ID and second is number and wants to find out the cumulative sum.
SELECT ID,Number,SUM(Number)OVER(ORDER BY ID) FROM T
Once the table is created -
select
A.id, A.SomeNumt, SUM(B.SomeNumt) as sum
from #t A, #t B where A.id >= B.id
group by A.id, A.SomeNumt
order by A.id
The SQL solution wich combines "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" and "SUM" did exactly what i wanted to achieve.
Thank you so much!
If it can help anyone, here was my case. I wanted to cumulate +1 in a column whenever a maker is found as "Some Maker" (example). If not, no increment but show previous increment result.
So this piece of SQL:
SUM( CASE [rmaker] WHEN 'Some Maker' THEN 1 ELSE 0 END)
OVER
(PARTITION BY UserID ORDER BY UserID,[rrank] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS Cumul_CNT
Allowed me to get something like this:
User 1 Rank1 MakerA 0
User 1 Rank2 MakerB 0
User 1 Rank3 Some Maker 1
User 1 Rank4 Some Maker 2
User 1 Rank5 MakerC 2
User 1 Rank6 Some Maker 3
User 2 Rank1 MakerA 0
User 2 Rank2 SomeMaker 1
Explanation of above: It starts the count of "some maker" with 0, Some Maker is found and we do +1. For User 1, MakerC is found so we dont do +1 but instead vertical count of Some Maker is stuck to 2 until next row.
Partitioning is by User so when we change user, cumulative count is back to zero.
I am at work, I dont want any merit on this answer, just say thank you and show my example in case someone is in the same situation. I was trying to combine SUM and PARTITION but the amazing syntax "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW" completed the task.
Thanks!
Groaker
Above (Pre-SQL12) we see examples like this:-
SELECT
T1.id, SUM(T2.id) AS CumSum
FROM
#TMP T1
JOIN #TMP T2 ON T2.id < = T1.id
GROUP BY
T1.id
More efficient...
SELECT
T1.id, SUM(T2.id) + T1.id AS CumSum
FROM
#TMP T1
JOIN #TMP T2 ON T2.id < T1.id
GROUP BY
T1.id
Try this
select
t.id,
t.SomeNumt,
sum(t.SomeNumt) Over (Order by t.id asc Rows Between Unbounded Preceding and Current Row) as cum
from
#t t
group by
t.id,
t.SomeNumt
order by
t.id asc;
Try this:
CREATE TABLE #t(
[name] varchar NULL,
[val] [int] NULL,
[ID] [int] NULL
) ON [PRIMARY]
insert into #t (id,name,val) values
(1,'A',10), (2,'B',20), (3,'C',30)
select t1.id, t1.val, SUM(t2.val) as cumSum
from #t t1 inner join #t t2 on t1.id >= t2.id
group by t1.id, t1.val order by t1.id
Without using any type of JOIN cumulative salary for a person fetch by using follow query:
SELECT * , (
SELECT SUM( salary )
FROM `abc` AS table1
WHERE table1.ID <= `abc`.ID
AND table1.name = `abc`.Name
) AS cum
FROM `abc`
ORDER BY Name

Select rows based on one column value exists more than once

I have a table like this
Pcode LogId ExtDate
------------------------------
p123 2 2021-01-02
p342 3 2021-01-16
p456 4 2021-05-02
p456 5 2021-07-26
p634 6 2021-05-02
p764 7 2021-01-18
p764 8 2021-06-25
I am looking for a query which returns only those rows with column one value exists more than one.
So the output should be like this. Which means we are taking only items at which the Pcode exists more than once
Pcode LogId ExtDate
-----------------------------
p456 4 2021-05-02
p456 5 2021-07-26
p764 7 2021-01-18
p764 8 2021-06-25
I tried using dense_rank, but I got stuck here... can't move any further
select
pcode,
LogId,
extdate,
rn
from
(select
pcode,
L.logid,
extdate,
dense_rank() over (partition by pcode order by L.extdate desc) rn
from
kip_project_master P
inner join
kip_report_extraction_log L on L.LogId = P.LogId) tbl
You can try the below - demo here
select pcode,logid,extdate from
(
select *,count(pcode) over(partition by pcode) cnt from t1
)A where cnt>1
maybe ?
DROP TABLE IF EXISTS #yourtable
CREATE TABLE #yourtable(
Pcode VARCHAR(30) NOT NULL
,LogId INT
,ExtDate DATE
);
INSERT INTO #yourtable(Pcode,LogId,ExtDate) VALUES
('p123',2,'2021-01-02'),('p342',3,'2021-01-16'),('p456',4,'2021-05-02')
,('p456',5,'2021-07-26'),('p634',6,'2021-05-02'),('p764',7,'2021-01-18')
,('p764',8,'2021-06-25');
WITH cte AS
(
SELECT Pcode
FROM #yourtable
GROUP BY Pcode
HAVING (COUNT(Pcode) > 1)
)
SELECT yt.Pcode, yt.LogId, yt.ExtDate
FROM yourtable yt
INNER JOIN
cte ON yt.Pcode = cte.Pcode
Assuming another column is unique, just use exists:
select kpm.*
from kip_project_master kpm
where exists (select 1
from kip_project_master kpm2
where kpm2.pcode = kpm.pcode and kpm2.logid <> kpm.logid
)
order by kpm.pcode, kpm.logid;
In particular, this can take advantage of an index on (pcode, logid), which should make it pretty fast.
you can do this like,
select * from yourtable join (select
pcode,count(1) as cnt from yourtable group by
pcode having COUNT(1)>1)b on a.pcode=b.pcode

Find top consecutive rows where a column value is equal between them

I need to get all the consecutive top row where a column value is equal between them
my table is:
CREATE TABLE [dbo].[Items](
[Id] [int] NOT NULL,
[IdUser] [int] NOT NULL,
[CreatedDate] [datetime] NOT NULL,
[SomeData] nvarchar(50) NOT NULL);
and i want the top rows (ordered by Id desc) with the same IdUser
if table data is:
Id IdUser CreatedDate SomeData
--- ------- ------------------------ --------
1 1 2017-09-21T09:42:01.407Z sdafsasfa
2 1 2017-09-21T09:42:01.407Z sdafsasfa
4 2 2017-09-21T09:42:01.41Z sdafsasfa
5 3 2017-09-21T09:42:01.41Z sdafsasfa
7 3 2017-09-21T09:42:01.413Z sdafsasfa
8 3 2017-09-21T09:42:01.413Z sdafsasfa
9 10 2017-09-21T09:42:01.417Z sdafsasfa
11 11 2017-09-21T09:42:01.417Z sdafsasfa
12 2 2017-09-21T09:42:01.42Z sdafsasfa
15 2 2017-09-21T09:42:01.42Z sdafsasfa
I want :
Id IdUser CreatedDate SomeData
--- ------- ------------------------ --------
12 2 2017-09-21T09:42:01.42Z sdafsasfa
15 2 2017-09-21T09:42:01.42Z sdafsasfa
if table data is:
Id IdUser CreatedDate SomeData
--- ------- ------------------------ --------
1 1 2017-09-21T09:42:01.407Z sdafsasfa
2 1 2017-09-21T09:42:01.407Z sdafsasfa
4 2 2017-09-21T09:42:01.41Z sdafsasfa
I want :
Id IdUser CreatedDate SomeData
--- ------- ------------------------ --------
4 2 2017-09-21T09:42:01.41Z sdafsasfa
SqlFiddle
you can try this query:
select I.*
from
[dbo].[Items] I
JOIN
(select top 1 Id, IdUser from [dbo].[Items] order by Id desc)I2
on I.Iduser=I2.Iduser
order by Id desc;-- this can be removed to remove ordering by Id Desc
updated fiddle link
You could use LAG and SUM() OVER() like this
DECLARE #Items as Table
(
[Id] [int] NOT NULL,
[IdUser] [int] NOT NULL,
[CreatedDate] [datetime] NOT NULL,
[SomeData] nvarchar(50) NOT NULL
);
INSERT INTO #Items
(
Id,
IdUser,
CreatedDate,
SomeData
)
VALUES
( 1 , 1 ,getdate(),'sdafsasfa'),
( 2 , 1 ,getdate(),'sdafsasfa'),
( 4 , 2 ,getdate(),'sdafsasfa'),
( 5 , 3 ,getdate(),'sdafsasfa'),
( 7 , 3 ,getdate(),'sdafsasfa'),
( 8 , 3 ,getdate(),'sdafsasfa'),
( 9 , 10,getdate(),'sdafsasfa'),
( 11, 11,getdate(),'sdafsasfa'),
( 12, 2 ,getdate(),'sdafsasfa'),
( 15, 2 ,getdate(),'sdafsasfa')
;WITH temp AS
(
SELECT *,
CASE
WHEN lag(i.IdUser) over(ORDER BY i.Id) = i.IdUser THEN 0
ELSE 1
END as ChangingPoint
FROM #Items i
),
temp1 AS
(
SELECT
*,
sum(t.ChangingPoint) OVER(ORDER BY t.Id) as GroupId
FROM temp t
)
SELECT TOP 1 WITH TIES
t.Id,
t.IdUser,
t.CreatedDate,
t.SomeData
FROM temp1 t
ORDER BY GroupId DESC
See demo here: http://rextester.com/PHWWU96232
Assuming you want last rows with highest CreateDate and same IdUser then DENSE_RANK will help
SELECT id, iduser, CreatedDate, somedata
FROM (
SELECT id, iduser, CreatedDate, somedata,
DENSE_RANK() OVER (ORDER BY CreatedDate desc, IdUser) ord
FROM [dbo].[Items]) t
WHERE t.ord = 1
The equivalent SQL query is
SELECT *
FROM Items t1
WHERE NOT EXISTS (
SELECT *
FROM Items t2
WHERE t2.createddate > t1.createddate or
(t2.createddate = t1.createddate and t2.iduser < t1.iduser)
)
demo
Despite TriV's solution works fine I ended up using a modified Radim Bača's solution (his solution dont work as i need) because it is faster IMO
SELECT id, iduser, createddate, somedata
FROM Items t1
WHERE NOT EXISTS (
SELECT 1
FROM Items t2
WHERE t2.id > t1.id and t2.iduser <> t1.iduser );
SQLFiddle
select I.*
from
[dbo].[Items1] I
JOIN
(select top 1 Id, IdUser,CreatedDate from [dbo].[Items1] order by Id desc)I2
on I.CreatedDate=I2.CreatedDate
order by Id desc;-- this can be removed to remove ordering by Id Desc

I want first and second inserted value with condition

Table abc:
Consgno Name Entrydatetime
111 A 01/03/2017 10:10:15
111 A 01/03/2017 10:20:15
111 A 01/03/2017 11:10:20
222 B 02/03/2017 10:10:25
333 C 06/03/2017 10:10:25
333 C 07/03/2017 10:10:12
444 D 04/03/2017 10:10:41
444 D 04/03/2017 01:10:20
444 D 06/03/2017 10:10:32
555 E 05/04/2017 10:10:15
One Consgno has entered ONE more than one time.
When one Consgno is only once, then the first value should come, otherwise the second entered value should come.
I want to output like this:
Consgno Name Entrydatetime
111 A 01/03/2017 10:20:15
222 B 02/03/2017 11:10:36
333 C 07/03/2017 10:10:12
444 D 04/03/2017 01:10:20
555 E 05/04/2017 10:10:15
You can use a query like the following:
;WITH MyWindowedTable AS (
SELECT Consgno, Name, Entrydatetime,
ROW_NUMBER() OVER (PARTITION BY Consgno
ORDER BY Entrydatetime) AS rn,
COUNT(*) OVER (PARTITION BY Consgno) AS cnt
FROM mytable
)
SELECT Consgno, Name, Entrydatetime
FROM MyWindowedTable
WHERE (cnt = 1 AND rn = 1) OR (cnt > 1 AND rn = 2)
Using windowed version of COUNT:
COUNT(*) OVER (PARTITION BY Consgno)
returns the population, cnt, of each Consgno partition. We can use cnt to properly filter the records returned: in partitions with a population of 1 we get the single record of the partition, whereas in the rest of the cases we get the one having rn = 2.
Use ROW_NUMBER and COUNT built in functions :
CREATE TABLE #table1 ( Consgno INT, Name VARCHAR(1), Entrydatetime
DATETIME)
INSERT INTO #table1 ( Consgno , Name , Entrydatetime )
SELECT 111,'A','01/03/2017 10:10:15' UNION ALL
SELECT 111,'A','01/03/2017 10:20:15' UNION ALL
SELECT 111,'A','01/03/2017 11:10:20' UNION ALL
SELECT 222,'B','02/03/2017 10:10:25' UNION ALL
SELECT 333,'C','06/03/2017 10:10:25' UNION ALL
SELECT 333,'C','07/03/2017 10:10:12' UNION ALL
SELECT 444,'D','04/03/2017 10:10:41' UNION ALL
SELECT 444,'D','04/03/2017 01:10:20' UNION ALL
SELECT 444,'D','06/03/2017 10:10:32' UNION ALL
SELECT 555,'E','05/04/2017 10:10:15'
SELECT Consgno , Name , Entrydatetime
FROM
(
SELECT Consgno , Name , Entrydatetime , ROW_NUMBER() OVER (PARTITION BY
Consgno ORDER BY Entrydatetime) RNo , COUNT(*) OVER (PARTITION BY
Consgno) AS _Count
FROM #table1
) A WHERE ( RNo = 1 AND _Count = 1) OR (_Count > 1 AND RNo = 2 )

How can I get the first result for each account in this SQL query?

I'm trying to write a query that follows this logic:
Find the first following status code of an account that had a previous status code of X.
So if I have a table of:
id account_num status_code
64 1 X
82 1 Y
72 2 Y
87 1 Z
91 2 X
103 2 Z
The results would be:
id account_num status_code
82 1 Y
103 2 Z
I've come up with a couple of solutions but I'm not all that great with SQL and so they've been pretty inelegeant thus far. I was hoping that someone here might be able to point me in the right direction.
View:
SELECT account_number, id
FROM table
WHERE status_code = 'X'
Query:
SELECT account_number, min(id)
FROM table
INNER JOIN view
ON table.account_number = view.account_number
WHERE table.id > view.id
At this point I have the id that I need but I'd have to write ANOTHER query that uses the id tolook up the status_code.
Edit: To add some context, I'm trying to find calls that have a status_code of X. If a call has a status_code of X we want to dial it a different way the next time we make an attempt. The aim of this query is to provide a report that will show the results of the second dial if the first dial resulted an X status code.
Here's a SQL Server solution.
UPDATE
The idea is to avoid a number of NESTED LOOP joins as proposed by Olaf because they roughly have O(N * M) complexity and thus extremely bad for your performance. MERGED JOINS complexity is O(NLog(N) + MLog(M)) which is much better for real world scenarios.
The query below works as follows:
RankedCTE is a subquery that assigns a row number to each id partioned by account and sorted by id which represents the time. So for the data below the output of this
SELECT
id,
account_num,
status_code,
ROW_NUMBER() OVER (PARTITION BY account_num ORDER BY id DESC) AS item_rank
FROM dbo.Test
would be:
id account_num status_code item_rank
----------- ----------- ----------- ----------
87 1 Z 1
82 1 Y 2
64 1 X 3
103 2 Z 1
91 2 X 2
72 2 Y 3
Once we have them numbered we join the result on itself like this:
WITH RankedCTE AS
(
SELECT
id,
account_num,
status_code,
ROW_NUMBER() OVER (PARTITION BY account_num ORDER BY id DESC) AS item_rank
FROM dbo.Test
)
SELECT
*
FROM
RankedCTE A
INNER JOIN RankedCTE B ON
A.account_num = B.account_num
AND A.item_rank = B.item_rank - 1
which will give us an event and a preceding event in the same table
id account_num status_code item_rank id account_num status_code item_rank
----------- ----------- ----------- ----------- ----------- ----------- ----------- -----------
87 1 Z 1 82 1 Y 2
82 1 Y 2 64 1 X 3
103 2 Z 1 91 2 X 2
91 2 X 2 72 2 Y 3
Finally, we just have to take the preceding event with code "X" and the event with code not "X":
WITH RankedCTE AS
(
SELECT
id,
account_num,
status_code,
ROW_NUMBER() OVER (PARTITION BY account_num ORDER BY id DESC) AS item_rank
FROM dbo.Test
)
SELECT
A.id,
A.account_num,
A.status_code
FROM
RankedCTE A
INNER JOIN RankedCTE B ON
A.account_num = B.account_num
AND A.item_rank = B.item_rank - 1
AND A.status_code <> 'X'
AND B.status_code = 'X'
Query plans for this query and #Olaf Dietsche solution (one of the versions) are below.
Data setup script
CREATE TABLE dbo.Test
(
id int not null PRIMARY KEY,
account_num int not null,
status_code nchar(1)
)
GO
INSERT dbo.Test (id, account_num, status_code)
SELECT 64 , 1, 'X' UNION ALL
SELECT 82 , 1, 'Y' UNION ALL
SELECT 72 , 2, 'Y' UNION ALL
SELECT 87 , 1, 'Z' UNION ALL
SELECT 91 , 2, 'X' UNION ALL
SELECT 103, 2, 'Z'
SQL Fiddle with subselect
select id, account_num, status_code
from mytable
where id in (select min(t1.id)
from mytable t1
join mytable t2 on t1.account_num = t2.account_num
and t1.id > t2.id
and t2.status_code = 'X'
group by t1.account_num)
and SQL Fiddle with join, both for MS SQL Server 2012, both returning the same result.
select id, account_num, status_code
from mytable
join (select min(t1.id) as min_id
from mytable t1
join mytable t2 on t1.account_num = t2.account_num
and t1.id > t2.id
and t2.status_code = 'X'
group by t1.account_num) t on id = min_id
SELECT MIN(ID), ACCOUNT_NUM, STATUS_CODE FROM (
SELECT ID, ACCOUNT_NUM, STATUS_CODE
FROM ACCOUNT A1
WHERE EXISTS
(SELECT 1
FROM ACCOUNT A2
WHERE A1.ACCOUNT_NUM = A2.ACCOUNT_NUM
AND A2.STATUS_CODE = 'X'
AND A2.ID < A1.ID)
) SUB
GROUP BY ACCOUNT_NUM
Here's an SQLFIDDLE
Here's query, with your data, checked under PostgreSQL:
SELECT t0.*
FROM so13594339 t0 JOIN
(SELECT min(t1.id), t1.account_num
FROM so13594339 t1, so13594339 t2
WHERE t1.account_num = t2.account_num AND t1.id > t2.id AND t2.status_code = 'X'
GROUP BY t1.account_num
) z
ON t0.id = z.min AND t0.account_num = z.account_num;