Delete rows following a duplicate - sql

I have a list of user login and logout stamps. Unfortunately a LOGIN entry might not always be followed by a LOGOUT entry.
I wish to delete any row which has the same [event] and [user_id] as previous row when ordered by [event_date]
Any suggestions on how to do this?
Example table
CREATE TABLE #LOG (
[id] int IDENTITY(1,1),
[user_id] int,
[event] varchar(50),
[event_date] datetime
);
INSERT INTO #LOG ([user_id], [event], [event_date])
SELECT 1,'LOGIN',{ts '2010-12-15 15:31:59'}
UNION ALL SELECT 1,'LOGOUT',{ts '2010-12-15 15:32:55'}
UNION ALL SELECT 1,'LOGIN',{ts '2010-12-15 15:38:04'}
UNION ALL SELECT 1,'LOGOUT',{ts '2010-12-15 15:38:17'}
UNION ALL SELECT 1,'LOGOUT',{ts '2010-12-15 15:38:45'} -- Delete
UNION ALL SELECT 2,'LOGIN',{ts '2010-12-15 16:59:39'}
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:00:08'}
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:00:39'} -- Delete
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:01:16'} -- Delete
UNION ALL SELECT 2,'LOGIN',{ts '2010-12-15 17:01:38'}
UNION ALL SELECT 2,'LOGIN',{ts '2010-12-15 17:02:26'} -- Delete
UNION ALL SELECT 2,'LOGOUT',{ts '2010-12-15 17:02:39'}

;WITH T1 AS
(
SELECT * ,
ROW_NUMBER() OVER (ORDER BY event_date)-
ROW_NUMBER() OVER (PARTITION BY [user_id], [event]
ORDER BY event_date) AS Grp
FROM #LOG
),T2 AS
(
SELECT
ROW_NUMBER() OVER (PARTITION BY [user_id], [event], Grp
ORDER BY event_date) RN
FROM T1
)
DELETE FROM T2
WHERE RN > 1

Using the ROW_NUMBER functionality of SQL Server would be an option
SQL Statement
;WITH q AS (
SELECT Rownumber = ROW_NUMBER() OVER (ORDER BY user_id, event_date)
, user_id
, event
, event_date
FROM #LOG
)
DELETE FROM #LOG
FROM #LOG l
INNER JOIN (
SELECT q2.*
FROM q q1
INNER JOIN q q2 ON q2.Rownumber = q1.Rownumber + 1
AND q2.user_id = q1.user_id
AND q2.event = q1.event
) q ON q.user_id = l.user_id
AND q.event_date = l.event_date
SELECT *
FROM #LOG

My understanding is that you want to delete entries such that the pattern is always In,Out,In,Out,etc.
This means a record is deleted if the preceding record (when order by user_id, then event_date), is of the same event.
There are two options I'd use to go about this...
DELETE
#log
WHERE
event = (
SELECT
TOP 1
event
FROM
#log AS [preceding]
WHERE
[preceding].user_id = #log.user_id
AND [preceding].event_date < #log.event_date
ORDER BY
[preceding].event_date DESC
)
Or...
WITH ordered_log AS (
SELECT
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY event_date) AS user_event_id,
*
FROM
#log
)
DELETE
ordered_log
FROM
ordered_log
INNER JOIN
ordered_log AS [preceding]
ON [preceding].login_id = [ordered_log].login_id
AND [preceding].user_event_id = [ordered_log].user_event_id - 1
WHERE
[preceding].event = [ordered_log].event
Either way, I highly recommend an Index covering user_id then event_date.
Note: The first version does not cope with the possibility of two events having the same timestamp. The latter, however, does.

If you have to delete duplicate row. Then no need to set the order by clause.
Try below
Delete l from #LOG l
Inner Join
(
Select id from #LOG l
Inner Join(
Select user_id, event from #LOG
group by user_id, event
having COUNT(user_id) > 1 and COUNT(event) > 1
)T
on (l.user_id = t.user_id) and (l.event = t.event)
)T
on T.id = l.id

Related

Select rows with same ID/email but different value in other table

Select rows with same ID/email but different value in other table
I have two tables: person and email, now there are mail addresses that have the same value, and persons/ID with different values.
Can anyone tell how to write an SQL query for this? I have tried but I can't figure it out. I have found some answers but then it is always finding the match in the same table
Like this
Table_person. ​​Table_email
1​​​ email#persoon1
2​​​ email#persoon2
3​​​ email#persoon3
4​​​ email#persoon1
5​​​ email#persoon5
6​​​ email#persoon2
The output should be
Table_person​​ Table_email
1​​​ email#persoon1
4​​​ email#persoon1
2​​​ email#persoon2
6​​​ email#persoon2
Using a common table expression with row_number()
;with cte as (
select *
, rn = row_number() over (partition by email order by person_id)
from email e
)
select *
from cte
where exists (
select 1
from cte i
where i.email = cte.email
and rn > 1
)
or using exists()
select *
from email e
where exists (
select 1
from email i
where i.email = e.email
and i.person_id <> e.person_id
)
rextester demo: http://rextester.com/JHFEF82373
Hope it will helps you
;with cte(Table_person,​​Table_email)
AS
(
SELECT 1​​​,'email#persoon1' UNION ALL
SELECT 2​​​,'email#persoon2' UNION ALL
SELECT 3​​​,'email#persoon3' UNION ALL
SELECT 4​​​,'email#persoon1' UNION ALL
SELECT 5​​​,'email#persoon5' UNION ALL
SELECT 6​​​,'email#persoon2'
)
,Cte2
AS
(
SELECT Table_person,​​Table_email From
(
Select Table_person,​​Table_email,ROW_NUMBER()OVER(Partition by Table_email Order By Table_person )Seq
from cte
)dt WHERE dt.Seq>1
)
,Final
AS
(
SELECT Table_person,​​Table_email From
(
Select Table_person,​​Table_email,ROW_NUMBER()OVER(Partition by Table_email Order By Table_email )Seq2
from cte
)dt
where dt.Seq2>1
Union ALL
SELECT Table_person,​​Table_email From cte2
)
SELECt Table_person,​​Table_email from Final

SQL Server 2012+ : Merge elements by time periods

I have been struggling for a while now with that problem and I need some help.
I have the following query :
CREATE TABLE Example(
Start NVARCHAR(8),
Endd NVARCHAR(8),
Col1 NVARCHAR(2),
Col2 NVARCHAR(2));
INSERT into Example (Start,Endd,Col1,Col2)
VALUES ('20130801','20140316','02','01'),
('20140317','20140319','04','02'),
('20140320','20140320','04','02'),
('20140321','20140421','02','Z8'),
('20140422','20140429','02','Z9'),
('20140430','20140902','04','02'),
('20140903','20150201','04','02'),
('20150202','20150223','04','02'),
('20150224','20150527','04','02'),
('20150528','99991231','04','02')
;
select MIN(Start)AS Start,MAX(Endd) AS Endd,Col1,Col2 from
(
SELECT top (100000000) Start, Endd,Col1, Col2,dense_rank() over(partition by Col1, Col2 order by Start,Endd) as rank
,LEAD (Col1) OVER (order by Start,Endd DESC) as l1
,LEAD (Col2) OVER (order by Start,Endd DESC) as l2
,LAG (Col1) OVER (order by Start,Endd DESC) as l11
,LAG (Col2) OVER (order by Start,Endd DESC) as l22
FROM Example sp
order by Start,Endd
)rq
GROUP BY Col1,Col2,case when (rq.l1=Col1 and rq.l2=Col2) or (rq.l11=Col1 and rq.l22=Col2) then 0 else rank end
order by Start,Endd;
My goal is to merge those data to have the following result:
However as you can see in the query result, when i have the same values for Col1 and Col2 on different time periods, the merge is not done correctly. It basically tries to merge them all in one, which create issues in the value for the new period.
Would someone be able to help me?
You were getting close in your query and you may have found a solution by now. This is a classic Islands and Gaps problem. I am giving the longer version with no use of LEAD AND LAG. You can replace perhaps 45% of the code below by using those windowing functions with perhaps a dense rank.
DECLARE #Example TABLE(
Start NVARCHAR(8),
Endd NVARCHAR(8),
Col1 NVARCHAR(2),
Col2 NVARCHAR(2));
INSERT into #Example (Start,Endd,Col1,Col2)
VALUES ('20130801','20140316','02','01'),
('20140317','20140319','04','02'),
('20140320','20140320','04','02'),
('20140321','20140421','02','Z8'),
('20140422','20140429','02','Z9'),
('20140430','20140902','04','02'),
('20140903','20150201','04','02'),
('20150202','20150223','04','02'),
('20150224','20150527','04','02'),
('20150528','99991231','04','02')
SELECT
TableID=MAX(TableID),Col1=MAX(Col1),Col2=MAX(Col2),Start=MIN(Start),Endd=MAX(Endd)
FROM
(
SELECT
TableID,Col1,Col2,Start,Endd,ChangeID=MAX(ChangeOnlyTableID)
FROM
(
SELECT
AllRecords.TableID,AllRecords.Col1,AllRecords.Col2,AllRecords.Start,AllRecords.Endd,ChangeOnlyTableID=ChangesOnly.TableID
FROM
(
SELECT * FROM
(
SELECT
This.Start,This.Endd,This.TableID,This.Col1,This.Col2,
Changed=CASE WHEN (Next.Col1=This.Col1 AND Next.Col2=This.Col2) THEN 0 ELSE 1 END
FROM
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
)AS This
LEFT OUTER JOIN
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
)
AS Next ON This.TableID=Next.TableID+1
)
AS ChangeMarkers
WHERE Changed=1
)
AS AllRecords
INNER JOIN
(
SELECT * FROM
(
SELECT
This.Start,This.Endd,This.TableID,This.Col1,This.Col2,
Changed=CASE WHEN (Next.Col1=This.Col1 AND Next.Col2=This.Col2) THEN 0 ELSE 1 END
FROM
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
) AS This
LEFT OUTER JOIN
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
) AS Next ON This.TableID=Next.TableID+1
)
AS ChangeMarkers
WHERE Changed=1
)
AS ChangesOnly ON ChangesOnly.Col1=AllRecords.Col1 AND ChangesOnly.Col2=AllRecords.Col2 AND ChangesOnly.TableID<=AllRecords.TableID
)AS JoinedResults
GROUP BY
TableID,Col1,Col2,Start,Endd
)
AS Final
GROUP BY
Col1,Col2,ChangeID
ORDER BY
MAX(TableID)
You may choose to shorten this somewhat with a few CTE's to produce a query such as:
;WITH TableWithIDs AS
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
)
,ChangeMarkers AS
(
SELECT
This.Start,This.Endd,This.TableID,This.Col1,This.Col2,
Changed=CASE WHEN (Next.Col1=This.Col1 AND Next.Col2=This.Col2) THEN 0 ELSE 1 END
FROM
TableWithIDs AS This
LEFT OUTER JOIN TableWithIDs AS Next ON This.TableID=Next.TableID+1
)
,ChangesOnly AS
(
SELECT * FROM ChangeMarkers WHERE Changed=1
)
,
JoinedResults AS
(
SELECT
AllRecords.TableID,AllRecords.Col1,AllRecords.Col2,AllRecords.Start,AllRecords.Endd,ChangeOnlyTableID=ChangesOnly.TableID
FROM
ChangeMarkers AllRecords
INNER JOIN ChangesOnly ON ChangesOnly.Col1=AllRecords.Col1 AND ChangesOnly.Col2=AllRecords.Col2 AND ChangesOnly.TableID<=AllRecords.TableID
)
SELECT
TableID=MAX(TableID),Col1=MAX(Col1),Col2=MAX(Col2),Start=MIN(Start),Endd=MAX(Endd)
FROM
(
SELECT
TableID,Col1,Col2,Start,Endd,ChangeID=MAX(ChangeOnlyTableID)
FROM
JoinedResults
GROUP BY
TableID,Col1,Col2,Start,Endd
)
AS Final
GROUP BY
Col1,Col2,ChangeID
ORDER BY
MAX(TableID)
There are also some clever hacks that can be applied further using virtual keys however I went the most direct but more verbose route. You should be able to improve on this using a DENSE_RANK() with LEAD() OR LAG()

Get the maximum values of column B per each distinct value of column A sql

I have this table:
I am trying to pull all records from this table for the max value in the DIST_NO column for every distinct ID in the left most column, but I still want to pull every record for each ID in which there are different Product_ID's as well.
I tried partitioning and using row_number, but I am having trouble at the moment.
Here are my desired results:
This is what my code looks like currently:
select *
from
(SELECT *,
ROW_NUMBER() OVER (PARTITION BY ID ORDER BY DIST_NO DESC) RN
FROM Table) V
WHERE RN<=3
you want the max(DIST_NO) for each ID, product_ID?
If so, you can:
SELECT
ID, product_ID, max(DIST_NO)
from table
group by ID, product_ID
If you want the detail rows related to the max row, you just need to join it back to your table:
Select
t.ID, max_dist_no, TRANSaction_ID , LINE_NO , PRODUCT_ID
from
table t inner join
(SELECT
ID, max(DIST_NO) as max_dist_no
from table
group by ID) mx on
t.ID = mx.ID and
t.DIST_NO = max_DIST_NO
Try
SELECT MT.ID
, MT.DIST_NO
, MT.TRANS_ID
, MT.LINE_NO
, MT.PRODUCT_ID
FROM MYTABLE MT
INNER JOIN (
SELECT T.ID, MAX(T.DIST_NO) as DIST_NO FROM MYTABLE T
GROUP BY T.ID
) MAX_MT ON MT.Id = MAX_MT.ID AND MT.DIST_NO = MAX_MT.DIST_NO
The sub query returns each combination of ID and Max value of DIST_NO:
SELECT T.ID, MAX(T.DIST_NO) as DIST_NO FROM MYTABLE T
GROUP BY T.ID
Joining this back to your original table will basically filter your original data-set by only these combinations of values.
Tested on PostgreSQL:
WITH t1 AS (
SELECT id, product_id, MAX(dist_no) AS dist_no
FROM test
GROUP BY 1,2)
SELECT t1.id, t1.dist_no, t2.trans_id, t2.line_no, t1.product_id
FROM test t2, t1
WHERE t1.id=t2.id AND t1.product_id=t2.product_id AND t1.dist_no=t2.dist_no
Use rank() or dense_rank():
select t.*
from (SELECT t.*
RANK() OVER (PARTITION BY ID ORDER BY DIST_NO DESC) as seqnum
FROM Table t
) t
WHERE seqnum = 1;
This is almost a literal translation of your request:
I am trying to pull all records from this table for the max value in
the DIST_NO column for every distinct ID in the left most column.
you can try something like this one :). (But is your result correct? I think there is little mistake in TRANS_ID...)
DECLARE #ExampleTable TABLE
(ID INT,
DIST_NO INT,
TRANS_ID INT,
LINE_NO INT,
PRODUCT_ID INT)
INSERT INTO #ExampleTable
( ID, DIST_NO, TRANS_ID,LINE_NO, PRODUCT_ID )
VALUES ( 102657, 1, 1105365, 1, 109119 ),
( 102657, 1, 1105366, 2, 109114 ),
( 102657, 2, 1105365, 1, 109119 ),
( 102657, 2, 1105366, 2, 109114 ),
( 104371, 1, 1190538, 1, 110981 ),
( 104371, 2, 1190538, 1, 110981 )
;WITH CTE AS ( SELECT DISTINCT ID, LINE_NO
FROM #ExampleTable)
SELECT a.ID,
x.DIST_NO,
x.TRANS_ID,
x.LINE_NO,
x.PRODUCT_ID
FROM CTE a
CROSS APPLY (SELECT TOP 1 *
FROM #ExampleTable f
WHERE a.ID = f.ID AND
a.LINE_NO = f. LINE_NO
ORDER BY DIST_NO DESC) x

SQL - Return previous record details as column by date

I am trying to get a list of records showing changes in location and dates to display as one row for each record showing previous location.
Basically a query to take data like:
And display it like:
I tried using lag, but it mixes up some of the records. Would anyone be able suggest a good way to do this?
Thanks!
DECLARE #TABLE TABLE
(ID INT ,NAME VARCHAR(20), LOCATIONDATE DATETIME, REASON VARCHAR(20))
INSERT INTO #TABLE
(ID,NAME, LOCATIONDATE, REASON)
VALUES
( 1,'abc',CAST('2016/01/01' AS SMALLDATETIME),'move'),
( 2,'def',CAST('2016/02/01' AS SMALLDATETIME),'move'),
( 1,'abc',CAST('2016/06/01' AS SMALLDATETIME),'move'),
( 2,'def',CAST('2016/07/01' AS SMALLDATETIME),'move'),
( 1,'abc',CAST('2016/08/01' AS SMALLDATETIME),'move'),
( 3,'ghi',CAST('2016/08/01' AS SMALLDATETIME),'move')
select s.*
,t1.*
from
(
select t.*,
row_number() over (partition by id order by locationdate desc) rn
from #table t
) s
left join
(
select t.*,
row_number() over (partition by id order by locationdate desc) rn
from #table t
) t1
on t1.id = s.id and t1.rn = s.rn + 1
You can try it:
SELECT a.id,a.name,a.location as currentLocation,a.locationdatedate as currrentLocate,b.location as preLocation,b.locationdatedate as prevLocate,a.changereason
FROM test as a
JOIN test as b ON a.name = b.name
where a.locationdatedate > b.locationdatedate
group by a.name
Pleas try this one. it works here
SELECT l.id,
,l.name
,l.location as currentLocation
,l.locationdatedate as currrentLocate
,r.location as preLocation
,r.locationdatedate as prevLocate
,r.changereason
FROM tableName AS l inner join
tableName AS r ON r.id=l.id
WHERE l.locationdatedate !=r.locationdatedate AND l.locationdatedate > r.locationdatedate

How do I get records before and after given one?

I have the following table structure:
Id, Message
1, John Doe
2, Jane Smith
3, Error
4, Jane Smith
Is there a way to get the error record and the surrounding records? i.e. find all Errors and the record before and after them.
;WITH numberedlogtable AS
(
SELECT Id,Message,
ROW_NUMBER() OVER (ORDER BY ID) AS RN
FROM logtable
)
SELECT Id,Message
FROM numberedlogtable
WHERE RN IN (SELECT RN+i
FROM numberedlogtable
CROSS JOIN (SELECT -1 AS i UNION ALL SELECT 0 UNION ALL SELECT 1) n
WHERE Message='Error')
WITH err AS
(
SELECT TOP 1 *
FROM log
WHERE message = 'Error'
ORDER BY
id
),
p AS
(
SELECT TOP 1 l.*
FROM log
WHERE id <
(
SELECT id
FROM err
)
ORDER BY
id DESC
)
SELECT TOP 3 *
FROM log
WHERE id >
(
SELECT id
FROM p
)
ORDER BY
id
Adapt this routine to pick out your target.
DECLARE #TargetId int
SET #TargetId = 3
select *
from LogTable
where Id in (-- "before"
select max(Id)
from LogTable
where Id < #TargetId
-- target
union all select #TargetId
-- "after"
union all select min(Id)
from LogTable
where Id > #TargetId)
select id,messag from
(Select (Row_Number() over (order by ID)) as RNO, * from #Temp) as A,
(select SubRNO-1 as A,
SubRNO as B,
SubRNO+1 as C
from (Select (Row_Number() over (order by ID)) as SubRNO, * from #Temp) as C
where messag = 'Error') as B
where A.RNO = B.A or A.RNO = B.B or A.RNO = B.C
;WITH Logs AS
(
SELECT ROW_NUMBER() OVER (ORDER BY id), id, message as rownum FROM LogTable lt
)
SELECT curr.id, prev.id, next.id
FROM Logs curr
LEFT OUTER JOIN Logs prev ON curr.rownum+1=prev.rownum
RIGHT OUTER JOIN Logs next ON curr.rownum-1=next.rownum
WHERE curr.message = 'Error'
select id, message from tbl where id in (
select id from tbl where message = "error"
union
select id-1 from tbl where message = "error"
union
select id+1 from tbl where message = "error"
)
Get fixed number of rows before & after target
Using UNION for a simple, high performance query (I found selected answer WITH query above to be extremely slow)
Here is a high performance alternative to the WITH top selected answer, when you know an ID or specific identifier for a given record, and you want to select a fixed number of records BEFORE and AFTER that record. Requires a number field for ID, or something like date that can be sorted ascending / descending.
Example: You want to select the 10 records before and after a specific error was recorded, you know the error ID, and can sort by date or ID.
The following query gets (inclusive) the 1 result above, the identified record itself, and the 1 record below. After the UNION, the results are sorted again in descending order.
SELECT q.*
FROM(
SELECT TOP 2
id, content
FROM
the_table
WHERE
id >= [ID]
ORDER BY id ASC
UNION
SELECT TOP 1
id, content
FROM
the_table
WHERE
id < [ID]
ORDER BY id DESC
) q
ORDER BY q.id DESC