SQL Server 2012+ : Merge elements by time periods - sql

I have been struggling for a while now with that problem and I need some help.
I have the following query :
CREATE TABLE Example(
Start NVARCHAR(8),
Endd NVARCHAR(8),
Col1 NVARCHAR(2),
Col2 NVARCHAR(2));
INSERT into Example (Start,Endd,Col1,Col2)
VALUES ('20130801','20140316','02','01'),
('20140317','20140319','04','02'),
('20140320','20140320','04','02'),
('20140321','20140421','02','Z8'),
('20140422','20140429','02','Z9'),
('20140430','20140902','04','02'),
('20140903','20150201','04','02'),
('20150202','20150223','04','02'),
('20150224','20150527','04','02'),
('20150528','99991231','04','02')
;
select MIN(Start)AS Start,MAX(Endd) AS Endd,Col1,Col2 from
(
SELECT top (100000000) Start, Endd,Col1, Col2,dense_rank() over(partition by Col1, Col2 order by Start,Endd) as rank
,LEAD (Col1) OVER (order by Start,Endd DESC) as l1
,LEAD (Col2) OVER (order by Start,Endd DESC) as l2
,LAG (Col1) OVER (order by Start,Endd DESC) as l11
,LAG (Col2) OVER (order by Start,Endd DESC) as l22
FROM Example sp
order by Start,Endd
)rq
GROUP BY Col1,Col2,case when (rq.l1=Col1 and rq.l2=Col2) or (rq.l11=Col1 and rq.l22=Col2) then 0 else rank end
order by Start,Endd;
My goal is to merge those data to have the following result:
However as you can see in the query result, when i have the same values for Col1 and Col2 on different time periods, the merge is not done correctly. It basically tries to merge them all in one, which create issues in the value for the new period.
Would someone be able to help me?

You were getting close in your query and you may have found a solution by now. This is a classic Islands and Gaps problem. I am giving the longer version with no use of LEAD AND LAG. You can replace perhaps 45% of the code below by using those windowing functions with perhaps a dense rank.
DECLARE #Example TABLE(
Start NVARCHAR(8),
Endd NVARCHAR(8),
Col1 NVARCHAR(2),
Col2 NVARCHAR(2));
INSERT into #Example (Start,Endd,Col1,Col2)
VALUES ('20130801','20140316','02','01'),
('20140317','20140319','04','02'),
('20140320','20140320','04','02'),
('20140321','20140421','02','Z8'),
('20140422','20140429','02','Z9'),
('20140430','20140902','04','02'),
('20140903','20150201','04','02'),
('20150202','20150223','04','02'),
('20150224','20150527','04','02'),
('20150528','99991231','04','02')
SELECT
TableID=MAX(TableID),Col1=MAX(Col1),Col2=MAX(Col2),Start=MIN(Start),Endd=MAX(Endd)
FROM
(
SELECT
TableID,Col1,Col2,Start,Endd,ChangeID=MAX(ChangeOnlyTableID)
FROM
(
SELECT
AllRecords.TableID,AllRecords.Col1,AllRecords.Col2,AllRecords.Start,AllRecords.Endd,ChangeOnlyTableID=ChangesOnly.TableID
FROM
(
SELECT * FROM
(
SELECT
This.Start,This.Endd,This.TableID,This.Col1,This.Col2,
Changed=CASE WHEN (Next.Col1=This.Col1 AND Next.Col2=This.Col2) THEN 0 ELSE 1 END
FROM
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
)AS This
LEFT OUTER JOIN
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
)
AS Next ON This.TableID=Next.TableID+1
)
AS ChangeMarkers
WHERE Changed=1
)
AS AllRecords
INNER JOIN
(
SELECT * FROM
(
SELECT
This.Start,This.Endd,This.TableID,This.Col1,This.Col2,
Changed=CASE WHEN (Next.Col1=This.Col1 AND Next.Col2=This.Col2) THEN 0 ELSE 1 END
FROM
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
) AS This
LEFT OUTER JOIN
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
) AS Next ON This.TableID=Next.TableID+1
)
AS ChangeMarkers
WHERE Changed=1
)
AS ChangesOnly ON ChangesOnly.Col1=AllRecords.Col1 AND ChangesOnly.Col2=AllRecords.Col2 AND ChangesOnly.TableID<=AllRecords.TableID
)AS JoinedResults
GROUP BY
TableID,Col1,Col2,Start,Endd
)
AS Final
GROUP BY
Col1,Col2,ChangeID
ORDER BY
MAX(TableID)
You may choose to shorten this somewhat with a few CTE's to produce a query such as:
;WITH TableWithIDs AS
(
SELECT TableID=ROW_NUMBER() OVER(ORDER BY Start,Endd,Col1,Col2),Start,Endd,Col1,Col2 FROM #Example
)
,ChangeMarkers AS
(
SELECT
This.Start,This.Endd,This.TableID,This.Col1,This.Col2,
Changed=CASE WHEN (Next.Col1=This.Col1 AND Next.Col2=This.Col2) THEN 0 ELSE 1 END
FROM
TableWithIDs AS This
LEFT OUTER JOIN TableWithIDs AS Next ON This.TableID=Next.TableID+1
)
,ChangesOnly AS
(
SELECT * FROM ChangeMarkers WHERE Changed=1
)
,
JoinedResults AS
(
SELECT
AllRecords.TableID,AllRecords.Col1,AllRecords.Col2,AllRecords.Start,AllRecords.Endd,ChangeOnlyTableID=ChangesOnly.TableID
FROM
ChangeMarkers AllRecords
INNER JOIN ChangesOnly ON ChangesOnly.Col1=AllRecords.Col1 AND ChangesOnly.Col2=AllRecords.Col2 AND ChangesOnly.TableID<=AllRecords.TableID
)
SELECT
TableID=MAX(TableID),Col1=MAX(Col1),Col2=MAX(Col2),Start=MIN(Start),Endd=MAX(Endd)
FROM
(
SELECT
TableID,Col1,Col2,Start,Endd,ChangeID=MAX(ChangeOnlyTableID)
FROM
JoinedResults
GROUP BY
TableID,Col1,Col2,Start,Endd
)
AS Final
GROUP BY
Col1,Col2,ChangeID
ORDER BY
MAX(TableID)
There are also some clever hacks that can be applied further using virtual keys however I went the most direct but more verbose route. You should be able to improve on this using a DENSE_RANK() with LEAD() OR LAG()

Related

2 rows differences

I would like to get 2 consecutive rows from an SQL table.
One of the columns storing UNIX datestamp and between 2 rows the difference only this value.
For example:
id_int dt_int
1. row 8211721 509794233
2. row 8211722 509794233
I need only those rows where dt_int the same (edited)
Do you want both lines to be shown?
A solution could be this:
with foo as
(
select
*
from (values (8211721),(8211722),(8211728),(8211740),(8211741)) a(id_int)
)
select
id_int
from
(
select
id_int
,id_int-isnull(lag(id_int,1) over (order by id_int) ,id_int-6) prev
,isnull(lead(id_int,1) over (order by id_int) ,id_int+6)-id_int nxt
from foo
) a
where prev<=5 or nxt<=5
We use lead and lag, to find the differences between rows, and keep the rows where there is less than or equal to 5 for the row before or after.
If you use 2008r2, then lag and lead are not available. You could use rownumber in stead:
with foo as
(
select
*
from (values (8211721),(8211722),(8211728),(8211740),(8211741)) a(id_int)
)
, rownums as
(
select
id_int
,row_number() over (order by id_int) rn
from foo
)
select
id_int
from
(
select
cur.id_int
,cur.id_int-prev.id_int prev
,nxt.id_int-cur.id_int nxt
from rownums cur
left join rownums prev
on cur.rn-1=prev.rn
left join rownums nxt
on cur.rn+1=nxt.rn
) a
where isnull(prev,6)<=5 or isnull(nxt,6)<=5
Assuming:
lead() analytical function available.
ID_INT is what we need to sort by to determine table order...
you may need to partition by some value lead(ID_int) over(partition by SomeKeysuchasOrderNumber order by ID_int asc) so that orders and dates don't get mixed together.
.
WITH CTE AS (
SELECT A.*
, lead(ID_int) over ([missing partition info] ORDER BY id_Int asc) - id_int as ID_INT_DIFF
FROM Table A)
SELECT *
FROM CTE
WHERE ID_INT_DIFF < 5;
You can try it. This version works on SQL Server 2000 and above. Today I don not a more recent SQL Server to write on.
declare #t table (id_int int, dt_int int)
INSERT #T SELECT 8211721 , 509794233
INSERT #T SELECT 8211722 , 509794233
INSERT #T SELECT 8211723 , 509794235
INSERT #T SELECT 8211724 , 509794236
INSERT #T SELECT 8211729 , 509794237
INSERT #T SELECT 8211731 , 509794238
;with cte_t as
(SELECT
ROW_NUMBER() OVER (ORDER BY id_int) id
,id_int
,dt_int
FROM #t),
cte_diff as
( SELECT
id_int
,dt_int
,(SELECT TOP 1 dt_int FROM cte_t b WHERE a.id < b.id) dt_int1
,dt_int - (SELECT TOP 1 dt_int FROM cte_t b WHERE a.id < b.id) Difference
FROM cte_t a
)
SELECT DISTINCT id_int , dt_int FROM #t a
WHERE
EXISTS(SELECT 1 FROM cte_diff b where b.Difference =0 and a.dt_int = b.dt_int)

SQL - Return previous record details as column by date

I am trying to get a list of records showing changes in location and dates to display as one row for each record showing previous location.
Basically a query to take data like:
And display it like:
I tried using lag, but it mixes up some of the records. Would anyone be able suggest a good way to do this?
Thanks!
DECLARE #TABLE TABLE
(ID INT ,NAME VARCHAR(20), LOCATIONDATE DATETIME, REASON VARCHAR(20))
INSERT INTO #TABLE
(ID,NAME, LOCATIONDATE, REASON)
VALUES
( 1,'abc',CAST('2016/01/01' AS SMALLDATETIME),'move'),
( 2,'def',CAST('2016/02/01' AS SMALLDATETIME),'move'),
( 1,'abc',CAST('2016/06/01' AS SMALLDATETIME),'move'),
( 2,'def',CAST('2016/07/01' AS SMALLDATETIME),'move'),
( 1,'abc',CAST('2016/08/01' AS SMALLDATETIME),'move'),
( 3,'ghi',CAST('2016/08/01' AS SMALLDATETIME),'move')
select s.*
,t1.*
from
(
select t.*,
row_number() over (partition by id order by locationdate desc) rn
from #table t
) s
left join
(
select t.*,
row_number() over (partition by id order by locationdate desc) rn
from #table t
) t1
on t1.id = s.id and t1.rn = s.rn + 1
You can try it:
SELECT a.id,a.name,a.location as currentLocation,a.locationdatedate as currrentLocate,b.location as preLocation,b.locationdatedate as prevLocate,a.changereason
FROM test as a
JOIN test as b ON a.name = b.name
where a.locationdatedate > b.locationdatedate
group by a.name
Pleas try this one. it works here
SELECT l.id,
,l.name
,l.location as currentLocation
,l.locationdatedate as currrentLocate
,r.location as preLocation
,r.locationdatedate as prevLocate
,r.changereason
FROM tableName AS l inner join
tableName AS r ON r.id=l.id
WHERE l.locationdatedate !=r.locationdatedate AND l.locationdatedate > r.locationdatedate

Getting top 2 rows in each group without row_number() in SQL Server

I am looking for a simple query to get result of 2 rows with latest invoice date in each group. Although this task can be accomplished by a row_number() that you can see in below code ,I need an alternative to this with minimum complexity.
Code :
create table #tt
(
id int,
invoiceDT datetime
)
insert into #tt
values(1,'01-01-2016 00:12'),(1,'01-02-2016 06:16'),(1,'01-01-2016 00:16')
,(2,'01-01-2016 01:12'),(2,'04-02-2016 06:16'),(2,'01-06-2016 00:16')
select *
from (
SELECT id,invoiceDT,row_number() over(partition by id order by invoiceDT desc) as rownum
FROM #tt
)tmp
where rownum <=2
I need same result that is returned by above query
Please suggest an alternative.
Strange request, but here you go:
WITH CTE as
(
SELECT distinct id FROM #tt t1
)
SELECT x.*
FROM CTE
CROSS APPLY
(
SELECT top 2 *
FROM #tt
WHERE CTE.id = id
ORDER BY invoiceDT desc
) x

SQL group by if values are close

Class| Value
-------------
A | 1
A | 2
A | 3
A | 10
B | 1
I am not sure whether it is practical to achieve this using SQL.
If the difference of values are less than 5 (or x), then group the rows (of course with the same Class)
Expected result
Class| ValueMin | ValueMax
---------------------------
A | 1 | 3
A | 10 | 10
B | 1 | 1
For fixed intervals, we can easily use "GROUP BY". But now the grouping is based on nearby row's value. So if the values are consecutive or very close, they will be "chained together".
Thank you very much
Assuming MSSQL
You are trying to group things by gaps between values. The easiest way to do this is to use the lag() function to find the gaps:
select class, min(value) as minvalue, max(value) as maxvalue
from (select class, value,
sum(IsNewGroup) over (partition by class order by value) as GroupId
from (select class, value,
(case when lag(value) over (partition by class order by value) > value - 5
then 0 else 1
end) as IsNewGroup
from t
) t
) t
group by class, groupid;
Note that this assumes SQL Server 2012 for the use of lag() and cumulative sum.
Update:
*This answer is incorrect*
Assuming the table you gave is called sd_test, the following query will give you the output you are expecting
In short, we need a way to find what was the value on the previous row. This is determined using a join on row ids. Then create a group to see if the difference is less than 5. and then it is just regular 'Group By'.
If your version of SQL Server supports windowing functions with partitioning the code would be much more readable.
SELECT
A.CLASS
,MIN(A.VALUE) AS MIN_VALUE
,MAX(A.VALUE) AS MAX_VALUE
FROM
(SELECT
ROW_NUMBER()OVER(PARTITION BY CLASS ORDER BY VALUE) AS ROW_ID
,CLASS
,VALUE
FROM SD_TEST) AS A
LEFT JOIN
(SELECT
ROW_NUMBER()OVER(PARTITION BY CLASS ORDER BY VALUE) AS ROW_ID
,CLASS
,VALUE
FROM SD_TEST) AS B
ON A.CLASS = B.CLASS AND A.ROW_ID=B.ROW_ID+1
GROUP BY A.CLASS,CASE WHEN ABS(COALESCE(B.VALUE,0)-A.VALUE)<5 THEN 1 ELSE 0 END
ORDER BY A.CLASS,cASE WHEN ABS(COALESCE(B.VALUE,0)-A.VALUE)<5 THEN 1 ELSE 0 END DESC
ps: I think the above is ANSI compliant. So should run in most SQL variants. Someone can correct me if it is not.
These give the correct result, using the fact that you must have the same number of group starts as ends and that they will both be in ascending order.
if object_id('tempdb..#temp') is not null drop table #temp
create table #temp (class char(1),Value int);
insert into #temp values ('A',1);
insert into #temp values ('A',2);
insert into #temp values ('A',3);
insert into #temp values ('A',10);
insert into #temp values ('A',13);
insert into #temp values ('A',14);
insert into #temp values ('b',7);
insert into #temp values ('b',8);
insert into #temp values ('b',9);
insert into #temp values ('b',12);
insert into #temp values ('b',22);
insert into #temp values ('b',26);
insert into #temp values ('b',67);
Method 1 Using CTE and row offsets
with cte as
(select distinct class,value,ROW_NUMBER() over ( partition by class order by value ) as R from #temp),
cte2 as
(
select
c1.class
,c1.value
,c2.R as PreviousRec
,c3.r as NextRec
from
cte c1
left join cte c2 on (c1.class = c2.class and c1.R= c2.R+1 and c1.Value < c2.value + 5)
left join cte c3 on (c1.class = c3.class and c1.R= c3.R-1 and c1.Value > c3.value - 5)
)
select
Starts.Class
,Starts.Value as StartValue
,Ends.Value as EndValue
from
(
select
class
,value
,row_number() over ( partition by class order by value ) as GroupNumber
from cte2
where PreviousRec is null) as Starts join
(
select
class
,value
,row_number() over ( partition by class order by value ) as GroupNumber
from cte2
where NextRec is null) as Ends on starts.class=ends.class and starts.GroupNumber = ends.GroupNumber
** Method 2 Inline views using not exists **
select
Starts.Class
,Starts.Value as StartValue
,Ends.Value as EndValue
from
(
select class,Value ,row_number() over ( partition by class order by value ) as GroupNumber
from
(select distinct class,value from #temp) as T
where not exists (select 1 from #temp where class=t.class and Value < t.Value and Value > t.Value -5 )
) Starts join
(
select class,Value ,row_number() over ( partition by class order by value ) as GroupNumber
from
(select distinct class,value from #temp) as T
where not exists (select 1 from #temp where class=t.class and Value > t.Value and Value < t.Value +5 )
) ends on starts.class=ends.class and starts.GroupNumber = ends.GroupNumber
In both methods I use a select distinct to begin because if you have a dulpicate entry at a group start or end things go awry without it.
Here is one way of getting the information you are after:
SELECT Under5.Class,
(
SELECT MIN(m2.Value)
FROM MyTable AS m2
WHERE m2.Value < 5
AND m2.Class = Under5.Class
) AS ValueMin,
(
SELECT MAX(m3.Value)
FROM MyTable AS m3
WHERE m3.Value < 5
AND m3.Class = Under5.Class
) AS ValueMax
FROM
(
SELECT DISTINCT m1.Class
FROM MyTable AS m1
WHERE m1.Value < 5
) AS Under5
UNION
SELECT Over4.Class,
(
SELECT MIN(m4.Value)
FROM MyTable AS m4
WHERE m4.Value >= 5
AND m4.Class = Over4.Class
) AS ValueMin,
(
SELECT Max(m5.Value)
FROM MyTable AS m5
WHERE m5.Value >= 5
AND m5.Class = Over4.Class
) AS ValueMax
FROM
(
SELECT DISTINCT m6.Class
FROM MyTable AS m6
WHERE m6.Value >= 5
) AS Over4

Delete one row from same rows

I have a table T with (first, second) columns. I have two rows with first=1 and second=2. I would like to delete just one of the rows. How do I do that?
;WITH CTE AS
(
SELECT TOP 1 *
FROM YourTable
WHERE first=1 and second=2
)
DELETE FROM CTE;
Or if SQL Server 2000
DELETE T
FROM (
SELECT TOP 1 *
FROM YourTable
WHERE [first]=1 and [second]=2
) T;
Then add a primary key.
You can use ROW_NUMBER().
DECLARE #T as Table(First int , Second int )
INsert Into #T
Values (1,2),
(1,2)
SELECT * FROM #T
;WITH CTE as
(SELECT ROW_NUMBER() over (order by first,second) rn , * from #T)
DELETE FROM CTE where rn = 1
select * from #T
If you change rn to include Partition by
ROW_NUMBER() over (PARTITION BY first, second order by first,second)
and change the where to be WHERE RN <> 1
you could use this as a general solution to remove any dupes on First, Second