Group sequential integers postgres - sql

I have a query that returns:
1
2
5
7
8
I'd like to group the rows like:
{1,2}
{5}
{7,8}
In other words I need to group the sequential values.
Any idea?

This is a Gaps and Islands problem.
You can generate a row number, subtract it from each value, and group by the result: consecutive values share the same difference.
-- Sample data: one integer column with gaps after 2 and after 5.
CREATE TABLE T (
    ID INT
);

INSERT INTO T (ID)
VALUES (1), (2), (5), (7), (8);
Query 1:
-- Consecutive IDs share the same (ID - row number) offset, so grouping by
-- that offset yields one row per run of sequential values.
WITH numbered AS (
    SELECT
        ID,
        ID - ROW_NUMBER() OVER (ORDER BY ID) AS run_key
    FROM T
),
CTE AS (
    SELECT
        MIN(ID) AS minid,
        MAX(ID) AS maxid
    FROM numbered
    GROUP BY run_key
)
-- A single-value run prints just the value; longer runs print "first,last".
SELECT
    CASE
        WHEN minid = maxid THEN CAST(maxid AS VARCHAR(50))
        ELSE CONCAT(minid, ',', maxid)
    END AS ID
FROM CTE
ORDER BY minid
Results:
| id |
|-----|
| 1,2 |
| 5 |
| 7,8 |

Based on your response to the reason for the need, that you are trying to locate broken sequences in a table. I would identify gaps like this. It doesn't give you explicitly what you requested, but what you requested is quite a bit more complex.
-- Return every value that has no immediate successor (value + 1) in the
-- table, i.e. the last value of each consecutive run.
SELECT cur.col1
FROM mytable AS cur
WHERE NOT EXISTS (
    SELECT 1
    FROM mytable AS nxt
    WHERE nxt.col1 = cur.col1 + 1
)
This would return the last number that was sequential in each set prior to the gap.
To get to exactly what you wanted you could use the results of this table and some between statements to ultimately get to your explicit request or something more complex. You may be over-thinking it though.

This looks like an array would be suitable, so:
-- Rows of one consecutive run share the same (col - seqnum) offset;
-- aggregate each run into an ordered array.
with numbered as (
    select
        t.*,
        row_number() over (order by col) as seqnum
    from t
)
select array_agg(col order by col)
from numbered
group by (col - seqnum);
EDIT:
If you just want the start and end, use max() and min():
-- First and last value of each consecutive run.
with numbered as (
    select
        t.*,
        row_number() over (order by col) as seqnum
    from t
)
select
    min(col),
    max(col)
from numbered
group by (col - seqnum);

Related

Select first occurrence of list item in table

I have a list like this example:
abc, efg, rty
and a table with following data:
1 abcd
2 efgh
3 abcd
4 rtyu
5 efgh
Now I want to find, for each list item, the first row in the table whose text starts with that item. My expected result is:
1 abcd
2 efgh
4 rtyu
This is a complete script to do the job
-- Table variables in T-SQL are named with a leading "@"; DECLARE ... TABLE
-- does not accept a "#" name.
Declare @v_List Table
(
    Text nvarchar(100)
)
Declare @v_Data Table
(
    Number int,
    Text nvarchar(100)
)
Insert Into @v_List values(N'abc')
Insert Into @v_List values(N'efg')
Insert Into @v_List values(N'rty')
Insert Into @v_Data values(1, N'abcd')
Insert Into @v_Data values(2, N'efgh')
Insert Into @v_Data values(3, N'abcd')
Insert Into @v_Data values(4, N'rtyu')
Insert Into @v_Data values(5, N'efgh')
;with CTE as
(
    -- Number the data rows matching each list item, lowest Number first.
    Select D.Number,
           D.Text,
           ROW_NUMBER() OVER (PARTITION BY L.Text Order By D.Number) as Row_No
    From @v_Data D
    Join @v_List L
      On D.Text like L.Text + '%'
)
-- Keep only the first matching row for each list item.
Select CTE.Number,
       CTE.Text
From CTE
Where CTE.Row_No = 1
-- Map each row to the list item its Val starts with (0 = no match), then
-- return the full row having the smallest Id for every matched item.
select *
from TableName
where Id in
(
    select min(matched.Id)
    from
    (
        select Id,
               case
                   when Val like 'abc%' then 1
                   when Val like 'efg%' then 2
                   when Val like 'rty%' then 3
                   else 0
               end as temp
        from TableName
    ) as matched
    where matched.temp > 0
    group by matched.temp
)
You can use a windowed ROW_NUMBER to generate a sequential number by each different value, then just display the first one only.
-- Number the rows of each distinct Value by ascending ID, then keep the
-- first row of each requested value.
;WITH RowNumbersByValue AS
(
    SELECT
        T.ID,
        T.Value,
        ROW_NUMBER() OVER (PARTITION BY T.Value ORDER BY T.ID) AS RowNumber
    FROM YourTable AS T
)
SELECT
    R.ID,
    R.Value
FROM RowNumbersByValue AS R
WHERE R.RowNumber = 1
  AND R.Value IN ('abcd', 'efgh', 'rtyu')
For SQL Server I prefer this version, which does not require a subquery:
-- Sort by the per-Value row number and keep every tie for first place, so
-- each row numbered 1 (the lowest ID of its Value) is returned.
SELECT TOP 1 WITH TIES
    ID,
    Value
FROM yourTable
WHERE Value LIKE 'abc%'
   OR Value LIKE 'efg%'
   OR Value LIKE 'rty%'
ORDER BY ROW_NUMBER() OVER (PARTITION BY Value ORDER BY ID);
-- Build the sample data in a temporary table.
SELECT
    a.[id],
    a.[name]
INTO #temp
FROM (VALUES
    (1, 'abcd'),
    (2, 'efgh'),
    (3, 'abcd'),
    (4, 'rtyu'),
    (5, 'efgh')
) AS a ([id], [name])
You can use min and group by function
-- Smallest id for every distinct name.
SELECT
    MIN(id),
    name
FROM #temp
GROUP BY name
You may use this, there are so many ways to achieve this, use whichever suits you better.
using subquery
-- Number the rows of each col by ascending id and keep the first of each.
select
    tb.id,
    tb.col
from (
    select
        row_number() over (partition by col order by id) as slno,
        id,
        col
    from yourtable
) as tb
where tb.slno = 1
using cte
-- Number the rows of each col by ascending id and keep the first of each.
-- "table" is a reserved word and cannot be used as a bare table name;
-- yourtable is a placeholder for the real table.
; with cte as (
    select
        row_number() over (partition by col order by id) as Slno,
        id,
        col
    from yourtable
)
select id, col
from cte
where Slno = 1
using min
-- Smallest id per col. yourtable is a placeholder for the real table
-- ("table" itself is a reserved word and cannot be used unquoted).
select Min(id), col
from yourtable
group by col
Note:-
In the end of any above mentioned query you may apply your where clause to filter your records as needed.

Selecting entries with biggest value less than list of values

Suppose my table structure as follows:
id | Word
---|-----
1 | a
2 | aa
. | ..
I have a list of id's like this:
(...,900, 1000, 2000, 3000, 4000,....)
I want to find the biggest id less than each id in the above list.
My table id's is not necessarily consecutive and there are some gaps between two successive id's, for example:
(...,889,900,950,952,997,1000,1001,1010,1920,2000,2990,3000,3500,4000,...)
The expected result according to the above list would be:
(889, 997, 1920, 2990, 3500,...)
How do i achieve desired results?
Use a common table expression and ROW_NUMBER()
;WITH cte AS
(
    -- Number every row by ascending ID so the preceding row is rowNum - 1.
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY ID) AS rowNum
    FROM example
)
SELECT
    ID,
    word
FROM cte
WHERE rowNum IN (
    -- Row numbers directly before each of the searched IDs.
    SELECT (rowNum - 1)
    FROM cte
    WHERE ID IN ('900','1000','2000','3000','4000'))
    --WHERE ID IN (SELECT ID FROM <tableWithIDs>))
If you already have all of the ID you are looking for in another table, you would instead use the commented portion of my answer instead of the hardcoded IN list.
This will work only if the ID you are looking for exists in the table. So, as noted in a comment below if you were searching for 1001 you would not get 997, unless 1001 existed in the table (meaning, if it existed it would get a rowNum value and could be used to decrement in the subquery)
[DEMO HERE]
The following is another way to just see what the previous ID is for each row:
-- Show, next to every row, the ID that precedes it in ascending order.
SELECT
    *,
    LAG(ID, 1) OVER (ORDER BY ID) AS PreviousID
FROM example
ORDER BY ID
I would simply do:
-- For each listed value, fetch the row with the greatest id below it.
-- OUTER APPLY keeps the listed value even when no such row exists.
select
    v.val,
    prev.*
from (values (900), (1000), (2000), (3000), (4000)) as v (val)
outer apply (
    select top 1 t.*
    from t
    where t.id < v.val
    order by t.id desc
) as prev;
This allows you to see the value on each of the rows. That is probably important because SQL result sets are unordered and it will not be obvious which value goes with which row.
EDIT:
If you know the listed values are in the table, the most performant solution is probably:
-- Pair each row with the id that follows it, then keep the rows whose
-- successor is one of the listed ids.
with with_next as (
    select
        t.*,
        lead(id) over (order by id) as next_id
    from t
)
select with_next.*
from with_next
where next_id in ( . . . );
This should work and I think it will be fairly efficient.
-- Table variables in T-SQL must be declared with a leading "@".
declare @V table (num int primary key);
insert into @V values (800), (889), (900), (997), (1000), (1910), (1920), (2000), (2990), (3000), (3500), (4000);
declare @T table (num int primary key);
insert into @T values (800), (900), (1000), (1200), (2000), (3000), (4000);
-- For each value of @V that also exists in @T, return the value that
-- precedes it in @V.
select tt.vP
from ( select t.num as t, v.num as v
            , LAG(v.num) over (order by v.num) as vP
       from @V v
       left join @T t
         on v.num = t.num
     ) tt
where tt.t is not null
  and tt.vP is not null
order by tt.vP
Not clear how you want it to behave
-- Alternative: for every value of @T, the greatest value of @V below it.
select t.num
     , (select max(v.num) from @V v where v.num < t.num) as prior
from @T t

Get two random records (different in one attribute) from table

Very simple table as an example but no idea how to achieve this:
Example: Table1
ColumnA ColumnB
1 A
1 B
2 C
For two random records: I know I could do like
-- Two rows in random order: sort by a NEWID() value and keep the first two.
SELECT TOP 2 *
FROM Table1
ORDER BY NEWID()
But now I would like to select two random records that never share the same value in ColumnA; that is, the result must not contain '1 A' together with '1 B'. Any other combination is fine.
Any ideas? Thanks in advance
-- Drop the temp table only when it exists, so the script also runs the
-- first time (an unconditional DROP fails when #T is missing).
IF OBJECT_ID('tempdb..#T') IS NOT NULL DROP TABLE #T
CREATE TABLE #T(ID INT
,Vals CHAR(2)
)
INSERT INTO #T (ID, Vals) VALUES
(1,'A')
,(1,'B')
,(2,'A')
,(2,'C')
,(3,'D')
,(4,'E')
,(5,'E')
-- Pick one random row per ID (Rnk = 1), then two of those rows at random,
-- so the two results can never share an ID.
SELECT TOP 2
ID,
Vals
FROM
(
SELECT
ID
,VALS
,ROW_NUMBER() OVER(PARTITION BY ID ORDER BY NEWID()) Rnk
FROM
#T) T
WHERE
Rnk = 1
order by NewID()
Here's a way to do it, but it can get expensive if your table is very large:
-- Pick one random row per ColumnA value, then two of those rows at random.
Select Top 2
    picked.ColumnA,
    picked.ColumnB
From
(
    Select
        ColumnA,
        ColumnB,
        Row_Number() Over (Partition By ColumnA Order By NewId()) As RN
    From Table1
) As picked
Where picked.RN = 1
Order By NewId()

get intervals of nonchanging value from a sequence of numbers

I need to summarize a sequence of values into intervals of non-changing values - begin, end and value for each such interval. I can easily do it in PL/SQL but would like a pure SQL solution for both performance and educational reasons. I have been trying for some time to solve it with analytic functions, but can't figure out how to properly define the windowing clause. The problem I am having is with repeated values.
Simplified example -
given input:
id value
1 1
2 1
3 2
4 2
5 1
I'd like to get output
from to val
1 2 1
3 4 2
5 5 1
You want to identify groups of adjacent values. One method is to use lag() to find the beginning of the sequence, then a cumulative sum to identify the groups.
Another method is the difference of row number:
-- Rows of one unbroken run of equal values share the same difference
-- between the overall row number and the per-value row number, so that
-- difference (together with the value) identifies each interval.
-- your_table is a placeholder: "table" is a reserved word.
select value, min(id) as from_id, max(id) as to_id
from (select t.*,
             (row_number() over (order by id) -
              row_number() over (partition by value order by id)
             ) as grp
      from your_table t
     ) t
group by grp, value;
Using a CTE to collect all the rows and identifying them into changing values, then finally grouping together for the changing values.
-- Sample data: ID is generated by the IDENTITY column, Value is inserted.
CREATE TABLE #temp
(
    ID      INT NOT NULL IDENTITY(1,1),
    [Value] INT NOT NULL
)
GO

INSERT INTO #temp ([Value])
          SELECT 1
UNION ALL SELECT 1
UNION ALL SELECT 2
UNION ALL SELECT 2
UNION ALL SELECT 1;

-- grp is constant within each unbroken run of equal values: it is the
-- overall row number minus the row number within the value.
WITH Marked AS
(
    SELECT
        *,
        ROW_NUMBER() OVER (ORDER BY ID)
          - ROW_NUMBER() OVER (PARTITION BY Value ORDER BY ID) AS grp
    FROM #temp
)
SELECT
    MIN(ID) AS [From],
    MAX(ID) AS [To],
    [VALUE]
FROM Marked
GROUP BY grp, Value
ORDER BY MIN(ID)

DROP TABLE #temp;

SQL group by if values are close

Class| Value
-------------
A | 1
A | 2
A | 3
A | 10
B | 1
I am not sure whether it is practical to achieve this using SQL.
If the difference between values is less than 5 (or x), then group the rows (within the same Class, of course).
Expected result
Class| ValueMin | ValueMax
---------------------------
A | 1 | 3
A | 10 | 10
B | 1 | 1
For fixed intervals, we can easily use "GROUP BY". But now the grouping is based on nearby row's value. So if the values are consecutive or very close, they will be "chained together".
Thank you very much
Assuming MSSQL
You are trying to group things by gaps between values. The easiest way to do this is to use the lag() function to find the gaps:
-- 1) flag every row that starts a new group (no previous value of the same
--    class within 5 of it),
-- 2) a running sum of the flags numbers the groups inside each class,
-- 3) aggregate each group.
with flagged as (
    select
        class,
        value,
        case
            when lag(value) over (partition by class order by value) > value - 5 then 0
            else 1
        end as IsNewGroup
    from t
),
numbered as (
    select
        class,
        value,
        sum(IsNewGroup) over (partition by class order by value) as GroupId
    from flagged
)
select
    class,
    min(value) as minvalue,
    max(value) as maxvalue
from numbered
group by class, GroupId;
Note that this assumes SQL Server 2012 for the use of lag() and cumulative sum.
Update:
*This answer is incorrect*
Assuming the table you gave is called sd_test, the following query will give you the output you are expecting
In short, we need a way to find what was the value on the previous row. This is determined using a join on row ids. Then create a group to see if the difference is less than 5. and then it is just regular 'Group By'.
If your version of SQL Server supports windowing functions with partitioning the code would be much more readable.
-- Rank the values of each class once, then self-join every row to its
-- predecessor (ROW_ID - 1) within the same class.
-- NOTE: rows are grouped only by the "within 5 of the previous value" flag,
-- so all flagged rows of a class fall into one single group.
WITH RANKED AS
(
    SELECT
        ROW_NUMBER() OVER (PARTITION BY CLASS ORDER BY VALUE) AS ROW_ID,
        CLASS,
        VALUE
    FROM SD_TEST
)
SELECT
    A.CLASS,
    MIN(A.VALUE) AS MIN_VALUE,
    MAX(A.VALUE) AS MAX_VALUE
FROM RANKED AS A
LEFT JOIN RANKED AS B
    ON A.CLASS = B.CLASS
   AND A.ROW_ID = B.ROW_ID + 1
GROUP BY
    A.CLASS,
    CASE WHEN ABS(COALESCE(B.VALUE, 0) - A.VALUE) < 5 THEN 1 ELSE 0 END
ORDER BY
    A.CLASS,
    CASE WHEN ABS(COALESCE(B.VALUE, 0) - A.VALUE) < 5 THEN 1 ELSE 0 END DESC
ps: I think the above is ANSI compliant. So should run in most SQL variants. Someone can correct me if it is not.
These give the correct result, using the fact that you must have the same number of group starts as ends and that they will both be in ascending order.
-- Recreate the sample table from scratch on every run.
if object_id('tempdb..#temp') is not null
    drop table #temp

create table #temp
(
    class char(1),
    Value int
);

insert into #temp (class, Value)
values
    ('A', 1),
    ('A', 2),
    ('A', 3),
    ('A', 10),
    ('A', 13),
    ('A', 14),
    ('b', 7),
    ('b', 8),
    ('b', 9),
    ('b', 12),
    ('b', 22),
    ('b', 26),
    ('b', 67);
Method 1 Using CTE and row offsets
-- Method 1: find the start and end of every group of "close" values
-- (neighbouring values of a class less than 5 apart) and pair them up.
-- cte: number the values of each class in ascending order (R).
-- NOTE(review): R is part of the DISTINCT row, so duplicate (class, value)
-- pairs are not collapsed by the DISTINCT here - confirm this is intended.
with cte as
(select distinct class,value,ROW_NUMBER() over ( partition by class order by value ) as R from #temp),
-- cte2: for each row, PreviousRec / NextRec is set only when the adjacent
-- row of the same class exists and lies within 5 of this row's value.
cte2 as
(
select
c1.class
,c1.value
,c2.R as PreviousRec
,c3.r as NextRec
from
cte c1
left join cte c2 on (c1.class = c2.class and c1.R= c2.R+1 and c1.Value < c2.value + 5)
left join cte c3 on (c1.class = c3.class and c1.R= c3.R-1 and c1.Value > c3.value - 5)
)
select
Starts.Class
,Starts.Value as StartValue
,Ends.Value as EndValue
from
(
-- Group starts: rows without a close predecessor, numbered per class.
select
class
,value
,row_number() over ( partition by class order by value ) as GroupNumber
from cte2
where PreviousRec is null) as Starts join
(
-- Group ends: rows without a close successor, numbered the same way, so the
-- n-th start of a class is matched with the n-th end of that class.
select
class
,value
,row_number() over ( partition by class order by value ) as GroupNumber
from cte2
where NextRec is null) as Ends on starts.class=ends.class and starts.GroupNumber = ends.GroupNumber
** Method 2 Inline views using not exists **
-- Method 2: group starts are the distinct values with no smaller value of
-- the same class within 5; group ends have no larger value within 5.
-- Starts and ends are numbered in ascending order per class and paired by
-- that number.
;with DistinctValues as
(
    select distinct class, value
    from #temp
),
Starts as
(
    select
        d.class,
        d.Value,
        row_number() over ( partition by d.class order by d.value ) as GroupNumber
    from DistinctValues as d
    where not exists
    (
        select 1
        from #temp as nearBelow
        where nearBelow.class = d.class
          and nearBelow.Value < d.Value
          and nearBelow.Value > d.Value - 5
    )
),
Ends as
(
    select
        d.class,
        d.Value,
        row_number() over ( partition by d.class order by d.value ) as GroupNumber
    from DistinctValues as d
    where not exists
    (
        select 1
        from #temp as nearAbove
        where nearAbove.class = d.class
          and nearAbove.Value > d.Value
          and nearAbove.Value < d.Value + 5
    )
)
select
    Starts.Class,
    Starts.Value as StartValue,
    Ends.Value as EndValue
from Starts
join Ends
    on Starts.class = Ends.class
   and Starts.GroupNumber = Ends.GroupNumber
In both methods I use a select distinct to begin with, because if you have a duplicate entry at a group start or end, things go awry without it.
Here is one way of getting the information you are after:
-- Splits every class into two fixed buckets (Value < 5 and Value >= 5) and
-- reports the minimum and maximum Value of each bucket.
WITH Under5 AS
(
    -- Classes having at least one value below 5.
    SELECT DISTINCT m1.Class
    FROM MyTable AS m1
    WHERE m1.Value < 5
),
Over4 AS
(
    -- Classes having at least one value of 5 or more.
    SELECT DISTINCT m6.Class
    FROM MyTable AS m6
    WHERE m6.Value >= 5
)
SELECT
    Under5.Class,
    (SELECT MIN(m2.Value)
     FROM MyTable AS m2
     WHERE m2.Value < 5
       AND m2.Class = Under5.Class) AS ValueMin,
    (SELECT MAX(m3.Value)
     FROM MyTable AS m3
     WHERE m3.Value < 5
       AND m3.Class = Under5.Class) AS ValueMax
FROM Under5
UNION
SELECT
    Over4.Class,
    (SELECT MIN(m4.Value)
     FROM MyTable AS m4
     WHERE m4.Value >= 5
       AND m4.Class = Over4.Class) AS ValueMin,
    (SELECT Max(m5.Value)
     FROM MyTable AS m5
     WHERE m5.Value >= 5
       AND m5.Class = Over4.Class) AS ValueMax
FROM Over4