Find groups containing 6 consecutive 1s in one column

Find groups containing 6 consecutive 1s in one column - sql

I have a table with 2 columns:
val with values: 0 or 1
id with unique identifiers
with cte(val, id) as (
select 0, 0 union all
select 1, 1 union all
select 1, 2 union all
select 0, 3 union all
select 1, 4 union all
select 1, 5 union all
select 1, 6 union all
select 1, 7 union all
select 1, 8 union all
select 1, 9 union all
select 1, 10
)
select *
into #tmp
from cte
How do I to find id with 6 values = 1 in a row.
In the example above: id = 9, id = 10.
It is desirable not to use loops (cursors or while), but something like sum(...) over(...).

Why not LAG() (but you need an order column):
SELECT id
FROM (
SELECT
id,
val,
val1 = LAG(val, 1) OVER (ORDER BY id),
val2 = LAG(val, 2) OVER (ORDER BY id),
val3 = LAG(val, 3) OVER (ORDER BY id),
val4 = LAG(val, 4) OVER (ORDER BY id),
val5 = LAG(val, 5) OVER (ORDER BY id)
FROM YourTable
) t
WHERE val = 1 AND val1 = 1 AND val2 = 1 AND val3 = 1 AND val4 = 1 AND val5 = 1

You can use running sum over a window frame that contains exactly 6 rows (5 prior plus current row):
with cte as (
select *, sum(val) over (
order by id
rows between 5 preceding and current row
) as rsum
from #tmp
)
select *
from cte
where rsum = 6
Adjust the size of the window and where clause to match the desired value.

Another approach is using ROW_NUMBER on the LAG values
declare #tmp table (val int, id int)
insert into #tmp values
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10)
select 0, 0 union all
select 1, 1 union all
select 1, 2 union all
select 0, 3 union all
select 1, 4 union all
select 1, 5 union all
select 1, 6 union all
select 1, 7 union all
select 1, 8 union all
select 1, 9 union all
select 1, 10
select t2.id,
t2.islandcount
from ( select t.id,
t.val,
t.priorval,
row_number() over (partition by t.val, t.priorval order by t.id) as islandcount
from ( select id,
val,
lag(val, 1) over (order by id) priorval
from #tmp
) t
) t2
where t2.islandcount >= 6
the result is
id islandcount
9 6
10 7
Try it yourself in this DBFiddle
The advantage of this method is that you can easy set the value from 6 to any other value
EDIT
As #Zhorov mentioned in the comment, there is a flaw in my code, it returns the wrong results when you add certain rows
This solution will fix that, it is based on the solution of #SalmanA so the credit for accepted answer should go to him
declare #tmp table (val int, id int)
insert into #tmp values
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10)
-- these are the certains rows added
,(0, 11), (1, 12), (1, 13)
select t.id,
t.val,
t.islandcount
from ( select id,
val,
sum(val) over (order by id rows between 5 preceding and current row) as islandcount
from #tmp
) t
where t.islandcount >= 6
order by t.id
And again a DBFiddle

Related

how to assure a deterministic result of a query that uses mode() in snowflake

I use snowflake and I want to use multiple mode() expressions in one select statement. So it looks like:
SELECT
x,
y,
mode(col1),
mode(col2),
...
mode(col15)
FROM table
GROUP BY x, y
My problem is that it yields nondeterministic output in case of ties.
The documentation does not explain exactly how are the ties resolved. It says only:
If there is a tie for most frequent value (two or more values occur as frequently as each other, and more frequently than any other value), MODE returns one of those values.
https://docs.snowflake.net/manuals/sql-reference/functions/mode.html
I need some workaround to get the equivalent of mode(), which will always results in a deterministic output.
Something like: use mode(), but in case of a tie order by some column and select the first value.
I do not supply example to replicate the nondeterministic result, because it seems to occur only with bigger data sets or complex queries.

so mode seems to prefer the first value it see in a tie breaker.
with data as (
select x, col1, col2, col3 from values (1, 1, 1, 3), (1, 1, 2,3), (1, 2, 2,3)
,(4, 1, 20, 30), (4, 1, 2, 3), (4, 2, 2, 30), (4,2,20,3) v(x,col1,col2,col3)
)
select x
,mode(col1)
,mode(col2)
,mode(col3)
from data
group by 1
order by 1;
swapping the first value of the 2/20 or 3/30 pair shows this.
so build a pattern trying to solve this in one expression:
with data as (
select x, col1, col2, col3 from values (1, 1, 1, 3), (1, 1, 2,3), (1, 2, 2,3)
,(4, 1, 20, 30), (4, 1, 2, 3), (4, 2, 2, 30), (4,2,20,3) v(x,col1,col2,col3)
)
select x
,col1
,col2
,col3
,count(col1)over(partition by x,col1) c_col1
,count(col2)over(partition by x,col2) c_col2
,count(col3)over(partition by x,col3) c_col3
from data ;
lends it self to:
with data as (
select x, col1, col2, col3 from values (1, 1, 1, 3), (1, 1, 2,3), (1, 2, 2,3)
,(4, 1, 20, 30), (4, 1, 2, 3), (4, 2, 2, 30), (4,2,20,3) v(x,col1,col2,col3)
)
select x
,col1
,col2
,col3
,row_number() over (partition by x order by c_col1 desc, col1) as r1
,row_number() over (partition by x order by c_col2 desc, col2) as r2
,row_number() over (partition by x order by c_col3 desc, col3) as r3
from (
select x
,col1
,col2
,col3
,count(col1)over(partition by x,col1) c_col1
,count(col2)over(partition by x,col2) c_col2
,count(col3)over(partition by x,col3) c_col3
from data
)
order by 1;
with these result though:
X COL1 COL2 COL3 R1 R2 R3
1 1 2 3 2 1 1
1 2 2 3 3 2 2
1 1 1 3 1 3 3
4 1 2 3 2 1 1
4 2 20 3 4 4 2
4 2 2 30 3 2 3
4 1 20 30 1 3 4
you cannot use logic like
QUALIFY row_number() over (partition by x order by c_col1 desc, col1) = 1
AND row_number() over (partition by x order by c_col2 desc, col2) = 1
AND row_number() over (partition by x order by c_col3 desc, col3 desc) = 1
to pick the best, as the best row for each column are not aligned.
which leads to a CTE (or subquery) for each column, much in the pattern that Gorndon showed.
with data as (
select x, col1, col2, col3 from values (1, 1, 1, 3), (1, 1, 2,3), (1, 2, 2,3)
,(4, 1, 20, 30), (4, 1, 2, 3), (4, 2, 2, 30), (4,2,20,3) v(x,col1,col2,col3)
),col1_m as (
select x, col1, count(*) as c
from data
group by 1,2
QUALIFY row_number() over (partition by x order by c desc, col1) = 1
),col2_m as (
select x, col2, count(*) as c
from data
group by 1,2
QUALIFY row_number() over (partition by x order by c desc, col2) = 1
),col3_m as (
select x, col3, count(*) as c
from data
group by 1,2
QUALIFY row_number() over (partition by x order by c desc, col3) = 1
), base as (
select distinct x from data
)
select b.x
,c1.col1
,c2.col2
,c3.col3
from base as b
left join col1_m as c1 on b.x = c1.x
left join col2_m as c2 on b.x = c2.x
left join col3_m as c3 on b.x = c3.x
order by 1;
which gives the results you are expecting
X COL1 COL2 COL3
1 1 2 3
4 1 2 3
but you will need to expand X to be the set of things (x,y,..) that you care for etc.

This answers the original version of the question.
One method is to use a subquery with aggregation and window functions:
select col, val as mode
from (select col, val, count(*),
row_number() over (partition by col order by count(*) desc, val) as seqnum
from t
group by col, val
) cv
where seqnum = 1;
The second key in the order by adds the determinism in the event of ties.

This should force deterministic sort.
SELECT
row_number() over (partition by t.COL_X order by t.COL1, t.COL2, t.COL3, t.COL4, t.COL5, t.COL6) as rn,
t.*
FROM TABLE t
WHERE t.CONDITION = X
QUALIFY row_number() over (partition by t.COL_X order by t.COL1, t.COL2, t.COL3, t.COL4, t.COL5, t.COL6) = 1
ORDER BY t.COL1, t.COL2, t.COL3, t.COL4, t.COL5, rn

Window Functions between current row and previous row with specific value

I want to be able to sum the values in a certain column between the current row the latest previous row with a certain value in another column.
In this example I want to sum the Val Column between the current row and the latest row with a RecType of 2 partitioned by ID ordered by RowNum.
DECLARE #ExampleTable TABLE
(
Id INT,
RowNum INT,
RecType INT,
Val INT
)
INSERT INTO #ExampleTable
(Id, RowNum, RecType, Val)
VALUES
(1, 1, 1, 1),
(1, 2, 2, 2),
(1, 3, 1, 4),
(1, 4, 1, 8),
(1, 5, 1, 16),
(1, 6, 2, 32),
(1, 7, 1, 64),
(2, 1, 2, 1),
(2, 2, 2, 2),
(2, 3, 1, 4),
(2, 4, 1, 8),
(2, 5, 1, 16),
(2, 6, 1, 32),
(2, 7, 2, 64)
I'm hoping for Results like:
DECLARE #Results TABLE
(
Id INT,
RowNum INT,
SumSinceLast2 INT
)
INSERT INTO #Results
(Id, RowNum, SumSinceLast2)
VALUES
(1, 1, 0),
(1, 2, 0),
(1, 3, 6), -- 4 + 2
(1, 4, 14), -- 4 + 2 + 8
(1, 5, 30), -- 16 + 8 + 4 + 2
(1, 6, 62), -- 32 + 16 + 8 + 4 + 2
(1, 7, 96), -- 64 + 32
(2, 1, 0),
(2, 2, 3), -- 2 + 1
(2, 3, 6), -- 4 + 2
(2, 4, 14), -- 8 + 4 + 2
(2, 5, 30), -- 16 + 8 + 4 + 2
(2, 6, 62), -- 32 + 16 + 8 + 4 + 2
(2, 7, 126) -- 64 + 32 + 16 + 8 + 4 + 2
Is this something that I should be able to easily do in SQL Server 2017? I was hoping window functions would be of use here.

This doesn't return exactly what you want, but the results seem more sensible. Each "2" starts a new group. The values are then cumulatively summed within the group:
select e.*,
(case when grp_2 = 0
then 0
else sum(val) over (partition by id, grp_2 order by rownum)
end) as result
from (select e.*,
sum(case when RecType = 2 then 1 else 0 end) over
(partition by id
order by rownum
) as grp_2
from #ExampleTable e
) e
order by id, rownum;
Here is a db<>fiddle.
The results can be tweaked (it makes for a much messier query) to "fix" the values for the "2" the way you have them. However, this version makes more sense to me, because "2" are not counted in two separate groups.
Here is a tweaked version that double counts the "2"s:
select e.*,
(case when grp_2 = 0 or grp_2 = 1 and RecType = 2
then 0
when RecType <> 2
then sum(val) over (partition by id, grp_2 order by rownum)
else sum(val) over (partition by id, grp_2_desc) + lag(val) over (partition by id, Rectype order by rownum)
end) as result
from (select e.*,
sum(case when RecType = 2 then 1 else 0 end) over
(partition by id
order by rownum
) as grp_2,
sum(case when RecType = 2 then 1 else 0 end) over
(partition by id
order by rownum desc
) as grp_2_desc
from #ExampleTable e
) e
order by id, rownum;

I know there is a solution already but since I wrote the code, I am going to post it here anyway.
--Sum the range
select
et.Id
,a.CurrentRow
,sum(CASE WHEN ClosestMinRow = CurrentRow THEN 0 ELSE et.Val end) --When there is no previous 2 then set them to 0
from
#ExampleTable et
join
(
--Create begin and end range
select
et.Id
,et.RowNum CurrentRow
,ISNULL(FloorRange.RowNum,et.RowNum) ClosestMinRow
from
#ExampleTable ET
OUTER Apply (
-- Get the RecType = 2 in order to create a range
select
MAX(RowNum) RowNum
from
#ExampleTable et2
WHERE
RecType = 2
AND et2.RowNum < ET.RowNum
AND et2.Id = et.Id
) FloorRange
) a
ON et.Id = a.Id
and et.RowNum between a.ClosestMinRow and CurrentRow
GROUP BY
et.Id
,a.CurrentRow
order by
et.Id
,a.CurrentRow

stuck with one query in SQL Server

I had a table named calci. The following was the sample data
CREATE TABLE calci
(RN int, FREQ int, price int)
;
INSERT INTO calci
(RN, FREQ, price)
VALUES
(1, 1, 3),
(2, 2, 4),
(3, 3, 5),
(4, 4, 6),
(5, 5, 7),
(6, 6, 8),
(7, 1, 5),
(8, 2, 6),
(9, 3, 9),
(10, 4, 7),
(11, 5, 5),
(12, 6, 1),
(13, 1, 3)
;
I required only 3 records based on the sum of freq (1-6)
The result should be like
price
33 -----sum of first 6 records
33 -----sum of next six records
3 -----sum of last six record i.e last record

please check the following query which will solve the above problem
select sum(price) from calci group by (rn- freq)

SELECT SUM(price)
FROM calci
GROUP BY (RN - 1) / 6
HAVING (RN - 1) / 6 IN (0, 1)
UNION
SELECT SUM(price)
FROM calci
WHERE (RN - 1) / 6 = (SELECT (COUNT(*) - 1) / 6 FROM calci)

I think you can use a query like this:
;WITH t as (
SELECT *, CASE WHEN LAG(FREQ) OVER (ORDER BY RN, FREQ) = 6 THEN 1 ELSE 0 END change
FROM calci
), tt as (
SELECT *, SUM(change) OVER (ORDER BY RN) grouped
FROM t)
SELECT SUM(price) sumFreq
FROM tt
GROUP BY grouped;
You can change change to CASE WHEN FREQ - LAG(FREQ) OVER (ORDER BY RN, FREQ) = 1 THEN 0 ELSE 1 END for more flexibility to handle any jump in FREQ ;).

TRY THIS
;WITH CTE (RN, FREQ, PRICE) AS
(
SELECT 1, 1, 3 UNION ALL
SELECT 2, 2, 4 UNION ALL
SELECT 3, 3, 5 UNION ALL
SELECT 4, 4, 6 UNION ALL
SELECT 5, 5, 7 UNION ALL
SELECT 6, 6, 8 UNION ALL
SELECT 7, 1, 5 UNION ALL
SELECT 8, 2, 6 UNION ALL
SELECT 9, 3, 9 UNION ALL
SELECT 10, 4, 7 UNION ALL
SELECT 11, 5, 5 UNION ALL
SELECT 12, 6, 1 UNION ALL
SELECT 13, 1, 3
), CTE2(PRICE, RANK) AS
(
SELECT (PRICE) , DENSE_RANK () OVER (PARTITION BY FREQ ORDER BY RN ) FROM CTE
)
SELECT SUM(PRICE) FROM CTE2 GROUP BY RANK

Consecutive Count on Record Result

I'm working on a data structure with list of positive or negative result for each person.
Sample data (id is an identity):
id person result
1 1 0
2 1 1
3 1 1
4 2 1
5 2 0
6 1 1
7 1 0
8 2 0
9 2 0
10 2 0
With this I would like to count the maximum number of consecutive result = 1 for each person. The result in this sample would be
person max_count
1 3
2 1
I have tried using ROW_NUMBER() OVER (PARTITION BY) like this
SELECT person,
ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS max_count
FROM TABLE
but it gives me an accumulative count instead of consecutive one.
What should I do to perform a consecutive count? Any hint would be appreciated. Thanks in advance

This looks like classic gaps-and-islands problem.
Examine intermediate results of each CTE in the query below to understand what is going on.
Sample data
I added person 3 with two sequences of positive results, so that we could find the longest sequence.
DECLARE #T TABLE (id int, person int, result int);
INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0),
(11, 3, 0),
(12, 3, 1),
(13, 3, 1),
(14, 3, 1),
(15, 3, 1),
(16, 3, 0),
(17, 3, 1),
(18, 3, 1),
(19, 3, 0),
(20, 3, 0);
Query
WITH
CTE_RowNumbers
AS
(
SELECT
id, person, result
,ROW_NUMBER() OVER (PARTITION BY person ORDER BY ID) AS rn1
,ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY ID) AS rn2
FROM #T
)
,CTE_Groups
AS
(
SELECT
id, person, result
,rn1-rn2 AS GroupNumber
FROM CTE_RowNumbers
)
,CTE_GroupSizes
AS
(
SELECT
person
,COUNT(*) AS GroupSize
FROM CTE_Groups
WHERE
result = 1
GROUP BY
person
,GroupNumber
)
SELECT
person
,MAX(GroupSize) AS max_count
FROM CTE_GroupSizes
GROUP BY person
ORDER BY person;
Result
+--------+-----------+
| person | max_count |
+--------+-----------+
| 1 | 3 |
| 2 | 1 |
| 3 | 4 |
+--------+-----------+

by using Case and SUM we can achieve the above result
DECLARE #T TABLE (id int, person int, result int);
INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0)
select
person,
SUM(CASE WHEN RESULT = 1 then 1 else 0 END)
from #T
GROUP BY person

Consolidating subsets in a table

I have a table in SqlServer 2008 with data of the form
UserID StartWeek EndWeek Type
1 1 3 A
1 4 5 A
1 6 10 A
1 11 13 B
1 14 16 A
2 1 5 A
2 6 9 A
2 10 16 B
I'd like to consolidate/condense the adjacent types so that the resulting table looks like this.
UserID StartWeek EndWeek Type
1 1 10 A
1 11 13 B
1 14 16 A
2 1 9 A
2 10 16 B
Does anyone have any suggestions as to the best way to accomplish this? I've been looking at using Row_number and Partition, but I can't get it to behave exactly as I'd like.

There's probably a neater way to do it, but this produces the correct result
DECLARE #t TABLE
(UserId TINYINT
,StartWeek TINYINT
,EndWeek TINYINT
,TYPE CHAR(1)
)
INSERT #t
SELECT 1,1,3,'A'
UNION SELECT 1,4,5,'A'
UNION SELECT 1,6,10,'A'
UNION SELECT 1,11,13,'B'
UNION SELECT 1,14,16,'A'
UNION SELECT 2,1,5,'A'
UNION SELECT 2,6,9,'A'
UNION SELECT 2,10,16,'B'
;WITH srcCTE
AS
(
SELECT *
,ROW_NUMBER() OVER (PARTITION BY t1.UserID, t1.Type
ORDER BY t1.EndWeek
) AS rn
FROM #t AS t1
)
,recCTE
AS
(
SELECT *
,0 AS grp
FROM srcCTE
WHERE rn = 1
UNION ALL
SELECT s.UserId
,s.StartWeek
,s.EndWeek
,s.TYPE
,s.rn
,CASE WHEN s.StartWeek - 1 = r.EndWeek
THEN r.grp
ELSE r.grp+ 1
END AS GRP
FROM srcCTE AS s
JOIN recCTE AS r
ON r.UserId = s.UserId
AND r.TYPE = s.TYPE
AND r.rn = s.rn - 1
)
SELECT UserId
,MIN(StartWeek) AS StartWeek
,MAX(EndWeek) AS EndWeek
,TYPE
FROM recCTE AS s1
GROUP BY UserId
,TYPE
,grp

Also using a CTE, but in a slightly different way
DECLARE #Consolidate TABLE (
UserID INTEGER, StartWeek INTEGER,
EndWeek INTEGER, Type CHAR(1))
INSERT INTO #Consolidate VALUES (1, 1, 3, 'A')
INSERT INTO #Consolidate VALUES (1, 4, 5, 'A')
INSERT INTO #Consolidate VALUES (1, 6, 10, 'A')
INSERT INTO #Consolidate VALUES (1, 14, 16, 'A')
INSERT INTO #Consolidate VALUES (1, 11, 13, 'B')
INSERT INTO #Consolidate VALUES (2, 1, 5, 'A')
INSERT INTO #Consolidate VALUES (2, 6, 9, 'A')
INSERT INTO #Consolidate VALUES (2, 10, 16, 'B')
;WITH ConsolidateCTE AS
(
SELECT UserID, StartWeek, EndWeek, Type
FROM #Consolidate
UNION ALL
SELECT cte.UserID, cte.StartWeek, c.EndWeek, c.Type
FROM ConsolidateCTE cte
INNER JOIN #Consolidate c ON
c.UserID = cte.UserID
AND c.StartWeek = cte.EndWeek + 1
AND c.Type = cte.Type
)
SELECT UserID, [StartWeek] = MIN(Startweek), EndWeek, Type
FROM (
SELECT UserID, Startweek, [EndWeek] = MAX(EndWeek), Type
FROM ConsolidateCTE
GROUP BY UserID, StartWeek, Type
) c
GROUP BY UserID, EndWeek, Type
ORDER BY 1, 2, 3

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Find groups containing 6 consecutive 1s in one column - sql

Related

how to assure a deterministic result of a query that uses mode() in snowflake

Window Functions between current row and previous row with specific value

stuck with one query in SQL Server

Consecutive Count on Record Result

Consolidating subsets in a table

Categories

Resources