Window Functions between current row and previous row with specific value

Window Functions between current row and previous row with specific value - sql

I want to be able to sum the values in a certain column between the current row and the latest previous row that has a certain value in another column.
In this example I want to sum the Val Column between the current row and the latest row with a RecType of 2 partitioned by ID ordered by RowNum.
-- Sample data for the question: rows are grouped by Id and ordered by RowNum;
-- RecType = 2 marks the rows that act as range boundaries.
-- A local temp table is used because "DECLARE #name TABLE" is not valid T-SQL
-- (table variables must be named @name) and the queries that follow read #ExampleTable.
IF OBJECT_ID('tempdb..#ExampleTable') IS NOT NULL
    DROP TABLE #ExampleTable;

CREATE TABLE #ExampleTable
(
    Id      INT,
    RowNum  INT,
    RecType INT,
    Val     INT
);

INSERT INTO #ExampleTable
    (Id, RowNum, RecType, Val)
VALUES
    (1, 1, 1, 1),
    (1, 2, 2, 2),
    (1, 3, 1, 4),
    (1, 4, 1, 8),
    (1, 5, 1, 16),
    (1, 6, 2, 32),
    (1, 7, 1, 64),
    (2, 1, 2, 1),
    (2, 2, 2, 2),
    (2, 3, 1, 4),
    (2, 4, 1, 8),
    (2, 5, 1, 16),
    (2, 6, 1, 32),
    (2, 7, 2, 64);
I'm hoping for Results like:
-- Expected output: for each row, the sum of Val from the latest earlier
-- RecType = 2 row (inclusive) through the current row; 0 when no earlier
-- RecType = 2 row exists for that Id.
-- Declared as a local temp table because "DECLARE #name TABLE" is not valid T-SQL.
IF OBJECT_ID('tempdb..#Results') IS NOT NULL
    DROP TABLE #Results;

CREATE TABLE #Results
(
    Id            INT,
    RowNum        INT,
    SumSinceLast2 INT
);

INSERT INTO #Results
    (Id, RowNum, SumSinceLast2)
VALUES
    (1, 1, 0),
    (1, 2, 0),
    (1, 3, 6),   -- 4 + 2
    (1, 4, 14),  -- 8 + 4 + 2
    (1, 5, 30),  -- 16 + 8 + 4 + 2
    (1, 6, 62),  -- 32 + 16 + 8 + 4 + 2
    (1, 7, 96),  -- 64 + 32
    (2, 1, 0),
    (2, 2, 3),   -- 2 + 1
    (2, 3, 6),   -- 4 + 2
    (2, 4, 14),  -- 8 + 4 + 2
    (2, 5, 30),  -- 16 + 8 + 4 + 2
    (2, 6, 62),  -- 32 + 16 + 8 + 4 + 2
    (2, 7, 126); -- 64 + 32 + 16 + 8 + 4 + 2
Is this something that I should be able to easily do in SQL Server 2017? I was hoping window functions would be of use here.

This doesn't return exactly what you want, but the results seem more sensible. Each "2" starts a new group. The values are then cumulatively summed within the group:
-- Every RecType = 2 row opens a new group within its Id; Val is then summed
-- cumulatively inside each group.  Rows that precede the first RecType = 2 row
-- (grp_2 = 0) report 0.
SELECT
    grouped.*,
    CASE
        WHEN grouped.grp_2 > 0
            THEN SUM(grouped.Val) OVER (PARTITION BY grouped.Id, grouped.grp_2
                                        ORDER BY grouped.RowNum)
        ELSE 0
    END AS result
FROM
(
    -- grp_2 = number of RecType = 2 rows seen so far (current row included)
    SELECT
        src.*,
        SUM(CASE WHEN src.RecType = 2 THEN 1 ELSE 0 END)
            OVER (PARTITION BY src.Id ORDER BY src.RowNum) AS grp_2
    FROM #ExampleTable AS src
) AS grouped
ORDER BY
    grouped.Id,
    grouped.RowNum;
Here is a db<>fiddle.
The results can be tweaked (it makes for a much messier query) to "fix" the values for the "2" the way you have them. However, this version makes more sense to me, because "2" are not counted in two separate groups.
Here is a tweaked version that double counts the "2"s:
-- Variant in which a RecType = 2 row is counted both in the range it closes
-- and in the range it opens.
SELECT
    marked.*,
    CASE
        -- rows before the first RecType = 2 row, and that first RecType = 2 row itself
        WHEN marked.grp_2 = 0
          OR (marked.grp_2 = 1 AND marked.RecType = 2)
            THEN 0
        -- ordinary rows: running total starting at the latest RecType = 2 row
        WHEN marked.RecType <> 2
            THEN SUM(marked.Val) OVER (PARTITION BY marked.Id, marked.grp_2
                                       ORDER BY marked.RowNum)
        -- later RecType = 2 rows: everything after the previous RecType = 2 row
        -- up to this row (one grp_2_desc partition) plus that previous row's Val
        ELSE SUM(marked.Val) OVER (PARTITION BY marked.Id, marked.grp_2_desc)
             + LAG(marked.Val) OVER (PARTITION BY marked.Id, marked.RecType
                                     ORDER BY marked.RowNum)
    END AS result
FROM
(
    SELECT
        src.*,
        -- RecType = 2 rows counted from the start of the Id ...
        SUM(CASE WHEN src.RecType = 2 THEN 1 ELSE 0 END)
            OVER (PARTITION BY src.Id ORDER BY src.RowNum) AS grp_2,
        -- ... and counted from the end of the Id
        SUM(CASE WHEN src.RecType = 2 THEN 1 ELSE 0 END)
            OVER (PARTITION BY src.Id ORDER BY src.RowNum DESC) AS grp_2_desc
    FROM #ExampleTable AS src
) AS marked
ORDER BY
    marked.Id,
    marked.RowNum;

I know there is a solution already but since I wrote the code, I am going to post it here anyway.
-- For every row, sum Val from the closest earlier RecType = 2 row (inclusive)
-- through the row itself.  Rows with no earlier RecType = 2 row get 0.
SELECT
    src.Id,
    rng.CurrentRow,
    SUM(CASE WHEN rng.ClosestMinRow = rng.CurrentRow THEN 0 ELSE src.Val END)
FROM #ExampleTable AS src
INNER JOIN
(
    -- One range per row: [ClosestMinRow, CurrentRow].  When no earlier
    -- RecType = 2 row exists the range collapses to the row itself.
    SELECT
        cur.Id,
        cur.RowNum                          AS CurrentRow,
        COALESCE(prev2.RowNum, cur.RowNum)  AS ClosestMinRow
    FROM #ExampleTable AS cur
    OUTER APPLY
    (
        -- Latest RecType = 2 row strictly before the current row, same Id
        SELECT MAX(bound.RowNum) AS RowNum
        FROM #ExampleTable AS bound
        WHERE bound.RecType = 2
          AND bound.RowNum < cur.RowNum
          AND bound.Id = cur.Id
    ) AS prev2
) AS rng
    ON  src.Id = rng.Id
    AND src.RowNum BETWEEN rng.ClosestMinRow AND rng.CurrentRow
GROUP BY
    src.Id,
    rng.CurrentRow
ORDER BY
    src.Id,
    rng.CurrentRow

Related

how to calculate consecutive difference using values of two columns? [duplicate]

I'm working on a data structure with list of positive or negative result for each person.
Sample data (id is an identity):
id person result
1 1 0
2 1 1
3 1 1
4 2 1
5 2 0
6 1 1
7 1 0
8 2 0
9 2 0
10 2 0
With this I would like to count the maximum number of consecutive result = 1 for each person. The result in this sample would be
person max_count
1 3
2 1
I have tried using ROW_NUMBER() OVER (PARTITION BY) like this
-- Attempt from the question: numbers the rows inside each (person, result)
-- pair in id order.  TABLE is presumably a placeholder for the real table name.
SELECT person,
ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS max_count
FROM TABLE
but it gives me an accumulative count instead of consecutive one.
What should I do to perform a consecutive count? Any hint would be appreciated. Thanks in advance

This looks like a classic gaps-and-islands problem.
Examine intermediate results of each CTE in the query below to understand what is going on.
Sample data
I added person 3 with two sequences of positive results, so that we could find the longest sequence.
-- Sample data: one row per test result, id gives the chronological order.
-- Person 3 has two separate runs of result = 1 (lengths 4 and 2).
-- A local temp table is used because "DECLARE #name TABLE" is not valid T-SQL
-- and the query that follows reads #T.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0),
(11, 3, 0),
(12, 3, 1),
(13, 3, 1),
(14, 3, 1),
(15, 3, 1),
(16, 3, 0),
(17, 3, 1),
(18, 3, 1),
(19, 3, 0),
(20, 3, 0);
Query
-- Longest run of consecutive result = 1 per person (gaps-and-islands).
WITH islands AS
(
    -- The difference between the per-person row number and the
    -- per-(person, result) row number is constant inside one unbroken run.
    SELECT
        person,
        result,
        ROW_NUMBER() OVER (PARTITION BY person ORDER BY id)
          - ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS island_id
    FROM #T
),
island_sizes AS
(
    -- Length of every run of positive results
    SELECT
        person,
        COUNT(*) AS island_size
    FROM islands
    WHERE result = 1
    GROUP BY
        person,
        island_id
)
SELECT
    person,
    MAX(island_size) AS max_count
FROM island_sizes
GROUP BY person
ORDER BY person;
Result
+--------+-----------+
| person | max_count |
+--------+-----------+
| 1 | 3 |
| 2 | 1 |
| 3 | 4 |
+--------+-----------+

by using Case and SUM we can achieve the above result
-- Local temp table: "DECLARE #name TABLE" is not valid T-SQL.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0);

-- Longest run of consecutive result = 1 per person, using only CASE and SUM.
-- A plain SUM(CASE WHEN result = 1 ...) per person counts ALL positive rows,
-- not the longest consecutive run, so the rows are first split into runs:
-- every non-positive row bumps a running counter (run_group), which means each
-- run_group contains at most one unbroken run of 1s.  A conditional SUM then
-- measures that run and MAX keeps the longest one.
-- Requires SQL Server 2012+ (ordered window aggregate).
SELECT
    runs.person,
    MAX(runs.run_length) AS max_count
FROM
(
    SELECT
        flagged.person,
        SUM(CASE WHEN flagged.result = 1 THEN 1 ELSE 0 END) AS run_length
    FROM
    (
        SELECT
            person,
            result,
            SUM(CASE WHEN result = 1 THEN 0 ELSE 1 END)
                OVER (PARTITION BY person
                      ORDER BY id
                      ROWS UNBOUNDED PRECEDING) AS run_group
        FROM #T
    ) AS flagged
    GROUP BY
        flagged.person,
        flagged.run_group
) AS runs
GROUP BY runs.person
ORDER BY runs.person;

Find groups containing 6 consecutive 1s in one column

I have a table with 2 columns:
val with values: 0 or 1
id with unique identifiers
-- Build the sample table #tmp: val is 0 or 1, id is the unique ordering key.
WITH sample_rows (val, id) AS
(
              SELECT 0, 0
    UNION ALL SELECT 1, 1
    UNION ALL SELECT 1, 2
    UNION ALL SELECT 0, 3
    UNION ALL SELECT 1, 4
    UNION ALL SELECT 1, 5
    UNION ALL SELECT 1, 6
    UNION ALL SELECT 1, 7
    UNION ALL SELECT 1, 8
    UNION ALL SELECT 1, 9
    UNION ALL SELECT 1, 10
)
SELECT
    sample_rows.val,
    sample_rows.id
INTO #tmp
FROM sample_rows
How do I find the ids that have 6 values = 1 in a row (i.e. the row itself and the 5 rows before it all have val = 1)?
In the example above: id = 9, id = 10.
It is desirable not to use loops (cursors or while), but something like sum(...) over(...).

Why not LAG() (but you need an order column):
-- Return the ids whose own val and the five preceding vals (in id order) are all 1.
SELECT look_back.id
FROM
(
    SELECT
        src.id,
        src.val,
        LAG(src.val, 1) OVER (ORDER BY src.id) AS val1,
        LAG(src.val, 2) OVER (ORDER BY src.id) AS val2,
        LAG(src.val, 3) OVER (ORDER BY src.id) AS val3,
        LAG(src.val, 4) OVER (ORDER BY src.id) AS val4,
        LAG(src.val, 5) OVER (ORDER BY src.id) AS val5
    FROM YourTable AS src
) AS look_back
WHERE look_back.val  = 1
  AND look_back.val1 = 1
  AND look_back.val2 = 1
  AND look_back.val3 = 1
  AND look_back.val4 = 1
  AND look_back.val5 = 1

You can use running sum over a window frame that contains exactly 6 rows (5 prior plus current row):
-- rsum adds val over the current row and the 5 rows before it (in id order);
-- a total of 6 means all six rows in the frame have val = 1.
SELECT framed.*
FROM
(
    SELECT
        src.*,
        SUM(src.val) OVER (ORDER BY src.id
                           ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS rsum
    FROM #tmp AS src
) AS framed
WHERE framed.rsum = 6
Adjust the size of the window and where clause to match the desired value.

Another approach is using ROW_NUMBER on the LAG values
-- Local temp table: "declare #name table" is not valid T-SQL.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
    DROP TABLE #tmp;

CREATE TABLE #tmp (val int, id int);

INSERT INTO #tmp (val, id) VALUES
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10);

-- Number the rows that share the same (val, priorval) pair and keep those
-- numbered 6 or higher.
-- NOTE: the numbering runs over ALL rows with the same (val, priorval), not
-- over one unbroken run, so rows from separate runs of 1s are counted together
-- (here id = 2 already contributes to the count that reaches 6 at id = 9).
-- Treat the output as reliable only for data shaped like this sample.
SELECT t2.id,
       t2.islandcount
FROM ( SELECT t.id,
              t.val,
              t.priorval,
              ROW_NUMBER() OVER (PARTITION BY t.val, t.priorval ORDER BY t.id) AS islandcount
       FROM ( SELECT id,
                     val,
                     LAG(val, 1) OVER (ORDER BY id) AS priorval
              FROM #tmp
            ) t
     ) t2
WHERE t2.islandcount >= 6
ORDER BY t2.id;
the result is
id islandcount
9 6
10 7
Try it yourself in this DBFiddle
The advantage of this method is that you can easily change the value from 6 to any other value
EDIT
As @Zhorov mentioned in the comments, there is a flaw in my code: it returns wrong results when certain rows are added.
The solution below fixes that. It is based on the solution by @SalmanA, so the credit for the accepted answer should go to him.
-- Local temp table: "declare #name table" is not valid T-SQL.
IF OBJECT_ID('tempdb..#tmp') IS NOT NULL
    DROP TABLE #tmp;

CREATE TABLE #tmp (val int, id int);

INSERT INTO #tmp (val, id) VALUES
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10)
-- these are the extra rows that gave wrong results with the ROW_NUMBER approach
,(0, 11), (1, 12), (1, 13);

-- islandcount sums val over the current row and the 5 rows before it (in id
-- order); with val limited to 0/1 it reaches 6 only when all six rows are 1.
SELECT t.id,
       t.val,
       t.islandcount
FROM ( SELECT id,
              val,
              SUM(val) OVER (ORDER BY id ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS islandcount
       FROM #tmp
     ) t
WHERE t.islandcount >= 6
ORDER BY t.id;
And again a DBFiddle

stuck with one query in SQL Server

I had a table named calci. The following was the sample data
-- Sample table: RN is the row sequence, FREQ cycles 1..6, price is the value to total.
CREATE TABLE calci
(
    RN    INT,
    FREQ  INT,
    price INT
);

INSERT INTO calci (RN, FREQ, price)
VALUES
    ( 1, 1, 3), ( 2, 2, 4), ( 3, 3, 5), ( 4, 4, 6), ( 5, 5, 7), ( 6, 6, 8),  -- first cycle
    ( 7, 1, 5), ( 8, 2, 6), ( 9, 3, 9), (10, 4, 7), (11, 5, 5), (12, 6, 1),  -- second cycle
    (13, 1, 3);                                                              -- start of third cycle
I required only 3 records based on the sum of freq (1-6)
The result should be like
price
33 -----sum of first 6 records
33 -----sum of next six records
3 -----sum of last six record i.e last record

please check the following query which will solve the above problem
-- RN - FREQ is constant within one 1..6 cycle, so it identifies the cycle.
SELECT SUM(c.price)
FROM calci AS c
GROUP BY (c.RN - c.FREQ)

-- Total price per consecutive block of six rows: integer division maps
-- RN 1..6 to bucket 0, RN 7..12 to bucket 1, RN 13..18 to bucket 2, and so on.
-- A single GROUP BY replaces the former UNION of two queries: UNION removes
-- duplicate rows, so two blocks with the same total (33 and 33 in the sample)
-- collapsed into one row, and the bucket list (0, 1) was hard-coded.
SELECT SUM(price)
FROM calci
GROUP BY (RN - 1) / 6
ORDER BY (RN - 1) / 6

I think you can use a query like this:
-- A row starts a new group when the previous row (in RN order) had FREQ = 6;
-- the running total of those start flags is the group number.
;WITH flagged AS
(
    SELECT
        c.*,
        CASE LAG(c.FREQ) OVER (ORDER BY c.RN, c.FREQ)
            WHEN 6 THEN 1
            ELSE 0
        END AS change
    FROM calci AS c
),
numbered AS
(
    SELECT
        f.*,
        SUM(f.change) OVER (ORDER BY f.RN) AS group_no
    FROM flagged AS f
)
SELECT SUM(n.price) AS sumFreq
FROM numbered AS n
GROUP BY n.group_no;
You can replace the expression for the change column with CASE WHEN FREQ - LAG(FREQ) OVER (ORDER BY RN, FREQ) = 1 THEN 0 ELSE 1 END for more flexibility: it starts a new group on any jump in FREQ, not only after a 6 ;).

TRY THIS
-- Within each FREQ value the rows are ranked by RN, so the n-th occurrence of
-- every FREQ gets rank n; grouping by that rank totals one cycle at a time.
;WITH calci_rows (RN, FREQ, PRICE) AS
(
    SELECT v.RN, v.FREQ, v.PRICE
    FROM (VALUES
            ( 1, 1, 3), ( 2, 2, 4), ( 3, 3, 5), ( 4, 4, 6), ( 5, 5, 7), ( 6, 6, 8),
            ( 7, 1, 5), ( 8, 2, 6), ( 9, 3, 9), (10, 4, 7), (11, 5, 5), (12, 6, 1),
            (13, 1, 3)
         ) AS v (RN, FREQ, PRICE)
),
ranked (PRICE, cycle_no) AS
(
    SELECT
        r.PRICE,
        DENSE_RANK() OVER (PARTITION BY r.FREQ ORDER BY r.RN)
    FROM calci_rows AS r
)
SELECT SUM(ranked.PRICE)
FROM ranked
GROUP BY ranked.cycle_no

Consecutive Count on Record Result

I'm working on a data structure with list of positive or negative result for each person.
Sample data (id is an identity):
id person result
1 1 0
2 1 1
3 1 1
4 2 1
5 2 0
6 1 1
7 1 0
8 2 0
9 2 0
10 2 0
With this I would like to count the maximum number of consecutive result = 1 for each person. The result in this sample would be
person max_count
1 3
2 1
I have tried using ROW_NUMBER() OVER (PARTITION BY) like this
-- Attempt from the question: numbers the rows inside each (person, result)
-- pair in id order.  TABLE is presumably a placeholder for the real table name.
SELECT person,
ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS max_count
FROM TABLE
but it gives me an accumulative count instead of consecutive one.
What should I do to perform a consecutive count? Any hint would be appreciated. Thanks in advance

This looks like a classic gaps-and-islands problem.
Examine intermediate results of each CTE in the query below to understand what is going on.
Sample data
I added person 3 with two sequences of positive results, so that we could find the longest sequence.
-- Sample data: one row per test result, id gives the chronological order.
-- Person 3 has two separate runs of result = 1 (lengths 4 and 2).
-- A local temp table is used because "DECLARE #name TABLE" is not valid T-SQL
-- and the query that follows reads #T.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0),
(11, 3, 0),
(12, 3, 1),
(13, 3, 1),
(14, 3, 1),
(15, 3, 1),
(16, 3, 0),
(17, 3, 1),
(18, 3, 1),
(19, 3, 0),
(20, 3, 0);
Query
-- Longest run of consecutive result = 1 per person (gaps-and-islands).
WITH islands AS
(
    -- The difference between the per-person row number and the
    -- per-(person, result) row number is constant inside one unbroken run.
    SELECT
        person,
        result,
        ROW_NUMBER() OVER (PARTITION BY person ORDER BY id)
          - ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS island_id
    FROM #T
),
island_sizes AS
(
    -- Length of every run of positive results
    SELECT
        person,
        COUNT(*) AS island_size
    FROM islands
    WHERE result = 1
    GROUP BY
        person,
        island_id
)
SELECT
    person,
    MAX(island_size) AS max_count
FROM island_sizes
GROUP BY person
ORDER BY person;
Result
+--------+-----------+
| person | max_count |
+--------+-----------+
| 1 | 3 |
| 2 | 1 |
| 3 | 4 |
+--------+-----------+

by using Case and SUM we can achieve the above result
-- Local temp table: "DECLARE #name TABLE" is not valid T-SQL.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0);

-- Longest run of consecutive result = 1 per person, using only CASE and SUM.
-- A plain SUM(CASE WHEN result = 1 ...) per person counts ALL positive rows,
-- not the longest consecutive run, so the rows are first split into runs:
-- every non-positive row bumps a running counter (run_group), which means each
-- run_group contains at most one unbroken run of 1s.  A conditional SUM then
-- measures that run and MAX keeps the longest one.
-- Requires SQL Server 2012+ (ordered window aggregate).
SELECT
    runs.person,
    MAX(runs.run_length) AS max_count
FROM
(
    SELECT
        flagged.person,
        SUM(CASE WHEN flagged.result = 1 THEN 1 ELSE 0 END) AS run_length
    FROM
    (
        SELECT
            person,
            result,
            SUM(CASE WHEN result = 1 THEN 0 ELSE 1 END)
                OVER (PARTITION BY person
                      ORDER BY id
                      ROWS UNBOUNDED PRECEDING) AS run_group
        FROM #T
    ) AS flagged
    GROUP BY
        flagged.person,
        flagged.run_group
) AS runs
GROUP BY runs.person
ORDER BY runs.person;

Consolidating subsets in a table

I have a table in SqlServer 2008 with data of the form
UserID StartWeek EndWeek Type
1 1 3 A
1 4 5 A
1 6 10 A
1 11 13 B
1 14 16 A
2 1 5 A
2 6 9 A
2 10 16 B
I'd like to consolidate/condense the adjacent types so that the resulting table looks like this.
UserID StartWeek EndWeek Type
1 1 10 A
1 11 13 B
1 14 16 A
2 1 9 A
2 10 16 B
Does anyone have any suggestions as to the best way to accomplish this? I've been looking at using Row_number and Partition, but I can't get it to behave exactly as I'd like.

There's probably a neater way to do it, but this produces the correct result
-- Local temp table: "DECLARE #name TABLE" is not valid T-SQL.
IF OBJECT_ID('tempdb..#t') IS NOT NULL
    DROP TABLE #t;

CREATE TABLE #t
(UserId TINYINT
,StartWeek TINYINT
,EndWeek TINYINT
,TYPE CHAR(1)
);

-- Plain multi-row insert; the former UNION chain also de-duplicated the rows,
-- which is unnecessary for loading distinct sample data.
INSERT INTO #t (UserId, StartWeek, EndWeek, TYPE)
VALUES (1,1,3,'A')
      ,(1,4,5,'A')
      ,(1,6,10,'A')
      ,(1,11,13,'B')
      ,(1,14,16,'A')
      ,(2,1,5,'A')
      ,(2,6,9,'A')
      ,(2,10,16,'B');

-- Merge adjacent week ranges of the same user and type.
;WITH srcCTE
AS
(
    -- Number each user's ranges of one type in EndWeek order
    SELECT t1.UserId
          ,t1.StartWeek
          ,t1.EndWeek
          ,t1.TYPE
          ,ROW_NUMBER() OVER (PARTITION BY t1.UserId, t1.TYPE
                              ORDER BY t1.EndWeek
                             ) AS rn
    FROM #t AS t1
)
,recCTE
AS
(
    -- Anchor: the first range of every (user, type) starts group 0
    SELECT UserId
          ,StartWeek
          ,EndWeek
          ,TYPE
          ,rn
          ,0 AS grp
    FROM srcCTE
    WHERE rn = 1
    UNION ALL
    -- Step: the next range stays in the current group when it starts the week
    -- after the previous range ended; otherwise it opens a new group
    SELECT s.UserId
          ,s.StartWeek
          ,s.EndWeek
          ,s.TYPE
          ,s.rn
          ,CASE WHEN s.StartWeek - 1 = r.EndWeek
                THEN r.grp
                ELSE r.grp + 1
           END AS grp
    FROM srcCTE AS s
    JOIN recCTE AS r
    ON r.UserId = s.UserId
    AND r.TYPE = s.TYPE
    AND r.rn = s.rn - 1
)
-- One output row per group: earliest start and latest end
SELECT UserId
      ,MIN(StartWeek) AS StartWeek
      ,MAX(EndWeek) AS EndWeek
      ,TYPE
FROM recCTE AS s1
GROUP BY UserId
        ,TYPE
        ,grp
ORDER BY UserId
        ,MIN(StartWeek);

Also using a CTE, but in a slightly different way
-- Local temp table: "DECLARE #name TABLE" is not valid T-SQL.
IF OBJECT_ID('tempdb..#Consolidate') IS NOT NULL
    DROP TABLE #Consolidate;

CREATE TABLE #Consolidate (
    UserID INTEGER, StartWeek INTEGER,
    EndWeek INTEGER, Type CHAR(1));

INSERT INTO #Consolidate (UserID, StartWeek, EndWeek, Type)
VALUES (1, 1, 3, 'A')
      ,(1, 4, 5, 'A')
      ,(1, 6, 10, 'A')
      ,(1, 14, 16, 'A')
      ,(1, 11, 13, 'B')
      ,(2, 1, 5, 'A')
      ,(2, 6, 9, 'A')
      ,(2, 10, 16, 'B');

-- Merge adjacent week ranges of the same user and type.
;WITH ConsolidateCTE AS
(
    -- Anchor: every original range
    SELECT UserID, StartWeek, EndWeek, Type
    FROM #Consolidate
    UNION ALL
    -- Step: extend a range with the one that starts the week after it ends
    -- (same user, same type), keeping the original StartWeek
    SELECT cte.UserID, cte.StartWeek, c.EndWeek, c.Type
    FROM ConsolidateCTE cte
    INNER JOIN #Consolidate c ON
        c.UserID = cte.UserID
        AND c.StartWeek = cte.EndWeek + 1
        AND c.Type = cte.Type
)
-- The inner query keeps the furthest EndWeek reachable from each StartWeek; the
-- outer query keeps the earliest StartWeek that reaches each of those EndWeeks.
SELECT merged.UserID, MIN(merged.StartWeek) AS StartWeek, merged.EndWeek, merged.Type
FROM (
    SELECT UserID, StartWeek, MAX(EndWeek) AS EndWeek, Type
    FROM ConsolidateCTE
    GROUP BY UserID, StartWeek, Type
) merged
GROUP BY merged.UserID, merged.EndWeek, merged.Type
-- named sort keys instead of positional ORDER BY 1, 2, 3
ORDER BY merged.UserID, MIN(merged.StartWeek), merged.EndWeek

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

Window Functions between current row and previous row with specific value - sql

Related

how to calculate consecutive difference using values of two columns? [duplicate]

Find groups containing 6 consecutive 1s in one column

stuck with one query in SQL Server

Consecutive Count on Record Result

Consolidating subsets in a table

Categories

Resources