how to calculate consecutive difference using values of two columns? [duplicate] - sql

I'm working on a data structure with list of positive or negative result for each person.
Sample data (id is an identity):
id person result
1 1 0
2 1 1
3 1 1
4 2 1
5 2 0
6 1 1
7 1 0
8 2 0
9 2 0
10 2 0
With this I would like to count the maximum number of consecutive result = 1 for each person. The result in this sample would be
person max_count
1 3
2 1
I have tried using ROW_NUMBER() OVER (PARTITION BY) like this
-- Asker's attempt: numbers the rows inside each (person, result) partition in id order,
-- so max_count is a running row number per partition rather than a streak length.
-- NOTE(review): TABLE looks like a placeholder for the real table name - confirm.
SELECT person,
ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS max_count
FROM TABLE
but it gives me an accumulative count instead of consecutive one.
What should I do to perform a consecutive count? Any hint would be appreciated. Thanks in advance

This looks like a classic gaps-and-islands problem.
Examine intermediate results of each CTE in the query below to understand what is going on.
Sample data
I added person 3 with two sequences of positive results, so that we could find the longest sequence.
-- Sample data for the gaps-and-islands query.
-- A table variable must be named with a leading @, so `DECLARE #T TABLE` does not parse.
-- A local temporary table is used instead, which keeps the #T name the query reads from.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0),
-- person 3 has two separate runs of positive results (4 rows, then 2 rows)
(11, 3, 0),
(12, 3, 1),
(13, 3, 1),
(14, 3, 1),
(15, 3, 1),
(16, 3, 0),
(17, 3, 1),
(18, 3, 1),
(19, 3, 0),
(20, 3, 0);
Query
-- Longest run of consecutive result = 1 rows per person (gaps-and-islands).
WITH islands AS (
    -- Within one person, the per-person row number minus the per-(person, result) row number
    -- stays constant across a run of equal results, so it identifies the run.
    SELECT
        src.person,
        src.result,
        ROW_NUMBER() OVER (PARTITION BY src.person ORDER BY src.id)
          - ROW_NUMBER() OVER (PARTITION BY src.person, src.result ORDER BY src.id) AS island_id
    FROM #T AS src
),
streaks AS (
    -- Length of every run of positive results.
    SELECT
        i.person,
        COUNT(*) AS streak_length
    FROM islands AS i
    WHERE i.result = 1
    GROUP BY
        i.person,
        i.island_id
)
SELECT
    s.person,
    MAX(s.streak_length) AS max_count
FROM streaks AS s
GROUP BY s.person
ORDER BY s.person;
Result
+--------+-----------+
| person | max_count |
+--------+-----------+
| 1 | 3 |
| 2 | 1 |
| 3 | 4 |
+--------+-----------+

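By using CASE and SUM we can get the above result for this sample data (this counts every positive result per person, so it only matches when a person's positive results are all consecutive):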
-- Sample data held in a table variable (table variable names must start with @).
DECLARE @T TABLE (id int, person int, result int);

INSERT INTO @T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0);

-- Total number of positive results per person.
-- NOTE: this is an overall count, not the length of a consecutive run. It equals the longest
-- streak only when all of a person's positive rows are adjacent, as they are in this sample.
SELECT
    person,
    SUM(CASE WHEN result = 1 THEN 1 ELSE 0 END) AS positive_count
FROM @T
GROUP BY person;

Related

Find groups containing 6 consecutive 1s in one column

I have a table with 2 columns:
val with values: 0 or 1
id with unique identifiers
-- Build the #tmp sample table: one (val, id) row per literal pair below.
SELECT
    src.val,
    src.id
INTO #tmp
FROM (VALUES
    (0, 0),
    (1, 1),
    (1, 2),
    (0, 3),
    (1, 4),
    (1, 5),
    (1, 6),
    (1, 7),
    (1, 8),
    (1, 9),
    (1, 10)
) AS src (val, id)
How do I find the ids with 6 values = 1 in a row?
In the example above: id = 9, id = 10.
It is desirable not to use loops (cursors or while), but something like sum(...) over(...).
Why not LAG() (but you need an order column):
-- Return every id whose own val and the five preceding vals (in id order) are all 1.
SELECT runs.id
FROM (
    SELECT
        src.id,
        src.val,
        LAG(src.val, 1) OVER (ORDER BY src.id) AS val1,
        LAG(src.val, 2) OVER (ORDER BY src.id) AS val2,
        LAG(src.val, 3) OVER (ORDER BY src.id) AS val3,
        LAG(src.val, 4) OVER (ORDER BY src.id) AS val4,
        LAG(src.val, 5) OVER (ORDER BY src.id) AS val5
    FROM YourTable AS src
) AS runs
WHERE runs.val  = 1
  AND runs.val1 = 1
  AND runs.val2 = 1
  AND runs.val3 = 1
  AND runs.val4 = 1
  AND runs.val5 = 1
You can use running sum over a window frame that contains exactly 6 rows (5 prior plus current row):
-- Keep the rows where val summed over the current row and the 5 rows before it (by id) is 6.
SELECT windowed.*
FROM (
    SELECT
        src.*,
        SUM(src.val) OVER (
            ORDER BY src.id
            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
        ) AS rsum
    FROM #tmp AS src
) AS windowed
WHERE windowed.rsum = 6
Adjust the size of the window and where clause to match the desired value.
Another approach is using ROW_NUMBER on the LAG values
-- Sample data in a table variable (the name must start with @; `declare #tmp table` does not parse).
declare @tmp table (val int, id int);

insert into @tmp (val, id) values
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10);

-- Number the rows that share the same (val, priorval) pair in id order and keep those numbered 6 or more.
-- NOTE: the numbering runs over the whole table, not per run of 1s, so rows from separate
-- runs are counted together. It yields the expected ids only for data shaped like this sample.
select t2.id,
       t2.islandcount
from ( select t.id,
              t.val,
              t.priorval,
              row_number() over (partition by t.val, t.priorval order by t.id) as islandcount
       from ( select id,
                     val,
                     lag(val, 1) over (order by id) as priorval
              from @tmp
            ) t
     ) t2
where t2.islandcount >= 6
order by t2.id;
the result is
id islandcount
9 6
10 7
Try it yourself in this DBFiddle
The advantage of this method is that you can easily change the value from 6 to any other value.
EDIT
As @Zhorov mentioned in the comments, there is a flaw in my code: it returns the wrong results when you add certain rows.
This solution fixes that. It is based on the solution of @SalmanA, so the credit for the accepted answer should go to him.
-- Sample data in a table variable (the name must start with @; `declare #tmp table` does not parse).
declare @tmp table (val int, id int);

insert into @tmp (val, id) values
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10)
-- extra rows appended after the original sample
,(0, 11), (1, 12), (1, 13);

-- islandcount = sum of val over the current row and the 5 rows before it (by id).
-- The filter keeps rows where that six-row sum is at least 6.
select t.id,
       t.val,
       t.islandcount
from ( select id,
              val,
              sum(val) over (order by id rows between 5 preceding and current row) as islandcount
       from @tmp
     ) t
where t.islandcount >= 6
order by t.id;
And again a DBFiddle

Select rows using group by and in each group get column values based on highest of another column value

I need to get latest field based on another field in group by
we have
Table "SchoolReview"
Id SchoolId Review Point
1 1 rv1 8
2 1 rv2 7
3 2 rv3 4
4 2 rv4 7
5 3 rv5 2
6 3 rv6 8
I need to group by SchoolId and the inside group I need to get Review and Point from highest "Id" column.
I don't need the "Id" column, but even if I get it for this solution it's okay.
Result I am looking for shall look like this.
SchoolId Review Point
1 rv2 7
2 rv4 7
3 rv6 8
Any one experienced in MS SQL Server can help in this regard?
Using sample data from other answer
-- Sample rows, materialised into the #Data temp table.
SELECT
    v.Id,
    v.SchoolId,
    v.Review,
    v.Point
INTO #Data
FROM (VALUES
    (1, 1, 'rv1', 8),
    (2, 1, 'rv2', 7),
    (3, 2, 'rv3', 4),
    (4, 2, 'rv4', 7),
    (5, 3, 'rv5', 2),
    (6, 3, 'rv6', 8)
) AS v (Id, SchoolId, Review, Point);

-- Review and Point of the row with the highest Id in each school.
SELECT
    S.SchoolId,
    S.Review,
    S.Point
FROM (
    -- Highest Id per school.
    SELECT
        S1.SchoolId,
        MAX(S1.Id) AS Id
    FROM #Data AS S1
    GROUP BY S1.SchoolId
) AS X
INNER JOIN #Data AS S
    ON S.Id = X.Id
   AND S.SchoolId = X.SchoolId
ORDER BY X.SchoolId
;
output
You do not need to group the rows, you simply need to select the appropriate rows from the table. In this case, using ROW_NUMBER() is an option:
Table:
-- Sample rows, materialised into a new table named Data.
SELECT
    v.Id,
    v.SchoolId,
    v.Review,
    v.Point
INTO Data
FROM (VALUES
    (1, 1, 'rv1', 8),
    (2, 1, 'rv2', 7),
    (3, 2, 'rv3', 4),
    (4, 2, 'rv4', 7),
    (5, 3, 'rv5', 2),
    (6, 3, 'rv6', 8)
) AS v (Id, SchoolId, Review, Point)
Statement:
-- One row per school: the row with the highest Id.
-- Ordering by the per-school row number and taking TOP (1) WITH TIES keeps every row numbered 1.
SELECT TOP (1) WITH TIES
    d.SchoolId,
    d.Review,
    d.Point
FROM Data AS d
ORDER BY ROW_NUMBER() OVER (PARTITION BY d.SchoolId ORDER BY d.Id DESC)
Result:
SchoolId Review Point
---------------------
1 rv2 7
2 rv4 7
3 rv6 8

Window Functions between current row and previous row with specific value

I want to be able to sum the values in a certain column between the current row and the latest previous row with a certain value in another column.
In this example I want to sum the Val Column between the current row and the latest row with a RecType of 2 partitioned by ID ordered by RowNum.
-- Example input.
-- A table variable must be named with a leading @, so `DECLARE #ExampleTable TABLE` does not parse.
-- A local temporary table is used instead, which keeps the #ExampleTable name the queries read from.
IF OBJECT_ID('tempdb..#ExampleTable') IS NOT NULL
    DROP TABLE #ExampleTable;

CREATE TABLE #ExampleTable
(
    Id INT,
    RowNum INT,
    RecType INT,
    Val INT
);

INSERT INTO #ExampleTable
    (Id, RowNum, RecType, Val)
VALUES
    (1, 1, 1, 1),
    (1, 2, 2, 2),
    (1, 3, 1, 4),
    (1, 4, 1, 8),
    (1, 5, 1, 16),
    (1, 6, 2, 32),
    (1, 7, 1, 64),
    (2, 1, 2, 1),
    (2, 2, 2, 2),
    (2, 3, 1, 4),
    (2, 4, 1, 8),
    (2, 5, 1, 16),
    (2, 6, 1, 32),
    (2, 7, 2, 64);
I'm hoping for Results like:
-- Expected output, one row per (Id, RowNum) of the example input.
-- Held in a table variable (table variable names must start with @).
DECLARE @Results TABLE
(
    Id INT,
    RowNum INT,
    SumSinceLast2 INT
);

INSERT INTO @Results
    (Id, RowNum, SumSinceLast2)
VALUES
    (1, 1, 0),
    (1, 2, 0),
    (1, 3, 6), -- 4 + 2
    (1, 4, 14), -- 8 + 4 + 2
    (1, 5, 30), -- 16 + 8 + 4 + 2
    (1, 6, 62), -- 32 + 16 + 8 + 4 + 2
    (1, 7, 96), -- 64 + 32
    (2, 1, 0),
    (2, 2, 3), -- 2 + 1
    (2, 3, 6), -- 4 + 2
    (2, 4, 14), -- 8 + 4 + 2
    (2, 5, 30), -- 16 + 8 + 4 + 2
    (2, 6, 62), -- 32 + 16 + 8 + 4 + 2
    (2, 7, 126); -- 64 + 32 + 16 + 8 + 4 + 2
Is this something that I should be able to easily do in SQL Server 2017? I was hoping window functions would be of use here.
This doesn't return exactly what you want, but the results seem more sensible. Each "2" starts a new group. The values are then cumulatively summed within the group:
-- Running sum of Val that restarts at every RecType = 2 row (per Id, in RowNum order).
SELECT
    grouped.*,
    CASE grouped.grp_2
        -- rows before the first RecType = 2 row of an Id report 0
        WHEN 0 THEN 0
        ELSE SUM(grouped.Val) OVER (PARTITION BY grouped.Id, grouped.grp_2 ORDER BY grouped.RowNum)
    END AS result
FROM (
    SELECT
        src.*,
        -- number of RecType = 2 rows seen so far for this Id; each one opens a new group
        SUM(CASE WHEN src.RecType = 2 THEN 1 ELSE 0 END)
            OVER (PARTITION BY src.Id ORDER BY src.RowNum) AS grp_2
    FROM #ExampleTable AS src
) AS grouped
ORDER BY grouped.Id, grouped.RowNum;
Here is a db<>fiddle.
The results can be tweaked (it makes for a much messier query) to "fix" the values for the "2" the way you have them. However, this version makes more sense to me, because "2" are not counted in two separate groups.
Here is a tweaked version that double counts the "2"s:
-- Variant in which a RecType = 2 row also closes the range opened by the previous RecType = 2 row.
SELECT
    g.*,
    CASE
        -- rows before the first RecType = 2 row of an Id, and that first row itself, report 0
        WHEN g.grp_2 = 0 OR (g.grp_2 = 1 AND g.RecType = 2)
            THEN 0
        -- ordinary rows: running sum since the latest RecType = 2 row (inclusive)
        WHEN g.RecType <> 2
            THEN SUM(g.Val) OVER (PARTITION BY g.Id, g.grp_2 ORDER BY g.RowNum)
        -- later RecType = 2 rows: the whole grp_2_desc partition (the rows after the previous
        -- RecType = 2 row up to this one) plus the previous RecType = 2 row's Val
        ELSE
            SUM(g.Val) OVER (PARTITION BY g.Id, g.grp_2_desc)
            + LAG(g.Val) OVER (PARTITION BY g.Id, g.RecType ORDER BY g.RowNum)
    END AS result
FROM (
    SELECT
        src.*,
        -- RecType = 2 rows counted forwards in RowNum order
        SUM(CASE WHEN src.RecType = 2 THEN 1 ELSE 0 END)
            OVER (PARTITION BY src.Id ORDER BY src.RowNum) AS grp_2,
        -- RecType = 2 rows counted backwards in RowNum order
        SUM(CASE WHEN src.RecType = 2 THEN 1 ELSE 0 END)
            OVER (PARTITION BY src.Id ORDER BY src.RowNum DESC) AS grp_2_desc
    FROM #ExampleTable AS src
) AS g
ORDER BY g.Id, g.RowNum;
I know there is a solution already but since I wrote the code, I am going to post it here anyway.
-- Sum the range: for every row, add up Val from the closest earlier RecType = 2 row
-- (same Id) through the row itself. Rows with no earlier RecType = 2 row report 0.
SELECT
    val_rows.Id,
    ranges.CurrentRow,
    -- a range collapsed to a single row means "no previous RecType = 2 row": report 0
    SUM(CASE WHEN ranges.ClosestMinRow = ranges.CurrentRow THEN 0 ELSE val_rows.Val END) AS SumSinceLast2
FROM #ExampleTable AS val_rows
INNER JOIN (
    -- One range per row: [ClosestMinRow, CurrentRow].
    SELECT
        cur.Id,
        cur.RowNum AS CurrentRow,
        -- fall back to the row's own RowNum when there is no earlier RecType = 2 row
        COALESCE(floor_range.RowNum, cur.RowNum) AS ClosestMinRow
    FROM #ExampleTable AS cur
    OUTER APPLY (
        -- latest earlier RecType = 2 row for the same Id (NULL when there is none)
        SELECT MAX(prev.RowNum) AS RowNum
        FROM #ExampleTable AS prev
        WHERE prev.RecType = 2
          AND prev.RowNum < cur.RowNum
          AND prev.Id = cur.Id
    ) AS floor_range
) AS ranges
    ON val_rows.Id = ranges.Id
   AND val_rows.RowNum BETWEEN ranges.ClosestMinRow AND ranges.CurrentRow
GROUP BY
    val_rows.Id,
    ranges.CurrentRow
ORDER BY
    val_rows.Id,
    ranges.CurrentRow;

Match rows that include one of each at least once in SQL

I have a users table:
ID Name OID TypeID
1 a 1 1
2 b 1 2
3 c 1 3
4 d 2 1
5 e 2 1
6 f 2 2
7 g 3 2
8 h 3 2
9 i 3 2
for this table, I want to filter by OID and TypeID so that I get the rows that it is filtered by OID and that includes all 1, 2, and 3 in TypeID.
For example, where OID=1, we have 1, 2, and 3 in TypeID but I shouldn't get the rows with IDs 4-6 because for IDs 4-6, OIDs are the same but TypeID does not include all of each(1, 2, and 3).
You can do :
-- OIDs that have all three TypeIDs 1, 2 and 3.
-- COUNT(DISTINCT typeid) rather than COUNT(*): an OID whose rows are typed 1, 1, 2 also has
-- three matching rows but is missing TypeID 3.
-- `users` is the table described in the question (`table` is a reserved word).
select oid
from users t
where typeid in (1,2,3)
group by oid
having count(distinct typeid) = 3;
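count(distinct typeid) is required whenever an oid can contain duplicate typeid values (as oid 2 and oid 3 do in the sample data); count(*) is only safe when each (oid, typeid) pair is unique.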
you could use exists
-- Return the oid of every row whose OID has all three TypeIDs 1, 2 and 3.
-- The inner query is aliased t2 so it can be correlated with the outer t1, and the HAVING
-- clause counts distinct TypeIDs. `users` is the table described in the question.
select t1.oid
from users t1
where exists ( select 1
               from users t2
               where t2.oid = t1.oid
                 and t2.TypeID in (1, 2, 3)
               group by t2.oid
               having count(distinct t2.TypeID) = 3
             )
Assume the TypeID values are 1, 2, 3.
if you are using sql-server, you can try this.
-- Sample data in a table variable (table variable names must start with @).
DECLARE @SampleData TABLE (ID INT, Name VARCHAR(5), OID INT, TypeID INT);

INSERT INTO @SampleData (ID, Name, OID, TypeID) VALUES
(1 , 'a', 1, 1),
(2 , 'b', 1, 2),
(3 , 'c', 1, 3),
(4 , 'd', 2, 1),
(5 , 'e', 2, 1),
(6 , 'f', 2, 2),
(7 , 'g', 3, 2),
(8 , 'h', 3, 2),
(9 , 'i', 3, 2);

-- Keep a row only when its OID is not missing any required TypeID.
-- The subquery lists the required TypeIDs (1, 2, 3) that have no row for the outer row's OID.
SELECT D.ID, D.Name, D.OID, D.TypeID
FROM @SampleData AS D
WHERE NOT EXISTS (
    SELECT 1
    FROM (VALUES (1), (2), (3)) AS T (TypeID)
    LEFT JOIN @SampleData AS D1
        ON D1.TypeID = T.TypeID
       AND D1.OID = D.OID
    WHERE D1.TypeID IS NULL
)
ORDER BY D.ID;
Result:
ID Name OID TypeID
----------- ----- ----------- -----------
1 a 1 1
2 b 1 2
3 c 1 3

Consecutive Count on Record Result

I'm working on a data structure with list of positive or negative result for each person.
Sample data (id is an identity):
id person result
1 1 0
2 1 1
3 1 1
4 2 1
5 2 0
6 1 1
7 1 0
8 2 0
9 2 0
10 2 0
With this I would like to count the maximum number of consecutive result = 1 for each person. The result in this sample would be
person max_count
1 3
2 1
I have tried using ROW_NUMBER() OVER (PARTITION BY) like this
-- Asker's attempt: numbers the rows inside each (person, result) partition in id order,
-- so max_count is a running row number per partition rather than a streak length.
-- NOTE(review): TABLE looks like a placeholder for the real table name - confirm.
SELECT person,
ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS max_count
FROM TABLE
but it gives me an accumulative count instead of consecutive one.
What should I do to perform a consecutive count? Any hint would be appreciated. Thanks in advance
This looks like a classic gaps-and-islands problem.
Examine intermediate results of each CTE in the query below to understand what is going on.
Sample data
I added person 3 with two sequences of positive results, so that we could find the longest sequence.
-- Sample data for the gaps-and-islands query.
-- A table variable must be named with a leading @, so `DECLARE #T TABLE` does not parse.
-- A local temporary table is used instead, which keeps the #T name the query reads from.
IF OBJECT_ID('tempdb..#T') IS NOT NULL
    DROP TABLE #T;

CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0),
-- person 3 has two separate runs of positive results (4 rows, then 2 rows)
(11, 3, 0),
(12, 3, 1),
(13, 3, 1),
(14, 3, 1),
(15, 3, 1),
(16, 3, 0),
(17, 3, 1),
(18, 3, 1),
(19, 3, 0),
(20, 3, 0);
Query
-- Longest run of consecutive result = 1 rows per person (gaps-and-islands).
WITH islands AS (
    -- Within one person, the per-person row number minus the per-(person, result) row number
    -- stays constant across a run of equal results, so it identifies the run.
    SELECT
        src.person,
        src.result,
        ROW_NUMBER() OVER (PARTITION BY src.person ORDER BY src.id)
          - ROW_NUMBER() OVER (PARTITION BY src.person, src.result ORDER BY src.id) AS island_id
    FROM #T AS src
),
streaks AS (
    -- Length of every run of positive results.
    SELECT
        i.person,
        COUNT(*) AS streak_length
    FROM islands AS i
    WHERE i.result = 1
    GROUP BY
        i.person,
        i.island_id
)
SELECT
    s.person,
    MAX(s.streak_length) AS max_count
FROM streaks AS s
GROUP BY s.person
ORDER BY s.person;
Result
+--------+-----------+
| person | max_count |
+--------+-----------+
| 1 | 3 |
| 2 | 1 |
| 3 | 4 |
+--------+-----------+
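By using CASE and SUM we can get the above result for this sample data (this counts every positive result per person, so it only matches when a person's positive results are all consecutive):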
-- Sample data held in a table variable (table variable names must start with @).
DECLARE @T TABLE (id int, person int, result int);

INSERT INTO @T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0);

-- Total number of positive results per person.
-- NOTE: this is an overall count, not the length of a consecutive run. It equals the longest
-- streak only when all of a person's positive rows are adjacent, as they are in this sample.
SELECT
    person,
    SUM(CASE WHEN result = 1 THEN 1 ELSE 0 END) AS positive_count
FROM @T
GROUP BY person;