Related
I'm working on a data structure that holds a list of positive or negative results for each person.
Sample data (id is an identity):
id person result
1 1 0
2 1 1
3 1 1
4 2 1
5 2 0
6 1 1
7 1 0
8 2 0
9 2 0
10 2 0
With this I would like to count the maximum number of consecutive result = 1 for each person. The result in this sample would be
person max_count
1 3
2 1
I have tried using ROW_NUMBER() OVER (PARTITION BY) like this
-- Asker's attempt: numbers the rows inside each (person, result) partition in id order.
-- TABLE stands in for the real table name.
SELECT
    person,
    ROW_NUMBER() OVER (
        PARTITION BY person, result
        ORDER BY id
    ) AS max_count
FROM TABLE
but it gives me an accumulative count instead of consecutive one.
What should I do to perform a consecutive count? Any hint would be appreciated. Thanks in advance
This looks like a classic gaps-and-islands problem.
Examine intermediate results of each CTE in the query below to understand what is going on.
Sample data
I added person 3 with two sequences of positive results, so that we could find the longest sequence.
-- Sample data: one row per test, id gives the chronological order.
-- A table variable must be named @something; a #name is a temporary table and is
-- created with CREATE TABLE. A temp table is used here so that queries reading
-- from #T keep working unchanged.
CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0),
-- person 3 has two separate runs of positives (lengths 4 and 2)
(11, 3, 0),
(12, 3, 1),
(13, 3, 1),
(14, 3, 1),
(15, 3, 1),
(16, 3, 0),
(17, 3, 1),
(18, 3, 1),
(19, 3, 0),
(20, 3, 0);
Query
-- Longest run of consecutive result = 1 per person (gaps-and-islands).
WITH islands AS (
    -- Within one person, the difference between the overall row number and the
    -- row number inside the (person, result) partition stays constant across a
    -- run of equal results, so it identifies the run.
    SELECT
        person,
        result,
        ROW_NUMBER() OVER (PARTITION BY person ORDER BY id)
            - ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS island_id
    FROM #T
),
island_sizes AS (
    -- Size of every run of positive results.
    SELECT
        person,
        COUNT(*) AS island_size
    FROM islands
    WHERE result = 1
    GROUP BY
        person,
        island_id
)
SELECT
    person,
    MAX(island_size) AS max_count
FROM island_sizes
GROUP BY person
ORDER BY person;
Result
+--------+-----------+
| person | max_count |
+--------+-----------+
| 1 | 3 |
| 2 | 1 |
| 3 | 4 |
+--------+-----------+
by using Case and SUM we can achieve the above result
-- Sample data. Table variables are named with a leading @ (DECLARE #T is not valid T-SQL).
DECLARE @T TABLE (id int, person int, result int);

INSERT INTO @T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0);

-- Longest run of consecutive result = 1 per person, using only CASE and SUM.
-- A plain SUM(CASE WHEN result = 1 ...) grouped by person counts every positive
-- row, which equals the longest run only when all positives happen to be adjacent.
SELECT
    runs.person,
    MAX(runs.run_length) AS max_count
FROM (
    SELECT
        marked.person,
        SUM(CASE WHEN marked.result = 1 THEN 1 ELSE 0 END) AS run_length
    FROM (
        SELECT
            person,
            result,
            -- Running count of non-positive rows: it does not change while the
            -- results stay at 1, so it labels each run of positives.
            -- (Windowed SUM with ORDER BY needs SQL Server 2012 or later.)
            SUM(CASE WHEN result = 1 THEN 0 ELSE 1 END) OVER (
                PARTITION BY person
                ORDER BY id
                ROWS UNBOUNDED PRECEDING
            ) AS run_id
        FROM @T
    ) AS marked
    GROUP BY
        marked.person,
        marked.run_id
) AS runs
GROUP BY runs.person
ORDER BY runs.person;
I have a table with 2 columns:
val with values: 0 or 1
id with unique identifiers
-- Builds the temp table #tmp (val, id) from eleven literal rows.
WITH cte (val, id) AS (
              SELECT 0, 0
    UNION ALL SELECT 1, 1
    UNION ALL SELECT 1, 2
    UNION ALL SELECT 0, 3
    UNION ALL SELECT 1, 4
    UNION ALL SELECT 1, 5
    UNION ALL SELECT 1, 6
    UNION ALL SELECT 1, 7
    UNION ALL SELECT 1, 8
    UNION ALL SELECT 1, 9
    UNION ALL SELECT 1, 10
)
SELECT
    cte.val,
    cte.id
INTO #tmp
FROM cte
How do I find the ids at which 6 values = 1 occur in a row (the current row and the 5 before it)?
In the example above: id = 9, id = 10.
It is desirable not to use loops (cursors or while), but something like sum(...) over(...).
Why not LAG() (but you need an order column):
-- Returns the ids whose own val and the five preceding vals (in id order) are all 1.
-- YourTable stands in for the real table name.
SELECT prev.id
FROM (
    SELECT
        id,
        val,
        LAG(val, 1) OVER (ORDER BY id) AS val1,
        LAG(val, 2) OVER (ORDER BY id) AS val2,
        LAG(val, 3) OVER (ORDER BY id) AS val3,
        LAG(val, 4) OVER (ORDER BY id) AS val4,
        LAG(val, 5) OVER (ORDER BY id) AS val5
    FROM YourTable
) AS prev
WHERE prev.val  = 1
  AND prev.val1 = 1
  AND prev.val2 = 1
  AND prev.val3 = 1
  AND prev.val4 = 1
  AND prev.val5 = 1
You can use running sum over a window frame that contains exactly 6 rows (5 prior plus current row):
-- Sums val over a sliding frame of six rows (current row plus the five before it)
-- and keeps the rows where that sum is 6.
SELECT windowed.*
FROM (
    SELECT
        *,
        SUM(val) OVER (
            ORDER BY id
            ROWS BETWEEN 5 PRECEDING AND CURRENT ROW
        ) AS rsum
    FROM #tmp
) AS windowed
WHERE windowed.rsum = 6
Adjust the size of the window and where clause to match the desired value.
Another approach is using ROW_NUMBER on the LAG values
-- Sample data. Table variables are named with a leading @ (declare #tmp is not valid T-SQL).
-- The stray "select 0, 0 union all ..." statement that followed the insert only
-- returned an unrelated result set and has been removed.
DECLARE @tmp TABLE (val int, id int);

INSERT INTO @tmp (val, id) VALUES
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10);

-- Numbers the rows that share the same (val, priorval) pair and keeps those
-- numbered 6 or higher.
-- NOTE: the numbering is per (val, priorval) pair over the whole table, not per
-- run, so a 0 in between does not reset the count. It yields ids 9 and 10 for
-- the data above, but can return wrong ids for other data.
SELECT
    t2.id,
    t2.islandcount
FROM (
    SELECT
        t.id,
        t.val,
        t.priorval,
        ROW_NUMBER() OVER (PARTITION BY t.val, t.priorval ORDER BY t.id) AS islandcount
    FROM (
        SELECT
            id,
            val,
            LAG(val, 1) OVER (ORDER BY id) AS priorval
        FROM @tmp
    ) AS t
) AS t2
WHERE t2.islandcount >= 6;
the result is
id islandcount
9 6
10 7
Try it yourself in this DBFiddle
The advantage of this method is that you can easily change the value from 6 to any other value.
EDIT
As @Zhorov mentioned in the comments, there is a flaw in my code: it returns the wrong results when you add certain rows.
The solution below fixes that. It is based on the solution of @SalmanA, so the credit for the accepted answer should go to him.
-- Sample data. Table variables are named with a leading @ (declare #tmp is not valid T-SQL).
DECLARE @tmp TABLE (val int, id int);

INSERT INTO @tmp (val, id) VALUES
(0, 0), (1, 1), (1, 2), (0, 3), (1, 4), (1, 5), (1, 6), (1, 7), (1, 8), (1, 9), (1, 10)
-- extra rows added after the original sample: a 0 followed by a new, short run of 1s
,(0, 11), (1, 12), (1, 13);

-- islandcount = sum of val over the current row and the five rows before it;
-- keep the rows where it is 6 or more.
SELECT
    t.id,
    t.val,
    t.islandcount
FROM (
    SELECT
        id,
        val,
        SUM(val) OVER (ORDER BY id ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS islandcount
    FROM @tmp
) AS t
WHERE t.islandcount >= 6
ORDER BY t.id;
And again a DBFiddle
I want to be able to sum the values in a certain column between the current row and the latest previous row that has a certain value in another column.
In this example I want to sum the Val Column between the current row and the latest row with a RecType of 2 partitioned by ID ordered by RowNum.
-- Sample data for the "sum since the last RecType = 2" question.
-- A table variable must be named @something; a #name is a temporary table and is
-- created with CREATE TABLE. A temp table is used here so that queries reading
-- from #ExampleTable keep working unchanged.
CREATE TABLE #ExampleTable
(
    Id INT,
    RowNum INT,
    RecType INT,
    Val INT
);

INSERT INTO #ExampleTable
(Id, RowNum, RecType, Val)
VALUES
(1, 1, 1, 1),
(1, 2, 2, 2),
(1, 3, 1, 4),
(1, 4, 1, 8),
(1, 5, 1, 16),
(1, 6, 2, 32),
(1, 7, 1, 64),
(2, 1, 2, 1),
(2, 2, 2, 2),
(2, 3, 1, 4),
(2, 4, 1, 8),
(2, 5, 1, 16),
(2, 6, 1, 32),
(2, 7, 2, 64);
I'm hoping for Results like:
-- Expected output, one row per (Id, RowNum) of the sample data.
-- Table variables are named with a leading @ (DECLARE #Results is not valid T-SQL).
DECLARE @Results TABLE
(
    Id INT,
    RowNum INT,
    SumSinceLast2 INT
);

INSERT INTO @Results
(Id, RowNum, SumSinceLast2)
VALUES
(1, 1, 0),
(1, 2, 0),
(1, 3, 6), -- 4 + 2
(1, 4, 14), -- 8 + 4 + 2
(1, 5, 30), -- 16 + 8 + 4 + 2
(1, 6, 62), -- 32 + 16 + 8 + 4 + 2
(1, 7, 96), -- 64 + 32
(2, 1, 0),
(2, 2, 3), -- 2 + 1
(2, 3, 6), -- 4 + 2
(2, 4, 14), -- 8 + 4 + 2
(2, 5, 30), -- 16 + 8 + 4 + 2
(2, 6, 62), -- 32 + 16 + 8 + 4 + 2
(2, 7, 126); -- 64 + 32 + 16 + 8 + 4 + 2
Is this something that I should be able to easily do in SQL Server 2017? I was hoping window functions would be of use here.
This doesn't return exactly what you want, but the results seem more sensible. Each "2" starts a new group. The values are then cumulatively summed within the group:
-- Each RecType = 2 row opens a new group within its Id; val is then summed
-- cumulatively inside the group. Rows before the first "2" (grp_2 = 0) get 0.
SELECT
    g.*,
    CASE
        WHEN g.grp_2 <> 0
            THEN SUM(g.val) OVER (PARTITION BY g.id, g.grp_2 ORDER BY g.rownum)
        ELSE 0
    END AS result
FROM (
    SELECT
        src.*,
        -- running count of RecType = 2 rows seen so far within the Id
        COUNT(CASE WHEN src.RecType = 2 THEN 1 END) OVER (
            PARTITION BY src.id
            ORDER BY src.rownum
        ) AS grp_2
    FROM #ExampleTable AS src
) AS g
ORDER BY
    g.id,
    g.rownum;
Here is a db<>fiddle.
The results can be tweaked (it makes for a much messier query) to "fix" the values for the "2" the way you have them. However, this version makes more sense to me, because "2" are not counted in two separate groups.
Here is a tweaked version that double counts the "2"s:
-- Variant of the grouped running sum in which a RecType = 2 row (other than the
-- first one of its id) also gets a total that reaches back to the previous "2".
select e.*,
-- AND binds tighter than OR: the result is 0 for rows before the first "2"
-- (grp_2 = 0) and for the first "2" itself (grp_2 = 1 and RecType = 2).
(case when grp_2 = 0 or grp_2 = 1 and RecType = 2
then 0
when RecType <> 2
-- non-"2" rows: running total within the (id, grp_2) partition in rownum order
then sum(val) over (partition by id, grp_2 order by rownum)
-- remaining "2" rows: total of the whole (id, grp_2_desc) partition, plus the val
-- of the previous row (by rownum) that has the same id and RecType
else sum(val) over (partition by id, grp_2_desc) + lag(val) over (partition by id, Rectype order by rownum)
end) as result
from (select e.*,
-- grp_2: running count of RecType = 2 rows per id, walking rownum upwards
sum(case when RecType = 2 then 1 else 0 end) over
(partition by id
order by rownum
) as grp_2,
-- grp_2_desc: the same count walking rownum downwards, so a "2" row shares its
-- value with the non-"2" rows directly before it
sum(case when RecType = 2 then 1 else 0 end) over
(partition by id
order by rownum desc
) as grp_2_desc
from #ExampleTable e
) e
order by id, rownum;
I know there is a solution already but since I wrote the code, I am going to post it here anyway.
-- For every row, sum Val from the closest earlier RecType = 2 row (same Id)
-- up to the row itself.
SELECT
    src.Id,
    bounds.CurrentRow,
    -- no earlier "2": the range collapses to the row itself and counts as 0
    SUM(CASE WHEN bounds.ClosestMinRow = bounds.CurrentRow THEN 0 ELSE src.Val END)
FROM #ExampleTable AS src
INNER JOIN (
    -- lower and upper RowNum of the range belonging to each row
    SELECT
        cur.Id,
        cur.RowNum AS CurrentRow,
        ISNULL(
            (
                -- latest RecType = 2 row strictly before the current row
                SELECT MAX(prev.RowNum)
                FROM #ExampleTable AS prev
                WHERE prev.RecType = 2
                  AND prev.RowNum < cur.RowNum
                  AND prev.Id = cur.Id
            ),
            cur.RowNum
        ) AS ClosestMinRow
    FROM #ExampleTable AS cur
) AS bounds
    ON src.Id = bounds.Id
   AND src.RowNum >= bounds.ClosestMinRow
   AND src.RowNum <= bounds.CurrentRow
GROUP BY
    src.Id,
    bounds.CurrentRow
ORDER BY
    src.Id,
    bounds.CurrentRow
I'm working on a data structure that holds a list of positive or negative results for each person.
Sample data (id is an identity):
id person result
1 1 0
2 1 1
3 1 1
4 2 1
5 2 0
6 1 1
7 1 0
8 2 0
9 2 0
10 2 0
With this I would like to count the maximum number of consecutive result = 1 for each person. The result in this sample would be
person max_count
1 3
2 1
I have tried using ROW_NUMBER() OVER (PARTITION BY) like this
-- Asker's attempt: numbers the rows inside each (person, result) partition in id order.
-- TABLE stands in for the real table name.
SELECT
    person,
    ROW_NUMBER() OVER (
        PARTITION BY person, result
        ORDER BY id
    ) AS max_count
FROM TABLE
but it gives me an accumulative count instead of consecutive one.
What should I do to perform a consecutive count? Any hint would be appreciated. Thanks in advance
This looks like a classic gaps-and-islands problem.
Examine intermediate results of each CTE in the query below to understand what is going on.
Sample data
I added person 3 with two sequences of positive results, so that we could find the longest sequence.
-- Sample data: one row per test, id gives the chronological order.
-- A table variable must be named @something; a #name is a temporary table and is
-- created with CREATE TABLE. A temp table is used here so that queries reading
-- from #T keep working unchanged.
CREATE TABLE #T (id int, person int, result int);

INSERT INTO #T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0),
-- person 3 has two separate runs of positives (lengths 4 and 2)
(11, 3, 0),
(12, 3, 1),
(13, 3, 1),
(14, 3, 1),
(15, 3, 1),
(16, 3, 0),
(17, 3, 1),
(18, 3, 1),
(19, 3, 0),
(20, 3, 0);
Query
-- Longest run of consecutive result = 1 per person (gaps-and-islands).
WITH islands AS (
    -- Within one person, the difference between the overall row number and the
    -- row number inside the (person, result) partition stays constant across a
    -- run of equal results, so it identifies the run.
    SELECT
        person,
        result,
        ROW_NUMBER() OVER (PARTITION BY person ORDER BY id)
            - ROW_NUMBER() OVER (PARTITION BY person, result ORDER BY id) AS island_id
    FROM #T
),
island_sizes AS (
    -- Size of every run of positive results.
    SELECT
        person,
        COUNT(*) AS island_size
    FROM islands
    WHERE result = 1
    GROUP BY
        person,
        island_id
)
SELECT
    person,
    MAX(island_size) AS max_count
FROM island_sizes
GROUP BY person
ORDER BY person;
Result
+--------+-----------+
| person | max_count |
+--------+-----------+
| 1 | 3 |
| 2 | 1 |
| 3 | 4 |
+--------+-----------+
by using Case and SUM we can achieve the above result
-- Sample data. Table variables are named with a leading @ (DECLARE #T is not valid T-SQL).
DECLARE @T TABLE (id int, person int, result int);

INSERT INTO @T (id, person, result) VALUES
(1 , 1, 0),
(2 , 1, 1),
(3 , 1, 1),
(4 , 2, 1),
(5 , 2, 0),
(6 , 1, 1),
(7 , 1, 0),
(8 , 2, 0),
(9 , 2, 0),
(10, 2, 0);

-- Longest run of consecutive result = 1 per person, using only CASE and SUM.
-- A plain SUM(CASE WHEN result = 1 ...) grouped by person counts every positive
-- row, which equals the longest run only when all positives happen to be adjacent.
SELECT
    runs.person,
    MAX(runs.run_length) AS max_count
FROM (
    SELECT
        marked.person,
        SUM(CASE WHEN marked.result = 1 THEN 1 ELSE 0 END) AS run_length
    FROM (
        SELECT
            person,
            result,
            -- Running count of non-positive rows: it does not change while the
            -- results stay at 1, so it labels each run of positives.
            -- (Windowed SUM with ORDER BY needs SQL Server 2012 or later.)
            SUM(CASE WHEN result = 1 THEN 0 ELSE 1 END) OVER (
                PARTITION BY person
                ORDER BY id
                ROWS UNBOUNDED PRECEDING
            ) AS run_id
        FROM @T
    ) AS marked
    GROUP BY
        marked.person,
        marked.run_id
) AS runs
GROUP BY runs.person
ORDER BY runs.person;
I have to identify missing records from the example below.
Category BatchNo TransactionNo
+++++++++++++++++++++++++++++++++
CAT1 1 1
CAT1 1 2
CAT1 2 3
CAT1 2 4
CAT1 2 5
CAT1 3 6
CAT1 3 7
CAT1 3 8
CAT1 5 12
CAT1 5 13
CAT1 5 14
CAT1 5 15
CAT1 7 18
CAT2 1 1
CAT2 1 2
CAT2 3 6
CAT2 3 7
CAT2 3 8
CAT2 3 9
CAT2 4 10
CAT2 4 11
CAT2 4 12
CAT2 6 14
I need a script that will identify missing records as below
Category BatchNo
+++++++++++++++++++
CAT1 4
CAT1 6
CAT2 2
CAT2 5
I do not need to know that CAT1 8 and CAT2 7 are not there as they potentially have not been inserted yet.
You can create a temporary result set containing every possible batch number up to the maximum batch number of each category, then select the batch numbers that are not present in the table.
-- Demo table with gaps in BatchNo per category.
CREATE TABLE TEMP (
    Category varchar(10),
    BatchNo int,
    TransactionNo int
);

INSERT INTO TEMP (Category, BatchNo, TransactionNo) VALUES
('CAT1', 1, 1),
('CAT1', 1, 2),
('CAT1', 2, 3),
('CAT1', 2, 4),
('CAT1', 2, 5),
('CAT1', 3, 6),
('CAT1', 3, 7),
('CAT1', 3, 8),
('CAT1', 5, 9),
('CAT1', 7, 10),
('CAT2', 1, 1),
('CAT2', 1, 2),
('CAT2', 3, 3),
('CAT2', 4, 4),
('CAT2', 4, 5),
('CAT2', 4, 6),
('CAT2', 6, 7);

-- Recursive CTE: for every category, generate the batch numbers 1 .. MAX(BatchNo).
WITH BatchNo (BatchID, Category, MaxBatch) AS (
    SELECT 1, Category, MAX(BatchNo) AS MaxBatch
    FROM TEMP
    GROUP BY Category
    UNION ALL
    SELECT BatchID + 1, Category, MaxBatch
    FROM BatchNo
    WHERE BatchID < MaxBatch
)
SELECT
    BatchNo.Category,
    BatchNo.BatchID
FROM BatchNo
-- NOT EXISTS instead of NOT IN: BatchNo is nullable, and a single NULL in the
-- subquery would make NOT IN return no rows at all.
WHERE NOT EXISTS (
    SELECT 1
    FROM TEMP AS t
    WHERE t.Category = BatchNo.Category
      AND t.BatchNo = BatchNo.BatchID
)
ORDER BY
    BatchNo.Category,
    BatchNo.BatchID
-- The default recursion limit is 100 levels, which would raise an error once a
-- category's highest batch number exceeds it. The recursion stops at MaxBatch,
-- so removing the cap is safe.
OPTION (MAXRECURSION 0);

DROP TABLE TEMP;
This one uses a Tally Table. For reference: http://www.sqlservercentral.com/articles/T-SQL/62867/
SAMPLE DATA
-- Sample table matching the data in the question.
CREATE TABLE MyTable (
    Category      varchar(10),
    BatchNo       int,
    TransactionNo int
)

INSERT INTO MyTable (Category, BatchNo, TransactionNo)
VALUES
    ('CAT1', 1, 1),
    ('CAT1', 1, 2),
    ('CAT1', 2, 3),
    ('CAT1', 2, 4),
    ('CAT1', 2, 5),
    ('CAT1', 3, 6),
    ('CAT1', 3, 7),
    ('CAT1', 3, 8),
    ('CAT1', 5, 12),
    ('CAT1', 5, 13),
    ('CAT1', 5, 14),
    ('CAT1', 5, 15),
    ('CAT1', 7, 18),
    ('CAT2', 1, 1),
    ('CAT2', 1, 2),
    ('CAT2', 3, 6),
    ('CAT2', 3, 7),
    ('CAT2', 3, 8),
    ('CAT2', 3, 9),
    ('CAT2', 4, 10),
    ('CAT2', 4, 11),
    ('CAT2', 4, 12),
    ('CAT2', 6, 14);
SOLUTION
-- Missing batch numbers per category, using an inline tally (numbers) table.
WITH e1 (n) AS (
    SELECT ten.n
    FROM (VALUES (1), (1), (1), (1), (1), (1), (1), (1), (1), (1)) AS ten (n)
),                                                            -- 10 rows
e2 (n) AS (SELECT 1 FROM e1 AS a CROSS JOIN e1 AS b),         -- 100 rows
e4 (n) AS (SELECT 1 FROM e2 AS a CROSS JOIN e2 AS b),         -- 10,000 rows
tally (n) AS (
    -- 1 .. highest BatchNo in the table
    SELECT
        TOP (SELECT TOP 1 BatchNo FROM MyTable ORDER BY BatchNo DESC)
        ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
    FROM e4
)
SELECT
    c.Category,
    t.n
FROM tally AS t
CROSS JOIN (
    -- highest batch seen per category
    SELECT
        Category,
        MAX(BatchNo) AS MaxBatchNo
    FROM MyTable
    GROUP BY Category
) AS c
LEFT JOIN MyTable AS m
    ON m.BatchNo = t.n
   AND m.Category = c.Category
WHERE m.Category IS NULL        -- no row for this (category, number)
  AND t.n < c.MaxBatchNo        -- only numbers below the category's highest batch
ORDER BY
    c.Category,
    t.n
It is better to create a projection table and use standard left join to find gaps:
-- Number table 1..1000 used to project every candidate batch number.
-- Table variables are named with a leading @ (declare #Sequencer is not valid T-SQL).
DECLARE @Sequencer TABLE (
    Id int PRIMARY KEY
);

INSERT INTO @Sequencer (Id)
SELECT TOP (1000) ROW_NUMBER() OVER (ORDER BY (SELECT NULL))
FROM master.dbo.spt_values;

-- dbo.[Table] stands in for the real table name; TABLE is a reserved word, so
-- it has to be bracket-quoted to parse.
SELECT *
FROM @Sequencer AS s
INNER JOIN (
    -- highest batch per category
    SELECT Category, MAX(BatchNo) AS [Size]
    FROM dbo.[Table]
    GROUP BY Category
) AS cat
    ON cat.Size > s.Id
LEFT JOIN (
    SELECT DISTINCT Category, BatchNo
    FROM dbo.[Table]
) AS t
    ON t.Category = cat.Category
   AND t.BatchNo = s.Id
-- keep the (category, number) pairs that have no matching batch
WHERE t.BatchNo IS NULL;
Of course, in real life you might need more than 1000 rows, so adjust it accordingly.
-- Lists the batch numbers missing between each category's lowest and highest batch.
WITH Numbers AS (
    -- Counts down from the highest batch number in the table to 1.
    SELECT MAX(BatchNo) AS Number
    FROM #MyTable
    UNION ALL
    SELECT Number - 1
    FROM Numbers
    WHERE Number > 1
)
,CategorySizes AS (
    -- First and last batch present for each category.
    SELECT Category
        ,MIN(BatchNo) AS StartBatch
        ,MAX(BatchNo) AS EndBatch
    FROM #MyTable
    GROUP BY Category
)
,PossibleBatches AS (
    -- Every batch number a category could have within its own range.
    SELECT Category
        ,Numbers.Number AS BatchNo
    FROM CategorySizes
    CROSS JOIN Numbers
    WHERE Numbers.Number BETWEEN CategorySizes.StartBatch AND CategorySizes.EndBatch
)
,MissingBatches AS (
    -- Possible batches with no matching row in the table.
    SELECT PossibleBatches.Category
        ,PossibleBatches.BatchNo
    FROM PossibleBatches
    LEFT JOIN #MyTable
        ON #MyTable.Category = PossibleBatches.Category
        AND #MyTable.BatchNo = PossibleBatches.BatchNo
    WHERE #MyTable.BatchNo IS NULL
)
SELECT Category
    ,BatchNo
FROM MissingBatches
-- Numbers recurses once per batch number; the default limit of 100 levels would
-- raise an error for larger batch numbers. The recursion stops at 1, so
-- removing the cap is safe.
OPTION (MAXRECURSION 0);
Without using a loop or a cursor (FETCH), you can use this one (#Category is my equivalent of your table name). Performance is perfect.
-- Distinct (Category, BatchNo) pairs, numbered by an identity column.
-- Table variables are named with a leading @ (DECLARE #t is not valid T-SQL).
DECLARE @t TABLE (RN INT IDENTITY, Category VARCHAR(255), BatchNo INT);

INSERT INTO @t (Category, BatchNo)
SELECT DISTINCT Category, BatchNo
FROM #Category
-- The comparison of RN with RN + 1 below only makes sense when RN follows
-- (Category, BatchNo) order; without ORDER BY the identity values may be
-- assigned in any order.
ORDER BY Category, BatchNo;

-- A pair whose successor (RN + 1, same category) is not BatchNo + 1 marks a gap.
-- NOTE(review): only the first missing number of a gap (BatchNo + 1) is
-- returned; a gap wider than one batch is reported once.
SELECT
    a.Category,
    a.BatchNo + 1 AS BatchNo
FROM @t AS a
CROSS APPLY (
    SELECT *
    FROM @t AS b
    WHERE a.RN + 1 = b.RN
      AND a.Category = b.Category
      AND a.BatchNo + 1 != b.BatchNo
) AS x;
-- Demo temp table with gaps in BatchNo per category.
CREATE TABLE #cat (
    Category      varchar(10),
    BatchNo       int,
    TransactionNo int
)

INSERT INTO #cat (Category, BatchNo, TransactionNo)
VALUES
    ('CAT1', 1, 1),
    ('CAT1', 1, 2),
    ('CAT1', 2, 3),
    ('CAT1', 2, 4),
    ('CAT1', 2, 5),
    ('CAT1', 3, 6),
    ('CAT1', 3, 7),
    ('CAT1', 3, 8),
    ('CAT1', 5, 9),
    ('CAT1', 7, 10),
    ('CAT2', 1, 1),
    ('CAT2', 1, 2),
    ('CAT2', 3, 3),
    ('CAT2', 4, 4),
    ('CAT2', 4, 5),
    ('CAT2', 4, 6),
    ('CAT2', 6, 7);

-- A batch number is reported as missing when it directly follows an existing
-- batch of the same category, has no rows itself, and the existing batch is
-- not the category's highest one.
SELECT DISTINCT
    cur.Category,
    cur.BatchNo + 1
FROM #cat AS cur
WHERE NOT EXISTS (
        SELECT 1
        FROM #cat AS nxt
        WHERE nxt.BatchNo = cur.BatchNo + 1
          AND nxt.Category = cur.Category
      )
  AND cur.BatchNo <> (
        SELECT MAX(top_batch.BatchNo)
        FROM #cat AS top_batch
        WHERE top_batch.Category = cur.Category
      )