I need help to do a count based on a date condition.
I have a DB similar to the following:
MainDB
ID
report_date
traffic_v
traffic_ul
traffic_dl
a
1/12/2021
0
0
100
a
2/12/2021
0
0
100
a
3/12/2021
100
0
100
a
4/12/2021
100
0
100
b
1/12/2021
0
100
100
b
2/12/2021
0
0
0
b
3/12/2021
0
100
0
b
4/12/2021
100
100
0
I need to count the records whose traffic is zero, for which I have this query:
-- Traffic totals per sector, keeping only the sectors for which at least
-- one of the three totals is exactly zero.
SELECT
    ID AS SECTOR,
    SUM(TRAFFIC) AS TRAFICO_VOZ,
    SUM(TRAFFIC_DL_G) + SUM(TRAFFIC_DL_E) AS TRAFFIC_DL,
    SUM(TRAFFIC_UL_G) + SUM(TRAFFIC_UL_E) AS TRAFFIC_UL
FROM MainDB
GROUP BY ID
-- "0 IN (a, b, c)" is the same predicate as "a = 0 OR b = 0 OR c = 0".
HAVING 0 IN (
    SUM(TRAFFIC),
    SUM(TRAFFIC_DL_G) + SUM(TRAFFIC_DL_E),
    SUM(TRAFFIC_UL_G) + SUM(TRAFFIC_UL_E)
)
But I need to count backwards from the current date: for how many consecutive days has each value been zero?
The count should only cover the most recent run of zero records.
So you should get the following result:
Expected result
ID
traffic_v
count_v
traffic_ul
count_ul
traffic_dl
count_dl
a
200
0
0
4
400
0
b
100
0
200
0
0
3
I do not know how to set the condition so that it detects the date on which I began to have zero records and perform the count of days until the current date.
Whenever a record is different from zero, the count must restart.
The db is updated daily.
The totals are displayed correctly by the query, as I only care about zero data.
I tried to use SUM / CASE, but it counts from the earliest date on which it finds a zero, regardless of whether a non-zero record occurs after it.
You can use a MODEL clause:
-- For every id, report the state of each traffic measure on its latest
-- report_date:
--   count_* : length of the current run of consecutive zero values
--   sum_*   : running total of the non-zero values since the last zero
SELECT id,
count_traffic_v,
sum_traffic_v,
count_traffic_ul,
sum_traffic_ul,
count_traffic_dl,
sum_traffic_dl
FROM (
SELECT *
FROM (
SELECT m.*,
-- rn = 1 marks the most recent report_date of each id
ROW_NUMBER() OVER (PARTITION BY id ORDER BY report_date DESC) AS rn
FROM mainDB m
)
MODEL
PARTITION BY (id)
DIMENSION BY (report_date)
MEASURES (
rn,
traffic_v,
-- the 0 literals only declare the computed measures; the rules below overwrite them
0 AS count_traffic_v,
0 AS sum_traffic_v,
traffic_ul,
0 AS count_traffic_ul,
0 AS sum_traffic_ul,
traffic_dl,
0 AS count_traffic_dl,
0 AS sum_traffic_dl
)
-- Each rule reads the same measure at cv() - 1, i.e. the previous calendar
-- day. NOTE(review): this presumably relies on one row per id per day; if a
-- day is missing the referenced cell should come back NULL and the COALESCE
-- restarts the streak - confirm against real data.
RULES AUTOMATIC ORDER (
-- zero today: extend yesterday's streak (or start at 1); non-zero: reset to 0
count_traffic_v[report_date] = CASE traffic_v[cv()]
WHEN 0
THEN COALESCE(count_traffic_v[cv() - 1] + 1, 1)
ELSE 0
END,
-- zero today: reset the running sum; non-zero: add today's value to yesterday's sum
sum_traffic_v[report_date] = CASE traffic_v[cv()]
WHEN 0
THEN 0
ELSE COALESCE(sum_traffic_v[cv() - 1], 0) + traffic_v[cv()]
END,
-- same pair of rules for traffic_ul
count_traffic_ul[report_date] = CASE traffic_ul[cv()]
WHEN 0
THEN COALESCE(count_traffic_ul[cv() - 1] + 1, 1)
ELSE 0
END,
sum_traffic_ul[report_date] = CASE traffic_ul[cv()]
WHEN 0
THEN 0
ELSE COALESCE(sum_traffic_ul[cv() - 1], 0) + traffic_ul[cv()]
END,
-- same pair of rules for traffic_dl
count_traffic_dl[report_date] = CASE traffic_dl[cv()]
WHEN 0
THEN COALESCE(count_traffic_dl[cv() - 1] + 1, 1)
ELSE 0
END,
sum_traffic_dl[report_date] = CASE traffic_dl[cv()]
WHEN 0
THEN 0
ELSE COALESCE(sum_traffic_dl[cv() - 1], 0) + traffic_dl[cv()]
END
)
)
-- keep only the latest day per id, which carries the current streak values
WHERE rn = 1;
Which, for the sample data:
-- Sample data: two ids ('a' and 'b'), one row per day for 2021-12-01 .. 2021-12-04.
-- Value columns, in order: traffic_v, traffic_ul, traffic_dl.
CREATE TABLE maindb (ID, report_date, traffic_v, traffic_ul, traffic_dl) AS
SELECT 'a', DATE '2021-12-01', 0, 0, 100 FROM DUAL UNION ALL
SELECT 'a', DATE '2021-12-02', 0, 0, 100 FROM DUAL UNION ALL
SELECT 'a', DATE '2021-12-03', 100, 0, 100 FROM DUAL UNION ALL
SELECT 'a', DATE '2021-12-04', 100, 0, 100 FROM DUAL UNION ALL
SELECT 'b', DATE '2021-12-01', 0, 100, 100 FROM DUAL UNION ALL
SELECT 'b', DATE '2021-12-02', 0, 0, 0 FROM DUAL UNION ALL
SELECT 'b', DATE '2021-12-03', 0, 100, 0 FROM DUAL UNION ALL
SELECT 'b', DATE '2021-12-04', 100, 100, 0 FROM DUAL;
Outputs:
ID
COUNT_TRAFFIC_V
SUM_TRAFFIC_V
COUNT_TRAFFIC_UL
SUM_TRAFFIC_UL
COUNT_TRAFFIC_DL
SUM_TRAFFIC_DL
a
0
200
4
0
0
400
b
0
100
0
200
3
0
db<>fiddle here
Related
I have a table t with:
PLACE
LOCATION
TS
ID
AMOUNT
GOING_IN
GOING_OUT
1
10
2020-10-01
1
100
10
0
1
10
2020-10-02
1
110
5
-50
1
10
2020-10-03
1
75
0
-100
1
10
2020-10-04
1
-25
30
0
1
10
2020-10-05
1
5
0
0
1
10
2020-10-06
1
5
38
-300
1
10
2020-10-07
1
-257
0
0
1
10
2020-10-01
2
1
10
0
1
10
2020-10-02
2
11
0
-12
1
10
2020-10-03
2
-1
0
-100
1
10
2020-10-04
2
-101
0
0
2
20
2020-11-15
1
18
20
0
2
20
2020-11-16
1
38
0
0
2
20
2020-11-15
3
-9
20
-31
2
20
2020-11-16
3
-20
0
0
So due to SAP legacy stuff some logistic data is mangled which may lead to negative inventory.
To check how severe the error is I need to count for each PLACE, LOCATION, ID
the number of rows that have a positive AMOUNT AND which do not have a negative AMOUNT before
the number of rows that have a negative AMOUNT AND any positive AMOUNT that has a negative AMOUNT anywhere before
As you can see in my table there are (for PLACE=1, LOCATION=10, ID=1) 3 rows with a positive AMOUNT without any negative AMOUNT before. But then there is a negative AMOUNT and some positive AMOUNTS afterwards --> those 4 rows should not be counted for COUNT_CORRECT but should count for COUNT_WRONG.
So in this example table my query should return:
PLACE
LOCATION
TOTAL
COUNT_CORRECT
COUNT_WRONG
RATIO
1
10
11
5
6
0.55
2
20
4
2
2
0.5
My code so far:
-- One row per (PLACE, LOCATION, ID) and day.
CREATE OR REPLACE TABLE ANALYTICS.t (
    PLACE     INT  NOT NULL,
    LOCATION  INT  NOT NULL,
    TS        DATE NOT NULL,
    ID        INT  NOT NULL,
    AMOUNT    INT  NOT NULL,
    GOING_IN  INT  NOT NULL,
    GOING_OUT INT  NOT NULL,
    PRIMARY KEY (PLACE, LOCATION, ID, TS)
);
-- Sample rows for ANALYTICS.t.
-- GOING_OUT is stored as a negative quantity, and for PLACE 1 / ID 1 each
-- AMOUNT equals the previous day's AMOUNT + GOING_IN + GOING_OUT
-- (e.g. 2020-10-06: 5 + 38 - 300 = -257, the AMOUNT on 2020-10-07).
INSERT INTO ANALYTICS.t
(PLACE, LOCATION, TS, ID, AMOUNT, GOING_IN, GOING_OUT)
VALUES
(1, 10, '2020-10-01', 1, 100, 10, 0)
, (1, 10, '2020-10-02', 1, 110, 5, -50)
, (1, 10, '2020-10-03', 1, 75, 0, -100)
, (1, 10, '2020-10-04', 1, -25, 30, 0)
, (1, 10, '2020-10-05', 1, 5, 0, 0)
, (1, 10, '2020-10-06', 1, 5, 38, -300)  -- was 300: outflows are negative
, (1, 10, '2020-10-07', 1, -257, 0, 0)
, (1, 10, '2020-10-04', 2, 1, 10, 0)
, (1, 10, '2020-10-05', 2, 11, 0, -12)
, (1, 10, '2020-10-06', 2, -1, 0, -100)
, (1, 10, '2020-10-07', 2, -101, 0, 0)
, (2, 20, '2020-11-15', 1, 18, 12, 0)
, (2, 20, '2020-11-16', 1, 30, 0, 0)
, (2, 20, '2020-11-15', 3, -9, 20, -31)
, (2, 20, '2020-11-16', 3, -20, 0, 0)
;
Then
-- Counts rows per (PLACE, LOCATION) purely by the sign of AMOUNT; it does
-- not look at whether a negative AMOUNT occurred earlier.
SELECT
    flagged.PLACE,
    flagged.LOCATION,
    SUM(flagged.is_non_negative) AS 'COUNT_CORRECT',
    SUM(flagged.is_negative) AS 'COUNT_WRONG',
    ROUND((SUM(flagged.is_negative) / COUNT(flagged.AMOUNT)) * 100, 2) AS 'ratio'
FROM (
    -- one 0/1 flag per row for each sign class
    SELECT
        PLACE,
        LOCATION,
        AMOUNT,
        CASE WHEN AMOUNT >= 0 THEN 1 ELSE 0 END AS is_non_negative,
        CASE WHEN AMOUNT < 0 THEN 1 ELSE 0 END AS is_negative
    FROM t
) flagged
GROUP BY
    flagged.PLACE,
    flagged.LOCATION
ORDER BY
    flagged.PLACE,
    flagged.LOCATION
;
But I don't know how I can filter for "AND which do not have a negative AMOUNT before" and counting by PLACE, LOCATION, ID as an intermediate step.
Any help appreciated.
I'm not sure if I understand your question correctly, but the following gives you the number of rows before the first negative amount per (place, location) partition.
The subselect computes the row numbers of all rows with a negative amount. Then we can select the minimum of this as the first row with a negative amount.
-- COUNT_WRONG / COUNT_CORRECT / RATIO per (place, location).
--
-- Rows are numbered by ts inside each (place, location, id); numbering per
-- id keeps the order deterministic (ts is unique per id) and lets every id
-- be judged against its own first negative amount. A row is "wrong" when it
-- is at or after the first negative amount of its id. The per-id counts are
-- then summed per (place, location).
SELECT
    place,
    location,
    SUM(wrong_rows) AS COUNT_WRONG,
    SUM(total_rows) - SUM(wrong_rows) AS COUNT_CORRECT,
    -- * 1.0 avoids integer division on engines that truncate INT / INT
    ROUND(SUM(wrong_rows) * 1.0 / SUM(total_rows), 2) AS RATIO
FROM
    ( SELECT
        place,
        location,
        id,
        COUNT(*) AS total_rows,
        -- MIN(pos) is the row number of the first negative amount; when an
        -- id has no negative amount MIN(pos) is NULL and wrong_rows is 0
        COUNT(*) - COALESCE(MIN(pos) - 1, COUNT(*)) AS wrong_rows
    FROM
        ( SELECT
            place,
            location,
            id,
            CASE
                WHEN amount < 0
                THEN ROW_NUMBER() OVER (
                    PARTITION BY
                        place,
                        location,
                        id
                    ORDER BY
                        ts)
                ELSE NULL
            END pos -- Row numbers of rows with negative amount, else NULL
        FROM
            t) numbered
    GROUP BY
        place,
        location,
        id) per_id
GROUP BY
    place,
    location;
I have edited the query. Please let me know if this works.
ALL_ENTRIES query has all the row numbers for the table t partitioned by place,location and ID and ordered by timestamp.
TABLE1 is used to compute the first negative entry. This is done by joining with ALL_ENTRIES and selecting the minimum row number where amount < 0.
TABLE2 is used to compute the last correct entry. Basically ALL_ENTRIES is joined with TABLE1 with the condition that the row numbers should be lesser than the row number in TABLE1. This will give us the row number corresponding to the last correct entry.
TABLE1 and TABLE2 are joined with ALL_ENTRIES to calculate the max row number, which gives the total entries.
In the final select statement I have used case when statement to account for IDs where there are no negative amount values. In those scenarios all the entries should be correct. Hence, the max row number is considered for those cases.
-- Per (PLACE, LOCATION, ID): total rows, rows before the first negative
-- AMOUNT (COUNT_CORRECT), the remainder (COUNT_WRONG) and
-- RATIO = COUNT_CORRECT / TOTAL.
WITH ALL_ENTRIES AS (
    -- every row of t, numbered chronologically within its (PLACE, LOCATION, ID)
    SELECT
        PLACE,
        LOCATION,
        ID,
        TS,
        AMOUNT,
        ROW_NUMBER() OVER (PARTITION BY PLACE, LOCATION, ID ORDER BY TS) AS ROW_NUM
    FROM t
),
TABLE1 AS (
    -- row number of the first negative AMOUNT; IDs without one have no row here
    SELECT
        PLACE,
        LOCATION,
        ID,
        MIN(ROW_NUM) AS FIRST_NEGATIVE_ENTRY
    FROM ALL_ENTRIES
    WHERE AMOUNT < 0
    GROUP BY PLACE, LOCATION, ID
),
TABLE2 AS (
    -- row number of the last entry before the first negative one; IDs whose
    -- very first row is negative have no row here
    SELECT
        ae.PLACE,
        ae.LOCATION,
        ae.ID,
        MAX(ae.ROW_NUM) AS LAST_CORRECT_ENTRY
    FROM ALL_ENTRIES ae
    INNER JOIN TABLE1 table1
        ON ae.PLACE = table1.PLACE
        AND ae.LOCATION = table1.LOCATION
        AND ae.ID = table1.ID
        AND ae.ROW_NUM < table1.FIRST_NEGATIVE_ENTRY
    GROUP BY ae.PLACE, ae.LOCATION, ae.ID
),
PER_ID AS (
    SELECT
        ae.PLACE,
        ae.LOCATION,
        ae.ID,
        MAX(ae.ROW_NUM) AS TOTAL,
        CASE
            -- no negative AMOUNT at all: every entry is correct
            WHEN MAX(table1.FIRST_NEGATIVE_ENTRY) IS NULL THEN MAX(ae.ROW_NUM)
            -- otherwise the rows before the first negative one; 0 when the
            -- first row is already negative
            ELSE COALESCE(MAX(table2.LAST_CORRECT_ENTRY), 0)
        END AS COUNT_CORRECT
    FROM ALL_ENTRIES ae
    LEFT JOIN TABLE1 table1
        ON ae.PLACE = table1.PLACE
        AND ae.LOCATION = table1.LOCATION
        AND ae.ID = table1.ID
    LEFT JOIN TABLE2 table2
        ON ae.PLACE = table2.PLACE
        AND ae.LOCATION = table2.LOCATION
        AND ae.ID = table2.ID
    GROUP BY ae.PLACE, ae.LOCATION, ae.ID
)
SELECT
    PLACE,
    LOCATION,
    ID,
    TOTAL,
    COUNT_CORRECT,
    TOTAL - COUNT_CORRECT AS COUNT_WRONG,
    -- * 1.0 avoids integer division on engines that truncate INT / INT
    COUNT_CORRECT * 1.0 / TOTAL AS RATIO
FROM PER_ID
I have the data for student absence which I got after some transformations. The data is day by day:
-- Day-by-day absence flags for two students, listed per student in date order.
WITH datasample (StudentID, DateID, AbsentToday, AbsentYesterday) AS (
    SELECT 1, 20180101, 0, 0 UNION ALL
    SELECT 1, 20180102, 1, 0 UNION ALL
    SELECT 1, 20180103, 1, 1 UNION ALL
    SELECT 1, 20180104, 1, 1 UNION ALL
    SELECT 1, 20180105, 1, 1 UNION ALL
    SELECT 1, 20180106, 0, 1 UNION ALL
    SELECT 2, 20180101, 0, 0 UNION ALL
    SELECT 2, 20180102, 1, 0 UNION ALL
    SELECT 2, 20180103, 1, 1 UNION ALL
    SELECT 2, 20180104, 0, 1 UNION ALL
    SELECT 2, 20180105, 1, 0 UNION ALL
    SELECT 2, 20180106, 1, 1 UNION ALL
    SELECT 2, 20180107, 0, 1
)
SELECT
    StudentID,
    DateID,
    AbsentToday,
    AbsentYesterday
FROM datasample
ORDER BY
    StudentID,
    DateID
I need to add a column (AbsencePeriodInMonth) which would calculate the student's absence period during the month.
For example, StudentID=1 was absent in one consecutive period during the month and StudentID=2 had two periods, something like this:
StudentID DateID AbsentToday AbsentYesterday AbsencePeriodInMonth
1 20180101 0 0 0
1 20180102 1 0 1
1 20180103 1 1 1
1 20180104 1 1 1
1 20180105 1 1 1
1 20180106 0 1 0
2 20180101 0 0 0
2 20180102 1 0 1
2 20180103 1 1 1
2 20180104 0 1 0
2 20180105 1 0 2
2 20180106 1 1 2
2 20180107 0 1 0
My goal is actually to calculate the consecutive absent days prior to each day in the fact table, I think I can do it if I get the AbsencePeriodInMonth column, by having this added to my query after the *:
-- Select-list fragment: for absent rows, the position of the row (by DateID)
-- within its (StudentID, AbsencePeriodInMonth) group; 0 for non-absent rows.
,CASE WHEN AbsentToday = 1 THEN DENSE_RANK() OVER(PARTITION BY StudentID, AbsencePeriodInMonth ORDER BY DateID)
ELSE 0
END AS DaysAbsent
Any idea on how I can add that AbsencePeriodInMonth or maybe calculate the consecutive absent days in some other way?
You can identify each period by counting the number of 0s before hand. Then you can enumerate them using dense_rank().
-- Number each student's absence periods 1, 2, 3, ... in date order;
-- rows on which the student is present get 0.
select ds.*,
       (case when absenttoday = 1
             -- absenttoday is part of the partition so that only the grp values
             -- of absent rows are ranked; ranking all rows together would skip
             -- numbers whenever two or more present days occur in a row
             then dense_rank() over (partition by studentid, absenttoday order by grp)
             else 0
        end) as AbsencePeriodInMonth
from (select ds.*,
             -- grp = number of present days seen so far; it stays constant
             -- across a run of consecutive absent days, so it identifies the run
             sum(case when absenttoday = 0 then 1 else 0 end)
                 over (partition by studentid order by dateid) as grp
      from datasample ds
     ) ds
order by StudentID, DateID;
Here is a SQL Fiddle.
Using Recursive CTE and Dense_Rank
WITH datasample AS (
    SELECT 1 AS StudentID, 20180101 AS DateID, 0 AS AbsentToday, 0 AS AbsentYesterday UNION ALL
    SELECT 1, 20180102, 1, 0 UNION ALL
    SELECT 1, 20180103, 1, 1 UNION ALL
    SELECT 1, 20180104, 1, 1 UNION ALL
    SELECT 1, 20180105, 1, 1 UNION ALL
    SELECT 1, 20180106, 0, 1 UNION ALL
    SELECT 2, 20180101, 0, 0 UNION ALL
    SELECT 2, 20180102, 1, 0 UNION ALL
    SELECT 2, 20180103, 1, 1 UNION ALL
    SELECT 2, 20180104, 0, 1 UNION ALL
    SELECT 2, 20180105, 1, 0 UNION ALL
    SELECT 2, 20180106, 1, 1 UNION ALL
    SELECT 2, 20180107, 0, 1
),
-- Every absent day, tagged with the DateID on which its absence period began.
period_days AS (
    -- anchor: first day of each period (absent today, not absent yesterday)
    SELECT
        StudentID,
        DateID,
        AbsentToday,
        AbsentYesterday,
        DateID AS PeriodStart
    FROM datasample
    WHERE AbsentToday = 1
      AND AbsentYesterday = 0

    UNION ALL

    -- step: the row whose DateID is the previous one + 1, while still absent
    SELECT
        nxt.StudentID,
        nxt.DateID,
        nxt.AbsentToday,
        nxt.AbsentYesterday,
        prev.PeriodStart
    FROM datasample nxt
    INNER JOIN period_days prev
        ON nxt.StudentID = prev.StudentID
       AND nxt.DateID = prev.DateID + 1
    WHERE nxt.AbsentToday = 1
),
-- Periods numbered per student in order of their start date.
numbered AS (
    SELECT
        StudentID,
        DateID,
        DENSE_RANK() OVER (PARTITION BY StudentID ORDER BY PeriodStart) AS PeriodNo
    FROM period_days
)
SELECT
    d.StudentID,
    d.DateID,
    d.AbsentToday,
    d.AbsentYesterday,
    IsNull(n.PeriodNo, 0) AS AbsencePeriodInMonth
FROM datasample d
LEFT JOIN numbered n
    ON d.StudentID = n.StudentID
   AND n.DateID = d.DateID
ORDER BY
    d.StudentID,
    d.DateID
I have a table that looks like below:
ID|Date |X| Flag |
1 |1/1/16|2| 0
2 |1/1/16|0| 0
3 |1/1/16|0| 0
1 |2/1/16|0| 0
2 |2/1/16|1| 0
3 |2/1/16|2| 0
1 |3/1/16|2| 0
2 |3/1/16|1| 0
3 |3/1/16|2| 0
I'm trying to make it so that flag is populated if X=2 in the PREVIOUS month. As such, it should look like this:
ID|Date |X| Flag |
1 |1/1/16|2| 0
2 |1/1/16|0| 0
3 |1/1/16|0| 0
1 |2/1/16|2| 1
2 |2/1/16|1| 0
3 |2/1/16|2| 0
1 |3/1/16|2| 1
2 |3/1/16|1| 0
3 |3/1/16|2| 1
I use this in SQL:
`select ID, date, X, flag into Work_Table from t
-- Step 2: add the previous row's X per ID (ordered by date) as Prev and
-- write the result into Flag_table.
(
Select ID, date, X, flag,
Lag(X) Over (Partition By ID Order By date Asc) As Prev into Flag_table
From Work_Table
)
-- Step 3: raise the flag on rows whose previous X was 2.
Update [dbo].[Flag_table]
Set flag = 1
where prev = '2'
-- Step 4: copy the flag back to t, matching on ID and date.
UPDATE t
Set t.flag = [dbo].[Flag_table].flag FROM T
JOIN [dbo].[Flag_table]
ON t.ID= [dbo].[Flag_table].ID where T.date = [dbo].[Flag_table].date`
However I cannot do this in Bigquery. Any ideas?
Below is for BigQuery Standard SQL
#standardSQL
-- flag = 1 when the previous row of the same id (ordered by dt) had x = 2.
-- The first row of each id has no previous row (LAG is NULL) and gets 0.
SELECT
  id,
  dt,
  x,
  CASE
    WHEN LAG(x = 2) OVER (PARTITION BY id ORDER BY dt) THEN 1
    ELSE 0
  END AS flag
FROM `project.dataset.work_table`
You can test / play with it using dummy data from your question as
#standardSQL
-- Same query, run against inline dummy data taken from the question.
WITH `project.dataset.work_table` AS (
  SELECT 1 id, '1/1/16' dt, 2 x, 0 flag UNION ALL
  SELECT 2, '1/1/16', 0, 0 UNION ALL
  SELECT 3, '1/1/16', 0, 0 UNION ALL
  SELECT 1, '2/1/16', 0, 0 UNION ALL
  SELECT 2, '2/1/16', 1, 0 UNION ALL
  SELECT 3, '2/1/16', 2, 0 UNION ALL
  SELECT 1, '3/1/16', 2, 0 UNION ALL
  SELECT 2, '3/1/16', 1, 0 UNION ALL
  SELECT 3, '3/1/16', 2, 0
)
SELECT
  id,
  dt,
  x,
  -- 1 when the previous row of the same id had x = 2, otherwise 0
  CASE
    WHEN LAG(x = 2) OVER (PARTITION BY id ORDER BY dt) THEN 1
    ELSE 0
  END AS flag
FROM `project.dataset.work_table`
ORDER BY
  dt,
  id
with result as
Row id dt x flag
1 1 1/1/16 2 0
2 2 1/1/16 0 0
3 3 1/1/16 0 0
4 1 2/1/16 0 1
5 2 2/1/16 1 0
6 3 2/1/16 2 0
7 1 3/1/16 2 0
8 2 3/1/16 1 0
9 3 3/1/16 2 1
I have data in the below format
g_name amt flag
g1 0 0
g1 0 0
g1 10 1
g1 0 0
g1 15 2
g1 0 0
and I would require in the below format
n1 should take the amt from the row where flag reaches 1 and retain it until the end; similarly, n2 should take the amt from the row where flag reaches 2 and retain it until the end. Please help me do this with window functions, without needing joins.
g_name amt flag n1 n2
g1 0 0 0 0
g1 0 0 0 0
g1 10 1 10 0
g1 0 0 10 0
g1 15 2 10 15
g1 0 0 10 15
I added a column for ordering - change as needed. I also added a few more rows with a different g_name, presumably this must be done "by g_name".
This is a good test case for the first_value() analytic function. It has the ability to ignore nulls - so we make the amt NULL when flag is not 1 (or 2, etc.) and then apply first_value() with the proper PARTITION BY and ORDER BY clauses.
with
  test_data ( id, g_name, amt, flag ) as (
    select 1, 'g1', 0, 0 from dual union all
    select 2, 'g1', 0, 0 from dual union all
    select 3, 'g1', 10, 1 from dual union all
    select 4, 'g1', 0, 0 from dual union all
    select 5, 'g1', 15, 2 from dual union all
    select 6, 'g1', 0, 0 from dual union all
    select 1, 'g2', 0, 0 from dual union all
    select 2, 'g2', 4, 1 from dual union all
    select 3, 'g2', 3, 2 from dual union all
    select 4, 'g2', 0, 0 from dual
  ),
  -- end of test data; the solution starts with the next subquery
  marked as (
    -- amt kept only on the rows carrying the flag of interest, NULL elsewhere
    select id,
           g_name,
           amt,
           flag,
           case when flag = 1 then amt end as amt_at_flag1,
           case when flag = 2 then amt end as amt_at_flag2
    from   test_data
  )
select id,
       g_name,
       amt,
       flag,
       -- first non-NULL value seen so far within the g_name; 0 until one appears
       coalesce(
         first_value(amt_at_flag1 ignore nulls)
           over (partition by g_name order by id),
         0) as n1,
       coalesce(
         first_value(amt_at_flag2 ignore nulls)
           over (partition by g_name order by id),
         0) as n2
from   marked
order  by g_name, id
;
ID G_NAME AMT FLAG N1 N2
--- ------ ---------- ---------- ---------- ----------
1 g1 0 0 0 0
2 g1 0 0 0 0
3 g1 10 1 10 0
4 g1 0 0 10 0
5 g1 15 2 10 15
6 g1 0 0 10 15
1 g2 0 0 0 0
2 g2 4 1 4 0
3 g2 3 2 4 3
4 g2 0 0 4 3
SQL tables represent unordered sets. There is no ordering, unless a column specifies that ordering. Let me assume that such a column exists.
If so, you can do this with analytic functions:
-- Template: "??" is a placeholder for the column that defines the row order
-- and must be replaced before this can run.
-- n1 / n2 are running maxima of amt over the rows with flag = 1 / flag = 2
-- seen so far (0 until such a row appears).
-- NOTE(review): there is no PARTITION BY, so all rows are treated as one
-- group - add one (e.g. on g_name) if several groups exist.
select t.*,
max(case when flag = 1 then amt else 0 end) over (order by ??) as n1,
max(case when flag = 2 then amt else 0 end) over (order by ??) as n2
from t;
The ?? specifies the ordering.
Please help me write the following query, which I've been struggling with for some time now. Let's say I have a simple table with the month number and a flag showing whether the events in that month succeeded (Success = 0 means there was a failure in that month).
Below a script to generate sample data:
-- Sample data as a CTE: one row per month (1..18) with a Success flag.
-- This is only the CTE definition; a main query has to follow it.
WITH DATA(Month, Success) AS
(
SELECT 1, 0 UNION ALL
SELECT 2, 0 UNION ALL
SELECT 3, 0 UNION ALL
SELECT 4, 1 UNION ALL
SELECT 5, 1 UNION ALL
SELECT 6, 0 UNION ALL
SELECT 7, 0 UNION ALL
SELECT 8, 1 UNION ALL
SELECT 9, 0 UNION ALL
SELECT 10, 1 UNION ALL
SELECT 11, 0 UNION ALL
SELECT 12, 1 UNION ALL
SELECT 13, 0 UNION ALL
SELECT 14, 1 UNION ALL
SELECT 15, 0 UNION ALL
SELECT 16, 1 UNION ALL
SELECT 17, 0 UNION ALL
SELECT 18, 0
)
Given the definition of a "repeated failure ":
When event failure occurs during at least 4 months in any 6 months period then the last month with such failure is a "repeated failure" my query should return the following output
Month Success RepeatedFailure
1 0
2 0
3 0
4 1
5 1
6 0 R1
7 0 R2
8 1
9 0
10 1
11 0 R3
12 1
13 0
14 1
15 0
16 1
17 0
18 0 R1
where:
R1 -1st repeated failure in month no 6 (4 failures in last 6 months).
R2 -2nd repeated failure in month no 7 (4 failures in last 6 months).
R3 -3rd repeated failure in month no 11 (4 failures in last 6 months).
R1 -again 1st repeated failure in month no 18 because Repeated Failures should be again numbered from the beginning when new Repeated Failure occurs for the first time in last 6 reporting periods
Repeated Failures are numerated consecutively because based on its number i must apply appropriate multiplier:
1st repeated failure - X2
2nd repeated failure - X4
3rd and more repeated failure -X5.
I'm sure this can be improved, but it works. We essentially do two passes - the first to establish repeated failures, the second to establish what kind of repeated failure each is. Note that Intermediate2 can definitely be done away with, I've only separated it out for clarity. All the code is one statement, my explanation is interleaved:
;WITH DATA(Month, Success) AS
-- assuming your data as defined (with my edit)
,Intermediate AS
(
    -- Classify every month from 6 onward by the successes counted in the
    -- six-month window that ends on that month.
    SELECT
        win.Month,
        win.Success,
        -- next column for illustration only
        win.SuccessesInLastSixMonths,
        -- next column for illustration only
        6 - win.SuccessesInLastSixMonths AS FailuresInLastSixMonths,
        CASE
            WHEN 6 - win.SuccessesInLastSixMonths >= 4 THEN 1
            ELSE 0
        END AS IsRepeatedFailure
    FROM
    (
        SELECT
            curr.Month,
            curr.Success,
            -- successes in the current month and the five months before it
            (SELECT SUM(hist.Success)
             FROM DATA hist
             WHERE curr.Month - hist.Month BETWEEN 0 AND 5)
                AS SuccessesInLastSixMonths
        FROM DATA curr
        -- No real data until month 6
        WHERE curr.Month > 5
    ) win
)
At this point we have established, for each month, whether it's a repeated failure, by counting the failures in the six months up to and including it.
,Intermediate2 AS
(
    -- Attach to every month the number of repeated failures that fall in
    -- the six-month window ending on that month.
    SELECT
        curr.Month,
        curr.Success,
        curr.IsRepeatedFailure,
        recent.RepeatedFailuresInLastSixMonths
    FROM Intermediate curr
    -- an aggregate without GROUP BY always yields exactly one row,
    -- so CROSS APPLY neither drops nor duplicates months
    CROSS APPLY
    (
        SELECT SUM(hist.IsRepeatedFailure) AS RepeatedFailuresInLastSixMonths
        FROM Intermediate hist
        WHERE curr.Month - hist.Month BETWEEN 0 AND 5
    ) recent
)
Now we have counted the number of repeated failures in the six months leading up to now
-- Label repeated-failure months as 'R' followed by their ordinal within the
-- last six months; every other month gets an empty string.
SELECT
    Month,
    Success,
    CASE
        WHEN IsRepeatedFailure = 1
            THEN 'R' + CONVERT(varchar, RepeatedFailuresInLastSixMonths)
        ELSE ''
    END AS RepeatedFailureText
FROM Intermediate2
so we can say, if this month is a repeated failure, what cardinality of repeated failure it is.
Result:
Month Success RepeatedFailureText
----------- ----------- -------------------------------
6 0 R1
7 0 R2
8 1
9 0
10 1
11 0 R3
12 1
13 0
14 1
15 0
16 1
17 0
18 0 R1
(13 row(s) affected)
Performance considerations will depend on how much data you actually have.
-- Returns every month with its Success flag and a RepeatedFailure label:
-- 'R<n>' for a repeated failure, '' otherwise.
--
-- A month (from month 6 on) is a repeated failure when the six-month window
-- ending on it holds at most 2 successes, i.e. at least 4 failures.
-- <n> is the number of repeated failures inside that same six-month window,
-- so the numbering starts again at 1 once no repeated failure occurred in
-- the preceding five months (month 18 is R1, not R4).
;WITH DATA(Month, Success) AS
(
    SELECT 1, 0 UNION ALL
    SELECT 2, 0 UNION ALL
    SELECT 3, 0 UNION ALL
    SELECT 4, 1 UNION ALL
    SELECT 5, 1 UNION ALL
    SELECT 6, 0 UNION ALL
    SELECT 7, 0 UNION ALL
    SELECT 8, 1 UNION ALL
    SELECT 9, 0 UNION ALL
    SELECT 10, 1 UNION ALL
    SELECT 11, 0 UNION ALL
    SELECT 12, 1 UNION ALL
    SELECT 13, 0 UNION ALL
    SELECT 14, 1 UNION ALL
    SELECT 15, 0 UNION ALL
    SELECT 16, 1 UNION ALL
    SELECT 17, 0 UNION ALL
    SELECT 18, 0
),
Flagged AS
(
    -- 1 for repeated-failure months, 0 for all others
    SELECT
        o.Month,
        o.Success,
        CASE
            WHEN o.Month >= 6
             AND (SELECT SUM(h.Success)
                  FROM DATA h
                  WHERE h.Month > o.Month - 6
                    AND h.Month <= o.Month) <= 2
            THEN 1
            ELSE 0
        END AS IsRepeatedFailure
    FROM DATA o
)
SELECT
    f.Month,
    f.Success,
    CASE
        WHEN f.IsRepeatedFailure = 1
        THEN 'R' + CONVERT(varchar(10),
                 -- ordinal = repeated failures in the window ending here
                 (SELECT SUM(h.IsRepeatedFailure)
                  FROM Flagged h
                  WHERE h.Month > f.Month - 6
                    AND h.Month <= f.Month))
        ELSE ''
    END AS RepeatedFailure
FROM Flagged f
ORDER BY f.Month