Count number of occurrences in a bit column in sql - sql

How can I count the number of value changes in a SQL Server column? For example, I have the following Ignition values:
Ignition
1
1
0
1
1
1
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
0
1
I want to count only the changes: a transition from 0 to 1 counts as one occurrence, and a transition from 1 to 0 also counts as one occurrence.

Step 1: use the Row_Number() function to provide a complete (un-broken) sequence of numbers, according to our order
-- Number every row 1..n in id order; row_num is consecutive even when id is not.
SELECT
    ignition,
    id,
    ROW_NUMBER() OVER (ORDER BY id ASC) AS row_num
FROM your_table
Step 2: Make this a Common-Table Expression (CTE) so we can refer to the derived row_num column
-- Wrap the numbered rows in a CTE so row_num can be referenced by name.
; WITH numbered AS (
    SELECT
        ignition,
        id,
        ROW_NUMBER() OVER (ORDER BY id ASC) AS row_num
    FROM your_table
)
SELECT
    ignition,
    id,
    row_num
FROM numbered
Step 3: join this table back to itself matching on the next/previous row
-- Pair every row with the row that follows it in id order.
-- The last row has no successor, so its c2_ignition is NULL (LEFT JOIN).
; WITH numbered AS (
    SELECT
        ignition,
        id,
        ROW_NUMBER() OVER (ORDER BY id ASC) AS row_num
    FROM your_table
)
SELECT
    cur.ignition AS c1_ignition,
    nxt.ignition AS c2_ignition
FROM numbered AS cur
LEFT JOIN numbered AS nxt
    ON nxt.row_num = cur.row_num + 1
Step 4: Filter the results to show those where the values aren't the same
-- Return one row per transition: a row whose ignition differs from the
-- ignition of the row immediately before it in id order.
-- The <> filter rejects rows without a predecessor (NULL comparison), so the
-- join is written as an INNER JOIN; the result is the same as LEFT JOIN + WHERE.
; WITH numbered AS (
    SELECT
        ignition,
        id,
        ROW_NUMBER() OVER (ORDER BY id ASC) AS row_num
    FROM your_table
)
SELECT
    cur.ignition AS c1_ignition,
    prev.ignition AS c2_ignition
FROM numbered AS cur
INNER JOIN numbered AS prev
    ON prev.row_num = cur.row_num - 1
WHERE cur.ignition <> prev.ignition
Step 5: ...
Step 6: profit!

Not sure if you want a solution that works in both 2008 and 2012, as you have both tags, but in 2012 (not in 2008) we got LAG() and LEAD(), so a SUM() of [Change] in the query below will do it for 2012. You'll have to decide how to handle the first value (which obviously doesn't have a previous value); as written, it counts as a change.
-- Flag each row with [Change] = 1 when its Ignition differs from the previous
-- row's Ignition (in Id order). The first row has no previous value
-- ([Previous] is NULL), so the comparison is not true and it is flagged 1.
;WITH [WithPrevious] AS (
    SELECT
        [Id],
        [Ignition],
        LAG([Ignition]) OVER (ORDER BY [Id]) AS [Previous]
    FROM [dbo].[Table]
)
SELECT
    [Id],
    [Ignition],
    [Previous],
    CASE WHEN [Previous] = [Ignition] THEN 0 ELSE 1 END AS [Change]
FROM [WithPrevious]
ORDER BY [Id];
For 2008 a self-join should produce the same result.
-- SQL Server 2008 equivalent of the LAG() query: flag each row with
-- [Change] = 1 when its Ignition differs from the previous row's Ignition.
-- Rows are matched on a ROW_NUMBER() sequence rather than on Id + 1, so gaps
-- in Id do not make a row lose its predecessor (which would be counted as a
-- spurious change). The first row has no predecessor and is flagged 1.
;WITH [Numbered] AS (
    SELECT
        [Id],
        [Ignition],
        ROW_NUMBER() OVER (ORDER BY [Id]) AS [RowNum]
    FROM [dbo].[Table]
)
SELECT
    [T1].[Id],
    [T1].[Ignition],
    [T2].[Ignition] AS [Previous],
    CASE WHEN [T1].[Ignition] = [T2].[Ignition] THEN 0 ELSE 1 END AS [Change]
FROM [Numbered] AS [T1]
LEFT JOIN [Numbered] AS [T2]
    ON [T2].[RowNum] = [T1].[RowNum] - 1
ORDER BY [T1].[Id];

-- Count value changes by flattening the column into a string of '0'/'1'
-- characters and counting the '10' and '01' boundaries.
-- Table variables and scalar variables must be prefixed with @ (a # prefix
-- denotes a temp table and is not valid in DECLARE ... TABLE).
declare @t table(id int identity(1,1), ignition bit)
insert @t (ignition) values(1),(0),(1),(1)
declare @Ignition varchar(max) = ''
-- NOTE(review): this relies on the assignment being applied once per row in
-- id order; confirm that SQL Server guarantees this for your version, or build
-- the string with FOR XML PATH instead.
select @Ignition = @Ignition + cast(ignition as char(1))
from @t order by id
select @Ignition
-- Mark every 1->0 boundary in one copy and every 0->1 boundary in another,
-- strip the remaining digits, and count the markers that are left.
select len(replace(replace(replace(@Ignition, '10', 'x')
+ replace(@Ignition, '01', 'x'), '1', ''), '0', ''))
Result:
2

Simplest and shortest way for SQL server 2008 I know is:
-- Count value changes as (number of runs of equal consecutive values) - 1.
-- rn2 - rn1 is constant inside a run, but a run of 0s and a run of 1s can
-- share the same difference (e.g. 1,0,1 gives differences 0,1,1), so a run is
-- identified by the pair (Ignition, rn2 - rn1), not by the difference alone.
-- Returns -1 for an empty table.
with cte as (
    select
        Ignition,
        row_number() over(partition by Ignition order by Id) as rn1,
        row_number() over(order by Id) as rn2
    from Table1
), runs as (
    select distinct Ignition, rn2 - rn1 as grp
    from cte
)
select count(*) - 1
from runs
Or, as @MartinSmith pointed out:
-- Count value changes as (number of runs of equal consecutive values) - 1.
-- rn1 numbers the rows grouped by Ignition, rn2 numbers them in Id order;
-- within one Ignition value the difference rn2 - rn1 is constant per run.
-- Ignition must be selected in cte so that cte2 can reference it, and the
-- computed difference needs an alias because CTE columns must be named.
-- Returns -1 for an empty table.
with cte as (
    select
        Ignition,
        row_number() over(order by Ignition, Id) as rn1,
        row_number() over(order by Id) as rn2
    from Table1
), cte2 as (
    select distinct Ignition, rn2 - rn1 as grp
    from cte
)
select count(*) - 1
from cte2
for SQL Server 2012 you can use lag() (or lead()) function:
-- Count the rows whose Ignition differs from the previous row's Ignition
-- (in Id order). The first row has a NULL predecessor, so it is not counted.
select count(case when pairs.cur <> pairs.prev then 1 end)
from (
    select
        lag(Ignition) over(order by Id) as prev,
        Ignition as cur
    from Table1
) as pairs;
sql fiddle demo

Related

Indicate a row that cause an abnormal case (SQL)

I have a result as below using the following script:
-- Classify each row against the previous row (by age) within the same id.
SELECT
id, (2022 - age) yearId, age, [value],
CASE
-- LAG default 0 is used when there is no previous row, so the first row of
-- each id is 'Base'; a row whose actual previous value is 0 is also 'Base'.
WHEN LAG([value], 1, 0) OVER (PARTITION BY id ORDER BY [age]) = 0
THEN 'Base'
WHEN [value] > LAG([value], 1, -1) OVER (PARTITION BY id ORDER BY [age])
THEN 'Increasing'
WHEN [value] = LAG([value], 1, -1) OVER (PARTITION BY id ORDER BY [age])
THEN 'No Change'
ELSE 'Decreasing'
END AS [Order]
FROM Test
Values
And I manage to get a group of ids with an id causing a "flip: decreasing and then increasing or the other way around" as:
Abnormal Case
Now I want to print out the same result as above but with a column indicates the row that cause the flip, something like this (the row causes the flip should be place at the top of each partition):
Id
age
value
flip
1
4
3
1
1
0
5
0
1
1
4
0
1
2
3
0
1
3
2
0
1
5
3
0
1
6
4
0
Thank you!
Expanding your existing logic to get the previous order value then conditionally ordering
-- Flag the row of each id at which the trend flips (Increasing directly after
-- Decreasing, or the reverse) and list that row first within its id.
with cte as
(
    -- Classify each row against the previous row (by age) within the same id.
    SELECT
        id, (2022 - age) yearId, age, [value],
        CASE
            WHEN LAG([value], 1, 0) OVER (PARTITION BY id ORDER BY [age]) = 0
                THEN 'Base'
            WHEN [value] > LAG([value], 1, -1) OVER (PARTITION BY id ORDER BY [age])
                THEN 'Increasing'
            WHEN [value] = LAG([value], 1, -1) OVER (PARTITION BY id ORDER BY [age])
                THEN 'No Change'
            ELSE 'Decreasing'
        END AS [Order]
    FROM T1
),
cte1 as
(
    -- concatlag = this row's classification followed by the previous row's.
    select cte.*,
           concat(cte.[order], lag([order]) over (partition by id order by age)) as concatlag
    from cte
)
select *,
       case when concatlag in ('IncreasingDecreasing', 'DecreasingIncreasing') then 1 else 0 end as flip
from cte1
order by
    -- Sort by id first so the flip row leads its own partition rather than the
    -- whole result set.
    id,
    flip desc,
    age

ROW_Number with Custom Group

I am trying to have row_number based on custom grouping but I am not able to produce it.
Below is my Query
-- Sample data: wid is the ordering column, id = 0 marks the end of a group.
CREATE TABLE mytbl (wid INT, id INT)
-- Explicit column list so the insert does not depend on column order.
INSERT INTO mytbl (wid, id) VALUES (1,1),(2,1),(3,0),(4,2),(5,3)
Current Output
wid id
1 1
2 1
3 0
4 2
5 3
Query
-- Attempted query. wid is part of PARTITION BY and every sample row has a
-- different wid, so each partition holds one row and RANK() is 1 for all rows.
SELECT *, RANK() OVER(PARTITION BY wid, CASE WHEN id = 0 THEN 0 ELSE 1 END ORDER BY ID)
FROM mytbl
I would like to rank the rows based on custom condition like if ID is 0 then I have start new group until I have non 0 ID.
Expected Output
wid id RN
1 1 1
2 1 1
3 0 1
4 2 2
5 3 2
Guessing here, as we don't have much clarification, but perhaps this:
-- Rank = 1 + number of id = 0 rows strictly before the current row (by wid),
-- so the rank increases on the row after each 0.
SELECT
    wid,
    id,
    COUNT(CASE WHEN id = 0 THEN 1 END) OVER (
        ORDER BY wid
        ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING
    ) + 1 AS [Rank]
FROM mytbl ;
If I understand you correctly, you may use the next approach. Note, that you need to have an ordering column (I assume this is wid column):
Statement:
-- 1) mark each row that directly follows an id = 0 row,
-- 2) turn the marks into a running group number,
-- 3) rank the groups densely starting at 1.
SELECT
    grouped.wid,
    grouped.id,
    DENSE_RANK() OVER (ORDER BY grouped.GroupIndex) AS Rank
FROM (
    SELECT
        marked.wid,
        marked.id,
        SUM(marked.ChangeIndex) OVER (ORDER BY marked.wid) AS GroupIndex
    FROM (
        SELECT
            wid,
            id,
            CASE WHEN LAG(id) OVER (ORDER BY wid) = 0 THEN 1 ELSE 0 END AS ChangeIndex
        FROM mytbl
    ) AS marked
) AS grouped
Result:
wid id Rank
1 1 1
2 1 1
3 0 1
4 2 2
5 3 2
without much clarification on the logic required, my understanding is you want to increase the Rank by 1 whenever id = 0
-- Running count of id = 0 rows up to and including the current row, plus 1
-- for non-zero rows, so that a 0 row keeps the rank of the group it closes.
select
    wid,
    id,
    sum(case when id = 0 then 1 else 0 end) over (order by wid)
        + case when id <> 0 then 1 else 0 end as [Rank]
from mytbl
Try this,
-- Rank each row by the ordinal of the first id = 0 row at or after it (by wid);
-- rows after the last 0 row get (number of 0 rows) + 1.
CREATE TABLE #mytbl (wid INT, id INT)
INSERT INTO #mytbl (wid, id) VALUES (1,1),(2,1),(3,0)
,(4,2),(5,3),(6,0),(7,4),(8,5),(9,6)
;with CTE as
(
    -- The id = 0 rows, numbered in wid order.
    select wid, ROW_NUMBER() over (order by wid) as rn
    from #mytbl
    where id = 0
)
,CTE1 as
(
    -- Rank for rows that come after the last 0 row; ISNULL covers the case
    -- where the table has no 0 rows at all (MAX over zero rows is NULL).
    select isnull(max(rn), 0) + 1 as ExtraRN from CTE
)
select a.*, isnull(ca.rn, ca1.ExtraRN) as [Rank]
from #mytbl a
-- TOP 1 needs ORDER BY to pick the nearest following 0 row; without it any
-- matching 0 row may be returned.
outer apply (
    select top 1 b.rn
    from CTE b
    where a.wid <= b.wid
    order by b.wid
) ca
cross apply (select ExtraRN from CTE1) ca1
drop table #mytbl
Here neither OUTER APPLY nor CROSS APPLY will increase the cardinality estimate. Each will always return only one row.

Cumulative sum value 1 and reset sum when meet 0 SQL

I am trying the query below, but it's not working.
-- Attempted query (fragment as posted, without a FROM clause).
-- ROW_NUMBER() numbers every row of the id partition, including the x = 0
-- rows, so the numbering does not restart after a 0.
SELECT *,
CASE WHEN x = 1
THEN ROW_NUMBER() OVER(PARTITION BY id ORDER BY date)
ELSE 0 END AS y
Expected result :
x y
1 1
1 2
1 3
0 0
1 1
1 2
How can I achieve this ? i still want to keep 0 in y column
Count the number of zeros up to each value and then use this to group. The final enumeration uses row_number():
-- grp = number of x = 0 rows seen so far (by date); rows between two zeros
-- share a grp, so numbering within (x, grp) restarts after every 0.
with zero_counts as (
    select
        t.*,
        countif(x = 0) over (order by date) as grp
    from t
)
select
    zc.*,
    case
        when x = 0 then 0
        else row_number() over (partition by x, grp order by date)
    end as y
from zero_counts as zc

SQL - categorize rows

Below is the result set I am working with. What I would like is an additional column that identifies a X number of rows as the same. In my result set, rows 1-4 are the same (would like to mark as 1), rows 5-9 are the same (mark as 2); row 10 (mark as 3)
How is this possible using just SQL? I can't seem to do this using rank or dense_rank functions.
ranking diff bool
-------------------- ----------- -----------
1 0 0
2 0 0
3 0 0
4 0 0
5 54 1
6 0 0
7 0 0
8 0 0
9 0 0
10 62 1
In general case you can do something like this:
-- cnt = number of [bool] = 1 rows at or before the current ranking;
-- dense_rank over cnt turns those counts into consecutive group numbers.
select
    counted.ranking,
    counted.[diff],
    counted.[bool],
    dense_rank() over (order by counted.cnt) as rnk
from (
    select
        t.ranking,
        t.[diff],
        t.[bool],
        (
            select count(*)
            from Table1 as t2
            where t2.ranking <= t.ranking
              and t2.[bool] = 1
        ) as cnt
    from Table1 as t
) as counted
In your case you can do it even without dense_rank():
-- Group number = 1 + number of [bool] = 1 rows at or before the current ranking.
select
    t.ranking,
    t.[diff],
    t.[bool],
    (
        select count(*)
        from Table1 as t2
        where t2.ranking <= t.ranking
          and t2.[bool] = 1
    ) + 1 as rnk
from Table1 as t;
Unfortunately, in SQL Server 2008 you cannot do running total with window function, in SQL Server 2012 it'd be possible to do it with sum([bool]) over(order by ranking).
If you have really big number of rows and your ranking column is unique/primary key, you can use recursive cte approach - like one in this answer, it's fastest one in SQL Server 2008 R2:
-- Running total of [bool] built one row at a time: start at ranking = 1 and
-- repeatedly step to ranking + 1, adding that row's [bool] to the total.
-- The walk stops at the first missing ranking value.
;with running as
(
    -- anchor: the first row
    select
        first_row.ranking,
        first_row.[diff],
        first_row.[bool],
        first_row.[bool] as rnk
    from Table1 as first_row
    where first_row.ranking = 1

    union all

    -- step: the row whose ranking directly follows the previous one
    select
        next_row.ranking,
        next_row.[diff],
        next_row.[bool],
        next_row.[bool] + prev_row.rnk as rnk
    from running as prev_row
    inner join Table1 as next_row
        on next_row.ranking = prev_row.ranking + 1
)
select r.ranking, r.[diff], r.[bool], 1 + r.rnk
from running as r
-- lift the default recursion limit so long tables can be walked
option (maxrecursion 0)
sql fiddle demo

How to get average of the 'middle' values in a group?

I have a table that has values and group ids (simplified example). I need to get the average for each group of the middle 3 values. So, if there are 1, 2, or 3 values it's just the average. But if there are 4 values, it would exclude the highest, 5 values the highest and lowest, etc. I was thinking some sort of window function, but I'm not sure if it's possible.
http://www.sqlfiddle.com/#!11/af5e0/1
For this data:
TEST_ID TEST_VALUE GROUP_ID
1 5 1
2 10 1
3 15 1
4 25 2
5 35 2
6 5 2
7 15 2
8 25 3
9 45 3
10 55 3
11 15 3
12 5 3
13 25 3
14 45 4
I'd like
GROUP_ID AVG
1 10
2 15
3 21.6
4 45
Another option using analytic functions;
-- Average of the middle three values of each group (all values when the
-- group has three or fewer).
-- cnt / 2 is integer division, so the previous bounds floor(cnt/2)-1 ..
-- ceil(cnt/2)+1 selected rows 1-3 for cnt = 5 instead of rows 2-4.
-- (cnt - 1) / 2 .. (cnt - 1) / 2 + 2 gives 1-3 for cnt = 4, 2-4 for cnt = 5
-- and 6, 3-5 for cnt = 7, i.e. it drops the highest value first when an
-- unequal number has to be trimmed from each end.
SELECT group_id,
       avg( test_value )
FROM (
    select t.*,
           row_number() over (partition by group_id order by test_value ) rn,
           count(*) over (partition by group_id ) cnt
    from test t
) alias
where
    cnt <= 3
    or
    rn between ( cnt - 1 ) / 2 and ( cnt - 1 ) / 2 + 2
group by group_id
;
Demo --> http://www.sqlfiddle.com/#!11/af5e0/59
I'm not familiar with the Postgres syntax on windowed functions, but I was able to solve your problem in SQL Server with this SQL Fiddle. Maybe you'll be able to easily migrate this into Postgres-compatible code. Hope it helps!
A quick primer on how I worked it.
Order the test scores for each group
Get a count of items in each group
Use that as a subquery and select only the middle 3 items (that's the where clause in the outer query)
Get the average for each group
--
-- Average of the middle three values of each group (all values when the
-- group has three or fewer). Values are converted to decimal before averaging.
select
    ranked.group_id,
    avg(ranked.test_value)
from (
    select
        t.group_id,
        convert(decimal, t.test_value) as test_value,
        -- position of the value inside its group, smallest first
        row_number() over (
            partition by t.group_id
            order by t.test_value
        ) as ord,
        -- number of rows in the group
        count(*) over (partition by t.group_id) as gc
    from
        test t
) ranked
where
    -- for gc > 3 the window starts at (gc - 1) / 2 (integer division),
    -- which equals gc / 2 when gc is odd
    ranked.ord between
        case when ranked.gc <= 3 then 1 else (ranked.gc - 1) / 2 end
        and
        case when ranked.gc <= 3 then 3 else (ranked.gc - 1) / 2 + 2 end
group by
    ranked.group_id
-- Average of the middle three values of each group (all values when the
-- group has three or fewer).
-- cnt / 2 is integer division, so the previous bounds cnt/2 - 1 .. cnt/2 + 1
-- selected rows 1-3 for cnt = 5 instead of rows 2-4. (cnt - 1) / 2 ..
-- (cnt - 1) / 2 + 2 gives 1-3 for cnt = 4, 2-4 for cnt = 5 and 6, 3-5 for 7.
with cte as (
    select
        *,
        row_number() over(partition by group_id order by test_value) as rn,
        count(*) over(partition by group_id) as cnt
    from test
)
select
    group_id, avg(test_value)
from cte
where
    cnt <= 3 or
    (rn >= (cnt - 1) / 2 and rn <= (cnt - 1) / 2 + 2)
group by group_id
order by group_id
sql fiddle demo
in the cte, we need to get count of elements over each group_id by window function + calculate row_number inside each group_id. Then, if this count > 3 then we need to get middle of the group by dividing count by 2 and then get +1 and -1 element. If count <= 3, then we should just take all elements.
This works:
-- Average of the middle three values of each group (all values when the
-- group has fewer than four). The window bounds depend on whether the group
-- size is even or odd.
SELECT ranked.group_id, avg(ranked.test_value) AS avg_mid3
FROM (
    SELECT group_id,
           test_value,
           row_number() OVER (PARTITION BY group_id ORDER BY test_value) AS pos,
           count(*) OVER (PARTITION BY group_id) AS group_size
    FROM test
) ranked
WHERE ranked.pos >=
        CASE
            WHEN ranked.group_size < 4 THEN 1
            WHEN ranked.group_size % 2 = 0 THEN (ranked.group_size / 2 - 1)
            ELSE (ranked.group_size / 2)
        END
  AND ranked.pos <=
        CASE
            WHEN ranked.group_size < 4 THEN ranked.group_size
            WHEN ranked.group_size % 2 = 0 THEN (ranked.group_size / 2 + 1)
            ELSE (ranked.group_size / 2 + 2)
        END
GROUP BY ranked.group_id
Fiddle link: http://www.sqlfiddle.com/#!11/af5e0/56
If you need to calculate the average values for groups then you can do this:
-- One output row with the plain average of TEST_VALUE for each of the groups
-- 1-4, computed as sum / count. NULLIF turns a zero count into NULL so an
-- absent group yields NULL instead of a division-by-zero error.
SELECT
    SUM_FIRST_GROUP  / NULLIF(NUMBER_FIRST_GROUP, 0)  AS AVG_FIRST_GROUP,
    SUM_SECOND_GROUP / NULLIF(NUMBER_SECOND_GROUP, 0) AS AVG_SECOND_GROUP,
    SUM_THIRD_GROUP  / NULLIF(NUMBER_THIRD_GROUP, 0)  AS AVG_THIRD_GROUP,
    SUM_FOURTH_GROUP / NULLIF(NUMBER_FOURTH_GROUP, 0) AS AVG_FOURTH_GROUP
FROM (
    SELECT
        -- per-group row counts
        SUM(CASE WHEN GROUP_ID = 1 THEN 1 ELSE 0 END) AS NUMBER_FIRST_GROUP,
        SUM(CASE WHEN GROUP_ID = 2 THEN 1 ELSE 0 END) AS NUMBER_SECOND_GROUP,
        SUM(CASE WHEN GROUP_ID = 3 THEN 1 ELSE 0 END) AS NUMBER_THIRD_GROUP,
        SUM(CASE WHEN GROUP_ID = 4 THEN 1 ELSE 0 END) AS NUMBER_FOURTH_GROUP,
        -- per-group value totals
        SUM(CASE WHEN GROUP_ID = 1 THEN TEST_VALUE ELSE 0 END) AS SUM_FIRST_GROUP,
        SUM(CASE WHEN GROUP_ID = 2 THEN TEST_VALUE ELSE 0 END) AS SUM_SECOND_GROUP,
        SUM(CASE WHEN GROUP_ID = 3 THEN TEST_VALUE ELSE 0 END) AS SUM_THIRD_GROUP,
        SUM(CASE WHEN GROUP_ID = 4 THEN TEST_VALUE ELSE 0 END) AS SUM_FOURTH_GROUP
    FROM TEST
) AS FOO