What is the equivalent of ARRAY_AGG(foo where bar) in BigQuery? - google-bigquery

Let's say I want to do this:
with source_data as (
select 1 as id, 'a' as sub_id, true as turned_on
union all
select 1 as id, 'b' as sub_id, true as turned_on
union all
select 2 as id, 'a' as sub_id, false as turned_on
union all
select 2 as id, 'b' as sub_id, true as turned_on
union all
select 3 as id, 'a' as sub_id, false as turned_on
union all
select 3 as id, 'b' as sub_id, false as turned_on
)
select
id,
array_agg(sub_id where turned_on) as all_on, -- invalid syntax
turned_off(sub_id where turned_off) as all_off -- invalid syntax
from
source_data
group by id
to get something like
| id | all_on | all_off |
| --- | ------ | ------- |
| 1 | [a, b] | |
| 2 | [b] | [a] |
| 3 | | [a, b] |
the marked rows are invalid, because I can't do ARRAY_AGG(... where ...). From the docs I gather I could probably accomplish something similar using analytic functions (particularly PARTITION BY) but I don't understand how.
Is it possible to write a query that aggregates arrays the way I illustrate above? How do I do it?

Consider using if:
with source_data as (
select 1 as id, 'a' as sub_id, true as turned_on
union all
select 1 as id, 'b' as sub_id, true as turned_on
union all
select 2 as id, 'a' as sub_id, false as turned_on
union all
select 2 as id, 'b' as sub_id, true as turned_on
union all
select 3 as id, 'a' as sub_id, false as turned_on
union all
select 3 as id, 'b' as sub_id, false as turned_on
)
select
id,
array_agg(if(turned_on=true, sub_id, null) ignore nulls) as all_on,
array_agg(if(turned_on=false, sub_id, null) ignore nulls) as all_off
from
source_data
group by id

Related

Need to get the mismatcted cells in specific partition

ID
TC_No
Result
1
tc_1
PASS
1
tc_2
PASS
1
tc_3
FAIL
1
tc_4
PASS
1
tc_5
FAIL
2
tc_1
FAIL
2
tc_2
PASS
2
tc_3
FAIL
2
tc_4
FAIL
2
tc_5
FAIL
I'm trying to find all records that have conflicting "Result" on the same "TC_No" and among different "ID" values, filtered by ID IN (1,2).
Here's the expected output:
ID
TC_No
Result
1
tc_1
PASS
1
tc_4
PASS
2
tc_1
FAIL
2
tc_4
FAIL
and my attempted query:
SELECT * From
(SELECT * from Excel As T1
UNION
SELECT * from Excel As T2)
As c
where ID in(1,2) order By TC_NO
find the distinct count of result for each tc_no and then select the records having count greater than 1
Query
select * from your_tbl_name a
where exists(
select 1 from (
select tc_no, count(distinct result) as cnt
from your_tbl_name
where result in ('PASS','FAIL')
group by c_no
) b
where a.tc_no = b.tc_no
and b.cnt > 1
)
You can simply INNER JOIN the table onto itself and add a predicate in the WHERE clause to return only mismatched results.
SQL:
SELECT
a.ID,
a.TC_No,
a.Result
FROM
Excel a
INNER JOIN Excel b ON a.TC_No = b.TC_No
WHERE
a.Result <> b.Result;
Result:
| ID | TC_No | Result |
|----|-------|--------|
| 1 | tc_1 | PASS |
| 1 | tc_4 | PASS |
| 2 | tc_1 | FAIL |
| 2 | tc_4 | FAIL |
SQL Fiddle Demo:
Here
Check when the maximum result is different than the minimum one, in your filtered data, using window functions.
WITH cte AS (
SELECT tab.*,
MAX(Result_) OVER(PARTITION BY TC_No) AS max_result,
MIN(Result_) OVER(PARTITION BY TC_No) AS min_result
FROM tab
WHERE ID IN (1,2)
)
SELECT Id, Tc_No, Result_
FROM cte
WHERE min_result < max_result
Check the demo here.
You could use analytic function COUNT() OVER() to find the rows with different content and then just filter the result in Where clause:
SELECT ID, TC_NO, RESULT
FROM ( Select ID, TC_NO, RESULT,
CASE WHEN Count(DISTINCT RESULT) OVER(Partition By TC_NO) = 2 THEN 'Y' END "IS_DIFF"
From tbl
)
WHERE IS_DIFF = 'Y'
ORDER BY ID
With your sample data:
WITH
tbl (ID, TC_NO, RESULT) AS
(
Select 1, 'tc_1', 'PASS' From Dual Union All
Select 1, 'tc_2', 'PASS' From Dual Union All
Select 1, 'tc_3', 'FAIL' From Dual Union All
Select 1, 'tc_4', 'PASS' From Dual Union All
Select 1, 'tc_5', 'FAIL' From Dual Union All
Select 2, 'tc_1', 'FAIL' From Dual Union All
Select 2, 'tc_2', 'PASS' From Dual Union All
Select 2, 'tc_3', 'FAIL' From Dual Union All
Select 2, 'tc_4', 'FAIL' From Dual Union All
Select 2, 'tc_5', 'FAIL' From Dual
)
... the result is:
ID
TC_NO
RESULT
1
tc_1
PASS
1
tc_4
PASS
2
tc_1
FAIL
2
tc_4
FAIL

How to group-by in Oracle

I have a table like [Original] in below.
I want to sum by group-by field like [result].
Does anyone have an idea to make this query?
Thank you in advance for your help.
WITH t1 as (
SELECT 1 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
UNION SELECT 2 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 3 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
UNION SELECT 4 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 5 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 6 AS ID, 'B' AS FIELD, 1 AS VAL FROM dual
UNION SELECT 7 AS ID, 'A' AS FIELD, 3 AS VAL FROM dual
UNION SELECT 8 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 9 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
)
SELECT *
FROM t1
[Original Data]
ID FIELD VAL
1 A 1
2 A 2
3 A 1
4 B 2
5 B 2
6 B 1
7 A 3
8 A 2
9 A 1
[Result]
ID FIELD VAL
1 A 4
4 B 5
7 A 6
This is island and gap issue and you can use analytical function as follows:
SQL> WITH t1 as (
2 SELECT 1 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
3 UNION SELECT 2 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
4 UNION SELECT 3 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
5 UNION SELECT 4 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
6 UNION SELECT 5 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
7 UNION SELECT 6 AS ID, 'B' AS FIELD, 1 AS VAL FROM dual
8 UNION SELECT 7 AS ID, 'A' AS FIELD, 3 AS VAL FROM dual
9 UNION SELECT 8 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
10 UNION SELECT 9 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
11 )
12 SELECT MIN(ID) AS ID, FIELD, SUM(VAL)
13 FROM (SELECT T1.*,
14 SUM(CASE WHEN LAG_FIELD = FIELD THEN 0 ELSE 1 END)
15 OVER (ORDER BY ID) AS SM
16 FROM (SELECT T1.*,
17 LAG(FIELD) OVER (ORDER BY ID) AS LAG_FIELD
18 FROM t1
19 ) T1
20 )
21 GROUP BY FIELD, SM
22 ORDER BY 1;
ID F SUM(VAL)
---------- - ----------
1 A 4
4 B 5
7 A 6
SQL>
This is indeed a gaps-and-islands problem. I think the simplest approach here is to use the difference between row numbers to identify groups of adjacent rows:
select min(id) as id, field, sum(val) as val
from (
select t1.*,
row_number() over(order by id) rn1,
row_number() over(partition by field order by id) rn2
from t1
) t
group by field, rn1 - rn2
order by min(id)
If id is always incrementing without gaps, this is even simpler:
select min(id) as id, field, sum(val) as val
from (
select t1.*,
row_number() over(partition by field order by id) rn
from t1
) t
group by field, id - rn
order by min(id)
From Oracle 12, you can do it quite simply using MATCH_RECOGNIZE:
WITH t1 as (
SELECT 1 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
UNION SELECT 2 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 3 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
UNION SELECT 4 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 5 AS ID, 'B' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 6 AS ID, 'B' AS FIELD, 1 AS VAL FROM dual
UNION SELECT 7 AS ID, 'A' AS FIELD, 3 AS VAL FROM dual
UNION SELECT 8 AS ID, 'A' AS FIELD, 2 AS VAL FROM dual
UNION SELECT 9 AS ID, 'A' AS FIELD, 1 AS VAL FROM dual
)
SELECT *
FROM t1
MATCH_RECOGNIZE (
ORDER BY id
MEASURES
FIRST( id ) AS id,
FIRST( field ) AS field,
SUM( val ) AS total
ONE ROW PER MATCH
PATTERN( same_field+ )
DEFINE same_field AS FIRST(field) = field
)
Which outputs:
ID | FIELD | TOTAL
-: | :---- | ----:
1 | A | 4
4 | B | 5
7 | A | 6
db<>fiddle here

Select rows when a value appears multiple times

I have a table like this one:
+------+------+
| ID | Cust |
+------+------+
| 1 | A |
| 1 | A |
| 1 | B |
| 1 | B |
| 2 | A |
| 2 | A |
| 2 | A |
| 2 | B |
| 3 | A |
| 3 | B |
| 3 | B |
+------+------+
I would like to get the IDs that have at least two times A and two times B. So in my example, the query should return only the ID 1,
Thanks!
In MySQL:
SELECT id
FROM test
GROUP BY id
HAVING GROUP_CONCAT(cust ORDER BY cust SEPARATOR '') LIKE '%aa%bb%'
In Oracle
WITH cte AS ( SELECT id, LISTAGG(cust, '') WITHIN GROUP (ORDER BY cust) custs
FROM test
GROUP BY id )
SELECT id
FROM cte
WHERE custs LIKE '%aa%bb%'
I would just use two levels of aggregation:
select id
from (select id, cust, count(*) as cnt
from t
where cust in ('A', 'B')
group by id, cust
) ic
group by id
having count(*) = 2 and -- both customers are in the result set
min(cnt) >= 2 -- and there are at least two instances
This is one option; lines #1 - 13 represent sample data. Query you might be interested in begins at line #14.
SQL> with test (id, cust) as
2 (select 1, 'a' from dual union all
3 select 1, 'a' from dual union all
4 select 1, 'b' from dual union all
5 select 1, 'b' from dual union all
6 select 2, 'a' from dual union all
7 select 2, 'a' from dual union all
8 select 2, 'a' from dual union all
9 select 2, 'b' from dual union all
10 select 3, 'a' from dual union all
11 select 3, 'b' from dual union all
12 select 3, 'b' from dual
13 )
14 select id
15 from (select
16 id,
17 sum(case when cust = 'a' then 1 else 0 end) suma,
18 sum(case when cust = 'b' then 1 else 0 end) sumb
19 from test
20 group by id
21 )
22 where suma = 2
23 and sumb = 2;
ID
----------
1
SQL>
You can use group by and having for the relevant Cust ('A' , 'B')
And query twice (I chose to use with to avoid multiple selects and to cache it)
with more_than_2 as
(
select Id, Cust, count(*) c
from tab
where Cust in ('A', 'B')
group by Id, Cust
having count(*) >= 2
)
select *
from tab
where exists ( select 1 from more_than_2 where more_than_2.Id = tab.Id and more_than_2.Cust = 'A')
and exists ( select 1 from more_than_2 where more_than_2.Id = tab.Id and more_than_2.Cust = 'B')
What you want is a perfect candidate for match_recognize. Here you go:
select id_ as id from t
match_recognize
(
order by id, cust
measures id as id_
pattern (A {2, } B {2, })
define A as cust = 'A',
B as cust = 'B'
)
Output:
Regards,
Ranagal

Identify only when value matches

I need to return only rows that have the match e.g Value = A, but I only need the rows that have A and with no other values.
T1:
ID Value
1 A
1 B
1 C
2 A
3 A
3 B
4 A
5 B
5 D
5 E
5 F
Desired Output:
2
4
how can I achieve this?
when I try the following, 1&3 are also returned:
select ID from T1 where Value ='A'
With NOT EXISTS:
select t.id
from tablename t
where t.value = 'A'
and not exists (
select 1 from tablename
where id = t.id and value <> 'A'
)
From the sample data you posted there is no need to use:
select distinct t.id
but if you get duplicates then use it.
Another way if there are no null values:
select id
from tablename
group by id
having sum(case when value <> 'A' then 1 else 0 end) = 0
Or if you want the rows where the id has only 1 value = 'A':
select id
from tablename
group by id
having count(*) = 1 and max(value) = 'A'
I think the simplest way is aggregation with having:
select id
from tablename
group by id
having min(value) = max(value) and
min(value) = 'A';
Note that this ignores NULL values so it could return ids with both NULL and A. If you want to avoid that:
select id
from tablename
group by id
having count(value) = count(*) and
min(value) = max(value) and
min(value) = 'A';
Oracle Setup:
CREATE TABLE test_data ( ID, Value ) AS
SELECT 1, 'A' FROM DUAL UNION ALL
SELECT 1, 'B' FROM DUAL UNION ALL
SELECT 1, 'C' FROM DUAL UNION ALL
SELECT 2, 'A' FROM DUAL UNION ALL
SELECT 3, 'A' FROM DUAL UNION ALL
SELECT 3, 'B' FROM DUAL UNION ALL
SELECT 4, 'A' FROM DUAL UNION ALL
SELECT 5, 'B' FROM DUAL UNION ALL
SELECT 5, 'D' FROM DUAL UNION ALL
SELECT 5, 'E' FROM DUAL UNION ALL
SELECT 5, 'F' FROM DUAL
Query:
SELECT ID
FROM test_data
GROUP BY ID
HAVING COUNT( CASE Value WHEN 'A' THEN 1 END ) = 1
AND COUNT( CASE Value WHEN 'A' THEN NULL ELSE 1 END ) = 0
Output:
| ID |
| -: |
| 2 |
| 4 |
db<>fiddle here

select rows between two character values of a column

I have a table which shows as below:
S.No | Action
1 | New
2 | Dependent
3 | Dependent
4 | Dependent
5 | New
6 | Dependent
7 | Dependent
8 | New
9 | Dependent
10 | Dependent
I here want to select the rows between the first two 'New' values in the Action column, including the first row with the 'New' action. Like [New,New)
For example:
In this case, I want to select rows 1,2,3,4.
Please let me know how to do this.
Hmmm. Let's count up the cumulative number of times that New appears as a value and use that:
select t.*
from (select t.*,
sum(case when action = 'New' then 1 else 0 end) over (order by s_no) as cume_new
from t
) t
where cume_new = 1;
you can do some magic with analytic functions
1 select group of NEW actions, to get min and max s_no
2 select lead of 2 rows
3 select get between 2 sno (min and max)
with t as (
select 1 sno, 'New' action from dual union
select 2,'Dependent' from dual union
select 3,'Dependent' from dual union
select 4,'Dependent' from dual union
select 5,'New' from dual union
select 6,'Dependent' from dual union
select 7,'Dependent' from dual union
select 8,'New' from dual union
select 9,'Dependent' from dual union
select 10,'Dependent' from dual
)
select *
from (select *
from (select sno, lead(sno) over (order by sno) a
from ( select row_number() over (partition by action order by Sno) t,
t.sno
from t
where t.action = 'New'
) a
where t <=2 )
where a is not null) a, t
where t.sno >= a.sno and t.sno < a.a