-- Table from the question: A is one of the enumerated values 1..5,
-- B is an arbitrary integer. The statement needs a table name to be valid.
CREATE TABLE mytable (
    A INT NOT NULL,
    B INT NOT NULL,
    -- Enforce the enumerated domain of A described in the question.
    CONSTRAINT mytable_a_check CHECK (A BETWEEN 1 AND 5)
)
A is an enumerated values of 1, 2, 3, 4, 5
B can be any values
I would like to count() the number of occurrence group by B, with a specific subset of A e.g. {1, 2}
Example:
A B
1 7 *
2 7 *
3 7
1 8 *
2 8 *
1 9
3 9
When B = 7, A = 1, 2, 3. Good
When B = 8, A = 1, 2. Good
When B = 9, A = 1, 3. Not satisfy, 2 is missing
So the count will be 2 (when B = 7 and 8)
If I've understood you correctly, we want to find B values for which we have both a 1 and a 2 in A, and then we want to know how many of those we have.
This query does this:
-- Sample data from the question, held in a table variable.
-- T-SQL table variables are declared with an @ prefix; a # name denotes a
-- temp table, which would have to be created with CREATE TABLE instead.
DECLARE @t TABLE (A INT NOT NULL, B INT NOT NULL);

INSERT INTO @t (A, B) VALUES
    (1, 7),
    (2, 7),
    (3, 7),
    (1, 8),
    (2, 8),
    (1, 9),
    (3, 9);

-- Count the B values that have both A = 1 and A = 2.
SELECT COUNT(DISTINCT B)
FROM (
    SELECT B
    FROM @t
    WHERE A IN (1, 2)               -- only rows from the required subset matter
    GROUP BY B
    HAVING COUNT(DISTINCT A) = 2    -- both members of the subset are present
) AS t;
One or both of the DISTINCTs may be unnecessary - it depends on whether your data can contain repeating values.
If I understand correctly and the requirement is to find Bs with a series of As that doesn't have any "gaps", you could compare the difference between the minimal and maximal A with number of records (per B, of course):
-- Return the b values whose a values form a run with no gaps.
-- n distinct consecutive integers satisfy MAX - MIN = n - 1
-- (e.g. {1, 2, 3}: 3 - 1 = 2), so the test is n = MAX - MIN + 1.
-- COUNT(DISTINCT a) keeps the check correct if (a, b) pairs repeat.
SELECT b
FROM mytable
GROUP BY b
HAVING COUNT(DISTINCT a) = MAX(a) - MIN(a) + 1
-- Count the B values with no gap in their A values: a B is excluded as soon as
-- one of its rows does not directly follow the previous A (ordered by A).
SELECT COUNT(DISTINCT T.B)
FROM TEMP T
WHERE T.B NOT IN (
    SELECT steps.B
    FROM (
        SELECT
            B,
            A,
            LAG(A, 1) OVER (PARTITION BY B ORDER BY A) AS prev_a
        FROM Temp
    ) steps
    -- The first row of each partition has no predecessor and is ignored.
    WHERE steps.prev_a IS NOT NULL
      AND steps.prev_a + 1 <> steps.A
);
I'm trying to update a column for all rows after each time one row is processed by a UDF.
The example has 3 rows with 6 columns. Column "A" has the same value across 3 rows; column "B" and "A" is the joint identifier of each row; column "C" is arrays with any letters in a,b,c,d,e; column "D" is the target array to be filled in; column "E" is some integers; column "abcde" is the integer array with 5 integers specifying the counts for each letter a,b,c,d,e.
Each row will be passed into a UDF to update the column "D" and column "abcde" according to the column "C" and column "E". The rule is: select the number, which specified by "E", of items from "C" to put into "D"; the selection is random; after each selection done for a row, the column 'abcde' will be updated across all rows.
For example, to process the first row, we randomly select one item from ('a','b','c') to put into "D". Let's say the system picked the 'c' in the column "C", so the value in "D" for this row becomes ['c'] and 'abcde' gets updated to [1,3,1,1,1] (before was [1,3,2,1,1]) for all three rows.
Example data:
#StandardSQL in BigQuery
#code to generate the example table
# Initial state: D is empty in every row. It is typed explicitly because a
# bare [] with no other type context defaults to ARRAY<INT64>, not a string array.
WITH sample AS (
  SELECT 'y1' AS A, 'x1' AS B, ['a','b','c'] AS C, ARRAY<STRING>[] AS D, 1 AS E, [1,3,2,1,1] AS abcde
  UNION ALL
  SELECT 'y1', 'x2', ['a','b'], ARRAY<STRING>[], 2, [1,3,2,1,1]
  UNION ALL
  SELECT 'y1', 'x3', ['c','d','e'], ARRAY<STRING>[], 3, [1,3,2,1,1]
)
SELECT A, B, C, D, E, abcde
FROM sample
ORDER BY B
After the first row is processed:
# State after row x1 is processed: D = ['c'] and abcde = [1,3,1,1,1] in all rows.
WITH sample AS (
  SELECT 'y1' AS A, 'x1' AS B, ['a','b','c'] AS C, ['c'] AS D, 1 AS E, [1,3,1,1,1] AS abcde
  UNION ALL
  SELECT 'y1', 'x2', ['a','b'], [], 2, [1,3,1,1,1]
  UNION ALL
  SELECT 'y1', 'x3', ['c','d','e'], [], 3, [1,3,1,1,1]
)
SELECT A, B, C, D, E, abcde
FROM sample
ORDER BY B
After the second row is processed:
# State after row x2 is processed: its D = ['a','b'] and abcde = [0,2,1,1,1] in all rows.
WITH sample AS (
  SELECT 'y1' AS A, 'x1' AS B, ['a','b','c'] AS C, ['c'] AS D, 1 AS E, [0,2,1,1,1] AS abcde
  UNION ALL
  SELECT 'y1', 'x2', ['a','b'], ['a','b'], 2, [0,2,1,1,1]
  UNION ALL
  SELECT 'y1', 'x3', ['c','d','e'], [], 3, [0,2,1,1,1]
)
SELECT A, B, C, D, E, abcde
FROM sample
ORDER BY B
After the third row is processed:
# State after row x3 is processed: its D = ['c','d','e'] and abcde = [0,2,0,0,0] in all rows.
WITH sample AS (
  SELECT 'y1' AS A, 'x1' AS B, ['a','b','c'] AS C, ['c'] AS D, 1 AS E, [0,2,0,0,0] AS abcde
  UNION ALL
  SELECT 'y1', 'x2', ['a','b'], ['a','b'], 2, [0,2,0,0,0]
  UNION ALL
  SELECT 'y1', 'x3', ['c','d','e'], ['c','d','e'], 3, [0,2,0,0,0]
)
SELECT A, B, C, D, E, abcde
FROM sample
ORDER BY B
Don't worry about how the UDF will do the random selection. I'm just wondering, if it's possible in BigQuery to do the task to update the column 'abcde' in the way I want?
I've tried using UDFs, but I'm struggling to get it working because my understanding of a UDF is that it can only take one row in and produce multiple rows out. So, I can't update the other rows. Is it possible just using SQL?
Expected output:
After the first row is processed:
After the third row is processed:
Additional information:
# Placeholder JavaScript UDF: takes one row's six columns and returns them
# unchanged as a STRUCT. The selection rule it is meant to implement is only
# described in the comment inside the JS body.
# NOTE(review): BigQuery documents INT64 as unsupported for JavaScript UDF
# inputs; confirm whether E / abcde need FLOAT64 before running this.
CREATE TEMPORARY FUNCTION selection(
  A STRING,
  B STRING,
  C ARRAY<STRING>,
  D ARRAY<STRING>,
  E INT64,
  abcde ARRAY<INT64>
)
RETURNS STRUCT<
  A STRING,
  B STRING,
  C ARRAY<STRING>,
  D ARRAY<STRING>,
  E INT64,
  abcde ARRAY<INT64>
>
LANGUAGE js AS """
/*
for the row i in the data:
select the number i.E of items (randomly) from i.C where the numbers associated with the item in i.abcde is bigger than 0 (i.e. only the items with numbers in abcde bigger than 0 can be the cadidates for the random selection);
put the selected items in i.D and deduct the amount of selected items from the number for the corresponding item in the column 'abcde' FOR ALL ROWS;
proceed to the next row i+1 until every row is processed;
*/
return {A,B,C,D,E,abcde}
""";
# Demo input: three rows sharing A = 'y1', each with an empty string array in D.
# Every row is passed through the selection() UDF, one call per row.
WITH sample AS (
  SELECT 'y1' AS A, 'x1' AS B, ['a','b','c'] AS C, CAST([] AS ARRAY<STRING>) AS D, 1 AS E, [1,3,2,1,1] AS abcde
  UNION ALL
  SELECT 'y1', 'x2', ['a','b'], [], 2, [1,3,2,1,1]
  UNION ALL
  SELECT 'y1', 'x3', ['c','d','e'], [], 2, [1,3,2,1,1]
)
SELECT selection(A, B, C, D, E, abcde)
FROM sample
ORDER BY B
Below is for BigQuery Standard SQL
#StandardSQL
# Sample rows in which D already holds the items "selected" for each (A, B).
WITH sample AS (
  SELECT 'y1' AS A, 'x1' AS B, ['a','b','c'] AS C, ['c'] AS D, 1 AS E, [1,3,2,1,1] AS abcde
  UNION ALL
  SELECT 'y1', 'x2', ['a','b'], ['a','b'], 2, [1,3,2,1,1]
  UNION ALL
  SELECT 'y1', 'x3', ['c','d','e'], ['c','d','e'], 3, [1,3,2,1,1]
  UNION ALL
  SELECT 'y2', 'x1', ['a','b','c'], ['a','b'], 2, [1,3,2,1,1]
  UNION ALL
  SELECT 'y2', 'x2', ['a','b'], ['b'], 1, [1,3,2,1,1]
  UNION ALL
  SELECT 'y2', 'x3', ['c','d','e'], ['d','e'], 2, [1,3,2,1,1]
),
# How many times each letter appears in D across all rows of one A group.
counts AS (
  SELECT
    s.A AS grp,
    letter,
    COUNT(*) AS cnt
  FROM sample AS s
  CROSS JOIN UNNEST(s.D) AS letter
  GROUP BY grp, letter
),
# Rebuild abcde per (A, B): each position minus the group's count for the
# letter mapped to that position (a=0 ... e=4); unmatched positions subtract 0.
processed AS (
  SELECT
    s.A,
    s.B,
    ARRAY_AGG(stock - COALESCE(c.cnt, 0) ORDER BY pos) AS abcde
  FROM sample AS s
  CROSS JOIN UNNEST(s.abcde) AS stock WITH OFFSET AS pos
  LEFT JOIN counts AS c
    ON s.A = c.grp
    AND pos = CASE c.letter
      WHEN 'a' THEN 0
      WHEN 'b' THEN 1
      WHEN 'c' THEN 2
      WHEN 'd' THEN 3
      WHEN 'e' THEN 4
    END
  GROUP BY s.A, s.B
)
# Original columns, with abcde replaced by the recomputed array.
SELECT
  s.A,
  s.B,
  s.C,
  s.D,
  s.E,
  p.abcde
FROM sample AS s
INNER JOIN processed AS p
  USING (A, B)
-- ORDER BY A, B
Don't worry about how the UDF will do the random selection
So, as you can see - I just put "random" values into sample data to mimic D
Using MS SQL 2012
I want to do something like
-- Desired shape (illustrative fragment, no FROM clause shown):
-- d is the sum of the three other selected columns.
select a, b, c, a+b+c d
However a, b, c are complex computed columns, lets take a simple example
-- Simplified illustration of the goal: c is built from the aliases a and b
-- defined in the same SELECT list, and the result is ordered by c.
-- NOTE(review): fragment only; no FROM clause supplying x is shown.
select case when x > 4 then 4 else x end a,
( select count(*) somethingElse) b,
a + b c
order by c
I hope that makes sense
You can use a nested query or a common table expression (CTE) for that. The CTE syntax is slightly cleaner - here it is:
-- Compute a and b once inside a CTE so the outer query can reuse them by
-- name to build c and sort on it.
WITH CTE (a, b)
AS
(
    SELECT
        -- Cap x at 4.
        CASE WHEN x > 4 THEN 4 ELSE x END AS a,
        -- Stand-in for an arbitrary scalar subquery; it must be parenthesised,
        -- otherwise "count(*) somethingElse b" carries two aliases and the bare
        -- aggregate would require a GROUP BY on x.
        (SELECT COUNT(*) somethingElse) AS b
    FROM my_table
)
SELECT
    a,
    b,
    (a + b) AS c
FROM CTE
ORDER BY c
I would probably do this:
-- Compute a and b in a derived table, then combine them in the outer query.
SELECT
    sub.a,
    sub.b,
    (sub.a + sub.b) AS c    -- no trailing comma: the select list ends here
FROM
(
    SELECT
        -- Cap x at 4.
        CASE WHEN x > 4 THEN 4 ELSE x END AS a,
        -- Stand-in for an arbitrary scalar subquery.
        (SELECT COUNT(*) somethingElse) AS b
    FROM MyTable
) sub
ORDER BY c
The easiest way is to do this:
-- Template: wrap the a/b/c calculations in a derived table so the outer
-- query can reference them by name, add them up as d, and sort by c.
SELECT
    a,
    b,
    c,
    a + b + c AS d
FROM (select <whatever your calcs are for a,b,c>) AS x
ORDER BY c
That just creates a derived table consisting of your calculations for a, b, and c, and allows you to easily reference and sum them up!