Bigquery array of STRINGs to array of INTs - google-bigquery

I'm trying to pull an array of INT64s in BigQuery standard SQL from a column that holds a long string of comma-separated numbers (for example, 2013,1625,1297,7634). I can pull an array of strings easily with:
-- Splits the comma-separated string into an ARRAY<STRING>.
SELECT
  SPLIT(string_col, ",")
FROM
  table
However, I want to return an array of INT64s, not an array of strings. How can I do that? I've tried
CAST(SPLIT(string_col,",") AS ARRAY<INT64>)
but that doesn't work.

Below is for BigQuery Standard SQL
#standardSQL
-- Converts a comma-separated string of numbers into ARRAY<INT64>.
-- UNNEST does not guarantee element order, so each form carries the element
-- position (WITH OFFSET) and orders by it to keep the numbers in the same
-- order as they appear in the string.
WITH yourTable AS (
  SELECT 1 AS id, '2013,1625,1297,7634' AS string_col UNION ALL
  SELECT 2, '1,2,3,4,5'
)
SELECT
  id,
  -- Form 1: scalar subquery with ARRAY_AGG (yields NULL when there are no elements).
  (
    SELECT ARRAY_AGG(CAST(num AS INT64) ORDER BY pos)
    FROM UNNEST(SPLIT(string_col)) AS num WITH OFFSET AS pos
  ) AS num,
  -- Form 2: ARRAY subquery (yields an empty array when there are no elements).
  ARRAY(
    SELECT CAST(num AS INT64)
    FROM UNNEST(SPLIT(string_col)) AS num WITH OFFSET AS pos
    ORDER BY pos
  ) AS num_2
FROM yourTable

Mikhail beat me to it and his answer is more extensive but adding this as a more minimal repro:
-- Minimal form: returns one INT64 row per element of the split string
-- (rows, not a single array value).
SELECT
  CAST(num AS INT64)
FROM
  UNNEST(SPLIT("2013,1625,1297,7634", ",")) AS num;

Related

BigQuery - Count how many words in array are equal

I want to count how many similar words I have in a path (which will be split at delimiter /) and return a matching array of integers.
Input data will be something like:
I want to add another column, match_count, with an array of integers. For example:
To replicate this case, this is the query I'm working with:
-- Scaffold from the question: the UDF body below is still a placeholder,
-- so this script does not run as written.
CREATE TEMP FUNCTION HOW_MANY_MATCHES_IN_PATH(src_path ARRAY<STRING>, test_path ARRAY<STRING>) RETURNS ARRAY<INTEGER> AS (
-- WHAT DO I PUT HERE?
);
SELECT
*,
HOW_MANY_MATCHES_IN_PATH(src_path, test_path) as dir_path_match_count
FROM (
-- The comma between the two UNNEST calls is a cross join: every x is paired
-- with every y before ARRAY_AGG collects them into src_path / test_path.
SELECT
ARRAY_AGG(x) AS src_path,
ARRAY_AGG(y) as test_path
FROM
UNNEST([
'lib/client/core.js',
'lib/server/core.js'
]) AS x, UNNEST([
'test/server/core.js'
]) as y
)
I've tried working with ARRAY and UNNEST in the HOW_MANY_MATCHES_IN_PATH function, but I either end up with an error or an array of 4 items (in this example)
Consider below approach
-- Returns the number of distinct '/'-separated segments of src_path
-- that also appear among the '/'-separated segments of test_path.
CREATE TEMP FUNCTION how_many_matches_in_path(src_path STRING, test_path STRING)
RETURNS INTEGER
AS ((
  SELECT COUNT(DISTINCT src_part)
  FROM UNNEST(SPLIT(src_path, '/')) AS src_part
  CROSS JOIN UNNEST(SPLIT(test_path, '/')) AS test_part
  WHERE src_part = test_part
));
-- For every row, pairs src_path[i] with test_path[i] (joined on array offset)
-- and builds an array of per-pair match counts.
-- ORDER BY offset keeps result position i aligned with input position i;
-- without it the order of the ARRAY subquery's elements is not guaranteed.
SELECT
  *,
  ARRAY(
    SELECT how_many_matches_in_path(src, test)
    FROM t.src_path AS src WITH OFFSET
    INNER JOIN t.test_path AS test WITH OFFSET
      USING (offset)
    ORDER BY offset
  ) AS dir_path_match_count
FROM your_table AS t
If applied to the sample input data in your question:
-- Sample input: a single row holding two equal-length path arrays.
WITH your_table AS (
  SELECT
    ['lib/client/core.js', 'lib/server/core.js'] AS src_path,
    ['test/server/core.js', 'test/server/core.js'] AS test_path
)
output is

BigQuery Standard SQL, get max value from json array

I have a BigQuery column which contains STRING values like
col1
[{"a":1,"b":2},{"a":2,"b":3}]
[{"a":3,"b":4},{"a":5,"b":6}]
Now when doing a SELECT for each I want to get just the max. value of "a" in each json array for example here I would want the output of the SELECT on the table to be
2
5
Any ideas please? Thanks!
Use JSON_QUERY_ARRAY() to retrieve each array element, then JSON_VALUE() to extract the value of "a" from each element:
-- Returns, for each row, the maximum "a" value found in the JSON array in col.
WITH t AS (
  SELECT '[{"a":1,"b":2},{"a":2,"b":3}]' AS col UNION ALL
  SELECT '[{"a":3,"b":4},{"a":5,"b":6}]'
)
SELECT
  t.*,
  (
    -- JSON_VALUE returns STRING; cast to INT64 so MAX compares numerically
    -- (as strings, '9' would sort above '10').
    SELECT MAX(CAST(JSON_VALUE(el, '$.a') AS INT64))
    FROM UNNEST(JSON_QUERY_ARRAY(col, '$')) AS el
  ) AS max_a
FROM t;

Extracting keywords out of a string and showing them in another column

I need to write a query that extracts specific names from a string and shows them in another column. For example, a column has these values:
Column:
Row 1: jasdhj31e31jh123hkkj,12l1,3jjds,Amin,02323rdcsnj
Row 2:jasnasc8918212,ahsahkdjjMina67,
Row 3:kasdhakshd,asda,asdasd,121,121,Sina878788kasas
Key Words: Amin,Mina,Sina
How can I get these keywords in another column? I don't want to insert another column, but if that's the only solution, let me know.
Any help appreciated!
Below is for BigQuery Standard SQL
#standardSQL
-- Lists, per string, the keywords it contains (case-insensitive match).
-- Strings that contain no keyword produce no output row.
WITH keywords AS (
  SELECT keyword
  FROM UNNEST(SPLIT('Amin,Mina,Sina')) AS keyword
)
SELECT
  str,
  -- ORDER BY makes the concatenation order deterministic.
  STRING_AGG(keyword ORDER BY keyword) AS keywords_in_str
FROM `project.dataset.table`
CROSS JOIN keywords
-- The keyword is used as a regular expression, prefixed with the (?i) flag.
WHERE REGEXP_CONTAINS(str, CONCAT(r'(?i)', keyword))
GROUP BY str
You can test, play with above using dummy data from your question as below
#standardSQL
-- Same query as a self-contained demo: the table is replaced by inline dummy rows.
WITH `project.dataset.table` AS (
  SELECT 'jasdhMINAj31e31jh123hkkj,12l1,3jjds,Amin,02323rdcsnj' AS str UNION ALL
  SELECT 'jasnasc8918212,ahsahkdjjMina67,' UNION ALL
  SELECT 'kasdhakshd,asda,asdasd,121,121,Sina878788kasas'
), keywords AS (
  SELECT keyword
  FROM UNNEST(SPLIT('Amin,Mina,Sina')) AS keyword
)
SELECT
  str,
  -- ORDER BY makes the concatenation order deterministic.
  STRING_AGG(keyword ORDER BY keyword) AS keywords_in_str
FROM `project.dataset.table`
CROSS JOIN keywords
-- The keyword is used as a regular expression, prefixed with the (?i) flag.
WHERE REGEXP_CONTAINS(str, CONCAT(r'(?i)', keyword))
GROUP BY str
with results as
Row str keywords_in_str
1 jasdhMINAj31e31jh123hkkj,12l1,3jjds,Amin,02323rdcsnj Amin,Mina
2 jasnasc8918212,ahsahkdjjMina67, Mina
3 kasdhakshd,asda,asdasd,121,121,Sina878788kasas Sina
To count the number of occurrences of each keyword:
#standardSQL
-- Per string, counts how many times each keyword occurs (case-insensitive).
WITH `project.dataset.table` AS (
  SELECT 'jasdhMINAj31e31jh123hkkj,12l1,3jjds,Amin,02323rdcsnj' AS str UNION ALL
  SELECT 'jasnasc8918212,ahsahkdjjMina67,' UNION ALL
  SELECT 'kasdhakshd,asda,asdasd,121,121,Sina878788kasas'
), keyword_hits AS (
  -- Every keyword occurrence found in str, as an array of matched substrings.
  SELECT
    str,
    REGEXP_EXTRACT_ALL(str, "(?i)(Amin|Mina|Sina)") AS hits
  FROM `project.dataset.table`
)
SELECT
  str,
  -- One-element array holding a struct with one counter per keyword.
  ARRAY(
    SELECT AS STRUCT
      COUNTIF(LOWER(hit) = "amin") AS amin,
      COUNTIF(LOWER(hit) = "mina") AS mina,
      COUNTIF(LOWER(hit) = "sina") AS sina
    FROM UNNEST(hits) AS hit
  ) AS keyword
FROM keyword_hits

Extract a sub string from a string

In google-bigquery, I need to pull the string that is between domain** and ** as in the example below.
The string is under the column "Site_Data"
Can someone help me? 10x!
See example below
#standardSQL
-- Extracts the text between 'domain**' and the next '**'.
WITH yourTable AS (
  SELECT '756-1__6565656565656, tagtype**unmapped,domain**www.sport.com,userarriveddirectly**False' AS Site_Data
)
SELECT
  -- Non-greedy (.*?) stops at the first '**' after 'domain**'; a greedy (.*)
  -- would run to the last '**' in the string when more fields follow.
  REGEXP_EXTRACT(Site_Data, r'domain\*\*(.*?)\*\*') AS x,
  Site_Data
FROM yourTable
Do all of the strings have that format? There are a couple of different options, assuming that you always need the third string after the ** delimiter.
1) Use SPLIT, e.g.:
#standardSQL
-- Takes the third '**'-delimited piece of site_data.
WITH sample_rows AS (
  SELECT '756-1__67648582789116,tagtype**unmapped,domain**www.sport.com,userarriveddirectly**False' AS site_data
)
SELECT
  -- ORDINAL is 1-based, so ORDINAL(3) is the third piece.
  SPLIT(site_data, '**')[ORDINAL(3)] AS visit_type
FROM sample_rows;
2) Use REGEXP_EXTRACT, e.g.:
#standardSQL
-- Captures the third run of non-'*' characters, where runs are separated by '**'.
WITH sample_rows AS (
  SELECT '756-1__67648582789116,tagtype**unmapped,domain**www.sport.com,userarriveddirectly**False' AS site_data
)
SELECT
  REGEXP_EXTRACT(site_data, r'[^\*]+\*\*[^\*]+\*\*([^\*]+)') AS visit_type
FROM sample_rows;
Taking this a step further, if you want to split the domain and the arrival type, you can use SPLIT again:
#standardSQL
-- Splits the third '**'-delimited piece again on ',' into domain and arrival type.
WITH SampleData AS (
  SELECT '756-1__67648582789116,tagtype**unmapped,domain**www.sport.com,userarriveddirectly**False' AS site_data
), visit AS (
  -- Third '**'-delimited piece of the raw string.
  SELECT SPLIT(site_data, '**')[OFFSET(2)] AS visit_type
  FROM SampleData
)
SELECT
  SPLIT(visit_type)[OFFSET(0)] AS domain,
  SPLIT(visit_type)[OFFSET(1)] AS arrival_type
FROM visit;

Convert array of strings into array of integers

I have the following SQL:
-- Combines two row sets and filters them by ticket id:
--   1) one row per ticket with its summary and total hours worked
--      (sum of seconds_worked / 3600.0), worker left NULL;
--   2) one row per (ticket, worker) with that worker's hours, summary left NULL.
-- NOTE: the IN list below holds a single string literal '9755, 9759',
-- not two separate ids.
SELECT * FROM (SELECT t.id, t.summary, null as worker, tt.worked from ticket t
INNER JOIN (SELECT ticket, sum(seconds_worked)/3600.0 as worked FROM ticket_time GROUP BY ticket) tt ON tt.ticket=t.id
UNION ALL
SELECT ticket,null, tt.worker, sum(tt.seconds_worked)/3600.0 from ticket_time tt GROUP BY ticket,worker) as foo
WHERE id in ('9755, 9759') ORDER BY id
The ids string '9755, 9759' in the last line can and will change whenever the sql executed.
I can convert the string to an array like this:
string_to_array('9755, 9759', ',')
But is there a way to convert this array of strings into array of integers?
Just cast the resulting array to an int[]
where id = ANY ( string_to_array('9755, 9759', ',')::int[] )