Map in BigQuery (dynamic keys) - google-bigquery

In bigquery, if we are interested in constructing json output, we can usually use struct for json object when the keys are known beforehand.
-- Serialises each row as a JSON object whose single key is the (fixed) column name key1.
WITH known_key_rows AS (
  SELECT "val1" AS key1
  UNION ALL
  SELECT "val2" AS key1
)
SELECT TO_JSON_STRING(STRUCT(key1))
FROM known_key_rows
Result
{"key1":"val1"}
{"key1":"val2"}
But in the case where the keys are dynamic, we really want a map type, similar to the avro map type
For example
-- Sample input for the dynamic-key case: one row per (key, val) pair.
WITH key_val_pairs AS (
  SELECT "key1" AS key, "val1" AS val
  UNION ALL
  SELECT "key2" AS key, "val2" AS val
)
SELECT
  key,
  val
FROM key_val_pairs
should return
{"key1": "val1", "key2": "val2"}
Is there any way to achieve this using BigQuery SQL?

Below is for BigQuery Standard SQL
Something simple like below should produce expected result
#standardSQL
-- Folds (key, val) rows into a single JSON object string with dynamic keys.
-- TO_JSON_STRING quotes and escapes each key and value, so quotes, commas or
-- parentheses inside the data cannot corrupt the output (the previous
-- FORMAT('%T') + REPLACE/TRIM text surgery could).
-- Rows with a NULL key are skipped because a JSON object key must be a string.
-- COALESCE makes an empty input produce '{}' instead of NULL, and ORDER BY keeps
-- the key order deterministic.
WITH `project.dataset.table` AS (
  SELECT "key1" AS key, "val1" AS val UNION ALL
  SELECT "key2" AS key, "val2" AS val
)
SELECT
  '{' || COALESCE(
    STRING_AGG(TO_JSON_STRING(t.key) || ': ' || TO_JSON_STRING(t.val), ', ' ORDER BY t.key),
    ''
  ) || '}' AS return
FROM `project.dataset.table` AS t
WHERE t.key IS NOT NULL
with output
Row return
1 {"key1": "val1", "key2": "val2"}

You can use Dynamic SQL to generate JSON string:
-- Builds a JSON object string from (key, val) rows into a script variable and
-- returns it through dynamic SQL.
DECLARE JSONSTR STRING;

SET JSONSTR = (
  SELECT
    -- TO_JSON_STRING quotes and escapes keys and values; COALESCE yields '{}'
    -- for an empty input; ORDER BY makes the key order deterministic.
    '{' || COALESCE(
      STRING_AGG(TO_JSON_STRING(key) || ': ' || TO_JSON_STRING(val), ', ' ORDER BY key),
      ''
    ) || '}'
  FROM (
    SELECT "key1" AS key, "val1" AS val
    UNION ALL
    SELECT "key2" AS key, "val2" AS val
  )
  -- A JSON object key must be a string, so rows without a key are skipped.
  WHERE key IS NOT NULL
);

-- The value is bound as a query parameter instead of being spliced into the SQL
-- text, so a single quote or backslash in the data cannot break (or inject into)
-- the generated statement.
EXECUTE IMMEDIATE "SELECT @json_str" USING JSONSTR AS json_str;

Related

Create nested json in Snowflake

I am trying to create a nested json in Snowflake and have narrowed down the query like below where I have nested it on id. However, I want the nested json to also apply to the inner layer and I am finding it hard to get the right query for it.
-- Snowflake: builds, per id, a JSON string of the form
-- {"resultValues": {"<placeId>": {"<actionId>": <resultValue>}, ...}}
-- with the entries ordered by placeId.
WITH subquery AS (
    SELECT
        id,
        placeId,
        actionId,
        resultValue
    FROM my_table
)
SELECT
    id,
    '{"resultValues": {'
        || LISTAGG('"' || placeId || '": {"' || actionId || '": ' || resultValue || '}', ',')
            WITHIN GROUP (ORDER BY placeId)
        || '}}' AS nested_json
FROM subquery
GROUP BY id;
Below is what the current result looks like for each id.
I am trying to get the actionId1 and actionId2 grouped under the placeId1 and placeId2 so that it looks like below. How do I get this done? Any ideas would be appreciated.
Meet FLATTEN() and LATERAL they like to hang out with OBJECT_AGG() who needs his own space via CTE's.
-- Snowflake example: regroups a parsed JSON document using chained
-- LATERAL FLATTEN calls and windowed OBJECT_AGG.
-- CTE: a single row holding the sample document, parsed into column VOLIA.
WITH CTE AS (
SELECT
parse_json(
' { "resultValues": [
{ "placeId1": { "actionId1": 1.1 } }, { "placeId1": { "actionId2": 1.2 } },
{ "placeId2": { "actionId1": 1.3 } }, { "placeId2":{ "actionId2": 1.4} } ] }'
) VOLIA
),
-- CTE2: flattens VOLIA four levels deep (aliases KIAORA -> HELLO -> TE_REO -> MAORI)
-- and, for each TE_REO.PATH partition, aggregates the innermost PATH/VALUE pairs
-- into one object.
-- NOTE(review): which JSON level each alias lands on depends on FLATTEN's PATH/VALUE
-- output for this document - confirm against real data before reusing.
CTE2 AS (
SELECT
DISTINCT KIAORA.PATH KIAORA,
TE_REO.PATH TE_REO,
OBJECT_AGG(MAORI.PATH, MAORI.VALUE) OVER (PARTITION BY TE_REO.PATH) MAORI
FROM
CTE,
LATERAL FLATTEN(INPUT => VOLIA) KIAORA,
LATERAL FLATTEN(KIAORA.VALUE) HELLO,
LATERAL FLATTEN(HELLO.VALUE) TE_REO,
LATERAL FLATTEN (INPUT => TE_REO.VALUE) MAORI
)
-- Final projection: wraps the per-KIAORA aggregation of (TE_REO, MAORI) pairs in a
-- one-element array under the KIAORA key (ANSWER) and returns the original
-- document (VOLIA) next to it. DISTINCT collapses the duplicate rows that the
-- window functions repeat across each partition.
SELECT
DISTINCT OBJECT_CONSTRUCT(
KIAORA,
ARRAY_CONSTRUCT(
OBJECT_AGG(TE_REO, MAORI) OVER (PARTITION BY KIAORA)
)
) ANSWER,
VOLIA
FROM
CTE2, CTE
after starting :

BigQuery SQL JSON Returning additional rows when current row contains multiple values

I have a table that looks like this
keyA | data:{"value":false}}
keyB | data:{"value":3}}
keyC | data:{"value":{"paid":10,"unpaid":20}}}
For keyA,keyB I can easily extract a single value with JSON_EXTRACT_SCALAR, but for keyC I would like to return multiple values and change the key name, so the final output looks like this:
keyA | false
keyB | 3
keyC-paid | 10
keyC-unpaid | 20
I know I can use UNNEST and JSON_EXTRACT to pull out multiple values and create additional rows, but I am unsure how to combine them so that the key column name is adjusted as well.
Even more generic approach
-- Returns the top-level property names of the JSON text passed in.
CREATE TEMP FUNCTION extract_keys(input STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  const parsed = JSON.parse(input);
  return Object.keys(parsed);
""";
-- Returns the top-level property values of the JSON text passed in,
-- in the same order in which extract_keys lists the names.
CREATE TEMP FUNCTION extract_values(input STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  const parsed = JSON.parse(input);
  return Object.values(parsed);
""";
-- Flattens nested JSON text into a single-level JSON object string whose keys are
-- dot-separated paths to every non-object value (e.g. {"a":{"b":1}} -> {"a.b":1}).
CREATE TEMP FUNCTION extract_all_leaves(input STRING)
RETURNS STRING
LANGUAGE js AS '''
  function flattenObj(node, prefix = '', leaves = {}) {
    for (const name in node) {
      const child = node[name];
      // Path of this property: "<prefix>.<name>", or just "<name>" at the top level.
      let path;
      if (prefix) {
        path = prefix + '.' + name;
      } else {
        path = name;
      }
      if (typeof child == 'object') {
        // Nested container: recurse, collecting into the same accumulator.
        flattenObj(child, path, leaves);
      } else {
        leaves[path] = child;
      }
    }
    return JSON.stringify(leaves);
  }
  return flattenObj(JSON.parse(input));
''';
-- One output row per JSON leaf: the leaf path (with 'value' removed and '.' turned
-- into '-') is appended to col; keys and values are paired up by array position.
SELECT
  col || REPLACE(REPLACE(key, 'value', ''), '.', '-') AS col,
  value
FROM your_table
CROSS JOIN UNNEST([STRUCT(extract_all_leaves(data) AS json)])
CROSS JOIN UNNEST(extract_keys(json)) AS key WITH OFFSET
INNER JOIN UNNEST(extract_values(json)) AS value WITH OFFSET
  USING (offset)
if applied to sample data in your question - output is
Benefit of this approach is that it is quite generic and thus can handle any level of nesting in json
For example for below data/table
the output is
Try this one:
-- Expands object-valued "value" entries into one row per sub-key (col becomes
-- "<col>-<sub-key>"); scalar "value" entries stay as a single row.
WITH sample AS (
  SELECT 'keyA' AS col, '{"value":false}' AS data
  UNION ALL
  SELECT 'keyB' AS col, '{"value":3}' AS data
  UNION ALL
  SELECT 'keyC' AS col, '{"value":{"paid":10,"unpaid":20}}' AS data
),
-- Key and value arrays of whatever sits under $.value.
extracted AS (
  SELECT
    col,
    data,
    `bqutil.fn.json_extract_keys`(JSON_QUERY(data, '$.value')) AS keys,
    `bqutil.fn.json_extract_values`(JSON_QUERY(data, '$.value')) AS vals
  FROM sample
)
SELECT
  col || IFNULL('-' || sub_key, '') AS col,
  IFNULL(sub_val, JSON_VALUE(data, '$.value')) AS data
FROM extracted
-- LEFT JOINs keep the rows whose key/value arrays are empty.
LEFT JOIN UNNEST(keys) AS sub_key WITH OFFSET AS key_pos
LEFT JOIN UNNEST(vals) AS sub_val WITH OFFSET AS val_pos
  ON key_pos = val_pos;

Expanding a Struct of Struct to columns in bigquery

I am working with a BQ table that has a format of a STRUCT of STRUCTs.
It looks as follows:
I would like to have a table that looks as follows:
property_hs_email_last_click_date_value
currentlyinworkflow_value
hs_first_engagement_object_id_value
hs_first_engagement_object_id_value__st
5/5/2022 23:00:00
Y
1
'Hey'
The challenge is that there are 500 fields and I would like to make this efficient instead of writing out every single line as follows:
-- Hand-written flattening of the STRUCT-of-STRUCTs: one output column per leaf field.
-- Fixes relative to the original fragment: the first column reads the struct's
-- .value field (not the struct itself), the third alias no longer contains a dot
-- (not a valid column alias), and the FROM clause is spelled out.
SELECT
  property_hs_email_last_click_date.value AS property_hs_email_last_click_date_value,
  properties.currentlyinworkflow.value AS currentlyinworkflow_value,
  properties.hs_first_engagement_object_id.value AS hs_first_engagement_object_id_value,
  properties.hs_first_engagement_object_id.value__st AS hs_first_engagement_object_id_value__st
FROM `project.database.TestTable`
Any suggestions on how to make this more efficient?
Edit:
Here's a query that creates a table such as this:
-- Sample table reproducing the STRUCT-of-STRUCTs layout from the question.
CREATE OR REPLACE TABLE `project.database.TestTable` (
  property_hs_email_last_click_date STRUCT<value STRING>,
  properties STRUCT<
    currentlyinworkflow STRUCT<value STRING>,
    hs_first_engagement_object_id STRUCT<value NUMERIC, value__st STRING>,
    first_conversion_event_name STRUCT<value STRING>
  >
);

-- Explicit column lists keep the inserts valid if columns are later added or reordered.
INSERT INTO `project.database.TestTable` (property_hs_email_last_click_date, properties)
VALUES (STRUCT('12/2/2022 23:00:02'), STRUCT(STRUCT('Yes'), STRUCT(1, 'Thursday'), STRUCT('Festival')));

INSERT INTO `project.database.TestTable` (property_hs_email_last_click_date, properties)
VALUES (STRUCT('14/2/2021 12:00:02'), STRUCT(STRUCT('No'), STRUCT(5, 'Friday'), STRUCT('Phone')));
Below is quite generic script that extracts all leaves in JSON and then presents them as columns
-- Returns the top-level property names of the JSON text passed in.
CREATE TEMP FUNCTION extract_keys(input STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  const parsed = JSON.parse(input);
  return Object.keys(parsed);
""";
-- Returns the top-level property values of the JSON text passed in,
-- in the same order in which extract_keys lists the names.
CREATE TEMP FUNCTION extract_values(input STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  const parsed = JSON.parse(input);
  return Object.values(parsed);
""";
-- Flattens nested JSON text into a single-level JSON object string whose keys are
-- dot-separated paths to every non-object value (e.g. {"a":{"b":1}} -> {"a.b":1}).
CREATE TEMP FUNCTION extract_all_leaves(input STRING)
RETURNS STRING
LANGUAGE js AS '''
  function flattenObj(node, prefix = '', leaves = {}) {
    for (const name in node) {
      const child = node[name];
      // Path of this property: "<prefix>.<name>", or just "<name>" at the top level.
      let path;
      if (prefix) {
        path = prefix + '.' + name;
      } else {
        path = name;
      }
      if (typeof child == 'object') {
        // Nested container: recurse, collecting into the same accumulator.
        flattenObj(child, path, leaves);
      } else {
        leaves[path] = child;
      }
    }
    return JSON.stringify(leaves);
  }
  return flattenObj(JSON.parse(input));
''';
-- Long-format staging table: one row per (source row, leaf path), where row_id is
-- the source row rendered as text and offset is the leaf's position within its row.
CREATE TEMP TABLE temp_table AS (
  SELECT
    offset,
    key,
    value,
    FORMAT('%t', t) AS row_id
  FROM your_table AS t
  CROSS JOIN UNNEST([STRUCT(TO_JSON_STRING(t) AS json)])
  CROSS JOIN UNNEST([STRUCT(extract_all_leaves(json) AS leaves)])
  CROSS JOIN UNNEST(extract_keys(leaves)) AS key WITH OFFSET
  INNER JOIN UNNEST(extract_values(leaves)) AS value WITH OFFSET
    USING (offset)
);
-- Generates and runs a PIVOT over temp_table that turns every distinct leaf key
-- into its own column. The IN (...) list is assembled at run time from the keys
-- present in temp_table; dots in key paths are replaced with '__' both in the
-- FOR expression and in the generated list.
-- offset is dropped before pivoting and row_id is dropped from the final output.
execute immediate (select '''
select * except(row_id) from (select * except(offset) from temp_table)
pivot (any_value(value) for replace(key, '.', '__') in (''' || keys_list || '''
))'''
-- keys_list: double-quoted, comma-separated key names, ordered by the smallest
-- offset at which each key appears in temp_table.
from (select string_agg('"' || replace(key, '.', '__') || '"', ',' order by offset) keys_list from (
select key, min(offset) as offset from temp_table group by key
))
);
if applied to sample data in your question
-- Sample data from the question: two rows of STRUCT-of-STRUCTs.
-- Field names are declared on the first SELECT; the second one supplies its
-- values positionally.
CREATE TEMP TABLE your_table AS (
  SELECT
    STRUCT('12/2/2022 23:00:02' AS value) AS property_hs_email_last_click_date,
    STRUCT(
      STRUCT('Yes' AS value) AS currentlyinworkflow,
      STRUCT(1 AS value, 'Thursday' AS value__st) AS hs_first_engagement_object_id,
      STRUCT('Festival' AS value) AS first_conversion_event_name
    ) AS properties
  UNION ALL
  SELECT
    STRUCT('14/2/2021 12:00:02'),
    STRUCT(STRUCT('No'), STRUCT(5, 'Friday'), STRUCT('Phone'))
);
the output is

How to use JSON_EXTRACT without having a key name?

How do I extract the value of the key "Nome" from JSON using JSON_EXTRACT in Google BigQuery?
I cannot use the key 135 in the query because it is dynamic (Like this JSON_EXTRACT(vista, '$.Agencia.135.Nome'))
How to use JSON_EXTRACT without having a key '135' name?
JSON Record Sample:
{
"Campanha": "Campanha A",
"Ad": "Ad A",
"Agencia": {
"135": {
"Celular": ".",
"Codigo": "135",
"CodigoPai": "105",
"DDD": "00",
"Email": "email-A#email.com",
"Nome": "Nome A",
"Fone": "00 0000.0000",
"Fone2": ".",
"Foto": "foto-A.jpg"
}
}
}
Not sure if your json is formatted correctly. Is the key '135' an array? If so, format it properly and you can access it as the example below:
-- Reads the second element ([1]) of the array stored under Agencia.135.
WITH sample AS (
  SELECT '{"Agencia":{"135":[{"Codigo":"135"},{"Nome":"Nome A"}]}}' AS json_text
)
SELECT
  JSON_EXTRACT(json_text, '$.Agencia.135[1]') AS nome
FROM sample;
That would give you:
[
{
"nome": "{\"Nome\":\"Nome A\"}"
}
]
For more references about the JSON_EXTRACT: https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_extract
Use below approach
-- Generates one "select <key> key, JSON_EXTRACT_SCALAR(vista, '$.Agencia.<key>.Nome') ..."
-- statement per key captured from the $.Agencia object, glues them together with
-- UNION ALL and executes the result.
-- The keys come from a regex pass: regexp_replace strips the ':{...}' spans and
-- regexp_extract_all then captures the remaining double-quoted strings.
-- NOTE(review): key is concatenated into the generated SQL unquoted, so it is
-- emitted as a bare token - fine for numeric keys such as 135; verify the
-- behaviour for non-numeric keys.
execute immediate (
select string_agg("select " || key || ''' key
, JSON_EXTRACT_SCALAR(vista, '$.Agencia.''' || key || '''.Nome') AS Nome
from `project.dataset.table`''', " union all ")
from `project.dataset.table`, unnest(regexp_extract_all(regexp_replace(JSON_EXTRACT(vista, '$.Agencia'), r':{.*?}+', ''), r'"(.*?)"')) key
);
If applied to sample data in your question - output is
Also, depending on your use case, you might try the option below:
-- Generates one "select <key> key, JSON_EXTRACT_SCALAR(vista, '$.Agencia.<key>.Nome') ..."
-- statement per key captured from the $.Agencia object, joins them with UNION ALL,
-- wraps the union in an outer select that keeps only rows where Nome is not null,
-- and executes the result.
-- The keys come from a regex pass: regexp_replace strips the ':{...}' spans and
-- regexp_extract_all then captures the remaining double-quoted strings.
-- NOTE(review): key is concatenated into the generated SQL unquoted - verify the
-- behaviour for non-numeric keys.
execute immediate (
select 'select * from (' || string_agg("select " || key || ''' key
, JSON_EXTRACT_SCALAR(vista, '$.Agencia.''' || key || '''.Nome') AS Nome
from `project.dataset.table`''', " union all ") || ') where not Nome is null'
from `project.dataset.table`, unnest(regexp_extract_all(regexp_replace(JSON_EXTRACT(vista, '$.Agencia'), r':{.*?}+', ''), r'"(.*?)"')) key
);

BigQuery JSON EXTRACT

[
{
"key":"expiry_date",
"type":"date",
"label":"Expiry Date",
"required":false,
"default_value":"2029-12-15"
},
{
"key":"brand",
"type":"text",
"label":"Brand",
"required":false,
"default_value":"clipsal"
}
]
Is there a way that I could extract the default_value of "expiry_date" in the nested JSON above? The data is under a column called attributes.
Have you tried any of these functions described here? Maybe it can help.
Also, if the first element of the json array will be always what you want, you could use something like:
-- Reads default_value from the first element of the JSON array.
WITH test_table AS (
  SELECT
    "[{\"key\":\"expiry_date\",\"type\":\"date\",\"label\":\"Expiry Date\",\"required\":false,\"default_value\":\"2029-12-15\"},{\"key\":\"brand\",\"type\":\"text\",\"label\":\"Brand\",\"required\":false,\"default_value\":\"clipsal\"}]" AS json_text_field
)
SELECT
  JSON_EXTRACT(json_text_field, '$[0].default_value')
FROM test_table
If the key is not always in the first element, you could use this instead:
-- Finds the array element whose "key" is expiry_date and returns its default_value.
-- Both extractions read from the unnested element (json_value). Reading them from
-- the whole array text (json_text_field) returns NULL for '$.key', so the filter
-- below would never match.
WITH test_table AS (
  SELECT
    "[{\"key\":\"expiry_date\",\"type\":\"date\",\"label\":\"Expiry Date\",\"required\":false,\"default_value\":\"2029-12-15\"},{\"key\":\"brand\",\"type\":\"text\",\"label\":\"Brand\",\"required\":false,\"default_value\":\"clipsal\"}]" AS json_text_field
)
SELECT value
FROM (
  SELECT
    JSON_EXTRACT(json_value, '$.key') AS id,
    JSON_EXTRACT(json_value, '$.default_value') AS value
  FROM test_table, UNNEST(JSON_EXTRACT_ARRAY(json_text_field, '$')) AS json_value
)
-- JSON_EXTRACT keeps the JSON quoting, hence the embedded double quotes.
WHERE id = '"expiry_date"'
Below is for BigQuery Standard SQL
#standardSQL
-- One output row per array element whose "key" equals expiry_date, carrying that
-- element's default_value as an unquoted scalar.
SELECT
  JSON_EXTRACT_SCALAR(attr, '$.default_value') AS default_value
FROM `project.dataset.table`
CROSS JOIN UNNEST(JSON_EXTRACT_ARRAY(json)) AS attr
WHERE JSON_EXTRACT_SCALAR(attr, '$.key') = 'expiry_date'
You can test the above with the sample / dummy data from your question, as in the example below:
#standardSQL
-- Same lookup as above, run against the sample JSON from the question.
WITH `project.dataset.table` AS (
  SELECT '''
[
{
"key":"expiry_date",
"type":"date",
"label":"Expiry Date",
"required":false,
"default_value":"2029-12-15"
},
{
"key":"brand",
"type":"text",
"label":"Brand",
"required":false,
"default_value":"clipsal"
}
]
''' AS json
)
SELECT
  JSON_EXTRACT_SCALAR(attr, '$.default_value') AS default_value
FROM `project.dataset.table`
CROSS JOIN UNNEST(JSON_EXTRACT_ARRAY(json)) AS attr
WHERE JSON_EXTRACT_SCALAR(attr, '$.key') = 'expiry_date'
with output
Row default_value
1 2029-12-15
Depending on your real use case, you can also consider the variation below:
#standardSQL
-- Keeps every source row and adds default_value, looked up by a correlated
-- subquery over that row's JSON array.
SELECT
  *,
  (
    SELECT JSON_EXTRACT_SCALAR(attr, '$.default_value')
    FROM UNNEST(JSON_EXTRACT_ARRAY(json)) AS attr
    WHERE JSON_EXTRACT_SCALAR(attr, '$.key') = 'expiry_date'
  ) AS default_value
FROM `project.dataset.table`