How to extract a value in a JSON table on BigQuery? - sql

I have a JSON table which has over 30.000 rows. There are different rows like this:
JSON_columns
------------
{
"level": 20,
"nickname": "ABCDE",
"mission_name": "take_out_the_trash",
"mission_day": "150",
"duration": "0",
"properties": []
}
{
"nickname": "KLMNP",
"mission_name": "recycle",
"mission_day": "180",
"properties": [{
"key": "bottle",
"value": {
"string_value": "blue_bottle"
}
}, {
"key": "bottleRecycle",
"value": {
"string_value": "true"
}
}, {
"key": "price",
"value": {
"float_value": 21.99
}
}, {
"key": "cost",
"value": {
"float_value": 15.39
}
}]
}
I want to take the sum of the costs in the table. But first, I want to extract the cost from the table.
I tried the code below. It returns null:
-- Asker's attempt (reported to return NULL).
-- JSON_VALUE is called with a single argument here: the JSONPath string sits in the
-- position of the JSON input, and no JSON column is passed to the function.
SELECT JSON_VALUE('$.properties[3].value.float_value') AS profit
FROM `missions.missions_study`
WHERE mission_name = "recycle"
My question is, how can I extract the cost values right, and sum them?

A common way to extract the cost from your JSON is shown below.
-- Two sample rows standing in for the real table; each row holds one JSON document as a string.
WITH sample_table AS (
  SELECT '{"level":20,"nickname":"ABCDE","mission_name":"take_out_the_trash","mission_day":"150","duration":"0","properties":[]}' AS json
  UNION ALL
  SELECT '{"nickname":"KLMNP","mission_name":"recycle","mission_day":"180","properties":[{"key":"bottle","value":{"string_value":"blue_bottle"}},{"key":"bottleRecycle","value":{"string_value":"true"}},{"key":"price","value":{"float_value":21.99}},{"key":"cost","value":{"float_value":15.39}}]}' AS json
),
-- One row per "cost" entry found in the properties array of a "recycle" mission.
-- The entry is located by its key rather than by its position in the array.
recycle_costs AS (
  SELECT
    CAST(JSON_VALUE(prop, '$.value.float_value') AS FLOAT64) AS cost
  FROM sample_table
  CROSS JOIN UNNEST(JSON_QUERY_ARRAY(json, '$.properties')) AS prop
  WHERE JSON_VALUE(json, '$.mission_name') = 'recycle'
    AND JSON_VALUE(prop, '$.key') = 'cost'
)
SELECT
  SUM(cost) AS total
FROM recycle_costs;

Related

Deserialise multiple objects into a select statement

In a table, I store multiple string records in several records.
declare #x nvarchar(max) = {
"totalSize": 1000,
"done": true,
"records": [
{
"attributes": {
"type": "Contract",
"url": ""
},
"Name": "Harpy",
"Job_Schedule_Date__c": null,
"EndDate": "2021-03-24",
"Account": {
"attributes": {
"type": "Account",
"url": ""
},
"Name": "Madison"
},
"ContractNumber": "12345",
"Related_Site__r": {
"attributes": {
"type": "Site__c",
"url": ""
},
"Name": "Jackson"
}
},
.
.
.
]
}
select * from openJson(#x, '$.records')
I am trying to use open JSON to unpack the records.
I am able to unpack a single record, but it doesn't unpack the fields into columns, and I need to unpack multiple records and join them.
Since each record only stores 1000 records, I need to join them up.
What I want is output like below as a Select
Name, Job_Schedule_Date__c, EndDate, AccountName, ContractNumber, RelatedSiteName
Harpy, null, 2021-03-24, Madison, 12345, Jackson

select node value from json column type

I have a table called raw_data with three columns: ID, timestamp, and payload. The payload column is of JSON type and holds values such as:
{
"data": {
"author_id": "1461871206425108480",
"created_at": "2022-08-17T23:19:14.000Z",
"geo": {
"coordinates": {
"type": "Point",
"coordinates": [
-0.1094,
51.5141
]
},
"place_id": "3eb2c704fe8a50cb"
},
"id": "1560043605762392066",
"text": " ALWAYS # London, United Kingdom"
},
"matching_rules": [
{
"id": "1560042248007458817",
"tag": "london-paris"
}
]
}
From this I want to select the rows where the coordinates are available, such as [-0.1094,51.5141] in this case.
-- Asker's attempt (reported to return nothing).
-- Each raw_data row is joined to json_each(payload), and the filter tests
-- json_each.value rather than payload itself; the path literal also ends with a trailing '.'.
SELECT *
FROM raw_data, json_each(payload)
WHERE json_extract(json_each.value, '$.data.geo.') IS NOT NULL
LIMIT 20;
Nothing was returned.
EDIT
NOT ALL json objects have the coordinates node. For example this value:
{
"data": {
"author_id": "1556031969062010881",
"created_at": "2022-08-18T01:42:21.000Z",
"geo": {
"place_id": "006c6743642cb09c"
},
"id": "1560079621017796609",
"text": "Dear Desperate sister say husband no dey oo."
},
"matching_rules": [
{
"id": "1560077018183630848",
"tag": "kaduna-kano-katsina-dutse-zaria"
}
]
}
The correct path is '$.data.geo.coordinates.coordinates' and there is no need for json_each():
-- Keep only the rows whose payload has a value at data.geo.coordinates.coordinates.
-- The path is applied to the payload column directly, so no json_each() expansion is involved.
SELECT rd.*
FROM raw_data AS rd
WHERE json_extract(rd.payload, '$.data.geo.coordinates.coordinates') IS NOT NULL;
See the demo.

Postgresql and jsonb - inserting a key/value into a multi-level array

Very similar to this post, but I struggle to adapt from their solution..
My table : public.challenge, column lines JSONB
My initial JSON in lines :
[
{
"line": 1,
"blocs": [
{
"size": 100,
"name": "abc"
},
{
"size": 100,
"name": "def"
},
{
"size": 100,
"name": "ghi"
}
]
},
{
"line": 2,
"blocs": [
{
"size": 100,
"name": "xyz"
}
]
}
]
Desired update :
[
{
"line": 1,
"blocs": [
{
"size": 100,
"name": "abc",
"type": "regular"
},
{
"size": 100,
"name": "def",
"type": "regular"
},
{
"size": 100,
"name": "ghi",
"type": "regular"
}
]
},
{
"line": 2,
"blocs": [
{
"size": 100,
"name": "xyz",
"type": "regular"
}
]
}
]
So basically I need to add the type key+value in every object of blocs, for each element of the root array.
My unsuccessful attempt looks like this :
-- Asker's attempt (reported to set the whole column to NULL).
-- It rebuilds each line's "blocs" array with "type" appended and writes the result
-- back through an outer jsonb_set whose path argument is the empty path '{}'.
UPDATE public.challenge SET lines = jsonb_set(lines, '{}', (
SELECT jsonb_set(line, '{blocs}', (
SELECT jsonb_agg( bloc || '{"type":"regular"}' )
-- NOTE(review): '{blocs}' is handed to -> as a plain text key here, whereas the
-- jsonb_set call above uses the same literal as a path.
FROM jsonb_array_elements(line->'{blocs}') bloc
))
FROM jsonb_array_elements(lines) line
))
;
(currently it sets the whole column as null, maybe due to jsonb_set(lines, '{}' while my json begins as an array ?)
Thanks!
Use jsonb_array_elements to unnest all the array elements and then add the required json and use jsonb_agg to aggregate it again:
-- Add "type": "regular" to every object inside each line's "blocs" array.
--
-- Step 1 (updated_lines): for every challenge row, unnest the root array in "lines",
--   rebuild each element's "blocs" array with the extra key, and re-aggregate the
--   elements into one jsonb array per id.
-- Step 2: write the rebuilt array back to the matching row.
--
-- Rows whose "lines" array is empty produce no row in updated_lines and are left untouched.
WITH updated_lines AS (
    SELECT
        c.id,
        jsonb_agg(
            CASE
                -- Only rewrite elements that really carry a "blocs" array; anything
                -- else is passed through unchanged instead of being turned into NULL.
                WHEN jsonb_typeof(ln.elem -> 'blocs') = 'array' THEN
                    jsonb_set(
                        ln.elem,
                        '{blocs}',
                        -- jsonb_agg over zero rows yields NULL and jsonb_set returns NULL
                        -- when any argument is NULL, so an empty "blocs" must stay '[]'.
                        COALESCE(
                            (
                                SELECT jsonb_agg(bl.elem || '{"type": "regular"}'::jsonb ORDER BY bl.idx)
                                FROM jsonb_array_elements(ln.elem -> 'blocs')
                                     WITH ORDINALITY AS bl(elem, idx)
                            ),
                            '[]'::jsonb
                        )
                    )
                ELSE ln.elem
            END
            -- Keep the root array in its original order.
            ORDER BY ln.idx
        ) AS new_lines
    FROM challenge AS c
    CROSS JOIN LATERAL jsonb_array_elements(c.lines) WITH ORDINALITY AS ln(elem, idx)
    GROUP BY c.id
)
UPDATE challenge
SET lines = updated_lines.new_lines
FROM updated_lines
WHERE challenge.id = updated_lines.id;
DEMO

BigQuery concat nested array json

I have data that looks like
{
"Attributes": [
{
"values": [
{
"value": "20003"
},
{
"value": "30075"
},
{
"value": "40060"
}
],
"name": "price"
}
],
"attr2" : "val"
}
The output I want is concat all the values in the nested json array
price, "20003, 30075, 40060"
I tried some queries but failed to get the correct output.
You can use JSON_EXTRACT_ARRAY and ARRAY_TO_STRING:
-- Sample document held as a string in a one-row CTE.
WITH test_json AS (
SELECT
'''{
"Attributes": [
{
"values": [
{
"value": "20003"
},
{
"value": "30075"
},
{
"value": "40060"
}
],
"name": "price"
}
],
"attr2" : "val"
}''' AS json_string
)
-- First column: the name of the first attribute.
-- Second column: every "value" under that attribute's "values" array, joined with ', '.
SELECT
  JSON_VALUE(doc.json_string, '$.Attributes[0].name'),
  ARRAY_TO_STRING(
    ARRAY(
      SELECT JSON_VALUE(item, '$.value')
      FROM UNNEST(JSON_EXTRACT_ARRAY(doc.json_string, '$.Attributes[0].values')) AS item
    ),
    ', '
  )
FROM test_json AS doc

How to update a field in a nested array in Bigquery?

I am trying to update a table that has STRUCT(a few fields, ARRAY(STRUCT)).
The field that I need to update is inside the array and I am having trouble with making it work.
Here is the layout of the two tables:
-- Orders table: trans is a STRUCT holding id, amount and an accounts ARRAY whose
-- elements are STRUCTs of (role, account_id, region, amount).
-- NOTE(review): the dataset is spelled "mydatset" here — confirm the intended dataset name.
CREATE TABLE mydatset.orders (
order_id string,
order_time timestamp,
trans STRUCT <
id string,
amount INT64,
accounts ARRAY<STRUCT <
role STRING ,
account_id STRING,
region STRING,
amount INT64> > >
)
-- Relocations table: pairs an account_id with a region.
-- NOTE(review): the dataset is spelled "mydatset" here — confirm the intended dataset name.
CREATE TABLE mydatset.relocations (
account_id string,
region string
)
Trying to update the region of any account in the array accounts if that account exists in the relocations table:
-- Asker's attempt (reported to run without changing any region).
-- relocs.chgs is a single array of (account_id, region) structs aggregated from
-- the relocations table by the FROM subquery at the bottom.
update mydataset.orders a
set trans = (SELECT AS STRUCT trans.* REPLACE(ARRAY(SELECT STRUCT<role STRING, account_id STRING, region STRING, amount INT64>
(cp.role, cp.account_id,
case when cp.account_id = ll.account_id then ll.region else cp.region end ,
cp.amount
)
-- NOTE(review): the ARRAY( ... ) and REPLACE( ... ) parentheses both close on the next
-- line, so the FROM / LEFT JOIN that follow belong to the outer SELECT AS STRUCT,
-- not to the SELECT inside ARRAY.
) as accounts )
from unnest(trans.accounts) cp
left join unnest(relocs.chgs) ll
on cp.account_id = ll.account_id
)
from (select array_agg(struct (account_id, region) ) chgs
from`mydataset.relocations`
) relocs
where true
The syntax works, but the sql doesn't perform the expected update. The account's region in the orders table is not changed after running the above update!
(I have seen BigQuery UPDATE nested array field and this case is slightly different. The array is inside a struct and itself is an array of struct)
Appreciate any help.
Below is for BigQuery Standard SQL
#standardSQL
-- Rewrite trans.accounts for every order: each account keeps all of its fields, except
-- that region is taken from the relocations table when an entry with the same
-- account_id exists there.
-- The relocations table is folded into a single array so it can be joined against
-- the unnested accounts of each row.
-- WHERE TRUE: the update is applied to every row of the orders table.
UPDATE `project.dataset.orders`
SET trans = (
  SELECT AS STRUCT
    trans.* REPLACE (
      ARRAY(
        SELECT AS STRUCT
          acct.* REPLACE (COALESCE(moved.region, acct.region) AS region)
        FROM UNNEST(trans.accounts) AS acct
        LEFT JOIN UNNEST(reloc.relocations) AS moved
          ON acct.account_id = moved.account_id
      ) AS accounts
    )
)
FROM (
  SELECT ARRAY_AGG(t) AS relocations
  FROM `project.dataset.relocations` AS t
) AS reloc
WHERE TRUE
It is tested with below dummy data
initial dummy data that looks like below
[
{
"order_id": "order_id1",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "region1",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "region2",
"amount": "12"
}
]
}
},
{
"order_id": "order_id2",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "region4",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
}
]
after applying below adjustments
[
{
"account_id": "account_id1",
"region": "regionA"
},
{
"account_id": "account_id2",
"region": "regionB"
}
]
result is
[
{
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "regionA",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "regionB",
"amount": "12"
}
]
},
{
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "regionA",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
]