How to get all values of an attribute of a JSON array with JSONPath in BigQuery? Asterisk operator not supported - google-bigquery

I'm trying to get all the values of a certain attribute from a JSON array.
Considering the following JSON, I'm trying to get all the types, e.g. iPhone, home:
{
  "firstName": "John",
  "lastName" : "doe",
  "age" : 26,
  "address" : {
    "streetAddress": "naist street",
    "city" : "Nara",
    "postalCode" : "630-0192"
  },
  "phoneNumbers" : [
    {
      "type" : "iPhone",
      "number": "0123-4567-8888"
    },
    {
      "type" : "home",
      "number": "0123-4567-8910"
    }
  ]
}
I am using $.phoneNumbers[*].type, which seems to work fine in online parsers,
but when I use it in BigQuery:
select json_extract(my_column,'$.phoneNumbers[*].type')
from my_table
I get:
JSONPath parse error at: [*].type

You can write a JavaScript UDF to do the extraction. The native JSON_EXTRACT rejects the wildcard:
SELECT JSON_EXTRACT('[1,2,3]', '$[*]') parsed
Error: Unsupported operator in JSONPath: *
UDF alternative:
#standardSQL
CREATE TEMPORARY FUNCTION parseJson(libs STRING)
RETURNS ARRAY<INT64>
LANGUAGE js AS """
  try {
    return JSON.parse(libs);
  } catch (e) {
    return [];
  }
""";
SELECT parseJson('[1,2,3]') parsed
More complex example:
#standardSQL
CREATE TEMPORARY FUNCTION parseJson(libs STRING)
RETURNS ARRAY<STRUCT<x INT64, y INT64, z INT64>>
LANGUAGE js AS """
  try {
    return JSON.parse(libs);
  } catch (e) {
    return [];
  }
""";
SELECT parseJson(JSON_EXTRACT('{"a":[{"x":1},{"y":2},{"z":3}]}', '$.a')) parsed
(inspired by: https://discuss.httparchive.org/t/javascript-library-detection/955)

json_extract cannot return a REPEATED field; it can only do one match, hence no support for *.
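Under that one-match constraint, the closest native approach is to extract each array position explicitly. A minimal sketch against the question's my_table/my_column (extend the index list to cover the longest expected array):
#standardSQL
SELECT
  JSON_EXTRACT_SCALAR(my_column, '$.phoneNumbers[0].type') AS type_0,
  JSON_EXTRACT_SCALAR(my_column, '$.phoneNumbers[1].type') AS type_1
FROM my_table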

Yet another interesting (I hope) solution for BigQuery Standard SQL.
It can easily be adjusted to whatever the specific needs are:
#standardSQL
CREATE TEMPORARY FUNCTION parseJson(data STRING)
RETURNS ARRAY<STRUCT<parent STRING, item STRING, key STRING, value STRING>>
LANGUAGE js AS """
  // Walk the parsed JSON tree and emit one row per leaf value.
  x = JSON.parse(data); z = []; processKey(x, '');
  function processKey(node, parent) {
    if (parent !== '') { parent += '.' };
    Object.keys(node).map(function(key) {
      value = node[key].toString();
      if (!value.startsWith('[object Object]')) {
        // Leaf value: record the first two path segments plus key/value.
        var q = {}; var arr = parent.split('.');
        q.parent = arr[0]; q.item = arr[1];
        q.key = key; q.value = value;
        z.push(q);
      } else {
        // Nested object or array of objects: recurse.
        processKey(node[key], parent + key);
      };
    });
  };
  return z;
""";
WITH t AS (
SELECT """ {
"firstName": "John",
"lastName" : "doe",
"age" : 26,
"address" : {
"streetAddress": "naist street", "city" : "Nara", "postalCode" : "630-0192" },
"phoneNumbers": [
{ "type" : "iPhone", "number": "0123-4567-8888"},
{ "type" : "home", "number": "0123-4567-8910"},
{ "type" : "work", "number": "0123-4567-7777"}]
} """ AS info
)
SELECT parent, item, key, value FROM t, UNNEST(parseJson(info))
WHERE parent = 'phoneNumbers' AND key = 'type'
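If the goal is just the question's list of types, the matches can be collapsed back into a single array. A sketch reusing the temporary parseJson function and the t table from above:
SELECT ARRAY_AGG(value) AS types
FROM t, UNNEST(parseJson(info))
WHERE parent = 'phoneNumbers' AND key = 'type'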

Related

How to remove object by value from a JSONB type array?

I want to remove a JSONB object from a JSONB array by its unique 'id' value. I am no expert at writing SQL code, but I managed to write the concatenate function.
For example: remove this object from the array below.
{
  "id": "ad26e2be-19fd-4862-8f84-f2f9c87b582e",
  "title": "Wikipedia",
  "links": [
    "https://en.wikipedia.org/1",
    "https://en.wikipedia.org/2"
  ]
},
Schema:
CREATE TABLE users (
  url text not null,
  user_id SERIAL PRIMARY KEY,
  name VARCHAR,
  list_of_links jsonb default '[]'
);
list_of_links format:
[
  {
    "id": "ad26e2be-19fd-4862-8f84-f2f9c87b582e",
    "title": "Wikipedia",
    "links": [
      "https://en.wikipedia.org/1",
      "https://en.wikipedia.org/2"
    ]
  },
  {
    "id": "451ac172-b93e-4158-8e53-8e9031cfbe72",
    "title": "Russian Wikipedia",
    "links": [
      "https://ru.wikipedia.org/wiki/",
      "https://ru.wikipedia.org/wiki/"
    ]
  },
  {
    "id": "818b99c8-479b-4846-ac15-4b2832ec63b5",
    "title": "German Wikipedia",
    "links": [
      "https://de.wikipedia.org/any",
      "https://de.wikipedia.org/any"
    ]
  },
  ...
]
The concatenate function:
update users set list_of_links=(
  list_of_links || (
    select *
    from jsonb_array_elements(list_of_links)
    where value->>'id'='ad26e2be-19fd-4862-8f84-f2f9c87b582e'
  )
)
where url='test'
returning *;
Your JSON data is structured, so you have to unpack it, operate on the unpacked data, and then repack it:
SELECT u.url, u.user_id, u.name,
       jsonb_agg(
         jsonb_build_object('id', l.id, 'title', l.title, 'links', l.links)
       ) AS list_of_links
FROM users u
CROSS JOIN LATERAL jsonb_to_recordset(u.list_of_links) AS l(id uuid, title text, links jsonb)
WHERE l.id != 'ad26e2be-19fd-4862-8f84-f2f9c87b582e'::uuid
GROUP BY 1, 2, 3
The function jsonb_to_recordset is a set-returning function so you have to use it as a row source, joined to its originating table with the LATERAL clause so that the list_of_links column is available to the function to be unpacked. Then you can delete the records you are not interested in using the WHERE clause, and finally repack the structure by building the record fields into a jsonb structure and then aggregating the individual records back into an array.
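To persist the change rather than just select the repacked value, the same query can drive an UPDATE. A minimal sketch, assuming user_id (the primary key in the schema above) identifies each row; rows containing no element with that id are simply rewritten unchanged:
UPDATE users u
SET list_of_links = repacked.list_of_links
FROM (
  SELECT u2.user_id,
         jsonb_agg(
           jsonb_build_object('id', l.id, 'title', l.title, 'links', l.links)
         ) AS list_of_links
  FROM users u2
  CROSS JOIN LATERAL jsonb_to_recordset(u2.list_of_links)
    AS l(id uuid, title text, links jsonb)
  WHERE l.id != 'ad26e2be-19fd-4862-8f84-f2f9c87b582e'::uuid
  GROUP BY u2.user_id
) repacked
WHERE u.user_id = repacked.user_id;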
I wrote this in JS, but that does not matter to how it works. Essentially, it gets all the items from the array, then finds the matching id, which returns an index. Using that index, I apply the "-" operator, which removes the element at that index from the array:
// req.body is this JSON object:
// {"url":"test", "id": "ad26e2be-19fd-4862-8f84-f2f9c87b582e"}
var { url, id } = req.body;
pgPool.query(
  `
  select list_of_links
  from users
  where url=$1;
  `,
  [url],
  (error, result) => {
    // stop executing further if error is true
    if (error) {
      res.json({ status: "failed" });
      return;
    }
    if (result) {
      // returns the index (0, 1, 2, ...) of the array element whose id
      // matches the id from the request
      var index_of_the_item = result.rows[0].list_of_links
        .map(({ id: db_id }, index) => (db_id === id ? index : false))
        .filter((x) => x !== false)[0];
      // remove the array element by its index
      pgPool.query(
        `
        update users
        set list_of_links=(
          list_of_links - $1::int
        )
        where url=$2;
        `,
        [index_of_the_item, url],
        (e, r) => {...}
      );
    }
  }
);
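For what it's worth, the same find-the-index-then-subtract logic can be collapsed into a single SQL statement: WITH ORDINALITY yields the one-based position, and subtracting one gives the zero-based index the "-" operator expects. A sketch using the example's url and id values:
UPDATE users u
SET list_of_links = u.list_of_links - idx.i
FROM (
  SELECT (ord - 1)::int AS i
  FROM users, jsonb_array_elements(list_of_links) WITH ORDINALITY AS e(elem, ord)
  WHERE url = 'test'
    AND elem->>'id' = 'ad26e2be-19fd-4862-8f84-f2f9c87b582e'
) idx
WHERE u.url = 'test';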

Postgres/JSON Updating nested array elements

Given the input JSON below, stored in a table under a column named '_value', I would like to replace the field "sc", turning it from an object into the text value of "name" under "sc".
The JSON before updating looks like this:
{
  "iProps": [
    {
      "value": {
        "rules": [
          {
            "ao": {
              "sc": {
                "web_link": "abc.com",
                "name": "name"
              }
            }
          },
          {
            "ao": {
              "sc": ""
            }
          }
        ]
      }
    }
  ]
}
The JSON after updating should look like this:
{
  "iProps": [
    {
      "value": {
        "rules": [
          {
            "ao": {
              "sc": "name"
            }
          },
          {
            "ao": {
              "sc": ""
            }
          }
        ]
      }
    }
  ]
}
I tried the query below to get to the 'rules' array, but I am having difficulty proceeding further with the parsing and updating.
WITH values AS (
  SELECT iprop -> 'value' -> 'rules' AS value
  FROM table t, jsonb_array_elements(t._value->'iProps') AS iprop
)
SELECT *
FROM values, jsonb_array_elements(values.ao)
which throws the following error:
ERROR: column values.ao does not exist
LINE 26: from values, jsonb_array_elements(values.ao)
^
SQL state: 42703
Character: 1396
You can try the query below, assuming your structure is constant and the data type of your column is JSONB.
with cte as (
  select
    vals2->'ao'->'sc'->'name' as namevalue,
    ('{iProps,'||index1-1||',value,rules,'||index2-1||',ao,sc}')::text[] as json_path
  from
    table_,
    jsonb_array_elements(value_->'iProps') with ordinality arr1(vals1, index1),
    jsonb_array_elements(vals1->'value'->'rules') with ordinality arr2(vals2, index2)
)
update table_
set value_ = jsonb_set(value_, cte.json_path, cte.namevalue, false)
from cte
where cte.namevalue is not null
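The trick is that jsonb_set takes a text[] path in which array positions are zero-based indexes, which is why the CTE builds paths like '{iProps,0,value,rules,0,ao,sc}'. A minimal standalone illustration:
SELECT jsonb_set(
  '{"iProps":[{"value":{"rules":[{"ao":{"sc":{"name":"name"}}}]}}]}'::jsonb,
  '{iProps,0,value,rules,0,ao,sc}',
  '"name"'::jsonb,
  false
);
-- {"iProps": [{"value": {"rules": [{"ao": {"sc": "name"}}]}}]}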

Unable to update dynamic JSON element in called feature [duplicate]

I have a JSON file as given below:
{
  "lastname": {
    "displayName": "lastname"
  },
  "#(dynamicKey)": {
    "displayName": "#(dynamicKey)"
  }
}
When I try to read the file, the key and values are not getting updated. But when I use JSON like the one below, the value does get replaced by the def value; with a dynamic key, neither the key nor the value gets updated. :-(
{
  "lastname": {
    "displayName": "lastname"
  },
  "someKey": {
    "displayName": "#(dynamicKey)"
  }
}
Could you please help me with how to replace a dynamic key and value?
This is how you can handle dynamic keys. The #(dynamicKey) embedded-expression approach will not work.
* def dynamicKey = 'bar'
* def test = { someKey: 'foo' }
* test[dynamicKey] = 'baz'
* match test == { someKey: 'foo', bar: 'baz' }

How to create a function that will add a value to an array in an object

I want to create a function that will add a grade to a specific student and subject.
This is how my document looks:
"_id" : ObjectId("5b454b545b4545b"),
"name" : "Bob",
"last_name" : "Bob",
"nr_album" : "222",
"grades" ; {
"IT" : [
3,
3,
5,
4
]
}
This is what I came up with:
function addGrade(nr_album, grades, value) {
  db.studenci.update(
    { nr_album: nr_album },
    { $push: { [grades]: value } }
  );
}
addGrade("222", "grades.IT", 100)
It's working properly, but what I want to achieve is to pass only "IT" in the parameters instead of "grades.IT".
You can use template strings (ES2015).
Pass arguments like this:
addGrade("222", "IT", 100)
Take the "IT" parameter and build the required string dynamically:
function addGrade(nr_album, grades, value) {
  // build the dynamic field path, e.g. "grades.IT"
  const string = `grades.${grades}`;
  db.studenci.update({
    nr_album: nr_album
  }, {
    $push: { [string]: value }
  });
}

Convert MongoDB schema including a nested object

Suppose a MongoDB collection schema like this:
{
  "_id" : ObjectId("5a5b2657a19692e18a3792ad"),
  "Toponym" : "Climate station Stavenhagen",
  "Lat" : 53.33333,
  "Lon" : "13.99999",
  "SensorMaker" : "Hitachi",
  "SensorClass" : "Thermometer",
  "Dimension" : "SoilTemperature_0.05mSensor1",
  "Gauge" : "degC"
}
And I would like to change the complete collection (~90k items) to this, to conform to minimal GeoJSON:
{
  "_id" : ObjectId("5a5b2657a19692e18a3792ad"),
  "Toponym" : "Climate station Stavenhagen",
  "geometry" : {
    "type" : "Point",
    "coordinates" : [53.33333, 13.99999]
  },
  "SensorMaker" : "Hitachi",
  "SensorClass" : "Thermometer",
  "Dimension" : "SoilTemperature_0.05mSensor1",
  "Gauge" : "degC"
}
I tried to convert it using this query, but whatever I do I receive an error like "Line 5: Unexpected string":
db.sensor_geo.aggregate([
{ '$group' : {
'_id' : '$_id',
'Toponym' : '$Toponym'
'geometry': { 'type': 'Point', { $set : {"coordinates.$[]": [ {'$Lat', '$Lon'} ] }}},
'SensorMaker' : '$SensorMaker',
'SensorClass' : '$SensorClass',
'Dimension' : '$Dimension',
'Gauge' : '$Gauge'
}
}
]);
Should I have used $push instead of $set, even though that also led nowhere? Do I also have to create an ObjectId for the nested object, and could that have caused the problem?
You can try the aggregation pipeline below with bulk writes.
The aggregation changes the Lat and Lon fields into geometry, with a bulk update that writes the new geometry field and removes the Lat and Lon fields.
var bulk = db.getCollection("sensor_geo").initializeUnorderedBulkOp();
var count = 0;
var batch = 1;
db.getCollection("sensor_geo").aggregate([
  {"$project": {
    "geometry": {
      "type": "Point", "coordinates": ["$Lat", "$Lon"]
    }
  }}
]).forEach(function(doc) {
  var _id = doc._id;
  var geometry = doc.geometry;
  bulk.find({ "_id": _id }).updateOne({
    $set: { "geometry": geometry },
    $unset: { "Lat": "", "Lon": "" }
  });
  count++;
  // flush the queued updates every `batch` documents
  if (count == batch) {
    bulk.execute();
    bulk = db.getCollection("sensor_geo").initializeUnorderedBulkOp();
    count = 0;
  }
});
// flush any remaining queued updates
if (count > 0) {
  bulk.execute();
}
}