How to update every object inside a JSON array with PostgreSQL?

I am trying to write a query that updates all the objects inside a nested JSON array, i.e. update the first_name of all the people named 'John' to some other name.
My JSON looks something like this, but with much more data, of course:
{
"_id": {
"$oid": "5eb21a9f779aac987b2584b2"
},
"name": "Bouchard Restaurant and Inn",
"cuisine": "Australian",
"stars": 0.2,
"address": {
"street": "1602 Bosup Terrace",
"city": "Sibharze",
"state": "CA",
"zipcode": "21875"
},
"reviews": [
{
"person": {
"_id": {
"$oid": "57d7a121fa937f710a7d486e"
},
"address": {
"city": "Burgessborough",
"street": "83248 Woods Extension",
"zip": "47201"
},
"birthday": {
"$date": "2011-03-17T11:21:36Z"
},
"email": "murillobrian#cox.net",
"first_name": "Yvonne",
"job": "Counselling psychologist",
"last_name": "Pham"
},
"comment": "Aliquam est reiciendis alias neque ad.",
"created_on": {
"$date": "2017-12-09T20:49:00.35Z"
}
},
{
"person": {
"_id": {
"$oid": "57d7a121fa937f710a7d486f"
},
"address": {
"city": "Nicholsbury",
"state": "Indiana",
"street": "699 Ryan Branch Apt. 371",
"zip": "52277"
},
"birthday": {
"$date": "2015-11-25T17:26:40Z"
},
"email": "cindy93#gmail.com",
"first_name": "Mary",
"job": "Conservator, furniture",
"last_name": "Nelson"
},
"comment": "Quis sed tenetur eius illo.",
"created_on": {
"$date": "2020-01-03T16:55:51.396Z"
}
},
{
"person": {
"_id": {
"$oid": "57d7a121fa937f710a7d4870"
},
"address": {
"city": "Crystalmouth",
"street": "3924 Mosley Burg Suite 602",
"zip": "14969"
},
"birthday": {
"$date": "2015-04-07T19:10:04Z"
},
"email": "harrissummer#hotmail.com",
"first_name": "Jenna",
"job": "Engineer, land",
"last_name": "Smith"
},
"comment": "Recusandae rem minus dolorum corporis corrupti rem placeat.",
"created_on": {
"$date": "2019-06-13T13:00:34.473Z"
}
},
{
"person": {
"_id": {
"$oid": "57d7a121fa937f710a7d4871"
},
"address": {
"city": "Lake Meaganton",
"state": "Idaho",
"street": "2831 Kevin Knolls",
"zip": "10914-3394"
},
"birthday": {
"$date": "2014-02-08T01:03:22Z"
},
"email": "ythompson#hotmail.com",
"first_name": "Christopher",
"job": "Investment banker, corporate",
"last_name": "Franklin"
},
"comment": "Id provident eius natus quasi minima nobis.",
"created_on": {
"$date": "2016-01-05T02:15:06.933Z"
}
},
{
"person": {
"_id": {
"$oid": "57d7a121fa937f710a7d4872"
},
"address": {
"city": "Morganport",
"state": "Vermont",
"street": "9069 Bailey Ferry Suite 423",
"zip": "99473"
},
"birthday": {
"$date": "2015-12-19T18:27:42Z"
},
"email": "elizabeth35#mccarty.com",
"first_name": "Elizabeth",
"job": "Theatre stage manager",
"last_name": "Herrera"
},
"comment": "Sit perferendis nostrum suscipit cumque mollitia.",
"created_on": {
"$date": "2016-09-27T15:47:22.458Z"
}
}
]}
I have a query that updates the first object in the array successfully:
UPDATE restaurants
SET info = jsonb_set(info::jsonb, '{reviews,0,person,first_name}', '"Vedat"', true)
WHERE info->'reviews'->0->'person'->>'first_name' = 'John';
However, trying to make an update query that updates all the objects within the reviews array seems to be almost impossible.
I tried something like this:
UPDATE restaurants r
SET info = (
SELECT jsonb_agg(jsonb_set(rev::jsonb, '{first_name}', rev -> 'person' ->> 'first_name' = '"Vedat"', false))
FROM jsonb_array_elements(r.info::jsonb->'reviews') rev
WHERE rev -> 'person' ->> 'first_name' = 'John'
);
But it wasn't successful; it gives me errors like:
ERROR: function jsonb_set(jsonb, unknown, boolean, boolean) does not exist
UPDATE
I came up with this query, but it runs infinitely:
with rev as (
select id, generate_series(0, jsonb_array_length(info::jsonb->'reviews')-1) i
from restaurants
)
update restaurants r
set info = jsonb_set(r.info::jsonb,
array['reviews', rev.i::varchar, 'first_name'],
'"Vedat"'::jsonb)
from rev
where r.info->'reviews'->rev.i-> 'person' ->> 'first_name' = 'John'

The query gets a bit complicated, but here is a solution, provided it does not matter that the order of elements in the .reviews array is not preserved:
update test_j
set j = jsonb_set( -- Replace the '.reviews' array with a concatenation of two arrays,
-- one with the unchanged objects and one with the changed ones.
j
, '{reviews}'
, jsonb_path_query_array( -- Extract the objects from the '.reviews' array
-- that need _no_ updating.
-- Uses the SQL/JSON Path Language.
j
, '$.reviews[*] ? (@.person.first_name != "Jenna" && @.person.first_name != "Yvonne")'
)
|| array_to_json(ARRAY( -- convert 'set of jsonb' to PostgreSQL array and back
-- ( do not forget the cast from json to jsonb !)
select jsonb_set( -- Update the json data
jsonb_path_query( -- Select the objects from the '.reviews' array
-- that need updating
-- (2 names chosen to demonstrate multiple updates
-- with the original sample data).
j
, '$.reviews[*] ? (@.person.first_name == "Jenna" || @.person.first_name == "Yvonne")'
)
, '{person,first_name}'
, '"Vedat"'
, true
)
from test_j
))::jsonb
, true
)
;
Most likely there are more elegant and more efficient ways to get the job done, but the code should get people started.
An online demo is available here (DB Fiddle).
The relevant section of the PostgreSQL 12 docs can be found here.
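As one sketch of a simpler alternative: rebuild the whole reviews array in place with jsonb_agg, which also preserves the original element order. This assumes the table and columns from the question (restaurants with an info column) and has not been run against the asker's data:
UPDATE restaurants r
SET info = jsonb_set(
      r.info::jsonb
    , '{reviews}'
    , (SELECT jsonb_agg(
                CASE WHEN rev -> 'person' ->> 'first_name' = 'John'
                     THEN jsonb_set(rev, '{person,first_name}', '"Vedat"') -- change only matching reviews
                     ELSE rev                                              -- keep the others untouched
                END
                ORDER BY ord)                                              -- keep the original array order
       FROM jsonb_array_elements(r.info::jsonb -> 'reviews') WITH ORDINALITY AS t(rev, ord)))
WHERE EXISTS (SELECT 1
              FROM jsonb_array_elements(r.info::jsonb -> 'reviews') rev
              WHERE rev -> 'person' ->> 'first_name' = 'John');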

Related

select node value from json column type

I have a table called raw_data with three columns: ID, timestamp, and payload. The payload column is a JSON type with values such as:
{
"data": {
"author_id": "1461871206425108480",
"created_at": "2022-08-17T23:19:14.000Z",
"geo": {
"coordinates": {
"type": "Point",
"coordinates": [
-0.1094,
51.5141
]
},
"place_id": "3eb2c704fe8a50cb"
},
"id": "1560043605762392066",
"text": " ALWAYS # London, United Kingdom"
},
"matching_rules": [
{
"id": "1560042248007458817",
"tag": "london-paris"
}
]
}
From this I want to select the rows where the coordinates are available, such as [-0.1094, 51.5141] in this case.
SELECT *
FROM raw_data, json_each(payload)
WHERE json_extract(json_each.value, '$.data.geo.') IS NOT NULL
LIMIT 20;
Nothing was returned.
EDIT
Not all JSON objects have the coordinates node. For example, this value:
{
"data": {
"author_id": "1556031969062010881",
"created_at": "2022-08-18T01:42:21.000Z",
"geo": {
"place_id": "006c6743642cb09c"
},
"id": "1560079621017796609",
"text": "Dear Desperate sister say husband no dey oo."
},
"matching_rules": [
{
"id": "1560077018183630848",
"tag": "kaduna-kano-katsina-dutse-zaria"
}
]
}
The correct path is '$.data.geo.coordinates.coordinates' and there is no need for json_each():
SELECT *
FROM raw_data
WHERE json_extract(payload, '$.data.geo.coordinates.coordinates') IS NOT NULL;
See the demo.
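If, beyond filtering, you also want the longitude and latitude as separate columns, the same json_extract path can be extended with array indexes. A small sketch (the lon/lat aliases are just illustrative):
SELECT ID,
       json_extract(payload, '$.data.geo.coordinates.coordinates[0]') AS lon,
       json_extract(payload, '$.data.geo.coordinates.coordinates[1]') AS lat
FROM raw_data
WHERE json_extract(payload, '$.data.geo.coordinates.coordinates') IS NOT NULL;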

Select data from Json array MS SQL Server

I have to select data from JSON like this:
[
{
"id": 10100,
"externalId": "100000035",
"name": "Test1",
"companyId": 10099,
"phone": "0738003811",
"email": "test#Test.com",
"mainAddress": {
"county": "UK",
"province": "test",
"zipCode": "01234",
"city": "test",
"street": "test",
"gln": "44,37489331;26,21941193",
"country": {
"iso2": "UK",
"iso3": "UK"
}
},
"active": false,
"main": true,
"stores": [
"Test"
],
"attributes": [
{
"attributeId": 1059,
"attributeName": "CH6 name",
"attributeExternalId": null,
"attributeValueId": 74292,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": null,
"attributePlainValue": "Unknown"
},
{
"attributeId": 1061,
"attributeName": "BD",
"attributeExternalId": null,
"attributeValueId": 81720,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": null,
"attributePlainValue": "Not assigned"
}
],
"daysSinceLastOrder": null
},
{
"id": 62606,
"externalId": "VL_LC_000190",
"name": "Test",
"companyId": 17793,
"phone": "44333424",
"email": "test#email.com",
"mainAddress": {
"firmName": "test",
"county": "test",
"province": "test",
"zipCode": "247555",
"city": "test",
"street": "test",
"gln": "44.8773851;23.9223518",
"country": {
"iso2": "RO",
"iso3": "ROU"
},
"phone": "07547063789"
},
"active": true,
"main": false,
"stores": [
"Valcea"
],
"attributes": [
{
"attributeId": 1042,
"attributeName": "Type of location",
"attributeExternalId": "TYPE_OF_DIVISION",
"attributeValueId": 34506,
"attributeValueType": "MONO_LINGUAL",
"attributeValueEid": "Small OTC (<40mp)",
"attributePlainValue": "Small OTC (<40mp)"
},
{
"attributeId": 17,
"attributeName": "Limit for payment",
"attributeExternalId": "LIMIT_FOR_PAYMENT_IN_DAYS",
"attributeValueId": 59120,
"attributeValueType": "NUMBER",
"attributeValueEid": null,
"attributePlainValue": "28"
}
],
"daysSinceLastOrder": 147
}
]
I know how to select data from a simple JSON object using FROM OPENJSON,
but now I have to select
AttributeValueId, AttributeId, AttributeName, attributePlainValue and CompanyId for each attribute, and I don't know how to select data from the attributes array and then join it to the CompanyId, which is one level up.
Maybe someone knows how to write this query.
As mentioned by @lptr in the comments:
You need to pass the result of one OPENJSON to another, using CROSS APPLY. You can select a whole JSON object or array as a property by using the syntax AS JSON.
select
t1.companyid,
t2.*
from openjson(@j)
with (
companyId int,
attributes nvarchar(max) as json
) as t1
cross apply openjson(t1.attributes)
with
(
attributeId int,
attributeName nvarchar(100),
attributeValueId nvarchar(100),
attributePlainValue nvarchar(100)
) as t2;
db<>fiddle
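If the JSON is stored in a table column rather than in a variable, the same two-level OPENJSON pattern works with an extra CROSS APPLY. A sketch, assuming a hypothetical table dbo.Companies(payload nvarchar(max)) that holds the JSON array:
select
    t1.companyId,
    t2.*
from dbo.Companies c
cross apply openjson(c.payload)       -- outer level: one row per company object
with (
    companyId int,
    attributes nvarchar(max) as json  -- keep the attributes array as raw JSON
) as t1
cross apply openjson(t1.attributes)   -- inner level: one row per attribute
with (
    attributeId int,
    attributeName nvarchar(100),
    attributeValueId nvarchar(100),
    attributePlainValue nvarchar(100)
) as t2;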

Get nested data c.<type>.<type> in SQL Query?

I've been searching everywhere for this seemingly simple action: I'd like to select only a few fields from a nested source.
The closest that I can get to the solution is this:
SELECT c.receipt_number, c.millitime, c.email, c.phone, c.shipping, c.shipping_note, c.amount_paid, i.description, i.quantity
FROM c
JOIN i IN c.line_items
WHERE c.millitime > 1627813253000
But this creates lots of duplicated data, like the receipt_number, email, etc., as in this example:
[
{
"receipt_number": null,
"millitime": 1627813377000,
"email": "test#gmail.com",
"phone": "000000000",
"shipping": {
"address": {
"city": "Sydney",
"country": "AU",
"line1": "Test Street",
"line2": null,
"postal_code": "3000",
"state": "VIC"
},
"name": "New Cust"
},
"shipping_note": "Please knock on door.",
"amount_paid": 104,
"description": "Curry Chicken",
"quantity": 1
},
{
"receipt_number": null,
"millitime": 1627813377000,
"email": "test#gmail.com",
"phone": "000000000",
"shipping": {
"address": {
"city": "Sydney",
"country": "AU",
"line1": "Test Street",
"line2": null,
"postal_code": "3000",
"state": "VIC"
},
"name": "New Cust"
},
"shipping_note": "Please knock on door.",
"amount_paid": 104,
"description": "Chicken Noodle",
"quantity": 8
}
]
Is there a way to create a nested result for c.line_items with just the desired fields, description and quantity? The final result should be similar to:
[
{
"receipt_number": null,
"millitime": 1627813377000,
"email": "test#gmail.com",
"phone": "000000000",
"shipping": {
"address": {
"city": "Sydney",
"country": "AU",
"line1": "Test Street",
"line2": null,
"postal_code": "3000",
"state": "VIC"
},
"name": "New Cust"
},
"shipping_note": "Please knock on door.",
"amount_paid": 104,
"line_items": [
{
"description": "Chicken Noodle",
"quantity": 8
},
{
"description": "Curry Chicken",
"quantity": 1
}
]
}
]
I have created the same in
You can use the DISTINCT keyword to remove duplicates from the results.
Add c.line_items to your select list and remove i.description, i.quantity from it, so the description and quantity appear nested under line_items.
Query:
SELECT distinct c.receipt_number, c.millitime, c.email, c.phone, c.shipping, c.shipping_note, c.amount_paid, c.line_items
FROM c
JOIN i IN c.line_items
WHERE c.millitime > 1627813253000
Result:
Reference: Azure Cosmos DB SQL query - JSON items
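If you only want description and quantity under line_items (as in the desired output) rather than the full item objects, a correlated ARRAY subquery is worth trying. This is only a sketch based on Cosmos DB's subquery support and has not been verified against the asker's container:
SELECT c.receipt_number, c.millitime, c.email, c.phone, c.shipping, c.shipping_note, c.amount_paid,
       ARRAY(SELECT i.description, i.quantity FROM i IN c.line_items) AS line_items
FROM c
WHERE c.millitime > 1627813253000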

How to update a field in a nested array in BigQuery?

I am trying to update a table that has STRUCT(a few fields, ARRAY(STRUCT)).
The field that I need to update is inside the array, and I am having trouble making it work.
Here is the layout of the two tables:
CREATE TABLE mydatset.orders (
order_id string,
order_time timestamp,
trans STRUCT <
id string,
amount INT64,
accounts ARRAY<STRUCT <
role STRING ,
account_id STRING,
region STRING,
amount INT64> > >
)
CREATE TABLE mydatset.relocations (
account_id string,
region string
)
Trying to update the region of any account in the array accounts if that account exists in the relocations table:
update mydataset.orders a
set trans = (SELECT AS STRUCT trans.* REPLACE(ARRAY(SELECT STRUCT<role STRING, account_id STRING, region STRING, amount INT64>
(cp.role, cp.account_id,
case when cp.account_id = ll.account_id then ll.region else cp.region end ,
cp.amount
)
) as accounts )
from unnest(trans.accounts) cp
left join unnest(relocs.chgs) ll
on cp.account_id = ll.account_id
)
from (select array_agg(struct (account_id, region) ) chgs
from`mydataset.relocations`
) relocs
where true
The syntax works, but the SQL doesn't perform the expected update. The account's region in the orders table is not changed after running the above update!
(I have seen BigQuery UPDATE nested array field, but this case is slightly different: the array is inside a struct and is itself an array of structs.)
Appreciate any help.
Below is for BigQuery Standard SQL
#standardSQL
UPDATE `project.dataset.orders`
SET trans = (SELECT AS STRUCT trans.* REPLACE(
ARRAY(SELECT AS STRUCT x.* REPLACE(IFNULL(y.region, x.region) AS region)
FROM UNNEST(trans.accounts) x
LEFT JOIN UNNEST(relocations) y
USING(account_id)
) AS accounts))
FROM (SELECT ARRAY_AGG(t) relocations FROM `project.dataset.relocations` t)
WHERE TRUE
It was tested with the dummy data below.
The initial dummy data looks like this:
[
{
"order_id": "order_id1",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "region1",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "region2",
"amount": "12"
}
]
}
},
{
"order_id": "order_id2",
"order_time": "2019-06-28 01:05:16.346854 UTC",
"trans": {
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "region4",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
}
]
after applying the adjustments below
[
{
"account_id": "account_id1",
"region": "regionA"
},
{
"account_id": "account_id2",
"region": "regionB"
}
]
the result is
[
{
"id": "id1",
"amount": "1",
"accounts": [
{
"role": "role1",
"account_id": "account_id1",
"region": "regionA",
"amount": "11"
},
{
"role": "role2",
"account_id": "account_id2",
"region": "regionB",
"amount": "12"
}
]
},
{
"id": "id2",
"amount": "1",
"accounts": [
{
"role": "role3",
"account_id": "account_id1",
"region": "regionA",
"amount": "13"
},
{
"role": "role4",
"account_id": "account_id3",
"region": "region3",
"amount": "14"
}
]
}
]
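To verify the update, the nested accounts can be flattened back out with UNNEST. A quick check query, assuming the same table name as above:
#standardSQL
SELECT o.order_id, a.account_id, a.region
FROM `project.dataset.orders` o, UNNEST(o.trans.accounts) a
ORDER BY o.order_id, a.account_id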

Transform JSON response with lodash

I'm new to lodash (v3.10.1) and having a hard time understanding it.
I hope someone can help.
I have an input something like this:
[
{"id":1,"name":"Matthew","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":2,"name":"Mark","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":3,"name":"Luke","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":4,"name":"John","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":5,"name":"Paul","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}}
];
I would like to output this or close to this:
{
"industries": [
{
"industry":{
"id":5,
"name":"Medical",
"companies": [
{
"company":{
"id":1,
"name":"abc",
"employees": [
{"id":1,"name":"Matthew"},
{"id":2,"name":"Mark"},
{"id":3,"name":"Luke"},
{"id":4,"name":"John"},
{"id":5,"name":"Paul"}
]
}
}
]
}
}
]
}
Here's something that gets you close to what you want. I structured the output to be an object instead of an array. You don't need the industries or industry properties in your example output. The output structure looks like this:
{
"industry name": {
"id": "id of industry",
"companies": [
{
"company name": "name of company",
"id": "id of company",
"employees": [
{
"id": "id of company",
"name": "name of employee"
}
]
}
]
}
}
I use the _.chain function to wrap the collection with a lodash wrapper object. This enables me to explicitly chain lodash functions.
From there, I use the _.groupBy function to group elements of the collection by their industry name. Since I'm chaining, I don't have to pass in the array again to the function. It's implicitly passed via the lodash wrapper. The second argument of the _.groupBy is the path to the value I want to group elements by. In this case, it's the path to the industry name: company.industry.name. _.groupBy returns an object with each employee grouped by their industry (industries are keys for this object).
I then use _.transform to transform each industry object. _.transform is essentially _.reduce, except that the result returned from the _.transform function is always an object.
The function passed to _.transform gets executed against each key/value pair in the object. In the function, I use _.groupBy again to group employees by company. Based on the results of _.groupBy, I map the values to the final structure I want for each employee object.
I then call the _.value function because I want to unwrap the output collection from the lodash wrapper object.
I hope this made sense. If it doesn't, I highly recommend reading Lo-Dash Essentials. After reading the book, I finally got why lodash is so useful.
"use strict";
var _ = require('lodash');
var emps = [
{ "id": 1, "name": "Matthew", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
{ "id": 2, "name": "Mark", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
{ "id": 3, "name": "Luke", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
{ "id": 4, "name": "John", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
{ "id": 5, "name": "Paul", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } }
];
var result = _.chain(emps)
.groupBy("company.industry.name")
.transform(function(result, employees, industry) {
result[industry] = {};
result[industry].id = _.get(employees[0], "company.industry.id");
result[ industry ][ 'companies' ] = _.map(_.groupBy(employees, "company.name"), function( employees, company ) {
return {
company: company,
id: _.get(employees[ 0 ], 'company.id'),
employees: _.map(employees, _.partialRight(_.pick, [ 'id', 'name' ]))
};
});
return result;
})
.value();
Results from your example are as follows:
{
"Medical": {
"id": 5,
"companies": [
{
"company": "abc",
"id": 1,
"employees": [
{
"id": 1,
"name": "Matthew"
},
{
"id": 2,
"name": "Mark"
},
{
"id": 3,
"name": "Luke"
},
{
"id": 4,
"name": "John"
},
{
"id": 5,
"name": "Paul"
}
]
}
]
}
}
If you ever wanted the exact same structure as in the question, I solved it using the jsonata library:
(
/* lets flatten it out for ease of accessing the properties*/
$step1 := $ ~> | $ |
{
"employee_id": id,
"employee_name": name,
"company_id": company.id,
"company_name": company.name,
"industry_id": company.industry.id,
"industry_name": company.industry.name
},
["company", "id", "name"] |;
/* now the magic begins*/
$step2 := {
"industries":
[($step1{
"industry" & $string(industry_id): ${
"id": $distinct(industry_id)#$I,
"name": $distinct(industry_name),
"companies": [({
"company" & $string(company_id): {
"id": $distinct(company_id),
"name": $distinct(company_name),
"employees": [$.{
"id": $distinct(employee_id),
"name": $distinct(employee_name)
}]
}
} ~> $each(function($v){ {"company": $v} }))]
}
} ~> $each(function($v){ {"industry": $v} }))]
};
)
You can see it in action on the live demo site: https://try.jsonata.org/VvW4uTRz_