Delete element in a deeply nested array in jsonb column - Postgres - sql

I have a table my_table with a jsonb column that contains some data. For instance, in a single row, the column can contain the following data:
[
  {
    "x_id": "1",
    "type": "t1",
    "parts": [
      { "part_id": "1", "price": 400 },
      { "part_id": "2", "price": 500 },
      { "part_id": "3", "price": 0 }
    ]
  },
  {
    "x_id": "2",
    "type": "t1",
    "parts": [
      { "part_id": "1", "price": 1000 },
      { "part_id": "3", "price": 60 }
    ]
  },
  {
    "x_id": "3",
    "type": "t2",
    "parts": [
      { "part_id": "1", "price": 100 },
      { "part_id": "3", "price": 780 },
      { "part_id": "2", "price": 990 }
    ]
  }
]
I need help figuring out how to delete an element from the parts array given an x_id and a part_id.
Example
Given x_id=2 and part_id=1, I need the data to be updated to become:
[
  {
    "x_id": "1",
    "type": "t1",
    "parts": [
      { "part_id": "1", "price": 400 },
      { "part_id": "2", "price": 500 },
      { "part_id": "3", "price": 0 }
    ]
  },
  {
    "x_id": "2",
    "type": "t1",
    "parts": [
      { "part_id": "3", "price": 60 }
    ]
  },
  {
    "x_id": "3",
    "type": "t2",
    "parts": [
      { "part_id": "1", "price": 100 },
      { "part_id": "3", "price": 780 },
      { "part_id": "2", "price": 990 }
    ]
  }
]
PS1: this data cannot be normalized, so that's not a possible solution.
PS2: I'm running PostgreSQL 9.6.
PS3: I have checked this question and this question, but my data structure seems too complex compared to the other questions, so I can't apply the given answers.
Edit1: the json data can be big; in particular, the parts array can have anywhere from 0 elements to thousands.

I think you can use the #- operator (see functions-json); you just need to find the path of the array element to remove:
select data #- p.path
from test as t
cross join lateral (
    select array[(a.i - 1)::text, 'parts', (b.i - 1)::text]
    from jsonb_array_elements(t.data) with ordinality as a(data, i),
         jsonb_array_elements(a.data -> 'parts') with ordinality as b(data, i)
    where
        a.data ->> 'x_id' = '2' and
        b.data ->> 'part_id' = '1'
) as p(path)
or
update test as t set
    data = data #- (
        select array[(a.i - 1)::text, 'parts', (b.i - 1)::text]
        from jsonb_array_elements(t.data) with ordinality as a(data, i),
             jsonb_array_elements(a.data -> 'parts') with ordinality as b(data, i)
        where
            a.data ->> 'x_id' = '2' and
            b.data ->> 'part_id' = '1'
    )
db<>fiddle demo
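For reference, #- removes the value at the given text[] path, with zero-based positions for array elements; a minimal standalone sketch:

-- path {0,a,1}: element 0 of the outer array, key 'a', element 1 of that inner array
select '[{"a": [10, 20, 30]}]'::jsonb #- '{0,a,1}';
-- result: [{"a": [10, 30]}]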
Update: there's a reasonable comment that the update works incorrectly if the given path doesn't exist in the data (the scalar subquery then returns NULL, and data #- NULL yields NULL). In this case you can either duplicate the expression in the where clause:
update test as t set
    data = data #- (
        select array[(a.i - 1)::text, 'parts', (b.i - 1)::text]
        from jsonb_array_elements(t.data) with ordinality as a(data, i),
             jsonb_array_elements(a.data -> 'parts') with ordinality as b(data, i)
        where
            a.data ->> 'x_id' = '2' and
            b.data ->> 'part_id' = '23222'
    )
where
    exists (
        select *
        from jsonb_array_elements(t.data) as a(data),
             jsonb_array_elements(a.data -> 'parts') as b(data)
        where
            a.data ->> 'x_id' = '2' and
            b.data ->> 'part_id' = '23222'
    )
db<>fiddle demo
or you can use a self-join:
update test as t2 set
    data = t.data #- p.path
from test as t
cross join lateral (
    select array[(a.i - 1)::text, 'parts', (b.i - 1)::text]
    from jsonb_array_elements(t.data) with ordinality as a(data, i),
         jsonb_array_elements(a.data -> 'parts') with ordinality as b(data, i)
    where
        a.data ->> 'x_id' = '2' and
        b.data ->> 'part_id' = '23232'
) as p(path)
where
    t.ctid = t2.ctid
db<>fiddle demo

This should work; you just need another unique column (usually the primary key).
Create a test table:
create table test_tab (
    id serial primary key,
    j jsonb
);

insert into test_tab (j)
values ('[
  {
    "x_id": "1",
    "type": "t1",
    "parts": [
      { "part_id": "1", "price": 400 },
      { "part_id": "2", "price": 500 },
      { "part_id": "3", "price": 0 }
    ]
  },
  {
    "x_id": "2",
    "type": "t1",
    "parts": [
      { "part_id": "1", "price": 1000 },
      { "part_id": "3", "price": 60 }
    ]
  },
  {
    "x_id": "3",
    "type": "t2",
    "parts": [
      { "part_id": "1", "price": 100 },
      { "part_id": "3", "price": 780 },
      { "part_id": "2", "price": 990 }
    ]
  }
]');
Then split the json, filter out the unwanted data, and rebuild the json:
select id,
       jsonb_agg(jsonb_build_object('x_id', xid, 'type', type, 'parts',
           case when inner_arr = '[null]'::jsonb then parts_arr::jsonb else inner_arr end)) 
from (
    select
        id,
        value ->> 'x_id' as xid,
        jsonb_agg(inner_arr) as inner_arr,
        max(value ->> 'parts') as parts_arr,
        max(value ->> 'type') as type
    from (
        select *,
               case when value ->> 'x_id' = '2'
                    then jsonb_array_elements(value -> 'parts')
                    else null end as inner_arr
        from test_tab
        join lateral jsonb_array_elements(j) on true
    ) t
    where inner_arr ->> 'part_id' is distinct from '1'
    group by id, value ->> 'x_id'
) t
group by id
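To persist the result rather than just select it, here is a hedged sketch of the same query wrapped in an update keyed on id (assuming id is the primary key, as in the test table above):

update test_tab as tt set
    j = agg.new_j
from (
    select id,
           jsonb_agg(jsonb_build_object('x_id', xid, 'type', type, 'parts',
               case when inner_arr = '[null]'::jsonb then parts_arr::jsonb else inner_arr end)) as new_j
    from (
        select
            id,
            value ->> 'x_id' as xid,
            jsonb_agg(inner_arr) as inner_arr,
            max(value ->> 'parts') as parts_arr,
            max(value ->> 'type') as type
        from (
            select *,
                   case when value ->> 'x_id' = '2'
                        then jsonb_array_elements(value -> 'parts')
                        else null end as inner_arr
            from test_tab
            join lateral jsonb_array_elements(j) on true
        ) t
        where inner_arr ->> 'part_id' is distinct from '1'
        group by id, value ->> 'x_id'
    ) t
    group by id
) as agg
where agg.id = tt.id;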

Related

Parse Google API JSON file to rows and columns with OPENJSON in T-SQL

So I am trying to create a query that can handle a json file that we get via a Data Factory web request from the Google Analytics 4 API and store the result in an Azure SQL table. The following query is the closest I got.
The dimension and metric headers seem to be the column names, and the values in the rows part should be the rows.
DECLARE @jsonexample NVARCHAR(MAX) =
N'{
  "dimensionHeaders": [
    { "name": "date" },
    { "name": "country" }
  ],
  "metricHeaders": [
    { "name": "totalUsers", "type": "TYPE_INTEGER" }
  ],
  "rows": [
    {
      "dimensionValues": [
        { "value": "20230207" },
        { "value": "Netherlands" }
      ],
      "metricValues": [
        { "value": "3" }
      ]
    },
    {
      "dimensionValues": [
        { "value": "20230208" },
        { "value": "Netherlands" }
      ],
      "metricValues": [
        { "value": "2" }
      ]
    },
    {
      "dimensionValues": [
        { "value": "20230208" },
        { "value": "United States" }
      ],
      "metricValues": [
        { "value": "1" }
      ]
    }
  ]
}'

DECLARE @jsonexample2 NVARCHAR(MAX) = (SELECT [value] FROM OPENJSON(@jsonexample) WHERE [key] = 'rows')

SELECT *
FROM OPENJSON(@jsonexample2)
This blog seemed to have a good explanation, but I still couldn't get it working:
https://levelup.gitconnected.com/how-to-easily-parse-and-transform-json-in-sql-server-c0b091a964de
You can shred it down to something like this:
DECLARE @jsonexample NVARCHAR(MAX) =
N'{
  "dimensionHeaders": [
    { "name": "date" },
    { "name": "country" }
  ],
  "metricHeaders": [
    { "name": "totalUsers", "type": "TYPE_INTEGER" }
  ],
  "rows": [
    {
      "dimensionValues": [
        { "value": "20230207" },
        { "value": "Netherlands" }
      ],
      "metricValues": [
        { "value": "3" }
      ]
    },
    {
      "dimensionValues": [
        { "value": "20230208" },
        { "value": "Netherlands" }
      ],
      "metricValues": [
        { "value": "2" }
      ]
    },
    {
      "dimensionValues": [
        { "value": "20230208" },
        { "value": "United States" }
      ],
      "metricValues": [
        { "value": "1" }
      ]
    }
  ]
}'
;WITH cols AS (
    SELECT CAST([key] AS INT) AS k, JSON_VALUE(value, '$.name') AS v
    FROM OPENJSON(@jsonexample, '$.dimensionHeaders') x
),
metrics AS (
    SELECT CAST([key] AS INT) AS k, JSON_VALUE(value, '$.name') AS v
    FROM OPENJSON(@jsonexample, '$.metricHeaders') x
)
SELECT CAST(x.[key] AS INT) AS id,
       c.v AS dimName, JSON_VALUE(dim.value, '$.value') AS dimValue,
       m.v AS metName, JSON_VALUE(metr.value, '$.value') AS metValue
FROM OPENJSON(@jsonexample, '$.rows') x
CROSS APPLY OPENJSON(x.value, '$.dimensionValues') dim
CROSS APPLY OPENJSON(x.value, '$.metricValues') metr
INNER JOIN cols c ON c.k = dim.[key]
INNER JOIN metrics m ON m.k = metr.[key]
Then you can probably figure out the rest.
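If the header layout is fixed, as in the sample payload, a simpler alternative (a hedged sketch, reusing the @jsonexample variable declared above) is to address the arrays positionally in an OPENJSON WITH clause:

SELECT d.*
FROM OPENJSON(@jsonexample, '$.rows')
WITH (
    [date]     NVARCHAR(8)   '$.dimensionValues[0].value',
    country    NVARCHAR(255) '$.dimensionValues[1].value',
    totalUsers INT           '$.metricValues[0].value'
) AS d

This trades the dynamic header lookup for hard-coded positions, which is fine as long as the report definition doesn't change.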
Here is a bit of code that will dynamically parse the metrics and dimensions for Google Analytics. It should give you a good starting point :)
DECLARE @json NVARCHAR(MAX), @WithClause NVARCHAR(MAX), @Query NVARCHAR(MAX);

SELECT TOP 1 @json = JSON_QUERY(RawJson, '$.reports[0].columnHeader')
FROM TableName;

SET @WithClause =
(
    SELECT STRING_AGG(Line, ',')
    FROM
    (
        -- one WITH-clause column definition per dimension
        SELECT REPLACE(r.value, 'ga:', '') + ' ' + CASE
                   WHEN r.value = 'ga:DATE' THEN 'DATE'
                   ELSE 'NVARCHAR(255)'
               END + ' ' + '''$.dimensions[' + r.[key] + ']''' AS Line
        FROM OPENJSON(@json, '$.dimensions') AS r
        UNION ALL
        -- one WITH-clause column definition per metric
        SELECT REPLACE(JSON_VALUE(r.value, '$.name'), 'ga:', '') + ' ' + CASE
                   WHEN JSON_VALUE(r.value, '$.type') = 'TIME' THEN 'FLOAT'
                   WHEN JSON_VALUE(r.value, '$.type') = 'CURRENCY' THEN 'DECIMAL(9,2)'
                   ELSE JSON_VALUE(r.value, '$.type')
               END + ' ' + '''$.metrics[0].values[' + r.[key] + ']'''
        FROM OPENJSON(@json, '$.metricHeader.metricHeaderEntries') AS r
    ) AS a
);

SET @Query = '
SELECT d.*
INTO #temp_table
FROM TableName AS cm
CROSS APPLY OPENJSON(RawJson, ''$.reports[0].data.rows'') WITH (' + @WithClause + ') AS d';

--PRINT @Query;
EXECUTE (@Query);
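For illustration, assuming a columnHeader that lists dimensions ga:DATE and ga:country plus a metric ga:sessions of type INTEGER (hypothetical names, not from the question), the dynamic SQL above would expand to roughly:

-- what EXECUTE (@Query) would run for that hypothetical header
SELECT d.*
INTO #temp_table
FROM TableName AS cm
CROSS APPLY OPENJSON(RawJson, '$.reports[0].data.rows') WITH (
    DATE DATE '$.dimensions[0]',
    country NVARCHAR(255) '$.dimensions[1]',
    sessions INTEGER '$.metrics[0].values[0]'
) AS d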

BigQuery concat nested array json

I have data that looks like
{
  "Attributes": [
    {
      "values": [
        { "value": "20003" },
        { "value": "30075" },
        { "value": "40060" }
      ],
      "name": "price"
    }
  ],
  "attr2": "val"
}
The output I want is to concatenate all the values in the nested json array:
price, "20003, 30075, 40060"
I tried some queries but failed to get the correct output.
You can use JSON_EXTRACT_ARRAY and ARRAY_TO_STRING:
WITH test_json AS (
  SELECT '''{
    "Attributes": [
      {
        "values": [
          { "value": "20003" },
          { "value": "30075" },
          { "value": "40060" }
        ],
        "name": "price"
      }
    ],
    "attr2": "val"
  }''' AS json_string
),
values_concatenated AS (
  SELECT ARRAY_TO_STRING(
    ARRAY(
      SELECT JSON_VALUE(json_values, '$.value')
      FROM UNNEST((SELECT JSON_EXTRACT_ARRAY(json_string, '$.Attributes[0].values') FROM test_json)) AS json_values
    ),
    ', '
  ) AS values
)
SELECT
  (SELECT JSON_VALUE(json_string, '$.Attributes[0].name') FROM test_json),
  (SELECT values FROM values_concatenated)
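A more compact hedged variant of the same idea, doing everything in one correlated pass (same sample payload, minified for brevity):

WITH test_json AS (
  SELECT '{"Attributes":[{"values":[{"value":"20003"},{"value":"30075"},{"value":"40060"}],"name":"price"}],"attr2":"val"}' AS json_string
)
SELECT
  JSON_VALUE(json_string, '$.Attributes[0].name') AS name,
  ARRAY_TO_STRING(
    ARRAY(
      -- pull "value" out of each element of the nested values array
      SELECT JSON_VALUE(v, '$.value')
      FROM UNNEST(JSON_EXTRACT_ARRAY(json_string, '$.Attributes[0].values')) AS v
    ),
    ', '
  ) AS concatenated_values
FROM test_json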

How to get the inner elements of JsonB column in Postgres

I have JSON like this in my jsonb column:
{
  "emails": [
    {
      "email": {
        "id": "a8399412-165e-4601-824f-a55f631ad471",
        "value": "test@gmail.com"
      }
    },
    {
      "email": {
        "id": "fa09d9a7-a36a-42a4-8627-66b7554ce82e",
        "value": "test1@gmail.com"
      }
    }
  ],
  "Address": [
    {
      "address": {
        "id": "a8399412-165e-4601-824f-a55f631ad471",
        "addressLine1": "Line1"
      }
    },
    {
      "address": {
        "id": "fa09d9a7-a36a-42a4-8627-66b7554ce82e",
        "addressLine2": "Line2"
      }
    }
  ],
  "lastName": {
    "id": "bc10a5a9-04ff-4a00-b167-ac3232e5cb89",
    "value": "LastName"
  },
  "firstName": {
    "id": "4ccdd400-2586-4a7f-9379-aff4d1f5d9d6",
    "value": "FirstName"
  }
}
and so on. My requirement is to get a list of elements as key/value pairs (with a limit). I did some research, tried different Postgres functions, and wrote the query below:
select response.* from my_table t, jsonb_each_text(jsonb_column) as response;
This way I'm getting only the root elements like emails, firstName, and lastName, but I want the inner elements as well, along with their values, like below:

Key         | Value
----------- | -------------------------------------------------------------------------
"email"     | {"id": "a8399412-165e-4601-824f-a55f631ad471", "value": "test@gmail.com"}
"email"     | {"id": "fa09d9a7-a36a-42a4-8627-66b7554ce82e", "value": "test1@gmail.com"}
"lastName"  | {"id": "bc10a5a9-04ff-4a00-b167-ac3232e5cb89", "value": "LastName"}
"firstName" | {"id": "4ccdd400-2586-4a7f-9379-aff4d1f5d9d6", "value": "FirstName"}
"address"   | {"id": "a8399412-165e-4601-824f-a55f631ad471", "addressLine1": "Line1"}
"address"   | {"id": "fa09d9a7-a36a-42a4-8627-66b7554ce82e", "addressLine2": "Line2"}
You can use the jsonb_array_elements() function and combine the queries with UNION ALL:
SELECT 'email' AS key, je.* ->> 'email' AS value
FROM my_table
CROSS JOIN jsonb_array_elements(jsonb_column->'emails') AS je
UNION ALL
SELECT 'address', ja.* ->> 'address'
FROM my_table
CROSS JOIN jsonb_array_elements(jsonb_column->'Address') AS ja
UNION ALL
SELECT 'lastName', (jsonb_column->'lastName')::text
FROM my_table
UNION ALL
SELECT 'firstName', (jsonb_column->'firstName' )::text
FROM my_table
Demo
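If you don't want to enumerate every key by hand, here is a more generic hedged sketch (assuming the same my_table and jsonb_column names): it unnests any top-level array, emits the single inner key/value of each element, and passes non-array values through unchanged.

SELECT COALESCE(inner_kv.key, outer_kv.key)     AS key,
       COALESCE(inner_kv.value, outer_kv.value) AS value
FROM my_table t
CROSS JOIN LATERAL jsonb_each(t.jsonb_column) AS outer_kv(key, value)
LEFT JOIN LATERAL (
    -- expand array values: each element is a one-key wrapper like {"email": {...}}
    SELECT kv.key, kv.value
    FROM jsonb_array_elements(
             CASE WHEN jsonb_typeof(outer_kv.value) = 'array'
                  THEN outer_kv.value ELSE '[]'::jsonb END
         ) AS elem(e)
    CROSS JOIN LATERAL jsonb_each(elem.e) AS kv(key, value)
) AS inner_kv ON true;

The CASE inside jsonb_array_elements() guards against calling it on non-array values, and the LEFT JOIN LATERAL ... ON true keeps scalar keys like lastName as plain rows.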

How to update a field in a nested array in Bigquery?

I am trying to update a table that has STRUCT(a few fields, ARRAY(STRUCT)).
The field that I need to update is inside the array, and I am having trouble making it work.
Here is the layout of the two tables:
CREATE TABLE mydataset.orders (
    order_id STRING,
    order_time TIMESTAMP,
    trans STRUCT<
        id STRING,
        amount INT64,
        accounts ARRAY<STRUCT<
            role STRING,
            account_id STRING,
            region STRING,
            amount INT64>>>
);

CREATE TABLE mydataset.relocations (
    account_id STRING,
    region STRING
);
Trying to update the region of any account in the array accounts if that account exists in the relocations table:
update mydataset.orders a
set trans = (
    select as struct trans.* replace(
        array(
            select struct<role string, account_id string, region string, amount int64>(
                cp.role,
                cp.account_id,
                case when cp.account_id = ll.account_id then ll.region else cp.region end,
                cp.amount)
        ) as accounts)
    from unnest(trans.accounts) cp
    left join unnest(relocs.chgs) ll
      on cp.account_id = ll.account_id
)
from (
    select array_agg(struct(account_id, region)) chgs
    from `mydataset.relocations`
) relocs
where true
The syntax is accepted, but the SQL doesn't perform the expected update: the account's region in the orders table is unchanged after running the above update.
(I have seen BigQuery UPDATE nested array field, and this case is slightly different: the array is inside a struct and is itself an array of structs.)
Appreciate any help.
Below is for BigQuery Standard SQL
#standardSQL
UPDATE `project.dataset.orders`
SET trans = (SELECT AS STRUCT trans.* REPLACE(
        ARRAY(SELECT AS STRUCT x.* REPLACE(IFNULL(y.region, x.region) AS region)
              FROM UNNEST(trans.accounts) x
              LEFT JOIN UNNEST(relocations) y
              USING (account_id)
        ) AS accounts))
FROM (SELECT ARRAY_AGG(t) relocations FROM `project.dataset.relocations` t)
WHERE TRUE
It is tested with the dummy data below.
Initial dummy data:
[
  {
    "order_id": "order_id1",
    "order_time": "2019-06-28 01:05:16.346854 UTC",
    "trans": {
      "id": "id1",
      "amount": "1",
      "accounts": [
        {
          "role": "role1",
          "account_id": "account_id1",
          "region": "region1",
          "amount": "11"
        },
        {
          "role": "role2",
          "account_id": "account_id2",
          "region": "region2",
          "amount": "12"
        }
      ]
    }
  },
  {
    "order_id": "order_id2",
    "order_time": "2019-06-28 01:05:16.346854 UTC",
    "trans": {
      "id": "id2",
      "amount": "1",
      "accounts": [
        {
          "role": "role3",
          "account_id": "account_id1",
          "region": "region4",
          "amount": "13"
        },
        {
          "role": "role4",
          "account_id": "account_id3",
          "region": "region3",
          "amount": "14"
        }
      ]
    }
  }
]
After applying the adjustments below:
[
  {
    "account_id": "account_id1",
    "region": "regionA"
  },
  {
    "account_id": "account_id2",
    "region": "regionB"
  }
]
the result is:
[
  {
    "id": "id1",
    "amount": "1",
    "accounts": [
      {
        "role": "role1",
        "account_id": "account_id1",
        "region": "regionA",
        "amount": "11"
      },
      {
        "role": "role2",
        "account_id": "account_id2",
        "region": "regionB",
        "amount": "12"
      }
    ]
  },
  {
    "id": "id2",
    "amount": "1",
    "accounts": [
      {
        "role": "role3",
        "account_id": "account_id1",
        "region": "regionA",
        "amount": "13"
      },
      {
        "role": "role4",
        "account_id": "account_id3",
        "region": "region3",
        "amount": "14"
      }
    ]
  }
]
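The core trick above is the SELECT AS STRUCT ... REPLACE(...) pattern, which rebuilds a struct with one field swapped out while keeping the rest intact. A minimal hedged illustration in isolation:

SELECT (SELECT AS STRUCT s.* REPLACE('new' AS b)) AS s
FROM (SELECT STRUCT('a' AS a, 'old' AS b) AS s)
-- returns s.a = 'a', s.b = 'new'

The answer applies it twice: once to replace the accounts array inside trans, and once more to replace region inside each account.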

Removing null/empty values from an array

I'm struggling to understand arrays and structs in BigQuery. When I run this query in Standard SQL:
with t1 as (
  select 1 as id, [1,2] as orders
  union all
  select 2 as id, null as orders
)
select
  id,
  orders
from t1
order by 1
I get this result in json:
[
  {
    "id": "1",
    "orders": [
      "1",
      "2"
    ]
  },
  {
    "id": "2",
    "orders": []
  }
]
I want to remove the orders value for id = 2 so that I instead get:
[
  {
    "id": "1",
    "orders": [
      "1",
      "2"
    ]
  },
  {
    "id": "2"
  }
]
How can I do this? Do I need to add another CTE to remove the null values, and if so, how?
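For what it's worth, one hedged approach (sketched here as an illustration, not from the original thread): BigQuery renders NULL arrays as [] in query results, so to drop the key entirely you have to build the JSON string yourself and omit the field when the array is empty or NULL:

with t1 as (
  select 1 as id, [1,2] as orders
  union all
  select 2 as id, null as orders
)
select
  case
    when array_length(orders) is null or array_length(orders) = 0
      then to_json_string(struct(id))         -- omit the orders key entirely
    else to_json_string(struct(id, orders))   -- keep both fields
  end as row_json
from t1
order by id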