BigQuery concat nested array json - google-bigquery

I have data that looks like
{
  "Attributes": [
    {
      "values": [
        { "value": "20003" },
        { "value": "30075" },
        { "value": "40060" }
      ],
      "name": "price"
    }
  ],
  "attr2": "val"
}
The output I want is the attribute name together with all the values from the nested JSON array concatenated into one string:
price, "20003, 30075, 40060"
I tried several queries but failed to get the correct output.

You can use JSON_EXTRACT_ARRAY and ARRAY_TO_STRING:
WITH test_json AS (
  SELECT '''{
    "Attributes": [
      {
        "values": [
          { "value": "20003" },
          { "value": "30075" },
          { "value": "40060" }
        ],
        "name": "price"
      }
    ],
    "attr2": "val"
  }''' AS json_string
),
values_concatenated AS (
  SELECT ARRAY_TO_STRING(
    ARRAY(
      SELECT JSON_VALUE(json_values, '$.value')
      FROM UNNEST((
        SELECT JSON_EXTRACT_ARRAY(json_string, '$.Attributes[0].values')
        FROM test_json
      )) AS json_values
    ),
    ', '
  ) AS values
)
SELECT
  (SELECT JSON_VALUE(json_string, '$.Attributes[0].name') FROM test_json) AS name,
  (SELECT values FROM values_concatenated) AS values
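For what it's worth, the same result can be produced in a single statement without the helper CTE. A minimal sketch, with the sample JSON inlined so it runs standalone (the concatenated_values alias is mine):

WITH test_json AS (
  SELECT '{"Attributes":[{"values":[{"value":"20003"},{"value":"30075"},{"value":"40060"}],"name":"price"}],"attr2":"val"}' AS json_string
)
SELECT
  -- one row: the attribute name plus its values joined with ', '
  JSON_VALUE(json_string, '$.Attributes[0].name') AS name,
  ARRAY_TO_STRING(
    ARRAY(
      SELECT JSON_VALUE(v, '$.value')
      FROM UNNEST(JSON_EXTRACT_ARRAY(json_string, '$.Attributes[0].values')) AS v
    ),
    ', '
  ) AS concatenated_values
FROM test_json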

Related

Parse Google API JSON file to rows and columns with OPENJSON in T-SQL

So I am trying to create a query that can handle a JSON file that we get with a Data Factory web request from the Google Analytics 4 API and store the result in an Azure SQL table. The following query is the closest I got.
The dimension and metric headers seem to be the column names, and the values in the rows part should be the rows.
DECLARE @jsonexample NVARCHAR(MAX) =
N'{
  "dimensionHeaders": [
    { "name": "date" },
    { "name": "country" }
  ],
  "metricHeaders": [
    { "name": "totalUsers", "type": "TYPE_INTEGER" }
  ],
  "rows": [
    {
      "dimensionValues": [ { "value": "20230207" }, { "value": "Netherlands" } ],
      "metricValues": [ { "value": "3" } ]
    },
    {
      "dimensionValues": [ { "value": "20230208" }, { "value": "Netherlands" } ],
      "metricValues": [ { "value": "2" } ]
    },
    {
      "dimensionValues": [ { "value": "20230208" }, { "value": "United States" } ],
      "metricValues": [ { "value": "1" } ]
    }
  ]
}'
DECLARE @jsonexample2 NVARCHAR(MAX) = (SELECT [value] FROM OPENJSON(@jsonexample) WHERE [key] = 'rows');
SELECT *
FROM OPENJSON(@jsonexample2);
This blog post seemed to have a good explanation, but I still couldn't get it working:
https://levelup.gitconnected.com/how-to-easily-parse-and-transform-json-in-sql-server-c0b091a964de
You can shred it down to something like this:
DECLARE @jsonexample NVARCHAR(MAX) =
N'{
  "dimensionHeaders": [
    { "name": "date" },
    { "name": "country" }
  ],
  "metricHeaders": [
    { "name": "totalUsers", "type": "TYPE_INTEGER" }
  ],
  "rows": [
    {
      "dimensionValues": [ { "value": "20230207" }, { "value": "Netherlands" } ],
      "metricValues": [ { "value": "3" } ]
    },
    {
      "dimensionValues": [ { "value": "20230208" }, { "value": "Netherlands" } ],
      "metricValues": [ { "value": "2" } ]
    },
    {
      "dimensionValues": [ { "value": "20230208" }, { "value": "United States" } ],
      "metricValues": [ { "value": "1" } ]
    }
  ]
}'
;WITH cols AS (
    SELECT CAST([key] AS INT) AS k, JSON_VALUE(value, '$.name') AS v
    FROM OPENJSON(@jsonexample, '$.dimensionHeaders') x
),
metrics AS (
    SELECT CAST([key] AS INT) AS k, JSON_VALUE(value, '$.name') AS v
    FROM OPENJSON(@jsonexample, '$.metricHeaders') x
)
SELECT CAST(x.[key] AS INT) AS id,
       c.v AS dimName, JSON_VALUE(dim.value, '$.value') AS dimValue,
       m.v AS metName, JSON_VALUE(metr.value, '$.value') AS metValue
FROM OPENJSON(@jsonexample, '$.rows') x
CROSS APPLY OPENJSON(x.value, '$.dimensionValues') dim
CROSS APPLY OPENJSON(x.value, '$.metricValues') metr
INNER JOIN cols c ON c.k = dim.[key]
INNER JOIN metrics m ON m.k = metr.[key]
Then you can probably figure out the rest.
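If "the rest" means one row per report entry with the headers as real columns, and you are happy to hard-code the header positions from this sample (date, country, totalUsers), a minimal sketch reads the values by index instead (it reuses @jsonexample from above):

SELECT CAST(x.[key] AS INT) AS id,
       JSON_VALUE(x.value, '$.dimensionValues[0].value') AS [date],    -- dimension 0
       JSON_VALUE(x.value, '$.dimensionValues[1].value') AS country,   -- dimension 1
       CAST(JSON_VALUE(x.value, '$.metricValues[0].value') AS INT) AS totalUsers
FROM OPENJSON(@jsonexample, '$.rows') x;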
Here is a bit of code that will dynamically parse the metrics and dimensions for Google Analytics. Note that it targets the reports[0].columnHeader shape of the older Reporting API rather than the dimensionHeaders/metricHeaders shape above, so adapt the paths. It should give you a good starting point :)
DECLARE @json NVARCHAR(MAX), @WithClause NVARCHAR(MAX), @Query NVARCHAR(MAX);

SELECT TOP 1 @json = JSON_QUERY(RawJson, '$.reports[0].columnHeader')
FROM TableName;

SET @WithClause =
(
    SELECT STRING_AGG(Line, ',')
    FROM
    (
        SELECT REPLACE(r.value, 'ga:', '') + ' ' + CASE
                   WHEN r.value = 'ga:DATE' THEN 'DATE'
                   ELSE 'NVARCHAR(255)'
               END + ' ' + '''$.dimensions[' + r.[key] + ']''' AS Line
        FROM OPENJSON(@json, '$.dimensions') AS r
        UNION ALL
        SELECT REPLACE(JSON_VALUE(r.value, '$.name'), 'ga:', '') + ' ' + CASE
                   WHEN JSON_VALUE(r.value, '$.type') = 'TIME' THEN 'FLOAT'
                   WHEN JSON_VALUE(r.value, '$.type') = 'CURRENCY' THEN 'DECIMAL(9,2)'
                   ELSE JSON_VALUE(r.value, '$.type')
               END + ' ' + '''$.metrics[0].values[' + r.[key] + ']'''
        FROM OPENJSON(@json, '$.metricHeader.metricHeaderEntries') AS r
    ) AS a
);

SET @Query = '
SELECT d.*
INTO #temp_table
FROM TableName AS cm
CROSS APPLY OPENJSON(RawJson, ''$.reports[0].data.rows'') WITH (' + @WithClause + ') AS d';
--PRINT @Query;
EXECUTE (@Query);
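To make the dynamic part concrete: for a hypothetical report whose columnHeader lists dimensions ga:date and ga:country plus an INTEGER metric ga:users, the generated @WithClause would come out roughly as

date DATE '$.dimensions[0]',
country NVARCHAR(255) '$.dimensions[1]',
users INTEGER '$.metrics[0].values[0]'

so the dynamic statement is, in the end, just an ordinary OPENJSON ... WITH query built at runtime.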

Query an array element in an JSONB Object

I have a jsonb column called data in a table called reports. Here is what report.id = 1 looks like
[
  {
    "Product": [
      { "productIDs": ["ABC1", "ABC2"], "groupID": "Food123" },
      { "productIDs": ["EFG1"], "groupID": "Electronic123" }
    ],
    "Package": [
      { "groupID": "Electronic123" }
    ],
    "type": "Produce"
  },
  {
    "Product": [
      { "productIDs": ["ABC1", "ABC2"], "groupID": "Clothes123" }
    ],
    "Package": [
      { "groupID": "Food123" }
    ],
    "type": "Wearables"
  }
]
and here is what report.id = 2 looks like:
[
  {
    "Product": [
      { "productIDs": ["XYZ1", "XYZ2"], "groupID": "Food123" }
    ],
    "Package": [],
    "type": "Wearable"
  },
  {
    "Product": [
      { "productIDs": ["ABC1", "ABC2"], "groupID": "Clothes123" }
    ],
    "Package": [
      { "groupID": "Food123" }
    ],
    "type": "Wearables"
  }
]
I am trying to get a list of all entries in the reports table where at least one element of the data array has the following:
type = Produce AND
any element of the Product array or of the Package array has a groupID starting with Food
So from the example above, the query should only return the first report, since:
its first element has type = Produce
groupID starts with Food for the first element of its Product array
The second report is filtered out because none of its elements has type = Produce.
I am not sure how to add the AND condition on groupID. Here is what I have tried, to get all entries of type Produce:
select * from reports r, jsonb_to_recordset(r.data) as items(type text) where items.type like 'Produce';
select r.*
from reports r
cross join jsonb_array_elements(r.data) l1
cross join jsonb_array_elements(l1.value -> 'Product') l2
where l1 ->> 'type' = 'Produce'
and l2.value ->> 'groupID' ~ '^Food';
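One caveat: because of the two cross joins, a report with several matching Product elements comes back once per match. If each matching report should appear at most once, an EXISTS variant over the same conditions is a reasonable sketch:

select r.*
from reports r
where exists (
  -- same unnesting and conditions as above, but only used as a filter
  select 1
  from jsonb_array_elements(r.data) l1
  cross join jsonb_array_elements(l1.value -> 'Product') l2
  where l1 ->> 'type' = 'Produce'
    and l2.value ->> 'groupID' ~ '^Food'
);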

Extract array from varchar in PrestoSQL

I have a VARCHAR field like this:
[
{
"config": 0,
"type": "0
},
{
"config": x,
"type": "1"
},
{
"config": "",
"type": ""
},
{
"config": [
{
"address": {},
"category": "",
"merchant": {
"data": [
10,12,23
],
"file": 0
},
"range_id": 1,
"shop_id_info": null
}
],
"type": "new"
}
]
And I need to extract the merchant data from this. The desired output is:
10
12
23
Please advise. I keep getting Cannot cast VARCHAR to array/unnest type VARCHAR
You can try using the JSON path $.*.config.*.merchant.data.*, but if it does not work for you (as for me in the Athena version, where arrays in JSON paths are not supported well), you can cast your JSON to ARRAY(JSON) and do some manipulations from there (I needed to fix your JSON a little bit):
Test data:
WITH dataset AS (
    SELECT * FROM (VALUES
        (JSON '[
            { "config": {}, "type": "0" },
            { "config": "x", "type": "1" },
            { "config": "", "type": "" },
            {
                "config": [
                    {
                        "address": {},
                        "category": "",
                        "merchant": { "data": [10, 12, 23], "file": 0 },
                        "range_id": 1,
                        "shop_id_info": null
                    }
                ],
                "type": "new"
            }
        ]')
    ) AS t (json_value)
)
And the query:
SELECT flatten(
           transform(
               flatten(
                   transform(
                       CAST(json_value AS ARRAY(JSON)),
                       json_object -> try(CAST(json_extract(json_object, '$.config') AS ARRAY(JSON)))
                   )
               ),
               json_config -> CAST(json_extract(json_config, '$.merchant.data') AS ARRAY(INTEGER))
           )
       )
FROM dataset
Which will give you an array of numbers:
_col0
[10, 12, 23]
And from there you can continue with unnest and so on if needed.
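For example, to get the desired one-number-per-row output, you can wrap the expression above in a subquery and CROSS JOIN UNNEST it; a sketch (the nums and num aliases are mine):

SELECT num
FROM (
    -- same expression as above, aliased so it can be unnested
    SELECT flatten(
               transform(
                   flatten(
                       transform(
                           CAST(json_value AS ARRAY(JSON)),
                           json_object -> try(CAST(json_extract(json_object, '$.config') AS ARRAY(JSON)))
                       )
                   ),
                   json_config -> CAST(json_extract(json_config, '$.merchant.data') AS ARRAY(INTEGER))
               )
           ) AS nums
    FROM dataset
) AS d
CROSS JOIN UNNEST(d.nums) AS t (num)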

Postgresql and jsonb - inserting a key/value into a multi-level array

Very similar to this post, but I am struggling to adapt their solution.
My table: public.challenge, with a JSONB column lines.
My initial JSON in lines:
[
  {
    "line": 1,
    "blocs": [
      { "size": 100, "name": "abc" },
      { "size": 100, "name": "def" },
      { "size": 100, "name": "ghi" }
    ]
  },
  {
    "line": 2,
    "blocs": [
      { "size": 100, "name": "xyz" }
    ]
  }
]
Desired update:
[
  {
    "line": 1,
    "blocs": [
      { "size": 100, "name": "abc", "type": "regular" },
      { "size": 100, "name": "def", "type": "regular" },
      { "size": 100, "name": "ghi", "type": "regular" }
    ]
  },
  {
    "line": 2,
    "blocs": [
      { "size": 100, "name": "xyz", "type": "regular" }
    ]
  }
]
So basically I need to add the type key and value to every object of blocs, for each element of the root array.
My unsuccessful attempt looks like this:
UPDATE public.challenge SET lines = jsonb_set(lines, '{}', (
SELECT jsonb_set(line, '{blocs}', (
SELECT jsonb_agg( bloc || '{"type":"regular"}' )
FROM jsonb_array_elements(line->'{blocs}') bloc
))
FROM jsonb_array_elements(lines) line
))
;
(Currently it sets the whole column to null; maybe due to jsonb_set(lines, '{}', ...) while my JSON begins as an array?)
Thanks!
Use jsonb_array_elements to unnest all the array elements, add the required JSON, and use jsonb_agg to aggregate it again:
with cte as
(
  select id,
         jsonb_agg(jsonb_set(val1,
                             '{blocs}',
                             (select jsonb_agg(arr2 || '{"type": "regular"}')
                              from jsonb_array_elements(arr1.val1 -> 'blocs') arr2))) as new_lines
  from challenge,
       jsonb_array_elements(lines) arr1(val1)
  group by 1
)
update challenge
set lines = cte.new_lines
from cte
where challenge.id = cte.id
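A side note: jsonb_agg gives no formal ordering guarantee, so if the order of the lines array must be preserved, WITH ORDINALITY plus an ORDER BY in the aggregate makes it explicit. A sketch of the same update under that assumption:

with cte as
(
  select id,
         jsonb_agg(jsonb_set(val1,
                             '{blocs}',
                             (select jsonb_agg(arr2 || '{"type": "regular"}')
                              from jsonb_array_elements(val1 -> 'blocs') arr2))
                   order by idx) as new_lines   -- keep the original array order
  from challenge,
       jsonb_array_elements(lines) with ordinality arr1(val1, idx)
  group by 1
)
update challenge
set lines = cte.new_lines
from cte
where challenge.id = cte.id;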

Advice on using OPENJSON to parse a nested array

I am hoping somebody can help point me in the right direction as I'm trying to parse a JSON file into SQL using OPENJSON. I have a structure which looks like this:
DECLARE @json AS NVARCHAR(MAX) = '
[{
"id": "78",
"Version": {
"Value": "12"
},
"Names": [{
"NameId": {
"Value": "8516365"
},
"id": "328787",
"NameLinkType": {
"Value": "A"
"CommsChains": {
"Value": [[{
"com_primary": {
"Value": "Y"
},
"com_recd": {
"Value": "2020-07-07 00:00:00.000"
},
"com_ack": {
"Value": "2020-07-09 00:00:00.000"
},
}
]]
), },
},
],
}
]'
I am able to parse the majority of the JSON correctly, so for each ID I can return values such as Version or NameId. However, I am unable to return any dates in respect of com_recd or com_ack, which sit under CommsChains [Object] > Value [Array] > [0] [Array].
It looks like there are some syntactic errors in your JSON. After fixing them, I was able to find the JSON paths to the date values. This is the SQL:
DECLARE @json AS NVARCHAR(MAX) = '
[
  {
    "id": "78",
    "Version": { "Value": "12" },
    "Names": [
      {
        "NameId": { "Value": "8516365" },
        "id": "328787",
        "NameLinkType": {
          "Value": "A",
          "CommsChains": {
            "Value": [[
              {
                "com_primary": { "Value": "Y" },
                "com_recd": { "Value": "2020-07-07 00:00:00.000" },
                "com_ack": { "Value": "2020-07-09 00:00:00.000" }
              }
            ]]
          }
        }
      }
    ]
  }
]'
select * from openjson(@json, '$[0].Version'); --Value 12 1
select * from openjson(@json, '$[0].Names');
select * from openjson(@json, '$[0].Names[0]');
select * from openjson(@json, '$[0].Names[0].NameLinkType');
select * from openjson(@json, '$[0].Names[0].NameLinkType.CommsChains');
select * from openjson(@json, '$[0].Names[0].NameLinkType.CommsChains.Value');
select * from openjson(@json, '$[0].Names[0].NameLinkType.CommsChains.Value[0]');
select * from openjson(@json, '$[0].Names[0].NameLinkType.CommsChains.Value[0][0]');
select * from openjson(@json, '$[0].Names[0].NameLinkType.CommsChains.Value[0][0].com_recd'); --selecting path for com_recd
select * from openjson(@json, '$[0].Names[0].NameLinkType.CommsChains.Value[0][0].com_ack');  --selecting path for com_ack
Herein I show the selects for the different parts of your JSON. The arrays' content is always referenced as [0], since it is always the first index to select here.
For more information on JSON path expressions in SQL Server, see the Microsoft documentation.
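If the goal is to get those dates back as proper columns rather than via one path per query, a single statement with nested OPENJSON ... WITH clauses is one way to finish. A minimal sketch against the corrected JSON above (the column and alias names are mine):

select j.id,
       comms.com_recd,
       comms.com_ack
from openjson(@json)
     with (
         id         nvarchar(10)  '$.id',
         -- keep the nested [[...]] array as raw JSON for the next OPENJSON
         CommsValue nvarchar(max) '$.Names[0].NameLinkType.CommsChains.Value' as json
     ) j
cross apply openjson(j.CommsValue, '$[0]')
     with (
         com_recd datetime2 '$.com_recd.Value',
         com_ack  datetime2 '$.com_ack.Value'
     ) comms;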