BigQuery : best use of UNNEST Arrays - sql

I really need some help. I have a large JSON file that I ingested into BigQuery, and I want to write a query that uses UNNEST twice. The data looks like this:
{
"categories": [
{
"id": 1,
"name" : "C0",
"properties": [
{
"name": "Property_1",
"value": {
"type": "String",
"value": "11111"
}
},
{
"name": "Property_2",
"value": {
"type": "String",
"value": "22222"
}
}
]}
]}
And I want to write a query that gives me a result like this:
---------------------------------------------------------------------
| Category_ID | Name_ID | Property_1 | Property_2 |
------------------------------------------------------------------
| 1 | C0 | 11111 | 22222 |
---------------------------------------------------------------------
I already tried something like this, but it's not working:
-- Flattens categories, then each category's properties, giving one row per
-- (category, property) pair.
-- NOTE(review): a column alias has to be a fixed identifier, so `as p.name`
-- cannot take its name from the data; this is likely why the query fails.
-- Getting one column per property name would need a pivot step - confirm.
SELECT
c.id as Category_ID,
c.name as Name_ID,
p.value.value as p.name
From `DataBase-xxxxxx` CROSS JOIN
UNNEST(categories) AS c,
UNNEST(c.properties) AS p;
Thank you more 🙏

Related

How to parse this JSON file in Snowflake?

So I have a column in a Snowflake table that stores JSON data but the column is of a varchar data type.
The JSON looks like this:
{
"FLAGS": [],
"BANNERS": {},
"TOOLS": {
"game.appConfig": {
"type": [
"small",
"normal",
"huge"
],
"flow": [
"control",
"noncontrol"
]
}
},
"PLATFORM": {}
}
I want to filter only the data inside TOOLS and want to get the following result:
TOOLS_ID
TOOLS
game.appConfig
type
game.appConfig
flow
How can I achieve this?
I assumed that the TOOLs can have more than one tool ID, so I wrote this query:
-- Sample input: a single row whose v1 column holds the JSON document as text.
WITH mydata AS (
    SELECT
'{
"FLAGS": [],
"BANNERS": {},
"TOOLS": {
"game.appConfig": {
"type": [
"small",
"normal",
"huge"
],
"flow": [
"control",
"noncontrol"
]
}
},
"PLATFORM": {}
}' AS v1
)
-- The first FLATTEN walks the keys under "TOOLS" (the tool ids); the second
-- walks the keys of each tool's own object.
SELECT
    tool.KEY AS TOOLS_ID,
    attr.KEY AS TOOLS
FROM mydata,
    LATERAL FLATTEN(PARSE_JSON(v1):"TOOLS") AS tool,
    LATERAL FLATTEN(tool.VALUE) AS attr;
+----------------+-------+
| TOOLS_ID | TOOLS |
+----------------+-------+
| game.appConfig | flow |
| game.appConfig | type |
+----------------+-------+
Assuming the column name is C1 and table name T1:
-- Parses column C1 of table T1 as JSON (exposed as column t of subquery a),
-- then returns the "game.appConfig" value under "TOOLS", cast to a string.
select a.t:"TOOLS":"game.appConfig"::string from (select
parse_json(to_variant(C1))t from T1) a

Postgres combine 3 CTEs causes duplicate rows

I'm trying to combine 2 select queries on 2 different tables that share a foreign key, project_id, filtered by a condition, and return a single result set containing the project together with a JSON array called sprints and a JSON array of backlog items. The output should look something like this.
{
"id": "1920c79d-69d7-4b63-9662-ed5333e9b735",
"name": "Test backend v1",
"backlog_items": [
{
"id": "961b2438-a16b-4f30-83f1-723a05592d68",
"name": "Another User Story 1",
"type": "User Story",
"backlog": true,
"s3_link": null,
"sprint_id": null
},
{
"id": "a2d93017-ab87-4ec2-9589-71f6cebba936",
"name": "New Comment",
"type": "Comment",
"backlog": true,
"s3_link": null,
"sprint_id": null
}
],
"sprints": [
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62ed4",
"name": "Test name 2",
"sprint_items": [
{
"id": "1285825b-1669-40f2-96b8-de02ec80d8bd",
"name": "As an admin I should be able to delete an organization",
"type": "User Story",
"backlog": false,
"s3_link": null,
"sprint_id": "1cd165c7-68f7-4a1d-b018-609989d62ed4"
}
]
},
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62f44",
"name": "Test name 1",
"sprint_items": []
}
]
}
If there are no backlog items or no sprints associated with the project_id, I want to return an empty list. I figured the Postgres COALESCE function might help in this case, but I'm not sure how to use it to achieve what I want.
Sprint table
id | end_date | start_date | project_id | name
--------------------------------------+----------+------------+--------------------------------------+-------------
1cd165c7-68f7-4a1d-b018-609989d62ed4 | | | 1920c79d-69d7-4b63-9662-ed5333e9b735 | Test name 2
Sprint item table
id | sprint_id | name | type | s3_link | backlog | project_id
--------------------------------------+--------------------------------------+--------------------------------------------------------+------------+---------+---------+--------------------------------------
961b2438-a16b-4f30-83f1-723a05592d68 | | Another User Story 1 | User Story | | t | 1920c79d-69d7-4b63-9662-ed5333e9b735
a2d93017-ab87-4ec2-9589-71f6cebba936 | | New Comment | Comment | | t | 1920c79d-69d7-4b63-9662-ed5333e9b735
1285825b-1669-40f2-96b8-de02ec80d8bd | 1cd165c7-68f7-4a1d-b018-609989d62ed4 | As an admin I should be able to delete an organization | User Story | | f | 1920c79d-69d7-4b63-9662-ed5333e9b735
The query I'm using right now which returns multiple duplicates in the result set.
-- Builds one JSON document per project with its sprints and backlog items.
-- si: slim projection of sprint_items (id, name, backlog, project_id).
with si as (
select si.id, si.name, si.backlog, si.project_id
from sprint_items si
-- s: one row per sprint, with its items collected into a JSON array.
-- Here "si" aliases the sprint_items table itself, not the si CTE above, so
-- to_jsonb(si) serializes every table column except project_id.
-- A sprint without items still has one unmatched left-join row, which
-- jsonb_agg turns into [null] rather than an empty array.
), s as (
select s.id, s.name, s.project_id, jsonb_agg(to_jsonb(si) - 'project_id') as sprint_items
from sprints s
left join sprint_items si
on si.sprint_id = s.id
group by s.id, s.name, s.project_id
-- p: joins projects to s and to the si CTE, both only on project_id.
-- The two joins are independent of each other, so every sprint is paired with
-- every item of the project; each jsonb_agg therefore sees its rows repeated
-- once per row of the other join, which is the duplication in the output.
-- The CASE yields NULL for non-backlog items, and jsonb_agg keeps those NULLs.
), p as (
select p.id, p.name, jsonb_agg(to_jsonb(s) - 'project_id') as sprints,
jsonb_agg(to_jsonb(case when si.backlog = true then si end) - 'project_id') as backlog_items
from projects p
left join s
on s.project_id = p.id
left join si
on si.project_id = p.id
group by p.id, p.name
)
select to_jsonb(p) from p
where p.id = '1920c79d-69d7-4b63-9662-ed5333e9b735'
Updated
This is what the above query is producing in terms of duplicating the sprint items and sprints
{
"id": "1920c79d-69d7-4b63-9662-ed5333e9b735",
"name": "Test backend v1",
"sprints": [
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62ed4",
"name": "Test name 2",
"sprint_items": [
{
"id": "1285825b-1669-40f2-96b8-de02ec80d8bd",
"name": "As an admin I should be able to delete an organization",
"type": "User Story",
"backlog": false,
"s3_link": null,
"sprint_id": "1cd165c7-68f7-4a1d-b018-609989d62ed4"
}
]
},
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62ed4",
"name": "Test name 2",
"sprint_items": [
{
"id": "1285825b-1669-40f2-96b8-de02ec80d8bd",
"name": "As an admin I should be able to delete an organization",
"type": "User Story",
"backlog": false,
"s3_link": null,
"sprint_id": "1cd165c7-68f7-4a1d-b018-609989d62ed4"
}
]
},
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62ed4",
"name": "Test name 2",
"sprint_items": [
{
"id": "1285825b-1669-40f2-96b8-de02ec80d8bd",
"name": "As an admin I should be able to delete an organization",
"type": "User Story",
"backlog": false,
"s3_link": null,
"sprint_id": "1cd165c7-68f7-4a1d-b018-609989d62ed4"
}
]
},
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62f44",
"name": "Test name 1",
"sprint_items": [
null
]
},
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62f44",
"name": "Test name 1",
"sprint_items": [
null
]
},
{
"id": "1cd165c7-68f7-4a1d-b018-609989d62f44",
"name": "Test name 1",
"sprint_items": [
null
]
}
],
"backlog_items": [
null,
{
"id": "961b2438-a16b-4f30-83f1-723a05592d68",
"name": "Another User Story 1",
"backlog": true
},
{
"id": "a2d93017-ab87-4ec2-9589-71f6cebba936",
"name": "New Comment",
"backlog": true
},
null,
{
"id": "961b2438-a16b-4f30-83f1-723a05592d68",
"name": "Another User Story 1",
"backlog": true
},
{
"id": "a2d93017-ab87-4ec2-9589-71f6cebba936",
"name": "New Comment",
"backlog": true
}
]
}
Any pointers to what functions I should read up would be greatly appreciated.

SQL to get count in JSON data

I am new to SQL. Could anyone share some pointers or a solution for my case?
Given the JSON data shown below, is it possible to return, for each column, how many times each value appears in the data? The result should look like the table below:
+--------------------------------+
|column |value |totalCount |
+--------------------------------+
|brand |top-brand| (2) |
|brand |low | (1) |
|type |Bobtail | (1) |
|type |Snowshoe | (2) |
+--------------------------------+
[
{
"id": 1,
"name": "cat",
"type": {
"id": 2,
"name": "Snowshoe",
},
"brand": {
"id": 3,
"name": "top-brand",
}
},
{
"id": 2,
"name": "cat",
"type": {
"id": 2,
"name": "Snowshoe",
},
"brand": {
"id": 2,
"name": "low",
}
},
{
"id": 3,
"name": "cat",
"type": {
"id": 1,
"name": "Bobtail",
},
"brand": {
"id": 3,
"name": "top-brand",
}
}
]

Azure LogAnalytics Parse JSON Array

I am ingesting some custom logs to Azure LogAnalytics. One of the columns contains nested json objects. I would like to return each nested object to a separate column value.
Was trying the mvexpand statement but have not had any luck.
// Parses the target_s string column as JSON into a new dynamic column "test",
// then expands that column with mvexpand.
// NOTE(review): the individual properties (id, type, ...) are not projected
// into their own columns here - presumably the missing step; confirm.
customLog_CL
| extend test = parsejson(target_s)
| mvexpand test
The column data looks like below.
[ { "id": "00phb49dl40lBsasC0h7", "type": "PolicyEntity", "alternateId": "unknown", "displayName": "Default Policy", "detailEntry": "#{policyType=hello}" }, { "id": "0pri9mxp9vSc4lpiU0h7", "type": "PolicyRule", "alternateId": "00phb49dl40lBsasC0h7", "displayName": "All Users Login", "detailEntry": null } ]
I'm in the exact same situation, so hopefully we can share the knowledge.
I ended up doing something like the query below. I honestly can't tell you right now whether it's the correct way of doing it or whether it has any bugs (I'm still doing my data validation, so I'll update later on), but it should at least get you started.
// Expand the parsed target_s value, then keep three properties of each
// element as the only output columns.
customLog_CL
| mvexpand parsejson(target_s)
| project
    Id = target_s["id"],
    type = target_s["type"],
    OtherId = target_s["alternateId"]
This should work:
// Inline sample: one row whose dynamic column d holds the array of objects.
datatable(d: dynamic)
[
dynamic(
[
{ "id": "00phb49dl40lBsasC0h7", "type": "PolicyEntity", "alternateId": "unknown", "displayName": "Default Policy", "detailEntry": "#{policyType=hello}" },
{ "id": "0pri9mxp9vSc4lpiU0h7", "type": "PolicyRule", "alternateId": "00phb49dl40lBsasC0h7", "displayName": "All Users Login", "detailEntry": "" }
]
)
]
// One row per array element.
| mv-expand d
// Wrap each element in a single-entry bag keyed by the element's id.
| project p = pack(tostring(d['id']), d)
// Merge the per-element bags into one bag, then turn each key into a column.
| summarize bag = make_bag(p)
| evaluate bag_unpack(bag)
Output
Please check if this fits your requirement.
// Parse the sample document once; its "hosts" property is the array of entries.
let hosts_object = parsejson('{"hosts": [ { "id": "00phb49dl40lBsasC0h7", "type": "PolicyEntity", "alternateId": "unknown", "displayName": "Default Policy", "detailEntry": "#{policyType=hello}" }, { "id": "0pri9mxp9vSc4lpiU0h7", "type": "PolicyRule", "alternateId": "00phb49dl40lBsasC0h7", "displayName": "All Users Login", "detailEntry": null } ]}');
// Output the whole object, followed by the first two array entries as json1 and json2.
print
    hosts_object,
    json1 = hosts_object.hosts[0],
    json2 = hosts_object.hosts[1]
Output for this should be as below
Additional Documentation Reference
Hope this helps.

Transform Json Nested Object Array To Table Row

I have a json like:
[
{
"Id": "1234",
"stockDetail": [
{
"Number": "10022_1",
"Code": "500"
},
{
"Number": "10022_1",
"Code": "600"
}
]
},
{
"Id": "1235",
"stockDetail": [
{
"Number": "10023_1",
"Code": "100"
},
{
"Number": "10023_1",
"Code": "100"
}
]
}
]
How to convert it in sql table like below:
+------+---------+------+
| Id | Number | Code |
+------+---------+------+
| 1234 | 10022_1 | 500 |
| 1234 | 10022_1 | 600 |
| 1235 | 10023_1 | 100 |
| 1235 | 10023_1 | 100 |
+------+---------+------+
If you need to define typed columns you can use OPENJSON with WITH clause:
-- Shreds the JSON array into one row per stockDetail entry, with typed columns.
-- T-SQL local variables must start with @; a leading # names a temporary
-- object, so the variable is declared and referenced as @j.
DECLARE @j nvarchar(max) = N'[
{
"Id": "1234",
"stockDetail": [
{ "Number": "10022_1",
"Code": "500"
},
{ "Number": "10022_1",
"Code": "600"
}
]
},
{
"Id": "1235",
"stockDetail": [
{ "Number": "10023_1",
"Code": "100"
},
{ "Number": "10023_1",
"Code": "100"
}
]
}
]';

SELECT
    father.Id,
    child.Number,
    child.Code
FROM OPENJSON(@j)
    WITH (
        Id int,
        -- AS JSON returns the nested array as raw JSON text so it can be
        -- passed to the second OPENJSON call below.
        stockDetail nvarchar(max) AS JSON
    ) AS father
-- One output row per element of each parent's stockDetail array.
CROSS APPLY OPENJSON(father.stockDetail)
    WITH (
        Number nvarchar(100),
        Code nvarchar(100)
    ) AS child;
Result:
In your case you may try to CROSS APPLY the JSON child node with the parent node:
-- Same flattening without a WITH schema: each OPENJSON call returns generic
-- key/value rows, and JSON_VALUE extracts the scalar properties.
-- T-SQL local variables must start with @; a leading # names a temporary
-- object, so the variable is declared and referenced as @json.
DECLARE @json nvarchar(max);
SET @json = N'
[
{
"Id": "1234",
"stockDetail": [
{
"Number": "10022_1",
"Code": "500"
},
{
"Number": "10022_1",
"Code": "600"
}
]
},
{
"Id": "1235",
"stockDetail": [
{
"Number": "10023_1",
"Code": "100"
},
{
"Number": "10023_1",
"Code": "100"
}
]
}
]';

SELECT
    JSON_VALUE(i.value, '$.Id') AS ID,
    JSON_VALUE(d.value, '$.Number') AS [Number],
    JSON_VALUE(d.value, '$.Code') AS [Code]
-- i: one row per top-level array element (the parent objects).
FROM OPENJSON(@json, '$') AS i
-- d: one row per element of that parent's stockDetail array.
CROSS APPLY OPENJSON(i.value, '$.stockDetail') AS d;
Output:
ID Number Code
1234 10022_1 500
1234 10022_1 600
1235 10023_1 100
1235 10023_1 100