BigQuery - Extract multiple nested child keys inside a JSON document - sql

I have a JSON structure in a field that looks like this. I'm trying to extract every task in every category, there could be any number of tasks or categories.
I've got part of the way there by extracting a single category, but can't seem to do it for every task in every category.
"tasks": {
"category-business": [
{
"dateCompleted": {
"_seconds": 1653672655,
"_nanoseconds": 791000000
},
"slug": "task-alpha",
"status": "completed"
},
{
"dateCompleted": {
"_seconds": 1654516259,
"_nanoseconds": 796000000
},
"slug": "task-bravo",
"status": "completed"
}
],"category-community": [
{
"dateCompleted": {
"_seconds": 1654709063,
"_nanoseconds": 474000000
},
"slug": "task-papa",
"status": "completed"
},
{
"dateCompleted": {
"_seconds": 1654709841,
"_nanoseconds": 764000000
},
"slug": "task-zebra",
"status": "completed"
}
]}
Here's the query so far
-- One output row per source row. `items` is an array with one
-- (dateCompleted, task_slug, status) struct for every element found under
-- $.tasks.category-business in DATA.
WITH business_items AS (
    SELECT
        ARRAY(
            SELECT AS STRUCT
                -- _seconds is read as text, cast to INT64, then turned into a TIMESTAMP.
                TIMESTAMP_SECONDS(
                    CAST(JSON_EXTRACT_SCALAR(task, '$.dateCompleted._seconds') AS INT64)
                ) AS dateCompleted,
                JSON_EXTRACT_SCALAR(task, '$.slug') AS task_slug,
                JSON_EXTRACT_SCALAR(task, '$.status') AS status
            FROM UNNEST(JSON_EXTRACT_ARRAY(DATA, '$.tasks.category-business')) AS task
        ) AS items
    FROM `table`
)
SELECT *
FROM business_items
This extracts just the information in the category business.
What I'm trying to do is expand category-community and any other children underneath the tasks key. The real data has at least 10 categories and 50 tasks.
I think I need to do another round of UNNEST and json_extract_array but I can't quite work out the correct order?

Consider below approach
-- Returns the top-level keys of the JSON object encoded in `input`.
create temp function get_keys(input string) returns array<string> language js as """
return Object.keys(JSON.parse(input));
""";
-- Returns the top-level values of the JSON object encoded in `input`.
-- It is paired with get_keys below by array position.
create temp function get_values(input string) returns array<string> language js as """
return Object.values(JSON.parse(input));
""";
-- Flattens a nested JSON document into a single-level object, returned as a
-- JSON string. Each leaf is stored under its dot-joined path (array positions
-- become path segments too, because `for ... in` walks array indexes).
-- NOTE(review): `typeof null` is also 'object' in JavaScript, so a null value
-- is recursed into and appears to contribute no leaf - confirm this is acceptable.
create temp function get_leaves(input string) returns string language js as '''
function flattenObj(obj, parent = '', res = {}){
for(let key in obj){
let propName = parent ? parent + '.' + key : key;
if(typeof obj[key] == 'object'){
flattenObj(obj[key], propName, res);
} else {
res[propName] = obj[key];
}
}
return JSON.stringify(res);
}
return flattenObj(JSON.parse(input));
''';
-- One row per flattened leaf under $.tasks.
-- The leaf path is split on '.': part 0 is the category, part 1 the position
-- inside the category array, and parts 2 and 3 (when present) are concatenated
-- without a separator to form the leaf name (e.g. dateCompleted + _seconds).
-- row_id is the text rendering of the whole source row.
-- NOTE(review): [offset(2)] raises an error for a path with fewer than three
-- parts - presumably every leaf sits inside a category array; verify.
create temp table temp_table as (
select
split(key, '.')[offset(0)] as category,
split(key, '.')[offset(1)] as offset,
split(key, '.')[offset(2)] || ifnull(split(key, '.')[safe_offset(3)], '') as key,
val, format('%t', t) row_id
-- leaves is computed once per source row; keys and values are then matched
-- up by their array position.
from your_table t, unnest([struct(get_leaves(json_extract(data, '$.tasks')) as leaves)]),
unnest(get_keys(leaves)) key with offset
join unnest(get_values(leaves)) val with offset using(offset)
);
-- Builds and runs a PIVOT statement whose column list is the set of distinct
-- leaf names found in temp_table; row_id is dropped from the final output.
-- NOTE(review): the leaf names are spliced into the SQL text unescaped -
-- assumes they never contain a double quote; confirm for real data.
execute immediate (
select '''
select * except(row_id) from temp_table
pivot (any_value(val) for key in ("''' || keys || '"))'
from (
select string_agg(key, '","') keys
from (select distinct key from temp_table)
)
);
if applied to sample data in your question - output is

DML only:
-- category_level: for every position a in 0..100, takes DATA.tasks[a] and
-- keeps the first non-NULL array among ten hard-coded category paths.
-- NOTE(review): this presumably expects DATA.tasks to be indexable by an
-- integer position; with a keyed object as shown in the question the
-- subscript may yield NULL - confirm against the real column type.
with category_level as (
select
-- Only the first category that is present is returned for a given element.
coalesce(
json_query_array(DATA.tasks[a], '$.category-business')
, json_query_array(DATA.tasks[a], '$.category-community')
, json_query_array(DATA.tasks[a], '$.category-3')
, json_query_array(DATA.tasks[a], '$.category-4')
, json_query_array(DATA.tasks[a], '$.category-5')
, json_query_array(DATA.tasks[a], '$.category-6')
, json_query_array(DATA.tasks[a], '$.category-7')
, json_query_array(DATA.tasks[a], '$.category-8')
, json_query_array(DATA.tasks[a], '$.category-9')
, json_query_array(DATA.tasks[a], '$.category-10')
) category_array
from table
-- Candidate positions 0..100; positions beyond 100 are never examined.
left join unnest(generate_array(0, 100)) a
-- Positions with nothing at DATA.tasks[a] are discarded.
where DATA.tasks[a] is not null
)
-- One output row per task in each category_array (or a single all-NULL row
-- when category_array is NULL or empty, because of the left join).
select
timestamp_seconds(cast(json_extract_scalar(b.dateCompleted._seconds) as int64)) dateCompleted
, json_extract_scalar(b.slug) slug
, json_extract_scalar(b.status) status
from category_level
left join unnest(category_array) b
https://console.cloud.google.com/bigquery?sq=1013309549723:fe8b75122e5b4b549e8081df99584c81

new version:
-- Pulls every _seconds, slug and status value out of the serialized $.tasks
-- JSON with regular expressions, then lines the three lists up by position.
with extracted as (
  select
    regexp_extract_all(tasks_json, r'"_seconds":(\d*)') as seconds_list,
    regexp_extract_all(tasks_json, r'"slug":"([a-z\-]*)"') as slug_list,
    regexp_extract_all(tasks_json, r'"status":"([a-z\-]*)"') as status_list
  from (
    select to_json_string(json_extract(DATA,'$.tasks')) as tasks_json
    from table
  )
)
select
  timestamp_seconds(cast(seconds_list[offset(a)] as int64)) dateCompleted
  , slug_list[offset(a)] task_slug
  , status_list[offset(a)] status
from extracted
-- One row per slug position: 0 .. (number of slugs - 1).
join unnest(generate_array(0, -1 + array_length(slug_list))) a
https://console.cloud.google.com/bigquery?sq=1013309549723:9f43bd653ba14589b31a1f5673adcda7

Related

Sum of column values inside a JSON array with GROUP BY

I have next Django model.
class StocksHistory(models.Model):
wh_data = models.JsonField()
created_at = models.DateTimeField()
I store JSON data in wh_data.
[
{
"id":4124124,
"stocks":[
{
"wh":507,
"qty":2
},
{
"wh":2737,
"qty":1
}
],
},
{
"id":746457457,
"stocks":[
{
"wh":507,
"qty":3
}
]
}
]
Note: it's data for one row - 2022-06-06.
I need to calculate the sum inside stocks by grouping them by wh and by created_at so that the output is something like this
[
{
"wh":507,
"qty":5,
"created_at":"2022-06-06"
},
{
"wh":2737,
"qty":1,
"created_at":"2022-06-06"
},
{
"wh":507,
"qty":0,
"created_at":"2022-06-07"
},
{
"wh":2737,
"qty":2,
"created_at":"2022-06-07"
}
]
I know how to group by date, but I don't understand how to proceed with aggregations inside JsonField.
StocksHistory.objects.extra(select={'day': 'date( created_at )'})
.values('day')
.annotate(
???
)
A solution is suitable, both through Django ORM and through RAW SQL.
demo
-- Sums stock quantities per warehouse (wh) and date, and returns the result
-- as an array of JSON objects with keys wh, qty and _date.
WITH cte AS (
    -- One row per stock entry and per sample date.
    -- wh and qty are read from the SAME stock object, so they stay paired even
    -- when an entry lacks one of the keys (the missing value is then NULL).
    SELECT
        (stock.item ->> 'wh')::numeric AS wh,
        (stock.item ->> 'qty')::numeric AS stock_qty,
        ss_._date
    FROM (
        VALUES ('[
            {
                "id":4124124,
                "stocks":[
                    {
                        "wh":507,
                        "qty":2
                    },
                    {
                        "wh":2737,
                        "qty":1
                    }
                ]
            },
            {
                "id":746457457,
                "stocks":[
                    {
                        "wh":507,
                        "qty":3
                    }
                ]
            }
        ]'::jsonb)
    ) AS v (js)
    -- Emits every element of every "stocks" array as its own row.
    CROSS JOIN LATERAL jsonb_path_query(v.js, '$[*].stocks[*]') AS stock (item)
    -- Demo dates: every stock entry is repeated for each date.
    CROSS JOIN (
        VALUES ('2022-06-06'), ('2022-06-07')
    ) AS ss_ (_date)
),
cte2 AS (
    SELECT
        wh,
        sum(stock_qty) AS qty,
        _date
    FROM cte
    GROUP BY
        wh,
        _date
)
SELECT
    -- The ordering lives inside the aggregate: an ORDER BY in a CTE does not
    -- guarantee the order in which array_agg receives its rows.
    array_agg(row_to_json(cte2.*)::jsonb ORDER BY cte2.wh, cte2._date)
FROM
    cte2;

How to remove object by value from a JSONB type array?

I want to remove a JSONB object by their unique 'id' value from a JSONB array. I am no expert at writing SQL code, but I managed to write the concatenate function.
For an example: Remove this object from an array below.
{
"id": "ad26e2be-19fd-4862-8f84-f2f9c87b582e",
"title": "Wikipedia",
"links": [
"https://en.wikipedia.org/1",
"https://en.wikipedia.org/2"
]
},
Schema:
-- One row per user. list_of_links holds the user's link groups as a single
-- JSONB value and defaults to an empty JSON array.
CREATE TABLE users (
url text not null,
user_id SERIAL PRIMARY KEY,
name VARCHAR,
list_of_links jsonb default '[]'
);
list_of_links format:
[
{
"id": "ad26e2be-19fd-4862-8f84-f2f9c87b582e",
"title": "Wikipedia",
"links": [
"https://en.wikipedia.org/1",
"https://en.wikipedia.org/2"
]
},
{
"id": "451ac172-b93e-4158-8e53-8e9031cfbe72",
"title": "Russian Wikipedia",
"links": [
"https://ru.wikipedia.org/wiki/",
"https://ru.wikipedia.org/wiki/"
]
},
{
"id": "818b99c8-479b-4846-ac15-4b2832ec63b5",
"title": "German Wikipedia",
"links": [
"https://de.wikipedia.org/any",
"https://de.wikipedia.org/any"
]
},
...
]
The concatenate function:
-- Concatenates onto list_of_links the array element whose "id" equals the
-- given value, for the rows with url = 'test', and returns the updated rows.
-- This appends a copy of the matching element; it does not remove anything.
-- NOTE(review): the subquery is used as a single value, so this presumably
-- expects at most one element with that id - confirm ids are unique.
update users set list_of_links=(
list_of_links || (select *
from jsonb_array_elements(list_of_links)
where value->>'id'='ad26e2be-19fd-4862-8f84-f2f9c87b582e'
)
)
where url='test'
returning *
;
Your json data is structured so you have to unpack it, operate on the unpacked data, and then repack it again:
-- Returns every user with list_of_links rebuilt without the element whose id
-- is ad26e2be-19fd-4862-8f84-f2f9c87b582e.
-- A user whose list becomes empty (or was already empty) is still returned,
-- with list_of_links = '[]'.
SELECT
    u.url,
    u.user_id,
    u.name,
    COALESCE(
        -- Repack the surviving records into a JSON array. The FILTER skips the
        -- all-NULL placeholder row the LEFT JOIN produces when nothing survives.
        jsonb_agg(
            jsonb_build_object('id', l.id, 'title', l.title, 'links', l.links)
        ) FILTER (WHERE l.id IS NOT NULL),
        '[]'::jsonb
    ) AS list_of_links
FROM users AS u
-- LEFT JOIN (instead of CROSS JOIN) keeps users that have no remaining links.
LEFT JOIN LATERAL (
    -- Unpack the array into one record per element and drop the unwanted one.
    SELECT r.id, r.title, r.links
    FROM jsonb_to_recordset(u.list_of_links) AS r (id uuid, title text, links jsonb)
    WHERE r.id <> 'ad26e2be-19fd-4862-8f84-f2f9c87b582e'::uuid
) AS l ON TRUE
GROUP BY u.url, u.user_id, u.name
The function jsonb_to_recordset is a set-returning function so you have to use it as a row source, joined to its originating table with the LATERAL clause so that the list_of_links column is available to the function to be unpacked. Then you can delete the records you are not interested in using the WHERE clause, and finally repack the structure by building the record fields into a jsonb structure and then aggregating the individual records back into an array.
I wrote this in JS, but that does not matter for how it works. Essentially, it gets all the items from the array, then finds the item with the matching id, which gives its index. Using that index, I apply the "-" operator, which takes the index and removes that element from the array. Sorry if my grammar is bad.
// req.body is this JSON object:
// {"url":"test", "id": "ad26e2be-19fd-4862-8f84-f2f9c87b582e"}
var { url, id } = req.body;
pgPool.query(
  `
  select list_of_links
  from users
  where url=$1;
  `,
  [url],
  (error, result) => {
    // Stop here if the lookup failed.
    if (error) {
      res.json({ status: "failed" });
      return;
    }
    // result.rows is an array of row objects, so the JSONB array has to be
    // read from the first row, not from result.rows itself.
    var row = result && result.rows[0];
    var links = row && Array.isArray(row.list_of_links) ? row.list_of_links : [];
    // Position (0, 1, 2, ...) of the element whose id matches the request's id,
    // or -1 when there is none.
    var index_of_the_item = links.findIndex(({ id: db_id }) => db_id === id);
    if (index_of_the_item === -1) {
      // Nothing to remove. Do NOT run the update: `jsonb - NULL` evaluates to
      // NULL and would overwrite list_of_links with NULL.
      res.json({ status: "failed" });
      return;
    }
    // Remove the array element by its index.
    // NOTE(review): the index is computed from the first row only and from a
    // separate earlier query; this assumes url identifies a single row and
    // that the array is not modified in between - confirm.
    pgPool.query(
      `
      update users
      set list_of_links=(
        list_of_links - $1::int
      )
      where url=$2
      ;
      `,
      [index_of_the_item, url],
      (e, r) => {
        /* ... handle the update result ... */
      }
    );
  }
);

Data population from database as nested array object

I have a table structure as below in SQL Server database,
I want to populate the data from database something similar to JSON like as below:
id: 1
aname: xyz
categories: bus
{
arnam: res
street: [s1,s2]
},
{
arnam: com
street: [c1,c2]
}
Can someone please guide me as to how I can do this in the database using normal SQL query or procedure.
Your json is not valid, but according to your table I think you want to know how to parse data from nested JSON with array of values with this structure:
-- Sample documents: one JSON string per row, exposed as column [json].
WITH docs AS (
    SELECT sample.[json]
    FROM (VALUES
('{"id": 1, "aname": "xyz",
"categories": {
"bus": [
{"aname": "res",
"street": ["c1", "c2"]
},
{"aname": "res",
"street": ["s1", "s2"]
}]
}
}'),
('{"id": 2, "aname": "abc",
"categories": {
"bus": [
{"aname": "foo",
"street": ["c1", "c2"]
},
{"aname": "zoo",
"street": ["s1", "s2"]
}]
}
}')
    ) AS sample ([json])
)
-- One output row per street value: document -> categories.bus entry -> street.
SELECT
    ROW_NUMBER() OVER (ORDER BY doc.[id]) AS RN,
    *
FROM docs AS src
-- Level 1: top-level scalars plus the categories.bus array kept as raw JSON.
CROSS APPLY OPENJSON(src.[json])
    WITH (
        [id]             int           '$.id',
        [aname]          VARCHAR(100)  '$.aname',
        [categories_jsn] NVARCHAR(MAX) '$.categories.bus' AS JSON
    ) AS doc
-- Level 2: one row per categories.bus entry; its street array kept as raw JSON.
CROSS APPLY OPENJSON(doc.[categories_jsn])
    WITH (
        [street_arr] NVARCHAR(MAX) '$.street' AS JSON,
        [aname_lvl2] VARCHAR(20)   '$.aname'
    ) AS bus
-- Level 3: one row per street value.
CROSS APPLY OPENJSON(bus.[street_arr])
    WITH (
        [street] VARCHAR(20) '$'
    ) AS st
Output:

Converting Parent child rows to JSON in oracle

Is there a way to create JSON object in Oracle, for parent child relationship data? For example an organizational structure. Table contains
EmpId Name Title ManagerId
1 John GM 0
2 Smith Manager 1
3 Jason Manager 1
4 Will IP1 3
5 Jade AM 3
6 Mark IP2 5
7 Jane AM2 5
8 Tamara M1 1
9 Dory M2 1
Something like below JSON object is expected.
{
'name': 'John',
'title': 'GM',
'children': [
{ 'name': 'Smith', 'title': 'manager' },
{ 'name': 'Jason', 'title': 'manager',
'children': [
{ 'name': 'Will', 'title': 'IP1' },
{ 'name': 'Jade', 'title': 'AM',
'children': [
{ 'name': 'Mark', 'title': 'IP2' },
{ 'name': 'Jane', 'title': 'AM2' }
]
}
]
},
{ 'name': 'Tamara', 'title': 'M1' },
{ 'name': 'Dory', 'title': 'M2' }
]
}
Oracle Database 12.2 does have a number of JSON generation functions. But these are of limited use. You need to build up the document recursively.
Which I believe requires a bit of hand-crafting.
First use a recursive query to create the org chart, adding which level each person is in the hierarchy.
Then build the JSON by:
If level for the next row is greater than the current, the employee is a manager. And you need to start a child array. Otherwise return a JSON object for the current row
If the current row is the last in the tree, you need to close N arrays and objects. N is how deep the row is in the tree minus one.
Otherwise if the next row is a lower level than the current, you need to close ( current level - next level ) arrays and objects
Then if the next level equals or is less than the current, add a comma
Which gives something like:
-- Sample org chart: one row per employee; ManagerId points at the manager's
-- EmpId (the root row uses 0).
create table t (
  EmpId     int,
  Name      varchar2(10),
  Title     varchar2(10),
  ManagerId int
);
-- Column lists are spelled out so the inserts do not depend on column order.
insert into t (EmpId, Name, Title, ManagerId) values (1, 'John',  'GM',      0);
insert into t (EmpId, Name, Title, ManagerId) values (2, 'Smith', 'Manager', 1);
insert into t (EmpId, Name, Title, ManagerId) values (3, 'Jason', 'Manager', 1);
insert into t (EmpId, Name, Title, ManagerId) values (4, 'Will',  'IP1',     3);
insert into t (EmpId, Name, Title, ManagerId) values (5, 'Jade',  'AM',      3);
insert into t (EmpId, Name, Title, ManagerId) values (6, 'Mark',  'IP2',     5);
insert into t (EmpId, Name, Title, ManagerId) values (7, 'Jane',  'AM2',     5);
insert into t (EmpId, Name, Title, ManagerId) values (8, 'Tamar', 'M1',      1);
insert into t (EmpId, Name, Title, ManagerId) values (9, 'Dory',  'M2',      1);
commit;
-- Builds one nested JSON document for the org chart rooted at empid = 1.
-- chart:   the hierarchy in depth-first order (seq) with each row's depth (lvl).
-- leveled: adds nlvl, the depth of the next row in that order (NULL on the last row).
-- jdata:   one JSON text fragment per row; concatenated in seq order they form the document.
with chart (
  empid, managerid, name, title, lvl
) as (
  select empid, managerid,
         name, title, 1 lvl
  from   t
  where  empid = 1
  union all
  select t.empid, t.managerid,
         t.name, t.title,
         c.lvl + 1 lvl
  from   chart c
  join   t
  on     c.empid = t.managerid
) search depth first by empid set seq,
leveled as (
  select name, title, seq, lvl,
         lead ( lvl ) over ( order by seq ) nlvl
  from   chart
),
jdata as (
  select case
           /* The employee has reports: open the object and start a child array.
              json_object does the quoting/escaping of name and title; its
              closing brace is stripped so the children array can be appended. */
           when nlvl > lvl then
             rtrim ( json_object ( 'name' value name, 'title' value title ), '}' ) ||
             ',"children":['
           else
             json_object ( 'name' value name, 'title' value title )
         end ||
         case
           /* Last row: close every array and object still open (depth - 1 pairs) */
           when nlvl is null then
             lpad ( ']}', ( lvl - 1 ) * 2, ']}' )
           /* Next row is shallower: close one pair per level climbed */
           when nlvl < lvl then
             lpad ( ']}', ( lvl - nlvl ) * 2, ']}' )
         end ||
         case
           /* A sibling or an ancestor's sibling follows: add a separator */
           when nlvl <= lvl then
             ','
         end j,
         nlvl,
         seq, lvl
  from   leveled
)
-- NOTE(review): listagg returns varchar2, so a large hierarchy may exceed its
-- length limit - confirm the expected tree size.
select json_query (
         listagg ( j )
           within group ( order by seq ),
         '$' returning varchar2 pretty
       ) chart_json
from   jdata;
CHART_JSON
{
"name" : "John",
"title" : "GM",
"children" :
[
{
"name" : "Smith",
"title" : "Manager"
},
{
"name" : "Jason",
"title" : "Manager",
"children" :
[
{
"name" : "Will",
"title" : "IP1"
},
{
"name" : "Jade",
"title" : "AM",
"children" :
[
{
"name" : "Mark",
"title" : "IP2"
},
{
"name" : "Jane",
"title" : "AM2"
}
]
}
]
},
{
"name" : "Tamar",
"title" : "M1"
},
{
"name" : "Dory",
"title" : "M2"
}
]
}

MDX: Query Data analysed

In this query I used a WHERE clause in which the year is 2015 and the quarter is [2013]&[Quarter1]. How is that possible, and why am I getting a result set of 10 records? Actually, the result set is not displaying.
// Test  = ProgramAssessmentPatientCnt + AssessmentPatientCnt.
// Test1 = CCMPatientCnt + CareteamCnt + CCMPatientCnt.
// NOTE(review): CCMPatientCnt is added twice in Test1 - confirm this is intended.
WITH MEMBER [Measures].[Test] AS ( [Measures].[ProgramAssessmentPatientCnt] + [Measures].[AssessmentPatientCnt] )
MEMBER [Measures].[Test1] AS ( [Measures].[CCMPatientCnt] + [Measures].[CareteamCnt] + [Measures].[CCMPatientCnt] )
// Columns: every EnrollmentStatus member crossed with the three measures.
SELECT ( ( { [DimEnrollStatus].[EnrollmentStatus].[EnrollmentStatus] } ),
{ [Measures].[AssessmentPatientCnt], [Measures].[Test], [Measures].[Test1] } ) ON COLUMNS,
// Rows: the first 10 (start 0, count 10) assessment / question / answer
// combinations that are non-empty for the three measures.
Subset (
NonEmpty (
{
( { [DimAssessment].[AssessmentText].[AssessmentText] },
{ [DimAssessment].[QuestionText].[QuestionText] },
{ [DimAssessment].[AnswerText].[AnswerText] } )
},
{ [Measures].[AssessmentPatientCnt], [Measures].[Test], [Measures].[Test1] }
),
0,
10
) ON ROWS
FROM [NavigateCube]
// Slicer: one managed population combined with members of four DimAnchorDate
// hierarchies. The year member is &[2015] while the quarter member key is
// &[2013]&[Quarter1].
// NOTE(review): whether these two members can intersect depends on how the
// DimAnchorDate attribute relationships are defined - confirm in the cube.
WHERE (
{
( { [DimManagedPopulation].[ManagedPopulationName].&[1044]&[LTC Lincoln Centers] },
{ [DimAnchorDate].[Calender Year].&[2015] },
{ [DimAnchorDate].[Calendar Semester Des].[All] },
{ [DimAnchorDate].[Calendar Quarter Des].&[2013]&[Quarter1] },
{ [DimAnchorDate].[English Month Name Desc].[All] } )
} )
Does this return any rows?
WHERE
(
[DimManagedPopulation].[ManagedPopulationName].&[1044]&[LTC Lincoln Centers],
[DimAnchorDate].[Calender Year].&[2015],
//[DimAnchorDate].[Calendar Semester Des].[All],
[DimAnchorDate].[Calendar Quarter Des].&[2013]&[Quarter1],
[DimAnchorDate].[English Month Name Desc].[All]
);
Maybe the following:
WHERE
(
[DimManagedPopulation].[ManagedPopulationName].&[1044]&[LTC Lincoln Centers],
{
[DimAnchorDate].[Calender Year].&[2015],
[DimAnchorDate].[Calendar Semester Des].[All],
[DimAnchorDate].[Calendar Quarter Des].&[2013]&[Quarter1],
[DimAnchorDate].[English Month Name Desc].[All]
}
);