BigQuery - Extract nested children JSON as rows - sql

I have a JSON structure in a field that looks like this. I'm trying to extract each task's viewed and completed dates; there can be any number of tasks in each field.
Sample data
{
"task_1a232445": {
"completedDate": {
"_seconds": 1670371200,
"_nanoseconds": 516000000
},
"viewedDate": {
"_seconds": 1666652400,
"_nanoseconds": 667000000
}
},
"task_1a233445": {
"completedDate": {
"_seconds": 1670198400,
"_nanoseconds": 450000000
},
"viewedDate": {
"_seconds": 1674000000,
"_nanoseconds": 687000000
}
}
}
I have tried to adapt the answer to a previous question I asked, but where there are multiple tasks in a single row (as in the sample data) I can only return the first completedDate.

Consider below approach
-- Returns the top-level property names of the JSON object encoded in `input`,
-- in the order JavaScript's Object.keys reports them.
create temp function extract_keys(input string)
returns array<string>
language js as """
  const parsed = JSON.parse(input);
  return Object.keys(parsed);
""";
-- Returns the top-level property values of the JSON object encoded in `input`,
-- in the same order as the names reported by Object.keys.
create temp function extract_values(input string)
returns array<string>
language js as """
  const parsed = JSON.parse(input);
  return Object.keys(parsed).map((name) => parsed[name]);
""";
-- Flattens the JSON document in `input` into a single-level JSON object whose
-- property names are dot-separated paths (for example "task.completedDate._seconds")
-- and whose values are the non-object leaves found at those paths.
create temp function get_leaves(input string) returns string language js as '''
  const leaves = {};

  // Depth-first walk: recurse into anything typeof reports as 'object',
  // record every other value under its dotted path.
  function collect(node, prefix) {
    for (const name in node) {
      const path = prefix ? prefix + '.' + name : name;
      const child = node[name];
      if (typeof child == 'object') {
        collect(child, path);
      } else {
        leaves[path] = child;
      }
    }
  }

  collect(JSON.parse(input), '');
  return JSON.stringify(leaves);
''';
-- One output row per task, with the completedDate and viewedDate leaves pivoted
-- into columns. Only "<task>.<date name>._seconds" leaves are kept; each is
-- converted from epoch seconds to a DATE.
select *
from (
  select
    parts[safe_offset(0)] as task,
    parts[safe_offset(1)] as date_name,
    date(timestamp_seconds(cast(leaf_value as int64))) as event_date
  from your_table,
    -- flatten the nested document once per row
    unnest([get_leaves(json)]) as flat_json,
    -- pair each dotted path with its value by array position
    unnest(extract_keys(flat_json)) as leaf_path with offset as path_pos
    join unnest(extract_values(flat_json)) as leaf_value with offset as value_pos
      on path_pos = value_pos,
    unnest([struct(split(leaf_path, '.') as parts)])
  where parts[safe_offset(2)] = '_seconds'
)
pivot (any_value(event_date) for date_name in ('completedDate', 'viewedDate'))
if applied to sample data in your question
-- Sample data from the question, exposed as `your_table` with a single STRING
-- column named `json` that holds the whole document.
with your_table as (
select '''{
"task_1a232445": {
"completedDate": {
"_seconds": 1670371200,
"_nanoseconds": 516000000
},
"viewedDate": {
"_seconds": 1666652400,
"_nanoseconds": 667000000
}
},
"task_1a233445": {
"completedDate": {
"_seconds": 1670198400,
"_nanoseconds": 450000000
},
"viewedDate": {
"_seconds": 1674000000,
"_nanoseconds": 687000000
}
}
}
''' as json
)
output is

Related

Syntax error: Unexpected keyword UNNEST at [12:8] while using unnest

I want to use UNNEST in the following query so that I can use the IN keyword, but it throws the error "Unexpected keyword UNNEST".
-- Evaluates the JSONPath expression `json_path` against the JSON text `json`
-- using the external jsonPath() library and returns the result serialised as
-- a JSON string. Returns NULL if parsing or evaluation throws.
CREATE TEMPORARY FUNCTION CUSTOM_JSON_EXTRACT(json STRING, json_path STRING)
RETURNS STRING
LANGUAGE js AS """
  try {
    var doc = JSON.parse(json);
    var matches = jsonPath(doc, json_path);
    return JSON.stringify(matches);
  } catch (e) {
    return null;
  }
"""
OPTIONS (
  library="https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/jsonpath/jsonpath-0.8.0.js.txt"
);
-- UNNEST is a table operator: it belongs in FROM (or after IN) and needs an
-- ARRAY argument, so `SELECT UNNEST(...)` is a syntax error. The UDF used here
-- returns the matches as a JSON-encoded STRING, so JSON_VALUE_ARRAY decodes it
-- into ARRAY<STRING> first. It yields NULL (no rows) when the string is not a
-- JSON array.
-- In the JSONPath filter, `@` denotes the element currently being tested.
SELECT mobile_ad_id
FROM UNNEST(JSON_VALUE_ARRAY(CUSTOM_JSON_EXTRACT('''[
{
"mobile_ad_id1": "409ca39f-447e-4700-9ab1-fb3f743c2a04",
"key":1
},
{
"mobile_ad_id1": "0f5aef1c-d957-41b7-91f8-af51f0c775bf",
"key":1
}
]''', '$[?(@.key=="1")].mobile_ad_id1'))) AS mobile_ad_id;
UNNEST must be given an array, so it should be used with a UDF that returns an array (RETURNS ARRAY<...>). Try this one instead.
-- Evaluates the JSONPath expression `json_path` against the JSON text `json`
-- and returns the matched values as an array.
--   * no match        -> empty array
--   * parse/eval error -> NULL
CREATE TEMPORARY FUNCTION CUSTOM_JSON_EXTRACT(json STRING, json_path STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  try {
    var parsed = JSON.parse(json);
    var matches = jsonPath(parsed, json_path);
    // jsonPath() yields the boolean false (not an empty array) when nothing
    // matches; false is not a valid ARRAY<STRING> value, so normalise it.
    return matches ? matches : [];
  } catch (e) {
    return null;
  }
"""
OPTIONS (
  library="https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/jsonpath/jsonpath-0.8.0.js.txt"
);
-- Expands the array returned by the UDF into one row per matched id.
-- In the JSONPath filter, `@` denotes the element currently being tested;
-- any other character there makes the filter expression fail to evaluate.
SELECT mobile_ad_id
FROM UNNEST(CUSTOM_JSON_EXTRACT('''[
{ "mobile_ad_id1": "409ca39f-447e-4700-9ab1-fb3f743c2a04", "key":1 },
{ "mobile_ad_id1": "0f5aef1c-d957-41b7-91f8-af51f0c775bf", "key":1 }]
''', '$[?(@.key=="1")].mobile_ad_id1')) AS mobile_ad_id;
output:
update:
-- Script variables: the JSON document to search and the JSONPath to evaluate.
DECLARE json_data STRING DEFAULT '''
[ { "mobile_ad_id1": "409ca39f-447e-4700-9ab1-fb3f743c2a04", "key":1 },
{ "mobile_ad_id1": "0f5aef1c-d957-41b7-91f8-af51f0c775bf", "key":1 } ]
''';
-- `@` is JSONPath's "current element" symbol inside a ?() filter.
DECLARE json_path STRING DEFAULT '$[?(@.key=="1")].mobile_ad_id1';
-- One-row table whose id is looked up in the extracted list.
CREATE TEMP TABLE mytable AS
SELECT "409ca39f-447e-4700-9ab1-fb3f743c2a04" AS mobile_ad_id;
-- Evaluates the JSONPath expression `json_path` against the JSON text `json`
-- and returns the matched values as an array.
--   * no match        -> empty array (so `x IN UNNEST(...)` is simply false)
--   * parse/eval error -> NULL
CREATE TEMPORARY FUNCTION CUSTOM_JSON_EXTRACT(json STRING, json_path STRING)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  try {
    var parsed = JSON.parse(json);
    var matches = jsonPath(parsed, json_path);
    // jsonPath() yields the boolean false (not an empty array) when nothing
    // matches; false is not a valid ARRAY<STRING> value, so normalise it.
    return matches ? matches : [];
  } catch (e) {
    return null;
  }
"""
OPTIONS (
  library="https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/jsonpath/jsonpath-0.8.0.js.txt"
);
-- Keeps the rows of `mytable` whose id appears among the values the JSONPath
-- extracts from json_data. `mytable` is created with the single column
-- mobile_ad_id, so naming it returns the same columns as `*`.
SELECT
  t.mobile_ad_id
FROM `mytable` AS t
WHERE t.mobile_ad_id IN UNNEST(CUSTOM_JSON_EXTRACT(json_data, json_path));

How to convert a json response to match schema (not manually)?

I'm trying to validate a schema for complex JSON. We can easily compare a schema with API response by below command
And match response == response_SCHEMA
(where "response_SCHEMA" is json schema)
For small json we can manually create:
Actual API response:
{ "id": "123", "name": "abc", "type": "Mumbai", "owner": { "name": "Mr Singh", "type": "Business", "licenseNo": "ASL8989" } }
Converted the response to below - manually
{ "id": "#number", "name": "#string", "type": "#string", "owner": { "name": "#string", "type": "#string", "licenseNo": "#string" } }
How to create this kind of schema automatically for a complex big json having 300-400 lines? So, we can compare it with API response with Karate.
The point of the schema design is that you can easily cut and paste an actual JSON and either use it as it is (data match, recommended) or edit it to use #string etc (schema match).
When you say 300-400 lines, most likely you mean an array of JSON. All you need to do is specify the schema of the "repeating part" and then use match each: https://github.com/intuit/karate#match-each
# `actual` is an array of similar objects; `schema` describes one element.
* def actual = [{ a: 1, b: 'x' }, { a: 2, b: 'y' }]
* def schema = { a: '#number', b: '#string' }
# `match each` applies the schema to every element of the array.
* match each actual == schema
The short answer is there is no automatic way to do it. Typically you never need to do more than a few lines. Maybe you can write your own custom utility.
@Mayank I also faced this issue. Unfortunately, I didn't find any existing option.
So I created my own small JS to convert the actual JSON into JSON Schema which is compatible with Karate Fuzzy Match.
The output may look like this
enter image description here
Hope This Helps!...
/**
 * Entry point for the "Generate Schema" button.
 *
 * Reads JSON text from the #code textarea, writes it back pretty-printed, and
 * writes a Karate fuzzy-match schema for it into the #output textarea.
 *
 * Output shape:
 *   - "response": the input with every primitive replaced by "#<type>" and
 *     every array replaced by "#array";
 *   - "<key>Arr": one top-level entry per array property, describing the
 *     FIRST element of that array (entries for empty arrays are dropped).
 *
 * Empty input is ignored. Invalid JSON puts the parse error into #output and
 * stops, instead of failing later with an unrelated TypeError.
 */
function main() {
  const codeBox = document.getElementById('code');
  const outputBox = document.getElementById('output');

  if (!codeBox.value) {
    return;
  }

  let json;
  try {
    json = JSON.parse(codeBox.value);
  } catch (e) {
    outputBox.value = String(e);
    return;
  }
  // Normalise the input box to the pretty-printed form of what was parsed.
  codeBox.value = JSON.stringify(json, null, 2);

  // Filled by getArray() while convertJson() walks the document.
  const outArr = {};

  const rootType = getJSType(json);
  // Object.keys() would throw on a null root; describe scalar roots directly.
  const output = (rootType === 'object' || rootType === 'array')
    ? convertJson(json, 'response', true)
    : { response: '#' + rootType };

  Object.assign(output, outArr);
  // An empty array has no first element to describe; drop its "#undefined" entry.
  Object.keys(output).forEach(function (key) {
    if (output[key] === '#undefined') {
      delete output[key];
    }
  });
  outputBox.value = JSON.stringify(output, null, 2);

  /**
   * Builds the schema for one object (or array, treated as index -> value).
   * With isParent the result is wrapped as { [keyName]: schema }; otherwise the
   * bare schema is returned, or undefined when the object has no keys.
   */
  function convertJson(json, keyName, isParent) {
    const schema = {};
    Object.keys(json).forEach(function (key) {
      const value = json[key];
      const type = getJSType(value);
      if (type === 'object') {
        schema[key] = convertJson(value, key, false);
      } else {
        schema[key] = '#' + type;
        if (type === 'array') {
          getArray(value[0], key, false);
        }
      }
    });

    const hasKeys = Object.keys(schema).length > 0;
    if (isParent) {
      const wrapped = {};
      if (hasKeys) {
        wrapped[keyName] = schema;
      }
      return wrapped;
    }
    return hasKeys ? schema : undefined;
  }

  /**
   * Records the schema of an array's first element under "<keyName>Arr".
   * The third parameter is accepted for call compatibility and is unused.
   */
  function getArray(json, keyName, isParent) {
    const type = getJSType(json);
    outArr[keyName + 'Arr'] = (type === 'object' || type === 'array')
      ? convertJson(json, keyName, false)
      : '#' + type;
  }

  /**
   * Names the type of a value: "undefined", "null", "array", or the typeof
   * result ("boolean", "number", "string", "object", "function").
   */
  function getJSType(valToChk) {
    if (valToChk === undefined) {
      return 'undefined';
    }
    if (valToChk === null) {
      return 'null';
    }
    if (Array.isArray(valToChk)) {
      return 'array';
    }
    return typeof valToChk;
  }
}
/**
 * "Format JSON" button handler: re-writes the #code textarea as JSON indented
 * by two spaces and clears #output. If the text does not parse, the error is
 * placed in #output instead. Does nothing when #code is empty.
 */
function formatJson() {
  const source = document.getElementById('code').value;
  if (!source) {
    return;
  }
  try {
    const parsed = JSON.parse(source);
    document.getElementById('code').value = JSON.stringify(parsed, null, 2);
    document.getElementById('output').value = '';
  } catch (err) {
    document.getElementById('output').value = err;
  }
}
/**
 * "Minify JSON" button handler: re-writes the #code textarea as JSON with no
 * insignificant whitespace and clears #output. If the text does not parse,
 * the error is placed in #output instead. Does nothing when #code is empty.
 */
function minifyJson() {
  const source = document.getElementById('code').value;
  if (!source) {
    return;
  }
  try {
    const parsed = JSON.parse(source);
    // A null replacer and null indent are the defaults of JSON.stringify.
    document.getElementById('code').value = JSON.stringify(parsed);
    document.getElementById('output').value = '';
  } catch (err) {
    document.getElementById('output').value = err;
  }
}
<!DOCTYPE html>
<html>
<head>
<title>JavaScript Code Runner</title>
</head>
<body>
<h3>Generate the Json Schema for Response</h3>
<!-- Left: JSON input (#code). Right: generated schema or error text (#output). -->
<div style="display: flex;">
<textarea id="code" style="flex: 1; height: 80vh;" spellcheck="false"></textarea>
<textarea id="output" style="flex: 1; height: 80vh; overflow: auto;" spellcheck="false"></textarea>
</div>
<!-- Spacer above the buttons; CSS lengths need a unit, so 12px rather than a bare 12. -->
<div style="display: flex; height: 12px;"></div>
<button onclick="runCode()">Generate Schema</button>
<button onclick="formatJson()">Format JSON</button>
<button onclick="minifyJson()">Minify JSON</button>
<!-- Loaded for main(), formatJson() and minifyJson(), which the buttons call. -->
<script src="src/jsonconverter.js"></script>
<script>
// Button handler: delegates to main().
function runCode() {
main();
}
</script>
</body>
</html>

Azure Stream Analytics: Get Array Elements by name

I was wondering if it is possible to get the elements of the array by property name rather than by position. For example, this is my incoming data:
{
"salesdata": {
"productsbyzone": {
"zones": [{
"eastzone": "shirts, trousers"
},
{
"westzone": "slacks"
},
{
"northzone": "gowns"
},
{
"southzone": "maxis"
}
]
}
}
}
I intend to move this to a SQL database and I have columns within the database for each zone. The problem is that the order of different zones changes within each json. I was successfully using the following query until I realized that the position of the zones changes within each json:
-- Positional extraction: each zone is read from a fixed index of the `zones`
-- array, so the output is only correct while the array order never changes.
WITH salesData AS
(
    SELECT
        GetArrayElement(c.salesdata.productsbyzone.zones, 0) AS eastzone,
        GetArrayElement(c.salesdata.productsbyzone.zones, 1) AS westzone,
        GetArrayElement(c.salesdata.productsbyzone.zones, 2) AS northzone,
        GetArrayElement(c.salesdata.productsbyzone.zones, 3) AS southzone
    FROM [sales-data] AS c
)
-- Each array element is a record with a single property named after its zone.
SELECT
    eastzone.eastzone AS PRODUCTS_EAST,
    westzone.westzone AS PRODUCTS_WEST,
    northzone.northzone AS PRODUCTS_NORTH,
    southzone.southzone AS PRODUCTS_SOUTH
INTO PRODUCTSDATABASE
FROM salesData
Need a way to reference these fields by the name rather than by the position.
I recommend a solution: Use the JavaScript UDF in the azure stream job to complete the columns sort.
Please refer to my sample:
Input data (with the zone order shuffled):
{
"salesdata": {
"productsbyzone": {
"zones": [{
"westzone": "slacks"
},
{
"eastzone": "shirts, trousers"
},
{
"northzone": "gowns"
},
{
"southzone": "maxis"
}
]
}
}
}
js udf code:
/**
 * Collapses the `zones` array (objects with one zone property each, in any
 * order) into a single record with a fixed set of zone properties.
 *
 * @param {Array} arg  array such as [{ westzone: "slacks" }, { eastzone: "..." }]
 * @returns {Object}   { eastzone, westzone, northzone, southzone }; a zone that
 *                     is absent from the input keeps the default "".
 *
 * Only the FIRST property of each element is inspected. A missing or non-array
 * argument, and null or non-object elements, are tolerated and leave the
 * defaults in place instead of throwing.
 */
function test(arg) {
    // The property order here is the column order of the result.
    var obj = {
        eastzone: "",
        westzone: "",
        northzone: "",
        southzone: ""
    };
    if (!Array.isArray(arg)) {
        return obj;
    }
    for (var i = 0; i < arg.length; i++) {
        var entry = arg[i];
        if (entry === null || typeof entry !== "object") {
            continue;
        }
        var zone = Object.keys(entry)[0];
        // Copy only the known zones; unknown property names are ignored.
        if (Object.prototype.hasOwnProperty.call(obj, zone)) {
            obj[zone] = entry[zone];
        }
    }
    return obj;
}
You can define the order you want in the obj parameter
SQL:
-- Step 1: let the JavaScript UDF turn the zones array into one record with
-- named zone properties.
WITH sorted AS
(
    SELECT
        udf.test(jsoninput.salesdata.productsbyzone.zones) AS result
    FROM jsoninput
),
-- Step 2: expose each property of that record as its own column.
zone_columns AS
(
    SELECT
        sorted.result.eastzone AS east,
        sorted.result.westzone AS west,
        sorted.result.northzone AS north,
        sorted.result.southzone AS south
    FROM sorted
)
SELECT
    zone_columns.east,
    zone_columns.west,
    zone_columns.north,
    zone_columns.south
INTO
    jaycosmos
FROM
    zone_columns
Output:
Hope it helps you.
You can use GetArrayElement to return an array element and then access each of its properties. Please refer to the query below:
-- Reads the first element of the zones array and selects its properties by name.
-- NOTE(review): only element 0 is read. In the sample input each element
-- carries a single zone property, so three of these four columns would
-- presumably be NULL for such events - confirm this is what is wanted.
WITH salesData AS
(
    SELECT
        GetArrayElement(s.salesdata.productsbyzone.zones, 0) AS z
    FROM [sales-data] AS s
)
SELECT
    z.eastzone,
    z.westzone,
    z.northzone,
    z.southzone
INTO PRODUCTSDATABASE
FROM salesData

ctools table component conditional formatting

I have no idea if this is the correct forum to ask this question, but I figured I would give it a go - does anyone use Pentaho Ctools? I am trying to apply conditional formatting to column 8 of my table component, but so far to no avail. Any thoughts would be greatly appreciated!
/**
 * Pre-execution hook for the table component: registers options for the
 * "formattedText" add-in so that cells of column index 8 are rendered green
 * when their value is <= 30 and red otherwise. Cells in other columns get no
 * options (the callback returns undefined for them).
 */
function f() {
    this.setAddInOptions("colType", "formattedText", function (statusReport) {
        var days = statusReport.value;
        // NOTE(review): colIdx is presumably 0-based - confirm whether the 8th
        // visible column should be tested as 7 instead.
        if (statusReport.colIdx == 8) {
            if (days <= 30) {
                return { textFormat: function (v, st) { return "<span style='color:green'>" + v + "</span>"; } };
            } else {
                return { textFormat: function (v, st) { return "<span style='color:red'>" + v + "</span>"; } };
            }
        }
    });
}
Pre Execution Function:
/**
 * Pre-execution hook: registers options for the "formattedText" add-in under
 * "colType". The callback colours a cell red (#FF0000) when its value is > 30
 * and black (#000000) when it is <= 30; for values that satisfy neither
 * comparison it returns undefined.
 */
function f() {
    this.setAddInOptions("colType", "formattedText", function (cell) {
        var days = cell.value;

        // Empty cell in column index 7.
        // NOTE(review): this writes to `this.value`, not to the cell state -
        // confirm that is the intended target.
        if (cell.colIdx == 7 && !cell.value) {
            this.value = '00000';
        }

        var colour;
        if (days > 30) {
            colour = '#FF0000';
        } else if (days <= 30) {
            colour = '#000000';
        } else {
            return;
        }
        return {
            textFormat: function (v, st) {
                return "<span style='color:" + colour + "'>" + v + "</span>";
            }
        };
    });
}
You also have to update the Column Types in Advanced Properties - make the regular column types "string" or whatever they are and change the formatted column to "formattedText".

dojo ItemFileReadStore.getValue mixed return value is not handled as string

I'm using dojo.data.ItemFileReadStore to query a JSON file containing the data. The main purpose is to look up translations at the JS level.
The Json data has "id" the word and "t" the translation
/**
 * Looks up the translation of `word` in the language file and reports it
 * through `onTranslated`.
 *
 * The store's fetch() delivers its results through the onComplete/onError
 * callbacks, and whatever those callbacks return is discarded, so the result
 * cannot be produced with `return`; it is handed to `onTranslated` instead.
 *
 * @param {string}   word          the id to look up
 * @param {function} [onTranslated] called once with the translation, or with
 *                                 `word` itself when there is no match, the
 *                                 stored value is not a string, or the fetch
 *                                 fails. Optional: without it the lookup
 *                                 still runs but the result is dropped.
 * @returns {undefined}
 */
function translate(word, onTranslated)
{
    var json = '/my/language/path/es.json';
    var reader = new dojo.data.ItemFileReadStore({
        url: json
    });

    function deliver(result) {
        if (typeof onTranslated === 'function') {
            onTranslated(result);
        }
    }

    reader.fetch({
        query: { id: word },
        onComplete: function (items, request) {
            if (items.length > 0) {
                var t = reader.getValue(items[0], 't');
                if (dojo.isString(t)) {
                    deliver(t);
                    return;
                }
            }
            deliver(word);
        },
        onError: function (error, request) {
            deliver(word);
        }
    });
}
The return value is always undefined, whether there is a translation or not. Any ideas?
I tried typecasting with no success.
You can do it like this:
/**
 * Returns the 't' value of the store item whose id equals `wordId`, or
 * `wordId` itself if onItem has not been called by the time fetch() returns.
 * The store is built from the in-memory `storeData` object.
 * If onItem fires more than once, the last item wins.
 */
function translate(wordId) {
    var result = wordId;
    var lookup = new dojo.data.ItemFileReadStore({ data: storeData });
    var request = {
        query: { id: wordId },
        onItem: function (match) {
            result = lookup.getValue(match, 't');
        }
    };
    lookup.fetch(request);
    return result;
}