Query an array element in a JSONB object - sql

I have a jsonb column called data in a table called reports. Here is what report.id = 1 looks like
[
{
"Product": [
{
"productIDs": [
"ABC1",
"ABC2"
],
"groupID": "Food123"
},
{
"productIDs": [
"EFG1"
],
"groupID": "Electronic123"
}
],
"Package": [
{
"groupID": "Electronic123"
}
],
"type": "Produce"
},
{
"Product": [
{
"productIDs": [
"ABC1",
"ABC2"
],
"groupID": "Clothes123"
}
],
"Package": [
{
"groupID": "Food123"
}
],
"type": "Wearables"
}
]
and here is what report.id = 2 looks like:
[
{
"Product": [
{
"productIDs": [
"XYZ1",
"XYZ2"
],
"groupID": "Food123"
}
],
"Package": [],
"type": "Wearable"
},
{
"Product": [
{
"productIDs": [
"ABC1",
"ABC2"
],
"groupID": "Clothes123"
}
],
"Package": [
{
"groupID": "Food123"
}
],
"type": "Wearables"
}
]
I am trying to get a list of all entries in reports table where at least one of data column's element has following:
type = Produce AND
where any elements of Product array OR any elements of Product array's groupID start with Food
So from the example above this query will only return the first index since
The type = Produce
groupID starts with Food for first element of Product array
The second index will be filtered out because type is not Produce.
I am not sure how to query to do AND query for groupID. Here is what I have tried to get all entries for type Produce
-- Asker's attempt: jsonb_to_recordset exposes only the "type" key of each top-level
-- array element, so this filters on type but cannot yet test any Product groupID.
select * from reports r, jsonb_to_recordset(r.data) as items(type text) where items.type like 'Produce';

Sample structure and result: dbfiddle
-- Returns each report at most once when at least one top-level element of
-- r.data has type = 'Produce' and at least one of that element's Product
-- entries has a groupID starting with 'Food'.
-- EXISTS is used instead of joining the expanded arrays to the outer query,
-- because such a join repeats a report once per matching Product entry.
select r.*
from reports as r
where exists (
    select 1
    from jsonb_array_elements(r.data) as item
    cross join jsonb_array_elements(item.value -> 'Product') as product
    where item.value ->> 'type' = 'Produce'
      and product.value ->> 'groupID' like 'Food%'
);

Related

select node value from json column type

I have a table called raw_data with three columns: ID, timestamp, and payload. The payload column is of type json and holds values such as:
{
"data": {
"author_id": "1461871206425108480",
"created_at": "2022-08-17T23:19:14.000Z",
"geo": {
"coordinates": {
"type": "Point",
"coordinates": [
-0.1094,
51.5141
]
},
"place_id": "3eb2c704fe8a50cb"
},
"id": "1560043605762392066",
"text": " ALWAYS # London, United Kingdom"
},
"matching_rules": [
{
"id": "1560042248007458817",
"tag": "london-paris"
}
]
}
From this I want to select the rows where the coordinates are available, such as [-0.1094, 51.5141] in this case.
-- Asker's attempt (reported to return nothing): json_each() expands the top-level
-- keys of payload, and the path '$.data.geo.' is then applied to each expanded value.
SELECT *
FROM raw_data, json_each(payload)
WHERE json_extract(json_each.value, '$.data.geo.') IS NOT NULL
LIMIT 20;
Nothing was returned.
EDIT
NOT ALL json objects have the coordinates node. For example this value:
{
"data": {
"author_id": "1556031969062010881",
"created_at": "2022-08-18T01:42:21.000Z",
"geo": {
"place_id": "006c6743642cb09c"
},
"id": "1560079621017796609",
"text": "Dear Desperate sister say husband no dey oo."
},
"matching_rules": [
{
"id": "1560077018183630848",
"tag": "kaduna-kano-katsina-dutse-zaria"
}
]
}
The correct path is '$.data.geo.coordinates.coordinates' and there is no need for json_each():
-- Keep only the rows whose payload contains the nested coordinates array;
-- json_extract() yields NULL when the path is absent from the document.
SELECT rd.*
FROM raw_data AS rd
WHERE json_extract(rd.payload, '$.data.geo.coordinates.coordinates') IS NOT NULL;
See the demo.

Getting the last datum in a vega dataset

I have a data source A and I'd like to create a new data source B containing just the last element of A. What is the best way to do this in Vega?
This is relatively straight forward to do. Although I am slightly confused by your use of "max" in the aggregation since this isn't the last value?
Either way here is my solution for obtaining the last value in a dataset using this series of transforms,
"transform": [
  {"type": "window", "ops": ["row_number"]},
  {
    "type": "joinaggregate",
    "fields": ["row_number"],
    "ops": ["max"],
    "as": ["max_row_number"]
  },
  {"type": "filter", "expr": "datum.row_number==datum.max_row_number"}
]
I was able to get this working in the Vega Editor using the following:
{
  "$schema": "https://vega.github.io/schema/vega/v5.json",
  "data": [
    {
      "name": "source",
      "url": "https://raw.githubusercontent.com/vega/vega/master/docs/data/cars.json",
      "transform": [
        {
          "type": "filter",
          "expr": "datum['Horsepower'] != null && datum['Miles_per_Gallon'] != null && datum['Acceleration'] != null"
        }
      ]
    },
    {
      "name": "avg",
      "source": "source",
      "transform": [
        {
          "type": "aggregate",
          "groupby": ["Horsepower"],
          "fields": ["Miles_per_Gallon"],
          "ops": ["average"],
          "as": ["Avg_Miles_per_Gallon"]
        }
      ]
    },
    {
      "name": "last",
      "source": "avg",
      "transform": [
        {
          "type": "aggregate",
          "fields": ["Horsepower"],
          "ops": ["max"],
          "as": ["maxHorsepower"]
        },
        {
          "type": "lookup",
          "from": "avg",
          "key": "Horsepower",
          "fields": ["maxHorsepower"],
          "values": ["Horsepower", "Avg_Miles_per_Gallon"]
        }
      ]
    }
  ]
}
| maxHorsepower | Horsepower | Avg_Miles_per_Gallon |
| --- | --- | --- |
| 230 | 230 | 16 |
I'd be interested to know if there are better ways, but this worked for me.

How to update multiple occurrence a specific value of a object present in array of object within Postgres JSON Field

Here is my JSON field, which has multiple users with the same name. I want to update all users whose name is Devang to Dev.
JSON
{
"user": [
{
"user_name": "Devang",
"user_weight": 0.7676846955248864
},
{
"user_name": "Meet",
"user_weight": 0.07447325861051013
},
{
"user_name": "Devang",
"user_weight": 0.056163873153859706
}
],
"address": [
{
"address_name": "India"
}
]
}
After Update The JSON would be
{
"user": [
{
"user_name": "Dev",
"user_weight": 0.7676846955248864
},
{
"user_name": "Meet",
"user_weight": 0.07447325861051013
},
{
"user_name": "Dev",
"user_weight": 0.056163873153859706
}
],
"address": [
{
"address_name": "India"
}
]
}
I have tried the query below, but it updates only the first occurrence because of the subquery.
-- Asker's attempt: the CTE emits one jsonb path per array element whose
-- user_name is 'Devang' (ordinality is 1-based, hence index-1 for the
-- 0-based position inside the jsonb path).
with cte as (
select id, ('{user,'||index-1||',user_name}')::text[] as json_path
from user_table, jsonb_array_elements(json_field->'user')
with ordinality arr(vals,index) where arr.vals->>'user_name' ='Devang'
)
-- NOTE(review): a row with several matching elements joins to several cte rows,
-- but UPDATE ... FROM applies only one joined row per target row — presumably
-- why only the first occurrence is changed; confirm against the PostgreSQL docs.
update user_table
set json_field = jsonb_set(json_field,cte.json_path,'"Dev"',false)
from cte where user_table.id=cte.id;
Please also look at this DEMO
Any answer will be appreciated
You may use string function REPLACE:
-- Renames every user called 'Devang' to 'Dev' inside the "user" array.
-- The array is rebuilt element by element instead of editing the document's
-- text form, so the result does not depend on how jsonb is rendered as text
-- and only "user_name" keys directly under "user" are affected.
UPDATE user_table
SET json_field = jsonb_set(
    json_field,
    '{user}',
    (
        SELECT jsonb_agg(
            CASE
                WHEN u.elem ->> 'user_name' = 'Devang'
                    THEN jsonb_set(u.elem, '{user_name}', '"Dev"')
                ELSE u.elem
            END
            ORDER BY u.idx  -- keep the original element order
        )
        FROM jsonb_array_elements(user_table.json_field -> 'user')
            WITH ORDINALITY AS u(elem, idx)
    )
)
-- Touch only rows that contain a matching user. This also guarantees the
-- aggregate above is never NULL (jsonb_set would otherwise return NULL).
WHERE (json_field -> 'user') @> '[{"user_name": "Devang"}]'::jsonb;
https://dbfiddle.uk/?rdbms=postgres_10&fiddle=fa36275977f85a1233bcbec150ada266

BigQuery concat nested array json

I have data that looks like
{
"Attributes": [
{
"values": [
{
"value": "20003"
},
{
"value": "30075"
},
{
"value": "40060"
}
],
"name": "price"
}
],
"attr2" : "val"
}
The output I want is concat all the values in the nested json array
price, "20003, 30075, 40060"
I tried some queries but failed to get the correct output.
You can use JSON_EXTRACT_ARRAY and ARRAY_TO_STRING:
-- Sample input: one JSON document held as a STRING.
WITH test_json AS (
SELECT
'''{
"Attributes": [
{
"values": [
{
"value": "20003"
},
{
"value": "30075"
},
{
"value": "40060"
}
],
"name": "price"
}
],
"attr2" : "val"
}''' AS json_string
)
-- One output row per entry of "Attributes": the attribute name plus all of its
-- nested "value" strings joined with ', '. Iterating over the array (instead of
-- addressing Attributes[0]) handles documents with several attributes, and
-- WITH OFFSET keeps the values in their original array order.
SELECT
  JSON_VALUE(attribute, '$.name') AS name,
  ARRAY_TO_STRING(
    ARRAY(
      SELECT JSON_VALUE(value_item, '$.value')
      FROM UNNEST(JSON_QUERY_ARRAY(attribute, '$.values')) AS value_item WITH OFFSET AS pos
      ORDER BY pos
    ),
    ', '
  ) AS concatenated_values
FROM test_json
CROSS JOIN UNNEST(JSON_QUERY_ARRAY(test_json.json_string, '$.Attributes')) AS attribute

Nested "for loop" searches in SQL - Azure CosmosDB

I am using Cosmos DB and have a document with the following simplified structure:
{
"id1":"123",
"stuff": [
{
"id2": "stuff",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big ostrich",
"meta": 1
}
]
},
{
"id3": "default",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
}
My issue is - I have an array of these documents and need to search name to see if it matches my search word. For example I want both big trees and trees to return if a user types in trees.
So currently I push every document into an array and do the following:
For each document
for each stuff
for each a.b.c.d[0].e
for each classes
var splice = name.split(' ')
if (splice.includes(searchWord))
return id1, id2 and id3.
Using cosmosDB I am using SQL with the following code:
// Asker's current approach: the query has no WHERE clause, so every document in
// the collection is returned and the search itself runs client-side in the callback.
client.queryDocuments(
collection,
`SELECT * FROM root r`
).toArray((err, results) => {stuff});
This effectively brings every document in my collection into an array to perform the search manually above as mentioned.
This is going to cause issues when I have 1000s or 1,000,000s of documents in the array and I believe I should be leveraging the search mechanics available within Cosmos itself. Is anyone able to help me to work out what SQL query would be able to perform this type of function?
Having searched everything is it also possible to search the 5 latest documents?
Thanks for any insight in advance!
1.Is anyone able to help me to work out what SQL query would be able to
perform this type of function?
According to your sample and description, I suggest using ARRAY_CONTAINS in Cosmos DB SQL. Please refer to my sample:
sample documents:
[
{
"id1": "123",
"stuff": [
{
"id2": "stuff",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big ostrich",
"meta": 1
}
]
},
{
"id3": "default",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
},
{
"id1": "456",
"stuff": [
{
"id2": "stuff2",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things2",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "trees",
"meta": 1
}
]
},
{
"id3": "default2",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
},
{
"id1": "789",
"stuff": [
{
"id2": "stuff3",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things3",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big",
"meta": 1
}
]
},
{
"id3": "default3",
"name": "other",
"classes": [
{
"name": "big trees",
"meta": 1
}
]
}
]
}
]
}
}
}
}
]
}
]
query :
SELECT DISTINCT c.id1, stuff.id2, e.id3
FROM c
JOIN stuff IN c.stuff
JOIN d IN stuff.a.b.c.d
JOIN e IN d.e
WHERE EXISTS (
    SELECT VALUE cls
    FROM cls IN e.classes
    WHERE cls.name IN ("trees", "big trees")
)
output:
2.Having searched everything is it also possible to search the 5 latest
documents?
Per my research, features like LIMIT are not supported in Cosmos DB so far. However, TOP is supported. So if you can add a sort field (such as a date or id), you could use this SQL:
select top 5 * from c order by c.sort desc