Transform JSON response with lodash - lodash

I'm new to lodash (v3.10.1) and am having a hard time understanding it.
Hope someone can help.
I have an input something like this:
[
{"id":1,"name":"Matthew","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":2,"name":"Mark","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":3,"name":"Luke","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":4,"name":"John","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}},
{"id":5,"name":"Paul","company":{"id":1,"name":"abc","industry":{"id":5,"name":"Medical"}}}
];
I would like to output this or close to this:
{
"industries": [
{
"industry":{
"id":5,
"name":"Medical",
"companies": [
{
"company":{
"id":1,
"name":"abc",
"employees": [
{"id":1,"name":"Matthew"},
{"id":2,"name":"Mark"},
{"id":3,"name":"Luke"},
{"id":4,"name":"John"},
{"id":5,"name":"Paul"}
]
}
}
]
}
}
]
}

Here's something that gets you close to what you want. I structured the output to be an object instead of an array. You don't need the industries or industry properties in your example output. The output structure looks like this:
{
"industry name": {
"id": "id of industry",
"companies": [
{
"company": "name of company",
"id": "id of company",
"employees": [
{
"id": "id of employee",
"name": "name of employee"
}
]
}
]
}
}
I use the _.chain function to wrap the collection with a lodash wrapper object. This enables me to explicitly chain lodash functions.
From there, I use the _.groupBy function to group elements of the collection by their industry name. Since I'm chaining, I don't have to pass in the array again to the function. It's implicitly passed via the lodash wrapper. The second argument of the _.groupBy is the path to the value I want to group elements by. In this case, it's the path to the industry name: company.industry.name. _.groupBy returns an object with each employee grouped by their industry (industries are keys for this object).
I then use _.transform to transform each industry object. _.transform is essentially _.reduce, except that the result returned from _.transform is always an object.
The function passed to the _.transform function gets executed against each key/value pair in the object. In the function, I use _.groupBy again to group employees by company. Based off the results of _.groupBy, I map the values to the final structure I want for each employee object.
I then call the _.value function because I want to unwrap the output collection from the lodash wrapper object.
I hope this made sense. If it doesn't, I highly recommend reading Lo-Dash Essentials. After reading the book, I finally got why lodash is so useful.
"use strict";
var _ = require('lodash');

// Sample input: a flat list of employees. Each record embeds its company,
// which in turn embeds its industry.
var emps = [
  { "id": 1, "name": "Matthew", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
  { "id": 2, "name": "Mark", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
  { "id": 3, "name": "Luke", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
  { "id": 4, "name": "John", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } },
  { "id": 5, "name": "Paul", "company": { "id": 1, "name": "abc", "industry": { "id": 5, "name": "Medical" } } }
];

// Builds:
//   { <industry name>: { id, companies: [ { company, id, employees: [ { id, name } ] } ] } }
var result = _.chain(emps)
  // Bucket the employees by industry name; the names become the output keys.
  .groupBy("company.industry.name")
  // Replace each bucket with its industry summary. The accumulator is mutated
  // in place, and is named `byIndustry` so it no longer shadows the outer
  // `result` variable.
  .transform(function(byIndustry, employees, industry) {
    byIndustry[industry] = {
      // The bucket was grouped by industry name, so its first record is
      // taken as representative for the industry id.
      id: _.get(employees[0], "company.industry.id"),
      companies: _.map(_.groupBy(employees, "company.name"), function(staff, company) {
        return {
          company: company,
          id: _.get(staff[0], "company.id"),
          // An explicit wrapper is used instead of
          // _.partialRight(_.pick, [...]): _.map invokes its iteratee with
          // (value, index, collection), and partialRight would forward the
          // index and the whole collection to _.pick as extra keys.
          employees: _.map(staff, function(employee) {
            return _.pick(employee, ["id", "name"]);
          })
        };
      })
    };
  })
  .value();
Results from your example are as follows:
{
"Medical": {
"id": 5,
"companies": [
{
"company": "abc",
"id": 1,
"employees": [
{
"id": 1,
"name": "Matthew"
},
{
"id": 2,
"name": "Mark"
},
{
"id": 3,
"name": "Luke"
},
{
"id": 4,
"name": "John"
},
{
"id": 5,
"name": "Paul"
}
]
}
]
}
}

If you ever want the exact same structure as in the question, I solved it using the jsonata library:
(
/* Step 1: flatten every employee record for easier property access.
   The transform adds the prefixed employee_/company_/industry_ fields listed
   in the update object below and removes the original "company", "id" and
   "name" keys. */
$step1 := $ ~> | $ |
{
"employee_id": id,
"employee_name": name,
"company_id": company.id,
"company_name": company.name,
"industry_id": company.industry.id,
"industry_name": company.industry.name
},
["company", "id", "name"] |;
/* Step 2: regroup the flat records.
   Records are grouped under a key built from the industry id and, inside each
   industry, under a key built from the company id. Each grouped object is then
   piped through $each to wrap every entry as {"industry": ...} or
   {"company": ...}, and the surrounding [ ] keep the results as arrays.
   NOTE(review): the `#$I` after $distinct(industry_id) binds a variable that
   is not referenced anywhere else in this expression - confirm it is needed. */
$step2 := {
"industries":
[($step1{
"industry" & $string(industry_id): ${
"id": $distinct(industry_id)#$I,
"name": $distinct(industry_name),
"companies": [({
"company" & $string(company_id): {
"id": $distinct(company_id),
"name": $distinct(company_name),
"employees": [$.{
"id": $distinct(employee_id),
"name": $distinct(employee_name)
}]
}
} ~> $each(function($v){ {"company": $v} }))]
}
} ~> $each(function($v){ {"industry": $v} }))]
};
)
You can see it in action on the live demo site: https://try.jsonata.org/VvW4uTRz_

Related

select node value from json column type

I have a table called raw_data with three columns: ID, timestamp, and payload. The payload column is a JSON type with values such as:
{
"data": {
"author_id": "1461871206425108480",
"created_at": "2022-08-17T23:19:14.000Z",
"geo": {
"coordinates": {
"type": "Point",
"coordinates": [
-0.1094,
51.5141
]
},
"place_id": "3eb2c704fe8a50cb"
},
"id": "1560043605762392066",
"text": " ALWAYS # London, United Kingdom"
},
"matching_rules": [
{
"id": "1560042248007458817",
"tag": "london-paris"
}
]
}
From this I want to select the rows where the coordinates are available, such as [-0.1094,51.5141] in this case.
-- Non-working attempt from the question: the JSON path stops at '$.data.geo.'
-- and is applied to the rows produced by json_each(payload) rather than to
-- the payload column itself.
SELECT *
FROM raw_data, json_each(payload)
WHERE json_extract(json_each.value, '$.data.geo.') IS NOT NULL
LIMIT 20;
Nothing was returned.
EDIT
NOT ALL json objects have the coordinates node. For example this value:
{
"data": {
"author_id": "1556031969062010881",
"created_at": "2022-08-18T01:42:21.000Z",
"geo": {
"place_id": "006c6743642cb09c"
},
"id": "1560079621017796609",
"text": "Dear Desperate sister say husband no dey oo."
},
"matching_rules": [
{
"id": "1560077018183630848",
"tag": "kaduna-kano-katsina-dutse-zaria"
}
]
}
The correct path is '$.data.geo.coordinates.coordinates' and there is no need for json_each():
-- Select the rows whose payload has a value at data.geo.coordinates.coordinates.
-- The path is evaluated directly against the payload column.
SELECT *
FROM raw_data
WHERE json_extract(payload, '$.data.geo.coordinates.coordinates') IS NOT NULL;
See the demo.

JSON element extraction from response based on scenario outline examples or external file

This is my API response. I want to extract the value of Id based on the displayNumber. The display number is taken from a list of values given in the Examples table or a CSV file.
{
"Acc": [
{
"Id": "2b765368696b3441673633325",
"code": "SGD",
"val": 406030.83,
"displayNumber": "8957",
"curval": 406030.83
},
{
"Id": "4e676269685a73787472355776764b50717a4",
"code": "GBP",
"val": 22.68,
"displayNumber": "1881",
"curval": 22.68
},
{
"Id": "526e666d65366e67626244626e6266467",
"code": "SGD",
"val": 38404.44,
"displayNumber": "1004",
"curval": 38404.44
},
],
"combinations": [
{
"displayNumber": "3444",
"Code": "SGD",
"Ids": [
{
"Id": "2b765368696b34416736333254462"
},
{
"Id": "4e676269685a7378747235577"
},
{
"Id": "526e666d65366e6762624d"
}
],
"destId": "3678434b643530456962435272d",
"curval": 3.85
},
{
"displayNumber": "8957",
"code": "SGD",
"Ids": [
{
"Id": "3678434b6435304569624357"
},
{
"Id": "4e676269685a73787472355776764b50717a4"
},
{
"Id": "526e666d65366e67626244626e62664679"
}
],
"destId": "2b765368696b344167363332544",
"curval": 406030.83
},
{
"displayNumber": "1881",
"code": "GBP",
"Ids": [
{
"Id": "3678434b643530456962435275"
},
{
"Id": "2b765368696b3441673"
},
{
"Id": "526e666d65366e67626244626e626"
}
],
"destId": "4e676269685a7378747d",
"curval": 22.68
},
]
}
Examples
|displayNumber|
|8957|
|3498|
|4943|
Below expression works if i give the value
# Hard-coded lookup: Id of the first Acc entry whose displayNumber matches.
# The JsonPath filter uses '@' for the current node and needs its closing ')'.
* def tempid = response
* def fromAccount = get[0] tempid.Acc[?(@.displayNumber==8957)].Id
I'm not sure how to make this comparison value (i.e. 1881) as a variable which can be read from examples (scenario outline) or a csv file. Went through the documentation, which recommends, karate filters or maps. However, not able to follow how to implement.
You almost got it :-). This is the way you want to solve this
Scenario Outline: Testing SO question for Navneeth
# Each Examples value is substituted for <displayNumber> inside the JsonPath
# filter. '@' refers to the current Acc entry, and the placeholder is quoted
# because displayNumber is a string in the response.
* def tempid = response
* def fromAccount = get[0] tempid.Acc[?(@.displayNumber == '<displayNumber>')]
* print fromAccount
Examples:
|displayNumber|
|8957|
|1881|
|3444|
You need to pass the placeholder in examples as -
'<displayNumber>'

How to match field value in response when there are multiple fields with the same name?

[
{
"key": "test1",
"category": "test",
"name": "test1",
"translations":
{
"english": "eng"
}
},
{
"key": "test2",
"category": "test",
"name": "test1",
"translations":
{
"english": "eng2",
"german": "German"
}
},
{
"key": "test3",
"category": "power",
"name": "test1",
"translations":
{
"EN_lang": "jik"
}
}
]
Here, multiple fields have the same name but different values, and we have to match a value inside translations (the field position changes on every call).
You have to be clear about what you want to assert. Hint, the new contains deep (available in 0.9.6.RC4) can help:
* match response contains deep { key: 'test2', translations: { english: 'eng2' } }
Else you should look at transforming the JSON into a shape where it is easier to do the assertions you want: https://github.com/intuit/karate#json-transforms

Nested "for loop" searches in SQL - Azure CosmosDB

I am using Cosmos DB and have a document with the following simplified structure:
{
"id1":"123",
"stuff": [
{
"id2": "stuff",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big ostrich",
"meta": 1
}
]
},
{
"id3": "default",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
}
My issue is - I have an array of these documents and need to search name to see if it matches my search word. For example I want both big trees and trees to return if a user types in trees.
So currently I push every document into an array and do the following:
For each document
for each stuff
for each a.b.c.d[0].e
for each classes
var splice = name.split(' ')
if (splice.includes(searchWord))
return id1, id2 and id3.
Using cosmosDB I am using SQL with the following code:
// Fetches every document in the collection; `stuff` stands in for the manual
// nested-loop search that is then run client-side on `results`.
client.queryDocuments(
collection,
`SELECT * FROM root r`
).toArray((err, results) => {stuff});
This effectively brings every document in my collection into an array to perform the search manually above as mentioned.
This is going to cause issues when I have 1000s or 1,000,000s of documents in the array and I believe I should be leveraging the search mechanics available within Cosmos itself. Is anyone able to help me to work out what SQL query would be able to perform this type of function?
Having searched everything is it also possible to search the 5 latest documents?
Thanks for any insight in advance!
1.Is anyone able to help me to work out what SQL query would be able to
perform this type of function?
According to your sample and description, I suggest using ARRAY_CONTAINS in Cosmos DB SQL. Please refer to my sample:
sample documents:
[
{
"id1": "123",
"stuff": [
{
"id2": "stuff",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big ostrich",
"meta": 1
}
]
},
{
"id3": "default",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
},
{
"id1": "456",
"stuff": [
{
"id2": "stuff2",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things2",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "trees",
"meta": 1
}
]
},
{
"id3": "default2",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
},
{
"id1": "789",
"stuff": [
{
"id2": "stuff3",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things3",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big",
"meta": 1
}
]
},
{
"id3": "default3",
"name": "other",
"classes": [
{
"name": "big trees",
"meta": 1
}
]
}
]
}
]
}
}
}
}
]
}
]
query :
SELECT DISTINCT c.id1, stuff.id2, e.id3
FROM c
JOIN stuff IN c.stuff
JOIN d IN stuff.a.b.c.d
JOIN e IN d.e
WHERE ARRAY_CONTAINS(e.classes, {name: "trees"}, true)
   OR ARRAY_CONTAINS(e.classes, {name: "big trees"}, true)
output:
2.Having searched everything is it also possible to search the 5 latest
documents?
Per my research, features like LIMIT are not supported in Cosmos DB so far. However, TOP is supported. So if you can add a sort field (such as a date or id), you could use this SQL:
SELECT TOP 5 * FROM c ORDER BY c.sort DESC

Unwind an array in DocumentDB query

I have documents that look like this:
[
{
"id": "e1bb9b05-11f2-459e-37d3-9bf9fed56c96",
"name": "bulbasaur",
"type": [
{
"slot": 2,
"type": {
"url": "https://pokeapi.co/api/v2/type/4/",
"name": "poison"
}
},
{
"slot": 1,
"type": {
"url": "https://pokeapi.co/api/v2/type/12/",
"name": "grass"
}
}
]
}
]
The following query is about as close as I can get, but not quite the output I'm hoping for.
Query
SELECT
c.id, c.name, t.type.name as type
FROM
c
JOIN
t IN c.type
WHERE
c.name = "bulbasaur"
Result
[
{
"id": "e1bb9b05-11f2-459e-37d3-9bf9fed56c96",
"name": "bulbasaur",
"type": "poison"
},
{
"id": "e1bb9b05-11f2-459e-37d3-9bf9fed56c96",
"name": "bulbasaur",
"type": "grass"
}
]
Hoping for
[
{
"id": "e1bb9b05-11f2-459e-37d3-9bf9fed56c96",
"name": "bulbasaur",
"types": ["poison", "grass"]
}
]
Is this possible with a DocumentDB query?
This requires use of DocumentDB UDFs, which can extend query functionality with custom transformations. For example, register this:
/**
 * DocumentDB user-defined function: collapses a document's `type` array into
 * a flat list of type names.
 *
 * @param {Object} value - Document with `id`, `name` and a `type` array whose
 *   entries look like `{ slot, type: { url, name } }`.
 * @returns {Object} `{ id, name, types }`, where `types` holds each entry's
 *   `type.name` in array order. `types` is empty when `value.type` is not an
 *   array.
 */
function unwindTypeArray(value) {
  var result = { id: value.id, name: value.name, types: [] };
  var entries = Array.isArray(value.type) ? value.type : [];
  // Index loop instead of for...in, so only real array elements are visited
  // and not other enumerable properties. The leftover debug logging of each
  // index has been removed.
  for (var i = 0; i < entries.length; i++) {
    result.types.push(entries[i].type.name);
  }
  return result;
}
Then call it inside a query like:
SELECT VALUE udf.unwindTypeArray(c) FROM c WHERE c.name = "bulbasaur"