Comparing two JSON objects with order of fields and subarrays shuffled in Karate [duplicate] - karate

This question already has an answer here:
Asserting and using conditions for an array response in Karate
(1 answer)
Closed 2 years ago.
I want to loop through below nested json structure and want to update all the required fields, however I could achieve this through typescript but want to do this in karate JS, I do not see any examples how to nested for each works.
I want to update 26 periods data(here for readability i used 3), based on index I want to update period field, i.e. if(index == key), these 26 periods are under each car attribute.(NOTe: you again have multiple cars and multiple car attributes and each car attribute you have 26 periods data)
I cannot use this Karate - Match two dynamic responses only when you have single array list and have less data
[
{
"cars": [
{
"name": "car 1",
"periodsData": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
},
{
"name": "car 2",
"periodsData": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
},
{
"name": "car 3",
"periodsData": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
}
],
"totalPeriodEprps": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
}
carId ="dfd"
]
This above array repeats
Type script code
//periods is a map of index and values
async modifyCarsData(mid, id, periods, campaignData) {
//carData is a json file
carData.forEach(element => {
element.carId= id;
// Update all egrp periods
element.totalPeriodEGRPs.forEach(eGrpPeriod => {
// egrprd.period =
if (periods.size === element.totalPeriodEGRPs.length) {
periods.forEach((value, key) => {
if (key === eGrpPeriod.index.toString()) {
eGrpPeriod.period = value;
return true;
}
});
}
});
element.cars.forEach(carCell => {
// Logic for updating periods data
carCell .periodsData.forEach(periodAttribute => {
if (periods.size === carCell.periodsData.length) {
periods.forEach((value, key) => {
if (key === periodAttribute.index.toString()) {
periodAttribute.period = value;
return true;
}
});
}
});
});
});

Don't think of this as an update, but as a transform. I'm not using your example because it is un-necessarily complicated. Here is a simpler example that gives you all the concepts you need:
* def data = [{ name: 'one', periods: [{ index: 1, value: 'a' },{ index: 2, value: 'b' }]}, { name: 'two', periods: [{ index: 1, value: 'c' },{ index: 2, value: 'd' }]}]
* def fnPeriod = function(x){ x.value = x.value + x.index; return x }
* def fnData = function(x){ return { name: x.name, periods: karate.map(x.periods, fnPeriod) } }
* def converted = karate.map(data, fnData)
* print converted
Which prints:
[
{
"name": "one",
"periods": [
{
"index": 1,
"value": "a1"
},
{
"index": 2,
"value": "b2"
}
]
},
{
"name": "two",
"periods": [
{
"index": 1,
"value": "c1"
},
{
"index": 2,
"value": "d2"
}
]
}
]
If this doesn't work for you, please look for another tool. Karate is designed for testing and assertions, not doing what you normally do in programming languages. And I suspect that you have fallen into the trap of writing "over-smart tests", so please read this: https://stackoverflow.com/a/54126724/143475
Also refer: https://stackoverflow.com/a/53120851/143475

Related

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract id's for the document for which the value of "res" is outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR (Q1, Q3 are percentiles)). I have done this using pandas functionality by retrieving all documents from the collection, which may become slow if the number of documents in collection become too big.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?
If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
{ // partition res into quartiles
"$bucketAuto": {
"groupBy": "$res",
"buckets": 4
}
},
{ // get the max of each quartile
"$group": {
"_id": "$_id.max"
}
},
{ // sort the quartile maxs
"$sort": {
"_id": 1
}
},
{ // put sorted quartile maxs into array
"$group": {
"_id": null,
"maxs": {"$push": "$_id"}
}
},
{ // assign Q1 and Q3
"$project": {
"_id": 0,
"q1": {"$arrayElemAt": ["$maxs", 0]},
"q3": {"$arrayElemAt": ["$maxs", 2]}
}
},
{ // set IQR
"$set": {
"iqr": {
"$subtract": ["$q3", "$q1"]
}
}
},
{ // assign upper/lower outlier thresholds
"$project": {
"outlierThresholdLower": {
"$subtract": [
"$q1",
{"$multiply": ["$iqr", 1.5]}
]
},
"outlierThresholdUpper": {
"$add": [
"$q3",
{"$multiply": ["$iqr", 1.5]}
]
}
}
},
{ // get outlier _id's
"$lookup": {
"from": "collection",
"as": "outliers",
"let": {
"oTL": "$outlierThresholdLower",
"oTU": "$outlierThresholdUpper"
},
"pipeline": [
{
"$match": {
"$expr": {
"$or": [
{"$lt": ["$res", "$$oTL"]},
{"$gt": ["$res", "$$oTU"]}
]
}
}
},
{
"$project": {
"_id": 1
}
}
]
}
}
])
Try it on mongoplayground.net.
One more option based on #rickhg12hs's answer, is to use $setWindowFields:
db.collection.aggregate([
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
{$group: {_id: null, res: {$push: "$res"}}},
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example

Karate - How to use nested for each in karate similar to javascript [duplicate]

This question already has an answer here:
Asserting and using conditions for an array response in Karate
(1 answer)
Closed 2 years ago.
I want to loop through below nested json structure and want to update all the required fields, however I could achieve this through typescript but want to do this in karate JS, I do not see any examples how to nested for each works.
I want to update 26 periods data(here for readability i used 3), based on index I want to update period field, i.e. if(index == key), these 26 periods are under each car attribute.(NOTe: you again have multiple cars and multiple car attributes and each car attribute you have 26 periods data)
I cannot use this Karate - Match two dynamic responses only when you have single array list and have less data
[
{
"cars": [
{
"name": "car 1",
"periodsData": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
},
{
"name": "car 2",
"periodsData": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
},
{
"name": "car 3",
"periodsData": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
}
],
"totalPeriodEprps": [
{
"period": "5ed73ed31a775d1ab0c9fb5c",
"index": 1
},
{
"period": "5ed73ed31a775d1ab0c9fb5d",
"index": 2
},
{
"period": "5ed73ed31a775d1ab0c9fb5e",
"index": 3
}
]
}
carId ="dfd"
]
This above array repeats
Type script code
//periods is a map of index and values
async modifyCarsData(mid, id, periods, campaignData) {
//carData is a json file
carData.forEach(element => {
element.carId= id;
// Update all egrp periods
element.totalPeriodEGRPs.forEach(eGrpPeriod => {
// egrprd.period =
if (periods.size === element.totalPeriodEGRPs.length) {
periods.forEach((value, key) => {
if (key === eGrpPeriod.index.toString()) {
eGrpPeriod.period = value;
return true;
}
});
}
});
element.cars.forEach(carCell => {
// Logic for updating periods data
carCell .periodsData.forEach(periodAttribute => {
if (periods.size === carCell.periodsData.length) {
periods.forEach((value, key) => {
if (key === periodAttribute.index.toString()) {
periodAttribute.period = value;
return true;
}
});
}
});
});
});
Don't think of this as an update, but as a transform. I'm not using your example because it is un-necessarily complicated. Here is a simpler example that gives you all the concepts you need:
* def data = [{ name: 'one', periods: [{ index: 1, value: 'a' },{ index: 2, value: 'b' }]}, { name: 'two', periods: [{ index: 1, value: 'c' },{ index: 2, value: 'd' }]}]
* def fnPeriod = function(x){ x.value = x.value + x.index; return x }
* def fnData = function(x){ return { name: x.name, periods: karate.map(x.periods, fnPeriod) } }
* def converted = karate.map(data, fnData)
* print converted
Which prints:
[
{
"name": "one",
"periods": [
{
"index": 1,
"value": "a1"
},
{
"index": 2,
"value": "b2"
}
]
},
{
"name": "two",
"periods": [
{
"index": 1,
"value": "c1"
},
{
"index": 2,
"value": "d2"
}
]
}
]
If this doesn't work for you, please look for another tool. Karate is designed for testing and assertions, not doing what you normally do in programming languages. And I suspect that you have fallen into the trap of writing "over-smart tests", so please read this: https://stackoverflow.com/a/54126724/143475
Also refer: https://stackoverflow.com/a/53120851/143475

ES6: Joining of subqueries to two different rows through the AND operator

I have following index:
+-----+-----+-------+
| oid | tag | value |
+-----+-----+-------+
| 1 | t1 | aaa |
| 1 | t2 | bbb |
| 2 | t1 | aaa |
| 2 | t2 | ddd |
| 2 | t3 | eee |
+-----+-----+-------+
where: oid - object ID, tag - property name, value - property value.
Mappings:
"mappings": {
"document": {
"_all": { "enabled": false },
"properties": {
"oid": { "type": "integer" },
"tag": { "type": "text" }
"value": { "type": "text" },
}
}
}
This simple structure allows store any number of object properties and it is a quite simple to search by one property or by more using OR logical operator.
E.g. get object oid's where:
(tag='t1' AND value='aaa') OR (tag='t2' AND value='ddd')
ES query:
{
"_source": { "includes":["oid"] },
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{ "term": { "tag": "t1" } },
{ "term": { "value": "aaa" } }
]
}
},
{
"bool": {
"must": [
{ "term": { "tag": "t2" } },
{ "term": { "value": "ddd" } }
]
}
}
],
"minimum_should_match": "1"
}
}
}
But it is hard to search by two or more properties using AND logical operator. So the question is how to join two sub-queries to two different records through the AND operator. E.g. get object oid's where:
(tag='t1' AND value='aaa') AND (tag='t2' AND value='ddd')
In this case result must be: { "oid": "2" }
Searching data contains in two different records and applying MUST instead of SHOULD from the previous example returns nothing in this case.
I have two equivalents in SQL of what I need:
SELECT i1.[oid]
FROM [index] i1 INNER JOIN [index] i2 ON i1.oid = i2.oid
WHERE
(i1.tag='t1' AND i1.value='aaa')
AND
(i2.tag='t2' AND i2.value='ddd')
---------
SELECT [oid] FROM [index] WHERE tag='t1' AND value='aaa'
INTERSECT
SELECT [oid] FROM [index] WHERE tag='t2' AND value='ddd'
Do the two requests and merge them on the client is not the option.
Elastic Search version is 6.1.1
In order to achieve what you want, you need to use the nested type, i.e. your mapping should look like this:
PUT my-index
{
"mappings": {
"doc": {
"properties": {
"oid": {
"type": "keyword"
},
"data": {
"type": "nested",
"properties": {
"tag": {
"type": "keyword"
},
"value": {
"type": "text"
}
}
}
}
}
}
}
The documents would be indexed like this:
PUT /my-index/doc/_bulk
{ "index": {"_id": 1}}
{ "oid": 1, "data": [ {"tag": "t1", "value": "aaa"}, {"tag": "t2", "value": "bbb"}] }
{ "index": {"_id": 2}}
{ "oid": 2, "data": [ {"tag": "t1", "value": "aaa"}, {"tag": "t2", "value": "ddd"}, {"tag": "t3", "value": "eee"}] }
Then you can make your query work like this:
POST my-index/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "data",
"query": {
"bool": {
"filter": [
{
"term": {
"data.tag": "t1"
}
},
{
"term": {
"data.value": "aaa"
}
}
]
}
}
}
},
{
"nested": {
"path": "data",
"query": {
"bool": {
"filter": [
{
"term": {
"data.tag": "t2"
}
},
{
"term": {
"data.value": "ddd"
}
}
]
}
}
}
}
]
}
}
}
There might be one way, which is a little ugly: adding terms aggregations to your query body.
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{ "term": { "tag": "t1" } },
{ "term": { "value": "aaa" } }
]
}
},
{
"bool": {
"must": [
{ "term": { "tag": "t2" } },
{ "term": { "value": "ddd" } }
]
}
}
],
"minimum_should_match": "1"
}
},
"size": 0,
"aggs": {
"find_joined_oid": {
"terms": {
"field": "oid.keyword"
}
}
}
}
If everything goes right, this will output something like
{
"took": 123,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 123,
"max_score": 0,
"hits": []
},
"aggregations": {
"find_joined_oid": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 1
},
{
"key": "2",
"doc_count": 2
}
}
}
}
Here, in the "aggregations" part,
"key": "1"
means your "oid":"1", and
"doc_counts": 1
means there is 1 hit in query with "oid":"1".
As you know how many tags you are querying to match, say N, in the aggregations result body, only those "key"s with "doc_count" equal to N are the result you're pursuing. In this example, you are querying tag:t1 (with value aaa) and tag:t2 (with value ddd), thus N=2. You can iterate in the result bucket list to find out those "key"s who have "doc_count" equal to 2.
However, there should be a better way. If you would alter your mapping to a document like style, ie. store all fields of one oid in one doc, life will be much easier.
{
"properties": {
"oid": { "type": "integer" },
"tag-1": { "type": "text" }
"value-1": { "type": "text" },
"tag-2": { "type": "text" }
"value-2": { "type": "text" }
}
}
When you want to add new tag-value pairs, just get the original doc with oid concerned, put new tag-pair into the doc, and put the whole new doc back into Elasticsearch with the same _id which you get from the original one. Most of the time dynamic mapping will work properly in your case, which means you don't need to assert mapping for new fields explicitly.
No-SQL databases like Elasticsearch and others are not designed to handle such SQL style query you are asking.

find object in nested array with lodash

I have json data similar to this:
{
"Sections": [
{
"Categories": [
{
"Name": "Book",
"Id": 1,
"Options": [
{
"Name": "AAAA",
"OptionId": 111
},
"Selected": 0
},
{
"Name": "Car",
"Id": 2,
"Options": [
{
"Name": "BBB",
"OptionId": 222
},
"Selected": 0
},
],
"SectionName": "Main"
},
... more sections like the one above
]
}
Given this data, I want to find a category inside a section based on its (Category) Id, and set its selected option, I tried this, but couldn't get it to work....Note Category Id will be unique in the whole data set.
_.find(model.Sections, { Categories: [ { Id: catId } ]});
According to your data model, it looks like you're trying to find an element that is inside a matrix: Sections can have multiple Categories and a Category can have multiple types (car, book...).
I'm afraid there isn't a function in lodash that allows a deep find, you'll have to implement it the 'traditional' way (a couple of fors).
I provide this solution that is a bit more 'functional flavoured' than the traditional nested fors. It also takes advantage of the fact that when you explicitly return false inside a forEach, the loop finishes. Thus, once an element with the provided id is found, the loop is ended and the element returned (if it's not found, undefined is returned instead).
Hope it helps.
const findCategoryById = (sections, id) => {
var category;
_.forEach(sections, (section) => {
category = _.find(section.Categories, ['Id', id]);
return _.isUndefined(category);
});
return category;
};
const ex = {
"Sections": [{
"Categories": [{
"Name": "Book",
"Id": 1,
"Options": [{
"Name": "AAAA",
"OptionId": 111
}],
"Selected": 0
},
{
"Name": "Car",
"Id": 2,
"Options": [{
"Name": "BBB",
"OptionId": 222
}],
"Selected": 0
}
],
"SectionName": "Main"
}]
};
console.log(findCategoryById(ex.Sections, 2));
<script src="https://cdn.jsdelivr.net/npm/lodash#4.17.5/lodash.min.js"></script>

hierarchical faceting with Elasticsearch

I'm using elasticsearch and need to implement facet search for hierarchical object as follow:
category 1 (10)
subcategory 1 (4)
subcategory 2 (6)
category 2 (X)
...
So I need to get facets for two related objects. Documentation says that it's possible to get such kind of facets for numeric value, but I need it for strings http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-facets-terms-stats-facet.html
Here is another interesting topic, unfortunately it's old: http://elasticsearch-users.115913.n3.nabble.com/Pivot-facets-td2981519.html
Does it possible with elastic search?
If so, how can I do that?
The previous solution works really well until you have no more than a multi-level tag on a single-document. In this case a simple aggregation doesn't work, because the flat structure of the lucene fields mix the results on the internal aggregation.
See the example below:
DELETE /test_category
POST /test_category
# Insert a doc with 2 hierarchical tags
POST /test_category/test/1
{
"categories": [
{
"cat_1": "1",
"cat_2": "1.1"
},
{
"cat_1": "2",
"cat_2": "2.2"
}
]
}
# Simple two-levels aggregations query
GET /test_category/test/_search?search_type=count
{
"aggs": {
"main_category": {
"terms": {
"field": "categories.cat_1"
},
"aggs": {
"sub_category": {
"terms": {
"field": "categories.cat_2"
}
}
}
}
}
}
That's the WRONG response that I have got on ES 1.4, where the fields on the internal aggregation are mixed at a document level:
{
...
"aggregations": {
"main_category": {
"buckets": [
{
"key": "1",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "1.1",
"doc_count": 1
},
{
"key": "2.2", <= WRONG
"doc_count": 1
}
]
}
},
{
"key": "2",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "1.1", <= WRONG
"doc_count": 1
},
{
"key": "2.2",
"doc_count": 1
}
]
}
}
]
}
}
}
A Solution can be to use nested objects. These are the steps to do:
1) Define a new type in the schema with nested objects
POST /test_category/test2/_mapping
{
"test2": {
"properties": {
"categories": {
"type": "nested",
"properties": {
"cat_1": {
"type": "string"
},
"cat_2": {
"type": "string"
}
}
}
}
}
}
# Insert a single document
POST /test_category/test2/1
{"categories":[{"cat_1":"1","cat_2":"1.1"},{"cat_1":"2","cat_2":"2.2"}]}
2) Run a nested aggregation query:
GET /test_category/test2/_search?search_type=count
{
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"main_category": {
"terms": {
"field": "categories.cat_1"
},
"aggs": {
"sub_category": {
"terms": {
"field": "categories.cat_2"
}
}
}
}
}
}
}
}
That's the response, now correct, that I have got:
{
...
"aggregations": {
"categories": {
"doc_count": 2,
"main_category": {
"buckets": [
{
"key": "1",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "1.1",
"doc_count": 1
}
]
}
},
{
"key": "2",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "2.2",
"doc_count": 1
}
]
}
}
]
}
}
}
}
The same solution can be extended to a more than two-levels hierarchy facet.
Currently, elasticsearch does not support hierarchical facetting out-of-the-box. But the upcoming 1.0 release features a new aggregations module, that can be used to get these kind of facets (which are more like pivot-facets rather than hierarchical facets). Version 1.0 is currently in beta, you can download the second beta and test out aggregatins by yourself. Your example might look like
curl -XPOST 'localhost:9200/_search?pretty' -d '
{
"aggregations": {
"main category": {
"terms": {
"field": "cat_1",
"order": {"_term": "asc"}
},
"aggregations": {
"sub category": {
"terms": {
"field": "cat_2",
"order": {"_term": "asc"}
}
}
}
}
}
}'
The idea is, to have a different field for each level of facetting and bucket your facets based on the terms of the first level (cat_1). These aggregations then would have sub-buckets, based on the terms of the second level (cat_2). The result may look like
{
"aggregations" : {
"main category" : {
"buckets" : [ {
"key" : "category 1",
"doc_count" : 10,
"sub category" : {
"buckets" : [ {
"key" : "subcategory 1",
"doc_count" : 4
}, {
"key" : "subcategory 2",
"doc_count" : 6
} ]
}
}, {
"key" : "category 2",
"doc_count" : 7,
"sub category" : {
"buckets" : [ {
"key" : "subcategory 1",
"doc_count" : 3
}, {
"key" : "subcategory 2",
"doc_count" : 4
} ]
}
} ]
}
}
}