Kotlin - Merge two data classes

Data class
data class A(
var data: List<Data>
) {
data class Data(
var key: String,
var count: Long = 0,
var sub: List<Data>? = null
)
}
The values of data class A, expressed as JSON:
[
{
"data": [
{
"key": "ALLOGENE THERAPEUTICS",
"count": 47,
"sub": [
{
"key": "N",
"count": 46,
"sub": [
{
"key": "S1",
"count": 1
},
{
"key": "S2",
"count": 13
}
]
},
{
"key": "B+",
"count": 1,
"sub": [
{
"key": "S1",
"count": 2
},
{
"key": "S2",
"count": 1
}
]
}
]
},
{
"key": "CELLECTIS",
"count": 5,
"sub": [
{
"key": "B+",
"count": 2,
"sub": [
{
"key": "S1",
"count": 3
},
{
"key": "S2",
"count": 5
}
]
},
{
"key": "B",
"count": 2,
"sub": [
{
"key": "S1",
"count": 6
},
{
"key": "S2",
"count": 1
}
]
},
{
"key": "N",
"count": 1,
"sub": [
{
"key": "S1",
"count": 8
},
{
"key": "S2",
"count": 4
}
]
}
]
},
{
"key": "PFIZER",
"count": 5,
"sub": [
{
"key": "N",
"count": 5,
"sub": [
{
"key": "S1",
"count": 83
},
{
"key": "S2",
"count": 1
}
]
}
]
}
]
}
]
I would like to combine the elements whose keys are "ALLOGENE THERAPEUTICS" and "CELLECTIS" and replace the key with "STUB".
When elements are combined, their "count" values must be summed.
And elements present in one side but not the other must be added.
Therefore, the result should be as follows.
[
{
"data": [
{
"key": "STUB",
"count": 52, // "ALLOGENE THERAPEUTICS"(47) + "CELECTIS"(5) = 52
"sub": [
{
"key": "N",
"count": 47, // 46 + 1
"sub": [
{
"key": "S1",
"count": 9
},
{
"key": "S2",
"count": 17
}
]
},
{
"key": "B+",
"count": 3,
"sub": [
{
"key": "S1",
"count": 5
},
{
"key": "S2",
"count": 6
}
]
},
{
"key": "B",
"count": 5,
"sub": [
{
"key": "S1",
"count": 11
},
{
"key": "S2",
"count": 7
}
]
}
]
},
{
"key": "PFIZER",
"count": 5,
"sub": [
{
"key": "N",
"count": 5,
"sub": [
{
"key": "S1",
"count": 83
},
{
"key": "S2",
"count": 1
}
]
}
]
}
]
}
]
How can I implement this cleanly in Kotlin?
For reference, the data class values above are shown as JSON, but the result must be a data class.
This is my progress so far.
First, create a function on Data that produces a merged copy:
data class Data(
var key: String,
var count: Long = 0,
var sub: List<Data> = emptyList()
) {
fun mergedWith(other: Data): Data {
return copy(
count = count + other.count,
sub = sub + other.sub
)
}
}
Then fold the consolidation list into a single data item and add the two partitions back together:
val consolidatedKeys = listOf("ALLOGENE THERAPEUTICS", "CELLECTIS")
val (consolidatedValues, nonconsolidatedValues) = a.data.partition { it.key in consolidatedKeys }
val consolidatedData = when {
consolidatedValues.isEmpty() -> emptyList()
else -> listOf(consolidatedValues.fold(A.Data("STUB", 0), A.Data::mergedWith))
}
val result = A(consolidatedData + nonconsolidatedValues)
Then combine the sub-elements:
consolidatedData.forEach { x ->
x.sub
.groupBy { group -> group.key }
.map { A.Data(it.key, it.value.sumOf { c -> c.count }) }
}
This is the current situation: elements at depth 2 merge correctly, but elements at depth 3 are not merged.
For example, "N" under "STUB" is combined, but "S1" and "S2" under "N" are not.
The current output is therefore:
[
{
"data": [
{
"key": "STUB",
"count": 52, <--------- WORK FINE
"sub": [
{
"key": "N",
"count": 47, <--------- WORK FINE
"sub": [] <--------- EMPTY !!
},
{
"key": "B+",
"count": 3, <--------- WORK FINE
"sub": [] <--------- EMPTY !!
},
{
"key": "B",
"count": 5, <--------- WORK FINE
"sub": [] <--------- EMPTY !!
}
]
},
{
"key": "PFIZER",
"count": 5,
"sub": [
{
"key": "N",
"count": 5,
"sub": [
{
"key": "S1",
"count": 83
},
{
"key": "S2",
"count": 1
}
]
}
]
}
]
}
]
How can I merge all the sub-elements at every depth?

First, break down your problem. You can create a function for Data that creates a merged copy:
fun mergedWith(other: Data): Data {
return copy(
count = count + other.count,
sub = when {
sub == null && other.sub == null -> null
else -> sub.orEmpty() + other.sub.orEmpty()
}
)
}
If possible, I recommend using a non-nullable List for your sub parameter, with emptyList() when there is nothing in it. That keeps things simpler: there aren't two different ways to represent a lack of items, and you won't have to deal with nullability:
data class Data(
var key: String,
var count: Long = 0,
var sub: List<Data> = emptyList()
) {
fun mergedWith(other: Data): Data {
return copy(
count = count + other.count,
sub = sub + other.sub
)
}
}
Then split your list into the entries you want to consolidate and the rest, fold the consolidation list into a single data item, and add the two partitions back together:
val consolidatedKeys = listOf("ALLOGENE THERAPEUTICS", "CELLECTIS")
val (consolidatedValues, nonconsolidatedValues) = a.data.partition { it.key in consolidatedKeys }
val consolidatedData = when {
consolidatedValues.isEmpty() -> emptyList()
else -> listOf(consolidatedValues.fold(A.Data("STUB", 0), A.Data::mergedWith))
}
val result = A(consolidatedData + nonconsolidatedValues)
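That still leaves the depth problem from the question: mergedWith only concatenates the two sub lists, so entries sharing a key at depth 2 and below are never folded together. Here is a minimal sketch of one way to fix it, assuming the non-nullable variant above (mergeByKey is a helper name introduced here, not part of the original code, and Data is shown standalone rather than nested in A for brevity). The idea is to make the merge recursive, grouping children by key:
data class Data(
    val key: String,
    val count: Long = 0,
    val sub: List<Data> = emptyList()
) {
    fun mergedWith(other: Data): Data = copy(
        count = count + other.count,
        // Merge the children by key instead of just appending the lists,
        // so consolidation propagates to every depth.
        sub = mergeByKey(sub + other.sub)
    )

    companion object {
        // Group entries that share a key and fold each group into one
        // entry, recursing through mergedWith.
        fun mergeByKey(items: List<Data>): List<Data> =
            items.groupBy { it.key }
                .values
                .map { group -> group.reduce(Data::mergedWith) }
    }
}
With this in place, the same fold as before, consolidatedValues.fold(A.Data("STUB", 0), A.Data::mergedWith), should fill in the "sub" arrays at every level, and the separate forEach pass over consolidatedData is no longer needed.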

Related

How to check a particular value on the basis of a condition in Karate

Goal: verify that the check value is correct for the 123S and 123O entries in the API response.
If the value at x.details[0].user.school.name[0].codeable.text is 123S, then x.details[0].data.check should be abc.
Likewise, if the value at x.details[1].user.school.name[0].codeable.text is 123O, then x.details[1].data.check should be xyz.
The order of the array elements varies: the first element is not always 123S; sometimes the API returns 123O first.
Sample JSON:
{
"type": "1",
"array": 2,
"details": [
{
"path": "path",
"user": {
"school": {
"name": [
{
"value": "this is school",
"codeable": {
"details": [
{
"hello": "yty",
"condition": "check1"
}
],
"text": "123S"
}
}
]
},
"sample": "test1",
"id": "22222"
},
"data": {
"check": "abc"
}
},
{
"path": "path",
"user": {
"school": {
"name": [
{
"value": "this is school",
"codeable": {
"details": [
{
"hello": "def",
"condition": "check2"
}
],
"text": "123O"
}
}
]
},
"sample": "test",
"id": "11111"
},
"data": {
"check": "xyz"
}
}
]
}
This is how I did it in Postman, but how do I replicate it in Karate?
var jsonData = pm.response.json();
pm.test("Body matches string", function () {
for(var i=0;i<jsonData.details.length;i++){
if(jsonData.details[i].user.school.name[0].codeable.text == '123S')
{
pm.expect(jsonData.details[i].data.check).to.equal('abc');
}
if(jsonData.details[i].user.school.name[0].codeable.text == '123O')
{
pm.expect(jsonData.details[i].data.check).to.equal('xyz');
}
}
});
Two lines, and this takes care of any number of combinations of lookup values :)
* def lookup = { '123S': 'abc', '123O': 'xyz' }
* match each response.details contains { data: { check: '#(lookup[_$.user.school.name[0].codeable.text])' } }

Nested "for loop" searches in SQL - Azure CosmosDB

I am using Cosmos DB and have a document with the following simplified structure:
{
"id1":"123",
"stuff": [
{
"id2": "stuff",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big ostrich",
"meta": 1
}
]
},
{
"id3": "default",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
}
My issue: I have an array of these documents and need to search name for a match with a search word. For example, if a user types in trees, I want both "big trees" and "trees" to be returned.
So currently I push every document into an array and do the following:
for each document
  for each stuff
    for each a.b.c.d[0].e
      for each classes
        var splice = name.split(' ')
        if (splice.includes(searchWord))
          return id1, id2 and id3
Using Cosmos DB, I query with SQL via the following code:
client.queryDocuments(
collection,
`SELECT * FROM root r`
).toArray((err, results) => {stuff});
This effectively brings every document in my collection into an array so I can perform the manual search described above.
This will cause issues once there are thousands or millions of documents in the array, and I believe I should be leveraging the query capabilities available within Cosmos itself. Can anyone help me work out a SQL query that performs this kind of search?
Also, instead of searching everything, is it possible to search only the 5 latest documents?
Thanks for any insight in advance!
1. Can anyone help me work out a SQL query that performs this kind of search?
Based on your sample and description, I suggest using ARRAY_CONTAINS in Cosmos DB SQL. Please refer to my sample:
Sample documents:
[
{
"id1": "123",
"stuff": [
{
"id2": "stuff",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big ostrich",
"meta": 1
}
]
},
{
"id3": "default",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
},
{
"id1": "456",
"stuff": [
{
"id2": "stuff2",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things2",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "trees",
"meta": 1
}
]
},
{
"id3": "default2",
"name": "other",
"classes": [
{
"name": "green trees",
"meta": 1
},
{
"name": "trees",
"score": 1
}
]
}
]
}
]
}
}
}
}
]
},
{
"id1": "789",
"stuff": [
{
"id2": "stuff3",
"a": {
"b": {
"c": {
"d": [
{
"e": [
{
"id3": "things3",
"name": "animals",
"classes": [
{
"name": "ostrich",
"meta": 1
},
{
"name": "big",
"meta": 1
}
]
},
{
"id3": "default3",
"name": "other",
"classes": [
{
"name": "big trees",
"meta": 1
}
]
}
]
}
]
}
}
}
}
]
}
]
Query:
SELECT distinct c.id1,stuff.id2,e.id3 FROM c
join stuff in c.stuff
join d in stuff.a.b.c.d
join e in d.e
where ARRAY_CONTAINS(e.classes,{name:"trees"},true)
or ARRAY_CONTAINS(e.classes,{name:"big trees"},true)
2. Is it also possible to search only the 5 latest documents?
Per my research, LIMIT is not supported in Cosmos DB so far; however, TOP is. If you add a sort field (such as a date or an id), you can use:
SELECT TOP 5 * FROM c ORDER BY c.sort DESC

How do I write a SQL query for the Azure Cosmos DB documents shown?

I have the following documents in an Azure Cosmos DB collection.
// Document 1
{
"c": {
"firstName": "Robert"
},
"elements": [
{
"a": "x2",
"b": {
"name": "yadda2",
"id": 1
}
}
]
}
// Document 2
{
"c": {
"firstName": "Steve"
},
"elements": [
{
"a": "x5",
"b": {
"name": "yadda2",
"id": 4
}
},
{
"a": "x3",
"b": {
"name": "yadda8",
"id": 5
}
}
]
}
// Document 3
{
"c": {
"firstName": "Johnson"
},
"elements": [
{
"a": "x4",
"b": {
"name": "yadda28",
"id": 25
}
},
{
"a": "x5",
"b": {
"name": "yadda30",
"id": 37
}
}
]
}
I need to write a query that returns all documents that have a "b" object whose name is "yadda2" (i.e. /elements/*/b/name=yadda2). In other words, this query should return documents 1 and 2 but NOT 3.
I tried the following, but it did not work:
SELECT * FROM x where ARRAY_CONTAINS(x.elements, {b: { name: "yadda2"}})
What am I doing wrong?
Just modify your SQL to:
SELECT * FROM x where ARRAY_CONTAINS(x.elements, {b: { name: "yadda2"}},true)
Based on the official docs, the boolean third parameter specifies whether the match must be full or may be partial.
Hope it helps you.

ES6: Joining of subqueries to two different rows through the AND operator

I have the following index:
+-----+-----+-------+
| oid | tag | value |
+-----+-----+-------+
|  1  | t1  | aaa   |
|  1  | t2  | bbb   |
|  2  | t1  | aaa   |
|  2  | t2  | ddd   |
|  2  | t3  | eee   |
+-----+-----+-------+
where: oid - object ID, tag - property name, value - property value.
Mappings:
"mappings": {
"document": {
"_all": { "enabled": false },
"properties": {
"oid": { "type": "integer" },
"tag": { "type": "text" }
"value": { "type": "text" },
}
}
}
This simple structure allows storing any number of object properties, and it is quite simple to search by one property, or by several combined with the logical OR operator.
E.g., get object oids where:
(tag='t1' AND value='aaa') OR (tag='t2' AND value='ddd')
ES query:
{
"_source": { "includes":["oid"] },
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{ "term": { "tag": "t1" } },
{ "term": { "value": "aaa" } }
]
}
},
{
"bool": {
"must": [
{ "term": { "tag": "t2" } },
{ "term": { "value": "ddd" } }
]
}
}
],
"minimum_should_match": "1"
}
}
}
But it is hard to search by two or more properties combined with the logical AND operator. So the question is how to join two sub-queries against two different records through AND. E.g., get object oids where:
(tag='t1' AND value='aaa') AND (tag='t2' AND value='ddd')
In this case the result must be: { "oid": "2" }
The data being searched lives in two different records, so simply replacing SHOULD with MUST in the previous example returns nothing.
I have two equivalents in SQL of what I need:
SELECT i1.[oid]
FROM [index] i1 INNER JOIN [index] i2 ON i1.oid = i2.oid
WHERE
(i1.tag='t1' AND i1.value='aaa')
AND
(i2.tag='t2' AND i2.value='ddd')
---------
SELECT [oid] FROM [index] WHERE tag='t1' AND value='aaa'
INTERSECT
SELECT [oid] FROM [index] WHERE tag='t2' AND value='ddd'
Running the two requests and merging them on the client is not an option.
The Elasticsearch version is 6.1.1.
In order to achieve what you want, you need to use the nested type, i.e. your mapping should look like this:
PUT my-index
{
"mappings": {
"doc": {
"properties": {
"oid": {
"type": "keyword"
},
"data": {
"type": "nested",
"properties": {
"tag": {
"type": "keyword"
},
"value": {
"type": "text"
}
}
}
}
}
}
}
The documents would be indexed like this:
PUT /my-index/doc/_bulk
{ "index": {"_id": 1}}
{ "oid": 1, "data": [ {"tag": "t1", "value": "aaa"}, {"tag": "t2", "value": "bbb"}] }
{ "index": {"_id": 2}}
{ "oid": 2, "data": [ {"tag": "t1", "value": "aaa"}, {"tag": "t2", "value": "ddd"}, {"tag": "t3", "value": "eee"}] }
Then you can make your query work like this:
POST my-index/_search
{
"query": {
"bool": {
"filter": [
{
"nested": {
"path": "data",
"query": {
"bool": {
"filter": [
{
"term": {
"data.tag": "t1"
}
},
{
"term": {
"data.value": "aaa"
}
}
]
}
}
}
},
{
"nested": {
"path": "data",
"query": {
"bool": {
"filter": [
{
"term": {
"data.tag": "t2"
}
},
{
"term": {
"data.value": "ddd"
}
}
]
}
}
}
}
]
}
}
}
There might be one way, though it is a little ugly: adding a terms aggregation to your query body.
{
"query": {
"bool": {
"should": [
{
"bool": {
"must": [
{ "term": { "tag": "t1" } },
{ "term": { "value": "aaa" } }
]
}
},
{
"bool": {
"must": [
{ "term": { "tag": "t2" } },
{ "term": { "value": "ddd" } }
]
}
}
],
"minimum_should_match": "1"
}
},
"size": 0,
"aggs": {
"find_joined_oid": {
"terms": {
"field": "oid.keyword"
}
}
}
}
If everything goes right, this will output something like
{
"took": 123,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 123,
"max_score": 0,
"hits": []
},
"aggregations": {
"find_joined_oid": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "1",
"doc_count": 1
},
{
"key": "2",
"doc_count": 2
}
]
}
}
}
Here, in the "aggregations" part,
"key": "1"
means your "oid":"1", and
"doc_counts": 1
means there is 1 hit in query with "oid":"1".
Since you know how many tag/value pairs you are querying, say N, only the keys in the aggregation result whose "doc_count" equals N are the results you are after. In this example you query tag:t1 (with value aaa) and tag:t2 (with value ddd), so N=2: iterate over the bucket list and keep the keys whose "doc_count" equals 2, as sketched below.
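A small sketch of that client-side filtering step in Kotlin (Bucket and matchingOids are names introduced here for illustration, and the aggregation response is assumed to be already parsed):
data class Bucket(val key: String, val docCount: Int)

// Keep only the keys (oids) whose bucket count equals the number of
// tag/value pairs in the query (N): those matched every sub-query.
fun matchingOids(buckets: List<Bucket>, n: Int): List<String> =
    buckets.filter { it.docCount == n }.map { it.key }

// For the example above, two pairs were queried (t1=aaa, t2=ddd), so:
// matchingOids(listOf(Bucket("1", 1), Bucket("2", 2)), n = 2) == listOf("2")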
However, there should be a better way. If you alter your mapping to a document-like style, i.e. store all fields of one oid in one doc, life will be much easier.
{
"properties": {
"oid": { "type": "integer" },
"tag-1": { "type": "text" }
"value-1": { "type": "text" },
"tag-2": { "type": "text" }
"value-2": { "type": "text" }
}
}
When you want to add new tag-value pairs, get the original doc for the oid concerned, put the new tag-value pair into it, and put the whole doc back into Elasticsearch under the same _id you got from the original. Most of the time dynamic mapping will work properly in your case, so you won't need to declare mappings for the new fields explicitly.
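A sketch of that read-modify-write flow in Kotlin, treating the document as a plain map; fetchDoc and putDoc are hypothetical stand-ins for whatever client calls you use, not a real Elasticsearch API:
// fetchDoc(id) is assumed to return the doc's _source as a mutable map;
// putDoc(id, doc) is assumed to reindex it under the same _id.
fun addTagValue(
    id: String,
    tag: String,
    value: String,
    fetchDoc: (String) -> MutableMap<String, Any?>,
    putDoc: (String, Map<String, Any?>) -> Unit
) {
    val doc = fetchDoc(id)                               // get the original doc
    val n = doc.keys.count { it.startsWith("tag-") } + 1 // next free slot
    doc["tag-$n"] = tag                                  // add the pair as tag-N / value-N
    doc["value-$n"] = value
    putDoc(id, doc)                                      // put the whole doc back, same _id
}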
NoSQL databases like Elasticsearch are not designed to handle the kind of SQL-style query you are asking for.

ElasticSearch - return the complete value of a facet for a query

I've recently started using Elasticsearch and am trying to implement some use cases. I have a problem with one of them.
I have indexed some users with their full name (e.g. "Jean-Paul Gautier", "Jean De La Fontaine").
I am trying to get all the full names matching some query.
For example, I want the 100 most frequent full names beginning with "J":
{
"query": {
"query_string" : { "query": "full_name:J*" } }
},
"facets":{
"name":{
"terms":{
"field": "full_name",
"size":100
}
}
}
}
The result I get is all the individual words of the full names: "Jean", "Paul", "Gautier", "De", "La", "Fontaine".
How do I get "Jean-Paul Gautier" and "Jean De La Fontaine" (all the full_name values beginning with "J")? The "post_filter" option does not do this; it only restricts the subset above.
Do I have to configure how this full_name facet works?
Do I have to add some options to the current query?
Do I have to do some "mapping" (still very obscure to me)?
Thanks
You just need to set "index": "not_analyzed" on the field, and you will be able to get back the full, unmodified field values in your facet.
Typically, it's nice to have one version of the field that isn't analyzed (for faceting) and another that is (for searching). The "multi_field" field type is useful for this.
So in this case, I can define a mapping as follows:
curl -XPUT "http://localhost:9200/test_index/" -d'
{
"mappings": {
"people": {
"properties": {
"full_name": {
"type": "multi_field",
"fields": {
"untouched": {
"type": "string",
"index": "not_analyzed"
},
"full_name": {
"type": "string"
}
}
}
}
}
}
}'
Here we have two sub-fields. The one with the same name as the parent will be the default, so if you search against the "full_name" field, Elasticsearch will actually use "full_name.full_name". "full_name.untouched" will give you the facet results you want.
So next I add two documents:
curl -XPUT "http://localhost:9200/test_index/people/1" -d'
{
"full_name": "Jean-Paul Gautier"
}'
curl -XPUT "http://localhost:9200/test_index/people/2" -d'
{
"full_name": "Jean De La Fontaine"
}'
And then I can facet on each field to see what is returned:
curl -XPOST "http://localhost:9200/test_index/_search" -d'
{
"size": 0,
"facets": {
"name_terms": {
"terms": {
"field": "full_name"
}
},
"name_untouched": {
"terms": {
"field": "full_name.untouched",
"size": 100
}
}
}
}'
and I get back the following:
{
"took": 1,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 2,
"max_score": 0,
"hits": []
},
"facets": {
"name_terms": {
"_type": "terms",
"missing": 0,
"total": 7,
"other": 0,
"terms": [
{
"term": "jean",
"count": 2
},
{
"term": "paul",
"count": 1
},
{
"term": "la",
"count": 1
},
{
"term": "gautier",
"count": 1
},
{
"term": "fontaine",
"count": 1
},
{
"term": "de",
"count": 1
}
]
},
"name_untouched": {
"_type": "terms",
"missing": 0,
"total": 2,
"other": 0,
"terms": [
{
"term": "Jean-Paul Gautier",
"count": 1
},
{
"term": "Jean De La Fontaine",
"count": 1
}
]
}
}
}
As you can see, the analyzed field returns single-word, lower-cased tokens (when you don't specify an analyzer, the standard analyzer is used), and the un-analyzed sub-field returns the unmodified original text.
Here is a runnable example you can play with:
http://sense.qbox.io/gist/7abc063e2611846011dd874648fd1b77450b19a5
Try altering the mapping for "full_name":
"properties": {
"full_name": {
"type": "string",
"index": "not_analyzed"
}
...
}
not_analyzed means that the value is kept as-is (capitals, spaces, dashes, etc.), so that "Jean De La Fontaine" stays findable and is not tokenized into "Jean", "De", "La", "Fontaine".
You can experiment with different analyzers using the _analyze API.
Notice what the standard analyzer does to a multi-part name:
GET /_analyze?analyzer=standard
{'Jean Claude Van Dame'}
{
"tokens": [
{
"token": "jean",
"start_offset": 2,
"end_offset": 6,
"type": "<ALPHANUM>",
"position": 1
},
{
"token": "claude",
"start_offset": 7,
"end_offset": 13,
"type": "<ALPHANUM>",
"position": 2
},
{
"token": "van",
"start_offset": 14,
"end_offset": 17,
"type": "<ALPHANUM>",
"position": 3
},
{
"token": "dame",
"start_offset": 18,
"end_offset": 22,
"type": "<ALPHANUM>",
"position": 4
}
]
}