ElasticSearch combine match and function score queries - indexing

I have a complicated query to run in elasticsearch that spans across multiple fields (nested and non-nested). I am using a bool should query across a multi-field match and nested field match.
Additionally I want a composite scoring which takes into account several other parameters such as location, rating etc.
I tried to run a simplified proof-of-concept combined query, which looks for a matching term and tries to use function score for the other fields, but I am running into an error from ES.
GET init/restaurant/_search/
{
"query": {
"match": {
"cuisine_categories": "Oriental"
},
"function_score": {
"functions": [
{
"gauss": {
"coordinates": {
"origin": { "lat": 74.20, "lon": 31.23 },
"offset": "1km",
"scale": "3km"
}
}
},
{
"gauss": {
"nomnom_rating": {
"origin": "4.5",
"offset": "0.5",
"scale": "1"
}
},
"weight": 2
},
{
"gauss": {
"estimated_preparation_time": {
"origin": "30",
"offset": "10",
"scale": "20"
}
},
"weight": 5
}
]
}
}
}

The query is not valid. The match clause should be placed inside the query object of function_score, as shown below.
Example:
POST init/restaurant/_search/
{
"query": {
"function_score": {
"functions": [
{
"gauss": {
"coordinates": {
"origin": {
"lat": 74.2,
"lon": 31.23
},
"offset": "1km",
"scale": "3km"
}
}
},
{
"gauss": {
"nomnom_rating": {
"origin": "4.5",
"offset": "0.5",
"scale": "1"
}
},
"weight": 2
},
{
"gauss": {
"estimated_preparation_time": {
"origin": "30",
"offset": "10",
"scale": "20"
}
},
"weight": 5
}
],
"query": {
"match": {
"cuisine_categories": "Oriental"
}
}
}
}
}

Related

GA4 data api - (not set) in custom dimensions

We are currently using the GA4 Data API and have run into an issue where custom dimensions return the value "(not set)".
We followed the following article to set up a custom dimension for the session count, but we are still receiving "(not set)" values.
Example of request:
{
"dateRanges": [
{
"startDate": "2021-09-01",
"endDate": "2021-09-05"
}
],
"offset": 0,
"limit": 100,
"dimensionFilter": {
"filter": {
"fieldName": "eventName",
"stringFilter": {
"matchType": 1,
"value": "screen_view",
"caseSensitive": true
}
}
},
"dimensions": [
{
"name": "customUser:applicationID"
},
{
"name": "customEvent:ga_session_number"
},
{
"name": "dateHour"
},
{
"name": "platform"
},
{
"name": "sessionSource"
},
{
"name": "sessionMedium"
},
{
"name": "sessionCampaignName"
},
{
"name": "deviceCategory"
}
],
"metrics": [
{
"name": "userEngagementDuration"
}
]
}
Does anybody have any idea why it may happen?

Find Geopoint with radius which are overlapping geopoint

As you can see below, I have:
3 GeoPoints (A, B, C), each with some radius
1 GeoPoint K
I want to find all the GeoPoints whose radius overlaps GeoPoint K.
So the answer should be B and C.
How can I achieve this?
Currently I am using Mongodb. But any other database is also fine.
This question is opinion-based as is the statement "any other db is fine".
But for the record, the way to do it in ES is as follows:
PUT circles
{
"mappings": {
"properties": {
"location": {
"type": "geo_shape",
"strategy": "recursive"
}
}
}
}
PUT circles/_doc/A
{
"location": {
"type": "circle",
"coordinates": [
16.34817123413086,
48.20968893477074
],
"radius": "2km"
}
}
PUT circles/_doc/B
{
"location": {
"type": "circle",
"coordinates": [
16.374435424804688,
48.20122291334052
],
"radius": "3km"
}
}
PUT circles/_doc/C
{
"location": {
"type": "circle",
"coordinates": [
16.386451721191406,
48.21586595914765
],
"radius": "4km"
}
}
GET circles/_search
{
"query": {
"geo_shape": {
"location": {
"shape": {
"type": "point",
"coordinates": [
16.386795043945312,
48.208773756674425
]
},
"relation": "intersects"
}
}
}
}
yielding
[
{
"_index":"circles",
"_type":"_doc",
"_id":"B",
"_score":1.0,
"_source":{
}
},
{
"_index":"circles",
"_type":"_doc",
"_id":"C",
"_score":1.0,
"_source":{
}
}
]

How to write distinct query in Elasticsearch 7.6.2

I am new to ES. My requirement is to fetch the n most recent records (by timestamp) with distinct trace_id values, along with their records, as in the SQL query "Select distinct trace_id, job_name from stpjoblogs where status="SUCCESS"". Please let me know how to achieve the same in Elasticsearch 7.6.2.
My response would be:
"_source" : {
"port" : 57376,
"job_name" : "stbl-executive-dashboard",
"timestamp" : "2020-04-28T10:55:45.640267+00:00",
"trace_id" : "180600fd27ef8108",
"PCF_Space" : "Development",
"PCF_Org" : "EPSOrg",
"status" : "SUCCESS"
}
and I tried with below query:
GET /stpjoblogs/_search
{
"query": {
"bool": {
"must": [
{"match":{"status":"SUCCESS"}}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": { "field": "trace_id"}
}
}
}
But I am getting below error
{
"error" : {
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [trace_id] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
},
"status" : 400
}
You need to use trace_id.keyword. If the index was created by automapping, this field will already be present; otherwise you need to add it to the mapping:
"trace_id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword"
}
}
}
GET /stpjoblogs/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": {
"field": "trace_id.keyword"
},
"aggs": {
"job_names": {
"terms": {
"field": "job_name.keyword",
"size": 10
},
"aggs": {
"docs": {
"top_hits": {
"size": 1,
"sort": [{"timestamp":"desc"}]
}
}
}
}
}
}
}
}
To get the latest document for a trace_id, you can use field collapsing. It works like GROUP BY and returns the top document per field value.
{
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"collapse": {
"field": "trace_id.keyword",
"inner_hits" : {
"name": "space",
"collapse" : {"field" : "job_name.keyword"},
"size": 3
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}

Elasticsearch running aggregate query - products bought by clients who only buy that product

My docs represents an order in the given format:
{
"name", // the client
"sku" // the product
}
So, suppose the following data exists:
{ "name": "rudolph", "sku": "apple" }
{ "name": "rudolph", "sku": "apple" }
{ "name": "rudolph", "sku": "apple" }
{ "name": "john", "sku": "banana" }
{ "name": "john", "sku": "banana" }
{ "name": "paul", "sku": "banana" }
{ "name": "paul", "sku": "apple" }
{ "name": "peter", "sku": "banana" }
I can get the clients who bought only 1 kind of item with the query:
{
"aggs": {
"clients": {
"terms": {
"field": "name"
},
"aggs": {
"distinct_sku": {
"cardinality": {
"field": "sku"
}
},
"unique_sku": {
"bucket_selector": {
"buckets_path": {
"qty": "distinct_sku"
},
"script": "params.qty == 1"
}
},
"aggs": {
"terms": {
"field": "sku"
}
}
}
}
},
"size": 0
}
which results
{
...
"aggregations": {
"clients": {
...
"buckets": [
{
"key": "rudolph",
"doc_count": 3,
...
"aggs": {
...
"buckets": [
{
"key": "apple",
"doc_count": 3
}
]
}
},
{
"key": "john",
"doc_count": 2,
...
"aggs": {
...
"buckets": [
{
"key": "banana",
"doc_count": 2
}
]
}
},
{
"key": "peter",
"doc_count": 1,
...
"aggs": {
...
"buckets": [
{
"key": "banana",
"doc_count": 1
}
]
}
}
]
}
}
}
Is it possible to adjust the query so that it returns how many times each item appears in the result?
Something like this:
{
"apple" : 1,
"banana" : 2
}
Thanks in advance.
EDIT
My database has a huge number of clients and a small number of products, so:
Iterating over the above aggregation result to build the wanted result is not an option.
If I have to send a query for each product, that's OK.

elasticsearch - get unique bucket count in multiple aggregations

I am trying to get the total count of buckets where my aggregation total is greater than some amount, say 1000. Below is a sample of the documents.
[
{
"_index": "orders_stage",
"_type": "order",
"_id": "AV3FtHR8lArSPNJl_rcp",
"_score": 1,
"_source": {
"total_amount": 650,
"custid": "2",
"client_id": 1
}
},
{
"_index": "orders_stage",
"_type": "order",
"_id": "AV3F5UfjlArSPNJl_rlu",
"_score": 1,
"_source": {
"total_amount": 200,
"custid": "1",
"client_id": 1
}
},
{
"_index": "orders_stage",
"_type": "order",
"_id": "AV3F5UfjlArSPNJl_rxm",
"_score": 1,
"_source": {
"total_amount": 1400,
"custid": "1",
"client_id": 1
}
}
]
So first of all, I am able to get the unique records whose totals are greater than 1000 in the buckets, but I am getting the bucket count as 2 instead of 1. I am using the following query:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"client_id": 1
}
}
]
}
},
"aggs": {
"customers": {
"terms": {
"field": "custid",
"size": 1
},
"aggs": {
"amount_spent": {
"sum": {
"field": "total_amount"
}
},
"amount_spent_filter": {
"bucket_selector": {
"buckets_path": {
"amountSpent": "amount_spent"
},
"script": "params.amountSpent >= 1000"
}
}
}
},
"uniques": {
"cardinality": {
"field": "custid"
}
}
}
}
I just want to get the count of the buckets that I am able to fetch using the above query.
Thanks