How to write distinct query in Elasticsearch 7.6.2 - elasticsearch-aggregation

I am new to ES. My requirement is to fetch last n recent timestamp and distinct trace_id along with their records. Like in the sql query "Select distinct trace_id, job_name from stpjoblogs where status="SUCCESS"". Please let me know how to achieve the same in Elasticsearch 7.6.2.
My response would be:
_source" : {
"port" : 57376,
"job_name" : "stbl-executive-dashboard",
"timestamp" : "2020-04-28T10:55:45.640267+00:00",
"trace_id" : "180600fd27ef8108",
"PCF_Space" : "Development",
"PCF_Org" : "EPSOrg",
"status" : "SUCCESS"
}
and I tried with below query:
GET /stpjoblogs/_search
{
"query": {
"bool": {
"must": [
{"match":{"status":"SUCCESS"}}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": { "field": "trace_id"}
}
}
}
But I am getting below error
{
"error" : {
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [trace_id] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
},
"status" : 400
}

You need to add trace_id.keyword. If index is created by automapping this field will be already present or you need to this in mapping
"trace_id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword"
}
}
}
GET /stpjoblogs/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": {
"field": "trace_id.keyword"
},
"aggs": {
"job_names": {
"terms": {
"field": "job_name.keyword",
"size": 10
},
"aggs": {
"docs": {
"top_hits": {
"size": 1,
"sort": [{"timestamp":"desc"}]
}
}
}
}
}
}
}
}
To get latest document for a trace_id, you can use field collapsing , it works like Group by and returns top one document per field
{
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"collapse": {
"field": "trace_id.keyword",
"inner_hits" : {
"name": "space",
"collapse" : {"field" : "job_name.keyword"},
"size": 3
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}

Related

ElasticSearch equivalent to SQL "WHERE (condition1 OR (condition2 AND condition3))"

I'm new to elastic search and I'm having a hard time figuring out how to transform the SQL query below into ES syntax :
WHERE (id IS NULL OR (id IS NOT NULL AND test = 0))
I guess I have to use the "should" keyword somewhere and maybe the filter, but I'm not quite sure how to make the query in ES.
Your query should be like:
{
"bool": {
"should": [
{
"bool": {
"must_not": [
{
"exists": {
"field": "id"
}
}
]
}
},
{
"bool": [
"must": [
{
"exists": {
"field": "id"
}
},
{
"term": {
"test": {
"value": 0
}
}
}
]
]
}
]
}
}

Elasticsearch filter after aggregation

I want convert sql query to ES query.
This is my sql query
SELECT * FROM
(SELECT order_number, MIN(log_datetime) as log_datetime
FROM t_log
WHERE mall_id='amazon' AND action_name='order_register' AND log_level='3'
GROUP BY order_number) as temp
WHERE log_datetime BETWEEN '2018-11-16 00:00:00' AND '2018-11-16 23:59:59';
and my es query
{
"size": 0,
"query": {
"constant_score": {
"filter": {
"bool": {
"must": [
{
"term": {
"mall_id": "devsdkwms1001"
}
},
{
"term": {
"action_name": "order_register"
}
},
{
"term": {
"log_level": 3
}
}
]
}
}
}
},
"aggs": {
"temp": {
"range": {
"field": "log_datetime",
"ranges": [
{
"from": "2018-11-16 00:00:00",
"to": "2018-11-16 23:59:59"
}
]
},
"aggs": {
"result": {
"terms": {
"field": "order_number",
"size": 0
}
}
}
}
}
}
My es query.. it doesn't work properly.
I can't find a way to filter the aggregate results in Elastic Search.
Only can aggregate after filter.
Is there any way? Thank you
If you want filter agg result take a look to bucker selector:
https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-pipeline-bucket-selector-aggregation.html

Elasticsearch equal SQL %Like%

Coming from here i'm asking myselve for the elasticsearch syntax for such querys:
WHERE text LIKE "%quick%"
AND text LIKE "%brown%"
AND text LIKE "%fox%"
my try (unfortunately without success)
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"terms": {
"text": [
"*quick*",
"*brown*",
"*fox*"
]
}
}
]
}
}
]
}
}
Try using bool and wildcard to do such a query.
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"text": "*quick*"
}
},
{
"wildcard": {
"text": "*brown*"
}
},
{
"wildcard": {
"text": "*fox*"
}
}
]
}
}
}
Wildcard Query Matches documents that have fields matching a wildcard expression (not analyzed). Supported wildcards are *, which matches any character sequence (including the empty one), and ?, which matches any single character.
That's what you're looking for. Just put desired amount of wildcard queries in your bool/must:
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"text": {
"value": "*quick*"
}
}
},
{
"wildcard": {
"text": {
"value": "*brown*"
}
}
},
{
"wildcard": {
"text": {
"value": "*fox*"
}
}
}
]
}
}
}

elastic search query parsing exception when adding term filter

I'm not quite sure why the term filter "term": {"language": "Austrian"} is causing an elastic search parse exception.
The surprising thing is it works if I remove the query_string query.
Where would I put "term": {"language": "Austrian"} filter if it doesn't go there?
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
}
]
}
},
"filter": {
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
},
"term": { <-- Causes parse exception
"language": "Austrian"
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}
Inside your filter, you need a bool filter if you have more than one constraints, which is your case, since you have a query filter and a term filter. So the correct way of doing it is like this:
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
}
]
}
},
"filter": {
"bool": { <---- add this
"must": [ <---- and this
{
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
}
},
{
"term": {
"language": "Austrian"
}
}
]
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}
However, if I may add something, I would rewrite your query a bit differently and move the query_string over to the query part and the status_type term over to the filter part, it would feel more "natural". Also, in your query part you don't need a bool/must if you have only one constraint.
{
"query": {
"filtered": {
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
},
"filter": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
},
{
"term": {
"language": "Austrian"
}
}
]
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}

How to properly implement boolean logic

I'm using ElasticSearch for logging within an application. I need to write a log viewer that filters on all the fields of my document.
My documents look like this:
"_source": {
"timestamp": 1373502012000,
"userId": 6,
"paId": 56331,
"lId": 6,
"prId": 2,
"vId": 6336,
"actionType": "LOAD_DATA"
}
actionType is an enum (Java).
I need to write a ElasticSearch equivalent to the following SQL query:
SELECT * FROM snapshot.action_log_item
WHERE timestamp BETWEEN 1372718783286 AND 1372718783286
AND userId=6
AND paId=56331
AND lId=6
AND prId=2
AND vId=6336
AND (
actionType='LOAD_DATA' OR
actionType='SAVE_DATA' OR
actionType='LOG_IN'
);
Please help me write a properly nested query and/or filter to get a result equivalent to my SQL statement.
EDIT Here's my current code (that works without the { "or"... portion).
{
"query" : {
"bool" : {
"must" : [ {
"term" : {
"userId" : 6
}
}, {
"term" : {
"lId" : 6
}
}, {
"term" : {
"vId" : 6336
}
} ]
}
},
"filter" : {
"and" : {
"filters" : [ {
"term" : {
"paId" : 56331
}
}, {
"range" : {
"timestamp" : {
"from" : 1372718783286,
"to" : 1377643583286,
"include_lower" : true,
"include_upper" : true
}
}
}, {
"or" : {
"filters" : [ {
"term" : {
"actionType" : "LOAD_DATA"
}
}, {
"term" : {
"actionType" : "SAVE_DATA"
}
}, {
"term" : {
"actionType" : "LOG_IN"
}
} ]
}
} ]
}
}
}
EDIT: The following query works. It's not the same query as above, but it returns the expected result. It seems that these filters/queries don't work on the actionType field.
{
"size": 30,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"term": {
"uId": 6
}
},
{
"term": {
"loId": 6
}
},
{
"term": {
"prId": 2
}
},
{
"terms": {
"paId": [
56331,
56298
],
"minimum_should_match": 1
}
}
]
}
},
"filter": {
"range": {
"timestamp": {
"from": 1372718783286,
"to": 1377643583286,
"include_lower": true,
"include_upper": true
}
}
}
}
}
}
The {or... portion should like this:
{
"or": [
{
"term": {
"actionType": "LOAD_DATA"
}
},
{
"term": {
"actionType": "SAVE_DATA"
}
},
{
"term": {
"actionType": "LOG_IN"
}
}
]
}
You can check the doc for that filter here
Edit
As I see you are having problems I rewrote your query. I hope it helps
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"term": {
"userId": 6
}
},
{
"term": {
"paId": 56331
}
},
{
"term": {
"lId": 6
}
},
{
"term": {
"prId": 2
}
},
{
"term": {
"vId": 6336
}
},
{
"terms": {
"actionType": [
"LOAD_DATA",
"SAVE_DATA",
"LOG_IN"
],
"minimum_should_match": 1
}
}
]
}
},
"filter": {
"range": {
"timestamp": {
"from": 1372718783286,
"to": 1377643583286,
"include_lower": true,
"include_upper": true
}
}
}
}
}
}
Basically I put the date range as filter and the other conditions are term queries inside the must clause of the boolean query. You can see that the or part is now inside the must clause as a terms query that act as or between those 3 values.