elastic search query parsing exception when adding term filter - lucene

I'm not quite sure why the term filter "term": {"language": "Austrian"} is causing an Elasticsearch parse exception.
The surprising thing is it works if I remove the query_string query.
Where would I put "term": {"language": "Austrian"} filter if it doesn't go there?
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
}
]
}
},
"filter": {
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
},
"term": { <-- Causes parse exception
"language": "Austrian"
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}

Inside your filter, you need a bool filter if you have more than one constraint, which is your case since you have both a query filter and a term filter. The correct way of doing it is like this:
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
}
]
}
},
"filter": {
"bool": { <---- add this
"must": [ <---- and this
{
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
}
},
{
"term": {
"language": "Austrian"
}
}
]
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}
However, if I may add something, I would rewrite your query a bit differently and move the query_string over to the query part and the status_type terms over to the filter part; it would feel more "natural". Also, in your query part you don't need a bool/must if you have only one constraint.
{
"query": {
"filtered": {
"query": {
"query_string": {
"fields": [
[
"name",
"message"
]
],
"query": "Arnold AND Schwarz"
}
},
"filter": {
"bool": {
"must": [
{
"terms": {
"status_type": [
"1",
"2",
"7"
]
}
},
{
"term": {
"language": "Austrian"
}
}
]
}
}
}
},
"sort": [
{
"total": {
"order": "desc"
}
}
]
}

Related

Nesting conditions on OpenSearch

We recently migrated to OpenSearch and are recreating our DB logic inside it; in most scenarios we have been successful.
There is one use case that we are struggling heavily to achieve.
The original query has nested conditions like this :
WHERE
(
(
NOT D2.DOCTYPE IN ('Text','Log','Query')
AND D2.ATTRIBUTE1 LIKE #DocNumber
)
OR (
D2.DOCTYPE IN ('Novel','Comedy')
AND D2.ATTRIBUTE2 LIKE #DocNumber
)
OR (
D2.DOCTYPE = 'Science-text'
AND D2.ATTRIBUTE3 LIKE #DocNumber
)
OR (
D2.DOCTYPE IN ('Fiction', 'Romance')
AND D2.ATTRIBUTE4 LIKE #DocNumber
)
)
AND (
#Revision = ''
AND D2.RFLAG = 'C'
)
I have read the documentation on boolean queries (https://opensearch.org/docs/latest/opensearch/query-dsl/bool/) in OpenSearch and I know it's possible to express this query, but for the life of me I have not been able to get it right.
The closest I have been to achieve the query is this :
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"should": [
{
"bool": {
"must_not": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Text"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Log"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Query"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"wildcard": {
"DOCUMENTNUMBER": {
"value": "12%"
}
}
}
]
}
}
]
}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Novel"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Comedy"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"wildcard": {
"DESCRIPTIOn": {
"value": "12%"
}
}
}
]
}
}
]
}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Science-Text"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"wildcard": {
"ATTRIBUTE3": {
"value": "12%"
}
}
}
]
}
}
]
}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Fiction"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Romance"
}
}
}
]
}
},
]
}
}
],
"must": [
{
"wildcard": {
"RFLAG": {
"value": "C"
}
}
}
]
}
}
}
But this query ALWAYS returns every document in my index.
What am I doing wrong ?

How to write distinct query in Elasticsearch 7.6.2

I am new to ES. My requirement is to fetch the n most recent records (by timestamp) with distinct trace_id values, similar to the SQL query: SELECT DISTINCT trace_id, job_name FROM stpjoblogs WHERE status = 'SUCCESS'. Please let me know how to achieve the same in Elasticsearch 7.6.2.
My response would be:
_source" : {
"port" : 57376,
"job_name" : "stbl-executive-dashboard",
"timestamp" : "2020-04-28T10:55:45.640267+00:00",
"trace_id" : "180600fd27ef8108",
"PCF_Space" : "Development",
"PCF_Org" : "EPSOrg",
"status" : "SUCCESS"
}
and I tried with below query:
GET /stpjoblogs/_search
{
"query": {
"bool": {
"must": [
{"match":{"status":"SUCCESS"}}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": { "field": "trace_id"}
}
}
}
But I am getting the error below:
{
"error" : {
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [trace_id] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
},
"status" : 400
}
You need to use trace_id.keyword. If the index was created with automatic (dynamic) mapping, this sub-field will already be present; otherwise you need to add it to the mapping:
"trace_id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword"
}
}
}
GET /stpjoblogs/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": {
"field": "trace_id.keyword"
},
"aggs": {
"job_names": {
"terms": {
"field": "job_name.keyword",
"size": 10
},
"aggs": {
"docs": {
"top_hits": {
"size": 1,
"sort": [{"timestamp":"desc"}]
}
}
}
}
}
}
}
}
To get the latest document for each trace_id, you can use field collapsing. It works like GROUP BY and returns the top document for each distinct value of the field:
{
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"collapse": {
"field": "trace_id.keyword",
"inner_hits" : {
"name": "space",
"collapse" : {"field" : "job_name.keyword"},
"size": 3
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}

Elasticsearch equal SQL %Like%

Coming from here, I'm asking myself what the Elasticsearch syntax is for queries like this:
WHERE text LIKE "%quick%"
AND text LIKE "%brown%"
AND text LIKE "%fox%"
My attempt (unfortunately without success):
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"terms": {
"text": [
"*quick*",
"*brown*",
"*fox*"
]
}
}
]
}
}
]
}
}
Try using bool and wildcard to do such a query.
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"text": "*quick*"
}
},
{
"wildcard": {
"text": "*brown*"
}
},
{
"wildcard": {
"text": "*fox*"
}
}
]
}
}
}
Wildcard Query Matches documents that have fields matching a wildcard expression (not analyzed). Supported wildcards are *, which matches any character sequence (including the empty one), and ?, which matches any single character.
That's what you're looking for. Just put the desired number of wildcard queries in your bool/must:
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"text": {
"value": "*quick*"
}
}
},
{
"wildcard": {
"text": {
"value": "*brown*"
}
}
},
{
"wildcard": {
"text": {
"value": "*fox*"
}
}
}
]
}
}
}

Elasticsearch match combos of two fields

How can I get this simple SQL query running on Elasticsearch?
SELECT * FROM [mytype] where (id=123 and cid = classroomIdA) or
(id=234 and cid = classroomIdB) or (id=345 and cid = classroomIdC)
I'm really having trouble with the syntax; multi-match queries don't work in my case. What type of query should I use?
The right way to do it is to combine bool/should (for the outer OR conditions) and bool/filter (for the inner AND conditions) together.
POST mytype/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"filter": [
{
"term": {
"id": 123
}
},
{
"term": {
"cid": "classroomIdA"
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"id": 234
}
},
{
"term": {
"cid": "classroomIdB"
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"id": 345
}
},
{
"term": {
"cid": "classroomIdC"
}
}
]
}
}
]
}
}
}
UPDATE
The equivalent ES 1.7 query would be (just replace bool/filter by bool/must):
POST mytype/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"must": [
{
"term": {
"id": 123
}
},
{
"term": {
"cid": "classroomIdA"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"id": 234
}
},
{
"term": {
"cid": "classroomIdB"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"id": 345
}
},
{
"term": {
"cid": "classroomIdC"
}
}
]
}
}
]
}
}
}

Elasticsearch function_score not in explain result

I'm trying to get a function_score working, but it appears to be ignored. Here's my query:
{
"explain" : true,
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_score * 999 * doc['total_digital_items_sold'].value"
}
}
],
"query": {
"filtered": {
"query": {
"query_string": {
"query": "photo",
"fields": [ "title^3" ],
"use_dis_max": true,
"analyzer": "snowball"
}
},
"filter": { "and": [ { } ] }
}
}
}
}
}
}
It appears to not affect the results, nor show up in the explain. Any ideas on what I'm doing wrong?