How to properly implement boolean logic - sql

I'm using ElasticSearch for logging within an application. I need to write a log viewer that filters on all the fields of my document.
My documents look like this:
"_source": {
"timestamp": 1373502012000,
"userId": 6,
"paId": 56331,
"lId": 6,
"prId": 2,
"vId": 6336,
"actionType": "LOAD_DATA"
}
actionType is an enum (Java).
I need to write an Elasticsearch equivalent of the following SQL query:
SELECT * FROM snapshot.action_log_item
WHERE timestamp BETWEEN 1372718783286 AND 1372718783286
AND userId=6
AND paId=56331
AND lId=6
AND prId=2
AND vId=6336
AND (
actionType='LOAD_DATA' OR
actionType='SAVE_DATA' OR
actionType='LOG_IN'
);
Please help me write a properly nested query and/or filter to get a result equivalent to my SQL statement.
EDIT Here's my current code (that works without the { "or"... portion).
{
"query" : {
"bool" : {
"must" : [ {
"term" : {
"userId" : 6
}
}, {
"term" : {
"lId" : 6
}
}, {
"term" : {
"vId" : 6336
}
} ]
}
},
"filter" : {
"and" : {
"filters" : [ {
"term" : {
"paId" : 56331
}
}, {
"range" : {
"timestamp" : {
"from" : 1372718783286,
"to" : 1377643583286,
"include_lower" : true,
"include_upper" : true
}
}
}, {
"or" : {
"filters" : [ {
"term" : {
"actionType" : "LOAD_DATA"
}
}, {
"term" : {
"actionType" : "SAVE_DATA"
}
}, {
"term" : {
"actionType" : "LOG_IN"
}
} ]
}
} ]
}
}
}
EDIT: The following query works. It's not the same query as above, but it returns the expected result. It seems that these filters/queries don't work on the actionType field.
{
"size": 30,
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"term": {
"uId": 6
}
},
{
"term": {
"loId": 6
}
},
{
"term": {
"prId": 2
}
},
{
"terms": {
"paId": [
56331,
56298
],
"minimum_should_match": 1
}
}
]
}
},
"filter": {
"range": {
"timestamp": {
"from": 1372718783286,
"to": 1377643583286,
"include_lower": true,
"include_upper": true
}
}
}
}
}
}

The {or... portion should look like this:
{
"or": [
{
"term": {
"actionType": "LOAD_DATA"
}
},
{
"term": {
"actionType": "SAVE_DATA"
}
},
{
"term": {
"actionType": "LOG_IN"
}
}
]
}
You can check the doc for that filter here
Edit
As I see you are having problems I rewrote your query. I hope it helps
{
"query": {
"filtered": {
"query": {
"bool": {
"must": [
{
"term": {
"userId": 6
}
},
{
"term": {
"paId": 56331
}
},
{
"term": {
"lId": 6
}
},
{
"term": {
"prId": 2
}
},
{
"term": {
"vId": 6336
}
},
{
"terms": {
"actionType": [
"LOAD_DATA",
"SAVE_DATA",
"LOG_IN"
],
"minimum_should_match": 1
}
}
]
}
},
"filter": {
"range": {
"timestamp": {
"from": 1372718783286,
"to": 1377643583286,
"include_lower": true,
"include_upper": true
}
}
}
}
}
}
Basically, I put the date range in the filter, and the other conditions are term queries inside the must clause of the bool query. The OR part is now inside the must clause as a terms query, which acts as an OR between those three values.

Related

Nesting conditions on OpenSearch

We recently migrated to OpenSearch and are recreating our database logic inside it; in most scenarios we have been successful.
There is one use case that we are struggling heavily to achieve.
The original query has nested conditions like this :
WHERE
(
(
NOT D2.DOCTYPE IN ('Text','Log','Query')
AND D2.ATTRIBUTE1 LIKE #DocNumber
)
OR (
D2.DOCTYPE IN ('Novel','Comedy')
AND D2.ATTRIBUTE2 LIKE #DocNumber
)
OR (
D2.DOCTYPE = 'Science-text'
AND D2.ATTRIBUTE3 LIKE #DocNumber
)
OR (
D2.DOCTYPE IN ('Fiction', 'Romance')
AND D2.ATTRIBUTE4 LIKE #DocNumber
)
)
AND (
#Revision = ''
AND D2.RFLAG = 'C'
)
I have read the documentation on boolean queries (https://opensearch.org/docs/latest/opensearch/query-dsl/bool/) in OpenSearch and I know it's possible to express this query, but for the life of me I have not been able to get it right.
The closest I have come is this:
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"should": [
{
"bool": {
"must_not": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Text"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Log"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Query"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"wildcard": {
"DOCUMENTNUMBER": {
"value": "12%"
}
}
}
]
}
}
]
}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Novel"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Comedy"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"wildcard": {
"DESCRIPTIOn": {
"value": "12%"
}
}
}
]
}
}
]
}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Science-Text"
}
}
}
]
}
},
{
"bool": {
"must": [
{
"wildcard": {
"ATTRIBUTE3": {
"value": "12%"
}
}
}
]
}
}
]
}
},
{
"bool": {
"should": [
{
"bool": {
"should": [
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Fiction"
}
}
},
{
"wildcard": {
"DOCUMENTTYPE": {
"value": "Romance"
}
}
}
]
}
}
]
}
}
],
"must": [
{
"wildcard": {
"RFLAG": {
"value": "C"
}
}
}
]
}
}
}
But this query ALWAYS returns every document in my index.
What am I doing wrong ?

How to write distinct query in Elasticsearch 7.6.2

I am new to ES. My requirement is to fetch the n most recent records (by timestamp) with distinct trace_id values, along with their fields. This is like the SQL query "SELECT DISTINCT trace_id, job_name FROM stpjoblogs WHERE status='SUCCESS'". Please let me know how to achieve the same in Elasticsearch 7.6.2.
My response would be:
"_source" : {
"port" : 57376,
"job_name" : "stbl-executive-dashboard",
"timestamp" : "2020-04-28T10:55:45.640267+00:00",
"trace_id" : "180600fd27ef8108",
"PCF_Space" : "Development",
"PCF_Org" : "EPSOrg",
"status" : "SUCCESS"
}
and I tried with below query:
GET /stpjoblogs/_search
{
"query": {
"bool": {
"must": [
{"match":{"status":"SUCCESS"}}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": { "field": "trace_id"}
}
}
}
But I am getting below error
{
"error" : {
"caused_by" : {
"type" : "illegal_argument_exception",
"reason" : "Text fields are not optimised for operations that require per-document field data like aggregations and sorting, so these operations are disabled by default. Please use a keyword field instead. Alternatively, set fielddata=true on [trace_id] in order to load field data by uninverting the inverted index. Note that this can use significant memory."
}
},
"status" : 400
}
You need to use trace_id.keyword. If the index was created by automatic (dynamic) mapping, this sub-field will already be present; otherwise you need to add it to the mapping:
"trace_id":{
"type":"text",
"fields":{
"keyword":{
"type":"keyword"
}
}
}
GET /stpjoblogs/_search
{
"size": 0,
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
],
"aggs": {
"distinct_transactions": {
"terms": {
"field": "trace_id.keyword"
},
"aggs": {
"job_names": {
"terms": {
"field": "job_name.keyword",
"size": 10
},
"aggs": {
"docs": {
"top_hits": {
"size": 1,
"sort": [{"timestamp":"desc"}]
}
}
}
}
}
}
}
}
To get the latest document for each trace_id, you can use field collapsing. It works like GROUP BY and returns the top document per field value.
{
"query": {
"bool": {
"must": [
{
"match": {
"status": "SUCCESS"
}
}
]
}
},
"collapse": {
"field": "trace_id.keyword",
"inner_hits" : {
"name": "space",
"collapse" : {"field" : "job_name.keyword"},
"size": 3
}
},
"sort": [
{
"timestamp": {
"order": "desc"
}
}
]
}

Elasticsearch equal SQL %Like%

Coming from here, I'm asking myself what the Elasticsearch syntax is for queries like this:
WHERE text LIKE "%quick%"
AND text LIKE "%brown%"
AND text LIKE "%fox%"
my try (unfortunately without success)
"query": {
"bool": {
"filter": [
{
"bool": {
"must": [
{
"terms": {
"text": [
"*quick*",
"*brown*",
"*fox*"
]
}
}
]
}
}
]
}
}
Try using bool and wildcard to do such a query.
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"text": "*quick*"
}
},
{
"wildcard": {
"text": "*brown*"
}
},
{
"wildcard": {
"text": "*fox*"
}
}
]
}
}
}
Wildcard Query Matches documents that have fields matching a wildcard expression (not analyzed). Supported wildcards are *, which matches any character sequence (including the empty one), and ?, which matches any single character.
That's what you're looking for. Just put desired amount of wildcard queries in your bool/must:
{
"query": {
"bool": {
"must": [
{
"wildcard": {
"text": {
"value": "*quick*"
}
}
},
{
"wildcard": {
"text": {
"value": "*brown*"
}
}
},
{
"wildcard": {
"text": {
"value": "*fox*"
}
}
}
]
}
}
}

Elasticsearch match combos of two fields

How can I get this simple SQL query running on Elasticsearch?
SELECT * FROM [mytype] where (id=123 and cid = classroomIdA) or
(id=234 and cid = classroomIdB) or (id=345 and cid = classroomIdC)
I'm really having trouble with its syntax; multi-match queries don't work in my case. What type of query should I use?
The right way to do it is to combine bool/should (for the outer OR conditions) and bool/filter (for the inner AND conditions) together.
POST mytype/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"filter": [
{
"term": {
"id": 123
}
},
{
"term": {
"cid": "classroomIdA"
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"id": 234
}
},
{
"term": {
"cid": "classroomIdB"
}
}
]
}
},
{
"bool": {
"filter": [
{
"term": {
"id": 345
}
},
{
"term": {
"cid": "classroomIdC"
}
}
]
}
}
]
}
}
}
UPDATE
The equivalent ES 1.7 query would be (just replace bool/filter by bool/must):
POST mytype/_search
{
"query": {
"bool": {
"minimum_should_match": 1,
"should": [
{
"bool": {
"must": [
{
"term": {
"id": 123
}
},
{
"term": {
"cid": "classroomIdA"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"id": 234
}
},
{
"term": {
"cid": "classroomIdB"
}
}
]
}
},
{
"bool": {
"must": [
{
"term": {
"id": 345
}
},
{
"term": {
"cid": "classroomIdC"
}
}
]
}
}
]
}
}
}

Elasticsearch function_score not in explain result

I'm trying to get a function_score working, but it appears to be ignored. Here's my query:
{
"explain" : true,
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_score * 999 * doc['total_digital_items_sold'].value"
}
}
],
"query": {
"filtered": {
"query": {
"query_string": {
"query": "photo",
"fields": [ "title^3" ],
"use_dis_max": true,
"analyzer": "snowball"
}
},
"filter": { "and": [ { } ] }
}
}
}
}
}
}
It appears to not affect the results, nor show up in the explain. Any ideas on what I'm doing wrong?