elasticsearch - get unique bucket count in multiple aggregations - sql

I am trying to get the total count of buckets where my aggregation total is greater than some amount say 1000. Below are the documents sample.
[
{
"_index": "orders_stage",
"_type": "order",
"_id": "AV3FtHR8lArSPNJl_rcp",
"_score": 1,
"_source": {
"total_amount": 650,
"custid": "2",
"client_id": 1
}
},
{
"_index": "orders_stage",
"_type": "order",
"_id": "AV3F5UfjlArSPNJl_rlu",
"_score": 1,
"_source": {
"total_amount": 200,
"custid": "1",
"client_id": 1
}
},
{
"_index": "orders_stage",
"_type": "order",
"_id": "AV3F5UfjlArSPNJl_rxm",
"_score": 1,
"_source": {
"total_amount": 1400,
"custid": "1",
"client_id": 1
}
}
]
So first of all, I am able to get the unique records those are greater than 1000 in buckets but the I am getting the buckets count as 2 instead of 1. I am using following query:
{
"size": 0,
"query": {
"bool": {
"must": [
{
"term": {
"client_id": 1
}
}
]
}
},
"aggs": {
"customers": {
"terms": {
"field": "custid",
"size": 1
},
"aggs": {
"amount_spent": {
"sum": {
"field": "total_amount"
}
},
"amount_spent_filter": {
"bucket_selector": {
"buckets_path": {
"amountSpent": "amount_spent"
},
"script": "params.amountSpent >= 1000"
}
}
}
},
"uniques": {
"cardinality": {
"field": "custid"
}
}
}
}
I just want to get the count of buckets that I am able to fetch using above query.
Thanks

Related

Query in Elastic Search with some field and some condition?

I have data about Product, which have some fields ( _id, Shop, ProductVerion...). It has indexed in Elastic Search. And I want to search products have max ProductVersion with Shop.
Ex:
Shop Amazon has 3 Version crawl product: 111,222,333.
Shop Ebay has 2 version: 222,444
Shop Alibaba has 2 version: 111, 444
Verions may may be the same.
And now, I want to get Products which have:
Shop Amazon and ProducVersion 333
or Shop Ebay and ProductVersion 444
or Shop Alibaba and ProductVersion 444.
But I don't know query that.
Help me, pls!!
I tried it out with some example documents. I kept version field as numeric field.
These are example documents with which I tried
[
{
"_index": "test",
"_type": "doc",
"_id": "12334",
"_score": 1,
"_source": {
"shopName": "amazon",
"version": 341
}
},
{
"_index": "test",
"_type": "doc",
"_id": "123",
"_score": 1,
"_source": {
"shopName": "amazon",
"version": 3412
}
},
{
"_index": "test",
"_type": "doc",
"_id": "1233",
"_score": 1,
"_source": {
"shopName": "amazon",
"version": 341
}
},
{
"_index": "test",
"_type": "doc",
"_id": "1238",
"_score": 1,
"_source": {
"shopName": "alibaba",
"version": 34120
}
},
{
"_index": "test",
"_type": "doc",
"_id": "1239",
"_score": 1,
"_source": {
"shopName": "alibaba",
"version": 3414
}
},
{
"_index": "test",
"_type": "doc",
"_id": "123910",
"_score": 1,
"_source": {
"shopName": "alibaba",
"version": 124
}
}
]
As #demas had specified I went ahead with terms aggregation and top hits aggregation
indexName/_search
{
"size": 0,
"aggs": {
"shop": {
"terms": {
"field": "shopName.keyword"
},
"aggs": {
"product": {
"top_hits": {
"size": 1,
"sort": [
{
"version": {
"order": "desc"
}
}
]
}
}
}
}
}
}
This should provide you with the document that contains the highest product version number for each shop as shown below.
{
"took": 8,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": 6,
"max_score": 0,
"hits": []
},
"aggregations": {
"shop": {
"doc_count_error_upper_bound": 0,
"sum_other_doc_count": 0,
"buckets": [
{
"key": "alibaba",
"doc_count": 3,
"product": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "test",
"_type": "doc",
"_id": "1238",
"_score": null,
"_source": {
"shopName": "alibaba",
"version": 34120
},
"sort": [
34120
]
}
]
}
}
},
{
"key": "amazon",
"doc_count": 3,
"product": {
"hits": {
"total": 3,
"max_score": null,
"hits": [
{
"_index": "test",
"_type": "doc",
"_id": "123",
"_score": null,
"_source": {
"shopName": "amazon",
"version": 3412
},
"sort": [
3412
]
}
]
}
}
}
]
}
}
}
It will be better if you provide reproducible example, but it looks like you need to use aggregation framework: term aggregation and top hits aggregation:
GET /_search
{
"aggs": {
"shops": {
"terms": { "field": "Shop" }
},
"aggs": {
"tops": {
"top_hits": {
"size": 100
}
}
}
}
}

Elasticsearch running aggregate guery - products bought by clients who only buy that product

My docs represents an order in the given format:
{
"name", // the client
"sku" // the product
}
So, suppose the follow data exists:
{ "name": "rudolph", "sku": "apple" }
{ "name": "rudolph", "sku": "apple" }
{ "name": "rudolph", "sku": "apple" }
{ "name": "john", "sku": "banana" }
{ "name": "john", "sku": "banana" }
{ "name": "paul", "sku": "banana" }
{ "name": "paul", "sku": "apple" }
{ "name": "peter", "sku": "banana" }
I can get the clients who bought only 1 kind of item with the query:
{
"aggs": {
"clients": {
"terms": {
"field": "name"
},
"aggs": {
"distinct_sku": {
"cardinality": {
"field": "sku"
}
},
"unique_sku": {
"bucket_selector": {
"buckets_path": {
"qty": "distinct_sku"
},
"script": "params.qty == 1"
}
},
"aggs": {
"terms": {
"field": "sku"
}
}
}
}
},
"size": 0
}
which results
{
...
"aggregations": {
"clients": {
...
"buckets": [
{
"key": "rudolph",
"doc_count": 3,
...
"aggs": {
...
"buckets": [
{
"key": "apple",
"doc_count": 3
}
]
}
},
{
"key": "john",
"doc_count": 2,
...
"aggs": {
...
"buckets": [
{
"key": "banana",
"doc_count": 2
}
]
}
},
{
"key": "peter",
"doc_count": 1,
...
"aggs": {
...
"buckets": [
{
"key": "banana",
"doc_count": 1
}
]
}
}
]
}
}
}
It's possible to manage the query to return how much each item appears in the result?
Something like this:
{
"apple" : 1,
"banana" : 2
}
Thanks in advance.
EDIT
My base has a huge amount of clients and a small quantity of products, so:
Iterate over the above aggregation result to build the wanted result is not an option.
If I have to send a query for each product, It's Ok.

Mongo DB query: project array element to property

Having following data
{
"_id": "...",
"name": "John",
"purchases": [
{
"date": {
"$date": "2016-12-20T00:00:00.000Z"
},
"amount": 1000
},
{
"date": {
"$date": "2016-12-23T00:00:00.000Z"
},
"amount": 100
}
]
}
How to get latest purchase amount as result field with different name?
To get next result:
{
"_id": "...",
"name": "John",
"purchases": [
{
"date": {
"$date": "2016-12-20T00:00:00.000Z"
},
"amount": 1000
},
{
"date": {
"$date": "2016-12-23T00:00:00.000Z"
},
"amount": 100
}
]
},
"latestPurchaseAmount": 100
}
You have to use the $rename take a look at this link
And for your last item you should look at this it will helps you Link
You can do something like this. This below query will unwind all the purchases in the collection and, sort the purchases by date descending and, limit the result to one and project the corresponding amount.
db.purchases.aggregate([{
$unwind: "$purchases"
}, {
$sort: {
"purchases.date": -1
}
}, {
$limit: 1
}, {
$project: {
_id: 0,
latestPurchaseAmount: "$purchases.amount"
}
}]);
Output
{ "latestPurchaseAmount" : 100 }

ElasticSearch combine match and function score queries

I have a complicated query to run in elasticsearch that spans across multiple fields (nested and non-nested). I am using a bool should query across a multi-field match and nested field match.
Additionally I want a composite scoring which takes into account several other parameters such as location, rating etc.
I tried to run a simplified proof of concept combined query which looks for a matching term and tries to use function score for the other fields but I am running into an error from es.
GET init/restaurant/_search/
{
"query": {
"match": {
"cuisine_categories": "Oriental"
},
"function_score": {
"functions": [
{
"gauss": {
"coordinates": {
"origin": { "lat": 74.20, "lon": 31.23 },
"offset": "1km",
"scale": "3km"
}
}
},
{
"gauss": {
"nomnom_rating": {
"origin": "4.5",
"offset": "0.5",
"scale": "1"
}
},
"weight": 2
},
{
"gauss": {
"estimated_preparation_time": {
"origin": "30",
"offset": "10",
"scale": "20"
}
},
"weight": 5
}
]
}
}
}
The query is not a valid. The match clause should be within the query object of function score as shown below
Example:
POST init/restaurant/_search/
{
"query": {
"function_score": {
"functions": [
{
"gauss": {
"coordinates": {
"origin": {
"lat": 74.2,
"lon": 31.23
},
"offset": "1km",
"scale": "3km"
}
}
},
{
"gauss": {
"nomnom_rating": {
"origin": "4.5",
"offset": "0.5",
"scale": "1"
}
},
"weight": 2
},
{
"gauss": {
"estimated_preparation_time": {
"origin": "30",
"offset": "10",
"scale": "20"
}
},
"weight": 5
}
],
"query": {
"match": {
"cuisine_categories": "Oriental"
}
}
}
}
}

Need to get the following query to output correctly

Hi guys I've been trying all day to construct this simple mongo query but I can't get the desire output. Below is the query and the current output.
db.customers.aggregate(
{ $match : { "status" : "Closed" } },
{ $unwind: "$lines" },
{ $group : {
_id:{label: "$lines.label",date: {$substr: [ "$date", 0, 10 ]}},
values: { $push: { date: {$substr: [ "$date", 0, 10 ]}} },
count: { $sum: 1 }
}},
{ $project : {
_id : 1,
values:1,
count:1
}}
);
Which outputs the following.
{
"result": [
{
"_id": {
"label": "label",
"date": "2010-10-01"
},
"values": [
{
"date": "2010-10-01"
},
{
"date": "2010-10-01"
},
{
"date": "2010-10-01"
},
{
"date": "2010-10-01"
}
],
"count": 4
},
{
"_id": {
"label": "label",
"date": "2010-10-10"
},
"values": [
{
"date": "2010-10-10"
}
],
"count": 1
},
{
"_id": {
"label": "label",
"date": "2010-07-25"
},
"values": [
{
"date": "2010-07-25"
}
],
"count": 1
}
]
}
However the output below is the one that I'm looking for and just can't get. I can obviously get all the data I desire, just in the wrong places.
{
"result": [
{
"_id": "label",
"values": [
{
"date": "2010-11-27",
"count": 4
},
{
"date": "2010-10-10",
"count": 1
},
{
"date": "2010-07-25",
"count": 1
}
]
}
]
}
Like always thanks for the help and support.
Can you try this:
db.customers.aggregate([
{ $match : { "status" : "Closed" } },
{ $unwind: "$lines" },
{ $group : {
_id:{label: "$lines.label",date: {$substr: [ "$date", 0, 10 ]}},
values: { $push: { date: {$substr: [ "$date", 0, 10 ]}} },
count: { $sum: 1 }
}},
// This one I added to group them behind a single label in an array list
{ $group : {
_id:{
label: "$_id.label"
},
values : {
$push : { date : "$date", count : "$count" }
}
}
},
{ $project : {
_id : 1,
values:1,
count:1
}
}
]);
If I got your problem right, you like to group the counts + dates in an values array. That you can do with $push after the 1st group stage.