mongodb aggregate $limit and $lookup sequence problems - sql

db.getCollection('xxxxxxxx').aggregate(
[
{
"$match": {
"campaigns.campaign_id":ObjectId("5c6e50932fb955f81b0c9f59")
}
},
{
"$sort": {
"campaigns.updatedAt": 1,
"_id": -1
}
},
{
"$limit": 15
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.sales_funnel_id",
"foreignField": "_id",
"as": "sale_funnels"
}
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.callresult_id",
"foreignField": "_id",
"as": "callresults"
}
},
{
"$lookup": {
"from": "accounts",
"localField": "currentStat.qc.account_id",
"foreignField": "_id",
"as": "accounts"
}
},
{
"$match": {
"$or": [
{
"姓名": /137/
},
{
"电话号码": /137/
},
{
"电子邮件": /137/
},
{
"城市": /137/
},
{
"区域": /137/
},
{
"备注": /137/
}
]
}
}
]
)
The above aggregation query returns 0 results ($limit placed before $lookup).
If $limit instead follows the $lookup stages and the final $match, the query is:
db.getCollection('xxxxxxxxxxx').aggregate(
[
{
"$match": {
"campaigns.campaign_id":ObjectId("5c6e50932fb955f81b0c9f59")
}
},
{
"$sort": {
"campaigns.updatedAt": 1,
"_id": -1
}
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.sales_funnel_id",
"foreignField": "_id",
"as": "sale_funnels"
}
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.callresult_id",
"foreignField": "_id",
"as": "callresults"
}
},
{
"$lookup": {
"from": "accounts",
"localField": "currentStat.qc.account_id",
"foreignField": "_id",
"as": "accounts"
}
},
{
"$match": {
"$or": [
{
"姓名": /137/
},
{
"电话号码": /137/
},
{
"电子邮件": /137/
},
{
"城市": /137/
},
{
"区域": /137/
},
{
"备注": /137/
}
]
}
},
{
"$limit": 15
}
]
)
Why is that?

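In the first case (limit before lookup), the lookup is done on the first 15 matched documents only, and the final $match then filters just those 15 documents, so it can return nothing if none of them satisfy it. However, when the limit is at the end of the pipeline, the lookup and the final $match are applied to all matched documents, and only then is the limit applied.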
Taking a simpler example,
This query finds all documents where value of field "n" is 1 and then shows the first 15 matching documents.
db.collection.aggregate([{$match: {"n" : 1}}, {$limit: 15}])
However, the below query takes the top 15 documents and then runs a match on those 15 documents only.
db.collection.aggregate([{$limit: 15}, {$match: {"n" : 1}}])

1) In the first case, you limit the results to 15 documents and then the match condition is executed, so the match condition only works on those 15 documents.
2) In the second case, you match against all the documents that reach that stage, and then limit the result.

Related

Query item in nested array

Customer appointments with top level locationId sample data set:
[
{
"locationId": 9999,
"customerAppointments": [
{
"customerId": "1",
"appointments": [
{
"appointmentId": "cbbce566-da59-42c2-8845-53976ba63d56",
"locationName": "Sullivan St"
},
{
"appointmentId": "5f09e2af-ddae-47aa-9f7c-fd1001a9c5e6",
"locationName": "Oak St"
}
]
},
{
"customerId": "2",
"appointments": [
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
},
{
"customerId": "3",
"appointments": []
}
]
},
{
...
},
{
...
}
]
I need to pull out appointments by locationId and customerId, and get only the appointments for that customerId, e.g.
Sample response:
[
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
Tried below query, but it just returns all records for all customers ids (which is kind of expected):
db.getCollection("appointments").find(
{
"locationId" : NumberInt(9999),
"customerAppointments" : {
"$elemMatch" : {
"customerId" : "2"
}
}
}
);
But how can I get just the appointment record for a specific customerId?
When asking this question I was unaware that we are using an older version of the MongoDB driver (< v5), so we cannot use the $getField operator.
However, this query seems to work well:
// Return only the appointments array of a single customer at a single location.
db.getCollection("appointments").aggregate([
// Stage 1: keep only the document(s) whose top-level locationId is 9999.
{
$match: {
"locationId": NumberInt(9999)
}
},
// Stage 2: emit one document per element of the customerAppointments array.
{
$unwind: "$customerAppointments"
},
// Stage 3: keep only the unwound element whose customerId is "2".
{
$match: {
"customerAppointments.customerId": "2"
}
},
// Stage 4: output just that customer's appointments array
// (_id is also returned, since it is not excluded here).
{
$project: {
appointments: "$customerAppointments.appointments"
}
}
]);
Yields:
{
"_id" : ObjectId("63eebe95c7a0da54804c1db2"),
"appointments" : [
{
"appointmentId" : "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName" : "Kellet St"
}
]
}

GetBlock and all related methods don't return any info about transactions

Usually, tronGrid returns blocks with transactions, but as I found today, it's not behaving as needed.
How it works right now:
{
"blockID": "0000000001b16eb8b97ab73b7dc8f161c5f2f786f0937bfed7886baa33926c84",
"block_header": {
"raw_data": {
"number": 28405432,
"txTrieRoot": "0000000000000000000000000000000000000000000000000000000000000000",
"witness_address": "41f16412b9a17ee9408646e2a21e16478f72ed1e95",
"parentHash": "0000000001b16eb7a6f39a1523f35db8b4089d5a03f591958beafd139e0949d5",
"version": 24,
"timestamp": 1666102851000
},
"witness_signature": "60c7b8b964f103072b7e0fd33b5df636ef4e06d95bb113184ef3266b691b2cf517091960aba72dcd8d5a1ee40374f2124256ddad445429897b066332964ef8d500"
}
}
And how it worked before:
{
"blockID": "0000000001b15f18976aee56ff9490303ec64c2007d6034ca03a7a2caefdab73",
"block_header": {
"raw_data": {
"number": 28401432,
"txTrieRoot": "167b9b1620d76e9855d426453ea726a709582f4ed711701ee22fe730bae3f8d8",
"witness_address": "41cd8d8ad1b4a5bd7afe46949421d2b411a3601717",
"parentHash": "0000000001b15f175dc2a20b8c3b29bbbb50860dc57d54d44eff4b02edf849e6",
"version": 24,
"timestamp": 1666089210000
},
"witness_signature": "b9239d12b2044b1bdfa631115f3c7b9b1c1fc5d37c482809d2c7846d05ab84d61778910a55501f4702fe653b943b22b9270b33151ec15e35aa500388dac0abda01"
},
"transactions": [
{
"ret": [
{
"contractRet": "SUCCESS"
}
],
"signature": [
"56a427e32fc0267a2e469ef85530c3145c7de423c8f20d7e11d85dbff98701bdd599d5a45b58ab4f7fa7f212c2ee3cbb5daa50541a83f67dbff533b7e185331501"
],
"txID": "5fd2335105f68de47b82fe3f8065cb3d1cc8ab437aaee55a5d4e61624113730b",
"raw_data": {
...
},
"raw_data_hex": "0a025f0522084cf822e1795ff17f40a7e6a2d5be305a67080112630a2d747970652e676f6f676c65617069732e636f6d2f70726f746f636f6c2e5472616e73666572436f6e747261637412320a1541989cc89d2df684c69bed3563c0cd8817be0a11e1121541bc0777bd8f50e5e148ef59bdce2b895b754c452e1888890a70c7919fd5be30"
}
]
}
Is this a new feature, or is it a bug?

Get data from two collection with specific data Mongo

I want to get data from two collections: all data from the first collection (test1), and the customer name from the second collection (test2), matched using createdBy and updatedBy from the test1 collection.
In createdBy and updatedBy I want the fullName from the test2 collection:
Test1 collection:
{
"_id": "kcXtyaB7jGPw9Ks",
"dateCreated": "2022-07-12T13:09:16.270Z",
"dateModified": "2022-07-12T13:09:16.270Z",
"data1": 1,
"data2": 100,
"data3": 5,
"createdBy": "xQQrzRgi8",
"updatedBy": "56sgAeKfx"
}
Test2 collection:
{
"_id": "xQQrzRgi8",
"fullName": "test name created"
},
{
"_id": "56sgAeKfx",
"fullName": "test name update"
}
The response should look like:
{
"_id": "kcXtyaB7jGPw9Ks",
"dateCreated": "2022-07-12T13:09:16.270Z",
"dateModified": "2022-07-12T13:09:16.270Z",
"data1": 1,
"data2": 100,
"data3": 5,
"createdBy": "test name created",
"updatedBy": "test name update"
}
If I've understood correctly, you can use $lookup like this:
This query does a "join" between "Test1" and "Test2" using the updatedBy and _id fields.
After that, it takes the first element of the result to output the value (I assume there is only one element because you are comparing with _id, but if there is more than one you can use another approach such as $unwind).
Edit: To get both values (created and updated) you can do a second $lookup.
Now the query:
Get the updatedBy name from field _id in Test2.
Set value into field updatedBy.
Get the createdBy name from field _id in Test2.
Set value into field createdBy.
Use $project to exclude the temporary result field from the output.
// Replace the updatedBy / createdBy ids in Test1 with the matching fullName from Test2.
db.Test1.aggregate([
// Join Test2 on updatedBy == _id; matches are stored in the temporary "result" array.
{
"$lookup": {
"from": "Test2",
"localField": "updatedBy",
"foreignField": "_id",
"as": "result"
}
},
// Overwrite updatedBy with the fullName of the first joined document.
{
"$set": {
"updatedBy": {
"$first": "$result.fullName"
}
}
},
// Join Test2 again, this time on createdBy == _id. This reuses (and overwrites)
// "result", so it must run after updatedBy has been set above.
{
"$lookup": {
"from": "Test2",
"localField": "createdBy",
"foreignField": "_id",
"as": "result"
}
},
// Overwrite createdBy with the fullName of the first joined document.
{
"$set": {
"createdBy": {
"$first": "$result.fullName"
}
}
},
// Drop the temporary join array from the output.
{
"$project": {
"result": 0
}
}
])
Example here
I solved my query with below mongo query:
// Resolve the updatedBy / createdBy ids of each Test1 document to the
// fullName stored in Test2, using one temporary join array per field.
db.Test1.aggregate([
  // Join Test2 on updatedBy == _id.
  { "$lookup": { "from": "Test2", "localField": "updatedBy", "foreignField": "_id", "as": "updatedByName" } },
  // Join Test2 on createdBy == _id.
  { "$lookup": { "from": "Test2", "localField": "createdBy", "foreignField": "_id", "as": "createdByName" } },
  // Overwrite both id fields with the fullName of the first joined document.
  // The two assignments read different join arrays, so one $set stage suffices.
  {
    "$set": {
      "updatedBy": { "$first": "$updatedByName.fullName" },
      "createdBy": { "$first": "$createdByName.fullName" }
    }
  },
  // Remove the temporary join arrays from the output.
  { "$project": { "updatedByName": 0, "createdByName": 0 } }
])
Here is the solved query: https://mongoplayground.net/p/7Ekh-q8tkTy

MongoDB multiple Lookup into same collection

I have two collections Bill and Employee. Bill contains the information about the monthly student bill and Employee contains all types of people working in the school (Accountant, Teachers, Maintenance etc).
Bill has billVerifiedBy and classteacher fields, which point to records in the Employee collection.
Bill collection
{
"_id": ObjectId("ab12dns..."), //mongoid
"studentname": "demoUser",
"class": { "section": "A"},
"billVerifiedBy": "121212",
"classteacher": "134239",
}
Employee collection
{
"_id": ObjectId("121212"), // random number
"name": "Darn Morphy",
"department": "Accounts",
"email": "dantest#test.com",
}
{
"_id": ObjectId("134239"),
"name": "Derreck",
"department": "Faculty",
"email": "derrect145#test.com",
}
I need to retrieve the Accounts and Teacher information related to a particular bill. I am using MongoDB's $lookup to get the information. However, I have to look up the same collection twice, since billVerifiedBy and classteacher both refer to the Employee collection, as shown below.
// Join each bill with the two employee records it references: the employee
// who verified the bill (billVerifiedBy) and the class teacher (classteacher).
// NOTE(review): in the sample documents billVerifiedBy/classteacher are strings
// while the employee _id values are ObjectIds; confirm the stored types match,
// otherwise the equality joins below will find nothing.
db.bill.aggregate([
{
// Matches are returned in the "accounts" array.
$lookup: {"from": "employee", "localField": "billVerifiedBy", "foreignField": "_id", "as": "accounts"}
},
{
// Second join against the same collection; matches are returned in the "faculty" array.
$lookup: {"from": "employee", "localField": "classteacher", "foreignField": "_id", "as": "faculty"}
},
{
// Expose only the bill fields plus the name/email taken from each joined array.
$project: {
"studentname": 1,
"class": 1,
"verifiedUser": "$accounts.name",
"verifiedByEmail":"$accounts.email",
"facultyName": "$faculty.name",
"facultyEmail": "$faculty.email"
}
}
])
I don't know if keeping the Accounts and Faculty information in a single Employee collection is a good design. Is it the right thing to look up the same collection twice, or should I create separate Accounts and Faculty collections and look up each of them? Please suggest what would be the best approach in terms of performance.
In mongodb, when you want to join multiple documents from the same collection, you can use "$lookup" with its "pipeline" and "let" options. It filters documents that you want to take with defined variables.
// Fetch both referenced employees (verifier and class teacher) of each bill
// with a single $lookup, and expose them as lookupUsers.billVerifiedBy and
// lookupUsers.classteacher.
db.getCollection('Bill').aggregate([{
"$lookup": {
"as": "lookupUsers",
"from": "Employee",
// Expose the bill's two reference fields to the sub-pipeline as $$verifier and $$teacher.
"let": {
"verifier": "$billVerifiedBy",
"teacher": "$classteacher"
},
"pipeline": [{ // Keep only the employees whose _id equals one of the two variables.
"$match": {
"$expr": {
"$or": [{
"$eq": ["$_id", "$$verifier"]
},
{
"$eq": ["$_id", "$$teacher"]
}
]
}
}
},
{ // Group everything under one constant key so the matched employees end up in a single "employee" array.
"$group": {
"_id": "",
"employee": {
"$addToSet": {
"_id": "$_id",
"name": "$name",
"department": "$department",
"email": "$email"
}
}
}
},
{ // For each role, filter the array by the corresponding variable and keep at most one element.
"$project": {
"_id": 0,
"billVerifiedBy": {
"$slice": [{
"$filter": {
"input": "$employee",
"cond": {
"$eq": ["$$this._id", "$$verifier"]
}
}
},
1
]
},
"classteacher": {
"$slice": [{
"$filter": {
"input": "$employee",
"cond": {
"$eq": ["$$this._id", "$$teacher"]
}
}
},
1
]
}
}
},
// Turn the one-element arrays into plain embedded documents.
{
"$unwind": "$billVerifiedBy"
},
{
"$unwind": "$classteacher"
},
]
}
},
// The sub-pipeline yields a single grouped document, so flatten the lookupUsers array into an object.
{
"$unwind": "$lookupUsers"
},
]);
Output is like that:
{
"_id": ObjectId("602916dcf4450742cdebe38d"),
"studentname": "demoUser",
"class": {
"section": "A"
},
"billVerifiedBy": ObjectId("6029172e9ea6c9d4776517ce"),
"classteacher": ObjectId("6029172e9ea6c9d4776517cf"),
"lookupUsers": {
"billVerifiedBy": {
"_id": ObjectId("6029172e9ea6c9d4776517ce"),
"name": "Darn Morphy",
"department": "Accounts",
"email": "dantest#test.com"
},
"classteacher": {
"_id": ObjectId("6029172e9ea6c9d4776517cf"),
"name": "Derreck",
"department": "Faculty",
"email": "derrect145#test.com"
}
}
}

hierarchical faceting with Elasticsearch

I'm using Elasticsearch and need to implement faceted search for a hierarchical object, as follows:
category 1 (10)
subcategory 1 (4)
subcategory 2 (6)
category 2 (X)
...
So I need to get facets for two related objects. Documentation says that it's possible to get such kind of facets for numeric value, but I need it for strings http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-facets-terms-stats-facet.html
Here is another interesting topic, unfortunately it's old: http://elasticsearch-users.115913.n3.nabble.com/Pivot-facets-td2981519.html
Is this possible with Elasticsearch?
If so, how can I do that?
The previous solution works really well as long as each document carries no more than one multi-level tag. Once a single document has several, a simple aggregation doesn't work, because the flat structure of the Lucene fields mixes the values of the different tags in the inner aggregation.
See the example below:
DELETE /test_category
POST /test_category
# Insert a doc with 2 hierarchical tags
POST /test_category/test/1
{
"categories": [
{
"cat_1": "1",
"cat_2": "1.1"
},
{
"cat_1": "2",
"cat_2": "2.2"
}
]
}
# Simple two-levels aggregations query
GET /test_category/test/_search?search_type=count
{
"aggs": {
"main_category": {
"terms": {
"field": "categories.cat_1"
},
"aggs": {
"sub_category": {
"terms": {
"field": "categories.cat_2"
}
}
}
}
}
}
That's the WRONG response that I have got on ES 1.4, where the fields on the internal aggregation are mixed at a document level:
{
...
"aggregations": {
"main_category": {
"buckets": [
{
"key": "1",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "1.1",
"doc_count": 1
},
{
"key": "2.2", <= WRONG
"doc_count": 1
}
]
}
},
{
"key": "2",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "1.1", <= WRONG
"doc_count": 1
},
{
"key": "2.2",
"doc_count": 1
}
]
}
}
]
}
}
}
A Solution can be to use nested objects. These are the steps to do:
1) Define a new type in the schema with nested objects
POST /test_category/test2/_mapping
{
"test2": {
"properties": {
"categories": {
"type": "nested",
"properties": {
"cat_1": {
"type": "string"
},
"cat_2": {
"type": "string"
}
}
}
}
}
}
# Insert a single document
POST /test_category/test2/1
{"categories":[{"cat_1":"1","cat_2":"1.1"},{"cat_1":"2","cat_2":"2.2"}]}
2) Run a nested aggregation query:
GET /test_category/test2/_search?search_type=count
{
"aggs": {
"categories": {
"nested": {
"path": "categories"
},
"aggs": {
"main_category": {
"terms": {
"field": "categories.cat_1"
},
"aggs": {
"sub_category": {
"terms": {
"field": "categories.cat_2"
}
}
}
}
}
}
}
}
That's the response, now correct, that I have got:
{
...
"aggregations": {
"categories": {
"doc_count": 2,
"main_category": {
"buckets": [
{
"key": "1",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "1.1",
"doc_count": 1
}
]
}
},
{
"key": "2",
"doc_count": 1,
"sub_category": {
"buckets": [
{
"key": "2.2",
"doc_count": 1
}
]
}
}
]
}
}
}
}
The same solution can be extended to a more than two-levels hierarchy facet.
Currently, Elasticsearch does not support hierarchical faceting out of the box. But the upcoming 1.0 release features a new aggregations module that can be used to get these kinds of facets (which are more like pivot facets than hierarchical facets). Version 1.0 is currently in beta; you can download the second beta and test out aggregations yourself. Your example might look like
curl -XPOST 'localhost:9200/_search?pretty' -d '
{
"aggregations": {
"main category": {
"terms": {
"field": "cat_1",
"order": {"_term": "asc"}
},
"aggregations": {
"sub category": {
"terms": {
"field": "cat_2",
"order": {"_term": "asc"}
}
}
}
}
}
}'
The idea is, to have a different field for each level of facetting and bucket your facets based on the terms of the first level (cat_1). These aggregations then would have sub-buckets, based on the terms of the second level (cat_2). The result may look like
{
"aggregations" : {
"main category" : {
"buckets" : [ {
"key" : "category 1",
"doc_count" : 10,
"sub category" : {
"buckets" : [ {
"key" : "subcategory 1",
"doc_count" : 4
}, {
"key" : "subcategory 2",
"doc_count" : 6
} ]
}
}, {
"key" : "category 2",
"doc_count" : 7,
"sub category" : {
"buckets" : [ {
"key" : "subcategory 1",
"doc_count" : 3
}, {
"key" : "subcategory 2",
"doc_count" : 4
} ]
}
} ]
}
}
}