Apache Atlas column lineage not found - Hive

I use Hive 2.1.1 and Atlas 2.0.0.
I can find table lineage but not column-level lineage.
The detailed information is shown below.
For debugging, I tried the REST API first; it returns nothing about column lineage.
[root@cent1 bin]# curl -X GET -u admin:admin http://cent1:21000/api/atlas/v2/lineage/27c81b16-b422-4479-84b9-2d643b5dba48
{"baseEntityGuid":"27c81b16-b422-4479-84b9-2d643b5dba48","lineageDirection":"BOTH","lineageDepth":3,"guidEntityMap":{},"relations":[]}
I printed the Kafka message from the ATLAS_HOOK topic below:
{
"version":{
"version":"1.0.0",
"versionParts":[
1
]
},
"msgCompressionKind":"NONE",
"msgSplitIdx":1,
"msgSplitCount":1,
"msgSourceIP":"192.168.10.128",
"msgCreatedBy":"root",
"msgCreationTime":1572687183793,
"message":{
"type":"ENTITY_CREATE_V2",
"user":"root",
"entities":{
"referredEntities":{
"-53640556650491":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes.bar#primary",
"name":"bar",
"comment":null,
"position":1,
"type":"string",
"table":{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes#primary"
}
}
},
"guid":"-53640556650491",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650492":{
"typeName":"hive_table",
"attributes":{
"owner":"root",
"temporary":false,
"lastAccessTime":1572687176000,
"qualifiedName":"atlas1.pokes_create3#primary",
"columns":[
{
"guid":"-53640556650494",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3.foo#primary"
}
},
{
"guid":"-53640556650495",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3.bar#primary"
}
}
],
"tableType":"MANAGED_TABLE",
"sd":{
"guid":"-53640556650493",
"typeName":"hive_storagedesc",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3#primary_storage"
}
},
"createTime":1572687176000,
"name":"pokes_create3",
"comment":null,
"partitionKeys":[
],
"parameters":{
"totalSize":"5812",
"numRows":"500",
"rawDataSize":"5312",
"COLUMN_STATS_ACCURATE":"{"BASIC_STATS":"true"}",
"numFiles":"1",
"transient_lastDdlTime":"1572687178"
},
"db":{
"guid":"-53640556650487",
"typeName":"hive_db",
"uniqueAttributes":{
"qualifiedName":"atlas1#primary"
}
},
"retention":0
},
"guid":"-53640556650492",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650490":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes.foo#primary",
"name":"foo",
"comment":null,
"position":0,
"type":"int",
"table":{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes#primary"
}
}
},
"guid":"-53640556650490",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650495":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes_create3.bar#primary",
"name":"bar",
"comment":null,
"position":1,
"type":"string",
"table":{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3#primary"
}
}
},
"guid":"-53640556650495",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650493":{
"typeName":"hive_storagedesc",
"attributes":{
"qualifiedName":"atlas1.pokes_create3#primary_storage",
"storedAsSubDirectories":false,
"location":"hdfs://cent1:9000/user/hive/warehouse/atlas1.db/pokes_create3",
"compressed":false,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"table":{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3#primary"
}
},
"serdeInfo":{
"typeName":"hive_serde",
"attributes":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"name":null,
"parameters":{
"serialization.format":"1"
}
}
},
"numBuckets":-1
},
"guid":"-53640556650493",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650494":{
"typeName":"hive_column",
"attributes":{
"owner":"root",
"qualifiedName":"atlas1.pokes_create3.foo#primary",
"name":"foo",
"comment":null,
"position":0,
"type":"int",
"table":{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3#primary"
}
}
},
"guid":"-53640556650494",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650488":{
"typeName":"hive_table",
"attributes":{
"owner":"root",
"temporary":false,
"lastAccessTime":1572663197000,
"qualifiedName":"atlas1.pokes#primary",
"columns":[
{
"guid":"-53640556650490",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes.foo#primary"
}
},
{
"guid":"-53640556650491",
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes.bar#primary"
}
}
],
"tableType":"MANAGED_TABLE",
"sd":{
"guid":"-53640556650489",
"typeName":"hive_storagedesc",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes#primary_storage"
}
},
"createTime":1572663197000,
"name":"pokes",
"comment":null,
"partitionKeys":[
],
"parameters":{
"transient_lastDdlTime":"1572663225",
"totalSize":"5812",
"numRows":"0",
"rawDataSize":"0",
"numFiles":"1"
},
"db":{
"guid":"-53640556650487",
"typeName":"hive_db",
"uniqueAttributes":{
"qualifiedName":"atlas1#primary"
}
},
"retention":0
},
"guid":"-53640556650488",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650489":{
"typeName":"hive_storagedesc",
"attributes":{
"qualifiedName":"atlas1.pokes#primary_storage",
"storedAsSubDirectories":false,
"location":"hdfs://cent1:9000/user/hive/warehouse/atlas1.db/pokes",
"compressed":false,
"inputFormat":"org.apache.hadoop.mapred.TextInputFormat",
"parameters":{
},
"outputFormat":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat",
"table":{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes#primary"
}
},
"serdeInfo":{
"typeName":"hive_serde",
"attributes":{
"serializationLib":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe",
"name":null,
"parameters":{
"serialization.format":"1"
}
}
},
"numBuckets":-1
},
"guid":"-53640556650489",
"provenanceType":0,
"version":0,
"proxy":false
},
"-53640556650487":{
"typeName":"hive_db",
"attributes":{
"owner":"root",
"ownerType":"USER",
"qualifiedName":"atlas1#primary",
"clusterName":"primary",
"name":"atlas1",
"description":null,
"location":"hdfs://cent1:9000/user/hive/warehouse/atlas1.db",
"parameters":{
}
},
"guid":"-53640556650487",
"provenanceType":0,
"version":0,
"proxy":false
}
},
"entities":[
{
"typeName":"hive_process",
"attributes":{
"outputs":[
{
"guid":"-53640556650492",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3#primary"
}
}
],
"recentQueries":[
"create table pokes_create3 as select foo,bar from pokes"
],
"qualifiedName":"atlas1.pokes_create3#primary:1572687176000",
"inputs":[
{
"guid":"-53640556650488",
"typeName":"hive_table",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes#primary"
}
}
],
"name":"create table pokes_create3 as select foo,bar from pokes",
"queryText":"create table pokes_create3 as select foo,bar from pokes",
"operationType":"CREATETABLE_AS_SELECT",
"startTime":1572686979104,
"queryPlan":"Not Supported",
"endTime":1572687183754,
"userName":"root",
"queryId":"root_20191102172939_5de2901c-49e1-473c-ad2f-7e5f5b3251ee"
},
"guid":"-53640556650496",
"provenanceType":0,
"version":0,
"proxy":false
}
]
}
}
}
Where is the column lineage? How can I debug this further?

It seems that you need to have both of these Hive patches.
https://issues.apache.org/jira/browse/HIVE-13112 released in Hive 2.1.0
https://issues.apache.org/jira/browse/HIVE-14706 released in Hive 2.2.0
As far as I understand, the first patch exposes lineage information in the case of CTAS, and the second patch allows the Atlas plugin to pick it up from the Hive hook.
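Once both patches are in place, the ATLAS_HOOK message for a CTAS should additionally carry column-level lineage entities of type hive_column_lineage, one per output column. The snippet below is only a hand-written sketch of the rough shape (attribute names are taken from the Atlas Hive bridge model; the exact qualifiedName format and the remaining attributes may differ), not output from this cluster:
{
"typeName":"hive_column_lineage",
"attributes":{
"qualifiedName":"atlas1.pokes_create3#primary:1572687176000:foo",
"name":"foo",
"query":{
"typeName":"hive_process",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3#primary:1572687176000"
}
},
"inputs":[
{
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes.foo#primary"
}
}
],
"outputs":[
{
"typeName":"hive_column",
"uniqueAttributes":{
"qualifiedName":"atlas1.pokes_create3.foo#primary"
}
}
]
}
}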
See also the Atlas Hive hook documentation: https://atlas.apache.org/#/HookHive
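To verify from the Atlas side, one quick check (a sketch using the standard Atlas v2 REST endpoints; the qualifiedName and GUID are placeholders to fill in) is to look up one of the output columns by its qualified name and then request lineage for that column's GUID instead of the table's:
curl -u admin:admin "http://cent1:21000/api/atlas/v2/entity/uniqueAttribute/type/hive_column?attr:qualifiedName=<column qualifiedName>"
curl -u admin:admin "http://cent1:21000/api/atlas/v2/lineage/<column guid from the previous call>?depth=3&direction=BOTH"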

Related

AWS IoT rule SQL select statement

I am trying to write a SQL SELECT statement for my AWS IoT rule to extract the values 'gateway_id' and 'rssi' from the following MQTT message:
{
"end_device_ids": {
"device_id": "imd2",
"application_ids": {
"application_id": "pennal"
},
"dev_eui": "004E3A0DF76DC9E9",
"join_eui": "70B3D57ED003CBE8",
"dev_addr": "260BA9D0"
},
"correlation_ids": [
"as:up:01G30W0J4D65P6D50QH1DN3ZQP",
"gs:conn:01G2ZZ7FT9BH6J93WRYS4ATVDM",
"gs:up:host:01G2ZZ7FTN14103H90QN71Q557",
"gs:uplink:01G30W0HXWMES1Z7X7F2MCFMPF",
"ns:uplink:01G30W0HXXJM5PNGJAD0W01GGH",
"rpc:/ttn.lorawan.v3.GsNs/HandleUplink:01G30W0HXWFR3HNGBZS7XJV15E",
"rpc:/ttn.lorawan.v3.NsAs/HandleUplink:01G30W0J4D18JZW199EM8WERGR"
],
"received_at": "2022-05-14T08:47:25.837680984Z",
"uplink_message": {
"session_key_id": "AYBlRLSz9n83bW3WU3+GfQ==",
"f_port": 1,
"f_cnt": 5013,
"frm_payload": "DiAAAA==",
"decoded_payload": {
"rainmm": 0,
"voltage": 3.616
},
"rx_metadata": [
{
"gateway_ids": {
"gateway_id": "pennal-gw2",
"eui": "AC1F09FFFE057EC6"
},
"time": "2022-05-14T08:47:25.065794944Z",
"timestamp": 114306297,
"rssi": -126,
"channel_rssi": -126,
"snr": -8.25,
"uplink_token": "ChgKFgoKcGVubmFsLWd3MhIIrB8J//4FfsYQ+dnANhoMCJ3Z/ZMGEOPmv6sCIKjZvump7gYqCwid2f2TBhCA568f"
}
],
"settings": {
"data_rate": {
"lora": {
"bandwidth": 125000,
"spreading_factor": 11
}
},
"coding_rate": "4/5",
"frequency": "868100000",
"timestamp": 114306297,
"time": "2022-05-14T08:47:25.065794944Z"
},
"received_at": "2022-05-14T08:47:25.629041670Z",
"confirmed": true,
"consumed_airtime": "0.659456s",
"version_ids": {
"brand_id": "heltec",
"model_id": "cubecell-dev-board-class-a-otaa",
"hardware_version": "_unknown_hw_version_",
"firmware_version": "1.0",
"band_id": "EU_863_870"
},
"network_ids": {
"net_id": "000013",
"tenant_id": "ttn",
"cluster_id": "eu1",
"cluster_address": "eu1.cloud.thethings.network"
}
}
}
I have tried following the documentation here: AWS Documentation, but am struggling with the nested part of the message.
My SQL statement at the moment is:
SELECT received_at as datetime, end_device_ids.device_id as device_id,
  uplink_message.decoded_payload.rainmm as rainmm,
  uplink_message.decoded_payload.voltage as voltage,
  uplink_message.settings.data_rate.lora.spreading_factor as sprfact,
  uplink_message.consumed_airtime as time_on_air,
  uplink_message.settings.timestamp as ts,
  uplink_message.rx_metadata as rx,
  (select value gateway_ids from uplink_message.rx_metadata) as gw,
  (select value rssi from uplink_message.rx_metadata) as rssi,
  get((select gateway_id from uplink_message.rx_metadata), 0).gateway_id as gwn
FROM 'thethings/lorawan/matt-pennal-ire/uplink'
which returns
{
"datetime": "2022-05-15T12:19:11.947844474Z",
"device_id": "md4",
"rainmm": 5.842001296924288,
"voltage": 3.352,
"sprfact": 8,
"time_on_air": "0.092672s",
"ts": 3262497863,
"rx": [
{
"gateway_ids": {
"gateway_id": "pennal-gw2",
"eui": "AC1F09FFFE057EC6"
},
"time": "2022-05-15T12:19:11.178463935Z",
"timestamp": 3262497863,
"rssi": -125,
"channel_rssi": -125,
"snr": -7.5,
"uplink_token": "ChgKFgoKcGVubmFsLWd3MhIIrB8J//4FfsYQx4jXkwwaDAi/34OUBhCCy9XhAiDY6prg+ckHKgsIv9+DlAYQv8mMVQ=="
}
],
"gw": [
{
"gateway_id": "pennal-gw2",
"eui": "AC1F09FFFE057EC6"
}
],
"rssi": [
-125
]
}
but I would like it to return
{
"datetime": "2022-05-15T12:19:11.947844474Z",
"device_id": "md4",
"rainmm": 5.842001296924288,
"voltage": 3.352,
"sprfact": 8,
"time_on_air": "0.092672s",
"ts": 3262497863,
"gwn":"pennal_gw2"
"rssi":-126
}
Any help to get the values from the nested array would be greatly appreciated!
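One untested sketch that may get closer to the desired flat output: AWS IoT SQL's get() can index an array directly, so applying it to uplink_message.rx_metadata and then dereferencing the element should avoid the array-valued results (this assumes dot access on the get() result behaves as in the AWS examples; verify before relying on it):
SELECT received_at as datetime, end_device_ids.device_id as device_id,
  uplink_message.decoded_payload.rainmm as rainmm,
  uplink_message.decoded_payload.voltage as voltage,
  uplink_message.settings.data_rate.lora.spreading_factor as sprfact,
  uplink_message.consumed_airtime as time_on_air,
  uplink_message.settings.timestamp as ts,
  get(uplink_message.rx_metadata, 0).gateway_ids.gateway_id as gwn,
  get(uplink_message.rx_metadata, 0).rssi as rssi
FROM 'thethings/lorawan/matt-pennal-ire/uplink'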

An extra signer with status "created" is getting added along with the other signers

I created an envelope using the DocuSign API with status "sent", and I had two signers in it. But when I actually checked in the account, there is one extra signer with the same name added along with the other signers in the envelope (Envelope screenshot).
I also checked the same while calling the Recipients API, and there too it had 3 signers, with the extra one having status "created" (Recipients API response screenshot). But the docs I referred to say that "created" recipients are only for envelopes in the draft state, so how can signers with status "sent" and "created" be added together?
I'm not sure how this extra signer got added. Any input will be appreciated.
My JSON request to generate the envelope ID:
{
"status":"Sent",
"emailSubject":"Your Amboy Documents",
"emailBlurb":"Please sign the Disclosures",
"compositeTemplates":[
{
"serverTemplates":[
{
"templateId":"1b890a65-ce07-40a7-9086-3be315de6f4f",
"sequence":"1"
}
],
"inlineTemplates":[
{
"sequence":"1",
"recipients":{
"signers":[
{
"tabs":{
"textTabs":[
{
"value":"laptop services",
"tabLabel":"Party_Business_ENTName"
}
],
"checkboxTabs":[
{
"value":"X",
"tabLabel":"App_1_BT_CORP",
"selected":"true"
}
]
},
"roleName":"Signer 1",
"recipientId":"2",
"partyId":"a0uR00000043CfMIAU",
"name":"rafeeq khan",
"emailNotification":null,
"email":"kshama.vaidya#terafinainc.com",
"clientUserId":"2"
},
{
"tabs":{
"textTabs":[
{
"value":"laptop services",
"tabLabel":"Party_Business_ENTName"
}
],
"checkboxTabs":[
{
"value":"X",
"tabLabel":"App_1_BT_CORP",
"selected":"true"
}
]
},
"roleName":"Signer 2",
"recipientId":"3",
"partyId":"a0uR00000043CiRIAU",
"name":"Imam khan",
"emailNotification":null,
"email":"kshama.vaidya#terafinainc.com",
"clientUserId":"3"
}
],
"carbonCopies":[
]
}
}
]
},
{
"serverTemplates":[
{
"templateId":"0b69b96b-6746-470c-9032-06bff16d98e2",
"sequence":"2"
}
],
"inlineTemplates":[
{
"sequence":"2",
"recipients":{
"signers":[
{
"tabs":{
"textTabs":[
{
"value":"rafeeq khan",
"tabLabel":"Party_1_Name"
}
],
"checkboxTabs":[
]
},
"roleName":"Signer 600",
"recipientId":"2",
"partyId":"a0uR00000043CfMIAU",
"name":"rafeeq khan",
"emailNotification":null,
"email":"kshama.vaidya#terafinainc.com",
"clientUserId":"2"
}
],
"carbonCopies":[
]
}
}
]
},
{
"serverTemplates":[
{
"templateId":"0e3fc453-a721-46bb-837d-7b5408e4384b",
"sequence":"3"
}
],
"inlineTemplates":[
{
"sequence":"3",
"recipients":{
"signers":[
{
"tabs":{
"textTabs":[
{
"value":"rafeeq khan",
"tabLabel":"Party_1_Name"
}
],
"checkboxTabs":[
]
},
"roleName":"Signer 1",
"recipientId":"2",
"partyId":"a0uR00000043CfMIAU",
"name":"rafeeq khan",
"emailNotification":null,
"email":"kshama.vaidya#terafinainc.com",
"clientUserId":"2"
},
{
"tabs":{
"textTabs":[
{
"value":"rafeeq khan",
"tabLabel":"Party_1_Name"
}
],
"checkboxTabs":[
]
},
"roleName":"Signer 2",
"recipientId":"3",
"partyId":"a0uR00000043CiRIAU",
"name":"Imam khan",
"emailNotification":null,
"email":"kshama.vaidya#terafinainc.com",
"clientUserId":"3"
}
],
"carbonCopies":[
]
}
}
]
}
],
"accountId":"11931216"
}
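For reference, the recipients check mentioned above can be reproduced with the eSignature Recipients API. This is only a sketch (the base URI, API version and access token are placeholders; the account ID is the one from the request), and the roleName/recipientId of the extra "created" recipient in the response should point at the template role it came from:
curl -s -H "Authorization: Bearer <access token>" "https://demo.docusign.net/restapi/v2.1/accounts/11931216/envelopes/<envelopeId>/recipients"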

SQL script to form nested JSON as the output

Consider I have the data below:
create table #Temp(PropertyID nvarchar(255),BuildingID nvarchar(255),UnitID nvarchar(255),TenantName nvarchar(255),FieldName nvarchar(255),CurrentValue nvarchar(255),PreviousValue nvarchar(255))
insert into #Temp Values
('p1','B1','5','Spencer','Lease_EndDate','01/01/2021','03/01/2021'),
('p1','B1','5','Spencer','MonthlyBaseRent','3232','3000'),
('p1','B1','5','BCR','MonthlyBaseRent','1000','1100'),
('p1','B1','6','EA','MonthlyBaseRent','5000','5100'),
('p1','B2','5','VR','MonthlyBaseRent','3232','3000')
I need the output in the nested JSON format below, but I am getting flat JSON like [{},{},{}].
[
{
"PropertyID": "p1",
"Building": [
{
"BuildingID": "B1",
"Unit": [
{
"UnitID": "5",
"Tenant": [
{
"TenantName": "Spencer",
"Lease_EndDate": {
"CurrentValue": "01/01/2021",
"PreviousValue": "03/01/2021"
},
"MonthlyBaseRent": {
"CurrentValue": "3232",
"PreviousValue": "3000"
}
},
{
"TenantName": "BCR",
"MonthlyBaseRent": {
"CurrentValue": "1000",
"PreviousValue": "1100"
}
}
]
},
{
"UnitID": "6",
"Tenant": [
{
"TenantName": "EA",
"MonthlyBaseRent": {
"CurrentValue": "5000",
"PreviousValue": "5100"
}
}
]
}
]
},
{
"BuildingID": "B2",
"Unit": [
{
"UnitID": "5",
"Tenant": [
{
"TenantName": "VR",
"MonthlyBaseRent": {
"CurrentValue": "3232",
"PreviousValue": "3000"
}
}
]
}
]
}
]
}
]
This is an example of nested JSON with 2 levels. Use this template to build as many levels as needed.
select PropertyID, BuildingID,
       (select UnitID,
               (select t1.TenantName, t1.FieldName, t1.CurrentValue, t1.PreviousValue
                from #Temp t1
                where t1.PropertyID = t2.PropertyID and t1.BuildingID = t2.BuildingID and t1.UnitID = t2.UnitID
                for json path) Tenant
        from #Temp t2
        where t2.PropertyID = t3.PropertyID and t2.BuildingID = t3.BuildingID
        group by UnitID, PropertyID, BuildingID
        for json path) Unit
from #Temp t3
group by PropertyID, BuildingID
for json path
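An untested sketch that extends the same template one level further, so each PropertyID wraps a Building array (it still emits TenantName/FieldName/CurrentValue/PreviousValue as flat tenant rows rather than the per-field objects in the desired output, which would need an extra pivot step):
select t4.PropertyID,
       (select t3.BuildingID,
               (select t2.UnitID,
                       (select t1.TenantName, t1.FieldName, t1.CurrentValue, t1.PreviousValue
                        from #Temp t1
                        where t1.PropertyID = t2.PropertyID and t1.BuildingID = t2.BuildingID and t1.UnitID = t2.UnitID
                        for json path) Tenant
                from #Temp t2
                where t2.PropertyID = t3.PropertyID and t2.BuildingID = t3.BuildingID
                group by t2.UnitID, t2.PropertyID, t2.BuildingID
                for json path) Unit
        from #Temp t3
        where t3.PropertyID = t4.PropertyID
        group by t3.BuildingID, t3.PropertyID
        for json path) Building
from #Temp t4
group by t4.PropertyID
for json path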

How can I define a JsonPath for the given JSON?

{
"name": "ninja",
"contry": "India",
"Account": [
{
"id": "123",
"orgId": 223,
"investment": [
{
"invetmentId": "111",
"name": "India tech",
"performance": [
{
"id": "123",
"performanceSet": [
{
"amount": "231",
"currency": "USD"
},
{
"amount": "250",
"currency": "IND"
}
]
}
]
}
]
}
]
}
So I have to select the amount where the currency is USD.
I tried it as "$.Account..investment.performance..performanceSet.amount[?(@.currency=~/.*USD/)]".
This JsonPath should work:
$..performanceSet[?(@.currency == "USD")].amount
Tested on:
{
"name":"ninja",
"contry":"India",
"Account":[
{
"id":"123",
"orgId":223,
"investment":[
{
"invetmentId":"111",
"name":"India tech",
"performance":[
{
"id":"123",
"performanceSet":[
{
"amount":"231",
"currency":"USD"
},
{
"amount":"250",
"currency":"IND"
}
]
}
]
},
{
"invetmentId":"112",
"name":"India tech 2",
"performance":[
{
"id":"124",
"performanceSet":[
{
"amount":"235",
"currency":"USD"
},
{
"amount":"250",
"currency":"IND"
}
]
}
]
}
]
}
]
}
which returns:
[
"231",
"235"
]
A good way to try it out is this site: https://jsonpath.com/
Read the docs: https://github.com/intuit/karate#jsonpath-filters
* def temp = $..performanceSet[?(@.currency=='USD')]
* match temp[0].amount == '231'
You can try it this way
$.Account..investment.performance..performanceSet.amount[?(@.currency=~/.*USD/)]

MongoDB aggregate $limit and $lookup sequence problems

db.getCollection('xxxxxxxx').aggregate(
[
{
"$match": {
"campaigns.campaign_id":ObjectId("5c6e50932fb955f81b0c9f59")
}
},
{
"$sort": {
"campaigns.updatedAt": 1,
"_id": -1
}
},
{
"$limit": 15
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.sales_funnel_id",
"foreignField": "_id",
"as": "sale_funnels"
}
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.callresult_id",
"foreignField": "_id",
"as": "callresults"
}
},
{
"$lookup": {
"from": "accounts",
"localField": "currentStat.qc.account_id",
"foreignField": "_id",
"as": "accounts"
}
},
{
"$match": {
"$or": [
{
"姓名": /137/
},
{
"电话号码": /137/
},
{
"电子邮件": /137/
},
{
"城市": /137/
},
{
"区域": /137/
},
{
"备注": /137/
}
]
}
}
]
)
The above query returns 0 results ($limit before $lookup).
If $limit follows $lookup:
db.getCollection('xxxxxxxxxxx').aggregate(
[
{
"$match": {
"campaigns.campaign_id":ObjectId("5c6e50932fb955f81b0c9f59")
}
},
{
"$sort": {
"campaigns.updatedAt": 1,
"_id": -1
}
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.sales_funnel_id",
"foreignField": "_id",
"as": "sale_funnels"
}
},
{
"$lookup": {
"from": "callresults",
"localField": "currentStat.callresult_id",
"foreignField": "_id",
"as": "callresults"
}
},
{
"$lookup": {
"from": "accounts",
"localField": "currentStat.qc.account_id",
"foreignField": "_id",
"as": "accounts"
}
},
{
"$match": {
"$or": [
{
"姓名": /137/
},
{
"电话号码": /137/
},
{
"电子邮件": /137/
},
{
"城市": /137/
},
{
"区域": /137/
},
{
"备注": /137/
}
]
}
},
{
"$limit": 15
}
]
)
Why is that?
In the first case ($limit before $lookup), the lookup is done on the first 15 matched documents only. However, when $limit is at the end of the pipeline, the lookup is done on all matched documents and then the limit is applied.
Taking a simpler example,
This query finds all documents where the value of field "n" is 1 and then shows the first 15 matching documents.
db.collection.aggregate([{$match: {"n" : 1}}, {$limit: 15}])
However, the below query takes the top 15 documents and then runs a match on those 15 documents only.
db.collection.aggregate([{$limit: 15}, {$match: {"n" : 1}}])
1) In the first case, you are limiting the results to 15 before the match condition is executed, so the match condition only works on those 15 documents.
2) In the second case, you are matching against all the documents in the collection and then limiting the result.
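A sketch of how the first pipeline could be rearranged so it still returns filtered results (this assumes the 姓名/电话号码/电子邮件/城市/区域/备注 fields live on the source collection's documents, not on the joined collections): run both $match stages before $limit, so the limit applies to already-filtered documents and the $lookup stages only run on those 15.
db.getCollection('xxxxxxxx').aggregate([
{ "$match": { "campaigns.campaign_id": ObjectId("5c6e50932fb955f81b0c9f59") } },
{ "$match": { "$or": [ { "姓名": /137/ }, { "电话号码": /137/ }, { "电子邮件": /137/ }, { "城市": /137/ }, { "区域": /137/ }, { "备注": /137/ } ] } },
{ "$sort": { "campaigns.updatedAt": 1, "_id": -1 } },
{ "$limit": 15 },
{ "$lookup": { "from": "callresults", "localField": "currentStat.sales_funnel_id", "foreignField": "_id", "as": "sale_funnels" } },
{ "$lookup": { "from": "callresults", "localField": "currentStat.callresult_id", "foreignField": "_id", "as": "callresults" } },
{ "$lookup": { "from": "accounts", "localField": "currentStat.qc.account_id", "foreignField": "_id", "as": "accounts" } }
])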