mongodb query need improvements - mongodb-query

For a given parameter, I am querying the most recent record from the collection like this:
query = {'id': parameter}
doc = collection.find_one(query, sort=[('updated_at',-1)])
How can I get the most recent record for every 'id' present in the collection in one query? For now, I am iterating over the parameters and concatenating the output.
Input: the collection has multiple records/documents like:
{
id: "ABC",
weight: 35,
updated_at: "2013-10-01T1:32:12.112Z"
},
{
id: "ABC",
weight: 45,
updated_at: "2017-10-01T1:32:12.112Z"
},
{
id: "BAD",
weight: 38,
updated_at: "2013-10-11T1:32:12.112Z"
}
Output:
{
{
id: "ABC",
weight: 45,
updated_at: "2017-10-01T1:32:12.112Z"
},
{
id: "BAD",
weight: 38,
updated_at: "2013-10-11T1:32:12.112Z"
}
}
Possible solution: if I construct the pipeline as shown below, what would be the implication of using $first for weight?
[
{ "$sort": { "id": 1, "updated_at": -1 } },
{ "$group": {
"_id": "$id",
"updated_at": { "$first": "$updated_at" },
"weight": { "$first": "$weight" }
}
]

You should use aggregation if you want to do it in a single query.
Try the following query:
db.collection.aggregate(
[
{
$group:
{
_id: "$id",
maxWeight: { $max: "$weight" }
recent: { $max: "$updated_at" }
}
}
]
)
Here you group by 'id' or 'parameter' field.

Related

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract the ids of the documents for which the value of "res" is an outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR, where Q1 and Q3 are the first and third quartiles and IQR = Q3 - Q1). I have done this using pandas functionality by retrieving all documents from the collection, which may become slow if the number of documents in the collection becomes too big.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?
If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
{ // partition res into quartiles
"$bucketAuto": {
"groupBy": "$res",
"buckets": 4
}
},
{ // get the max of each quartile
"$group": {
"_id": "$_id.max"
}
},
{ // sort the quartile maxs
"$sort": {
"_id": 1
}
},
{ // put sorted quartile maxs into array
"$group": {
"_id": null,
"maxs": {"$push": "$_id"}
}
},
{ // assign Q1 and Q3
"$project": {
"_id": 0,
"q1": {"$arrayElemAt": ["$maxs", 0]},
"q3": {"$arrayElemAt": ["$maxs", 2]}
}
},
{ // set IQR
"$set": {
"iqr": {
"$subtract": ["$q3", "$q1"]
}
}
},
{ // assign upper/lower outlier thresholds
"$project": {
"outlierThresholdLower": {
"$subtract": [
"$q1",
{"$multiply": ["$iqr", 1.5]}
]
},
"outlierThresholdUpper": {
"$add": [
"$q3",
{"$multiply": ["$iqr", 1.5]}
]
}
}
},
{ // get outlier _id's
"$lookup": {
"from": "collection",
"as": "outliers",
"let": {
"oTL": "$outlierThresholdLower",
"oTU": "$outlierThresholdUpper"
},
"pipeline": [
{
"$match": {
"$expr": {
"$or": [
{"$lt": ["$res", "$$oTL"]},
{"$gt": ["$res", "$$oTU"]}
]
}
}
},
{
"$project": {
"_id": 1
}
}
]
}
}
])
Try it on mongoplayground.net.
One more option, based on @rickhg12hs's answer, is to use $setWindowFields:
db.collection.aggregate([
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
{$group: {_id: null, res: {$push: "$res"}}},
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example

Get data from two collection with specific data Mongo

I want to get data from two collections: all data from the first collection (test1), and from the second collection (test2) the customer names matching createdBy and updatedBy in the test1 collection.
In createdBy and updatedBy I want the fullName from the test2 collection:
Test1 collection:
{
"_id": "kcXtyaB7jGPw9Ks",
"dateCreated": "2022-07-12T13:09:16.270Z",
"dateModified": "2022-07-12T13:09:16.270Z",
"data1": 1,
"data2": 100,
"data3": 5,
"createdBy": "xQQrzRgi8",
"updatedBy": "56sgAeKfx"
}
Test2 collection:
{
"_id": "xQQrzRgi8",
"fullName": "test name created"
},
{
"_id": "56sgAeKfx",
"fullName": "test name update"
}
The response should look like:
{
"_id": "kcXtyaB7jGPw9Ks",
"dateCreated": "2022-07-12T13:09:16.270Z",
"dateModified": "2022-07-12T13:09:16.270Z",
"data1": 1,
"data2": 100,
"data3": 5,
"createdBy": "test name created",
"updatedBy": "test name update"
}
If I've understood correctly, you can use $lookup like this:
This query does a "join" between "Test1" and "Test2" using the updatedBy and _id fields.
After that, it gets the first element of the result (I assume there is only one element because you are comparing with _id, but if there is more than one you can use another approach such as $unwind) to output the value.
Edit: To get both values (created and updated) you can do a second $lookup.
Now the query:
Get the updatedBy name from field _id in Test2.
Set value into field updatedBy.
Get the createdBy name from field _id in Test2.
Set value into field createdBy.
Use $project to exclude the temporary result field from the output.
db.Test1.aggregate([
{
"$lookup": {
"from": "Test2",
"localField": "updatedBy",
"foreignField": "_id",
"as": "result"
}
},
{
"$set": {
"updatedBy": {
"$first": "$result.fullName"
}
}
},
{
"$lookup": {
"from": "Test2",
"localField": "createdBy",
"foreignField": "_id",
"as": "result"
}
},
{
"$set": {
"createdBy": {
"$first": "$result.fullName"
}
}
},
{
"$project": {
"result": 0
}
}
])
Example here
I solved my problem with the Mongo query below:
db.Test1.aggregate([
{
$lookup: {
from: "Test2",
localField: "updatedBy",
foreignField: "_id",
as: "updatedByName",
},
},
{
$lookup: {
from: "Test2",
localField: "createdBy",
foreignField: "_id",
as: "createdByName",
},
},
{
$set: {
updatedBy: {
$first: "$updatedByName.fullName",
},
},
},
{
$set: {
createdBy: {
$first: "$createdByName.fullName",
},
},
},
{
$project: {
updatedByName: 0,
createdByName: 0,
},
}
])
Here is the solved query: https://mongoplayground.net/p/7Ekh-q8tkTy

Nest JS - Serialization not work with nested object

I want to append a new field called logo_img_url after data manipulation. It works with a single object, but it does not seem to work with objects in a nested array. Any recommendations or solutions?
import { Expose } from 'class-transformer';
import {} from 'class-validator';
import { Entity, Column } from 'typeorm';
import { BaseEntity } from '#common/base/entity.base';
import { getAdminImageUrl } from '#common/util/vault-file.util';
#Entity()
export class Agency extends BaseEntity {
#Expose()
#Column({
type: 'varchar',
length: '255',
nullable: true,
})
public logo_key!: string;
#Expose({ name: 'logoImgUrl' })
logo_img_url = this.logoImgUrl;
#Expose()
get logoImgUrl(): any {
return "http://localhost:30001/". this.logo_key;
}
}
Actual response:
{
"data": [
{
"logo_img_url": "",
"id": 22,
"logo_url": "9597e74d-aaea-4502-b5cd-b2867504fd80",
},
{
"logo_img_url": "",
"id": 21,
"logo_url": "0a655497-1318-4276-a21f-cfdc259241a2",
},
],
"meta": {
"status_code": 200,
"pagination": {
"totalPages": 1,
"currentPage": 1,
"nextPage": null,
"paginationToken": null,
"previousPage": null,
"totalItems": 9,
"itemsPerPage": 10
}
}
}
Expected result:
{
"data": [
{
"logo_img_url": "http://localhost:3000/9597e74d-aaea-4502-b5cd-b2867504fd80",
"id": 22,
"logo_url": "9597e74d-aaea-4502-b5cd-b2867504fd80",
},
{
"logo_img_url": "http://localhost:3000/0a655497-1318-4276-a21f-cfdc259241a2",
"id": 21,
"logo_url": "0a655497-1318-4276-a21f-cfdc259241a2",
},
],
"meta": {
"status_code": 200,
"pagination": {
"totalPages": 1,
"currentPage": 1,
"nextPage": null,
"paginationToken": null,
"previousPage": null,
"totalItems": 9,
"itemsPerPage": 10
}
}
}
You can use the @AfterLoad decorator from typeorm. It calls the decorated method once the entity is loaded, and then one can perform the required manipulations.
logo_img_url:string
#AfterLoad()
async transform(){
this.logo_img_url=`http://localhost:30001/${this.logo_key}`
}

Group mongodb collection and output the result as a single object

Is there a way to group a collection which looks like
[
{_id: "5bd258a7877e74059b6b65b2", year: 2017, title: "One"},
{_id: "5bd258a7877e74059b6b65b3", year: 2017, title: "Two"},
{_id: "5bd258a7877e74059b6b65b4", year: 2018, title: "Three"},
{_id: "5bd258a7877e74059b6b65b5", year: 2018, title: "Four"}
]
and output the result as
{
2017: [
0: {_id: "5bd258a7877e74059b6b65b2", title: "One", …}
1: {_id: "5bd258a7877e74059b6b65b3", title: "Two", …}
],
2018: [
0: {_id: "5bd258a7877e74059b6b65b4", title: "Three", …}
1: {_id: "5bd258a7877e74059b6b65b5", title: "Four", …}
]
}
using mongodb aggregations? Similar to how lodash groupBy works
You can do that with the following mongoDB aggregation:
db.collection.aggregate([{
$group: {
_id: "$year",
docs: {
$addToSet: "$$CURRENT"
}
}
},
{
"$group": {
"_id": null,
"data": {
"$push": {
"k": {
$toString: "$_id"
},
"v": "$docs"
}
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$arrayToObject": "$data"
}
}
}
])
You can see the result here
The idea is to group the data in a way where we can utilize $arrayToObject. The first group gives you the documents grouped by year, while the second is just preparation for $arrayToObject, which requires an array of key/value (k, v) objects. The last step is the $replaceRoot.
This requires MongoDB 3.6 and up due to $arrayToObject being introduced in that version. Before that you had to use $push etc.

Mongodb: get count of multiple values in a field grouped by another field

I have a collection as below
{"country":"US","city":"NY"}
{"country":"US","city":"AL"}
{"country":"US","city":"MA"}
{"country":"US","city":"NY"}
{"country":"US","city":"MA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
and expecting an output
{ "data": { "US": {"NY": 2,"AL": 1,"MA": 2 },
"IN": {"DL": 3,"KA": 2 }}
}
Below is the MongoDB query I tried. I was able to get the count at the country level, but not at the state level. Please help me correct the query below to get data at the state level.
db.country_dash.aggregate([
{"$group": {
"_id":"$country",
"state": {"$addToSet": "$state"}
}},
{"$project": {
"_id":0,
"country":"$_id",
"state": {"$size": "$state"}
} }
])
db.country_dash.aggregate(
// Pipeline
[
// Stage 1
{
$group: {
_id: {
city: '$city'
},
total: {
$sum: 1
},
country: {
$addToSet: '$country'
}
}
},
// Stage 2
{
$project: {
total: 1,
country: {
$arrayElemAt: ['$country', 0]
},
city: '$_id.city',
_id: 0
}
},
// Stage 3
{
$group: {
_id: '$country',
data: {
$addToSet: {
city: '$city',
total: '$total'
}
}
}
},
]
);