// Adds two computed fields to every user document, projects just those
// fields, and then tries to filter on them.
db.users.aggregate([{
// Both values are constants (57 - 20 and 67 - 20), so they are identical
// for every document that passes through this stage.
$addFields: {
age: { $subtract: [57, 20] },
age2: { $subtract: [67, 20] }
}
},
// Inclusion projection: only age and age2 are listed here.
{ $project: { age: 1, age2: 1 } },
{
// NOTE(review): $match takes query-operator syntax, so "$age" below is the
// literal string "$age", not a reference to the age field. The numeric age
// is therefore compared with a string, which would explain why a numeric
// literal works where "$age" does not. Comparing two fields needs an
// aggregation expression, e.g. $expr (MongoDB 3.6+), or a computed boolean
// from $project / $redact on 3.4 -- confirm against the $match documentation.
$match: {
age: { $lte: "$age" }
}
}
])
This code is not working. If I replace "$age" with any number (integer or float), then it works.
Please can anyone explain this behaviour?
Mongo shell version 3.4
Related
In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract the ids of the documents whose "res" value is an outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR, where Q1 and Q3 are the 25th and 75th percentiles and IQR = Q3 - Q1). I have done this with pandas by retrieving all documents from the collection, which may become slow if the number of documents in the collection grows too large.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?
If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
  // 1. Partition the documents into four buckets ordered by res.
  { $bucketAuto: { groupBy: "$res", buckets: 4 } },

  // 2. Reduce each bucket to its upper boundary (one document per distinct max).
  { $group: { _id: "$_id.max" } },

  // 3. Order the boundaries ascending so that array positions are meaningful.
  { $sort: { _id: 1 } },

  // 4. Collapse the sorted boundaries into a single array.
  { $group: { _id: null, quartileMaxes: { $push: "$_id" } } },

  // 5. Q1 is the first boundary and Q3 the third; IQR is their difference.
  {
    $project: {
      _id: 0,
      q1: { $arrayElemAt: ["$quartileMaxes", 0] },
      q3: { $arrayElemAt: ["$quartileMaxes", 2] },
      iqr: {
        $subtract: [
          { $arrayElemAt: ["$quartileMaxes", 2] },
          { $arrayElemAt: ["$quartileMaxes", 0] }
        ]
      }
    }
  },

  // 6. Outlier fences: Q1 - 1.5 * IQR and Q3 + 1.5 * IQR.
  {
    $project: {
      outlierThresholdLower: { $subtract: ["$q1", { $multiply: ["$iqr", 1.5] }] },
      outlierThresholdUpper: { $add: ["$q3", { $multiply: ["$iqr", 1.5] }] }
    }
  },

  // 7. Re-read the collection and keep the _id of every document whose res
  //    falls outside the fences.
  {
    $lookup: {
      from: "collection",
      let: { lower: "$outlierThresholdLower", upper: "$outlierThresholdUpper" },
      pipeline: [
        {
          $match: {
            $expr: {
              $or: [
                { $lt: ["$res", "$$lower"] },
                { $gt: ["$res", "$$upper"] }
              ]
            }
          }
        },
        { $project: { _id: 1 } }
      ],
      as: "outliers"
    }
  }
])
Try it on mongoplayground.net.
One more option, based on @rickhg12hs's answer, is to use $setWindowFields:
// Computes outlier thresholds (Q1 - 1.5 * IQR, Q3 + 1.5 * IQR) for the res
// field and looks up the _id of every document outside them.
db.collection.aggregate([
// Sort by res and annotate every document with:
//   totalCount - the $count over the window (no explicit window is given;
//                presumably the whole collection -- confirm),
//   index      - a running sum of 1 from the first document through the
//                current one, i.e. the 1-based position in sorted order.
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
// Keep documents for which ((index + round(totalCount / 4)) * 2) mod totalCount
// is at most 1. That value is near 0 when index is about totalCount / 4 or
// about 3 * totalCount / 4, so this appears intended to select the documents
// sitting at the Q1 and Q3 positions. Subtracting 0 and taking $abs do not
// change a non-negative remainder.
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
// Gather the surviving res values into one array.
{$group: {_id: null, res: {$push: "$res"}}},
// First element is taken as Q1, last element as Q3; IQR = Q3 - Q1.
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
// Lower and upper outlier thresholds.
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
// Join back to the collection and return only the _id of each document whose
// res is below the lower threshold or above the upper one.
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example
I'm new to SQL and MongoDB. I'm trying to convert this:
SELECT accountType, ROUND(AVG(balance), 2) avgBalance
FROM customers
WHERE gender="female"
GROUP BY accountType
HAVING COUNT(*) < 140
ORDER BY avgBalance
LIMIT 1
to MongoDB but I can't get it to work. I don't quite understand in what order the stages and operators ($group, $match, $project, $round, $avg etc.) should go, or how ROUND and AVG are used together. This is what the answer should look like: { "accountType" : "account-type", "avgBalance" : NumberDecimal("9999.99") }
Here is what I have so far:
// Non-working attempt, kept as posted. Visible problems:
//  - the braces and brackets do not balance, and { $avg: { "balance" } } and
//    [ $agv: "balance", 2 ] are not valid object/array literals;
//  - "$agv" is a misspelling of "$avg";
//  - field references are written without the leading "$" ("accountType", "balance");
//  - avgBalance is nested inside _id instead of being a sibling accumulator;
//  - { gender: "female" } is not wrapped in a $match stage and comes after grouping;
//  - $match filters on count, but no count accumulator is defined in $group.
db.customers.aggregate( [ { $group: { _id: { accountType: "accountType", avgBalance: { $avg: { "balance" } } }, { $match: { count: { $lt: 140 } } }, { gender: "female" }, { $project: { "accountType": { $round: [ $agv: "balance", 2 ] } } }, { $limit: 1 } ] )
You are heading in the right direction; it should look like this:
db.customers.aggregate([
  // WHERE gender = "female"
  // Filter before grouping so that only matching customers are aggregated.
  { $match: { gender: "female" } },

  // GROUP BY accountType, with AVG(balance) and COUNT(*) per group.
  {
    $group: {
      _id: "$accountType",
      avgBalance: { $avg: "$balance" },
      count: { $sum: 1 }
    }
  },

  // HAVING COUNT(*) < 140
  { $match: { count: { $lt: 140 } } },

  // SELECT accountType, ROUND(AVG(balance), 2) AS avgBalance
  // _id is suppressed so the result contains exactly the two selected
  // columns; without "_id: 0" the group key would also appear as _id.
  // NOTE(review): $round needs MongoDB 4.2 or later -- confirm server version.
  {
    $project: {
      _id: 0,
      accountType: "$_id",
      avgBalance: { $round: ["$avgBalance", 2] }
    }
  },

  // ORDER BY avgBalance
  { $sort: { avgBalance: 1 } },

  // LIMIT 1
  { $limit: 1 }
])
I have a collection as below
{"country":"US","city":"NY"}
{"country":"US","city":"AL"}
{"country":"US","city":"MA"}
{"country":"US","city":"NY"}
{"country":"US","city":"MA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
and expecting an output
{ "data": { "US": {"NY": 2,"AL": 1,"MA": 2 },
"IN": {"DL": 3,"KA": 2 }}
}
Below is the MongoDB query I tried. I was able to get the count at the country level, but not at the state (city) level. Please help me correct the query below to get the data at the state level.
// Attempt as posted: groups by country and reports the size of a set built
// from "$state".
// NOTE(review): the sample documents shown above only have "country" and
// "city" fields, so "$state" presumably resolves to nothing and the set stays
// empty. Even with "$city", $addToSet + $size would give the number of
// distinct cities per country, not a count per city.
db.country_dash.aggregate([
{"$group": {
"_id":"$country",
"state": {"$addToSet": "$state"}
}},
{"$project": {
"_id":0,
"country":"$_id",
"state": {"$size": "$state"}
} }
])
// Counts documents per city within each country.
// Result: one document per country, shaped as
//   { _id: <country>, data: [ { city: <city>, total: <count> }, ... ] }
db.country_dash.aggregate(
  // Pipeline
  [
    // Stage 1: count documents per (country, city) pair.
    // The country is part of the key so that two cities with the same name
    // in different countries are counted separately; grouping on city alone
    // would merge them and attribute the total to an arbitrary country.
    {
      $group: {
        _id: {
          country: '$country',
          city: '$city'
        },
        total: {
          $sum: 1
        }
      }
    },

    // Stage 2: flatten the compound key into top-level fields.
    {
      $project: {
        _id: 0,
        country: '$_id.country',
        city: '$_id.city',
        total: 1
      }
    },

    // Stage 3: collect the per-city totals under their country.
    {
      $group: {
        _id: '$country',
        data: {
          $addToSet: {
            city: '$city',
            total: '$total'
          }
        }
      }
    }
  ]
);
I am using MongoDB version 3.2.8. I am executing db.Member.find({$where: "var d = new Date(this.Birthdate); return d.getUTCDate() === 4 && d.getUTCMonth() === 2 && d.getUTCFullYear() !== 2017" }). This query takes too long to execute on my local MongoDB instance. Is there an alternative way to write this query so that it runs faster?
You can try using the MongoDB Aggregation Framework. I tested it using the Mingo library for JavaScript.
Example:
const mingo = require('mingo');

// Sample document: a birthdate of 4 March 1995 (UTC).
const data = [
  {
    _id: 100,
    Birthdate: new Date("1995-03-04"),
  },
];

// Aggregation equivalent of the $where filter
//   getUTCDate() === 4 && getUTCMonth() === 2 && getUTCFullYear() !== 2017
const pipeline = [
  // Break the date into day / month / year parts, keeping Birthdate itself.
  {
    $project: {
      Birthdate: 1,
      D: { $dayOfMonth: "$Birthdate" },
      M: { $month: "$Birthdate" },
      Y: { $year: "$Birthdate" },
    },
  },
  // getUTCMonth() is zero-based, so "=== 2" in the $where version means
  // March; $month is one-based, hence M: 3 here.
  {
    $match: { D: 4, M: 3, Y: { $ne: 2017 } },
  },
  // Drop the helper fields by re-including Birthdate only. Excluding
  // non-_id fields ({ M: 0, D: 0, Y: 0 }) is only accepted by $project from
  // MongoDB 3.4 on, so inclusion keeps the pipeline usable on 3.2.
  {
    $project: { Birthdate: 1 },
  },
];

const result = mingo.aggregate(data, pipeline);
console.log(result);
// Expected output (key order may vary):
// [ { Birthdate: 1995-03-04T00:00:00.000Z, _id: 100 } ]
For MongoDB:
// Runs the `pipeline` array defined in the example above against the Member collection.
db.Member.aggregate(pipeline)
My friend tells me that MongoDB is not worth learning because it is very bad at complex querying, for example something like this:
SELECT person, SUM(score), AVG(score), MIN(score), MAX(score), COUNT(*)
FROM demo
WHERE score > 0 AND person IN('bob','jake')
GROUP BY person;
He tells me that if I want to run this query in MongoDB, I have to write this:
// Per-person SUM / AVG / MIN / MAX / COUNT(*) of score, written with
// db.collection.group() and hand-rolled reduce/finalize functions.
// NOTE(review): the group command was deprecated in MongoDB 3.4 and later
// removed -- confirm availability for the server version in use.
db.demo.group({
// GROUP BY person.
"key": {
"person": true
},
// Starting accumulator for each group. The min/max fields are not listed
// here, so they start out undefined.
"initial": {
"sumscore": 0,
"sumforaverageaveragescore": 0,
"countforaverageaveragescore": 0,
"countstar": 0
},
// Called once per matching document; folds obj into the group's accumulator.
"reduce": function(obj, prev) {
// "- 0" presumably forces the sum back to a number.
prev.sumscore = prev.sumscore + obj.score - 0;
prev.sumforaverageaveragescore += obj.score;
prev.countforaverageaveragescore++;
// isNaN(undefined) is true, so the first document seeds min and max.
prev.minimumvaluescore = isNaN(prev.minimumvaluescore) ? obj.score : Math.min(prev.minimumvaluescore, obj.score);
prev.maximumvaluescore = isNaN(prev.maximumvaluescore) ? obj.score : Math.max(prev.maximumvaluescore, obj.score);
// COUNT(*): `true != null` always holds and `true instanceof Array` never
// does, so the else branch (which pairs with the inner if) always runs and
// countstar goes up by one per document.
if (true != null) if (true instanceof Array) prev.countstar += true.length;
else prev.countstar++;
},
// Called once per group at the end: derives the average and removes the two
// helper fields that were only needed to compute it.
"finalize": function(prev) {
prev.averagescore = prev.sumforaverageaveragescore / prev.countforaverageaveragescore;
delete prev.sumforaverageaveragescore;
delete prev.countforaverageaveragescore;
},
// WHERE score > 0 AND person IN ('bob', 'jake').
"cond": {
"score": {
"$gt": 0
},
"person": {
"$in": ["bob", "jake"]
}
}
});
Having no MongoDB background, I don't know what to think. I've been searching around and everyone says that MongoDB is better for a lot of things. Still, how do I write this query in MongoDB?
Is it like my friend says, or is there an easier way to do this?
There is a much easier way to do that.
db.demo.aggregate([
  // WHERE score > 0 AND person IN ('bob', 'jake')
  {
    $match: {
      score: { $gt: 0 },
      person: { $in: ["bob", "jake"] }
    }
  },
  // GROUP BY person, computing SUM, AVG, MIN, MAX and COUNT(*) of score.
  {
    $group: {
      _id: "$person",
      scoreSum: { $sum: "$score" },
      scoreAvg: { $avg: "$score" },
      scoreMin: { $min: "$score" },
      scoreMax: { $max: "$score" },
      count: { $sum: 1 }
    }
  }
])