Mongodb: get count of multiple values in a field grouped by another field - mongodb-query

I have a collection as below
{"country":"US","city":"NY"}
{"country":"US","city":"AL"}
{"country":"US","city":"MA"}
{"country":"US","city":"NY"}
{"country":"US","city":"MA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
and expecting an output
{ "data": { "US": {"NY": 2,"AL": 1,"MA": 2 },
"IN": {"DL": 3,"KA": 2 }}
}
Below is the mongodb query I tried, i was able to get to get the count at country level, but not at the state level. please help me in correcting the below query to get data at state level.
db.country_dash.aggregate([
{"$group": {
"_id":"$country",
"state": {"$addToSet": "$state"}
}},
{"$project": {
"_id":0,
"country":"$_id",
"state": {"$size": "$state"}
} }
])

db.country_dash.aggregate(
// Pipeline
[
// Stage 1
{
$group: {
_id: {
city: '$city'
},
total: {
$sum: 1
},
country: {
$addToSet: '$country'
}
}
},
// Stage 2
{
$project: {
total: 1,
country: {
$arrayElemAt: ['$country', 0]
},
city: '$_id.city',
_id: 0
}
},
// Stage 3
{
$group: {
_id: '$country',
data: {
$addToSet: {
city: '$city',
total: '$total'
}
}
}
},
]
);

Related

Mongo db query | Join | Create query on two collection

Select leadId count on two collection in Mongo DB
Collection 1 : leads
{
leadId:"abc123",
status:"OPENED",
stage:"start",
crossSell:
{
cc:
{
consent:true,
shown:[{first:true}]
}
}
}
Collection 2 : pdata
{
activeLeadId:"abc123",
status:"OPENED",
details:
[
{
rating:10
},
{
rating:9
}
]
}
Question : Find leadId count from leads collection join with pdata collection based on below conditions
leads.leadId = pdata.activeleadId and
leads.status = "OPENED" and
leads.crossSell.cc.consent = true and
leads.crossSell.cc.shown[0].first = true and
pdata.details.rating >= 5
You can try a aggregation query,
$match your conditions for leads collection
$lookup with pdata collection, pass leadId to match with pdata
match required conditions for pdata
$limit to return single document, because we don't need that data in response
$match condition to check is pdata is not empty
$count to get total number of records
db.leads.aggregate([
{
$match: {
status: "OPENED",
"crossSell.cc.consent": true,
"crossSell.cc.shown.first": true
}
},
{
"$lookup": {
"from": "pdata",
"let": { "leadId": "$leadId" },
"pipeline": [
{
$match: {
$expr: { $eq: ["$$leadId", "$activeLeadId"] },
"details.rating": { $gte: 5 }
}
},
{ $limit: 1 }
],
"as": "pdata"
}
},
{ $match: { pdata: { $ne: [] } } },
{ $count: "count" }
])
Playground

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract id's for the document for which the value of "res" is outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR (Q1, Q3 are percentiles)). I have done this using pandas functionality by retrieving all documents from the collection, which may become slow if the number of documents in collection become too big.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?
If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
{ // partition res into quartiles
"$bucketAuto": {
"groupBy": "$res",
"buckets": 4
}
},
{ // get the max of each quartile
"$group": {
"_id": "$_id.max"
}
},
{ // sort the quartile maxs
"$sort": {
"_id": 1
}
},
{ // put sorted quartile maxs into array
"$group": {
"_id": null,
"maxs": {"$push": "$_id"}
}
},
{ // assign Q1 and Q3
"$project": {
"_id": 0,
"q1": {"$arrayElemAt": ["$maxs", 0]},
"q3": {"$arrayElemAt": ["$maxs", 2]}
}
},
{ // set IQR
"$set": {
"iqr": {
"$subtract": ["$q3", "$q1"]
}
}
},
{ // assign upper/lower outlier thresholds
"$project": {
"outlierThresholdLower": {
"$subtract": [
"$q1",
{"$multiply": ["$iqr", 1.5]}
]
},
"outlierThresholdUpper": {
"$add": [
"$q3",
{"$multiply": ["$iqr", 1.5]}
]
}
}
},
{ // get outlier _id's
"$lookup": {
"from": "collection",
"as": "outliers",
"let": {
"oTL": "$outlierThresholdLower",
"oTU": "$outlierThresholdUpper"
},
"pipeline": [
{
"$match": {
"$expr": {
"$or": [
{"$lt": ["$res", "$$oTL"]},
{"$gt": ["$res", "$$oTU"]}
]
}
}
},
{
"$project": {
"_id": 1
}
}
]
}
}
])
Try it on mongoplayground.net.
One more option based on #rickhg12hs's answer, is to use $setWindowFields:
db.collection.aggregate([
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
{$group: {_id: null, res: {$push: "$res"}}},
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example

sql conversion to mongodb aggregate

I'm new to SQL and MongoDB. I'm trying to convert this:
SELECT accountType, ROUND(AVG(balance), 2) avgBalance
FROM customers
WHERE gender="female"
GROUP BY accountType
HAVING COUNT(*) < 140
ORDER BY avgBalance
LIMIT 1
to MongoDB but I can't get it to work. I don't quite understand how the order ($group, $match, $project, $round, $avg etc.) should be and how the "ROUND and AVG" are used together. This is how the answer should be like: { "accountType" : "account-type", "avgBalance" : NumberDecimal("9999.99") }
Here is what I have so far:
db.customers.aggregate( [ { $group: { _id: { accountType: "accountType", avgBalance: { $avg: { "balance" } } }, { $match: { count: { $lt: 140 } } }, { gender: "female" }, { $project: { "accountType": { $round: [ $agv: "balance", 2 ] } } }, { $limit: 1 } ] )
Direction is not bad, would be this one:
db.customers.aggregate([
// WHERE gender="female"
{ $match: { gender: "female" } },
// GROUP BY accountType, SELECT AVG(balance)
{
$group: {
_id: "$accountType",
avgBalance: { $avg: "$balance" },
count: {$sum: 1}
}
},
// HAVING COUNT(*) < 140
{ $match: { count: { $lt: 140 } } },
// SELECT ... AS ...
{
$project: {
accountType: "$_id",
avgBalance: { $round: ["$avgBalance", 2] }
}
},
// ORDER BY avgBalance
{ $sort: { avgBalance: 1 } },
// LIMIT 1
{ $limit: 1 }
])

mongodb aggregate distinct count

Realise this topic has been asked many times - but the advice hasn't helped me solve this problem.
The following query is trying to determine the presence of sales on a given weekday using ISODay. Because the query will be run at the start of the month, I need to know how many occurrences of the specific ISOday occur in the month.
var query = { eventType: 'Sale', site : 4, tank: 1, txnDate : { "$gt" : new Date('2018-08-01T00:00:00') } };
db.tankevent.aggregate([
{ $match: query },
{ $project : {
isoDay: { $isoDayOfWeek: "$txnDate" },
dayDate: { $dateToString: { format: "%d", date:"$txnDate" } }
}
},
{ $group:
{ _id : { isoday: "$isoDay", dday: "$dayDate" }, count: { "$sum" : 1 } }
},
{ $sort: { "_id.isoday": 1, "_id.dday": 1 } }
])
provides the following output
/* 1 */
{
"_id" : {
"isoday" : 1,
"dday" : "06"
},
"count" : 62.0
}
/* 2 */
{
"_id" : {
"isoday" : 1,
"dday" : "13"
},
"count" : 69.0
}
/* 3 */
{
"_id" : {
"isoday" : 1,
"dday" : "20"
},
"count" : 72.0
}
/* 4 */
{
"_id" : {
"isoday" : 2,
"dday" : "07"
},
"count" : 75.0
}
I am trying to have "count" represent the number of unique "dday" records - so using the output above, I want count to be "3" for isoDay = 1. At the moment count is reporting number of sales events that occurred for the group combination
All you need to do is have the grouping twice.
db.tankevent.aggregate([
{ $match: query },
{ $project : {
isoDay: { $isoDayOfWeek: "$txnDate" },
dayDate: { $dateToString: { format: "%d", date:"$txnDate" } }
}
},
{ $group:
{ _id : { isoday: "$isoDay", dday: "$dayDate" }, count: { "$sum" : 1 } }
},
{ $project : {
isoDay_Final: "$_id.isoday"
}
},
{ $group:
{ _id : "$isoDay_Final", count: { "$sum" : 1 } }
},
{ $sort: { "_id": 1 } }
])

query in mongo db

my friend is telling me that mongo is not worth learning since its very bad to do complex querying, something like this:
SELECT person, SUM(score), AVG(score), MIN(score), MAX(score), COUNT(*)
FROM demo
WHERE score > 0 AND person IN('bob','jake')
GROUP BY person;
he is telling me that if i want to do this query with mongo i have to write this
db.demo.group({
"key": {
"person": true
},
"initial": {
"sumscore": 0,
"sumforaverageaveragescore": 0,
"countforaverageaveragescore": 0,
"countstar": 0
},
"reduce": function(obj, prev) {
prev.sumscore = prev.sumscore + obj.score - 0;
prev.sumforaverageaveragescore += obj.score;
prev.countforaverageaveragescore++;
prev.minimumvaluescore = isNaN(prev.minimumvaluescore) ? obj.score : Math.min(prev.minimumvaluescore, obj.score);
prev.maximumvaluescore = isNaN(prev.maximumvaluescore) ? obj.score : Math.max(prev.maximumvaluescore, obj.score);
if (true != null) if (true instanceof Array) prev.countstar += true.length;
else prev.countstar++;
},
"finalize": function(prev) {
prev.averagescore = prev.sumforaverageaveragescore / prev.countforaverageaveragescore;
delete prev.sumforaverageaveragescore;
delete prev.countforaverageaveragescore;
},
"cond": {
"score": {
"$gt": 0
},
"person": {
"$in": ["bob", "jake"]
}
}
});
so having no mongodb background i dont know what to think and i've been searching arround and everyone says that mongo is better for a lot of stuff, still how do i do this query in mongo?
is it like my friend says? or is there a easier way to do this?
There is a much easier way to do that.
db.demo.aggregate([
{ $match: { score: { $gt: 0 }, person: { $in: ["bob", "jake"] } } },
{ $group: { _id: "$person", scoreSum: { $sum: "$score" }, scoreAvg: { $avg: "$score" }, scoreMin: { $min: "$score" }, scoreMax: { $max: "$score" }, count: { $sum: 1 } } }
])