How to find the count of duplicate records in MongoDB
Here is how I get that in MySQL:
SELECT name, COUNT(*) c FROM table GROUP BY name HAVING c > 1;
Try this
// Count documents per `name` and keep only the names that occur more than
// once, i.e. the equivalent of GROUP BY name HAVING c > 1.
// NOTE: db.collection.group() was deprecated in MongoDB 3.4 and removed in
// 4.2; on newer servers use an aggregation pipeline instead.
db.table.group({
  "key": {
    "name": true
  },
  "initial": {
    "c": 0
  },
  // Called once per document; `prev` is the running result for that name.
  "reduce": function(obj, prev) {
    prev.c++;
  }
}).filter(function(group) {
  // group() has no HAVING clause, so drop the non-duplicates from the
  // returned array on the client side.
  return group.c > 1;
});
// List every value of field `a` that is carried by more than one document,
// together with the number of documents that carry it.
db.mycollection.aggregate([
  // Bucket the documents by `a`, adding one to the tally per document.
  { "$group": { "_id": "$a", "count": { "$sum": 1 } } },
  // Keep only the buckets that hold more than one document.
  { "$match": { "count": { "$gt": 1 } } }
]);
Related
Select the leadId count across two collections in MongoDB
Collection 1 : leads
{
leadId:"abc123",
status:"OPENED",
stage:"start",
crossSell:
{
cc:
{
consent:true,
shown:[{first:true}]
}
}
}
Collection 2 : pdata
{
activeLeadId:"abc123",
status:"OPENED",
details:
[
{
rating:10
},
{
rating:9
}
]
}
Question: Find the leadId count from the leads collection joined with the pdata collection, based on the conditions below:
leads.leadId = pdata.activeLeadId and
leads.status = "OPENED" and
leads.crossSell.cc.consent = true and
leads.crossSell.cc.shown[0].first = true and
pdata.details.rating >= 5
You can try an aggregation query:
$match your conditions on the leads collection
$lookup with the pdata collection, passing leadId to match against pdata's activeLeadId
$match the required conditions for pdata inside the lookup pipeline
$limit to return a single document, because we only need to know that a match exists, not the matched data
$match to check that pdata is not empty
$count to get the total number of records
// Count the leads that are OPENED, have cross-sell consent, whose FIRST
// `shown` entry has first == true, and that have a matching pdata document
// containing at least one rating >= 5.
db.leads.aggregate([
  {
    $match: {
      status: "OPENED",
      "crossSell.cc.consent": true,
      // ".0." pins the test to shown[0]; plain "shown.first" would match a
      // document in which ANY element of `shown` has first == true.
      "crossSell.cc.shown.0.first": true
    }
  },
  {
    $lookup: {
      from: "pdata",
      // Expose the lead's id to the sub-pipeline as $$leadId.
      let: { leadId: "$leadId" },
      pipeline: [
        {
          $match: {
            $expr: { $eq: ["$$leadId", "$activeLeadId"] },
            "details.rating": { $gte: 5 }
          }
        },
        // A single hit is enough to prove that a matching pdata exists.
        { $limit: 1 }
      ],
      as: "pdata"
    }
  },
  // Drop the leads for which the lookup found nothing.
  { $match: { pdata: { $ne: [] } } },
  { $count: "count" }
])
Playground
I'm new to SQL and MongoDB. I'm trying to convert this:
SELECT accountType, ROUND(AVG(balance), 2) avgBalance
FROM customers
WHERE gender="female"
GROUP BY accountType
HAVING COUNT(*) < 140
ORDER BY avgBalance
LIMIT 1
to MongoDB, but I can't get it to work. I don't quite understand in what order the stages and operators ($group, $match, $project, $round, $avg, etc.) should go, or how ROUND and AVG are used together. This is what the result should look like: { "accountType" : "account-type", "avgBalance" : NumberDecimal("9999.99") }
Here is what I have so far:
// Asker's non-working attempt, kept verbatim. It does not parse as written: `{ "balance" }` and `[ $agv: "balance", 2 ]` are not valid literals. The field references also lack the "$" prefix, and `count` is matched on without ever being computed.
db.customers.aggregate( [ { $group: { _id: { accountType: "accountType", avgBalance: { $avg: { "balance" } } }, { $match: { count: { $lt: 140 } } }, { gender: "female" }, { $project: { "accountType": { $round: [ $agv: "balance", 2 ] } } }, { $limit: 1 } ] )
You are heading in the right direction; the working pipeline would be this one:
// MongoDB translation of the SQL query: for female customers, the account
// type with the lowest rounded average balance among the account types that
// have fewer than 140 such customers.
db.customers.aggregate([
  // WHERE gender="female"
  { $match: { gender: "female" } },
  // GROUP BY accountType, SELECT AVG(balance)
  {
    $group: {
      _id: "$accountType",
      avgBalance: { $avg: "$balance" },
      // COUNT(*) is needed only for the HAVING filter below
      count: { $sum: 1 }
    }
  },
  // HAVING COUNT(*) < 140
  { $match: { count: { $lt: 140 } } },
  // SELECT accountType, ROUND(AVG(balance), 2) AS avgBalance
  {
    $project: {
      // Suppress _id so the result has exactly the two requested fields.
      _id: 0,
      accountType: "$_id",
      avgBalance: { $round: ["$avgBalance", 2] }
    }
  },
  // ORDER BY avgBalance
  { $sort: { avgBalance: 1 } },
  // LIMIT 1
  { $limit: 1 }
])
I realise this topic has been asked many times, but the advice hasn't helped me solve this problem.
The following query tries to determine the presence of sales on a given weekday using the ISO day of the week. Because the query will be run at the start of the month, I need to know how many times each ISO weekday occurs in the month.
// Filter: sale events for site 4 / tank 1 with a txnDate after the given
// timestamp.
var query = {
  eventType: 'Sale',
  site: 4,
  tank: 1,
  txnDate: { $gt: new Date('2018-08-01T00:00:00') }
};

// Number of matching events per (ISO weekday, day-of-month) combination,
// ordered by weekday and then by day.
db.tankevent.aggregate([
  { $match: query },
  // Reduce each event to its ISO weekday number and its two-digit day of month.
  {
    $project: {
      isoDay: { $isoDayOfWeek: '$txnDate' },
      dayDate: { $dateToString: { format: '%d', date: '$txnDate' } }
    }
  },
  // One bucket per (weekday, day) pair; `count` is the events in that bucket.
  {
    $group: {
      _id: { isoday: '$isoDay', dday: '$dayDate' },
      count: { $sum: 1 }
    }
  },
  { $sort: { '_id.isoday': 1, '_id.dday': 1 } }
]);
provides the following output
/* 1 */
{
"_id" : {
"isoday" : 1,
"dday" : "06"
},
"count" : 62.0
}
/* 2 */
{
"_id" : {
"isoday" : 1,
"dday" : "13"
},
"count" : 69.0
}
/* 3 */
{
"_id" : {
"isoday" : 1,
"dday" : "20"
},
"count" : 72.0
}
/* 4 */
{
"_id" : {
"isoday" : 2,
"dday" : "07"
},
"count" : 75.0
}
I am trying to have "count" represent the number of unique "dday" values per weekday, so, using the output above, I want count to be 3 for isoDay = 1. At the moment, count reports the number of sale events that occurred for each group combination.
All you need to do is have the grouping twice.
// For each ISO weekday, count how many distinct days of the month had at
// least one matching event. `query` is the filter object defined alongside
// the original pipeline in the question.
// NOTE(review): dayDate holds only the day of the month, so the same day
// number in two different months collapses into one; this assumes `query`
// restricts txnDate to a single month. Confirm.
db.tankevent.aggregate([
  { $match: query },
  { $project : {
      isoDay: { $isoDayOfWeek: "$txnDate" },
      dayDate: { $dateToString: { format: "%d", date: "$txnDate" } }
    }
  },
  // First pass: collapse all events that share a (weekday, day-of-month)
  // pair into one document. Only the distinct keys matter, so no
  // accumulator is needed here.
  { $group:
    { _id : { isoday: "$isoDay", dday: "$dayDate" } }
  },
  // Second pass: one document per weekday, counting its distinct days.
  { $group:
    { _id : "$_id.isoday", count: { "$sum" : 1 } }
  },
  { $sort: { "_id": 1 } }
])
I have a collection as below
{"country":"US","city":"NY"}
{"country":"US","city":"AL"}
{"country":"US","city":"MA"}
{"country":"US","city":"NY"}
{"country":"US","city":"MA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"DL"}
{"country":"IN","city":"KA"}
and expecting an output
{ "data": { "US": {"NY": 2,"AL": 1,"MA": 2 },
"IN": {"DL": 3,"KA": 2 }}
}
Below is the MongoDB query I tried. I was able to get the count at the country level, but not at the state level. Please help me correct the query below to get the data at the state level.
// For each country, report how many distinct states/cities it has.
db.country_dash.aggregate([
  {"$group": {
    "_id": "$country",
    // The sample documents store the state code in `city`; they have no
    // `state` field, so "$state" would always collect an empty set.
    "state": {"$addToSet": "$city"}
  }},
  {"$project": {
    "_id": 0,
    "country": "$_id",
    // Replace the set with its size, i.e. the distinct count.
    "state": {"$size": "$state"}
  }}
])
// Per country, list each city together with its number of documents:
//   { _id: <country>, data: [ { city: <city>, total: <n> }, ... ] }
db.country_dash.aggregate(
  // Pipeline
  [
    // Stage 1: count documents per (country, city) pair. Keying on the pair,
    // rather than on the city alone, keeps same-named cities in different
    // countries apart and makes the country unambiguous.
    {
      $group: {
        _id: {
          country: '$country',
          city: '$city'
        },
        total: {
          $sum: 1
        }
      }
    },
    // Stage 2: regroup by country, collecting the per-city totals.
    {
      $group: {
        _id: '$_id.country',
        data: {
          $addToSet: {
            city: '$_id.city',
            total: '$total'
          }
        }
      }
    },
  ]
);
My friend tells me that Mongo is not worth learning because it is very bad at complex querying, such as this:
SELECT person, SUM(score), AVG(score), MIN(score), MAX(score), COUNT(*)
FROM demo
WHERE score > 0 AND person IN('bob','jake')
GROUP BY person;
He tells me that to run this query in Mongo I would have to write this:
// Legacy group() version of the SQL above: per person, the SUM, AVG, MIN,
// MAX and COUNT(*) of score over documents with score > 0 and person in
// ('bob', 'jake').
// NOTE: db.collection.group() was deprecated in MongoDB 3.4 and removed in
// 4.2.
db.demo.group({
  "key": {
    "person": true
  },
  "initial": {
    "sumscore": 0,
    "sumforaverageaveragescore": 0,
    "countforaverageaveragescore": 0,
    "countstar": 0
  },
  // Called once per matching document; `prev` is the running result for the
  // document's person.
  "reduce": function(obj, prev) {
    // The trailing "- 0" coerces the running sum back to a number.
    prev.sumscore = prev.sumscore + obj.score - 0;
    prev.sumforaverageaveragescore += obj.score;
    prev.countforaverageaveragescore++;
    // The min/max fields are not in `initial`, so they start out undefined;
    // isNaN(undefined) is true, which seeds them with the first score seen.
    prev.minimumvaluescore = isNaN(prev.minimumvaluescore) ? obj.score : Math.min(prev.minimumvaluescore, obj.score);
    prev.maximumvaluescore = isNaN(prev.maximumvaluescore) ? obj.score : Math.max(prev.maximumvaluescore, obj.score);
    // COUNT(*): one per document.
    prev.countstar++;
  },
  // Turn the sum/count helper pair into the average, then remove the helpers
  // from the result.
  "finalize": function(prev) {
    prev.averagescore = prev.sumforaverageaveragescore / prev.countforaverageaveragescore;
    delete prev.sumforaverageaveragescore;
    delete prev.countforaverageaveragescore;
  },
  // WHERE score > 0 AND person IN ('bob', 'jake')
  "cond": {
    "score": {
      "$gt": 0
    },
    "person": {
      "$in": ["bob", "jake"]
    }
  }
});
Having no MongoDB background, I don't know what to think. I've been searching around and everyone says that Mongo is better for a lot of things, but how do I write this query in Mongo?
Is it like my friend says, or is there an easier way to do this?
There is a much easier way to do that.
// Per-person score statistics (sum, average, minimum, maximum, row count)
// for bob and jake, considering only positive scores.
db.demo.aggregate([
  // WHERE score > 0 AND person IN ('bob', 'jake')
  {
    $match: {
      score: { $gt: 0 },
      person: { $in: ["bob", "jake"] }
    }
  },
  // GROUP BY person, with one accumulator per SQL aggregate
  {
    $group: {
      _id: "$person",
      scoreSum: { $sum: "$score" },
      scoreAvg: { $avg: "$score" },
      scoreMin: { $min: "$score" },
      scoreMax: { $max: "$score" },
      count: { $sum: 1 }
    }
  }
])