Group a MongoDB collection and output the result as a single object

Is there a way to group a collection which looks like
[
{_id: "5bd258a7877e74059b6b65b2", year: 2017, title: "One"},
{_id: "5bd258a7877e74059b6b65b3", year: 2017, title: "Two"},
{_id: "5bd258a7877e74059b6b65b4", year: 2018, title: "Three"},
{_id: "5bd258a7877e74059b6b65b5", year: 2018, title: "Four"}
]
and output the result as
{
2017: [
0: {_id: "5bd258a7877e74059b6b65b2", title: "One", …}
1: {_id: "5bd258a7877e74059b6b65b3", title: "Two", …}
],
2018: [
0: {_id: "5bd258a7877e74059b6b65b4", title: "Three", …}
1: {_id: "5bd258a7877e74059b6b65b5", title: "Four", …}
]
}
using mongodb aggregations? Similar to how lodash groupBy works

You can do that with the following MongoDB aggregation:
db.collection.aggregate([
  {
    $group: {
      _id: "$year",
      docs: {
        $addToSet: "$$CURRENT"
      }
    }
  },
  {
    "$group": {
      "_id": null,
      "data": {
        "$push": {
          "k": {
            $toString: "$_id"
          },
          "v": "$docs"
        }
      }
    }
  },
  {
    "$replaceRoot": {
      "newRoot": {
        "$arrayToObject": "$data"
      }
    }
  }
])
The idea is to reshape the data so that $arrayToObject can be applied. The first $group stage groups the documents by year; the second builds the k/v pairs that $arrayToObject requires; and the final $replaceRoot promotes the resulting object to the root of the output document.
Version note: $arrayToObject was introduced in MongoDB 3.4.4, and the $toString used to turn the numeric year into a string key requires MongoDB 4.0, so this pipeline needs 4.0 or newer. On older servers you would have to group with $push and assemble the final object in application code.
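With the four sample documents from the question, this produces a single document of the requested shape (the order of the documents inside each array is not guaranteed, because $addToSet builds an unordered set):
{
  "2017": [
    { "_id": "5bd258a7877e74059b6b65b2", "year": 2017, "title": "One" },
    { "_id": "5bd258a7877e74059b6b65b3", "year": 2017, "title": "Two" }
  ],
  "2018": [
    { "_id": "5bd258a7877e74059b6b65b4", "year": 2018, "title": "Three" },
    { "_id": "5bd258a7877e74059b6b65b5", "year": 2018, "title": "Four" }
  ]
}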

Related

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract the _id values of the documents for which "res" is an outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR, where Q1 and Q3 are the 25th and 75th percentiles). So far I have done this with pandas by retrieving all documents from the collection, which may become slow as the collection grows.
Is there a way to do this with the MongoDB aggregation pipeline (or at least to calculate the percentiles)?
If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
  { // partition res into quartiles
    "$bucketAuto": {
      "groupBy": "$res",
      "buckets": 4
    }
  },
  { // get the max of each quartile
    "$group": {
      "_id": "$_id.max"
    }
  },
  { // sort the quartile maxs
    "$sort": {
      "_id": 1
    }
  },
  { // put sorted quartile maxs into array
    "$group": {
      "_id": null,
      "maxs": {"$push": "$_id"}
    }
  },
  { // assign Q1 and Q3
    "$project": {
      "_id": 0,
      "q1": {"$arrayElemAt": ["$maxs", 0]},
      "q3": {"$arrayElemAt": ["$maxs", 2]}
    }
  },
  { // set IQR
    "$set": {
      "iqr": {
        "$subtract": ["$q3", "$q1"]
      }
    }
  },
  { // assign upper/lower outlier thresholds
    "$project": {
      "outlierThresholdLower": {
        "$subtract": [
          "$q1",
          {"$multiply": ["$iqr", 1.5]}
        ]
      },
      "outlierThresholdUpper": {
        "$add": [
          "$q3",
          {"$multiply": ["$iqr", 1.5]}
        ]
      }
    }
  },
  { // get outlier _id's
    "$lookup": {
      "from": "collection",
      "as": "outliers",
      "let": {
        "oTL": "$outlierThresholdLower",
        "oTU": "$outlierThresholdUpper"
      },
      "pipeline": [
        {
          "$match": {
            "$expr": {
              "$or": [
                {"$lt": ["$res", "$$oTL"]},
                {"$gt": ["$res", "$$oTU"]}
              ]
            }
          }
        },
        {
          "$project": {
            "_id": 1
          }
        }
      ]
    }
  }
])
Try it on mongoplayground.net.
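The pipeline returns at most one summary document whose outliers field is an array of {_id} subdocuments. If all you need is a flat list of ids, a small mongosh sketch (the pipeline variable below is a hypothetical name for the array of stages shown above) could unwrap it like this:
// `pipeline` is assumed to hold the array of stages from the aggregation above.
const result = db.collection.aggregate(pipeline).toArray();
// The pipeline yields at most one summary document; guard against an empty collection.
const outlierIds = result.length ? result[0].outliers.map(doc => doc._id) : [];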
One more option, based on @rickhg12hs's answer, is to use $setWindowFields:
db.collection.aggregate([
  {$setWindowFields: {
      sortBy: {res: 1},
      output: {
        totalCount: {$count: {}},
        index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
      }
  }},
  {$match: {
      $expr: {$lte: [
        {$abs: {$subtract: [
          {$mod: [
            {$multiply: [{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
            "$totalCount"
          ]},
          0
        ]}},
        1
      ]}
  }},
  {$group: {_id: null, res: {$push: "$res"}}},
  {$project: {
      _id: 0,
      q1: {$first: "$res"},
      q3: {$last: "$res"},
      iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
  }},
  {$project: {
      outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
      outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
  }},
  {$lookup: {
      from: "collection",
      as: "outliers",
      let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
      pipeline: [
        {$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
        {$project: {_id: 1}}
      ]
  }}
])
See how it works in this playground example.
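If you are on MongoDB 7.0 or newer, the $percentile accumulator can approximate Q1 and Q3 directly, which may let you skip the bucketing/window bookkeeping. This is only a rough sketch under that version assumption, not something tested against your data; the final $lookup stage would be the same as in the answers above:
db.collection.aggregate([
  { // approximate Q1 and Q3 in one pass (MongoDB 7.0+ only)
    $group: {
      _id: null,
      qs: {$percentile: {input: "$res", p: [0.25, 0.75], method: "approximate"}}
    }
  },
  {$project: {
      _id: 0,
      q1: {$arrayElemAt: ["$qs", 0]},
      q3: {$arrayElemAt: ["$qs", 1]}
  }},
  {$set: {iqr: {$subtract: ["$q3", "$q1"]}}},
  {$project: {
      outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
      outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
  }}
  // ...followed by the same $lookup stage as above to collect the outlier _id's.
])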

How to get nested documents in FaunaDB with a filter?

The following query:
Map(
  Paginate(Documents(Collection("backyard"))),
  Lambda(
    "f",
    Let(
      {
        backyard: Get(Var("f")),
        user: Get(Select(["data", "user"], Var("backyard")))
      },
      {
        backyard: Var("backyard"),
        user: Var("user")
      }
    )
  )
)
returns:
{
data: [
{
backyard: {
ref: Ref(Collection("backyard"), "333719283470172352"),
ts: 1654518359560000,
data: {
user: Ref(Collection("user"), "333718599460978887"),
product: "15358",
date: "2022-06-06",
counter: "1"
}
},
user: {
ref: Ref(Collection("user"), "333718599460978887"),
ts: 1654517707220000,
data: {
email: "<email>",
name: "Paolo"
}
}
},
{
backyard: {
ref: Ref(Collection("backyard"), "333747850716381384"),
ts: 1654545603400000,
data: {
user: Ref(Collection("user"), "333718599460978887"),
product: "15358",
date: "2022-06-08",
counter: "4"
}
},
user: {
ref: Ref(Collection("user"), "333718599460978887"),
ts: 1654517707220000,
data: {
email: "<email>",
name: "Paolo"
}
}
}
]
}
How can I filter backyard by date without losing the nested users?
I tried:
Map(
  Paginate(Range(Match(Index("backyard_by_date")), "2022-05-08", "2022-06-08")),
  Lambda(
    "f",
    Let(
      {
        backyard: Get(Var("f")),
        user: Get(Select(["data", "user"], Var("backyard")))
      },
      {
        backyard: Var("backyard"),
        user: Var("user")
      }
    )
  )
)
However, the result set is an empty array, and even the following simpler query already returns an empty array:
Paginate(Range(Match(Index("backyard_by_date")), "2022-05-08", "2022-06-08"))
My index:
{
name: "backyard_by_date",
unique: false,
serialized: true,
source: "backyard"
}
Maybe I have to adjust my index? The following helped me a lot:
How to get nested documents in FaunaDB?
How to Get Data from two collection in faunadb
how to join collections in faunadb?
Your index definition is missing details. Once that gets fixed, everything else you were doing is exactly right.
In your provided index, there are no terms or values specified, which makes the backyard_by_date index a "collection" index: it only records the references of every document in the collection. In this way, it is functionally equivalent to using the Documents function but incurs additional write operations as documents are created or updated within the backyard collection.
To make your query work, you should delete your existing index and (after 60 seconds) redefine it like this:
CreateIndex({
  name: "backyard_by_date",
  source: Collection("backyard"),
  values: [
    {field: ["data", "date"]},
    {field: ["ref"]}
  ]
})
That definition configures the index to return the date field and the reference for every document.
Let's confirm that the index returns what we expect:
> Paginate(Match(Index("backyard_by_date")))
{
data: [
[ '2022-06-06', Ref(Collection("backyard"), "333719283470172352") ],
[ '2022-06-08', Ref(Collection("backyard"), "333747850716381384") ]
]
}
Placing the date field's value first means that we can use it effectively in Range:
> Paginate(Range(Match(Index("backyard_by_date")), "2022-05-08", "2022-06-08"))
{
data: [
[ '2022-06-06', Ref(Collection("backyard"), "333719283470172352") ],
[ '2022-06-08', Ref(Collection("backyard"), "333747850716381384") ]
]
}
And to verify that Range is working as expected:
> Paginate(Range(Match(Index("backyard_by_date")), "2022-06-07", "2022-06-08"))
{
data: [
[ '2022-06-08', Ref(Collection("backyard"), "333747850716381384") ]
]
}
Now that we know the index is working correctly, your filter query needs a few adjustments:
> Map(
    Paginate(
      Range(Match(Index("backyard_by_date")), "2022-05-08", "2022-06-08")
    ),
    Lambda(
      ["date", "ref"],
      Let(
        {
          backyard: Get(Var("ref")),
          user: Get(Select(["data", "user"], Var("backyard")))
        },
        {
          backyard: Var("backyard"),
          user: Var("user")
        }
      )
    )
  )
{
data: [
{
backyard: {
ref: Ref(Collection("backyard"), "333719283470172352"),
ts: 1657918078190000,
data: {
user: Ref(Collection("user"), "333718599460978887"),
product: '15358',
date: '2022-06-06',
counter: '1'
}
},
user: {
ref: Ref(Collection("user"), "333718599460978887"),
ts: 1657918123870000,
data: { name: 'Paolo', email: '<email>' }
}
},
{
backyard: {
ref: Ref(Collection("backyard"), "333747850716381384"),
ts: 1657918172850000,
data: {
user: Ref(Collection("user"), "333718599460978887"),
product: '15358',
date: '2022-06-08',
counter: '4'
}
},
user: {
ref: Ref(Collection("user"), "333718599460978887"),
ts: 1657918123870000,
data: { name: 'Paolo', email: '<email>' }
}
}
]
}
Since the index now returns a date string and a reference, the Lambda inside the Map has to accept both values as arguments. Aside from changing the Lambda parameter from "f" to ["date", "ref"] (and fetching the document with Var("ref")), the rest of your query is unchanged.

MongoDB query needs improvement

For a given parameter, I am querying the most recent record from the collection like this:
query = {'id': parameter}
doc = collection.find_one(query, sort=[('updated_at',-1)])
How can I get the most recent record for every 'id' present in the collection in a single query? For now, I am iterating over the parameters and concatenating the output.
INPUT: the collection has multiple documents like:
{
id: "ABC",
weight: 35,
updated_at: "2013-10-01T1:32:12.112Z"
},
{
id: "ABC",
weight: 45,
updated_at: "2017-10-01T1:32:12.112Z"
},
{
id: "BAD",
weight: 38,
updated_at: "2013-10-11T1:32:12.112Z"
}
Output:
{
{
id: "ABC",
weight: 45,
updated_at: "2017-10-01T1:32:12.112Z"
},
{
id: "BAD",
weight: 38,
updated_at: "2013-10-11T1:32:12.112Z"
}
}
Solution: if I construct the pipeline as shown below, what would be the implication of taking weight with $first?
[
  { "$sort": { "id": 1, "updated_at": -1 } },
  { "$group": {
      "_id": "$id",
      "updated_at": { "$first": "$updated_at" },
      "weight": { "$first": "$weight" }
  }}
]
You should use aggregation if you want to do it in a single query.
Try the following query:
db.collection.aggregate(
  [
    {
      $group:
      {
        _id: "$id",
        maxWeight: { $max: "$weight" },
        recent: { $max: "$updated_at" }
      }
    }
  ]
)
Here you group on the 'id' field (your parameter) and take the per-id maximum of weight and of updated_at.
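Regarding the $first variant sketched in the question: because $first takes its value from the first document in each group, sorting by updated_at descending beforehand guarantees that weight and updated_at are both taken from the same, most recent document per id. With two independent $max accumulators, as in the query above, the maximum weight and the maximum updated_at may come from different documents. A runnable version of the $sort/$first approach, using the same field names:
db.collection.aggregate([
  // Sort so the newest document for each id comes first.
  { $sort: { id: 1, updated_at: -1 } },
  // $first then picks both fields from that newest document.
  { $group: {
      _id: "$id",
      updated_at: { $first: "$updated_at" },
      weight: { $first: "$weight" }
  }}
])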

Using lodash to retrieve values from a complex array

I have the following complex array
[
{
label: "Country1",
metrics: [
{
label: "xyz",
metric: "xyz",
value: 234184
},
{
label: "abc",
metric: "abc",
value: 145678
}
]
},
{
label: "Country2",
metrics: [
{
label: "xyz",
metric: "xyz",
value: 123456
},
{
label: "abc",
metric: "abc",
value: 456789
}
]
},
{
label: "Country3",
metrics: [
{
label: "xyz",
metric: "xyz",
value: 62389
},
{
label: "abc",
metric: "abc",
value: 4964738
}
]
}
]
I need to convert it to the following simple array, where each entry in the metrics sub-array becomes a key/value pair on the country object (its label as the key, its value as the value):
[
  {label: "Country1", xyz: 234184, abc: 145678},
  {label: "Country2", xyz: 123456, abc: 456789},
  {label: "Country3", xyz: 62389, abc: 4964738}
]
Can this conversion happen using lodash?
There might be a better / cleaner way to do this, but this is what I could come up with in just a few minutes.
const inputData = [
{
label: "Country1",
metrics: [
{
label: "xyz",
metric: "xyz",
value: 234184
},
{
label: "abc",
metric: "abc",
value: 145678
}
]
},
{
label: "Country2",
metrics: [
{
label: "xyz",
metric: "xyz",
value: 123456
},
{
label: "abc",
metric: "abc",
value: 456789
}
]
},
{
label: "Country3",
metrics: [
{
label: "xyz",
metric: "xyz",
value: 62389
},
{
label: "abc",
metric: "abc",
value: 4964738
}
]
}
];
const newData = _.map(inputData, first => {
const additional = _.map(first.metrics, metric => [metric.label, metric.value]);
return _.assign({}, _.fromPairs(additional), {
label: first.label
});
});
console.log(newData);
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.17.10/lodash.js"></script>
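For the sample input above, newData comes out with the shape requested in the question (property order aside):
[
  { xyz: 234184, abc: 145678, label: "Country1" },
  { xyz: 123456, abc: 456789, label: "Country2" },
  { xyz: 62389, abc: 4964738, label: "Country3" }
]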
Here's a solution that doesn't require lodash: use Array#map to transform each item in the top-level array, and then Array#reduce to fold each metrics entry into the resulting object along with the label. The approach below takes advantage of the following ES6 features:
Destructuring assignment to extract the properties from the Array#map and Array#reduce callbacks.
Spread Syntax to combine properties of objects from different sources.
Computed property names for a dynamic property name in an object.
var result = data.map(({ label, metrics }) =>
metrics.reduce(
(result, { metric, value }) => ({ ...result, [metric]: value }),
{ label }
)
);
var data = [{label:"Country1",metrics:[{label:"xyz",metric:"xyz",value:234184},{label:"abc",metric:"abc",value:145678}]},{label:"Country2",metrics:[{label:"xyz",metric:"xyz",value:123456},{label:"abc",metric:"abc",value:456789}]},{label:"Country3",metrics:[{label:"xyz",metric:"xyz",value:62389},{label:"abc",metric:"abc",value:4964738}]}];
var result = data.map(({ label, metrics }) =>
metrics.reduce(
(result, { metric, value }) => ({ ...result, [metric]: value }),
{ label }
)
);
console.log(result);
.as-console-wrapper{min-height:100%;top:0}

How do I query for tag names with :find in SnapshotStore store config

I am trying to set up a filter within a Trend chart that is similar to a defect view. The filter in the defect view is:
(State < Closed) AND (Severity <= Major) AND (Tags !contains Not a Stop Ship)
I cannot seem to get the Tags find to work correctly. Any suggestions?
this.myTrendChart = Ext.create('Rally.ui.chart.Chart', {
  storeType: 'Rally.data.lookback.SnapshotStore',
  storeConfig: {
    find: {
      _TypeHierarchy: "Defect",
      State: {
        $lt: "Closed"
      },
      Severity: {
        $lte: "Major"
      },
      Tags: {
        $ne: "Not a Stop Ship"
      },
      _ProjectHierarchy: ProjectOid
    },
    hydrate: ["Priority"],
    fetch: ["_ValidFrom", "_ValidTo", "ObjectID", "Priority"]
  },
  calculatorType: 'My.TrendCalc',
  calculatorConfig: {},
  chartConfig: {
    chart: {
      zoomType: 'x',
      type: 'line'
    },
    title: {
      text: 'Defects over Time'
    },
    xAxis: {
      type: 'datetime',
      minTickInterval: 3
    },
    yAxis: {
      title: {
        text: 'Number of Defects'
      }
    }
  }
});
Based on reviewing the JSON messages, I figured out that the Tags value needed to be the tag's ObjectID rather than its name. Once I replaced "Not a Stop Ship" with that ObjectID value, the filter worked correctly.
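For anyone hitting the same issue, the adjusted storeConfig might look roughly like this; the ObjectID below is a made-up placeholder that you would replace with the real ObjectID of the "Not a Stop Ship" tag, as found in the JSON messages mentioned above:
storeConfig: {
  find: {
    _TypeHierarchy: "Defect",
    State: { $lt: "Closed" },
    Severity: { $lte: "Major" },
    // Filter on the tag's ObjectID (placeholder value below), not on its name.
    Tags: { $ne: 12345678901 },
    _ProjectHierarchy: ProjectOid
  },
  hydrate: ["Priority"],
  fetch: ["_ValidFrom", "_ValidTo", "ObjectID", "Priority"]
}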