SQLite script to MongoDB - sql

I'm a newbie on MongoDB and I need help. I have a small project with SQLite (7 tables and 1 view), and I need to port this project to MongoDB. I'm using Studio 3T and I have already migrated the SQLite tables to MongoDB collections, but now I need to create the view (VIEW/TEST) to test this project. Please help me write this SQL script in MongoDB.
SQLITE:
MongoDB:
SQLIte script I want to make with MongoDB:
-- Aggregates detales rows per product name, month and year.
-- Every detales row is paired with the zinynas row of the same detale_id
-- that has the latest data_nuo not later than the production date.
CREATE VIEW rezultatas AS
SELECT p.pavadinimas AS detales_pavadinimas,
       SUM(d.pagamintas_kiekis) AS pagamintas_kiekis,
       SUM(z.gamybos_islaidos) AS vidutine_kaina,
       STRFTIME('%m', d.pagaminimo_data) AS menuo,
       STRFTIME('%Y', d.pagaminimo_data) AS metai
FROM detales AS d
     INNER JOIN zinynas AS z
             ON d.detale_id = z.detale_id
            AND d.pagaminimo_data >= z.data_nuo
     INNER JOIN produktas AS p
             ON z.detale_id = p._id_
WHERE NOT EXISTS (
          -- a later zinynas row that also applies on the production date
          SELECT *
          FROM zinynas AS newer
          WHERE d.detale_id = newer.detale_id
            AND d.pagaminimo_data >= newer.data_nuo
            AND z.data_nuo < newer.data_nuo
      )
GROUP BY p.pavadinimas,
         STRFTIME('%m', d.pagaminimo_data),
         STRFTIME('%Y', d.pagaminimo_data)

I had to guess some things due to the lack of schemes but the basic layout should work.
We're going to use db.createView() with these parameters as input:
db.createView('rezultatas', 'produktas', pipeline)
Meaning our pipeline creating the view starts with the produktas collection.
The pipeline to use:
[
  { // attach every production record (detales) of this product.
    // assumes produktas._id holds the value detales.detale_id refers to - TODO confirm
    $lookup: {
      from: "detales",
      localField: "_id",
      foreignField: "detale_id",
      as: "d"
    }
  },
  { // one document per production record; products without any record are dropped,
    // like in the SQL inner join
    $unwind: "$d"
  },
  { // for each production record keep only the zinynas row with the latest
    // data_nuo that is not later than the production date (the NOT EXISTS part of the SQL).
    $lookup: {
      from: "zinynas",
      let: { d_detale_id: "$d.detale_id", d_pagaminimo_data: "$d.pagaminimo_data" },
      pipeline: [
        { $match:
          { $expr:
            { $and: [
              { $eq: [ "$detale_id", "$$d_detale_id" ] },
              { $lte: [ "$data_nuo", "$$d_pagaminimo_data" ] }
            ]}
          }
        },
        // data_nuo is a zinynas field, so the sort has to happen here
        { $sort: { data_nuo: -1 } },
        // NOTE: if several zinynas rows share the same latest data_nuo only one of them is kept
        { $limit: 1 }
      ],
      as: "z"
    }
  },
  { // production records without an applicable zinynas row are dropped
    $unwind: "$z"
  },
  { // sum per product name / month / year and keep the fields we want.
    // $month and $year need a date value - assumes pagaminimo_data was migrated as a BSON date, TODO confirm
    $group: {
      _id: { pavadinimas : "$pavadinimas", month: {$month: "$d.pagaminimo_data"}, year: {$year: "$d.pagaminimo_data"}},
      pagamintas_kiekis: {$sum: "$d.pagamintas_kiekis"},
      vidutine_kaina: {$sum: "$z.gamybos_islaidos"},
      month: {$first: {$month: "$d.pagaminimo_data"}},
      year: {$first: {$year: "$d.pagaminimo_data"}},
      detales_pavadinimas: {$first: "$pavadinimas"}
    }
  }
]

Related

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract the ids of the documents for which the value of "res" is an outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR, where Q1 and Q3 are the first and third quartiles and IQR = Q3 - Q1). I have done this with pandas by retrieving all documents from the collection, which may become slow if the number of documents in the collection becomes too big.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?
If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
  // 1. split the documents into four buckets by "res"
  { $bucketAuto: { groupBy: "$res", buckets: 4 } },

  // 2. one document per distinct bucket upper bound
  { $group: { _id: "$_id.max" } },

  // 3. order the upper bounds ascending
  { $sort: { _id: 1 } },

  // 4. gather the ordered bounds into a single array
  { $group: { _id: null, maxs: { $push: "$_id" } } },

  // 5. first bound becomes q1, third bound becomes q3
  { $project: {
      _id: 0,
      q1: { $arrayElemAt: ["$maxs", 0] },
      q3: { $arrayElemAt: ["$maxs", 2] }
  } },

  // 6. iqr = q3 - q1
  { $set: { iqr: { $subtract: ["$q3", "$q1"] } } },

  // 7. thresholds: q1 - 1.5 * iqr and q3 + 1.5 * iqr
  { $project: {
      outlierThresholdLower: { $subtract: ["$q1", { $multiply: ["$iqr", 1.5] }] },
      outlierThresholdUpper: { $add: ["$q3", { $multiply: ["$iqr", 1.5] }] }
  } },

  // 8. read the collection again and keep the _id of every document outside the thresholds
  { $lookup: {
      from: "collection",
      let: { oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper" },
      pipeline: [
        { $match: { $expr: { $or: [
            { $lt: ["$res", "$$oTL"] },
            { $gt: ["$res", "$$oTU"] }
        ] } } },
        { $project: { _id: 1 } }
      ],
      as: "outliers"
  } }
])
Try it on mongoplayground.net.
One more option, based on @rickhg12hs's answer, is to use $setWindowFields:
// Computes lower/upper outlier thresholds for "res" and looks up the _id of every
// document outside them.
db.collection.aggregate([
// Walk the documents in ascending "res" order and annotate each one with
//   totalCount - a document count (no window is given; presumably the whole collection - TODO confirm)
//   index      - a running count from the first document up to the current one
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
// Keep only documents for which
//   abs(((index + round(totalCount / 4)) * 2) mod totalCount - 0) <= 1
// NOTE(review): this looks intended to select the documents sitting near the
// 1/4 and 3/4 positions of the sorted sequence - confirm on real data.
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
// Collect the "res" values of the remaining documents into one array
// (assumed to still be in ascending "res" order - TODO confirm).
{$group: {_id: null, res: {$push: "$res"}}},
// q1 = first collected value, q3 = last collected value, iqr = q3 - q1
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
// thresholds: q1 - 1.5 * iqr and q3 + 1.5 * iqr
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
// Read the collection again and keep the _id of every document whose "res"
// is below the lower or above the upper threshold.
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example

Counting $lookup and $unwind documents filtered with $match without getting rid of parent document when all results match

I have a collection "Owners" and I want to return a list of "Owner" matching a filter (any filter), plus the count of "Pet" from the "Pets" collection for that owner, except I don't want the dead pets. (made up example)
I need the returned documents to look exactly like an "Owner" document with the addition of the "petCount" field because I'm using Java Pojos with the Mongo Java driver.
I'm using AWS DocumentDB that does not support $lookup with filters yet. If it did I would use this and I'd be done:
db.Owners.aggregate( [
  { $match: {_id: UUID("b13e733d-2686-4266-a686-d3dae6501887")} },
  // Correlated sub-pipeline: keep only the pets that belong to this owner and are not DEAD.
  // Without the $eq on ownerId the `let` variable is unused and every non-DEAD pet
  // of the whole Pets collection would be attached to each owner.
  { $lookup: {
      from: 'Pets',
      as: 'pets',
      'let': { ownerId: '$_id' },
      pipeline: [
        { $match: { $expr: { $and: [
            { $eq: ['$ownerId', '$$ownerId'] },
            { $ne: ['$state', 'DEAD'] }
        ] } } }
      ]
  } },
  // number of remaining pets; 0 when the owner has none
  { $addFields: { petCount: { $size: '$pets' } } },
  // the result should look like an Owner document plus petCount
  { $project: { pets: 0 } }
]).pretty()
But since it doesn't this is what I got so far:
db.Owners.aggregate([
  // pick the owners of interest
  { $match: { _id: { $in: [ UUID("cbb921f6-50f8-4b0c-833f-934998e5fbff") ] } } },

  // attach all pets of each owner (plain equality join)
  { $lookup: { from: 'Pets', localField: '_id', foreignField: 'ownerId', as: 'pets' } },

  // one row per pet; owners without pets keep a single row
  { $unwind: { path: '$pets', preserveNullAndEmptyArrays: true } },

  // discard the rows of DEAD pets
  // (an owner whose pets are all DEAD loses every row here)
  { $match: { 'pets.state': { $ne: 'DEAD' } } },

  // fold the rows back into one document per owner
  { $group: {
      _id: '$_id',
      doc: { $first: '$$ROOT' },
      pets: { $push: '$pets' }
  } },

  // store the number of collected pets on the owner document
  { $addFields: { 'doc.petCount': { $size: '$pets' } } },

  // return the owner document itself, without the pets field
  { $replaceRoot: { newRoot: '$doc' } },
  { $project: { pets: 0 } }
]).pretty()
This works perfectly, except if an Owner only has "DEAD" pets, then the owner doesn't get returned because all the "document copies" got filtered out by the $match. I'd need the parent document to be returned with petCount = 0 when ALL of them are "DEAD". I cannot figure out how to do this.
Any ideas?
These are the supported operations for DocDB 4.0 https://docs.amazonaws.cn/en_us/documentdb/latest/developerguide/mongo-apis.html
EDIT: update to use $filter as $reduce not supported by aws document DB
You can use $filter to keep only not DEAD pets in the lookup array, then count the size of the remaining array.
Here is the Mongo playground for your reference.
$reduce version
You can use $reduce in your aggregation pipeline to do a conditional sum for the state.
Here is Mongo playground for your reference.
As of January 2022, Amazon DocumentDB added support for $reduce, the solution posted above should work for you.
Reference.

SQL Server Replace in MongoDB

I want to do a replace in projection. Like a SQL Server REPLACE. I'm pretty sure we can handle that in code but looking for some shell commands.
Here is what I have
// Return only OrderNo (without _id) for every order history header.
db.OrderHistoryHeader.aggregate(
  [
    { $project: { _id: 0, OrderNo: 1 } } // I want to do Replace(OrderNo,'XYZ','ABC')
  ],
  { allowDiskUse: true }
).pretty();
There's no built-in operator for that currently but you can use $indexOfBytes combined with $substr and $concat.
// Replace the first occurrence of "XYZ" in OrderNo with "ABC".
db.OrderHistoryHeader.aggregate([
  { // byte offset of the first "XYZ" in OrderNo; -1 when it does not occur
    $addFields:
    {
      index: { $indexOfBytes: [ "$OrderNo", "XYZ" ] },
    }
  },
  {
    $project: {
      OrderNo: {
        $cond: [
          { $eq: [ "$index", -1 ] },
          // nothing to replace: keep the value untouched instead of
          // splicing "ABC" into it at a negative offset
          "$OrderNo",
          {
            $concat: [
              // text before the match
              { $substrBytes: [ "$OrderNo", 0, "$index" ] },
              "ABC",
              // text after the match; a byte count of -1 means "rest of the string"
              { $substrBytes: [ "$OrderNo", { $add: [3, "$index"] }, -1 ] }
            ]
          }
        ]
      }
    }
  }
  // the helper field "index" is not listed in the $project above, so it is already
  // absent from the output and needs no separate exclusion stage
])
Where 3 is the length of text being replaced.
You can use the replaceOne method
db.collection.replaceOne(filter, replacement, options)
From documentation:
Behavior
replaceOne() replaces the first matching document in the collection that matches the filter, using the replacement document.
upsert
If upsert: true and no documents match the filter, db.collection.replaceOne() creates a new document based on the replacement document.

how to count number of keys in embedded mongodb document

I have a mongodb query: (Give me the settings where account='test1')
db.collection_name.find({"account" : "test1"}, {settings : 1}).pretty();
where I get the following sample output:
{
"_id" : ObjectId("49830ede4bz08bc0b495f123"),
"settings" : {
"clusterData" : {
"us-south-1" : "cluster1",
"us-east-1" : "cluster2"
},
},
What I'm looking for now, is to give me the account where the clusterData has more than 1 key.
I'm only interested in listing those accounts with (2) or more keys.
I've tried this: (but this doesn't work)
db.collection_name.find({'settings.clusterData.1': {$exists: true}}, {account : 1}).pretty();
Is this possible to do with the current data structure? I don't have the option to redesign this schema.
Your clusterData field is not an array which is why you cannot just filter the number of elements it has. There is a way, though, to get what you want via the aggregation framework. Try this:
// Show settings.clusterData of account "test1" when it has more than one key.
db.collection_name.aggregate([
  {
    $match: {
      "account" : "test1"
    }
  }, {
    $project: {
      // number of keys in clusterData. $ifNull substitutes an empty object when the
      // field is missing or null, so $size always receives an array instead of failing
      "settingsAsArraySize": {
        $size: { $objectToArray: { $ifNull: [ "$settings.clusterData", { $literal: {} } ] } }
      },
      "settings.clusterData": 1
    }
  }, {
    $match: {
      "settingsAsArraySize": { $gt: 1 }
    }
  }, {
    // drop the helper field and _id from the output
    $project: {
      "_id": 0,
      "settings.clusterData": 1
    }
  }
]).pretty();

How to retrieve null lookup entries on mongodb?

I have this query that provides me the join I want to:
db.summoners.aggregate([
  // the summoner we are interested in
  { $match: { nick: "Luispfj" } },

  // one document per entry of the summoner's matches array
  { $unwind: "$matches" },

  // join the full match document through gameId
  { $lookup: {
      from: "matches",
      localField: "matches.gameId",
      foreignField: "gameId",
      as: "fullMatches"
  } },

  // flatten the joined array
  { $unwind: "$fullMatches" },

  // collect all joined matches into a single result document
  { $group: { _id: null, matches: { $push: "$fullMatches" } } }
])
But when I run the unwind function the null entries are gone. How do I retrieve them (with their respective "gameId"s, if possible)?
Also, is there a way to retrieve only the matches array, instead of it being a subproperty of the "null-id-object" it creates?
$unwind takes an optional field preserveNullAndEmptyArrays, which is false by default. If you set it to true, $unwind will also output the documents whose array field is null, missing, or empty. Read more about $unwind
{
"$unwind": {
path: "$fullMatches",
preserveNullAndEmptyArrays: true
}
},