MongoDB - how can I query "INTERSECT" of SQL?

MongoDB - how can I query "INTERSECT" of SQL? - sql

I have a question about "INTERSECT" query in the MongoDB query.
I want to query like the following SQL query in MongoDB.
(select course_id from section where semester = 'fall' and year = '2009')
intersect
(select course_id from section where semester = 'spring' and year = '2010');
In my MongoDB, Section collection data structure is as follows.
{
"_id" : {
"course_id" : "486",
"sec_id" : "1",
"semester" : "Fall",
"year" : 2009.0
},
"course_id" : "486",
"sec_id" : "1",
"semester" : "Fall",
"year" : 2009.0,
"building" : "Whitman",
"room_number" : "134",
"time_slot_id" : "K"
}
How to query to get the same result as SQL language?

It's not exactly sexy but you can do something like this:
db.collection.aggregate([
{
$match: {
$or: [
{
$and: [
{
year: 2009
},
{
semester: "fall"
}
]
},
{
$and: [
{
year: 2010
},
{
semester: "spring"
}
]
}
]
}
},
{
$group: {
_id: "$course_id",
years_x_semester: {$addToSet: {year: "$year", semester: "$semester"}},
}
},
{
$match: {
"years_x_semester.1": {$exists: true}
}
}
])

Related

Query item in nested array

Customer appointments with top level locationId sample data set:
[
{
"locationId": 9999,
"customerAppointments": [
{
"customerId": "1",
"appointments": [
{
"appointmentId": "cbbce566-da59-42c2-8845-53976ba63d56",
"locationName": "Sullivan St"
},
{
"appointmentId": "5f09e2af-ddae-47aa-9f7c-fd1001a9c5e6",
"locationName": "Oak St"
}
]
},
{
"customerId": "2",
"appointments": [
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
},
{
"customerId": "3",
"appointments": []
}
]
},
{
...
},
{
...
}
]
I need to pull out appointment by locationId and customerId and only get the appointment for that customerId e.g
Sample response:
[
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
Tried below query, but it just returns all records for all customers ids (which is kind of expected):
db.getCollection("appointments").find(
{
"locationId" : NumberInt(9999),
"customerAppointments" : {
"$elemMatch" : {
"customerId" : "2"
}
}
}
);
But how can I get just the appointment record for a specific customerId?

When asking this question I was unaware of the older version of MongoDB driver (< v5) so we cannot use the $getField operator.
However, this query seems to work well:
db.getCollection("appointments").aggregate([
{
$match: {
"locationId": NumberInt(9999)
}
},
{
$unwind: "$customerAppointments"
},
{
$match: {
"customerAppointments.customerId": "2"
}
},
{
$project: {
appointments: "$customerAppointments.appointments"
}
}
]);
Yields:
{
"_id" : ObjectId("63eebe95c7a0da54804c1db2"),
"appointments" : [
{
"appointmentId" : "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName" : "Kellet St"
}
]
}

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract id's for the document for which the value of "res" is outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR (Q1, Q3 are percentiles)). I have done this using pandas functionality by retrieving all documents from the collection, which may become slow if the number of documents in collection become too big.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?

If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
{ // partition res into quartiles
"$bucketAuto": {
"groupBy": "$res",
"buckets": 4
}
},
{ // get the max of each quartile
"$group": {
"_id": "$_id.max"
}
},
{ // sort the quartile maxs
"$sort": {
"_id": 1
}
},
{ // put sorted quartile maxs into array
"$group": {
"_id": null,
"maxs": {"$push": "$_id"}
}
},
{ // assign Q1 and Q3
"$project": {
"_id": 0,
"q1": {"$arrayElemAt": ["$maxs", 0]},
"q3": {"$arrayElemAt": ["$maxs", 2]}
}
},
{ // set IQR
"$set": {
"iqr": {
"$subtract": ["$q3", "$q1"]
}
}
},
{ // assign upper/lower outlier thresholds
"$project": {
"outlierThresholdLower": {
"$subtract": [
"$q1",
{"$multiply": ["$iqr", 1.5]}
]
},
"outlierThresholdUpper": {
"$add": [
"$q3",
{"$multiply": ["$iqr", 1.5]}
]
}
}
},
{ // get outlier _id's
"$lookup": {
"from": "collection",
"as": "outliers",
"let": {
"oTL": "$outlierThresholdLower",
"oTU": "$outlierThresholdUpper"
},
"pipeline": [
{
"$match": {
"$expr": {
"$or": [
{"$lt": ["$res", "$$oTL"]},
{"$gt": ["$res", "$$oTU"]}
]
}
}
},
{
"$project": {
"_id": 1
}
}
]
}
}
])
Try it on mongoplayground.net.

One more option based on #rickhg12hs's answer, is to use $setWindowFields:
db.collection.aggregate([
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
{$group: {_id: null, res: {$push: "$res"}}},
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example

How to get last second of every minute from mongoDB using SQL query

I have a table with records for every millisecond. I need to get only the last second of every minute using Mongodb sql query.
Id Balance DataTime
1 "2462188.61" 2019-09-27T05:49:33.575+00:00
1 "2449426.30" 2019-10-30T19:30:52.513+00:00
1 "2456459.67" 2019-10-15T18:20:09.490+00:00
5 "1006266.91" 2019-10-31T13:48:18.290+00:00
I tried the LIKE condition but that didn't work.
Select Id, DateTime,Balance from AccountBalance where DateTime like '%59.000%'
Here is the link for the mongoldb SQL reference :
https://docs.mongodb.com/bi-connector/current/supported-operations/
I am using the BI connector to connect to Tableau(hence need the sql version of the query)
Thanks in advance!

You could try...
db.z.aggregate([
{ $addFields: {
year: { $dateToString: { format: "%Y", date: "$DataTime" } },
month: { $dateToString: { format: "%m", date: "$DataTime" } },
day: { $dateToString: { format: "%d", date: "$DataTime" } },
hour: { $dateToString: { format: "%H", date: "$DataTime" } },
minute: { $dateToString: { format: "%M", date: "$DataTime" } },
second: { $dateToString: { format: "%S", date: "$DataTime" } }
}
}
]).pretty()
This assumes your field DataTime is of type ISODate()...
Example Documents:
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023b"), "Balance" : "2462188.61", "DataTime" : ISODate("2019-09-27T05:49:33.575Z") }
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023c"), "Balance" : "2449426.30", "DataTime" : ISODate("2019-10-30T19:30:52.513Z") }
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023d"), "Balance" : "2456459.67", "DataTime" : ISODate("2019-10-15T18:20:09.490Z") }
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023e"), "Balance" : "1006266.91", "DataTime" : ISODate("2019-10-31T13:48:18.290Z") }
Example Query Output:
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023b"),
"Balance" : "2462188.61",
"DataTime" : ISODate("2019-09-27T05:49:33.575Z"),
"year" : "2019",
"month" : "09",
"day" : "27",
"hour" : "05",
"minute" : "49",
"second" : "33"
}
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023c"),
"Balance" : "2449426.30",
"DataTime" : ISODate("2019-10-30T19:30:52.513Z"),
"year" : "2019",
"month" : "10",
"day" : "30",
"hour" : "19",
"minute" : "30",
"second" : "52"
}
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023d"),
"Balance" : "2456459.67",
"DataTime" : ISODate("2019-10-15T18:20:09.490Z"),
"year" : "2019",
"month" : "10",
"day" : "15",
"hour" : "18",
"minute" : "20",
"second" : "09"
}
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023e"),
"Balance" : "1006266.91",
"DataTime" : ISODate("2019-10-31T13:48:18.290Z"),
"year" : "2019",
"month" : "10",
"day" : "31",
"hour" : "13",
"minute" : "48",
"second" : "18"
}

use sort function with date
eg:
db.collection.find().sort("Date_Field")

mongodb aggregate distinct count

Realise this topic has been asked many times - but the advice hasn't helped me solve this problem.
The following query is trying to determine the presence of sales on a given weekday using ISODay. Because the query will be run at the start of the month, I need to know how many occurrences of the specific ISOday occur in the month.
var query = { eventType: 'Sale', site : 4, tank: 1, txnDate : { "$gt" : new Date('2018-08-01T00:00:00') } };
db.tankevent.aggregate([
{ $match: query },
{ $project : {
isoDay: { $isoDayOfWeek: "$txnDate" },
dayDate: { $dateToString: { format: "%d", date:"$txnDate" } }
}
},
{ $group:
{ _id : { isoday: "$isoDay", dday: "$dayDate" }, count: { "$sum" : 1 } }
},
{ $sort: { "_id.isoday": 1, "_id.dday": 1 } }
])
provides the following output
/* 1 */
{
"_id" : {
"isoday" : 1,
"dday" : "06"
},
"count" : 62.0
}
/* 2 */
{
"_id" : {
"isoday" : 1,
"dday" : "13"
},
"count" : 69.0
}
/* 3 */
{
"_id" : {
"isoday" : 1,
"dday" : "20"
},
"count" : 72.0
}
/* 4 */
{
"_id" : {
"isoday" : 2,
"dday" : "07"
},
"count" : 75.0
}
I am trying to have "count" represent the number of unique "dday" records - so using the output above, I want count to be "3" for isoDay = 1. At the moment count is reporting number of sales events that occurred for the group combination

All you need to do is have the grouping twice.
db.tankevent.aggregate([
{ $match: query },
{ $project : {
isoDay: { $isoDayOfWeek: "$txnDate" },
dayDate: { $dateToString: { format: "%d", date:"$txnDate" } }
}
},
{ $group:
{ _id : { isoday: "$isoDay", dday: "$dayDate" }, count: { "$sum" : 1 } }
},
{ $project : {
isoDay_Final: "$_id.isoday"
}
},
{ $group:
{ _id : "$isoDay_Final", count: { "$sum" : 1 } }
},
{ $sort: { "_id": 1 } }
])

SQL Where clause equivalent for Elastic Search

I am trying to create a aggregate results in elastic search but filter option is not working for me.
I can aggregate data without filter e.g.
select name , material ,sum(price)
from products group by name , material
curl -XGET 'http://localhost:9200/products/_search?pretty=true' -d'
{
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}'
but I am facing problems to write equivalent DSL query of :
select name , material ,sum(price)
from products
where material = "wood"
group by name , material

Should be something like this:
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"material": "wood"
}
}
}
},
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}
Use a filter if you know the exact value and do not need a match, else use a match query instead of the filtered query.

You can use match
{
"query": {
"bool": {
"must": [
{
"match": {
"material": "wood"
}
}
],
"filter": [
{
"match_all": {}
},
]
}
},
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}

We Keep Coding

sql objective-c vba vb.net react-native apache vue.js tensorflow api pandas

MongoDB - how can I query "INTERSECT" of SQL? - sql

Related

Query item in nested array

Mongodb aggregation to find outliers

How to get last second of every minute from mongoDB using SQL query

mongodb aggregate distinct count

SQL Where clause equivalent for Elastic Search

Categories

Resources