MongoDB - how can I query "INTERSECT" of SQL? - sql

I have a question about "INTERSECT" query in the MongoDB query.
I want to query like the following SQL query in MongoDB.
(select course_id from section where semester = 'fall' and year = '2009')
intersect
(select course_id from section where semester = 'spring' and year = '2010');
In my MongoDB, Section collection data structure is as follows.
{
"_id" : {
"course_id" : "486",
"sec_id" : "1",
"semester" : "Fall",
"year" : 2009.0
},
"course_id" : "486",
"sec_id" : "1",
"semester" : "Fall",
"year" : 2009.0,
"building" : "Whitman",
"room_number" : "134",
"time_slot_id" : "K"
}
How to query to get the same result as SQL language?

It's not exactly sexy but you can do something like this:
db.collection.aggregate([
{
$match: {
$or: [
{
$and: [
{
year: 2009
},
{
semester: "fall"
}
]
},
{
$and: [
{
year: 2010
},
{
semester: "spring"
}
]
}
]
}
},
{
$group: {
_id: "$course_id",
years_x_semester: {$addToSet: {year: "$year", semester: "$semester"}},
}
},
{
$match: {
"years_x_semester.1": {$exists: true}
}
}
])

Related

Query item in nested array

Customer appointments with top level locationId sample data set:
[
{
"locationId": 9999,
"customerAppointments": [
{
"customerId": "1",
"appointments": [
{
"appointmentId": "cbbce566-da59-42c2-8845-53976ba63d56",
"locationName": "Sullivan St"
},
{
"appointmentId": "5f09e2af-ddae-47aa-9f7c-fd1001a9c5e6",
"locationName": "Oak St"
}
]
},
{
"customerId": "2",
"appointments": [
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
},
{
"customerId": "3",
"appointments": []
}
]
},
{
...
},
{
...
}
]
I need to pull out appointment by locationId and customerId and only get the appointment for that customerId e.g
Sample response:
[
{
"appointmentId": "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName": "Kellet St"
}
]
Tried below query, but it just returns all records for all customers ids (which is kind of expected):
db.getCollection("appointments").find(
{
"locationId" : NumberInt(9999),
"customerAppointments" : {
"$elemMatch" : {
"customerId" : "2"
}
}
}
);
But how can I get just the appointment record for a specific customerId?
When asking this question I was unaware of the older version of MongoDB driver (< v5) so we cannot use the $getField operator.
However, this query seems to work well:
db.getCollection("appointments").aggregate([
{
$match: {
"locationId": NumberInt(9999)
}
},
{
$unwind: "$customerAppointments"
},
{
$match: {
"customerAppointments.customerId": "2"
}
},
{
$project: {
appointments: "$customerAppointments.appointments"
}
}
]);
Yields:
{
"_id" : ObjectId("63eebe95c7a0da54804c1db2"),
"appointments" : [
{
"appointmentId" : "964a3c1c-ccec-4082-99e2-65795352ba79",
"locationName" : "Kellet St"
}
]
}

Mongodb aggregation to find outliers

In my mongodb collection documents are stored in the following format:
{ "_id" : ObjectId("62XXXXXX"), "res" : 12, ... }
{ "_id" : ObjectId("63XXXXXX"), "res" : 23, ... }
{ "_id" : ObjectId("64XXXXXX"), "res" : 78, ... }
...
I need to extract id's for the document for which the value of "res" is outlier (i.e. value < Q1 - 1.5 * IQR or value > Q3 + 1.5 * IQR (Q1, Q3 are percentiles)). I have done this using pandas functionality by retrieving all documents from the collection, which may become slow if the number of documents in collection become too big.
Is there a way to do this using mongodb aggregation pipeline (or just calculating percentiles)?
If I understand how you want to retrieve outliers, here's one way you might be able to do it.
db.collection.aggregate([
{ // partition res into quartiles
"$bucketAuto": {
"groupBy": "$res",
"buckets": 4
}
},
{ // get the max of each quartile
"$group": {
"_id": "$_id.max"
}
},
{ // sort the quartile maxs
"$sort": {
"_id": 1
}
},
{ // put sorted quartile maxs into array
"$group": {
"_id": null,
"maxs": {"$push": "$_id"}
}
},
{ // assign Q1 and Q3
"$project": {
"_id": 0,
"q1": {"$arrayElemAt": ["$maxs", 0]},
"q3": {"$arrayElemAt": ["$maxs", 2]}
}
},
{ // set IQR
"$set": {
"iqr": {
"$subtract": ["$q3", "$q1"]
}
}
},
{ // assign upper/lower outlier thresholds
"$project": {
"outlierThresholdLower": {
"$subtract": [
"$q1",
{"$multiply": ["$iqr", 1.5]}
]
},
"outlierThresholdUpper": {
"$add": [
"$q3",
{"$multiply": ["$iqr", 1.5]}
]
}
}
},
{ // get outlier _id's
"$lookup": {
"from": "collection",
"as": "outliers",
"let": {
"oTL": "$outlierThresholdLower",
"oTU": "$outlierThresholdUpper"
},
"pipeline": [
{
"$match": {
"$expr": {
"$or": [
{"$lt": ["$res", "$$oTL"]},
{"$gt": ["$res", "$$oTU"]}
]
}
}
},
{
"$project": {
"_id": 1
}
}
]
}
}
])
Try it on mongoplayground.net.
One more option based on #rickhg12hs's answer, is to use $setWindowFields:
db.collection.aggregate([
{$setWindowFields: {
sortBy: {res: 1},
output: {
totalCount: {$count: {}},
index: {$sum: 1, window: {documents: ["unbounded", "current"]}}
}
}
},
{$match: {
$expr: {$lte: [
{$abs: {$subtract: [
{$mod: [
{$multiply: [
{$add: ["$index", {$round: {$divide: ["$totalCount", 4]}}]}, 2]},
"$totalCount"
]}, 0]}
}, 1]}
}},
{$group: {_id: null, res: {$push: "$res"}}},
{$project: {_id: 0, q1: {$first: "$res"}, q3: {$last: "$res"},
iqr: {"$subtract": [{$last: "$res"}, {$first: "$res"}]}
}},
{$project: {
outlierThresholdLower: {$subtract: ["$q1", {$multiply: ["$iqr", 1.5]}]},
outlierThresholdUpper: {$add: ["$q3", {$multiply: ["$iqr", 1.5]}]}
}
},
{$lookup: {
from: "collection",
as: "outliers",
let: {oTL: "$outlierThresholdLower", oTU: "$outlierThresholdUpper"},
pipeline: [
{$match: {$expr: {$or: [{$lt: ["$res", "$$oTL"]}, {$gt: ["$res", "$$oTU"]}]}}},
{$project: {_id: 1}}
]
}
}
])
See how it works on the playground example

How to get last second of every minute from mongoDB using SQL query

I have a table with records for every millisecond. I need to get only the last second of every minute using Mongodb sql query.
Id Balance DataTime
1 "2462188.61" 2019-09-27T05:49:33.575+00:00
1 "2449426.30" 2019-10-30T19:30:52.513+00:00
1 "2456459.67" 2019-10-15T18:20:09.490+00:00
5 "1006266.91" 2019-10-31T13:48:18.290+00:00
I tried the LIKE condition but that didn't work.
Select Id, DateTime,Balance from AccountBalance where DateTime like '%59.000%'
Here is the link for the mongoldb SQL reference :
https://docs.mongodb.com/bi-connector/current/supported-operations/
I am using the BI connector to connect to Tableau(hence need the sql version of the query)
Thanks in advance!
You could try...
db.z.aggregate([
{ $addFields: {
year: { $dateToString: { format: "%Y", date: "$DataTime" } },
month: { $dateToString: { format: "%m", date: "$DataTime" } },
day: { $dateToString: { format: "%d", date: "$DataTime" } },
hour: { $dateToString: { format: "%H", date: "$DataTime" } },
minute: { $dateToString: { format: "%M", date: "$DataTime" } },
second: { $dateToString: { format: "%S", date: "$DataTime" } }
}
}
]).pretty()
This assumes your field DataTime is of type ISODate()...
Example Documents:
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023b"), "Balance" : "2462188.61", "DataTime" : ISODate("2019-09-27T05:49:33.575Z") }
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023c"), "Balance" : "2449426.30", "DataTime" : ISODate("2019-10-30T19:30:52.513Z") }
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023d"), "Balance" : "2456459.67", "DataTime" : ISODate("2019-10-15T18:20:09.490Z") }
{ "_id" : ObjectId("5dea94c3b4ae6bbc17cd023e"), "Balance" : "1006266.91", "DataTime" : ISODate("2019-10-31T13:48:18.290Z") }
Example Query Output:
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023b"),
"Balance" : "2462188.61",
"DataTime" : ISODate("2019-09-27T05:49:33.575Z"),
"year" : "2019",
"month" : "09",
"day" : "27",
"hour" : "05",
"minute" : "49",
"second" : "33"
}
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023c"),
"Balance" : "2449426.30",
"DataTime" : ISODate("2019-10-30T19:30:52.513Z"),
"year" : "2019",
"month" : "10",
"day" : "30",
"hour" : "19",
"minute" : "30",
"second" : "52"
}
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023d"),
"Balance" : "2456459.67",
"DataTime" : ISODate("2019-10-15T18:20:09.490Z"),
"year" : "2019",
"month" : "10",
"day" : "15",
"hour" : "18",
"minute" : "20",
"second" : "09"
}
{
"_id" : ObjectId("5dea94c3b4ae6bbc17cd023e"),
"Balance" : "1006266.91",
"DataTime" : ISODate("2019-10-31T13:48:18.290Z"),
"year" : "2019",
"month" : "10",
"day" : "31",
"hour" : "13",
"minute" : "48",
"second" : "18"
}
use sort function with date
eg:
db.collection.find().sort("Date_Field")

mongodb aggregate distinct count

Realise this topic has been asked many times - but the advice hasn't helped me solve this problem.
The following query is trying to determine the presence of sales on a given weekday using ISODay. Because the query will be run at the start of the month, I need to know how many occurrences of the specific ISOday occur in the month.
var query = { eventType: 'Sale', site : 4, tank: 1, txnDate : { "$gt" : new Date('2018-08-01T00:00:00') } };
db.tankevent.aggregate([
{ $match: query },
{ $project : {
isoDay: { $isoDayOfWeek: "$txnDate" },
dayDate: { $dateToString: { format: "%d", date:"$txnDate" } }
}
},
{ $group:
{ _id : { isoday: "$isoDay", dday: "$dayDate" }, count: { "$sum" : 1 } }
},
{ $sort: { "_id.isoday": 1, "_id.dday": 1 } }
])
provides the following output
/* 1 */
{
"_id" : {
"isoday" : 1,
"dday" : "06"
},
"count" : 62.0
}
/* 2 */
{
"_id" : {
"isoday" : 1,
"dday" : "13"
},
"count" : 69.0
}
/* 3 */
{
"_id" : {
"isoday" : 1,
"dday" : "20"
},
"count" : 72.0
}
/* 4 */
{
"_id" : {
"isoday" : 2,
"dday" : "07"
},
"count" : 75.0
}
I am trying to have "count" represent the number of unique "dday" records - so using the output above, I want count to be "3" for isoDay = 1. At the moment count is reporting number of sales events that occurred for the group combination
All you need to do is have the grouping twice.
db.tankevent.aggregate([
{ $match: query },
{ $project : {
isoDay: { $isoDayOfWeek: "$txnDate" },
dayDate: { $dateToString: { format: "%d", date:"$txnDate" } }
}
},
{ $group:
{ _id : { isoday: "$isoDay", dday: "$dayDate" }, count: { "$sum" : 1 } }
},
{ $project : {
isoDay_Final: "$_id.isoday"
}
},
{ $group:
{ _id : "$isoDay_Final", count: { "$sum" : 1 } }
},
{ $sort: { "_id": 1 } }
])

SQL Where clause equivalent for Elastic Search

I am trying to create a aggregate results in elastic search but filter option is not working for me.
I can aggregate data without filter e.g.
select name , material ,sum(price)
from products group by name , material
curl -XGET 'http://localhost:9200/products/_search?pretty=true' -d'
{
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}'
but I am facing problems to write equivalent DSL query of :
select name , material ,sum(price)
from products
where material = "wood"
group by name , material
Should be something like this:
{
"query": {
"filtered": {
"query": {
"match_all": {}
},
"filter": {
"term": {
"material": "wood"
}
}
}
},
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}
Use a filter if you know the exact value and do not need a match, else use a match query instead of the filtered query.
You can use match
{
"query": {
"bool": {
"must": [
{
"match": {
"material": "wood"
}
}
],
"filter": [
{
"match_all": {}
},
]
}
},
"aggs" : {
"product" : {
"terms" : {
"field" : "name"
},
"aggs" : {
"material" : {
"terms" : {
"field" : "material"
},
"aggs" : {
"sum_price" : {
"sum" : {
"field" : "price"
}
}
}
}
}
}
},
"size" : 0
}