Pymongo aggregation with project - pymongo

I have a very basic question,
what's the Pymongo equivalent of
select name, surname, CONCAT(name, surname)
from db

Assign the MongoDB collection to the db object; then this works:
# `db` must refer to the MongoDB *collection*, since aggregate() is called on it.
# The $project stage keeps name and surname and adds their concatenation
# as NewFieldName (the equivalent of SQL's CONCAT(name, surname)).
concat_results = db.aggregate(
    [
        {
            "$project": {
                "name": True,
                "surname": True,
                "NewFieldName": {"$concat": ["$name", "$surname"]},
            }
        }
    ]
)
for concat_result in concat_results:
    print(
        "name:", concat_result["name"],
        "surname:", concat_result["surname"],
        "NewFieldName:", concat_result["NewFieldName"],
    )

Related

Flatten complex json using Databricks and ADF

I have following json which I have flattened partially using explode
{
"result":[
{
"employee":[
{
"employeeType":{
"name":"[empName]",
"displayName":"theName"
},
"groupValue":"value1"
},
{
"employeeType":{
"name":"#bossName#",
"displayName":"theBoss"
},
"groupValue":[
{
"id":"1",
"type":{
"name":"firstBoss",
"displayName":"CEO"
},
"name":"Martha"
},
{
"id":"2",
"type":{
"name":"secondBoss",
"displayName":"cto"
},
"name":"Alex"
}
]
}
]
}
]
}
I need to get following fields:
employeeType.name
groupValue
I am able to extract those fields and values. But if the name value starts with #, as in "name":"#bossName#", I get groupValue as a string, from which I need to extract id and name.
"groupValue":[
{
"id":"1",
"type":{
"name":"firstBoss",
"displayName":"CEO"
},
"name":"Martha"
},
{
"id":"2",
"type":{
"name":"secondBoss",
"displayName":"cto"
},
"name":"Alex"
}
]
How can I convert this string to JSON and get the values?
My code so far:
from pyspark.sql.functions import *
# Explode result.employee into rows (aliased "emp"), copy the employee type
# name and the raw groupValue into their own columns, then drop the helper.
db_flat = (
    df.select(explode("result.employee").alias("emp"))
    # col() takes the column path as a string; the bare expression
    # emp.employeeType.name would raise NameError because `emp` is not a
    # Python variable.
    .withColumn("emp_name", col("emp.employeeType.name"))
    .withColumn("emp_val", col("emp.groupValue"))
    .drop("emp")
)
How can I extract groupValue from db_flat and get id and name from it? Maybe by using the Python pandas library?
Since, as you can see, they won't be dynamic, you can traverse the JSON while mapping, as shown below. Just identify each record and array, and specify the index [i] as needed.
Example:
id --> $['employee'][1]['groupValue'][0]['id']
name --> $['employee'][1]['groupValue'][0]['type']['name']

Need to convert this SQL query to MongoDB

I am new to MongoDB. I need to convert this SQL code to MongoDB
select TOP 5 r.regionName, COUNT(c.RegionID)
from region as r,
company as c
where c.RegionID = r._id
group by r.regionName
order by COUNT(c.RegionID) DESC;
Option 1. You can use the aggregation framework with $lookup, $group, $project, $sort and $limit stages, but this seems like the wrong approach, since the real benefit of replacing a relational database with MongoDB comes from denormalization and from avoiding join-like ($lookup) queries.
Option 2. You convert your multi-table relational database schema to document model and proceed with simple $group, $project, $sort and $limit stage aggregation query for the above task.
Since you have not provided any MongoDB document examples, it is hard to show what your queries would look like ...
Despite my comment, here is an attempted translation (not tested):
// Top 5 regions by number of companies, mirroring the SQL:
//   SELECT TOP 5 r.regionName, COUNT(c.RegionID) ... GROUP BY r.regionName
//   ORDER BY COUNT(c.RegionID) DESC
db.region.aggregate([
  {
    // Left outer join: attach every company whose RegionID equals this region's _id.
    $lookup: {
      from: "company",
      localField: "_id",
      foreignField: "RegionID",
      as: "c"
    }
  },
  // Remove regions without any company, i.e. turn the outer join into an INNER JOIN.
  { $match: { c: { $ne: [] } } },
  // COUNT(c.RegionID): number of joined companies, summed per region name.
  { $group: { _id: "$regionName", count: { $sum: { $size: "$c" } } } },
  // Cosmetics: expose the group key as regionName.
  { $project: { _id: 0, regionName: "$_id", count: 1 } },
  // ORDER BY COUNT(c.RegionID) DESC
  { $sort: { count: -1 } },
  // TOP 5
  { $limit: 5 }
])

How to query field inside nested arrays in CosmosDB SQL

How can I return all documents which have parameter.code = "123", given this document structure, using CosmosDB SQL query? Is it necessary to use a UDF? (If so, how?)
{
"batch_id": "abc",
"samples": [
{
"sample_id": "123",
"tests": [
{
"parameter": {
"code": "123", // <- target
}
}
]
}
]
}
There is no need to use a UDF (User Defined Function); just use a Cosmos DB SQL query with a double JOIN.
SQL:
SELECT c.batch_id
FROM c
JOIN s IN c.samples
JOIN t IN s.tests
WHERE t.parameter.code = "123"
Output:

How to Make a Lookup connection between two Collection

Goal:
The MongoDB query should produce the same result as this SQL.
In other words, the same result but for MongoDB.
Problem:
How do you make a lookup connection between People and Role in MongoDB's query code?
Info:
I'm new to MongoDB
SQL code
SELECT
a.*,
'.' AS '.',
b.*,
'.' AS '.',
c.*
FROM
[db1].[dbo].[People_Course_Grade] a
INNER JOIN [db1].[dbo].[People] b on a.PeopleId = b.PeopleId
INNER JOIN [db1].[dbo].[Role] c on b.RoleId = c.RoleId
Json data:
Role:
[{"RoleId":1,"Name":"Student"},{"RoleId":2,"Name":"Teacher"}]
People_Course_Grade:
[{"People_Course_GradeId":1,"PeopleId":1,"CourseId":1},
{"People_Course_GradeId":2,"PeopleId":2,"CourseId":1},
{"People_Course_GradeId":3,"PeopleId":3,"CourseId":2},
{"People_Course_GradeId":4,"PeopleId":1,"CourseId":2}]
Course:
[{"CourseId":1,"Name":"Java"},{"CourseId":2,"Name":"Java II"},
{"CourseId":3,"Name":"Statistik 1"}]
// Asker's attempt: join People_Course_Grade to People and project a few fields.
// NOTE(review): the sample data in this post names the fields PeopleId / CourseId,
// not people_id / course_id / _id — confirm the names match the stored documents.
db.People_Course_Grade.aggregate([
{
// Join People documents whose _id equals this document's people_id;
// the matches are stored in the "people" array.
$lookup:{
from: "People",
localField: "people_id",
foreignField: "_id",
as: "people"
}
},
// Emit one output document per element of the "people" array.
{ $unwind:"$people" },
{
// Keep course_id and people_id, and copy the joined person's Name to the top level.
$project:{
course_id : 1,
people_id : 1,
// grade_id : 1,
Name : "$people.Name",
}
}
]);
You need to start with a double $lookup since you have three collections. Then you can use $arrayElemAt to always get a single element from each lookup's result. To flatten your structure you can use $replaceRoot with $mergeObjects (which promotes all the fields from people and course to the root level).
// Join each People_Course_Grade document to its person and its course, then
// flatten the joined documents into the root document.
db.People_Course_Grade.aggregate([
  {
    // SQL: a.PeopleId = b.PeopleId — a grade row references a person, so the
    // join must target People on PeopleId (not Role on RoleId).
    $lookup: {
      from: "People",
      localField: "PeopleId",
      foreignField: "PeopleId",
      as: "people"
    }
  },
  {
    $lookup: {
      from: "Course",
      localField: "CourseId",
      foreignField: "CourseId",
      as: "course"
    }
  },
  {
    // Promote the first matched person and course to the top level.
    // On duplicate field names (e.g. Name) the later object wins.
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [
          "$$ROOT",
          { $arrayElemAt: ["$people", 0] },
          { $arrayElemAt: ["$course", 0] }
        ]
      }
    }
  },
  // Drop the helper arrays now that their contents have been merged in.
  { $project: { people: 0, course: 0 } }
])
Mongo Playground
$arrayElemAt can always be replaced with $unwind like you tried. You also have a naming conflict on name field so probably you need to run $project to rename one of those fields - otherwise you'll get only one of them in final result.

How to implement the follow RMDB query in MongoDB

My team has started using MongoDB and wants to migrate some SQL to Mongo.
For example, I have an order table with the fields price and quanty. I want to query for rows where price*quanty is greater than 100. The SQL is shown below:
select * from Order where price * quanty > 100;
How can I write this kind of "price * quanty" query in Mongo?
Thanks.
You can do this by using the $expr operator to use aggregation expressions within your query:
// Match orders whose price * quantity is greater than 100.
db.orders.find({
  $expr: { $gt: [{ $multiply: ["$price", "$quantity"] }, 100] }
})
As JohnnyHK points out you can use $expr, but as an alternative you can also use aggregation to first create a new field that is the product of two other fields:
// Add a computed "product" field (price * quantity) to every order.
db.orders.aggregate([
  {
    $set: {
      product: { $multiply: ["$price", "$quantity"] }
    }
  }
])
Note: $set is new in 4.2 and just an alias for $addFields
Then add a $match stage that only matches documents with the new product field meeting your condition:
// Compute product = price * quantity, then keep only orders where it exceeds 100.
db.orders.aggregate([
  {
    $set: {
      product: { $multiply: ["$price", "$quantity"] }
    }
  },
  {
    $match: {
      product: { $gt: 100 }
    }
  }
])