MongoDB - joining two results of queries and dense_rank - mongodb

I'm learning about MongoDB and I have some problems with understanding its concept.
I have a collection which looks like that:
db.email.findOne()
{
"_id" : ObjectId("52af48b5d55148fa0c199646"),
"sender" : "tori.wells#enron.com",
"recipients" : [
"michael#optsevents.com"
],
"cc" : [ ],
"text" : "Mr. Christman:\n\nThank you for your invitation for Dr. Lay to speak at your upcoming forum in \nFrance, the format looks wonderful. Unfortunately, Dr. Lay has calendar \nconflicts and will be unable to participate.\n\nIf you should need further assistance, please do not hesitate to contact us.\n\nTori Wells\nExecutive Assistant",
"mid" : "22263156.1075840285610.JavaMail.evans#thyme",
"fpath" : "enron_mail_20110402/maildir/lay-k/_sent/101.",
"bcc" : [ ],
"to" : [
"michael#optsevents.com"
],
"replyto" : null,
"ctype" : "text/plain; charset=us-ascii",
"fname" : "101.",
"date" : "2000-08-04 09:04:00-07:00",
"folder" : "_sent",
"subject" : "Wall Street Journal Millennium Forum"
}
It's the Enron database.
I'm trying to write a query that lists each email address together with the number of messages sent from it and the number of messages received by it.
I managed to write two queries, which look like this:
db.email.aggregate({$group:{_id:"$sender",SendsAmount:{$sum:1}}},{$sort:{SendsAmount:-1}})
{ "_id" : "rosalee.fleming#enron.com", "SendsAmount" : 849 }
{ "_id" : "brown_mary_jo#lilly.com", "SendsAmount" : 82 }
{ "_id" : "leonardo.pacheco#enron.com", "SendsAmount" : 78 }
db.email.aggregate({$group:{_id:"$recipients",ReceivedAmount:{$sum:1}}},{$unwind:"$_id"},{$sort:{ReceivedAmount:-1}})
{ "_id" : "klay#enron.com", "ReceivedAmount" : 1350 }
{ "_id" : "kenneth.lay#enron.com", "ReceivedAmount" : 912 }
{ "_id" : "kenneth.lay#enron.com", "ReceivedAmount" : 78 }
As you can see, the first one returns each email address with the number of emails sent from it, and the second one returns each email address with the number of emails it received.
My goal is to join(?) these two into a single query which will return something like:
{ "_id" : "email#enron.com", "SendsAmount" : 57, "ReceivedAmount": 43 }
I know there is $lookup, but it can only be used with two collections, so my idea was to make two collections out of these two queries, but I feel like there is a better way of solving my problem.
---My second problem is about emulating DENSE_RANK, which is not present in MongoDB. I want to rank email addresses by the number of sent emails.
I used $unwind with includeArrayIndex, but I got something like ROW_NUMBER, which is not what I'm looking for.
I have written something like this:
db.email.aggregate({$group:{"_id":"$sender",SendsAmount:{$sum:1},rank:0}},{$sort:{"ile":-1}}).forEach(function(x){
var howmany=0;
var query=db.email.aggregate({$group:{"_id":"$sender",SendsAmount:{$sum:1}}},{$match:{ile:{$gt:x.SendsAmount}}},{$group:{_id:null, HowManyGreater:{$sum:1}}});
query.forEach(function(y){
howmany=y.HowManyGreater;
});
howmany=howmany+1;
print("email: "+ x._id + " SendsAmount: " + x.SendsAmount + " rank " + howmany+1);
});
Which is giving me the result I want, but it isn't even a document, only printed output. I've read about MapReduce, but I didn't understand how to use it in this case.

If you want to do all the calculations in a single aggregate query, you can use the $facet and $group stages as shown below:
db.email.aggregate([
{
$facet: {
send: [
{
$group: {
_id: "$sender",
SendsAmount: {
$sum: 1
}
}
},
{
$sort: {
SendsAmount: -1
}
}
],
recieve: [
{
$group: {
_id: "$recipients",
ReceivedAmount: {
$sum: 1
}
}
},
{
$unwind: "$_id"
},
{
$sort: {
ReceivedAmount: -1
}
}
]
}
},
{
$project: {
all: {
$concatArrays: [
"$recieve",
"$send"
]
}
}
},
{
$unwind: "$all"
},
{
$group: {
_id: "$all._id",
ReceivedAmount: {
$sum: {
$cond: {
if: {
$gt: [
"$all.ReceivedAmount",
null
]
},
then: "$all.ReceivedAmount",
else: 0
}
}
},
SendsAmount: {
$sum: {
$cond: {
if: {
$gt: [
"$all.SendsAmount",
null
]
},
then: "$all.SendsAmount",
else: 0
}
}
}
}
}
])

Related

MongoDB group by multiple fields

I have a collection with documents in MongoDB:
[
{
"_id" : ObjectId("61ba65af74cf385ee93ad2c7"),
"Car_brand":"BMW X7",
"Plate_number":"8OP66",
"Model_year":"2018",
"Company":"BMW",
"Purchase_year":"2019",
"Body_color":"red",
"Mileage":1000,
"Price":35000,
"Body_type":"crossover"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2c8"),
"Car_brand":"Tesla Model X",
"Plate_number":"5XR56",
"Model_year":"2015",
"Company":"Tesla Motors",
"Purchase_year":"2019",
"Body_color":"white",
"Mileage":800,
"Price":25000,
"Body_type":"SUV"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2c9"),
"Car_brand":"Tesla Cybertruck",
"Plate_number":"2ED45",
"Model_year":"2021",
"Company":"Tesla Motors",
"Purchase_year":"2021",
"Body_color":"gray",
"Mileage":0,
"Price":50000,
"Body_type":"pickup"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2ca"),
"Car_brand":"Lamborghini Aventador",
"Plate_number":"2MN50",
"Model_year":"2011",
"Company":"Lamborghini",
"Purchase_year":"2017",
"Body_color":"orange",
"Mileage":700,
"Price":45000,
"Body_type":"supercar"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2cb"),
"Car_brand":"BMW X7",
"Plate_number":"3QW14",
"Model_year":"2018",
"Company":"BMW",
"Purchase_year":"2020",
"Body_color":"black",
"Mileage":4500,
"Price":14000,
"Body_type":"crossover"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2cc"),
"Car_brand":"Mercedes-Benz G-Class",
"Plate_number":"9KI24",
"Model_year":"2017",
"Company":"Mercedes-Benz",
"Purchase_year":"2017",
"Body_color":"black",
"Mileage":6000,
"Price":13000,
"Body_type":"SUV"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2cd"),
"Car_brand":"BMW X6",
"Plate_number":"2GH47",
"Model_year":"2008",
"Company":"BMW",
"Purchase_year":"2016",
"Body_color":"white",
"Mileage":4500,
"Price":14500,
"Body_type":"SUV"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2ce"),
"Car_brand":"Chevrolet Camaro",
"Plate_number":"7BV58",
"Model_year":"2015",
"Company":"Chevrolet",
"Purchase_year":"2020",
"Body_color":"orange",
"Mileage":4000,
"Price":43000,
"Body_type":"cabriolet"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2cf"),
"Car_brand":"Ford Mustang",
"Plate_number":"4AM23",
"Model_year":"2016",
"Company":"Ford Motor Company",
"Purchase_year":"2019",
"Body_color":"purple",
"Mileage":2000,
"Price":30000,
"Body_type":"cabriolet"
},
{
"_id" : ObjectId("61ba65af74cf385ee93ad2d0"),
"Car_brand":"Dodge Challenger",
"Plate_number":"6DL73",
"Model_year":"2020",
"Company":"Dodge (Chrysler Corporation)",
"Purchase_year":"2020",
"Body_color":"red",
"Mileage":0,
"Price":40000,
"Body_type":"muscle car"
}
]
I need to get:
Number of colors for each body type.
The company with two car brands.
I've tried the first one like this:
db.Vehicles.aggregate([{$group:{"_id":"$Body_type","Количество цветов":{$sum: 1}}}]);
But I get all the colors including those which repeat twice. And I need to get a number of distinct colors.
And I can't think of any suggestions for the second one.
Thanks.
Number of colors for each body type
You can do it like this:
db.collection.aggregate([
{
"$group": {
"_id": "$Body_type",
"colors_array": {
"$addToSet": "$Body_color"
}
}
},
{
"$project": {
"colors_number": {
"$size": "$colors_array"
},
"colors_array": 1
}
}
])
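Note that I also included the array of all the colors - the colors_array property. If you want only the number of unique colors, and not the colors array in addition, just remove the "colors_array": 1 line from the $project stage. (Changing it to -1 would not exclude the field, because any non-zero value is treated as an inclusion, and an exclusion with 0 cannot be combined with the computed colors_number field in the same $project.)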
Working example
The company with two car brands.
You can do it like this:
db.collection.aggregate([
{
"$group": {
"_id": "$Company",
"brands_array": {
"$addToSet": "$Car_brand"
}
}
},
{
"$set": {
"brands_number": {
"$size": "$brands_array"
}
}
},
{
"$match": {
"brands_number": 2
}
}
])
Working example

NoSQL aggregation query Shakespeare’s dataset

I'm trying to learn NoSQL aggregation queries and here is dataset (name - shakespeare_plays) structure:
"_id" : "Romeo and Juliet",
"acts" : [
{
"title" : "ACT I",
"scenes" : [
{
"title" : "SCENE I. Verona. A public place.",
"action" : [
{
"character" : "SAMPSON",
"says" : [
"Gregory, o' my word, we'll not carry coals."
]
},
{
"character" : "GREGORY",
"says" : [
"No, for then we should be colliers."
]
},
// ...
{
"character" : "GREGORY",
"says" : [
"To move is to stir; and to be valiant is to stand:",
"therefore, if thou art moved, thou runn'st away."
]
},
{
"character" : "SAMPSON",
"says" : [
"A dog of that house shall move me to stand: I will",
"take the wall of any man or maid of Montague's."
]
},
{
"character" : "GREGORY",
"says" : [
"That shows thee a weak slave; for the weakest goes",
"to the wall."
]
},
// ...
},
// ...
]
},
// ...
]
}
What tasks am I trying to do:
What characters are found in more than one play
How many lines (speeches) does Juliet have
Number of characters in Othello
Any tips how to do it via aggregate?
You're on the right track. Sharing some queries to achieve your goal.
From where you are right now, you can get a list of all characters by adding a $group stage:
db.getCollection('shakespeare_plays').aggregate([{
$unwind: "$acts"
}, {
$unwind: "$acts.scenes"
}, {
$unwind: "$acts.scenes.action"
}, {
$group: {
_id: "$acts.scenes.action.character"
}
}])
Going further, if you want to see how many times each character appears, you can use the $sum operator inside $group:
db.getCollection('shakespeare_plays').aggregate([{
$unwind: "$acts"
}, {
$unwind: "$acts.scenes"
}, {
$unwind: "$acts.scenes.action"
}, {
$group: {
_id: "$acts.scenes.action.character",
count: {$sum: 1}
}
}])
//Results : [{ "_id" : "GREGORY", "count" : 4 }]
You can export the results to an array and perform any logic you want to perform on the results which will give you all the answers you needed
var myResults = db.getCollection('shakespeare_plays').aggregate([pipelineQuery]).toArray();
//Here you can perform any logic on the variable myResults in your programming language
Read more about $group and $sum

Project values of different columns into one field

{
"_id" : ObjectId("5ae84dd87f5b72618ba7a669"),
"main_sub" : "MATHS",
"reporting" : [
{
"teacher" : "ABC"
}
],
"subs" : [
{
"sub" : "GEOMETRIC",
"teacher" : "XYZ",
}
]
}
{
"_id" : ObjectId("5ae84dd87f5b72618ba7a669"),
"main_sub" : "SOCIAL SCIENCE",
"reporting" : [
{
"teacher" : "XYZ"
}
],
"subs" : [
{
"sub" : "CIVIL",
"teacher" : "ABC",
}
]
}
I have simplified the structure of the documents that I have.
The basic structure is that I have a parent subject with an array of reporting teachers and an array of sub-subjects (each having a teacher).
I now want to extract all the subjects (parent subjects and sub-subjects) that are taught by a particular teacher, along with a flag indicating whether each one is a parent subject or a sub-subject.
For eg:
for teacher ABC i want the following structure:
[{'subject':'MATHS', 'is_parent':'True'}, {'subject':'CIVIL', 'is_parent':'FALSE'}]
-- What is the most efficient query possible ..? I have tried $project with $cond and $switch but in both the cases I have had to repeat the conditional statement for 'subject' and 'is_parent'
-- Is it advised to do the computation in a query or should I get the data dump and then modify the structure in the server code? AS in, I could $unwind and get a mapping of the parent subjects with each sub-subject and then do a for loop.
I have tried
db.collection.aggregate(
{$unwind:'$reporting'},
{$project:{
'result':{$cond:[
{$eq:['ABC', '$reporting.teacher']},
"$main_sub",
"$subs.sub"]}
}}
)
Then I realised that even if I transform the else part into another query for the sub-subjects, I will have to write exactly the same condition again for the is_parent property.
You have 2 arrays, so you need to unwind both - the reporting and the subs.
After that stage each document will have at most 1 parent teacher-subj and at most 1 sub teacher-subj pairs.
You need to unwind them again to have a single teacher-subj per document, and it's where you define whether it is parent or not.
Then you can group by teacher. No need for $conds, $filters, or $facets. E.g.:
db.collection.aggregate([
{ $unwind: "$reporting" },
{ $unwind: "$subs" },
{ $project: {
teachers: [
{ teacher: "$reporting.teacher", sub: "$main_sub", is_parent: true },
{ teacher: "$subs.teacher", sub: "$subs.sub", is_parent: false }
]
} },
{ $unwind: "$teachers" },
{ $group: {
_id: "$teachers.teacher",
subs: { $push: {
subject: "$teachers.sub",
is_parent: "$teachers.is_parent"
} }
} }
])

need me use aggregation mongodb in arrays

I need help with this aggregation query: I need to sum the values of debito.
{
"_id" : ObjectId("5a088f6584ccb0a665900726"),
"usuario" : "tamura",
"creditos" : [
{
"nome_do_credito" : "credito inicial",
"credito" : 0
}
],
"debitos" : [
{
"nome_do_debito" : "debito inicial",
"debito" : 0
},
{
"nome_do_debito" : "Faculdade",
"debito" : "150.00"
}
]
}
I need the output
debito : 150
(0+150)
You will first need to turn all your debito fields into a numerical type (as in 150.00) since you cannot do Maths on strings (as in "150.00"). And then the following query should do the trick:
db.collection.aggregate({
$project: {
"debitos": {
$sum: "$debitos.debito"
}
}
})
In case you have more than one document in your collection and you want the total sum over all documents you can run this:
db.collection.aggregate({
$unwind: "$debitos" // flatten the "debitos" array
}, {
$group: {
"_id": null, // do not really group, just throw all documents in the same group
"debitos": {
$sum: "$debitos.debito" // sum up all debito fields
}
}
})

Excluding data in mongo aggregation

I'm working with a mongodb query. Each document in the collection looks like this:
{
"_id": "12345",
"name": "Trinity Force",
"price": 3702,
"comp": [
"Zeal",
"Phage",
"Sheen",
]
}
I was working on a query that returns the 5 cheapest items (lowest price), with prices equal to 0 excluded (those trinkets though). I wrote this (sorry for poor formatting)
db.league.aggregate( { $project : { _id : 1, name: 1, price: 1, comp: 0 } },
{ $match : {price : { $gt : 0 } } },
{ $sort: { price : 1 } }).limit(5)
I ran into two problems, though; the limit function doesn't seem to work with this aggregation, and neither does the $project. The output I'm looking for should exclude the item components (hence comp: 0) and limit it to 5 outputs. Could I get some assistance, please?
db.league.aggregate(
{ $project : { _id : "$_id", name: "$name", price: "$price"} },
{ $match : { "price" : { $gt : 0 } } },
{ $sort: { "price" : 1 } },
{ $limit : 5 })
This is an aggregation query that returns the 5 cheapest items.
imo, this is not aggregating but sorting results.
db.league.find({ price: { $gt :0} }, {comp: 0}).sort({price: 1}).limit(5)
nevertheless, i would test both for performance