Mongo $cond if expression doesn't work like $match - mongodb

I have a collection with documents with a "parent" field.
[
{
"parent": "P1",
"tagGroups": [],
},
{
"parent": "P1",
"tagGroups": [
{
group: 1,
tags: {
tag1: {
value: true
},
tag2: {
value: "foo"
},
}
},
{
group: 2,
tags: {}
}
]
},
{
"parent": "P2",
"tagGroups": [],
}
]
I want to make request that retrieves all documents with the same parent when at least one match with my criteria: tag1.value = true.
Expected:
[
{
"parent": "P1",
"tagGroups": [],
},
{
"parent": "P1",
"tagGroups": [
{
group: 1,
tags: {
tag1: {
value: true
},
tag2: {
value: "foo"
},
}
},
{
group: 2,
tags: {}
}
]
}
]
For that I wanted to use the $cond to flag every document, then group by parent.
https://mongoplayground.net/p/WiIlVeLDrY-
But the "if" part seems to work differently that a $match
https://mongoplayground.net/p/_jcoUHE-aOu
Do you have another efficient way to do that kind of query?
Edit: I can use a lookup stage but I'm afraid of bad performances
Thanks

You haven't mentioned what you want to achieve, but you expect that your tried code (first link) should be working. You need to use $in instead of $eq in your query
db.collection.aggregate({
"$addFields": {
"match": {
"$cond": [
{ $in: [ true, "$tagGroups.tags.tag1.value" ] }, 1, 0] }
}
},
{
"$group": {
"_id": "$parent",
"elements": { "$addToSet": "$$ROOT" },
"elementsMatch": { "$sum": "$match" }
}
},
{ "$match": { "elementsMatch": { $gt: 0 } }},
{ "$unwind": "$elements"}
)
Working Mongo playground
Note : You have asked about the efficient way. Better you need to post expected result

Related

Add Aggregate field in MongoDB pipeline depending on all elements of an array

Given the following documents in a collection:
[{
"_id": {
"$oid": "63f06283b80a395adf27780d"
},
"suppliers": [
{
"name": "S1",
"duesPaid": true
},
{
"name": "S2",
"duesPaid": true
}
]
},{
"_id": {
"$oid": "63f06283b80a395adf27780e"
},
"suppliers": [
{
"name": "S1",
"duesPaid": true
},
{
"name": "S2",
"duesPaid": false
}
]
}]
I would like to create an aggregateField in each document that does the following: If the suppliers array has at least 1 element and every element in that has the duesPaid field == true, then add a field to the document suppliersPaid = true. Otherwise add suppliersPaid = false. The resulting documents from the pipeline should look like this:
[{
"_id": {
"$oid": "63f06283b80a395adf27780d"
},
"suppliers": [
{
"name": "S1",
"duesPaid": true
},
{
"name": "S2",
"duesPaid": true
}
],
"suppliersPaid": true,
},{
"_id": {
"$oid": "63f06283b80a395adf27780e"
},
"suppliers": [
{
"name": "S1",
"duesPaid": true
},
{
"name": "S2",
"duesPaid": false
}
],
"suppliersPaid": false,
}]
I have tried the following pipeline:
[{$addFields: {
suppliersPaid: {
$and: [
{ $gte: [{ $size: "$suppliers" }, 1] },
{
suppliers: {
$not: {
$elemMatch: { duesPaid: false },
},
},
},
],
},
}}]
and I get the following error: Invalid $addFields :: caused by :: Unrecognized expression '$elemMatch'
I've tried to eliminate the reliance on $elemMatch per the docs https://www.mongodb.com/docs/manual/reference/operator/query/elemMatch/#single-query-condition as such:
[{$addFields: {
suppliersPaid: {
$and: [
{ $gte: [{ $size: "$suppliers" }, 1] },
{
suppliers: {
$not: {
duesPaid: false
},
},
},
],
},
}}]
But this yields the incorrect result of setting suppliersPaid to true for both documents, which is incorrect.
Note: I would like to avoid using any sort of JS in this code i.e. no $where operators.
For the second condition:
$eq - Compare the result from 1.1 to return an empty array.
1.1. $filter - Filter the documents from suppliers containing { duesPaid: false }.
db.collection.aggregate([
{
$addFields: {
suppliersPaid: {
$and: [
{
$gte: [
{
$size: "$suppliers"
},
1
]
},
{
$eq: [
{
$filter: {
input: "$suppliers",
cond: {
$eq: [
"$$this.duesPaid",
false
]
}
}
},
[]
]
}
]
}
}
}
])
Demo # Mongo Playground

MongoDb sum issue after match and group

Suppose I have document as userDetails:
[
{
"roles": [
"author",
"reader"
],
"completed_roles": ["author", "reader"],
"address": {
"current_address": {
"city": "abc"
}
},
"is_verified": true
},
{
"roles": [
"reader"
],
"completed_roles": ["reader"],
"address": {
"current_address": {
"city": "abc"
}
},
"is_verified": true
},
{
"roles": [
"author"
],
"completed_roles": [],
"address": {
"current_address": {
"city": "xyz"
}
},
"is_verified": false
}
]
I want to fetch sum for all roles which has author based on city, total_roles_completed and is_verified.
So the O/P should look like:
[
{
"_id": {
"city": "abc"
},
"total_author": 1,
"total_roles_completed": 1,
"is_verified": 1
},
{
"_id": {
"city": "xyz"
},
"total_author": 1,
"total_roles_completed": 0,
"is_verified": 0
}
]
Basic O/P required:
Filter the document based on author in role (other roles may be present in role but author must be present)
Sum the author based on city
sum on basis of completed_profile has "author"
Sum on basis of documents if they are verified.
For this I tried as:
db.userDetails.aggregate([
{
$match: {
roles: {
$eleMatch: {
$eq: "author"
}
}
}
},
{
$unwind: "$completed_roles"
},
{
"$group": {
_id: { city: "$address.current_address.city"},
total_authors: {$sum: 1},
total_roles_completed: {
$sum: {
$cond: [
{
$eq: ["$completed_roles","author"]
}
]
}
},
is_verified: {
$sum: {
$cond: [
{
$eq: ["$is_verified",true]
}
]
}
}
}
}
]);
But the sum is incorrect. Please let me know where I made mistake. Also, if anyone needs any further information please let me know.
Edit: I figured that because of unwind it is giving me incorrect value, if I remove the unwind the sum is coming correct.
Is there any other way by which I can calculate the sum of total_roles_completed for each city?
If I've understood correctly you can try this query:
First $match to get only documents where roles contains author.
And then $group by the city (the document is not a valid JSON so I assume is address:{"current_addres:{city:"abc"}}). This $group get the authors for each city and also: $sum 1 if "author" is in completed_roles and check if is verified.
Here I don't know the way to know if the author is verified (I don't know if can be true in one document and false in other document. If is the same value over all documents you can use $first to get the first is_verified value). But I decided to use $allElementsTrue in a $project stage, so this only will be true if is_verified is true in all documents grouped by $group.
db.collection.aggregate([
{
"$match": {
"roles": "author"
}
},
{
"$group": {
"_id": "$address.current_address.city",
"total_author": {
"$sum": 1
},
"total_roles_completed": {
"$sum": {
"$cond": {
"if": {
"$in": [
"author",
"$completed_roles"
]
},
"then": 1,
"else": 0
}
}
},
"is_verified": {
"$addToSet": "$is_verified"
}
}
},
{
"$project": {
"_id": 0,
"city": "$_id",
"is_verified": {
"$allElementsTrue": "$is_verified"
},
"total_author": 1,
"total_roles_completed": 1
}
}
])
Example here
The result from this query is:
[
{
"city": "xyz",
"is_verified": false,
"total_author": 1,
"total_roles_completed": 0
},
{
"city": "abc",
"is_verified": true,
"total_author": 2,
"total_roles_completed": 2
}
]

MongoDB/Mongoose how to return a nested subdocument by _id

MongoDB newbie here.
I have a 'client' document that looks like this:
{
name: "myClient",
products: [{
name: "myProduct1",
environments: [{
name: "myEnvironment1",
changeLogs: [
{ "some": "fields21" },
{ "some": "fields22" }
]
},
{
name: "myEnvironment2",
changeLogs: [
{ "some": "fields11" },
{ "some": "fields12" }
]
}
]
},
{
name: "myProduct2",
environments: [{
name: "myEnv1",
changeLogs: [
{ "some": "fields1" },
{ "some": "fields2" }
]
},
{
name: "myEnv1",
changeLogs: [
{ "some": "fields3" },
{ "some": "fields4" }
]
}
]
}]
}
So a client has many products, which has many environments, which has many changeLogs. I am looking to return a list of changeLogs for a given environment, with only the environment._id to go on.
I can find the correct client document using just this _id:
db.clients.find({'products.environments._id': ObjectId("5a1bf4b410842200043d56ff")})
But this returns the entire client document. What I want is to return just the changeLogs array from the environment with _id: ObjectId("5a1bf4b410842200043d56ff")
Assuming I have the _id of the first environment of the first product, my desired output is the following:
[
{ "some": "fields21" },
{ "some": "fields22" }
]
What query would you recommend I use to achieve this?
Many thanks in advance for any help. The docs thus far have only been confusing, but I'm sure I'll get there in the end!
The idea here is to $unwind the products array so that its environments can be fed as input to $filter after a $match on the _id.
(lets assume the enviroment _id is 1)
db.collection.aggregate([
{
$unwind: "$products"
},
{
$match: {
"products.environments._id": 1
}
},
{
$project: {
"logsArray": {
$filter: {
input: "$products.environments",
as: "env",
cond: {
$eq: [
"$$env._id",
1
]
}
}
}
}
},
{
$unwind: "$logsArray"
}
])
O/P Should be like:
[
{
"_id": ObjectId("5a934e000102030405000000"),
"logsArray": {
"changeLogs": [
{
"some": "fields21"
},
{
"some": "fields22"
}
],
"id": 1,
"name": "myEnvironment1"
}
}
]
Note: notice the last stage $unwind of logsArray which I think is just pretty-fying the ouput. Otherwise without it also the resultant is acceptable (if you agree, can remove that).
This is just another way of doing the aggregation query. This gets the desired result.
Note I am using the "name" field of the "environments" from the sample document you had provided. The "name" can be substituted with "id" as needed.
var ENV = "myEnvironment1";
db.env.aggregate( [
{ $match: {
{ $unwind: "$products" },
{ $unwind: "$products.environments" },
{ $match: { "products.environments.name": ENV} },
{ $project: { _id: 0, changeLogs: "$products.environments.changeLogs" } },
] )
The result:
{ "changeLogs" : [ { "some" : "fields21" }, { "some" : "fields22" } ] }
If the variable ENV's value is changed, then the result will be accordingly; e.g.,: ENV = "myEnv1";
{ "changeLogs" : [ { "some" : "fields1" }, { "some" : "fields2" } ] }
{ "changeLogs" : [ { "some" : "fields3" }, { "some" : "fields4" } ] }
db.clients.aggregate([
{
$unwind: "$products"
},
{
$unwind: "$products.environments"
},
{
$match: { "products.environments._id": ObjectId("5a1bf4b410842200043fffff") }
},
{
$project: { _id: 0, changeLogs: "$products.environments.changeLogs" }
}
]).pretty()
Results in:
{
"changeLogs": [
{ "some": "fields21" },
{ "some": "fields22" }
]
}
For those finding that code confusing I found it very useful to just add one aggregate method at a time, look at the results, and then add then next method to the pipeline. Once the pipeline was complete I also experimented with removing intermediary steps to see if I could get the same results with less piping.

Mongodb aggregation with $addFileds and condition

Given that:
db :
{
id:"112",
val1: {val:""},
val2: {val:"123"},
}
I would like to run a script that updates a new field according to the aggregation result. The result is true if one of the values (val1, val2) is empty
The below is what I did with aggregation and then I would go over with for and update all rows:
db.valTest.aggregate(
[{
"$addFields": {
"val.selected": {
'$or': [{
'val1.val': ''
}, {
'val2.val': ''
}]
}
}
},
{
"$group": {
"_id": "$_id",
"id": {
"$first": "$id"
},
"value": {
"$first": "val1.val"
},
"result": {
"$push": {
"val": "val1.val",
"selected": "val.selected"
}
}
}
}
]
)
But, I do not get the correct result. I would like to get result like:
{
id:"112",
val1: {val:""},
val2: {val:"123"},
result: true
},
{
id:"114",
val1: {val:"4545"},
val2: {val:"123"},
result: false
}
Presently, I am getting the following error:
"message" : "FieldPath field names may not contain '.'.",
You need to use $eq aggregation operator for the matching criteria
db.collection.aggregate([
{ "$addFields": {
"result": {
"$cond": [
{ "$or": [{ "$eq": ["$val1.val", ""] }, { "$eq": ["$val2.val", ""] }] },
true,
false
]
}
}}
])

Filter subdocument by datetime

I've the following model
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
author: { type: Schema.Types.ObjectId }
});
var conversationSchema = new Schema({
title: { type: String },
author: { type : Schema.Types.ObjectId },
members: [ { type: Schema.Types.ObjectId } ],
creationDate: { type: Date, default: Date.now },
lastUpdate: { type: Date, default: Date.now },
comments: [ messageSchema ]
});
I want to create two methods to get the comments generated after a date by user or by conversationId.
By User
I tried with the following method
var query = {
members : { $all : [ userId, otherUserId ], "$size" : 2 }
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
When there are no comments after the specified date (at from) the method returns [] or null
By conversationId
The same happen when I try to get by user id
var query = { _id : conversationId
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
Is there any way to make the method returns the conversation information with an empty comments?
Thank you!
Sounds like a couple of problems here, but stepping through them all
In order to get more than a single match "or" none from an array to need the aggregation framework of mapReduce to do this. You could try "projecting" with $elemMatch but this can only return the "first" match. i.e:
{ "a": [1,2,3] }
db.collection.find({ },{ "$elemMatch": { "$gte": 2 } })
{ "a": [2] }
So standard projection does not work for this. It can return an "empty" array but it an also only return the "first" that is matched.
Moving along, you also have this in your code:
{ $all : [ userId, otherUserId ], "$site" : 2 }
Where $site is not a valid operator. I think you mean $size but there are actuall "two" operators with that name and your intent may not be clear here.
If you mean that the array you are testing must have "only two" elements, then this is the operator for you. If you meant that the matched conversation between the two people had to be equal to both in the match, then $all does this anyway so the $size becomes redundant in either case unless you don't want anyone else in the conversation.
On to the aggregation problem. You need to "filter" the content of the array in a "non-destructive way" in order to get more than one match or an empty array.
The best approach for this is with modern MongoDB features available from 2.6, which allows the array content to be filtered without processing $unwind:
Model.aggregate(
[
{ "$match": {
"members": { "$all": [userId,otherUserId] }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
],
function(err,result) {
}
);
That uses $map which can process an expression against each array element. In this case the vallues are tested under the $cond ternary to either return the array element where the condition is true or otherwise return false as the element.
These are then "filtered" by the $setDifference operator which essentially compares the resulting array of $map to the other array [false]. This removes any false values from the result array and only leaves matched elements or no elements at all.
An alternate may have been $redact but since your document contains "creationDate" at multiple levels, then this messes with the logic used with it's $$DESCEND operator. This rules that action out.
In earlier versions "not destroying" the array needs to be treated with care. So you need to do much the same "filter" of results in order to get the "empty" array you want:
Model.aggregate(
[
{ "$match": {
"$and": [
{ "members": userId },
{ "members": otherUserId }
}},
{ "$unwind": "$comments" },
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": {
"$addToSet": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
"$comments",
false
]
}
},
"matchedSize": {
"$sum": {
"$cond": [
{ "$gte": [ "$comments.creationDate", from ] },
1,
0
]
}
}
}},
{ "$unwind": "$comments" },
{ "$match": {
"$or": [
{ "comments": { "$ne": false } },
{ "matchedSize": 0 }
]
}},
{ "$group": {
"_id": "$_id",
"title": { "$first": "$title" },
"author": { "$first": "$author" },
"members": { "$first": "$members" },
"creationDate": { "$first": "$creationDate" },
"lastUpdate": { "$first": "$lastUpdate" },
"comments": { "$push": "$comments" }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$cond": [
{ "$eq": [ "$comments", [false] ] },
{ "$const": [] },
"$comments"
]
}
}}
],
function(err,result) {
}
)
This does much of the same things, but longer. In order to look at the array content you need to $unwind the content. When you $group back, you look at each element to see if it matches the condition to decide what to return, also keeping a count of the matches.
This is going to put some ( one with $addToSet ) false results in the array or only an array with the entry false where there are no matches. So yo filter these out with $match but also testing on the matched "count" to see if no matches were found. If no match was found then you don't throw away that item.
Instead you replace the [false] arrays with empty arrays in a final $project.
So depending on your MongoDB version this is either "fast/easy" or "slow/hard" to process. Compelling reasons to update a version already many years old.
Working example
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/aggtest');
var memberSchema = new Schema({
name: { type: String }
});
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
});
var conversationSchema = new Schema({
members: [ { type: Schema.Types.ObjectId } ],
comments: [messageSchema]
});
var Member = mongoose.model( 'Member', memberSchema );
var Conversation = mongoose.model( 'Conversation', conversationSchema );
async.waterfall(
[
// Clean
function(callback) {
async.each([Member,Conversation],function(model,callback) {
model.remove({},callback);
},
function(err) {
callback(err);
});
},
// add some people
function(callback) {
async.map(["bill","ted","fred"],function(name,callback) {
Member.create({ "name": name },callback);
},callback);
},
// Create a conversation
function(names,callback) {
var conv = new Conversation();
names.forEach(function(el) {
conv.members.push(el._id);
});
conv.save(function(err,conv) {
callback(err,conv,names)
});
},
// add some comments
function(conv,names,callback) {
async.eachSeries(names,function(name,callback) {
Conversation.update(
{ "_id": conv._id },
{ "$push": { "comments": { "comment": name.name } } },
callback
);
},function(err) {
callback(err,names);
});
},
function(names,callback) {
Conversation.findOne({},function(err,conv) {
callback(err,names,conv.comments[1].creationDate);
});
},
function(names,from,callback) {
var ids = names.map(function(el) {
return el._id
});
var pipeline = [
{ "$match": {
"$and": [
{ "members": ids[0] },
{ "members": ids[1] }
]
}},
{ "$project": {
"members": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
];
//console.log(JSON.stringify(pipeline, undefined, 2 ));
Conversation.aggregate(
pipeline,
function(err,result) {
if(err) throw err;
console.log(JSON.stringify(result, undefined, 2 ));
callback(err);
}
)
}
],
function(err) {
if (err) throw err;
process.exit();
}
);
Which produces this output:
[
{
"_id": "55a63133dcbf671918b51a93",
"comments": [
{
"comment": "ted",
"_id": "55a63133dcbf671918b51a95",
"creationDate": "2015-07-15T10:08:51.217Z"
},
{
"comment": "fred",
"_id": "55a63133dcbf671918b51a96",
"creationDate": "2015-07-15T10:08:51.220Z"
}
],
"members": [
"55a63133dcbf671918b51a90",
"55a63133dcbf671918b51a91",
"55a63133dcbf671918b51a92"
]
}
]
Note the "comments" only contain the last two entries which are "greater than or equal" to the date which was used as input ( being the date from the second comment ).