MongoDB aggregation with $addFields and condition - mongodb

Given that:
db :
{
id:"112",
val1: {val:""},
val2: {val:"123"},
}
I would like to run a script that updates a new field according to the aggregation result. The result is true if one of the values (val1, val2) is empty
The below is what I did with aggregation and then I would go over with for and update all rows:
db.valTest.aggregate(
[{
"$addFields": {
"val.selected": {
'$or': [{
'val1.val': ''
}, {
'val2.val': ''
}]
}
}
},
{
"$group": {
"_id": "$_id",
"id": {
"$first": "$id"
},
"value": {
"$first": "val1.val"
},
"result": {
"$push": {
"val": "val1.val",
"selected": "val.selected"
}
}
}
}
]
)
But, I do not get the correct result. I would like to get result like:
{
id:"112",
val1: {val:""},
val2: {val:"123"},
result: true
},
{
id:"114",
val1: {val:"4545"},
val2: {val:"123"},
result: false
}
Presently, I am getting the following error:
"message" : "FieldPath field names may not contain '.'.",

You need to use $eq aggregation operator for the matching criteria
db.collection.aggregate([
{ "$addFields": {
"result": {
"$cond": [
{ "$or": [{ "$eq": ["$val1.val", ""] }, { "$eq": ["$val2.val", ""] }] },
true,
false
]
}
}}
])

Related

MongoDB Aggregate Query to find the documents with missing values

I am having a huge collection of objects where the data is stored for different employees.
{
"employee": "Joe",
"areAllAttributesMatched": false,
"characteristics": [
{
"step": "A",
"name": "house",
"score": "1"
},
{
"step": "B",
"name": "car"
},
{
"step": "C",
"name": "job",
"score": "3"
}
]
}
There are cases where the score for an object is completely missing and I want to find out all these details from the database.
In order to do this, I have written the following query, but seems I am going wrong somewhere due to which it is not displaying the output.
I want the data in the following format for this query, so that it is easy to find out which employee is missing the score for which step and which name.
db.collection.aggregate([
{
"$unwind": "$characteristics"
},
{
"$match": {
"characteristics.score": {
"$exists": false
}
}
},
{
"$project": {
"employee": 1,
"name": "$characteristics.name",
"step": "$characteristics.step",
_id: 0
}
}
])
You need to use $exists to check the existence
playground
You can use $ifNull to handle both cases of 1. the score field is missing 2. score is null.
db.collection.aggregate([
{
"$unwind": "$characteristics"
},
{
"$match": {
$expr: {
$eq: [
{
"$ifNull": [
"$characteristics.score",
null
]
},
null
]
}
}
},
{
"$group": {
_id: null,
documents: {
$push: {
"employee": "$employee",
"name": "$characteristics.name",
"step": "$characteristics.step",
}
}
}
},
{
$project: {
_id: false
}
}
])
Here is the Mongo playground for your reference.

Mongo $cond if expression doesn't work like $match

I have a collection with documents with a "parent" field.
[
{
"parent": "P1",
"tagGroups": [],
},
{
"parent": "P1",
"tagGroups": [
{
group: 1,
tags: {
tag1: {
value: true
},
tag2: {
value: "foo"
},
}
},
{
group: 2,
tags: {}
}
]
},
{
"parent": "P2",
"tagGroups": [],
}
]
I want to make request that retrieves all documents with the same parent when at least one match with my criteria: tag1.value = true.
Expected:
[
{
"parent": "P1",
"tagGroups": [],
},
{
"parent": "P1",
"tagGroups": [
{
group: 1,
tags: {
tag1: {
value: true
},
tag2: {
value: "foo"
},
}
},
{
group: 2,
tags: {}
}
]
}
]
For that I wanted to use the $cond to flag every document, then group by parent.
https://mongoplayground.net/p/WiIlVeLDrY-
But the "if" part seems to work differently than a $match
https://mongoplayground.net/p/_jcoUHE-aOu
Do you have another efficient way to do that kind of query?
Edit: I can use a lookup stage but I'm afraid of bad performances
Thanks
You haven't mentioned what you want to achieve, but you expect that your tried code (first link) should be working. You need to use $in instead of $eq in your query
db.collection.aggregate({
"$addFields": {
"match": {
"$cond": [
{ $in: [ true, "$tagGroups.tags.tag1.value" ] }, 1, 0] }
}
},
{
"$group": {
"_id": "$parent",
"elements": { "$addToSet": "$$ROOT" },
"elementsMatch": { "$sum": "$match" }
}
},
{ "$match": { "elementsMatch": { $gt: 0 } }},
{ "$unwind": "$elements"}
)
Working Mongo playground
Note : You have asked about the efficient way. Better you need to post expected result

Mongo db not in query by having two subset of documents from same collection

I am new to mongodb. Assume the following. There are 3 types of documents in one collection x, y and z.
docs = [{
"item_id": 1
"type": "x"
},
{
"item_id": 2
"type": "x"
},{
"item_id": 3
"type": "y",
"relavent_item_ids": [1, 2]
},
{
"item_id": 3
"type": "y",
"relavent_item_ids": [1, 2, 3]
},{
"item_id": 4
"type": "z",
}]
I want to get the following.
Ignore the documents with type z
Get all the documents of type x where it's item_id is not in relavent_item_ids of type y documents.
The result should have item_id field.
I tried doing match $in but this returns me all the records, I am unable to figure out how to have in condition with subset of documents of type y.
You can use below query
// Collect every id referenced by a type "y" document into ONE flat list.
// flatMap (rather than map) is required: map would yield an array of arrays,
// and $nin would then compare each item_id against whole arrays and never match.
const item_ids = (await db.collection.find({ "type": "y" })).flatMap(({ relavent_item_ids }) => relavent_item_ids)
// Keep only type "x" documents that have an item_id which no "y" document references.
// The $nin test has to be applied to item_id: type "x" documents carry no
// relavent_item_ids field, so filtering on that field would let every "x" through.
const result = await db.collection.find({
"item_id": { "$exists": true, "$nin": item_ids },
"type": { "$ne": "z", "$eq": "x" }
})
console.log({ result })
Ignore the documents with type z --> Use $ne not equal to query operator to filter out z types.
Get all the documents of type x where it's item_id is not in relavent_item_ids of type y documents --> Use $expr to match the same documents fields.
The result should have item_id field --> Use $exists query operator.
The solution:
db.test.aggregate( [
{
$facet: {
firstQuery: [
{
$match: { type: { $eq: "x", $ne: "z" } }
},
{
$project: {
item_id : 1, _id: 0
}
}
],
secondQuery: [
{
$match: { type: "y" }
},
{
$group: {
_id: null,
relavent: { $push: "$relavent_item_ids" }
}
},
{
$project: {
relavent: {
$reduce: {
input: "$relavent",
initialValue: [ ],
in: { $setUnion: [ "$$value", "$$this" ] }
}
}
}
}
]
}
},
{
$addFields: { secondQuery: { $arrayElemAt: [ "$secondQuery", 0 ] } }
},
{
$project: {
result: {
$filter: {
input: "$firstQuery" ,
as: "e",
cond: { $not: [ { $in: [ "$$e.item_id", "$secondQuery.relavent" ] } ] }
}
}
}
},
] )
Using the input documents in the question post and adding one more following document to the collection:
{
"item_id": 11,
"type": "x",
}
: only this document's item_id (value 11) will show in the output.
The aggregation uses a $facet to make two individual queries with a single pass. The first query gets all the "x" types (and ignores type "z") as an array. The second query gets an array of relavent_item_ids with unique values (from the documents of type "y"). The final, $project stage filters the first query result array with the condition:
Get all the documents of type x where it's item_id is not in
relavent_item_ids of type y documents
I am not sure if its an elegant solution.
db.getCollection('test').aggregate([
{
"$unwind": {
"path": "$relavent_item_ids",
"preserveNullAndEmptyArrays": true
}
},
{
"$group": {
"_id":null,
"relavent_item_ids": {"$addToSet":"$relavent_item_ids"},
"other_ids": {
"$addToSet":{
"$cond":[
{"$eq":["$type", "x"]},
"$item_id",
null
]
}
}
}
},
{
"$project":{
"includeIds": {"$setDifference":["$other_ids", "$relavent_item_ids"]}
}
},
{
"$unwind": "$includeIds"
},
{
"$match": {"includeIds":{"$ne":null}}
},
{
"$lookup":{
"from": "test",
"let": { "includeIds": "$includeIds"},
"pipeline": [
{ "$match":
{ "$expr":
{ "$and":
[
{ "$eq": [ "$item_id", "$$includeIds" ] },
{ "$eq": [ "$type", "x" ] }
]
}
}
}
],
"as": "result"
}
},
{
"$unwind": "$result"
},
])

MongoDB distinct values on subdocuments

I have a little weird database structure it is as follows:
I have a document with normal properties, then I have a metadata property which is an array of objects.
metadata: {[
{
key: [key],
value: [value]
},
...
]}
Edit: There will never be a metadata sub-document which has a duplicate key
It was done this way to retain the order of the metadata objects
Now I want to get distinct values of a metadata object with a given key.
I want to find every distinct [value] where [key] = "x" using MongoDB. And have the distinct values returned in an array (not the document)
I guess this is not possible using the distinct command, but is this possible using an aggregation pipeline or do I have to use Map-Reduce?
Any suggestions?
Thanks in advance! :)
I presume you mean this:
{
"metadata": [
{ "key": "abc", "value": "borf" },
{ "key": "cdc", "value": "biff" }
]
},
{
"metadata": [
{ "key": "bbc", "value": "barf" },
{ "key": "abc", "value": "borf" },
{ "key": "abc", "value": "barf" }
]
}
Where if you filter for "abc" and get the distinct "value" entries like this:
db.collection.aggregate([
{ "$match": { "metadata.key": "abc" } },
{ "$unwind": "$metadata" },
{ "$match": { "metadata.key": "abc" } },
{ "$group": {
"_id": "$metadata.value"
}}
])
Or even better:
db.collection.aggregate([
{ "$match": { "metadata.key": "abc" } },
{ "$redact": {
"$cond": {
"if": { "$eq": [ { "$ifNull": [ "$key", "abc" ] }, "abc" ] },
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}},
{ "$unwind": "$metadata" },
{ "$group": {
"_id": "$metadata.value",
"count": { "$sum": 1 }
}}
])
Which would basically give:
{ "_id": "barf", "count": 1 },
{ "_id": "borf", "count": 2 }
But it is not possible for this to just be an array of "barf" and "borf". The distinct() method does an array of keys only, but it is also very limited. Therefore it can only do this:
db.collection.distinct("metadata.value",{ "metadata.key": "abc" })
[ "biff", "borf", "barf" ]
Which is incorrect as a result. So just take the "document" results from above and apply some "post processing":
db.collection.aggregate([
{ "$match": { "metadata.key": "abc" } },
{ "$redact": {
"$cond": {
"if": { "$eq": [ { "$ifNull": [ "$key", "abc" ] }, "abc" ] },
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}},
{ "$unwind": "$metadata" },
{ "$group": {
"_id": "$metadata.value"
}}
]).map(function(doc) {
return doc._id;
})
And that result is a plain array of just the distinct values:
[ "borf", "barf" ]

Filter subdocument by datetime

I've the following model
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
author: { type: Schema.Types.ObjectId }
});
var conversationSchema = new Schema({
title: { type: String },
author: { type : Schema.Types.ObjectId },
members: [ { type: Schema.Types.ObjectId } ],
creationDate: { type: Date, default: Date.now },
lastUpdate: { type: Date, default: Date.now },
comments: [ messageSchema ]
});
I want to create two methods to get the comments generated after a date by user or by conversationId.
By User
I tried with the following method
var query = {
members : { $all : [ userId, otherUserId ], "$size" : 2 }
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
When there are no comments after the specified date (at from) the method returns [] or null
By conversationId
The same happen when I try to get by user id
var query = { _id : conversationId
, comments : { $elemMatch : { creationDate : { $gte: from } } }
};
Is there any way to make the method returns the conversation information with an empty comments?
Thank you!
Sounds like a couple of problems here, but stepping through them all
In order to get more than a single match "or" none from an array, you need the aggregation framework or mapReduce to do this. You could try "projecting" with $elemMatch but this can only return the "first" match. i.e:
{ "a": [1,2,3] }
// A projection $elemMatch must be keyed by the array field it filters ("a" here).
db.collection.find({ },{ "a": { "$elemMatch": { "$gte": 2 } } })
{ "a": [2] }
So standard projection does not work for this. It can return an "empty" array but it can also only return the "first" element that is matched.
Moving along, you also have this in your code:
{ $all : [ userId, otherUserId ], "$site" : 2 }
Where $site is not a valid operator. I think you mean $size but there are actually "two" operators with that name and your intent may not be clear here.
If you mean that the array you are testing must have "only two" elements, then this is the operator for you. If you meant that the matched conversation between the two people had to be equal to both in the match, then $all does this anyway so the $size becomes redundant in either case unless you don't want anyone else in the conversation.
On to the aggregation problem. You need to "filter" the content of the array in a "non-destructive way" in order to get more than one match or an empty array.
The best approach for this is with modern MongoDB features available from 2.6, which allows the array content to be filtered without processing $unwind:
Model.aggregate(
[
{ "$match": {
"members": { "$all": [userId,otherUserId] }
}},
{ "$project": {
"title": 1,
"author": 1,
"members": 1,
"creationDate": 1,
"lastUpdate": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
],
function(err,result) {
}
);
That uses $map which can process an expression against each array element. In this case the values are tested under the $cond ternary to either return the array element where the condition is true or otherwise return false as the element.
These are then "filtered" by the $setDifference operator which essentially compares the resulting array of $map to the other array [false]. This removes any false values from the result array and only leaves matched elements or no elements at all.
An alternative may have been $redact, but since your document contains "creationDate" at multiple levels, this messes with the logic used with its $$DESCEND operator. This rules that action out.
In earlier versions "not destroying" the array needs to be treated with care. So you need to do much the same "filter" of results in order to get the "empty" array you want:
// Pipeline for MongoDB versions without $map/$setDifference: returns each
// conversation shared by both users with only the comments created at or
// after `from`, keeping the conversation (with an empty array) when nothing matches.
// `Model`, `userId`, `otherUserId` and `from` are expected to be supplied by the caller.
Model.aggregate(
  [
    // Conversations where both users are members.
    { "$match": {
      "$and": [
        { "members": userId },
        { "members": otherUserId }
      ]
    }},

    // One document per comment so each can be tested individually.
    { "$unwind": "$comments" },

    // Rebuild the conversation: matching comments are kept, non-matching
    // ones collapse to a single `false` (via $addToSet), and the number of
    // matches is counted.
    { "$group": {
      "_id": "$_id",
      "title": { "$first": "$title" },
      "author": { "$first": "$author" },
      "members": { "$first": "$members" },
      "creationDate": { "$first": "$creationDate" },
      "lastUpdate": { "$first": "$lastUpdate" },
      "comments": {
        "$addToSet": {
          "$cond": [
            { "$gte": [ "$comments.creationDate", from ] },
            "$comments",
            false
          ]
        }
      },
      "matchedSize": {
        "$sum": {
          "$cond": [
            { "$gte": [ "$comments.creationDate", from ] },
            1,
            0
          ]
        }
      }
    }},

    // Unwind again to drop the `false` placeholder, unless it is the only
    // entry (matchedSize 0), in which case it is kept so the conversation
    // itself is not lost.
    { "$unwind": "$comments" },
    { "$match": {
      "$or": [
        { "comments": { "$ne": false } },
        { "matchedSize": 0 }
      ]
    }},

    // Reassemble the filtered comments array.
    { "$group": {
      "_id": "$_id",
      "title": { "$first": "$title" },
      "author": { "$first": "$author" },
      "members": { "$first": "$members" },
      "creationDate": { "$first": "$creationDate" },
      "lastUpdate": { "$first": "$lastUpdate" },
      "comments": { "$push": "$comments" }
    }},

    // A conversation with no matches now holds [false]; swap it for [].
    { "$project": {
      "title": 1,
      "author": 1,
      "members": 1,
      "creationDate": 1,
      "lastUpdate": 1,
      "comments": {
        "$cond": [
          { "$eq": [ "$comments", [false] ] },
          { "$const": [] },
          "$comments"
        ]
      }
    }}
  ],
  function(err,result) {
  }
)
This does much of the same things, but longer. In order to look at the array content you need to $unwind the content. When you $group back, you look at each element to see if it matches the condition to decide what to return, also keeping a count of the matches.
This is going to put some ( one with $addToSet ) false results in the array or only an array with the entry false where there are no matches. So you filter these out with $match, while also testing on the matched "count" to see if no matches were found. If no match was found then you don't throw away that item.
Instead you replace the [false] arrays with empty arrays in a final $project.
So depending on your MongoDB version this is either "fast/easy" or "slow/hard" to process. Compelling reasons to update a version already many years old.
Working example
// Self-contained demo: seeds three members and one conversation with three
// comments, then runs the $map/$setDifference pipeline to return only the
// comments created at or after the second comment's creationDate.
var async = require('async'),
mongoose = require('mongoose'),
Schema = mongoose.Schema;
mongoose.connect('mongodb://localhost/aggtest');
var memberSchema = new Schema({
name: { type: String }
});
// Embedded comment; creationDate defaults to the time of creation.
var messageSchema = new Schema({
creationDate: { type: Date, default: Date.now },
comment: { type: String },
});
var conversationSchema = new Schema({
members: [ { type: Schema.Types.ObjectId } ],
comments: [messageSchema]
});
var Member = mongoose.model( 'Member', memberSchema );
var Conversation = mongoose.model( 'Conversation', conversationSchema );
// Each step hands its results to the next one through `callback`.
async.waterfall(
[
// Empty both collections so every run starts from a clean state
function(callback) {
async.each([Member,Conversation],function(model,callback) {
model.remove({},callback);
},
function(err) {
callback(err);
});
},
// Create the three members; the created documents are passed on as `names`
function(callback) {
async.map(["bill","ted","fred"],function(name,callback) {
Member.create({ "name": name },callback);
},callback);
},
// Create a conversation that has every created member in `members`
function(names,callback) {
var conv = new Conversation();
names.forEach(function(el) {
conv.members.push(el._id);
});
conv.save(function(err,conv) {
callback(err,conv,names)
});
},
// Push one comment per member, one at a time (eachSeries), using the
// member's name as the comment text
function(conv,names,callback) {
async.eachSeries(names,function(name,callback) {
Conversation.update(
{ "_id": conv._id },
{ "$push": { "comments": { "comment": name.name } } },
callback
);
},function(err) {
callback(err,names);
});
},
// Read the conversation back and take the second comment's creationDate
// as the `from` cut-off for the query below
function(names,callback) {
Conversation.findOne({},function(err,conv) {
callback(err,names,conv.comments[1].creationDate);
});
},
// Run the aggregation and print the result
function(names,from,callback) {
var ids = names.map(function(el) {
return el._id
});
var pipeline = [
// Conversations that contain both of the first two members
{ "$match": {
"$and": [
{ "members": ids[0] },
{ "members": ids[1] }
]
}},
// Map each comment to itself when creationDate >= from, otherwise to false,
// then remove the false entries with $setDifference
{ "$project": {
"members": 1,
"comments": {
"$setDifference": [
{ "$map": {
"input": "$comments",
"as": "c",
"in": { "$cond": [
{ "$gte": [ "$$c.creationDate", from ] },
"$$c",
false
]}
}},
[false]
]
}
}}
];
//console.log(JSON.stringify(pipeline, undefined, 2 ));
Conversation.aggregate(
pipeline,
function(err,result) {
if(err) throw err;
console.log(JSON.stringify(result, undefined, 2 ));
callback(err);
}
)
}
],
// Final callback: rethrow any error, otherwise exit the process
function(err) {
if (err) throw err;
process.exit();
}
);
Which produces this output:
[
{
"_id": "55a63133dcbf671918b51a93",
"comments": [
{
"comment": "ted",
"_id": "55a63133dcbf671918b51a95",
"creationDate": "2015-07-15T10:08:51.217Z"
},
{
"comment": "fred",
"_id": "55a63133dcbf671918b51a96",
"creationDate": "2015-07-15T10:08:51.220Z"
}
],
"members": [
"55a63133dcbf671918b51a90",
"55a63133dcbf671918b51a91",
"55a63133dcbf671918b51a92"
]
}
]
Note the "comments" only contain the last two entries which are "greater than or equal" to the date which was used as input ( being the date from the second comment ).