Check for missing field or null value in mongoDB atlas - mongodb

I am using mongodb atlas for full text search.
My sample collection looks like this :
{
"_id": "62fdfd7518da050007f035c5",
"expiryDate": "2022-08-18T23:59:59+05:30",
"arrayField" : ['abc', 'def', 'ghi', 'jkl']
},
{
"_id": "62fdfd7518da050007f035c6",
"expiryDate": null,
"arrayField" : ['abc','jkl']
},
{
"_id": "62fdfd7518da050007f035c7",
"arrayField" : []
},
{
"_id": "62fdfd7518da050007f035c8",
"expiryDate": null
}
expiryDate is a Date type field and arrayField is an Array type field.
My goal is to get all documents where either :
expiryDate doesn't exist OR
If expiryDate does exist, then it must be null OR
If expiryDate does exist, then it must be greater than the current time.
My current atlas aggregation looks like :
{
'compound' : {
'should' : [
{
'compound' : {
'mustNot' : [{
"exists": {
"path": "expiryDate",
}
}]
}
},
{
"range": {
"path": "expiryDate",
'gte': new Date()
}
}
],
'minimumShouldMatch' : 1
}
}
This is not returning the documents where the expiryDate field has a null value; it only matches the one should clause where expiryDate is greater than or equal to the current time. I want it to also return all documents where expiryDate is null.
Please advise.

You can use the $exists operator (docs) to check if an element exists, and if it does, run a check on its value.

So I tried with multiple clauses and approaches and found that there are two solutions to this problem :
Use a combination of the must, should and mustNot clauses of the compound operator:
{
'compound' : {
'should' : [
{
'compound' : {
'mustNot' : [{
"exists": {
"path": "expiryDate",
}
}]
}
},
{
"compound": {
"must": [
{
"exists": {
"path": "expiryDate"
}
}
],
"mustNot": [
{
"range": {
"path": "expiryDate",
"lt": new Date()
}
}
]
}
}
{
"range": {
"path": "expiryDate",
'gte': new Date()
}
}
],
'minimumShouldMatch' : 1
}
}
And the second one is less optimized but works. Since in the end it's an aggregation, we can use a $match stage right after the $search stage, like so:
db.exampleCollection.aggregate([
{
"$search": {
"index": "default",
"compound": {
"must": [
...some conditions
],
"filter": [
...some clauses
]
}
}
},
{
"$match": [...some other conditions]
},
{
"$project": {
...some fields
}
},
{
"$skip": 0
},
{
"$limit": 10
},
{
"$sort": {
"score": 1
}
}
])
Hope it helps someone 🙂

#PawanSaxena I'm trying to do something similar, i.e., finding documents whose date field exists (and) with null value. Did you test out just the 2nd goal in your original post. Somehow it didn't work for me. Here is my testing code:
{
"compound": {
"must": [
{
"exists": {
"path": "expiryDate"
}
}
],
"mustNot": [ /// should it be "must"?
{
"range": {
"path": "expiryDate",
"lt": new Date() /// see below _MONGOSH, new Date() will output current date
// "lt": 1 /// this doesn't work, either
}
}
]
}
}

Related

MongoDB query to replace NULL values

I'm currently working with a MongoDB database and I have fields that have a value of NULL is there a way to run a query that will replace these NULL fields with a value of "Missing" instead?
An example of the document is:
{
"_id" : 1,
"Users" : [
{
"name" : "John Davies",
"age" : null,
"place_of_birth" : "Cardigan"
},
{
"name" : "Edward Jones",
"age" : null,
"place_of_birth" : null
},
{
"name" : "Daniel Rhys",
"age" : NumberLong(63),
"place_of_birth" : "Cardigan"
},
{
"name" : null,
"age" : NumberLong(61),
"place_of_birth" : "Cardigan"
},
{
"name" : "John Davies ",
"age" : null,
"place_of_birth" : "Cardigan"
}
]
}
Demo - https://mongoplayground.net/p/dsI5G6zfbLr
Use the filtered positional operator $[<identifier>] together with arrayFilters
db.collection.update(
{},
{ $set: { "Users.$[u].age": "missing" } },
{ arrayFilters: [ { "u.age": null } ], multi: true}
)
Combine multiple queries into 1 using db.collection.bulkWrite
db.collection.bulkWrite( [
{ updateMany :
{
"filter": {},
"update": { $set: { "Users.$[u].age": "missing" } },
"arrayFilters": [ { "u.age": null } ],
}
},
{ updateMany :
{
"filter": {},
"update": { $set: { "Users.$[u].name": "missing" } },
"arrayFilters": [ { "u.name": null } ],
}
},
{ updateMany :
{
"filter": {},
"update": { $set: { "Users.$[u].place_of_birth": "missing" } },
"arrayFilters": [ { "u.place_of_birth": null } ],
}
}
] )
Update for MongoDB Version 3.2+
while (db.collection.find({$or:[{"Users.age":null},{"Users.name":null},{"Users.place_of_birth":null}]}).count()) {
db.collection.bulkWrite( [
{ updateMany :
{
"filter": { "Users.age": null },
"update": { $set: { "Users.$.age": "missing" } }
}
},
{ updateMany :
{
"filter": { "Users.name": null },
"update": { $set: { "Users.$.name": "missing" } },
}
},
{ updateMany :
{
"filter": { "Users.place_of_birth": null },
"update": { $set: { "Users.$.place_of_birth": "missing" } },
}
}
] )
}
Try update with aggregation pipeline starting from MongoDB 4.2,
$map to iterate loop of Users array
$objectToArray to convert current object in $map to array key-value pair
$map to iterate loop of above converted array
$ifNull to check if value is null then replace Missing otherwise remain same
$arrayToObject convert above key-value array to object format
db.collection.update({},
[{
$set: {
Users: {
$map: {
input: "$Users",
in: {
$arrayToObject: {
$map: {
input: { $objectToArray: "$$this" },
in: {
k: "$$this.k",
v: { $ifNull: ["$$this.v", "Missing"] }
}
}
}
}
}
}
}
}],
{ multi: true }
)
Playground
MongoDB version 3.2 or above:
set default value for replacement in variable nullReplace
find() query to get all documents from your collection and loop through forEach
for loop of user object and check condition if value is null then replace nullReplace variable
return the user object
updateOne() to update updated Users array in your collection
// Replace every null-valued property of each element of `Users` with a
// placeholder string, one document at a time (mongo shell script).
var nullReplace = "Missing";
db.collection.find({}).forEach(function(doc) {
  // Skip documents that have no Users array; there is nothing to rewrite.
  if (!Array.isArray(doc.Users)) return;
  // The callback parameter must be the user object itself; the original named
  // it `u` and then read an undefined `userObj`, which threw a ReferenceError.
  var Users = doc.Users.map(function(userObj) {
    for (var key in userObj) {
      if (userObj[key] === null) userObj[key] = nullReplace;
    }
    return userObj;
  });
  // Write the whole rewritten array back to the same document.
  db.collection.updateOne({ _id: doc._id }, { $set: { Users: Users } });
});

Query array by stored index in document in MongoDB

I have a document like this
{
"status": {
"current": 0,
"priority": [{
"operationName": "PHOTO",
"status": "WAITING"
},
{
"operationName": "DESIGN",
"status": "NOTARRIVED"
},
{
"operationName": "COLOR_SEPARATION",
"status": "NOTARRIVED"
}]
}
}
and want to query on data like this
{
"status.priority.$status.current.operationName": {
$in: ['SERVICES', 'PHOTO']
}
}
when I query like this
{
"status.priority.0.operationName": {
$in: ['SERVICES', 'PHOTO']
}
}
it returns the data needed as the 'PHOTO' is the current operation.
I need to query based on an index of the array and this index is stored in the document in status.current
any hint?
UPDATE
After question solved I want to optimize it.
You can use $arrayElemAt with $expr in 3.6.
Something like
db.colname.find(
{"$expr":{
"$in":[
{"$arrayElemAt":["$status.priority.operationName","$status.current"]},
['DESIGN', 'COLOR_SEPARATION', 'PHOTO']
]
}}
)
For this you need to use aggregation
db.collname.aggregate([{
"$project": {
"_id": 1,
priority: { $arrayElemAt: ["$status.priority", '$status.current'] },
}
},
{
$match: {
"priority.operationName": {
$in: ['DESIGN', 'COLOR_SEPARATION', 'PHOTO']
}
}
}
])
This will work for you.
Result will be like
{
"_id" : ObjectId("5b6b656818883ec018d1542d"),
"priority" : {
"operationName" : "PHOTO",
"status" : "WAITING"
}
}

Matching ObjectId to String for $graphLookup

I'm trying to run a $graphLookup as demonstrated in the screenshot below:
The objective is to, given a specific record (the commented-out $match there), retrieve its full "path" through the immediateAncestors property. As you can see, that's not happening.
I introduced $convert here to deal with _id from collection as string, believing it could be possible to "match" with _id from immediateAncestors records list (which is a string).
So, I did run another test with different data (no ObjectIds involved):
db.nodos.insert({"id":5,"name":"cinco","children":[{"id":4}]})
db.nodos.insert({"id":4,"name":"quatro","ancestors":[{"id":5}],"children":[{"id":3}]})
db.nodos.insert({"id":6,"name":"seis","children":[{"id":3}]})
db.nodos.insert({"id":1,"name":"um","children":[{"id":2}]})
db.nodos.insert({"id":2,"name":"dois","ancestors":[{"id":1}],"children":[{"id":3}]})
db.nodos.insert({"id":3,"name":"três","ancestors":[{"id":2},{"id":4},{"id":6}]})
db.nodos.insert({"id":7,"name":"sete","children":[{"id":5}]})
And the query:
db.nodos.aggregate( [
{ $match: { "id": 3 } },
{ $graphLookup: {
from: "nodos",
startWith: "$ancestors.id",
connectFromField: "ancestors.id",
connectToField: "id",
as: "ANCESTORS_FROM_BEGINNING"
}
},
{ $project: {
"name": 1,
"id": 1,
"ANCESTORS_FROM_BEGINNING": "$ANCESTORS_FROM_BEGINNING.id"
}
}
] )
...which outputs what I was expecting (the five records directly and indirectly connected to the one with id 3):
{
"_id" : ObjectId("5afe270fb4719112b613f1b4"),
"id" : 3.0,
"name" : "três",
"ANCESTORS_FROM_BEGINNING" : [
1.0,
4.0,
6.0,
5.0,
2.0
]
}
The question is: is there a way to achieve the objective I mentioned in the beginning?
I'm running Mongo 3.7.9 (from official Docker)
Thanks in advance!
You are currently using a development version of MongoDB which has some features enabled expected to be released with MongoDB 4.0 as an official release. Note that some features may be subject to change before the final release, so production code should be aware of this before you commit to it.
Why $convert fails here
Probably the best way to explain this is to look at your altered sample but replacing with ObjectId values for _id and "strings" for those under the arrays:
{
"_id" : ObjectId("5afe5763419503c46544e272"),
"name" : "cinco",
"children" : [ { "_id" : "5afe5763419503c46544e273" } ]
},
{
"_id" : ObjectId("5afe5763419503c46544e273"),
"name" : "quatro",
"ancestors" : [ { "_id" : "5afe5763419503c46544e272" } ],
"children" : [ { "_id" : "5afe5763419503c46544e277" } ]
},
{
"_id" : ObjectId("5afe5763419503c46544e274"),
"name" : "seis",
"children" : [ { "_id" : "5afe5763419503c46544e277" } ]
},
{
"_id" : ObjectId("5afe5763419503c46544e275"),
"name" : "um",
"children" : [ { "_id" : "5afe5763419503c46544e276" } ]
}
{
"_id" : ObjectId("5afe5763419503c46544e276"),
"name" : "dois",
"ancestors" : [ { "_id" : "5afe5763419503c46544e275" } ],
"children" : [ { "_id" : "5afe5763419503c46544e277" } ]
},
{
"_id" : ObjectId("5afe5763419503c46544e277"),
"name" : "três",
"ancestors" : [
{ "_id" : "5afe5763419503c46544e273" },
{ "_id" : "5afe5763419503c46544e274" },
{ "_id" : "5afe5763419503c46544e276" }
]
},
{
"_id" : ObjectId("5afe5764419503c46544e278"),
"name" : "sete",
"children" : [ { "_id" : "5afe5763419503c46544e272" } ]
}
That should give a general simulation of what you were trying to work with.
What you attempted was to convert the _id value into a "string" via $project before entering the $graphLookup stage. The reason this fails is whilst you did an initial $project "within" this pipeline, the problem is that the source for $graphLookup in the "from" option is still the unaltered collection and therefore you don't get the correct details on the subsequent "lookup" iterations.
db.strcoll.aggregate([
{ "$match": { "name": "três" } },
{ "$addFields": {
"_id": { "$toString": "$_id" }
}},
{ "$graphLookup": {
"from": "strcoll",
"startWith": "$ancestors._id",
"connectFromField": "ancestors._id",
"connectToField": "_id",
"as": "ANCESTORS_FROM_BEGINNING"
}},
{ "$project": {
"name": 1,
"ANCESTORS_FROM_BEGINNING": "$ANCESTORS_FROM_BEGINNING._id"
}}
])
Does not match on the "lookup" therefore:
{
"_id" : "5afe5763419503c46544e277",
"name" : "três",
"ANCESTORS_FROM_BEGINNING" : [ ]
}
"Patching" the problem
However that is the core problem and not a failing of $convert or its aliases itself. In order to make this actually work we can instead create a "view" which presents itself as a collection for the sake of input.
I'll do this the other way around and convert the "strings" to ObjectId via $toObjectId:
db.createView("idview","strcoll",[
{ "$addFields": {
"ancestors": {
"$ifNull": [
{ "$map": {
"input": "$ancestors",
"in": { "_id": { "$toObjectId": "$$this._id" } }
}},
"$$REMOVE"
]
},
"children": {
"$ifNull": [
{ "$map": {
"input": "$children",
"in": { "_id": { "$toObjectId": "$$this._id" } }
}},
"$$REMOVE"
]
}
}}
])
Using the "view" however means that the data is consistently seen with the values converted. So the following aggregation using the view:
db.idview.aggregate([
{ "$match": { "name": "três" } },
{ "$graphLookup": {
"from": "idview",
"startWith": "$ancestors._id",
"connectFromField": "ancestors._id",
"connectToField": "_id",
"as": "ANCESTORS_FROM_BEGINNING"
}},
{ "$project": {
"name": 1,
"ANCESTORS_FROM_BEGINNING": "$ANCESTORS_FROM_BEGINNING._id"
}}
])
Returns the expected output:
{
"_id" : ObjectId("5afe5763419503c46544e277"),
"name" : "três",
"ANCESTORS_FROM_BEGINNING" : [
ObjectId("5afe5763419503c46544e275"),
ObjectId("5afe5763419503c46544e273"),
ObjectId("5afe5763419503c46544e274"),
ObjectId("5afe5763419503c46544e276"),
ObjectId("5afe5763419503c46544e272")
]
}
Fixing the problem
With all of that said, the real issue here is that you have some data which "looks like" an ObjectId value and is in fact valid as an ObjectId, however it has been recorded as a "string". The basic issue to everything working as it should is that the two "types" are not the same and this results in an equality mismatch as the "joins" are attempted.
So the real fix is still the same as it always has been, which is to instead go through the data and fix it so that the "strings" are actually also ObjectId values. These will then match the _id keys which they are meant to refer to, and you are saving a considerable amount of storage space since an ObjectId takes up a lot less space to store than its string representation in hexadecimal characters.
Using MongoDB 4.0 methods, you "could" actually use the "$toObjectId" in order to write a new collection, in much the same manner that we created the "view" earlier:
// Write a corrected copy of the collection: convert the string `_id` values
// inside the `ancestors` and `children` arrays to ObjectId and store the
// result in the "fixedcol" collection via $out.
db.strcoll.aggregate([
  { "$addFields": {
    "ancestors": {
      // $map yields null when the array is absent; $$REMOVE then drops the
      // field instead of storing a null.
      "$ifNull": [
        { "$map": {
          "input": "$ancestors",
          "in": { "_id": { "$toObjectId": "$$this._id" } }
        }},
        "$$REMOVE"
      ]
    },
    "children": {
      "$ifNull": [
        { "$map": {
          "input": "$children",
          "in": { "_id": { "$toObjectId": "$$this._id" } }
        }},
        "$$REMOVE"
      ]
    }
  }},
  // The comma separating the two pipeline stages was missing.
  { "$out": "fixedcol" }
])
Or of course where you "need" to keep the same collection, then the traditional "loop and update" remains the same as what has always been required:
// In-place conversion (mongo shell): rewrite the `children` and `ancestors`
// arrays of every document so that each element's `_id` is an ObjectId,
// sending the updates to the server in batches through bulkWrite.
var updates = [];
db.strcoll.find().forEach(doc => {
  var $set = {};
  if ( Array.isArray(doc.children) )
    $set.children = doc.children.map(e => ({ _id: new ObjectId(e._id) }));
  if ( Array.isArray(doc.ancestors) )
    $set.ancestors = doc.ancestors.map(e => ({ _id: new ObjectId(e._id) }));

  // A document with neither array would produce an empty $set, which older
  // servers reject and which would fail the whole batch; skip it.
  if ( Object.keys($set).length === 0 )
    return;

  updates.push({
    "updateOne": {
      "filter": { "_id": doc._id },
      "update": { "$set": $set }
    }
  });

  // Flush once the batch grows past 1000 operations.
  if ( updates.length > 1000 ) {
    db.strcoll.bulkWrite(updates);
    updates = [];
  }
});

// Flush whatever is left after the cursor is exhausted.
if ( updates.length > 0 ) {
  db.strcoll.bulkWrite(updates);
  updates = [];
}
Which is actually a bit of a "sledgehammer" due to actually overwriting the entire array in a single go. Not a great idea for a production environment, but enough as a demonstration for the purposes of this exercise.
Conclusion
So whilst MongoDB 4.0 will add these "casting" features which can indeed be very useful, their actual intent is not really for cases such as this. They are in fact much more useful as demonstrated in the "conversion" to a new collection using an aggregation pipeline than most other possible uses.
Whilst we "can" create a "view" which transforms the data types to enable things like $lookup and $graphLookup to work where the actual collection data differs, this really is only a "band-aid" on the real problem as the data types really should not differ, and should in fact be permanently converted.
Using a "view" actually means that the aggregation pipeline for construction needs to effectively run every time the "collection" ( actually a "view" ) is accessed, which creates a real overhead.
Avoiding overhead is usually a design goal, therefore correcting such data storage mistakes is imperative to getting real performance out of your application, rather than just working with "brute force" that will only slow things down.
A much safer "conversion" script applies "matched" updates to each array element instead. The code here requires NodeJS v10.x and a recent release of the MongoDB node driver 3.1.x:
const { MongoClient, ObjectID: ObjectId } = require('mongodb');
const EJSON = require('mongodb-extended-json');
// Connection string used by MongoClient.connect below.
const uri = 'mongodb://localhost/';
// Pretty-print a value as extended JSON (2-space indent) to stdout.
const log = data => console.log(EJSON.stringify(data, undefined, 2));
// Main script: for each document in test.strcoll that still holds string
// `_id` values inside its `ancestors` / `children` arrays, build one updateOne
// that replaces each element's `_id` with an ObjectId, matched per element
// through arrayFilters, and send the operations with bulkWrite.
(async function() {
try {
const client = await MongoClient.connect(uri);
let db = client.db('test');
let coll = db.collection('strcoll');
// Array fields whose elements carry the `_id` references to convert.
let fields = ["ancestors", "children"];
// Select documents where either field has an element `_id` of type string,
// and project only those two fields (plus the document _id).
let cursor = coll.find({
$or: fields.map(f => ({ [`${f}._id`]: { "$type": "string" } }))
}).project(fields.reduce((o,f) => ({ ...o, [f]: 1 }),{}));
// Pending bulk operations.
let batch = [];
for await ( let { _id, ...doc } of cursor ) {
// Update paths and their matching filters for this one document.
let $set = {};
let arrayFilters = [];
for ( const f of fields ) {
if ( doc.hasOwnProperty(f) ) {
// One "$set" path per array element. The filtered positional identifier is
// the field's first letter plus the element index (e.g. a0, c1), and the new
// value is the element's `_id` passed through ObjectId().
$set = { ...$set,
...doc[f].reduce((o,{ _id },i) =>
({ ...o, [`${f}.$[${f.substr(0,1)}${i}]._id`]: ObjectId(_id) }),
{})
};
// One filter per identifier, built with the same letter+index naming, that
// matches the element by its current `_id` value.
arrayFilters = [ ...arrayFilters,
...doc[f].map(({ _id },i) =>
({ [`${f.substr(0,1)}${i}._id`]: _id }))
];
}
}
// Only queue an update when at least one element filter was produced.
if (arrayFilters.length > 0)
batch = [ ...batch,
{ updateOne: { filter: { _id }, update: { $set }, arrayFilters } }
];
// Flush once more than 1000 operations are queued; this result is not logged.
if ( batch.length > 1000 ) {
let result = await coll.bulkWrite(batch);
batch = [];
}
}
// Flush the remainder, logging both the operations and the server result.
if ( batch.length > 0 ) {
log({ batch });
let result = await coll.bulkWrite(batch);
log({ result });
}
await client.close();
} catch(e) {
// Any failure is reported to stderr; the script still exits below.
console.error(e)
} finally {
// Exit the process whether the run succeeded or failed.
process.exit()
}
})()
Produces and executes bulk operations like these for the seven documents:
{
"updateOne": {
"filter": {
"_id": {
"$oid": "5afe5763419503c46544e272"
}
},
"update": {
"$set": {
"children.$[c0]._id": {
"$oid": "5afe5763419503c46544e273"
}
}
},
"arrayFilters": [
{
"c0._id": "5afe5763419503c46544e273"
}
]
}
},
{
"updateOne": {
"filter": {
"_id": {
"$oid": "5afe5763419503c46544e273"
}
},
"update": {
"$set": {
"ancestors.$[a0]._id": {
"$oid": "5afe5763419503c46544e272"
},
"children.$[c0]._id": {
"$oid": "5afe5763419503c46544e277"
}
}
},
"arrayFilters": [
{
"a0._id": "5afe5763419503c46544e272"
},
{
"c0._id": "5afe5763419503c46544e277"
}
]
}
},
{
"updateOne": {
"filter": {
"_id": {
"$oid": "5afe5763419503c46544e274"
}
},
"update": {
"$set": {
"children.$[c0]._id": {
"$oid": "5afe5763419503c46544e277"
}
}
},
"arrayFilters": [
{
"c0._id": "5afe5763419503c46544e277"
}
]
}
},
{
"updateOne": {
"filter": {
"_id": {
"$oid": "5afe5763419503c46544e275"
}
},
"update": {
"$set": {
"children.$[c0]._id": {
"$oid": "5afe5763419503c46544e276"
}
}
},
"arrayFilters": [
{
"c0._id": "5afe5763419503c46544e276"
}
]
}
},
{
"updateOne": {
"filter": {
"_id": {
"$oid": "5afe5763419503c46544e276"
}
},
"update": {
"$set": {
"ancestors.$[a0]._id": {
"$oid": "5afe5763419503c46544e275"
},
"children.$[c0]._id": {
"$oid": "5afe5763419503c46544e277"
}
}
},
"arrayFilters": [
{
"a0._id": "5afe5763419503c46544e275"
},
{
"c0._id": "5afe5763419503c46544e277"
}
]
}
},
{
"updateOne": {
"filter": {
"_id": {
"$oid": "5afe5763419503c46544e277"
}
},
"update": {
"$set": {
"ancestors.$[a0]._id": {
"$oid": "5afe5763419503c46544e273"
},
"ancestors.$[a1]._id": {
"$oid": "5afe5763419503c46544e274"
},
"ancestors.$[a2]._id": {
"$oid": "5afe5763419503c46544e276"
}
}
},
"arrayFilters": [
{
"a0._id": "5afe5763419503c46544e273"
},
{
"a1._id": "5afe5763419503c46544e274"
},
{
"a2._id": "5afe5763419503c46544e276"
}
]
}
},
{
"updateOne": {
"filter": {
"_id": {
"$oid": "5afe5764419503c46544e278"
}
},
"update": {
"$set": {
"children.$[c0]._id": {
"$oid": "5afe5763419503c46544e272"
}
}
},
"arrayFilters": [
{
"c0._id": "5afe5763419503c46544e272"
}
]
}
}

mongodb - how to aggregate/filter elements in different subdocuments?

I have a doc looks like below:
{
"contents": [
{
"translationId": "MENU",
},
{
"translationId": "PAGETITLE"
}
],
"slides": [
{
"translationId": "SLIDE1",
"imageUrl": "assets/img/room/1.jpg",
"desc": {
"translationId": "DESC",
}
},
{
"translationId": "SLIDE2",
"imageUrl": "assets/img/aa/2.jpg"
}
]}
I would like to aggregate against the translationId no matter in which subdocument the data is. My current query is like below which does not give me the expected result.
db.cursor.find({"contents.translationId": { $exists: true }},
{"contents.translationId":1,'slides.translationId':1,"slides.desc.translationId":1,'_id':0})
I expect result like below. Is there a good approach to retrieve such a result directly from mongodb query?
[
{
"translationId": "MENU"
},
{
"translationId": "PAGETITLE"
},
{
"translationId": "SLIDE1"
},
{
"translationId": "SLIDE2"
},
{
"translationId": "DESC"
}
]
Additionally, I might not know in which element translationId might exists. In this case it resides in contents, slides and slides.desc but it might also be under some other elements. Is it possible?
Thanks!
As long as the items are unique you can use the $setUnion operator in modern MongoDB releases (2.6 and over), as well as the $map operator for translation of just the required element from the other array:
db.cursor.aggregate([
{ "$project": {
"joined": {
"$setDifference": [
{ "$setUnion": [
"$contents",
{ "$map": {
"input": "$slides",
"as": "slide",
"in": {
"translationId": "$$slide.translationId"
}
}},
{ "$map": {
"input": "$slides",
"as": "slide",
"in": {
"$cond": [
{ "$ifNull": [ "$$slide.desc.translationId", false] },
{ "translationId": "$$slide.desc.translationId" },
false
]
}
}}
]},
[false]
]
}
}}
])
You also need $setDifference to filter out any false values returned where the "desc" field is not present.
It produces:
{
"_id" : ObjectId("55f13f444db9bc30de351c84"),
"joined" : [
{
"translationId" : "DESC"
},
{
"translationId" : "SLIDE2"
},
{
"translationId" : "SLIDE1"
},
{
"translationId" : "PAGETITLE"
},
{
"translationId" : "MENU"
}
]
}
Of course if you have no idea of the structure "at all", then you need a recursive function with mapReduce instead:
// Collect every "translationId" value found anywhere in each document,
// without knowing the document structure in advance.
db.cursor.mapReduce(
  // Mapper: recursively walk the document and emit one
  // { joined: [ { translationId }, ... ] } value keyed by the document _id.
  function() {
    var tags = [];
    function walkObj(obj) {
      Object.keys(obj).forEach(function(key) {
        var value = obj[key];
        // typeof null is "object", and Object.keys(null) throws, so null
        // values must be excluded before descending.
        if ( value !== null && typeof value === "object" ) {
          walkObj(value);
        } else if ( key === "translationId" ) {
          tags.push({ "translationId": value });
        }
      });
    }
    walkObj(this);
    emit(this._id,{ "joined": tags});
  },
  // Reducer: left empty because each _id is emitted exactly once by the
  // mapper above.
  function(){},
  // Return the results inline instead of writing them to a collection.
  { "out": { "inline": 1 } }
)
Which gives basically the same output as before but of course does not need to be aware of the structure

MongoDb aggregate and group by two fields depending on values

I want to aggregate over a collection where a type is given. If the type is foo I want to group by the field author, if the type is bar I want to group by user.
All this should happen in one query.
Example Data:
{
"_id": 1,
"author": {
"someField": "abc",
},
"type": "foo"
}
{
"_id": 2,
"author": {
"someField": "abc",
},
"type": "foo"
}
{
"_id": 3,
"user": {
"someField": "abc",
},
"type": "bar"
}
This user field is only existing if the type is bar.
So basically something like that... tried to express it with an $or.
function () {
var results = db.vote.aggregate( [
{ $or: [ {
{ $match : { type : "foo" } },
{ $group : { _id : "$author", sumAuthor : {$sum : 1} } } },
{ { $match : { type : "bar" } },
{ $group : { _id : "$user", sumUser : {$sum : 1} } }
} ] }
] );
return results;
}
Does someone have a good solution for this?
I think it can be done by
db.c.aggregate([{
$group : {
_id : {
$cond : [{
$eq : [ "$type", "foo"]
}, "author", "user"]
},
sum : {
$sum : 1
}
}
}]);
The solution below can be cleaned up a bit...
For "bar" (note: for "foo", you have to change a bit)
db.vote.aggregate(
{
$project:{
user:{ $ifNull: ["$user", "notbar"]},
type:1
}
},
{
$group:{
_id:{_id:"$user.someField"},
sumUser:{$sum:1}
}
}
)
Also note: in your final answer, anything that is not of type "bar" will have an _id=null
What you want here is the $cond operator, which is a ternary operator returning a specific value where the condition is true or false.
// Count the documents of each type in a single pass: every document adds 1 to
// the counter that corresponds to its "type" and 0 to the other one.
db.vote.aggregate([
  { "$group": {
    // A single group, so the result is one document holding both counts.
    "_id": null,
    // In the example data, documents that carry a "user" field have type "bar".
    "sumUser": {
      "$sum": {
        "$cond": [ { "$eq": [ "$type", "bar" ] }, 1, 0 ]
      }
    },
    // In the example data, documents that carry an "author" field have type
    // "foo". The original compared against "user" / "auhtor", which never
    // match the stored types, so both sums were always 0.
    "sumAuthor": {
      "$sum": {
        "$cond": [ { "$eq": [ "$type", "foo" ] }, 1, 0 ]
      }
    }
  }}
])
This basically tests the "type" of the current document and decides whether to pass either 1 or 0 to the $sum operation.
This also avoids errant grouping should the "user" and "author" fields contain the same values as they do in your example. The end result is a single document with the count of both types.