Get property with highest value from key value pair - mongodb

In MongoDB, I have documents with a structure like this:
{
_id: "123456...", // an ObjectId
name: "foobar",
classification: {
class_1: 0.45,
class_2: 0.11,
class_3: 0.44
}
}
Using the aggregation pipeline, is it possible to give me an object that contains the highest classification? So, given the above, I would like something like this as result:
{
_id: "123456...", // an ObjectId
name: "foobar",
classification: "class_1"
}
I thought I could use $unwind but the classification property is not an array.
For what it's worth: I know there will always be three properties in classification, so it's ok to hard-code the keys in the query.

You should probably note here that every technique applied is essentially based on "coercion" of the "key/value" pairs into an "array" format for comparison and extraction. So the real lesson to learn is that your document "should" in fact store this as an "array" instead.
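Purely as an illustration (this re-modelled document is hypothetical, not part of the question), that recommended "array" form would look something like:
{
_id: "123456...", // an ObjectId
name: "foobar",
classification: [
{ "k": "class_1", "v": 0.45 },
{ "k": "class_2", "v": 0.11 },
{ "k": "class_3", "v": 0.44 }
]
}
which is exactly the shape that $objectToArray coerces the object into on the fly below. But onto the techniques.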
If you have MongoDB 3.4 then you can use $objectToArray to turn the "keys" into an array so you can get the value:
Dynamic
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$arrayElemAt": [
{ "$map": {
"input": {
"$filter": {
"input": { "$objectToArray": "$classification" },
"as": "c",
"cond": {
"$eq": [
"$$c.v",
{ "$max": {
"$map": {
"input": { "$objectToArray": "$classification" },
"as": "c",
"in": "$$c.v"
}
}}
]
}
}
},
"as": "c",
"in": "$$c.k",
}},
0
]
}
}}
])
Otherwise just do the transformation as you iterate the cursor, if you do not really need it for further aggregation. As a basic JavaScript example:
db.collection.find().map(d => Object.assign(
d,
{ classification: Object.keys(d.classification)
.filter(k => d.classification[k] === Math.max.apply(null,
Object.keys(d.classification).map(k => d.classification[k])
))[0]
}
));
And that's also the same basic logic you would apply using mapReduce if you were actually aggregating something (a rough sketch follows the output below).
Both produce:
/* 1 */
{
"_id" : "123456...",
"name" : "foobar",
"classification" : "class_1"
}
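To illustrate the mapReduce point, here is a rough sketch only; it assumes, for example, that you wanted to count documents per "winning" classification key, and the map function applies the same max-extraction logic shown in the cursor example:
db.collection.mapReduce(
  function() {
    // same "highest value" key extraction, run per document on the server
    var c = this.classification;
    var keys = Object.keys(c);
    var max = Math.max.apply(null, keys.map(function(k) { return c[k]; }));
    emit(keys.filter(function(k) { return c[k] === max; })[0], 1);
  },
  function(key, values) {
    // accumulate the per-key counts
    return Array.sum(values);
  },
  { "out": { "inline": 1 } }
)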
HardCoding
For the "hardcoding" case, which you say is okay, you can construct this with $switch by supplying $max with each of the values:
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$let": {
"vars": {
"max": {
"$max": [
"$classification.class_1",
"$classification.class_2",
"$classification.class_3"
]
}
},
"in": {
"$switch": {
"branches": [
{ "case": { "$eq": [ "$classification.class_1", "$$max" ] }, "then": "class_1" },
{ "case": { "$eq": [ "$classification.class_2", "$$max" ] }, "then": "class_2" },
{ "case": { "$eq": [ "$classification.class_3", "$$max" ] }, "then": "class_3" },
]
}
}
}
}
}}
])
You could of course write that out in longer form using $cond, and then the only real constraint is the change to $max in MongoDB 3.2, which allowed an array of arguments as opposed to its previous role as an "accumulator only":
db.collection.aggregate([
{ "$addFields": {
"classification": {
"$let": {
"vars": {
"max": {
"$max": [
"$classification.class_1",
"$classification.class_2",
"$classification.class_3"
]
}
},
"in": {
"$cond": {
"if": { "$eq": [ "$classification.class_1", "$$max" ] },
"then": "class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$classification.class_2", "$$max" ] },
"then": "class_2",
"else": "class_3"
}
}
}
}
}
}
}}
])
If you were "really" constrained then you could "force" the "max" through a separate pipeline stage, using $map, then $unwind on the array and $group again. This makes the operations compatible with MongoDB 2.6:
db.collection.aggregate([
{ "$project": {
"name": 1,
"classification": 1,
"max": {
"$map": {
"input": [1,2,3],
"as": "e",
"in": {
"$cond": {
"if": { "$eq": [ "$$e", 1 ] },
"then": "$classification.class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$$e", 2 ] },
"then": "$classification.class_2",
"else": "$classification.class_3"
}
}
}
}
}
}
}},
{ "$unwind": "$max" },
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"classification": { "$first": "$classification" },
"max": { "$max": "$max" }
}},
{ "$project": {
"name": 1,
"classification": {
"$cond": {
"if": { "$eq": [ "$classification.class_1", "$max" ] },
"then": "class_1",
"else": {
"$cond": {
"if": { "$eq": [ "$classification.class_2", "$max" ] },
"then": "class_2",
"else": "class_3"
}
}
}
}
}}
])
Going really ancient, we can instead $unwind from $const, which was (and still is) a "hidden" and undocumented operator equal in function to $literal (which is technically aliased to it) in modern versions. Combined with the alternate "array" ternary syntax of $cond, this becomes compatible with all versions since the aggregation framework existed:
db.collection.aggregate([
{ "$project": {
"name": 1,
"classification": 1,
"temp": { "$const": [1,2,3] }
}},
{ "$unwind": "$temp" },
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"classification": { "$first": "$classification" },
"max": {
"$max": {
"$cond": [
{ "$eq": [ "$temp", 1 ] },
"$classification.class_1",
{ "$cond": [
{ "$eq": [ "$temp", 2 ] },
"$classification.class_2",
"$classification.class_3"
]}
]
}
}
}},
{ "$project": {
"name": 1,
"classification": {
"$cond": [
{ "$eq": [ "$max", "$classification.class_1" ] },
"class_1",
{ "$cond": [
{ "$eq": [ "$max", "$classification.class_2" ] },
"class_2",
"class_3"
]}
]
}
}}
])
But it is of course possible, even if extremely messy.

You can use the $indexOfArray operator to find the index of the $max value in classification, followed by projecting the key. $objectToArray (available from version 3.4.4) converts the classification embedded document into an array of key/value pairs.
db.collection.aggregate([
{
"$addFields": {
"classification": {
"$let": {
"vars": {
"classificationkv": {
"$objectToArray": "$classification"
}
},
"in": {
"$let": {
"vars": {
"classificationmax": {
"$arrayElemAt": [
"$$classificationkv",
{
"$indexOfArray": [
"$$classificationkv.v",
{
"$max": "$$classificationkv.v"
}
]
}
]
}
},
"in": "$$classificationmax.k"
}
}
}
}
}
}
])

In the end, I went with a simpler solution, though not as generic as the other ones posted here. I used a $switch statement:
{'$project': {'_id': 1, 'name': 1,
'classification': {'$switch': {
'branches': [
{'case': {'$and': [{'$gt': ['$classification.class_1', '$classification.class_2']},
{'$gt': ['$classification.class_1', '$classification.class_3']}]},
'then': "class_1"},
{'case': {'$and': [{'$gt': ['$classification.class_2', '$classification.class_1']},
{'$gt': ['$classification.class_2', '$classification.class_3']}]},
'then': "class_2"},
{'case': {'$and': [{'$gt': ['$classification.class_3', '$classification.class_1']},
{'$gt': ['$classification.class_3', '$classification.class_2']}]},
'then': "class_3"}],
'default': ''}}
}}
This works for me, but the other answers might be a better option, YMMV.

Related

mongodb aggregation get max number of negative sequence in array

I need to get the max count of a consecutive negative sequence from an array via aggregation. Example documents:
{
"id": 1,
x: [ 1,1,-1,-1,1,1,1,-1,-1,-1,-1]
},
{
"id": 2,
x: [ 1,-1,-1,1,1,1,-1 ]
}
expected result:
{"id": 1,x:4},
{"id": 2,x:2}
Please advise.
You can use $reduce to iterate the array and $cond to apply your logic (consecutive negatives).
The carrier (accumulator value) is in the format:
{
previous: // previous value to compare for continuity
acc: // number of consecutive negatives in the current sequence
max: // length of the longest sequence
}
$let is used to memoise the current accumulator value for reuse in the max calculation. It's optional yet convenient:
db.collection.aggregate([
{
"$set": {
"x": {
"$reduce": {
"input": "$x",
"initialValue": {
previous: 0,
acc: 0,
max: 0
},
"in": {
$let: {
vars: {
result: {
"$cond": {
"if": {
"$and": [
{
"$lt": [
"$$this",
0
]
},
{
"$lt": [
"$$value.previous",
0
]
}
]
},
"then": {
"$add": [
"$$value.acc",
1
]
},
"else": {
"$cond": {
"if": {
"$lt": [
"$$this",
0
]
},
"then": 1,
"else": 0
}
}
}
}
},
in: {
previous: "$$this",
acc: "$$result",
max: {
"$cond": {
"if": {
$gt: [
"$$value.max",
"$$result"
]
},
"then": "$$value.max",
"else": "$$result"
}
}
}
}
}
}
}
}
},
{
"$set": {
x: "$x.max"
}
}
])
Try it on mongoplayground.net.
Here's another way to do it. The general idea is to $reduce the sequence to a string and then $split to make an array filled with strings of each run. Then map the array of strings to an array of string lengths and then take the max.
db.collection.aggregate({
"$project": {
"_id": 0,
"id": 1,
"x": {
"$max": {
"$map": {
"input": {
$split: [
{
"$reduce": {
"input": "$x",
"initialValue": "",
"in": {
$concat: [
"$$value",
{
"$cond": [
{
"$gt": [
"$$this",
0
]
},
"p",
"n"
]
}
]
}
}
},
"p"
]
},
"in": {
"$strLenBytes": "$$this"
}
}
}
}
}
})
Try it on mongoplayground.net.

Use $addToSet conditionally in MongoDB

I have the below MongoDB query, in which I am using $addToSet. Now I want to use it conditionally.
Worksheet.aggregate([
{
"$group": {
"_id": null,
"todayBilling": {
"$sum": {
"$cond": [{ "$and" : [ { "$eq": [ "$isBilling", true] }, { $eq: [ "$date",new Date(moment().format('l'))]}] },"$hours",0 ]
}
},
"todayProjects": { "$addToSet": "$projectId" }
},
},
{ "$addFields": { "todayProjects": { "$size": "$todayProjects" }}},
{
"$lookup":{
"from": "projects",
"let": {},
"pipeline": [
{
"$group": { "_id": null, "count": { "$sum": 1 } }
}
],
"as": "totalProjects"
}
},
{'$unwind':'$totalProjects'}
])
Now, I want the todayProjects count to only include results matching today's date. That is, where "todayProjects": { "$addToSet": "$projectId" } is built, I want to use $cond with the below condition:
{ $eq: [ "$date",new Date(moment().format('l'))]}
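One possible sketch (untested, and simply reusing the same date expression already used in the $sum above) is to let $cond push either the projectId or null into the set:
"todayProjects": {
  "$addToSet": {
    "$cond": [
      { "$eq": [ "$date", new Date(moment().format('l')) ] },
      "$projectId",
      null
    ]
  }
}
and then adjust the existing $size stage so the null placeholder is not counted:
{ "$addFields": {
  "todayProjects": {
    "$size": { "$setDifference": [ "$todayProjects", [null] ] }
  }
}}
$setDifference drops the null entry (and any duplicates, though $addToSet has already removed those), so the remaining $size is the count of distinct projects seen today.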

Join fields when not all have values

I want to build a field through a projection stage in the aggregation pipeline; this field is a combination of other fields' values separated by (-).
If a field is null, empty or missing, it should not be added to the concatenated string.
{$project:{
//trial-1:
finalField:{
$concat:["$field1",'-','$field2','-','$field3',...]
//problem1: $concat will return null if any of its arguments is null or missing
//problem2: the delimiter will still be there even if the field it belongs to doesn't exist
}
//trial-2:
finalField:{
$concat:[
{$cond:[{field1:null},'',{$concat:['$field1','-']},..]
//the problem: {field1:null} fails if the field doesn't exist (i.e. the expression gives true)
//trial-3
finalField:{
$concat:[
{$cond:[{$or:[{field1:null},{field:{$exists:true}},'',
{$concat:['$field1','-']}
]}]}
]
}
]
}
//trial-4 -> using $reduce instead of $concat (same issues)
}
You basically want $ifNull. It's "sort of" like $exists but for aggregation statements, where it returns a default value when the field expression returns null, meaning "not there":
{ "$project": {
"finalField": {
"$concat": [
{ "$ifNull": [ "$field1", "" ] },
"-",
{ "$ifNull": [ "$field2", "" ] },
"-",
{ "$ifNull": [ "$field3", "" ] }
]
}
}}
For example with data like:
{ "field1": "a", "field2": "b", "field3": "c" },
{ "field1": "a", "field2": "b" },
{ "field1": "a", "field3": "c" }
You get, without any error producing of course:
{ "finalField" : "a-b-c" }
{ "finalField" : "a-b-" }
{ "finalField" : "a--c" }
If you want something fancier, then you would instead dynamically work with the names, as in:
{ "$project": {
"finalField": {
"$reduce": {
"input": {
"$filter": {
"input": { "$objectToArray": "$$ROOT" },
"cond": { "$ne": [ "$$this.k", "_id" ] }
}
},
"initialValue": "",
"in": {
"$cond": {
"if": { "$eq": [ "$$value", "" ] },
"then": { "$concat": [ "$$value", "$$this.v" ] },
"else": { "$concat": [ "$$value", "-", "$$this.v" ] }
}
}
}
}
}}
Which can be aware of what fields were actually present and only attempt to join those:
{ "finalField" : "a-b-c" }
{ "finalField" : "a-b" }
{ "finalField" : "a-c" }
You can even manually specify the list of fields if you don't want the $objectToArray over the document or sub-document:
{ "$project": {
"finalField": {
"$reduce": {
"input": {
"$filter": {
"input": ["$field1", "$field2", "$field3"],
"cond": { "$ne": [ "$$this", null ] }
}
},
"initialValue": "",
"in": {
"$cond": {
"if": { "$eq": [ "$$value", "" ] },
"then": { "$concat": [ "$$value", "$$this" ] },
"else": { "$concat": [ "$$value", "-", "$$this" ] }
}
}
}
}
}}

Match Distinct Count from Multiple Arrays

Now I have a collection col including docs like follows:
{
"_id": 1,
"shares": [{
"fundcode": "000001",
"lastshares": 1230.20,
"agencyno": "260",
"netno": "260"
},{
"fundcode": "000002",
"lastshares": 213124.00,
"agencyno": "469",
"netno": "001"
},{
"fundcode": "000003",
"lastshares": 10000.80,
"agencyno": "469",
"netno": "002"
}
],
"trade": [{
"fundcode": "000001",
"c_date": "20160412",
"agencyno": "260",
"netno": "260",
"bk_tradetype": "122",
"confirmbalance": 1230.20,
"cserialno": "10110000119601",
"status": "1"
},{
"fundcode": "000002",
"c_date": "20160506",
"agencyno": "469",
"netno": "001",
"bk_tradetype": "122",
"confirmbalance": 213124.00,
"cserialno": "10110000119602",
"status": "1"
},{
"fundcode": "000003",
"c_date": "20170507",
"agencyno": "469",
"netno": "002",
"bk_tradetype": "122",
"confirmbalance": 10000.80,
"netvalue": 1.0000,
"cserialno": "10110000119602",
"status": "1"
}
]
}
How can I realize a selection like the following SQL using a MongoDB query?
SELECT _id
FROM col
WHERE col.shares.lastshares > 1000
AND col.trade.agencyno = '469'
GROUP BY _id
HAVING COUNT(DISTINCT col.shares.fundcode) > 2
AND COUNT(DISTINCT col.trade.fundcode) > 2
I tried an aggregation pipeline with $unwind, $group and $match twice, but I did not get the right answer. Thanks for the help.
It doesn't really help that the supplied sample does not meet the conditions, but only because the "trade" array would produce just 2 distinct matches, which is not enough to meet the "greater than 2" constraint in the query.
The structure is certainly different from an RDBMS, so "sub-queries" do not apply, but at least you made these arrays. Ideally, though, we would not use $unwind here at all.
Therefore all we need to do is "count" the "distinct" matches from within the arrays. This can basically be applied within $redact, using $map, $setDifference and $size as the main operations:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$shares",
"as": "el",
"in": {
"$cond": {
"if": { "$gt": [ "$$el.lastshares", 1000 ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]},
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$trade",
"as": "el",
"in": {
"$cond": {
"if": { "$eq": [ "$$el.agencyno", "469" ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]}
]
},
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
/*
{ "$addFields": {
"shares": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"trade": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
}
}}
*/
])
That makes it basically compatible with MongoDB 2.6 and upwards. The $addFields is only added so you can at least see the results of the "filter"; it's not actually needed, since what the query in the question asks for is in fact "just the document _id", and returning the whole document takes less work. Add a $project for just the _id on the end if you really want that.
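For example (purely illustrative), that just means appending a final stage such as:
{ "$project": { "_id": 1 } }
to either pipeline, so only the _id values of the matching documents are returned.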
Also, to taste you can use $filter instead with MongoDB 3.x releases, but the syntax in this case is actually a little longer:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"as": "el",
"in": "$$el.fundcode"
}},
[]
]
}},
2
]},
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
},
"as": "el",
"in": "$$el.fundcode"
}},
[]
]
}},
2
]}
]
},
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
/*
{ "$addFields": {
"shares": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"trade": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
}
}}
*/
])
The basic principle here is that the part with:
having (count(distinct fundcode))...
is being achieved by applying $size and $setDifference to the array content "filtered" by the conditions. In fact the "GROUP BY" parts are not even required, since the "array" already represents the relationship in a "grouped" form. Think of the overall $redact statement as the "HAVING" here.
If your MongoDB is truly ancient and you cannot use those forms, then it still is possible with $unwind. And this time we $addToSet to get the "distinct" entries:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$unwind": "$shares" },
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
}},
{ "$group": {
"_id": "$_id",
"shares": { "$addToSet": "$shares.fundcode" },
"trade": { "$first": "$trade" }
}},
{ "$unwind": "$trade" },
{ "$match": {
"trade.agencyno": "469"
}},
{ "$group": {
"_id": "$_id",
"shares": { "$first": "$shares" },
"trade": { "$addToSet": "$trade.fundcode" }
}},
{ "$match": {
"shares.2": { "$exists": true },
"trade.2": { "$exists": true }
}}
])
In this case the "HAVING" is represented by the final $match clause, where notations such as "shares.2": { "$exists": true } ask whether the array being tested actually has a "third index", which in turn means it has "greater than two" elements, which is the point of the condition.
But the document only has "two" matches
As noted it would have helped your question if you actually supplied a document that matched the conditions you asked for. Unfortunately the supplied document falls short of the required number of matches for the "trade" array in the document.
Fixing your condition to match the supplied document, we instead use $gte for "greater than or equal to 2" on the "trade" conditions:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$gt": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$shares",
"as": "el",
"in": {
"$cond": {
"if": { "$gt": [ "$$el.lastshares", 1000 ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]},
{ "$gte": [
{ "$size": {
"$setDifference": [
{ "$map": {
"input": "$trade",
"as": "el",
"in": {
"$cond": {
"if": { "$eq": [ "$$el.agencyno", "469" ] },
"then": "$$el.fundcode",
"else": false
}
}
}},
[false]
]
}},
2
]}
]
},
"then": "$$KEEP",
"else": "$$PRUNE"
}
}},
{ "$addFields": {
"shares": {
"$filter": {
"input": "$shares",
"as": "el",
"cond": { "$gt": [ "$$el.lastshares", 1000 ] }
}
},
"trade": {
"$filter": {
"input": "$trade",
"as": "el",
"cond": { "$eq": [ "$$el.agencyno", "469" ] }
}
}
}}
])
Which outputs in that form as:
{
"_id" : 1.0,
"shares" : [
{
"fundcode" : "000001",
"lastshares" : 1230.2,
"agencyno" : "260",
"netno" : "260"
},
{
"fundcode" : "000002",
"lastshares" : 213124.0,
"agencyno" : "469",
"netno" : "001"
},
{
"fundcode" : "000003",
"lastshares" : 10000.8,
"agencyno" : "469",
"netno" : "002"
}
],
"trade" : [
{
"fundcode" : "000002",
"c_date" : "20160506",
"agencyno" : "469",
"netno" : "001",
"bk_tradetype" : "122",
"confirmbalance" : 213124.0,
"cserialno" : "10110000119602",
"status" : "1"
},
{
"fundcode" : "000003",
"c_date" : "20170507",
"agencyno" : "469",
"netno" : "002",
"bk_tradetype" : "122",
"confirmbalance" : 10000.8,
"netvalue" : 1.0,
"cserialno" : "10110000119602",
"status" : "1"
}
]
}
Or, with the $unwind approach, relax the length test to 2 positions:
db.getCollection('collection').aggregate([
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
"trade.agencyno": "469"
}},
{ "$unwind": "$shares" },
{ "$match": {
"shares.lastshares": { "$gt": 1000 },
}},
{ "$group": {
"_id": "$_id",
"shares": { "$addToSet": "$shares.fundcode" },
"trade": { "$first": "$trade" }
}},
{ "$unwind": "$trade" },
{ "$match": {
"trade.agencyno": "469"
}},
{ "$group": {
"_id": "$_id",
"shares": { "$first": "$shares" },
"trade": { "$addToSet": "$trade.fundcode" }
}},
{ "$match": {
"shares.2": { "$exists": true },
"trade.1": { "$exists": true }
}}
])
Which returns:
{
"_id" : 1.0,
"shares" : [
"000003",
"000002",
"000001"
],
"trade" : [
"000003",
"000002"
]
}
But of course both identify the "document" matching the conditions, which is what the original query asks for, and therefore it's the same basic result regardless of the content returned. You can always $project just the _id if you must.

Return Sub-document only when matched but keep empty arrays

I have a collection set with documents like :
{
"_id": ObjectId("57065ee93f0762541749574e"),
"name": "myName",
"results" : [
{
"_id" : ObjectId("570e3e43628ba58c1735009b"),
"color" : "GREEN",
"week" : 17,
"year" : 2016
},
{
"_id" : ObjectId("570e3e43628ba58c1735009d"),
"color" : "RED",
"week" : 19,
"year" : 2016
}
]
}
I am trying to build a query which allows me to return all documents of my collection but only select the field 'results' with the subdocuments where week > X and year > Y.
I can select the documents where week > X and year > Y with the aggregate function and a $match but I miss documents with no match.
So far, here is my function :
query = ModelUser.aggregate(
{$unwind:{path:'$results', preserveNullAndEmptyArrays:true}},
{$match:{
$or: [
{$and:[
{'results.week':{$gte:parseInt(week)}},
{'results.year':{$eq:parseInt(year)}}
]},
{'results.year':{$gt:parseInt(year)}},
{'results.week':{$exists: false}}
]
}},
{$group:{
_id: {
_id:'$_id',
name: '$name'
},
results: {$push:{
_id:'$results._id',
color: '$results.color',
numSemaine: '$results.numSemaine',
year: '$results.year'
}}
}},
{$project: {
_id: '$_id._id',
name: '$_id.name',
results: '$results'
}}
);
The only thing I'm missing is: I have to get all 'name' values even if there are no results to display.
Any idea how to do this without 2 queries ?
It looks like you actually have MongoDB 3.2, so use $filter on the array. This will just return an "empty" array [] where the conditions supplied did not match anything:
db.collection.aggregate([
{ "$project": {
"name": 1,
"user": 1,
"results": {
"$filter": {
"input": "$results",
"as": "result",
"cond": {
"$and": [
{ "$eq": [ "$$result.year", year ] },
{ "$or": [
{ "$gt": [ "$$result.week", week ] },
{ "$not": { "$ifNull": [ "$$result.week", false ] } }
]}
]
}
}
}
}}
])
The $ifNull test, used in place of $exists as a logical form, can actually "compact" the condition, since it returns an alternate value where the property is not present:
db.collection.aggregate([
{ "$project": {
"name": 1,
"user": 1,
"results": {
"$filter": {
"input": "$results",
"as": "result",
"cond": {
"$and": [
{ "$eq": [ "$$result.year", year ] },
{ "$gt": [
{ "$ifNull": [ "$$result.week", week+1 ] },
week
]}
]
}
}
}
}}
])
In MongoDB 2.6 releases, you can probably get away with using $redact and $$DESCEND, but you of course need to fake the match in the top-level document. This has a similar usage of the $ifNull operator:
db.collection.aggregate([
{ "$redact": {
"$cond": {
"if": {
"$and": [
{ "$eq": [{ "$ifNull": [ "$year", year ] }, year ] },
{ "$gt": [
{ "$ifNull": [ "$week", week+1 ] },
week
]}
]
},
"then": "$$DESCEND",
"else": "$$PRUNE"
}
}}
])
If you actually have MongoDB 2.4, then you are probably better off filtering the array content in client code instead. Every language has methods for filtering array content, but as a JavaScript example reproducible in the shell:
db.collection.find().forEach(function(doc) {
doc.results = doc.results.filter(function(result) {
return (
result.year == year &&
( result.hasOwnProperty('week') ? result.week > week : true )
)
});
printjson(doc);
})
The reason is that prior to MongoDB 2.6 you need to use $unwind and $group, and various stages in between. This is a "very costly" operation on the server, considering that all you want to do is remove items from the arrays of documents and not actually "aggregate" items within the array.
MongoDB releases have gone to great lengths to provide array processing that does not use $unwind, since its usage for that purpose alone is not a performant option. It should only ever be used in the case where you are removing a "significant" amount of data from arrays as a result.
The whole point is that otherwise the "cost" of the aggregation operation is likely greater than the "cost" of transferring the data over the network to be filtered on the client instead. Use with caution:
db.collection.aggregate([
// Create an array if one does not exist or is already empty
{ "$project": {
"name": 1,
"user": 1,
"results": {
"$cond": [
{ "$ifNull": [ "$results.0", false ] },
"$results",
[false]
]
}
}},
// Unwind the array
{ "$unwind": "$results" },
// Conditionally $push based on match expression and conditionally count
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"user": { "$first": "$user" },
"results": {
"$push": {
"$cond": [
{ "$or": [
{ "$not": "$results" },
{ "$and": [
{ "$eq": [ "$results.year", year ] },
{ "$gt": [
{ "$ifNull": [ "$results.week", week+1 ] },
week
]}
]}
] },
"$results",
false
]
}
},
"count": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$results.year", year ] },
{ "$gt": [
{ "$ifNull": [ "$results.week", week+1 ] },
week
]}
] },
1,
0
]
}
}
}},
// $unwind again
{ "$unwind": "$results" },
// Filter out false items unless count is 0
{ "$match": {
"$or": [
{ "results": { "$ne": false } },
{ "count": 0 }
]
}},
// Group again
{ "$group": {
"_id": "$_id",
"name": { "$first": "$name" },
"user": { "$first": "$user" },
"results": { "$push": "$results" }
}},
// Now swap [false] for []
{ "$project": {
"name": 1,
"user": 1,
"results": {
"$cond": [
{ "$ne": [ "$results", [false] ] },
"$results",
[]
]
}
}}
])
Now that is a lot of operations and shuffling just to "filter" content from an array compared to all of the other approaches which are really quite simple. And aside from the complexity, it really does "cost" a lot more to execute on the server.
So if your server version actually supports the newer operators that can do this optimally, then it's okay to do so. But if you are stuck with that last process, then you probably should not be doing it and instead do your array filtering in the client.