Mongodb: Sort by custom expression - mongodb

How to sort results by a custom expression which I use in find?
The collection contains documents with the following attributes for example:
{
"_id" : ObjectId("5ef1cd704b35c6d6698f2050"),
"Name" : "TD",
"Date" : ISODate("2021-06-23T09:37:51.976Z"),
"A" : "19.36",
"B" : 2.04,
}
I'm using the following find query to get the records with Date since "2022-01-01" and the ratio between A and B is lower than 0.1:
db.getCollection('my_collection').find(
{
"experationDate" :
{
$gte: new ISODate("2022-01-01 00:00:00.000Z")
},
"$expr":
{
"$lte": [
{ "$divide": ["$A", "$B"] },
0.1
]
}
})
Now, I can't find the right way to sort the results by this ratio.

You can use aggregate in this way:
Search the documents you want using $match, add a field named ratio and use it to sort. And finally, not shown the field using $project:
db.collection.aggregate([
{ "$match": {
"Date": { "$gte": ISODate("2020-01-01") },
"$expr": { "$lte": [ { "$divide": [ "$B", { "$toDouble": "$A" } ] }, 0.1 ] } }
},
{
"$set": {
"ratio": { "$divide": [ "$B", { "$toDouble": "$A" } ] }
}
},
{
"$sort": { "ratio": 1 }
},
{
"$project": { "ratio": 0 }
}
])
Example here
By te way, I've used other values to get results. Ratio between 2.04 and 19.36 is greater than 0.1. You have dividied A/B but I think you mean B/A.
By the way, this is not important, you can change values but the query will still works ok.
Also, maybe this could work better. Is the same query, but could be more efficient (maybe, I don't know) because prevent to divide each value into collection twice:
First filter by date, then add the field ratio to each document found (and in this way is not necessary divide every document). Another filter using the ratio, the sort, and not output the field.
db.collection.aggregate([
{
"$match": { "Date": { "$gte": ISODate("2020-01-01") } }
},
{
"$set": { "ratio": { "$divide": [ "$B", { "$toDouble": "$A" } ] } }
},
{
"$match": { "ratio": { "$lte": 0.1 } }
},
{
"$sort": { "ratio": 1 }
},
{
"$project": { "ratio": 0 }
}
])
Example

Related

How to sum values in a nested date range in MongoDB

I need to sum the values for 2018-06-01 through 2018-06-30 for each document in the collection. Each key in "days" is a different date and value. What should the mongo aggregate command look like? Result should look something like {
_id: Product_123 ,
June_Sum:
value}
That's really not a great structure for the sort of operation you now want to do. The whole point of keeping data in such a format is that you "increment" it as you go.
For example:
var now = Date.now(),
today = new Date(now - ( now % ( 1000 * 60 * 60 * 24 ))).toISOString().substr(0,10);
var product = "Product_123";
db.counters.updateOne(
{
"month": today.substr(0,7),
"product": product
},
{
"$inc": {
[`dates.${today}`]: 1,
"totals": 1
}
},
{ "upsert": true }
)
In that way the subsequent updates with $inc apply to both the "key" used for the "date" and also increment the "totals" property of the matched document. So after a few iterations you would end up with something like:
{
"_id" : ObjectId("5af395c53945a933add62173"),
"product": "Product_123",
"month": "2018-05",
"dates" : {
"2018-05-10" : 2,
"2018-05-09" : 1
},
"totals" : 3
}
If you're not actually doing that then you "should" be since it's the intended usage pattern for such a structure.
Without keeping a "totals" or like type of entry within the document(s) storing these keys the only methods left for "aggregation" in processing are to effectively coerce the the "keys" into an "array" form.
MongoDB 3.6 with $objectToArray
db.colllection.aggregate([
// Only consider documents with entries within the range
{ "$match": {
"$expr": {
"$anyElementTrue": {
"$map": {
"input": { "$objectToArray": "$days" },
"in": {
"$and": [
{ "$gte": [ "$$this.k", "2018-06-01" ] },
{ "$lt": [ "$$this.k", "2018-07-01" ] }
]
}
}
}
}
}},
// Aggregate for the month
{ "$group": {
"_id": "$product", // <-- or whatever your key for the value is
"total": {
"$sum": {
"$sum": {
"$map": {
"input": { "$objectToArray": "$days" },
"in": {
"$cond": {
"if": {
"$and": [
{ "$gte": [ "$$this.k", "2018-06-01" ] },
{ "$lt": [ "$$this.k", "2018-07-01" ] }
]
},
"then": "$$this.v",
"else": 0
}
}
}
}
}
}
}}
])
Other versions with mapReduce
db.collection.mapReduce(
// Taking the same presumption on your un-named key for "product"
function() {
Object.keys(this.days)
.filter( k => k >= "2018-06-01" && k < "2018-07-01")
.forEach(k => emit(this.product, this.days[k]));
},
function(key,values) {
return Array.sum(values);
},
{
"out": { "inline": 1 },
"query": {
"$where": function() {
return Object.keys(this.days).some(k => k >= "2018-06-01" && k < "2018-07-01")
}
}
}
)
Both are pretty horrible since you need to calculate whether the "keys" fall within the required range even to select the documents and even then still filter through the keys in those documents again in order to decide whether to accumulate for it or not.
Also noting here that if your "Product_123' is also the "name of a key" in the document and NOT a "value", then you're performing even more "gymnastics" to simply convert that "key" into a "value" form, which is how databases do things and the whole point of the the unnecessary coercion going on here.
Better Option
So as opposed to the handling as originally shown where you "should" be accumulating "as you go" with every write to the document(s) at hand, the better option than needing "processing" in order to coerce into an array format is to simply put the data into an array in the first place:
{
"_id" : ObjectId("5af395c53945a933add62173"),
"product": "Product_123",
"month": "2018-05",
"dates" : [
{ "day": "2018-05-09", "value": 1 },
{ "day": "2018-05-10", "value": 2 }
},
"totals" : 3
}
These are infinitely better for purposes of query and further analysis:
db.counters.aggregate([
{ "$match": {
// "month": "2018-05" // <-- or really just that, since it's there
"dates": {
"day": {
"$elemMatch": {
"$gte": "2018-05-01", "$lt": "2018-06-01"
}
}
}
}},
{ "$group": {
"_id": null,
"total": {
"$sum": {
"$sum": {
"$filter": {
"input": "$dates",
"cond": {
"$and": [
{ "$gte": [ "$$this.day", "2018-05-01" ] },
{ "$lt": [ "$$this.day", "2018-06-01" ] }
]
}
}
}
}
}
}}
])
Which is of course really efficient, and kind of deliberately avoiding the "total" field that is already there for demonstration only. But of course you keep the "running accumulation" on writes by doing:
db.counters.updateOne(
{ "product": product, "month": today.substr(0,7)}, "dates.day": today },
{ "$inc": { "dates.$.value": 1, "total": 1 } }
)
Which is really simple. Adding upserts adds a "little" more complexity:
// A "batch" of operations with bulkWrite
db.counter.bulkWrite([
// Incrementing the matched element
{ "udpdateOne": {
"filter": {
"product": product,
"month": today.substr(0,7)},
"dates.day": today
},
"update": {
"$inc": { "dates.$.value": 1, "total": 1 }
}
}},
// Pushing a new "un-matched" element
{ "updateOne": {
"filter": {
"product": product,
"month": today.substr(0,7)},
"dates.day": { "$ne": today }
},
"update": {
"$push": { "dates": { "day": today, "value": 1 } },
"$inc": { "total": 1 }
}
}},
// "Upserting" a new document were not matched
{ "updateOne": {
"filter": {
"product": product,
"month": today.substr(0,7)},
},
"update": {
"$setOnInsert": {
"dates": [{ "day": today, "value": 1 }],
"total": 1
}
},
"upsert": true
}}
])
But generally your getting the "best of both worlds" by having something simple to accumulate "as you go" as well as something that's easy and efficient to query and do other analysis on later.
The overall moral of the story is to "choose the right structure" for what you actually want to do. Don't put things into "keys" which are clearly intended to be used as "values", since it's an anti-pattern which just adds complexity and inefficiency to the rest of your purposes, even if it seemed right for a "single" purpose when you originally stored it that way.
NOTE Also not really advocating storing "strings" for "dates" in any way here. As noted the better approach is to use "values" where you really mean "values" you intend to use. When storing date data as a "value" it is always far more efficient and practical to store as a BSON Date, and NOT a "string".

Return Only the Keys from Document Where the Query condition was True

I'm having group of elements in MongoDB as given below:
{
"_id": ObjectId("5942643ea2042e12245de00c"),
"user": NumberInt(1),
"name": {
"value": "roy",
"time": NumberInt(121)
},
"lname": {
"value": "roy s",
"time": NumberInt(122)
},
"sname": {
"value": "roy 9",
"time": NumberInt(123)
}
}
but when I execute the query below
db.temp.find({
$or: [{
'name.time': {
$gte: 123
}
}, {
'lname.time': {
$gte: 123
}
}, {
'sname.time': {
$gte: 123
}
}]
})
it is returning the whole document which is correct.
Is there any way to fetch only specified object in which condition matched.Like in my document let condition within lname.time equl to 122 then only lname object will return rest will ignored.
The type of thing you are asking for is only really "practical" with MongoDB 3.4 in order to return this from the server.
Summary
The general case here is that the "projection" of fields by logical conditions is not straightforward. Whilst it would be nice if MongoDB had such a DSL for projection, this is basically delegated either to:
Do your manipulation "after" the results are returned from the server
Use the aggregation pipeline in order to manipulate the documents.
Therefore, in "CASE B" being "aggregation pipeline", this is really only a practical excercise if the steps involved "mimic" the standard .find() behavior of "query" and "project". Introducing other pipeline stages beyond that will only introduce performance problems greatly outweighing any gain from "trimming" the documents to return.
Thus the summary here is $match then $newRoot to "project", following the pattern. It is also I think a good "rule of thumb" to consider here that the aggregation approach "should only" be applied where there is a significant reduction in the size of data returned. I would expand by example saying that "if" the size of the keys to "trim" was actually in the Megabytes range on the returned result, then it is a worthwhile exercise to remove them "on the server".
In the case where such a saving would really only constitute "bytes" in comparison, then the most logical course is to simply allow the documents to return in the cursor "un-altered", and only then in "post processing" would you bother removing unwanted keys that did not meet the logical condition.
That said, On with the actual methods.
Aggregation Case
db.temp.aggregate([
{ "$match": {
"$or": [
{ "name.time": { "$gte": 123 } },
{ "lname.time": { "$gte": 123 } },
{ "sname.time": { "$gte": 123 } }
]
}},
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{ "k": "_id", "v": "$_id" },
{ "k": "user", "v": "$user" },
],
{ "$filter": {
"input": [
{ "$cond": [
{ "$gte": [ "$name.time", 123 ] },
{ "k": "name", "v": "$name" },
false
]},
{ "$cond": [
{ "$gte": [ "$lname.time", 123 ] },
{ "k": "lname", "v": "$lname" },
false
]},
{ "$cond": [
{ "$gte": [ "$sname.time", 123 ] },
{ "k": "sname", "v": "$sname" },
false
]}
],
"as": "el",
"cond": "$$el"
}}
]
}
}
}}
])
It's a pretty fancy statement that relies on $arrayToObject and $replaceRoot to achieve the dynamic structure. At its core the "keys" are all represented in array form, where the "array" only contains those keys that actually pass the conditions.
Fully constructed after the conditions are filtered we turn the array into a document and return the projection to the new Root.
Cursor Processing Case
You can actually do this in the client code with ease though. For example in JavaScript:
db.temp.find({
"$or": [
{ "name.time": { "$gte": 123 } },
{ "lname.time": { "$gte": 123 } },
{ "sname.time": { "$gte": 123 } }
]
}).map(doc => {
if ( doc.name.time < 123 )
delete doc.name;
if ( doc.lname.time < 123 )
delete doc.lname;
if ( doc.sname.time < 123 )
delete doc.sname;
return doc;
})
In both cases you get the same desired result:
{
"_id" : ObjectId("5942643ea2042e12245de00c"),
"user" : 1,
"sname" : {
"value" : "roy 9",
"time" : 123
}
}
Where sname was the only field to meet the condition in the document and therefore the only one returned.
Dynamic Generation and DSL Re-use
Addressing Sergio's question then I suppose you can actually re-use the DSL from the $or condition to generate in both cases:
Considering the variable defined
var orlogic = [
{
"name.time" : {
"$gte" : 123
}
},
{
"lname.time" : {
"$gte" : 123
}
},
{
"sname.time" : {
"$gte" : 123
}
}
];
Then with cursor iteration:
db.temp.find({
"$or": orlogic
}).map(doc => {
orlogic.forEach(cond => {
Object.keys(cond).forEach(k => {
var split = k.split(".");
var op = Object.keys(cond[k])[0];
if ( op === "$gte" && doc[split[0]][split[1]] < cond[k][op] )
delete doc[split[0]];
else if ( op === "$lte" && doc[split[0]][split[1]] > cond[k][op] )
delete doc[split[0]];
})
});
return doc;
})
Which evaluates against the DSL to actually perform the operations without "hardcoded" ( somewhat ) if statements;
Then the aggregation approach would also be:
var pipeline = [
{ "$match": { "$or": orlogic } },
{ "$replaceRoot": {
"newRoot": {
"$arrayToObject": {
"$concatArrays": [
[
{ "k": "_id", "v": "$_id" },
{ "k": "user", "v": "$user" }
],
{ "$filter": {
"input": orlogic.map(cond => {
var obj = {
"$cond": {
"if": { },
"then": { },
"else": false
}
};
Object.keys(cond).forEach(k => {
var split = k.split(".");
var op = Object.keys(cond[k])[0];
obj.$cond.if[op] = [ `$${k}`, cond[k][op] ];
obj.$cond.then = { "k": split[0], "v": `$${split[0]}` };
});
return obj;
}),
"as": "el",
"cond": "$$el"
}}
]
}
}
}}
];
db.test.aggregate(pipeline);
So the same basic conditions where we re-use existing $or DSL to generate the required pipeline parts as opposed to hard coding them in.
The second argument to find specifies the fields to return (projection)
db.collection.find(query, projection)
https://docs.mongodb.com/manual/reference/method/db.collection.find/
as in example
db.bios.find( { }, { name: 1, contribs: 1 } )
db.temp.find({
"$elemMatch": "$or"[
{
'name.time': {
$gte: 123
}
},
{
'lname.time': {
$gte: 123
}
},
{
'sname.time': {
$gte: 123
}
}
]
},
{
{
"name.time": 1,
"lname.time": 1,
"sname.time": 1
}
}
})
My approach using aggregation pipeline
$project - Project is used to create an key for the documents name, sname and lname
Initial project Query
db.collection.aggregate([{$project: {_id:1, "tempname.name": "$name", "templname.lname":"$lname", "tempsname.sname":"$sname"}}]);
Result of this query is
{"_id":ObjectId("5942643ea2042e12245de00c"),"tempname":{"name":{"value":"roy","time":121}},"templname":{"lname":{"value":"roy s","time":122}},"tempsname":{"sname":{"value":"roy 9","time":123}}}
Use $project one more time to add the documents into an array
db.collection.aggregate([{$project: {_id:1, "tempname.name": "$name", "templname.lname":"$lname", "tempsname.sname":"$sname"}},
{$project: {names: ["$tempname", "$templname", "$tempsname"]}}])
Our document will be like this after the execution of second project
{"_id":ObjectId("5942643ea2042e12245de00c"),"names":[{"name":{"value":"roy","time":121}},{"lname":{"value":"roy s","time":122}},{"sname":{"value":"roy 9","time":123}}]}
Then use $unwind to break the array into separate documents
after breaking the documents use $match with $or to get the desired result
**
Final Query
**
db.collection.aggregate([
{
$project: {
_id: 1,
"tempname.name": "$name",
"templname.lname": "$lname",
"tempsname.sname": "$sname"
}
},
{
$project: {
names: [
"$tempname",
"$templname",
"$tempsname"
]
}
},
{
$unwind: "$names"
},
{
$match: {
$or: [
{
"names.name.time": {
$gte: 123
}
},
{
"names.lname.time": {
$gte: 123
}
},
{
"names.sname.time": {
$gte: 123
}
}
]
}
}
])
Final result of the query closer to your expected result(with an additional key)
{
"_id" : ObjectId("5942643ea2042e12245de00c"),
"names" : {
"sname" : {
"value" : "roy 9",
"time" : 123
}
}
}

Get Distinct list of two properties using MongoDB 2.4

I have an article collection:
{
_id: 9999,
authorId: 12345,
coAuthors: [23456,34567],
title: 'My Article'
},
{
_id: 10000,
authorId: 78910,
title: 'My Second Article'
}
I'm trying to figure out how to get a list of distinct author and co-author ids out of the database. I have tried push, concat, and addToSet, but can't seem to find the right combination. I'm on 2.4.6 so I don't have access to setUnion.
Whilst $setUnion would be the "ideal" way to do this, there is another way that basically involved "switching" between a "type" to alternate which field is picked:
db.collection.aggregate([
{ "$project": {
"authorId": 1,
"coAuthors": { "$ifNull": [ "$coAuthors", [null] ] },
"type": { "$const": [ true,false ] }
}},
{ "$unwind": "$coAuthors" },
{ "$unwind": "$type" },
{ "$group": {
"_id": {
"$cond": [
"$type",
"$authorId",
"$coAuthors"
]
}
}},
{ "$match": { "_id": { "$ne": null } } }
])
And that is it. You may know the $const operation as the $literal operator from MongoDB 2.6. It has always been there, but was only documented and given an "alias" at the 2.6 release.
Of course the $unwind operations in both cases produce more "copies" of the data, but this is grouping for "distinct" values so it does not matter. Just depending on the true/false alternating value for the projected "type" field ( once unwound ) you just pick the field alternately.
Also this little mapReduce does much the same thing:
db.collection.mapReduce(
function() {
emit(this.authorId,null);
if ( this.hasOwnProperty("coAuthors"))
this.coAuthors.forEach(function(id) {
emit(id,null);
});
},
function(key,values) {
return null;
},
{ "out": { "inline": 1 } }
)
For the record, $setUnion is of course a lot cleaner and more performant:
db.collection.aggregate([
{ "$project": {
"combined": {
"$setUnion": [
{ "$map": {
"input": ["A"],
"as": "el",
"in": "$authorId"
}},
{ "$ifNull": [ "$coAuthors", [] ] }
]
}
}},
{ "$unwind": "$combined" },
{ "$group": {
"_id": "$combined"
}}
])
So there the only real concerns are converting the singular "authorId" to an array via $map and feeding an empty array where the "coAuthors" field is not present in the document.
Both output the same distinct values from the sample documents:
{ "_id" : 78910 }
{ "_id" : 23456 }
{ "_id" : 34567 }
{ "_id" : 12345 }

How to return count:0 in an aggregation if there is no match

I am having an issues that I thought it would happen often, but I wasn't able to find enough information during my research.
My problem is that I expect the return of a query to have a given JSON format, but when the match filters out all documents, I get no json.
A simplified example: I would like to have the count if documents that match a given criteria, so I have the following query
db.collection.aggregate( [{
$match: {
type: /^1[.]2[.]3[.].*$/
}
}, {
$group: {
_id: {$ifNull : ["$type", 0]},
count: { $sum: 1 }
}
}]);
If I have at least one document that matches, then the query works:
{ "_id" : "1.2.3", "count" : 44 }
If I have no documents, I would like to receive a json like this:
{ "_id" : "1.5.3", "count" : 0 }
Is this possible?
ps: this is a simplified case, it would not be so easy to handle that on the application side, so I would rather try to adjust my query
If you can know beforehand the value of the key that you are searching for(i.e. 1.2.3, 1.5.3 in your case), here is a workaround using $facet. It first tries to get the documents by $match and store them into an array named results. Depending on the $size of the results array, we either replace it with the $group result (when we have matched records); or replace it with a default count: 0 record with the key you specified.
db.collection.aggregate([
{
"$facet": {
"results": [
{
$match: {
"type": <key you want to search>
}
},
{
$group: {
_id: {
$ifNull: [
"$type",
0
]
},
count: {
$sum: 1
}
}
}
]
}
},
{
"$replaceRoot": {
"newRoot": {
"$cond": {
"if": {
$gt: [
{
"$size": "$results"
},
0
]
},
"then": "$$ROOT",
"else": {
"results": [
{
"_id": <key you want to search>,
"count": 0
}
]
}
}
}
}
},
{
"$unwind": "$results"
},
{
"$replaceRoot": {
"newRoot": "$results"
}
}
])
Mongo Playground

How to calculate difference between values of different documents using mongo aggregation?

Hi my mongo structure as below
{
"timemilliSec":1414590255,
"data":[
{
"x":23,
"y":34,
"name":"X"
},
{
"x":32,
"y":50,
"name":"Y"
}
]
},
{
"timemilliSec":1414590245,
"data":[
{
"x":20,
"y":13,
"name":"X"
},
{
"x":20,
"y":30,
"name":"Y"
}
]
}
Now I want to calculate difference of first document and second document and second to third in this way
so calculation as below
diffX = ((data.x-data.x)/(data.y-data.y)) in our case ((23-20)/(34-13))
diffY = ((data.x-data.x)/(data.y-data.y)) in our case ((32-20)/(50-30))
Tough question in principle, but I'm going to stay with the simplified case you present of two documents and base a solution around that. The concepts should abstract, but are more difficult for expanded cases. Possible with the aggregation framework in general:
db.collection.aggregate([
// Match the documents in a pair
{ "$match": {
"timeMilliSec": { "$in": [ 1414590255, 1414590245 ] }
}}
// Trivial, just keeping an order
{ "$sort": { "timeMilliSec": -1 } },
// Unwind the arrays
{ "$unwind": "$data" },
// Group first and last
{ "$group": {
"_id": "$data.name",
"firstX": { "$first": "$data.x" },
"lastX": { "$last": "$data.x" },
"firstY": { "$first": "$data.y" },
"lastY": { "$last": "$data.y" }
}},
// Difference on the keys
{ "$project": {
"diff": {
"$divide": [
{ "$subtract": [ "$firstX", "$lastX" ] },
{ "$subtract": [ "$firstY", "$lastY" ] }
]
}
}},
// Not sure you want to take it this far
{ "$group": {
"_id": null,
"diffX": {
"$min": {
"$cond": [
{ "$eq": [ "$_id", "X" ] },
"$diff",
false
]
}
},
"diffY": {
"$min": {
"$cond": [
{ "$eq": [ "$_id", "Y" ] },
"$diff",
false
]
}
}
}}
])
Possibly overblown, not sure of the intent, but the output of this based on the sample would be:
{
"_id" : null,
"diffX" : 0.14285714285714285,
"diffY" : 0.6
}
Which matches the calculations.
You can adapt to your case, but the general principle is as shown.
The last "pipeline" stage there is a little "extreme" as all that is done is combine the results into a single document. Otherwise, the "X" and "Y" results are already obtained in two documents in the pipeline. Mostly by the $group operation with $first and $last operations to find the respective elements on the grouping boundary.
The subsequent operations in $project as a pipeline stage performs the required math to determine the distinct results. See the aggregation operators for more details, particularly $divide and $subtract.
Whatever you do you follow this course. Get a "start" and "end" pair on your two keys. Then perform the calculations.