MongoDB aggregate array documents - mongodb

I have a MongoDB collection containing elements like this:
{
"name": "test",
"instances": [
{
"year": 2015
},
{
"year": 2016
},
]
}
How can I get the minimum and maximum value for year within the document named test? E.g. I want to aggregate all documents inside that array, but I can't find the syntax for that. Thanks in advance!

Both $min and $max takes an array as a parameter and in your case path instances.year returns an array so your query can look like below:
db.col.aggregate([
{
$match: { name: "test" }
},
{
$project: {
minYear: { $min: "$instances.year" },
maxYear: { $max: "$instances.year" }
}
}
])

You can use below aggregation
db.collection.aggregate([
{ "$project": {
"maxYear": {
"$arrayElemAt": [
"$instances",
{
"$indexOfArray": [
"$instances.year",
{ "$max": "$instances.year" }
]
}
]
},
"minYear": {
"$arrayElemAt": [
"$instances",
{
"$indexOfArray": [
"$instances.year",
{ "$min": "$instances.year" }
]
}
]
}
}}
])

Related

How do I use $unwind and then $group in the same mongodb query

I have the following mongodb structure...
[
{
track: 'Newcastle',
time: '17:30',
date: '22/04/2022',
bookmakers: [
{
bookmaker: 'Coral',
runners: [
{
runner: 'John',
running: true,
odds: 3.2
},
...
]
},
...
]
},
...
]
I'm trying to find filter the bookmakers array for each document to only include the objects that match the specified bookmaker values, for example:
{ 'bookmakers.bookmaker': { $in: ['Coral', 'Bet365'] } }
At the moment, I'm using the following mongodb query to only select the bookmakers that are specified, however I need to put the documents back together after they've been seperated by the '$unwind', is there a way I can do this using $group?
await HorseRacingOdds.aggregate([
{ $unwind: "$bookmakers" },
{
$group: {
_id: "$_id",
bookmakers: "$bookmakers"
}
},
{
$project: {
"_id": 0,
"__v": 0,
"lastUpdate": 0
}
}
])
How about a plain $addFields with $filter?
db.collection.aggregate([
{
"$addFields": {
"bookmakers": {
"$filter": {
"input": "$bookmakers",
"as": "b",
"cond": {
"$in": [
"$$b.bookmaker",
[
"Coral",
"Bet365"
]
]
}
}
}
}
},
{
$project: {
"_id": 0,
"__v": 0,
"lastUpdate": 0
}
}
])
Here is the Mongo playground for your reference.

MongoDB: count both matching documents and matching subdocuments, grouped by property of document

Given a collection of documents each containing an array of subdocuments (among other properties):
{
"prop1": False,
"prop2": "unique_value",
"subdocuments": [
{
"subprop1": 1,
"subprop2": 10
},
{
"subprop1": 30,
"subprop2": 40
},
{
"subprop1": 10,
"subprop2": 1
}
]
}
And a $match query covering both documents and subdocuments:
{
"prop1": False,
"$or": [
{"subdocuments.subprop1": {"$lt": 3}},
{"subdocuments.subprop2": {"$lt": 5}}
]
}
How can I create an aggregate query that returns the number of matching subdocuments and matching documents, grouped by a specific property of the root documents?
Just counting total subdocuments and matching documents is simple, but I'm struggling to also get the right count of matching subdocuments.
Ideally I'd like to have a result like this (if we consider the sample document, only subdoc 1 and 3 match the $or conditions):
{
"unique_value": {
"documents": 1,
"subdocuments": 2
}
}
In this case the results are being grouped by the value of "prop2".
You can use $size and $filter to get the count for matching subdocuments first. Then do a $sum to get the documentCount and subdocumentCount.
db.collection.aggregate([
{
"$match": {
"prop1": false,
"$or": [
{
"subdocuments.subprop1": {
"$lt": 3
}
},
{
"subdocuments.subprop2": {
"$lt": 5
}
}
]
}
},
{
"$addFields": {
"subdocumentCount": {
$size: {
"$filter": {
"input": "$subdocuments",
"as": "s",
"cond": {
"$or": [
{
$lt: [
"$$s.subprop1",
3
]
},
{
$lt: [
"$$s.subprop2",
5
]
}
]
}
}
}
}
}
},
{
$group: {
_id: "$prop2",
documentCount: {
$sum: 1
},
subdocumentCount: {
$sum: "$subdocumentCount"
}
}
},
{
$project: {
_id: 0,
k: "$_id",
v: {
documentCount: "$documentCount",
subdocumentCount: "$subdocumentCount"
}
}
},
{
$group: {
_id: null,
docs: {
$push: "$$ROOT"
}
}
},
{
"$addFields": {
"docs": {
"$arrayToObject": "$docs"
}
}
},
{
"$replaceRoot": {
"newRoot": "$docs"
}
}
])
Here is the Mongo playground for your reference.

add condition to most recent values result

mongoplayground
My result include 'mostRecentValues' and I wish to project values only if they are not the same. My struggle is in adding the condition to show only if there's a change between the 2 values (in this case mostRecentValues).
my_coll.create_index([('car_id',1),('timestamp',-1)], unique=True)
When an operation involves turning on an off full documents, think of $group. Adding this to your pipeline should point you in the right direction.
(playground)
db.collection.aggregate([
{
"$group": {
_id: "cars",
"cars": {
"$push": {
"$cond": [
{
"$ne": [
{
"$arrayElemAt": [
"$mostRecentValues",
0
]
},
{
"$arrayElemAt": [
"$mostRecentValues",
1
]
}
]
},
"$$ROOT",
"$$REMOVE"
]
}
}
}
},
{
$unwind: "$cars"
}
])
If you only have 3 fields on each document you could add:
{
$project: {
car_id: "$cars.car_id",
mostRecentTime: "$cars.mostRecentTime",
mostRecentValues: "$cars.MostRecentValues"
}
}

Mongodb: Sort by custom expression

How to sort results by a custom expression which I use in find?
The collection contains documents with the following attributes for example:
{
"_id" : ObjectId("5ef1cd704b35c6d6698f2050"),
"Name" : "TD",
"Date" : ISODate("2021-06-23T09:37:51.976Z"),
"A" : "19.36",
"B" : 2.04,
}
I'm using the following find query to get the records with Date since "2022-01-01" and the ratio between A and B is lower than 0.1:
db.getCollection('my_collection').find(
{
"experationDate" :
{
$gte: new ISODate("2022-01-01 00:00:00.000Z")
},
"$expr":
{
"$lte": [
{ "$divide": ["$A", "$B"] },
0.1
]
}
})
Now, I can't find the right way to sort the results by this ratio.
You can use aggregate in this way:
Search the documents you want using $match, add a field named ratio and use it to sort. And finally, not shown the field using $project:
db.collection.aggregate([
{ "$match": {
"Date": { "$gte": ISODate("2020-01-01") },
"$expr": { "$lte": [ { "$divide": [ "$B", { "$toDouble": "$A" } ] }, 0.1 ] } }
},
{
"$set": {
"ratio": { "$divide": [ "$B", { "$toDouble": "$A" } ] }
}
},
{
"$sort": { "ratio": 1 }
},
{
"$project": { "ratio": 0 }
}
])
Example here
By te way, I've used other values to get results. Ratio between 2.04 and 19.36 is greater than 0.1. You have dividied A/B but I think you mean B/A.
By the way, this is not important, you can change values but the query will still works ok.
Also, maybe this could work better. Is the same query, but could be more efficient (maybe, I don't know) because prevent to divide each value into collection twice:
First filter by date, then add the field ratio to each document found (and in this way is not necessary divide every document). Another filter using the ratio, the sort, and not output the field.
db.collection.aggregate([
{
"$match": { "Date": { "$gte": ISODate("2020-01-01") } }
},
{
"$set": { "ratio": { "$divide": [ "$B", { "$toDouble": "$A" } ] } }
},
{
"$match": { "ratio": { "$lte": 0.1 } }
},
{
"$sort": { "ratio": 1 }
},
{
"$project": { "ratio": 0 }
}
])
Example

Mongo Query to Return only a subset of SubDocuments

Using the example from the Mongo docs:
{ _id: 1, results: [ { product: "abc", score: 10 }, { product: "xyz", score: 5 } ] }
{ _id: 2, results: [ { product: "abc", score: 8 }, { product: "xyz", score: 7 } ] }
{ _id: 3, results: [ { product: "abc", score: 7 }, { product: "xyz", score: 8 } ] }
db.survey.find(
{ id: 12345, results: { $elemMatch: { product: "xyz", score: { $gte: 6 } } } }
)
How do I return survey 12345 (regardless of even if it HAS surveys or not) but only return surveys with a score greater than 6? In other words I don't want the document disqualified from the results based on the subdocument, I want the document but only a subset of subdocuments.
What you are asking for is not so much a "query" but is basically just a filtering of content from the array in each document.
You do this with .aggregate() and $project:
db.survey.aggregate([
{ "$project": {
"results": {
"$setDifference": [
{ "$map": {
"input": "$results",
"as": "el",
"in": {
"$cond": [
{ "$and": [
{ "$eq": [ "$$el.product", "xyz" ] },
{ "$gte": [ "$$el.score", 6 ] }
]}
]
}
}},
[false]
]
}
}}
])
So rather than "contrain" results to documents that have an array member matching the condition, all this is doing is "filtering" the array members out that do not match the condition, but returns the document with an empty array if need be.
The fastest present way to do this is with $map to inspect all elements and $setDifference to filter out any values of false returned from that inspection. The possible downside is a "set" must contain unique elements, so this is fine as long as the elements themselves are unique.
Future releases will have a $filter method, which is similar to $map in structure, but directly removes non-matching results where as $map just returns them ( via the $cond and either the matching element or false ) and is then better suited.
Otherwise if not unique or the MongoDB server version is less than 2.6, you are doing this using $unwind, in a non performant way:
db.survey.aggregate([
{ "$unwind": "$results" },
{ "$group": {
"_id": "$_id",
"results": { "$push": "$results" },
"matched": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$results.product", "xyz" ] },
{ "$gte": [ "$results.score", 6 ] }
]},
1,
0
]
}
}
}},
{ "$unwind": "$results" },
{ "$match": {
"$or": [
{
"results.product": "xyz",
"results.score": { "$gte": 6 }
},
{ "matched": 0 }
}},
{ "$group": {
"_id": "$_id",
"results": { "$push": "$results" },
"matched": { "$first": "$matched" }
}},
{ "$project": {
"results": {
"$cond": [
{ "$ne": [ "$matched", 0 ] },
"$results",
[]
]
}
}}
])
Which is pretty horrible in both design and perfomance. As such you are probably better off doing the filtering per document in client code instead.
You can use $filter in mongoDB 3.2
db.survey.aggregate([{
$match: {
{ id: 12345}
}
}, {
$project: {
results: {
$filter: {
input: "$results",
as: "results",
cond:{$gt: ['$$results.score', 6]}
}
}
}
}]);
It will return all the sub document that have score greater than 6. If you want to return only first matched document than you can use '$' operator.
You can use $redact in this way:
db.survey.aggregate( [
{ $match : { _id : 12345 }},
{ $redact: {
$cond: {
if: {
$or: [
{ $eq: [ "$_id", 12345 ] },
{ $and: [
{ $eq: [ "$product", "xyz" ] },
{ $gte: [ "$score", 6 ] }
]}
]
},
then: "$$DESCEND",
else: "$$PRUNE"
}
}
}
] );
It will $match by _id: 12345 first and then it will "$$PRUNE" all the subdocuments that don't have "product":"xyz" and don't have score greater or equal 6. I added the condition ($cond) { $eq: [ "$_id", 12345 ] } so that it wouldn't prune the whole document before it reaches the subdocuments.