Can I get the count of subdocuments that match a filter?

Can I get the count of subdocuments that match a filter? - mongodb

I have the following document
[
{
"_id": "624713340a3d2901f2f5a9c0",
"username": "fotis",
"exercises": [
{
"_id": "624713530a3d2901f2f5a9c3",
"description": "Sitting",
"duration": 60,
"date": "2022-03-24T00:00:00.000Z"
},
{
"_id": "6247136a0a3d2901f2f5a9c6",
"description": "Coding",
"duration": 999,
"date": "2022-03-31T00:00:00.000Z"
},
{
"_id": "624713a00a3d2901f2f5a9ca",
"description": "Sitting",
"duration": 999,
"date": "2022-03-30T00:00:00.000Z"
}
],
"__v": 3
}
]
And I am trying to get the count of exercises returned with the following aggregation (I know it is way easier to do it in my code, but I am trying to understand how to use mongodb queries)
db.collection.aggregate([
{
"$match": {
"_id": "624713340a3d2901f2f5a9c0"
}
},
{
"$project": {
"username": 1,
"exercises": {
"$slice": [
{
"$filter": {
"input": "$exercises",
"as": "exercise",
"cond": {
"$eq": [
"$$exercise.description",
"Sitting"
]
}
}
},
1
]
},
"count": {
"$size": "exercises"
}
}
}
])
When I try to access the exercises field using "$size": "exercises", I get an error query failed: (Location17124) Failed to optimize pipeline :: caused by :: The argument to $size must be an array, but was of type: string.
But when I access the subdocument exercises using "$size": "$exercises" I get the count of all the subdocuments contained in the document.
Note: I know that in this example I use $slice and I set the limit to 1, but in my code it is a variable.

You are actually on the right track. You don't really need the $slice. You can just use $reduce to perform the filtering. The reason that your count is not working is that the filtering and the $size are in the same stage. In such case, it will take the pre-filtered array to do the count. You can resolve this by adding a $addFields stage.
db.collection.aggregate([
{
"$match": {
"_id": "624713340a3d2901f2f5a9c0"
}
},
{
"$project": {
"username": 1,
"exercises": {
"$filter": {
"input": "$exercises",
"as": "exercise",
"cond": {
"$eq": [
"$$exercise.description",
"Sitting"
]
}
}
}
}
},
{
"$addFields": {
"count": {
$size: "$exercises"
}
}
}
])
Here is the Mongo playground for your reference.

Related

MongoDB $filter nested array by date does not work

I have a document with a nested array which looks like this:
[
{
"id": 1,
data: [
[
ISODate("2000-01-01T00:00:00Z"),
2,
3
],
[
ISODate("2000-01-03T00:00:00Z"),
2,
3
],
[
ISODate("2000-01-05T00:00:00Z"),
2,
3
]
]
},
{
"id": 2,
data: []
}
]
As you can see, we have an array of arrays. For each element in the data array, the first element is a date.
I wanted to create an aggregation pipeline which filters only the elements of data where the date is larger than a given date.
db.collection.aggregate([
{
"$match": {
"id": 1
}
},
{
"$project": {
"data": {
"$filter": {
"input": "$data",
"as": "entry",
"cond": {
"$gt": [
"$$entry.0",
ISODate("2000-01-04T00:00:00Z")
]
}
}
}
}
}
])
The problem is that with $gt, this just returns an empty array for data. With $lt this returns all elements. So the filtering clearly does not work.
Expected result:
[
{
"id": 1,
"data": [
[
ISODate("2000-01-05T00:00:00Z"),
2,
3
]
]
}
]
Any ideas?
Playground

I believe the issue is that when you write $$entry.0, MongoDB is trying to evaluate entry.0 as a variable name, when in reality the variable is named entry. You could make use of the $first array operator in order to get the first element like so:
db.collection.aggregate([
{
"$match": {
"id": 1
}
},
{
"$project": {
"data": {
"$filter": {
"input": "$data",
"as": "entry",
"cond": {
"$gt": [
{
$first: "$$entry"
},
ISODate("2000-01-04T00:00:00Z")
]
}
}
}
}
}
])
Mongo playground example

Don't think $$entry.0 work to get the first element of the array. Instead, use $arrayElemAt operator.
db.collection.aggregate([
{
"$match": {
"id": 1
}
},
{
"$project": {
"data": {
"$filter": {
"input": "$data",
"as": "entry",
"cond": {
"$gt": [
{
"$arrayElemAt": [
"$$entry",
0
]
},
ISODate("2000-01-04T00:00:00Z")
]
}
}
}
}
}
])
Sample Mongo Playground

to specify which element in the array you are comparing it is better to use $arrayElemAt instead of $$ARRAY.0. you must pass 2 parameters while using $arrayElemAt, the first one is the array which in your case is $$entry, and the second one is the index which in your case is 0
this is the solution I came up with:
db.collection.aggregate([
{
"$match": {
"id": 1
}
},
{
"$project": {
"data": {
"$filter": {
"input": "$data",
"as": "entry",
"cond": {
"$gt": [
{
"$arrayElemAt": [
"$$entry",
0
]
},
ISODate("2000-01-04T00:00:00Z")
]
}
}
}
}
}
])
playground

Why doesn't mongoose aggregate method return all fields of a document?

I have the following document
[
{
"_id": "624713340a3d2901f2f5a9c0",
"username": "fotis",
"exercises": [
{
"_id": "624713530a3d2901f2f5a9c3",
"description": "Sitting",
"duration": 60,
"date": "2022-03-24T00:00:00.000Z"
},
{
"_id": "6247136a0a3d2901f2f5a9c6",
"description": "Coding",
"duration": 999,
"date": "2022-03-31T00:00:00.000Z"
},
{
"_id": "624713a00a3d2901f2f5a9ca",
"description": "Sitting",
"duration": 999,
"date": "2022-03-30T00:00:00.000Z"
}
],
"__v": 3
}
]
And I am executing the following aggregation (on mongoplayground.net)
db.collection.aggregate([
{
$match: {
"_id": "624713340a3d2901f2f5a9c0"
}
},
{
$project: {
exercises: {
$filter: {
input: "$exercises",
as: "exercise",
cond: {
$eq: [
"$$exercise.description",
"Sitting"
]
}
}
},
limit: 1
}
}
])
And the result is the following
[
{
"_id": "624713340a3d2901f2f5a9c0",
"exercises": [
{
"_id": "624713530a3d2901f2f5a9c3",
"date": "2022-03-24T00:00:00.000Z",
"description": "Sitting",
"duration": 60
},
{
"_id": "624713a00a3d2901f2f5a9ca",
"date": "2022-03-30T00:00:00.000Z",
"description": "Sitting",
"duration": 999
}
]
}
]
So my first question is why is the username field not included in the result?
And the second one is why aren't the exercises limited to 1? Is the limit currently applied to the whole user document? If so, is it possible to apply it only on exercises subdocument?
Thank you!

First Question
When you use $project stage, then only the properties that you specified in the stage will be returned. You only specified exercises property, so only that one is returned. NOTE that _id property is returned by default, even you didn't specify it.
Second Question
$limit is also a stage as $project. You can apply $limit to the whole resulting documents array, not to nested array property of one document.
Solution
In $project stage, you can specify username filed as well, so it will also be returned. Instead of $limit, you can use $slice to specify the number of documents that you want to be returned from an array property.
db.collection.aggregate([
{
"$match": {
"_id": "624713340a3d2901f2f5a9c0"
}
},
{
"$project": {
"username": 1,
"exercises": {
"$slice": [
{
"$filter": {
"input": "$exercises",
"as": "exercise",
"cond": {
"$eq": [
"$$exercise.description",
"Sitting"
]
}
}
},
1
]
}
}
}
])
Working example

How can I get multiple elements from an array in MongoDB?

How can I get multiple elements from an array at once that satisfy a specific condition, for example: Date <= 2020-12-31. I read about $elemMatch, but I can only get one specific element with it.
"someArray": [
{
"Date": "2021-09-30",
"value": "6.62"
},
{
"Date": "2020-12-31",
"value": "8.67"
},
{
"Date": "2019-12-31",
"value": "12.81"
},
{
"Date": "2018-12-31",
"value": "13.82"
},
{
"Date": "2017-12-31",
"value": "13.83"
},
...
]

You can use $filter in an aggregation query like this:
db.collection.aggregate([
{
"$project": {
"someArray": {
"$filter": {
"input": "$someArray",
"as": "a",
"cond": {
"$lte": [
"$$a.Date",
ISODate("2020-12-31")
]
}
}
}
}
}
])
Example here
Note that you can use $project or $set (available since version 4.2): example or $addFields: example

MongoDB find count of all possible 'columns' within a collection

Is there a way to find all possible number of 'columns' or json properties available in a collection? (I know it's not correct to call them columns, but just for the ease of understanding)
For example, all the following documents are in the same collection called 'people':
{"Name": "bob", "Profession": "IT", "Height": 200},
{"Name": "simon", "Weight": 100, "IQ": 120},
{"Name": "james", "Weight": 130, "Glasses": "Yes"}
The possible 'columns' here are: Name, Profession, Height, Weight, IQ and Glasses. A total of 6.
Is there any way I can do an operation which gets this count of 6? (extra useful if there's also a pymongo variant)
I'm wanting to transfer data from MongoDB into a table format, and knowing the overall number of columns the table can have is useful.

You can use this aggregation query to get your desired result:
The trick here is to use $objectToArray to get the keys as values. Then remove the key _id (if exists) and group to get the total.
db.collection.aggregate([
{
"$project": {
"keys": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$unwind": "$keys"
},
{
"$match": {
"keys.k": {
"$ne": "_id"
}
}
},
{
"$group": {
"_id": "$keys.k",
"total": {
"$sum": 1
}
}
},
{
"$group": {
"_id": null,
"total": {
"$sum": 1
}
}
}
])
Example here
Edit:
Another way to avoid $unwind and double $group id this query:
The idea is the same as before, use $objectToArray to get the keys as key.k and then $group all values and add into an array.
Then get the size of the array after to do some calculations: A $reduce to flatten the array and $filter to not get the _id field.
Note that if you want to count the _id you can simply remove the $filter stage like this example
db.collection.aggregate([
{
"$project": {
"keys": {
"$objectToArray": "$$ROOT"
}
}
},
{
"$group": {
"_id": null,
"keys": {
"$addToSet": "$keys.k"
}
}
},
{
"$project": {
"_id": 0,
"keys": {
"$size": {
"$filter": {
"input": {
"$reduce": {
"input": "$keys",
"initialValue": [],
"in": {
"$setUnion": [
"$$value",
"$$this"
]
}
}
},
"cond": {
"$ne": [
"$$this",
"_id"
]
}
}
}
}
}
}
])
Example here

How to find match in documents in Mongo and Mongo aggregation?

I have following json structure in mongo collection-
{
"students":[
{
"name":"ABC",
"fee":1233
},
{
"name":"PQR",
"fee":345
}
],
"studentDept":[
{
"name":"ABC",
"dept":"A"
},
{
"name":"XYZ",
"dept":"X"
}
]
},
{
"students":[
{
"name":"XYZ",
"fee":133
},
{
"name":"LMN",
"fee":56
}
],
"studentDept":[
{
"name":"XYZ",
"dept":"X"
},
{
"name":"LMN",
"dept":"Y"
},
{
"name":"ABC",
"dept":"P"
}
]
}
Now I want to calculate following output.
if students.name = studentDept.name
so my result should be as below
{
"name":"ABC",
"fee":1233,
"dept":"A",
},
{
"name":"XYZ",
"fee":133,
"dept":"X"
}
{
"name":"LMN",
"fee":56,
"dept":"Y"
}
Do I need to use mongo aggregation or is it possible to get above given output without using aggregation???

What you are really asking here is how to make MongoDB return something that is actually quite different from the form in which you store it in your collection. The standard query operations do allow a "limitted" form of "projection", but even as the title on the page shared in that link suggests, this is really only about "limiting" the fields to display in results based on what is present in your document already.
So any form of "alteration" requires some form of aggregation, which with both the aggregate and mapReduce operations allow to "re-shape" the document results into a form that is different from the input. Perhaps also the main thing people miss with the aggregation framework in particular, is that it is not just all about "aggregating", and in fact the "re-shaping" concept is core to it's implementation.
So in order to get results how you want, you can take an approach like this, which should be suitable for most cases:
db.collection.aggregate([
{ "$unwind": "$students" },
{ "$unwind": "$studentDept" },
{ "$group": {
"_id": "$students.name",
"tfee": { "$first": "$students.fee" },
"tdept": {
"$min": {
"$cond": [
{ "$eq": [
"$students.name",
"$studentDept.name"
]},
"$studentDept.dept",
false
]
}
}
}},
{ "$match": { "tdept": { "$ne": false } } },
{ "$sort": { "_id": 1 } },
{ "$project": {
"_id": 0,
"name": "$_id",
"fee": "$tfee",
"dept": "$tdept"
}}
])
Or alternately just "filter out" the cases where the two "name" fields do not match and then just project the content with the fields you want, if crossing content between documents is not important to you:
db.collection.aggregate([
{ "$unwind": "$students" },
{ "$unwind": "$studentDept" },
{ "$project": {
"_id": 0,
"name": "$students.name",
"fee": "$students.fee",
"dept": "$studentDept.dept",
"same": { "$eq": [ "$students.name", "$studentDept.name" ] }
}},
{ "$match": { "same": true } },
{ "$project": {
"name": 1,
"fee": 1,
"dept": 1
}}
])
From MongoDB 2.6 and upwards you can even do the same thing "inline" to the document between the two arrays. You still want to reshape that array content in your final output though, but possible done a little faster:
db.collection.aggregate([
// Compares entries in each array within the document
{ "$project": {
"students": {
"$map": {
"input": "$students",
"as": "stu",
"in": {
"$setDifference": [
{ "$map": {
"input": "$studentDept",
"as": "dept",
"in": {
"$cond": [
{ "$eq": [ "$$stu.name", "$$dept.name" ] },
{
"name": "$$stu.name",
"fee": "$$stu.fee",
"dept": "$$dept.dept"
},
false
]
}
}},
[false]
]
}
}
}
}},
// Students is now an array of arrays. So unwind it twice
{ "$unwind": "$students" },
{ "$unwind": "$students" },
// Rename the fields and exclude
{ "$project": {
"_id": 0,
"name": "$students.name",
"fee": "$students.fee",
"dept": "$students.dept"
}},
])
So where you want to essentially "alter" the structure of the output then you need to use one of the aggregation tools to do. And you can, even if you are not really aggregating anything.