MongoDb Aggregate Total Count Before Grouping - mongodb

I have an aggregation pipeline that groups objects and holds count for some specific field for grouped objects. You can reproduce the problem here: https://mongoplayground.net/p/2DGaiQDYDBP .
The schema is like this;
[
{
"_id": {
"$oid": "63ce93ffb6e06322db59fdc0"
},
"fruit": "apple",
"source": "tree",
"is_fruit_important": "true"
},
{
"_id": {
"$oid": "63ce93ffb6e06322db59fdc1"
},
"fruit": "orange",
"source": "tree",
"is_fruit_important": "false"
},
]
and the current query groups fruits by the source, and holds the count of important fruits for every group. After applying aggregation I get something like this after query:
[
{
"count": {
"number_of_important_fruits": 1
},
"objects": [
{
"fruit": "apple",
"id": "63ce93ffb6e06322db59fdc0",
"is_fruit_important": "true",
"source": "tree"
},
{
"fruit": "orange",
"id": "63ce93ffb6e06322db59fdc1",
"is_fruit_important": "false",
"source": "tree"
}
],
"source": {
"source-of": "tree"
}
}
]
Is there a way to put the number of all fruits in the database to the response object. For example like this:
{
"total-count": 2,
"result": [
{
"count": {
"number_of_important_fruits": 1
},
"objects": [
{
"fruit": "apple",
"id": "63ce93ffb6e06322db59fdc0",
"is_fruit_important": "true",
"source": "tree"
},
{
"fruit": "orange",
"id": "63ce93ffb6e06322db59fdc1",
"is_fruit_important": "false",
"source": "tree"
}
],
"source": {
"source-of": "tree"
}
}
]
}
They can be handled in separate aggregation pipelines but that's what I would not like to implement. Any help would be highly appreciated.

Add one additional group stage just before the final $project, using $sum with $size for a total count, or add up the important counts for a total important count.
{$group: {
_id: null,
result: {$push: "$$ROOT"},
"count_total": {$sum: {$size: "$objects"}},
"count_important": {$sum: "$count.number_of_important_fruits"}
}},
Playground

You can simply add a $facet stage to push all your results into result. Then perform a $size on result to get total-count.
db.collection.aggregate([
...,
{
"$facet": {
"result": [],
"total-important-count": [
{
$group: {
_id: null,
cnt: {
$sum: "$count.number_of_important_fruits"
}
}
}
]
}
},
{
"$addFields": {
"total-count": {
$size: "$result"
},
"total-important-count": {
$first: "$total-important-count.cnt"
}
}
}
])
Mongo Playground

Related

Query maximum N records of each group base on a condition in MongoDB?

I have a question regarding querying data in MongoDB. Here is my sample data:
{
"_id": 1,
"category": "fruit",
"userId": 1,
"name": "Banana"
},
{
"_id": 2,
"category": "fruit",
"userId": 2,
"name": "Apple"
},
{
"_id": 3,
"category": "fresh-food",
"userId": 1,
"name": "Fish"
},
{
"_id": 4,
"category": "fresh-food",
"userId": 2,
"name": "Shrimp"
},
{
"_id": 5,
"category": "vegetable",
"userId": 1,
"name": "Salad"
},
{
"_id": 6,
"category": "vegetable",
"userId": 2,
"name": "carrot"
}
The requirements:
If the category is fruit, returns all the records match
If the category is NOT fruit, returns maximum 10 records of each category grouped by user
The category is known and stable, so we can hard-coded in our query.
I want to get it done in a single query. So the result expected should be:
{
"fruit": [
... // All records of
],
"fresh-food": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "fresh-food"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "fresh-food"
]
},
...
],
"vegetable": [
{
"userId": 1,
"data": [
// Top 10 records of user 1 with category = "vegetable"
]
},
{
"userId": 2,
"data": [
// Top 10 records of user 2 with category = "vegetable"
]
},
]
}
I've found the guideline to group by each group using $group and $slice, but I can't apply the requirement number #1.
Any help would be appreciated.
You need to use aggregation for this
$facet to categorize incoming data, we categorized into two. 1. Fruit and 2. non_fruit
$match to match the condition
$group first group to group the data based on category and user. Second group to group by its category only
$objectToArray to make the object into key value pair
$replaceRoot to make the non_fruit to root with fruit
Here is the code
db.collection.aggregate([
{
"$facet": {
"fruit": [
{ $match: { "category": "fruit" } }
],
"non_fruit": [
{
$match: {
$expr: {
$ne: [ "$category", "fruit" ]
}
}
},
{
$group: {
_id: { c: "$category", u: "$userId" },
data: { $push: "$$ROOT" }
}
},
{
$group: {
_id: "$_id.c",
v: {
$push: {
uerId: "$_id.u",
data: { "$slice": [ "$data", 3 ] }
}
}
}
},
{ $addFields: { "k": "$_id", _id: "$$REMOVE" } }
]
}
},
{ $addFields: { non_fruit: { "$arrayToObject": "$non_fruit" } }},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [ "$$ROOT", "$non_fruit" ]
}
}
},
{ $project: { non_fruit: 0 } }
])
Working Mongo playground

How to count embedded array object elements in mongoDB

{
"orderNo": "123",
"bags": [{
"type": "small",
"products": [{
"id": "1",
"name": "ABC",
"returnable": true
}, {
"id": "2",
"name": "XYZ"
}
]
},{
"type": "big",
"products": [{
"id": "3",
"name": "PQR",
"returnable": true
}, {
"id": "4",
"name": "UVW"
}
]
}
]
}
I have orders collection where documents are in this format. I want to get a total count of products which has the returnable flag. e.g: for the above order the count should be 2. I am very new to MongoDB wanted to know how to write a query to find this out, I have tried few things but did not help:
this is what I tried but not worked:
db.orders.aggregate([
{ "$unwind": "$bags" },
{ "$unwind": "$bags.products" },
{ "$unwind": "$bags.products.returnable" },
{ "$group": {
"_id": "$bags.products.returnable",
"count": { "$sum": 1 }
}}
])
For inner array you can use $filter to check returnable flag and $size to get number of such items. For the outer one you can take advantage of $reduce to sum the values from inner arrays:
db.collection.aggregate([
{
$project: {
totalReturnable: {
$reduce: {
input: "$bags",
initialValue: 0,
in: {
$add: [
"$$value",
{
$size: {
$filter: {
input: "$$this.products",
as: "prod",
cond: {
$eq: [ "$$prod.returnable", true ]
}
}
}
]
}
}
}
}
}
}
])
Mongo Playground

How to get count by order in mongodb aggregate?

I have two collections name listings and moods.
listings sample:
{
"_id": ObjectId("5349b4ddd2781d08c09890f3"),
"name": "Hotel Radisson Blu",
"moods": [
ObjectId("507f1f77bcf86cd799439010"),
ObjectId("507f1f77bcf86cd799439011")
]
}
moods sample:
{
"_id": ObjectId("507f1f77bcf86cd799439011"),
"name": "Sports"
},
{
"_id": ObjectId("507f1f77bcf86cd799439010"),
"name": "Spanish Food"
},
{
"_id": ObjectId("507f1f77bcf86cd799439009"),
"name": "Action"
}
I need this record.
{
"_id": ObjectId("507f1f77bcf86cd799439011"),
"name": "Sports",
"count": 1
},
{
"_id": ObjectId("507f1f77bcf86cd799439010"),
"name": "Spanish Food",
"count": 1
},
{
"_id": ObjectId("507f1f77bcf86cd799439009"),
"name": "Action",
"count": 0
}
I need this type of record. I have no idea about aggregate.
You can do it using aggregate(),
$lookup to join collection listings
$match pipeline to check moods _id in listings field moods array
db.moods.aggregate([
{
"$lookup": {
"from": "listings",
"as": "count",
let: { id: "$_id" },
pipeline: [
{
"$match": {
"$expr": { "$in": ["$$id", "$moods"] }
}
}
]
}
},
$addFields to add count on the base of $size of array count that we got from above lookup
{
$addFields: {
count: { $size: "$count" }
}
}
])
Playground
did this work:
db.collection.aggrate().count()
Try to combine the functions, it might work.

Filtering a mongodb query result based on the position of a field in an array

Apologies for the confusing title, I am not sure how to summarize this.
Suppose I have the following list of documents in a collection:
{ "name": "Lorem", "source": "A" }
{ "name": "Lorem", "source": "B" }
{ "name": "Ipsum", "source": "A" }
{ "name": "Ipsum", "source": "B" }
{ "name": "Ipsum", "source": "C" }
{ "name": "Foo", "source": "B" }
as well an ordered list of accepted sources, where lower indexes signify higher priority
sources = ["A", "B"]
My query should:
Take a list of available sources and a list of wanted names
Return a maximum of one document per name.
In case of multiple matches, the document with the most prioritized source should be chosen.
Example:
wanted_names = ['Lorem', 'Ipsum', 'Foo', 'NotThere']
Result:
{ "name": "Lorem", "source": "A" }
{ "name": "Ipsum", "source": "A" }
{ "name": "Foo", "source": "B" }
The results don't necessarily have to be ordered.
Is it possible to do this with a Mongo query alone? If so could someone point me towards a resource detailing how to accomplish it?
My current solution doesn't support a list of names, and instead relies on a Python script to execute multiple queries:
db.collection.aggregate([
{$match: {
"name": "Lorem",
"source": {
$in: sources
}}},
{$addFields: {
"order": {
$indexOfArray: [sources, "$source"]
}}},
{$sort: {
"order": 1
}},
{$limit: 1}
]);
Note: _id fields are omitted in this question for the sake of brevity
How about this: With $group we have $min operator which takes lower source
Note: If you prioritize as ['B', 'A'], use $max then
db.collection.aggregate([
{
$match: {
"name": {
$in: [
"Lorem",
"Ipsum",
"Foo",
"NotThere"
]
},
"source": {
$in: [
"A",
"B"
]
}
}
},
{
$group: {
_id: "$name",
source: {
$min: "$source"
}
}
},
{
$project: {
_id: 0,
name: "$_id",
source: 1
}
}
])
MongoPlayground

How to $push a field depending on a condition?

I'm trying to conditionally push a field into an array during the $group stage of the MongoDB aggregation pipeline.
Essentially I have documents with the name of the user, and an array of the actions they performed.
If I group the user actions like this:
{ $group: { _id: { "name": "$user.name" }, "actions": { $push: $action"} } }
I get the following:
[{
"_id": {
"name": "Bob"
},
"actions": ["add", "wait", "subtract"]
}, {
"_id": {
"name": "Susan"
},
"actions": ["add"]
}, {
"_id": {
"name": "Susan"
},
"actions": ["add, subtract"]
}]
So far so good. The idea would be to now group together the actions array to see which set of user actions are the most popular. The problem is that I need to remove the "wait" action before taking into account the group. Therefore the result should be something like this, taking into account that the "wait" element should not be considered in the grouping:
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 2
}]
Test #1
If I add this $group stage:
{ $group : { _id : "$actions", total: { $sum: 1} }}
I get the count that I want, but it takes into account the unwanted "wait" array element.
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 1
}, {
"_id": ["add", "wait", "subtract"],
"total": 1
}]
Test #2
{ $group: { _id: { "name": "$user.name" }, "actions": { $push: { $cond: { if:
{ $ne: [ "$action", 'wait']}, then: "$action", else: null } }}} }
{ $group : { _id : "$actions", total: { $sum: 1} }}
This is as close as I've gotten, but this pushes null values where the wait would be, and I can't figure out how to remove them.
[{
"_id": ["add"],
"total": 1
}, {
"_id": ["add", "subtract"],
"total": 1
}, {
"_id": ["add", null, "subtract"],
"total": 1
}]
UPDATE:
My simplified documents look like this:
{
"_id": ObjectID("573e0c6155e2a8f9362fb8ff"),
"user": {
"name": "Bob",
},
"action": "add",
}
You need a preliminary $match stage in your pipeline to select only those documents where "action" is not equals to "wait".
db.collection.aggregate([
{ "$match": { "action": { "$ne": "wait" } } },
{ "$group": {
"_id": "$user.name",
"actions": { "$push": "$action" },
"total": { "$sum": 1 }
}}
])