MongoDB match filters with grouping and get total count - mongodb

My sample data:
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_2",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_3",
"priority": "P2",
"owners": ["user-1", "user-2"],
},
I want to run an aggregation pipeline on the data involving match filters and grouping, also I want to limit the number of groups returned as well as the number of items in each group.
Essentially, if limit=2, limit_per_group=1, group_by=owner, priority=P1, I want the following results:
[
{
"data": [
{
"group_key": "user-1",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
{
"group_key": "user-2",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
]
},
{
"metadata": {
"total_items_matched": 2,
"total_groups": 2
}
},
]
Need some help on how to write an aggregation pipeline to get the required result.
My current query is as follows:
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 1,
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched"
}
]
}
}
Mongo playground link
I am unable to calculate the total number of groups.

add new stage of $addfields at the end of pipeline
db.collection.aggregate([
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 0,
"group_key": "$_id",
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched",
}
]
}
},
{
"$addFields": {
"metadata.total_groups": {
"$size": "$data"
}
}
}
])
https://mongoplayground.net/p/y5a0jvr6fxI

Related

MongoDB aggregate using $match with $expr with array

MongoDB 5.0.9
I am trying to get
value of application within course and their specification
value of paid application ( status : paid) based on course and their specification
courses collection having multiple courses with specification which might be there maybe not
[
{
"_id": {
"$oid": "62aab6669b3740313d881a30"
},
"course_name": "Master",
"fees": "Rs.1000.0/-",
"course_specialization": [
{
"spec_name": "Social Work",
"is_activated": true
}
],
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"is_pg": true
},
{
"_id": {
"$oid": "62aab6669b3740313d881a38"
},
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"course_name": "BBA",
"fees": "Rs.1000.0/-",
"is_pg": false,
"course_specialization": null
},
{
"_id": {
"$oid": "628f3967cb69fc0789e69181"
},
"course_name": "BTech",
"fees": "Rs.1000.0/-",
"course_specialization": [
{
"spec_name": "Computer Science and Engineering",
"is_activated": true
},
{
"spec_name": "Mutiple Specs",
"is_activated": true
}
],
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"is_pg": false
},
{
"_id": {
"$oid": "628f35a1cb69fc0789e6917e"
},
"course_name": "Bachelor",
"fees": "Rs.1000.0/-",
"course_specialization": [
{
"spec_name": "Social Work",
"is_activated": true
}
],
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"is_pg": false
}
],
Student Application forms collection where we are storing student application forms details
[
{
"_id": {
"$oid": "62cd476adbc878a0490e20ee"
},
"spec_name1": "Social Work",
"spec_name2": "",
"spec_name3": "",
"student_id": {
"$oid": "62cd1374dbc878a0490e20a5"
},
"course_id": {
"$oid": "62aab6669b3740313d881a30"
},
"current_stage": 2.5,
"declaration": true,
"payment_info": {
"payment_id": "123458",
"status": "paid"
},
"enquiry_date": {
"$date": {
"$numberLong": "1657620330432"
}
},
"last_updated_time": {
"$date": {
"$numberLong": "1657621796062"
}
}
},
{
"_id": {
"$oid": "62cd476adbc878a0490e20ef"
},
"spec_name1": "",
"spec_name2": "",
"spec_name3": "",
"student_id": {
"$oid": "62cd1374dbc878a0490e20a5"
},
"course_id": {
"$oid": "62aab6669b3740313d881a38"
},
"current_stage": 2.5,
"declaration": true,
"payment_info": {
"payment_id": "123458",
"status": "paid"
},
"enquiry_date": {
"$date": {
"$numberLong": "1657620330432"
}
},
"last_updated_time": {
"$date": {
"$numberLong": "1657621796062"
}
}
},
{
"_id": {
"$oid": "62cdc12000b820f5ea58cc60"
},
"spec_name1": "Social Work",
"spec_name2": "",
"spec_name3": "",
"student_id": {
"$oid": "62cdad90a9b64d58b15e6976"
},
"course_id": {
"$oid": "628f35a1cb69fc0789e6917e"
},
"current_stage": 6.25,
"declaration": false,
"payment_info": {
"payment_id": "",
"status": ""
},
"enquiry_date": {
"$date": {
"$numberLong": "1657651488511"
}
},
"last_updated_time": {
"$date": {
"$numberLong": "1657651987155"
}
}
}
]
Desired output with every specification within the course
[
"_id": {
"coursename": "Master",
"spec": "Social Work",
"Application_Count": 1,
"Paid_Application_Count:0
},
{
"_id": {
"coursename": "Bachelor"
"spec":"" ,
"Application_Count": 1,
"Paid_Application_Count:0
},
{
"_id": {
"coursename": "BBA"
"spec":"" ,
"Application_Count": 1,
"Paid_Application_Count:1
},
]
Aggregation Query
[{
$match: {
college_id: ObjectId('628dfd41ef796e8f757a5c13')
}
}, {
$project: {
_id: 1,
course_name: 1,
course_specialization: 1
}
}, {
$unwind: {
path: '$course_name',
includeArrayIndex: 'course_index',
preserveNullAndEmptyArrays: true
}
}, {
$unwind: {
path: '$course_specialization',
includeArrayIndex: 'course_specs_index',
preserveNullAndEmptyArrays: true
}
}, {
$lookup: {
from: 'studentApplicationForms',
'let': {
id: '$_id',
spec: '$course_specialization.spec_name'
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [
'$course_id',
'$$id'
]
},
{
$eq: [
'$spec_name1',
'$$spec'
]
}
]
}
}
},
{
$project: {
student_id: 1,
payment_info: 1,
spec_name1: 1,
spec_name2: 1,
spec_name3: 1
}
}
],
as: 'student_application'
}
}, {
$unwind: {
path: '$student_application',
includeArrayIndex: 'application',
preserveNullAndEmptyArrays: true
}
}, {
$facet: {
course: [
{
$group: {
_id: {
course_name: '$course_name',
spec: '$course_specialization'
},
count: {
$count: {}
}
}
}
],
declatration: [
{
$group: {
_id: {
course_name: '$course_name',
spec: '$course_specialization'
},
count_dec: {
$sum: {
$cond: [
'$student_application.declaration',
1,
0
]
}
}
}
}
],
payment: [
{
$group: {
_id: {
course_name: '$course_name',
spec: '$course_specialization'
},
payment: {
$sum: {
$eq: [
'$student_application.payment_info.status',
'paid'
]
}
}
}
}
]
}
}]
Problem :
I am able to get application count but it is not getting unique value if 2 specs are same then duplicate value is coming as you can see on sample application collection Social Work is in two different course . So my aggregations is not grouping them based in course name.specs
Not able to find correct Paid_Application_Count and Application_Count
Update :
Updated JSON Data Matching use cases with different type of data
MongoDB Playground
You can do it in several different ways, I took the liberty to simplify the pipeline a little bit.
I will just mention that the structure does not fully make sense to me, and there are some additional contradictions between the sample input you provided and the "text" description/pipeline description.
Just a tiny example is payment_info_status being paid in the sample and capture in the pipeline.
These things will not change the pipeline structure, will just need to be fixed by you based on the actual needs.
db.courses.aggregate([
{
$project: {
_id: 1,
course_name: 1,
course_specialization: 1
}
},
{
$unwind: {
path: "$course_specialization",
preserveNullAndEmptyArrays: true
}
},
{
$lookup: {
from: "studentApplicationForms",
"let": {
courseId: "$_id",
spec: {
$ifNull: [
"$course_specialization.spec_name",
""
]
}
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [
"$spec_name1",
"$$spec"
]
},
{
$eq: [
"$$courseId",
"$course_id"
]
}
]
}
}
},
{
$project: {
student_id: 1,
payment_info: 1,
spec_name1: 1,
spec_name2: 1,
spec_name3: 1,
declaration: 1,
}
},
{
$group: {
_id: null,
count: {
$sum: 1
},
declatration: {
$sum: {
$cond: [
"$declaration",
1,
0
]
}
},
paid: {
$sum: {
$cond: [
{
$eq: [
"$payment_info.status",
"paid"
]
},
1,
0
]
}
},
}
}
],
as: "student_application"
}
},
{
$project: {
_id: {
coursename: "$course_name",
spec: "$course_specialization.spec_name",
Application_count: {
$ifNull: [
{
$first: "$student_application.count"
},
0
]
},
Declaration_count: {
$ifNull: [
{
$first: "$student_application.declatration"
},
0
]
},
Paid_Application_Count: {
$ifNull: [
{
$first: "$student_application.paid"
},
0
]
},
}
}
}
])
Mongo Playground

MongoDB : group and count users by gender, civilStatus and professionalCategory

I have a collection of users, each user has a profile. I want to implement a query to make statistics on users.
This is my collection.
[
{
"_id": ObjectId("61d2db0d273a9076d630697b"),
"state": "VALIDATED",
"phone": "xxx",
"civilStatus": "SINGLE",
"gender": "MALE",
"professionalCategory": "STUDENT"
}
]
I want the result to contain an array of all genders of users in the database, and the number of users with each gender. same for civilStatus and professionalCategories
This is the result i am looking for :
{
"total": 2000
"validated": 1800,
"genders": [
{
"value": "MALE",
"count": 1200
},
{
"value": "FEMALE",
"count": 600
}
],
"civilStatus": [
{
"value": "SINGLE",
"count": "300"
}
...
],
"professionalCategories": [
{
"value": "STUDENT",
"count": "250"
}
...
]
}
I implemented the query, but I still have a few things that I don't know how to do.
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$push: "$gender"
},
civilStatus: {
$push: "$civilStatus"
},
professionalCategories: {
$push: "$professionalCategory"
}
}
}
])
This is the result of this query :
{
"total": 2000
"validated": 1800,
"genders": [
"MALE",
"MALE",
"FEMALE",
"MALE",
"FEMALE",
"FEMALE"
...
],
"civilStatus": [
"SINGLE",
"MARIED",
"SINGLE",
...
],
"professionalCategories": [
"STUDENT",
"WORKER",
"RETIRED"
...
]
}
I miss how to group each gender, civil Status and professional Category and calculate the number of users for each one.
I also tried this query, but I don't know how to complete the "count" field for each item of the array :
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$addToSet: {
value: "$gender",
count: {
//
}
}
},
civilStatus: {
$addToSet: {
value: "$civilStatus",
count: {
//
}
}
},
professionalCategories: {
$addToSet: {
value: "$professionalCategory",
count: {
//
}
}
},
}
}
])
if the query was to treat only one field, for example gender. it would have been easier with "unwind". but here I have 3 fields.
can someone help me please?
You can use following aggregation
Here is the code
db.collection.aggregate([
{
"$facet": {
"genders": [
{
"$group": {
"_id": "$gender",
"total": { $sum: 1 }
}
}
],
"civilStatus": [
{
"$group": {
"_id": "$civilStatus",
"total": { $sum: 1 }
}
}
],
"professionalCategory": [
{
"$group": {
"_id": "$professionalCategory",
"total": { $sum: 1 }
}
}
],
"validated": [
{
"$group": {
"_id": "$state",
"total": { "$sum": 1 }
}
}
]
}
},
{
$set: {
validated: {
"$filter": {
"input": "$validated",
"cond": {
"$eq": [ "$$this._id", "VALIDATED" ]
}
}
}
}
},
{
$set: {
validated: {
"$ifNull": [
{
"$arrayElemAt": [ "$validated", 0 ]
},
0
]
}
}
},
{
$set: { validated: "$validated.total" }
}
])
Working Mongo playground

Paginate MongoDB aggregation response

I have a database of users that have skills. I have set up a way to find users in the database using am aggregation method included in mongoose. Depending on the search criteria I input into the aggregation, the results may be too big to actually display on my front end app. I am curious how I can paginate an aggregation query with the typical limit, page, and skip variables like you would do in a typical GET request.
Here is my aggregation query:
const foundUsers = await User.aggregate([
{
$addFields: {
matchingSkills: {
$filter: {
input: '$skills',
cond: {
$or: test,
},
},
},
requiredSkills,
},
},
{
$addFields: {
// matchingSkills: '$$REMOVE',
percentageMatch: {
$multiply: [
{ $divide: [{ $size: '$matchingSkills' }, skillSearch.length] }, // yu already know how many values you need to pass, thats' why `2`
100,
],
},
},
},
{
$addFields: {
matchingSkillsNames: {
$map: {
input: '$matchingSkills',
as: 'matchingSkill',
in: '$$matchingSkill.skill',
},
},
},
},
{
$addFields: {
missingSkills: {
$filter: {
input: '$requiredSkills',
cond: {
$not: {
$in: ['$$this', '$matchingSkillsNames'],
},
},
},
},
},
},
{
$match: { percentageMatch: { $gte: 25 } },
},
]);
Passing these skills to this aggregate function:
{
"skillSearch": [
{
"class": "skills",
"skill": "SQL",
"operator": "GT",
"yearsExperience": 6
},
{
"class": "skills",
"skill": "C",
"operator": "GT",
"yearsExperience": 1
}
]
}
Will result in a response similar to this:
{
"_id": "60184ce81e65633873d709aa",
"name": "Brad",
"email": "brad#gmail.com",
"password": "$2a$12$37v2RwaO5LhSMT8GJQSZyel.Aawn6AmlqqSOkZtopqIIXyJ0LRBfu",
"__v": 0,
"skills": [
{
"_id": "60a306ce819cde701c1934a8",
"skill": "SQL",
"yearsExperience": 8
},
{
"_id": "60a306ce819cde701c1934a9",
"skill": "C",
"yearsExperience": 5
},
{
"_id": "60a306ce819cde701c1934aa",
"skill": "PL/I",
"yearsExperience": 2
},
{
"_id": "60a306ce819cde701c1934ab",
"skill": "Awk",
"yearsExperience": 9
}
],
"matchingSkills": [
{
"_id": "60a306ce819cde701c1934a8",
"skill": "SQL",
"yearsExperience": 8
},
{
"_id": "60a306ce819cde701c1934a9",
"skill": "C",
"yearsExperience": 5
}
],
"requiredSkills": [
"SQL",
"C"
],
"percentageMatch": 100,
"matchingSkillsNames": [
"SQL",
"C"
],
"missingSkills": []
},
For the pagination, you need to pass the page and size form the front end
$sort to sort the documents,
$skip skip the documents. For eg : if you are in page two and u need 10 rows , u need to skip first 10 documents
$limit to how many documents you need to show after skip
here is the code
db.collection.aggregate([
{
$sort: {
_id: 1
}
},
{
$skip: 0 // page*size
},
{
$limit: 10 // size
}
])
Working Mongo playground
More than, the pagination requires total elements too, for that
db.collection.aggregate([
{
"$facet": {
"elements": [
{
"$group": {
"_id": null,
"count": { "$sum": 1 }
}
}
],
"data": [
{ $sort: { _id: 1 } },
{ $skip: 0 }, // page*size
{ $limit: 10 } // size
]
}
},
{ "$unwind": "$elements" },
{
"$addFields": {
"elements": "$$REMOVE",
"totalRecords": "$elements.count"
}
}
])
Working Mongo playground

Group on field while getting the last document for each field with MongoDB

Problem
I'm trying to group a stock inventory by products. At first, my stock entries was fully filled each time so I made this aggregate:
[
{ $sort: { date: 1 } },
{
$group: {
_id: '$userId',
stocks: { $last: '$stocks' },
},
},
{ $unwind: '$stocks' },
{
$group: {
_id: '$stocks.productId',
totalQuantity: { $sum: '$stocks.quantity' },
stocks: { $push: { userId: '$_id', quantity: '$stocks.quantity' } },
},
},
]
Now, it can be possible that a stock entry doesn't contain all the products filled. So I'm stuck while writing the new aggregate.
Basically I need to group every products by productId and have an array of the last entry for each user.
Output
This is my expected output:
[
{
"_id": ObjectId("5e75eae1359fc8159d5b6073"),
"totalQuantity": 33,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 33
}
]
},
{
"_id": ObjectId("5e75eaea359fc8159d5b6074"),
"totalQuantity": 2,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 2
}
]
}
]
Documents
Documents (when fully filled):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
},
{
"productId": ObjectId("5e75eaea359fc8159d5b6074"),
"quantity": 2
}
]
}
Sometimes it won't be filled for the whole inventory (that's why I need the lastDate):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
}
]
}
Try this one:
db.collection.aggregate([
{
$group: {
_id: "$userId",
root: {
$push: "$$ROOT"
}
}
},
{
$addFields: {
root: {
$map: {
input: "$root",
as: "data",
in: {
"stocks": {
$map: {
input: "$$data.stocks",
as: "stock",
in: {
"productId": "$$stock.productId",
"userId": "$$data.userId",
"quantity": "$$stock.quantity",
"lastDate": "$$data.date"
}
}
}
}
}
}
}
},
{
$unwind: "$root"
},
{
$replaceRoot: {
newRoot: "$root"
}
},
{
$unwind: "$stocks"
},
{
$sort: {
"stocks.lastDate": 1
}
},
{
$group: {
_id: "$stocks.productId",
totalQuantity: {
$last: "$stocks.quantity"
},
stocks: {
$last: "$stocks"
}
}
},
{
$addFields: {
stocks: [
{
"lastDate": "$stocks.lastDate",
"quantity": "$stocks.quantity",
"userId": "$stocks.userId"
}
]
}
}
])
MongoPlayground

Mongodb multi level aggregation

Data in mongo
[{
"_id": "5d71d1432f7c8151c58c4481",
"payment": {
"transactions": [
{
"_id": "5d71d1ff2f7c8151c58c44cf",
"method": "paytm",
"amount": 100,
"paymentOn": "2019-09-06T03:26:44.959Z"
},
{
"_id": "5d71d1ff2f7c8151c58c44ce",
"method": "cash",
"amount": 650,
"paymentOn": "2019-09-06T03:26:55.531Z"
}
],
"status": "partial"
},
"customer": "5d66c434c24f2b1fb6772014",
"order": {
"orderNumber": "WP-ORD-06092019-001",
"total": 770,
"balance": 20
}
},
{
"_id": "5d71d1432f7c8151c58c4481",
"payment": {
"transactions": [
{
"_id": "5d71d1ff2f7c8151c58c44cf",
"method": "paytm",
"amount": 100,
"paymentOn": "2019-09-06T03:26:44.959Z"
}
],
"status": "partial"
},
"customer": "5d66c434c24f2b1fb6772014",
"order": {
"orderNumber": "WP-ORD-06092019-001",
"total": 200,
"balance": 100
}
}]
I want to aggregate payments by method.
So the result would look like below:
Output:
Paytm: 200
Cash : 650
Unpaid(Balance): 120
I have tried:
[
{
'$unwind': {
'path': '$payment.transactions',
'preserveNullAndEmptyArrays': true
}
}, {
'$project': {
'amount': '$payment.transactions.amount',
'method': '$payment.transactions.method'
}
}, {
'$group': {
'_id': '$method',
'amount': {
'$sum': '$amount'
}
}
}
]
But how to include balance calculation as well
Using the above dataset, use the aggregate pipeline for calculation using aggregate as:
db.collection.aggregate([
{
$facet: {
paidAmounts: [
{ '$unwind': { 'path': '$payment.transactions', 'preserveNullAndEmptyArrays': true } },
{
$group: {
_id: "$payment.transactions.method",
amount: {
$sum: "$payment.transactions.amount"
}
}
}
],
leftAmounts: [
{
$group: {
_id: null,
balance: {
$sum: "$order.balance"
}
}
}
]
}
}
])
giving output:
here leftAmounts has left balance and paidAmounts having grouped paid data on basis of payment type
[
{
"leftAmounts": [
{
"_id": null,
"balance": 120
}
],
"paidAmounts": [
{
"_id": "cash",
"amount": 650
},
{
"_id": "paytm",
"amount": 200
}
]
}
]
Working solution : https://mongoplayground.net/p/7IWELKKMsWe
db.collection.aggregate([
{
"$unwind": "$payment.transactions"
},
{
"$group": {
"_id": "$_id",
"balance": {
"$first": "$order.balance"
},
"paytm": {
"$sum": {
"$cond": [
{
"$eq": [
"$payment.transactions.method",
"paytm"
]
},
"$payment.transactions.amount",
0
]
}
},
"cash": {
"$sum": {
"$cond": [
{
"$eq": [
"$payment.transactions.method",
"cash"
]
},
"$payment.transactions.amount",
0
]
}
}
}
},
{
"$group": {
"_id": null,
"balance": {
"$sum": "$balance"
},
"cash": {
"$sum": "$cash"
},
"paytm": {
"$sum": "$paytm"
}
}
}
])