MongoDB : group and count users by gender, civilStatus and professionalCategory - mongodb

I have a collection of users, each user has a profile. I want to implement a query to make statistics on users.
This is my collection.
[
{
"_id": ObjectId("61d2db0d273a9076d630697b"),
"state": "VALIDATED",
"phone": "xxx",
"civilStatus": "SINGLE",
"gender": "MALE",
"professionalCategory": "STUDENT"
}
]
I want the result to contain an array of all genders of users in the database, and the number of users with each gender. same for civilStatus and professionalCategories
This is the result i am looking for :
{
"total": 2000
"validated": 1800,
"genders": [
{
"value": "MALE",
"count": 1200
},
{
"value": "FEMALE",
"count": 600
}
],
"civilStatus": [
{
"value": "SINGLE",
"count": "300"
}
...
],
"professionalCategories": [
{
"value": "STUDENT",
"count": "250"
}
...
]
}
I implemented the query, but I still have a few things that I don't know how to do.
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$push: "$gender"
},
civilStatus: {
$push: "$civilStatus"
},
professionalCategories: {
$push: "$professionalCategory"
}
}
}
])
This is the result of this query :
{
"total": 2000
"validated": 1800,
"genders": [
"MALE",
"MALE",
"FEMALE",
"MALE",
"FEMALE",
"FEMALE"
...
],
"civilStatus": [
"SINGLE",
"MARIED",
"SINGLE",
...
],
"professionalCategories": [
"STUDENT",
"WORKER",
"RETIRED"
...
]
}
I miss how to group each gender, civil Status and professional Category and calculate the number of users for each one.
I also tried this query, but I don't know how to complete the "count" field for each item of the array :
db.getCollection("users").aggregate([
{
$group: {
_id: null,
validated: {
$sum: {
$cond: {
if: { $eq: ["$state", "VALIDATED"] },
then: 1,
else: 0
}
}
},
genders: {
$addToSet: {
value: "$gender",
count: {
//
}
}
},
civilStatus: {
$addToSet: {
value: "$civilStatus",
count: {
//
}
}
},
professionalCategories: {
$addToSet: {
value: "$professionalCategory",
count: {
//
}
}
},
}
}
])
if the query was to treat only one field, for example gender. it would have been easier with "unwind". but here I have 3 fields.
can someone help me please?

You can use following aggregation
Here is the code
db.collection.aggregate([
{
"$facet": {
"genders": [
{
"$group": {
"_id": "$gender",
"total": { $sum: 1 }
}
}
],
"civilStatus": [
{
"$group": {
"_id": "$civilStatus",
"total": { $sum: 1 }
}
}
],
"professionalCategory": [
{
"$group": {
"_id": "$professionalCategory",
"total": { $sum: 1 }
}
}
],
"validated": [
{
"$group": {
"_id": "$state",
"total": { "$sum": 1 }
}
}
]
}
},
{
$set: {
validated: {
"$filter": {
"input": "$validated",
"cond": {
"$eq": [ "$$this._id", "VALIDATED" ]
}
}
}
}
},
{
$set: {
validated: {
"$ifNull": [
{
"$arrayElemAt": [ "$validated", 0 ]
},
0
]
}
}
},
{
$set: { validated: "$validated.total" }
}
])
Working Mongo playground

Related

MongoDB aggregate using $match with $expr with array

MongoDB 5.0.9
I am trying to get
value of application within course and their specification
value of paid application ( status : paid) based on course and their specification
courses collection having multiple courses with specification which might be there maybe not
[
{
"_id": {
"$oid": "62aab6669b3740313d881a30"
},
"course_name": "Master",
"fees": "Rs.1000.0/-",
"course_specialization": [
{
"spec_name": "Social Work",
"is_activated": true
}
],
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"is_pg": true
},
{
"_id": {
"$oid": "62aab6669b3740313d881a38"
},
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"course_name": "BBA",
"fees": "Rs.1000.0/-",
"is_pg": false,
"course_specialization": null
},
{
"_id": {
"$oid": "628f3967cb69fc0789e69181"
},
"course_name": "BTech",
"fees": "Rs.1000.0/-",
"course_specialization": [
{
"spec_name": "Computer Science and Engineering",
"is_activated": true
},
{
"spec_name": "Mutiple Specs",
"is_activated": true
}
],
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"is_pg": false
},
{
"_id": {
"$oid": "628f35a1cb69fc0789e6917e"
},
"course_name": "Bachelor",
"fees": "Rs.1000.0/-",
"course_specialization": [
{
"spec_name": "Social Work",
"is_activated": true
}
],
"college_id": {
"$oid": "628dfd41ef796e8f757a5c13"
},
"is_pg": false
}
],
Student Application forms collection where we are storing student application forms details
[
{
"_id": {
"$oid": "62cd476adbc878a0490e20ee"
},
"spec_name1": "Social Work",
"spec_name2": "",
"spec_name3": "",
"student_id": {
"$oid": "62cd1374dbc878a0490e20a5"
},
"course_id": {
"$oid": "62aab6669b3740313d881a30"
},
"current_stage": 2.5,
"declaration": true,
"payment_info": {
"payment_id": "123458",
"status": "paid"
},
"enquiry_date": {
"$date": {
"$numberLong": "1657620330432"
}
},
"last_updated_time": {
"$date": {
"$numberLong": "1657621796062"
}
}
},
{
"_id": {
"$oid": "62cd476adbc878a0490e20ef"
},
"spec_name1": "",
"spec_name2": "",
"spec_name3": "",
"student_id": {
"$oid": "62cd1374dbc878a0490e20a5"
},
"course_id": {
"$oid": "62aab6669b3740313d881a38"
},
"current_stage": 2.5,
"declaration": true,
"payment_info": {
"payment_id": "123458",
"status": "paid"
},
"enquiry_date": {
"$date": {
"$numberLong": "1657620330432"
}
},
"last_updated_time": {
"$date": {
"$numberLong": "1657621796062"
}
}
},
{
"_id": {
"$oid": "62cdc12000b820f5ea58cc60"
},
"spec_name1": "Social Work",
"spec_name2": "",
"spec_name3": "",
"student_id": {
"$oid": "62cdad90a9b64d58b15e6976"
},
"course_id": {
"$oid": "628f35a1cb69fc0789e6917e"
},
"current_stage": 6.25,
"declaration": false,
"payment_info": {
"payment_id": "",
"status": ""
},
"enquiry_date": {
"$date": {
"$numberLong": "1657651488511"
}
},
"last_updated_time": {
"$date": {
"$numberLong": "1657651987155"
}
}
}
]
Desired output with every specification within the course
[
"_id": {
"coursename": "Master",
"spec": "Social Work",
"Application_Count": 1,
"Paid_Application_Count:0
},
{
"_id": {
"coursename": "Bachelor"
"spec":"" ,
"Application_Count": 1,
"Paid_Application_Count:0
},
{
"_id": {
"coursename": "BBA"
"spec":"" ,
"Application_Count": 1,
"Paid_Application_Count:1
},
]
Aggregation Query
[{
$match: {
college_id: ObjectId('628dfd41ef796e8f757a5c13')
}
}, {
$project: {
_id: 1,
course_name: 1,
course_specialization: 1
}
}, {
$unwind: {
path: '$course_name',
includeArrayIndex: 'course_index',
preserveNullAndEmptyArrays: true
}
}, {
$unwind: {
path: '$course_specialization',
includeArrayIndex: 'course_specs_index',
preserveNullAndEmptyArrays: true
}
}, {
$lookup: {
from: 'studentApplicationForms',
'let': {
id: '$_id',
spec: '$course_specialization.spec_name'
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [
'$course_id',
'$$id'
]
},
{
$eq: [
'$spec_name1',
'$$spec'
]
}
]
}
}
},
{
$project: {
student_id: 1,
payment_info: 1,
spec_name1: 1,
spec_name2: 1,
spec_name3: 1
}
}
],
as: 'student_application'
}
}, {
$unwind: {
path: '$student_application',
includeArrayIndex: 'application',
preserveNullAndEmptyArrays: true
}
}, {
$facet: {
course: [
{
$group: {
_id: {
course_name: '$course_name',
spec: '$course_specialization'
},
count: {
$count: {}
}
}
}
],
declatration: [
{
$group: {
_id: {
course_name: '$course_name',
spec: '$course_specialization'
},
count_dec: {
$sum: {
$cond: [
'$student_application.declaration',
1,
0
]
}
}
}
}
],
payment: [
{
$group: {
_id: {
course_name: '$course_name',
spec: '$course_specialization'
},
payment: {
$sum: {
$eq: [
'$student_application.payment_info.status',
'paid'
]
}
}
}
}
]
}
}]
Problem :
I am able to get application count but it is not getting unique value if 2 specs are same then duplicate value is coming as you can see on sample application collection Social Work is in two different course . So my aggregations is not grouping them based in course name.specs
Not able to find correct Paid_Application_Count and Application_Count
Update :
Updated JSON Data Matching use cases with different type of data
MongoDB Playground
You can do it in several different ways, I took the liberty to simplify the pipeline a little bit.
I will just mention that the structure does not fully make sense to me, and there are some additional contradictions between the sample input you provided and the "text" description/pipeline description.
Just a tiny example is payment_info_status being paid in the sample and capture in the pipeline.
These things will not change the pipeline structure, will just need to be fixed by you based on the actual needs.
db.courses.aggregate([
{
$project: {
_id: 1,
course_name: 1,
course_specialization: 1
}
},
{
$unwind: {
path: "$course_specialization",
preserveNullAndEmptyArrays: true
}
},
{
$lookup: {
from: "studentApplicationForms",
"let": {
courseId: "$_id",
spec: {
$ifNull: [
"$course_specialization.spec_name",
""
]
}
},
pipeline: [
{
$match: {
$expr: {
$and: [
{
$eq: [
"$spec_name1",
"$$spec"
]
},
{
$eq: [
"$$courseId",
"$course_id"
]
}
]
}
}
},
{
$project: {
student_id: 1,
payment_info: 1,
spec_name1: 1,
spec_name2: 1,
spec_name3: 1,
declaration: 1,
}
},
{
$group: {
_id: null,
count: {
$sum: 1
},
declatration: {
$sum: {
$cond: [
"$declaration",
1,
0
]
}
},
paid: {
$sum: {
$cond: [
{
$eq: [
"$payment_info.status",
"paid"
]
},
1,
0
]
}
},
}
}
],
as: "student_application"
}
},
{
$project: {
_id: {
coursename: "$course_name",
spec: "$course_specialization.spec_name",
Application_count: {
$ifNull: [
{
$first: "$student_application.count"
},
0
]
},
Declaration_count: {
$ifNull: [
{
$first: "$student_application.declatration"
},
0
]
},
Paid_Application_Count: {
$ifNull: [
{
$first: "$student_application.paid"
},
0
]
},
}
}
}
])
Mongo Playground

MongoDB match filters with grouping and get total count

My sample data:
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_2",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
{
"_id": "random_id_3",
"priority": "P2",
"owners": ["user-1", "user-2"],
},
I want to run an aggregation pipeline on the data involving match filters and grouping, also I want to limit the number of groups returned as well as the number of items in each group.
Essentially, if limit=2, limit_per_group=1, group_by=owner, priority=P1, I want the following results:
[
{
"data": [
{
"group_key": "user-1",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
{
"group_key": "user-2",
"total_items_in_group": 2,
"limited_items": [
{
"_id": "random_id_1",
"priority": "P1",
"owners": ["user-1", "user-2"],
},
],
},
]
},
{
"metadata": {
"total_items_matched": 2,
"total_groups": 2
}
},
]
Need some help on how to write an aggregation pipeline to get the required result.
My current query is as follows:
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 1,
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched"
}
]
}
}
Mongo playground link
I am unable to calculate the total number of groups.
add new stage of $addfields at the end of pipeline
db.collection.aggregate([
{
"$match": {
"priority": "P1"
}
},
{
"$facet": {
"data": [
{
$addFields: {
"group_by_owners": "$owners"
}
},
{
$unwind: "$group_by_owners"
},
{
$group: {
"_id": "$group_by_owners",
"total_items_in_group": {
$sum: 1
},
"items": {
$push: "$$ROOT"
}
}
},
{
$sort: {
"total": -1
}
},
{
$unset: "items.group_by_owners"
},
{
$project: {
"_id": 0,
"group_key": "$_id",
"total_items_in_group": 1,
"limited_items": {
$slice: [
"$items",
1
]
}
}
},
{
"$limit": 2
}
],
"metadata": [
{
$count: "total_items_matched",
}
]
}
},
{
"$addFields": {
"metadata.total_groups": {
"$size": "$data"
}
}
}
])
https://mongoplayground.net/p/y5a0jvr6fxI

Mongo Query to fetch distinct nested documents

I need to fetch distinct nested documents.
Please find the sample document:
{
"propertyId": 1001820437,
"date": ISODate("2020-07-17T00:00:00.000Z"),
"HList":[
{
"productId": 123,
"name": "Dubai",
"tsh": true
}
],
"PList":[
{
"productId": 123,
"name": "Dubai",
"tsh": false
},
{
"productId": 234,
"name": "India",
"tsh": true
}
],
"CList":[
{
"productId": 234,
"name": "India",
"tsh": false
}
]
}
Expected result is:
{
"produts":[
{
"productId": 123,
"name": "Dubai"
},
{
"productId": 234,
"name": "India"
}
]
}
I tried with this query:
db.property.aggregate([
{
$match: {
"propertyId": 1001820437,
"date": ISODate("2020-07-17T00:00:00.000Z")
}
},
{
"$project": {
"_id": 0,
"unique": {
"$filter": {
"input": {
"$setDifference": [
{
"$concatArrays": [
"$HList.productId",
"$PList.productId",
"$CList.productId"
]
},
[]
]
},
"cond": {
"$ne": [ "$$this", "" ]
}
}
}
}
}
]);
Is $setDifference aggregation is correct choice here?
My query returns only unique product ids but i need a productId with name.
Could someone help me to solve this?
Thanks in advance
You can use $projectfirst to get rid of tsh field and then run $setUnion which ignores duplicated entries:
db.collection.aggregate([
{
$project: {
"HList.tsh": 0,
"PList.tsh": 0,
"CList.tsh": 0,
}
},
{
$project: {
products: {
$setUnion: [ "$HList", "$PList", "$CList" ]
}
}
}
])
Mongo Playground
The following two aggregations return the expected and same result (you can use any of the two):
db.collection.aggregate( [
{
$project: {
_id: 0,
products: {
$reduce: {
input: { $setUnion: [ "$HList", "$PList", "$CList" ] },
initialValue: [],
in: {
$setUnion: [ "$$value", [ { productId: "$$this.productId", name: "$$this.name" } ] ]
}
}
}
}
}
] )
This one is little verbose:
db.collection.aggregate( [
{
$project: { list: { $setUnion: [ "$HList", "$PList", "$CList" ] } }
},
{
$unwind: "$list"
},
{
$group: {
_id: null,
products: { $addToSet: { "productId": "$list.productId", "name": "$list.name" } }
}
},
{
$project: { _id: 0 }
}
] )
db.collection.aggregate([
{
$match: {
"propertyId": 1001820437,
"date": ISODate("2020-07-17T00:00:00.000Z")
}
},
{
$project: {
products: {
$filter: {
input: { "$setUnion" : ["$CList", "$HList", "$PList"] },
as: 'product',
cond: {}
}
}
}
},
{
$project: {
"_id":0,
"products.tsh": 1,
"products.name": 1,
}
},
])

Group on field while getting the last document for each field with MongoDB

Problem
I'm trying to group a stock inventory by products. At first, my stock entries was fully filled each time so I made this aggregate:
[
{ $sort: { date: 1 } },
{
$group: {
_id: '$userId',
stocks: { $last: '$stocks' },
},
},
{ $unwind: '$stocks' },
{
$group: {
_id: '$stocks.productId',
totalQuantity: { $sum: '$stocks.quantity' },
stocks: { $push: { userId: '$_id', quantity: '$stocks.quantity' } },
},
},
]
Now, it can be possible that a stock entry doesn't contain all the products filled. So I'm stuck while writing the new aggregate.
Basically I need to group every products by productId and have an array of the last entry for each user.
Output
This is my expected output:
[
{
"_id": ObjectId("5e75eae1359fc8159d5b6073"),
"totalQuantity": 33,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 33
}
]
},
{
"_id": ObjectId("5e75eaea359fc8159d5b6074"),
"totalQuantity": 2,
"stocks": [
{
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"lastDate": "2020-03-21T11:45:53.077Z",
"quantity": 2
}
]
}
]
Documents
Documents (when fully filled):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
},
{
"productId": ObjectId("5e75eaea359fc8159d5b6074"),
"quantity": 2
}
]
}
Sometimes it won't be filled for the whole inventory (that's why I need the lastDate):
{
"_id": ObjectId("5e75fe71e4a3e0323ba47e0a"),
"date": "2020-03-21T11:45:53.077Z",
"userId": ObjectId("5e75f498359fc8159d5b6075"),
"stocks": [
{
"productId": ObjectId("5e75eae1359fc8159d5b6073"),
"quantity": 33
}
]
}
Try this one:
db.collection.aggregate([
{
$group: {
_id: "$userId",
root: {
$push: "$$ROOT"
}
}
},
{
$addFields: {
root: {
$map: {
input: "$root",
as: "data",
in: {
"stocks": {
$map: {
input: "$$data.stocks",
as: "stock",
in: {
"productId": "$$stock.productId",
"userId": "$$data.userId",
"quantity": "$$stock.quantity",
"lastDate": "$$data.date"
}
}
}
}
}
}
}
},
{
$unwind: "$root"
},
{
$replaceRoot: {
newRoot: "$root"
}
},
{
$unwind: "$stocks"
},
{
$sort: {
"stocks.lastDate": 1
}
},
{
$group: {
_id: "$stocks.productId",
totalQuantity: {
$last: "$stocks.quantity"
},
stocks: {
$last: "$stocks"
}
}
},
{
$addFields: {
stocks: [
{
"lastDate": "$stocks.lastDate",
"quantity": "$stocks.quantity",
"userId": "$stocks.userId"
}
]
}
}
])
MongoPlayground

I can nt make a sum in a nested mongo query

I tried all weekend to write this query, but it is not working. The nested field worked, but the other field does not show the $sum.
This is a example of the collections:
{
"_id": ObjectId("5dc28a3fd89a4d7bb82a75b4"),
"bedrijf": ObjectId("5c7ee51d2478a30fa4357b4c"),
"doelgroep": "Kinderen 12-",
"recreatieleider": 1,
"__v": 0
}
I like to group the key "bedrijf" and make a subgroup "doelgroep" The code I use is:
{
$group: {
_id: {
bedrijf: "$bedrijf",
doelgroep: "$doelgroep",
},
"totaal": {
$sum: 1
}
}
},
{
$group : {
_id : '$_id.bedrijf',
doelgroep : {
"$push": {
doelgroep:"$_id.doelgroep",
total:"$totaal"
}
},
"recreatieleider": {
$sum: "$recreatieleider"
}
}
}
But when I see the results the key "recreatieleder" does not make the sum of the entries.
{
"results": [
{
"_id": "5c69c5d939fbb38a1fcf3146",
"doelgroep": [
{
"doelgroep": "Gezinnen",
"total": 1
},
{
"doelgroep": "Kinderen 9-11",
"total": 9
}
],
"recreatieleider": 0
}
]
}
So how can I count the "recreatieleider" value?
My final expected output must be:
{
"results": [
{
"_id": "5c69c5d939fbb38a1fcf3146",
"doelgroep": [
{
"doelgroep": "Gezinnen",
"total": 1
},
{
"doelgroep": "Kinderen 9-11",
"total": 9
}
],
"recreatieleider": 20
}
]
}
added question
I ran to a other problem. You make me fixed the first problem. When a mak a other sub group it will not show me
I use this code
{
$group: {
_id: {
bedrijf: "$bedrijf",
doelgroep: "$doelgroep",
},
"totaal": {
$sum: 1
},
"hulpkrachten": {
$sum: "$hulpkrachten"
},
"recreatieleider": {
$sum: "$recreatieleider"
},
"stagiaires": {
$sum: "$stagiaires"
},
}
},
{
$group : {
_id : '$_id.bedrijf',
doelgroep : {
"$push": {
doelgroep:"$_id.doelgroep",
total:"$totaal"
}
},
soortactiviteit : {
"$push": {
soortactiviteit:"$_id.soortactiviteit",
total:"$totaal"
}
}
}
}
You can see I add "soortactiviteit" to it, but it wil not add u subgroup
The expeditor outcome must be
{
"results": [
{
"_id": "5c69c5d939fbb38a1fcf3146",
"doelgroep": [
{
"doelgroep": "Gezinnen",
"total": 1
},
{
"doelgroep": "Kinderen 9-11",
"total": 9
}
],
"soortactivieit": [
{
"doelgroep": "creativiteit",
"total": 20
},
{
"doelgroep": "sports,
"total": 9
}
],
"recreatieleider": 20
}
]
}
Maybe you can help with my last question of the subject.. Thank
Not sure, what you are trying to achieve with this query. However, below query works for your scenario.
{
$group:{
_id:{
bedrijf:"$bedrijf",
doelgroep:"$doelgroep",
},
"totaal":{
$sum:1
},
"recreatieleider":{
$sum:"$recreatieleider"
}
}
},
{
$group:{
_id:'$_id.bedrijf',
doelgroep:{
"$push":{
doelgroep:"$_id.doelgroep",
total:"$totaal"
}
},
"recreatieleider":{
$sum:"$recreatieleider"
}
}
}
When first $group runs the key recreatieleider is lost in
{
$group: {
_id: {
bedrijf: "$bedrijf",
doelgroep: "$doelgroep",
},
"totaal": {
$sum: 1
}
}
},
change it to and try then:
{
$group: {
_id: {
bedrijf: "$bedrijf",
doelgroep: "$doelgroep",
},
"totaal": {
$sum: 1
},
"recreatieleider": {
$sum: "$recreatieleider"
}
}
},