Retrieving a count that matches specified criteria in a $group aggregation - mongodb

So I am looking to group documents in my collection on a specific field, and for the output results of each group, I am looking to include the following:
A count of all documents in the group that match a specific query (i.e. a count of documents that satisfy some expression { "$Property": "Value" })
The total number of documents in the group
(Bonus, as I suspect that this is not easily accomplished) Properties of a document that correspond to a $min/$max accumulator
I am very new to the syntax used to query in mongo and don't quite understand how it all works, but after some research, I've managed to get it down to the following query (please note, I am currently using version 3.0.12 for my mongo db, but I believe we will upgrade in a couple of months time):
// Two-stage grouping: first count documents per (GroupID, Status) pair, then
// roll those per-status counts up into one document per GroupID.
db.getCollection('myCollection').aggregate(
[
{
// Stage 1: one output document per distinct GroupID/Status combination.
$group: {
_id: {
GroupID: "$GroupID",
Status: "$Status"
},
total: { $sum: 1 },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$DateCreated" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
},
{
// Stage 2: regroup by GroupID only; Statuses collects each status with its
// count and TotalCount adds the per-status totals together.
$group: {
_id: "$_id.GroupID",
Statuses: {
$push: {
Status: "$_id.Status",
Count: "$total"
}
},
TotalCount: { $sum: "$total" },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$EarliestCreatedDate" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
}
]
)
Essentially what I am looking to retrieve is the Count for specific Status values, and project them into one final result document that looks like the following:
{
GroupName,
EarliestCreatedDate,
EarliestCreatedBy,
LastModifiedDate,
LastModifiedBy,
TotalCount,
PendingCount,
ClosedCount
}
Where PendingCount and ClosedCount are the total number of documents in each group that have a status Pending/Closed. I suspect I need to use $project with some other expression to extract this value, but I don't really understand the aggregation pipeline well enough to figure this out.
Also the EarliestCreatedBy and LastModifiedBy are the users who created/modified the document(s) corresponding to the EarliestCreatedDate and LastModifiedDate respectively. As I mentioned, I think retrieving these values will add another layer of complexity, so if there is no practical solution, I am willing to forgo this requirement.
Any suggestions/tips would be very much appreciated.

You can try below aggregation stages.
$group
Calculate all the necessary counts TotalCount, PendingCount and ClosedCount for each GroupID
Calculate $min and $max for EarliestCreatedDate and LastModifiedDate respectively and push all the fields to CreatedByLastModifiedBy to be compared later for fetching EarliestCreatedBy and LastModifiedBy for each GroupID
$project
Project all the fields for response
$filter the EarliestCreatedDate value against the data in the CreatedByLastModifiedBy and $map the matching CreatedBy to the EarliestCreatedBy and $arrayElemAt to convert the array to object.
Similar steps for calculating LastModifiedBy
// Per-GroupID summary: total / pending / closed document counts, the earliest
// creation date and latest modification date, and the users attached to the
// documents carrying those dates.
// Requires MongoDB 3.2+ because of $filter and $arrayElemAt.
db.getCollection('myCollection').aggregate([
  {
    $group: {
      _id: "$GroupID",
      TotalCount: { $sum: 1 },
      // "$Status" needs the $ prefix to be read as a field path; without it the
      // literal string "Status" is compared and the count is always 0.
      PendingCount: {
        $sum: {
          $cond: { if: { $eq: ["$Status", "Pending"] }, then: 1, else: 0 }
        }
      },
      ClosedCount: {
        $sum: {
          $cond: { if: { $eq: ["$Status", "Closed"] }, then: 1, else: 0 }
        }
      },
      GroupName: { $first: "$GroupName" },
      EarliestCreatedDate: { $min: "$DateCreated" },
      LastModifiedDate: { $max: "$LastModifiedDate" },
      // Keep every document's user/date pairs so the $project stage can look up
      // which user belongs to the min / max date.
      CreatedByLastModifiedBy: {
        $push: {
          CreatedBy: "$CreatedBy",
          LastModifiedBy: "$LastModifiedBy",
          DateCreated: "$DateCreated",
          LastModifiedDate: "$LastModifiedDate"
        }
      }
    }
  },
  {
    $project: {
      _id: 0,
      GroupName: 1,
      EarliestCreatedDate: 1,
      // Filter the pushed entries down to those whose DateCreated equals the
      // group's minimum, map them to CreatedBy, and take the first match.
      EarliestCreatedBy: {
        $arrayElemAt: [
          {
            $map: {
              input: {
                $filter: {
                  input: "$CreatedByLastModifiedBy",
                  as: "CrBy",
                  cond: { $eq: ["$EarliestCreatedDate", "$$CrBy.DateCreated"] }
                }
              },
              as: "EaCrBy",
              // `in` takes an expression, not an object wrapping a bare string.
              in: "$$EaCrBy.CreatedBy"
            }
          },
          0
        ]
      },
      LastModifiedDate: 1,
      // Same lookup against the group's maximum LastModifiedDate.
      LastModifiedBy: {
        $arrayElemAt: [
          {
            $map: {
              input: {
                $filter: {
                  input: "$CreatedByLastModifiedBy",
                  as: "MoBy",
                  cond: { $eq: ["$LastModifiedDate", "$$MoBy.LastModifiedDate"] }
                }
              },
              as: "LaMoBy",
              in: "$$LaMoBy.LastModifiedBy"
            }
          },
          0
        ]
      },
      TotalCount: 1,
      PendingCount: 1,
      ClosedCount: 1
    }
  }
])
Update for Version < 3.2
$filter and $arrayElemAt are not available in your version (both were added in MongoDB 3.2). Below is the equivalent.
The comparison logic is the same: $map creates an array that holds LastModifiedBy for every matching entry and false for every non-matching entry.
Next step is to use $setDifference to compare the previous array values with array [false] which returns the elements that only exist in the first set.
// Replacement for the $filter-based lookup: $map emits LastModifiedBy for entries
// whose LastModifiedDate equals the group's LastModifiedDate and false otherwise;
// $setDifference against [false] then removes the false placeholders.
LastModifiedBy: {
$setDifference: [{
$map: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
in: {
$cond: [{
$eq: ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
},
"$$MoBy.LastModifiedBy",
false
]
}
}
},
[false]
]
}
Add $unwind stage after $project stage to change to object
{$unwind:"$LastModifiedBy"}
Similar steps for calculating EarliestCreatedBy

Related

mongodb - check a collection for mutual likes

I had a query that worked before when likes collection had users stored as from and to fields. The query below checked for the two fields and if like was true.
Since then I changed the way users are stored in the likes collection.
.collection('likes')
.aggregate([ {$match: {$and : [{$or: [{from: userId}, {to: userId}]}, {like: true}]}},
{$group: {
_id: 0,
from: {$addToSet: "$from"},
to: {$addToSet: "$to"},
}
},
{$project: {
_id: 0,
users: {
$filter: {
input: {$setIntersection: ["$from", "$to"]},
cond: {$ne: ["$$this", userId]}
}
}
}
},
This is how likes collection used to store data (and above query worked) in figuring out mutual likers for a userId passed in req.body
{
"_id": "xw5vk1s_PpJaal46di",
"from": "xw5vk1s",
"to": "PpJaal46di"
"like": true,
}
and now I changed users to an array.
{
"_id": "xw5vk1s_PpJaal46di",
"users": [
"xw5vk1s",//first element equivalent to from
"PpJaal46di"//equivalent to previously to
],
"like": true,
}
I am not sure how to modify the query to now check for array elements in users field now that from and to is not where the two users liking each other are stored.
For MongoDB v5.2+, you can use $sortArray to create a common key for the field users and $group to get the count. You will get mutual likes by count > 1.
// Mutual likes for one user (MongoDB 5.2+).
// Each like is stored as users: [from, to]; sorting that array gives both
// directions of a pair the same grouping key, so a pair liked in both
// directions ends up with count 2.
db.collection.aggregate([
  {
    // Restrict to real likes (as the original from/to query did) so that a
    // like: false document in the opposite direction is not counted as mutual.
    $match: {
      users: "xw5vk1s",
      like: true
    }
  },
  {
    $group: {
      _id: {
        $sortArray: {
          input: "$users",
          sortBy: 1
        }
      },
      count: { $sum: 1 }
    }
  },
  {
    // Keep only pairs seen more than once, i.e. liked in both directions.
    $match: {
      count: { $gt: 1 }
    }
  }
])
Mongo Playground
Prior to MongoDB v5.2, you can create sorted key for grouping through $unwind and $sort then $push back the users in $group.
// Mutual likes for one user on MongoDB versions without $sortArray.
// The users array is unwound, sorted and pushed back together per document so
// that both directions of a pair share the same (sorted) grouping key.
db.collection.aggregate([
  {
    // Restrict to real likes (as the original from/to query did) so that a
    // like: false document in the opposite direction is not counted as mutual.
    $match: {
      users: "xw5vk1s",
      like: true
    }
  },
  { $unwind: "$users" },
  { $sort: { users: 1 } },
  {
    // Rebuild each document's users array, now in sorted order.
    $group: {
      _id: "$_id",
      users: { $push: "$users" }
    }
  },
  {
    // Count how many documents share the same sorted pair.
    $group: {
      _id: "$users",
      count: { $sum: 1 }
    }
  },
  {
    // Keep only pairs seen more than once, i.e. liked in both directions.
    $match: {
      count: { $gt: 1 }
    }
  }
])
Mongo Playground

Finding top 3 students in each subject MongoDB

I have tried searching for ways to solve my problem, except that my database is set up differently,
My documents in my collection are something like this:
{name:"MAX",
date:"2020-01-01"
Math:98,
Science:60,
English:80},
{name:"JANE",
date:"2020-01-01"
Math:80,
Science:70,
English:79},
{name:"ALEX",
date:"2020-01-01"
Math:95,
Science:68,
English:70},
{name:"JOHN",
date:"2020-01-01"
Math:95,
Science:68,
English:70}
{name:"MAX",
date:"2020-06-01"
Math:97,
Science:78,
English:90},
{name:"JANE",
date:"2020-06-01"
Math:78,
Science:76,
English:66},
{name:"ALEX",
date:"2020-06-01"
Math:93,
Science:75,
English:82},
{name:"JOHN",
date:"2020-06-01"
Math:92,
Science:80,
English:50}
I want to find the top 3 students for each subject without regard for the dates. I only managed to find the top 3 students in 1 subject.
So i group the students by name first, and add a column for max scores of a subject. Math in this case. Sort it in descending order and limit results to 3.
// Top 3 students by their best Math score across all dates.
db.student_scores.aggregate(
[
// One document per student, carrying that student's highest Math score.
{$group:{
_id: "$name",
maxMath: { $max: "$Math" }}},
// Highest score first, then keep the first three.
{$sort:{"maxMath":-1}},
{$limit : 3}
]
)
Is there any way to get the top 3 students for each subject?
So, it would be top 3 for math, top 3 for science, top 3 for english
{
Math:{MAX, JANE, JOHN},
Science:{JOHN, ALEX, JANE},
English:{JANE, MAX, JOHN}
}
I just applied your code 3 times, using $facet
If you prefer a more compact result add
{$project:{English:"$Eng._id", Science:"$sci._id", Math:"$math._id"}}
PLAYGROUND
PIPELINE
// Builds the "top 3 students by best score" sub-pipeline for one subject.
//   scoreField - name of the score field on the source documents (e.g. "Math")
//   maxField   - name under which each student's best score is emitted
function topThree(scoreField, maxField) {
  return [
    // One document per student with that student's highest score.
    { $group: { _id: "$name", [maxField]: { $max: "$" + scoreField } } },
    // Best score first, then keep three.
    { $sort: { [maxField]: -1 } },
    { $limit: 3 }
  ];
}

// Runs the same top-3 sub-pipeline once per subject inside a single $facet stage;
// the result is one document with the arrays math, sci and Eng.
db.collection.aggregate([
  {
    $facet: {
      math: topThree("Math", "maxMath"),
      sci: topThree("Science", "maxSci"),
      Eng: topThree("English", "maxEng")
    }
  }
])
Your question is not entirely clear, but I can anticipate two scenarios:
Scenario 1 - allow the same student to appear more than once (one entry per date):
$project to show required fields and convert subjects object to array using $objectToArray
$unwind subjects array
$sort by score (subjects.v) in descending order
$group by subject name and get array of students
$project to keep the first 3 (highest-scoring) students from the students array using $slice
// Top 3 scores per subject; the same student can appear more than once
// because every (student, date) document is ranked separately.
db.collection.aggregate([
{
// Reshape the three subject fields into an array of { k: <subject>, v: <score> } pairs.
$project: {
name: "$name",
date: "$date",
subjects: {
$objectToArray: {
Math: "$Math",
Science: "$Science",
English: "$English"
}
}
}
},
// One document per subject entry of each source document.
{ $unwind: "$subjects" },
// Highest scores first; intended so that each students array below is built in descending score order.
{ $sort: { "subjects.v": -1 } },
{
// Collect the students of each subject.
$group: {
_id: "$subjects.k",
students: {
$push: {
name: "$name",
date: "$date",
score: "$subjects.v"
}
}
}
},
{
// Keep only the first three entries of each subject's students array.
$project: {
_id: 0,
subject: "$_id",
students: { $slice: ["$students", 3] }
}
}
])
Playground
Scenario 2 - sum each student's scores across all dates (so every student appears only once):
$group by name, and get sum of all subjects using $sum,
$project to convert subjects object to array using $objectToArray
$unwind subjects array
$sort by score (subjects.v) in descending order
$group by subject name and get array of students
$project to keep the first 3 (highest-scoring) students from the students array using $slice
// Top 3 students per subject, ranked by each student's score summed over all dates.
db.collection.aggregate([
{
// One document per student with the per-subject totals.
$group: {
_id: "$name",
Math: { $sum: "$Math" },
Science: { $sum: "$Science" },
English: { $sum: "$English" }
}
},
{
// Reshape the three totals into an array of { k: <subject>, v: <total> } pairs.
$project: {
subjects: {
$objectToArray: {
Math: "$Math",
Science: "$Science",
English: "$English"
}
}
}
},
// One document per (student, subject).
{ $unwind: "$subjects" },
// Highest totals first; intended so that each students array below is built in descending score order.
{ $sort: { "subjects.v": -1 } },
{
// Collect the students of each subject; _id still holds the student name here.
$group: {
_id: "$subjects.k",
students: {
$push: {
name: "$_id",
score: "$subjects.v"
}
}
}
},
{
// Keep only the first three entries of each subject's students array.
$project: {
_id: 0,
subject: "$_id",
students: { $slice: ["$students", 3] }
}
}
])
Playground

MongoDB query based on last query by max date

I have the following collection
{
_id: ObjectId(...),
consumerId: "...",
dealId: "...",
status: "...",
stage: 0,
createdDate: 2020-09-06T00:17:16.719+00:00
}
I would like to figure out how to get distinct documents given a specific consumerId and status not equal to say "x" or "y" based on the latest createdDate. Also what would be the best indexes of fields for the collection?
You can use $match to apply the conditions and $sort to order the results by createdDate in descending order. $group with $first then returns the fields of the latest document for each consumerId.
// Latest document (by createdDate) for one consumer, ignoring statuses "x" and "y".
[
  {
    // Plain query predicates instead of $expr: same filter, but written in the
    // form the query planner can serve from an index on these fields.
    // NOTE(review): a compound index such as { consumerId: 1, createdDate: -1 }
    // looks like a good fit for this $match + $sort - confirm with explain().
    $match: {
      consumerId: "1",
      status: { $nin: ["x", "y"] }
    }
  },
  {
    // Newest first, so $first below picks the latest document's values.
    $sort: {
      createdDate: -1
    }
  },
  {
    $group: {
      _id: "$consumerId",
      consumerId: { $first: "$consumerId" },
      dealId: { $first: "$dealId" },
      status: { $first: "$status" },
      stage: { $first: "$stage" },
      createdDate: { $first: "$createdDate" }
    }
  }
]
Working Mongo playground

MongoDB - aggregating with nested objects, and changeable keys

I have a document which describes counts of different things observed by a camera within a 15 minute period. It looks like this:
{
"_id" : ObjectId("5b1a709a83552d002516ac19"),
"start" : ISODate("2018-06-08T11:45:00.000Z"),
"end" : ISODate("2018-06-08T12:00:00.000Z"),
"recording" : ObjectId("5b1a654683552d002516ac16"),
"data" : {
"counts" : {
"5b434d05da1f0e00252566be" : 12,
"5b434d05da1f0e00252566cc" : 4,
"5b434d05da1f0e00252566ca" : 1
}
}
}
The keys inside the data.counts object change with each document and refer to additional data that is fetched at a later date. There are unlimited number of keys inside data.counts (but usually about 20)
I am trying to aggregate all these 15 minute documents up to daily aggregated documents.
I have this query at the moment to do that:
// Groups the 15-minute segments of one recording into one document per day.
db.getCollection("segments").aggregate([
{$match:{
"recording": ObjectId("5bf7f68ad8293a00261dd83f")
}},
{$project:{
"start": 1,
"recording": 1,
"data": 1
}},
// The group key is the calendar day of "start" formatted as YYYY-MM-DD;
// "segments" collects the full projected documents of that day.
{$group:{
_id: { $dateToString: { format: "%Y-%m-%d", date: "$start" } },
"segments": { $push: "$$ROOT" }
}},
// Most recent day first.
{$sort: {_id: -1}},
]);
This does the grouping and returns all the segments in an array.
I want to also aggregate the information inside data.counts, so that I get the sum of values for all keys that are the same within the daily group.
This would save me from having another service loop through each 15 minute segment summing values with the same keys. E.g. the query would return something like this:
{
"_id" : "2019-02-27",
"counts" : {
"5b434d05da1f0e00252566be" : 351,
"5b434d05da1f0e00252566cc" : 194,
"5b434d05da1f0e00252566ca" : 111
... any other keys that were found within a day
}
}
How might I amend the query I already have, or use a different query?
Thanks!
You could use the $facet pipeline stage to create two sub-pipelines; one for segments and another for counts. These sub-pipelines can be joined by using $zip to stitch them together and $map to merge each 2-element array produced from zip. Note this will only work correctly if the sub-pipelines output sorted arrays of the same size, which is why we group and sort by start_date in each sub-pipeline.
Here's the query:
// Daily roll-up of one recording: per day, the list of segments plus the sum of
// every data.counts key. Two $facet sub-pipelines are computed and then
// stitched together element by element.
db.getCollection("segments").aggregate([{
$match: {
recording: ObjectId("5b1a654683552d002516ac16")
}
}, {
// Precompute the day string once so both sub-pipelines group on the same key.
$project: {
start: 1,
recording: 1,
data: 1,
start_date: { $dateToString: { format: "%Y-%m-%d", date: "$start" }}
}
}, {
$facet: {
// Sub-pipeline 1: the segments of each day, newest day first.
segments_pipeline: [{
$group: {
_id: "$start_date",
segments: {
$push: {
start: "$start",
recording: "$recording",
data: "$data"
}
}
}
}, {
$sort: {
_id: -1
}
}],
// Sub-pipeline 2: the summed counts of each day, newest day first.
counts_pipeline: [{
// Turn the dynamic-key counts object into an array of { k, v } pairs.
$project: {
start_date: "$start_date",
count: { $objectToArray: "$data.counts" }
}
}, {
$unwind: "$count"
}, {
// Sum the values per (day, count key).
$group: {
_id: {
start_date: "$start_date",
count_id: "$count.k"
},
count_sum: { $sum: "$count.v" }
}
}, {
// Regroup per day, pushing one single-key object per count key.
$group: {
_id: "$_id.start_date",
counts: {
$push: {
$arrayToObject: [[{
k: "$_id.count_id",
v: "$count_sum"
}]]
}
}
}
}, {
// Merge the single-key objects into one counts object per day.
$project: {
counts: { $mergeObjects: "$counts" }
}
}, {
$sort: {
_id: -1
}
}]
}
}, {
// Pair the i-th element of each sub-pipeline and merge each pair into one object;
// this relies on both arrays having the same length and order.
$project: {
result: {
$map: {
input: { $zip: { inputs: ["$segments_pipeline", "$counts_pipeline"] }},
in: { $mergeObjects: "$$this" }
}
}
}
}, {
// Emit one top-level document per day.
$unwind: "$result"
}, {
$replaceRoot: {
newRoot: "$result"
}
}])
Try it out here: Mongoplayground.

MongoDB sorting by condition

Hello, I'm stuck on a conditional sort: I need to sort the documents that match some conditions by priority, and then by a field that depends on that priority.
First I want to sort the data by priority in descending order; then, if priority is 1 (high), sort the documents by the createdAt field, otherwise by the date field.
I have written the aggregation below myself, but it does not return the correct data.
// Attempt at sorting by priority (descending) and then by createdAt or date,
// depending on the priority.
model.aggregate([
{
$match: {
jobId,
deprecated: false,
},
},
{
// Wraps the whole source document under "document" and computes the
// secondary sort key: createdAt when priority is 1, date otherwise.
// NOTE: this stage outputs only document and sort (plus _id), so priority
// is no longer a top-level field when the $sort stage below runs.
$project: {
document: "$$ROOT",
sort: {
$cond: {
if: {
$eq: ["$priority", 1],
},
then: "$createdAt",
else: "$date",
},
},
},
},
{
$sort: {
priority: -1,
sort: 1,
},
},
]);
Thanks for your time, everyone - I solved the issue.
Because the $project stage uses document: "$$ROOT",
it creates a document field and stores all of the original data inside it, which is why the priority field was not available to the $sort stage.
I just added priority: 1 to the $project stage so that the field is included. The working code is posted below in case it helps anyone who needs something similar.
// Sorts matching documents by priority (descending), then by createdAt when
// priority is 1 and by date otherwise.
model.aggregate([
{
$match: {
jobId: '5c501eed65816d61c6a0af77',
deprecated: false,
},
},
{
$project: {
// The full source document is kept under "document".
document: "$$ROOT",
// priority must be projected explicitly so the $sort stage can use it.
priority: 1,
// Secondary sort key: createdAt for priority 1, date for everything else.
sort: {
$cond: {
if: {
$eq: ["$priority", 1],
},
then: "$createdAt",
else: "$date",
},
},
},
},
{
$sort: {
priority: -1,
sort: 1,
},
},
])