Finding top 3 students in each subject MongoDB - mongodb

I have tried searching for ways to solve my problem, except that my database is set up differently,
My documents in my collection are something like this:
{name:"MAX",
date:"2020-01-01"
Math:98,
Science:60,
English:80},
{name:"JANE",
date:"2020-01-01"
Math:80,
Science:70,
English:79},
{name:"ALEX",
date:"2020-01-01"
Math:95,
Science:68,
English:70},
{name:"JOHN",
date:"2020-01-01"
Math:95,
Science:68,
English:70}
{name:"MAX",
date:"2020-06-01"
Math:97,
Science:78,
English:90},
{name:"JANE",
date:"2020-06-01"
Math:78,
Science:76,
English:66},
{name:"ALEX",
date:"2020-06-01"
Math:93,
Science:75,
English:82},
{name:"JOHN",
date:"2020-06-01"
Math:92,
Science:80,
English:50}
I want to find the top 3 students for each subject without regard for the dates. I only managed to find the top 3 students in 1 subject.
So i group the students by name first, and add a column for max scores of a subject. Math in this case. Sort it in descending order and limit results to 3.
db.student_scores.aggregate(
[
{$group:{
_id: "$name",
maxMath: { $max: "$Math" }}},
{$sort:{"maxMath":-1}},
{$limit : 3}
]
)
Is there any way to get the top 3 students for each subject?
So, it would be top 3 for math, top 3 for science, top 3 for english
{
Math:{MAX, JANE, JOHN},
Science:{JOHN, ALEX, JANE},
English:{JANE, MAX, JOHN}
}

I just applied your code 3 times, using $facet
If you prefer a more compact result add
{$project:{English:"$Eng._id", Science:"$sci._id", Math:"$math._id"}}
PLAYGROUND
PIPELINE
db.collection.aggregate([
{
"$facet": {
"math": [
{
$group: {
_id: "$name",
maxMath: {
$max: "$Math"
}
}
},
{
$sort: {
"maxMath": -1
}
},
{
$limit: 3
}
],
"sci": [
{
$group: {
_id: "$name",
maxSci: {
$max: "$Science"
}
}
},
{
$sort: {
"maxSci": -1
}
},
{
$limit: 3
}
],
"Eng": [
{
$group: {
_id: "$name",
maxEng: {
$max: "$English"
}
}
},
{
$sort: {
"maxEng": -1
}
},
{
$limit: 3
}
]
}
}
])

Your question is not clear, but i can predict 2 scenario,
Get repetitive students along with date:
$project to show required fields and convert subjects object to array using $objectToArray
$unwind subjects array
$sort by subjects name in descending order
$group by subject name and get array of students
$project to get latest 3 students from students array
db.collection.aggregate([
{
$project: {
name: "$name",
date: "$date",
subjects: {
$objectToArray: {
Math: "$Math",
Science: "$Science",
English: "$English"
}
}
}
},
{ $unwind: "$subjects" },
{ $sort: { "subjects.v": -1 } },
{
$group: {
_id: "$subjects.k",
students: {
$push: {
name: "$name",
date: "$date",
score: "$subjects.v"
}
}
}
},
{
$project: {
_id: 0,
subject: "$_id",
students: { $slice: ["$students", 3] }
}
}
])
Playground
Sum of all date's score (means unique students):
$group by name, and get sum of all subjects using $sum,
$project to convert subjects object to array using $objectToArray
$unwind subjects array
$sort by subjects name in descending order
$group by subject name and get array of students
$project to get latest 3 students from students array
db.collection.aggregate([
{
$group: {
_id: "$name",
Math: { $sum: "$Math" },
Science: { $sum: "$Science" },
English: { $sum: "$English" }
}
},
{
$project: {
subjects: {
$objectToArray: {
Math: "$Math",
Science: "$Science",
English: "$English"
}
}
}
},
{ $unwind: "$subjects" },
{ $sort: { "subjects.v": -1 } },
{
$group: {
_id: "$subjects.k",
students: {
$push: {
name: "$_id",
score: "$subjects.v"
}
}
}
},
{
$project: {
_id: 0,
subject: "$_id",
students: { $slice: ["$students", 3] }
}
}
])
Playground

Related

Mongodb aggregate grouping elements of an array type field

I have below data in my collection:
[
{
"_id":{
"month":"Jan",
"year":"2022"
},
"products":[
{
"product":"ProdA",
"status":"failed",
"count":15
},
{
"product":"ProdA",
"status":"success",
"count":5
},
{
"product":"ProdB",
"status":"failed",
"count":20
},
{
"product":"ProdB",
"status":"success",
"count":10
}
]
},
...//more such data
]
I want to group the elements of products array on the name of the product, so that we have record of how what was the count of failure of success of each product in each month. Every record is guaranteed to have both success and failure count each month. The output should look like below:
[
{
"_id":{
"month":"Jan",
"year":"2022"
},
"products":[
{
"product":"ProdA","status":[{"name":"success","count":5},{"name":"failed","count":15}]
},
{
"product":"ProdB","status":[{"name":"success","count":10},{"name":"failed","count":20}]
}
]
},
...//data for succeeding months
]
I have tried to do something like this:
db.collection.aggregate([{ $unwind: "$products" },
{
$group: {
"_id": {
month: "$_id.month",
year: "$_id.year"
},
products: { $push: { "product": "$product", status: { $push: { name: "$status", count: "$count" } } } }
}
}]);
But above query doesn't work.
On which level I need to group fields so as to obtain above output.
Please help me to find out what I am doing wrong.
Thank You!
Your first group stage needs to group by both the _id and the product name, aggregate a list of status counts and then another group stage which then forms the products list:
db.collection.aggregate([
{$unwind: "$products"},
{$group: {
_id: {
id: "$_id",
product: "$products.product",
},
status: {
$push: {
name: "$products.status",
count: "$products.count"
}
}
}
},
{$group: {
_id: "$_id.id",
products: {
$push: {
product: "$_id.product",
status: "$status"
}
}
}
}
])
Mongo Playground

How to group by date and by specific field in MongoDB

I want to print grouped by date and by "productId" within the date. In this example, the output should be as follow:
[
{
"_id": "2018-03-04",
"product1": 2,
"product2": 2
}
]
Data: https://mongoplayground.net/p/gzvm11EIPn2
How to make it?
When you use the $group stage in aggregation you learn to group by one field as such: { $group: { "_id": "$field1"...
When you want to group by two or more fields "_id" needs to be a subdocument and you pass the fields as key value pairs inside the subdocument as such:
db.mycollection.aggregate([
{
$group:
{
"_id": { "product1": "$product1", "product2": "$product2", ... }
}
}
])
... etc.
$group - Group by createdAt (date string) and productId and perform count via $sum.
$group - Group by createdAtand push data from (1) to products array field.
$replaceRoot - Replace input document with new document.
3.1. $arrayToObject - Convert the object from products array field to key value pair with productId (key) and count (value).
3.2. $mergeObjects - Create object with _id and merge the object from (3.2) into 1 object.
db.collection.aggregate([
{
$group: {
_id: {
createdAt: {
$dateToString: {
format: "%Y-%m-%d",
date: "$createdAt"
}
},
productId: "$productId"
},
count: {
$sum: 1
}
}
},
{
$group: {
_id: "$_id.createdAt",
products: {
$push: {
productId: "$_id.productId",
count: "$count"
}
}
}
},
{
"$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{
_id: "$_id"
},
{
$arrayToObject: {
$map: {
input: "$products",
in: {
k: {
$toString: "$$this.productId"
},
v: "$$this.count"
}
}
}
}
]
}
}
}
])
Sample Mongo Playground
Output
[
{
"5e345223b3aa703b8a9a4f34": 2,
"5e345223b3aa703b8a9a4f35": 2,
"_id": "2018-03-04"
}
]

clone and rename a field of an array of subdocuments in Mongo

I've got a collection like this:
{
name: "A Name",
answers: [
{order: 1},
{order: 2},
{order: 3}
]
}
What I want to do is to add a new filed id to each element of the answers array based on the value of the order property - I want just to clone it, so the output is
{
name: "A Name",
answers: [
{order: 1, id: 1},
{order: 2, id: 2},
{order: 3, id: 3}
]
}
I looked at this post and this one too, but I don't know how to combine them to work properly for subdocuments.
In MongoDB documentation for the aggregate method, I found a simple example of how to update embedded documents here, but I have no idea how to use the order property instead of a fixed term. The following tries seem not to work as I need:
db.collection.aggregate([
{
$addFields: {
"answers.id": "answers.$.order"
}
}
])
db.collection.aggregate([
{
$addFields: {
"answers.id": "$answers.order"
}
}
])
Is it possible to achieve the expected result with the `aggregate method?
Demo - https://mongoplayground.net/p/M79MV6-Zp4C
db.collection.aggregate([
{
$set: {
answers: {
$map: {
input: "$answers",
as: "answer",
in: { $mergeObjects: [ "$$answer", { id: "$$answer.order" } ]
}
}
}
}
}
])
Updated Demo - https://mongoplayground.net/p/iyqIPGQ5-ld
db.collection.aggregate([
{ $unwind: "$answers" },
{
$group: {
_id: "$_id",
answers: { $push: { order: "$answers.order", id: "$answers.order" } },
name: { $first: "$name" } // preserve properties add them to the group pick 1st value
}
}
])
Demo - https://mongoplayground.net/p/Ln5CcmT-Kkm
db.collection.aggregate([
{ $unwind: "$answers" }, // break into individuals documents
{ $addFields: { "answers.id": "$answers.order" } }, // copy order value to id
{ $group: { _id: "$_id", answers: { $push: "$answers" } } } // join and group it back
])
If you want to sort n get id from index
Demo - https://mongoplayground.net/p/6U_sRYWHtDR
db.collection.aggregate([
{ $sort: { "answers.order": 1 } },
{ $unwind: { path: "$answers", includeArrayIndex: "index" } },
{ $group: { _id: "$_id", answers: { "$push": { order: "$answers.order", id: { $add: [ "$index", 1 ] } } } } }
])

MongoDB - aggregating with nested objects, and changeable keys

I have a document which describes counts of different things observed by a camera within a 15 minute period. It looks like this:
{
"_id" : ObjectId("5b1a709a83552d002516ac19"),
"start" : ISODate("2018-06-08T11:45:00.000Z"),
"end" : ISODate("2018-06-08T12:00:00.000Z"),
"recording" : ObjectId("5b1a654683552d002516ac16"),
"data" : {
"counts" : {
"5b434d05da1f0e00252566be" : 12,
"5b434d05da1f0e00252566cc" : 4,
"5b434d05da1f0e00252566ca" : 1
}
}
}
The keys inside the data.counts object change with each document and refer to additional data that is fetched at a later date. There are unlimited number of keys inside data.counts (but usually about 20)
I am trying to aggregate all these 15 minute documents up to daily aggregated documents.
I have this query at the moment to do that:
db.getCollection("segments").aggregate([
{$match:{
"recording": ObjectId("5bf7f68ad8293a00261dd83f")
}},
{$project:{
"start": 1,
"recording": 1,
"data": 1
}},
{$group:{
_id: { $dateToString: { format: "%Y-%m-%d", date: "$start" } },
"segments": { $push: "$$ROOT" }
}},
{$sort: {_id: -1}},
]);
This does the grouping and returns all the segments in an array.
I want to also aggregate the information inside data.counts, so that I get the sum of values for all keys that are the same within the daily group.
This would save me from having another service loop through each 15 minute segment summing values with the same keys. E.g. the query would return something like this:
{
"_id" : "2019-02-27",
"counts" : {
"5b434d05da1f0e00252566be" : 351,
"5b434d05da1f0e00252566cc" : 194,
"5b434d05da1f0e00252566ca" : 111
... any other keys that were found within a day
}
}
How might I amend the query I already have, or use a different query?
Thanks!
You could use the $facet pipeline stage to create two sub-pipelines; one for segments and another for counts. These sub-pipelines can be joined by using $zip to stitch them together and $map to merge each 2-element array produced from zip. Note this will only work correctly if the sub-pipelines output sorted arrays of the same size, which is why we group and sort by start_date in each sub-pipeline.
Here's the query:
db.getCollection("segments").aggregate([{
$match: {
recording: ObjectId("5b1a654683552d002516ac16")
}
}, {
$project: {
start: 1,
recording: 1,
data: 1,
start_date: { $dateToString: { format: "%Y-%m-%d", date: "$start" }}
}
}, {
$facet: {
segments_pipeline: [{
$group: {
_id: "$start_date",
segments: {
$push: {
start: "$start",
recording: "$recording",
data: "$data"
}
}
}
}, {
$sort: {
_id: -1
}
}],
counts_pipeline: [{
$project: {
start_date: "$start_date",
count: { $objectToArray: "$data.counts" }
}
}, {
$unwind: "$count"
}, {
$group: {
_id: {
start_date: "$start_date",
count_id: "$count.k"
},
count_sum: { $sum: "$count.v" }
}
}, {
$group: {
_id: "$_id.start_date",
counts: {
$push: {
$arrayToObject: [[{
k: "$_id.count_id",
v: "$count_sum"
}]]
}
}
}
}, {
$project: {
counts: { $mergeObjects: "$counts" }
}
}, {
$sort: {
_id: -1
}
}]
}
}, {
$project: {
result: {
$map: {
input: { $zip: { inputs: ["$segments_pipeline", "$counts_pipeline"] }},
in: { $mergeObjects: "$$this" }
}
}
}
}, {
$unwind: "$result"
}, {
$replaceRoot: {
newRoot: "$result"
}
}])
Try it out here: Mongoplayground.

Retrieving a count that matches specified criteria in a $group aggregation

So I am looking to group documents in my collection on a specific field, and for the output results of each group, I am looking to include the following:
A count of all documents in the group that match a specific query (i.e. a count of documents that satisfy some expression { "$Property": "Value" })
The total number of documents in the group
(Bonus, as I suspect that this is not easily accomplished) Properties of a document that correspond to a $min/$max accumulator
I am very new to the syntax used to query in mongo and don't quite understand how it all works, but after some research, I've managed to get it down to the following query (please note, I am currently using version 3.0.12 for my mongo db, but I believe we will upgrade in a couple of months time):
db.getCollection('myCollection').aggregate(
[
{
$group: {
_id: {
GroupID: "$GroupID",
Status: "$Status"
},
total: { $sum: 1 },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$DateCreated" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
},
{
$group: {
_id: "$_id.GroupID",
Statuses: {
$push: {
Status: "$_id.Status",
Count: "$total"
}
},
TotalCount: { $sum: "$total" },
GroupName: { $first: "$GroupName" },
EarliestCreatedDate: { $min: "$EarliestCreatedDate" },
LastModifiedDate: { $max: "$LastModifiedDate" }
}
}
]
)
Essentially what I am looking to retrieve is the Count for specific Status values, and project them into one final result document that looks like the following:
{
GroupName,
EarliestCreatedDate,
EarliestCreatedBy,
LastModifiedDate,
LastModifiedBy,
TotalCount,
PendingCount,
ClosedCount
}
Where PendingCount and ClosedCount are the total number of documents in each group that have a status Pending/Closed. I suspect I need to use $project with some other expression to extract this value, but I don't really understand the aggregation pipeline well enough to figure this out.
Also the EarliestCreatedBy and LastModifiedBy are the users who created/modified the document(s) corresponding to the EarliestCreatedDate and LastModifiedDate respectively. As I mentioned, I think retrieving these values will add another layer of complexity, so if there is no practical solution, I am willing to forgo this requirement.
Any suggestions/tips would be very much appreciated.
You can try below aggregation stages.
$group
Calculate all the necessary counts TotalCount, PendingCount and ClosedCount for each GroupID
Calculate $min and $max for EarliestCreatedDate and LastModifiedDate respectively and push all the fields to CreatedByLastModifiedBy to be compared later for fetching EarliestCreatedBy and LastModifiedBy for each GroupID
$project
Project all the fields for response
$filter the EarliestCreatedDate value against the data in the CreatedByLastModifiedBy and $map the matching CreatedBy to the EarliestCreatedBy and $arrayElemAt to convert the array to object.
Similar steps for calculating LastModifiedBy
db.getCollection('myCollection').aggregate(
[{
$group: {
_id: "$GroupID",
TotalCount: {
$sum: 1
},
PendingCount: {
$sum: {
$cond: {
if: {
$eq: ["Status", "Pending"]
},
then: 1,
else: 0
}
}
},
ClosedCount: {
$sum: {
$cond: {
if: {
$eq: ["Status", "Closed "]
},
then: 1,
else: 0
}
}
},
GroupName: {
$first: "$GroupName"
},
EarliestCreatedDate: {
$min: "$DateCreated"
},
LastModifiedDate: {
$max: "$LastModifiedDate"
},
CreatedByLastModifiedBy: {
$push: {
CreatedBy: "$CreatedBy",
LastModifiedBy: "$LastModifiedBy",
DateCreated: "$DateCreated",
LastModifiedDate: "$LastModifiedDate"
}
}
}
}, {
$project: {
_id: 0,
GroupName: 1,
EarliestCreatedDate: 1,
EarliestCreatedBy: {
$arrayElemAt: [{
$map: {
input: {
$filter: {
input: "$CreatedByLastModifiedBy",
as: "CrBy",
cond: {
"$eq": ["$EarliestCreatedDate", "$$CrBy.DateCreated"]
}
}
},
as: "EaCrBy",
in: {
"$$EaCrBy.CreatedBy"
}
}
}, 0]
},
LastModifiedDate: 1,
LastModifiedBy: {
$arrayElemAt: [{
$map: {
input: {
$filter: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
cond: {
"$eq": ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
}
}
},
as: "LaMoBy",
in: {
"$$LaMoBy.LastModifiedBy"
}
}
}, 0]
},
TotalCount: 1,
PendingCount: 1,
ClosedCount: 1
}
}]
)
Update for Version < 3.2
$filter is also not available in your version. Below is the equivalent.
The comparison logic is the same and creates an array with for every non matching entry the value of false or LastModifiedBy otherwise.
Next step is to use $setDifference to compare the previous array values with array [false] which returns the elements that only exist in the first set.
LastModifiedBy: {
$setDifference: [{
$map: {
input: "$CreatedByLastModifiedBy",
as: "MoBy",
in: {
$cond: [{
$eq: ["$LastModifiedDate", "$$MoBy.LastModifiedDate"]
},
"$$MoBy.LastModifiedBy",
false
]
}
}
},
[false]
]
}
Add $unwind stage after $project stage to change to object
{$unwind:"$LastModifiedBy"}
Similar steps for calculating EarliestCreatedBy