Related
I am using the MongoDB daily bucketing pattern. Each daily document contains an array with one value calculated for every hour of that day:
{
meter: 'meterId',
date: 'dailyBucket',
hourlyConsumption: [0,0,1,1,1,2,2,2,4,4,4,4,3,3,3...] // array with 24 values for every hour of a day
}
Now in one of my aggregation queries, I would like to group documents for the same day of multiple meters and get a result like this:
INPUT (consumption of multiple meters on the same day)
{
meter: 'MeterA',
date: '2021-05-01',
hourlyConsumption: [0,0,1,1,1,2,2,2,4,4,4,4,3,3,3...]
},
{
meter: 'MeterB',
date: '2021-05-01',
hourlyConsumption: [10,10,10,10,10,10,10,10,10,10,10,10,10,10,10...]
}
RESULT (combined into single document)
{
date: '2021-05-01',
hourlyConsumption: [10,10,11,11,11,12,12,12,14,14,14,14,13,13,13...]
}
Is there a way to achieve this without using $accumulator?
You can use $reduce
// Sum the hourlyConsumption arrays of all meters element-wise, one result per date.
db.collection.aggregate([
  // Collect every meter's hourly array for the same date into an array of arrays.
  {
    $group: {
      _id: "$date",
      hourlyConsumption: { $push: "$hourlyConsumption" }
    }
  },
  // Fold the collected arrays into a single array of per-hour totals.
  {
    $set: {
      hourlyConsumption: {
        $reduce: {
          input: "$hourlyConsumption",
          initialValue: [],
          in: {
            $map: {
              // $range excludes its end value, so [0, 24] yields the indexes 0..23
              // (all 24 hours); [0, 23] would drop the last hour of the day.
              input: { $range: [ 0, 24 ] },
              as: "h",
              in: {
                // On the first iteration $$value is [], so the lookup yields no value;
                // $sum ignores non-numeric operands, which leaves just the $$this element.
                $sum: [
                  { $arrayElemAt: [ "$$value", "$$h" ] },
                  { $arrayElemAt: [ "$$this", "$$h" ] }
                ]
              }
            }
          }
        }
      }
    }
  }
])
Mongo Playground
Or you use $unwind and $group:
// Alternative: flatten the hourly arrays, total each (date, hour) slot,
// then rebuild one array per date.
db.collection.aggregate([
  // One document per array element; the element's position is stored in "hourIndex".
  { $unwind: { path: "$hourlyConsumption", includeArrayIndex: "hourIndex" } },
  // Total across all meters for each date/hour pair.
  {
    $group: {
      _id: { date: "$date", hourIndex: "$hourIndex" },
      total: { $sum: "$hourlyConsumption" }
    }
  },
  // Order by hour so the totals are pushed back in hour order.
  { $sort: { "_id.hourIndex": 1 } },
  // Reassemble a single array of totals per date.
  {
    $group: {
      _id: "$_id.date",
      hourlyConsumption: { $push: "$total" }
    }
  }
])
Mongo Playground
However, when you use $unwind, then you actually contradict your bucketing design pattern.
I want to print the counts grouped by date and, within each date, by "productId". In this example, the output should be as follows:
[
{
"_id": "2018-03-04",
"product1": 2,
"product2": 2
}
]
Data: https://mongoplayground.net/p/gzvm11EIPn2
How to make it?
When you use the $group stage in aggregation you learn to group by one field as such: { $group: { "_id": "$field1"...
When you want to group by two or more fields "_id" needs to be a subdocument and you pass the fields as key value pairs inside the subdocument as such:
db.mycollection.aggregate([
{
$group:
{
"_id": { "product1": "$product1", "product2": "$product2", ... }
}
}
])
... etc.
1. $group - Group by createdAt (date string) and productId and perform count via $sum.
2. $group - Group by createdAt and push data from (1) to products array field.
3. $replaceRoot - Replace input document with new document.
3.1. $arrayToObject - Convert the objects from products array field to key value pairs with productId (key) and count (value).
3.2. $mergeObjects - Create an object with _id and merge the object from (3.1) into a single object.
// Count documents per day and productId, returning one document per day
// whose keys are the product ids.
db.collection.aggregate([
  // Count documents for each (day string, productId) pair.
  {
    $group: {
      _id: {
        day: { $dateToString: { format: "%Y-%m-%d", date: "$createdAt" } },
        productId: "$productId"
      },
      total: { $sum: 1 }
    }
  },
  // Gather the per-product counts of each day into one array.
  {
    $group: {
      _id: "$_id.day",
      perProduct: { $push: { productId: "$_id.productId", total: "$total" } }
    }
  },
  // Turn the array into { <productId>: <count> } and merge it with the day's _id.
  {
    $replaceRoot: {
      newRoot: {
        $mergeObjects: [
          { _id: "$_id" },
          {
            $arrayToObject: {
              $map: {
                input: "$perProduct",
                as: "p",
                // Object keys must be strings, hence $toString on the product id.
                in: { k: { $toString: "$$p.productId" }, v: "$$p.total" }
              }
            }
          }
        ]
      }
    }
  }
])
Sample Mongo Playground
Output
[
{
"5e345223b3aa703b8a9a4f34": 2,
"5e345223b3aa703b8a9a4f35": 2,
"_id": "2018-03-04"
}
]
I have tried searching for ways to solve my problem, but my database is set up differently from the examples I found.
My documents in my collection are something like this:
{name:"MAX",
date:"2020-01-01"
Math:98,
Science:60,
English:80},
{name:"JANE",
date:"2020-01-01"
Math:80,
Science:70,
English:79},
{name:"ALEX",
date:"2020-01-01"
Math:95,
Science:68,
English:70},
{name:"JOHN",
date:"2020-01-01"
Math:95,
Science:68,
English:70}
{name:"MAX",
date:"2020-06-01"
Math:97,
Science:78,
English:90},
{name:"JANE",
date:"2020-06-01"
Math:78,
Science:76,
English:66},
{name:"ALEX",
date:"2020-06-01"
Math:93,
Science:75,
English:82},
{name:"JOHN",
date:"2020-06-01"
Math:92,
Science:80,
English:50}
I want to find the top 3 students for each subject without regard for the dates. I only managed to find the top 3 students in 1 subject.
So I group the students by name first and add a field for the max score of a subject (Math in this case), then sort in descending order and limit the results to 3.
// Top three students ranked by their best Math score.
db.student_scores.aggregate([
  // Best Math score per student name.
  { $group: { _id: "$name", maxMath: { $max: "$Math" } } },
  // Highest score first, then keep only three rows.
  { $sort: { maxMath: -1 } },
  { $limit: 3 }
])
Is there any way to get the top 3 students for each subject?
So, it would be top 3 for math, top 3 for science, top 3 for english
{
Math:{MAX, JANE, JOHN},
Science:{JOHN, ALEX, JANE},
English:{JANE, MAX, JOHN}
}
I just applied your code 3 times, using $facet
If you prefer a more compact result add
{$project:{English:"$Eng._id", Science:"$sci._id", Math:"$math._id"}}
PLAYGROUND
PIPELINE
// Run the same ranking (best score per student, top three) once per subject.
// Each $facet key holds the result array of its own sub-pipeline.
db.collection.aggregate([
  {
    $facet: {
      math: [
        { $group: { _id: "$name", maxMath: { $max: "$Math" } } },
        { $sort: { maxMath: -1 } },
        { $limit: 3 }
      ],
      sci: [
        { $group: { _id: "$name", maxSci: { $max: "$Science" } } },
        { $sort: { maxSci: -1 } },
        { $limit: 3 }
      ],
      Eng: [
        { $group: { _id: "$name", maxEng: { $max: "$English" } } },
        { $sort: { maxEng: -1 } },
        { $limit: 3 }
      ]
    }
  }
])
Your question is not entirely clear, but I can think of two scenarios:
Get repetitive students along with date:
$project to show required fields and convert subjects object to array using $objectToArray
$unwind subjects array
$sort by subject score (subjects.v) in descending order
$group by subject name and get array of students
$project to get the first 3 (highest-scoring) students from students array
// Three highest individual scores per subject; the same student can appear
// more than once because every dated document is ranked separately.
db.collection.aggregate([
  // Keep name/date and turn the three subject fields into { k, v } pairs.
  {
    $project: {
      name: "$name",
      date: "$date",
      entry: {
        $objectToArray: { Math: "$Math", Science: "$Science", English: "$English" }
      }
    }
  },
  // One document per (student document, subject) pair.
  { $unwind: "$entry" },
  // Highest scores first, so each subject's students are pushed in rank order.
  { $sort: { "entry.v": -1 } },
  {
    $group: {
      _id: "$entry.k",
      students: { $push: { name: "$name", date: "$date", score: "$entry.v" } }
    }
  },
  // Keep only the first three entries of each subject.
  {
    $project: {
      _id: 0,
      subject: "$_id",
      students: { $slice: ["$students", 3] }
    }
  }
])
Playground
Sum of each student's scores across all dates (so every student appears only once):
$group by name, and get sum of all subjects using $sum,
$project to convert subjects object to array using $objectToArray
$unwind subjects array
$sort by subject score (subjects.v) in descending order
$group by subject name and get array of students
$project to get the first 3 (highest-scoring) students from students array
// Three students with the highest summed score per subject; scores of the
// same student are added up across all dates first.
db.collection.aggregate([
  // Total of every subject per student name.
  {
    $group: {
      _id: "$name",
      Math: { $sum: "$Math" },
      Science: { $sum: "$Science" },
      English: { $sum: "$English" }
    }
  },
  // Turn the three totals into { k, v } pairs; _id (the name) is kept implicitly.
  {
    $project: {
      entry: {
        $objectToArray: { Math: "$Math", Science: "$Science", English: "$English" }
      }
    }
  },
  // One document per (student, subject) pair.
  { $unwind: "$entry" },
  // Highest totals first, so each subject's students are pushed in rank order.
  { $sort: { "entry.v": -1 } },
  {
    $group: {
      _id: "$entry.k",
      students: { $push: { name: "$_id", score: "$entry.v" } }
    }
  },
  // Keep only the first three entries of each subject.
  {
    $project: {
      _id: 0,
      subject: "$_id",
      students: { $slice: ["$students", 3] }
    }
  }
])
Playground
I have a document which describes counts of different things observed by a camera within a 15 minute period. It looks like this:
{
"_id" : ObjectId("5b1a709a83552d002516ac19"),
"start" : ISODate("2018-06-08T11:45:00.000Z"),
"end" : ISODate("2018-06-08T12:00:00.000Z"),
"recording" : ObjectId("5b1a654683552d002516ac16"),
"data" : {
"counts" : {
"5b434d05da1f0e00252566be" : 12,
"5b434d05da1f0e00252566cc" : 4,
"5b434d05da1f0e00252566ca" : 1
}
}
}
The keys inside the data.counts object change with each document and refer to additional data that is fetched at a later date. There are unlimited number of keys inside data.counts (but usually about 20)
I am trying to aggregate all these 15 minute documents up to daily aggregated documents.
I have this query at the moment to do that:
// Group one recording's 15-minute segments into one document per calendar day.
db.getCollection("segments").aggregate([
  // Only the segments of this recording.
  { $match: { recording: ObjectId("5bf7f68ad8293a00261dd83f") } },
  // Keep just the fields needed downstream (_id is kept implicitly).
  { $project: { start: 1, recording: 1, data: 1 } },
  // Bucket by the day portion of "start" and collect the whole documents.
  {
    $group: {
      _id: { $dateToString: { format: "%Y-%m-%d", date: "$start" } },
      segments: { $push: "$$ROOT" }
    }
  },
  // Newest day first.
  { $sort: { _id: -1 } }
]);
This does the grouping and returns all the segments in an array.
I want to also aggregate the information inside data.counts, so that I get the sum of values for all keys that are the same within the daily group.
This would save me from having another service loop through each 15 minute segment summing values with the same keys. E.g. the query would return something like this:
{
"_id" : "2019-02-27",
"counts" : {
"5b434d05da1f0e00252566be" : 351,
"5b434d05da1f0e00252566cc" : 194,
"5b434d05da1f0e00252566ca" : 111
... any other keys that were found within a day
}
}
How might I amend the query I already have, or use a different query?
Thanks!
You could use the $facet pipeline stage to create two sub-pipelines; one for segments and another for counts. These sub-pipelines can be joined by using $zip to stitch them together and $map to merge each 2-element array produced from zip. Note this will only work correctly if the sub-pipelines output sorted arrays of the same size, which is why we group and sort by start_date in each sub-pipeline.
Here's the query:
// Builds one document per day for a recording, holding both the day's
// segments and the per-key sums of data.counts across those segments.
db.getCollection("segments").aggregate([{
$match: {
recording: ObjectId("5b1a654683552d002516ac16")
}
}, {
// Add start_date: the "start" timestamp formatted as a YYYY-MM-DD string.
$project: {
start: 1,
recording: 1,
data: 1,
start_date: { $dateToString: { format: "%Y-%m-%d", date: "$start" }}
}
}, {
// Run two sub-pipelines over the same input; each yields an array of per-day documents.
$facet: {
// Sub-pipeline 1: collect start/recording/data of every segment per day.
segments_pipeline: [{
$group: {
_id: "$start_date",
segments: {
$push: {
start: "$start",
recording: "$recording",
data: "$data"
}
}
}
}, {
$sort: {
_id: -1
}
}],
// Sub-pipeline 2: sum the data.counts values per day and per key.
counts_pipeline: [{
$project: {
start_date: "$start_date",
// Turn the counts object into an array of { k, v } pairs so keys can be grouped on.
count: { $objectToArray: "$data.counts" }
}
}, {
$unwind: "$count"
}, {
// Sum the values of each key within each day.
$group: {
_id: {
start_date: "$start_date",
count_id: "$count.k"
},
count_sum: { $sum: "$count.v" }
}
}, {
// Regroup by day, pushing one single-key object { <count_id>: <count_sum> } per key.
$group: {
_id: "$_id.start_date",
counts: {
$push: {
$arrayToObject: [[{
k: "$_id.count_id",
v: "$count_sum"
}]]
}
}
}
}, {
// Merge the single-key objects into one counts object per day.
$project: {
counts: { $mergeObjects: "$counts" }
}
}, {
// Same ordering as segments_pipeline so the two arrays line up by position.
$sort: {
_id: -1
}
}]
}
}, {
// Pair the i-th element of both arrays and merge each pair into one document.
// NOTE(review): this relies on both arrays having the same length and day order;
// a day whose segments yield no count entries would be absent from counts_pipeline — confirm.
$project: {
result: {
$map: {
input: { $zip: { inputs: ["$segments_pipeline", "$counts_pipeline"] }},
in: { $mergeObjects: "$$this" }
}
}
}
}, {
// Emit each merged per-day document as its own top-level result.
$unwind: "$result"
}, {
$replaceRoot: {
newRoot: "$result"
}
}])
Try it out here: Mongoplayground.
I am using Mongoose aggregation (MongoDB version 3.2).
I have a field users which is an array. I want to $project first item in this array to a new field user.
I tried
{ $project: {
user: '$users[0]',
otherField: 1
}},
{ $project: {
user: '$users.0',
otherField: 1
}},
{ $project: {
user: { $first: '$users'},
otherField: 1
}},
But none of these works.
How can I do it correctly? Thanks
Update:
Starting from v4.4 there is a dedicated operator $first:
{ $project: {
user: { $first: "$users" },
otherField: 1
}},
It is syntactic sugar for the $arrayElemAt expression shown in the original answer below.
Original answer:
You can use arrayElemAt:
{ $project: {
user: { $arrayElemAt: [ "$users", 0 ] },
otherField: 1
}},
If it is an array of objects and you want to use just single object field, ie:
{
"users": [
{name: "John", surname: "Smith"},
{name: "Elon", surname: "Gates"}
]
}
you can use:
// "$users.name" resolves to the array of name values; $first picks its first element.
// (The closing brace of the stage was missing in the original snippet.)
{ $project: { user: { $first: "$users.name" } } }
Edit (exclude case - after comment from #haytham)
In order to exclude a single field from a nested document in array you have to do 2 projections:
// First projection: keep the whole first element of users as "user".
// (The closing brace of the stage was missing in the original snippet.)
{ $project: { user: { $first: "$users" } } }
This returns the whole first object; then exclude the field you do not want, i.e.:
{ $project: { "user.name" : 0 }
Starting Mongo 4.4, the aggregation operator $first can be used to access the first element of an array:
// { "users": ["Jean", "Paul", "Jack"] }
// { "users": ["Claude"] }
db.collection.aggregate([
  {
    $project: {
      // $first (array operator, MongoDB 4.4+) returns the first element of users.
      user: { $first: "$users" }
    }
  }
])
// { "user" : "Jean" }
// { "user" : "Claude" }