How to regroup (or merge) objects in aggregation pipeline in MongoDB? - mongodb

I currently have an aggregation pipeline:
db.getCollection('forms').aggregate([
{ $unwind: //unwind },
{
$match: {
//some matches
}
},
{
$project: {
//some projections
}
},
{
//Finally, im grouping the results
$group: {
_id: {
year: { $year: '$createdAt' },
month: { $month: '$createdAt' },
raceEthnicity: '$demographic.raceEthnicity'
},
count: { $sum: 1 },
}
]
My current results are similar to:
[{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Asian"
},
"count" : 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Multiracial"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "White"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
"raceEthnicity" : "White"
},
"count" : 33.0
}]
Is there a way to add a new stage on the pipeline to "merge" results of the same year/month into a single object?
I want to achieve something like:
{
"_id" : {
"year" : 2020,
"month" : 11,
},
"Asian" : 1.0,
"Multiracial": 3.0,
"White": 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
},
"White": 33
}
Is it possible? How can I do that?

Add this one to your aggregation pipeline.
db.collection.aggregate([
{ $set: { "data": { k: "$_id.raceEthnicity", v: "$count" } } },
{ $group: { _id: { year: "$_id.year", month: "$_id.month" }, data: { $push: "$data" } } },
{ $set: { "data": { $arrayToObject: "$data" } } },
{ $replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", "$data"] } } },
{ $unset: "data" }
])
Unlike the solution from #wak786 you don't need to know all ethnicity at design time. It works for arbitrary ethnicity.

Add these stages to your pipeline.
db.collection.aggregate([
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$$ROOT",
{
$arrayToObject: [
[
{
k: "$_id.raceEthnicity",
v: "$count"
}
]
]
}
]
}
}
},
{
"$group": {
"_id": {
year: "$_id.year",
month: "$_id.month",
},
"Asian": {
"$sum": "$Asian"
},
"Multiracial": {
"$sum": "$Multiracial"
},
"White": {
"$sum": "$White"
}
}
}
])
Below is the mongo playground link. I have taken the current result of your pipeline as input to my query.
Try it here

Related

Grouping and summing after using $addToSet in MongoDB

Assume I have the following data:
[{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 1,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 2,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 3,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 4,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
}]
I want to group the data by year and get a total sum for that year. I also want an array of the items used during the group, grouped by a field and summed, if that makes sense. This is ultimately what I want to end up with:
{
"year": [
{
"year": "2019",
"totalYear": 14.86,
"dividends": [
{
"symbol": "T",
"amount": 10.00
},
{
"symbol": "SPHD",
"amount": 4.86
}
]
}
]
}
Below is the code I have written so far using Mongoose. The problem is that I can't figure out how to group and sum the items that I added to the set. I could always do that in the application layer but I was hoping to accomplish this entirely inside of a query.:
const [transactions] = await Transaction.aggregate([
{ $match: { type: TransactionType.DIVIDEND_OR_INTEREST, netAmount: { $gte: 0 } } },
{
$facet: {
year: [
{
$group: {
_id: { $dateToString: { format: '%Y', date: '$transactionDate' } },
totalYear: { $sum: '$netAmount' },
dividends: {
$addToSet: {
symbol: '$transactionItem.instrument.symbol',
amount: '$netAmount',
},
},
},
},
{ $sort: { _id: 1 } },
{
$project: {
year: '$_id',
totalYear: { $round: ['$totalYear', 2] },
dividends: '$dividends',
_id: false,
},
},
],
},
},
]).exec();
It requires to do two group stages,
First group by year and symbol
Second group by only year
If the transactionDate field has date type value then just use $year operator to get the year
I would suggest you do $sort after the immediate $match stage to use an index if you have created or planning for future
const [transactions] = await Transaction.aggregate([
{
$match: {
type: TransactionType.DIVIDEND_OR_INTEREST,
netAmount: { $gte: 0 }
}
},
{ $sort: { transactionDate: 1 } },
{
$facet: {
year: [
{
$group: {
_id: {
year: { $year: "$transactionDate" },
symbol: "$transactionItem.instrument.symbol"
},
netAmount: { $sum: "$netAmount" }
}
},
{
$group: {
_id: "$_id.year",
totalYear: { $sum: "$netAmount" },
dividends: {
$push: {
symbol: "$_id.symbol",
amount: "$netAmount"
}
}
}
},
{
$project: {
_id: 0,
year: "$_id",
totalYear: 1,
dividends: 1
}
}
]
}
}
]).exec();
Playground

Using Mongo to calculate sum in Aggregator pipeline

I have a timeseries data in mongodb and I want to calculate the sum per day between two given dates of every sensor after I have calculated the difference between the max and min reading of the day by the sensor, using the below query
db.ts_events.aggregate([
{ $match: {
"metadata.assetCode": { $in: [
"h"
]
},
"timestamp": { $gte: ISODate("2022-07-01T02:39:02.000+0000"), $lte: ISODate("2022-07-01T06:30:00.000+0000")
}
}
},
{
$project: {
date: {
$dateToParts: { date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: { $max: "$activeEnergy"
},
minValue: { $min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: { $subtract: [
"$maxValue",
"$minValue"
]
},
}
},
])
I get the following output
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "B"
},
"maxValue" : 1979.78,
"minValue" : 1979.77,
"differnce" : 0.009999999999990905
}
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "A"
},
"maxValue" : 7108.01,
"minValue" : 7098.18,
"differnce" : 9.829999999999927
}
I want to calculate the sum of both meter difference how can I do that?
Apart from this one more problem I am facing which I am putting forward in this edited version, as you can see date is in ISODate format but I will be getting a unix epoch format,
I tried to tweak the query but it is not working
db.ts_events.aggregate([
{
$project: {
date: {
$dateToParts: {
date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
"metadata.assetCode": 1,
"timestamp": 1,
startDate: {
$toDate: 1656686342000
},
endDate: {
$toDate: 1656700200000
}
}
},
{
$match: {
"metadata.assetCode": {
$in: [
"h"
]
},
"timestamp": {
$gte: "$startDate", $lte: "$endDate"
}
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: {
$max: "$activeEnergy"
},
minValue: {
$min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: {
$subtract: [
"$maxValue",
"$minValue"
]
},
}
},
{
$group: {
_id: "$_id.date", res: {
$push: '$$ROOT'
}, differnceSum: {
$sum: '$differnce'
}
}
}
])
Can you help me solve the problem?
One option is to add one more step like this (depending on your expected output format):
This step will group together your separate documents, into one document, which will allow you to sum their values together. Be careful when grouping, since now it is a one big document and a document has a size limit.
We use $$ROOT to keep the original document structure (here inside a new array)
{$group: {_id: 0, res: {$push: '$$ROOT'}, differnceSum: {$sum: $differnce'}}}

Need help to MongoDB aggregate $group state

I have a collection of 1000 documents like this:
{
"_id" : ObjectId("628b63d66a5951db6bb79905"),
"index" : 0,
"name" : "Aurelia Gonzales",
"isActive" : false,
"registered" : ISODate("2015-02-11T04:22:39.000+0000"),
"age" : 41,
"gender" : "female",
"eyeColor" : "green",
"favoriteFruit" : "banana",
"company" : {
"title" : "YURTURE",
"email" : "aureliagonzales#yurture.com",
"phone" : "+1 (940) 501-3963",
"location" : {
"country" : "USA",
"address" : "694 Hewes Street"
}
},
"tags" : [
"enim",
"id",
"velit",
"ad",
"consequat"
]
}
I want to group those by year and gender. Like In 2014 male registration 105 and female registration 131. And finally return documents like this:
{
_id:2014,
male:105,
female:131,
total:236
},
{
_id:2015,
male:136,
female:128,
total:264
}
I have tried till group by registered and gender like this:
db.persons.aggregate([
{ $group: { _id: { year: { $year: "$registered" }, gender: "$gender" }, total: { $sum: NumberInt(1) } } },
{ $sort: { "_id.year": 1,"_id.gender":1 } }
])
which is return document like this:
{
"_id" : {
"year" : 2014,
"gender" : "female"
},
"total" : 131
}
{
"_id" : {
"year" : 2014,
"gender" : "male"
},
"total" : 105
}
Please guide to figure out from this whole.
db.collection.aggregate([
{
"$group": { //Group things
"_id": "$_id.year",
"gender": {
"$addToSet": {
k: "$_id.gender",
v: "$total"
}
},
sum: { //Sum it
$sum: "$total"
}
}
},
{
"$project": {//Reshape it
g: {
"$arrayToObject": "$gender"
},
_id: 1,
sum: 1
}
},
{
"$project": { //Reshape it
_id: 1,
"g.female": 1,
"g.male": 1,
sum: 1
}
}
])
Play
Just add one more group stage to your aggregation pipeline, like this:
db.persons.aggregate([
{ $group: { _id: { year: { $year: "$registered" }, gender: "$gender" }, total: { $sum: NumberInt(1) } } },
{ $sort: { "_id.year": 1,"_id.gender":1 } },
{
$group: {
_id: "$_id.year",
male: {
$sum: {
$cond: {
if: {
$eq: [
"$_id.gender",
"male"
]
},
then: "$total",
else: 0
}
}
},
female: {
$sum: {
$cond: {
if: {
$eq: [
"$_id.gender",
"female"
]
},
then: "$total",
else: 0
}
}
},
total: {
$sum: "$total"
}
},
}
]);
Here's the working link. We are grouping by year in this last step, and calculating the counts for gender conditionally and the total is just the total of the counts irrespective of the gender.
Besides #Gibbs mentioned in the comment which proposes the solution with 2 $group stages,
You can achieve the result as below:
$group - Group by year of registered. Add gender value into genders array.
$sort - Order by _id.
$project - Decorate output documents.
3.1. male - Get the size of array from $filter the value of "male" in "genders" array.
3.2. female - Get the size of array from $filter the value of "female" in "genders" array.
3.3. total - Get the size of "genders" array.
Propose this method if you are expected to count and return the "male" and "female" gender fields.
db.collection.aggregate([
{
$group: {
_id: {
$year: "$registered"
},
genders: {
$push: "$gender"
}
}
},
{
$sort: {
"_id": 1
}
},
{
$project: {
_id: 1,
male: {
$size: {
$filter: {
input: "$genders",
cond: {
$eq: [
"$$this",
"male"
]
}
}
}
},
female: {
$size: {
$filter: {
input: "$genders",
cond: {
$eq: [
"$$this",
"female"
]
}
}
}
},
total: {
$size: "$genders"
}
}
}
])
Sample Mongo Playground

Mongodb: $unwind and compute $avg

I have documents storing IoT data.
Following MongoDB schema design best practices for IoT, I came to documents with the following structure:
"_id" : "AQ106_2020-09-12T09",
"date" : "2020-09-12T09:00:00.000Z",
"station" : {
"name" : "AQ106",
"loc" : {
"type" : "Point",
"coordinates" : [
14.339263,
40.814224
]
},
"properties" : {
}
},
"samples" : [
{
"t" : ISODate("2020-09-12T11:02:00.000+02:00"),
"data" : {
"pm1_mg_m3" : 2.7,
"pm2_5_mg_m3" : 4.6,
"pm10_mg_m3" : 12,
"P0" : 152,
"P1" : 16,
"P2" : 4.7,
"P3" : 0.8,
"P4" : 0.86,
"P5" : 0.6,
"P6" : 0.28,
"P7" : 0.152,
"P8" : 0.094,
"P9" : 0.092,
"P10" : 0.019,
"P11" : 0,
"P12" : 0,
"P13" : 0.0188,
"P14" : 0,
"P15" : 0,
"P16" : 0,
"P17" : 0,
"P18" : 0,
"P19" : 0,
"P20" : 0,
"P21" : 0,
"P22" : 0,
"P23" : 0,
"temp_celsius" : 32.59,
"humRelPercent" : 34,
"press_mBar" : 1010.79,
"CO2mA" : 4,
"NO2_WE_mV" : 226.419,
"NO2_AE_mV" : 229.553,
"OX_WE_mV" : 252.287,
"OX_AE_mV" : 220.419,
"CO_WE_mV" : 509.077,
"AE_WE_mV" : 348.51,
"batt_V" : 13.5,
"source_V" : 17.6
}
},
.... additional arrays
}
Now I want to compute hourly or daily averages (or another metric) to populate a new collection with only summarised data.
I coded the following solution for hourly means:
db.collection.aggregate([{$match: {
'station.name':'AQ104'
}}, {$unwind: {
path: "$samples"
}}, {$group: {
_id: "$date",
P0: {
$avg : "$samples.data.P0"
},
temp:{
$avg:"$samples.data.temp_celsius"
}
}}])
This works but I need to manually create a field for each property in samples.data in the original document and that's tedious.
Moreover, how to group both by date and station.name?
You can find a working example here.
Thanks.
Let's start with the easy question, how to group on multiple fields? With a simple syntax change:
{
$group: {
_id: {
date: "$date",
station: "$station.name"
}
}
Now for the second question this will be a bit more tedious. Mongo does not support "merging" objects by their keys with custom logic (in this case $avg). So we will have to convert the object to an array. unwind it, calculate the average per field and eventually group to restore the required structure like so:
db.collection.aggregate([
{
$match: {
"station.name": "AQ106"
}
},
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station.name",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$data",
"$_id"
]
}
}
}
])
MongoPlayground
------- EDIT ---------
For Mongo v4.4+ you can use $accumulator which allows you to execute custom javascript code in your pipeline. I am unsure how this will fare against the native Mongo pipeline in terms of performance in scale.
One thing to note is that I added the initial $addFields stage under the assumption that different samples may have different keys. if this is not the case it is not needed.
db.collection.aggregate([
{
$addFields: {
sampleKeys: {
$reduce: {
input: {
$map: {
input: "$samples",
as: "sample",
in: {
$map: {
input: {
"$objectToArray": "$$sample.data"
},
as: "sampleArrItem",
in: "$$sampleArrItem.k"
}
}
}
},
initialValue: [],
in: {
"$setUnion": [
"$$this",
"$$value"
]
}
}
}
}
},
{
$addFields: {
samples: {
$accumulator: {
init: function(keys){
return keys.map(k => {return {k: {v: 0, c: 0}}});
},
initArgs: ["$sampleKeys"],
accumulateArgs: ["$samples"],
accumulate: function(state, sample){
Object.keys(state).forEach((key) => {
if (key in sample.data) {
state[key].v += sample.data[key];
state[key].c++;
};
});
return state;
},
merge: function(state1, state2){
Object.keys(state1).forEach((key) => {
state1[key].v += state2[key].v;
state1[key].c += state2[key].c;
});
return state1;
},
lang: "js"
}
}
}
},
{
$replaceRoot: {
newRoot: {
$mergeObject: [
"$samples",
{station: "$station.name", date: "$date"},
]
}
}
}
])
I partially resolved my question in terms of grouping by multiple fields (MongoDB documentation was not so clear at this regard, in my opinion)
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$group: {
_id: {
date: "$date",
station: "$station.name"
},
P0: {
$avg: "$samples.data.P0"
},
temp: {
$avg: "$samples.data.temp_celsius"
}
}
}
])
Here the updated working example.
Thanks to Tom Slabbaert, I solved my question with the following query:
db.collection.aggregate([
{
$unwind: {
path: "$samples"
}
},
{
$addFields: {
objArr: {
"$objectToArray": "$samples.data"
}
}
},
{
$unwind: "$objArr"
},
{
$group: {
_id: {
date: "$date",
station: "$station",
objKey: "$objArr.k"
},
value: {
$avg: "$objArr.v"
}
}
},
{
$addFields: {
data: {
"$arrayToObject": [
[
{
k: "$_id.objKey",
v: "$value"
}
]
]
}
}
},
{
$group: {
_id: {
date: "$_id.date",
station: "$_id.station"
},
data: {
"$mergeObjects": "$data"
}
}
},
{
"$project": {
_id: "$_id.date",
station: "$_id.station",
data: 1
}
}
])
Here
I wonder if it is possible to simplify the above solution using the new $function operator.
Thanks.

Mongodb project results into single document

I have a collection like this:
{
"_id" : ObjectId("5f4e81f1da5ea3cb7c248a8f"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-08-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e8206da5ea3cb7c248a90"),
"type" : "TYPE_1",
"updateTime" : ISODate("2020-09-24T11:10:43.219+0000")
}
{
"_id" : ObjectId("5f4e821fda5ea3cb7c248a91"),
"type" : "TYPE_2",
"updateTime" : ISODate("2020-09-25T11:10:43.219+0000")
}
I want to know how many documents there are of each type and also obtain the date of the last global modification. For now I can get these results like this:
db.getCollection("test").aggregate(
// Pipeline
[
// Stage 1
{
$group: {
_id : "$type",
count: { $sum: 1 },
lastUpdate: { "$max": "$updateTime" }
}
},
// Stage 2
{
$sort: {
lastUpdate : -1
}
},
]
);
With which I get the results this way:
{
"_id" : "TYPE_2",
"count" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
{
"_id" : "TYPE_1",
"count" : 2.0,
"lastUpdate" : ISODate("2020-09-24T11:10:43.219+0000")
}
So I have both the sum of each document and the last modification (thanks to the sort).
But I would like to project and get the results like this, in a single result document:
{
"type1" : 2.0,
"type2" : 1.0,
"lastUpdate" : ISODate("2020-09-25T11:10:43.219+0000")
}
#varman's answer is good, this is just in different way,
$group you have already done by your self
$group create types array to combine all documents
$replaceWith to replace root with field types to convert $arrayToObject
db.collection.aggregate([
{
$group: {
_id: "$type",
count: { $sum: 1 },
lastUpdate: { $max: "$updateTime" }
}
},
{
$group: {
_id: null,
types: {
$push: {
k: "$_id",
v: "$count"
}
},
lastUpdate: { $max: "$lastUpdate" }
}
},
{
$replaceWith: {
$mergeObjects: [
{ lastUpdate: "$lastUpdate" },
{ $arrayToObject: "$types" }
]
}
}
])
Playground
You can use following stages after your stage.
{
$group: {
_id: null,
data: {
$push: {
type: "$_id",
count: "$count"
}
},
lastUpdate: {
$first: "$lastUpdate"
}
}
},
{
$project: {
data: {
$arrayToObject: {
$map: {
input: "$data",
in: {
k: "$$this.type",
v: "$$this.count"
}
}
}
},
lastUpdate: 1
}
},
{
$addFields: {
"data.lastUpdate": "$lastUpdate"
}
},
{
"$replaceRoot": {
"newRoot": "$data"
}
}
Working Mongo playground