Grouping and summing after using $addToSet in MongoDB - mongodb

Assume I have the following data:
[{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 1,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 2,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 3,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 4,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
}]
I want to group the data by year and get a total sum for that year. I also want an array of the items used during the group, grouped by a field and summed, if that makes sense. This is ultimately what I want to end up with:
{
"year": [
{
"year": "2019",
"totalYear": 14.86,
"dividends": [
{
"symbol": "T",
"amount": 10.00
},
{
"symbol": "SPHD",
"amount": 4.86
}
]
}
]
}
Below is the code I have written so far using Mongoose. The problem is that I can't figure out how to group and sum the items that I added to the set. I could always do that in the application layer but I was hoping to accomplish this entirely inside of a query.:
const [transactions] = await Transaction.aggregate([
{ $match: { type: TransactionType.DIVIDEND_OR_INTEREST, netAmount: { $gte: 0 } } },
{
$facet: {
year: [
{
$group: {
_id: { $dateToString: { format: '%Y', date: '$transactionDate' } },
totalYear: { $sum: '$netAmount' },
dividends: {
$addToSet: {
symbol: '$transactionItem.instrument.symbol',
amount: '$netAmount',
},
},
},
},
{ $sort: { _id: 1 } },
{
$project: {
year: '$_id',
totalYear: { $round: ['$totalYear', 2] },
dividends: '$dividends',
_id: false,
},
},
],
},
},
]).exec();

It requires to do two group stages,
First group by year and symbol
Second group by only year
If the transactionDate field has date type value then just use $year operator to get the year
I would suggest you do $sort after the immediate $match stage to use an index if you have created or planning for future
const [transactions] = await Transaction.aggregate([
{
$match: {
type: TransactionType.DIVIDEND_OR_INTEREST,
netAmount: { $gte: 0 }
}
},
{ $sort: { transactionDate: 1 } },
{
$facet: {
year: [
{
$group: {
_id: {
year: { $year: "$transactionDate" },
symbol: "$transactionItem.instrument.symbol"
},
netAmount: { $sum: "$netAmount" }
}
},
{
$group: {
_id: "$_id.year",
totalYear: { $sum: "$netAmount" },
dividends: {
$push: {
symbol: "$_id.symbol",
amount: "$netAmount"
}
}
}
},
{
$project: {
_id: 0,
year: "$_id",
totalYear: 1,
dividends: 1
}
}
]
}
}
]).exec();
Playground

Related

Using Mongo to calculate sum in Aggregator pipeline

I have a timeseries data in mongodb and I want to calculate the sum per day between two given dates of every sensor after I have calculated the difference between the max and min reading of the day by the sensor, using the below query
db.ts_events.aggregate([
{ $match: {
"metadata.assetCode": { $in: [
"h"
]
},
"timestamp": { $gte: ISODate("2022-07-01T02:39:02.000+0000"), $lte: ISODate("2022-07-01T06:30:00.000+0000")
}
}
},
{
$project: {
date: {
$dateToParts: { date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: { $max: "$activeEnergy"
},
minValue: { $min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: { $subtract: [
"$maxValue",
"$minValue"
]
},
}
},
])
I get the following output
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "B"
},
"maxValue" : 1979.78,
"minValue" : 1979.77,
"differnce" : 0.009999999999990905
}
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "A"
},
"maxValue" : 7108.01,
"minValue" : 7098.18,
"differnce" : 9.829999999999927
}
I want to calculate the sum of both meter difference how can I do that?
Apart from this one more problem I am facing which I am putting forward in this edited version, as you can see date is in ISODate format but I will be getting a unix epoch format,
I tried to tweak the query but it is not working
db.ts_events.aggregate([
{
$project: {
date: {
$dateToParts: {
date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
"metadata.assetCode": 1,
"timestamp": 1,
startDate: {
$toDate: 1656686342000
},
endDate: {
$toDate: 1656700200000
}
}
},
{
$match: {
"metadata.assetCode": {
$in: [
"h"
]
},
"timestamp": {
$gte: "$startDate", $lte: "$endDate"
}
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: {
$max: "$activeEnergy"
},
minValue: {
$min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: {
$subtract: [
"$maxValue",
"$minValue"
]
},
}
},
{
$group: {
_id: "$_id.date", res: {
$push: '$$ROOT'
}, differnceSum: {
$sum: '$differnce'
}
}
}
])
Can you help me solve the problem?
One option is to add one more step like this (depending on your expected output format):
This step will group together your separate documents, into one document, which will allow you to sum their values together. Be careful when grouping, since now it is a one big document and a document has a size limit.
We use $$ROOT to keep the original document structure (here inside a new array)
{$group: {_id: 0, res: {$push: '$$ROOT'}, differnceSum: {$sum: $differnce'}}}

Need help to MongoDB aggregate $group state

I have a collection of 1000 documents like this:
{
"_id" : ObjectId("628b63d66a5951db6bb79905"),
"index" : 0,
"name" : "Aurelia Gonzales",
"isActive" : false,
"registered" : ISODate("2015-02-11T04:22:39.000+0000"),
"age" : 41,
"gender" : "female",
"eyeColor" : "green",
"favoriteFruit" : "banana",
"company" : {
"title" : "YURTURE",
"email" : "aureliagonzales#yurture.com",
"phone" : "+1 (940) 501-3963",
"location" : {
"country" : "USA",
"address" : "694 Hewes Street"
}
},
"tags" : [
"enim",
"id",
"velit",
"ad",
"consequat"
]
}
I want to group those by year and gender. Like In 2014 male registration 105 and female registration 131. And finally return documents like this:
{
_id:2014,
male:105,
female:131,
total:236
},
{
_id:2015,
male:136,
female:128,
total:264
}
I have tried till group by registered and gender like this:
db.persons.aggregate([
{ $group: { _id: { year: { $year: "$registered" }, gender: "$gender" }, total: { $sum: NumberInt(1) } } },
{ $sort: { "_id.year": 1,"_id.gender":1 } }
])
which is return document like this:
{
"_id" : {
"year" : 2014,
"gender" : "female"
},
"total" : 131
}
{
"_id" : {
"year" : 2014,
"gender" : "male"
},
"total" : 105
}
Please guide to figure out from this whole.
db.collection.aggregate([
{
"$group": { //Group things
"_id": "$_id.year",
"gender": {
"$addToSet": {
k: "$_id.gender",
v: "$total"
}
},
sum: { //Sum it
$sum: "$total"
}
}
},
{
"$project": {//Reshape it
g: {
"$arrayToObject": "$gender"
},
_id: 1,
sum: 1
}
},
{
"$project": { //Reshape it
_id: 1,
"g.female": 1,
"g.male": 1,
sum: 1
}
}
])
Play
Just add one more group stage to your aggregation pipeline, like this:
db.persons.aggregate([
{ $group: { _id: { year: { $year: "$registered" }, gender: "$gender" }, total: { $sum: NumberInt(1) } } },
{ $sort: { "_id.year": 1,"_id.gender":1 } },
{
$group: {
_id: "$_id.year",
male: {
$sum: {
$cond: {
if: {
$eq: [
"$_id.gender",
"male"
]
},
then: "$total",
else: 0
}
}
},
female: {
$sum: {
$cond: {
if: {
$eq: [
"$_id.gender",
"female"
]
},
then: "$total",
else: 0
}
}
},
total: {
$sum: "$total"
}
},
}
]);
Here's the working link. We are grouping by year in this last step, and calculating the counts for gender conditionally and the total is just the total of the counts irrespective of the gender.
Besides #Gibbs mentioned in the comment which proposes the solution with 2 $group stages,
You can achieve the result as below:
$group - Group by year of registered. Add gender value into genders array.
$sort - Order by _id.
$project - Decorate output documents.
3.1. male - Get the size of array from $filter the value of "male" in "genders" array.
3.2. female - Get the size of array from $filter the value of "female" in "genders" array.
3.3. total - Get the size of "genders" array.
Propose this method if you are expected to count and return the "male" and "female" gender fields.
db.collection.aggregate([
{
$group: {
_id: {
$year: "$registered"
},
genders: {
$push: "$gender"
}
}
},
{
$sort: {
"_id": 1
}
},
{
$project: {
_id: 1,
male: {
$size: {
$filter: {
input: "$genders",
cond: {
$eq: [
"$$this",
"male"
]
}
}
}
},
female: {
$size: {
$filter: {
input: "$genders",
cond: {
$eq: [
"$$this",
"female"
]
}
}
}
},
total: {
$size: "$genders"
}
}
}
])
Sample Mongo Playground

mongo group by multiple times

Lets asume I have this set of data:
{ ValidFrom: "2019-03-25T16:01:55.714+0000", ValidTo: "2019-03-25T16:01:55.714+0000" },
{ ValidFrom: "2019-03-26T16:01:55.714+0000", ValidTo: "2019-03-25T16:01:55.714+0000" },
{ ValidFrom: "2019-03-25T16:01:55.714+0000", ValidTo: "2019-03-27T16:01:55.714+0000" }
I would like to see this result with one query:
{ "Day": "2019-03-25", ValidFromCount: 2, ValidToCount: 2 },
{ "Day": "2019-03-26", ValidFromCount: 1, ValidToCount: 0 },
{ "Day": "2019-03-27", ValidFromCount: 0, ValidToCount: 1 }
Currently I wrote this aggregation but I am stuck now:
{
$addFields: {
ValidFromDay: { $dateToString: { format: "%Y-%m-%d", date: "$ValidFrom" } },
ValidUntilDay: { $dateToString: { format: "%Y-%m-%d", date: "$ValidUntil" } }
}
},
{
$group : {
_id: { FromDate: '$ValidFromDay', ToDate: '$ValidUntilDay' },
Count: { "$sum": 1 },
}
},
{
$group : {
_id: null,
FromDates: { "$addToSet": { "Date": "$_id.FromDate", "FromCount": { "$sum": "$Count" } } },
ToDate: { "$addToSet": { "Date": "$_id.ToDate", "UntilCount": "$Count" } }
}
}
Is it possible to produce the results I am looking for in some way?
You need to add an array of 2 fields, not just 2 fields. It will let you to unwind it and count by date:
{
$addFields: {
boundary: [
{ day: {$dateToString: { format: "%Y-%m-%d", date: "$ValidFrom" } }, from: 1 },
{ day: { $dateToString: { format: "%Y-%m-%d", date: "$ValidTo" } } , to: 1 }
]
}
},
{
$unwind: "$boundary"
},
{
$group: {
_id: "$boundary.day",
ValidFromCount: {$sum: "$boundary.from"},
ValidToCount: {$sum: "$boundary.to"},
}
}
I think this will do what you want. There are three stages to the pipeline. A$project that constructs a separate day, month and year fields.
> projector
{
"$project" : {
"day" : {
"$dayOfMonth" : "$ValidFrom"
},
"month" : {
"$month" : "$ValidFrom"
},
"year" : {
"$year" : "$ValidFrom"
},
"ValidFrom" : 1
}
}
Then a $group to create the totals and count them by individual day by using an _id of {year, month, day}.
> grouper
{
"$group" : {
"_id" : {
"year" : "$year",
"month" : "$month",
"day" : "$day"
},
"ValidFromCount" : {
"$sum" : 1
},
"ValidToCount" : {
"$sum" : 1
}
}
}
Finally, a projection to eliminate the spurious fields and also get the Day field into the format you want.
> converter
{
"$project" : {
"_id" : 0,
"Day" : {
"$concat" : [
{
"$toString" : "$_id.year"
},
"-",
{
"$toString" : "$_id.month"
},
"-",
{
"$toString" : "$_id.day"
}
]
},
"ValidFromCount" : 1,
"ValidToCount" : 1
}
}
to run just execute (I created your data in collection so2):
> db.so2.find()
{ "_id" : ObjectId("5ca75adfd1a64a2919883a8d"), "ValidFrom" : "2019-03-25T16:01:55.714+0000", "ValidTo" : "2019-03-25T16:01:55.714+0000" }
{ "_id" : ObjectId("5ca75adfd1a64a2919883a8e"), "ValidFrom" : "2019-03-26T16:01:55.714+0000", "ValidTo" : "2019-03-25T16:01:55.714+0000" }
{ "_id" : ObjectId("5ca75adfd1a64a2919883a8f"), "ValidFrom" : "2019-03-25T16:01:55.714+0000", "ValidTo" : "2019-03-27T16:01:55.714+0000" }
>
> db.so3.aggregate([projector,grouper,converter])
{ "ValidFromCount" : 1, "ValidToCount" : 1, "Day" : "2019-3-26" }
{ "ValidFromCount" : 2, "ValidToCount" : 2, "Day" : "2019-3-25" }
>
I'm not sure if the test data you supplied is correct because the second document appears to go back in time so the ValidTo is before the ValidFrom.

MongoDB nested object aggregation sum and sort

I have highly nested mongodb set of objects and i want to sort subofdocuments according to the result of sum their votes for example :
{
"_id":17846384es,
"company_name":"company1",
"products":[
{
"product_id":"123785",
"product_name":"product1",
"user_votes":[
{
"user_id":1,
"vote":1
},
{
"user_id":2,
"vote":2
}
]
},
{
"product_id":"98765",
"product_name":"product2",
"user_votes":[
{
"user_id":5,
"vote":3
},
{
"user_id":3,
"vote":3
}
]
}
]
}
i want to sort as descending products according to the result of sum their votes
the expected output is
{
"_id":17846384es,
"company_name":"company1",
"products":[
{
"product_id":"98765",
"product_name":"product2",
"user_votes":[
{
"user_id":5,
"vote":3
},
{
"user_id":3,
"vote":3
}
]
"votes":8
},
{
"product_id":"123785",
"product_name":"product1",
"user_votes":[
{
"user_id":1,
"vote":1
},
{
"user_id":2,
"vote":2
}
],
"votes":3
}
]
}
Any Idea ?
The following pipeline
db.products.aggregate([
{ $unwind: "$products" },
{
$project: {
company_name: 1,
products: 1,
totalVotes: {
$sum: "$products.user_votes.vote"
}
}
},
{ $sort: { totalVotes: -1 } },
{
$group: {
_id: "$_id",
company_name: { $first: "$company_name" },
products: { $push: "$products" }
}
}
])
would output
{
"_id" : "17846384es",
"company_name" : "company1",
"products" : [
{
"product_id" : "98765",
"product_name" : "product2",
"user_votes" : [
{
"user_id" : 5,
"vote" : 3
},
{
"user_id" : 3,
"vote" : 3
}
]
},
{
"product_id" : "123785",
"product_name" : "product1",
"user_votes" : [
{
"user_id" : 1,
"vote" : 1
},
{
"user_id" : 2,
"vote" : 2
}
]
}
]
}
In case you want to keep the sum of the votes at the product level as shown in your expected output simply modify the $project stage as follows
...
{
$project: {
company_name: 1,
products: {
product_id: 1,
product_name: 1,
user_votes: 1,
votes: { $sum: "$products.user_votes.vote" }
}
}
}
...

MongoDB aggregate using distinct

I have an aggregation that groups on a date and creates a sum.
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate"
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
The output is
"result" : [
{
"_id" : {
"notificationDate" : ISODate("2013-07-18T04:00:00Z")
},
"nd" : ISODate("2013-07-18T04:00:00Z"),
"count" : 484
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-19T04:00:00Z")
},
"nd" : ISODate("2013-07-19T04:00:00Z"),
"count" : 490
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-20T04:00:00Z")
},
"nd" : ISODate("2013-07-20T04:00:00Z"),
"count" : 174
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-21T04:00:00Z")
},
"nd" : ISODate("2013-07-21T04:00:00Z"),
"count" : 6
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-22T04:00:00Z")
},
"nd" : ISODate("2013-07-22T04:00:00Z"),
"count" : 339
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-23T04:00:00Z")
},
"nd" : ISODate("2013-07-23T04:00:00Z"),
"count" : 394
},
{
"_id" : {
"notificationDate" : ISODate("2013-07-24T04:00:00Z")
},
"nd" : ISODate("2013-07-24T04:00:00Z"),
"count" : 17
}
],
"ok" : 1
so far so good. What I need to do now is to keep this, but also add a distinct in the criteria (for argument's sake I want to use AccountId). The would yield me the count of the grouped dates only using distinct AccountId. Is distinct even possible within the aggregation framework?
you can use two group commands in the pipeline, the first to group by accoundId, followed by second group that does usual operation. something like this:
db.InboundWorkItems.aggregate(
{$match: {notificationDate: {$gte: ISODate("2013-07-18T04:00:00Z")}, dropType:'drop' }},
{$group: {_id:"accountId",notificationDate:"$notificationDate"}},
{$group: {_id:1, nd: {$first:"$notificationDate"}, count:{$sum:1} }},
{$sort:{nd:1}} )
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: "$AccountId",
notificationDate: {
$max: "$notificationDate"
},
dropType: {
$max: "$dropType"
}
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate"
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
I think you might actually be looking for a single group (English is a bit confusing) like so:
db.InboundWorkItems.aggregate({
$match: {
notificationDate: {
$gte: ISODate("2013-07-18T04:00:00Z")
},
dropType: 'drop'
}
}, {
$group: {
_id: {
notificationDate: "$notificationDate", accountId: '$accountId'
},
nd: {
$first: "$notificationDate"
},
count: {
$sum: 1
}
}
}, {
$sort: {
nd: 1
}
})
I add the compound _id in the $group because of:
The would yield me the count of the grouped dates only using distinct AccountId.
Which makes me think you want the grouped date count by account ID.