Using Mongo to calculate sum in Aggregator pipeline - mongodb

I have a timeseries data in mongodb and I want to calculate the sum per day between two given dates of every sensor after I have calculated the difference between the max and min reading of the day by the sensor, using the below query
db.ts_events.aggregate([
{ $match: {
"metadata.assetCode": { $in: [
"h"
]
},
"timestamp": { $gte: ISODate("2022-07-01T02:39:02.000+0000"), $lte: ISODate("2022-07-01T06:30:00.000+0000")
}
}
},
{
$project: {
date: {
$dateToParts: { date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: { $max: "$activeEnergy"
},
minValue: { $min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: { $subtract: [
"$maxValue",
"$minValue"
]
},
}
},
])
I get the following output
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "B"
},
"maxValue" : 1979.78,
"minValue" : 1979.77,
"differnce" : 0.009999999999990905
}
{
"_id" : {
"date" : {
"year" : NumberInt(2022),
"month" : NumberInt(7),
"day" : NumberInt(1)
},
"meter" : "A"
},
"maxValue" : 7108.01,
"minValue" : 7098.18,
"differnce" : 9.829999999999927
}
I want to calculate the sum of both meter difference how can I do that?
Apart from this one more problem I am facing which I am putting forward in this edited version, as you can see date is in ISODate format but I will be getting a unix epoch format,
I tried to tweak the query but it is not working
db.ts_events.aggregate([
{
$project: {
date: {
$dateToParts: {
date: "$timestamp"
}
},
activeEnergy: 1,
"metadata.meterId": 1,
"metadata.assetCode": 1,
"timestamp": 1,
startDate: {
$toDate: 1656686342000
},
endDate: {
$toDate: 1656700200000
}
}
},
{
$match: {
"metadata.assetCode": {
$in: [
"h"
]
},
"timestamp": {
$gte: "$startDate", $lte: "$endDate"
}
}
},
{
$group: {
_id: {
date: {
year: "$date.year",
month: "$date.month",
day: "$date.day"
},
meter: "$metadata.meterId",
},
maxValue: {
$max: "$activeEnergy"
},
minValue: {
$min: "$activeEnergy"
},
}
},
{
$addFields: {
differnce: {
$subtract: [
"$maxValue",
"$minValue"
]
},
}
},
{
$group: {
_id: "$_id.date", res: {
$push: '$$ROOT'
}, differnceSum: {
$sum: '$differnce'
}
}
}
])
Can you help me solve the problem?

One option is to add one more step like this (depending on your expected output format):
This step will group together your separate documents, into one document, which will allow you to sum their values together. Be careful when grouping, since now it is a one big document and a document has a size limit.
We use $$ROOT to keep the original document structure (here inside a new array)
{$group: {_id: 0, res: {$push: '$$ROOT'}, differnceSum: {$sum: $differnce'}}}

Related

Grouping and summing after using $addToSet in MongoDB

Assume I have the following data:
[{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 1,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 2,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 3,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 4,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
}]
I want to group the data by year and get a total sum for that year. I also want an array of the items used during the group, grouped by a field and summed, if that makes sense. This is ultimately what I want to end up with:
{
"year": [
{
"year": "2019",
"totalYear": 14.86,
"dividends": [
{
"symbol": "T",
"amount": 10.00
},
{
"symbol": "SPHD",
"amount": 4.86
}
]
}
]
}
Below is the code I have written so far using Mongoose. The problem is that I can't figure out how to group and sum the items that I added to the set. I could always do that in the application layer but I was hoping to accomplish this entirely inside of a query.:
const [transactions] = await Transaction.aggregate([
{ $match: { type: TransactionType.DIVIDEND_OR_INTEREST, netAmount: { $gte: 0 } } },
{
$facet: {
year: [
{
$group: {
_id: { $dateToString: { format: '%Y', date: '$transactionDate' } },
totalYear: { $sum: '$netAmount' },
dividends: {
$addToSet: {
symbol: '$transactionItem.instrument.symbol',
amount: '$netAmount',
},
},
},
},
{ $sort: { _id: 1 } },
{
$project: {
year: '$_id',
totalYear: { $round: ['$totalYear', 2] },
dividends: '$dividends',
_id: false,
},
},
],
},
},
]).exec();
It requires to do two group stages,
First group by year and symbol
Second group by only year
If the transactionDate field has date type value then just use $year operator to get the year
I would suggest you do $sort after the immediate $match stage to use an index if you have created or planning for future
const [transactions] = await Transaction.aggregate([
{
$match: {
type: TransactionType.DIVIDEND_OR_INTEREST,
netAmount: { $gte: 0 }
}
},
{ $sort: { transactionDate: 1 } },
{
$facet: {
year: [
{
$group: {
_id: {
year: { $year: "$transactionDate" },
symbol: "$transactionItem.instrument.symbol"
},
netAmount: { $sum: "$netAmount" }
}
},
{
$group: {
_id: "$_id.year",
totalYear: { $sum: "$netAmount" },
dividends: {
$push: {
symbol: "$_id.symbol",
amount: "$netAmount"
}
}
}
},
{
$project: {
_id: 0,
year: "$_id",
totalYear: 1,
dividends: 1
}
}
]
}
}
]).exec();
Playground

MongoDB - convert string to timestamp, group by hour

I have the following documents stored in a collection:
{
"REQUESTTIMESTAMP" : "26-JUN-19 01.34.10.095000000 AM",
"UNHANDLED_INTENT" : 0,
"USERID" : "John",
"START_OF_INTENT_SKILL_CONVERSATION" : 0,
"PROPERTYCODE" : ""
}
I want to group this by the hour(which we will get from 'REQUESTTIMESTAMP')
Earlier, I had this document stored in the collection in a different way, where I had a separate field for hours, and used that hours field to group:
Previous aggregation query :
collection.aggregate([
{'$match': query}, {
'$group': {
"_id": {
"hour": "$hour",
"sessionId": "$sessionId"
}
}
}, {
"$group": {
"_id": "$_id.hour",
"count": {
"$sum": 1
}
}
}
])
Previous collection structure:
{
"timestamp" : "1581533210921",
"date" : "12-02-2020",
"hour" : "13",
"month" : "02",
"time" : "13:46:50",
"weekDay" : "Wednesday",
"__v" : 0
}
How can I do the above same Previous aggregation query with the new document structure (After extracting hours from 'REQUESTTIMESTAMP' field?)
You should convert your timestamp to Date object then take hour from your date object.
db.collection.aggregate([{
'$match': query
}, {
$project: {
date: {
$dateFromString: {
dateString: '$REQUESTTIMESTAMP',
format: "%m-%d-%Y" //This should be your date format
}
}
}
}, {
$group: {
_id: {
hour: {
$hour: "$date"
}
}
}
}])
Problem is months names are not supported by MongoDB. Either you write a lot of code or you use libraries like moments.js. First update your REQUESTTIMESTAMP to proper Date object, then you can group it.
db.collection.find().forEach(function (doc) {
var d = moment(doc.REQUESTTIMESTAMP, "DD-MMM-YY hh.mm.ss.SSS a");
db.collection.updateOne(
{ _id: doc._id },
{ $set: { date: d.toDate() } }
);
})
db.collection.aggregate([
{
$group: {
_id: { $hour: "$date" },
count: { $sum: 1 }
}
}
])
In case if you're not able to update DB with actual date field & still wanted to proceed with existing format, try this query it will add hour field extracted from given string field REQUESTTIMESTAMP :
Query :
db.collection.aggregate([
{
$addFields: {
hour: {
$let: {
/** split string into three parts date + hours + AM/PM */
vars: { hour: { $slice: [{ $split: ["$REQUESTTIMESTAMP", " "] }, 1, 2] } },
in: {
$cond: [{ $in: ["AM", "$$hour"] }, // Check AM exists in array
{ $toInt: { $substr: [{ $arrayElemAt: ["$$hour", 0] }, 0, 2] } }, // If yes then return int of first 2 letters of first element in hour array
{ $add: [{ $toInt: { $substr: [{ $arrayElemAt: ["$$hour", 0] }, 0, 2] } }, 12] } ] // If PM add 12 to int of first 2 letters of first element in hour array
}
}
}
}
}
])
Test : MongoDB-Playground

MongoDb aggregate pipeline with multiple groupings

I'm trying to get my head around an aggregate pipeline in MongoDb with multiple groups.
I have the following data: https://gist.github.com/bomortensen/36e6b3fbc987a096be36a66bbfe30d82
Expected data would be: https://gist.github.com/bomortensen/7b220df1f1da83be838acfb2ed79a2ee (total quantity sum based on highest version, hourly)
I need to write a query which does the following:
Group the data by the field MeterId to get unique meter groups.
In each group I then need to group by the StartDate's year, month, day and hour since all objects StartDate is stored as quarters, but I need to aggregate them into whole hours.
Finally, I need to only get the highest version from the Versions array by VersionNumber
I've tried the following query, but must admit I'm stuck:
mycollection.aggregate([
{ $group: {
_id : { ediel: "$_id.MeterId", start: "$_id.StartDate" },
versions: { $push: "$Versions" }
}
},
{ $unwind: { path: "$versions" } },
{ $group: {
_id: {
hour: { $hour: "$_id.start.DateTime" },
key: "$_id"
},
quantitySum: { $sum: "$Versions.Quantity" }
}
},
{ $sort: { "_id.hour": -1 } }
]);
Does anyone know how I should do this? :-)
This would give :
1 $project : get $hour from date, create a maxVersion field per record
1 $unwind to remove the Versions array
1 $project to add a keep field that will contain a boolean to check if the record should be kept or not
1 $match that match only higher version number eg keep == true
1 $group that group by id/hour and sum the quantity
1 $project to set up your required format
Query is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$_id._id", StartDate: "$StartDate" },
hour: "$_id.hour",
QuantitySum: 1
}
}])
In your example output you take into account only the first higher versionNumber, You have { "VersionNumber" : 2, "Quantity" : 7.5 } and { "VersionNumber" : 2, "Quantity" : 8.4 } for hour 2 and id 1234 but you only take { "VersionNumber" : 2, "Quantity" : 7.5 }
I dont know if this is intended or not but in this case you want to take only the first MaxVersion number. After the $match, I added :
1 $group that push versions previously filter in an array
1 $project that $slice this array to take only the first element
1 $unwind to remove this array (which contains only one elemement)
The query that match your output is :
db.mycollection.aggregate([{
$project: {
_id: 1,
Versions: 1,
hour: {
"$hour": "$_id.StartDate"
},
maxVersion: { $max: "$Versions.VersionNumber" }
}
}, {
$unwind: "$Versions"
}, {
$project: {
_id: 1,
Versions: 1,
hour: 1,
maxVersion: 1,
keep: { $eq: ["$Versions.VersionNumber", "$maxVersion"] }
}
}, {
$match: { "keep": true }
}, {
$group: {
_id: { _id: "$_id.MeterId", StartDate: "$_id.StartDate" },
Versions: { $push: "$Versions" },
hour: { "$first": "$hour" }
}
}, {
$project: {
_id: 1,
hour: 1,
Versions: { $slice: ["$Versions", 1] }
}
}, {
$unwind: "$Versions"
}, {
$sort: {
_id: 1
}
}, {
$group: {
_id: { _id: "$_id._id", hour: "$hour" },
StartDate: { $first: "$_id.StartDate" },
QuantitySum: { $sum: "$Versions.Quantity" }
}
}, {
$project: {
_id: { _id: "$MeterId._id", StartDate: "$StartDate" },
Hour: "$_id.hour",
QuantitySum: 1
}
}])
Output is :
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "4567", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T03:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 3 }
{ "_id" : { "MeterId" : "1234", "StartDate" : ISODate("2016-09-20T02:00:00Z") }, "QuantitySum" : 25.9, "Hour" : 2 }
Sorry, I just dont find a straight forward way to round the hour. You can try the following. You will unwind the versions, so you can apply the grouping to collect the max version, push the versions for the next step, which is to project to filter the matching records with max version and final project to sum the max versions quantity. Right now start dt is the min from the group. You should be fine as long as you have versions at the top of the hour.
db.collection.aggregate([{
$unwind: {
path: "$Versions"
}
}, {
$group: {
_id: {
MeterId: "$_id.MeterId",
start: {
$hour: "$_id.StartDate"
}
},
startDate: {
$min: "$_id.StartDate"
},
maxVersion: {
$max: "$Versions.VersionNumber"
},
Versions: {
$push: "$Versions"
}
}
}, {
$sort: {
"_id.start": -1
}
}, {
$project: {
_id: {
MeterId: "$_id.MeterId",
StartDate: "$startDate"
},
hour: "$_id.start",
Versions: {
$filter: {
input: "$Versions",
as: "version",
cond: {
$eq: ["$maxVersion", "$$version.VersionNumber"]
}
}
}
}
}, {
$project: {
_id: 1,
hour: 1,
QuantitySum: {
$sum: "$Versions.Quantity"
}
}
}]);
Sample Output
{
"_id": {
"MeterId": "1234",
"StartDate": ISODate("2016-09-20T02:00:00Z")
},
"QuantitySum": 15,
"hour": 2
}

How to simplify this Aggregation Framework Query (with Date Formatting & Comparisons)

I already have a working query for what I need (included below), but I can't help but feel that there must be a better way to accomplish this. My requirements are fairly simple, but the resulting query itself is the definition of eye-bleed code.
Here's a sample document that we're iterating over (with irrelevant properties removed):
> db.Thing.find().limit(1).pretty()
{
"_id": ObjectId(...),
"created": ISODate(...),
"updated": ISODate(...)
}
My requirements for the query are:
Only match on Things where created > updated.
Group on the YYYY-MM value of the created field, and reduce to a count.
Output should look like the following:
{ "count": 93592, "month": "2014-06" },
{ "count": 81629, "month": "2014-07" },
{ "count": 126183, "month": "2014-08" },
...
Again, this feels like it should be really simple. Here's my correctly functioning query that currently does this:
db.Thing.aggregate([
{ $project: {
cmpDates: { $cmp: ['$created', '$updated'] },
created: '$created'
}},
{ $match: {
cmpDates: { $ne: 0 }
}},
{ $project: {
month: {
$concat: [
{ $substr: [ { $year: '$created' }, 0, 4 ] },
'-',
{ $cond: [
{ $lte: [ { $month: '$created' }, 9 ] },
{ $concat: [
'0',
{ $substr: [ { $month: '$created' }, 0, 2 ] }
]},
{ $substr: [ { $month: '$created' }, 0, 2 ] }
] }
]
},
_id: 0
}},
{ $group: {
_id: '$month',
count: { $sum: 1 }
}},
{ $project: {
month: '$_id',
count: 1,
_id: 0
}},
{ $sort: { month: 1 } }
]);
My question: Can this query be simplified, and if so, how?
Thanks!
Try this:
db.test.aggregate([
{ "$project" : {
"cmpDates" : { "$cmp" : ["$created", "$updated"] },
"createdYear" : { "$year" : "$created" },
"createdMonth" : { "$month" : "$created" }
} },
{ "$match" : { "cmpDates" { "$ne" : 0 } } },
{ "$group" : {
"_id" : { "y" : "$createdYear", "m" : "$createdMonth" },
"count" : { "$sum" : 1 }
} }
])
The big difference is that I used a compound key for the group, so I'm grouping the pair (year, month) instead of constructing a string value YYYY-MM to accomplish the same purpose.

MongoDB aggregate return count of 0 if no results

I have the following MongoDB query that groups by date and result and gives a count. I'd like to have the query also return a count of 0 for a particular date and result if data doesn't exist for that day.
For example I have the following result statuses: SUCCESS and FAILED. If on the 21st there were no results of FAILED I would want a count returned of 0:
{
"_id" : {
"month" : 1,
"day" : 21,
"year" : 2014,
"buildResult" : "FAILURE"
},
"count" : 0
}
I've done something similar with a relational database and a calendar table, but I'm not sure how to approach this with MongoDB. Is this possible or should I do something programatically after running the query?
Here is an example of a document (simplified) in the database:
{
"_id" : ObjectId("52deab2fe4b0a491abb54108"),
"type" : "build",
"time" : ISODate("2014-01-21T17:15:27.471Z"),
"data" : {
"buildNumber" : 43,
"buildDuration" : 997308,
"buildResult" : "SUCCESS"
}
}
Here is my current query:
db.builds.aggregate([
{ $match: { "data.buildResult" : { $ne : null} }},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$data.buildResult",
},
count: { $sum: 1}
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1} }
])
If I correctly understand what do you want, you could try this:
db.builds.aggregate([
{ $project:
{
time: 1,
projectedData: { $ifNull: ['$data.buildResult', 'none'] }
}
},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$projectedData"
},
count: { $sum: { $cond: [ { $eq: [ "$projectedData", "none" ] }, 0, 1 ] } }
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])
Update:
You want to get from output more documents that been in input, it is possible only with unwind operator that works with arrays, but you haven't any arrays, so as I know it is impossible to get more documents in your case. So you should add some logic after query result to create new data for existing dates with 0 count for another type of buildResult...