MongoDB aggregation $divide computed fields - mongodb

I am trying to compute a percentage in a MongoDB query based on computed fields - not sure if this is possible or not. What I'd like to be able to do is calculate the failure percentage: (failed count / total) * 100
Here are a few sample documents:
{
"_id" : ObjectId("52dda5afe4b0a491abb5407f"),
"type" : "build",
"time" : ISODate("2014-01-20T22:39:43.880Z"),
"data" : {
"buildNumber" : 30,
"buildResult" : "SUCCESS"
}
},
{
"_id" : ObjectId("52dd9fede4b0a491abb5407a"),
"type" : "build",
"time" : ISODate("2014-01-20T22:15:07.901Z"),
"data" : {
"buildNumber" : 4,
"buildResult" : "FAILURE"
}
},
{
"_id" : ObjectId("52dda153e4b0a491abb5407b"),
"type" : "build",
"time" : ISODate("2014-01-20T22:21:07.790Z"),
"data" : {
"buildNumber" : 118,
"buildResult" : "SUCCESS"
}
}
Here is the query I am trying to work with. The issue is in the FailPercent/$divide line:
db.col.aggregate([
{ $match: { "data.buildResult" : { $ne : null } } },
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
},
Aborted: { $sum: { $cond : [{ $eq : ["$data.buildResult", "ABORTED"]}, 1, 0]} },
Failure: { $sum: { $cond : [{ $eq : ["$data.buildResult", "FAILURE"]}, 1, 0]} },
Unstable: { $sum: { $cond : [{ $eq : ["$data.buildResult", "UNSTABLE"]}, 1, 0]} },
Success: { $sum: { $cond : [{ $eq : ["$data.buildResult", "SUCCESS"]}, 1, 0]} },
Total: { $sum: 1 },
FailPercent: { $divide: [ "Failure", "Total" ] }
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])

You almost got it. Only change that would be required is that you'll have to compute the FailPercent in an additional project phase, because the total is only available after the completion of the group phase. Try this:
db.foo.aggregate([
{ $match: { "data.buildResult" : { $ne : null } } },
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
},
Aborted: { $sum: { $cond : [{ $eq : ["$data.buildResult", "ABORTED"]}, 1, 0]} },
Failure: { $sum: { $cond : [{ $eq : ["$data.buildResult", "FAILURE"]}, 1, 0]} },
Unstable: { $sum: { $cond : [{ $eq : ["$data.buildResult", "UNSTABLE"]}, 1, 0]} },
Success: { $sum: { $cond : [{ $eq : ["$data.buildResult", "SUCCESS"]}, 1, 0]} },
Total: { $sum: 1 }
} },
{$project:{Aborted:1, Failure:1, Unstable:1, Success:1, Total:1, FailPercent: { $divide: [ "$Failure", "$Total" ]}}},
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])

Related

Grouping and summing after using $addToSet in MongoDB

Assume I have the following data:
[{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 1,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-01-01T17:02:36+0000",
"transactionId" : 2,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 2.43,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 3,
"transactionItem" : {
"instrument" : {
"symbol" : "SPHD"
}
}
},
{
"type" : "DIVIDEND_OR_INTEREST",
"netAmount" : 5.00,
"transactionDate" : "2019-02-01T17:02:36+0000",
"transactionId" : 4,
"transactionItem" : {
"instrument" : {
"symbol" : "ATT"
}
}
}]
I want to group the data by year and get a total sum for that year. I also want an array of the items used during the group, grouped by a field and summed, if that makes sense. This is ultimately what I want to end up with:
{
"year": [
{
"year": "2019",
"totalYear": 14.86,
"dividends": [
{
"symbol": "T",
"amount": 10.00
},
{
"symbol": "SPHD",
"amount": 4.86
}
]
}
]
}
Below is the code I have written so far using Mongoose. The problem is that I can't figure out how to group and sum the items that I added to the set. I could always do that in the application layer but I was hoping to accomplish this entirely inside of a query.:
const [transactions] = await Transaction.aggregate([
{ $match: { type: TransactionType.DIVIDEND_OR_INTEREST, netAmount: { $gte: 0 } } },
{
$facet: {
year: [
{
$group: {
_id: { $dateToString: { format: '%Y', date: '$transactionDate' } },
totalYear: { $sum: '$netAmount' },
dividends: {
$addToSet: {
symbol: '$transactionItem.instrument.symbol',
amount: '$netAmount',
},
},
},
},
{ $sort: { _id: 1 } },
{
$project: {
year: '$_id',
totalYear: { $round: ['$totalYear', 2] },
dividends: '$dividends',
_id: false,
},
},
],
},
},
]).exec();
It requires to do two group stages,
First group by year and symbol
Second group by only year
If the transactionDate field has date type value then just use $year operator to get the year
I would suggest you do $sort after the immediate $match stage to use an index if you have created or planning for future
const [transactions] = await Transaction.aggregate([
{
$match: {
type: TransactionType.DIVIDEND_OR_INTEREST,
netAmount: { $gte: 0 }
}
},
{ $sort: { transactionDate: 1 } },
{
$facet: {
year: [
{
$group: {
_id: {
year: { $year: "$transactionDate" },
symbol: "$transactionItem.instrument.symbol"
},
netAmount: { $sum: "$netAmount" }
}
},
{
$group: {
_id: "$_id.year",
totalYear: { $sum: "$netAmount" },
dividends: {
$push: {
symbol: "$_id.symbol",
amount: "$netAmount"
}
}
}
},
{
$project: {
_id: 0,
year: "$_id",
totalYear: 1,
dividends: 1
}
}
]
}
}
]).exec();
Playground

How to regroup (or merge) objects in aggregation pipeline in MongoDB?

I currently have an aggregation pipeline:
db.getCollection('forms').aggregate([
{ $unwind: //unwind },
{
$match: {
//some matches
}
},
{
$project: {
//some projections
}
},
{
//Finally, im grouping the results
$group: {
_id: {
year: { $year: '$createdAt' },
month: { $month: '$createdAt' },
raceEthnicity: '$demographic.raceEthnicity'
},
count: { $sum: 1 },
}
]
My current results are similar to:
[{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Asian"
},
"count" : 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "Multiracial"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 11,
"raceEthnicity" : "White"
},
"count" : 3.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
"raceEthnicity" : "White"
},
"count" : 33.0
}]
Is there a way to add a new stage on the pipeline to "merge" results of the same year/month into a single object?
I want to achieve something like:
{
"_id" : {
"year" : 2020,
"month" : 11,
},
"Asian" : 1.0,
"Multiracial": 3.0,
"White": 1.0
},
{
"_id" : {
"year" : 2020,
"month" : 10,
},
"White": 33
}
Is it possible? How can I do that?
Add this one to your aggregation pipeline.
db.collection.aggregate([
{ $set: { "data": { k: "$_id.raceEthnicity", v: "$count" } } },
{ $group: { _id: { year: "$_id.year", month: "$_id.month" }, data: { $push: "$data" } } },
{ $set: { "data": { $arrayToObject: "$data" } } },
{ $replaceRoot: { newRoot: { $mergeObjects: ["$$ROOT", "$data"] } } },
{ $unset: "data" }
])
Unlike the solution from #wak786 you don't need to know all ethnicity at design time. It works for arbitrary ethnicity.
Add these stages to your pipeline.
db.collection.aggregate([
{
$replaceRoot: {
newRoot: {
"$mergeObjects": [
"$$ROOT",
{
$arrayToObject: [
[
{
k: "$_id.raceEthnicity",
v: "$count"
}
]
]
}
]
}
}
},
{
"$group": {
"_id": {
year: "$_id.year",
month: "$_id.month",
},
"Asian": {
"$sum": "$Asian"
},
"Multiracial": {
"$sum": "$Multiracial"
},
"White": {
"$sum": "$White"
}
}
}
])
Below is the mongo playground link. I have taken the current result of your pipeline as input to my query.
Try it here

Compare 2 count aggregations

I have a collection in MongoDB that looks something like the following:
{ "_id" : 1, "type" : "start", userid: "101", placementid: 1 }
{ "_id" : 2, "type" : "start", userid: "101", placementid: 2 }
{ "_id" : 3, "type" : "start", userid: "101", placementid: 3 }
{ "_id" : 4, "type" : "end", userid: "101", placementid: 1 }
{ "_id" : 5, "type" : "end", userid: "101", placementid: 2 }
and I want to group results by userid then placementid and then count the types of "start" and "end", but only when the two counts are different. In this particular example I would want to get placementid: 3 because when grouped and counted this is the only case where the counts don't match.
I've written a query that gets the 2 counts and the grouping but I can't do the filtering when counts don't match. This is my query:
db.getCollection('mycollection').aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
countStart: {$ne: "$countEnd"}
}
}
])
It seems like I'm using the match aggregation incorrectly because I'm seeing results where countStart and countEnd are the same.
{ "_id" : {"userid" : "101", "placementid" : "1"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "2"}, "countStart" : 1.0, "countEnd" : 1.0 }
{ "_id" : {"userid" : "101", "placementid" : "3"}, "countStart" : 1.0, "countEnd" : 0 }
Can anybody point into the right direction please?
To compare two fields inside $match stage you need $expr which is available in MongoDB 3.6:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$match: {
$expr: { $ne: [ "$countStart", "$countEnd" ] }
}
}
])
If you're using older version of MongoDB you can use $redact:
db.myCollection.aggregate([
{
$project: {
userid: 1,
placementid: 1,
isStart: {
$cond: [ { $eq: ["$type", "start"] }, 1, 0]
},
isEnd: {
$cond: [ { $eq: ["$type", "end"] }, 1, 0]
}
}
},
{
$group: {
_id: { userid:"$userid", placementid:"$placementid" },
countStart:{ $sum: "$isStart" },
countEnd: { $sum: "$isEnd" }
}
},
{
$redact: {
$cond: { if: { $ne: [ "$countStart", "$countEnd" ] }, then: "$$KEEP", else: "$$PRUNE" }
}
}
])
You run do the following pipeline to get this - no need to use $expr or $redact or anything special really:
db.mycollection.aggregate({
$group: {
_id: {
"userid": "$userid",
"placementid": "$placementid"
},
"sum": {
$sum: {
$cond: {
if: { $eq: [ "$type", "start" ] },
then: 1, // +1 for start
else: -1 // -1 for anything else
}
}
}
}
}, {
$match: {
"sum": { $ne: 0 } // only return the non matching-up ones
}
})

Get data for month wise in mongodb

I want to get data to each month. in my table data is stored like this:-
"patient" : [
{
"status" : 'arrived',
start_time: '2017-08-17T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-08-16T07:17:00.000Z
},
{
"status" : 'arrived',
start_time: '2017-07-12T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-07-05T08:10:00.000Z
},
{
"status" : 'arrived',
start_time: '2017-06-02T09:17:00.000Z
},
{
"status" : 'arraived',
start_time: '2017-05-05T08:16:00.000Z
}
]
etc,
and I want to sum of patient of each month (jan to des), like this :-
{
"month" : 8,
"count" : 2
}and like this month 1 to 12
I assume, patient array is associated with a customer and the date is stored in mongo ISO format.
So, the actual document would look like :
{
name: "stackOverflow",
"patient" : [
{
"status" : 'arrived',
"start_time": ISODate("2017-08-17T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-08-16T07:17:00.000Z")
},
{
"status" : 'arrived',
"start_time": ISODate("2017-07-12T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-07-05T08:10:00.000Z")
},
{
"status" : 'arrived',
"start_time": ISODate("2017-06-02T09:17:00.000Z")
},
{
"status" : 'arraived',
"start_time": ISODate("2017-05-05T08:16:00.000Z")
}
]
}
here is a sample query which you can try -
db.test.aggregate([
{$unwind: "$patient"},
{ $group: {
_id: {name: "$name", month: {$month: "$patient.start_time"}},
count: { $sum: 1}
}},
{$group: {
_id: "$_id.name",
patient: {$push: {month: "$_id.month", count: "$count"}}
}}
])
Sample output:
{
"_id" : "stackOverflow",
"patient" : [
{
"month" : 5,
"count" : 1
},
{
"month" : 6,
"count" : 1
},
{
"month" : 7,
"count" : 2
},
{
"month" : 8,
"count" : 2
}
]
}
You can change query according to your use-case. hope this will help you!
This is my code:-
db.appointments.aggregate( [
{
$project:
{
"patient_id": 1,
"start_time": 1,
"status": 1
}
},
{
$match: {
'start_time' : { $gte: startdate.toISOString() },
'status': { $eq: 'arrived' }
} ,
},
{ $group: {
_id: {id: "$_id", start_time: {$month: "$appointments.start_time"}},
count: { $sum: 1}
}}
])
When I used this :-
{ $group: {
_id: {id: "$_id", start_time: {$month: "$start_time"}},
count: { $sum: 1}
}
}
its showing error message:-
{"name":"MongoError","message":"can't convert from BSON type missing to Date","ok":0,"errmsg":"can't convert from BSON type missing to Date","code":16006,"codeName":"Location16006"}
And when I comment this its showing this :-
Out Put here:-
:[{"count":{"_id":"595b6f95ab43ec1f6c92b898","patient_id":"595649904dbe9525c0e036ef","start_time":"2017-07-04T10:35:00.000Z","status":"arrived"}},
{"count":{"_id":"595dff870960d425d4f14633","patient_id":"5956478b4dbe9525c0e036ea","start_time":"2017-03-08T09:14:00.000Z","status":"arrived"}},{"count":{"_id":"595dffaa0960d425d4f14634","patient_id":"595649904dbe9525c0e036ef","start_time":"2017-03-17T09:15:00.000Z","status":"arrived"}},{"count":{"_id":"595dffcf0960d425d4f14635","patient_id":"595648394dbe9525c0e036ec","start_time":"2017-06-08T09:15:00.000Z","status":"arrived"}},{"count":{"_id":"595dfffb0960d425d4f14636","patient_id":"5956478b4dbe9525c0e036ea","start_time":"2017-06-20T09:16:00.000Z","status":"arrived"}},{"count":{"_id":"595e00160960d425d4f14637","patient_id":"5959ea7f80388b19e0b57817","start_time":"2017-08-17T09:17:00.000Z","status":"arrived"}}]}
const group = {
$group: {
_id: { month: { $month: "$createdAt" } },
count: { $sum: 1 },
},
};
const groups = {
$group: {
_id: null,
patient: { $push: { month: '$_id.month', count: '$count' } },
},
};
return db.Patient.aggregate([group, groups]);

MongoDB aggregate return count of 0 if no results

I have the following MongoDB query that groups by date and result and gives a count. I'd like to have the query also return a count of 0 for a particular date and result if data doesn't exist for that day.
For example I have the following result statuses: SUCCESS and FAILED. If on the 21st there were no results of FAILED I would want a count returned of 0:
{
"_id" : {
"month" : 1,
"day" : 21,
"year" : 2014,
"buildResult" : "FAILURE"
},
"count" : 0
}
I've done something similar with a relational database and a calendar table, but I'm not sure how to approach this with MongoDB. Is this possible or should I do something programatically after running the query?
Here is an example of a document (simplified) in the database:
{
"_id" : ObjectId("52deab2fe4b0a491abb54108"),
"type" : "build",
"time" : ISODate("2014-01-21T17:15:27.471Z"),
"data" : {
"buildNumber" : 43,
"buildDuration" : 997308,
"buildResult" : "SUCCESS"
}
}
Here is my current query:
db.builds.aggregate([
{ $match: { "data.buildResult" : { $ne : null} }},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$data.buildResult",
},
count: { $sum: 1}
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1} }
])
If I correctly understand what do you want, you could try this:
db.builds.aggregate([
{ $project:
{
time: 1,
projectedData: { $ifNull: ['$data.buildResult', 'none'] }
}
},
{ $group: {
_id: {
month: { $month: "$time" },
day: { $dayOfMonth: "$time" },
year: { $year: "$time" },
buildResult: "$projectedData"
},
count: { $sum: { $cond: [ { $eq: [ "$projectedData", "none" ] }, 0, 1 ] } }
} },
{ $sort: { "_id.year": 1, "_id.month": 1, "_id.day": 1 } }
])
Update:
You want to get from output more documents that been in input, it is possible only with unwind operator that works with arrays, but you haven't any arrays, so as I know it is impossible to get more documents in your case. So you should add some logic after query result to create new data for existing dates with 0 count for another type of buildResult...