MongoDB: aggregating fields from arrays of subdocuments - mongodb

I have a mongodb collection called Events, containing baseball games. Here is an example of one record in the table:
{
"name" : "Game# 814",
"dateStart" : ISODate("2012-09-28T14:47:53.695Z"),
"_id" : ObjectId("53a1b24de3f25f4443d9747e"),
"stats" : [
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"_id" : ObjectId("53a1b24de3f25f4443d97480"),
"score" : 17
},
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"_id" : ObjectId("53a1b24de3f25f4443d9747f"),
"score" : 12
}
]
"__v" : 0
}
I need help writing the query that returns standings for all teams. The result set should look like:
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"wins" : <<number of Yankees wins>>
"losses" : <<number of Yankees losses>>
"draws" : <<number of Yankees draws>>
}
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"wins" : <<number of Reds wins>>
"losses" : <<number of Reds losses>>
"draws" : <<number of Reds draws>>
}
...
Here's the query I've started with...
db.events.aggregate(
{"$unwind": "$stats" },
{ $group : {
_id : "$stats.team",
gamesPlayed : { $sum : 1},
totalScore : { $sum : "$stats.score" }
}}
);
... which returns results:
{
"result" : [
{
"_id" : ObjectId("53a11a43a8de6dd8375c93cb"),
"gamesPlayed" : 125, // not a requirement... just trying to get $sum working
"totalScore" : 1213 // ...same here
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c955f"),
"gamesPlayed" : 128,
"totalScore" : 1276
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c9661"),
"gamesPlayed" : 152,
"totalScore" : 1509
},
....

It would seem advisable for you to keep your "wins", "losses", "draws" within your documents as you create or update them. But it is possible to do with aggregate if a little long winded
db.events.aggregate([
// Unwind the "stats" array
{ "$unwind": "$stats" },
// Combine the document with new fields
{ "$group": {
"_id": "$_id",
"firstTeam": { "$first": "$stats.team" },
"firstTeamName": { "$first": "$stats.teamName" },
"firstScore": { "$first": "$stats.score" },
"lastTeam": { "$last": "$stats.team" },
"lastTeamName": { "$last": "$stats.teamName" },
"lastScore": { "$last": "$stats.score" },
"minScore": { "$min": "$stats.score" },
"maxScore": { "$max": "$stats.score" }
}},
// Calculate by comparing scores
{ "$project": {
"firstTeam": 1,
"firstTeamName": 1,
"firstScore": 1,
"lastTeam": 1,
"lastTeamName": 1,
"lastScore": 1,
"firstWins": {
"$cond": [
{ "$gt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstLosses": {
"$cond": [
{ "$lt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstDraws": {
"$cond": [
{ "$eq": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"lastWins": {
"$cond": [
{ "$gt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastLosses": {
"$cond": [
{ "$lt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastDraws": {
"$cond": [
{ "$eq": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"type": { "$literal": [ true, false ] }
}},
// Unwind the "type"
{ "$unwind": "$type" },
// Group teams conditionally on "type"
{ "$group": {
"_id": {
"team": {
"$cond": [
"$type",
"$firstTeam",
"$lastTeam"
]
},
"teamName": {
"$cond": [
"$type",
"$firstTeamName",
"$lastTeamName"
]
}
},
"owins": {
"$sum": {
"$cond": [
"$type",
"$firstWins",
"$lastWins"
]
}
},
"olosses": {
"$sum": {
"$cond": [
"$type",
"$firstLosses",
"$lastLosses"
]
}
},
"odraws": {
"$sum": {
"$cond": [
"$type",
"$firstDraws",
"$lastDraws"
]
}
}
}},
// Project your final form
{ "$project": {
"_id": 0,
"team": "$_id.team",
"teamName": "$_id.teamName",
"wins": "$owins",
"losses": "$olosses",
"draws": "$odraws"
}}
])
The first part is to "re-shape" the document by unwinding the array and then grouping with "first" and "last" for defining fields for your two teams.
Then you want to $project through those documents and calculate your "wins", "losses" and "draws" for each team in the pairing. The additional thing is adding an array field for the two values true/false is convenient here. If you are on a pre 2.6 version of mongodb the $literal can be replaced with $const which is not documented but does the same thing.
Once you $unwind that "type" array, the documents can be split apart in the $group stage by evaluating whether to choose the "first" or "last" team field values via the use of $cond. This is a ternary operator that evaluates a true/false condition and returns the appropriate value according to that condition.
With a final $project your documents are formed exactly how you want.

Related

MongoDB Group By count occurences of values and output as new field

I have a 3 Collections Assignments, Status, Assignee.
Assignments Fields : [_id, status, Assignee]
Assignee and Status Fields : [_id, name].
There can be many assignments associated with various Status and Assignee collections(linked via _id field), There is no nesting or complex data.
I need a query for all assignments ids where Assignees are the row, Status are the Columns, there combined cell is the count with Total counts at the end.
To help you visualize, I am attaching below image. I am new to complex Mongo DB Aggregate framework, kindly guide me to achieve query.
Note: Data in Status and Assignee collection will be dynamic. Nothing is predetermined in the Query. So, the Rows and Columns are going to grow dynamically in future, If the query is given pagination, then it would be of great help. I cannot write a query with hard coded status names like 'pending', 'completed' etc. As data shall grow and existing data may change like 'pending task', 'completed work'.
Below is my query
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
]);
Below is the output format:
{
"_id" : "John",
"statuses" : {
"statusId" : "Pending",
"count" : 3.0
},
"count" : 3.0
}
{
"_id" : "Katrina",
"statuses" : [{
"statusId" : "Pending",
"count" : 1.0
},
{
"statusId" : "Completed",
"count" : 1.0
},
{
"statusId" : "Assigned",
"count" : 1.0
}],
"count" : 3.0
}
{
"_id" : "Collins",
"statuses" : {
"statusId" : "Pending",
"count" : 4.0
},
"count" : 4.0
}
Expected Output is:
{
"_id" : "Katrina",
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
"totalCount" : 3.0
}
Any Idea on how to many various statusId for different assignee as keys and not values in single document.
You need another $group stage after $unwind to count number of status based on statusId string value:
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
The final aggregate command:
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
{ "$unwind": "$statuses" },
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
]);
Why not just keep statuses as an object so each status is a key/val pair. If that works you do the following
db.getCollection('Assignments').aggregate([
[
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
},
},
{
"$group" : {
"_id" : "$_id.assignee",
"statuses" : {
"$push" : {
"k" : "$_id.statusId", // <- "k" as key value important for $arrayToObject Function
"v" : "$statusCount" // <- "v" as key value important for $arrayToObject Function
}
},
"count" : {
"$sum" : "$statusCount"
}
}
},
{
"$project" : {
"_id" : 1.0,
"statuses" : {
"$arrayToObject" : "$statuses"
},
"totalCount" : "$count"
}
}
],
{
"allowDiskUse" : false
}
);
This gives you:
{
"_id" : "Katrina",
"statuses": {
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
},
"totalCount" : 3.0
}
A compromise having it one layer deeper but still the shape of statuses you wanted and dynamic with each new statusId added.

Group and count over a start and end range

If I have data in the following format:
[
{
_id: 1,
startDate: ISODate("2017-01-1T00:00:00.000Z"),
endDate: ISODate("2017-02-25T00:00:00.000Z"),
type: 'CAR'
},
{
_id: 2,
startDate: ISODate("2017-02-17T00:00:00.000Z"),
endDate: ISODate("2017-03-22T00:00:00.000Z"),
type: 'HGV'
}
]
Is it possible to retrieve data grouped by 'type', but also with a count of the type for each of month in a given date range e.g. between 2017/1/1 to 2017/4/1 would return:
[
{
_id: 'CAR',
monthCounts: [
/*January*/
{
from: ISODate("2017-01-1T00:00:00.000Z"),
to: ISODate("2017-01-31T23:59:59.999Z"),
count: 1
},
/*February*/
{
from: ISODate("2017-02-1T00:00:00.000Z"),
to: ISODate("2017-02-28T23:59:59.999Z"),
count: 1
},
/*March*/
{
from: ISODate("2017-03-1T00:00:00.000Z"),
to: ISODate("2017-03-31T23:59:59.999Z"),
count: 0
},
]
},
{
_id: 'HGV',
monthCounts: [
{
from: ISODate("2017-01-1T00:00:00.000Z"),
to: ISODate("2017-01-31T23:59:59.999Z"),
count: 0
},
{
from: ISODate("2017-02-1T00:00:00.000Z"),
to: ISODate("2017-02-28T23:59:59.999Z"),
count: 1
},
{
from: ISODate("2017-03-1T00:00:00.000Z"),
to: ISODate("2017-03-31T23:59:59.999Z"),
count: 1
},
]
}
]
The returned format is not really important, but what I am trying to achieve is in a single query to retrieve a number of counts for the same grouping (one per month). The input could be simply a start and end date to report from or more likely it could be an array of the date ranges to group by.
The algorithm for this is to basically "iterate" values between the interval of the two values. MongoDB has a couple of ways to deal with this, being what has always been present with mapReduce() and with new features available to the aggregate() method.
I'm going expand on your selection to deliberately show an overlapping month since your examples did not have one. This will result in the "HGV" values appearing in "three" months of output.
{
"_id" : 1,
"startDate" : ISODate("2017-01-01T00:00:00Z"),
"endDate" : ISODate("2017-02-25T00:00:00Z"),
"type" : "CAR"
}
{
"_id" : 2,
"startDate" : ISODate("2017-02-17T00:00:00Z"),
"endDate" : ISODate("2017-03-22T00:00:00Z"),
"type" : "HGV"
}
{
"_id" : 3,
"startDate" : ISODate("2017-02-17T00:00:00Z"),
"endDate" : ISODate("2017-04-22T00:00:00Z"),
"type" : "HGV"
}
Aggregate - Requires MongoDB 3.4
db.cars.aggregate([
{ "$addFields": {
"range": {
"$reduce": {
"input": { "$map": {
"input": { "$range": [
{ "$trunc": {
"$divide": [
{ "$subtract": [ "$startDate", new Date(0) ] },
1000
]
}},
{ "$trunc": {
"$divide": [
{ "$subtract": [ "$endDate", new Date(0) ] },
1000
]
}},
60 * 60 * 24
]},
"as": "el",
"in": {
"$let": {
"vars": {
"date": {
"$add": [
{ "$multiply": [ "$$el", 1000 ] },
new Date(0)
]
},
"month": {
}
},
"in": {
"$add": [
{ "$multiply": [ { "$year": "$$date" }, 100 ] },
{ "$month": "$$date" }
]
}
}
}
}},
"initialValue": [],
"in": {
"$cond": {
"if": { "$in": [ "$$this", "$$value" ] },
"then": "$$value",
"else": { "$concatArrays": [ "$$value", ["$$this"] ] }
}
}
}
}
}},
{ "$unwind": "$range" },
{ "$group": {
"_id": {
"type": "$type",
"month": "$range"
},
"count": { "$sum": 1 }
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.type",
"monthCounts": {
"$push": { "month": "$_id.month", "count": "$count" }
}
}}
])
The key to making this work is the $range operator which takes values for a "start" and and "end" as well as an "interval" to apply. The result is an array of values taken from the "start" and incremented until the "end" is reached.
We use this with startDate and endDate to generate the possible dates in between those values. You will note that we need to do some math here since the $range only takes a 32-bit integer, but we can take the milliseconds away from the timestamp values so that is okay.
Because we want "months", the operations applied extract the month and year values from the generated range. We actually generate the range as the "days" in between since "months" are difficult to deal with in math. The subsequent $reduce operation takes only the "distinct months" from the date range.
The result therefore of the first aggregation pipeline stage is a new field in the document which is an "array" of all the distinct months covered between startDate and endDate. This gives an "iterator" for the rest of the operation.
By "iterator" I mean than when we apply $unwind we get a copy of the original document for every distinct month covered in the interval. This then allows the following two $group stages to first apply a grouping to the common key of "month" and "type" in order to "total" the counts via $sum, and next $group makes the key just the "type" and puts the results in an array via $push.
This gives the result on the above data:
{
"_id" : "HGV",
"monthCounts" : [
{
"month" : 201702,
"count" : 2
},
{
"month" : 201703,
"count" : 2
},
{
"month" : 201704,
"count" : 1
}
]
}
{
"_id" : "CAR",
"monthCounts" : [
{
"month" : 201701,
"count" : 1
},
{
"month" : 201702,
"count" : 1
}
]
}
Note that the coverage of "months" is only present where there is actual data. Whilst possible to produce zero values over a range, it requires quite a bit of wrangling to do so and is not very practical. If you want zero values then it is better to add that in post processing in the client once the results have been retrieved.
If you really have your heart set on the zero values, then you should separately query for $min and $max values, and pass these in to "brute force" the pipeline into generating the copies for each supplied possible range value.
So this time the "range" is made externally to all documents, and you then use a $cond statement into the accumulator to see if the current data is within the grouped range produced. Also since the generation is "external", we really don't need the MongoDB 3.4 operator of $range, so this can be applied to earlier versions as well:
// Get min and max separately
var ranges = db.cars.aggregate(
{ "$group": {
"_id": null,
"startRange": { "$min": "$startDate" },
"endRange": { "$max": "$endDate" }
}}
).toArray()[0]
// Make the range array externally from all possible values
var range = [];
for ( var d = new Date(ranges.startRange.valueOf()); d <= ranges.endRange; d.setUTCMonth(d.getUTCMonth()+1)) {
var v = ( d.getUTCFullYear() * 100 ) + d.getUTCMonth()+1;
range.push(v);
}
// Run conditional aggregation
db.cars.aggregate([
{ "$addFields": { "range": range } },
{ "$unwind": "$range" },
{ "$group": {
"_id": {
"type": "$type",
"month": "$range"
},
"count": {
"$sum": {
"$cond": {
"if": {
"$and": [
{ "$gte": [
"$range",
{ "$add": [
{ "$multiply": [ { "$year": "$startDate" }, 100 ] },
{ "$month": "$startDate" }
]}
]},
{ "$lte": [
"$range",
{ "$add": [
{ "$multiply": [ { "$year": "$endDate" }, 100 ] },
{ "$month": "$endDate" }
]}
]}
]
},
"then": 1,
"else": 0
}
}
}
}},
{ "$sort": { "_id": 1 } },
{ "$group": {
"_id": "$_id.type",
"monthCounts": {
"$push": { "month": "$_id.month", "count": "$count" }
}
}}
])
Which produces the consistent zero fills for all possible months on all groupings:
{
"_id" : "HGV",
"monthCounts" : [
{
"month" : 201701,
"count" : 0
},
{
"month" : 201702,
"count" : 2
},
{
"month" : 201703,
"count" : 2
},
{
"month" : 201704,
"count" : 1
}
]
}
{
"_id" : "CAR",
"monthCounts" : [
{
"month" : 201701,
"count" : 1
},
{
"month" : 201702,
"count" : 1
},
{
"month" : 201703,
"count" : 0
},
{
"month" : 201704,
"count" : 0
}
]
}
MapReduce
All versions of MongoDB support mapReduce, and the simple case of the "iterator" as mentioned above is handled by a for loop in the mapper. We can get output as generated up to the first $group from above by simply doing:
db.cars.mapReduce(
function () {
for ( var d = this.startDate; d <= this.endDate;
d.setUTCMonth(d.getUTCMonth()+1) )
{
var m = new Date(0);
m.setUTCFullYear(d.getUTCFullYear());
m.setUTCMonth(d.getUTCMonth());
emit({ id: this.type, date: m},1);
}
},
function(key,values) {
return Array.sum(values);
},
{ "out": { "inline": 1 } }
)
Which produces:
{
"_id" : {
"id" : "CAR",
"date" : ISODate("2017-01-01T00:00:00Z")
},
"value" : 1
},
{
"_id" : {
"id" : "CAR",
"date" : ISODate("2017-02-01T00:00:00Z")
},
"value" : 1
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-02-01T00:00:00Z")
},
"value" : 2
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-03-01T00:00:00Z")
},
"value" : 2
},
{
"_id" : {
"id" : "HGV",
"date" : ISODate("2017-04-01T00:00:00Z")
},
"value" : 1
}
So it does not have the second grouping to compound to arrays, but we did produce the same basic aggregated output.

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection will look this,
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Now I am getting count by hitting three different queries and constructing the result (or) server side iteration I can get the result.
Can anyone suggest or provide me good way to get above result
Your quesstion is not very clearly presented, but what it seems you wanted to do here was count the occurances of the data in the fields, optionally filtering those fields by the values that matches the criteria.
Here the $cond operator allows you to tranform a logical condition into a value:
db.collection.aggregate([
{ "$group": {
"_id": null,
"name": { "$sum": 1 },
"salary": {
"$sum": {
"$cond": [
{ "$gte": [ "$salary", 1000 ] },
1,
0
]
}
},
"type": {
"$sum": {
"$cond": [
{ "$eq": [ "$type", "type2" ] },
1,
0
]
}
}
}}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise in the long form, which is not very performant due to needing to make a copy of each document for every key looks like this:
db.collection.aggregate([
{ "$project": {
"name": 1,
"salary": 1,
"type": 1,
"category": { "$literal": ["name","salary","type"] }
}},
{ "$unwind": "$category" },
{ "$group": {
"_id": "$category",
"count": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$category", "name"] },
{ "$ifNull": [ "$name", false ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "salary" ] },
{ "$gte": [ "$salary", 1000 ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "type" ] },
{ "$eq": [ "$type", "type2" ] }
]},
1,
0
]}
]}
]
}
}
}}
])
And it's output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
db.collection.mapReduce(
function() {
var doc = this;
delete doc._id;
Object.keys(this).forEach(function(key) {
var value = (( key == "salary") && ( doc[key] < 1000 ))
? 0
: (( key == "type" ) && ( doc[key] != "type2" ))
? 0
: 1;
emit(key,value);
});
},
function(key,values) {
return Array.sum(values);
},
{
"out": { "inline": 1 }
}
);
And it's output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies where to test the condition is met on the fields you want conditions for and return 1 where the condition is met or 0 where it is not for the summing the count.
You can use aggregation as following query:
db.collection.aggregate({
$match: {
salary: 10000,
//add any other condition here
}
}, {
$group: {
_id: "$type",
"count": {
$sum: 1
}
}
}, {
$project: {
"category": "$_id",
"count": 1,
_id: 0
}
}

How could I not only preserve the max value but also the record having the max value

I want to use $max operator to select the max value.
And also keep the max record with the key "original_document"
How could I do it in mongoDB
expect result
{ "_id" : "abc", "maxTotalAmount" : 100,
"maxQuantity" : 10,
"original_document": {{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }}}
current result
{ "_id" : "abc", "maxTotalAmount" : 100, "maxQuantity" : 10 }
documents
{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
aggregation
db.sales.aggregate(
[
{
$group:
{
_id: "$item",
maxTotalAmount: { $max: { $multiply: [ "$price", "$quantity" ] } },
maxQuantity: { $max: "$quantity" }
}
}
]
)
When you want detail from the same grouping item then you use $sort and $first for the field(s) from the document you wish to preserve:
db.sales.aggregate([
{ "$project": {
"item": 1,
"TotalAmount": { "$multiply": [ "$price", "$quantity" ] },
"quantity": 1
}},
{ "$sort": { "TotalAmount": -1 } },
{ "$group": {
"_id": "$item",
"maxTotalAmount": { "$max": "$TotalAmount" },
"maxQuantity": { "$max": "$quantity" },
"doc_id": { "$first": "$_id" },
"doc_quantity": { "$first": "$quantity" }
}}
])
The aggregation "accumulators" cannot use embedded fields, and pushing all to an array makes little sense. But you can name like above and even rename with another $project or in your code if you want to.
Just to demonstrate how impractical this is to do otherwise, there is this example:
db.sales.aggregate([
{ "$group": {
"_id": "$item",
"maxTotalAmount": { "$max": { "$multiply": [ "$price", "$quantity" ] } },
"maxQuantity": { "$max": "$quantity" },
"docs": { "$push": {
"_id": "$_id",
"quantity": "$quantity",
"TotalAmount": { "$multiply": [ "$price", "$quantity" ] }
}}
}},
{ "$project": {
"maxTotalAmount": 1,
"maxQuantity": 1,
"maxTotalDocs": {
"$setDifference": [
{ "$map": {
"input": "$docs",
"as": "doc",
"in": {
"$cond": [
{ "$eq": [ "$maxTotalAmount", "$$doc.TotalAmount" ] },
"$$doc",
false
]
}
}},
[false]
]
}
}}
])
Which is not a great idea since you are pushing every document within the grouping condition into an array, only to filter out the ones you want later. On any reasaonable data size this is not practical and likely to break.
Please check the below :
db.qt.aggregate([
{ "$project": { "maxTotalAmount" : { "$multiply" :
[ "$price", "$quantity" ]
} ,
"currentDocumnet" : { "_id" : "$_id" ,
"item" : "$item", "price" : "$price",
"quantity" : "$quantity",
"date" : "$date" } }
},
{"$sort" : { "currentDocumnet.item" : 1 , maxTotalAmount : -1}},
{"$group" :{ _id : "$currentDocumnet.item" ,
currentDocumnet : { "$first" : "$currentDocumnet"} ,
maxTotalAmount : { "$first" : "$maxTotalAmount"} ,
maxQuantity: { "$max" : "$currentDocumnet.quantity" }}
}
]);

MongoDB aggregation on another aggreatation suggestions

I have a Json file imported into MongoDB. Every line on it is a user, and I have a field product, with the name of it. I know the value of every product, they are just few.
But this information is not stored on the Json.
I was able to do aggregation to retrieve the number of time that a user bought a product, but I would like to do a query to get directly the amount of money that each user spent.
This is my query:
db.source.aggregate([
{"$match": {
"$and":[
{"productName":{
"$in":[
"product2","product2","product3",
"product4","product5","product6"
]
}},
{ "$or": [
{"appID" : "nameOfAPP"},
{"appID": "NameOfAPP2"}
]}
]
}},
{ "$group": {
"_id": {
"id_user": "$id_user",
"productName": "$productName"
},
"count": { "$sum": 1}
}},
{ "$sort" : { "count": -1 } }
])
so the output is like that:
{ "_id" : { "id_user" : "user1", "productID" : "product2" }, "count" : 433 }
{ "_id" : { "id_user" : "user2", "productID" : "product1" }, "count" : 370 }
{ "_id" : { "id_user" : "user1", "productID" : "product3" }, "count" : 300 }
{ "_id" : { "id_user" : "user3", "productID" : "product6" }, "count" : 250 }
{ "_id" : { "id_user" : "user2", "productID" : "product5" }, "count" : 140 }
{ "_id" : { "id_user" : "user3", "productID" : "product4" }, "count" : 90 }
I know that product 1 costs 20$, product 2 costs 40$, product 3 costs 55$, product 4 costs -90$, product 5 costs 110$, product 6 costs 200$.
I would like to have an output like that:
{ "_id" : { "id_user" : "user1"}, "money_spent" : 600$ }
{ "_id" : { "id_user" : "user2"}, "money_spent" : 400$ }
etc
Can you help to get that result, I am new with MongoDB.
Thanks in advance.
If you cannot go to the original source data an are only working with an import then do this:
db.source.aggregate([
{"$match": {
"$and":[
{ "productName": {
"$in":[
"product1","product2","product3",
"product4","product5","product6"
]
}},
{ "$or": [
{"appID" : "nameOfAPP"},
{"appID": "NameOfAPP2"}
]}
]
}},
{ "$group": {
"_id": "$id_user",
"cost": {
"$sum": {
"$cond": [
{ "$eq": ["$_id.productId", "product1"] },
20,
{ "$cond": [
{ "$eq": ["$productName", "product2"] },
40,
{ "$cond": [
{ "$eq": [ "$productName", "product3"] },
55,
{ "$cond": [
{ "$eq": [ "$productName", "product4" ] },
-90,
{ "$cond": [
{ "$eq": [ "$productName", "product5" ] },
110,
200
]}
]}
]}
]}
}
}
}
}}
])
The $cond operator evaluates whether your field value matches the condition and places the appropriate value simply just $sum to get your result.
$cond provides a "ternary" operator or "if .. then .. else" that is used to evaluate the condition you provide in the first argument. You construct this to "cascade" where the condition evaluates to false in order to move on to the next condition to evaluate, otherwise return the value that matches your condition.
In this way your "known" values are applied as you aggregate for your expected total.