MongoDB Aggregation, group by subobject keys - mongodb

I have a mongo collection whose schema looks like this:
_id: ObjectId(),
segments: {
activity: 'value1',
activation: 'value2',
plan: 'value3'
}
I'm trying to use the aggregation framework to find out how many of my documents have the value1 for the segment activity for instance.
The problem is that I want to do that for every segment in the same request if possible, and that I don't know how many segments I'll have or even their name.
Basically here's what I'd like to do:
If I have these two documents:
{ _id: 1, segments: { activity: 'active', activation: 'inactive', plan: 'free' }
{ _id: 2, segments: { activity: 'inactive', activation: 'inactive', plan: 'free' }
I want to be able to see that two of them have the activation segment to inactive and the free plan, and that activity have 1 inactive and 1 active values. Here is what I want to get:
{
activity: {
active: 1,
inactive: 1
},
activation: {
inactive: 2
},
plan: {
free: 2
}
}
So basically, if you could just $group by key it would be great! Something like this:
{
$group: {
_id: { $concat: [ '$segments.$key', '-', '$segments.$key.$value' ],
count: { $sum: 1 }
}
}
Or if I could unwind on each key...

To get the counts, take advantage of the $cond operator in the $group pipeline step to evaluate the counts based on the subdocuments value, something like the following:
db.collection.aggregate([
{
"$group": {
"_id": "$_id",
"activity_active": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "active" ] }, 1, 0 ]
}
},
"activity_inactive": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "inactive" ] }, 1, 0 ]
}
},
"activation_active": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activation", "active" ] }, 1, 0 ]
}
},
"activation_inactive": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.activity", "inactive" ] }, 1, 0 ]
}
},
"plan_free": {
"$sum": {
"$cond": [ { "$eq": [ "$segment.plan", "free" ] }, 1, 0 ]
}
}
}
},
{
"$project": {
"_id": 0,
"activity": {
"active": "$activity_active",
"inactive": "$activity_inactive"
},
"activation": {
"active": "$activation_active",
"inactive": "$activation_inactive"
},
"plan": {
"free": "$plan_free"
}
}
}
])

there could be a generic solution to this problem, but might need a bit post processing:
to get output similat to this:
{
"_id" : {
"activity" : "active",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}, {
"type" : "paid",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "inactive"
},
"plan" : [{
"type" : "free",
"total" : 1
}
]
}, {
"_id" : {
"activity" : "inactive",
"activation" : "active"
},
"plan" : [{
"type" : "paid",
"total" : 3
}, {
"type" : "free",
"total" : 6
}
]
}
use query like that:
db.collection.aggregate([{
$group : {
_id : {
activity : "$segments.activity",
activation : "$segments.activation",
plan : "$segments.plan"
},
total : {
$sum : 1
}
}
}, {
$group : {
_id : {
activity : "$_id.activity",
activation : "$_id.activation"
},
plan : {
$push : {
type : "$_id.plan",
total : "$total"
}
}
}
},
])

Related

Counting results in aggregate selection

My MongoDB database have a structure
{
"_id" : ObjectId("5c1ccc20fc0f60769227d455"),
"type" : 0,
"id" : "hwJyzAHyfjXUlrGhblT7txWd",
"userowner" : 1.0,
"campid" : "9548",
"date" : 1545391136,
"useragent" : "mozilla/5.0 (windows nt 10.0; win64; x64; rv:65.0) gecko/20100101 firefox/65.0",
"domain" : "",
"referer" : "",
"country" : "en",
"language" : "en-US",
"languages" : [
"en-US",
"en"
],
"screenres" : [
"1920*1080"
],
"avscreenres" : [
"1080*1858"
],
"webgl" : "angle (nvidia geforce gtx 1060 6gb direct3d11 vs_5_0 ps_5_0)",
"hash" : 123,
"timezone" : -180,
"result" : true,
"resultreason" : "learning",
"remoteip" : "0.0.0.0"
}
Every a document have a vield "result" with a bool value.
I make aggregation selection:
db.getCollection('clicks').aggregate([
{ $match: {userowner: 1, date:{$gte: 0, $lte: 9545392055}} },
{ $group : {_id : "$campid",
number: {$sum: 1}}}
])
and get a Result:
/* 1 */
{
"_id" : "4587",
"number" : 2.0
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0
}
How can count the amount of value "true" and "false" in a field "result" and get a result like this:
/* 1 */
{
"_id" : "4587",
"number" : 2.0,
"passed":100,
"blocked":120
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0,
"passed":100,
"blocked":120
}
I hope this works as per your requirement.
db.getCollection('clicks').aggregate(
[
{
$match: {
userowner: 1, date: {
$gte: 0, $lte: 9545392055
}
}
},
{
$group: {
_id: "$campid", passed: {
$sum: {
$cond:
[
{ $eq: ["$result", true] },
1, 0
]
}
},
blocked: {
$sum: {
$cond:
[
{
$eq: ["$result", false]
}
, 1, 0]
}
},
number: { $sum: 1 }
}
},
{
$project: {
_id: 0,
campid: "$_id",
number: 1,
passed: 1,
blocked: 1
}
}
])
Output:-
{
"passed" : 3,
"blocked" : 2,
"number" : 5,
"campid" : "4587"
}
{
"passed" : 2,
"blocked" : 1,
"number" : 3,
"campid" : "9548"
}
Refer $group, $cond, and $eq for more info.
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator within a $replaceRoot pipeline to get the desired result.
You would need to group the documents intially by the campid and the result field, aggregate the sum and pass the results to yet another group pipeline stage. This group stage will prepare the documents in a way that $arrayToObject operator will give you the desired object by creating a key-value array using $push.
The result from this is then fed to the $replaceRoot pipeline to bring the passed and blocked fields to the root of the document.
The following aggregate pipeline describes the above:
db.getCollection('clicks').aggregate([
{ "$match": { "userowner": 1, "date": { "$gte": 0, "$lte": 9545392055 } } },
{ "$group": {
"_id": {
"campid": "$campid",
"result": { "$cond": [ "$result", "passed", "blocked" ] }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.campid",
"number": { "$sum": "$count" },
"counts": {
"$push": {
"k": "$_id.result",
"v": "$count"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
])

MongoDB Group By count occurences of values and output as new field

I have a 3 Collections Assignments, Status, Assignee.
Assignments Fields : [_id, status, Assignee]
Assignee and Status Fields : [_id, name].
There can be many assignments associated with various Status and Assignee collections(linked via _id field), There is no nesting or complex data.
I need a query for all assignments ids where Assignees are the row, Status are the Columns, there combined cell is the count with Total counts at the end.
To help you visualize, I am attaching below image. I am new to complex Mongo DB Aggregate framework, kindly guide me to achieve query.
Note: Data in Status and Assignee collection will be dynamic. Nothing is predetermined in the Query. So, the Rows and Columns are going to grow dynamically in future, If the query is given pagination, then it would be of great help. I cannot write a query with hard coded status names like 'pending', 'completed' etc. As data shall grow and existing data may change like 'pending task', 'completed work'.
Below is my query
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
]);
Below is the output format:
{
"_id" : "John",
"statuses" : {
"statusId" : "Pending",
"count" : 3.0
},
"count" : 3.0
}
{
"_id" : "Katrina",
"statuses" : [{
"statusId" : "Pending",
"count" : 1.0
},
{
"statusId" : "Completed",
"count" : 1.0
},
{
"statusId" : "Assigned",
"count" : 1.0
}],
"count" : 3.0
}
{
"_id" : "Collins",
"statuses" : {
"statusId" : "Pending",
"count" : 4.0
},
"count" : 4.0
}
Expected Output is:
{
"_id" : "Katrina",
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
"totalCount" : 3.0
}
Any Idea on how to many various statusId for different assignee as keys and not values in single document.
You need another $group stage after $unwind to count number of status based on statusId string value:
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
The final aggregate command:
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
{ "$unwind": "$statuses" },
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
]);
Why not just keep statuses as an object so each status is a key/val pair. If that works you do the following
db.getCollection('Assignments').aggregate([
[
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
},
},
{
"$group" : {
"_id" : "$_id.assignee",
"statuses" : {
"$push" : {
"k" : "$_id.statusId", // <- "k" as key value important for $arrayToObject Function
"v" : "$statusCount" // <- "v" as key value important for $arrayToObject Function
}
},
"count" : {
"$sum" : "$statusCount"
}
}
},
{
"$project" : {
"_id" : 1.0,
"statuses" : {
"$arrayToObject" : "$statuses"
},
"totalCount" : "$count"
}
}
],
{
"allowDiskUse" : false
}
);
This gives you:
{
"_id" : "Katrina",
"statuses": {
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
},
"totalCount" : 3.0
}
A compromise having it one layer deeper but still the shape of statuses you wanted and dynamic with each new statusId added.

MongoDB aggregate count based on multiple query fields - (Multiple field count)

My collection will look this,
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "xxx",
"salary" : 10000,
"type" : "type1"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "aaa",
"salary" : 10000,
"type" : "type2"
}
{
"_id" : ObjectId("55c8bd1d85b83e06dc54c0eb"),
"name" : "ccc",
"salary" : 10000,
"type" : "type2"
}
My query params will be coming as,
{salary=10000, type=type2}
so based on the query I need to fetch the count of above query params
The result should be something like this,
{ category: 'type1', count: 500 } { category: 'type2', count: 200 } { category: 'name', count: 100 }
Now I am getting count by hitting three different queries and constructing the result (or) server side iteration I can get the result.
Can anyone suggest or provide me good way to get above result
Your quesstion is not very clearly presented, but what it seems you wanted to do here was count the occurances of the data in the fields, optionally filtering those fields by the values that matches the criteria.
Here the $cond operator allows you to tranform a logical condition into a value:
db.collection.aggregate([
{ "$group": {
"_id": null,
"name": { "$sum": 1 },
"salary": {
"$sum": {
"$cond": [
{ "$gte": [ "$salary", 1000 ] },
1,
0
]
}
},
"type": {
"$sum": {
"$cond": [
{ "$eq": [ "$type", "type2" ] },
1,
0
]
}
}
}}
])
All values are in the same document, and it does not really make any sense to split them up here as this is additional work in the pipeline.
{ "_id" : null, "name" : 3, "salary" : 3, "type" : 2 }
Otherwise in the long form, which is not very performant due to needing to make a copy of each document for every key looks like this:
db.collection.aggregate([
{ "$project": {
"name": 1,
"salary": 1,
"type": 1,
"category": { "$literal": ["name","salary","type"] }
}},
{ "$unwind": "$category" },
{ "$group": {
"_id": "$category",
"count": {
"$sum": {
"$cond": [
{ "$and": [
{ "$eq": [ "$category", "name"] },
{ "$ifNull": [ "$name", false ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "salary" ] },
{ "$gte": [ "$salary", 1000 ] }
]},
1,
{ "$cond": [
{ "$and": [
{ "$eq": [ "$category", "type" ] },
{ "$eq": [ "$type", "type2" ] }
]},
1,
0
]}
]}
]
}
}
}}
])
And it's output:
{ "_id" : "type", "count" : 2 }
{ "_id" : "salary", "count" : 3 }
{ "_id" : "name", "count" : 3 }
If your documents do not have uniform key names or otherwise cannot specify each key in your pipeline condition, then apply with mapReduce instead:
db.collection.mapReduce(
function() {
var doc = this;
delete doc._id;
Object.keys(this).forEach(function(key) {
var value = (( key == "salary") && ( doc[key] < 1000 ))
? 0
: (( key == "type" ) && ( doc[key] != "type2" ))
? 0
: 1;
emit(key,value);
});
},
function(key,values) {
return Array.sum(values);
},
{
"out": { "inline": 1 }
}
);
And it's output:
"results" : [
{
"_id" : "name",
"value" : 3
},
{
"_id" : "salary",
"value" : 3
},
{
"_id" : "type",
"value" : 2
}
]
Which is basically the same thing with a conditional count, except that you only specify the "reverse" of the conditions you want and only for the fields you want to filter conditions on. And of course this output format is simple to emit as separate documents.
The same approach applies where to test the condition is met on the fields you want conditions for and return 1 where the condition is met or 0 where it is not for the summing the count.
You can use aggregation as following query:
db.collection.aggregate({
$match: {
salary: 10000,
//add any other condition here
}
}, {
$group: {
_id: "$type",
"count": {
$sum: 1
}
}
}, {
$project: {
"category": "$_id",
"count": 1,
_id: 0
}
}

Check if an element appears in an array during the projection stage of a mongo aggregation pipeline

I've got a collection of mongo documents like -
{
"_id" : "c959e4d6-961d-4043-ade6-2f93aa055e11",
"events" : [
"clickOut"
"showHoverAd",
"closeHoverAd"
]
}
{
"_id" : "d0dcb2be-f8bc-45cd-8337-d89a16063b08",
"events" : [
"zoom",
"pan"
]
}
{
"_id" : "9179b26e-e45c-48ab-93f6-e73b8ebe559b",
"events" : [
"clickOut"
]
}
{
"_id" : "db0b82ad-7a33-4ce8-9117-f6ecf041d0d9",
"events" : [
"adjustStars",
"adjustPrice",
"closeHoverAd",
"showHoverAd"
]
}
I'm trying to use a projection stage in an aggregation pipeline to identify if a particular string appears in the events field.
db.events.aggreate([
{$project: {
session: '$_id',
clickedOut: {
$cond: [{$elemMatch: {'$events':'clickOut'}},true,false]
}
}}
])
I'm getting an error - exception: invalid operator '$elemMatch'.
I want my output documents to look like -
{
"session" : "c959e4d6-961d-4043-ade6-2f93aa055e11",
"clickedOut" : false
}
{
"session" : "d0dcb2be-f8bc-45cd-8337-d89a16063b08",
"clickedOut" : true
}
But I can't seem to find a way of doing it. I've tried using $in and $all or simply
$cond: {'$events':'clickOut'}
but I'm not getting anywhere.
Use the following aggregation:
db.events.aggregate([
{
"$unwind": "$events"
},
{
"$project": {
"_id": 0,
"session": "$_id",
"clickedOut": {
"$cond": [ { "$eq": [ "$events", "clickOut" ] }, 1, 0 ]
}
}
},
{
"$group": {
"_id": "$session",
"count": {
"$sum": "$clickedOut"
}
}
},
{
"$project": {
"_id": 0,
"session": "$_id",
"clickedOut": {
"$cond": [ { "$eq": [ "$count", 1 ] }, true, false ]
}
}
},
]);
Output:
/* 1 */
{
"result" : [
{
"session" : "db0b82ad-7a33-4ce8-9117-f6ecf041d0d9",
"clickedOut" : false
},
{
"session" : "9179b26e-e45c-48ab-93f6-e73b8ebe559b",
"clickedOut" : true
},
{
"session" : "d0dcb2be-f8bc-45cd-8337-d89a16063b08",
"clickedOut" : false
},
{
"session" : "c959e4d6-961d-4043-ade6-2f93aa055e11",
"clickedOut" : true
}
],
"ok" : 1
}

MongoDB: aggregating fields from arrays of subdocuments

I have a mongodb collection called Events, containing baseball games. Here is an example of one record in the table:
{
"name" : "Game# 814",
"dateStart" : ISODate("2012-09-28T14:47:53.695Z"),
"_id" : ObjectId("53a1b24de3f25f4443d9747e"),
"stats" : [
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"_id" : ObjectId("53a1b24de3f25f4443d97480"),
"score" : 17
},
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"_id" : ObjectId("53a1b24de3f25f4443d9747f"),
"score" : 12
}
]
"__v" : 0
}
I need help writing the query that returns standings for all teams. The result set should look like:
{
"team" : ObjectId("53a11a43a8de6dd8375c938d"),
"teamName" : "Yankees",
"wins" : <<number of Yankees wins>>
"losses" : <<number of Yankees losses>>
"draws" : <<number of Yankees draws>>
}
{
"team" : ObjectId("53a11a43a8de6dd8375c940b"),
"teamName" : "Reds",
"wins" : <<number of Reds wins>>
"losses" : <<number of Reds losses>>
"draws" : <<number of Reds draws>>
}
...
Here's the query I've started with...
db.events.aggregate(
{"$unwind": "$stats" },
{ $group : {
_id : "$stats.team",
gamesPlayed : { $sum : 1},
totalScore : { $sum : "$stats.score" }
}}
);
... which returns results:
{
"result" : [
{
"_id" : ObjectId("53a11a43a8de6dd8375c93cb"),
"gamesPlayed" : 125, // not a requirement... just trying to get $sum working
"totalScore" : 1213 // ...same here
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c955f"),
"gamesPlayed" : 128,
"totalScore" : 1276
},
{
"_id" : ObjectId("53a11a44a8de6dd8375c9661"),
"gamesPlayed" : 152,
"totalScore" : 1509
},
....
It would seem advisable for you to keep your "wins", "losses", "draws" within your documents as you create or update them. But it is possible to do with aggregate if a little long winded
db.events.aggregate([
// Unwind the "stats" array
{ "$unwind": "$stats" },
// Combine the document with new fields
{ "$group": {
"_id": "$_id",
"firstTeam": { "$first": "$stats.team" },
"firstTeamName": { "$first": "$stats.teamName" },
"firstScore": { "$first": "$stats.score" },
"lastTeam": { "$last": "$stats.team" },
"lastTeamName": { "$last": "$stats.teamName" },
"lastScore": { "$last": "$stats.score" },
"minScore": { "$min": "$stats.score" },
"maxScore": { "$max": "$stats.score" }
}},
// Calculate by comparing scores
{ "$project": {
"firstTeam": 1,
"firstTeamName": 1,
"firstScore": 1,
"lastTeam": 1,
"lastTeamName": 1,
"lastScore": 1,
"firstWins": {
"$cond": [
{ "$gt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstLosses": {
"$cond": [
{ "$lt": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"firstDraws": {
"$cond": [
{ "$eq": [ "$firstScore", "$lastScore" ] },
1,
0
]
},
"lastWins": {
"$cond": [
{ "$gt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastLosses": {
"$cond": [
{ "$lt": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"lastDraws": {
"$cond": [
{ "$eq": [ "$lastScore", "$firstScore" ] },
1,
0
]
},
"type": { "$literal": [ true, false ] }
}},
// Unwind the "type"
{ "$unwind": "$type" },
// Group teams conditionally on "type"
{ "$group": {
"_id": {
"team": {
"$cond": [
"$type",
"$firstTeam",
"$lastTeam"
]
},
"teamName": {
"$cond": [
"$type",
"$firstTeamName",
"$lastTeamName"
]
}
},
"owins": {
"$sum": {
"$cond": [
"$type",
"$firstWins",
"$lastWins"
]
}
},
"olosses": {
"$sum": {
"$cond": [
"$type",
"$firstLosses",
"$lastLosses"
]
}
},
"odraws": {
"$sum": {
"$cond": [
"$type",
"$firstDraws",
"$lastDraws"
]
}
}
}},
// Project your final form
{ "$project": {
"_id": 0,
"team": "$_id.team",
"teamName": "$_id.teamName",
"wins": "$owins",
"losses": "$olosses",
"draws": "$odraws"
}}
])
The first part is to "re-shape" the document by unwinding the array and then grouping with "first" and "last" for defining fields for your two teams.
Then you want to $project through those documents and calculate your "wins", "losses" and "draws" for each team in the pairing. The additional thing is adding an array field for the two values true/false is convenient here. If you are on a pre 2.6 version of mongodb the $literal can be replaced with $const which is not documented but does the same thing.
Once you $unwind that "type" array, the documents can be split apart in the $group stage by evaluating whether to choose the "first" or "last" team field values via the use of $cond. This is a ternary operator that evaluates a true/false condition and returns the appropriate value according to that condition.
With a final $project your documents are formed exactly how you want.