mongodb: match, group by multiple fields, project and count - mongodb

So I'm learning mongodb and I got a collection of writers to train.
Here I'm trying to count works by sorting them by country and gender of the author. This is what I accoplished so far:
db.writers.aggregate([
{ "$match": { "gender": {"$ne": male}}},
{ "$group": {
"_id": {
"country_id": "$country_id",
"type": "$type"
},
}},
{ "$group": {
"_id": "$_id.country_id",
"literary_work": {
"$push": {
"type": "$_id.type",
"count": { "$sum": "$type" }
}
},
"total": { "$sum": "$type" }
}},
{ "$sort": { "country_id": 1 } },
{ "$project": {
"literary_work": { "$slice": [ "$literary_work", 3 ] },
"total": { "$sum": "$type" }
}}
])
Sadly, the output that I get is not the one I'm expecting:
"_id" : GREAT BRITAIN,
"literary_work" : [
{
"type" : "POEM",
"count" : 0
},
{
"type" : "NOVEL",
"count" : 0
},
{
"type" : "SHORT STORY",
"count" : 0
}
],
"total" : 0
Could anyone tell me where do I insert the count stage or what is my mistake?)
upd:
Data sample:
{
"_id" : ObjectId("5f115c5d5f62f9f482cd7a49"),
"author" : George Sand,
"gender" : female,
"country_id" : FRANCE,
"title": "Consuelo",
"type" : "NOVEL",
}
Expected result (NB! this is a result for both genders):
{
"_id" : FRANCE,
"count" : 59.0,
"literary_work" : [
{
"type" : "POEM",
"count" : 14.0
},
{
"type" : "NOVEL",
"count" : 34.0
},
{
"type" : "SHORT STORY",
"count" : 11.0
}
]
}

Your implementation is correct way but there are missing things:
missed count in first $group
on the base of first group count it can count whole count of literary_work
and $project is not needed from your query
Corrected things in query,
db.writers.aggregate([
{
$match: {
gender: { $ne: "male" }
}
},
{
$group: {
_id: {
country_id: "$country_id",
type: "$type"
},
// missed this
count: { $sum: 1 }
}
},
{
$group: {
_id: "$_id.country_id",
// this count will be on the base of first group count
count: { $sum: "$count" },
literary_work: {
$push: {
type: "$_id.type",
// add count in inner count
count: "$count"
}
}
}
},
// corrected from country_id to _id
{
$sort: { "_id": 1 }
}
])
Working Playground: https://mongoplayground.net/p/JWP7qdDY6cc

Related

Counting the two value in a attribute using aggregate in mongodb

I have some documents in a collection which looks like this
{
"_id" : "5a2e50b32d43ba00010041e5",
account_id:"23232323"
status:"accepted",
keyname:"java"
},
{
"_id" : "5a2e54332d43ba00010041e5",
account_id:"2323233"
status:"pending",
keyname:"java"
},
{
"_id" : "5a2e54332d43ba00010041e5",
account_id:"23232sdsd3"
status:"pending",
keyname:"Nodejs"
}
I need to get the counts of the pending and accepted status for each keyname for a particular account_id
eg: should give a result like this.
{
keyname:"java",
pending:10,
accepted:10
}
This is the code that I have tried out
db.getCollection("programs").aggregate([
{ "$match": { "account_id": "1" } },
{ "$group": { "_id": "$keyname", "count": { "$sum": 1 } } },
{ "$match": { "_id": { "$ne": null } } }
])
which gives a result like this
{
"_id" : "java",
"count" : 3.0
},
{
"_id" : "nodejs",
"count" : 3.0
},
{
"_id" : "C#",
"count" : 3.0
}
You can use below aggregation
db.collection.aggregate([
{ "$match": { "account_id": "1" } },
{ "$group": {
"_id": "$keyname",
"accepted": {
"$sum": {
"$cond": [
{ "$eq": ["$status", "accepted"] },
0,
1
]
}
},
"pending": {
"$sum": {
"$cond": [
{ "$eq": ["$status", "pending"] },
0,
1
]
}
}
}}
])

Combine results based on condition during group by

Mongo query generated out of java code:
{
"pipeline": [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},
{
"$group": {
"_id": "$result",
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}]
}
Field 'result' can have values like Approved, Rejected, null and "" (empty string). What I am trying to achieve is combining the count of both null and empty together.
So that the empty string Id will have the count of both null and "", which is equal to 4
I'm sure theres a more "proper" way but this is what i could quickly come up with:
[
{
"$group" : {
"_id" : "$result",
"id" : {
"$first" : "$result"
},
"labelKey" : {
"$first" : {
"$ifNull" : [
"$result",
"$result"
]
}
},
"value" : {
"$sum" : 1.0
}
}
},
{
"$group" : {
"_id" : {
"$cond" : [{
$or: [
{"$eq": ["$_id", "Approved"]},
{"$eq": ["$_id", "Rejected"]},
]}},
"$_id",
""
]
},
"temp" : {
"$push" : {
"_id" : "$_id",
"labelKey" : "$labelKey"
}
},
"count" : {
"$sum" : "$value"
}
}
},
{
"$unwind" : "$temp"
},
{
"$project" : {
"_id" : "$temp._id",
"labelKey": "$temp.labelKey",
"count" : "$count"
}
}
],
);
Due to the fact the second group is only on 4 documents tops i don't feel too bad about doing this.
I have used $facet.
The MongoDB stage $facet lets you run several independent pipelines within the stage of a pipeline, all using the same data. This means that you can run several aggregations with the same preliminary stages, and successive stages.
var queries = [{
"$match": {
"Id": "09cd9a5a-85c5-4948-808b-20a52d92381a"
}
},{
$facet: {//
"empty": [
{
$match : {
result : { $in : ['',null]}
}
},{
"$group" : {
"_id" : null,
value : { $sum : 1}
}
}
],
"non_empty": [
{
$match : {
result : { $nin : ['',null]}
}
},{
"$group" : {
"_id" : '$result',
value : { $sum : 1}
}
}
]
}
},
{
$project: {
results: {
$concatArrays: [ "$empty", "$non_empty" ]
}
}
}];
Output :
{
"results": [{
"_id": null,
"value": 52 // count of both '' and null.
}, {
"_id": "Approved",
"value": 83
}, {
"_id": "Rejected",
"value": 3661
}]
}
Changing the group by like below solved the problem
{
"$group": {
"_id": {
"$ifNull": ["$result", ""]
},
"id": {
"$first": "$result"
},
"labelKey": {
"$first": {
"$ifNull": ["$result",
"$result"]
}
},
"value": {
"$sum": 1
}
}
}

Counting results in aggregate selection

My MongoDB database have a structure
{
"_id" : ObjectId("5c1ccc20fc0f60769227d455"),
"type" : 0,
"id" : "hwJyzAHyfjXUlrGhblT7txWd",
"userowner" : 1.0,
"campid" : "9548",
"date" : 1545391136,
"useragent" : "mozilla/5.0 (windows nt 10.0; win64; x64; rv:65.0) gecko/20100101 firefox/65.0",
"domain" : "",
"referer" : "",
"country" : "en",
"language" : "en-US",
"languages" : [
"en-US",
"en"
],
"screenres" : [
"1920*1080"
],
"avscreenres" : [
"1080*1858"
],
"webgl" : "angle (nvidia geforce gtx 1060 6gb direct3d11 vs_5_0 ps_5_0)",
"hash" : 123,
"timezone" : -180,
"result" : true,
"resultreason" : "learning",
"remoteip" : "0.0.0.0"
}
Every a document have a vield "result" with a bool value.
I make aggregation selection:
db.getCollection('clicks').aggregate([
{ $match: {userowner: 1, date:{$gte: 0, $lte: 9545392055}} },
{ $group : {_id : "$campid",
number: {$sum: 1}}}
])
and get a Result:
/* 1 */
{
"_id" : "4587",
"number" : 2.0
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0
}
How can count the amount of value "true" and "false" in a field "result" and get a result like this:
/* 1 */
{
"_id" : "4587",
"number" : 2.0,
"passed":100,
"blocked":120
}
/* 2 */
{
"_id" : "9548",
"number" : 1346.0,
"passed":100,
"blocked":120
}
I hope this works as per your requirement.
db.getCollection('clicks').aggregate(
[
{
$match: {
userowner: 1, date: {
$gte: 0, $lte: 9545392055
}
}
},
{
$group: {
_id: "$campid", passed: {
$sum: {
$cond:
[
{ $eq: ["$result", true] },
1, 0
]
}
},
blocked: {
$sum: {
$cond:
[
{
$eq: ["$result", false]
}
, 1, 0]
}
},
number: { $sum: 1 }
}
},
{
$project: {
_id: 0,
campid: "$_id",
number: 1,
passed: 1,
blocked: 1
}
}
])
Output:-
{
"passed" : 3,
"blocked" : 2,
"number" : 5,
"campid" : "4587"
}
{
"passed" : 2,
"blocked" : 1,
"number" : 3,
"campid" : "9548"
}
Refer $group, $cond, and $eq for more info.
With MongoDb 3.6 and newer, you can leverage the use of $arrayToObject operator within a $replaceRoot pipeline to get the desired result.
You would need to group the documents intially by the campid and the result field, aggregate the sum and pass the results to yet another group pipeline stage. This group stage will prepare the documents in a way that $arrayToObject operator will give you the desired object by creating a key-value array using $push.
The result from this is then fed to the $replaceRoot pipeline to bring the passed and blocked fields to the root of the document.
The following aggregate pipeline describes the above:
db.getCollection('clicks').aggregate([
{ "$match": { "userowner": 1, "date": { "$gte": 0, "$lte": 9545392055 } } },
{ "$group": {
"_id": {
"campid": "$campid",
"result": { "$cond": [ "$result", "passed", "blocked" ] }
},
"count": { "$sum": 1 }
} },
{ "$group": {
"_id": "$_id.campid",
"number": { "$sum": "$count" },
"counts": {
"$push": {
"k": "$_id.result",
"v": "$count"
}
}
} },
{ "$replaceRoot": {
"newRoot": {
"$mergeObjects": [
{ "$arrayToObject": "$counts" },
"$$ROOT"
]
}
} },
{ "$project": { "counts": 0 } }
])

MongoDB Group By count occurences of values and output as new field

I have a 3 Collections Assignments, Status, Assignee.
Assignments Fields : [_id, status, Assignee]
Assignee and Status Fields : [_id, name].
There can be many assignments associated with various Status and Assignee collections(linked via _id field), There is no nesting or complex data.
I need a query for all assignments ids where Assignees are the row, Status are the Columns, there combined cell is the count with Total counts at the end.
To help you visualize, I am attaching below image. I am new to complex Mongo DB Aggregate framework, kindly guide me to achieve query.
Note: Data in Status and Assignee collection will be dynamic. Nothing is predetermined in the Query. So, the Rows and Columns are going to grow dynamically in future, If the query is given pagination, then it would be of great help. I cannot write a query with hard coded status names like 'pending', 'completed' etc. As data shall grow and existing data may change like 'pending task', 'completed work'.
Below is my query
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
]);
Below is the output format:
{
"_id" : "John",
"statuses" : {
"statusId" : "Pending",
"count" : 3.0
},
"count" : 3.0
}
{
"_id" : "Katrina",
"statuses" : [{
"statusId" : "Pending",
"count" : 1.0
},
{
"statusId" : "Completed",
"count" : 1.0
},
{
"statusId" : "Assigned",
"count" : 1.0
}],
"count" : 3.0
}
{
"_id" : "Collins",
"statuses" : {
"statusId" : "Pending",
"count" : 4.0
},
"count" : 4.0
}
Expected Output is:
{
"_id" : "Katrina",
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
"totalCount" : 3.0
}
Any Idea on how to many various statusId for different assignee as keys and not values in single document.
You need another $group stage after $unwind to count number of status based on statusId string value:
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
The final aggregate command:
db.getCollection('Assignments').aggregate([
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
}
},
{
"$group": {
"_id": "$_id.assignee",
"statuses": {
"$push": {
"statusId": "$_id.statusId",
"count": "$statusCount"
},
},
"count": { "$sum": "$statusCount" }
}
},
{ "$unwind": "$statuses" },
{
"$group": {
"_id": "$_id",
"Pending" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Pending"
]},
"$statuses.count",
0
]
}
},
"Completed" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Completed"
]},
"$statuses.count",
0
]
}
},
"Assigned" : {
"$sum": {
"$cond": [
{ "$eq": [
"$statuses.statusId",
"Assigned"
]},
"$statuses.count",
0
]
}
},
"totalCount": { "$sum": 1 }
}
}
]);
Why not just keep statuses as an object so each status is a key/val pair. If that works you do the following
db.getCollection('Assignments').aggregate([
[
{
"$group": {
"_id": {
"assignee": "$assignee",
"statusId": "$statusId"
},
"statusCount": { "$sum": 1 }
},
},
{
"$group" : {
"_id" : "$_id.assignee",
"statuses" : {
"$push" : {
"k" : "$_id.statusId", // <- "k" as key value important for $arrayToObject Function
"v" : "$statusCount" // <- "v" as key value important for $arrayToObject Function
}
},
"count" : {
"$sum" : "$statusCount"
}
}
},
{
"$project" : {
"_id" : 1.0,
"statuses" : {
"$arrayToObject" : "$statuses"
},
"totalCount" : "$count"
}
}
],
{
"allowDiskUse" : false
}
);
This gives you:
{
"_id" : "Katrina",
"statuses": {
"Pending" : 1.0,
"Completed" : 1.0,
"Assigned" : 1.0,
},
"totalCount" : 3.0
}
A compromise having it one layer deeper but still the shape of statuses you wanted and dynamic with each new statusId added.

Sum array in aggregation query

I'm pretty new to MongoDB, and having some problems getting my query as I want it. The documents contain "errors" that have happened a specific time. The result I want from the query is an error count for each month per user. This I have already figured out, but additionally I want the total errorcount per user.
This is what I've got so far:
db.Logger.aggregate([
{ "$group": {
"_id": {
"name": "$name",
"month": { "$month": "$errorTime" }
},
"totalErrors": { "$sum": 1 }
}},
{ $group :
{ _id: { name : "$_id.name"},
errors: { $addToSet: { totalErrors: { errorsThisMonth: "$totalErrors", currentMonth : "$_id.month" } } },
}
}
])
The result is:
{
"_id" : {
"name" : "abhos"
},
"errors" : [
{
"totalErrors" : {
"errorsThisMonth" : 6,
"currentMonth" : 2
}
},
{
"totalErrors" : {
"errorsThisMonth" : 6,
"currentMonth" : 1
}
}
]
},
Will it be possible to get what I want by adding to that query?
All you need is an additional $sum in your second $group:
db.Logger.aggregate([
{ "$group": {
"_id": {
"name": "$name",
"month": { "$month": "$errorTime" }
},
"totalErrors": { "$sum": 1 }
}},
{ "$group": {
"_id": "$_id.name",
"errors": {
"$addToSet": {
"errorsThisMonth": "$totalErrors",
"currentMonth" : "$_id.month"
}
},
"totalErrors": { "$sum": "$totalErrors" }
}}
])
Also you have a few extra document levels you do not need in there, such as extra fields under the _id and the "errors" "set" produced in the grouping. This output is just a little different without those additional levels:
{
"_id": "abhos"
"errors" : [
{
"errorsThisMonth" : 6,
"currentMonth" : 2
},
{
"errorsThisMonth" : 6,
"currentMonth" : 1
}
],
"totalErrors": 12
},