How do I create nested aggregations with count on MongoDB? - mongodb

I am learning MongoDB in order to see if it matches our needs.
Currently we use heavily aggregations, so I am testing the flexibility of the Aggregation Framework.
I started with this hierarchy
db.companytest3.insert({"name":"A", age:7})
db.companytest3.insert({"name":"B", age:17, owner:"A"})
db.companytest3.insert({"name":"C", age:12, owner:"A"})
db.companytest3.insert({"name":"D", age:7, owner:"B"})
db.companytest3.insert({"name":"E", age:13, owner:"B"})
db.companytest3.insert({"name":"F", age:23, owner:"C"})
So I have:
db.companytest3.find()
{ "_id" : ObjectId("5457c2c0fa82c305e0b80006"), "name" : "A", "age" : 7 }
{ "_id" : ObjectId("5457c2cafa82c305e0b80007"), "name" : "A", "age" : 7 }
{ "_id" : ObjectId("5457c2d0fa82c305e0b80008"), "name" : "B", "age" : 17, "owner" : "A" }
{ "_id" : ObjectId("5457c2d6fa82c305e0b80009"), "name" : "C", "age" : 12, "owner" : "A" }
{ "_id" : ObjectId("5457c2ddfa82c305e0b8000a"), "name" : "D", "age" : 7, "owner" : "B" }
{ "_id" : ObjectId("5457c2e4fa82c305e0b8000b"), "name" : "E", "age" : 13, "owner" : "B" }
{ "_id" : ObjectId("5457c2eafa82c305e0b8000c"), "name" : "F", "age" : 23, "owner" : "C" }
My goal is to aggregate the children using their ages, so I have something like this:
{
"_id" : null,
"children" : [
{
"range:" : "lower than 10",
total: 1,
names: ["A"]
}
{
"range:" : "higher than 10",
total: 0,
names: []
}
],
"total" : 1
}
{
"_id" : "A",
"children" : [
{
"range:" : "lower than 10",
total: 0,
names: []
}
{
"range:" : "higher than 10",
total: 2,
names: ["C","B"]
}
],
"total" : 1
}
{
"_id" : "B",
"children" : [
{
"range:" : "lower than 10",
total: 1,
names: ["D"]
}
{
"range:" : "higher than 10",
total: 13,
names: ["E"]
}
],
"total" : 1
}
{
"_id" : "C",
"children" : [
{
"range:" : "lower than 10",
total: 0,
names: []
}
{
"range:" : "higher than 10",
total: 1,
names: ["F"]
}
],
"total" : 1
}
I feel I am getting near, I've got this query:
db.companytest3.aggregate(
{ $project: {
"_id": 0,
"range": {
$concat: [{
$cond: [ { $lte: ["$age", 10] }, "até 10", "" ]
}, {
$cond: [ { $gte: ["$age", 11] }, "mais de 10", "" ]
}]
},
"owner": "$owner",
"name" : "$name"
}
},
{
$group: {
_id: { owner: "$owner", range: "$range" },
children: { $addToSet: { name: "$name", range: "$range"} } ,
total: { $sum: 1}
}
},
{
$group: {
_id: { owner:"$_id.owner" },
children: { $addToSet: "$children" }
}
}
)
which gives me the following output:
{ "_id" : { "owner" : null }, "children" : [ [ { "name" : "A", "range" : "até 10" } ] ] }
{ "_id" : { "owner" : "A" }, "children" : [ [ { "name" : "C", "range" : "mais de 10" }, { "name" : "B", "range" : "mais de 10" } ] ] }
{ "_id" : { "owner" : "B" }, "children" : [ [ { "name" : "D", "range" : "até 10" } ], [ { "name" : "E", "range" : "mais de 10" } ] ] }
{ "_id" : { "owner" : "C" }, "children" : [ [ { "name" : "F", "range" : "mais de 10" } ] ] }
Now I am having issues to group the items by owner and keep sum the total, I am stuck and I do not know how to proceed. I've been trying many diferent alternatives using groups variations but I do not feel they are worth posting here.
How can I change my current query so I group the children by range and add the count?
thanks! :D

It should be possible in earlier versions, but even basically looking at how you want to manipulate the result, the simplest way I can see is with the help of some operators introduced in MongoDB 2.6.
db.companytest3.aggregate([
{ "$group": {
"_id": "$owner",
"lowerThanTenNames": {
"$addToSet": {
"$cond": [
{ "$lte": [ "$age", 10 ] },
"$name",
false
]
}
},
"lowerThanTenTotal": {
"$sum": {
"$cond": [
{ "$lte": [ "$age", 10 ] },
1,
0
]
}
},
"moreThanTenNames": {
"$addToSet": {
"$cond": [
{ "$gte": [ "$age", 11 ] },
"$name",
false
]
}
},
"moreThanTenTotal": {
"$sum": {
"$cond": [
{ "$gte": [ "$age", 11 ] },
1,
0
]
}
}
}},
{ "$project": {
"children": {
"$map": {
"input": { "$literal": ["L", "M"] },
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el", "L" ] },
{
"range": { "$literal": "lower than 10" },
"total": "$lowerThanTenTotal",
"names": {
"$setDifference": [
"$lowerThanTenNames",
[false]
]
}
},
{
"range": { "$literal": "higher than 10" },
"total": "$moreThanTenTotal",
"names": {
"$setDifference": [
"$moreThanTenNames",
[false]
]
}
}
]
}
}
},
"total": { "$add": [ "$lowerThanTenTotal", "$moreThanTenTotal" ]},
}},
{ "$sort": { "_id": 1 } }
])
Basically you want to separate these out into two sets of results for each grouping, being one for each age range. Due to the use of conditional operators, the "names" sets then need to be filtered for any false values where the conditions did not match.
The other thing that needs to be done is to coerce these results from separate fields into an array. The $map operator makes this simple by just providing a two element template with effectively "A/B" choices to do the re-mapping.
Since we had discrete fields here before they were re-mapped onto an array, you can just supply each "total" field as an argument to $add in order to get the combined total.
Produces exactly this:
{
"_id" : null,
"children" : [
{
"range" : "lower than 10",
"total" : 1,
"names" : ["A"]
},
{
"range" : "higher than 10",
"total" : 0,
"names" : [ ]
}
],
"total" : 1
}
{
"_id" : "A",
"children" : [
{
"range" : "lower than 10",
"total" : 0,
"names" : [ ]
},
{
"range" : "higher than 10",
"total" : 2,
"names" : ["C","B"]
}
],
"total" : 2
}
{
"_id" : "B",
"children" : [
{
"range" : "lower than 10",
"total" : 1,
"names" : ["D"]
},
{
"range" : "higher than 10",
"total" : 1,
"names" : ["E"]
}
],
"total" : 2
}
{
"_id" : "C",
"children" : [
{
"range" : "lower than 10",
"total" : 0,
"names" : [ ]
},
{
"range" : "higher than 10",
"total" : 1,
"names" : ["F"]
}
],
"total" : 1
}

Related

mongodb getting an array inside a nested array

I'm still new to mongodb, I have this basic enrollment system data:
{ "_id" : ObjectId("62277d92a561e550d5ec73ca"), "sid" : 1, "sname" : "sad", "semail" : "dsa", "scourse" : "it", "enrolled" : [ { "subjid" : 3 } ] }
{ "_id" : ObjectId("6227875bdbcc41a56a863697"), "sid" : 2, "sname" : "daws", "semail" : "dws", "scourse" : "cs", "enrolled" : [ { "subjid" : 1, "grades" : [ { "prelim" : "A", "midterm" : "B", "prefinal" : "B", "final" : "A" } ] }, { "subjid" : 2, "grades" : [ { "prelim" : "D", "midterm" : "A", "prefinal" : "B", "final" : "F" } ] } ] }
I want display the grades of sid 2 who has enrolled subjid 1.
I tried using this aggregation line:
db.students2.aggregate( [{"$match":{"sid":{"$eq":2},"enrolled.subjid":{"$eq":2}}}, {$group: {_id:'$enrolled.subjid[1]', prelim:{$first:'$enrolled.grades.prelim'},midterm:{$first:'$enrolled.grades.midterm'},prefinal:{$first:'$enrolled.grades.prefinal'},"final":{$first:'$enrolled.grades.final'} } } ])
but this was the result:
{ "_id" : [ ], "prelim" : [ [ "A" ], [ "D" ] ], "midterm" : [ [ "B" ], [ "A" ] ], "prefinal" : [ [ "B" ], [ "B" ] ], "final" : [ [ "A" ], [ "F" ] ] }
I only wanted to get the grades of subjid 1 but it also got the grades of subjid 2
Maybe you need something like this:
db.collection.aggregate([
{
"$match": {
"sid": 2,
"enrolled.subjid": 1
}
},
{
"$addFields": {
"enrolled": {
"$filter": {
"input": "$enrolled",
"as": "en",
"cond": {
$eq: [
"$$en.subjid",
1
]
}
}
}
}
},
{
$unwind: "$enrolled"
},
{
$unwind: "$enrolled.grades"
},
{$limit:1}
,
{
$project: {
_id: "$enrolled.subjid",
prelim: "$enrolled.grades.prelim",
midterm: "$enrolled.grades.midterm",
prefinal: "$enrolled.grades.prefinal",
"final": "$enrolled.grades.final"
}
}
])
Explained:
Match the necessary documents (sid=2,subjid=1)
Filter only the enrolled elements based on subjid ( subjid=1 )
unwind the two array
limit to the first result document available only in case there is more.
project the necesary fields
playground

Inner array total count in mongodb

I have a journal doc. Which contains details of journal_volumes, journal issues and journal articles. I have to list the journals along with the count of volumes, issues and articles in each journal.
Here is my doc:
{
"_id" : ObjectId("5c470fc3135edb4413b0ea24"),
"jnl_code" : "KEG",
"jnl_volumes" : [
{
"name" : "1",
"created_date" : "2019-03-01",
"status" : "0",
"issue_flag" : "0",
"jnl_issues" : [
{
"issue_name" : "1",
"created_date" : "2019-03-04",
"jnl_articles" : [
"test",
"test2"
]
},
{
"issue_name" : "2",
"created_date" : "2019-03-04",
"jnl_articles" : [
"a"
]
},
{
"issue_name" : "3",
"created_date" : "2019-03-04",
"jnl_articles" : [
"b"
]
},
{
"issue_name" : "3",
"created_date" : "2019-03-05",
"jnl_articles" : [
"Q"
]
}
]
},
{
"name" : "2",
"created_date" : "2019-03-01",
"status" : "0",
"issue_flag" : "0",
"jnl_issues" : [
{
"issue_name" : "1",
"created_date" : "2019-03-05",
"jnl_articles" : [
"W"
]
},
{
"issue_name" : "1",
"created_date" : "2019-03-05",
"jnl_articles" : [
"S"
]
},
{
"issue_name" : "1",
"created_date" : "2019-03-05",
"jnl_articles" : [
"R"
]
},
{
"issue_name" : "1",
"created_date" : "2019-03-05",
"jnl_articles" : [
"R"
]
}
]
},
{
"name" : "3",
"created_date" : "2019-03-05",
"status" : "0",
"issue_flag" : "0"
}
]
}
My requirement is to get the count of jnl_volumes, total jnl_issues count and total jnl_articles count in single query..
Thanks to Neil Lunn to redirect me to the similar question (Calculate the count of nested objects with C#
). I referred the answer and wrote a query:
db.getCollection('rvh_journals').aggregate([
{
$project: {
"volumes" : { "$size" : { "$ifNull" : [ "$jnl_volumes", [] ] } },
"issues" : {
"$sum" : {
"$map" : {
"input" : "$jnl_volumes",
"in": { "$size" : { "$ifNull" : [ "$$this.jnl_issues", [] ] } }
}
}
},
"articles" : {
"$sum" : {
"$map" : {
"input" : "$jnl_volumes.jnl_issues.jnl_articles",
"in" : { "$size" : { "$ifNull" : [ "$$this", [] ] } }
}
}
}
}
}
])
This returns with an incorrect article count. Actual article count is 9 but the query returns 8
{
"_id" : ObjectId("5c470fc3135edb4413b0ea24"),
"volumes" : 3,
"issues" : 8,
"articles" : 8
}
Yes I got curious after your edit to the previous question, and noticed your statement was incorrect.
This one is correct:
db.getCollection('rvh_journals').aggregate([
{ "$project": {
"volumes": { "$size": "$jnl_volumes" },
"issues": {
"$sum": {
"$map": {
"input": "$jnl_volumes",
"in": { "$size": { "$ifNull": ["$$this.jnl_issues", [] ] } }
}
}
},
"articles": {
"$sum": {
"$map": {
"input": "$jnl_volumes",
"in": {
"$sum": {
"$map": {
"input": { "$ifNull": [ "$$this.jnl_issues", [] ] },
"in": { "$size": { "$ifNull": [ "$$this.jnl_articles", [] ] } }
}
}
}
}
}
}
}}
])
Returns:
{
"_id" : ObjectId("5c470fc3135edb4413b0ea24"),
"volumes" : 3,
"issues" : 8,
"articles" : 9
}
Note the traversal of the arrays.
You might go and read some of the actual words I used on that original linked answer, because I would have explained that nesting arrays like this is not a good idea. More details on why it's not a good idea and practical approaches to take otherwise are on Updating a Nested Array with MongoDB
enter image description here
.itcount() - is showing distinct count.
Need to show total count of count variable which is declare in $group
Currently it is showing based on company.

multiple group in mongodb

My collection look likes this.
{
"_id" : ObjectId("572c4ed33c1b5f51215219a8"),
"name" : "This is an angular course, and integeration with php",
"description" : "After we connected we can query or update the database just how we would using the mongo API with the exception that we use a callback. The format for callbacks is always callback(error, value) where error is null if no exception has occured. The update methods save, remove, update and findAndModify also pass the lastErrorObject as the last argument to the callback function.",
"difficulty_level" : "Beginner",
"type" : "Fast Track",
"tagged_skills" : [
{
"_id" : "5714e894e09a0f7d804b2254",
"name" : "PHP"
},
{
"_id" : "5717355806313b1f1715fa50",
"name" : "c++"
},
{
"_id" : "5715025bc2c5dbb4675180da",
"name" : "java"
},
{
"_id" : "5714f188ec325f5359979e33",
"name" : "symphony"
}
]}
I want to group by the collection on the basis of type,difficulty level and tagged skills and also get the count in a single query.
I am not been able to add skills count.
My query is as follows:-
db.course.aggregate([
{$unwind:"$tagged_skills"},
{$group:{
_id:null,
skills: { $addToSet: "$tagged_skills.name" },
Normal_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Normal"] },
1,
0
]
}},
Beginner_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Beginner"] },
1,
0
]
}},
Intermediate_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Intermediate"] },
1,
0
]
}},
Advanced_df:{$sum:{
"$cond": [
{ "$eq":[ "$difficulty_level","Advanced"] },
1,
0
]
}},
Fast_Track_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Fast Track"] },
1,
0
]
}},
Normal_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Normal"] },
1,
0
]
}},
Beginner_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Beginner"] },
1,
0
]
}},
Normal_Track_type:{$sum:{
"$cond": [
{ "$eq":[ "$type","Normal Track"] },
1,
0
]
}},
}}
])
The result is as follows:-
{
"_id" : null,
"skills" : [
"SQL",
"PHP",
"java",
"Angular Js",
"Laravel 23",
"c++",
"Node Js",
"symphony",
"Mysql",
"Express Js",
"JAVA"
],
"Normal_df" : 1,
"Beginner_df" : 14,
"Intermediate_df" : 7,
"Advanced_df" : 2,
"Fast_Track_type" : 8,
"Normal_type" : 6,
"Beginner_type" : 1,
"Normal_Track_type" : 9
}
I also want to get all skills with their count.
To get all the skills with their count, you need to first get a list of all the skills. You can obtain this list with running a distinct command on the approapriate fields. With this list you can then construct the appropriate $group pipeline document that will use the $sum and $cond operators.
Consider the following use case:
var difficultyLevels = db.course.distinct("difficulty_level"),
types = db.course.distinct("type"),
skills = db.course.distinct("tagged_skills.name"),
unwindOperator = { "$unwind": "$tagged_skills" },
groupOperator = {
"$group": {
"_id": null,
"skills": { "$addToSet": "$tagged_skills.name" }
}
};
difficultyLevels.forEach(function (df){
groupOperator["$group"][df+"_df"] = {
"$sum": {
"$cond": [ { "$eq": ["$difficulty_level", df] }, 1, 0]
}
}
});
types.forEach(function (type){
groupOperator["$group"][type.replace(" ", "_")+"_type"] = {
"$sum": {
"$cond": [ { "$eq": ["$type", type] }, 1, 0]
}
}
});
skills.forEach(function (skill){
groupOperator["$group"][skill] = {
"$sum": {
"$cond": [ { "$eq": ["$tagged_skills.name", skill] }, 1, 0]
}
}
});
//printjson(groupOperator);
db.course.aggregate([unwindOperator, groupOperator]);
In the first line, we obtain an array with the difficulty levels by running the distinct command on the difficulty_level field
db.course.distinct("difficulty_level")
This will produce the array
var difficultyLevels = ["Normal", "Beginner", "Intermediate", "Advanced"]
Likewise, the preceding distinct operations will return the list of possible unique values for that key.
After getting these lists, you can then create the pipeline objects using the forEach() method to populate the document keys for each given item in the list. You can then use the resulting document, which will look like this
printjson(groupOperator);
{
"$group" : {
"_id" : null,
"skills" : {
"$addToSet" : "$tagged_skills.name"
},
"Beginner_df" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$difficulty_level",
"Beginner"
]
},
1,
0
]
}
},
"Intermediate_df" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$difficulty_level",
"Intermediate"
]
},
1,
0
]
}
},
"Fast_Track_type" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$type",
"Fast Track"
]
},
1,
0
]
}
},
"PHP" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"PHP"
]
},
1,
0
]
}
},
"c++" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"c++"
]
},
1,
0
]
}
},
"java" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"java"
]
},
1,
0
]
}
},
"symphony" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"symphony"
]
},
1,
0
]
}
},
"C#" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"C#"
]
},
1,
0
]
}
},
"Scala" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"Scala"
]
},
1,
0
]
}
},
"javascript" : {
"$sum" : {
"$cond" : [
{
"$eq" : [
"$tagged_skills.name",
"javascript"
]
},
1,
0
]
}
}
}
}

Aggregate group multiple fields

Given the following dataset:
{ "_id" : 1, "city" : "Yuma", "cat": "roads", "Q1" : 0, "Q2" : 25, "Q3" : 0, "Q4" : 0 }
{ "_id" : 2, "city" : "Reno", "cat": "roads", "Q1" : 30, "Q2" : 0, "Q3" : 0, "Q4" : 60 }
{ "_id" : 3, "city" : "Yuma", "cat": "parks", "Q1" : 0, "Q2" : 0, "Q3" : 45, "Q4" : 0 }
{ "_id" : 4, "city" : "Reno", "cat": "parks", "Q1" : 35, "Q2" : 0, "Q3" : 0, "Q4" : 0 }
{ "_id" : 5, "city" : "Yuma", "cat": "roads", "Q1" : 0, "Q2" : 15, "Q3" : 0, "Q4" : 20 }
I'm trying to achieve the following result. It would be great to just return the totals greater than zero, and also compress each city, cat and Qx total to a single record.
{
"city" : "Yuma",
"cat" : "roads",
"Q2total" : 40
},
{
"city" : "Reno",
"cat" : "roads",
"Q1total" : 30
},
{
"city" : "Reno",
"cat" : "roads",
"Q4total" : 60
},
{
"city" : "Yuma",
"cat" : "parks",
"Q3total" : 45
},
{
"city" : "Reno",
"cat" : "parks",
"Q1total" : 35
},
{
"city" : "Yuma",
"cat" : "roads",
"Q4total" : 20
}
Possible?
We could ask, to what end? Your documents already have a nice consistent Object structure which is recommended. Having objects with varying keys is not a great idea. Data is "data" and should not really be the name of the keys.
With that in mind, the aggregation framework actually follows this sense and does not allow for the generation of arbitrary key names from data contained in the document. But you could get a similar result with the output as data points:
db.junk.aggregate([
// Aggregate first to reduce the pipeline documents somewhat
{ "$group": {
"_id": {
"city": "$city",
"cat": "$cat"
},
"Q1": { "$sum": "$Q1" },
"Q2": { "$sum": "$Q2" },
"Q3": { "$sum": "$Q3" },
"Q4": { "$sum": "$Q4" }
}},
// Convert the "quarter" elements to array entries with the same keys
{ "$project": {
"totals": {
"$map": {
"input": { "$literal": [ "Q1", "Q2", "Q3", "Q4" ] },
"as": "el",
"in": { "$cond": [
{ "$eq": [ "$$el", "Q1" ] },
{ "quarter": "$$el", "total": "$Q1" },
{ "$cond": [
{ "$eq": [ "$$el", "Q2" ] },
{ "quarter": "$$el", "total": "$Q2" },
{ "$cond": [
{ "$eq": [ "$$el", "Q3" ] },
{ "quarter": "$$el", "total": "$Q3" },
{ "quarter": "$$el", "total": "$Q4" }
]}
]}
]}
}
}
}},
// Unwind the array produced
{ "$unwind": "$totals" },
// Filter any "0" resutls
{ "$match": { "totals.total": { "$ne": 0 } } },
// Maybe project a prettier "flatter" output
{ "$project": {
"_id": 0,
"city": "$_id.city",
"cat": "$_id.cat",
"quarter": "$totals.quarter",
"total": "$totals.total"
}}
])
Which gives you results like this:
{ "city" : "Reno", "cat" : "parks", "quarter" : "Q1", "total" : 35 }
{ "city" : "Yuma", "cat" : "parks", "quarter" : "Q3", "total" : 45 }
{ "city" : "Reno", "cat" : "roads", "quarter" : "Q1", "total" : 30 }
{ "city" : "Reno", "cat" : "roads", "quarter" : "Q4", "total" : 60 }
{ "city" : "Yuma", "cat" : "roads", "quarter" : "Q2", "total" : 40 }
{ "city" : "Yuma", "cat" : "roads", "quarter" : "Q4", "total" : 20 }
You could alternately use mapReduce which allows "some" flexibility with key names. The catch is though that your aggregation is still by "quarter", so you need that as part of the primary key, which cannot be changed once emitted.
Additionally, you cannot "filter" any aggregated results of "0" without a second pass after outputting to a collection, so it's not really of much use for what you want to do, unless you can live with a second mapReduce operation of "transform" query on the output collection.
Worth note is if you look at what is being done in the "second" pipeline stage here with $project and $map you will see that the document structure is essentially being altered to sometime like what you could alternately structure your documents like originally, like this:
{
"city" : "Reno",
"cat" : "parks"
"totals" : [
{ "quarter" : "Q1", "total" : 35 },
{ "quarter" : "Q2", "total" : 0 },
{ "quarter" : "Q3", "total" : 0 },
{ "quarter" : "Q4", "total" : 0 }
]
},
{
"city" : "Yuma",
"cat" : "parks"
"totals" : [
{ "quarter" : "Q1", "total" : 0 },
{ "quarter" : "Q2", "total" : 0 },
{ "quarter" : "Q3", "total" : 45 },
{ "quarter" : "Q4", "total" : 0 }
]
}
Then the aggregation operation becomes simple for your documents to the same results as shown above:
db.collection.aggregate([
{ "$unwind": "$totals" },
{ "$group": {
"_id": {
"city": "$city",
"cat": "$cat",
"quarter": "$totals.quarter"
},
"ttotal": { "$sum": "$totals.total" }
}},
{ "$match": { "ttotal": { "$ne": 0 } },
{ "$project": {
"_id": 0,
"city": "$_id.city",
"cat": "$_id.cat",
"quarter": "$_id.quarter",
"total": "$ttotal"
}}
])
So it might make more sense to consider structuring your documents in that way to begin with and avoid any overhead required by the document transformation.
I think you'll find that consistent key names makes a far better object model to program to, where you should be reading the data point from the key-value and not the key-name. If you really need to, then it's a simple matter of reading the data from the object and transforming the keys of each already aggregated result in post processing.

mongodb aggregation match multiple $and on the same field

i have a document like this :
{
"ExtraFields" : [
{
"value" : "print",
"fieldID" : ObjectId("5535627631efa0843554b0ea")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
{
"value" : "POLYE",
"fieldID" : ObjectId("5535627631efa0843554b0ec")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627631efa0843554b0ed")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627631efa0843554b0ee")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627731efa0843554b0ef")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627831efa0843554b0f0")
},
{
"value" : "42",
"fieldID" : ObjectId("5535627831efa0843554b0f1")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
{
"value" : "19",
"fieldID" : ObjectId("5535627831efa0843554b0f4")
}
],
"id" : ObjectId("55369e60733e4914550832d0"), "title" : "A product"
}
what i want is to match one or more sets from the ExtraFields array. For example, all the products that contain the values print and 30. Since a value may be found in more than one fieldID (like 0 or true) we need to create a set like
WHERE (fieldID : ObjectId("5535627631efa0843554b0ea"), value : "print")
Where i'm having problems is when querying more than one fields. The pipeline i came up with is :
db.products.aggregate([
{'$unwind': '$ExtraFields'},
{
'$match': {
'$and': [{
'$and': [{'ExtraFields.value': {'$in': ["A52A2A"]}}, {
'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0ea")
}]
}
,
{
'$and': [{'ExtraFields.value': '14'}, {'ExtraFields.fieldID': ObjectId("5535627631efa0843554b0eb")}]
}
]
}
},
]);
This returns zero results, but this is what i want to do in theory. Match all items that contain set 1 AND all that contain set 2.
The end result should look like a faceted search output :
[
{
"_id" : {
"values" : "18",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
"count" : 2
},
{
"_id" : {
"values" : "33",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
"count" : 1
}
]
Any ideas?
You could try the following aggregation pipeline
db.products.aggregate([
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
{
"$unwind": "$ExtraFields"
},
{
"$match": {
"ExtraFields.value": { "$in": ["A52A2A", "14"] },
"ExtraFields.fieldID": {
"$in": [
ObjectId("5535627631efa0843554b0ea"),
ObjectId("5535627631efa0843554b0eb")
]
}
}
},
{
"$group": {
"_id": {
"value": "$ExtraFields.value",
"fieldID": "$ExtraFields.fieldID"
},
"count": {
"$sum": 1
}
}
}
])
With the sample document provided, this gives the output:
/* 1 */
{
"result" : [
{
"_id" : {
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
"count" : 1
}
],
"ok" : 1
}