Grouping over multiple fields in MongoDb - mongodb

How would I go about grouping over multiple fields? I need to get a unique count for case insensitive true over multiple independent documents.
I've looked at both map/reduce and aggregation and I don't quite know what would be the best approach.
Lets say I have the following data in my collection:
/* 0 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : "",
}
}
/* 1 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : "",
"D" : "TRUE"
}
}
/* 2 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "TRUE",
"C" : "",
"D" : "false"
}
}
/* 3 */
{
"_id" : ObjectId("****"),
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "false",
"A" : "true",
"B" : "false",
"D" : "true"
}
}
I would like to output the following data, formatting is not important:
isMail : 3
A : 4
B : 3
C : 0
D : 2
Total : 4

Using the conditional operator $cond to map "true" to 1 and anything else to 0, you might achieve the desired result. This is only complicated by the fact your "boolean" values are in fact strings, and that you have case variation on the "true" value -- that's why I use $toLower in the code below:
// Count, over the whole collection, how many documents carry a
// case-insensitive "true" string in each Data flag, plus the document total.
db.test.sample.aggregate([
  {
    $group: {
      // A null _id puts every document into one single group.
      _id: null,
      // $toLower normalises "TRUE"/"true" before the comparison.
      isMail: { $sum: { $cond: { if: { $eq: [{ $toLower: "$Data.isMail" }, "true"] }, then: 1, else: 0 } } },
      A: { $sum: { $cond: { if: { $eq: [{ $toLower: "$Data.A" }, "true"] }, then: 1, else: 0 } } },
      B: { $sum: { $cond: { if: { $eq: [{ $toLower: "$Data.B" }, "true"] }, then: 1, else: 0 } } },
      C: { $sum: { $cond: { if: { $eq: [{ $toLower: "$Data.C" }, "true"] }, then: 1, else: 0 } } },
      D: { $sum: { $cond: { if: { $eq: [{ $toLower: "$Data.D" }, "true"] }, then: 1, else: 0 } } },
      total: { $sum: 1 }
    }
  },
  // Hide the null _id and keep only the counters.
  {
    $project: {
      _id: 0,
      A: 1,
      B: 1,
      C: 1,
      D: 1,
      total: 1,
      isMail: 1
    }
  }
])
Producing:
{ "isMail" : 3, "A" : 4, "B" : 3, "C" : 0, "D" : 2, "total" : 4 }

If you could change the schema design so that the data keys become values, it would go a long way towards making aggregation operations on the data easier. A better schema would look like this:
{
"_id" : ObjectId("5548de01180e84997293903f"),
"IsPartOfBatch" : false,
"Data" : [
{
"key" : "isMail",
"value" : true
},
{
"key" : "A",
"value" : true
},
{
"key" : "B",
"value" : true
},
{
"key" : "C",
"value" : false
},
{
"key" : "D",
"value" : false
}
]
}
Let's use the sample data set you provided in your question:
// Seed the `test` collection with the four sample documents from the question.
// Every Data flag is stored as a string ("true", "TRUE", "false" or ""), and
// some documents omit a flag entirely (no "D" in the first, no "C" in the last).
db.test.insert([
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : ""
}
},
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "true",
"C" : "",
"D" : "TRUE"
}
},
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "true",
"A" : "true",
"B" : "TRUE",
"C" : "",
"D" : "false"
}
},
{
"IsPartOfBatch" : false,
"Data" : {
"isMail" : "false",
"A" : "true",
"B" : "false",
"D" : "true"
}
}
]);
To change the schema so that it follows the above recommended structure, use the following code snippet (performance may be slow over very large datasets):
// One-off migration: replace the embedded `Data` document
// ({ flagName: "true" | "TRUE" | "false" | "" ... }) with an array of
// { key, value } pairs whose `value` is a real boolean.
// Only documents whose Data.isMail is a string (BSON type 2) are selected.
db.test.find({ "Data.isMail": { $type: 2 } }).forEach(function (doc) {
  var data = [];
  if (doc.Data) {
    // Object.keys keeps `key` local to the callback (the original `for (key in ...)`
    // created an implicit global) and visits own properties only.
    Object.keys(doc.Data).forEach(function (key) {
      data.push({
        key: key,
        // Case-insensitive test: "true", "TRUE", "True", ... all become true;
        // anything else (including "" and "false") becomes false.
        value: String(doc.Data[key]).toLowerCase() === "true"
      });
    });
  }
  // Update just the Data field instead of re-saving the whole document.
  db.test.updateOne({ _id: doc._id }, { $set: { Data: data } });
});
A simple db.test.findOne() query will give the result:
{
"_id" : ObjectId("5548de01180e84997293903f"),
"IsPartOfBatch" : false,
"Data" : [
{
"key" : "isMail",
"value" : true
},
{
"key" : "A",
"value" : true
},
{
"key" : "B",
"value" : true
},
{
"key" : "C",
"value" : false
},
{
"key" : "D",
"value" : false
}
]
}
Now you can use aggregation framework to get the counts of the keys with true values:
// Per-key count of entries whose value is boolean true.
db.test.aggregate([
  // Emit one document per element of the Data array.
  { $unwind: "$Data" },
  // Reduce every element to its key and a 1/0 marker.
  {
    $project: {
      _id: 0,
      key: "$Data.key",
      isTrue: {
        $cond: { if: { $eq: ["$Data.value", true] }, then: 1, else: 0 }
      }
    }
  },
  // Sum the markers for each key.
  {
    $group: {
      _id: "$key",
      count: { $sum: "$isTrue" }
    }
  }
])
Output
/* 0 */
{
"result" : [
{
"_id" : "D",
"count" : 2
},
{
"_id" : "C",
"count" : 0
},
{
"_id" : "B",
"count" : 3
},
{
"_id" : "A",
"count" : 4
},
{
"_id" : "isMail",
"count" : 3
}
],
"ok" : 1
}
You can then reshape the result with native JavaScript. MongoDB's aggregation framework cannot project field values as keys here, so you have to rely on JavaScript to do this:
// Same pipeline as above: one { _id: <key>, count: <n> } document per Data key.
var pipeline = [
  { "$unwind": "$Data" },
  {
    "$project": {
      "_id": 0,
      "key": "$Data.key",
      "isTrue": {
        "$cond": [{ "$eq": ["$Data.value", true] }, 1, 0]
      }
    }
  },
  {
    "$group": {
      "_id": "$key",
      "count": { "$sum": "$isTrue" }
    }
  }
];
var agg = db.test.aggregate(pipeline);

// Fold the per-key documents into a single { key: count, ... } object.
var obj = {};
agg.forEach(function (doc) {
  obj[doc._id] = doc.count;
});

// `result` holds that object once. The original pushed `obj` inside the loop,
// which stored the same reference once per key. An empty aggregation still
// yields an empty array, as before.
var result = Object.keys(obj).length > 0 ? [obj] : [];

Related

Mongodb - Array indexed by string in $addToSet operator

Suppose we have these two documents:
{
"_id" : ObjectId("5f3cdd1d0fefeba343ff3093"),
"country" : "C1",
"time" : "1994",
"value" : NumberInt(100),
"type" : "type1",
"origin" : "O1"
}
{
"_id" : ObjectId("5f3cdd1d0fefeba343ff3094"),
"country" : "C1",
"time" : "1994",
"value" : NumberInt(200),
"type" : "type1",
"origin" : "O2"
}
I want to retrieve the aggregation with the origin array indexed by strings (the value of "type"); expected output:
{
"_id" : {
"country" : "C1",
"time" : "1994"
},
"TOT" : NumberInt(300),
"count" : 2.0,
"origin" : [
"O1": NumberInt(100),
"O2": NumberInt(200)
]
}
Here we have the type of the "origin" array as {[key: string]: number}.
With the following query, the origin array is instead indexed by numbers:
use local;
// Asker's current attempt: totals per (country, time), with `origin` collected
// as a plain array of the distinct values.
db.getCollection("test_collection").aggregate(
  [
    { $match: { type: { $in: ["type1"] } } },
    {
      $group: {
        _id: { country: "$country", time: "$time" },
        TOT: { $sum: "$value" },
        count: { $sum: 1.0 },
        // $addToSet on the bare value produces an array with no string keys.
        origin: { $addToSet: "$value" }
      }
    }
  ],
  { allowDiskUse: false }
);
You can try $arrayToObject after adding in origin,
use local;
db.getCollection("test_collection").aggregate(
  [
    {
      $match: {
        type: { $in: ["type1"] }
      }
    },
    {
      $group: {
        _id: { country: "$country", time: "$time" },
        TOT: { $sum: "$value" },
        count: { $sum: 1.0 },
        // Collect { k, v } pairs: the shape $arrayToObject expects.
        origin: {
          $addToSet: { k: "$origin", v: "$value" }
        }
      }
    },
    // Turn the pairs into one object keyed by the origin name.
    {
      $addFields: {
        origin: { $arrayToObject: "$origin" }
      }
    }
  ],
  { allowDiskUse: false }
)
Playground

Group and Merge array of objects

I am struggling around with the aggregation pipeline feature from MongoDB.
So far the output for one result looks like this:
{
"type": "inbound",
"sender": "postAG",
"receiver": "maxMusterMan",
"datetime": "20191125",
"info": [
{
"q": "A",
"value": "5",
"name": null,
"plz": 1234
},
{
"q": "B",
"value": "AS",
"name": "ABS",
"plz": null
},
{
"q": "A",
"value": "5",
"name": "aa",
"plz": null
},
... more objects
]
}
The final result should look like:
{
"type": "inbound",
"sender": "postAG",
"receiver": "maxMusterMan",
"datetime": "20191125",
"info": [
{
"q": "A",
"value": "0",
"name": "aa",
"plz": 1234
},
{
"q": "B",
"value": "AS",
"name": "ABS"
}
]
}
So in a nutshell, I want to group the values from the array field info by the "q" field and merge the objects (newer one overwrites the old value).
Further I would like to remove all the values with value "" or null;
There are more fields in the real payload, so I would like to avoid to add a $cond for each field of the object.
Some approaches so far from my side:
for the cleanup, use a UDF, but this is not possible in the pipeline.
use map-reduce for the group and merge, not available in the pipeline.
Please consider that the input file is the output from the several pipeline steps.
So I can not just use map-reduce alone, first I need the pipeline too.
My idea was to create two views, first will do the pipeline stuff and second map-reduce, is this a good solution?
Thx
Andreas
I didn't really understand from your explanation if you can or cannot use map-reduce.
However assuming you can't and you have to 'concat' the pipelines there is no 'generic' workaround for multiple fields - you have to create a condition for each in the pipeline.
With that said here is a working pipeline:
// Group the entries of `info` by their "q" key and merge them field by field:
// for each field the last non-empty value wins, and a field with no non-empty
// value is left out of the merged entry.
// NOTE(review): the first $group keys on "$info.q" only and the last one uses
// _id: null, so this presumably expects a single source document in the
// pipeline — confirm before running it over several documents.
db.collection.aggregate([
  // One document per info entry.
  { "$unwind": "$info" },

  // Per "q": remember the top-level fields and collect every value seen
  // for each info field.
  {
    "$group": {
      "_id": "$info.q",
      "type": { "$first": "$type" },
      "sender": { "$first": "$sender" },
      "receiver": { "$first": "$receiver" },
      "datetime": { "$first": "$datetime" },
      "values": { "$push": "$info.value" },
      "names": { "$push": "$info.name" },
      "plz": { "$push": "$info.plz" }
    }
  },

  // Drop null and "" entries. Both tests have to hold, hence $and: with $or
  // the condition is true for every value (nothing is null and "" at once),
  // so nothing was ever filtered out.
  {
    "$project": {
      "_id": 1,
      "type": 1,
      "sender": 1,
      "receiver": 1,
      "datetime": 1,
      "values": {
        "$filter": {
          "input": "$values",
          "as": "curr",
          "cond": {
            "$and": [
              { "$ne": ["$$curr", null] },
              { "$ne": ["$$curr", ""] }
            ]
          }
        }
      },
      "names": {
        "$filter": {
          "input": "$names",
          "as": "curr",
          "cond": {
            "$and": [
              { "$ne": ["$$curr", null] },
              { "$ne": ["$$curr", ""] }
            ]
          }
        }
      },
      "plz": {
        "$filter": {
          "input": "$plz",
          "as": "curr",
          "cond": {
            "$and": [
              { "$ne": ["$$curr", null] },
              { "$ne": ["$$curr", ""] }
            ]
          }
        }
      }
    }
  },

  // Keep the last surviving value of each field (index -1), or null when
  // the filtered array is empty.
  {
    "$project": {
      "sender": 1,
      "receiver": 1,
      "datetime": 1,
      "type": 1,
      "_id": 1,
      "value": {
        "$cond": {
          "if": { "$gt": [{ "$size": "$values" }, 0] },
          "then": { "$arrayElemAt": ["$values", -1] },
          "else": null
        }
      },
      "name": {
        "$cond": {
          "if": { "$gt": [{ "$size": "$names" }, 0] },
          "then": { "$arrayElemAt": ["$names", -1] },
          "else": null
        }
      },
      "plz": {
        "$cond": {
          "if": { "$gt": [{ "$size": "$plz" }, 0] },
          "then": { "$arrayElemAt": ["$plz", -1] },
          "else": null
        }
      }
    }
  },

  // Express the merged entry as k/v pairs so null fields can be removed in
  // one generic step. The grouping key is written back as "q", the field name
  // used in the requested output (the original emitted it as "type").
  {
    "$addFields": {
      "infoArray": [
        { "k": "q", "v": "$_id" },
        { "k": "value", "v": "$value" },
        { "k": "name", "v": "$name" },
        { "k": "plz", "v": "$plz" }
      ]
    }
  },

  // Remove the pairs without a value and turn the rest back into an object.
  {
    "$addFields": {
      "info": {
        "$arrayToObject": {
          "$filter": {
            "input": "$infoArray",
            "as": "curr",
            "cond": { "$ne": ["$$curr.v", null] }
          }
        }
      }
    }
  },

  // Re-assemble one document whose `info` array holds the merged entries.
  {
    "$group": {
      "_id": null,
      "type": { "$first": "$type" },
      "sender": { "$first": "$sender" },
      "receiver": { "$first": "$receiver" },
      "datetime": { "$first": "$datetime" },
      "info": { "$push": "$info" }
    }
  }
])

how to apply $setunion for this aggregation?

Here my db is:
{
"_id" : ObjectId("5d28667fb0adb622b905ccd2"),
"requestedDate" : ISODate("2019-07-12T10:52:47.711Z"),
"requestType" : "A",
"isRequestSuccess" : false,
"responseDate" : ISODate("2019-07-12T10:53:19.213Z"),
"__v" : 0
},
{
"_id" : ObjectId("5d28667fb0adb622b905ccd2"),
"requestedDate" : ISODate("2019-07-12T10:52:47.711Z"),
"requestType" : "C",
"isRequestSuccess" : false,
"responseDate" : ISODate("2019-07-12T10:53:19.213Z"),
"__v" : 0
},
{
"_id" : ObjectId("5d28667fb0adb622b905ccd2"),
"requestedDate" : ISODate("2019-07-12T10:52:47.711Z"),
"requestedType" : "A",
"isRequestSuccess" : false,
"responseDate" : ISODate("2019-07-12T10:53:19.213Z"),
"__v" : 0
}
I need to get the counts for each requestType, split by isRequestSuccess (success or failure):
[
{ requestedType: "A", isRequestSuccess: 2, isRequestFalse: 1 },
{ requestedType: "C", isRequestSuccess: 1, isRequestFalse: 3 }
]
How can I get those values?
// Per request type, count the successful and the failed requests.
// NOTE(review): the grouping key is "$requestedType", while two of the three
// sample documents spell the field "requestType" — confirm the real field name.
db.getCollection('test').aggregate({
  $group: {
    _id: "$requestedType",
    // Adds 1 for every document whose isRequestSuccess is truthy.
    isRequestSuccess: {
      $sum: { $cond: { if: "$isRequestSuccess", then: 1, else: 0 } }
    },
    // Mirror image: adds 1 for every document where it is not.
    isRequestFalse: {
      $sum: { $cond: { if: "$isRequestSuccess", then: 0, else: 1 } }
    }
  }
})

mongodb aggregation match multiple $and on the same field

i have a document like this :
{
"ExtraFields" : [
{
"value" : "print",
"fieldID" : ObjectId("5535627631efa0843554b0ea")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
{
"value" : "POLYE",
"fieldID" : ObjectId("5535627631efa0843554b0ec")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627631efa0843554b0ed")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627631efa0843554b0ee")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627731efa0843554b0ef")
},
{
"value" : "0",
"fieldID" : ObjectId("5535627831efa0843554b0f0")
},
{
"value" : "42",
"fieldID" : ObjectId("5535627831efa0843554b0f1")
},
{
"value" : "30",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
{
"value" : "14",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
{
"value" : "19",
"fieldID" : ObjectId("5535627831efa0843554b0f4")
}
],
"id" : ObjectId("55369e60733e4914550832d0"), "title" : "A product"
}
what i want is to match one or more sets from the ExtraFields array. For example, all the products that contain the values print and 30. Since a value may be found in more than one fieldID (like 0 or true) we need to create a set like
WHERE (fieldID : ObjectId("5535627631efa0843554b0ea"), value : "print")
Where i'm having problems is when querying more than one fields. The pipeline i came up with is :
// Asker's attempt. After $unwind every document carries exactly one
// ExtraFields element, and the outer $and asks that single element to have
// two different fieldIDs at once — which is why nothing is returned.
db.products.aggregate([
  { $unwind: "$ExtraFields" },
  {
    $match: {
      $and: [
        // set 1
        {
          $and: [
            { "ExtraFields.value": { $in: ["A52A2A"] } },
            { "ExtraFields.fieldID": ObjectId("5535627631efa0843554b0ea") }
          ]
        },
        // set 2
        {
          $and: [
            { "ExtraFields.value": "14" },
            { "ExtraFields.fieldID": ObjectId("5535627631efa0843554b0eb") }
          ]
        }
      ]
    }
  }
]);
This returns zero results, but this is what i want to do in theory. Match all items that contain set 1 AND all that contain set 2.
The end result should look like a faceted search output :
[
{
"_id" : {
"values" : "18",
"fieldID" : ObjectId("5535627831efa0843554b0f3")
},
"count" : 2
},
{
"_id" : {
"values" : "33",
"fieldID" : ObjectId("5535627831efa0843554b0f2")
},
"count" : 1
}
]
Any ideas?
You could try the following aggregation pipeline
// Facet-style counts for the requested (fieldID, value) sets.
// Each set is matched as a pair. The original used two independent $in lists,
// which also accepted cross combinations such as value "14" stored under
// fieldID ...b0ea.
db.products.aggregate([
  // Keep only products that have at least one array element matching one of
  // the sets; $elemMatch forces fieldID and value to match on the same element.
  {
    "$match": {
      "$or": [
        {
          "ExtraFields": {
            "$elemMatch": {
              "fieldID": ObjectId("5535627631efa0843554b0ea"),
              "value": "A52A2A"
            }
          }
        },
        {
          "ExtraFields": {
            "$elemMatch": {
              "fieldID": ObjectId("5535627631efa0843554b0eb"),
              "value": "14"
            }
          }
        }
      ]
    }
  },
  // One document per array element.
  { "$unwind": "$ExtraFields" },
  // After $unwind each document holds a single element, so plain dotted
  // conditions inside each $or branch apply to that same element.
  {
    "$match": {
      "$or": [
        {
          "ExtraFields.fieldID": ObjectId("5535627631efa0843554b0ea"),
          "ExtraFields.value": "A52A2A"
        },
        {
          "ExtraFields.fieldID": ObjectId("5535627631efa0843554b0eb"),
          "ExtraFields.value": "14"
        }
      ]
    }
  },
  // Count the matching elements per (value, fieldID) pair.
  {
    "$group": {
      "_id": {
        "value": "$ExtraFields.value",
        "fieldID": "$ExtraFields.fieldID"
      },
      "count": { "$sum": 1 }
    }
  }
])
With the sample document provided, this gives the output:
/* 1 */
{
"result" : [
{
"_id" : {
"value" : "14",
"fieldID" : ObjectId("5535627631efa0843554b0eb")
},
"count" : 1
}
],
"ok" : 1
}

How do I create nested aggregations with count on MongoDB?

I am learning MongoDB in order to see if it matches our needs.
Currently we use heavily aggregations, so I am testing the flexibility of the Aggregation Framework.
I started with this hierarchy
// Seed data for the hierarchy: `owner` holds the name of the parent document.
// "A" has no owner field, so it ends up in the group whose key is null.
db.companytest3.insert({"name":"A", age:7})
db.companytest3.insert({"name":"B", age:17, owner:"A"})
db.companytest3.insert({"name":"C", age:12, owner:"A"})
db.companytest3.insert({"name":"D", age:7, owner:"B"})
db.companytest3.insert({"name":"E", age:13, owner:"B"})
db.companytest3.insert({"name":"F", age:23, owner:"C"})
So I have:
db.companytest3.find()
{ "_id" : ObjectId("5457c2c0fa82c305e0b80006"), "name" : "A", "age" : 7 }
{ "_id" : ObjectId("5457c2cafa82c305e0b80007"), "name" : "A", "age" : 7 }
{ "_id" : ObjectId("5457c2d0fa82c305e0b80008"), "name" : "B", "age" : 17, "owner" : "A" }
{ "_id" : ObjectId("5457c2d6fa82c305e0b80009"), "name" : "C", "age" : 12, "owner" : "A" }
{ "_id" : ObjectId("5457c2ddfa82c305e0b8000a"), "name" : "D", "age" : 7, "owner" : "B" }
{ "_id" : ObjectId("5457c2e4fa82c305e0b8000b"), "name" : "E", "age" : 13, "owner" : "B" }
{ "_id" : ObjectId("5457c2eafa82c305e0b8000c"), "name" : "F", "age" : 23, "owner" : "C" }
My goal is to aggregate the children using their ages, so I have something like this:
{
"_id" : null,
"children" : [
{
"range:" : "lower than 10",
total: 1,
names: ["A"]
}
{
"range:" : "higher than 10",
total: 0,
names: []
}
],
"total" : 1
}
{
"_id" : "A",
"children" : [
{
"range:" : "lower than 10",
total: 0,
names: []
}
{
"range:" : "higher than 10",
total: 2,
names: ["C","B"]
}
],
"total" : 1
}
{
"_id" : "B",
"children" : [
{
"range:" : "lower than 10",
total: 1,
names: ["D"]
}
{
"range:" : "higher than 10",
total: 13,
names: ["E"]
}
],
"total" : 1
}
{
"_id" : "C",
"children" : [
{
"range:" : "lower than 10",
total: 0,
names: []
}
{
"range:" : "higher than 10",
total: 1,
names: ["F"]
}
],
"total" : 1
}
I feel I am getting near, I've got this query:
// Asker's attempt: label each document with an age range, group by
// (owner, range), then regroup by owner.
db.companytest3.aggregate(
  {
    $project: {
      _id: 0,
      // Exactly one of the two $cond branches yields a non-empty label for
      // integer ages, so the concatenation is the range name.
      range: {
        $concat: [
          { $cond: { if: { $lte: ["$age", 10] }, then: "até 10", else: "" } },
          { $cond: { if: { $gte: ["$age", 11] }, then: "mais de 10", else: "" } }
        ]
      },
      owner: "$owner",
      name: "$name"
    }
  },
  {
    $group: {
      _id: { owner: "$owner", range: "$range" },
      children: { $addToSet: { name: "$name", range: "$range" } },
      total: { $sum: 1 }
    }
  },
  // `total` is not carried through this stage, so it is missing from the output.
  {
    $group: {
      _id: { owner: "$_id.owner" },
      children: { $addToSet: "$children" }
    }
  }
)
which gives me the following output:
{ "_id" : { "owner" : null }, "children" : [ [ { "name" : "A", "range" : "até 10" } ] ] }
{ "_id" : { "owner" : "A" }, "children" : [ [ { "name" : "C", "range" : "mais de 10" }, { "name" : "B", "range" : "mais de 10" } ] ] }
{ "_id" : { "owner" : "B" }, "children" : [ [ { "name" : "D", "range" : "até 10" } ], [ { "name" : "E", "range" : "mais de 10" } ] ] }
{ "_id" : { "owner" : "C" }, "children" : [ [ { "name" : "F", "range" : "mais de 10" } ] ] }
Now I am having trouble grouping the items by owner while keeping the summed total; I am stuck and do not know how to proceed. I've tried many different alternatives using group variations, but I do not feel they are worth posting here.
How can I change my current query so I group the children by range and add the count?
thanks! :D
It should be possible in earlier versions, but even basically looking at how you want to manipulate the result, the simplest way I can see is with the help of some operators introduced in MongoDB 2.6.
db.companytest3.aggregate([
  // Per owner: gather names and counts for both age ranges side by side.
  // A name that does not belong to a range is recorded as `false`.
  {
    "$group": {
      "_id": "$owner",
      "lowerThanTenNames": {
        "$addToSet": {
          "$cond": { "if": { "$lte": ["$age", 10] }, "then": "$name", "else": false }
        }
      },
      "lowerThanTenTotal": {
        "$sum": {
          "$cond": { "if": { "$lte": ["$age", 10] }, "then": 1, "else": 0 }
        }
      },
      "moreThanTenNames": {
        "$addToSet": {
          "$cond": { "if": { "$gte": ["$age", 11] }, "then": "$name", "else": false }
        }
      },
      "moreThanTenTotal": {
        "$sum": {
          "$cond": { "if": { "$gte": ["$age", 11] }, "then": 1, "else": 0 }
        }
      }
    }
  },
  // Re-map the four discrete fields onto a two-element `children` array.
  {
    "$project": {
      "children": {
        "$map": {
          // Two-element template: "L" selects the lower range, anything else
          // the higher one.
          "input": { "$literal": ["L", "M"] },
          "as": "el",
          "in": {
            "$cond": {
              "if": { "$eq": ["$$el", "L"] },
              "then": {
                "range": { "$literal": "lower than 10" },
                "total": "$lowerThanTenTotal",
                // Strip the `false` placeholders left by the $cond above.
                "names": { "$setDifference": ["$lowerThanTenNames", [false]] }
              },
              "else": {
                "range": { "$literal": "higher than 10" },
                "total": "$moreThanTenTotal",
                "names": { "$setDifference": ["$moreThanTenNames", [false]] }
              }
            }
          }
        }
      },
      // Combined count over both ranges.
      "total": { "$add": ["$lowerThanTenTotal", "$moreThanTenTotal"] }
    }
  },
  { "$sort": { "_id": 1 } }
])
Basically you want to separate these out into two sets of results for each grouping, being one for each age range. Due to the use of conditional operators, the "names" sets then need to be filtered for any false values where the conditions did not match.
The other thing that needs to be done is to coerce these results from separate fields into an array. The $map operator makes this simple by just providing a two element template with effectively "A/B" choices to do the re-mapping.
Since we had discrete fields here before they were re-mapped onto an array, you can just supply each "total" field as an argument to $add in order to get the combined total.
Produces exactly this:
{
"_id" : null,
"children" : [
{
"range" : "lower than 10",
"total" : 1,
"names" : ["A"]
},
{
"range" : "higher than 10",
"total" : 0,
"names" : [ ]
}
],
"total" : 1
}
{
"_id" : "A",
"children" : [
{
"range" : "lower than 10",
"total" : 0,
"names" : [ ]
},
{
"range" : "higher than 10",
"total" : 2,
"names" : ["C","B"]
}
],
"total" : 2
}
{
"_id" : "B",
"children" : [
{
"range" : "lower than 10",
"total" : 1,
"names" : ["D"]
},
{
"range" : "higher than 10",
"total" : 1,
"names" : ["E"]
}
],
"total" : 2
}
{
"_id" : "C",
"children" : [
{
"range" : "lower than 10",
"total" : 0,
"names" : [ ]
},
{
"range" : "higher than 10",
"total" : 1,
"names" : ["F"]
}
],
"total" : 1
}