Mongodb - Group by Array element - mongodb

I am trying to group my JSON data using the element inside array.
I think db.collection.aggregate will work. But I need to write custom function (reduce function) instead of built-in aggregation operators/expressions. Any suggestions?
Sample data:
{
"name" : "Person Name",
"age" : 50,
"expense" : [
{
"category" : "food",
"date" : "2017-01-01",
"amount" : 100
},
{
"category" : "travel",
"date" : "2017-01-02",
"amount" : 200
}
]
}
My code:
db.collection.group({
key: { "expense.category": 1 },
initial: { total : 0 },
reduce: function( curr, result ) {
result.total += curr.amount;
}
})
where, key: { "expense.category": 1 } does not work for me.

Related

Group based on discrete date ranges

I am new to MongoDB and I've been struggling to get a specific query to work without any luck.
I have a collection with millions of documents having a date and an amount, I want to get the aggregations for specific periods of time.
For example, I want to get the count, amount summations for the periods between 1/1/2015 - 15/1/2015 and between 1/2/2015 - 15/2/2015
A sample collection is
{ "_id" : "148404972864202083547392254", "account" : "3600", "amount" : 50, "date" : ISODate("2017-01-01T12:02:08.642Z")}
{ "_id" : "148404972864202085437392254", "account" : "3600", "amount" : 50, "date" : ISODate("2017-01-03T12:02:08.642Z")}
{ "_id" : "148404372864202083547392254", "account" : "3600", "amount" : 70, "date" : ISODate("2017-01-09T12:02:08.642Z")}
{ "_id" : "148404972864202083547342254", "account" : "3600", "amount" : 150, "date" : ISODate("2017-01-22T12:02:08.642Z")}
{ "_id" : "148404922864202083547392254", "account" : "3600", "amount" : 200, "date" : ISODate("2017-02-02T12:02:08.642Z")}
{ "_id" : "148404972155502083547392254", "account" : "3600", "amount" : 30, "date" : ISODate("2017-02-7T12:02:08.642Z")}
{ "_id" : "148404972864202122254732254", "account" : "3600", "amount" : 10, "date" : ISODate("2017-02-10T12:02:08.642Z")}
for date ranges between 1/1/2017 - 10/10/2017 and 1/2/2017 - 10/2/2017 the output would be like this:
1/1/2017 - 10/1/2017 - count =3, amount summation: 170
10/2/2017 - 15/2/2017 - count =2, amount summation: 40
Is it possible to work with such different date ranges? The code would be in Java, but as an example in mongo, can someone please help me?
There must be a more elegant solution than this. Anyways you can wrap it into a function and generalize date related arguments.
First, you need to make a projection at the same time deciding into which range an item goes (note the huge $switch expression). By default, an item goes into 'null' range.
Then, you filter out results that didn't match your criteria (i.e. range != null).
The very last step is to group items by the range and make all needed calculations.
db.items.aggregate([
{ $project : {
amount : true,
account : true,
date : true,
range : {
$switch : {
branches : [
{
case : {
$and : [
{ $gte : [ "$date", ISODate("2017-01-01T00:00:00.000Z") ] },
{ $lt : [ "$date", ISODate("2017-01-10T00:00:00.000Z") ] }
]
},
then : { $concat : [
{ $dateToString: { format: "%d/%m/%Y", date: ISODate("2017-01-01T00:00:00.000Z") } },
{ $literal : " - " },
{ $dateToString: { format: "%d/%m/%Y", date: ISODate("2017-01-10T00:00:00.000Z") } }
] }
},
{
case : {
$and : [
{ $gte : [ "$date", ISODate("2017-02-01T00:00:00.000Z") ] },
{ $lt : [ "$date", ISODate("2017-02-10T00:00:00.000Z") ] }
]
},
then : { $concat : [
{ $dateToString: { format: "%d/%m/%Y", date: ISODate("2017-02-01T00:00:00.000Z") } },
{ $literal : " - " },
{ $dateToString: { format: "%d/%m/%Y", date: ISODate("2017-02-10T00:00:00.000Z") } }
] }
}
],
default : null
}
}
} },
{ $match : { range : { $ne : null } } },
{ $group : {
_id : "$range",
count : { $sum : 1 },
"amount summation" : { $sum : "$amount" }
} }
])
Based on your data it will give the following results*:
{ "_id" : "01/02/2017 - 10/02/2017", "count" : 2, "amount summation" : 230 }
{ "_id" : "01/01/2017 - 10/01/2017", "count" : 3, "amount summation" : 170 }
*I believe you have few typos in your questions, that's why the data look different.

How to select and count distinct value of embedded array of collection in MongoDB?

Collection A have:
[
{
name: 'peter',
types: ['human', 'male', 'young']
},
{
name: 'mice',
types: ['male', 'young']
},
{
name: 'hen',
types: ['female', 'old']
}
]
I know how to get all distinct values of types but how to get its no. of appearance. How to extract it with Mongo query?
It would be great if you can show the solution in Doctrine QueryBuilder way.
Thanks
with aggregation framework you can sum apperance of all array elements using query provide below:
db.collection.aggregate([{
$project : {
_id : 0,
types : 1
}
}, {
$unwind : "$types"
}, {
$group : {
_id : "$types",
count : {
$sum : 1
}
}
}
])
and output:
{
"_id" : "human",
"count" : 1
}, {
"_id" : "old",
"count" : 1
}, {
"_id" : "male",
"count" : 2
}, {
"_id" : "young",
"count" : 2
}, {
"_id" : "female",
"count" : 1
}

$avg in mongodb aggregation

Document looks like this:
{
"_id" : ObjectId("361de42f1938e89b179dda42"),
"user_id" : "u1",
"evaluator_id" : "e1",
"candidate_id" : ObjectId("54f65356294160421ead3ca1"),
"OVERALL_SCORE" : 150,
"SCORES" : [
{ "NAME" : "asd", "OBTAINED_SCORE" : 30}, { "NAME" : "acd", "OBTAINED_SCORE" : 36}
]
}
Aggregation function:
db.coll.aggregate([ {$unwind:"$SCORES"}, {$group : { _id : { user_id : "$user_id", evaluator_id : "$evaluator_id"}, AVG_SCORE : { $avg : "$SCORES.OBTAINED_SCORE" }}} ])
Suppose if there are two documents with same "user_id" (say u1) and different "evaluator_id" (say e1 and e2).
For example:
1) Average will work like this ((30 + 20) / 2 = 25). This is working for me.
2) But for { evaluator_id : "e1" } document, score is 30 for { "NAME" : "asd" } and { evaluator_id : "e2" } document, score is 0 for { "NAME" : "asd" }. In this case, I want the AVG_SCORE to be 30 only (not (30 + 0) / 2 = 15).
Is it possible through aggregation??
Could any one help me out.
It's possible by placing a $match between the $unwind and $group aggregation pipelines to first filter the arrays which match the specified condition to include in the average computation and that is, score array where the obtained score is not equal to 0 "SCORES.OBTAINED_SCORE" : { $ne : 0 }
db.coll.aggregate([
{
$unwind: "$SCORES"
},
{
$match : {
"SCORES.OBTAINED_SCORE" : { $ne : 0 }
}
},
{
$group : {
_id : {
user_id : "$user_id",
evaluator_id : "$evaluator_id"
},
AVG_SCORE : {
$avg : "$SCORES.OBTAINED_SCORE"
}
}
}
])
For example, the aggregation result for this document:
{
"_id" : ObjectId("5500aaeaa7ef65c7460fa3d9"),
"user_id" : "u1",
"evaluator_id" : "e1",
"candidate_id" : ObjectId("54f65356294160421ead3ca1"),
"OVERALL_SCORE" : 150,
"SCORES" : [
{
"NAME" : "asd",
"OBTAINED_SCORE" : 0
},
{
"NAME" : "acd",
"OBTAINED_SCORE" : 36
}
]
}
will yield:
{
"result" : [
{
"_id" : {
"user_id" : "u1",
"evaluator_id" : "e1"
},
"AVG_SCORE" : 36
}
],
"ok" : 1
}

Aggregate of different subtypes in document of a collection

abstract document in collection md given:
{
vals : [{
uid : string,
val : string|array
}]
}
the following, partially correct aggregation is given:
db.md.aggregate(
{ $unwind : "$vals" },
{ $match : { "vals.uid" : { $in : ["x", "y"] } } },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
that may lead to the following result:
"result" : [
{
"_id" : {
"uid" : "x"
},
"vals" : [
[
"24ad52bc-c414-4349-8f3a-24fd5520428e",
"e29dec2f-57d2-43dc-818a-1a6a9ec1cc64"
],
[
"5879b7a4-b564-433e-9a3e-49998dd60b67",
"24ad52bc-c414-4349-8f3a-24fd5520428e"
]
]
},
{
"_id" : {
"uid" : "y"
},
"vals" : [
"0da5fcaa-8d7e-428b-8a84-77c375acea2b",
"1721cc92-c4ee-4a19-9b2f-8247aa53cfe1",
"5ac71a9e-70bd-49d7-a596-d317b17e4491"
]
}
]
as x is the result aggregated on documents containing an array rather than a string, the vals in the result is an array of arrays. what i look for in this case is to have a flattened array (like the result for y).
for me it seems like that what i want to achieve by one aggegration call only, is currently not supported by any given operation as e.g. a type conversion cannot be done or unwind expectes in every case an array as input type.
is map reduce the only option i have? if not ... any hints?
thanks!
You can use the aggregation to do the computation you want without changing your schema (though you might consider changing your schema simply to make queries and aggregations of this field easier to write).
I broke up the pipeline into multiple steps for readability. I also simplified your document slightly, again for readability.
Sample input:
> db.md.find().pretty()
{
"_id" : ObjectId("512f65c6a31a92aae2a214a3"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a4"),
"uid" : "x",
"val" : "string"
}
{
"_id" : ObjectId("512f65c6a31a92aae2a214a5"),
"uid" : "y",
"val" : "string2"
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a6"),
"uid" : "y",
"val" : [
"string3",
"string4"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a7"),
"uid" : "z",
"val" : [
"string"
]
}
{
"_id" : ObjectId("512f65e8a31a92aae2a214a8"),
"uid" : "y",
"val" : [
"string1",
"string2"
]
}
Pipeline stages:
> project1 = {
"$project" : {
"uid" : 1,
"val" : 1,
"isArray" : {
"$cond" : [
{
"$eq" : [
"$val.0",
[ ]
]
},
true,
false
]
}
}
}
> project2 = {
"$project" : {
"uid" : 1,
"valA" : {
"$cond" : [
"$isArray",
"$val",
[
null
]
]
},
"valS" : {
"$cond" : [
"$isArray",
null,
"$val"
]
},
"isArray" : 1
}
}
> unwind = { "$unwind" : "$valA" }
> project3 = {
"$project" : {
"_id" : 0,
"uid" : 1,
"val" : {
"$cond" : [
"$isArray",
"$valA",
"$valS"
]
}
}
}
Final aggregation:
> db.md.aggregate(project1, project2, unwind, project3, group)
{
"result" : [
{
"_id" : "z",
"vals" : [
"string"
]
},
{
"_id" : "y",
"vals" : [
"string1",
"string4",
"string3",
"string2"
]
},
{
"_id" : "x",
"vals" : [
"string"
]
}
],
"ok" : 1
}
If you modify your schema using always "vals.val" field as an array field (even when the record contains only one element) you can do it easily as follows:
db.test_col.insert({
vals : [
{
uid : "uuid1",
val : ["value1"]
},
{
uid : "uuid2",
val : ["value2", "value3"]
}]
});
db.test_col.insert(
{
vals : [{
uid : "uuid2",
val : ["value4", "value5"]
}]
});
Using this approach you only need to use two $unwind operations: one unwinds the "parent" array and the second unwinds every "vals.val" value. So, querying like
db.test_col.aggregate(
{ $unwind : "$vals" },
{ $unwind : "$vals.val" },
{
$group : {
_id : { uid : "$vals.uid" },
vals : { $addToSet : "$vals.val" }
}
}
);
You can obtain your expected value:
{
"result" : [
{
"_id" : {
"uid" : "uuid2"
},
"vals" : [
"value5",
"value4",
"value3",
"value2"
]
},
{
"_id" : {
"uid" : "uuid1"
},
"vals" : [
"value1"
]
}
],
"ok" : 1
}
And no, you can't execute this query using your current schema, since $unwind fails when the field isn't an array field.

Select Max() with "group by" in mongodb

Please help me to convert this select sentence to mongodb:
Select Name, Max(Value) From table1 Group By Name
I read this document: http://www.mongodb.org/display/DOCS/Aggregation#Aggregation-Group
but still dont know how to apply Max() method instead SUM() as that document.
Thank you.
I have created Mongo Collection as follows.
{ "_id" : ObjectId("4fb36bfd3d1c88bfa15103b1"), "name" : "bob", "value" : 5 }
{ "_id" : ObjectId("4fb36c033d1c88bfa15103b2"), "name" : "bob", "value" : 3 }
{ "_id" : ObjectId("4fb36c063d1c88bfa15103b3"), "name" : "bob", "value" : 7 }
{ "_id" : ObjectId("4fb36c0c3d1c88bfa15103b4"), "name" : "john", "value" : 2 }
{ "_id" : ObjectId("4fb36c103d1c88bfa15103b5"), "name" : "john", "value" : 4 }
{ "_id" : ObjectId("4fb36c143d1c88bfa15103b6"), "name" : "john", "value" : 8 }
{ "_id" : ObjectId("4fb36c163d1c88bfa15103b7"), "name" : "john", "value" : 6 }
Then by using the following code I group it by their name and max(value)
db.table1.group(
{key: {name:true},
reduce: function(obj,prev) {
if (prev.maxValue < obj.value) {
prev.maxValue = obj.value;
}
},
initial: { maxValue: 0 }}
);
The result is shown as
[
{
"name" : "bob",
"maxValue" : 7
},
{
"name" : "john",
"maxValue" : 8
}
]
It is much simpler with the aggregation framework. You can get the same result with the following code by using aggregation framework.
db.table1.aggregate([
{
$group:{_id:"$name", "maxValue": {$max:"$value"}}
}
]);
Using the Aggregation Framework:
db.table1.aggregate({$group:{'_id':'$name', 'max':{$max:'$value'}}},
{$sort:{'max':1}}).result
var myresult = db.table1.aggregate([{
$group: {
_id:"$Name",
value: { $max: "$Value" }
}
}]);
print(myresult)
Since MongoDB supports mapreduce below function should do.
db.employee.insert({name:"Tejas",Value:2})
db.employee.insert({name:"Tejas",Value:3})
db.employee.insert({name:"Varma",Value:1})
db.employee.insert({name:"Varma",Value:6})
var map=function(){
var key={name:this.name};
var value={value:this.Value};
emit(key,value);
};
var reduce=function(key,values){
var max=-1;
values.forEach(function(value){
if(max==-1){
max=value['value'];
}
if(max<value['value']){
max=value['value'];
}
});
return {max:max};
};
db.employee.mapReduce(map,reduce,{out:{inline:1}});