Mongodb arithmetic expression of elements inside an array - mongodb

{
"_id" : ObjectId("5a4d0712368ff072a56a66e8"),
"a" : {
"nodes" : [
{
"b" : 1
},
{
"c" : {
"nodes" : [
{
"d" : 2
},
{
"e" : 2
}
]
}
}
]
},
"r" : 1
}
{
"_id" : ObjectId("5a4d0712368ff072a56a66e9"),
"a" : {
"nodes" : [
{
"b" : 4
},
{
"c" : {
"nodes" : [
{
"e" : 9
}
]
}
}
]
},
"r" : 2
}
Above are the sample documents, I want to find the sum of a.nodes.b * a.nodes.c.nodes.d * a.nodes.c.nodes.e
I have tried many pipelines, but not able to generalize the aggregation. Any help would be appreciated.
nodes array can be present again at any level. If I get the solution for this, I will try to generalize the aggregation pipeline.
MongoDB version 3.2
Expected calculation and output
(1 * 2 * 2) + (4 * 0 * 9) = 4

I doubt it is possible to handle "nodes array can be present again at any level." with aggregation.
With map-reduce it could be something like this:
db.collection.mapReduce(
function(){
let reducer = (result, node) => {
Object.keys(node).map(key => {
if (typeof node[key] === "object") {
if (node[key].nodes && Array.isArray(node[key].nodes)) {
result = node[key].nodes.reduce(reducer, result);
}
} else {
result.product = result.product * node[key];
result.keys.add(key);
}
});
return result;
};
let {product, keys} = this.a.nodes.reduce(reducer, {product: 1, keys: new Set()});
emit(null, {product, keys: Array.from(keys)})
},
function(key, values){
return values.reduce((total, item) => {
let totalSum = total.sum, itemSum = item.product;
for (let key of item.keys) if (!total.keys.includes(key)) totalSum = 0;
for (let key of total.keys) if (!item.keys.includes(key)) itemSum = 0;
return {sum: totalSum + itemSum, keys: Array.from(new Set([...total.keys, ...item.keys]))};
}, {sum: 0, keys: []}).sum;
},
{ query: { "a.nodes": { $exists: true } } , out: { inline: 1 }}
);
Map function recursively multiplies all keys, reducer checks the keys and calculate totals.

Related

Use index of object in MongoDB to return result

Below is a collection in mongodb. I need to return the bus_number given a start stop and an end stop. the constraint is that that the index of the end stop has to be greater than the index of the start stop.
So, given start=226 & end=229 the returned value should be 1A.
start=229 & end=226 should return nothing.
{
"_id" : ObjectId("59be068"),
"route" : "1A",
"route_patterns" : [
{
"route_pattern_id" : "00010001",
"route_stops" : [
{
"stop_id" : "226",
},
{
"stop_id" : "228"
},
{
"stop_id" : "229"
},
{
"stop_id" : "227"
}
]
}
]}
Edit:
my structure now looks like this:
{
"route":"1A",
"route_pattern_id":"00010001",
"route_stops":[
{
"stop_id":"226",
"stop_number":0
},
{
"stop_id":"228",
"stop_number":1
},
{
"stop_id":"229",
"stop_number":2
},
{
"stop_id":"227",
"stop_number":3
}
]
}
This way I am now using the stop_number. Not a clean solution.
This is what I have done:
entity_start = db.routes.find({
"route_patterns.route_stops.stop_id": str(start_stop)
})
dumps(entity_start)
start_buses = [routes['route'] for routes in entity_start]
entity_end = db.routes.find({
"route_patterns.route_stops.stop_id": str(end_stop)
})
end_buses = [routes['route'] for routes in entity_end]
set_two = set(start_buses)
set_one = set(end_buses)
return dumps(set_one.intersection(set_two))

MongoDB query to return documents that only have keys amongst a predefined set

The MongoDB query language allows filtering documents based on the existence or absence of a given field with the $exists operator.
Is there a way, with the MongoDB syntax, and given a set K of allowed fields, to exclude documents that have fields not in K from the results, but:
not knowing in advance which extra fields (outside K) can be encountered
not using JavaScript, that is, the $where operator?
Example:
{
"Some field" : "foo"
}
{
"Some field" : "bar",
"Some other field" : "foobar"
}
With the set K = [ "Some field" ], only the first document is to be returned.
Note how this is not to be confused with a projection, which would return both documents but removing the extra field.
I'm not sure if MongoDB do support such kind of operations out of box but you can achieve so with help of mapReduce.
Assuming your sample data set;
// Variable for map
var map = function () {
var isAcceptable = true;
Object.keys(this).forEach(function (key) {
if (key != "_id" && white_list.indexOf(key) == -1) {
isAcceptable = false;
}
});
if (isAcceptable == true) {
emit(1, this);
}
};
// Variable for reduce
var reduce = function (key, values) {
return values;
};
db.collection.mapReduce(
map,
reduce,
{
scope: {"white_list": ["Some field"]},
out: {"inline": 1}
}
);
Will return:
{
"results" : [
{
"_id" : 1,
"value" : {
"_id" : ObjectId("57cd7503e55de957c62fb9c8"),
"Some field" : "foo"
}
}
],
"timeMillis" : 13,
"counts" : {
"input" : 2,
"emit" : 1,
"reduce" : 0,
"output" : 1
},
"ok" : 1
}
Desired result will be in results.values of returned document. However, keep in mind limitation of MongoDB mapReduce and maximum size of BSON document.
Given a set of known fields K, you can construct a query that takes the set as input and gives a query with the $exists operator along with the corresponding fields projection. Using an example, suppose you have the following documents in a test collection
db.test.insert({ "fieldX": "foo", "fieldY": "bar", "fieldZ": 1 })
db.test.insert({ "fieldX": "123", "fieldY": "bar", "fieldZ": 2 })
db.test.insert({ "fieldY": "abc", "fieldZ": 3 })
db.test.insert({ "fieldX": "xyz", "fieldZ": 4 })
db.test.insert({ "fieldZ": 5 })
Then you can construct a query Q and a projection P from an input set K as follows:
var K = [ "fieldX", "fieldZ" ];
var or = K.map(function(field) {
var obj = {};
obj[field] = { "$exists": true };
return obj;
});
var P = K.reduce(function(doc, field) {
doc[field] = 1;
return doc;
}, {} );
var Q = { "$or": or };
db.test.find(Q, P);
Sample Output:
/* 1 */
{
"_id" : ObjectId("57cd78322c241f5870c82b7d"),
"fieldX" : "foo",
"fieldZ" : 1
}
/* 2 */
{
"_id" : ObjectId("57cd78332c241f5870c82b7e"),
"fieldX" : "123",
"fieldZ" : 2
}
/* 3 */
{
"_id" : ObjectId("57cd78332c241f5870c82b7f"),
"fieldZ" : 3
}
/* 4 */
{
"_id" : ObjectId("57cd78332c241f5870c82b80"),
"fieldX" : "xyz",
"fieldZ" : 4
}
/* 5 */
{
"_id" : ObjectId("57cd78332c241f5870c82b81"),
"fieldZ" : 5
}

mongodb: reduce function not action when only one result is there

I need to do the weighted average.
Did the coding as below
db.runCommand(
{ mapreduce : "<collecton>" ,
map: function ()
{
emit ({nkey: this.nkey}, {price: this.tags["31"], qty: this.tags["32"]});
},
reduce: function(key, vals)
{
var ret = {wavg:0};
var mul1 = 0.0;
var sum1 = 0.0;
for (var i=0;i<vals.length;i++)
{ mul1 += vals[i].price * vals[i].qty;
sum1 += vals[i].qty;
}
ret.wavg = mul1/sum1;
return ret;
},
out: 'res2', verbose: true
}
);
> db.res2.find()
{ "_id" : { "nkey" : "key1" }, "value" : { "wavg" : 311.7647058823529 } }
{ "_id" : { "nkey" : "ke2" }, "value" : { "wavg" : 585.7142857142857 } }
{ "_id" : { "nkey" : "key3" }, "value" : { "price" : 1000, "qty" : 110 } }
{ "_id" : { "nkey" : "key4" }, "value" : { "wavg" : 825 } }
If you notice, in the final reducer output(third row), it dint actually go thru the reduce functionality. The key occur only once, hence one result will be emitted. But I still want the reduce function to be acting on that to get the weighted average. I can't just go ahead with price and qty wherever I have only one occurence of the key, where I need weighted average for that key also.
Is there any way to achieve this ?
This is essentially how mapReduce works in that the reducer is never called when you only have one result. But you can always alter such results with a finalize stage:
db.runCommand({
"mapreduce" : "<collecton>" ,
"map": function () {
emit (
{ "nkey": this.nkey},
{ "price": this.tags["31"], qty: this.tags["32"]}
);
},
"reduce": function(key, vals) {
var ret = { "wavg": 0 };
var mul1 = 0.0;
var sum1 = 0.0;
for ( var i=0; i<vals.length; i++ ) {
mul1 += vals[i].price * vals[i].qty;
sum1 += vals[i].qty;
}
ret.wavg = mul1/sum1;
return ret;
},
"finalize": function(key,value) {
if (value.hasOwnProperty("price") {
value.wavg = value.price;
delete value["price"];
}
return value;
},
"out": 'res2',
"verbose": true
});
Or otherwise alternately just sum your keys in the reduce stage and do all the division in the finalize stage if that suits you thinking better. But then you would need to do your "multiplication" part in the "mapper" for that to work.

Mongo: count the number of word occurrences in a set of documents

I have a set of documents in Mongo. Say:
[
{ summary:"This is good" },
{ summary:"This is bad" },
{ summary:"Something that is neither good nor bad" }
]
I'd like to count the number of occurrences of each word (case insensitive), then sort in descending order. The result should be something like:
[
"is": 3,
"bad": 2,
"good": 2,
"this": 2,
"neither": 1,
"nor": 1,
"something": 1,
"that": 1
]
Any idea how to do this? Aggregation framework would be preferred, as I understand it to some degree already :)
MapReduce might be a good fit that can process the documents on the server without doing manipulation on the client (as there isn't a feature to split a string on the DB server (open issue).
Start with the map function. In the example below (which likely needs to be more robust), each document is passed to the map function (as this). The code looks for the summary field and if it's there, lowercases it, splits on a space, and then emits a 1 for each word found.
var map = function() {
var summary = this.summary;
if (summary) {
// quick lowercase to normalize per your requirements
summary = summary.toLowerCase().split(" ");
for (var i = summary.length - 1; i >= 0; i--) {
// might want to remove punctuation, etc. here
if (summary[i]) { // make sure there's something
emit(summary[i], 1); // store a 1 for each word
}
}
}
};
Then, in the reduce function, it sums all of the results found by the map function and returns a discrete total for each word that was emitted above.
var reduce = function( key, values ) {
var count = 0;
values.forEach(function(v) {
count +=v;
});
return count;
}
Finally, execute the mapReduce:
> db.so.mapReduce(map, reduce, {out: "word_count"})
The results with your sample data:
> db.word_count.find().sort({value:-1})
{ "_id" : "is", "value" : 3 }
{ "_id" : "bad", "value" : 2 }
{ "_id" : "good", "value" : 2 }
{ "_id" : "this", "value" : 2 }
{ "_id" : "neither", "value" : 1 }
{ "_id" : "or", "value" : 1 }
{ "_id" : "something", "value" : 1 }
{ "_id" : "that", "value" : 1 }
A basic MapReduce example
var m = function() {
var words = this.summary.split(" ");
if (words) {
for(var i=0; i<words.length; i++) {
emit(words[i].toLowerCase(), 1);
}
}
}
var r = function(k, v) {
return v.length;
};
db.collection.mapReduce(
m, r, { out: { merge: "words_count" } }
)
This will insert word counts into a collection name words_count which you can sort (and index)
Note that it doesn't use stemming, omit punctuation, handles stop words etc.
Also note you can optimize the map function by accumulating repeating word(s) occurrences and emitting the count, not just 1
You can use #split.
Try Below query
db.summary.aggregate([
{ $project : { summary : { $split: ["$summary", " "] } } },
{ $unwind : "$summary" },
{ $group : { _id: "$summary" , total : { "$sum" : 1 } } },
{ $sort : { total : -1 } }
]);
Old question but since 4.2 this can be done with $regexFindAll now.
db.summaries.aggregate([
{$project: {
occurences: {
$regexFindAll: {
input: '$summary',
regex: /\b\w+\b/, // match words
}
}
}},
{$unwind: '$occurences'},
{$group: {
_id: '$occurences.match', // group by each word
totalOccurences: {
$sum: 1 // add up total occurences
}
}},
{$sort: {
totalOccurences: -1
}}
]);
This will output docs in the following format:
{
_id: "matchedwordstring",
totalOccurences: number
}

MongoDB group by Functionalities

In MySQL
select a,b,count(1) as cnt from list group by a, b having cnt > 2;
I have to execute the group by function using having condition in mongodb.
But i am getting following error. Please share your input.
In MongoDB
> res = db.list.group({key:{a:true,b:true},
... reduce: function(obj,prev) {prev.count++;},
... initial: {count:0}}).limit(10);
Sat Jan 7 16:36:30 uncaught exception: group command failed: {
"errmsg" : "exception: group() can't handle more than 20000 unique keys",
"code" : 10043,
"ok" : 0
Once it will be executed, we need to run the following file on next.
for (i in res) {if (res[i].count>2) printjson(res[i])};
Regards,
Kumaran
MongoDB group by is very limited in most cases, for instance
- the result set must be lesser than 10000 keys.
- it will not work in sharded environments
So its better to use map reduce. so the query would be like this
map = function() { emit({a:true,b:true},{count:1}); }
reduce = function(k, values) {
var result = {count: 0};
values.forEach(function(value) {
result.count += value.count;
});
return result;
}
and then
db.list.mapReduce(map,reduce,{out: { inline : 1}})
Its a untested version. let me know if it works
EDIT:
The earlier map function was faulty. Thats why you are not getting the results. it should have been
map = function () {
emit({a:this.a, b:this.b}, {count:1});
}
Test data:
> db.multi_group.insert({a:1,b:2})
> db.multi_group.insert({a:2,b:2})
> db.multi_group.insert({a:3,b:2})
> db.multi_group.insert({a:1,b:2})
> db.multi_group.insert({a:3,b:2})
> db.multi_group.insert({a:7,b:2})
> db.multi_group.mapReduce(map,reduce,{out: { inline : 1}})
{
"results" : [
{
"_id" : {
"a" : 1,
"b" : 2
},
"value" : {
"count" : 2
}
},
{
"_id" : {
"a" : 2,
"b" : 2
},
"value" : {
"count" : 1
}
},
{
"_id" : {
"a" : 3,
"b" : 2
},
"value" : {
"count" : 2
}
},
{
"_id" : {
"a" : 7,
"b" : 2
},
"value" : {
"count" : 1
}
}
],
"timeMillis" : 1,
"counts" : {
"input" : 6,
"emit" : 6,
"reduce" : 2,
"output" : 4
},
"ok" : 1,
}
EDIT2:
Complete solution including applying having count >= 2
map = function () {
emit({a:this.a, b:this.b}, {count:1,_id:this._id});
}
reduce = function(k, values) {
var result = {count: 0,_id:[]};
values.forEach(function(value) {
result.count += value.count;
result._id.push(value._id);
});
return result;
}
>db.multi_group.mapReduce(map,reduce,{out: { replace : "multi_result"}})
> db.multi_result.find({'value.count' : {$gte : 2}})
{ "_id" : { "a" : 1, "b" : 2 }, "value" : { "_id" : [ ObjectId("4f0adf2884025491024f994c"), ObjectId("4f0adf3284025491024f994f") ], "count" : 2 } }
{ "_id" : { "a" : 3, "b" : 2 }, "value" : { "_id" : [ ObjectId("4f0adf3084025491024f994e"), ObjectId("4f0adf3584025491024f9950") ], "count" : 2 } }
You should use MapReduce instead. Group has its limitations.
In future you'll be able to use the Aggregation Framework. But for now, use map/reduce.
Depends on the number of your groups, you might find a simpler and faster solution than group or MapReduce by using distinct:
var res = [];
for( var cur_a = db.list.distinct('a'); cur_a.hasNext(); ) {
var a = cur_a.next();
for( var cur_b = db.list.distinct('b'); cur_b.hasNext(); ) {
var b = cur_b.next();
var cnt = db.list.count({'a':a,'b':b})
if (cnt > 2)
res.push({ 'a': a, 'b' : b 'cnt': cnt}
}
}
It will be faster if you have indexes on a and b
db.list.ensureIndex({'a':1,'b':1})