MongoDB - Geospatial Index with Aggregation - mongodb

I've read in the docs that it is not possible to use a geospatial index in an aggregation with MongoDB. Is there an alternative to this? I am attempting to run a query to grab all activities within a certain radius, then group/sort them by the number of times that activity has occurred. Is there a way around this issue?

You can use map-reduce on a geo query. Here is an example based on geo_mapreduce.js (from the mongodb jstests):
// setup test collection
// The date is written with decimal literals: the legacy octal forms 06 / 07
// are rejected in strict mode. The month argument is 0-based, so 6 is July.
var act_date = new Date(2010, 6, 7);
// Each pass inserts five activity documents sharing the same date, then
// advances the date by one day, so ten consecutive days are populated.
for (var i = 1; i <= 10; i++) {
    db.activity.insert( { "geo" : { "lat" : 32.68331909, "long" : 69.41610718 }, "date":act_date, "activity" : 9 * i } );
    db.activity.insert( { "geo" : { "lat" : 35.01860809, "long" : 70.92027283 }, "date":act_date, "activity" : 3 } );
    db.activity.insert( { "geo" : { "lat" : 31.11639023, "long" : 64.19970703 }, "date":act_date, "activity" : 11 } );
    db.activity.insert( { "geo" : { "lat" : 32.64500046, "long" : 69.36251068 }, "date":act_date, "activity" : 9 } );
    db.activity.insert( { "geo" : { "lat" : 33.23638916, "long" : 69.81360626 }, "date":act_date, "activity" : 22 } );
    act_date.setDate(act_date.getDate() + 1);
}
// NOTE(review): ensureIndex and $within are the older spellings; newer MongoDB
// releases favour createIndex and $geoWithin. Confirm against the server
// version in use before changing them.
db.activity.ensureIndex( { "geo" : "2d" } );
// Query centre, given in the same (lat, long) order as the stored "geo" field.
var center = [ 32.68, 69.41 ];
var radius = 10 / 111; // 10km; 1 arcdegree ~= 111km
// Filter handed to mapReduce so only documents inside the circle are mapped.
var geo_query = { geo : { '$within' : { '$center' : [ center, radius ] } } };
// map function
// Emits one { activity } value per document, keyed by the document's date.
// Declared with `var` so it is not created as an implicit global.
var m = function () {
    emit( this.date, { "activity" : this.activity } );
};
// reduce function
// Sums the `activity` of every value emitted for one key.
// The result has the same { activity } shape as the emitted values, so a
// partial result can safely be passed back in as one of `values`.
//   key    - the date the values were emitted under (unused here)
//   values - array of { activity: <number> } objects
//   returns { activity: <sum of all values[i].activity> }
var r = function (key, values) {
    var total = values.reduce(function (sum, value) {
        return sum + value.activity;
    }, 0);
    return { "activity" : total };
};
// mapreduce with geo query
// `query` restricts the mapped documents to those matching geo_query; the
// result is returned inline rather than written to a collection.
var res = db.activity.mapReduce( m, r, { out : { inline : 1 }, query : geo_query } );
// sort results by total activity, highest first
res.results.sort(function (a, b) {
    return b.value.activity - a.value.activity;
});
// print one line per date
res.results.forEach(function (entry) {
    print("Date: " + entry._id + " Activity: " + entry.value.activity);
});

Related

Mongodb arithmetic expression of elements inside an array

{
"_id" : ObjectId("5a4d0712368ff072a56a66e8"),
"a" : {
"nodes" : [
{
"b" : 1
},
{
"c" : {
"nodes" : [
{
"d" : 2
},
{
"e" : 2
}
]
}
}
]
},
"r" : 1
}
{
"_id" : ObjectId("5a4d0712368ff072a56a66e9"),
"a" : {
"nodes" : [
{
"b" : 4
},
{
"c" : {
"nodes" : [
{
"e" : 9
}
]
}
}
]
},
"r" : 2
}
Above are the sample documents, I want to find the sum of a.nodes.b * a.nodes.c.nodes.d * a.nodes.c.nodes.e
I have tried many pipelines, but not able to generalize the aggregation. Any help would be appreciated.
nodes array can be present again at any level. If I get the solution for this, I will try to generalize the aggregation pipeline.
MongoDB version 3.2
Expected calculation and output
(1 * 2 * 2) + (4 * 0 * 9) = 4
I doubt it is possible to handle "nodes array can be present again at any level." with aggregation.
With map-reduce it could be something like this:
db.collection.mapReduce(
    // Map: walk a.nodes recursively, multiplying every scalar leaf value and
    // recording the key it was stored under. Emits { sum, keys } under a single
    // null key so that all documents end up in one reduce group.
    function () {
        const collect = (result, node) => {
            Object.keys(node).forEach((key) => {
                const child = node[key];
                if (child === null) {
                    // typeof null is "object"; skip it instead of crashing on child.nodes
                    return;
                }
                if (typeof child === "object") {
                    if (Array.isArray(child.nodes)) {
                        result = child.nodes.reduce(collect, result);
                    }
                } else {
                    result.product = result.product * child;
                    result.keys.add(key);
                }
            });
            return result;
        };
        const { product, keys } = this.a.nodes.reduce(collect, { product: 1, keys: new Set() });
        emit(null, { sum: product, keys: Array.from(keys) });
    },
    // Reduce: a value only contributes when it carries every key seen in the
    // whole group (a missing key counts as a factor of 0). The result has the
    // same { sum, keys } shape as the emitted values, so it can be re-reduced.
    function (key, values) {
        const allKeys = new Set();
        values.forEach((value) => value.keys.forEach((k) => allKeys.add(k)));
        let sum = 0;
        values.forEach((value) => {
            // value.keys is duplicate-free and a subset of allKeys, so equal
            // sizes mean the value has every key.
            if (value.keys.length === allKeys.size) {
                sum += value.sum;
            }
        });
        return { sum, keys: Array.from(allKeys) };
    },
    {
        query: { "a.nodes": { $exists: true } },
        out: { inline: 1 },
        // Finalize: strip the bookkeeping so the result is just the number,
        // including when only one document matched and reduce never ran.
        finalize: function (key, value) {
            return value.sum;
        }
    }
);
The map function recursively multiplies all leaf values and records their keys; the reducer checks the keys and calculates the total.

mongoDB mapreduce taking so long time for running 3m document

I have one collection with 3 million documents. Each document has 40 fields. The fields look like the following:
{
"b_date" : "2016-04-05",
"d_date" : "2016-06-25",
"pos" : "MISC",
"origin" : "DXB",
"destination" : "HGA",
"pax" : 1,
"pax_1" : 2
},
{
"b_date" : "2016-04-05",
"d_date" : "2016-06-25",
"pos" : "MISC",
"origin" : "DXB",
"destination" : "HGA",
"pax" : 4,
"pax_1" : 5
},
{
"b_date" : "2016-04-05",
"d_date" : "2016-06-26",
"pos" : "MISC",
"origin" : "DXB",
"destination" : "HGA",
"pax" : 3,
"pax_1" : 3
}
Now I want to get the sum of pax and pax_1, grouped by the b_date, d_date, pos, origin and destination fields.
The cumulative pax is grouped by the pos, origin and destination fields only, and the cumulative pax and pax_1 should increase in ascending order of b_date and d_date.
Expected result is.
{
"_id.dep_date" : "2016-04-05",
"_id.sale_date" : "2016-06-25",
"_id.pos" : "MISC",
"_id.origin" : "DXB",
"_id.destination" : "HGA",
"value.pax" : 5,
"value.cumulative_pax":5,
"value.pax_1" : 7,
"value.cumulative_pax_1":7,
},
{
"_id.dep_date" : "2016-04-05",
"_id.sale_date" : "2016-06-26",
"_id.pos" : "MISC",
"_id.origin" : "DXB",
"_id.destination" : "HGA",
"value.pax" : 3,
"value.cumulative_pax":8,
"value.pax_1" : 3,
"value.cumulative_pax_1":10,
}
my mapReduce code
db.collection.mapReduce(
    // Map: key each document by (pos, origin, destination, dep_date, sale_date)
    // and emit its pax / pax_1 counts.
    function () {
        emit(
            {
                "pos" : this.pos,
                "origin" : this.origin,
                "destination" : this.destination,
                'dep_date': this.d_date,
                'sale_date': this.b_date
            },
            {
                'pax': this.pax,
                'pax_1': this.pax_1
            }
        );
    },
    // Reduce: sum pax and pax_1 over all values emitted for one key. The result
    // has the same shape as the emitted values, so it can be re-reduced.
    function (key, values) {
        var paxt = 0;
        var paxt_1 = 0;
        for (var i = 0; i < values.length; i++) {
            paxt += values[i].pax;
            paxt_1 += values[i].pax_1;
        }
        return {
            'pax': paxt,
            'pax_1': paxt_1
        };
    },
    {
        // Variables shared between finalize calls: the previous key's fields
        // and the running totals.
        'scope': {
            'pos': '',
            'origin': '',
            'destination': '',
            'dep_date': '',
            'sale_date': '',
            'result': {
                'pax': 0,
                'pax_1': 0,
                'cumulative_pax': 0,
                'cumulative_pax_1': 0
            }
        },
        // Finalize: report this key's own totals plus running totals for its
        // (pos, origin, destination) group.
        // NOTE(review): this assumes finalize is called once per key, with keys
        // of the same group adjacent and in ascending date order - confirm.
        'finalize': function (key, value) {
            // A change in pos/origin/destination starts a new group.
            if (pos !== key.pos ||
                origin !== key.origin ||
                destination !== key.destination) {
                result['cumulative_pax'] = 0;
                result['cumulative_pax_1'] = 0;
            }
            // pax / pax_1 are this key's totals; cumulative_* accumulate them.
            result['pax'] = value.pax;
            result['cumulative_pax'] += value.pax;
            result['pax_1'] = value.pax_1;
            result['cumulative_pax_1'] += value.pax_1;
            pos = key.pos;
            origin = key.origin;
            destination = key.destination;
            dep_date = key.dep_date;
            sale_date = key.sale_date;
            return result;
        },
        'out': 'some_collection'
    }
)
This map-reduce returns the expected values, but it takes a very long time — about 3 hours. Is that because 'b_date' and 'd_date' are string-formatted dates? How can I optimize it?
Aggregation returns a result within 3 minutes, but I couldn't get the cumulative pax using aggregation.
Map Reduce code,
db.collection.mapReduce(
    // Map: key each document by (pos, origin, destination, dep_date, sale_date)
    // and emit its pax / pax_1 counts.
    function () {
        emit(
            {
                "pos" : this.pos,
                "origin" : this.origin,
                "destination" : this.destination,
                'dep_date': this.d_date,
                'sale_date': this.b_date
            },
            {
                'pax': this.pax,
                'pax_1': this.pax_1
            }
        );
    },
    // Reduce: sum pax and pax_1 over all values emitted for one key. The result
    // has the same shape as the emitted values, so it can be re-reduced.
    function (key, values) {
        var paxt = 0;
        var paxt_1 = 0;
        for (var i = 0; i < values.length; i++) {
            paxt += values[i].pax;
            paxt_1 += values[i].pax_1;
        }
        return {
            'pax': paxt,
            'pax_1': paxt_1
        };
    },
    {
        // Variables shared between finalize calls: the previous key's fields
        // and the running totals.
        'scope': {
            'pos': '',
            'origin': '',
            'destination': '',
            'dep_date': '',
            'sale_date': '',
            'result': {
                'pax': 0,
                'pax_1': 0,
                'cumulative_pax': 0,
                'cumulative_pax_1': 0
            }
        },
        // Finalize: report this key's own totals plus running totals for its
        // (pos, origin, destination) group.
        // NOTE(review): this assumes finalize is called once per key, with keys
        // of the same group adjacent and in ascending date order - confirm.
        'finalize': function (key, value) {
            // A change in pos/origin/destination starts a new group.
            if (pos !== key.pos ||
                origin !== key.origin ||
                destination !== key.destination) {
                result['cumulative_pax'] = 0;
                result['cumulative_pax_1'] = 0;
            }
            // pax / pax_1 are this key's totals; cumulative_* accumulate them.
            result['pax'] = value.pax;
            result['cumulative_pax'] += value.pax;
            result['pax_1'] = value.pax_1;
            result['cumulative_pax_1'] += value.pax_1;
            pos = key.pos;
            origin = key.origin;
            destination = key.destination;
            dep_date = key.dep_date;
            sale_date = key.sale_date;
            return result;
        },
        'out': 'some_collection'
    }
)

mongodb: reduce function not action when only one result is there

I need to do the weighted average.
Did the coding as below
db.runCommand(
    {
        mapreduce : "<collecton>",
        // Map: emit the price (tag "31") and quantity (tag "32") of each
        // document, keyed by its nkey.
        map: function () {
            var groupKey = {nkey: this.nkey};
            var priceAndQty = {price: this.tags["31"], qty: this.tags["32"]};
            emit(groupKey, priceAndQty);
        },
        // Reduce: quantity-weighted average price over the values of one key,
        // returned as { wavg }.
        reduce: function (key, vals) {
            var weightedTotal = 0.0;
            var qtyTotal = 0.0;
            vals.forEach(function (entry) {
                weightedTotal += entry.price * entry.qty;
                qtyTotal += entry.qty;
            });
            return {wavg: weightedTotal / qtyTotal};
        },
        out: 'res2',
        verbose: true
    }
);
> db.res2.find()
{ "_id" : { "nkey" : "key1" }, "value" : { "wavg" : 311.7647058823529 } }
{ "_id" : { "nkey" : "ke2" }, "value" : { "wavg" : 585.7142857142857 } }
{ "_id" : { "nkey" : "key3" }, "value" : { "price" : 1000, "qty" : 110 } }
{ "_id" : { "nkey" : "key4" }, "value" : { "wavg" : 825 } }
If you notice, the third row of the final output did not actually go through the reduce function. The key occurs only once, hence only one value is emitted for it. But I still want the reduce function to act on it to get the weighted average. I can't just go ahead with price and qty wherever a key occurs only once; I need the weighted average for that key as well.
Is there any way to achieve this ?
This is essentially how mapReduce works in that the reducer is never called when you only have one result. But you can always alter such results with a finalize stage:
db.runCommand({
    "mapreduce" : "<collecton>",
    // Map: emit the price (tag "31") and quantity (tag "32") of each document,
    // keyed by its nkey.
    "map": function () {
        emit(
            { "nkey": this.nkey },
            { "price": this.tags["31"], "qty": this.tags["32"] }
        );
    },
    // Reduce: collapse the values of one key into a single { price, qty } pair,
    // where price is the quantity-weighted average and qty the total quantity.
    // Keeping the emitted shape means a partial result can be reduced again
    // without producing NaN.
    "reduce": function (key, vals) {
        var mul1 = 0.0;
        var sum1 = 0.0;
        for ( var i = 0; i < vals.length; i++ ) {
            mul1 += vals[i].price * vals[i].qty;
            sum1 += vals[i].qty;
        }
        return { "price": mul1 / sum1, "qty": sum1 };
    },
    // Finalize: runs for every key, including keys with a single emitted value
    // that never went through reduce. For those the weighted average is simply
    // the price, so one rule yields { wavg } in both cases.
    "finalize": function (key, value) {
        return { "wavg": value.price };
    },
    "out": 'res2',
    "verbose": true
});
Alternatively, just sum your values in the reduce stage and do all the division in the finalize stage, if that suits your thinking better. But then you would need to do the "multiplication" part in the "mapper" for that to work.

MongoDB Map - Reduce result not exactly

I had data structure in MongoDB as below
{
"_id" : ObjectId("523aab00045624a385e5f549"),
"name" : "English Book 29",
"SKU" : 1000549081,
"price" : 249000,
"image" : null,
"category_id" : ObjectId("523a7802b50418baf38b4575"),
"category_name" : "English Book",
"details" : {
"Title" : "Title 549081",
"Binding" : 1,
"Author" : "Author 0",
"Publication data" : 0.5263832447608386,
"Publisher name" : "Publisher name 14",
"Number of page" : 90
}
}
Binding of book has 2 values:
0 means soft binding, and 1 means hard binding. I wrote a Map-Reduce to compute the count for each value.
// Map: for a document whose `details` has a 'Binding' field, emit a
// one-element `data` list holding that binding value with a count of 1,
// under the key { name: 'Binding' }.
var map = function () {
    var details = this.details;
    for (var field in details) {
        if (field !== 'Binding') {
            continue;
        }
        emit({name: field}, {
            'data': [
                {
                    name: details[field],
                    count: 1
                }
            ]
        });
    }
};
// Reduce: tally the entries found in each value's `data` list into an object
// keyed by entry name.
// The first occurrence of a name is stored as 1 (its `count` is not read);
// later occurrences add their `count`.
// A value without a `data` property contributes nothing, and the returned
// object itself has no `data` property.
var reduce = function (key, values) {
    var reduced = {};
    values.forEach(function (inter) {
        // for...in is kept on purpose: it is a no-op when inter.data is undefined
        for (var j in inter.data) {
            var entry = inter.data[j];
            if (typeof reduced[entry.name] === "undefined") {
                reduced[entry.name] = 1;
            } else {
                reduced[entry.name] += entry.count;
            }
        }
    });
    return reduced;
};
When I run it with a small data set (50 records), the result is correct. But when I run it with real data (192000 records), the result is not correct. The result is as below:
{
"_id" : {
"name" : "Binding"
},
"value" : {
"0" : 50,
"1" : 50
}
}
I checked return data when Map/Reduce done, result as below
"counts" : {
"input" : 192000,
"emit" : 192000,
"reduce" : 1920,
"output" : 1
},
What wrong with it. Welcome any suggestion, explanation.
Thanks and best regards,
After researching Map/Reduce yesterday, I realized that "emit" sends 100 elements at a time, and "reduce" is performed on each such batch. So my code above is wrong because it only sums over a small data set.
Below that is my new code for Map-Reduce
// Map: for a document whose `details` has a 'Binding' field, emit
// { <binding value>: 1 } under the key 'Binding'.
var map = function () {
    var details = this.details;
    for (var field in details) {
        if (field !== 'Binding') {
            continue;
        }
        var counts = {};
        counts[details[field]] = 1;
        emit(field, counts);
    }
};
// Reduce: add up, field by field, the per-binding counters carried by each
// value. The result has the same { <binding>: <count> } shape as the inputs,
// so a previously reduced result can be passed in again as a value.
var reduce = function (key, values) {
    var totals = {};
    values.forEach(function (partial) {
        for (var name in partial) {
            var soFar = typeof totals[name] === 'undefined' ? 0 : totals[name];
            totals[name] = soFar + partial[name];
        }
    });
    return totals;
};
I post here for anyone who meet similar situation. Thanks for reading.

MongoDB : query collection based on Date

I have a collection in MongoDB in this format
db.logins.find().pretty()
{
"cust_id" : "samueal",
"created_at" : "2011-03-09 10:31:02.765"
}
{
"cust_id" : "sade",
"created_at" : "2011-03-09 10:33:11.157"
}
{
"cust_id" : "sade",
"created_at" : "2011-03-10 10:33:35.595"
}
{
"cust_id" : "samueal",
"created_at" : "2011-03-10 10:39:06.388"
}
This is my mapReduce function
// Map: emit a count of 1 per login document, keyed by customer id.
// Declared with `var` so it is not created as an implicit global.
var m = function () { emit(this.cust_id, 1); };
// Reduce: add up the counts emitted for one key. Uses an index loop rather
// than for...in, which is meant for object properties, not array elements.
//   k    - the key the counts were emitted under (unused)
//   vals - array of numeric counts
//   returns the sum of vals (0 for an empty array)
var r = function (k, vals) {
    var sum = 0;
    for (var i = 0; i < vals.length; i++) {
        sum += vals[i];
    }
    return sum;
};
// Builds the query filter selecting logins whose `created_at` string starts
// with the "YYYY-MM" of the date 32 days before `now`.
//   now - optional Date to count back from; defaults to the current time
//   returns { created_at: <RegExp anchored at the start of the string> }
var q = function (now) {
    // Work on a copy so the caller's Date is not modified.
    var currentDate = now ? new Date(now.getTime()) : new Date();
    currentDate.setDate(currentDate.getDate() - 32);
    // getMonth() is 0-based; pad the 1-based month so October gives "10",
    // not "010".
    var monthNumber = currentDate.getMonth() + 1;
    var month = (monthNumber < 10 ? "0" : "") + monthNumber;
    var date = currentDate.getFullYear() + "-" + month;
    // Anchored with ^ because created_at begins with the year and month.
    var patt = new RegExp("^" + date);
    var query = {"created_at": patt};
    return query;
};
// Run the map-reduce over the logins matched by q() and store the per-key
// counts in the userLoginCountMonthly collection.
var res = db.logins.mapReduce(m, r, { query : q(), out : "userLoginCountMonthly" });
With this i am geting the output as
{ "_id" : "sade", "value" : 2 }
{ "_id" : "samueal", "value" : 2 }
but I need to generate a report output in this format
For Example
Name Date Logins
sade 2011-03-09 1
sade 2011-03-10 2
samueal 2011-03-09 1
samueal 2011-03-10 1
Could anybody please help me with how to achieve this?
Edited Part
Currently I am getting the output as
{ "_id" : "dsstest 2011-03-09", "value" : 4 }
{ "_id" : "dsstest 2011-03-10", "value" : 14 }
Is it possible that i can get in this format
{ "_id" : "dsstest" , "date" : "2011-03-09", "value" : 4 }
{ "_id" : "dsstest" , "date" : "2011-03-10", "value" : 14 }
Your mapping function is insufficient as it doesn't produce a key that has the date in it.
I also don't quite understand why in the sample of what you want sade gets two logins. From what you are saying you want, you should need:
// Map: emit a count of 1 keyed by { cust, day }, where day is the part of
// created_at before the first space.
var m = function () {
    var spaceAt = this.created_at.indexOf(' ');
    var day = this.created_at.substring(0, spaceAt);
    // Only entries dated on or after the cutoff are emitted; anything earlier
    // is left out of the result.
    if (day >= "2011-11-11") {
        emit({cust: this.cust_id, day: day}, 1);
    }
};
// Reduce: add up the counts emitted for one { cust, day } key.
var r = function (k, values) {
    return values.reduce(function (total, count) {
        return total + count;
    }, 0);
};
And to run:
> db.logins.mapReduce(m, r, { query : q(), out : "userLoginCountMonthly" });
And finally, to produce the report:
> db.userLoginCountMonthly.find().forEach(function(e){print(e._id.cust +' ' + e._id.day+' '+e.value);})
This would list the amount of logins for each user, for each day (within your search scope).