Average Aggregation Queries in Meteor - mongodb

Ok, still in my toy app, I want to find out the average mileage on a group of car owners' odometers. This is pretty easy on the client but doesn't scale. Right? But on the server, I don't exactly see how to accomplish it.
Questions:
How do you implement something on the server then use it on the client?
How do you use the $avg aggregation function of mongo to leverage its optimized aggregation function?
Or alternatively to (2) how do you do a map/reduce on the server and make it available to the client?
The suggestion by #HubertOG was to use Meteor.call, which makes sense and I did this:
# Client side
Template.mileage.average_miles = ->
answer = null
Meteor.call "average_mileage", (error, result) ->
console.log "got average mileage result #{result}"
answer = result
console.log "but wait, answer = #{answer}"
answer
# Server side
Meteor.methods average_mileage: ->
console.log "server mileage called"
total = count = 0
r = Mileage.find({}).forEach (mileage) ->
total += mileage.mileage
count += 1
console.log "server about to return #{total / count}"
total / count
That would seem to work fine, but it doesn't because as near as I can tell Meteor.call is an asynchronous call and answer will always be a null return. Handling stuff on the server seems like a common enough use case that I must have just overlooked something. What would that be?
Thanks!

As of Meteor 0.6.5, the collection API doesn't support aggregation queries yet because there's no (straightforward) way to do live updates on them. However, you can still write them yourself, and make them available in a Meteor.publish, although the result will be static. In my opinion, doing it this way is still preferable because you can merge multiple aggregations and use the client-side collection API.
Meteor.publish("someAggregation", function (args) {
var sub = this;
// This works for Meteor 0.6.5
var db = MongoInternals.defaultRemoteCollectionDriver().mongo.db;
// Your arguments to Mongo's aggregation. Make these however you want.
var pipeline = [
{ $match: doSomethingWith(args) },
{ $group: {
_id: whatWeAreGroupingWith(args),
count: { $sum: 1 }
}}
];
db.collection("server_collection_name").aggregate(
pipeline,
// Need to wrap the callback so it gets called in a Fiber.
Meteor.bindEnvironment(
function(err, result) {
// Add each of the results to the subscription.
_.each(result, function(e) {
// Generate a random disposable id for aggregated documents
sub.added("client_collection_name", Random.id(), {
key: e._id.somethingOfInterest,
count: e.count
});
});
sub.ready();
},
function(error) {
Meteor._debug( "Error doing aggregation: " + error);
}
)
);
});
The above is an example grouping/count aggregation. Some things of note:
When you do this, you'll naturally be doing an aggregation on server_collection_name and pushing the results to a different collection called client_collection_name.
This subscription isn't going to be live, and will probably be updated whenever the arguments change, so we use a really simple loop that just pushes all the results out.
The results of the aggregation don't have Mongo ObjectIDs, so we generate some arbitrary ones of our own.
The callback to the aggregation needs to be wrapped in a Fiber. I use Meteor.bindEnvironment here but one can also use a Future for more low-level control.
If you start combining the results of publications like these, you'll need to carefully consider how the randomly generated ids impact the merge box. However, a straightforward implementation of this is just a standard database query, except it is more convenient to use with Meteor APIs client-side.
TL;DR version: Almost anytime you are pushing data out from the server, a publish is preferable to a method.
For more information about different ways to do aggregation, check out this post.

I did this with the 'aggregate' method. (ver 0.7.x)
if(Meteor.isServer){
Future = Npm.require('fibers/future');
Meteor.methods({
'aggregate' : function(param){
var fut = new Future();
MongoInternals.defaultRemoteCollectionDriver().mongo._getCollection(param.collection).aggregate(param.pipe,function(err, result){
fut.return(result);
});
return fut.wait();
}
,'test':function(param){
var _param = {
pipe : [
{ $unwind:'$data' },
{ $match:{
'data.y':"2031",
'data.m':'01',
'data.d':'01'
}},
{ $project : {
'_id':0
,'project_id' : "$project_id"
,'idx' : "$data.idx"
,'y' : '$data.y'
,'m' : '$data.m'
,'d' : '$data.d'
}}
],
collection:"yourCollection"
}
Meteor.call('aggregate',_param);
}
});
}

If you want reactivity, use Meteor.publish instead of Meteor.call. There's an example in the docs where they publish the number of messages in a given room (just above the documentation for this.userId), you should be able to do something similar.

You can use Meteor.methods for that.
// server
Meteor.methods({
average: function() {
...
return something;
},
});
// client
var _avg = { /* Create an object to store value and dependency */
dep: new Deps.Dependency();
};
Template.mileage.rendered = function() {
_avg.init = true;
};
Template.mileage.averageMiles = function() {
_avg.dep.depend(); /* Make the function rerun when _avg.dep is touched */
if(_avg.init) { /* Fetch the value from the server if not yet done */
_avg.init = false;
Meteor.call('average', function(error, result) {
_avg.val = result;
_avg.dep.changed(); /* Rerun the helper */
});
}
return _avg.val;
});

Related

Meteor: Increment DB value server side when client views page

I'm trying to do something seemingly simple, update a views counter in MongoDB every time the value is fetched.
For example I've tried it with this method.
Meteor.methods({
'messages.get'(messageId) {
check(messageId, String);
if (Meteor.isServer) {
var message = Messages.findOne(
{_id: messageId}
);
var views = message.views;
// Increment views value
Messages.update(
messageId,
{ $set: { views: views++ }}
);
}
return Messages.findOne(
{_id: messageId}
);
},
});
But I can't get it to work the way I intend. For example the if(Meteor.isServer) code is useless because it's not actually executed on the server.
Also the value doesn't seem to be available after findOne is called, so it's likely async but findOne has no callback feature.
I don't want clients to control this part, which is why I'm trying to do it server side, but it needs to execute everytime the client fetches the value. Which sounds hard since the client has subscribed to the data already.
Edit: This is the updated method after reading the answers here.
'messages.get'(messageId) {
check(messageId, String);
Messages.update(
messageId,
{ $inc: { views: 1 }}
);
return Messages.findOne(
{_id: messageId}
);
},
For example the if(Meteor.isServer) code is useless because it's not
actually executed on the server.
Meteor methods are always executed on the server. You can call them from the client (with callback) but the execution happens server side.
Also the value doesn't seem to be available after findOne is called,
so it's likely async but findOne has no callback feature.
You don't need to call it twice. See the code below:
Meteor.methods({
'messages.get'(messageId) {
check(messageId, String);
var message = Messages.findOne({_id:messageId});
if (message) {
// Increment views value on current doc
message.views++;
// Update by current doc
Messages.update(messageId,{ $set: { views: message.views }});
}
// return current doc or null if not found
return message;
},
});
You can call that by your client like:
Meteor.call('messages.get', 'myMessageId01234', function(err, res) {
if (err || !res) {
// handle err, if res is empty, there is no message found
}
console.log(res); // your message
});
Two additions here:
You may split messages and views into separate collections for sake of scalability and encapsulation of data. If your publication method does not restrict to public fields, then the client, who asks for messages also receives the view count. This may work for now but may violate on a larger scale some (future upcoming) access rules.
views++ means:
Use the current value of views, i.e. build the modifier with the current (unmodified) value.
Increment the value of views, which is no longer useful in your case because you do not use that variable for anything else.
Avoid these increment operator if you are not clear how they exactly work.
Why not just using a mongo $inc operator that could avoid having to retrieve the previous value?

I cannot consume my publications in Meteor template's helper

I defined the following publication on the server side:
Meteor.publish('observedQuestionsFeed',function(){
_self = this;
return Questions.find({
observedByUsers : {$exists: true,$elemMatch: {$eq:_self.userId}}});
});
which returns what I need;
then i defined a subscription to that publication in my template as follows:
Template.observedQuestions.onCreated(function(){
var self = this;
self.autorun(function(){
self.subscribe('observedQuestionsFeed');
});
});
but I cannot consume that publication in my helper as I would need to repeat same query as I understand but $eq is not recognised;
I put :
Template.observedQuestions.helpers({
observedQuestions : function(){
questions = Questions.find({
observedByUsers : {$exists: true,$elemMatch: {$eq:Meteor.userId()}}});
return questions;
}
});
which does not work due to $eq not recognised.
I want to use this very specific publication only in this particular template.How should I do it? (observedByUsers field is a simple array of usersIds or is undefined in my mongo collection)
In your publish function, you're already filtering the records down to what you want. If that is five records for example, you're only sending those five records to the client. Because of that you should just be able to do:
Template.observedQuestions.helpers({
observedQuestions : function(){
return Questions.find();
}
});

Returning the Results as well as subscribing in Meteor Mongo Geosearch

Using Meteor.js I've got the following code for my pub/sub working flawlessly. I'm able to pass my arguments through and return the cursors with no problem.
My objective is to display the distance between the current user location and the database result.
As mongodb has already calculated the distance to get the result set I don't want to calculate it again someplace else. I'd like to return the geoNear.results[n].dis results of the $geoNear documented here but can't work out a practical way to go about it. I appreciate the publish only returns a cursor to the docs but wondered if there was some way to attach the results somehow...
Meteor.publishComposite("public", function(location, distance) {
return {
find: function() {
return Tutors.find({},
{
$geoNear: {
$geometry: {
type: "Point" ,
coordinates: [ location.lng , location.lat ]
},
$maxDistance: distance,
},
}
);
}
}
});
My subscribe arguments are simply a lat/lng object and distance in metres.
What if I told you that you could use Mongo aggregation? The general idea here is to get the distance between the current user location and the database result to update automatically with a change in the 'Tutors' collection, thus use publication with an observe to achieve this.
Here's the set-up. The first step is to get the aggregation framework package which wraps up some Mongo methods for you. Just meteor add meteorhacks:aggregate and you should be home and dry. This will add an aggregate() method to your collections.
An alternative to adding the aggregation framework support is to call directly your mongoDB and access the underlying collection methods, which in this case you need the aggregate() method. So, use this to connect in the mongoDB :
var db = MongoInternals.defaultRemoteCollectionDriver().mongo.db,
Tutors = db.collection("tutors");
Now you can dive into the aggregation framework and build up your pipeline queries. The following example demonstrates how to get the aggregation in the publish reactive use a observe in the publish with ES6 in Meteor. This follows the 'counts-by-room' example in the meteor docs. With the observe, you know if a new location has been added, changes or removed. For simplicity re-run the aggregation each time (except remove) and if the location was previously published then update the publish, if the location was removed then remove the location from the publish and then for a new location use added:
Meteor.publish('findNearestTutors', function(opts) {
let initializing = 1, run = (action) => {
// Define the aggregation pipeline
let pipeline = [
{
$geoNear: {
near: {type: 'Point', coordinates: [Number(opts.lng), Number(opts.lat)]},
distanceField: 'distance',
maxDistance: opts.distance,
spherical: true,
sort: -1
}
}
]
Tutors.aggregate(pipeline).forEach((location) => {
// Add each of the results to the subscription.
this[action]('nearest-locations', location._id, location)
this.ready()
})
}
// Run the aggregation initially to add some data to your aggregation collection
run('added')
// Track any changes on the collection you are going to use for aggregation
let handle = Tutors.find({}).observeChanges({
added(id) {
// observeChanges only returns after the initial `added` callbacks
// have run. Until then, you don't want to send a lot of
// `self.changed()` messages - hence tracking the
// `initializing` state.
if (initializing && initializing--)
run('changed')
},
removed(id) {
run('changed')
},
changed(id) {
run('changed')
},
error(err) {
throw new Meteor.Error("Houston, we've got a problem here!", err.message)
}
})
// Stop observing the cursor when client unsubs.
// Stopping a subscription automatically takes
// care of sending the client any removed messages.
this.onStop(function () {
handle.stop();
})
})

mapreduce between consecutive documents

Setup:
I got a large collection with the following entries
Name - String
Begin - time stamp
End - time stamp
Problem:
I want to get the gaps between documents, Using the map-reduce paradigm.
Approach:
I'm trying to set a new collection of pairs mid, after that I can compute differences from it using $unwind and Pair[1].Begin - Pair[0].End
function map(){
emit(0, this)
}
function reduce(){
var i = 0;
var pairs = [];
while ( i < values.length -1){
pairs.push([values[i], values[i+1]]);
i = i + 1;
}
return {"pairs":pairs};
}
db.collection.mapReduce(map, reduce, sort:{begin:1}, out:{replace:"mid"})
This works with limited number of document because of the 16MB document cap. I'm not sure if I need to get the collection into memory and doing it there, How else can I approach this problem?
The mapReduce function of MongoDB has a different way of handling what you propose than the method you are using to solve it. The key factor here is "keeping" the "previous" document in order to make the comparison to the next.
The actual mechanism that supports this is the "scope" functionality, which allows a sort of "global" variable approach to use in the overall code. As you will see, what you are asking when that is considered takes no "reduction" at all as there is no "grouping", just emission of document "pair" data:
db.collection.mapReduce(
function() {
if ( last == null ) {
last = this;
} else {
emit(
{
"start_id": last._id,
"end_id": this._id
},
this.Begin - last.End
);
last = this;
}
},
function() {}, // no reduction required
{
"out": { "inline": 1 },
"scope": { "last": null }
}
)
Out with a collection as the output as required to your size.
But this way by using a "global" to keep the last document then the code is both simple and efficient.

Meteor Publish Distinct Values of Field in Collection

I'm stuck on a pretty simple scenario in Meteor:
I have a huge collection of things with many fields, some of them containing quite a bit of text.
I want to create a page for searching that collection.
One of the fields that each item in the collection has is "category".
I'd like to give the user the ability to filter by that category.
For that, I need to publish just the distinct values of the category field in the collection.
I can't figure out a way to do that without publishing the whole collection which takes way too long. How can I publish just the distinct categories and use them to fill a dropdown?
Bonus question and somewhat related: How do I publish a count of all items in the collection without publishing the whole collection?
A good starting point to make this easier would be to normalize your categories into a separate database collection.
However assuming that is not possible or practical, the best (though imperfect) solution will be to publish two separate versions of your collection, one which returns only the categories field of the entire collection and another which returns all fields of the collection for the selected category only. That would look like the following:
// SERVER
Meteor.startup(function(){
Meteor.publish('allThings', function() {
// return only id and categories field for all your things
return Things.find({}, {fields: {categories: 1}});
});
Meteor.publish('thingsByCategory', function(category) {
// return all fields for things having the selected category
// you can then subscribe via something like a client-side Session variable
// e.g., Meteor.subscribe("thingsByCategory", Session.get("category"));
return Things.find({category: category});
});
});
Note that you will still need to assemble your array of categories client side from the Things cursor (for example, by using underscore's _.pluck and _.uniq methods to grab the categories and remove any dups). But the data set will be much smaller as you are only working with single-field documents now.
(Note that ideally, you would want to use Mongo's distinct() method in your publish function to publish only the distinct categories, but that is not possible directly as it returns an array which cannot be published).
You could use the internal this._documents.collectionName to only send new categories down to the client. Tracking which categories to remove becomes a bit ugly so you probably will still end up maintaining a separate 'categories' collection eventually.
Example:
Meteor.publish( 'categories', function(){
var self = this;
largeCollection.find({},{fields: {category: 1}).observeChanges({
added: function( id, doc ){
if( ! self._documents.categories[ doc.category ] )
self.added( 'categories', doc.category, {category: doc.category});
},
removed: function(){
_.keys( self._documents.categories ).forEach( category ){
if ( largeCollection.find({category: category},{limit: 1}).count() === 0 )
self.removed( 'categories', category );
}
}
});
self.ready();
};
Re: the bonus question, publishing counts: take a look at the meteorite package publish-counts. I think that does what you want.
These patterns might be helpful to you. Here is a publication that publishes counts:
/*****************************************************************************/
/* Counts Publish Function
/*****************************************************************************/
// server: publish the current size of a collection
Meteor.publish("countsByProject", function (arguments) {
var self = this;
if (this.userId) {
var roles = Meteor.users.findOne({_id : this.userId}).roles;
if ( _.contains(roles, arguments.projectId) ) {
//check(arguments.video_id, Integer);
// observeChanges only returns after the initial `added` callbacks
// have run. Until then, we don't want to send a lot of
// `self.changed()` messages - hence tracking the
// `initializing` state.
Videos.find({'projectId': arguments.projectId}).forEach(function (video) {
var count = 0;
var initializing = true;
var video_id = video.video_id;
var handle = Observations.find({video_id: video_id}).observeChanges({
added: function (id) {
//console.log(video._id);
count++;
if (!initializing)
self.changed("counts", video_id, {'video_id': video_id, 'observations': count});
},
removed: function (id) {
count--;
self.changed("counts", video_id, {'video_id': video_id, 'observations': count});
}
// don't care about changed
});
// Instead, we'll send one `self.added()` message right after
// observeChanges has returned, and mark the subscription as
// ready.
initializing = false;
self.added("counts", video_id, {'video_id': video_id, 'observations': count});
self.ready();
// Stop observing the cursor when client unsubs.
// Stopping a subscription automatically takes
// care of sending the client any removed messages.
self.onStop(function () {
handle.stop();
});
}); // Videos forEach
} //if _.contains
} // if userId
return this.ready();
});
And here is one that creates a new collection from a specific field:
/*****************************************************************************/
/* Tags Publish Functions
/*****************************************************************************/
// server: publish the current size of a collection
Meteor.publish("tags", function (arguments) {
var self = this;
if (this.userId) {
var roles = Meteor.users.findOne({_id : this.userId}).roles;
if ( _.contains(roles, arguments.projectId) ) {
var observations, tags, initializing, projectId;
initializing = true;
projectId = arguments.projectId;
observations = Observations.find({'projectId' : projectId}, {fields: {tags: 1}}).fetch();
tags = _.pluck(observations, 'tags');
tags = _.flatten(tags);
tags = _.uniq(tags);
var handle = Observations.find({'projectId': projectId}, {fields : {'tags' : 1}}).observeChanges({
added: function (id, fields) {
if (!initializing) {
tags = _.union(tags, fields.tags);
self.changed("tags", projectId, {'projectId': projectId, 'tags': tags});
}
},
removed: function (id) {
self.changed("tags", projectId, {'projectId': projectId, 'tags': tags});
}
});
initializing = false;
self.added("tags", projectId, {'projectId': projectId, 'tags': tags});
self.ready();
self.onStop(function () {
handle.stop();
});
} //if _.contains
} // if userId
return self.ready();
});
I have not tested it on Meteor, and according to the replies, I'm getting skeptical that it will work but using a mongoDB distinct would do the trick.
http://docs.mongodb.org/manual/reference/method/db.collection.distinct/