How do I publish two random items from a Meteor collection? - mongodb

I'm making an app where two random things from a collection are displayed to the user. Every time the user refreshes the page or clicks on a button, she would get another random pair of items.
For example, if the collection were of fruits, I'd want something like this:
apple vs banana
peach vs pineapple
banana vs peach
The code below is for the server side and it works except for the fact that the random pair is generated only once. The pair doesn't update until the server is restarted. I understand it is because generate_pair() is only called once. I have tried calling generate_pair() from one of the Meteor.publish functions but it only sometimes works. Other times, I get no items (errors) or only one item.
I don't mind publishing the entire collection and selecting random items from the client side. I just don't want to crash the browser if Items has 30,000 entries.
So to conclude, does anyone have any ideas of how to get two random items from a collection appearing on the client side?
var first_item, second_item;
// This is the best way I could find to get a random item from a Meteor collection
// Every item in Items has a 'random_number' field with a randomly generated number between 0 and 1
var random_item = function() {
return Items.find({
random_number: {
$gt: Math.random()
}
}, {
limit: 1
});
};
// Generates a pair of items and ensure that they're not duplicates.
var generate_pair = function() {
first_item = random_item();
second_item = random_item();
// Regenerate second item if it is a duplicate
while (first_item.fetch()[0]._id === second_item.fetch()[0]._id) {
second_item = random_item();
}
};
generate_pair();
Meteor.publish('first_item', function() {
return first_item;
});
// Is this good Meteor style to have two publications doing essentially the same thing?
Meteor.publish('second_item', function() {
return second_item;
});

The problem with your approach is that subscribing to the same publication with the same arguments (no arguments in this case) over and over in the client will only get you subscribed only once to the server-side logic, this is because Meteor is optimizing its internal Pub/Sub mechanism.
To truly discard the previous subscription and get the server-side publish code to re-execute and send two new random documents, you need to introduce a useless random argument to your publication, your client-side code will subscribe over and over to the publication with a random number and each time you'll get unsubscribed and resubscribed to new random documents.
Here is a full implementation of this pattern :
server/server.js
function randomItemId(){
// get the total items count of the collection
var itemsCount = Items.find().count();
// get a random number (N) between [0 , itemsCount - 1]
var random = Math.floor(Random.fraction() * itemsCount);
// choose a random item by skipping N items
var item = Items.findOne({},{
skip: random
});
return item && item._id;
}
function generateItemIdPair(){
// return an array of 2 random items ids
var result = [
randomItemId(),
randomItemId()
];
//
while(result[0] == result[1]){
result[1] = randomItemId();
}
//
return result;
}
Meteor.publish("randomItems",function(random){
var pair = generateItemIdPair();
// publish the 2 items whose ids are in the random pair
return Items.find({
_id: {
$in: pair
}
});
});
client/client.js
// every 5 seconds subscribe to 2 new random items
Meteor.setInterval(function(){
Meteor.subscribe("randomItems", Random.fraction(), function(){
console.log("fetched these random items :", Items.find().fetch());
});
}, 5000);
You'll need to meteor add random for this code to work.

Meteor.publish 'randomDocs', ->
ids = _(Docs.find().fetch()).pluck '_id'
randomIds = _(ids).sample 2
Docs.find _id: $in: randomIds

Here's another approach, uses the excellent publishComposite package to populate matches in a local (client-only) collection so it doesn't conflict with other uses of the main collection:
if (Meteor.isClient) {
randomDocs = new Mongo.Collection('randomDocs');
}
if (Meteor.isServer) {
Meteor.publishComposite("randomDocs",function(select_count) {
return {
collectionName:"randomDocs",
find: function() {
let self=this;
_.sample(baseCollection.find({}).fetch(),select_count).forEach(function(doc) {
self.added("randomDocs",doc._id,doc);
},self);
self.ready();
}
}
});
}
in onCreated: this.subscribe("randomDocs",3);
(then in a helper): return randomDocs.find({},{$limit:3});

Related

Publishing Counts for objects in Meteor

On my server side I have for various objects a publication which basically returns the count. Every different object has a different publication name like this:
Meteor.publish('object1Count', function(...
Meteor.publish('object2Count', function(...
Which are something like this:
Meteor.publish('object1Count', function(arg) {
var self = this;
var count = 0;
var initializing = true;
var query = arg?{arg:arg}:{};
var projection = !arg?{limit:1}:{};
var handle = Object1.find(query, projection).observeChanges({
added: function (idx) {
count++;
if (!initializing)
self.changed("totalcounts", 1, {count: count});
},
removed: function (idx) {
count--;
self.changed("totalcounts", 1, {count: count});
}
});
initializing = false;
self.added("totalcounts", 1, {count: count});
self.ready();
self.onStop(function () {
handle.stop();
});
});
But as you see inside each of these methods there will be this line
self.added("totalcounts", 1, {count: count});
In fact on the client side when I need to access the count of an Object I do like this:
template.subscribe('object1Count', template.reactiveEventId.get());
...
TotalCounts = (typeof TotalCounts==='undefined')?new Mongo.Collection("totalcounts"):TotalCounts;
It apparently works, but now that I read it twice I wonder why, the "totalcounts" collection looks like the same for all the objects, so if I switch between pages needing different totalcounts (for different objects), I guess that the client destroys the local collection totalcounts and creates a new one. Does this happen also server side?
So finally my question is: what is the best practice? The projects need the total counts for various reasons: pagination, charts, etc.. I want to create the total counts server side and just pass the minimum data for that. Should I create different "totalcounts" for every object? What's the efficient way of doing this?
Thanks
self.added("totalcounts", 1, {count: count});
it means add to collection name totalcounts a document with _id is 1 and the rest of data is {count: count}.
Because they have the same _id then you can't make more than 1 subscription.
Btw, when the template is "unmounted" it will auto stop subscriptions.

Meteor Publish Distinct Values of Field in Collection

I'm stuck on a pretty simple scenario in Meteor:
I have a huge collection of things with many fields, some of them containing quite a bit of text.
I want to create a page for searching that collection.
One of the fields that each item in the collection has is "category".
I'd like to give the user the ability to filter by that category.
For that, I need to publish just the distinct values of the category field in the collection.
I can't figure out a way to do that without publishing the whole collection which takes way too long. How can I publish just the distinct categories and use them to fill a dropdown?
Bonus question and somewhat related: How do I publish a count of all items in the collection without publishing the whole collection?
A good starting point to make this easier would be to normalize your categories into a separate database collection.
However assuming that is not possible or practical, the best (though imperfect) solution will be to publish two separate versions of your collection, one which returns only the categories field of the entire collection and another which returns all fields of the collection for the selected category only. That would look like the following:
// SERVER
Meteor.startup(function(){
Meteor.publish('allThings', function() {
// return only id and categories field for all your things
return Things.find({}, {fields: {categories: 1}});
});
Meteor.publish('thingsByCategory', function(category) {
// return all fields for things having the selected category
// you can then subscribe via something like a client-side Session variable
// e.g., Meteor.subscribe("thingsByCategory", Session.get("category"));
return Things.find({category: category});
});
});
Note that you will still need to assemble your array of categories client side from the Things cursor (for example, by using underscore's _.pluck and _.uniq methods to grab the categories and remove any dups). But the data set will be much smaller as you are only working with single-field documents now.
(Note that ideally, you would want to use Mongo's distinct() method in your publish function to publish only the distinct categories, but that is not possible directly as it returns an array which cannot be published).
You could use the internal this._documents.collectionName to only send new categories down to the client. Tracking which categories to remove becomes a bit ugly so you probably will still end up maintaining a separate 'categories' collection eventually.
Example:
Meteor.publish( 'categories', function(){
var self = this;
largeCollection.find({},{fields: {category: 1}).observeChanges({
added: function( id, doc ){
if( ! self._documents.categories[ doc.category ] )
self.added( 'categories', doc.category, {category: doc.category});
},
removed: function(){
_.keys( self._documents.categories ).forEach( category ){
if ( largeCollection.find({category: category},{limit: 1}).count() === 0 )
self.removed( 'categories', category );
}
}
});
self.ready();
};
Re: the bonus question, publishing counts: take a look at the meteorite package publish-counts. I think that does what you want.
These patterns might be helpful to you. Here is a publication that publishes counts:
/*****************************************************************************/
/* Counts Publish Function
/*****************************************************************************/
// server: publish the current size of a collection
Meteor.publish("countsByProject", function (arguments) {
var self = this;
if (this.userId) {
var roles = Meteor.users.findOne({_id : this.userId}).roles;
if ( _.contains(roles, arguments.projectId) ) {
//check(arguments.video_id, Integer);
// observeChanges only returns after the initial `added` callbacks
// have run. Until then, we don't want to send a lot of
// `self.changed()` messages - hence tracking the
// `initializing` state.
Videos.find({'projectId': arguments.projectId}).forEach(function (video) {
var count = 0;
var initializing = true;
var video_id = video.video_id;
var handle = Observations.find({video_id: video_id}).observeChanges({
added: function (id) {
//console.log(video._id);
count++;
if (!initializing)
self.changed("counts", video_id, {'video_id': video_id, 'observations': count});
},
removed: function (id) {
count--;
self.changed("counts", video_id, {'video_id': video_id, 'observations': count});
}
// don't care about changed
});
// Instead, we'll send one `self.added()` message right after
// observeChanges has returned, and mark the subscription as
// ready.
initializing = false;
self.added("counts", video_id, {'video_id': video_id, 'observations': count});
self.ready();
// Stop observing the cursor when client unsubs.
// Stopping a subscription automatically takes
// care of sending the client any removed messages.
self.onStop(function () {
handle.stop();
});
}); // Videos forEach
} //if _.contains
} // if userId
return this.ready();
});
And here is one that creates a new collection from a specific field:
/*****************************************************************************/
/* Tags Publish Functions
/*****************************************************************************/
// server: publish the current size of a collection
Meteor.publish("tags", function (arguments) {
var self = this;
if (this.userId) {
var roles = Meteor.users.findOne({_id : this.userId}).roles;
if ( _.contains(roles, arguments.projectId) ) {
var observations, tags, initializing, projectId;
initializing = true;
projectId = arguments.projectId;
observations = Observations.find({'projectId' : projectId}, {fields: {tags: 1}}).fetch();
tags = _.pluck(observations, 'tags');
tags = _.flatten(tags);
tags = _.uniq(tags);
var handle = Observations.find({'projectId': projectId}, {fields : {'tags' : 1}}).observeChanges({
added: function (id, fields) {
if (!initializing) {
tags = _.union(tags, fields.tags);
self.changed("tags", projectId, {'projectId': projectId, 'tags': tags});
}
},
removed: function (id) {
self.changed("tags", projectId, {'projectId': projectId, 'tags': tags});
}
});
initializing = false;
self.added("tags", projectId, {'projectId': projectId, 'tags': tags});
self.ready();
self.onStop(function () {
handle.stop();
});
} //if _.contains
} // if userId
return self.ready();
});
I have not tested it on Meteor, and according to the replies, I'm getting skeptical that it will work but using a mongoDB distinct would do the trick.
http://docs.mongodb.org/manual/reference/method/db.collection.distinct/

Linear funnel from a collection of events with MongoDB aggregation, is it possible?

I have a number of event documents, each event has a number of fields, but the ones that are relevant for my query are:
person_id - a reference to the person that triggered the event
event - a string key to identify the event
occurred_at - the utc of the time the event occurred
What I want to achieve is:
for a list of event keys eg `['event_1','event_2', 'event_3']
get counts of the number of people that performed each event and all the event previous to that event, in order, ie:
the number of people who performed event_1
the number of people who performed event_1, and then event_2
the number of people who performed event_1, and then event_2, and then event_3
etc
a secondary goal is to be able to get the average occurred_at date for each event so that I can calculate the average time between each event
The best I have got is the following two map reduces:
db.events.mapReduce(function () {
emit(this.person_id, {
e: [{
e: this.event,
o: this.occurred_at
}]
})
}, function (key, values) {
return {
e: [].concat.apply([], values.map(function (x) {
return x.e
}))
}
}, {
query: {
account_id: ObjectId('52011239b1b9229f92000003'),
event: {
$in: ['event_a', 'event_b', 'event_c','event_d','event_e','event_f']
}
},
out: 'people_funnel_chains',
sort: { person_id: 1, occurred_at: 1 }
})
And then:
db.people_funnel_chains.mapReduce(function() {
funnel = ['event_a', 'event_b', 'event_c','event_d','event_e','event_f']
events = this.value.e;
for (var e in funnel) {
e = funnel[e];
if ((i = events.map(function (x) {
return x.e
}).indexOf(e)) > -1) {
emit(e, { c: 1, o: events[i].o })
events = events.slice(i + 1, events.length);
} else {
break;
}
}
}, function(key,values) {
return {
c: Array.sum(values.map(function(x) { return x.c })),
o: new Date(Array.sum(values.map(function(x) { return x.o.getTime() }))/values.length)
};
}, { out: {inline: 1} })
I would like to achieve this is in real time using the aggregate framework but can see no way to do it. For 10s of thousands of records this is taking 10s of seconds, I can run it incrementally which means its fast enough for new data coming in but if I want to modify the original query (eg change the event chain) it can't be done in a single request which I would love it to be able to do.
Update using Cursor.forEach()
Using Cursor.forEach() I've managed to get huge improvement on this (essentially removing the requirement for the first map reduce).
var time = new Date().getTime(), funnel_event_keys = ['event_a', 'event_b', 'event_c','event_d','event_e','event_f'], looking_for_i = 0, looking_for = funnel_event_keys[0], funnel = {}, last_person_id = null;
for (var i in funnel_event_keys) { funnel[funnel_event_keys[i]] = [0,null] };
db.events.find({
account_id: ObjectId('52011239b1b9229f92000003'),
event: {
$in: funnel_event_keys
}
}, { person_id: 1, event: 1, occurred_at: 1 }).sort({ person_id: 1, occurred_at: 1 }).forEach(function(e) {
var current_person_id = e['person_id'].str;
if (last_person_id != current_person_id) {
looking_for_i = 0;
looking_for = funnel_event_keys[0]
}
if (e['event'] == looking_for) {
var funnel_event = funnel[looking_for]
funnel_event[0] = funnel_event[0] + 1;
funnel_event[1] = ((funnel_event[1] || e['occurred_at'].getTime()) + e['occurred_at'].getTime())/2;
looking_for_i = looking_for_i + 1;
looking_for = funnel_event_keys[looking_for_i]
}
last_person_id = current_person_id;
})
funnel;
new Date().getTime() - time;
I wonder if something custom with data in memory would be able to improve on this? Getting 100s of thousands of records out of MongoDB into memory (on a different machine) is going to be a bottle neck, is there a technology I'm not aware of that could do this?
I wrote up a complete answer on my MongoDB blog but as a summary, what you have to do is project your actions based on which ones you care about to map values of action field into appropriate key names, group by person aggregating for the three actions when they did them (and optionally how many times) and then project new fields which check if action2 was done after action1, and action3 was done after action2... Last phase just sums up the number of people who did just 1, or 1 and then 2, or 1 and then 2 and then 3.
Using a function to generate the aggregation pipeline, it's possible to generate results based on array of actions passed in.
In my test case, the entire pipeline ran in under 200ms for a collection of 40,000 documents (this was on my small laptop).
As it was correctly pointed out, the general solution I describe assumes that while an actor can take any action multiple times that they can only advance from action1 to action2 but that they cannot skip directly from action1 to action3 (interpreting action order as describing prerequisites where you cannot do action3 until you've done action2).
As it turns out, aggregation framework can be used even for sequences of events where the order is completely arbitrary but you still want to know how many people at some point did the sequence action1, action2, action3.
The main adjustment to make on the original answer is to add an extra two-stage step in the middle. This step unwinds the collected by person document to re-group it finding the first occurrence of the second action that comes after the first occurrence of the first action.
Once we have that the final comparison becomes for action1, followed by earliest occurrence of action2 and compare that to the latest occurrence of action3.
It can probably be generalized to handle arbitrary number of events but every additional event past two would add two more stages to the aggregation.
Here is my write-up of the modification of the pipeline to achieve the answer you are looking for.

Average Aggregation Queries in Meteor

Ok, still in my toy app, I want to find out the average mileage on a group of car owners' odometers. This is pretty easy on the client but doesn't scale. Right? But on the server, I don't exactly see how to accomplish it.
Questions:
How do you implement something on the server then use it on the client?
How do you use the $avg aggregation function of mongo to leverage its optimized aggregation function?
Or alternatively to (2) how do you do a map/reduce on the server and make it available to the client?
The suggestion by #HubertOG was to use Meteor.call, which makes sense and I did this:
# Client side
Template.mileage.average_miles = ->
answer = null
Meteor.call "average_mileage", (error, result) ->
console.log "got average mileage result #{result}"
answer = result
console.log "but wait, answer = #{answer}"
answer
# Server side
Meteor.methods average_mileage: ->
console.log "server mileage called"
total = count = 0
r = Mileage.find({}).forEach (mileage) ->
total += mileage.mileage
count += 1
console.log "server about to return #{total / count}"
total / count
That would seem to work fine, but it doesn't because as near as I can tell Meteor.call is an asynchronous call and answer will always be a null return. Handling stuff on the server seems like a common enough use case that I must have just overlooked something. What would that be?
Thanks!
As of Meteor 0.6.5, the collection API doesn't support aggregation queries yet because there's no (straightforward) way to do live updates on them. However, you can still write them yourself, and make them available in a Meteor.publish, although the result will be static. In my opinion, doing it this way is still preferable because you can merge multiple aggregations and use the client-side collection API.
Meteor.publish("someAggregation", function (args) {
var sub = this;
// This works for Meteor 0.6.5
var db = MongoInternals.defaultRemoteCollectionDriver().mongo.db;
// Your arguments to Mongo's aggregation. Make these however you want.
var pipeline = [
{ $match: doSomethingWith(args) },
{ $group: {
_id: whatWeAreGroupingWith(args),
count: { $sum: 1 }
}}
];
db.collection("server_collection_name").aggregate(
pipeline,
// Need to wrap the callback so it gets called in a Fiber.
Meteor.bindEnvironment(
function(err, result) {
// Add each of the results to the subscription.
_.each(result, function(e) {
// Generate a random disposable id for aggregated documents
sub.added("client_collection_name", Random.id(), {
key: e._id.somethingOfInterest,
count: e.count
});
});
sub.ready();
},
function(error) {
Meteor._debug( "Error doing aggregation: " + error);
}
)
);
});
The above is an example grouping/count aggregation. Some things of note:
When you do this, you'll naturally be doing an aggregation on server_collection_name and pushing the results to a different collection called client_collection_name.
This subscription isn't going to be live, and will probably be updated whenever the arguments change, so we use a really simple loop that just pushes all the results out.
The results of the aggregation don't have Mongo ObjectIDs, so we generate some arbitrary ones of our own.
The callback to the aggregation needs to be wrapped in a Fiber. I use Meteor.bindEnvironment here but one can also use a Future for more low-level control.
If you start combining the results of publications like these, you'll need to carefully consider how the randomly generated ids impact the merge box. However, a straightforward implementation of this is just a standard database query, except it is more convenient to use with Meteor APIs client-side.
TL;DR version: Almost anytime you are pushing data out from the server, a publish is preferable to a method.
For more information about different ways to do aggregation, check out this post.
I did this with the 'aggregate' method. (ver 0.7.x)
if(Meteor.isServer){
Future = Npm.require('fibers/future');
Meteor.methods({
'aggregate' : function(param){
var fut = new Future();
MongoInternals.defaultRemoteCollectionDriver().mongo._getCollection(param.collection).aggregate(param.pipe,function(err, result){
fut.return(result);
});
return fut.wait();
}
,'test':function(param){
var _param = {
pipe : [
{ $unwind:'$data' },
{ $match:{
'data.y':"2031",
'data.m':'01',
'data.d':'01'
}},
{ $project : {
'_id':0
,'project_id' : "$project_id"
,'idx' : "$data.idx"
,'y' : '$data.y'
,'m' : '$data.m'
,'d' : '$data.d'
}}
],
collection:"yourCollection"
}
Meteor.call('aggregate',_param);
}
});
}
If you want reactivity, use Meteor.publish instead of Meteor.call. There's an example in the docs where they publish the number of messages in a given room (just above the documentation for this.userId), you should be able to do something similar.
You can use Meteor.methods for that.
// server
Meteor.methods({
average: function() {
...
return something;
},
});
// client
var _avg = { /* Create an object to store value and dependency */
dep: new Deps.Dependency();
};
Template.mileage.rendered = function() {
_avg.init = true;
};
Template.mileage.averageMiles = function() {
_avg.dep.depend(); /* Make the function rerun when _avg.dep is touched */
if(_avg.init) { /* Fetch the value from the server if not yet done */
_avg.init = false;
Meteor.call('average', function(error, result) {
_avg.val = result;
_avg.dep.changed(); /* Rerun the helper */
});
}
return _avg.val;
});

how to calculate count and unique count over two fields in mongo reduce function

I have a link tracking table that has (amongst other fields) track_redirect and track_userid. I would like to output both the total count for a given link, and also the unique count - counting duplicates by the user id. So we can differentiate if someone has clicked the same link 5 times.
I've tried emitting this.track_userid in both the key and values parts but can't get to grips with how to correctly access them in the reduce function.
So if I roll back to when it actually worked, I have the very simple code below - just like it would be in a 'my first mapreduce function' example
map
function() {
if(this.track_redirect) {
emit(this.track_redirect,1);
}
}
reduce
function(k, vals) {
var sum = 0;
for (var i in vals) {
sum += vals[i];
}
return sum;
}
I'd like to know the correct way to emit the additional userid information and access it in the mapreduce please. or am i thinking about it in the wrong way?
in case it's not clear, I don't want to calculate the total clicks a userid has made, but to count the unique clicks of each url + userid - not counting any duplicate clicks a userid made on each link
can someone point me in the right direction please? thanks!
You can actually pass arbitrary object on the second parameter of the emit call. That means you can take advantage of this and store the userid in it. For example, your map function can look like this:
var mapFunc = function() {
if (this.track_redirect) {
var tempDoc = {};
tempDoc[this.track_userid] = 1;
emit(this.track_redirect, {
users_clicked: tempDoc,
total_clicks: 1
});
}
};
And your reduce function might look like this:
var reduceFunc = function(key, values) {
var summary = {
users_clicked: {},
total_clicks: 0
};
values.forEach(function (doc) {
summary.total_clicks += doc.total_clicks;
// Merge the properties of 2 objects together
// (and these are actually the userids)
Object.extend(summary.users_clicked, doc.users_clicked);
});
return summary;
};
The users_clicked property of the summary object basically stores the id of every user as a property (since you can't have duplicate properties, you can guarantee that it will store unique users). Also note that you have to be careful of the fact that some of the values passed to the reduce function can be result of a previous reduce and the sample code above takes that into account. You can find more about the said behavior in the docs here.
In order to get the unique count, you can pass in the finalizer function that gets called when the reduce phase is completed:
var finalFunc = function(key, value) {
// Counts the keys of an object. Taken from:
// http://stackoverflow.com/questions/18912/how-to-find-keys-of-a-hash
var countKeys = function(obj) {
var count = 0;
for(var i in obj) {
if (obj.hasOwnProperty(i))
{
count++;
}
}
return count;
};
return {
redirect: key,
total_clicks: value.total_clicks,
unique_clicks: countKeys(value.users_clicked)
};
};
Finally, you can execute the map reduce job like this (modify the out attribute to fit your needs):
db.users.mapReduce(mapFunc, reduceFunc, { finalize: finalFunc, out: { inline: 1 }});